3511 files changed, 166096 insertions, 39677 deletions
diff --git a/test/Analysis/BasicAA/2008-04-15-Byval.ll b/test/Analysis/BasicAA/2008-04-15-Byval.ll
index 428189a8a874..2ea0314d5b60 100644
--- a/test/Analysis/BasicAA/2008-04-15-Byval.ll
+++ b/test/Analysis/BasicAA/2008-04-15-Byval.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -std-compile-opts -S | FileCheck %s
+; RUN: opt < %s -O3 -S | FileCheck %s
 ; ModuleID = 'small2.c'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
diff --git a/test/Analysis/BasicAA/assume.ll b/test/Analysis/BasicAA/assume.ll
new file mode 100644
index 000000000000..e163b5a4161c
--- /dev/null
+++ b/test/Analysis/BasicAA/assume.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #0
+declare void @llvm.assume(i1) #0
+
+define void @test1(i8* %P, i8* %Q) nounwind ssp {
+  tail call void @llvm.assume(i1 true)
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+  ret void
+
+; CHECK-LABEL: Function: test1:
+
+; CHECK: MayAlias:	i8* %P, i8* %Q
+; CHECK: NoModRef:  Ptr: i8* %P	<->  tail call void @llvm.assume(i1 true)
+; CHECK: NoModRef:  Ptr: i8* %Q	<->  tail call void @llvm.assume(i1 true)
+; CHECK: Both ModRef:  Ptr: i8* %P	<->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: Both ModRef:  Ptr: i8* %Q	<->  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: NoModRef:   tail call void @llvm.assume(i1 true) <->   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+; CHECK: NoModRef:   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false) <->   tail call void @llvm.assume(i1 true)
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Analysis/BasicAA/full-store-partial-alias.ll b/test/Analysis/BasicAA/full-store-partial-alias.ll
index 4de2daf6b2dc..9699d92ea83f 100644
--- a/test/Analysis/BasicAA/full-store-partial-alias.ll
+++ b/test/Analysis/BasicAA/full-store-partial-alias.ll
@@ -29,9 +29,9 @@ entry:
   ret i32 %tmp5.lobit
 }
 
-!0 = metadata !{metadata !4, metadata !4, i64 0}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !5, metadata !5, i64 0}
-!4 = metadata !{metadata !"double", metadata !1}
-!5 = metadata !{metadata !"int", metadata !1}
+!0 = !{!4, !4, i64 0}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA", null}
+!3 = !{!5, !5, i64 0}
+!4 = !{!"double", !1}
+!5 = !{!"int", !1}
diff --git a/test/Analysis/BasicAA/gcsetest.ll b/test/Analysis/BasicAA/gcsetest.ll
index db557b7a248b..64792eb00a2f 100644
--- a/test/Analysis/BasicAA/gcsetest.ll
+++ b/test/Analysis/BasicAA/gcsetest.ll
@@ -1,5 +1,5 @@
-; Test that GCSE uses basicaa to do alias analysis, which is capable of 
-; disambiguating some obvious cases.  All loads should be removable in 
+; Test that GCSE uses basicaa to do alias analysis, which is capable of
+; disambiguating some obvious cases.  All loads should be removable in
 ; this testcase.
 
 ; RUN: opt < %s -basicaa -gvn -instcombine -dce -S | FileCheck %s
diff --git a/test/Analysis/BasicAA/invariant_load.ll b/test/Analysis/BasicAA/invariant_load.ll
index 09b54013a06b..bc629cdfe9ea 100644
--- a/test/Analysis/BasicAA/invariant_load.ll
+++ b/test/Analysis/BasicAA/invariant_load.ll
@@ -23,4 +23,4 @@ entry:
 ; CHECK: %add = add nsw i32 %0, 1
 }
 
-!3 = metadata !{}
+!3 = !{}
diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll
index 8421faf9c2c4..0d8bf71d42fe 100644
--- a/test/Analysis/BasicAA/modref.ll
+++ b/test/Analysis/BasicAA/modref.ll
@@ -8,20 +8,20 @@ declare void @external(i32*)
 define i32 @test0(i8* %P) {
   %A = alloca i32
   call void @external(i32* %A)
-  
+
   store i32 0, i32* %A
-  
+
   call void @llvm.memset.p0i8.i32(i8* %P, i8 0, i32 42, i32 1, i1 false)
-  
+
   %B = load i32* %A
   ret i32 %B
-  
-; CHECK: @test0
+
+; CHECK-LABEL: @test0
 ; CHECK: ret i32 0
 }
 
 define i8 @test1() {
-; CHECK: @test1
+; CHECK-LABEL: @test1
   %A = alloca i8
   %B = alloca i8
 
@@ -35,7 +35,7 @@ define i8 @test1() {
 }
 
 define i8 @test2(i8* %P) {
-; CHECK: @test2
+; CHECK-LABEL: @test2
   %P2 = getelementptr i8* %P, i32 127
   store i8 1, i8* %P2  ;; Not dead across memset
   call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false)
@@ -45,12 +45,12 @@ define i8 @test2(i8* %P) {
 }
 
 define i8 @test2a(i8* %P) {
-; CHECK: @test2
+; CHECK-LABEL: @test2
   %P2 = getelementptr i8* %P, i32 126
-  
+
   ;; FIXME: DSE isn't zapping this dead store.
   store i8 1, i8* %P2  ;; Dead, clobbered by memset.
-  
+
   call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false)
   %A = load i8* %P2
   ret i8 %A
@@ -59,11 +59,11 @@ define i8 @test2a(i8* %P) {
 }
 
 define void @test3(i8* %P, i8 %X) {
-; CHECK: @test3
+; CHECK-LABEL: @test3
 ; CHECK-NOT: store
 ; CHECK-NOT: %Y
   %Y = add i8 %X, 1     ;; Dead, because the only use (the store) is dead.
-  
+
   %P2 = getelementptr i8* %P, i32 2
   store i8 %Y, i8* %P2  ;; Not read by lifetime.end, should be removed.
 ; CHECK: store i8 2, i8* %P2
@@ -75,9 +75,9 @@ define void @test3(i8* %P, i8 %X) {
 }
 
 define void @test3a(i8* %P, i8 %X) {
-; CHECK: @test3a
+; CHECK-LABEL: @test3a
   %Y = add i8 %X, 1     ;; Dead, because the only use (the store) is dead.
-  
+
   %P2 = getelementptr i8* %P, i32 2
   store i8 %Y, i8* %P2
 ; CHECK-NEXT: call void @llvm.lifetime.end
@@ -95,7 +95,7 @@ define i32 @test4(i8* %P) {
   %tmp2 = load i32* @G1
   %sub = sub i32 %tmp2, %tmp
   ret i32 %sub
-; CHECK: @test4
+; CHECK-LABEL: @test4
 ; CHECK-NOT: load
 ; CHECK: memset.p0i8.i32
 ; CHECK-NOT: load
@@ -123,7 +123,7 @@ define i8 @test6(i8* %p, i8* noalias %a) {
   %y = load i8* %a
   %z = add i8 %x, %y
   ret i8 %z
-; CHECK: @test6
+; CHECK-LABEL: @test6
 ; CHECK: load i8* %a
 ; CHECK-NOT: load
 ; CHECK: ret
@@ -139,7 +139,7 @@ entry:
   call void @test7decl(i32* %add.ptr)
   %tmp = load i32* %x, align 4
   ret i32 %tmp
-; CHECK: @test7(
+; CHECK-LABEL: @test7(
 ; CHECK: store i32 0
 ; CHECK: call void @test7decl
 ; CHECK: load i32*
diff --git a/test/Analysis/BasicAA/phi-aa.ll b/test/Analysis/BasicAA/phi-aa.ll
index 74279e1c4c93..c1100f1d36fc 100644
--- a/test/Analysis/BasicAA/phi-aa.ll
+++ b/test/Analysis/BasicAA/phi-aa.ll
@@ -39,6 +39,7 @@ return:
 
 ; CHECK-LABEL: pr18068
 ; CHECK: MayAlias: i32* %0, i32* %arrayidx5
+; CHECK: NoAlias: i32* %arrayidx13, i32* %arrayidx5
 
 define i32 @pr18068(i32* %jj7, i32* %j) {
 entry:
diff --git a/test/Analysis/BasicAA/zext.ll b/test/Analysis/BasicAA/zext.ll
new file mode 100644
index 000000000000..b59d16cc5f51
--- /dev/null
+++ b/test/Analysis/BasicAA/zext.ll
@@ -0,0 +1,209 @@
+; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: test_with_zext
+; CHECK:  NoAlias: i8* %a, i8* %b
+
+define void @test_with_zext() {
+  %1 = tail call i8* @malloc(i64 120)
+  %a = getelementptr inbounds i8* %1, i64 8
+  %2 = getelementptr inbounds i8* %1, i64 16
+  %3 = zext i32 3 to i64
+  %b = getelementptr inbounds i8* %2, i64 %3
+  ret void
+}
+
+; CHECK-LABEL: test_with_lshr
+; CHECK:  NoAlias: i8* %a, i8* %b
+
+define void @test_with_lshr(i64 %i) {
+  %1 = tail call i8* @malloc(i64 120)
+  %a = getelementptr inbounds i8* %1, i64 8
+  %2 = getelementptr inbounds i8* %1, i64 16
+  %3 = lshr i64 %i, 2
+  %b = getelementptr inbounds i8* %2, i64 %3
+  ret void
+}
+
+; CHECK-LABEL: test_with_a_loop
+; CHECK:  NoAlias: i8* %a, i8* %b
+
+define void @test_with_a_loop(i8* %mem) {
+  br label %for.loop
+
+for.loop:
+  %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ]
+  %a = getelementptr inbounds i8* %mem, i64 8
+  %a.plus1 = getelementptr inbounds i8* %mem, i64 16
+  %i.64 = zext i32 %i to i64
+  %b = getelementptr inbounds i8* %a.plus1, i64 %i.64
+  %i.plus1 = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.plus1, 10
+  br i1 %cmp, label %for.loop.exit, label %for.loop
+
+for.loop.exit:
+  ret void
+}
+
+; CHECK-LABEL: test_with_varying_base_pointer_in_loop
+; CHECK:  NoAlias: i8* %a, i8* %b
+
+define void @test_with_varying_base_pointer_in_loop(i8* %mem.orig) {
+  br label %for.loop
+
+for.loop:
+  %mem = phi i8* [ %mem.orig, %0 ], [ %mem.plus1, %for.loop ]
+  %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ]
+  %a = getelementptr inbounds i8* %mem, i64 8
+  %a.plus1 = getelementptr inbounds i8* %mem, i64 16
+  %i.64 = zext i32 %i to i64
+  %b = getelementptr inbounds i8* %a.plus1, i64 %i.64
+  %i.plus1 = add nuw nsw i32 %i, 1
+  %mem.plus1 = getelementptr inbounds i8* %mem, i64 8
+  %cmp = icmp eq i32 %i.plus1, 10
+  br i1 %cmp, label %for.loop.exit, label %for.loop
+
+for.loop.exit:
+  ret void
+}
+
+; CHECK-LABEL: test_sign_extension
+; CHECK:  PartialAlias: i64* %b.i64, i8* %a
+
+define void @test_sign_extension(i32 %p) {
+  %1 = tail call i8* @malloc(i64 120)
+  %p.64 = zext i32 %p to i64
+  %a = getelementptr inbounds i8* %1, i64 %p.64
+  %p.minus1 = add i32 %p, -1
+  %p.minus1.64 = zext i32 %p.minus1 to i64
+  %b.i8 = getelementptr inbounds i8* %1, i64 %p.minus1.64
+  %b.i64 = bitcast i8* %b.i8 to i64*
+  ret void
+}
+
+; CHECK-LABEL: test_fe_tools
+; CHECK:  PartialAlias: i32* %a, i32* %b
+
+define void @test_fe_tools([8 x i32]* %values) {
+  br label %reorder
+
+for.loop:
+  %i = phi i32 [ 0, %reorder ], [ %i.next, %for.loop ]
+  %idxprom = zext i32 %i to i64
+  %b = getelementptr inbounds [8 x i32]* %values, i64 0, i64 %idxprom
+  %i.next = add nuw nsw i32 %i, 1
+  %1 = icmp eq i32 %i.next, 10
+  br i1 %1, label %for.loop.exit, label %for.loop
+
+reorder:
+  %a = getelementptr inbounds [8 x i32]* %values, i64 0, i64 1
+  br label %for.loop
+
+for.loop.exit:
+  ret void
+}
+
+@b = global i32 0, align 4
+@d = global i32 0, align 4
+
+; CHECK-LABEL: test_spec2006
+; CHECK:  PartialAlias: i32** %x, i32** %y
+
+define void @test_spec2006() {
+  %h = alloca [1 x [2 x i32*]], align 16
+  %d.val = load i32* @d, align 4
+  %d.promoted = sext i32 %d.val to i64
+  %1 = icmp slt i32 %d.val, 2
+  br i1 %1, label %.lr.ph, label %3
+
+.lr.ph:                                           ; preds = %0
+  br label %2
+
+; <label>:2                                       ; preds = %.lr.ph, %2
+  %i = phi i32 [ %d.val, %.lr.ph ], [ %i.plus1, %2 ]
+  %i.promoted = sext i32 %i to i64
+  %x = getelementptr inbounds [1 x [2 x i32*]]* %h, i64 0, i64 %d.promoted, i64 %i.promoted
+  %i.plus1 = add nsw i32 %i, 1
+  %cmp = icmp slt i32 %i.plus1, 2
+  br i1 %cmp, label %2, label %3
+
+; <label>:3                                      ; preds = %._crit_edge, %0
+  %y = getelementptr inbounds [1 x [2 x i32*]]* %h, i64 0, i64 0, i64 1
+  ret void
+}
+
+; CHECK-LABEL: test_modulo_analysis_easy_case
+; CHECK:  NoAlias: i32** %x, i32** %y
+
+define void @test_modulo_analysis_easy_case(i64 %i) {
+  %h = alloca [1 x [2 x i32*]], align 16
+  %x = getelementptr inbounds [1 x [2 x i32*]]* %h, i64 0, i64 %i, i64 0
+  %y = getelementptr inbounds [1 x [2 x i32*]]* %h, i64 0, i64 0, i64 1
+  ret void
+}
+
+; CHECK-LABEL: test_modulo_analysis_in_loop
+; CHECK:  NoAlias: i32** %x, i32** %y
+
+define void @test_modulo_analysis_in_loop() {
+  %h = alloca [1 x [2 x i32*]], align 16
+  br label %for.loop
+
+for.loop:
+  %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ]
+  %i.promoted = sext i32 %i to i64
+  %x = getelementptr inbounds [1 x [2 x i32*]]* %h, i64 0, i64 %i.promoted, i64 0
+  %y = getelementptr inbounds [1 x [2 x i32*]]* %h, i64 0, i64 0, i64 1
+  %i.plus1 = add nsw i32 %i, 1
+  %cmp = icmp slt i32 %i.plus1, 2
+  br i1 %cmp, label %for.loop, label %for.loop.exit
+
+for.loop.exit:
+  ret void
+}
+
+; CHECK-LABEL: test_modulo_analysis_with_global
+; CHECK:  PartialAlias: i32** %x, i32** %y
+
+define void @test_modulo_analysis_with_global() {
+  %h = alloca [1 x [2 x i32*]], align 16
+  %b = load i32* @b, align 4
+  %b.promoted = sext i32 %b to i64
+  br label %for.loop
+
+for.loop:
+  %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ]
+  %i.promoted = sext i32 %i to i64
+  %x = getelementptr inbounds [1 x [2 x i32*]]* %h, i64 0, i64 %i.promoted, i64 %b.promoted
+  %y = getelementptr inbounds [1 x [2 x i32*]]* %h, i64 0, i64 0, i64 1
+  %i.plus1 = add nsw i32 %i, 1
+  %cmp = icmp slt i32 %i.plus1, 2
+  br i1 %cmp, label %for.loop, label %for.loop.exit
+
+for.loop.exit:
+  ret void
+}
+
+; CHECK-LABEL: test_const_eval
+; CHECK: NoAlias: i8* %a, i8* %b
+define void @test_const_eval(i8* %ptr, i64 %offset) {
+  %a = getelementptr inbounds i8* %ptr, i64 %offset
+  %a.dup = getelementptr inbounds i8* %ptr, i64 %offset
+  %three = zext i32 3 to i64
+  %b = getelementptr inbounds i8* %a.dup, i64 %three
+  ret void
+}
+
+; CHECK-LABEL: test_const_eval_scaled
+; CHECK: MustAlias: i8* %a, i8* %b
+define void @test_const_eval_scaled(i8* %ptr) {
+  %three = zext i32 3 to i64
+  %six = mul i64 %three, 2
+  %a = getelementptr inbounds i8* %ptr, i64 %six
+  %b = getelementptr inbounds i8* %ptr, i64 6
+  ret void
+}
+
+; Function Attrs: nounwind
+declare noalias i8* @malloc(i64)
diff --git a/test/Analysis/BlockFrequencyInfo/bad_input.ll b/test/Analysis/BlockFrequencyInfo/bad_input.ll
index bcdc1e6f0bc0..da62dca67a8b 100644
--- a/test/Analysis/BlockFrequencyInfo/bad_input.ll
+++ b/test/Analysis/BlockFrequencyInfo/bad_input.ll
@@ -23,7 +23,7 @@ for.end:
   ret void
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 0, i32 3}
+!0 = !{!"branch_weights", i32 0, i32 3}
 
 ; CHECK-LABEL: Printing analysis {{.*}} for function 'infinite_loop'
 ; CHECK-NEXT: block-frequency-info: infinite_loop
@@ -47,4 +47,4 @@ for.end:
   ret void
 }
 
-!1 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!1 = !{!"branch_weights", i32 1, i32 1}
diff --git a/test/Analysis/BlockFrequencyInfo/basic.ll b/test/Analysis/BlockFrequencyInfo/basic.ll
index 006e6ab4d74e..1c24176d6153 100644
--- a/test/Analysis/BlockFrequencyInfo/basic.ll
+++ b/test/Analysis/BlockFrequencyInfo/basic.ll
@@ -47,7 +47,7 @@ exit:
   ret i32 %result
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!0 = !{!"branch_weights", i32 64, i32 4}
 
 define i32 @test3(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
 ; CHECK-LABEL: Printing analysis {{.*}} for function 'test3':
@@ -90,7 +90,7 @@ exit:
   ret i32 %result
 }
 
-!1 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4}
+!1 = !{!"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4}
 
 define void @nested_loops(i32 %a) {
 ; CHECK-LABEL: Printing analysis {{.*}} for function 'nested_loops':
@@ -138,4 +138,4 @@ for.end13:
 
 declare void @g(i32)
 
-!2 = metadata !{metadata !"branch_weights", i32 1, i32 4000}
+!2 = !{!"branch_weights", i32 1, i32 4000}
diff --git a/test/Analysis/BlockFrequencyInfo/double_backedge.ll b/test/Analysis/BlockFrequencyInfo/double_backedge.ll
index df8217cfa1b1..597bf8329b26 100644
--- a/test/Analysis/BlockFrequencyInfo/double_backedge.ll
+++ b/test/Analysis/BlockFrequencyInfo/double_backedge.ll
@@ -23,5 +23,5 @@ exit:
 ; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
   ret void
 }
-!0 = metadata !{metadata !"branch_weights", i32 1, i32 9}
-!1 = metadata !{metadata !"branch_weights", i32 4, i32 5}
+!0 = !{!"branch_weights", i32 1, i32 9}
+!1 = !{!"branch_weights", i32 4, i32 5}
diff --git a/test/Analysis/BlockFrequencyInfo/double_exit.ll b/test/Analysis/BlockFrequencyInfo/double_exit.ll
index 75f664d07ac4..3063ba701731 100644
--- a/test/Analysis/BlockFrequencyInfo/double_exit.ll
+++ b/test/Analysis/BlockFrequencyInfo/double_exit.ll
@@ -66,9 +66,9 @@ exit:
   ret i32 %Return.2
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 1, i32 3}
-!1 = metadata !{metadata !"branch_weights", i32 4, i32 1}
-!2 = metadata !{metadata !"branch_weights", i32 2, i32 1}
+!0 = !{!"branch_weights", i32 1, i32 3}
+!1 = !{!"branch_weights", i32 4, i32 1}
+!2 = !{!"branch_weights", i32 2, i32 1}
 
 declare i32 @c2(i32, i32)
 declare i32 @logic2(i32, i32, i32)
@@ -159,7 +159,7 @@ exit:
   ret i32 %Return.0
 }
 
-!3 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!3 = !{!"branch_weights", i32 1, i32 1}
 
 declare i32 @c3(i32, i32, i32)
 declare i32 @logic3(i32, i32, i32, i32)
diff --git a/test/Analysis/BlockFrequencyInfo/extremely-likely-loop-successor.ll b/test/Analysis/BlockFrequencyInfo/extremely-likely-loop-successor.ll
index f1ddd13ef0d6..e55deaff428d 100644
--- a/test/Analysis/BlockFrequencyInfo/extremely-likely-loop-successor.ll
+++ b/test/Analysis/BlockFrequencyInfo/extremely-likely-loop-successor.ll
@@ -37,4 +37,4 @@ exit:
 
 declare i1 @foo()
 
-!0 = metadata !{metadata !"branch_weights", i32 4294967295, i32 1}
+!0 = !{!"branch_weights", i32 4294967295, i32 1}
diff --git a/test/Analysis/BlockFrequencyInfo/irreducible.ll b/test/Analysis/BlockFrequencyInfo/irreducible.ll
index af4ad15d9c1d..b275aae62792 100644
--- a/test/Analysis/BlockFrequencyInfo/irreducible.ll
+++ b/test/Analysis/BlockFrequencyInfo/irreducible.ll
@@ -31,8 +31,8 @@ return:
   ret void
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 1, i32 7}
-!1 = metadata !{metadata !"branch_weights", i32 3, i32 4}
+!0 = !{!"branch_weights", i32 1, i32 7}
+!1 = !{!"branch_weights", i32 3, i32 4}
 
 ; Irreducible control flow
 ; ========================
@@ -112,7 +112,7 @@ exit:
   ret void
 }
 
-!2 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+!2 = !{!"branch_weights", i32 3, i32 1}
 
 ; Testcase #2
 ; ===========
@@ -156,7 +156,7 @@ exit:
   ret void
 }
 
-!3 = metadata !{metadata !"branch_weights", i32 2, i32 2, i32 2}
+!3 = !{!"branch_weights", i32 2, i32 2, i32 2}
 
 ; A true loop with irreducible control flow inside.
 define void @loop_around_irreducible(i1 %x) {
@@ -186,8 +186,8 @@ exit:
 ; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
   ret void
 }
-!4 = metadata !{metadata !"branch_weights", i32 1, i32 1}
-!5 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+!4 = !{!"branch_weights", i32 1, i32 1}
+!5 = !{!"branch_weights", i32 3, i32 1}
 
 ; Two unrelated irreducible SCCs.
 define void @two_sccs(i1 %x) {
@@ -225,9 +225,9 @@ exit:
 ; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
   ret void
 }
-!6 = metadata !{metadata !"branch_weights", i32 3, i32 1}
-!7 = metadata !{metadata !"branch_weights", i32 1, i32 1}
-!8 = metadata !{metadata !"branch_weights", i32 4, i32 1}
+!6 = !{!"branch_weights", i32 3, i32 1}
+!7 = !{!"branch_weights", i32 1, i32 1}
+!8 = !{!"branch_weights", i32 4, i32 1}
 
 ; A true loop inside irreducible control flow.
 define void @loop_inside_irreducible(i1 %x) {
@@ -257,9 +257,9 @@ exit:
 ; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
   ret void
 }
-!9 = metadata !{metadata !"branch_weights", i32 1, i32 1}
-!10 = metadata !{metadata !"branch_weights", i32 3, i32 1}
-!11 = metadata !{metadata !"branch_weights", i32 2, i32 1}
+!9 = !{!"branch_weights", i32 1, i32 1}
+!10 = !{!"branch_weights", i32 3, i32 1}
+!11 = !{!"branch_weights", i32 2, i32 1}
 
 ; Irreducible control flow in a branch that's in a true loop.
 define void @loop_around_branch_with_irreducible(i1 %x) {
@@ -301,8 +301,8 @@ exit:
 ; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
   ret void
 }
-!12 = metadata !{metadata !"branch_weights", i32 3, i32 1}
-!13 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!12 = !{!"branch_weights", i32 3, i32 1}
+!13 = !{!"branch_weights", i32 1, i32 1}
 
 ; Irreducible control flow between two true loops.
 define void @loop_around_branch_with_irreducible_around_loop(i1 %x) {
@@ -348,10 +348,10 @@ exit:
 ; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
   ret void
 }
-!14 = metadata !{metadata !"branch_weights", i32 2, i32 1}
-!15 = metadata !{metadata !"branch_weights", i32 1, i32 1}
-!16 = metadata !{metadata !"branch_weights", i32 3, i32 1}
-!17 = metadata !{metadata !"branch_weights", i32 4, i32 1}
+!14 = !{!"branch_weights", i32 2, i32 1}
+!15 = !{!"branch_weights", i32 1, i32 1}
+!16 = !{!"branch_weights", i32 3, i32 1}
+!17 = !{!"branch_weights", i32 4, i32 1}
 
 ; An irreducible SCC with a non-header.
 define void @nonheader(i1 %x) {
@@ -377,9 +377,9 @@ exit:
 ; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
   ret void
 }
-!18 = metadata !{metadata !"branch_weights", i32 1, i32 1}
-!19 = metadata !{metadata !"branch_weights", i32 1, i32 3}
-!20 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+!18 = !{!"branch_weights", i32 1, i32 1}
+!19 = !{!"branch_weights", i32 1, i32 3}
+!20 = !{!"branch_weights", i32 3, i32 1}
 
 ; An irreducible SCC with an irreducible sub-SCC.  In the current version of
 ; -block-freq, this means an extra header.
@@ -416,6 +416,6 @@ exit:
 ; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]]
   ret void
 }
-!21 = metadata !{metadata !"branch_weights", i32 2, i32 1}
-!22 = metadata !{metadata !"branch_weights", i32 1, i32 1}
-!23 = metadata !{metadata !"branch_weights", i32 8, i32 1, i32 3, i32 12}
+!21 = !{!"branch_weights", i32 2, i32 1}
+!22 = !{!"branch_weights", i32 1, i32 1}
+!23 = !{!"branch_weights", i32 8, i32 1, i32 3, i32 12}
diff --git a/test/Analysis/BlockFrequencyInfo/loop_with_branch.ll b/test/Analysis/BlockFrequencyInfo/loop_with_branch.ll
index 9d27b6bf0f20..9a86564c548d 100644
--- a/test/Analysis/BlockFrequencyInfo/loop_with_branch.ll
+++ b/test/Analysis/BlockFrequencyInfo/loop_with_branch.ll
@@ -40,5 +40,5 @@ exit:
 declare i1 @foo0(i32)
 declare i2 @foo1(i32)
 
-!0 = metadata !{metadata !"branch_weights", i32 1, i32 3}
-!1 = metadata !{metadata !"branch_weights", i32 1, i32 2, i32 3}
+!0 = !{!"branch_weights", i32 1, i32 3}
+!1 = !{!"branch_weights", i32 1, i32 2, i32 3}
diff --git a/test/Analysis/BlockFrequencyInfo/nested_loop_with_branches.ll b/test/Analysis/BlockFrequencyInfo/nested_loop_with_branches.ll
index d93ffceb5fab..19d165805b04 100644
--- a/test/Analysis/BlockFrequencyInfo/nested_loop_with_branches.ll
+++ b/test/Analysis/BlockFrequencyInfo/nested_loop_with_branches.ll
@@ -55,5 +55,5 @@ declare i1 @foo4(i32)
 declare i1 @foo5(i32)
 declare i1 @foo6(i32)
 
-!0 = metadata !{metadata !"branch_weights", i32 1, i32 3}
-!1 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+!0 = !{!"branch_weights", i32 1, i32 3}
+!1 = !{!"branch_weights", i32 3, i32 1}
diff --git a/test/Analysis/BranchProbabilityInfo/basic.ll b/test/Analysis/BranchProbabilityInfo/basic.ll
index 05cb31dca0ee..5915ed152b06 100644
--- a/test/Analysis/BranchProbabilityInfo/basic.ll
+++ b/test/Analysis/BranchProbabilityInfo/basic.ll
@@ -43,7 +43,7 @@ exit:
   ret i32 %result
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!0 = !{!"branch_weights", i32 64, i32 4}
 
 define i32 @test3(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
 ; CHECK: Printing analysis {{.*}} for function 'test3'
@@ -87,7 +87,7 @@ exit:
   ret i32 %result
 }
 
-!1 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4}
+!1 = !{!"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4}
 
 define i32 @test4(i32 %x) nounwind uwtable readnone ssp {
 ; CHECK: Printing analysis {{.*}} for function 'test4'
@@ -114,7 +114,7 @@ return:
   ret i32 %retval.0
 }
 
-!2 = metadata !{metadata !"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64}
+!2 = !{!"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64}
 
 declare void @coldfunc() cold
 
diff --git a/test/Analysis/CFLAliasAnalysis/arguments-globals.ll b/test/Analysis/CFLAliasAnalysis/arguments-globals.ll
new file mode 100644
index 000000000000..18bbe8b6b41f
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/arguments-globals.ll
@@ -0,0 +1,20 @@
+; This testcase ensures that CFL AA gives conservative answers on variables
+; that involve arguments.
+; (Everything should alias everything, because args can alias globals, so the
+; aliasing sets should of args+alloca+global should be combined)
+
+; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+
+; CHECK:     Function: test
+
+@g = external global i32
+
+define void @test(i1 %c, i32* %arg1, i32* %arg2) {
+  ; CHECK: 15 Total Alias Queries Performed
+  ; CHECK: 0 no alias responses
+  %A = alloca i32, align 4
+  %B = select i1 %c, i32* %arg1, i32* %arg2
+  %C = select i1 %c, i32* @g, i32* %A
+
+  ret void
+}
diff --git a/test/Analysis/CFLAliasAnalysis/arguments.ll b/test/Analysis/CFLAliasAnalysis/arguments.ll
new file mode 100644
index 000000000000..f3e6679c35e2
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/arguments.ll
@@ -0,0 +1,15 @@
+; This testcase ensures that CFL AA gives conservative answers on variables
+; that involve arguments.
+
+; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+
+; CHECK:     Function: test
+
+define void @test(i1 %c, i32* %arg1, i32* %arg2) {
+  ; CHECK: 6 Total Alias Queries Performed
+  ; CHECK: 3 no alias responses
+  %a = alloca i32, align 4
+  %b = select i1 %c, i32* %arg1, i32* %arg2
+
+  ret void
+}
diff --git a/test/Analysis/CFLAliasAnalysis/basic-interproc-ret.ll b/test/Analysis/CFLAliasAnalysis/basic-interproc-ret.ll
new file mode 100644
index 000000000000..d56a4552b51e
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/basic-interproc-ret.ll
@@ -0,0 +1,26 @@
+; This testcase ensures that CFL AA gives conservative answers on variables
+; that involve arguments.
+
+; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+
+; CHECK:     Function: test
+; CHECK: 4 Total Alias Queries Performed
+; CHECK: 3 no alias responses
+; ^ The 1 MayAlias is due to %arg1. Sadly, we don't currently have machinery
+; in place to check whether %arg1 aliases %a, because BasicAA takes care of 
+; that for us.
+
+define i32* @test2(i32* %arg1) {
+  store i32 0, i32* %arg1
+
+  %a = alloca i32, align 4
+  ret i32* %a
+}
+
+define void @test() {
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = call i32* @test2(i32* %a)
+
+  ret void
+}
diff --git a/test/Analysis/CFLAliasAnalysis/basic-interproc.ll b/test/Analysis/CFLAliasAnalysis/basic-interproc.ll
new file mode 100644
index 000000000000..c0a5404eab6e
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/basic-interproc.ll
@@ -0,0 +1,24 @@
+; This testcase ensures that CFL AA gives conservative answers on variables
+; that involve arguments.
+
+; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+
+; CHECK:     Function: test
+; CHECK: 2 Total Alias Queries Performed
+; CHECK: 1 no alias responses
+; ^^ In @test2, %arg1 and %arg2 may alias
+
+define void @test2(i32* %arg1, i32* %arg2) {
+  store i32 0, i32* %arg1
+  store i32 0, i32* %arg2
+
+  ret void
+}
+
+define void @test() {
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  call void @test2(i32* %a, i32* %b)
+
+  ret void
+}
diff --git a/test/Analysis/CFLAliasAnalysis/const-expr-gep.ll b/test/Analysis/CFLAliasAnalysis/const-expr-gep.ll
new file mode 100644
index 000000000000..9ae200bc5701
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/const-expr-gep.ll
@@ -0,0 +1,21 @@
+; This testcase consists of alias relations which should be completely
+; resolvable by cfl-aa, but require analysis of getelementptr constant exprs.
+; Derived from BasicAA/2003-12-11-ConstExprGEP.ll
+
+; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+
+%T = type { i32, [10 x i8] }
+
+@G = external global %T
+
+; CHECK:     Function: test
+; CHECK-NOT:   May:
+
+define void @test() {
+  %D = getelementptr %T* @G, i64 0, i32 0
+  %E = getelementptr %T* @G, i64 0, i32 1, i64 5
+  %F = getelementptr i32* getelementptr (%T* @G, i64 0, i32 0), i64 0
+  %X = getelementptr [10 x i8]* getelementptr (%T* @G, i64 0, i32 1), i64 0, i64 5
+
+  ret void
+}
diff --git a/test/Analysis/CFLAliasAnalysis/constant-over-index.ll b/test/Analysis/CFLAliasAnalysis/constant-over-index.ll
new file mode 100644
index 000000000000..fb44b95d134f
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/constant-over-index.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -cfl-aa -aa-eval -print-all-alias-modref-info 2>&1 | FileCheck %s
+
+; CFL AA currently returns PartialAlias, BasicAA returns MayAlias, both seem
+; acceptable (although we might decide that we don't want PartialAlias, and if
+; so, we should update this test case accordingly).
+; CHECK: {{PartialAlias|MayAlias}}: double* %p.0.i.0, double* %p3
+
+; %p3 is equal to %p.0.i.0 on the second iteration of the loop,
+; so MayAlias is needed.
+
+define void @foo([3 x [3 x double]]* noalias %p) {
+entry:
+  %p3 = getelementptr [3 x [3 x double]]* %p, i64 0, i64 0, i64 3
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+
+  %p.0.i.0 = getelementptr [3 x [3 x double]]* %p, i64 0, i64 %i, i64 0
+
+  store volatile double 0.0, double* %p3
+  store volatile double 0.1, double* %p.0.i.0
+
+  %i.next = add i64 %i, 1
+  %cmp = icmp slt i64 %i.next, 3
+  br i1 %cmp, label %loop, label %exit
+
+exit:
+  ret void
+}
diff --git a/test/Analysis/CFLAliasAnalysis/empty.ll b/test/Analysis/CFLAliasAnalysis/empty.ll
new file mode 100644
index 000000000000..907fa481d86d
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/empty.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -cfl-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; CHECK:      Function: foo:
+; CHECK-NEXT:   NoAlias: {}* %p, {}* %q
+
+define void @foo({}* %p, {}* %q) {
+  store {} {}, {}* %p
+  store {} {}, {}* %q
+  ret void
+}
diff --git a/test/Analysis/CFLAliasAnalysis/full-store-partial-alias.ll b/test/Analysis/CFLAliasAnalysis/full-store-partial-alias.ll
new file mode 100644
index 000000000000..664ea9eb1180
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/full-store-partial-alias.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -tbaa -cfl-aa -gvn < %s | FileCheck -check-prefix=CFLAA %s
+; RUN: opt -S -tbaa -gvn < %s | FileCheck %s
+; Adapted from the BasicAA full-store-partial-alias.ll test.
+
+; CFL AA should notice that the store stores to the entire %u object,
+; so the %tmp5 load is PartialAlias with the store and suppress TBAA.
+; Without CFL AA, TBAA should say that %tmp5 is NoAlias with the store.
+
+target datalayout = "e-p:64:64:64"
+
+%union.anon = type { double }
+
+@u = global %union.anon { double -2.500000e-01 }, align 8
+@endianness_test = global i64 1, align 8
+
+define i32 @signbit(double %x) nounwind {
+; CFLAA: ret i32 %tmp5.lobit
+; CHECK:   ret i32 0
+entry:
+  %u = alloca %union.anon, align 8
+  %tmp9 = getelementptr inbounds %union.anon* %u, i64 0, i32 0
+  store double %x, double* %tmp9, align 8, !tbaa !0
+  %tmp2 = load i32* bitcast (i64* @endianness_test to i32*), align 8, !tbaa !3
+  %idxprom = sext i32 %tmp2 to i64
+  %tmp4 = bitcast %union.anon* %u to [2 x i32]*
+  %arrayidx = getelementptr inbounds [2 x i32]* %tmp4, i64 0, i64 %idxprom
+  %tmp5 = load i32* %arrayidx, align 4, !tbaa !3
+  %tmp5.lobit = lshr i32 %tmp5, 31
+  ret i32 %tmp5.lobit
+}
+
+!0 = !{!4, !4, i64 0}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA", null}
+!3 = !{!5, !5, i64 0}
+!4 = !{!"double", !1}
+!5 = !{!"int", !1}
diff --git a/test/Analysis/CFLAliasAnalysis/gep-signed-arithmetic.ll b/test/Analysis/CFLAliasAnalysis/gep-signed-arithmetic.ll
new file mode 100644
index 000000000000..a0195d7f8d6a
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/gep-signed-arithmetic.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -cfl-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; Derived from BasicAA/2010-09-15-GEP-SignedArithmetic.ll
+
+target datalayout = "e-p:32:32:32"
+
+; CHECK: 1 partial alias response
+
+define i32 @test(i32* %tab, i32 %indvar) nounwind {
+  %tmp31 = mul i32 %indvar, -2
+  %tmp32 = add i32 %tmp31, 30
+  %t.5 = getelementptr i32* %tab, i32 %tmp32
+  %loada = load i32* %tab
+  store i32 0, i32* %t.5
+  %loadb = load i32* %tab
+  %rval = add i32 %loada, %loadb
+  ret i32 %rval
+}
diff --git a/test/Analysis/CFLAliasAnalysis/multilevel-combine.ll b/test/Analysis/CFLAliasAnalysis/multilevel-combine.ll
new file mode 100644
index 000000000000..9bbc721a7b16
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/multilevel-combine.ll
@@ -0,0 +1,31 @@
+; This testcase ensures that CFL AA responds conservatively when we union 
+; groups of pointers together through ternary/conditional operations
+; Derived from:
+; void foo(bool c) {
+;   char a, b;
+;   char *m = c ? &a : &b;
+;   *m;
+; }
+;
+
+; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+
+%T = type { i32, [10 x i8] }
+
+; CHECK:     Function: test
+
+define void @test(i1 %C) {
+; CHECK: 10 Total Alias Queries Performed
+; CHECK: 4 no alias responses
+  %M = alloca %T*, align 8 ; NoAlias with %A, %B, %MS, %AP
+  %A = alloca %T, align 8
+  %B = alloca %T, align 8
+
+  %MS = select i1 %C, %T* %B, %T* %A
+
+  store %T* %MS, %T** %M
+
+  %AP = load %T** %M ; PartialAlias with %A, %B
+
+  ret void
+}
diff --git a/test/Analysis/CFLAliasAnalysis/multilevel.ll b/test/Analysis/CFLAliasAnalysis/multilevel.ll
new file mode 100644
index 000000000000..9c9eb9a49779
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/multilevel.ll
@@ -0,0 +1,30 @@
+; This testcase ensures that CFL AA handles trivial cases with storing 
+; pointers in pointers appropriately.
+; Derived from:
+; char a, b;
+; char *m = &a, *n = &b;
+; *m;
+; *n;
+
+; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+
+%T = type { i32, [10 x i8] }
+
+; CHECK:     Function: test
+
+define void @test() {
+; CHECK: 15 Total Alias Queries Performed
+; CHECK: 13 no alias responses
+  %M = alloca %T*, align 8
+  %N = alloca %T*, align 8
+  %A = alloca %T, align 8
+  %B = alloca %T, align 8
+
+  store %T* %A, %T** %M
+  store %T* %B, %T** %N
+
+  %AP = load %T** %M ; PartialAlias with %A
+  %BP = load %T** %N ; PartialAlias with %B
+
+  ret void
+}
diff --git a/test/Analysis/CFLAliasAnalysis/must-and-partial.ll b/test/Analysis/CFLAliasAnalysis/must-and-partial.ll
new file mode 100644
index 000000000000..df7de38ebd73
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/must-and-partial.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -cfl-aa -aa-eval -print-all-alias-modref-info 2>&1 | FileCheck %s
+
+; When merging MustAlias and PartialAlias, merge to PartialAlias
+; instead of MayAlias.
+
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; CHECK: PartialAlias:  i16* %bigbase0, i8* %phi
+define i8 @test0(i8* %base, i1 %x) {
+entry:
+  %baseplusone = getelementptr i8* %base, i64 1
+  br i1 %x, label %red, label %green
+red:
+  br label %green
+green:
+  %phi = phi i8* [ %baseplusone, %red ], [ %base, %entry ]
+  store i8 0, i8* %phi
+
+  %bigbase0 = bitcast i8* %base to i16*
+  store i16 -1, i16* %bigbase0
+
+  %loaded = load i8* %phi
+  ret i8 %loaded
+}
+
+; CHECK: PartialAlias:  i16* %bigbase1, i8* %sel
+define i8 @test1(i8* %base, i1 %x) {
+entry:
+  %baseplusone = getelementptr i8* %base, i64 1
+  %sel = select i1 %x, i8* %baseplusone, i8* %base
+  store i8 0, i8* %sel
+
+  %bigbase1 = bitcast i8* %base to i16*
+  store i16 -1, i16* %bigbase1
+
+  %loaded = load i8* %sel
+  ret i8 %loaded
+}
diff --git a/test/Analysis/CFLAliasAnalysis/phi-and-select.ll b/test/Analysis/CFLAliasAnalysis/phi-and-select.ll
new file mode 100644
index 000000000000..a0e71a7e7dca
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/phi-and-select.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -cfl-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+; Derived from (a subset of) BasicAA/phi-and-select.ll 
+
+; CHECK: Function: qux
+; CHECK:  NoAlias: double* %a, double* %b
+; CHECK: ===== Alias Analysis Evaluator Report =====
+
+; Two PHIs with disjoint sets of inputs.
+define void @qux(i1 %m, double* noalias %x, double* noalias %y,
+                 i1 %n, double* noalias %v, double* noalias %w) {
+entry:
+  br i1 %m, label %true, label %false
+
+true:
+  br label %exit
+
+false:
+  br label %exit
+
+exit:
+  %a = phi double* [ %x, %true ], [ %y, %false ]
+  br i1 %n, label %ntrue, label %nfalse
+
+ntrue:
+  br label %nexit
+
+nfalse:
+  br label %nexit
+
+nexit:
+  %b = phi double* [ %v, %ntrue ], [ %w, %nfalse ]
+  store volatile double 0.0, double* %a
+  store volatile double 1.0, double* %b
+  ret void
+}
+
diff --git a/test/Analysis/CFLAliasAnalysis/simple.ll b/test/Analysis/CFLAliasAnalysis/simple.ll
new file mode 100644
index 000000000000..7bc455ae7a2d
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/simple.ll
@@ -0,0 +1,18 @@
+; This testcase consists of alias relations which should be completely
+; resolvable by cfl-aa (derived from BasicAA/2003-11-04-SimpleCases.ll).
+
+; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+
+%T = type { i32, [10 x i8] }
+
+; CHECK:     Function: test
+; CHECK-NOT:   May:
+
+define void @test(%T* %P) {
+  %A = getelementptr %T* %P, i64 0
+  %B = getelementptr %T* %P, i64 0, i32 0
+  %C = getelementptr %T* %P, i64 0, i32 1
+  %D = getelementptr %T* %P, i64 0, i32 1, i64 0
+  %E = getelementptr %T* %P, i64 0, i32 1, i64 5
+  ret void
+}
diff --git a/test/Analysis/CFLAliasAnalysis/va.ll b/test/Analysis/CFLAliasAnalysis/va.ll
new file mode 100644
index 000000000000..3094cb0967f6
--- /dev/null
+++ b/test/Analysis/CFLAliasAnalysis/va.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -cfl-aa -aa-eval -print-may-aliases -disable-output 2>&1 | FileCheck %s
+
+; CHECK-LABEL: Function: test1
+; CHECK: 0 no alias responses
+
+define i32 @test1(i32 %X, ...) {
+  ; Initialize variable argument processing
+  %ap = alloca i8*
+  %ap2 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap2)
+
+  ; Read a single integer argument
+  %tmp = va_arg i8** %ap, i32
+
+  ; Demonstrate usage of llvm.va_copy and llvm.va_end
+  %aq = alloca i8*
+  %aq2 = bitcast i8** %aq to i8*
+  call void @llvm.va_copy(i8* %aq2, i8* %ap2)
+  call void @llvm.va_end(i8* %aq2)
+
+  ; Stop processing of arguments.
+  call void @llvm.va_end(i8* %ap2)
+  ret i32 %tmp
+}
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
+
diff --git a/test/Analysis/CostModel/ARM/cast.ll b/test/Analysis/CostModel/ARM/cast.ll
index 662110f2720d..18d6e841433f 100644
--- a/test/Analysis/CostModel/ARM/cast.ll
+++ b/test/Analysis/CostModel/ARM/cast.ll
@@ -221,35 +221,35 @@ define i32 @casts() {
   %r96 = fptoui <2 x float> undef to <2 x i32>
   ; CHECK: cost of 1 {{.*}} fptosi
   %r97 = fptosi <2 x float> undef to <2 x i32>
-  ; CHECK: cost of 28 {{.*}} fptoui
+  ; CHECK: cost of 32 {{.*}} fptoui
   %r98 = fptoui <2 x float> undef to <2 x i64>
-  ; CHECK: cost of 28 {{.*}} fptosi
+  ; CHECK: cost of 32 {{.*}} fptosi
   %r99 = fptosi <2 x float> undef to <2 x i64>
 
-  ; CHECK: cost of 8 {{.*}} fptoui
+  ; CHECK: cost of 16 {{.*}} fptoui
   %r100 = fptoui <2 x double> undef to <2 x i1>
-  ; CHECK: cost of 8 {{.*}} fptosi
+  ; CHECK: cost of 16 {{.*}} fptosi
   %r101 = fptosi <2 x double> undef to <2 x i1>
-  ; CHECK: cost of 8 {{.*}} fptoui
+  ; CHECK: cost of 16 {{.*}} fptoui
   %r102 = fptoui <2 x double> undef to <2 x i8>
-  ; CHECK: cost of 8 {{.*}} fptosi
+  ; CHECK: cost of 16 {{.*}} fptosi
   %r103 = fptosi <2 x double> undef to <2 x i8>
-  ; CHECK: cost of 8 {{.*}} fptoui
+  ; CHECK: cost of 16 {{.*}} fptoui
   %r104 = fptoui <2 x double> undef to <2 x i16>
-  ; CHECK: cost of 8 {{.*}} fptosi
+  ; CHECK: cost of 16 {{.*}} fptosi
   %r105 = fptosi <2 x double> undef to <2 x i16>
   ; CHECK: cost of 2 {{.*}} fptoui
   %r106 = fptoui <2 x double> undef to <2 x i32>
   ; CHECK: cost of 2 {{.*}} fptosi
   %r107 = fptosi <2 x double> undef to <2 x i32>
-  ; CHECK: cost of 28 {{.*}} fptoui
+  ; CHECK: cost of 32 {{.*}} fptoui
   %r108 = fptoui <2 x double> undef to <2 x i64>
-  ; CHECK: cost of 28 {{.*}} fptosi
+  ; CHECK: cost of 32 {{.*}} fptosi
   %r109 = fptosi <2 x double> undef to <2 x i64>
 
-  ; CHECK: cost of 16 {{.*}} fptoui
+  ; CHECK: cost of 32 {{.*}} fptoui
   %r110 = fptoui <4 x float> undef to <4 x i1>
-  ; CHECK: cost of 16 {{.*}} fptosi
+  ; CHECK: cost of 32 {{.*}} fptosi
   %r111 = fptosi <4 x float> undef to <4 x i1>
   ; CHECK: cost of 3 {{.*}} fptoui
   %r112 = fptoui <4 x float> undef to <4 x i8>
@@ -263,39 +263,39 @@ define i32 @casts() {
   %r116 = fptoui <4 x float> undef to <4 x i32>
   ; CHECK: cost of 1 {{.*}} fptosi
   %r117 = fptosi <4 x float> undef to <4 x i32>
-  ; CHECK: cost of 56 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r118 = fptoui <4 x float> undef to <4 x i64>
-  ; CHECK: cost of 56 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r119 = fptosi <4 x float> undef to <4 x i64>
 
-  ; CHECK: cost of 16 {{.*}} fptoui
+  ; CHECK: cost of 32 {{.*}} fptoui
   %r120 = fptoui <4 x double> undef to <4 x i1>
-  ; CHECK: cost of 16 {{.*}} fptosi
+  ; CHECK: cost of 32 {{.*}} fptosi
   %r121 = fptosi <4 x double> undef to <4 x i1>
-  ; CHECK: cost of 16 {{.*}} fptoui
+  ; CHECK: cost of 32 {{.*}} fptoui
   %r122 = fptoui <4 x double> undef to <4 x i8>
-  ; CHECK: cost of 16 {{.*}} fptosi
+  ; CHECK: cost of 32 {{.*}} fptosi
   %r123 = fptosi <4 x double> undef to <4 x i8>
-  ; CHECK: cost of 16 {{.*}} fptoui
+  ; CHECK: cost of 32 {{.*}} fptoui
   %r124 = fptoui <4 x double> undef to <4 x i16>
-  ; CHECK: cost of 16 {{.*}} fptosi
+  ; CHECK: cost of 32 {{.*}} fptosi
   %r125 = fptosi <4 x double> undef to <4 x i16>
-  ; CHECK: cost of 16 {{.*}} fptoui
+  ; CHECK: cost of 32 {{.*}} fptoui
   %r126 = fptoui <4 x double> undef to <4 x i32>
-  ; CHECK: cost of 16 {{.*}} fptosi
+  ; CHECK: cost of 32 {{.*}} fptosi
   %r127 = fptosi <4 x double> undef to <4 x i32>
-  ; CHECK: cost of 56 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r128 = fptoui <4 x double> undef to <4 x i64>
-  ; CHECK: cost of 56 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r129 = fptosi <4 x double> undef to <4 x i64>
 
-  ; CHECK: cost of 32 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r130 = fptoui <8 x float> undef to <8 x i1>
-  ; CHECK: cost of 32 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r131 = fptosi <8 x float> undef to <8 x i1>
-  ; CHECK: cost of 32 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r132 = fptoui <8 x float> undef to <8 x i8>
-  ; CHECK: cost of 32 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r133 = fptosi <8 x float> undef to <8 x i8>
   ; CHECK: cost of 4 {{.*}} fptoui
   %r134 = fptoui <8 x float> undef to <8 x i16>
@@ -305,39 +305,39 @@ define i32 @casts() {
   %r136 = fptoui <8 x float> undef to <8 x i32>
   ; CHECK: cost of 2 {{.*}} fptosi
   %r137 = fptosi <8 x float> undef to <8 x i32>
-  ; CHECK: cost of 112 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r138 = fptoui <8 x float> undef to <8 x i64>
-  ; CHECK: cost of 112 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r139 = fptosi <8 x float> undef to <8 x i64>
 
-  ; CHECK: cost of 32 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r140 = fptoui <8 x double> undef to <8 x i1>
-  ; CHECK: cost of 32 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r141 = fptosi <8 x double> undef to <8 x i1>
-  ; CHECK: cost of 32 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r142 = fptoui <8 x double> undef to <8 x i8>
-  ; CHECK: cost of 32 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r143 = fptosi <8 x double> undef to <8 x i8>
-  ; CHECK: cost of 32 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r144 = fptoui <8 x double> undef to <8 x i16>
-  ; CHECK: cost of 32 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r145 = fptosi <8 x double> undef to <8 x i16>
-  ; CHECK: cost of 32 {{.*}} fptoui
+  ; CHECK: cost of 64 {{.*}} fptoui
   %r146 = fptoui <8 x double> undef to <8 x i32>
-  ; CHECK: cost of 32 {{.*}} fptosi
+  ; CHECK: cost of 64 {{.*}} fptosi
   %r147 = fptosi <8 x double> undef to <8 x i32>
-  ; CHECK: cost of 112 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r148 = fptoui <8 x double> undef to <8 x i64>
-  ; CHECK: cost of 112 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r149 = fptosi <8 x double> undef to <8 x i64>
 
-  ; CHECK: cost of 64 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r150 = fptoui <16 x float> undef to <16 x i1>
-  ; CHECK: cost of 64 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r151 = fptosi <16 x float> undef to <16 x i1>
-  ; CHECK: cost of 64 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r152 = fptoui <16 x float> undef to <16 x i8>
-  ; CHECK: cost of 64 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r153 = fptosi <16 x float> undef to <16 x i8>
   ; CHECK: cost of 8 {{.*}} fptoui
   %r154 = fptoui <16 x float> undef to <16 x i16>
@@ -347,30 +347,30 @@ define i32 @casts() {
   %r156 = fptoui <16 x float> undef to <16 x i32>
   ; CHECK: cost of 4 {{.*}} fptosi
   %r157 = fptosi <16 x float> undef to <16 x i32>
-  ; CHECK: cost of 224 {{.*}} fptoui
+  ; CHECK: cost of 256 {{.*}} fptoui
   %r158 = fptoui <16 x float> undef to <16 x i64>
-  ; CHECK: cost of 224 {{.*}} fptosi
+  ; CHECK: cost of 256 {{.*}} fptosi
   %r159 = fptosi <16 x float> undef to <16 x i64>
 
-  ; CHECK: cost of 64 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r160 = fptoui <16 x double> undef to <16 x i1>
-  ; CHECK: cost of 64 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r161 = fptosi <16 x double> undef to <16 x i1>
-  ; CHECK: cost of 64 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r162 = fptoui <16 x double> undef to <16 x i8>
-  ; CHECK: cost of 64 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r163 = fptosi <16 x double> undef to <16 x i8>
-  ; CHECK: cost of 64 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r164 = fptoui <16 x double> undef to <16 x i16>
-  ; CHECK: cost of 64 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r165 = fptosi <16 x double> undef to <16 x i16>
-  ; CHECK: cost of 64 {{.*}} fptoui
+  ; CHECK: cost of 128 {{.*}} fptoui
   %r166 = fptoui <16 x double> undef to <16 x i32>
-  ; CHECK: cost of 64 {{.*}} fptosi
+  ; CHECK: cost of 128 {{.*}} fptosi
   %r167 = fptosi <16 x double> undef to <16 x i32>
-  ; CHECK: cost of 224 {{.*}} fptoui
+  ; CHECK: cost of 256 {{.*}} fptoui
   %r168 = fptoui <16 x double> undef to <16 x i64>
-  ; CHECK: cost of 224 {{.*}} fptosi
+  ; CHECK: cost of 256 {{.*}} fptosi
   %r169 = fptosi <16 x double> undef to <16 x i64>
 
   ; CHECK: cost of 8 {{.*}} uitofp
diff --git a/test/Analysis/CostModel/PowerPC/cmp-expanded.ll b/test/Analysis/CostModel/PowerPC/cmp-expanded.ll
new file mode 100644
index 000000000000..38c8439b9fb6
--- /dev/null
+++ b/test/Analysis/CostModel/PowerPC/cmp-expanded.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @exts() {
+
+   ; VSX is disabled, so this cost needs to include scalarization (because
+   ; <4 x double> is legalized to scalars).
+   ; CHECK: cost of 44 {{.*}} fcmp
+   %v1 = fcmp ugt <4 x double> undef, undef
+
+  ret void
+}
+
diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll
index 7f97b176f7c1..fb16af635f07 100644
--- a/test/Analysis/CostModel/X86/cast.ll
+++ b/test/Analysis/CostModel/X86/cast.ll
@@ -1,3 +1,4 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512
 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX2
 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX
 
@@ -83,6 +84,19 @@ define i32 @zext_sext(<8 x i1> %in) {
   ;CHECK-AVX: cost of 4 {{.*}} zext
   %D = zext <4 x i32> undef to <4 x i64>
 
+  ;CHECK-AVX512: cost of 3 {{.*}} %D1 = zext
+  %D1 = zext <16 x i32> undef to <16 x i64>
+
+  ;CHECK-AVX512: cost of 3 {{.*}} %D2 = sext
+  %D2 = sext <16 x i32> undef to <16 x i64>
+
+  ;CHECK-AVX512: cost of 1 {{.*}} %D3 = zext
+  %D3 = zext <16 x i16> undef to <16 x i32>
+  ;CHECK-AVX512: cost of 1 {{.*}} %D4 = zext
+  %D4 = zext <16 x i8> undef to <16 x i32>
+  ;CHECK-AVX512: cost of 2 {{.*}} %D5 = zext
+  %D5 = zext <16 x i1> undef to <16 x i32>
+
   ;CHECK-AVX2: cost of 2 {{.*}} trunc
   ;CHECK-AVX: cost of 4 {{.*}} trunc
   %E = trunc <4 x i64> undef to <4 x i32>
@@ -101,8 +115,12 @@ define i32 @zext_sext(<8 x i1> %in) {
 
   ;CHECK-AVX2: cost of 4 {{.*}} trunc
   ;CHECK-AVX: cost of 9 {{.*}} trunc
+  ;CHECK_AVX512: cost of 1 {{.*}} G = trunc
   %G = trunc <8 x i64> undef to <8 x i32>
 
+  ;CHECK-AVX512: cost of 4 {{.*}} %G1 = trunc
+  %G1 = trunc <16 x i64> undef to <16 x i32>
+
   ret i32 undef
 }
 
@@ -207,7 +225,30 @@ define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
   ; CHECK: cost of 5 {{.*}} uitofp
   %C1 = uitofp <8 x i16> %c to <8 x float>
 
-  ; CHECK: cost of 9 {{.*}} uitofp
+  ; CHECK-AVX2: cost of 8 {{.*}} uitofp
+  ; CHECK-AVX512: cost of 8 {{.*}} uitofp
+  ; CHECK-AVX: cost of 9 {{.*}} uitofp
   %D1 = uitofp <8 x i32> %d to <8 x float>
   ret void
 }
+
+define void @fp_conv(<8 x float> %a, <16 x float>%b) {
+;CHECK-LABEL: for function 'fp_conv'
+  ; CHECK-AVX512: cost of 1 {{.*}} fpext
+  %A1 = fpext <8 x float> %a to <8 x double>
+
+  ; CHECK-AVX512: cost of 3 {{.*}} fpext
+  %A2 = fpext <16 x float> %b to <16 x double>
+
+  ; CHECK-AVX2:   cost of 3 {{.*}} %A3 = fpext
+  ; CHECK-AVX512: cost of 1 {{.*}} %A3 = fpext
+  %A3 = fpext <8 x float> %a to <8 x double>
+
+  ; CHECK-AVX2:   cost of 3 {{.*}} %A4 = fptrunc
+  ; CHECK-AVX512: cost of 1 {{.*}} %A4 = fptrunc
+  %A4 = fptrunc <8 x double> undef to <8 x float>
+
+  ; CHECK-AVX512: cost of 3 {{.*}} %A5 = fptrunc
+  %A5 = fptrunc <16 x double> undef to <16 x float>
+  ret void
+}
diff --git a/test/Analysis/CostModel/X86/cmp.ll b/test/Analysis/CostModel/X86/cmp.ll
index 9f2bdb3c21b7..469cd735f8a0 100644
--- a/test/Analysis/CostModel/X86/cmp.ll
+++ b/test/Analysis/CostModel/X86/cmp.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck -check-prefix=CHECK -check-prefix=AVX1 %s
 ; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-avx2 | FileCheck -check-prefix=CHECK -check-prefix=AVX2 %s
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck -check-prefix=CHECK -check-prefix=AVX512 %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -22,6 +23,11 @@ define i32 @cmp(i32 %arg) {
   ;AVX2: cost of 1 {{.*}} fcmp
   %E = fcmp olt <4 x double> undef, undef
 
+  ; AVX512: cost of 1 {{.*}} %E1 = fcmp
+  %E1 = fcmp olt <16 x float> undef, undef
+  ; AVX512: cost of 2 {{.*}} %E2 = fcmp
+  %E2 = fcmp olt <16 x double> undef, undef
+
   ;  -- integers --
 
   ;AVX1: cost of 1 {{.*}} icmp
@@ -49,6 +55,11 @@ define i32 @cmp(i32 %arg) {
   ;AVX2: cost of 1 {{.*}} icmp
   %M = icmp eq <32 x i8> undef, undef
 
+  ; AVX512: cost of 1 {{.*}} %M1 = icmp
+  %M1 = icmp eq <16 x i32> undef, undef
+  ; AVX512: cost of 2 {{.*}} %M2 = icmp
+  %M2 = icmp eq <16 x i64> undef, undef
+
   ;CHECK: cost of 0 {{.*}} ret
   ret i32 undef
 }
diff --git a/test/Analysis/CostModel/X86/sitofp.ll b/test/Analysis/CostModel/X86/sitofp.ll
index 338d97416526..edc937ecf946 100644
--- a/test/Analysis/CostModel/X86/sitofp.ll
+++ b/test/Analysis/CostModel/X86/sitofp.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=core2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s
+; RUN: opt -mtriple=x86_64-apple-darwin -mcpu=knl -cost-model -analyze < %s | FileCheck --check-prefix=AVX512F %s
 
 define <2 x double> @sitofpv2i8v2double(<2 x i8> %a) {
   ; SSE2: sitofpv2i8v2double
@@ -279,3 +280,47 @@ define <32 x float> @sitofpv32i64v32float(<32 x i64> %a) {
   %1 = sitofp <32 x i64> %a to <32 x float>
   ret <32 x float> %1
 }
+
+; AVX512F-LABEL: sitofp_16i8_float
+; AVX512F: cost of 2 {{.*}} sitofp
+define <16 x float> @sitofp_16i8_float(<16 x i8> %a) {
+  %1 = sitofp <16 x i8> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+define <16 x float> @sitofp_16i16_float(<16 x i16> %a) {
+  ; AVX512F-LABEL: sitofp_16i16_float
+  ; AVX512F: cost of 2 {{.*}} sitofp
+  %1 = sitofp <16 x i16> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+; AVX512F-LABEL: sitofp_8i8_double
+; AVX512F: cost of 2 {{.*}} sitofp
+define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
+  %1 = sitofp <8 x i8> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+; AVX512F-LABEL: sitofp_8i16_double
+; AVX512F: cost of 2 {{.*}} sitofp
+define <8 x double> @sitofp_8i16_double(<8 x i16> %a) {
+  %1 = sitofp <8 x i16> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+; AVX512F-LABEL: sitofp_8i1_double
+; AVX512F: cost of 4 {{.*}} sitofp
+define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
+  %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
+  %1 = sitofp <8 x i1> %cmpres to <8 x double>
+  ret <8 x double> %1
+}
+
+; AVX512F-LABEL: sitofp_16i1_float
+; AVX512F: cost of 3 {{.*}} sitofp
+define <16 x float> @sitofp_16i1_float(<16 x float> %a) {
+  %cmpres = fcmp ogt <16 x float> %a, zeroinitializer
+  %1 = sitofp <16 x i1> %cmpres to <16 x float>
+  ret <16 x float> %1
+}
diff --git a/test/Analysis/CostModel/X86/uitofp.ll b/test/Analysis/CostModel/X86/uitofp.ll
index a41a04df84ec..27ec268b42a8 100644
--- a/test/Analysis/CostModel/X86/uitofp.ll
+++ b/test/Analysis/CostModel/X86/uitofp.ll
@@ -235,7 +235,7 @@ define <2 x float> @uitofpv2i8v2float(<2 x i8> %a) {
 
 define <4 x float> @uitofpv4i8v4float(<4 x i8> %a) {
   ; SSE2: uitofpv4i8v4float
-  ; SSE2: cost of 15 {{.*}} uitofp
+  ; SSE2: cost of 8 {{.*}} uitofp
   %1 = uitofp <4 x i8> %a to <4 x float>
   ret <4 x float> %1
 }
@@ -270,7 +270,7 @@ define <2 x float> @uitofpv2i16v2float(<2 x i16> %a) {
 
 define <4 x float> @uitofpv4i16v4float(<4 x i16> %a) {
   ; SSE2: uitofpv4i16v4float
-  ; SSE2: cost of 15 {{.*}} uitofp
+  ; SSE2: cost of 8 {{.*}} uitofp
   %1 = uitofp <4 x i16> %a to <4 x float>
   ret <4 x float> %1
 }
@@ -305,28 +305,28 @@ define <2 x float> @uitofpv2i32v2float(<2 x i32> %a) {
 
 define <4 x float> @uitofpv4i32v4float(<4 x i32> %a) {
   ; SSE2: uitofpv4i32v4float
-  ; SSE2: cost of 15 {{.*}} uitofp
+  ; SSE2: cost of 8 {{.*}} uitofp
   %1 = uitofp <4 x i32> %a to <4 x float>
   ret <4 x float> %1
 }
 
 define <8 x float> @uitofpv8i32v8float(<8 x i32> %a) {
   ; SSE2: uitofpv8i32v8float
-  ; SSE2: cost of 30 {{.*}} uitofp
+  ; SSE2: cost of 16 {{.*}} uitofp
   %1 = uitofp <8 x i32> %a to <8 x float>
   ret <8 x float> %1
 }
 
 define <16 x float> @uitofpv16i32v16float(<16 x i32> %a) {
   ; SSE2: uitofpv16i32v16float
-  ; SSE2: cost of 60 {{.*}} uitofp
+  ; SSE2: cost of 32 {{.*}} uitofp
   %1 = uitofp <16 x i32> %a to <16 x float>
   ret <16 x float> %1
 }
 
 define <32 x float> @uitofpv32i32v32float(<32 x i32> %a) {
   ; SSE2: uitofpv32i32v32float
-  ; SSE2: cost of 120 {{.*}} uitofp
+  ; SSE2: cost of 64 {{.*}} uitofp
   %1 = uitofp <32 x i32> %a to <32 x float>
   ret <32 x float> %1
 }
diff --git a/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll b/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll
new file mode 100644
index 000000000000..95e5e52bc2a7
--- /dev/null
+++ b/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -analyze -basicaa -da -da-delinearize=false | FileCheck %s
+; RUN: opt < %s -analyze -basicaa -da -da-delinearize | FileCheck %s -check-prefix=DELIN
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+; for (int i = 0; i < 100; ++i) {
+;   int t0 = a[i][i];
+;   int t1 = t0 + 1;
+;   a[i][5] = t1;
+; }
+; The subscript 5 in a[i][5] is deliberately an i32, mismatching the types of
+; other subscript. DependenceAnalysis before the fix crashed due to this
+; mismatch.
+define void @i32_subscript([100 x [100 x i32]]* %a, i32* %b) {
+; CHECK-LABEL: 'Dependence Analysis' for function 'i32_subscript'
+; DELIN-LABEL: 'Dependence Analysis' for function 'i32_subscript'
+entry:
+  br label %for.body
+
+for.body:
+; CHECK: da analyze - none!
+; CHECK: da analyze - anti [=|<]!
+; CHECK: da analyze - none!
+; DELIN: da analyze - none!
+; DELIN: da analyze - anti [=|<]!
+; DELIN: da analyze - none!
+  %i = phi i64 [ 0, %entry ], [ %i.inc, %for.body ]
+  %a.addr = getelementptr [100 x [100 x i32]]* %a, i64 0, i64 %i, i64 %i
+  %a.addr.2 = getelementptr [100 x [100 x i32]]* %a, i64 0, i64 %i, i32 5
+  %0 = load i32* %a.addr, align 4
+  %1 = add i32 %0, 1
+  store i32 %1, i32* %a.addr.2, align 4
+  %i.inc = add nsw i64 %i, 1
+  %exitcond = icmp ne i64 %i.inc, 100
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
diff --git a/test/Analysis/Dominators/basic.ll b/test/Analysis/Dominators/basic.ll
new file mode 100644
index 000000000000..353c3397b5da
--- /dev/null
+++ b/test/Analysis/Dominators/basic.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s -domtree -analyze | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OLDPM
+; RUN: opt < %s -disable-output -passes='print<domtree>' 2>&1 | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NEWPM
+
+define void @test1() {
+; CHECK-OLDPM-LABEL: 'Dominator Tree Construction' for function 'test1':
+; CHECK-NEWPM-LABEL: DominatorTree for function: test1
+; CHECK:      [1] %entry
+; CHECK-NEXT:   [2] %a
+; CHECK-NEXT:   [2] %c
+; CHECK-NEXT:     [3] %d
+; CHECK-NEXT:     [3] %e
+; CHECK-NEXT:   [2] %b
+
+entry:
+  br i1 undef, label %a, label %b
+
+a:
+  br label %c
+
+b:
+  br label %c
+
+c:
+  br i1 undef, label %d, label %e
+
+d:
+  ret void
+
+e:
+  ret void
+}
+
+define void @test2() {
+; CHECK-OLDPM-LABEL: 'Dominator Tree Construction' for function 'test2':
+; CHECK-NEWPM-LABEL: DominatorTree for function: test2
+; CHECK:      [1] %entry
+; CHECK-NEXT:   [2] %a
+; CHECK-NEXT:     [3] %b
+; CHECK-NEXT:       [4] %c
+; CHECK-NEXT:         [5] %d
+; CHECK-NEXT:         [5] %ret
+
+entry:
+  br label %a
+
+a:
+  br label %b
+
+b:
+  br i1 undef, label %a, label %c
+
+c:
+  br i1 undef, label %d, label %ret
+
+d:
+  br i1 undef, label %a, label %ret
+
+ret:
+  ret void
+}
diff --git a/test/Analysis/GlobalsModRef/pr12351.ll b/test/Analysis/GlobalsModRef/pr12351.ll
index 1c5ac43f8d27..c221f4c087f8 100644
--- a/test/Analysis/GlobalsModRef/pr12351.ll
+++ b/test/Analysis/GlobalsModRef/pr12351.ll
@@ -26,8 +26,8 @@ define i32 @foo2() {
 
 define void @bar2(i32* %foo)  {
   store i32 0, i32* %foo, align 4
-  tail call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  tail call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}, metadata !{})
   ret void
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
diff --git a/test/Analysis/ScalarEvolution/load-with-range-metadata.ll b/test/Analysis/ScalarEvolution/load-with-range-metadata.ll
new file mode 100644
index 000000000000..32c1074b1742
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/load-with-range-metadata.ll
@@ -0,0 +1,37 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+define i32 @slt_trip_count_with_range(i32 *%ptr0, i32 *%ptr1) {
+; CHECK-LABEL: slt_trip_count_with_range
+ entry:
+  %limit = load i32* %ptr0, !range !0
+  br label %loop
+
+ loop:
+; CHECK: Loop %loop: max backedge-taken count is 98
+  %index = phi i32 [ 0, %entry ], [ %index.inc, %loop ]
+  %index.inc = add i32 %index, 1
+  %continue = icmp slt i32 %index.inc, %limit
+  br i1 %continue, label %loop, label %loop.exit
+
+ loop.exit:
+  ret i32 0
+}
+
+define i32 @ult_trip_count_with_range(i32 *%ptr0, i32 *%ptr1) {
+; CHECK-LABEL: ult_trip_count_with_range
+ entry:
+  %limit = load i32* %ptr0, !range !0
+  br label %loop
+
+ loop:
+; CHECK: Loop %loop: max backedge-taken count is 98
+  %index = phi i32 [ 0, %entry ], [ %index.inc, %loop ]
+  %index.inc = add i32 %index, 1
+  %continue = icmp ult i32 %index.inc, %limit
+  br i1 %continue, label %loop, label %loop.exit
+
+ loop.exit:
+  ret i32 0
+}
+
+!0 = !{i32 1, i32 100}
diff --git a/test/Analysis/ScalarEvolution/nsw-offset-assume.ll b/test/Analysis/ScalarEvolution/nsw-offset-assume.ll
new file mode 100644
index 000000000000..29cf65857797
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/nsw-offset-assume.ll
@@ -0,0 +1,83 @@
+; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s
+
+; ScalarEvolution should be able to fold away the sign-extensions
+; on this loop with a primary induction variable incremented with
+; a nsw add of 2 (this test is derived from the nsw-offset.ll test, but uses an
+; assume instead of a preheader conditional branch to guard the loop).
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define void @foo(i32 %no, double* nocapture %d, double* nocapture %q) nounwind {
+entry:
+  %n = and i32 %no, 4294967294
+  %0 = icmp sgt i32 %n, 0                         ; <i1> [#uses=1]
+  tail call void @llvm.assume(i1 %0)
+  br label %bb.nph
+
+bb.nph:                                           ; preds = %entry
+  br label %bb
+
+bb:                                               ; preds = %bb.nph, %bb1
+  %i.01 = phi i32 [ %16, %bb1 ], [ 0, %bb.nph ]   ; <i32> [#uses=5]
+
+; CHECK: %1 = sext i32 %i.01 to i64
+; CHECK: -->  {0,+,2}<nuw><nsw><%bb>
+  %1 = sext i32 %i.01 to i64                      ; <i64> [#uses=1]
+
+; CHECK: %2 = getelementptr inbounds double* %d, i64 %1
+; CHECK: -->  {%d,+,16}<nsw><%bb>
+  %2 = getelementptr inbounds double* %d, i64 %1  ; <double*> [#uses=1]
+
+  %3 = load double* %2, align 8                   ; <double> [#uses=1]
+  %4 = sext i32 %i.01 to i64                      ; <i64> [#uses=1]
+  %5 = getelementptr inbounds double* %q, i64 %4  ; <double*> [#uses=1]
+  %6 = load double* %5, align 8                   ; <double> [#uses=1]
+  %7 = or i32 %i.01, 1                            ; <i32> [#uses=1]
+
+; CHECK: %8 = sext i32 %7 to i64
+; CHECK: -->  {1,+,2}<nuw><nsw><%bb>
+  %8 = sext i32 %7 to i64                         ; <i64> [#uses=1]
+
+; CHECK: %9 = getelementptr inbounds double* %q, i64 %8
+; CHECK: {(8 + %q),+,16}<nsw><%bb>
+  %9 = getelementptr inbounds double* %q, i64 %8  ; <double*> [#uses=1]
+
+; Artificially repeat the above three instructions, this time using
+; add nsw instead of or.
+  %t7 = add nsw i32 %i.01, 1                            ; <i32> [#uses=1]
+
+; CHECK: %t8 = sext i32 %t7 to i64
+; CHECK: -->  {1,+,2}<nuw><nsw><%bb>
+  %t8 = sext i32 %t7 to i64                         ; <i64> [#uses=1]
+
+; CHECK: %t9 = getelementptr inbounds double* %q, i64 %t8
+; CHECK: {(8 + %q),+,16}<nsw><%bb>
+  %t9 = getelementptr inbounds double* %q, i64 %t8  ; <double*> [#uses=1]
+
+  %10 = load double* %9, align 8                  ; <double> [#uses=1]
+  %11 = fadd double %6, %10                       ; <double> [#uses=1]
+  %12 = fadd double %11, 3.200000e+00             ; <double> [#uses=1]
+  %13 = fmul double %3, %12                       ; <double> [#uses=1]
+  %14 = sext i32 %i.01 to i64                     ; <i64> [#uses=1]
+  %15 = getelementptr inbounds double* %d, i64 %14 ; <double*> [#uses=1]
+  store double %13, double* %15, align 8
+  %16 = add nsw i32 %i.01, 2                      ; <i32> [#uses=2]
+  br label %bb1
+
+bb1:                                              ; preds = %bb
+  %17 = icmp slt i32 %16, %n                      ; <i1> [#uses=1]
+  br i1 %17, label %bb, label %bb1.return_crit_edge
+
+bb1.return_crit_edge:                             ; preds = %bb1
+  br label %return
+
+return:                                           ; preds = %bb1.return_crit_edge, %entry
+  ret void
+}
+
+declare void @llvm.assume(i1) nounwind
+
+; Note: Without the preheader assume, there is an 'smax' in the
+; backedge-taken count expression:
+; CHECK: Loop %bb: backedge-taken count is ((-1 + (2 * (%no /u 2))) /u 2)
+; CHECK: Loop %bb: max backedge-taken count is 1073741822
diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll
index 05992eadbac0..d776a5a5da78 100644
--- a/test/Analysis/ScalarEvolution/nsw.ll
+++ b/test/Analysis/ScalarEvolution/nsw.ll
@@ -123,9 +123,8 @@ exit:
   ret i32 %result
 }
 
-; TODO: This could fold down to '1'
 ; CHECK-LABEL: PR12375
-; CHECK: -->  {(4 + %arg),+,4}<nuw><%bb1>		Exits: (4 + (4 * ((-1 + (-1 * %arg) + ((4 + %arg) umax (8 + %arg)<nsw>)) /u 4)) + %arg)
+; CHECK: -->  {(4 + %arg),+,4}<nuw><%bb1>		Exits: (8 + %arg)<nsw>
 define i32 @PR12375(i32* readnone %arg) {
 bb:
   %tmp = getelementptr inbounds i32* %arg, i64 2
@@ -158,3 +157,23 @@ bb2:                                              ; preds = %bb2, %bb
 bb5:                                              ; preds = %bb2
   ret void
 }
+
+declare void @f(i32)
+
+; CHECK-LABEL: nswnowrap
+; CHECK: --> {(1 + %v),+,1}<nsw><%for.body>   Exits: (2 + %v)
+define void @nswnowrap(i32 %v) {
+entry:
+  %add = add nsw i32 %v, 1
+  br label %for.body
+
+for.body:
+  %i.04 = phi i32 [ %v, %entry ], [ %inc, %for.body ]
+  %inc = add nsw i32 %i.04, 1
+  tail call void @f(i32 %i.04)
+  %cmp = icmp slt i32 %i.04, %add
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
diff --git a/test/Analysis/ScalarEvolution/pr22179.ll b/test/Analysis/ScalarEvolution/pr22179.ll
new file mode 100644
index 000000000000..d9fb5104436e
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/pr22179.ll
@@ -0,0 +1,28 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+%struct.anon = type { i8 }
+%struct.S = type { i32 }
+
+@a = common global %struct.anon zeroinitializer, align 1
+@b = common global %struct.S zeroinitializer, align 4
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @main() {
+; CHECK-LABEL: Classifying expressions for: @main
+  store i8 0, i8* getelementptr inbounds (%struct.anon* @a, i64 0, i32 0), align 1
+  br label %loop
+
+loop:
+  %storemerge1 = phi i8 [ 0, %0 ], [ %inc, %loop ]
+  %m = load volatile i32* getelementptr inbounds (%struct.S* @b, i64 0, i32 0), align 4
+  %inc = add nuw i8 %storemerge1, 1
+; CHECK:   %inc = add nuw i8 %storemerge1, 1
+; CHECK-NEXT: -->  {1,+,1}<nuw><%loop>
+; CHECK-NOT: -->  {1,+,1}<nuw><nsw><%loop>
+  %exitcond = icmp eq i8 %inc, -128
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  store i8 -128, i8* getelementptr inbounds (%struct.anon* @a, i64 0, i32 0), align 1
+  ret i32 0
+}
diff --git a/test/Analysis/ScalarEvolution/sext-iv-1.ll b/test/Analysis/ScalarEvolution/sext-iv-1.ll
index c34596d35af0..a6f70dbff9a0 100644
--- a/test/Analysis/ScalarEvolution/sext-iv-1.ll
+++ b/test/Analysis/ScalarEvolution/sext-iv-1.ll
@@ -1,5 +1,12 @@
 ; RUN: opt < %s -scalar-evolution -analyze \
-; RUN:  | grep " -->  (sext i. {.*,+,.*}<%bb1> to i64)" | count 5
+; RUN:  | FileCheck %s
+
+; CHECK: -->  (sext i{{.}} {{{.*}},+,{{.*}}}<%bb1> to i64)
+; CHECK: -->  (sext i{{.}} {{{.*}},+,{{.*}}}<%bb1> to i64)
+; CHECK: -->  (sext i{{.}} {{{.*}},+,{{.*}}}<%bb1> to i64)
+; CHECK: -->  (sext i{{.}} {{{.*}},+,{{.*}}}<%bb1> to i64)
+; CHECK: -->  (sext i{{.}} {{{.*}},+,{{.*}}}<%bb1> to i64)
+; CHECK-NOT: -->  (sext
 
 ; Don't convert (sext {...,+,...}) to {sext(...),+,sext(...)} in cases
 ; where the trip count is not within range.
diff --git a/test/Analysis/ScopedNoAliasAA/basic-domains.ll b/test/Analysis/ScopedNoAliasAA/basic-domains.ll
new file mode 100644
index 000000000000..7633a6dfcb4c
--- /dev/null
+++ b/test/Analysis/ScopedNoAliasAA/basic-domains.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -basicaa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo1(float* nocapture %a, float* nocapture readonly %c) #0 {
+entry:
+; CHECK-LABEL: Function: foo1
+  %0 = load float* %c, align 4, !alias.scope !9
+  %arrayidx.i = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx.i, align 4, !noalias !6
+
+  %1 = load float* %c, align 4, !alias.scope !5
+  %arrayidx.i2 = getelementptr inbounds float* %a, i64 15
+  store float %1, float* %arrayidx.i2, align 4, !noalias !6
+
+  %2 = load float* %c, align 4, !alias.scope !6
+  %arrayidx.i3 = getelementptr inbounds float* %a, i64 16
+  store float %2, float* %arrayidx.i3, align 4, !noalias !5
+
+  ret void
+}
+
+attributes #0 = { nounwind uwtable }
+
+!0 = !{!0, !"some domain"}
+!1 = !{!1, !"some other domain"}
+
+; Two scopes (which must be self-referential to avoid being "uniqued"):
+!2 = !{!2, !0, !"a scope in dom0"}
+!3 = !{!2}
+
+!4 = !{!4, !0, !"another scope in dom0"}
+!5 = !{!4}
+
+; A list of the two scopes.
+!6 = !{!2, !4}
+
+; Another scope in the second domain
+!7 = !{!7, !1, !"another scope in dom1"}
+!8 = !{!7}
+
+; A list of scopes from both domains.
+!9 = !{!2, !4, !7}
+
+; CHECK: NoAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %0, float* %arrayidx.i, align 4, !noalias !6
+; CHECK: NoAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %1, float* %arrayidx.i2, align 4, !noalias !6
+; CHECK: MayAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %2, float* %arrayidx.i3, align 4, !noalias !7
+; CHECK: NoAlias:   %1 = load float* %c, align 4, !alias.scope !7 <->   store float %0, float* %arrayidx.i, align 4, !noalias !6
+; CHECK: NoAlias:   %1 = load float* %c, align 4, !alias.scope !7 <->   store float %1, float* %arrayidx.i2, align 4, !noalias !6
+; CHECK: NoAlias:   %1 = load float* %c, align 4, !alias.scope !7 <->   store float %2, float* %arrayidx.i3, align 4, !noalias !7
+; CHECK: NoAlias:   %2 = load float* %c, align 4, !alias.scope !6 <->   store float %0, float* %arrayidx.i, align 4, !noalias !6
+; CHECK: NoAlias:   %2 = load float* %c, align 4, !alias.scope !6 <->   store float %1, float* %arrayidx.i2, align 4, !noalias !6
+; CHECK: MayAlias:   %2 = load float* %c, align 4, !alias.scope !6 <->   store float %2, float* %arrayidx.i3, align 4, !noalias !7
+; CHECK: NoAlias:   store float %1, float* %arrayidx.i2, align 4, !noalias !6 <->   store float %0, float* %arrayidx.i, align 4, !noalias !6
+; CHECK: NoAlias:   store float %2, float* %arrayidx.i3, align 4, !noalias !7 <->   store float %0, float* %arrayidx.i, align 4, !noalias !6
+; CHECK: NoAlias:   store float %2, float* %arrayidx.i3, align 4, !noalias !7 <->   store float %1, float* %arrayidx.i2, align 4, !noalias !6
+
diff --git a/test/Analysis/ScopedNoAliasAA/basic.ll b/test/Analysis/ScopedNoAliasAA/basic.ll
new file mode 100644
index 000000000000..bb232b5b6cb6
--- /dev/null
+++ b/test/Analysis/ScopedNoAliasAA/basic.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -basicaa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo1(float* nocapture %a, float* nocapture readonly %c) #0 {
+entry:
+; CHECK-LABEL: Function: foo1
+  %0 = load float* %c, align 4, !alias.scope !1
+  %arrayidx.i = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx.i, align 4, !noalias !1
+  %1 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %1, float* %arrayidx, align 4
+  ret void
+
+; CHECK: NoAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %0, float* %arrayidx.i, align 4, !noalias !0
+; CHECK: MayAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %1, float* %arrayidx, align 4
+; CHECK: MayAlias:   %1 = load float* %c, align 4 <->   store float %0, float* %arrayidx.i, align 4, !noalias !0
+; CHECK: MayAlias:   %1 = load float* %c, align 4 <->   store float %1, float* %arrayidx, align 4
+; CHECK: NoAlias:   store float %1, float* %arrayidx, align 4 <->   store float %0, float* %arrayidx.i, align 4, !noalias !0
+}
+
+attributes #0 = { nounwind uwtable }
+
+!0 = !{!0, !"some domain"}
+!1 = !{!1, !0, !"some scope"}
+
diff --git a/test/Analysis/ScopedNoAliasAA/basic2.ll b/test/Analysis/ScopedNoAliasAA/basic2.ll
new file mode 100644
index 000000000000..a154b13d98bf
--- /dev/null
+++ b/test/Analysis/ScopedNoAliasAA/basic2.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -basicaa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+; CHECK-LABEL: Function: foo2
+  %0 = load float* %c, align 4, !alias.scope !0
+  %arrayidx.i = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx.i, align 4, !alias.scope !5, !noalias !4
+  %arrayidx1.i = getelementptr inbounds float* %b, i64 8
+  store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noalias !5
+  %1 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %1, float* %arrayidx, align 4
+  ret void
+
+; CHECK: MayAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %0, float* %arrayidx.i, align 4, !alias.scope !4, !noalia
+; CHECK: s !5
+; CHECK: MayAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noali
+; CHECK: as !4
+; CHECK: MayAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %1, float* %arrayidx, align 4
+; CHECK: MayAlias:   %1 = load float* %c, align 4 <->   store float %0, float* %arrayidx.i, align 4, !alias.scope !4, !noalias !5
+; CHECK: MayAlias:   %1 = load float* %c, align 4 <->   store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noalias !4
+; CHECK: MayAlias:   %1 = load float* %c, align 4 <->   store float %1, float* %arrayidx, align 4
+; CHECK: NoAlias:   store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noalias !4 <->   store float %0, float* %arrayidx.i, align
+; CHECK: 4, !alias.scope !4, !noalias !5
+; CHECK: NoAlias:   store float %1, float* %arrayidx, align 4 <->   store float %0, float* %arrayidx.i, align 4, !alias.scope !4, !noalias !5
+; CHECK: MayAlias:   store float %1, float* %arrayidx, align 4 <->   store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noalias !
+; CHECK: 4
+}
+
+attributes #0 = { nounwind uwtable }
+
+!0 = !{!1, !3}
+!1 = !{!1, !2, !"some scope"}
+!2 = !{!2, !"some domain"}
+!3 = !{!3, !2, !"some other scope"}
+!4 = !{!1}
+!5 = !{!3}
+
diff --git a/test/Analysis/TypeBasedAliasAnalysis/PR17620.ll b/test/Analysis/TypeBasedAliasAnalysis/PR17620.ll
index 905113995c30..920d6f56d957 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/PR17620.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/PR17620.ll
@@ -32,14 +32,14 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"clang version 3.4"}
-!1 = metadata !{metadata !2, metadata !2, i64 0}
-!2 = metadata !{metadata !"any pointer", metadata !3, i64 0}
-!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
-!4 = metadata !{metadata !"Simple C/C++ TBAA"}
-!5 = metadata !{metadata !6, metadata !2, i64 8}
-!6 = metadata !{metadata !"_ZTSN12_GLOBAL__N_11RINS_1FIPi8TreeIterN1I1S1LENS_1KINS_1DIKS2_S3_EEEEE1GEPSD_EE", metadata !7, i64 8}
-!7 = metadata !{metadata !"_ZTSN12_GLOBAL__N_11FIPi8TreeIterN1I1S1LENS_1KINS_1DIKS1_S2_EEEEE1GE", metadata !8, i64 0}
-!8 = metadata !{metadata !"_ZTSN12_GLOBAL__N_11DIKPi8TreeIterEE", metadata !2, i64 0, metadata !9, i64 8}
-!9 = metadata !{metadata !"_ZTS8TreeIter", metadata !2, i64 8, metadata !10, i64 16}
-!10 = metadata !{metadata !"bool", metadata !3, i64 0}
+!0 = !{!"clang version 3.4"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"any pointer", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !2, i64 8}
+!6 = !{!"_ZTSN12_GLOBAL__N_11RINS_1FIPi8TreeIterN1I1S1LENS_1KINS_1DIKS2_S3_EEEEE1GEPSD_EE", !7, i64 8}
+!7 = !{!"_ZTSN12_GLOBAL__N_11FIPi8TreeIterN1I1S1LENS_1KINS_1DIKS1_S2_EEEEE1GE", !8, i64 0}
+!8 = !{!"_ZTSN12_GLOBAL__N_11DIKPi8TreeIterEE", !2, i64 0, !9, i64 8}
+!9 = !{!"_ZTS8TreeIter", !2, i64 8, !10, i64 16}
+!10 = !{!"bool", !3, i64 0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll b/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
index 76a88c859a6b..10da13a8d460 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
@@ -45,23 +45,23 @@ define i8 @test1_no(i8* %a, i8* %b) nounwind {
 }
 
 ; Root note.
-!0 = metadata !{ }
+!0 = !{ }
 ; Some type.
-!1 = metadata !{metadata !7, metadata !7, i64 0}
+!1 = !{!7, !7, i64 0}
 ; Some other non-aliasing type.
-!2 = metadata !{metadata !8, metadata !8, i64 0}
+!2 = !{!8, !8, i64 0}
 
 ; Some type.
-!3 = metadata !{metadata !9, metadata !9, i64 0}
+!3 = !{!9, !9, i64 0}
 ; Some type in a different type system.
-!4 = metadata !{metadata !10, metadata !10, i64 0}
+!4 = !{!10, !10, i64 0}
 
 ; Invariant memory.
-!5 = metadata !{metadata !11, metadata !11, i64 0, i1 1}
+!5 = !{!11, !11, i64 0, i1 1}
 ; Not invariant memory.
-!6 = metadata !{metadata !11, metadata !11, i64 0, i1 0}
-!7 = metadata !{ metadata !"foo", metadata !0 }
-!8 = metadata !{ metadata !"bar", metadata !0 }
-!9 = metadata !{ metadata !"foo", metadata !0 }
-!10 = metadata !{ metadata !"bar", metadata !"different" }
-!11 = metadata !{ metadata !"qux", metadata !0}
+!6 = !{!11, !11, i64 0, i1 0}
+!7 = !{ !"foo", !0 }
+!8 = !{ !"bar", !0 }
+!9 = !{ !"foo", !0 }
+!10 = !{ !"bar", !"different" }
+!11 = !{ !"qux", !0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll b/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll
index 14bbeac14d69..31f775e04665 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll
@@ -32,8 +32,8 @@ define i32 @callercaller(i32* %Q) {
   ret i32 %X
 }
 
-!0 = metadata !{metadata !"test"}
-!1 = metadata !{metadata !3, metadata !3, i64 0}
-!2 = metadata !{metadata !4, metadata !4, i64 0}
-!3 = metadata !{metadata !"green", metadata !0}
-!4 = metadata !{metadata !"blue", metadata !0}
+!0 = !{!"test"}
+!1 = !{!3, !3, i64 0}
+!2 = !{!4, !4, i64 0}
+!3 = !{!"green", !0}
+!4 = !{!"blue", !0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/dse.ll b/test/Analysis/TypeBasedAliasAnalysis/dse.ll
index bcf1f2c5275d..09f8feb610a9 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/dse.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/dse.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -tbaa -basicaa -dse -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; DSE should make use of TBAA.
 
@@ -49,23 +50,23 @@ define i8 @test1_no(i8* %a, i8* %b) nounwind {
 }
 
 ; Root note.
-!0 = metadata !{ }
+!0 = !{ }
 ; Some type.
-!1 = metadata !{metadata !7, metadata !7, i64 0}
+!1 = !{!7, !7, i64 0}
 ; Some other non-aliasing type.
-!2 = metadata !{metadata !8, metadata !8, i64 0}
+!2 = !{!8, !8, i64 0}
 
 ; Some type.
-!3 = metadata !{metadata !9, metadata !9, i64 0}
+!3 = !{!9, !9, i64 0}
 ; Some type in a different type system.
-!4 = metadata !{metadata !10, metadata !10, i64 0}
+!4 = !{!10, !10, i64 0}
 
 ; Invariant memory.
-!5 = metadata !{metadata !11, metadata !11, i64 0, i1 1}
+!5 = !{!11, !11, i64 0, i1 1}
 ; Not invariant memory.
-!6 = metadata !{metadata !11, metadata !11, i64 0, i1 0}
-!7 = metadata !{ metadata !"foo", metadata !0 }
-!8 = metadata !{ metadata !"bar", metadata !0 }
-!9 = metadata !{ metadata !"foo", metadata !0 }
-!10 = metadata !{ metadata !"bar", metadata !"different" }
-!11 = metadata !{ metadata !"qux", metadata !0}
+!6 = !{!11, !11, i64 0, i1 0}
+!7 = !{ !"foo", !0 }
+!8 = !{ !"bar", !0 }
+!9 = !{ !"foo", !0 }
+!10 = !{ !"bar", !"different" }
+!11 = !{ !"qux", !0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
index 4dc40739edfb..732f5d7cfcbb 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
@@ -123,15 +123,15 @@ for.end:                                          ; preds = %for.body
   ret float %tmp10
 }
 
-; CHECK: [[TAG]] = metadata !{metadata [[TYPE_LL:!.*]], metadata [[TYPE_LL]], i64 0}
-; CHECK: [[TYPE_LL]] = metadata !{metadata !"long long", metadata {{!.*}}}
-!0 = metadata !{metadata !6, metadata !6, i64 0}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !7, metadata !7, i64 0}
-!4 = metadata !{metadata !8, metadata !8, i64 0}
-!5 = metadata !{metadata !9, metadata !9, i64 0}
-!6 = metadata !{metadata !"short", metadata !1}
-!7 = metadata !{metadata !"long long", metadata !1}
-!8 = metadata !{metadata !"int", metadata !1}
-!9 = metadata !{metadata !"float", metadata !1}
+; CHECK: [[TAG]] = !{[[TYPE_LL:!.*]], [[TYPE_LL]], i64 0}
+; CHECK: [[TYPE_LL]] = !{!"long long", {{!.*}}}
+!0 = !{!6, !6, i64 0}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA", null}
+!3 = !{!7, !7, i64 0}
+!4 = !{!8, !8, i64 0}
+!5 = !{!9, !9, i64 0}
+!6 = !{!"short", !1}
+!7 = !{!"long long", !1}
+!8 = !{!"int", !1}
+!9 = !{!"float", !1}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
index e9fb9418e704..6c9439afeeab 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -77,10 +77,10 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) nounwind
 ; CHECK: attributes #2 = { nounwind readonly }
 
 ; Root note.
-!0 = metadata !{ }
+!0 = !{ }
 
 ; Invariant memory.
-!1 = metadata !{metadata !3, metadata !3, i64 0, i1 1 }
+!1 = !{!3, !3, i64 0, i1 1 }
 ; Not invariant memory.
-!2 = metadata !{metadata !3, metadata !3, i64 0, i1 0 }
-!3 = metadata !{ metadata !"foo", metadata !0 }
+!2 = !{!3, !3, i64 0, i1 0 }
+!3 = !{ !"foo", !0 }
diff --git a/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll b/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
index 90e1abbb673a..edea6d02800d 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
@@ -46,12 +46,12 @@ entry:
   br i1 %c, label %if.else, label %if.then
 
 if.then:
-  %t = load i32* %p, !tbaa !4
+  %t = load i32* %p, !tbaa !3
   store i32 %t, i32* %q
   ret void
 
 if.else:
-  %u = load i32* %p, !tbaa !3
+  %u = load i32* %p, !tbaa !4
   store i32 %u, i32* %q
   ret void
 }
@@ -61,11 +61,11 @@ if.else:
 
 ; CHECK: @watch_out_for_another_type_change
 ; CHECK: if.then:
-; CHECK:   %t = load i32* %p
-; CHECK:   store i32 %t, i32* %q
+; CHECK:   store i32 0, i32* %q
 ; CHECK:   ret void
 ; CHECK: if.else:
-; CHECK:   store i32 0, i32* %q
+; CHECK:   %u = load i32* %p
+; CHECK:   store i32 %u, i32* %q
 
 define void @watch_out_for_another_type_change(i1 %c, i32* %p, i32* %p1, i32* %q) nounwind {
 entry:
@@ -74,22 +74,22 @@ entry:
   br i1 %c, label %if.else, label %if.then
 
 if.then:
-  %t = load i32* %p, !tbaa !3
+  %t = load i32* %p, !tbaa !4
   store i32 %t, i32* %q
   ret void
 
 if.else:
-  %u = load i32* %p, !tbaa !4
+  %u = load i32* %p, !tbaa !3
   store i32 %u, i32* %q
   ret void
 }
 
-!0 = metadata !{}
-!1 = metadata !{metadata !5, metadata !5, i64 0}
-!2 = metadata !{metadata !6, metadata !6, i64 0}
-!3 = metadata !{metadata !7, metadata !7, i64 0}
-!4 = metadata !{metadata !8, metadata !8, i64 0}
-!5 = metadata !{metadata !"red", metadata !0}
-!6 = metadata !{metadata !"blu", metadata !0}
-!7 = metadata !{metadata !"outer space"}
-!8 = metadata !{metadata !"brick red", metadata !5}
+!0 = !{}
+!1 = !{!5, !5, i64 0}
+!2 = !{!6, !6, i64 0}
+!3 = !{!7, !7, i64 0}
+!4 = !{!8, !8, i64 0}
+!5 = !{!"red", !0}
+!6 = !{!"blu", !0}
+!7 = !{!"outer space"}
+!8 = !{!"brick red", !5}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
index 93b8e503b855..0c12cac27d3a 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll
@@ -25,8 +25,8 @@ declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
 ; CHECK: attributes #0 = { nounwind readonly }
 ; CHECK: attributes [[NUW]] = { nounwind }
 
-!0 = metadata !{metadata !"tbaa root", null}
-!1 = metadata !{metadata !3, metadata !3, i64 0}
-!2 = metadata !{metadata !4, metadata !4, i64 0}
-!3 = metadata !{metadata !"A", metadata !0}
-!4 = metadata !{metadata !"B", metadata !0}
+!0 = !{!"tbaa root", null}
+!1 = !{!3, !3, i64 0}
+!2 = !{!4, !4, i64 0}
+!3 = !{!"A", !0}
+!4 = !{!"B", !0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/licm.ll b/test/Analysis/TypeBasedAliasAnalysis/licm.ll
index e45fc85478d3..0722a2ce86a8 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/licm.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/licm.ll
@@ -29,9 +29,9 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
-!0 = metadata !{metadata !"root", null}
-!1 = metadata !{metadata !6, metadata !6, i64 0}
-!2 = metadata !{metadata !7, metadata !7, i64 0}
+!0 = !{!"root", null}
+!1 = !{!6, !6, i64 0}
+!2 = !{!7, !7, i64 0}
 
 ; LICM shouldn't hoist anything here.
 
@@ -56,10 +56,10 @@ loop:
   br label %loop
 }
 
-!3 = metadata !{metadata !"pointer", metadata !8}
-!4 = metadata !{metadata !8, metadata !8, i64 0}
-!5 = metadata !{metadata !9, metadata !9, i64 0}
-!6 = metadata !{metadata !"pointer", metadata !0}
-!7 = metadata !{metadata !"double", metadata !0}
-!8 = metadata !{metadata !"char", metadata !9}
-!9 = metadata !{metadata !"root", null}
+!3 = !{!"pointer", !8}
+!4 = !{!8, !8, i64 0}
+!5 = !{!9, !9, i64 0}
+!6 = !{!"pointer", !0}
+!7 = !{!"double", !0}
+!8 = !{!"char", !9}
+!9 = !{!"root", null}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
index cdf72811ce5f..9fc9e42fc6cb 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll
@@ -18,10 +18,10 @@ define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
 
-; CHECK: [[TAGA]] = metadata !{metadata [[TYPEA:!.*]], metadata [[TYPEA]], i64 0}
-; CHECK: [[TYPEA]] = metadata !{metadata !"A", metadata !{{.*}}}
-!0 = metadata !{metadata !"tbaa root", null}
-!1 = metadata !{metadata !3, metadata !3, i64 0}
-!2 = metadata !{metadata !4, metadata !4, i64 0}
-!3 = metadata !{metadata !"A", metadata !0}
-!4 = metadata !{metadata !"B", metadata !0}
+; CHECK: [[TAGA]] = !{[[TYPEA:!.*]], [[TYPEA]], i64 0}
+; CHECK: [[TYPEA]] = !{!"A", !{{.*}}}
+!0 = !{!"tbaa root", null}
+!1 = !{!3, !3, i64 0}
+!2 = !{!4, !4, i64 0}
+!3 = !{!"A", !0}
+!4 = !{!"B", !0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll b/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll
index 609e87c2313f..fd05dbea7cba 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tbaa -basicaa -aa-eval -evaluate-tbaa -print-no-aliases -print-may-aliases -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -tbaa -basicaa -aa-eval -evaluate-aa-metadata -print-no-aliases -print-may-aliases -disable-output 2>&1 | FileCheck %s
 
 ; Generated with "clang -cc1 -disable-llvm-optzns -O1 -emit-llvm"
 ; #include <new>
@@ -97,14 +97,14 @@ declare noalias i8* @_Znwm(i64)
 
 attributes #0 = { nounwind }
 
-!0 = metadata !{metadata !1, metadata !1, i64 0}
-!1 = metadata !{metadata !"int", metadata !2, i64 0}
-!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0}
-!3 = metadata !{metadata !"Simple C/C++ TBAA"}
-!4 = metadata !{metadata !5, metadata !5, i64 0}
-!5 = metadata !{metadata !"any pointer", metadata !2, i64 0}
-!6 = metadata !{metadata !7, metadata !8, i64 0}
-!7 = metadata !{metadata !"_ZTS3Foo", metadata !8, i64 0}
-!8 = metadata !{metadata !"long", metadata !2, i64 0}
-!9 = metadata !{metadata !10, metadata !5, i64 0}
-!10 = metadata !{metadata !"_ZTS3Bar", metadata !5, i64 0}
+!0 = !{!1, !1, i64 0}
+!1 = !{!"int", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"any pointer", !2, i64 0}
+!6 = !{!7, !8, i64 0}
+!7 = !{!"_ZTS3Foo", !8, i64 0}
+!8 = !{!"long", !2, i64 0}
+!9 = !{!10, !5, i64 0}
+!10 = !{!"_ZTS3Bar", !5, i64 0}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/precedence.ll b/test/Analysis/TypeBasedAliasAnalysis/precedence.ll
index b219ef19284e..0b697b2e8869 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/precedence.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/precedence.ll
@@ -39,12 +39,12 @@ entry:
   ret i64 %tmp3
 }
 
-!0 = metadata !{metadata !2, metadata !2, i64 0}
-!1 = metadata !{metadata !"simple"}
-!2 = metadata !{metadata !"int", metadata !1}
-!3 = metadata !{metadata !6, metadata !6, i64 0}
-!4 = metadata !{metadata !7, metadata !7, i64 0}
-!5 = metadata !{metadata !8, metadata !8, i64 0}
-!6 = metadata !{metadata !"float", metadata !1}
-!7 = metadata !{metadata !"long", metadata !1}
-!8 = metadata !{metadata !"small", metadata !1}
+!0 = !{!2, !2, i64 0}
+!1 = !{!"simple"}
+!2 = !{!"int", !1}
+!3 = !{!6, !6, i64 0}
+!4 = !{!7, !7, i64 0}
+!5 = !{!8, !8, i64 0}
+!6 = !{!"float", !1}
+!7 = !{!"long", !1}
+!8 = !{!"small", !1}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/sink.ll b/test/Analysis/TypeBasedAliasAnalysis/sink.ll
index 726da6ce1e81..1a124b86083b 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/sink.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/sink.ll
@@ -15,10 +15,10 @@ b:
   ret void
 }
 
-; CHECK: [[TAGA]] = metadata !{metadata [[TYPEA:!.*]], metadata [[TYPEA]], i64 0}
-; CHECK: [[TYPEA]] = metadata !{metadata !"A", metadata !{{.*}}}
-!0 = metadata !{metadata !3, metadata !3, i64 0}
-!1 = metadata !{metadata !4, metadata !4, i64 0}
-!2 = metadata !{metadata !"test"}
-!3 = metadata !{metadata !"A", metadata !2}
-!4 = metadata !{metadata !"B", metadata !2}
+; CHECK: [[TAGA]] = !{[[TYPEA:!.*]], [[TYPEA]], i64 0}
+; CHECK: [[TYPEA]] = !{!"A", !{{.*}}}
+!0 = !{!3, !3, i64 0}
+!1 = !{!4, !4, i64 0}
+!2 = !{!"test"}
+!3 = !{!"A", !2}
+!4 = !{!"B", !2}
diff --git a/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
index e1c5d4526470..3c035af7c529 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tbaa -basicaa -aa-eval -evaluate-tbaa -print-no-aliases -print-may-aliases -disable-output 2>&1 | FileCheck %s
+; RUN: opt < %s -tbaa -basicaa -aa-eval -evaluate-aa-metadata -print-no-aliases -print-may-aliases -disable-output 2>&1 | FileCheck %s
 ; RUN: opt < %s -tbaa -basicaa -gvn -S | FileCheck %s --check-prefix=OPT
 ; Generated from clang/test/CodeGen/tbaa.cpp with "-O1 -struct-path-tbaa -disable-llvm-optzns".
 
@@ -363,30 +363,30 @@ entry:
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
-!0 = metadata !{metadata !1, metadata !1, i64 0}
-!1 = metadata !{metadata !"any pointer", metadata !2}
-!2 = metadata !{metadata !"omnipotent char", metadata !3}
-!3 = metadata !{metadata !"Simple C/C++ TBAA"}
-!4 = metadata !{metadata !5, metadata !5, i64 0}
-!5 = metadata !{metadata !"long long", metadata !2}
-!6 = metadata !{metadata !7, metadata !7, i64 0}
-!7 = metadata !{metadata !"int", metadata !2}
-!8 = metadata !{metadata !9, metadata !7, i64 4}
-!9 = metadata !{metadata !"_ZTS7StructA", metadata !10, i64 0, metadata !7, i64 4, metadata !10, i64 8, metadata !7, i64 12}
-!10 = metadata !{metadata !"short", metadata !2}
-!11 = metadata !{metadata !9, metadata !10, i64 0}
-!12 = metadata !{metadata !13, metadata !7, i64 8}
-!13 = metadata !{metadata !"_ZTS7StructB", metadata !10, i64 0, metadata !9, i64 4, metadata !7, i64 20}
-!14 = metadata !{metadata !13, metadata !10, i64 4}
-!15 = metadata !{metadata !13, metadata !7, i64 20}
-!16 = metadata !{metadata !13, metadata !7, i64 16}
-!17 = metadata !{metadata !18, metadata !7, i64 4}
-!18 = metadata !{metadata !"_ZTS7StructS", metadata !10, i64 0, metadata !7, i64 4}
-!19 = metadata !{metadata !18, metadata !10, i64 0}
-!20 = metadata !{metadata !21, metadata !7, i64 4}
-!21 = metadata !{metadata !"_ZTS8StructS2", metadata !10, i64 0, metadata !7, i64 4}
-!22 = metadata !{metadata !21, metadata !10, i64 0}
-!23 = metadata !{metadata !24, metadata !7, i64 12}
-!24 = metadata !{metadata !"_ZTS7StructC", metadata !10, i64 0, metadata !13, i64 4, metadata !7, i64 28}
-!25 = metadata !{metadata !26, metadata !7, i64 12}
-!26 = metadata !{metadata !"_ZTS7StructD", metadata !10, i64 0, metadata !13, i64 4, metadata !7, i64 28, metadata !2, i64 32}
+!0 = !{!1, !1, i64 0}
+!1 = !{!"any pointer", !2}
+!2 = !{!"omnipotent char", !3}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"long long", !2}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"int", !2}
+!8 = !{!9, !7, i64 4}
+!9 = !{!"_ZTS7StructA", !10, i64 0, !7, i64 4, !10, i64 8, !7, i64 12}
+!10 = !{!"short", !2}
+!11 = !{!9, !10, i64 0}
+!12 = !{!13, !7, i64 8}
+!13 = !{!"_ZTS7StructB", !10, i64 0, !9, i64 4, !7, i64 20}
+!14 = !{!13, !10, i64 4}
+!15 = !{!13, !7, i64 20}
+!16 = !{!13, !7, i64 16}
+!17 = !{!18, !7, i64 4}
+!18 = !{!"_ZTS7StructS", !10, i64 0, !7, i64 4}
+!19 = !{!18, !10, i64 0}
+!20 = !{!21, !7, i64 4}
+!21 = !{!"_ZTS8StructS2", !10, i64 0, !7, i64 4}
+!22 = !{!21, !10, i64 0}
+!23 = !{!24, !7, i64 12}
+!24 = !{!"_ZTS7StructC", !10, i64 0, !13, i64 4, !7, i64 28}
+!25 = !{!26, !7, i64 12}
+!26 = !{!"_ZTS7StructD", !10, i64 0, !13, i64 4, !7, i64 28, !2, i64 32}
diff --git a/test/Assembler/2002-03-08-NameCollision.ll b/test/Assembler/2002-03-08-NameCollision.ll
index b49789b2902d..089d3fb2864b 100644
--- a/test/Assembler/2002-03-08-NameCollision.ll
+++ b/test/Assembler/2002-03-08-NameCollision.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 ; Method arguments were being checked for collisions at the global scope before
 ; the method object was created by the parser.  Because of this, false
diff --git a/test/Assembler/2002-03-08-NameCollision2.ll b/test/Assembler/2002-03-08-NameCollision2.ll
index 1f7a4e16f8b3..dc98a36e8779 100644
--- a/test/Assembler/2002-03-08-NameCollision2.ll
+++ b/test/Assembler/2002-03-08-NameCollision2.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 ; Another name collision problem.  Here the problem was that if a forward
 ; declaration for a method was found, that this would cause spurious conflicts
diff --git a/test/Assembler/2002-04-07-HexFloatConstants.ll b/test/Assembler/2002-04-07-HexFloatConstants.ll
index b0d7cc0e43a4..90ee85a2a302 100644
--- a/test/Assembler/2002-04-07-HexFloatConstants.ll
+++ b/test/Assembler/2002-04-07-HexFloatConstants.ll
@@ -9,6 +9,7 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | opt -constprop | \
 ; RUN: llvm-dis > %t.2
 ; RUN: diff %t.1 %t.2
+; RUN: verify-uselistorder %s
 
 define double @test() {
         %tmp = fmul double 7.200000e+101, 0x427F4000             ; <double> [#uses=1]
diff --git a/test/Assembler/2002-04-07-InfConstant.ll b/test/Assembler/2002-04-07-InfConstant.ll
index 71837c94378c..6cd544791c57 100644
--- a/test/Assembler/2002-04-07-InfConstant.ll
+++ b/test/Assembler/2002-04-07-InfConstant.ll
@@ -1,6 +1,7 @@
 ; The output formater prints out 1.0e100 as Inf!
 ;
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep 0x7FF0000000000000
+; RUN: verify-uselistorder %s
 
 define float @test() {
         %tmp = fmul float 0x7FF0000000000000, 1.000000e+01               ; <float> [#uses=1]
diff --git a/test/Assembler/2002-04-29-NameBinding.ll b/test/Assembler/2002-04-29-NameBinding.ll
index 7960c20ddcea..960209bcb5b3 100644
--- a/test/Assembler/2002-04-29-NameBinding.ll
+++ b/test/Assembler/2002-04-29-NameBinding.ll
@@ -7,6 +7,7 @@
 ; RUN: opt < %s -globaldce -S | \
 ; RUN:   not grep constant
 ;
+; RUN: verify-uselistorder %s
 
 @v1 = internal constant i32 5           
 
diff --git a/test/Assembler/2002-05-02-InvalidForwardRef.ll b/test/Assembler/2002-05-02-InvalidForwardRef.ll
index 234545c2936f..38c42b349a0f 100644
--- a/test/Assembler/2002-05-02-InvalidForwardRef.ll
+++ b/test/Assembler/2002-05-02-InvalidForwardRef.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 ; It looks like the assembler is not forward resolving the function declaraion
 ; correctly.
 
diff --git a/test/Assembler/2002-07-14-OpaqueType.ll b/test/Assembler/2002-07-14-OpaqueType.ll
index 662fb0f31c1d..6256aab63e52 100644
--- a/test/Assembler/2002-07-14-OpaqueType.ll
+++ b/test/Assembler/2002-07-14-OpaqueType.ll
@@ -1,6 +1,7 @@
 ; Test that opaque types are preserved correctly
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis
 ;
+; RUN: verify-uselistorder %s
 
 %Ty = type opaque
 
diff --git a/test/Assembler/2002-07-25-QuoteInString.ll b/test/Assembler/2002-07-25-QuoteInString.ll
index facc5bdae62e..154568063f60 100644
--- a/test/Assembler/2002-07-25-QuoteInString.ll
+++ b/test/Assembler/2002-07-25-QuoteInString.ll
@@ -1,5 +1,6 @@
 ; Test double quotes in strings work correctly!
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis
 ;
+; RUN: verify-uselistorder %s
 @str = internal global [6 x i8] c"\22foo\22\00"         ; <[6 x i8]*> [#uses=0]
 
diff --git a/test/Assembler/2002-07-25-ReturnPtrFunction.ll b/test/Assembler/2002-07-25-ReturnPtrFunction.ll
index 6988fad8398e..fdee93c1ca70 100644
--- a/test/Assembler/2002-07-25-ReturnPtrFunction.ll
+++ b/test/Assembler/2002-07-25-ReturnPtrFunction.ll
@@ -2,6 +2,7 @@
 ; the right thing.
 ;
 ; RUN: llvm-as < %s | llvm-dis | llvm-as
+; RUN: verify-uselistorder %s
 
 declare void (i32)* @foo()
 
diff --git a/test/Assembler/2002-07-31-SlashInString.ll b/test/Assembler/2002-07-31-SlashInString.ll
index ff48258870dd..879a96555c7e 100644
--- a/test/Assembler/2002-07-31-SlashInString.ll
+++ b/test/Assembler/2002-07-31-SlashInString.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as 
+; RUN: verify-uselistorder %s
 
 ; Make sure that \\ works in a string initializer
 @Slashtest = internal global [8 x i8] c"\5Cbegin{\00"
diff --git a/test/Assembler/2002-08-15-CastAmbiguity.ll b/test/Assembler/2002-08-15-CastAmbiguity.ll
index c71652446d6f..5f952b42b246 100644
--- a/test/Assembler/2002-08-15-CastAmbiguity.ll
+++ b/test/Assembler/2002-08-15-CastAmbiguity.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 define void @test(i32 %X) {
         call void @test( i32 6 )
diff --git a/test/Assembler/2002-08-15-ConstantExprProblem.ll b/test/Assembler/2002-08-15-ConstantExprProblem.ll
index 02b9ea9adb87..343a1044b9c9 100644
--- a/test/Assembler/2002-08-15-ConstantExprProblem.ll
+++ b/test/Assembler/2002-08-15-ConstantExprProblem.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 @.LC0 = internal global [12 x i8] c"hello world\00"             ; <[12 x i8]*> [#uses=1]
 
diff --git a/test/Assembler/2002-08-15-UnresolvedGlobalReference.ll b/test/Assembler/2002-08-15-UnresolvedGlobalReference.ll
index 2ba3f14a48e5..6bbe0cfe7f6d 100644
--- a/test/Assembler/2002-08-15-UnresolvedGlobalReference.ll
+++ b/test/Assembler/2002-08-15-UnresolvedGlobalReference.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 @.LC0 = internal global [12 x i8] c"hello world\00"             ; <[12 x i8]*> [#uses=1]
 
diff --git a/test/Assembler/2002-08-16-ConstExprInlined.ll b/test/Assembler/2002-08-16-ConstExprInlined.ll
index f233bacca46b..b78d858c8cf9 100644
--- a/test/Assembler/2002-08-16-ConstExprInlined.ll
+++ b/test/Assembler/2002-08-16-ConstExprInlined.ll
@@ -9,6 +9,7 @@
 ; around!
 ;
 ; RUN: llvm-as < %s | llvm-dis | llvm-as
+; RUN: verify-uselistorder %s
 
 @.LC0 = internal global [4 x i8] c"foo\00"		; <[4 x i8]*> [#uses=1]
 @X = global i8* null		; <i8**> [#uses=0]
diff --git a/test/Assembler/2002-08-19-BytecodeReader.ll b/test/Assembler/2002-08-19-BytecodeReader.ll
index e211014eb0e8..0722885ea9fa 100644
--- a/test/Assembler/2002-08-19-BytecodeReader.ll
+++ b/test/Assembler/2002-08-19-BytecodeReader.ll
@@ -2,6 +2,7 @@
 ; "crafty" spec benchmark.
 ;
 ; RUN: opt < %s -instcombine | llvm-dis
+; RUN: verify-uselistorder %s
 	
 %CHESS_POSITION = type { i32, i32 }
 @pawn_probes = external global i32		; <i32*> [#uses=0]
diff --git a/test/Assembler/2002-08-22-DominanceProblem.ll b/test/Assembler/2002-08-22-DominanceProblem.ll
index 0dc192df2356..504861030d96 100644
--- a/test/Assembler/2002-08-22-DominanceProblem.ll
+++ b/test/Assembler/2002-08-22-DominanceProblem.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 ; Dominance relationships is not calculated correctly for unreachable blocks,
 ; which causes the verifier to barf on this input.
diff --git a/test/Assembler/2002-10-08-LargeArrayPerformance.ll b/test/Assembler/2002-10-08-LargeArrayPerformance.ll
index 34a993214e92..acd92800ae23 100644
--- a/test/Assembler/2002-10-08-LargeArrayPerformance.ll
+++ b/test/Assembler/2002-10-08-LargeArrayPerformance.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 ; This testcase comes from the following really simple c file:
 ;; int foo[30000]
 ;;; We should not be soo slow for such a simple case!
diff --git a/test/Assembler/2002-10-13-ConstantEncodingProblem.ll b/test/Assembler/2002-10-13-ConstantEncodingProblem.ll
index bf3a52154342..a0f7b3d5ad72 100644
--- a/test/Assembler/2002-10-13-ConstantEncodingProblem.ll
+++ b/test/Assembler/2002-10-13-ConstantEncodingProblem.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis
+; RUN: verify-uselistorder %s
 
 %Domain = type { %Domain**, %Domain* }
 @D = global %Domain zeroinitializer             ; <%Domain*> [#uses=0]
diff --git a/test/Assembler/2002-12-15-GlobalResolve.ll b/test/Assembler/2002-12-15-GlobalResolve.ll
index a873a6107e1c..87608cca4221 100644
--- a/test/Assembler/2002-12-15-GlobalResolve.ll
+++ b/test/Assembler/2002-12-15-GlobalResolve.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 @X = external global i32*
 @X1 = external global %T* 
diff --git a/test/Assembler/2003-01-30-UnsignedString.ll b/test/Assembler/2003-01-30-UnsignedString.ll
index 3c14d71621c7..27550ad96adb 100644
--- a/test/Assembler/2003-01-30-UnsignedString.ll
+++ b/test/Assembler/2003-01-30-UnsignedString.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 @spell_order = global [4 x i8] c"\FF\00\F7\00"
 
diff --git a/test/Assembler/2003-04-25-UnresolvedGlobalReference.ll b/test/Assembler/2003-04-25-UnresolvedGlobalReference.ll
index f1a5ed7b56b2..61fd911d8a17 100644
--- a/test/Assembler/2003-04-25-UnresolvedGlobalReference.ll
+++ b/test/Assembler/2003-04-25-UnresolvedGlobalReference.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 ; There should be absolutely no problem with this testcase.
 
 define i32 @test(i32 %arg1, i32 %arg2) {
diff --git a/test/Assembler/2003-05-03-BytecodeReaderProblem.ll b/test/Assembler/2003-05-03-BytecodeReaderProblem.ll
index f4a691160518..5cd57eaddaa8 100644
--- a/test/Assembler/2003-05-03-BytecodeReaderProblem.ll
+++ b/test/Assembler/2003-05-03-BytecodeReaderProblem.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis
+; RUN: verify-uselistorder %s
 
 define void @test() {
         %tmp.123 = trunc i64 0 to i32           ; <i32> [#uses=0]
diff --git a/test/Assembler/2003-05-12-MinIntProblem.ll b/test/Assembler/2003-05-12-MinIntProblem.ll
index ebe169013b4c..1064a76fc94a 100644
--- a/test/Assembler/2003-05-12-MinIntProblem.ll
+++ b/test/Assembler/2003-05-12-MinIntProblem.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | grep -- -2147483648
+; RUN: verify-uselistorder %s
 
 define i32 @foo() {
         ret i32 -2147483648
diff --git a/test/Assembler/2003-05-15-AssemblerProblem.ll b/test/Assembler/2003-05-15-AssemblerProblem.ll
index 146ce6534d70..eba26a2093ee 100644
--- a/test/Assembler/2003-05-15-AssemblerProblem.ll
+++ b/test/Assembler/2003-05-15-AssemblerProblem.ll
@@ -1,6 +1,7 @@
 ; This bug was caused by two CPR's existing for the same global variable, 
 ; colliding in the Module level CPR map.
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 define void @test() {
         call void (...)* bitcast (void (i16*, i32)* @AddString to void (...)*)( i16* null, i32 0 )
diff --git a/test/Assembler/2003-05-15-SwitchBug.ll b/test/Assembler/2003-05-15-SwitchBug.ll
index 3768d9c9a677..432be818174b 100644
--- a/test/Assembler/2003-05-15-SwitchBug.ll
+++ b/test/Assembler/2003-05-15-SwitchBug.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 ; Check minimal switch statement
 
diff --git a/test/Assembler/2003-05-21-ConstantShiftExpr.ll b/test/Assembler/2003-05-21-ConstantShiftExpr.ll
index 40b96514e045..5b8e5d26f845 100644
--- a/test/Assembler/2003-05-21-ConstantShiftExpr.ll
+++ b/test/Assembler/2003-05-21-ConstantShiftExpr.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 ; Test that shift instructions can be used in constant expressions.
 
 global i32 3670016
diff --git a/test/Assembler/2003-05-21-EmptyStructTest.ll b/test/Assembler/2003-05-21-EmptyStructTest.ll
index 26e83d931c4d..934e32a0cf81 100644
--- a/test/Assembler/2003-05-21-EmptyStructTest.ll
+++ b/test/Assembler/2003-05-21-EmptyStructTest.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 ; The old C front-end never generated empty structures, now the new one
 ; can.  For some reason we never handled them in the parser. Weird.
diff --git a/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll b/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll
index 50cdeedd695e..911f0ff71592 100644
--- a/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll
+++ b/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -instcombine -simplifycfg -S | not grep br
+; RUN: verify-uselistorder %s
 
 @.str_1 = internal constant [6 x i8] c"_Bool\00"                ; <[6 x i8]*> [#uses=2]
 
diff --git a/test/Assembler/2003-08-21-ConstantExprCast-Fold.ll b/test/Assembler/2003-08-21-ConstantExprCast-Fold.ll
index b76f7745849e..926d4ed7b914 100644
--- a/test/Assembler/2003-08-21-ConstantExprCast-Fold.ll
+++ b/test/Assembler/2003-08-21-ConstantExprCast-Fold.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | not grep getelementptr
+; RUN: verify-uselistorder %s
 
 @A = external global { float }          ; <{ float }*> [#uses=2]
 global i32* bitcast ({ float }* @A to i32*)             ; <i32**>:0 [#uses=0]
diff --git a/test/Assembler/2003-11-05-ConstantExprShift.ll b/test/Assembler/2003-11-05-ConstantExprShift.ll
index 86b093e35129..ddfcecaff5a3 100644
--- a/test/Assembler/2003-11-05-ConstantExprShift.ll
+++ b/test/Assembler/2003-11-05-ConstantExprShift.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis
+; RUN: verify-uselistorder %s
 
 define i32 @test() {
         ret i32 ashr (i32 ptrtoint (i32 ()* @test to i32), i32 2)
diff --git a/test/Assembler/2003-11-12-ConstantExprCast.ll b/test/Assembler/2003-11-12-ConstantExprCast.ll
index 47a53537f875..c9ad26668af7 100644
--- a/test/Assembler/2003-11-12-ConstantExprCast.ll
+++ b/test/Assembler/2003-11-12-ConstantExprCast.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | not grep " bitcast ("
+; RUN: verify-uselistorder %s
 
 @.Base64_1 = external constant [4 x i8]         ; <[4 x i8]*> [#uses=1]
 
diff --git a/test/Assembler/2004-01-11-getelementptrfolding.ll b/test/Assembler/2004-01-11-getelementptrfolding.ll
index 5249d0e5bcc0..188a95fa9505 100644
--- a/test/Assembler/2004-01-11-getelementptrfolding.ll
+++ b/test/Assembler/2004-01-11-getelementptrfolding.ll
@@ -1,5 +1,6 @@
 ; RUN: llvm-as < %s | llvm-dis | \
 ; RUN:   not grep "getelementptr.*getelementptr"
+; RUN: verify-uselistorder %s
 
 %struct.TTriangleItem = type { i8*, i8*, [3 x %struct.TUVVertex] }
 %struct.TUVVertex = type { i16, i16, i16, i16 }
diff --git a/test/Assembler/2004-01-20-MaxLongLong.ll b/test/Assembler/2004-01-20-MaxLongLong.ll
index 8af5332ccc67..23eb402769df 100644
--- a/test/Assembler/2004-01-20-MaxLongLong.ll
+++ b/test/Assembler/2004-01-20-MaxLongLong.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | grep 9223372036854775808
+; RUN: verify-uselistorder %s
 
 global i64 -9223372036854775808
 
diff --git a/test/Assembler/2004-02-01-NegativeZero.ll b/test/Assembler/2004-02-01-NegativeZero.ll
index b28930f2852c..98bd4cb0d946 100644
--- a/test/Assembler/2004-02-01-NegativeZero.ll
+++ b/test/Assembler/2004-02-01-NegativeZero.ll
@@ -1,5 +1,9 @@
-; RUN: llvm-as < %s | llvm-dis | grep -- -0.0
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
+; CHECK: global double -0.000000e+00
 global double 0x8000000000000000
+
+; CHECK: global float -0.000000e+00
 global float -0.0
 
diff --git a/test/Assembler/2004-02-27-SelfUseAssertError.ll b/test/Assembler/2004-02-27-SelfUseAssertError.ll
index 7052eac5cbd4..252a1b2415e0 100644
--- a/test/Assembler/2004-02-27-SelfUseAssertError.ll
+++ b/test/Assembler/2004-02-27-SelfUseAssertError.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 ; %inc2 uses it's own value, but that's ok, as it's unreachable!
 
diff --git a/test/Assembler/2004-03-07-FunctionAddressAlignment.ll b/test/Assembler/2004-03-07-FunctionAddressAlignment.ll
index e3bf0bb8ac78..7fa0802db405 100644
--- a/test/Assembler/2004-03-07-FunctionAddressAlignment.ll
+++ b/test/Assembler/2004-03-07-FunctionAddressAlignment.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | not grep ptrtoint
+; RUN: verify-uselistorder %s
 ; All of these should be eliminable
 
 
diff --git a/test/Assembler/2004-04-04-GetElementPtrIndexTypes.ll b/test/Assembler/2004-04-04-GetElementPtrIndexTypes.ll
index ab46f887be07..a86fe63edf75 100644
--- a/test/Assembler/2004-04-04-GetElementPtrIndexTypes.ll
+++ b/test/Assembler/2004-04-04-GetElementPtrIndexTypes.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 define i32* @t1({ float, i32 }* %X) {
         %W = getelementptr { float, i32 }* %X, i32 20, i32 1            ; <i32*> [#uses=0]
diff --git a/test/Assembler/2004-06-07-VerifierBug.ll b/test/Assembler/2004-06-07-VerifierBug.ll
index 07d2383ccff9..090599a9a855 100644
--- a/test/Assembler/2004-06-07-VerifierBug.ll
+++ b/test/Assembler/2004-06-07-VerifierBug.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s > /dev/null
+; RUN: verify-uselistorder %s
 
 define void @t() {
 entry:
diff --git a/test/Assembler/2004-10-22-BCWriterUndefBug.ll b/test/Assembler/2004-10-22-BCWriterUndefBug.ll
index 694b80b78c16..b93413139430 100644
--- a/test/Assembler/2004-10-22-BCWriterUndefBug.ll
+++ b/test/Assembler/2004-10-22-BCWriterUndefBug.ll
@@ -1,5 +1,6 @@
 ;; The bytecode writer was trying to treat undef values as ConstantArray's when
 ;; they looked like strings.
 ;; RUN: llvm-as %s -o /dev/null
+;; RUN: verify-uselistorder %s
 @G = internal global [8 x i8] undef
 
diff --git a/test/Assembler/2004-11-28-InvalidTypeCrash.ll b/test/Assembler/2004-11-28-InvalidTypeCrash.ll
index 4db5b7453b0f..7260f19bfd9a 100644
--- a/test/Assembler/2004-11-28-InvalidTypeCrash.ll
+++ b/test/Assembler/2004-11-28-InvalidTypeCrash.ll
@@ -1,4 +1,5 @@
 ; Test for PR463.  This program is erroneous, but should not crash llvm-as.
-; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "use of undefined type named 'struct.none'"
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+; CHECK: use of undefined type named 'struct.none'
 
 @.FOO  = internal global %struct.none zeroinitializer
diff --git a/test/Assembler/2005-01-03-FPConstantDisassembly.ll b/test/Assembler/2005-01-03-FPConstantDisassembly.ll
index aaa776f51b81..643d04c84b3f 100644
--- a/test/Assembler/2005-01-03-FPConstantDisassembly.ll
+++ b/test/Assembler/2005-01-03-FPConstantDisassembly.ll
@@ -1,6 +1,8 @@
-; RUN: llvm-as < %s | llvm-dis | grep 1.0
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 define double @test() {
+; CHECK: ret double 1.0
         ret double 1.0   ;; This should not require hex notation
 }
 
diff --git a/test/Assembler/2005-01-31-CallingAggregateFunction.ll b/test/Assembler/2005-01-31-CallingAggregateFunction.ll
index ce769a2e9d7b..a5a917d08aba 100644
--- a/test/Assembler/2005-01-31-CallingAggregateFunction.ll
+++ b/test/Assembler/2005-01-31-CallingAggregateFunction.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 define void @test() {
 	call {i32} @foo()
diff --git a/test/Assembler/2005-05-05-OpaqueUndefValues.ll b/test/Assembler/2005-05-05-OpaqueUndefValues.ll
index 8cd1419b08b3..01456f10c9dc 100644
--- a/test/Assembler/2005-05-05-OpaqueUndefValues.ll
+++ b/test/Assembler/2005-05-05-OpaqueUndefValues.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as > /dev/null
+; RUN: verify-uselistorder %s
 
 %t = type opaque
 @x = global %t undef
diff --git a/test/Assembler/2005-12-21-ZeroInitVector.ll b/test/Assembler/2005-12-21-ZeroInitVector.ll
index d3a692c60c9e..edcf60518f85 100644
--- a/test/Assembler/2005-12-21-ZeroInitVector.ll
+++ b/test/Assembler/2005-12-21-ZeroInitVector.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s > /dev/null
+; RUN: verify-uselistorder %s
 
 define <4 x i32> @foo() {
         ret <4 x i32> zeroinitializer
diff --git a/test/Assembler/2006-12-09-Cast-To-Bool.ll b/test/Assembler/2006-12-09-Cast-To-Bool.ll
index a70262c802c1..91abe770a41c 100644
--- a/test/Assembler/2006-12-09-Cast-To-Bool.ll
+++ b/test/Assembler/2006-12-09-Cast-To-Bool.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | grep bitcast
+; RUN: verify-uselistorder %s
 
 define i1 @main(i32 %X) {
   %res = bitcast i1 true to i1
diff --git a/test/Assembler/2007-01-02-Undefined-Arg-Type.ll b/test/Assembler/2007-01-02-Undefined-Arg-Type.ll
index 184e543123f4..a0542ee00137 100644
--- a/test/Assembler/2007-01-02-Undefined-Arg-Type.ll
+++ b/test/Assembler/2007-01-02-Undefined-Arg-Type.ll
@@ -1,5 +1,7 @@
 ; The assembler should catch an undefined argument type .
-; RUN: not llvm-as %s -o /dev/null 2>&1 | grep "use of undefined type named 'typedef.bc_struct'"
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+; CHECK: use of undefined type named 'typedef.bc_struct'
 
 ; %typedef.bc_struct = type opaque
 
diff --git a/test/Assembler/2007-01-05-Cmp-ConstExpr.ll b/test/Assembler/2007-01-05-Cmp-ConstExpr.ll
index e3f67ba13afc..54a4372b977e 100644
--- a/test/Assembler/2007-01-05-Cmp-ConstExpr.ll
+++ b/test/Assembler/2007-01-05-Cmp-ConstExpr.ll
@@ -1,5 +1,6 @@
 ; Test Case for PR1080
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 @str = internal constant [4 x i8] c"-ga\00"             ; <[4 x i8]*> [#uses=2]
 
diff --git a/test/Assembler/2007-03-19-NegValue.ll b/test/Assembler/2007-03-19-NegValue.ll
index 64eb3cb5903a..a2deac280b84 100644
--- a/test/Assembler/2007-03-19-NegValue.ll
+++ b/test/Assembler/2007-03-19-NegValue.ll
@@ -1,7 +1,9 @@
 ; Test whether negative values > 64 bits retain their negativeness.
-; RUN: llvm-as < %s | llvm-dis | grep "add i65.*, -1"
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 define i65 @testConsts(i65 %N) {
+; CHECK: add i65 %N, -1
   %a = add i65 %N, -1
   ret i65 %a
 }
diff --git a/test/Assembler/2007-04-20-AlignedLoad.ll b/test/Assembler/2007-04-20-AlignedLoad.ll
index 98a5428a97d8..bcf65fd6eac7 100644
--- a/test/Assembler/2007-04-20-AlignedLoad.ll
+++ b/test/Assembler/2007-04-20-AlignedLoad.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | grep "align 1024"
+; RUN: verify-uselistorder %s
 
 define i32 @test(i32* %arg) {
 entry:
diff --git a/test/Assembler/2007-04-20-AlignedStore.ll b/test/Assembler/2007-04-20-AlignedStore.ll
index 9e4dd9fd07b5..9605af23c29a 100644
--- a/test/Assembler/2007-04-20-AlignedStore.ll
+++ b/test/Assembler/2007-04-20-AlignedStore.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | grep "align 1024"
+; RUN: verify-uselistorder %s
 
 define void @test(i32* %arg) {
 entry:
diff --git a/test/Assembler/2007-04-25-AssemblerFoldExternWeak.ll b/test/Assembler/2007-04-25-AssemblerFoldExternWeak.ll
index b0ca1aad86e7..7c73abc6a2b5 100644
--- a/test/Assembler/2007-04-25-AssemblerFoldExternWeak.ll
+++ b/test/Assembler/2007-04-25-AssemblerFoldExternWeak.ll
@@ -1,5 +1,8 @@
-; RUN: llvm-as < %s | llvm-dis | grep "icmp.*test_weak.*null"
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 ; PR1358
+
+; CHECK: icmp ne (i32 (...)* @test_weak, i32 (...)* null)
 @G = global i1 icmp ne (i32 (...)* @test_weak, i32 (...)* null)
 
 declare extern_weak i32 @test_weak(...)
diff --git a/test/Assembler/2007-05-21-Escape.ll b/test/Assembler/2007-05-21-Escape.ll
index 08681339d212..97162441fd81 100644
--- a/test/Assembler/2007-05-21-Escape.ll
+++ b/test/Assembler/2007-05-21-Escape.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis
+; RUN: verify-uselistorder %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "x86_64-apple-darwin8"
 	%struct.bar = type { i32 }
diff --git a/test/Assembler/2007-07-19-ParamAttrAmbiguity.ll b/test/Assembler/2007-07-19-ParamAttrAmbiguity.ll
index 9c7daa841ab8..68aeef222aa3 100644
--- a/test/Assembler/2007-07-19-ParamAttrAmbiguity.ll
+++ b/test/Assembler/2007-07-19-ParamAttrAmbiguity.ll
@@ -1,5 +1,6 @@
 ; PR1553
 ; RUN: llvm-as < %s > /dev/null
+; RUN: verify-uselistorder %s
 define void @bar() {
         %t = call i8 @foo( i8 10 )
         zext i8 %t to i32
diff --git a/test/Assembler/2007-09-10-AliasFwdRef.ll b/test/Assembler/2007-09-10-AliasFwdRef.ll
index 2ebfc2719e95..8e0a5718058c 100644
--- a/test/Assembler/2007-09-10-AliasFwdRef.ll
+++ b/test/Assembler/2007-09-10-AliasFwdRef.ll
@@ -1,8 +1,9 @@
 ; RUN: llvm-as < %s | llvm-dis
+; RUN: verify-uselistorder %s
 ; PR1645
 
 @__gthread_active_ptr.5335 = internal constant i8* bitcast (i32 (i32)* @__gthrw_pthread_cancel to i8*)    
-@__gthrw_pthread_cancel = alias weak i32 (i32)* @pthread_cancel   
+@__gthrw_pthread_cancel = weak alias i32 (i32)* @pthread_cancel
 
 
 
diff --git a/test/Assembler/2007-09-29-GC.ll b/test/Assembler/2007-09-29-GC.ll
index 9aefd0b04166..f2cafbcc0fb3 100644
--- a/test/Assembler/2007-09-29-GC.ll
+++ b/test/Assembler/2007-09-29-GC.ll
@@ -1,5 +1,9 @@
-; RUN: llvm-as < %s | llvm-dis | grep "@f.*gc.*shadowstack"
-; RUN: llvm-as < %s | llvm-dis | grep "@g.*gc.*java"
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
+
+
+; CHECK: define void @f() gc "shadowstack"
+; CHECK: define void @g() gc "java"
 
 define void @f() gc "shadowstack" {
 entry:
diff --git a/test/Assembler/2007-12-11-AddressSpaces.ll b/test/Assembler/2007-12-11-AddressSpaces.ll
index 7c9b5b529848..f860f57e8ddd 100644
--- a/test/Assembler/2007-12-11-AddressSpaces.ll
+++ b/test/Assembler/2007-12-11-AddressSpaces.ll
@@ -3,6 +3,7 @@
 ; RUN: llvm-as < %s | llvm-dis | grep "addrspace(66)" | count 2
 ; RUN: llvm-as < %s | llvm-dis | grep "addrspace(11)" | count 6
 ; RUN: llvm-as < %s | llvm-dis | grep "addrspace(22)" | count 5
+; RUN: verify-uselistorder %s
 
 	%struct.mystruct = type { i32, i32 addrspace(33)*, i32, i32 addrspace(33)* }
 @input = weak addrspace(42) global %struct.mystruct zeroinitializer  		; <%struct.mystruct addrspace(42)*> [#uses=1]
diff --git a/test/Assembler/2008-01-11-VarargAttrs.ll b/test/Assembler/2008-01-11-VarargAttrs.ll
index c0aedc80b4c3..0b6592c69424 100644
--- a/test/Assembler/2008-01-11-VarargAttrs.ll
+++ b/test/Assembler/2008-01-11-VarargAttrs.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | grep byval
+; RUN: verify-uselistorder %s
 
 	%struct = type {  }
 
diff --git a/test/Assembler/2008-07-10-APInt.ll b/test/Assembler/2008-07-10-APInt.ll
index 99347e92aba0..fe3608d4f133 100644
--- a/test/Assembler/2008-07-10-APInt.ll
+++ b/test/Assembler/2008-07-10-APInt.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis
+; RUN: verify-uselistorder %s
 ; PR2538
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
 target triple = "i686-pc-linux-gnu"
diff --git a/test/Assembler/2008-09-02-FunctionNotes.ll b/test/Assembler/2008-09-02-FunctionNotes.ll
index 11a0411ef79f..a629c9341a9f 100644
--- a/test/Assembler/2008-09-02-FunctionNotes.ll
+++ b/test/Assembler/2008-09-02-FunctionNotes.ll
@@ -1,5 +1,6 @@
 ; Test function attributes
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 ; CHECK: define void @fn1() #0
 define void @fn1() alwaysinline {
diff --git a/test/Assembler/2008-09-29-RetAttr.ll b/test/Assembler/2008-09-29-RetAttr.ll
index f7db96d22641..5eb608d4a693 100644
--- a/test/Assembler/2008-09-29-RetAttr.ll
+++ b/test/Assembler/2008-09-29-RetAttr.ll
@@ -1,6 +1,7 @@
 ; Test return attributes
 ; RUN: llvm-as < %s | llvm-dis | grep "define inreg i32"
 ; RUN: llvm-as < %s | llvm-dis | grep "call inreg i32"
+; RUN: verify-uselistorder %s
 
 define inreg i32 @fn1() {
   ret i32 0
diff --git a/test/Assembler/2008-10-14-QuoteInName.ll b/test/Assembler/2008-10-14-QuoteInName.ll
index ccd77791caac..aa95e79eef91 100644
--- a/test/Assembler/2008-10-14-QuoteInName.ll
+++ b/test/Assembler/2008-10-14-QuoteInName.ll
@@ -1,3 +1,4 @@
 ; RUN: llvm-as < %s | llvm-dis | grep "quote"
+; RUN: verify-uselistorder %s
 
 @"a\22quote" = global i32 0
diff --git a/test/Assembler/2009-02-01-UnnamedForwardRef.ll b/test/Assembler/2009-02-01-UnnamedForwardRef.ll
index 9c6e20d7335e..5b1d9eebf780 100644
--- a/test/Assembler/2009-02-01-UnnamedForwardRef.ll
+++ b/test/Assembler/2009-02-01-UnnamedForwardRef.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis
+; RUN: verify-uselistorder %s
 ; PR3372
 
 @X = global i32* @0
diff --git a/test/Assembler/2009-02-28-CastOpc.ll b/test/Assembler/2009-02-28-CastOpc.ll
index 60356439d5f8..e9d2308064cc 100644
--- a/test/Assembler/2009-02-28-CastOpc.ll
+++ b/test/Assembler/2009-02-28-CastOpc.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis
+; RUN: verify-uselistorder %s
 
 
 define void @foo() {
diff --git a/test/Assembler/2009-02-28-StripOpaqueName.ll b/test/Assembler/2009-02-28-StripOpaqueName.ll
index f61a44cbd15d..614cc5721d80 100644
--- a/test/Assembler/2009-02-28-StripOpaqueName.ll
+++ b/test/Assembler/2009-02-28-StripOpaqueName.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -strip -S | llvm-as | llvm-dis
+; RUN: verify-uselistorder %s
 
 ; Stripping the name from A should not break references to it.
 %A = type opaque
diff --git a/test/Assembler/2009-03-24-ZextConstantExpr.ll b/test/Assembler/2009-03-24-ZextConstantExpr.ll
index daedb95da15a..98bab4b19a02 100644
--- a/test/Assembler/2009-03-24-ZextConstantExpr.ll
+++ b/test/Assembler/2009-03-24-ZextConstantExpr.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis
+; RUN: verify-uselistorder %s
 ; PR3876
 @gdtr = external global [0 x i8]
 
diff --git a/test/Assembler/2009-07-24-ZeroArgGEP.ll b/test/Assembler/2009-07-24-ZeroArgGEP.ll
index 2a3d11477cb1..92f4d59d2f7f 100644
--- a/test/Assembler/2009-07-24-ZeroArgGEP.ll
+++ b/test/Assembler/2009-07-24-ZeroArgGEP.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 @foo = global i32 0
 @bar = constant i32* getelementptr(i32* @foo)
diff --git a/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll b/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
index 17dd745682b0..50ad32e1c9ed 100644
--- a/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
+++ b/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
@@ -1,4 +1,5 @@
-; RUN: opt -std-compile-opts < %s | llvm-dis | not grep badref
+; RUN: opt -S -O3 < %s | FileCheck %s
+; RUN: verify-uselistorder %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.2"
@@ -11,7 +12,8 @@ target triple = "x86_64-apple-darwin10.2"
 
 define i32 @main() nounwind readonly {
   %diff1 = alloca i64                             ; <i64*> [#uses=2]
-  call void @llvm.dbg.declare(metadata !{i64* %diff1}, metadata !0)
+; CHECK: call void @llvm.dbg.value(metadata i64 72,
+  call void @llvm.dbg.declare(metadata i64* %diff1, metadata !0, metadata !{!"0x102"})
   store i64 72, i64* %diff1, align 8
   %v1 = load %struct.test** @TestArrayPtr, align 8 ; <%struct.test*> [#uses=1]
   %v2 = ptrtoint %struct.test* %v1 to i64 ; <i64> [#uses=1]
@@ -20,15 +22,18 @@ define i32 @main() nounwind readonly {
   ret i32 4
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
-!7 = metadata !{metadata !1}
-!6 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.0 (trunk 131941)", i1 true, metadata !"", i32 0, metadata !9, metadata !9, metadata !7, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!0 = metadata !{i32 786688, metadata !1, metadata !"c", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 786478, metadata !8, metadata !2, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !8} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !8, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, null, metadata !6, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b"}
-!9 = metadata !{i32 0}
+!7 = !{!1}
+!6 = !{!"0x11\0012\00clang version 3.0 (trunk 131941)\001\00\000\00\000", !8, !9, !9, !7, null, null} ; [ DW_TAG_compile_unit ]
+!0 = !{!"0x100\00c\002\000", !1, !2, !5} ; [ DW_TAG_auto_variable ]
+!1 = !{!"0x2e\00main\00main\00\001\000\001\000\006\00256\000\001", !8, !2, !3, null, i32 ()* @main, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !8} ; [ DW_TAG_file_type ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !8, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !6} ; [ DW_TAG_base_type ]
+!8 = !{!"/d/j/debug-test.c", !"/Volumes/Data/b"}
+!9 = !{i32 0}
+
+!llvm.module.flags = !{!10}
+!10 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Assembler/ConstantExprFold.ll b/test/Assembler/ConstantExprFold.ll
index fc18ce708edc..dc4133115135 100644
--- a/test/Assembler/ConstantExprFold.ll
+++ b/test/Assembler/ConstantExprFold.ll
@@ -2,6 +2,7 @@
 ; situations
 
 ; RUN: llvm-as < %s | llvm-dis | not grep "("
+; RUN: verify-uselistorder %s
 
 @A = global i64 0
 
diff --git a/test/Assembler/ConstantExprFoldCast.ll b/test/Assembler/ConstantExprFoldCast.ll
index 161a4ca35757..094f87be92cf 100644
--- a/test/Assembler/ConstantExprFoldCast.ll
+++ b/test/Assembler/ConstantExprFoldCast.ll
@@ -1,6 +1,7 @@
 ; This test checks to make sure that constant exprs fold in some simple situations
 
 ; RUN: llvm-as < %s | llvm-dis | not grep cast
+; RUN: verify-uselistorder %s
 
 @A = global i32* bitcast (i8* null to i32*)  ; Cast null -> fold
 @B = global i32** bitcast (i32** @A to i32**)   ; Cast to same type -> fold
diff --git a/test/Assembler/ConstantExprFoldSelect.ll b/test/Assembler/ConstantExprFoldSelect.ll
index b000e02653c6..5d218a9570b9 100644
--- a/test/Assembler/ConstantExprFoldSelect.ll
+++ b/test/Assembler/ConstantExprFoldSelect.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 ; PR18319
 
 define void @function() {
diff --git a/test/Assembler/ConstantExprNoFold.ll b/test/Assembler/ConstantExprNoFold.ll
index b41959f494fd..83236d5793b3 100644
--- a/test/Assembler/ConstantExprNoFold.ll
+++ b/test/Assembler/ConstantExprNoFold.ll
@@ -2,6 +2,7 @@
 ; situations
 
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 ; Even give it a datalayout, to tempt folding as much as possible.
 target datalayout = "p:32:32"
@@ -24,3 +25,26 @@ target datalayout = "p:32:32"
 
 ; CHECK: @E = global i64 addrspace(1)* addrspacecast (i64* @A to i64 addrspace(1)*)
 @E = global i64 addrspace(1)* addrspacecast(i64* @A to i64 addrspace(1)*)
+
+; Don't add an inbounds on @weak.gep, since @weak may be null.
+; CHECK: @weak.gep = global i32* getelementptr (i32* @weak, i32 1)
+@weak.gep = global i32* getelementptr (i32* @weak, i32 1)
+@weak = extern_weak global i32
+
+; An object with weak linkage cannot have it's identity determined at compile time.
+; CHECK: @F = global i1 icmp eq (i32* @weakany, i32* @glob)
+@F = global i1 icmp eq (i32* @weakany, i32* @glob)
+@weakany = weak global i32 0
+
+; Empty globals might end up anywhere, even on top of another global.
+; CHECK: @empty.cmp = global i1 icmp eq ([0 x i8]* @empty.1, [0 x i8]* @empty.2)
+@empty.1 = external global [0 x i8], align 1
+@empty.2 = external global [0 x i8], align 1
+@empty.cmp = global i1 icmp eq ([0 x i8]* @empty.1, [0 x i8]* @empty.2)
+
+; Don't add an inbounds on @glob.a3, since it's not inbounds.
+; CHECK: @glob.a3 = alias getelementptr (i32* @glob.a2, i32 1)
+@glob = global i32 0
+@glob.a3 = alias getelementptr (i32* @glob.a2, i32 1)
+@glob.a2 = alias getelementptr (i32* @glob.a1, i32 1)
+@glob.a1 = alias i32* @glob
diff --git a/test/Assembler/MultipleReturnValueType.ll b/test/Assembler/MultipleReturnValueType.ll
index 617714394542..5812632cf3e8 100644
--- a/test/Assembler/MultipleReturnValueType.ll
+++ b/test/Assembler/MultipleReturnValueType.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s
+; RUN: verify-uselistorder %s
 
         %struct.S_102 = type { float, float }
 
diff --git a/test/Assembler/addrspacecast-alias.ll b/test/Assembler/addrspacecast-alias.ll
index d7516599dfe2..745e525c15f4 100644
--- a/test/Assembler/addrspacecast-alias.ll
+++ b/test/Assembler/addrspacecast-alias.ll
@@ -1,7 +1,8 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 ; Test that global aliases are allowed to be constant addrspacecast
 
 @i = internal addrspace(1) global i8 42
-@ia = alias internal addrspacecast (i8 addrspace(1)* @i to i8 addrspace(2)* addrspace(3)*)
-; CHECK: @ia = alias internal addrspacecast (i8 addrspace(2)* addrspace(1)* bitcast (i8 addrspace(1)* @i to i8 addrspace(2)* addrspace(1)*) to i8 addrspace(2)* addrspace(3)*)
+@ia = internal alias addrspacecast (i8 addrspace(1)* @i to i8 addrspace(2)* addrspace(3)*)
+; CHECK: @ia = internal alias addrspacecast (i8 addrspace(2)* addrspace(1)* bitcast (i8 addrspace(1)* @i to i8 addrspace(2)* addrspace(1)*) to i8 addrspace(2)* addrspace(3)*)
diff --git a/test/Assembler/aggregate-constant-values.ll b/test/Assembler/aggregate-constant-values.ll
index d0aab81a4d68..9e68e0675891 100644
--- a/test/Assembler/aggregate-constant-values.ll
+++ b/test/Assembler/aggregate-constant-values.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 ; CHECK: @foo
 ; CHECK: store { i32, i32 } { i32 7, i32 9 }, { i32, i32 }* %x
diff --git a/test/Assembler/aggregate-return-single-value.ll b/test/Assembler/aggregate-return-single-value.ll
index 04540b54af1f..a77c25026437 100644
--- a/test/Assembler/aggregate-return-single-value.ll
+++ b/test/Assembler/aggregate-return-single-value.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis
+; RUN: verify-uselistorder %s
 
 define { i32 } @foob() nounwind {
   ret {i32}{ i32 0 }
diff --git a/test/Assembler/alias-use-list-order.ll b/test/Assembler/alias-use-list-order.ll
new file mode 100644
index 000000000000..d29fd6e435fe
--- /dev/null
+++ b/test/Assembler/alias-use-list-order.ll
@@ -0,0 +1,11 @@
+; RUN: verify-uselistorder < %s
+
+; Globals.
+@global = global i32 0
+@alias.ref1 = global i32* getelementptr inbounds (i32* @alias, i64 1)
+@alias.ref2 = global i32* getelementptr inbounds (i32* @alias, i64 1)
+
+; Aliases.
+@alias = alias i32* @global
+@alias.ref3 = alias i32* getelementptr inbounds (i32* @alias, i64 1)
+@alias.ref4 = alias i32* getelementptr inbounds (i32* @alias, i64 1)
diff --git a/test/Assembler/align-inst.ll b/test/Assembler/align-inst.ll
index 6f7100e065d3..1952fbc52972 100644
--- a/test/Assembler/align-inst.ll
+++ b/test/Assembler/align-inst.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 @A = global i1 0, align 536870912
 
diff --git a/test/Assembler/alignstack.ll b/test/Assembler/alignstack.ll
index 9f2059f722fa..784f44aa8ecf 100644
--- a/test/Assembler/alignstack.ll
+++ b/test/Assembler/alignstack.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin10.0"
 
diff --git a/test/Assembler/anon-functions.ll b/test/Assembler/anon-functions.ll
index ac06e8ce3055..42eea837227a 100644
--- a/test/Assembler/anon-functions.ll
+++ b/test/Assembler/anon-functions.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis
+; RUN: verify-uselistorder %s
 ; PR3611
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Assembler/atomic.ll b/test/Assembler/atomic.ll
index d7ccd9900bd8..0356f5fc582c 100644
--- a/test/Assembler/atomic.ll
+++ b/test/Assembler/atomic.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s | opt -S | FileCheck %s
+; RUN: verify-uselistorder %s
 ; Basic smoke test for atomic operations.
 
 define void @f(i32* %x) {
diff --git a/test/Assembler/auto_upgrade_intrinsics.ll b/test/Assembler/auto_upgrade_intrinsics.ll
index 8f655cec9eb2..f16e5fee6c36 100644
--- a/test/Assembler/auto_upgrade_intrinsics.ll
+++ b/test/Assembler/auto_upgrade_intrinsics.ll
@@ -1,5 +1,6 @@
 ; Test to make sure intrinsics are automatically upgraded.
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 declare i8 @llvm.ctlz.i8(i8)
 declare i16 @llvm.ctlz.i16(i16)
diff --git a/test/Assembler/bcwrap.ll b/test/Assembler/bcwrap.ll
index 4bec48cf63ba..bc260ab2f2fa 100644
--- a/test/Assembler/bcwrap.ll
+++ b/test/Assembler/bcwrap.ll
@@ -1,5 +1,6 @@
 ; RUN: llvm-as < %s > %t
 ; RUN: llvm-nm %t | FileCheck %s
+; RUN: verify-uselistorder %s
 ; Test for isBitcodeFile, llvm-nm must read from a file for this test.
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9.2.2"
diff --git a/test/Assembler/comment.ll b/test/Assembler/comment.ll
index 16362abc716f..edf098948972 100644
--- a/test/Assembler/comment.ll
+++ b/test/Assembler/comment.ll
@@ -1,5 +1,6 @@
 ; RUN: llvm-as < %s | llvm-dis -show-annotations | FileCheck -check-prefix=ANNOT %s
 ; RUN: llvm-as < %s | llvm-dis | FileCheck -check-prefix=BARE %s
+; RUN: verify-uselistorder %s
 
 ; The bare version of this file should not have any #uses lines.
 ; BARE: @B =
diff --git a/test/Assembler/distinct-mdnode.ll b/test/Assembler/distinct-mdnode.ll
new file mode 100644
index 000000000000..abd7aeacbfc7
--- /dev/null
+++ b/test/Assembler/distinct-mdnode.ll
@@ -0,0 +1,28 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
+
+!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10}
+
+!0 = !{}
+!1 = !{}   ; This should merge with !0.
+!2 = !{!0}
+!3 = !{!0} ; This should merge with !2.
+!4 = distinct !{}
+!5 = distinct !{}
+!6 = distinct !{!0}
+!7 = distinct !{!0}
+!8 = distinct !{!8}
+!9 = distinct !{!9}
+!10 = !{!10} ; This should become distinct.
+
+; CHECK: !named = !{!0, !0, !1, !1, !2, !3, !4, !5, !6, !7, !8}
+; CHECK:      !0 = !{}
+; CHECK-NEXT: !1 = !{!0}
+; CHECK-NEXT: !2 = distinct !{}
+; CHECK-NEXT: !3 = distinct !{}
+; CHECK-NEXT: !4 = distinct !{!0}
+; CHECK-NEXT: !5 = distinct !{!0}
+; CHECK-NEXT: !6 = distinct !{!6}
+; CHECK-NEXT: !7 = distinct !{!7}
+; CHECK-NEXT: !8 = distinct !{!8}
+; CHECK-NOT:  !
diff --git a/test/Assembler/externally-initialized.ll b/test/Assembler/externally-initialized.ll
index 4be6e629a1d0..ea93367a26db 100644
--- a/test/Assembler/externally-initialized.ll
+++ b/test/Assembler/externally-initialized.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 ; CHECK: @G = externally_initialized global i32 0
 
diff --git a/test/Assembler/fast-math-flags.ll b/test/Assembler/fast-math-flags.ll
index 3a116c507f48..8e75bdf0f966 100644
--- a/test/Assembler/fast-math-flags.ll
+++ b/test/Assembler/fast-math-flags.ll
@@ -1,5 +1,6 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
 ; RUN: opt -S < %s | FileCheck %s
+; RUN: verify-uselistorder %s
 
 @addr   = external global i64
 @select = external global i1
diff --git a/test/Assembler/flags.ll b/test/Assembler/flags.ll
index 310b807c5d56..e74311afd9a6 100644
--- a/test/Assembler/flags.ll
+++ b/test/Assembler/flags.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 @addr = external global i64
 
diff --git a/test/Assembler/functionlocal-metadata.ll b/test/Assembler/functionlocal-metadata.ll
index f9b1d7403796..517138dba5fa 100644
--- a/test/Assembler/functionlocal-metadata.ll
+++ b/test/Assembler/functionlocal-metadata.ll
@@ -1,34 +1,33 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 define void @Foo(i32 %a, i32 %b) {
 entry:
-  call void @llvm.dbg.value(metadata !{ i32* %1 }, i64 16, metadata !2)
-; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata ![[ID2:[0-9]+]])
+  call void @llvm.dbg.value(metadata i32* %1, i64 16, metadata !2, metadata !{!"0x102"})
+; CHECK: call void @llvm.dbg.value(metadata i32* %1, i64 16, metadata ![[ID2:[0-9]+]], metadata {{.*}})
   %0 = add i32 %a, 1                              ; <i32> [#uses=1]
   %two = add i32 %b, %0                           ; <i32> [#uses=0]
   %1 = alloca i32                                 ; <i32*> [#uses=1]
 
-  call void @llvm.dbg.declare(metadata !{i32* %1}, metadata !{i32* %1})
-; CHECK: metadata !{i32* %1}, metadata !{i32* %1}
-  call void @llvm.dbg.declare(metadata !{i32 %two}, metadata !{i32 %0})
-; CHECK: metadata !{i32 %two}, metadata !{i32 %0}
-  call void @llvm.dbg.declare(metadata !{i32 %0}, metadata !{i32* %1, i32 %0})
-; CHECK: metadata !{i32 %0}, metadata !{i32* %1, i32 %0}
-  call void @llvm.dbg.declare(metadata !{i32* %1}, metadata !{i32 %b, i32 %0})
-; CHECK: metadata !{i32* %1}, metadata !{i32 %b, i32 %0}
-  call void @llvm.dbg.declare(metadata !{i32 %a}, metadata !{i32 %a, metadata !"foo"})
-; CHECK: metadata !{i32 %a}, metadata !{i32 %a, metadata !"foo"}
-  call void @llvm.dbg.declare(metadata !{i32 %b}, metadata !{metadata !0, i32 %two})
-; CHECK: metadata !{i32 %b}, metadata !{metadata ![[ID0:[0-9]+]], i32 %two}
-
-  call void @llvm.dbg.value(metadata !{ i32 %a }, i64 0, metadata !1)
-; CHECK: metadata !{i32 %a}, i64 0, metadata ![[ID1:[0-9]+]]
-  call void @llvm.dbg.value(metadata !{ i32 %0 }, i64 25, metadata !0)
-; CHECK: metadata !{i32 %0}, i64 25, metadata ![[ID0]]
-  call void @llvm.dbg.value(metadata !{ i32* %1 }, i64 16, metadata !3)
-; CHECK: call void @llvm.dbg.value(metadata !{i32* %1}, i64 16, metadata ![[ID3:[0-9]+]])
-  call void @llvm.dbg.value(metadata !3, i64 12, metadata !2)
-; CHECK: metadata ![[ID3]], i64 12, metadata ![[ID2]]
+  call void @llvm.dbg.declare(metadata i32* %1, metadata i32* %1, metadata !{!"0x102"})
+; CHECK: call void @llvm.dbg.declare(metadata i32* %1, metadata i32* %1, metadata {{.*}})
+  call void @llvm.dbg.declare(metadata i32 %two, metadata i32 %0, metadata !{!"0x102"})
+; CHECK: call void @llvm.dbg.declare(metadata i32 %two, metadata i32 %0, metadata {{.*}})
+  call void @llvm.dbg.declare(metadata i32* %1, metadata i32 %b, metadata !{!"0x102"})
+; CHECK: call void @llvm.dbg.declare(metadata i32* %1, metadata i32 %b, metadata {{.*}})
+  call void @llvm.dbg.declare(metadata i32 %a, metadata i32 %a, metadata !{!"0x102"})
+; CHECK: call void @llvm.dbg.declare(metadata i32 %a, metadata i32 %a, metadata {{.*}})
+  call void @llvm.dbg.declare(metadata i32 %b, metadata i32 %two, metadata !{!"0x102"})
+; CHECK: call void @llvm.dbg.declare(metadata i32 %b, metadata i32 %two, metadata {{.*}})
+
+  call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !1, metadata !{!"0x102"})
+; CHECK: call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata ![[ID1:[0-9]+]], metadata {{.*}})
+  call void @llvm.dbg.value(metadata i32 %0, i64 25, metadata !0, metadata !{!"0x102"})
+; CHECK: call void @llvm.dbg.value(metadata i32 %0, i64 25, metadata ![[ID0:[0-9]+]], metadata {{.*}})
+  call void @llvm.dbg.value(metadata i32* %1, i64 16, metadata !3, metadata !{!"0x102"})
+; CHECK: call void @llvm.dbg.value(metadata i32* %1, i64 16, metadata ![[ID3:[0-9]+]], metadata {{.*}})
+  call void @llvm.dbg.value(metadata !3, i64 12, metadata !2, metadata !{!"0x102"})
+; CHECK: call void @llvm.dbg.value(metadata ![[ID3]], i64 12, metadata ![[ID2]], metadata {{.*}})
 
   ret void, !foo !0, !bar !1
 ; CHECK: ret void, !foo ![[FOO:[0-9]+]], !bar ![[BAR:[0-9]+]]
@@ -36,21 +35,21 @@ entry:
 
 !llvm.module.flags = !{!4}
 
-!0 = metadata !{i32 662302, i32 26, metadata !1, null}
-!1 = metadata !{i32 4, metadata !"foo"}
-!2 = metadata !{metadata !"bar"}
-!3 = metadata !{metadata !"foo"}
-!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !MDLocation(line: 662302, column: 26, scope: !1)
+!1 = !{i32 4, !"foo"}
+!2 = !{!"bar"}
+!3 = !{!"foo"}
+!4 = !{i32 1, !"Debug Info Version", i32 2}
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !foo = !{ !0 }
 !bar = !{ !1 }
 
 ; CHECK: !foo = !{![[FOO]]}
 ; CHECK: !bar = !{![[BAR]]}
-; CHECK: ![[ID0]] = metadata !{i32 662302, i32 26, metadata ![[ID1]], null}
-; CHECK: ![[ID1]] = metadata !{i32 4, metadata !"foo"}
-; CHECK: ![[ID2]] = metadata !{metadata !"bar"}
-; CHECK: ![[ID3]] = metadata !{metadata !"foo"}
+; CHECK: ![[ID0]] = !MDLocation(line: 662302, column: 26, scope: ![[ID1]])
+; CHECK: ![[ID1]] = !{i32 4, !"foo"}
+; CHECK: ![[ID2]] = !{!"bar"}
+; CHECK: ![[ID3]] = !{!"foo"}
diff --git a/test/Assembler/getelementptr.ll b/test/Assembler/getelementptr.ll
index af03fca6d2c1..e938ff4ad8ce 100644
--- a/test/Assembler/getelementptr.ll
+++ b/test/Assembler/getelementptr.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 ; Verify that over-indexed getelementptrs are folded.
 @A = external global [2 x [3 x [5 x [7 x i32]]]]
diff --git a/test/Assembler/global-addrspace-forwardref.ll b/test/Assembler/global-addrspace-forwardref.ll
index f0f094a2248d..4a036e08be6a 100644
--- a/test/Assembler/global-addrspace-forwardref.ll
+++ b/test/Assembler/global-addrspace-forwardref.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 ; Make sure the address space of forward decls is preserved
 
diff --git a/test/Assembler/half-constprop.ll b/test/Assembler/half-constprop.ll
index 9e24f7242ba9..c5ae3bf16abb 100644
--- a/test/Assembler/half-constprop.ll
+++ b/test/Assembler/half-constprop.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -O3 -S | FileCheck %s
+; RUN: verify-uselistorder %s
 ; Testing half constant propagation.
 
 define half @abc() nounwind {
diff --git a/test/Assembler/half-conv.ll b/test/Assembler/half-conv.ll
index 70a6b86c393f..e6f73cf71ca7 100644
--- a/test/Assembler/half-conv.ll
+++ b/test/Assembler/half-conv.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -O3 -S | FileCheck %s
+; RUN: verify-uselistorder %s
 ; Testing half to float conversion.
 
 define float @abc() nounwind {
diff --git a/test/Assembler/half.ll b/test/Assembler/half.ll
index 63ad39235d19..cbd03cbf2df8 100644
--- a/test/Assembler/half.ll
+++ b/test/Assembler/half.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 ; Basic smoke test for half type.
 
 ; CHECK: define half @halftest
diff --git a/test/Assembler/huge-array.ll b/test/Assembler/huge-array.ll
index a1abf879710f..6f89e83bf5e0 100644
--- a/test/Assembler/huge-array.ll
+++ b/test/Assembler/huge-array.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 ; CHECK: define [18446744073709551615 x i8]* @foo() {
 ; CHECK: ret [18446744073709551615 x i8]* null
diff --git a/test/Assembler/inalloca.ll b/test/Assembler/inalloca.ll
index ff7a87e0a392..f433066fa2fb 100644
--- a/test/Assembler/inalloca.ll
+++ b/test/Assembler/inalloca.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 define void @a() {
 entry:
@@ -12,5 +13,5 @@ entry:
   ret void
 }
 
-!0 = metadata !{i32 662302, null}
+!0 = !{i32 662302, null}
 !foo = !{ !0 }
diff --git a/test/Assembler/inline-asm-clobber.ll b/test/Assembler/inline-asm-clobber.ll
new file mode 100644
index 000000000000..65c8e444e808
--- /dev/null
+++ b/test/Assembler/inline-asm-clobber.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as <%s 2>&1  | FileCheck %s
+
+; "~x{21}" is not a valid clobber constraint.
+
+; CHECK: invalid type for inline asm constraint string
+
+define void @foo() nounwind {
+  call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~x{21}"() nounwind
+  ret void
+}
diff --git a/test/Assembler/insertextractvalue.ll b/test/Assembler/insertextractvalue.ll
index 6c00b138ceeb..692843e9910b 100644
--- a/test/Assembler/insertextractvalue.ll
+++ b/test/Assembler/insertextractvalue.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 ; CHECK:      @foo
 ; CHECK-NEXT: load
diff --git a/test/Assembler/internal-hidden-alias.ll b/test/Assembler/internal-hidden-alias.ll
index 660514bb1850..df547c0838cb 100644
--- a/test/Assembler/internal-hidden-alias.ll
+++ b/test/Assembler/internal-hidden-alias.ll
@@ -2,5 +2,5 @@
 
 @global = global i32 0
 
-@alias = hidden alias internal i32* @global
+@alias = internal hidden alias i32* @global
 ; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Assembler/internal-protected-alias.ll b/test/Assembler/internal-protected-alias.ll
index d78582684c50..46a05ec732f5 100644
--- a/test/Assembler/internal-protected-alias.ll
+++ b/test/Assembler/internal-protected-alias.ll
@@ -2,5 +2,5 @@
 
 @global = global i32 0
 
-@alias = protected alias internal i32* @global
+@alias = internal protected alias i32* @global
 ; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Assembler/invalid-attrgrp.ll b/test/Assembler/invalid-attrgrp.ll
new file mode 100644
index 000000000000..bf1961aa68bf
--- /dev/null
+++ b/test/Assembler/invalid-attrgrp.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+attributes
+; CHECK: expected attribute group id
diff --git a/test/Assembler/invalid-comdat.ll b/test/Assembler/invalid-comdat.ll
index 987e1e1e7d92..7351999df88f 100644
--- a/test/Assembler/invalid-comdat.ll
+++ b/test/Assembler/invalid-comdat.ll
@@ -1,4 +1,4 @@
 ; RUN: not llvm-as < %s 2>&1 | FileCheck %s
 
-@v = global i32 0, comdat $v
+@v = global i32 0, comdat($v)
 ; CHECK: use of undefined comdat '$v'
diff --git a/test/Assembler/invalid-datalayout1.ll b/test/Assembler/invalid-datalayout1.ll
new file mode 100644
index 000000000000..d1befdcdf294
--- /dev/null
+++ b/test/Assembler/invalid-datalayout1.ll
@@ -0,0 +1,3 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+target datalayout = "^"
+; CHECK: Unknown specifier in datalayout string
diff --git a/test/Assembler/invalid-datalayout10.ll b/test/Assembler/invalid-datalayout10.ll
new file mode 100644
index 000000000000..9f19688f852b
--- /dev/null
+++ b/test/Assembler/invalid-datalayout10.ll
@@ -0,0 +1,3 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+target datalayout = "m"
+; CHECK: Expected mangling specifier in datalayout string
diff --git a/test/Assembler/invalid-datalayout11.ll b/test/Assembler/invalid-datalayout11.ll
new file mode 100644
index 000000000000..f8fed8ff9ff3
--- /dev/null
+++ b/test/Assembler/invalid-datalayout11.ll
@@ -0,0 +1,3 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+target datalayout = "m."
+; CHECK: Unexpected trailing characters after mangling specifier in datalayout string
diff --git a/test/Assembler/invalid-datalayout12.ll b/test/Assembler/invalid-datalayout12.ll
new file mode 100644
index 000000000000..d79c196baab1
--- /dev/null
+++ b/test/Assembler/invalid-datalayout12.ll
@@ -0,0 +1,3 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+target datalayout = "f"
+; CHECK: Missing alignment specification in datalayout string
diff --git a/test/Assembler/invalid-datalayout13.ll b/test/Assembler/invalid-datalayout13.ll
new file mode 100644
index 000000000000..5ac719dbb7a9
--- /dev/null
+++ b/test/Assembler/invalid-datalayout13.ll
@@ -0,0 +1,3 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+target datalayout = ":32"
+; CHECK: Expected token before separator in datalayout string
diff --git a/test/Assembler/invalid-datalayout2.ll b/test/Assembler/invalid-datalayout2.ll
new file mode 100644
index 000000000000..a435612bf854
--- /dev/null
+++ b/test/Assembler/invalid-datalayout2.ll
@@ -0,0 +1,3 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+target datalayout = "m:v"
+; CHECK: Unknown mangling in datalayout string
diff --git a/test/Assembler/invalid-datalayout3.ll b/test/Assembler/invalid-datalayout3.ll
new file mode 100644
index 000000000000..44535fd055b5
--- /dev/null
+++ b/test/Assembler/invalid-datalayout3.ll
@@ -0,0 +1,3 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+target datalayout = "n0"
+; CHECK: Zero width native integer type in datalayout string
diff --git a/test/Assembler/invalid-datalayout4.ll b/test/Assembler/invalid-datalayout4.ll
new file mode 100644
index 000000000000..2d946d32609d
--- /dev/null
+++ b/test/Assembler/invalid-datalayout4.ll
@@ -0,0 +1,3 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+target datalayout = "p16777216:64:64:64"
+; CHECK: Invalid address space, must be a 24bit integer
diff --git a/test/Assembler/invalid-datalayout5.ll b/test/Assembler/invalid-datalayout5.ll
new file mode 100644
index 000000000000..3ce8791c0870
--- /dev/null
+++ b/test/Assembler/invalid-datalayout5.ll
@@ -0,0 +1,3 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+target datalayout = "a1:64"
+; CHECK: Sized aggregate specification in datalayout string
diff --git a/test/Assembler/invalid-datalayout6.ll b/test/Assembler/invalid-datalayout6.ll
new file mode 100644
index 000000000000..425099f7cad8
--- /dev/null
+++ b/test/Assembler/invalid-datalayout6.ll
@@ -0,0 +1,3 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+target datalayout = "a:"
+; CHECK: Trailing separator in datalayout string
diff --git a/test/Assembler/invalid-datalayout7.ll b/test/Assembler/invalid-datalayout7.ll
new file mode 100644
index 000000000000..097227ae6ae8
--- /dev/null
+++ b/test/Assembler/invalid-datalayout7.ll
@@ -0,0 +1,3 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+target datalayout = "p:52"
+; CHECK: number of bits must be a byte width multiple
diff --git a/test/Assembler/invalid-datalayout8.ll b/test/Assembler/invalid-datalayout8.ll
new file mode 100644
index 000000000000..28832ffb17dd
--- /dev/null
+++ b/test/Assembler/invalid-datalayout8.ll
@@ -0,0 +1,3 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+target datalayout = "e-p"
+; CHECK: Missing size specification for pointer in datalayout string
diff --git a/test/Assembler/invalid-datalayout9.ll b/test/Assembler/invalid-datalayout9.ll
new file mode 100644
index 000000000000..dfeac65cf604
--- /dev/null
+++ b/test/Assembler/invalid-datalayout9.ll
@@ -0,0 +1,3 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+target datalayout = "e-p:64"
+; CHECK: Missing alignment specification for pointer in datalayout string
diff --git a/test/Assembler/invalid-fwdref2.ll b/test/Assembler/invalid-fwdref2.ll
new file mode 100644
index 000000000000..d823481f8e1b
--- /dev/null
+++ b/test/Assembler/invalid-fwdref2.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as %s -disable-output 2>&1 | grep "forward reference and definition of global have different types"
+
+@a2 = alias void ()* @g2
+@g2 = internal global i8 42
diff --git a/test/Assembler/invalid-hexint.ll b/test/Assembler/invalid-hexint.ll
new file mode 100644
index 000000000000..a11b8cd0e88b
--- /dev/null
+++ b/test/Assembler/invalid-hexint.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+global i64 u0x0p001
+; CHECK: expected value token
diff --git a/test/Assembler/invalid-mdlocation-field-bad.ll b/test/Assembler/invalid-mdlocation-field-bad.ll
new file mode 100644
index 000000000000..6ec7c64bb4d0
--- /dev/null
+++ b/test/Assembler/invalid-mdlocation-field-bad.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:18: error: invalid field 'bad'
+!0 = !MDLocation(bad: 0)
diff --git a/test/Assembler/invalid-mdlocation-field-twice.ll b/test/Assembler/invalid-mdlocation-field-twice.ll
new file mode 100644
index 000000000000..2335c935a5f4
--- /dev/null
+++ b/test/Assembler/invalid-mdlocation-field-twice.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+!0 = !{}
+
+; CHECK: <stdin>:[[@LINE+1]]:38: error: field 'line' cannot be specified more than once
+!1 = !MDLocation(line: 3, scope: !0, line: 3)
diff --git a/test/Assembler/invalid-mdlocation-overflow-column.ll b/test/Assembler/invalid-mdlocation-overflow-column.ll
new file mode 100644
index 000000000000..d1dbb27173dc
--- /dev/null
+++ b/test/Assembler/invalid-mdlocation-overflow-column.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+!0 = !{}
+
+; CHECK-NOT: error
+!1 = !MDLocation(column: 255, scope: !0)
+
+; CHECK: <stdin>:[[@LINE+1]]:26: error: value for 'column' too large, limit is 255
+!2 = !MDLocation(column: 256, scope: !0)
diff --git a/test/Assembler/invalid-mdlocation-overflow-line.ll b/test/Assembler/invalid-mdlocation-overflow-line.ll
new file mode 100644
index 000000000000..8e19f6919bf9
--- /dev/null
+++ b/test/Assembler/invalid-mdlocation-overflow-line.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+!0 = !{}
+
+; CHECK-NOT: error
+!1 = !MDLocation(line: 16777215, scope: !0)
+
+; CHECK: <stdin>:[[@LINE+1]]:24: error: value for 'line' too large, limit is 16777215
+!2 = !MDLocation(line: 16777216, scope: !0)
diff --git a/test/Assembler/invalid-mdnode-vector.ll b/test/Assembler/invalid-mdnode-vector.ll
new file mode 100644
index 000000000000..65231c07053a
--- /dev/null
+++ b/test/Assembler/invalid-mdnode-vector.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+!0 = !
+; CHECK: expected '{' here
diff --git a/test/Assembler/invalid-mdnode-vector2.ll b/test/Assembler/invalid-mdnode-vector2.ll
new file mode 100644
index 000000000000..f8007952a5ce
--- /dev/null
+++ b/test/Assembler/invalid-mdnode-vector2.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+
+!0 = !{
+; CHECK: expected metadata operand
diff --git a/test/Assembler/invalid-metadata-attachment-has-type.ll b/test/Assembler/invalid-metadata-attachment-has-type.ll
new file mode 100644
index 000000000000..74e4151b715d
--- /dev/null
+++ b/test/Assembler/invalid-metadata-attachment-has-type.ll
@@ -0,0 +1,8 @@
+; RUN: not llvm-as -disable-output < %s 2>&1 | FileCheck %s
+; Check common error from old format.
+
+define void @foo() {
+; CHECK: {{.*}}:[[@LINE+1]]:{{[0-9]+}}: error: invalid metadata-value-metadata roundtrip
+  ret void, !bar !{metadata !0}
+}
+!0 = !{}
diff --git a/test/Assembler/invalid-metadata-function-local-attachments.ll b/test/Assembler/invalid-metadata-function-local-attachments.ll
new file mode 100644
index 000000000000..20cb0a9218c5
--- /dev/null
+++ b/test/Assembler/invalid-metadata-function-local-attachments.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+define void @foo(i32 %v) {
+entry:
+; CHECK: <stdin>:[[@LINE+1]]:{{[0-9]+}}: error: invalid use of function-local name
+  ret void, !foo !{i32 %v}
+}
diff --git a/test/Assembler/invalid-metadata-function-local-complex-1.ll b/test/Assembler/invalid-metadata-function-local-complex-1.ll
new file mode 100644
index 000000000000..be1d16d2c178
--- /dev/null
+++ b/test/Assembler/invalid-metadata-function-local-complex-1.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+define void @foo(i32 %v) {
+entry:
+; CHECK: <stdin>:[[@LINE+1]]:{{[0-9]+}}: error: invalid use of function-local name
+  call void @llvm.bar(metadata !{i32 %v, i32 0})
+  ret void
+}
+
+declare void @llvm.bar(metadata)
diff --git a/test/Assembler/invalid-metadata-function-local-complex-2.ll b/test/Assembler/invalid-metadata-function-local-complex-2.ll
new file mode 100644
index 000000000000..72fa41a1f00b
--- /dev/null
+++ b/test/Assembler/invalid-metadata-function-local-complex-2.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+define void @foo(i32 %v) {
+entry:
+; CHECK: <stdin>:[[@LINE+1]]:{{[0-9]+}}: error: invalid use of function-local name
+  call void @llvm.bar(metadata !{i32 0, i32 %v})
+  ret void
+}
+
+declare void @llvm.bar(metadata)
diff --git a/test/Assembler/invalid-metadata-function-local-complex-3.ll b/test/Assembler/invalid-metadata-function-local-complex-3.ll
new file mode 100644
index 000000000000..35ec76353738
--- /dev/null
+++ b/test/Assembler/invalid-metadata-function-local-complex-3.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+define void @foo(i32 %v) {
+entry:
+; CHECK: <stdin>:[[@LINE+1]]:{{[0-9]+}}: error: invalid use of function-local name
+  call void @llvm.bar(metadata !{i32 %v})
+  ret void
+}
+
+declare void @llvm.bar(metadata)
diff --git a/test/Assembler/invalid-metadata-has-type.ll b/test/Assembler/invalid-metadata-has-type.ll
new file mode 100644
index 000000000000..647cb6703968
--- /dev/null
+++ b/test/Assembler/invalid-metadata-has-type.ll
@@ -0,0 +1,5 @@
+; RUN: not llvm-as -disable-output < %s 2>&1 | FileCheck %s
+; Check common error from old format.
+
+; CHECK: {{.*}}:[[@LINE+1]]:{{[0-9]+}}: error: unexpected type in metadata definition
+!0 = metadata !{}
diff --git a/test/Assembler/invalid-name.ll b/test/Assembler/invalid-name.ll
index d9d7a1108808..0681ea528bf4 100644
--- a/test/Assembler/invalid-name.ll
+++ b/test/Assembler/invalid-name.ll
diff --git a/test/Assembler/invalid-name2.ll b/test/Assembler/invalid-name2.ll
new file mode 100644
index 000000000000..384dee6777d8
--- /dev/null
+++ b/test/Assembler/invalid-name2.ll
diff --git a/test/Assembler/invalid-specialized-mdnode.ll b/test/Assembler/invalid-specialized-mdnode.ll
new file mode 100644
index 000000000000..9a84abbb3142
--- /dev/null
+++ b/test/Assembler/invalid-specialized-mdnode.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+
+; CHECK: <stdin>:[[@LINE+1]]:6: error: expected metadata type
+!0 = !Invalid(field: 0)
diff --git a/test/Assembler/invalid-uselistorder-function-between-blocks.ll b/test/Assembler/invalid-uselistorder-function-between-blocks.ll
new file mode 100644
index 000000000000..8f771e86dd44
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder-function-between-blocks.ll
@@ -0,0 +1,37 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: expected uselistorder directive
+
+define i32 @f32(i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+  br label %first
+
+; <label 0>:
+  %eh = mul i32 %e, %1
+  %sum = add i32 %eh, %ef
+  br label %preexit
+
+preexit:
+  %product = phi i32 [%ef, %first], [%sum, %0]
+  %backto0 = icmp slt i32 %product, -9
+  br i1 %backto0, label %0, label %exit
+
+first:
+  %e = add i32 %a, 7
+  %f = add i32 %b, 7
+  %g = add i32 %c, 8
+  %1 = add i32 %d, 8
+  %ef = mul i32 %e, %f
+  %g1 = mul i32 %g, %1
+  %goto0 = icmp slt i32 %g1, -9
+  br i1 %goto0, label %0, label %preexit
+
+; uselistorder directives
+  uselistorder i32 7, { 1, 0 }
+  uselistorder i32 %1, { 1, 0 }
+  uselistorder i32 %e, { 1, 0 }
+  uselistorder label %0, { 1, 0 }
+  uselistorder label %preexit, { 1, 0 }
+
+exit:
+  ret i32 %product
+}
diff --git a/test/Assembler/invalid-uselistorder-function-missing-named.ll b/test/Assembler/invalid-uselistorder-function-missing-named.ll
new file mode 100644
index 000000000000..c682fbe77c19
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder-function-missing-named.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: value has no uses
+define void @foo() {
+  unreachable
+  uselistorder i32 %val, { 1, 0 }
+}
diff --git a/test/Assembler/invalid-uselistorder-function-missing-numbered.ll b/test/Assembler/invalid-uselistorder-function-missing-numbered.ll
new file mode 100644
index 000000000000..e3bf0e19222c
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder-function-missing-numbered.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: value has no uses
+define void @foo() {
+  unreachable
+  uselistorder i32 %1, { 1, 0 }
+}
diff --git a/test/Assembler/invalid-uselistorder-global-missing.ll b/test/Assembler/invalid-uselistorder-global-missing.ll
new file mode 100644
index 000000000000..92f9350d29df
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder-global-missing.ll
@@ -0,0 +1,3 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: value has no uses
+uselistorder i32* @global, { 1, 0 }
diff --git a/test/Assembler/invalid-uselistorder-indexes-duplicated.ll b/test/Assembler/invalid-uselistorder-indexes-duplicated.ll
new file mode 100644
index 000000000000..e4affc53c1f6
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder-indexes-duplicated.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: expected distinct uselistorder indexes in range [0, size)
+@global = global i32 0
+@alias1 = alias i32* @global
+@alias2 = alias i32* @global
+@alias3 = alias i32* @global
+uselistorder i32* @global, { 0, 0, 2 }
diff --git a/test/Assembler/invalid-uselistorder-indexes-empty.ll b/test/Assembler/invalid-uselistorder-indexes-empty.ll
new file mode 100644
index 000000000000..82bbc97aafe0
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder-indexes-empty.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: value has no uses
+@global = global i32 0
+uselistorder i32* @global, { 1, 0 }
diff --git a/test/Assembler/invalid-uselistorder-indexes-one.ll b/test/Assembler/invalid-uselistorder-indexes-one.ll
new file mode 100644
index 000000000000..f5eac80a3ca2
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder-indexes-one.ll
@@ -0,0 +1,5 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: value only has one use
+@global = global i32 0
+@alias = alias i32* @global
+uselistorder i32* @global, { 1, 0 }
diff --git a/test/Assembler/invalid-uselistorder-indexes-ordered.ll b/test/Assembler/invalid-uselistorder-indexes-ordered.ll
new file mode 100644
index 000000000000..7bdc40037afe
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder-indexes-ordered.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: expected uselistorder indexes to change the order
+@global = global i32 0
+@alias1 = alias i32* @global
+@alias2 = alias i32* @global
+@alias3 = alias i32* @global
+uselistorder i32* @global, { 0, 1, 2 }
diff --git a/test/Assembler/invalid-uselistorder-indexes-range.ll b/test/Assembler/invalid-uselistorder-indexes-range.ll
new file mode 100644
index 000000000000..fc97acac5e63
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder-indexes-range.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: expected distinct uselistorder indexes in range [0, size)
+@global = global i32 0
+@alias1 = alias i32* @global
+@alias2 = alias i32* @global
+@alias3 = alias i32* @global
+uselistorder i32* @global, { 0, 3, 1 }
diff --git a/test/Assembler/invalid-uselistorder-indexes-toofew.ll b/test/Assembler/invalid-uselistorder-indexes-toofew.ll
new file mode 100644
index 000000000000..88a76fc568a9
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder-indexes-toofew.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: wrong number of indexes, expected 3
+@global = global i32 0
+@alias1 = alias i32* @global
+@alias2 = alias i32* @global
+@alias3 = alias i32* @global
+uselistorder i32* @global, { 1, 0 }
diff --git a/test/Assembler/invalid-uselistorder-indexes-toomany.ll b/test/Assembler/invalid-uselistorder-indexes-toomany.ll
new file mode 100644
index 000000000000..a2cf3da0bd38
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder-indexes-toomany.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: wrong number of indexes, expected 2
+@global = global i32 0
+@alias1 = alias i32* @global
+@alias2 = alias i32* @global
+uselistorder i32* @global, { 1, 0, 2 }
diff --git a/test/Assembler/invalid-uselistorder-type.ll b/test/Assembler/invalid-uselistorder-type.ll
new file mode 100644
index 000000000000..e426a7d22f22
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder-type.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: '@global' defined with type 'i32*'
+@global = global i32 0
+uselistorder i31* @global, { 1, 0 }
diff --git a/test/Assembler/invalid-uselistorder_bb-missing-bb.ll b/test/Assembler/invalid-uselistorder_bb-missing-bb.ll
new file mode 100644
index 000000000000..bd12faa85b6b
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder_bb-missing-bb.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: invalid basic block in uselistorder_bb
+define void @foo() {
+  unreachable
+}
+uselistorder_bb @foo, %bb, { 1, 0 }
diff --git a/test/Assembler/invalid-uselistorder_bb-missing-body.ll b/test/Assembler/invalid-uselistorder_bb-missing-body.ll
new file mode 100644
index 000000000000..0fbc3a833880
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder_bb-missing-body.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: invalid declaration in uselistorder_bb
+declare void @foo()
+uselistorder_bb @foo, %bb, { 1, 0 }
diff --git a/test/Assembler/invalid-uselistorder_bb-missing-func.ll b/test/Assembler/invalid-uselistorder_bb-missing-func.ll
new file mode 100644
index 000000000000..5a1466fcbc86
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder_bb-missing-func.ll
@@ -0,0 +1,3 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: invalid function forward reference in uselistorder_bb
+uselistorder_bb @foo, %bb, { 1, 0 }
diff --git a/test/Assembler/invalid-uselistorder_bb-not-bb.ll b/test/Assembler/invalid-uselistorder_bb-not-bb.ll
new file mode 100644
index 000000000000..e59e7543ae7d
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder_bb-not-bb.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: expected basic block in uselistorder_bb
+define i32 @foo(i32 %arg) {
+  ret i32 %arg
+}
+uselistorder_bb @foo, %arg, { 1, 0 }
diff --git a/test/Assembler/invalid-uselistorder_bb-not-func.ll b/test/Assembler/invalid-uselistorder_bb-not-func.ll
new file mode 100644
index 000000000000..080ddc1990e0
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder_bb-not-func.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: expected function name in uselistorder_bb
+@global = global i1 0
+uselistorder_bb @global, %bb, { 1, 0 }
diff --git a/test/Assembler/invalid-uselistorder_bb-numbered.ll b/test/Assembler/invalid-uselistorder_bb-numbered.ll
new file mode 100644
index 000000000000..d7d170f769ff
--- /dev/null
+++ b/test/Assembler/invalid-uselistorder_bb-numbered.ll
@@ -0,0 +1,11 @@
+; RUN: not llvm-as < %s -disable-output 2>&1 | FileCheck %s
+; CHECK: error: invalid numeric label in uselistorder_bb
+
+@ba1 = constant i8* blockaddress (@foo, %1)
+
+define void @foo() {
+  br label %1
+  unreachable
+}
+
+uselistorder_bb @foo, %1, { 1, 0 }
diff --git a/test/Assembler/mdlocation.ll b/test/Assembler/mdlocation.ll
new file mode 100644
index 000000000000..c1815ff50e09
--- /dev/null
+++ b/test/Assembler/mdlocation.ll
@@ -0,0 +1,20 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
+
+; CHECK: !named = !{!0, !1, !1, !2, !2, !3, !3}
+!named = !{!0, !1, !2, !3, !4, !5, !6}
+
+; CHECK: !0 = !{}
+!0 = !{}
+
+; CHECK-NEXT: !1 = !MDLocation(line: 3, column: 7, scope: !0)
+!1 = !MDLocation(line: 3, column: 7, scope: !0)
+!2 = !MDLocation(scope: !0, column: 7, line: 3)
+
+; CHECK-NEXT: !2 = !MDLocation(line: 3, column: 7, scope: !0, inlinedAt: !1)
+!3 = !MDLocation(scope: !0, inlinedAt: !1, column: 7, line: 3)
+!4 = !MDLocation(column: 7, line: 3, scope: !0, inlinedAt: !1)
+
+; CHECK-NEXT: !3 = !MDLocation(line: 0, scope: !0)
+!5 = !MDLocation(scope: !0)
+!6 = !MDLocation(scope: !0, column: 0, line: 0)
diff --git a/test/Assembler/metadata.ll b/test/Assembler/metadata.ll
index 56888fd70347..e2c59237981f 100644
--- a/test/Assembler/metadata.ll
+++ b/test/Assembler/metadata.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 ; CHECK: @test
 ; CHECK: ret void, !bar !1, !foo !0
@@ -10,8 +11,8 @@ define void @test() {
   ret void, !foo !0, !bar !1
 }
 
-!0 = metadata !{i32 662302, i32 26, metadata !1, null}
-!1 = metadata !{i32 4, metadata !"foo"}
+!0 = !MDLocation(line: 662302, column: 26, scope: !1)
+!1 = !{i32 4, !"foo"}
 
 declare void @llvm.dbg.func.start(metadata) nounwind readnone
 
diff --git a/test/Assembler/musttail-invalid-1.ll b/test/Assembler/musttail-invalid-1.ll
new file mode 100644
index 000000000000..b123a9151d3d
--- /dev/null
+++ b/test/Assembler/musttail-invalid-1.ll
@@ -0,0 +1,14 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; Check the error message on using ", ..." when we can't actually forward
+; varargs.
+
+%struct.A = type { i32 }
+
+declare i8* @f(i8*, ...)
+
+define i8* @f_thunk(i8* %this) {
+  %rv = musttail call i8* (i8*, ...)* @f(i8* %this, ...)
+; CHECK: error: unexpected ellipsis in argument list for musttail call in non-varargs function
+  ret i8* %rv
+}
diff --git a/test/Assembler/musttail-invalid-2.ll b/test/Assembler/musttail-invalid-2.ll
new file mode 100644
index 000000000000..3bcb51fc4851
--- /dev/null
+++ b/test/Assembler/musttail-invalid-2.ll
@@ -0,0 +1,13 @@
+; RUN: not llvm-as < %s -o /dev/null 2>&1 | FileCheck %s
+
+; Check the error message on skipping ", ..." at the end of a musttail call argument list.
+
+%struct.A = type { i32 }
+
+declare i8* @f(i8*, ...)
+
+define i8* @f_thunk(i8* %this, ...) {
+  %rv = musttail call i8* (i8*, ...)* @f(i8* %this)
+; CHECK: error: expected '...' at end of argument list for musttail call in varargs function
+  ret i8* %rv
+}
diff --git a/test/Assembler/musttail.ll b/test/Assembler/musttail.ll
new file mode 100644
index 000000000000..6e2a9b2ef327
--- /dev/null
+++ b/test/Assembler/musttail.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; Check that the ellipsis round trips.
+
+%struct.A = type { i32 }
+
+declare i8* @f(i8*, ...)
+
+define i8* @f_thunk(i8* %this, ...) {
+  %rv = musttail call i8* (i8*, ...)* @f(i8* %this, ...)
+  ret i8* %rv
+}
+; CHECK-LABEL: define i8* @f_thunk(i8* %this, ...)
+; CHECK: %rv = musttail call i8* (i8*, ...)* @f(i8* %this, ...)
diff --git a/test/Assembler/named-metadata.ll b/test/Assembler/named-metadata.ll
index db728108ac92..9fa37a7989d4 100644
--- a/test/Assembler/named-metadata.ll
+++ b/test/Assembler/named-metadata.ll
@@ -1,8 +1,9 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
-!0 = metadata !{metadata !"zero"}
-!1 = metadata !{metadata !"one"}
-!2 = metadata !{metadata !"two"}
+!0 = !{!"zero"}
+!1 = !{!"one"}
+!2 = !{!"two"}
 
 !foo = !{!0, !1, !2}
 ; CHECK: !foo = !{!0, !1, !2}
diff --git a/test/Assembler/numbered-values.ll b/test/Assembler/numbered-values.ll
index 2439c831a706..70b63779ccb6 100644
--- a/test/Assembler/numbered-values.ll
+++ b/test/Assembler/numbered-values.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis
+; RUN: verify-uselistorder %s
 ; PR2480
 
 define i32 @test(i32 %X) nounwind {
diff --git a/test/Assembler/private-hidden-alias.ll b/test/Assembler/private-hidden-alias.ll
index 58be92a34f25..2e770e58784e 100644
--- a/test/Assembler/private-hidden-alias.ll
+++ b/test/Assembler/private-hidden-alias.ll
@@ -2,5 +2,5 @@
 
 @global = global i32 0
 
-@alias = hidden alias private i32* @global
+@alias = private hidden alias i32* @global
 ; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Assembler/private-protected-alias.ll b/test/Assembler/private-protected-alias.ll
index a72c248f0b03..f1824a2f3c7c 100644
--- a/test/Assembler/private-protected-alias.ll
+++ b/test/Assembler/private-protected-alias.ll
@@ -2,5 +2,5 @@
 
 @global = global i32 0
 
-@alias = protected alias private i32* @global
+@alias = private protected alias i32* @global
 ; CHECK: symbol with local linkage must have default visibility
diff --git a/test/Assembler/select.ll b/test/Assembler/select.ll
index 2d3f412d256d..fe4677a733e5 100644
--- a/test/Assembler/select.ll
+++ b/test/Assembler/select.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o /dev/null
+; RUN: verify-uselistorder %s
 
 
 define i32 @test(i1 %C, i32 %V1, i32 %V2) {
diff --git a/test/Assembler/short-hexpair.ll b/test/Assembler/short-hexpair.ll
new file mode 100644
index 000000000000..067ea30b0ddb
--- /dev/null
+++ b/test/Assembler/short-hexpair.ll
@@ -0,0 +1,4 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+@x = global fp128 0xL01
+; CHECK: @x = global fp128 0xL00000000000000000000000000000001
diff --git a/test/Assembler/tls-models.ll b/test/Assembler/tls-models.ll
index 42f24962aed1..fbc0777fd8c1 100644
--- a/test/Assembler/tls-models.ll
+++ b/test/Assembler/tls-models.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 ; CHECK: @a = thread_local global i32 0
 ; CHECK: @b = thread_local(localdynamic) global i32 0
diff --git a/test/Assembler/unnamed-addr.ll b/test/Assembler/unnamed-addr.ll
index 35b3b39ce48f..304e54409cd0 100644
--- a/test/Assembler/unnamed-addr.ll
+++ b/test/Assembler/unnamed-addr.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 %struct.foobar = type { i32 }
 
diff --git a/test/Assembler/unnamed-comdat.ll b/test/Assembler/unnamed-comdat.ll
new file mode 100644
index 000000000000..8aa0f78c9741
--- /dev/null
+++ b/test/Assembler/unnamed-comdat.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; CHECK: comdat cannot be unnamed
+
+define void @0() comdat {
+  ret void
+}
diff --git a/test/Assembler/unnamed.ll b/test/Assembler/unnamed.ll
index fb4fa6244e5a..099a15a254af 100644
--- a/test/Assembler/unnamed.ll
+++ b/test/Assembler/unnamed.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis
+; RUN: verify-uselistorder %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
diff --git a/test/Assembler/upgrade-loop-metadata.ll b/test/Assembler/upgrade-loop-metadata.ll
index 1c0311dd09e9..0852469774bf 100644
--- a/test/Assembler/upgrade-loop-metadata.ll
+++ b/test/Assembler/upgrade-loop-metadata.ll
@@ -5,6 +5,7 @@
 ;
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
 ; RUN: opt -S < %s | FileCheck %s
+; RUN: verify-uselistorder %s
 
 define void @_Z28loop_with_vectorize_metadatav() {
 entry:
@@ -30,12 +31,12 @@ for.end:                                          ; preds = %for.cond
   ret void
 }
 
-; CHECK: !{metadata !"llvm.loop.interleave.count", i32 4}
-; CHECK: !{metadata !"llvm.loop.vectorize.width", i32 8}
-; CHECK: !{metadata !"llvm.loop.vectorize.enable", i1 true}
+; CHECK: !{!"llvm.loop.interleave.count", i32 4}
+; CHECK: !{!"llvm.loop.vectorize.width", i32 8}
+; CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
 
-!0 = metadata !{metadata !"clang version 3.5.0 (trunk 211528)"}
-!1 = metadata !{metadata !1, metadata !2, metadata !3, metadata !4, metadata !4}
-!2 = metadata !{metadata !"llvm.vectorizer.unroll", i32 4}
-!3 = metadata !{metadata !"llvm.vectorizer.width", i32 8}
-!4 = metadata !{metadata !"llvm.vectorizer.enable", i1 true}
+!0 = !{!"clang version 3.5.0 (trunk 211528)"}
+!1 = !{!1, !2, !3, !4, !4}
+!2 = !{!"llvm.vectorizer.unroll", i32 4}
+!3 = !{!"llvm.vectorizer.width", i32 8}
+!4 = !{!"llvm.vectorizer.enable", i1 true}
diff --git a/test/Assembler/uselistorder.ll b/test/Assembler/uselistorder.ll
new file mode 100644
index 000000000000..be5ee7000299
--- /dev/null
+++ b/test/Assembler/uselistorder.ll
@@ -0,0 +1,56 @@
+; RUN: llvm-as < %s -disable-output 2>&1 | FileCheck %s -allow-empty
+; CHECK-NOT: error
+; CHECK-NOT: warning
+; RUN: verify-uselistorder < %s
+
+@a = global [4 x i1] [i1 0, i1 1, i1 0, i1 1]
+@b = alias i1* getelementptr ([4 x i1]* @a, i64 0, i64 2)
+
+; Check use-list order of constants used by globals.
+@glob1 = global i5 7
+@glob2 = global i5 7
+@glob3 = global i5 7
+
+define i32 @f32(i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+  br label %first
+
+; <label 0>:
+  %eh = mul i32 %e, %1
+  %sum = add i32 %eh, %ef
+  br label %preexit
+
+preexit:
+  %product = phi i32 [%ef, %first], [%sum, %0]
+  %backto0 = icmp slt i32 %product, -9
+  br i1 %backto0, label %0, label %exit
+
+exit:
+  ret i32 %product
+
+first:
+  %e = add i32 %a, 7
+  %f = add i32 %b, 7
+  %g = add i32 %c, 8
+  %1 = add i32 %d, 8
+  %ef = mul i32 %e, %f
+  %g1 = mul i32 %g, %1
+  %goto0 = icmp slt i32 %g1, -9
+  br i1 %goto0, label %0, label %preexit
+
+; uselistorder directives
+  uselistorder i32 7, { 1, 0 }
+  uselistorder i32 %1, { 1, 0 }
+  uselistorder i32 %e, { 1, 0 }
+  uselistorder label %0, { 1, 0 }
+  uselistorder label %preexit, { 1, 0 }
+}
+
+define i1 @loada() {
+entry:
+  %a = load i1* getelementptr ([4 x i1]* @a, i64 0, i64 2)
+  ret i1 %a
+}
+
+uselistorder i5 7, { 1, 0, 2 }
+uselistorder i1* getelementptr ([4 x i1]* @a, i64 0, i64 2), { 1, 0 }
diff --git a/test/Assembler/uselistorder_bb.ll b/test/Assembler/uselistorder_bb.ll
new file mode 100644
index 000000000000..11ae57b299ca
--- /dev/null
+++ b/test/Assembler/uselistorder_bb.ll
@@ -0,0 +1,42 @@
+; RUN: llvm-as < %s -disable-output 2>&1 | FileCheck %s -allow-empty
+; CHECK-NOT: error
+; CHECK-NOT: warning
+; RUN: verify-uselistorder < %s
+
+@ba1 = constant i8* blockaddress (@bafunc1, %bb)
+@ba2 = constant i8* getelementptr (i8* blockaddress (@bafunc2, %bb), i61 0)
+@ba3 = constant i8* getelementptr (i8* blockaddress (@bafunc2, %bb), i61 0)
+
+define i8* @babefore() {
+  ret i8* getelementptr (i8* blockaddress (@bafunc2, %bb), i61 0)
+bb1:
+  ret i8* blockaddress (@bafunc1, %bb)
+bb2:
+  ret i8* blockaddress (@bafunc3, %bb)
+}
+define void @bafunc1() {
+  br label %bb
+bb:
+  unreachable
+}
+define void @bafunc2() {
+  br label %bb
+bb:
+  unreachable
+}
+define void @bafunc3() {
+  br label %bb
+bb:
+  unreachable
+}
+define i8* @baafter() {
+  ret i8* blockaddress (@bafunc2, %bb)
+bb1:
+  ret i8* blockaddress (@bafunc1, %bb)
+bb2:
+  ret i8* blockaddress (@bafunc3, %bb)
+}
+
+uselistorder_bb @bafunc1, %bb, { 1, 0 }
+uselistorder_bb @bafunc2, %bb, { 1, 0 }
+uselistorder_bb @bafunc3, %bb, { 1, 0 }
diff --git a/test/Assembler/vbool-cmp.ll b/test/Assembler/vbool-cmp.ll
index e652d2ff3b36..6bbd5c8f0b4c 100644
--- a/test/Assembler/vbool-cmp.ll
+++ b/test/Assembler/vbool-cmp.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 ; Rudimentary test of fcmp/icmp on vectors returning vector of bool
 
 ; CHECK: @ffoo
diff --git a/test/Assembler/vector-cmp.ll b/test/Assembler/vector-cmp.ll
index 6e3894ca1007..dc5549404f21 100644
--- a/test/Assembler/vector-cmp.ll
+++ b/test/Assembler/vector-cmp.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 ; PR2317
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin9.2.2"
diff --git a/test/Assembler/vector-select.ll b/test/Assembler/vector-select.ll
index ae8358abf9a1..59692d646356 100644
--- a/test/Assembler/vector-select.ll
+++ b/test/Assembler/vector-select.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 ; Rudimentary test of select on vectors returning vector of bool
 
 ; CHECK: @foo
diff --git a/test/Assembler/vector-shift.ll b/test/Assembler/vector-shift.ll
index 6a6531b4d2ff..d4351a87ce94 100644
--- a/test/Assembler/vector-shift.ll
+++ b/test/Assembler/vector-shift.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 
 ; CHECK: @foo
 ; CHECK: shl
diff --git a/test/Assembler/x86mmx.ll b/test/Assembler/x86mmx.ll
index 732d3be8619d..608347e0fceb 100644
--- a/test/Assembler/x86mmx.ll
+++ b/test/Assembler/x86mmx.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder %s
 ; Basic smoke test for x86_mmx type.
 
 ; CHECK: define x86_mmx @sh16
diff --git a/test/Bindings/Go/go.test b/test/Bindings/Go/go.test
new file mode 100644
index 000000000000..14eb3281cc4e
--- /dev/null
+++ b/test/Bindings/Go/go.test
@@ -0,0 +1,3 @@
+; RUN: llvm-go test llvm.org/llvm/bindings/go/llvm
+
+; REQUIRES: shell, not_ubsan
diff --git a/test/Bindings/Go/lit.local.cfg b/test/Bindings/Go/lit.local.cfg
new file mode 100644
index 000000000000..e86595b8cb56
--- /dev/null
+++ b/test/Bindings/Go/lit.local.cfg
@@ -0,0 +1,57 @@
+import os
+import pipes
+import shlex
+import sys
+
+if not 'go' in config.root.llvm_bindings:
+    config.unsupported = True
+
+def find_executable(executable, path=None):
+    if path is None:
+        path = os.environ['PATH']
+    paths = path.split(os.pathsep)
+    base, ext = os.path.splitext(executable)
+
+    if (sys.platform == 'win32' or os.name == 'os2') and (ext != '.exe'):
+        executable = executable + '.exe'
+
+    if not os.path.isfile(executable):
+        for p in paths:
+            f = os.path.join(p, executable)
+            if os.path.isfile(f):
+                return f
+        return None
+    else:
+        return executable
+
+# Resolve certain symlinks in the first word of compiler.
+#
+# This is a Go-specific hack. cgo and other Go tools check $CC and $CXX for the
+# substring 'clang' to determine if the compiler is Clang. This won't work if
+# $CC is cc and cc is a symlink pointing to clang, as it is on Darwin.
+#
+# Go tools also have problems with ccache, so we disable it.
+def fixup_compiler_path(compiler):
+    args = shlex.split(compiler)
+    if args[0].endswith('ccache'):
+        args = args[1:]
+
+    path = find_executable(args[0])
+
+    try:
+        if path.endswith('/cc') and os.readlink(path) == 'clang':
+            args[0] = path[:len(path)-2] + 'clang'
+    except (AttributeError, OSError):
+        pass
+
+    try:
+        if path.endswith('/c++') and os.readlink(path) == 'clang++':
+            args[0] = path[:len(path)-3] + 'clang++'
+    except (AttributeError, OSError):
+        pass
+
+    return ' '.join([pipes.quote(arg) for arg in args])
+
+config.environment['CC'] = fixup_compiler_path(config.host_cc)
+config.environment['CXX'] = fixup_compiler_path(config.host_cxx)
+config.environment['CGO_LDFLAGS'] = config.host_ldflags
diff --git a/test/Bindings/OCaml/analysis.ml b/test/Bindings/OCaml/analysis.ml
new file mode 100644
index 000000000000..e935ee838058
--- /dev/null
+++ b/test/Bindings/OCaml/analysis.ml
@@ -0,0 +1,54 @@
+(* RUN: cp %s %T/analysis.ml
+ * RUN: %ocamlc -g -warn-error A -package llvm.analysis -linkpkg %T/analysis.ml -o %t
+ * RUN: %t
+ * RUN: %ocamlopt -g -warn-error A -package llvm.analysis -linkpkg %T/analysis.ml -o %t
+ * RUN: %t
+ * XFAIL: vg_leak
+ *)
+
+open Llvm
+open Llvm_analysis
+
+(* Note that this takes a moment to link, so it's best to keep the number of
+   individual tests low. *)
+
+let context = global_context ()
+
+let test x = if not x then exit 1 else ()
+
+let bomb msg =
+  prerr_endline msg;
+  exit 2
+
+let _ =
+  let fty = function_type (void_type context) [| |] in
+  let m = create_module context "valid_m" in
+  let fn = define_function "valid_fn" fty m in
+  let at_entry = builder_at_end context (entry_block fn) in
+  ignore (build_ret_void at_entry);
+
+
+  (* Test that valid constructs verify. *)
+  begin match verify_module m with
+    Some msg -> bomb "valid module failed verification!"
+  | None -> ()
+  end;
+
+  if not (verify_function fn) then bomb "valid function failed verification!";
+
+
+  (* Test that invalid constructs do not verify.
+     A basic block can contain only one terminator instruction. *)
+  ignore (build_ret_void at_entry);
+
+  begin match verify_module m with
+    Some msg -> ()
+  | None -> bomb "invalid module passed verification!"
+  end;
+
+  if verify_function fn then bomb "invalid function passed verification!";
+
+
+  dispose_module m
+
+  (* Don't bother to test assert_valid_{module,function}. *)
diff --git a/test/Bindings/OCaml/bitreader.ml b/test/Bindings/OCaml/bitreader.ml
new file mode 100644
index 000000000000..57cfd043b2de
--- /dev/null
+++ b/test/Bindings/OCaml/bitreader.ml
@@ -0,0 +1,79 @@
+(* RUN: cp %s %T/bitreader.ml
+ * RUN: %ocamlc -g -warn-error A -package llvm.bitreader -package llvm.bitwriter -linkpkg %T/bitreader.ml -o %t
+ * RUN: %t %t.bc
+ * RUN: %ocamlopt -g -warn-error A -package llvm.bitreader -package llvm.bitwriter -linkpkg %T/bitreader.ml -o %t
+ * RUN: %t %t.bc
+ * RUN: llvm-dis < %t.bc
+ * XFAIL: vg_leak
+ *)
+
+(* Note that this takes a moment to link, so it's best to keep the number of
+   individual tests low. *)
+
+let context = Llvm.global_context ()
+
+let test x = if not x then exit 1 else ()
+
+let _ =
+  let fn = Sys.argv.(1) in
+  let m = Llvm.create_module context "ocaml_test_module" in
+
+  test (Llvm_bitwriter.write_bitcode_file m fn);
+
+  Llvm.dispose_module m;
+
+  (* parse_bitcode *)
+  begin
+    let mb = Llvm.MemoryBuffer.of_file fn in
+    begin try
+      let m = Llvm_bitreader.parse_bitcode context mb in
+      Llvm.dispose_module m
+    with x ->
+      Llvm.MemoryBuffer.dispose mb;
+      raise x
+    end
+  end;
+
+  (* MemoryBuffer.of_file *)
+  test begin try
+    let mb = Llvm.MemoryBuffer.of_file (fn ^ ".bogus") in
+    Llvm.MemoryBuffer.dispose mb;
+    false
+  with Llvm.IoError _ ->
+    true
+  end;
+
+  (* get_module *)
+  begin
+    let mb = Llvm.MemoryBuffer.of_file fn in
+    let m = begin try
+      Llvm_bitreader.get_module context mb
+    with x ->
+      Llvm.MemoryBuffer.dispose mb;
+      raise x
+    end in
+    Llvm.dispose_module m
+  end;
+
+  (* corrupt the bitcode *)
+  let fn = fn ^ ".txt" in
+  begin let oc = open_out fn in
+    output_string oc "not a bitcode file\n";
+    close_out oc
+  end;
+
+  (* test get_module exceptions *)
+  test begin
+    try
+      let mb = Llvm.MemoryBuffer.of_file fn in
+      let m = begin try
+        Llvm_bitreader.get_module context mb
+      with x ->
+        Llvm.MemoryBuffer.dispose mb;
+        raise x
+      end in
+      Llvm.dispose_module m;
+      false
+    with Llvm_bitreader.Error _ ->
+      true
+  end
diff --git a/test/Bindings/OCaml/bitwriter.ml b/test/Bindings/OCaml/bitwriter.ml
new file mode 100644
index 000000000000..7c803f6d5510
--- /dev/null
+++ b/test/Bindings/OCaml/bitwriter.ml
@@ -0,0 +1,49 @@
+(* RUN: cp %s %T/bitwriter.ml
+ * RUN: %ocamlc -g -w -3 -warn-error A -package llvm.bitreader -package llvm.bitwriter -linkpkg %T/bitwriter.ml -o %t
+ * RUN: %t %t.bc
+ * RUN: %ocamlopt -g -w -3 -warn-error A -package llvm.bitreader -package llvm.bitwriter -linkpkg %T/bitwriter.ml -o %t
+ * RUN: %t %t.bc
+ * RUN: llvm-dis < %t.bc
+ * XFAIL: vg_leak
+ *)
+
+(* Note that this takes a moment to link, so it's best to keep the number of
+   individual tests low. *)
+
+let context = Llvm.global_context ()
+
+let test x = if not x then exit 1 else ()
+
+let read_file name =
+  let ic = open_in_bin name in
+  let len = in_channel_length ic in
+  let buf = String.create len in
+
+  test ((input ic buf 0 len) = len);
+
+  close_in ic;
+
+  buf
+
+let temp_bitcode ?unbuffered m =
+  let temp_name, temp_oc = Filename.open_temp_file ~mode:[Open_binary] "" "" in
+
+  test (Llvm_bitwriter.output_bitcode ?unbuffered temp_oc m);
+  flush temp_oc;
+
+  let temp_buf = read_file temp_name in
+
+  close_out temp_oc;
+
+  temp_buf
+
+let _ =
+  let m = Llvm.create_module context "ocaml_test_module" in
+
+  test (Llvm_bitwriter.write_bitcode_file m Sys.argv.(1));
+  let file_buf = read_file Sys.argv.(1) in
+
+  test (file_buf = temp_bitcode m);
+  test (file_buf = temp_bitcode ~unbuffered:false m);
+  test (file_buf = temp_bitcode ~unbuffered:true m);
+  test (file_buf = Llvm.MemoryBuffer.as_string (Llvm_bitwriter.write_bitcode_to_memory_buffer m))
diff --git a/test/Bindings/OCaml/core.ml b/test/Bindings/OCaml/core.ml
new file mode 100644
index 000000000000..0f3daac5bfd2
--- /dev/null
+++ b/test/Bindings/OCaml/core.ml
@@ -0,0 +1,1507 @@
+(* RUN: cp %s %T/core.ml
+ * RUN: %ocamlc -g -warn-error A -package llvm.analysis -package llvm.bitwriter -linkpkg %T/core.ml -o %t
+ * RUN: %t %t.bc
+ * RUN: %ocamlopt -g -warn-error A -package llvm.analysis -package llvm.bitwriter -linkpkg %T/core.ml -o %t
+ * RUN: %t %t.bc
+ * RUN: llvm-dis < %t.bc > %t.ll
+ * RUN: FileCheck %s < %t.ll
+ * Do a second pass for things that shouldn't be anywhere.
+ * RUN: FileCheck -check-prefix=CHECK-NOWHERE %s < %t.ll
+ * XFAIL: vg_leak
+ *)
+
+(* Note: It takes several seconds for ocamlopt to link an executable with
+         libLLVMCore.a, so it's better to write a big test than a bunch of
+         little ones. *)
+
+open Llvm
+open Llvm_bitwriter
+
+
+(* Tiny unit test framework - really just to help find which line is busted *)
+let exit_status = ref 0
+let suite_name = ref ""
+let group_name = ref ""
+let case_num = ref 0
+let print_checkpoints = false
+let context = global_context ()
+let i1_type = Llvm.i1_type context
+let i8_type = Llvm.i8_type context
+let i16_type = Llvm.i16_type context
+let i32_type = Llvm.i32_type context
+let i64_type = Llvm.i64_type context
+let void_type = Llvm.void_type context
+let float_type = Llvm.float_type context
+let double_type = Llvm.double_type context
+let fp128_type = Llvm.fp128_type context
+
+let group name =
+  group_name := !suite_name ^ "/" ^ name;
+  case_num := 0;
+  if print_checkpoints then
+    prerr_endline ("  " ^ name ^ "...")
+
+let insist cond =
+  incr case_num;
+  if not cond then
+    exit_status := 10;
+  match print_checkpoints, cond with
+  | false, true -> ()
+  | false, false ->
+      prerr_endline ("FAILED: " ^ !suite_name ^ "/" ^ !group_name ^ " #" ^ (string_of_int !case_num))
+  | true, true ->
+      prerr_endline ("    " ^ (string_of_int !case_num))
+  | true, false ->
+      prerr_endline ("    " ^ (string_of_int !case_num) ^ " FAIL")
+
+let suite name f =
+  suite_name := name;
+  if print_checkpoints then
+    prerr_endline (name ^ ":");
+  f ()
+
+
+(*===-- Fixture -----------------------------------------------------------===*)
+
+let filename = Sys.argv.(1)
+let m = create_module context filename
+
+
+(*===-- Conversion --------------------------------------------------------===*)
+
+let test_conversion () =
+  insist ("i32" = (string_of_lltype i32_type));
+  let c = const_int i32_type 42 in
+  insist ("i32 42" = (string_of_llvalue c))
+
+
+(*===-- Target ------------------------------------------------------------===*)
+
+let test_target () =
+  begin group "triple";
+    let trip = "i686-apple-darwin8" in
+    set_target_triple trip m;
+    insist (trip = target_triple m)
+  end;
+
+  begin group "layout";
+    let layout = "e" in
+    set_data_layout layout m;
+    insist (layout = data_layout m)
+  end
+  (* CHECK: target datalayout = "e"
+   * CHECK: target triple = "i686-apple-darwin8"
+   *)
+
+
+(*===-- Constants ---------------------------------------------------------===*)
+
+let test_constants () =
+  (* CHECK: const_int{{.*}}i32{{.*}}-1
+   *)
+  group "int";
+  let c = const_int i32_type (-1) in
+  ignore (define_global "const_int" c m);
+  insist (i32_type = type_of c);
+  insist (is_constant c);
+  insist (Some (-1L) = int64_of_const c);
+
+  (* CHECK: const_sext_int{{.*}}i64{{.*}}-1
+   *)
+  group "sext int";
+  let c = const_int i64_type (-1) in
+  ignore (define_global "const_sext_int" c m);
+  insist (i64_type = type_of c);
+  insist (Some (-1L) = int64_of_const c);
+
+  (* CHECK: const_zext_int64{{.*}}i64{{.*}}4294967295
+   *)
+  group "zext int64";
+  let c = const_of_int64 i64_type (Int64.of_string "4294967295") false in
+  ignore (define_global "const_zext_int64" c m);
+  insist (i64_type = type_of c);
+  insist (Some 4294967295L = int64_of_const c);
+
+  (* CHECK: const_int_string{{.*}}i32{{.*}}-1
+   *)
+  group "int string";
+  let c = const_int_of_string i32_type "-1" 10 in
+  ignore (define_global "const_int_string" c m);
+  insist (i32_type = type_of c);
+  insist (None = (string_of_const c));
+  insist (None = float_of_const c);
+  insist (Some (-1L) = int64_of_const c);
+
+  (* CHECK: const_int64{{.*}}i64{{.*}}9223372036854775807
+   *)
+  group "max int64";
+  let c = const_of_int64 i64_type 9223372036854775807L true in
+  ignore (define_global "const_int64" c m) ;
+  insist (i64_type = type_of c);
+  insist (Some 9223372036854775807L = int64_of_const c);
+
+  if Sys.word_size = 64; then begin
+    group "long int";
+    let c = const_int i64_type (1 lsl 61) in
+    insist (c = const_of_int64 i64_type (Int64.of_int (1 lsl 61)) false)
+  end;
+
+  (* CHECK: @const_string = global {{.*}}c"cruel\00world"
+   *)
+  group "string";
+  let c = const_string context "cruel\000world" in
+  ignore (define_global "const_string" c m);
+  insist ((array_type i8_type 11) = type_of c);
+  insist ((Some "cruel\000world") = (string_of_const c));
+
+  (* CHECK: const_stringz{{.*}}"hi\00again\00"
+   *)
+  group "stringz";
+  let c = const_stringz context "hi\000again" in
+  ignore (define_global "const_stringz" c m);
+  insist ((array_type i8_type 9) = type_of c);
+
+  (* CHECK: const_single{{.*}}2.75
+   * CHECK: const_double{{.*}}3.1459
+   * CHECK: const_double_string{{.*}}2
+   * CHECK: const_fake_fp128{{.*}}0xL00000000000000004000000000000000
+   * CHECK: const_fp128_string{{.*}}0xLF3CB1CCF26FBC178452FB4EC7F91973F
+   *)
+  begin group "real";
+    let cs = const_float float_type 2.75 in
+    ignore (define_global "const_single" cs m);
+    insist (float_type = type_of cs);
+    insist (float_of_const cs = Some 2.75);
+
+    let cd = const_float double_type 3.1459 in
+    ignore (define_global "const_double" cd m);
+    insist (double_type = type_of cd);
+    insist (float_of_const cd = Some 3.1459);
+
+    let cd = const_float_of_string double_type "2" in
+    ignore (define_global "const_double_string" cd m);
+    insist (double_type = type_of cd);
+    insist (float_of_const cd = Some 2.);
+
+    let cd = const_float fp128_type 2. in
+    ignore (define_global "const_fake_fp128" cd m);
+    insist (fp128_type = type_of cd);
+    insist (float_of_const cd = Some 2.);
+
+    let cd = const_float_of_string fp128_type "1e400" in
+    ignore (define_global "const_fp128_string" cd m);
+    insist (fp128_type = type_of cd);
+    insist (float_of_const cd = None);
+  end;
+
+  let one = const_int i16_type 1 in
+  let two = const_int i16_type 2 in
+  let three = const_int i32_type 3 in
+  let four = const_int i32_type 4 in
+
+  (* CHECK: const_array{{.*}}[i32 3, i32 4]
+   *)
+  group "array";
+  let c = const_array i32_type [| three; four |] in
+  ignore (define_global "const_array" c m);
+  insist ((array_type i32_type 2) = (type_of c));
+  insist (three = (const_element c 0));
+  insist (four = (const_element c 1));
+
+  (* CHECK: const_vector{{.*}}<i16 1, i16 2{{.*}}>
+   *)
+  group "vector";
+  let c = const_vector [| one; two; one; two;
+                          one; two; one; two |] in
+  ignore (define_global "const_vector" c m);
+  insist ((vector_type i16_type 8) = (type_of c));
+
+  (* CHECK: const_structure{{.*.}}i16 1, i16 2, i32 3, i32 4
+   *)
+  group "structure";
+  let c = const_struct context [| one; two; three; four |] in
+  ignore (define_global "const_structure" c m);
+  insist ((struct_type context [| i16_type; i16_type; i32_type; i32_type |])
+        = (type_of c));
+
+  (* CHECK: const_null{{.*}}zeroinit
+   *)
+  group "null";
+  let c = const_null (packed_struct_type context [| i1_type; i8_type; i64_type;
+                                                    double_type |]) in
+  ignore (define_global "const_null" c m);
+
+  (* CHECK: const_all_ones{{.*}}-1
+   *)
+  group "all ones";
+  let c = const_all_ones i64_type in
+  ignore (define_global "const_all_ones" c m);
+
+  group "pointer null"; begin
+    (* CHECK: const_pointer_null = global i64* null
+     *)
+    let c = const_pointer_null (pointer_type i64_type) in
+    ignore (define_global "const_pointer_null" c m);
+  end;
+
+  (* CHECK: const_undef{{.*}}undef
+   *)
+  group "undef";
+  let c = undef i1_type in
+  ignore (define_global "const_undef" c m);
+  insist (i1_type = type_of c);
+  insist (is_undef c);
+
+  group "constant arithmetic";
+  (* CHECK: @const_neg = global i64 sub
+   * CHECK: @const_nsw_neg = global i64 sub nsw
+   * CHECK: @const_nuw_neg = global i64 sub nuw
+   * CHECK: @const_fneg = global double fsub
+   * CHECK: @const_not = global i64 xor
+   * CHECK: @const_add = global i64 add
+   * CHECK: @const_nsw_add = global i64 add nsw
+   * CHECK: @const_nuw_add = global i64 add nuw
+   * CHECK: @const_fadd = global double fadd
+   * CHECK: @const_sub = global i64 sub
+   * CHECK: @const_nsw_sub = global i64 sub nsw
+   * CHECK: @const_nuw_sub = global i64 sub nuw
+   * CHECK: @const_fsub = global double fsub
+   * CHECK: @const_mul = global i64 mul
+   * CHECK: @const_nsw_mul = global i64 mul nsw
+   * CHECK: @const_nuw_mul = global i64 mul nuw
+   * CHECK: @const_fmul = global double fmul
+   * CHECK: @const_udiv = global i64 udiv
+   * CHECK: @const_sdiv = global i64 sdiv
+   * CHECK: @const_exact_sdiv = global i64 sdiv exact
+   * CHECK: @const_fdiv = global double fdiv
+   * CHECK: @const_urem = global i64 urem
+   * CHECK: @const_srem = global i64 srem
+   * CHECK: @const_frem = global double frem
+   * CHECK: @const_and = global i64 and
+   * CHECK: @const_or = global i64 or
+   * CHECK: @const_xor = global i64 xor
+   * CHECK: @const_icmp = global i1 icmp sle
+   * CHECK: @const_fcmp = global i1 fcmp ole
+   *)
+  let void_ptr = pointer_type i8_type in
+  let five = const_int i64_type 5 in
+  let ffive = const_uitofp five double_type in
+  let foldbomb_gv = define_global "FoldBomb" (const_null i8_type) m in
+  let foldbomb = const_ptrtoint foldbomb_gv i64_type in
+  let ffoldbomb = const_uitofp foldbomb double_type in
+  ignore (define_global "const_neg" (const_neg foldbomb) m);
+  ignore (define_global "const_nsw_neg" (const_nsw_neg foldbomb) m);
+  ignore (define_global "const_nuw_neg" (const_nuw_neg foldbomb) m);
+  ignore (define_global "const_fneg" (const_fneg ffoldbomb) m);
+  ignore (define_global "const_not" (const_not foldbomb) m);
+  ignore (define_global "const_add" (const_add foldbomb five) m);
+  ignore (define_global "const_nsw_add" (const_nsw_add foldbomb five) m);
+  ignore (define_global "const_nuw_add" (const_nuw_add foldbomb five) m);
+  ignore (define_global "const_fadd" (const_fadd ffoldbomb ffive) m);
+  ignore (define_global "const_sub" (const_sub foldbomb five) m);
+  ignore (define_global "const_nsw_sub" (const_nsw_sub foldbomb five) m);
+  ignore (define_global "const_nuw_sub" (const_nuw_sub foldbomb five) m);
+  ignore (define_global "const_fsub" (const_fsub ffoldbomb ffive) m);
+  ignore (define_global "const_mul" (const_mul foldbomb five) m);
+  ignore (define_global "const_nsw_mul" (const_nsw_mul foldbomb five) m);
+  ignore (define_global "const_nuw_mul" (const_nuw_mul foldbomb five) m);
+  ignore (define_global "const_fmul" (const_fmul ffoldbomb ffive) m);
+  ignore (define_global "const_udiv" (const_udiv foldbomb five) m);
+  ignore (define_global "const_sdiv" (const_sdiv foldbomb five) m);
+  ignore (define_global "const_exact_sdiv" (const_exact_sdiv foldbomb five) m);
+  ignore (define_global "const_fdiv" (const_fdiv ffoldbomb ffive) m);
+  ignore (define_global "const_urem" (const_urem foldbomb five) m);
+  ignore (define_global "const_srem" (const_srem foldbomb five) m);
+  ignore (define_global "const_frem" (const_frem ffoldbomb ffive) m);
+  ignore (define_global "const_and" (const_and foldbomb five) m);
+  ignore (define_global "const_or" (const_or foldbomb five) m);
+  ignore (define_global "const_xor" (const_xor foldbomb five) m);
+  ignore (define_global "const_icmp" (const_icmp Icmp.Sle foldbomb five) m);
+  ignore (define_global "const_fcmp" (const_fcmp Fcmp.Ole ffoldbomb ffive) m);
+
+  group "constant casts";
+  (* CHECK: const_trunc{{.*}}trunc
+   * CHECK: const_sext{{.*}}sext
+   * CHECK: const_zext{{.*}}zext
+   * CHECK: const_fptrunc{{.*}}fptrunc
+   * CHECK: const_fpext{{.*}}fpext
+   * CHECK: const_uitofp{{.*}}uitofp
+   * CHECK: const_sitofp{{.*}}sitofp
+   * CHECK: const_fptoui{{.*}}fptoui
+   * CHECK: const_fptosi{{.*}}fptosi
+   * CHECK: const_ptrtoint{{.*}}ptrtoint
+   * CHECK: const_inttoptr{{.*}}inttoptr
+   * CHECK: const_bitcast{{.*}}bitcast
+   * CHECK: const_intcast{{.*}}zext
+   *)
+  let i128_type = integer_type context 128 in
+  ignore (define_global "const_trunc" (const_trunc (const_add foldbomb five)
+                                               i8_type) m);
+  ignore (define_global "const_sext" (const_sext foldbomb i128_type) m);
+  ignore (define_global "const_zext" (const_zext foldbomb i128_type) m);
+  ignore (define_global "const_fptrunc" (const_fptrunc ffoldbomb float_type) m);
+  ignore (define_global "const_fpext" (const_fpext ffoldbomb fp128_type) m);
+  ignore (define_global "const_uitofp" (const_uitofp foldbomb double_type) m);
+  ignore (define_global "const_sitofp" (const_sitofp foldbomb double_type) m);
+  ignore (define_global "const_fptoui" (const_fptoui ffoldbomb i32_type) m);
+  ignore (define_global "const_fptosi" (const_fptosi ffoldbomb i32_type) m);
+  ignore (define_global "const_ptrtoint" (const_ptrtoint
+    (const_gep (const_null (pointer_type i8_type))
+               [| const_int i32_type 1 |])
+    i32_type) m);
+  ignore (define_global "const_inttoptr" (const_inttoptr (const_add foldbomb five)
+                                                  void_ptr) m);
+  ignore (define_global "const_bitcast" (const_bitcast ffoldbomb i64_type) m);
+  ignore (define_global "const_intcast"
+          (const_intcast foldbomb i128_type ~is_signed:false) m);
+
+  group "misc constants";
+  (* CHECK: const_size_of{{.*}}getelementptr{{.*}}null
+   * CHECK: const_gep{{.*}}getelementptr
+   * CHECK: const_select{{.*}}select
+   * CHECK: const_extractelement{{.*}}extractelement
+   * CHECK: const_insertelement{{.*}}insertelement
+   * CHECK: const_shufflevector = global <4 x i32> <i32 0, i32 1, i32 1, i32 0>
+   *)
+  ignore (define_global "const_size_of" (size_of (pointer_type i8_type)) m);
+  ignore (define_global "const_gep" (const_gep foldbomb_gv [| five |]) m);
+  ignore (define_global "const_select" (const_select
+    (const_icmp Icmp.Sle foldbomb five)
+    (const_int i8_type (-1))
+    (const_int i8_type 0)) m);
+  let zero = const_int i32_type 0 in
+  let one  = const_int i32_type 1 in
+  ignore (define_global "const_extractelement" (const_extractelement
+    (const_vector [| zero; one; zero; one |])
+    (const_trunc foldbomb i32_type)) m);
+  ignore (define_global "const_insertelement" (const_insertelement
+    (const_vector [| zero; one; zero; one |])
+    zero (const_trunc foldbomb i32_type)) m);
+  ignore (define_global "const_shufflevector" (const_shufflevector
+    (const_vector [| zero; one |])
+    (const_vector [| one; zero |])
+    (const_vector [| const_int i32_type 0; const_int i32_type 1;
+                     const_int i32_type 2; const_int i32_type 3 |])) m);
+
+  group "asm"; begin
+    let ft = function_type void_type [| i32_type; i32_type; i32_type |] in
+    ignore (const_inline_asm
+      ft
+      ""
+      "{cx},{ax},{di},~{dirflag},~{fpsr},~{flags},~{edi},~{ecx}"
+      true
+      false)
+  end;
+
+  group "recursive struct"; begin
+      let nsty = named_struct_type context "rec" in
+      let pty = pointer_type nsty in
+      struct_set_body nsty [| i32_type; pty |] false;
+      let elts = [| const_int i32_type 4; const_pointer_null pty |] in
+      let grec_init = const_named_struct nsty elts in
+      ignore (define_global "grec" grec_init m);
+      ignore (string_of_lltype nsty);
+  end
+
+
+(*===-- Global Values -----------------------------------------------------===*)
+
+let test_global_values () =
+  let (++) x f = f x; x in
+  let zero32 = const_null i32_type in
+
+  (* CHECK: GVal01
+   *)
+  group "naming";
+  let g = define_global "TEMPORARY" zero32 m in
+  insist ("TEMPORARY" = value_name g);
+  set_value_name "GVal01" g;
+  insist ("GVal01" = value_name g);
+
+  (* CHECK: GVal02{{.*}}linkonce
+   *)
+  group "linkage";
+  let g = define_global "GVal02" zero32 m ++
+          set_linkage Linkage.Link_once in
+  insist (Linkage.Link_once = linkage g);
+
+  (* CHECK: GVal03{{.*}}Hanalei
+   *)
+  group "section";
+  let g = define_global "GVal03" zero32 m ++
+          set_section "Hanalei" in
+  insist ("Hanalei" = section g);
+
+  (* CHECK: GVal04{{.*}}hidden
+   *)
+  group "visibility";
+  let g = define_global "GVal04" zero32 m ++
+          set_visibility Visibility.Hidden in
+  insist (Visibility.Hidden = visibility g);
+
+  (* CHECK: GVal05{{.*}}align 128
+   *)
+  group "alignment";
+  let g = define_global "GVal05" zero32 m ++
+          set_alignment 128 in
+  insist (128 = alignment g);
+
+  (* CHECK: GVal06{{.*}}dllexport
+   *)
+  group "dll_storage_class";
+  let g = define_global "GVal06" zero32 m ++
+          set_dll_storage_class DLLStorageClass.DLLExport in
+  insist (DLLStorageClass.DLLExport = dll_storage_class g)
+
+
+(*===-- Global Variables --------------------------------------------------===*)
+
+let test_global_variables () =
+  let (++) x f = f x; x in
+  let forty_two32 = const_int i32_type 42 in
+
+  group "declarations"; begin
+    (* CHECK: @GVar01 = external global i32
+     * CHECK: @QGVar01 = external addrspace(3) global i32
+     *)
+    insist (None == lookup_global "GVar01" m);
+    let g = declare_global i32_type "GVar01" m in
+    insist (is_declaration g);
+    insist (pointer_type float_type ==
+              type_of (declare_global float_type "GVar01" m));
+    insist (g == declare_global i32_type "GVar01" m);
+    insist (match lookup_global "GVar01" m with Some x -> x = g
+                                              | None -> false);
+
+    insist (None == lookup_global "QGVar01" m);
+    let g = declare_qualified_global i32_type "QGVar01" 3 m in
+    insist (is_declaration g);
+    insist (qualified_pointer_type float_type 3 ==
+              type_of (declare_qualified_global float_type "QGVar01" 3 m));
+    insist (g == declare_qualified_global i32_type "QGVar01" 3 m);
+    insist (match lookup_global "QGVar01" m with Some x -> x = g
+                                              | None -> false);
+  end;
+
+  group "definitions"; begin
+    (* CHECK: @GVar02 = global i32 42
+     * CHECK: @GVar03 = global i32 42
+     * CHECK: @QGVar02 = addrspace(3) global i32 42
+     * CHECK: @QGVar03 = addrspace(3) global i32 42
+     *)
+    let g = define_global "GVar02" forty_two32 m in
+    let g2 = declare_global i32_type "GVar03" m ++
+           set_initializer forty_two32 in
+    insist (not (is_declaration g));
+    insist (not (is_declaration g2));
+    insist ((global_initializer g) == (global_initializer g2));
+
+    let g = define_qualified_global "QGVar02" forty_two32 3 m in
+    let g2 = declare_qualified_global i32_type "QGVar03" 3 m ++
+           set_initializer forty_two32 in
+    insist (not (is_declaration g));
+    insist (not (is_declaration g2));
+    insist ((global_initializer g) == (global_initializer g2));
+  end;
+
+  (* CHECK: GVar04{{.*}}thread_local
+   *)
+  group "threadlocal";
+  let g = define_global "GVar04" forty_two32 m ++
+          set_thread_local true in
+  insist (is_thread_local g);
+
+  (* CHECK: GVar05{{.*}}thread_local(initialexec)
+   *)
+  group "threadlocal_mode";
+  let g = define_global "GVar05" forty_two32 m ++
+          set_thread_local_mode ThreadLocalMode.InitialExec in
+  insist ((thread_local_mode g) = ThreadLocalMode.InitialExec);
+
+  (* CHECK: GVar06{{.*}}externally_initialized
+   *)
+  group "externally_initialized";
+  let g = define_global "GVar06" forty_two32 m ++
+          set_externally_initialized true in
+  insist (is_externally_initialized g);
+
+  (* CHECK-NOWHERE-NOT: GVar07
+   *)
+  group "delete";
+  let g = define_global "GVar07" forty_two32 m in
+  delete_global g;
+
+  (* CHECK: ConstGlobalVar{{.*}}constant
+   *)
+  group "constant";
+  let g = define_global "ConstGlobalVar" forty_two32 m in
+  insist (not (is_global_constant g));
+  set_global_constant true g;
+  insist (is_global_constant g);
+
+  begin group "iteration";
+    let m = create_module context "temp" in
+
+    insist (At_end m = global_begin m);
+    insist (At_start m = global_end m);
+
+    let g1 = declare_global i32_type "One" m in
+    let g2 = declare_global i32_type "Two" m in
+
+    insist (Before g1 = global_begin m);
+    insist (Before g2 = global_succ g1);
+    insist (At_end m = global_succ g2);
+
+    insist (After g2 = global_end m);
+    insist (After g1 = global_pred g2);
+    insist (At_start m = global_pred g1);
+
+    let lf s x = s ^ "->" ^ value_name x in
+    insist ("->One->Two" = fold_left_globals lf "" m);
+
+    let rf x s = value_name x ^ "<-" ^ s in
+    insist ("One<-Two<-" = fold_right_globals rf m "");
+
+    dispose_module m
+  end
+
+(* String globals built below are emitted here.
+ * CHECK: build_global_string{{.*}}stringval
+ *)
+
+
+(*===-- Uses --------------------------------------------------------------===*)
+
+let test_uses () =
+  let ty = function_type i32_type [| i32_type; i32_type |] in
+  let fn = define_function "use_function" ty m in
+  let b = builder_at_end context (entry_block fn) in
+
+  let p1 = param fn 0 in
+  let p2 = param fn 1 in
+  let v1 = build_add p1 p2 "v1" b in
+  let v2 = build_add p1 v1 "v2" b in
+  let _ = build_add v1 v2 "v3" b in
+
+  let lf s u = value_name (user u) ^ "->" ^ s in
+  insist ("v2->v3->" = fold_left_uses lf "" v1);
+  let rf u s = value_name (user u) ^ "<-" ^ s in
+  insist ("v3<-v2<-" = fold_right_uses rf v1 "");
+
+  let lf s u = value_name (used_value u) ^ "->" ^ s in
+  insist ("v1->v1->" = fold_left_uses lf "" v1);
+
+  let rf u s = value_name (used_value u) ^ "<-" ^ s in
+  insist ("v1<-v1<-" = fold_right_uses rf v1 "");
+
+  ignore (build_unreachable b)
+
+
+(*===-- Users -------------------------------------------------------------===*)
+
+let test_users () =
+  let ty = function_type i32_type [| i32_type; i32_type |] in
+  let fn = define_function "user_function" ty m in
+  let b = builder_at_end context (entry_block fn) in
+
+  let p1 = param fn 0 in
+  let p2 = param fn 1 in
+  let a3 = build_alloca i32_type "user_alloca" b in
+  let p3 = build_load a3 "user_load" b in
+  let i = build_add p1 p2 "sum" b in
+
+  insist ((num_operands i) = 2);
+  insist ((operand i 0) = p1);
+  insist ((operand i 1) = p2);
+
+  set_operand i 1 p3;
+  insist ((operand i 1) != p2);
+  insist ((operand i 1) = p3);
+
+  ignore (build_unreachable b)
+
+
+(*===-- Aliases -----------------------------------------------------------===*)
+
+let test_aliases () =
+  (* CHECK: @alias = alias i32* @aliasee
+   *)
+  let forty_two32 = const_int i32_type 42 in
+  let v = define_global "aliasee" forty_two32 m in
+  ignore (add_alias m (pointer_type i32_type) v "alias")
+
+
+(*===-- Functions ---------------------------------------------------------===*)
+
+let test_functions () =
+  let ty = function_type i32_type [| i32_type; i64_type |] in
+  let ty2 = function_type i8_type [| i8_type; i64_type |] in
+
+  (* CHECK: declare i32 @Fn1(i32, i64)
+   *)
+  begin group "declare";
+    insist (None = lookup_function "Fn1" m);
+    let fn = declare_function "Fn1" ty m in
+    insist (pointer_type ty = type_of fn);
+    insist (is_declaration fn);
+    insist (0 = Array.length (basic_blocks fn));
+    insist (pointer_type ty2 == type_of (declare_function "Fn1" ty2 m));
+    insist (fn == declare_function "Fn1" ty m);
+    insist (None <> lookup_function "Fn1" m);
+    insist (match lookup_function "Fn1" m with Some x -> x = fn
+                                             | None -> false);
+    insist (m == global_parent fn)
+  end;
+
+  (* CHECK-NOWHERE-NOT: Fn2
+   *)
+  group "delete";
+  let fn = declare_function "Fn2" ty m in
+  delete_function fn;
+
+  (* CHECK: define{{.*}}Fn3
+   *)
+  group "define";
+  let fn = define_function "Fn3" ty m in
+  insist (not (is_declaration fn));
+  insist (1 = Array.length (basic_blocks fn));
+  ignore (build_unreachable (builder_at_end context (entry_block fn)));
+
+  (* CHECK: define{{.*}}Fn4{{.*}}Param1{{.*}}Param2
+   *)
+  group "params";
+  let fn = define_function "Fn4" ty m in
+  let params = params fn in
+  insist (2 = Array.length params);
+  insist (params.(0) = param fn 0);
+  insist (params.(1) = param fn 1);
+  insist (i32_type = type_of params.(0));
+  insist (i64_type = type_of params.(1));
+  set_value_name "Param1" params.(0);
+  set_value_name "Param2" params.(1);
+  ignore (build_unreachable (builder_at_end context (entry_block fn)));
+
+  (* CHECK: fastcc{{.*}}Fn5
+   *)
+  group "callconv";
+  let fn = define_function "Fn5" ty m in
+  insist (CallConv.c = function_call_conv fn);
+  set_function_call_conv CallConv.fast fn;
+  insist (CallConv.fast = function_call_conv fn);
+  ignore (build_unreachable (builder_at_end context (entry_block fn)));
+
+  begin group "gc";
+    (* CHECK: Fn6{{.*}}gc{{.*}}shadowstack
+     *)
+    let fn = define_function "Fn6" ty m in
+    insist (None = gc fn);
+    set_gc (Some "ocaml") fn;
+    insist (Some "ocaml" = gc fn);
+    set_gc None fn;
+    insist (None = gc fn);
+    set_gc (Some "shadowstack") fn;
+    ignore (build_unreachable (builder_at_end context (entry_block fn)));
+  end;
+
+  begin group "iteration";
+    let m = create_module context "temp" in
+
+    insist (At_end m = function_begin m);
+    insist (At_start m = function_end m);
+
+    let f1 = define_function "One" ty m in
+    let f2 = define_function "Two" ty m in
+
+    insist (Before f1 = function_begin m);
+    insist (Before f2 = function_succ f1);
+    insist (At_end m = function_succ f2);
+
+    insist (After f2 = function_end m);
+    insist (After f1 = function_pred f2);
+    insist (At_start m = function_pred f1);
+
+    let lf s x = s ^ "->" ^ value_name x in
+    insist ("->One->Two" = fold_left_functions lf "" m);
+
+    let rf x s = value_name x ^ "<-" ^ s in
+    insist ("One<-Two<-" = fold_right_functions rf m "");
+
+    dispose_module m
+  end
+
+
+(*===-- Params ------------------------------------------------------------===*)
+
+let test_params () =
+  begin group "iteration";
+    let m = create_module context "temp" in
+
+    let vf = define_function "void" (function_type void_type [| |]) m in
+
+    insist (At_end vf = param_begin vf);
+    insist (At_start vf = param_end vf);
+
+    let ty = function_type void_type [| i32_type; i32_type |] in
+    let f = define_function "f" ty m in
+    let p1 = param f 0 in
+    let p2 = param f 1 in
+    set_value_name "One" p1;
+    set_value_name "Two" p2;
+    add_param_attr p1 Attribute.Sext;
+    add_param_attr p2 Attribute.Noalias;
+    remove_param_attr p2 Attribute.Noalias;
+    add_function_attr f Attribute.Nounwind;
+    add_function_attr f Attribute.Noreturn;
+    remove_function_attr f Attribute.Noreturn;
+
+    insist (Before p1 = param_begin f);
+    insist (Before p2 = param_succ p1);
+    insist (At_end f = param_succ p2);
+
+    insist (After p2 = param_end f);
+    insist (After p1 = param_pred p2);
+    insist (At_start f = param_pred p1);
+
+    let lf s x = s ^ "->" ^ value_name x in
+    insist ("->One->Two" = fold_left_params lf "" f);
+
+    let rf x s = value_name x ^ "<-" ^ s in
+    insist ("One<-Two<-" = fold_right_params rf f "");
+
+    dispose_module m
+  end
+
+
+(*===-- Basic Blocks ------------------------------------------------------===*)
+
+let test_basic_blocks () =
+  let ty = function_type void_type [| |] in
+
+  (* CHECK: Bb1
+   *)
+  group "entry";
+  let fn = declare_function "X" ty m in
+  let bb = append_block context "Bb1" fn in
+  insist (bb = entry_block fn);
+  ignore (build_unreachable (builder_at_end context bb));
+
+  (* CHECK-NOWHERE-NOT: Bb2
+   *)
+  group "delete";
+  let fn = declare_function "X2" ty m in
+  let bb = append_block context "Bb2" fn in
+  delete_block bb;
+
+  group "insert";
+  let fn = declare_function "X3" ty m in
+  let bbb = append_block context "b" fn in
+  let bba = insert_block context "a" bbb in
+  insist ([| bba; bbb |] = basic_blocks fn);
+  ignore (build_unreachable (builder_at_end context bba));
+  ignore (build_unreachable (builder_at_end context bbb));
+
+  (* CHECK: Bb3
+   *)
+  group "name/value";
+  let fn = define_function "X4" ty m in
+  let bb = entry_block fn in
+  ignore (build_unreachable (builder_at_end context bb));
+  let bbv = value_of_block bb in
+  set_value_name "Bb3" bbv;
+  insist ("Bb3" = value_name bbv);
+
+  group "casts";
+  let fn = define_function "X5" ty m in
+  let bb = entry_block fn in
+  ignore (build_unreachable (builder_at_end context bb));
+  insist (bb = block_of_value (value_of_block bb));
+  insist (value_is_block (value_of_block bb));
+  insist (not (value_is_block (const_null i32_type)));
+
+  begin group "iteration";
+    let m = create_module context "temp" in
+    let f = declare_function "Temp" (function_type i32_type [| |]) m in
+
+    insist (At_end f = block_begin f);
+    insist (At_start f = block_end f);
+
+    let b1 = append_block context "One" f in
+    let b2 = append_block context "Two" f in
+
+    insist (Before b1 = block_begin f);
+    insist (Before b2 = block_succ b1);
+    insist (At_end f = block_succ b2);
+
+    insist (After b2 = block_end f);
+    insist (After b1 = block_pred b2);
+    insist (At_start f = block_pred b1);
+
+    let lf s x = s ^ "->" ^ value_name (value_of_block x) in
+    insist ("->One->Two" = fold_left_blocks lf "" f);
+
+    let rf x s = value_name (value_of_block x) ^ "<-" ^ s in
+    insist ("One<-Two<-" = fold_right_blocks rf f "");
+
+    dispose_module m
+  end
+
+
+(*===-- Instructions ------------------------------------------------------===*)
+
+let test_instructions () =
+  begin group "iteration";
+    let m = create_module context "temp" in
+    let fty = function_type void_type [| i32_type; i32_type |] in
+    let f = define_function "f" fty m in
+    let bb = entry_block f in
+    let b = builder_at context (At_end bb) in
+
+    insist (At_end bb = instr_begin bb);
+    insist (At_start bb = instr_end bb);
+
+    let i1 = build_add (param f 0) (param f 1) "One" b in
+    let i2 = build_sub (param f 0) (param f 1) "Two" b in
+
+    insist (Before i1 = instr_begin bb);
+    insist (Before i2 = instr_succ i1);
+    insist (At_end bb = instr_succ i2);
+
+    insist (After i2 = instr_end bb);
+    insist (After i1 = instr_pred i2);
+    insist (At_start bb = instr_pred i1);
+
+    let lf s x = s ^ "->" ^ value_name x in
+    insist ("->One->Two" = fold_left_instrs lf "" bb);
+
+    let rf x s = value_name x ^ "<-" ^ s in
+    insist ("One<-Two<-" = fold_right_instrs rf bb "");
+
+    dispose_module m
+  end;
+
+  group "clone instr";
+  begin
+    (* CHECK: %clone = add i32 %0, 2
+     *)
+    let fty = function_type void_type [| i32_type |] in
+    let fn = define_function "BuilderParent" fty m in
+    let bb = entry_block fn in
+    let b = builder_at_end context bb in
+    let p = param fn 0 in
+    let sum = build_add p p "sum" b in
+    let y = const_int i32_type 2 in
+    let clone = instr_clone sum in
+    set_operand clone 0 p;
+    set_operand clone 1 y;
+    insert_into_builder clone "clone" b;
+    ignore (build_ret_void b)
+  end
+
+
+(*===-- Builder -----------------------------------------------------------===*)
+
+let test_builder () =
+  let (++) x f = f x; x in
+
+  begin group "parent";
+    insist (try
+              ignore (insertion_block (builder context));
+              false
+            with Not_found ->
+              true);
+
+    let fty = function_type void_type [| i32_type |] in
+    let fn = define_function "BuilderParent" fty m in
+    let bb = entry_block fn in
+    let b = builder_at_end context bb in
+    let p = param fn 0 in
+    let sum = build_add p p "sum" b in
+    ignore (build_ret_void b);
+
+    insist (fn = block_parent bb);
+    insist (fn = param_parent p);
+    insist (bb = instr_parent sum);
+    insist (bb = insertion_block b)
+  end;
+
+  group "ret void";
+  begin
+    (* CHECK: ret void
+     *)
+    let fty = function_type void_type [| |] in
+    let fn = declare_function "X6" fty m in
+    let b = builder_at_end context (append_block context "Bb01" fn) in
+    ignore (build_ret_void b)
+  end;
+
+  group "ret aggregate";
+  begin
+      (* CHECK: ret { i8, i64 } { i8 4, i64 5 }
+       *)
+      let sty = struct_type context [| i8_type; i64_type |] in
+      let fty = function_type sty [| |] in
+      let fn = declare_function "XA6" fty m in
+      let b = builder_at_end context (append_block context "Bb01" fn) in
+      let agg = [| const_int i8_type 4; const_int i64_type 5 |] in
+      ignore (build_aggregate_ret agg b)
+  end;
+
+  (* The rest of the tests will use one big function. *)
+  let fty = function_type i32_type [| i32_type; i32_type |] in
+  let fn = define_function "X7" fty m in
+  let atentry = builder_at_end context (entry_block fn) in
+  let p1 = param fn 0 ++ set_value_name "P1" in
+  let p2 = param fn 1 ++ set_value_name "P2" in
+  let f1 = build_uitofp p1 float_type "F1" atentry in
+  let f2 = build_uitofp p2 float_type "F2" atentry in
+
+  let bb00 = append_block context "Bb00" fn in
+  ignore (build_unreachable (builder_at_end context bb00));
+
+  group "function attribute";
+  begin
+      ignore (add_function_attr fn Attribute.UWTable);
+      (* CHECK: X7{{.*}}#0
+       * #0 is uwtable, defined at EOF.
+       *)
+      insist ([Attribute.UWTable] = function_attr fn);
+  end;
+
+  group "casts"; begin
+    let void_ptr = pointer_type i8_type in
+
+    (* CHECK-DAG: %build_trunc = trunc i32 %P1 to i8
+     * CHECK-DAG: %build_trunc2 = trunc i32 %P1 to i8
+     * CHECK-DAG: %build_trunc3 = trunc i32 %P1 to i8
+     * CHECK-DAG: %build_zext = zext i8 %build_trunc to i32
+     * CHECK-DAG: %build_zext2 = zext i8 %build_trunc to i32
+     * CHECK-DAG: %build_sext = sext i32 %build_zext to i64
+     * CHECK-DAG: %build_sext2 = sext i32 %build_zext to i64
+     * CHECK-DAG: %build_sext3 = sext i32 %build_zext to i64
+     * CHECK-DAG: %build_uitofp = uitofp i64 %build_sext to float
+     * CHECK-DAG: %build_sitofp = sitofp i32 %build_zext to double
+     * CHECK-DAG: %build_fptoui = fptoui float %build_uitofp to i32
+     * CHECK-DAG: %build_fptosi = fptosi double %build_sitofp to i64
+     * CHECK-DAG: %build_fptrunc = fptrunc double %build_sitofp to float
+     * CHECK-DAG: %build_fptrunc2 = fptrunc double %build_sitofp to float
+     * CHECK-DAG: %build_fpext = fpext float %build_fptrunc to double
+     * CHECK-DAG: %build_fpext2 = fpext float %build_fptrunc to double
+     * CHECK-DAG: %build_inttoptr = inttoptr i32 %P1 to i8*
+     * CHECK-DAG: %build_ptrtoint = ptrtoint i8* %build_inttoptr to i64
+     * CHECK-DAG: %build_ptrtoint2 = ptrtoint i8* %build_inttoptr to i64
+     * CHECK-DAG: %build_bitcast = bitcast i64 %build_ptrtoint to double
+     * CHECK-DAG: %build_bitcast2 = bitcast i64 %build_ptrtoint to double
+     * CHECK-DAG: %build_bitcast3 = bitcast i64 %build_ptrtoint to double
+     * CHECK-DAG: %build_bitcast4 = bitcast i64 %build_ptrtoint to double
+     * CHECK-DAG: %build_pointercast = bitcast i8* %build_inttoptr to i16*
+     *)
+    let inst28 = build_trunc p1 i8_type "build_trunc" atentry in
+    let inst29 = build_zext inst28 i32_type "build_zext" atentry in
+    let inst30 = build_sext inst29 i64_type "build_sext" atentry in
+    let inst31 = build_uitofp inst30 float_type "build_uitofp" atentry in
+    let inst32 = build_sitofp inst29 double_type "build_sitofp" atentry in
+    ignore(build_fptoui inst31 i32_type "build_fptoui" atentry);
+    ignore(build_fptosi inst32 i64_type "build_fptosi" atentry);
+    let inst35 = build_fptrunc inst32 float_type "build_fptrunc" atentry in
+    ignore(build_fpext inst35 double_type "build_fpext" atentry);
+    let inst37 = build_inttoptr p1 void_ptr "build_inttoptr" atentry in
+    let inst38 = build_ptrtoint inst37 i64_type "build_ptrtoint" atentry in
+    ignore(build_bitcast inst38 double_type "build_bitcast" atentry);
+    ignore(build_zext_or_bitcast inst38 double_type "build_bitcast2" atentry);
+    ignore(build_sext_or_bitcast inst38 double_type "build_bitcast3" atentry);
+    ignore(build_trunc_or_bitcast inst38 double_type "build_bitcast4" atentry);
+    ignore(build_pointercast inst37 (pointer_type i16_type) "build_pointercast" atentry);
+
+    ignore(build_zext_or_bitcast inst28 i32_type "build_zext2" atentry);
+    ignore(build_sext_or_bitcast inst29 i64_type "build_sext2" atentry);
+    ignore(build_trunc_or_bitcast p1 i8_type "build_trunc2" atentry);
+    ignore(build_pointercast inst37 i64_type "build_ptrtoint2" atentry);
+    ignore(build_intcast inst29 i64_type "build_sext3" atentry);
+    ignore(build_intcast p1 i8_type "build_trunc3" atentry);
+    ignore(build_fpcast inst35 double_type "build_fpext2" atentry);
+    ignore(build_fpcast inst32 float_type "build_fptrunc2" atentry);
+  end;
+
+  group "comparisons"; begin
+    (* CHECK: %build_icmp_ne = icmp ne i32 %P1, %P2
+     * CHECK: %build_icmp_sle = icmp sle i32 %P2, %P1
+     * CHECK: %build_fcmp_false = fcmp false float %F1, %F2
+     * CHECK: %build_fcmp_true = fcmp true float %F2, %F1
+     * CHECK: %build_is_null{{.*}}= icmp eq{{.*}}%X0,{{.*}}null
+     * CHECK: %build_is_not_null = icmp ne i8* %X1, null
+     * CHECK: %build_ptrdiff
+     *)
+    let c = build_icmp Icmp.Ne    p1 p2 "build_icmp_ne" atentry in
+    insist (Some Icmp.Ne = icmp_predicate c);
+    insist (None = fcmp_predicate c);
+
+    let c = build_icmp Icmp.Sle   p2 p1 "build_icmp_sle" atentry in
+    insist (Some Icmp.Sle = icmp_predicate c);
+    insist (None = fcmp_predicate c);
+
+    let c = build_fcmp Fcmp.False f1 f2 "build_fcmp_false" atentry in
+    (* insist (Some Fcmp.False = fcmp_predicate c); *)
+    insist (None = icmp_predicate c);
+
+    let c = build_fcmp Fcmp.True  f2 f1 "build_fcmp_true" atentry in
+    (* insist (Some Fcmp.True = fcmp_predicate c); *)
+    insist (None = icmp_predicate c);
+
+    let g0 = declare_global (pointer_type i8_type) "g0" m in
+    let g1 = declare_global (pointer_type i8_type) "g1" m in
+    let p0 = build_load g0 "X0" atentry in
+    let p1 = build_load g1 "X1" atentry in
+    ignore (build_is_null p0 "build_is_null" atentry);
+    ignore (build_is_not_null p1 "build_is_not_null" atentry);
+    ignore (build_ptrdiff p1 p0 "build_ptrdiff" atentry);
+  end;
+
+  group "miscellaneous"; begin
+    (* CHECK: %build_call = tail call cc63 i32 @{{.*}}(i32 signext %P2, i32 %P1)
+     * CHECK: %build_select = select i1 %build_icmp, i32 %P1, i32 %P2
+     * CHECK: %build_va_arg = va_arg i8** null, i32
+     * CHECK: %build_extractelement = extractelement <4 x i32> %Vec1, i32 %P2
+     * CHECK: %build_insertelement = insertelement <4 x i32> %Vec1, i32 %P1, i32 %P2
+     * CHECK: %build_shufflevector = shufflevector <4 x i32> %Vec1, <4 x i32> %Vec2, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
+     * CHECK: %build_insertvalue0 = insertvalue{{.*}}%bl, i32 1, 0
+     * CHECK: %build_extractvalue = extractvalue{{.*}}%build_insertvalue1, 1
+     *)
+    let ci = build_call fn [| p2; p1 |] "build_call" atentry in
+    insist (CallConv.c = instruction_call_conv ci);
+    set_instruction_call_conv 63 ci;
+    insist (63 = instruction_call_conv ci);
+    insist (not (is_tail_call ci));
+    set_tail_call true ci;
+    insist (is_tail_call ci);
+    add_instruction_param_attr ci 1 Attribute.Sext;
+    add_instruction_param_attr ci 2 Attribute.Noalias;
+    remove_instruction_param_attr ci 2 Attribute.Noalias;
+
+    let inst46 = build_icmp Icmp.Eq p1 p2 "build_icmp" atentry in
+    ignore (build_select inst46 p1 p2 "build_select" atentry);
+    ignore (build_va_arg
+      (const_null (pointer_type (pointer_type i8_type)))
+      i32_type "build_va_arg" atentry);
+
+    (* Set up some vector vregs. *)
+    let one  = const_int i32_type 1 in
+    let zero = const_int i32_type 0 in
+    let t1 = const_vector [| one; zero; one; zero |] in
+    let t2 = const_vector [| zero; one; zero; one |] in
+    let t3 = const_vector [| one; one; zero; zero |] in
+    let vec1 = build_insertelement t1 p1 p2 "Vec1" atentry in
+    let vec2 = build_insertelement t2 p1 p2 "Vec2" atentry in
+    let sty = struct_type context [| i32_type; i8_type |] in
+
+    ignore (build_extractelement vec1 p2 "build_extractelement" atentry);
+    ignore (build_insertelement vec1 p1 p2 "build_insertelement" atentry);
+    ignore (build_shufflevector vec1 vec2 t3 "build_shufflevector" atentry);
+
+    let p = build_alloca sty "ba" atentry in
+    let agg = build_load p "bl" atentry in
+    let agg0 = build_insertvalue agg (const_int i32_type 1) 0
+                 "build_insertvalue0" atentry in
+    let agg1 = build_insertvalue agg0 (const_int i8_type 2) 1
+                 "build_insertvalue1" atentry in
+    ignore (build_extractvalue agg1 1 "build_extractvalue" atentry)
+  end;
+
+  group "metadata"; begin
+    (* CHECK: %metadata = add i32 %P1, %P2, !test !1
+     * !1 is metadata emitted at EOF.
+     *)
+    let i = build_add p1 p2 "metadata" atentry in
+    insist ((has_metadata i) = false);
+
+    let m1 = const_int i32_type 1 in
+    let m2 = mdstring context "metadata test" in
+    let md = mdnode context [| m1; m2 |] in
+
+    let kind = mdkind_id context "test" in
+    set_metadata i kind md;
+
+    insist ((has_metadata i) = true);
+    insist ((metadata i kind) = Some md);
+
+    clear_metadata i kind;
+
+    insist ((has_metadata i) = false);
+    insist ((metadata i kind) = None);
+
+    set_metadata i kind md
+  end;
+
+  group "named metadata"; begin
+    (* !llvm.module.flags is emitted at EOF. *)
+    let n1 = const_int i32_type 1 in
+    let n2 = mdstring context "Debug Info Version" in
+    let n3 = const_int i32_type 2 in
+    let md = mdnode context [| n1; n2; n3 |] in
+    add_named_metadata_operand m "llvm.module.flags" md;
+
+    insist ((get_named_metadata m "llvm.module.flags") = [| md |])
+  end;
+
+  group "dbg"; begin
+    (* CHECK: %dbg = add i32 %P1, %P2, !dbg !2
+     * !2 is metadata emitted at EOF.
+     *)
+    insist ((current_debug_location atentry) = Some (mdnode context [||]));
+
+    let m_line = const_int i32_type 2 in
+    let m_col = const_int i32_type 3 in
+    let m_scope = mdnode context [| |] in
+    let m_inlined = mdnode context [| |] in
+    let md = mdnode context [| m_line; m_col; m_scope; m_inlined |] in
+    set_current_debug_location atentry md;
+
+    insist ((current_debug_location atentry) = Some md);
+
+    let i = build_add p1 p2 "dbg" atentry in
+    insist ((has_metadata i) = true);
+
+    clear_current_debug_location atentry
+  end;
+
+  group "ret"; begin
+    (* CHECK: ret{{.*}}P1
+     *)
+    let ret = build_ret p1 atentry in
+    position_before ret atentry
+  end;
+
+  (* see test/Feature/exception.ll *)
+  let bblpad = append_block context "Bblpad" fn in
+  let rt = struct_type context [| pointer_type i8_type; i32_type |] in
+  let ft = var_arg_function_type i32_type  [||] in
+  let personality = declare_function "__gxx_personality_v0" ft m in
+  let ztic = declare_global (pointer_type i8_type) "_ZTIc" m in
+  let ztid = declare_global (pointer_type i8_type) "_ZTId" m in
+  let ztipkc = declare_global (pointer_type i8_type) "_ZTIPKc" m in
+  begin
+      set_global_constant true ztic;
+      set_global_constant true ztid;
+      set_global_constant true ztipkc;
+      let lp = build_landingpad rt personality 0 "lpad"
+       (builder_at_end context bblpad) in begin
+           set_cleanup lp true;
+           add_clause lp ztic;
+           insist((pointer_type (pointer_type i8_type)) = type_of ztid);
+           let ety = pointer_type (pointer_type i8_type) in
+           add_clause lp (const_array ety [| ztipkc; ztid |]);
+           ignore (build_resume lp (builder_at_end context bblpad));
+      end;
+      (* CHECK: landingpad{{.*}}personality{{.*}}__gxx_personality_v0
+       * CHECK: cleanup
+       * CHECK: catch{{.*}}i8**{{.*}}@_ZTIc
+       * CHECK: filter{{.*}}@_ZTIPKc{{.*}}@_ZTId
+       * CHECK: resume
+       * *)
+  end;
+
+  group "br"; begin
+    (* CHECK: br{{.*}}Bb02
+     *)
+    let bb02 = append_block context "Bb02" fn in
+    let b = builder_at_end context bb02 in
+    let br = build_br bb02 b in
+    insist (successors br = [| bb02 |]) ;
+    insist (is_conditional br = false) ;
+    insist (get_branch br = Some (`Unconditional bb02)) ;
+  end;
+
+  group "cond_br"; begin
+    (* CHECK: br{{.*}}build_br{{.*}}Bb03{{.*}}Bb00
+     *)
+    let bb03 = append_block context "Bb03" fn in
+    let b = builder_at_end context bb03 in
+    let cond = build_trunc p1 i1_type "build_br" b in
+    let br = build_cond_br cond bb03 bb00 b in
+    insist (num_successors br = 2) ;
+    insist (successor br 0 = bb03) ;
+    insist (successor br 1 = bb00) ;
+    insist (is_conditional br = true) ;
+    insist (get_branch br = Some (`Conditional (cond, bb03, bb00))) ;
+  end;
+
+  group "switch"; begin
+    (* CHECK: switch{{.*}}P1{{.*}}SwiBlock3
+     * CHECK: 2,{{.*}}SwiBlock2
+     *)
+    let bb1 = append_block context "SwiBlock1" fn in
+    let bb2 = append_block context "SwiBlock2" fn in
+    ignore (build_unreachable (builder_at_end context bb2));
+    let bb3 = append_block context "SwiBlock3" fn in
+    ignore (build_unreachable (builder_at_end context bb3));
+    let si = build_switch p1 bb3 1 (builder_at_end context bb1) in begin
+        ignore (add_case si (const_int i32_type 2) bb2);
+        insist (switch_default_dest si = bb3);
+    end;
+    insist (num_successors si = 2) ;
+    insist (get_branch si = None) ;
+  end;
+
+  group "malloc/free"; begin
+      (* CHECK: call{{.*}}@malloc(i32 ptrtoint
+       * CHECK: call{{.*}}@free(i8*
+       * CHECK: call{{.*}}@malloc(i32 %
+       *)
+      let bb1 = append_block context "MallocBlock1" fn in
+      let m1 = (build_malloc (pointer_type i32_type) "m1"
+      (builder_at_end context bb1)) in
+      ignore (build_free m1 (builder_at_end context bb1));
+      ignore (build_array_malloc i32_type p1 "m2" (builder_at_end context bb1));
+      ignore (build_unreachable (builder_at_end context bb1));
+  end;
+
+  group "indirectbr"; begin
+    (* CHECK: indirectbr i8* blockaddress(@X7, %IBRBlock2), [label %IBRBlock2, label %IBRBlock3]
+     *)
+    let bb1 = append_block context "IBRBlock1" fn in
+
+    let bb2 = append_block context "IBRBlock2" fn in
+    ignore (build_unreachable (builder_at_end context bb2));
+
+    let bb3 = append_block context "IBRBlock3" fn in
+    ignore (build_unreachable (builder_at_end context bb3));
+
+    let addr = block_address fn bb2 in
+    let ibr = build_indirect_br addr 2 (builder_at_end context bb1) in
+    ignore (add_destination ibr bb2);
+    ignore (add_destination ibr bb3)
+  end;
+
+  group "invoke"; begin
+    (* CHECK: build_invoke{{.*}}invoke{{.*}}P1{{.*}}P2
+     * CHECK: to{{.*}}Bb04{{.*}}unwind{{.*}}Bblpad
+     *)
+    let bb04 = append_block context "Bb04" fn in
+    let b = builder_at_end context bb04 in
+    ignore (build_invoke fn [| p1; p2 |] bb04 bblpad "build_invoke" b)
+  end;
+
+  group "unreachable"; begin
+    (* CHECK: unreachable
+     *)
+    let bb06 = append_block context "Bb06" fn in
+    let b = builder_at_end context bb06 in
+    ignore (build_unreachable b)
+  end;
+
+  group "arithmetic"; begin
+    let bb07 = append_block context "Bb07" fn in
+    let b = builder_at_end context bb07 in
+
+    (* CHECK: %build_add = add i32 %P1, %P2
+     * CHECK: %build_nsw_add = add nsw i32 %P1, %P2
+     * CHECK: %build_nuw_add = add nuw i32 %P1, %P2
+     * CHECK: %build_fadd = fadd float %F1, %F2
+     * CHECK: %build_sub = sub i32 %P1, %P2
+     * CHECK: %build_nsw_sub = sub nsw i32 %P1, %P2
+     * CHECK: %build_nuw_sub = sub nuw i32 %P1, %P2
+     * CHECK: %build_fsub = fsub float %F1, %F2
+     * CHECK: %build_mul = mul i32 %P1, %P2
+     * CHECK: %build_nsw_mul = mul nsw i32 %P1, %P2
+     * CHECK: %build_nuw_mul = mul nuw i32 %P1, %P2
+     * CHECK: %build_fmul = fmul float %F1, %F2
+     * CHECK: %build_udiv = udiv i32 %P1, %P2
+     * CHECK: %build_sdiv = sdiv i32 %P1, %P2
+     * CHECK: %build_exact_sdiv = sdiv exact i32 %P1, %P2
+     * CHECK: %build_fdiv = fdiv float %F1, %F2
+     * CHECK: %build_urem = urem i32 %P1, %P2
+     * CHECK: %build_srem = srem i32 %P1, %P2
+     * CHECK: %build_frem = frem float %F1, %F2
+     * CHECK: %build_shl = shl i32 %P1, %P2
+     * CHECK: %build_lshl = lshr i32 %P1, %P2
+     * CHECK: %build_ashl = ashr i32 %P1, %P2
+     * CHECK: %build_and = and i32 %P1, %P2
+     * CHECK: %build_or = or i32 %P1, %P2
+     * CHECK: %build_xor = xor i32 %P1, %P2
+     * CHECK: %build_neg = sub i32 0, %P1
+     * CHECK: %build_nsw_neg = sub nsw i32 0, %P1
+     * CHECK: %build_nuw_neg = sub nuw i32 0, %P1
+     * CHECK: %build_fneg = fsub float {{.*}}0{{.*}}, %F1
+     * CHECK: %build_not = xor i32 %P1, -1
+     *)
+    ignore (build_add p1 p2 "build_add" b);
+    ignore (build_nsw_add p1 p2 "build_nsw_add" b);
+    ignore (build_nuw_add p1 p2 "build_nuw_add" b);
+    ignore (build_fadd f1 f2 "build_fadd" b);
+    ignore (build_sub p1 p2 "build_sub" b);
+    ignore (build_nsw_sub p1 p2 "build_nsw_sub" b);
+    ignore (build_nuw_sub p1 p2 "build_nuw_sub" b);
+    ignore (build_fsub f1 f2 "build_fsub" b);
+    ignore (build_mul p1 p2 "build_mul" b);
+    ignore (build_nsw_mul p1 p2 "build_nsw_mul" b);
+    ignore (build_nuw_mul p1 p2 "build_nuw_mul" b);
+    ignore (build_fmul f1 f2 "build_fmul" b);
+    ignore (build_udiv p1 p2 "build_udiv" b);
+    ignore (build_sdiv p1 p2 "build_sdiv" b);
+    ignore (build_exact_sdiv p1 p2 "build_exact_sdiv" b);
+    ignore (build_fdiv f1 f2 "build_fdiv" b);
+    ignore (build_urem p1 p2 "build_urem" b);
+    ignore (build_srem p1 p2 "build_srem" b);
+    ignore (build_frem f1 f2 "build_frem" b);
+    ignore (build_shl p1 p2 "build_shl" b);
+    ignore (build_lshr p1 p2 "build_lshl" b);
+    ignore (build_ashr p1 p2 "build_ashl" b);
+    ignore (build_and p1 p2 "build_and" b);
+    ignore (build_or p1 p2 "build_or" b);
+    ignore (build_xor p1 p2 "build_xor" b);
+    ignore (build_neg p1 "build_neg" b);
+    ignore (build_nsw_neg p1 "build_nsw_neg" b);
+    ignore (build_nuw_neg p1 "build_nuw_neg" b);
+    ignore (build_fneg f1 "build_fneg" b);
+    ignore (build_not p1 "build_not" b);
+    ignore (build_unreachable b)
+  end;
+
+  group "memory"; begin
+    let bb08 = append_block context "Bb08" fn in
+    let b = builder_at_end context bb08 in
+
+    (* CHECK: %build_alloca = alloca i32
+     * CHECK: %build_array_alloca = alloca i32, i32 %P2
+     * CHECK: %build_load = load volatile i32* %build_array_alloca, align 4
+     * CHECK: store volatile i32 %P2, i32* %build_alloca, align 4
+     * CHECK: %build_gep = getelementptr i32* %build_array_alloca, i32 %P2
+     * CHECK: %build_in_bounds_gep = getelementptr inbounds i32* %build_array_alloca, i32 %P2
+     * CHECK: %build_struct_gep = getelementptr inbounds{{.*}}%build_alloca2, i32 0, i32 1
+     * CHECK: %build_atomicrmw = atomicrmw xchg i8* %p, i8 42 seq_cst
+     *)
+    let alloca = build_alloca i32_type "build_alloca" b in
+    let array_alloca = build_array_alloca i32_type p2 "build_array_alloca" b in
+
+    let load = build_load array_alloca "build_load" b in
+    ignore(set_alignment 4 load);
+    ignore(set_volatile true load);
+    insist(true = is_volatile load);
+    insist(4 = alignment load);
+
+    let store = build_store p2 alloca b in
+    ignore(set_volatile true store);
+    ignore(set_alignment 4 store);
+    insist(true = is_volatile store);
+    insist(4 = alignment store);
+    ignore(build_gep array_alloca [| p2 |] "build_gep" b);
+    ignore(build_in_bounds_gep array_alloca [| p2 |] "build_in_bounds_gep" b);
+
+    let sty = struct_type context [| i32_type; i8_type |] in
+    let alloca2 = build_alloca sty "build_alloca2" b in
+    ignore(build_struct_gep alloca2 1 "build_struct_gep" b);
+
+    let p = build_alloca i8_type "p" b in
+    ignore(build_atomicrmw AtomicRMWBinOp.Xchg p (const_int i8_type 42)
+              AtomicOrdering.SequentiallyConsistent false "build_atomicrmw"
+              b);
+
+    ignore(build_unreachable b)
+  end;
+
+  group "string"; begin
+    let bb09 = append_block context "Bb09" fn in
+    let b = builder_at_end context bb09 in
+    let p = build_alloca (pointer_type i8_type) "p" b in
+    (* build_global_string is emitted above.
+     * CHECK: store{{.*}}build_global_string1{{.*}}p
+     * *)
+    ignore (build_global_string "stringval" "build_global_string" b);
+    let g = build_global_stringptr "stringval" "build_global_string1" b in
+    ignore (build_store g p b);
+    ignore(build_unreachable b);
+  end;
+
+  group "phi"; begin
+    (* CHECK: PhiNode{{.*}}P1{{.*}}PhiBlock1{{.*}}P2{{.*}}PhiBlock2
+     *)
+    let b1 = append_block context "PhiBlock1" fn in
+    let b2 = append_block context "PhiBlock2" fn in
+
+    let jb = append_block context "PhiJoinBlock" fn in
+    ignore (build_br jb (builder_at_end context b1));
+    ignore (build_br jb (builder_at_end context b2));
+    let at_jb = builder_at_end context jb in
+
+    let phi = build_phi [(p1, b1)] "PhiNode" at_jb in
+    insist ([(p1, b1)] = incoming phi);
+
+    add_incoming (p2, b2) phi;
+    insist ([(p1, b1); (p2, b2)] = incoming phi);
+
+    ignore (build_unreachable at_jb);
+  end
+
+(* End-of-file checks for things like metdata and attributes.
+ * CHECK: attributes #0 = {{.*}}uwtable{{.*}}
+ * CHECK: !llvm.module.flags = !{!0}
+ * CHECK: !0 = !{i32 1, !"Debug Info Version", i32 2}
+ * CHECK: !1 = !{i32 1, !"metadata test"}
+ * CHECK: !2 = !MDLocation(line: 2, column: 3, scope: !3, inlinedAt: !3)
+ *)
+
+(*===-- Pass Managers -----------------------------------------------------===*)
+
+let test_pass_manager () =
+  let (++) x f = ignore (f x); x in
+
+  begin group "module pass manager";
+    ignore (PassManager.create ()
+             ++ PassManager.run_module m
+             ++ PassManager.dispose)
+  end;
+
+  begin group "function pass manager";
+    let fty = function_type void_type [| |] in
+    let fn = define_function "FunctionPassManager" fty m in
+    ignore (build_ret_void (builder_at_end context (entry_block fn)));
+
+    ignore (PassManager.create_function m
+             ++ PassManager.initialize
+             ++ PassManager.run_function fn
+             ++ PassManager.finalize
+             ++ PassManager.dispose)
+  end
+
+
+(*===-- Memory Buffer -----------------------------------------------------===*)
+
+let test_memory_buffer () =
+  group "memory buffer";
+  let buf = MemoryBuffer.of_string "foobar" in
+  insist ((MemoryBuffer.as_string buf) = "foobar")
+
+
+(*===-- Writer ------------------------------------------------------------===*)
+
+let test_writer () =
+  group "valid";
+  insist (match Llvm_analysis.verify_module m with
+          | None -> true
+          | Some msg -> prerr_string msg; false);
+
+  group "writer";
+  insist (write_bitcode_file m filename);
+
+  dispose_module m
+
+
+(*===-- Driver ------------------------------------------------------------===*)
+
+let _ =
+  suite "conversion"       test_conversion;
+  suite "target"           test_target;
+  suite "constants"        test_constants;
+  suite "global values"    test_global_values;
+  suite "global variables" test_global_variables;
+  suite "uses"             test_uses;
+  suite "users"            test_users;
+  suite "aliases"          test_aliases;
+  suite "functions"        test_functions;
+  suite "params"           test_params;
+  suite "basic blocks"     test_basic_blocks;
+  suite "instructions"     test_instructions;
+  suite "builder"          test_builder;
+  suite "pass manager"     test_pass_manager;
+  suite "memory buffer"    test_memory_buffer;
+  suite "writer"           test_writer; (* Keep this last; it disposes m. *)
+  exit !exit_status
diff --git a/test/Bindings/OCaml/executionengine.ml b/test/Bindings/OCaml/executionengine.ml
new file mode 100644
index 000000000000..1de2cfb7fefd
--- /dev/null
+++ b/test/Bindings/OCaml/executionengine.ml
@@ -0,0 +1,112 @@
+(* RUN: cp %s %T/executionengine.ml
+ * RUN: %ocamlc -g -warn-error A -package llvm.executionengine -linkpkg %T/executionengine.ml -o %t
+ * RUN: %t
+ * RUN: %ocamlopt -g -warn-error A -package llvm.executionengine -linkpkg %T/executionengine.ml -o %t
+ * RUN: %t
+ * REQUIRES: native, object-emission
+ * XFAIL: vg_leak
+ *)
+
+open Llvm
+open Llvm_executionengine
+open Llvm_target
+
+(* Note that this takes a moment to link, so it's best to keep the number of
+   individual tests low. *)
+
+let context = global_context ()
+let i8_type = Llvm.i8_type context
+let i32_type = Llvm.i32_type context
+let i64_type = Llvm.i64_type context
+let double_type = Llvm.double_type context
+
+let () =
+  assert (Llvm_executionengine.initialize ())
+
+let bomb msg =
+  prerr_endline msg;
+  exit 2
+
+let define_getglobal m pg =
+  let fn = define_function "getglobal" (function_type i32_type [||]) m in
+  let b = builder_at_end (global_context ()) (entry_block fn) in
+  let g = build_call pg [||] "" b in
+  ignore (build_ret g b);
+  fn
+
+let define_plus m =
+  let fn = define_function "plus" (function_type i32_type [| i32_type;
+                                                             i32_type |]) m in
+  let b = builder_at_end (global_context ()) (entry_block fn) in
+  let add = build_add (param fn 0) (param fn 1) "sum" b in
+  ignore (build_ret add b);
+  fn
+
+let test_executionengine () =
+  let open Ctypes in
+
+  (* create *)
+  let m = create_module (global_context ()) "test_module" in
+  let ee = create m in
+
+  (* add plus *)
+  ignore (define_plus m);
+
+  (* declare global variable *)
+  ignore (define_global "globvar" (const_int i32_type 23) m);
+
+  (* add module *)
+  let m2 = create_module (global_context ()) "test_module2" in
+  add_module m2 ee;
+
+  (* add global mapping *)
+  (* BROKEN: see PR20656 *)
+  (* let g = declare_function "g" (function_type i32_type [||]) m2 in
+  let cg = coerce (Foreign.funptr (void @-> returning int32_t)) (ptr void)
+                                  (fun () -> 42l) in
+  add_global_mapping g cg ee;
+
+  (* check g *)
+  let cg' = get_pointer_to_global g (ptr void) ee in
+  if 0 <> ptr_compare cg cg' then bomb "int pointers to g differ";
+
+  (* add getglobal *)
+  let getglobal = define_getglobal m2 g in*)
+
+  (* run_static_ctors *)
+  run_static_ctors ee;
+
+  (* get a handle on globvar *)
+  let varh    = get_global_value_address "globvar" int32_t ee in
+  if 23l <> varh then bomb "get_global_value_address didn't work";
+
+  (* call plus *)
+  let cplusty = Foreign.funptr (int32_t @-> int32_t @-> returning int32_t) in
+  let cplus   = get_function_address "plus" cplusty ee in
+  if 4l <> cplus 2l 2l then bomb "plus didn't work";
+
+  (* call getglobal *)
+  (* let cgetglobalty = Foreign.funptr (void @-> returning int32_t) in
+  let cgetglobal   = get_pointer_to_global getglobal cgetglobalty ee in
+  if 42l <> cgetglobal () then bomb "getglobal didn't work"; *)
+
+  (* remove_module *)
+  remove_module m2 ee;
+  dispose_module m2;
+
+  (* run_static_dtors *)
+  run_static_dtors ee;
+
+  (* Show that the data layout binding links and runs.*)
+  let dl = data_layout ee in
+
+  (* Demonstrate that a garbage pointer wasn't returned. *)
+  let ty = DataLayout.intptr_type context dl in
+  if ty != i32_type && ty != i64_type then bomb "target_data did not work";
+
+  (* dispose *)
+  dispose ee
+
+let () =
+  test_executionengine ();
+  Gc.compact ()
diff --git a/test/Bindings/OCaml/ext_exc.ml b/test/Bindings/OCaml/ext_exc.ml
new file mode 100644
index 000000000000..2b44803f5170
--- /dev/null
+++ b/test/Bindings/OCaml/ext_exc.ml
@@ -0,0 +1,22 @@
+(* RUN: cp %s %T/ext_exc.ml
+ * RUN: %ocamlc -g -warn-error A -package llvm.bitreader -linkpkg %T/ext_exc.ml -o %t
+ * RUN: %t
+ * RUN: %ocamlopt -g -warn-error A -package llvm.bitreader -linkpkg %T/ext_exc.ml -o %t
+ * RUN: %t
+ * XFAIL: vg_leak
+ *)
+
+let context = Llvm.global_context ()
+
+(* this used to crash, we must not use 'external' in .mli files, but 'val' if we
+ * want the let _ bindings executed, see http://caml.inria.fr/mantis/view.php?id=4166 *)
+let _ =
+    try
+        ignore (Llvm_bitreader.get_module context (Llvm.MemoryBuffer.of_stdin ()))
+    with
+    Llvm_bitreader.Error _ -> ();;
+let _ =
+    try
+        ignore (Llvm.MemoryBuffer.of_file "/path/to/nonexistent/file")
+    with
+    Llvm.IoError _ -> ();;
diff --git a/test/Bindings/OCaml/ipo.ml b/test/Bindings/OCaml/ipo.ml
new file mode 100644
index 000000000000..fc728b92ff3d
--- /dev/null
+++ b/test/Bindings/OCaml/ipo.ml
@@ -0,0 +1,72 @@
+(* RUN: cp %s %T/ipo_opts.ml
+ * RUN: %ocamlc -g -warn-error A -package llvm.ipo -linkpkg %T/ipo_opts.ml -o %t
+ * RUN: %t %t.bc
+ * RUN: %ocamlopt -g -warn-error A -package llvm.ipo -linkpkg %T/ipo_opts.ml -o %t
+ * RUN: %t %t.bc
+ * XFAIL: vg_leak
+ *)
+
+(* Note: It takes several seconds for ocamlopt to link an executable with
+         libLLVMCore.a, so it's better to write a big test than a bunch of
+         little ones. *)
+
+open Llvm
+open Llvm_ipo
+open Llvm_target
+
+let context = global_context ()
+let void_type = Llvm.void_type context
+let i8_type = Llvm.i8_type context
+
+(* Tiny unit test framework - really just to help find which line is busted *)
+let print_checkpoints = false
+
+let suite name f =
+  if print_checkpoints then
+    prerr_endline (name ^ ":");
+  f ()
+
+
+(*===-- Fixture -----------------------------------------------------------===*)
+
+let filename = Sys.argv.(1)
+let m = create_module context filename
+
+
+(*===-- Transforms --------------------------------------------------------===*)
+
+let test_transforms () =
+  let (++) x f = f x; x in
+
+  let fty = function_type i8_type [| |] in
+  let fn = define_function "fn" fty m in
+  let fn2 = define_function "fn2" fty m in begin
+      ignore (build_ret (const_int i8_type 4) (builder_at_end context (entry_block fn)));
+      let b = builder_at_end context  (entry_block fn2) in
+      ignore (build_ret (build_call fn [| |] "" b) b);
+  end;
+
+  ignore (PassManager.create ()
+           ++ add_argument_promotion
+           ++ add_constant_merge
+           ++ add_dead_arg_elimination
+           ++ add_function_attrs
+           ++ add_function_inlining
+           ++ add_always_inliner
+           ++ add_global_dce
+           ++ add_global_optimizer
+           ++ add_ipc_propagation
+           ++ add_prune_eh
+           ++ add_ipsccp
+           ++ add_internalize ~all_but_main:true
+           ++ add_strip_dead_prototypes
+           ++ add_strip_symbols
+           ++ PassManager.run_module m
+           ++ PassManager.dispose)
+
+
+(*===-- Driver ------------------------------------------------------------===*)
+
+let _ =
+  suite "transforms" test_transforms;
+  dispose_module m
diff --git a/test/Bindings/OCaml/irreader.ml b/test/Bindings/OCaml/irreader.ml
new file mode 100644
index 000000000000..e1771e75dd49
--- /dev/null
+++ b/test/Bindings/OCaml/irreader.ml
@@ -0,0 +1,59 @@
+(* RUN: cp %s %T/irreader.ml
+ * RUN: %ocamlc -g -warn-error A -package llvm.irreader -linkpkg %T/irreader.ml -o %t
+ * RUN: %t
+ * RUN: %ocamlopt -g -warn-error A -package llvm.irreader -linkpkg %T/irreader.ml -o %t
+ * RUN: %t
+ * XFAIL: vg_leak
+ *)
+
+(* Note: It takes several seconds for ocamlopt to link an executable with
+         libLLVMCore.a, so it's better to write a big test than a bunch of
+         little ones. *)
+
+open Llvm
+open Llvm_irreader
+
+let context = global_context ()
+
+(* Tiny unit test framework - really just to help find which line is busted *)
+let print_checkpoints = false
+
+let suite name f =
+  if print_checkpoints then
+    prerr_endline (name ^ ":");
+  f ()
+
+let _ =
+  Printexc.record_backtrace true
+
+let insist cond =
+  if not cond then failwith "insist"
+
+
+(*===-- IR Reader ---------------------------------------------------------===*)
+
+let test_irreader () =
+  begin
+    let buf = MemoryBuffer.of_string "@foo = global i32 42" in
+    let m   = parse_ir context buf in
+    match lookup_global "foo" m with
+    | Some foo ->
+        insist ((global_initializer foo) = (const_int (i32_type context) 42))
+    | None ->
+        failwith "global"
+  end;
+
+  begin
+    let buf = MemoryBuffer.of_string "@foo = global garble" in
+    try
+      ignore (parse_ir context buf);
+      failwith "parsed"
+    with Llvm_irreader.Error _ ->
+      ()
+  end
+
+
+(*===-- Driver ------------------------------------------------------------===*)
+
+let _ =
+  suite "irreader" test_irreader
diff --git a/test/Bindings/OCaml/linker.ml b/test/Bindings/OCaml/linker.ml
new file mode 100644
index 000000000000..1ea0be9d3dc3
--- /dev/null
+++ b/test/Bindings/OCaml/linker.ml
@@ -0,0 +1,63 @@
+(* RUN: cp %s %T/linker.ml
+ * RUN: %ocamlc -g -warn-error A -package llvm.linker -linkpkg %T/linker.ml -o %t
+ * RUN: %t
+ * RUN: %ocamlopt -g -warn-error A -package llvm.linker -linkpkg %T/linker.ml -o %t
+ * RUN: %t
+ * XFAIL: vg_leak
+ *)
+
+(* Note: It takes several seconds for ocamlopt to link an executable with
+         libLLVMCore.a, so it's better to write a big test than a bunch of
+         little ones. *)
+
+open Llvm
+open Llvm_linker
+
+let context = global_context ()
+let void_type = Llvm.void_type context
+
+(* Tiny unit test framework - really just to help find which line is busted *)
+let print_checkpoints = false
+
+let suite name f =
+  if print_checkpoints then
+    prerr_endline (name ^ ":");
+  f ()
+
+
+(*===-- Linker -----------------------------------------------------------===*)
+
+let test_linker () =
+  let fty = function_type void_type [| |] in
+
+  let make_module name =
+    let m = create_module context name in
+    let fn = define_function ("fn_" ^ name) fty m in
+    ignore (build_ret_void (builder_at_end context (entry_block fn)));
+    m
+  in
+
+  let m1 = make_module "one"
+  and m2 = make_module "two" in
+  link_modules m1 m2;
+  dispose_module m1;
+  dispose_module m2;
+
+  let m1 = make_module "one"
+  and m2 = make_module "two" in
+  link_modules m1 m2;
+  dispose_module m1;
+
+  let m1 = make_module "one"
+  and m2 = make_module "one" in
+  try
+    link_modules m1 m2;
+    failwith "must raise"
+  with Error _ ->
+    dispose_module m1;
+    dispose_module m2
+
+(*===-- Driver ------------------------------------------------------------===*)
+
+let _ =
+  suite "linker" test_linker
diff --git a/test/Bindings/OCaml/lit.local.cfg b/test/Bindings/OCaml/lit.local.cfg
new file mode 100644
index 000000000000..7a83ca142808
--- /dev/null
+++ b/test/Bindings/OCaml/lit.local.cfg
@@ -0,0 +1,7 @@
+config.suffixes = ['.ml']
+
+if not 'ocaml' in config.root.llvm_bindings:
+    config.unsupported = True
+
+if config.root.have_ocaml_ounit not in ('1', 'TRUE'):
+    config.unsupported = True
diff --git a/test/Bindings/OCaml/passmgr_builder.ml b/test/Bindings/OCaml/passmgr_builder.ml
new file mode 100644
index 000000000000..5dd9d4e00e89
--- /dev/null
+++ b/test/Bindings/OCaml/passmgr_builder.ml
@@ -0,0 +1,64 @@
+(* RUN: cp %s %T/passmgr_builder.ml
+ * RUN: %ocamlc -g -warn-error A -package llvm.passmgr_builder -linkpkg %T/passmgr_builder.ml -o %t
+ * RUN: %t %t.bc
+ * RUN: %ocamlopt -g -warn-error A -package llvm.passmgr_builder -linkpkg %T/passmgr_builder.ml -o %t
+ * RUN: %t %t.bc
+ * XFAIL: vg_leak
+ *)
+
+(* Note: It takes several seconds for ocamlopt to link an executable with
+         libLLVMCore.a, so it's better to write a big test than a bunch of
+         little ones. *)
+
+open Llvm
+open Llvm_passmgr_builder
+
+let context = global_context ()
+let void_type = Llvm.void_type context
+
+(* Tiny unit test framework - really just to help find which line is busted *)
+let print_checkpoints = false
+
+let suite name f =
+  if print_checkpoints then
+    prerr_endline (name ^ ":");
+  f ()
+
+
+(*===-- Fixture -----------------------------------------------------------===*)
+
+let filename = Sys.argv.(1)
+let m = create_module context filename
+
+
+(*===-- Pass Manager Builder ----------------------------------------------===*)
+
+let test_pmbuilder () =
+  let (++) x f = ignore (f x); x in
+
+  let module_passmgr = PassManager.create () in
+  let func_passmgr   = PassManager.create_function m in
+  let lto_passmgr    = PassManager.create () in
+
+  ignore (Llvm_passmgr_builder.create ()
+           ++ set_opt_level 3
+           ++ set_size_level 1
+           ++ set_disable_unit_at_a_time false
+           ++ set_disable_unroll_loops false
+           ++ use_inliner_with_threshold 10
+           ++ populate_function_pass_manager func_passmgr
+           ++ populate_module_pass_manager module_passmgr
+           ++ populate_lto_pass_manager lto_passmgr
+                  ~internalize:false ~run_inliner:false);
+  Gc.compact ();
+
+  PassManager.dispose module_passmgr;
+  PassManager.dispose func_passmgr;
+  PassManager.dispose lto_passmgr
+
+
+(*===-- Driver ------------------------------------------------------------===*)
+
+let _ =
+  suite "pass manager builder" test_pmbuilder;
+  dispose_module m
diff --git a/test/Bindings/OCaml/scalar_opts.ml b/test/Bindings/OCaml/scalar_opts.ml
new file mode 100644
index 000000000000..3017fb131129
--- /dev/null
+++ b/test/Bindings/OCaml/scalar_opts.ml
@@ -0,0 +1,92 @@
+(* RUN: cp %s %T/scalar_opts.ml
+ * RUN: %ocamlc -g -warn-error A -package llvm.scalar_opts -linkpkg %T/scalar_opts.ml -o %t
+ * RUN: %t %t.bc
+ * RUN: %ocamlopt -g -warn-error A -package llvm.scalar_opts -linkpkg %T/scalar_opts.ml -o %t
+ * RUN: %t %t.bc
+ * XFAIL: vg_leak
+ *)
+
+(* Note: It takes several seconds for ocamlopt to link an executable with
+         libLLVMCore.a, so it's better to write a big test than a bunch of
+         little ones. *)
+
+open Llvm
+open Llvm_scalar_opts
+open Llvm_target
+
+let context = global_context ()
+let void_type = Llvm.void_type context
+
+(* Tiny unit test framework - really just to help find which line is busted *)
+let print_checkpoints = false
+
+let suite name f =
+  if print_checkpoints then
+    prerr_endline (name ^ ":");
+  f ()
+
+
+(*===-- Fixture -----------------------------------------------------------===*)
+
+let filename = Sys.argv.(1)
+let m = create_module context filename
+
+
+(*===-- Transforms --------------------------------------------------------===*)
+
+let test_transforms () =
+  let (++) x f = f x; x in
+
+  let fty = function_type void_type [| |] in
+  let fn = define_function "fn" fty m in
+  ignore (build_ret_void (builder_at_end context (entry_block fn)));
+
+  ignore (PassManager.create_function m
+           ++ add_aggressive_dce
+           ++ add_alignment_from_assumptions
+           ++ add_cfg_simplification
+           ++ add_dead_store_elimination
+           ++ add_scalarizer
+           ++ add_merged_load_store_motion
+           ++ add_gvn
+           ++ add_ind_var_simplification
+           ++ add_instruction_combination
+           ++ add_jump_threading
+           ++ add_licm
+           ++ add_loop_deletion
+           ++ add_loop_idiom
+           ++ add_loop_rotation
+           ++ add_loop_reroll
+           ++ add_loop_unroll
+           ++ add_loop_unswitch
+           ++ add_memcpy_opt
+           ++ add_partially_inline_lib_calls
+           ++ add_lower_switch
+           ++ add_memory_to_register_promotion
+           ++ add_reassociation
+           ++ add_sccp
+           ++ add_scalar_repl_aggregation
+           ++ add_scalar_repl_aggregation_ssa
+           ++ add_scalar_repl_aggregation_with_threshold 4
+           ++ add_lib_call_simplification
+           ++ add_tail_call_elimination
+           ++ add_constant_propagation
+           ++ add_memory_to_register_demotion
+           ++ add_verifier
+           ++ add_correlated_value_propagation
+           ++ add_early_cse
+           ++ add_lower_expect_intrinsic
+           ++ add_type_based_alias_analysis
+           ++ add_scoped_no_alias_alias_analysis
+           ++ add_basic_alias_analysis
+           ++ PassManager.initialize
+           ++ PassManager.run_function fn
+           ++ PassManager.finalize
+           ++ PassManager.dispose)
+
+
+(*===-- Driver ------------------------------------------------------------===*)
+
+let _ =
+  suite "transforms" test_transforms;
+  dispose_module m
diff --git a/test/Bindings/OCaml/target.ml b/test/Bindings/OCaml/target.ml
new file mode 100644
index 000000000000..41faefa8ea68
--- /dev/null
+++ b/test/Bindings/OCaml/target.ml
@@ -0,0 +1,116 @@
+(* RUN: cp %s %T/target.ml
+ * RUN: %ocamlc -g -warn-error A -package llvm.target -package llvm.all_backends -linkpkg %T/target.ml -o %t
+ * RUN: %ocamlopt -g -warn-error A -package llvm.target -package llvm.all_backends -linkpkg %T/target.ml -o %t
+ * RUN: %t %t.bc
+ * XFAIL: vg_leak
+ *)
+
+(* Note: It takes several seconds for ocamlopt to link an executable with
+         libLLVMCore.a, so it's better to write a big test than a bunch of
+         little ones. *)
+
+open Llvm
+open Llvm_target
+
+let () = Llvm_all_backends.initialize ()
+
+let context = global_context ()
+let i32_type = Llvm.i32_type context
+let i64_type = Llvm.i64_type context
+
+(* Tiny unit test framework - really just to help find which line is busted *)
+let print_checkpoints = false
+
+let _ =
+  Printexc.record_backtrace true
+
+let assert_equal a b =
+  if a <> b then failwith "assert_equal"
+
+
+(*===-- Fixture -----------------------------------------------------------===*)
+
+let filename = Sys.argv.(1)
+let m = create_module context filename
+
+let target = Target.by_triple (Target.default_triple ())
+
+let machine = TargetMachine.create (Target.default_triple ()) target
+
+(*===-- Data Layout -------------------------------------------------------===*)
+
+let test_target_data () =
+  let module DL = DataLayout in
+  let layout = "e-p:32:32-f64:32:64-v64:32:64-v128:32:128-n32-S32" in
+  let dl     = DL.of_string layout in
+  let sty    = struct_type context [| i32_type; i64_type |] in
+
+  assert_equal (DL.as_string dl) layout;
+  assert_equal (DL.byte_order dl) Endian.Little;
+  assert_equal (DL.pointer_size dl) 4;
+  assert_equal (DL.intptr_type context dl) i32_type;
+  assert_equal (DL.qualified_pointer_size 0 dl) 4;
+  assert_equal (DL.qualified_intptr_type context 0 dl) i32_type;
+  assert_equal (DL.size_in_bits sty dl) (Int64.of_int 96);
+  assert_equal (DL.store_size sty dl) (Int64.of_int 12);
+  assert_equal (DL.abi_size sty dl) (Int64.of_int 12);
+  assert_equal (DL.stack_align sty dl) 4;
+  assert_equal (DL.preferred_align sty dl) 8;
+  assert_equal (DL.preferred_align_of_global (declare_global sty "g" m) dl) 8;
+  assert_equal (DL.element_at_offset sty (Int64.of_int 1) dl) 0;
+  assert_equal (DL.offset_of_element sty 1 dl) (Int64.of_int 4);
+
+  let pm = PassManager.create () in
+  ignore (DL.add_to_pass_manager pm dl)
+
+
+(*===-- Target ------------------------------------------------------------===*)
+
+let test_target () =
+  let module T = Target in
+  ignore (T.succ target);
+  ignore (T.name target);
+  ignore (T.description target);
+  ignore (T.has_jit target);
+  ignore (T.has_target_machine target);
+  ignore (T.has_asm_backend target)
+
+
+(*===-- Target Machine ----------------------------------------------------===*)
+
+let test_target_machine () =
+  let module TM = TargetMachine in
+  assert_equal (TM.target machine) target;
+  assert_equal (TM.triple machine) (Target.default_triple ());
+  assert_equal (TM.cpu machine) "";
+  assert_equal (TM.features machine) "";
+  ignore (TM.data_layout machine);
+  TM.set_verbose_asm true machine;
+  let pm = PassManager.create () in
+  TM.add_analysis_passes pm machine
+
+
+(*===-- Code Emission -----------------------------------------------------===*)
+
+let test_code_emission () =
+  TargetMachine.emit_to_file m CodeGenFileType.ObjectFile filename machine;
+  try
+    TargetMachine.emit_to_file m CodeGenFileType.ObjectFile
+                               "/nonexistent/file" machine;
+    failwith "must raise"
+  with Llvm_target.Error _ ->
+    ();
+
+  let buf = TargetMachine.emit_to_memory_buffer m CodeGenFileType.ObjectFile
+                                                machine in
+  Llvm.MemoryBuffer.dispose buf
+
+
+(*===-- Driver ------------------------------------------------------------===*)
+
+let _ =
+  test_target_data ();
+  test_target ();
+  test_target_machine ();
+  test_code_emission ();
+  dispose_module m
diff --git a/test/Bindings/OCaml/transform_utils.ml b/test/Bindings/OCaml/transform_utils.ml
new file mode 100644
index 000000000000..6b46df173b08
--- /dev/null
+++ b/test/Bindings/OCaml/transform_utils.ml
@@ -0,0 +1,21 @@
+(* RUN: cp %s %T/transform_utils.ml
+ * RUN: %ocamlc -g -warn-error A -package llvm.transform_utils -linkpkg %T/transform_utils.ml -o %t
+ * RUN: %t
+ * RUN: %ocamlopt -g -warn-error A -package llvm.transform_utils -linkpkg %T/transform_utils.ml -o %t
+ * RUN: %t
+ * XFAIL: vg_leak
+ *)
+
+open Llvm
+open Llvm_transform_utils
+
+let context = global_context ()
+
+let test_clone_module () =
+  let m  = create_module context "mod" in
+  let m' = clone_module m in
+  if m == m' then failwith "m == m'";
+  if string_of_llmodule m <> string_of_llmodule m' then failwith "string_of m <> m'"
+
+let () =
+  test_clone_module ()
diff --git a/test/Bindings/OCaml/vectorize.ml b/test/Bindings/OCaml/vectorize.ml
new file mode 100644
index 000000000000..c5b03b525375
--- /dev/null
+++ b/test/Bindings/OCaml/vectorize.ml
@@ -0,0 +1,56 @@
+(* RUN: cp %s %T/vectorize_opts.ml
+ * RUN: %ocamlc -g -warn-error A -package llvm.vectorize -linkpkg %T/vectorize_opts.ml -o %t
+ * RUN: %t %t.bc
+ * RUN: %ocamlopt -g -warn-error A -package llvm.vectorize -linkpkg %T/vectorize_opts.ml -o %t
+ * RUN: %t %t.bc
+ * XFAIL: vg_leak
+ *)
+
+(* Note: It takes several seconds for ocamlopt to link an executable with
+         libLLVMCore.a, so it's better to write a big test than a bunch of
+         little ones. *)
+
+open Llvm
+open Llvm_vectorize
+open Llvm_target
+
+let context = global_context ()
+let void_type = Llvm.void_type context
+
+(* Tiny unit test framework - really just to help find which line is busted *)
+let print_checkpoints = false
+
+let suite name f =
+  if print_checkpoints then
+    prerr_endline (name ^ ":");
+  f ()
+
+
+(*===-- Fixture -----------------------------------------------------------===*)
+
+let filename = Sys.argv.(1)
+let m = create_module context filename
+
+
+(*===-- Transforms --------------------------------------------------------===*)
+
+let test_transforms () =
+  let (++) x f = f x; x in
+
+  let fty = function_type void_type [| |] in
+  let fn = define_function "fn" fty m in
+  ignore (build_ret_void (builder_at_end context (entry_block fn)));
+
+  ignore (PassManager.create ()
+           ++ add_bb_vectorize
+           ++ add_loop_vectorize
+           ++ add_slp_vectorize
+           ++ PassManager.run_module m
+           ++ PassManager.dispose)
+
+
+(*===-- Driver ------------------------------------------------------------===*)
+
+let _ =
+  suite "transforms" test_transforms;
+  dispose_module m
diff --git a/test/Bindings/Ocaml/analysis.ml b/test/Bindings/Ocaml/analysis.ml
deleted file mode 100644
index c02645c0bce3..000000000000
--- a/test/Bindings/Ocaml/analysis.ml
+++ /dev/null
@@ -1,54 +0,0 @@
-(* RUN: rm -rf %t.builddir
- * RUN: mkdir -p %t.builddir
- * RUN: cp %s %t.builddir
- * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa %t.builddir/analysis.ml -o %t
- * RUN: %t
- * XFAIL: vg_leak
- *)
-
-open Llvm
-open Llvm_analysis
-
-(* Note that this takes a moment to link, so it's best to keep the number of
-   individual tests low. *)
-
-let context = global_context ()
-
-let test x = if not x then exit 1 else ()
-
-let bomb msg =
-  prerr_endline msg;
-  exit 2
-
-let _ =
-  let fty = function_type (void_type context) [| |] in
-  let m = create_module context "valid_m" in
-  let fn = define_function "valid_fn" fty m in
-  let at_entry = builder_at_end context (entry_block fn) in
-  ignore (build_ret_void at_entry);
-  
-  
-  (* Test that valid constructs verify. *)
-  begin match verify_module m with
-    Some msg -> bomb "valid module failed verification!"
-  | None -> ()
-  end;
-  
-  if not (verify_function fn) then bomb "valid function failed verification!";
-  
-  
-  (* Test that invalid constructs do not verify.
-     A basic block can contain only one terminator instruction. *)
-  ignore (build_ret_void at_entry);
-  
-  begin match verify_module m with
-    Some msg -> ()
-  | None -> bomb "invalid module passed verification!"
-  end;
-  
-  if verify_function fn then bomb "invalid function passed verification!";
-  
-  
-  dispose_module m
-  
-  (* Don't bother to test assert_valid_{module,function}. *)
diff --git a/test/Bindings/Ocaml/bitreader.ml b/test/Bindings/Ocaml/bitreader.ml
deleted file mode 100644
index f1d202ab0490..000000000000
--- a/test/Bindings/Ocaml/bitreader.ml
+++ /dev/null
@@ -1,79 +0,0 @@
-(* RUN: rm -rf %t.builddir
- * RUN: mkdir -p %t.builddir
- * RUN: cp %s %t.builddir
- * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_bitwriter.cmxa %t.builddir/bitreader.ml -o %t
- * RUN: %t %t.bc
- * RUN: llvm-dis < %t.bc
- * XFAIL: vg_leak
- *)
-
-(* Note that this takes a moment to link, so it's best to keep the number of
-   individual tests low. *)
-
-let context = Llvm.global_context ()
-
-let test x = if not x then exit 1 else ()
-
-let _ =
-  let fn = Sys.argv.(1) in
-  let m = Llvm.create_module context "ocaml_test_module" in
-  
-  test (Llvm_bitwriter.write_bitcode_file m fn);
-  
-  Llvm.dispose_module m;
-  
-  (* parse_bitcode *)
-  begin
-    let mb = Llvm.MemoryBuffer.of_file fn in
-    begin try
-      let m = Llvm_bitreader.parse_bitcode context mb in
-      Llvm.dispose_module m
-    with x ->
-      Llvm.MemoryBuffer.dispose mb;
-      raise x
-    end
-  end;
-  
-  (* MemoryBuffer.of_file *)
-  test begin try
-    let mb = Llvm.MemoryBuffer.of_file (fn ^ ".bogus") in
-    Llvm.MemoryBuffer.dispose mb;
-    false
-  with Llvm.IoError _ ->
-    true
-  end;
-  
-  (* get_module *)
-  begin
-    let mb = Llvm.MemoryBuffer.of_file fn in
-    let m = begin try
-      Llvm_bitreader.get_module context mb
-    with x ->
-      Llvm.MemoryBuffer.dispose mb;
-      raise x
-    end in
-    Llvm.dispose_module m
-  end;
-  
-  (* corrupt the bitcode *)
-  let fn = fn ^ ".txt" in
-  begin let oc = open_out fn in
-    output_string oc "not a bitcode file\n";
-    close_out oc
-  end;
-  
-  (* test get_module exceptions *)
-  test begin
-    try
-      let mb = Llvm.MemoryBuffer.of_file fn in
-      let m = begin try
-        Llvm_bitreader.get_module context mb
-      with x ->
-        Llvm.MemoryBuffer.dispose mb;
-        raise x
-      end in
-      Llvm.dispose_module m;
-      false
-    with Llvm_bitreader.Error _ ->
-      true
-  end
diff --git a/test/Bindings/Ocaml/bitwriter.ml b/test/Bindings/Ocaml/bitwriter.ml
deleted file mode 100644
index ae456cf785c8..000000000000
--- a/test/Bindings/Ocaml/bitwriter.ml
+++ /dev/null
@@ -1,48 +0,0 @@
-(* RUN: rm -rf %t.builddir
- * RUN: mkdir -p %t.builddir
- * RUN: cp %s %t.builddir
- * RUN: %ocamlopt -warn-error A unix.cmxa llvm.cmxa llvm_bitwriter.cmxa %t.builddir/bitwriter.ml -o %t
- * RUN: %t %t.bc
- * RUN: llvm-dis < %t.bc
- * XFAIL: vg_leak
- *)
-
-(* Note that this takes a moment to link, so it's best to keep the number of
-   individual tests low. *)
-
-let context = Llvm.global_context ()
-
-let test x = if not x then exit 1 else ()
-
-let read_file name =
-  let ic = open_in_bin name in
-  let len = in_channel_length ic in
-  let buf = String.create len in
-
-  test ((input ic buf 0 len) = len);
-
-  close_in ic;
-
-  buf
-
-let temp_bitcode ?unbuffered m =
-  let temp_name, temp_oc = Filename.open_temp_file ~mode:[Open_binary] "" "" in
-
-  test (Llvm_bitwriter.output_bitcode ?unbuffered temp_oc m);
-  flush temp_oc;
-
-  let temp_buf = read_file temp_name in
-
-  close_out temp_oc;
-
-  temp_buf
-
-let _ =
-  let m = Llvm.create_module context "ocaml_test_module" in
-  
-  test (Llvm_bitwriter.write_bitcode_file m Sys.argv.(1));
-  let file_buf = read_file Sys.argv.(1) in
-
-  test (file_buf = temp_bitcode m);
-  test (file_buf = temp_bitcode ~unbuffered:false m);
-  test (file_buf = temp_bitcode ~unbuffered:true m)
diff --git a/test/Bindings/Ocaml/executionengine.ml b/test/Bindings/Ocaml/executionengine.ml
deleted file mode 100644
index 8e2494952a2b..000000000000
--- a/test/Bindings/Ocaml/executionengine.ml
+++ /dev/null
@@ -1,118 +0,0 @@
-(* RUN: rm -rf %t.builddir
- * RUN: mkdir -p %t.builddir
- * RUN: cp %s %t.builddir
- * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_target.cmxa llvm_executionengine.cmxa %t.builddir/executionengine.ml -o %t
- * RUN: %t
- * XFAIL: vg_leak
- *)
-
-open Llvm
-open Llvm_executionengine
-open Llvm_target
-
-(* Note that this takes a moment to link, so it's best to keep the number of
-   individual tests low. *)
-
-let context = global_context ()
-let i8_type = Llvm.i8_type context
-let i32_type = Llvm.i32_type context
-let i64_type = Llvm.i64_type context
-let double_type = Llvm.double_type context
-
-let bomb msg =
-  prerr_endline msg;
-  exit 2
-
-let define_main_fn m retval =
-  let fn =
-    let str_arr_type = pointer_type (pointer_type i8_type) in
-    define_function "main" (function_type i32_type [| i32_type;
-                                                      str_arr_type;
-                                                      str_arr_type |]) m in
-  let b = builder_at_end (global_context ()) (entry_block fn) in
-  ignore (build_ret (const_int i32_type retval) b);
-  fn
-
-let define_plus m =
-  let fn = define_function "plus" (function_type i32_type [| i32_type;
-                                                             i32_type |]) m in
-  let b = builder_at_end (global_context ()) (entry_block fn) in
-  let add = build_add (param fn 0) (param fn 1) "sum" b in
-  ignore (build_ret add b)
-
-let test_genericvalue () =
-  let tu = (1, 2) in
-  let ptrgv = GenericValue.of_pointer tu in
-  assert (tu = GenericValue.as_pointer ptrgv);
-  
-  let fpgv = GenericValue.of_float double_type 2. in
-  assert (2. = GenericValue.as_float double_type fpgv);
-  
-  let intgv = GenericValue.of_int i32_type 3 in
-  assert (3  = GenericValue.as_int intgv);
-  
-  let i32gv = GenericValue.of_int32 i32_type (Int32.of_int 4) in
-  assert ((Int32.of_int 4) = GenericValue.as_int32 i32gv);
-  
-  let nigv = GenericValue.of_nativeint i32_type (Nativeint.of_int 5) in
-  assert ((Nativeint.of_int 5) = GenericValue.as_nativeint nigv);
-  
-  let i64gv = GenericValue.of_int64 i64_type (Int64.of_int 6) in
-  assert ((Int64.of_int 6) = GenericValue.as_int64 i64gv)
-
-let test_executionengine () =
-  (* create *)
-  let m = create_module (global_context ()) "test_module" in
-  let main = define_main_fn m 42 in
-  
-  let m2 = create_module (global_context ()) "test_module2" in
-  define_plus m2;
-  
-  let ee = ExecutionEngine.create m in
-  ExecutionEngine.add_module m2 ee;
-  
-  (* run_static_ctors *)
-  ExecutionEngine.run_static_ctors ee;
-  
-  (* run_function_as_main *)
-  let res = ExecutionEngine.run_function_as_main main [|"test"|] [||] ee in
-  if 42 != res then bomb "main did not return 42";
-  
-  (* free_machine_code *)
-  ExecutionEngine.free_machine_code main ee;
-  
-  (* find_function *)
-  match ExecutionEngine.find_function "dne" ee with
-  | Some _ -> raise (Failure "find_function 'dne' failed")
-  | None ->
-  
-  match ExecutionEngine.find_function "plus" ee with
-  | None -> raise (Failure "find_function 'plus' failed")
-  | Some plus ->
-  
-  (* run_function *)
-  let res = ExecutionEngine.run_function plus
-                                         [| GenericValue.of_int i32_type 2;
-                                            GenericValue.of_int i32_type 2 |]
-                                         ee in
-  if 4 != GenericValue.as_int res then bomb "plus did not work";
-  
-  (* remove_module *)
-  Llvm.dispose_module (ExecutionEngine.remove_module m2 ee);
-  
-  (* run_static_dtors *)
-  ExecutionEngine.run_static_dtors ee;
-
-  (* Show that the data layout binding links and runs.*)
-  let dl = ExecutionEngine.data_layout ee in
-
-  (* Demonstrate that a garbage pointer wasn't returned. *)
-  let ty = DataLayout.intptr_type context dl in
-  if ty != i32_type && ty != i64_type then bomb "target_data did not work";
-  
-  (* dispose *)
-  ExecutionEngine.dispose ee
-
-let _ =
-  test_genericvalue ();
-  test_executionengine ()
diff --git a/test/Bindings/Ocaml/ext_exc.ml b/test/Bindings/Ocaml/ext_exc.ml
deleted file mode 100644
index 9afc3c3ab4b2..000000000000
--- a/test/Bindings/Ocaml/ext_exc.ml
+++ /dev/null
@@ -1,20 +0,0 @@
-(* RUN: rm -rf %t.builddir
- * RUN: mkdir -p %t.builddir
- * RUN: cp %s %t.builddir
- * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_executionengine.cmxa %t.builddir/ext_exc.ml -o %t
- * RUN: %t </dev/null
- * XFAIL: vg_leak
- *)
-let context = Llvm.global_context ()
-(* this used to crash, we must not use 'external' in .mli files, but 'val' if we
- * want the let _ bindings executed, see http://caml.inria.fr/mantis/view.php?id=4166 *)
-let _ =
-    try
-        ignore (Llvm_bitreader.get_module context (Llvm.MemoryBuffer.of_stdin ()))
-    with
-    Llvm_bitreader.Error _ -> ();;
-let _ =
-    try
-        ignore (Llvm.MemoryBuffer.of_file "/path/to/nonexistent/file")
-    with
-    Llvm.IoError _ -> ();;
diff --git a/test/Bindings/Ocaml/ipo_opts.ml b/test/Bindings/Ocaml/ipo_opts.ml
deleted file mode 100644
index e0bcbe5f561e..000000000000
--- a/test/Bindings/Ocaml/ipo_opts.ml
+++ /dev/null
@@ -1,72 +0,0 @@
-(* RUN: rm -rf %t.builddir
- * RUN: mkdir -p %t.builddir
- * RUN: cp %s %t.builddir
- * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_ipo.cmxa llvm_target.cmxa %t.builddir/ipo_opts.ml -o %t
- * RUN: %t %t.bc
- * XFAIL: vg_leak
- *)
-
-(* Note: It takes several seconds for ocamlopt to link an executable with
-         libLLVMCore.a, so it's better to write a big test than a bunch of
-         little ones. *)
-
-open Llvm
-open Llvm_ipo
-open Llvm_target
-
-let context = global_context ()
-let void_type = Llvm.void_type context
-let i8_type = Llvm.i8_type context
-
-(* Tiny unit test framework - really just to help find which line is busted *)
-let print_checkpoints = false
-
-let suite name f =
-  if print_checkpoints then
-    prerr_endline (name ^ ":");
-  f ()
-
-
-(*===-- Fixture -----------------------------------------------------------===*)
-
-let filename = Sys.argv.(1)
-let m = create_module context filename
-
-
-(*===-- Transforms --------------------------------------------------------===*)
-
-let test_transforms () =
-  let (++) x f = ignore (f x); x in
-
-  let fty = function_type i8_type [| |] in
-  let fn = define_function "fn" fty m in
-  let fn2 = define_function "fn2" fty m in begin
-      ignore (build_ret (const_int i8_type 4) (builder_at_end context (entry_block fn)));
-      let b = builder_at_end context  (entry_block fn2) in
-      ignore (build_ret (build_call fn [| |] "" b) b);
-  end;
-
-  ignore (PassManager.create ()
-           ++ add_argument_promotion
-           ++ add_constant_merge
-           ++ add_dead_arg_elimination
-           ++ add_function_attrs
-           ++ add_function_inlining
-           ++ add_always_inliner
-           ++ add_global_dce
-           ++ add_global_optimizer
-           ++ add_ipc_propagation
-           ++ add_prune_eh
-           ++ add_ipsccp
-           ++ add_internalize
-           ++ add_strip_dead_prototypes
-           ++ add_strip_symbols
-           ++ PassManager.run_module m
-           ++ PassManager.dispose)
-
-
-(*===-- Driver ------------------------------------------------------------===*)
-
-let _ =
-  suite "transforms" test_transforms;
-  dispose_module m
diff --git a/test/Bindings/Ocaml/irreader.ml b/test/Bindings/Ocaml/irreader.ml
deleted file mode 100644
index 3511c2b23646..000000000000
--- a/test/Bindings/Ocaml/irreader.ml
+++ /dev/null
@@ -1,59 +0,0 @@
-(* RUN: rm -rf %t.builddir
- * RUN: mkdir -p %t.builddir
- * RUN: cp %s %t.builddir
- * RUN: %ocamlopt -g -warn-error A llvm.cmxa llvm_irreader.cmxa %t.builddir/irreader.ml -o %t
- * RUN: %t
- * XFAIL: vg_leak
- *)
-
-(* Note: It takes several seconds for ocamlopt to link an executable with
-         libLLVMCore.a, so it's better to write a big test than a bunch of
-         little ones. *)
-
-open Llvm
-open Llvm_irreader
-
-let context = global_context ()
-
-(* Tiny unit test framework - really just to help find which line is busted *)
-let print_checkpoints = false
-
-let suite name f =
-  if print_checkpoints then
-    prerr_endline (name ^ ":");
-  f ()
-
-let _ =
-  Printexc.record_backtrace true
-
-let insist cond =
-  if not cond then failwith "insist"
-
-
-(*===-- IR Reader ---------------------------------------------------------===*)
-
-let test_irreader () =
-  begin
-    let buf = MemoryBuffer.of_string "@foo = global i32 42" in
-    let m   = parse_ir context buf in
-    match lookup_global "foo" m with
-    | Some foo ->
-        insist ((global_initializer foo) = (const_int (i32_type context) 42))
-    | None ->
-        failwith "global"
-  end;
-
-  begin
-    let buf = MemoryBuffer.of_string "@foo = global garble" in
-    try
-      ignore (parse_ir context buf);
-      failwith "parsed"
-    with Llvm_irreader.Error _ ->
-      ()
-  end
-
-
-(*===-- Driver ------------------------------------------------------------===*)
-
-let _ =
-  suite "irreader" test_irreader
diff --git a/test/Bindings/Ocaml/linker.ml b/test/Bindings/Ocaml/linker.ml
deleted file mode 100644
index 9359ae9f2c48..000000000000
--- a/test/Bindings/Ocaml/linker.ml
+++ /dev/null
@@ -1,63 +0,0 @@
-(* RUN: rm -rf %t.builddir
- * RUN: mkdir -p %t.builddir
- * RUN: cp %s %t.builddir
- * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_linker.cmxa %t.builddir/linker.ml -o %t
- * RUN: %t
- * XFAIL: vg_leak
- *)
-
-(* Note: It takes several seconds for ocamlopt to link an executable with
-         libLLVMCore.a, so it's better to write a big test than a bunch of
-         little ones. *)
-
-open Llvm
-open Llvm_linker
-
-let context = global_context ()
-let void_type = Llvm.void_type context
-
-(* Tiny unit test framework - really just to help find which line is busted *)
-let print_checkpoints = false
-
-let suite name f =
-  if print_checkpoints then
-    prerr_endline (name ^ ":");
-  f ()
-
-
-(*===-- Linker -----------------------------------------------------------===*)
-
-let test_linker () =
-  let fty = function_type void_type [| |] in
-
-  let make_module name =
-    let m = create_module context name in
-    let fn = define_function ("fn_" ^ name) fty m in
-    ignore (build_ret_void (builder_at_end context (entry_block fn)));
-    m
-  in
-
-  let m1 = make_module "one"
-  and m2 = make_module "two" in
-  link_modules m1 m2 Mode.PreserveSource;
-  dispose_module m1;
-  dispose_module m2;
-
-  let m1 = make_module "one"
-  and m2 = make_module "two" in
-  link_modules m1 m2 Mode.DestroySource;
-  dispose_module m1;
-
-  let m1 = make_module "one"
-  and m2 = make_module "one" in
-  try
-    link_modules m1 m2 Mode.PreserveSource;
-    failwith "must raise"
-  with Error _ ->
-    dispose_module m1;
-    dispose_module m2
-
-(*===-- Driver ------------------------------------------------------------===*)
-
-let _ =
-  suite "linker" test_linker
diff --git a/test/Bindings/Ocaml/lit.local.cfg b/test/Bindings/Ocaml/lit.local.cfg
deleted file mode 100644
index c38d89ab09e9..000000000000
--- a/test/Bindings/Ocaml/lit.local.cfg
+++ /dev/null
@@ -1,5 +0,0 @@
-config.suffixes = ['.ml']
-
-bindings = set([s.strip() for s in config.root.llvm_bindings.split(',')])
-if not 'ocaml' in bindings:
-    config.unsupported = True
diff --git a/test/Bindings/Ocaml/passmgr_builder.ml b/test/Bindings/Ocaml/passmgr_builder.ml
deleted file mode 100644
index 1a3102f70a34..000000000000
--- a/test/Bindings/Ocaml/passmgr_builder.ml
+++ /dev/null
@@ -1,64 +0,0 @@
-(* RUN: rm -rf %t.builddir
- * RUN: mkdir -p %t.builddir
- * RUN: cp %s %t.builddir
- * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_passmgr_builder.cmxa %t.builddir/passmgr_builder.ml -o %t
- * RUN: %t %t.bc
- * XFAIL: vg_leak
- *)
-
-(* Note: It takes several seconds for ocamlopt to link an executable with
-         libLLVMCore.a, so it's better to write a big test than a bunch of
-         little ones. *)
-
-open Llvm
-open Llvm_passmgr_builder
-
-let context = global_context ()
-let void_type = Llvm.void_type context
-
-(* Tiny unit test framework - really just to help find which line is busted *)
-let print_checkpoints = false
-
-let suite name f =
-  if print_checkpoints then
-    prerr_endline (name ^ ":");
-  f ()
-
-
-(*===-- Fixture -----------------------------------------------------------===*)
-
-let filename = Sys.argv.(1)
-let m = create_module context filename
-
-
-(*===-- Pass Manager Builder ----------------------------------------------===*)
-
-let test_pmbuilder () =
-  let (++) x f = ignore (f x); x in
-
-  let module_passmgr = PassManager.create () in
-  let func_passmgr   = PassManager.create_function m in
-  let lto_passmgr    = PassManager.create () in
-
-  ignore (Llvm_passmgr_builder.create ()
-           ++ set_opt_level 3
-           ++ set_size_level 1
-           ++ set_disable_unit_at_a_time false
-           ++ set_disable_unroll_loops false
-           ++ use_inliner_with_threshold 10
-           ++ populate_function_pass_manager func_passmgr
-           ++ populate_module_pass_manager module_passmgr
-           ++ populate_lto_pass_manager lto_passmgr
-                  ~internalize:false ~run_inliner:false);
-  Gc.compact ();
-
-  PassManager.dispose module_passmgr;
-  PassManager.dispose func_passmgr;
-  PassManager.dispose lto_passmgr
-
-
-(*===-- Driver ------------------------------------------------------------===*)
-
-let _ =
-  suite "pass manager builder" test_pmbuilder;
-  dispose_module m
diff --git a/test/Bindings/Ocaml/scalar_opts.ml b/test/Bindings/Ocaml/scalar_opts.ml
deleted file mode 100644
index 39913e43119d..000000000000
--- a/test/Bindings/Ocaml/scalar_opts.ml
+++ /dev/null
@@ -1,87 +0,0 @@
-(* RUN: rm -rf %t.builddir
- * RUN: mkdir -p %t.builddir
- * RUN: cp %s %t.builddir
- * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_scalar_opts.cmxa llvm_target.cmxa %t.builddir/scalar_opts.ml -o %t
- * RUN: %t %t.bc
- * XFAIL: vg_leak
- *)
-
-(* Note: It takes several seconds for ocamlopt to link an executable with
-         libLLVMCore.a, so it's better to write a big test than a bunch of
-         little ones. *)
-
-open Llvm
-open Llvm_scalar_opts
-open Llvm_target
-
-let context = global_context ()
-let void_type = Llvm.void_type context
-
-(* Tiny unit test framework - really just to help find which line is busted *)
-let print_checkpoints = false
-
-let suite name f =
-  if print_checkpoints then
-    prerr_endline (name ^ ":");
-  f ()
-
-
-(*===-- Fixture -----------------------------------------------------------===*)
-
-let filename = Sys.argv.(1)
-let m = create_module context filename
-
-
-(*===-- Transforms --------------------------------------------------------===*)
-
-let test_transforms () =
-  let (++) x f = ignore (f x); x in
-
-  let fty = function_type void_type [| |] in
-  let fn = define_function "fn" fty m in
-  ignore (build_ret_void (builder_at_end context (entry_block fn)));
-  
-  ignore (PassManager.create_function m
-           ++ add_verifier
-           ++ add_constant_propagation
-           ++ add_sccp
-           ++ add_dead_store_elimination
-           ++ add_aggressive_dce
-           ++ add_scalar_repl_aggregation
-           ++ add_scalar_repl_aggregation_ssa
-           ++ add_scalar_repl_aggregation_with_threshold 4
-           ++ add_ind_var_simplification
-           ++ add_instruction_combination
-           ++ add_licm
-           ++ add_loop_unswitch
-           ++ add_loop_unroll
-           ++ add_loop_rotation
-           ++ add_memory_to_register_promotion
-           ++ add_memory_to_register_demotion
-           ++ add_reassociation
-           ++ add_jump_threading
-           ++ add_cfg_simplification
-           ++ add_tail_call_elimination
-           ++ add_gvn
-           ++ add_memcpy_opt
-           ++ add_loop_deletion
-           ++ add_loop_idiom
-           ++ add_lib_call_simplification
-           ++ add_correlated_value_propagation
-           ++ add_early_cse
-           ++ add_lower_expect_intrinsic
-           ++ add_type_based_alias_analysis
-           ++ add_basic_alias_analysis
-           ++ add_partially_inline_lib_calls
-           ++ add_verifier
-           ++ PassManager.initialize
-           ++ PassManager.run_function fn
-           ++ PassManager.finalize
-           ++ PassManager.dispose)
-
-
-(*===-- Driver ------------------------------------------------------------===*)
-
-let _ =
-  suite "transforms" test_transforms;
-  dispose_module m
diff --git a/test/Bindings/Ocaml/target.ml b/test/Bindings/Ocaml/target.ml
deleted file mode 100644
index 0a2283aa3ed5..000000000000
--- a/test/Bindings/Ocaml/target.ml
+++ /dev/null
@@ -1,116 +0,0 @@
-(* RUN: rm -rf %t.builddir
- * RUN: mkdir -p %t.builddir
- * RUN: cp %s %t.builddir
- * RUN: %ocamlopt -g -warn-error A llvm.cmxa llvm_target.cmxa llvm_executionengine.cmxa %t.builddir/target.ml -o %t
- * RUN: %t %t.bc
- * REQUIRES: native, object-emission
- * XFAIL: vg_leak
- *)
-
-(* Note: It takes several seconds for ocamlopt to link an executable with
-         libLLVMCore.a, so it's better to write a big test than a bunch of
-         little ones. *)
-
-open Llvm
-open Llvm_target
-
-let _ = Llvm_executionengine.initialize_native_target ()
-
-let context = global_context ()
-let i32_type = Llvm.i32_type context
-let i64_type = Llvm.i64_type context
-
-(* Tiny unit test framework - really just to help find which line is busted *)
-let print_checkpoints = false
-
-let _ =
-  Printexc.record_backtrace true
-
-let assert_equal a b =
-  if a <> b then failwith "assert_equal"
-
-
-(*===-- Fixture -----------------------------------------------------------===*)
-
-let filename = Sys.argv.(1)
-let m = create_module context filename
-
-let target = Target.by_triple (Target.default_triple ())
-
-let machine = TargetMachine.create (Target.default_triple ()) target
-
-(*===-- Data Layout -------------------------------------------------------===*)
-
-let test_target_data () =
-  let module DL = DataLayout in
-  let layout = "e-p:32:32-f64:32:64-v64:32:64-v128:32:128-n32-S32" in
-  let dl     = DL.of_string layout in
-  let sty    = struct_type context [| i32_type; i64_type |] in
-
-  assert_equal (DL.as_string dl) layout;
-  assert_equal (DL.byte_order dl) Endian.Little;
-  assert_equal (DL.pointer_size dl) 4;
-  assert_equal (DL.intptr_type context dl) i32_type;
-  assert_equal (DL.qualified_pointer_size 0 dl) 4;
-  assert_equal (DL.qualified_intptr_type context 0 dl) i32_type;
-  assert_equal (DL.size_in_bits sty dl) (Int64.of_int 96);
-  assert_equal (DL.store_size sty dl) (Int64.of_int 12);
-  assert_equal (DL.abi_size sty dl) (Int64.of_int 12);
-  assert_equal (DL.stack_align sty dl) 4;
-  assert_equal (DL.preferred_align sty dl) 8;
-  assert_equal (DL.preferred_align_of_global (declare_global sty "g" m) dl) 8;
-  assert_equal (DL.element_at_offset sty (Int64.of_int 1) dl) 0;
-  assert_equal (DL.offset_of_element sty 1 dl) (Int64.of_int 4);
-
-  let pm = PassManager.create () in
-  ignore (DL.add_to_pass_manager pm dl)
-
-
-(*===-- Target ------------------------------------------------------------===*)
-
-let test_target () =
-  let module T = Target in
-  ignore (T.succ target);
-  ignore (T.name target);
-  ignore (T.description target);
-  ignore (T.has_jit target);
-  ignore (T.has_target_machine target);
-  ignore (T.has_asm_backend target)
-
-
-(*===-- Target Machine ----------------------------------------------------===*)
-
-let test_target_machine () =
-  let module TM = TargetMachine in
-  assert_equal (TM.target machine) target;
-  assert_equal (TM.triple machine) (Target.default_triple ());
-  assert_equal (TM.cpu machine) "";
-  assert_equal (TM.features machine) "";
-  ignore (TM.data_layout machine);
-  TM.set_verbose_asm true machine
-
-
-(*===-- Code Emission -----------------------------------------------------===*)
-
-let test_code_emission () =
-  TargetMachine.emit_to_file m CodeGenFileType.ObjectFile filename machine;
-  try
-    TargetMachine.emit_to_file m CodeGenFileType.ObjectFile
-                               "/nonexistent/file" machine;
-    failwith "must raise"
-  with Llvm_target.Error _ ->
-    ();
-
-  let buf = TargetMachine.emit_to_memory_buffer m CodeGenFileType.ObjectFile
-                                                machine in
-  Llvm.MemoryBuffer.dispose buf
-
-
-(*===-- Driver ------------------------------------------------------------===*)
-
-let _ =
-  test_target_data ();
-  test_target ();
-  test_target_machine ();
-  (* test_code_emission (); *) (* broken without AsmParser support *)
-  dispose_module m
diff --git a/test/Bindings/Ocaml/vectorize_opts.ml b/test/Bindings/Ocaml/vectorize_opts.ml
deleted file mode 100644
index 5ef985d5dc18..000000000000
--- a/test/Bindings/Ocaml/vectorize_opts.ml
+++ /dev/null
@@ -1,56 +0,0 @@
-(* RUN: rm -rf %t.builddir
- * RUN: mkdir -p %t.builddir
- * RUN: cp %s %t.builddir
- * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_vectorize.cmxa llvm_target.cmxa %t.builddir/vectorize_opts.ml -o %t
- * RUN: %t %t.bc
- * XFAIL: vg_leak
- *)
-
-(* Note: It takes several seconds for ocamlopt to link an executable with
-         libLLVMCore.a, so it's better to write a big test than a bunch of
-         little ones. *)
-
-open Llvm
-open Llvm_vectorize
-open Llvm_target
-
-let context = global_context ()
-let void_type = Llvm.void_type context
-
-(* Tiny unit test framework - really just to help find which line is busted *)
-let print_checkpoints = false
-
-let suite name f =
-  if print_checkpoints then
-    prerr_endline (name ^ ":");
-  f ()
-
-
-(*===-- Fixture -----------------------------------------------------------===*)
-
-let filename = Sys.argv.(1)
-let m = create_module context filename
-
-
-(*===-- Transforms --------------------------------------------------------===*)
-
-let test_transforms () =
-  let (++) x f = ignore (f x); x in
-
-  let fty = function_type void_type [| |] in
-  let fn = define_function "fn" fty m in
-  ignore (build_ret_void (builder_at_end context (entry_block fn)));
-
-  ignore (PassManager.create ()
-           ++ add_bb_vectorize
-           ++ add_loop_vectorize
-           ++ add_slp_vectorize
-           ++ PassManager.run_module m
-           ++ PassManager.dispose)
-
-
-(*===-- Driver ------------------------------------------------------------===*)
-
-let _ =
-  suite "transforms" test_transforms;
-  dispose_module m
diff --git a/test/Bindings/Ocaml/vmcore.ml b/test/Bindings/Ocaml/vmcore.ml
deleted file mode 100644
index f014116ffe8e..000000000000
--- a/test/Bindings/Ocaml/vmcore.ml
+++ /dev/null
@@ -1,1421 +0,0 @@
-(* RUN: rm -rf %t.builddir
- * RUN: mkdir -p %t.builddir
- * RUN: cp %s %t.builddir
- * RUN: %ocamlopt -warn-error A llvm.cmxa llvm_analysis.cmxa llvm_bitwriter.cmxa %t.builddir/vmcore.ml -o %t
- * RUN: %t %t.bc
- * RUN: llvm-dis < %t.bc > %t.ll
- * RUN: FileCheck %s < %t.ll
- * Do a second pass for things that shouldn't be anywhere.
- * RUN: FileCheck -check-prefix=CHECK-NOWHERE %s < %t.ll
- * XFAIL: vg_leak
- *)
-
-(* Note: It takes several seconds for ocamlopt to link an executable with
-         libLLVMCore.a, so it's better to write a big test than a bunch of
-         little ones. *)
-
-open Llvm
-open Llvm_bitwriter
-
-
-(* Tiny unit test framework - really just to help find which line is busted *)
-let exit_status = ref 0
-let suite_name = ref ""
-let group_name = ref ""
-let case_num = ref 0
-let print_checkpoints = false
-let context = global_context ()
-let i1_type = Llvm.i1_type context
-let i8_type = Llvm.i8_type context
-let i16_type = Llvm.i16_type context
-let i32_type = Llvm.i32_type context
-let i64_type = Llvm.i64_type context
-let void_type = Llvm.void_type context
-let float_type = Llvm.float_type context
-let double_type = Llvm.double_type context
-let fp128_type = Llvm.fp128_type context
-
-let group name =
-  group_name := !suite_name ^ "/" ^ name;
-  case_num := 0;
-  if print_checkpoints then
-    prerr_endline ("  " ^ name ^ "...")
-
-let insist cond =
-  incr case_num;
-  if not cond then
-    exit_status := 10;
-  match print_checkpoints, cond with
-  | false, true -> ()
-  | false, false ->
-      prerr_endline ("FAILED: " ^ !suite_name ^ "/" ^ !group_name ^ " #" ^ (string_of_int !case_num))
-  | true, true ->
-      prerr_endline ("    " ^ (string_of_int !case_num))
-  | true, false ->
-      prerr_endline ("    " ^ (string_of_int !case_num) ^ " FAIL")
-
-let suite name f =
-  suite_name := name;
-  if print_checkpoints then
-    prerr_endline (name ^ ":");
-  f ()
-
-
-(*===-- Fixture -----------------------------------------------------------===*)
-
-let filename = Sys.argv.(1)
-let m = create_module context filename
-
-
-(*===-- Conversion --------------------------------------------------------===*)
-
-let test_conversion () =
-  insist ("i32" = (string_of_lltype i32_type));
-  let c = const_int i32_type 42 in
-  insist ("i32 42" = (string_of_llvalue c))
-
-
-(*===-- Target ------------------------------------------------------------===*)
-
-let test_target () =
-  begin group "triple";
-    let trip = "i686-apple-darwin8" in
-    set_target_triple trip m;
-    insist (trip = target_triple m)
-  end;
-  
-  begin group "layout";
-    let layout = "e" in
-    set_data_layout layout m;
-    insist (layout = data_layout m)
-  end
-  (* CHECK: target datalayout = "e"
-   * CHECK: target triple = "i686-apple-darwin8"
-   *)
-
-
-(*===-- Constants ---------------------------------------------------------===*)
-
-let test_constants () =
-  (* CHECK: const_int{{.*}}i32{{.*}}-1
-   *)
-  group "int";
-  let c = const_int i32_type (-1) in
-  ignore (define_global "const_int" c m);
-  insist (i32_type = type_of c);
-  insist (is_constant c);
-
-  (* CHECK: const_sext_int{{.*}}i64{{.*}}-1
-   *)
-  group "sext int";
-  let c = const_int i64_type (-1) in
-  ignore (define_global "const_sext_int" c m);
-  insist (i64_type = type_of c);
-
-  (* CHECK: const_zext_int64{{.*}}i64{{.*}}4294967295
-   *)
-  group "zext int64";
-  let c = const_of_int64 i64_type (Int64.of_string "4294967295") false in
-  ignore (define_global "const_zext_int64" c m);
-  insist (i64_type = type_of c);
-
-  (* CHECK: const_int_string{{.*}}i32{{.*}}-1
-   *)
-  group "int string";
-  let c = const_int_of_string i32_type "-1" 10 in
-  ignore (define_global "const_int_string" c m);
-  insist (i32_type = type_of c);
-
-  (* CHECK: @const_string = global {{.*}}c"cruel\00world"
-   *)
-  group "string";
-  let c = const_string context "cruel\000world" in
-  ignore (define_global "const_string" c m);
-  insist ((array_type i8_type 11) = type_of c);
-
-  (* CHECK: const_stringz{{.*}}"hi\00again\00"
-   *)
-  group "stringz";
-  let c = const_stringz context "hi\000again" in
-  ignore (define_global "const_stringz" c m);
-  insist ((array_type i8_type 9) = type_of c);
-
-  (* CHECK: const_single{{.*}}2.75
-   * CHECK: const_double{{.*}}3.1459
-   * CHECK: const_double_string{{.*}}1.25
-   *)
-  begin group "real";
-    let cs = const_float float_type 2.75 in
-    ignore (define_global "const_single" cs m);
-    insist (float_type = type_of cs);
-    
-    let cd = const_float double_type 3.1459 in
-    ignore (define_global "const_double" cd m);
-    insist (double_type = type_of cd);
-
-    let cd = const_float_of_string double_type "1.25" in
-    ignore (define_global "const_double_string" cd m);
-    insist (double_type = type_of cd)
-  end;
-  
-  let one = const_int i16_type 1 in
-  let two = const_int i16_type 2 in
-  let three = const_int i32_type 3 in
-  let four = const_int i32_type 4 in
-  
-  (* CHECK: const_array{{.*}}[i32 3, i32 4]
-   *)
-  group "array";
-  let c = const_array i32_type [| three; four |] in
-  ignore (define_global "const_array" c m);
-  insist ((array_type i32_type 2) = (type_of c));
-  
-  (* CHECK: const_vector{{.*}}<i16 1, i16 2{{.*}}>
-   *)
-  group "vector";
-  let c = const_vector [| one; two; one; two;
-                          one; two; one; two |] in
-  ignore (define_global "const_vector" c m);
-  insist ((vector_type i16_type 8) = (type_of c));
-
-  (* CHECK: const_structure{{.*.}}i16 1, i16 2, i32 3, i32 4
-   *)
-  group "structure";
-  let c = const_struct context [| one; two; three; four |] in
-  ignore (define_global "const_structure" c m);
-  insist ((struct_type context [| i16_type; i16_type; i32_type; i32_type |])
-        = (type_of c));
-
-  (* CHECK: const_null{{.*}}zeroinit
-   *)
-  group "null";
-  let c = const_null (packed_struct_type context [| i1_type; i8_type; i64_type;
-                                                    double_type |]) in
-  ignore (define_global "const_null" c m);
-  
-  (* CHECK: const_all_ones{{.*}}-1
-   *)
-  group "all ones";
-  let c = const_all_ones i64_type in
-  ignore (define_global "const_all_ones" c m);
-
-  group "pointer null"; begin
-    (* CHECK: const_pointer_null = global i64* null
-     *)
-    let c = const_pointer_null (pointer_type i64_type) in
-    ignore (define_global "const_pointer_null" c m);
-  end;
-  
-  (* CHECK: const_undef{{.*}}undef
-   *)
-  group "undef";
-  let c = undef i1_type in
-  ignore (define_global "const_undef" c m);
-  insist (i1_type = type_of c);
-  insist (is_undef c);
-  
-  group "constant arithmetic";
-  (* CHECK: @const_neg = global i64 sub
-   * CHECK: @const_nsw_neg = global i64 sub nsw
-   * CHECK: @const_nuw_neg = global i64 sub nuw
-   * CHECK: @const_fneg = global double fsub
-   * CHECK: @const_not = global i64 xor
-   * CHECK: @const_add = global i64 add
-   * CHECK: @const_nsw_add = global i64 add nsw
-   * CHECK: @const_nuw_add = global i64 add nuw
-   * CHECK: @const_fadd = global double fadd
-   * CHECK: @const_sub = global i64 sub
-   * CHECK: @const_nsw_sub = global i64 sub nsw
-   * CHECK: @const_nuw_sub = global i64 sub nuw
-   * CHECK: @const_fsub = global double fsub
-   * CHECK: @const_mul = global i64 mul
-   * CHECK: @const_nsw_mul = global i64 mul nsw
-   * CHECK: @const_nuw_mul = global i64 mul nuw
-   * CHECK: @const_fmul = global double fmul
-   * CHECK: @const_udiv = global i64 udiv
-   * CHECK: @const_sdiv = global i64 sdiv
-   * CHECK: @const_exact_sdiv = global i64 sdiv exact
-   * CHECK: @const_fdiv = global double fdiv
-   * CHECK: @const_urem = global i64 urem
-   * CHECK: @const_srem = global i64 srem
-   * CHECK: @const_frem = global double frem
-   * CHECK: @const_and = global i64 and
-   * CHECK: @const_or = global i64 or
-   * CHECK: @const_xor = global i64 xor
-   * CHECK: @const_icmp = global i1 icmp sle
-   * CHECK: @const_fcmp = global i1 fcmp ole
-   *)
-  let void_ptr = pointer_type i8_type in
-  let five = const_int i64_type 5 in
-  let ffive = const_uitofp five double_type in
-  let foldbomb_gv = define_global "FoldBomb" (const_null i8_type) m in
-  let foldbomb = const_ptrtoint foldbomb_gv i64_type in
-  let ffoldbomb = const_uitofp foldbomb double_type in
-  ignore (define_global "const_neg" (const_neg foldbomb) m);
-  ignore (define_global "const_nsw_neg" (const_nsw_neg foldbomb) m);
-  ignore (define_global "const_nuw_neg" (const_nuw_neg foldbomb) m);
-  ignore (define_global "const_fneg" (const_fneg ffoldbomb) m);
-  ignore (define_global "const_not" (const_not foldbomb) m);
-  ignore (define_global "const_add" (const_add foldbomb five) m);
-  ignore (define_global "const_nsw_add" (const_nsw_add foldbomb five) m);
-  ignore (define_global "const_nuw_add" (const_nuw_add foldbomb five) m);
-  ignore (define_global "const_fadd" (const_fadd ffoldbomb ffive) m);
-  ignore (define_global "const_sub" (const_sub foldbomb five) m);
-  ignore (define_global "const_nsw_sub" (const_nsw_sub foldbomb five) m);
-  ignore (define_global "const_nuw_sub" (const_nuw_sub foldbomb five) m);
-  ignore (define_global "const_fsub" (const_fsub ffoldbomb ffive) m);
-  ignore (define_global "const_mul" (const_mul foldbomb five) m);
-  ignore (define_global "const_nsw_mul" (const_nsw_mul foldbomb five) m);
-  ignore (define_global "const_nuw_mul" (const_nuw_mul foldbomb five) m);
-  ignore (define_global "const_fmul" (const_fmul ffoldbomb ffive) m);
-  ignore (define_global "const_udiv" (const_udiv foldbomb five) m);
-  ignore (define_global "const_sdiv" (const_sdiv foldbomb five) m);
-  ignore (define_global "const_exact_sdiv" (const_exact_sdiv foldbomb five) m);
-  ignore (define_global "const_fdiv" (const_fdiv ffoldbomb ffive) m);
-  ignore (define_global "const_urem" (const_urem foldbomb five) m);
-  ignore (define_global "const_srem" (const_srem foldbomb five) m);
-  ignore (define_global "const_frem" (const_frem ffoldbomb ffive) m);
-  ignore (define_global "const_and" (const_and foldbomb five) m);
-  ignore (define_global "const_or" (const_or foldbomb five) m);
-  ignore (define_global "const_xor" (const_xor foldbomb five) m);
-  ignore (define_global "const_icmp" (const_icmp Icmp.Sle foldbomb five) m);
-  ignore (define_global "const_fcmp" (const_fcmp Fcmp.Ole ffoldbomb ffive) m);
-  
-  group "constant casts";
-  (* CHECK: const_trunc{{.*}}trunc
-   * CHECK: const_sext{{.*}}sext
-   * CHECK: const_zext{{.*}}zext
-   * CHECK: const_fptrunc{{.*}}fptrunc
-   * CHECK: const_fpext{{.*}}fpext
-   * CHECK: const_uitofp{{.*}}uitofp
-   * CHECK: const_sitofp{{.*}}sitofp
-   * CHECK: const_fptoui{{.*}}fptoui
-   * CHECK: const_fptosi{{.*}}fptosi
-   * CHECK: const_ptrtoint{{.*}}ptrtoint
-   * CHECK: const_inttoptr{{.*}}inttoptr
-   * CHECK: const_bitcast{{.*}}bitcast
-   * CHECK: const_intcast{{.*}}zext
-   *)
-  let i128_type = integer_type context 128 in
-  ignore (define_global "const_trunc" (const_trunc (const_add foldbomb five)
-                                               i8_type) m);
-  ignore (define_global "const_sext" (const_sext foldbomb i128_type) m);
-  ignore (define_global "const_zext" (const_zext foldbomb i128_type) m);
-  ignore (define_global "const_fptrunc" (const_fptrunc ffoldbomb float_type) m);
-  ignore (define_global "const_fpext" (const_fpext ffoldbomb fp128_type) m);
-  ignore (define_global "const_uitofp" (const_uitofp foldbomb double_type) m);
-  ignore (define_global "const_sitofp" (const_sitofp foldbomb double_type) m);
-  ignore (define_global "const_fptoui" (const_fptoui ffoldbomb i32_type) m);
-  ignore (define_global "const_fptosi" (const_fptosi ffoldbomb i32_type) m);
-  ignore (define_global "const_ptrtoint" (const_ptrtoint 
-    (const_gep (const_null (pointer_type i8_type))
-               [| const_int i32_type 1 |])
-    i32_type) m);
-  ignore (define_global "const_inttoptr" (const_inttoptr (const_add foldbomb five)
-                                                  void_ptr) m);
-  ignore (define_global "const_bitcast" (const_bitcast ffoldbomb i64_type) m);
-  ignore (define_global "const_intcast"
-          (const_intcast foldbomb i128_type ~is_signed:false) m);
-  
-  group "misc constants";
-  (* CHECK: const_size_of{{.*}}getelementptr{{.*}}null
-   * CHECK: const_gep{{.*}}getelementptr
-   * CHECK: const_select{{.*}}select
-   * CHECK: const_extractelement{{.*}}extractelement
-   * CHECK: const_insertelement{{.*}}insertelement
-   * CHECK: const_shufflevector = global <4 x i32> <i32 0, i32 1, i32 1, i32 0>
-   *)
-  ignore (define_global "const_size_of" (size_of (pointer_type i8_type)) m);
-  ignore (define_global "const_gep" (const_gep foldbomb_gv [| five |]) m);
-  ignore (define_global "const_select" (const_select
-    (const_icmp Icmp.Sle foldbomb five)
-    (const_int i8_type (-1))
-    (const_int i8_type 0)) m);
-  let zero = const_int i32_type 0 in
-  let one  = const_int i32_type 1 in
-  ignore (define_global "const_extractelement" (const_extractelement
-    (const_vector [| zero; one; zero; one |])
-    (const_trunc foldbomb i32_type)) m);
-  ignore (define_global "const_insertelement" (const_insertelement
-    (const_vector [| zero; one; zero; one |])
-    zero (const_trunc foldbomb i32_type)) m);
-  ignore (define_global "const_shufflevector" (const_shufflevector
-    (const_vector [| zero; one |])
-    (const_vector [| one; zero |])
-    (const_vector [| const_int i32_type 0; const_int i32_type 1;
-                     const_int i32_type 2; const_int i32_type 3 |])) m);
-
-  group "asm"; begin
-    let ft = function_type void_type [| i32_type; i32_type; i32_type |] in
-    ignore (const_inline_asm
-      ft
-      ""
-      "{cx},{ax},{di},~{dirflag},~{fpsr},~{flags},~{edi},~{ecx}"
-      true
-      false)
-  end;
-
-  group "recursive struct"; begin
-      let nsty = named_struct_type context "rec" in
-      let pty = pointer_type nsty in
-      struct_set_body nsty [| i32_type; pty |] false;
-      let elts = [| const_int i32_type 4; const_pointer_null pty |] in
-      let grec_init = const_named_struct nsty elts in
-      ignore (define_global "grec" grec_init m);
-      ignore (string_of_lltype nsty);
-  end
-
-
-(*===-- Global Values -----------------------------------------------------===*)
-
-let test_global_values () =
-  let (++) x f = f x; x in
-  let zero32 = const_null i32_type in
-
-  (* CHECK: GVal01
-   *)
-  group "naming";
-  let g = define_global "TEMPORARY" zero32 m in
-  insist ("TEMPORARY" = value_name g);
-  set_value_name "GVal01" g;
-  insist ("GVal01" = value_name g);
-
-  (* CHECK: GVal02{{.*}}linkonce
-   *)
-  group "linkage";
-  let g = define_global "GVal02" zero32 m ++
-          set_linkage Linkage.Link_once in
-  insist (Linkage.Link_once = linkage g);
-
-  (* CHECK: GVal03{{.*}}Hanalei
-   *)
-  group "section";
-  let g = define_global "GVal03" zero32 m ++
-          set_section "Hanalei" in
-  insist ("Hanalei" = section g);
-  
-  (* CHECK: GVal04{{.*}}hidden
-   *)
-  group "visibility";
-  let g = define_global "GVal04" zero32 m ++
-          set_visibility Visibility.Hidden in
-  insist (Visibility.Hidden = visibility g);
-  
-  (* CHECK: GVal05{{.*}}align 128
-   *)
-  group "alignment";
-  let g = define_global "GVal05" zero32 m ++
-          set_alignment 128 in
-  insist (128 = alignment g)
-
-
-(*===-- Global Variables --------------------------------------------------===*)
-
-let test_global_variables () =
-  let (++) x f = f x; x in
-  let forty_two32 = const_int i32_type 42 in
-
-  group "declarations"; begin
-    (* CHECK: @GVar01 = external global i32
-     * CHECK: @QGVar01 = external addrspace(3) global i32
-     *)
-    insist (None == lookup_global "GVar01" m);
-    let g = declare_global i32_type "GVar01" m in
-    insist (is_declaration g);
-    insist (pointer_type float_type ==
-              type_of (declare_global float_type "GVar01" m));
-    insist (g == declare_global i32_type "GVar01" m);
-    insist (match lookup_global "GVar01" m with Some x -> x = g
-                                              | None -> false);
-
-    insist (None == lookup_global "QGVar01" m);
-    let g = declare_qualified_global i32_type "QGVar01" 3 m in
-    insist (is_declaration g);
-    insist (qualified_pointer_type float_type 3 ==
-              type_of (declare_qualified_global float_type "QGVar01" 3 m));
-    insist (g == declare_qualified_global i32_type "QGVar01" 3 m);
-    insist (match lookup_global "QGVar01" m with Some x -> x = g
-                                              | None -> false);
-  end;
-  
-  group "definitions"; begin
-    (* CHECK: @GVar02 = global i32 42
-     * CHECK: @GVar03 = global i32 42
-     * CHECK: @QGVar02 = addrspace(3) global i32 42
-     * CHECK: @QGVar03 = addrspace(3) global i32 42
-     *)
-    let g = define_global "GVar02" forty_two32 m in
-    let g2 = declare_global i32_type "GVar03" m ++
-           set_initializer forty_two32 in
-    insist (not (is_declaration g));
-    insist (not (is_declaration g2));
-    insist ((global_initializer g) == (global_initializer g2));
-
-    let g = define_qualified_global "QGVar02" forty_two32 3 m in
-    let g2 = declare_qualified_global i32_type "QGVar03" 3 m ++
-           set_initializer forty_two32 in
-    insist (not (is_declaration g));
-    insist (not (is_declaration g2));
-    insist ((global_initializer g) == (global_initializer g2));
-  end;
-
-  (* CHECK: GVar04{{.*}}thread_local
-   *)
-  group "threadlocal";
-  let g = define_global "GVar04" forty_two32 m ++
-          set_thread_local true in
-  insist (is_thread_local g);
-
-  (* CHECK: GVar05{{.*}}thread_local(initialexec)
-   *)
-  group "threadlocal_mode";
-  let g = define_global "GVar05" forty_two32 m ++
-          set_thread_local_mode ThreadLocalMode.InitialExec in
-  insist ((thread_local_mode g) = ThreadLocalMode.InitialExec);
-
-  (* CHECK: GVar06{{.*}}externally_initialized
-   *)
-  group "externally_initialized";
-  let g = define_global "GVar06" forty_two32 m ++
-          set_externally_initialized true in
-  insist (is_externally_initialized g);
-
-  (* CHECK-NOWHERE-NOT: GVar07
-   *)
-  group "delete";
-  let g = define_global "GVar07" forty_two32 m in
-  delete_global g;
-
-  (* CHECK: ConstGlobalVar{{.*}}constant
-   *)
-  group "constant";
-  let g = define_global "ConstGlobalVar" forty_two32 m in
-  insist (not (is_global_constant g));
-  set_global_constant true g;
-  insist (is_global_constant g);
-  
-  begin group "iteration";
-    let m = create_module context "temp" in
-    
-    insist (At_end m = global_begin m);
-    insist (At_start m = global_end m);
-    
-    let g1 = declare_global i32_type "One" m in
-    let g2 = declare_global i32_type "Two" m in
-    
-    insist (Before g1 = global_begin m);
-    insist (Before g2 = global_succ g1);
-    insist (At_end m = global_succ g2);
-    
-    insist (After g2 = global_end m);
-    insist (After g1 = global_pred g2);
-    insist (At_start m = global_pred g1);
-    
-    let lf s x = s ^ "->" ^ value_name x in
-    insist ("->One->Two" = fold_left_globals lf "" m);
-    
-    let rf x s = value_name x ^ "<-" ^ s in
-    insist ("One<-Two<-" = fold_right_globals rf m "");
-    
-    dispose_module m
-  end
-
-(* String globals built below are emitted here.
- * CHECK: build_global_string{{.*}}stringval
- *)
-
-
-(*===-- Uses --------------------------------------------------------------===*)
-
-let test_uses () =
-  let ty = function_type i32_type [| i32_type; i32_type |] in
-  let fn = define_function "use_function" ty m in
-  let b = builder_at_end context (entry_block fn) in
-
-  let p1 = param fn 0 in
-  let p2 = param fn 1 in
-  let v1 = build_add p1 p2 "v1" b in
-  let v2 = build_add p1 v1 "v2" b in
-  let _ = build_add v1 v2 "v3" b in
-
-  let lf s u = value_name (user u) ^ "->" ^ s in
-  insist ("v2->v3->" = fold_left_uses lf "" v1);
-  let rf u s = value_name (user u) ^ "<-" ^ s in
-  insist ("v3<-v2<-" = fold_right_uses rf v1 "");
-
-  let lf s u = value_name (used_value u) ^ "->" ^ s in
-  insist ("v1->v1->" = fold_left_uses lf "" v1);
-
-  let rf u s = value_name (used_value u) ^ "<-" ^ s in
-  insist ("v1<-v1<-" = fold_right_uses rf v1 "");
-
-  ignore (build_unreachable b)
-
-
-(*===-- Users -------------------------------------------------------------===*)
-
-let test_users () =
-  let ty = function_type i32_type [| i32_type; i32_type |] in
-  let fn = define_function "user_function" ty m in
-  let b = builder_at_end context (entry_block fn) in
-
-  let p1 = param fn 0 in
-  let p2 = param fn 1 in
-  let a3 = build_alloca i32_type "user_alloca" b in
-  let p3 = build_load a3 "user_load" b in
-  let i = build_add p1 p2 "sum" b in
-
-  insist ((num_operands i) = 2);
-  insist ((operand i 0) = p1);
-  insist ((operand i 1) = p2);
-
-  set_operand i 1 p3;
-  insist ((operand i 1) != p2);
-  insist ((operand i 1) = p3);
-
-  ignore (build_unreachable b)
-
-
-(*===-- Aliases -----------------------------------------------------------===*)
-
-let test_aliases () =
-  (* CHECK: @alias = alias i32* @aliasee
-   *)
-  let forty_two32 = const_int i32_type 42 in
-  let v = define_global "aliasee" forty_two32 m in
-  ignore (add_alias m (pointer_type i32_type) v "alias")
-
-
-(*===-- Functions ---------------------------------------------------------===*)
-
-let test_functions () =
-  let ty = function_type i32_type [| i32_type; i64_type |] in
-  let ty2 = function_type i8_type [| i8_type; i64_type |] in
-  
-  (* CHECK: declare i32 @Fn1(i32, i64)
-   *)
-  begin group "declare";
-    insist (None = lookup_function "Fn1" m);
-    let fn = declare_function "Fn1" ty m in
-    insist (pointer_type ty = type_of fn);
-    insist (is_declaration fn);
-    insist (0 = Array.length (basic_blocks fn));
-    insist (pointer_type ty2 == type_of (declare_function "Fn1" ty2 m));
-    insist (fn == declare_function "Fn1" ty m);
-    insist (None <> lookup_function "Fn1" m);
-    insist (match lookup_function "Fn1" m with Some x -> x = fn
-                                             | None -> false);
-    insist (m == global_parent fn)
-  end;
-  
-  (* CHECK-NOWHERE-NOT: Fn2
-   *)
-  group "delete";
-  let fn = declare_function "Fn2" ty m in
-  delete_function fn;
-  
-  (* CHECK: define{{.*}}Fn3
-   *)
-  group "define";
-  let fn = define_function "Fn3" ty m in
-  insist (not (is_declaration fn));
-  insist (1 = Array.length (basic_blocks fn));
-  ignore (build_unreachable (builder_at_end context (entry_block fn)));
-  
-  (* CHECK: define{{.*}}Fn4{{.*}}Param1{{.*}}Param2
-   *)
-  group "params";
-  let fn = define_function "Fn4" ty m in
-  let params = params fn in
-  insist (2 = Array.length params);
-  insist (params.(0) = param fn 0);
-  insist (params.(1) = param fn 1);
-  insist (i32_type = type_of params.(0));
-  insist (i64_type = type_of params.(1));
-  set_value_name "Param1" params.(0);
-  set_value_name "Param2" params.(1);
-  ignore (build_unreachable (builder_at_end context (entry_block fn)));
-  
-  (* CHECK: fastcc{{.*}}Fn5
-   *)
-  group "callconv";
-  let fn = define_function "Fn5" ty m in
-  insist (CallConv.c = function_call_conv fn);
-  set_function_call_conv CallConv.fast fn;
-  insist (CallConv.fast = function_call_conv fn);
-  ignore (build_unreachable (builder_at_end context (entry_block fn)));
-  
-  begin group "gc";
-    (* CHECK: Fn6{{.*}}gc{{.*}}shadowstack
-     *)
-    let fn = define_function "Fn6" ty m in
-    insist (None = gc fn);
-    set_gc (Some "ocaml") fn;
-    insist (Some "ocaml" = gc fn);
-    set_gc None fn;
-    insist (None = gc fn);
-    set_gc (Some "shadowstack") fn;
-    ignore (build_unreachable (builder_at_end context (entry_block fn)));
-  end;
-  
-  begin group "iteration";
-    let m = create_module context "temp" in
-    
-    insist (At_end m = function_begin m);
-    insist (At_start m = function_end m);
-    
-    let f1 = define_function "One" ty m in
-    let f2 = define_function "Two" ty m in
-    
-    insist (Before f1 = function_begin m);
-    insist (Before f2 = function_succ f1);
-    insist (At_end m = function_succ f2);
-    
-    insist (After f2 = function_end m);
-    insist (After f1 = function_pred f2);
-    insist (At_start m = function_pred f1);
-    
-    let lf s x = s ^ "->" ^ value_name x in
-    insist ("->One->Two" = fold_left_functions lf "" m);
-    
-    let rf x s = value_name x ^ "<-" ^ s in
-    insist ("One<-Two<-" = fold_right_functions rf m "");
-    
-    dispose_module m
-  end
-
-
-(*===-- Params ------------------------------------------------------------===*)
-
-let test_params () =
-  begin group "iteration";
-    let m = create_module context "temp" in
-    
-    let vf = define_function "void" (function_type void_type [| |]) m in
-    
-    insist (At_end vf = param_begin vf);
-    insist (At_start vf = param_end vf);
-    
-    let ty = function_type void_type [| i32_type; i32_type |] in
-    let f = define_function "f" ty m in
-    let p1 = param f 0 in
-    let p2 = param f 1 in
-    set_value_name "One" p1;
-    set_value_name "Two" p2;
-    add_param_attr p1 Attribute.Sext;
-    add_param_attr p2 Attribute.Noalias;
-    remove_param_attr p2 Attribute.Noalias;
-    add_function_attr f Attribute.Nounwind;
-    add_function_attr f Attribute.Noreturn;
-    remove_function_attr f Attribute.Noreturn;
-
-    insist (Before p1 = param_begin f);
-    insist (Before p2 = param_succ p1);
-    insist (At_end f = param_succ p2);
-    
-    insist (After p2 = param_end f);
-    insist (After p1 = param_pred p2);
-    insist (At_start f = param_pred p1);
-    
-    let lf s x = s ^ "->" ^ value_name x in
-    insist ("->One->Two" = fold_left_params lf "" f);
-    
-    let rf x s = value_name x ^ "<-" ^ s in
-    insist ("One<-Two<-" = fold_right_params rf f "");
-    
-    dispose_module m
-  end
-
-
-(*===-- Basic Blocks ------------------------------------------------------===*)
-
-let test_basic_blocks () =
-  let ty = function_type void_type [| |] in
-  
-  (* CHECK: Bb1
-   *)
-  group "entry";
-  let fn = declare_function "X" ty m in
-  let bb = append_block context "Bb1" fn in
-  insist (bb = entry_block fn);
-  ignore (build_unreachable (builder_at_end context bb));
-  
-  (* CHECK-NOWHERE-NOT: Bb2
-   *)
-  group "delete";
-  let fn = declare_function "X2" ty m in
-  let bb = append_block context "Bb2" fn in
-  delete_block bb;
-  
-  group "insert";
-  let fn = declare_function "X3" ty m in
-  let bbb = append_block context "b" fn in
-  let bba = insert_block context "a" bbb in
-  insist ([| bba; bbb |] = basic_blocks fn);
-  ignore (build_unreachable (builder_at_end context bba));
-  ignore (build_unreachable (builder_at_end context bbb));
-  
-  (* CHECK: Bb3
-   *)
-  group "name/value";
-  let fn = define_function "X4" ty m in
-  let bb = entry_block fn in
-  ignore (build_unreachable (builder_at_end context bb));
-  let bbv = value_of_block bb in
-  set_value_name "Bb3" bbv;
-  insist ("Bb3" = value_name bbv);
-  
-  group "casts";
-  let fn = define_function "X5" ty m in
-  let bb = entry_block fn in
-  ignore (build_unreachable (builder_at_end context bb));
-  insist (bb = block_of_value (value_of_block bb));
-  insist (value_is_block (value_of_block bb));
-  insist (not (value_is_block (const_null i32_type)));
-  
-  begin group "iteration";
-    let m = create_module context "temp" in
-    let f = declare_function "Temp" (function_type i32_type [| |]) m in
-    
-    insist (At_end f = block_begin f);
-    insist (At_start f = block_end f);
-    
-    let b1 = append_block context "One" f in
-    let b2 = append_block context "Two" f in
-    
-    insist (Before b1 = block_begin f);
-    insist (Before b2 = block_succ b1);
-    insist (At_end f = block_succ b2);
-    
-    insist (After b2 = block_end f);
-    insist (After b1 = block_pred b2);
-    insist (At_start f = block_pred b1);
-    
-    let lf s x = s ^ "->" ^ value_name (value_of_block x) in
-    insist ("->One->Two" = fold_left_blocks lf "" f);
-    
-    let rf x s = value_name (value_of_block x) ^ "<-" ^ s in
-    insist ("One<-Two<-" = fold_right_blocks rf f "");
-    
-    dispose_module m
-  end
-
-
-(*===-- Instructions ------------------------------------------------------===*)
-
-let test_instructions () =
-  begin group "iteration";
-    let m = create_module context "temp" in
-    let fty = function_type void_type [| i32_type; i32_type |] in
-    let f = define_function "f" fty m in
-    let bb = entry_block f in
-    let b = builder_at context (At_end bb) in
-    
-    insist (At_end bb = instr_begin bb);
-    insist (At_start bb = instr_end bb);
-    
-    let i1 = build_add (param f 0) (param f 1) "One" b in
-    let i2 = build_sub (param f 0) (param f 1) "Two" b in
-    
-    insist (Before i1 = instr_begin bb);
-    insist (Before i2 = instr_succ i1);
-    insist (At_end bb = instr_succ i2);
-    
-    insist (After i2 = instr_end bb);
-    insist (After i1 = instr_pred i2);
-    insist (At_start bb = instr_pred i1);
-    
-    let lf s x = s ^ "->" ^ value_name x in
-    insist ("->One->Two" = fold_left_instrs lf "" bb);
-    
-    let rf x s = value_name x ^ "<-" ^ s in
-    insist ("One<-Two<-" = fold_right_instrs rf bb "");
-    
-    dispose_module m
-  end
-
-
-(*===-- Builder -----------------------------------------------------------===*)
-
-let test_builder () =
-  let (++) x f = f x; x in
-  
-  begin group "parent";
-    insist (try
-              ignore (insertion_block (builder context));
-              false
-            with Not_found ->
-              true);
-    
-    let fty = function_type void_type [| i32_type |] in
-    let fn = define_function "BuilderParent" fty m in
-    let bb = entry_block fn in
-    let b = builder_at_end context bb in
-    let p = param fn 0 in
-    let sum = build_add p p "sum" b in
-    ignore (build_ret_void b);
-    
-    insist (fn = block_parent bb);
-    insist (fn = param_parent p);
-    insist (bb = instr_parent sum);
-    insist (bb = insertion_block b)
-  end;
-  
-  group "ret void";
-  begin
-    (* CHECK: ret void
-     *)
-    let fty = function_type void_type [| |] in
-    let fn = declare_function "X6" fty m in
-    let b = builder_at_end context (append_block context "Bb01" fn) in
-    ignore (build_ret_void b)
-  end;
-
-  group "ret aggregate";
-  begin
-      (* CHECK: ret { i8, i64 } { i8 4, i64 5 }
-       *)
-      let sty = struct_type context [| i8_type; i64_type |] in
-      let fty = function_type sty [| |] in
-      let fn = declare_function "XA6" fty m in
-      let b = builder_at_end context (append_block context "Bb01" fn) in
-      let agg = [| const_int i8_type 4; const_int i64_type 5 |] in
-      ignore (build_aggregate_ret agg b)
-  end;
-  
-  (* The rest of the tests will use one big function. *)
-  let fty = function_type i32_type [| i32_type; i32_type |] in
-  let fn = define_function "X7" fty m in
-  let atentry = builder_at_end context (entry_block fn) in
-  let p1 = param fn 0 ++ set_value_name "P1" in
-  let p2 = param fn 1 ++ set_value_name "P2" in
-  let f1 = build_uitofp p1 float_type "F1" atentry in
-  let f2 = build_uitofp p2 float_type "F2" atentry in
-  
-  let bb00 = append_block context "Bb00" fn in
-  ignore (build_unreachable (builder_at_end context bb00));
-
-  group "function attribute";
-  begin
-      ignore (add_function_attr fn Attribute.UWTable);
-      (* CHECK: X7{{.*}}#0
-       * #0 is uwtable, defined at EOF.
-       *)
-      insist ([Attribute.UWTable] = function_attr fn);
-  end;
-
-  group "casts"; begin
-    let void_ptr = pointer_type i8_type in
-
-    (* CHECK-DAG: %build_trunc = trunc i32 %P1 to i8
-     * CHECK-DAG: %build_trunc2 = trunc i32 %P1 to i8
-     * CHECK-DAG: %build_trunc3 = trunc i32 %P1 to i8
-     * CHECK-DAG: %build_zext = zext i8 %build_trunc to i32
-     * CHECK-DAG: %build_zext2 = zext i8 %build_trunc to i32
-     * CHECK-DAG: %build_sext = sext i32 %build_zext to i64
-     * CHECK-DAG: %build_sext2 = sext i32 %build_zext to i64
-     * CHECK-DAG: %build_sext3 = sext i32 %build_zext to i64
-     * CHECK-DAG: %build_uitofp = uitofp i64 %build_sext to float
-     * CHECK-DAG: %build_sitofp = sitofp i32 %build_zext to double
-     * CHECK-DAG: %build_fptoui = fptoui float %build_uitofp to i32
-     * CHECK-DAG: %build_fptosi = fptosi double %build_sitofp to i64
-     * CHECK-DAG: %build_fptrunc = fptrunc double %build_sitofp to float
-     * CHECK-DAG: %build_fptrunc2 = fptrunc double %build_sitofp to float
-     * CHECK-DAG: %build_fpext = fpext float %build_fptrunc to double
-     * CHECK-DAG: %build_fpext2 = fpext float %build_fptrunc to double
-     * CHECK-DAG: %build_inttoptr = inttoptr i32 %P1 to i8*
-     * CHECK-DAG: %build_ptrtoint = ptrtoint i8* %build_inttoptr to i64
-     * CHECK-DAG: %build_ptrtoint2 = ptrtoint i8* %build_inttoptr to i64
-     * CHECK-DAG: %build_bitcast = bitcast i64 %build_ptrtoint to double
-     * CHECK-DAG: %build_bitcast2 = bitcast i64 %build_ptrtoint to double
-     * CHECK-DAG: %build_bitcast3 = bitcast i64 %build_ptrtoint to double
-     * CHECK-DAG: %build_bitcast4 = bitcast i64 %build_ptrtoint to double
-     * CHECK-DAG: %build_pointercast = bitcast i8* %build_inttoptr to i16*
-     *)
-    let inst28 = build_trunc p1 i8_type "build_trunc" atentry in
-    let inst29 = build_zext inst28 i32_type "build_zext" atentry in
-    let inst30 = build_sext inst29 i64_type "build_sext" atentry in
-    let inst31 = build_uitofp inst30 float_type "build_uitofp" atentry in
-    let inst32 = build_sitofp inst29 double_type "build_sitofp" atentry in
-    ignore(build_fptoui inst31 i32_type "build_fptoui" atentry);
-    ignore(build_fptosi inst32 i64_type "build_fptosi" atentry);
-    let inst35 = build_fptrunc inst32 float_type "build_fptrunc" atentry in
-    ignore(build_fpext inst35 double_type "build_fpext" atentry);
-    let inst37 = build_inttoptr p1 void_ptr "build_inttoptr" atentry in
-    let inst38 = build_ptrtoint inst37 i64_type "build_ptrtoint" atentry in
-    ignore(build_bitcast inst38 double_type "build_bitcast" atentry);
-    ignore(build_zext_or_bitcast inst38 double_type "build_bitcast2" atentry);
-    ignore(build_sext_or_bitcast inst38 double_type "build_bitcast3" atentry);
-    ignore(build_trunc_or_bitcast inst38 double_type "build_bitcast4" atentry);
-    ignore(build_pointercast inst37 (pointer_type i16_type) "build_pointercast" atentry);
-
-    ignore(build_zext_or_bitcast inst28 i32_type "build_zext2" atentry);
-    ignore(build_sext_or_bitcast inst29 i64_type "build_sext2" atentry);
-    ignore(build_trunc_or_bitcast p1 i8_type "build_trunc2" atentry);
-    ignore(build_pointercast inst37 i64_type "build_ptrtoint2" atentry);
-    ignore(build_intcast inst29 i64_type "build_sext3" atentry);
-    ignore(build_intcast p1 i8_type "build_trunc3" atentry);
-    ignore(build_fpcast inst35 double_type "build_fpext2" atentry);
-    ignore(build_fpcast inst32 float_type "build_fptrunc2" atentry);
-  end;
-
-  group "comparisons"; begin
-    (* CHECK: %build_icmp_ne = icmp ne i32 %P1, %P2
-     * CHECK: %build_icmp_sle = icmp sle i32 %P2, %P1
-     * CHECK: %build_fcmp_false = fcmp false float %F1, %F2
-     * CHECK: %build_fcmp_true = fcmp true float %F2, %F1
-     * CHECK: %build_is_null{{.*}}= icmp eq{{.*}}%X0,{{.*}}null
-     * CHECK: %build_is_not_null = icmp ne i8* %X1, null
-     * CHECK: %build_ptrdiff
-     *)
-    ignore (build_icmp Icmp.Ne    p1 p2 "build_icmp_ne" atentry);
-    ignore (build_icmp Icmp.Sle   p2 p1 "build_icmp_sle" atentry);
-    ignore (build_fcmp Fcmp.False f1 f2 "build_fcmp_false" atentry);
-    ignore (build_fcmp Fcmp.True  f2 f1 "build_fcmp_true" atentry);
-    let g0 = declare_global (pointer_type i8_type) "g0" m in
-    let g1 = declare_global (pointer_type i8_type) "g1" m in
-    let p0 = build_load g0 "X0" atentry in
-    let p1 = build_load g1 "X1" atentry in
-    ignore (build_is_null p0 "build_is_null" atentry);
-    ignore (build_is_not_null p1 "build_is_not_null" atentry);
-    ignore (build_ptrdiff p1 p0 "build_ptrdiff" atentry);
-  end;
-
-  group "miscellaneous"; begin
-    (* CHECK: %build_call = tail call cc63 i32 @{{.*}}(i32 signext %P2, i32 %P1)
-     * CHECK: %build_select = select i1 %build_icmp, i32 %P1, i32 %P2
-     * CHECK: %build_va_arg = va_arg i8** null, i32
-     * CHECK: %build_extractelement = extractelement <4 x i32> %Vec1, i32 %P2
-     * CHECK: %build_insertelement = insertelement <4 x i32> %Vec1, i32 %P1, i32 %P2
-     * CHECK: %build_shufflevector = shufflevector <4 x i32> %Vec1, <4 x i32> %Vec2, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
-     * CHECK: %build_insertvalue0 = insertvalue{{.*}}%bl, i32 1, 0
-     * CHECK: %build_extractvalue = extractvalue{{.*}}%build_insertvalue1, 1
-     *)
-    let ci = build_call fn [| p2; p1 |] "build_call" atentry in
-    insist (CallConv.c = instruction_call_conv ci);
-    set_instruction_call_conv 63 ci;
-    insist (63 = instruction_call_conv ci);
-    insist (not (is_tail_call ci));
-    set_tail_call true ci;
-    insist (is_tail_call ci);
-    add_instruction_param_attr ci 1 Attribute.Sext;
-    add_instruction_param_attr ci 2 Attribute.Noalias;
-    remove_instruction_param_attr ci 2 Attribute.Noalias;
-
-    let inst46 = build_icmp Icmp.Eq p1 p2 "build_icmp" atentry in
-    ignore (build_select inst46 p1 p2 "build_select" atentry);
-    ignore (build_va_arg
-      (const_null (pointer_type (pointer_type i8_type)))
-      i32_type "build_va_arg" atentry);
-
-    (* Set up some vector vregs. *)
-    let one  = const_int i32_type 1 in
-    let zero = const_int i32_type 0 in
-    let t1 = const_vector [| one; zero; one; zero |] in
-    let t2 = const_vector [| zero; one; zero; one |] in
-    let t3 = const_vector [| one; one; zero; zero |] in
-    let vec1 = build_insertelement t1 p1 p2 "Vec1" atentry in
-    let vec2 = build_insertelement t2 p1 p2 "Vec2" atentry in
-    let sty = struct_type context [| i32_type; i8_type |] in
-
-    ignore (build_extractelement vec1 p2 "build_extractelement" atentry);
-    ignore (build_insertelement vec1 p1 p2 "build_insertelement" atentry);
-    ignore (build_shufflevector vec1 vec2 t3 "build_shufflevector" atentry);
-
-    let p = build_alloca sty "ba" atentry in
-    let agg = build_load p "bl" atentry in
-    let agg0 = build_insertvalue agg (const_int i32_type 1) 0
-                 "build_insertvalue0" atentry in
-    let agg1 = build_insertvalue agg0 (const_int i8_type 2) 1
-                 "build_insertvalue1" atentry in
-    ignore (build_extractvalue agg1 1 "build_extractvalue" atentry)
-  end;
-
-  group "metadata"; begin
-    (* CHECK: %metadata = add i32 %P1, %P2, !test !1
-     * !1 is metadata emitted at EOF.
-     *)
-    let i = build_add p1 p2 "metadata" atentry in
-    insist ((has_metadata i) = false);
-
-    let m1 = const_int i32_type 1 in
-    let m2 = mdstring context "metadata test" in
-    let md = mdnode context [| m1; m2 |] in
-
-    let kind = mdkind_id context "test" in
-    set_metadata i kind md;
-
-    insist ((has_metadata i) = true);
-    insist ((metadata i kind) = Some md);
-
-    clear_metadata i kind;
-
-    insist ((has_metadata i) = false);
-    insist ((metadata i kind) = None);
-
-    set_metadata i kind md
-  end;
-
-  group "named metadata"; begin
-    (* !llvm.module.flags is emitted at EOF. *)
-    let n1 = const_int i32_type 1 in
-    let n2 = mdstring context "Debug Info Version" in
-    let md = mdnode context [| n1; n2; n1 |] in
-    add_named_metadata_operand m "llvm.module.flags" md;
-
-    insist ((get_named_metadata m "llvm.module.flags") = [| md |])
-  end;
-
-  group "dbg"; begin
-    (* CHECK: %dbg = add i32 %P1, %P2, !dbg !2
-     * !2 is metadata emitted at EOF.
-     *)
-    insist ((current_debug_location atentry) = None);
-
-    let m_line = const_int i32_type 2 in
-    let m_col = const_int i32_type 3 in
-    let m_scope = mdnode context [| |] in
-    let m_inlined = mdnode context [| |] in
-    let md = mdnode context [| m_line; m_col; m_scope; m_inlined |] in
-    set_current_debug_location atentry md;
-
-    insist ((current_debug_location atentry) = Some md);
-
-    let i = build_add p1 p2 "dbg" atentry in
-    insist ((has_metadata i) = true);
-
-    clear_current_debug_location atentry
-  end;
-
-  group "ret"; begin
-    (* CHECK: ret{{.*}}P1
-     *)
-    let ret = build_ret p1 atentry in
-    position_before ret atentry
-  end;
-
-  (* see test/Feature/exception.ll *)
-  let bblpad = append_block context "Bblpad" fn in
-  let rt = struct_type context [| pointer_type i8_type; i32_type |] in
-  let ft = var_arg_function_type i32_type  [||] in
-  let personality = declare_function "__gxx_personality_v0" ft m in
-  let ztic = declare_global (pointer_type i8_type) "_ZTIc" m in
-  let ztid = declare_global (pointer_type i8_type) "_ZTId" m in
-  let ztipkc = declare_global (pointer_type i8_type) "_ZTIPKc" m in
-  begin
-      set_global_constant true ztic;
-      set_global_constant true ztid;
-      set_global_constant true ztipkc;
-      let lp = build_landingpad rt personality 0 "lpad"
-       (builder_at_end context bblpad) in begin
-           set_cleanup lp true;
-           add_clause lp ztic;
-           insist((pointer_type (pointer_type i8_type)) = type_of ztid);
-           let ety = pointer_type (pointer_type i8_type) in
-           add_clause lp (const_array ety [| ztipkc; ztid |]);
-           ignore (build_resume lp (builder_at_end context bblpad));
-      end;
-      (* CHECK: landingpad{{.*}}personality{{.*}}__gxx_personality_v0
-       * CHECK: cleanup
-       * CHECK: catch{{.*}}i8**{{.*}}@_ZTIc
-       * CHECK: filter{{.*}}@_ZTIPKc{{.*}}@_ZTId
-       * CHECK: resume
-       * *)
-  end;
-
-  group "br"; begin
-    (* CHECK: br{{.*}}Bb02
-     *)
-    let bb02 = append_block context "Bb02" fn in
-    let b = builder_at_end context bb02 in
-    ignore (build_br bb02 b)
-  end;
-  
-  group "cond_br"; begin
-    (* CHECK: br{{.*}}build_br{{.*}}Bb03{{.*}}Bb00
-     *)
-    let bb03 = append_block context "Bb03" fn in
-    let b = builder_at_end context bb03 in
-    let cond = build_trunc p1 i1_type "build_br" b in
-    ignore (build_cond_br cond bb03 bb00 b)
-  end;
-  
-  group "switch"; begin
-    (* CHECK: switch{{.*}}P1{{.*}}SwiBlock3
-     * CHECK: 2,{{.*}}SwiBlock2
-     *)
-    let bb1 = append_block context "SwiBlock1" fn in
-    let bb2 = append_block context "SwiBlock2" fn in
-    ignore (build_unreachable (builder_at_end context bb2));
-    let bb3 = append_block context "SwiBlock3" fn in
-    ignore (build_unreachable (builder_at_end context bb3));
-    let si = build_switch p1 bb3 1 (builder_at_end context bb1) in begin
-        ignore (add_case si (const_int i32_type 2) bb2);
-        insist (switch_default_dest si = bb3);
-    end;
-  end;
-
-  group "malloc/free"; begin
-      (* CHECK: call{{.*}}@malloc(i32 ptrtoint
-       * CHECK: call{{.*}}@free(i8*
-       * CHECK: call{{.*}}@malloc(i32 %
-       *)
-      let bb1 = append_block context "MallocBlock1" fn in
-      let m1 = (build_malloc (pointer_type i32_type) "m1"
-      (builder_at_end context bb1)) in
-      ignore (build_free m1 (builder_at_end context bb1));
-      ignore (build_array_malloc i32_type p1 "m2" (builder_at_end context bb1));
-      ignore (build_unreachable (builder_at_end context bb1));
-  end;
-
-  group "indirectbr"; begin
-    (* CHECK: indirectbr i8* blockaddress(@X7, %IBRBlock2), [label %IBRBlock2, label %IBRBlock3]
-     *)
-    let bb1 = append_block context "IBRBlock1" fn in
-
-    let bb2 = append_block context "IBRBlock2" fn in
-    ignore (build_unreachable (builder_at_end context bb2));
-
-    let bb3 = append_block context "IBRBlock3" fn in
-    ignore (build_unreachable (builder_at_end context bb3));
-
-    let addr = block_address fn bb2 in
-    let ibr = build_indirect_br addr 2 (builder_at_end context bb1) in
-    ignore (add_destination ibr bb2);
-    ignore (add_destination ibr bb3)
-  end;
-  
-  group "invoke"; begin
-    (* CHECK: build_invoke{{.*}}invoke{{.*}}P1{{.*}}P2
-     * CHECK: to{{.*}}Bb04{{.*}}unwind{{.*}}Bblpad
-     *)
-    let bb04 = append_block context "Bb04" fn in
-    let b = builder_at_end context bb04 in
-    ignore (build_invoke fn [| p1; p2 |] bb04 bblpad "build_invoke" b)
-  end;
-  
-  group "unreachable"; begin
-    (* CHECK: unreachable
-     *)
-    let bb06 = append_block context "Bb06" fn in
-    let b = builder_at_end context bb06 in
-    ignore (build_unreachable b)
-  end;
-  
-  group "arithmetic"; begin
-    let bb07 = append_block context "Bb07" fn in
-    let b = builder_at_end context bb07 in
-    
-    (* CHECK: %build_add = add i32 %P1, %P2
-     * CHECK: %build_nsw_add = add nsw i32 %P1, %P2
-     * CHECK: %build_nuw_add = add nuw i32 %P1, %P2
-     * CHECK: %build_fadd = fadd float %F1, %F2
-     * CHECK: %build_sub = sub i32 %P1, %P2
-     * CHECK: %build_nsw_sub = sub nsw i32 %P1, %P2
-     * CHECK: %build_nuw_sub = sub nuw i32 %P1, %P2
-     * CHECK: %build_fsub = fsub float %F1, %F2
-     * CHECK: %build_mul = mul i32 %P1, %P2
-     * CHECK: %build_nsw_mul = mul nsw i32 %P1, %P2
-     * CHECK: %build_nuw_mul = mul nuw i32 %P1, %P2
-     * CHECK: %build_fmul = fmul float %F1, %F2
-     * CHECK: %build_udiv = udiv i32 %P1, %P2
-     * CHECK: %build_sdiv = sdiv i32 %P1, %P2
-     * CHECK: %build_exact_sdiv = sdiv exact i32 %P1, %P2
-     * CHECK: %build_fdiv = fdiv float %F1, %F2
-     * CHECK: %build_urem = urem i32 %P1, %P2
-     * CHECK: %build_srem = srem i32 %P1, %P2
-     * CHECK: %build_frem = frem float %F1, %F2
-     * CHECK: %build_shl = shl i32 %P1, %P2
-     * CHECK: %build_lshl = lshr i32 %P1, %P2
-     * CHECK: %build_ashl = ashr i32 %P1, %P2
-     * CHECK: %build_and = and i32 %P1, %P2
-     * CHECK: %build_or = or i32 %P1, %P2
-     * CHECK: %build_xor = xor i32 %P1, %P2
-     * CHECK: %build_neg = sub i32 0, %P1
-     * CHECK: %build_nsw_neg = sub nsw i32 0, %P1
-     * CHECK: %build_nuw_neg = sub nuw i32 0, %P1
-     * CHECK: %build_fneg = fsub float {{.*}}0{{.*}}, %F1
-     * CHECK: %build_not = xor i32 %P1, -1
-     *)
-    ignore (build_add p1 p2 "build_add" b);
-    ignore (build_nsw_add p1 p2 "build_nsw_add" b);
-    ignore (build_nuw_add p1 p2 "build_nuw_add" b);
-    ignore (build_fadd f1 f2 "build_fadd" b);
-    ignore (build_sub p1 p2 "build_sub" b);
-    ignore (build_nsw_sub p1 p2 "build_nsw_sub" b);
-    ignore (build_nuw_sub p1 p2 "build_nuw_sub" b);
-    ignore (build_fsub f1 f2 "build_fsub" b);
-    ignore (build_mul p1 p2 "build_mul" b);
-    ignore (build_nsw_mul p1 p2 "build_nsw_mul" b);
-    ignore (build_nuw_mul p1 p2 "build_nuw_mul" b);
-    ignore (build_fmul f1 f2 "build_fmul" b);
-    ignore (build_udiv p1 p2 "build_udiv" b);
-    ignore (build_sdiv p1 p2 "build_sdiv" b);
-    ignore (build_exact_sdiv p1 p2 "build_exact_sdiv" b);
-    ignore (build_fdiv f1 f2 "build_fdiv" b);
-    ignore (build_urem p1 p2 "build_urem" b);
-    ignore (build_srem p1 p2 "build_srem" b);
-    ignore (build_frem f1 f2 "build_frem" b);
-    ignore (build_shl p1 p2 "build_shl" b);
-    ignore (build_lshr p1 p2 "build_lshl" b);
-    ignore (build_ashr p1 p2 "build_ashl" b);
-    ignore (build_and p1 p2 "build_and" b);
-    ignore (build_or p1 p2 "build_or" b);
-    ignore (build_xor p1 p2 "build_xor" b);
-    ignore (build_neg p1 "build_neg" b);
-    ignore (build_nsw_neg p1 "build_nsw_neg" b);
-    ignore (build_nuw_neg p1 "build_nuw_neg" b);
-    ignore (build_fneg f1 "build_fneg" b);
-    ignore (build_not p1 "build_not" b);
-    ignore (build_unreachable b)
-  end;
-  
-  group "memory"; begin
-    let bb08 = append_block context "Bb08" fn in
-    let b = builder_at_end context bb08 in
-
-    (* CHECK: %build_alloca = alloca i32
-     * CHECK: %build_array_alloca = alloca i32, i32 %P2
-     * CHECK: %build_load = load volatile i32* %build_array_alloca, align 4
-     * CHECK: store volatile i32 %P2, i32* %build_alloca, align 4
-     * CHECK: %build_gep = getelementptr i32* %build_array_alloca, i32 %P2
-     * CHECK: %build_in_bounds_gep = getelementptr inbounds i32* %build_array_alloca, i32 %P2
-     * CHECK: %build_struct_gep = getelementptr inbounds{{.*}}%build_alloca2, i32 0, i32 1
-     * CHECK: %build_atomicrmw = atomicrmw xchg i8* %p, i8 42 seq_cst
-     *)
-    let alloca = build_alloca i32_type "build_alloca" b in
-    let array_alloca = build_array_alloca i32_type p2 "build_array_alloca" b in
-
-    let load = build_load array_alloca "build_load" b in
-    ignore(set_alignment 4 load);
-    ignore(set_volatile true load);
-    insist(true = is_volatile load);
-    insist(4 = alignment load);
-
-    let store = build_store p2 alloca b in
-    ignore(set_volatile true store);
-    ignore(set_alignment 4 store);
-    insist(true = is_volatile store);
-    insist(4 = alignment store);
-    ignore(build_gep array_alloca [| p2 |] "build_gep" b);
-    ignore(build_in_bounds_gep array_alloca [| p2 |] "build_in_bounds_gep" b);
-
-    let sty = struct_type context [| i32_type; i8_type |] in
-    let alloca2 = build_alloca sty "build_alloca2" b in
-    ignore(build_struct_gep alloca2 1 "build_struct_gep" b);
-
-    let p = build_alloca i8_type "p" b in
-    ignore(build_atomicrmw AtomicRMWBinOp.Xchg p (const_int i8_type 42)
-              AtomicOrdering.SequentiallyConsistent false "build_atomicrmw"
-              b);
-
-    ignore(build_unreachable b)
-  end;
-
-  group "string"; begin
-    let bb09 = append_block context "Bb09" fn in
-    let b = builder_at_end context bb09 in
-    let p = build_alloca (pointer_type i8_type) "p" b in
-    (* build_global_string is emitted above.
-     * CHECK: store{{.*}}build_global_string1{{.*}}p
-     * *)
-    ignore (build_global_string "stringval" "build_global_string" b);
-    let g = build_global_stringptr "stringval" "build_global_string1" b in
-    ignore (build_store g p b);
-    ignore(build_unreachable b);
-  end;
-
-  group "phi"; begin
-    (* CHECK: PhiNode{{.*}}P1{{.*}}PhiBlock1{{.*}}P2{{.*}}PhiBlock2
-     *)
-    let b1 = append_block context "PhiBlock1" fn in
-    let b2 = append_block context "PhiBlock2" fn in
-    
-    let jb = append_block context "PhiJoinBlock" fn in
-    ignore (build_br jb (builder_at_end context b1));
-    ignore (build_br jb (builder_at_end context b2));
-    let at_jb = builder_at_end context jb in
-    
-    let phi = build_phi [(p1, b1)] "PhiNode" at_jb in
-    insist ([(p1, b1)] = incoming phi);
-    
-    add_incoming (p2, b2) phi;
-    insist ([(p1, b1); (p2, b2)] = incoming phi);
-    
-    ignore (build_unreachable at_jb);
-  end
-
-(* End-of-file checks for things like metdata and attributes.
- * CHECK: attributes #0 = {{.*}}uwtable{{.*}}
- * CHECK: !llvm.module.flags = !{!0}
- * CHECK: !0 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
- * CHECK: !1 = metadata !{i32 1, metadata !"metadata test"}
- * CHECK: !2 = metadata !{i32 2, i32 3, metadata !3, metadata !3}
- *)
-
-(*===-- Pass Managers -----------------------------------------------------===*)
-
-let test_pass_manager () =
-  let (++) x f = ignore (f x); x in
-
-  begin group "module pass manager";
-    ignore (PassManager.create ()
-             ++ PassManager.run_module m
-             ++ PassManager.dispose)
-  end;
-  
-  begin group "function pass manager";
-    let fty = function_type void_type [| |] in
-    let fn = define_function "FunctionPassManager" fty m in
-    ignore (build_ret_void (builder_at_end context (entry_block fn)));
-    
-    ignore (PassManager.create_function m
-             ++ PassManager.initialize
-             ++ PassManager.run_function fn
-             ++ PassManager.finalize
-             ++ PassManager.dispose)
-  end
-
-
-(*===-- Memory Buffer -----------------------------------------------------===*)
-
-let test_memory_buffer () =
-  group "memory buffer";
-  let buf = MemoryBuffer.of_string "foobar" in
-  insist ((MemoryBuffer.as_string buf) = "foobar")
-
-
-(*===-- Writer ------------------------------------------------------------===*)
-
-let test_writer () =
-  group "valid";
-  insist (match Llvm_analysis.verify_module m with
-          | None -> true
-          | Some msg -> prerr_string msg; false);
-
-  group "writer";
-  insist (write_bitcode_file m filename);
-  
-  dispose_module m
-
-
-(*===-- Driver ------------------------------------------------------------===*)
-
-let _ =
-  suite "conversion"       test_conversion;
-  suite "target"           test_target;
-  suite "constants"        test_constants;
-  suite "global values"    test_global_values;
-  suite "global variables" test_global_variables;
-  suite "uses"             test_uses;
-  suite "users"            test_users;
-  suite "aliases"          test_aliases;
-  suite "functions"        test_functions;
-  suite "params"           test_params;
-  suite "basic blocks"     test_basic_blocks;
-  suite "instructions"     test_instructions;
-  suite "builder"          test_builder;
-  suite "pass manager"     test_pass_manager;
-  suite "memory buffer"    test_memory_buffer;
-  suite "writer"           test_writer; (* Keep this last; it disposes m. *)
-  exit !exit_status
diff --git a/test/Bindings/llvm-c/disassemble.test b/test/Bindings/llvm-c/disassemble.test
index 201e914587d4..bb7a9a01ab9a 100644
--- a/test/Bindings/llvm-c/disassemble.test
+++ b/test/Bindings/llvm-c/disassemble.test
@@ -1,15 +1,27 @@
 ; RUN: llvm-c-test --disassemble < %s | FileCheck %s
 
+armv8-linux-gnu     +crypto 02 00 81 e0 02 03 b0 f3
+;CHECK: triple: armv8-linux-gnu, features: +crypto
+;CHECK: 02 00 81 e0                  add r0, r1, r2
+;CHECK: 02 03 b0 f3                  aese.8 q0, q1
 
-arm-linux-android    44 26 1f e5 0c 10 4b e2 02 20 81 e0
-;CHECK: triple: arm-linux-android
+armv8-linux-gnu     -crypto 02 00 81 e0 02 03 b0 f3
+;CHECK: triple: armv8-linux-gnu, features: -crypto
+;CHECK: 02 00 81 e0                  add r0, r1, r2
+;CHECK: 02                           ???
+;CHECK: 03                           ???
+;CHECK: b0                           ???
+;CHECK: f3                           ???
+
+arm-linux-android     NULL  44 26 1f e5 0c 10 4b e2 02 20 81 e0
+;CHECK: triple: arm-linux-android, features: NULL
 ;CHECK: ldr	r2, [pc, #-1604]
 ;CHECK: sub	r1, r11, #12
 ;CHECK: 02 20 81 e0
 ;CHECK: add	r2, r1, r2
 
-x86_64-linux-unknown 48 83 c4 38 5b 5d 41 5c 41 5d 41 5e 41 5f c3
-;CHECK: triple: x86_64-linux-unknown
+x86_64-linux-unknown  NULL  48 83 c4 38 5b 5d 41 5c 41 5d 41 5e 41 5f c3
+;CHECK: triple: x86_64-linux-unknown, features: NULL
 ;CHECK: addq	$56, %rsp
 ;CHECK: popq	%rbx
 ;CHECK: popq	%rbp
@@ -19,11 +31,13 @@ x86_64-linux-unknown 48 83 c4 38 5b 5d 41 5c 41 5d 41 5e 41 5f c3
 ;CHECK: popq	%r15
 ;CHECK: ret
 
-i686-apple-darwin    0f b7 4c 24 0a e8 29 ce ff ff
+i686-apple-darwin     NULL  0f b7 4c 24 0a e8 29 ce ff ff
+;CHECK: triple: i686-apple-darwin, features: NULL
 ;CHECK: movzwl	10(%esp), %ecx
 ;CHECK: calll	-12759
 
-i686-linux-unknown   dd 44 24 04 d9 e1 c3
+i686-linux-unknown    NULL  dd 44 24 04 d9 e1 c3
+;CHECK: triple: i686-linux-unknown, features: NULL
 ;CHECK: fldl	4(%esp)
 ;CHECK: fabs
 ;CHECK: ret
diff --git a/test/Bindings/llvm-c/objectfile.ll b/test/Bindings/llvm-c/objectfile.ll
new file mode 100644
index 000000000000..b6cb4a0477eb
--- /dev/null
+++ b/test/Bindings/llvm-c/objectfile.ll
@@ -0,0 +1,2 @@
+; RUN: not llvm-c-test --object-list-sections < /dev/null
+; This used to cause a segfault
diff --git a/test/Bitcode/2006-12-11-Cast-ConstExpr.ll b/test/Bitcode/2006-12-11-Cast-ConstExpr.ll
index e704627e3610..35bf7abd584c 100644
--- a/test/Bitcode/2006-12-11-Cast-ConstExpr.ll
+++ b/test/Bitcode/2006-12-11-Cast-ConstExpr.ll
@@ -1,6 +1,7 @@
 ; This test ensures that we get a bitcast constant expression in and out,
 ; not a sitofp constant expression. 
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder < %s
 ; CHECK: bitcast (
 
 @G = external global i32
diff --git a/test/Bitcode/2009-06-11-FirstClassAggregateConstant.ll b/test/Bitcode/2009-06-11-FirstClassAggregateConstant.ll
index 415f88e16374..9405fbbe79bf 100644
--- a/test/Bitcode/2009-06-11-FirstClassAggregateConstant.ll
+++ b/test/Bitcode/2009-06-11-FirstClassAggregateConstant.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis -disable-output
+; RUN: verify-uselistorder < %s
 ; PR4373
 
 @foo = weak global { i32 } zeroinitializer              
diff --git a/test/Bitcode/aggregateInstructions.3.2.ll b/test/Bitcode/aggregateInstructions.3.2.ll
index 9352390b131b..59aafd1de233 100644
--- a/test/Bitcode/aggregateInstructions.3.2.ll
+++ b/test/Bitcode/aggregateInstructions.3.2.ll
@@ -1,33 +1,34 @@
-; RUN:  llvm-dis < %s.bc| FileCheck %s
-
-; aggregateOperations.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
-; The test checks that LLVM does not misread instructions with aggregate operands
-; in older bitcode files.
-
-define void @extractvalue([4 x i8] %x1, [4 x [4 x i8]] %x2, {{i32, float}} %x3){
-entry:
-; CHECK: %res1 = extractvalue [4 x i8] %x1, 0
-  %res1 = extractvalue [4 x i8] %x1, 0
-  
-; CHECK-NEXT: %res2 = extractvalue [4 x [4 x i8]] %x2, 1
-  %res2 = extractvalue [4 x [4 x i8 ]] %x2, 1
-
-; CHECK-NEXT: %res3 = extractvalue [4 x [4 x i8]] %x2, 0, 1
-  %res3 = extractvalue [4 x [4 x i8 ]] %x2, 0, 1
-  
-; CHECK-NEXT: %res4 = extractvalue { { i32, float } } %x3, 0, 1
-  %res4 = extractvalue {{i32, float}} %x3, 0, 1
-
-  ret void
-}
-
-define void @insertvalue([4 x [4 x i8 ]] %x1){
-entry:
-; CHECK: %res1 = insertvalue [4 x [4 x i8]] %x1, i8 0, 0, 0
-  %res1 = insertvalue [4 x [4 x i8 ]] %x1, i8 0, 0, 0
-  
-; CHECK-NEXT: %res2 = insertvalue [4 x [4 x i8]] undef, i8 0, 0, 0
-  %res2 = insertvalue [4 x [4 x i8 ]] undef, i8 0, 0, 0
-
-  ret void
-}
-\ No newline at end of file
+; RUN: llvm-dis < %s.bc| FileCheck %s
+; RUN: verify-uselistorder < %s.bc
+
+; aggregateOperations.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
+; The test checks that LLVM does not misread instructions with aggregate operands
+; in older bitcode files.
+
+define void @extractvalue([4 x i8] %x1, [4 x [4 x i8]] %x2, {{i32, float}} %x3){
+entry:
+; CHECK: %res1 = extractvalue [4 x i8] %x1, 0
+  %res1 = extractvalue [4 x i8] %x1, 0
+
+; CHECK-NEXT: %res2 = extractvalue [4 x [4 x i8]] %x2, 1
+  %res2 = extractvalue [4 x [4 x i8 ]] %x2, 1
+
+; CHECK-NEXT: %res3 = extractvalue [4 x [4 x i8]] %x2, 0, 1
+  %res3 = extractvalue [4 x [4 x i8 ]] %x2, 0, 1
+
+; CHECK-NEXT: %res4 = extractvalue { { i32, float } } %x3, 0, 1
+  %res4 = extractvalue {{i32, float}} %x3, 0, 1
+
+  ret void
+}
+
+define void @insertvalue([4 x [4 x i8 ]] %x1){
+entry:
+; CHECK: %res1 = insertvalue [4 x [4 x i8]] %x1, i8 0, 0, 0
+  %res1 = insertvalue [4 x [4 x i8 ]] %x1, i8 0, 0, 0
+
+; CHECK-NEXT: %res2 = insertvalue [4 x [4 x i8]] undef, i8 0, 0, 0
+  %res2 = insertvalue [4 x [4 x i8 ]] undef, i8 0, 0, 0
+
+  ret void
+}
diff --git a/test/Bitcode/arm32_neon_vcnt_upgrade.ll b/test/Bitcode/arm32_neon_vcnt_upgrade.ll
index 10b9284e9814..ed3981b465ac 100644
--- a/test/Bitcode/arm32_neon_vcnt_upgrade.ll
+++ b/test/Bitcode/arm32_neon_vcnt_upgrade.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder < %s
 ; Tests vclz and vcnt
 
 define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
diff --git a/test/Bitcode/atomic.ll b/test/Bitcode/atomic.ll
index 37815a749b55..c09e74c1c2f2 100644
--- a/test/Bitcode/atomic.ll
+++ b/test/Bitcode/atomic.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as %s -o - | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder < %s
 
 define void @test_cmpxchg(i32* %addr, i32 %desired, i32 %new) {
   cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
@@ -14,4 +15,4 @@ define void @test_cmpxchg(i32* %addr, i32 %desired, i32 %new) {
   ; CHECK: cmpxchg weak volatile i32* %addr, i32 %desired, i32 %new singlethread release monotonic
 
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/Bitcode/attributes-3.3.ll b/test/Bitcode/attributes-3.3.ll
index cd70ba1a749a..b564425c3738 100644
--- a/test/Bitcode/attributes-3.3.ll
+++ b/test/Bitcode/attributes-3.3.ll
@@ -1,4 +1,5 @@
 ; RUN:  llvm-dis < %s.bc| FileCheck %s
+; RUN:  verify-uselistorder < %s.bc
 
 ; attributes-3.3.ll.bc was generated by passing this file to llvm-as-3.3.
 ; The test checks that LLVM does not silently misread attributes of
diff --git a/test/Bitcode/attributes.ll b/test/Bitcode/attributes.ll
index 2490e5920726..c75ee80dc661 100644
--- a/test/Bitcode/attributes.ll
+++ b/test/Bitcode/attributes.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder < %s
 ; PR12696
 
 define void @f1(i8 zeroext)
@@ -239,6 +240,11 @@ define dereferenceable(18446744073709551606) i8* @f40(i8* dereferenceable(184467
         ret i8* %a
 }
 
+define void @f41(i8* align 32, double* align 64) {
+; CHECK: define void @f41(i8* align 32, double* align 64) {
+        ret void
+}
+
 ; CHECK: attributes #0 = { noreturn }
 ; CHECK: attributes #1 = { nounwind }
 ; CHECK: attributes #2 = { readnone }
diff --git a/test/Bitcode/binaryFloatInstructions.3.2.ll b/test/Bitcode/binaryFloatInstructions.3.2.ll
index f94d82d23c9f..cec16839f9cc 100644
--- a/test/Bitcode/binaryFloatInstructions.3.2.ll
+++ b/test/Bitcode/binaryFloatInstructions.3.2.ll
@@ -1,120 +1,121 @@
-; RUN:  llvm-dis < %s.bc| FileCheck %s
-
-; BinaryFloatOperation.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
-; The test checks that LLVM does not misread binary float instructions from
-; older bitcode files.
-
-define void @fadd(float %x1, double %x2 ,half %x3, fp128 %x4, x86_fp80 %x5, ppc_fp128 %x6){
-entry:
-; CHECK: %res1 = fadd float %x1, %x1
-  %res1 = fadd float %x1, %x1
-
-; CHECK-NEXT: %res2 = fadd double %x2, %x2
-  %res2 = fadd double %x2, %x2
-
-; CHECK-NEXT: %res3 = fadd half %x3, %x3
-  %res3 = fadd half %x3, %x3
-
-; CHECK-NEXT: %res4 = fadd fp128 %x4, %x4
-  %res4 = fadd fp128 %x4, %x4
-
-; CHECK-NEXT: %res5 = fadd x86_fp80 %x5, %x5
-  %res5 = fadd x86_fp80 %x5, %x5
-  
-; CHECK-NEXT: %res6 = fadd ppc_fp128 %x6, %x6
-  %res6 = fadd ppc_fp128 %x6, %x6
-  
-  ret void
-}
-
-define void @faddFloatVec(<2 x float> %x1, <3 x float> %x2 ,<4 x float> %x3, <8 x float> %x4, <16 x float> %x5){
-entry:
-; CHECK: %res1 = fadd <2 x float> %x1, %x1
-  %res1 = fadd <2 x float> %x1, %x1
-
-; CHECK-NEXT: %res2 = fadd <3 x float> %x2, %x2
-  %res2 = fadd <3 x float> %x2, %x2
-
-; CHECK-NEXT: %res3 = fadd <4 x float> %x3, %x3
-  %res3 = fadd <4 x float> %x3, %x3
-
-; CHECK-NEXT: %res4 = fadd <8 x float> %x4, %x4
-  %res4 = fadd <8 x float> %x4, %x4
-
-; CHECK-NEXT: %res5 = fadd <16 x float> %x5, %x5
-  %res5 = fadd <16 x float> %x5, %x5
-  
-  ret void
-}
-
-define void @faddDoubleVec(<2 x double> %x1, <3 x double> %x2 ,<4 x double> %x3, <8 x double> %x4, <16 x double> %x5){
-entry:
-; CHECK: %res1 = fadd <2 x double> %x1, %x1
-  %res1 = fadd <2 x double> %x1, %x1
-
-; CHECK-NEXT: %res2 = fadd <3 x double> %x2, %x2
-  %res2 = fadd <3 x double> %x2, %x2
-
-; CHECK-NEXT: %res3 = fadd <4 x double> %x3, %x3
-  %res3 = fadd <4 x double> %x3, %x3
-
-; CHECK-NEXT: %res4 = fadd <8 x double> %x4, %x4
-  %res4 = fadd <8 x double> %x4, %x4
-
-; CHECK-NEXT: %res5 = fadd <16 x double> %x5, %x5
-  %res5 = fadd <16 x double> %x5, %x5
-  
-  ret void
-}
-
-define void @faddHalfVec(<2 x half> %x1, <3 x half> %x2 ,<4 x half> %x3, <8 x half> %x4, <16 x half> %x5){
-entry:
-; CHECK: %res1 = fadd <2 x half> %x1, %x1
-  %res1 = fadd <2 x half> %x1, %x1
-
-; CHECK-NEXT: %res2 = fadd <3 x half> %x2, %x2
-  %res2 = fadd <3 x half> %x2, %x2
-
-; CHECK-NEXT: %res3 = fadd <4 x half> %x3, %x3
-  %res3 = fadd <4 x half> %x3, %x3
-
-; CHECK-NEXT: %res4 = fadd <8 x half> %x4, %x4
-  %res4 = fadd <8 x half> %x4, %x4
-
-; CHECK-NEXT: %res5 = fadd <16 x half> %x5, %x5
-  %res5 = fadd <16 x half> %x5, %x5
-  
-  ret void
-}
-
-define void @fsub(float %x1){
-entry:
-; CHECK: %res1 = fsub float %x1, %x1
-  %res1 = fsub float %x1, %x1
-
-  ret void
-}
-
-define void @fmul(float %x1){
-entry:
-; CHECK: %res1 = fmul float %x1, %x1
-  %res1 = fmul float %x1, %x1
-  
-  ret void
-}
-
-define void @fdiv(float %x1){
-entry:
-; CHECK: %res1 = fdiv float %x1, %x1
-  %res1 = fdiv float %x1, %x1
-  
-  ret void
-}
-
-define void @frem(float %x1){
-entry:
-; CHECK: %res1 = frem float %x1, %x1
-  %res1 = frem float %x1, %x1
-
-  ret void
-}
+; RUN: llvm-dis < %s.bc| FileCheck %s
+; RUN: verify-uselistorder < %s.bc
+
+; BinaryFloatOperation.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
+; The test checks that LLVM does not misread binary float instructions from
+; older bitcode files.
+
+define void @fadd(float %x1, double %x2 ,half %x3, fp128 %x4, x86_fp80 %x5, ppc_fp128 %x6){
+entry:
+; CHECK: %res1 = fadd float %x1, %x1
+  %res1 = fadd float %x1, %x1
+
+; CHECK-NEXT: %res2 = fadd double %x2, %x2
+  %res2 = fadd double %x2, %x2
+
+; CHECK-NEXT: %res3 = fadd half %x3, %x3
+  %res3 = fadd half %x3, %x3
+
+; CHECK-NEXT: %res4 = fadd fp128 %x4, %x4
+  %res4 = fadd fp128 %x4, %x4
+
+; CHECK-NEXT: %res5 = fadd x86_fp80 %x5, %x5
+  %res5 = fadd x86_fp80 %x5, %x5
+
+; CHECK-NEXT: %res6 = fadd ppc_fp128 %x6, %x6
+  %res6 = fadd ppc_fp128 %x6, %x6
+
+  ret void
+}
+
+define void @faddFloatVec(<2 x float> %x1, <3 x float> %x2 ,<4 x float> %x3, <8 x float> %x4, <16 x float> %x5){
+entry:
+; CHECK: %res1 = fadd <2 x float> %x1, %x1
+  %res1 = fadd <2 x float> %x1, %x1
+
+; CHECK-NEXT: %res2 = fadd <3 x float> %x2, %x2
+  %res2 = fadd <3 x float> %x2, %x2
+
+; CHECK-NEXT: %res3 = fadd <4 x float> %x3, %x3
+  %res3 = fadd <4 x float> %x3, %x3
+
+; CHECK-NEXT: %res4 = fadd <8 x float> %x4, %x4
+  %res4 = fadd <8 x float> %x4, %x4
+
+; CHECK-NEXT: %res5 = fadd <16 x float> %x5, %x5
+  %res5 = fadd <16 x float> %x5, %x5
+
+  ret void
+}
+
+define void @faddDoubleVec(<2 x double> %x1, <3 x double> %x2 ,<4 x double> %x3, <8 x double> %x4, <16 x double> %x5){
+entry:
+; CHECK: %res1 = fadd <2 x double> %x1, %x1
+  %res1 = fadd <2 x double> %x1, %x1
+
+; CHECK-NEXT: %res2 = fadd <3 x double> %x2, %x2
+  %res2 = fadd <3 x double> %x2, %x2
+
+; CHECK-NEXT: %res3 = fadd <4 x double> %x3, %x3
+  %res3 = fadd <4 x double> %x3, %x3
+
+; CHECK-NEXT: %res4 = fadd <8 x double> %x4, %x4
+  %res4 = fadd <8 x double> %x4, %x4
+
+; CHECK-NEXT: %res5 = fadd <16 x double> %x5, %x5
+  %res5 = fadd <16 x double> %x5, %x5
+
+  ret void
+}
+
+define void @faddHalfVec(<2 x half> %x1, <3 x half> %x2 ,<4 x half> %x3, <8 x half> %x4, <16 x half> %x5){
+entry:
+; CHECK: %res1 = fadd <2 x half> %x1, %x1
+  %res1 = fadd <2 x half> %x1, %x1
+
+; CHECK-NEXT: %res2 = fadd <3 x half> %x2, %x2
+  %res2 = fadd <3 x half> %x2, %x2
+
+; CHECK-NEXT: %res3 = fadd <4 x half> %x3, %x3
+  %res3 = fadd <4 x half> %x3, %x3
+
+; CHECK-NEXT: %res4 = fadd <8 x half> %x4, %x4
+  %res4 = fadd <8 x half> %x4, %x4
+
+; CHECK-NEXT: %res5 = fadd <16 x half> %x5, %x5
+  %res5 = fadd <16 x half> %x5, %x5
+
+  ret void
+}
+
+define void @fsub(float %x1){
+entry:
+; CHECK: %res1 = fsub float %x1, %x1
+  %res1 = fsub float %x1, %x1
+
+  ret void
+}
+
+define void @fmul(float %x1){
+entry:
+; CHECK: %res1 = fmul float %x1, %x1
+  %res1 = fmul float %x1, %x1
+
+  ret void
+}
+
+define void @fdiv(float %x1){
+entry:
+; CHECK: %res1 = fdiv float %x1, %x1
+  %res1 = fdiv float %x1, %x1
+
+  ret void
+}
+
+define void @frem(float %x1){
+entry:
+; CHECK: %res1 = frem float %x1, %x1
+  %res1 = frem float %x1, %x1
+
+  ret void
+}
diff --git a/test/Bitcode/binaryIntInstructions.3.2.ll b/test/Bitcode/binaryIntInstructions.3.2.ll
index b08501ca932a..e484ff129a73 100644
--- a/test/Bitcode/binaryIntInstructions.3.2.ll
+++ b/test/Bitcode/binaryIntInstructions.3.2.ll
@@ -1,177 +1,178 @@
-; RUN:  llvm-dis < %s.bc| FileCheck %s
-
-; BinaryIntOperation.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
-; The test checks that LLVM does not misread binary integer instructions from
-; older bitcode files.
-
-define void @add(i1 %x1, i8 %x2 ,i16 %x3, i32 %x4, i64 %x5){
-entry:
-; CHECK: %res1 = add i1 %x1, %x1
-  %res1 = add i1 %x1, %x1
-
-; CHECK-NEXT: %res2 = add i8 %x2, %x2
-  %res2 = add i8 %x2, %x2
-
-; CHECK-NEXT: %res3 = add i16 %x3, %x3
-  %res3 = add i16 %x3, %x3
-
-; CHECK-NEXT: %res4 = add i32 %x4, %x4
-  %res4 = add i32 %x4, %x4
-
-; CHECK-NEXT: %res5 = add i64 %x5, %x5
-  %res5 = add i64 %x5, %x5
-  
-; CHECK: %res6 = add nuw i1 %x1, %x1
-  %res6 = add nuw i1 %x1, %x1
-  
-; CHECK: %res7 = add nsw i1 %x1, %x1
-  %res7 = add nsw i1 %x1, %x1
-  
-; CHECK: %res8 = add nuw nsw i1 %x1, %x1
-  %res8 = add nuw nsw i1 %x1, %x1
-  
-  ret void
-}
-
-define void @addvec8NuwNsw(<2 x i8> %x1, <3 x i8> %x2 ,<4 x i8> %x3, <8 x i8> %x4, <16 x i8> %x5){
-entry:
-; CHECK: %res1 = add nuw nsw <2 x i8> %x1, %x1
-  %res1 = add nuw nsw <2 x i8> %x1, %x1
-
-; CHECK-NEXT: %res2 = add nuw nsw <3 x i8> %x2, %x2
-  %res2 = add nuw nsw <3 x i8> %x2, %x2
-
-; CHECK-NEXT: %res3 = add nuw nsw <4 x i8> %x3, %x3
-  %res3 = add nuw nsw <4 x i8> %x3, %x3
-
-; CHECK-NEXT: %res4 = add nuw nsw <8 x i8> %x4, %x4
-  %res4 = add nuw nsw <8 x i8> %x4, %x4
-  
-; CHECK-NEXT: %res5 = add nuw nsw <16 x i8> %x5, %x5
-  %res5 = add nuw nsw <16 x i8> %x5, %x5
-  
-  ret void
-}
-
-define void @addvec16NuwNsw(<2 x i16> %x1, <3 x i16> %x2 ,<4 x i16> %x3, <8 x i16> %x4, <16 x i16> %x5){
-entry:
-; CHECK: %res1 = add nuw nsw <2 x i16> %x1, %x1
-  %res1 = add nuw nsw <2 x i16> %x1, %x1
-
-; CHECK-NEXT: %res2 = add nuw nsw <3 x i16> %x2, %x2
-  %res2 = add nuw nsw <3 x i16> %x2, %x2
-
-; CHECK-NEXT: %res3 = add nuw nsw <4 x i16> %x3, %x3
-  %res3 = add nuw nsw <4 x i16> %x3, %x3
-
-; CHECK-NEXT: %res4 = add nuw nsw <8 x i16> %x4, %x4
-  %res4 = add nuw nsw <8 x i16> %x4, %x4
-  
-; CHECK-NEXT: %res5 = add nuw nsw <16 x i16> %x5, %x5
-  %res5 = add nuw nsw <16 x i16> %x5, %x5
-  
-  ret void
-}
-
-define void @addvec32NuwNsw(<2 x i32> %x1, <3 x i32> %x2 ,<4 x i32> %x3, <8 x i32> %x4, <16 x i32> %x5){
-entry:
-; CHECK: %res1 = add nuw nsw <2 x i32> %x1, %x1
-  %res1 = add nuw nsw <2 x i32> %x1, %x1
-
-; CHECK-NEXT: %res2 = add nuw nsw <3 x i32> %x2, %x2
-  %res2 = add nuw nsw <3 x i32> %x2, %x2
-
-; CHECK-NEXT: %res3 = add nuw nsw <4 x i32> %x3, %x3
-  %res3 = add nuw nsw <4 x i32> %x3, %x3
-
-; CHECK-NEXT: %res4 = add nuw nsw <8 x i32> %x4, %x4
-  %res4 = add nuw nsw <8 x i32> %x4, %x4
-  
-; CHECK-NEXT: %res5 = add nuw nsw <16 x i32> %x5, %x5
-  %res5 = add nuw nsw <16 x i32> %x5, %x5
-  
-  ret void
-}
-
-define void @addvec64NuwNsw(<2 x i64> %x1, <3 x i64> %x2 ,<4 x i64> %x3, <8 x i64> %x4, <16 x i64> %x5){
-entry:
-; CHECK: %res1 = add nuw nsw <2 x i64> %x1, %x1
-  %res1 = add nuw nsw <2 x i64> %x1, %x1
-
-; CHECK-NEXT: %res2 = add nuw nsw <3 x i64> %x2, %x2
-  %res2 = add nuw nsw <3 x i64> %x2, %x2
-
-; CHECK-NEXT: %res3 = add nuw nsw <4 x i64> %x3, %x3
-  %res3 = add nuw nsw <4 x i64> %x3, %x3
-
-; CHECK-NEXT: %res4 = add nuw nsw <8 x i64> %x4, %x4
-  %res4 = add nuw nsw <8 x i64> %x4, %x4
-  
-; CHECK-NEXT: %res5 = add nuw nsw <16 x i64> %x5, %x5
-  %res5 = add nuw nsw <16 x i64> %x5, %x5
-  
-  ret void
-}
-
-define void @sub(i8 %x1){
-entry:
-; CHECK: %res1 = sub i8 %x1, %x1
-  %res1 = sub i8 %x1, %x1
-  
-; CHECK: %res2 = sub nuw i8 %x1, %x1
-  %res2 = sub nuw i8 %x1, %x1
-  
-; CHECK: %res3 = sub nsw i8 %x1, %x1
-  %res3 = sub nsw i8 %x1, %x1
-  
-; CHECK: %res4 = sub nuw nsw i8 %x1, %x1
-  %res4 = sub nuw nsw i8 %x1, %x1
-  
-  ret void
-}
-
-define void @mul(i8 %x1){
-entry:
-; CHECK: %res1 = mul i8 %x1, %x1
-  %res1 = mul i8 %x1, %x1
-  
-  ret void
-}
-
-define void @udiv(i8 %x1){
-entry:
-; CHECK: %res1 = udiv i8 %x1, %x1
-  %res1 = udiv i8 %x1, %x1
-  
-; CHECK-NEXT: %res2 = udiv exact i8 %x1, %x1
-  %res2 = udiv exact i8 %x1, %x1
-
-  ret void
-}
-
-define void @sdiv(i8 %x1){
-entry:
-; CHECK: %res1 = sdiv i8 %x1, %x1
-  %res1 = sdiv i8 %x1, %x1
-  
-; CHECK-NEXT: %res2 = sdiv exact i8 %x1, %x1
-  %res2 = sdiv exact i8 %x1, %x1
-
-  ret void
-}
-
-define void @urem(i32 %x1){
-entry:
-; CHECK: %res1 = urem i32 %x1, %x1
-  %res1 = urem i32 %x1, %x1
-  
-  ret void
-}
-
-define void @srem(i32 %x1){
-entry:
-; CHECK: %res1 = srem i32 %x1, %x1
-  %res1 = srem i32 %x1, %x1
-  
-  ret void
-}
+; RUN: llvm-dis < %s.bc| FileCheck %s
+; RUN: verify-uselistorder < %s.bc
+
+; BinaryIntOperation.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
+; The test checks that LLVM does not misread binary integer instructions from
+; older bitcode files.
+
+define void @add(i1 %x1, i8 %x2 ,i16 %x3, i32 %x4, i64 %x5){
+entry:
+; CHECK: %res1 = add i1 %x1, %x1
+  %res1 = add i1 %x1, %x1
+
+; CHECK-NEXT: %res2 = add i8 %x2, %x2
+  %res2 = add i8 %x2, %x2
+
+; CHECK-NEXT: %res3 = add i16 %x3, %x3
+  %res3 = add i16 %x3, %x3
+
+; CHECK-NEXT: %res4 = add i32 %x4, %x4
+  %res4 = add i32 %x4, %x4
+
+; CHECK-NEXT: %res5 = add i64 %x5, %x5
+  %res5 = add i64 %x5, %x5
+
+; CHECK: %res6 = add nuw i1 %x1, %x1
+  %res6 = add nuw i1 %x1, %x1
+
+; CHECK: %res7 = add nsw i1 %x1, %x1
+  %res7 = add nsw i1 %x1, %x1
+
+; CHECK: %res8 = add nuw nsw i1 %x1, %x1
+  %res8 = add nuw nsw i1 %x1, %x1
+
+  ret void
+}
+
+define void @addvec8NuwNsw(<2 x i8> %x1, <3 x i8> %x2 ,<4 x i8> %x3, <8 x i8> %x4, <16 x i8> %x5){
+entry:
+; CHECK: %res1 = add nuw nsw <2 x i8> %x1, %x1
+  %res1 = add nuw nsw <2 x i8> %x1, %x1
+
+; CHECK-NEXT: %res2 = add nuw nsw <3 x i8> %x2, %x2
+  %res2 = add nuw nsw <3 x i8> %x2, %x2
+
+; CHECK-NEXT: %res3 = add nuw nsw <4 x i8> %x3, %x3
+  %res3 = add nuw nsw <4 x i8> %x3, %x3
+
+; CHECK-NEXT: %res4 = add nuw nsw <8 x i8> %x4, %x4
+  %res4 = add nuw nsw <8 x i8> %x4, %x4
+
+; CHECK-NEXT: %res5 = add nuw nsw <16 x i8> %x5, %x5
+  %res5 = add nuw nsw <16 x i8> %x5, %x5
+
+  ret void
+}
+
+define void @addvec16NuwNsw(<2 x i16> %x1, <3 x i16> %x2 ,<4 x i16> %x3, <8 x i16> %x4, <16 x i16> %x5){
+entry:
+; CHECK: %res1 = add nuw nsw <2 x i16> %x1, %x1
+  %res1 = add nuw nsw <2 x i16> %x1, %x1
+
+; CHECK-NEXT: %res2 = add nuw nsw <3 x i16> %x2, %x2
+  %res2 = add nuw nsw <3 x i16> %x2, %x2
+
+; CHECK-NEXT: %res3 = add nuw nsw <4 x i16> %x3, %x3
+  %res3 = add nuw nsw <4 x i16> %x3, %x3
+
+; CHECK-NEXT: %res4 = add nuw nsw <8 x i16> %x4, %x4
+  %res4 = add nuw nsw <8 x i16> %x4, %x4
+
+; CHECK-NEXT: %res5 = add nuw nsw <16 x i16> %x5, %x5
+  %res5 = add nuw nsw <16 x i16> %x5, %x5
+
+  ret void
+}
+
+define void @addvec32NuwNsw(<2 x i32> %x1, <3 x i32> %x2 ,<4 x i32> %x3, <8 x i32> %x4, <16 x i32> %x5){
+entry:
+; CHECK: %res1 = add nuw nsw <2 x i32> %x1, %x1
+  %res1 = add nuw nsw <2 x i32> %x1, %x1
+
+; CHECK-NEXT: %res2 = add nuw nsw <3 x i32> %x2, %x2
+  %res2 = add nuw nsw <3 x i32> %x2, %x2
+
+; CHECK-NEXT: %res3 = add nuw nsw <4 x i32> %x3, %x3
+  %res3 = add nuw nsw <4 x i32> %x3, %x3
+
+; CHECK-NEXT: %res4 = add nuw nsw <8 x i32> %x4, %x4
+  %res4 = add nuw nsw <8 x i32> %x4, %x4
+
+; CHECK-NEXT: %res5 = add nuw nsw <16 x i32> %x5, %x5
+  %res5 = add nuw nsw <16 x i32> %x5, %x5
+
+  ret void
+}
+
+define void @addvec64NuwNsw(<2 x i64> %x1, <3 x i64> %x2 ,<4 x i64> %x3, <8 x i64> %x4, <16 x i64> %x5){
+entry:
+; CHECK: %res1 = add nuw nsw <2 x i64> %x1, %x1
+  %res1 = add nuw nsw <2 x i64> %x1, %x1
+
+; CHECK-NEXT: %res2 = add nuw nsw <3 x i64> %x2, %x2
+  %res2 = add nuw nsw <3 x i64> %x2, %x2
+
+; CHECK-NEXT: %res3 = add nuw nsw <4 x i64> %x3, %x3
+  %res3 = add nuw nsw <4 x i64> %x3, %x3
+
+; CHECK-NEXT: %res4 = add nuw nsw <8 x i64> %x4, %x4
+  %res4 = add nuw nsw <8 x i64> %x4, %x4
+
+; CHECK-NEXT: %res5 = add nuw nsw <16 x i64> %x5, %x5
+  %res5 = add nuw nsw <16 x i64> %x5, %x5
+
+  ret void
+}
+
+define void @sub(i8 %x1){
+entry:
+; CHECK: %res1 = sub i8 %x1, %x1
+  %res1 = sub i8 %x1, %x1
+
+; CHECK: %res2 = sub nuw i8 %x1, %x1
+  %res2 = sub nuw i8 %x1, %x1
+
+; CHECK: %res3 = sub nsw i8 %x1, %x1
+  %res3 = sub nsw i8 %x1, %x1
+
+; CHECK: %res4 = sub nuw nsw i8 %x1, %x1
+  %res4 = sub nuw nsw i8 %x1, %x1
+
+  ret void
+}
+
+define void @mul(i8 %x1){
+entry:
+; CHECK: %res1 = mul i8 %x1, %x1
+  %res1 = mul i8 %x1, %x1
+
+  ret void
+}
+
+define void @udiv(i8 %x1){
+entry:
+; CHECK: %res1 = udiv i8 %x1, %x1
+  %res1 = udiv i8 %x1, %x1
+
+; CHECK-NEXT: %res2 = udiv exact i8 %x1, %x1
+  %res2 = udiv exact i8 %x1, %x1
+
+  ret void
+}
+
+define void @sdiv(i8 %x1){
+entry:
+; CHECK: %res1 = sdiv i8 %x1, %x1
+  %res1 = sdiv i8 %x1, %x1
+
+; CHECK-NEXT: %res2 = sdiv exact i8 %x1, %x1
+  %res2 = sdiv exact i8 %x1, %x1
+
+  ret void
+}
+
+define void @urem(i32 %x1){
+entry:
+; CHECK: %res1 = urem i32 %x1, %x1
+  %res1 = urem i32 %x1, %x1
+
+  ret void
+}
+
+define void @srem(i32 %x1){
+entry:
+; CHECK: %res1 = srem i32 %x1, %x1
+  %res1 = srem i32 %x1, %x1
+
+  ret void
+}
diff --git a/test/Bitcode/bitwiseInstructions.3.2.ll b/test/Bitcode/bitwiseInstructions.3.2.ll
index 6225a08f2064..aaaf4f543a8e 100644
--- a/test/Bitcode/bitwiseInstructions.3.2.ll
+++ b/test/Bitcode/bitwiseInstructions.3.2.ll
@@ -1,68 +1,69 @@
-; RUN:  llvm-dis < %s.bc| FileCheck %s
-
-; bitwiseOperations.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
-; The test checks that LLVM does not misread bitwise instructions from
-; older bitcode files.
-
-define void @shl(i8 %x1){
-entry:
-; CHECK: %res1 = shl i8 %x1, %x1
-  %res1 = shl i8 %x1, %x1
-  
-; CHECK: %res2 = shl nuw i8 %x1, %x1
-  %res2 = shl nuw i8 %x1, %x1
-  
-; CHECK: %res3 = shl nsw i8 %x1, %x1
-  %res3 = shl nsw i8 %x1, %x1
-
-; CHECK: %res4 = shl nuw nsw i8 %x1, %x1
-  %res4 = shl nuw nsw i8 %x1, %x1
-  
-  ret void
-}
-
-define void @lshr(i8 %x1){
-entry:
-; CHECK: %res1 = lshr i8 %x1, %x1
-  %res1 = lshr i8 %x1, %x1
-  
-; CHECK: %res2 = lshr exact i8 %x1, %x1
-  %res2 = lshr exact i8 %x1, %x1
-  
-  ret void
-}
-
-define void @ashr(i8 %x1){
-entry:
-; CHECK: %res1 = ashr i8 %x1, %x1
-  %res1 = ashr i8 %x1, %x1
-
-; CHECK-NEXT: %res2 = ashr exact i8 %x1, %x1
-  %res2 = ashr exact i8 %x1, %x1
-
-  ret void
-}
-
-define void @and(i8 %x1){
-entry:
-; CHECK: %res1 = and i8 %x1, %x1
-  %res1 = and i8 %x1, %x1
-  
-  ret void
-}
-
-define void @or(i8 %x1){
-entry:
-; CHECK: %res1 = or i8 %x1, %x1
-  %res1 = or i8 %x1, %x1
-  
-  ret void
-}
-
-define void @xor(i8 %x1){
-entry:
-; CHECK: %res1 = xor i8 %x1, %x1
-  %res1 = xor i8 %x1, %x1
-  
-  ret void
-}
-\ No newline at end of file
+; RUN: llvm-dis < %s.bc| FileCheck %s
+; RUN: verify-uselistorder < %s.bc
+
+; bitwiseOperations.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
+; The test checks that LLVM does not misread bitwise instructions from
+; older bitcode files.
+
+define void @shl(i8 %x1){
+entry:
+; CHECK: %res1 = shl i8 %x1, %x1
+  %res1 = shl i8 %x1, %x1
+
+; CHECK: %res2 = shl nuw i8 %x1, %x1
+  %res2 = shl nuw i8 %x1, %x1
+
+; CHECK: %res3 = shl nsw i8 %x1, %x1
+  %res3 = shl nsw i8 %x1, %x1
+
+; CHECK: %res4 = shl nuw nsw i8 %x1, %x1
+  %res4 = shl nuw nsw i8 %x1, %x1
+
+  ret void
+}
+
+define void @lshr(i8 %x1){
+entry:
+; CHECK: %res1 = lshr i8 %x1, %x1
+  %res1 = lshr i8 %x1, %x1
+
+; CHECK: %res2 = lshr exact i8 %x1, %x1
+  %res2 = lshr exact i8 %x1, %x1
+
+  ret void
+}
+
+define void @ashr(i8 %x1){
+entry:
+; CHECK: %res1 = ashr i8 %x1, %x1
+  %res1 = ashr i8 %x1, %x1
+
+; CHECK-NEXT: %res2 = ashr exact i8 %x1, %x1
+  %res2 = ashr exact i8 %x1, %x1
+
+  ret void
+}
+
+define void @and(i8 %x1){
+entry:
+; CHECK: %res1 = and i8 %x1, %x1
+  %res1 = and i8 %x1, %x1
+
+  ret void
+}
+
+define void @or(i8 %x1){
+entry:
+; CHECK: %res1 = or i8 %x1, %x1
+  %res1 = or i8 %x1, %x1
+
+  ret void
+}
+
+define void @xor(i8 %x1){
+entry:
+; CHECK: %res1 = xor i8 %x1, %x1
+  %res1 = xor i8 %x1, %x1
+
+  ret void
+}
diff --git a/test/Bitcode/blockaddress.ll b/test/Bitcode/blockaddress.ll
index 8ac54be00d54..db109dfe469b 100644
--- a/test/Bitcode/blockaddress.ll
+++ b/test/Bitcode/blockaddress.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder < %s
 ; PR9857
 
 define void @f(i8** nocapture %ptr1) {
@@ -43,3 +44,17 @@ here:
 end:
   ret void
 }
+
+; Check a blockaddress taken in two separate functions before the referenced
+; function.
+define i8* @take1() {
+  ret i8* blockaddress(@taken, %bb)
+}
+define i8* @take2() {
+  ret i8* blockaddress(@taken, %bb)
+}
+define void @taken() {
+  unreachable
+bb:
+  unreachable
+}
diff --git a/test/Bitcode/calling-conventions.3.2.ll b/test/Bitcode/calling-conventions.3.2.ll
index aca9efd0892b..b60f1d70ca04 100644
--- a/test/Bitcode/calling-conventions.3.2.ll
+++ b/test/Bitcode/calling-conventions.3.2.ll
@@ -1,4 +1,5 @@
 ; RUN:  llvm-dis < %s.bc| FileCheck %s
+; RUN:  verify-uselistorder < %s.bc
 
 ; calling-conventions.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
 ; The test checks that LLVM does not silently misread calling conventions of
@@ -14,7 +15,7 @@ declare coldcc void @coldcc()
 ; CHECK: declare coldcc void @coldcc
 
 declare cc10 void @cc10()
-; CHECK: declare cc10 void @cc10
+; CHECK: declare ghccc void @cc10
 
 declare spir_kernel void @spir_kernel()
 ; CHECK: declare spir_kernel void @spir_kernel
@@ -71,7 +72,7 @@ define void @call_coldcc() {
 }
 
 define void @call_cc10 () { 
-; CHECK: call cc10 void @cc10 
+; CHECK: call ghccc void @cc10
   call cc10 void @cc10 ()
   ret void
 }
diff --git a/test/Bitcode/case-ranges-3.3.ll b/test/Bitcode/case-ranges-3.3.ll
index 6e1d0a69a591..020b37f49db7 100644
--- a/test/Bitcode/case-ranges-3.3.ll
+++ b/test/Bitcode/case-ranges-3.3.ll
@@ -1,4 +1,5 @@
 ; RUN:  llvm-dis < %s.bc| FileCheck %s
+; RUN:  verify-uselistorder < %s.bc
 
 ; case-ranges.ll.bc was generated by passing this file to llvm-as from the 3.3
 ; release of LLVM. This tests that the bitcode for switches from that release
diff --git a/test/Bitcode/cmpxchg-upgrade.ll b/test/Bitcode/cmpxchg-upgrade.ll
index d36ac1c17909..125729e99cd9 100644
--- a/test/Bitcode/cmpxchg-upgrade.ll
+++ b/test/Bitcode/cmpxchg-upgrade.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-dis < %s.bc | FileCheck %s
+; RUN: verify-uselistorder < %s.bc
 
 ; cmpxchg-upgrade.ll.bc was produced by running a version of llvm-as from just
 ; before the IR change on this file.
@@ -20,4 +21,4 @@ define void @test(i32* %addr) {
 ; CHECK: cmpxchg i32* %addr, i32 42, i32 0 seq_cst seq_cst
 
    ret void
-}
-\ No newline at end of file
+}
diff --git a/test/Bitcode/constantsTest.3.2.ll b/test/Bitcode/constantsTest.3.2.ll
new file mode 100644
index 000000000000..b4973cf7a832
--- /dev/null
+++ b/test/Bitcode/constantsTest.3.2.ll
@@ -0,0 +1,124 @@
+; RUN:  llvm-dis < %s.bc| FileCheck %s
+
+; constantsTest.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
+; The test checks that LLVM does not misread binary float instructions of
+; older bitcode files.
+
+;global variable address
+; CHECK: @X = global i32 0
+@X = global i32 0
+; CHECK: @Y = global i32 1
+@Y = global i32 1
+; CHECK: @Z = global [2 x i32*] [i32* @X, i32* @Y]
+@Z = global [2 x i32*] [i32* @X, i32* @Y]
+
+
+define void @SimpleConstants(i32 %x) {
+entry:
+; null
+; CHECK: store i32 %x, i32* null
+  store i32 %x, i32* null
+ 
+; boolean
+; CHECK-NEXT: %res1 = fcmp true float 1.000000e+00, 1.000000e+00 
+  %res1 = fcmp true float 1.0, 1.0
+; CHECK-NEXT: %res2 = fcmp false float 1.000000e+00, 1.000000e+00
+  %res2 = fcmp false float 1.0, 1.0
+
+;integer
+; CHECK-NEXT: %res3 = add i32 0, 0
+  %res3 = add i32 0, 0
+
+;float
+; CHECK-NEXT: %res4 = fadd float 0.000000e+00, 0.000000e+00
+  %res4 = fadd float 0.0, 0.0
+
+  ret void
+}
+
+define void @ComplexConstants(<2 x i32> %x){
+entry:
+;constant structure
+; CHECK: %res1 = extractvalue { i32, float } { i32 1, float 2.000000e+00 }, 0
+  %res1 = extractvalue {i32, float} {i32 1, float 2.0}, 0
+  
+;const array
+; CHECK-NEXT: %res2 = extractvalue [2 x i32] [i32 1, i32 2], 0
+  %res2 = extractvalue [2 x i32] [i32 1, i32 2], 0
+  
+;const vector
+; CHECK-NEXT: %res3 = add <2 x i32> <i32 1, i32 1>, <i32 1, i32 1>
+  %res3 = add <2 x i32> <i32 1, i32 1>, <i32 1, i32 1>
+  
+;zeroinitializer
+; CHECK-NEXT: %res4 = add <2 x i32> %x, zeroinitializer
+  %res4 = add <2 x i32> %x, zeroinitializer
+  
+  ret void
+}
+
+define void @OtherConstants(i32 %x, i8* %Addr){
+entry:
+  ;undef
+  ; CHECK: %res1 = add i32 %x, undef 
+  %res1 = add i32 %x, undef
+  
+  ;poison
+  ; CHECK-NEXT: %poison = sub nuw i32 0, 1
+  %poison = sub nuw i32 0, 1
+  
+  ;address of basic block
+  ; CHECK-NEXT: %res2 = icmp eq i8* blockaddress(@OtherConstants, %Next), null
+  %res2 = icmp eq i8* blockaddress(@OtherConstants, %Next), null
+  br label %Next
+  Next: 
+  ret void
+}
+
+define void @OtherConstants2(){
+entry:
+  ; CHECK: trunc i32 1 to i8
+  trunc i32 1 to i8
+  ; CHECK-NEXT: zext i8 1 to i32
+  zext i8 1 to i32
+  ; CHECK-NEXT: sext i8 1 to i32
+  sext i8 1 to i32
+  ; CHECK-NEXT: fptrunc double 1.000000e+00 to float
+  fptrunc double 1.0 to float
+  ; CHECK-NEXT: fpext float 1.000000e+00 to double
+  fpext float 1.0 to double
+  ; CHECK-NEXT: fptosi float 1.000000e+00 to i32
+  fptosi float 1.0 to i32
+  ; CHECK-NEXT: uitofp i32 1 to float
+  uitofp i32 1 to float
+  ; CHECK-NEXT: sitofp i32 -1 to float
+  sitofp i32 -1 to float
+  ; CHECK-NEXT: ptrtoint i32* @X to i32
+  ptrtoint i32* @X to i32
+  ; CHECK-NEXT: inttoptr i8 1 to i8*
+  inttoptr i8 1 to i8*
+  ; CHECK-NEXT: bitcast i32 1 to <2 x i16>
+  bitcast i32 1 to <2 x i16>
+  ; CHECK-NEXT: getelementptr i32* @X, i32 0
+  getelementptr i32* @X, i32 0
+  ; CHECK-NEXT: getelementptr inbounds i32* @X, i32 0
+  getelementptr inbounds i32* @X, i32 0
+  ; CHECK: select i1 true, i32 1, i32 0
+  select i1 true ,i32 1, i32 0
+  ; CHECK-NEXT: icmp eq i32 1, 0
+  icmp eq i32 1, 0
+  ; CHECK-NEXT: fcmp oeq float 1.000000e+00, 0.000000e+00
+  fcmp oeq float 1.0, 0.0
+  ; CHECK-NEXT: extractelement <2 x i32> <i32 1, i32 1>, i32 1
+  extractelement <2 x i32> <i32 1, i32 1>, i32 1
+  ; CHECK-NEXT: insertelement <2 x i32> <i32 1, i32 1>, i32 0, i32 1
+  insertelement <2 x i32> <i32 1, i32 1>, i32 0, i32 1
+  ; CHECK-NEXT: shufflevector <2 x i32> <i32 1, i32 1>, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  shufflevector <2 x i32> <i32 1, i32 1>, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ; CHECK-NEXT: extractvalue { i32, float } { i32 1, float 2.000000e+00 }, 0
+  extractvalue { i32, float } { i32 1, float 2.0 }, 0
+  ; CHECK-NEXT: insertvalue { i32, float } { i32 1, float 2.000000e+00 }, i32 0, 0
+  insertvalue { i32, float } { i32 1, float 2.0 }, i32 0, 0
+  
+  ret void
+}
+\ No newline at end of file
diff --git a/test/Bitcode/constantsTest.3.2.ll.bc b/test/Bitcode/constantsTest.3.2.ll.bc
new file mode 100644
index 000000000000..8454befe72bd
--- /dev/null
+++ b/test/Bitcode/constantsTest.3.2.ll.bc
diff --git a/test/Bitcode/conversionInstructions.3.2.ll b/test/Bitcode/conversionInstructions.3.2.ll
index 4b3f27386eff..ae2d65eb2b61 100644
--- a/test/Bitcode/conversionInstructions.3.2.ll
+++ b/test/Bitcode/conversionInstructions.3.2.ll
@@ -1,104 +1,124 @@
-; RUN:  llvm-dis < %s.bc| FileCheck %s
-
-; conversionOperations.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
-; The test checks that LLVM does not misread conversion instructions from
-; older bitcode files.
-
-define void @trunc(i32 %src){
-entry:
-; CHECK: %res1 = trunc i32 %src to i8
-  %res1 = trunc i32 %src to i8
-    
-  ret void
-}
-
-define void @zext(i32 %src){
-entry:
-; CHECK: %res1 = zext i32 %src to i64
-  %res1 = zext i32 %src to i64
-    
-  ret void
-}
-
-define void @sext(i32 %src){
-entry:
-; CHECK: %res1 = sext i32 %src to i64
-  %res1 = sext i32 %src to i64
-    
-  ret void
-}
-
-define void @fptrunc(double %src){
-entry:
-; CHECK: %res1 = fptrunc double %src to float
-  %res1 = fptrunc double %src to float
-  
-  ret void
-}
-
-define void @fpext(float %src){
-entry:
-; CHECK: %res1 = fpext float %src to double
-  %res1 = fpext float %src to double
-  
-  ret void
-}
-
-define void @fptoui(float %src){
-entry:
-; CHECK: %res1 = fptoui float %src to i32
-  %res1 = fptoui float %src to i32
-  
-  ret void
-}
-
-define void @fptosi(float %src){
-entry:
-; CHECK: %res1 = fptosi float %src to i32
-  %res1 = fptosi float %src to i32
-  
-  ret void
-}
-
-define void @uitofp(i32 %src){
-entry:
-; CHECK: %res1 = uitofp i32 %src to float
-  %res1 = uitofp i32 %src to float
-  
-  ret void
-}
-
-define void @sitofp(i32 %src){
-entry:
-; CHECK: %res1 = sitofp i32 %src to float
-  %res1 = sitofp i32 %src to float
-  
-  ret void
-}
-
-define void @ptrtoint(i32* %src){
-entry:
-; CHECK: %res1 = ptrtoint i32* %src to i8
-  %res1 = ptrtoint i32* %src to i8
-  
-  ret void
-}
-
-define void @inttoptr(i32 %src){
-entry:
-; CHECK: %res1 = inttoptr i32 %src to i32*
-  %res1 = inttoptr i32 %src to i32*
-  
-  ret void
-}
-
-define void @bitcast(i32 %src1, i32* %src2){
-entry:
-; CHECK: %res1 = bitcast i32 %src1 to i32
-  %res1 = bitcast i32 %src1 to i32
-  
-; CHECK: %res2 = bitcast i32* %src2 to i64*
-  %res2 = bitcast i32* %src2 to i64*
-  
-  ret void
-}
-\ No newline at end of file
+; RUN:  llvm-dis < %s.bc| FileCheck %s
+
+; conversionInstructions.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
+; The test checks that LLVM does not misread conversion instructions from
+; older bitcode files.
+
+define void @trunc(i32 %src){
+entry:
+; CHECK: %res1 = trunc i32 %src to i8
+  %res1 = trunc i32 %src to i8
+    
+  ret void
+}
+
+define void @zext(i32 %src){
+entry:
+; CHECK: %res1 = zext i32 %src to i64
+  %res1 = zext i32 %src to i64
+    
+  ret void
+}
+
+define void @sext(i32 %src){
+entry:
+; CHECK: %res1 = sext i32 %src to i64
+  %res1 = sext i32 %src to i64
+    
+  ret void
+}
+
+define void @fptrunc(double %src){
+entry:
+; CHECK: %res1 = fptrunc double %src to float
+  %res1 = fptrunc double %src to float
+  
+  ret void
+}
+
+define void @fpext(float %src){
+entry:
+; CHECK: %res1 = fpext float %src to double
+  %res1 = fpext float %src to double
+  
+  ret void
+}
+
+define void @fptoui(float %src){
+entry:
+; CHECK: %res1 = fptoui float %src to i32
+  %res1 = fptoui float %src to i32
+  
+  ret void
+}
+
+define void @fptosi(float %src){
+entry:
+; CHECK: %res1 = fptosi float %src to i32
+  %res1 = fptosi float %src to i32
+  
+  ret void
+}
+
+define void @uitofp(i32 %src){
+entry:
+; CHECK: %res1 = uitofp i32 %src to float
+  %res1 = uitofp i32 %src to float
+  
+  ret void
+}
+
+define void @sitofp(i32 %src){
+entry:
+; CHECK: %res1 = sitofp i32 %src to float
+  %res1 = sitofp i32 %src to float
+  
+  ret void
+}
+
+define void @ptrtoint(i32* %src){
+entry:
+; CHECK: %res1 = ptrtoint i32* %src to i8
+  %res1 = ptrtoint i32* %src to i8
+  
+  ret void
+}
+
+define void @inttoptr(i32 %src){
+entry:
+; CHECK: %res1 = inttoptr i32 %src to i32*
+  %res1 = inttoptr i32 %src to i32*
+  
+  ret void
+}
+
+define void @bitcast(i32 %src1, i32* %src2){
+entry:
+; CHECK: %res1 = bitcast i32 %src1 to i32
+  %res1 = bitcast i32 %src1 to i32
+  
+; CHECK: %res2 = bitcast i32* %src2 to i64*
+  %res2 = bitcast i32* %src2 to i64*
+  
+  ret void
+}
+
+define void @ptrtointInstr(i32* %ptr, <4 x i32*> %vecPtr){
+entry:
+; CHECK: %res1 = ptrtoint i32* %ptr to i8
+  %res1 = ptrtoint i32* %ptr to i8  
+; CHECK-NEXT: %res2 = ptrtoint <4 x i32*> %vecPtr to <4 x i64>
+  %res2 = ptrtoint <4 x i32*> %vecPtr to <4 x i64>
+  
+  ret void
+}
+
+define void @inttoptrInstr(i32 %x, <4 x i32> %vec){
+entry:
+; CHECK: %res1 = inttoptr i32 %x to i64*
+  %res1 = inttoptr i32 %x to i64*
+; CHECK-NEXT: inttoptr <4 x i32> %vec to <4 x i8*>
+  %res2 = inttoptr <4 x i32> %vec to <4 x i8*>
+  
+  ret void
+}
diff --git a/test/Bitcode/conversionInstructions.3.2.ll.bc b/test/Bitcode/conversionInstructions.3.2.ll.bc
index fabf7dab6fb8..a6f8a47736ac 100644
--- a/test/Bitcode/conversionInstructions.3.2.ll.bc
+++ b/test/Bitcode/conversionInstructions.3.2.ll.bc
diff --git a/test/Bitcode/deprecated-linker_private-linker_private_weak.ll b/test/Bitcode/deprecated-linker_private-linker_private_weak.ll
deleted file mode 100644
index 12a527c7738f..000000000000
--- a/test/Bitcode/deprecated-linker_private-linker_private_weak.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llvm-as -o - %s | llvm-dis | FileCheck %s
-; RUN: llvm-as -o /dev/null %s 2>&1 | FileCheck %s -check-prefix CHECK-WARNINGS
-
-@.linker_private = linker_private unnamed_addr constant [15 x i8] c"linker_private\00", align 64
-@.linker_private_weak = linker_private_weak unnamed_addr constant [20 x i8] c"linker_private_weak\00", align 64
-
-; CHECK: @.linker_private = private unnamed_addr constant [15 x i8] c"linker_private\00", align 64
-; CHECK: @.linker_private_weak = private unnamed_addr constant [20 x i8] c"linker_private_weak\00", align 64
-
-; CHECK-WARNINGS: warning: '.linker_private' is deprecated, treating as PrivateLinkage
-; CHECK-WARNINGS: @.linker_private = linker_private unnamed_addr constant [15 x i8] c"linker_private\00", align 64
-; CHECK-WARNINGS:                    ^
-
-; CHECK-WARNINGS: warning: '.linker_private_weak' is deprecated, treating as PrivateLinkage
-; CHECK-WARNINGS: @.linker_private_weak = linker_private_weak unnamed_addr constant [20 x i8] c"linker_private_weak\00", align 64
-; CHECK-WARNINGS:                         ^
-
diff --git a/test/Bitcode/drop-debug-info.ll b/test/Bitcode/drop-debug-info.ll
index 5123018577ee..5109b5e43279 100644
--- a/test/Bitcode/drop-debug-info.ll
+++ b/test/Bitcode/drop-debug-info.ll
@@ -1,5 +1,6 @@
 ; RUN: llvm-as < %s -o %t.bc 2>&1 >/dev/null | FileCheck -check-prefix=WARN %s
 ; RUN: llvm-dis < %t.bc | FileCheck %s
+; RUN: verify-uselistorder < %t.bc
 
 define i32 @main() {
 entry:
@@ -11,17 +12,17 @@ entry:
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 (trunk 195495) (llvm/trunk 195495:195504M)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/llvm_gmail/release/../llvm/tools/clang/test/CodeGen/debug-info-version.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"../llvm/tools/clang/test/CodeGen/debug-info-version.c", metadata !"/Users/manmanren/llvm_gmail/release"}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [main]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/Users/manmanren/llvm_gmail/release/../llvm/tools/clang/test/CodeGen/debug-info-version.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!12 = metadata !{i32 4, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5 (trunk 195495) (llvm/trunk 195495:195504M)\000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/Users/manmanren/llvm_gmail/release/../llvm/tools/clang/test/CodeGen/debug-info-version.c] [DW_LANG_C99]
+!1 = !{!"../llvm/tools/clang/test/CodeGen/debug-info-version.c", !"/Users/manmanren/llvm_gmail/release"}
+!2 = !{i32 0}
+!3 = !{!4}
+!4 = !{!"0x2e\00main\00main\00\003\000\001\000\006\00256\000\003", !1, !5, !6, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [main]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/Users/manmanren/llvm_gmail/release/../llvm/tools/clang/test/CodeGen/debug-info-version.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!12 = !MDLocation(line: 4, scope: !4)
 
 ; WARN: warning: ignoring debug info with an invalid version (0)
 ; CHECK-NOT: !dbg
diff --git a/test/Bitcode/extractelement.ll b/test/Bitcode/extractelement.ll
index 8999c656fce9..90a883d6f02d 100644
--- a/test/Bitcode/extractelement.ll
+++ b/test/Bitcode/extractelement.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -constprop | llvm-dis -disable-output
+; RUN: verify-uselistorder < %s
 ; PR3465
 
 define double @test() {
diff --git a/test/Bitcode/flags.ll b/test/Bitcode/flags.ll
index 7b0c5b538894..6febaa6b40df 100644
--- a/test/Bitcode/flags.ll
+++ b/test/Bitcode/flags.ll
@@ -1,6 +1,7 @@
 ; RUN: llvm-as < %s | llvm-dis > %t0
 ; RUN: opt -S < %s > %t1
 ; RUN: diff %t0 %t1
+; RUN: verify-uselistorder < %s
 ; PR6140
 
 ; Make sure the flags are serialized/deserialized properly for both
diff --git a/test/Bitcode/function-encoding-rel-operands.ll b/test/Bitcode/function-encoding-rel-operands.ll
index aedb0c32676f..24d6d8082861 100644
--- a/test/Bitcode/function-encoding-rel-operands.ll
+++ b/test/Bitcode/function-encoding-rel-operands.ll
@@ -1,6 +1,7 @@
 ; Basic sanity test to check that instruction operands are encoded with
 ; relative IDs.
 ; RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s
+; RUN: verify-uselistorder < %s
 
 ; CHECK: FUNCTION_BLOCK
 ; CHECK: INST_BINOP {{.*}}op0=1 op1=1
@@ -47,3 +48,5 @@ entry:
   %2 = icmp eq i32 %1, %a
   ret i1 %2
 }
+
+; CHECK: Stream type: LLVM IR
diff --git a/test/Bitcode/function-local-metadata.3.5.ll b/test/Bitcode/function-local-metadata.3.5.ll
new file mode 100644
index 000000000000..5bd829682cc4
--- /dev/null
+++ b/test/Bitcode/function-local-metadata.3.5.ll
@@ -0,0 +1,35 @@
+; RUN: llvm-dis < %s.bc | FileCheck %s
+
+; Check that function-local metadata is dropped correctly when it's not a
+; direct argument to a call instruction.
+;
+; Bitcode assembled by llvm-as v3.5.0.
+
+define void @foo(i32 %v) {
+; CHECK: entry:
+entry:
+; CHECK-NEXT: call void @llvm.bar(metadata i32 %v)
+  call void @llvm.bar(metadata !{i32 %v})
+
+; Note: these supposedly legal instructions fired an assertion in llvm-as:
+;
+; Assertion failed: (I != ValueMap.end() && "Value not in slotcalculator!"), function getValueID, file lib/Bitcode/Writer/ValueEnumerator.cpp, line 138.
+;
+; So, I didn't test them; it looks like bitcode compatability is irrelevant.
+  ; call void @llvm.bar(metadata !{i32 0, i32 %v})
+  ; call void @llvm.bar(metadata !{i32 %v, i32 0})
+  ; call void @llvm.bar(metadata !{metadata !{}, i32 %v})
+  ; call void @llvm.bar(metadata !{i32 %v, metadata !{}})
+
+; CHECK-NEXT: call void @llvm.bar(metadata !0)
+; CHECK-NEXT: call void @llvm.bar(metadata !0)
+  call void @llvm.bar(metadata !{i32 %v, i32 %v})
+  call void @llvm.bar(metadata !{metadata !{i32 %v}})
+
+; CHECK-NEXT: ret void{{$}}
+  ret void, !baz !{i32 %v}
+}
+
+declare void @llvm.bar(metadata)
+
+; CHECK: !0 = !{}
diff --git a/test/Bitcode/function-local-metadata.3.5.ll.bc b/test/Bitcode/function-local-metadata.3.5.ll.bc
new file mode 100644
index 000000000000..6323ca4a6a30
--- /dev/null
+++ b/test/Bitcode/function-local-metadata.3.5.ll.bc
diff --git a/test/Bitcode/global-variables.3.2.ll b/test/Bitcode/global-variables.3.2.ll
index 549d025549be..afd9cb14194b 100644
--- a/test/Bitcode/global-variables.3.2.ll
+++ b/test/Bitcode/global-variables.3.2.ll
@@ -1,4 +1,5 @@
 ; RUN:  llvm-dis < %s.bc| FileCheck %s
+; RUN:  verify-uselistorder < %s.bc
 
 ; global-variables.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
 ; The test checks that LLVM does not silently misread global variables attributes of
diff --git a/test/Bitcode/highLevelStructure.3.2.ll b/test/Bitcode/highLevelStructure.3.2.ll
new file mode 100644
index 000000000000..88fb3406d820
--- /dev/null
+++ b/test/Bitcode/highLevelStructure.3.2.ll
@@ -0,0 +1,86 @@
+; RUN:  llvm-dis < %s.bc| FileCheck %s
+
+; highLevelStructure.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
+; The test checks that LLVM does not misread binary float instructions of
+; older bitcode files.
+
+; Data Layout Test
+; CHECK: target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-f80:32-n8:16:32-S32"
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-a0:0:64-f80:32:32-n8:16:32-S32"
+
+; Module-Level Inline Assembly Test
+; CHECK: module asm "some assembly"
+module asm "some assembly"
+
+; Named Types Test
+; CHECK: %mytype = type { %mytype*, i32 }
+%mytype = type { %mytype*, i32 }
+
+; Aliases Test
+; CHECK: @glob1 = global i32 1
+@glob1 = global i32 1
+; CHECK: @aliased1 = alias i32* @glob1
+@aliased1 = alias i32* @glob1
+; CHECK-NEXT: @aliased2 = internal alias i32* @glob1
+@aliased2 = internal alias i32* @glob1
+; CHECK-NEXT: @aliased3 = alias i32* @glob1
+@aliased3 = external alias i32* @glob1
+; CHECK-NEXT: @aliased4 = weak alias i32* @glob1
+@aliased4 = weak alias i32* @glob1
+; CHECK-NEXT: @aliased5 = weak_odr alias i32* @glob1
+@aliased5 = weak_odr alias i32* @glob1
+
+;Parameter Attribute Test
+; CHECK: declare void @ParamAttr1(i8 zeroext)
+declare void @ParamAttr1(i8 zeroext)
+; CHECK: declare void @ParamAttr2(i8* nest)
+declare void @ParamAttr2(i8* nest)
+; CHECK: declare void @ParamAttr3(i8* sret)
+declare void @ParamAttr3(i8* sret)
+; CHECK: declare void @ParamAttr4(i8 signext)
+declare void @ParamAttr4(i8 signext)
+; CHECK: declare void @ParamAttr5(i8* inreg)
+declare void @ParamAttr5(i8* inreg)
+; CHECK: declare void @ParamAttr6(i8* byval)
+declare void @ParamAttr6(i8* byval)
+; CHECK: declare void @ParamAttr7(i8* noalias)
+declare void @ParamAttr7(i8* noalias)
+; CHECK: declare void @ParamAttr8(i8* nocapture)
+declare void @ParamAttr8(i8* nocapture)
+; CHECK: declare void @ParamAttr9{{[(i8* nest noalias nocapture) | (i8* noalias nocapture nest)]}}
+declare void @ParamAttr9(i8* nest noalias nocapture)
+; CHECK: declare void @ParamAttr10{{[(i8* sret noalias nocapture) | (i8* noalias nocapture sret)]}}
+declare void @ParamAttr10(i8* sret noalias nocapture)
+;CHECK: declare void @ParamAttr11{{[(i8* byval noalias nocapture) | (i8* noalias nocapture byval)]}}
+declare void @ParamAttr11(i8* byval noalias nocapture)
+;CHECK: declare void @ParamAttr12{{[(i8* inreg noalias nocapture) | (i8* noalias nocapture inreg)]}}
+declare void @ParamAttr12(i8* inreg noalias nocapture)
+
+
+; NamedTypesTest
+define void @NamedTypes() {
+entry:
+; CHECK: %res = alloca %mytype
+  %res = alloca %mytype
+  ret void
+}
+
+; Garbage Collector Name Test
+; CHECK: define void @gcTest() gc "gc"
+define void @gcTest() gc "gc" {
+entry:
+  ret void
+}
+
+; Named metadata Test
+; CHECK: !name = !{!0, !1, !2}
+!name = !{!0, !1, !2}
+; CHECK: !0 = !{!"zero"}
+!0 = metadata !{metadata !"zero"}
+; CHECK: !1 = !{!"one"}
+!1 = metadata !{metadata !"one"}
+; CHECK: !2 = !{!"two"}
+!2 = metadata !{metadata !"two"}
+
+
+
diff --git a/test/Bitcode/highLevelStructure.3.2.ll.bc b/test/Bitcode/highLevelStructure.3.2.ll.bc
new file mode 100644
index 000000000000..591c5c30b351
--- /dev/null
+++ b/test/Bitcode/highLevelStructure.3.2.ll.bc
diff --git a/test/Bitcode/inalloca.ll b/test/Bitcode/inalloca.ll
index bad87a9b03f0..84abe176d65e 100644
--- a/test/Bitcode/inalloca.ll
+++ b/test/Bitcode/inalloca.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder < %s
 
 ; inalloca should roundtrip.
 
diff --git a/test/Bitcode/invalid.ll b/test/Bitcode/invalid.ll
index 1d4a82bf2b61..8b532d8dadfd 100644
--- a/test/Bitcode/invalid.ll
+++ b/test/Bitcode/invalid.ll
@@ -1,6 +1,6 @@
 ; RUN:  not llvm-dis < %s.bc 2>&1 | FileCheck %s
 
-; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: Invalid value
+; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: Unknown attribute kind (48)
 
 ; invalid.ll.bc has an invalid attribute number.
 ; The test checks that LLVM reports the error and doesn't access freed memory
diff --git a/test/Bitcode/linkage-types-3.2.ll b/test/Bitcode/linkage-types-3.2.ll
index fd070efbd444..e255b619f41b 100644
--- a/test/Bitcode/linkage-types-3.2.ll
+++ b/test/Bitcode/linkage-types-3.2.ll
@@ -1,128 +1,129 @@
 ; RUN:  llvm-dis < %s.bc| FileCheck %s
+; RUN:  verify-uselistorder < %s.bc
 
 ; linkage-types-3.2.ll.bc was generated by passing this file to llvm-as-3.2
 ; The test checks that LLVM does not silently misread linkage types of
 ; older bitcode files.
 
 @common.var = common global i32 0
-; CHECK: @common.var = common global i32 0
+; CHECK: @common.var = common global i32 0{{$}}
 
 @appending.var = appending global [8 x i32] undef
-; CHECK: @appending.var = appending global [8 x i32] undef
+; CHECK: @appending.var = appending global [8 x i32] undef{{$}}
 
 @extern_weak.var = extern_weak global i32
-; CHECK: @extern_weak.var = extern_weak global i32
+; CHECK: @extern_weak.var = extern_weak global i32{{$}}
 
 @private.var = private constant i32 0
-; CHECK: @private.var = private constant i32 0
+; CHECK: @private.var = private constant i32 0{{$}}
 
 @linker_private.var = linker_private constant i32 0
-; CHECK: @linker_private.var = private constant i32 0
+; CHECK: @linker_private.var = private constant i32 0{{$}}
 
 @linker_private_weak.var = linker_private_weak constant i32 0
-; CHECK: @linker_private_weak.var = private constant i32 0
+; CHECK: @linker_private_weak.var = private constant i32 0{{$}}
 
 @linker_private_weak_def_auto.var = linker_private_weak_def_auto constant i32 0
-; CHECK: @linker_private_weak_def_auto.var = constant i32 0
+; CHECK: @linker_private_weak_def_auto.var = constant i32 0{{$}}
 
 @internal.var = internal constant i32 0
-; CHECK: @internal.var = internal constant i32 0
+; CHECK: @internal.var = internal constant i32 0{{$}}
 
 @available_externally.var = available_externally constant i32 0
-; CHECK: @available_externally.var = available_externally constant i32 0
+; CHECK: @available_externally.var = available_externally constant i32 0{{$}}
 
 @linkonce.var = linkonce constant i32 0
-; CHECK: @linkonce.var = linkonce constant i32 0
+; CHECK: @linkonce.var = linkonce constant i32 0{{$}}
 
 @weak.var = weak constant i32 0
-; CHECK: @weak.var = weak constant i32 0
+; CHECK: @weak.var = weak constant i32 0{{$}}
 
 @linkonce_odr.var = linkonce_odr constant i32 0
-; CHECK: @linkonce_odr.var = linkonce_odr constant i32 0
+; CHECK: @linkonce_odr.var = linkonce_odr constant i32 0{{$}}
 
 @linkonce_odr_auto_hide.var = linkonce_odr_auto_hide constant i32 0
-; CHECK: @linkonce_odr_auto_hide.var = constant i32 0
+; CHECK: @linkonce_odr_auto_hide.var = constant i32 0{{$}}
 
 @external.var = external constant i32
-; CHECK: @external.var = external constant i32
+; CHECK: @external.var = external constant i32{{$}}
 
 @dllexport.var = dllexport global i32 0
-; CHECK: @dllexport.var = dllexport global i32 0
+; CHECK: @dllexport.var = dllexport global i32 0{{$}}
 
 @dllimport.var = dllimport global i32
-; CHECK: @dllimport.var = external dllimport global i32
+; CHECK: @dllimport.var = external dllimport global i32{{$}}
 
 define private void @private()
-; CHECK: define private void @private
+; CHECK: define private void @private() {
 {
-  ret void;
+  ret void
 }
 
 define linker_private void @linker_private()
-; CHECK: define private void @linker_private
+; CHECK: define private void @linker_private() {
 {
-  ret void;
+  ret void
 }
 
 define linker_private_weak void @linker_private_weak()
-; CHECK: define private void @linker_private_weak
+; CHECK: define private void @linker_private_weak() {
 {
-  ret void;
+  ret void
 }
 
 define linker_private_weak_def_auto void @linker_private_weak_def_auto()
-; CHECK: define void @linker_private_weak_def_auto
+; CHECK: define void @linker_private_weak_def_auto() {
 {
-  ret void;
+  ret void
 }
 
 define internal void @internal()
-; CHECK: define internal void @internal
+; CHECK: define internal void @internal() {
 {
-  ret void;
+  ret void
 }
 
 define available_externally void @available_externally()
-; CHECK: define available_externally void @available_externally
+; CHECK: define available_externally void @available_externally() {
 {
-  ret void;
+  ret void
 }
 
 define linkonce void @linkonce()
-; CHECK: define linkonce void @linkonce
+; CHECK: define linkonce void @linkonce() {
 {
-  ret void;
+  ret void
 }
 
 define weak void @weak()
-; CHECK: define weak void @weak
+; CHECK: define weak void @weak() {
 {
-  ret void;
+  ret void
 }
 
 define linkonce_odr void @linkonce_odr()
-; CHECK: define linkonce_odr void @linkonce_odr
+; CHECK: define linkonce_odr void @linkonce_odr() {
 {
-  ret void;
+  ret void
 }
 
 define linkonce_odr_auto_hide void @linkonce_odr_auto_hide()
-; CHECK: define void @linkonce_odr_auto_hide
+; CHECK: define void @linkonce_odr_auto_hide() {
 {
-  ret void;
+  ret void
 }
 
 define external void @external()
-; CHECK: define void @external
+; CHECK: define void @external() {
 {
-  ret void;
+  ret void
 }
 
 declare dllimport void @dllimport()
-; CHECK: declare dllimport void @dllimport
+; CHECK: declare dllimport void @dllimport(){{$}}
 
 define dllexport void @dllexport()
-; CHECK: define dllexport void @dllexport()
+; CHECK: define dllexport void @dllexport() {
 {
-  ret void;
+  ret void
 }
diff --git a/test/Bitcode/local-linkage-default-visibility.3.4.ll b/test/Bitcode/local-linkage-default-visibility.3.4.ll
index 45a7b1213a8c..df0cf7653e55 100644
--- a/test/Bitcode/local-linkage-default-visibility.3.4.ll
+++ b/test/Bitcode/local-linkage-default-visibility.3.4.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-dis < %s.bc | FileCheck %s
+; RUN: verify-uselistorder < %s.bc
 
 ; local-linkage-default-visibility.3.4.ll.bc was generated by passing this file
 ; to llvm-as-3.4.  The test checks that LLVM upgrades visibility of symbols
@@ -25,22 +26,22 @@
 @global = global i32 0
 
 @default.internal.alias = alias internal i32* @global
-; CHECK: @default.internal.alias = alias internal i32* @global
+; CHECK: @default.internal.alias = internal alias i32* @global
 
 @hidden.internal.alias = hidden alias internal i32* @global
-; CHECK: @hidden.internal.alias = alias internal i32* @global
+; CHECK: @hidden.internal.alias = internal alias i32* @global
 
 @protected.internal.alias = protected alias internal i32* @global
-; CHECK: @protected.internal.alias = alias internal i32* @global
+; CHECK: @protected.internal.alias = internal alias i32* @global
 
 @default.private.alias = alias private i32* @global
-; CHECK: @default.private.alias = alias private i32* @global
+; CHECK: @default.private.alias = private alias i32* @global
 
 @hidden.private.alias = hidden alias private i32* @global
-; CHECK: @hidden.private.alias = alias private i32* @global
+; CHECK: @hidden.private.alias = private alias i32* @global
 
 @protected.private.alias = protected alias private i32* @global
-; CHECK: @protected.private.alias = alias private i32* @global
+; CHECK: @protected.private.alias = private alias i32* @global
 
 define internal void @default.internal() {
 ; CHECK: define internal void @default.internal
diff --git a/test/Bitcode/mdstring-high-bits.ll b/test/Bitcode/mdstring-high-bits.ll
new file mode 100644
index 000000000000..0d8fdeb9596c
--- /dev/null
+++ b/test/Bitcode/mdstring-high-bits.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+; PR21882: confirm we don't crash when high bits are set in a character in a
+; metadata string.
+
+; CHECK: !name = !{!0}
+!name = !{!0}
+; CHECK: !0 = !{!"\80"}
+!0 = !{!"\80"}
diff --git a/test/Bitcode/memInstructions.3.2.ll b/test/Bitcode/memInstructions.3.2.ll
index e4cb6bdbe96b..d826dd120621 100644
--- a/test/Bitcode/memInstructions.3.2.ll
+++ b/test/Bitcode/memInstructions.3.2.ll
@@ -1,328 +1,329 @@
-; RUN:  llvm-dis < %s.bc| FileCheck %s
-
-; memOperations.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
-; The test checks that LLVM does not misread memory related instructions of
-; older bitcode files.
-
-define void @alloca(){
-entry:
-; CHECK: %res1 = alloca i8
-  %res1 = alloca i8
-  
-; CHECK-NEXT: %res2 = alloca i8, i32 2
-  %res2 = alloca i8, i32 2
-
-; CHECK-NEXT: %res3 = alloca i8, i32 2, align 4
-  %res3 = alloca i8, i32 2, align 4
-  
-; CHECK-NEXT: %res4 = alloca i8, align 4
-  %res4 = alloca i8, align 4
-  
-  ret void
-}
-
-define void @load(){
-entry:
-  %ptr1 = alloca i8
-  store i8 2, i8* %ptr1
-  
-; CHECK: %res1 = load i8* %ptr1
-  %res1 = load i8* %ptr1
-  
-; CHECK-NEXT: %res2 = load volatile i8* %ptr1
-  %res2 = load volatile i8* %ptr1
-  
-; CHECK-NEXT: %res3 = load i8* %ptr1, align 1
-  %res3 = load i8* %ptr1, align 1
-  
-; CHECK-NEXT: %res4 = load volatile i8* %ptr1, align 1
-  %res4 = load volatile i8* %ptr1, align 1
-  
-; CHECK-NEXT: %res5 = load i8* %ptr1, !nontemporal !0
-  %res5 = load i8* %ptr1, !nontemporal !0
-  
-; CHECK-NEXT: %res6 = load volatile i8* %ptr1, !nontemporal !0
-  %res6 = load volatile i8* %ptr1, !nontemporal !0
-  
-; CHECK-NEXT: %res7 = load i8* %ptr1, align 1, !nontemporal !0
-  %res7 = load i8* %ptr1, align 1, !nontemporal !0
-  
-; CHECK-NEXT: %res8 = load volatile i8* %ptr1, align 1, !nontemporal !0
-  %res8 = load volatile i8* %ptr1, align 1, !nontemporal !0
-  
-; CHECK-NEXT: %res9 = load i8* %ptr1, !invariant.load !1
-  %res9 = load i8* %ptr1, !invariant.load !1
-  
-; CHECK-NEXT: %res10 = load volatile i8* %ptr1, !invariant.load !1
-  %res10 = load volatile i8* %ptr1, !invariant.load !1
-  
-; CHECK-NEXT: %res11 = load i8* %ptr1, align 1, !invariant.load !1
-  %res11 = load i8* %ptr1, align 1, !invariant.load !1
-  
-; CHECK-NEXT: %res12 = load volatile i8* %ptr1, align 1, !invariant.load !1
-  %res12 = load volatile i8* %ptr1, align 1, !invariant.load !1
-  
-; CHECK-NEXT: %res13 = load i8* %ptr1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
-  %res13 = load i8* %ptr1, !nontemporal !0, !invariant.load !1
-  
-; CHECK-NEXT: %res14 = load volatile i8* %ptr1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
-  %res14 = load volatile i8* %ptr1, !nontemporal !0, !invariant.load !1
-  
-; CHECK-NEXT: %res15 = load i8* %ptr1, align 1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
-  %res15 = load i8* %ptr1, align 1, !nontemporal !0, !invariant.load !1
-  
-; CHECK-NEXT: %res16 = load volatile i8* %ptr1, align 1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
-  %res16 = load volatile i8* %ptr1, align 1, !nontemporal !0, !invariant.load !1
-  
-  ret void
-}
-
-define void @loadAtomic(){
-entry:
-  %ptr1 = alloca i8
-  store i8 2, i8* %ptr1
-  
-; CHECK: %res1 = load atomic i8* %ptr1 unordered, align 1
-  %res1 = load atomic i8* %ptr1 unordered, align 1
-  
-; CHECK-NEXT: %res2 = load atomic i8* %ptr1 monotonic, align 1
-  %res2 = load atomic i8* %ptr1 monotonic, align 1
-  
-; CHECK-NEXT: %res3 = load atomic i8* %ptr1 acquire, align 1
-  %res3 = load atomic i8* %ptr1 acquire, align 1
-  
-; CHECK-NEXT: %res4 = load atomic i8* %ptr1 seq_cst, align 1
-  %res4 = load atomic i8* %ptr1 seq_cst, align 1
-  
-; CHECK-NEXT: %res5 = load atomic volatile i8* %ptr1 unordered, align 1
-  %res5 = load atomic volatile i8* %ptr1 unordered, align 1
-  
-; CHECK-NEXT: %res6 = load atomic volatile i8* %ptr1 monotonic, align 1
-  %res6 = load atomic volatile i8* %ptr1 monotonic, align 1
-  
-; CHECK-NEXT: %res7 = load atomic volatile i8* %ptr1 acquire, align 1
-  %res7 = load atomic volatile i8* %ptr1 acquire, align 1
-  
-; CHECK-NEXT: %res8 = load atomic volatile i8* %ptr1 seq_cst, align 1
-  %res8 = load atomic volatile i8* %ptr1 seq_cst, align 1
-  
-; CHECK-NEXT: %res9 = load atomic i8* %ptr1 singlethread unordered, align 1
-  %res9 = load atomic i8* %ptr1 singlethread unordered, align 1
-  
-; CHECK-NEXT: %res10 = load atomic i8* %ptr1 singlethread monotonic, align 1
-  %res10 = load atomic i8* %ptr1 singlethread monotonic, align 1
-  
-; CHECK-NEXT: %res11 = load atomic i8* %ptr1 singlethread acquire, align 1
-  %res11 = load atomic i8* %ptr1 singlethread acquire, align 1
-  
-; CHECK-NEXT: %res12 = load atomic i8* %ptr1 singlethread seq_cst, align 1
-  %res12 = load atomic i8* %ptr1 singlethread seq_cst, align 1
-  
-; CHECK-NEXT: %res13 = load atomic volatile i8* %ptr1 singlethread unordered, align 1
-  %res13 = load atomic volatile i8* %ptr1 singlethread unordered, align 1
-  
-; CHECK-NEXT: %res14 = load atomic volatile i8* %ptr1 singlethread monotonic, align 1
-  %res14 = load atomic volatile i8* %ptr1 singlethread monotonic, align 1
-  
-; CHECK-NEXT: %res15 = load atomic volatile i8* %ptr1 singlethread acquire, align 1
-  %res15 = load atomic volatile i8* %ptr1 singlethread acquire, align 1
-  
-; CHECK-NEXT: %res16 = load atomic volatile i8* %ptr1 singlethread seq_cst, align 1
-  %res16 = load atomic volatile i8* %ptr1 singlethread seq_cst, align 1
-  
-  ret void
-}
-
-define void @store(){
-entry:
-  %ptr1 = alloca i8
-  
-; CHECK: store i8 2, i8* %ptr1
-  store i8 2, i8* %ptr1
-  
-; CHECK-NEXT: store volatile i8 2, i8* %ptr1
-  store volatile i8 2, i8* %ptr1
-  
-; CHECK-NEXT: store i8 2, i8* %ptr1, align 1
-  store i8 2, i8* %ptr1, align 1
-  
-; CHECK-NEXT: store volatile i8 2, i8* %ptr1, align 1
-  store volatile i8 2, i8* %ptr1, align 1
-  
-; CHECK-NEXT: store i8 2, i8* %ptr1, !nontemporal !0
-  store i8 2, i8* %ptr1, !nontemporal !0
-  
-; CHECK-NEXT: store volatile i8 2, i8* %ptr1, !nontemporal !0
-  store volatile i8 2, i8* %ptr1, !nontemporal !0
-  
-; CHECK-NEXT: store i8 2, i8* %ptr1, align 1, !nontemporal !0
-  store i8 2, i8* %ptr1, align 1, !nontemporal !0
-  
-; CHECK-NEXT: store volatile i8 2, i8* %ptr1, align 1, !nontemporal !0
-  store volatile i8 2, i8* %ptr1, align 1, !nontemporal !0
-  
-  ret void
-}
-
-define void @storeAtomic(){
-entry:
-  %ptr1 = alloca i8
-  
-; CHECK: store atomic i8 2, i8* %ptr1 unordered, align 1
-  store atomic i8 2, i8* %ptr1 unordered, align 1
-  
-; CHECK-NEXT: store atomic i8 2, i8* %ptr1 monotonic, align 1
-  store atomic i8 2, i8* %ptr1 monotonic, align 1
-  
-; CHECK-NEXT: store atomic i8 2, i8* %ptr1 release, align 1
-  store atomic i8 2, i8* %ptr1 release, align 1
-  
-; CHECK-NEXT: store atomic i8 2, i8* %ptr1 seq_cst, align 1
-  store atomic i8 2, i8* %ptr1 seq_cst, align 1
-  
-; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 unordered, align 1
-  store atomic volatile i8 2, i8* %ptr1 unordered, align 1
-  
-; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 monotonic, align 1
-  store atomic volatile i8 2, i8* %ptr1 monotonic, align 1
-  
-; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 release, align 1
-  store atomic volatile i8 2, i8* %ptr1 release, align 1
-  
-; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 seq_cst, align 1
-  store atomic volatile i8 2, i8* %ptr1 seq_cst, align 1
-  
-; CHECK-NEXT: store atomic i8 2, i8* %ptr1 singlethread unordered, align 1
-  store atomic i8 2, i8* %ptr1 singlethread unordered, align 1
-  
-; CHECK-NEXT: store atomic i8 2, i8* %ptr1 singlethread monotonic, align 1
-  store atomic i8 2, i8* %ptr1 singlethread monotonic, align 1
-  
-; CHECK-NEXT: store atomic i8 2, i8* %ptr1 singlethread release, align 1
-  store atomic i8 2, i8* %ptr1 singlethread release, align 1
-  
-; CHECK-NEXT: store atomic i8 2, i8* %ptr1 singlethread seq_cst, align 1
-  store atomic i8 2, i8* %ptr1 singlethread seq_cst, align 1
-  
-; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 singlethread unordered, align 1
-  store atomic volatile i8 2, i8* %ptr1 singlethread unordered, align 1
-  
-; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 singlethread monotonic, align 1
-  store atomic volatile i8 2, i8* %ptr1 singlethread monotonic, align 1
-  
-; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 singlethread release, align 1
-  store atomic volatile i8 2, i8* %ptr1 singlethread release, align 1
-  
-; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 singlethread seq_cst, align 1
-  store atomic volatile i8 2, i8* %ptr1 singlethread seq_cst, align 1
-  
-  ret void
-}
-
-define void @cmpxchg(i32* %ptr,i32 %cmp,i32 %new){
-entry:
-  ;cmpxchg [volatile] <ty>* <pointer>, <ty> <cmp>, <ty> <new> [singlethread] <ordering>
-
-; CHECK: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
-; CHECK-NEXT: %res1 = extractvalue { i32, i1 } [[TMP]], 0
-  %res1 = cmpxchg i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
-; CHECK-NEXT: %res2 = extractvalue { i32, i1 } [[TMP]], 0
-  %res2 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
-; CHECK-NEXT: %res3 = extractvalue { i32, i1 } [[TMP]], 0
-  %res3 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
-; CHECK-NEXT: %res4 = extractvalue { i32, i1 } [[TMP]], 0
-  %res4 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
-  
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new acquire acquire
-; CHECK-NEXT: %res5 = extractvalue { i32, i1 } [[TMP]], 0
-  %res5 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acquire acquire
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acquire acquire
-; CHECK-NEXT: %res6 = extractvalue { i32, i1 } [[TMP]], 0
-  %res6 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acquire acquire
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
-; CHECK-NEXT: %res7 = extractvalue { i32, i1 } [[TMP]], 0
-  %res7 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
-; CHECK-NEXT: %res8 = extractvalue { i32, i1 } [[TMP]], 0
-  %res8 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
-  
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new release monotonic
-; CHECK-NEXT: %res9 = extractvalue { i32, i1 } [[TMP]], 0
-  %res9 = cmpxchg i32* %ptr, i32 %cmp, i32 %new release monotonic
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new release monotonic
-; CHECK-NEXT: %res10 = extractvalue { i32, i1 } [[TMP]], 0
-  %res10 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new release monotonic
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
-; CHECK-NEXT: %res11 = extractvalue { i32, i1 } [[TMP]], 0
-  %res11 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
-; CHECK-NEXT: %res12 = extractvalue { i32, i1 } [[TMP]], 0
-  %res12 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
-  
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
-; CHECK-NEXT: %res13 = extractvalue { i32, i1 } [[TMP]], 0
-  %res13 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
-; CHECK-NEXT: %res14 = extractvalue { i32, i1 } [[TMP]], 0
-  %res14 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
-; CHECK-NEXT: %res15 = extractvalue { i32, i1 } [[TMP]], 0
-  %res15 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
-; CHECK-NEXT: %res16 = extractvalue { i32, i1 } [[TMP]], 0
-  %res16 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
-  
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
-; CHECK-NEXT: %res17 = extractvalue { i32, i1 } [[TMP]], 0
-  %res17 = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
-; CHECK-NEXT: %res18 = extractvalue { i32, i1 } [[TMP]], 0
-  %res18 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
-; CHECK-NEXT: %res19 = extractvalue { i32, i1 } [[TMP]], 0
-  %res19 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
-  
-; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
-; CHECK-NEXT: %res20 = extractvalue { i32, i1 } [[TMP]], 0
-  %res20 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
-
-  ret void
-}
-
-define void @getelementptr({i8, i8}* %s, <4 x i8*> %ptrs, <4 x i64> %offsets ){
-entry:
-; CHECK: %res1 = getelementptr { i8, i8 }* %s, i32 1, i32 1
-  %res1 = getelementptr {i8, i8}* %s, i32 1, i32 1
-  
-; CHECK-NEXT: %res2 = getelementptr inbounds { i8, i8 }* %s, i32 1, i32 1
-  %res2 = getelementptr inbounds {i8, i8}* %s, i32 1, i32 1
-  
-; CHECK-NEXT: %res3 = getelementptr <4 x i8*> %ptrs, <4 x i64> %offsets
-  %res3 = getelementptr <4 x i8*> %ptrs, <4 x i64> %offsets
-  
-  ret void
-}
-
-!0 = metadata !{ i32 1 }
-!1 = metadata !{}
-\ No newline at end of file
+; RUN: llvm-dis < %s.bc| FileCheck %s
+; RUN: verify-uselistorder < %s.bc
+
+; memOperations.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
+; The test checks that LLVM does not misread memory related instructions of
+; older bitcode files.
+
+define void @alloca(){
+entry:
+; CHECK: %res1 = alloca i8
+  %res1 = alloca i8
+
+; CHECK-NEXT: %res2 = alloca i8, i32 2
+  %res2 = alloca i8, i32 2
+
+; CHECK-NEXT: %res3 = alloca i8, i32 2, align 4
+  %res3 = alloca i8, i32 2, align 4
+
+; CHECK-NEXT: %res4 = alloca i8, align 4
+  %res4 = alloca i8, align 4
+
+  ret void
+}
+
+define void @load(){
+entry:
+  %ptr1 = alloca i8
+  store i8 2, i8* %ptr1
+
+; CHECK: %res1 = load i8* %ptr1
+  %res1 = load i8* %ptr1
+
+; CHECK-NEXT: %res2 = load volatile i8* %ptr1
+  %res2 = load volatile i8* %ptr1
+
+; CHECK-NEXT: %res3 = load i8* %ptr1, align 1
+  %res3 = load i8* %ptr1, align 1
+
+; CHECK-NEXT: %res4 = load volatile i8* %ptr1, align 1
+  %res4 = load volatile i8* %ptr1, align 1
+
+; CHECK-NEXT: %res5 = load i8* %ptr1, !nontemporal !0
+  %res5 = load i8* %ptr1, !nontemporal !0
+
+; CHECK-NEXT: %res6 = load volatile i8* %ptr1, !nontemporal !0
+  %res6 = load volatile i8* %ptr1, !nontemporal !0
+
+; CHECK-NEXT: %res7 = load i8* %ptr1, align 1, !nontemporal !0
+  %res7 = load i8* %ptr1, align 1, !nontemporal !0
+
+; CHECK-NEXT: %res8 = load volatile i8* %ptr1, align 1, !nontemporal !0
+  %res8 = load volatile i8* %ptr1, align 1, !nontemporal !0
+
+; CHECK-NEXT: %res9 = load i8* %ptr1, !invariant.load !1
+  %res9 = load i8* %ptr1, !invariant.load !1
+
+; CHECK-NEXT: %res10 = load volatile i8* %ptr1, !invariant.load !1
+  %res10 = load volatile i8* %ptr1, !invariant.load !1
+
+; CHECK-NEXT: %res11 = load i8* %ptr1, align 1, !invariant.load !1
+  %res11 = load i8* %ptr1, align 1, !invariant.load !1
+
+; CHECK-NEXT: %res12 = load volatile i8* %ptr1, align 1, !invariant.load !1
+  %res12 = load volatile i8* %ptr1, align 1, !invariant.load !1
+
+; CHECK-NEXT: %res13 = load i8* %ptr1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
+  %res13 = load i8* %ptr1, !nontemporal !0, !invariant.load !1
+
+; CHECK-NEXT: %res14 = load volatile i8* %ptr1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
+  %res14 = load volatile i8* %ptr1, !nontemporal !0, !invariant.load !1
+
+; CHECK-NEXT: %res15 = load i8* %ptr1, align 1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
+  %res15 = load i8* %ptr1, align 1, !nontemporal !0, !invariant.load !1
+
+; CHECK-NEXT: %res16 = load volatile i8* %ptr1, align 1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
+  %res16 = load volatile i8* %ptr1, align 1, !nontemporal !0, !invariant.load !1
+
+  ret void
+}
+
+define void @loadAtomic(){
+entry:
+  %ptr1 = alloca i8
+  store i8 2, i8* %ptr1
+
+; CHECK: %res1 = load atomic i8* %ptr1 unordered, align 1
+  %res1 = load atomic i8* %ptr1 unordered, align 1
+
+; CHECK-NEXT: %res2 = load atomic i8* %ptr1 monotonic, align 1
+  %res2 = load atomic i8* %ptr1 monotonic, align 1
+
+; CHECK-NEXT: %res3 = load atomic i8* %ptr1 acquire, align 1
+  %res3 = load atomic i8* %ptr1 acquire, align 1
+
+; CHECK-NEXT: %res4 = load atomic i8* %ptr1 seq_cst, align 1
+  %res4 = load atomic i8* %ptr1 seq_cst, align 1
+
+; CHECK-NEXT: %res5 = load atomic volatile i8* %ptr1 unordered, align 1
+  %res5 = load atomic volatile i8* %ptr1 unordered, align 1
+
+; CHECK-NEXT: %res6 = load atomic volatile i8* %ptr1 monotonic, align 1
+  %res6 = load atomic volatile i8* %ptr1 monotonic, align 1
+
+; CHECK-NEXT: %res7 = load atomic volatile i8* %ptr1 acquire, align 1
+  %res7 = load atomic volatile i8* %ptr1 acquire, align 1
+
+; CHECK-NEXT: %res8 = load atomic volatile i8* %ptr1 seq_cst, align 1
+  %res8 = load atomic volatile i8* %ptr1 seq_cst, align 1
+
+; CHECK-NEXT: %res9 = load atomic i8* %ptr1 singlethread unordered, align 1
+  %res9 = load atomic i8* %ptr1 singlethread unordered, align 1
+
+; CHECK-NEXT: %res10 = load atomic i8* %ptr1 singlethread monotonic, align 1
+  %res10 = load atomic i8* %ptr1 singlethread monotonic, align 1
+
+; CHECK-NEXT: %res11 = load atomic i8* %ptr1 singlethread acquire, align 1
+  %res11 = load atomic i8* %ptr1 singlethread acquire, align 1
+
+; CHECK-NEXT: %res12 = load atomic i8* %ptr1 singlethread seq_cst, align 1
+  %res12 = load atomic i8* %ptr1 singlethread seq_cst, align 1
+
+; CHECK-NEXT: %res13 = load atomic volatile i8* %ptr1 singlethread unordered, align 1
+  %res13 = load atomic volatile i8* %ptr1 singlethread unordered, align 1
+
+; CHECK-NEXT: %res14 = load atomic volatile i8* %ptr1 singlethread monotonic, align 1
+  %res14 = load atomic volatile i8* %ptr1 singlethread monotonic, align 1
+
+; CHECK-NEXT: %res15 = load atomic volatile i8* %ptr1 singlethread acquire, align 1
+  %res15 = load atomic volatile i8* %ptr1 singlethread acquire, align 1
+
+; CHECK-NEXT: %res16 = load atomic volatile i8* %ptr1 singlethread seq_cst, align 1
+  %res16 = load atomic volatile i8* %ptr1 singlethread seq_cst, align 1
+
+  ret void
+}
+
+define void @store(){
+entry:
+  %ptr1 = alloca i8
+
+; CHECK: store i8 2, i8* %ptr1
+  store i8 2, i8* %ptr1
+
+; CHECK-NEXT: store volatile i8 2, i8* %ptr1
+  store volatile i8 2, i8* %ptr1
+
+; CHECK-NEXT: store i8 2, i8* %ptr1, align 1
+  store i8 2, i8* %ptr1, align 1
+
+; CHECK-NEXT: store volatile i8 2, i8* %ptr1, align 1
+  store volatile i8 2, i8* %ptr1, align 1
+
+; CHECK-NEXT: store i8 2, i8* %ptr1, !nontemporal !0
+  store i8 2, i8* %ptr1, !nontemporal !0
+
+; CHECK-NEXT: store volatile i8 2, i8* %ptr1, !nontemporal !0
+  store volatile i8 2, i8* %ptr1, !nontemporal !0
+
+; CHECK-NEXT: store i8 2, i8* %ptr1, align 1, !nontemporal !0
+  store i8 2, i8* %ptr1, align 1, !nontemporal !0
+
+; CHECK-NEXT: store volatile i8 2, i8* %ptr1, align 1, !nontemporal !0
+  store volatile i8 2, i8* %ptr1, align 1, !nontemporal !0
+
+  ret void
+}
+
+define void @storeAtomic(){
+entry:
+  %ptr1 = alloca i8
+
+; CHECK: store atomic i8 2, i8* %ptr1 unordered, align 1
+  store atomic i8 2, i8* %ptr1 unordered, align 1
+
+; CHECK-NEXT: store atomic i8 2, i8* %ptr1 monotonic, align 1
+  store atomic i8 2, i8* %ptr1 monotonic, align 1
+
+; CHECK-NEXT: store atomic i8 2, i8* %ptr1 release, align 1
+  store atomic i8 2, i8* %ptr1 release, align 1
+
+; CHECK-NEXT: store atomic i8 2, i8* %ptr1 seq_cst, align 1
+  store atomic i8 2, i8* %ptr1 seq_cst, align 1
+
+; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 unordered, align 1
+  store atomic volatile i8 2, i8* %ptr1 unordered, align 1
+
+; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 monotonic, align 1
+  store atomic volatile i8 2, i8* %ptr1 monotonic, align 1
+
+; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 release, align 1
+  store atomic volatile i8 2, i8* %ptr1 release, align 1
+
+; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 seq_cst, align 1
+  store atomic volatile i8 2, i8* %ptr1 seq_cst, align 1
+
+; CHECK-NEXT: store atomic i8 2, i8* %ptr1 singlethread unordered, align 1
+  store atomic i8 2, i8* %ptr1 singlethread unordered, align 1
+
+; CHECK-NEXT: store atomic i8 2, i8* %ptr1 singlethread monotonic, align 1
+  store atomic i8 2, i8* %ptr1 singlethread monotonic, align 1
+
+; CHECK-NEXT: store atomic i8 2, i8* %ptr1 singlethread release, align 1
+  store atomic i8 2, i8* %ptr1 singlethread release, align 1
+
+; CHECK-NEXT: store atomic i8 2, i8* %ptr1 singlethread seq_cst, align 1
+  store atomic i8 2, i8* %ptr1 singlethread seq_cst, align 1
+
+; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 singlethread unordered, align 1
+  store atomic volatile i8 2, i8* %ptr1 singlethread unordered, align 1
+
+; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 singlethread monotonic, align 1
+  store atomic volatile i8 2, i8* %ptr1 singlethread monotonic, align 1
+
+; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 singlethread release, align 1
+  store atomic volatile i8 2, i8* %ptr1 singlethread release, align 1
+
+; CHECK-NEXT: store atomic volatile i8 2, i8* %ptr1 singlethread seq_cst, align 1
+  store atomic volatile i8 2, i8* %ptr1 singlethread seq_cst, align 1
+
+  ret void
+}
+
+define void @cmpxchg(i32* %ptr,i32 %cmp,i32 %new){
+entry:
+  ;cmpxchg [volatile] <ty>* <pointer>, <ty> <cmp>, <ty> <new> [singlethread] <ordering>
+
+; CHECK: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
+; CHECK-NEXT: %res1 = extractvalue { i32, i1 } [[TMP]], 0
+  %res1 = cmpxchg i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
+; CHECK-NEXT: %res2 = extractvalue { i32, i1 } [[TMP]], 0
+  %res2 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new monotonic monotonic
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
+; CHECK-NEXT: %res3 = extractvalue { i32, i1 } [[TMP]], 0
+  %res3 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
+; CHECK-NEXT: %res4 = extractvalue { i32, i1 } [[TMP]], 0
+  %res4 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread monotonic monotonic
+
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new acquire acquire
+; CHECK-NEXT: %res5 = extractvalue { i32, i1 } [[TMP]], 0
+  %res5 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acquire acquire
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acquire acquire
+; CHECK-NEXT: %res6 = extractvalue { i32, i1 } [[TMP]], 0
+  %res6 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acquire acquire
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
+; CHECK-NEXT: %res7 = extractvalue { i32, i1 } [[TMP]], 0
+  %res7 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
+; CHECK-NEXT: %res8 = extractvalue { i32, i1 } [[TMP]], 0
+  %res8 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acquire acquire
+
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new release monotonic
+; CHECK-NEXT: %res9 = extractvalue { i32, i1 } [[TMP]], 0
+  %res9 = cmpxchg i32* %ptr, i32 %cmp, i32 %new release monotonic
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new release monotonic
+; CHECK-NEXT: %res10 = extractvalue { i32, i1 } [[TMP]], 0
+  %res10 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new release monotonic
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
+; CHECK-NEXT: %res11 = extractvalue { i32, i1 } [[TMP]], 0
+  %res11 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
+; CHECK-NEXT: %res12 = extractvalue { i32, i1 } [[TMP]], 0
+  %res12 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread release monotonic
+
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
+; CHECK-NEXT: %res13 = extractvalue { i32, i1 } [[TMP]], 0
+  %res13 = cmpxchg i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
+; CHECK-NEXT: %res14 = extractvalue { i32, i1 } [[TMP]], 0
+  %res14 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new acq_rel acquire
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
+; CHECK-NEXT: %res15 = extractvalue { i32, i1 } [[TMP]], 0
+  %res15 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
+; CHECK-NEXT: %res16 = extractvalue { i32, i1 } [[TMP]], 0
+  %res16 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread acq_rel acquire
+
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
+; CHECK-NEXT: %res17 = extractvalue { i32, i1 } [[TMP]], 0
+  %res17 = cmpxchg i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
+; CHECK-NEXT: %res18 = extractvalue { i32, i1 } [[TMP]], 0
+  %res18 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new seq_cst seq_cst
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
+; CHECK-NEXT: %res19 = extractvalue { i32, i1 } [[TMP]], 0
+  %res19 = cmpxchg i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
+
+; CHECK-NEXT: [[TMP:%[a-z0-9]+]] = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
+; CHECK-NEXT: %res20 = extractvalue { i32, i1 } [[TMP]], 0
+  %res20 = cmpxchg volatile i32* %ptr, i32 %cmp, i32 %new singlethread seq_cst seq_cst
+
+  ret void
+}
+
+define void @getelementptr({i8, i8}* %s, <4 x i8*> %ptrs, <4 x i64> %offsets ){
+entry:
+; CHECK: %res1 = getelementptr { i8, i8 }* %s, i32 1, i32 1
+  %res1 = getelementptr {i8, i8}* %s, i32 1, i32 1
+
+; CHECK-NEXT: %res2 = getelementptr inbounds { i8, i8 }* %s, i32 1, i32 1
+  %res2 = getelementptr inbounds {i8, i8}* %s, i32 1, i32 1
+
+; CHECK-NEXT: %res3 = getelementptr <4 x i8*> %ptrs, <4 x i64> %offsets
+  %res3 = getelementptr <4 x i8*> %ptrs, <4 x i64> %offsets
+
+  ret void
+}
+
+!0 = metadata !{ i32 1 }
+!1 = metadata !{}
diff --git a/test/Bitcode/metadata-2.ll b/test/Bitcode/metadata-2.ll
index 4055f921c330..07371a30d272 100644
--- a/test/Bitcode/metadata-2.ll
+++ b/test/Bitcode/metadata-2.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis -disable-output
+; RUN: verify-uselistorder < %s
 	%0 = type { %object.ModuleInfo.__vtbl*, i8*, %"byte[]", %1, %"ClassInfo[]", i32, void ()*, void ()*, void ()*, i8*, void ()* }		; type %0
 	%1 = type { i64, %object.ModuleInfo* }		; type %1
 	%2 = type { i32, void ()* }		; type %2
@@ -83,5 +84,5 @@ moduleinfoCtorEntry:
 }
 !llvm.ldc.classinfo._D6Object7__ClassZ = !{!0}
 !llvm.ldc.classinfo._D10ModuleInfo7__ClassZ = !{!1}
-!0 = metadata !{%object.Object undef, i1 false, i1 false}
-!1 = metadata !{%object.ModuleInfo undef, i1 false, i1 false}
+!0 = !{%object.Object undef, i1 false, i1 false}
+!1 = !{%object.ModuleInfo undef, i1 false, i1 false}
diff --git a/test/Bitcode/metadata.3.5.ll b/test/Bitcode/metadata.3.5.ll
new file mode 100644
index 000000000000..ae7b83a94746
--- /dev/null
+++ b/test/Bitcode/metadata.3.5.ll
@@ -0,0 +1,26 @@
+; RUN: llvm-dis < %s.bc | FileCheck %s
+
+; Check that metadata encoded in 3.5 is correctly understood going forward.
+;
+; Bitcode assembled by llvm-as v3.5.0.
+
+define void @foo(i32 %v) {
+; CHECK: entry:
+entry:
+; CHECK-NEXT: call void @llvm.bar(metadata !0)
+  call void @llvm.bar(metadata !0)
+
+; CHECK-NEXT: ret void, !baz !1
+  ret void, !baz !1
+}
+
+declare void @llvm.bar(metadata)
+
+@global = global i32 0
+
+; CHECK: !0 = !{!1, !2, i32* @global, null}
+; CHECK: !1 = !{!2, null}
+; CHECK: !2 = !{}
+!0 = metadata !{metadata !1, metadata !2, i32* @global, null}
+!1 = metadata !{metadata !2, null}
+!2 = metadata !{}
diff --git a/test/Bitcode/metadata.3.5.ll.bc b/test/Bitcode/metadata.3.5.ll.bc
new file mode 100644
index 000000000000..1857465755da
--- /dev/null
+++ b/test/Bitcode/metadata.3.5.ll.bc
diff --git a/test/Bitcode/metadata.ll b/test/Bitcode/metadata.ll
index fc8a622252bf..7d24a91dd67e 100644
--- a/test/Bitcode/metadata.ll
+++ b/test/Bitcode/metadata.ll
@@ -1,5 +1,6 @@
 ; RUN: llvm-as < %s | llvm-dis -disable-output
+; RUN: verify-uselistorder < %s
 
 !llvm.foo = !{!0}
-!0 = metadata !{i32 42}
+!0 = !{i32 42}
 @my.str = internal constant [4 x i8] c"foo\00"
diff --git a/test/Bitcode/miscInstructions.3.2.ll b/test/Bitcode/miscInstructions.3.2.ll
index bceae20109c3..6a077d5c9fa0 100644
--- a/test/Bitcode/miscInstructions.3.2.ll
+++ b/test/Bitcode/miscInstructions.3.2.ll
@@ -1,126 +1,186 @@
-; RUN:  llvm-dis < %s.bc| FileCheck %s
-
-; miscInstructions.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
-; The test checks that LLVM does not misread miscellaneous instructions of
-; older bitcode files.
-
-define void @icmp(i32 %x1, i32 %x2, i32* %ptr1, i32* %ptr2, <2 x i32> %vec1, <2 x i32> %vec2){
-entry:
-; CHECK: %res1 = icmp eq i32 %x1, %x2
-  %res1 = icmp eq i32 %x1, %x2
-  
-; CHECK-NEXT: %res2 = icmp ne i32 %x1, %x2
-  %res2 = icmp ne i32 %x1, %x2
-  
-; CHECK-NEXT: %res3 = icmp ugt i32 %x1, %x2
-  %res3 = icmp ugt i32 %x1, %x2
-  
-; CHECK-NEXT: %res4 = icmp uge i32 %x1, %x2
-  %res4 = icmp uge i32 %x1, %x2
-  
-; CHECK-NEXT: %res5 = icmp ult i32 %x1, %x2
-  %res5 = icmp ult i32 %x1, %x2
-  
-; CHECK-NEXT: %res6 = icmp ule i32 %x1, %x2
-  %res6 = icmp ule i32 %x1, %x2
-  
-; CHECK-NEXT: %res7 = icmp sgt i32 %x1, %x2
-  %res7 = icmp sgt i32 %x1, %x2
-  
-; CHECK-NEXT: %res8 = icmp sge i32 %x1, %x2
-  %res8 = icmp sge i32 %x1, %x2
-  
-; CHECK-NEXT: %res9 = icmp slt i32 %x1, %x2
-  %res9 = icmp slt i32 %x1, %x2
-  
-; CHECK-NEXT: %res10 = icmp sle i32 %x1, %x2
-  %res10 = icmp sle i32 %x1, %x2
-  
-; CHECK-NEXT: %res11 = icmp eq i32* %ptr1, %ptr2
-  %res11 = icmp eq i32* %ptr1, %ptr2
-  
-; CHECK-NEXT: %res12 = icmp eq <2 x i32> %vec1, %vec2
-  %res12 = icmp eq <2 x i32> %vec1, %vec2
-  
-  ret void
-}
-
-
-define void @fcmp(float %x1, float %x2, <2 x float> %vec1, <2 x float> %vec2){
-entry:
-; CHECK: %res1 = fcmp oeq float %x1, %x2
-  %res1 = fcmp oeq float %x1, %x2
-  
-; CHECK-NEXT: %res2 = fcmp one float %x1, %x2
-  %res2 = fcmp one float %x1, %x2
-  
-; CHECK-NEXT: %res3 = fcmp ugt float %x1, %x2
-  %res3 = fcmp ugt float %x1, %x2
-  
-; CHECK-NEXT: %res4 = fcmp uge float %x1, %x2
-  %res4 = fcmp uge float %x1, %x2
-  
-; CHECK-NEXT: %res5 = fcmp ult float %x1, %x2
-  %res5 = fcmp ult float %x1, %x2
-  
-; CHECK-NEXT: %res6 = fcmp ule float %x1, %x2
-  %res6 = fcmp ule float %x1, %x2
-  
-; CHECK-NEXT: %res7 = fcmp ogt float %x1, %x2
-  %res7 = fcmp ogt float %x1, %x2
-  
-; CHECK-NEXT: %res8 = fcmp oge float %x1, %x2
-  %res8 = fcmp oge float %x1, %x2
-  
-; CHECK-NEXT: %res9 = fcmp olt float %x1, %x2
-  %res9 = fcmp olt float %x1, %x2
-  
-; CHECK-NEXT: %res10 = fcmp ole float %x1, %x2
-  %res10 = fcmp ole float %x1, %x2
-  
-; CHECK-NEXT: %res11 = fcmp ord float %x1, %x2
-  %res11 = fcmp ord float %x1, %x2
-  
-; CHECK-NEXT: %res12 = fcmp ueq float %x1, %x2
-  %res12 = fcmp ueq float %x1, %x2
-  
-; CHECK-NEXT: %res13 = fcmp une float %x1, %x2
-  %res13 = fcmp une float %x1, %x2
-  
-; CHECK-NEXT: %res14 = fcmp uno float %x1, %x2
-  %res14 = fcmp uno float %x1, %x2
-  
-; CHECK-NEXT: %res15 = fcmp true float %x1, %x2
-  %res15 = fcmp true float %x1, %x2
-  
-; CHECK-NEXT: %res16 = fcmp false float %x1, %x2
-  %res16 = fcmp false float %x1, %x2
-  
-; CHECK-NEXT: %res17 = fcmp oeq <2 x float> %vec1, %vec2
-  %res17 = fcmp oeq <2 x float> %vec1, %vec2
-  
-  ret void
-}
-
-declare i32 @printf(i8* noalias nocapture, ...)
-
-define void @call(i32 %x, i8* %msg ){
-entry:
-
-; CHECK: %res1 = call i32 @test(i32 %x)
-  %res1 = call i32 @test(i32 %x)
-  
-; CHECK-NEXT: %res2 = tail call i32 @test(i32 %x)
-  %res2 = tail call i32 @test(i32 %x)
-  
-; CHECK-NEXT: %res3 = call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42)
-  %res3 = call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42)
-  
-  ret void
-}
-
-define i32 @test(i32 %x){
-entry:
-
-  ret i32 %x
-}
+; RUN:  llvm-dis < %s.bc| FileCheck %s
+
+; miscInstructions.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
+; The test checks that LLVM does not misread miscellaneous instructions of
+; older bitcode files.
+
+@X = global i8 1
+@_ZTIi = global i8* @X 
+@_ZTId = global i8* @X 
+
+define i32 @__gxx_personality_v0(...){
+entry:
+  ret i32 0
+}
+
+define void @landingpadInstr1(i1 %cond1, <2 x i1> %cond2, <2 x i8> %x1, <2 x i8> %x2){
+entry:
+; CHECK: %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+  %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 
+; CHECK: catch i8** @_ZTIi
+  catch i8** @_ZTIi
+  ret void
+}
+
+define void @landingpadInstr2(i1 %cond1, <2 x i1> %cond2, <2 x i8> %x1, <2 x i8> %x2){
+entry:
+; CHECK: %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+  %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+; CHECK: cleanup
+  cleanup
+  ret void
+}
+
+define void @landingpadInstr3(i1 %cond1, <2 x i1> %cond2, <2 x i8> %x1, <2 x i8> %x2){
+entry:
+; CHECK: %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0 
+  %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+; CHECK: catch i8** @_ZTIi
+  catch i8** @_ZTIi
+; CHECK: filter [1 x i8**] [i8** @_ZTId]
+  filter [1 x i8**] [i8** @_ZTId]
+  ret void
+}
+
+define void @phiInstr(){
+LoopHeader: 
+  %x = add i32 0, 0
+  br label %Loop
+Loop:
+; CHECK:  %indvar = phi i32 [ 0, %LoopHeader ], [ %nextindvar, %Loop ]
+  %indvar = phi i32 [ 0, %LoopHeader ], [ %nextindvar, %Loop ]
+  %nextindvar = add i32 %indvar, 1
+  br label %Loop
+  ret void
+}
+
+define void @selectInstr(i1 %cond1, <2 x i1> %cond2, <2 x i8> %x1, <2 x i8> %x2){
+entry:
+; CHECK: %res1 = select i1 %cond1, i8 1, i8 0 
+  %res1 = select i1 %cond1, i8 1, i8 0
+; CHECK-NEXT: %res2 = select <2 x i1> %cond2, <2 x i8> %x1, <2 x i8> %x2
+  %res2 = select <2 x i1> %cond2, <2 x i8> %x1, <2 x i8> %x2
+
+  ret void
+}
+
+define void @icmp(i32 %x1, i32 %x2, i32* %ptr1, i32* %ptr2, <2 x i32> %vec1, <2 x i32> %vec2){
+entry:
+; CHECK: %res1 = icmp eq i32 %x1, %x2
+  %res1 = icmp eq i32 %x1, %x2
+  
+; CHECK-NEXT: %res2 = icmp ne i32 %x1, %x2
+  %res2 = icmp ne i32 %x1, %x2
+  
+; CHECK-NEXT: %res3 = icmp ugt i32 %x1, %x2
+  %res3 = icmp ugt i32 %x1, %x2
+  
+; CHECK-NEXT: %res4 = icmp uge i32 %x1, %x2
+  %res4 = icmp uge i32 %x1, %x2
+  
+; CHECK-NEXT: %res5 = icmp ult i32 %x1, %x2
+  %res5 = icmp ult i32 %x1, %x2
+  
+; CHECK-NEXT: %res6 = icmp ule i32 %x1, %x2
+  %res6 = icmp ule i32 %x1, %x2
+  
+; CHECK-NEXT: %res7 = icmp sgt i32 %x1, %x2
+  %res7 = icmp sgt i32 %x1, %x2
+  
+; CHECK-NEXT: %res8 = icmp sge i32 %x1, %x2
+  %res8 = icmp sge i32 %x1, %x2
+  
+; CHECK-NEXT: %res9 = icmp slt i32 %x1, %x2
+  %res9 = icmp slt i32 %x1, %x2
+  
+; CHECK-NEXT: %res10 = icmp sle i32 %x1, %x2
+  %res10 = icmp sle i32 %x1, %x2
+  
+; CHECK-NEXT: %res11 = icmp eq i32* %ptr1, %ptr2
+  %res11 = icmp eq i32* %ptr1, %ptr2
+  
+; CHECK-NEXT: %res12 = icmp eq <2 x i32> %vec1, %vec2
+  %res12 = icmp eq <2 x i32> %vec1, %vec2
+  
+  ret void
+}
+
+
+define void @fcmp(float %x1, float %x2, <2 x float> %vec1, <2 x float> %vec2){
+entry:
+; CHECK: %res1 = fcmp oeq float %x1, %x2
+  %res1 = fcmp oeq float %x1, %x2
+  
+; CHECK-NEXT: %res2 = fcmp one float %x1, %x2
+  %res2 = fcmp one float %x1, %x2
+  
+; CHECK-NEXT: %res3 = fcmp ugt float %x1, %x2
+  %res3 = fcmp ugt float %x1, %x2
+  
+; CHECK-NEXT: %res4 = fcmp uge float %x1, %x2
+  %res4 = fcmp uge float %x1, %x2
+  
+; CHECK-NEXT: %res5 = fcmp ult float %x1, %x2
+  %res5 = fcmp ult float %x1, %x2
+  
+; CHECK-NEXT: %res6 = fcmp ule float %x1, %x2
+  %res6 = fcmp ule float %x1, %x2
+  
+; CHECK-NEXT: %res7 = fcmp ogt float %x1, %x2
+  %res7 = fcmp ogt float %x1, %x2
+  
+; CHECK-NEXT: %res8 = fcmp oge float %x1, %x2
+  %res8 = fcmp oge float %x1, %x2
+  
+; CHECK-NEXT: %res9 = fcmp olt float %x1, %x2
+  %res9 = fcmp olt float %x1, %x2
+  
+; CHECK-NEXT: %res10 = fcmp ole float %x1, %x2
+  %res10 = fcmp ole float %x1, %x2
+  
+; CHECK-NEXT: %res11 = fcmp ord float %x1, %x2
+  %res11 = fcmp ord float %x1, %x2
+  
+; CHECK-NEXT: %res12 = fcmp ueq float %x1, %x2
+  %res12 = fcmp ueq float %x1, %x2
+  
+; CHECK-NEXT: %res13 = fcmp une float %x1, %x2
+  %res13 = fcmp une float %x1, %x2
+  
+; CHECK-NEXT: %res14 = fcmp uno float %x1, %x2
+  %res14 = fcmp uno float %x1, %x2
+  
+; CHECK-NEXT: %res15 = fcmp true float %x1, %x2
+  %res15 = fcmp true float %x1, %x2
+  
+; CHECK-NEXT: %res16 = fcmp false float %x1, %x2
+  %res16 = fcmp false float %x1, %x2
+  
+; CHECK-NEXT: %res17 = fcmp oeq <2 x float> %vec1, %vec2
+  %res17 = fcmp oeq <2 x float> %vec1, %vec2
+  
+  ret void
+}
+
+declare i32 @printf(i8* noalias nocapture, ...)
+
+define void @call(i32 %x, i8* %msg ){
+entry:
+
+; CHECK: %res1 = call i32 @test(i32 %x)
+  %res1 = call i32 @test(i32 %x)
+  
+; CHECK-NEXT: %res2 = tail call i32 @test(i32 %x)
+  %res2 = tail call i32 @test(i32 %x)
+  
+; CHECK-NEXT: %res3 = call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42)
+  %res3 = call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42)
+  
+  ret void
+}
+
+define i32 @test(i32 %x){
+entry:
+
+  ret i32 %x
+}
diff --git a/test/Bitcode/miscInstructions.3.2.ll.bc b/test/Bitcode/miscInstructions.3.2.ll.bc
index 9d479b506171..ed63d7059fae 100644
--- a/test/Bitcode/miscInstructions.3.2.ll.bc
+++ b/test/Bitcode/miscInstructions.3.2.ll.bc
diff --git a/test/Bitcode/old-aliases.ll b/test/Bitcode/old-aliases.ll
index 7a0eea2f3f24..b73b1a90afb9 100644
--- a/test/Bitcode/old-aliases.ll
+++ b/test/Bitcode/old-aliases.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-dis < %s.bc | FileCheck %s
+; RUN: verify-uselistorder < %s.bc
 
 ; old-aliases.bc consist of this file assembled with an old llvm-as (3.5 trunk)
 ; from when aliases contained a ConstantExpr.
diff --git a/test/Bitcode/ptest-new.ll b/test/Bitcode/ptest-new.ll
index 735cc9c1cc44..c17ddc910df2 100644
--- a/test/Bitcode/ptest-new.ll
+++ b/test/Bitcode/ptest-new.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder < %s
 
 define i32 @foo(<2 x i64> %bar) nounwind {
 entry:
diff --git a/test/Bitcode/ptest-old.ll b/test/Bitcode/ptest-old.ll
index fbe962fae51a..c1e1cae37368 100644
--- a/test/Bitcode/ptest-old.ll
+++ b/test/Bitcode/ptest-old.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder < %s
 
 define i32 @foo(<4 x float> %bar) nounwind {
 entry:
diff --git a/test/Bitcode/select.ll b/test/Bitcode/select.ll
index 08a3061394db..3ad06796dccf 100644
--- a/test/Bitcode/select.ll
+++ b/test/Bitcode/select.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder < %s
 
 define <2 x i32> @main() {
   ret <2 x i32> select (<2 x i1> <i1 false, i1 undef>, <2 x i32> zeroinitializer, <2 x i32> <i32 0, i32 undef>)
diff --git a/test/Bitcode/shuffle.ll b/test/Bitcode/shuffle.ll
index 1495d8eebf9c..b84641cef23a 100644
--- a/test/Bitcode/shuffle.ll
+++ b/test/Bitcode/shuffle.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis -disable-output
+; RUN: verify-uselistorder < %s
 
 ; <rdar://problem/8622574>
 ; tests the bitcodereader can handle the case where the reader will initially
diff --git a/test/Bitcode/ssse3_palignr.ll b/test/Bitcode/ssse3_palignr.ll
index 90b4394a8b46..8254513a0c57 100644
--- a/test/Bitcode/ssse3_palignr.ll
+++ b/test/Bitcode/ssse3_palignr.ll
@@ -1,4 +1,5 @@
 ; RUN: opt < %s -S | FileCheck %s
+; RUN: verify-uselistorder < %s
 ; CHECK-NOT: {@llvm\\.palign}
 
 define <4 x i32> @align1(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp {
diff --git a/test/Bitcode/standardCIntrinsic.3.2.ll b/test/Bitcode/standardCIntrinsic.3.2.ll
new file mode 100644
index 000000000000..09f2378a2217
--- /dev/null
+++ b/test/Bitcode/standardCIntrinsic.3.2.ll
@@ -0,0 +1,16 @@
+; RUN:  llvm-dis < %s.bc| FileCheck %s
+
+; standardCIntrinsic.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
+; The test checks that LLVM does not misread standard C library intrinsic functions
+; of older bitcode files.
+
+define void @memcpyintrinsic(i8* %dest, i8* %src, i32 %len) {
+entry:
+
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 true)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 true)
+  
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 %align, i1 %isvolatile)
+\ No newline at end of file
diff --git a/test/Bitcode/standardCIntrinsic.3.2.ll.bc b/test/Bitcode/standardCIntrinsic.3.2.ll.bc
new file mode 100644
index 000000000000..3ffb1af4fa7c
--- /dev/null
+++ b/test/Bitcode/standardCIntrinsic.3.2.ll.bc
diff --git a/test/Bitcode/tailcall.ll b/test/Bitcode/tailcall.ll
index 765b47054cad..01190d74c348 100644
--- a/test/Bitcode/tailcall.ll
+++ b/test/Bitcode/tailcall.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder < %s
 
 ; Check that musttail and tail roundtrip.
 
diff --git a/test/Bitcode/terminatorInstructions.3.2.ll b/test/Bitcode/terminatorInstructions.3.2.ll
index 31e78967ee0c..ba0f5ade2cc1 100644
--- a/test/Bitcode/terminatorInstructions.3.2.ll
+++ b/test/Bitcode/terminatorInstructions.3.2.ll
@@ -1,47 +1,76 @@
-; RUN:  llvm-dis < %s.bc| FileCheck %s
-
-; TerminatorOperations.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
-; The test checks that LLVM does not misread terminator instructions from
-; older bitcode files.
-
-define i32 @condbr(i1 %cond){
-entry:
-; CHECK: br i1 %cond, label %TrueLabel, label %FalseLabel
-  br i1 %cond, label %TrueLabel, label %FalseLabel
-  
-  TrueLabel:
-  ret i32 1
-  
-  FalseLabel:
-  ret i32 0
-}
-
-define i32 @uncondbr(){
-entry:
-; CHECK: br label %uncondLabel
-  br label %uncondLabel
-  
-  uncondLabel:
-  ret i32 1
-}
-
-define i32 @indirectbr(i8* %Addr){
-entry:
-; CHECK: indirectbr i8* %Addr, [label %bb1, label %bb2]
-  indirectbr i8* %Addr, [ label %bb1, label %bb2 ]
-  
-  bb1:
-  ret i32 1
-  
-  bb2:
-  ret i32 0
-}
-
-define void @unreachable(){
-entry:
-; CHECK: unreachable
-  unreachable
-  
-  ret void
-}
-
+; RUN:  llvm-dis < %s.bc| FileCheck %s
+
+; TerminatorOperations.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
+; The test checks that LLVM does not misread terminator instructions from
+; older bitcode files.
+
+define i32 @condbr(i1 %cond){
+entry:
+; CHECK: br i1 %cond, label %TrueLabel, label %FalseLabel
+  br i1 %cond, label %TrueLabel, label %FalseLabel
+  
+  TrueLabel:
+  ret i32 1
+  
+  FalseLabel:
+  ret i32 0
+}
+
+define i32 @uncondbr(){
+entry:
+; CHECK: br label %uncondLabel
+  br label %uncondLabel
+  
+  uncondLabel:
+  ret i32 1
+}
+
+define i32 @indirectbr(i8* %Addr){
+entry:
+; CHECK: indirectbr i8* %Addr, [label %bb1, label %bb2]
+  indirectbr i8* %Addr, [ label %bb1, label %bb2 ]
+  
+  bb1:
+  ret i32 1
+  
+  bb2:
+  ret i32 0
+}
+
+define void @unreachable(){
+entry:
+; CHECK: unreachable
+  unreachable
+  
+  ret void
+}
+
+define i32 @retInstr(){
+entry:
+; CHECK: ret i32 1 
+  ret i32 1 
+}
+
+define void @retInstr2(){
+entry:
+; CHECK: ret void 
+  ret void
+}
+
+define i32 @switchInstr(i32 %x){
+entry:
+; CHECK: switch i32 %x, label %label3 [
+  switch i32 %x, label %label3 [
+; CHECK-NEXT: i32 1, label %label1   
+  i32 1, label %label1
+; CHECK-NEXT: i32 2, label %label2  
+  i32 2, label %label2
+  ]
+label1:
+  ret i32 1
+label2:
+  ret i32 2
+label3:
+  ret i32 0
+}
+
diff --git a/test/Bitcode/terminatorInstructions.3.2.ll.bc b/test/Bitcode/terminatorInstructions.3.2.ll.bc
index 9d92ead8ad18..0fbc3194373a 100644
--- a/test/Bitcode/terminatorInstructions.3.2.ll.bc
+++ b/test/Bitcode/terminatorInstructions.3.2.ll.bc
diff --git a/test/Bitcode/upgrade-global-ctors.ll b/test/Bitcode/upgrade-global-ctors.ll
index bd253a81620f..d7afcdd0c182 100644
--- a/test/Bitcode/upgrade-global-ctors.ll
+++ b/test/Bitcode/upgrade-global-ctors.ll
@@ -1,3 +1,5 @@
 ; RUN:  llvm-dis < %s.bc| FileCheck %s
+; RUN:  verify-uselistorder < %s.bc
 
-; CHECK: @llvm.global_ctors = appending global [0 x { i32, void ()*, i8* }] zeroinitializer
+; Global constructors should no longer be upgraded when reading bitcode.
+; CHECK: @llvm.global_ctors = appending global [0 x { i32, void ()* }] zeroinitializer
diff --git a/test/Bitcode/upgrade-loop-metadata.ll b/test/Bitcode/upgrade-loop-metadata.ll
index 67a8d3935926..be2a99a47afb 100644
--- a/test/Bitcode/upgrade-loop-metadata.ll
+++ b/test/Bitcode/upgrade-loop-metadata.ll
@@ -1,6 +1,7 @@
 ; Test to make sure loop vectorizer metadata is automatically upgraded.
 ;
 ; RUN: llvm-dis < %s.bc | FileCheck %s
+; RUN: verify-uselistorder < %s.bc
 
 define void @_Z28loop_with_vectorize_metadatav() {
 entry:
@@ -26,9 +27,9 @@ for.end:                                          ; preds = %for.cond
   ret void
 }
 
-; CHECK: !{metadata !"llvm.loop.interleave.count", i32 4}
-; CHECK: !{metadata !"llvm.loop.vectorize.width", i32 8}
-; CHECK: !{metadata !"llvm.loop.vectorize.enable", i1 true}
+; CHECK: !{!"llvm.loop.interleave.count", i32 4}
+; CHECK: !{!"llvm.loop.vectorize.width", i32 8}
+; CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
 
 !0 = metadata !{metadata !"clang version 3.5.0 (trunk 211528)"}
 !1 = metadata !{metadata !1, metadata !2, metadata !3, metadata !4, metadata !4}
diff --git a/test/Bitcode/upgrade-tbaa.ll b/test/Bitcode/upgrade-tbaa.ll
index e7389095b8c1..c20c66aad127 100644
--- a/test/Bitcode/upgrade-tbaa.ll
+++ b/test/Bitcode/upgrade-tbaa.ll
@@ -1,9 +1,10 @@
 ; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: verify-uselistorder < %s
 
 ; Function Attrs: nounwind
 define void @_Z4testPiPf(i32* nocapture %pI, float* nocapture %pF) #0 {
 entry:
-  store i32 0, i32* %pI, align 4, !tbaa !{metadata !"int", metadata !0}
+  store i32 0, i32* %pI, align 4, !tbaa !{!"int", !0}
   ; CHECK: store i32 0, i32* %pI, align 4, !tbaa [[TAG_INT:!.*]]
   store float 1.000000e+00, float* %pF, align 4, !tbaa !2
   ; CHECK: store float 1.000000e+00, float* %pF, align 4, !tbaa [[TAG_FLOAT:!.*]]
@@ -12,12 +13,12 @@ entry:
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA"}
-!2 = metadata !{metadata !"float", metadata !0}
+!0 = !{!"omnipotent char", !1}
+!1 = !{!"Simple C/C++ TBAA"}
+!2 = !{!"float", !0}
 
-; CHECK: [[TAG_INT]] = metadata !{metadata [[TYPE_INT:!.*]], metadata [[TYPE_INT]], i64 0}
-; CHECK: [[TYPE_INT]] = metadata !{metadata !"int", metadata [[TYPE_CHAR:!.*]]}
-; CHECK: [[TYPE_CHAR]] = metadata !{metadata !"omnipotent char", metadata !{{.*}}
-; CHECK: [[TAG_FLOAT]] = metadata !{metadata [[TYPE_FLOAT:!.*]], metadata [[TYPE_FLOAT]], i64 0}
-; CHECK: [[TYPE_FLOAT]] = metadata !{metadata !"float", metadata [[TYPE_CHAR]]}
+; CHECK: [[TAG_INT]] = !{[[TYPE_INT:!.*]], [[TYPE_INT]], i64 0}
+; CHECK: [[TYPE_INT]] = !{!"int", [[TYPE_CHAR:!.*]]}
+; CHECK: [[TYPE_CHAR]] = !{!"omnipotent char", !{{.*}}
+; CHECK: [[TAG_FLOAT]] = !{[[TYPE_FLOAT:!.*]], [[TYPE_FLOAT]], i64 0}
+; CHECK: [[TYPE_FLOAT]] = !{!"float", [[TYPE_CHAR]]}
diff --git a/test/Bitcode/use-list-order.ll b/test/Bitcode/use-list-order.ll
new file mode 100644
index 000000000000..6617b9c5edf8
--- /dev/null
+++ b/test/Bitcode/use-list-order.ll
@@ -0,0 +1,168 @@
+; RUN: verify-uselistorder < %s
+
+@a = global [4 x i1] [i1 0, i1 1, i1 0, i1 1]
+@b = alias i1* getelementptr ([4 x i1]* @a, i64 0, i64 2)
+
+; Check use-list order of constants used by globals.
+@glob1 = global i5 7
+@glob2 = global i5 7
+@glob3 = global i5 7
+
+; Check use-list order between variables and aliases.
+@target = global i3 zeroinitializer
+@alias1 = alias i3* @target
+@alias2 = alias i3* @target
+@alias3 = alias i3* @target
+@var1 = global i3* @target
+@var2 = global i3* @target
+@var3 = global i3* @target
+
+; Check use-list order for a global when used both by a global and in a
+; function.
+@globalAndFunction = global i4 4
+@globalAndFunctionGlobalUser = global i4* @globalAndFunction
+
+; Check use-list order for constants used by globals that are themselves used
+; as aliases.  This confirms that this globals are recognized as GlobalValues
+; (not general constants).
+@const.global = global i63 0
+@const.global.ptr = global i63* @const.global
+@const.global.2 = global i63 0
+
+; Same as above, but for aliases.
+@const.target = global i62 1
+@const.alias = alias i62* @const.target
+@const.alias.ptr = alias i62* @const.alias
+@const.alias.2 = alias i62* @const.target
+
+define i64 @f(i64 %f) {
+entry:
+  %sum = add i64 %f, 0
+  ret i64 %sum
+}
+
+define i64 @g(i64 %g) {
+entry:
+  %sum = add i64 %g, 0
+  ret i64 %sum
+}
+
+define i64 @h(i64 %h) {
+entry:
+  %sum = add i64 %h, 0
+  ret i64 %sum
+}
+
+define i64 @i(i64 %i) {
+entry:
+  %sum = add i64 %i, 1
+  ret i64 %sum
+}
+
+define i64 @j(i64 %j) {
+entry:
+  %sum = add i64 %j, 1
+  ret i64 %sum
+}
+
+define i64 @k(i64 %k) {
+entry:
+  %sum = add i64 %k, 1
+  ret i64 %sum
+}
+
+define i64 @l(i64 %l) {
+entry:
+  %sum = add i64 %l, 1
+  ret i64 %sum
+}
+
+define i1 @loadb() {
+entry:
+  %b = load i1* @b
+  ret i1 %b
+}
+
+define i1 @loada() {
+entry:
+  %a = load i1* getelementptr ([4 x i1]* @a, i64 0, i64 2)
+  ret i1 %a
+}
+
+define i32 @f32(i32 %a, i32 %b, i32 %c, i32 %d) {
+entry:
+  br label %first
+
+second:
+  %eh = mul i32 %e, %h
+  %sum = add i32 %eh, %ef
+  br label %exit
+
+exit:
+  %product = phi i32 [%ef, %first], [%sum, %second]
+  ret i32 %product
+
+first:
+  %e = add i32 %a, 7
+  %f = add i32 %b, 7
+  %g = add i32 %c, 8
+  %h = add i32 %d, 8
+  %ef = mul i32 %e, %f
+  %gh = mul i32 %g, %h
+  %gotosecond = icmp slt i32 %gh, -9
+  br i1 %gotosecond, label %second, label %exit
+}
+
+define i4 @globalAndFunctionFunctionUser() {
+entry:
+  %local = load i4* @globalAndFunction
+  ret i4 %local
+}
+
+; Check for when an instruction is its own user.
+define void @selfUser(i1 %a) {
+entry:
+  ret void
+
+loop1:
+  br label %loop2
+
+loop2:
+  %var = phi i32 [ %var, %loop1 ], [ %var, %loop2 ]
+  br label %loop2
+}
+
+; Check that block addresses work.
+@ba1 = constant i8* blockaddress (@bafunc1, %bb)
+@ba2 = constant i8* getelementptr (i8* blockaddress (@bafunc2, %bb), i61 0)
+@ba3 = constant i8* getelementptr (i8* blockaddress (@bafunc2, %bb), i61 0)
+
+define i8* @babefore() {
+  ret i8* getelementptr (i8* blockaddress (@bafunc2, %bb), i61 0)
+bb1:
+  ret i8* blockaddress (@bafunc1, %bb)
+bb2:
+  ret i8* blockaddress (@bafunc3, %bb)
+}
+define void @bafunc1() {
+  unreachable
+bb:
+  unreachable
+}
+define void @bafunc2() {
+  unreachable
+bb:
+  unreachable
+}
+define void @bafunc3() {
+  unreachable
+bb:
+  unreachable
+}
+define i8* @baafter() {
+  ret i8* blockaddress (@bafunc2, %bb)
+bb1:
+  ret i8* blockaddress (@bafunc1, %bb)
+bb2:
+  ret i8* blockaddress (@bafunc3, %bb)
+}
diff --git a/test/Bitcode/variableArgumentIntrinsic.3.2.ll b/test/Bitcode/variableArgumentIntrinsic.3.2.ll
index 35fe0e252822..ad70f0542e88 100644
--- a/test/Bitcode/variableArgumentIntrinsic.3.2.ll
+++ b/test/Bitcode/variableArgumentIntrinsic.3.2.ll
@@ -1,33 +1,34 @@
-; RUN:  llvm-dis < %s.bc| FileCheck %s
-
-; vaArgIntrinsic.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
-; The test checks that LLVM does not misread variable argument intrinsic instructions
-; of older bitcode files.
-
-define i32 @varArgIntrinsic(i32 %X, ...) {
-
-  %ap = alloca i8*
-  %ap2 = bitcast i8** %ap to i8*
-  
-; CHECK: call void @llvm.va_start(i8* %ap2)
-  call void @llvm.va_start(i8* %ap2)
-
-; CHECK-NEXT: %tmp = va_arg i8** %ap, i32
-  %tmp = va_arg i8** %ap, i32
-
-  %aq = alloca i8*
-  %aq2 = bitcast i8** %aq to i8*
-  
-; CHECK: call void @llvm.va_copy(i8* %aq2, i8* %ap2)
-  call void @llvm.va_copy(i8* %aq2, i8* %ap2)
-; CHECK-NEXT: call void @llvm.va_end(i8* %aq2)
-  call void @llvm.va_end(i8* %aq2)
-
-; CHECK-NEXT:  call void @llvm.va_end(i8* %ap2)
-  call void @llvm.va_end(i8* %ap2)
-  ret i32 %tmp
-}
-
-declare void @llvm.va_start(i8*)
-declare void @llvm.va_copy(i8*, i8*)
-declare void @llvm.va_end(i8*)
-\ No newline at end of file
+; RUN: llvm-dis < %s.bc| FileCheck %s
+; RUN: verify-uselistorder < %s.bc
+
+; vaArgIntrinsic.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
+; The test checks that LLVM does not misread variable argument intrinsic instructions
+; of older bitcode files.
+
+define i32 @varArgIntrinsic(i32 %X, ...) {
+
+  %ap = alloca i8*
+  %ap2 = bitcast i8** %ap to i8*
+
+; CHECK: call void @llvm.va_start(i8* %ap2)
+  call void @llvm.va_start(i8* %ap2)
+
+; CHECK-NEXT: %tmp = va_arg i8** %ap, i32
+  %tmp = va_arg i8** %ap, i32
+
+  %aq = alloca i8*
+  %aq2 = bitcast i8** %aq to i8*
+
+; CHECK: call void @llvm.va_copy(i8* %aq2, i8* %ap2)
+  call void @llvm.va_copy(i8* %aq2, i8* %ap2)
+; CHECK-NEXT: call void @llvm.va_end(i8* %aq2)
+  call void @llvm.va_end(i8* %aq2)
+
+; CHECK-NEXT:  call void @llvm.va_end(i8* %ap2)
+  call void @llvm.va_end(i8* %ap2)
+  ret i32 %tmp
+}
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
diff --git a/test/Bitcode/vectorInstructions.3.2.ll b/test/Bitcode/vectorInstructions.3.2.ll
index b24ef75ef081..94c193a1c441 100644
--- a/test/Bitcode/vectorInstructions.3.2.ll
+++ b/test/Bitcode/vectorInstructions.3.2.ll
@@ -1,34 +1,33 @@
-; RUN:  llvm-dis < %s.bc| FileCheck %s
-
-; vectorOperations.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
-; The test checks that LLVM does not misread vector operations of
-; older bitcode files.
-
-define void @extractelement(<2 x i8> %x1){
-entry:
-; CHECK: %res1 = extractelement <2 x i8> %x1, i32 0
-  %res1 = extractelement <2 x i8> %x1, i32 0
-  
-  ret void
-}
-
-define void @insertelement(<2 x i8> %x1){
-entry:
-; CHECK: %res1 = insertelement <2 x i8> %x1, i8 0, i32 0
-  %res1 = insertelement <2 x i8> %x1, i8 0, i32 0
-  
-  ret void
-}
-
-define void @shufflevector(<2 x i8> %x1){
-entry:
-; CHECK: %res1 = shufflevector <2 x i8> %x1, <2 x i8> %x1, <2 x i32> <i32 0, i32 1>
-  %res1 = shufflevector <2 x i8> %x1, <2 x i8> %x1, <2 x i32> <i32 0, i32 1>
-
-; CHECK-NEXT: %res2 = shufflevector <2 x i8> %x1, <2 x i8> undef, <2 x i32> <i32 0, i32 1>
-  %res2 = shufflevector <2 x i8> %x1, <2 x i8> undef, <2 x i32> <i32 0, i32 1>
-  
-  ret void
-}
-
-
+; RUN: llvm-dis < %s.bc| FileCheck %s
+; RUN: verify-uselistorder < %s.bc
+
+; vectorOperations.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
+; The test checks that LLVM does not misread vector operations of
+; older bitcode files.
+
+define void @extractelement(<2 x i8> %x1){
+entry:
+; CHECK: %res1 = extractelement <2 x i8> %x1, i32 0
+  %res1 = extractelement <2 x i8> %x1, i32 0
+
+  ret void
+}
+
+define void @insertelement(<2 x i8> %x1){
+entry:
+; CHECK: %res1 = insertelement <2 x i8> %x1, i8 0, i32 0
+  %res1 = insertelement <2 x i8> %x1, i8 0, i32 0
+
+  ret void
+}
+
+define void @shufflevector(<2 x i8> %x1){
+entry:
+; CHECK: %res1 = shufflevector <2 x i8> %x1, <2 x i8> %x1, <2 x i32> <i32 0, i32 1>
+  %res1 = shufflevector <2 x i8> %x1, <2 x i8> %x1, <2 x i32> <i32 0, i32 1>
+
+; CHECK-NEXT: %res2 = shufflevector <2 x i8> %x1, <2 x i8> undef, <2 x i32> <i32 0, i32 1>
+  %res2 = shufflevector <2 x i8> %x1, <2 x i8> undef, <2 x i32> <i32 0, i32 1>
+
+  ret void
+}
diff --git a/test/Bitcode/visibility-styles.3.2.ll b/test/Bitcode/visibility-styles.3.2.ll
index ec2ee6832063..e36c0e04a808 100644
--- a/test/Bitcode/visibility-styles.3.2.ll
+++ b/test/Bitcode/visibility-styles.3.2.ll
@@ -1,4 +1,5 @@
 ; RUN:  llvm-dis < %s.bc| FileCheck %s
+; RUN:  verify-uselistorder < %s.bc
 
 ; visibility-styles.3.2.ll.bc was generated by passing this file to llvm-as-3.2.
 ; The test checks that LLVM does not silently misread visibility styles of
diff --git a/test/Bitcode/weak-cmpxchg-upgrade.ll b/test/Bitcode/weak-cmpxchg-upgrade.ll
index dbcd150633ed..76b857b5e221 100644
--- a/test/Bitcode/weak-cmpxchg-upgrade.ll
+++ b/test/Bitcode/weak-cmpxchg-upgrade.ll
@@ -1,4 +1,5 @@
 ; RUN: llvm-dis < %s.bc | FileCheck %s
+; RUN: verify-uselistorder < %s.bc
 
 ; cmpxchg-upgrade.ll.bc was produced by running a version of llvm-as from just
 ; before the IR change on this file.
diff --git a/test/BugPoint/metadata.ll b/test/BugPoint/metadata.ll
index cc043f084feb..00015d1e8713 100644
--- a/test/BugPoint/metadata.ll
+++ b/test/BugPoint/metadata.ll
@@ -5,11 +5,11 @@
 ; Bugpoint should keep the call's metadata attached to the call.
 
 ; CHECK: call void @foo(), !dbg ![[LOC:[0-9]+]], !attach ![[CALL:[0-9]+]]
-; CHECK: ![[LOC]] = metadata !{i32 104, i32 105, metadata ![[SCOPE:[0-9]+]], metadata ![[SCOPE]]}
-; CHECK: ![[SCOPE]] = metadata !{i32 458769, metadata ![[FILE:[0-9]+]], i32 0, metadata !"me", i1 true, metadata !"", i32 0, metadata ![[LIST:[0-9]+]], metadata ![[LIST]], null, null, null, metadata !""}
-; CHECK: ![[FILE]] = metadata !{metadata !"source.c", metadata !"/dir"}
-; CHECK: ![[LIST]] = metadata !{i32 0}
-; CHECK: ![[CALL]] = metadata !{metadata !"the call to foo"}
+; CHECK: ![[LOC]] = !MDLocation(line: 104, column: 105, scope: ![[SCOPE:[0-9]+]], inlinedAt: ![[SCOPE]])
+; CHECK: ![[SCOPE]] = !{!"0x11\000\00me\001\00\000\00\000", ![[FILE:[0-9]+]], ![[LIST:[0-9]+]], ![[LIST]], null, null, null}
+; CHECK: ![[FILE]] = !{!"source.c", !"/dir"}
+; CHECK: ![[LIST]] = !{i32 0}
+; CHECK: ![[CALL]] = !{!"the call to foo"}
 
 %rust_task = type {}
 define void @test(i32* %a, i8* %b) {
@@ -25,18 +25,18 @@ declare void @foo()
 
 !llvm.module.flags = !{!17}
 
-!0 = metadata !{metadata !"boring"}
-!1 = metadata !{metadata !"uninteresting"}
-!2 = metadata !{metadata !"the call to foo"}
-!3 = metadata !{metadata !"noise"}
-!4 = metadata !{metadata !"filler"}
+!0 = !{!"boring"}
+!1 = !{!"uninteresting"}
+!2 = !{!"the call to foo"}
+!3 = !{!"noise"}
+!4 = !{!"filler"}
 
-!9 = metadata !{i32 458769, metadata !15, i32 0, metadata !"me", i1 true, metadata !"", i32 0, metadata !16, metadata !16, null, null, null, metadata !""}
-!10 = metadata !{i32 100, i32 101, metadata !9, metadata !9}
-!11 = metadata !{i32 102, i32 103, metadata !9, metadata !9}
-!12 = metadata !{i32 104, i32 105, metadata !9, metadata !9}
-!13 = metadata !{i32 106, i32 107, metadata !9, metadata !9}
-!14 = metadata !{i32 108, i32 109, metadata !9, metadata !9}
-!15 = metadata !{metadata !"source.c", metadata !"/dir"}
-!16 = metadata !{i32 0}
-!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!9 = !{!"0x11\000\00me\001\00\000\00\000", !15, !16, !16, null, null, null} ; [ DW_TAG_compile_unit ]
+!10 = !MDLocation(line: 100, column: 101, scope: !9, inlinedAt: !9)
+!11 = !MDLocation(line: 102, column: 103, scope: !9, inlinedAt: !9)
+!12 = !MDLocation(line: 104, column: 105, scope: !9, inlinedAt: !9)
+!13 = !MDLocation(line: 106, column: 107, scope: !9, inlinedAt: !9)
+!14 = !MDLocation(line: 108, column: 109, scope: !9, inlinedAt: !9)
+!15 = !{!"source.c", !"/dir"}
+!16 = !{i32 0}
+!17 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 3e08a1638945..f85a1d97e90c 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -30,20 +30,23 @@ set(LLVM_TEST_DEPENDS
           llvm-cov
           llvm-diff
           llvm-dis
-          llvm-extract
+          llvm-dsymutil
           llvm-dwarfdump
+          llvm-extract
           llvm-link
           llvm-lto
           llvm-mc
           llvm-mcmarkup
           llvm-nm
-          llvm-size
           llvm-objdump
           llvm-profdata
+          llvm-ranlib
           llvm-readobj
           llvm-rtdyld
+          llvm-size
           llvm-symbolizer
           llvm-tblgen
+          llvm-vtabledump
           macho-dump
           opt
           FileCheck
@@ -51,6 +54,7 @@ set(LLVM_TEST_DEPENDS
           not
           yaml2obj
           obj2yaml
+          verify-uselistorder
         )
 
 # If Intel JIT events are supported, depend on a tool that tests the listener.
@@ -58,6 +62,33 @@ if( LLVM_USE_INTEL_JITEVENTS )
   set(LLVM_TEST_DEPENDS ${LLVM_TEST_DEPENDS} llvm-jitlistener)
 endif( LLVM_USE_INTEL_JITEVENTS )
 
+if(TARGET LLVMgold)
+  set(LLVM_TEST_DEPENDS ${LLVM_TEST_DEPENDS} LLVMgold)
+endif()
+
+if(TARGET llvm-go)
+  set(LLVM_TEST_DEPENDS ${LLVM_TEST_DEPENDS} llvm-go)
+endif()
+
+if(TARGET ocaml_llvm)
+  set(LLVM_TEST_DEPENDS ${LLVM_TEST_DEPENDS}
+          ocaml_llvm
+          ocaml_llvm_all_backends
+          ocaml_llvm_analysis
+          ocaml_llvm_bitreader
+          ocaml_llvm_bitwriter
+          ocaml_llvm_executionengine
+          ocaml_llvm_irreader
+          ocaml_llvm_linker
+          ocaml_llvm_target
+          ocaml_llvm_ipo
+          ocaml_llvm_passmgr_builder
+          ocaml_llvm_scalar_opts
+          ocaml_llvm_transform_utils
+          ocaml_llvm_vectorize
+        )
+endif()
+
 add_lit_testsuite(check-llvm "Running the LLVM regression tests"
   ${CMAKE_CURRENT_BINARY_DIR}
   PARAMS llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
diff --git a/test/CodeGen/AArch64/PBQP-chain.ll b/test/CodeGen/AArch64/PBQP-chain.ll
new file mode 100644
index 000000000000..c4ba026ea428
--- /dev/null
+++ b/test/CodeGen/AArch64/PBQP-chain.ll
@@ -0,0 +1,104 @@
+; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN
+; RUN: llc < %s -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD
+;
+; Test PBQP is able to fulfill the accumulator chaining constraint.
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; CHECK-LABEL: fir
+; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
+; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
+; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
+; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
+; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
+; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
+; CHECK-EVEN: fmadd {{d[0-9]*[02468]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[02468]}}
+; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
+; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
+; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
+; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
+; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
+; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
+; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
+define void @fir(double* nocapture %rx, double* nocapture %ry, double* nocapture %c, double* nocapture %x, double* nocapture %y) {
+entry:
+  %0 = load double* %c, align 8
+  %1 = load double* %x, align 8
+  %mul = fmul fast double %1, %0
+  %2 = load double* %y, align 8
+  %mul7 = fmul fast double %2, %0
+  %arrayidx.1 = getelementptr inbounds double* %c, i64 1
+  %3 = load double* %arrayidx.1, align 8
+  %arrayidx2.1 = getelementptr inbounds double* %x, i64 1
+  %4 = load double* %arrayidx2.1, align 8
+  %mul.1 = fmul fast double %4, %3
+  %add.1 = fadd fast double %mul.1, %mul
+  %arrayidx6.1 = getelementptr inbounds double* %y, i64 1
+  %5 = load double* %arrayidx6.1, align 8
+  %mul7.1 = fmul fast double %5, %3
+  %add8.1 = fadd fast double %mul7.1, %mul7
+  %arrayidx.2 = getelementptr inbounds double* %c, i64 2
+  %6 = load double* %arrayidx.2, align 8
+  %arrayidx2.2 = getelementptr inbounds double* %x, i64 2
+  %7 = load double* %arrayidx2.2, align 8
+  %mul.2 = fmul fast double %7, %6
+  %add.2 = fadd fast double %mul.2, %add.1
+  %arrayidx6.2 = getelementptr inbounds double* %y, i64 2
+  %8 = load double* %arrayidx6.2, align 8
+  %mul7.2 = fmul fast double %8, %6
+  %add8.2 = fadd fast double %mul7.2, %add8.1
+  %arrayidx.3 = getelementptr inbounds double* %c, i64 3
+  %9 = load double* %arrayidx.3, align 8
+  %arrayidx2.3 = getelementptr inbounds double* %x, i64 3
+  %10 = load double* %arrayidx2.3, align 8
+  %mul.3 = fmul fast double %10, %9
+  %add.3 = fadd fast double %mul.3, %add.2
+  %arrayidx6.3 = getelementptr inbounds double* %y, i64 3
+  %11 = load double* %arrayidx6.3, align 8
+  %mul7.3 = fmul fast double %11, %9
+  %add8.3 = fadd fast double %mul7.3, %add8.2
+  %arrayidx.4 = getelementptr inbounds double* %c, i64 4
+  %12 = load double* %arrayidx.4, align 8
+  %arrayidx2.4 = getelementptr inbounds double* %x, i64 4
+  %13 = load double* %arrayidx2.4, align 8
+  %mul.4 = fmul fast double %13, %12
+  %add.4 = fadd fast double %mul.4, %add.3
+  %arrayidx6.4 = getelementptr inbounds double* %y, i64 4
+  %14 = load double* %arrayidx6.4, align 8
+  %mul7.4 = fmul fast double %14, %12
+  %add8.4 = fadd fast double %mul7.4, %add8.3
+  %arrayidx.5 = getelementptr inbounds double* %c, i64 5
+  %15 = load double* %arrayidx.5, align 8
+  %arrayidx2.5 = getelementptr inbounds double* %x, i64 5
+  %16 = load double* %arrayidx2.5, align 8
+  %mul.5 = fmul fast double %16, %15
+  %add.5 = fadd fast double %mul.5, %add.4
+  %arrayidx6.5 = getelementptr inbounds double* %y, i64 5
+  %17 = load double* %arrayidx6.5, align 8
+  %mul7.5 = fmul fast double %17, %15
+  %add8.5 = fadd fast double %mul7.5, %add8.4
+  %arrayidx.6 = getelementptr inbounds double* %c, i64 6
+  %18 = load double* %arrayidx.6, align 8
+  %arrayidx2.6 = getelementptr inbounds double* %x, i64 6
+  %19 = load double* %arrayidx2.6, align 8
+  %mul.6 = fmul fast double %19, %18
+  %add.6 = fadd fast double %mul.6, %add.5
+  %arrayidx6.6 = getelementptr inbounds double* %y, i64 6
+  %20 = load double* %arrayidx6.6, align 8
+  %mul7.6 = fmul fast double %20, %18
+  %add8.6 = fadd fast double %mul7.6, %add8.5
+  %arrayidx.7 = getelementptr inbounds double* %c, i64 7
+  %21 = load double* %arrayidx.7, align 8
+  %arrayidx2.7 = getelementptr inbounds double* %x, i64 7
+  %22 = load double* %arrayidx2.7, align 8
+  %mul.7 = fmul fast double %22, %21
+  %add.7 = fadd fast double %mul.7, %add.6
+  %arrayidx6.7 = getelementptr inbounds double* %y, i64 7
+  %23 = load double* %arrayidx6.7, align 8
+  %mul7.7 = fmul fast double %23, %21
+  %add8.7 = fadd fast double %mul7.7, %add8.6
+  store double %add.7, double* %rx, align 8
+  store double %add8.7, double* %ry, align 8
+  ret void
+}
+
diff --git a/test/CodeGen/AArch64/PBQP-coalesce-benefit.ll b/test/CodeGen/AArch64/PBQP-coalesce-benefit.ll
new file mode 100644
index 000000000000..45ac5e65c002
--- /dev/null
+++ b/test/CodeGen/AArch64/PBQP-coalesce-benefit.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s
+
+; CHECK-LABEL: test:
+define i32 @test(i32 %acc, i32* nocapture readonly %c) {
+entry:
+  %0 = load i32* %c, align 4
+; CHECK-NOT: mov	 w{{[0-9]*}}, w0
+  %add = add nsw i32 %0, %acc
+  %arrayidx1 = getelementptr inbounds i32* %c, i64 1
+  %1 = load i32* %arrayidx1, align 4
+  %add2 = add nsw i32 %add, %1
+  ret i32 %add2
+}
+
diff --git a/test/CodeGen/AArch64/PBQP-csr.ll b/test/CodeGen/AArch64/PBQP-csr.ll
new file mode 100644
index 000000000000..64335ae353a1
--- /dev/null
+++ b/test/CodeGen/AArch64/PBQP-csr.ll
@@ -0,0 +1,91 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a57 -mattr=+neon -fp-contract=fast -regalloc=pbqp -pbqp-coalescing | FileCheck %s
+
+%pl = type { i32, i32, i32, i32, %p*, %l*, double* }
+%p = type { i32, %ca*, [27 x %ca*], %v*, %v*, %v*, i32 }
+%ca = type { %v, float, i32 }
+%v = type { double, double, double }
+%l = type opaque
+%rs = type { i32, i32, i32, i32, %v*, %v*, [21 x double], %v, %v, %v, double, double, double }
+
+;CHECK-LABEL: test_csr
+define void @test_csr(%pl* nocapture readnone %this, %rs* nocapture %r) align 2 {
+;CHECK-NOT: stp {{d[0-9]+}}, {{d[0-9]+}}
+entry:
+  %x.i = getelementptr inbounds %rs* %r, i64 0, i32 7, i32 0
+  %y.i = getelementptr inbounds %rs* %r, i64 0, i32 7, i32 1
+  %z.i = getelementptr inbounds %rs* %r, i64 0, i32 7, i32 2
+  %x.i61 = getelementptr inbounds %rs* %r, i64 0, i32 8, i32 0
+  %y.i62 = getelementptr inbounds %rs* %r, i64 0, i32 8, i32 1
+  %z.i63 = getelementptr inbounds %rs* %r, i64 0, i32 8, i32 2
+  %x.i58 = getelementptr inbounds %rs* %r, i64 0, i32 9, i32 0
+  %y.i59 = getelementptr inbounds %rs* %r, i64 0, i32 9, i32 1
+  %z.i60 = getelementptr inbounds %rs* %r, i64 0, i32 9, i32 2
+  %na = getelementptr inbounds %rs* %r, i64 0, i32 0
+  %0 = bitcast double* %x.i to i8*
+  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 72, i32 8, i1 false)
+  %1 = load i32* %na, align 4
+  %cmp70 = icmp sgt i32 %1, 0
+  br i1 %cmp70, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry
+  %fn = getelementptr inbounds %rs* %r, i64 0, i32 4
+  %2 = load %v** %fn, align 8
+  %fs = getelementptr inbounds %rs* %r, i64 0, i32 5
+  %3 = load %v** %fs, align 8
+  %4 = sext i32 %1 to i64
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %5 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add6.i, %for.body ]
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+  %6 = phi <2 x double> [ zeroinitializer, %for.body.lr.ph ], [ %17, %for.body ]
+  %7 = phi <2 x double> [ zeroinitializer, %for.body.lr.ph ], [ %22, %for.body ]
+  %8 = phi <2 x double> [ zeroinitializer, %for.body.lr.ph ], [ %26, %for.body ]
+  %9 = phi <2 x double> [ zeroinitializer, %for.body.lr.ph ], [ %28, %for.body ]
+  %x.i54 = getelementptr inbounds %v* %2, i64 %indvars.iv, i32 0
+  %x1.i = getelementptr inbounds %v* %3, i64 %indvars.iv, i32 0
+  %y.i56 = getelementptr inbounds %v* %2, i64 %indvars.iv, i32 1
+  %10 = bitcast double* %x.i54 to <2 x double>*
+  %11 = load <2 x double>* %10, align 8
+  %y2.i = getelementptr inbounds %v* %3, i64 %indvars.iv, i32 1
+  %12 = bitcast double* %x1.i to <2 x double>*
+  %13 = load <2 x double>* %12, align 8
+  %14 = fadd fast <2 x double> %13, %11
+  %z.i57 = getelementptr inbounds %v* %2, i64 %indvars.iv, i32 2
+  %15 = load double* %z.i57, align 8
+  %z4.i = getelementptr inbounds %v* %3, i64 %indvars.iv, i32 2
+  %16 = load double* %z4.i, align 8
+  %add5.i = fadd fast double %16, %15
+  %17 = fadd fast <2 x double> %6, %11
+  %18 = bitcast double* %x.i to <2 x double>*
+  store <2 x double> %17, <2 x double>* %18, align 8
+  %19 = load double* %x1.i, align 8
+  %20 = insertelement <2 x double> undef, double %15, i32 0
+  %21 = insertelement <2 x double> %20, double %19, i32 1
+  %22 = fadd fast <2 x double> %7, %21
+  %23 = bitcast double* %z.i to <2 x double>*
+  store <2 x double> %22, <2 x double>* %23, align 8
+  %24 = bitcast double* %y2.i to <2 x double>*
+  %25 = load <2 x double>* %24, align 8
+  %26 = fadd fast <2 x double> %8, %25
+  %27 = bitcast double* %y.i62 to <2 x double>*
+  store <2 x double> %26, <2 x double>* %27, align 8
+  %28 = fadd fast <2 x double> %14, %9
+  %29 = bitcast double* %x.i58 to <2 x double>*
+  store <2 x double> %28, <2 x double>* %29, align 8
+  %add6.i = fadd fast double %add5.i, %5
+  store double %add6.i, double* %z.i60, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cmp = icmp slt i64 %indvars.iv.next, %4
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+
diff --git a/test/CodeGen/AArch64/PBQP.ll b/test/CodeGen/AArch64/PBQP.ll
new file mode 100644
index 000000000000..675a2ca60ddb
--- /dev/null
+++ b/test/CodeGen/AArch64/PBQP.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=cortex-a57 -regalloc=pbqp -pbqp-coalescing -o - %s | FileCheck %s
+
+define i32 @foo(i32 %a) {
+; CHECK-LABEL: foo:
+; CHECK: bl bar
+; CHECK: bl baz
+  %call = call i32 @bar(i32 %a)
+  %call1 = call i32 @baz(i32 %call)
+  ret i32 %call1
+}
+
+declare i32 @bar(i32)
+declare i32 @baz(i32)
+
diff --git a/test/CodeGen/AArch64/Redundantstore.ll b/test/CodeGen/AArch64/Redundantstore.ll
new file mode 100644
index 000000000000..72f7f4679e6d
--- /dev/null
+++ b/test/CodeGen/AArch64/Redundantstore.ll
@@ -0,0 +1,25 @@
+; RUN: llc -O3 -march=aarch64 < %s | FileCheck %s 
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+@end_of_array = common global i8* null, align 8
+
+; CHECK-LABEL: @test
+; CHECK: stur
+; CHECK-NOT: stur
+define i8* @test(i32 %size) {
+entry:
+  %0 = load i8** @end_of_array, align 8
+  %conv = sext i32 %size to i64
+  %and = and i64 %conv, -8
+  %conv2 = trunc i64 %and to i32
+  %add.ptr.sum = add nsw i64 %and, -4
+  %add.ptr3 = getelementptr inbounds i8* %0, i64 %add.ptr.sum
+  %size4 = bitcast i8* %add.ptr3 to i32*
+  store i32 %conv2, i32* %size4, align 4
+  %add.ptr.sum9 = add nsw i64 %and, -4
+  %add.ptr5 = getelementptr inbounds i8* %0, i64 %add.ptr.sum9
+  %size6 = bitcast i8* %add.ptr5 to i32*
+  store i32 %conv2, i32* %size6, align 4
+  ret i8* %0
+}
+
diff --git a/test/CodeGen/AArch64/a57-csel.ll b/test/CodeGen/AArch64/a57-csel.ll
new file mode 100644
index 000000000000..9d16d1a0f104
--- /dev/null
+++ b/test/CodeGen/AArch64/a57-csel.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mcpu=cortex-a57 -aarch64-enable-early-ifcvt=false | FileCheck %s
+
+; Check that the select is expanded into a branch sequence.
+define i64 @f(i64 %a, i64 %b, i64* %c, i64 %d, i64 %e) {
+  ; CHECK: cbz
+  %x0 = load i64* %c
+  %x1 = icmp eq i64 %x0, 0
+  %x2 = select i1 %x1, i64 %a, i64 %b
+  %x3 = add i64 %x2, %d
+  ret i64 %x3
+}
diff --git a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
new file mode 100644
index 000000000000..73ee522cbf55
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
@@ -0,0 +1,106 @@
+; RUN: llc < %s -O2 -mtriple=aarch64-none-linux-gnu 
+
+; Bug 20598
+
+
+define void @test() #0 {
+entry:
+  br label %for.body, !dbg !39
+
+for.body:                                         ; preds = %for.body, %entry
+  %arrayidx5 = getelementptr inbounds i32* null, i64 1, !dbg !43
+  %0 = load i32* null, align 4, !dbg !45, !tbaa !46
+  %s1 = sub nsw i32 0, %0, !dbg !50
+  %n1 = sext i32 %s1 to i64, !dbg !50
+  %arrayidx21 = getelementptr inbounds i32* null, i64 3, !dbg !51
+  %add53 = add nsw i64 %n1, 0, !dbg !52
+  %add55 = add nsw i64 %n1, 0, !dbg !53
+  %mul63 = mul nsw i64 %add53, -20995, !dbg !54
+  tail call void @llvm.dbg.value(metadata i64 %mul63, i64 0, metadata !30, metadata !{!"0x102"}), !dbg !55
+  %mul65 = mul nsw i64 %add55, -3196, !dbg !56
+  %add67 = add nsw i64 0, %mul65, !dbg !57
+  %add80 = add i64 0, 1024, !dbg !58
+  %add81 = add i64 %add80, %mul63, !dbg !58
+  %add82 = add i64 %add81, 0, !dbg !58
+  %shr83351 = lshr i64 %add82, 11, !dbg !58
+  %conv84 = trunc i64 %shr83351 to i32, !dbg !58
+  store i32 %conv84, i32* %arrayidx21, align 4, !dbg !58, !tbaa !46
+  %add86 = add i64 0, 1024, !dbg !59
+  %add87 = add i64 %add86, 0, !dbg !59
+  %add88 = add i64 %add87, %add67, !dbg !59
+  %shr89352 = lshr i64 %add88, 11, !dbg !59
+  %n2 = trunc i64 %shr89352 to i32, !dbg !59
+  store i32 %n2, i32* %arrayidx5, align 4, !dbg !59, !tbaa !46
+  br label %for.body, !dbg !39
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!36, !37}
+!llvm.ident = !{!38}
+
+!0 = !{!"0x11\0012\00clang version 3.6.0 \001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [] [] []
+!1 = !{!"test.c", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00\00\00\00140\000\001\000\006\00256\001\00141", !1, !5, !6, null, void ()* @test, null, null, !12} ; [ DW_TAG_subprogram ] [] [] [def] [scope 141] []
+!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ] [] []
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [] [] [from ]
+!7 = !{null, !8}
+!8 = !{!"0xf\00\000\0064\0064\000\000", null, null, !9} ; [ DW_TAG_pointer_type ] [] [] []
+!9 = !{!"0x16\00\0030\000\000\000\000", !10, null, !11} ; [ DW_TAG_typedef ] [] [] [] [from int]
+!10 = !{!"", !""}
+!11 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [] [int] []
+!12 = !{!13, !14, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35}
+!13 = !{!"0x101\00\0016777356\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [] [data] []
+!14 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
+!15 = !{!"0x16\00\00183\000\000\000\000", !16, null, !17} ; [ DW_TAG_typedef ] [] [INT32] [] [from long int]
+!16 = !{!"", !""}
+!17 = !{!"0x24\00\000\0064\0064\000\000\005", null, null} ; [ DW_TAG_base_type ] [] [long int] []
+!18 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
+!19 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
+!20 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
+!21 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
+!22 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
+!23 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [] [] []
+!24 = !{!"0x100\00\00142\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!25 = !{!"0x100\00\00143\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!26 = !{!"0x100\00\00143\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!27 = !{!"0x100\00\00143\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!28 = !{!"0x100\00\00143\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!29 = !{!"0x100\00\00144\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!30 = !{!"0x100\00\00144\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!31 = !{!"0x100\00\00144\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!32 = !{!"0x100\00\00144\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [ ] [] []
+!33 = !{!"0x100\00\00144\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [  ] [] []
+!34 = !{!"0x100\00\00145\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [  ] [] []
+!35 = !{!"0x100\00\00146\000", !4, !5, !11} ; [ DW_TAG_auto_variable ] [  ] [] []
+!36 = !{i32 2, !"Dwarf Version", i32 4}
+!37 = !{i32 2, !"Debug Info Version", i32 2}
+!38 = !{!"clang version 3.6.0 "}
+!39 = !MDLocation(line: 154, column: 8, scope: !40)
+!40 = !{!"0xb\00154\008\002", !1, !41} ; [ DW_TAG_lexical_block ] [  ] []
+!41 = !{!"0xb\00154\008\001", !1, !42} ; [ DW_TAG_lexical_block ] [  ] []
+!42 = !{!"0xb\00154\003\000", !1, !4} ; [ DW_TAG_lexical_block ] [  ] []
+!43 = !MDLocation(line: 157, column: 5, scope: !44)
+!44 = !{!"0xb\00154\0042\000", !1, !42} ; [ DW_TAG_lexical_block ] [  ] []
+!45 = !MDLocation(line: 159, column: 5, scope: !44)
+!46 = !{!47, !47, i64 0}
+!47 = !{!"int", !48, i64 0}
+!48 = !{!"omnipotent char", !49, i64 0}
+!49 = !{!"Simple C/C++ TBAA"}
+!50 = !MDLocation(line: 160, column: 5, scope: !44)
+!51 = !MDLocation(line: 161, column: 5, scope: !44)
+!52 = !MDLocation(line: 188, column: 5, scope: !44)
+!53 = !MDLocation(line: 190, column: 5, scope: !44)
+!54 = !MDLocation(line: 198, column: 5, scope: !44)
+!55 = !MDLocation(line: 144, column: 13, scope: !4)
+!56 = !MDLocation(line: 200, column: 5, scope: !44)
+!57 = !MDLocation(line: 203, column: 5, scope: !44)
+!58 = !MDLocation(line: 207, column: 5, scope: !44)
+!59 = !MDLocation(line: 208, column: 5, scope: !44)
diff --git a/test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll b/test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll
new file mode 100644
index 000000000000..455325166505
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll
@@ -0,0 +1,16 @@
+;RUN: llc <%s -mattr=-neon  -mattr=-fp-armv8  | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+@t = common global i32 0, align 4
+@x = common global i32 0, align 4
+
+define void @foo() {
+entry:
+;CHECK-LABEL: foo:
+;CHECK: __floatsisf
+  %0 = load i32* @x, align 4
+  %conv = sitofp i32 %0 to float
+  store float %conv, float* bitcast (i32* @t to float*), align 4
+  ret void
+}
diff --git a/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll b/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
new file mode 100644
index 000000000000..7108bc0e91a9
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
@@ -0,0 +1,329 @@
+; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-EVEN
+; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A57 --check-prefix CHECK-ODD
+; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-EVEN
+; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-ODD
+
+; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
+; our test strategy is to:
+;   * Force the pass to always perform register swapping even if the dest register is of the
+;     correct color already (-force-all)
+;   * Force the pass to ignore all hints it obtained from regalloc (-deterministic-balance),
+;     and run it twice, once where it always hints odd, and once where it always hints even.
+;
+; We then use regex magic to check that in the two cases the register allocation is
+; different; this is what gives us the testing coverage and distinguishes cases where
+; the pass has done some work versus accidental regalloc.
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+; Non-overlapping groups - shouldn't need any changing at all.
+
+; CHECK-LABEL: f1:
+; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
+; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
+; CHECK: fmadd [[x]]
+; CHECK: fmsub [[x]]
+; CHECK: fmadd [[x]]
+; CHECK: str [[x]]
+
+define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 {
+entry:
+  %0 = load double* %p, align 8
+  %arrayidx1 = getelementptr inbounds double* %p, i64 1
+  %1 = load double* %arrayidx1, align 8
+  %arrayidx2 = getelementptr inbounds double* %p, i64 2
+  %2 = load double* %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds double* %p, i64 3
+  %3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %p, i64 4
+  %4 = load double* %arrayidx4, align 8
+  %mul = fmul fast double %0, %1
+  %add = fadd fast double %mul, %4
+  %mul5 = fmul fast double %1, %2
+  %add6 = fadd fast double %mul5, %add
+  %mul7 = fmul fast double %1, %3
+  %sub = fsub fast double %add6, %mul7
+  %mul8 = fmul fast double %2, %3
+  %add9 = fadd fast double %mul8, %sub
+  store double %add9, double* %q, align 8
+  %arrayidx11 = getelementptr inbounds double* %p, i64 5
+  %5 = load double* %arrayidx11, align 8
+  %arrayidx12 = getelementptr inbounds double* %p, i64 6
+  %6 = load double* %arrayidx12, align 8
+  %arrayidx13 = getelementptr inbounds double* %p, i64 7
+  %7 = load double* %arrayidx13, align 8
+  %mul15 = fmul fast double %6, %7
+  %mul16 = fmul fast double %0, %5
+  %add17 = fadd fast double %mul16, %mul15
+  %mul18 = fmul fast double %5, %6
+  %add19 = fadd fast double %mul18, %add17
+  %arrayidx20 = getelementptr inbounds double* %q, i64 1
+  store double %add19, double* %arrayidx20, align 8
+  ret void
+}
+
+; Overlapping groups - coloring needed.
+
+; CHECK-LABEL: f2:
+; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
+; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
+; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
+; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
+; CHECK: fmadd [[x]]
+; CHECK: fmadd [[y]]
+; CHECK: fmsub [[x]]
+; CHECK: fmadd [[y]]
+; CHECK: fmadd [[x]]
+; CHECK-A57: stp [[x]], [[y]]
+; CHECK-A53-DAG: str [[x]]
+; CHECK-A53-DAG: str [[y]]
+
+define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 {
+entry:
+  %0 = load double* %p, align 8
+  %arrayidx1 = getelementptr inbounds double* %p, i64 1
+  %1 = load double* %arrayidx1, align 8
+  %arrayidx2 = getelementptr inbounds double* %p, i64 2
+  %2 = load double* %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds double* %p, i64 3
+  %3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %p, i64 4
+  %4 = load double* %arrayidx4, align 8
+  %arrayidx5 = getelementptr inbounds double* %p, i64 5
+  %5 = load double* %arrayidx5, align 8
+  %arrayidx6 = getelementptr inbounds double* %p, i64 6
+  %6 = load double* %arrayidx6, align 8
+  %arrayidx7 = getelementptr inbounds double* %p, i64 7
+  %7 = load double* %arrayidx7, align 8
+  %mul = fmul fast double %0, %1
+  %add = fadd fast double %mul, %7
+  %mul8 = fmul fast double %5, %6
+  %mul9 = fmul fast double %1, %2
+  %add10 = fadd fast double %mul9, %add
+  %mul11 = fmul fast double %3, %4
+  %add12 = fadd fast double %mul11, %mul8
+  %mul13 = fmul fast double %1, %3
+  %sub = fsub fast double %add10, %mul13
+  %mul14 = fmul fast double %4, %5
+  %add15 = fadd fast double %mul14, %add12
+  %mul16 = fmul fast double %2, %3
+  %add17 = fadd fast double %mul16, %sub
+  store double %add17, double* %q, align 8
+  %arrayidx19 = getelementptr inbounds double* %q, i64 1
+  store double %add15, double* %arrayidx19, align 8
+  ret void
+}
+
+; Dest register is live on block exit - fixup needed.
+
+; CHECK-LABEL: f3:
+; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
+; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
+; CHECK: fmadd [[x]]
+; CHECK: fmsub [[x]]
+; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
+; CHECK: str [[y]]
+
+define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 {
+entry:
+  %0 = load double* %p, align 8
+  %arrayidx1 = getelementptr inbounds double* %p, i64 1
+  %1 = load double* %arrayidx1, align 8
+  %arrayidx2 = getelementptr inbounds double* %p, i64 2
+  %2 = load double* %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds double* %p, i64 3
+  %3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %p, i64 4
+  %4 = load double* %arrayidx4, align 8
+  %mul = fmul fast double %0, %1
+  %add = fadd fast double %mul, %4
+  %mul5 = fmul fast double %1, %2
+  %add6 = fadd fast double %mul5, %add
+  %mul7 = fmul fast double %1, %3
+  %sub = fsub fast double %add6, %mul7
+  %mul8 = fmul fast double %2, %3
+  %add9 = fadd fast double %mul8, %sub
+  %cmp = fcmp oeq double %3, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void bitcast (void (...)* @g to void ()*)() #2
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  store double %add9, double* %q, align 8
+  ret void
+}
+
+declare void @g(...) #1
+
+; Single precision version of f2.
+
+; CHECK-LABEL: f4:
+; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
+; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
+; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
+; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
+; CHECK: fmadd [[x]]
+; CHECK: fmadd [[y]]
+; CHECK: fmsub [[x]]
+; CHECK: fmadd [[y]]
+; CHECK: fmadd [[x]]
+; CHECK-A57: stp [[x]], [[y]]
+; CHECK-A53-DAG: str [[x]]
+; CHECK-A53-DAG: str [[y]]
+
+define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 {
+entry:
+  %0 = load float* %p, align 4
+  %arrayidx1 = getelementptr inbounds float* %p, i64 1
+  %1 = load float* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds float* %p, i64 2
+  %2 = load float* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float* %p, i64 3
+  %3 = load float* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds float* %p, i64 4
+  %4 = load float* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds float* %p, i64 5
+  %5 = load float* %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds float* %p, i64 6
+  %6 = load float* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds float* %p, i64 7
+  %7 = load float* %arrayidx7, align 4
+  %mul = fmul fast float %0, %1
+  %add = fadd fast float %mul, %7
+  %mul8 = fmul fast float %5, %6
+  %mul9 = fmul fast float %1, %2
+  %add10 = fadd fast float %mul9, %add
+  %mul11 = fmul fast float %3, %4
+  %add12 = fadd fast float %mul11, %mul8
+  %mul13 = fmul fast float %1, %3
+  %sub = fsub fast float %add10, %mul13
+  %mul14 = fmul fast float %4, %5
+  %add15 = fadd fast float %mul14, %add12
+  %mul16 = fmul fast float %2, %3
+  %add17 = fadd fast float %mul16, %sub
+  store float %add17, float* %q, align 4
+  %arrayidx19 = getelementptr inbounds float* %q, i64 1
+  store float %add15, float* %arrayidx19, align 4
+  ret void
+}
+
+; Single precision version of f3
+
+; CHECK-LABEL: f5:
+; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
+; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
+; CHECK: fmadd [[x]]
+; CHECK: fmsub [[x]]
+; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
+; CHECK: str [[y]]
+
+define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 {
+entry:
+  %0 = load float* %p, align 4
+  %arrayidx1 = getelementptr inbounds float* %p, i64 1
+  %1 = load float* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds float* %p, i64 2
+  %2 = load float* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float* %p, i64 3
+  %3 = load float* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds float* %p, i64 4
+  %4 = load float* %arrayidx4, align 4
+  %mul = fmul fast float %0, %1
+  %add = fadd fast float %mul, %4
+  %mul5 = fmul fast float %1, %2
+  %add6 = fadd fast float %mul5, %add
+  %mul7 = fmul fast float %1, %3
+  %sub = fsub fast float %add6, %mul7
+  %mul8 = fmul fast float %2, %3
+  %add9 = fadd fast float %mul8, %sub
+  %cmp = fcmp oeq float %3, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  tail call void bitcast (void (...)* @g to void ()*)() #2
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  store float %add9, float* %q, align 4
+  ret void
+}
+
+; Test that regmask clobbering stops a chain sequence.
+
+; CHECK-LABEL: f6:
+; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
+; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
+; CHECK: fmadd [[x]]
+; CHECK: fmsub [[x]]
+; CHECK: fmadd d0, {{.*}}, [[x]]
+; CHECK: bl hh
+; CHECK: str d0
+
+define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 {
+entry:
+  %0 = load double* %p, align 8
+  %arrayidx1 = getelementptr inbounds double* %p, i64 1
+  %1 = load double* %arrayidx1, align 8
+  %arrayidx2 = getelementptr inbounds double* %p, i64 2
+  %2 = load double* %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds double* %p, i64 3
+  %3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %p, i64 4
+  %4 = load double* %arrayidx4, align 8
+  %mul = fmul fast double %0, %1
+  %add = fadd fast double %mul, %4
+  %mul5 = fmul fast double %1, %2
+  %add6 = fadd fast double %mul5, %add
+  %mul7 = fmul fast double %1, %3
+  %sub = fsub fast double %add6, %mul7
+  %mul8 = fmul fast double %2, %3
+  %add9 = fadd fast double %mul8, %sub
+  %call = tail call double @hh(double %add9) #2
+  store double %call, double* %q, align 8
+  ret void
+}
+
+declare double @hh(double) #1
+
+; Check that we correctly deal with repeated operands.
+; The following testcase creates:
+;   %D1<def> = FADDDrr %D0<kill>, %D0
+; We'll get a crash if we naively look at the first operand, remove it
+; from the substitution list then look at the second operand.
+
+; CHECK: fmadd [[x:d[0-9]+]]
+; CHECK: fadd d1, [[x]], [[x]]
+
+define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 {
+entry:
+  %0 = load double* %p, align 8
+  %arrayidx1 = getelementptr inbounds double* %p, i64 1
+  %1 = load double* %arrayidx1, align 8
+  %arrayidx2 = getelementptr inbounds double* %p, i64 2
+  %2 = load double* %arrayidx2, align 8
+  %arrayidx3 = getelementptr inbounds double* %p, i64 3
+  %3 = load double* %arrayidx3, align 8
+  %arrayidx4 = getelementptr inbounds double* %p, i64 4
+  %4 = load double* %arrayidx4, align 8
+  %mul = fmul fast double %0, %1
+  %add = fadd fast double %mul, %4
+  %mul5 = fmul fast double %1, %2
+  %add6 = fadd fast double %mul5, %add
+  %mul7 = fmul fast double %1, %3
+  %sub = fsub fast double %add6, %mul7
+  %mul8 = fmul fast double %2, %3
+  %add9 = fadd fast double %mul8, %sub
+  %add10 = fadd fast double %add9, %add9
+  call void @hhh(double 0.0, double %add10)
+  ret void
+}
+
+declare void @hhh(double, double)
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
diff --git a/test/CodeGen/AArch64/aarch64-be-bv.ll b/test/CodeGen/AArch64/aarch64-be-bv.ll
new file mode 100644
index 000000000000..01642a4f3bf8
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-be-bv.ll
@@ -0,0 +1,831 @@
+; RUN: llc -mtriple=aarch64_be--linux-gnu < %s | FileCheck %s
+
+@vec_v8i16 = global <8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
+
+; CHECK-LABEL: movi_modimm_t1:
+define i16 @movi_modimm_t1() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].4s, #0x1
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: movi_modimm_t2:
+define i16 @movi_modimm_t2() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].4s, #0x1, lsl #8
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: movi_modimm_t3:
+define i16 @movi_modimm_t3() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].4s, #0x1, lsl #16
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: movi_modimm_t4:
+define i16 @movi_modimm_t4() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].4s, #0x1, lsl #24
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: movi_modimm_t5:
+define i16 @movi_modimm_t5() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].8h, #0x1
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: movi_modimm_t6:
+define i16 @movi_modimm_t6() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].8h, #0x1, lsl #8
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: movi_modimm_t7:
+define i16 @movi_modimm_t7() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].4s, #0x1, msl #8
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 511, i16 0, i16 511, i16 0, i16 511, i16 0, i16 511, i16 0>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: movi_modimm_t8:
+define i16 @movi_modimm_t8() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].4s, #0x1, msl #16
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 65535, i16 1, i16 65535, i16 1, i16 65535, i16 1, i16 65535, i16 1>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: movi_modimm_t9:
+define i16 @movi_modimm_t9() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].16b, #0x1
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: movi_modimm_t10:
+define i16 @movi_modimm_t10() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].2d, #0x00ffff0000ffff
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 -1, i16 0, i16 -1, i16 0, i16 -1, i16 0, i16 -1, i16 0>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: fmov_modimm_t11:
+define i16 @fmov_modimm_t11() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    fmov    v[[REG2:[0-9]+]].4s, #3.00000000
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 0, i16 16448, i16 0, i16 16448, i16 0, i16 16448, i16 0, i16 16448>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: fmov_modimm_t12:
+define i16 @fmov_modimm_t12() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    fmov    v[[REG2:[0-9]+]].2d, #0.17968750
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 0, i16 0, i16 0, i16 16327, i16 0, i16 0, i16 0, i16 16327>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: mvni_modimm_t1:
+define i16 @mvni_modimm_t1() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].4s, #0x1
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: mvni_modimm_t2:
+define i16 @mvni_modimm_t2() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].4s, #0x1, lsl #8
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: mvni_modimm_t3:
+define i16 @mvni_modimm_t3() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].4s, #0x1, lsl #16
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: mvni_modimm_t4:
+define i16 @mvni_modimm_t4() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].4s, #0x1, lsl #24
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: mvni_modimm_t5:
+define i16 @mvni_modimm_t5() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].8h, #0x1
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: mvni_modimm_t6:
+define i16 @mvni_modimm_t6() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].8h, #0x1, lsl #8
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: mvni_modimm_t7:
+define i16 @mvni_modimm_t7() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].4s, #0x1, msl #8
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 65024, i16 65535, i16 65024, i16 65535, i16 65024, i16 65535, i16 65024, i16 65535>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: mvni_modimm_t8:
+define i16 @mvni_modimm_t8() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].4s, #0x1, msl #16
+  ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = add <8 x i16> %in, <i16 0, i16 65534, i16 0, i16 65534, i16 0, i16 65534, i16 0, i16 65534>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: bic_modimm_t1:
+define i16 @bic_modimm_t1() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    bic	   v[[REG2:[0-9]+]].4s, #0x1
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = and <8 x i16> %in, <i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: bic_modimm_t2:
+define i16 @bic_modimm_t2() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    bic	   v[[REG2:[0-9]+]].4s, #0x1, lsl #8
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = and <8 x i16> %in, <i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: bic_modimm_t3:
+define i16 @bic_modimm_t3() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    bic	   v[[REG2:[0-9]+]].4s, #0x1, lsl #16
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = and <8 x i16> %in, <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: bic_modimm_t4:
+define i16 @bic_modimm_t4() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    bic	   v[[REG2:[0-9]+]].4s, #0x1, lsl #24
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = and <8 x i16> %in, <i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: bic_modimm_t5:
+define i16 @bic_modimm_t5() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    bic	   v[[REG2:[0-9]+]].8h, #0x1
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = and <8 x i16> %in, <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: bic_modimm_t6:
+define i16 @bic_modimm_t6() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    bic	   v[[REG2:[0-9]+]].8h, #0x1, lsl #8
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = and <8 x i16> %in, <i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: orr_modimm_t1:
+define i16 @orr_modimm_t1() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    orr	   v[[REG2:[0-9]+]].4s, #0x1
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = or <8 x i16> %in, <i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: orr_modimm_t2:
+define i16 @orr_modimm_t2() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    orr     v[[REG2:[0-9]+]].4s, #0x1, lsl #8
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = or <8 x i16> %in, <i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: orr_modimm_t3:
+define i16 @orr_modimm_t3() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    orr	   v[[REG2:[0-9]+]].4s, #0x1, lsl #16
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = or <8 x i16> %in, <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: orr_modimm_t4:
+define i16 @orr_modimm_t4() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    orr	   v[[REG2:[0-9]+]].4s, #0x1, lsl #24
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = or <8 x i16> %in, <i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: orr_modimm_t5:
+define i16 @orr_modimm_t5() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    orr	   v[[REG2:[0-9]+]].8h, #0x1
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = or <8 x i16> %in, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+; CHECK-LABEL: orr_modimm_t6:
+define i16 @orr_modimm_t6() nounwind {
+  ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
+  ; CHECK-NEXT:    orr	   v[[REG2:[0-9]+]].8h, #0x1, lsl #8
+  ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
+  %in = load <8 x i16>* @vec_v8i16
+  %rv = or <8 x i16> %in, <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>
+  %el = extractelement <8 x i16> %rv, i32 0
+  ret i16 %el
+}
+
+declare i8 @f_v8i8(<8 x i8> %arg)
+declare i16 @f_v4i16(<4 x i16> %arg)
+declare i32 @f_v2i32(<2 x i32> %arg)
+declare i64 @f_v1i64(<1 x i64> %arg)
+declare i8 @f_v16i8(<16 x i8> %arg)
+declare i16 @f_v8i16(<8 x i16> %arg)
+declare i32 @f_v4i32(<4 x i32> %arg)
+declare i64 @f_v2i64(<2 x i64> %arg)
+
+; CHECK-LABEL: modimm_t1_call:
+define void @modimm_t1_call() {
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x8
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.8b, v[[REG1]].8b
+  ; CHECK-NEXT:    bl      f_v8i8
+  call i8 @f_v8i8(<8 x i8> <i8 8, i8 0, i8 0, i8 0, i8 8, i8 0, i8 0, i8 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x7
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.4h, v[[REG1]].4h
+  ; CHECK-NEXT:    bl      f_v4i16
+  call i16 @f_v4i16(<4 x i16> <i16 7, i16 0, i16 7, i16 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x6
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
+  ; CHECK-NEXT:    bl      f_v2i32
+  call i32 @f_v2i32(<2 x i32> <i32 6, i32 6>)
+  ; CHECK:         movi    v{{[0-9]+}}.2s, #0x5
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> <i64 21474836485>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x5
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v16i8
+  call i8 @f_v16i8(<16 x i8> <i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x4
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].8h, v[[REG1]].8h
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v8i16
+  call i16 @f_v8i16(<8 x i16> <i16 4, i16 0, i16 4, i16 0, i16 4, i16 0, i16 4, i16 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x3
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].4s, v[[REG1]].4s
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v4i32
+  call i32 @f_v4i32(<4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+  ; CHECK:         movi    v[[REG:[0-9]+]].4s, #0x2
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> <i64 8589934594, i64 8589934594>)
+
+  ret void
+}
+
+; CHECK-LABEL: modimm_t2_call:
+define void @modimm_t2_call() {
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x8, lsl #8
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.8b, v[[REG1]].8b
+  ; CHECK-NEXT:    bl      f_v8i8
+  call i8 @f_v8i8(<8 x i8> <i8 0, i8 8, i8 0, i8 0, i8 0, i8 8, i8 0, i8 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x7, lsl #8
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.4h, v[[REG1]].4h
+  ; CHECK-NEXT:    bl      f_v4i16
+  call i16 @f_v4i16(<4 x i16> <i16 1792, i16 0, i16 1792, i16 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x6, lsl #8
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
+  ; CHECK-NEXT:    bl      f_v2i32
+  call i32 @f_v2i32(<2 x i32> <i32 1536, i32 1536>)
+  ; CHECK:         movi    v{{[0-9]+}}.2s, #0x5, lsl #8
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> <i64 5497558140160>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x5, lsl #8
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v16i8
+  call i8 @f_v16i8(<16 x i8> <i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x4, lsl #8
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].8h, v[[REG1]].8h
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v8i16
+  call i16 @f_v8i16(<8 x i16> <i16 1024, i16 0, i16 1024, i16 0, i16 1024, i16 0, i16 1024, i16 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x3, lsl #8
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].4s, v[[REG1]].4s
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v4i32
+  call i32 @f_v4i32(<4 x i32> <i32 768, i32 768, i32 768, i32 768>)
+  ; CHECK:         movi    v[[REG:[0-9]+]].4s, #0x2, lsl #8
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> <i64 2199023256064, i64 2199023256064>)
+
+  ret void
+}
+
+; CHECK-LABEL: modimm_t3_call:
+define void @modimm_t3_call() {
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x8, lsl #16
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.8b, v[[REG1]].8b
+  ; CHECK-NEXT:    bl      f_v8i8
+  call i8 @f_v8i8(<8 x i8> <i8 0, i8 0, i8 8, i8 0, i8 0, i8 0, i8 8, i8 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x7, lsl #16
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.4h, v[[REG1]].4h
+  ; CHECK-NEXT:    bl      f_v4i16
+  call i16 @f_v4i16(<4 x i16> <i16 0, i16 7, i16 0, i16 7>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x6, lsl #16
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
+  ; CHECK-NEXT:    bl      f_v2i32
+  call i32 @f_v2i32(<2 x i32> <i32 393216, i32 393216>)
+  ; CHECK:         movi    v{{[0-9]+}}.2s, #0x5, lsl #16
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> <i64 1407374883880960>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x5, lsl #16
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v16i8
+  call i8 @f_v16i8(<16 x i8> <i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x4, lsl #16
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].8h, v[[REG1]].8h
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v8i16
+  call i16 @f_v8i16(<8 x i16> <i16 0, i16 4, i16 0, i16 4, i16 0, i16 4, i16 0, i16 4>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x3, lsl #16
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].4s, v[[REG1]].4s
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v4i32
+  call i32 @f_v4i32(<4 x i32> <i32 196608, i32 196608, i32 196608, i32 196608>)
+  ; CHECK:         movi    v[[REG:[0-9]+]].4s, #0x2, lsl #16
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> <i64 562949953552384, i64 562949953552384>)
+
+  ret void
+}
+
+; CHECK-LABEL: modimm_t4_call:
+define void @modimm_t4_call() {
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x8, lsl #24
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.8b, v[[REG1]].8b
+  ; CHECK-NEXT:    bl      f_v8i8
+  call i8 @f_v8i8(<8 x i8> <i8 0, i8 0, i8 0, i8 8, i8 0, i8 0, i8 0, i8 8>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x7, lsl #24
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.4h, v[[REG1]].4h
+  ; CHECK-NEXT:    bl      f_v4i16
+  call i16 @f_v4i16(<4 x i16> <i16 0, i16 1792, i16 0, i16 1792>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x6, lsl #24
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
+  ; CHECK-NEXT:    bl      f_v2i32
+  call i32 @f_v2i32(<2 x i32> <i32 100663296, i32 100663296>)
+  ; CHECK:         movi    v{{[0-9]+}}.2s, #0x5, lsl #24
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> <i64 360287970273525760>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x5, lsl #24
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v16i8
+  call i8 @f_v16i8(<16 x i8> <i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5, i8 0, i8 0, i8 0, i8 5>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x4, lsl #24
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].8h, v[[REG1]].8h
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v8i16
+  call i16 @f_v8i16(<8 x i16> <i16 0, i16 1024, i16 0, i16 1024, i16 0, i16 1024, i16 0, i16 1024>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x3, lsl #24
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].4s, v[[REG1]].4s
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v4i32
+  call i32 @f_v4i32(<4 x i32> <i32 50331648, i32 50331648, i32 50331648, i32 50331648>)
+  ; CHECK:         movi    v[[REG:[0-9]+]].4s, #0x2, lsl #24
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> <i64 144115188109410304, i64 144115188109410304>)
+
+  ret void
+}
+
+; CHECK-LABEL: modimm_t5_call:
+define void @modimm_t5_call() {
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4h, #0x8
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.8b, v[[REG1]].8b
+  ; CHECK-NEXT:    bl      f_v8i8
+  call i8 @f_v8i8(<8 x i8> <i8 8, i8 0, i8 8, i8 0, i8 8, i8 0, i8 8, i8 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4h, #0x7
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.4h, v[[REG1]].4h
+  ; CHECK-NEXT:    bl      f_v4i16
+  call i16 @f_v4i16(<4 x i16> <i16 7, i16 7, i16 7, i16 7>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4h, #0x6
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
+  ; CHECK-NEXT:    bl      f_v2i32
+  call i32 @f_v2i32(<2 x i32> <i32 393222, i32 393222>)
+  ; CHECK:         movi    v{{[0-9]+}}.4h, #0x5
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> <i64 1407396358717445>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].8h, #0x5
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v16i8
+  call i8 @f_v16i8(<16 x i8> <i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].8h, #0x4
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].8h, v[[REG1]].8h
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v8i16
+  call i16 @f_v8i16(<8 x i16> <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].8h, #0x3
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].4s, v[[REG1]].4s
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v4i32
+  call i32 @f_v4i32(<4 x i32> <i32 196611, i32 196611, i32 196611, i32 196611>)
+  ; CHECK:         movi    v[[REG:[0-9]+]].8h, #0x2
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> <i64 562958543486978, i64 562958543486978>)
+
+  ret void
+}
+
+; CHECK-LABEL: modimm_t6_call:
+define void @modimm_t6_call() {
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4h, #0x8, lsl #8
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.8b, v[[REG1]].8b
+  ; CHECK-NEXT:    bl      f_v8i8
+  call i8 @f_v8i8(<8 x i8> <i8 0, i8 8, i8 0, i8 8, i8 0, i8 8, i8 0, i8 8>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4h, #0x7, lsl #8
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.4h, v[[REG1]].4h
+  ; CHECK-NEXT:    bl      f_v4i16
+  call i16 @f_v4i16(<4 x i16> <i16 1792, i16 1792, i16 1792, i16 1792>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4h, #0x6, lsl #8
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
+  ; CHECK-NEXT:    bl      f_v2i32
+  call i32 @f_v2i32(<2 x i32> <i32 100664832, i32 100664832>)
+  ; CHECK:         movi    v{{[0-9]+}}.4h, #0x5, lsl #8
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> <i64 360293467831665920>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].8h, #0x5, lsl #8
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v16i8
+  call i8 @f_v16i8(<16 x i8> <i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5, i8 0, i8 5>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].8h, #0x4, lsl #8
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].8h, v[[REG1]].8h
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v8i16
+  call i16 @f_v8i16(<8 x i16> <i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024, i16 1024>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].8h, #0x3, lsl #8
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].4s, v[[REG1]].4s
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v4i32
+  call i32 @f_v4i32(<4 x i32> <i32 50332416, i32 50332416, i32 50332416, i32 50332416>)
+  ; CHECK:         movi    v[[REG:[0-9]+]].8h, #0x2, lsl #8
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> <i64 144117387132666368, i64 144117387132666368>)
+
+  ret void
+}
+
+; CHECK-LABEL: modimm_t7_call:
+define void @modimm_t7_call() {
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x8, msl #8
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.8b, v[[REG1]].8b
+  ; CHECK-NEXT:    bl      f_v8i8
+  call i8 @f_v8i8(<8 x i8> <i8 255, i8 8, i8 0, i8 0, i8 255, i8 8, i8 0, i8 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x7, msl #8
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.4h, v[[REG1]].4h
+  ; CHECK-NEXT:    bl      f_v4i16
+  call i16 @f_v4i16(<4 x i16> <i16 2047, i16 0, i16 2047, i16 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x6, msl #8
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
+  ; CHECK-NEXT:    bl      f_v2i32
+  call i32 @f_v2i32(<2 x i32> <i32 1791, i32 1791>)
+  ; CHECK:         movi    v{{[0-9]+}}.2s, #0x5, msl #8
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> <i64 6592774800895>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x5, msl #8
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v16i8
+  call i8 @f_v16i8(<16 x i8> <i8 255, i8 5, i8 0, i8 0, i8 255, i8 5, i8 0, i8 0, i8 255, i8 5, i8 0, i8 0, i8 255, i8 5, i8 0, i8 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x4, msl #8
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].8h, v[[REG1]].8h
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v8i16
+  call i16 @f_v8i16(<8 x i16> <i16 1279, i16 0, i16 1279, i16 0, i16 1279, i16 0, i16 1279, i16 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x3, msl #8
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].4s, v[[REG1]].4s
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v4i32
+  call i32 @f_v4i32(<4 x i32> <i32 1023, i32 1023, i32 1023, i32 1023>)
+  ; CHECK:         movi    v[[REG:[0-9]+]].4s, #0x2, msl #8
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> <i64 3294239916799, i64 3294239916799>)
+
+  ret void
+}
+
+; CHECK-LABEL: modimm_t8_call:
+define void @modimm_t8_call() {
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x8, msl #16
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.8b, v[[REG1]].8b
+  ; CHECK-NEXT:    bl      f_v8i8
+  call i8 @f_v8i8(<8 x i8> <i8 255, i8 255, i8 8, i8 0, i8 255, i8 255, i8 8, i8 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x7, msl #16
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.4h, v[[REG1]].4h
+  ; CHECK-NEXT:    bl      f_v4i16
+  call i16 @f_v4i16(<4 x i16> <i16 65535, i16 7, i16 65535, i16 7>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2s, #0x6, msl #16
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
+  ; CHECK-NEXT:    bl      f_v2i32
+  call i32 @f_v2i32(<2 x i32> <i32 458751, i32 458751>)
+  ; CHECK:         movi    v{{[0-9]+}}.2s, #0x5, msl #16
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> <i64 1688845565689855>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x5, msl #16
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v16i8
+  call i8 @f_v16i8(<16 x i8> <i8 255, i8 255, i8 5, i8 0, i8 255, i8 255, i8 5, i8 0, i8 255, i8 255, i8 5, i8 0, i8 255, i8 255, i8 5, i8 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x4, msl #16
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].8h, v[[REG1]].8h
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v8i16
+  call i16 @f_v8i16(<8 x i16> <i16 65535, i16 4, i16 65535, i16 4, i16 65535, i16 4, i16 65535, i16 4>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x3, msl #16
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].4s, v[[REG1]].4s
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v4i32
+  call i32 @f_v4i32(<4 x i32> <i32 262143, i32 262143, i32 262143, i32 262143>)
+  ; CHECK:         movi    v[[REG:[0-9]+]].4s, #0x2, msl #16
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> <i64 844420635361279, i64 844420635361279>)
+
+  ret void
+}
+
+; CHECK-LABEL: modimm_t9_call:
+define void @modimm_t9_call() {
+  ; CHECK:         movi    v[[REG1:[0-9]+]].8b, #0x8
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.8b, v[[REG1]].8b
+  ; CHECK-NEXT:    bl      f_v8i8
+  call i8 @f_v8i8(<8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].8b, #0x7
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.4h, v[[REG1]].4h
+  ; CHECK-NEXT:    bl      f_v4i16
+  call i16 @f_v4i16(<4 x i16> <i16 1799, i16 1799, i16 1799, i16 1799>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].8b, #0x6
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
+  ; CHECK-NEXT:    bl      f_v2i32
+  call i32 @f_v2i32(<2 x i32> <i32 101058054, i32 101058054>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].16b, #0x5
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v16i8
+  call i8 @f_v16i8(<16 x i8> <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].16b, #0x4
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].8h, v[[REG1]].8h
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v8i16
+  call i16 @f_v8i16(<8 x i16> <i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028, i16 1028>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].16b, #0x3
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].4s, v[[REG1]].4s
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v4i32
+  call i32 @f_v4i32(<4 x i32> <i32 50529027, i32 50529027, i32 50529027, i32 50529027>)
+
+  ret void
+}
+
+; CHECK-LABEL: modimm_t10_call:
+define void @modimm_t10_call() {
+  ; CHECK:         movi    d[[REG1:[0-9]+]], #0x0000ff000000ff
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.8b, v[[REG1]].8b
+  ; CHECK-NEXT:    bl      f_v8i8
+  call i8 @f_v8i8(<8 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0>)
+  ; CHECK:         movi    d[[REG1:[0-9]+]], #0x00ffff0000ffff
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.4h, v[[REG1]].4h
+  ; CHECK-NEXT:    bl      f_v4i16
+  call i16 @f_v4i16(<4 x i16> <i16 -1, i16 0, i16 -1, i16 0>)
+  ; CHECK:         movi    d[[REG1:[0-9]+]], #0xffffffffffffffff
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
+  ; CHECK-NEXT:    bl      f_v2i32
+  call i32 @f_v2i32(<2 x i32> <i32 -1, i32 -1>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2d, #0xffffff00ffffff
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v16i8
+  call i8 @f_v16i8(<16 x i8> <i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2d, #0xffffffffffff0000
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].8h, v[[REG1]].8h
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v8i16
+  call i16 @f_v8i16(<8 x i16> <i16 0, i16 -1, i16 -1, i16 -1, i16 0, i16 -1, i16 -1, i16 -1>)
+  ; CHECK:         movi    v[[REG1:[0-9]+]].2d, #0xffffffff00000000
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].4s, v[[REG1]].4s
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v4i32
+  call i32 @f_v4i32(<4 x i32> <i32 0, i32 -1, i32 0, i32 -1>)
+
+  ret void
+}
+
+; CHECK-LABEL: modimm_t11_call:
+define void @modimm_t11_call() {
+  ; CHECK:         fmov    v[[REG1:[0-9]+]].2s, #4.00000000
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.8b, v[[REG1]].8b
+  ; CHECK-NEXT:    bl      f_v8i8
+  call i8 @f_v8i8(<8 x i8> <i8 0, i8 0, i8 128, i8 64, i8 0, i8 0, i8 128, i8 64>)
+  ; CHECK:         fmov    v[[REG1:[0-9]+]].2s, #3.75000000
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.4h, v[[REG1]].4h
+  ; CHECK-NEXT:    bl      f_v4i16
+  call i16 @f_v4i16(<4 x i16> <i16 0, i16 16496, i16 0, i16 16496>)
+  ; CHECK:         fmov    v[[REG1:[0-9]+]].2s, #3.50000000
+  ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
+  ; CHECK-NEXT:    bl      f_v2i32
+  call i32 @f_v2i32(<2 x i32> <i32 1080033280, i32 1080033280>)
+  ; CHECK:         fmov    v{{[0-9]+}}.2s, #0.39062500
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> <i64 4523865826746957824>)
+  ; CHECK:         fmov    v[[REG1:[0-9]+]].4s, #3.25000000
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v16i8
+  call i8 @f_v16i8(<16 x i8> <i8 0, i8 0, i8 80, i8 64, i8 0, i8 0, i8 80, i8 64, i8 0, i8 0, i8 80, i8 64, i8 0, i8 0, i8 80, i8 64>)
+  ; CHECK:         fmov    v[[REG1:[0-9]+]].4s, #3.00000000
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].8h, v[[REG1]].8h
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v8i16
+  call i16 @f_v8i16(<8 x i16> <i16 0, i16 16448, i16 0, i16 16448, i16 0, i16 16448, i16 0, i16 16448>)
+  ; CHECK:         fmov    v[[REG1:[0-9]+]].4s, #2.75000000
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].4s, v[[REG1]].4s
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v4i32
+  call i32 @f_v4i32(<4 x i32> <i32 1076887552, i32 1076887552, i32 1076887552, i32 1076887552>)
+  ; CHECK:         fmov    v[[REG:[0-9]+]].4s, #2.5000000
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> <i64 4620693218757967872, i64 4620693218757967872>)
+
+  ret void
+}
+
+; CHECK-LABEL: modimm_t12_call:
+define void @modimm_t12_call() {
+  ; CHECK:         fmov    v[[REG1:[0-9]+]].2d, #0.18750000
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v16i8
+  call i8 @f_v16i8(<16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 200, i8 63, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 200, i8 63>)
+  ; CHECK:         fmov    v[[REG1:[0-9]+]].2d, #0.17968750
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].8h, v[[REG1]].8h
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v8i16
+  call i16 @f_v8i16(<8 x i16> <i16 0, i16 0, i16 0, i16 16327, i16 0, i16 0, i16 0, i16 16327>)
+  ; CHECK:         fmov    v[[REG1:[0-9]+]].2d, #0.17187500
+  ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].4s, v[[REG1]].4s
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v4i32
+  call i32 @f_v4i32(<4 x i32> <i32 0, i32 1069940736, i32 0, i32 1069940736>)
+
+  ret void
+}
diff --git a/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll b/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll
new file mode 100644
index 000000000000..64d91eea9739
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll
@@ -0,0 +1,534 @@
+; REQUIRES: asserts
+; The regression tests need to test for order of emitted instructions, and
+; therefore, the tests are a bit fragile/reliant on instruction scheduling. The
+; test cases have been minimized as much as possible, but still most of the test
+; cases could break if instruction scheduling heuristics for cortex-a53 change
+; RUN: llc < %s -mcpu=cortex-a53 -aarch64-fix-cortex-a53-835769=1 -stats 2>&1 \
+; RUN:  | FileCheck %s --check-prefix CHECK
+; RUN: llc < %s -mcpu=cortex-a53 -aarch64-fix-cortex-a53-835769=0 -stats 2>&1 \
+; RUN:  | FileCheck %s --check-prefix CHECK-NOWORKAROUND
+; The following run lines are just to verify whether or not this pass runs by
+; default for given CPUs. Given the fragility of the tests, this is only run on
+; a test case where the scheduler has not freedom at all to reschedule the
+; instructions, so the potentially massively different scheduling heuristics
+; will not break the test case.
+; RUN: llc < %s -mcpu=generic    | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
+; RUN: llc < %s -mcpu=cortex-a53 | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
+; RUN: llc < %s -mcpu=cortex-a57 | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
+; RUN: llc < %s -mcpu=cyclone    | FileCheck %s --check-prefix CHECK-BASIC-PASS-DISABLED
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define i64 @f_load_madd_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
+entry:
+  %0 = load i64* %c, align 8
+  %mul = mul nsw i64 %0, %b
+  %add = add nsw i64 %mul, %a
+  ret i64 %add
+}
+; CHECK-LABEL: f_load_madd_64:
+; CHECK:	ldr
+; CHECK-NEXT:	nop
+; CHECK-NEXT:	madd
+; CHECK-NOWORKAROUND-LABEL: f_load_madd_64:
+; CHECK-NOWORKAROUND:	ldr
+; CHECK-NOWORKAROUND-NEXT:	madd
+; CHECK-BASIC-PASS-DISABLED-LABEL: f_load_madd_64:
+; CHECK-BASIC-PASS-DISABLED:  ldr
+; CHECK-BASIC-PASS-DISABLED-NEXT:  madd
+
+
+define i32 @f_load_madd_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
+entry:
+  %0 = load i32* %c, align 4
+  %mul = mul nsw i32 %0, %b
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+; CHECK-LABEL: f_load_madd_32:
+; CHECK:	ldr
+; CHECK-NEXT:	madd
+; CHECK-NOWORKAROUND-LABEL: f_load_madd_32:
+; CHECK-NOWORKAROUND:	ldr
+; CHECK-NOWORKAROUND-NEXT:	madd
+
+
+define i64 @f_load_msub_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
+entry:
+  %0 = load i64* %c, align 8
+  %mul = mul nsw i64 %0, %b
+  %sub = sub nsw i64 %a, %mul
+  ret i64 %sub
+}
+; CHECK-LABEL: f_load_msub_64:
+; CHECK:	ldr
+; CHECK-NEXT:	nop
+; CHECK-NEXT:	msub
+; CHECK-NOWORKAROUND-LABEL: f_load_msub_64:
+; CHECK-NOWORKAROUND:	ldr
+; CHECK-NOWORKAROUND-NEXT:	msub
+
+
+define i32 @f_load_msub_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
+entry:
+  %0 = load i32* %c, align 4
+  %mul = mul nsw i32 %0, %b
+  %sub = sub nsw i32 %a, %mul
+  ret i32 %sub
+}
+; CHECK-LABEL: f_load_msub_32:
+; CHECK:	ldr
+; CHECK-NEXT:	msub
+; CHECK-NOWORKAROUND-LABEL: f_load_msub_32:
+; CHECK-NOWORKAROUND:	ldr
+; CHECK-NOWORKAROUND-NEXT:	msub
+
+
+define i64 @f_load_mul_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
+entry:
+  %0 = load i64* %c, align 8
+  %mul = mul nsw i64 %0, %b
+  ret i64 %mul
+}
+; CHECK-LABEL: f_load_mul_64:
+; CHECK:	ldr
+; CHECK-NEXT:	mul
+; CHECK-NOWORKAROUND-LABEL: f_load_mul_64:
+; CHECK-NOWORKAROUND:	ldr
+; CHECK-NOWORKAROUND-NEXT:	mul
+
+
+define i32 @f_load_mul_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
+entry:
+  %0 = load i32* %c, align 4
+  %mul = mul nsw i32 %0, %b
+  ret i32 %mul
+}
+; CHECK-LABEL: f_load_mul_32:
+; CHECK:	ldr
+; CHECK-NEXT:	mul
+; CHECK-NOWORKAROUND-LABEL: f_load_mul_32:
+; CHECK-NOWORKAROUND:	ldr
+; CHECK-NOWORKAROUND-NEXT:	mul
+
+
+define i64 @f_load_mneg_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
+entry:
+  %0 = load i64* %c, align 8
+  %mul = sub i64 0, %b
+  %sub = mul i64 %0, %mul
+  ret i64 %sub
+}
+; CHECK-LABEL: f_load_mneg_64:
+; CHECK-NOWORKAROUND-LABEL: f_load_mneg_64:
+; FIXME: only add further checks here once LLVM actually produces
+;        neg instructions
+; FIXME-CHECK: ldr
+; FIXME-CHECK-NEXT: nop
+; FIXME-CHECK-NEXT: mneg
+; FIXME-CHECK-NOWORKAROUND: ldr
+; FIXME-CHECK-NOWORKAROUND-NEXT: mneg
+
+
+define i32 @f_load_mneg_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
+entry:
+  %0 = load i32* %c, align 4
+  %mul = sub i32 0, %b
+  %sub = mul i32 %0, %mul
+  ret i32 %sub
+}
+; CHECK-LABEL: f_load_mneg_32:
+; CHECK-NOWORKAROUND-LABEL: f_load_mneg_32:
+; FIXME: only add further checks here once LLVM actually produces
+;        neg instructions
+; FIXME-CHECK: ldr
+; FIXME-CHECK-NEXT: mneg
+; FIXME-CHECK-NOWORKAROUND: ldr
+; FIXME-CHECK-NOWORKAROUND-NEXT: mneg
+
+
+define i64 @f_load_smaddl(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+  %conv = sext i32 %b to i64
+  %conv1 = sext i32 %c to i64
+  %mul = mul nsw i64 %conv1, %conv
+  %add = add nsw i64 %mul, %a
+  %0 = load i32* %d, align 4
+  %conv2 = sext i32 %0 to i64
+  %add3 = add nsw i64 %add, %conv2
+  ret i64 %add3
+}
+; CHECK-LABEL: f_load_smaddl:
+; CHECK:	ldrsw
+; CHECK-NEXT:	nop
+; CHECK-NEXT:	smaddl
+; CHECK-NOWORKAROUND-LABEL: f_load_smaddl:
+; CHECK-NOWORKAROUND:	ldrsw
+; CHECK-NOWORKAROUND-NEXT:	smaddl
+
+
+define i64 @f_load_smsubl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+  %conv = sext i32 %b to i64
+  %conv1 = sext i32 %c to i64
+  %mul = mul nsw i64 %conv1, %conv
+  %sub = sub i64 %a, %mul
+  %0 = load i32* %d, align 4
+  %conv2 = sext i32 %0 to i64
+  %add = add nsw i64 %sub, %conv2
+  ret i64 %add
+}
+; CHECK-LABEL: f_load_smsubl_64:
+; CHECK:	ldrsw
+; CHECK-NEXT:	nop
+; CHECK-NEXT:	smsubl
+; CHECK-NOWORKAROUND-LABEL: f_load_smsubl_64:
+; CHECK-NOWORKAROUND:	ldrsw
+; CHECK-NOWORKAROUND-NEXT:	smsubl
+
+
+define i64 @f_load_smull(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+  %conv = sext i32 %b to i64
+  %conv1 = sext i32 %c to i64
+  %mul = mul nsw i64 %conv1, %conv
+  %0 = load i32* %d, align 4
+  %conv2 = sext i32 %0 to i64
+  %div = sdiv i64 %mul, %conv2
+  ret i64 %div
+}
+; CHECK-LABEL: f_load_smull:
+; CHECK:	ldrsw
+; CHECK-NEXT:	smull
+; CHECK-NOWORKAROUND-LABEL: f_load_smull:
+; CHECK-NOWORKAROUND:	ldrsw
+; CHECK-NOWORKAROUND-NEXT:	smull
+
+
+define i64 @f_load_smnegl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+  %conv = sext i32 %b to i64
+  %conv1 = sext i32 %c to i64
+  %mul = sub nsw i64 0, %conv
+  %sub = mul i64 %conv1, %mul
+  %0 = load i32* %d, align 4
+  %conv2 = sext i32 %0 to i64
+  %div = sdiv i64 %sub, %conv2
+  ret i64 %div
+}
+; CHECK-LABEL: f_load_smnegl_64:
+; CHECK-NOWORKAROUND-LABEL: f_load_smnegl_64:
+; FIXME: only add further checks here once LLVM actually produces
+;        smnegl instructions
+
+
+define i64 @f_load_umaddl(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+  %conv = zext i32 %b to i64
+  %conv1 = zext i32 %c to i64
+  %mul = mul i64 %conv1, %conv
+  %add = add i64 %mul, %a
+  %0 = load i32* %d, align 4
+  %conv2 = zext i32 %0 to i64
+  %add3 = add i64 %add, %conv2
+  ret i64 %add3
+}
+; CHECK-LABEL: f_load_umaddl:
+; CHECK:	ldr
+; CHECK-NEXT:	nop
+; CHECK-NEXT:	umaddl
+; CHECK-NOWORKAROUND-LABEL: f_load_umaddl:
+; CHECK-NOWORKAROUND:	ldr
+; CHECK-NOWORKAROUND-NEXT:	umaddl
+
+
+define i64 @f_load_umsubl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+  %conv = zext i32 %b to i64
+  %conv1 = zext i32 %c to i64
+  %mul = mul i64 %conv1, %conv
+  %sub = sub i64 %a, %mul
+  %0 = load i32* %d, align 4
+  %conv2 = zext i32 %0 to i64
+  %add = add i64 %sub, %conv2
+  ret i64 %add
+}
+; CHECK-LABEL: f_load_umsubl_64:
+; CHECK:	ldr
+; CHECK-NEXT:	nop
+; CHECK-NEXT:	umsubl
+; CHECK-NOWORKAROUND-LABEL: f_load_umsubl_64:
+; CHECK-NOWORKAROUND:	ldr
+; CHECK-NOWORKAROUND-NEXT:	umsubl
+
+
+define i64 @f_load_umull(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+  %conv = zext i32 %b to i64
+  %conv1 = zext i32 %c to i64
+  %mul = mul i64 %conv1, %conv
+  %0 = load i32* %d, align 4
+  %conv2 = zext i32 %0 to i64
+  %div = udiv i64 %mul, %conv2
+  ret i64 %div
+}
+; CHECK-LABEL: f_load_umull:
+; CHECK:	ldr
+; CHECK-NEXT:	umull
+; CHECK-NOWORKAROUND-LABEL: f_load_umull:
+; CHECK-NOWORKAROUND:	ldr
+; CHECK-NOWORKAROUND-NEXT:	umull
+
+
+define i64 @f_load_umnegl_64(i64 %a, i32 %b, i32 %c, i32* nocapture readonly %d) #0 {
+entry:
+  %conv = zext i32 %b to i64
+  %conv1 = zext i32 %c to i64
+  %mul = sub nsw i64 0, %conv
+  %sub = mul i64 %conv1, %mul
+  %0 = load i32* %d, align 4
+  %conv2 = zext i32 %0 to i64
+  %div = udiv i64 %sub, %conv2
+  ret i64 %div
+}
+; CHECK-LABEL: f_load_umnegl_64:
+; CHECK-NOWORKAROUND-LABEL: f_load_umnegl_64:
+; FIXME: only add further checks here once LLVM actually produces
+;        umnegl instructions
+
+
+define i64 @f_store_madd_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
+entry:
+  %0 = load i64* %cp, align 8
+  store i64 %a, i64* %e, align 8
+  %mul = mul nsw i64 %0, %b
+  %add = add nsw i64 %mul, %a
+  ret i64 %add
+}
+; CHECK-LABEL: f_store_madd_64:
+; CHECK:	str
+; CHECK-NEXT:	nop
+; CHECK-NEXT:	madd
+; CHECK-NOWORKAROUND-LABEL: f_store_madd_64:
+; CHECK-NOWORKAROUND:	str
+; CHECK-NOWORKAROUND-NEXT:	madd
+
+
+define i32 @f_store_madd_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
+entry:
+  %0 = load i32* %cp, align 4
+  store i32 %a, i32* %e, align 4
+  %mul = mul nsw i32 %0, %b
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+; CHECK-LABEL: f_store_madd_32:
+; CHECK:	str
+; CHECK-NEXT:	madd
+; CHECK-NOWORKAROUND-LABEL: f_store_madd_32:
+; CHECK-NOWORKAROUND:	str
+; CHECK-NOWORKAROUND-NEXT:	madd
+
+
+define i64 @f_store_msub_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
+entry:
+  %0 = load i64* %cp, align 8
+  store i64 %a, i64* %e, align 8
+  %mul = mul nsw i64 %0, %b
+  %sub = sub nsw i64 %a, %mul
+  ret i64 %sub
+}
+; CHECK-LABEL: f_store_msub_64:
+; CHECK:	str
+; CHECK-NEXT:	nop
+; CHECK-NEXT:	msub
+; CHECK-NOWORKAROUND-LABEL: f_store_msub_64:
+; CHECK-NOWORKAROUND:	str
+; CHECK-NOWORKAROUND-NEXT:	msub
+
+
+define i32 @f_store_msub_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
+entry:
+  %0 = load i32* %cp, align 4
+  store i32 %a, i32* %e, align 4
+  %mul = mul nsw i32 %0, %b
+  %sub = sub nsw i32 %a, %mul
+  ret i32 %sub
+}
+; CHECK-LABEL: f_store_msub_32:
+; CHECK:	str
+; CHECK-NEXT:	msub
+; CHECK-NOWORKAROUND-LABEL: f_store_msub_32:
+; CHECK-NOWORKAROUND:	str
+; CHECK-NOWORKAROUND-NEXT:	msub
+
+
+define i64 @f_store_mul_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
+entry:
+  %0 = load i64* %cp, align 8
+  store i64 %a, i64* %e, align 8
+  %mul = mul nsw i64 %0, %b
+  ret i64 %mul
+}
+; CHECK-LABEL: f_store_mul_64:
+; CHECK:	str
+; CHECK-NEXT:	mul
+; CHECK-NOWORKAROUND-LABEL: f_store_mul_64:
+; CHECK-NOWORKAROUND:	str
+; CHECK-NOWORKAROUND-NEXT:	mul
+
+
+define i32 @f_store_mul_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
+entry:
+  %0 = load i32* %cp, align 4
+  store i32 %a, i32* %e, align 4
+  %mul = mul nsw i32 %0, %b
+  ret i32 %mul
+}
+; CHECK-LABEL: f_store_mul_32:
+; CHECK:	str
+; CHECK-NEXT:	mul
+; CHECK-NOWORKAROUND-LABEL: f_store_mul_32:
+; CHECK-NOWORKAROUND:	str
+; CHECK-NOWORKAROUND-NEXT:	mul
+
+
+define i64 @f_prefetch_madd_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
+entry:
+  %0 = load i64* %cp, align 8
+  %1 = bitcast i64* %e to i8*
+  tail call void @llvm.prefetch(i8* %1, i32 0, i32 0, i32 1)
+  %mul = mul nsw i64 %0, %b
+  %add = add nsw i64 %mul, %a
+  ret i64 %add
+}
+; CHECK-LABEL: f_prefetch_madd_64:
+; CHECK:	prfm
+; CHECK-NEXT:   nop
+; CHECK-NEXT:	madd
+; CHECK-NOWORKAROUND-LABEL: f_prefetch_madd_64:
+; CHECK-NOWORKAROUND:	prfm
+; CHECK-NOWORKAROUND-NEXT:	madd
+
+declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) #2
+
+define i32 @f_prefetch_madd_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
+entry:
+  %0 = load i32* %cp, align 4
+  %1 = bitcast i32* %e to i8*
+  tail call void @llvm.prefetch(i8* %1, i32 1, i32 0, i32 1)
+  %mul = mul nsw i32 %0, %b
+  %add = add nsw i32 %mul, %a
+  ret i32 %add
+}
+; CHECK-LABEL: f_prefetch_madd_32:
+; CHECK:	prfm
+; CHECK-NEXT:	madd
+; CHECK-NOWORKAROUND-LABEL: f_prefetch_madd_32:
+; CHECK-NOWORKAROUND:	prfm
+; CHECK-NOWORKAROUND-NEXT:	madd
+
+define i64 @f_prefetch_msub_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
+entry:
+  %0 = load i64* %cp, align 8
+  %1 = bitcast i64* %e to i8*
+  tail call void @llvm.prefetch(i8* %1, i32 0, i32 1, i32 1)
+  %mul = mul nsw i64 %0, %b
+  %sub = sub nsw i64 %a, %mul
+  ret i64 %sub
+}
+; CHECK-LABEL: f_prefetch_msub_64:
+; CHECK:	prfm
+; CHECK-NEXT:   nop
+; CHECK-NEXT:	msub
+; CHECK-NOWORKAROUND-LABEL: f_prefetch_msub_64:
+; CHECK-NOWORKAROUND:	prfm
+; CHECK-NOWORKAROUND-NEXT:	msub
+
+define i32 @f_prefetch_msub_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
+entry:
+  %0 = load i32* %cp, align 4
+  %1 = bitcast i32* %e to i8*
+  tail call void @llvm.prefetch(i8* %1, i32 1, i32 1, i32 1)
+  %mul = mul nsw i32 %0, %b
+  %sub = sub nsw i32 %a, %mul
+  ret i32 %sub
+}
+; CHECK-LABEL: f_prefetch_msub_32:
+; CHECK:	prfm
+; CHECK-NEXT:	msub
+; CHECK-NOWORKAROUND-LABEL: f_prefetch_msub_32:
+; CHECK-NOWORKAROUND:	prfm
+; CHECK-NOWORKAROUND-NEXT:	msub
+
+define i64 @f_prefetch_mul_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
+entry:
+  %0 = load i64* %cp, align 8
+  %1 = bitcast i64* %e to i8*
+  tail call void @llvm.prefetch(i8* %1, i32 0, i32 3, i32 1)
+  %mul = mul nsw i64 %0, %b
+  ret i64 %mul
+}
+; CHECK-LABEL: f_prefetch_mul_64:
+; CHECK:	prfm
+; CHECK-NEXT:	mul
+; CHECK-NOWORKAROUND-LABEL: f_prefetch_mul_64:
+; CHECK-NOWORKAROUND:	prfm
+; CHECK-NOWORKAROUND-NEXT:	mul
+
+define i32 @f_prefetch_mul_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
+entry:
+  %0 = load i32* %cp, align 4
+  %1 = bitcast i32* %e to i8*
+  tail call void @llvm.prefetch(i8* %1, i32 1, i32 3, i32 1)
+  %mul = mul nsw i32 %0, %b
+  ret i32 %mul
+}
+; CHECK-LABEL: f_prefetch_mul_32:
+; CHECK:	prfm
+; CHECK-NEXT:	mul
+; CHECK-NOWORKAROUND-LABEL: f_prefetch_mul_32:
+; CHECK-NOWORKAROUND:	prfm
+; CHECK-NOWORKAROUND-NEXT:	mul
+
+define i64 @fall_through(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
+entry:
+  %0 = load i64* %c, align 8
+  br label %block1
+
+block1:
+  %mul = mul nsw i64 %0, %b
+  %add = add nsw i64 %mul, %a
+  %tmp = ptrtoint i8* blockaddress(@fall_through, %block1) to i64
+  %ret = add nsw i64 %tmp, %add
+  ret i64 %ret
+}
+; CHECK-LABEL:	fall_through
+; CHECK:	ldr
+; CHECK-NEXT:	nop
+; CHECK-NEXT:	.Ltmp
+; CHECK-NEXT: 	BB
+; CHECK-NEXT: 	madd
+; CHECK-NOWORKAROUND-LABEL:	fall_through
+; CHECK-NOWORKAROUND: 	ldr
+; CHECK-NOWORKAROUND-NEXT:	.Ltmp
+; CHECK-NOWORKAROUND-NEXT:	BB
+; CHECK-NOWORKAROUND-NEXT:	madd
+
+; No checks for this, just check it doesn't crash
+define i32 @crash_check(i8** nocapture readnone %data) #0 {
+entry:
+  br label %while.cond
+
+while.cond:
+  br label %while.cond
+}
+
+attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+
+; CHECK-LABEL: ... Statistics Collected ...
+; CHECK: 11 aarch64-fix-cortex-a53-835769 - Number of Nops added to work around erratum 835769
diff --git a/test/CodeGen/AArch64/aarch64-gep-opt.ll b/test/CodeGen/AArch64/aarch64-gep-opt.ll
new file mode 100644
index 000000000000..811eed9d1fbe
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-gep-opt.ll
@@ -0,0 +1,163 @@
+; RUN: llc -O3 -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -O3 -print-after=codegenprepare -mcpu=cyclone < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
+; RUN: llc -O3 -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-linux-gnueabi"
+
+; Following test cases test enabling SeparateConstOffsetFromGEP pass in AArch64
+; backend. If useAA() returns true, it will lower a GEP with multiple indices
+; into GEPs with a single index, otherwise it will lower it into a
+; "ptrtoint+arithmetics+inttoptr" form.
+
+%struct = type { i32, i32, i32, i32, [20 x i32] }
+
+; Check that when two complex GEPs are used in two basic blocks, LLVM can
+; elimilate the common subexpression for the second use.
+define void @test_GEP_CSE([240 x %struct]* %string, i32* %adj, i32 %lib, i64 %idxprom) {
+  %liberties = getelementptr [240 x %struct]* %string, i64 1, i64 %idxprom, i32 3
+  %1 = load i32* %liberties, align 4
+  %cmp = icmp eq i32 %1, %lib
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %origin = getelementptr [240 x %struct]* %string, i64 1, i64 %idxprom, i32 2
+  %2 = load i32* %origin, align 4
+  store i32 %2, i32* %adj, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; CHECK-LABEL: test_GEP_CSE:
+; CHECK: madd
+; CHECK: ldr
+; CHECK-NOT: madd
+; CHECK:ldr
+
+; CHECK-NoAA-LABEL: @test_GEP_CSE(
+; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint [240 x %struct]* %string to i64
+; CHECK-NoAA: [[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
+; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]]
+; CHECK-NoAA: add i64 [[PTR2]], 23052
+; CHECK-NoAA: inttoptr
+; CHECK-NoAA: if.then:
+; CHECK-NoAA-NOT: ptrtoint
+; CHECK-NoAA-NOT: mul
+; CHECK-NoAA: add i64 [[PTR2]], 23048
+; CHECK-NoAA: inttoptr
+
+; CHECK-UseAA-LABEL: @test_GEP_CSE(
+; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = bitcast [240 x %struct]* %string to i8*
+; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
+; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8* [[PTR0]], i64 [[IDX]]
+; CHECK-UseAA: getelementptr i8* [[PTR1]], i64 23052
+; CHECK-UseAA: bitcast
+; CHECK-UseAA: if.then:
+; CHECK-UseAA: getelementptr i8* [[PTR1]], i64 23048
+; CHECK-UseAA: bitcast
+
+%class.my = type { i32, [128 x i32], i32, [256 x %struct.pt]}
+%struct.pt = type { %struct.point*, i32, i32 }
+%struct.point = type { i32, i32 }
+
+; Check when a GEP is used across two basic block, LLVM can sink the address
+; calculation and code gen can generate a better addressing mode for the second
+; use.
+define void @test_GEP_across_BB(%class.my* %this, i64 %idx) {
+  %1 = getelementptr %class.my* %this, i64 0, i32 3, i64 %idx, i32 1
+  %2 = load i32* %1, align 4
+  %3 = getelementptr %class.my* %this, i64 0, i32 3, i64 %idx, i32 2
+  %4 = load i32* %3, align 4
+  %5 = icmp eq i32 %2, %4
+  br i1 %5, label %if.true, label %exit
+
+if.true:
+  %6 = shl i32 %4, 1
+  store i32 %6, i32* %3, align 4
+  br label %exit
+
+exit:
+  %7 = add nsw i32 %4, 1
+  store i32 %7, i32* %1, align 4
+  ret void
+}
+; CHECK-LABEL: test_GEP_across_BB:
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #528]
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #532]
+; CHECK-NOT: add
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #532]
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #528]
+
+; CHECK-NoAA-LABEL: test_GEP_across_BB(
+; CHECK-NoAA: add i64 [[TMP:%[a-zA-Z0-9]+]], 528
+; CHECK-NoAA: add i64 [[TMP]], 532
+; CHECK-NoAA: if.true:
+; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 [[TMP]], 532
+; CHECK-NoAA: exit:
+; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 [[TMP]], 528
+
+; CHECK-UseAA-LABEL: test_GEP_across_BB(
+; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = getelementptr
+; CHECK-UseAA: getelementptr i8* [[PTR0]], i64 528
+; CHECK-UseAA: getelementptr i8* [[PTR0]], i64 532
+; CHECK-UseAA: if.true:
+; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8* [[PTR0]], i64 532
+; CHECK-UseAA: exit:
+; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8* [[PTR0]], i64 528
+
+%struct.S = type { float, double }
+@struct_array = global [1024 x %struct.S] zeroinitializer, align 16
+
+; The following two test cases check we can extract constant from indices of
+; struct type.
+; The constant offsets are from indices "i64 %idxprom" and "i32 1". As the
+; alloca size of %struct.S is 16, and "i32 1" is the 2rd element whose field
+; offset is 8, the total constant offset is (5 * 16 + 8) = 88.
+define double* @test-struct_1(i32 %i) {
+entry:
+  %add = add nsw i32 %i, 5
+  %idxprom = sext i32 %add to i64
+  %p = getelementptr [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1
+  ret double* %p
+}
+; CHECK-NoAA-LABEL: @test-struct_1(
+; CHECK-NoAA-NOT: getelementptr
+; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, 88
+
+; CHECK-UseAA-LABEL: @test-struct_1(
+; CHECK-UseAA: getelementptr i8* %{{[a-zA-Z0-9]+}}, i64 88
+
+%struct3 = type { i64, i32 }
+%struct2 = type { %struct3, i32 }
+%struct1 = type { i64, %struct2 }
+%struct0 = type { i32, i32, i64*, [100 x %struct1] }
+
+; The constant offsets are from indices "i32 3", "i64 %arrayidx" and "i32 1".
+; "i32 3" is the 4th element whose field offset is 16. The alloca size of
+; %struct1 is 32. "i32 1" is the 2rd element whose field offset is 8. So the
+; total constant offset is 16 + (-2 * 32) + 8 = -40
+define %struct2* @test-struct_2(%struct0* %ptr, i64 %idx) {
+entry:
+  %arrayidx = add nsw i64 %idx, -2
+  %ptr2 = getelementptr %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
+  ret %struct2* %ptr2
+}
+; CHECK-NoAA-LABEL: @test-struct_2(
+; CHECK-NoAA-NOT: = getelementptr
+; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, -40
+
+; CHECK-UseAA-LABEL: @test-struct_2(
+; CHECK-UseAA: getelementptr i8* %{{[a-zA-Z0-9]+}}, i64 -40
+
+; Test that when a index is added from two constant, SeparateConstOffsetFromGEP
+; pass does not generate incorrect result.
+define void @test_const_add([3 x i32]* %in) {
+  %inc = add nsw i32 2, 1
+  %idxprom = sext i32 %inc to i64
+  %arrayidx = getelementptr [3 x i32]* %in, i64 %idxprom, i64 2
+  store i32 0, i32* %arrayidx, align 4
+  ret void
+}
+; CHECK-LABEL: test_const_add:
+; CHECK: str wzr, [x0, #44]
diff --git a/test/CodeGen/AArch64/aarch64-smull.ll b/test/CodeGen/AArch64/aarch64-smull.ll
new file mode 100644
index 000000000000..92582d7d25e9
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-smull.ll
@@ -0,0 +1,332 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s -o -| FileCheck %s
+
+define <8 x i16> @smull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+; CHECK-LABEL: smull_v8i8_v8i16:
+; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %tmp1 = load <8 x i8>* %A
+  %tmp2 = load <8 x i8>* %B
+  %tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
+  %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
+  %tmp5 = mul <8 x i16> %tmp3, %tmp4
+  ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @smull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+; CHECK-LABEL: smull_v4i16_v4i32:
+; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %tmp1 = load <4 x i16>* %A
+  %tmp2 = load <4 x i16>* %B
+  %tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
+  %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
+  %tmp5 = mul <4 x i32> %tmp3, %tmp4
+  ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @smull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+; CHECK-LABEL: smull_v2i32_v2i64:
+; CHECK:  smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %tmp1 = load <2 x i32>* %A
+  %tmp2 = load <2 x i32>* %B
+  %tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
+  %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
+  %tmp5 = mul <2 x i64> %tmp3, %tmp4
+  ret <2 x i64> %tmp5
+}
+
+define <8 x i16> @umull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
+; CHECK-LABEL: umull_v8i8_v8i16:
+; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %tmp1 = load <8 x i8>* %A
+  %tmp2 = load <8 x i8>* %B
+  %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
+  %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
+  %tmp5 = mul <8 x i16> %tmp3, %tmp4
+  ret <8 x i16> %tmp5
+}
+
+define <4 x i32> @umull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+; CHECK-LABEL: umull_v4i16_v4i32:
+; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %tmp1 = load <4 x i16>* %A
+  %tmp2 = load <4 x i16>* %B
+  %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
+  %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
+  %tmp5 = mul <4 x i32> %tmp3, %tmp4
+  ret <4 x i32> %tmp5
+}
+
+define <2 x i64> @umull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+; CHECK-LABEL: umull_v2i32_v2i64:
+; CHECK:  umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %tmp1 = load <2 x i32>* %A
+  %tmp2 = load <2 x i32>* %B
+  %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
+  %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
+  %tmp5 = mul <2 x i64> %tmp3, %tmp4
+  ret <2 x i64> %tmp5
+}
+
+define <8 x i16> @smlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+; CHECK-LABEL: smlal_v8i8_v8i16:
+; CHECK:  smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = load <8 x i8>* %B
+  %tmp3 = load <8 x i8>* %C
+  %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
+  %tmp5 = sext <8 x i8> %tmp3 to <8 x i16>
+  %tmp6 = mul <8 x i16> %tmp4, %tmp5
+  %tmp7 = add <8 x i16> %tmp1, %tmp6
+  ret <8 x i16> %tmp7
+}
+
+define <4 x i32> @smlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+; CHECK-LABEL: smlal_v4i16_v4i32:
+; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %tmp1 = load <4 x i32>* %A
+  %tmp2 = load <4 x i16>* %B
+  %tmp3 = load <4 x i16>* %C
+  %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
+  %tmp5 = sext <4 x i16> %tmp3 to <4 x i32>
+  %tmp6 = mul <4 x i32> %tmp4, %tmp5
+  %tmp7 = add <4 x i32> %tmp1, %tmp6
+  ret <4 x i32> %tmp7
+}
+
+define <2 x i64> @smlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+; CHECK-LABEL: smlal_v2i32_v2i64:
+; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %tmp1 = load <2 x i64>* %A
+  %tmp2 = load <2 x i32>* %B
+  %tmp3 = load <2 x i32>* %C
+  %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
+  %tmp5 = sext <2 x i32> %tmp3 to <2 x i64>
+  %tmp6 = mul <2 x i64> %tmp4, %tmp5
+  %tmp7 = add <2 x i64> %tmp1, %tmp6
+  ret <2 x i64> %tmp7
+}
+
+define <8 x i16> @umlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+; CHECK-LABEL: umlal_v8i8_v8i16:
+; CHECK:  umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = load <8 x i8>* %B
+  %tmp3 = load <8 x i8>* %C
+  %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
+  %tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
+  %tmp6 = mul <8 x i16> %tmp4, %tmp5
+  %tmp7 = add <8 x i16> %tmp1, %tmp6
+  ret <8 x i16> %tmp7
+}
+
+define <4 x i32> @umlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+; CHECK-LABEL: umlal_v4i16_v4i32:
+; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %tmp1 = load <4 x i32>* %A
+  %tmp2 = load <4 x i16>* %B
+  %tmp3 = load <4 x i16>* %C
+  %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
+  %tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
+  %tmp6 = mul <4 x i32> %tmp4, %tmp5
+  %tmp7 = add <4 x i32> %tmp1, %tmp6
+  ret <4 x i32> %tmp7
+}
+
+define <2 x i64> @umlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+; CHECK-LABEL: umlal_v2i32_v2i64:
+; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %tmp1 = load <2 x i64>* %A
+  %tmp2 = load <2 x i32>* %B
+  %tmp3 = load <2 x i32>* %C
+  %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
+  %tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
+  %tmp6 = mul <2 x i64> %tmp4, %tmp5
+  %tmp7 = add <2 x i64> %tmp1, %tmp6
+  ret <2 x i64> %tmp7
+}
+
+define <8 x i16> @smlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+; CHECK-LABEL: smlsl_v8i8_v8i16:
+; CHECK:  smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = load <8 x i8>* %B
+  %tmp3 = load <8 x i8>* %C
+  %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
+  %tmp5 = sext <8 x i8> %tmp3 to <8 x i16>
+  %tmp6 = mul <8 x i16> %tmp4, %tmp5
+  %tmp7 = sub <8 x i16> %tmp1, %tmp6
+  ret <8 x i16> %tmp7
+}
+
+define <4 x i32> @smlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+; CHECK-LABEL: smlsl_v4i16_v4i32:
+; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %tmp1 = load <4 x i32>* %A
+  %tmp2 = load <4 x i16>* %B
+  %tmp3 = load <4 x i16>* %C
+  %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
+  %tmp5 = sext <4 x i16> %tmp3 to <4 x i32>
+  %tmp6 = mul <4 x i32> %tmp4, %tmp5
+  %tmp7 = sub <4 x i32> %tmp1, %tmp6
+  ret <4 x i32> %tmp7
+}
+
+define <2 x i64> @smlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+; CHECK-LABEL: smlsl_v2i32_v2i64:
+; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %tmp1 = load <2 x i64>* %A
+  %tmp2 = load <2 x i32>* %B
+  %tmp3 = load <2 x i32>* %C
+  %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
+  %tmp5 = sext <2 x i32> %tmp3 to <2 x i64>
+  %tmp6 = mul <2 x i64> %tmp4, %tmp5
+  %tmp7 = sub <2 x i64> %tmp1, %tmp6
+  ret <2 x i64> %tmp7
+}
+
+define <8 x i16> @umlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
+; CHECK-LABEL: umlsl_v8i8_v8i16:
+; CHECK:  umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = load <8 x i8>* %B
+  %tmp3 = load <8 x i8>* %C
+  %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
+  %tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
+  %tmp6 = mul <8 x i16> %tmp4, %tmp5
+  %tmp7 = sub <8 x i16> %tmp1, %tmp6
+  ret <8 x i16> %tmp7
+}
+
+define <4 x i32> @umlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
+; CHECK-LABEL: umlsl_v4i16_v4i32:
+; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %tmp1 = load <4 x i32>* %A
+  %tmp2 = load <4 x i16>* %B
+  %tmp3 = load <4 x i16>* %C
+  %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
+  %tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
+  %tmp6 = mul <4 x i32> %tmp4, %tmp5
+  %tmp7 = sub <4 x i32> %tmp1, %tmp6
+  ret <4 x i32> %tmp7
+}
+
+define <2 x i64> @umlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
+; CHECK-LABEL: umlsl_v2i32_v2i64:
+; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %tmp1 = load <2 x i64>* %A
+  %tmp2 = load <2 x i32>* %B
+  %tmp3 = load <2 x i32>* %C
+  %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
+  %tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
+  %tmp6 = mul <2 x i64> %tmp4, %tmp5
+  %tmp7 = sub <2 x i64> %tmp1, %tmp6
+  ret <2 x i64> %tmp7
+}
+
+; SMULL recognizing BUILD_VECTORs with sign/zero-extended elements.
+define <8 x i16> @smull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
+; CHECK-LABEL: smull_extvec_v8i8_v8i16:
+; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %tmp3 = sext <8 x i8> %arg to <8 x i16>
+  %tmp4 = mul <8 x i16> %tmp3, <i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12, i16 -12>
+  ret <8 x i16> %tmp4
+}
+
+define <8 x i16> @smull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
+; Do not use SMULL if the BUILD_VECTOR element values are too big.
+; CHECK-LABEL: smull_noextvec_v8i8_v8i16:
+; CHECK: movz
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %tmp3 = sext <8 x i8> %arg to <8 x i16>
+  %tmp4 = mul <8 x i16> %tmp3, <i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999, i16 -999>
+  ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @smull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
+; CHECK-LABEL: smull_extvec_v4i16_v4i32:
+; CHECK:  smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %tmp3 = sext <4 x i16> %arg to <4 x i32>
+  %tmp4 = mul <4 x i32> %tmp3, <i32 -12, i32 -12, i32 -12, i32 -12>
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @smull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
+; CHECK: smull_extvec_v2i32_v2i64
+; CHECK: smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %tmp3 = sext <2 x i32> %arg to <2 x i64>
+  %tmp4 = mul <2 x i64> %tmp3, <i64 -1234, i64 -1234>
+  ret <2 x i64> %tmp4
+}
+
+define <8 x i16> @umull_extvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
+; CHECK-LABEL: umull_extvec_v8i8_v8i16:
+; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+  %tmp3 = zext <8 x i8> %arg to <8 x i16>
+  %tmp4 = mul <8 x i16> %tmp3, <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
+  ret <8 x i16> %tmp4
+}
+
+define <8 x i16> @umull_noextvec_v8i8_v8i16(<8 x i8> %arg) nounwind {
+; Do not use SMULL if the BUILD_VECTOR element values are too big.
+; CHECK-LABEL: umull_noextvec_v8i8_v8i16:
+; CHECK: movz
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %tmp3 = zext <8 x i8> %arg to <8 x i16>
+  %tmp4 = mul <8 x i16> %tmp3, <i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999, i16 999>
+  ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @umull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
+; CHECK-LABEL: umull_extvec_v4i16_v4i32:
+; CHECK:  umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  %tmp3 = zext <4 x i16> %arg to <4 x i32>
+  %tmp4 = mul <4 x i32> %tmp3, <i32 1234, i32 1234, i32 1234, i32 1234>
+  ret <4 x i32> %tmp4
+}
+
+define <2 x i64> @umull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
+; CHECK-LABEL: umull_extvec_v2i32_v2i64:
+; CHECK: umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  %tmp3 = zext <2 x i32> %arg to <2 x i64>
+  %tmp4 = mul <2 x i64> %tmp3, <i64 1234, i64 1234>
+  ret <2 x i64> %tmp4
+}
+
+define i16 @smullWithInconsistentExtensions(<8 x i8> %vec) {
+; If one operand has a zero-extend and the other a sign-extend, smull
+; cannot be used.
+; CHECK-LABEL: smullWithInconsistentExtensions:
+; CHECK: mul {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  %1 = sext <8 x i8> %vec to <8 x i16>
+  %2 = mul <8 x i16> %1, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %3 = extractelement <8 x i16> %2, i32 0
+  ret i16 %3
+}
+
+define void @distribute(i16* %dst, i8* %src, i32 %mul) nounwind {
+entry:
+; CHECK-LABEL: distribute:
+; CHECK: umull [[REG1:(v[0-9]+.8h)]], {{v[0-9]+}}.8b, [[REG2:(v[0-9]+.8b)]]
+; CHECK: umlal [[REG1]], {{v[0-9]+}}.8b, [[REG2]]
+  %0 = trunc i32 %mul to i8
+  %1 = insertelement <8 x i8> undef, i8 %0, i32 0
+  %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
+  %3 = tail call <16 x i8> @llvm.aarch64.neon.vld1.v16i8(i8* %src, i32 1)
+  %4 = bitcast <16 x i8> %3 to <2 x double>
+  %5 = extractelement <2 x double> %4, i32 1
+  %6 = bitcast double %5 to <8 x i8>
+  %7 = zext <8 x i8> %6 to <8 x i16>
+  %8 = zext <8 x i8> %2 to <8 x i16>
+  %9 = extractelement <2 x double> %4, i32 0
+  %10 = bitcast double %9 to <8 x i8>
+  %11 = zext <8 x i8> %10 to <8 x i16>
+  %12 = add <8 x i16> %7, %11
+  %13 = mul <8 x i16> %12, %8
+  %14 = bitcast i16* %dst to i8*
+  tail call void @llvm.aarch64.neon.vst1.v8i16(i8* %14, <8 x i16> %13, i32 2)
+  ret void
+}
+
+declare <16 x i8> @llvm.aarch64.neon.vld1.v16i8(i8*, i32) nounwind readonly
+
+declare void @llvm.aarch64.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
+
diff --git a/test/CodeGen/AArch64/aarch64-wide-shuffle.ll b/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
new file mode 100644
index 000000000000..d06df7a87fd7
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64-wide-shuffle.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define <4 x i16> @f(<4 x i32> %vqdmlal_v3.i, <8 x i16> %x5) {
+entry:
+  ; Check that we don't just dup the input vector. The code emitted is ext, dup, ext, ext
+  ; but only match the last three instructions as the first two could be combined to
+  ; a dup2 at some stage.
+  ; CHECK: dup
+  ; CHECK: ext
+  ; CHECK: ext
+  %x4 = extractelement <4 x i32> %vqdmlal_v3.i, i32 2
+  %vgetq_lane = trunc i32 %x4 to i16
+  %vecinit.i = insertelement <4 x i16> undef, i16 %vgetq_lane, i32 0
+  %vecinit2.i = insertelement <4 x i16> %vecinit.i, i16 %vgetq_lane, i32 2
+  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vgetq_lane, i32 3
+  %vgetq_lane261 = extractelement <8 x i16> %x5, i32 0
+  %vset_lane267 = insertelement <4 x i16> %vecinit3.i, i16 %vgetq_lane261, i32 1
+  ret <4 x i16> %vset_lane267
+}
diff --git a/test/CodeGen/AArch64/aarch64_f16_be.ll b/test/CodeGen/AArch64/aarch64_f16_be.ll
new file mode 100644
index 000000000000..7504439bab80
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64_f16_be.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mtriple=aarch64-linux-gnuabi -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64_be-linux-gnuabi -O0 < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @test_bitcast_v8f16_to_v4f32(<8 x half> %a) {
+; CHECK-LABEL: test_bitcast_v8f16_to_v4f32:
+; CHECK-NOT: st1
+
+; CHECK-BE-LABEL: test_bitcast_v8f16_to_v4f32:
+; CHECK-BE: st1
+
+  %x = alloca <4 x float>, align 16
+  %y = bitcast <8 x half> %a to <4 x float>
+  store <4 x float> %y, <4 x float>* %x, align 16
+  ret void
+}
+
+define void @test_bitcast_v8f16_to_v2f64(<8 x half> %a) {
+; CHECK-LABEL: test_bitcast_v8f16_to_v2f64:
+; CHECK-NOT: st1
+
+; CHECK-BE-LABEL: test_bitcast_v8f16_to_v2f64:
+; CHECK-BE: st1
+
+  %x = alloca <2 x double>, align 16
+  %y = bitcast <8 x half> %a to <2 x double>
+  store <2 x double> %y, <2 x double>* %x, align 16
+  ret void
+}
+
+define void @test_bitcast_v8f16_to_fp128(<8 x half> %a) {
+; CHECK-LABEL: test_bitcast_v8f16_to_fp128:
+; CHECK-NOT: st1
+
+; CHECK-BE-LABEL: test_bitcast_v8f16_to_fp128:
+; CHECK-BE: st1
+
+  %x = alloca fp128, align 16
+  %y = bitcast <8 x half> %a to fp128
+  store fp128 %y, fp128* %x, align 16
+  ret void
+}
+
+define void @test_bitcast_v4f16_to_v2f32(<4 x half> %a) {
+; CHECK-LABEL: test_bitcast_v4f16_to_v2f32:
+; CHECK-NOT: st1
+
+; CHECK-BE-LABEL: test_bitcast_v4f16_to_v2f32:
+; CHECK-BE: st1
+
+  %x = alloca <2 x float>, align 8
+  %y = bitcast <4 x half> %a to <2 x float>
+  store <2 x float> %y, <2 x float>* %x, align 8
+  ret void
+}
+
+define void @test_bitcast_v4f16_to_v1f64(<4 x half> %a) {
+; CHECK-LABEL: test_bitcast_v4f16_to_v1f64:
+; CHECK-NOT: st1
+
+; CHECK-BE-LABEL: test_bitcast_v4f16_to_v1f64:
+; CHECK-BE: st1
+
+  %x = alloca <1 x double>, align 8
+  %y = bitcast <4 x half> %a to <1 x double>
+  store <1 x double> %y, <1 x double>* %x, align 8
+  ret void
+}
diff --git a/test/CodeGen/AArch64/aarch64_tree_tests.ll b/test/CodeGen/AArch64/aarch64_tree_tests.ll
new file mode 100644
index 000000000000..08e506a66d5e
--- /dev/null
+++ b/test/CodeGen/AArch64/aarch64_tree_tests.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s | FileCheck %s 
+
+; ModuleID = 'aarch64_tree_tests.bc'
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "arm64--linux-gnu"
+
+; CHECK-LABLE: @aarch64_tree_tests_and
+; CHECK: .hword	32768                   
+; CHECK: .hword	32767                   
+; CHECK: .hword	4664                    
+; CHECK: .hword	32767                   
+; CHECK: .hword	32768                   
+; CHECK: .hword	32768                   
+; CHECK: .hword	0                       
+; CHECK: .hword	0                      
+
+; Function Attrs: nounwind readnone
+define <8 x i16> @aarch64_tree_tests_and(<8 x i16> %a) {
+entry:
+  %and = and <8 x i16> <i16 0, i16 undef, i16 undef, i16 0, i16 0, i16 undef, i16 undef, i16 0>, %a
+  %ret = add <8 x i16> %and, <i16 -32768, i16 32767, i16 4664, i16 32767, i16 -32768, i16 -32768, i16 0, i16 0>
+  ret <8 x i16> %ret
+}
+
+; CHECK-LABLE: @aarch64_tree_tests_or
+; CHECK: .hword	32768                 
+; CHECK: .hword	32766
+; CHECK: .hword	4664     
+; CHECK: .hword	32766                
+; CHECK: .hword	32768 
+; CHECK: .hword	32768
+; CHECK: .hword	65535            
+; CHECK: .hword	65535
+
+; Function Attrs: nounwind readnone
+define <8 x i16> @aarch64_tree_tests_or(<8 x i16> %a) {
+entry:
+  %or = or <8 x i16> <i16 -1, i16 undef, i16 undef, i16 -1, i16 -1, i16 undef, i16 undef, i16 -1>, %a
+  %ret = add <8 x i16> %or, <i16 -32767, i16 32767, i16 4665, i16 32767, i16 -32767, i16 -32767, i16 0, i16 0>
+  ret <8 x i16> %ret
+}
+
diff --git a/test/CodeGen/AArch64/adc.ll b/test/CodeGen/AArch64/adc.ll
index 892573ba06b1..0488ee2142cd 100644
--- a/test/CodeGen/AArch64/adc.ll
+++ b/test/CodeGen/AArch64/adc.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-apple-ios7.0 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
 
 define i128 @test_simple(i128 %a, i128 %b, i128 %c) {
 ; CHECK-LABEL: test_simple:
diff --git a/test/CodeGen/AArch64/analyze-branch.ll b/test/CodeGen/AArch64/analyze-branch.ll
index 6616b27c45b7..932cd75052c1 100644
--- a/test/CodeGen/AArch64/analyze-branch.ll
+++ b/test/CodeGen/AArch64/analyze-branch.ll
@@ -7,8 +7,8 @@ declare void @test_true()
 declare void @test_false()
 
 ; !0 corresponds to a branch being taken, !1 to not being takne.
-!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
-!1 = metadata !{metadata !"branch_weights", i32 4, i32 64}
+!0 = !{!"branch_weights", i32 64, i32 4}
+!1 = !{!"branch_weights", i32 4, i32 64}
 
 define void @test_Bcc_fallthrough_taken(i32 %in) nounwind {
 ; CHECK-LABEL: test_Bcc_fallthrough_taken:
diff --git a/test/CodeGen/AArch64/analyzecmp.ll b/test/CodeGen/AArch64/analyzecmp.ll
new file mode 100644
index 000000000000..8962505cbd15
--- /dev/null
+++ b/test/CodeGen/AArch64/analyzecmp.ll
@@ -0,0 +1,32 @@
+; RUN: llc -O3 -mcpu=cortex-a57 < %s | FileCheck %s 
+
+; CHECK-LABLE: @test
+; CHECK: tst [[CMP:x[0-9]+]], #0x8000000000000000
+; CHECK: csel [[R0:x[0-9]+]], [[S0:x[0-9]+]], [[S1:x[0-9]+]], eq
+; CHECK: csel [[R1:x[0-9]+]], [[S2:x[0-9]+]], [[S3:x[0-9]+]], eq
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "arm64--linux-gnueabi"
+
+define void @test(i64 %a, i64* %ptr1, i64* %ptr2) #0 align 2 {
+entry:
+  %conv = and i64 %a, 4294967295
+  %add = add nsw i64 %conv, -1
+  %div = sdiv i64 %add, 64
+  %rem = srem i64 %add, 64
+  %cmp = icmp slt i64 %rem, 0
+  br i1 %cmp, label %if.then, label %exit
+
+if.then:                                
+  %add2 = add nsw i64 %rem, 64
+  %add3 = add i64 %div, -1
+  br label %exit
+
+exit:                 
+  %__n = phi i64 [ %add3, %if.then ], [ %div, %entry ]
+  %__n.0 = phi i64 [ %add2, %if.then ], [ %rem, %entry ]
+  store i64 %__n, i64* %ptr1
+  store i64 %__n.0, i64* %ptr2
+  ret void 
+}
+
+
diff --git a/test/CodeGen/AArch64/and-mask-removal.ll b/test/CodeGen/AArch64/and-mask-removal.ll
new file mode 100644
index 000000000000..f803b85f733b
--- /dev/null
+++ b/test/CodeGen/AArch64/and-mask-removal.ll
@@ -0,0 +1,269 @@
+; RUN: llc -O0 -fast-isel=false -mtriple=arm64-apple-darwin  < %s  | FileCheck %s
+
+@board = common global [400 x i8] zeroinitializer, align 1
+@next_string = common global i32 0, align 4
+@string_number = common global [400 x i32] zeroinitializer, align 4
+
+; Function Attrs: nounwind ssp
+define void @new_position(i32 %pos) {
+entry:
+  %idxprom = sext i32 %pos to i64
+  %arrayidx = getelementptr inbounds [400 x i8]* @board, i64 0, i64 %idxprom
+  %tmp = load i8* %arrayidx, align 1
+  %.off = add i8 %tmp, -1
+  %switch = icmp ult i8 %.off, 2
+  br i1 %switch, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %tmp1 = load i32* @next_string, align 4
+  %arrayidx8 = getelementptr inbounds [400 x i32]* @string_number, i64 0, i64 %idxprom
+  store i32 %tmp1, i32* %arrayidx8, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+; CHECK-LABEL: new_position
+; CHECK-NOT: and
+; CHECK: ret
+}
+
+define zeroext i1 @test8_0(i8 zeroext %x)  align 2 {
+entry:
+  %0 = add i8 %x, 74
+  %1 = icmp ult i8 %0, -20
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test8_0
+; CHECK: and
+; CHECK: ret
+}
+
+define zeroext i1 @test8_1(i8 zeroext %x)  align 2 {
+entry:
+  %0 = add i8 %x, 246
+  %1 = icmp uge i8 %0, 90
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test8_1
+; CHECK-NOT: and
+; CHECK: ret
+}
+
+define zeroext i1 @test8_2(i8 zeroext %x)  align 2 {
+entry:
+  %0 = add i8 %x, 227
+  %1 = icmp ne i8 %0, 179
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test8_2
+; CHECK-NOT: and
+; CHECK: ret
+}
+
+define zeroext i1 @test8_3(i8 zeroext %x)  align 2 {
+entry:
+  %0 = add i8 %x, 201
+  %1 = icmp eq i8 %0, 154
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test8_3
+; CHECK-NOT: and
+; CHECK: ret
+}
+
+define zeroext i1 @test8_4(i8 zeroext %x)  align 2 {
+entry:
+  %0 = add i8 %x, -79
+  %1 = icmp ne i8 %0, -40
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test8_4
+; CHECK-NOT: and
+; CHECK: ret
+}
+
+define zeroext i1 @test8_5(i8 zeroext %x)  align 2 {
+entry:
+  %0 = add i8 %x, 133
+  %1 = icmp uge i8 %0, -105
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test8_5
+; CHECK: and
+; CHECK: ret
+}
+
+define zeroext i1 @test8_6(i8 zeroext %x)  align 2 {
+entry:
+  %0 = add i8 %x, -58
+  %1 = icmp uge i8 %0, 155
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test8_6
+; CHECK: and
+; CHECK: ret
+}
+
+define zeroext i1 @test8_7(i8 zeroext %x)  align 2 {
+entry:
+  %0 = add i8 %x, 225
+  %1 = icmp ult i8 %0, 124
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test8_7
+; CHECK-NOT: and
+; CHECK: ret
+}
+
+
+
+define zeroext i1 @test8_8(i8 zeroext %x)  align 2 {
+entry:
+  %0 = add i8 %x, 190
+  %1 = icmp uge i8 %0, 1
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test8_8
+; CHECK-NOT: and
+; CHECK: ret
+}
+
+define zeroext i1 @test16_0(i16 zeroext %x)  align 2 {
+entry:
+  %0 = add i16 %x, -46989
+  %1 = icmp ne i16 %0, -41903
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test16_0
+; CHECK-NOT: and
+; CHECK: ret
+}
+
+define zeroext i1 @test16_2(i16 zeroext %x)  align 2 {
+entry:
+  %0 = add i16 %x, 16882
+  %1 = icmp ule i16 %0, -24837
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test16_2
+; CHECK: and
+; CHECK: ret
+}
+
+define zeroext i1 @test16_3(i16 zeroext %x)  align 2 {
+entry:
+  %0 = add i16 %x, 29283
+  %1 = icmp ne i16 %0, 16947
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test16_3
+; CHECK-NOT: and
+; CHECK: ret
+}
+
+define zeroext i1 @test16_4(i16 zeroext %x)  align 2 {
+entry:
+  %0 = add i16 %x, -35551
+  %1 = icmp uge i16 %0, 15677
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test16_4
+; CHECK: and
+; CHECK: ret
+}
+
+define zeroext i1 @test16_5(i16 zeroext %x)  align 2 {
+entry:
+  %0 = add i16 %x, -25214
+  %1 = icmp ne i16 %0, -1932
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test16_5
+; CHECK-NOT: and
+; CHECK: ret
+}
+
+define zeroext i1 @test16_6(i16 zeroext %x)  align 2 {
+entry:
+  %0 = add i16 %x, -32194
+  %1 = icmp uge i16 %0, -41215
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test16_6
+; CHECK-NOT: and
+; CHECK: ret
+}
+
+define zeroext i1 @test16_7(i16 zeroext %x)  align 2 {
+entry:
+  %0 = add i16 %x, 9272
+  %1 = icmp uge i16 %0, -42916
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test16_7
+; CHECK: and
+; CHECK: ret
+}
+
+define zeroext i1 @test16_8(i16 zeroext %x)  align 2 {
+entry:
+  %0 = add i16 %x, -63749
+  %1 = icmp ne i16 %0, 6706
+  br i1 %1, label %ret_true, label %ret_false
+ret_false:
+  ret i1 false
+ret_true:
+  ret i1 true
+; CHECK-LABEL: test16_8
+; CHECK-NOT: and
+; CHECK: ret
+}
+
diff --git a/test/CodeGen/AArch64/andandshift.ll b/test/CodeGen/AArch64/andandshift.ll
new file mode 100644
index 000000000000..e2c7a098908e
--- /dev/null
+++ b/test/CodeGen/AArch64/andandshift.ll
@@ -0,0 +1,28 @@
+; RUN: llc -O3 < %s | FileCheck %s 
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "arm64--linux-gnu"
+
+; Function Attrs: nounwind readnone
+define i32 @test1(i8 %a) {
+; CHECK-LABLE: @test1
+; CHECK: ubfx {{w[0-9]+}}, w0, #3, #5
+entry:
+  %conv = zext i8 %a to i32
+  %shr1 = lshr i32 %conv, 3
+  ret i32 %shr1
+}
+
+; Function Attrs: nounwind readnone
+define i32 @test2(i8 %a) {
+; CHECK-LABLE: @test2
+; CHECK: and {{w[0-9]+}}, w0, #0xff
+; CHECK: ubfx {{w[0-9]+}}, w0, #3, #5
+entry:
+  %conv = zext i8 %a to i32
+  %cmp = icmp ugt i8 %a, 47
+  %shr5 = lshr i32 %conv, 3
+  %retval.0 = select i1 %cmp, i32 %shr5, i32 %conv
+  ret i32 %retval.0
+}
+
+
diff --git a/test/CodeGen/AArch64/argument-blocks.ll b/test/CodeGen/AArch64/argument-blocks.ll
new file mode 100644
index 000000000000..f1dcfa67d0eb
--- /dev/null
+++ b/test/CodeGen/AArch64/argument-blocks.ll
@@ -0,0 +1,197 @@
+; RUN: llc -mtriple=aarch64-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DARWINPCS
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AAPCS
+
+declare void @callee(...)
+
+define float @test_hfa_regs(float, [2 x float] %in) {
+; CHECK-LABEL: test_hfa_regs:
+; CHECK: fadd s0, s1, s2
+
+  %lhs = extractvalue [2 x float] %in, 0
+  %rhs = extractvalue [2 x float] %in, 1
+  %sum = fadd float %lhs, %rhs
+  ret float %sum
+}
+
+; Check that the array gets allocated to a contiguous block on the stack (rather
+; than the default of 2 8-byte slots).
+define float @test_hfa_block([7 x float], [2 x float] %in) {
+; CHECK-LABEL: test_hfa_block:
+; CHECK: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp]
+; CHECK: fadd s0, [[LHS]], [[RHS]]
+
+  %lhs = extractvalue [2 x float] %in, 0
+  %rhs = extractvalue [2 x float] %in, 1
+  %sum = fadd float %lhs, %rhs
+  ret float %sum
+}
+
+; Check that an HFA prevents backfilling of VFP registers (i.e. %rhs must go on
+; the stack rather than in s7).
+define float @test_hfa_block_consume([7 x float], [2 x float] %in, float %rhs) {
+; CHECK-LABEL: test_hfa_block_consume:
+; CHECK-DAG: ldr [[LHS:s[0-9]+]], [sp]
+; CHECK-DAG: ldr [[RHS:s[0-9]+]], [sp, #8]
+; CHECK: fadd s0, [[LHS]], [[RHS]]
+
+  %lhs = extractvalue [2 x float] %in, 0
+  %sum = fadd float %lhs, %rhs
+  ret float %sum
+}
+
+define float @test_hfa_stackalign([8 x float], [1 x float], [2 x float] %in) {
+; CHECK-LABEL: test_hfa_stackalign:
+; CHECK-AAPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #8]
+; CHECK-DARWINPCS: ldp [[LHS:s[0-9]+]], [[RHS:s[0-9]+]], [sp, #4]
+; CHECK: fadd s0, [[LHS]], [[RHS]]
+  %lhs = extractvalue [2 x float] %in, 0
+  %rhs = extractvalue [2 x float] %in, 1
+  %sum = fadd float %lhs, %rhs
+  ret float %sum
+}
+
+; An HFA that ends up on the stack should not have any effect on where
+; integer-based arguments go.
+define i64 @test_hfa_ignores_gprs([7 x float], [2 x float] %in, i64, i64 %res) {
+; CHECK-LABEL: test_hfa_ignores_gprs:
+; CHECK: mov x0, x1
+  ret i64 %res
+}
+
+; [2 x float] should not be promoted to double by the Darwin varargs handling,
+; but should go in an 8-byte aligned slot.
+define void @test_varargs_stackalign() {
+; CHECK-LABEL: test_varargs_stackalign:
+; CHECK-DARWINPCS: stp {{w[0-9]+}}, {{w[0-9]+}}, [sp, #16]
+
+  call void(...)* @callee([3 x float] undef, [2 x float] [float 1.0, float 2.0])
+  ret void
+}
+
+define i64 @test_smallstruct_block([7 x i64], [2 x i64] %in) {
+; CHECK-LABEL: test_smallstruct_block:
+; CHECK: ldp [[LHS:x[0-9]+]], [[RHS:x[0-9]+]], [sp]
+; CHECK: add x0, [[LHS]], [[RHS]]
+  %lhs = extractvalue [2 x i64] %in, 0
+  %rhs = extractvalue [2 x i64] %in, 1
+  %sum = add i64 %lhs, %rhs
+  ret i64 %sum
+}
+
+; Check that a small struct prevents backfilling of registers (i.e. %rhs
+; must go on the stack rather than in x7).
+define i64 @test_smallstruct_block_consume([7 x i64], [2 x i64] %in, i64 %rhs) {
+; CHECK-LABEL: test_smallstruct_block_consume:
+; CHECK-DAG: ldr [[LHS:x[0-9]+]], [sp]
+; CHECK-DAG: ldr [[RHS:x[0-9]+]], [sp, #16]
+; CHECK: add x0, [[LHS]], [[RHS]]
+
+  %lhs = extractvalue [2 x i64] %in, 0
+  %sum = add i64 %lhs, %rhs
+  ret i64 %sum
+}
+
+define <1 x i64> @test_v1i64_blocked([7 x double], [2 x <1 x i64>] %in) {
+; CHECK-LABEL: test_v1i64_blocked:
+; CHECK: ldr d0, [sp]
+  %val = extractvalue [2 x <1 x i64>] %in, 0
+  ret <1 x i64> %val
+}
+
+define <1 x double> @test_v1f64_blocked([7 x double], [2 x <1 x double>] %in) {
+; CHECK-LABEL: test_v1f64_blocked:
+; CHECK: ldr d0, [sp]
+  %val = extractvalue [2 x <1 x double>] %in, 0
+  ret <1 x double> %val
+}
+
+define <2 x i32> @test_v2i32_blocked([7 x double], [2 x <2 x i32>] %in) {
+; CHECK-LABEL: test_v2i32_blocked:
+; CHECK: ldr d0, [sp]
+  %val = extractvalue [2 x <2 x i32>] %in, 0
+  ret <2 x i32> %val
+}
+
+define <2 x float> @test_v2f32_blocked([7 x double], [2 x <2 x float>] %in) {
+; CHECK-LABEL: test_v2f32_blocked:
+; CHECK: ldr d0, [sp]
+  %val = extractvalue [2 x <2 x float>] %in, 0
+  ret <2 x float> %val
+}
+
+define <4 x i16> @test_v4i16_blocked([7 x double], [2 x <4 x i16>] %in) {
+; CHECK-LABEL: test_v4i16_blocked:
+; CHECK: ldr d0, [sp]
+  %val = extractvalue [2 x <4 x i16>] %in, 0
+  ret <4 x i16> %val
+}
+
+define <4 x half> @test_v4f16_blocked([7 x double], [2 x <4 x half>] %in) {
+; CHECK-LABEL: test_v4f16_blocked:
+; CHECK: ldr d0, [sp]
+  %val = extractvalue [2 x <4 x half>] %in, 0
+  ret <4 x half> %val
+}
+
+define <8 x i8> @test_v8i8_blocked([7 x double], [2 x <8 x i8>] %in) {
+; CHECK-LABEL: test_v8i8_blocked:
+; CHECK: ldr d0, [sp]
+  %val = extractvalue [2 x <8 x i8>] %in, 0
+  ret <8 x i8> %val
+}
+
+define <2 x i64> @test_v2i64_blocked([7 x double], [2 x <2 x i64>] %in) {
+; CHECK-LABEL: test_v2i64_blocked:
+; CHECK: ldr q0, [sp]
+  %val = extractvalue [2 x <2 x i64>] %in, 0
+  ret <2 x i64> %val
+}
+
+define <2 x double> @test_v2f64_blocked([7 x double], [2 x <2 x double>] %in) {
+; CHECK-LABEL: test_v2f64_blocked:
+; CHECK: ldr q0, [sp]
+  %val = extractvalue [2 x <2 x double>] %in, 0
+  ret <2 x double> %val
+}
+
+define <4 x i32> @test_v4i32_blocked([7 x double], [2 x <4 x i32>] %in) {
+; CHECK-LABEL: test_v4i32_blocked:
+; CHECK: ldr q0, [sp]
+  %val = extractvalue [2 x <4 x i32>] %in, 0
+  ret <4 x i32> %val
+}
+
+define <4 x float> @test_v4f32_blocked([7 x double], [2 x <4 x float>] %in) {
+; CHECK-LABEL: test_v4f32_blocked:
+; CHECK: ldr q0, [sp]
+  %val = extractvalue [2 x <4 x float>] %in, 0
+  ret <4 x float> %val
+}
+
+define <8 x i16> @test_v8i16_blocked([7 x double], [2 x <8 x i16>] %in) {
+; CHECK-LABEL: test_v8i16_blocked:
+; CHECK: ldr q0, [sp]
+  %val = extractvalue [2 x <8 x i16>] %in, 0
+  ret <8 x i16> %val
+}
+
+define <8 x half> @test_v8f16_blocked([7 x double], [2 x <8 x half>] %in) {
+; CHECK-LABEL: test_v8f16_blocked:
+; CHECK: ldr q0, [sp]
+  %val = extractvalue [2 x <8 x half>] %in, 0
+  ret <8 x half> %val
+}
+
+define <16 x i8> @test_v16i8_blocked([7 x double], [2 x <16 x i8>] %in) {
+; CHECK-LABEL: test_v16i8_blocked:
+; CHECK: ldr q0, [sp]
+  %val = extractvalue [2 x <16 x i8>] %in, 0
+  ret <16 x i8> %val
+}
+
+define half @test_f16_blocked([7 x double], [2 x half] %in) {
+; CHECK-LABEL: test_f16_blocked:
+; CHECK: ldr h0, [sp]
+  %val = extractvalue [2 x half] %in, 0
+  ret half %val
+}
diff --git a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
index 2b083d804912..8b88c0b40887 100644
--- a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
+++ b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll
@@ -11,35 +11,35 @@ if.then24:                                        ; preds = %entry
   unreachable
 
 if.else295:                                       ; preds = %entry
-  call void @llvm.dbg.declare(metadata !{i32* %do_tab_convert}, metadata !16), !dbg !18
+  call void @llvm.dbg.declare(metadata i32* %do_tab_convert, metadata !16, metadata !{!"0x102"}), !dbg !18
   store i32 0, i32* %do_tab_convert, align 4, !dbg !19
   unreachable
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.gv = !{!0}
 !llvm.dbg.sp = !{!1, !7, !10, !11, !12}
 
-!0 = metadata !{i32 589876, i32 0, metadata !1, metadata !"vsplive", metadata !"vsplive", metadata !"", metadata !2, i32 617, metadata !6, i32 1, i32 1, null, null} ; [ DW_TAG_variable ]
-!1 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"drt_vsprintf", metadata !"drt_vsprintf", metadata !"", i32 616, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 589865, metadata !20} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589841, metadata !20, i32 12, metadata !"clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 589845, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !5, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{i32 589860, null, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"putc_mem", metadata !"putc_mem", metadata !"", i32 30, metadata !8, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 589845, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !9, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!9 = metadata !{null}
-!10 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"print_double", metadata !"print_double", metadata !"", i32 203, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"print_number", metadata !"print_number", metadata !"", i32 75, metadata !4, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!12 = metadata !{i32 589870, metadata !20, metadata !2, metadata !"get_flags", metadata !"get_flags", metadata !"", i32 508, metadata !8, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 653, i32 5, metadata !14, null}
-!14 = metadata !{i32 589835, metadata !20, metadata !15, i32 652, i32 35, i32 2} ; [ DW_TAG_lexical_block ]
-!15 = metadata !{i32 589835, metadata !20, metadata !1, i32 616, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 590080, metadata !17, metadata !"do_tab_convert", metadata !2, i32 853, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
-!17 = metadata !{i32 589835, metadata !20, metadata !14, i32 850, i32 12, i32 33} ; [ DW_TAG_lexical_block ]
-!18 = metadata !{i32 853, i32 11, metadata !17, null}
-!19 = metadata !{i32 853, i32 29, metadata !17, null}
-!20 = metadata !{metadata !"print.i", metadata !"/Volumes/Ebi/echeng/radars/r9146594"}
-!21 = metadata !{i32 0}
+!0 = !{!"0x34\00vsplive\00vsplive\00\00617\001\001", !1, !2, !6, null, null} ; [ DW_TAG_variable ]
+!1 = !{!"0x2e\00drt_vsprintf\00drt_vsprintf\00\00616\000\001\000\006\00256\000\000", !20, !2, !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\0012\00clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)\001\00\000\00\000", !20, !21, !21, null, null, null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !20, !2, null, !5, i32 0} ; [ DW_TAG_subroutine_type ]
+!5 = !{!6}
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !3} ; [ DW_TAG_base_type ]
+!7 = !{!"0x2e\00putc_mem\00putc_mem\00\0030\001\001\000\006\00256\000\000", !20, !2, !8, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!8 = !{!"0x15\00\000\000\000\000\000\000", !20, !2, null, !9, i32 0} ; [ DW_TAG_subroutine_type ]
+!9 = !{null}
+!10 = !{!"0x2e\00print_double\00print_double\00\00203\001\001\000\006\00256\000\000", !20, !2, !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!11 = !{!"0x2e\00print_number\00print_number\00\0075\001\001\000\006\00256\000\000", !20, !2, !4, i32 0, null, null, null, null} ; [ DW_TAG_subprogram ]
+!12 = !{!"0x2e\00get_flags\00get_flags\00\00508\001\001\000\006\00256\000\000", !20, !2, !8, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!13 = !MDLocation(line: 653, column: 5, scope: !14)
+!14 = !{!"0xb\00652\0035\002", !20, !15} ; [ DW_TAG_lexical_block ]
+!15 = !{!"0xb\00616\001\000", !20, !1} ; [ DW_TAG_lexical_block ]
+!16 = !{!"0x100\00do_tab_convert\00853\000", !17, !2, !6} ; [ DW_TAG_auto_variable ]
+!17 = !{!"0xb\00850\0012\0033", !20, !14} ; [ DW_TAG_lexical_block ]
+!18 = !MDLocation(line: 853, column: 11, scope: !17)
+!19 = !MDLocation(line: 853, column: 29, scope: !17)
+!20 = !{!"print.i", !"/Volumes/Ebi/echeng/radars/r9146594"}
+!21 = !{i32 0}
diff --git a/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll b/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
index 8f99bc30a554..a83f1646ef62 100644
--- a/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
+++ b/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
@@ -12,7 +12,7 @@ entry:
 
 for.body:
 ; CHECK: for.body
-; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}]
+; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}]
 ; CHECK: add x[[REG:[0-9]+]],
 ; CHECK:                      x[[REG]], #1, lsl  #12
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
diff --git a/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll b/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll
index 4b037db9c84b..b5b1b70975de 100644
--- a/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll
+++ b/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll
@@ -43,8 +43,8 @@ entry:
 
 !llvm.module.flags = !{!0, !1, !2, !3}
 
-!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
-!4 = metadata !{}
+!0 = !{i32 1, !"Objective-C Version", i32 2}
+!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
+!4 = !{}
diff --git a/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
index 168e921bcc02..4db1f59a2c6c 100644
--- a/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
+++ b/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=arm64 -O0 < %s | FileCheck %s
-; RUN: llc -march=arm64 -O3 < %s | FileCheck %s
+; RUN: llc -march=arm64 -O0 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=arm64 -O3 -verify-machineinstrs < %s | FileCheck %s
 
 @.str = private unnamed_addr constant [9 x i8] c"%lf %lu\0A\00", align 1
 @.str1 = private unnamed_addr constant [8 x i8] c"%lf %u\0A\00", align 1
@@ -61,7 +61,7 @@ entry:
 
 !llvm.module.flags = !{!0, !1, !2, !3}
 
-!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!0 = !{i32 1, !"Objective-C Version", i32 2}
+!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
diff --git a/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
index c4597d5a4815..6266d1cc9b33 100644
--- a/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
+++ b/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
@@ -1,15 +1,36 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false | FileCheck %s
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false | FileCheck %s -check-prefix=GENERIC
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-NOOPT
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-OPT
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=true | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-NOOPT
+; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=generic -aarch64-simd-scalar=true -asm-verbose=false -disable-adv-copy-opt=false | FileCheck %s -check-prefix=GENERIC -check-prefix=GENERIC-OPT
 
 define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
 ; CHECK-LABEL: bar:
 ; CHECK: add.2d	v[[REG:[0-9]+]], v0, v1
 ; CHECK: add	d[[REG3:[0-9]+]], d[[REG]], d1
+; Without advanced copy optimization, we end up with cross register
+; banks copies that cannot be coalesced.
+; CHECK-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
+; With advanced copy optimization, we end up with just one copy
+; to insert the computed high part into the V register. 
+; CHECK-OPT-NOT: fmov
 ; CHECK: sub	d[[REG2:[0-9]+]], d[[REG]], d1
+; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
+; CHECK-NOOPT: fmov d0, [[COPY_REG3]]
+; CHECK-OPT-NOT: fmov
+; CHECK: ins.d v0[1], [[COPY_REG2]]
+; CHECK-NEXT: ret
+;
 ; GENERIC-LABEL: bar:
 ; GENERIC: add	v[[REG:[0-9]+]].2d, v0.2d, v1.2d
 ; GENERIC: add	d[[REG3:[0-9]+]], d[[REG]], d1
+; GENERIC-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
+; GENERIC-OPT-NOT: fmov
 ; GENERIC: sub	d[[REG2:[0-9]+]], d[[REG]], d1
+; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
+; GENERIC-NOOPT: fmov d0, [[COPY_REG3]]
+; GENERIC-OPT-NOT: fmov
+; GENERIC: ins v0.d[1], [[COPY_REG2]]
+; GENERIC-NEXT: ret
   %add = add <2 x i64> %a, %b
   %vgetq_lane = extractelement <2 x i64> %add, i32 0
   %vgetq_lane2 = extractelement <2 x i64> %b, i32 0
@@ -65,3 +86,44 @@ define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
   %retval = bitcast i64 %sub.i to double
   ret double %retval
 }
+define double @and_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
+; CHECK-LABEL: and_su64:
+; CHECK: and.8b v0, v1, v0
+; CHECK-NEXT: ret
+; GENERIC-LABEL: and_su64:
+; GENERIC: and v0.8b, v1.8b, v0.8b
+; GENERIC-NEXT: ret
+  %vecext = extractelement <2 x i64> %a, i32 0
+  %vecext1 = extractelement <2 x i64> %b, i32 0
+  %or.i = and i64 %vecext1, %vecext
+  %retval = bitcast i64 %or.i to double
+  ret double %retval
+}
+
+define double @orr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
+; CHECK-LABEL: orr_su64:
+; CHECK: orr.8b v0, v1, v0
+; CHECK-NEXT: ret
+; GENERIC-LABEL: orr_su64:
+; GENERIC: orr v0.8b, v1.8b, v0.8b
+; GENERIC-NEXT: ret
+  %vecext = extractelement <2 x i64> %a, i32 0
+  %vecext1 = extractelement <2 x i64> %b, i32 0
+  %or.i = or i64 %vecext1, %vecext
+  %retval = bitcast i64 %or.i to double
+  ret double %retval
+}
+
+define double @xorr_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
+; CHECK-LABEL: xorr_su64:
+; CHECK: eor.8b v0, v1, v0
+; CHECK-NEXT: ret
+; GENERIC-LABEL: xorr_su64:
+; GENERIC: eor v0.8b, v1.8b, v0.8b
+; GENERIC-NEXT: ret
+  %vecext = extractelement <2 x i64> %a, i32 0
+  %vecext1 = extractelement <2 x i64> %b, i32 0
+  %xor.i = xor i64 %vecext1, %vecext
+  %retval = bitcast i64 %xor.i to double
+  ret double %retval
+}
diff --git a/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll
index 1b2d54317c23..1bb47fc00b2b 100644
--- a/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll
+++ b/test/CodeGen/AArch64/arm64-EXT-undef-mask.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc -O0 -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs < %s | FileCheck %s
 
 ; The following 2 test cases test shufflevector with beginning UNDEF mask.
 define <8 x i16> @test_vext_undef_traverse(<8 x i16> %in) {
diff --git a/test/CodeGen/AArch64/arm64-aapcs-be.ll b/test/CodeGen/AArch64/arm64-aapcs-be.ll
new file mode 100644
index 000000000000..f27570acc820
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-aapcs-be.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mtriple=aarch64_be-none-eabi -fast-isel=false < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64_be-none-eabi -fast-isel=true < %s | FileCheck %s
+
+; Check narrow argument passing via stack - callee end
+define i32 @test_narrow_args_callee(i64 %x0, i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i8 %c, i16 %s) #0 {
+entry:
+  %conv = zext i8 %c to i32
+  %conv1 = sext i16 %s to i32
+  %add = add nsw i32 %conv1, %conv
+; CHECK-LABEL: test_narrow_args_callee:
+; CHECK-DAG: ldrb w{{[0-9]}}, [sp, #7]
+; CHECK-DAG: ldr{{s?}}h w{{[0-9]}}, [sp, #14]
+  ret i32 %add
+}
+
+; Check narrow argument passing via stack - caller end
+define i32 @test_narrow_args_caller() #0 {
+entry:
+  %call = tail call i32 @test_narrow_args_callee(i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i8 8, i16 9)
+; CHECK-LABEL: test_narrow_args_caller:
+; CHECK-DAG: strh w{{[0-9]}}, [sp, #14]
+; CHECK-DAG: strb w{{[0-9]}}, [sp, #7]
+  ret i32 %call
+}
+
+define float @test_block_addr([8 x float], [1 x float] %in) {
+; CHECK-LABEL: test_block_addr:
+; CHECK: ldr s0, [sp]
+  %val = extractvalue [1 x float] %in, 0
+  ret float %val
+}
+
+define void @test_block_addr_callee() {
+; CHECK-LABEL: test_block_addr_callee:
+; CHECK: str {{[a-z0-9]+}}, [sp]
+; CHECK: bl test_block_addr
+  %val = insertvalue [1 x float] undef, float 0.0, 0
+  call float @test_block_addr([8 x float] undef, [1 x float] %val)
+  ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll
index 127a7cc0a155..41c3ad5766c3 100644
--- a/test/CodeGen/AArch64/arm64-aapcs.ll
+++ b/test/CodeGen/AArch64/arm64-aapcs.ll
@@ -113,7 +113,7 @@ entry:
 ; Check that f16 can be passed and returned (ACLE 2.0 extension)
 define half @test_half(float, half %arg) {
 ; CHECK-LABEL: test_half:
-; CHECK: mov v0.16b, v{{[0-9]+}}.16b
+; CHECK: mov v0.16b, v1.16b
   ret half %arg;
 }
 
@@ -123,3 +123,31 @@ define half @test_half_const() {
 ; CHECK: ldr h0, [x{{[0-9]+}}, :lo12:{{.*}}]
   ret half 0xH4248
 }
+
+; Check that v4f16 can be passed and returned in registers
+define <4 x half> @test_v4_half_register(float, <4 x half> %arg) {
+; CHECK-LABEL: test_v4_half_register:
+; CHECK: mov v0.16b, v1.16b
+  ret <4 x half> %arg;
+}
+
+; Check that v8f16 can be passed and returned in registers
+define <8 x half> @test_v8_half_register(float, <8 x half> %arg) {
+; CHECK-LABEL: test_v8_half_register:
+; CHECK: mov v0.16b, v1.16b
+  ret <8 x half> %arg;
+}
+
+; Check that v4f16 can be passed and returned on the stack
+define <4 x half> @test_v4_half_stack([8 x <2 x double>], <4 x half> %arg) {
+; CHECK-LABEL: test_v4_half_stack:
+; CHECK: ldr d0, [sp]
+  ret <4 x half> %arg;
+}
+
+; Check that v8f16 can be passed and returned on the stack
+define <8 x half> @test_v8_half_stack([8 x <2 x double>], <8 x half> %arg) {
+; CHECK-LABEL: test_v8_half_stack:
+; CHECK: ldr q0, [sp]
+  ret <8 x half> %arg;
+}
diff --git a/test/CodeGen/AArch64/arm64-abi.ll b/test/CodeGen/AArch64/arm64-abi.ll
index a955029b3725..8a6b64d6ff35 100644
--- a/test/CodeGen/AArch64/arm64-abi.ll
+++ b/test/CodeGen/AArch64/arm64-abi.ll
@@ -1,7 +1,5 @@
-; RUN: llc < %s -debug -march=arm64 -mcpu=cyclone -enable-misched=false | FileCheck %s
-; RUN: llc < %s -O0 | FileCheck -check-prefix=FAST %s
-; REQUIRES: asserts
-target triple = "arm64-apple-darwin"
+; RUN: llc     -mtriple=arm64-apple-darwin -mcpu=cyclone -enable-misched=false < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=arm64-apple-darwin                                     < %s | FileCheck --check-prefix=FAST %s
 
 ; rdar://9932559
 define i64 @i8i16callee(i64 %a1, i64 %a2, i64 %a3, i8 signext %a4, i16 signext %a5, i64 %a6, i64 %a7, i64 %a8, i8 signext %b1, i16 signext %b2, i8 signext %b3, i8 signext %b4) nounwind readnone noinline {
@@ -42,7 +40,7 @@ entry:
 
 define i32 @i8i16caller() nounwind readnone {
 entry:
-; CHECK: i8i16caller
+; CHECK-LABEL: i8i16caller
 ; The 8th, 9th, 10th and 11th arguments are passed at sp, sp+2, sp+4, sp+5.
 ; They are i8, i16, i8 and i8.
 ; CHECK-DAG: strb {{w[0-9]+}}, [sp, #5]
@@ -50,7 +48,7 @@ entry:
 ; CHECK-DAG: strh {{w[0-9]+}}, [sp, #2]
 ; CHECK-DAG: strb {{w[0-9]+}}, [sp]
 ; CHECK: bl
-; FAST: i8i16caller
+; FAST-LABEL: i8i16caller
 ; FAST: strb {{w[0-9]+}}, [sp]
 ; FAST: strh {{w[0-9]+}}, [sp, #2]
 ; FAST: strb {{w[0-9]+}}, [sp, #4]
@@ -64,7 +62,7 @@ entry:
 ; rdar://12651543
 define double @circle_center([2 x float] %a) nounwind ssp {
   %call = tail call double @ext([2 x float] %a) nounwind
-; CHECK: circle_center
+; CHECK-LABEL: circle_center
 ; CHECK: bl
   ret double %call
 }
@@ -75,10 +73,10 @@ declare double @ext([2 x float])
 ; A double argument will be passed on stack, so vecotr should be at sp+16.
 define double @fixed_4i(<4 x i32>* nocapture %in) nounwind {
 entry:
-; CHECK: fixed_4i
+; CHECK-LABEL: fixed_4i
 ; CHECK: str [[REG_1:q[0-9]+]], [sp, #16]
-; FAST: fixed_4i
-; FAST: sub sp, sp, #64
+; FAST-LABEL: fixed_4i
+; FAST: sub sp, sp
 ; FAST: mov x[[ADDR:[0-9]+]], sp
 ; FAST: str [[REG_1:q[0-9]+]], [x[[ADDR]], #16]
   %0 = load <4 x i32>* %in, align 16
@@ -93,7 +91,7 @@ declare double @args_vec_4i(double, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>,
 define void @test1(float %f1, double %d1, double %d2, double %d3, double %d4,
        double %d5, double %d6, double %d7, double %d8, i32 %i) nounwind ssp {
 entry:
-; CHECK: test1
+; CHECK-LABEL: test1
 ; CHECK: ldr [[REG_1:d[0-9]+]], [sp]
 ; CHECK: scvtf [[REG_2:s[0-9]+]], w0
 ; CHECK: fadd s0, [[REG_2]], s0
@@ -110,7 +108,7 @@ entry:
 define void @test2(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
             i32 %i7, i32 %i8, i32 %i9, float %d1) nounwind ssp {
 entry:
-; CHECK: test2
+; CHECK-LABEL: test2
 ; CHECK: scvtf [[REG_2:s[0-9]+]], w0
 ; CHECK: fadd s0, [[REG_2]], s0
 ; CHECK: ldr [[REG_1:s[0-9]+]], [sp]
@@ -129,9 +127,9 @@ entry:
 ; Check alignment on stack for v64, f64, i64, f32, i32.
 define double @test3(<2 x i32>* nocapture %in) nounwind {
 entry:
-; CHECK: test3
+; CHECK-LABEL: test3
 ; CHECK: str [[REG_1:d[0-9]+]], [sp, #8]
-; FAST: test3
+; FAST-LABEL: test3
 ; FAST: sub sp, sp, #32
 ; FAST: mov x[[ADDR:[0-9]+]], sp
 ; FAST: str [[REG_1:d[0-9]+]], [x[[ADDR]], #8]
@@ -146,7 +144,7 @@ declare double @args_vec_2i(double, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>,
 
 define double @test4(double* nocapture %in) nounwind {
 entry:
-; CHECK: test4
+; CHECK-LABEL: test4
 ; CHECK: str [[REG_1:d[0-9]+]], [sp, #8]
 ; CHECK: str [[REG_2:w[0-9]+]], [sp]
 ; CHECK: orr w0, wzr, #0x3
@@ -161,7 +159,7 @@ declare double @args_f64(double, double, double, double, double, double, double,
 
 define i64 @test5(i64* nocapture %in) nounwind {
 entry:
-; CHECK: test5
+; CHECK-LABEL: test5
 ; CHECK: strb [[REG_3:w[0-9]+]], [sp, #16]
 ; CHECK: str [[REG_1:x[0-9]+]], [sp, #8]
 ; CHECK: str [[REG_2:w[0-9]+]], [sp]
@@ -175,7 +173,7 @@ declare i64 @args_i64(i64, i64, i64, i64, i64, i64, i64, i64, i32, i64,
 
 define i32 @test6(float* nocapture %in) nounwind {
 entry:
-; CHECK: test6
+; CHECK-LABEL: test6
 ; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8]
 ; CHECK: str [[REG_1:s[0-9]+]], [sp, #4]
 ; CHECK: strh [[REG_3:w[0-9]+]], [sp]
@@ -192,7 +190,7 @@ declare i32 @args_f32(i32, i32, i32, i32, i32, i32, i32, i32,
 
 define i32 @test7(i32* nocapture %in) nounwind {
 entry:
-; CHECK: test7
+; CHECK-LABEL: test7
 ; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8]
 ; CHECK: str [[REG_1:w[0-9]+]], [sp, #4]
 ; CHECK: strh [[REG_3:w[0-9]+]], [sp]
@@ -206,13 +204,13 @@ declare i32 @args_i32(i32, i32, i32, i32, i32, i32, i32, i32, i16 signext, i32,
 
 define i32 @test8(i32 %argc, i8** nocapture %argv) nounwind {
 entry:
-; CHECK: test8
+; CHECK-LABEL: test8
 ; CHECK: strb {{w[0-9]+}}, [sp, #3]
 ; CHECK: strb wzr, [sp, #2]
 ; CHECK: strb {{w[0-9]+}}, [sp, #1]
 ; CHECK: strb wzr, [sp]
 ; CHECK: bl
-; FAST: test8
+; FAST-LABEL: test8
 ; FAST: strb {{w[0-9]+}}, [sp]
 ; FAST: strb {{w[0-9]+}}, [sp, #1]
 ; FAST: strb {{w[0-9]+}}, [sp, #2]
diff --git a/test/CodeGen/AArch64/arm64-abi_align.ll b/test/CodeGen/AArch64/arm64-abi_align.ll
index 44c5a07ce39a..e03d7fadaf44 100644
--- a/test/CodeGen/AArch64/arm64-abi_align.ll
+++ b/test/CodeGen/AArch64/arm64-abi_align.ll
@@ -34,7 +34,7 @@ target triple = "arm64-apple-darwin"
 ; structs with size < 8 bytes, passed via i64 in x1 and x2
 define i32 @f38(i32 %i, i64 %s1.coerce, i64 %s2.coerce) #0 {
 entry:
-; CHECK: f38
+; CHECK-LABEL: f38
 ; CHECK: add w[[A:[0-9]+]], w1, w0
 ; CHECK: add {{w[0-9]+}}, w[[A]], w2
   %s1.sroa.0.0.extract.trunc = trunc i64 %s1.coerce to i32
@@ -56,7 +56,7 @@ entry:
 
 define i32 @caller38() #1 {
 entry:
-; CHECK: caller38
+; CHECK-LABEL: caller38
 ; CHECK: ldr x1,
 ; CHECK: ldr x2,
   %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
@@ -72,7 +72,7 @@ declare i32 @f38_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 ; i9 at [sp]
 define i32 @caller38_stack() #1 {
 entry:
-; CHECK: caller38_stack
+; CHECK-LABEL: caller38_stack
 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
 ; CHECK: movz w[[C:[0-9]+]], #0x9
 ; CHECK: str w[[C]], [sp]
@@ -87,7 +87,7 @@ entry:
 ; passed via i128 in x1 and x3
 define i32 @f39(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
 entry:
-; CHECK: f39
+; CHECK-LABEL: f39
 ; CHECK: add w[[A:[0-9]+]], w1, w0
 ; CHECK: add {{w[0-9]+}}, w[[A]], w3
   %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
@@ -109,7 +109,7 @@ entry:
 
 define i32 @caller39() #1 {
 entry:
-; CHECK: caller39
+; CHECK-LABEL: caller39
 ; CHECK: ldp x1, x2,
 ; CHECK: ldp x3, x4,
   %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
@@ -125,7 +125,7 @@ declare i32 @f39_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 ; passed on stack at [sp+16] and [sp+32]
 define i32 @caller39_stack() #1 {
 entry:
-; CHECK: caller39_stack
+; CHECK-LABEL: caller39_stack
 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
 ; CHECK: movz w[[C:[0-9]+]], #0x9
@@ -141,7 +141,7 @@ entry:
 ; passed via i128 in x1 and x3
 define i32 @f40(i32 %i, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce) #0 {
 entry:
-; CHECK: f40
+; CHECK-LABEL: f40
 ; CHECK: add w[[A:[0-9]+]], w1, w0
 ; CHECK: add {{w[0-9]+}}, w[[A]], w3
   %s1.coerce.fca.0.extract = extractvalue [2 x i64] %s1.coerce, 0
@@ -165,7 +165,7 @@ entry:
 
 define i32 @caller40() #1 {
 entry:
-; CHECK: caller40
+; CHECK-LABEL: caller40
 ; CHECK: ldp x1, x2,
 ; CHECK: ldp x3, x4,
   %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
@@ -181,7 +181,7 @@ declare i32 @f40_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 ; passed on stack at [sp+8] and [sp+24]
 define i32 @caller40_stack() #1 {
 entry:
-; CHECK: caller40_stack
+; CHECK-LABEL: caller40_stack
 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #24]
 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
 ; CHECK: movz w[[C:[0-9]+]], #0x9
@@ -197,7 +197,7 @@ entry:
 ; passed via i128 in x1 and x3
 define i32 @f41(i32 %i, i128 %s1.coerce, i128 %s2.coerce) #0 {
 entry:
-; CHECK: f41
+; CHECK-LABEL: f41
 ; CHECK: add w[[A:[0-9]+]], w1, w0
 ; CHECK: add {{w[0-9]+}}, w[[A]], w3
   %s1.sroa.0.0.extract.trunc = trunc i128 %s1.coerce to i32
@@ -219,7 +219,7 @@ entry:
 
 define i32 @caller41() #1 {
 entry:
-; CHECK: caller41
+; CHECK-LABEL: caller41
 ; CHECK: ldp x1, x2,
 ; CHECK: ldp x3, x4,
   %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
@@ -235,7 +235,7 @@ declare i32 @f41_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 ; passed on stack at [sp+16] and [sp+32]
 define i32 @caller41_stack() #1 {
 entry:
-; CHECK: caller41_stack
+; CHECK-LABEL: caller41_stack
 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #32]
 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
 ; CHECK: movz w[[C:[0-9]+]], #0x9
@@ -250,7 +250,7 @@ entry:
 ; structs with size of 22 bytes, passed indirectly in x1 and x2
 define i32 @f42(i32 %i, %struct.s42* nocapture %s1, %struct.s42* nocapture %s2) #2 {
 entry:
-; CHECK: f42
+; CHECK-LABEL: f42
 ; CHECK: ldr w[[A:[0-9]+]], [x1]
 ; CHECK: ldr w[[B:[0-9]+]], [x2]
 ; CHECK: add w[[C:[0-9]+]], w[[A]], w0
@@ -280,7 +280,7 @@ entry:
 ; For s1, we allocate a 22-byte space, pass its address via x1
 define i32 @caller42() #3 {
 entry:
-; CHECK: caller42
+; CHECK-LABEL: caller42
 ; CHECK: str {{x[0-9]+}}, [sp, #48]
 ; CHECK: str {{q[0-9]+}}, [sp, #32]
 ; CHECK: str {{x[0-9]+}}, [sp, #16]
@@ -290,7 +290,7 @@ entry:
 ; Space for s1 is allocated at sp+32
 ; Space for s2 is allocated at sp
 
-; FAST: caller42
+; FAST-LABEL: caller42
 ; FAST: sub sp, sp, #96
 ; Space for s1 is allocated at fp-24 = sp+72
 ; Space for s2 is allocated at sp+48
@@ -316,7 +316,7 @@ declare i32 @f42_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 
 define i32 @caller42_stack() #3 {
 entry:
-; CHECK: caller42_stack
+; CHECK-LABEL: caller42_stack
 ; CHECK: mov x29, sp
 ; CHECK: sub sp, sp, #96
 ; CHECK: stur {{x[0-9]+}}, [x29, #-16]
@@ -333,7 +333,7 @@ entry:
 ; CHECK: movz w[[C:[0-9]+]], #0x9
 ; CHECK: str w[[C]], [sp]
 
-; FAST: caller42_stack
+; FAST-LABEL: caller42_stack
 ; Space for s1 is allocated at fp-24
 ; Space for s2 is allocated at fp-48
 ; FAST: sub x[[A:[0-9]+]], x29, #24
@@ -359,12 +359,12 @@ entry:
 ; passed indirectly in x1 and x2
 define i32 @f43(i32 %i, %struct.s43* nocapture %s1, %struct.s43* nocapture %s2) #2 {
 entry:
-; CHECK: f43
+; CHECK-LABEL: f43
 ; CHECK: ldr w[[A:[0-9]+]], [x1]
 ; CHECK: ldr w[[B:[0-9]+]], [x2]
 ; CHECK: add w[[C:[0-9]+]], w[[A]], w0
 ; CHECK: add {{w[0-9]+}}, w[[C]], w[[B]]
-; FAST: f43
+; FAST-LABEL: f43
 ; FAST: ldr w[[A:[0-9]+]], [x1]
 ; FAST: ldr w[[B:[0-9]+]], [x2]
 ; FAST: add w[[C:[0-9]+]], w[[A]], w0
@@ -388,7 +388,7 @@ entry:
 
 define i32 @caller43() #3 {
 entry:
-; CHECK: caller43
+; CHECK-LABEL: caller43
 ; CHECK: str {{q[0-9]+}}, [sp, #48]
 ; CHECK: str {{q[0-9]+}}, [sp, #32]
 ; CHECK: str {{q[0-9]+}}, [sp, #16]
@@ -398,7 +398,7 @@ entry:
 ; Space for s1 is allocated at sp+32
 ; Space for s2 is allocated at sp
 
-; FAST: caller43
+; FAST-LABEL: caller43
 ; FAST: mov x29, sp
 ; Space for s1 is allocated at sp+32
 ; Space for s2 is allocated at sp
@@ -428,7 +428,7 @@ declare i32 @f43_stack(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6,
 
 define i32 @caller43_stack() #3 {
 entry:
-; CHECK: caller43_stack
+; CHECK-LABEL: caller43_stack
 ; CHECK: mov x29, sp
 ; CHECK: sub sp, sp, #96
 ; CHECK: stur {{q[0-9]+}}, [x29, #-16]
@@ -445,7 +445,7 @@ entry:
 ; CHECK: movz w[[C:[0-9]+]], #0x9
 ; CHECK: str w[[C]], [sp]
 
-; FAST: caller43_stack
+; FAST-LABEL: caller43_stack
 ; FAST: sub sp, sp, #96
 ; Space for s1 is allocated at fp-32 = sp+64
 ; Space for s2 is allocated at sp+32
@@ -481,13 +481,13 @@ declare i32 @callee_i128_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
 
 define i32 @i128_split() {
 entry:
-; CHECK: i128_split
+; CHECK-LABEL: i128_split
 ; "i128 %0" should be on stack at [sp].
 ; "i32 8" should be on stack at [sp, #16].
 ; CHECK: str {{w[0-9]+}}, [sp, #16]
 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp]
-; FAST: i128_split
-; FAST: sub sp, sp, #48
+; FAST-LABEL: i128_split
+; FAST: sub sp, sp
 ; FAST: mov x[[ADDR:[0-9]+]], sp
 ; FAST: str {{w[0-9]+}}, [x[[ADDR]], #16]
 ; Load/Store opt is disabled with -O0, so the i128 is split.
@@ -504,14 +504,16 @@ declare i32 @callee_i64(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5,
 
 define i32 @i64_split() {
 entry:
-; CHECK: i64_split
+; CHECK-LABEL: i64_split
 ; "i64 %0" should be in register x7.
 ; "i32 8" should be on stack at [sp].
 ; CHECK: ldr x7, [{{x[0-9]+}}]
 ; CHECK: str {{w[0-9]+}}, [sp]
-; FAST: i64_split
+; FAST-LABEL: i64_split
 ; FAST: ldr x7, [{{x[0-9]+}}]
-; FAST: str {{w[0-9]+}}, [sp]
+; FAST: mov x[[R0:[0-9]+]], sp
+; FAST: orr w[[R1:[0-9]+]], wzr, #0x8
+; FAST: str w[[R1]], {{\[}}x[[R0]]{{\]}}
   %0 = load i64* bitcast (%struct.s41* @g41 to i64*), align 16
   %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
                                     i32 6, i32 7, i64 %0, i32 8) #5
@@ -525,8 +527,8 @@ attributes #3 = { nounwind "fp-contract-model"="standard" "relocation-model"="pi
 attributes #4 = { nounwind }
 attributes #5 = { nobuiltin }
 
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"short", metadata !1}
-!4 = metadata !{i64 0, i64 4, metadata !0, i64 4, i64 2, metadata !3, i64 8, i64 4, metadata !0, i64 12, i64 2, metadata !3, i64 16, i64 4, metadata !0, i64 20, i64 2, metadata !3}
+!0 = !{!"int", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"short", !1}
+!4 = !{i64 0, i64 4, !0, i64 4, i64 2, !3, i64 8, i64 4, !0, i64 12, i64 2, !3, i64 16, i64 4, !0, i64 20, i64 2, !3}
diff --git a/test/CodeGen/AArch64/arm64-addr-mode-folding.ll b/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
index 08fb8c90c484..74bb3981ba3e 100644
--- a/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
+++ b/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 -mtriple arm64-apple-ios3 %s -o - | FileCheck %s
+; RUN: llc -O3 -mtriple arm64-apple-ios3 -aarch64-gep-opt=false %s -o - | FileCheck %s
 ; <rdar://problem/13621857>
 
 @block = common global i8* null, align 8
diff --git a/test/CodeGen/AArch64/arm64-addrmode.ll b/test/CodeGen/AArch64/arm64-addrmode.ll
index 700fba80149a..5433a8c312fe 100644
--- a/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -37,9 +37,8 @@ define void @t3() {
 
 ; base + unsigned offset (> imm12 * size of type in bytes)
 ; CHECK: @t4
-; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, #8, lsl #12
-; CHECK: ldr xzr, [
-; CHECK: [[ADDREG]]]
+; CHECK: orr w[[NUM:[0-9]+]], wzr, #0x8000
+; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
 ; CHECK: ret
 define void @t4() {
   %incdec.ptr = getelementptr inbounds i64* @object, i64 4096
@@ -60,9 +59,8 @@ define void @t5(i64 %a) {
 ; base + reg + imm
 ; CHECK: @t6
 ; CHECK: add [[ADDREG:x[0-9]+]], x{{[0-9]+}}, x{{[0-9]+}}, lsl #3
-; CHECK-NEXT: add [[ADDREG]], [[ADDREG]], #8, lsl #12
-; CHECK: ldr xzr, [
-; CHECK: [[ADDREG]]]
+; CHECK-NEXT: orr w[[NUM:[0-9]+]], wzr, #0x8000
+; CHECK: ldr xzr, [x{{[0-9]+}}, x[[NUM]]]
 ; CHECK: ret
 define void @t6(i64 %a) {
   %tmp1 = getelementptr inbounds i64* @object, i64 %a
@@ -70,3 +68,114 @@ define void @t6(i64 %a) {
   %tmp = load volatile i64* %incdec.ptr, align 8
   ret void
 }
+
+; Test base + wide immediate
+define void @t7(i64 %a) {
+; CHECK-LABEL: t7:
+; CHECK: orr w[[NUM:[0-9]+]], wzr, #0xffff
+; CHECK-NEXT: ldr xzr, [x0, x[[NUM]]]
+  %1 = add i64 %a, 65535   ;0xffff
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t8(i64 %a) {
+; CHECK-LABEL: t8:
+; CHECK: movn [[REG:x[0-9]+]], #0x1235
+; CHECK-NEXT: ldr xzr, [x0, [[REG]]]
+  %1 = sub i64 %a, 4662   ;-4662 is 0xffffffffffffedca
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t9(i64 %a) {
+; CHECK-LABEL: t9:
+; CHECK: movn [[REG:x[0-9]+]], #0x1235, lsl #16
+; CHECK-NEXT: ldr xzr, [x0, [[REG]]]
+  %1 = add i64 -305463297, %a   ;-305463297 is 0xffffffffedcaffff
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t10(i64 %a) {
+; CHECK-LABEL: t10:
+; CHECK: movz [[REG:x[0-9]+]], #0x123, lsl #48
+; CHECK-NEXT: ldr xzr, [x0, [[REG]]]
+  %1 = add i64 %a, 81909218222800896   ;0x123000000000000
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t11(i64 %a) {
+; CHECK-LABEL: t11:
+; CHECK: movz w[[NUM:[0-9]+]], #0x123, lsl #16
+; CHECK: movk w[[NUM:[0-9]+]], #0x4567
+; CHECK-NEXT: ldr xzr, [x0, x[[NUM]]]
+  %1 = add i64 %a, 19088743   ;0x1234567
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+; Test some boundaries that should not use movz/movn/orr
+define void @t12(i64 %a) {
+; CHECK-LABEL: t12:
+; CHECK: add [[REG:x[0-9]+]], x0, #4095
+; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
+  %1 = add i64 %a, 4095   ;0xfff
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t13(i64 %a) {
+; CHECK-LABEL: t13:
+; CHECK: sub [[REG:x[0-9]+]], x0, #4095
+; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
+  %1 = add i64 %a, -4095   ;-0xfff
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t14(i64 %a) {
+; CHECK-LABEL: t14:
+; CHECK: add [[REG:x[0-9]+]], x0, #291, lsl #12
+; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
+  %1 = add i64 %a, 1191936   ;0x123000
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t15(i64 %a) {
+; CHECK-LABEL: t15:
+; CHECK: sub [[REG:x[0-9]+]], x0, #291, lsl #12
+; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
+  %1 = add i64 %a, -1191936   ;0xFFFFFFFFFFEDD000
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t16(i64 %a) {
+; CHECK-LABEL: t16:
+; CHECK: ldr xzr, [x0, #28672]
+  %1 = add i64 %a, 28672   ;0x7000
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
+
+define void @t17(i64 %a) {
+; CHECK-LABEL: t17:
+; CHECK: ldur xzr, [x0, #-256]
+  %1 = add i64 %a, -256   ;-0x100
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load volatile i64* %2, align 8
+  ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll
index 3377849f6698..642d72aac47e 100644
--- a/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -29,8 +29,7 @@ define void @fetch_and_nand(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw nand i128* %p, i128 %bits release
   store i128 %val, i128* @var, align 16
   ret void
@@ -45,8 +44,7 @@ define void @fetch_and_or(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw or i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -61,8 +59,7 @@ define void @fetch_and_add(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw add i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -77,8 +74,7 @@ define void @fetch_and_sub(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw sub i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -99,8 +95,7 @@ define void @fetch_and_min(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw min i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -121,8 +116,7 @@ define void @fetch_and_max(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw max i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -143,8 +137,7 @@ define void @fetch_and_umin(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw umin i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
@@ -165,8 +158,7 @@ define void @fetch_and_umax(i128* %p, i128 %bits) {
 ; CHECK: stlxp  [[SCRATCH_RES:w[0-9]+]], [[SCRATCH_REGLO]], [[SCRATCH_REGHI]], [x0]
 ; CHECK: cbnz   [[SCRATCH_RES]], [[LABEL]]
 
-; CHECK-DAG: str    [[DEST_REGHI]]
-; CHECK-DAG: str    [[DEST_REGLO]]
+; CHECK-DAG: stp    [[DEST_REGLO]], [[DEST_REGHI]]
   %val = atomicrmw umax i128* %p, i128 %bits seq_cst
   store i128 %val, i128* @var, align 16
   ret void
diff --git a/test/CodeGen/AArch64/arm64-bcc.ll b/test/CodeGen/AArch64/arm64-bcc.ll
new file mode 100644
index 000000000000..138ae9036092
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-bcc.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -mtriple=arm64-apple-darwin  | FileCheck %s
+; Checks for conditional branch b.vs
+
+; Function Attrs: nounwind
+define i32 @add(i32, i32) {
+entry:
+  %2 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %0, i32 %1)
+  %3 = extractvalue { i32, i1 } %2, 1
+  br i1 %3, label %6, label %4
+
+; <label>:4                                       ; preds = %entry
+  %5 = extractvalue { i32, i1 } %2, 0
+  ret i32 %5
+
+; <label>:6                                       ; preds = %entry
+  tail call void @llvm.trap()
+  unreachable
+; CHECK: b.vs
+}
+
+%S64 = type <{ i64 }>
+%S32 = type <{ i32 }>
+%Sstruct = type <{ %S64, %S32 }>
+
+; Checks for compfail when optimizing csincr-cbz sequence
+
+define { i64, i1 } @foo(i64* , %Sstruct* , i1, i64) {
+entry:
+  %.sroa.0 = alloca i72, align 16
+  %.count.value = getelementptr inbounds %Sstruct* %1, i64 0, i32 0, i32 0
+  %4 = load i64* %.count.value, align 8
+  %.repeatedValue.value = getelementptr inbounds %Sstruct* %1, i64 0, i32 1, i32 0
+  %5 = load i32* %.repeatedValue.value, align 8
+  %6 = icmp eq i64 %4, 0
+  br label %7
+
+; <label>:7                                      ; preds = %entry
+  %.mask58 = and i32 %5, -2048
+  %8 = icmp eq i32 %.mask58, 55296
+  %.not134 = xor i1 %8, true
+  %9 = icmp eq i32 %5, 1114112
+  %or.cond135 = and i1 %9, %.not134
+  br i1 %or.cond135, label %10, label %.loopexit
+
+; <label>:10                                      ; preds = %7
+  %11 = and i32 %5, -2048
+  %12 = icmp eq i32 %11, 55296
+  br i1 %12, label %.loopexit, label %10
+
+
+.loopexit:                                        ; preds = %.entry,%7,%10
+  tail call void @llvm.trap()
+  unreachable
+}
+
+; Function Attrs: nounwind readnone
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
+
+; Function Attrs: noreturn nounwind
+declare void @llvm.trap()
diff --git a/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
index f0e968b2c177..d2985f4dd66a 100644
--- a/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
+++ b/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -O1 -o - | FileCheck %s
-; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -O0 -fast-isel=true -o - | FileCheck %s
+; RUN: llc -mtriple aarch64_be < %s -aarch64-load-store-opt=false -O1 -o - | FileCheck %s
+; RUN: llc -mtriple aarch64_be < %s -aarch64-load-store-opt=false -O0 -fast-isel=true -o - | FileCheck %s
 
 ; CHECK-LABEL: test_i64_f64:
 define void @test_i64_f64(double* %p, i64* %q) {
diff --git a/test/CodeGen/AArch64/arm64-big-endian-eh.ll b/test/CodeGen/AArch64/arm64-big-endian-eh.ll
index 93e7da98de21..a51703a8fc4b 100644
--- a/test/CodeGen/AArch64/arm64-big-endian-eh.ll
+++ b/test/CodeGen/AArch64/arm64-big-endian-eh.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple arm64_be-linux-gnu -filetype obj < %s | llvm-objdump -s - | FileCheck %s
+; RUN: llc -mtriple aarch64_be-linux-gnu -filetype obj < %s | llvm-objdump -s - | FileCheck %s
 
 ; ARM EHABI for big endian
 ; This test case checks whether CIE length record is laid out in big endian format.
diff --git a/test/CodeGen/AArch64/arm64-big-endian-varargs.ll b/test/CodeGen/AArch64/arm64-big-endian-varargs.ll
index d7b26b975231..db1f48c6fd5e 100644
--- a/test/CodeGen/AArch64/arm64-big-endian-varargs.ll
+++ b/test/CodeGen/AArch64/arm64-big-endian-varargs.ll
@@ -3,7 +3,7 @@
 ; Vararg saving must save Q registers using the equivalent of STR/STP.
 
 target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128"
-target triple = "arm64_be-arm-none-eabi"
+target triple = "aarch64_be-arm-none-eabi"
 
 %struct.__va_list = type { i8*, i8*, i8*, i32, i32 }
 
diff --git a/test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll b/test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll
index 1dcccf106a29..cc9badc5c552 100644
--- a/test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll
+++ b/test/CodeGen/AArch64/arm64-big-endian-vector-callee.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -o - | FileCheck %s
-; RUN: llc -mtriple arm64_be < %s -fast-isel=true -aarch64-load-store-opt=false -o - | FileCheck %s
+; RUN: llc -mtriple aarch64_be < %s -aarch64-load-store-opt=false -o - | FileCheck %s
+; RUN: llc -mtriple aarch64_be < %s -fast-isel=true -aarch64-load-store-opt=false -o - | FileCheck %s
 
 ; CHECK-LABEL: test_i64_f64:
 define i64 @test_i64_f64(double %p) {
diff --git a/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll b/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
index 9a12b7a01153..d72d0a5db41e 100644
--- a/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
+++ b/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -o - | FileCheck %s
-; RUN: llc -mtriple arm64_be < %s -aarch64-load-store-opt=false -fast-isel=true -O0 -o - | FileCheck %s
+; RUN: llc -mtriple aarch64_be < %s -aarch64-load-store-opt=false -o - | FileCheck %s
+; RUN: llc -mtriple aarch64_be < %s -aarch64-load-store-opt=false -fast-isel=true -O0 -o - | FileCheck %s
 
 ; CHECK-LABEL: test_i64_f64:
 declare i64 @test_i64_f64_helper(double %p)
diff --git a/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
index 664a26cafe4d..b032d9c89ef6 100644
--- a/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
+++ b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
@@ -184,7 +184,7 @@ declare hidden fastcc i32 @Maze1Mech(i64, i64, i64, i64, i64, i32, i32) nounwind
 ; Materializable
 declare hidden fastcc void @CleanNet(i64) nounwind ssp
 
-!0 = metadata !{metadata !"long", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"any pointer", metadata !1}
+!0 = !{!"long", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"any pointer", !1}
diff --git a/test/CodeGen/AArch64/arm64-cse.ll b/test/CodeGen/AArch64/arm64-cse.ll
index 5d62cfe76a84..b74ece8d288b 100644
--- a/test/CodeGen/AArch64/arm64-cse.ll
+++ b/test/CodeGen/AArch64/arm64-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -O3 < %s -aarch64-atomic-cfg-tidy=0 -aarch64-gep-opt=false | FileCheck %s
 target triple = "arm64-apple-ios"
 
 ; rdar://12462006
diff --git a/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
index 6eed48bf62e3..2eb6307b201b 100644
--- a/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
+++ b/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
@@ -1,8 +1,4 @@
 ; RUN: llc -mcpu=cyclone < %s | FileCheck %s
-
-; r208640 broke ppc64/Linux self-hosting; xfailing while this is worked on.
-; XFAIL: *
-
 target datalayout = "e-i64:64-n32:64-S128"
 target triple = "arm64-apple-ios"
 
diff --git a/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll b/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll
deleted file mode 100644
index ce132c6afa46..000000000000
--- a/test/CodeGen/AArch64/arm64-dagcombiner-indexed-load.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-; RUN: llc -O3 < %s | FileCheck %s
-; RUN: llc -O3 -addr-sink-using-gep=1 < %s | FileCheck %s
-; Test case for a DAG combiner bug where we combined an indexed load
-; with an extension (sext, zext, or any) into a regular extended load,
-; i.e., dropping the indexed value.
-; <rdar://problem/16389332>
-
-target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-target triple = "arm64-apple-ios"
-
-%class.A = type { i64, i64 }
-%class.C = type { i64 }
-
-; CHECK-LABEL: XX:
-; CHECK: ldr
-define i32 @XX(%class.A* %K, i1 %tst, i32* %addr, %class.C** %ppC, %class.C* %pC) {
-entry:
-  br i1 %tst, label %if.then, label %lor.rhs.i
-
-lor.rhs.i:                                        ; preds = %entry
-  %tmp = load i32* %addr, align 4
-  %y.i.i.i = getelementptr inbounds %class.A* %K, i64 0, i32 1
-  %tmp1 = load i64* %y.i.i.i, align 8
-  %U.sroa.3.8.extract.trunc.i = trunc i64 %tmp1 to i32
-  %div11.i = sdiv i32 %U.sroa.3.8.extract.trunc.i, 17
-  %add12.i = add nsw i32 0, %div11.i
-  %U.sroa.3.12.extract.shift.i = lshr i64 %tmp1, 32
-  %U.sroa.3.12.extract.trunc.i = trunc i64 %U.sroa.3.12.extract.shift.i to i32
-  %div15.i = sdiv i32 %U.sroa.3.12.extract.trunc.i, 13
-  %add16.i = add nsw i32 %add12.i, %div15.i
-  %rem.i.i = srem i32 %add16.i, %tmp
-  %idxprom = sext i32 %rem.i.i to i64
-  %arrayidx = getelementptr inbounds %class.C** %ppC, i64 %idxprom
-  %tobool533 = icmp eq %class.C* %pC, null
-  br i1 %tobool533, label %while.end, label %while.body
-
-if.then:                                          ; preds = %entry
-  ret i32 42
-
-while.body:                                       ; preds = %lor.rhs.i
-  ret i32 5
-
-while.end:                                        ; preds = %lor.rhs.i
-  %tmp3 = load %class.C** %arrayidx, align 8
-  ret i32 50
-}
diff --git a/test/CodeGen/AArch64/arm64-extern-weak.ll b/test/CodeGen/AArch64/arm64-extern-weak.ll
index a239403befa5..06bd9270ba47 100644
--- a/test/CodeGen/AArch64/arm64-extern-weak.ll
+++ b/test/CodeGen/AArch64/arm64-extern-weak.ll
@@ -1,16 +1,23 @@
-; RUN: llc -mtriple=arm64-none-linux-gnu -o - < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=pic -o - < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-none-linux-gnu -relocation-model=static -o - < %s | FileCheck --check-prefix=CHECK-STATIC %s
 ; RUN: llc -mtriple=arm64-none-linux-gnu -code-model=large -o - < %s | FileCheck --check-prefix=CHECK-LARGE %s
 
 declare extern_weak i32 @var()
 
 define i32()* @foo() {
 ; The usual ADRP/ADD pair can't be used for a weak reference because it must
-; evaluate to 0 if the symbol is undefined. We use a litpool entry.
+; evaluate to 0 if the symbol is undefined. We use a GOT entry for PIC
+; otherwise a litpool entry.
   ret i32()* @var
 
 ; CHECK: adrp x[[VAR:[0-9]+]], :got:var
 ; CHECK: ldr x0, [x[[VAR]], :got_lo12:var]
 
+; CHECK-STATIC: .LCPI0_0:
+; CHECK-STATIC-NEXT: .xword  var
+; CHECK-STATIC: adrp x[[VAR:[0-9]+]], .LCPI0_0
+; CHECK-STATIC: ldr x0, [x[[VAR]], :lo12:.LCPI0_0]
+
   ; In the large model, the usual relocations are absolute and can
   ; materialise 0.
 ; CHECK-LARGE: movz x0, #:abs_g3:var
@@ -29,6 +36,11 @@ define i32* @bar() {
 ; CHECK: add x0, [[ARR_VAR]], #20
   ret i32* %addr
 
+; CHECK-STATIC: .LCPI1_0:
+; CHECK-STATIC-NEXT: .xword arr_var
+; CHECK-STATIC: ldr [[BASE:x[0-9]+]], [{{x[0-9]+}}, :lo12:.LCPI1_0]
+; CHECK-STATIC: add x0, [[BASE]], #20
+
   ; In the large model, the usual relocations are absolute and can
   ; materialise 0.
 ; CHECK-LARGE: movz [[ARR_VAR:x[0-9]+]], #:abs_g3:arr_var
@@ -44,6 +56,9 @@ define i32* @wibble() {
 ; CHECK: adrp [[BASE:x[0-9]+]], defined_weak_var
 ; CHECK: add x0, [[BASE]], :lo12:defined_weak_var
 
+; CHECK-STATIC: adrp [[BASE:x[0-9]+]], defined_weak_var
+; CHECK-STATIC: add x0, [[BASE]], :lo12:defined_weak_var
+
 ; CHECK-LARGE: movz x0, #:abs_g3:defined_weak_var
 ; CHECK-LARGE: movk x0, #:abs_g2_nc:defined_weak_var
 ; CHECK-LARGE: movk x0, #:abs_g1_nc:defined_weak_var
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll b/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
index ebd847e0f728..d81bc7cee114 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
 
 @sortlist = common global [5001 x i32] zeroinitializer, align 16
 @sortlist2 = common global [5001 x i64] zeroinitializer, align 16
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
index 1706e9eba2bd..a8417027ce2d 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-alloca.ll
@@ -1,5 +1,5 @@
 ; This test should cause the TargetMaterializeAlloca to be invoked
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
 
 %struct.S1Ty = type { i64 }
 %struct.S2Ty = type { %struct.S1Ty, %struct.S1Ty }
@@ -15,9 +15,8 @@ define void @main() nounwind {
 entry:
 ; CHECK: main
 ; CHECK: mov x29, sp
-; CHECK: mov x[[REG:[0-9]+]], sp
-; CHECK-NEXT: orr x[[REG1:[0-9]+]], xzr, #0x8
-; CHECK-NEXT: add x0, x[[REG]], x[[REG1]]
+; CHECK: mov [[REG:x[0-9]+]], sp
+; CHECK-NEXT: add x0, [[REG]], #8
   %E = alloca %struct.S2Ty, align 4
   %B = getelementptr inbounds %struct.S2Ty* %E, i32 0, i32 1
   call void @takeS1(%struct.S1Ty* %B)
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/test/CodeGen/AArch64/arm64-fast-isel-br.ll
index 37a8295c8931..f896d8517382 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-br.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-br.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin -mcpu=cyclone | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -mtriple=arm64-apple-darwin -mcpu=cyclone -verify-machineinstrs < %s | FileCheck %s
 
 define void @branch1() nounwind uwtable ssp {
   %x = alloca i32, align 4
@@ -95,7 +95,7 @@ entry:
   store i64 %d, i64* %d.addr, align 8
   %0 = load i16* %b.addr, align 2
 ; CHECK: and w0, w0, #0x1
-; CHECK: subs w0, w0, #0
+; CHECK: cmp w0, #0
 ; CHECK: b.eq LBB4_2
   %conv = trunc i16 %0 to i1
   br i1 %conv, label %if.then, label %if.end
@@ -107,7 +107,7 @@ if.then:                                          ; preds = %entry
 if.end:                                           ; preds = %if.then, %entry
   %1 = load i32* %c.addr, align 4
 ; CHECK: and w[[REG:[0-9]+]], w{{[0-9]+}}, #0x1
-; CHECK: subs w{{[0-9]+}}, w[[REG]], #0
+; CHECK: cmp w[[REG]], #0
 ; CHECK: b.eq LBB4_4
   %conv1 = trunc i32 %1 to i1
   br i1 %conv1, label %if.then3, label %if.end4
@@ -118,7 +118,7 @@ if.then3:                                         ; preds = %if.end
 
 if.end4:                                          ; preds = %if.then3, %if.end
   %2 = load i64* %d.addr, align 8
-; CHECK: subs w{{[0-9]+}}, w{{[0-9]+}}, #0
+; CHECK: cmp w{{[0-9]+}}, #0
 ; CHECK: b.eq LBB4_6
   %conv5 = trunc i64 %2 to i1
   br i1 %conv5, label %if.then7, label %if.end8
@@ -137,11 +137,10 @@ declare void @foo1()
 ; rdar://15174028
 define i32 @trunc64(i64 %foo) nounwind {
 ; CHECK: trunc64
-; CHECK: orr  [[REG:x[0-9]+]], xzr, #0x1
-; CHECK: and  [[REG2:x[0-9]+]], x0, [[REG]]
-; CHECK: mov  x[[REG3:[0-9]+]], [[REG2]]
-; CHECK: and  [[REG4:w[0-9]+]], w[[REG3]], #0x1
-; CHECK: subs {{w[0-9]+}}, [[REG4]], #0
+; CHECK: and  [[REG1:x[0-9]+]], x0, #0x1
+; CHECK: mov  x[[REG2:[0-9]+]], [[REG1]]
+; CHECK: and  [[REG3:w[0-9]+]], w[[REG2]], #0x1
+; CHECK: cmp  [[REG3]], #0
 ; CHECK: b.eq LBB5_2
   %a = and i64 %foo, 1
   %b = trunc i64 %a to i1
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-call.ll b/test/CodeGen/AArch64/arm64-fast-isel-call.ll
index 8d756ae54619..f1e2c40a33c4 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-call.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-call.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64_be-linux-gnu | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=small -verify-machineinstrs -mtriple=arm64-apple-darwin   < %s | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=large -verify-machineinstrs -mtriple=arm64-apple-darwin   < %s | FileCheck %s --check-prefix=LARGE
+; RUN: llc -O0 -fast-isel-abort -fast-isel-abort-args -code-model=small -verify-machineinstrs -mtriple=aarch64_be-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-BE
 
 define void @call0() nounwind {
 entry:
@@ -8,8 +9,12 @@ entry:
 
 define void @foo0() nounwind {
 entry:
-; CHECK: foo0
-; CHECK: bl _call0
+; CHECK-LABEL: foo0
+; CHECK:       bl _call0
+; LARGE-LABEL: foo0
+; LARGE:       adrp [[REG0:x[0-9]+]], _call0@GOTPAGE
+; LARGE:       ldr  [[REG1:x[0-9]+]], {{\[}}[[REG0]], _call0@GOTPAGEOFF{{\]}}
+; LARGE-NEXT:  blr  [[REG1]]
   call void @call0()
   ret void
 }
@@ -24,10 +29,10 @@ entry:
 
 define i32 @foo1(i32 %a) nounwind {
 entry:
-; CHECK: foo1
-; CHECK: stur w0, [x29, #-4]
-; CHECK-NEXT: ldur w0, [x29, #-4]
-; CHECK-NEXT: bl _call1
+; CHECK-LABEL: foo1
+; CHECK:       stur w0, [x29, #-4]
+; CHECK-NEXT:  ldur w0, [x29, #-4]
+; CHECK-NEXT:  bl _call1
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   %tmp = load i32* %a.addr, align 4
@@ -37,10 +42,10 @@ entry:
 
 define i32 @sext_(i8 %a, i16 %b) nounwind {
 entry:
-; CHECK: @sext_
-; CHECK: sxtb w0, w0
-; CHECK: sxth w1, w1
-; CHECK: bl _foo_sext_
+; CHECK-LABEL: sext_
+; CHECK:       sxtb w0, w0
+; CHECK:       sxth w1, w1
+; CHECK:       bl _foo_sext_
   call void @foo_sext_(i8 signext %a, i16 signext %b)
   ret i32 0
 }
@@ -49,9 +54,9 @@ declare void @foo_sext_(i8 %a, i16 %b)
 
 define i32 @zext_(i8 %a, i16 %b) nounwind {
 entry:
-; CHECK: @zext_
-; CHECK: uxtb w0, w0
-; CHECK: uxth w1, w1
+; CHECK-LABEL: zext_
+; CHECK:       uxtb w0, w0
+; CHECK:       uxth w1, w1
   call void @foo_zext_(i8 zeroext %a, i16 zeroext %b)
   ret i32 0
 }
@@ -60,10 +65,10 @@ declare void @foo_zext_(i8 %a, i16 %b)
 
 define i32 @t1(i32 %argc, i8** nocapture %argv) {
 entry:
-; CHECK: @t1
+; CHECK-LABEL: @t1
 ; The last parameter will be passed on stack via i8.
-; CHECK: strb w{{[0-9]+}}, [sp]
-; CHECK-NEXT: bl _bar
+; CHECK:       strb w{{[0-9]+}}, [sp]
+; CHECK:       bl _bar
   %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70, i8 zeroext 28, i8 zeroext 39, i8 zeroext -41)
   ret i32 0
 }
@@ -73,18 +78,19 @@ declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8
 ; Test materialization of integers.  Target-independent selector handles this.
 define i32 @t2() {
 entry:
-; CHECK: @t2
-; CHECK: movz x0, #0
-; CHECK: orr w1, wzr, #0xfffffff8
-; CHECK: orr w[[REG:[0-9]+]], wzr, #0x3ff
-; CHECK: orr w[[REG2:[0-9]+]], wzr, #0x2
-; CHECK: movz w[[REG3:[0-9]+]], #0
-; CHECK: orr w[[REG4:[0-9]+]], wzr, #0x1
-; CHECK: uxth w2, w[[REG]]
-; CHECK: sxtb w3, w[[REG2]]
-; CHECK: and w4, w[[REG3]], #0x1
-; CHECK: and w5, w[[REG4]], #0x1
-; CHECK: bl	_func2
+; CHECK-LABEL: t2
+; CHECK:       mov [[REG1:x[0-9]+]], xzr
+; CHECK:       orr w1, wzr, #0xfffffff8
+; CHECK:       orr [[REG2:w[0-9]+]], wzr, #0x3ff
+; CHECK:       orr [[REG3:w[0-9]+]], wzr, #0x2
+; CHECK:       mov [[REG4:w[0-9]+]], wzr
+; CHECK:       orr [[REG5:w[0-9]+]], wzr, #0x1
+; CHECK:       mov x0, [[REG1]]
+; CHECK:       uxth w2, [[REG2]]
+; CHECK:       sxtb w3, [[REG3]]
+; CHECK:       and w4, [[REG4]], #0x1
+; CHECK:       and w5, [[REG5]], #0x1
+; CHECK:       bl _func2
   %call = call i32 @func2(i64 zeroext 0, i32 signext -8, i16 zeroext 1023, i8 signext -254, i1 zeroext 0, i1 zeroext 1)
   ret i32 0
 }
@@ -94,7 +100,170 @@ declare i32 @func2(i64 zeroext, i32 signext, i16 zeroext, i8 signext, i1 zeroext
 declare void @callee_b0f(i8 %bp10, i8 %bp11, i8 %bp12, i8 %bp13, i8 %bp14, i8 %bp15, i8 %bp17, i8 %bp18, i8 %bp19)
 define void @caller_b1f() {
 entry:
-  ; CHECK-BE: strb w{{.*}}, [sp, #7]
+; CHECK-BE-LABEL: caller_b1f
+; CHECK-BE:       strb w{{.*}}, [sp, #7]
   call void @callee_b0f(i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 42)
   ret void
 }
+
+define zeroext i1 @call_arguments1(i1 %a1, i1 %a2, i1 %a3, i1 %a4, i1 %a5, i1 %a6, i1 %a7, i1 %a8) {
+; CHECK-LABEL: call_arguments1
+; CHECK:       and {{w[0-9]+}}, w0, w1
+; CHECK-NEXT:  and {{w[0-9]+}}, w2, w3
+; CHECK-NEXT:  and {{w[0-9]+}}, w4, w5
+; CHECK-NEXT:  and {{w[0-9]+}}, w6, w7
+  %1 = and i1 %a1, %a2
+  %2 = and i1 %a3, %a4
+  %3 = and i1 %a5, %a6
+  %4 = and i1 %a7, %a8
+  %5 = and i1 %1, %2
+  %6 = and i1 %3, %4
+  %7 = and i1 %5, %6
+  ret i1 %7
+}
+
+define i32 @call_arguments2(i8 zeroext %a1, i8 zeroext %a2, i8 zeroext %a3, i8 zeroext %a4, i8 signext %a5, i8 signext %a6, i8 signext %a7, i8 signext %a8) {
+; CHECK-LABEL: call_arguments2
+; CHECK:       add {{w[0-9]+}}, w0, w1
+; CHECK-NEXT:  add {{w[0-9]+}}, w2, w3
+; CHECK-NEXT:  add {{w[0-9]+}}, w4, w5
+; CHECK-NEXT:  add {{w[0-9]+}}, w6, w7
+  %a1z = zext i8 %a1 to i32
+  %a2z = zext i8 %a2 to i32
+  %a3z = zext i8 %a3 to i32
+  %a4z = zext i8 %a4 to i32
+  %a5s = sext i8 %a5 to i32
+  %a6s = sext i8 %a6 to i32
+  %a7s = sext i8 %a7 to i32
+  %a8s = sext i8 %a8 to i32
+  %1 = add i32 %a1z, %a2z
+  %2 = add i32 %a3z, %a4z
+  %3 = add i32 %a5s, %a6s
+  %4 = add i32 %a7s, %a8s
+  %5 = add i32 %1, %2
+  %6 = add i32 %3, %4
+  %7 = add i32 %5, %6
+  ret i32 %7
+}
+
+define i32 @call_arguments3(i16 zeroext %a1, i16 zeroext %a2, i16 zeroext %a3, i16 zeroext %a4, i16 signext %a5, i16 signext %a6, i16 signext %a7, i16 signext %a8) {
+; CHECK-LABEL: call_arguments3
+; CHECK:       add {{w[0-9]+}}, w0, w1
+; CHECK-NEXT:  add {{w[0-9]+}}, w2, w3
+; CHECK-NEXT:  add {{w[0-9]+}}, w4, w5
+; CHECK-NEXT:  add {{w[0-9]+}}, w6, w7
+  %a1z = zext i16 %a1 to i32
+  %a2z = zext i16 %a2 to i32
+  %a3z = zext i16 %a3 to i32
+  %a4z = zext i16 %a4 to i32
+  %a5s = sext i16 %a5 to i32
+  %a6s = sext i16 %a6 to i32
+  %a7s = sext i16 %a7 to i32
+  %a8s = sext i16 %a8 to i32
+  %1 = add i32 %a1z, %a2z
+  %2 = add i32 %a3z, %a4z
+  %3 = add i32 %a5s, %a6s
+  %4 = add i32 %a7s, %a8s
+  %5 = add i32 %1, %2
+  %6 = add i32 %3, %4
+  %7 = add i32 %5, %6
+  ret i32 %7
+}
+
+define i32 @call_arguments4(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) {
+; CHECK-LABEL: call_arguments4
+; CHECK:       add {{w[0-9]+}}, w0, w1
+; CHECK-NEXT:  add {{w[0-9]+}}, w2, w3
+; CHECK-NEXT:  add {{w[0-9]+}}, w4, w5
+; CHECK-NEXT:  add {{w[0-9]+}}, w6, w7
+  %1 = add i32 %a1, %a2
+  %2 = add i32 %a3, %a4
+  %3 = add i32 %a5, %a6
+  %4 = add i32 %a7, %a8
+  %5 = add i32 %1, %2
+  %6 = add i32 %3, %4
+  %7 = add i32 %5, %6
+  ret i32 %7
+}
+
+define i64 @call_arguments5(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8) {
+; CHECK-LABEL: call_arguments5
+; CHECK:       add {{x[0-9]+}}, x0, x1
+; CHECK-NEXT:  add {{x[0-9]+}}, x2, x3
+; CHECK-NEXT:  add {{x[0-9]+}}, x4, x5
+; CHECK-NEXT:  add {{x[0-9]+}}, x6, x7
+  %1 = add i64 %a1, %a2
+  %2 = add i64 %a3, %a4
+  %3 = add i64 %a5, %a6
+  %4 = add i64 %a7, %a8
+  %5 = add i64 %1, %2
+  %6 = add i64 %3, %4
+  %7 = add i64 %5, %6
+  ret i64 %7
+}
+
+define float @call_arguments6(float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7, float %a8) {
+; CHECK-LABEL: call_arguments6
+; CHECK:       fadd {{s[0-9]+}}, s0, s1
+; CHECK-NEXT:  fadd {{s[0-9]+}}, s2, s3
+; CHECK-NEXT:  fadd {{s[0-9]+}}, s4, s5
+; CHECK-NEXT:  fadd {{s[0-9]+}}, s6, s7
+  %1 = fadd float %a1, %a2
+  %2 = fadd float %a3, %a4
+  %3 = fadd float %a5, %a6
+  %4 = fadd float %a7, %a8
+  %5 = fadd float %1, %2
+  %6 = fadd float %3, %4
+  %7 = fadd float %5, %6
+  ret float %7
+}
+
+define double @call_arguments7(double %a1, double %a2, double %a3, double %a4, double %a5, double %a6, double %a7, double %a8) {
+; CHECK-LABEL: call_arguments7
+; CHECK:       fadd {{d[0-9]+}}, d0, d1
+; CHECK-NEXT:  fadd {{d[0-9]+}}, d2, d3
+; CHECK-NEXT:  fadd {{d[0-9]+}}, d4, d5
+; CHECK-NEXT:  fadd {{d[0-9]+}}, d6, d7
+  %1 = fadd double %a1, %a2
+  %2 = fadd double %a3, %a4
+  %3 = fadd double %a5, %a6
+  %4 = fadd double %a7, %a8
+  %5 = fadd double %1, %2
+  %6 = fadd double %3, %4
+  %7 = fadd double %5, %6
+  ret double %7
+}
+
+define i64 @call_arguments8(i32 %a1, i64 %a2, i32 %a3, i64 %a4) {
+; CHECK-LABEL: call_arguments8
+; CHECK:       ubfx  [[REG1:x[0-9]+]], {{x[0-9]+}}, #0, #32
+; CHECK:       ubfx  [[REG2:x[0-9]+]], {{x[0-9]+}}, #0, #32
+; CHECK:       add {{x[0-9]+}}, [[REG1]], x1
+; CHECK-NEXT:  add {{x[0-9]+}}, [[REG2]], x3
+  %aa1 = zext i32 %a1 to i64
+  %aa3 = zext i32 %a3 to i64
+  %1 = add i64 %aa1, %a2
+  %2 = add i64 %aa3, %a4
+  %3 = add i64 %1, %2
+  ret i64 %3
+}
+
+define void @call_arguments9(i8 %a1, i16 %a2, i32 %a3, i64 %a4, float %a5, double %a6, i64 %a7, double %a8) {
+; CHECK-LABEL: call_arguments9
+  ret void
+}
+
+; Test that we use the correct register class for the branch.
+define void @call_blr(i64 %Fn, i1 %c) {
+; CHECK-LABEL: call_blr
+; CHECK:       blr
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  %1 = inttoptr i64 %Fn to void (i64)*
+  br label %bb2
+bb2:
+  %2 = phi void (i64)* [ %1, %bb1 ], [ undef, %0 ]
+  call void %2(i64 1)
+  ret void
+}
+
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
index c5417de0ae97..e5151847a598 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin -mcpu=cyclone | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck %s
 
 ;; Test various conversions.
 define zeroext i32 @trunc_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) nounwind ssp {
 entry:
-; CHECK: trunc_
+; CHECK-LABEL: trunc_
 ; CHECK: sub sp, sp, #16
 ; CHECK: strb w0, [sp, #15]
 ; CHECK: strh w1, [sp, #12]
@@ -17,7 +17,6 @@ entry:
 ; CHECK: ldrh w0, [sp, #12]
 ; CHECK: strb w0, [sp, #15]
 ; CHECK: ldrb w0, [sp, #15]
-; CHECK: uxtb w0, w0
 ; CHECK: add sp, sp, #16
 ; CHECK: ret
   %a.addr = alloca i8, align 1
@@ -44,21 +43,18 @@ entry:
 
 define i64 @zext_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) nounwind ssp {
 entry:
-; CHECK: zext_
+; CHECK-LABEL: zext_
 ; CHECK: sub sp, sp, #16
 ; CHECK: strb w0, [sp, #15]
 ; CHECK: strh w1, [sp, #12]
 ; CHECK: str w2, [sp, #8]
 ; CHECK: str x3, [sp]
 ; CHECK: ldrb w0, [sp, #15]
-; CHECK: uxtb w0, w0
 ; CHECK: strh w0, [sp, #12]
 ; CHECK: ldrh w0, [sp, #12]
-; CHECK: uxth w0, w0
 ; CHECK: str w0, [sp, #8]
 ; CHECK: ldr w0, [sp, #8]
 ; CHECK: mov x3, x0
-; CHECK: ubfx x3, x3, #0, #32
 ; CHECK: str x3, [sp]
 ; CHECK: ldr x0, [sp]
 ; CHECK: ret
@@ -85,37 +81,35 @@ entry:
 
 define i32 @zext_i1_i32(i1 zeroext %a) nounwind ssp {
 entry:
-; CHECK: @zext_i1_i32
-; CHECK: and w0, w0, #0x1
+; CHECK-LABEL: zext_i1_i32
+; CHECK-NOT:   and w0, w0, #0x1
+; CHECK:       ret
   %conv = zext i1 %a to i32
   ret i32 %conv;
 }
 
 define i64 @zext_i1_i64(i1 zeroext %a) nounwind ssp {
 entry:
-; CHECK: @zext_i1_i64
-; CHECK: and w0, w0, #0x1
+; CHECK-LABEL: zext_i1_i64
+; CHECK-NOT:   and w0, w0, #0x1
+; CHECK:       ret
   %conv = zext i1 %a to i64
   ret i64 %conv;
 }
 
 define i64 @sext_(i8 signext %a, i16 signext %b, i32 %c, i64 %d) nounwind ssp {
 entry:
-; CHECK: sext_
+; CHECK-LABEL: sext_
 ; CHECK: sub sp, sp, #16
 ; CHECK: strb w0, [sp, #15]
 ; CHECK: strh w1, [sp, #12]
 ; CHECK: str w2, [sp, #8]
 ; CHECK: str x3, [sp]
-; CHECK: ldrb w0, [sp, #15]
-; CHECK: sxtb w0, w0
+; CHECK: ldrsb w0, [sp, #15]
 ; CHECK: strh w0, [sp, #12]
-; CHECK: ldrh w0, [sp, #12]
-; CHECK: sxth w0, w0
+; CHECK: ldrsh w0, [sp, #12]
 ; CHECK: str w0, [sp, #8]
-; CHECK: ldr w0, [sp, #8]
-; CHECK: mov x3, x0
-; CHECK: sxtw x3, w3
+; CHECK: ldrsw x3, [sp, #8]
 ; CHECK: str x3, [sp]
 ; CHECK: ldr x0, [sp]
 ; CHECK: ret
@@ -161,8 +155,9 @@ define zeroext i64 @sext_i16_i64(i16 zeroext %in) {
 ; Test sext i1 to i32
 define i32 @sext_i1_i32(i1 signext %a) nounwind ssp {
 entry:
-; CHECK: sext_i1_i32
-; CHECK: sbfx w0, w0, #0, #1
+; CHECK-LABEL: sext_i1_i32
+; CHECK-NOT:   sbfx w0, w0, #0, #1
+; CHECK:       ret
   %conv = sext i1 %a to i32
   ret i32 %conv
 }
@@ -170,7 +165,7 @@ entry:
 ; Test sext i1 to i16
 define signext i16 @sext_i1_i16(i1 %a) nounwind ssp {
 entry:
-; CHECK: sext_i1_i16
+; CHECK-LABEL: sext_i1_i16
 ; CHECK: sbfx w0, w0, #0, #1
   %conv = sext i1 %a to i16
   ret i16 %conv
@@ -179,7 +174,7 @@ entry:
 ; Test sext i1 to i8
 define signext i8 @sext_i1_i8(i1 %a) nounwind ssp {
 entry:
-; CHECK: sext_i1_i8
+; CHECK-LABEL: sext_i1_i8
 ; CHECK: sbfx w0, w0, #0, #1
   %conv = sext i1 %a to i8
   ret i8 %conv
@@ -188,7 +183,7 @@ entry:
 ; Test fpext
 define double @fpext_(float %a) nounwind ssp {
 entry:
-; CHECK: fpext_
+; CHECK-LABEL: fpext_
 ; CHECK: fcvt d0, s0
   %conv = fpext float %a to double
   ret double %conv
@@ -197,7 +192,7 @@ entry:
 ; Test fptrunc
 define float @fptrunc_(double %a) nounwind ssp {
 entry:
-; CHECK: fptrunc_
+; CHECK-LABEL: fptrunc_
 ; CHECK: fcvt s0, d0
   %conv = fptrunc double %a to float
   ret float %conv
@@ -206,7 +201,7 @@ entry:
 ; Test fptosi
 define i32 @fptosi_ws(float %a) nounwind ssp {
 entry:
-; CHECK: fptosi_ws
+; CHECK-LABEL: fptosi_ws
 ; CHECK: fcvtzs w0, s0
   %conv = fptosi float %a to i32
   ret i32 %conv
@@ -215,7 +210,7 @@ entry:
 ; Test fptosi
 define i32 @fptosi_wd(double %a) nounwind ssp {
 entry:
-; CHECK: fptosi_wd
+; CHECK-LABEL: fptosi_wd
 ; CHECK: fcvtzs w0, d0
   %conv = fptosi double %a to i32
   ret i32 %conv
@@ -224,7 +219,7 @@ entry:
 ; Test fptoui
 define i32 @fptoui_ws(float %a) nounwind ssp {
 entry:
-; CHECK: fptoui_ws
+; CHECK-LABEL: fptoui_ws
 ; CHECK: fcvtzu w0, s0
   %conv = fptoui float %a to i32
   ret i32 %conv
@@ -233,7 +228,7 @@ entry:
 ; Test fptoui
 define i32 @fptoui_wd(double %a) nounwind ssp {
 entry:
-; CHECK: fptoui_wd
+; CHECK-LABEL: fptoui_wd
 ; CHECK: fcvtzu w0, d0
   %conv = fptoui double %a to i32
   ret i32 %conv
@@ -242,7 +237,7 @@ entry:
 ; Test sitofp
 define float @sitofp_sw_i1(i1 %a) nounwind ssp {
 entry:
-; CHECK: sitofp_sw_i1
+; CHECK-LABEL: sitofp_sw_i1
 ; CHECK: sbfx w0, w0, #0, #1
 ; CHECK: scvtf s0, w0
   %conv = sitofp i1 %a to float
@@ -252,7 +247,7 @@ entry:
 ; Test sitofp
 define float @sitofp_sw_i8(i8 %a) nounwind ssp {
 entry:
-; CHECK: sitofp_sw_i8
+; CHECK-LABEL: sitofp_sw_i8
 ; CHECK: sxtb w0, w0
 ; CHECK: scvtf s0, w0
   %conv = sitofp i8 %a to float
@@ -262,9 +257,7 @@ entry:
 ; Test sitofp
 define float @sitofp_sw_i16(i16 %a) nounwind ssp {
 entry:
-; CHECK: sitofp_sw_i16
-; CHECK: sxth w0, w0
-; CHECK: scvtf s0, w0
+; CHECK-LABEL: sitofp_sw_i16
   %conv = sitofp i16 %a to float
   ret float %conv
 }
@@ -272,7 +265,7 @@ entry:
 ; Test sitofp
 define float @sitofp_sw(i32 %a) nounwind ssp {
 entry:
-; CHECK: sitofp_sw
+; CHECK-LABEL: sitofp_sw
 ; CHECK: scvtf s0, w0
   %conv = sitofp i32 %a to float
   ret float %conv
@@ -281,7 +274,7 @@ entry:
 ; Test sitofp
 define float @sitofp_sx(i64 %a) nounwind ssp {
 entry:
-; CHECK: sitofp_sx
+; CHECK-LABEL: sitofp_sx
 ; CHECK: scvtf s0, x0
   %conv = sitofp i64 %a to float
   ret float %conv
@@ -290,7 +283,7 @@ entry:
 ; Test sitofp
 define double @sitofp_dw(i32 %a) nounwind ssp {
 entry:
-; CHECK: sitofp_dw
+; CHECK-LABEL: sitofp_dw
 ; CHECK: scvtf d0, w0
   %conv = sitofp i32 %a to double
   ret double %conv
@@ -299,7 +292,7 @@ entry:
 ; Test sitofp
 define double @sitofp_dx(i64 %a) nounwind ssp {
 entry:
-; CHECK: sitofp_dx
+; CHECK-LABEL: sitofp_dx
 ; CHECK: scvtf d0, x0
   %conv = sitofp i64 %a to double
   ret double %conv
@@ -308,7 +301,7 @@ entry:
 ; Test uitofp
 define float @uitofp_sw_i1(i1 %a) nounwind ssp {
 entry:
-; CHECK: uitofp_sw_i1
+; CHECK-LABEL: uitofp_sw_i1
 ; CHECK: and w0, w0, #0x1
 ; CHECK: ucvtf s0, w0
   %conv = uitofp i1 %a to float
@@ -318,9 +311,7 @@ entry:
 ; Test uitofp
 define float @uitofp_sw_i8(i8 %a) nounwind ssp {
 entry:
-; CHECK: uitofp_sw_i8
-; CHECK: uxtb w0, w0
-; CHECK: ucvtf s0, w0
+; CHECK-LABEL: uitofp_sw_i8
   %conv = uitofp i8 %a to float
   ret float %conv
 }
@@ -328,9 +319,7 @@ entry:
 ; Test uitofp
 define float @uitofp_sw_i16(i16 %a) nounwind ssp {
 entry:
-; CHECK: uitofp_sw_i16
-; CHECK: uxth w0, w0
-; CHECK: ucvtf s0, w0
+; CHECK-LABEL: uitofp_sw_i16
   %conv = uitofp i16 %a to float
   ret float %conv
 }
@@ -338,7 +327,7 @@ entry:
 ; Test uitofp
 define float @uitofp_sw(i32 %a) nounwind ssp {
 entry:
-; CHECK: uitofp_sw
+; CHECK-LABEL: uitofp_sw
 ; CHECK: ucvtf s0, w0
   %conv = uitofp i32 %a to float
   ret float %conv
@@ -347,7 +336,7 @@ entry:
 ; Test uitofp
 define float @uitofp_sx(i64 %a) nounwind ssp {
 entry:
-; CHECK: uitofp_sx
+; CHECK-LABEL: uitofp_sx
 ; CHECK: ucvtf s0, x0
   %conv = uitofp i64 %a to float
   ret float %conv
@@ -356,7 +345,7 @@ entry:
 ; Test uitofp
 define double @uitofp_dw(i32 %a) nounwind ssp {
 entry:
-; CHECK: uitofp_dw
+; CHECK-LABEL: uitofp_dw
 ; CHECK: ucvtf d0, w0
   %conv = uitofp i32 %a to double
   ret double %conv
@@ -365,7 +354,7 @@ entry:
 ; Test uitofp
 define double @uitofp_dx(i64 %a) nounwind ssp {
 entry:
-; CHECK: uitofp_dx
+; CHECK-LABEL: uitofp_dx
 ; CHECK: ucvtf d0, x0
   %conv = uitofp i64 %a to double
   ret double %conv
@@ -373,7 +362,7 @@ entry:
 
 define i32 @i64_trunc_i32(i64 %a) nounwind ssp {
 entry:
-; CHECK: i64_trunc_i32
+; CHECK-LABEL: i64_trunc_i32
 ; CHECK: mov x1, x0
   %conv = trunc i64 %a to i32
   ret i32 %conv
@@ -381,7 +370,7 @@ entry:
 
 define zeroext i16 @i64_trunc_i16(i64 %a) nounwind ssp {
 entry:
-; CHECK: i64_trunc_i16
+; CHECK-LABEL: i64_trunc_i16
 ; CHECK: mov x[[REG:[0-9]+]], x0
 ; CHECK: and [[REG2:w[0-9]+]], w[[REG]], #0xffff
 ; CHECK: uxth w0, [[REG2]]
@@ -391,7 +380,7 @@ entry:
 
 define zeroext i8 @i64_trunc_i8(i64 %a) nounwind ssp {
 entry:
-; CHECK: i64_trunc_i8
+; CHECK-LABEL: i64_trunc_i8
 ; CHECK: mov x[[REG:[0-9]+]], x0
 ; CHECK: and [[REG2:w[0-9]+]], w[[REG]], #0xff
 ; CHECK: uxtb w0, [[REG2]]
@@ -401,7 +390,7 @@ entry:
 
 define zeroext i1 @i64_trunc_i1(i64 %a) nounwind ssp {
 entry:
-; CHECK: i64_trunc_i1
+; CHECK-LABEL: i64_trunc_i1
 ; CHECK: mov x[[REG:[0-9]+]], x0
 ; CHECK: and [[REG2:w[0-9]+]], w[[REG]], #0x1
 ; CHECK: and w0, [[REG2]], #0x1
@@ -411,7 +400,7 @@ entry:
 
 ; rdar://15101939
 define void @stack_trunc() nounwind {
-; CHECK: stack_trunc
+; CHECK-LABEL: stack_trunc
 ; CHECK: sub  sp, sp, #16
 ; CHECK: ldr  [[REG:x[0-9]+]], [sp]
 ; CHECK: mov  x[[REG2:[0-9]+]], [[REG]]
@@ -428,15 +417,36 @@ define void @stack_trunc() nounwind {
 
 define zeroext i64 @zext_i8_i64(i8 zeroext %in) {
 ; CHECK-LABEL: zext_i8_i64:
-; CHECK: mov x[[TMP:[0-9]+]], x0
-; CHECK: ubfx x0, x[[TMP]], #0, #8
+; CHECK-NOT:   ubfx x0, {{x[0-9]+}}, #0, #8
+; CHECK:       ret
   %big = zext i8 %in to i64
   ret i64 %big
 }
 define zeroext i64 @zext_i16_i64(i16 zeroext %in) {
 ; CHECK-LABEL: zext_i16_i64:
-; CHECK: mov x[[TMP:[0-9]+]], x0
-; CHECK: ubfx x0, x[[TMP]], #0, #16
+; CHECK-NOT:   ubfx x0, {{x[0-9]+}}, #0, #16
+; CHECK:       ret
   %big = zext i16 %in to i64
   ret i64 %big
 }
+
+define float @bitcast_i32_to_float(i32 %a) {
+  %1 = bitcast i32 %a to float
+  ret float %1
+}
+
+define double @bitcast_i64_to_double(i64 %a) {
+  %1 = bitcast i64 %a to double
+  ret double %1
+}
+
+define i32 @bitcast_float_to_i32(float %a) {
+  %1 = bitcast float %a to i32
+  ret i32 %1
+}
+
+define i64 @bitcast_double_to_i64(double %a) {
+  %1 = bitcast double %a to i64
+  ret i64 %1
+}
+
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll b/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll
index f03059620768..111b6bd3d49e 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-fcmp.ll
@@ -1,146 +1,162 @@
-; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
 
-define zeroext i1 @fcmp_float1(float %a) nounwind ssp {
-entry:
-; CHECK-LABEL: @fcmp_float1
-; CHECK: fcmp s0, #0.0
-; CHECK: cset w{{[0-9]+}}, ne
-  %cmp = fcmp une float %a, 0.000000e+00
-  ret i1 %cmp
+define zeroext i1 @fcmp_float1(float %a) {
+; CHECK-LABEL: fcmp_float1
+; CHECK:       fcmp s0, #0.0
+; CHECK-NEXT:  cset {{w[0-9]+}}, ne
+  %1 = fcmp une float %a, 0.000000e+00
+  ret i1 %1
 }
 
-define zeroext i1 @fcmp_float2(float %a, float %b) nounwind ssp {
-entry:
-; CHECK-LABEL: @fcmp_float2
-; CHECK: fcmp s0, s1
-; CHECK: cset w{{[0-9]+}}, ne
-  %cmp = fcmp une float %a, %b
-  ret i1 %cmp
+define zeroext i1 @fcmp_float2(float %a, float %b) {
+; CHECK-LABEL: fcmp_float2
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  cset {{w[0-9]+}}, ne
+  %1 = fcmp une float %a, %b
+  ret i1 %1
 }
 
-define zeroext i1 @fcmp_double1(double %a) nounwind ssp {
-entry:
-; CHECK-LABEL: @fcmp_double1
-; CHECK: fcmp d0, #0.0
-; CHECK: cset w{{[0-9]+}}, ne
-  %cmp = fcmp une double %a, 0.000000e+00
-  ret i1 %cmp
+define zeroext i1 @fcmp_double1(double %a) {
+; CHECK-LABEL: fcmp_double1
+; CHECK:       fcmp d0, #0.0
+; CHECK-NEXT:  cset {{w[0-9]+}}, ne
+  %1 = fcmp une double %a, 0.000000e+00
+  ret i1 %1
 }
 
-define zeroext i1 @fcmp_double2(double %a, double %b) nounwind ssp {
-entry:
-; CHECK-LABEL: @fcmp_double2
-; CHECK: fcmp d0, d1
-; CHECK: cset w{{[0-9]+}}, ne
-  %cmp = fcmp une double %a, %b
-  ret i1 %cmp
+define zeroext i1 @fcmp_double2(double %a, double %b) {
+; CHECK-LABEL: fcmp_double2
+; CHECK:       fcmp d0, d1
+; CHECK-NEXT:  cset {{w[0-9]+}}, ne
+  %1 = fcmp une double %a, %b
+  ret i1 %1
 }
 
 ; Check each fcmp condition
-define float @fcmp_oeq(float %a, float %b) nounwind ssp {
-; CHECK-LABEL: @fcmp_oeq
-; CHECK: fcmp s0, s1
-; CHECK: cset w{{[0-9]+}}, eq
-  %cmp = fcmp oeq float %a, %b
-  %conv = uitofp i1 %cmp to float
-  ret float %conv
-}
-
-define float @fcmp_ogt(float %a, float %b) nounwind ssp {
-; CHECK-LABEL: @fcmp_ogt
-; CHECK: fcmp s0, s1
-; CHECK: cset w{{[0-9]+}}, gt
-  %cmp = fcmp ogt float %a, %b
-  %conv = uitofp i1 %cmp to float
-  ret float %conv
-}
-
-define float @fcmp_oge(float %a, float %b) nounwind ssp {
-; CHECK-LABEL: @fcmp_oge
-; CHECK: fcmp s0, s1
-; CHECK: cset w{{[0-9]+}}, ge
-  %cmp = fcmp oge float %a, %b
-  %conv = uitofp i1 %cmp to float
-  ret float %conv
-}
-
-define float @fcmp_olt(float %a, float %b) nounwind ssp {
-; CHECK-LABEL: @fcmp_olt
-; CHECK: fcmp s0, s1
-; CHECK: cset w{{[0-9]+}}, mi
-  %cmp = fcmp olt float %a, %b
-  %conv = uitofp i1 %cmp to float
-  ret float %conv
-}
-
-define float @fcmp_ole(float %a, float %b) nounwind ssp {
-; CHECK-LABEL: @fcmp_ole
-; CHECK: fcmp s0, s1
-; CHECK: cset w{{[0-9]+}}, ls
-  %cmp = fcmp ole float %a, %b
-  %conv = uitofp i1 %cmp to float
-  ret float %conv
-}
-
-define float @fcmp_ord(float %a, float %b) nounwind ssp {
-; CHECK-LABEL: @fcmp_ord
-; CHECK: fcmp s0, s1
-; CHECK: cset {{w[0-9]+}}, vc
-  %cmp = fcmp ord float %a, %b
-  %conv = uitofp i1 %cmp to float
-  ret float %conv
-}
-
-define float @fcmp_uno(float %a, float %b) nounwind ssp {
-; CHECK-LABEL: @fcmp_uno
-; CHECK: fcmp s0, s1
-; CHECK: cset {{w[0-9]+}}, vs
-  %cmp = fcmp uno float %a, %b
-  %conv = uitofp i1 %cmp to float
-  ret float %conv
-}
-
-define float @fcmp_ugt(float %a, float %b) nounwind ssp {
-; CHECK-LABEL: @fcmp_ugt
-; CHECK: fcmp s0, s1
-; CHECK: cset {{w[0-9]+}}, hi
-  %cmp = fcmp ugt float %a, %b
-  %conv = uitofp i1 %cmp to float
-  ret float %conv
-}
-
-define float @fcmp_uge(float %a, float %b) nounwind ssp {
-; CHECK-LABEL: @fcmp_uge
-; CHECK: fcmp s0, s1
-; CHECK: cset {{w[0-9]+}}, pl
-  %cmp = fcmp uge float %a, %b
-  %conv = uitofp i1 %cmp to float
-  ret float %conv
-}
-
-define float @fcmp_ult(float %a, float %b) nounwind ssp {
-; CHECK-LABEL: @fcmp_ult
-; CHECK: fcmp s0, s1
-; CHECK: cset {{w[0-9]+}}, lt
-  %cmp = fcmp ult float %a, %b
-  %conv = uitofp i1 %cmp to float
-  ret float %conv
-}
-
-define float @fcmp_ule(float %a, float %b) nounwind ssp {
-; CHECK-LABEL: @fcmp_ule
-; CHECK: fcmp s0, s1
-; CHECK: cset {{w[0-9]+}}, le
-  %cmp = fcmp ule float %a, %b
-  %conv = uitofp i1 %cmp to float
-  ret float %conv
-}
-
-define float @fcmp_une(float %a, float %b) nounwind ssp {
-; CHECK-LABEL: @fcmp_une
-; CHECK: fcmp s0, s1
-; CHECK: cset {{w[0-9]+}}, ne
-  %cmp = fcmp une float %a, %b
-  %conv = uitofp i1 %cmp to float
-  ret float %conv
+define zeroext i1 @fcmp_false(float %a) {
+; CHECK-LABEL: fcmp_false
+; CHECK:       mov {{w[0-9]+}}, wzr
+  %1 = fcmp ogt float %a, %a
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_oeq(float %a, float %b) {
+; CHECK-LABEL: fcmp_oeq
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  cset {{w[0-9]+}}, eq
+  %1 = fcmp oeq float %a, %b
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ogt(float %a, float %b) {
+; CHECK-LABEL: fcmp_ogt
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  cset {{w[0-9]+}}, gt
+  %1 = fcmp ogt float %a, %b
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_oge(float %a, float %b) {
+; CHECK-LABEL: fcmp_oge
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  cset {{w[0-9]+}}, ge
+  %1 = fcmp oge float %a, %b
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_olt(float %a, float %b) {
+; CHECK-LABEL: fcmp_olt
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  cset {{w[0-9]+}}, mi
+  %1 = fcmp olt float %a, %b
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ole(float %a, float %b) {
+; CHECK-LABEL: fcmp_ole
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  cset {{w[0-9]+}}, ls
+  %1 = fcmp ole float %a, %b
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_one(float %a, float %b) {
+; CHECK-LABEL: fcmp_one
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  cset [[REG:w[0-9]+]], mi
+; CHECK-NEXT:  csinc {{w[0-9]+}}, [[REG]], wzr, le
+  %1 = fcmp one float %a, %b
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ord(float %a, float %b) {
+; CHECK-LABEL: fcmp_ord
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  cset {{w[0-9]+}}, vc
+  %1 = fcmp ord float %a, %b
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_uno(float %a, float %b) {
+; CHECK-LABEL: fcmp_uno
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  cset {{w[0-9]+}}, vs
+  %1 = fcmp uno float %a, %b
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ueq(float %a, float %b) {
+; CHECK-LABEL: fcmp_ueq
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  cset [[REG:w[0-9]+]], eq
+; CHECK-NEXT:  csinc {{w[0-9]+}}, [[REG]], wzr, vc
+  %1 = fcmp ueq float %a, %b
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ugt(float %a, float %b) {
+; CHECK-LABEL: fcmp_ugt
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  cset {{w[0-9]+}}, hi
+  %1 = fcmp ugt float %a, %b
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_uge(float %a, float %b) {
+; CHECK-LABEL: fcmp_uge
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  cset {{w[0-9]+}}, pl
+  %1 = fcmp uge float %a, %b
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ult(float %a, float %b) {
+; CHECK-LABEL: fcmp_ult
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  cset {{w[0-9]+}}, lt
+  %1 = fcmp ult float %a, %b
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_ule(float %a, float %b) {
+; CHECK-LABEL: fcmp_ule
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  cset {{w[0-9]+}}, le
+  %1 = fcmp ule float %a, %b
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_une(float %a, float %b) {
+; CHECK-LABEL: fcmp_une
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  cset {{w[0-9]+}}, ne
+  %1 = fcmp une float %a, %b
+  ret i1 %1
+}
+
+define zeroext i1 @fcmp_true(float %a) {
+; CHECK-LABEL: fcmp_true
+; CHECK:       orr {{w[0-9]+}}, wzr, #0x1
+  %1 = fcmp ueq float %a, %a
+  ret i1 %1
 }
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-gv.ll b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
index dc4d8953c276..1a4e8eab2d84 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
 
 ; Test load/store of global value from global offset table.
 @seed = common global i64 0, align 8
@@ -6,9 +6,9 @@
 define void @Initrand() nounwind {
 entry:
 ; CHECK: @Initrand
-; CHECK: adrp x[[REG:[0-9]+]], _seed@GOTPAGE
-; CHECK: ldr x[[REG2:[0-9]+]], [x[[REG]], _seed@GOTPAGEOFF]
-; CHECK: str x{{[0-9]+}}, [x[[REG2]]]
+; CHECK: adrp [[REG:x[0-9]+]], _seed@GOTPAGE
+; CHECK: ldr  [[REG2:x[0-9]+]], {{\[}}[[REG]], _seed@GOTPAGEOFF{{\]}}
+; CHECK: str  {{x[0-9]+}}, {{\[}}[[REG2]]{{\]}}
   store i64 74755, i64* @seed, align 8
   ret void
 }
@@ -16,17 +16,16 @@ entry:
 define i32 @Rand() nounwind {
 entry:
 ; CHECK: @Rand
-; CHECK: adrp x[[REG:[0-9]+]], _seed@GOTPAGE
-; CHECK: ldr x[[REG2:[0-9]+]], [x[[REG]], _seed@GOTPAGEOFF]
-; CHECK: movz x[[REG3:[0-9]+]], #0x51d
-; CHECK: ldr x[[REG4:[0-9]+]], [x[[REG2]]]
-; CHECK: mul x[[REG5:[0-9]+]], x[[REG4]], x[[REG3]]
-; CHECK: movz x[[REG6:[0-9]+]], #0x3619
-; CHECK: add x[[REG7:[0-9]+]], x[[REG5]], x[[REG6]]
-; CHECK: orr x[[REG8:[0-9]+]], xzr, #0xffff
-; CHECK: and x[[REG9:[0-9]+]], x[[REG7]], x[[REG8]]
-; CHECK: str x[[REG9]], [x[[REG]]]
-; CHECK: ldr x{{[0-9]+}}, [x[[REG]]]
+; CHECK: adrp [[REG1:x[0-9]+]], _seed@GOTPAGE
+; CHECK: ldr  [[REG2:x[0-9]+]], {{\[}}[[REG1]], _seed@GOTPAGEOFF{{\]}}
+; CHECK: movz [[REG3:x[0-9]+]], #0x3619
+; CHECK: movz [[REG4:x[0-9]+]], #0x51d
+; CHECK: ldr  [[REG5:x[0-9]+]], {{\[}}[[REG2]]{{\]}}
+; CHECK: mul  [[REG6:x[0-9]+]], [[REG5]], [[REG4]]
+; CHECK: add  [[REG7:x[0-9]+]], [[REG6]], [[REG3]]
+; CHECK: and  [[REG8:x[0-9]+]], [[REG7]], #0xffff
+; CHECK: str  [[REG8]], {{\[}}[[REG1]]{{\]}}
+; CHECK: ldr  {{x[0-9]+}}, {{\[}}[[REG1]]{{\]}}
   %0 = load i64* @seed, align 8
   %mul = mul nsw i64 %0, 1309
   %add = add nsw i64 %mul, 13849
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll b/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll
index 971be5c43469..245c70e8905f 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-icmp.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
 
 define i32 @icmp_eq_imm(i32 %a) nounwind ssp {
 entry:
-; CHECK: icmp_eq_imm
-; CHECK: cmp  w0, #31
-; CHECK: cset w0, eq
+; CHECK-LABEL: icmp_eq_imm
+; CHECK:       cmp w0, #31
+; CHECK-NEXT:  cset w0, eq
   %cmp = icmp eq i32 %a, 31
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -12,19 +12,19 @@ entry:
 
 define i32 @icmp_eq_neg_imm(i32 %a) nounwind ssp {
 entry:
-; CHECK: icmp_eq_neg_imm
-; CHECK: cmn  w0, #7
-; CHECK: cset w0, eq
+; CHECK-LABEL: icmp_eq_neg_imm
+; CHECK:       cmn w0, #7
+; CHECK-NEXT:  cset w0, eq
   %cmp = icmp eq i32 %a, -7
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
-define i32 @icmp_eq(i32 %a, i32 %b) nounwind ssp {
+define i32 @icmp_eq_i32(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: icmp_eq
-; CHECK: cmp  w0, w1
-; CHECK: cset w0, eq
+; CHECK-LABEL: icmp_eq_i32
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  cset w0, eq
   %cmp = icmp eq i32 %a, %b
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -32,19 +32,39 @@ entry:
 
 define i32 @icmp_ne(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: icmp_ne
-; CHECK: cmp  w0, w1
-; CHECK: cset w0, ne
+; CHECK-LABEL: icmp_ne
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  cset w0, ne
   %cmp = icmp ne i32 %a, %b
   %conv = zext i1 %cmp to i32
   ret i32 %conv
 }
 
+define i32 @icmp_eq_ptr(i8* %a) {
+entry:
+; CHECK-LABEL: icmp_eq_ptr
+; CHECK:       cmp x0, #0
+; CHECK-NEXT:  cset {{.+}}, eq
+  %cmp = icmp eq i8* %a, null
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @icmp_ne_ptr(i8* %a) {
+entry:
+; CHECK-LABEL: icmp_ne_ptr
+; CHECK:       cmp x0, #0
+; CHECK-NEXT:  cset {{.+}}, ne
+  %cmp = icmp ne i8* %a, null
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
 define i32 @icmp_ugt(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: icmp_ugt
-; CHECK: cmp  w0, w1
-; CHECK: cset w0, hi
+; CHECK-LABEL: icmp_ugt
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  cset w0, hi
   %cmp = icmp ugt i32 %a, %b
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -52,9 +72,9 @@ entry:
 
 define i32 @icmp_uge(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: icmp_uge
-; CHECK: cmp  w0, w1
-; CHECK: cset w0, hs
+; CHECK-LABEL: icmp_uge
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  cset w0, hs
   %cmp = icmp uge i32 %a, %b
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -62,9 +82,9 @@ entry:
 
 define i32 @icmp_ult(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: icmp_ult
-; CHECK: cmp  w0, w1
-; CHECK: cset w0, lo
+; CHECK-LABEL: icmp_ult
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  cset w0, lo
   %cmp = icmp ult i32 %a, %b
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -72,9 +92,9 @@ entry:
 
 define i32 @icmp_ule(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: icmp_ule
-; CHECK: cmp  w0, w1
-; CHECK: cset w0, ls
+; CHECK-LABEL: icmp_ule
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  cset w0, ls
   %cmp = icmp ule i32 %a, %b
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -82,9 +102,9 @@ entry:
 
 define i32 @icmp_sgt(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: icmp_sgt
-; CHECK: cmp  w0, w1
-; CHECK: cset w0, gt
+; CHECK-LABEL: icmp_sgt
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  cset w0, gt
   %cmp = icmp sgt i32 %a, %b
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -92,9 +112,9 @@ entry:
 
 define i32 @icmp_sge(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: icmp_sge
-; CHECK: cmp  w0, w1
-; CHECK: cset w0, ge
+; CHECK-LABEL: icmp_sge
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  cset w0, ge
   %cmp = icmp sge i32 %a, %b
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -102,9 +122,9 @@ entry:
 
 define i32 @icmp_slt(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: icmp_slt
-; CHECK: cmp  w0, w1
-; CHECK: cset w0, lt
+; CHECK-LABEL: icmp_slt
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  cset w0, lt
   %cmp = icmp slt i32 %a, %b
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -112,9 +132,9 @@ entry:
 
 define i32 @icmp_sle(i32 %a, i32 %b) nounwind ssp {
 entry:
-; CHECK: icmp_sle
-; CHECK: cmp  w0, w1
-; CHECK: cset w0, le
+; CHECK-LABEL: icmp_sle
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  cset w0, le
   %cmp = icmp sle i32 %a, %b
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -122,9 +142,9 @@ entry:
 
 define i32 @icmp_i64(i64 %a, i64 %b) nounwind ssp {
 entry:
-; CHECK: icmp_i64
-; CHECK: cmp  x0, x1
-; CHECK: cset w{{[0-9]+}}, le
+; CHECK-LABEL: icmp_i64
+; CHECK:       cmp  x0, x1
+; CHECK-NEXT:  cset w{{[0-9]+}}, le
   %cmp = icmp sle i64 %a, %b
   %conv = zext i1 %cmp to i32
   ret i32 %conv
@@ -132,33 +152,30 @@ entry:
 
 define zeroext i1 @icmp_eq_i16(i16 %a, i16 %b) nounwind ssp {
 entry:
-; CHECK: icmp_eq_i16
-; CHECK: sxth w0, w0
-; CHECK: sxth w1, w1
-; CHECK: cmp  w0, w1
-; CHECK: cset w0, eq
+; CHECK-LABEL: icmp_eq_i16
+; CHECK:       sxth w0, w0
+; CHECK:       cmp w0, w1, sxth
+; CHECK-NEXT:  cset w0, eq
   %cmp = icmp eq i16 %a, %b
   ret i1 %cmp
 }
 
 define zeroext i1 @icmp_eq_i8(i8 %a, i8 %b) nounwind ssp {
 entry:
-; CHECK: icmp_eq_i8
-; CHECK: sxtb w0, w0
-; CHECK: sxtb w1, w1
-; CHECK: cmp  w0, w1
-; CHECK: cset w0, eq
+; CHECK-LABEL: icmp_eq_i8
+; CHECK:       sxtb w0, w0
+; CHECK-NEXT:  cmp w0, w1, sxtb
+; CHECK-NEXT:  cset w0, eq
   %cmp = icmp eq i8 %a, %b
   ret i1 %cmp
 }
 
 define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind {
 entry:
-; CHECK: icmp_i16_unsigned
-; CHECK: uxth w0, w0
-; CHECK: uxth w1, w1
-; CHECK: cmp  w0, w1
-; CHECK: cset w0, lo
+; CHECK-LABEL: icmp_i16_unsigned
+; CHECK:       uxth w0, w0
+; CHECK-NEXT:  cmp w0, w1, uxth
+; CHECK-NEXT:  cset w0, lo
   %cmp = icmp ult i16 %a, %b
   %conv2 = zext i1 %cmp to i32
   ret i32 %conv2
@@ -166,24 +183,34 @@ entry:
 
 define i32 @icmp_i8_signed(i8 %a, i8 %b) nounwind {
 entry:
-; CHECK: @icmp_i8_signed
-; CHECK: sxtb w0, w0
-; CHECK: sxtb w1, w1
-; CHECK: cmp  w0, w1
-; CHECK: cset w0, gt
+; CHECK-LABEL: icmp_i8_signed
+; CHECK:       sxtb w0, w0
+; CHECK-NEXT:  cmp w0, w1, sxtb
+; CHECK-NEXT:  cset w0, gt
   %cmp = icmp sgt i8 %a, %b
   %conv2 = zext i1 %cmp to i32
   ret i32 %conv2
 }
 
+define i32 @icmp_i1_signed(i1 %a, i1 %b) nounwind {
+entry:
+; CHECK-LABEL: icmp_i1_signed
+; CHECK:       sbfx [[REG1:w[0-9]+]], w0, #0, #1
+; CHECK-NEXT:  sbfx [[REG2:w[0-9]+]], w1, #0, #1
+; CHECK-NEXT:  cmp  [[REG1]], [[REG2]]
+; CHECK-NEXT:  cset w0, gt
+  %cmp = icmp sgt i1 %a, %b
+  %conv2 = zext i1 %cmp to i32
+  ret i32 %conv2
+}
 
 define i32 @icmp_i16_signed_const(i16 %a) nounwind {
 entry:
-; CHECK: icmp_i16_signed_const
-; CHECK: sxth w0, w0
-; CHECK: cmn  w0, #233
-; CHECK: cset w0, lt
-; CHECK: and w0, w0, #0x1
+; CHECK-LABEL: icmp_i16_signed_const
+; CHECK:       sxth w0, w0
+; CHECK-NEXT:  cmn w0, #233
+; CHECK-NEXT:  cset w0, lt
+; CHECK-NEXT:  and w0, w0, #0x1
   %cmp = icmp slt i16 %a, -233
   %conv2 = zext i1 %cmp to i32
   ret i32 %conv2
@@ -191,11 +218,11 @@ entry:
 
 define i32 @icmp_i8_signed_const(i8 %a) nounwind {
 entry:
-; CHECK: icmp_i8_signed_const
-; CHECK: sxtb w0, w0
-; CHECK: cmp  w0, #124
-; CHECK: cset w0, gt
-; CHECK: and w0, w0, #0x1
+; CHECK-LABEL: icmp_i8_signed_const
+; CHECK:       sxtb w0, w0
+; CHECK-NEXT:  cmp w0, #124
+; CHECK-NEXT:  cset w0, gt
+; CHECK-NEXT:  and w0, w0, #0x1
   %cmp = icmp sgt i8 %a, 124
   %conv2 = zext i1 %cmp to i32
   ret i32 %conv2
@@ -203,11 +230,11 @@ entry:
 
 define i32 @icmp_i1_unsigned_const(i1 %a) nounwind {
 entry:
-; CHECK: icmp_i1_unsigned_const
-; CHECK: and w0, w0, #0x1
-; CHECK: cmp  w0, #0
-; CHECK: cset w0, lo
-; CHECK: and w0, w0, #0x1
+; CHECK-LABEL: icmp_i1_unsigned_const
+; CHECK:       and w0, w0, #0x1
+; CHECK-NEXT:  cmp w0, #0
+; CHECK-NEXT:  cset w0, lo
+; CHECK-NEXT:  and w0, w0, #0x1
   %cmp = icmp ult i1 %a, 0
   %conv2 = zext i1 %cmp to i32
   ret i32 %conv2
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll b/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
index 70335ace50c1..a5f45249678b 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
 
 @fn.table = internal global [2 x i8*] [i8* blockaddress(@fn, %ZERO), i8* blockaddress(@fn, %ONE)], align 8
 
 define i32 @fn(i32 %target) nounwind {
 entry:
-; CHECK: @fn
+; CHECK-LABEL: fn
   %retval = alloca i32, align 4
   %target.addr = alloca i32, align 4
   store i32 %target, i32* %target.addr, align 4
@@ -29,8 +29,8 @@ return:                                           ; preds = %ONE, %ZERO
   ret i32 %2
 
 indirectgoto:                                     ; preds = %entry
-; CHECK: ldr x0, [sp]
-; CHECK: br x0
+; CHECK:      ldr [[REG:x[0-9]+]], [sp]
+; CHECK-NEXT: br [[REG]]
   %indirect.goto.dest = phi i8* [ %1, %entry ]
   indirectbr i8* %indirect.goto.dest, [label %ZERO, label %ONE]
 }
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
index 115298805ac1..9ac3e4431830 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-intrinsic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=arm64-apple-ios | FileCheck %s --check-prefix=ARM64
+; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=arm64-apple-ios < %s | FileCheck %s --check-prefix=ARM64
 
 @message = global [80 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 16
 @temp = common global [80 x i8] zeroinitializer, align 16
@@ -7,7 +7,7 @@ define void @t1() {
 ; ARM64-LABEL: t1
 ; ARM64: adrp x8, _message@PAGE
 ; ARM64: add x0, x8, _message@PAGEOFF
-; ARM64: movz w9, #0
+; ARM64: mov w9, wzr
 ; ARM64: movz x2, #0x50
 ; ARM64: uxtb w1, w9
 ; ARM64: bl _memset
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll b/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll
index ffac131f0cab..1dea5d944be0 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-materialize.ll
@@ -1,27 +1,41 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
 
 ; Materialize using fmov
-define void @float_(float* %value) {
-; CHECK: @float_
-; CHECK: fmov s0, #1.25000000
-  store float 1.250000e+00, float* %value, align 4
-  ret void
+define float @fmov_float1() {
+; CHECK-LABEL: fmov_float1
+; CHECK:       fmov s0, #1.25000000
+  ret float 1.250000e+00
 }
 
-define void @double_(double* %value) {
-; CHECK: @double_
-; CHECK: fmov d0, #1.25000000
-  store double 1.250000e+00, double* %value, align 8
-  ret void
+define float @fmov_float2() {
+; CHECK-LABEL: fmov_float2
+; CHECK:       fmov s0, wzr
+  ret float 0.0e+00
+}
+
+define double @fmov_double1() {
+; CHECK-LABEL: fmov_double1
+; CHECK:       fmov d0, #1.25000000
+  ret double 1.250000e+00
+}
+
+define double @fmov_double2() {
+; CHECK-LABEL: fmov_double2
+; CHECK:       fmov d0, xzr
+  ret double 0.0e+00
 }
 
 ; Materialize from constant pool
-define float @float_cp() {
-; CHECK: @float_cp
+define float @cp_float() {
+; CHECK-LABEL: cp_float
+; CHECK:       adrp [[REG:x[0-9]+]], {{lCPI[0-9]+_0}}@PAGE
+; CHECK-NEXT:  ldr s0, {{\[}}[[REG]], {{lCPI[0-9]+_0}}@PAGEOFF{{\]}}
   ret float 0x400921FB60000000
 }
 
-define double @double_cp() {
-; CHECK: @double_cp
+define double @cp_double() {
+; CHECK-LABEL: cp_double
+; CHECK:       adrp [[REG:x[0-9]+]], {{lCPI[0-9]+_0}}@PAGE
+; CHECK-NEXT:  ldr d0, {{\[}}[[REG]], {{lCPI[0-9]+_0}}@PAGEOFF{{\]}}
   ret double 0x400921FB54442D18
 }
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll b/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll
index 483d1799f9c8..81daa7c1d5ac 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-noconvert.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm64-apple-ios -O0 %s -o - | FileCheck %s
+; RUN: llc -O0 -verify-machineinstrs -mtriple=aarch64-apple-ios < %s | FileCheck %s
 
 ; Fast-isel can't do vector conversions yet, but it was emitting some highly
 ; suspect UCVTFUWDri MachineInstrs.
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-rem.ll b/test/CodeGen/AArch64/arm64-fast-isel-rem.ll
index d5bdbaae9e75..26f9afaccee7 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-rem.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-rem.ll
@@ -1,7 +1,6 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
 ; RUN: llc %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin -print-machineinstrs=expand-isel-pseudos -o /dev/null 2> %t
 ; RUN: FileCheck %s < %t --check-prefix=CHECK-SSA
-; REQUIRES: asserts
 
 ; CHECK-SSA-LABEL: Machine code for function t1
 
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-ret.ll b/test/CodeGen/AArch64/arm64-fast-isel-ret.ll
index d91fd285d551..f84c75504f64 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel-ret.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel-ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
 
 ;; Test returns.
 define void @t0() nounwind ssp {
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-select.ll b/test/CodeGen/AArch64/arm64-fast-isel-select.ll
deleted file mode 100644
index 1cc207f59155..000000000000
--- a/test/CodeGen/AArch64/arm64-fast-isel-select.ll
+++ /dev/null
@@ -1,63 +0,0 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
-
-define i32 @t1(i32 %c) nounwind readnone {
-entry:
-; CHECK: @t1
-; CHECK: and w0, w0, #0x1
-; CHECK: subs w0, w0, #0
-; CHECK: csel w0, w{{[0-9]+}}, w{{[0-9]+}}, ne
-  %0 = icmp sgt i32 %c, 1
-  %1 = select i1 %0, i32 123, i32 357
-  ret i32 %1
-}
-
-define i64 @t2(i32 %c) nounwind readnone {
-entry:
-; CHECK: @t2
-; CHECK: and w0, w0, #0x1
-; CHECK: subs w0, w0, #0
-; CHECK: csel x0, x{{[0-9]+}}, x{{[0-9]+}}, ne
-  %0 = icmp sgt i32 %c, 1
-  %1 = select i1 %0, i64 123, i64 357
-  ret i64 %1
-}
-
-define i32 @t3(i1 %c, i32 %a, i32 %b) nounwind readnone {
-entry:
-; CHECK: @t3
-; CHECK: and w0, w0, #0x1
-; CHECK: subs w0, w0, #0
-; CHECK: csel w0, w{{[0-9]+}}, w{{[0-9]+}}, ne
-  %0 = select i1 %c, i32 %a, i32 %b
-  ret i32 %0
-}
-
-define i64 @t4(i1 %c, i64 %a, i64 %b) nounwind readnone {
-entry:
-; CHECK: @t4
-; CHECK: and w0, w0, #0x1
-; CHECK: subs w0, w0, #0
-; CHECK: csel x0, x{{[0-9]+}}, x{{[0-9]+}}, ne
-  %0 = select i1 %c, i64 %a, i64 %b
-  ret i64 %0
-}
-
-define float @t5(i1 %c, float %a, float %b) nounwind readnone {
-entry:
-; CHECK: @t5
-; CHECK: and w0, w0, #0x1
-; CHECK: subs w0, w0, #0
-; CHECK: fcsel s0, s0, s1, ne
-  %0 = select i1 %c, float %a, float %b
-  ret float %0
-}
-
-define double @t6(i1 %c, double %a, double %b) nounwind readnone {
-entry:
-; CHECK: @t6
-; CHECK: and w0, w0, #0x1
-; CHECK: subs w0, w0, #0
-; CHECK: fcsel d0, d0, d1, ne
-  %0 = select i1 %c, double %a, double %b
-  ret double %0
-}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel-store.ll b/test/CodeGen/AArch64/arm64-fast-isel-store.ll
new file mode 100644
index 000000000000..9494d5553010
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-fast-isel-store.ll
@@ -0,0 +1,30 @@
+; RUN: llc -mtriple=aarch64-unknown-unknown                             -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-unknown-unknown -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+define void @store_i8(i8* %a) {
+; CHECK-LABEL: store_i8
+; CHECK: strb  wzr, [x0]
+  store i8 0, i8* %a
+  ret void
+}
+
+define void @store_i16(i16* %a) {
+; CHECK-LABEL: store_i16
+; CHECK: strh  wzr, [x0]
+  store i16 0, i16* %a
+  ret void
+}
+
+define void @store_i32(i32* %a) {
+; CHECK-LABEL: store_i32
+; CHECK: str  wzr, [x0]
+  store i32 0, i32* %a
+  ret void
+}
+
+define void @store_i64(i64* %a) {
+; CHECK-LABEL: store_i64
+; CHECK: str  xzr, [x0]
+  store i64 0, i64* %a
+  ret void
+}
diff --git a/test/CodeGen/AArch64/arm64-fast-isel.ll b/test/CodeGen/AArch64/arm64-fast-isel.ll
index 0194b3a6c2d4..434994607c62 100644
--- a/test/CodeGen/AArch64/arm64-fast-isel.ll
+++ b/test/CodeGen/AArch64/arm64-fast-isel.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -fast-isel-abort -mtriple=arm64-apple-darwin | FileCheck %s
+; RUN: llc -O0 -fast-isel-abort -verify-machineinstrs -mtriple=arm64-apple-darwin < %s | FileCheck %s
 
 define void @t0(i32 %a) nounwind {
 entry:
@@ -66,8 +66,7 @@ entry:
 define void @t4(i32 *%ptr) nounwind {
 entry:
 ; CHECK-LABEL: t4:
-; CHECK: movz w8, #0
-; CHECK: stur w8, [x0, #-4]
+; CHECK: stur wzr, [x0, #-4]
 ; CHECK: ret
   %0 = getelementptr i32 *%ptr, i32 -1
   store i32 0, i32* %0, align 4
@@ -77,8 +76,7 @@ entry:
 define void @t5(i32 *%ptr) nounwind {
 entry:
 ; CHECK-LABEL: t5:
-; CHECK: movz w8, #0
-; CHECK: stur w8, [x0, #-256]
+; CHECK: stur wzr, [x0, #-256]
 ; CHECK: ret
   %0 = getelementptr i32 *%ptr, i32 -64
   store i32 0, i32* %0, align 4
diff --git a/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll b/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll
index 8a744c513d7a..a9b8024a5c62 100644
--- a/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll
+++ b/test/CodeGen/AArch64/arm64-fastcc-tailcall.ll
@@ -19,6 +19,6 @@ define internal fastcc void @callee(i32* nocapture %p, i32 %a) nounwind optsize
   ret void
 }
 
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!0 = !{!"int", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/AArch64/arm64-fold-address.ll b/test/CodeGen/AArch64/arm64-fold-address.ll
index 96cc3e90f63a..1f0b918ecd9c 100644
--- a/test/CodeGen/AArch64/arm64-fold-address.ll
+++ b/test/CodeGen/AArch64/arm64-fold-address.ll
@@ -72,8 +72,8 @@ entry:
 
 !llvm.module.flags = !{!0, !1, !2, !3}
 
-!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
-!4 = metadata !{}
+!0 = !{i32 1, !"Objective-C Version", i32 2}
+!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
+!4 = !{}
diff --git a/test/CodeGen/AArch64/arm64-frameaddr.ll b/test/CodeGen/AArch64/arm64-frameaddr.ll
deleted file mode 100644
index 469078c88143..000000000000
--- a/test/CodeGen/AArch64/arm64-frameaddr.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=arm64 | FileCheck %s
-
-define i8* @t() nounwind {
-entry:
-; CHECK-LABEL: t:
-; CHECK: stp x29, x30, [sp, #-16]!
-; CHECK: mov x29, sp
-; CHECK: mov x0, x29
-; CHECK: ldp x29, x30, [sp], #16
-; CHECK: ret
-	%0 = call i8* @llvm.frameaddress(i32 0)
-        ret i8* %0
-}
-
-declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/test/CodeGen/AArch64/arm64-indexed-memory.ll b/test/CodeGen/AArch64/arm64-indexed-memory.ll
index e501c6e403bd..a8620f428da4 100644
--- a/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -349,3 +349,15 @@ define i8* @preidx8sext64(i8* %src, i64* %out) {
   store i64 %ext, i64* %out, align 4
   ret i8* %ptr
 }
+
+; This test checks if illegal post-index is generated
+
+define i64* @postidx_clobber(i64* %addr) nounwind noinline ssp {
+; CHECK-LABEL: postidx_clobber:
+; CHECK-NOT: str     x0, [x0], #8
+; ret
+ %paddr = bitcast i64* %addr to i64**
+ store i64* %addr, i64** %paddr
+ %newaddr = getelementptr i64* %addr, i32 1
+ ret i64* %newaddr
+}
diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
index c118f109289b..917911ad2d40 100644
--- a/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
+++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
@@ -34,7 +34,7 @@ declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #1
 attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
-!1 = metadata !{metadata !2, metadata !2, i64 0}
-!2 = metadata !{metadata !"double", metadata !3, i64 0}
-!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
-!4 = metadata !{metadata !"Simple C/C++ TBAA"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"double", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/AArch64/arm64-inline-asm.ll b/test/CodeGen/AArch64/arm64-inline-asm.ll
index d76cca3f21c6..9c8bcaadc17c 100644
--- a/test/CodeGen/AArch64/arm64-inline-asm.ll
+++ b/test/CodeGen/AArch64/arm64-inline-asm.ll
@@ -87,13 +87,17 @@ entry:
   ret i32 %1
 }
 
-define i32 @constraint_J(i32 %i, i32 %j) nounwind {
+define i32 @constraint_J(i32 %i, i32 %j, i64 %k) nounwind {
 entry:
   ; CHECK-LABEL: constraint_J:
   %0 = tail call i32 asm sideeffect "sub ${0:w}, ${1:w}, $2", "=r,r,J"(i32 %i, i32 -16773120) nounwind
-  ; CHECK: sub   {{w[0-9]+}}, {{w[0-9]+}}, #4278194176
+  ; CHECK: sub   {{w[0-9]+}}, {{w[0-9]+}}, #-16773120
   %1 = tail call i32 asm sideeffect "sub ${0:w}, ${1:w}, $2", "=r,r,J"(i32 %i, i32 -1) nounwind
-  ; CHECK: sub   {{w[0-9]+}}, {{w[0-9]+}}, #4294967295
+  ; CHECK: sub   {{w[0-9]+}}, {{w[0-9]+}}, #-1
+  %2 = tail call i64 asm sideeffect "sub ${0:x}, ${1:x}, $2", "=r,r,J"(i64 %k, i32 -1) nounwind
+  ; CHECK: sub   {{x[0-9]+}}, {{x[0-9]+}}, #-1
+  %3 = tail call i64 asm sideeffect "sub ${0:x}, ${1:x}, $2", "=r,r,J"(i64 %k, i64 -1) nounwind
+  ; CHECK: sub   {{x[0-9]+}}, {{x[0-9]+}}, #-1
   ret i32 %1
 }
 
diff --git a/test/CodeGen/AArch64/arm64-named-reg-alloc.ll b/test/CodeGen/AArch64/arm64-named-reg-alloc.ll
index d86d2e617ee5..0c564544a538 100644
--- a/test/CodeGen/AArch64/arm64-named-reg-alloc.ll
+++ b/test/CodeGen/AArch64/arm64-named-reg-alloc.ll
@@ -11,4 +11,4 @@ entry:
 
 declare i32 @llvm.read_register.i32(metadata) nounwind
 
-!0 = metadata !{metadata !"x5\00"}
+!0 = !{!"x5\00"}
diff --git a/test/CodeGen/AArch64/arm64-named-reg-notareg.ll b/test/CodeGen/AArch64/arm64-named-reg-notareg.ll
index 3ca14c408f4b..759bc15807b5 100644
--- a/test/CodeGen/AArch64/arm64-named-reg-notareg.ll
+++ b/test/CodeGen/AArch64/arm64-named-reg-notareg.ll
@@ -10,4 +10,4 @@ entry:
 
 declare i32 @llvm.read_register.i32(metadata) nounwind
 
-!0 = metadata !{metadata !"notareg\00"}
+!0 = !{!"notareg\00"}
diff --git a/test/CodeGen/AArch64/arm64-neon-select_cc.ll b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
index 95c582a5348c..d334c0846ace 100644
--- a/test/CodeGen/AArch64/arm64-neon-select_cc.ll
+++ b/test/CodeGen/AArch64/arm64-neon-select_cc.ll
@@ -204,3 +204,18 @@ define <2 x double> @test_select_cc_v2f64(double %a, double %b, <2 x double> %c,
   %e = select i1 %cmp31, <2 x double> %c, <2 x double> %d
   ret <2 x double> %e
 }
+
+; Special case: when the select condition is an icmp with i1 operands, don't
+; do the comparison on vectors.
+; Part of PR21549.
+define <2 x i32> @test_select_cc_v2i32_icmpi1(i1 %cc, <2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: test_select_cc_v2i32_icmpi1:
+; CHECK: tst   w0, #0x1
+; CHECK: csetm [[MASK:w[0-9]+]], ne
+; CHECK: dup   [[DUPMASK:v[0-9]+]].2s, [[MASK]]
+; CHECK: bsl   [[DUPMASK]].8b, v0.8b, v1.8b
+; CHECK: mov   v0.16b, [[DUPMASK]].16b
+  %cmp = icmp ne i1 %cc, 0
+  %e = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
+  ret <2 x i32> %e
+}
diff --git a/test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll b/test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll
new file mode 100644
index 000000000000..d39722b9c8a5
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone < %s | FileCheck %s
+
+; Test that scratch registers are spilled around patchpoints
+; CHECK: InlineAsm End
+; CHECK-NEXT: mov x{{[0-9]+}}, x16
+; CHECK-NEXT: mov x{{[0-9]+}}, x17
+; CHECK-NEXT: Ltmp
+; CHECK-NEXT: nop
+define void @clobberScratch(i32* %p) {
+  %v = load i32* %p
+  tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind
+  tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 5, i32 20, i8* null, i32 0, i32* %p, i32 %v)
+  store i32 %v, i32* %p
+  ret void
+}
+
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+
diff --git a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
new file mode 100644
index 000000000000..8f79f80ba33d
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll
@@ -0,0 +1,118 @@
+; RUN: llc -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone            < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone -fast-isel < %s | FileCheck %s --check-prefix=FAST
+
+; One argument will be passed in register, the other will be pushed on the stack.
+; Return value in x0.
+define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+; CHECK-LABEL: jscall_patchpoint_codegen:
+; CHECK:       Ltmp
+; CHECK:       str x{{.+}}, [sp]
+; CHECK-NEXT:  mov  x0, x{{.+}}
+; CHECK:       Ltmp
+; CHECK-NEXT:  movz  x16, #0xffff, lsl #32
+; CHECK-NEXT:  movk  x16, #0xdead, lsl #16
+; CHECK-NEXT:  movk  x16, #0xbeef
+; CHECK-NEXT:  blr x16
+; FAST-LABEL:  jscall_patchpoint_codegen:
+; FAST:        Ltmp
+; FAST:        str x{{.+}}, [sp]
+; FAST:        Ltmp
+; FAST-NEXT:   movz  x16, #0xffff, lsl #32
+; FAST-NEXT:   movk  x16, #0xdead, lsl #16
+; FAST-NEXT:   movk  x16, #0xbeef
+; FAST-NEXT:   blr x16
+  %resolveCall2 = inttoptr i64 281474417671919 to i8*
+  %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2)
+  %resolveCall3 = inttoptr i64 244837814038255 to i8*
+  tail call webkit_jscc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 20, i8* %resolveCall3, i32 2, i64 %p4, i64 %result)
+  ret void
+}
+
+; Test if the arguments are properly aligned and that we don't store undef arguments.
+define i64 @jscall_patchpoint_codegen2(i64 %callee) {
+entry:
+; CHECK-LABEL: jscall_patchpoint_codegen2:
+; CHECK:       Ltmp
+; CHECK:       orr w[[REG:[0-9]+]], wzr, #0x6
+; CHECK-NEXT:  str x[[REG]], [sp, #24]
+; CHECK-NEXT:  orr w[[REG:[0-9]+]], wzr, #0x4
+; CHECK-NEXT:  str w[[REG]], [sp, #16]
+; CHECK-NEXT:  orr w[[REG:[0-9]+]], wzr, #0x2
+; CHECK-NEXT:  str x[[REG]], [sp]
+; CHECK:       Ltmp
+; CHECK-NEXT:  movz  x16, #0xffff, lsl #32
+; CHECK-NEXT:  movk  x16, #0xdead, lsl #16
+; CHECK-NEXT:  movk  x16, #0xbeef
+; CHECK-NEXT:  blr x16
+; FAST-LABEL:  jscall_patchpoint_codegen2:
+; FAST:        Ltmp
+; FAST:        orr [[REG1:x[0-9]+]], xzr, #0x2
+; FAST-NEXT:   orr [[REG2:w[0-9]+]], wzr, #0x4
+; FAST-NEXT:   orr [[REG3:x[0-9]+]], xzr, #0x6
+; FAST-NEXT:   str [[REG1]], [sp]
+; FAST-NEXT:   str [[REG2]], [sp, #16]
+; FAST-NEXT:   str [[REG3]], [sp, #24]
+; FAST:        Ltmp
+; FAST-NEXT:   movz  x16, #0xffff, lsl #32
+; FAST-NEXT:   movk  x16, #0xdead, lsl #16
+; FAST-NEXT:   movk  x16, #0xbeef
+; FAST-NEXT:   blr x16
+  %call = inttoptr i64 281474417671919 to i8*
+  %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
+  ret i64 %result
+}
+
+; Test if the arguments are properly aligned and that we don't store undef arguments.
+define i64 @jscall_patchpoint_codegen3(i64 %callee) {
+entry:
+; CHECK-LABEL: jscall_patchpoint_codegen3:
+; CHECK:       Ltmp
+; CHECK:       movz  w[[REG:[0-9]+]], #0xa
+; CHECK-NEXT:  str x[[REG]], [sp, #48]
+; CHECK-NEXT:  orr w[[REG:[0-9]+]], wzr, #0x8
+; CHECK-NEXT:  str w[[REG]], [sp, #36]
+; CHECK-NEXT:  orr w[[REG:[0-9]+]], wzr, #0x6
+; CHECK-NEXT:  str x[[REG]], [sp, #24]
+; CHECK-NEXT:  orr w[[REG:[0-9]+]], wzr, #0x4
+; CHECK-NEXT:  str w[[REG]], [sp, #16]
+; CHECK-NEXT:  orr w[[REG:[0-9]+]], wzr, #0x2
+; CHECK-NEXT:  str x[[REG]], [sp]
+; CHECK:       Ltmp
+; CHECK-NEXT:  movz  x16, #0xffff, lsl #32
+; CHECK-NEXT:  movk  x16, #0xdead, lsl #16
+; CHECK-NEXT:  movk  x16, #0xbeef
+; CHECK-NEXT:  blr x16
+; FAST-LABEL:  jscall_patchpoint_codegen3:
+; FAST:        Ltmp
+; FAST:        orr [[REG1:x[0-9]+]], xzr, #0x2
+; FAST-NEXT:   orr [[REG2:w[0-9]+]], wzr, #0x4
+; FAST-NEXT:   orr [[REG3:x[0-9]+]], xzr, #0x6
+; FAST-NEXT:   orr [[REG4:w[0-9]+]], wzr, #0x8
+; FAST-NEXT:   movz [[REG5:x[0-9]+]], #0xa
+; FAST-NEXT:   str [[REG1]], [sp]
+; FAST-NEXT:   str [[REG2]], [sp, #16]
+; FAST-NEXT:   str [[REG3]], [sp, #24]
+; FAST-NEXT:   str [[REG4]], [sp, #36]
+; FAST-NEXT:   str [[REG5]], [sp, #48]
+; FAST:        Ltmp
+; FAST-NEXT:   movz  x16, #0xffff, lsl #32
+; FAST-NEXT:   movk  x16, #0xdead, lsl #16
+; FAST-NEXT:   movk  x16, #0xbeef
+; FAST-NEXT:   blr x16
+  %call = inttoptr i64 281474417671919 to i8*
+  %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10)
+  ret i64 %result
+}
+
+; CHECK-LABEL: test_i16:
+; CHECK: ldrh [[BREG:w[0-9]+]], [sp]
+; CHECK: add {{w[0-9]+}}, w0, [[BREG]]
+define webkit_jscc zeroext i16 @test_i16(i16 zeroext %a, i16 zeroext %b) {
+  %sum = add i16 %a, %b
+  ret i16 %sum
+}
+
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
+
diff --git a/test/CodeGen/AArch64/arm64-patchpoint.ll b/test/CodeGen/AArch64/arm64-patchpoint.ll
index 039cdfcc3858..278cba5d9f49 100644
--- a/test/CodeGen/AArch64/arm64-patchpoint.ll
+++ b/test/CodeGen/AArch64/arm64-patchpoint.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone                             < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -enable-misched=0 -mcpu=cyclone -fast-isel -fast-isel-abort < %s | FileCheck %s
 
 ; Trivial patchpoint codegen
 ;
@@ -41,73 +42,6 @@ entry:
   ret void
 }
 
-; Test the webkit_jscc calling convention.
-; One argument will be passed in register, the other will be pushed on the stack.
-; Return value in x0.
-define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
-entry:
-; CHECK-LABEL: jscall_patchpoint_codegen:
-; CHECK:      Ltmp
-; CHECK:      str x{{.+}}, [sp]
-; CHECK-NEXT: mov  x0, x{{.+}}
-; CHECK:      Ltmp
-; CHECK-NEXT: movz  x16, #0xffff, lsl #32
-; CHECK-NEXT: movk  x16, #0xdead, lsl #16
-; CHECK-NEXT: movk  x16, #0xbeef
-; CHECK-NEXT: blr x16
-  %resolveCall2 = inttoptr i64 281474417671919 to i8*
-  %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2)
-  %resolveCall3 = inttoptr i64 244837814038255 to i8*
-  tail call webkit_jscc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 20, i8* %resolveCall3, i32 2, i64 %p4, i64 %result)
-  ret void
-}
-
-; Test if the arguments are properly aligned and that we don't store undef arguments.
-define i64 @jscall_patchpoint_codegen2(i64 %callee) {
-entry:
-; CHECK-LABEL: jscall_patchpoint_codegen2:
-; CHECK:      Ltmp
-; CHECK:      orr w{{.+}}, wzr, #0x6
-; CHECK-NEXT: str x{{.+}}, [sp, #24]
-; CHECK-NEXT: orr w{{.+}}, wzr, #0x4
-; CHECK-NEXT: str w{{.+}}, [sp, #16]
-; CHECK-NEXT: orr w{{.+}}, wzr, #0x2
-; CHECK-NEXT: str x{{.+}}, [sp]
-; CHECK:      Ltmp
-; CHECK-NEXT: movz  x16, #0xffff, lsl #32
-; CHECK-NEXT: movk  x16, #0xdead, lsl #16
-; CHECK-NEXT: movk  x16, #0xbeef
-; CHECK-NEXT: blr x16
-  %call = inttoptr i64 281474417671919 to i8*
-  %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
-  ret i64 %result
-}
-
-; Test if the arguments are properly aligned and that we don't store undef arguments.
-define i64 @jscall_patchpoint_codegen3(i64 %callee) {
-entry:
-; CHECK-LABEL: jscall_patchpoint_codegen3:
-; CHECK:      Ltmp
-; CHECK:      movz  w{{.+}}, #0xa
-; CHECK-NEXT: str x{{.+}}, [sp, #48]
-; CHECK-NEXT: orr w{{.+}}, wzr, #0x8
-; CHECK-NEXT: str w{{.+}}, [sp, #36]
-; CHECK-NEXT: orr w{{.+}}, wzr, #0x6
-; CHECK-NEXT: str x{{.+}}, [sp, #24]
-; CHECK-NEXT: orr w{{.+}}, wzr, #0x4
-; CHECK-NEXT: str w{{.+}}, [sp, #16]
-; CHECK-NEXT: orr w{{.+}}, wzr, #0x2
-; CHECK-NEXT: str x{{.+}}, [sp]
-; CHECK:      Ltmp
-; CHECK-NEXT: movz  x16, #0xffff, lsl #32
-; CHECK-NEXT: movk  x16, #0xdead, lsl #16
-; CHECK-NEXT: movk  x16, #0xbeef
-; CHECK-NEXT: blr x16
-  %call = inttoptr i64 281474417671919 to i8*
-  %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 20, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10)
-  ret i64 %result
-}
-
 ; Test patchpoints reusing the same TargetConstant.
 ; <rdar:15390785> Assertion failed: (CI.getNumArgOperands() >= NumArgs + 4)
 ; There is no way to verify this, since it depends on memory allocation.
@@ -144,28 +78,7 @@ entry:
   ret void
 }
 
-; Test that scratch registers are spilled around patchpoints
-; CHECK: InlineAsm End
-; CHECK-NEXT: mov x{{[0-9]+}}, x16
-; CHECK-NEXT: mov x{{[0-9]+}}, x17
-; CHECK-NEXT: Ltmp
-; CHECK-NEXT: nop
-define void @clobberScratch(i32* %p) {
-  %v = load i32* %p
-  tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind
-  tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 5, i32 20, i8* null, i32 0, i32* %p, i32 %v)
-  store i32 %v, i32* %p
-  ret void
-}
-
 declare void @llvm.experimental.stackmap(i64, i32, ...)
 declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
 declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
 
-; CHECK-LABEL: test_i16:
-; CHECK: ldrh [[BREG:w[0-9]+]], [sp]
-; CHECK: add w0, w0, [[BREG]]
-define webkit_jscc i16 @test_i16(i16 zeroext %a, i16 zeroext %b) {
-  %sum = add i16 %a, %b
-  ret i16 %sum
-}
diff --git a/test/CodeGen/AArch64/arm64-popcnt.ll b/test/CodeGen/AArch64/arm64-popcnt.ll
index 2afade2ee750..117ab3a5e05a 100644
--- a/test/CodeGen/AArch64/arm64-popcnt.ll
+++ b/test/CodeGen/AArch64/arm64-popcnt.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=aarch64 -mattr -neon -aarch64-neon-syntax=apple | FileCheck -check-prefix=CHECK-NONEON %s
 
 define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
   %cnt = tail call i32 @llvm.ctpop.i32(i32 %x)
@@ -8,6 +9,13 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone {
 ; CHECK: uaddlv.8b	h0, v0
 ; CHECK: fmov w0, s0
 ; CHECK: ret
+; CHECK-NONEON-LABEL: cnt32_advsimd
+; CHECK-NONEON-NOT: 8b
+; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0x55555555
+; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0x33333333
+; CHECK-NONEON: and w{{[0-9]+}}, w{{[0-9]+}}, #0xf0f0f0f
+; CHECK-NONEON: mul
+
 }
 
 define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
@@ -18,6 +26,12 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone {
 ; CHECK: uaddlv.8b	h0, v0
 ; CHECK: fmov	w0, s0
 ; CHECK: ret
+; CHECK-NONEON-LABEL: cnt64_advsimd
+; CHECK-NONEON-NOT: 8b
+; CHECK-NONEON: and x{{[0-9]+}}, x{{[0-9]+}}, #0x5555555555555555
+; CHECK-NONEON: and x{{[0-9]+}}, x{{[0-9]+}}, #0x3333333333333333
+; CHECK-NONEON: and x{{[0-9]+}}, x{{[0-9]+}}, #0xf0f0f0f0f0f0f0f
+; CHECK-NONEON: mul
 }
 
 ; Do not use AdvSIMD when -mno-implicit-float is specified.
diff --git a/test/CodeGen/AArch64/arm64-prefetch.ll b/test/CodeGen/AArch64/arm64-prefetch.ll
index b2e06edf931c..aac3515c4278 100644
--- a/test/CodeGen/AArch64/arm64-prefetch.ll
+++ b/test/CodeGen/AArch64/arm64-prefetch.ll
@@ -17,6 +17,15 @@ entry:
   ; CHECK: prfum pldl1keep
   call void @llvm.prefetch(i8* %tmp, i32 0, i32 3, i32 1)
 
+  ; CHECK: prfum plil1strm
+  call void @llvm.prefetch(i8* %tmp, i32 0, i32 0, i32 0)
+  ; CHECK: prfum plil3keep
+  call void @llvm.prefetch(i8* %tmp, i32 0, i32 1, i32 0)
+  ; CHECK: prfum plil2keep
+  call void @llvm.prefetch(i8* %tmp, i32 0, i32 2, i32 0)
+  ; CHECK: prfum plil1keep
+  call void @llvm.prefetch(i8* %tmp, i32 0, i32 3, i32 0)
+
   ; CHECK: prfum pstl1strm
   call void @llvm.prefetch(i8* %tmp, i32 1, i32 0, i32 1)
   ; CHECK: prfum pstl3keep
@@ -57,32 +66,58 @@ entry:
   %arrayidx12 = getelementptr inbounds i32* %tmp10, i64 %idxprom
   %tmp11 = bitcast i32* %arrayidx12 to i8*
 
-  ; CHECK: prfm pstl1strm
-  call void @llvm.prefetch(i8* %tmp11, i32 1, i32 0, i32 1)
+
+  ; CHECK: prfm plil1strm
+  call void @llvm.prefetch(i8* %tmp11, i32 0, i32 0, i32 0)
   %tmp12 = load i32** @a, align 8, !tbaa !3
   %arrayidx15 = getelementptr inbounds i32* %tmp12, i64 %idxprom
-  %tmp13 = bitcast i32* %arrayidx15 to i8*
+  %tmp13 = bitcast i32* %arrayidx3 to i8*
 
-  ; CHECK: prfm pstl3keep
-  call void @llvm.prefetch(i8* %tmp13, i32 1, i32 1, i32 1)
+  ; CHECK: prfm plil3keep
+  call void @llvm.prefetch(i8* %tmp13, i32 0, i32 1, i32 0)
   %tmp14 = load i32** @a, align 8, !tbaa !3
   %arrayidx18 = getelementptr inbounds i32* %tmp14, i64 %idxprom
-  %tmp15 = bitcast i32* %arrayidx18 to i8*
+  %tmp15 = bitcast i32* %arrayidx6 to i8*
 
-  ; CHECK: prfm pstl2keep
-  call void @llvm.prefetch(i8* %tmp15, i32 1, i32 2, i32 1)
+  ; CHECK: prfm plil2keep
+  call void @llvm.prefetch(i8* %tmp15, i32 0, i32 2, i32 0)
   %tmp16 = load i32** @a, align 8, !tbaa !3
   %arrayidx21 = getelementptr inbounds i32* %tmp16, i64 %idxprom
-  %tmp17 = bitcast i32* %arrayidx21 to i8*
+  %tmp17 = bitcast i32* %arrayidx9 to i8*
+
+  ; CHECK: prfm plil1keep
+  call void @llvm.prefetch(i8* %tmp17, i32 0, i32 3, i32 0)
+  %tmp18 = load i32** @a, align 8, !tbaa !3
+  %arrayidx24 = getelementptr inbounds i32* %tmp18, i64 %idxprom
+  %tmp19 = bitcast i32* %arrayidx12 to i8*
+
+
+  ; CHECK: prfm pstl1strm
+  call void @llvm.prefetch(i8* %tmp19, i32 1, i32 0, i32 1)
+  %tmp20 = load i32** @a, align 8, !tbaa !3
+  %arrayidx27 = getelementptr inbounds i32* %tmp20, i64 %idxprom
+  %tmp21 = bitcast i32* %arrayidx15 to i8*
+
+  ; CHECK: prfm pstl3keep
+  call void @llvm.prefetch(i8* %tmp21, i32 1, i32 1, i32 1)
+  %tmp22 = load i32** @a, align 8, !tbaa !3
+  %arrayidx30 = getelementptr inbounds i32* %tmp22, i64 %idxprom
+  %tmp23 = bitcast i32* %arrayidx18 to i8*
+
+  ; CHECK: prfm pstl2keep
+  call void @llvm.prefetch(i8* %tmp23, i32 1, i32 2, i32 1)
+  %tmp24 = load i32** @a, align 8, !tbaa !3
+  %arrayidx33 = getelementptr inbounds i32* %tmp24, i64 %idxprom
+  %tmp25 = bitcast i32* %arrayidx21 to i8*
 
   ; CHECK: prfm pstl1keep
-  call void @llvm.prefetch(i8* %tmp17, i32 1, i32 3, i32 1)
+  call void @llvm.prefetch(i8* %tmp25, i32 1, i32 3, i32 1)
   ret void
 }
 
 declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind
 
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"any pointer", metadata !1}
+!0 = !{!"int", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"any pointer", !1}
diff --git a/test/CodeGen/AArch64/arm64-promote-const.ll b/test/CodeGen/AArch64/arm64-promote-const.ll
index 380ff55d6839..94fd8e33b892 100644
--- a/test/CodeGen/AArch64/arm64-promote-const.ll
+++ b/test/CodeGen/AArch64/arm64-promote-const.ll
@@ -41,8 +41,7 @@ entry:
 ; PROMOTED-LABEL: test2:
 ; In stress mode, constant vector are promoted
 ; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1:__PromotedConst[0-9]+]]@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
-; PROMOTED: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
+; PROMOTED: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTV1]]@PAGEOFF]
 ; Destination register is defined by ABI
 ; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
 ; PROMOTED-NEXT: mla.16b v0, v0, v[[REGNUM]]
@@ -72,15 +71,13 @@ define <16 x i8> @test3(<16 x i8> %arg, i32 %path) {
 ; Since, the constant is the same as the previous function,
 ; the same address must be used
 ; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
-; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
+; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTV1]]@PAGEOFF]
 ; Destination register is defined by ABI
 ; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
 ; PROMOTED-NEXT: cbnz w0, [[LABEL:LBB.*]]
 ; Next BB
 ; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV2:__PromotedConst[0-9]+]]@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV2]]@PAGEOFF
-; PROMOTED-NEXT: ldr q[[REGNUM]], {{\[}}[[BASEADDR]]]
+; PROMOTED-NEXT: ldr q[[REGNUM]], {{\[}}[[PAGEADDR]], [[CSTV2]]@PAGEOFF]
 ; Next BB
 ; PROMOTED-NEXT: [[LABEL]]:
 ; PROMOTED-NEXT: mul.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
@@ -136,8 +133,7 @@ define <16 x i8> @test4(<16 x i8> %arg, i32 %path) {
 ; Since, the constant is the same as the previous function,
 ; the same address must be used
 ; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
-; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
+; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTV1]]@PAGEOFF]
 ; Destination register is defined by ABI
 ; PROMOTED-NEXT: add.16b v0, v0, v[[REGNUM]]
 ; PROMOTED-NEXT: cbz w0, [[LABEL:LBB.*]]
@@ -185,8 +181,7 @@ define <16 x i8> @test5(<16 x i8> %arg, i32 %path) {
 ; Since, the constant is the same as the previous function,
 ; the same address must be used
 ; PROMOTED: adrp [[PAGEADDR:x[0-9]+]], [[CSTV1]]@PAGE
-; PROMOTED: add [[BASEADDR:x[0-9]+]], [[PAGEADDR]], [[CSTV1]]@PAGEOFF
-; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[BASEADDR]]]
+; PROMOTED-NEXT: ldr q[[REGNUM:[0-9]+]], {{\[}}[[PAGEADDR]], [[CSTV1]]@PAGEOFF]
 ; PROMOTED-NEXT: cbz w0, [[LABEL:LBB.*]]
 ; Next BB
 ; PROMOTED: add.16b [[DESTV:v[0-9]+]], v0, v[[REGNUM]]
diff --git a/test/CodeGen/AArch64/arm64-scaled_iv.ll b/test/CodeGen/AArch64/arm64-scaled_iv.ll
index 987373e542af..63428df9610c 100644
--- a/test/CodeGen/AArch64/arm64-scaled_iv.ll
+++ b/test/CodeGen/AArch64/arm64-scaled_iv.ll
@@ -20,7 +20,7 @@ for.body:                                         ; preds = %for.body, %entry
   %arrayidx = getelementptr inbounds double* %b, i64 %tmp
   %tmp1 = load double* %arrayidx, align 8
 ; The induction variable should carry the scaling factor: 1 * 8 = 8.
-; CHECK: [[IVNEXT]] = add nuw nsw i64 [[IV]], 8
+; CHECK: [[IVNEXT]] = add nuw i64 [[IV]], 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %arrayidx2 = getelementptr inbounds double* %c, i64 %indvars.iv.next
   %tmp2 = load double* %arrayidx2, align 8
diff --git a/test/CodeGen/AArch64/arm64-scvt.ll b/test/CodeGen/AArch64/arm64-scvt.ll
index 2e006cff159a..8baaf22238d3 100644
--- a/test/CodeGen/AArch64/arm64-scvt.ll
+++ b/test/CodeGen/AArch64/arm64-scvt.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -mcpu=cyclone -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -march=arm64 -mcpu=cortex-a57 | FileCheck --check-prefix=CHECK-A57 %s
 ; rdar://13082402
 
 define float @t1(i32* nocapture %src) nounwind ssp {
@@ -409,6 +410,10 @@ define float @sfct1(i8* nocapture %sp0) {
 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+; CHECK-A57-LABEL: sfct1:
+; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
+; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i8* %sp0, i64 1
   %pix_sp0.0.copyload = load i8* %addr, align 1
@@ -466,6 +471,10 @@ define float @sfct5(i8* nocapture %sp0, i64 %offset) {
 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+; CHECK-A57-LABEL: sfct5:
+; CHECK-A57: ldrsb w[[REGNUM:[0-9]+]], [x0, x1]
+; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
+; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i8* %sp0, i64 %offset
   %pix_sp0.0.copyload = load i8* %addr, align 1
@@ -536,6 +545,10 @@ define double @sfct10(i16* nocapture %sp0) {
 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+; CHECK-A57-LABEL: sfct10:
+; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, #2]
+; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
+; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i16* %sp0, i64 1
   %pix_sp0.0.copyload = load i16* %addr, align 1
@@ -592,6 +605,10 @@ define double @sfct14(i16* nocapture %sp0, i64 %offset) {
 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+; CHECK-A57-LABEL: sfct14:
+; CHECK-A57: ldrsh w[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
+; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
+; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i16* %sp0, i64 %offset
   %pix_sp0.0.copyload = load i16* %addr, align 1
@@ -636,6 +653,10 @@ entry:
 ; CHECK-NEXT: sshll.4s v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
 ; CHECK: scvtf [[REG:s[0-9]+]], s[[SEXTREG]]
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
+; CHECK-A57-LABEL: sfct17:
+; CHECK-A57: ldursb w[[REGNUM:[0-9]+]], [x0, #-1]
+; CHECK-A57-NEXT: scvtf [[REG:s[0-9]+]], w[[REGNUM]]
+; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
   %bitcast = ptrtoint i8* %sp0 to i64
   %add = add i64 %bitcast, -1
   %addr = inttoptr i64 %add to i8*
@@ -713,6 +734,10 @@ define double @sfct22(i16* nocapture %sp0) {
 ; CHECK-NEXT: sshll.2d v[[SEXTREG:[0-9]+]], [[SEXTREG1]], #0
 ; CHECK: scvtf [[REG:d[0-9]+]], d[[SEXTREG]]
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
+; CHECK-A57-LABEL: sfct22:
+; CHECK-A57: ldursh w[[REGNUM:[0-9]+]], [x0, #1]
+; CHECK-A57-NEXT: scvtf [[REG:d[0-9]+]], w[[REGNUM]]
+; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
   %bitcast = ptrtoint i16* %sp0 to i64
   %add = add i64 %bitcast, 1
   %addr = inttoptr i64 %add to i16*
diff --git a/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll b/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll
index 045c9cd9aeb7..67283b6d3c1b 100644
--- a/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll
+++ b/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -asm-verbose=false -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=arm64-apple-ios | FileCheck %s
 
 define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
 ; CHECK-LABEL: foo:
@@ -25,3 +25,23 @@ define void @foo1(<4 x float> %val, <4 x float> %test, <4 x double>* %p) nounwin
   store <4 x double> %result, <4 x double>* %p
   ret void
 }
+
+; Fold explicit AND operations when the constant isn't a splat of a single
+; scalar value like what the zext creates.
+define <4 x float> @foo2(<4 x float> %val, <4 x float> %test) nounwind {
+; CHECK-LABEL: lCPI2_0:
+; CHECK-NEXT: .long 1065353216
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 1065353216
+; CHECK-NEXT: .long 0
+; CHECK-LABEL: foo2:
+; CHECK: adrp  x8, lCPI2_0@PAGE
+; CHECK: ldr q2, [x8, lCPI2_0@PAGEOFF]
+; CHECK-NEXT:  fcmeq.4s  v0, v0, v1
+; CHECK-NEXT:  and.16b v0, v0, v2
+  %cmp = fcmp oeq <4 x float> %val, %test
+  %ext = zext <4 x i1> %cmp to <4 x i32>
+  %and = and <4 x i32> %ext, <i32 255, i32 256, i32 257, i32 258>
+  %result = sitofp <4 x i32> %and to <4 x float>
+  ret <4 x float> %result
+}
diff --git a/test/CodeGen/AArch64/arm64-shifted-sext.ll b/test/CodeGen/AArch64/arm64-shifted-sext.ll
index b7b4e5de1d5c..71f15b1222b2 100644
--- a/test/CodeGen/AArch64/arm64-shifted-sext.ll
+++ b/test/CodeGen/AArch64/arm64-shifted-sext.ll
@@ -166,8 +166,8 @@ entry:
 define i32 @extendedLeftShiftshortTointBy16(i16 signext %a) nounwind readnone ssp {
 entry:
 ; CHECK-LABEL: extendedLeftShiftshortTointBy16:
-; CHECK: add [[REG:w[0-9]+]], w0, #1
-; CHECK: lsl w0, [[REG]], #16
+; CHECK: lsl [[REG:w[0-9]+]], w0, #16
+; CHECK: add w0, [[REG]], #16, lsl #12
   %inc = add i16 %a, 1
   %conv2 = zext i16 %inc to i32
   %shl = shl nuw i32 %conv2, 16
diff --git a/test/CodeGen/AArch64/arm64-st1.ll b/test/CodeGen/AArch64/arm64-st1.ll
index 4370484478c0..76d52f44b480 100644
--- a/test/CodeGen/AArch64/arm64-st1.ll
+++ b/test/CodeGen/AArch64/arm64-st1.ll
@@ -8,6 +8,26 @@ define void @st1lane_16b(<16 x i8> %A, i8* %D) {
   ret void
 }
 
+define void @st1lane_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_16b
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.b { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr i8* %D, i64 %offset
+  %tmp = extractelement <16 x i8> %A, i32 1
+  store i8 %tmp, i8* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_16b
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.b { v0 }[0], [x[[XREG]]]
+  %ptr = getelementptr i8* %D, i64 %offset
+  %tmp = extractelement <16 x i8> %A, i32 0
+  store i8 %tmp, i8* %ptr
+  ret void
+}
+
 define void @st1lane_8h(<8 x i16> %A, i16* %D) {
 ; CHECK-LABEL: st1lane_8h
 ; CHECK: st1.h
@@ -16,6 +36,25 @@ define void @st1lane_8h(<8 x i16> %A, i16* %D) {
   ret void
 }
 
+define void @st1lane_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_8h
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.h { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr i16* %D, i64 %offset
+  %tmp = extractelement <8 x i16> %A, i32 1
+  store i16 %tmp, i16* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_8h
+; CHECK: str h0, [x0, x1, lsl #1]
+  %ptr = getelementptr i16* %D, i64 %offset
+  %tmp = extractelement <8 x i16> %A, i32 0
+  store i16 %tmp, i16* %ptr
+  ret void
+}
+
 define void @st1lane_4s(<4 x i32> %A, i32* %D) {
 ; CHECK-LABEL: st1lane_4s
 ; CHECK: st1.s
@@ -24,6 +63,25 @@ define void @st1lane_4s(<4 x i32> %A, i32* %D) {
   ret void
 }
 
+define void @st1lane_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_4s
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.s { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr i32* %D, i64 %offset
+  %tmp = extractelement <4 x i32> %A, i32 1
+  store i32 %tmp, i32* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_4s
+; CHECK: str s0, [x0, x1, lsl #2]
+  %ptr = getelementptr i32* %D, i64 %offset
+  %tmp = extractelement <4 x i32> %A, i32 0
+  store i32 %tmp, i32* %ptr
+  ret void
+}
+
 define void @st1lane_4s_float(<4 x float> %A, float* %D) {
 ; CHECK-LABEL: st1lane_4s_float
 ; CHECK: st1.s
@@ -32,6 +90,25 @@ define void @st1lane_4s_float(<4 x float> %A, float* %D) {
   ret void
 }
 
+define void @st1lane_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_4s_float
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.s { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr float* %D, i64 %offset
+  %tmp = extractelement <4 x float> %A, i32 1
+  store float %tmp, float* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_4s_float
+; CHECK: str s0, [x0, x1, lsl #2]
+  %ptr = getelementptr float* %D, i64 %offset
+  %tmp = extractelement <4 x float> %A, i32 0
+  store float %tmp, float* %ptr
+  ret void
+}
+
 define void @st1lane_2d(<2 x i64> %A, i64* %D) {
 ; CHECK-LABEL: st1lane_2d
 ; CHECK: st1.d
@@ -40,6 +117,25 @@ define void @st1lane_2d(<2 x i64> %A, i64* %D) {
   ret void
 }
 
+define void @st1lane_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_2d
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.d { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr i64* %D, i64 %offset
+  %tmp = extractelement <2 x i64> %A, i32 1
+  store i64 %tmp, i64* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_2d
+; CHECK: str d0, [x0, x1, lsl #3]
+  %ptr = getelementptr i64* %D, i64 %offset
+  %tmp = extractelement <2 x i64> %A, i32 0
+  store i64 %tmp, i64* %ptr
+  ret void
+}
+
 define void @st1lane_2d_double(<2 x double> %A, double* %D) {
 ; CHECK-LABEL: st1lane_2d_double
 ; CHECK: st1.d
@@ -48,6 +144,25 @@ define void @st1lane_2d_double(<2 x double> %A, double* %D) {
   ret void
 }
 
+define void @st1lane_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_2d_double
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.d { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr double* %D, i64 %offset
+  %tmp = extractelement <2 x double> %A, i32 1
+  store double %tmp, double* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_2d_double
+; CHECK: str d0, [x0, x1, lsl #3]
+  %ptr = getelementptr double* %D, i64 %offset
+  %tmp = extractelement <2 x double> %A, i32 0
+  store double %tmp, double* %ptr
+  ret void
+}
+
 define void @st1lane_8b(<8 x i8> %A, i8* %D) {
 ; CHECK-LABEL: st1lane_8b
 ; CHECK: st1.b
@@ -56,6 +171,26 @@ define void @st1lane_8b(<8 x i8> %A, i8* %D) {
   ret void
 }
 
+define void @st1lane_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_8b
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.b { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr i8* %D, i64 %offset
+  %tmp = extractelement <8 x i8> %A, i32 1
+  store i8 %tmp, i8* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_8b
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.b { v0 }[0], [x[[XREG]]]
+  %ptr = getelementptr i8* %D, i64 %offset
+  %tmp = extractelement <8 x i8> %A, i32 0
+  store i8 %tmp, i8* %ptr
+  ret void
+}
+
 define void @st1lane_4h(<4 x i16> %A, i16* %D) {
 ; CHECK-LABEL: st1lane_4h
 ; CHECK: st1.h
@@ -64,6 +199,25 @@ define void @st1lane_4h(<4 x i16> %A, i16* %D) {
   ret void
 }
 
+define void @st1lane_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_4h
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.h { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr i16* %D, i64 %offset
+  %tmp = extractelement <4 x i16> %A, i32 1
+  store i16 %tmp, i16* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_4h
+; CHECK: str h0, [x0, x1, lsl #1]
+  %ptr = getelementptr i16* %D, i64 %offset
+  %tmp = extractelement <4 x i16> %A, i32 0
+  store i16 %tmp, i16* %ptr
+  ret void
+}
+
 define void @st1lane_2s(<2 x i32> %A, i32* %D) {
 ; CHECK-LABEL: st1lane_2s
 ; CHECK: st1.s
@@ -72,6 +226,25 @@ define void @st1lane_2s(<2 x i32> %A, i32* %D) {
   ret void
 }
 
+define void @st1lane_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_2s
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.s { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr i32* %D, i64 %offset
+  %tmp = extractelement <2 x i32> %A, i32 1
+  store i32 %tmp, i32* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_2s
+; CHECK: str s0, [x0, x1, lsl #2]
+  %ptr = getelementptr i32* %D, i64 %offset
+  %tmp = extractelement <2 x i32> %A, i32 0
+  store i32 %tmp, i32* %ptr
+  ret void
+}
+
 define void @st1lane_2s_float(<2 x float> %A, float* %D) {
 ; CHECK-LABEL: st1lane_2s_float
 ; CHECK: st1.s
@@ -80,6 +253,25 @@ define void @st1lane_2s_float(<2 x float> %A, float* %D) {
   ret void
 }
 
+define void @st1lane_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
+; CHECK-LABEL: st1lane_ro_2s_float
+; CHECK: add x[[XREG:[0-9]+]], x0, x1
+; CHECK: st1.s { v0 }[1], [x[[XREG]]]
+  %ptr = getelementptr float* %D, i64 %offset
+  %tmp = extractelement <2 x float> %A, i32 1
+  store float %tmp, float* %ptr
+  ret void
+}
+
+define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
+; CHECK-LABEL: st1lane0_ro_2s_float
+; CHECK: str s0, [x0, x1, lsl #2]
+  %ptr = getelementptr float* %D, i64 %offset
+  %tmp = extractelement <2 x float> %A, i32 0
+  store float %tmp, float* %ptr
+  ret void
+}
+
 define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
 ; CHECK-LABEL: st2lane_16b
 ; CHECK: st2.b
diff --git a/test/CodeGen/AArch64/arm64-stackmap-nops.ll b/test/CodeGen/AArch64/arm64-stackmap-nops.ll
new file mode 100644
index 000000000000..5915b64edf0a
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-stackmap-nops.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
+
+define void @test_shadow_optimization() {
+entry:
+; Expect 8 bytes worth of nops here rather than 16: With the shadow optimization
+; in place, 8 bytes will be consumed by the frame teardown and return instr.
+; CHECK-LABEL: test_shadow_optimization:
+; CHECK:      nop
+; CHECK-NEXT: nop
+; CHECK-NOT:  nop
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64  0, i32  16)
+  ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
diff --git a/test/CodeGen/AArch64/arm64-stackmap.ll b/test/CodeGen/AArch64/arm64-stackmap.ll
index 2c7c6ae5d6d5..144c2fd2ee38 100644
--- a/test/CodeGen/AArch64/arm64-stackmap.ll
+++ b/test/CodeGen/AArch64/arm64-stackmap.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin                             < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -fast-isel -fast-isel-abort < %s | FileCheck %s
 ;
 ; Note: Print verbose stackmaps using -debug-only=stackmaps.
 
diff --git a/test/CodeGen/AArch64/arm64-stackpointer.ll b/test/CodeGen/AArch64/arm64-stackpointer.ll
index 581faf130f10..a33de8c34f17 100644
--- a/test/CodeGen/AArch64/arm64-stackpointer.ll
+++ b/test/CodeGen/AArch64/arm64-stackpointer.ll
@@ -21,4 +21,4 @@ declare void @llvm.write_register.i64(metadata, i64) nounwind
 
 ; register unsigned long current_stack_pointer asm("sp");
 ; CHECK-NOT: .asciz  "sp"
-!0 = metadata !{metadata !"sp\00"}
+!0 = !{!"sp\00"}
diff --git a/test/CodeGen/AArch64/arm64-tls-dynamics.ll b/test/CodeGen/AArch64/arm64-tls-dynamics.ll
index e8a83fd7db3a..30ea63b4664a 100644
--- a/test/CodeGen/AArch64/arm64-tls-dynamics.ll
+++ b/test/CodeGen/AArch64/arm64-tls-dynamics.ll
@@ -20,7 +20,7 @@ define i32 @test_generaldynamic() {
 ; CHECK: mrs x[[TP:[0-9]+]], TPIDR_EL0
 ; CHECK: ldr w0, [x[[TP]], x0]
 
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
@@ -43,7 +43,7 @@ define i32* @test_generaldynamic_addr() {
 ; CHECK: mrs [[TP:x[0-9]+]], TPIDR_EL0
 ; CHECK: add x0, [[TP]], x0
 
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
@@ -73,7 +73,7 @@ define i32 @test_localdynamic() {
 
 ; CHECK: ldr w0, [x[[TPIDR]], x[[TPREL]]]
 
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
@@ -101,7 +101,7 @@ define i32* @test_localdynamic_addr() {
 
 ; CHECK: add x0, [[TPIDR]], [[TPREL]]
 
-; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE
+; CHECK-RELOC: R_AARCH64_TLSDESC_ADR_PAGE21
 ; CHECK-RELOC: R_AARCH64_TLSDESC_ADD_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_LD64_LO12_NC
 ; CHECK-RELOC: R_AARCH64_TLSDESC_CALL
diff --git a/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll b/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
new file mode 100644
index 000000000000..923742d0370e
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a53 -enable-aa-sched-mi | FileCheck %s
+; Check that the scheduler moves the load from a[1] past the store into a[2].
+@a = common global i32* null, align 8
+@m = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define i32 @func(i32 %i, i32 %j, i32 %k) #0 {
+entry:
+; CHECK: ldr {{w[0-9]+}}, [x[[REG:[0-9]+]], #4]
+; CHECK: str {{w[0-9]+}}, [x[[REG]], #8]
+  %0 = load i32** @a, align 8, !tbaa !1
+  %arrayidx = getelementptr inbounds i32* %0, i64 2
+  store i32 %i, i32* %arrayidx, align 4, !tbaa !5
+  %arrayidx1 = getelementptr inbounds i32* %0, i64 1
+  %1 = load i32* %arrayidx1, align 4, !tbaa !5
+  %add = add nsw i32 %k, %i
+  store i32 %add, i32* @m, align 4, !tbaa !5
+  ret i32 %1
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.6.0 "}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"any pointer", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !3, i64 0}
diff --git a/test/CodeGen/AArch64/arm64-vabs.ll b/test/CodeGen/AArch64/arm64-vabs.ll
index 5afc8d9f3f49..fae2b90e5ba1 100644
--- a/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/test/CodeGen/AArch64/arm64-vabs.ll
@@ -802,3 +802,73 @@ define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
   %res1 = zext <2 x i32> %res to <2 x i64>
   ret <2 x i64> %res1
 }
+
+define <2 x i32> @abspattern1(<2 x i32> %a) nounwind {
+; CHECK-LABEL: abspattern1:
+; CHECK: abs.2s
+; CHECK-NEXT: ret
+        %tmp1neg = sub <2 x i32> zeroinitializer, %a
+        %b = icmp sge <2 x i32> %a, zeroinitializer
+        %abs = select <2 x i1> %b, <2 x i32> %a, <2 x i32> %tmp1neg
+        ret <2 x i32> %abs
+}
+
+define <4 x i16> @abspattern2(<4 x i16> %a) nounwind {
+; CHECK-LABEL: abspattern2:
+; CHECK: abs.4h
+; CHECK-NEXT: ret
+        %tmp1neg = sub <4 x i16> zeroinitializer, %a
+        %b = icmp sgt <4 x i16> %a, zeroinitializer
+        %abs = select <4 x i1> %b, <4 x i16> %a, <4 x i16> %tmp1neg
+        ret <4 x i16> %abs
+}
+
+define <8 x i8> @abspattern3(<8 x i8> %a) nounwind {
+; CHECK-LABEL: abspattern3:
+; CHECK: abs.8b
+; CHECK-NEXT: ret
+        %tmp1neg = sub <8 x i8> zeroinitializer, %a
+        %b = icmp slt <8 x i8> %a, zeroinitializer
+        %abs = select <8 x i1> %b, <8 x i8> %tmp1neg, <8 x i8> %a
+        ret <8 x i8> %abs
+}
+
+define <4 x i32> @abspattern4(<4 x i32> %a) nounwind {
+; CHECK-LABEL: abspattern4:
+; CHECK: abs.4s
+; CHECK-NEXT: ret
+        %tmp1neg = sub <4 x i32> zeroinitializer, %a
+        %b = icmp sge <4 x i32> %a, zeroinitializer
+        %abs = select <4 x i1> %b, <4 x i32> %a, <4 x i32> %tmp1neg
+        ret <4 x i32> %abs
+}
+
+define <8 x i16> @abspattern5(<8 x i16> %a) nounwind {
+; CHECK-LABEL: abspattern5:
+; CHECK: abs.8h
+; CHECK-NEXT: ret
+        %tmp1neg = sub <8 x i16> zeroinitializer, %a
+        %b = icmp sgt <8 x i16> %a, zeroinitializer
+        %abs = select <8 x i1> %b, <8 x i16> %a, <8 x i16> %tmp1neg
+        ret <8 x i16> %abs
+}
+
+define <16 x i8> @abspattern6(<16 x i8> %a) nounwind {
+; CHECK-LABEL: abspattern6:
+; CHECK: abs.16b
+; CHECK-NEXT: ret
+        %tmp1neg = sub <16 x i8> zeroinitializer, %a
+        %b = icmp slt <16 x i8> %a, zeroinitializer
+        %abs = select <16 x i1> %b, <16 x i8> %tmp1neg, <16 x i8> %a
+        ret <16 x i8> %abs
+}
+
+define <2 x i64> @abspattern7(<2 x i64> %a) nounwind {
+; CHECK-LABEL: abspattern7:
+; CHECK: abs.2d
+; CHECK-NEXT: ret
+        %tmp1neg = sub <2 x i64> zeroinitializer, %a
+        %b = icmp sle <2 x i64> %a, zeroinitializer
+        %abs = select <2 x i1> %b, <2 x i64> %tmp1neg, <2 x i64> %a
+        ret <2 x i64> %abs
+}
diff --git a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
index 36a7bfd92520..44f2af1c5e79 100644
--- a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
+++ b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
@@ -12,6 +12,7 @@ define void @test_simple(i32 %n, ...) {
 ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
 
 ; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
 
 ; CHECK: stp x1, x2, [sp, #[[GR_BASE:[0-9]+]]]
 ; ... omit middle ones ...
@@ -21,11 +22,10 @@ define void @test_simple(i32 %n, ...) {
 ; ... omit middle ones ...
 ; CHECK: stp q6, q7, [sp, #
 
-; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var]
+; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]
 
 ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
 ; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #56
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
 ; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
 
 ; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
@@ -50,6 +50,7 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
 ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #[[STACKSIZE]]
 
 ; CHECK: adrp x[[VA_LIST_HI:[0-9]+]], var
+; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
 
 ; CHECK: stp x3, x4, [sp, #[[GR_BASE:[0-9]+]]]
 ; ... omit middle ones ...
@@ -59,11 +60,10 @@ define void @test_fewargs(i32 %n, i32 %n1, i32 %n2, float %m, ...) {
 ; ... omit middle ones ...
 ; CHECK: str q7, [sp, #
 
-; CHECK: str [[STACK_TOP]], [x[[VA_LIST_HI]], :lo12:var]
+; CHECK: str [[STACK_TOP]], [x[[VA_LIST]]]
 
 ; CHECK: add [[GR_TOPTMP:x[0-9]+]], sp, #[[GR_BASE]]
 ; CHECK: add [[GR_TOP:x[0-9]+]], [[GR_TOPTMP]], #40
-; CHECK: add x[[VA_LIST:[0-9]+]], {{x[0-9]+}}, :lo12:var
 ; CHECK: str [[GR_TOP]], [x[[VA_LIST]], #8]
 
 ; CHECK: mov [[VR_TOPTMP:x[0-9]+]], sp
@@ -89,18 +89,20 @@ define void @test_nospare([8 x i64], [8 x float], ...) {
   call void @llvm.va_start(i8* %addr)
 ; CHECK-NOT: sub sp, sp
 ; CHECK: mov [[STACK:x[0-9]+]], sp
-; CHECK: str [[STACK]], [{{x[0-9]+}}, :lo12:var]
+; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
+; CHECK: str [[STACK]], [x[[VAR]]]
 
   ret void
 }
 
 ; If there are non-variadic arguments on the stack (here two i64s) then the
 ; __stack field should point just past them.
-define void @test_offsetstack([10 x i64], [3 x float], ...) {
+define void @test_offsetstack([8 x i64], [2 x i64], [3 x float], ...) {
 ; CHECK-LABEL: test_offsetstack:
 ; CHECK: sub sp, sp, #80
 ; CHECK: add [[STACK_TOP:x[0-9]+]], sp, #96
-; CHECK: str [[STACK_TOP]], [{{x[0-9]+}}, :lo12:var]
+; CHECK: add x[[VAR:[0-9]+]], {{x[0-9]+}}, :lo12:var
+; CHECK: str [[STACK_TOP]], [x[[VAR]]]
 
   %addr = bitcast %va_list* @var to i8*
   call void @llvm.va_start(i8* %addr)
diff --git a/test/CodeGen/AArch64/arm64-vector-ext.ll b/test/CodeGen/AArch64/arm64-vector-ext.ll
index 650ff1e14f02..5bee1611e6c6 100644
--- a/test/CodeGen/AArch64/arm64-vector-ext.ll
+++ b/test/CodeGen/AArch64/arm64-vector-ext.ll
@@ -14,3 +14,14 @@ define void @func30(%T0_30 %v0, %T1_30* %p1) {
   store %T1_30 %r, %T1_30* %p1
   ret void
 }
+
+; Extend from v1i1 was crashing things (PR20791). Make sure we do something
+; sensible instead.
+define <1 x i32> @autogen_SD7918() {
+; CHECK-LABEL: autogen_SD7918
+; CHECK: movi d0, #0000000000000000
+; CHECK-NEXT: ret
+  %I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0
+  %ZE = zext <1 x i1> %I29 to <1 x i32>
+  ret <1 x i32> %ZE
+}
diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll
index 0c300de802b7..59ce6848afb9 100644
--- a/test/CodeGen/AArch64/arm64-xaluo.ll
+++ b/test/CodeGen/AArch64/arm64-xaluo.ll
@@ -1,13 +1,14 @@
-; RUN: llc < %s -march=arm64 -aarch64-atomic-cfg-tidy=0 | FileCheck %s
+; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0                             -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
 
 ;
 ; Get the actual value of the overflow bit.
 ;
-define i1 @saddo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @saddo1.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
-; CHECK-LABEL:  saddo.i32
-; CHECK:        adds w8, w0, w1
-; CHECK-NEXT:   cset w0, vs
+; CHECK-LABEL:  saddo1.i32
+; CHECK:        adds {{w[0-9]+}}, w0, w1
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
   %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -15,11 +16,64 @@ entry:
   ret i1 %obit
 }
 
-define i1 @saddo.i64(i64 %v1, i64 %v2, i64* %res) {
+; Test the immediate version.
+define zeroext i1 @saddo2.i32(i32 %v1, i32* %res) {
 entry:
-; CHECK-LABEL:  saddo.i64
-; CHECK:        adds x8, x0, x1
-; CHECK-NEXT:   cset w0, vs
+; CHECK-LABEL:  saddo2.i32
+; CHECK:        adds {{w[0-9]+}}, w0, #4
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+; Test negative immediates.
+define zeroext i1 @saddo3.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL:  saddo3.i32
+; CHECK:        subs {{w[0-9]+}}, w0, #4
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 -4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+; Test immediates that are too large to be encoded.
+define zeroext i1 @saddo4.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL:  saddo4.i32
+; CHECK:        adds {{w[0-9]+}}, w0, {{w[0-9]+}}
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 16777215)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+; Test shift folding.
+define zeroext i1 @saddo5.i32(i32 %v1, i32 %v2, i32* %res) {
+entry:
+; CHECK-LABEL:  saddo5.i32
+; CHECK:        adds {{w[0-9]+}}, w0, w1
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
+  %lsl = shl i32 %v2, 16
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %lsl)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo1.i64(i64 %v1, i64 %v2, i64* %res) {
+entry:
+; CHECK-LABEL:  saddo1.i64
+; CHECK:        adds {{x[0-9]+}}, x0, x1
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
   %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -27,11 +81,35 @@ entry:
   ret i1 %obit
 }
 
-define i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @saddo2.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL:  saddo2.i64
+; CHECK:        adds {{x[0-9]+}}, x0, #4
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 4)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo3.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL:  saddo3.i64
+; CHECK:        subs {{x[0-9]+}}, x0, #4
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -4)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
 ; CHECK-LABEL:  uaddo.i32
-; CHECK:        adds w8, w0, w1
-; CHECK-NEXT:   cset w0, hs
+; CHECK:        adds {{w[0-9]+}}, w0, w1
+; CHECK-NEXT:   cset {{w[0-9]+}}, hs
   %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -39,11 +117,11 @@ entry:
   ret i1 %obit
 }
 
-define i1 @uaddo.i64(i64 %v1, i64 %v2, i64* %res) {
+define zeroext i1 @uaddo.i64(i64 %v1, i64 %v2, i64* %res) {
 entry:
 ; CHECK-LABEL:  uaddo.i64
-; CHECK:        adds x8, x0, x1
-; CHECK-NEXT:   cset w0, hs
+; CHECK:        adds {{x[0-9]+}}, x0, x1
+; CHECK-NEXT:   cset {{w[0-9]+}}, hs
   %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -51,11 +129,11 @@ entry:
   ret i1 %obit
 }
 
-define i1 @ssubo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @ssubo1.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
-; CHECK-LABEL:  ssubo.i32
-; CHECK:        subs w8, w0, w1
-; CHECK-NEXT:   cset w0, vs
+; CHECK-LABEL:  ssubo1.i32
+; CHECK:        subs {{w[0-9]+}}, w0, w1
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
   %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -63,11 +141,23 @@ entry:
   ret i1 %obit
 }
 
-define i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) {
+define zeroext i1 @ssubo2.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL:  ssubo2.i32
+; CHECK:        adds {{w[0-9]+}}, w0, #4
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
+  %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 -4)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) {
 entry:
 ; CHECK-LABEL:  ssubo.i64
-; CHECK:        subs x8, x0, x1
-; CHECK-NEXT:   cset w0, vs
+; CHECK:        subs {{x[0-9]+}}, x0, x1
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
   %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -75,11 +165,11 @@ entry:
   ret i1 %obit
 }
 
-define i1 @usubo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @usubo.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
 ; CHECK-LABEL:  usubo.i32
-; CHECK:        subs w8, w0, w1
-; CHECK-NEXT:   cset w0, lo
+; CHECK:        subs {{w[0-9]+}}, w0, w1
+; CHECK-NEXT:   cset {{w[0-9]+}}, lo
   %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -87,11 +177,11 @@ entry:
   ret i1 %obit
 }
 
-define i1 @usubo.i64(i64 %v1, i64 %v2, i64* %res) {
+define zeroext i1 @usubo.i64(i64 %v1, i64 %v2, i64* %res) {
 entry:
 ; CHECK-LABEL:  usubo.i64
-; CHECK:        subs x8, x0, x1
-; CHECK-NEXT:   cset w0, lo
+; CHECK:        subs {{x[0-9]+}}, x0, x1
+; CHECK-NEXT:   cset {{w[0-9]+}}, lo
   %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -99,13 +189,13 @@ entry:
   ret i1 %obit
 }
 
-define i1 @smulo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @smulo.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
 ; CHECK-LABEL:  smulo.i32
-; CHECK:        smull x8, w0, w1
-; CHECK-NEXT:   lsr x9, x8, #32
-; CHECK-NEXT:   cmp w9, w8, asr #31
-; CHECK-NEXT:   cset w0, ne
+; CHECK:        smull x[[MREG:[0-9]+]], w0, w1
+; CHECK-NEXT:   lsr x[[SREG:[0-9]+]], x[[MREG]], #32
+; CHECK-NEXT:   cmp w[[SREG]], w[[MREG]], asr #31
+; CHECK-NEXT:   cset {{w[0-9]+}}, ne
   %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -113,13 +203,13 @@ entry:
   ret i1 %obit
 }
 
-define i1 @smulo.i64(i64 %v1, i64 %v2, i64* %res) {
+define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, i64* %res) {
 entry:
 ; CHECK-LABEL:  smulo.i64
-; CHECK:        mul x8, x0, x1
-; CHECK-NEXT:   smulh x9, x0, x1
-; CHECK-NEXT:   cmp x9, x8, asr #63
-; CHECK-NEXT:   cset w0, ne
+; CHECK:        mul [[MREG:x[0-9]+]], x0, x1
+; CHECK-NEXT:   smulh [[HREG:x[0-9]+]], x0, x1
+; CHECK-NEXT:   cmp [[HREG]], [[MREG]], asr #63
+; CHECK-NEXT:   cset {{w[0-9]+}}, ne
   %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -127,12 +217,24 @@ entry:
   ret i1 %obit
 }
 
-define i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
+define zeroext i1 @smulo2.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL:  smulo2.i64
+; CHECK:        adds [[MREG:x[0-9]+]], x0, x0
+; CHECK-NEXT:   cset {{w[0-9]+}}, vs
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
 ; CHECK-LABEL:  umulo.i32
-; CHECK:        umull x8, w0, w1
-; CHECK-NEXT:   cmp xzr, x8, lsr #32
-; CHECK-NEXT:   cset w0, ne
+; CHECK:        umull [[MREG:x[0-9]+]], w0, w1
+; CHECK-NEXT:   cmp xzr, [[MREG]], lsr #32
+; CHECK-NEXT:   cset {{w[0-9]+}}, ne
   %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -140,13 +242,12 @@ entry:
   ret i1 %obit
 }
 
-define i1 @umulo.i64(i64 %v1, i64 %v2, i64* %res) {
+define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, i64* %res) {
 entry:
 ; CHECK-LABEL:  umulo.i64
-; CHECK:        umulh x8, x0, x1
-; CHECK-NEXT:   cmp xzr, x8
-; CHECK-NEXT:   cset w8, ne
-; CHECK-NEXT:   mul x9, x0, x1
+; CHECK:        umulh [[MREG:x[0-9]+]], x0, x1
+; CHECK-NEXT:   cmp xzr, [[MREG]]
+; CHECK-NEXT:   cset {{w[0-9]+}}, ne
   %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -154,6 +255,18 @@ entry:
   ret i1 %obit
 }
 
+define zeroext i1 @umulo2.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL:  umulo2.i64
+; CHECK:        adds [[MREG:x[0-9]+]], x0, x0
+; CHECK-NEXT:   cset {{w[0-9]+}}, hs
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
 
 ;
 ; Check the use of the overflow bit in combination with a select instruction.
@@ -249,9 +362,9 @@ entry:
 define i32 @smulo.select.i32(i32 %v1, i32 %v2) {
 entry:
 ; CHECK-LABEL:  smulo.select.i32
-; CHECK:        smull    x8, w0, w1
-; CHECK-NEXT:   lsr     x9, x8, #32
-; CHECK-NEXT:   cmp     w9, w8, asr #31
+; CHECK:        smull   x[[MREG:[0-9]+]], w0, w1
+; CHECK-NEXT:   lsr     x[[SREG:[0-9]+]], x[[MREG]], #32
+; CHECK-NEXT:   cmp     w[[SREG]], w[[MREG]], asr #31
 ; CHECK-NEXT:   csel    w0, w0, w1, ne
   %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
   %obit = extractvalue {i32, i1} %t, 1
@@ -262,9 +375,9 @@ entry:
 define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
 entry:
 ; CHECK-LABEL:  smulo.select.i64
-; CHECK:        mul      x8, x0, x1
-; CHECK-NEXT:   smulh   x9, x0, x1
-; CHECK-NEXT:   cmp     x9, x8, asr #63
+; CHECK:        mul     [[MREG:x[0-9]+]], x0, x1
+; CHECK-NEXT:   smulh   [[HREG:x[0-9]+]], x0, x1
+; CHECK-NEXT:   cmp     [[HREG]], [[MREG]], asr #63
 ; CHECK-NEXT:   csel    x0, x0, x1, ne
   %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
   %obit = extractvalue {i64, i1} %t, 1
@@ -275,8 +388,8 @@ entry:
 define i32 @umulo.select.i32(i32 %v1, i32 %v2) {
 entry:
 ; CHECK-LABEL:  umulo.select.i32
-; CHECK:        umull    x8, w0, w1
-; CHECK-NEXT:   cmp     xzr, x8, lsr #32
+; CHECK:        umull   [[MREG:x[0-9]+]], w0, w1
+; CHECK-NEXT:   cmp     xzr, [[MREG]], lsr #32
 ; CHECK-NEXT:   csel    w0, w0, w1, ne
   %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
   %obit = extractvalue {i32, i1} %t, 1
@@ -287,8 +400,8 @@ entry:
 define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
 entry:
 ; CHECK-LABEL:  umulo.select.i64
-; CHECK:        umulh   x8, x0, x1
-; CHECK-NEXT:   cmp     xzr, x8
+; CHECK:        umulh   [[MREG:x[0-9]+]], x0, x1
+; CHECK-NEXT:   cmp     xzr, [[MREG]]
 ; CHECK-NEXT:   csel    x0, x0, x1, ne
   %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
   %obit = extractvalue {i64, i1} %t, 1
@@ -300,7 +413,7 @@ entry:
 ;
 ; Check the use of the overflow bit in combination with a branch instruction.
 ;
-define i1 @saddo.br.i32(i32 %v1, i32 %v2) {
+define zeroext i1 @saddo.br.i32(i32 %v1, i32 %v2) {
 entry:
 ; CHECK-LABEL:  saddo.br.i32
 ; CHECK:        cmn w0, w1
@@ -317,7 +430,7 @@ continue:
   ret i1 true
 }
 
-define i1 @saddo.br.i64(i64 %v1, i64 %v2) {
+define zeroext i1 @saddo.br.i64(i64 %v1, i64 %v2) {
 entry:
 ; CHECK-LABEL:  saddo.br.i64
 ; CHECK:        cmn x0, x1
@@ -334,7 +447,7 @@ continue:
   ret i1 true
 }
 
-define i1 @uaddo.br.i32(i32 %v1, i32 %v2) {
+define zeroext i1 @uaddo.br.i32(i32 %v1, i32 %v2) {
 entry:
 ; CHECK-LABEL:  uaddo.br.i32
 ; CHECK:        cmn w0, w1
@@ -351,7 +464,7 @@ continue:
   ret i1 true
 }
 
-define i1 @uaddo.br.i64(i64 %v1, i64 %v2) {
+define zeroext i1 @uaddo.br.i64(i64 %v1, i64 %v2) {
 entry:
 ; CHECK-LABEL:  uaddo.br.i64
 ; CHECK:        cmn x0, x1
@@ -368,7 +481,7 @@ continue:
   ret i1 true
 }
 
-define i1 @ssubo.br.i32(i32 %v1, i32 %v2) {
+define zeroext i1 @ssubo.br.i32(i32 %v1, i32 %v2) {
 entry:
 ; CHECK-LABEL:  ssubo.br.i32
 ; CHECK:        cmp w0, w1
@@ -385,7 +498,7 @@ continue:
   ret i1 true
 }
 
-define i1 @ssubo.br.i64(i64 %v1, i64 %v2) {
+define zeroext i1 @ssubo.br.i64(i64 %v1, i64 %v2) {
 entry:
 ; CHECK-LABEL:  ssubo.br.i64
 ; CHECK:        cmp x0, x1
@@ -402,7 +515,7 @@ continue:
   ret i1 true
 }
 
-define i1 @usubo.br.i32(i32 %v1, i32 %v2) {
+define zeroext i1 @usubo.br.i32(i32 %v1, i32 %v2) {
 entry:
 ; CHECK-LABEL:  usubo.br.i32
 ; CHECK:        cmp w0, w1
@@ -419,7 +532,7 @@ continue:
   ret i1 true
 }
 
-define i1 @usubo.br.i64(i64 %v1, i64 %v2) {
+define zeroext i1 @usubo.br.i64(i64 %v1, i64 %v2) {
 entry:
 ; CHECK-LABEL:  usubo.br.i64
 ; CHECK:        cmp x0, x1
@@ -436,12 +549,12 @@ continue:
   ret i1 true
 }
 
-define i1 @smulo.br.i32(i32 %v1, i32 %v2) {
+define zeroext i1 @smulo.br.i32(i32 %v1, i32 %v2) {
 entry:
 ; CHECK-LABEL:  smulo.br.i32
-; CHECK:        smull    x8, w0, w1
-; CHECK-NEXT:   lsr     x9, x8, #32
-; CHECK-NEXT:   cmp     w9, w8, asr #31
+; CHECK:        smull   x[[MREG:[0-9]+]], w0, w1
+; CHECK-NEXT:   lsr     x[[SREG:[0-9]+]], x8, #32
+; CHECK-NEXT:   cmp     w[[SREG]], w[[MREG]], asr #31
 ; CHECK-NEXT:   b.eq
   %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
@@ -455,12 +568,12 @@ continue:
   ret i1 true
 }
 
-define i1 @smulo.br.i64(i64 %v1, i64 %v2) {
+define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
 entry:
 ; CHECK-LABEL:  smulo.br.i64
-; CHECK:        mul      x8, x0, x1
-; CHECK-NEXT:   smulh   x9, x0, x1
-; CHECK-NEXT:   cmp     x9, x8, asr #63
+; CHECK:        mul     [[MREG:x[0-9]+]], x0, x1
+; CHECK-NEXT:   smulh   [[HREG:x[0-9]+]], x0, x1
+; CHECK-NEXT:   cmp     [[HREG]], [[MREG]], asr #63
 ; CHECK-NEXT:   b.eq
   %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
@@ -474,11 +587,28 @@ continue:
   ret i1 true
 }
 
-define i1 @umulo.br.i32(i32 %v1, i32 %v2) {
+define zeroext i1 @smulo2.br.i64(i64 %v1) {
+entry:
+; CHECK-LABEL:  smulo2.br.i64
+; CHECK:        cmn  x0, x0
+; CHECK-NEXT:   b.vc
+  %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
+define zeroext i1 @umulo.br.i32(i32 %v1, i32 %v2) {
 entry:
 ; CHECK-LABEL:  umulo.br.i32
-; CHECK:        umull    x8, w0, w1
-; CHECK-NEXT:   cmp     xzr, x8, lsr #32
+; CHECK:        umull   [[MREG:x[0-9]+]], w0, w1
+; CHECK-NEXT:   cmp     xzr, [[MREG]], lsr #32
 ; CHECK-NEXT:   b.eq
   %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
@@ -492,11 +622,11 @@ continue:
   ret i1 true
 }
 
-define i1 @umulo.br.i64(i64 %v1, i64 %v2) {
+define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
 entry:
 ; CHECK-LABEL:  umulo.br.i64
-; CHECK:        umulh   x8, x0, x1
-; CHECK-NEXT:   cbz
+; CHECK:        umulh   [[REG:x[0-9]+]], x0, x1
+; CHECK-NEXT:   {{cbz|cmp}}
   %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -509,6 +639,23 @@ continue:
   ret i1 true
 }
 
+define zeroext i1 @umulo2.br.i64(i64 %v1) {
+entry:
+; CHECK-LABEL:  umulo2.br.i64
+; CHECK:        cmn  x0, x0
+; CHECK-NEXT:   b.lo
+  %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 2)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  br i1 %obit, label %overflow, label %continue
+
+overflow:
+  ret i1 false
+
+continue:
+  ret i1 true
+}
+
 declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
 declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
 declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll
index 26301b92f9fe..ef209e9c6e5b 100644
--- a/test/CodeGen/AArch64/atomic-ops.ll
+++ b/test/CodeGen/AArch64/atomic-ops.ll
@@ -509,7 +509,7 @@ define i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 
-; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]
    ret i8 %old
 }
 
@@ -534,7 +534,7 @@ define i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 
-; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]
    ret i16 %old
 }
 
@@ -607,7 +607,7 @@ define i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 
-; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]
    ret i8 %old
 }
 
@@ -632,7 +632,7 @@ define i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
 ; CHECK-NEXT: cbnz [[STATUS]], .LBB{{[0-9]+}}_1
 ; CHECK-NOT: dmb
 
-; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
+; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD_EXT]]
    ret i16 %old
 }
 
diff --git a/test/CodeGen/AArch64/bitcast-v2i8.ll b/test/CodeGen/AArch64/bitcast-v2i8.ll
new file mode 100644
index 000000000000..4bdac641c5bc
--- /dev/null
+++ b/test/CodeGen/AArch64/bitcast-v2i8.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=aarch64-apple-ios | FileCheck %s
+
+; Part of PR21549: going through the stack isn't ideal but is correct.
+
+define i16 @test_bitcast_v2i8_to_i16(<2 x i8> %a) {
+; CHECK-LABEL: test_bitcast_v2i8_to_i16
+; CHECK:      mov.s   [[WREG_HI:w[0-9]+]], v0[1]
+; CHECK-NEXT: fmov    [[WREG_LO:w[0-9]+]], s0
+; CHECK-NEXT: strb    [[WREG_HI]], [sp, #15]
+; CHECK-NEXT: strb    [[WREG_LO]], [sp, #14]
+; CHECK-NEXT: ldrh    w0, [sp, #14]
+
+  %aa = bitcast <2 x i8> %a to i16
+  ret i16 %aa
+}
diff --git a/test/CodeGen/AArch64/br-to-eh-lpad.ll b/test/CodeGen/AArch64/br-to-eh-lpad.ll
new file mode 100644
index 000000000000..20bffd9e4129
--- /dev/null
+++ b/test/CodeGen/AArch64/br-to-eh-lpad.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -mtriple=aarch64-apple-ios -verify-machineinstrs
+
+; This function tests that the machine verifier accepts an unconditional
+; branch from an invoke basic block, to its EH landing pad basic block.
+; The test is brittle and isn't ideally reduced, because in most cases the
+; branch would be removed (for instance, turned into a fallthrough), and in
+; that case, the machine verifier, which relies on analyzing branches for this
+; kind of verification, is unable to check anything, so accepts the CFG.
+
+define void @test_branch_to_landingpad() {
+entry:
+  br i1 undef, label %if.end50.thread, label %if.then6
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch %struct._objc_typeinfo.12.129.194.285.350.493.519.532.571.597.623.765* @"OBJC_EHTYPE_$_NSString"
+          catch %struct._objc_typeinfo.12.129.194.285.350.493.519.532.571.597.623.765* @OBJC_EHTYPE_id
+          catch i8* null
+  br i1 undef, label %invoke.cont33, label %catch.fallthrough
+
+catch.fallthrough:
+  %matches31 = icmp eq i32 undef, 0
+  br i1 %matches31, label %invoke.cont41, label %finally.catchall
+
+if.then6:
+  invoke void @objc_exception_throw()
+          to label %invoke.cont7 unwind label %lpad
+
+invoke.cont7:
+  unreachable
+
+if.end50.thread:
+  tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @.str1, i64 0, i64 0), i32 125)
+  tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @.str1, i64 0, i64 0), i32 128)
+  unreachable
+
+invoke.cont33:
+  tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @.str1, i64 0, i64 0), i32 119)
+  unreachable
+
+invoke.cont41:
+  invoke void @objc_exception_rethrow()
+          to label %invoke.cont43 unwind label %lpad40
+
+invoke.cont43:
+  unreachable
+
+lpad40:
+  %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+          catch i8* null
+  br label %finally.catchall
+
+finally.catchall:
+  tail call void (i8*, ...)* @printf(i8* getelementptr inbounds ([17 x i8]* @.str1, i64 0, i64 0), i32 125)
+  unreachable
+}
+
+%struct._objc_typeinfo.12.129.194.285.350.493.519.532.571.597.623.765 = type { i8**, i8*, %struct._class_t.10.127.192.283.348.491.517.530.569.595.621.764* }
+%struct._class_t.10.127.192.283.348.491.517.530.569.595.621.764 = type { %struct._class_t.10.127.192.283.348.491.517.530.569.595.621.764*, %struct._class_t.10.127.192.283.348.491.517.530.569.595.621.764*, %struct._objc_cache.0.117.182.273.338.481.507.520.559.585.611.754*, i8* (i8*, i8*)**, %struct._class_ro_t.9.126.191.282.347.490.516.529.568.594.620.763* }
+%struct._objc_cache.0.117.182.273.338.481.507.520.559.585.611.754 = type opaque
+%struct._class_ro_t.9.126.191.282.347.490.516.529.568.594.620.763 = type { i32, i32, i32, i8*, i8*, %struct.__method_list_t.2.119.184.275.340.483.509.522.561.587.613.756*, %struct._objc_protocol_list.6.123.188.279.344.487.513.526.565.591.617.760*, %struct._ivar_list_t.8.125.190.281.346.489.515.528.567.593.619.762*, i8*, %struct._prop_list_t.4.121.186.277.342.485.511.524.563.589.615.758* }
+%struct.__method_list_t.2.119.184.275.340.483.509.522.561.587.613.756 = type { i32, i32, [0 x %struct._objc_method.1.118.183.274.339.482.508.521.560.586.612.755] }
+%struct._objc_method.1.118.183.274.339.482.508.521.560.586.612.755 = type { i8*, i8*, i8* }
+%struct._objc_protocol_list.6.123.188.279.344.487.513.526.565.591.617.760 = type { i64, [0 x %struct._protocol_t.5.122.187.278.343.486.512.525.564.590.616.759*] }
+%struct._protocol_t.5.122.187.278.343.486.512.525.564.590.616.759 = type { i8*, i8*, %struct._objc_protocol_list.6.123.188.279.344.487.513.526.565.591.617.760*, %struct.__method_list_t.2.119.184.275.340.483.509.522.561.587.613.756*, %struct.__method_list_t.2.119.184.275.340.483.509.522.561.587.613.756*, %struct.__method_list_t.2.119.184.275.340.483.509.522.561.587.613.756*, %struct.__method_list_t.2.119.184.275.340.483.509.522.561.587.613.756*, %struct._prop_list_t.4.121.186.277.342.485.511.524.563.589.615.758*, i32, i32, i8** }
+%struct._ivar_list_t.8.125.190.281.346.489.515.528.567.593.619.762 = type { i32, i32, [0 x %struct._ivar_t.7.124.189.280.345.488.514.527.566.592.618.761] }
+%struct._ivar_t.7.124.189.280.345.488.514.527.566.592.618.761 = type { i32*, i8*, i8*, i32, i32 }
+%struct._prop_list_t.4.121.186.277.342.485.511.524.563.589.615.758 = type { i32, i32, [0 x %struct._prop_t.3.120.185.276.341.484.510.523.562.588.614.757] }
+%struct._prop_t.3.120.185.276.341.484.510.523.562.588.614.757 = type { i8*, i8* }
+
+@.str1 = external unnamed_addr constant [17 x i8], align 1
+@OBJC_EHTYPE_id = external global %struct._objc_typeinfo.12.129.194.285.350.493.519.532.571.597.623.765
+@"OBJC_EHTYPE_$_NSString" = external global %struct._objc_typeinfo.12.129.194.285.350.493.519.532.571.597.623.765, section "__DATA,__datacoal_nt,coalesced", align 8
+
+declare void @objc_exception_throw()
+declare void @objc_exception_rethrow()
+declare i32 @__objc_personality_v0(...)
+declare void @printf(i8* nocapture readonly, ...)
diff --git a/test/CodeGen/AArch64/br-undef-cond.ll b/test/CodeGen/AArch64/br-undef-cond.ll
new file mode 100644
index 000000000000..12d0da2e4fcd
--- /dev/null
+++ b/test/CodeGen/AArch64/br-undef-cond.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -verify-machineinstrs
+
+; Make sure we don't end up with a CBNZ of an undef v-/phys-reg.
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios"
+
+declare void @bar(i8*)
+
+define void @foo(i8* %m, i32 %off0) {
+.thread1653:
+  br i1 undef, label %0, label %.thread1880
+
+  %1 = icmp eq i32 undef, 0
+  %.not = xor i1 %1, true
+  %brmerge = or i1 %.not, undef
+  br i1 %brmerge, label %.thread1880, label %.thread1705
+
+.thread1705:
+  ret void
+
+.thread1880:
+  %m1652.ph = phi i8* [ %m, %0 ], [ null, %.thread1653 ]
+  call void @bar(i8* %m1652.ph)
+  ret void
+}
diff --git a/test/CodeGen/AArch64/cmp-const-max.ll b/test/CodeGen/AArch64/cmp-const-max.ll
new file mode 100644
index 000000000000..0431e391a30b
--- /dev/null
+++ b/test/CodeGen/AArch64/cmp-const-max.ll
@@ -0,0 +1,36 @@
+; RUN: llc -verify-machineinstrs -aarch64-atomic-cfg-tidy=0 < %s -mtriple=aarch64-none-eabihf -fast-isel=false | FileCheck %s
+
+
+define i32 @ule_64_max(i64 %p) {
+entry:
+; CHECK-LABEL: ule_64_max:
+; CHECK: cmn x0, #1
+; CHECK: b.hi [[RET_ZERO:.LBB[0-9]+_[0-9]+]]
+  %cmp = icmp ule i64 %p, 18446744073709551615 ; 0xffffffffffffffff
+  br i1 %cmp, label %ret_one, label %ret_zero
+
+ret_one:
+  ret i32 1
+
+ret_zero:
+; CHECK: [[RET_ZERO]]:
+; CHECK-NEXT: mov w0, wzr
+  ret i32 0
+}
+
+define i32 @ugt_64_max(i64 %p) {
+entry:
+; CHECK-LABEL: ugt_64_max:
+; CHECK: cmn x0, #1
+; CHECK: b.ls [[RET_ZERO:.LBB[0-9]+_[0-9]+]]
+  %cmp = icmp ugt i64 %p, 18446744073709551615 ; 0xffffffffffffffff
+  br i1 %cmp, label %ret_one, label %ret_zero
+
+ret_one:
+  ret i32 1
+
+ret_zero:
+; CHECK: [[RET_ZERO]]:
+; CHECK-NEXT: mov w0, wzr
+  ret i32 0
+}
diff --git a/test/CodeGen/AArch64/cmpwithshort.ll b/test/CodeGen/AArch64/cmpwithshort.ll
new file mode 100644
index 000000000000..14efdcc9d188
--- /dev/null
+++ b/test/CodeGen/AArch64/cmpwithshort.ll
@@ -0,0 +1,46 @@
+; RUN: llc -O3 -march=aarch64 < %s | FileCheck %s 
+
+define i16 @test_1cmp_signed_1(i16* %ptr1) {
+; CHECK-LABLE: @test_1cmp_signed_1
+; CHECK: ldrsh
+; CHECK-NEXT: cmn
+entry:
+  %addr = getelementptr inbounds i16* %ptr1, i16 0
+  %val = load i16* %addr, align 2
+  %cmp = icmp eq i16 %val, -1
+  br i1 %cmp, label %if, label %if.then
+if:
+  ret i16 1
+if.then:
+  ret i16 0
+}
+
+define i16 @test_1cmp_signed_2(i16* %ptr1) {
+; CHECK-LABLE: @test_1cmp_signed_2
+; CHECK: ldrsh
+; CHECK-NEXT: cmn
+entry:
+  %addr = getelementptr inbounds i16* %ptr1, i16 0
+  %val = load i16* %addr, align 2
+  %cmp = icmp sge i16 %val, -1
+  br i1 %cmp, label %if, label %if.then
+if:
+  ret i16 1
+if.then:
+  ret i16 0
+}
+
+define i16 @test_1cmp_unsigned_1(i16* %ptr1) {
+; CHECK-LABLE: @test_1cmp_unsigned_1
+; CHECK: ldrsh
+; CHECK-NEXT: cmn
+entry:
+  %addr = getelementptr inbounds i16* %ptr1, i16 0
+  %val = load i16* %addr, align 2
+  %cmp = icmp uge i16 %val, -1
+  br i1 %cmp, label %if, label %if.then
+if:
+  ret i16 1
+if.then:
+  ret i16 0
+}                                           
diff --git a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
new file mode 100644
index 000000000000..df8dc87176cc
--- /dev/null
+++ b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -0,0 +1,413 @@
+; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s
+
+; marked as external to prevent possible optimizations
+@a = external global i32
+@b = external global i32
+@c = external global i32
+@d = external global i32
+
+; (a > 10 && b == c) || (a >= 10 && b == d)
+define i32 @combine_gt_ge_10() #0 {
+; CHECK-LABEL: combine_gt_ge_10
+; CHECK: cmp
+; CHECK: b.le
+; CHECK: ret
+; CHECK-NOT: cmp
+; CHECK: b.lt
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp sgt i32 %0, 10
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %entry
+  %1 = load i32* @b, align 4
+  %2 = load i32* @c, align 4
+  %cmp1 = icmp eq i32 %1, %2
+  br i1 %cmp1, label %return, label %land.lhs.true3
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp2 = icmp sgt i32 %0, 9
+  br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3:                                   ; preds = %lor.lhs.false, %land.lhs.true
+  %3 = load i32* @b, align 4
+  %4 = load i32* @d, align 4
+  %cmp4 = icmp eq i32 %3, %4
+  br i1 %cmp4, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true3, %lor.lhs.false
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+; (a > 5 && b == c) || (a < 5 && b == d)
+define i32 @combine_gt_lt_5() #0 {
+; CHECK-LABEL: combine_gt_lt_5
+; CHECK: cmp
+; CHECK: b.le
+; CHECK: ret
+; CHECK-NOT: cmp
+; CHECK: b.ge
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp sgt i32 %0, 5
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %entry
+  %1 = load i32* @b, align 4
+  %2 = load i32* @c, align 4
+  %cmp1 = icmp eq i32 %1, %2
+  br i1 %cmp1, label %return, label %if.end
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp2 = icmp slt i32 %0, 5
+  br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3:                                   ; preds = %lor.lhs.false
+  %3 = load i32* @b, align 4
+  %4 = load i32* @d, align 4
+  %cmp4 = icmp eq i32 %3, %4
+  br i1 %cmp4, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+; (a < 5 && b == c) || (a <= 5 && b == d)
+define i32 @combine_lt_ge_5() #0 {
+; CHECK-LABEL: combine_lt_ge_5
+; CHECK: cmp
+; CHECK: b.ge
+; CHECK: ret
+; CHECK-NOT: cmp
+; CHECK: b.gt
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp slt i32 %0, 5
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %entry
+  %1 = load i32* @b, align 4
+  %2 = load i32* @c, align 4
+  %cmp1 = icmp eq i32 %1, %2
+  br i1 %cmp1, label %return, label %land.lhs.true3
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp2 = icmp slt i32 %0, 6
+  br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3:                                   ; preds = %lor.lhs.false, %land.lhs.true
+  %3 = load i32* @b, align 4
+  %4 = load i32* @d, align 4
+  %cmp4 = icmp eq i32 %3, %4
+  br i1 %cmp4, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true3, %lor.lhs.false
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+; (a < 5 && b == c) || (a > 5 && b == d)
+define i32 @combine_lt_gt_5() #0 {
+; CHECK-LABEL: combine_lt_gt_5
+; CHECK: cmp
+; CHECK: b.ge
+; CHECK: ret
+; CHECK-NOT: cmp
+; CHECK: b.le
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp slt i32 %0, 5
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %entry
+  %1 = load i32* @b, align 4
+  %2 = load i32* @c, align 4
+  %cmp1 = icmp eq i32 %1, %2
+  br i1 %cmp1, label %return, label %if.end
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp2 = icmp sgt i32 %0, 5
+  br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3:                                   ; preds = %lor.lhs.false
+  %3 = load i32* @b, align 4
+  %4 = load i32* @d, align 4
+  %cmp4 = icmp eq i32 %3, %4
+  br i1 %cmp4, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+; (a > -5 && b == c) || (a < -5 && b == d)
+define i32 @combine_gt_lt_n5() #0 {
+; CHECK-LABEL: combine_gt_lt_n5
+; CHECK: cmn
+; CHECK: b.le
+; CHECK: ret
+; CHECK-NOT: cmn
+; CHECK: b.ge
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp sgt i32 %0, -5
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %entry
+  %1 = load i32* @b, align 4
+  %2 = load i32* @c, align 4
+  %cmp1 = icmp eq i32 %1, %2
+  br i1 %cmp1, label %return, label %if.end
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp2 = icmp slt i32 %0, -5
+  br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3:                                   ; preds = %lor.lhs.false
+  %3 = load i32* @b, align 4
+  %4 = load i32* @d, align 4
+  %cmp4 = icmp eq i32 %3, %4
+  br i1 %cmp4, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+; (a < -5 && b == c) || (a > -5 && b == d)
+define i32 @combine_lt_gt_n5() #0 {
+; CHECK-LABEL: combine_lt_gt_n5
+; CHECK: cmn
+; CHECK: b.ge
+; CHECK: ret
+; CHECK-NOT: cmn
+; CHECK: b.le
+entry:
+  %0 = load i32* @a, align 4
+  %cmp = icmp slt i32 %0, -5
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:                                    ; preds = %entry
+  %1 = load i32* @b, align 4
+  %2 = load i32* @c, align 4
+  %cmp1 = icmp eq i32 %1, %2
+  br i1 %cmp1, label %return, label %if.end
+
+lor.lhs.false:                                    ; preds = %entry
+  %cmp2 = icmp sgt i32 %0, -5
+  br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3:                                   ; preds = %lor.lhs.false
+  %3 = load i32* @b, align 4
+  %4 = load i32* @d, align 4
+  %cmp4 = icmp eq i32 %3, %4
+  br i1 %cmp4, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true3, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+%struct.Struct = type { i64, i64 }
+
+@glob = internal unnamed_addr global %struct.Struct* null, align 8
+
+declare %struct.Struct* @Update(%struct.Struct*) #1
+
+; no checks for this case, it just should be processed without errors
+define void @combine_non_adjacent_cmp_br(%struct.Struct* nocapture readonly %hdCall) #0 {
+entry:
+  %size = getelementptr inbounds %struct.Struct* %hdCall, i64 0, i32 0
+  %0 = load i64* %size, align 8
+  br label %land.rhs
+
+land.rhs:
+  %rp.06 = phi i64 [ %0, %entry ], [ %sub, %while.body ]
+  %1 = load i64* inttoptr (i64 24 to i64*), align 8
+  %cmp2 = icmp sgt i64 %1, 0
+  br i1 %cmp2, label %while.body, label %while.end
+
+while.body:
+  %2 = load %struct.Struct** @glob, align 8
+  %call = tail call %struct.Struct* @Update(%struct.Struct* %2) #2
+  %sub = add nsw i64 %rp.06, -2
+  %cmp = icmp slt i64 %0, %rp.06
+  br i1 %cmp, label %land.rhs, label %while.end
+
+while.end:
+  ret void
+}
+
+; undefined external to prevent possible optimizations
+declare void @do_something() #1
+
+define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
+; CHECK-LABEL: do_nothing_if_resultant_opcodes_would_differ
+; CHECK: cmn
+; CHECK: b.gt
+; CHECK: cmp
+; CHECK: b.gt
+entry:
+  %0 = load i32* @a, align 4
+  %cmp4 = icmp slt i32 %0, -1
+  br i1 %cmp4, label %while.body.preheader, label %while.end
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %while.body.preheader
+  %i.05 = phi i32 [ %inc, %while.body ], [ %0, %while.body.preheader ]
+  tail call void @do_something() #2
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %i.05, 0
+  br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
+
+while.cond.while.end_crit_edge:                   ; preds = %while.body
+  %.pre = load i32* @a, align 4
+  br label %while.end
+
+while.end:                                        ; preds = %while.cond.while.end_crit_edge, %entry
+  %1 = phi i32 [ %.pre, %while.cond.while.end_crit_edge ], [ %0, %entry ]
+  %cmp1 = icmp slt i32 %1, 2
+  br i1 %cmp1, label %land.lhs.true, label %if.end
+
+land.lhs.true:                                    ; preds = %while.end
+  %2 = load i32* @b, align 4
+  %3 = load i32* @d, align 4
+  %cmp2 = icmp eq i32 %2, %3
+  br i1 %cmp2, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true, %while.end
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 123, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
+; CHECK-LABEL: do_nothing_if_compares_can_not_be_adjusted_to_each_other
+; CHECK: cmp
+; CHECK: b.gt
+; CHECK: cmn
+; CHECK: b.lt
+entry:
+  %0 = load i32* @a, align 4
+  %cmp4 = icmp slt i32 %0, 1
+  br i1 %cmp4, label %while.body.preheader, label %while.end
+
+while.body.preheader:                             ; preds = %entry
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %while.body.preheader
+  %i.05 = phi i32 [ %inc, %while.body ], [ %0, %while.body.preheader ]
+  tail call void @do_something() #2
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %i.05, 0
+  br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit:                               ; preds = %while.body
+  br label %while.end
+
+while.end:                                        ; preds = %while.end.loopexit, %entry
+  %1 = load i32* @c, align 4
+  %cmp1 = icmp sgt i32 %1, -3
+  br i1 %cmp1, label %land.lhs.true, label %if.end
+
+land.lhs.true:                                    ; preds = %while.end
+  %2 = load i32* @b, align 4
+  %3 = load i32* @d, align 4
+  %cmp2 = icmp eq i32 %2, %3
+  br i1 %cmp2, label %return, label %if.end
+
+if.end:                                           ; preds = %land.lhs.true, %while.end
+  br label %return
+
+return:                                           ; preds = %if.end, %land.lhs.true
+  %retval.0 = phi i32 [ 0, %if.end ], [ 123, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+; Test in the following case, we don't hit 'cmp' and trigger a false positive
+; cmp  w19, #0
+; cinc w0, w19, gt
+; ...
+; fcmp d8, #0.0
+; b.gt .LBB0_5
+
+define i32 @fcmpri(i32 %argc, i8** nocapture readonly %argv) {
+
+; CHECK-LABEL: fcmpri:
+; CHECK: cmp w0, #2
+; CHECK: b.lt .LBB9_3
+; CHECK-NOT: cmp w0, #1
+; CHECK-NOT: b.le .LBB9_3
+
+; CHECK-LABEL-DAG: .LBB9_3
+; CHECK: cmp w19, #0
+; CHECK: fcmp d8, #0.0
+; CHECK: b.gt .LBB9_5
+; CHECK-NOT: cmp w19, #1
+; CHECK-NOT: b.ge .LBB9_5
+
+entry:
+  %cmp = icmp sgt i32 %argc, 1
+  br i1 %cmp, label %land.lhs.true, label %if.end
+
+land.lhs.true:                                    ; preds = %entry
+  %arrayidx = getelementptr inbounds i8** %argv, i64 1
+  %0 = load i8** %arrayidx, align 8
+  %cmp1 = icmp eq i8* %0, null
+  br i1 %cmp1, label %if.end, label %return
+
+if.end:                                           ; preds = %land.lhs.true, %entry
+  %call = call i32 @zoo(i32 1)
+  %call2 = call double @yoo(i32 -1)
+  %cmp4 = icmp sgt i32 %call, 0
+  %add = zext i1 %cmp4 to i32
+  %cond = add nsw i32 %add, %call
+  %call7 = call i32 @xoo(i32 %cond, i32 2)
+  %cmp9 = fcmp ogt double %call2, 0.000000e+00
+  br i1 %cmp9, label %cond.end14, label %cond.false12
+
+cond.false12:                                     ; preds = %if.end
+  %sub = fadd fast double %call2, -1.000000e+00
+  br label %cond.end14
+
+cond.end14:                                       ; preds = %if.end, %cond.false12
+  %cond15 = phi double [ %sub, %cond.false12 ], [ %call2, %if.end ]
+  %call16 = call i32 @woo(double %cond15, double -2.000000e+00)
+  br label %return
+
+return:                                           ; preds = %land.lhs.true, %cond.end14
+  %retval.0 = phi i32 [ 4, %cond.end14 ], [ 3, %land.lhs.true ]
+  ret i32 %retval.0
+}
+
+declare i32 @zoo(i32)
+
+declare double @yoo(i32)
+
+declare i32 @xoo(i32, i32)
+
+declare i32 @woo(double, double)
diff --git a/test/CodeGen/AArch64/compiler-ident.ll b/test/CodeGen/AArch64/compiler-ident.ll
index 035057194a06..217340db7955 100644
--- a/test/CodeGen/AArch64/compiler-ident.ll
+++ b/test/CodeGen/AArch64/compiler-ident.ll
@@ -8,5 +8,5 @@ target triple = "aarch64--linux-gnu"
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"some LLVM version"}
+!0 = !{!"some LLVM version"}
 
diff --git a/test/CodeGen/AArch64/cond-sel.ll b/test/CodeGen/AArch64/cond-sel.ll
index 5f81cba66cbc..dfc83aacfcfc 100644
--- a/test/CodeGen/AArch64/cond-sel.ll
+++ b/test/CodeGen/AArch64/cond-sel.ll
@@ -214,3 +214,20 @@ define void @test_csetm(i32 %lhs, i32 %rhs, i64 %lhs64) {
   ret void
 ; CHECK: ret
 }
+
+define <1 x i1> @test_wide_comparison(i32 %in) {
+; CHECK-LABEL: test_wide_comparison:
+; CHECK: cmp w0, #1234
+; CHECK: cset
+
+  %tmp = icmp sgt i32 %in, 1234
+  %res = select i1 %tmp, <1 x i1> <i1 1>, <1 x i1> zeroinitializer
+  ret <1 x i1> %res
+}
+
+define i32 @test_select_undef() {
+; CHECK-LABEL: test_select_undef:
+; CHECK: ret
+  %res = select i1 undef, i32 0, i32 42
+  ret i32 %res
+}
diff --git a/test/CodeGen/AArch64/dag-combine-invaraints.ll b/test/CodeGen/AArch64/dag-combine-invaraints.ll
new file mode 100644
index 000000000000..115fc64174c8
--- /dev/null
+++ b/test/CodeGen/AArch64/dag-combine-invaraints.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=arm64-apple-darwin8.0 -relocation-model=pic -O1 < %s | FileCheck %s
+
+@.str2 = private unnamed_addr constant [9 x i8] c"_%d____\0A\00", align 1
+
+; Function Attrs: nounwind ssp
+define i32 @main(i32 %argc, i8** %argv) #0 {
+main_:
+  %tmp = alloca i32, align 4
+  %i32T = alloca i32, align 4
+  %i32F = alloca i32, align 4
+  %i32X = alloca i32, align 4
+  store i32 0, i32* %tmp
+  store i32 15, i32* %i32T, align 4
+  store i32 5, i32* %i32F, align 4
+  %tmp6 = load i32* %tmp, align 4
+  %tmp7 = icmp ne i32 %tmp6, 0
+  %tmp8 = xor i1 %tmp7, true
+  %tmp9 = load i32* %i32T, align 4
+  %tmp10 = load i32* %i32F, align 4
+  %DHSelect = select i1 %tmp8, i32 %tmp9, i32 %tmp10
+  store i32 %DHSelect, i32* %i32X, align 4
+  %tmp15 = load i32* %i32X, align 4
+  %tmp17 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str2, i32 0, i32 0), i32 %tmp15)
+  ret i32 0
+
+; CHECK: main:
+; CHECK-DAG: movz
+; CHECK-DAG: orr
+; CHECK: csel
+}
+
+
+declare i32 @printf(i8*, ...) #1
+
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/AArch64/dont-take-over-the-world.ll b/test/CodeGen/AArch64/dont-take-over-the-world.ll
new file mode 100644
index 000000000000..d9e13b745fd5
--- /dev/null
+++ b/test/CodeGen/AArch64/dont-take-over-the-world.ll
@@ -0,0 +1,7 @@
+; RUN: not llc -mtriple=x86-64 2>&1 | FileCheck %s
+
+; To support "arm64" as a -march option, we need to register a second AArch64
+; target, but we have to be careful how we do that so that it doesn't become the
+; target of last resort when the specified triple is completely wrong.
+
+; CHECK: unable to get target for 'x86-64', see --version and --triple.
diff --git a/test/CodeGen/AArch64/dp-3source.ll b/test/CodeGen/AArch64/dp-3source.ll
index 22bd4a844e1a..bd96ec728f52 100644
--- a/test/CodeGen/AArch64/dp-3source.ll
+++ b/test/CodeGen/AArch64/dp-3source.ll
@@ -161,3 +161,18 @@ define i64 @test_umnegl(i32 %lhs, i32 %rhs) {
 ; CHECK: umnegl {{x[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
   ret i64 %res
 }
+
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@c = common global i32 0, align 4
+
+define void @test_mneg(){
+; CHECK-LABEL: test_mneg:
+  %1 = load i32* @a, align 4
+  %2 = load i32* @b, align 4
+  %3 = sub i32 0, %1
+  %4 = mul i32 %2, %3
+  store i32 %4, i32* @c, align 4
+; CHECK: mneg {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
+  ret void
+}
diff --git a/test/CodeGen/AArch64/extern-weak.ll b/test/CodeGen/AArch64/extern-weak.ll
index ce5c0f686615..f647c4bcda51 100644
--- a/test/CodeGen/AArch64/extern-weak.ll
+++ b/test/CodeGen/AArch64/extern-weak.ll
@@ -1,17 +1,24 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=static -o - < %s | FileCheck --check-prefix=CHECK-STATIC %s
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -code-model=large -o - %s | FileCheck --check-prefix=CHECK-LARGE %s
 
 declare extern_weak i32 @var()
 
 define i32()* @foo() {
 ; The usual ADRP/ADD pair can't be used for a weak reference because it must
-; evaluate to 0 if the symbol is undefined. We use a litpool entry.
+; evaluate to 0 if the symbol is undefined. We use a GOT entry for PIC
+; otherwise a litpool entry.
   ret i32()* @var
 
 
 ; CHECK: adrp x[[ADDRHI:[0-9]+]], :got:var
 ; CHECK: ldr x0, [x[[ADDRHI]], :got_lo12:var]
 
+; CHECK-STATIC: .LCPI0_0:
+; CHECK-STATIC-NEXT: .xword  var
+; CHECK-STATIC: adrp x[[VAR:[0-9]+]], .LCPI0_0
+; CHECK-STATIC: ldr x0, [x[[VAR]], :lo12:.LCPI0_0]
+
   ; In the large model, the usual relocations are absolute and can
   ; materialise 0.
 ; CHECK-LARGE: movz x0, #:abs_g3:var
@@ -31,6 +38,11 @@ define i32* @bar() {
 ; CHECK: ldr [[BASE:x[0-9]+]], [x[[ADDRHI]], :got_lo12:arr_var]
 ; CHECK: add x0, [[BASE]], #20
 
+; CHECK-STATIC: .LCPI1_0:
+; CHECK-STATIC-NEXT: .xword arr_var
+; CHECK-STATIC: ldr [[BASE:x[0-9]+]], [{{x[0-9]+}}, :lo12:.LCPI1_0]
+; CHECK-STATIC: add x0, [[BASE]], #20
+
   ret i32* %addr
 
   ; In the large model, the usual relocations are absolute and can
@@ -49,6 +61,9 @@ define i32* @wibble() {
 ; CHECK: adrp [[BASE:x[0-9]+]], defined_weak_var
 ; CHECK: add x0, [[BASE]], :lo12:defined_weak_var
 
+; CHECK-STATIC: adrp [[BASE:x[0-9]+]], defined_weak_var
+; CHECK-STATIC: add x0, [[BASE]], :lo12:defined_weak_var
+
 ; CHECK-LARGE: movz x0, #:abs_g3:defined_weak_var
 ; CHECK-LARGE: movk x0, #:abs_g2_nc:defined_weak_var
 ; CHECK-LARGE: movk x0, #:abs_g1_nc:defined_weak_var
diff --git a/test/CodeGen/AArch64/fast-isel-addressing-modes.ll b/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
new file mode 100644
index 000000000000..d86f00d38622
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
@@ -0,0 +1,627 @@
+; RUN: llc -mtriple=aarch64-apple-darwin                             -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SDAG
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
+
+; Load / Store Base Register only
+define zeroext i1 @load_breg_i1(i1* %a) {
+; CHECK-LABEL: load_breg_i1
+; CHECK:       ldrb {{w[0-9]+}}, [x0]
+  %1 = load i1* %a
+  ret i1 %1
+}
+
+define zeroext i8 @load_breg_i8(i8* %a) {
+; CHECK-LABEL: load_breg_i8
+; CHECK:       ldrb {{w[0-9]+}}, [x0]
+  %1 = load i8* %a
+  ret i8 %1
+}
+
+define zeroext i16 @load_breg_i16(i16* %a) {
+; CHECK-LABEL: load_breg_i16
+; CHECK:       ldrh {{w[0-9]+}}, [x0]
+  %1 = load i16* %a
+  ret i16 %1
+}
+
+define i32 @load_breg_i32(i32* %a) {
+; CHECK-LABEL: load_breg_i32
+; CHECK:       ldr {{w[0-9]+}}, [x0]
+  %1 = load i32* %a
+  ret i32 %1
+}
+
+define i64 @load_breg_i64(i64* %a) {
+; CHECK-LABEL: load_breg_i64
+; CHECK:       ldr {{x[0-9]+}}, [x0]
+  %1 = load i64* %a
+  ret i64 %1
+}
+
+define float @load_breg_f32(float* %a) {
+; CHECK-LABEL: load_breg_f32
+; CHECK:       ldr {{s[0-9]+}}, [x0]
+  %1 = load float* %a
+  ret float %1
+}
+
+define double @load_breg_f64(double* %a) {
+; CHECK-LABEL: load_breg_f64
+; CHECK:       ldr {{d[0-9]+}}, [x0]
+  %1 = load double* %a
+  ret double %1
+}
+
+define void @store_breg_i1(i1* %a) {
+; CHECK-LABEL: store_breg_i1
+; CHECK:       strb wzr, [x0]
+  store i1 0, i1* %a
+  ret void
+}
+
+define void @store_breg_i1_2(i1* %a) {
+; CHECK-LABEL: store_breg_i1_2
+; CHECK:       strb {{w[0-9]+}}, [x0]
+  store i1 true, i1* %a
+  ret void
+}
+
+define void @store_breg_i8(i8* %a) {
+; CHECK-LABEL: store_breg_i8
+; CHECK:       strb wzr, [x0]
+  store i8 0, i8* %a
+  ret void
+}
+
+define void @store_breg_i16(i16* %a) {
+; CHECK-LABEL: store_breg_i16
+; CHECK:       strh wzr, [x0]
+  store i16 0, i16* %a
+  ret void
+}
+
+define void @store_breg_i32(i32* %a) {
+; CHECK-LABEL: store_breg_i32
+; CHECK:       str wzr, [x0]
+  store i32 0, i32* %a
+  ret void
+}
+
+define void @store_breg_i64(i64* %a) {
+; CHECK-LABEL: store_breg_i64
+; CHECK:       str xzr, [x0]
+  store i64 0, i64* %a
+  ret void
+}
+
+define void @store_breg_f32(float* %a) {
+; CHECK-LABEL: store_breg_f32
+; CHECK:       str wzr, [x0]
+  store float 0.0, float* %a
+  ret void
+}
+
+define void @store_breg_f64(double* %a) {
+; CHECK-LABEL: store_breg_f64
+; CHECK:       str xzr, [x0]
+  store double 0.0, double* %a
+  ret void
+}
+
+; Load Immediate
+define i32 @load_immoff_1() {
+; CHECK-LABEL: load_immoff_1
+; CHECK:       orr {{w|x}}[[REG:[0-9]+]], {{wzr|xzr}}, #0x80
+; CHECK:       ldr {{w[0-9]+}}, {{\[}}x[[REG]]{{\]}}
+  %1 = inttoptr i64 128 to i32*
+  %2 = load i32* %1
+  ret i32 %2
+}
+
+; Load / Store Base Register + Immediate Offset
+; Max supported negative offset
+define i32 @load_breg_immoff_1(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_1
+; CHECK:       ldur {{w[0-9]+}}, [x0, #-256]
+  %1 = add i64 %a, -256
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Min not-supported negative offset
+define i32 @load_breg_immoff_2(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_2
+; CHECK:       sub [[REG:x[0-9]+]], x0, #257
+; CHECK-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, -257
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Max supported unscaled offset
+define i32 @load_breg_immoff_3(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_3
+; CHECK:       ldur {{w[0-9]+}}, [x0, #255]
+  %1 = add i64 %a, 255
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Min un-supported unscaled offset
+define i32 @load_breg_immoff_4(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_4
+; CHECK:       add [[REG:x[0-9]+]], x0, #257
+; CHECK-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, 257
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Max supported scaled offset
+define i32 @load_breg_immoff_5(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_5
+; CHECK:       ldr {{w[0-9]+}}, [x0, #16380]
+  %1 = add i64 %a, 16380
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Min un-supported scaled offset
+define i32 @load_breg_immoff_6(i64 %a) {
+; SDAG-LABEL: load_breg_immoff_6
+; SDAG:       orr	w[[NUM:[0-9]+]], wzr, #0x4000
+; SDAG-NEXT:  ldr {{w[0-9]+}}, [x0, x[[NUM]]]
+; FAST-LABEL: load_breg_immoff_6
+; FAST:       add [[REG:x[0-9]+]], x0, #4, lsl #12
+; FAST-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, 16384
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Max supported negative offset
+define void @store_breg_immoff_1(i64 %a) {
+; CHECK-LABEL: store_breg_immoff_1
+; CHECK:       stur wzr, [x0, #-256]
+  %1 = add i64 %a, -256
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; Min not-supported negative offset
+define void @store_breg_immoff_2(i64 %a) {
+; CHECK-LABEL: store_breg_immoff_2
+; CHECK:       sub [[REG:x[0-9]+]], x0, #257
+; CHECK-NEXT:  str wzr, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, -257
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; Max supported unscaled offset
+define void @store_breg_immoff_3(i64 %a) {
+; CHECK-LABEL: store_breg_immoff_3
+; CHECK:       stur wzr, [x0, #255]
+  %1 = add i64 %a, 255
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; Min un-supported unscaled offset
+define void @store_breg_immoff_4(i64 %a) {
+; CHECK-LABEL: store_breg_immoff_4
+; CHECK:       add [[REG:x[0-9]+]], x0, #257
+; CHECK-NEXT:  str wzr, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, 257
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; Max supported scaled offset
+define void @store_breg_immoff_5(i64 %a) {
+; CHECK-LABEL: store_breg_immoff_5
+; CHECK:       str wzr, [x0, #16380]
+  %1 = add i64 %a, 16380
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+; Min un-supported scaled offset
+define void @store_breg_immoff_6(i64 %a) {
+; SDAG-LABEL: store_breg_immoff_6
+; SDAG:       orr	w[[NUM:[0-9]+]], wzr, #0x4000
+; SDAG-NEXT:  str wzr, [x0, x[[NUM]]]
+; FAST-LABEL: store_breg_immoff_6
+; FAST:       add [[REG:x[0-9]+]], x0, #4, lsl #12
+; FAST-NEXT:  str wzr, {{\[}}[[REG]]{{\]}}
+  %1 = add i64 %a, 16384
+  %2 = inttoptr i64 %1 to i32*
+  store i32 0, i32* %2
+  ret void
+}
+
+define i64 @load_breg_immoff_7(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_7
+; CHECK:       ldr {{x[0-9]+}}, [x0, #48]
+  %1 = add i64 %a, 48
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load i64* %2
+  ret i64 %3
+}
+
+; Flip add operands
+define i64 @load_breg_immoff_8(i64 %a) {
+; CHECK-LABEL: load_breg_immoff_8
+; CHECK:       ldr {{x[0-9]+}}, [x0, #48]
+  %1 = add i64 48, %a
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load i64* %2
+  ret i64 %3
+}
+
+; Load Base Register + Register Offset
+define i64 @load_breg_offreg_1(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_offreg_1
+; CHECK:       ldr {{x[0-9]+}}, [x0, x1]
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load i64* %2
+  ret i64 %3
+}
+
+; Flip add operands
+define i64 @load_breg_offreg_2(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_offreg_2
+; CHECK:       ldr {{x[0-9]+}}, [x1, x0]
+  %1 = add i64 %b, %a
+  %2 = inttoptr i64 %1 to i64*
+  %3 = load i64* %2
+  ret i64 %3
+}
+
+; Load Base Register + Register Offset + Immediate Offset
+define i64 @load_breg_offreg_immoff_1(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_offreg_immoff_1
+; CHECK:       add [[REG:x[0-9]+]], x0, x1
+; CHECK-NEXT:  ldr x0, {{\[}}[[REG]], #48{{\]}}
+  %1 = add i64 %a, %b
+  %2 = add i64 %1, 48
+  %3 = inttoptr i64 %2 to i64*
+  %4 = load i64* %3
+  ret i64 %4
+}
+
+define i64 @load_breg_offreg_immoff_2(i64 %a, i64 %b) {
+; SDAG-LABEL: load_breg_offreg_immoff_2
+; SDAG:       add [[REG1:x[0-9]+]], x0, x1
+; SDAG-NEXT:  orr w[[NUM:[0-9]+]], wzr, #0xf000
+; SDAG-NEXT:  ldr x0, {{\[}}[[REG1]], x[[NUM]]]
+; FAST-LABEL: load_breg_offreg_immoff_2
+; FAST:       add [[REG:x[0-9]+]], x0, #15, lsl #12
+; FAST-NEXT:  ldr x0, {{\[}}[[REG]], x1{{\]}}
+  %1 = add i64 %a, %b
+  %2 = add i64 %1, 61440
+  %3 = inttoptr i64 %2 to i64*
+  %4 = load i64* %3
+  ret i64 %4
+}
+
+; Load Scaled Register Offset
+define i32 @load_shift_offreg_1(i64 %a) {
+; CHECK-LABEL: load_shift_offreg_1
+; CHECK:       lsl [[REG:x[0-9]+]], x0, #2
+; CHECK:       ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+  %1 = shl i64 %a, 2
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+define i32 @load_mul_offreg_1(i64 %a) {
+; CHECK-LABEL: load_mul_offreg_1
+; CHECK:       lsl [[REG:x[0-9]+]], x0, #2
+; CHECK:       ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
+  %1 = mul i64 %a, 4
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  ret i32 %3
+}
+
+; Load Base Register + Scaled Register Offset
+define i32 @load_breg_shift_offreg_1(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_shift_offreg_1
+; CHECK:       ldr {{w[0-9]+}}, [x1, x0, lsl #2]
+  %1 = shl i64 %a, 2
+  %2 = add i64 %1, %b
+  %3 = inttoptr i64 %2 to i32*
+  %4 = load i32* %3
+  ret i32 %4
+}
+
+define i32 @load_breg_shift_offreg_2(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_shift_offreg_2
+; CHECK:       ldr {{w[0-9]+}}, [x1, x0, lsl #2]
+  %1 = shl i64 %a, 2
+  %2 = add i64 %b, %1
+  %3 = inttoptr i64 %2 to i32*
+  %4 = load i32* %3
+  ret i32 %4
+}
+
+define i32 @load_breg_shift_offreg_3(i64 %a, i64 %b) {
+; SDAG-LABEL: load_breg_shift_offreg_3
+; SDAG:       lsl [[REG:x[0-9]+]], x0, #2
+; SDAG-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]], x1, lsl #2{{\]}}
+; FAST-LABEL: load_breg_shift_offreg_3
+; FAST:       lsl [[REG:x[0-9]+]], x1, #2
+; FAST-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
+  %1 = shl i64 %a, 2
+  %2 = shl i64 %b, 2
+  %3 = add i64 %1, %2
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_shift_offreg_4(i64 %a, i64 %b) {
+; SDAG-LABEL: load_breg_shift_offreg_4
+; SDAG:       lsl [[REG:x[0-9]+]], x1, #2
+; SDAG-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
+; FAST-LABEL: load_breg_shift_offreg_4
+; FAST:       lsl [[REG:x[0-9]+]], x0, #2
+; FAST-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]], x1, lsl #2{{\]}}
+  %1 = shl i64 %a, 2
+  %2 = shl i64 %b, 2
+  %3 = add i64 %2, %1
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_shift_offreg_5(i64 %a, i64 %b) {
+; SDAG-LABEL: load_breg_shift_offreg_5
+; SDAG:       lsl [[REG:x[0-9]+]], x1, #3
+; SDAG-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
+; FAST-LABEL: load_breg_shift_offreg_5
+; FAST:       lsl [[REG:x[0-9]+]], x1, #3
+; FAST-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
+  %1 = shl i64 %a, 2
+  %2 = shl i64 %b, 3
+  %3 = add i64 %1, %2
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_mul_offreg_1(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_mul_offreg_1
+; CHECK:       ldr {{w[0-9]+}}, [x1, x0, lsl #2]
+  %1 = mul i64 %a, 4
+  %2 = add i64 %1, %b
+  %3 = inttoptr i64 %2 to i32*
+  %4 = load i32* %3
+  ret i32 %4
+}
+
+define zeroext i8 @load_breg_and_offreg_1(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_and_offreg_1
+; CHECK:       ldrb {{w[0-9]+}}, [x1, w0, uxtw]
+  %1 = and i64 %a, 4294967295
+  %2 = add i64 %1, %b
+  %3 = inttoptr i64 %2 to i8*
+  %4 = load i8* %3
+  ret i8 %4
+}
+
+define zeroext i16 @load_breg_and_offreg_2(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_and_offreg_2
+; CHECK:       ldrh {{w[0-9]+}}, [x1, w0, uxtw #1]
+  %1 = and i64 %a, 4294967295
+  %2 = shl i64 %1, 1
+  %3 = add i64 %2, %b
+  %4 = inttoptr i64 %3 to i16*
+  %5 = load i16* %4
+  ret i16 %5
+}
+
+define i32 @load_breg_and_offreg_3(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_and_offreg_3
+; CHECK:       ldr {{w[0-9]+}}, [x1, w0, uxtw #2]
+  %1 = and i64 %a, 4294967295
+  %2 = shl i64 %1, 2
+  %3 = add i64 %2, %b
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i64 @load_breg_and_offreg_4(i64 %a, i64 %b) {
+; CHECK-LABEL: load_breg_and_offreg_4
+; CHECK:       ldr {{x[0-9]+}}, [x1, w0, uxtw #3]
+  %1 = and i64 %a, 4294967295
+  %2 = shl i64 %1, 3
+  %3 = add i64 %2, %b
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+; Not all 'and' instructions have immediates.
+define i64 @load_breg_and_offreg_5(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: load_breg_and_offreg_5
+; CHECK:       and [[REG:x[0-9]+]], x0, x2
+; CHECK-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]], x1{{\]}}
+  %1 = and i64 %a, %c
+  %2 = add i64 %1, %b
+  %3 = inttoptr i64 %2 to i64*
+  %4 = load i64* %3
+  ret i64 %4
+}
+
+define i64 @load_breg_and_offreg_6(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: load_breg_and_offreg_6
+; CHECK:       and [[REG:x[0-9]+]], x0, x2
+; CHECK-NEXT:  ldr {{x[0-9]+}}, {{\[}}x1, [[REG]], lsl #3{{\]}}
+  %1 = and i64 %a, %c
+  %2 = shl i64 %1, 3
+  %3 = add i64 %2, %b
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+; Load Base Register + Scaled Register Offset + Sign/Zero extension
+define i32 @load_breg_zext_shift_offreg_1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_zext_shift_offreg_1
+; CHECK:       ldr {{w[0-9]+}}, [x1, w0, uxtw #2]
+  %1 = zext i32 %a to i64
+  %2 = shl i64 %1, 2
+  %3 = add i64 %2, %b
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_zext_shift_offreg_2(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_zext_shift_offreg_2
+; CHECK:       ldr {{w[0-9]+}}, [x1, w0, uxtw #2]
+  %1 = zext i32 %a to i64
+  %2 = shl i64 %1, 2
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_zext_mul_offreg_1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_zext_mul_offreg_1
+; CHECK:       ldr {{w[0-9]+}}, [x1, w0, uxtw #2]
+  %1 = zext i32 %a to i64
+  %2 = mul i64 %1, 4
+  %3 = add i64 %2, %b
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_sext_shift_offreg_1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_sext_shift_offreg_1
+; CHECK:       ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 2
+  %3 = add i64 %2, %b
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+define i32 @load_breg_sext_shift_offreg_2(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_sext_shift_offreg_2
+; CHECK:       ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 2
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+; Make sure that we don't drop the first 'add' instruction.
+define i32 @load_breg_sext_shift_offreg_3(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_sext_shift_offreg_3
+; CHECK:       add [[REG:w[0-9]+]], w0, #4
+; CHECK:       ldr {{w[0-9]+}}, {{\[}}x1, [[REG]], sxtw #2{{\]}}
+  %1 = add i32 %a, 4
+  %2 = sext i32 %1 to i64
+  %3 = shl i64 %2, 2
+  %4 = add i64 %b, %3
+  %5 = inttoptr i64 %4 to i32*
+  %6 = load i32* %5
+  ret i32 %6
+}
+
+
+define i32 @load_breg_sext_mul_offreg_1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_sext_mul_offreg_1
+; CHECK:       ldr {{w[0-9]+}}, [x1, w0, sxtw #2]
+  %1 = sext i32 %a to i64
+  %2 = mul i64 %1, 4
+  %3 = add i64 %2, %b
+  %4 = inttoptr i64 %3 to i32*
+  %5 = load i32* %4
+  ret i32 %5
+}
+
+; Load Scaled Register Offset + Immediate Offset + Sign/Zero extension
+define i64 @load_sext_shift_offreg_imm1(i32 %a) {
+; CHECK-LABEL: load_sext_shift_offreg_imm1
+; CHECK:       sbfiz [[REG:x[0-9]+]], {{x[0-9]+}}, #3, #32
+; CHECK-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]], #8{{\]}}
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %2, 8
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+; Load Base Register + Scaled Register Offset + Immediate Offset + Sign/Zero extension
+define i64 @load_breg_sext_shift_offreg_imm1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_breg_sext_shift_offreg_imm1
+; CHECK:       add [[REG:x[0-9]+]], x1, w0, sxtw #3
+; CHECK-NEXT:  ldr {{x[0-9]+}}, {{\[}}[[REG]], #8{{\]}}
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %b, %2
+  %4 = add i64 %3, 8
+  %5 = inttoptr i64 %4 to i64*
+  %6 = load i64* %5
+  ret i64 %6
+}
+
+; Test that the kill flag is not set - the machine instruction verifier does that for us.
+define i64 @kill_reg(i64 %a) {
+  %1 = sub i64 %a, 8
+  %2 = add i64 %1, 96
+  %3 = inttoptr i64 %2 to i64*
+  %4 = load i64* %3
+  %5 = add i64 %2, %4
+  ret i64 %5
+}
+
+define void @store_fi(i64 %i) {
+; CHECK-LABEL: store_fi
+; CHECK:       mov [[REG:x[0-9]+]], sp
+; CHECK:       str {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
+  %1 = alloca [8 x i32]
+  %2 = ptrtoint [8 x i32]* %1 to i64
+  %3 = mul i64 %i, 4
+  %4 = add i64 %2, %3
+  %5 = inttoptr i64 %4 to i32*
+  store i32 47, i32* %5, align 4
+  ret void
+}
+
+define i32 @load_fi(i64 %i) {
+; CHECK-LABEL: load_fi
+; CHECK:       mov [[REG:x[0-9]+]], sp
+; CHECK:       ldr {{w[0-9]+}}, {{\[}}[[REG]], x0, lsl #2{{\]}}
+  %1 = alloca [8 x i32]
+  %2 = ptrtoint [8 x i32]* %1 to i64
+  %3 = mul i64 %i, 4
+  %4 = add i64 %2, %3
+  %5 = inttoptr i64 %4 to i32*
+  %6 = load i32* %5, align 4
+  ret i32 %6
+}
+
diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
new file mode 100644
index 000000000000..bc4a210df62b
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
@@ -0,0 +1,42 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-label: test_or
+; CHECK:       cbnz w0, {{LBB[0-9]+_2}}
+; CHECK:       cbz w1, {{LBB[0-9]+_1}}
+define i64 @test_or(i32 %a, i32 %b) {
+bb1:
+  %0 = icmp eq i32 %a, 0
+  %1 = icmp eq i32 %b, 0
+  %or.cond = or i1 %0, %1
+  br i1 %or.cond, label %bb3, label %bb4, !prof !0
+
+bb3:
+  ret i64 0
+
+bb4:
+  %2 = call i64 @bar()
+  ret i64 %2
+}
+
+; CHECK-label: test_ans
+; CHECK:       cbz w0, {{LBB[0-9]+_2}}
+; CHECK:       cbnz w1, {{LBB[0-9]+_3}}
+define i64 @test_and(i32 %a, i32 %b) {
+bb1:
+  %0 = icmp ne i32 %a, 0
+  %1 = icmp ne i32 %b, 0
+  %or.cond = and i1 %0, %1
+  br i1 %or.cond, label %bb4, label %bb3, !prof !1
+
+bb3:
+  ret i64 0
+
+bb4:
+  %2 = call i64 @bar()
+  ret i64 %2
+}
+
+declare i64 @bar()
+
+!0 = !{!"branch_weights", i32 5128, i32 32}
+!1 = !{!"branch_weights", i32 1024, i32 4136}
diff --git a/test/CodeGen/AArch64/fast-isel-branch_weights.ll b/test/CodeGen/AArch64/fast-isel-branch_weights.ll
new file mode 100644
index 000000000000..70dbdf216c7d
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-branch_weights.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=arm64-apple-darwin -aarch64-atomic-cfg-tidy=0                             -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+; Test if the BBs are reordred according to their branch weights.
+define i64 @branch_weights_test(i64 %a, i64 %b) {
+; CHECK-LABEL: branch_weights_test
+; CHECK-LABEL: success
+; CHECK-LABEL: fail
+  %1 = icmp ult i64 %a, %b
+  br i1 %1, label %fail, label %success, !prof !0
+
+fail:
+  ret i64 -1
+
+success:
+  ret i64 0
+}
+
+!0 = !{!"branch_weights", i32 0, i32 2147483647}
diff --git a/test/CodeGen/AArch64/fast-isel-call-return.ll b/test/CodeGen/AArch64/fast-isel-call-return.ll
new file mode 100644
index 000000000000..9b10969417df
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-call-return.ll
@@ -0,0 +1,12 @@
+; RUN: llc -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-linux-gnu"
+
+define i8* @test_call_return_type(i64 %size) {
+entry:
+; CHECK: bl xmalloc
+  %0 = call noalias i8* @xmalloc(i64 undef)
+  ret i8* %0
+}
+
+declare noalias i8* @xmalloc(i64)
diff --git a/test/CodeGen/AArch64/fast-isel-cbz.ll b/test/CodeGen/AArch64/fast-isel-cbz.ll
new file mode 100644
index 000000000000..6e31a045d285
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-cbz.ll
@@ -0,0 +1,70 @@
+; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+
+define i32 @icmp_eq_i1(i1 %a) {
+; CHECK-LABEL: icmp_eq_i1
+; CHECK:       tbz w0, #0, {{LBB.+_2}}
+  %1 = icmp eq i1 %a, 0
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i8(i8 %a) {
+; CHECK-LABEL: icmp_eq_i8
+; CHECK:       uxtb [[REG:w[0-9]+]], w0
+; CHECK:       cbz [[REG]], {{LBB.+_2}}
+  %1 = icmp eq i8 %a, 0
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i16(i16 %a) {
+; CHECK-LABEL: icmp_eq_i16
+; CHECK:       uxth [[REG:w[0-9]+]], w0
+; CHECK:       cbz [[REG]], {{LBB.+_2}}
+  %1 = icmp eq i16 %a, 0
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i32(i32 %a) {
+; CHECK-LABEL: icmp_eq_i32
+; CHECK:       cbz w0, {{LBB.+_2}}
+  %1 = icmp eq i32 %a, 0
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i64(i64 %a) {
+; CHECK-LABEL: icmp_eq_i64
+; CHECK:       cbz x0, {{LBB.+_2}}
+  %1 = icmp eq i64 %a, 0
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_eq_ptr(i8* %a) {
+; CHECK-LABEL: icmp_eq_ptr
+; CHECK:       cbz x0, {{LBB.+_2}}
+  %1 = icmp eq i8* %a, null
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
diff --git a/test/CodeGen/AArch64/fast-isel-cmp-branch.ll b/test/CodeGen/AArch64/fast-isel-cmp-branch.ll
new file mode 100644
index 000000000000..3651f194efda
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-cmp-branch.ll
@@ -0,0 +1,293 @@
+; RUN: llc                             -aarch64-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+
+define i32 @fcmp_oeq(float %x, float %y) {
+; CHECK-LABEL: fcmp_oeq
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  b.ne {{LBB.+_2}}
+  %1 = fcmp oeq float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ogt(float %x, float %y) {
+; CHECK-LABEL: fcmp_ogt
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  b.le {{LBB.+_2}}
+  %1 = fcmp ogt float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_oge(float %x, float %y) {
+; CHECK-LABEL: fcmp_oge
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  b.lt {{LBB.+_2}}
+  %1 = fcmp oge float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_olt(float %x, float %y) {
+; CHECK-LABEL: fcmp_olt
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  b.pl {{LBB.+_2}}
+  %1 = fcmp olt float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ole(float %x, float %y) {
+; CHECK-LABEL: fcmp_ole
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  b.hi {{LBB.+_2}}
+  %1 = fcmp ole float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_one(float %x, float %y) {
+; CHECK-LABEL: fcmp_one
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  b.mi
+; CHECK-NEXT:  b.gt
+  %1 = fcmp one float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ord(float %x, float %y) {
+; CHECK-LABEL: fcmp_ord
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  b.vs {{LBB.+_2}}
+  %1 = fcmp ord float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_uno(float %x, float %y) {
+; CHECK-LABEL: fcmp_uno
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  b.vs {{LBB.+_2}}
+  %1 = fcmp uno float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ueq(float %x, float %y) {
+; CHECK-LABEL: fcmp_ueq
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  b.eq {{LBB.+_2}}
+; CHECK-NEXT:  b.vs {{LBB.+_2}}
+  %1 = fcmp ueq float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ugt(float %x, float %y) {
+; CHECK-LABEL: fcmp_ugt
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  b.ls {{LBB.+_2}}
+  %1 = fcmp ugt float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_uge(float %x, float %y) {
+; CHECK-LABEL: fcmp_uge
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  b.mi {{LBB.+_2}}
+  %1 = fcmp uge float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ult(float %x, float %y) {
+; CHECK-LABEL: fcmp_ult
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  b.ge {{LBB.+_2}}
+  %1 = fcmp ult float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_ule(float %x, float %y) {
+; CHECK-LABEL: fcmp_ule
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  b.gt {{LBB.+_2}}
+  %1 = fcmp ule float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @fcmp_une(float %x, float %y) {
+; CHECK-LABEL: fcmp_une
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  b.eq {{LBB.+_2}}
+  %1 = fcmp une float %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_eq(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_eq
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  b.ne {{LBB.+_2}}
+  %1 = icmp eq i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_ne(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_ne
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  b.eq {{LBB.+_2}}
+  %1 = icmp ne i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_ugt(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_ugt
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  b.ls {{LBB.+_2}}
+  %1 = icmp ugt i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_uge(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_uge
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  b.lo {{LBB.+_2}}
+  %1 = icmp uge i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_ult(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_ult
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  b.hs {{LBB.+_2}}
+  %1 = icmp ult i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_ule(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_ule
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  b.hi {{LBB.+_2}}
+  %1 = icmp ule i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_sgt(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_sgt
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  b.le {{LBB.+_2}}
+  %1 = icmp sgt i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_sge(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_sge
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  b.lt {{LBB.+_2}}
+  %1 = icmp sge i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_slt(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_slt
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  b.ge {{LBB.+_2}}
+  %1 = icmp slt i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
+define i32 @icmp_sle(i32 %x, i32 %y) {
+; CHECK-LABEL: icmp_sle
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  b.gt {{LBB.+_2}}
+  %1 = icmp sle i32 %x, %y
+  br i1 %1, label %bb1, label %bb2
+bb2:
+  ret i32 1
+bb1:
+  ret i32 0
+}
+
diff --git a/test/CodeGen/AArch64/fast-isel-folding.ll b/test/CodeGen/AArch64/fast-isel-folding.ll
new file mode 100644
index 000000000000..6b524ff2c099
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-folding.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -O0 -fast-isel-abort -verify-machineinstrs < %s
+
+; Test that we don't fold the shift.
+define i64 @fold_shift_test(i64 %a, i1 %c) {
+  %1 = sub i64 %a, 8
+  %2 = ashr i64 %1, 3
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  %3 = icmp ult i64 0, %2
+  br i1 %3, label %bb2, label %bb3
+bb2:
+  ret i64 1
+bb3:
+  ret i64 2
+}
+
+; Test that we don't fold the sign-extend.
+define i64 @fold_sext_test1(i32 %a, i1 %c) {
+  %1 = sub i32 %a, 8
+  %2 = sext i32 %1 to i64
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  %3 = icmp ult i64 0, %2
+  br i1 %3, label %bb2, label %bb3
+bb2:
+  ret i64 1
+bb3:
+  ret i64 2
+}
+
+; Test that we don't fold the sign-extend.
+define i64 @fold_sext_test2(i32 %a, i1 %c) {
+  %1 = sub i32 %a, 8
+  %2 = sext i32 %1 to i64
+  br i1 %c, label %bb1, label %bb2
+bb1:
+  %3 = shl i64 %2, 4
+  ret i64 %3
+bb2:
+  ret i64 %2
+}
+
+; Test that we clear the kill flag.
+define i32 @fold_kill_test(i32 %a) {
+  %1 = sub i32 %a, 8
+  %2 = shl i32 %1, 3
+  %3 = icmp ult i32 0, %2
+  br i1 %3, label %bb1, label %bb2
+bb1:
+  ret i32 %2
+bb2:
+  %4 = add i32 %2, 4
+  ret i32 %4
+}
diff --git a/test/CodeGen/AArch64/fast-isel-gep.ll b/test/CodeGen/AArch64/fast-isel-gep.ll
new file mode 100644
index 000000000000..4dc0a05894f1
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-gep.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+%struct.foo = type { i32, i64, float, double }
+
+define double* @test_struct(%struct.foo* %f) {
+; CHECK-LABEL: test_struct
+; CHECK:       add x0, x0, #24
+  %1 = getelementptr inbounds %struct.foo* %f, i64 0, i32 3
+  ret double* %1
+}
+
+define i32* @test_array1(i32* %a, i64 %i) {
+; CHECK-LABEL: test_array1
+; CHECK:       orr [[REG:x[0-9]+]], xzr, #0x4
+; CHECK-NEXT:  madd  x0, x1, [[REG]], x0
+  %1 = getelementptr inbounds i32* %a, i64 %i
+  ret i32* %1
+}
+
+define i32* @test_array2(i32* %a) {
+; CHECK-LABEL: test_array2
+; CHECK:       add  x0, x0, #16
+  %1 = getelementptr inbounds i32* %a, i64 4
+  ret i32* %1
+}
+
+define i32* @test_array3(i32* %a) {
+; CHECK-LABEL: test_array3
+; CHECK:       add x0, x0, #1, lsl #12
+  %1 = getelementptr inbounds i32* %a, i64 1024
+  ret i32* %1
+}
+
+define i32* @test_array4(i32* %a) {
+; CHECK-LABEL: test_array4
+; CHECK:       movz [[REG:x[0-9]+]], #0x1008
+; CHECK-NEXR:  add x0, x0, [[REG]]
+  %1 = getelementptr inbounds i32* %a, i64 1026
+  ret i32* %1
+}
+
+define i32* @test_array5(i32* %a, i32 %i) {
+; CHECK-LABEL: test_array5
+; CHECK:       sxtw [[REG1:x[0-9]+]], w1
+; CHECK-NEXT:  orr  [[REG2:x[0-9]+]], xzr, #0x4
+; CHECK-NEXT:  madd  {{x[0-9]+}}, [[REG1]], [[REG2]], x0
+  %1 = getelementptr inbounds i32* %a, i32 %i
+  ret i32* %1
+}
diff --git a/test/CodeGen/AArch64/fast-isel-int-ext.ll b/test/CodeGen/AArch64/fast-isel-int-ext.ll
new file mode 100644
index 000000000000..866febac2622
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-int-ext.ll
@@ -0,0 +1,491 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+;
+; Test that we only use the sign/zero extend in the address calculation when
+; necessary.
+;
+; SHIFT
+;
+define i64 @load_addr_shift_zext1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_addr_shift_zext1
+; CHECK:       ldr {{x[0-9]+}}, [x1, w0, uxtw #3]
+  %1 = zext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_shift_zext2(i32 zeroext %a, i64 %b) {
+; CHECK-LABEL: load_addr_shift_zext2
+; CHECK:       ldr {{x[0-9]+}}, [x1, x0, lsl #3]
+  %1 = zext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_shift_zext3(i32 signext %a, i64 %b) {
+; CHECK-LABEL: load_addr_shift_zext3
+; CHECK:       ldr {{x[0-9]+}}, [x1, w0, uxtw #3]
+  %1 = zext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_shift_sext1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_addr_shift_sext1
+; CHECK:       ldr {{x[0-9]+}}, [x1, w0, sxtw #3]
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_shift_sext2(i32 zeroext %a, i64 %b) {
+; CHECK-LABEL: load_addr_shift_sext2
+; CHECK:       ldr {{x[0-9]+}}, [x1, w0, sxtw #3]
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_shift_sext3(i32 signext %a, i64 %b) {
+; CHECK-LABEL: load_addr_shift_sext3
+; CHECK:       ldr {{x[0-9]+}}, [x1, x0, lsl #3]
+  %1 = sext i32 %a to i64
+  %2 = shl i64 %1, 3
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+;
+; MUL
+;
+define i64 @load_addr_mul_zext1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_addr_mul_zext1
+; CHECK:       ldr {{x[0-9]+}}, [x1, w0, uxtw #3]
+  %1 = zext i32 %a to i64
+  %2 = mul i64 %1, 8
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_mul_zext2(i32 zeroext %a, i64 %b) {
+; CHECK-LABEL: load_addr_mul_zext2
+; CHECK:       ldr {{x[0-9]+}}, [x1, x0, lsl #3]
+  %1 = zext i32 %a to i64
+  %2 = mul i64 %1, 8
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_mul_zext3(i32 signext %a, i64 %b) {
+; CHECK-LABEL: load_addr_mul_zext3
+; CHECK:       ldr {{x[0-9]+}}, [x1, w0, uxtw #3]
+  %1 = zext i32 %a to i64
+  %2 = mul i64 %1, 8
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_mul_sext1(i32 %a, i64 %b) {
+; CHECK-LABEL: load_addr_mul_sext1
+; CHECK:       ldr {{x[0-9]+}}, [x1, w0, sxtw #3]
+  %1 = sext i32 %a to i64
+  %2 = mul i64 %1, 8
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_mul_sext2(i32 zeroext %a, i64 %b) {
+; CHECK-LABEL: load_addr_mul_sext2
+; CHECK:       ldr {{x[0-9]+}}, [x1, w0, sxtw #3]
+  %1 = sext i32 %a to i64
+  %2 = mul i64 %1, 8
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+define i64 @load_addr_mul_sext3(i32 signext %a, i64 %b) {
+; CHECK-LABEL: load_addr_mul_sext3
+; CHECK:       ldr {{x[0-9]+}}, [x1, x0, lsl #3]
+  %1 = sext i32 %a to i64
+  %2 = mul i64 %1, 8
+  %3 = add i64 %b, %2
+  %4 = inttoptr i64 %3 to i64*
+  %5 = load i64* %4
+  ret i64 %5
+}
+
+
+;
+; Test folding of the sign-/zero-extend into the load instruction.
+;
+
+; Unscaled
+define i32 @load_unscaled_zext_i8_to_i32(i64 %a) {
+; CHECK-LABEL: load_unscaled_zext_i8_to_i32
+; CHECK:       ldurb w0, [x0, #-8]
+; CHECK-NOT:   uxtb
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  %4 = zext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @load_unscaled_zext_i16_to_i32(i64 %a) {
+; CHECK-LABEL: load_unscaled_zext_i16_to_i32
+; CHECK:       ldurh w0, [x0, #-8]
+; CHECK-NOT:   uxth
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  %4 = zext i16 %3 to i32
+  ret i32 %4
+}
+
+define i64 @load_unscaled_zext_i8_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_zext_i8_to_i64
+; CHECK:       ldurb w0, [x0, #-8]
+; CHECK-NOT:   uxtb
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  %4 = zext i8 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_unscaled_zext_i16_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_zext_i16_to_i64
+; CHECK:       ldurh w0, [x0, #-8]
+; CHECK-NOT:   uxth
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  %4 = zext i16 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_unscaled_zext_i32_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_zext_i32_to_i64
+; CHECK:       ldur w0, [x0, #-8]
+; CHECK-NOT:   uxtw
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  %4 = zext i32 %3 to i64
+  ret i64 %4
+}
+
+define i32 @load_unscaled_sext_i8_to_i32(i64 %a) {
+; CHECK-LABEL: load_unscaled_sext_i8_to_i32
+; CHECK:       ldursb w0, [x0, #-8]
+; CHECK-NOT:   sxtb
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  %4 = sext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @load_unscaled_sext_i16_to_i32(i64 %a) {
+; CHECK-LABEL: load_unscaled_sext_i16_to_i32
+; CHECK:       ldursh w0, [x0, #-8]
+; CHECK-NOT:   sxth
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  %4 = sext i16 %3 to i32
+  ret i32 %4
+}
+
+define i64 @load_unscaled_sext_i8_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_sext_i8_to_i64
+; CHECK:       ldursb x0, [x0, #-8]
+; CHECK-NOT:   sxtb
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  %4 = sext i8 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_unscaled_sext_i16_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_sext_i16_to_i64
+; CHECK:       ldursh x0, [x0, #-8]
+; CHECK-NOT:   sxth
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  %4 = sext i16 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_unscaled_sext_i32_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_sext_i32_to_i64
+; CHECK:       ldursw x0, [x0, #-8]
+; CHECK-NOT:   sxtw
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  %4 = sext i32 %3 to i64
+  ret i64 %4
+}
+
+; Register
+define i32 @load_register_zext_i8_to_i32(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_zext_i8_to_i32
+; CHECK:       ldrb w0, [x0, x1]
+; CHECK-NOT:   uxtb
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  %4 = zext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @load_register_zext_i16_to_i32(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_zext_i16_to_i32
+; CHECK:       ldrh w0, [x0, x1]
+; CHECK-NOT:   uxth
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  %4 = zext i16 %3 to i32
+  ret i32 %4
+}
+
+define i64 @load_register_zext_i8_to_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_zext_i8_to_i64
+; CHECK:       ldrb w0, [x0, x1]
+; CHECK-NOT:   uxtb
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  %4 = zext i8 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_register_zext_i16_to_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_zext_i16_to_i64
+; CHECK:       ldrh w0, [x0, x1]
+; CHECK-NOT:   uxth
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  %4 = zext i16 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_register_zext_i32_to_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_zext_i32_to_i64
+; CHECK:       ldr w0, [x0, x1]
+; CHECK-NOT:   uxtw
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  %4 = zext i32 %3 to i64
+  ret i64 %4
+}
+
+define i32 @load_register_sext_i8_to_i32(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_sext_i8_to_i32
+; CHECK:       ldrsb w0, [x0, x1]
+; CHECK-NOT:   sxtb
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  %4 = sext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @load_register_sext_i16_to_i32(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_sext_i16_to_i32
+; CHECK:       ldrsh w0, [x0, x1]
+; CHECK-NOT:   sxth
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  %4 = sext i16 %3 to i32
+  ret i32 %4
+}
+
+define i64 @load_register_sext_i8_to_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_sext_i8_to_i64
+; CHECK:       ldrsb x0, [x0, x1]
+; CHECK-NOT:   sxtb
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  %4 = sext i8 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_register_sext_i16_to_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_sext_i16_to_i64
+; CHECK:       ldrsh x0, [x0, x1]
+; CHECK-NOT:   sxth
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  %4 = sext i16 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_register_sext_i32_to_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_sext_i32_to_i64
+; CHECK:       ldrsw x0, [x0, x1]
+; CHECK-NOT:   sxtw
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  %4 = sext i32 %3 to i64
+  ret i64 %4
+}
+
+; Extend
+define i32 @load_extend_zext_i8_to_i32(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_zext_i8_to_i32
+; CHECK:       ldrb w0, [x0, w1, sxtw]
+; CHECK-NOT:   uxtb
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i8*
+  %4 = load i8* %3
+  %5 = zext i8 %4 to i32
+  ret i32 %5
+}
+
+define i32 @load_extend_zext_i16_to_i32(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_zext_i16_to_i32
+; CHECK:       ldrh w0, [x0, w1, sxtw]
+; CHECK-NOT:   uxth
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i16*
+  %4 = load i16* %3
+  %5 = zext i16 %4 to i32
+  ret i32 %5
+}
+
+define i64 @load_extend_zext_i8_to_i64(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_zext_i8_to_i64
+; CHECK:       ldrb w0, [x0, w1, sxtw]
+; CHECK-NOT:   uxtb
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i8*
+  %4 = load i8* %3
+  %5 = zext i8 %4 to i64
+  ret i64 %5
+}
+
+define i64 @load_extend_zext_i16_to_i64(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_zext_i16_to_i64
+; CHECK:       ldrh w0, [x0, w1, sxtw]
+; CHECK-NOT:   uxth
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i16*
+  %4 = load i16* %3
+  %5 = zext i16 %4 to i64
+  ret i64 %5
+}
+
+define i64 @load_extend_zext_i32_to_i64(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_zext_i32_to_i64
+; CHECK:       ldr w0, [x0, w1, sxtw]
+; CHECK-NOT:   uxtw
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i32*
+  %4 = load i32* %3
+  %5 = zext i32 %4 to i64
+  ret i64 %5
+}
+
+define i32 @load_extend_sext_i8_to_i32(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_sext_i8_to_i32
+; CHECK:       ldrsb w0, [x0, w1, sxtw]
+; CHECK-NOT:   sxtb
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i8*
+  %4 = load i8* %3
+  %5 = sext i8 %4 to i32
+  ret i32 %5
+}
+
+define i32 @load_extend_sext_i16_to_i32(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_sext_i16_to_i32
+; CHECK:       ldrsh w0, [x0, w1, sxtw]
+; CHECK-NOT:   sxth
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i16*
+  %4 = load i16* %3
+  %5 = sext i16 %4 to i32
+  ret i32 %5
+}
+
+define i64 @load_extend_sext_i8_to_i64(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_sext_i8_to_i64
+; CHECK:       ldrsb x0, [x0, w1, sxtw]
+; CHECK-NOT:   sxtb
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i8*
+  %4 = load i8* %3
+  %5 = sext i8 %4 to i64
+  ret i64 %5
+}
+
+define i64 @load_extend_sext_i16_to_i64(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_sext_i16_to_i64
+; CHECK:       ldrsh x0, [x0, w1, sxtw]
+; CHECK-NOT:   sxth
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i16*
+  %4 = load i16* %3
+  %5 = sext i16 %4 to i64
+  ret i64 %5
+}
+
+define i64 @load_extend_sext_i32_to_i64(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_sext_i32_to_i64
+; CHECK:       ldrsw x0, [x0, w1, sxtw]
+; CHECK-NOT:   sxtw
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i32*
+  %4 = load i32* %3
+  %5 = sext i32 %4 to i64
+  ret i64 %5
+}
+
diff --git a/test/CodeGen/AArch64/fast-isel-int-ext2.ll b/test/CodeGen/AArch64/fast-isel-int-ext2.ll
new file mode 100644
index 000000000000..8df26b26971a
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-int-ext2.ll
@@ -0,0 +1,439 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=false -disable-cgp-branch-opts -verify-machineinstrs < %s | FileCheck %s
+
+;
+; Test folding of the sign-/zero-extend into the load instruction.
+;
+
+; Unscaled
+define i32 @load_unscaled_zext_i8_to_i32(i64 %a) {
+; CHECK-LABEL: load_unscaled_zext_i8_to_i32
+; CHECK:       ldurb w0, [x0, #-8]
+; CHECK-NOT:   uxtb
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  br label %bb2
+
+bb2:
+  %4 = zext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @load_unscaled_zext_i16_to_i32(i64 %a) {
+; CHECK-LABEL: load_unscaled_zext_i16_to_i32
+; CHECK:       ldurh w0, [x0, #-8]
+; CHECK-NOT:   uxth
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  br label %bb2
+
+bb2:
+  %4 = zext i16 %3 to i32
+  ret i32 %4
+}
+
+define i64 @load_unscaled_zext_i8_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_zext_i8_to_i64
+; CHECK:       ldurb w0, [x0, #-8]
+; CHECK-NOT:   uxtb
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  br label %bb2
+
+bb2:
+  %4 = zext i8 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_unscaled_zext_i16_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_zext_i16_to_i64
+; CHECK:       ldurh w0, [x0, #-8]
+; CHECK-NOT:   uxth
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  br label %bb2
+
+bb2:
+  %4 = zext i16 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_unscaled_zext_i32_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_zext_i32_to_i64
+; CHECK:       ldur w0, [x0, #-8]
+; CHECK-NOT:   uxtw
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  br label %bb2
+
+bb2:
+  %4 = zext i32 %3 to i64
+  ret i64 %4
+}
+
+define i32 @load_unscaled_sext_i8_to_i32(i64 %a) {
+; CHECK-LABEL: load_unscaled_sext_i8_to_i32
+; CHECK:       ldursb w0, [x0, #-8]
+; CHECK-NOT:   sxtb
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  br label %bb2
+
+bb2:
+  %4 = sext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @load_unscaled_sext_i16_to_i32(i64 %a) {
+; CHECK-LABEL: load_unscaled_sext_i16_to_i32
+; CHECK:       ldursh w0, [x0, #-8]
+; CHECK-NOT:   sxth
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  br label %bb2
+
+bb2:
+  %4 = sext i16 %3 to i32
+  ret i32 %4
+}
+
+define i64 @load_unscaled_sext_i8_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_sext_i8_to_i64
+; CHECK:       ldursb x0, [x0, #-8]
+; CHECK-NOT:   sxtb
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  br label %bb2
+
+bb2:
+  %4 = sext i8 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_unscaled_sext_i16_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_sext_i16_to_i64
+; CHECK:       ldursh x0, [x0, #-8]
+; CHECK-NOT:   sxth
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  br label %bb2
+
+bb2:
+  %4 = sext i16 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_unscaled_sext_i32_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_sext_i32_to_i64
+; CHECK:       ldursw x0, [x0, #-8]
+; CHECK-NOT:   sxtw
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  br label %bb2
+
+bb2:
+  %4 = sext i32 %3 to i64
+  ret i64 %4
+}
+
+; Register
+define i32 @load_register_zext_i8_to_i32(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_zext_i8_to_i32
+; CHECK:       ldrb w0, [x0, x1]
+; CHECK-NOT:   uxtb
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  br label %bb2
+
+bb2:
+  %4 = zext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @load_register_zext_i16_to_i32(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_zext_i16_to_i32
+; CHECK:       ldrh w0, [x0, x1]
+; CHECK-NOT:   uxth
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  br label %bb2
+
+bb2:
+  %4 = zext i16 %3 to i32
+  ret i32 %4
+}
+
+define i64 @load_register_zext_i8_to_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_zext_i8_to_i64
+; CHECK:       ldrb w0, [x0, x1]
+; CHECK-NOT:   uxtb
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  br label %bb2
+
+bb2:
+  %4 = zext i8 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_register_zext_i16_to_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_zext_i16_to_i64
+; CHECK:       ldrh w0, [x0, x1]
+; CHECK-NOT:   uxth
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  br label %bb2
+
+bb2:
+  %4 = zext i16 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_register_zext_i32_to_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_zext_i32_to_i64
+; CHECK:       ldr w0, [x0, x1]
+; CHECK-NOT:   uxtw
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  br label %bb2
+
+bb2:
+  %4 = zext i32 %3 to i64
+  ret i64 %4
+}
+
+define i32 @load_register_sext_i8_to_i32(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_sext_i8_to_i32
+; CHECK:       ldrsb w0, [x0, x1]
+; CHECK-NOT:   sxtb
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  br label %bb2
+
+bb2:
+  %4 = sext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @load_register_sext_i16_to_i32(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_sext_i16_to_i32
+; CHECK:       ldrsh w0, [x0, x1]
+; CHECK-NOT:   sxth
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  br label %bb2
+
+bb2:
+  %4 = sext i16 %3 to i32
+  ret i32 %4
+}
+
+define i64 @load_register_sext_i8_to_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_sext_i8_to_i64
+; CHECK:       ldrsb x0, [x0, x1]
+; CHECK-NOT:   sxtb
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i8*
+  %3 = load i8* %2
+  br label %bb2
+
+bb2:
+  %4 = sext i8 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_register_sext_i16_to_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_sext_i16_to_i64
+; CHECK:       ldrsh x0, [x0, x1]
+; CHECK-NOT:   sxth
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i16*
+  %3 = load i16* %2
+  br label %bb2
+
+bb2:
+  %4 = sext i16 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_register_sext_i32_to_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: load_register_sext_i32_to_i64
+; CHECK:       ldrsw x0, [x0, x1]
+; CHECK-NOT:   sxtw
+  %1 = add i64 %a, %b
+  %2 = inttoptr i64 %1 to i32*
+  %3 = load i32* %2
+  br label %bb2
+
+bb2:
+  %4 = sext i32 %3 to i64
+  ret i64 %4
+}
+
+; Extend
+define i32 @load_extend_zext_i8_to_i32(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_zext_i8_to_i32
+; CHECK:       ldrb w0, [x0, w1, sxtw]
+; CHECK-NOT:   uxtb
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i8*
+  %4 = load i8* %3
+  br label %bb2
+
+bb2:
+  %5 = zext i8 %4 to i32
+  ret i32 %5
+}
+
+define i32 @load_extend_zext_i16_to_i32(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_zext_i16_to_i32
+; CHECK:       ldrh w0, [x0, w1, sxtw]
+; CHECK-NOT:   uxth
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i16*
+  %4 = load i16* %3
+  br label %bb2
+
+bb2:
+  %5 = zext i16 %4 to i32
+  ret i32 %5
+}
+
+define i64 @load_extend_zext_i8_to_i64(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_zext_i8_to_i64
+; CHECK:       ldrb w0, [x0, w1, sxtw]
+; CHECK-NOT:   uxtb
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i8*
+  %4 = load i8* %3
+  br label %bb2
+
+bb2:
+  %5 = zext i8 %4 to i64
+  ret i64 %5
+}
+
+define i64 @load_extend_zext_i16_to_i64(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_zext_i16_to_i64
+; CHECK:       ldrh w0, [x0, w1, sxtw]
+; CHECK-NOT:   uxth
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i16*
+  %4 = load i16* %3
+  br label %bb2
+
+bb2:
+  %5 = zext i16 %4 to i64
+  ret i64 %5
+}
+
+define i64 @load_extend_zext_i32_to_i64(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_zext_i32_to_i64
+; CHECK:       ldr w0, [x0, w1, sxtw]
+; CHECK-NOT:   uxtw
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i32*
+  %4 = load i32* %3
+  br label %bb2
+
+bb2:
+  %5 = zext i32 %4 to i64
+  ret i64 %5
+}
+
+define i32 @load_extend_sext_i8_to_i32(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_sext_i8_to_i32
+; CHECK:       ldrsb w0, [x0, w1, sxtw]
+; CHECK-NOT:   sxtb
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i8*
+  %4 = load i8* %3
+  br label %bb2
+
+bb2:
+  %5 = sext i8 %4 to i32
+  ret i32 %5
+}
+
+define i32 @load_extend_sext_i16_to_i32(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_sext_i16_to_i32
+; CHECK:       ldrsh w0, [x0, w1, sxtw]
+; CHECK-NOT:   sxth
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i16*
+  %4 = load i16* %3
+  br label %bb2
+
+bb2:
+  %5 = sext i16 %4 to i32
+  ret i32 %5
+}
+
+define i64 @load_extend_sext_i8_to_i64(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_sext_i8_to_i64
+; CHECK:       ldrsb x0, [x0, w1, sxtw]
+; CHECK-NOT:   sxtb
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i8*
+  %4 = load i8* %3
+  br label %bb2
+
+bb2:
+  %5 = sext i8 %4 to i64
+  ret i64 %5
+}
+
+define i64 @load_extend_sext_i16_to_i64(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_sext_i16_to_i64
+; CHECK:       ldrsh x0, [x0, w1, sxtw]
+; CHECK-NOT:   sxth
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i16*
+  %4 = load i16* %3
+  br label %bb2
+
+bb2:
+  %5 = sext i16 %4 to i64
+  ret i64 %5
+}
+
+define i64 @load_extend_sext_i32_to_i64(i64 %a, i32 %b) {
+; CHECK-LABEL: load_extend_sext_i32_to_i64
+; CHECK:       ldrsw x0, [x0, w1, sxtw]
+; CHECK-NOT:   sxtw
+  %1 = sext i32 %b to i64
+  %2 = add i64 %a, %1
+  %3 = inttoptr i64 %2 to i32*
+  %4 = load i32* %3
+  br label %bb2
+
+bb2:
+  %5 = sext i32 %4 to i64
+  ret i64 %5
+}
+
diff --git a/test/CodeGen/AArch64/fast-isel-int-ext3.ll b/test/CodeGen/AArch64/fast-isel-int-ext3.ll
new file mode 100644
index 000000000000..5d55a6b38f6b
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-int-ext3.ll
@@ -0,0 +1,117 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -verify-machineinstrs < %s | FileCheck %s
+
+;
+; Test folding of the sign-/zero-extend into the load instruction.
+;
+
+; Unscaled
+define i32 @load_unscaled_zext_i8_to_i32(i64 %a) {
+; CHECK-LABEL: load_unscaled_zext_i8_to_i32
+; CHECK:       ldurb [[REG:w[0-9]+]], [x0, #-8]
+; CHECK:       uxtb w0, [[REG]]
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i8 addrspace(256)*
+  %3 = load i8 addrspace(256)* %2
+  %4 = zext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @load_unscaled_zext_i16_to_i32(i64 %a) {
+; CHECK-LABEL: load_unscaled_zext_i16_to_i32
+; CHECK:       ldurh [[REG:w[0-9]+]], [x0, #-8]
+; CHECK:       uxth w0, [[REG]]
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i16 addrspace(256)*
+  %3 = load i16 addrspace(256)* %2
+  %4 = zext i16 %3 to i32
+  ret i32 %4
+}
+
+define i64 @load_unscaled_zext_i8_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_zext_i8_to_i64
+; CHECK:       ldurb w[[REG:[0-9]+]], [x0, #-8]
+; CHECK:       ubfx x0, x[[REG]], #0, #8
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i8 addrspace(256)*
+  %3 = load i8 addrspace(256)* %2
+  %4 = zext i8 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_unscaled_zext_i16_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_zext_i16_to_i64
+; CHECK:       ldurh w[[REG:[0-9]+]], [x0, #-8]
+; CHECK:       ubfx x0, x[[REG]], #0, #16
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i16 addrspace(256)*
+  %3 = load i16 addrspace(256)* %2
+  %4 = zext i16 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_unscaled_zext_i32_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_zext_i32_to_i64
+; CHECK:       ldur w[[REG:[0-9]+]], [x0, #-8]
+; CHECK:       ubfx x0, x[[REG]], #0, #32
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i32 addrspace(256)*
+  %3 = load i32 addrspace(256)* %2
+  %4 = zext i32 %3 to i64
+  ret i64 %4
+}
+
+define i32 @load_unscaled_sext_i8_to_i32(i64 %a) {
+; CHECK-LABEL: load_unscaled_sext_i8_to_i32
+; CHECK:       ldurb [[REG:w[0-9]+]], [x0, #-8]
+; CHECK:       sxtb w0, [[REG]]
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i8 addrspace(256)*
+  %3 = load i8 addrspace(256)* %2
+  %4 = sext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @load_unscaled_sext_i16_to_i32(i64 %a) {
+; CHECK-LABEL: load_unscaled_sext_i16_to_i32
+; CHECK:       ldurh [[REG:w[0-9]+]], [x0, #-8]
+; CHECK:       sxth w0, [[REG]]
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i16 addrspace(256)*
+  %3 = load i16 addrspace(256)* %2
+  %4 = sext i16 %3 to i32
+  ret i32 %4
+}
+
+define i64 @load_unscaled_sext_i8_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_sext_i8_to_i64
+; CHECK:       ldurb [[REG:w[0-9]+]], [x0, #-8]
+; CHECK:       sxtb x0, [[REG]]
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i8 addrspace(256)*
+  %3 = load i8 addrspace(256)* %2
+  %4 = sext i8 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_unscaled_sext_i16_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_sext_i16_to_i64
+; CHECK:       ldurh [[REG:w[0-9]+]], [x0, #-8]
+; CHECK:       sxth x0, [[REG]]
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i16 addrspace(256)*
+  %3 = load i16 addrspace(256)* %2
+  %4 = sext i16 %3 to i64
+  ret i64 %4
+}
+
+define i64 @load_unscaled_sext_i32_to_i64(i64 %a) {
+; CHECK-LABEL: load_unscaled_sext_i32_to_i64
+; CHECK:       ldur [[REG:w[0-9]+]], [x0, #-8]
+; CHECK:       sxtw x0, [[REG]]
+  %1 = sub i64 %a, 8
+  %2 = inttoptr i64 %1 to i32 addrspace(256)*
+  %3 = load i32 addrspace(256)* %2
+  %4 = sext i32 %3 to i64
+  ret i64 %4
+}
+
diff --git a/test/CodeGen/AArch64/fast-isel-int-ext4.ll b/test/CodeGen/AArch64/fast-isel-int-ext4.ll
new file mode 100644
index 000000000000..f25bb98af758
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-int-ext4.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @kill_flag(i16 signext %a) {
+; CHECK-LABEL: kill_flag
+entry:
+  %0 = sext i16 %a to i32
+  br label %bb1
+
+bb1:
+  %1 = icmp slt i32 undef, %0
+  br i1 %1, label %loop, label %exit
+
+loop:
+  %2 = sext i16 %a to i32
+  %3 = icmp slt i32 undef, %2
+  br i1 %3, label %bb1, label %exit
+
+exit:
+  ret i32 0
+}
diff --git a/test/CodeGen/AArch64/fast-isel-intrinsic.ll b/test/CodeGen/AArch64/fast-isel-intrinsic.ll
new file mode 100644
index 000000000000..fd1198a5a4eb
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-intrinsic.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=aarch64-apple-darwin            -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -verify-machineinstrs < %s | FileCheck %s
+
+define float @fabs_f32(float %a) {
+; CHECK-LABEL: fabs_f32
+; CHECK:       fabs s0, s0
+  %1 = call float @llvm.fabs.f32(float %a)
+  ret float %1
+}
+
+define double @fabs_f64(double %a) {
+; CHECK-LABEL: fabs_f64
+; CHECK:       fabs d0, d0
+  %1 = call double @llvm.fabs.f64(double %a)
+  ret double %1
+}
+
+declare double @llvm.fabs.f64(double)
+declare float @llvm.fabs.f32(float)
diff --git a/test/CodeGen/AArch64/fast-isel-logic-op.ll b/test/CodeGen/AArch64/fast-isel-logic-op.ll
new file mode 100644
index 000000000000..2c7486e4cf8a
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-logic-op.ll
@@ -0,0 +1,362 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=0                  -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=1 -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+; AND
+define zeroext i1 @and_rr_i1(i1 signext %a, i1 signext %b) {
+; CHECK-LABEL: and_rr_i1
+; CHECK:       and [[REG:w[0-9]+]], w0, w1
+  %1 = and i1 %a, %b
+  ret i1 %1
+}
+
+define zeroext i8 @and_rr_i8(i8 signext %a, i8 signext %b) {
+; CHECK-LABEL: and_rr_i8
+; CHECK:       and [[REG:w[0-9]+]], w0, w1
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], #0xff
+  %1 = and i8 %a, %b
+  ret i8 %1
+}
+
+define zeroext i16 @and_rr_i16(i16 signext %a, i16 signext %b) {
+; CHECK-LABEL: and_rr_i16
+; CHECK:       and [[REG:w[0-9]+]], w0, w1
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], #0xffff
+  %1 = and i16 %a, %b
+  ret i16 %1
+}
+
+define i32 @and_rr_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: and_rr_i32
+; CHECK:       and w0, w0, w1
+  %1 = and i32 %a, %b
+  ret i32 %1
+}
+
+define i64 @and_rr_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: and_rr_i64
+; CHECK:       and x0, x0, x1
+  %1 = and i64 %a, %b
+  ret i64 %1
+}
+
+define zeroext i1 @and_ri_i1(i1 signext %a) {
+; CHECK-LABEL: and_ri_i1
+; CHECK:       and {{w[0-9]+}}, w0, #0x1
+  %1 = and i1 %a, 1
+  ret i1 %1
+}
+
+define zeroext i8 @and_ri_i8(i8 signext %a) {
+; CHECK-LABEL: and_ri_i8
+; CHECK:       and {{w[0-9]+}}, w0, #0xf
+  %1 = and i8 %a, 15
+  ret i8 %1
+}
+
+define zeroext i16 @and_ri_i16(i16 signext %a) {
+; CHECK-LABEL: and_ri_i16
+; CHECK:       and {{w[0-9]+}}, w0, #0xff
+  %1 = and i16 %a, 255
+  ret i16 %1
+}
+
+define i32 @and_ri_i32(i32 %a) {
+; CHECK-LABEL: and_ri_i32
+; CHECK:       and w0, w0, #0xff
+  %1 = and i32 %a, 255
+  ret i32 %1
+}
+
+define i64 @and_ri_i64(i64 %a) {
+; CHECK-LABEL: and_ri_i64
+; CHECK:       and x0, x0, #0xff
+  %1 = and i64 %a, 255
+  ret i64 %1
+}
+
+define zeroext i8 @and_rs_i8(i8 signext %a, i8 signext %b) {
+; CHECK-LABEL: and_rs_i8
+; CHECK:       and [[REG:w[0-9]+]], w0, w1, lsl #4
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], {{#0xff|#0xf0}}
+  %1 = shl i8 %b, 4
+  %2 = and i8 %a, %1
+  ret i8 %2
+}
+
+define zeroext i16 @and_rs_i16(i16 signext %a, i16 signext %b) {
+; CHECK-LABEL: and_rs_i16
+; CHECK:       and [[REG:w[0-9]+]], w0, w1, lsl #8
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], {{#0xffff|#0xff00}}
+  %1 = shl i16 %b, 8
+  %2 = and i16 %a, %1
+  ret i16 %2
+}
+
+define i32 @and_rs_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: and_rs_i32
+; CHECK:       and w0, w0, w1, lsl #8
+  %1 = shl i32 %b, 8
+  %2 = and i32 %a, %1
+  ret i32 %2
+}
+
+define i64 @and_rs_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: and_rs_i64
+; CHECK:       and x0, x0, x1, lsl #8
+  %1 = shl i64 %b, 8
+  %2 = and i64 %a, %1
+  ret i64 %2
+}
+
+define i32 @and_mul_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: and_mul_i32
+; CHECK:       and w0, w0, w1, lsl #2
+  %1 = mul i32 %b, 4
+  %2 = and i32 %a, %1
+  ret i32 %2
+}
+
+define i64 @and_mul_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: and_mul_i64
+; CHECK:       and x0, x0, x1, lsl #2
+  %1 = mul i64 %b, 4
+  %2 = and i64 %a, %1
+  ret i64 %2
+}
+
+; OR
+define zeroext i1 @or_rr_i1(i1 signext %a, i1 signext %b) {
+; CHECK-LABEL: or_rr_i1
+; CHECK:       orr [[REG:w[0-9]+]], w0, w1
+  %1 = or i1 %a, %b
+  ret i1 %1
+}
+
+define zeroext i8 @or_rr_i8(i8 signext %a, i8 signext %b) {
+; CHECK-LABEL: or_rr_i8
+; CHECK:       orr [[REG:w[0-9]+]], w0, w1
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], #0xff
+  %1 = or i8 %a, %b
+  ret i8 %1
+}
+
+define zeroext i16 @or_rr_i16(i16 signext %a, i16 signext %b) {
+; CHECK-LABEL: or_rr_i16
+; CHECK:       orr [[REG:w[0-9]+]], w0, w1
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], #0xffff
+  %1 = or i16 %a, %b
+  ret i16 %1
+}
+
+define i32 @or_rr_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: or_rr_i32
+; CHECK:       orr w0, w0, w1
+  %1 = or i32 %a, %b
+  ret i32 %1
+}
+
+define i64 @or_rr_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: or_rr_i64
+; CHECK:       orr x0, x0, x1
+  %1 = or i64 %a, %b
+  ret i64 %1
+}
+
+define zeroext i8 @or_ri_i8(i8 %a) {
+; CHECK-LABEL: or_ri_i8
+; CHECK:       orr [[REG:w[0-9]+]], w0, #0xf
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], #0xff
+  %1 = or i8 %a, 15
+  ret i8 %1
+}
+
+define zeroext i16 @or_ri_i16(i16 %a) {
+; CHECK-LABEL: or_ri_i16
+; CHECK:       orr [[REG:w[0-9]+]], w0, #0xff
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], #0xffff
+  %1 = or i16 %a, 255
+  ret i16 %1
+}
+
+define i32 @or_ri_i32(i32 %a) {
+; CHECK-LABEL: or_ri_i32
+; CHECK:       orr w0, w0, #0xff
+  %1 = or i32 %a, 255
+  ret i32 %1
+}
+
+define i64 @or_ri_i64(i64 %a) {
+; CHECK-LABEL: or_ri_i64
+; CHECK:       orr x0, x0, #0xff
+  %1 = or i64 %a, 255
+  ret i64 %1
+}
+
+define zeroext i8 @or_rs_i8(i8 signext %a, i8 signext %b) {
+; CHECK-LABEL: or_rs_i8
+; CHECK:       orr [[REG:w[0-9]+]], w0, w1, lsl #4
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], {{#0xff|#0xf0}}
+  %1 = shl i8 %b, 4
+  %2 = or i8 %a, %1
+  ret i8 %2
+}
+
+define zeroext i16 @or_rs_i16(i16 signext %a, i16 signext %b) {
+; CHECK-LABEL: or_rs_i16
+; CHECK:       orr [[REG:w[0-9]+]], w0, w1, lsl #8
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], {{#0xffff|#0xff00}}
+  %1 = shl i16 %b, 8
+  %2 = or i16 %a, %1
+  ret i16 %2
+}
+
+define i32 @or_rs_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: or_rs_i32
+; CHECK:       orr w0, w0, w1, lsl #8
+  %1 = shl i32 %b, 8
+  %2 = or i32 %a, %1
+  ret i32 %2
+}
+
+define i64 @or_rs_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: or_rs_i64
+; CHECK:       orr x0, x0, x1, lsl #8
+  %1 = shl i64 %b, 8
+  %2 = or i64 %a, %1
+  ret i64 %2
+}
+
+define i32 @or_mul_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: or_mul_i32
+; CHECK:       orr w0, w0, w1, lsl #2
+  %1 = mul i32 %b, 4
+  %2 = or i32 %a, %1
+  ret i32 %2
+}
+
+define i64 @or_mul_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: or_mul_i64
+; CHECK:       orr x0, x0, x1, lsl #2
+  %1 = mul i64 %b, 4
+  %2 = or i64 %a, %1
+  ret i64 %2
+}
+
+; XOR
+define zeroext i1 @xor_rr_i1(i1 signext %a, i1 signext %b) {
+; CHECK-LABEL: xor_rr_i1
+; CHECK:       eor [[REG:w[0-9]+]], w0, w1
+  %1 = xor i1 %a, %b
+  ret i1 %1
+}
+
+define zeroext i8 @xor_rr_i8(i8 signext %a, i8 signext %b) {
+; CHECK-LABEL: xor_rr_i8
+; CHECK:       eor [[REG:w[0-9]+]], w0, w1
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], #0xff
+  %1 = xor i8 %a, %b
+  ret i8 %1
+}
+
+define zeroext i16 @xor_rr_i16(i16 signext %a, i16 signext %b) {
+; CHECK-LABEL: xor_rr_i16
+; CHECK:       eor [[REG:w[0-9]+]], w0, w1
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], #0xffff
+  %1 = xor i16 %a, %b
+  ret i16 %1
+}
+
+define i32 @xor_rr_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: xor_rr_i32
+; CHECK:       eor w0, w0, w1
+  %1 = xor i32 %a, %b
+  ret i32 %1
+}
+
+define i64 @xor_rr_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: xor_rr_i64
+; CHECK:       eor x0, x0, x1
+  %1 = xor i64 %a, %b
+  ret i64 %1
+}
+
+define zeroext i8 @xor_ri_i8(i8 signext %a) {
+; CHECK-LABEL: xor_ri_i8
+; CHECK:       eor [[REG:w[0-9]+]], w0, #0xf
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], #0xff
+  %1 = xor i8 %a, 15
+  ret i8 %1
+}
+
+define zeroext i16 @xor_ri_i16(i16 signext %a) {
+; CHECK-LABEL: xor_ri_i16
+; CHECK:       eor [[REG:w[0-9]+]], w0, #0xff
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], #0xffff
+  %1 = xor i16 %a, 255
+  ret i16 %1
+}
+
+define i32 @xor_ri_i32(i32 %a) {
+; CHECK-LABEL: xor_ri_i32
+; CHECK:       eor w0, w0, #0xff
+  %1 = xor i32 %a, 255
+  ret i32 %1
+}
+
+define i64 @xor_ri_i64(i64 %a) {
+; CHECK-LABEL: xor_ri_i64
+; CHECK:       eor x0, x0, #0xff
+  %1 = xor i64 %a, 255
+  ret i64 %1
+}
+
+define zeroext i8 @xor_rs_i8(i8 %a, i8 %b) {
+; CHECK-LABEL: xor_rs_i8
+; CHECK:       eor [[REG:w[0-9]+]], w0, w1, lsl #4
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], {{#0xff|#0xf0}}
+  %1 = shl i8 %b, 4
+  %2 = xor i8 %a, %1
+  ret i8 %2
+}
+
+define zeroext i16 @xor_rs_i16(i16 %a, i16 %b) {
+; CHECK-LABEL: xor_rs_i16
+; CHECK:       eor [[REG:w[0-9]+]], w0, w1, lsl #8
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG]], {{#0xffff|#0xff00}}
+  %1 = shl i16 %b, 8
+  %2 = xor i16 %a, %1
+  ret i16 %2
+}
+
+define i32 @xor_rs_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: xor_rs_i32
+; CHECK:       eor w0, w0, w1, lsl #8
+  %1 = shl i32 %b, 8
+  %2 = xor i32 %a, %1
+  ret i32 %2
+}
+
+define i64 @xor_rs_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: xor_rs_i64
+; CHECK:       eor x0, x0, x1, lsl #8
+  %1 = shl i64 %b, 8
+  %2 = xor i64 %a, %1
+  ret i64 %2
+}
+
+define i32 @xor_mul_i32(i32 %a, i32 %b) {
+; CHECK-LABEL: xor_mul_i32
+; CHECK:       eor w0, w0, w1, lsl #2
+  %1 = mul i32 %b, 4
+  %2 = xor i32 %a, %1
+  ret i32 %2
+}
+
+define i64 @xor_mul_i64(i64 %a, i64 %b) {
+; CHECK-LABEL: xor_mul_i64
+; CHECK:       eor x0, x0, x1, lsl #2
+  %1 = mul i64 %b, 4
+  %2 = xor i64 %a, %1
+  ret i64 %2
+}
+
diff --git a/test/CodeGen/AArch64/fast-isel-memcpy.ll b/test/CodeGen/AArch64/fast-isel-memcpy.ll
new file mode 100644
index 000000000000..9161dad249a9
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-memcpy.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+; Test that we don't segfault.
+; CHECK-LABEL: test
+; CHECK:       ldr [[REG1:x[0-9]+]], [x1]
+; CHECK-NEXT:  and [[REG2:x[0-9]+]], x0, #0x7fffffffffffffff
+; CHECK-NEXT:  str [[REG1]], {{\[}}[[REG2]]{{\]}}
+define void @test(i64 %a, i8* %b) {
+  %1 = and i64 %a, 9223372036854775807
+  %2 = inttoptr i64 %1 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %b, i64 8, i32 8, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
diff --git a/test/CodeGen/AArch64/fast-isel-mul.ll b/test/CodeGen/AArch64/fast-isel-mul.ll
index d02c67f52f8d..f2fda27c2f7b 100644
--- a/test/CodeGen/AArch64/fast-isel-mul.ll
+++ b/test/CodeGen/AArch64/fast-isel-mul.ll
@@ -1,40 +1,44 @@
-; RUN: llc -fast-isel -fast-isel-abort -mtriple=aarch64 -o - %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
 
-@var8 = global i8 0
-@var16 = global i16 0
-@var32 = global i32 0
-@var64 = global i64 0
-
-define void @test_mul8(i8 %lhs, i8 %rhs) {
+define zeroext i8 @test_mul8(i8 %lhs, i8 %rhs) {
 ; CHECK-LABEL: test_mul8:
-; CHECK: mul w0, w0, w1
-;  %lhs = load i8* @var8
-;  %rhs = load i8* @var8
-  %prod = mul i8 %lhs, %rhs
-  store i8 %prod, i8* @var8
-  ret void
+; CHECK:       mul {{w[0-9]+}}, w0, w1
+  %1 = mul i8 %lhs, %rhs
+  ret i8 %1
 }
 
-define void @test_mul16(i16 %lhs, i16 %rhs) {
+define zeroext i16 @test_mul16(i16 %lhs, i16 %rhs) {
 ; CHECK-LABEL: test_mul16:
-; CHECK: mul w0, w0, w1
-  %prod = mul i16 %lhs, %rhs
-  store i16 %prod, i16* @var16
-  ret void
+; CHECK:       mul {{w[0-9]+}}, w0, w1
+  %1 = mul i16 %lhs, %rhs
+  ret i16 %1
 }
 
-define void @test_mul32(i32 %lhs, i32 %rhs) {
+define i32 @test_mul32(i32 %lhs, i32 %rhs) {
 ; CHECK-LABEL: test_mul32:
-; CHECK: mul w0, w0, w1
-  %prod = mul i32 %lhs, %rhs
-  store i32 %prod, i32* @var32
-  ret void
+; CHECK:       mul {{w[0-9]+}}, w0, w1
+  %1 = mul i32 %lhs, %rhs
+  ret i32 %1
 }
 
-define void @test_mul64(i64 %lhs, i64 %rhs) {
+define i64 @test_mul64(i64 %lhs, i64 %rhs) {
 ; CHECK-LABEL: test_mul64:
-; CHECK: mul x0, x0, x1
-  %prod = mul i64 %lhs, %rhs
-  store i64 %prod, i64* @var64
-  ret void
+; CHECK:       mul {{x[0-9]+}}, x0, x1
+  %1 = mul i64 %lhs, %rhs
+  ret i64 %1
+}
+
+define i32 @test_mul2shift_i32(i32 %a) {
+; CHECK-LABEL: test_mul2shift_i32:
+; CHECK:       lsl {{w[0-9]+}}, w0, #2
+  %1 = mul i32 %a, 4
+  ret i32 %1
 }
+
+define i64 @test_mul2shift_i64(i64 %a) {
+; CHECK-LABEL: test_mul2shift_i64:
+; CHECK:       lsl {{x[0-9]+}}, x0, #3
+  %1 = mul i64 %a, 8
+  ret i64 %1
+}
+
diff --git a/test/CodeGen/AArch64/fast-isel-runtime-libcall.ll b/test/CodeGen/AArch64/fast-isel-runtime-libcall.ll
new file mode 100644
index 000000000000..8d2d39a1a1ff
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-runtime-libcall.ll
@@ -0,0 +1,96 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -code-model=small -verify-machineinstrs < %s | FileCheck %s --check-prefix=SMALL
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -code-model=large -verify-machineinstrs < %s | FileCheck %s --check-prefix=LARGE
+
+define float @frem_f32(float %a, float %b) {
+; SMALL-LABEL: frem_f32
+; SMALL:       bl _fmodf
+; LARGE-LABEL: frem_f32
+; LARGE:       adrp  [[REG:x[0-9]+]], _fmodf@GOTPAGE
+; LARGE:       ldr [[REG]], {{\[}}[[REG]], _fmodf@GOTPAGEOFF{{\]}}
+; LARGE-NEXT:  blr [[REG]]
+  %1 = frem float %a, %b
+  ret float %1
+}
+
+define double @frem_f64(double %a, double %b) {
+; SMALL-LABEL: frem_f64
+; SMALL:       bl _fmod
+; LARGE-LABEL: frem_f64
+; LARGE:       adrp  [[REG:x[0-9]+]], _fmod@GOTPAGE
+; LARGE:       ldr [[REG]], {{\[}}[[REG]], _fmod@GOTPAGEOFF{{\]}}
+; LARGE-NEXT:  blr [[REG]]
+  %1 = frem double %a, %b
+  ret double %1
+}
+
+define float @sin_f32(float %a) {
+; SMALL-LABEL: sin_f32
+; SMALL:       bl _sinf
+; LARGE-LABEL: sin_f32
+; LARGE:       adrp  [[REG:x[0-9]+]], _sinf@GOTPAGE
+; LARGE:       ldr [[REG]], {{\[}}[[REG]], _sinf@GOTPAGEOFF{{\]}}
+; LARGE-NEXT:  blr [[REG]]
+  %1 = call float @llvm.sin.f32(float %a)
+  ret float %1
+}
+
+define double @sin_f64(double %a) {
+; SMALL-LABEL: sin_f64
+; SMALL:       bl _sin
+; LARGE-LABEL: sin_f64
+; LARGE:       adrp  [[REG:x[0-9]+]], _sin@GOTPAGE
+; LARGE:       ldr [[REG]], {{\[}}[[REG]], _sin@GOTPAGEOFF{{\]}}
+; LARGE-NEXT:  blr [[REG]]
+  %1 = call double @llvm.sin.f64(double %a)
+  ret double %1
+}
+
+define float @cos_f32(float %a) {
+; SMALL-LABEL: cos_f32
+; SMALL:       bl _cosf
+; LARGE-LABEL: cos_f32
+; LARGE:       adrp  [[REG:x[0-9]+]], _cosf@GOTPAGE
+; LARGE:       ldr [[REG]], {{\[}}[[REG]], _cosf@GOTPAGEOFF{{\]}}
+; LARGE-NEXT:  blr [[REG]]
+  %1 = call float @llvm.cos.f32(float %a)
+  ret float %1
+}
+
+define double @cos_f64(double %a) {
+; SMALL-LABEL: cos_f64
+; SMALL:       bl _cos
+; LARGE-LABEL: cos_f64
+; LARGE:       adrp  [[REG:x[0-9]+]], _cos@GOTPAGE
+; LARGE:       ldr [[REG]], {{\[}}[[REG]], _cos@GOTPAGEOFF{{\]}}
+; LARGE-NEXT:  blr [[REG]]
+  %1 = call double @llvm.cos.f64(double %a)
+  ret double %1
+}
+
+define float @pow_f32(float %a, float %b) {
+; SMALL-LABEL: pow_f32
+; SMALL:       bl _powf
+; LARGE-LABEL: pow_f32
+; LARGE:       adrp  [[REG:x[0-9]+]], _powf@GOTPAGE
+; LARGE:       ldr [[REG]], {{\[}}[[REG]], _powf@GOTPAGEOFF{{\]}}
+; LARGE-NEXT:  blr [[REG]]
+  %1 = call float @llvm.pow.f32(float %a, float %b)
+  ret float %1
+}
+
+define double @pow_f64(double %a, double %b) {
+; SMALL-LABEL: pow_f64
+; SMALL:       bl _pow
+; LARGE-LABEL: pow_f64
+; LARGE:       adrp  [[REG:x[0-9]+]], _pow@GOTPAGE
+; LARGE:       ldr [[REG]], {{\[}}[[REG]], _pow@GOTPAGEOFF{{\]}}
+; LARGE-NEXT:  blr [[REG]]
+  %1 = call double @llvm.pow.f64(double %a, double %b)
+  ret double %1
+}
+declare float @llvm.sin.f32(float)
+declare double @llvm.sin.f64(double)
+declare float @llvm.cos.f32(float)
+declare double @llvm.cos.f64(double)
+declare float @llvm.pow.f32(float, float)
+declare double @llvm.pow.f64(double, double)
diff --git a/test/CodeGen/AArch64/fast-isel-sdiv.ll b/test/CodeGen/AArch64/fast-isel-sdiv.ll
new file mode 100644
index 000000000000..30807767fa79
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-sdiv.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mtriple=aarch64-apple-darwin                             -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @sdiv_i32_exact(i32 %a) {
+; CHECK-LABEL: sdiv_i32_exact
+; CHECK:       asr {{w[0-9]+}}, w0, #3
+  %1 = sdiv exact i32 %a, 8
+  ret i32 %1
+}
+
+define i32 @sdiv_i32_pos(i32 %a) {
+; CHECK-LABEL: sdiv_i32_pos
+; CHECK:       add [[REG1:w[0-9]+]], w0, #7
+; CHECK-NEXT:  cmp w0, #0
+; CHECK-NEXT:  csel [[REG2:w[0-9]+]], [[REG1]], w0, lt
+; CHECK-NEXT:  asr {{w[0-9]+}}, [[REG2]], #3
+  %1 = sdiv i32 %a, 8
+  ret i32 %1
+}
+
+define i32 @sdiv_i32_neg(i32 %a) {
+; CHECK-LABEL: sdiv_i32_neg
+; CHECK:       add [[REG1:w[0-9]+]], w0, #7
+; CHECK-NEXT:  cmp w0, #0
+; CHECK-NEXT:  csel [[REG2:w[0-9]+]], [[REG1]], w0, lt
+; CHECK-NEXT:  neg {{w[0-9]+}}, [[REG2]], asr #3
+  %1 = sdiv i32 %a, -8
+  ret i32 %1
+}
+
+define i64 @sdiv_i64_exact(i64 %a) {
+; CHECK-LABEL: sdiv_i64_exact
+; CHECK:       asr {{x[0-9]+}}, x0, #4
+  %1 = sdiv exact i64 %a, 16
+  ret i64 %1
+}
+
+define i64 @sdiv_i64_pos(i64 %a) {
+; CHECK-LABEL: sdiv_i64_pos
+; CHECK:       add [[REG1:x[0-9]+]], x0, #15
+; CHECK-NEXT:  cmp x0, #0
+; CHECK-NEXT:  csel [[REG2:x[0-9]+]], [[REG1]], x0, lt
+; CHECK-NEXT:  asr {{x[0-9]+}}, [[REG2]], #4
+  %1 = sdiv i64 %a, 16
+  ret i64 %1
+}
+
+define i64 @sdiv_i64_neg(i64 %a) {
+; CHECK-LABEL: sdiv_i64_neg
+; CHECK:       add [[REG1:x[0-9]+]], x0, #15
+; CHECK-NEXT:  cmp x0, #0
+; CHECK-NEXT:  csel [[REG2:x[0-9]+]], [[REG1]], x0, lt
+; CHECK-NEXT:  neg {{x[0-9]+}}, [[REG2]], asr #4
+  %1 = sdiv i64 %a, -16
+  ret i64 %1
+}
diff --git a/test/CodeGen/AArch64/fast-isel-select.ll b/test/CodeGen/AArch64/fast-isel-select.ll
new file mode 100644
index 000000000000..928e9d46741d
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-select.ll
@@ -0,0 +1,316 @@
+; RUN: llc -mtriple=aarch64-apple-darwin                             -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+; First test the different supported value types for select.
+define zeroext i1 @select_i1(i1 zeroext %c, i1 zeroext %a, i1 zeroext %b) {
+; CHECK-LABEL: select_i1
+; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
+; CHECK-NEXT:  csel {{w[0-9]+}}, w1, w2, ne
+  %1 = select i1 %c, i1 %a, i1 %b
+  ret i1 %1
+}
+
+define zeroext i8 @select_i8(i1 zeroext %c, i8 zeroext %a, i8 zeroext %b) {
+; CHECK-LABEL: select_i8
+; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
+; CHECK-NEXT:  csel {{w[0-9]+}}, w1, w2, ne
+  %1 = select i1 %c, i8 %a, i8 %b
+  ret i8 %1
+}
+
+define zeroext i16 @select_i16(i1 zeroext %c, i16 zeroext %a, i16 zeroext %b) {
+; CHECK-LABEL: select_i16
+; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
+; CHECK-NEXT:  csel {{w[0-9]+}}, w1, w2, ne
+  %1 = select i1 %c, i16 %a, i16 %b
+  ret i16 %1
+}
+
+define i32 @select_i32(i1 zeroext %c, i32 %a, i32 %b) {
+; CHECK-LABEL: select_i32
+; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
+; CHECK-NEXT:  csel {{w[0-9]+}}, w1, w2, ne
+  %1 = select i1 %c, i32 %a, i32 %b
+  ret i32 %1
+}
+
+define i64 @select_i64(i1 zeroext %c, i64 %a, i64 %b) {
+; CHECK-LABEL: select_i64
+; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
+; CHECK-NEXT:  csel {{x[0-9]+}}, x1, x2, ne
+  %1 = select i1 %c, i64 %a, i64 %b
+  ret i64 %1
+}
+
+define float @select_f32(i1 zeroext %c, float %a, float %b) {
+; CHECK-LABEL: select_f32
+; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, ne
+  %1 = select i1 %c, float %a, float %b
+  ret float %1
+}
+
+define double @select_f64(i1 zeroext %c, double %a, double %b) {
+; CHECK-LABEL: select_f64
+; CHECK:       {{cmp w0, #0|tst w0, #0x1}}
+; CHECK-NEXT:  fcsel {{d[0-9]+}}, d0, d1, ne
+  %1 = select i1 %c, double %a, double %b
+  ret double %1
+}
+
+; Now test the folding of all compares.
+define float @select_fcmp_false(float %x, float %a, float %b) {
+; CHECK-LABEL: select_fcmp_false
+; CHECK:       mov.16b {{v[0-9]+}}, v2
+  %1 = fcmp ogt float %x, %x
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_fcmp_ogt(float %x, float %y, float %a, float %b) {
+; CHECK-LABEL: select_fcmp_ogt
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, gt
+  %1 = fcmp ogt float %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_fcmp_oge(float %x, float %y, float %a, float %b) {
+; CHECK-LABEL: select_fcmp_oge
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, ge
+  %1 = fcmp oge float %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_fcmp_olt(float %x, float %y, float %a, float %b) {
+; CHECK-LABEL: select_fcmp_olt
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, mi
+  %1 = fcmp olt float %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_fcmp_ole(float %x, float %y, float %a, float %b) {
+; CHECK-LABEL: select_fcmp_ole
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, ls
+  %1 = fcmp ole float %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_fcmp_one(float %x, float %y, float %a, float %b) {
+; CHECK-LABEL: select_fcmp_one
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  fcsel [[REG:s[0-9]+]], s2, s3, mi
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, [[REG]], gt
+  %1 = fcmp one float %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_fcmp_ord(float %x, float %y, float %a, float %b) {
+; CHECK-LABEL: select_fcmp_ord
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, vc
+  %1 = fcmp ord float %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_fcmp_uno(float %x, float %y, float %a, float %b) {
+; CHECK-LABEL: select_fcmp_uno
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, vs
+  %1 = fcmp uno float %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_fcmp_ueq(float %x, float %y, float %a, float %b) {
+; CHECK-LABEL: select_fcmp_ueq
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  fcsel [[REG:s[0-9]+]], s2, s3, eq
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, [[REG]], vs
+  %1 = fcmp ueq float %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_fcmp_ugt(float %x, float %y, float %a, float %b) {
+; CHECK-LABEL: select_fcmp_ugt
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, hi
+  %1 = fcmp ugt float %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_fcmp_uge(float %x, float %y, float %a, float %b) {
+; CHECK-LABEL: select_fcmp_uge
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, pl
+  %1 = fcmp uge float %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_fcmp_ult(float %x, float %y, float %a, float %b) {
+; CHECK-LABEL: select_fcmp_ult
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, lt
+  %1 = fcmp ult float %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+
+define float @select_fcmp_ule(float %x, float %y, float %a, float %b) {
+; CHECK-LABEL: select_fcmp_ule
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, le
+  %1 = fcmp ule float %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_fcmp_une(float %x, float %y, float %a, float %b) {
+; CHECK-LABEL: select_fcmp_une
+; CHECK:       fcmp s0, s1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s2, s3, ne
+  %1 = fcmp une float %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_fcmp_true(float %x, float %a, float %b) {
+; CHECK-LABEL: select_fcmp_true
+; CHECK:       mov.16b {{v[0-9]+}}, v1
+  %1 = fcmp ueq float %x, %x
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_icmp_eq(i32 %x, i32 %y, float %a, float %b) {
+; CHECK-LABEL: select_icmp_eq
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, eq
+  %1 = icmp eq i32 %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_icmp_ne(i32 %x, i32 %y, float %a, float %b) {
+; CHECK-LABEL: select_icmp_ne
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, ne
+  %1 = icmp ne i32 %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_icmp_ugt(i32 %x, i32 %y, float %a, float %b) {
+; CHECK-LABEL: select_icmp_ugt
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, hi
+  %1 = icmp ugt i32 %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_icmp_uge(i32 %x, i32 %y, float %a, float %b) {
+; CHECK-LABEL: select_icmp_uge
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, hs
+  %1 = icmp uge i32 %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_icmp_ult(i32 %x, i32 %y, float %a, float %b) {
+; CHECK-LABEL: select_icmp_ult
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, lo
+  %1 = icmp ult i32 %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_icmp_ule(i32 %x, i32 %y, float %a, float %b) {
+; CHECK-LABEL: select_icmp_ule
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, ls
+  %1 = icmp ule i32 %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_icmp_sgt(i32 %x, i32 %y, float %a, float %b) {
+; CHECK-LABEL: select_icmp_sgt
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, gt
+  %1 = icmp sgt i32 %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_icmp_sge(i32 %x, i32 %y, float %a, float %b) {
+; CHECK-LABEL: select_icmp_sge
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, ge
+  %1 = icmp sge i32 %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_icmp_slt(i32 %x, i32 %y, float %a, float %b) {
+; CHECK-LABEL: select_icmp_slt
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, lt
+  %1 = icmp slt i32 %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+define float @select_icmp_sle(i32 %x, i32 %y, float %a, float %b) {
+; CHECK-LABEL: select_icmp_sle
+; CHECK:       cmp w0, w1
+; CHECK-NEXT:  fcsel {{s[0-9]+}}, s0, s1, le
+  %1 = icmp sle i32 %x, %y
+  %2 = select i1 %1, float %a, float %b
+  ret float %2
+}
+
+; Test peephole optimizations for select.
+define zeroext i1 @select_opt1(i1 zeroext %c, i1 zeroext %a) {
+; CHECK-LABEL: select_opt1
+; CHECK:       orr {{w[0-9]+}}, w0, w1
+  %1 = select i1 %c, i1 true, i1 %a
+  ret i1 %1
+}
+
+define zeroext i1 @select_opt2(i1 zeroext %c, i1 zeroext %a) {
+; CHECK-LABEL: select_opt2
+; CHECK:       eor [[REG:w[0-9]+]], w0, #0x1
+; CHECK:       orr {{w[0-9]+}}, [[REG]], w1
+  %1 = select i1 %c, i1 %a, i1 true
+  ret i1 %1
+}
+
+define zeroext i1 @select_opt3(i1 zeroext %c, i1 zeroext %a) {
+; CHECK-LABEL: select_opt3
+; CHECK:       bic {{w[0-9]+}}, w1, w0
+  %1 = select i1 %c, i1 false, i1 %a
+  ret i1 %1
+}
+
+define zeroext i1 @select_opt4(i1 zeroext %c, i1 zeroext %a) {
+; CHECK-LABEL: select_opt4
+; CHECK:       and {{w[0-9]+}}, w0, w1
+  %1 = select i1 %c, i1 %a, i1 false
+  ret i1 %1
+}
diff --git a/test/CodeGen/AArch64/fast-isel-shift.ll b/test/CodeGen/AArch64/fast-isel-shift.ll
new file mode 100644
index 000000000000..ce4ba49f4999
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-shift.ll
@@ -0,0 +1,545 @@
+; RUN: llc -fast-isel -fast-isel-abort -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: asr_zext_i1_i16
+; CHECK:       uxth {{w[0-9]*}}, wzr
+define zeroext i16 @asr_zext_i1_i16(i1 %b) {
+  %1 = zext i1 %b to i16
+  %2 = ashr i16 %1, 1
+  ret i16 %2
+}
+
+; CHECK-LABEL: asr_sext_i1_i16
+; CHECK:       sbfx [[REG1:w[0-9]+]], {{w[0-9]*}}, #0, #1
+; CHECK-NEXT:  sxth {{w[0-9]*}}, [[REG1]]
+define signext i16 @asr_sext_i1_i16(i1 %b) {
+  %1 = sext i1 %b to i16
+  %2 = ashr i16 %1, 1
+  ret i16 %2
+}
+
+; CHECK-LABEL: asr_zext_i1_i32
+; CHECK:       mov {{w[0-9]*}}, wzr
+define i32 @asr_zext_i1_i32(i1 %b) {
+  %1 = zext i1 %b to i32
+  %2 = ashr i32 %1, 1
+  ret i32 %2
+}
+
+; CHECK-LABEL: asr_sext_i1_i32
+; CHECK:       sbfx  {{w[0-9]*}}, {{w[0-9]*}}, #0, #1
+define i32 @asr_sext_i1_i32(i1 %b) {
+  %1 = sext i1 %b to i32
+  %2 = ashr i32 %1, 1
+  ret i32 %2
+}
+
+; CHECK-LABEL: asr_zext_i1_i64
+; CHECK:       mov {{x[0-9]*}}, xzr
+define i64 @asr_zext_i1_i64(i1 %b) {
+  %1 = zext i1 %b to i64
+  %2 = ashr i64 %1, 1
+  ret i64 %2
+}
+
+; CHECK-LABEL: asr_sext_i1_i64
+; CHECK:       sbfx {{x[0-9]*}}, {{x[0-9]*}}, #0, #1
+define i64 @asr_sext_i1_i64(i1 %b) {
+  %1 = sext i1 %b to i64
+  %2 = ashr i64 %1, 1
+  ret i64 %2
+}
+
+; CHECK-LABEL: lsr_zext_i1_i16
+; CHECK:       uxth {{w[0-9]*}}, wzr
+define zeroext i16 @lsr_zext_i1_i16(i1 %b) {
+  %1 = zext i1 %b to i16
+  %2 = lshr i16 %1, 1
+  ret i16 %2
+}
+
+; CHECK-LABEL: lsr_sext_i1_i16
+; CHECK:       sbfx [[REG1:w[0-9]+]], {{w[0-9]*}}, #0, #1
+; CHECK-NEXT:  ubfx [[REG2:w[0-9]+]], [[REG1]], #1, #15
+; CHECK-NEXT:  sxth {{w[0-9]*}}, [[REG2]]
+define signext i16 @lsr_sext_i1_i16(i1 %b) {
+  %1 = sext i1 %b to i16
+  %2 = lshr i16 %1, 1
+  ret i16 %2
+}
+
+; CHECK-LABEL: lsr_zext_i1_i32
+; CHECK:       mov {{w[0-9]*}}, wzr
+define i32 @lsr_zext_i1_i32(i1 %b) {
+  %1 = zext i1 %b to i32
+  %2 = lshr i32 %1, 1
+  ret i32 %2
+}
+
+; CHECK-LABEL: lsr_sext_i1_i32
+; CHECK:       sbfx [[REG1:w[0-9]+]], {{w[0-9]*}}, #0, #1
+; CHECK-NEXT:  lsr {{w[0-9]*}}, [[REG1:w[0-9]+]], #1
+define i32 @lsr_sext_i1_i32(i1 %b) {
+  %1 = sext i1 %b to i32
+  %2 = lshr i32 %1, 1
+  ret i32 %2
+}
+
+; CHECK-LABEL: lsr_zext_i1_i64
+; CHECK:       mov {{x[0-9]*}}, xzr
+define i64 @lsr_zext_i1_i64(i1 %b) {
+  %1 = zext i1 %b to i64
+  %2 = lshr i64 %1, 1
+  ret i64 %2
+}
+
+; CHECK-LABEL: lsl_zext_i1_i16
+; CHECK:       ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #1
+define zeroext i16 @lsl_zext_i1_i16(i1 %b) {
+  %1 = zext i1 %b to i16
+  %2 = shl i16 %1, 4
+  ret i16 %2
+}
+
+; CHECK-LABEL: lsl_sext_i1_i16
+; CHECK:       sbfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #1
+define signext i16 @lsl_sext_i1_i16(i1 %b) {
+  %1 = sext i1 %b to i16
+  %2 = shl i16 %1, 4
+  ret i16 %2
+}
+
+; CHECK-LABEL: lsl_zext_i1_i32
+; CHECK:       ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #1
+define i32 @lsl_zext_i1_i32(i1 %b) {
+  %1 = zext i1 %b to i32
+  %2 = shl i32 %1, 4
+  ret i32 %2
+}
+
+; CHECK-LABEL: lsl_sext_i1_i32
+; CHECK:       sbfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #1
+define i32 @lsl_sext_i1_i32(i1 %b) {
+  %1 = sext i1 %b to i32
+  %2 = shl i32 %1, 4
+  ret i32 %2
+}
+
+; CHECK-LABEL: lsl_zext_i1_i64
+; CHECK:       ubfiz {{x[0-9]*}}, {{x[0-9]*}}, #4, #1
+define i64 @lsl_zext_i1_i64(i1 %b) {
+  %1 = zext i1 %b to i64
+  %2 = shl i64 %1, 4
+  ret i64 %2
+}
+
+; CHECK-LABEL: lsl_sext_i1_i64
+; CHECK:       sbfiz {{x[0-9]*}}, {{x[0-9]*}}, #4, #1
+define i64 @lsl_sext_i1_i64(i1 %b) {
+  %1 = sext i1 %b to i64
+  %2 = shl i64 %1, 4
+  ret i64 %2
+}
+
+; CHECK-LABEL: lslv_i8
+; CHECK:       and [[REG1:w[0-9]+]], w1, #0xff
+; CHECK-NEXT:  lsl [[REG2:w[0-9]+]], w0, [[REG1]]
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG2]], #0xff
+define zeroext i8 @lslv_i8(i8 %a, i8 %b) {
+  %1 = shl i8 %a, %b
+  ret i8 %1
+}
+
+; CHECK-LABEL: lsl_i8
+; CHECK:       ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
+define zeroext i8 @lsl_i8(i8 %a) {
+  %1 = shl i8 %a, 4
+  ret i8 %1
+}
+
+; CHECK-LABEL: lsl_zext_i8_i16
+; CHECK:       ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #8
+define zeroext i16 @lsl_zext_i8_i16(i8 %b) {
+  %1 = zext i8 %b to i16
+  %2 = shl i16 %1, 4
+  ret i16 %2
+}
+
+; CHECK-LABEL: lsl_sext_i8_i16
+; CHECK:       sbfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #8
+define signext i16 @lsl_sext_i8_i16(i8 %b) {
+  %1 = sext i8 %b to i16
+  %2 = shl i16 %1, 4
+  ret i16 %2
+}
+
+; CHECK-LABEL: lsl_zext_i8_i32
+; CHECK:       ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #8
+define i32 @lsl_zext_i8_i32(i8 %b) {
+  %1 = zext i8 %b to i32
+  %2 = shl i32 %1, 4
+  ret i32 %2
+}
+
+; CHECK-LABEL: lsl_sext_i8_i32
+; CHECK:       sbfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #8
+define i32 @lsl_sext_i8_i32(i8 %b) {
+  %1 = sext i8 %b to i32
+  %2 = shl i32 %1, 4
+  ret i32 %2
+}
+
+; CHECK-LABEL: lsl_zext_i8_i64
+; CHECK:       ubfiz {{x[0-9]*}}, {{x[0-9]*}}, #4, #8
+define i64 @lsl_zext_i8_i64(i8 %b) {
+  %1 = zext i8 %b to i64
+  %2 = shl i64 %1, 4
+  ret i64 %2
+}
+
+; CHECK-LABEL: lsl_sext_i8_i64
+; CHECK:       sbfiz {{x[0-9]*}}, {{x[0-9]*}}, #4, #8
+define i64 @lsl_sext_i8_i64(i8 %b) {
+  %1 = sext i8 %b to i64
+  %2 = shl i64 %1, 4
+  ret i64 %2
+}
+
+; CHECK-LABEL: lslv_i16
+; CHECK:       and [[REG1:w[0-9]+]], w1, #0xffff
+; CHECK-NEXT:  lsl [[REG2:w[0-9]+]], w0, [[REG1]]
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG2]], #0xffff
+define zeroext i16 @lslv_i16(i16 %a, i16 %b) {
+  %1 = shl i16 %a, %b
+  ret i16 %1
+}
+
+; CHECK-LABEL: lsl_i16
+; CHECK:       ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #8, #8
+define zeroext i16 @lsl_i16(i16 %a) {
+  %1 = shl i16 %a, 8
+  ret i16 %1
+}
+
+; CHECK-LABEL: lsl_zext_i16_i32
+; CHECK:       ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #8, #16
+define i32 @lsl_zext_i16_i32(i16 %b) {
+  %1 = zext i16 %b to i32
+  %2 = shl i32 %1, 8
+  ret i32 %2
+}
+
+; CHECK-LABEL: lsl_sext_i16_i32
+; CHECK:       sbfiz {{w[0-9]*}}, {{w[0-9]*}}, #8, #16
+define i32 @lsl_sext_i16_i32(i16 %b) {
+  %1 = sext i16 %b to i32
+  %2 = shl i32 %1, 8
+  ret i32 %2
+}
+
+; CHECK-LABEL: lsl_zext_i16_i64
+; CHECK:       ubfiz {{x[0-9]*}}, {{x[0-9]*}}, #8, #16
+define i64 @lsl_zext_i16_i64(i16 %b) {
+  %1 = zext i16 %b to i64
+  %2 = shl i64 %1, 8
+  ret i64 %2
+}
+
+; CHECK-LABEL: lsl_sext_i16_i64
+; CHECK:       sbfiz {{x[0-9]*}}, {{x[0-9]*}}, #8, #16
+define i64 @lsl_sext_i16_i64(i16 %b) {
+  %1 = sext i16 %b to i64
+  %2 = shl i64 %1, 8
+  ret i64 %2
+}
+
+; CHECK-LABEL: lslv_i32
+; CHECK:       lsl {{w[0-9]*}}, w0, w1
+define zeroext i32 @lslv_i32(i32 %a, i32 %b) {
+  %1 = shl i32 %a, %b
+  ret i32 %1
+}
+
+; CHECK-LABEL: lsl_i32
+; CHECK:       lsl {{w[0-9]*}}, {{w[0-9]*}}, #16
+define zeroext i32 @lsl_i32(i32 %a) {
+  %1 = shl i32 %a, 16
+  ret i32 %1
+}
+
+; CHECK-LABEL: lsl_zext_i32_i64
+; CHECK:       ubfiz {{x[0-9]+}}, {{x[0-9]+}}, #16, #32
+define i64 @lsl_zext_i32_i64(i32 %b) {
+  %1 = zext i32 %b to i64
+  %2 = shl i64 %1, 16
+  ret i64 %2
+}
+
+; CHECK-LABEL: lsl_sext_i32_i64
+; CHECK:       sbfiz {{x[0-9]+}}, {{x[0-9]+}}, #16, #32
+define i64 @lsl_sext_i32_i64(i32 %b) {
+  %1 = sext i32 %b to i64
+  %2 = shl i64 %1, 16
+  ret i64 %2
+}
+
+; CHECK-LABEL: lslv_i64
+; CHECK:       lsl {{x[0-9]*}}, x0, x1
+define i64 @lslv_i64(i64 %a, i64 %b) {
+  %1 = shl i64 %a, %b
+  ret i64 %1
+}
+
+; CHECK-LABEL: lsl_i64
+; CHECK:       lsl {{x[0-9]*}}, {{x[0-9]*}}, #32
+define i64 @lsl_i64(i64 %a) {
+  %1 = shl i64 %a, 32
+  ret i64 %1
+}
+
+; CHECK-LABEL: lsrv_i8
+; CHECK:       and [[REG1:w[0-9]+]], w0, #0xff
+; CHECK-NEXT:  and [[REG2:w[0-9]+]], w1, #0xff
+; CHECK-NEXT:  lsr [[REG3:w[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG3]], #0xff
+define zeroext i8 @lsrv_i8(i8 %a, i8 %b) {
+  %1 = lshr i8 %a, %b
+  ret i8 %1
+}
+
+; CHECK-LABEL: lsr_i8
+; CHECK:       ubfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
+define zeroext i8 @lsr_i8(i8 %a) {
+  %1 = lshr i8 %a, 4
+  ret i8 %1
+}
+
+; CHECK-LABEL: lsr_zext_i8_i16
+; CHECK:       ubfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
+define zeroext i16 @lsr_zext_i8_i16(i8 %b) {
+  %1 = zext i8 %b to i16
+  %2 = lshr i16 %1, 4
+  ret i16 %2
+}
+
+; CHECK-LABEL: lsr_sext_i8_i16
+; CHECK:       sxtb [[REG:w[0-9]+]], w0
+; CHECK-NEXT:  ubfx {{w[0-9]*}}, [[REG]], #4, #12
+define signext i16 @lsr_sext_i8_i16(i8 %b) {
+  %1 = sext i8 %b to i16
+  %2 = lshr i16 %1, 4
+  ret i16 %2
+}
+
+; CHECK-LABEL: lsr_zext_i8_i32
+; CHECK:       ubfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
+define i32 @lsr_zext_i8_i32(i8 %b) {
+  %1 = zext i8 %b to i32
+  %2 = lshr i32 %1, 4
+  ret i32 %2
+}
+
+; CHECK-LABEL: lsr_sext_i8_i32
+; CHECK:       sxtb [[REG:w[0-9]+]], w0
+; CHECK-NEXT:  lsr {{w[0-9]*}}, [[REG]], #4
+define i32 @lsr_sext_i8_i32(i8 %b) {
+  %1 = sext i8 %b to i32
+  %2 = lshr i32 %1, 4
+  ret i32 %2
+}
+
+; CHECK-LABEL: lsrv_i16
+; CHECK:       and [[REG1:w[0-9]+]], w0, #0xffff
+; CHECK-NEXT:  and [[REG2:w[0-9]+]], w1, #0xffff
+; CHECK-NEXT:  lsr [[REG3:w[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT:  and {{w[0-9]+}}, [[REG3]], #0xffff
+define zeroext i16 @lsrv_i16(i16 %a, i16 %b) {
+  %1 = lshr i16 %a, %b
+  ret i16 %1
+}
+
+; CHECK-LABEL: lsr_i16
+; CHECK:       ubfx {{w[0-9]*}}, {{w[0-9]*}}, #8, #8
+define zeroext i16 @lsr_i16(i16 %a) {
+  %1 = lshr i16 %a, 8
+  ret i16 %1
+}
+
+; CHECK-LABEL: lsrv_i32
+; CHECK:       lsr {{w[0-9]*}}, w0, w1
+define zeroext i32 @lsrv_i32(i32 %a, i32 %b) {
+  %1 = lshr i32 %a, %b
+  ret i32 %1
+}
+
+; CHECK-LABEL: lsr_i32
+; CHECK:       lsr {{w[0-9]*}}, {{w[0-9]*}}, #16
+define zeroext i32 @lsr_i32(i32 %a) {
+  %1 = lshr i32 %a, 16
+  ret i32 %1
+}
+
+; CHECK-LABEL: lsrv_i64
+; CHECK:       lsr {{x[0-9]*}}, x0, x1
+define i64 @lsrv_i64(i64 %a, i64 %b) {
+  %1 = lshr i64 %a, %b
+  ret i64 %1
+}
+
+; CHECK-LABEL: lsr_i64
+; CHECK:       lsr {{x[0-9]*}}, {{x[0-9]*}}, #32
+define i64 @lsr_i64(i64 %a) {
+  %1 = lshr i64 %a, 32
+  ret i64 %1
+}
+
+; CHECK-LABEL: asrv_i8
+; CHECK:       sxtb [[REG1:w[0-9]+]], w0
+; CHECK-NEXT:  and  [[REG2:w[0-9]+]], w1, #0xff
+; CHECK-NEXT:  asr  [[REG3:w[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT:  and  {{w[0-9]+}}, [[REG3]], #0xff
+define zeroext i8 @asrv_i8(i8 %a, i8 %b) {
+  %1 = ashr i8 %a, %b
+  ret i8 %1
+}
+
+; CHECK-LABEL: asr_i8
+; CHECK:       sbfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
+define zeroext i8 @asr_i8(i8 %a) {
+  %1 = ashr i8 %a, 4
+  ret i8 %1
+}
+
+; CHECK-LABEL: asr_zext_i8_i16
+; CHECK:       ubfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
+define zeroext i16 @asr_zext_i8_i16(i8 %b) {
+  %1 = zext i8 %b to i16
+  %2 = ashr i16 %1, 4
+  ret i16 %2
+}
+
+; CHECK-LABEL: asr_sext_i8_i16
+; CHECK:       sbfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
+define signext i16 @asr_sext_i8_i16(i8 %b) {
+  %1 = sext i8 %b to i16
+  %2 = ashr i16 %1, 4
+  ret i16 %2
+}
+
+; CHECK-LABEL: asr_zext_i8_i32
+; CHECK:       ubfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
+define i32 @asr_zext_i8_i32(i8 %b) {
+  %1 = zext i8 %b to i32
+  %2 = ashr i32 %1, 4
+  ret i32 %2
+}
+
+; CHECK-LABEL: asr_sext_i8_i32
+; CHECK:       sbfx {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
+define i32 @asr_sext_i8_i32(i8 %b) {
+  %1 = sext i8 %b to i32
+  %2 = ashr i32 %1, 4
+  ret i32 %2
+}
+
+; CHECK-LABEL: asrv_i16
+; CHECK:       sxth [[REG1:w[0-9]+]], w0
+; CHECK-NEXT:  and  [[REG2:w[0-9]+]], w1, #0xffff
+; CHECK-NEXT:  asr  [[REG3:w[0-9]+]], [[REG1]], [[REG2]]
+; CHECK-NEXT:  and  {{w[0-9]+}}, [[REG3]], #0xffff
+define zeroext i16 @asrv_i16(i16 %a, i16 %b) {
+  %1 = ashr i16 %a, %b
+  ret i16 %1
+}
+
+; CHECK-LABEL: asr_i16
+; CHECK:       sbfx {{w[0-9]*}}, {{w[0-9]*}}, #8, #8
+define zeroext i16 @asr_i16(i16 %a) {
+  %1 = ashr i16 %a, 8
+  ret i16 %1
+}
+
+; CHECK-LABEL: asrv_i32
+; CHECK:       asr {{w[0-9]*}}, w0, w1
+define zeroext i32 @asrv_i32(i32 %a, i32 %b) {
+  %1 = ashr i32 %a, %b
+  ret i32 %1
+}
+
+; CHECK-LABEL: asr_i32
+; CHECK:       asr {{w[0-9]*}}, {{w[0-9]*}}, #16
+define zeroext i32 @asr_i32(i32 %a) {
+  %1 = ashr i32 %a, 16
+  ret i32 %1
+}
+
+; CHECK-LABEL: asrv_i64
+; CHECK:       asr {{x[0-9]*}}, x0, x1
+define i64 @asrv_i64(i64 %a, i64 %b) {
+  %1 = ashr i64 %a, %b
+  ret i64 %1
+}
+
+; CHECK-LABEL: asr_i64
+; CHECK:       asr {{x[0-9]*}}, {{x[0-9]*}}, #32
+define i64 @asr_i64(i64 %a) {
+  %1 = ashr i64 %a, 32
+  ret i64 %1
+}
+
+; CHECK-LABEL: shift_test1
+; CHECK:       ubfiz {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
+; CHECK-NEXT:  sbfx  {{w[0-9]*}}, {{w[0-9]*}}, #4, #4
+define i32 @shift_test1(i8 %a) {
+  %1 = shl i8 %a, 4
+  %2 = ashr i8 %1, 4
+  %3 = sext i8 %2 to i32
+  ret i32 %3
+}
+
+; Test zero shifts
+
+; CHECK-LABEL: shl_zero
+; CHECK-NOT:   lsl
+define i32 @shl_zero(i32 %a) {
+  %1 = shl i32 %a, 0
+  ret i32 %1
+}
+
+; CHECK-LABEL: lshr_zero
+; CHECK-NOT:   lsr
+define i32 @lshr_zero(i32 %a) {
+  %1 = lshr i32 %a, 0
+  ret i32 %1
+}
+
+; CHECK-LABEL: ashr_zero
+; CHECK-NOT:   asr
+define i32 @ashr_zero(i32 %a) {
+  %1 = ashr i32 %a, 0
+  ret i32 %1
+}
+
+; CHECK-LABEL: shl_zext_zero
+; CHECK:       ubfx x0, x0, #0, #32
+define i64 @shl_zext_zero(i32 %a) {
+  %1 = zext i32 %a to i64
+  %2 = shl i64 %1, 0
+  ret i64 %2
+}
+
+; CHECK-LABEL: lshr_zext_zero
+; CHECK:       ubfx x0, x0, #0, #32
+define i64 @lshr_zext_zero(i32 %a) {
+  %1 = zext i32 %a to i64
+  %2 = lshr i64 %1, 0
+  ret i64 %2
+}
+
+; CHECK-LABEL: ashr_zext_zero
+; CHECK:       ubfx x0, x0, #0, #32
+define i64 @ashr_zext_zero(i32 %a) {
+  %1 = zext i32 %a to i64
+  %2 = ashr i64 %1, 0
+  ret i64 %2
+}
+
diff --git a/test/CodeGen/AArch64/fast-isel-sqrt.ll b/test/CodeGen/AArch64/fast-isel-sqrt.ll
new file mode 100644
index 000000000000..1331d5c7de5b
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-sqrt.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=arm64-apple-darwin                             -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+define float @test_sqrt_f32(float %a) {
+; CHECK-LABEL: test_sqrt_f32
+; CHECK:       fsqrt s0, s0
+  %res = call float @llvm.sqrt.f32(float %a)
+  ret float %res
+}
+declare float @llvm.sqrt.f32(float) nounwind readnone
+
+define double @test_sqrt_f64(double %a) {
+; CHECK-LABEL: test_sqrt_f64
+; CHECK:       fsqrt d0, d0
+  %res = call double @llvm.sqrt.f64(double %a)
+  ret double %res
+}
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
+
diff --git a/test/CodeGen/AArch64/fast-isel-switch-phi.ll b/test/CodeGen/AArch64/fast-isel-switch-phi.ll
new file mode 100644
index 000000000000..c4f871cb56e4
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-switch-phi.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -verify-machineinstrs < %s
+
+; Test that the Machine Instruction PHI node doesn't have more than one operand
+; from the same predecessor.
+define i32 @foo(i32 %a, i32 %b, i1 %c) {
+entry:
+  br i1 %c, label %switch, label %direct
+
+switch:
+  switch i32 %a, label %exit [
+    i32 43, label %continue
+    i32 45, label %continue
+  ]
+
+direct:
+  %var = add i32 %b, 1
+  br label %continue
+
+continue:
+  %var.phi = phi i32 [ %var, %direct ], [ 0, %switch ], [ 0, %switch ]
+  ret i32 %var.phi
+
+exit:
+  ret i32 1
+}
diff --git a/test/CodeGen/AArch64/fast-isel-tbz.ll b/test/CodeGen/AArch64/fast-isel-tbz.ll
new file mode 100644
index 000000000000..a5f02ffa39ac
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-tbz.ll
@@ -0,0 +1,295 @@
+; RUN: llc                             -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK %s
+; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK --check-prefix=FAST %s
+
+define i32 @icmp_eq_i8(i8 zeroext %a) {
+; CHECK-LABEL: icmp_eq_i8
+; CHECK:       tbz {{w[0-9]+}}, #0, {{LBB.+_2}}
+  %1 = and i8 %a, 1
+  %2 = icmp eq i8 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i16(i16 zeroext %a) {
+; CHECK-LABEL: icmp_eq_i16
+; CHECK:       tbz w0, #1, {{LBB.+_2}}
+  %1 = and i16 %a, 2
+  %2 = icmp eq i16 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i32(i32 %a) {
+; CHECK-LABEL: icmp_eq_i32
+; CHECK:       tbz w0, #2, {{LBB.+_2}}
+  %1 = and i32 %a, 4
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i64_1(i64 %a) {
+; CHECK-LABEL: icmp_eq_i64_1
+; CHECK:       tbz w0, #3, {{LBB.+_2}}
+  %1 = and i64 %a, 8
+  %2 = icmp eq i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_eq_i64_2(i64 %a) {
+; CHECK-LABEL: icmp_eq_i64_2
+; CHECK:       tbz x0, #32, {{LBB.+_2}}
+  %1 = and i64 %a, 4294967296
+  %2 = icmp eq i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i8(i8 zeroext %a) {
+; CHECK-LABEL: icmp_ne_i8
+; CHECK:       tbnz w0, #0, {{LBB.+_2}}
+  %1 = and i8 %a, 1
+  %2 = icmp ne i8 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i16(i16 zeroext %a) {
+; CHECK-LABEL: icmp_ne_i16
+; CHECK:       tbnz w0, #1, {{LBB.+_2}}
+  %1 = and i16 %a, 2
+  %2 = icmp ne i16 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i32(i32 %a) {
+; CHECK-LABEL: icmp_ne_i32
+; CHECK:       tbnz w0, #2, {{LBB.+_2}}
+  %1 = and i32 %a, 4
+  %2 = icmp ne i32 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i64_1(i64 %a) {
+; CHECK-LABEL: icmp_ne_i64_1
+; CHECK:       tbnz w0, #3, {{LBB.+_2}}
+  %1 = and i64 %a, 8
+  %2 = icmp ne i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_ne_i64_2(i64 %a) {
+; CHECK-LABEL: icmp_ne_i64_2
+; CHECK:       tbnz x0, #32, {{LBB.+_2}}
+  %1 = and i64 %a, 4294967296
+  %2 = icmp ne i64 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_slt_i8(i8 zeroext %a) {
+; FAST-LABEL: icmp_slt_i8
+; FAST:       tbnz w0, #7, {{LBB.+_2}}
+  %1 = icmp slt i8 %a, 0
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_slt_i16(i16 zeroext %a) {
+; FAST-LABEL: icmp_slt_i16
+; FAST:       tbnz w0, #15, {{LBB.+_2}}
+  %1 = icmp slt i16 %a, 0
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_slt_i32(i32 %a) {
+; CHECK-LABEL: icmp_slt_i32
+; CHECK:       tbnz w0, #31, {{LBB.+_2}}
+  %1 = icmp slt i32 %a, 0
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_slt_i64(i64 %a) {
+; CHECK-LABEL: icmp_slt_i64
+; CHECK:       tbnz x0, #63, {{LBB.+_2}}
+  %1 = icmp slt i64 %a, 0
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sge_i8(i8 zeroext %a) {
+; FAST-LABEL: icmp_sge_i8
+; FAST:       tbz w0, #7, {{LBB.+_2}}
+  %1 = icmp sge i8 %a, 0
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sge_i16(i16 zeroext %a) {
+; FAST-LABEL: icmp_sge_i16
+; FAST:       tbz w0, #15, {{LBB.+_2}}
+  %1 = icmp sge i16 %a, 0
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sle_i8(i8 zeroext %a) {
+; FAST-LABEL: icmp_sle_i8
+; FAST:       tbnz w0, #7, {{LBB.+_2}}
+  %1 = icmp sle i8 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sle_i16(i16 zeroext %a) {
+; FAST-LABEL: icmp_sle_i16
+; FAST:       tbnz w0, #15, {{LBB.+_2}}
+  %1 = icmp sle i16 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sle_i32(i32 %a) {
+; CHECK-LABEL: icmp_sle_i32
+; CHECK:       tbnz w0, #31, {{LBB.+_2}}
+  %1 = icmp sle i32 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sle_i64(i64 %a) {
+; CHECK-LABEL: icmp_sle_i64
+; CHECK:       tbnz x0, #63, {{LBB.+_2}}
+  %1 = icmp sle i64 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sgt_i8(i8 zeroext %a) {
+; FAST-LABEL: icmp_sgt_i8
+; FAST:       tbz w0, #7, {{LBB.+_2}}
+  %1 = icmp sgt i8 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sgt_i16(i16 zeroext %a) {
+; FAST-LABEL: icmp_sgt_i16
+; FAST:       tbz w0, #15, {{LBB.+_2}}
+  %1 = icmp sgt i16 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sgt_i32(i32 %a) {
+; CHECK-LABEL: icmp_sgt_i32
+; CHECK:       tbz w0, #31, {{LBB.+_2}}
+  %1 = icmp sgt i32 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+define i32 @icmp_sgt_i64(i64 %a) {
+; FAST-LABEL: icmp_sgt_i64
+; FAST:       tbz x0, #63, {{LBB.+_2}}
+  %1 = icmp sgt i64 %a, -1
+  br i1 %1, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+; Test that we don't fold the 'and' instruction into the compare.
+define i32 @icmp_eq_and_i32(i32 %a, i1 %c) {
+; CHECK-LABEL: icmp_eq_and_i32
+; CHECK:       and  [[REG:w[0-9]+]], w0, #0x4
+; CHECK-NEXT:  cbz  [[REG]], {{LBB.+_3}}
+  %1 = and i32 %a, 4
+  br i1 %c, label %bb0, label %bb2
+bb0:
+  %2 = icmp eq i32 %1, 0
+  br i1 %2, label %bb1, label %bb2, !prof !0
+bb1:
+  ret i32 1
+bb2:
+  ret i32 0
+}
+
+!0 = !{!"branch_weights", i32 0, i32 2147483647}
+!1 = !{!"branch_weights", i32 2147483647, i32 0}
diff --git a/test/CodeGen/AArch64/fast-isel-trunc.ll b/test/CodeGen/AArch64/fast-isel-trunc.ll
new file mode 100644
index 000000000000..55937eb76fa9
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-trunc.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s
+
+; Test that %1 doesn't get the kill flag set before its last use.
+define i32 @test_trunc(i32 %a) {
+  %1 = add i32 %a, 1
+  %2 = trunc i32 %1 to i16
+  %3 = icmp ult i16 1, %2
+  %4 = add i32 %1, 1
+  %5 = sext i1 %3 to i32
+  %6 = and i32 %4, %5
+  ret i32 %6
+}
diff --git a/test/CodeGen/AArch64/fast-isel-vector-arithmetic.ll b/test/CodeGen/AArch64/fast-isel-vector-arithmetic.ll
new file mode 100644
index 000000000000..eaa0db527949
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-vector-arithmetic.ll
@@ -0,0 +1,74 @@
+; RUN: llc -mtriple=aarch64-apple-darwin                                                   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -fast-isel-abort-args -verify-machineinstrs < %s | FileCheck %s
+
+; Vector Integer Add
+define <8 x i8> @add_v8i8_rr(<8 x i8> %a, <8 x i8> %b) {
+; CHECK: add_v8i8_rr
+; CHECK: add.8b v0, v0, v1
+  %1 = add <8 x i8> %a, %b
+  ret <8 x i8> %1
+}
+
+define <16 x i8> @add_v16i8_rr(<16 x i8> %a, <16 x i8> %b) {
+; CHECK: add_v16i8_rr
+; CHECK: add.16b v0, v0, v1
+  %1 = add <16 x i8> %a, %b
+  ret <16 x i8> %1
+}
+
+define <4 x i16> @add_v4i16_rr(<4 x i16> %a, <4 x i16> %b) {
+; CHECK: add_v4i16_rr
+; CHECK: add.4h v0, v0, v1
+  %1 = add <4 x i16> %a, %b
+  ret <4 x i16> %1
+}
+
+define <8 x i16> @add_v8i16_rr(<8 x i16> %a, <8 x i16> %b) {
+; CHECK: add_v8i16_rr
+; CHECK: add.8h v0, v0, v1
+  %1 = add <8 x i16> %a, %b
+  ret <8 x i16> %1
+}
+
+define <2 x i32> @add_v2i32_rr(<2 x i32> %a, <2 x i32> %b) {
+; CHECK: add_v2i32_rr
+; CHECK: add.2s v0, v0, v1
+  %1 = add <2 x i32> %a, %b
+  ret <2 x i32> %1
+}
+
+define <4 x i32> @add_v4i32_rr(<4 x i32> %a, <4 x i32> %b) {
+; CHECK: add_v4i32_rr
+; CHECK: add.4s v0, v0, v1
+  %1 = add <4 x i32> %a, %b
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @add_v2i64_rr(<2 x i64> %a, <2 x i64> %b) {
+; CHECK: add_v2i64_rr
+; CHECK: add.2d v0, v0, v1
+  %1 = add <2 x i64> %a, %b
+  ret <2 x i64> %1
+}
+
+; Vector Floating-point Add
+define <2 x float> @add_v2f32_rr(<2 x float> %a, <2 x float> %b) {
+; CHECK: add_v2f32_rr
+; CHECK: fadd.2s v0, v0, v1
+  %1 = fadd <2 x float> %a, %b
+  ret <2 x float> %1
+}
+
+define <4 x float> @add_v4f32_rr(<4 x float> %a, <4 x float> %b) {
+; CHECK: add_v4f32_rr
+; CHECK: fadd.4s v0, v0, v1
+  %1 = fadd <4 x float> %a, %b
+  ret <4 x float> %1
+}
+
+define <2 x double> @add_v2f64_rr(<2 x double> %a, <2 x double> %b) {
+; CHECK: add_v2f64_rr
+; CHECK: fadd.2d v0, v0, v1
+  %1 = fadd <2 x double> %a, %b
+  ret <2 x double> %1
+}
diff --git a/test/CodeGen/AArch64/fast-isel-vret.ll b/test/CodeGen/AArch64/fast-isel-vret.ll
new file mode 100644
index 000000000000..9ad92273d3af
--- /dev/null
+++ b/test/CodeGen/AArch64/fast-isel-vret.ll
@@ -0,0 +1,9 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+; Test that we don't abort fast-isle for ret
+define <8 x i8> @ret_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: ret_v8i8
+; CHECK:       add.8b v0, v0, v1
+  %1 = add <8 x i8> %a, %b
+  ret <8 x i8> %1
+}
diff --git a/test/CodeGen/AArch64/fdiv-combine.ll b/test/CodeGen/AArch64/fdiv-combine.ll
new file mode 100644
index 000000000000..389eefd97b28
--- /dev/null
+++ b/test/CodeGen/AArch64/fdiv-combine.ll
@@ -0,0 +1,94 @@
+; RUN: llc -march=aarch64 < %s | FileCheck %s
+
+; Following test cases check:
+;   a / D; b / D; c / D;
+;                =>
+;   recip = 1.0 / D; a * recip; b * recip; c * recip;
+define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
+; CHECK-LABEL: three_fdiv_float:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv float %a, %D
+  %div1 = fdiv float %b, %D
+  %div2 = fdiv float %c, %D
+  tail call void @foo_3f(float %div, float %div1, float %div2)
+  ret void
+}
+
+define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
+; CHECK-LABEL: three_fdiv_double:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv double %a, %D
+  %div1 = fdiv double %b, %D
+  %div2 = fdiv double %c, %D
+  tail call void @foo_3d(double %div, double %div1, double %div2)
+  ret void
+}
+
+define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+; CHECK-LABEL: three_fdiv_4xfloat:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv <4 x float> %a, %D
+  %div1 = fdiv <4 x float> %b, %D
+  %div2 = fdiv <4 x float> %c, %D
+  tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
+  ret void
+}
+
+define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
+; CHECK-LABEL: three_fdiv_2xdouble:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv <2 x double> %a, %D
+  %div1 = fdiv <2 x double> %b, %D
+  %div2 = fdiv <2 x double> %c, %D
+  tail call void @foo_3_2xd(<2 x double> %div, <2 x double> %div1, <2 x double> %div2)
+  ret void
+}
+
+; Following test cases check we never combine two FDIVs if neither of them
+; calculates a reciprocal.
+define void @two_fdiv_float(float %D, float %a, float %b) #0 {
+; CHECK-LABEL: two_fdiv_float:
+; CHECK: fdiv
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fmul
+  %div = fdiv float %a, %D
+  %div1 = fdiv float %b, %D
+  tail call void @foo_2f(float %div, float %div1)
+  ret void
+}
+
+define void @two_fdiv_double(double %D, double %a, double %b) #0 {
+; CHECK-LABEL: two_fdiv_double:
+; CHECK: fdiv
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fmul
+  %div = fdiv double %a, %D
+  %div1 = fdiv double %b, %D
+  tail call void @foo_2d(double %div, double %div1)
+  ret void
+}
+
+declare void @foo_3f(float, float, float)
+declare void @foo_3d(double, double, double)
+declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
+declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
+declare void @foo_2f(float, float)
+declare void @foo_2d(double, double)
+
+attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/AArch64/fp16-instructions.ll b/test/CodeGen/AArch64/fp16-instructions.ll
new file mode 100644
index 000000000000..7a44cd128cb0
--- /dev/null
+++ b/test/CodeGen/AArch64/fp16-instructions.ll
@@ -0,0 +1,109 @@
+; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+
+define half @add_h(half %a, half %b) {
+entry:
+; CHECK-LABEL: add_h:
+; CHECK-DAG: fcvt [[OP1:s[0-9]+]], h0
+; CHECK-DAG: fcvt [[OP2:s[0-9]+]], h1
+; CHECK: fadd [[RES:s[0-9]+]], [[OP1]], [[OP2]]
+; CHECK: fcvt h0, [[RES]]
+  %0 = fadd half %a, %b
+  ret half %0
+}
+
+
+define half @sub_h(half %a, half %b) {
+entry:
+; CHECK-LABEL: sub_h:
+; CHECK-DAG: fcvt [[OP1:s[0-9]+]], h0
+; CHECK-DAG: fcvt [[OP2:s[0-9]+]], h1
+; CHECK: fsub [[RES:s[0-9]+]], [[OP1]], [[OP2]]
+; CHECK: fcvt h0, [[RES]]
+  %0 = fsub half %a, %b
+  ret half %0
+}
+
+
+define half @mul_h(half %a, half %b) {
+entry:
+; CHECK-LABEL: mul_h:
+; CHECK-DAG: fcvt [[OP1:s[0-9]+]], h0
+; CHECK-DAG: fcvt [[OP2:s[0-9]+]], h1
+; CHECK: fmul [[RES:s[0-9]+]], [[OP1]], [[OP2]]
+; CHECK: fcvt h0, [[RES]]
+  %0 = fmul half %a, %b
+  ret half %0
+}
+
+
+define half @div_h(half %a, half %b) {
+entry:
+; CHECK-LABEL: div_h:
+; CHECK-DAG: fcvt [[OP1:s[0-9]+]], h0
+; CHECK-DAG: fcvt [[OP2:s[0-9]+]], h1
+; CHECK: fdiv [[RES:s[0-9]+]], [[OP1]], [[OP2]]
+; CHECK: fcvt h0, [[RES]]
+  %0 = fdiv half %a, %b
+  ret half %0
+}
+
+
+define half @load_h(half* %a) {
+entry:
+; CHECK-LABEL: load_h:
+; CHECK: ldr h0, [x0]
+  %0 = load half* %a, align 4
+  ret half %0
+}
+
+
+define void @store_h(half* %a, half %b) {
+entry:
+; CHECK-LABEL: store_h:
+; CHECK: str h0, [x0]
+  store half %b, half* %a, align 4
+  ret void
+}
+
+define half @s_to_h(float %a) {
+; CHECK-LABEL: s_to_h:
+; CHECK: fcvt h0, s0
+  %1 = fptrunc float %a to half
+  ret half %1
+}
+
+define half @d_to_h(double %a) {
+; CHECK-LABEL: d_to_h:
+; CHECK: fcvt h0, d0
+  %1 = fptrunc double %a to half
+  ret half %1
+}
+
+define float @h_to_s(half %a) {
+; CHECK-LABEL: h_to_s:
+; CHECK: fcvt s0, h0
+  %1 = fpext half %a to float
+  ret float %1
+}
+
+define double @h_to_d(half %a) {
+; CHECK-LABEL: h_to_d:
+; CHECK: fcvt d0, h0
+  %1 = fpext half %a to double
+  ret double %1
+}
+
+define half @bitcast_i_to_h(i16 %a) {
+; CHECK-LABEL: bitcast_i_to_h:
+; CHECK: fmov s0, w0
+  %1 = bitcast i16 %a to half
+  ret half %1
+}
+
+
+define i16 @bitcast_h_to_i(half %a) {
+; CHECK-LABEL: bitcast_h_to_i:
+; CHECK: fmov w0, s0
+  %1 = bitcast half %a to i16
+  ret i16 %1
+}
diff --git a/test/CodeGen/AArch64/fp16-v4-instructions.ll b/test/CodeGen/AArch64/fp16-v4-instructions.ll
new file mode 100644
index 000000000000..8e8968108c78
--- /dev/null
+++ b/test/CodeGen/AArch64/fp16-v4-instructions.ll
@@ -0,0 +1,122 @@
+; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+
+define <4 x half> @add_h(<4 x half> %a, <4 x half> %b) {
+entry:
+; CHECK-LABEL: add_h:
+; CHECK-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h
+; CHECK: fadd [[RES:v[0-9]+.4s]], [[OP1]], [[OP2]]
+; CHECK: fcvtn v0.4h, [[RES]]
+  %0 = fadd <4 x half> %a, %b
+  ret <4 x half> %0
+}
+
+
+define <4 x half> @sub_h(<4 x half> %a, <4 x half> %b) {
+entry:
+; CHECK-LABEL: sub_h:
+; CHECK-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h
+; CHECK: fsub [[RES:v[0-9]+.4s]], [[OP1]], [[OP2]]
+; CHECK: fcvtn v0.4h, [[RES]]
+  %0 = fsub <4 x half> %a, %b
+  ret <4 x half> %0
+}
+
+
+define <4 x half> @mul_h(<4 x half> %a, <4 x half> %b) {
+entry:
+; CHECK-LABEL: mul_h:
+; CHECK-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h
+; CHECK: fmul [[RES:v[0-9]+.4s]], [[OP1]], [[OP2]]
+; CHECK: fcvtn v0.4h, [[RES]]
+  %0 = fmul <4 x half> %a, %b
+  ret <4 x half> %0
+}
+
+
+define <4 x half> @div_h(<4 x half> %a, <4 x half> %b) {
+entry:
+; CHECK-LABEL: div_h:
+; CHECK-DAG: fcvtl [[OP1:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl [[OP2:v[0-9]+\.4s]], v1.4h
+; CHECK: fdiv [[RES:v[0-9]+.4s]], [[OP1]], [[OP2]]
+; CHECK: fcvtn v0.4h, [[RES]]
+  %0 = fdiv <4 x half> %a, %b
+  ret <4 x half> %0
+}
+
+
+define <4 x half> @load_h(<4 x half>* %a) {
+entry:
+; CHECK-LABEL: load_h:
+; CHECK: ldr d0, [x0]
+  %0 = load <4 x half>* %a, align 4
+  ret <4 x half> %0
+}
+
+
+define void @store_h(<4 x half>* %a, <4 x half> %b) {
+entry:
+; CHECK-LABEL: store_h:
+; CHECK: str d0, [x0]
+  store <4 x half> %b, <4 x half>* %a, align 4
+  ret void
+}
+
+define <4 x half> @s_to_h(<4 x float> %a) {
+; CHECK-LABEL: s_to_h:
+; CHECK: fcvtn v0.4h, v0.4s
+  %1 = fptrunc <4 x float> %a to <4 x half>
+  ret <4 x half> %1
+}
+
+define <4 x half> @d_to_h(<4 x double> %a) {
+; CHECK-LABEL: d_to_h:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: ins
+; CHECK-DAG: ins
+; CHECK-DAG: ins
+; CHECK-DAG: ins
+  %1 = fptrunc <4 x double> %a to <4 x half>
+  ret <4 x half> %1
+}
+
+define <4 x float> @h_to_s(<4 x half> %a) {
+; CHECK-LABEL: h_to_s:
+; CHECK: fcvtl v0.4s, v0.4h
+  %1 = fpext <4 x half> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <4 x double> @h_to_d(<4 x half> %a) {
+; CHECK-LABEL: h_to_d:
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: ins
+; CHECK-DAG: ins
+; CHECK-DAG: ins
+; CHECK-DAG: ins
+  %1 = fpext <4 x half> %a to <4 x double>
+  ret <4 x double> %1
+}
+
+define <4 x half> @bitcast_i_to_h(float, <4 x i16> %a) {
+; CHECK-LABEL: bitcast_i_to_h:
+; CHECK: mov v0.16b, v1.16b
+  %2 = bitcast <4 x i16> %a to <4 x half>
+  ret <4 x half> %2
+}
+
+define <4 x i16> @bitcast_h_to_i(float, <4 x half> %a) {
+; CHECK-LABEL: bitcast_h_to_i:
+; CHECK: mov v0.16b, v1.16b
+  %2 = bitcast <4 x half> %a to <4 x i16>
+  ret <4 x i16> %2
+}
diff --git a/test/CodeGen/AArch64/fp16-v8-instructions.ll b/test/CodeGen/AArch64/fp16-v8-instructions.ll
new file mode 100644
index 000000000000..9ee2296ace83
--- /dev/null
+++ b/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -0,0 +1,255 @@
+; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+
+define <8 x half> @add_h(<8 x half> %a, <8 x half> %b) {
+entry:
+; CHECK-LABEL: add_h:
+; CHECK: fcvt
+; CHECK: fcvt
+; CHECK-DAG: fadd
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fadd
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fadd
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fadd
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fadd
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fadd
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fadd
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fadd
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK: fcvt
+  %0 = fadd <8 x half> %a, %b
+  ret <8 x half> %0
+}
+
+
+define <8 x half> @sub_h(<8 x half> %a, <8 x half> %b) {
+entry:
+; CHECK-LABEL: sub_h:
+; CHECK: fcvt
+; CHECK: fcvt
+; CHECK-DAG: fsub
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fsub
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fsub
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fsub
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fsub
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fsub
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fsub
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fsub
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK: fcvt
+  %0 = fsub <8 x half> %a, %b
+  ret <8 x half> %0
+}
+
+
+define <8 x half> @mul_h(<8 x half> %a, <8 x half> %b) {
+entry:
+; CHECK-LABEL: mul_h:
+; CHECK: fcvt
+; CHECK: fcvt
+; CHECK-DAG: fmul
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fmul
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fmul
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fmul
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fmul
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fmul
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fmul
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fmul
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK: fcvt
+  %0 = fmul <8 x half> %a, %b
+  ret <8 x half> %0
+}
+
+
+define <8 x half> @div_h(<8 x half> %a, <8 x half> %b) {
+entry:
+; CHECK-LABEL: div_h:
+; CHECK: fcvt
+; CHECK: fcvt
+; CHECK-DAG: fdiv
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fdiv
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fdiv
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fdiv
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fdiv
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fdiv
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fdiv
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fdiv
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK-DAG: fcvt
+; CHECK: fcvt
+  %0 = fdiv <8 x half> %a, %b
+  ret <8 x half> %0
+}
+
+
+define <8 x half> @load_h(<8 x half>* %a) {
+entry:
+; CHECK-LABEL: load_h:
+; CHECK: ldr q0, [x0]
+  %0 = load <8 x half>* %a, align 4
+  ret <8 x half> %0
+}
+
+
+define void @store_h(<8 x half>* %a, <8 x half> %b) {
+entry:
+; CHECK-LABEL: store_h:
+; CHECK: str q0, [x0]
+  store <8 x half> %b, <8 x half>* %a, align 4
+  ret void
+}
+
+define <8 x half> @s_to_h(<8 x float> %a) {
+; CHECK-LABEL: s_to_h:
+; CHECK-DAG: fcvtn v0.4h, v0.4s
+; CHECK-DAG: fcvtn [[REG:v[0-9+]]].4h, v1.4s
+; CHECK: ins v0.d[1], [[REG]].d[0]
+  %1 = fptrunc <8 x float> %a to <8 x half>
+  ret <8 x half> %1
+}
+
+define <8 x half> @d_to_h(<8 x double> %a) {
+; CHECK-LABEL: d_to_h:
+; CHECK-DAG: ins v{{[0-9]+}}.d
+; CHECK-DAG: ins v{{[0-9]+}}.d
+; CHECK-DAG: ins v{{[0-9]+}}.d
+; CHECK-DAG: ins v{{[0-9]+}}.d
+; CHECK-DAG: fcvt h
+; CHECK-DAG: fcvt h
+; CHECK-DAG: fcvt h
+; CHECK-DAG: fcvt h
+; CHECK-DAG: fcvt h
+; CHECK-DAG: fcvt h
+; CHECK-DAG: fcvt h
+; CHECK-DAG: fcvt h
+; CHECK-DAG: ins v{{[0-9]+}}.h
+; CHECK-DAG: ins v{{[0-9]+}}.h
+; CHECK-DAG: ins v{{[0-9]+}}.h
+; CHECK-DAG: ins v{{[0-9]+}}.h
+; CHECK-DAG: ins v{{[0-9]+}}.h
+; CHECK-DAG: ins v{{[0-9]+}}.h
+; CHECK-DAG: ins v{{[0-9]+}}.h
+; CHECK-DAG: ins v{{[0-9]+}}.h
+  %1 = fptrunc <8 x double> %a to <8 x half>
+  ret <8 x half> %1
+}
+
+define <8 x float> @h_to_s(<8 x half> %a) {
+; CHECK-LABEL: h_to_s:
+; CHECK: fcvtl2 v1.4s, v0.8h
+; CHECK: fcvtl v0.4s, v0.4h
+  %1 = fpext <8 x half> %a to <8 x float>
+  ret <8 x float> %1
+}
+
+define <8 x double> @h_to_d(<8 x half> %a) {
+; CHECK-LABEL: h_to_d:
+; CHECK-DAG: fcvt d
+; CHECK-DAG: fcvt d
+; CHECK-DAG: fcvt d
+; CHECK-DAG: fcvt d
+; CHECK-DAG: fcvt d
+; CHECK-DAG: fcvt d
+; CHECK-DAG: fcvt d
+; CHECK-DAG: fcvt d
+; CHECK-DAG: ins
+; CHECK-DAG: ins
+; CHECK-DAG: ins
+; CHECK-DAG: ins
+  %1 = fpext <8 x half> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+
+define <8 x half> @bitcast_i_to_h(float, <8 x i16> %a) {
+; CHECK-LABEL: bitcast_i_to_h:
+; CHECK: mov v0.16b, v1.16b
+  %2 = bitcast <8 x i16> %a to <8 x half>
+  ret <8 x half> %2
+}
+
+define <8 x i16> @bitcast_h_to_i(float, <8 x half> %a) {
+; CHECK-LABEL: bitcast_h_to_i:
+; CHECK: mov v0.16b, v1.16b
+  %2 = bitcast <8 x half> %a to <8 x i16>
+  ret <8 x i16> %2
+}
+
diff --git a/test/CodeGen/AArch64/fp16-vector-bitcast.ll b/test/CodeGen/AArch64/fp16-vector-bitcast.ll
new file mode 100644
index 000000000000..421a4f52ff51
--- /dev/null
+++ b/test/CodeGen/AArch64/fp16-vector-bitcast.ll
@@ -0,0 +1,203 @@
+; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+
+define <4 x i16> @v4f16_to_v4i16(float, <4 x half> %a) #0 {
+; CHECK-LABEL: v4f16_to_v4i16:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <4 x half> %a to <4 x i16>
+  ret <4 x i16> %1
+}
+
+define <2 x i32> @v4f16_to_v2i32(float, <4 x half> %a) #0 {
+; CHECK-LABEL: v4f16_to_v2i32:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <4 x half> %a to <2 x i32>
+  ret <2 x i32> %1
+}
+
+define <1 x i64> @v4f16_to_v1i64(float, <4 x half> %a) #0 {
+; CHECK-LABEL: v4f16_to_v1i64:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <4 x half> %a to <1 x i64>
+  ret <1 x i64> %1
+}
+
+define i64 @v4f16_to_i64(float, <4 x half> %a) #0 {
+; CHECK-LABEL: v4f16_to_i64:
+; CHECK: fmov x0, d1
+entry:
+  %1 = bitcast <4 x half> %a to i64
+  ret i64 %1
+}
+
+define <2 x float> @v4f16_to_v2float(float, <4 x half> %a) #0 {
+; CHECK-LABEL: v4f16_to_v2float:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <4 x half> %a to <2 x float>
+  ret <2 x float> %1
+}
+
+define <1 x double> @v4f16_to_v1double(float, <4 x half> %a) #0 {
+; CHECK-LABEL: v4f16_to_v1double:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <4 x half> %a to <1 x double>
+  ret <1 x double> %1
+}
+
+define double @v4f16_to_double(float, <4 x half> %a) #0 {
+; CHECK-LABEL: v4f16_to_double:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <4 x half> %a to double
+  ret double %1
+}
+
+
+define <4 x half> @v4i16_to_v4f16(float, <4 x i16> %a) #0 {
+; CHECK-LABEL: v4i16_to_v4f16:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <4 x i16> %a to <4 x half>
+  ret <4 x half> %1
+}
+
+define <4 x half> @v2i32_to_v4f16(float, <2 x i32> %a) #0 {
+; CHECK-LABEL: v2i32_to_v4f16:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <2 x i32> %a to <4 x half>
+  ret <4 x half> %1
+}
+
+define <4 x half> @v1i64_to_v4f16(float, <1 x i64> %a) #0 {
+; CHECK-LABEL: v1i64_to_v4f16:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <1 x i64> %a to <4 x half>
+  ret <4 x half> %1
+}
+
+define <4 x half> @i64_to_v4f16(float, i64 %a) #0 {
+; CHECK-LABEL: i64_to_v4f16:
+; CHECK: fmov d0, x0
+entry:
+  %1 = bitcast i64 %a to <4 x half>
+  ret <4 x half> %1
+}
+
+define <4 x half> @v2float_to_v4f16(float, <2 x float> %a) #0 {
+; CHECK-LABEL: v2float_to_v4f16:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <2 x float> %a to <4 x half>
+  ret <4 x half> %1
+}
+
+define <4 x half> @v1double_to_v4f16(float, <1 x double> %a) #0 {
+; CHECK-LABEL: v1double_to_v4f16:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <1 x double> %a to <4 x half>
+  ret <4 x half> %1
+}
+
+define <4 x half> @double_to_v4f16(float, double %a) #0 {
+; CHECK-LABEL: double_to_v4f16:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast double %a to <4 x half>
+  ret <4 x half> %1
+}
+
+
+
+
+
+
+
+
+
+
+define <8 x i16> @v8f16_to_v8i16(float, <8 x half> %a) #0 {
+; CHECK-LABEL: v8f16_to_v8i16:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <8 x half> %a to <8 x i16>
+  ret <8 x i16> %1
+}
+
+define <4 x i32> @v8f16_to_v4i32(float, <8 x half> %a) #0 {
+; CHECK-LABEL: v8f16_to_v4i32:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <8 x half> %a to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <2 x i64> @v8f16_to_v2i64(float, <8 x half> %a) #0 {
+; CHECK-LABEL: v8f16_to_v2i64:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <8 x half> %a to <2 x i64>
+  ret <2 x i64> %1
+}
+
+define <4 x float> @v8f16_to_v4float(float, <8 x half> %a) #0 {
+; CHECK-LABEL: v8f16_to_v4float:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <8 x half> %a to <4 x float>
+  ret <4 x float> %1
+}
+
+define <2 x double> @v8f16_to_v2double(float, <8 x half> %a) #0 {
+; CHECK-LABEL: v8f16_to_v2double:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <8 x half> %a to <2 x double>
+  ret <2 x double> %1
+}
+
+define <8 x half> @v8i16_to_v8f16(float, <8 x i16> %a) #0 {
+; CHECK-LABEL: v8i16_to_v8f16:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <8 x i16> %a to <8 x half>
+  ret <8 x half> %1
+}
+
+define <8 x half> @v4i32_to_v8f16(float, <4 x i32> %a) #0 {
+; CHECK-LABEL: v4i32_to_v8f16:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <4 x i32> %a to <8 x half>
+  ret <8 x half> %1
+}
+
+define <8 x half> @v2i64_to_v8f16(float, <2 x i64> %a) #0 {
+; CHECK-LABEL: v2i64_to_v8f16:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <2 x i64> %a to <8 x half>
+  ret <8 x half> %1
+}
+
+define <8 x half> @v4float_to_v8f16(float, <4 x float> %a) #0 {
+; CHECK-LABEL: v4float_to_v8f16:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <4 x float> %a to <8 x half>
+  ret <8 x half> %1
+}
+
+define <8 x half> @v2double_to_v8f16(float, <2 x double> %a) #0 {
+; CHECK-LABEL: v2double_to_v8f16:
+; CHECK: mov v0.16b, v1.16b
+entry:
+  %1 = bitcast <2 x double> %a to <8 x half>
+  ret <8 x half> %1
+}
diff --git a/test/CodeGen/AArch64/fp16-vector-load-store.ll b/test/CodeGen/AArch64/fp16-vector-load-store.ll
new file mode 100644
index 000000000000..edbbffe001c5
--- /dev/null
+++ b/test/CodeGen/AArch64/fp16-vector-load-store.ll
@@ -0,0 +1,528 @@
+; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+
+; Simple load of v4i16
+define <4 x half> @load_64(<4 x half>* nocapture readonly %a) #0 {
+; CHECK-LABEL: load_64:
+; CHECK: ldr d0, [x0]
+entry:
+  %0 = load <4 x half>* %a, align 8
+  ret <4 x half> %0
+}
+
+; Simple load of v8i16
+define <8 x half> @load_128(<8 x half>* nocapture readonly %a) #0 {
+; CHECK-LABEL: load_128:
+; CHECK: ldr q0, [x0]
+entry:
+  %0 = load <8 x half>* %a, align 16
+  ret <8 x half> %0
+}
+
+; Duplicating load to v4i16
+define <4 x half> @load_dup_64(half* nocapture readonly %a) #0 {
+; CHECK-LABEL: load_dup_64:
+; CHECK: ld1r { v0.4h }, [x0]
+entry:
+  %0 = load half* %a, align 2
+  %1 = insertelement <4 x half> undef, half %0, i32 0
+  %2 = shufflevector <4 x half> %1, <4 x half> undef, <4 x i32> zeroinitializer
+  ret <4 x half> %2
+}
+
+; Duplicating load to v8i16
+define <8 x half> @load_dup_128(half* nocapture readonly %a) #0 {
+; CHECK-LABEL: load_dup_128:
+; CHECK: ld1r { v0.8h }, [x0]
+entry:
+  %0 = load half* %a, align 2
+  %1 = insertelement <8 x half> undef, half %0, i32 0
+  %2 = shufflevector <8 x half> %1, <8 x half> undef, <8 x i32> zeroinitializer
+  ret <8 x half> %2
+}
+
+; Load to one lane of v4f16
+define <4 x half> @load_lane_64(half* nocapture readonly %a, <4 x half> %b) #0 {
+; CHECK-LABEL: load_lane_64:
+; CHECK: ld1 { v0.h }[2], [x0]
+entry:
+  %0 = load half* %a, align 2
+  %1 = insertelement <4 x half> %b, half %0, i32 2
+  ret <4 x half> %1
+}
+
+; Load to one lane of v8f16
+define <8 x half> @load_lane_128(half* nocapture readonly %a, <8 x half> %b) #0 {
+; CHECK-LABEL: load_lane_128:
+; CHECK: ld1 { v0.h }[5], [x0]
+entry:
+  %0 = load half* %a, align 2
+  %1 = insertelement <8 x half> %b, half %0, i32 5
+  ret <8 x half> %1
+}
+
+; Simple store of v4f16
+define void @store_64(<4 x half>* nocapture %a, <4 x half> %b) #1 {
+; CHECK-LABEL: store_64:
+; CHECK: str d0, [x0]
+entry:
+  store <4 x half> %b, <4 x half>* %a, align 8
+  ret void
+}
+
+; Simple store of v8f16
+define void @store_128(<8 x half>* nocapture %a, <8 x half> %b) #1 {
+; CHECK-LABEL: store_128:
+; CHECK: str q0, [x0]
+entry:
+  store <8 x half> %b, <8 x half>* %a, align 16
+  ret void
+}
+
+; Store from one lane of v4f16
+define void @store_lane_64(half* nocapture %a, <4 x half> %b) #1 {
+; CHECK-LABEL: store_lane_64:
+; CHECK: st1 { v0.h }[2], [x0]
+entry:
+  %0 = extractelement <4 x half> %b, i32 2
+  store half %0, half* %a, align 2
+  ret void
+}
+
+; Store from one lane of v8f16
+define void @store_lane_128(half* nocapture %a, <8 x half> %b) #1 {
+; CHECK-LABEL: store_lane_128:
+; CHECK: st1 { v0.h }[5], [x0]
+entry:
+  %0 = extractelement <8 x half> %b, i32 5
+  store half %0, half* %a, align 2
+  ret void
+}
+
+; NEON intrinsics - (de-)interleaving loads and stores
+declare { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0v4f16(<4 x half>*)
+declare { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0v4f16(<4 x half>*)
+declare { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0v4f16(<4 x half>*)
+declare void @llvm.aarch64.neon.st2.v4f16.p0v4f16(<4 x half>, <4 x half>, <4 x half>*)
+declare void @llvm.aarch64.neon.st3.v4f16.p0v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x half>*)
+declare void @llvm.aarch64.neon.st4.v4f16.p0v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x half>, <4 x half>*)
+declare { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0v8f16(<8 x half>*)
+declare { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0v8f16(<8 x half>*)
+declare { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0v8f16(<8 x half>*)
+declare void @llvm.aarch64.neon.st2.v8f16.p0v8f16(<8 x half>, <8 x half>, <8 x half>*)
+declare void @llvm.aarch64.neon.st3.v8f16.p0v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x half>*)
+declare void @llvm.aarch64.neon.st4.v8f16.p0v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x half>, <8 x half>*)
+
+; Load 2 x v4f16 with de-interleaving
+define { <4 x half>, <4 x half> } @load_interleave_64_2(<4 x half>* %a) #0 {
+; CHECK-LABEL: load_interleave_64_2:
+; CHECK: ld2 { v0.4h, v1.4h }, [x0]
+entry:
+  %0 = tail call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0v4f16(<4 x half>* %a)
+  ret { <4 x half>, <4 x half> } %0
+}
+
+; Load 3 x v4f16 with de-interleaving
+define { <4 x half>, <4 x half>, <4 x half> } @load_interleave_64_3(<4 x half>* %a) #0 {
+; CHECK-LABEL: load_interleave_64_3:
+; CHECK: ld3 { v0.4h, v1.4h, v2.4h }, [x0]
+entry:
+  %0 = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0v4f16(<4 x half>* %a)
+  ret { <4 x half>, <4 x half>, <4 x half> } %0
+}
+
+; Load 4 x v4f16 with de-interleaving
+define { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @load_interleave_64_4(<4 x half>* %a) #0 {
+; CHECK-LABEL: load_interleave_64_4:
+; CHECK: ld4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x0]
+entry:
+  %0 = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0v4f16(<4 x half>* %a)
+  ret { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %0
+}
+
+; Store 2 x v4f16 with interleaving
+define void @store_interleave_64_2(<4 x half>* %a, <4 x half> %b, <4 x half> %c) #0 {
+; CHECK-LABEL: store_interleave_64_2:
+; CHECK: st2 { v0.4h, v1.4h }, [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st2.v4f16.p0v4f16(<4 x half> %b, <4 x half> %c, <4 x half>* %a)
+  ret void
+}
+
+; Store 3 x v4f16 with interleaving
+define void @store_interleave_64_3(<4 x half>* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) #0 {
+; CHECK-LABEL: store_interleave_64_3:
+; CHECK: st3 { v0.4h, v1.4h, v2.4h }, [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st3.v4f16.p0v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half>* %a)
+  ret void
+}
+
+; Store 4 x v4f16 with interleaving
+define void @store_interleave_64_4(<4 x half>* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e) #0 {
+; CHECK-LABEL: store_interleave_64_4:
+; CHECK: st4 { v0.4h, v1.4h, v2.4h, v3.4h }, [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st4.v4f16.p0v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e, <4 x half>* %a)
+  ret void
+}
+
+; Load 2 x v8f16 with de-interleaving
+define { <8 x half>, <8 x half> } @load_interleave_128_2(<8 x half>* %a) #0 {
+; CHECK-LABEL: load_interleave_128_2:
+; CHECK: ld2 { v0.8h, v1.8h }, [x0]
+entry:
+  %0 = tail call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0v8f16(<8 x half>* %a)
+  ret { <8 x half>, <8 x half> } %0
+}
+
+; Load 3 x v8f16 with de-interleaving
+define { <8 x half>, <8 x half>, <8 x half> } @load_interleave_128_3(<8 x half>* %a) #0 {
+; CHECK-LABEL: load_interleave_128_3:
+; CHECK: ld3 { v0.8h, v1.8h, v2.8h }, [x0]
+entry:
+  %0 = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0v8f16(<8 x half>* %a)
+  ret { <8 x half>, <8 x half>, <8 x half> } %0
+}
+
+; Load 8 x v8f16 with de-interleaving
+define { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @load_interleave_128_4(<8 x half>* %a) #0 {
+; CHECK-LABEL: load_interleave_128_4:
+; CHECK: ld4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x0]
+entry:
+  %0 = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0v8f16(<8 x half>* %a)
+  ret { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %0
+}
+
+; Store 2 x v8f16 with interleaving
+define void @store_interleave_128_2(<8 x half>* %a, <8 x half> %b, <8 x half> %c) #0 {
+; CHECK-LABEL: store_interleave_128_2:
+; CHECK: st2 { v0.8h, v1.8h }, [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st2.v8f16.p0v8f16(<8 x half> %b, <8 x half> %c, <8 x half>* %a)
+  ret void
+}
+
+; Store 3 x v8f16 with interleaving
+define void @store_interleave_128_3(<8 x half>* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d) #0 {
+; CHECK-LABEL: store_interleave_128_3:
+; CHECK: st3 { v0.8h, v1.8h, v2.8h }, [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st3.v8f16.p0v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half>* %a)
+  ret void
+}
+
+; Store 8 x v8f16 with interleaving
+define void @store_interleave_128_4(<8 x half>* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e) #0 {
+; CHECK-LABEL: store_interleave_128_4:
+; CHECK: st4 { v0.8h, v1.8h, v2.8h, v3.8h }, [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st4.v8f16.p0v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e, <8 x half>* %a)
+  ret void
+}
+
+; NEON intrinsics - duplicating loads
+declare { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2r.v4f16.p0f16(half*)
+declare { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3r.v4f16.p0f16(half*)
+declare { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4r.v4f16.p0f16(half*)
+declare { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2r.v8f16.p0f16(half*)
+declare { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3r.v8f16.p0f16(half*)
+declare { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4r.v8f16.p0f16(half*)
+
+; Load 2 x v4f16 with duplication
+define { <4 x half>, <4 x half> } @load_dup_64_2(half* %a) #0 {
+; CHECK-LABEL: load_dup_64_2:
+; CHECK: ld2r { v0.4h, v1.4h }, [x0]
+entry:
+  %0 = tail call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2r.v4f16.p0f16(half* %a)
+  ret { <4 x half>, <4 x half> } %0
+}
+
+; Load 3 x v4f16 with duplication
+define { <4 x half>, <4 x half>, <4 x half> } @load_dup_64_3(half* %a) #0 {
+; CHECK-LABEL: load_dup_64_3:
+; CHECK: ld3r { v0.4h, v1.4h, v2.4h }, [x0]
+entry:
+  %0 = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3r.v4f16.p0f16(half* %a)
+  ret { <4 x half>, <4 x half>, <4 x half> } %0
+}
+
+; Load 4 x v4f16 with duplication
+define { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @load_dup_64_4(half* %a) #0 {
+; CHECK-LABEL: load_dup_64_4:
+; CHECK: ld4r { v0.4h, v1.4h, v2.4h, v3.4h }, [x0]
+entry:
+  %0 = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4r.v4f16.p0f16(half* %a)
+  ret { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %0
+}
+
+; Load 2 x v8f16 with duplication
+define { <8 x half>, <8 x half> } @load_dup_128_2(half* %a) #0 {
+; CHECK-LABEL: load_dup_128_2:
+; CHECK: ld2r { v0.8h, v1.8h }, [x0]
+entry:
+  %0 = tail call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2r.v8f16.p0f16(half* %a)
+  ret { <8 x half>, <8 x half> } %0
+}
+
+; Load 3 x v8f16 with duplication
+define { <8 x half>, <8 x half>, <8 x half> } @load_dup_128_3(half* %a) #0 {
+; CHECK-LABEL: load_dup_128_3:
+; CHECK: ld3r { v0.8h, v1.8h, v2.8h }, [x0]
+entry:
+  %0 = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3r.v8f16.p0f16(half* %a)
+  ret { <8 x half>, <8 x half>, <8 x half> } %0
+}
+
+; Load 8 x v8f16 with duplication
+define { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @load_dup_128_4(half* %a) #0 {
+; CHECK-LABEL: load_dup_128_4:
+; CHECK: ld4r { v0.8h, v1.8h, v2.8h, v3.8h }, [x0]
+entry:
+  %0 = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4r.v8f16.p0f16(half* %a)
+  ret { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %0
+}
+
+
+; NEON intrinsics - loads and stores to/from one lane
+declare { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2lane.v4f16.p0f16(<4 x half>, <4 x half>, i64, half*)
+declare { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3lane.v4f16.p0f16(<4 x half>, <4 x half>, <4 x half>, i64, half*)
+declare { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4lane.v4f16.p0f16(<4 x half>, <4 x half>, <4 x half>, <4 x half>, i64, half*)
+declare void @llvm.aarch64.neon.st2lane.v4f16.p0f16(<4 x half>, <4 x half>, i64, half*)
+declare void @llvm.aarch64.neon.st3lane.v4f16.p0f16(<4 x half>, <4 x half>, <4 x half>, i64, half*)
+declare void @llvm.aarch64.neon.st4lane.v4f16.p0f16(<4 x half>, <4 x half>, <4 x half>, <4 x half>, i64, half*)
+declare { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2lane.v8f16.p0f16(<8 x half>, <8 x half>, i64, half*)
+declare { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3lane.v8f16.p0f16(<8 x half>, <8 x half>, <8 x half>, i64, half*)
+declare { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4lane.v8f16.p0f16(<8 x half>, <8 x half>, <8 x half>, <8 x half>, i64, half*)
+declare void @llvm.aarch64.neon.st2lane.v8f16.p0f16(<8 x half>, <8 x half>, i64, half*)
+declare void @llvm.aarch64.neon.st3lane.v8f16.p0f16(<8 x half>, <8 x half>, <8 x half>, i64, half*)
+declare void @llvm.aarch64.neon.st4lane.v8f16.p0f16(<8 x half>, <8 x half>, <8 x half>, <8 x half>, i64, half*)
+
+; Load one lane of 2 x v4f16
+define { <4 x half>, <4 x half> } @load_lane_64_2(half* %a, <4 x half> %b, <4 x half> %c) #0 {
+; CHECK-LABEL: load_lane_64_2:
+; CHECK: ld2 { v0.h, v1.h }[2], [x0]
+entry:
+  %0 = tail call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2lane.v4f16.p0f16(<4 x half> %b, <4 x half> %c, i64 2, half* %a)
+  ret { <4 x half>, <4 x half> } %0
+}
+
+; Load one lane of 3 x v4f16
+define { <4 x half>, <4 x half>, <4 x half> } @load_lane_64_3(half* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) #0 {
+; CHECK-LABEL: load_lane_64_3:
+; CHECK: ld3 { v0.h, v1.h, v2.h }[2], [x0]
+entry:
+  %0 = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3lane.v4f16.p0f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, i64 2, half* %a)
+  ret { <4 x half>, <4 x half>, <4 x half> } %0
+}
+
+; Load one lane of 4 x v4f16
+define { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @load_lane_64_4(half* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e) #0 {
+; CHECK-LABEL: load_lane_64_4:
+; CHECK: ld4 { v0.h, v1.h, v2.h, v3.h }[2], [x0]
+entry:
+  %0 = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4lane.v4f16.p0f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e, i64 2, half* %a)
+  ret { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %0
+}
+
+; Store one lane of 2 x v4f16
+define void @store_lane_64_2(half* %a, <4 x half> %b, <4 x half> %c) #0 {
+; CHECK-LABEL: store_lane_64_2:
+; CHECK: st2 { v0.h, v1.h }[2], [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st2lane.v4f16.p0f16(<4 x half> %b, <4 x half> %c, i64 2, half* %a)
+  ret void
+}
+
+; Store one lane of 3 x v4f16
+define void @store_lane_64_3(half* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) #0 {
+; CHECK-LABEL: store_lane_64_3:
+; CHECK: st3 { v0.h, v1.h, v2.h }[2], [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st3lane.v4f16.p0f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, i64 2, half* %a)
+  ret void
+}
+
+; Store one lane of 4 x v4f16
+define void @store_lane_64_4(half* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e) #0 {
+; CHECK-LABEL: store_lane_64_4:
+; CHECK: st4 { v0.h, v1.h, v2.h, v3.h }[2], [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st4lane.v4f16.p0f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e, i64 2, half* %a)
+  ret void
+}
+
+; Load one lane of 2 x v8f16
+define { <8 x half>, <8 x half> } @load_lane_128_2(half* %a, <8 x half> %b, <8 x half> %c) #0 {
+; CHECK-LABEL: load_lane_128_2:
+; CHECK: ld2 { v0.h, v1.h }[2], [x0]
+entry:
+  %0 = tail call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2lane.v8f16.p0f16(<8 x half> %b, <8 x half> %c, i64 2, half* %a)
+  ret { <8 x half>, <8 x half> } %0
+}
+
+; Load one lane of 3 x v8f16
+define { <8 x half>, <8 x half>, <8 x half> } @load_lane_128_3(half* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d) #0 {
+; CHECK-LABEL: load_lane_128_3:
+; CHECK: ld3 { v0.h, v1.h, v2.h }[2], [x0]
+entry:
+  %0 = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3lane.v8f16.p0f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, i64 2, half* %a)
+  ret { <8 x half>, <8 x half>, <8 x half> } %0
+}
+
+; Load one lane of 8 x v8f16
+define { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @load_lane_128_4(half* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e) #0 {
+; CHECK-LABEL: load_lane_128_4:
+; CHECK: ld4 { v0.h, v1.h, v2.h, v3.h }[2], [x0]
+entry:
+  %0 = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4lane.v8f16.p0f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e, i64 2, half* %a)
+  ret { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %0
+}
+
+; Store one lane of 2 x v8f16
+define void @store_lane_128_2(half* %a, <8 x half> %b, <8 x half> %c) #0 {
+; CHECK-LABEL: store_lane_128_2:
+; CHECK: st2 { v0.h, v1.h }[2], [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st2lane.v8f16.p0f16(<8 x half> %b, <8 x half> %c, i64 2, half* %a)
+  ret void
+}
+
+; Store one lane of 3 x v8f16
+define void @store_lane_128_3(half* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d) #0 {
+; CHECK-LABEL: store_lane_128_3:
+; CHECK: st3 { v0.h, v1.h, v2.h }[2], [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st3lane.v8f16.p0f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, i64 2, half* %a)
+  ret void
+}
+
+; Store one lane of 8 x v8f16
+define void @store_lane_128_4(half* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e) #0 {
+; CHECK-LABEL: store_lane_128_4:
+; CHECK: st4 { v0.h, v1.h, v2.h, v3.h }[2], [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st4lane.v8f16.p0f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e, i64 2, half* %a)
+  ret void
+}
+
+; NEON intrinsics - load/store without interleaving
+declare { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x2.v4f16.p0v4f16(<4 x half>*)
+declare { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x3.v4f16.p0v4f16(<4 x half>*)
+declare { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x4.v4f16.p0v4f16(<4 x half>*)
+declare void @llvm.aarch64.neon.st1x2.v4f16.p0v4f16(<4 x half>, <4 x half>, <4 x half>*)
+declare void @llvm.aarch64.neon.st1x3.v4f16.p0v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x half>*)
+declare void @llvm.aarch64.neon.st1x4.v4f16.p0v4f16(<4 x half>, <4 x half>, <4 x half>, <4 x half>, <4 x half>*)
+declare { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x2.v8f16.p0v8f16(<8 x half>*)
+declare { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x3.v8f16.p0v8f16(<8 x half>*)
+declare { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x4.v8f16.p0v8f16(<8 x half>*)
+declare void @llvm.aarch64.neon.st1x2.v8f16.p0v8f16(<8 x half>, <8 x half>, <8 x half>*)
+declare void @llvm.aarch64.neon.st1x3.v8f16.p0v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x half>*)
+declare void @llvm.aarch64.neon.st1x4.v8f16.p0v8f16(<8 x half>, <8 x half>, <8 x half>, <8 x half>, <8 x half>*)
+
+; Load 2 x v4f16 without de-interleaving
+define { <4 x half>, <4 x half> } @load_64_2(<4 x half>* %a) #0 {
+; CHECK-LABEL: load_64_2:
+; CHECK: ld1 { v0.4h, v1.4h }, [x0]
+entry:
+  %0 = tail call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x2.v4f16.p0v4f16(<4 x half>* %a)
+  ret { <4 x half>, <4 x half> } %0
+}
+
+; Load 3 x v4f16 without de-interleaving
+define { <4 x half>, <4 x half>, <4 x half> } @load_64_3(<4 x half>* %a) #0 {
+; CHECK-LABEL: load_64_3:
+; CHECK: ld1 { v0.4h, v1.4h, v2.4h }, [x0]
+entry:
+  %0 = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x3.v4f16.p0v4f16(<4 x half>* %a)
+  ret { <4 x half>, <4 x half>, <4 x half> } %0
+}
+
+; Load 4 x v4f16 without de-interleaving
+define { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @load_64_4(<4 x half>* %a) #0 {
+; CHECK-LABEL: load_64_4:
+; CHECK: ld1 { v0.4h, v1.4h, v2.4h, v3.4h }, [x0]
+entry:
+  %0 = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld1x4.v4f16.p0v4f16(<4 x half>* %a)
+  ret { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %0
+}
+
+; Store 2 x v4f16 without interleaving
+define void @store_64_2(<4 x half>* %a, <4 x half> %b, <4 x half> %c) #0 {
+; CHECK-LABEL: store_64_2:
+; CHECK: st1 { v0.4h, v1.4h }, [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st1x2.v4f16.p0v4f16(<4 x half> %b, <4 x half> %c, <4 x half>* %a)
+  ret void
+}
+
+; Store 3 x v4f16 without interleaving
+define void @store_64_3(<4 x half>* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) #0 {
+; CHECK-LABEL: store_64_3:
+; CHECK: st1 { v0.4h, v1.4h, v2.4h }, [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st1x3.v4f16.p0v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half>* %a)
+  ret void
+}
+
+; Store 4 x v4f16 without interleaving
+define void @store_64_4(<4 x half>* %a, <4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e) #0 {
+; CHECK-LABEL: store_64_4:
+; CHECK: st1 { v0.4h, v1.4h, v2.4h, v3.4h }, [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st1x4.v4f16.p0v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %d, <4 x half> %e, <4 x half>* %a)
+  ret void
+}
+
+; Load 2 x v8f16 without de-interleaving
+define { <8 x half>, <8 x half> } @load_128_2(<8 x half>* %a) #0 {
+; CHECK-LABEL: load_128_2:
+; CHECK: ld1 { v0.8h, v1.8h }, [x0]
+entry:
+  %0 = tail call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x2.v8f16.p0v8f16(<8 x half>* %a)
+  ret { <8 x half>, <8 x half> } %0
+}
+
+; Load 3 x v8f16 without de-interleaving
+define { <8 x half>, <8 x half>, <8 x half> } @load_128_3(<8 x half>* %a) #0 {
+; CHECK-LABEL: load_128_3:
+; CHECK: ld1 { v0.8h, v1.8h, v2.8h }, [x0]
+entry:
+  %0 = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x3.v8f16.p0v8f16(<8 x half>* %a)
+  ret { <8 x half>, <8 x half>, <8 x half> } %0
+}
+
+; Load 8 x v8f16 without de-interleaving
+define { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @load_128_4(<8 x half>* %a) #0 {
+; CHECK-LABEL: load_128_4:
+; CHECK: ld1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x0]
+entry:
+  %0 = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld1x4.v8f16.p0v8f16(<8 x half>* %a)
+  ret { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %0
+}
+
+; Store 2 x v8f16 without interleaving
+define void @store_128_2(<8 x half>* %a, <8 x half> %b, <8 x half> %c) #0 {
+; CHECK-LABEL: store_128_2:
+; CHECK: st1 { v0.8h, v1.8h }, [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st1x2.v8f16.p0v8f16(<8 x half> %b, <8 x half> %c, <8 x half>* %a)
+  ret void
+}
+
+; Store 3 x v8f16 without interleaving
+define void @store_128_3(<8 x half>* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d) #0 {
+; CHECK-LABEL: store_128_3:
+; CHECK: st1 { v0.8h, v1.8h, v2.8h }, [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st1x3.v8f16.p0v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half>* %a)
+  ret void
+}
+
+; Store 8 x v8f16 without interleaving
+define void @store_128_4(<8 x half>* %a, <8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e) #0 {
+; CHECK-LABEL: store_128_4:
+; CHECK: st1 { v0.8h, v1.8h, v2.8h, v3.8h }, [x0]
+entry:
+  tail call void @llvm.aarch64.neon.st1x4.v8f16.p0v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %d, <8 x half> %e, <8 x half>* %a)
+  ret void
+}
diff --git a/test/CodeGen/AArch64/fp16-vector-shuffle.ll b/test/CodeGen/AArch64/fp16-vector-shuffle.ll
new file mode 100644
index 000000000000..74d1b436e126
--- /dev/null
+++ b/test/CodeGen/AArch64/fp16-vector-shuffle.ll
@@ -0,0 +1,301 @@
+; RUN: llc < %s -mtriple=aarch64-none-eabi | FileCheck %s
+
+; float16x4_t select_64(float16x4_t a, float16x4_t b, uint16x4_t c) { return vbsl_u16(c, a, b); }
+define <4 x half> @select_64(<4 x half> %a, <4 x half> %b, <4 x i16> %c) #0 {
+; CHECK-LABEL: select_64:
+; CHECK: bsl
+entry:
+  %0 = bitcast <4 x half> %a to <4 x i16>
+  %1 = bitcast <4 x half> %b to <4 x i16>
+  %vbsl3.i = and <4 x i16> %0, %c
+  %2 = xor <4 x i16> %c, <i16 -1, i16 -1, i16 -1, i16 -1>
+  %vbsl4.i = and <4 x i16> %1, %2
+  %vbsl5.i = or <4 x i16> %vbsl3.i, %vbsl4.i
+  %3 = bitcast <4 x i16> %vbsl5.i to <4 x half>
+  ret <4 x half> %3
+}
+
+; float16x8_t select_128(float16x8_t a, float16x8_t b, uint16x8_t c) { return vbslq_u16(c, a, b); }
+define <8 x half> @select_128(<8 x half> %a, <8 x half> %b, <8 x i16> %c) #0 {
+; CHECK-LABEL: select_128:
+; CHECK: bsl
+entry:
+  %0 = bitcast <8 x half> %a to <8 x i16>
+  %1 = bitcast <8 x half> %b to <8 x i16>
+  %vbsl3.i = and <8 x i16> %0, %c
+  %2 = xor <8 x i16> %c, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %vbsl4.i = and <8 x i16> %1, %2
+  %vbsl5.i = or <8 x i16> %vbsl3.i, %vbsl4.i
+  %3 = bitcast <8 x i16> %vbsl5.i to <8 x half>
+  ret <8 x half> %3
+}
+
+; float16x4_t lane_64_64(float16x4_t a, float16x4_t b) {
+;  return vcopy_lane_s16(a, 1, b, 2);
+; }
+define <4 x half> @lane_64_64(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: lane_64_64:
+; CHECK: ins
+entry:
+  %0 = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+  ret <4 x half> %0
+}
+
+; float16x8_t lane_128_64(float16x8_t a, float16x4_t b) {
+;   return vcopyq_lane_s16(a, 1, b, 2);
+; }
+define <8 x half> @lane_128_64(<8 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: lane_128_64:
+; CHECK: ins
+entry:
+  %0 = bitcast <4 x half> %b to <4 x i16>
+  %vget_lane = extractelement <4 x i16> %0, i32 2
+  %1 = bitcast <8 x half> %a to <8 x i16>
+  %vset_lane = insertelement <8 x i16> %1, i16 %vget_lane, i32 1
+  %2 = bitcast <8 x i16> %vset_lane to <8 x half>
+  ret <8 x half> %2
+}
+
+; float16x4_t lane_64_128(float16x4_t a, float16x8_t b) {
+;   return vcopy_laneq_s16(a, 3, b, 5);
+; }
+define <4 x half> @lane_64_128(<4 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: lane_64_128:
+; CHECK: ins
+entry:
+  %0 = bitcast <8 x half> %b to <8 x i16>
+  %vgetq_lane = extractelement <8 x i16> %0, i32 5
+  %1 = bitcast <4 x half> %a to <4 x i16>
+  %vset_lane = insertelement <4 x i16> %1, i16 %vgetq_lane, i32 3
+  %2 = bitcast <4 x i16> %vset_lane to <4 x half>
+  ret <4 x half> %2
+}
+
+; float16x8_t lane_128_128(float16x8_t a, float16x8_t b) {
+;   return vcopyq_laneq_s16(a, 3, b, 5);
+; }
+define <8 x half> @lane_128_128(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: lane_128_128:
+; CHECK: ins
+entry:
+  %0 = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x half> %0
+}
+
+; float16x4_t ext_64(float16x4_t a, float16x4_t b) {
+;   return vext_s16(a, b, 3);
+; }
+define <4 x half> @ext_64(<4 x half> %a, <4 x half> %b) #0 {
+; CHECK-LABEL: ext_64:
+; CHECK: ext
+entry:
+  %0 = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+  ret <4 x half> %0
+}
+
+; float16x8_t ext_128(float16x8_t a, float16x8_t b) {
+;   return vextq_s16(a, b, 3);
+; }
+define <8 x half> @ext_128(<8 x half> %a, <8 x half> %b) #0 {
+; CHECK-LABEL: ext_128:
+; CHECK: ext
+entry:
+  %0 = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+  ret <8 x half> %0
+}
+
+; float16x4_t rev32_64(float16x4_t a) {
+;   return vrev32_s16(a);
+; }
+define <4 x half> @rev32_64(<4 x half> %a) #0 {
+entry:
+; CHECK-LABEL: rev32_64:
+; CHECK: rev32
+  %0 = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x half> %0
+}
+
+; float16x4_t rev64_64(float16x4_t a) {
+;   return vrev64_s16(a);
+; }
+define <4 x half> @rev64_64(<4 x half> %a) #0 {
+entry:
+; CHECK-LABEL: rev64_64:
+; CHECK: rev64
+  %0 = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x half> %0
+}
+
+; float16x8_t rev32_128(float16x8_t a) {
+;   return vrev32q_s16(a);
+; }
+define <8 x half> @rev32_128(<8 x half> %a) #0 {
+entry:
+; CHECK-LABEL: rev32_128:
+; CHECK: rev32
+  %0 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  ret <8 x half> %0
+}
+
+; float16x8_t rev64_128(float16x8_t a) {
+;   return vrev64q_s16(a);
+; }
+define <8 x half> @rev64_128(<8 x half> %a) #0 {
+entry:
+; CHECK-LABEL: rev64_128:
+; CHECK: rev64
+  %0 = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x half> %0
+}
+
+; float16x4_t create_64(long long a) { return vcreate_f16(a); }
+define <4 x half> @create_64(i64 %a) #0 {
+; CHECK-LABEL: create_64:
+; CHECK: fmov
+entry:
+  %0 = bitcast i64 %a to <4 x half>
+  ret <4 x half> %0
+}
+
+; float16x4_t dup_64(__fp16 a) { return vdup_n_f16(a); }
+define <4 x half> @dup_64(half %a) #0 {
+; CHECK-LABEL: dup_64:
+; CHECK: dup
+entry:
+  %vecinit = insertelement <4 x half> undef, half %a, i32 0
+  %vecinit1 = insertelement <4 x half> %vecinit, half %a, i32 1
+  %vecinit2 = insertelement <4 x half> %vecinit1, half %a, i32 2
+  %vecinit3 = insertelement <4 x half> %vecinit2, half %a, i32 3
+  ret <4 x half> %vecinit3
+}
+
+; float16x8_t dup_128(__fp16 a) { return vdupq_n_f16(a); }
+define <8 x half> @dup_128(half %a) #0 {
+entry:
+; CHECK-LABEL: dup_128:
+; CHECK: dup
+  %vecinit = insertelement <8 x half> undef, half %a, i32 0
+  %vecinit1 = insertelement <8 x half> %vecinit, half %a, i32 1
+  %vecinit2 = insertelement <8 x half> %vecinit1, half %a, i32 2
+  %vecinit3 = insertelement <8 x half> %vecinit2, half %a, i32 3
+  %vecinit4 = insertelement <8 x half> %vecinit3, half %a, i32 4
+  %vecinit5 = insertelement <8 x half> %vecinit4, half %a, i32 5
+  %vecinit6 = insertelement <8 x half> %vecinit5, half %a, i32 6
+  %vecinit7 = insertelement <8 x half> %vecinit6, half %a, i32 7
+  ret <8 x half> %vecinit7
+}
+
+; float16x4_t dup_lane_64(float16x4_t a) { return vdup_lane_f16(a, 2); }
+define <4 x half> @dup_lane_64(<4 x half> %a) #0 {
+entry:
+; CHECK-LABEL: dup_lane_64:
+; CHECK: dup
+  %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x half> %shuffle
+}
+
+; float16x8_t dup_lane_128(float16x4_t a) { return vdupq_lane_f16(a, 2); }
+define <8 x half> @dup_lane_128(<4 x half> %a) #0 {
+entry:
+; CHECK-LABEL: dup_lane_128:
+; CHECK: dup
+  %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  ret <8 x half> %shuffle
+}
+
+; float16x4_t dup_laneq_64(float16x8_t a) { return vdup_laneq_f16(a, 2); }
+define <4 x half> @dup_laneq_64(<8 x half> %a) #0 {
+entry:
+; CHECK-LABEL: dup_laneq_64:
+; CHECK: dup
+  %shuffle = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
+  ret <4 x half> %shuffle
+}
+
+; float16x8_t dup_laneq_128(float16x8_t a) { return vdupq_laneq_f16(a, 2); }
+define <8 x half> @dup_laneq_128(<8 x half> %a) #0 {
+entry:
+; CHECK-LABEL: dup_laneq_128:
+; CHECK: dup
+  %shuffle = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+  ret <8 x half> %shuffle
+}
+
+; float16x8_t vcombine(float16x4_t a, float16x4_t b) { return vcombine_f16(a, b); }
+define <8 x half> @vcombine(<4 x half> %a, <4 x half> %b) #0 {
+entry:
+; CHECK-LABEL: vcombine:
+; CHECK: ins
+  %shuffle.i = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x half> %shuffle.i
+}
+
+; float16x4_t get_high(float16x8_t a) { return vget_high_f16(a); }
+define <4 x half> @get_high(<8 x half> %a) #0 {
+; CHECK-LABEL: get_high:
+; CHECK: ext
+entry:
+  %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  ret <4 x half> %shuffle.i
+}
+
+
+; float16x4_t get_low(float16x8_t a) { return vget_low_f16(a); }
+define <4 x half> @get_low(<8 x half> %a) #0 {
+; CHECK-LABEL: get_low:
+; CHECK-NOT: ext
+entry:
+  %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x half> %shuffle.i
+}
+
+; float16x4_t set_lane_64(float16x4_t a, __fp16 b) { return vset_lane_f16(b, a, 2); }
+define <4 x half> @set_lane_64(<4 x half> %a, half %b) #0 {
+; CHECK-LABEL: set_lane_64:
+; CHECK: fmov
+; CHECK: ins
+entry:
+  %0 = bitcast half %b to i16
+  %1 = bitcast <4 x half> %a to <4 x i16>
+  %vset_lane = insertelement <4 x i16> %1, i16 %0, i32 2
+  %2 = bitcast <4 x i16> %vset_lane to <4 x half>
+  ret <4 x half> %2
+}
+
+
+; float16x8_t set_lane_128(float16x8_t a, __fp16 b) { return vsetq_lane_f16(b, a, 2); }
+define <8 x half> @set_lane_128(<8 x half> %a, half %b) #0 {
+; CHECK-LABEL: set_lane_128:
+; CHECK: fmov
+; CHECK: ins
+entry:
+  %0 = bitcast half %b to i16
+  %1 = bitcast <8 x half> %a to <8 x i16>
+  %vset_lane = insertelement <8 x i16> %1, i16 %0, i32 2
+  %2 = bitcast <8 x i16> %vset_lane to <8 x half>
+  ret <8 x half> %2
+}
+
+; __fp16 get_lane_64(float16x4_t a) { return vget_lane_f16(a, 2); }
+define half @get_lane_64(<4 x half> %a) #0 {
+; CHECK-LABEL: get_lane_64:
+; CHECK: umov
+; CHECK: fmov
+entry:
+  %0 = bitcast <4 x half> %a to <4 x i16>
+  %vget_lane = extractelement <4 x i16> %0, i32 2
+  %1 = bitcast i16 %vget_lane to half
+  ret half %1
+}
+
+; __fp16 get_lane_128(float16x8_t a) { return vgetq_lane_f16(a, 2); }
+define half @get_lane_128(<8 x half> %a) #0 {
+; CHECK-LABEL: get_lane_128:
+; CHECK: umov
+; CHECK: fmov
+entry:
+  %0 = bitcast <8 x half> %a to <8 x i16>
+  %vgetq_lane = extractelement <8 x i16> %0, i32 2
+  %1 = bitcast i16 %vgetq_lane to half
+  ret half %1
+}
diff --git a/test/CodeGen/AArch64/fpconv-vector-op-scalarize.ll b/test/CodeGen/AArch64/fpconv-vector-op-scalarize.ll
new file mode 100644
index 000000000000..56e0b4a0b180
--- /dev/null
+++ b/test/CodeGen/AArch64/fpconv-vector-op-scalarize.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s
+
+; PR20778
+; Check that the legalizer doesn't crash when scalarizing FP conversion
+; instructions' operands.  The operands are all illegal on AArch64,
+; ensuring they are legalized.  The results are all legal.
+
+define <1 x double> @test_sitofp(<1 x i1> %in) {
+; CHECK-LABEL: test_sitofp:
+; CHECK:       sbfx  [[GPR:w[0-9]+]], w0, #0, #1
+; CHECK-NEXT:  scvtf d0, [[GPR]]
+; CHECK-NEXT:  ret
+entry:
+  %0 = sitofp <1 x i1> %in to <1 x double>
+  ret <1 x double> %0
+}
+
+define <1 x double> @test_uitofp(<1 x i1> %in) {
+; CHECK-LABEL: test_uitofp:
+; CHECK:       and   [[GPR:w[0-9]+]], w0, #0x1
+; CHECK-NEXT:  ucvtf d0, [[GPR]]
+; CHECK-NEXT:  ret
+entry:
+  %0 = uitofp <1 x i1> %in to <1 x double>
+  ret <1 x double> %0
+}
+
+define <1 x i64> @test_fptosi(<1 x fp128> %in) {
+; CHECK-LABEL: test_fptosi:
+; CHECK:       bl    ___fixtfdi
+; CHECK-NEXT:  fmov  d0, x0
+entry:
+  %0 = fptosi <1 x fp128> %in to <1 x i64>
+  ret <1 x i64> %0
+}
+
+define <1 x i64> @test_fptoui(<1 x fp128> %in) {
+; CHECK-LABEL: test_fptoui:
+; CHECK:       bl    ___fixunstfdi
+; CHECK-NEXT:  fmov  d0, x0
+entry:
+  %0 = fptoui <1 x fp128> %in to <1 x i64>
+  ret <1 x i64> %0
+}
diff --git a/test/CodeGen/AArch64/fpimm.ll b/test/CodeGen/AArch64/fpimm.ll
index e59520c4dc95..b7db9182a393 100644
--- a/test/CodeGen/AArch64/fpimm.ll
+++ b/test/CodeGen/AArch64/fpimm.ll
@@ -1,4 +1,6 @@
-; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu                                                  -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -code-model=large                             -verify-machineinstrs < %s | FileCheck %s --check-prefix=LARGE
+; RUN: llc -mtriple=aarch64-apple-darwin -code-model=large -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s --check-prefix=LARGE
 
 @varf32 = global float 0.0
 @varf64 = global double 0.0
@@ -34,3 +36,22 @@ define void @check_double() {
 ; CHECK: ret
   ret void
 }
+
+; LARGE-LABEL: check_float2
+; LARGE:       movz [[REG:w[0-9]+]], #0x4049, lsl #16
+; LARGE-NEXT:  movk [[REG]], #0xfdb
+; LARGE-NEXT:  fmov s0, [[REG]]
+define float @check_float2() {
+  ret float 3.14159274101257324218750
+}
+
+; LARGE-LABEL: check_double2
+; LARGE:       movz [[REG:x[0-9]+]], #0x4009, lsl #48
+; LARGE-NEXT:  movk [[REG]], #0x21fb, lsl #32
+; LARGE-NEXT:  movk [[REG]], #0x5444, lsl #16
+; LARGE-NEXT:  movk [[REG]], #0x2d18
+; LARGE-NEXT:  fmov d0, [[REG]]
+define double @check_double2() {
+  ret double 3.1415926535897931159979634685441851615905761718750
+}
+
diff --git a/test/CodeGen/AArch64/frameaddr.ll b/test/CodeGen/AArch64/frameaddr.ll
index 85d95e21c9b7..d6bb50e57a73 100644
--- a/test/CodeGen/AArch64/frameaddr.ll
+++ b/test/CodeGen/AArch64/frameaddr.ll
@@ -1,20 +1,29 @@
-; RUN: llc -o - %s -mtriple=arm64-apple-ios7.0  | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin                             -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
 
-define i8* @t() nounwind {
+define i8* @test_frameaddress0() nounwind {
 entry:
-; CHECK-LABEL: t:
+; CHECK-LABEL: test_frameaddress0:
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
 ; CHECK: mov x0, x29
-	%0 = call i8* @llvm.frameaddress(i32 0)
-        ret i8* %0
+; CHECK: ldp x29, x30, [sp], #16
+; CHECK: ret
+  %0 = call i8* @llvm.frameaddress(i32 0)
+  ret i8* %0
 }
 
-define i8* @t2() nounwind {
+define i8* @test_frameaddress2() nounwind {
 entry:
-; CHECK-LABEL: t2:
+; CHECK-LABEL: test_frameaddress2:
+; CHECK: stp x29, x30, [sp, #-16]!
+; CHECK: mov x29, sp
 ; CHECK: ldr x[[reg:[0-9]+]], [x29]
-; CHECK: ldr {{x[0-9]+}}, [x[[reg]]]
-	%0 = call i8* @llvm.frameaddress(i32 2)
-        ret i8* %0
+; CHECK: ldr x0, [x[[reg]]]
+; CHECK: ldp x29, x30, [sp], #16
+; CHECK: ret
+  %0 = call i8* @llvm.frameaddress(i32 2)
+  ret i8* %0
 }
 
 declare i8* @llvm.frameaddress(i32) nounwind readnone
diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll
index abb732ccf43a..9fc9a5f0190f 100644
--- a/test/CodeGen/AArch64/func-argpassing.ll
+++ b/test/CodeGen/AArch64/func-argpassing.ll
@@ -96,10 +96,8 @@ define [2 x i64] @return_struct() {
     %addr = bitcast %myStruct* @varstruct to [2 x i64]*
     %val = load [2 x i64]* %addr
     ret [2 x i64] %val
-; CHECK-DAG: ldr x0, [{{x[0-9]+}}, {{#?}}:lo12:varstruct]
-    ; Odd register regex below disallows x0 which we want to be live now.
-; CHECK-DAG: add {{x[1-9][0-9]*}}, {{x[1-9][0-9]*}}, {{#?}}:lo12:varstruct
-; CHECK: ldr x1, [{{x[1-9][0-9]*}}, #8]
+; CHECK: add x[[VARSTRUCT:[0-9]+]], {{x[0-9]+}}, :lo12:varstruct
+; CHECK: ldp x0, x1, [x[[VARSTRUCT]]]
     ; Make sure epilogue immediately follows
 ; CHECK-NEXT: ret
 }
@@ -166,8 +164,8 @@ define void @stacked_fpu(float %var0, double %var1, float %var2, float %var3,
 define i64 @check_i128_regalign(i32 %val0, i128 %val1, i64 %val2) {
 ; CHECK-LABEL: check_i128_regalign
     store i128 %val1, i128* @var128
-; CHECK-DAG: str x2, [{{x[0-9]+}}, {{#?}}:lo12:var128]
-; CHECK-DAG: str x3, [{{x[0-9]+}}, #8]
+; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
+; CHECK-DAG: stp x2, x3, [x[[VAR128]]]
 
     ret i64 %val2
 ; CHECK: mov x0, x4
diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll
index 422c5765ec48..16157f83aaca 100644
--- a/test/CodeGen/AArch64/func-calls.ll
+++ b/test/CodeGen/AArch64/func-calls.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefix=CHECK
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-neon | FileCheck --check-prefix=CHECK-NONEON %s
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=arm64_be-none-linux-gnu | FileCheck --check-prefix=CHECK-BE %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64_be-none-linux-gnu | FileCheck --check-prefix=CHECK-BE %s
 
 %myStruct = type { i64 , i8, i32 }
 
@@ -62,8 +62,8 @@ define void @simple_rets() {
   %arr = call [2 x i64] @return_smallstruct()
   store [2 x i64] %arr, [2 x i64]* @varsmallstruct
 ; CHECK: bl return_smallstruct
-; CHECK: str x1, [{{x[0-9]+}}, #8]
-; CHECK: str x0, [{{x[0-9]+}}, {{#?}}:lo12:varsmallstruct]
+; CHECK: add x[[VARSMALLSTRUCT:[0-9]+]], {{x[0-9]+}}, :lo12:varsmallstruct
+; CHECK: stp x0, x1, [x[[VARSMALLSTRUCT]]]
 
   call void @return_large_struct(%myStruct* sret @varstruct)
 ; CHECK: add x8, {{x[0-9]+}}, {{#?}}:lo12:varstruct
@@ -128,12 +128,12 @@ define void @check_i128_align() {
   call void @check_i128_stackalign(i32 0, i32 1, i32 2, i32 3,
                                    i32 4, i32 5, i32 6, i32 7,
                                    i32 42, i128 %val)
-; CHECK: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:var128]
-; CHECK: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8]
+; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
+; CHECK: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]]
 ; CHECK: stp [[I128LO]], [[I128HI]], [sp, #16]
 
-; CHECK-NONEON: ldr [[I128LO:x[0-9]+]], [{{x[0-9]+}}, :lo12:var128]
-; CHECK-NONEON: ldr [[I128HI:x[0-9]+]], [{{x[0-9]+}}, #8]
+; CHECK-NONEON: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128
+; CHECK-NONEON: ldp [[I128LO:x[0-9]+]], [[I128HI:x[0-9]+]], [x[[VAR128]]]
 ; CHECK-NONEON: stp [[I128LO]], [[I128HI]], [sp, #16]
 ; CHECK: bl check_i128_stackalign
 
diff --git a/test/CodeGen/AArch64/global-merge-1.ll b/test/CodeGen/AArch64/global-merge-1.ll
index 68aba5ebe065..7dc8da1c444b 100644
--- a/test/CodeGen/AArch64/global-merge-1.ll
+++ b/test/CodeGen/AArch64/global-merge-1.ll
@@ -11,6 +11,7 @@
 @n = internal global i32 0, align 4
 
 define void @f1(i32 %a1, i32 %a2) {
+;CHECK-APPLE-IOS-NOT: adrp
 ;CHECK-APPLE-IOS: adrp	x8, __MergedGlobals@PAGE
 ;CHECK-APPLE-IOS-NOT: adrp
 ;CHECK-APPLE-IOS: add	x8, x8, __MergedGlobals@PAGEOFF
diff --git a/test/CodeGen/AArch64/global-merge-2.ll b/test/CodeGen/AArch64/global-merge-2.ll
index a7735667b359..70b700c7e57a 100644
--- a/test/CodeGen/AArch64/global-merge-2.ll
+++ b/test/CodeGen/AArch64/global-merge-2.ll
@@ -8,6 +8,7 @@
 
 define void @f1(i32 %a1, i32 %a2) {
 ;CHECK-APPLE-IOS-LABEL: _f1:
+;CHECK-APPLE-IOS-NOT: adrp
 ;CHECK-APPLE-IOS: adrp	x8, __MergedGlobals_x@PAGE
 ;CHECK-APPLE-IOS: add	x8, x8, __MergedGlobals_x@PAGEOFF
 ;CHECK-APPLE-IOS-NOT: adrp
diff --git a/test/CodeGen/AArch64/init-array.ll b/test/CodeGen/AArch64/init-array.ll
index f47b490baebd..a275e7ecc570 100644
--- a/test/CodeGen/AArch64/init-array.ll
+++ b/test/CodeGen/AArch64/init-array.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -use-init-array -o - %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -use-init-array -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-none-eabi -verify-machineinstrs -o - %s | FileCheck %s
 
 define internal void @_GLOBAL__I_a() section ".text.startup" {
   ret void
diff --git a/test/CodeGen/AArch64/jump-table.ll b/test/CodeGen/AArch64/jump-table.ll
index 69fbd9972b87..16682e92c17d 100644
--- a/test/CodeGen/AArch64/jump-table.ll
+++ b/test/CodeGen/AArch64/jump-table.ll
@@ -56,10 +56,11 @@ lbl4:
 ; CHECK-NEXT: .xword
 
 ; CHECK-PIC-NOT: .data_region
+; CHECK-PIC-NOT: .LJTI0_0
 ; CHECK-PIC: .LJTI0_0:
-; CHECK-PIC-NEXT: .word
-; CHECK-PIC-NEXT: .word
-; CHECK-PIC-NEXT: .word
-; CHECK-PIC-NEXT: .word
-; CHECK-PIC-NEXT: .word
+; CHECK-PIC-NEXT: .word .LBB{{.*}}-.LJTI0_0
+; CHECK-PIC-NEXT: .word .LBB{{.*}}-.LJTI0_0
+; CHECK-PIC-NEXT: .word .LBB{{.*}}-.LJTI0_0
+; CHECK-PIC-NEXT: .word .LBB{{.*}}-.LJTI0_0
+; CHECK-PIC-NEXT: .word .LBB{{.*}}-.LJTI0_0
 ; CHECK-PIC-NOT: .end_data_region
diff --git a/test/CodeGen/AArch64/legalize-bug-bogus-cpu.ll b/test/CodeGen/AArch64/legalize-bug-bogus-cpu.ll
new file mode 100644
index 000000000000..b785a8f045f4
--- /dev/null
+++ b/test/CodeGen/AArch64/legalize-bug-bogus-cpu.ll
@@ -0,0 +1,8 @@
+; RUN: llc -march=aarch64 -mcpu=bogus -o - %s
+
+; Fix the bug in PR20557. Set mcpu to a bogus name, llc will crash in type
+; legalization.
+define <4 x float> @fneg4(<4 x float> %x) {
+  %sub = fsub <4 x float> zeroinitializer, %x
+  ret <4 x float> %sub
+}
diff --git a/test/CodeGen/AArch64/machine_cse.ll b/test/CodeGen/AArch64/machine_cse.ll
new file mode 100644
index 000000000000..bc9ab1078759
--- /dev/null
+++ b/test/CodeGen/AArch64/machine_cse.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 | FileCheck %s
+
+; marked as external to prevent possible optimizations
+@a = external global i32
+@b = external global i32
+@c = external global i32
+@d = external global i32
+@e = external global i32
+
+define void @combine-sign-comparisons-by-cse(i32 *%arg) {
+; CHECK: cmp
+; CHECK: b.ge
+; CHECK-NOT: cmp
+; CHECK: b.le
+
+entry:
+  %a = load i32* @a, align 4
+  %b = load i32* @b, align 4
+  %c = load i32* @c, align 4
+  %d = load i32* @d, align 4
+  %e = load i32* @e, align 4
+
+  %cmp = icmp slt i32 %a, %e
+  br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
+
+land.lhs.true:
+  %cmp1 = icmp eq i32 %b, %c
+  br i1 %cmp1, label %return, label %if.end
+
+lor.lhs.false:
+  %cmp2 = icmp sgt i32 %a, %e
+  br i1 %cmp2, label %land.lhs.true3, label %if.end
+
+land.lhs.true3:
+  %cmp4 = icmp eq i32 %b, %d
+  br i1 %cmp4, label %return, label %if.end
+
+if.end:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
+  store i32 %a, i32 *%arg
+  ret void
+}
diff --git a/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll b/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
new file mode 100644
index 000000000000..e77824f5f142
--- /dev/null
+++ b/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=aarch64-apple-ios -fast-isel -verify-machineinstrs | FileCheck %s
+
+; Check that the kill flag is cleared between CSE'd instructions on their
+; imp-def'd registers.
+; The verifier would complain otherwise.
+define i64 @csed-impdef-killflag(i64 %a) {
+; CHECK-LABEL: csed-impdef-killflag
+; CHECK-DAG:  mov    [[REG0:w[0-9]+]], wzr
+; CHECK-DAG:  orr    [[REG1:w[0-9]+]], wzr, #0x1
+; CHECK-DAG:  orr    [[REG2:x[0-9]+]], xzr, #0x2
+; CHECK-DAG:  orr    [[REG3:x[0-9]+]], xzr, #0x3
+; CHECK:      cmp    x0, #0
+; CHECK-DAG:  csel   w[[SELECT_WREG_1:[0-9]+]], [[REG0]], [[REG1]], ne
+; CHECK-DAG:  csel   [[SELECT_XREG_2:x[0-9]+]], [[REG2]], [[REG3]], ne
+; CHECK:      ubfx   [[SELECT_XREG_1:x[0-9]+]], x[[SELECT_WREG_1]], #0, #32
+; CHECK-NEXT: add    x0, [[SELECT_XREG_2]], [[SELECT_XREG_1]]
+; CHECK-NEXT: ret
+
+  %1 = icmp ne i64 %a, 0
+  %2 = select i1 %1, i32 0, i32 1
+  %3 = icmp ne i64 %a, 0
+  %4 = select i1 %3, i64 2, i64 3
+  %5 = zext i32 %2 to i64
+  %6 = add i64 %4, %5
+  ret i64 %6
+}
diff --git a/test/CodeGen/AArch64/madd-combiner.ll b/test/CodeGen/AArch64/madd-combiner.ll
new file mode 100644
index 000000000000..7c9787a7281a
--- /dev/null
+++ b/test/CodeGen/AArch64/madd-combiner.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=aarch64-apple-darwin            -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -verify-machineinstrs < %s | FileCheck %s
+
+; Test that we use the correct register class.
+define i32 @mul_add_imm(i32 %a, i32 %b) {
+; CHECK-LABEL: mul_add_imm
+; CHECK:       orr [[REG:w[0-9]+]], wzr, #0x4
+; CHECK-NEXT:  madd  {{w[0-9]+}}, w0, w1, [[REG]]
+  %1 = mul i32 %a, %b
+  %2 = add i32 %1, 4
+  ret i32 %2
+}
+
+define i32 @mul_sub_imm1(i32 %a, i32 %b) {
+; CHECK-LABEL: mul_sub_imm1
+; CHECK:       orr [[REG:w[0-9]+]], wzr, #0x4
+; CHECK-NEXT:  msub  {{w[0-9]+}}, w0, w1, [[REG]]
+  %1 = mul i32 %a, %b
+  %2 = sub i32 4, %1
+  ret i32 %2
+}
+
+; bugpoint reduced test case. This only tests that we pass the MI verifier.
+define void @mul_add_imm2() {
+entry:
+  br label %for.body
+for.body:
+  br i1 undef, label %for.body, label %for.body8
+for.body8:
+  %0 = mul i64 undef, -3
+  %mul1971 = add i64 %0, -3
+  %cmp7 = icmp slt i64 %mul1971, 1390451930000
+  br i1 %cmp7, label %for.body8, label %for.end20
+for.end20:
+  ret void
+}
+
diff --git a/test/CodeGen/AArch64/madd-lohi.ll b/test/CodeGen/AArch64/madd-lohi.ll
new file mode 100644
index 000000000000..550a8cb24b77
--- /dev/null
+++ b/test/CodeGen/AArch64/madd-lohi.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
+
+define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
+; CHECK-LABEL: test_128bitmul:
+; CHECK-DAG: umulh [[CARRY:x[0-9]+]], x0, x2
+; CHECK-DAG: madd [[PART1:x[0-9]+]], x0, x3, [[CARRY]]
+; CHECK: madd x1, x1, x2, [[PART1]]
+; CHECK: mul x0, x0, x2
+
+; CHECK-BE-LABEL: test_128bitmul:
+; CHECK-BE-DAG: umulh [[CARRY:x[0-9]+]], x1, x3
+; CHECK-BE-DAG: madd [[PART1:x[0-9]+]], x1, x2, [[CARRY]]
+; CHECK-BE: madd x0, x0, x3, [[PART1]]
+; CHECK-BE: mul x1, x1, x3
+
+  %prod = mul i128 %lhs, %rhs
+  ret i128 %prod
+}
diff --git a/test/CodeGen/AArch64/mul-lohi.ll b/test/CodeGen/AArch64/mul-lohi.ll
index 0689fbdcc078..4515697b9991 100644
--- a/test/CodeGen/AArch64/mul-lohi.ll
+++ b/test/CodeGen/AArch64/mul-lohi.ll
@@ -1,17 +1,16 @@
-; RUN: llc -mtriple=arm64-apple-ios7.0 %s -o - | FileCheck %s
-; RUN: llc -mtriple=arm64_be-linux-gnu %s -o - | FileCheck --check-prefix=CHECK-BE %s
-
+; RUN: llc -mtriple=arm64-apple-ios7.0 -mcpu=cyclone %s -o - | FileCheck %s
+; RUN: llc -mtriple=aarch64_be-linux-gnu -mcpu=cyclone %s -o - | FileCheck --check-prefix=CHECK-BE %s
 define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
 ; CHECK-LABEL: test_128bitmul:
+; CHECK-DAG: mul [[PART1:x[0-9]+]], x0, x3
 ; CHECK-DAG: umulh [[CARRY:x[0-9]+]], x0, x2
-; CHECK-DAG: madd [[PART1:x[0-9]+]], x0, x3, [[CARRY]]
-; CHECK: madd x1, x1, x2, [[PART1]]
+; CHECK: mul [[PART2:x[0-9]+]], x1, x2
 ; CHECK: mul x0, x0, x2
 
 ; CHECK-BE-LABEL: test_128bitmul:
+; CHECK-BE-DAG: mul [[PART1:x[0-9]+]], x1, x2
 ; CHECK-BE-DAG: umulh [[CARRY:x[0-9]+]], x1, x3
-; CHECK-BE-DAG: madd [[PART1:x[0-9]+]], x1, x2, [[CARRY]]
-; CHECK-BE: madd x0, x0, x3, [[PART1]]
+; CHECK-BE: mul [[PART2:x[0-9]+]], x0, x3
 ; CHECK-BE: mul x1, x1, x3
 
   %prod = mul i128 %lhs, %rhs
diff --git a/test/CodeGen/AArch64/neon-perm.ll b/test/CodeGen/AArch64/neon-perm.ll
index 4f8571db7480..41e391dcd76c 100644
--- a/test/CodeGen/AArch64/neon-perm.ll
+++ b/test/CodeGen/AArch64/neon-perm.ll
@@ -1387,6 +1387,13 @@ entry:
   ret <8 x i16> %shuffle.i
 }
 
+define <4 x i8> @test_vzip1_v4i8(<8 x i8> %p) {
+; CHECK-LABEL: test_vzip1_v4i8:
+; CHECK: zip1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
+ %lo = shufflevector <8 x i8> %p, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i8> %lo
+}
+
 define <8 x i8> @test_same_vzip2_s8(<8 x i8> %a) {
 ; CHECK-LABEL: test_same_vzip2_s8:
 ; CHECK: zip2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
diff --git a/test/CodeGen/AArch64/paired-load.ll b/test/CodeGen/AArch64/paired-load.ll
new file mode 100644
index 000000000000..3dddb9eff325
--- /dev/null
+++ b/test/CodeGen/AArch64/paired-load.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-linux-gnu"
+
+; Ensure we're generating ldp instructions instead of ldr Q.
+; CHECK: ldp
+; CHECK: stp
+define void @f(i64* %p, i64* %q) {
+  %addr2 = getelementptr i64* %q, i32 1
+  %addr = getelementptr i64* %p, i32 1
+  %x = load i64* %p
+  %y = load i64* %addr
+  store i64 %x, i64* %q
+  store i64 %y, i64* %addr2
+  ret void
+}
diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll
index e8c762504fc9..93ee0e67b901 100644
--- a/test/CodeGen/AArch64/pic-eh-stubs.ll
+++ b/test/CodeGen/AArch64/pic-eh-stubs.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -mtriple=aarch64-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
-; RUN: llc -mtriple=arm64_be-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64_be-none-linux-gnu -relocation-model=pic -o - %s | FileCheck %s
 
 ; Make sure exception-handling PIC code can be linked correctly. An alternative
 ; to the sequence described below would have .gcc_except_table itself writable
diff --git a/test/CodeGen/AArch64/postra-mi-sched.ll b/test/CodeGen/AArch64/postra-mi-sched.ll
new file mode 100644
index 000000000000..5a407246609e
--- /dev/null
+++ b/test/CodeGen/AArch64/postra-mi-sched.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -O3 -march=aarch64 -mcpu=cortex-a53 | FileCheck %s
+
+; With cortex-a53, each of fmul and fcvt have latency of 6 cycles.  After the
+; pre-RA MI scheduler, fmul, fcvt and fdiv will be consecutive.  The top-down
+; post-RA MI scheduler will clean this up.
+
+@d1 = common global double 0.000000e+00, align 8
+
+define i32 @test1(float %s2, float %s3, double %d, i32 %i2, i32 %i3) {
+entry:
+; CHECK-LABEL: @test1
+; CHECK: fmul
+; CHECK-NEXT: add
+; CHECK: fcvt
+; CHECK-NEXT: mul
+  %mul = fmul float %s2, %s3
+  %conv = fpext float %mul to double
+  %div = fdiv double %d, %conv
+  store double %div, double* @d1, align 8
+  %factor = shl i32 %i3, 1
+  %add1 = add i32 %i2, 4
+  %add2 = add i32 %add1, %factor
+  %add3 = add nsw i32 %add2, %i2
+  %add4 = add nsw i32 %add3, %add2
+  %mul5 = mul i32 %add3, %add3
+  %mul6 = mul i32 %mul5, %add4
+  %mul7 = shl i32 %add4, 1
+  %factor18 = mul i32 %mul7, %mul6
+  %add9 = add i32 %factor18, %mul6
+  ret i32 %add9
+}
diff --git a/test/CodeGen/AArch64/ragreedy-csr.ll b/test/CodeGen/AArch64/ragreedy-csr.ll
index de29b1baa8d5..31ff543117ff 100644
--- a/test/CodeGen/AArch64/ragreedy-csr.ll
+++ b/test/CodeGen/AArch64/ragreedy-csr.ll
@@ -271,27 +271,27 @@ return:
   %retval.0 = phi i32 [ 0, %entry ], [ 1, %land.lhs.true52 ], [ 1, %land.lhs.true43 ], [ 0, %if.else123 ], [ 1, %while.cond59.preheader ], [ 1, %while.cond95.preheader ], [ 1, %while.cond130.preheader ], [ 1, %land.lhs.true28 ], [ 1, %if.then83 ], [ 0, %lor.lhs.false74 ], [ 1, %land.rhs ], [ 1, %if.then117 ], [ 0, %while.body104 ], [ 1, %land.rhs99 ], [ 1, %if.then152 ], [ 0, %while.body139 ], [ 1, %land.rhs134 ], [ 0, %while.body ]
   ret i32 %retval.0
 }
-!181 = metadata !{metadata !"branch_weights", i32 662038, i32 1}
-!988 = metadata !{metadata !"branch_weights", i32 12091450, i32 1916}
-!989 = metadata !{metadata !"branch_weights", i32 7564670, i32 4526781}
-!990 = metadata !{metadata !"branch_weights", i32 7484958, i32 13283499}
-!991 = metadata !{metadata !"branch_weights", i32 8677007, i32 4606493}
-!992 = metadata !{metadata !"branch_weights", i32 -1172426948, i32 145094705}
-!993 = metadata !{metadata !"branch_weights", i32 1468914, i32 5683688}
-!994 = metadata !{metadata !"branch_weights", i32 114025221, i32 -1217548794, i32 -1199521551, i32 87712616}
-!995 = metadata !{metadata !"branch_weights", i32 1853716452, i32 -444717951, i32 932776759}
-!996 = metadata !{metadata !"branch_weights", i32 1004870, i32 20259}
-!997 = metadata !{metadata !"branch_weights", i32 20071, i32 189}
-!998 = metadata !{metadata !"branch_weights", i32 -1020255939, i32 572177766}
-!999 = metadata !{metadata !"branch_weights", i32 2666513, i32 3466431}
-!1000 = metadata !{metadata !"branch_weights", i32 5117635, i32 1859780}
-!1001 = metadata !{metadata !"branch_weights", i32 354902465, i32 -1444604407}
-!1002 = metadata !{metadata !"branch_weights", i32 -1762419279, i32 1592770684}
-!1003 = metadata !{metadata !"branch_weights", i32 1435905930, i32 -1951930624}
-!1004 = metadata !{metadata !"branch_weights", i32 1, i32 504888}
-!1005 = metadata !{metadata !"branch_weights", i32 94662, i32 504888}
-!1006 = metadata !{metadata !"branch_weights", i32 -1897793104, i32 160196332}
-!1007 = metadata !{metadata !"branch_weights", i32 2074643678, i32 -29579071}
-!1008 = metadata !{metadata !"branch_weights", i32 1, i32 226163}
-!1009 = metadata !{metadata !"branch_weights", i32 58357, i32 226163}
-!1010 = metadata !{metadata !"branch_weights", i32 -2072848646, i32 92907517}
+!181 = !{!"branch_weights", i32 662038, i32 1}
+!988 = !{!"branch_weights", i32 12091450, i32 1916}
+!989 = !{!"branch_weights", i32 7564670, i32 4526781}
+!990 = !{!"branch_weights", i32 7484958, i32 13283499}
+!991 = !{!"branch_weights", i32 8677007, i32 4606493}
+!992 = !{!"branch_weights", i32 -1172426948, i32 145094705}
+!993 = !{!"branch_weights", i32 1468914, i32 5683688}
+!994 = !{!"branch_weights", i32 114025221, i32 -1217548794, i32 -1199521551, i32 87712616}
+!995 = !{!"branch_weights", i32 1853716452, i32 -444717951, i32 932776759}
+!996 = !{!"branch_weights", i32 1004870, i32 20259}
+!997 = !{!"branch_weights", i32 20071, i32 189}
+!998 = !{!"branch_weights", i32 -1020255939, i32 572177766}
+!999 = !{!"branch_weights", i32 2666513, i32 3466431}
+!1000 = !{!"branch_weights", i32 5117635, i32 1859780}
+!1001 = !{!"branch_weights", i32 354902465, i32 -1444604407}
+!1002 = !{!"branch_weights", i32 -1762419279, i32 1592770684}
+!1003 = !{!"branch_weights", i32 1435905930, i32 -1951930624}
+!1004 = !{!"branch_weights", i32 1, i32 504888}
+!1005 = !{!"branch_weights", i32 94662, i32 504888}
+!1006 = !{!"branch_weights", i32 -1897793104, i32 160196332}
+!1007 = !{!"branch_weights", i32 2074643678, i32 -29579071}
+!1008 = !{!"branch_weights", i32 1, i32 226163}
+!1009 = !{!"branch_weights", i32 58357, i32 226163}
+!1010 = !{!"branch_weights", i32 -2072848646, i32 92907517}
diff --git a/test/CodeGen/AArch64/remat.ll b/test/CodeGen/AArch64/remat.ll
new file mode 100644
index 000000000000..32b3ed2d9ac4
--- /dev/null
+++ b/test/CodeGen/AArch64/remat.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a57 -o - %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a53 -o - %s | FileCheck %s
+
+%X = type { i64, i64, i64 }
+declare void @f(%X*)
+define void @t() {
+entry:
+  %tmp = alloca %X
+  call void @f(%X* %tmp)
+; CHECK: add x0, sp, #8
+; CHECK-NEXT-NOT: mov
+  call void @f(%X* %tmp)               
+; CHECK: add x0, sp, #8
+; CHECK-NEXT-NOT: mov
+  ret void 
+}
diff --git a/test/CodeGen/AArch64/rm_redundant_cmp.ll b/test/CodeGen/AArch64/rm_redundant_cmp.ll
new file mode 100644
index 000000000000..36dc118ed1a5
--- /dev/null
+++ b/test/CodeGen/AArch64/rm_redundant_cmp.ll
@@ -0,0 +1,254 @@
+; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 | FileCheck %s
+
+; The following cases are for i16
+
+%struct.s_signed_i16 = type { i16, i16, i16 }
+%struct.s_unsigned_i16 = type { i16, i16, i16 }
+
+@cost_s_i8_i16 = common global %struct.s_signed_i16 zeroinitializer, align 2
+@cost_u_i16 = common global %struct.s_unsigned_i16 zeroinitializer, align 2
+
+define void @test_i16_2cmp_signed_1() {
+; CHECK-LABEL: test_i16_2cmp_signed_1
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.gt
+; CHECK-NOT: cmp
+; CHECK: b.ne
+entry:
+  %0 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
+  %1 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
+  %cmp = icmp sgt i16 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp eq i16 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+define void @test_i16_2cmp_signed_2() {
+; CHECK-LABEL: test_i16_2cmp_signed_2
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.le
+; CHECK-NOT: cmp
+; CHECK: b.ge
+entry:
+  %0 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
+  %1 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
+  %cmp = icmp sgt i16 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i16 %0, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp slt i16 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i16 %1, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+define void @test_i16_2cmp_unsigned_1() {
+; CHECK-LABEL: test_i16_2cmp_unsigned_1
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.hi
+; CHECK-NOT: cmp
+; CHECK: b.ne
+entry:
+  %0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
+  %1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
+  %cmp = icmp ugt i16 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp eq i16 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+define void @test_i16_2cmp_unsigned_2() {
+; CHECK-LABEL: test_i16_2cmp_unsigned_2
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.ls
+; CHECK-NOT: cmp
+; CHECK: b.hs
+entry:
+  %0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
+  %1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
+  %cmp = icmp ugt i16 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i16 %0, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp ult i16 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i16 %1, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+; The following cases are for i8
+
+%struct.s_signed_i8 = type { i8, i8, i8 }
+%struct.s_unsigned_i8 = type { i8, i8, i8 }
+
+@cost_s = common global %struct.s_signed_i8 zeroinitializer, align 2
+@cost_u_i8 = common global %struct.s_unsigned_i8 zeroinitializer, align 2
+
+
+define void @test_i8_2cmp_signed_1() {
+; CHECK-LABEL: test_i8_2cmp_signed_1
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.gt
+; CHECK-NOT: cmp
+; CHECK: b.ne
+entry:
+  %0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
+  %1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
+  %cmp = icmp sgt i8 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp eq i8 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+define void @test_i8_2cmp_signed_2() {
+; CHECK-LABEL: test_i8_2cmp_signed_2
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.le
+; CHECK-NOT: cmp
+; CHECK: b.ge
+entry:
+  %0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
+  %1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
+  %cmp = icmp sgt i8 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i8 %0, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp slt i8 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i8 %1, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+define void @test_i8_2cmp_unsigned_1() {
+; CHECK-LABEL: test_i8_2cmp_unsigned_1
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.hi
+; CHECK-NOT: cmp
+; CHECK: b.ne
+entry:
+  %0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
+  %1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
+  %cmp = icmp ugt i8 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp eq i8 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+define void @test_i8_2cmp_unsigned_2() {
+; CHECK-LABEL: test_i8_2cmp_unsigned_2
+; CHECK: cmp {{w[0-9]+}}, {{w[0-9]+}}
+; CHECK-NEXT: b.ls
+; CHECK-NOT: cmp
+; CHECK: b.hs
+entry:
+  %0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
+  %1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
+  %cmp = icmp ugt i8 %0, %1
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  store i8 %0, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.else:                                          ; preds = %entry
+  %cmp5 = icmp ult i8 %0, %1
+  br i1 %cmp5, label %if.then7, label %if.end8
+
+if.then7:                                         ; preds = %if.else
+  store i8 %1, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 0), align 2
+  br label %if.end8
+
+if.end8:                                          ; preds = %if.else, %if.then7, %if.then
+  ret void
+}
+
+; Make sure the case below won't crash.
+
+; The optimization of ZERO_EXTEND and SIGN_EXTEND in type legalization stage can't assert
+; the operand of a set_cc is always a TRUNCATE.
+
+define i1 @foo(float %inl, float %inr) {
+  %lval = fptosi float %inl to i8
+  %rval = fptosi float %inr to i8
+  %sum = icmp eq i8 %lval, %rval
+  ret i1 %sum
+}
diff --git a/test/CodeGen/AArch64/sdivpow2.ll b/test/CodeGen/AArch64/sdivpow2.ll
new file mode 100644
index 000000000000..6c02ea9a467f
--- /dev/null
+++ b/test/CodeGen/AArch64/sdivpow2.ll
@@ -0,0 +1,74 @@
+; RUN: llc -mtriple=arm64-linux-gnu -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-linux-gnu -fast-isel=1 -verify-machineinstrs < %s | FileCheck %s
+
+define i32 @test1(i32 %x) {
+; CHECK-LABEL: test1
+; CHECK: add w8, w0, #7
+; CHECK: cmp w0, #0
+; CHECK: csel w8, w8, w0, lt
+; CHECK: asr w0, w8, #3
+  %div = sdiv i32 %x, 8
+  ret i32 %div
+}
+
+define i32 @test2(i32 %x) {
+; CHECK-LABEL: test2
+; CHECK: add w8, w0, #7
+; CHECK: cmp w0, #0
+; CHECK: csel w8, w8, w0, lt
+; CHECK: neg w0, w8, asr #3
+  %div = sdiv i32 %x, -8
+  ret i32 %div
+}
+
+define i32 @test3(i32 %x) {
+; CHECK-LABEL: test3
+; CHECK: add w8, w0, #31
+; CHECK: cmp w0, #0
+; CHECK: csel w8, w8, w0, lt
+; CHECK: asr w0, w8, #5
+  %div = sdiv i32 %x, 32
+  ret i32 %div
+}
+
+define i64 @test4(i64 %x) {
+; CHECK-LABEL: test4
+; CHECK: add x8, x0, #7
+; CHECK: cmp x0, #0
+; CHECK: csel x8, x8, x0, lt
+; CHECK: asr x0, x8, #3
+  %div = sdiv i64 %x, 8
+  ret i64 %div
+}
+
+define i64 @test5(i64 %x) {
+; CHECK-LABEL: test5
+; CHECK: add x8, x0, #7
+; CHECK: cmp x0, #0
+; CHECK: csel x8, x8, x0, lt
+; CHECK: neg x0, x8, asr #3
+  %div = sdiv i64 %x, -8
+  ret i64 %div
+}
+
+define i64 @test6(i64 %x) {
+; CHECK-LABEL: test6
+; CHECK: add x8, x0, #63
+; CHECK: cmp x0, #0
+; CHECK: csel x8, x8, x0, lt
+; CHECK: asr x0, x8, #6
+  %div = sdiv i64 %x, 64
+  ret i64 %div
+}
+
+define i64 @test7(i64 %x) {
+; CHECK-LABEL: test7
+; CHECK: orr [[REG:x[0-9]+]], xzr, #0xffffffffffff
+; CHECK: add x8, x0, [[REG]]
+; CHECK: cmp x0, #0
+; CHECK: csel x8, x8, x0, lt
+; CHECK: asr x0, x8, #48
+  %div = sdiv i64 %x, 281474976710656
+  ret i64 %div
+}
+
diff --git a/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll b/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
new file mode 100644
index 000000000000..bedbf5facbb7
--- /dev/null
+++ b/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=arm64-apple-ios -relocation-model=pic | FileCheck %s
+
+@__stack_chk_guard = external global i64*
+
+; PR20558
+
+; CHECK: adrp [[R0:x[0-9]+]], ___stack_chk_guard@GOTPAGE
+; CHECK: ldr  [[R1:x[0-9]+]], {{\[}}[[R0]], ___stack_chk_guard@GOTPAGEOFF{{\]}}
+; CHECK: ldr  [[R2:x[0-9]+]], {{\[}}[[R1]]{{\]}}
+; CHECK: stur [[R2]], {{\[}}x29, [[SLOT0:[0-9#\-]+]]{{\]}}
+; CHECK: ldur [[R3:x[0-9]+]], {{\[}}x29, [[SLOT0]]{{\]}}
+; CHECK: sub  [[R4:x[0-9]+]], [[R2]], [[R3]]
+; CHECK: cbnz [[R4]], LBB
+
+define i32 @test_stack_guard_remat2() {
+entry:
+  %StackGuardSlot = alloca i8*
+  %StackGuard = load i8** bitcast (i64** @__stack_chk_guard to i8**)
+  call void @llvm.stackprotector(i8* %StackGuard, i8** %StackGuardSlot)
+  %container = alloca [32 x i8], align 1
+  call void @llvm.stackprotectorcheck(i8** bitcast (i64** @__stack_chk_guard to i8**))
+  ret i32 -1
+}
+
+declare void @llvm.stackprotector(i8*, i8**)
+declare void @llvm.stackprotectorcheck(i8**)
diff --git a/test/CodeGen/AArch64/stack_guard_remat.ll b/test/CodeGen/AArch64/stack_guard_remat.ll
new file mode 100644
index 000000000000..cee7266abdcc
--- /dev/null
+++ b/test/CodeGen/AArch64/stack_guard_remat.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=arm64-apple-ios -relocation-model=pic -no-integrated-as | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=arm64-apple-ios -relocation-model=static -no-integrated-as | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -relocation-model=pic -no-integrated-as | FileCheck %s -check-prefix=PIC-LINUX
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -relocation-model=static -code-model=large -no-integrated-as | FileCheck %s -check-prefix=STATIC-LARGE
+; RUN: llc < %s -mtriple=aarch64-linux-gnu -relocation-model=static -code-model=small -no-integrated-as | FileCheck %s -check-prefix=STATIC-SMALL
+
+; DARWIN: foo2
+; DARWIN: adrp [[R0:x[0-9]+]], ___stack_chk_guard@GOTPAGE
+; DARWIN: ldr [[R1:x[0-9]+]], {{\[}}[[R0]], ___stack_chk_guard@GOTPAGEOFF{{\]}}
+; DARWIN: ldr {{x[0-9]+}}, {{\[}}[[R1]]{{\]}}
+
+; PIC-LINUX: foo2
+; PIC-LINUX: adrp [[R0:x[0-9]+]], :got:__stack_chk_guard
+; PIC-LINUX: ldr [[R1:x[0-9]+]], {{\[}}[[R0]], :got_lo12:__stack_chk_guard{{\]}}
+; PIC-LINUX: ldr {{x[0-9]+}}, {{\[}}[[R1]]{{\]}}
+
+; STATIC-LARGE: foo2
+; STATIC-LARGE: movz [[R0:x[0-9]+]], #:abs_g3:__stack_chk_guard
+; STATIC-LARGE: movk [[R0]], #:abs_g2_nc:__stack_chk_guard
+; STATIC-LARGE: movk [[R0]], #:abs_g1_nc:__stack_chk_guard
+; STATIC-LARGE: movk [[R0]], #:abs_g0_nc:__stack_chk_guard
+; STATIC-LARGE: ldr {{x[0-9]+}}, {{\[}}[[R0]]{{\]}}
+
+; STATIC-SMALL: foo2
+; STATIC-SMALL: adrp [[R0:x[0-9]+]], __stack_chk_guard
+; STATIC-SMALL: ldr {{x[0-9]+}}, {{\[}}[[R0]], :lo12:__stack_chk_guard{{\]}}
+
+define i32 @test_stack_guard_remat() #0 {
+entry:
+  %a1 = alloca [256 x i32], align 4
+  %0 = bitcast [256 x i32]* %a1 to i8*
+  call void @llvm.lifetime.start(i64 1024, i8* %0)
+  %arraydecay = getelementptr inbounds [256 x i32]* %a1, i64 0, i64 0
+  call void @foo3(i32* %arraydecay)
+  call void asm sideeffect "foo2", "~{w0},~{w1},~{w2},~{w3},~{w4},~{w5},~{w6},~{w7},~{w8},~{w9},~{w10},~{w11},~{w12},~{w13},~{w14},~{w15},~{w16},~{w17},~{w18},~{w19},~{w20},~{w21},~{w22},~{w23},~{w24},~{w25},~{w26},~{w27},~{w28},~{w29},~{w30}"()
+  call void @llvm.lifetime.end(i64 1024, i8* %0)
+  ret i32 0
+}
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+
+declare void @foo3(i32*)
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+
+attributes #0 = { nounwind sspstrong "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll
index 8aab84215260..7fb39545a32e 100644
--- a/test/CodeGen/AArch64/tail-call.ll
+++ b/test/CodeGen/AArch64/tail-call.ll
@@ -3,6 +3,7 @@
 declare fastcc void @callee_stack0()
 declare fastcc void @callee_stack8([8 x i32], i64)
 declare fastcc void @callee_stack16([8 x i32], i64, i64)
+declare extern_weak fastcc void @callee_weak()
 
 define fastcc void @caller_to0_from0() nounwind {
 ; CHECK-LABEL: caller_to0_from0:
@@ -92,3 +93,13 @@ define fastcc void @caller_to16_from16([8 x i32], i64 %a, i64 %b) {
 ; CHECK-NEXT: add sp, sp, #16
 ; CHECK-NEXT: b callee_stack16
 }
+
+
+; Weakly-referenced extern functions cannot be tail-called, as AAELF does
+; not define the behaviour of branch instructions to undefined weak symbols.
+define fastcc void @caller_weak() {
+; CHECK-LABEL: caller_weak:
+; CHECK: bl callee_weak
+  tail call void @callee_weak()
+  ret void
+}
diff --git a/test/CodeGen/AArch64/tailcall-fastisel.ll b/test/CodeGen/AArch64/tailcall-fastisel.ll
new file mode 100644
index 000000000000..3ba639183161
--- /dev/null
+++ b/test/CodeGen/AArch64/tailcall-fastisel.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=arm64-apple-darwin -O0 | FileCheck %s
+
+; CHECK: b _foo0
+
+define i32 @foo1() {
+entry:
+  %call = tail call i32 @foo0()
+  ret i32 %call
+}
+
+declare i32 @foo0()
diff --git a/test/CodeGen/AArch64/tbz-tbnz.ll b/test/CodeGen/AArch64/tbz-tbnz.ll
new file mode 100644
index 000000000000..c77043cae94f
--- /dev/null
+++ b/test/CodeGen/AArch64/tbz-tbnz.ll
@@ -0,0 +1,258 @@
+; RUN: llc -O1 -march=aarch64 < %s | FileCheck %s
+
+declare void @t()
+
+define void @test1(i32 %a) {
+; CHECK-LABEL: @test1
+entry:
+  %sub = add nsw i32 %a, -12
+  %cmp = icmp slt i32 %sub, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+; CHECK: sub [[CMP:w[0-9]+]], w0, #12
+; CHECK: tbz [[CMP]], #31
+
+if.then:
+  call void @t()
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define void @test2(i64 %a) {
+; CHECK-LABEL: @test2
+entry:
+  %sub = add nsw i64 %a, -12
+  %cmp = icmp slt i64 %sub, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+; CHECK: sub [[CMP:x[0-9]+]], x0, #12
+; CHECK: tbz [[CMP]], #63
+
+if.then:
+  call void @t()
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define void @test3(i32 %a) {
+; CHECK-LABEL: @test3
+entry:
+  %sub = add nsw i32 %a, -12
+  %cmp = icmp sgt i32 %sub, -1
+  br i1 %cmp, label %if.then, label %if.end
+
+; CHECK: sub [[CMP:w[0-9]+]], w0, #12
+; CHECK: tbnz [[CMP]], #31
+
+if.then:
+  call void @t()
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define void @test4(i64 %a) {
+; CHECK-LABEL: @test4
+entry:
+  %sub = add nsw i64 %a, -12
+  %cmp = icmp sgt i64 %sub, -1
+  br i1 %cmp, label %if.then, label %if.end
+
+; CHECK: sub [[CMP:x[0-9]+]], x0, #12
+; CHECK: tbnz [[CMP]], #63
+
+if.then:
+  call void @t()
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define void @test5(i32 %a) {
+; CHECK-LABEL: @test5
+entry:
+  %sub = add nsw i32 %a, -12
+  %cmp = icmp sge i32 %sub, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+; CHECK: sub [[CMP:w[0-9]+]], w0, #12
+; CHECK: tbnz [[CMP]], #31
+
+if.then:
+  call void @t()
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define void @test6(i64 %a) {
+; CHECK-LABEL: @test6
+entry:
+  %sub = add nsw i64 %a, -12
+  %cmp = icmp sge i64 %sub, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+; CHECK: sub [[CMP:x[0-9]+]], x0, #12
+; CHECK: tbnz [[CMP]], #63
+
+if.then:
+  call void @t()
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define void @test7(i32 %a) {
+; CHECK-LABEL: @test7
+entry:
+  %sub = sub nsw i32 %a, 12
+  %cmp = icmp slt i32 %sub, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+; CHECK: sub [[CMP:w[0-9]+]], w0, #12
+; CHECK: tbz [[CMP]], #31
+
+if.then:
+  call void @t()
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define void @test8(i64 %val1, i64 %val2, i64 %val3) {
+; CHECK-LABEL: @test8
+  %and1 = and i64 %val1, %val2
+  %tst1 = icmp slt i64 %and1, 0
+  br i1 %tst1, label %if.then1, label %if.end
+
+; CHECK: tst x0, x1
+; CHECK-NEXT: b.ge
+
+if.then1:
+  %and2 = and i64 %val2, %val3
+  %tst2 = icmp sge i64 %and2, 0
+  br i1 %tst2, label %if.then2, label %if.end
+
+; CHECK: and [[CMP:x[0-9]+]], x1, x2
+; CHECK-NOT: cmp
+; CHECK: tbnz [[CMP]], #63
+
+if.then2:
+  %shifted_op1 = shl i64 %val2, 63
+  %shifted_and1 = and i64 %val1, %shifted_op1
+  %tst3 = icmp slt i64 %shifted_and1, 0
+  br i1 %tst3, label %if.then3, label %if.end
+
+; CHECK: tst x0, x1, lsl #63
+; CHECK: b.ge
+
+if.then3:
+  %shifted_op2 = shl i64 %val2, 62
+  %shifted_and2 = and i64 %val1, %shifted_op2
+  %tst4 = icmp sge i64 %shifted_and2, 0
+  br i1 %tst4, label %if.then4, label %if.end
+
+; CHECK: tst x0, x1, lsl #62
+; CHECK: b.lt
+
+if.then4:
+  call void @t()
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define void @test9(i64 %val1) {
+; CHECK-LABEL: @test9
+  %tst = icmp slt i64 %val1, 0
+  br i1 %tst, label %if.then, label %if.end
+
+; CHECK-NOT: cmp
+; CHECK: tbz x0, #63
+
+if.then:
+  call void @t()
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define void @test10(i64 %val1) {
+; CHECK-LABEL: @test10
+  %tst = icmp slt i64 %val1, 0
+  br i1 %tst, label %if.then, label %if.end
+
+; CHECK-NOT: cmp
+; CHECK: tbz x0, #63
+
+if.then:
+  call void @t()
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define void @test11(i64 %val1, i64* %ptr) {
+; CHECK-LABEL: @test11
+
+; CHECK: ldr [[CMP:x[0-9]+]], [x1]
+; CHECK-NOT: cmp
+; CHECK: tbz [[CMP]], #63
+
+  %val = load i64* %ptr
+  %tst = icmp slt i64 %val, 0
+  br i1 %tst, label %if.then, label %if.end
+
+if.then:
+  call void @t()
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define void @test12(i64 %val1) {
+; CHECK-LABEL: @test12
+  %tst = icmp slt i64 %val1, 0
+  br i1 %tst, label %if.then, label %if.end
+
+; CHECK-NOT: cmp
+; CHECK: tbz x0, #63
+
+if.then:
+  call void @t()
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+define void @test13(i64 %val1, i64 %val2) {
+; CHECK-LABEL: @test13
+  %or = or i64 %val1, %val2
+  %tst = icmp slt i64 %or, 0
+  br i1 %tst, label %if.then, label %if.end
+
+; CHECK: orr [[CMP:x[0-9]+]], x0, x1
+; CHECK-NOT: cmp
+; CHECK: tbz [[CMP]], #63
+
+if.then:
+  call void @t()
+  br label %if.end
+
+if.end:
+  ret void
+}
diff --git a/test/CodeGen/AArch64/trunc-v1i64.ll b/test/CodeGen/AArch64/trunc-v1i64.ll
index 159b8e0cff33..19efd2fafd5e 100644
--- a/test/CodeGen/AArch64/trunc-v1i64.ll
+++ b/test/CodeGen/AArch64/trunc-v1i64.ll
@@ -60,4 +60,23 @@ define <8 x i8> @test_v1i8_1(<1 x i64> %in0) {
   %1 = shufflevector <1 x i64> %in0, <1 x i64> undef, <8 x i32> <i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %2 = trunc <8 x i64> %1 to <8 x i8>
   ret <8 x i8> %2
-}
-\ No newline at end of file
+}
+
+; PR20777: v1i1 is also problematic, but we can't widen it, so we extract_elt
+; the i64 out of the v1i64 operand, and truncate that scalar instead.
+
+define <1 x i1> @test_v1i1_0(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i1_0:
+; CHECK: fmov w0, s0
+  %1 = trunc <1 x i64> %in0 to <1 x i1>
+  ret <1 x i1> %1
+}
+
+define i1 @test_v1i1_1(<1 x i64> %in0) {
+; CHECK-LABEL: test_v1i1_1:
+; CHECK: fmov [[REG:w[0-9]+]], s0
+  %1 = trunc <1 x i64> %in0 to <1 x i1>
+; CHECK: and w0, [[REG]], #0x1
+  %2 = extractelement <1 x i1> %1, i32 0
+  ret i1 %2
+}
diff --git a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
index 55cea3aad13f..90a3b372937e 100644
--- a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
+++ b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
@@ -1,11 +1,14 @@
-; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1
-; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1
+; RUN: llc < %s -enable-tail-merge | FileCheck %s
 ; Check that calls to baz and quux are tail-merged.
 ; PR1628
 
+; CHECK: bl _baz
+; CHECK-NOT: bl _baz
+; CHECK: bl _quux
+; CHECK-NOT: bl _quux
+
 ; ModuleID = 'tail.c'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-apple-darwin8"
+target triple = "arm-apple-darwin8"
 
 define i32 @f(i32 %i, i32 %q) {
 entry:
diff --git a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
index 48941162c81c..37e41ecc4b1e 100644
--- a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
+++ b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
@@ -1,6 +1,11 @@
-; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*baz | count 1
-; RUN: llc < %s -march=arm -enable-tail-merge | grep bl.*quux | count 1
+; RUN: llc < %s -march=arm  | FileCheck %s
+
 ; Check that calls to baz and quux are tail-merged.
+; CHECK: bl _baz
+; CHECK-NOT: bl _baz
+; CHECK: bl _quux
+; CHECK-NOT: bl _quux
+
 ; PR1628
 
 ; ModuleID = 'tail.c'
diff --git a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
index acbab8a0a071..30ae7237395e 100644
--- a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
@@ -1,10 +1,23 @@
-; RUN: llc < %s -march=arm | grep bl.*baz | count 1
-; RUN: llc < %s -march=arm | grep bl.*quux | count 1
-; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*baz | count 2
-; RUN: llc < %s -march=arm -enable-tail-merge=0 | grep bl.*quux | count 2
-; Check that tail merging is the default on ARM, and that -enable-tail-merge=0 works.
+; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=arm -enable-tail-merge=0 | \
+; RUN:   FileCheck --check-prefix=NOMERGE %s
+
+; Check that tail merging is the default on ARM, and that -enable-tail-merge=0
+; works.
 ; PR1628
 
+; CHECK: bl _baz
+; CHECK-NOT: bl _baz
+
+; CHECK: bl _quux
+; CHECK-NOT: bl _quux
+
+; NOMERGE: bl _baz
+; NOMERGE: bl _baz
+
+; NOMERGE: bl _quux
+; NOMERGE: bl _quux
+
 ; ModuleID = 'tail.c'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
 target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/ARM/2009-10-16-Scope.ll b/test/CodeGen/ARM/2009-10-16-Scope.ll
index 570fcf96e641..de05644fc901 100644
--- a/test/CodeGen/ARM/2009-10-16-Scope.ll
+++ b/test/CodeGen/ARM/2009-10-16-Scope.ll
@@ -9,7 +9,7 @@ entry:
   br label %do.body, !dbg !0
 
 do.body:                                          ; preds = %entry
-  call void @llvm.dbg.declare(metadata !{i32* %count_}, metadata !4)
+  call void @llvm.dbg.declare(metadata i32* %count_, metadata !4, metadata !{!"0x102"})
   %conv = ptrtoint i32* %count_ to i32, !dbg !0   ; <i32> [#uses=1]
   %call = call i32 @foo(i32 %conv) ssp, !dbg !0   ; <i32> [#uses=0]
   br label %do.end, !dbg !0
@@ -18,17 +18,17 @@ do.end:                                           ; preds = %do.body
   ret void, !dbg !7
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare i32 @foo(i32) ssp
 
-!0 = metadata !{i32 5, i32 2, metadata !1, null}
-!1 = metadata !{i32 458763, null, metadata !2, i32 1, i32 1, i32 0}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", i32 4, null, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0}; [DW_TAG_subprogram ]
-!3 = metadata !{i32 458769, metadata !8, i32 12, metadata !"clang 1.1", i1 true, metadata !"", i32 0, null, metadata !9, null, null, null, metadata !""}; [DW_TAG_compile_unit ]
-!4 = metadata !{i32 459008, metadata !5, metadata !"count_", metadata !3, i32 5, metadata !6}; [ DW_TAG_auto_variable ]
-!5 = metadata !{i32 458763, null, metadata !1, i32 1, i32 1, i32 0}; [DW_TAG_lexical_block ]
-!6 = metadata !{i32 458788, null, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
-!7 = metadata !{i32 6, i32 1, metadata !2, null}
-!8 = metadata !{metadata !"genmodes.i", metadata !"/Users/yash/Downloads"}
-!9 = metadata !{i32 0}
+!0 = !MDLocation(line: 5, column: 2, scope: !1)
+!1 = !{!"0xb\001\001\000", null, !2}; [DW_TAG_lexical_block ]
+!2 = !{!"0x2e\00bar\00bar\00bar\004\000\001\000\006\000\000\000", i32 0, !3, null, null, null, null, null, null}; [DW_TAG_subprogram ]
+!3 = !{!"0x11\0012\00clang 1.1\001\00\000\00\000", !8, null, !9, null, null, null}; [DW_TAG_compile_unit ]
+!4 = !{!"0x100\00count_\005\000", !5, !3, !6}; [ DW_TAG_auto_variable ]
+!5 = !{!"0xb\001\001\000", null, !1}; [DW_TAG_lexical_block ]
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !3}; [DW_TAG_base_type ]
+!7 = !MDLocation(line: 6, column: 1, scope: !2)
+!8 = !{!"genmodes.i", !"/Users/yash/Downloads"}
+!9 = !{i32 0}
diff --git a/test/CodeGen/ARM/2009-10-21-InvalidFNeg.ll b/test/CodeGen/ARM/2009-10-21-InvalidFNeg.ll
deleted file mode 100644
index 0f021d28aa1b..000000000000
--- a/test/CodeGen/ARM/2009-10-21-InvalidFNeg.ll
+++ /dev/null
@@ -1,48 +0,0 @@
-; RUN: llc -mcpu=cortex-a8 -mattr=+neon < %s | grep vneg
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "armv7-eabi"
-
-%aaa = type { %fff, %fff }
-%bbb = type { [6 x %ddd] }
-%ccc = type { %eee, %fff }
-%ddd = type { %fff }
-%eee = type { %fff, %fff, %fff, %fff }
-%fff = type { %struct.vec_float4 }
-%struct.vec_float4 = type { <4 x float> }
-
-define linkonce_odr arm_aapcs_vfpcc void @foo(%eee* noalias sret %agg.result, i64 %tfrm.0.0, i64 %tfrm.0.1, i64 %tfrm.0.2, i64 %tfrm.0.3, i64 %tfrm.0.4, i64 %tfrm.0.5, i64 %tfrm.0.6, i64 %tfrm.0.7) nounwind noinline {
-entry:
-  %tmp104 = zext i64 %tfrm.0.2 to i512            ; <i512> [#uses=1]
-  %tmp105 = shl i512 %tmp104, 128                 ; <i512> [#uses=1]
-  %tmp118 = zext i64 %tfrm.0.3 to i512            ; <i512> [#uses=1]
-  %tmp119 = shl i512 %tmp118, 192                 ; <i512> [#uses=1]
-  %ins121 = or i512 %tmp119, %tmp105              ; <i512> [#uses=1]
-  %tmp99 = zext i64 %tfrm.0.4 to i512             ; <i512> [#uses=1]
-  %tmp100 = shl i512 %tmp99, 256                  ; <i512> [#uses=1]
-  %tmp123 = zext i64 %tfrm.0.5 to i512            ; <i512> [#uses=1]
-  %tmp124 = shl i512 %tmp123, 320                 ; <i512> [#uses=1]
-  %tmp96 = zext i64 %tfrm.0.6 to i512             ; <i512> [#uses=1]
-  %tmp97 = shl i512 %tmp96, 384                   ; <i512> [#uses=1]
-  %tmp128 = zext i64 %tfrm.0.7 to i512            ; <i512> [#uses=1]
-  %tmp129 = shl i512 %tmp128, 448                 ; <i512> [#uses=1]
-  %mask.masked = or i512 %tmp124, %tmp100         ; <i512> [#uses=1]
-  %ins131 = or i512 %tmp129, %tmp97               ; <i512> [#uses=1]
-  %tmp109132 = zext i64 %tfrm.0.0 to i128         ; <i128> [#uses=1]
-  %tmp113134 = zext i64 %tfrm.0.1 to i128         ; <i128> [#uses=1]
-  %tmp114133 = shl i128 %tmp113134, 64            ; <i128> [#uses=1]
-  %tmp94 = or i128 %tmp114133, %tmp109132         ; <i128> [#uses=1]
-  %tmp95 = bitcast i128 %tmp94 to <4 x float>     ; <<4 x float>> [#uses=0]
-  %tmp82 = lshr i512 %ins121, 128                 ; <i512> [#uses=1]
-  %tmp83 = trunc i512 %tmp82 to i128              ; <i128> [#uses=1]
-  %tmp84 = bitcast i128 %tmp83 to <4 x float>     ; <<4 x float>> [#uses=0]
-  %tmp86 = lshr i512 %mask.masked, 256            ; <i512> [#uses=1]
-  %tmp87 = trunc i512 %tmp86 to i128              ; <i128> [#uses=1]
-  %tmp88 = bitcast i128 %tmp87 to <4 x float>     ; <<4 x float>> [#uses=0]
-  %tmp90 = lshr i512 %ins131, 384                 ; <i512> [#uses=1]
-  %tmp91 = trunc i512 %tmp90 to i128              ; <i128> [#uses=1]
-  %tmp92 = bitcast i128 %tmp91 to <4 x float>     ; <<4 x float>> [#uses=1]
-  %tmp = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp92 ; <<4 x float>> [#uses=1]
-  %tmp28 = getelementptr inbounds %eee* %agg.result, i32 0, i32 3, i32 0, i32 0 ; <<4 x float>*> [#uses=1]
-  store <4 x float> %tmp, <4 x float>* %tmp28, align 16
-  ret void
-}
diff --git a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
index 35739d76eae0..6f7db9352188 100644
--- a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
+++ b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
@@ -5,28 +5,28 @@ target triple = "armv4t-apple-darwin10"
 
 define hidden i32 @__addvsi3(i32 %a, i32 %b) nounwind {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %b}, i64 0, metadata !0)
+  tail call void @llvm.dbg.value(metadata i32 %b, i64 0, metadata !0, metadata !{!"0x102"})
   %0 = add nsw i32 %b, %a, !dbg !9                ; <i32> [#uses=1]
   ret i32 %0, !dbg !11
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!15}
-!0 = metadata !{i32 524545, metadata !1, metadata !"b", metadata !2, i32 93, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, metadata !12, null, metadata !"__addvsi3", metadata !"__addvsi3", metadata !"__addvsi3", i32 94, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !12} ; [ DW_TAG_file_type ]
-!12 = metadata !{metadata !"libgcc2.c", metadata !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc"}
-!3 = metadata !{i32 524305, metadata !12, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", i1 true, metadata !"", i32 0, metadata !13, metadata !13, metadata !14, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !12, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{metadata !6, metadata !6, metadata !6}
-!6 = metadata !{i32 524310, metadata !12, null, metadata !"SItype", i32 152, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ]
-!7 = metadata !{i32 524329, metadata !"libgcc2.h", metadata !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc", metadata !3} ; [ DW_TAG_file_type ]
-!8 = metadata !{i32 524324, metadata !12, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 95, i32 0, metadata !10, null}
-!10 = metadata !{i32 524299, metadata !12, metadata !1, i32 94, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!11 = metadata !{i32 100, i32 0, metadata !10, null}
-!13 = metadata !{i32 0}
-!14 = metadata !{metadata !1}
-!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x101\00b\0093\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00__addvsi3\00__addvsi3\00__addvsi3\0094\000\001\000\006\000\000\000", !12, null, !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !12} ; [ DW_TAG_file_type ]
+!12 = !{!"libgcc2.c", !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc"}
+!3 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)\001\00\000\00\000", !12, !13, !13, !14, null, null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !12, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{!6, !6, !6}
+!6 = !{!"0x16\00SItype\00152\000\000\000\000", !12, null, !8} ; [ DW_TAG_typedef ]
+!7 = !{!"0x29", !"libgcc2.h", !"/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc", !3} ; [ DW_TAG_file_type ]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", !12, !2} ; [ DW_TAG_base_type ]
+!9 = !MDLocation(line: 95, scope: !10)
+!10 = !{!"0xb\0094\000\000", !12, !1} ; [ DW_TAG_lexical_block ]
+!11 = !MDLocation(line: 100, scope: !10)
+!13 = !{i32 0}
+!14 = !{!1}
+!15 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll b/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
index a53200e72c3f..18b3be0aba5c 100644
--- a/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
+++ b/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
@@ -7,16 +7,16 @@ target triple = "thumbv7-apple-darwin3.0.0-iphoneos"
 
 define void @x0(i8* nocapture %buf, i32 %nbytes) nounwind optsize {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i8* %buf}, i64 0, metadata !0), !dbg !15
-  tail call void @llvm.dbg.value(metadata !{i32 %nbytes}, i64 0, metadata !8), !dbg !16
+  tail call void @llvm.dbg.value(metadata i8* %buf, i64 0, metadata !0, metadata !{!"0x102"}), !dbg !15
+  tail call void @llvm.dbg.value(metadata i32 %nbytes, i64 0, metadata !8, metadata !{!"0x102"}), !dbg !16
   %tmp = load i32* @length, !dbg !17              ; <i32> [#uses=3]
   %cmp = icmp eq i32 %tmp, -1, !dbg !17           ; <i1> [#uses=1]
   %cmp.not = xor i1 %cmp, true                    ; <i1> [#uses=1]
   %cmp3 = icmp ult i32 %tmp, %nbytes, !dbg !17    ; <i1> [#uses=1]
   %or.cond = and i1 %cmp.not, %cmp3               ; <i1> [#uses=1]
-  tail call void @llvm.dbg.value(metadata !{i32 %tmp}, i64 0, metadata !8), !dbg !17
+  tail call void @llvm.dbg.value(metadata i32 %tmp, i64 0, metadata !8, metadata !{!"0x102"}), !dbg !17
   %nbytes.addr.0 = select i1 %or.cond, i32 %tmp, i32 %nbytes ; <i32> [#uses=1]
-  tail call void @llvm.dbg.value(metadata !18, i64 0, metadata !10), !dbg !19
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !19
   br label %while.cond, !dbg !20
 
 while.cond:                                       ; preds = %while.body, %entry
@@ -42,35 +42,35 @@ while.end:                                        ; preds = %land.rhs, %while.co
 
 declare i32 @x1() optsize
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.lv.fn = !{!0, !8, !10, !12}
 !llvm.dbg.gv = !{!14}
 
-!0 = metadata !{i32 524545, metadata !1, metadata !"buf", metadata !2, i32 4, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, metadata !26, null, metadata !"x0", metadata !"x0", metadata !"x0", i32 5, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !26} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, i32 0, i32 12, metadata !"t.c", metadata !".", metadata !"clang 2.0", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !26, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!5 = metadata !{null}
-!6 = metadata !{i32 524303, metadata !26, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 524324, metadata !26, metadata !2, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 524545, metadata !1, metadata !"nbytes", metadata !2, i32 4, metadata !9} ; [ DW_TAG_arg_variable ]
-!9 = metadata !{i32 524324, metadata !26, metadata !2, metadata !"unsigned long", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 524544, metadata !11, metadata !"nread", metadata !2, i32 6, metadata !9} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 524299, metadata !26, metadata !1, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 524544, metadata !11, metadata !"c", metadata !2, i32 7, metadata !13} ; [ DW_TAG_auto_variable ]
-!13 = metadata !{i32 524324, metadata !26, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 524340, i32 0, metadata !2, metadata !"length", metadata !"length", metadata !"length", metadata !2, i32 1, metadata !13, i1 false, i1 true, i32* @length} ; [ DW_TAG_variable ]
-!15 = metadata !{i32 4, i32 24, metadata !1, null}
-!16 = metadata !{i32 4, i32 43, metadata !1, null}
-!17 = metadata !{i32 9, i32 2, metadata !11, null}
-!18 = metadata !{i32 0}
-!19 = metadata !{i32 10, i32 2, metadata !11, null}
-!20 = metadata !{i32 11, i32 2, metadata !11, null}
-!21 = metadata !{i32 12, i32 3, metadata !22, null}
-!22 = metadata !{i32 524299, metadata !26, metadata !11, i32 11, i32 45, i32 0} ; [ DW_TAG_lexical_block ]
-!23 = metadata !{i32 13, i32 3, metadata !22, null}
-!24 = metadata !{i32 14, i32 2, metadata !22, null}
-!25 = metadata !{i32 15, i32 1, metadata !11, null}
-!26 = metadata !{metadata !"t.c", metadata !"/private/tmp"}
+!0 = !{!"0x101\00buf\004\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00x0\00x0\00x0\005\000\001\000\006\000\000\000", !26, null, !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !26} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\0012\00clang 2.0\001\00\00\00\00", !26, null, null, null, null, null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !26, !2, null, !5, null} ; [ DW_TAG_subroutine_type ]
+!5 = !{null}
+!6 = !{!"0xf\00\000\0032\0032\000\000", !26, !2, !7} ; [ DW_TAG_pointer_type ]
+!7 = !{!"0x24\00unsigned char\000\008\008\000\000\008", !26, !2} ; [ DW_TAG_base_type ]
+!8 = !{!"0x101\00nbytes\004\000", !1, !2, !9} ; [ DW_TAG_arg_variable ]
+!9 = !{!"0x24\00unsigned long\000\0032\0032\000\000\007", !26, !2} ; [ DW_TAG_base_type ]
+!10 = !{!"0x100\00nread\006\000", !11, !2, !9} ; [ DW_TAG_auto_variable ]
+!11 = !{!"0xb\005\001\000", !26, !1} ; [ DW_TAG_lexical_block ]
+!12 = !{!"0x100\00c\007\000", !11, !2, !13} ; [ DW_TAG_auto_variable ]
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", !26, !2} ; [ DW_TAG_base_type ]
+!14 = !{!"0x34\00length\00length\00length\001\000\001", !2, !2, !13, i32* @length} ; [ DW_TAG_variable ]
+!15 = !MDLocation(line: 4, column: 24, scope: !1)
+!16 = !MDLocation(line: 4, column: 43, scope: !1)
+!17 = !MDLocation(line: 9, column: 2, scope: !11)
+!18 = !{i32 0}
+!19 = !MDLocation(line: 10, column: 2, scope: !11)
+!20 = !MDLocation(line: 11, column: 2, scope: !11)
+!21 = !MDLocation(line: 12, column: 3, scope: !22)
+!22 = !{!"0xb\0011\0045\000", !26, !11} ; [ DW_TAG_lexical_block ]
+!23 = !MDLocation(line: 13, column: 3, scope: !22)
+!24 = !MDLocation(line: 14, column: 2, scope: !22)
+!25 = !MDLocation(line: 15, column: 1, scope: !11)
+!26 = !{!"t.c", !"/private/tmp"}
diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
index 48de24497189..f71a6c9732a6 100644
--- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -6,8 +6,8 @@
 define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp {
 entry:
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !23), !dbg !24
-  call void @llvm.dbg.value(metadata !{%struct.SVal* %location}, i64 0, metadata !25), !dbg !24
+  call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !23, metadata !{!"0x102"}), !dbg !24
+  call void @llvm.dbg.value(metadata %struct.SVal* %location, i64 0, metadata !25, metadata !{!"0x102"}), !dbg !24
   %0 = icmp ne i32 %i, 0, !dbg !27                ; <i1> [#uses=1]
   br i1 %0, label %bb, label %bb1, !dbg !27
 
@@ -34,7 +34,7 @@ return:                                           ; preds = %bb2
 define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2  {
 entry:
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.value(metadata !{%struct.SVal* %this}, i64 0, metadata !31), !dbg !34
+  call void @llvm.dbg.value(metadata %struct.SVal* %this, i64 0, metadata !31, metadata !{!"0x102"}), !dbg !34
   %0 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 0, !dbg !34 ; <i8**> [#uses=1]
   store i8* null, i8** %0, align 8, !dbg !34
   %1 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 1, !dbg !34 ; <i32*> [#uses=1]
@@ -45,14 +45,14 @@ return:                                           ; preds = %entry
   ret void, !dbg !35
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define i32 @main() nounwind ssp {
 entry:
   %0 = alloca %struct.SVal                        ; <%struct.SVal*> [#uses=3]
   %v = alloca %struct.SVal                        ; <%struct.SVal*> [#uses=4]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{%struct.SVal* %v}, metadata !38), !dbg !41
+  call void @llvm.dbg.declare(metadata %struct.SVal* %v, metadata !38, metadata !{!"0x102"}), !dbg !41
   call void @_ZN4SValC1Ev(%struct.SVal* %v) nounwind, !dbg !41
   %1 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !42 ; <i32*> [#uses=1]
   store i32 1, i32* %1, align 8, !dbg !42
@@ -65,65 +65,65 @@ entry:
   %7 = load i32* %6, align 8, !dbg !43            ; <i32> [#uses=1]
   store i32 %7, i32* %5, align 8, !dbg !43
   %8 = call i32 @_Z3fooi4SVal(i32 2, %struct.SVal* noalias %0) nounwind, !dbg !43 ; <i32> [#uses=0]
-  call void @llvm.dbg.value(metadata !{i32 %8}, i64 0, metadata !44), !dbg !43
+  call void @llvm.dbg.value(metadata i32 %8, i64 0, metadata !44, metadata !{!"0x102"}), !dbg !43
   br label %return, !dbg !45
 
 return:                                           ; preds = %entry
   ret i32 0, !dbg !45
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!49}
 
-!0 = metadata !{i32 786478, metadata !48, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786451, metadata !48, null, metadata !"SVal", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [SVal] [line 1, size 128, align 64, offset 0] [def] [from ]
-!2 = metadata !{i32 786473, metadata !48} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !48, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !47, metadata !47, metadata !46, metadata !47,  metadata !47, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9}
-!5 = metadata !{i32 786445, metadata !48, metadata !1, metadata !"Data", i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!6 = metadata !{i32 786447, metadata !48, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 786445, metadata !48, metadata !1, metadata !"Kind", i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ]
-!8 = metadata !{i32 786468, metadata !48, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786478, metadata !48, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 786453, metadata !48, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!11 = metadata !{null, metadata !12, metadata !13}
-!12 = metadata !{i32 786447, metadata !48, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
-!13 = metadata !{i32 786468, metadata !48, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786453, metadata !48, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!15 = metadata !{null, metadata !12}
-!16 = metadata !{i32 786478, metadata !48, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 786478, metadata !48, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!18 = metadata !{i32 786453, metadata !48, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!19 = metadata !{metadata !13, metadata !13, metadata !1}
-!20 = metadata !{i32 786478, metadata !48, metadata !2, metadata !"main", metadata !"main", metadata !"main", i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!21 = metadata !{i32 786453, metadata !48, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!22 = metadata !{metadata !13}
-!23 = metadata !{i32 786689, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!24 = metadata !{i32 16, i32 0, metadata !17, null}
-!25 = metadata !{i32 786689, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!26 = metadata !{i32 786448, metadata !48, metadata !2, metadata !"SVal", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
-!27 = metadata !{i32 17, i32 0, metadata !28, null}
-!28 = metadata !{i32 786443, metadata !2, metadata !17, i32 16, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
-!29 = metadata !{i32 18, i32 0, metadata !28, null}
-!30 = metadata !{i32 20, i32 0, metadata !28, null}
-!31 = metadata !{i32 786689, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!32 = metadata !{i32 786470, metadata !48, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ]
-!33 = metadata !{i32 786447, metadata !48, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ]
-!34 = metadata !{i32 11, i32 0, metadata !16, null}
-!35 = metadata !{i32 11, i32 0, metadata !36, null}
-!36 = metadata !{i32 786443, metadata !48, metadata !37, i32 11, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
-!37 = metadata !{i32 786443, metadata !48, metadata !16, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!38 = metadata !{i32 786688, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!39 = metadata !{i32 786443, metadata !48, metadata !40, i32 23, i32 0, i32 4} ; [ DW_TAG_lexical_block ]
-!40 = metadata !{i32 786443, metadata !48, metadata !20, i32 23, i32 0, i32 3} ; [ DW_TAG_lexical_block ]
-!41 = metadata !{i32 24, i32 0, metadata !39, null}
-!42 = metadata !{i32 25, i32 0, metadata !39, null}
-!43 = metadata !{i32 26, i32 0, metadata !39, null}
-!44 = metadata !{i32 786688, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!45 = metadata !{i32 27, i32 0, metadata !39, null}
-!46 = metadata !{metadata !16, metadata !17, metadata !20}
-!47 = metadata !{}
-!48 = metadata !{metadata !"small.cc", metadata !"/Users/manav/R8248330"}
-!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00SVal\00SVal\00\0011\000\000\000\006\000\000\000", !48, !1, !14, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x13\00SVal\001\00128\0064\000\000\000", !48, null, null, !4, null, null, null} ; [ DW_TAG_structure_type ] [SVal] [line 1, size 128, align 64, offset 0] [def] [from ]
+!2 = !{!"0x29", !48} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\004\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\000\00\000\00\001", !48, !47, !47, !46, !47,  !47} ; [ DW_TAG_compile_unit ]
+!4 = !{!5, !7, !0, !9}
+!5 = !{!"0xd\00Data\007\0064\0064\000\000", !48, !1, !6} ; [ DW_TAG_member ]
+!6 = !{!"0xf\00\000\0064\0064\000\000", !48, null, null} ; [ DW_TAG_pointer_type ]
+!7 = !{!"0xd\00Kind\008\0032\0032\0064\000", !48, !1, !8} ; [ DW_TAG_member ]
+!8 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", !48, null} ; [ DW_TAG_base_type ]
+!9 = !{!"0x2e\00~SVal\00~SVal\00\0012\000\000\000\006\000\000\000", !48, !1, !10, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!10 = !{!"0x15\00\000\000\000\000\000\000", !48, null, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !{null, !12, !13}
+!12 = !{!"0xf\00\000\0064\0064\000\0064", !48, null, !1} ; [ DW_TAG_pointer_type ]
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", !48, null} ; [ DW_TAG_base_type ]
+!14 = !{!"0x15\00\000\000\000\000\000\000", !48, null, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = !{null, !12}
+!16 = !{!"0x2e\00SVal\00SVal\00_ZN4SValC1Ev\0011\000\001\000\006\000\000\000", !48, !1, !14, null, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null} ; [ DW_TAG_subprogram ]
+!17 = !{!"0x2e\00foo\00foo\00_Z3fooi4SVal\0016\000\001\000\006\000\000\000", !48, !2, !18, null, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null} ; [ DW_TAG_subprogram ]
+!18 = !{!"0x15\00\000\000\000\000\000\000", !48, null, null, !19, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!19 = !{!13, !13, !1}
+!20 = !{!"0x2e\00main\00main\00main\0023\000\001\000\006\000\000\000", !48, !2, !21, null, i32 ()* @main, null, null, null} ; [ DW_TAG_subprogram ]
+!21 = !{!"0x15\00\000\000\000\000\000\000", !48, null, null, !22, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!22 = !{!13}
+!23 = !{!"0x101\00i\0016\000", !17, !2, !13} ; [ DW_TAG_arg_variable ]
+!24 = !MDLocation(line: 16, scope: !17)
+!25 = !{!"0x101\00location\0016\000", !17, !2, !26} ; [ DW_TAG_arg_variable ]
+!26 = !{!"0x10\00SVal\000\0064\0064\000\000", !48, !2, !1} ; [ DW_TAG_reference_type ]
+!27 = !MDLocation(line: 17, scope: !28)
+!28 = !{!"0xb\0016\000\002", !2, !17} ; [ DW_TAG_lexical_block ]
+!29 = !MDLocation(line: 18, scope: !28)
+!30 = !MDLocation(line: 20, scope: !28)
+!31 = !{!"0x101\00this\0011\000", !16, !2, !32} ; [ DW_TAG_arg_variable ]
+!32 = !{!"0x26\00\000\0064\0064\000\0064", !48, !2, !33} ; [ DW_TAG_const_type ]
+!33 = !{!"0xf\00\000\0064\0064\000\000", !48, !2, !1} ; [ DW_TAG_pointer_type ]
+!34 = !MDLocation(line: 11, scope: !16)
+!35 = !MDLocation(line: 11, scope: !36)
+!36 = !{!"0xb\0011\000\001", !48, !37} ; [ DW_TAG_lexical_block ]
+!37 = !{!"0xb\0011\000\000", !48, !16} ; [ DW_TAG_lexical_block ]
+!38 = !{!"0x100\00v\0024\000", !39, !2, !1} ; [ DW_TAG_auto_variable ]
+!39 = !{!"0xb\0023\000\004", !48, !40} ; [ DW_TAG_lexical_block ]
+!40 = !{!"0xb\0023\000\003", !48, !20} ; [ DW_TAG_lexical_block ]
+!41 = !MDLocation(line: 24, scope: !39)
+!42 = !MDLocation(line: 25, scope: !39)
+!43 = !MDLocation(line: 26, scope: !39)
+!44 = !{!"0x100\00k\0026\000", !39, !2, !13} ; [ DW_TAG_auto_variable ]
+!45 = !MDLocation(line: 27, scope: !39)
+!46 = !{!16, !17, !20}
+!47 = !{}
+!48 = !{!"small.cc", !"/Users/manav/R8248330"}
+!49 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
index ec7488089556..80a19649635a 100644
--- a/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
+++ b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -spiller=trivial
-; RUN: llc < %s -verify-machineinstrs -spiller=inline
+; RUN: llc < %s -verify-machineinstrs
 ; PR8612
 ;
 ; This test has an inline asm with early-clobber arguments.
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
index b1d59aa0fde8..67dda672719c 100644
--- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-darwin10"
@@ -11,66 +11,66 @@ target triple = "thumbv7-apple-darwin10"
 
 ; Check debug info output for merged global.
 ; DW_AT_location
-; DW_OP_addr
-; DW_OP_plus
-; .long __MergedGlobals
-; DW_OP_constu
-; offset
+; 0x03 DW_OP_addr
+; 0x.. .long __MergedGlobals
+; 0x10 DW_OP_constu
+; 0x.. offset
+; 0x22 DW_OP_plus
 
-;CHECK: .long Lset7
-;CHECK-NEXT:        @ DW_AT_type
-;CHECK-NEXT:        @ DW_AT_decl_file
-;CHECK-NEXT:        @ DW_AT_decl_line
-;CHECK-NEXT:        @ DW_AT_location
-;CHECK-NEXT:        .byte   3
-;CHECK-NEXT:        .long   __MergedGlobals
-;CHECK-NEXT:        .byte   16
-;CHECK-NEXT:        .byte   1
-;CHECK-NEXT:        .byte   34
+; CHECK: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK:    DW_AT_name {{.*}} "x1"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:    DW_AT_location [DW_FORM_exprloc]        (<0x8> 03 [[ADDR:.. .. .. ..]] 10 00 22  )
+; CHECK: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK:    DW_AT_name {{.*}} "x2"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:    DW_AT_location [DW_FORM_exprloc]        (<0x8> 03 [[ADDR]] 10 01 22  )
 
 define zeroext i8 @get1(i8 zeroext %a) nounwind optsize {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !10), !dbg !30
+  tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !30
   %0 = load i8* @x1, align 4, !dbg !30
-  tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !11), !dbg !30
+  tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !30
   store i8 %a, i8* @x1, align 4, !dbg !30
   ret i8 %0, !dbg !31
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 define zeroext i8 @get2(i8 zeroext %a) nounwind optsize {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !18), !dbg !32
+  tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !32
   %0 = load i8* @x2, align 4, !dbg !32
-  tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !19), !dbg !32
+  tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !19, metadata !{!"0x102"}), !dbg !32
   store i8 %a, i8* @x2, align 4, !dbg !32
   ret i8 %0, !dbg !33
 }
 
 define zeroext i8 @get3(i8 zeroext %a) nounwind optsize {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !21), !dbg !34
+  tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !21, metadata !{!"0x102"}), !dbg !34
   %0 = load i8* @x3, align 4, !dbg !34
-  tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !22), !dbg !34
+  tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !22, metadata !{!"0x102"}), !dbg !34
   store i8 %a, i8* @x3, align 4, !dbg !34
   ret i8 %0, !dbg !35
 }
 
 define zeroext i8 @get4(i8 zeroext %a) nounwind optsize {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !24), !dbg !36
+  tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !24, metadata !{!"0x102"}), !dbg !36
   %0 = load i8* @x4, align 4, !dbg !36
-  tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !25), !dbg !36
+  tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !25, metadata !{!"0x102"}), !dbg !36
   store i8 %a, i8* @x4, align 4, !dbg !36
   ret i8 %0, !dbg !37
 }
 
 define zeroext i8 @get5(i8 zeroext %a) nounwind optsize {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i8 %a}, i64 0, metadata !27), !dbg !38
+  tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !38
   %0 = load i8* @x5, align 4, !dbg !38
-  tail call void @llvm.dbg.value(metadata !{i8 %0}, i64 0, metadata !28), !dbg !38
+  tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !28, metadata !{!"0x102"}), !dbg !38
   store i8 %a, i8* @x5, align 4, !dbg !38
   ret i8 %0, !dbg !39
 }
@@ -78,53 +78,53 @@ entry:
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!49}
 
-!0 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"get1", metadata !"get1", metadata !"get1", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get1, null, null, metadata !42, i32 4} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !47, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", i1 true, metadata !"", i32 0, metadata !48, metadata !48, metadata !40, metadata !41,  metadata !48, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !47, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5, metadata !5}
-!5 = metadata !{i32 786468, metadata !47, metadata !1, metadata !"_Bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"get2", metadata !"get2", metadata !"get2", i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get2, null, null, metadata !43, i32 7} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"get3", metadata !"get3", metadata !"get3", i32 10, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get3, null, null, metadata !44, i32 10} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"get4", metadata !"get4", metadata !"get4", i32 13, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get4, null, null, metadata !45, i32 13} ; [ DW_TAG_subprogram ]
-!9 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"get5", metadata !"get5", metadata !"get5", i32 16, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8 (i8)* @get5, null, null, metadata !46, i32 16} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 4, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!11 = metadata !{i32 786688, metadata !12, metadata !"b", metadata !1, i32 4, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!12 = metadata !{i32 786443, metadata !47, metadata !0, i32 4, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!13 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x1", metadata !"x1", metadata !"", metadata !1, i32 3, metadata !5, i1 true, i1 true, i8* @x1, null} ; [ DW_TAG_variable ]
-!14 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x2", metadata !"x2", metadata !"", metadata !1, i32 6, metadata !5, i1 true, i1 true, i8* @x2, null} ; [ DW_TAG_variable ]
-!15 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x3", metadata !"x3", metadata !"", metadata !1, i32 9, metadata !5, i1 true, i1 true, i8* @x3, null} ; [ DW_TAG_variable ]
-!16 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x4", metadata !"x4", metadata !"", metadata !1, i32 12, metadata !5, i1 true, i1 true, i8* @x4, null} ; [ DW_TAG_variable ]
-!17 = metadata !{i32 786484, i32 0, metadata !1, metadata !"x5", metadata !"x5", metadata !"", metadata !1, i32 15, metadata !5, i1 false, i1 true, i8* @x5, null} ; [ DW_TAG_variable ]
-!18 = metadata !{i32 786689, metadata !6, metadata !"a", metadata !1, i32 7, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 786688, metadata !20, metadata !"b", metadata !1, i32 7, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!20 = metadata !{i32 786443, metadata !47, metadata !6, i32 7, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
-!21 = metadata !{i32 786689, metadata !7, metadata !"a", metadata !1, i32 10, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 786688, metadata !23, metadata !"b", metadata !1, i32 10, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!23 = metadata !{i32 786443, metadata !47, metadata !7, i32 10, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
-!24 = metadata !{i32 786689, metadata !8, metadata !"a", metadata !1, i32 13, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!25 = metadata !{i32 786688, metadata !26, metadata !"b", metadata !1, i32 13, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!26 = metadata !{i32 786443, metadata !47, metadata !8, i32 13, i32 0, i32 3} ; [ DW_TAG_lexical_block ]
-!27 = metadata !{i32 786689, metadata !9, metadata !"a", metadata !1, i32 16, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!28 = metadata !{i32 786688, metadata !29, metadata !"b", metadata !1, i32 16, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!29 = metadata !{i32 786443, metadata !47, metadata !9, i32 16, i32 0, i32 4} ; [ DW_TAG_lexical_block ]
-!30 = metadata !{i32 4, i32 0, metadata !0, null}
-!31 = metadata !{i32 4, i32 0, metadata !12, null}
-!32 = metadata !{i32 7, i32 0, metadata !6, null}
-!33 = metadata !{i32 7, i32 0, metadata !20, null}
-!34 = metadata !{i32 10, i32 0, metadata !7, null}
-!35 = metadata !{i32 10, i32 0, metadata !23, null}
-!36 = metadata !{i32 13, i32 0, metadata !8, null}
-!37 = metadata !{i32 13, i32 0, metadata !26, null}
-!38 = metadata !{i32 16, i32 0, metadata !9, null}
-!39 = metadata !{i32 16, i32 0, metadata !29, null}
-!40 = metadata !{metadata !0, metadata !6, metadata !7, metadata !8, metadata !9}
-!41 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16, metadata !17}
-!42 = metadata !{metadata !10, metadata !11}
-!43 = metadata !{metadata !18, metadata !19}
-!44 = metadata !{metadata !21, metadata !22}
-!45 = metadata !{metadata !24, metadata !25}
-!46 = metadata !{metadata !27, metadata !28}
-!47 = metadata !{metadata !"foo.c", metadata !"/tmp/"}
-!48 = metadata !{}
-!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00get1\00get1\00get1\004\000\001\000\006\00256\001\004", !47, !1, !3, null, i8 (i8)* @get1, null, null, !42} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !47} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)\001\00\000\00\000", !47, !48, !48, !40, !41,  !48} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !47, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5, !5}
+!5 = !{!"0x24\00_Bool\000\008\008\000\000\002", !47, !1} ; [ DW_TAG_base_type ]
+!6 = !{!"0x2e\00get2\00get2\00get2\007\000\001\000\006\00256\001\007", !47, !1, !3, null, i8 (i8)* @get2, null, null, !43} ; [ DW_TAG_subprogram ]
+!7 = !{!"0x2e\00get3\00get3\00get3\0010\000\001\000\006\00256\001\0010", !47, !1, !3, null, i8 (i8)* @get3, null, null, !44} ; [ DW_TAG_subprogram ]
+!8 = !{!"0x2e\00get4\00get4\00get4\0013\000\001\000\006\00256\001\0013", !47, !1, !3, null, i8 (i8)* @get4, null, null, !45} ; [ DW_TAG_subprogram ]
+!9 = !{!"0x2e\00get5\00get5\00get5\0016\000\001\000\006\00256\001\0016", !47, !1, !3, null, i8 (i8)* @get5, null, null, !46} ; [ DW_TAG_subprogram ]
+!10 = !{!"0x101\00a\004\000", !0, !1, !5} ; [ DW_TAG_arg_variable ]
+!11 = !{!"0x100\00b\004\000", !12, !1, !5} ; [ DW_TAG_auto_variable ]
+!12 = !{!"0xb\004\000\000", !47, !0} ; [ DW_TAG_lexical_block ]
+!13 = !{!"0x34\00x1\00x1\00\003\001\001", !1, !1, !5, i8* @x1, null} ; [ DW_TAG_variable ]
+!14 = !{!"0x34\00x2\00x2\00\006\001\001", !1, !1, !5, i8* @x2, null} ; [ DW_TAG_variable ]
+!15 = !{!"0x34\00x3\00x3\00\009\001\001", !1, !1, !5, i8* @x3, null} ; [ DW_TAG_variable ]
+!16 = !{!"0x34\00x4\00x4\00\0012\001\001", !1, !1, !5, i8* @x4, null} ; [ DW_TAG_variable ]
+!17 = !{!"0x34\00x5\00x5\00\0015\000\001", !1, !1, !5, i8* @x5, null} ; [ DW_TAG_variable ]
+!18 = !{!"0x101\00a\007\000", !6, !1, !5} ; [ DW_TAG_arg_variable ]
+!19 = !{!"0x100\00b\007\000", !20, !1, !5} ; [ DW_TAG_auto_variable ]
+!20 = !{!"0xb\007\000\001", !47, !6} ; [ DW_TAG_lexical_block ]
+!21 = !{!"0x101\00a\0010\000", !7, !1, !5} ; [ DW_TAG_arg_variable ]
+!22 = !{!"0x100\00b\0010\000", !23, !1, !5} ; [ DW_TAG_auto_variable ]
+!23 = !{!"0xb\0010\000\002", !47, !7} ; [ DW_TAG_lexical_block ]
+!24 = !{!"0x101\00a\0013\000", !8, !1, !5} ; [ DW_TAG_arg_variable ]
+!25 = !{!"0x100\00b\0013\000", !26, !1, !5} ; [ DW_TAG_auto_variable ]
+!26 = !{!"0xb\0013\000\003", !47, !8} ; [ DW_TAG_lexical_block ]
+!27 = !{!"0x101\00a\0016\000", !9, !1, !5} ; [ DW_TAG_arg_variable ]
+!28 = !{!"0x100\00b\0016\000", !29, !1, !5} ; [ DW_TAG_auto_variable ]
+!29 = !{!"0xb\0016\000\004", !47, !9} ; [ DW_TAG_lexical_block ]
+!30 = !MDLocation(line: 4, scope: !0)
+!31 = !MDLocation(line: 4, scope: !12)
+!32 = !MDLocation(line: 7, scope: !6)
+!33 = !MDLocation(line: 7, scope: !20)
+!34 = !MDLocation(line: 10, scope: !7)
+!35 = !MDLocation(line: 10, scope: !23)
+!36 = !MDLocation(line: 13, scope: !8)
+!37 = !MDLocation(line: 13, scope: !26)
+!38 = !MDLocation(line: 16, scope: !9)
+!39 = !MDLocation(line: 16, scope: !29)
+!40 = !{!0, !6, !7, !8, !9}
+!41 = !{!13, !14, !15, !16, !17}
+!42 = !{!10, !11}
+!43 = !{!18, !19}
+!44 = !{!21, !22}
+!45 = !{!24, !25}
+!46 = !{!27, !28}
+!47 = !{!"foo.c", !"/tmp/"}
+!48 = !{}
+!49 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/ARM/2011-04-12-AlignBug.ll b/test/CodeGen/ARM/2011-04-12-AlignBug.ll
index 97297f78c7e6..1a6879e366eb 100644
--- a/test/CodeGen/ARM/2011-04-12-AlignBug.ll
+++ b/test/CodeGen/ARM/2011-04-12-AlignBug.ll
@@ -1,11 +1,10 @@
 ; RUN: llc < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-darwin10.0.0"
 
 ; CHECK: align 3
 @.v = private unnamed_addr constant <4 x i32> <i32 1, i32 2, i32 3, i32 4>, align 8
-; CHECK: align 2
-@.strA = private unnamed_addr constant [4 x i8] c"bar\00"
+; CHECK: align 4
+@.strA = private unnamed_addr constant [4 x i64] zeroinitializer
 ; CHECK-NOT: align
 @.strB = private unnamed_addr constant [4 x i8] c"foo\00", align 1
 @.strC = private unnamed_addr constant [4 x i8] c"baz\00", section "__TEXT,__cstring,cstring_literals", align 1
diff --git a/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll b/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll
index eb23de0b9716..e9a6793a768a 100644
--- a/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll
+++ b/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll
@@ -12,4 +12,4 @@ entry:
   ret void
 }
 
-!0 = metadata !{i32 109}
+!0 = !{i32 109}
diff --git a/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
index d3394b58ed93..2af3e3e6bd4c 100644
--- a/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
+++ b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
@@ -81,8 +81,8 @@ declare void @_Unwind_SjLj_Resume_or_Rethrow(i8*)
 
 declare void @_ZSt9terminatev()
 
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"bool", metadata !1}
-!4 = metadata !{metadata !"int", metadata !1}
+!0 = !{!"any pointer", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA", null}
+!3 = !{!"bool", !1}
+!4 = !{!"int", !1}
diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
index ed2840bbff59..3edc946825bb 100644
--- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
@@ -1,25 +1,23 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
 
 ; Check debug info output for merged global.
 ; DW_AT_location
-; DW_OP_addr
-; DW_OP_plus
-; .long __MergedGlobals
-; DW_OP_constu
-; offset
-
-;CHECK: .long Lset9
-;CHECK-NEXT:        @ DW_AT_type
-;CHECK-NEXT:        @ DW_AT_decl_file
-;CHECK-NEXT:        @ DW_AT_decl_line
-;CHECK-NEXT:        @ DW_AT_location
-;CHECK-NEXT:        .byte   3
-;CHECK-NEXT:        .long   __MergedGlobals
-;CHECK-NEXT:        .byte   16
-; 4 is byte offset of x2 in __MergedGobals
-;CHECK-NEXT:        .byte   4
-;CHECK-NEXT:        .byte   34
+; 0x03 DW_OP_addr
+; 0x.. .long __MergedGlobals
+; 0x10 DW_OP_constu
+; 0x.. offset
+; 0x22 DW_OP_plus
 
+; CHECK: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK:    DW_AT_name {{.*}} "x1"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:    DW_AT_location [DW_FORM_exprloc]        (<0x8> 03 [[ADDR:.. .. .. ..]] 10 00 22  )
+; CHECK: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK:    DW_AT_name {{.*}} "x2"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:    DW_AT_location [DW_FORM_exprloc]        (<0x8> 03 [[ADDR]] 10 04 22  )
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-macosx10.7.0"
@@ -31,97 +29,94 @@ target triple = "thumbv7-apple-macosx10.7.0"
 @x5 = global i32 0, align 4
 
 define i32 @get1(i32 %a) nounwind optsize ssp {
-  tail call void @llvm.dbg.value(metadata !{i32 %a}, i64 0, metadata !10), !dbg !30
+  tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !30
   %1 = load i32* @x1, align 4, !dbg !31
-  tail call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !11), !dbg !31
+  tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !31
   store i32 %a, i32* @x1, align 4, !dbg !31
   ret i32 %1, !dbg !31
 }
 
 define i32 @get2(i32 %a) nounwind optsize ssp {
-  tail call void @llvm.dbg.value(metadata !{i32 %a}, i64 0, metadata !13), !dbg !32
+  tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !32
   %1 = load i32* @x2, align 4, !dbg !33
-  tail call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !14), !dbg !33
+  tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !33
   store i32 %a, i32* @x2, align 4, !dbg !33
   ret i32 %1, !dbg !33
 }
 
 define i32 @get3(i32 %a) nounwind optsize ssp {
-  tail call void @llvm.dbg.value(metadata !{i32 %a}, i64 0, metadata !16), !dbg !34
+  tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !34
   %1 = load i32* @x3, align 4, !dbg !35
-  tail call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !17), !dbg !35
+  tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !35
   store i32 %a, i32* @x3, align 4, !dbg !35
   ret i32 %1, !dbg !35
 }
 
 define i32 @get4(i32 %a) nounwind optsize ssp {
-  tail call void @llvm.dbg.value(metadata !{i32 %a}, i64 0, metadata !19), !dbg !36
+  tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !19, metadata !{!"0x102"}), !dbg !36
   %1 = load i32* @x4, align 4, !dbg !37
-  tail call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !20), !dbg !37
+  tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !20, metadata !{!"0x102"}), !dbg !37
   store i32 %a, i32* @x4, align 4, !dbg !37
   ret i32 %1, !dbg !37
 }
 
 define i32 @get5(i32 %a) nounwind optsize ssp {
-  tail call void @llvm.dbg.value(metadata !{i32 %a}, i64 0, metadata !27), !dbg !38
+  tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !38
   %1 = load i32* @x5, align 4, !dbg !39
-  tail call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !28), !dbg !39
+  tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !28, metadata !{!"0x102"}), !dbg !39
   store i32 %a, i32* @x5, align 4, !dbg !39
   ret i32 %1, !dbg !39
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!49}
 
-!0 = metadata !{i32 786449, metadata !47, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !48, metadata !48, metadata !40, metadata !41,  metadata !48, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get1", metadata !"get1", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get1, null, null, metadata !42, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [get1]
-!2 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get2", metadata !"get2", metadata !"", i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get2, null, null, metadata !43, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [get2]
-!7 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get3", metadata !"get3", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get3, null, null, metadata !44, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [get3]
-!8 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get4", metadata !"get4", metadata !"", i32 14, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get4, null, null, metadata !45, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [get4]
-!9 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"get5", metadata !"get5", metadata !"", i32 17, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @get5, null, null, metadata !46, i32 17} ; [ DW_TAG_subprogram ] [line 17] [def] [get5]
-!10 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 16777221, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!11 = metadata !{i32 786688, metadata !12, metadata !"b", metadata !2, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!12 = metadata !{i32 786443, metadata !47, metadata !1, i32 5, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
-!13 = metadata !{i32 786689, metadata !6, metadata !"a", metadata !2, i32 16777224, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!14 = metadata !{i32 786688, metadata !15, metadata !"b", metadata !2, i32 8, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 786443, metadata !47, metadata !6, i32 8, i32 17, i32 1} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 786689, metadata !7, metadata !"a", metadata !2, i32 16777227, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 786688, metadata !18, metadata !"b", metadata !2, i32 11, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!18 = metadata !{i32 786443, metadata !47, metadata !7, i32 11, i32 19, i32 2} ; [ DW_TAG_lexical_block ]
-!19 = metadata !{i32 786689, metadata !8, metadata !"a", metadata !2, i32 16777230, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!20 = metadata !{i32 786688, metadata !21, metadata !"b", metadata !2, i32 14, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!21 = metadata !{i32 786443, metadata !47, metadata !8, i32 14, i32 19, i32 3} ; [ DW_TAG_lexical_block ]
-!22 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x5", metadata !"x5", metadata !"", metadata !2, i32 16, metadata !5, i32 0, i32 1, i32* @x5, null} ; [ DW_TAG_variable ]
-!23 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x4", metadata !"x4", metadata !"", metadata !2, i32 13, metadata !5, i32 1, i32 1, i32* @x4, null} ; [ DW_TAG_variable ]
-!24 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x3", metadata !"x3", metadata !"", metadata !2, i32 10, metadata !5, i32 1, i32 1, i32* @x3, null} ; [ DW_TAG_variable ]
-!25 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x2", metadata !"x2", metadata !"", metadata !2, i32 7, metadata !5, i32 1, i32 1, i32* @x2, null} ; [ DW_TAG_variable ]
-!26 = metadata !{i32 786484, i32 0, metadata !0, metadata !"x1", metadata !"x1", metadata !"", metadata !2, i32 4, metadata !5, i32 1, i32 1, i32* @x1, null} ; [ DW_TAG_variable ]
-!27 = metadata !{i32 786689, metadata !9, metadata !"a", metadata !2, i32 16777233, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!28 = metadata !{i32 786688, metadata !29, metadata !"b", metadata !2, i32 17, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!29 = metadata !{i32 786443, metadata !47, metadata !9, i32 17, i32 19, i32 4} ; [ DW_TAG_lexical_block ]
-!30 = metadata !{i32 5, i32 16, metadata !1, null}
-!31 = metadata !{i32 5, i32 32, metadata !12, null}
-!32 = metadata !{i32 8, i32 14, metadata !6, null}
-!33 = metadata !{i32 8, i32 29, metadata !15, null}
-!34 = metadata !{i32 11, i32 16, metadata !7, null}
-!35 = metadata !{i32 11, i32 32, metadata !18, null}
-!36 = metadata !{i32 14, i32 16, metadata !8, null}
-!37 = metadata !{i32 14, i32 32, metadata !21, null}
-!38 = metadata !{i32 17, i32 16, metadata !9, null}
-!39 = metadata !{i32 17, i32 32, metadata !29, null}
-!40 = metadata !{metadata !1, metadata !6, metadata !7, metadata !8, metadata !9}
-!41 = metadata !{metadata !22, metadata !23, metadata !24, metadata !25, metadata !26}
-!42 = metadata !{metadata !10, metadata !11}
-!43 = metadata !{metadata !13, metadata !14}
-!44 = metadata !{metadata !16, metadata !17}
-!45 = metadata !{metadata !19, metadata !20}
-!46 = metadata !{metadata !27, metadata !28}
-!47 = metadata !{metadata !"ss3.c", metadata !"/private/tmp"}
-!48 = metadata !{}
-!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang\001\00\000\00\001", !47, !48, !48, !40, !41,  !48} ; [ DW_TAG_compile_unit ]
+!1 = !{!"0x2e\00get1\00get1\00\005\000\001\000\006\00256\001\005", !47, !2, !3, null, i32 (i32)* @get1, null, null, !42} ; [ DW_TAG_subprogram ] [line 5] [def] [get1]
+!2 = !{!"0x29", !47} ; [ DW_TAG_file_type ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !47, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
+!6 = !{!"0x2e\00get2\00get2\00\008\000\001\000\006\00256\001\008", !47, !2, !3, null, i32 (i32)* @get2, null, null, !43} ; [ DW_TAG_subprogram ] [line 8] [def] [get2]
+!7 = !{!"0x2e\00get3\00get3\00\0011\000\001\000\006\00256\001\0011", !47, !2, !3, null, i32 (i32)* @get3, null, null, !44} ; [ DW_TAG_subprogram ] [line 11] [def] [get3]
+!8 = !{!"0x2e\00get4\00get4\00\0014\000\001\000\006\00256\001\0014", !47, !2, !3, null, i32 (i32)* @get4, null, null, !45} ; [ DW_TAG_subprogram ] [line 14] [def] [get4]
+!9 = !{!"0x2e\00get5\00get5\00\0017\000\001\000\006\00256\001\0017", !47, !2, !3, null, i32 (i32)* @get5, null, null, !46} ; [ DW_TAG_subprogram ] [line 17] [def] [get5]
+!10 = !{!"0x101\00a\0016777221\000", !1, !2, !5} ; [ DW_TAG_arg_variable ]
+!11 = !{!"0x100\00b\005\000", !12, !2, !5} ; [ DW_TAG_auto_variable ]
+!12 = !{!"0xb\005\0019\000", !47, !1} ; [ DW_TAG_lexical_block ]
+!13 = !{!"0x101\00a\0016777224\000", !6, !2, !5} ; [ DW_TAG_arg_variable ]
+!14 = !{!"0x100\00b\008\000", !15, !2, !5} ; [ DW_TAG_auto_variable ]
+!15 = !{!"0xb\008\0017\001", !47, !6} ; [ DW_TAG_lexical_block ]
+!16 = !{!"0x101\00a\0016777227\000", !7, !2, !5} ; [ DW_TAG_arg_variable ]
+!17 = !{!"0x100\00b\0011\000", !18, !2, !5} ; [ DW_TAG_auto_variable ]
+!18 = !{!"0xb\0011\0019\002", !47, !7} ; [ DW_TAG_lexical_block ]
+!19 = !{!"0x101\00a\0016777230\000", !8, !2, !5} ; [ DW_TAG_arg_variable ]
+!20 = !{!"0x100\00b\0014\000", !21, !2, !5} ; [ DW_TAG_auto_variable ]
+!21 = !{!"0xb\0014\0019\003", !47, !8} ; [ DW_TAG_lexical_block ]
+!25 = !{!"0x34\00x1\00x1\00\004\001\001", !0, !2, !5, i32* @x1, null} ; [ DW_TAG_variable ]
+!26 = !{!"0x34\00x2\00x2\00\007\001\001", !0, !2, !5, i32* @x2, null} ; [ DW_TAG_variable ]
+!27 = !{!"0x101\00a\0016777233\000", !9, !2, !5} ; [ DW_TAG_arg_variable ]
+!28 = !{!"0x100\00b\0017\000", !29, !2, !5} ; [ DW_TAG_auto_variable ]
+!29 = !{!"0xb\0017\0019\004", !47, !9} ; [ DW_TAG_lexical_block ]
+!30 = !MDLocation(line: 5, column: 16, scope: !1)
+!31 = !MDLocation(line: 5, column: 32, scope: !12)
+!32 = !MDLocation(line: 8, column: 14, scope: !6)
+!33 = !MDLocation(line: 8, column: 29, scope: !15)
+!34 = !MDLocation(line: 11, column: 16, scope: !7)
+!35 = !MDLocation(line: 11, column: 32, scope: !18)
+!36 = !MDLocation(line: 14, column: 16, scope: !8)
+!37 = !MDLocation(line: 14, column: 32, scope: !21)
+!38 = !MDLocation(line: 17, column: 16, scope: !9)
+!39 = !MDLocation(line: 17, column: 32, scope: !29)
+!40 = !{!1, !6, !7, !8, !9}
+!41 = !{!25, !26}
+!42 = !{!10, !11}
+!43 = !{!13, !14}
+!44 = !{!16, !17}
+!45 = !{!19, !20}
+!46 = !{!27, !28}
+!47 = !{!"ss3.c", !"/private/tmp"}
+!48 = !{}
+!49 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll
index b3a7e3444f36..69d72bd83391 100644
--- a/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll
+++ b/test/CodeGen/ARM/2012-04-24-SplitEHCriticalEdge.ll
@@ -65,7 +65,7 @@ declare i32 @__gxx_personality_sj0(...)
 
 !llvm.module.flags = !{!0, !1, !2, !3}
 
-!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!0 = !{i32 1, !"Objective-C Version", i32 2}
+!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
diff --git a/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
index adb5c7e4b259..70e307934559 100644
--- a/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
+++ b/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
@@ -169,4 +169,4 @@ define arm_aapcs_vfpcc void @foo(float, i1 zeroext, i1 zeroext) nounwind uwtable
 
 declare arm_aapcs_vfpcc void @bar(%0*, float)
 
-!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!0 = !{!"branch_weights", i32 64, i32 4}
diff --git a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll
index 5235e9cb2034..53860ea1b0b9 100644
--- a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll
+++ b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv.ll
@@ -8,4 +8,4 @@ define void @f() nounwind ssp {
   ret void
 }
 
-!0 = metadata !{i32 318437}
+!0 = !{i32 318437}
diff --git a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll
index d389b5c5c1cf..b47247c5454f 100644
--- a/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll
+++ b/test/CodeGen/ARM/2012-09-25-InlineAsmScalarToVectorConv2.ll
@@ -8,4 +8,4 @@ define hidden void @f(i32* %corr, i32 %order) nounwind ssp {
   ret void
 }
 
-!0 = metadata !{i32 257}
+!0 = !{i32 257}
diff --git a/test/CodeGen/ARM/2014-07-18-earlyclobber-str-post.ll b/test/CodeGen/ARM/2014-07-18-earlyclobber-str-post.ll
index 9ea762ae9bff..df7d2457e763 100644
--- a/test/CodeGen/ARM/2014-07-18-earlyclobber-str-post.ll
+++ b/test/CodeGen/ARM/2014-07-18-earlyclobber-str-post.ll
@@ -4,10 +4,30 @@
 ; e.g. str r0, [r0], #4
 
 define i32* @earlyclobber-str-post(i32* %addr) nounwind {
-; CHECK: earlyclobber-str-post
+; CHECK-LABEL: earlyclobber-str-post
 ; CHECK-NOT: str r[[REG:[0-9]+]], [r[[REG]]], #4
   %val = ptrtoint i32* %addr to i32
   store i32 %val, i32* %addr
   %new = getelementptr i32* %addr, i32 1
   ret i32* %new
 }
+
+define i16* @earlyclobber-strh-post(i16* %addr) nounwind {
+; CHECK-LABEL: earlyclobber-strh-post
+; CHECK-NOT: strh r[[REG:[0-9]+]], [r[[REG]]], #2
+  %val = ptrtoint i16* %addr to i32
+  %tr = trunc i32 %val to i16
+  store i16 %tr, i16* %addr
+  %new = getelementptr i16* %addr, i32 1
+  ret i16* %new
+}
+
+define i8* @earlyclobber-strb-post(i8* %addr) nounwind {
+; CHECK-LABEL: earlyclobber-strb-post
+; CHECK-NOT: strb r[[REG:[0-9]+]], [r[[REG]]], #1
+  %val = ptrtoint i8* %addr to i32
+  %tr = trunc i32 %val to i8
+  store i8 %tr, i8* %addr
+  %new = getelementptr i8* %addr, i32 1
+  ret i8* %new
+}
diff --git a/test/CodeGen/ARM/2014-08-04-muls-it.ll b/test/CodeGen/ARM/2014-08-04-muls-it.ll
new file mode 100644
index 000000000000..4636bff880a8
--- /dev/null
+++ b/test/CodeGen/ARM/2014-08-04-muls-it.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple thumbv7-eabi -arm-restrict-it -filetype asm -o - %s \
+; RUN:    | FileCheck %s
+
+define arm_aapcscc i32 @function(i32 %i, i32 %j) {
+entry:
+  %cmp = icmp eq i32 %i, %j
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %mul = mul nsw i32 %i, %i
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %i.addr.0 = phi i32 [ %mul, %if.then ], [ %i, %entry ]
+  ret i32 %i.addr.0
+}
+
+; CHECK-LABEL: function
+; CHECK: cmp r0, r1
+; CHECK: bne [[LABEL:[.*]]]
+; CHECK-NOT: mulseq r0, r0, r0
+; CHECK: [[LABEL]]
+; CHECK: muls r0, r0, r0
+; CHECK: bx lr
+
diff --git a/test/CodeGen/ARM/aapcs-hfa-code.ll b/test/CodeGen/ARM/aapcs-hfa-code.ll
index 396e83816ccf..5545dfdcd4c8 100644
--- a/test/CodeGen/ARM/aapcs-hfa-code.ll
+++ b/test/CodeGen/ARM/aapcs-hfa-code.ll
@@ -54,12 +54,11 @@ define arm_aapcs_vfpcc void @test_1double({ double } %a) {
 ; CHECK: bl test_1double
 
 ; CHECK-M4F-LABEL: test_1double:
-; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
-; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
-; CHECK-M4F: movt [[ONEHI]], #16368
-; CHECK-M4F-DAG: vmov s0, [[ONELO]]
-; CHECK-M4F-DAG: vmov s1, [[ONEHI]]
+; CHECK-M4F: vldr d0, [[CP_LABEL:.*]]
 ; CHECK-M4F: bl test_1double
+; CHECK-M4F: [[CP_LABEL]]
+; CHECK-M4F-NEXT: .long 0
+; CHECK-M4F-NEXT: .long 1072693248
 
   call arm_aapcs_vfpcc void @test_1double({ double } { double 1.0 })
   ret void
@@ -76,11 +75,10 @@ define arm_aapcs_vfpcc void @test_1double_nosplit([4 x float], [4 x double], [3
 ; CHECK: bl test_1double_nosplit
 
 ; CHECK-M4F-LABEL: test_1double_nosplit:
-; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
 ; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
+; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
 ; CHECK-M4F: movt [[ONEHI]], #16368
-; CHECK-M4F-DAG: str [[ONELO]], [sp]
-; CHECK-M4F-DAG: str [[ONEHI]], [sp, #4]
+; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp]
 ; CHECK-M4F: bl test_1double_nosplit
   call arm_aapcs_vfpcc void @test_1double_nosplit([4 x float] undef, [4 x double] undef, [3 x float] undef, double 1.0)
   ret void
@@ -92,19 +90,16 @@ define arm_aapcs_vfpcc void @test_1double_misaligned([4 x double], [4 x double],
   call arm_aapcs_vfpcc void @test_1double_misaligned([4 x double] undef, [4 x double] undef, float undef, double 1.0)
 
 ; CHECK-LABEL: test_1double_misaligned:
-; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0
-; CHECK-DAG: mov r[[BASE:[0-9]+]], sp
 ; CHECK-DAG: movw [[ONEHI:r[0-9]+]], #0
+; CHECK-DAG: mov [[ONELO:r[0-9]+]], #0
 ; CHECK-DAG: movt [[ONEHI]], #16368
-; CHECK-DAG: str [[ONELO]], [r[[BASE]], #8]!
-; CHECK-DAG: str [[ONEHI]], [r[[BASE]], #4]
+; CHECK-DAG: strd [[ONELO]], [[ONEHI]], [sp, #8]
 
 ; CHECK-M4F-LABEL: test_1double_misaligned:
-; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
 ; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0
+; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0
 ; CHECK-M4F: movt [[ONEHI]], #16368
-; CHECK-M4F-DAG: str [[ONELO]], [sp, #8]
-; CHECK-M4F-DAG: str [[ONEHI]], [sp, #12]
+; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp, #8]
 ; CHECK-M4F: bl test_1double_misaligned
 
   ret void
diff --git a/test/CodeGen/ARM/adv-copy-opt.ll b/test/CodeGen/ARM/adv-copy-opt.ll
new file mode 100644
index 000000000000..f71bf78b62c4
--- /dev/null
+++ b/test/CodeGen/ARM/adv-copy-opt.ll
@@ -0,0 +1,38 @@
+; RUN: llc -O1 -mtriple=armv7s-apple-ios -mcpu=swift < %s -disable-adv-copy-opt=true | FileCheck -check-prefix=NOOPT --check-prefix=CHECK %s
+; RUN: llc -O1 -mtriple=armv7s-apple-ios -mcpu=swift < %s -disable-adv-copy-opt=false | FileCheck -check-prefix=OPT --check-prefix=CHECK %s
+; RUN: llc -O1 -mtriple=thumbv7s-apple-ios -mcpu=swift < %s -disable-adv-copy-opt=true | FileCheck -check-prefix=NOOPT --check-prefix=CHECK %s
+; RUN: llc -O1 -mtriple=thumbv7s-apple-ios -mcpu=swift < %s -disable-adv-copy-opt=false | FileCheck -check-prefix=OPT --check-prefix=CHECK %s
+
+; CHECK-LABEL: simpleVectorDiv
+; ABI: %A => r0, r1.
+;      %B => r2, r3
+;      ret => r0, r1
+; We want to compute:
+; r0 = r0 / r2
+; r1 = r1 / r3
+;
+; NOOPT: vmov	[[B:d[0-9]+]], r2, r3
+; NOOPT-NEXT: vmov	[[A:d[0-9]+]], r0, r1
+; Move the low part of B into a register.
+; Unfortunately, we cannot express that the 's' register is the low
+; part of B, i.e., sIdx == BIdx x 2. E.g., B = d1, B_low = s2.
+; NOOPT-NEXT: vmov	[[B_LOW:r[0-9]+]], s{{[0-9]+}}
+; NOOPT-NEXT: vmov	[[A_LOW:r[0-9]+]], s{{[0-9]+}}
+; NOOPT-NEXT: udiv	[[RES_LOW:r[0-9]+]], [[A_LOW]], [[B_LOW]]
+; NOOPT-NEXT: vmov	[[B_HIGH:r[0-9]+]], s{{[0-9]+}}
+; NOOPT-NEXT: vmov	[[A_HIGH:r[0-9]+]], s{{[0-9]+}}
+; NOOPT-NEXT: udiv	[[RES_HIGH:r[0-9]+]], [[A_HIGH]], [[B_HIGH]]
+; NOOPT-NEXT: vmov.32	[[RES:d[0-9]+]][0], [[RES_LOW]]
+; NOOPT-NEXT: vmov.32	[[RES]][1], [[RES_HIGH]]
+; NOOPT-NEXT: vmov	r0, r1, [[RES]]
+; NOOPT-NEXT: bx	lr
+;
+; OPT-NOT: vmov
+; OPT: 	udiv	r0, r0, r2
+; OPT-NEXT: udiv	r1, r1, r3
+; OPT-NEXT: bx	lr
+define <2 x i32> @simpleVectorDiv(<2 x i32> %A, <2 x i32> %B) nounwind {
+entry:
+  %div = udiv <2 x i32> %A, %B
+  ret <2 x i32> %div
+}
diff --git a/test/CodeGen/ARM/aliases.ll b/test/CodeGen/ARM/aliases.ll
index f55ae10b247d..5a737ad995ac 100644
--- a/test/CodeGen/ARM/aliases.ll
+++ b/test/CodeGen/ARM/aliases.ll
@@ -25,9 +25,9 @@
 define i32 @foo_f() {
   ret i32 0
 }
-@bar_f = alias weak %FunTy* @foo_f
+@bar_f = weak alias %FunTy* @foo_f
 
-@bar_i = alias internal i32* @bar
+@bar_i = internal alias i32* @bar
 
 @A = alias bitcast (i32* @bar to i64*)
 
diff --git a/test/CodeGen/ARM/alloc-no-stack-realign.ll b/test/CodeGen/ARM/alloc-no-stack-realign.ll
index 6e6311d4d34f..5ad87191efe9 100644
--- a/test/CodeGen/ARM/alloc-no-stack-realign.ll
+++ b/test/CodeGen/ARM/alloc-no-stack-realign.ll
@@ -8,21 +8,28 @@
 
 define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" {
 entry:
-; NO-REALIGN: test1
-; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
-; NO-REALIGN: vst1.64
-; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
-; NO-REALIGN: vst1.64
-; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
-; NO-REALIGN: vst1.64
-; NO-REALIGN: vst1.64
-; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
-; NO-REALIGN: vst1.64
-; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
-; NO-REALIGN: vst1.64
-; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
-; NO-REALIGN: vst1.64
-; NO-REALIGN: vst1.64
+; NO-REALIGN-LABEL: test1
+; NO-REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
+; NO-REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
+; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
+; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48
+; NO-REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+
+; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48
+; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; NO-REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
+; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]!
+; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+
+; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0:0]], #48
+; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; NO-REALIGN: add r[[R2:[0-9]+]], r[[R0]], #32
+; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
+; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
  %retval = alloca <16 x float>, align 16
  %0 = load <16 x float>* @T3_retval, align 16
  store <16 x float> %0, <16 x float>* %retval
@@ -33,22 +40,31 @@ entry:
 
 define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp {
 entry:
-; REALIGN: test2
-; REALIGN: bic sp, sp, #63
-; REALIGN: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
-; REALIGN: vst1.64
-; REALIGN: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
-; REALIGN: vst1.64
-; REALIGN: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
-; REALIGN: vst1.64
-; REALIGN: vst1.64
-; REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
-; REALIGN: vst1.64
-; REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
-; REALIGN: vst1.64
-; REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
-; REALIGN: vst1.64
-; REALIGN: vst1.64
+; REALIGN-LABEL: test2
+; REALIGN: bfc sp, #0, #6
+; REALIGN: mov r[[R2:[0-9]+]], r[[R1:[0-9]+]]
+; REALIGN: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]!
+; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #32
+; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; REALIGN: add r[[R2:[0-9]+]], r[[R1]], #48
+; REALIGN: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+
+
+; REALIGN: orr r[[R2:[0-9]+]], r[[R1:[0-9]+]], #48
+; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #32
+; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; REALIGN: orr r[[R2:[0-9]+]], r[[R1]], #16
+; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]
+; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+
+; REALIGN: add r[[R1:[0-9]+]], r[[R0:0]], #48
+; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; REALIGN: add r[[R1:[0-9]+]], r[[R0]], #32
+; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]
+; REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
+; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
  %retval = alloca <16 x float>, align 16
  %0 = load <16 x float>* @T3_retval, align 16
  store <16 x float> %0, <16 x float>* %retval
diff --git a/test/CodeGen/ARM/arm-abi-attr.ll b/test/CodeGen/ARM/arm-abi-attr.ll
index f3923ae5cc82..61cb6cefa170 100644
--- a/test/CodeGen/ARM/arm-abi-attr.ll
+++ b/test/CodeGen/ARM/arm-abi-attr.ll
@@ -1,13 +1,13 @@
-; RUN: llc -mtriple=arm-linux < %s | FileCheck %s --check-prefix=APCS
-; RUN: llc -mtriple=arm-linux -mattr=apcs < %s | \
+; RUN: llc -mtriple=arm-linux-gnu < %s | FileCheck %s --check-prefix=APCS
+; RUN: llc -mtriple=arm-linux-gnu -target-abi=apcs < %s | \
 ; RUN: FileCheck %s --check-prefix=APCS
-; RUN: llc -mtriple=arm-linux-gnueabi -mattr=apcs < %s | \
+; RUN: llc -mtriple=arm-linux-gnueabi -target-abi=apcs < %s | \
 ; RUN: FileCheck %s --check-prefix=APCS
 
 ; RUN: llc -mtriple=arm-linux-gnueabi < %s | FileCheck %s --check-prefix=AAPCS
-; RUN: llc -mtriple=arm-linux-gnueabi -mattr=aapcs < %s | \
+; RUN: llc -mtriple=arm-linux-gnueabi -target-abi=aapcs < %s | \
 ; RUN: FileCheck %s --check-prefix=AAPCS
-; RUN: llc -mtriple=arm-linux-gnu -mattr=aapcs < %s | \
+; RUN: llc -mtriple=arm-linux-gnu -target-abi=aapcs < %s | \
 ; RUN: FileCheck %s --check-prefix=AAPCS
 
 ; The stack is 8 byte aligned on AAPCS and 4 on APCS, so we should get a BIC
diff --git a/test/CodeGen/ARM/arm32-round-conv.ll b/test/CodeGen/ARM/arm32-round-conv.ll
new file mode 100644
index 000000000000..88fb891a8efb
--- /dev/null
+++ b/test/CodeGen/ARM/arm32-round-conv.ll
@@ -0,0 +1,117 @@
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=+fp-armv8 | FileCheck %s
+; RUN: llc < %s -mtriple=armv8-linux-gnueabihf -mattr=+fp-armv8 | FileCheck %s
+
+; CHECK-LABEL: test1
+; CHECK: vcvtm.s32.f32
+define i32 @test1(float %a) {
+entry:
+  %call = call float @floorf(float %a) nounwind readnone
+  %conv = fptosi float %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test2
+; CHECK: vcvtm.u32.f32
+define i32 @test2(float %a) {
+entry:
+  %call = call float @floorf(float %a) nounwind readnone
+  %conv = fptoui float %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test3
+; CHECK: vcvtm.s32.f64
+define i32 @test3(double %a) {
+entry:
+  %call = call double @floor(double %a) nounwind readnone
+  %conv = fptosi double %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test4
+; CHECK: vcvtm.u32.f64
+define i32 @test4(double %a) {
+entry:
+  %call = call double @floor(double %a) nounwind readnone
+  %conv = fptoui double %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test5
+; CHECK: vcvtp.s32.f32
+define i32 @test5(float %a) {
+entry:
+  %call = call float @ceilf(float %a) nounwind readnone
+  %conv = fptosi float %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test6
+; CHECK: vcvtp.u32.f32
+define i32 @test6(float %a) {
+entry:
+  %call = call float @ceilf(float %a) nounwind readnone
+  %conv = fptoui float %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test7
+; CHECK: vcvtp.s32.f64
+define i32 @test7(double %a) {
+entry:
+  %call = call double @ceil(double %a) nounwind readnone
+  %conv = fptosi double %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test8
+; CHECK: vcvtp.u32.f64
+define i32 @test8(double %a) {
+entry:
+  %call = call double @ceil(double %a) nounwind readnone
+  %conv = fptoui double %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test9
+; CHECK: vcvta.s32.f32
+define i32 @test9(float %a) {
+entry:
+  %call = call float @roundf(float %a) nounwind readnone
+  %conv = fptosi float %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test10
+; CHECK: vcvta.u32.f32
+define i32 @test10(float %a) {
+entry:
+  %call = call float @roundf(float %a) nounwind readnone
+  %conv = fptoui float %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test11
+; CHECK: vcvta.s32.f64
+define i32 @test11(double %a) {
+entry:
+  %call = call double @round(double %a) nounwind readnone
+  %conv = fptosi double %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: test12
+; CHECK: vcvta.u32.f64
+define i32 @test12(double %a) {
+entry:
+  %call = call double @round(double %a) nounwind readnone
+  %conv = fptoui double %call to i32
+  ret i32 %conv
+}
+
+declare float @floorf(float) nounwind readnone
+declare double @floor(double) nounwind readnone
+declare float @ceilf(float) nounwind readnone
+declare double @ceil(double) nounwind readnone
+declare float @roundf(float) nounwind readnone
+declare double @round(double) nounwind readnone
diff --git a/test/CodeGen/ARM/arm32-rounding.ll b/test/CodeGen/ARM/arm32-rounding.ll
new file mode 100644
index 000000000000..f247648d814a
--- /dev/null
+++ b/test/CodeGen/ARM/arm32-rounding.ll
@@ -0,0 +1,118 @@
+; RUN: llc < %s -mtriple=armv8-linux-gnueabihf -mattr=+fp-armv8 | FileCheck --check-prefix=CHECK --check-prefix=DP %s
+; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabihf -mattr=+fp-armv8,+d16,+fp-only-sp | FileCheck --check-prefix=SP %s
+; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabihf -mattr=+fp-armv8,+d16 | FileCheck --check-prefix=DP %s
+
+; CHECK-LABEL: test1
+; CHECK: vrintm.f32
+define float @test1(float %a) {
+entry:
+  %call = call float @floorf(float %a) nounwind readnone
+  ret float %call
+}
+
+; CHECK-LABEL: test2
+; SP: b floor
+; DP: vrintm.f64
+define double @test2(double %a) {
+entry:
+  %call = call double @floor(double %a) nounwind readnone
+  ret double %call
+}
+
+; CHECK-LABEL: test3
+; CHECK: vrintp.f32
+define float @test3(float %a) {
+entry:
+  %call = call float @ceilf(float %a) nounwind readnone
+  ret float %call
+}
+
+; CHECK-LABEL: test4
+; SP: b ceil
+; DP: vrintp.f64
+define double @test4(double %a) {
+entry:
+  %call = call double @ceil(double %a) nounwind readnone
+  ret double %call
+}
+
+; CHECK-LABEL: test5
+; CHECK: vrinta.f32
+define float @test5(float %a) {
+entry:
+  %call = call float @roundf(float %a) nounwind readnone
+  ret float %call
+}
+
+; CHECK-LABEL: test6
+; SP: b round
+; DP: vrinta.f64
+define double @test6(double %a) {
+entry:
+  %call = call double @round(double %a) nounwind readnone
+  ret double %call
+}
+
+; CHECK-LABEL: test7
+; CHECK: vrintz.f32
+define float @test7(float %a) {
+entry:
+  %call = call float @truncf(float %a) nounwind readnone
+  ret float %call
+}
+
+; CHECK-LABEL: test8
+; SP: b trunc
+; DP: vrintz.f64
+define double @test8(double %a) {
+entry:
+  %call = call double @trunc(double %a) nounwind readnone
+  ret double %call
+}
+
+; CHECK-LABEL: test9
+; CHECK: vrintr.f32
+define float @test9(float %a) {
+entry:
+  %call = call float @nearbyintf(float %a) nounwind readnone
+  ret float %call
+}
+
+; CHECK-LABEL: test10
+; SP: b nearbyint
+; DP: vrintr.f64
+define double @test10(double %a) {
+entry:
+  %call = call double @nearbyint(double %a) nounwind readnone
+  ret double %call
+}
+
+; CHECK-LABEL: test11
+; CHECK: vrintx.f32
+define float @test11(float %a) {
+entry:
+  %call = call float @rintf(float %a) nounwind readnone
+  ret float %call
+}
+
+; CHECK-LABEL: test12
+; SP: b rint
+; DP: vrintx.f64
+define double @test12(double %a) {
+entry:
+  %call = call double @rint(double %a) nounwind readnone
+  ret double %call
+}
+
+declare float @floorf(float) nounwind readnone
+declare double @floor(double) nounwind readnone
+declare float @ceilf(float) nounwind readnone
+declare double @ceil(double) nounwind readnone
+declare float @roundf(float) nounwind readnone
+declare double @round(double) nounwind readnone
+declare float @truncf(float) nounwind readnone
+declare double @trunc(double) nounwind readnone
+declare float @nearbyintf(float) nounwind readnone
+declare double @nearbyint(double) nounwind readnone
+declare float @rintf(float) nounwind readnone
+declare double @rint(double) nounwind readnone
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index 462c1859dc91..0c0769f1b145 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
 ; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-LE
-; RUN: llc < %s -mtriple=armebv7 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
+; RUN: llc < %s -mtriple=armebv7 -target-abi apcs | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
 ; RUN: llc < %s -mtriple=thumbebv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB --check-prefix=CHECK-THUMB-BE
 
 define i64 @test1(i64* %ptr, i64 %val) {
diff --git a/test/CodeGen/ARM/atomic-cmpxchg.ll b/test/CodeGen/ARM/atomic-cmpxchg.ll
index 4b79fa25145b..84790be6d605 100644
--- a/test/CodeGen/ARM/atomic-cmpxchg.ll
+++ b/test/CodeGen/ARM/atomic-cmpxchg.ll
@@ -20,12 +20,15 @@ entry:
 
 ; CHECK-THUMB-LABEL: test_cmpxchg_res_i8
 ; CHECK-THUMB: bl __sync_val_compare_and_swap_1
-; CHECK-THUMB: mov [[R1:r[0-9]+]], r0
+; CHECK-THUMB-NOT: mov [[R1:r[0-7]]], r0
+; CHECK-THUMB: push  {r0}
+; CHECK-THUMB: pop {[[R1:r[0-7]]]}
 ; CHECK-THUMB: movs r0, #1
 ; CHECK-THUMB: movs [[R2:r[0-9]+]], #0
 ; CHECK-THUMB: cmp [[R1]], {{r[0-9]+}}
 ; CHECK-THU<B: beq
-; CHECK-THUMB: mov r0, [[R2]]
+; CHECK-THUMB: push  {[[R2]]}
+; CHECK-THUMB: pop {r0}
 
 ; CHECK-ARMV7-LABEL: test_cmpxchg_res_i8
 ; CHECK-ARMV7: ldrexb [[R3:r[0-9]+]], [r0]
diff --git a/test/CodeGen/ARM/atomic-load-store.ll b/test/CodeGen/ARM/atomic-load-store.ll
index 49342d2d1bfe..af13dfc80d2d 100644
--- a/test/CodeGen/ARM/atomic-load-store.ll
+++ b/test/CodeGen/ARM/atomic-load-store.ll
@@ -3,6 +3,8 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s -check-prefix=THUMBTWO
 ; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s -check-prefix=THUMBONE
 ; RUN: llc < %s -mtriple=armv4-apple-ios | FileCheck %s -check-prefix=ARMV4
+; RUN: llc < %s -mtriple=armv6-apple-ios | FileCheck %s -check-prefix=ARMV6
+; RUN: llc < %s -mtriple=thumbv7m-apple-ios | FileCheck %s -check-prefix=THUMBM
 
 define void @test1(i32* %ptr, i32 %val1) {
 ; ARM-LABEL: test1
@@ -15,6 +17,14 @@ define void @test1(i32* %ptr, i32 %val1) {
 ; THUMBTWO: dmb {{ish$}}
 ; THUMBTWO-NEXT: str
 ; THUMBTWO-NEXT: dmb {{ish$}}
+; ARMV6-LABEL: test1
+; ARMV6: mcr p15, #0, {{r[0-9]*}}, c7, c10, #5
+; ARMV6: str
+; ARMV6: mcr p15, #0, {{r[0-9]*}}, c7, c10, #5
+; THUMBM-LABEL: test1
+; THUMBM: dmb sy
+; THUMBM: str
+; THUMBM: dmb sy
   store atomic i32 %val1, i32* %ptr seq_cst, align 4
   ret void
 }
@@ -28,6 +38,12 @@ define i32 @test2(i32* %ptr) {
 ; THUMBTWO-LABEL: test2
 ; THUMBTWO: ldr
 ; THUMBTWO-NEXT: dmb {{ish$}}
+; ARMV6-LABEL: test2
+; ARMV6: ldr
+; ARMV6: mcr p15, #0, {{r[0-9]*}}, c7, c10, #5
+; THUMBM-LABEL: test2
+; THUMBM: ldr
+; THUMBM: dmb sy
   %val = load atomic i32* %ptr seq_cst, align 4
   ret i32 %val
 }
@@ -55,6 +71,11 @@ define void @test3(i8* %ptr1, i8* %ptr2) {
 ; THUMBONE-NOT: dmb
 ; THUMBONE: strb
 ; THUMBONE-NOT: dmb
+
+; ARMV6-LABEL: test3
+; ARMV6-NOT: mcr
+; THUMBM-LABEL: test3
+; THUMBM-NOT: dmb sy
   %val = load atomic i8* %ptr1 unordered, align 1
   store atomic i8 %val, i8* %ptr2 unordered, align 1
   ret void
@@ -64,6 +85,8 @@ define void @test4(i8* %ptr1, i8* %ptr2) {
 ; THUMBONE-LABEL: test4
 ; THUMBONE: ___sync_val_compare_and_swap_1
 ; THUMBONE: ___sync_lock_test_and_set_1
+; ARMV6-LABEL: test4
+; THUMBM-LABEL: test4
   %val = load atomic i8* %ptr1 seq_cst, align 1
   store atomic i8 %val, i8* %ptr2 seq_cst, align 1
   ret void
diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll
index b988242ae57e..1ac86485c556 100644
--- a/test/CodeGen/ARM/atomic-op.ll
+++ b/test/CodeGen/ARM/atomic-op.ll
@@ -1,7 +1,10 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s
 ; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s
 ; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-T1
-; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs -mcpu=cortex-m0 | FileCheck %s --check-prefix=CHECK-T1
+; RUN: llc < %s -mtriple=thumbv6-apple-ios -verify-machineinstrs -mcpu=cortex-m0 | FileCheck %s --check-prefix=CHECK-M0
+; RUN: llc < %s -mtriple=thumbv7--none-eabi -thread-model single -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-BAREMETAL
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 
 define void @func(i32 %argc, i8** %argv) nounwind {
 entry:
@@ -27,48 +30,72 @@ entry:
   ; CHECK: add
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_add_4
+  ; CHECK-M0: bl ___sync_fetch_and_add_4
+  ; CHECK-BAREMETAL: add
+  ; CHECK-BAREMETAL-NOT: __sync
   %0 = atomicrmw add i32* %val1, i32 %tmp monotonic
 	store i32 %0, i32* %old
   ; CHECK: ldrex
   ; CHECK: sub
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_sub_4
+  ; CHECK-M0: bl ___sync_fetch_and_sub_4
+  ; CHECK-BAREMETAL: sub
+  ; CHECK-BAREMETAL-NOT: __sync
   %1 = atomicrmw sub i32* %val2, i32 30 monotonic
 	store i32 %1, i32* %old
   ; CHECK: ldrex
   ; CHECK: add
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_add_4
+  ; CHECK-M0: bl ___sync_fetch_and_add_4
+  ; CHECK-BAREMETAL: add
+  ; CHECK-BAREMETAL-NOT: __sync
   %2 = atomicrmw add i32* %val2, i32 1 monotonic
 	store i32 %2, i32* %old
   ; CHECK: ldrex
   ; CHECK: sub
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_sub_4
+  ; CHECK-M0: bl ___sync_fetch_and_sub_4
+  ; CHECK-BAREMETAL: sub
+  ; CHECK-BAREMETAL-NOT: __sync
   %3 = atomicrmw sub i32* %val2, i32 1 monotonic
 	store i32 %3, i32* %old
   ; CHECK: ldrex
   ; CHECK: and
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_and_4
+  ; CHECK-M0: bl ___sync_fetch_and_and_4
+  ; CHECK-BAREMETAL: and
+  ; CHECK-BAREMETAL-NOT: __sync
   %4 = atomicrmw and i32* %andt, i32 4080 monotonic
 	store i32 %4, i32* %old
   ; CHECK: ldrex
   ; CHECK: or
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_or_4
+  ; CHECK-M0: bl ___sync_fetch_and_or_4
+  ; CHECK-BAREMETAL: or
+  ; CHECK-BAREMETAL-NOT: __sync
   %5 = atomicrmw or i32* %ort, i32 4080 monotonic
 	store i32 %5, i32* %old
   ; CHECK: ldrex
   ; CHECK: eor
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_xor_4
+  ; CHECK-M0: bl ___sync_fetch_and_xor_4
+  ; CHECK-BAREMETAL: eor
+  ; CHECK-BAREMETAL-NOT: __sync
   %6 = atomicrmw xor i32* %xort, i32 4080 monotonic
 	store i32 %6, i32* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_min_4
+  ; CHECK-M0: bl ___sync_fetch_and_min_4
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %7 = atomicrmw min i32* %val2, i32 16 monotonic
 	store i32 %7, i32* %old
 	%neg = sub i32 0, 1
@@ -76,24 +103,36 @@ entry:
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_min_4
+  ; CHECK-M0: bl ___sync_fetch_and_min_4
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %8 = atomicrmw min i32* %val2, i32 %neg monotonic
 	store i32 %8, i32* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_max_4
+  ; CHECK-M0: bl ___sync_fetch_and_max_4
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %9 = atomicrmw max i32* %val2, i32 1 monotonic
 	store i32 %9, i32* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_max_4
+  ; CHECK-M0: bl ___sync_fetch_and_max_4
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %10 = atomicrmw max i32* %val2, i32 0 monotonic
 	store i32 %10, i32* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_umin_4
+  ; CHECK-M0: bl ___sync_fetch_and_umin_4
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %11 = atomicrmw umin i32* %val2, i32 16 monotonic
 	store i32 %11, i32* %old
 	%uneg = sub i32 0, 1
@@ -101,18 +140,27 @@ entry:
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_umin_4
+  ; CHECK-M0: bl ___sync_fetch_and_umin_4
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic
 	store i32 %12, i32* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_umax_4
+  ; CHECK-M0: bl ___sync_fetch_and_umax_4
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %13 = atomicrmw umax i32* %val2, i32 1 monotonic
 	store i32 %13, i32* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_umax_4
+  ; CHECK-M0: bl ___sync_fetch_and_umax_4
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %14 = atomicrmw umax i32* %val2, i32 0 monotonic
 	store i32 %14, i32* %old
 
@@ -128,6 +176,9 @@ entry:
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_umin_2
+  ; CHECK-M0: bl ___sync_fetch_and_umin_2
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %0 = atomicrmw umin i16* %val, i16 16 monotonic
   store i16 %0, i16* %old
   %uneg = sub i16 0, 1
@@ -135,18 +186,27 @@ entry:
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_umin_2
+  ; CHECK-M0: bl ___sync_fetch_and_umin_2
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %1 = atomicrmw umin i16* %val, i16 %uneg monotonic
   store i16 %1, i16* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_umax_2
+  ; CHECK-M0: bl ___sync_fetch_and_umax_2
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %2 = atomicrmw umax i16* %val, i16 1 monotonic
   store i16 %2, i16* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_umax_2
+  ; CHECK-M0: bl ___sync_fetch_and_umax_2
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %3 = atomicrmw umax i16* %val, i16 0 monotonic
   store i16 %3, i16* %old
   ret void
@@ -161,12 +221,18 @@ entry:
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_umin_1
+  ; CHECK-M0: bl ___sync_fetch_and_umin_1
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %0 = atomicrmw umin i8* %val, i8 16 monotonic
   store i8 %0, i8* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_umin_1
+  ; CHECK-M0: bl ___sync_fetch_and_umin_1
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %uneg = sub i8 0, 1
   %1 = atomicrmw umin i8* %val, i8 %uneg monotonic
   store i8 %1, i8* %old
@@ -174,12 +240,18 @@ entry:
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_umax_1
+  ; CHECK-M0: bl ___sync_fetch_and_umax_1
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %2 = atomicrmw umax i8* %val, i8 1 monotonic
   store i8 %2, i8* %old
   ; CHECK: ldrex
   ; CHECK: cmp
   ; CHECK: strex
   ; CHECK-T1: blx ___sync_fetch_and_umax_1
+  ; CHECK-M0: bl ___sync_fetch_and_umax_1
+  ; CHECK-BAREMETAL: cmp
+  ; CHECK-BAREMETAL-NOT: __sync
   %3 = atomicrmw umax i8* %val, i8 0 monotonic
   store i8 %3, i8* %old
   ret void
@@ -233,3 +305,69 @@ define i32 @test_cmpxchg_fail_order1(i32 *%addr, i32 %desired, i32 %new) {
 
   ret i32 %oldval
 }
+
+define i32 @load_load_add_acquire(i32* %mem1, i32* %mem2) nounwind {
+; CHECK-LABEL: load_load_add_acquire
+  %val1 = load atomic i32* %mem1 acquire, align 4
+  %val2 = load atomic i32* %mem2 acquire, align 4
+  %tmp = add i32 %val1, %val2
+
+; CHECK: ldr {{r[0-9]}}, [r0]
+; CHECK: dmb
+; CHECK: ldr {{r[0-9]}}, [r1]
+; CHECK: dmb
+; CHECK: add r0,
+
+; CHECK-M0: ___sync_val_compare_and_swap_4
+; CHECK-M0: ___sync_val_compare_and_swap_4
+
+; CHECK-BAREMETAL: ldr {{r[0-9]}}, [r0]
+; CHECK-BAREMETAL-NOT: dmb
+; CHECK-BAREMETAL: ldr {{r[0-9]}}, [r1]
+; CHECK-BAREMETAL-NOT: dmb
+; CHECK-BAREMETAL: add r0,
+
+  ret i32 %tmp
+}
+
+define void @store_store_release(i32* %mem1, i32 %val1, i32* %mem2, i32 %val2) {
+; CHECK-LABEL: store_store_release
+  store atomic i32 %val1, i32* %mem1 release, align 4
+  store atomic i32 %val2, i32* %mem2 release, align 4
+
+; CHECK: dmb
+; CHECK: str r1, [r0]
+; CHECK: dmb
+; CHECK: str r3, [r2]
+
+; CHECK-M0: ___sync_lock_test_and_set
+; CHECK-M0: ___sync_lock_test_and_set
+
+; CHECK-BAREMETAL-NOT: dmb
+; CHECK-BAREMTEAL: str r1, [r0]
+; CHECK-BAREMETAL-NOT: dmb
+; CHECK-BAREMTEAL: str r3, [r2]
+
+  ret void
+}
+
+define void @load_fence_store_monotonic(i32* %mem1, i32* %mem2) {
+; CHECK-LABEL: load_fence_store_monotonic
+  %val = load atomic i32* %mem1 monotonic, align 4
+  fence seq_cst
+  store atomic i32 %val, i32* %mem2 monotonic, align 4
+
+; CHECK: ldr [[R0:r[0-9]]], [r0]
+; CHECK: dmb
+; CHECK: str [[R0]], [r1]
+
+; CHECK-M0: ldr [[R0:r[0-9]]], [r0]
+; CHECK-M0: dmb
+; CHECK-M0: str [[R0]], [r1]
+
+; CHECK-BAREMETAL: ldr [[R0:r[0-9]]], [r0]
+; CHECK-BAREMETAL-NOT: dmb
+; CHECK-BAREMETAL: str [[R0]], [r1]
+
+  ret void
+}
diff --git a/test/CodeGen/ARM/build-attributes-encoding.s b/test/CodeGen/ARM/build-attributes-encoding.s
index 34a1ad38fb17..29f13f09d319 100644
--- a/test/CodeGen/ARM/build-attributes-encoding.s
+++ b/test/CodeGen/ARM/build-attributes-encoding.s
@@ -78,7 +78,7 @@
 // CHECK-NEXT:     EntrySize: 0
 // CHECK-NEXT:     SectionData (
 // CHECK-NEXT:       0000: 41460000 00616561 62690001 3C000000
-// CHECK-NEXT:       0010: 05434F52 5445582D 41380006 0A074108
+// CHECK-NEXT:       0010: 05636F72 7465782D 61380006 0A074108
 // CHECK-NEXT:       0020: 0109020A 030C0214 01150117 01180119
 // CHECK-NEXT:       0030: 011B001C 0124012A 012C0244 036EA001
 // CHECK-NEXT:       0040: 81013100 FA0101
diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll
index d75d55d0fa68..2e382308bf51 100644
--- a/test/CodeGen/ARM/build-attributes.ll
+++ b/test/CodeGen/ARM/build-attributes.ll
@@ -3,115 +3,309 @@
 
 ; RUN: llc < %s -mtriple=thumbv5-linux-gnueabi -mcpu=xscale | FileCheck %s --check-prefix=XSCALE
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi | FileCheck %s --check-prefix=V6
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6-FAST
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi | FileCheck %s --check-prefix=V6M
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V6M-FAST
 ; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s | FileCheck %s --check-prefix=ARM1156T2F-S
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast  | FileCheck %s --check-prefix=ARM1156T2F-S-FAST
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -mcpu=arm1156t2f-s -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi | FileCheck %s --check-prefix=V7M
+; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V7M-FAST
+; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=V7
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V7-FAST
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=V8-FAST
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi | FileCheck %s --check-prefix=Vt8
+; RUN: llc < %s -mtriple=thumbv8-linux-gnueabi -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-neon,-crypto | FileCheck %s --check-prefix=V8-FPARMv8
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-fp-armv8,-crypto | FileCheck %s --check-prefix=V8-NEON
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mattr=-crypto | FileCheck %s --check-prefix=V8-FPARMv8-NEON
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi | FileCheck %s --check-prefix=V8-FPARMv8-NEON-CRYPTO
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 | FileCheck %s --check-prefix=CORTEX-A5-DEFAULT
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A5-DEFAULT-FAST
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-neon,+d16 | FileCheck %s --check-prefix=CORTEX-A5-NONEON
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A5-NOFPU
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a5 -mattr=-vfp2  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A5-NOFPU-FAST
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A9-SOFT
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A9-SOFT-FAST
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-A9-HARD
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A9-HARD-FAST
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 | FileCheck %s --check-prefix=CORTEX-A12-DEFAULT
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-A9-SOFT
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A12-DEFAULT-FAST
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A12-NOFPU
-; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a9-mp | FileCheck %s --check-prefix=CORTEX-A9-MP
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -mattr=-vfp2  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A12-NOFPU-FAST
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a12 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 | FileCheck %s --check-prefix=CORTEX-A15
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A15-FAST
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a15 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 | FileCheck %s --check-prefix=CORTEX-A17-DEFAULT
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-FAST
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-A17-NOFPU
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -mattr=-vfp2  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A17-NOFPU-FAST
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi -mcpu=cortex-a17 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=CORTEX-M0
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M0-FAST
+; RUN: llc < %s -mtriple=thumbv6m-linux-gnueabi -mcpu=cortex-m0 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 | FileCheck %s --check-prefix=CORTEX-M3
+; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M3-FAST
+; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m3 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=soft | FileCheck %s --check-prefix=CORTEX-M4-SOFT
+; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=soft  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M4-SOFT-FAST
 ; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard | FileCheck %s --check-prefix=CORTEX-M4-HARD
+; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -float-abi=hard  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M4-HARD-FAST
+; RUN: llc < %s -mtriple=thumbv7m-linux-gnueabi -mcpu=cortex-m4 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-vfp2 | FileCheck %s --check-prefix=CORTEX-M7 --check-prefix=CORTEX-M7-SOFT
+; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=-vfp2  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M7-NOFPU-FAST
+; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=+fp-only-sp | FileCheck %s --check-prefix=CORTEX-M7 --check-prefix=CORTEX-M7-SINGLE
+; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -mattr=+fp-only-sp  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-M7-FAST
+; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 | FileCheck %s --check-prefix=CORTEX-M7-DOUBLE
+; RUN: llc < %s -mtriple=thumbv7em-linux-gnueabi -mcpu=cortex-m7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=CORTEX-R5
+; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-R5-FAST
+; RUN: llc < %s -mtriple=armv7r-linux-gnueabi -mcpu=cortex-r5 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 | FileCheck %s --check-prefix=CORTEX-A53
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A53-FAST
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a53 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 | FileCheck %s --check-prefix=CORTEX-A57
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A57-FAST
+; RUN: llc < %s -mtriple=armv8-linux-gnueabi -mcpu=cortex-a57 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s  --check-prefix=CORTEX-A7-CHECK
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s  --check-prefix=CORTEX-A7-CHECK-FAST
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-NOFPU
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=-vfp2,-vfp3,-vfp4,-neon  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-NOFPU-FAST
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -enable-sign-dependent-rounding-fp-math | FileCheck %s --check-prefix=DYN-ROUNDING
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,-neon  -enable-unsafe-fp-math -disable-fp-elim -enable-no-infs-fp-math -enable-no-nans-fp-math -fp-contract=fast | FileCheck %s --check-prefix=CORTEX-A7-FPUV4-FAST
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -mattr=+vfp4,,+d16,-neon | FileCheck %s --check-prefix=CORTEX-A7-FPUV4
 ; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=pic | FileCheck %s --check-prefix=RELOC-PIC
 ; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=static | FileCheck %s --check-prefix=RELOC-OTHER
 ; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=default | FileCheck %s --check-prefix=RELOC-OTHER
 ; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -relocation-model=dynamic-no-pic | FileCheck %s --check-prefix=RELOC-OTHER
 ; RUN: llc < %s -mtriple=arm-none-linux-gnueabi | FileCheck %s --check-prefix=RELOC-OTHER
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi | FileCheck %s --check-prefix=PCS-R9-USE
+; RUN: llc < %s -mtriple=arm-none-linux-gnueabi -arm-reserve-r9 | FileCheck %s --check-prefix=PCS-R9-RESERVE
+
+; ARMv8a (AArch32)
+; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a57 -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a57 -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv8-none-linux-gnueabi -mcpu=cortex-a57 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; ARMv7a
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mcpu=cortex-a7 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; ARMv7r
+; RUN: llc < %s -mtriple=armv7r-none-linux-gnueabi -mcpu=cortex-r5 -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv7r-none-linux-gnueabi -mcpu=cortex-r5 -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv7r-none-linux-gnueabi -mcpu=cortex-r5 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; ARMv7m
+; RUN: llc < %s -mtriple=thumbv7m-none-linux-gnueabi -mcpu=cortex-m3 -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumbv7m-none-linux-gnueabi -mcpu=cortex-m3 -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumbv7m-none-linux-gnueabi -mcpu=cortex-m3 | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; ARMv6
+; RUN: llc < %s -mtriple=armv6-none-netbsd-gnueabi -mcpu=arm1136j-s | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv6-none-linux-gnueabi -mcpu=arm1136j-s -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; ARMv6m
+; RUN: llc < %s -mtriple=thumb-none-linux-gnueabi -arm-no-strict-align -mcpu=cortex-m0 | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumb-none-linux-gnueabi -arm-strict-align -mcpu=cortex-m0 | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumbv6m-none-linux-gnueabi -arm-no-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumbv6m-none-linux-gnueabi -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumb-none-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=thumb-none-linux-gnueabi -mcpu=cortex-m0 | FileCheck %s --check-prefix=STRICT-ALIGN
+; ARMv5
+; RUN: llc < %s -mtriple=armv5-none-linux-gnueabi -mcpu=arm1022e -arm-no-strict-align | FileCheck %s --check-prefix=NO-STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv5-none-linux-gnueabi -mcpu=arm1022e -arm-strict-align | FileCheck %s --check-prefix=STRICT-ALIGN
+; RUN: llc < %s -mtriple=armv5-none-linux-gnueabi -mcpu=arm1022e | FileCheck %s --check-prefix=STRICT-ALIGN
 
 ; XSCALE:      .eabi_attribute 6, 5
 ; XSCALE:      .eabi_attribute 8, 1
 ; XSCALE:      .eabi_attribute 9, 1
 
+; DYN-ROUNDING: .eabi_attribute 19, 1
+
 ; V6:   .eabi_attribute 6, 6
 ; V6:   .eabi_attribute 8, 1
+;; We assume round-to-nearest by default (matches GCC)
+; V6-NOT:   .eabi_attribute 19
+;; The default choice made by llc is for a V6 CPU without an FPU.
+;; This is not an interesting detail, but for such CPUs, the default intention is to use
+;; software floating-point support. The choice is not important for targets without
+;; FPU support!
+; V6:   .eabi_attribute 20, 1
+; V6:   .eabi_attribute 21, 1
+; V6-NOT:   .eabi_attribute 22
+; V6:   .eabi_attribute 23, 3
 ; V6:   .eabi_attribute 24, 1
 ; V6:   .eabi_attribute 25, 1
 ; V6-NOT:   .eabi_attribute 27
 ; V6-NOT:   .eabi_attribute 28
 ; V6-NOT:    .eabi_attribute 36
+; V6:    .eabi_attribute 38, 1
 ; V6-NOT:    .eabi_attribute 42
+; V6-NOT:  .eabi_attribute 44
 ; V6-NOT:    .eabi_attribute 68
 
+; V6-FAST-NOT:   .eabi_attribute 19
+;; Despite the V6 CPU having no FPU by default, we chose to flush to
+;; positive zero here. There's no hardware support doing this, but the
+;; fast maths software library might.
+; V6-FAST-NOT:   .eabi_attribute 20
+; V6-FAST-NOT:   .eabi_attribute 21
+; V6-FAST-NOT:   .eabi_attribute 22
+; V6-FAST:   .eabi_attribute 23, 1
+
 ; V6M:  .eabi_attribute 6, 12
 ; V6M-NOT:  .eabi_attribute 7
 ; V6M:  .eabi_attribute 8, 0
 ; V6M:  .eabi_attribute 9, 1
+; V6M-NOT:   .eabi_attribute 19
+;; The default choice made by llc is for a V6M CPU without an FPU.
+;; This is not an interesting detail, but for such CPUs, the default intention is to use
+;; software floating-point support. The choice is not important for targets without
+;; FPU support!
+; V6M:  .eabi_attribute 20, 1
+; V6M:   .eabi_attribute 21, 1
+; V6M-NOT:   .eabi_attribute 22
+; V6M:   .eabi_attribute 23, 3
 ; V6M:  .eabi_attribute 24, 1
 ; V6M:  .eabi_attribute 25, 1
 ; V6M-NOT:  .eabi_attribute 27
 ; V6M-NOT:  .eabi_attribute 28
 ; V6M-NOT:  .eabi_attribute 36
+; V6M:  .eabi_attribute 38, 1
 ; V6M-NOT:  .eabi_attribute 42
+; V6M-NOT:  .eabi_attribute 44
 ; V6M-NOT:  .eabi_attribute 68
 
+; V6M-FAST-NOT:   .eabi_attribute 19
+;; Despite the V6M CPU having no FPU by default, we chose to flush to
+;; positive zero here. There's no hardware support doing this, but the
+;; fast maths software library might.
+; V6M-FAST-NOT:  .eabi_attribute 20
+; V6M-FAST-NOT:   .eabi_attribute 21
+; V6M-FAST-NOT:   .eabi_attribute 22
+; V6M-FAST:   .eabi_attribute 23, 1
+
 ; ARM1156T2F-S: .cpu arm1156t2f-s
 ; ARM1156T2F-S: .eabi_attribute 6, 8
 ; ARM1156T2F-S: .eabi_attribute 8, 1
 ; ARM1156T2F-S: .eabi_attribute 9, 2
 ; ARM1156T2F-S: .fpu vfpv2
+; ARM1156T2F-S-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
 ; ARM1156T2F-S: .eabi_attribute 20, 1
 ; ARM1156T2F-S: .eabi_attribute 21, 1
+; ARM1156T2F-S-NOT: .eabi_attribute 22
 ; ARM1156T2F-S: .eabi_attribute 23, 3
 ; ARM1156T2F-S: .eabi_attribute 24, 1
 ; ARM1156T2F-S: .eabi_attribute 25, 1
 ; ARM1156T2F-S-NOT: .eabi_attribute 27
 ; ARM1156T2F-S-NOT: .eabi_attribute 28
 ; ARM1156T2F-S-NOT: .eabi_attribute 36
+; ARM1156T2F-S: .eabi_attribute 38, 1
 ; ARM1156T2F-S-NOT:    .eabi_attribute 42
+; ARM1156T2F-S-NOT:    .eabi_attribute 44
 ; ARM1156T2F-S-NOT:    .eabi_attribute 68
 
+; ARM1156T2F-S-FAST-NOT:   .eabi_attribute 19
+;; V6 cores default to flush to positive zero (value 0). Note that value 2 is also equally
+;; valid for this core, it's an implementation defined question as to which of 0 and 2 you
+;; select. LLVM historically picks 0.
+; ARM1156T2F-S-FAST-NOT: .eabi_attribute 20
+; ARM1156T2F-S-FAST-NOT:   .eabi_attribute 21
+; ARM1156T2F-S-FAST-NOT:   .eabi_attribute 22
+; ARM1156T2F-S-FAST:   .eabi_attribute 23, 1
+
 ; V7M:  .eabi_attribute 6, 10
 ; V7M:  .eabi_attribute 7, 77
 ; V7M:  .eabi_attribute 8, 0
 ; V7M:  .eabi_attribute 9, 2
+; V7M-NOT:   .eabi_attribute 19
+;; The default choice made by llc is for a V7M CPU without an FPU.
+;; This is not an interesting detail, but for such CPUs, the default intention is to use
+;; software floating-point support. The choice is not important for targets without
+;; FPU support!
+; V7M:  .eabi_attribute 20, 1
+; V7M: .eabi_attribute 21, 1
+; V7M-NOT: .eabi_attribute 22
+; V7M: .eabi_attribute 23, 3
 ; V7M:  .eabi_attribute 24, 1
 ; V7M:  .eabi_attribute 25, 1
 ; V7M-NOT:  .eabi_attribute 27
 ; V7M-NOT:  .eabi_attribute 28
 ; V7M-NOT:  .eabi_attribute 36
+; V7M:  .eabi_attribute 38, 1
 ; V7M-NOT:  .eabi_attribute 42
 ; V7M-NOT:  .eabi_attribute 44
 ; V7M-NOT:  .eabi_attribute 68
 
+; V7M-FAST-NOT:   .eabi_attribute 19
+;; Despite the V7M CPU having no FPU by default, we chose to flush
+;; preserving sign. This matches what the hardware would do in the
+;; architecture revision were to exist on the current target.
+; V7M-FAST:  .eabi_attribute 20, 2
+; V7M-FAST-NOT:   .eabi_attribute 21
+; V7M-FAST-NOT:   .eabi_attribute 22
+; V7M-FAST:   .eabi_attribute 23, 1
+
 ; V7:      .syntax unified
 ; V7: .eabi_attribute 6, 10
+; V7-NOT:   .eabi_attribute 19
+;; In safe-maths mode we default to an IEEE 754 compliant choice.
 ; V7: .eabi_attribute 20, 1
 ; V7: .eabi_attribute 21, 1
+; V7-NOT: .eabi_attribute 22
 ; V7: .eabi_attribute 23, 3
 ; V7: .eabi_attribute 24, 1
 ; V7: .eabi_attribute 25, 1
 ; V7-NOT: .eabi_attribute 27
 ; V7-NOT: .eabi_attribute 28
 ; V7-NOT: .eabi_attribute 36
+; V7: .eabi_attribute 38, 1
 ; V7-NOT:    .eabi_attribute 42
+; V7-NOT:    .eabi_attribute 44
 ; V7-NOT:    .eabi_attribute 68
 
+; V7-FAST-NOT:   .eabi_attribute 19
+;; The default CPU does have an FPU and it must be VFPv3 or better, so it flushes
+;; denormals to zero preserving the sign.
+; V7-FAST: .eabi_attribute 20, 2
+; V7-FAST-NOT:   .eabi_attribute 21
+; V7-FAST-NOT:   .eabi_attribute 22
+; V7-FAST:   .eabi_attribute 23, 1
+
 ; V8:      .syntax unified
+; V8: .eabi_attribute 67, "2.09"
 ; V8: .eabi_attribute 6, 14
+; V8-NOT:   .eabi_attribute 19
+; V8: .eabi_attribute 20, 1
+; V8: .eabi_attribute 21, 1
+; V8-NOT: .eabi_attribute 22
+; V8: .eabi_attribute 23, 3
+; V8-NOT: .eabi_attribute 44
+
+; V8-FAST-NOT:   .eabi_attribute 19
+;; The default does have an FPU, and for V8-A, it flushes preserving sign.
+; V8-FAST: .eabi_attribute 20, 2
+; V8-FAST-NOT: .eabi_attribute 21
+; V8-FAST-NOT: .eabi_attribute 22
+; V8-FAST: .eabi_attribute 23, 1
 
 ; Vt8:     .syntax unified
 ; Vt8: .eabi_attribute 6, 14
+; Vt8-NOT:   .eabi_attribute 19
+; Vt8: .eabi_attribute 20, 1
+; Vt8: .eabi_attribute 21, 1
+; Vt8-NOT: .eabi_attribute 22
+; Vt8: .eabi_attribute 23, 3
 
 ; V8-FPARMv8:      .syntax unified
 ; V8-FPARMv8: .eabi_attribute 6, 14
@@ -132,74 +326,99 @@
 ; V8-FPARMv8-NEON-CRYPTO: .fpu crypto-neon-fp-armv8
 ; V8-FPARMv8-NEON-CRYPTO: .eabi_attribute 12, 3
 
-; Tag_CPU_arch	'ARMv7'
-; CORTEX-A7-CHECK: .eabi_attribute	6, 10
-; CORTEX-A7-NOFPU: .eabi_attribute	6, 10
-; CORTEX-A7-FPUV4: .eabi_attribute	6, 10
+; Tag_CPU_unaligned_access
+; NO-STRICT-ALIGN: .eabi_attribute 34, 1
+; STRICT-ALIGN: .eabi_attribute 34, 0
+
+; Tag_CPU_arch  'ARMv7'
+; CORTEX-A7-CHECK: .eabi_attribute      6, 10
+; CORTEX-A7-NOFPU: .eabi_attribute      6, 10
+
+; CORTEX-A7-FPUV4: .eabi_attribute      6, 10
 
 ; Tag_CPU_arch_profile 'A'
-; CORTEX-A7-CHECK: .eabi_attribute	7, 65
-; CORTEX-A7-NOFPU: .eabi_attribute	7, 65
-; CORTEX-A7-FPUV4: .eabi_attribute	7, 65
+; CORTEX-A7-CHECK: .eabi_attribute      7, 65
+; CORTEX-A7-NOFPU: .eabi_attribute      7, 65
+; CORTEX-A7-FPUV4: .eabi_attribute      7, 65
 
 ; Tag_ARM_ISA_use
-; CORTEX-A7-CHECK: .eabi_attribute	8, 1
-; CORTEX-A7-NOFPU: .eabi_attribute	8, 1
-; CORTEX-A7-FPUV4: .eabi_attribute	8, 1
+; CORTEX-A7-CHECK: .eabi_attribute      8, 1
+; CORTEX-A7-NOFPU: .eabi_attribute      8, 1
+; CORTEX-A7-FPUV4: .eabi_attribute      8, 1
 
 ; Tag_THUMB_ISA_use
-; CORTEX-A7-CHECK: .eabi_attribute	9, 2
-; CORTEX-A7-NOFPU: .eabi_attribute	9, 2
-; CORTEX-A7-FPUV4: .eabi_attribute	9, 2
+; CORTEX-A7-CHECK: .eabi_attribute      9, 2
+; CORTEX-A7-NOFPU: .eabi_attribute      9, 2
+; CORTEX-A7-FPUV4: .eabi_attribute      9, 2
 
-; CORTEX-A7-CHECK: .fpu	neon-vfpv4
+; CORTEX-A7-CHECK: .fpu neon-vfpv4
 ; CORTEX-A7-NOFPU-NOT: .fpu
-; CORTEX-A7-FPUV4: .fpu	vfpv4
+; CORTEX-A7-FPUV4: .fpu vfpv4
 
+; CORTEX-A7-CHECK-NOT:   .eabi_attribute 19
 ; Tag_ABI_FP_denormal
-; CORTEX-A7-CHECK: .eabi_attribute	20, 1
-; CORTEX-A7-NOFPU: .eabi_attribute	20, 1
-; CORTEX-A7-FPUV4: .eabi_attribute	20, 1
+;; We default to IEEE 754 compliance
+; CORTEX-A7-CHECK: .eabi_attribute      20, 1
+;; The A7 has VFPv3 support by default, so flush preserving sign.
+; CORTEX-A7-CHECK-FAST: .eabi_attribute 20, 2
+; CORTEX-A7-NOFPU: .eabi_attribute      20, 1
+;; Despite there being no FPU, we chose to flush to zero preserving
+;; sign. This matches what the hardware would do for this architecture
+;; revision.
+; CORTEX-A7-NOFPU-FAST: .eabi_attribute 20, 2
+; CORTEX-A7-FPUV4: .eabi_attribute      20, 1
+;; The VFPv4 FPU flushes preserving sign.
+; CORTEX-A7-FPUV4-FAST: .eabi_attribute 20, 2
 
 ; Tag_ABI_FP_exceptions
-; CORTEX-A7-CHECK: .eabi_attribute	21, 1
-; CORTEX-A7-NOFPU: .eabi_attribute	21, 1
-; CORTEX-A7-FPUV4: .eabi_attribute	21, 1
+; CORTEX-A7-CHECK: .eabi_attribute      21, 1
+; CORTEX-A7-NOFPU: .eabi_attribute      21, 1
+; CORTEX-A7-FPUV4: .eabi_attribute      21, 1
+
+; Tag_ABI_FP_user_exceptions
+; CORTEX-A7-CHECK-NOT: .eabi_attribute      22
+; CORTEX-A7-NOFPU-NOT: .eabi_attribute      22
+; CORTEX-A7-FPUV4-NOT: .eabi_attribute      22
 
 ; Tag_ABI_FP_number_model
-; CORTEX-A7-CHECK: .eabi_attribute	23, 3
-; CORTEX-A7-NOFPU: .eabi_attribute	23, 3
-; CORTEX-A7-FPUV4: .eabi_attribute	23, 3
+; CORTEX-A7-CHECK: .eabi_attribute      23, 3
+; CORTEX-A7-NOFPU: .eabi_attribute      23, 3
+; CORTEX-A7-FPUV4: .eabi_attribute      23, 3
 
 ; Tag_ABI_align_needed
-; CORTEX-A7-CHECK: .eabi_attribute	24, 1
-; CORTEX-A7-NOFPU: .eabi_attribute	24, 1
-; CORTEX-A7-FPUV4: .eabi_attribute	24, 1
+; CORTEX-A7-CHECK: .eabi_attribute      24, 1
+; CORTEX-A7-NOFPU: .eabi_attribute      24, 1
+; CORTEX-A7-FPUV4: .eabi_attribute      24, 1
 
 ; Tag_ABI_align_preserved
-; CORTEX-A7-CHECK: .eabi_attribute	25, 1
-; CORTEX-A7-NOFPU: .eabi_attribute	25, 1
-; CORTEX-A7-FPUV4: .eabi_attribute	25, 1
+; CORTEX-A7-CHECK: .eabi_attribute      25, 1
+; CORTEX-A7-NOFPU: .eabi_attribute      25, 1
+; CORTEX-A7-FPUV4: .eabi_attribute      25, 1
 
 ; Tag_FP_HP_extension
-; CORTEX-A7-CHECK: .eabi_attribute	36, 1
-; CORTEX-A7-NOFPU: .eabi_attribute	36, 1
-; CORTEX-A7-FPUV4: .eabi_attribute	36, 1
+; CORTEX-A7-CHECK: .eabi_attribute      36, 1
+; CORTEX-A7-NOFPU: .eabi_attribute      36, 1
+; CORTEX-A7-FPUV4: .eabi_attribute      36, 1
+
+; Tag_FP_16bit_format
+; CORTEX-A7-CHECK: .eabi_attribute      38, 1
+; CORTEX-A7-NOFPU: .eabi_attribute      38, 1
+; CORTEX-A7-FPUV4: .eabi_attribute      38, 1
 
 ; Tag_MPextension_use
-; CORTEX-A7-CHECK: .eabi_attribute	42, 1
-; CORTEX-A7-NOFPU: .eabi_attribute	42, 1
-; CORTEX-A7-FPUV4: .eabi_attribute	42, 1
+; CORTEX-A7-CHECK: .eabi_attribute      42, 1
+; CORTEX-A7-NOFPU: .eabi_attribute      42, 1
+; CORTEX-A7-FPUV4: .eabi_attribute      42, 1
 
 ; Tag_DIV_use
-; CORTEX-A7-CHECK: .eabi_attribute	44, 2
-; CORTEX-A7-NOFPU: .eabi_attribute	44, 2
-; CORTEX-A7-FPUV4: .eabi_attribute	44, 2
+; CORTEX-A7-CHECK: .eabi_attribute      44, 2
+; CORTEX-A7-NOFPU: .eabi_attribute      44, 2
+; CORTEX-A7-FPUV4: .eabi_attribute      44, 2
 
 ; Tag_Virtualization_use
-; CORTEX-A7-CHECK: .eabi_attribute	68, 3
-; CORTEX-A7-NOFPU: .eabi_attribute	68, 3
-; CORTEX-A7-FPUV4: .eabi_attribute	68, 3
+; CORTEX-A7-CHECK: .eabi_attribute      68, 3
+; CORTEX-A7-NOFPU: .eabi_attribute      68, 3
+; CORTEX-A7-FPUV4: .eabi_attribute      68, 3
 
 ; CORTEX-A5-DEFAULT:        .cpu    cortex-a5
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 6, 10
@@ -207,92 +426,134 @@
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 8, 1
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 9, 2
 ; CORTEX-A5-DEFAULT:        .fpu    neon-vfpv4
+; CORTEX-A5-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 20, 1
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 21, 1
+; CORTEX-A5-DEFAULT-NOT:        .eabi_attribute 22
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 23, 3
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 24, 1
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 25, 1
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 42, 1
+; CORTEX-A5-DEFAULT-NOT:        .eabi_attribute 44
 ; CORTEX-A5-DEFAULT:        .eabi_attribute 68, 1
 
+; CORTEX-A5-DEFAULT-FAST-NOT:   .eabi_attribute 19
+;; The A5 defaults to a VFPv4 FPU, so it flushed preserving sign when -ffast-math
+;; is given.
+; CORTEX-A5-DEFAULT-FAST:        .eabi_attribute 20, 2
+; CORTEX-A5-DEFAULT-FAST-NOT: .eabi_attribute 21
+; CORTEX-A5-DEFAULT-FAST-NOT: .eabi_attribute 22
+; CORTEX-A5-DEFAULT-FAST: .eabi_attribute 23, 1
+
 ; CORTEX-A5-NONEON:        .cpu    cortex-a5
 ; CORTEX-A5-NONEON:        .eabi_attribute 6, 10
 ; CORTEX-A5-NONEON:        .eabi_attribute 7, 65
 ; CORTEX-A5-NONEON:        .eabi_attribute 8, 1
 ; CORTEX-A5-NONEON:        .eabi_attribute 9, 2
 ; CORTEX-A5-NONEON:        .fpu    vfpv4-d16
+;; We default to IEEE 754 compliance
 ; CORTEX-A5-NONEON:        .eabi_attribute 20, 1
 ; CORTEX-A5-NONEON:        .eabi_attribute 21, 1
+; CORTEX-A5-NONEON-NOT:    .eabi_attribute 22
 ; CORTEX-A5-NONEON:        .eabi_attribute 23, 3
 ; CORTEX-A5-NONEON:        .eabi_attribute 24, 1
 ; CORTEX-A5-NONEON:        .eabi_attribute 25, 1
 ; CORTEX-A5-NONEON:        .eabi_attribute 42, 1
 ; CORTEX-A5-NONEON:        .eabi_attribute 68, 1
 
+; CORTEX-A5-NONEON-FAST-NOT:   .eabi_attribute 19
+;; The A5 defaults to a VFPv4 FPU, so it flushed preserving sign when -ffast-math
+;; is given.
+; CORTEX-A5-NONEON-FAST:        .eabi_attribute 20, 2
+; CORTEX-A5-NONEON-FAST-NOT: .eabi_attribute 21
+; CORTEX-A5-NONEON-FAST-NOT: .eabi_attribute 22
+; CORTEX-A5-NONEON-FAST: .eabi_attribute 23, 1
+
 ; CORTEX-A5-NOFPU:        .cpu    cortex-a5
 ; CORTEX-A5-NOFPU:        .eabi_attribute 6, 10
 ; CORTEX-A5-NOFPU:        .eabi_attribute 7, 65
 ; CORTEX-A5-NOFPU:        .eabi_attribute 8, 1
 ; CORTEX-A5-NOFPU:        .eabi_attribute 9, 2
 ; CORTEX-A5-NOFPU-NOT:    .fpu
+; CORTEX-A5-NOFPU-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
 ; CORTEX-A5-NOFPU:        .eabi_attribute 20, 1
 ; CORTEX-A5-NOFPU:        .eabi_attribute 21, 1
+; CORTEX-A5-NOFPU-NOT:    .eabi_attribute 22
 ; CORTEX-A5-NOFPU:        .eabi_attribute 23, 3
 ; CORTEX-A5-NOFPU:        .eabi_attribute 24, 1
 ; CORTEX-A5-NOFPU:        .eabi_attribute 25, 1
 ; CORTEX-A5-NOFPU:        .eabi_attribute 42, 1
 ; CORTEX-A5-NOFPU:        .eabi_attribute 68, 1
 
+; CORTEX-A5-NOFPU-FAST-NOT:   .eabi_attribute 19
+;; Despite there being no FPU, we chose to flush to zero preserving
+;; sign. This matches what the hardware would do for this architecture
+;; revision.
+; CORTEX-A5-NOFPU-FAST: .eabi_attribute 20, 2
+; CORTEX-A5-NOFPU-FAST-NOT: .eabi_attribute 21
+; CORTEX-A5-NOFPU-FAST-NOT: .eabi_attribute 22
+; CORTEX-A5-NOFPU-FAST: .eabi_attribute 23, 1
+
 ; CORTEX-A9-SOFT:  .cpu cortex-a9
 ; CORTEX-A9-SOFT:  .eabi_attribute 6, 10
 ; CORTEX-A9-SOFT:  .eabi_attribute 7, 65
 ; CORTEX-A9-SOFT:  .eabi_attribute 8, 1
 ; CORTEX-A9-SOFT:  .eabi_attribute 9, 2
 ; CORTEX-A9-SOFT:  .fpu neon
+; CORTEX-A9-SOFT-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
 ; CORTEX-A9-SOFT:  .eabi_attribute 20, 1
 ; CORTEX-A9-SOFT:  .eabi_attribute 21, 1
+; CORTEX-A9-SOFT-NOT:  .eabi_attribute 22
 ; CORTEX-A9-SOFT:  .eabi_attribute 23, 3
 ; CORTEX-A9-SOFT:  .eabi_attribute 24, 1
 ; CORTEX-A9-SOFT:  .eabi_attribute 25, 1
 ; CORTEX-A9-SOFT-NOT:  .eabi_attribute 27
 ; CORTEX-A9-SOFT-NOT:  .eabi_attribute 28
 ; CORTEX-A9-SOFT:  .eabi_attribute 36, 1
-; CORTEX-A9-SOFT-NOT:  .eabi_attribute 42
+; CORTEX-A9-SOFT:  .eabi_attribute 38, 1
+; CORTEX-A9-SOFT:  .eabi_attribute 42, 1
+; CORTEX-A9-SOFT-NOT:  .eabi_attribute 44
 ; CORTEX-A9-SOFT:  .eabi_attribute 68, 1
 
+; CORTEX-A9-SOFT-FAST-NOT:   .eabi_attribute 19
+;; The A9 defaults to a VFPv3 FPU, so it flushes preseving sign when
+;; -ffast-math is specified.
+; CORTEX-A9-SOFT-FAST:  .eabi_attribute 20, 2
+; CORTEX-A5-SOFT-FAST-NOT: .eabi_attribute 21
+; CORTEX-A5-SOFT-FAST-NOT: .eabi_attribute 22
+; CORTEX-A5-SOFT-FAST: .eabi_attribute 23, 1
+
 ; CORTEX-A9-HARD:  .cpu cortex-a9
 ; CORTEX-A9-HARD:  .eabi_attribute 6, 10
 ; CORTEX-A9-HARD:  .eabi_attribute 7, 65
 ; CORTEX-A9-HARD:  .eabi_attribute 8, 1
 ; CORTEX-A9-HARD:  .eabi_attribute 9, 2
 ; CORTEX-A9-HARD:  .fpu neon
+; CORTEX-A9-HARD-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
 ; CORTEX-A9-HARD:  .eabi_attribute 20, 1
 ; CORTEX-A9-HARD:  .eabi_attribute 21, 1
+; CORTEX-A9-HARD-NOT:  .eabi_attribute 22
 ; CORTEX-A9-HARD:  .eabi_attribute 23, 3
 ; CORTEX-A9-HARD:  .eabi_attribute 24, 1
 ; CORTEX-A9-HARD:  .eabi_attribute 25, 1
 ; CORTEX-A9-HARD-NOT:  .eabi_attribute 27
 ; CORTEX-A9-HARD:  .eabi_attribute 28, 1
 ; CORTEX-A9-HARD:  .eabi_attribute 36, 1
-; CORTEX-A9-HARD-NOT:  .eabi_attribute 42
+; CORTEX-A9-HARD:  .eabi_attribute 38, 1
+; CORTEX-A9-HARD:  .eabi_attribute 42, 1
 ; CORTEX-A9-HARD:  .eabi_attribute 68, 1
 
-; CORTEX-A9-MP:  .cpu cortex-a9-mp
-; CORTEX-A9-MP:  .eabi_attribute 6, 10
-; CORTEX-A9-MP:  .eabi_attribute 7, 65
-; CORTEX-A9-MP:  .eabi_attribute 8, 1
-; CORTEX-A9-MP:  .eabi_attribute 9, 2
-; CORTEX-A9-MP:  .fpu neon
-; CORTEX-A9-MP:  .eabi_attribute 20, 1
-; CORTEX-A9-MP:  .eabi_attribute 21, 1
-; CORTEX-A9-MP:  .eabi_attribute 23, 3
-; CORTEX-A9-MP:  .eabi_attribute 24, 1
-; CORTEX-A9-MP:  .eabi_attribute 25, 1
-; CORTEX-A9-MP-NOT:  .eabi_attribute 27
-; CORTEX-A9-MP-NOT:  .eabi_attribute 28
-; CORTEX-A9-MP:  .eabi_attribute 36, 1
-; CORTEX-A9-MP:  .eabi_attribute 42, 1
-; CORTEX-A9-MP:  .eabi_attribute 68, 1
+; CORTEX-A9-HARD-FAST-NOT:   .eabi_attribute 19
+;; The A9 defaults to a VFPv3 FPU, so it flushes preseving sign when
+;; -ffast-math is specified.
+; CORTEX-A9-HARD-FAST:  .eabi_attribute 20, 2
+; CORTEX-A9-HARD-FAST-NOT:  .eabi_attribute 21
+; CORTEX-A9-HARD-FAST-NOT:  .eabi_attribute 22
+; CORTEX-A9-HARD-FAST:  .eabi_attribute 23, 1
 
 ; CORTEX-A12-DEFAULT:  .cpu cortex-a12
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 6, 10
@@ -300,8 +561,11 @@
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 8, 1
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 9, 2
 ; CORTEX-A12-DEFAULT:  .fpu neon-vfpv4
+; CORTEX-A12-DEFAULT-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 20, 1
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 21, 1
+; CORTEX-A12-DEFAULT-NOT:  .eabi_attribute 22
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 23, 3
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 24, 1
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 25, 1
@@ -309,14 +573,25 @@
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 44, 2
 ; CORTEX-A12-DEFAULT:  .eabi_attribute 68, 3
 
+; CORTEX-A12-DEFAULT-FAST-NOT:   .eabi_attribute 19
+;; The A12 defaults to a VFPv3 FPU, so it flushes preseving sign when
+;; -ffast-math is specified.
+; CORTEX-A12-DEFAULT-FAST:  .eabi_attribute 20, 2
+; CORTEX-A12-HARD-FAST-NOT:  .eabi_attribute 21
+; CORTEX-A12-HARD-FAST-NOT:  .eabi_attribute 22
+; CORTEX-A12-HARD-FAST:  .eabi_attribute 23, 1
+
 ; CORTEX-A12-NOFPU:  .cpu cortex-a12
 ; CORTEX-A12-NOFPU:  .eabi_attribute 6, 10
 ; CORTEX-A12-NOFPU:  .eabi_attribute 7, 65
 ; CORTEX-A12-NOFPU:  .eabi_attribute 8, 1
 ; CORTEX-A12-NOFPU:  .eabi_attribute 9, 2
 ; CORTEX-A12-NOFPU-NOT:  .fpu
+; CORTEX-A12-NOFPU-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
 ; CORTEX-A12-NOFPU:  .eabi_attribute 20, 1
 ; CORTEX-A12-NOFPU:  .eabi_attribute 21, 1
+; CORTEX-A12-NOFPU-NOT:  .eabi_attribute 22
 ; CORTEX-A12-NOFPU:  .eabi_attribute 23, 3
 ; CORTEX-A12-NOFPU:  .eabi_attribute 24, 1
 ; CORTEX-A12-NOFPU:  .eabi_attribute 25, 1
@@ -324,108 +599,284 @@
 ; CORTEX-A12-NOFPU:  .eabi_attribute 44, 2
 ; CORTEX-A12-NOFPU:  .eabi_attribute 68, 3
 
+; CORTEX-A12-NOFPU-FAST-NOT:   .eabi_attribute 19
+;; Despite there being no FPU, we chose to flush to zero preserving
+;; sign. This matches what the hardware would do for this architecture
+;; revision.
+; CORTEX-A12-NOFPU-FAST:  .eabi_attribute 20, 2
+; CORTEX-A12-NOFPU-FAST-NOT:  .eabi_attribute 21
+; CORTEX-A12-NOFPU-FAST-NOT:  .eabi_attribute 22
+; CORTEX-A12-NOFPU-FAST:  .eabi_attribute 23, 1
+
 ; CORTEX-A15: .cpu cortex-a15
 ; CORTEX-A15: .eabi_attribute 6, 10
 ; CORTEX-A15: .eabi_attribute 7, 65
 ; CORTEX-A15: .eabi_attribute 8, 1
 ; CORTEX-A15: .eabi_attribute 9, 2
 ; CORTEX-A15: .fpu neon-vfpv4
+; CORTEX-A15-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
 ; CORTEX-A15: .eabi_attribute 20, 1
 ; CORTEX-A15: .eabi_attribute 21, 1
+; CORTEX-A15-NOT: .eabi_attribute 22
 ; CORTEX-A15: .eabi_attribute 23, 3
 ; CORTEX-A15: .eabi_attribute 24, 1
 ; CORTEX-A15: .eabi_attribute 25, 1
 ; CORTEX-A15-NOT: .eabi_attribute 27
 ; CORTEX-A15-NOT: .eabi_attribute 28
 ; CORTEX-A15: .eabi_attribute 36, 1
+; CORTEX-A15: .eabi_attribute 38, 1
 ; CORTEX-A15: .eabi_attribute 42, 1
 ; CORTEX-A15: .eabi_attribute 44, 2
 ; CORTEX-A15: .eabi_attribute 68, 3
 
+; CORTEX-A15-FAST-NOT:   .eabi_attribute 19
+;; The A15 defaults to a VFPv3 FPU, so it flushes preseving sign when
+;; -ffast-math is specified.
+; CORTEX-A15-FAST: .eabi_attribute 20, 2
+; CORTEX-A15-FAST-NOT:  .eabi_attribute 21
+; CORTEX-A15-FAST-NOT:  .eabi_attribute 22
+; CORTEX-A15-FAST:  .eabi_attribute 23, 1
+
+; CORTEX-A17-DEFAULT:  .cpu cortex-a17
+; CORTEX-A17-DEFAULT:  .eabi_attribute 6, 10
+; CORTEX-A17-DEFAULT:  .eabi_attribute 7, 65
+; CORTEX-A17-DEFAULT:  .eabi_attribute 8, 1
+; CORTEX-A17-DEFAULT:  .eabi_attribute 9, 2
+; CORTEX-A17-DEFAULT:  .fpu neon-vfpv4
+; CORTEX-A17-DEFAULT-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; CORTEX-A17-DEFAULT:  .eabi_attribute 20, 1
+; CORTEX-A17-DEFAULT:  .eabi_attribute 21, 1
+; CORTEX-A17-DEFAULT-NOT:  .eabi_attribute 22
+; CORTEX-A17-DEFAULT:  .eabi_attribute 23, 3
+; CORTEX-A17-DEFAULT:  .eabi_attribute 24, 1
+; CORTEX-A17-DEFAULT:  .eabi_attribute 25, 1
+; CORTEX-A17-DEFAULT:  .eabi_attribute 42, 1
+; CORTEX-A17-DEFAULT:  .eabi_attribute 44, 2
+; CORTEX-A17-DEFAULT:  .eabi_attribute 68, 3
+
+; CORTEX-A17-FAST-NOT:   .eabi_attribute 19
+;; The A17 defaults to a VFPv3 FPU, so it flushes preseving sign when
+;; -ffast-math is specified.
+; CORTEX-A17-FAST:  .eabi_attribute 20, 2
+; CORTEX-A17-FAST-NOT:  .eabi_attribute 21
+; CORTEX-A17-FAST-NOT:  .eabi_attribute 22
+; CORTEX-A17-FAST:  .eabi_attribute 23, 1
+
+; CORTEX-A17-NOFPU:  .cpu cortex-a17
+; CORTEX-A17-NOFPU:  .eabi_attribute 6, 10
+; CORTEX-A17-NOFPU:  .eabi_attribute 7, 65
+; CORTEX-A17-NOFPU:  .eabi_attribute 8, 1
+; CORTEX-A17-NOFPU:  .eabi_attribute 9, 2
+; CORTEX-A17-NOFPU-NOT:  .fpu
+; CORTEX-A17-NOFPU-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; CORTEX-A17-NOFPU:  .eabi_attribute 20, 1
+; CORTEX-A17-NOFPU:  .eabi_attribute 21, 1
+; CORTEX-A17-NOFPU-NOT:  .eabi_attribute 22
+; CORTEX-A17-NOFPU:  .eabi_attribute 23, 3
+; CORTEX-A17-NOFPU:  .eabi_attribute 24, 1
+; CORTEX-A17-NOFPU:  .eabi_attribute 25, 1
+; CORTEX-A17-NOFPU:  .eabi_attribute 42, 1
+; CORTEX-A17-NOFPU:  .eabi_attribute 44, 2
+; CORTEX-A17-NOFPU:  .eabi_attribute 68, 3
+
+; CORTEX-A17-NOFPU-NOT:   .eabi_attribute 19
+;; Despite there being no FPU, we chose to flush to zero preserving
+;; sign. This matches what the hardware would do for this architecture
+;; revision.
+; CORTEX-A17-NOFPU-FAST:  .eabi_attribute 20, 2
+; CORTEX-A17-NOFPU-FAST-NOT:  .eabi_attribute 21
+; CORTEX-A17-NOFPU-FAST-NOT:  .eabi_attribute 22
+; CORTEX-A17-NOFPU-FAST:  .eabi_attribute 23, 1
+
 ; CORTEX-M0:  .cpu cortex-m0
 ; CORTEX-M0:  .eabi_attribute 6, 12
 ; CORTEX-M0-NOT:  .eabi_attribute 7
 ; CORTEX-M0:  .eabi_attribute 8, 0
 ; CORTEX-M0:  .eabi_attribute 9, 1
+; CORTEX-M0-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; CORTEX-M0:  .eabi_attribute 20, 1
+; CORTEX-M0:  .eabi_attribute 21, 1
+; CORTEX-M0-NOT:  .eabi_attribute 22
+; CORTEX-M0:  .eabi_attribute 23, 3
 ; CORTEX-M0:  .eabi_attribute 24, 1
 ; CORTEX-M0:  .eabi_attribute 25, 1
 ; CORTEX-M0-NOT:  .eabi_attribute 27
 ; CORTEX-M0-NOT:  .eabi_attribute 28
 ; CORTEX-M0-NOT:  .eabi_attribute 36
+; CORTEX-M0:  .eabi_attribute 38, 1
 ; CORTEX-M0-NOT:  .eabi_attribute 42
+; CORTEX-M0-NOT:  .eabi_attribute 44
 ; CORTEX-M0-NOT:  .eabi_attribute 68
 
+; CORTEX-M0-FAST-NOT:   .eabi_attribute 19
+;; Despite the M0 CPU having no FPU in this scenario, we chose to
+;; flush to positive zero here. There's no hardware support doing
+;; this, but the fast maths software library might and such behaviour
+;; would match hardware support on this architecture revision if it
+;; existed.
+; CORTEX-M0-FAST-NOT:  .eabi_attribute 20
+; CORTEX-M0-FAST-NOT:  .eabi_attribute 21
+; CORTEX-M0-FAST-NOT:  .eabi_attribute 22
+; CORTEX-M0-FAST:  .eabi_attribute 23, 1
+
 ; CORTEX-M3:  .cpu cortex-m3
 ; CORTEX-M3:  .eabi_attribute 6, 10
 ; CORTEX-M3:  .eabi_attribute 7, 77
 ; CORTEX-M3:  .eabi_attribute 8, 0
 ; CORTEX-M3:  .eabi_attribute 9, 2
+; CORTEX-M3-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
 ; CORTEX-M3:  .eabi_attribute 20, 1
 ; CORTEX-M3:  .eabi_attribute 21, 1
+; CORTEX-M3-NOT:  .eabi_attribute 22
 ; CORTEX-M3:  .eabi_attribute 23, 3
 ; CORTEX-M3:  .eabi_attribute 24, 1
 ; CORTEX-M3:  .eabi_attribute 25, 1
 ; CORTEX-M3-NOT:  .eabi_attribute 27
 ; CORTEX-M3-NOT:  .eabi_attribute 28
 ; CORTEX-M3-NOT:  .eabi_attribute 36
+; CORTEX-M3:  .eabi_attribute 38, 1
 ; CORTEX-M3-NOT:  .eabi_attribute 42
 ; CORTEX-M3-NOT:  .eabi_attribute 44
 ; CORTEX-M3-NOT:  .eabi_attribute 68
 
+; CORTEX-M3-FAST-NOT:   .eabi_attribute 19
+;; Despite there being no FPU, we chose to flush to zero preserving
+;; sign. This matches what the hardware would do for this architecture
+;; revision.
+; CORTEX-M3-FAST:  .eabi_attribute 20, 2
+; CORTEX-M3-FAST-NOT:  .eabi_attribute 21
+; CORTEX-M3-FAST-NOT:  .eabi_attribute 22
+; CORTEX-M3-FAST:  .eabi_attribute 23, 1
+
 ; CORTEX-M4-SOFT:  .cpu cortex-m4
 ; CORTEX-M4-SOFT:  .eabi_attribute 6, 13
 ; CORTEX-M4-SOFT:  .eabi_attribute 7, 77
 ; CORTEX-M4-SOFT:  .eabi_attribute 8, 0
 ; CORTEX-M4-SOFT:  .eabi_attribute 9, 2
 ; CORTEX-M4-SOFT:  .fpu vfpv4-d16
+; CORTEX-M4-SOFT-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
 ; CORTEX-M4-SOFT:  .eabi_attribute 20, 1
 ; CORTEX-M4-SOFT:  .eabi_attribute 21, 1
+; CORTEX-M4-SOFT-NOT:  .eabi_attribute 22
 ; CORTEX-M4-SOFT:  .eabi_attribute 23, 3
 ; CORTEX-M4-SOFT:  .eabi_attribute 24, 1
 ; CORTEX-M4-SOFT:  .eabi_attribute 25, 1
 ; CORTEX-M4-SOFT:  .eabi_attribute 27, 1
 ; CORTEX-M4-SOFT-NOT:  .eabi_attribute 28
 ; CORTEX-M4-SOFT:  .eabi_attribute 36, 1
+; CORTEX-M4-SOFT:  .eabi_attribute 38, 1
 ; CORTEX-M4-SOFT-NOT:  .eabi_attribute 42
 ; CORTEX-M4-SOFT-NOT:  .eabi_attribute 44
 ; CORTEX-M4-SOFT-NOT:  .eabi_attribute 68
 
+; CORTEX-M4-SOFT-FAST-NOT:   .eabi_attribute 19
+;; The M4 defaults to a VFPv4 FPU, so it flushes preseving sign when
+;; -ffast-math is specified.
+; CORTEX-M4-SOFT-FAST:  .eabi_attribute 20, 2
+; CORTEX-M4-SOFT-FAST-NOT:  .eabi_attribute 21
+; CORTEX-M4-SOFT-FAST-NOT:  .eabi_attribute 22
+; CORTEX-M4-SOFT-FAST:  .eabi_attribute 23, 1
+
 ; CORTEX-M4-HARD:  .cpu cortex-m4
 ; CORTEX-M4-HARD:  .eabi_attribute 6, 13
 ; CORTEX-M4-HARD:  .eabi_attribute 7, 77
 ; CORTEX-M4-HARD:  .eabi_attribute 8, 0
 ; CORTEX-M4-HARD:  .eabi_attribute 9, 2
 ; CORTEX-M4-HARD:  .fpu vfpv4-d16
+; CORTEX-M4-HARD-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
 ; CORTEX-M4-HARD:  .eabi_attribute 20, 1
 ; CORTEX-M4-HARD:  .eabi_attribute 21, 1
+; CORTEX-M4-HARD-NOT:  .eabi_attribute 22
 ; CORTEX-M4-HARD:  .eabi_attribute 23, 3
 ; CORTEX-M4-HARD:  .eabi_attribute 24, 1
 ; CORTEX-M4-HARD:  .eabi_attribute 25, 1
 ; CORTEX-M4-HARD:  .eabi_attribute 27, 1
 ; CORTEX-M4-HARD:  .eabi_attribute 28, 1
 ; CORTEX-M4-HARD:  .eabi_attribute 36, 1
+; CORTEX-M4-HARD:  .eabi_attribute 38, 1
 ; CORTEX-M4-HARD-NOT:  .eabi_attribute 42
 ; CORTEX-M4-HARD-NOT:  .eabi_attribute 44
 ; CORTEX-M4-HARD-NOT:  .eabi_attribute 68
 
+; CORTEX-M4-HARD-FAST-NOT:   .eabi_attribute 19
+;; The M4 defaults to a VFPv4 FPU, so it flushes preseving sign when
+;; -ffast-math is specified.
+; CORTEX-M4-HARD-FAST:  .eabi_attribute 20, 2
+; CORTEX-M4-HARD-FAST-NOT:  .eabi_attribute 21
+; CORTEX-M4-HARD-FAST-NOT:  .eabi_attribute 22
+; CORTEX-M4-HARD-FAST:  .eabi_attribute 23, 1
+
+; CORTEX-M7:  .cpu    cortex-m7
+; CORTEX-M7:  .eabi_attribute 6, 13
+; CORTEX-M7:  .eabi_attribute 7, 77
+; CORTEX-M7:  .eabi_attribute 8, 0
+; CORTEX-M7:  .eabi_attribute 9, 2
+; CORTEX-M7-SOFT-NOT: .fpu
+; CORTEX-M7-SINGLE:  .fpu fpv5-d16
+; CORTEX-M7-DOUBLE:  .fpu fpv5-d16
+; CORTEX-M7:  .eabi_attribute 17, 1
+; CORTEX-M7-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; CORTEX-M7:  .eabi_attribute 20, 1
+; CORTEX-M7:  .eabi_attribute 21, 1
+; CORTEX-M7-NOT:  .eabi_attribute 22
+; CORTEX-M7:  .eabi_attribute 23, 3
+; CORTEX-M7:  .eabi_attribute 24, 1
+; CORTEX-M7:  .eabi_attribute 25, 1
+; CORTEX-M7-SOFT-NOT: .eabi_attribute 27
+; CORTEX-M7-SINGLE:  .eabi_attribute 27, 1
+; CORTEX-M7-DOUBLE-NOT: .eabi_attribute 27
+; CORTEX-M7:  .eabi_attribute 36, 1
+; CORTEX-M7:  .eabi_attribute 38, 1
+; CORTEX-M7:  .eabi_attribute 14, 0
+
+; CORTEX-M7-NOFPU-FAST-NOT:   .eabi_attribute 19
+;; The M7 has the ARMv8 FP unit, which always flushes preserving sign.
+; CORTEX-M7-FAST:  .eabi_attribute 20, 2
+;; Despite there being no FPU, we chose to flush to zero preserving
+;; sign. This matches what the hardware would do for this architecture
+;; revision.
+; CORTEX-M7-NOFPU-FAST: .eabi_attribute 20, 2
+; CORTEX-M7-NOFPU-FAST-NOT:  .eabi_attribute 21
+; CORTEX-M7-NOFPU-FAST-NOT:  .eabi_attribute 22
+; CORTEX-M7-NOFPU-FAST:  .eabi_attribute 23, 1
+
 ; CORTEX-R5:  .cpu cortex-r5
 ; CORTEX-R5:  .eabi_attribute 6, 10
 ; CORTEX-R5:  .eabi_attribute 7, 82
 ; CORTEX-R5:  .eabi_attribute 8, 1
 ; CORTEX-R5:  .eabi_attribute 9, 2
 ; CORTEX-R5:  .fpu vfpv3-d16
+; CORTEX-R5-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
 ; CORTEX-R5:  .eabi_attribute 20, 1
 ; CORTEX-R5:  .eabi_attribute 21, 1
+; CORTEX-R5-NOT:  .eabi_attribute 22
 ; CORTEX-R5:  .eabi_attribute 23, 3
 ; CORTEX-R5:  .eabi_attribute 24, 1
 ; CORTEX-R5:  .eabi_attribute 25, 1
 ; CORTEX-R5:  .eabi_attribute 27, 1
 ; CORTEX-R5-NOT:  .eabi_attribute 28
 ; CORTEX-R5-NOT:  .eabi_attribute 36
+; CORTEX-R5:  .eabi_attribute 38, 1
 ; CORTEX-R5-NOT:  .eabi_attribute 42
 ; CORTEX-R5:  .eabi_attribute 44, 2
 ; CORTEX-R5-NOT:  .eabi_attribute 68
 
+; CORTEX-R5-FAST-NOT:   .eabi_attribute 19
+;; The R5 has the VFPv3 FP unit, which always flushes preserving sign.
+; CORTEX-R5-FAST:  .eabi_attribute 20, 2
+; CORTEX-R5-FAST-NOT:  .eabi_attribute 21
+; CORTEX-R5-FAST-NOT:  .eabi_attribute 22
+; CORTEX-R5-FAST:  .eabi_attribute 23, 1
+
 ; CORTEX-A53:  .cpu cortex-a53
 ; CORTEX-A53:  .eabi_attribute 6, 14
 ; CORTEX-A53:  .eabi_attribute 7, 65
@@ -433,15 +884,29 @@
 ; CORTEX-A53:  .eabi_attribute 9, 2
 ; CORTEX-A53:  .fpu crypto-neon-fp-armv8
 ; CORTEX-A53:  .eabi_attribute 12, 3
+; CORTEX-A53-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; CORTEX-A53:  .eabi_attribute 20, 1
+; CORTEX-A53:  .eabi_attribute 21, 1
+; CORTEX-A53-NOT:  .eabi_attribute 22
+; CORTEX-A53:  .eabi_attribute 23, 3
 ; CORTEX-A53:  .eabi_attribute 24, 1
 ; CORTEX-A53:  .eabi_attribute 25, 1
 ; CORTEX-A53-NOT:  .eabi_attribute 27
 ; CORTEX-A53-NOT:  .eabi_attribute 28
 ; CORTEX-A53:  .eabi_attribute 36, 1
+; CORTEX-A53:  .eabi_attribute 38, 1
 ; CORTEX-A53:  .eabi_attribute 42, 1
 ; CORTEX-A53-NOT:  .eabi_attribute 44
 ; CORTEX-A53:  .eabi_attribute 68, 3
 
+; CORTEX-A53-FAST-NOT:   .eabi_attribute 19
+;; The A53 has the ARMv8 FP unit, which always flushes preserving sign.
+; CORTEX-A53-FAST:  .eabi_attribute 20, 2
+; CORTEX-A53-FAST-NOT:  .eabi_attribute 21
+; CORTEX-A53-FAST-NOT:  .eabi_attribute 22
+; CORTEX-A53-FAST:  .eabi_attribute 23, 1
+
 ; CORTEX-A57:  .cpu cortex-a57
 ; CORTEX-A57:  .eabi_attribute 6, 14
 ; CORTEX-A57:  .eabi_attribute 7, 65
@@ -449,20 +914,37 @@
 ; CORTEX-A57:  .eabi_attribute 9, 2
 ; CORTEX-A57:  .fpu crypto-neon-fp-armv8
 ; CORTEX-A57:  .eabi_attribute 12, 3
+; CORTEX-A57-NOT:   .eabi_attribute 19
+;; We default to IEEE 754 compliance
+; CORTEX-A57:  .eabi_attribute 20, 1
+; CORTEX-A57:  .eabi_attribute 21, 1
+; CORTEX-A57-NOT:  .eabi_attribute 22
+; CORTEX-A57:  .eabi_attribute 23, 3
 ; CORTEX-A57:  .eabi_attribute 24, 1
 ; CORTEX-A57:  .eabi_attribute 25, 1
 ; CORTEX-A57-NOT:  .eabi_attribute 27
 ; CORTEX-A57-NOT:  .eabi_attribute 28
 ; CORTEX-A57:  .eabi_attribute 36, 1
+; CORTEX-A57:  .eabi_attribute 38, 1
 ; CORTEX-A57:  .eabi_attribute 42, 1
 ; CORTEX-A57-NOT:  .eabi_attribute 44
 ; CORTEX-A57:  .eabi_attribute 68, 3
 
+; CORTEX-A57-FAST-NOT:   .eabi_attribute 19
+;; The A57 has the ARMv8 FP unit, which always flushes preserving sign.
+; CORTEX-A57-FAST:  .eabi_attribute 20, 2
+; CORTEX-A57-FAST-NOT:  .eabi_attribute 21
+; CORTEX-A57-FAST-NOT:  .eabi_attribute 22
+; CORTEX-A57-FAST:  .eabi_attribute 23, 1
+
 ; RELOC-PIC:  .eabi_attribute 15, 1
 ; RELOC-PIC:  .eabi_attribute 16, 1
 ; RELOC-PIC:  .eabi_attribute 17, 2
 ; RELOC-OTHER:  .eabi_attribute 17, 1
 
+; PCS-R9-USE:  .eabi_attribute 14, 0
+; PCS-R9-RESERVE:  .eabi_attribute 14, 3
+
 define i32 @f(i64 %z) {
-	ret i32 0
+    ret i32 0
 }
diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll
index e344b08a8aeb..7ea9be2c61e6 100644
--- a/test/CodeGen/ARM/carry.ll
+++ b/test/CodeGen/ARM/carry.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv6t2-eabi %s -o - | FileCheck %s
 
 define i64 @f1(i64 %a, i64 %b) {
 ; CHECK-LABEL: f1:
diff --git a/test/CodeGen/ARM/coalesce-dbgvalue.ll b/test/CodeGen/ARM/coalesce-dbgvalue.ll
index 606c9bc52d64..4e5fb5e5c60f 100644
--- a/test/CodeGen/ARM/coalesce-dbgvalue.ll
+++ b/test/CodeGen/ARM/coalesce-dbgvalue.ll
@@ -27,11 +27,11 @@ for.cond1:                                        ; preds = %for.end9, %for.cond
 
 for.body2:                                        ; preds = %for.cond1
   store i32 %storemerge11, i32* @b, align 4, !dbg !26
-  tail call void @llvm.dbg.value(metadata !27, i64 0, metadata !11), !dbg !28
+  tail call void @llvm.dbg.value(metadata i32* null, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !28
   %0 = load i64* @a, align 8, !dbg !29
   %xor = xor i64 %0, %e.1.ph, !dbg !29
   %conv3 = trunc i64 %xor to i32, !dbg !29
-  tail call void @llvm.dbg.value(metadata !{i32 %conv3}, i64 0, metadata !10), !dbg !29
+  tail call void @llvm.dbg.value(metadata i32 %conv3, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !29
   %tobool4 = icmp eq i32 %conv3, 0, !dbg !29
   br i1 %tobool4, label %land.end, label %land.rhs, !dbg !29
 
@@ -69,7 +69,7 @@ declare i32 @fn2(...) #1
 declare i32 @fn3(...) #1
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #2
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
 
 attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
@@ -79,33 +79,33 @@ attributes #3 = { nounwind }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!33}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 182024) (llvm/trunk 182023)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !15, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/d/b/pr16110.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"pr16110.c", metadata !"/d/b"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"pr16110", metadata !"pr16110", metadata !"", i32 7, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @pr16110, null, null, metadata !9, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [pr16110]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/d/b/pr16110.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{metadata !10, metadata !11}
-!10 = metadata !{i32 786688, metadata !4, metadata !"e", metadata !5, i32 8, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [e] [line 8]
-!11 = metadata !{i32 786688, metadata !12, metadata !"f", metadata !5, i32 13, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [f] [line 13]
-!12 = metadata !{i32 786443, metadata !1, metadata !13, i32 12, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [/d/b/pr16110.c]
-!13 = metadata !{i32 786443, metadata !1, metadata !4, i32 12, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/d/b/pr16110.c]
-!14 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from int]
-!15 = metadata !{metadata !16, metadata !18, metadata !19, metadata !20}
-!16 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !5, i32 1, metadata !17, i32 0, i32 1, i64* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
-!17 = metadata !{i32 786468, null, null, metadata !"long long int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [long long int] [line 0, size 64, align 32, offset 0, enc DW_ATE_signed]
-!18 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !5, i32 2, metadata !8, i32 0, i32 1, i32* @b, null} ; [ DW_TAG_variable ] [b] [line 2] [def]
-!19 = metadata !{i32 786484, i32 0, null, metadata !"c", metadata !"c", metadata !"", metadata !5, i32 3, metadata !8, i32 0, i32 1, i32* @c, null} ; [ DW_TAG_variable ] [c] [line 3] [def]
-!20 = metadata !{i32 786484, i32 0, null, metadata !"d", metadata !"d", metadata !"", metadata !5, i32 4, metadata !8, i32 0, i32 1, i32* @d, null} ; [ DW_TAG_variable ] [d] [line 4] [def]
-!21 = metadata !{i32 10, i32 0, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !1, metadata !4, i32 10, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/d/b/pr16110.c]
-!26 = metadata !{i32 12, i32 0, metadata !13, null}
-!27 = metadata !{i32* null}
-!28 = metadata !{i32 13, i32 0, metadata !12, null}
-!29 = metadata !{i32 14, i32 0, metadata !12, null}
-!31 = metadata !{i32 16, i32 0, metadata !4, null}
-!32 = metadata !{i32 18, i32 0, metadata !4, null}
-!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 (trunk 182024) (llvm/trunk 182023)\001\00\000\00\000", !1, !2, !2, !3, !15, !2} ; [ DW_TAG_compile_unit ] [/d/b/pr16110.c] [DW_LANG_C99]
+!1 = !{!"pr16110.c", !"/d/b"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00pr16110\00pr16110\00\007\000\001\000\006\000\001\007", !1, !5, !6, null, i32 ()* @pr16110, null, null, !9} ; [ DW_TAG_subprogram ] [line 7] [def] [pr16110]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/d/b/pr16110.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!10, !11}
+!10 = !{!"0x100\00e\008\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [e] [line 8]
+!11 = !{!"0x100\00f\0013\000", !12, !5, !14} ; [ DW_TAG_auto_variable ] [f] [line 13]
+!12 = !{!"0xb\0012\000\002", !1, !13} ; [ DW_TAG_lexical_block ] [/d/b/pr16110.c]
+!13 = !{!"0xb\0012\000\001", !1, !4} ; [ DW_TAG_lexical_block ] [/d/b/pr16110.c]
+!14 = !{!"0xf\00\000\0032\0032\000\000", null, null, !8} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from int]
+!15 = !{!16, !18, !19, !20}
+!16 = !{!"0x34\00a\00a\00\001\000\001", null, !5, !17, i64* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
+!17 = !{!"0x24\00long long int\000\0064\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [long long int] [line 0, size 64, align 32, offset 0, enc DW_ATE_signed]
+!18 = !{!"0x34\00b\00b\00\002\000\001", null, !5, !8, i32* @b, null} ; [ DW_TAG_variable ] [b] [line 2] [def]
+!19 = !{!"0x34\00c\00c\00\003\000\001", null, !5, !8, i32* @c, null} ; [ DW_TAG_variable ] [c] [line 3] [def]
+!20 = !{!"0x34\00d\00d\00\004\000\001", null, !5, !8, i32* @d, null} ; [ DW_TAG_variable ] [d] [line 4] [def]
+!21 = !MDLocation(line: 10, scope: !22)
+!22 = !{!"0xb\0010\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [/d/b/pr16110.c]
+!26 = !MDLocation(line: 12, scope: !13)
+!27 = !{i32* null}
+!28 = !MDLocation(line: 13, scope: !12)
+!29 = !MDLocation(line: 14, scope: !12)
+!31 = !MDLocation(line: 16, scope: !4)
+!32 = !MDLocation(line: 18, scope: !4)
+!33 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/ARM/constant-islands.ll b/test/CodeGen/ARM/constant-islands.ll
new file mode 100644
index 000000000000..afa4b85ff64b
--- /dev/null
+++ b/test/CodeGen/ARM/constant-islands.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple=thumbv7-linux-gnueabihf -O0 -fast-isel=0 -o - %s | FileCheck %s
+
+define void @test_no_duplicate_branches(float %in) {
+; CHECK-LABEL: test_no_duplicate_branches:
+; CHECK: vldr {{s[0-9]+}}, [[CONST:\.LCPI[0-9]+_[0-9]+]]
+; CHECK: b .LBB
+; CHECK-NOT: b .LBB
+; CHECK: [[CONST]]:
+; CHECK-NEXT: .long 1150963712
+
+  %tst = fcmp oeq float %in, 1234.5
+
+  %chain = zext i1 %tst to i32
+
+  br i1 %tst, label %true, label %false
+
+true:
+  call i32 @llvm.arm.space(i32 2000, i32 undef)
+  ret void
+
+false:
+  ret void
+}
+
+declare i32 @llvm.arm.space(i32, i32)
diff --git a/test/CodeGen/ARM/copy-cpsr.ll b/test/CodeGen/ARM/copy-cpsr.ll
new file mode 100644
index 000000000000..8b7dc038fc92
--- /dev/null
+++ b/test/CodeGen/ARM/copy-cpsr.ll
@@ -0,0 +1,41 @@
+; RUN: llc -mtriple=armv7s-apple-ios7.0 -show-mc-encoding %s -o - | FileCheck %s --check-prefix=CHECK-ARM
+; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -show-mc-encoding %s -o - | FileCheck %s --check-prefix=CHECK-THUMB
+; RUN: llc -mtriple=thumbv7m-none-eabi -show-mc-encoding %s -o - | FileCheck %s --check-prefix=CHECK-THUMB
+
+; In the ARM backend, most compares are glued to their uses so CPSR can't
+; escape. However, for long ADCS chains (and last ditch fallback) the dependency
+; is carried in the DAG because duplicating them can be more expensive than
+; copying CPSR.
+
+; Crafting a test for this was a little tricky, in case it breaks here are some
+; notes on what I was tring to achieve:
+;   + We want 2 long ADCS chains
+;   + We want them to split after an initial common prefix (so that a single
+;     CPSR is used twice).
+;   + We want both chains to write CPSR post-split (so that the copy can't be
+;     elided).
+;   + We want the chains to be long enough that duplicating them is expensive.
+
+define void @test_copy_cpsr(i128 %lhs, i128 %rhs, i128* %addr) {
+; CHECK-ARM: test_copy_cpsr:
+; CHECK-THUMB: test_copy_cpsr:
+
+; CHECK-ARM: mrs [[TMP:r[0-9]+]], apsr @ encoding: [0x00,0x{{[0-9a-f]}}0,0x0f,0xe1]
+; CHECK-ARM: msr APSR_nzcvq, [[TMP]] @ encoding: [0x0{{[0-9a-f]}},0xf0,0x28,0xe1]
+
+  ; In Thumb mode v7M and v7AR have different MRS/MSR instructions that happen
+  ; to overlap for the apsr case, so it's definitely worth checking both.
+; CHECK-THUMB: mrs [[TMP:r[0-9]+]], apsr @ encoding: [0xef,0xf3,0x00,0x8{{[0-9a-f]}}]
+; CHECK-THUMB: msr {{APSR|apsr}}_nzcvq, [[TMP]] @ encoding: [0x8{{[0-9a-f]}},0xf3,0x00,0x88]
+
+  %sum = add i128 %lhs, %rhs
+  store volatile i128 %sum, i128* %addr
+
+  %rhs2.tmp1 = trunc i128 %rhs to i64
+  %rhs2 = zext i64 %rhs2.tmp1 to i128
+
+  %sum2 = add i128 %lhs, %rhs2
+  store volatile i128 %sum2, i128* %addr
+
+  ret void
+}
diff --git a/test/CodeGen/ARM/crc32.ll b/test/CodeGen/ARM/crc32.ll
new file mode 100644
index 000000000000..cc94330ce654
--- /dev/null
+++ b/test/CodeGen/ARM/crc32.ll
@@ -0,0 +1,58 @@
+; RUN: llc -mtriple=thumbv8 -o - %s | FileCheck %s
+
+define i32 @test_crc32b(i32 %cur, i8 %next) {
+; CHECK-LABEL: test_crc32b:
+; CHECK: crc32b r0, r0, r1
+  %bits = zext i8 %next to i32
+  %val = call i32 @llvm.arm.crc32b(i32 %cur, i32 %bits)
+  ret i32 %val
+}
+
+define i32 @test_crc32h(i32 %cur, i16 %next) {
+; CHECK-LABEL: test_crc32h:
+; CHECK: crc32h r0, r0, r1
+  %bits = zext i16 %next to i32
+  %val = call i32 @llvm.arm.crc32h(i32 %cur, i32 %bits)
+  ret i32 %val
+}
+
+define i32 @test_crc32w(i32 %cur, i32 %next) {
+; CHECK-LABEL: test_crc32w:
+; CHECK: crc32w r0, r0, r1
+  %val = call i32 @llvm.arm.crc32w(i32 %cur, i32 %next)
+  ret i32 %val
+}
+
+define i32 @test_crc32cb(i32 %cur, i8 %next) {
+; CHECK-LABEL: test_crc32cb:
+; CHECK: crc32cb r0, r0, r1
+  %bits = zext i8 %next to i32
+  %val = call i32 @llvm.arm.crc32cb(i32 %cur, i32 %bits)
+  ret i32 %val
+}
+
+define i32 @test_crc32ch(i32 %cur, i16 %next) {
+; CHECK-LABEL: test_crc32ch:
+; CHECK: crc32ch r0, r0, r1
+  %bits = zext i16 %next to i32
+  %val = call i32 @llvm.arm.crc32ch(i32 %cur, i32 %bits)
+  ret i32 %val
+}
+
+define i32 @test_crc32cw(i32 %cur, i32 %next) {
+; CHECK-LABEL: test_crc32cw:
+; CHECK: crc32cw r0, r0, r1
+  %val = call i32 @llvm.arm.crc32cw(i32 %cur, i32 %next)
+  ret i32 %val
+}
+
+
+declare i32 @llvm.arm.crc32b(i32, i32)
+declare i32 @llvm.arm.crc32h(i32, i32)
+declare i32 @llvm.arm.crc32w(i32, i32)
+declare i32 @llvm.arm.crc32x(i32, i64)
+
+declare i32 @llvm.arm.crc32cb(i32, i32)
+declare i32 @llvm.arm.crc32ch(i32, i32)
+declare i32 @llvm.arm.crc32cw(i32, i32)
+declare i32 @llvm.arm.crc32cx(i32, i64)
diff --git a/test/CodeGen/ARM/cse-ldrlit.ll b/test/CodeGen/ARM/cse-ldrlit.ll
index ea8c0ca8560d..3f5d4c2e3c29 100644
--- a/test/CodeGen/ARM/cse-ldrlit.ll
+++ b/test/CodeGen/ARM/cse-ldrlit.ll
@@ -33,8 +33,8 @@ false:
 ; CHECK-ARM-PIC-LABEL: foo:
 ; CHECK-ARM-PIC: ldr [[VAR_OFFSET:r[0-9]+]], LCPI0_0
 ; CHECK-ARM-PIC: LPC0_0:
-; CHECK-ARM-PIC-NEXT: ldr r0, [pc, [[VAR_OFFSET]]]
-; CHECK-ARM-PIC: ldr {{r[1-9][0-9]?}}, [r0, #4]
+; CHECK-ARM-PIC-NEXT: add r0, pc, [[VAR_OFFSET]]
+; CHECK-ARM-PIC: ldr {{r[0-9]+}}, [r0, #4]
 
 ; CHECK-ARM-PIC: LCPI0_0:
 ; CHECK-ARM-PIC-NEXT: .long _var-(LPC0_0+8)
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 62b9e4380b2a..4f5b7592c844 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -1,9 +1,13 @@
-; RUN: llc < %s -march=arm | grep "bl.*__ltdf" | count 1
+; RUN: llc < %s -march=arm | FileCheck %s
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
 
 ; Without CSE of libcalls, there are two calls in the output instead of one.
 
+; CHECK: bl ___ltdf
+; CHECK-NOT: bl ___ltdf
+
 define double @u_f_nonbon(double %lambda) nounwind {
 entry:
 	%tmp19.i.i = load double* null, align 4		; <double> [#uses=2]
diff --git a/test/CodeGen/ARM/dagcombine-concatvector.ll b/test/CodeGen/ARM/dagcombine-concatvector.ll
index 62ed87fd7871..80ef2ab7b8bf 100644
--- a/test/CodeGen/ARM/dagcombine-concatvector.ll
+++ b/test/CodeGen/ARM/dagcombine-concatvector.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=thumbv7s-apple-ios3.0.0 -mcpu=generic | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
-; RUN: llc < %s -mtriple=thumbeb -mattr=v7,neon | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
+; RUN: llc < %s -mtriple=thumbeb -target-abi apcs -mattr=v7,neon | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
 
 ; PR15525
 ; CHECK-LABEL: test1:
diff --git a/test/CodeGen/ARM/darwin-eabi.ll b/test/CodeGen/ARM/darwin-eabi.ll
index f2cde71dd496..5301c0b38a75 100644
--- a/test/CodeGen/ARM/darwin-eabi.ll
+++ b/test/CodeGen/ARM/darwin-eabi.ll
@@ -7,7 +7,7 @@ define float @float_op(float %lhs, float %rhs) {
   %sum = fadd float %lhs, %rhs
   ret float %sum
 ; CHECK-M3-LABEL: float_op:
-; CHECK-M3: blx ___addsf3
+; CHECK-M3: bl ___addsf3
 
 ; CHECK-M4-LABEL: float_op:
 ; CHECK-M4: vadd.f32
@@ -17,8 +17,8 @@ define double @double_op(double %lhs, double %rhs) {
   %sum = fadd double %lhs, %rhs
   ret double %sum
 ; CHECK-M3-LABEL: double_op:
-; CHECK-M3: blx ___adddf3
+; CHECK-M3: bl ___adddf3
 
 ; CHECK-M4-LABEL: double_op:
-; CHECK-M4: blx ___adddf3
+; CHECK-M4: {{(blx|b.w)}} ___adddf3
 }
diff --git a/test/CodeGen/ARM/dbg.ll b/test/CodeGen/ARM/dbg.ll
new file mode 100644
index 000000000000..8bce1a65c15e
--- /dev/null
+++ b/test/CodeGen/ARM/dbg.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple armv8-eabi -mcpu=cortex-a57 -o - %s | FileCheck %s
+; RUN: llc -mtriple thumbv8-eabi -mcpu=cortex-a57 -o - %s | FileCheck %s
+
+define void @hint_dbg() {
+entry:
+  call void @llvm.arm.dbg(i32 0)
+  ret void
+}
+
+declare void @llvm.arm.dbg(i32)
+
+; CHECK: dbg #0
+
diff --git a/test/CodeGen/ARM/debug-frame-large-stack.ll b/test/CodeGen/ARM/debug-frame-large-stack.ll
index 5bafce9407e5..1addf639bfe4 100644
--- a/test/CodeGen/ARM/debug-frame-large-stack.ll
+++ b/test/CodeGen/ARM/debug-frame-large-stack.ll
@@ -1,28 +1,11 @@
-; RUN: llc -filetype=asm -o - < %s -mtriple arm-arm-none-eabi -disable-fp-elim| FileCheck %s --check-prefix=CHECK-ARM
-; RUN: llc -filetype=asm -o - < %s -mtriple arm-arm-none-eabi | FileCheck %s --check-prefix=CHECK-ARM-FP-ELIM
+; RUN: llc -filetype=asm -o - < %s -mtriple arm-arm-netbsd-eabi -disable-fp-elim| FileCheck %s --check-prefix=CHECK-ARM
+; RUN: llc -filetype=asm -o - < %s -mtriple arm-arm-netbsd-eabi | FileCheck %s --check-prefix=CHECK-ARM-FP-ELIM
 
 define void @test1() {
     %tmp = alloca [ 64 x i32 ] , align 4
     ret void
 }
 
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!8, !9}
-!llvm.ident = !{!10}
-
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/large.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"large.c", metadata !"/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test1", metadata !"test1", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @test1, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [test1]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/large.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!10 = metadata !{metadata !"clang version 3.5 "}
-!11 = metadata !{i32 2, i32 0, metadata !4, null}
-
 ; CHECK-ARM-LABEL: test1:
 ; CHECK-ARM: .cfi_startproc
 ; CHECK-ARM: sub    sp, sp, #256
diff --git a/test/CodeGen/ARM/debug-frame-vararg.ll b/test/CodeGen/ARM/debug-frame-vararg.ll
index 42ff82d81539..05521d80646c 100644
--- a/test/CodeGen/ARM/debug-frame-vararg.ll
+++ b/test/CodeGen/ARM/debug-frame-vararg.ll
@@ -25,40 +25,40 @@
 !llvm.module.flags = !{!9, !10}
 !llvm.ident = !{!11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/var.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"var.c", metadata !"/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"sum", metadata !"sum", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, ...)* @sum, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [sum]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/var.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!11 = metadata !{metadata !"clang version 3.5 "}
-!12 = metadata !{i32 786689, metadata !4, metadata !"count", metadata !5, i32 16777221, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [count] [line 5]
-!13 = metadata !{i32 5, i32 0, metadata !4, null}
-!14 = metadata !{i32 786688, metadata !4, metadata !"vl", metadata !5, i32 6, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vl] [line 6]
-!15 = metadata !{i32 786454, metadata !16, null, metadata !"va_list", i32 30, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ] [va_list] [line 30, size 0, align 0, offset 0] [from __builtin_va_list]
-!16 = metadata !{metadata !"/linux-x86_64-high/gcc_4.7.2/dbg/llvm/bin/../lib/clang/3.5/include/stdarg.h", metadata !"/tmp"}
-!17 = metadata !{i32 786454, metadata !1, null, metadata !"__builtin_va_list", i32 6, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [__builtin_va_list] [line 6, size 0, align 0, offset 0] [from __va_list]
-!18 = metadata !{i32 786451, metadata !1, null, metadata !"__va_list", i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__va_list] [line 6, size 32, align 32, offset 0] [def] [from ]
-!19 = metadata !{metadata !20}
-!20 = metadata !{i32 786445, metadata !1, metadata !18, metadata !"__ap", i32 6, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_member ] [__ap] [line 6, size 32, align 32, offset 0] [from ]
-!21 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from ]
-!22 = metadata !{i32 6, i32 0, metadata !4, null}
-!23 = metadata !{i32 7, i32 0, metadata !4, null}
-!24 = metadata !{i32 786688, metadata !4, metadata !"sum", metadata !5, i32 8, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [sum] [line 8]
-!25 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
-!26 = metadata !{i32 786688, metadata !27, metadata !"i", metadata !5, i32 9, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 9]
-!27 = metadata !{i32 786443, metadata !1, metadata !4, i32 9, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
-!28 = metadata !{i32 9, i32 0, metadata !27, null}
-!29 = metadata !{i32 10, i32 0, metadata !30, null}
-!30 = metadata !{i32 786443, metadata !1, metadata !27, i32 9, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
-!31 = metadata !{i32 11, i32 0, metadata !30, null}
-!32 = metadata !{i32 12, i32 0, metadata !4, null}
-!33 = metadata !{i32 13, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/var.c] [DW_LANG_C99]
+!1 = !{!"var.c", !"/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00sum\00sum\00\005\000\001\000\006\00256\000\005", !1, !5, !6, null, i32 (i32, ...)* @sum, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [sum]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/var.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 1, !"Debug Info Version", i32 2}
+!11 = !{!"clang version 3.5 "}
+!12 = !{!"0x101\00count\0016777221\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [count] [line 5]
+!13 = !MDLocation(line: 5, scope: !4)
+!14 = !{!"0x100\00vl\006\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [vl] [line 6]
+!15 = !{!"0x16\00va_list\0030\000\000\000\000", !16, null, !17} ; [ DW_TAG_typedef ] [va_list] [line 30, size 0, align 0, offset 0] [from __builtin_va_list]
+!16 = !{!"/linux-x86_64-high/gcc_4.7.2/dbg/llvm/bin/../lib/clang/3.5/include/stdarg.h", !"/tmp"}
+!17 = !{!"0x16\00__builtin_va_list\006\000\000\000\000", !1, null, !18} ; [ DW_TAG_typedef ] [__builtin_va_list] [line 6, size 0, align 0, offset 0] [from __va_list]
+!18 = !{!"0x13\00__va_list\006\0032\0032\000\000\000", !1, null, null, !19, null, null, null} ; [ DW_TAG_structure_type ] [__va_list] [line 6, size 32, align 32, offset 0] [def] [from ]
+!19 = !{!20}
+!20 = !{!"0xd\00__ap\006\0032\0032\000\000", !1, !18, !21} ; [ DW_TAG_member ] [__ap] [line 6, size 32, align 32, offset 0] [from ]
+!21 = !{!"0xf\00\000\0032\0032\000\000", null, null, null} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from ]
+!22 = !MDLocation(line: 6, scope: !4)
+!23 = !MDLocation(line: 7, scope: !4)
+!24 = !{!"0x100\00sum\008\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [sum] [line 8]
+!25 = !MDLocation(line: 8, scope: !4)
+!26 = !{!"0x100\00i\009\000", !27, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 9]
+!27 = !{!"0xb\009\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
+!28 = !MDLocation(line: 9, scope: !27)
+!29 = !MDLocation(line: 10, scope: !30)
+!30 = !{!"0xb\009\000\001", !1, !27} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
+!31 = !MDLocation(line: 11, scope: !30)
+!32 = !MDLocation(line: 12, scope: !4)
+!33 = !MDLocation(line: 13, scope: !4)
 
 ; CHECK-FP-LABEL: sum
 ; CHECK-FP: .cfi_startproc
diff --git a/test/CodeGen/ARM/debug-frame.ll b/test/CodeGen/ARM/debug-frame.ll
index cb54aa8aec73..16e2c4c59f96 100644
--- a/test/CodeGen/ARM/debug-frame.ll
+++ b/test/CodeGen/ARM/debug-frame.ll
@@ -128,41 +128,41 @@ declare void @_ZSt9terminatev()
 !llvm.module.flags = !{!10, !11}
 !llvm.ident = !{!12}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/exp.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"exp.cpp", metadata !"/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"_Z4testiiiiiddddd", i32 4, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32, i32, i32, i32, i32, double, double, double, double, double)* @_Z4testiiiiiddddd, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 5] [test]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/exp.cpp]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null, metadata !8, metadata !8, metadata !8, metadata !8, metadata !8, metadata !9, metadata !9, metadata !9, metadata !9, metadata !9}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
-!10 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!12 = metadata !{metadata !"clang version 3.5 "}
-!13 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !5, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 4]
-!14 = metadata !{i32 4, i32 0, metadata !4, null}
-!15 = metadata !{i32 786689, metadata !4, metadata !"b", metadata !5, i32 33554436, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 4]
-!16 = metadata !{i32 786689, metadata !4, metadata !"c", metadata !5, i32 50331652, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [c] [line 4]
-!17 = metadata !{i32 786689, metadata !4, metadata !"d", metadata !5, i32 67108868, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [d] [line 4]
-!18 = metadata !{i32 786689, metadata !4, metadata !"e", metadata !5, i32 83886084, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [e] [line 4]
-!19 = metadata !{i32 786689, metadata !4, metadata !"m", metadata !5, i32 100663301, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [m] [line 5]
-!20 = metadata !{i32 5, i32 0, metadata !4, null}
-!21 = metadata !{i32 786689, metadata !4, metadata !"n", metadata !5, i32 117440517, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [n] [line 5]
-!22 = metadata !{i32 786689, metadata !4, metadata !"p", metadata !5, i32 134217733, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p] [line 5]
-!23 = metadata !{i32 786689, metadata !4, metadata !"q", metadata !5, i32 150994949, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [q] [line 5]
-!24 = metadata !{i32 786689, metadata !4, metadata !"r", metadata !5, i32 167772165, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [r] [line 5]
-!25 = metadata !{i32 7, i32 0, metadata !26, null}
-!26 = metadata !{i32 786443, metadata !1, metadata !4, i32 6, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/exp.cpp]
-!27 = metadata !{i32 8, i32 0, metadata !26, null} ; [ DW_TAG_imported_declaration ]
-!28 = metadata !{i32 11, i32 0, metadata !26, null}
-!29 = metadata !{i32 9, i32 0, metadata !30, null}
-!30 = metadata !{i32 786443, metadata !1, metadata !4, i32 8, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/exp.cpp]
-!31 = metadata !{i32 10, i32 0, metadata !30, null}
-!32 = metadata !{i32 10, i32 0, metadata !4, null}
-!33 = metadata !{i32 11, i32 0, metadata !4, null}
-!34 = metadata !{i32 11, i32 0, metadata !30, null}
+!0 = !{!"0x11\004\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/exp.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"exp.cpp", !"/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00test\00test\00_Z4testiiiiiddddd\004\000\001\000\006\00256\000\005", !1, !5, !6, null, void (i32, i32, i32, i32, i32, double, double, double, double, double)* @_Z4testiiiiiddddd, null, null, !2} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 5] [test]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/exp.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !8, !8, !8, !8, !8, !9, !9, !9, !9, !9}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x24\00double\000\0064\0064\000\000\004", null, null} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 1, !"Debug Info Version", i32 2}
+!12 = !{!"clang version 3.5 "}
+!13 = !{!"0x101\00a\0016777220\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [a] [line 4]
+!14 = !MDLocation(line: 4, scope: !4)
+!15 = !{!"0x101\00b\0033554436\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [b] [line 4]
+!16 = !{!"0x101\00c\0050331652\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [c] [line 4]
+!17 = !{!"0x101\00d\0067108868\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [d] [line 4]
+!18 = !{!"0x101\00e\0083886084\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [e] [line 4]
+!19 = !{!"0x101\00m\00100663301\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [m] [line 5]
+!20 = !MDLocation(line: 5, scope: !4)
+!21 = !{!"0x101\00n\00117440517\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [n] [line 5]
+!22 = !{!"0x101\00p\00134217733\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [p] [line 5]
+!23 = !{!"0x101\00q\00150994949\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [q] [line 5]
+!24 = !{!"0x101\00r\00167772165\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [r] [line 5]
+!25 = !MDLocation(line: 7, scope: !26)
+!26 = !{!"0xb\006\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [/tmp/exp.cpp]
+!27 = !MDLocation(line: 8, scope: !26)
+!28 = !MDLocation(line: 11, scope: !26)
+!29 = !MDLocation(line: 9, scope: !30)
+!30 = !{!"0xb\008\000\001", !1, !4} ; [ DW_TAG_lexical_block ] [/tmp/exp.cpp]
+!31 = !MDLocation(line: 10, scope: !30)
+!32 = !MDLocation(line: 10, scope: !4)
+!33 = !MDLocation(line: 11, scope: !4)
+!34 = !MDLocation(line: 11, scope: !30)
 
 ; CHECK-FP-LABEL: _Z4testiiiiiddddd:
 ; CHECK-FP:   .cfi_startproc
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index 31d0324de689..8679589a4865 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -7,13 +7,13 @@ target triple = "thumbv7-apple-ios"
 %struct.tag_s = type { i32, i32, i32 }
 
 define void @foo(%struct.tag_s* nocapture %this, %struct.tag_s* %c, i64 %x, i64 %y, %struct.tag_s* nocapture %ptr1, %struct.tag_s* nocapture %ptr2) nounwind ssp {
-  tail call void @llvm.dbg.value(metadata !{%struct.tag_s* %this}, i64 0, metadata !5), !dbg !20
-  tail call void @llvm.dbg.value(metadata !{%struct.tag_s* %c}, i64 0, metadata !13), !dbg !21
-  tail call void @llvm.dbg.value(metadata !{i64 %x}, i64 0, metadata !14), !dbg !22
-  tail call void @llvm.dbg.value(metadata !{i64 %y}, i64 0, metadata !17), !dbg !23
+  tail call void @llvm.dbg.value(metadata %struct.tag_s* %this, i64 0, metadata !5, metadata !{!"0x102"}), !dbg !20
+  tail call void @llvm.dbg.value(metadata %struct.tag_s* %c, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !21
+  tail call void @llvm.dbg.value(metadata i64 %x, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !22
+  tail call void @llvm.dbg.value(metadata i64 %y, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !23
 ;CHECK:	@DEBUG_VALUE: foo:y <- [R7+8]
-  tail call void @llvm.dbg.value(metadata !{%struct.tag_s* %ptr1}, i64 0, metadata !18), !dbg !24
-  tail call void @llvm.dbg.value(metadata !{%struct.tag_s* %ptr2}, i64 0, metadata !19), !dbg !25
+  tail call void @llvm.dbg.value(metadata %struct.tag_s* %ptr1, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !24
+  tail call void @llvm.dbg.value(metadata %struct.tag_s* %ptr2, i64 0, metadata !19, metadata !{!"0x102"}), !dbg !25
   %1 = icmp eq %struct.tag_s* %c, null, !dbg !26
   br i1 %1, label %3, label %2, !dbg !26
 
@@ -27,42 +27,42 @@ define void @foo(%struct.tag_s* nocapture %this, %struct.tag_s* %c, i64 %x, i64
 
 declare void @foobar(i64, i64)
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!33}
 
-!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !30, null,  null, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !2, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 11, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, metadata !31, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [foo]
-!2 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !32, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null}
-!5 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !2, i32 16777227, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!6 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 786451, metadata !32, metadata !0, metadata !"tag_s", i32 5, i64 96, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [tag_s] [line 5, size 96, align 32, offset 0] [def] [from ]
-!8 = metadata !{metadata !9, metadata !11, metadata !12}
-!9 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"x", i32 6, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!11 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"y", i32 7, i64 32, i64 32, i64 32, i32 0, metadata !10} ; [ DW_TAG_member ]
-!12 = metadata !{i32 786445, metadata !32, metadata !7, metadata !"z", i32 8, i64 32, i64 32, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ]
-!13 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 33554443, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!14 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 50331659, metadata !15, i32 0, null} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 786454, metadata !32, metadata !0, metadata !"UInt64", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ]
-!16 = metadata !{i32 786468, null, metadata !0, metadata !"long long unsigned int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!17 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 67108875, metadata !15, i32 0, null} ; [ DW_TAG_arg_variable ]
-!18 = metadata !{i32 786689, metadata !1, metadata !"ptr1", metadata !2, i32 83886091, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 786689, metadata !1, metadata !"ptr2", metadata !2, i32 100663307, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!20 = metadata !{i32 11, i32 24, metadata !1, null}
-!21 = metadata !{i32 11, i32 44, metadata !1, null}
-!22 = metadata !{i32 11, i32 54, metadata !1, null}
-!23 = metadata !{i32 11, i32 64, metadata !1, null}
-!24 = metadata !{i32 11, i32 81, metadata !1, null}
-!25 = metadata !{i32 11, i32 101, metadata !1, null}
-!26 = metadata !{i32 12, i32 3, metadata !27, null}
-!27 = metadata !{i32 786443, metadata !2, metadata !1, i32 11, i32 107, i32 0} ; [ DW_TAG_lexical_block ]
-!28 = metadata !{i32 13, i32 5, metadata !27, null}
-!29 = metadata !{i32 14, i32 1, metadata !27, null}
-!30 = metadata !{metadata !1}
-!31 = metadata !{metadata !5, metadata !13, metadata !14, metadata !17, metadata !18, metadata!19}
-!32 = metadata !{metadata !"one.c", metadata !"/Volumes/Athwagate/R10048772"}
-!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)\001\00\000\00\001", !32, !4, !4, !30, null,  null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"0x2e\00foo\00foo\00\0011\000\001\000\006\00256\001\0011", !2, !2, !3, null, void (%struct.tag_s*, %struct.tag_s*, i64, i64, %struct.tag_s*, %struct.tag_s*)* @foo, null, null, !31} ; [ DW_TAG_subprogram ] [line 11] [def] [foo]
+!2 = !{!"0x29", !32} ; [ DW_TAG_file_type ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !32, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null}
+!5 = !{!"0x101\00this\0016777227\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!6 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !7} ; [ DW_TAG_pointer_type ]
+!7 = !{!"0x13\00tag_s\005\0096\0032\000\000\000", !32, !0, null, !8, null, null, null} ; [ DW_TAG_structure_type ] [tag_s] [line 5, size 96, align 32, offset 0] [def] [from ]
+!8 = !{!9, !11, !12}
+!9 = !{!"0xd\00x\006\0032\0032\000\000", !32, !7, !10} ; [ DW_TAG_member ]
+!10 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
+!11 = !{!"0xd\00y\007\0032\0032\0032\000", !32, !7, !10} ; [ DW_TAG_member ]
+!12 = !{!"0xd\00z\008\0032\0032\0064\000", !32, !7, !10} ; [ DW_TAG_member ]
+!13 = !{!"0x101\00c\0033554443\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!14 = !{!"0x101\00x\0050331659\000", !1, !2, !15} ; [ DW_TAG_arg_variable ]
+!15 = !{!"0x16\00UInt64\001\000\000\000\000", !32, !0, !16} ; [ DW_TAG_typedef ]
+!16 = !{!"0x24\00long long unsigned int\000\0064\0032\000\000\007", null, !0} ; [ DW_TAG_base_type ]
+!17 = !{!"0x101\00y\0067108875\000", !1, !2, !15} ; [ DW_TAG_arg_variable ]
+!18 = !{!"0x101\00ptr1\0083886091\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!19 = !{!"0x101\00ptr2\00100663307\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!20 = !MDLocation(line: 11, column: 24, scope: !1)
+!21 = !MDLocation(line: 11, column: 44, scope: !1)
+!22 = !MDLocation(line: 11, column: 54, scope: !1)
+!23 = !MDLocation(line: 11, column: 64, scope: !1)
+!24 = !MDLocation(line: 11, column: 81, scope: !1)
+!25 = !MDLocation(line: 11, column: 101, scope: !1)
+!26 = !MDLocation(line: 12, column: 3, scope: !27)
+!27 = !{!"0xb\0011\00107\000", !2, !1} ; [ DW_TAG_lexical_block ]
+!28 = !MDLocation(line: 13, column: 5, scope: !27)
+!29 = !MDLocation(line: 14, column: 1, scope: !27)
+!30 = !{!1}
+!31 = !{!5, !13, !14, !17, !18, !19}
+!32 = !{!"one.c", !"/Volumes/Athwagate/R10048772"}
+!33 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
index 5ad5e59b880e..3bf6ad91c86e 100644
--- a/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -19,11 +19,11 @@ target triple = "thumbv7-apple-ios"
 @"OBJC_IVAR_$_MyWork._data" = external hidden global i32, section "__DATA, __objc_const", align 4
 @"\01L_OBJC_SELECTOR_REFERENCES_222" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare i8* @objc_msgSend(i8*, i8*, ...)
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
@@ -31,22 +31,22 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
   %1 = alloca %0*, align 4
   %bounds = alloca %struct.CR, align 4
   %data = alloca %struct.CR, align 4
-  call void @llvm.dbg.value(metadata !{i8* %.block_descriptor}, i64 0, metadata !27), !dbg !129
+  call void @llvm.dbg.value(metadata i8* %.block_descriptor, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !129
   store %0* %loadedMydata, %0** %1, align 4
-  call void @llvm.dbg.declare(metadata !{%0** %1}, metadata !130), !dbg !131
+  call void @llvm.dbg.declare(metadata %0** %1, metadata !130, metadata !{!"0x102"}), !dbg !131
   %2 = bitcast %struct.CR* %bounds to %1*
   %3 = getelementptr %1* %2, i32 0, i32 0
   store [4 x i32] %bounds.coerce0, [4 x i32]* %3
-  call void @llvm.dbg.declare(metadata !{%struct.CR* %bounds}, metadata !132), !dbg !133
+  call void @llvm.dbg.declare(metadata %struct.CR* %bounds, metadata !132, metadata !{!"0x102"}), !dbg !133
   %4 = bitcast %struct.CR* %data to %1*
   %5 = getelementptr %1* %4, i32 0, i32 0
   store [4 x i32] %data.coerce0, [4 x i32]* %5
-  call void @llvm.dbg.declare(metadata !{%struct.CR* %data}, metadata !134), !dbg !135
+  call void @llvm.dbg.declare(metadata %struct.CR* %data, metadata !134, metadata !{!"0x102"}), !dbg !135
   %6 = bitcast i8* %.block_descriptor to %2*
   %7 = getelementptr inbounds %2* %6, i32 0, i32 6
-  call void @llvm.dbg.declare(metadata !{%2* %6}, metadata !136), !dbg !137
-  call void @llvm.dbg.declare(metadata !{%2* %6}, metadata !138), !dbg !137
-  call void @llvm.dbg.declare(metadata !{%2* %6}, metadata !139), !dbg !140
+  call void @llvm.dbg.declare(metadata %2* %6, metadata !136, metadata !163), !dbg !137
+  call void @llvm.dbg.declare(metadata %2* %6, metadata !138, metadata !164), !dbg !137
+  call void @llvm.dbg.declare(metadata %2* %6, metadata !139, metadata !165), !dbg !140
   %8 = load %0** %1, align 4, !dbg !141
   %9 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_13", !dbg !141
   %10 = bitcast %0* %8 to i8*, !dbg !141
@@ -95,169 +95,169 @@ define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %load
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!162}
 
-!0 = metadata !{i32 786449, metadata !153, i32 16, metadata !"Apple clang version 2.1", i1 false, metadata !"", i32 2, metadata !147, metadata !26, metadata !148, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786436, metadata !160, metadata !0, metadata !"", i32 248, i64 32, i64 32, i32 0, i32 0, null, metadata !3, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 248, size 32, align 32, offset 0] [def] [from ]
-!2 = metadata !{i32 786473, metadata !160} ; [ DW_TAG_file_type ]
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786472, metadata !"Ver1", i64 0} ; [ DW_TAG_enumerator ]
-!5 = metadata !{i32 786436, metadata !160, metadata !0, metadata !"Mode", i32 79, i64 32, i64 32, i32 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [Mode] [line 79, size 32, align 32, offset 0] [def] [from ]
-!6 = metadata !{i32 786473, metadata !161} ; [ DW_TAG_file_type ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786472, metadata !"One", i64 0} ; [ DW_TAG_enumerator ]
-!9 = metadata !{i32 786436, metadata !149, metadata !0, metadata !"", i32 15, i64 32, i64 32, i32 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 15, size 32, align 32, offset 0] [def] [from ]
-!10 = metadata !{i32 786473, metadata !149} ; [ DW_TAG_file_type ]
-!11 = metadata !{metadata !12, metadata !13}
-!12 = metadata !{i32 786472, metadata !"Unknown", i64 0} ; [ DW_TAG_enumerator ]
-!13 = metadata !{i32 786472, metadata !"Known", i64 1} ; [ DW_TAG_enumerator ]
-!14 = metadata !{i32 786436, metadata !150, metadata !0, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [def] [from ]
-!15 = metadata !{i32 786473, metadata !150} ; [ DW_TAG_file_type ]
-!16 = metadata !{metadata !17, metadata !18}
-!17 = metadata !{i32 786472, metadata !"Single", i64 0} ; [ DW_TAG_enumerator ]
-!18 = metadata !{i32 786472, metadata !"Double", i64 1} ; [ DW_TAG_enumerator ]
-!19 = metadata !{i32 786436, metadata !151, metadata !0, metadata !"", i32 14, i64 32, i64 32, i32 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 14, size 32, align 32, offset 0] [def] [from ]
-!20 = metadata !{i32 786473, metadata !151} ; [ DW_TAG_file_type ]
-!21 = metadata !{metadata !22}
-!22 = metadata !{i32 786472, metadata !"Eleven", i64 0} ; [ DW_TAG_enumerator ]
-!23 = metadata !{i32 786478, metadata !152, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null, null, i32 609} ; [ DW_TAG_subprogram ] [line 609] [local] [def] [foobar_func_block_invoke_0]
-!24 = metadata !{i32 786473, metadata !152} ; [ DW_TAG_file_type ]
-!25 = metadata !{i32 786453, metadata !152, metadata !24, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !26, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!26 = metadata !{null}
-!27 = metadata !{i32 786689, metadata !23, metadata !".block_descriptor", metadata !24, i32 16777825, metadata !28, i32 64, null} ; [ DW_TAG_arg_variable ]
-!28 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ]
-!29 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"__block_literal_14", i32 609, i64 256, i64 32, i32 0, i32 0, null, metadata !30, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_14] [line 609, size 256, align 32, offset 0] [def] [from ]
-!30 = metadata !{metadata !31, metadata !33, metadata !35, metadata !36, metadata !37, metadata !48, metadata !89, metadata !124}
-!31 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__isa", i32 609, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
-!32 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!33 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__flags", i32 609, i64 32, i64 32, i64 32, i32 0, metadata !34} ; [ DW_TAG_member ]
-!34 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!35 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__reserved", i32 609, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ]
-!36 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__FuncPtr", i32 609, i64 32, i64 32, i64 96, i32 0, metadata !32} ; [ DW_TAG_member ]
-!37 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__descriptor", i32 609, i64 32, i64 32, i64 128, i32 0, metadata !38} ; [ DW_TAG_member ]
-!38 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ]
-!39 = metadata !{i32 786451, metadata !153, metadata !0, metadata !"__block_descriptor_withcopydispose", i32 307, i64 128, i64 32, i32 0, i32 0, null, metadata !41, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 307, size 128, align 32, offset 0] [def] [from ]
-!40 = metadata !{i32 786473, metadata !153} ; [ DW_TAG_file_type ]
-!41 = metadata !{metadata !42, metadata !44, metadata !45, metadata !47}
-!42 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"reserved", i32 307, i64 32, i64 32, i64 0, i32 0, metadata !43} ; [ DW_TAG_member ]
-!43 = metadata !{i32 786468, null, metadata !0, metadata !"long unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!44 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"Size", i32 307, i64 32, i64 32, i64 32, i32 0, metadata !43} ; [ DW_TAG_member ]
-!45 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"CopyFuncPtr", i32 307, i64 32, i64 32, i64 64, i32 0, metadata !46} ; [ DW_TAG_member ]
-!46 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
-!47 = metadata !{i32 786445, metadata !153, metadata !40, metadata !"DestroyFuncPtr", i32 307, i64 32, i64 32, i64 96, i32 0, metadata !46} ; [ DW_TAG_member ]
-!48 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"mydata", i32 609, i64 32, i64 32, i64 160, i32 0, metadata !49} ; [ DW_TAG_member ]
-!49 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ]
-!50 = metadata !{i32 786451, metadata !152, metadata !24, metadata !"", i32 0, i64 224, i64 0, i32 0, i32 16, null, metadata !51, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [line 0, size 224, align 0, offset 0] [def] [from ]
-!51 = metadata !{metadata !52, metadata !53, metadata !54, metadata !55, metadata !56, metadata !57, metadata !58}
-!52 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__isa", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
-!53 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__forwarding", i32 0, i64 32, i64 32, i64 32, i32 0, metadata !32} ; [ DW_TAG_member ]
-!54 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__flags", i32 0, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ]
-!55 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__size", i32 0, i64 32, i64 32, i64 96, i32 0, metadata !34} ; [ DW_TAG_member ]
-!56 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__copy_helper", i32 0, i64 32, i64 32, i64 128, i32 0, metadata !32} ; [ DW_TAG_member ]
-!57 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"__destroy_helper", i32 0, i64 32, i64 32, i64 160, i32 0, metadata !32} ; [ DW_TAG_member ]
-!58 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"mydata", i32 0, i64 32, i64 32, i64 192, i32 0, metadata !59} ; [ DW_TAG_member ]
-!59 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !60} ; [ DW_TAG_pointer_type ]
-!60 = metadata !{i32 786451, metadata !154, metadata !24, metadata !"UIMydata", i32 26, i64 128, i64 32, i32 0, i32 0, null, metadata !62, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [UIMydata] [line 26, size 128, align 32, offset 0] [def] [from ]
-!61 = metadata !{i32 786473, metadata !154} ; [ DW_TAG_file_type ]
-!62 = metadata !{metadata !63, metadata !71, metadata !75, metadata !79}
-!63 = metadata !{i32 786460, metadata !60, null, metadata !61, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
-!64 = metadata !{i32 786451, metadata !155, metadata !40, metadata !"NSO", i32 66, i64 32, i64 32, i32 0, i32 0, null, metadata !66, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [NSO] [line 66, size 32, align 32, offset 0] [def] [from ]
-!65 = metadata !{i32 786473, metadata !155} ; [ DW_TAG_file_type ]
-!66 = metadata !{metadata !67}
-!67 = metadata !{i32 786445, metadata !155, metadata !65, metadata !"isa", i32 67, i64 32, i64 32, i64 0, i32 2, metadata !68, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!68 = metadata !{i32 786454, metadata !153, metadata !0, metadata !"Class", i32 197, i64 0, i64 0, i64 0, i32 0, metadata !69} ; [ DW_TAG_typedef ]
-!69 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ]
-!70 = metadata !{i32 786451, metadata !153, metadata !0, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
-!71 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_mydataRef", i32 28, i64 32, i64 32, i64 32, i32 0, metadata !72, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!72 = metadata !{i32 786454, metadata !152, metadata !0, metadata !"CFTypeRef", i32 313, i64 0, i64 0, i64 0, i32 0, metadata !73} ; [ DW_TAG_typedef ]
-!73 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !74} ; [ DW_TAG_pointer_type ]
-!74 = metadata !{i32 786470, null, metadata !0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_const_type ]
-!75 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_scale", i32 29, i64 32, i64 32, i64 64, i32 0, metadata !76, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!76 = metadata !{i32 786454, metadata !156, metadata !0, metadata !"Float", i32 89, i64 0, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_typedef ]
-!77 = metadata !{i32 786473, metadata !156} ; [ DW_TAG_file_type ]
-!78 = metadata !{i32 786468, null, metadata !0, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!79 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"_mydataFlags", i32 37, i64 8, i64 8, i64 96, i32 0, metadata !80, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!80 = metadata !{i32 786451, metadata !154, metadata !0, metadata !"", i32 30, i64 8, i64 8, i32 0, i32 0, null, metadata !81, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [line 30, size 8, align 8, offset 0] [def] [from ]
-!81 = metadata !{metadata !82, metadata !84, metadata !85, metadata !86, metadata !87, metadata !88}
-!82 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"named", i32 31, i64 1, i64 32, i64 0, i32 0, metadata !83} ; [ DW_TAG_member ]
-!83 = metadata !{i32 786468, null, metadata !0, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!84 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"mydataO", i32 32, i64 3, i64 32, i64 1, i32 0, metadata !83} ; [ DW_TAG_member ]
-!85 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"cached", i32 33, i64 1, i64 32, i64 4, i32 0, metadata !83} ; [ DW_TAG_member ]
-!86 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"hasBeenCached", i32 34, i64 1, i64 32, i64 5, i32 0, metadata !83} ; [ DW_TAG_member ]
-!87 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"hasPattern", i32 35, i64 1, i64 32, i64 6, i32 0, metadata !83} ; [ DW_TAG_member ]
-!88 = metadata !{i32 786445, metadata !154, metadata !61, metadata !"isCIMydata", i32 36, i64 1, i64 32, i64 7, i32 0, metadata !83} ; [ DW_TAG_member ]
-!89 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"self", i32 609, i64 32, i64 32, i64 192, i32 0, metadata !90} ; [ DW_TAG_member ]
-!90 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ]
-!91 = metadata !{i32 786451, metadata !152, metadata !40, metadata !"MyWork", i32 36, i64 384, i64 32, i32 0, i32 0, null, metadata !92, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [MyWork] [line 36, size 384, align 32, offset 0] [def] [from ]
-!92 = metadata !{metadata !93, metadata !98, metadata !101, metadata !107, metadata !123}
-!93 = metadata !{i32 786460, metadata !152, metadata !91, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !94} ; [ DW_TAG_inheritance ]
-!94 = metadata !{i32 786451, metadata !157, metadata !40, metadata !"twork", i32 43, i64 32, i64 32, i32 0, i32 0, null, metadata !96, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [twork] [line 43, size 32, align 32, offset 0] [def] [from ]
-!95 = metadata !{i32 786473, metadata !157} ; [ DW_TAG_file_type ]
-!96 = metadata !{metadata !97}
-!97 = metadata !{i32 786460, metadata !94, null, metadata !95, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
-!98 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_itemID", i32 38, i64 64, i64 32, i64 32, i32 1, metadata !99, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!99 = metadata !{i32 786454, metadata !153, metadata !0, metadata !"uint64_t", i32 55, i64 0, i64 0, i64 0, i32 0, metadata !100} ; [ DW_TAG_typedef ]
-!100 = metadata !{i32 786468, null, metadata !0, metadata !"long long unsigned int", i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!101 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_library", i32 39, i64 32, i64 32, i64 96, i32 1, metadata !102, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!102 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !103} ; [ DW_TAG_pointer_type ]
-!103 = metadata !{i32 786451, metadata !158, metadata !40, metadata !"MyLibrary2", i32 22, i64 32, i64 32, i32 0, i32 0, null, metadata !105, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [MyLibrary2] [line 22, size 32, align 32, offset 0] [def] [from ]
-!104 = metadata !{i32 786473, metadata !158} ; [ DW_TAG_file_type ]
-!105 = metadata !{metadata !106}
-!106 = metadata !{i32 786460, metadata !103, null, metadata !104, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
-!107 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_bounds", i32 40, i64 128, i64 32, i64 128, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!108 = metadata !{i32 786454, metadata !153, metadata !0, metadata !"CR", i32 33, i64 0, i64 0, i64 0, i32 0, metadata !109} ; [ DW_TAG_typedef ]
-!109 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CR", i32 29, i64 128, i64 32, i32 0, i32 0, null, metadata !110, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [CR] [line 29, size 128, align 32, offset 0] [def] [from ]
-!110 = metadata !{metadata !111, metadata !117}
-!111 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"origin", i32 30, i64 64, i64 32, i64 0, i32 0, metadata !112} ; [ DW_TAG_member ]
-!112 = metadata !{i32 786454, metadata !156, metadata !0, metadata !"CP", i32 17, i64 0, i64 0, i64 0, i32 0, metadata !113} ; [ DW_TAG_typedef ]
-!113 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"CP", i32 13, i64 64, i64 32, i32 0, i32 0, null, metadata !114, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [CP] [line 13, size 64, align 32, offset 0] [def] [from ]
-!114 = metadata !{metadata !115, metadata !116}
-!115 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"x", i32 14, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
-!116 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"y", i32 15, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
-!117 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"size", i32 31, i64 64, i64 32, i64 64, i32 0, metadata !118} ; [ DW_TAG_member ]
-!118 = metadata !{i32 786454, metadata !156, metadata !0, metadata !"Size", i32 25, i64 0, i64 0, i64 0, i32 0, metadata !119} ; [ DW_TAG_typedef ]
-!119 = metadata !{i32 786451, metadata !156, metadata !0, metadata !"Size", i32 21, i64 64, i64 32, i32 0, i32 0, null, metadata !120, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Size] [line 21, size 64, align 32, offset 0] [def] [from ]
-!120 = metadata !{metadata !121, metadata !122}
-!121 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"width", i32 22, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
-!122 = metadata !{i32 786445, metadata !156, metadata !77, metadata !"height", i32 23, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
-!123 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"_data", i32 40, i64 128, i64 32, i64 256, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
-!124 = metadata !{i32 786445, metadata !152, metadata !24, metadata !"semi", i32 609, i64 32, i64 32, i64 224, i32 0, metadata !125} ; [ DW_TAG_member ]
-!125 = metadata !{i32 786454, metadata !152, metadata !0, metadata !"d_t", i32 35, i64 0, i64 0, i64 0, i32 0, metadata !126} ; [ DW_TAG_typedef ]
-!126 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !127} ; [ DW_TAG_pointer_type ]
-!127 = metadata !{i32 786451, metadata !159, metadata !0, metadata !"my_struct", i32 49, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [my_struct] [line 49, size 0, align 0, offset 0] [decl] [from ]
-!128 = metadata !{i32 786473, metadata !159} ; [ DW_TAG_file_type ]
-!129 = metadata !{i32 609, i32 144, metadata !23, null}
-!130 = metadata !{i32 786689, metadata !23, metadata !"loadedMydata", metadata !24, i32 33555041, metadata !59, i32 0, null} ; [ DW_TAG_arg_variable ]
-!131 = metadata !{i32 609, i32 155, metadata !23, null}
-!132 = metadata !{i32 786689, metadata !23, metadata !"bounds", metadata !24, i32 50332257, metadata !108, i32 0, null} ; [ DW_TAG_arg_variable ]
-!133 = metadata !{i32 609, i32 175, metadata !23, null}
-!134 = metadata !{i32 786689, metadata !23, metadata !"data", metadata !24, i32 67109473, metadata !108, i32 0, null} ; [ DW_TAG_arg_variable ]
-!135 = metadata !{i32 609, i32 190, metadata !23, null}
-!136 = metadata !{i32 786688, metadata !23, metadata !"mydata", metadata !24, i32 604, metadata !50, i32 0, null, metadata !163} ; [ DW_TAG_auto_variable ]
-!137 = metadata !{i32 604, i32 49, metadata !23, null}
-!138 = metadata !{i32 786688, metadata !23, metadata !"self", metadata !40, i32 604, metadata !90, i32 0, null, metadata !164} ; [ DW_TAG_auto_variable ]
-!139 = metadata !{i32 786688, metadata !23, metadata !"semi", metadata !24, i32 607, metadata !125, i32 0, null, metadata !165} ; [ DW_TAG_auto_variable ]
-!140 = metadata !{i32 607, i32 30, metadata !23, null}
-!141 = metadata !{i32 610, i32 17, metadata !142, null}
-!142 = metadata !{i32 786443, metadata !152, metadata !23, i32 609, i32 200, i32 94} ; [ DW_TAG_lexical_block ]
-!143 = metadata !{i32 611, i32 17, metadata !142, null}
-!144 = metadata !{i32 612, i32 17, metadata !142, null}
-!145 = metadata !{i32 613, i32 17, metadata !142, null}
-!146 = metadata !{i32 615, i32 13, metadata !142, null}
-!147 = metadata !{metadata !1, metadata !1, metadata !5, metadata !5, metadata !9, metadata !14, metadata !19, metadata !19, metadata !14, metadata !14, metadata !14, metadata !19, metadata !19, metadata !19}
-!148 = metadata !{metadata !23}
-!149 = metadata !{metadata !"header3.h", metadata !"/Volumes/Sandbox/llvm"}
-!150 = metadata !{metadata !"Private.h", metadata !"/Volumes/Sandbox/llvm"}
-!151 = metadata !{metadata !"header4.h", metadata !"/Volumes/Sandbox/llvm"}
-!152 = metadata !{metadata !"MyLibrary.m", metadata !"/Volumes/Sandbox/llvm"}
-!153 = metadata !{metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm"}
-!154 = metadata !{metadata !"header11.h", metadata !"/Volumes/Sandbox/llvm"}
-!155 = metadata !{metadata !"NSO.h", metadata !"/Volumes/Sandbox/llvm"}
-!156 = metadata !{metadata !"header12.h", metadata !"/Volumes/Sandbox/llvm"}
-!157 = metadata !{metadata !"header13.h", metadata !"/Volumes/Sandbox/llvm"}
-!158 = metadata !{metadata !"header14.h", metadata !"/Volumes/Sandbox/llvm"}
-!159 = metadata !{metadata !"header15.h", metadata !"/Volumes/Sandbox/llvm"}
-!160 = metadata !{metadata !"header.h", metadata !"/Volumes/Sandbox/llvm"}
-!161 = metadata !{metadata !"header2.h", metadata !"/Volumes/Sandbox/llvm"}
-!162 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!163 = metadata !{i64 1, i64 20, i64 2, i64 1, i64 4, i64 2, i64 1, i64 24}
-!164 = metadata !{i64 1, i64 24}
-!165 = metadata !{i64 1, i64 28}
+!0 = !{!"0x11\0016\00Apple clang version 2.1\000\00\002\00\001", !153, !147, !26, !148, null, null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"0x4\00\00248\0032\0032\000\000\000", !160, !0, null, !3, null, null, null} ; [ DW_TAG_enumeration_type ] [line 248, size 32, align 32, offset 0] [def] [from ]
+!2 = !{!"0x29", !160} ; [ DW_TAG_file_type ]
+!3 = !{!4}
+!4 = !{!"0x28\00Ver1\000"} ; [ DW_TAG_enumerator ]
+!5 = !{!"0x4\00Mode\0079\0032\0032\000\000\000", !160, !0, null, !7, null, null, null} ; [ DW_TAG_enumeration_type ] [Mode] [line 79, size 32, align 32, offset 0] [def] [from ]
+!6 = !{!"0x29", !161} ; [ DW_TAG_file_type ]
+!7 = !{!8}
+!8 = !{!"0x28\00One\000"} ; [ DW_TAG_enumerator ]
+!9 = !{!"0x4\00\0015\0032\0032\000\000\000", !149, !0, null, !11, null, null, null} ; [ DW_TAG_enumeration_type ] [line 15, size 32, align 32, offset 0] [def] [from ]
+!10 = !{!"0x29", !149} ; [ DW_TAG_file_type ]
+!11 = !{!12, !13}
+!12 = !{!"0x28\00Unknown\000"} ; [ DW_TAG_enumerator ]
+!13 = !{!"0x28\00Known\001"} ; [ DW_TAG_enumerator ]
+!14 = !{!"0x4\00\0020\0032\0032\000\000\000", !150, !0, null, !16, null, null, null} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [def] [from ]
+!15 = !{!"0x29", !150} ; [ DW_TAG_file_type ]
+!16 = !{!17, !18}
+!17 = !{!"0x28\00Single\000"} ; [ DW_TAG_enumerator ]
+!18 = !{!"0x28\00Double\001"} ; [ DW_TAG_enumerator ]
+!19 = !{!"0x4\00\0014\0032\0032\000\000\000", !151, !0, null, !21, null, null, null} ; [ DW_TAG_enumeration_type ] [line 14, size 32, align 32, offset 0] [def] [from ]
+!20 = !{!"0x29", !151} ; [ DW_TAG_file_type ]
+!21 = !{!22}
+!22 = !{!"0x28\00Eleven\000"} ; [ DW_TAG_enumerator ]
+!23 = !{!"0x2e\00foobar_func_block_invoke_0\00foobar_func_block_invoke_0\00\00609\001\001\000\006\00256\000\00609", !152, !24, !25, null, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null, null} ; [ DW_TAG_subprogram ] [line 609] [local] [def] [foobar_func_block_invoke_0]
+!24 = !{!"0x29", !152} ; [ DW_TAG_file_type ]
+!25 = !{!"0x15\00\000\000\000\000\000\000", !152, !24, null, !26, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!26 = !{null}
+!27 = !{!"0x101\00.block_descriptor\0016777825\0064", !23, !24, !28} ; [ DW_TAG_arg_variable ]
+!28 = !{!"0xf\00\000\0032\000\000\000", null, !0, !29} ; [ DW_TAG_pointer_type ]
+!29 = !{!"0x13\00__block_literal_14\00609\00256\0032\000\000\000", !152, !24, null, !30, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_14] [line 609, size 256, align 32, offset 0] [def] [from ]
+!30 = !{!31, !33, !35, !36, !37, !48, !89, !124}
+!31 = !{!"0xd\00__isa\00609\0032\0032\000\000", !152, !24, !32} ; [ DW_TAG_member ]
+!32 = !{!"0xf\00\000\0032\0032\000\000", null, !0, null} ; [ DW_TAG_pointer_type ]
+!33 = !{!"0xd\00__flags\00609\0032\0032\0032\000", !152, !24, !34} ; [ DW_TAG_member ]
+!34 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
+!35 = !{!"0xd\00__reserved\00609\0032\0032\0064\000", !152, !24, !34} ; [ DW_TAG_member ]
+!36 = !{!"0xd\00__FuncPtr\00609\0032\0032\0096\000", !152, !24, !32} ; [ DW_TAG_member ]
+!37 = !{!"0xd\00__descriptor\00609\0032\0032\00128\000", !152, !24, !38} ; [ DW_TAG_member ]
+!38 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !39} ; [ DW_TAG_pointer_type ]
+!39 = !{!"0x13\00__block_descriptor_withcopydispose\00307\00128\0032\000\000\000", !153, !0, null, !41, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 307, size 128, align 32, offset 0] [def] [from ]
+!40 = !{!"0x29", !153} ; [ DW_TAG_file_type ]
+!41 = !{!42, !44, !45, !47}
+!42 = !{!"0xd\00reserved\00307\0032\0032\000\000", !153, !40, !43} ; [ DW_TAG_member ]
+!43 = !{!"0x24\00long unsigned int\000\0032\0032\000\000\007", null, !0} ; [ DW_TAG_base_type ]
+!44 = !{!"0xd\00Size\00307\0032\0032\0032\000", !153, !40, !43} ; [ DW_TAG_member ]
+!45 = !{!"0xd\00CopyFuncPtr\00307\0032\0032\0064\000", !153, !40, !46} ; [ DW_TAG_member ]
+!46 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !32} ; [ DW_TAG_pointer_type ]
+!47 = !{!"0xd\00DestroyFuncPtr\00307\0032\0032\0096\000", !153, !40, !46} ; [ DW_TAG_member ]
+!48 = !{!"0xd\00mydata\00609\0032\0032\00160\000", !152, !24, !49} ; [ DW_TAG_member ]
+!49 = !{!"0xf\00\000\0032\000\000\000", null, !0, !50} ; [ DW_TAG_pointer_type ]
+!50 = !{!"0x13\00\000\00224\000\000\0016\000", !152, !24, null, !51, null, null, null} ; [ DW_TAG_structure_type ] [line 0, size 224, align 0, offset 0] [def] [from ]
+!51 = !{!52, !53, !54, !55, !56, !57, !58}
+!52 = !{!"0xd\00__isa\000\0032\0032\000\000", !152, !24, !32} ; [ DW_TAG_member ]
+!53 = !{!"0xd\00__forwarding\000\0032\0032\0032\000", !152, !24, !32} ; [ DW_TAG_member ]
+!54 = !{!"0xd\00__flags\000\0032\0032\0064\000", !152, !24, !34} ; [ DW_TAG_member ]
+!55 = !{!"0xd\00__size\000\0032\0032\0096\000", !152, !24, !34} ; [ DW_TAG_member ]
+!56 = !{!"0xd\00__copy_helper\000\0032\0032\00128\000", !152, !24, !32} ; [ DW_TAG_member ]
+!57 = !{!"0xd\00__destroy_helper\000\0032\0032\00160\000", !152, !24, !32} ; [ DW_TAG_member ]
+!58 = !{!"0xd\00mydata\000\0032\0032\00192\000", !152, !24, !59} ; [ DW_TAG_member ]
+!59 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !60} ; [ DW_TAG_pointer_type ]
+!60 = !{!"0x13\00UIMydata\0026\00128\0032\000\000\0016", !154, !24, null, !62, null, null, null} ; [ DW_TAG_structure_type ] [UIMydata] [line 26, size 128, align 32, offset 0] [def] [from ]
+!61 = !{!"0x29", !154} ; [ DW_TAG_file_type ]
+!62 = !{!63, !71, !75, !79}
+!63 = !{!"0x1c\00\000\000\000\000\000", !60, null, !64} ; [ DW_TAG_inheritance ]
+!64 = !{!"0x13\00NSO\0066\0032\0032\000\000\0016", !155, !40, null, !66, null, null, null} ; [ DW_TAG_structure_type ] [NSO] [line 66, size 32, align 32, offset 0] [def] [from ]
+!65 = !{!"0x29", !155} ; [ DW_TAG_file_type ]
+!66 = !{!67}
+!67 = !{!"0xd\00isa\0067\0032\0032\000\002", !155, !65, !68, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
+!68 = !{!"0x16\00Class\00197\000\000\000\000", !153, !0, !69} ; [ DW_TAG_typedef ]
+!69 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !70} ; [ DW_TAG_pointer_type ]
+!70 = !{!"0x13\00objc_class\000\000\000\000\004\000", !153, !0, null, null, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
+!71 = !{!"0xd\00_mydataRef\0028\0032\0032\0032\000", !154, !61, !72, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
+!72 = !{!"0x16\00CFTypeRef\00313\000\000\000\000", !152, !0, !73} ; [ DW_TAG_typedef ]
+!73 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !74} ; [ DW_TAG_pointer_type ]
+!74 = !{!"0x26\00\000\000\000\000\000", null, !0, null} ; [ DW_TAG_const_type ]
+!75 = !{!"0xd\00_scale\0029\0032\0032\0064\000", !154, !61, !76, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
+!76 = !{!"0x16\00Float\0089\000\000\000\000", !156, !0, !78} ; [ DW_TAG_typedef ]
+!77 = !{!"0x29", !156} ; [ DW_TAG_file_type ]
+!78 = !{!"0x24\00float\000\0032\0032\000\000\004", null, !0} ; [ DW_TAG_base_type ]
+!79 = !{!"0xd\00_mydataFlags\0037\008\008\0096\000", !154, !61, !80, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
+!80 = !{!"0x13\00\0030\008\008\000\000\000", !154, !0, null, !81, null, null, null} ; [ DW_TAG_structure_type ] [line 30, size 8, align 8, offset 0] [def] [from ]
+!81 = !{!82, !84, !85, !86, !87, !88}
+!82 = !{!"0xd\00named\0031\001\0032\000\000", !154, !61, !83} ; [ DW_TAG_member ]
+!83 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, !0} ; [ DW_TAG_base_type ]
+!84 = !{!"0xd\00mydataO\0032\003\0032\001\000", !154, !61, !83} ; [ DW_TAG_member ]
+!85 = !{!"0xd\00cached\0033\001\0032\004\000", !154, !61, !83} ; [ DW_TAG_member ]
+!86 = !{!"0xd\00hasBeenCached\0034\001\0032\005\000", !154, !61, !83} ; [ DW_TAG_member ]
+!87 = !{!"0xd\00hasPattern\0035\001\0032\006\000", !154, !61, !83} ; [ DW_TAG_member ]
+!88 = !{!"0xd\00isCIMydata\0036\001\0032\007\000", !154, !61, !83} ; [ DW_TAG_member ]
+!89 = !{!"0xd\00self\00609\0032\0032\00192\000", !152, !24, !90} ; [ DW_TAG_member ]
+!90 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !91} ; [ DW_TAG_pointer_type ]
+!91 = !{!"0x13\00MyWork\0036\00384\0032\000\000\0016", !152, !40, null, !92, null, null, null} ; [ DW_TAG_structure_type ] [MyWork] [line 36, size 384, align 32, offset 0] [def] [from ]
+!92 = !{!93, !98, !101, !107, !123}
+!93 = !{!"0x1c\00\000\000\000\000\000", !152, !91, !94} ; [ DW_TAG_inheritance ]
+!94 = !{!"0x13\00twork\0043\0032\0032\000\000\0016", !157, !40, null, !96, null, null, null} ; [ DW_TAG_structure_type ] [twork] [line 43, size 32, align 32, offset 0] [def] [from ]
+!95 = !{!"0x29", !157} ; [ DW_TAG_file_type ]
+!96 = !{!97}
+!97 = !{!"0x1c\00\000\000\000\000\000", !94, null, !64} ; [ DW_TAG_inheritance ]
+!98 = !{!"0xd\00_itemID\0038\0064\0032\0032\001", !152, !24, !99, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
+!99 = !{!"0x16\00uint64_t\0055\000\000\000\000", !153, !0, !100} ; [ DW_TAG_typedef ]
+!100 = !{!"0x24\00long long unsigned int\000\0064\0032\000\000\007", null, !0} ; [ DW_TAG_base_type ]
+!101 = !{!"0xd\00_library\0039\0032\0032\0096\001", !152, !24, !102, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
+!102 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !103} ; [ DW_TAG_pointer_type ]
+!103 = !{!"0x13\00MyLibrary2\0022\0032\0032\000\000\0016", !158, !40, null, !105, null, null, null} ; [ DW_TAG_structure_type ] [MyLibrary2] [line 22, size 32, align 32, offset 0] [def] [from ]
+!104 = !{!"0x29", !158} ; [ DW_TAG_file_type ]
+!105 = !{!106}
+!106 = !{!"0x1c\00\000\000\000\000\000", !103, null, !64} ; [ DW_TAG_inheritance ]
+!107 = !{!"0xd\00_bounds\0040\00128\0032\00128\001", !152, !24, !108, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
+!108 = !{!"0x16\00CR\0033\000\000\000\000", !153, !0, !109} ; [ DW_TAG_typedef ]
+!109 = !{!"0x13\00CR\0029\00128\0032\000\000\000", !156, !0, null, !110, null, null, null} ; [ DW_TAG_structure_type ] [CR] [line 29, size 128, align 32, offset 0] [def] [from ]
+!110 = !{!111, !117}
+!111 = !{!"0xd\00origin\0030\0064\0032\000\000", !156, !77, !112} ; [ DW_TAG_member ]
+!112 = !{!"0x16\00CP\0017\000\000\000\000", !156, !0, !113} ; [ DW_TAG_typedef ]
+!113 = !{!"0x13\00CP\0013\0064\0032\000\000\000", !156, !0, null, !114, null, null, null} ; [ DW_TAG_structure_type ] [CP] [line 13, size 64, align 32, offset 0] [def] [from ]
+!114 = !{!115, !116}
+!115 = !{!"0xd\00x\0014\0032\0032\000\000", !156, !77, !76} ; [ DW_TAG_member ]
+!116 = !{!"0xd\00y\0015\0032\0032\0032\000", !156, !77, !76} ; [ DW_TAG_member ]
+!117 = !{!"0xd\00size\0031\0064\0032\0064\000", !156, !77, !118} ; [ DW_TAG_member ]
+!118 = !{!"0x16\00Size\0025\000\000\000\000", !156, !0, !119} ; [ DW_TAG_typedef ]
+!119 = !{!"0x13\00Size\0021\0064\0032\000\000\000", !156, !0, null, !120, null, null, null} ; [ DW_TAG_structure_type ] [Size] [line 21, size 64, align 32, offset 0] [def] [from ]
+!120 = !{!121, !122}
+!121 = !{!"0xd\00width\0022\0032\0032\000\000", !156, !77, !76} ; [ DW_TAG_member ]
+!122 = !{!"0xd\00height\0023\0032\0032\0032\000", !156, !77, !76} ; [ DW_TAG_member ]
+!123 = !{!"0xd\00_data\0040\00128\0032\00256\001", !152, !24, !108, !"", !"", !"", i32 0} ; [ DW_TAG_member ]
+!124 = !{!"0xd\00semi\00609\0032\0032\00224\000", !152, !24, !125} ; [ DW_TAG_member ]
+!125 = !{!"0x16\00d_t\0035\000\000\000\000", !152, !0, !126} ; [ DW_TAG_typedef ]
+!126 = !{!"0xf\00\000\0032\0032\000\000", null, !0, !127} ; [ DW_TAG_pointer_type ]
+!127 = !{!"0x13\00my_struct\0049\000\000\000\004\000", !159, !0, null, null, null, null, null} ; [ DW_TAG_structure_type ] [my_struct] [line 49, size 0, align 0, offset 0] [decl] [from ]
+!128 = !{!"0x29", !159} ; [ DW_TAG_file_type ]
+!129 = !MDLocation(line: 609, column: 144, scope: !23)
+!130 = !{!"0x101\00loadedMydata\0033555041\000", !23, !24, !59} ; [ DW_TAG_arg_variable ]
+!131 = !MDLocation(line: 609, column: 155, scope: !23)
+!132 = !{!"0x101\00bounds\0050332257\000", !23, !24, !108} ; [ DW_TAG_arg_variable ]
+!133 = !MDLocation(line: 609, column: 175, scope: !23)
+!134 = !{!"0x101\00data\0067109473\000", !23, !24, !108} ; [ DW_TAG_arg_variable ]
+!135 = !MDLocation(line: 609, column: 190, scope: !23)
+!136 = !{!"0x100\00mydata\00604\000", !23, !24, !50} ; [ DW_TAG_auto_variable ]
+!137 = !MDLocation(line: 604, column: 49, scope: !23)
+!138 = !{!"0x100\00self\00604\000", !23, !40, !90} ; [ DW_TAG_auto_variable ]
+!139 = !{!"0x100\00semi\00607\000", !23, !24, !125} ; [ DW_TAG_auto_variable ]
+!140 = !MDLocation(line: 607, column: 30, scope: !23)
+!141 = !MDLocation(line: 610, column: 17, scope: !142)
+!142 = !{!"0xb\00609\00200\0094", !152, !23} ; [ DW_TAG_lexical_block ]
+!143 = !MDLocation(line: 611, column: 17, scope: !142)
+!144 = !MDLocation(line: 612, column: 17, scope: !142)
+!145 = !MDLocation(line: 613, column: 17, scope: !142)
+!146 = !MDLocation(line: 615, column: 13, scope: !142)
+!147 = !{!1, !1, !5, !5, !9, !14, !19, !19, !14, !14, !14, !19, !19, !19}
+!148 = !{!23}
+!149 = !{!"header3.h", !"/Volumes/Sandbox/llvm"}
+!150 = !{!"Private.h", !"/Volumes/Sandbox/llvm"}
+!151 = !{!"header4.h", !"/Volumes/Sandbox/llvm"}
+!152 = !{!"MyLibrary.m", !"/Volumes/Sandbox/llvm"}
+!153 = !{!"MyLibrary.i", !"/Volumes/Sandbox/llvm"}
+!154 = !{!"header11.h", !"/Volumes/Sandbox/llvm"}
+!155 = !{!"NSO.h", !"/Volumes/Sandbox/llvm"}
+!156 = !{!"header12.h", !"/Volumes/Sandbox/llvm"}
+!157 = !{!"header13.h", !"/Volumes/Sandbox/llvm"}
+!158 = !{!"header14.h", !"/Volumes/Sandbox/llvm"}
+!159 = !{!"header15.h", !"/Volumes/Sandbox/llvm"}
+!160 = !{!"header.h", !"/Volumes/Sandbox/llvm"}
+!161 = !{!"header2.h", !"/Volumes/Sandbox/llvm"}
+!162 = !{i32 1, !"Debug Info Version", i32 2}
+!163 = !{!"0x102\0034\0020\006\0034\004\006\0034\0024"} ; [ DW_TAG_expression ] [DW_OP_plus 20 DW_OP_deref DW_OP_plus 4 DW_OP_deref DW_OP_plus 24]
+!164 = !{!"0x102\0034\0024"} ; [ DW_TAG_expression ] [DW_OP_plus 24]
+!165 = !{!"0x102\0034\0028"} ; [ DW_TAG_expression ] [DW_OP_plus 28]
diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll
index 8505f5365567..94756953de0a 100644
--- a/test/CodeGen/ARM/debug-info-branch-folding.ll
+++ b/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -3,6 +3,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 target triple = "thumbv7-apple-macosx10.6.7"
 
 ;CHECK: 	vadd.f32	q4, q8, q8
+;CHECK-NEXT: Ltmp1
 ;CHECK-NEXT: LBB0_1
 
 ;CHECK:@DEBUG_VALUE: x <- Q4{{$}}
@@ -19,9 +20,9 @@ entry:
 
 for.body9:                                        ; preds = %for.body9, %entry
   %add19 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39
-  tail call void @llvm.dbg.value(metadata !{<4 x float> %add19}, i64 0, metadata !27), !dbg !39
+  tail call void @llvm.dbg.value(metadata <4 x float> %add19, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !39
   %add20 = fadd <4 x float> undef, <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, !dbg !39
-  tail call void @llvm.dbg.value(metadata !{<4 x float> %add20}, i64 0, metadata !28), !dbg !39
+  tail call void @llvm.dbg.value(metadata <4 x float> %add20, i64 0, metadata !28, metadata !{!"0x102"}), !dbg !39
   br i1 %cond, label %for.end54, label %for.body9, !dbg !44
 
 for.end54:                                        ; preds = %for.body9
@@ -36,64 +37,65 @@ for.end54:                                        ; preds = %for.body9
 
 declare i32 @printf(i8* nocapture, ...) nounwind
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.module.flags = !{!56}
+!llvm.dbg.cu = !{!2}
 
-!0 = metadata !{i32 786478, metadata !54, null, metadata !"test0001", metadata !"test0001", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, metadata !17, metadata !17, metadata !50, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
-!6 = metadata !{i32 786433, metadata !54, metadata !2, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [from float]
-!7 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786465, i64 0, i64 4}         ; [ DW_TAG_subrange_type ]
-!10 = metadata !{i32 786478, metadata !54, null, metadata !"main", metadata !"main", metadata !"", i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null, null, metadata !52, i32 0} ; [ DW_TAG_subprogram ] [line 59] [def] [scope 0] [main]
-!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786478, metadata !55, null, metadata !"printFV", metadata !"printFV", metadata !"", i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !53, i32 0} ; [ DW_TAG_subprogram ] [line 41] [local] [def] [scope 0] [printFV]
-!15 = metadata !{i32 786473, metadata !55} ; [ DW_TAG_file_type ]
-!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!17 = metadata !{null}
-!18 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
-!20 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0, null} ; [ DW_TAG_arg_variable ]
-!21 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
-!22 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
-!23 = metadata !{i32 786468, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!24 = metadata !{i32 786688, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
-!25 = metadata !{i32 786443, metadata !1, metadata !10, i32 59, i32 33, i32 14} ; [ DW_TAG_lexical_block ]
-!26 = metadata !{i32 786688, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
-!27 = metadata !{i32 786688, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!28 = metadata !{i32 786688, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!29 = metadata !{i32 786688, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!30 = metadata !{i32 786689, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0, null} ; [ DW_TAG_arg_variable ]
-!31 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
-!32 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"FV", i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ]
-!33 = metadata !{i32 786455, metadata !55, metadata !2, metadata !"", i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ]
-!34 = metadata !{metadata !35, metadata !37}
-!35 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"V", i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ]
-!36 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"v4sf", i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
-!37 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"A", i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ]
-!38 = metadata !{i32 786433, null, metadata !2, metadata !"", i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
-!39 = metadata !{i32 79, i32 7, metadata !40, null}
-!40 = metadata !{i32 786443, metadata !1, metadata !41, i32 75, i32 35, i32 18} ; [ DW_TAG_lexical_block ]
-!41 = metadata !{i32 786443, metadata !1, metadata !42, i32 75, i32 5, i32 17} ; [ DW_TAG_lexical_block ]
-!42 = metadata !{i32 786443, metadata !1, metadata !43, i32 71, i32 32, i32 16} ; [ DW_TAG_lexical_block ]
-!43 = metadata !{i32 786443, metadata !1, metadata !25, i32 71, i32 3, i32 15} ; [ DW_TAG_lexical_block ]
-!44 = metadata !{i32 75, i32 5, metadata !42, null}
-!45 = metadata !{i32 42, i32 2, metadata !46, metadata !48}
-!46 = metadata !{i32 786443, metadata !15, metadata !47, i32 42, i32 2, i32 20} ; [ DW_TAG_lexical_block ]
-!47 = metadata !{i32 786443, metadata !15, metadata !14, i32 41, i32 28, i32 19} ; [ DW_TAG_lexical_block ]
-!48 = metadata !{i32 95, i32 3, metadata !25, null}
-!49 = metadata !{i32 99, i32 3, metadata !25, null}
-!50 = metadata !{metadata !0, metadata !10, metadata !14}
-!51 = metadata !{metadata !18}
-!52 = metadata !{metadata !19, metadata !20, metadata !24, metadata !26, metadata !27, metadata !28, metadata !29}
-!53 = metadata !{metadata !30}
-!54 = metadata !{metadata !"build2.c", metadata !"/private/tmp"}
-!55 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp"}
-!56 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00test0001\00test0001\00\003\000\001\000\006\00256\001\000", !54, null, !3, i32 0, <4 x float> (float)* @test0001, null, null, !51} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !54} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 3.0 (trunk 129915)\001\00\000\00\001", !54, !17, !17, !50, null,  null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !54, !1, i32 0, !4, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = !{!5}
+!5 = !{!"0x16\00v4f32\0014\000\000\000\000", !54, !2, !6} ; [ DW_TAG_typedef ]
+!6 = !{!"0x1\00\000\00128\00128\000\000", !54, !2, !7, !8, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [from float]
+!7 = !{!"0x24\00float\000\0032\0032\000\000\004", null, !2} ; [ DW_TAG_base_type ]
+!8 = !{!9}
+!9 = !{!"0x21\000\004"}         ; [ DW_TAG_subrange_type ]
+!10 = !{!"0x2e\00main\00main\00\0059\000\001\000\006\00256\001\000", !54, null, !11, null, i32 (i32, i8**, i1)* @main, null, null, !52} ; [ DW_TAG_subprogram ] [line 59] [def] [scope 0] [main]
+!11 = !{!"0x15\00\000\000\000\000\000\000", !54, !1, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!13}
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
+!14 = !{!"0x2e\00printFV\00printFV\00\0041\001\001\000\006\00256\001\000", !55, null, !16, null, null, null, null, !53} ; [ DW_TAG_subprogram ] [line 41] [local] [def] [scope 0] [printFV]
+!15 = !{!"0x29", !55} ; [ DW_TAG_file_type ]
+!16 = !{!"0x15\00\000\000\000\000\000\000", !55, !15, null, !17, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!17 = !{null}
+!18 = !{!"0x101\00a\0016777219\000", !0, !1, !7} ; [ DW_TAG_arg_variable ]
+!19 = !{!"0x101\00argc\0016777275\000", !10, !1, !13} ; [ DW_TAG_arg_variable ]
+!20 = !{!"0x101\00argv\0033554491\000", !10, !1, !21} ; [ DW_TAG_arg_variable ]
+!21 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !22} ; [ DW_TAG_pointer_type ]
+!22 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !23} ; [ DW_TAG_pointer_type ]
+!23 = !{!"0x24\00char\000\008\008\000\000\006", null, !2} ; [ DW_TAG_base_type ]
+!24 = !{!"0x100\00i\0060\000", !25, !1, !13} ; [ DW_TAG_auto_variable ]
+!25 = !{!"0xb\0059\0033\0014", !1, !10} ; [ DW_TAG_lexical_block ]
+!26 = !{!"0x100\00j\0060\000", !25, !1, !13} ; [ DW_TAG_auto_variable ]
+!27 = !{!"0x100\00x\0061\000", !25, !1, !5} ; [ DW_TAG_auto_variable ]
+!28 = !{!"0x100\00y\0062\000", !25, !1, !5} ; [ DW_TAG_auto_variable ]
+!29 = !{!"0x100\00z\0063\000", !25, !1, !5} ; [ DW_TAG_auto_variable ]
+!30 = !{!"0x101\00F\0016777257\000", !14, !15, !31} ; [ DW_TAG_arg_variable ]
+!31 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !32} ; [ DW_TAG_pointer_type ]
+!32 = !{!"0x16\00FV\0025\000\000\000\000", !55, !2, !33} ; [ DW_TAG_typedef ]
+!33 = !{!"0x17\00\0022\00128\00128\000\000\000", !55, !2, i32 0, !34, null} ; [ DW_TAG_union_type ]
+!34 = !{!35, !37}
+!35 = !{!"0xd\00V\0023\00128\00128\000\000", !55, !15, !36} ; [ DW_TAG_member ]
+!36 = !{!"0x16\00v4sf\003\000\000\000\000", !55, !2, !6} ; [ DW_TAG_typedef ]
+!37 = !{!"0xd\00A\0024\00128\0032\000\000", !55, !15, !38} ; [ DW_TAG_member ]
+!38 = !{!"0x1\00\000\00128\0032\000\000", null, !2, !7, !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!39 = !MDLocation(line: 79, column: 7, scope: !40)
+!40 = !{!"0xb\0075\0035\0018", !1, !41} ; [ DW_TAG_lexical_block ]
+!41 = !{!"0xb\0075\005\0017", !1, !42} ; [ DW_TAG_lexical_block ]
+!42 = !{!"0xb\0071\0032\0016", !1, !43} ; [ DW_TAG_lexical_block ]
+!43 = !{!"0xb\0071\003\0015", !1, !25} ; [ DW_TAG_lexical_block ]
+!44 = !MDLocation(line: 75, column: 5, scope: !42)
+!45 = !MDLocation(line: 42, column: 2, scope: !46, inlinedAt: !48)
+!46 = !{!"0xb\0042\002\0020", !15, !47} ; [ DW_TAG_lexical_block ]
+!47 = !{!"0xb\0041\0028\0019", !15, !14} ; [ DW_TAG_lexical_block ]
+!48 = !MDLocation(line: 95, column: 3, scope: !25)
+!49 = !MDLocation(line: 99, column: 3, scope: !25)
+!50 = !{!0, !10, !14}
+!51 = !{!18}
+!52 = !{!19, !20, !24, !26, !27, !28, !29}
+!53 = !{!30}
+!54 = !{!"build2.c", !"/private/tmp"}
+!55 = !{!"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", !"/private/tmp"}
+!56 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll
index 30a3e2dcdc2c..85b510f175ee 100644
--- a/test/CodeGen/ARM/debug-info-d16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -12,9 +12,9 @@ target triple = "thumbv7-apple-darwin10"
 
 define i32 @inlineprinter(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !19), !dbg !26
-  tail call void @llvm.dbg.value(metadata !{double %val}, i64 0, metadata !20), !dbg !26
-  tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !21), !dbg !26
+  tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !19, metadata !{!"0x102"}), !dbg !26
+  tail call void @llvm.dbg.value(metadata double %val, i64 0, metadata !20, metadata !{!"0x102"}), !dbg !26
+  tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !21, metadata !{!"0x102"}), !dbg !26
   %0 = zext i8 %c to i32, !dbg !27
   %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !27
   ret i32 0, !dbg !29
@@ -22,9 +22,9 @@ entry:
 
 define i32 @printer(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize noinline {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !16), !dbg !30
-  tail call void @llvm.dbg.value(metadata !{double %val}, i64 0, metadata !17), !dbg !30
-  tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !18), !dbg !30
+  tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !30
+  tail call void @llvm.dbg.value(metadata double %val, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !30
+  tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !30
   %0 = zext i8 %c to i32, !dbg !31
   %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !31
   ret i32 0, !dbg !33
@@ -32,22 +32,22 @@ entry:
 
 declare i32 @printf(i8* nocapture, ...) nounwind
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !22), !dbg !34
-  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !23), !dbg !34
+  tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !22, metadata !{!"0x102"}), !dbg !34
+  tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !23, metadata !{!"0x102"}), !dbg !34
   %0 = sitofp i32 %argc to double, !dbg !35
   %1 = fadd double %0, 5.555552e+05, !dbg !35
-  tail call void @llvm.dbg.value(metadata !{double %1}, i64 0, metadata !24), !dbg !35
+  tail call void @llvm.dbg.value(metadata double %1, i64 0, metadata !24, metadata !{!"0x102"}), !dbg !35
   %2 = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0)) nounwind, !dbg !36
   %3 = getelementptr inbounds i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !37
   %4 = trunc i32 %argc to i8, !dbg !37
   %5 = add i8 %4, 97, !dbg !37
-  tail call void @llvm.dbg.value(metadata !{i8* %3}, i64 0, metadata !19) nounwind, !dbg !38
-  tail call void @llvm.dbg.value(metadata !{double %1}, i64 0, metadata !20) nounwind, !dbg !38
-  tail call void @llvm.dbg.value(metadata !{i8 %5}, i64 0, metadata !21) nounwind, !dbg !38
+  tail call void @llvm.dbg.value(metadata i8* %3, i64 0, metadata !19, metadata !{!"0x102"}) nounwind, !dbg !38
+  tail call void @llvm.dbg.value(metadata double %1, i64 0, metadata !20, metadata !{!"0x102"}) nounwind, !dbg !38
+  tail call void @llvm.dbg.value(metadata i8 %5, i64 0, metadata !21, metadata !{!"0x102"}) nounwind, !dbg !38
   %6 = zext i8 %5 to i32, !dbg !39
   %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %3, double %1, i32 %6) nounwind, !dbg !39
   %8 = tail call i32 @printer(i8* %3, double %1, i8 zeroext %5) nounwind, !dbg !40
@@ -59,52 +59,52 @@ declare i32 @puts(i8* nocapture) nounwind
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!48}
 
-!0 = metadata !{i32 786478, metadata !46, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer, null, null, metadata !43, i32 12} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !46, i32 1, metadata !"(LLVM build 00)", i1 true, metadata !"", i32 0, metadata !47, metadata !47, metadata !42, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !46, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8}
-!5 = metadata !{i32 786468, metadata !46, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786447, metadata !46, metadata !1, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 786468, metadata !46, metadata !1, metadata !"double", i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 786468, metadata !46, metadata !1, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786478, metadata !46, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter, null, null, metadata !44, i32 5} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 786478, metadata !46, metadata !1, metadata !"main", metadata !"main", metadata !"main", i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !45, i32 18} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, metadata !46, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !5, metadata !5, metadata !13}
-!13 = metadata !{i32 786447, metadata !46, metadata !1, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
-!14 = metadata !{i32 786447, metadata !46, metadata !1, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ]
-!15 = metadata !{i32 786468, metadata !46, metadata !1, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!16 = metadata !{i32 786689, metadata !0, metadata !"ptr", metadata !1, i32 11, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 786689, metadata !0, metadata !"val", metadata !1, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!18 = metadata !{i32 786689, metadata !0, metadata !"c", metadata !1, i32 11, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 786689, metadata !9, metadata !"ptr", metadata !1, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!20 = metadata !{i32 786689, metadata !9, metadata !"val", metadata !1, i32 4, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!21 = metadata !{i32 786689, metadata !9, metadata !"c", metadata !1, i32 4, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 17, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!23 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 17, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
-!24 = metadata !{i32 786688, metadata !25, metadata !"dval", metadata !1, i32 19, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
-!25 = metadata !{i32 786443, metadata !46, metadata !10, i32 18, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
-!26 = metadata !{i32 4, i32 0, metadata !9, null}
-!27 = metadata !{i32 6, i32 0, metadata !28, null}
-!28 = metadata !{i32 786443, metadata !46, metadata !9, i32 5, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
-!29 = metadata !{i32 7, i32 0, metadata !28, null}
-!30 = metadata !{i32 11, i32 0, metadata !0, null}
-!31 = metadata !{i32 13, i32 0, metadata !32, null}
-!32 = metadata !{i32 786443, metadata !46, metadata !0, i32 12, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!33 = metadata !{i32 14, i32 0, metadata !32, null}
-!34 = metadata !{i32 17, i32 0, metadata !10, null}
-!35 = metadata !{i32 19, i32 0, metadata !25, null}
-!36 = metadata !{i32 20, i32 0, metadata !25, null}
-!37 = metadata !{i32 21, i32 0, metadata !25, null}
-!38 = metadata !{i32 4, i32 0, metadata !9, metadata !37}
-!39 = metadata !{i32 6, i32 0, metadata !28, metadata !37}
-!40 = metadata !{i32 22, i32 0, metadata !25, null}
-!41 = metadata !{i32 23, i32 0, metadata !25, null}
-!42 = metadata !{metadata !0, metadata !9, metadata !10}
-!43 = metadata !{metadata !16, metadata !17, metadata !18}
-!44 = metadata !{metadata !19, metadata !20, metadata !21}
-!45 = metadata !{metadata !22, metadata !23, metadata !24}
-!46 = metadata !{metadata !"a.c", metadata !"/tmp/"}
-!47 = metadata !{i32 0}
-!48 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00printer\00printer\00printer\0012\000\001\000\006\00256\001\0012", !46, !1, !3, null, i32 (i8*, double, i8)* @printer, null, null, !43} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !46} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\001\00(LLVM build 00)\001\00\000\00\001", !46, !47, !47, !42, null,  null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !46, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5, !6, !7, !8}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", !46, !1} ; [ DW_TAG_base_type ]
+!6 = !{!"0xf\00\000\0032\0032\000\000", !46, !1, null} ; [ DW_TAG_pointer_type ]
+!7 = !{!"0x24\00double\000\0064\0032\000\000\004", !46, !1} ; [ DW_TAG_base_type ]
+!8 = !{!"0x24\00unsigned char\000\008\008\000\000\008", !46, !1} ; [ DW_TAG_base_type ]
+!9 = !{!"0x2e\00inlineprinter\00inlineprinter\00inlineprinter\005\000\001\000\006\00256\001\005", !46, !1, !3, null, i32 (i8*, double, i8)* @inlineprinter, null, null, !44} ; [ DW_TAG_subprogram ]
+!10 = !{!"0x2e\00main\00main\00main\0018\000\001\000\006\00256\001\0018", !46, !1, !11, null, i32 (i32, i8**)* @main, null, null, !45} ; [ DW_TAG_subprogram ]
+!11 = !{!"0x15\00\000\000\000\000\000\000", !46, !1, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!5, !5, !13}
+!13 = !{!"0xf\00\000\0032\0032\000\000", !46, !1, !14} ; [ DW_TAG_pointer_type ]
+!14 = !{!"0xf\00\000\0032\0032\000\000", !46, !1, !15} ; [ DW_TAG_pointer_type ]
+!15 = !{!"0x24\00char\000\008\008\000\000\006", !46, !1} ; [ DW_TAG_base_type ]
+!16 = !{!"0x101\00ptr\0011\000", !0, !1, !6} ; [ DW_TAG_arg_variable ]
+!17 = !{!"0x101\00val\0011\000", !0, !1, !7} ; [ DW_TAG_arg_variable ]
+!18 = !{!"0x101\00c\0011\000", !0, !1, !8} ; [ DW_TAG_arg_variable ]
+!19 = !{!"0x101\00ptr\004\000", !9, !1, !6} ; [ DW_TAG_arg_variable ]
+!20 = !{!"0x101\00val\004\000", !9, !1, !7} ; [ DW_TAG_arg_variable ]
+!21 = !{!"0x101\00c\004\000", !9, !1, !8} ; [ DW_TAG_arg_variable ]
+!22 = !{!"0x101\00argc\0017\000", !10, !1, !5} ; [ DW_TAG_arg_variable ]
+!23 = !{!"0x101\00argv\0017\000", !10, !1, !13} ; [ DW_TAG_arg_variable ]
+!24 = !{!"0x100\00dval\0019\000", !25, !1, !7} ; [ DW_TAG_auto_variable ]
+!25 = !{!"0xb\0018\000\002", !46, !10} ; [ DW_TAG_lexical_block ]
+!26 = !MDLocation(line: 4, scope: !9)
+!27 = !MDLocation(line: 6, scope: !28)
+!28 = !{!"0xb\005\000\001", !46, !9} ; [ DW_TAG_lexical_block ]
+!29 = !MDLocation(line: 7, scope: !28)
+!30 = !MDLocation(line: 11, scope: !0)
+!31 = !MDLocation(line: 13, scope: !32)
+!32 = !{!"0xb\0012\000\000", !46, !0} ; [ DW_TAG_lexical_block ]
+!33 = !MDLocation(line: 14, scope: !32)
+!34 = !MDLocation(line: 17, scope: !10)
+!35 = !MDLocation(line: 19, scope: !25)
+!36 = !MDLocation(line: 20, scope: !25)
+!37 = !MDLocation(line: 21, scope: !25)
+!38 = !MDLocation(line: 4, scope: !9, inlinedAt: !37)
+!39 = !MDLocation(line: 6, scope: !28, inlinedAt: !37)
+!40 = !MDLocation(line: 22, scope: !25)
+!41 = !MDLocation(line: 23, scope: !25)
+!42 = !{!0, !9, !10}
+!43 = !{!16, !17, !18}
+!44 = !{!19, !20, !21}
+!45 = !{!22, !23, !24}
+!46 = !{!"a.c", !"/tmp/"}
+!47 = !{i32 0}
+!48 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll
index 03ce312a9013..c05df6ab3d51 100644
--- a/test/CodeGen/ARM/debug-info-qreg.ll
+++ b/test/CodeGen/ARM/debug-info-qreg.ll
@@ -2,13 +2,11 @@
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-macosx10.6.7"
 
-;CHECK: sub-register
-;CHECK-NEXT: DW_OP_regx
+;CHECK: sub-register DW_OP_regx
 ;CHECK-NEXT: ascii
 ;CHECK-NEXT: DW_OP_piece
 ;CHECK-NEXT: byte   8
-;CHECK-NEXT: sub-register
-;CHECK-NEXT: DW_OP_regx
+;CHECK-NEXT: sub-register DW_OP_regx
 ;CHECK-NEXT: ascii
 ;CHECK-NEXT: DW_OP_piece
 ;CHECK-NEXT: byte   8
@@ -26,7 +24,7 @@ for.body9:                                        ; preds = %for.body9, %entry
   br i1 undef, label %for.end54, label %for.body9, !dbg !44
 
 for.end54:                                        ; preds = %for.body9
-  tail call void @llvm.dbg.value(metadata !{<4 x float> %add19}, i64 0, metadata !27), !dbg !39
+  tail call void @llvm.dbg.value(metadata <4 x float> %add19, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !39
   %tmp115 = extractelement <4 x float> %add19, i32 1
   %conv6.i75 = fpext float %tmp115 to double, !dbg !45
   %call.i82 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45
@@ -35,65 +33,65 @@ for.end54:                                        ; preds = %for.body9
 
 declare i32 @printf(i8* nocapture, ...) nounwind
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!56}
 
-!0 = metadata !{i32 786478, metadata !54, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, <4 x float> (float)* @test0001, null, null, metadata !51, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [test0001]
-!1 = metadata !{i32 786473, metadata !54} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !54, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, metadata !17, metadata !17, metadata !50, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786454, metadata !54, metadata !2, metadata !"v4f32", i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
-!6 = metadata !{i32 786433, metadata !2, null, metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [from float]
-!7 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786465, i64 0, i64 4}         ; [ DW_TAG_subrange_type ]
-!10 = metadata !{i32 786478, metadata !54, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !52, i32 59} ; [ DW_TAG_subprogram ] [line 59] [def] [main]
-!11 = metadata !{i32 786453, metadata !54, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786478, metadata !55, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !53, i32 41} ; [ DW_TAG_subprogram ] [line 41] [local] [def] [printFV]
-!15 = metadata !{i32 786473, metadata !55} ; [ DW_TAG_file_type ]
-!16 = metadata !{i32 786453, metadata !55, metadata !15, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!17 = metadata !{null}
-!18 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
-!20 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0, null} ; [ DW_TAG_arg_variable ]
-!21 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
-!22 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
-!23 = metadata !{i32 786468, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!24 = metadata !{i32 786688, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
-!25 = metadata !{i32 786443, metadata !54, metadata !10, i32 59, i32 33, i32 14} ; [ DW_TAG_lexical_block ]
-!26 = metadata !{i32 786688, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
-!27 = metadata !{i32 786688, metadata !25, metadata !"x", metadata !1, i32 61, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!28 = metadata !{i32 786688, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!29 = metadata !{i32 786688, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!30 = metadata !{i32 786689, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0, null} ; [ DW_TAG_arg_variable ]
-!31 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
-!32 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"FV", i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ]
-!33 = metadata !{i32 786455, metadata !55, metadata !2, metadata !"", i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, null} ; [ DW_TAG_union_type ]
-!34 = metadata !{metadata !35, metadata !37}
-!35 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"V", i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ]
-!36 = metadata !{i32 786454, metadata !55, metadata !2, metadata !"v4sf", i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ]
-!37 = metadata !{i32 786445, metadata !55, metadata !15, metadata !"A", i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ]
-!38 = metadata !{i32 786433, null, metadata !2, metadata !"", i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
-!39 = metadata !{i32 79, i32 7, metadata !40, null}
-!40 = metadata !{i32 786443, metadata !54, metadata !41, i32 75, i32 35, i32 18} ; [ DW_TAG_lexical_block ]
-!41 = metadata !{i32 786443, metadata !54, metadata !42, i32 75, i32 5, i32 17} ; [ DW_TAG_lexical_block ]
-!42 = metadata !{i32 786443, metadata !54, metadata !43, i32 71, i32 32, i32 16} ; [ DW_TAG_lexical_block ]
-!43 = metadata !{i32 786443, metadata !54, metadata !25, i32 71, i32 3, i32 15} ; [ DW_TAG_lexical_block ]
-!44 = metadata !{i32 75, i32 5, metadata !42, null}
-!45 = metadata !{i32 42, i32 2, metadata !46, metadata !48}
-!46 = metadata !{i32 786443, metadata !55, metadata !47, i32 42, i32 2, i32 20} ; [ DW_TAG_lexical_block ]
-!47 = metadata !{i32 786443, metadata !55, metadata !14, i32 41, i32 28, i32 19} ; [ DW_TAG_lexical_block ]
-!48 = metadata !{i32 95, i32 3, metadata !25, null}
-!49 = metadata !{i32 99, i32 3, metadata !25, null}
-!50 = metadata !{metadata !0, metadata !10, metadata !14}
-!51 = metadata !{metadata !18}
-!52 = metadata !{metadata !19, metadata !20, metadata !24, metadata !26, metadata !27, metadata !28, metadata !29}
-!53 = metadata !{metadata !30}
-!54 = metadata !{metadata !"build2.c", metadata !"/private/tmp"}
-!55 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp"}
-!56 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00test0001\00test0001\00\003\000\001\000\006\00256\001\003", !54, !1, !3, null, <4 x float> (float)* @test0001, null, null, !51} ; [ DW_TAG_subprogram ] [line 3] [def] [test0001]
+!1 = !{!"0x29", !54} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 3.0 (trunk 129915)\001\00\000\00\001", !54, !17, !17, !50, null,  null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !54, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x16\00v4f32\0014\000\000\000\000", !54, !2, !6} ; [ DW_TAG_typedef ]
+!6 = !{!"0x1\00\000\00128\00128\000\000", !2, null, !7, !8, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [from float]
+!7 = !{!"0x24\00float\000\0032\0032\000\000\004", null, !2} ; [ DW_TAG_base_type ]
+!8 = !{!9}
+!9 = !{!"0x21\000\004"}         ; [ DW_TAG_subrange_type ]
+!10 = !{!"0x2e\00main\00main\00\0059\000\001\000\006\00256\001\0059", !54, !1, !11, null, i32 (i32, i8**)* @main, null, null, !52} ; [ DW_TAG_subprogram ] [line 59] [def] [main]
+!11 = !{!"0x15\00\000\000\000\000\000\000", !54, !1, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!13}
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
+!14 = !{!"0x2e\00printFV\00printFV\00\0041\001\001\000\006\00256\001\0041", !55, !15, !16, null, null, null, null, !53} ; [ DW_TAG_subprogram ] [line 41] [local] [def] [printFV]
+!15 = !{!"0x29", !55} ; [ DW_TAG_file_type ]
+!16 = !{!"0x15\00\000\000\000\000\000\000", !55, !15, null, !17, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!17 = !{null}
+!18 = !{!"0x101\00a\0016777219\000", !0, !1, !7} ; [ DW_TAG_arg_variable ]
+!19 = !{!"0x101\00argc\0016777275\000", !10, !1, !13} ; [ DW_TAG_arg_variable ]
+!20 = !{!"0x101\00argv\0033554491\000", !10, !1, !21} ; [ DW_TAG_arg_variable ]
+!21 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !22} ; [ DW_TAG_pointer_type ]
+!22 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !23} ; [ DW_TAG_pointer_type ]
+!23 = !{!"0x24\00char\000\008\008\000\000\006", null, !2} ; [ DW_TAG_base_type ]
+!24 = !{!"0x100\00i\0060\000", !25, !1, !13} ; [ DW_TAG_auto_variable ]
+!25 = !{!"0xb\0059\0033\0014", !54, !10} ; [ DW_TAG_lexical_block ]
+!26 = !{!"0x100\00j\0060\000", !25, !1, !13} ; [ DW_TAG_auto_variable ]
+!27 = !{!"0x100\00x\0061\000", !25, !1, !5} ; [ DW_TAG_auto_variable ]
+!28 = !{!"0x100\00y\0062\000", !25, !1, !5} ; [ DW_TAG_auto_variable ]
+!29 = !{!"0x100\00z\0063\000", !25, !1, !5} ; [ DW_TAG_auto_variable ]
+!30 = !{!"0x101\00F\0016777257\000", !14, !15, !31} ; [ DW_TAG_arg_variable ]
+!31 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !32} ; [ DW_TAG_pointer_type ]
+!32 = !{!"0x16\00FV\0025\000\000\000\000", !55, !2, !33} ; [ DW_TAG_typedef ]
+!33 = !{!"0x17\00\0022\00128\00128\000\000\000", !55, !2, i32 0, !34, null} ; [ DW_TAG_union_type ]
+!34 = !{!35, !37}
+!35 = !{!"0xd\00V\0023\00128\00128\000\000", !55, !15, !36} ; [ DW_TAG_member ]
+!36 = !{!"0x16\00v4sf\003\000\000\000\000", !55, !2, !6} ; [ DW_TAG_typedef ]
+!37 = !{!"0xd\00A\0024\00128\0032\000\000", !55, !15, !38} ; [ DW_TAG_member ]
+!38 = !{!"0x1\00\000\00128\0032\000\000", null, !2, !7, !8, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!39 = !MDLocation(line: 79, column: 7, scope: !40)
+!40 = !{!"0xb\0075\0035\0018", !54, !41} ; [ DW_TAG_lexical_block ]
+!41 = !{!"0xb\0075\005\0017", !54, !42} ; [ DW_TAG_lexical_block ]
+!42 = !{!"0xb\0071\0032\0016", !54, !43} ; [ DW_TAG_lexical_block ]
+!43 = !{!"0xb\0071\003\0015", !54, !25} ; [ DW_TAG_lexical_block ]
+!44 = !MDLocation(line: 75, column: 5, scope: !42)
+!45 = !MDLocation(line: 42, column: 2, scope: !46, inlinedAt: !48)
+!46 = !{!"0xb\0042\002\0020", !55, !47} ; [ DW_TAG_lexical_block ]
+!47 = !{!"0xb\0041\0028\0019", !55, !14} ; [ DW_TAG_lexical_block ]
+!48 = !MDLocation(line: 95, column: 3, scope: !25)
+!49 = !MDLocation(line: 99, column: 3, scope: !25)
+!50 = !{!0, !10, !14}
+!51 = !{!18}
+!52 = !{!19, !20, !24, !26, !27, !28, !29}
+!53 = !{!30}
+!54 = !{!"build2.c", !"/private/tmp"}
+!55 = !{!"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", !"/private/tmp"}
+!56 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll
index ee9faf833ccf..9b303dde2e35 100644
--- a/test/CodeGen/ARM/debug-info-s16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -1,8 +1,7 @@
 ; RUN: llc < %s - | FileCheck %s
 ; Radar 9309221
 ; Test dwarf reg no for s16
-;CHECK: super-register
-;CHECK-NEXT: DW_OP_regx
+;CHECK: super-register DW_OP_regx
 ;CHECK-NEXT: ascii
 ;CHECK-NEXT: DW_OP_piece
 ;CHECK-NEXT: 4
@@ -15,9 +14,9 @@ target triple = "thumbv7-apple-macosx10.6.7"
 
 define i32 @inlineprinter(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !8), !dbg !24
-  tail call void @llvm.dbg.value(metadata !{float %val}, i64 0, metadata !10), !dbg !25
-  tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !12), !dbg !26
+  tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !8, metadata !{!"0x102"}), !dbg !24
+  tail call void @llvm.dbg.value(metadata float %val, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !25
+  tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !12, metadata !{!"0x102"}), !dbg !26
   %conv = fpext float %val to double, !dbg !27
   %conv3 = zext i8 %c to i32, !dbg !27
   %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !27
@@ -28,9 +27,9 @@ declare i32 @printf(i8* nocapture, ...) nounwind optsize
 
 define i32 @printer(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize noinline ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !14), !dbg !30
-  tail call void @llvm.dbg.value(metadata !{float %val}, i64 0, metadata !15), !dbg !31
-  tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !16), !dbg !32
+  tail call void @llvm.dbg.value(metadata i8* %ptr, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !30
+  tail call void @llvm.dbg.value(metadata float %val, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !31
+  tail call void @llvm.dbg.value(metadata i8 %c, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !32
   %conv = fpext float %val to double, !dbg !33
   %conv3 = zext i8 %c to i32, !dbg !33
   %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !33
@@ -39,19 +38,19 @@ entry:
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !17), !dbg !36
-  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !18), !dbg !37
+  tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !36
+  tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !37
   %conv = sitofp i32 %argc to double, !dbg !38
   %add = fadd double %conv, 5.555552e+05, !dbg !38
   %conv1 = fptrunc double %add to float, !dbg !38
-  tail call void @llvm.dbg.value(metadata !{float %conv1}, i64 0, metadata !22), !dbg !38
+  tail call void @llvm.dbg.value(metadata float %conv1, i64 0, metadata !22, metadata !{!"0x102"}), !dbg !38
   %call = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0)) nounwind optsize, !dbg !39
   %add.ptr = getelementptr i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !40
   %add5 = add nsw i32 %argc, 97, !dbg !40
   %conv6 = trunc i32 %add5 to i8, !dbg !40
-  tail call void @llvm.dbg.value(metadata !{i8* %add.ptr}, i64 0, metadata !8) nounwind, !dbg !41
-  tail call void @llvm.dbg.value(metadata !{float %conv1}, i64 0, metadata !10) nounwind, !dbg !42
-  tail call void @llvm.dbg.value(metadata !{i8 %conv6}, i64 0, metadata !12) nounwind, !dbg !43
+  tail call void @llvm.dbg.value(metadata i8* %add.ptr, i64 0, metadata !8, metadata !{!"0x102"}) nounwind, !dbg !41
+  tail call void @llvm.dbg.value(metadata float %conv1, i64 0, metadata !10, metadata !{!"0x102"}) nounwind, !dbg !42
+  tail call void @llvm.dbg.value(metadata i8 %conv6, i64 0, metadata !12, metadata !{!"0x102"}) nounwind, !dbg !43
   %conv.i = fpext float %conv1 to double, !dbg !44
   %conv3.i = and i32 %add5, 255, !dbg !44
   %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %add.ptr, double %conv.i, i32 %conv3.i) nounwind optsize, !dbg !44
@@ -61,62 +60,62 @@ entry:
 
 declare i32 @puts(i8* nocapture) nounwind optsize
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!53}
 
-!0 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null, null, metadata !48, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [inlineprinter]
-!1 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !51, i32 12, metadata !"clang version 3.0 (trunk 129915)", i1 true, metadata !"", i32 0, metadata !52, metadata !52, metadata !47, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !51, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"printer", metadata !"printer", metadata !"", i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null, null, metadata !49, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [printer]
-!7 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !50, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
-!8 = metadata !{i32 786689, metadata !0, metadata !"ptr", metadata !1, i32 16777220, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
-!9 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 786689, metadata !0, metadata !"val", metadata !1, i32 33554436, metadata !11, i32 0, null} ; [ DW_TAG_arg_variable ]
-!11 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!12 = metadata !{i32 786689, metadata !0, metadata !"c", metadata !1, i32 50331652, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
-!13 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786689, metadata !6, metadata !"ptr", metadata !1, i32 16777227, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 786689, metadata !6, metadata !"val", metadata !1, i32 33554443, metadata !11, i32 0, null} ; [ DW_TAG_arg_variable ]
-!16 = metadata !{i32 786689, metadata !6, metadata !"c", metadata !1, i32 50331659, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 786689, metadata !7, metadata !"argc", metadata !1, i32 16777233, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!18 = metadata !{i32 786689, metadata !7, metadata !"argv", metadata !1, i32 33554449, metadata !19, i32 0, null} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ]
-!20 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
-!21 = metadata !{i32 786468, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!22 = metadata !{i32 786688, metadata !23, metadata !"dval", metadata !1, i32 19, metadata !11, i32 0, null} ; [ DW_TAG_auto_variable ]
-!23 = metadata !{i32 786443, metadata !51, metadata !7, i32 18, i32 1, i32 2} ; [ DW_TAG_lexical_block ]
-!24 = metadata !{i32 4, i32 22, metadata !0, null}
-!25 = metadata !{i32 4, i32 33, metadata !0, null}
-!26 = metadata !{i32 4, i32 52, metadata !0, null}
-!27 = metadata !{i32 6, i32 3, metadata !28, null}
-!28 = metadata !{i32 786443, metadata !51, metadata !0, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!29 = metadata !{i32 7, i32 3, metadata !28, null}
-!30 = metadata !{i32 11, i32 42, metadata !6, null}
-!31 = metadata !{i32 11, i32 53, metadata !6, null}
-!32 = metadata !{i32 11, i32 72, metadata !6, null}
-!33 = metadata !{i32 13, i32 3, metadata !34, null}
-!34 = metadata !{i32 786443, metadata !51, metadata !6, i32 12, i32 1, i32 1} ; [ DW_TAG_lexical_block ]
-!35 = metadata !{i32 14, i32 3, metadata !34, null}
-!36 = metadata !{i32 17, i32 15, metadata !7, null}
-!37 = metadata !{i32 17, i32 28, metadata !7, null}
-!38 = metadata !{i32 19, i32 31, metadata !23, null}
-!39 = metadata !{i32 20, i32 3, metadata !23, null}
-!40 = metadata !{i32 21, i32 3, metadata !23, null}
-!41 = metadata !{i32 4, i32 22, metadata !0, metadata !40}
-!42 = metadata !{i32 4, i32 33, metadata !0, metadata !40}
-!43 = metadata !{i32 4, i32 52, metadata !0, metadata !40}
-!44 = metadata !{i32 6, i32 3, metadata !28, metadata !40}
-!45 = metadata !{i32 22, i32 3, metadata !23, null}
-!46 = metadata !{i32 23, i32 1, metadata !23, null}
-!47 = metadata !{metadata !0, metadata !6, metadata !7}
-!48 = metadata !{metadata !8, metadata !10, metadata !12}
-!49 = metadata !{metadata !14, metadata !15, metadata !16}
-!50 = metadata !{metadata !17, metadata !18, metadata !22}
-!51 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
-!52 = metadata !{i32 0}
-!53 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00inlineprinter\00inlineprinter\00\005\000\001\000\006\00256\001\005", !51, !1, !3, null, i32 (i8*, float, i8)* @inlineprinter, null, null, !48} ; [ DW_TAG_subprogram ] [line 5] [def] [inlineprinter]
+!1 = !{!"0x29", !51} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 3.0 (trunk 129915)\001\00\000\00\001", !51, !52, !52, !47, null,  null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !51, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
+!6 = !{!"0x2e\00printer\00printer\00\0012\000\001\000\006\00256\001\0012", !51, !1, !3, null, i32 (i8*, float, i8)* @printer, null, null, !49} ; [ DW_TAG_subprogram ] [line 12] [def] [printer]
+!7 = !{!"0x2e\00main\00main\00\0018\000\001\000\006\00256\001\0018", !51, !1, !3, null, i32 (i32, i8**)* @main, null, null, !50} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
+!8 = !{!"0x101\00ptr\0016777220\000", !0, !1, !9} ; [ DW_TAG_arg_variable ]
+!9 = !{!"0xf\00\000\0032\0032\000\000", null, !2, null} ; [ DW_TAG_pointer_type ]
+!10 = !{!"0x101\00val\0033554436\000", !0, !1, !11} ; [ DW_TAG_arg_variable ]
+!11 = !{!"0x24\00float\000\0032\0032\000\000\004", null, !2} ; [ DW_TAG_base_type ]
+!12 = !{!"0x101\00c\0050331652\000", !0, !1, !13} ; [ DW_TAG_arg_variable ]
+!13 = !{!"0x24\00unsigned char\000\008\008\000\000\008", null, !2} ; [ DW_TAG_base_type ]
+!14 = !{!"0x101\00ptr\0016777227\000", !6, !1, !9} ; [ DW_TAG_arg_variable ]
+!15 = !{!"0x101\00val\0033554443\000", !6, !1, !11} ; [ DW_TAG_arg_variable ]
+!16 = !{!"0x101\00c\0050331659\000", !6, !1, !13} ; [ DW_TAG_arg_variable ]
+!17 = !{!"0x101\00argc\0016777233\000", !7, !1, !5} ; [ DW_TAG_arg_variable ]
+!18 = !{!"0x101\00argv\0033554449\000", !7, !1, !19} ; [ DW_TAG_arg_variable ]
+!19 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !20} ; [ DW_TAG_pointer_type ]
+!20 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !21} ; [ DW_TAG_pointer_type ]
+!21 = !{!"0x24\00char\000\008\008\000\000\006", null, !2} ; [ DW_TAG_base_type ]
+!22 = !{!"0x100\00dval\0019\000", !23, !1, !11} ; [ DW_TAG_auto_variable ]
+!23 = !{!"0xb\0018\001\002", !51, !7} ; [ DW_TAG_lexical_block ]
+!24 = !MDLocation(line: 4, column: 22, scope: !0)
+!25 = !MDLocation(line: 4, column: 33, scope: !0)
+!26 = !MDLocation(line: 4, column: 52, scope: !0)
+!27 = !MDLocation(line: 6, column: 3, scope: !28)
+!28 = !{!"0xb\005\001\000", !51, !0} ; [ DW_TAG_lexical_block ]
+!29 = !MDLocation(line: 7, column: 3, scope: !28)
+!30 = !MDLocation(line: 11, column: 42, scope: !6)
+!31 = !MDLocation(line: 11, column: 53, scope: !6)
+!32 = !MDLocation(line: 11, column: 72, scope: !6)
+!33 = !MDLocation(line: 13, column: 3, scope: !34)
+!34 = !{!"0xb\0012\001\001", !51, !6} ; [ DW_TAG_lexical_block ]
+!35 = !MDLocation(line: 14, column: 3, scope: !34)
+!36 = !MDLocation(line: 17, column: 15, scope: !7)
+!37 = !MDLocation(line: 17, column: 28, scope: !7)
+!38 = !MDLocation(line: 19, column: 31, scope: !23)
+!39 = !MDLocation(line: 20, column: 3, scope: !23)
+!40 = !MDLocation(line: 21, column: 3, scope: !23)
+!41 = !MDLocation(line: 4, column: 22, scope: !0, inlinedAt: !40)
+!42 = !MDLocation(line: 4, column: 33, scope: !0, inlinedAt: !40)
+!43 = !MDLocation(line: 4, column: 52, scope: !0, inlinedAt: !40)
+!44 = !MDLocation(line: 6, column: 3, scope: !28, inlinedAt: !40)
+!45 = !MDLocation(line: 22, column: 3, scope: !23)
+!46 = !MDLocation(line: 23, column: 1, scope: !23)
+!47 = !{!0, !6, !7}
+!48 = !{!8, !10, !12}
+!49 = !{!14, !15, !16}
+!50 = !{!17, !18, !22}
+!51 = !{!"a.c", !"/private/tmp"}
+!52 = !{i32 0}
+!53 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index 71a696a6a4a9..977a6f27677c 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -1,26 +1,21 @@
-; RUN: llc < %s - | FileCheck %s
+; RUN: llc < %s - -filetype=obj | llvm-dwarfdump -debug-dump=loc - | FileCheck %s
 ; Radar 9376013
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-macosx10.6.7"
 
-;CHECK-LABEL: Lfunc_begin0:
-;CHECK: Ltmp[[K:[0-9]+]]:
-;CHECK: Ltmp[[L:[0-9]+]]:
-;CHECK-LABEL: Ldebug_loc0:
-;CHECK-NEXT:        .long   Ltmp[[K]]
-;CHECK-NEXT:        .long   Ltmp[[L]]
-;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp{{[0-9]+}}-Ltmp[[M:[0-9]+]]        @ Loc expr size
-;CHECK-NEXT:        .short  Lset[[N]]
-;CHECK-NEXT: Ltmp[[M]]:
-;CHECK-NEXT:        .byte   144                     @ super-register
-;CHECK-NEXT:                                        @ DW_OP_regx
-;CHECK-NEXT:        .ascii
-;CHECK-NEXT:        .byte   {{[0-9]+}}              @ DW_OP_{{.*}}piece
+; Just making sure the first part of the location isn't a repetition
+; of the size of the location description.
+;
+; 0x90   DW_OP_regx of super-register
+
+; CHECK: 0x00000000: Beginning address offset:
+; CHECK-NEXT:           Ending address offset:
+; CHECK-NEXT:            Location description: 90 {{.. .. .. .. $}}
 
 define void @_Z3foov() optsize ssp {
 entry:
   %call = tail call float @_Z3barv() optsize, !dbg !11
-  tail call void @llvm.dbg.value(metadata !{float %call}, i64 0, metadata !5), !dbg !11
+  tail call void @llvm.dbg.value(metadata float %call, i64 0, metadata !5, metadata !{!"0x102"}), !dbg !11
   %call16 = tail call float @_Z2f2v() optsize, !dbg !12
   %cmp7 = fcmp olt float %call, %call16, !dbg !12
   br i1 %cmp7, label %for.body, label %for.end, !dbg !12
@@ -43,29 +38,29 @@ declare float @_Z2f2v() optsize
 
 declare float @_Z2f3f(float) optsize
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!20}
 
-!0 = metadata !{i32 786449, metadata !18, i32 4, metadata !"clang version 3.0 (trunk 130845)", i1 true, metadata !"", i32 0, metadata !19, metadata !19, metadata !16, null,  null, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_Z3foov, null, null, metadata !17, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [foo]
-!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null}
-!5 = metadata !{i32 786688, metadata !6, metadata !"k", metadata !2, i32 6, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
-!6 = metadata !{i32 786443, metadata !18, metadata !1, i32 5, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
-!7 = metadata !{i32 786468, null, metadata !0, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 786688, metadata !9, metadata !"y", metadata !2, i32 8, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 786443, metadata !18, metadata !10, i32 7, i32 25, i32 2} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 786443, metadata !18, metadata !6, i32 7, i32 3, i32 1} ; [ DW_TAG_lexical_block ]
-!11 = metadata !{i32 6, i32 18, metadata !6, null}
-!12 = metadata !{i32 7, i32 3, metadata !6, null}
-!13 = metadata !{i32 8, i32 20, metadata !9, null}
-!14 = metadata !{i32 7, i32 20, metadata !10, null}
-!15 = metadata !{i32 10, i32 1, metadata !6, null}
-!16 = metadata !{metadata !1}
-!17 = metadata !{metadata !5, metadata !8}
-!18 = metadata !{metadata !"k.cc", metadata !"/private/tmp"}
-!19 = metadata !{i32 0}
-!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.0 (trunk 130845)\001\00\000\00\001", !18, !19, !19, !16, null,  null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"0x2e\00foo\00foo\00_Z3foov\005\000\001\000\006\00256\001\005", !18, !2, !3, null, void ()* @_Z3foov, null, null, !17} ; [ DW_TAG_subprogram ] [line 5] [def] [foo]
+!2 = !{!"0x29", !18} ; [ DW_TAG_file_type ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !18, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null}
+!5 = !{!"0x100\00k\006\000", !6, !2, !7} ; [ DW_TAG_auto_variable ]
+!6 = !{!"0xb\005\0012\000", !18, !1} ; [ DW_TAG_lexical_block ]
+!7 = !{!"0x24\00float\000\0032\0032\000\000\004", null, !0} ; [ DW_TAG_base_type ]
+!8 = !{!"0x100\00y\008\000", !9, !2, !7} ; [ DW_TAG_auto_variable ]
+!9 = !{!"0xb\007\0025\002", !18, !10} ; [ DW_TAG_lexical_block ]
+!10 = !{!"0xb\007\003\001", !18, !6} ; [ DW_TAG_lexical_block ]
+!11 = !MDLocation(line: 6, column: 18, scope: !6)
+!12 = !MDLocation(line: 7, column: 3, scope: !6)
+!13 = !MDLocation(line: 8, column: 20, scope: !9)
+!14 = !MDLocation(line: 7, column: 20, scope: !10)
+!15 = !MDLocation(line: 10, column: 1, scope: !6)
+!16 = !{!1}
+!17 = !{!5, !8}
+!18 = !{!"k.cc", !"/private/tmp"}
+!19 = !{i32 0}
+!20 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/ARM/debug-segmented-stacks.ll b/test/CodeGen/ARM/debug-segmented-stacks.ll
index e866b4e124d8..7ea5665a7a9b 100644
--- a/test/CodeGen/ARM/debug-segmented-stacks.ll
+++ b/test/CodeGen/ARM/debug-segmented-stacks.ll
@@ -39,42 +39,40 @@ define void @test_basic() #0 {
 ; ARM-linux       .cfi_same_value r5
 }
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/var.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"var.c", metadata !"/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_basic",
-  metadata !"test_basic", metadata !"", i32 5, metadata !6, i1 false, i1 true,
-  i32 0, i32 0, null, i32 256, i1 false, void ()* @test_basic, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [sum]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/var.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!11 = metadata !{metadata !"clang version 3.5 "}
-!12 = metadata !{i32 786689, metadata !4, metadata !"count", metadata !5, i32 16777221, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [count] [line 5]
-!13 = metadata !{i32 5, i32 0, metadata !4, null}
-!14 = metadata !{i32 786688, metadata !4, metadata !"vl", metadata !5, i32 6, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vl] [line 6]
-!15 = metadata !{i32 786454, metadata !16, null, metadata !"va_list", i32 30, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ] [va_list] [line 30, size 0, align 0, offset 0] [from __builtin_va_list]
-!16 = metadata !{metadata !"/linux-x86_64-high/gcc_4.7.2/dbg/llvm/bin/../lib/clang/3.5/include/stdarg.h", metadata !"/tmp"}
-!17 = metadata !{i32 786454, metadata !1, null, metadata !"__builtin_va_list", i32 6, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [__builtin_va_list] [line 6, size 0, align 0, offset 0] [from __va_list]
-!18 = metadata !{i32 786451, metadata !1, null, metadata !"__va_list", i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__va_list] [line 6, size 32, align 32, offset 0] [def] [from ]
-!19 = metadata !{metadata !20}
-!20 = metadata !{i32 786445, metadata !1, metadata !18, metadata !"__ap", i32 6, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_member ] [__ap] [line 6, size 32, align 32, offset 0] [from ]
-!21 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from ]
-!22 = metadata !{i32 6, i32 0, metadata !4, null}
-!23 = metadata !{i32 7, i32 0, metadata !4, null}
-!24 = metadata !{i32 786688, metadata !4, metadata !"test_basic", metadata !5, i32 8, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [sum] [line 8]
-!25 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
-!26 = metadata !{i32 786688, metadata !27, metadata !"i", metadata !5, i32 9, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 9]
-!27 = metadata !{i32 786443, metadata !1, metadata !4, i32 9, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
-!28 = metadata !{i32 9, i32 0, metadata !27, null}
-!29 = metadata !{i32 10, i32 0, metadata !30, null}
-!30 = metadata !{i32 786443, metadata !1, metadata !27, i32 9, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
-!31 = metadata !{i32 11, i32 0, metadata !30, null}
-!32 = metadata !{i32 12, i32 0, metadata !4, null}
-!33 = metadata !{i32 13, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/var.c] [DW_LANG_C99]
+!1 = !{!"var.c", !"/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00test_basic\00test_basic\00\005\000\001\000\006\00256\000\005", !1, !5, !6, null, void ()* @test_basic, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [sum]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/var.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 1, !"Debug Info Version", i32 2}
+!11 = !{!"clang version 3.5 "}
+!12 = !{!"0x101\00count\0016777221\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [count] [line 5]
+!13 = !MDLocation(line: 5, scope: !4)
+!14 = !{!"0x100\00vl\006\000", !4, !5, !15} ; [ DW_TAG_auto_variable ] [vl] [line 6]
+!15 = !{!"0x16\00va_list\0030\000\000\000\000", !16, null, !17} ; [ DW_TAG_typedef ] [va_list] [line 30, size 0, align 0, offset 0] [from __builtin_va_list]
+!16 = !{!"/linux-x86_64-high/gcc_4.7.2/dbg/llvm/bin/../lib/clang/3.5/include/stdarg.h", !"/tmp"}
+!17 = !{!"0x16\00__builtin_va_list\006\000\000\000\000", !1, null, !18} ; [ DW_TAG_typedef ] [__builtin_va_list] [line 6, size 0, align 0, offset 0] [from __va_list]
+!18 = !{!"0x13\00__va_list\006\0032\0032\000\000\000", !1, null, null, !19, null, null, null} ; [ DW_TAG_structure_type ] [__va_list] [line 6, size 32, align 32, offset 0] [def] [from ]
+!19 = !{!20}
+!20 = !{!"0xd\00__ap\006\0032\0032\000\000", !1, !18, !21} ; [ DW_TAG_member ] [__ap] [line 6, size 32, align 32, offset 0] [from ]
+!21 = !{!"0xf\00\000\0032\0032\000\000", null, null, null} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from ]
+!22 = !MDLocation(line: 6, scope: !4)
+!23 = !MDLocation(line: 7, scope: !4)
+!24 = !{!"0x100\00test_basic\008\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [sum] [line 8]
+!25 = !MDLocation(line: 8, scope: !4)
+!26 = !{!"0x100\00i\009\000", !27, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 9]
+!27 = !{!"0xb\009\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
+!28 = !MDLocation(line: 9, scope: !27)
+!29 = !MDLocation(line: 10, scope: !30)
+!30 = !{!"0xb\009\000\001", !1, !27} ; [ DW_TAG_lexical_block ] [/tmp/var.c]
+!31 = !MDLocation(line: 11, scope: !30)
+!32 = !MDLocation(line: 12, scope: !4)
+!33 = !MDLocation(line: 13, scope: !4)
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
diff --git a/test/CodeGen/ARM/dwarf-unwind.ll b/test/CodeGen/ARM/dwarf-unwind.ll
new file mode 100644
index 000000000000..5256db863440
--- /dev/null
+++ b/test/CodeGen/ARM/dwarf-unwind.ll
@@ -0,0 +1,82 @@
+; RUN: llc -mtriple=thumbv7-netbsd-eabi -o - %s | FileCheck %s
+declare void @bar()
+
+; ARM's frame lowering attempts to tack another callee-saved register onto the
+; list when it detects a potential misaligned VFP store. However, if there are
+; none available it used to just vpush anyway and misreport the location of the
+; registers in unwind info. Since there are benefits to aligned stores, it's
+; better to correct the code than the .cfi_offset directive.
+
+define void @test_dpr_align(i8 %l, i8 %r) {
+; CHECK-LABEL: test_dpr_align:
+; CHECK: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK: .cfi_def_cfa_offset 36
+; CHECK: sub sp, #4
+; CHECK: .cfi_def_cfa_offset 40
+; CHECK: vpush {d8}
+; CHECK: .cfi_offset d8, -48
+; CHECK-NOT: sub sp
+; [...]
+; CHECK: bl bar
+; CHECK-NOT: add sp
+; CHECK: vpop {d8}
+; CHECK: add sp, #4
+; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{d8}"()
+  call void @bar()
+  ret void
+}
+
+; The prologue (but not the epilogue) can be made more space efficient by
+; chucking an argument register into the list. Not worth it in general though,
+; "sub sp, #4" is likely faster.
+define void @test_dpr_align_tiny(i8 %l, i8 %r) minsize {
+; CHECK-LABEL: test_dpr_align_tiny:
+; CHECK: push.w {r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NOT: sub sp
+; CHECK: vpush {d8}
+; CHECK: .cfi_offset d8, -48
+; CHECK-NOT: sub sp
+; [...]
+; CHECK: bl bar
+; CHECK-NOT: add sp
+; CHECK: vpop {d8}
+; CHECK: add sp, #4
+; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{d8}"()
+  call void @bar()
+  ret void
+}
+
+
+; However, we shouldn't do a 2-step align/adjust if there are no DPRs to be
+; saved.
+define void @test_nodpr_noalign(i8 %l, i8 %r) {
+; CHECK-LABEL: test_nodpr_noalign:
+; CHECK: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NOT: sub sp
+; CHECK: sub sp, #12
+; CHECK-NOT: sub sp
+; [...]
+; CHECK: bl bar
+; CHECK-NOT: add sp
+; CHECK: add sp, #12
+; CHECK-NOT: add sp
+; CHECK: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  alloca i64
+  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"()
+  call void @bar()
+  ret void
+}
+
+define void @test_frame_pointer_offset() minsize "no-frame-pointer-elim"="true" {
+; CHECK-LABEL: test_frame_pointer_offset:
+; CHECK: push.w {r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK: .cfi_def_cfa_offset 40
+; CHECK: add r7, sp, #16
+; CHECK: .cfi_def_cfa r7, 24
+; CHECK-NOT: .cfi_def_cfa_offset
+  call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{d8}"()
+  call void @bar()
+  ret void
+}
+\ No newline at end of file
diff --git a/test/CodeGen/ARM/emit-big-cst.ll b/test/CodeGen/ARM/emit-big-cst.ll
index 9a3367dab1a1..01d789c492fe 100644
--- a/test/CodeGen/ARM/emit-big-cst.ll
+++ b/test/CodeGen/ARM/emit-big-cst.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=thumbv7-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mtriple=thumbv7-unknown-unknown -target-abi apcs < %s | FileCheck %s
 ; Check assembly printing of odd constants.
 
 ; CHECK: bigCst:
diff --git a/test/CodeGen/ARM/fabs-neon.ll b/test/CodeGen/ARM/fabs-neon.ll
index e3094aaf57d0..dc1dc321fcf9 100644
--- a/test/CodeGen/ARM/fabs-neon.ll
+++ b/test/CodeGen/ARM/fabs-neon.ll
@@ -15,3 +15,42 @@ define <2 x float> @test2(<2 x float> %a) {
     ret <2 x float> %foo
 }
 declare <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
+
+; No constant pool loads or vector ops are needed for the fabs of a
+; bitcasted integer constant; we should just return integer constants
+; that have the sign bits turned off.
+;
+; So instead of something like this:
+; 	mvn	r0, #0
+; 	mov	r1, #0
+; 	vmov	d16, r1, r0
+; 	vabs.f32	d16, d16
+; 	vmov	r0, r1, d16
+; 	bx	lr
+;
+; We should generate:
+;	mov	r0, #0
+;	mvn	r1, #-2147483648
+;	bx	lr
+
+define i64 @fabs_v2f32_1() {
+; CHECK-LABEL: fabs_v2f32_1:
+; CHECK: mvn r1, #-2147483648
+; CHECK: bx lr
+; CHECK-NOT: vabs
+ %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
+ %ret = bitcast <2 x float> %fabs to i64
+ ret i64 %ret
+}
+
+define i64 @fabs_v2f32_2() {
+; CHECK-LABEL: fabs_v2f32_2:
+; CHECK: mvn r0, #-2147483648
+; CHECK: bx lr
+; CHECK-NOT: vabs
+ %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
+ %ret = bitcast <2 x float> %fabs to i64
+ ret i64 %ret
+}
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
index 2d7378e47f2f..74b31bd9e67f 100644
--- a/test/CodeGen/ARM/fast-isel-call.ll
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -117,17 +117,11 @@ entry:
 ; ARM-LONG: blx [[R]]
 ; THUMB: @t10
 ; THUMB: movs [[R0:l?r[0-9]*]], #0
-; THUMB: movt [[R0]], #0
 ; THUMB: movs [[R1:l?r[0-9]*]], #248
-; THUMB: movt [[R1]], #0
 ; THUMB: movs [[R2:l?r[0-9]*]], #187
-; THUMB: movt [[R2]], #0
 ; THUMB: movs [[R3:l?r[0-9]*]], #28
-; THUMB: movt [[R3]], #0
 ; THUMB: movw [[R4:l?r[0-9]*]], #40
-; THUMB: movt [[R4]], #0
 ; THUMB: movw [[R5:l?r[0-9]*]], #186
-; THUMB: movt [[R5]], #0
 ; THUMB: and [[R0]], [[R0]], #255
 ; THUMB: and [[R1]], [[R1]], #255
 ; THUMB: and [[R2]], [[R2]], #255
@@ -250,4 +244,19 @@ entry:
   ret void
 }
 
+declare void @bar2(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6)
+
+define void @call_undef_args() {
+; ARM-LABEL: call_undef_args
+; ARM:       movw  r0, #1
+; ARM-NEXT:  movw  r1, #2
+; ARM-NEXT:  movw  r2, #3
+; ARM-NEXT:  movw  r3, #4
+; ARM-NOT:   str {{r[0-9]+}}, [sp]
+; ARM:       movw  [[REG:l?r[0-9]*]], #6
+; ARM-NEXT:  str [[REG]], [sp, #4]
+  call void @bar2(i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6)
+  ret void
+}
+
 declare void @print(float)
diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll
index 5e6666c47d3e..c3eed30692b8 100644
--- a/test/CodeGen/ARM/fast-isel-deadcode.ll
+++ b/test/CodeGen/ARM/fast-isel-deadcode.ll
@@ -14,7 +14,6 @@ entry:
 ; THUMB-NOT: ldr
 ; THUMB-NOT: sxtb
 ; THUMB: movs r0, #0
-; THUMB: movt r0, #0
 ; THUMB: pop
   ret i32 0
 }
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
index 089209e45fc3..b09931dc4e2f 100644
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -31,9 +31,7 @@ define void @t1() nounwind ssp {
 ; THUMB: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}}
 ; THUMB: adds r0, #5
 ; THUMB: movs r1, #64
-; THUMB: movt r1, #0
 ; THUMB: movs r2, #10
-; THUMB: movt r2, #0
 ; THUMB: and r1, r1, #255
 ; THUMB: bl {{_?}}memset
 ; THUMB-LONG-LABEL: t1:
@@ -71,7 +69,6 @@ define void @t2() nounwind ssp {
 ; THUMB: adds r1, r0, #4
 ; THUMB: adds r0, #16
 ; THUMB: movs r2, #17
-; THUMB: movt r2, #0
 ; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
 ; THUMB: mov r0, r1
 ; THUMB: ldr r1,  [sp[[SLOT]]] @ 4-byte Reload
@@ -109,7 +106,6 @@ define void @t3() nounwind ssp {
 ; THUMB: adds r1, r0, #4
 ; THUMB: adds r0, #16
 ; THUMB: movs r2, #10
-; THUMB: movt r2, #0
 ; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill
 ; THUMB: mov r0, r1
 ; THUMB: ldr r1,  [sp[[SLOT]]] @ 4-byte Reload
diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll
index 0bc9395e2d78..886f2daa21dd 100644
--- a/test/CodeGen/ARM/fast-isel-mvn.ll
+++ b/test/CodeGen/ARM/fast-isel-mvn.ll
@@ -1,17 +1,14 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -arm-use-movt=false -mtriple=armv7-apple-ios     < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -arm-use-movt=false -mtriple=armv7-linux-gnueabi < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -arm-use-movt=false -mtriple=thumbv7-apple-ios   < %s | FileCheck %s --check-prefix=CHECK --check-prefix=ARM
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -arm-use-movt=true  -mtriple=thumbv7-apple-ios   < %s | FileCheck %s --check-prefix=CHECK --check-prefix=THUMB
+; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -arm-use-movt=true  -mtriple=armv7-apple-ios     < %s | FileCheck %s --check-prefix=MOVT
 ; rdar://10412592
 
-; Note: The Thumb code is being generated by the target-independent selector.
-
 define void @t1() nounwind {
 entry:
-; ARM: t1
-; THUMB: t1
-; ARM: mvn r0, #0
-; THUMB: movw r0, #65535
-; THUMB: movt r0, #65535
+; CHECK-LABEL: t1
+; CHECK:       mvn r0, #0
   call void @foo(i32 -1)
   ret void
 }
@@ -20,22 +17,16 @@ declare void @foo(i32)
 
 define void @t2() nounwind {
 entry:
-; ARM: t2
-; THUMB: t2
-; ARM: mvn r0, #233
-; THUMB: movw r0, #65302
-; THUMB: movt r0, #65535
+; CHECK-LABEL: t2
+; CHECK:       mvn r0, #233
   call void @foo(i32 -234)
   ret void
 }
 
 define void @t3() nounwind {
 entry:
-; ARM: t3
-; THUMB: t3
-; ARM: mvn	r0, #256
-; THUMB: movw r0, #65279
-; THUMB: movt r0, #65535
+; CHECK-LABEL: t3
+; CHECK:       mvn r0, #256
   call void @foo(i32 -257)
   ret void
 }
@@ -43,66 +34,60 @@ entry:
 ; Load from constant pool
 define void @t4() nounwind {
 entry:
-; ARM: t4
-; THUMB: t4
-; ARM: ldr	r0
-; THUMB: movw r0, #65278
-; THUMB: movt r0, #65535
+; ARM-LABEL:   t4
+; ARM:         ldr r0
+; THUMB-LABEL: t4
+; THUMB:       movw r0, #65278
+; THUMB:       movt r0, #65535
   call void @foo(i32 -258)
   ret void
 }
 
 define void @t5() nounwind {
 entry:
-; ARM: t5
-; THUMB: t5
-; ARM: mvn r0, #65280
-; THUMB: movs r0, #255
-; THUMB: movt r0, #65535
+; CHECK-LABEL: t5
+; CHECK:       mvn r0, #65280
   call void @foo(i32 -65281)
   ret void
 }
 
 define void @t6() nounwind {
 entry:
-; ARM: t6
-; THUMB: t6
-; ARM: mvn r0, #978944
-; THUMB: movw r0, #4095
-; THUMB: movt r0, #65521
+; CHECK-LABEL: t6
+; CHECK:       mvn r0, #978944
   call void @foo(i32 -978945)
   ret void
 }
 
 define void @t7() nounwind {
 entry:
-; ARM: t7
-; THUMB: t7
-; ARM: mvn r0, #267386880
-; THUMB: movw r0, #65535
-; THUMB: movt r0, #61455
+; CHECK-LABEL: t7
+; CHECK:       mvn r0, #267386880
   call void @foo(i32 -267386881)
   ret void
 }
 
 define void @t8() nounwind {
 entry:
-; ARM: t8
-; THUMB: t8
-; ARM: mvn r0, #65280
-; THUMB: movs r0, #255
-; THUMB: movt r0, #65535
+; CHECK-LABEL: t8
+; CHECK:       mvn r0, #65280
   call void @foo(i32 -65281)
   ret void
 }
 
 define void @t9() nounwind {
 entry:
-; ARM: t9
-; THUMB: t9
-; ARM: mvn r0, #2130706432
-; THUMB: movw r0, #65535
-; THUMB: movt r0, #33023
+; CHECK-LABEL: t9
+; CHECK:       mvn r0, #2130706432
   call void @foo(i32 -2130706433)
   ret void
 }
+
+; Load from constant pool.
+define i32 @t10(i32 %a) {
+; MOVT-LABEL: t10
+; MOVT:       ldr
+  %1 = xor i32 -1998730207, %a
+  ret i32 %1
+}
+
diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll
index 40f88075039e..549c97e24dcd 100644
--- a/test/CodeGen/ARM/fast-isel-select.ll
+++ b/test/CodeGen/ARM/fast-isel-select.ll
@@ -12,7 +12,6 @@ entry:
 ; ARM: mov r0, r{{[1-9]}}
 ; THUMB: t1
 ; THUMB: movs r{{[1-9]}}, #10
-; THUMB: movt r{{[1-9]}}, #0
 ; THUMB: cmp r0, #0
 ; THUMB: it eq
 ; THUMB: moveq r{{[1-9]}}, #20
@@ -59,13 +58,12 @@ entry:
 ; ARM: cmp r0, #0
 ; ARM: mvneq r{{[1-9]}}, #0
 ; ARM: mov r0, r{{[1-9]}}
-; THUMB: t4
-; THUMB: movw r{{[1-9]}}, #65526
-; THUMB: movt r{{[1-9]}}, #65535
+; THUMB-LABEL: t4
+; THUMB: mvn [[REG:r[1-9]+]], #9
 ; THUMB: cmp r0, #0
 ; THUMB: it eq
-; THUMB: mvneq r{{[1-9]}}, #0
-; THUMB: mov r0, r{{[1-9]}}
+; THUMB: mvneq [[REG]], #0
+; THUMB: mov r0, [[REG]]
   %0 = select i1 %c, i32 -10, i32 -1
   ret i32 %0
 }
diff --git a/test/CodeGen/ARM/fast-isel-vararg.ll b/test/CodeGen/ARM/fast-isel-vararg.ll
index 0b7b0bd1c6f0..3ff2b151ab5f 100644
--- a/test/CodeGen/ARM/fast-isel-vararg.ll
+++ b/test/CodeGen/ARM/fast-isel-vararg.ll
@@ -29,7 +29,6 @@ entry:
 ; ARM: bl {{_?CallVariadic}}
 ; THUMB: sub sp, #32
 ; THUMB: movs r0, #5
-; THUMB: movt r0, #0
 ; THUMB: ldr r1, [sp, #28]
 ; THUMB: ldr r2, [sp, #24]
 ; THUMB: ldr r3, [sp, #20]
diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll
index 36af8352433e..65fe9e36fa1d 100644
--- a/test/CodeGen/ARM/fnegs.ll
+++ b/test/CodeGen/ARM/fnegs.ll
@@ -1,9 +1,12 @@
 ; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \
 ; RUN:  | FileCheck %s -check-prefix=VFP2
 
-; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \
+; RUN: llc -mtriple=arm-eabi -mattr=+neon,-neonfp %s -o - \
 ; RUN:  | FileCheck %s -check-prefix=NFP0
 
+; RUN: llc -mtriple=arm-eabi -mattr=+neon,+neonfp %s -o - \
+; RUN:  | FileCheck %s -check-prefix=NFP1
+
 ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
 ; RUN:  | FileCheck %s -check-prefix=CORTEXA8
 
@@ -70,3 +73,49 @@ entry:
 ; CORTEXA9-LABEL: test2:
 ; CORTEXA9: 	vneg.f32	s{{.*}}, s{{.*}}
 
+; If we're bitcasting an integer to an FP vector, we should avoid the FP/vector unit entirely.
+; Make sure that we're flipping the sign bit and only the sign bit of each float (PR20354).
+; So instead of something like this:
+;    vmov     d16, r0, r1
+;    vneg.f32 d16, d16
+;    vmov     r0, r1, d16
+;
+; We should generate:
+;    eor     r0, r0, #-214783648
+;    eor     r1, r1, #-214783648
+
+define <2 x float> @fneg_bitcast(i64 %i) {
+  %bitcast = bitcast i64 %i to <2 x float>
+  %fneg = fsub <2 x float> <float -0.0, float -0.0>, %bitcast
+  ret <2 x float> %fneg
+}
+; VFP2-LABEL: fneg_bitcast:
+; VFP2-DAG: eor r0, r0, #-2147483648
+; VFP2-DAG: eor r1, r1, #-2147483648
+; VFP2-NOT:  vneg.f32
+
+; NFP1-LABEL: fneg_bitcast:
+; NFP1-DAG: eor r0, r0, #-2147483648
+; NFP1-DAG: eor r1, r1, #-2147483648
+; NFP1-NOT: vneg.f32
+
+; NFP0-LABEL: fneg_bitcast:
+; NFP0-DAG: eor r0, r0, #-2147483648
+; NFP0-DAG: eor r1, r1, #-2147483648
+; NFP0-NOT: vneg.f32
+
+; CORTEXA8-LABEL: fneg_bitcast:
+; CORTEXA8-DAG: eor r0, r0, #-2147483648
+; CORTEXA8-DAG: eor r1, r1, #-2147483648
+; CORTEXA8-NOT:         vneg.f32
+
+; CORTEXA8U-LABEL: fneg_bitcast:
+; CORTEXA8U-DAG: eor r0, r0, #-2147483648
+; CORTEXA8U-DAG: eor r1, r1, #-2147483648
+; CORTEXA8U-NOT:        vneg.f32
+
+; CORTEXA9-LABEL: fneg_bitcast:
+; CORTEXA9-DAG: eor r0, r0, #-2147483648
+; CORTEXA9-DAG: eor r1, r1, #-2147483648
+; CORTEXA9-NOT:         vneg.f32
+
diff --git a/test/CodeGen/ARM/fold-stack-adjust.ll b/test/CodeGen/ARM/fold-stack-adjust.ll
index eb0120f7c1bb..c5ff82eaf830 100644
--- a/test/CodeGen/ARM/fold-stack-adjust.ll
+++ b/test/CodeGen/ARM/fold-stack-adjust.ll
@@ -71,7 +71,7 @@ define void @check_vfp_fold() minsize {
 ; CHECK-IOS-LABEL: check_vfp_fold:
 ; CHECK-IOS: push {r0, r1, r2, r3, r4, r7, lr}
 ; CHECK-IOS: sub.w r4, sp, #16
-; CHECK-IOS: bic r4, r4, #15
+; CHECK-IOS: bfc r4, #0, #4
 ; CHECK-IOS: mov sp, r4
 ; CHECK-IOS: vst1.64 {d8, d9}, [r4:128]
 ; ...
@@ -167,9 +167,9 @@ end:
 define void @test_varsize(...) minsize {
 ; CHECK-T1-LABEL: test_varsize:
 ; CHECK-T1: sub	sp, #16
-; CHECK-T1: push	{r2, r3, r4, r5, r7, lr}
+; CHECK-T1: push	{r5, r6, r7, lr}
 ; ...
-; CHECK-T1: pop	{r2, r3, r4, r5, r7}
+; CHECK-T1: pop	{r2, r3, r7}
 ; CHECK-T1: pop	{r3}
 ; CHECK-T1: add	sp, #16
 ; CHECK-T1: bx	r3
@@ -183,6 +183,7 @@ define void @test_varsize(...) minsize {
 ; CHECK: bx	lr
 
   %var = alloca i8, i32 8
+  call void @llvm.va_start(i8* %var)
   call void @bar(i8* %var)
   ret void
 }
@@ -216,3 +217,5 @@ if.then:                                          ; preds = %entry
 exit:                                             ; preds = %if.then, %entry
   ret float %call1
 }
+
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/ARM/fp16.ll b/test/CodeGen/ARM/fp16.ll
index d3f32556a093..5a926acc5430 100644
--- a/test/CodeGen/ARM/fp16.ll
+++ b/test/CodeGen/ARM/fp16.ll
@@ -1,17 +1,20 @@
 ; RUN: llc < %s | FileCheck %s
 ; RUN: llc -mattr=+vfp3,+fp16 < %s | FileCheck --check-prefix=CHECK-FP16 %s
-; RUN: llc -mtriple=armv8-eabi < %s | FileCheck --check-prefix=CHECK-ARMV8 %s
+; RUN: llc -mtriple=armv8-eabihf < %s | FileCheck --check-prefix=CHECK-ARMV8 %s
+; RUN: llc -mtriple=thumbv7m-eabi < %s | FileCheck --check-prefix=CHECK-SOFTFLOAT %s
+
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
-target triple = "armv7-eabi"
+target triple = "armv7---eabihf"
 
 @x = global i16 12902
 @y = global i16 0
 @z = common global i16 0
 
-define arm_aapcs_vfpcc void @foo() nounwind {
+define void @foo() nounwind {
 ; CHECK-LABEL: foo:
 ; CHECK-FP16-LABEL: foo:
 ; CHECK-ARMV8-LABEL: foo:
+; CHECK-SOFTFLOAT-LABEL: foo:
 entry:
   %0 = load i16* @x, align 2
   %1 = load i16* @y, align 2
@@ -19,23 +22,27 @@ entry:
 ; CHECK: __gnu_h2f_ieee
 ; CHECK-FP16: vcvtb.f32.f16
 ; CHECK-ARMv8: vcvtb.f32.f16
+; CHECK-SOFTFLOAT: __gnu_h2f_ieee
   %3 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
 ; CHECK: __gnu_h2f_ieee
 ; CHECK-FP16: vcvtb.f32.f16
 ; CHECK-ARMV8: vcvtb.f32.f16
+; CHECK-SOFTFLOAT: __gnu_h2f_ieee
   %4 = fadd float %2, %3
   %5 = tail call i16 @llvm.convert.to.fp16.f32(float %4)
 ; CHECK: __gnu_f2h_ieee
 ; CHECK-FP16: vcvtb.f16.f32
 ; CHECK-ARMV8: vcvtb.f16.f32
+; CHECK-SOFTFLOAT: __gnu_f2h_ieee
   store i16 %5, i16* @x, align 2
   ret void
 }
 
-define arm_aapcs_vfpcc double @test_from_fp16(i16 %in) {
+define double @test_from_fp16(i16 %in) {
 ; CHECK-LABEL: test_from_fp16:
-; CHECK-FP-LABEL: test_from_fp16:
+; CHECK-FP16-LABEL: test_from_fp16:
 ; CHECK-ARMV8-LABEL: test_from_fp16:
+; CHECK-SOFTFLOAT-LABEL: test_from_fp16:
   %val = call double @llvm.convert.from.fp16.f64(i16 %in)
 ; CHECK: bl __gnu_h2f_ieee
 ; CHECK: vmov [[TMP:s[0-9]+]], r0
@@ -47,20 +54,26 @@ define arm_aapcs_vfpcc double @test_from_fp16(i16 %in) {
 
 ; CHECK-ARMV8: vmov [[TMP:s[0-9]+]], r0
 ; CHECK-ARMV8: vcvtb.f64.f16 d0, [[TMP]]
+
+; CHECK-SOFTFLOAT: bl __gnu_h2f_ieee
+; CHECK-SOFTFLOAT: bl __aeabi_f2d
   ret double %val
 }
 
-define arm_aapcs_vfpcc i16 @test_to_fp16(double %in) {
+define i16 @test_to_fp16(double %in) {
 ; CHECK-LABEL: test_to_fp16:
-; CHECK-FP-LABEL: test_to_fp16:
+; CHECK-FP16-LABEL: test_to_fp16:
 ; CHECK-ARMV8-LABEL: test_to_fp16:
+; CHECK-SOFTFLOAT-LABEL: test_to_fp16:
   %val = call i16 @llvm.convert.to.fp16.f64(double %in)
-; CHECK: bl __truncdfhf2
+; CHECK: bl __aeabi_d2h
 
-; CHECK-FP16: bl __truncdfhf2
+; CHECK-FP16: bl __aeabi_d2h
 
 ; CHECK-ARMV8: vcvtb.f16.f64 [[TMP:s[0-9]+]], d0
 ; CHECK-ARMV8: vmov r0, [[TMP]]
+
+; CHECK-SOFTFLOAT: bl __aeabi_d2h
   ret i16 %val
 }
 
diff --git a/test/CodeGen/ARM/fpcmp-f64-neon-opt.ll b/test/CodeGen/ARM/fpcmp-f64-neon-opt.ll
new file mode 100644
index 000000000000..7444a6851d95
--- /dev/null
+++ b/test/CodeGen/ARM/fpcmp-f64-neon-opt.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=linux-arm-gnueabihf -mattr=+neon %s -o - | FileCheck %s
+
+; Check that no intermediate integer register is used.
+define i32 @no-intermediate-register-for-zero-imm(double %x) #0 {
+entry:
+; CHECK-LABEL: no-intermediate-register-for-zero-imm
+; CHECK-NOT: vmov
+; CHECK: vcmp
+  %cmp = fcmp une double %x, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
diff --git a/test/CodeGen/ARM/ghc-tcreturn-lowered.ll b/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
index 6d2564ba1ab6..623b4220c21f 100644
--- a/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
+++ b/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
@@ -1,21 +1,21 @@
 ; RUN: llc -mtriple=thumbv7-eabi -o - %s | FileCheck %s
 
-declare cc 10 void @g()
+declare ghccc void @g()
 
-define cc 10 void @test_direct_tail() {
+define ghccc void @test_direct_tail() {
 ; CHECK-LABEL: test_direct_tail:
 ; CHECK: b g
 
-  tail call cc10 void @g()
+  tail call ghccc void @g()
   ret void
 }
 
 @ind_func = global void()* zeroinitializer
 
-define cc 10 void @test_indirect_tail() {
+define ghccc void @test_indirect_tail() {
 ; CHECK-LABEL: test_indirect_tail:
 ; CHECK: bx {{r[0-9]+}}
   %func = load void()** @ind_func
-  tail call cc10 void()* %func()
+  tail call ghccc void()* %func()
   ret void
 }
diff --git a/test/CodeGen/ARM/global-merge-1.ll b/test/CodeGen/ARM/global-merge-1.ll
index 341597e6188c..e5d4def938df 100644
--- a/test/CodeGen/ARM/global-merge-1.ll
+++ b/test/CodeGen/ARM/global-merge-1.ll
@@ -78,8 +78,8 @@ attributes #3 = { nounwind }
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"LLVM version 3.4 "}
-!1 = metadata !{metadata !2, metadata !2, i64 0}
-!2 = metadata !{metadata !"int", metadata !3, i64 0}
-!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
-!4 = metadata !{metadata !"Simple C/C++ TBAA"}
+!0 = !{!"LLVM version 3.4 "}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll
index 3101500f2ca8..2c599bf011a7 100644
--- a/test/CodeGen/ARM/globals.ll
+++ b/test/CodeGen/ARM/globals.ll
@@ -43,6 +43,7 @@ define i32 @test1() {
 ; DarwinPIC: LPC0_0:
 ; DarwinPIC:    ldr r0, [pc, r0]
 ; DarwinPIC:    ldr r0, [r0]
+; DarwinPIC-NOT: ldr
 ; DarwinPIC:    bx lr
 
 ; DarwinPIC: 	.align	2
diff --git a/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll
index 5d8e477d681e..f76fd302774b 100644
--- a/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll
+++ b/test/CodeGen/ARM/ifcvt-branch-weight-bug.ll
@@ -59,5 +59,5 @@ declare %classL* @_ZN1M1JI1LS1_EcvPS1_Ev(%classM2*)
 declare void @_ZN1F10handleMoveEb(%classF*, i1 zeroext)
 declare void @_Z3fn1v()
 
-!0 = metadata !{metadata !"clang version 3.5"}
-!1 = metadata !{metadata !"branch_weights", i32 62, i32 62}
+!0 = !{!"clang version 3.5"}
+!1 = !{!"branch_weights", i32 62, i32 62}
diff --git a/test/CodeGen/ARM/ifcvt-branch-weight.ll b/test/CodeGen/ARM/ifcvt-branch-weight.ll
index a994d3d01ae8..2d12a899f4b3 100644
--- a/test/CodeGen/ARM/ifcvt-branch-weight.ll
+++ b/test/CodeGen/ARM/ifcvt-branch-weight.ll
@@ -38,5 +38,5 @@ return:
   ret i8 1
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 4, i32 12}
-!1 = metadata !{metadata !"branch_weights", i32 8, i32 16}
+!0 = !{!"branch_weights", i32 4, i32 12}
+!1 = !{!"branch_weights", i32 8, i32 16}
diff --git a/test/CodeGen/ARM/inline-diagnostics.ll b/test/CodeGen/ARM/inline-diagnostics.ll
index 7b77da22d5f5..0276abf2f5fa 100644
--- a/test/CodeGen/ARM/inline-diagnostics.ll
+++ b/test/CodeGen/ARM/inline-diagnostics.ll
@@ -13,4 +13,4 @@ define float @inline_func(float %f1, float %f2) #0 {
   ret float %1
 }
 
-!1 = metadata !{i32 271, i32 305}
+!1 = !{i32 271, i32 305}
diff --git a/test/CodeGen/ARM/inlineasm-global.ll b/test/CodeGen/ARM/inlineasm-global.ll
new file mode 100644
index 000000000000..fd210f420925
--- /dev/null
+++ b/test/CodeGen/ARM/inlineasm-global.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple=thumb-unknown-unknown -no-integrated-as < %s | FileCheck %s --check-prefix=THUMB
+; RUN: llc -mtriple=arm-unknown-unknown -no-integrated-as < %s | FileCheck %s --check-prefix=ARM
+
+; In thumb mode, emit ".code 16" before global inline-asm instructions.
+
+; THUMB: .code 16
+; THUMB: stmib
+; THUMB: .code 16
+
+; ARM-NOT: .code 16
+; ARM:     stmib
+
+module asm "stmib sp, {r0-r14};"
diff --git a/test/CodeGen/ARM/interrupt-attr.ll b/test/CodeGen/ARM/interrupt-attr.ll
index cb67dd929f41..c6da09d156b7 100644
--- a/test/CodeGen/ARM/interrupt-attr.ll
+++ b/test/CodeGen/ARM/interrupt-attr.ll
@@ -15,7 +15,7 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
 ; CHECK-A: push {r0, r1, r2, r3, r10, r11, r12, lr}
 ; CHECK-A: add r11, sp, #20
 ; CHECK-A-NOT: sub sp, sp, #{{[0-9]+}}
-; CHECK-A: bic sp, sp, #7
+; CHECK-A: bfc sp, #0, #3
 ; CHECK-A: bl bar
 ; CHECK-A: sub sp, r11, #20
 ; CHECK-A: pop {r0, r1, r2, r3, r10, r11, r12, lr}
@@ -25,7 +25,7 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
 ; CHECK-A-THUMB: push.w {r0, r1, r2, r3, r4, r7, r12, lr}
 ; CHECK-A-THUMB: add r7, sp, #20
 ; CHECK-A-THUMB: mov r4, sp
-; CHECK-A-THUMB: bic r4, r4, #7
+; CHECK-A-THUMB: bfc r4, #0, #3
 ; CHECK-A-THUMB: bl bar
 ; CHECK-A-THUMB: sub.w r4, r7,  #20
 ; CHECK-A-THUMB: mov sp, r4
@@ -38,9 +38,9 @@ define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" {
 ; CHECK-M: push.w {r4, r10, r11, lr}
 ; CHECK-M: add.w r11, sp, #8
 ; CHECK-M: mov r4, sp
-; CHECK-M: bic r4, r4, #7
+; CHECK-M: bfc r4, #0, #3
 ; CHECK-M: mov sp, r4
-; CHECK-M: blx _bar
+; CHECK-M: bl _bar
 ; CHECK-M: sub.w r4, r11, #8
 ; CHECK-M: mov sp, r4
 ; CHECK-M: pop.w {r4, r10, r11, pc}
@@ -56,7 +56,7 @@ define arm_aapcscc void @fiq_fn() alignstack(8) "interrupt"="FIQ" {
   ; 32 to get past r0, r1, ..., r7
 ; CHECK-A: add r11, sp, #32
 ; CHECK-A: sub sp, sp, #{{[0-9]+}}
-; CHECK-A: bic sp, sp, #7
+; CHECK-A: bfc sp, #0, #3
 ; [...]
   ; 32 must match above
 ; CHECK-A: sub sp, r11, #32
@@ -75,7 +75,7 @@ define arm_aapcscc void @swi_fn() alignstack(8) "interrupt"="SWI" {
 ; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
 ; CHECK-A: add r11, sp, #44
 ; CHECK-A: sub sp, sp, #{{[0-9]+}}
-; CHECK-A: bic sp, sp, #7
+; CHECK-A: bfc sp, #0, #3
 ; [...]
 ; CHECK-A: sub sp, r11, #44
 ; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
@@ -91,7 +91,7 @@ define arm_aapcscc void @undef_fn() alignstack(8) "interrupt"="UNDEF" {
 ; CHECK-A: push {r0, r1, r2, r3, r10, r11, r12, lr}
 ; CHECK-A: add r11, sp, #20
 ; CHECK-A-NOT: sub sp, sp, #{{[0-9]+}}
-; CHECK-A: bic sp, sp, #7
+; CHECK-A: bfc sp, #0, #3
 ; [...]
 ; CHECK-A: sub sp, r11, #20
 ; CHECK-A: pop {r0, r1, r2, r3, r10, r11, r12, lr}
@@ -106,7 +106,7 @@ define arm_aapcscc void @abort_fn() alignstack(8) "interrupt"="ABORT" {
 ; CHECK-A: push {r0, r1, r2, r3, r10, r11, r12, lr}
 ; CHECK-A: add r11, sp, #20
 ; CHECK-A-NOT: sub sp, sp, #{{[0-9]+}}
-; CHECK-A: bic sp, sp, #7
+; CHECK-A: bfc sp, #0, #3
 ; [...]
 ; CHECK-A: sub sp, r11, #20
 ; CHECK-A: pop {r0, r1, r2, r3, r10, r11, r12, lr}
diff --git a/test/CodeGen/ARM/invalid-target.ll b/test/CodeGen/ARM/invalid-target.ll
new file mode 100644
index 000000000000..bb0ada4c2fdc
--- /dev/null
+++ b/test/CodeGen/ARM/invalid-target.ll
@@ -0,0 +1,32 @@
+; RUN: not llc -mtriple armvinvalid-linux-gnueabi %s -o - 2>&1 | \
+; RUN: FileCheck %s --check-prefix=ARMVINVALID
+
+; RUN: not llc -mtriple armebvinvalid-linux-gnueabi %s -o - 2>&1 | \
+; RUN: FileCheck %s --check-prefix=ARMEBVINVALID
+
+; RUN: not llc -mtriple thumbvinvalid-linux-gnueabi %s -o - 2>&1 | \
+; RUN: FileCheck %s --check-prefix=THUMBVINVALID
+
+; RUN: not llc -mtriple thumbebvinvalid-linux-gnueabi %s -o - 2>&1 | \
+; RUN: FileCheck %s --check-prefix=THUMBEBVINVALID
+
+; RUN: not llc -mtriple thumbv2-linux-gnueabi %s -o - 2>&1 | \
+; RUN: FileCheck %s --check-prefix=THUMBV2
+
+; RUN: not llc -mtriple thumbv3-linux-gnueabi %s -o - 2>&1 | \
+; RUN: FileCheck %s --check-prefix=THUMBV3
+
+; RUN: not llc -mtriple arm64invalid-linux-gnu %s -o - 2>&1 | \
+; RUN: FileCheck %s --check-prefix=ARM64INVALID
+
+; RUN: not llc -mtriple aarch64invalid-linux-gnu %s -o - 2>&1 | \
+; RUN: FileCheck %s --check-prefix=AARCH64INVALID
+
+; ARMVINVALID: error: unable to get target for 'armvinvalid--linux-gnueabi'
+; ARMEBVINVALID: error: unable to get target for 'armebvinvalid--linux-gnueabi'
+; THUMBVINVALID: error: unable to get target for 'thumbvinvalid--linux-gnueabi'
+; THUMBEBVINVALID: error: unable to get target for 'thumbebvinvalid--linux-gnueabi'
+; THUMBV2: error: unable to get target for 'thumbv2--linux-gnueabi'
+; THUMBV3: error: unable to get target for 'thumbv3--linux-gnueabi'
+; ARM64INVALID: error: unable to get target for 'arm64invalid--linux-gnu'
+; AARCH64INVALID: error: unable to get target for 'aarch64invalid--linux-gnu'
diff --git a/test/CodeGen/ARM/isel-v8i32-crash.ll b/test/CodeGen/ARM/isel-v8i32-crash.ll
new file mode 100644
index 000000000000..0116fe8de7cc
--- /dev/null
+++ b/test/CodeGen/ARM/isel-v8i32-crash.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=armv7-linux-gnu | FileCheck %s
+
+; Check we don't crash when trying to combine:
+;   (d1 = <float 8.000000e+00, float 8.000000e+00, ...>) (power of 2)
+;   vmul.f32        d0, d1, d0
+;   vcvt.s32.f32    d0, d0
+; into:
+;   vcvt.s32.f32    d0, d0, #3
+; when we have a vector length of 8, due to use of v8i32 types.
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK: func:
+; CHECK: vcvt.s32.f32  q[[R:[0-9]]], q[[R]], #3
+define void @func(i16* nocapture %pb, float* nocapture readonly %pf) #0 {
+entry:
+  %0 = bitcast float* %pf to <8 x float>*
+  %1 = load <8 x float>* %0, align 4
+  %2 = fmul <8 x float> %1, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
+  %3 = fptosi <8 x float> %2 to <8 x i16>
+  %4 = bitcast i16* %pb to <8 x i16>*
+  store <8 x i16> %3, <8 x i16>* %4, align 2
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/ARM/jump_tables.ll b/test/CodeGen/ARM/jump_tables.ll
deleted file mode 100644
index 907a86c25387..000000000000
--- a/test/CodeGen/ARM/jump_tables.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: llc <%s -mtriple=arm-unknown-linux-gnueabi -jump-table-type=single | FileCheck --check-prefix=ARM %s
-; RUN: llc <%s -mtriple=thumb-unknown-linux-gnueabi -jump-table-type=single | FileCheck --check-prefix=THUMB %s
-
-define void @indirect_fun() unnamed_addr jumptable {
-  ret void
-}
-define void ()* @get_fun() {
-  ret void ()* @indirect_fun
-
-; ARM:         ldr     r0, [[LABEL:.*]]
-; ARM:         mov     pc, lr
-; ARM: [[LABEL]]:
-; ARM:         .long   __llvm_jump_instr_table_0_1
-
-; THUMB:         ldr     r0, [[LABEL:.*]]
-; THUMB:         bx      lr
-; THUMB: [[LABEL]]:
-; THUMB:         .long   __llvm_jump_instr_table_0_1
-}
-
-; ARM:         .globl  __llvm_jump_instr_table_0_1
-; ARM:         .align  3
-; ARM:         .type   __llvm_jump_instr_table_0_1,%function
-; ARM: __llvm_jump_instr_table_0_1:
-; ARM:         b     indirect_fun(PLT)
-
-; THUMB:         .globl  __llvm_jump_instr_table_0_1
-; THUMB:         .align  3
-; THUMB:         .thumb_func
-; THUMB:         .type   __llvm_jump_instr_table_0_1,%function
-; THUMB: __llvm_jump_instr_table_0_1:
-; THUMB:         b     indirect_fun(PLT)
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 84ce4a7f0e79..33ac4e125633 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -46,10 +46,8 @@ entry:
 ; CHECK: movw [[REG2:r[0-9]+]], #16716
 ; CHECK: movt [[REG2:r[0-9]+]], #72
 ; CHECK: str [[REG2]], [r0, #32]
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
-; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; CHECK: adds r0, #16
-; CHECK: adds r1, #16
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]!
 ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
 ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
@@ -59,10 +57,8 @@ entry:
 define void @t3(i8* nocapture %C) nounwind {
 entry:
 ; CHECK-LABEL: t3:
-; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
-; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
-; CHECK: adds r0, #16
-; CHECK: adds r1, #16
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]!
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]!
 ; CHECK: vld1.8 {d{{[0-9]+}}}, [r1]
 ; CHECK: vst1.8 {d{{[0-9]+}}}, [r0]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
@@ -73,7 +69,8 @@ define void @t4(i8* nocapture %C) nounwind {
 entry:
 ; CHECK-LABEL: t4:
 ; CHECK: vld1.8 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1]
-; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]
+; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]!
+; CHECK: strh [[REG5:r[0-9]+]], [r0]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
   ret void
 }
diff --git a/test/CodeGen/ARM/metadata-default.ll b/test/CodeGen/ARM/metadata-default.ll
index f6a3fe289cc1..f8e40b4cb7e7 100644
--- a/test/CodeGen/ARM/metadata-default.ll
+++ b/test/CodeGen/ARM/metadata-default.ll
@@ -9,8 +9,8 @@ define i32 @f(i64 %z) {
 
 !llvm.module.flags = !{!0, !1}
 
-!0 = metadata !{i32 1, metadata !"wchar_size", i32 4}
-!1 = metadata !{i32 1, metadata !"min_enum_size", i32 4}
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 4}
 
 ; CHECK: .eabi_attribute 18, 4   @ Tag_ABI_PCS_wchar_t
 ; CHECK: .eabi_attribute 26, 2   @ Tag_ABI_enum_size
diff --git a/test/CodeGen/ARM/metadata-short-enums.ll b/test/CodeGen/ARM/metadata-short-enums.ll
index bccd3327e5b5..2f1586d0dcd0 100644
--- a/test/CodeGen/ARM/metadata-short-enums.ll
+++ b/test/CodeGen/ARM/metadata-short-enums.ll
@@ -9,8 +9,8 @@ define i32 @f(i64 %z) {
 
 !llvm.module.flags = !{!0, !1}
 
-!0 = metadata !{i32 1, metadata !"wchar_size", i32 4}
-!1 = metadata !{i32 1, metadata !"min_enum_size", i32 1}
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 1}
 
 ; CHECK: .eabi_attribute 18, 4   @ Tag_ABI_PCS_wchar_t
 ; CHECK: .eabi_attribute 26, 1   @ Tag_ABI_enum_size
diff --git a/test/CodeGen/ARM/metadata-short-wchar.ll b/test/CodeGen/ARM/metadata-short-wchar.ll
index 6de9bf174317..b7f5833e07bc 100644
--- a/test/CodeGen/ARM/metadata-short-wchar.ll
+++ b/test/CodeGen/ARM/metadata-short-wchar.ll
@@ -9,8 +9,8 @@ define i32 @f(i64 %z) {
 
 !llvm.module.flags = !{!0, !1}
 
-!0 = metadata !{i32 1, metadata !"wchar_size", i32 2}
-!1 = metadata !{i32 1, metadata !"min_enum_size", i32 4}
+!0 = !{i32 1, !"wchar_size", i32 2}
+!1 = !{i32 1, !"min_enum_size", i32 4}
 
 ; CHECK: .eabi_attribute 18, 2   @ Tag_ABI_PCS_wchar_t
 ; CHECK: .eabi_attribute 26, 2   @ Tag_ABI_enum_size
diff --git a/test/CodeGen/ARM/named-reg-alloc.ll b/test/CodeGen/ARM/named-reg-alloc.ll
index 3c27d2244e3c..380cf39734ff 100644
--- a/test/CodeGen/ARM/named-reg-alloc.ll
+++ b/test/CodeGen/ARM/named-reg-alloc.ll
@@ -11,4 +11,4 @@ entry:
 
 declare i32 @llvm.read_register.i32(metadata) nounwind
 
-!0 = metadata !{metadata !"r5\00"}
+!0 = !{!"r5\00"}
diff --git a/test/CodeGen/ARM/named-reg-notareg.ll b/test/CodeGen/ARM/named-reg-notareg.ll
index af38b609b404..3ac03f4fdaaa 100644
--- a/test/CodeGen/ARM/named-reg-notareg.ll
+++ b/test/CodeGen/ARM/named-reg-notareg.ll
@@ -10,4 +10,4 @@ entry:
 
 declare i32 @llvm.read_register.i32(metadata) nounwind
 
-!0 = metadata !{metadata !"notareg\00"}
+!0 = !{!"notareg\00"}
diff --git a/test/CodeGen/ARM/negative-offset.ll b/test/CodeGen/ARM/negative-offset.ll
new file mode 100644
index 000000000000..7b949fd71fe1
--- /dev/null
+++ b/test/CodeGen/ARM/negative-offset.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=arm-eabi -O3 %s -o - | FileCheck %s
+
+; Function Attrs: nounwind readonly
+define arm_aapcscc i32 @sum(i32* nocapture readonly %p) #0 {
+entry:
+;CHECK-LABEL: sum:
+;CHECK-NOT: sub
+;CHECK: ldr r{{.*}}, [r0, #-16]
+;CHECK: ldr r{{.*}}, [r0, #-8]
+  %arrayidx = getelementptr inbounds i32* %p, i32 -4
+  %0 = load i32* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds i32* %p, i32 -2
+  %1 = load i32* %arrayidx1, align 4
+  %add = add nsw i32 %1, %0
+  ret i32 %add
+}
+
diff --git a/test/CodeGen/ARM/no-tail-call.ll b/test/CodeGen/ARM/no-tail-call.ll
new file mode 100644
index 000000000000..3a8cb21bee92
--- /dev/null
+++ b/test/CodeGen/ARM/no-tail-call.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -O0 -o - | FileCheck %s
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "armv7s-apple-ios7"
+
+%foo = type <{ %Sf }>
+%Sf = type <{ float }>
+
+declare float @llvm.ceil.f32(float) 
+
+; Check that we are not emitting a tail call for the last call to ceil.
+; This function returns three different results.
+; CHECK-LABEL: func1:
+; CHECK-NOT: b _ceilf
+; CHECK: pop
+define { float, float, float } @func1() {
+entry:
+  %0 = alloca %foo, align 4
+  %1 = alloca %foo, align 4
+  %2 = alloca %foo, align 4
+  %.native = getelementptr inbounds %foo* %0, i32 0, i32 0
+  %.native.value = getelementptr inbounds %Sf* %.native, i32 0, i32 0
+  store float 0.000000e+00, float* %.native.value, align 4
+  %.native1 = getelementptr inbounds %foo* %1, i32 0, i32 0
+  %.native1.value = getelementptr inbounds %Sf* %.native1, i32 0, i32 0
+  store float 1.000000e+00, float* %.native1.value, align 4
+  %.native2 = getelementptr inbounds %foo* %2, i32 0, i32 0
+  %.native2.value = getelementptr inbounds %Sf* %.native2, i32 0, i32 0
+  store float 5.000000e+00, float* %.native2.value, align 4
+  br i1 true, label %3, label %4
+
+; <label>:3                                       ; preds = %entry
+  %.native4 = getelementptr inbounds %foo* %1, i32 0, i32 0
+  %.native4.value = getelementptr inbounds %Sf* %.native4, i32 0, i32 0
+  store float 2.000000e+00, float* %.native4.value, align 4
+  br label %4
+
+; <label>:4                                       ; preds = %3, %entry
+  %5 = call float @llvm.ceil.f32(float 5.000000e+00)
+  %.native3 = getelementptr inbounds %foo* %1, i32 0, i32 0
+  %.native3.value = getelementptr inbounds %Sf* %.native3, i32 0, i32 0
+  %6 = load float* %.native3.value, align 4
+  %7 = call float @llvm.ceil.f32(float %6)
+  %8 = insertvalue { float, float, float } { float 0.000000e+00, float undef, float undef }, float %5, 1
+  %9 = insertvalue { float, float, float } %8, float %7, 2
+  ret { float, float, float } %9
+}
+
+; Check that we are not emitting a tail call for the last call to ceil.
+; This function returns two different results.
+; CHECK-LABEL: func2:
+; CHECK-NOT: b _ceilf
+; CHECK: pop
+define { float, float } @func2() {
+entry:
+  %0 = alloca %foo, align 4
+  %1 = alloca %foo, align 4
+  %2 = alloca %foo, align 4
+  %.native = getelementptr inbounds %foo* %0, i32 0, i32 0
+  %.native.value = getelementptr inbounds %Sf* %.native, i32 0, i32 0
+  store float 0.000000e+00, float* %.native.value, align 4
+  %.native1 = getelementptr inbounds %foo* %1, i32 0, i32 0
+  %.native1.value = getelementptr inbounds %Sf* %.native1, i32 0, i32 0
+  store float 1.000000e+00, float* %.native1.value, align 4
+  %.native2 = getelementptr inbounds %foo* %2, i32 0, i32 0
+  %.native2.value = getelementptr inbounds %Sf* %.native2, i32 0, i32 0
+  store float 5.000000e+00, float* %.native2.value, align 4
+  br i1 true, label %3, label %4
+
+; <label>:3                                       ; preds = %entry
+  %.native4 = getelementptr inbounds %foo* %1, i32 0, i32 0
+  %.native4.value = getelementptr inbounds %Sf* %.native4, i32 0, i32 0
+  store float 2.000000e+00, float* %.native4.value, align 4
+  br label %4
+
+; <label>:4                                       ; preds = %3, %entry
+  %5 = call float @llvm.ceil.f32(float 5.000000e+00)
+  %.native3 = getelementptr inbounds %foo* %1, i32 0, i32 0
+  %.native3.value = getelementptr inbounds %Sf* %.native3, i32 0, i32 0
+  %6 = load float* %.native3.value, align 4
+  %7 = call float @llvm.ceil.f32(float %6)
+  %8 = insertvalue { float, float } { float 0.000000e+00, float undef }, float %7, 1
+  ret { float, float } %8
+}
+
diff --git a/test/CodeGen/ARM/none-macho-v4t.ll b/test/CodeGen/ARM/none-macho-v4t.ll
new file mode 100644
index 000000000000..b6018de796af
--- /dev/null
+++ b/test/CodeGen/ARM/none-macho-v4t.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple=thumb-none-macho -mcpu=arm7tdmi %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-none-macho -mcpu=arm7tdmi %s -filetype=obj -o /dev/null
+
+declare void @callee()
+
+define void @test_call() {
+  ; BX can only take a register before v5t came along, so we must materialise
+  ; the address properly.
+; CHECK-LABEL: test_call:
+; CHECK: ldr r[[CALLEE_STUB:[0-9]+]], [[LITPOOL:LCPI[0-9]+_[0-9]+]]
+; CHECK: [[PC_LABEL:LPC[0-9]+_[0-9]+]]:
+; CHECK-NEXT: add r[[CALLEE_STUB]], pc
+; CHECK: ldr [[CALLEE:r[0-9]+]], [r[[CALLEE_STUB]]]
+; CHECK-NOT: mov lr, pc
+; CHECK: bl [[INDIRECT_PAD:Ltmp[0-9]+]]
+
+; CHECK: [[LITPOOL]]:
+; CHECK-NEXT: .long L_callee$non_lazy_ptr-([[PC_LABEL]]+4)
+
+; CHECK: [[INDIRECT_PAD]]:
+; CHECK: bx [[CALLEE]]
+
+  call void @callee()
+  ret void
+}
diff --git a/test/CodeGen/ARM/none-macho.ll b/test/CodeGen/ARM/none-macho.ll
index 60c21716dc35..2a7878fee300 100644
--- a/test/CodeGen/ARM/none-macho.ll
+++ b/test/CodeGen/ARM/none-macho.ll
@@ -84,7 +84,7 @@ define float @test_softfloat_calls(float %in) {
 
   ; Soft-float calls should be GNU-style rather than RTABI and should not be the
   ; *vfp variants used for ARMv6 iOS.
-; CHECK: blx ___addsf3{{$}}
+; CHECK: bl ___addsf3{{$}}
   ret float %sum
 }
 
diff --git a/test/CodeGen/ARM/out-of-registers.ll b/test/CodeGen/ARM/out-of-registers.ll
index 790e4165d4c6..a83923db0b30 100644
--- a/test/CodeGen/ARM/out-of-registers.ll
+++ b/test/CodeGen/ARM/out-of-registers.ll
@@ -38,5 +38,5 @@ attributes #2 = { nounwind readonly }
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"Snapdragon LLVM ARM Compiler 3.4"}
-!1 = metadata !{metadata !1}
+!0 = !{!"Snapdragon LLVM ARM Compiler 3.4"}
+!1 = !{!1}
diff --git a/test/CodeGen/ARM/pr18364-movw.ll b/test/CodeGen/ARM/pr18364-movw.ll
new file mode 100644
index 000000000000..fdcf15485f1c
--- /dev/null
+++ b/test/CodeGen/ARM/pr18364-movw.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=armv5te | FileCheck %s --check-prefix=V5
+; RUN: llc < %s -mtriple=armv6   | FileCheck %s --check-prefix=V6
+; RUN: llc < %s -mtriple=armv6t2 | FileCheck %s --check-prefix=V6T2
+; RUN: llc < %s -mtriple=armv7   | FileCheck %s --check-prefix=V7
+; PR18364
+
+define i64 @f() #0 {
+entry:
+; V5-NOT: movw
+; V6-NOT: movw
+; V6T2: movw
+; V7: movw
+  %y = alloca i64, align 8
+  %z = alloca i64, align 8
+  store i64 1, i64* %y, align 8
+  store i64 11579764786944, i64* %z, align 8
+  %0 = load i64* %y, align 8
+  %1 = load i64* %z, align 8
+  %sub = sub i64 %0, %1
+  ret i64 %sub
+}
+
+define i64 @g(i64 %a, i32 %b) #0 {
+entry:
+; V5-NOT: movw
+; V6-NOT: movw
+; V6T2: movw
+; V7: movw
+  %0 = mul i64 %a, 86400000
+  %mul = add i64 %0, -210866803200000
+  %conv = sext i32 %b to i64
+  %add = add nsw i64 %mul, %conv
+  ret i64 %add
+}
diff --git a/test/CodeGen/ARM/preferred-align.ll b/test/CodeGen/ARM/preferred-align.ll
new file mode 100644
index 000000000000..8cd4ef615468
--- /dev/null
+++ b/test/CodeGen/ARM/preferred-align.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=armv7-linux-gnueabi %s -o - | FileCheck %s
+
+@var_agg = global {i8, i8} zeroinitializer
+
+; CHECK: .globl var_agg
+; CHECK-NEXT: .align 2
+
+@var1 = global i1 zeroinitializer
+
+; CHECK: .globl var1
+; CHECK-NOT: .align
+
+@var8 = global i8 zeroinitializer
+
+; CHECK: .globl var8
+; CHECK-NOT: .align
+
+@var16 = global i16 zeroinitializer
+
+; CHECK: .globl var16
+; CHECK-NEXT: .align 1
+\ No newline at end of file
diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll
index 7350e0a90d89..7fdc5b65c70e 100644
--- a/test/CodeGen/ARM/prefetch.ll
+++ b/test/CodeGen/ARM/prefetch.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=thumb-eabi -mattr=-thumb2 %s -o - | FileCheck %s -check-prefix CHECK-T1
 ; RUN: llc -mtriple=thumb-eabi -mattr=+v7 %s -o - | FileCheck %s -check-prefix=THUMB2
 ; RUN: llc -mtriple=arm-eabi -mattr=+v7 %s -o - | FileCheck %s -check-prefix=ARM
-; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9-mp %s -o - | FileCheck %s -check-prefix=ARM-MP
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a9 %s -o - | FileCheck %s -check-prefix=ARM-MP
 ; rdar://8601536
 
 ; CHECK-T1-NOT: pld
diff --git a/test/CodeGen/ARM/sbfx.ll b/test/CodeGen/ARM/sbfx.ll
index 3c25edcaa75c..5b77c59bca96 100644
--- a/test/CodeGen/ARM/sbfx.ll
+++ b/test/CodeGen/ARM/sbfx.ll
@@ -45,3 +45,21 @@ entry:
     %tmp2 = ashr i32 %tmp, 1
     ret i32 %tmp2
 }
+
+define signext i8 @f6(i32 %a) {
+; CHECK-LABEL: f6:
+; CHECK: sbfx r0, r0, #23, #8
+
+  %tmp = lshr i32 %a, 23
+  %res = trunc i32 %tmp to i8
+  ret i8 %res
+}
+
+define signext i8 @f7(i32 %a) {
+; CHECK-LABEL: f7:
+; CHECK-NOT: sbfx
+
+  %tmp = lshr i32 %a, 25
+  %res = trunc i32 %tmp to i8
+  ret i8 %res
+}
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index e13504a42a16..326eb512d858 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -222,3 +222,110 @@ entry:
   %add = add i32 %conv, %c
   ret i32 %add
 }
+
+; Do not fold the xor into the select
+define i32 @t15(i32 %p) {
+entry:
+; ARM-LABEL: t15:
+; ARM: mov     [[REG:r[0-9]+]], #2
+; ARM: cmp     r0, #8
+; ARM: movwgt  [[REG:r[0-9]+]], #1
+; ARM: eor     r0, [[REG:r[0-9]+]], #1
+
+; T2-LABEL: t15:
+; T2: movs    [[REG:r[0-9]+]], #2
+; T2: cmp     [[REG:r[0-9]+]], #8
+; T2: it      gt
+; T2: movgt   [[REG:r[0-9]+]], #1
+; T2: eor     r0, [[REG:r[0-9]+]], #1
+  %cmp = icmp sgt i32 %p, 8
+  %a = select i1 %cmp, i32 1, i32 2
+  %xor = xor i32 %a, 1
+  ret i32 %xor
+}
+
+define i32 @t16(i32 %x, i32 %y) {
+entry:
+; ARM-LABEL: t16:
+; ARM: and r0, {{r[0-9]+}}, {{r[0-9]+}}
+
+; T2-LABEL: t16:
+; T2: ands r0, {{r[0-9]+}}
+  %cmp = icmp eq i32 %x, 0
+  %cond = select i1 %cmp, i32 5, i32 2
+  %cmp1 = icmp eq i32 %y, 0
+  %cond2 = select i1 %cmp1, i32 3, i32 4
+  %and = and i32 %cond2, %cond
+  ret i32 %and
+}
+
+define i32 @t17(i32 %x, i32 %y) #0 {
+entry:
+; ARM-LABEL: t17:
+; ARM: and r0, {{r[0-9]+}}, {{r[0-9]+}}
+
+; T2-LABEL: t17:
+; T2: ands r0, {{r[0-9]+}}
+  %cmp = icmp eq i32 %x, -1
+  %cond = select i1 %cmp, i32 5, i32 2
+  %cmp1 = icmp eq i32 %y, -1
+  %cond2 = select i1 %cmp1, i32 3, i32 4
+  %and = and i32 %cond2, %cond
+  ret i32 %and
+}
+
+define i32 @t18(i32 %x, i32 %y) #0 {
+entry:
+; ARM-LABEL: t18:
+; ARM: and r0, {{r[0-9]+}}, {{r[0-9]+}}
+
+; T2-LABEL: t18:
+; T2: and.w r0, {{r[0-9]+}}
+  %cmp = icmp ne i32 %x, 0
+  %cond = select i1 %cmp, i32 5, i32 2
+  %cmp1 = icmp ne i32 %x, -1
+  %cond2 = select i1 %cmp1, i32 3, i32 4
+  %and = and i32 %cond2, %cond
+  ret i32 %and
+}
+
+define i32 @t19(i32 %x, i32 %y) #0 {
+entry:
+; ARM-LABEL: t19:
+; ARM: orr r0, {{r[0-9]+}}, {{r[0-9]+}}
+
+; T2-LABEL: t19:
+; T2: orrs r0, {{r[0-9]+}}
+  %cmp = icmp ne i32 %x, 0
+  %cond = select i1 %cmp, i32 5, i32 2
+  %cmp1 = icmp ne i32 %y, 0
+  %cond2 = select i1 %cmp1, i32 3, i32 4
+  %or = or i32 %cond2, %cond
+  ret i32 %or
+}
+
+define i32 @t20(i32 %x, i32 %y) #0 {
+entry:
+; ARM-LABEL: t20:
+; ARM: orr r0, {{r[0-9]+}}, {{r[0-9]+}}
+
+; T2-LABEL: t20:
+; T2: orrs r0, {{r[0-9]+}}
+  %cmp = icmp ne i32 %x, -1
+  %cond = select i1 %cmp, i32 5, i32 2
+  %cmp1 = icmp ne i32 %y, -1
+  %cond2 = select i1 %cmp1, i32 3, i32 4
+  %or = or i32 %cond2, %cond
+  ret i32 %or
+}
+
+define  <2 x i32> @t21(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK-LABEL: t21:
+; CHECK-NOT: eor
+; CHECK: mvn
+; CHECK-NOT: eor
+  %tst = icmp eq <2 x i32> %lhs, %rhs
+  %ntst = xor <2 x i1> %tst, <i1 1 , i1 undef>
+  %btst = sext <2 x i1> %ntst to <2 x i32>
+  ret <2 x i32> %btst
+}
diff --git a/test/CodeGen/ARM/smulw.ll b/test/CodeGen/ARM/smulw.ll
new file mode 100644
index 000000000000..8653903eee53
--- /dev/null
+++ b/test/CodeGen/ARM/smulw.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mtriple=arm--none-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb--none-eabi -mcpu=cortex-a8 %s -o - | FileCheck %s
+
+; We cannot codegen the smulw[bt] or smlaw[bt] instructions for these functions,
+; as the top 16 bits of the result would differ
+
+define i32 @f1(i32 %a, i16 %b) {
+; CHECK-LABEL: f1:
+; CHECK: mul
+; CHECK: asr
+  %tmp1 = sext i16 %b to i32
+  %tmp2 = mul i32 %a, %tmp1
+  %tmp3 = ashr i32 %tmp2, 16
+  ret i32 %tmp3
+}
+
+define i32 @f2(i32 %a, i16 %b, i32 %c) {
+; CHECK-LABEL: f2:
+; CHECK: mul
+; CHECK: add{{.*}}, asr #16
+  %tmp1 = sext i16 %b to i32
+  %tmp2 = mul i32 %a, %tmp1
+  %tmp3 = ashr i32 %tmp2, 16
+  %tmp4 = add i32 %tmp3, %c
+  ret i32 %tmp4
+}
diff --git a/test/CodeGen/ARM/space-directive.ll b/test/CodeGen/ARM/space-directive.ll
new file mode 100644
index 000000000000..55be1991fe82
--- /dev/null
+++ b/test/CodeGen/ARM/space-directive.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=armv7 -o - %s | FileCheck %s
+
+define i32 @test_space() minsize {
+; CHECK-LABEL: test_space:
+; CHECK: ldr {{r[0-9]+}}, [[CPENTRY:.?LCPI[0-9]+_[0-9]+]]
+; CHECK: b [[PAST_CP:.?LBB[0-9]+_[0-9]+]]
+
+; CHECK: [[CPENTRY]]:
+; CHECK-NEXT: 12345678
+
+; CHECK: [[PAST_CP]]:
+; CHECK: .zero 10000
+  %addr = inttoptr i32 12345678 to i32*
+  %val = load i32* %addr
+  call i32 @llvm.arm.space(i32 10000, i32 undef)
+  ret i32 %val
+}
+
+declare i32 @llvm.arm.space(i32, i32)
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index 4fa97ea5b689..425fc12755cd 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -11,7 +11,7 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
 
 define void @aaa(%quuz* %this, i8* %block) {
 ; CHECK-LABEL: aaa:
-; CHECK: bic {{.*}}, #15
+; CHECK: bfc {{.*}}, #0, #4
 ; CHECK: vst1.64 {{.*}}sp:128
 ; CHECK: vld1.64 {{.*}}sp:128
 entry:
diff --git a/test/CodeGen/ARM/stack-alignment.ll b/test/CodeGen/ARM/stack-alignment.ll
new file mode 100644
index 000000000000..153f92e25f62
--- /dev/null
+++ b/test/CodeGen/ARM/stack-alignment.ll
@@ -0,0 +1,164 @@
+; RUN: llc -verify-machineinstrs < %s -mtriple=armv4t   | FileCheck %s -check-prefix=CHECK-v4A32
+; RUN: llc -verify-machineinstrs < %s -mtriple=armv7a   | FileCheck %s -check-prefix=CHECK-v7A32
+; RUN: llc -verify-machineinstrs < %s -mtriple=thumbv7a | FileCheck %s -check-prefix=CHECK-THUMB2
+; FIXME: There are no tests for Thumb1 since dynamic stack alignment is not supported for
+; Thumb1.
+
+define i32 @f_bic_can_be_used_align() nounwind {
+entry:
+; CHECK-LABEL: f_bic_can_be_used_align:
+; CHECK-v7A32: bfc        sp, #0, #8
+; CHECK-v4A32: bic        sp, sp, #255
+; CHECK-THUMB2:	mov	r4, sp
+; CHECK-THUMB2-NEXT: bfc	r4, #0, #8
+; CHECK-THUMB2-NEXT: mov	sp, r4
+  %x = alloca i32, align 256
+  store volatile i32 0, i32* %x, align 256
+  ret i32 0
+}
+
+define i32 @f_too_large_for_bic_align() nounwind {
+entry:
+; CHECK-LABEL: f_too_large_for_bic_align:
+; CHECK-v7A32: bfc sp, #0, #9
+; CHECK-v4A32: lsr sp, sp, #9
+; CHECK-v4A32: lsl sp, sp, #9
+; CHECK-THUMB2:	mov	r4, sp
+; CHECK-THUMB2-NEXT:	bfc	r4, #0, #9
+; CHECK-THUMB2-NEXT:	mov	sp, r4
+  %x = alloca i32, align 512
+  store volatile i32 0, i32* %x, align 512
+  ret i32 0
+}
+
+define i8* @f_alignedDPRCS2Spills(double* %d) #0 {
+entry:
+; CHECK-LABEL: f_too_large_for_bic_align:
+; CHECK-v7A32: bfc sp, #0, #12
+; CHECK-v4A32: lsr sp, sp, #12
+; CHECK-v4A32: lsl sp, sp, #12
+; CHECK-THUMB2:      bfc	r4, #0, #12
+; CHECK-THUMB2-NEXT: mov	sp, r4
+  %a = alloca i8, align 4096
+  %0 = load double* %d, align 4
+  %arrayidx1 = getelementptr inbounds double* %d, i32 1
+  %1 = load double* %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds double* %d, i32 2
+  %2 = load double* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds double* %d, i32 3
+  %3 = load double* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds double* %d, i32 4
+  %4 = load double* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds double* %d, i32 5
+  %5 = load double* %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds double* %d, i32 6
+  %6 = load double* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds double* %d, i32 7
+  %7 = load double* %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds double* %d, i32 8
+  %8 = load double* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds double* %d, i32 9
+  %9 = load double* %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds double* %d, i32 10
+  %10 = load double* %arrayidx10, align 4
+  %arrayidx11 = getelementptr inbounds double* %d, i32 11
+  %11 = load double* %arrayidx11, align 4
+  %arrayidx12 = getelementptr inbounds double* %d, i32 12
+  %12 = load double* %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds double* %d, i32 13
+  %13 = load double* %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds double* %d, i32 14
+  %14 = load double* %arrayidx14, align 4
+  %arrayidx15 = getelementptr inbounds double* %d, i32 15
+  %15 = load double* %arrayidx15, align 4
+  %arrayidx16 = getelementptr inbounds double* %d, i32 16
+  %16 = load double* %arrayidx16, align 4
+  %arrayidx17 = getelementptr inbounds double* %d, i32 17
+  %17 = load double* %arrayidx17, align 4
+  %arrayidx18 = getelementptr inbounds double* %d, i32 18
+  %18 = load double* %arrayidx18, align 4
+  %arrayidx19 = getelementptr inbounds double* %d, i32 19
+  %19 = load double* %arrayidx19, align 4
+  %arrayidx20 = getelementptr inbounds double* %d, i32 20
+  %20 = load double* %arrayidx20, align 4
+  %arrayidx21 = getelementptr inbounds double* %d, i32 21
+  %21 = load double* %arrayidx21, align 4
+  %arrayidx22 = getelementptr inbounds double* %d, i32 22
+  %22 = load double* %arrayidx22, align 4
+  %arrayidx23 = getelementptr inbounds double* %d, i32 23
+  %23 = load double* %arrayidx23, align 4
+  %arrayidx24 = getelementptr inbounds double* %d, i32 24
+  %24 = load double* %arrayidx24, align 4
+  %arrayidx25 = getelementptr inbounds double* %d, i32 25
+  %25 = load double* %arrayidx25, align 4
+  %arrayidx26 = getelementptr inbounds double* %d, i32 26
+  %26 = load double* %arrayidx26, align 4
+  %arrayidx27 = getelementptr inbounds double* %d, i32 27
+  %27 = load double* %arrayidx27, align 4
+  %arrayidx28 = getelementptr inbounds double* %d, i32 28
+  %28 = load double* %arrayidx28, align 4
+  %arrayidx29 = getelementptr inbounds double* %d, i32 29
+  %29 = load double* %arrayidx29, align 4
+  %div = fdiv double %29, %28
+  %div30 = fdiv double %div, %27
+  %div31 = fdiv double %div30, %26
+  %div32 = fdiv double %div31, %25
+  %div33 = fdiv double %div32, %24
+  %div34 = fdiv double %div33, %23
+  %div35 = fdiv double %div34, %22
+  %div36 = fdiv double %div35, %21
+  %div37 = fdiv double %div36, %20
+  %div38 = fdiv double %div37, %19
+  %div39 = fdiv double %div38, %18
+  %div40 = fdiv double %div39, %17
+  %div41 = fdiv double %div40, %16
+  %div42 = fdiv double %div41, %15
+  %div43 = fdiv double %div42, %14
+  %div44 = fdiv double %div43, %13
+  %div45 = fdiv double %div44, %12
+  %div46 = fdiv double %div45, %11
+  %div47 = fdiv double %div46, %10
+  %div48 = fdiv double %div47, %9
+  %div49 = fdiv double %div48, %8
+  %div50 = fdiv double %div49, %7
+  %div51 = fdiv double %div50, %6
+  %div52 = fdiv double %div51, %5
+  %div53 = fdiv double %div52, %4
+  %div54 = fdiv double %div53, %3
+  %div55 = fdiv double %div54, %2
+  %div56 = fdiv double %div55, %1
+  %div57 = fdiv double %div56, %0
+  %div58 = fdiv double %0, %1
+  %div59 = fdiv double %div58, %2
+  %div60 = fdiv double %div59, %3
+  %div61 = fdiv double %div60, %4
+  %div62 = fdiv double %div61, %5
+  %div63 = fdiv double %div62, %6
+  %div64 = fdiv double %div63, %7
+  %div65 = fdiv double %div64, %8
+  %div66 = fdiv double %div65, %9
+  %div67 = fdiv double %div66, %10
+  %div68 = fdiv double %div67, %11
+  %div69 = fdiv double %div68, %12
+  %div70 = fdiv double %div69, %13
+  %div71 = fdiv double %div70, %14
+  %div72 = fdiv double %div71, %15
+  %div73 = fdiv double %div72, %16
+  %div74 = fdiv double %div73, %17
+  %div75 = fdiv double %div74, %18
+  %div76 = fdiv double %div75, %19
+  %div77 = fdiv double %div76, %20
+  %div78 = fdiv double %div77, %21
+  %div79 = fdiv double %div78, %22
+  %div80 = fdiv double %div79, %23
+  %div81 = fdiv double %div80, %24
+  %div82 = fdiv double %div81, %25
+  %div83 = fdiv double %div82, %26
+  %div84 = fdiv double %div83, %27
+  %div85 = fdiv double %div84, %28
+  %div86 = fdiv double %div85, %29
+  %mul = fmul double %div57, %div86
+  %conv = fptosi double %mul to i32
+  %add.ptr = getelementptr inbounds i8* %a, i32 %conv
+  ret i8* %add.ptr
+}
diff --git a/test/CodeGen/ARM/stack_guard_remat.ll b/test/CodeGen/ARM/stack_guard_remat.ll
new file mode 100644
index 000000000000..7c89b99b8f97
--- /dev/null
+++ b/test/CodeGen/ARM/stack_guard_remat.ll
@@ -0,0 +1,70 @@
+; RUN: llc < %s -mtriple=arm-apple-ios -relocation-model=pic -no-integrated-as | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -mtriple=arm-apple-ios -relocation-model=static -no-integrated-as | FileCheck %s -check-prefix=NO-PIC -check-prefix=STATIC
+; RUN: llc < %s -mtriple=arm-apple-ios -relocation-model=dynamic-no-pic -no-integrated-as | FileCheck %s  -check-prefix=NO-PIC -check-prefix=DYNAMIC-NO-PIC
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 -relocation-model=pic -no-integrated-as | FileCheck %s -check-prefix=PIC-V7
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 -relocation-model=static -no-integrated-as | FileCheck %s -check-prefix=STATIC-V7
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a8 -relocation-model=dynamic-no-pic -no-integrated-as | FileCheck %s -check-prefix=DYNAMIC-NO-PIC-V7
+
+;PIC:   foo2
+;PIC:   ldr [[R0:r[0-9]+]], [[LABEL0:LCPI[0-9_]+]]
+;PIC: [[LABEL1:LPC0_1]]:
+;PIC:   add [[R1:r[0-9]+]], pc, [[R0]]
+;PIC:   ldr [[R2:r[0-9]+]], {{\[}}[[R1]]{{\]}}
+;PIC:   ldr {{r[0-9]+}}, {{\[}}[[R2]]{{\]}}
+
+;PIC:      [[LABEL0]]:
+;PIC-NEXT:   .long L___stack_chk_guard$non_lazy_ptr-([[LABEL1]]+8)
+
+;NO-PIC: foo2
+;NO-PIC: ldr [[R0:r[0-9]+]], [[LABEL0:LCPI[0-9_]+]]
+;NO-PIC-NOT: LPC
+;NO-PIC: ldr {{r[0-9]+}}, {{\[}}[[R0]]{{\]}}
+
+;STATIC:      [[LABEL0]]:
+;STATIC-NEXT:   .long ___stack_chk_guard
+
+;DYNAMIC-NO-PIC:      [[LABEL0]]:
+;DYNAMIC-NO-PIC-NEXT:   .long L___stack_chk_guard$non_lazy_ptr
+
+;PIC-V7:   movw [[R0:r[0-9]+]], :lower16:(L___stack_chk_guard$non_lazy_ptr-([[LABEL0:LPC[0-9_]+]]+8))
+;PIC-V7:   movt [[R0]], :upper16:(L___stack_chk_guard$non_lazy_ptr-([[LABEL0]]+8))
+;PIC-V7: [[LABEL0]]:
+;PIC-V7:   ldr [[R0]], {{\[}}pc, [[R0]]{{\]}}
+;PIC-V7:   ldr [[R0]], {{\[}}[[R0]]{{\]}}
+
+;PIC-V7: L___stack_chk_guard$non_lazy_ptr:
+;PIC-V7:   .indirect_symbol        ___stack_chk_guard
+
+;STATIC-V7: movw [[R0:r[0-9]+]], :lower16:___stack_chk_guard
+;STATIC-V7: movt [[R0]], :upper16:___stack_chk_guard
+;STATIC-V7: ldr  [[R0]], {{\[}}[[R0]]{{\]}}
+
+;DYNAMIC-NO-PIC-V7: movw [[R0:r[0-9]+]], :lower16:L___stack_chk_guard$non_lazy_ptr
+;DYNAMIC-NO-PIC-V7: movt [[R0]], :upper16:L___stack_chk_guard$non_lazy_ptr
+;DYNAMIC-NO-PIC-V7: ldr  [[R0]], {{\[}}[[R0]]{{\]}}
+;DYNAMIC-NO-PIC-V7: ldr  [[R0]], {{\[}}[[R0]]{{\]}}
+
+;DYNAMIC-NO-PIC-V7: L___stack_chk_guard$non_lazy_ptr:
+;DYNAMIC-NO-PIC-V7:   .indirect_symbol        ___stack_chk_guard
+
+; Function Attrs: nounwind ssp
+define i32 @test_stack_guard_remat() #0 {
+  %a1 = alloca [256 x i32], align 4
+  %1 = bitcast [256 x i32]* %a1 to i8*
+  call void @llvm.lifetime.start(i64 1024, i8* %1)
+  %2 = getelementptr inbounds [256 x i32]* %a1, i32 0, i32 0
+  call void @foo3(i32* %2) #3
+  call void asm sideeffect "foo2", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{sp},~{lr}"()
+  call void @llvm.lifetime.end(i64 1024, i8* %1)
+  ret i32 0
+}
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+
+declare void @foo3(i32*)
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/ARM/stackpointer.ll b/test/CodeGen/ARM/stackpointer.ll
index 420a9166d790..320f0d945bfc 100644
--- a/test/CodeGen/ARM/stackpointer.ll
+++ b/test/CodeGen/ARM/stackpointer.ll
@@ -22,4 +22,4 @@ declare void @llvm.write_register.i32(metadata, i32) nounwind
 
 ; register unsigned long current_stack_pointer asm("sp");
 ; CHECK-NOT: .asciz  "sp"
-!0 = metadata !{metadata !"sp\00"}
+!0 = !{!"sp\00"}
diff --git a/test/CodeGen/ARM/swift-atomics.ll b/test/CodeGen/ARM/swift-atomics.ll
index 1d7181557100..8b100f1f41f2 100644
--- a/test/CodeGen/ARM/swift-atomics.ll
+++ b/test/CodeGen/ARM/swift-atomics.ll
@@ -8,6 +8,7 @@ define void @test_store_release(i32* %p, i32 %v) {
 ; CHECK: dmb ishst
 ; CHECK: str
 
+; CHECK-STRICT-ATOMIC-LABEL: test_store_release:
 ; CHECK-STRICT-ATOMIC: dmb {{ish$}}
   store atomic i32 %v, i32* %p release, align 4
   ret void
@@ -24,7 +25,11 @@ define i32 @test_seq_cst(i32* %p, i32 %v) {
 ; CHECK: ldr
 ; CHECK: dmb {{ish$}}
 
+; CHECK-STRICT-ATOMIC-LABEL: test_seq_cst:
 ; CHECK-STRICT-ATOMIC: dmb {{ish$}}
+; CHECK-STRICT-ATOMIC: str
+; CHECK-STRICT-ATOMIC: dmb {{ish$}}
+; CHECK-STRICT-ATOMIC: ldr
 ; CHECK-STRICT-ATOMIC: dmb {{ish$}}
 
   store atomic i32 %v, i32* %p seq_cst, align 4
@@ -39,6 +44,7 @@ define i32 @test_acq(i32* %addr) {
 ; CHECK: ldr
 ; CHECK: dmb {{ish$}}
 
+; CHECK-STRICT-ATOMIC-LABEL: test_acq:
 ; CHECK-STRICT-ATOMIC: dmb {{ish$}}
   %val = load atomic i32* %addr acquire, align 4
   ret i32 %val
diff --git a/test/CodeGen/ARM/sxt_rot.ll b/test/CodeGen/ARM/sxt_rot.ll
index 5ddea2ec13dc..41626910c3b1 100644
--- a/test/CodeGen/ARM/sxt_rot.ll
+++ b/test/CodeGen/ARM/sxt_rot.ll
@@ -9,7 +9,8 @@ define i32 @test0(i8 %A) {
 
 define signext i8 @test1(i32 %A) {
 ; CHECK: test1
-; CHECK: sxtb r0, r0, ror #8
+; CHECK: lsr r0, r0, #8
+; CHECK: sxtb r0, r0
   %B = lshr i32 %A, 8
   %C = shl i32 %A, 24
   %D = or i32 %B, %C
diff --git a/test/CodeGen/ARM/tail-call-weak.ll b/test/CodeGen/ARM/tail-call-weak.ll
new file mode 100644
index 000000000000..466c33d38786
--- /dev/null
+++ b/test/CodeGen/ARM/tail-call-weak.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple thumbv7-windows-coff -filetype asm -o - %s | FileCheck %s -check-prefix CHECK-COFF
+; RUN: llc -mtriple thumbv7-elf -filetype asm -o - %s | FileCheck %s -check-prefix CHECK-ELF
+; RUN: llc -mtriple thumbv7-macho -filetype asm -o - %s | FileCheck %s -check-prefix CHECK-MACHO
+
+declare i8* @f()
+declare extern_weak i8* @g(i8*)
+
+; weak symbol resolution occurs statically in PE/COFF, ensure that we permit
+; tail calls on weak externals when targeting a COFF environment.
+define void @test() {
+  %call = tail call i8* @f()
+  %call1 = tail call i8* @g(i8* %call)
+  ret void
+}
+
+; CHECK-COFF: b g
+; CHECK-ELF: bl g
+; CHECK-MACHO: blx _g
+
diff --git a/test/CodeGen/ARM/tail-call.ll b/test/CodeGen/ARM/tail-call.ll
index 771158632ecf..ca19b057773a 100644
--- a/test/CodeGen/ARM/tail-call.ll
+++ b/test/CodeGen/ARM/tail-call.ll
@@ -1,8 +1,10 @@
-; RUN: llc -mtriple armv7 -O0 -o - < %s | FileCheck %s -check-prefix CHECK-TAIL
-; RUN: llc -mtriple armv7 -O0 -disable-tail-calls -o - < %s \
+; RUN: llc -mtriple armv7 -target-abi apcs -O0 -o - < %s \
+; RUN:   | FileCheck %s -check-prefix CHECK-TAIL
+; RUN: llc -mtriple armv7 -target-abi apcs -O0 -disable-tail-calls -o - < %s \
 ; RUN:   | FileCheck %s -check-prefix CHECK-NO-TAIL
 
 declare i32 @callee(i32 %i)
+declare extern_weak fastcc void @callee_weak()
 
 define i32 @caller(i32 %i) {
 entry:
@@ -19,3 +21,12 @@ entry:
 ; CHECK-NO-TAIL: pop {lr}
 ; CHECK-NO-TAIL: bx lr
 
+
+; Weakly-referenced extern functions cannot be tail-called, as AAELF does
+; not define the behaviour of branch instructions to undefined weak symbols.
+define fastcc void @caller_weak() {
+; CHECK-LABEL: caller_weak:
+; CHECK: bl callee_weak
+  tail call void @callee_weak()
+  ret void
+}
diff --git a/test/CodeGen/ARM/tail-merge-branch-weight.ll b/test/CodeGen/ARM/tail-merge-branch-weight.ll
new file mode 100644
index 000000000000..95b0a202e7ff
--- /dev/null
+++ b/test/CodeGen/ARM/tail-merge-branch-weight.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=arm-apple-ios -print-machineinstrs=branch-folder \
+; RUN: %s -o /dev/null 2>&1 | FileCheck %s
+
+; Branch probability of tailed-merged block:
+;
+; p(L0_L1 -> L2) = p(entry -> L0) * p(L0 -> L2) + p(entry -> L1) * p(L1 -> L2)
+;                = 0.2 * 0.6 + 0.8 * 0.3 = 0.36
+; p(L0_L1 -> L3) = p(entry -> L0) * p(L0 -> L3) + p(entry -> L1) * p(L1 -> L3)
+;                = 0.2 * 0.4 + 0.8 * 0.7 = 0.64
+
+; CHECK: # Machine code for function test0:
+; CHECK: Successors according to CFG: BB#{{[0-9]+}}(13) BB#{{[0-9]+}}(24)
+; CHECK: BB#{{[0-9]+}}:
+; CHECK: BB#{{[0-9]+}}:
+; CHECK: # End machine code for function test0.
+
+define i32 @test0(i32 %n, i32 %m, i32* nocapture %a, i32* nocapture %b) {
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %L0, label %L1, !prof !0
+
+L0:                                          ; preds = %entry
+  store i32 12, i32* %a, align 4
+  store i32 18, i32* %b, align 4
+  %cmp1 = icmp eq i32 %m, 8
+  br i1 %cmp1, label %L2, label %L3, !prof !1
+
+L1:                                          ; preds = %entry
+  store i32 14, i32* %a, align 4
+  store i32 18, i32* %b, align 4
+  %cmp3 = icmp eq i32 %m, 8
+  br i1 %cmp3, label %L2, label %L3, !prof !2
+
+L2:                                               ; preds = %L1, %L0
+  br label %L3
+
+L3:                                           ; preds = %L0, %L1, %L2
+  %retval.0 = phi i32 [ 100, %L2 ], [ 6, %L1 ], [ 6, %L0 ]
+  ret i32 %retval.0
+}
+
+!0 = !{!"branch_weights", i32 200, i32 800}
+!1 = !{!"branch_weights", i32 600, i32 400}
+!2 = !{!"branch_weights", i32 300, i32 700}
diff --git a/test/CodeGen/ARM/taildup-branch-weight.ll b/test/CodeGen/ARM/taildup-branch-weight.ll
index 0a16071a6615..64e0f4bcdefc 100644
--- a/test/CodeGen/ARM/taildup-branch-weight.ll
+++ b/test/CodeGen/ARM/taildup-branch-weight.ll
@@ -27,7 +27,7 @@ B4:
   ret void
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 4, i32 124}
+!0 = !{!"branch_weights", i32 4, i32 124}
 
 ; CHECK: Machine code for function test1:
 ; CHECK: Successors according to CFG: BB#1(8) BB#2(248)
@@ -51,4 +51,4 @@ B3:
   ret void
 }
 
-!1 = metadata !{metadata !"branch_weights", i32 248, i32 8}
+!1 = !{!"branch_weights", i32 248, i32 8}
diff --git a/test/CodeGen/ARM/thumb1-varalloc.ll b/test/CodeGen/ARM/thumb1-varalloc.ll
index e07e8aab77aa..8d5888d38f97 100644
--- a/test/CodeGen/ARM/thumb1-varalloc.ll
+++ b/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -1,13 +1,15 @@
 ; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
 ; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s
-; rdar://8819685
+; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-apple-darwin
+; RUN: llvm-objdump -triple=thumbv6-apple-darwin -d %t | FileCheck %s
 
 @__bar = external hidden global i8*
 @__baz = external hidden global i8*
 
+; rdar://8819685
 define i8* @_foo() {
 entry:
-; CHECK: foo:
+; CHECK-LABEL: foo:
 
 	%size = alloca i32, align 4
 	%0 = load i8** @__bar, align 4
@@ -40,3 +42,102 @@ bb3:
 
 declare noalias i8* @strdup(i8* nocapture) nounwind
 declare i32 @_called_func(i8*, i32*) nounwind
+
+; Variable ending up at unaligned offset from sp (i.e. not a multiple of 4)
+define void @test_local_var_addr() {
+; CHECK-LABEL: test_local_var_addr:
+
+  %addr1 = alloca i8
+  %addr2 = alloca i8
+
+; CHECK: mov r0, sp
+; CHECK: adds r0, #{{[0-9]+}}
+; CHECK: blx
+  call void @take_ptr(i8* %addr1)
+
+; CHECK: mov r0, sp
+; CHECK: adds r0, #{{[0-9]+}}
+; CHECK: blx
+  call void @take_ptr(i8* %addr2)
+
+  ret void
+}
+
+; Simple variable ending up *at* sp.
+define void @test_simple_var() {
+; CHECK-LABEL: test_simple_var:
+
+  %addr32 = alloca i32
+  %addr8 = bitcast i32* %addr32 to i8*
+
+; CHECK: mov r0, sp
+; CHECK-NOT: adds r0
+; CHECK: blx
+  call void @take_ptr(i8* %addr8)
+  ret void
+}
+
+; Simple variable ending up at aligned offset from sp.
+define void @test_local_var_addr_aligned() {
+; CHECK-LABEL: test_local_var_addr_aligned:
+
+  %addr1.32 = alloca i32
+  %addr1 = bitcast i32* %addr1.32 to i8*
+  %addr2.32 = alloca i32
+  %addr2 = bitcast i32* %addr2.32 to i8*
+
+; CHECK: add r0, sp, #{{[0-9]+}}
+; CHECK: blx
+  call void @take_ptr(i8* %addr1)
+
+; CHECK: mov r0, sp
+; CHECK-NOT: add r0
+; CHECK: blx
+  call void @take_ptr(i8* %addr2)
+
+  ret void
+}
+
+; Simple variable ending up at aligned offset from sp.
+define void @test_local_var_big_offset() {
+; CHECK-LABEL: test_local_var_big_offset:
+  %addr1.32 = alloca i32, i32 257
+  %addr1 = bitcast i32* %addr1.32 to i8*
+  %addr2.32 = alloca i32, i32 257
+
+; CHECK: add [[RTMP:r[0-9]+]], sp, #1020
+; CHECK: adds [[RTMP]], #8
+; CHECK: blx
+  call void @take_ptr(i8* %addr1)
+
+  ret void
+}
+
+; Max range addressable with tADDrSPi
+define void @test_local_var_offset_1020() {
+; CHECK-LABEL: test_local_var_offset_1020
+  %addr1 = alloca i8, i32 4
+  %addr2 = alloca i8, i32 1020
+
+; CHECK: add r0, sp, #1020
+; CHECK-NEXT: blx
+  call void @take_ptr(i8* %addr1)
+
+  ret void
+}
+
+; Max range addressable with tADDrSPi + tADDi8
+define void @test_local_var_offset_1275() {
+; CHECK-LABEL: test_local_var_offset_1275
+  %addr1 = alloca i8, i32 1
+  %addr2 = alloca i8, i32 1275
+
+; CHECK: add r0, sp, #1020
+; CHECK: adds r0, #255
+; CHECK-NEXT: blx
+  call void @take_ptr(i8* %addr1)
+
+  ret void
+}
+
+declare void @take_ptr(i8*)
diff --git a/test/CodeGen/ARM/thumb1_return_sequence.ll b/test/CodeGen/ARM/thumb1_return_sequence.ll
new file mode 100644
index 000000000000..318e6e402370
--- /dev/null
+++ b/test/CodeGen/ARM/thumb1_return_sequence.ll
@@ -0,0 +1,217 @@
+; RUN: llc -mtriple=thumbv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK-V4T
+; RUN: llc -mtriple=thumbv5t-none--eabi < %s | FileCheck %s --check-prefix=CHECK-V5T
+
+; CHECK-V4T-LABEL: clobberframe
+; CHECK-V5T-LABEL: clobberframe
+define <4 x i32> @clobberframe() #0 {
+entry:
+; Prologue
+; --------
+; CHECK-V4T:    push {[[SAVED:(r[4567](, )?)+]], lr}
+; CHECK-V4T:    sub sp,
+; CHECK-V5T:    push {[[SAVED:(r[4567](, )?)+]], lr}
+
+  %b = alloca <4 x i32>, align 16
+  %a = alloca <4 x i32>, align 16
+  store <4 x i32> <i32 42, i32 42, i32 42, i32 42>, <4 x i32>* %b, align 16
+  store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a, align 16
+  %0 = load <4 x i32>* %a, align 16
+  ret <4 x i32> %0
+
+; Epilogue
+; --------
+; CHECK-V4T:         add sp,
+; CHECK-V4T-NEXT:    pop {[[SAVED]]}
+; CHECK-V4T-NEXT:    mov r12, r3
+; CHECK-V4T-NEXT:    pop {r3}
+; CHECK-V4T-NEXT:    mov lr, r3
+; CHECK-V4T-NEXT:    mov r3, r12
+; CHECK-V4T:         bx  lr
+; CHECK-V5T:         pop {[[SAVED]], pc}
+}
+
+; CHECK-V4T-LABEL: clobbervariadicframe
+; CHECK-V5T-LABEL: clobbervariadicframe
+define <4 x i32> @clobbervariadicframe(i32 %i, ...) #0 {
+entry:
+; Prologue
+; --------
+; CHECK-V4T:    sub sp,
+; CHECK-V4T:    push {[[SAVED:(r[4567](, )?)+]], lr}
+; CHECK-V5T:    sub sp,
+; CHECK-V5T:    push {[[SAVED:(r[4567](, )?)+]], lr}
+
+  %b = alloca <4 x i32>, align 16
+  %a = alloca <4 x i32>, align 16
+  store <4 x i32> <i32 42, i32 42, i32 42, i32 42>, <4 x i32>* %b, align 16
+  store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a, align 16
+  %0 = load <4 x i32>* %a, align 16
+  call void @llvm.va_start(i8* null)
+  ret <4 x i32> %0
+
+; Epilogue
+; --------
+; CHECK-V4T:         pop {[[SAVED]]}
+; CHECK-V4T-NEXT:    mov r12, r3
+; CHECK-V4T-NEXT:    pop {r3}
+; CHECK-V4T-NEXT:    add sp,
+; CHECK-V4T-NEXT:    mov lr, r3
+; CHECK-V4T-NEXT:    mov r3, r12
+; CHECK-V4T:         bx  lr
+; CHECK-V5T:         add sp,
+; CHECK-V5T-NEXT:    pop {[[SAVED]]}
+; CHECK-V5T-NEXT:    mov r12, r3
+; CHECK-V5T-NEXT:    pop {r3}
+; CHECK-V5T-NEXT:    add sp,
+; CHECK-V5T-NEXT:    mov lr, r3
+; CHECK-V5T-NEXT:    mov r3, r12
+; CHECK-V5T-NEXT:    bx lr
+}
+
+; CHECK-V4T-LABEL: simpleframe
+; CHECK-V5T-LABEL: simpleframe
+define i32 @simpleframe() #0 {
+entry:
+; Prologue
+; --------
+; CHECK-V4T:    push    {[[SAVED:(r[4567](, )?)+]], lr}
+; CHECK-V5T:    push    {[[SAVED:(r[4567](, )?)+]], lr}
+
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  %d = alloca i32, align 4
+  store i32 1, i32* %a, align 4
+  store i32 2, i32* %b, align 4
+  store i32 3, i32* %c, align 4
+  store i32 4, i32* %d, align 4
+  %0 = load i32* %a, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %a, align 4
+  %1 = load i32* %b, align 4
+  %inc1 = add nsw i32 %1, 1
+  store i32 %inc1, i32* %b, align 4
+  %2 = load i32* %c, align 4
+  %inc2 = add nsw i32 %2, 1
+  store i32 %inc2, i32* %c, align 4
+  %3 = load i32* %d, align 4
+  %inc3 = add nsw i32 %3, 1
+  store i32 %inc3, i32* %d, align 4
+  %4 = load i32* %a, align 4
+  %5 = load i32* %b, align 4
+  %add = add nsw i32 %4, %5
+  %6 = load i32* %c, align 4
+  %add4 = add nsw i32 %add, %6
+  %7 = load i32* %d, align 4
+  %add5 = add nsw i32 %add4, %7
+  ret i32 %add5
+
+; Epilogue
+; --------
+; CHECK-V4T:    pop {[[SAVED]]}
+; CHECK-V4T:    pop {r3}
+; CHECK-V4T:    bx r3
+; CHECK-V5T:    pop {[[SAVED]], pc}
+}
+
+; CHECK-V4T-LABEL: simplevariadicframe
+; CHECK-V5T-LABEL: simplevariadicframe
+define i32 @simplevariadicframe(i32 %i, ...) #0 {
+entry:
+; Prologue
+; --------
+; CHECK-V4T:    sub sp,
+; CHECK-V4T:    push {[[SAVED:(r[4567](, )?)+]], lr}
+; CHECK-V4T:    sub sp,
+; CHECK-V5T:    sub sp,
+; CHECK-V5T:    push {[[SAVED:(r[4567](, )?)+]], lr}
+; CHECK-V5T:    sub sp,
+
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  %d = alloca i32, align 4
+  store i32 1, i32* %a, align 4
+  store i32 2, i32* %b, align 4
+  store i32 3, i32* %c, align 4
+  store i32 4, i32* %d, align 4
+  %0 = load i32* %a, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %a, align 4
+  %1 = load i32* %b, align 4
+  %inc1 = add nsw i32 %1, 1
+  store i32 %inc1, i32* %b, align 4
+  %2 = load i32* %c, align 4
+  %inc2 = add nsw i32 %2, 1
+  store i32 %inc2, i32* %c, align 4
+  %3 = load i32* %d, align 4
+  %inc3 = add nsw i32 %3, 1
+  store i32 %inc3, i32* %d, align 4
+  %4 = load i32* %a, align 4
+  %5 = load i32* %b, align 4
+  %add = add nsw i32 %4, %5
+  %6 = load i32* %c, align 4
+  %add4 = add nsw i32 %add, %6
+  %7 = load i32* %d, align 4
+  %add5 = add nsw i32 %add4, %7
+  %add6 = add nsw i32 %add5, %i
+  call void @llvm.va_start(i8* null)
+  ret i32 %add6
+
+; Epilogue
+; --------
+; CHECK-V4T:         add sp,
+; CHECK-V4T-NEXT:    pop {[[SAVED]]}
+; CHECK-V4T-NEXT:    pop {r3}
+; CHECK-V4T-NEXT:    add sp,
+; CHECK-V4T-NEXT:    bx r3
+; CHECK-V5T:         add sp,
+; CHECK-V5T-NEXT:    pop {[[SAVED]]}
+; CHECK-V5T-NEXT:    pop {r3}
+; CHECK-V5T-NEXT:    add sp,
+; CHECK-V5T-NEXT:    bx r3
+}
+
+; CHECK-V4T-LABEL: noframe
+; CHECK-V5T-LABEL: noframe
+define i32 @noframe() #0 {
+entry:
+; Prologue
+; --------
+; CHECK-V4T-NOT: push
+; CHECK-V5T-NOT: push
+    ret i32 0;
+; Epilogue
+; --------
+; CHECK-V4T-NOT: pop
+; CHECK-V5T-NOT: pop
+; CHECK-V4T:    bx  lr
+; CHECK-V5T:    bx  lr
+}
+
+; CHECK-V4T-LABEL: novariadicframe
+; CHECK-V5T-LABEL: novariadicframe
+define i32 @novariadicframe(i32 %i, ...) #0 {
+entry:
+; Prologue
+; --------
+; CHECK-V4T:    sub sp,
+; CHECK-V4T:    push {[[SAVED:(r[4567](, )?)+]], lr}
+; CHECK-V5T:    sub sp,
+; CHECK-V5T:    push {[[SAVED:(r[4567](, )?)+]], lr}
+
+  call void @llvm.va_start(i8* null)
+  ret i32 %i;
+; Epilogue
+; --------
+; CHECK-V4T:         pop {[[SAVED]]}
+; CHECK-V4T-NEXT:    pop {r3}
+; CHECK-V4T-NEXT:    add sp,
+; CHECK-V4T-NEXT:    bx r3
+; CHECK-V5T:         pop {[[SAVED]]}
+; CHECK-V5T-NEXT:    pop {r3}
+; CHECK-V5T-NEXT:    add sp,
+; CHECK-V5T-NEXT:    bx r3
+}
+
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/ARM/thumb2-it-block.ll b/test/CodeGen/ARM/thumb2-it-block.ll
index c5e699c155a1..2675a733da97 100644
--- a/test/CodeGen/ARM/thumb2-it-block.ll
+++ b/test/CodeGen/ARM/thumb2-it-block.ll
@@ -1,15 +1,9 @@
-; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
-; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck -check-prefix CHECK-V7 %s
+; RUN: llc -mtriple=thumbv8 %s -o - | FileCheck %s -check-prefix CHECK-V8
 ; PR11107
 
 define i32 @test(i32 %a, i32 %b) {
 entry:
-; CHECK:        cmp
-; CHECK-NEXT:   it    mi
-; CHECK-NEXT:   rsb{{s?}}mi
-; CHECK-NEXT:   cmp
-; CHECK-NEXT:   it    mi
-; CHECK-NEXT:   rsb{{s?}}mi
  %cmp1 = icmp slt i32 %a, 0
  %sub1 = sub nsw i32 0, %a
  %abs1 = select i1 %cmp1, i32 %sub1, i32 %a
@@ -19,3 +13,18 @@ entry:
  %add = add nsw i32 %abs1, %abs2
  ret i32 %add
 }
+
+; CHECK-V7:        cmp
+; CHECK-V7-NEXT:   it    mi
+; CHECK-V7-NEXT:   rsbmi
+; CHECK-V7-NEXT:   cmp
+; CHECK-V7-NEXT:   it    mi
+; CHECK-V7-NEXT:   rsbmi
+
+; CHECK-V8:        cmp
+; CHECK-V8-NEXT:   bpl
+; CHECK-V8:        rsbs
+; CHECK-V8:        cmp
+; CHECK-V8-NEXT:   bpl
+; CHECK-V8:        rsbs
+
diff --git a/test/CodeGen/ARM/thumb2-size-opt.ll b/test/CodeGen/ARM/thumb2-size-opt.ll
new file mode 100644
index 000000000000..0084a456a72e
--- /dev/null
+++ b/test/CodeGen/ARM/thumb2-size-opt.ll
@@ -0,0 +1,84 @@
+; RUN: llc -mtriple=thumbv7-linux-gnueabihf -o - -show-mc-encoding -t2-reduce-limit=0 -t2-reduce-limit2=0 %s | FileCheck %s
+; RUN: llc -mtriple=thumbv7-linux-gnueabihf -o - -show-mc-encoding %s | FileCheck %s --check-prefix=CHECK-OPT
+
+define i32 @and(i32 %a, i32 %b) nounwind readnone {
+; CHECK-LABEL: and:
+; CHECK: and.w r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} @ encoding: [{{0x..,0x..,0x..,0x..}}]
+; CHECK-OPT: ands r{{[0-7]}}, r{{[0-7]}} @ encoding: [{{0x..,0x..}}]
+entry:
+  %and = and i32 %b, %a
+  ret i32 %and
+}
+
+define i32 @asr-imm(i32 %a) nounwind readnone {
+; CHECK-LABEL: "asr-imm":
+; CHECK: asr.w r{{[0-9]+}}, r{{[0-9]+}}, #13 @ encoding: [{{0x..,0x..,0x..,0x..}}]
+; CHECK-OPT: asrs r{{[0-7]}}, r{{[0-7]}}, #13 @ encoding: [{{0x..,0x..}}]
+entry:
+  %shr = ashr i32 %a, 13
+  ret i32 %shr
+}
+
+define i32 @asr-reg(i32 %a, i32 %b) nounwind readnone {
+; CHECK-LABEL: "asr-reg":
+; CHECK: asr.w r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} @ encoding: [{{0x..,0x..,0x..,0x..}}]
+; CHECK-OPT: asrs r{{[0-7]}}, r{{[0-7]}} @ encoding: [{{0x..,0x..}}]
+entry:
+  %shr = ashr i32 %a, %b
+  ret i32 %shr
+}
+
+define i32 @bic(i32 %a, i32 %b) nounwind readnone {
+; CHECK-LABEL: bic:
+; CHECK: bic.w r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} @ encoding: [{{0x..,0x..,0x..,0x..}}]
+; CHECK-OPT: bics r{{[0-7]}}, r{{[0-7]}} @ encoding: [{{0x..,0x..}}]
+entry:
+  %neg = xor i32 %b, -1
+  %and = and i32 %neg, %a
+  ret i32 %and
+}
+
+define i32 @eor(i32 %a, i32 %b) nounwind readnone {
+; CHECK-LABEL: eor:
+; CHECK: eor.w r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} @ encoding: [{{0x..,0x..,0x..,0x..}}]
+; CHECK-OPT: eors r{{[0-7]}}, r{{[0-7]}} @ encoding: [{{0x..,0x..}}]
+entry:
+  %eor = xor i32 %a, %b
+  ret i32 %eor
+}
+
+define i32 @lsl-imm(i32 %a) nounwind readnone {
+; CHECK-LABEL: "lsl-imm":
+; CHECK: lsl.w r{{[0-9]+}}, r{{[0-9]+}}, #13 @ encoding: [{{0x..,0x..,0x..,0x..}}]
+; CHECK-OPT: lsls r{{[0-7]}}, r{{[0-7]}}, #13  @ encoding: [{{0x..,0x..}}]
+entry:
+  %shl = shl i32 %a, 13
+  ret i32 %shl
+}
+
+define i32 @lsl-reg(i32 %a, i32 %b) nounwind readnone {
+; CHECK-LABEL: "lsl-reg":
+; CHECK: lsl.w r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} @ encoding: [{{0x..,0x..,0x..,0x..}}]
+; CHECK-OPT: lsls r{{[0-7]}}, r{{[0-7]}}  @ encoding: [{{0x..,0x..}}]
+entry:
+  %shl = shl i32 %a, %b
+  ret i32 %shl
+}
+
+define i32 @lsr-imm(i32 %a) nounwind readnone {
+; CHECK-LABEL: "lsr-imm":
+; CHECK: lsr.w r{{[0-9]+}}, r{{[0-9]+}}, #13 @ encoding: [{{0x..,0x..,0x..,0x..}}]
+; CHECK-OPT: lsrs r{{[0-7]}}, r{{[0-7]}}, #13  @ encoding: [{{0x..,0x..}}]
+entry:
+  %shr = lshr i32 %a, 13
+  ret i32 %shr
+}
+
+define i32 @lsr-reg(i32 %a, i32 %b) nounwind readnone {
+; CHECK-LABEL: "lsr-reg":
+; CHECK: lsr.w r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}} @ encoding: [{{0x..,0x..,0x..,0x..}}]
+; CHECK-OPT: lsrs r{{[0-7]}}, r{{[0-7]}}  @ encoding: [{{0x..,0x..}}]
+entry:
+  %shr = lshr i32 %a, %b
+  ret i32 %shr
+}
diff --git a/test/CodeGen/ARM/thumb_indirect_calls.ll b/test/CodeGen/ARM/thumb_indirect_calls.ll
new file mode 100644
index 000000000000..16a55a882d9a
--- /dev/null
+++ b/test/CodeGen/ARM/thumb_indirect_calls.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mtriple=thumbv4t-eabi %s -o - | FileCheck ---check-prefix=CHECK -check-prefix=CHECK-V4T %s
+; RUN: llc -mtriple=thumbv5t-eabi %s -o - | FileCheck ---check-prefix=CHECK -check-prefix=CHECK-V5T %s
+
+@f = common global void (i32)* null, align 4
+
+; CHECK-LABEL foo:
+define void @foo(i32 %x) {
+entry:
+  %0 = load void (i32)** @f, align 4
+  tail call void %0(i32 %x)
+  ret void
+
+; CHECK: ldr [[TMP:r[0-3]]], [[F:\.[A-Z0-9_]+]]
+; CHECK: ldr [[CALLEE:r[0-3]]], {{\[}}[[TMP]]{{\]}}
+
+; CHECK-V4T-NOT: blx
+; CHECK-V4T: bl [[INDIRECT_PAD:\.Ltmp[0-9]+]]
+; CHECK-V4T: [[F]]:
+; CHECK-V4T: [[INDIRECT_PAD]]:
+; CHECK-V4T-NEXT: bx [[CALLEE]]
+; CHECK-V5T: blx [[CALLEE]]
+}
+
+; CHECK-LABEL bar:
+define void @bar(void (i32)* nocapture %g, i32 %x, void (i32)* nocapture %h) {
+entry:
+  tail call void %g(i32 %x)
+  tail call void %h(i32 %x)
+  ret void
+
+; CHECK-V4T: bl [[INDIRECT_PAD1:\.Ltmp[0-9]+]]
+; CHECK-V4T: bl [[INDIRECT_PAD2:\.Ltmp[0-9]+]]
+; CHECK-V4T: [[INDIRECT_PAD1]]:
+; CHECK-V4T-NEXT: bx
+; CHECK-V4T: [[INDIRECT_PAD2]]:
+; CHECK-V4T-NEXT: bx
+; CHECK-V5T: blx
+; CHECK-V5T: blx
+}
+
diff --git a/test/CodeGen/ARM/tls1.ll b/test/CodeGen/ARM/tls1.ll
index a1ca0b758b45..b03f76b6ef08 100644
--- a/test/CodeGen/ARM/tls1.ll
+++ b/test/CodeGen/ARM/tls1.ll
@@ -1,11 +1,13 @@
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN:     grep "i(TPOFF)"
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | \
-; RUN:     grep "__aeabi_read_tp"
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi \
-; RUN:     -relocation-model=pic | grep "__tls_get_addr"
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi | FileCheck %s
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi -relocation-model=pic | \
+; RUN:   FileCheck %s --check-prefix=PIC
 
 
+; CHECK: i(TPOFF)
+; CHECK: __aeabi_read_tp
+
+; PIC: __tls_get_addr
+
 @i = thread_local global i32 15		; <i32*> [#uses=2]
 
 define i32 @f() {
diff --git a/test/CodeGen/ARM/vararg_no_start.ll b/test/CodeGen/ARM/vararg_no_start.ll
new file mode 100644
index 000000000000..f9c8c1b75466
--- /dev/null
+++ b/test/CodeGen/ARM/vararg_no_start.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mtriple=arm-darwin < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=arm-darwin < %s | FileCheck %s
+
+define void @foo(i8*, ...) {
+  ret void
+}
+; CHECK-LABEL: {{^_?}}foo:
+; CHECK-NOT: str
+; CHECK: {{bx lr|mov pc, lr}}
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll b/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll
index 19d6cbe0cd8a..148a79df0cb8 100644
--- a/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll
+++ b/test/CodeGen/ARM/varargs-spill-stack-align-nacl.ll
@@ -22,9 +22,9 @@ define void @varargs_func(i32 %arg1, ...) {
 ; Reserve space for the varargs save area.  This currently reserves
 ; more than enough (16 bytes rather than the 12 bytes needed).
 ; CHECK: sub sp, sp, #16
-; CHECK: push {lr}
+; CHECK: push {r11, lr}
 ; Align the stack pointer to a multiple of 16.
-; CHECK: sub sp, sp, #12
+; CHECK: sub sp, sp, #8
 ; Calculate the address of the varargs save area and save varargs
 ; arguments into it.
 ; CHECK-NEXT: add r0, sp, #20
diff --git a/test/CodeGen/ARM/vargs_align.ll b/test/CodeGen/ARM/vargs_align.ll
index e390cf051443..3abb57ee51f8 100644
--- a/test/CodeGen/ARM/vargs_align.ll
+++ b/test/CodeGen/ARM/vargs_align.ll
@@ -10,6 +10,7 @@ entry:
 	store i32 0, i32* %tmp
 	%tmp1 = load i32* %tmp		; <i32> [#uses=1]
 	store i32 %tmp1, i32* %retval
+	call void @llvm.va_start(i8* null)
 	br label %return
 
 return:		; preds = %entry
@@ -20,3 +21,5 @@ return:		; preds = %entry
 ; OABI: add sp, sp, #12
 ; OABI: add sp, sp, #12
 }
+
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/ARM/vector-load.ll b/test/CodeGen/ARM/vector-load.ll
new file mode 100644
index 000000000000..008bd1f6f8c8
--- /dev/null
+++ b/test/CodeGen/ARM/vector-load.ll
@@ -0,0 +1,253 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7s-apple-ios8.0.0"
+
+define <8 x i8> @load_v8i8(<8 x i8>** %ptr) {
+;CHECK-LABEL: load_v8i8:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
+	%A = load <8 x i8>** %ptr
+	%lA = load <8 x i8>* %A, align 1
+	ret <8 x i8> %lA
+}
+
+define <8 x i8> @load_v8i8_update(<8 x i8>** %ptr) {
+;CHECK-LABEL: load_v8i8_update:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <8 x i8>** %ptr
+	%lA = load <8 x i8>* %A, align 1
+	%inc = getelementptr <8 x i8>* %A, i38 1
+        store <8 x i8>* %inc, <8 x i8>** %ptr
+	ret <8 x i8> %lA
+}
+
+define <4 x i16> @load_v4i16(<4 x i16>** %ptr) {
+;CHECK-LABEL: load_v4i16:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
+	%A = load <4 x i16>** %ptr
+	%lA = load <4 x i16>* %A, align 1
+	ret <4 x i16> %lA
+}
+
+define <4 x i16> @load_v4i16_update(<4 x i16>** %ptr) {
+;CHECK-LABEL: load_v4i16_update:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <4 x i16>** %ptr
+	%lA = load <4 x i16>* %A, align 1
+	%inc = getelementptr <4 x i16>* %A, i34 1
+        store <4 x i16>* %inc, <4 x i16>** %ptr
+	ret <4 x i16> %lA
+}
+
+define <2 x i32> @load_v2i32(<2 x i32>** %ptr) {
+;CHECK-LABEL: load_v2i32:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
+	%A = load <2 x i32>** %ptr
+	%lA = load <2 x i32>* %A, align 1
+	ret <2 x i32> %lA
+}
+
+define <2 x i32> @load_v2i32_update(<2 x i32>** %ptr) {
+;CHECK-LABEL: load_v2i32_update:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <2 x i32>** %ptr
+	%lA = load <2 x i32>* %A, align 1
+	%inc = getelementptr <2 x i32>* %A, i32 1
+        store <2 x i32>* %inc, <2 x i32>** %ptr
+	ret <2 x i32> %lA
+}
+
+define <2 x float> @load_v2f32(<2 x float>** %ptr) {
+;CHECK-LABEL: load_v2f32:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
+	%A = load <2 x float>** %ptr
+	%lA = load <2 x float>* %A, align 1
+	ret <2 x float> %lA
+}
+
+define <2 x float> @load_v2f32_update(<2 x float>** %ptr) {
+;CHECK-LABEL: load_v2f32_update:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <2 x float>** %ptr
+	%lA = load <2 x float>* %A, align 1
+	%inc = getelementptr <2 x float>* %A, i32 1
+        store <2 x float>* %inc, <2 x float>** %ptr
+	ret <2 x float> %lA
+}
+
+define <1 x i64> @load_v1i64(<1 x i64>** %ptr) {
+;CHECK-LABEL: load_v1i64:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
+	%A = load <1 x i64>** %ptr
+	%lA = load <1 x i64>* %A, align 1
+	ret <1 x i64> %lA
+}
+
+define <1 x i64> @load_v1i64_update(<1 x i64>** %ptr) {
+;CHECK-LABEL: load_v1i64_update:
+;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <1 x i64>** %ptr
+	%lA = load <1 x i64>* %A, align 1
+	%inc = getelementptr <1 x i64>* %A, i31 1
+        store <1 x i64>* %inc, <1 x i64>** %ptr
+	ret <1 x i64> %lA
+}
+
+define <16 x i8> @load_v16i8(<16 x i8>** %ptr) {
+;CHECK-LABEL: load_v16i8:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+	%A = load <16 x i8>** %ptr
+	%lA = load <16 x i8>* %A, align 1
+	ret <16 x i8> %lA
+}
+
+define <16 x i8> @load_v16i8_update(<16 x i8>** %ptr) {
+;CHECK-LABEL: load_v16i8_update:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <16 x i8>** %ptr
+	%lA = load <16 x i8>* %A, align 1
+	%inc = getelementptr <16 x i8>* %A, i316 1
+        store <16 x i8>* %inc, <16 x i8>** %ptr
+	ret <16 x i8> %lA
+}
+
+define <8 x i16> @load_v8i16(<8 x i16>** %ptr) {
+;CHECK-LABEL: load_v8i16:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+	%A = load <8 x i16>** %ptr
+	%lA = load <8 x i16>* %A, align 1
+	ret <8 x i16> %lA
+}
+
+define <8 x i16> @load_v8i16_update(<8 x i16>** %ptr) {
+;CHECK-LABEL: load_v8i16_update:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <8 x i16>** %ptr
+	%lA = load <8 x i16>* %A, align 1
+	%inc = getelementptr <8 x i16>* %A, i38 1
+        store <8 x i16>* %inc, <8 x i16>** %ptr
+	ret <8 x i16> %lA
+}
+
+define <4 x i32> @load_v4i32(<4 x i32>** %ptr) {
+;CHECK-LABEL: load_v4i32:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+	%A = load <4 x i32>** %ptr
+	%lA = load <4 x i32>* %A, align 1
+	ret <4 x i32> %lA
+}
+
+define <4 x i32> @load_v4i32_update(<4 x i32>** %ptr) {
+;CHECK-LABEL: load_v4i32_update:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <4 x i32>** %ptr
+	%lA = load <4 x i32>* %A, align 1
+	%inc = getelementptr <4 x i32>* %A, i34 1
+        store <4 x i32>* %inc, <4 x i32>** %ptr
+	ret <4 x i32> %lA
+}
+
+define <4 x float> @load_v4f32(<4 x float>** %ptr) {
+;CHECK-LABEL: load_v4f32:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+	%A = load <4 x float>** %ptr
+	%lA = load <4 x float>* %A, align 1
+	ret <4 x float> %lA
+}
+
+define <4 x float> @load_v4f32_update(<4 x float>** %ptr) {
+;CHECK-LABEL: load_v4f32_update:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <4 x float>** %ptr
+	%lA = load <4 x float>* %A, align 1
+	%inc = getelementptr <4 x float>* %A, i34 1
+        store <4 x float>* %inc, <4 x float>** %ptr
+	ret <4 x float> %lA
+}
+
+define <2 x i64> @load_v2i64(<2 x i64>** %ptr) {
+;CHECK-LABEL: load_v2i64:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+	%A = load <2 x i64>** %ptr
+	%lA = load <2 x i64>* %A, align 1
+	ret <2 x i64> %lA
+}
+
+define <2 x i64> @load_v2i64_update(<2 x i64>** %ptr) {
+;CHECK-LABEL: load_v2i64_update:
+;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <2 x i64>** %ptr
+	%lA = load <2 x i64>* %A, align 1
+	%inc = getelementptr <2 x i64>* %A, i32 1
+        store <2 x i64>* %inc, <2 x i64>** %ptr
+	ret <2 x i64> %lA
+}
+
+; Make sure we change the type to match alignment if necessary.
+define <2 x i64> @load_v2i64_update_aligned2(<2 x i64>** %ptr) {
+;CHECK-LABEL: load_v2i64_update_aligned2:
+;CHECK: vld1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <2 x i64>** %ptr
+	%lA = load <2 x i64>* %A, align 2
+	%inc = getelementptr <2 x i64>* %A, i32 1
+        store <2 x i64>* %inc, <2 x i64>** %ptr
+	ret <2 x i64> %lA
+}
+
+define <2 x i64> @load_v2i64_update_aligned4(<2 x i64>** %ptr) {
+;CHECK-LABEL: load_v2i64_update_aligned4:
+;CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <2 x i64>** %ptr
+	%lA = load <2 x i64>* %A, align 4
+	%inc = getelementptr <2 x i64>* %A, i32 1
+        store <2 x i64>* %inc, <2 x i64>** %ptr
+	ret <2 x i64> %lA
+}
+
+define <2 x i64> @load_v2i64_update_aligned8(<2 x i64>** %ptr) {
+;CHECK-LABEL: load_v2i64_update_aligned8:
+;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:64]!
+	%A = load <2 x i64>** %ptr
+	%lA = load <2 x i64>* %A, align 8
+	%inc = getelementptr <2 x i64>* %A, i32 1
+        store <2 x i64>* %inc, <2 x i64>** %ptr
+	ret <2 x i64> %lA
+}
+
+define <2 x i64> @load_v2i64_update_aligned16(<2 x i64>** %ptr) {
+;CHECK-LABEL: load_v2i64_update_aligned16:
+;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
+	%A = load <2 x i64>** %ptr
+	%lA = load <2 x i64>* %A, align 16
+	%inc = getelementptr <2 x i64>* %A, i32 1
+        store <2 x i64>* %inc, <2 x i64>** %ptr
+	ret <2 x i64> %lA
+}
+
+; Make sure we don't break smaller-than-dreg extloads.
+define <4 x i32> @zextload_v8i8tov8i32(<4 x i8>** %ptr) {
+;CHECK-LABEL: zextload_v8i8tov8i32:
+;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [{{r[0-9]+}}:32]
+;CHECK: vmovl.u8        {{q[0-9]+}}, {{d[0-9]+}}
+;CHECK: vmovl.u16       {{q[0-9]+}}, {{d[0-9]+}}
+	%A = load <4 x i8>** %ptr
+	%lA = load <4 x i8>* %A, align 4
+        %zlA = zext <4 x i8> %lA to <4 x i32>
+	ret <4 x i32> %zlA
+}
+
+define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) {
+;CHECK-LABEL: zextload_v8i8tov8i32_fake_update:
+;CHECK: ldr.w   r[[PTRREG:[0-9]+]], [r0]
+;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r[[PTRREG]]:32]
+;CHECK: add.w   r[[INCREG:[0-9]+]], r[[PTRREG]], #16
+;CHECK: str.w   r[[INCREG]], [r0]
+;CHECK: vmovl.u8        {{q[0-9]+}}, {{d[0-9]+}}
+;CHECK: vmovl.u16       {{q[0-9]+}}, {{d[0-9]+}}
+	%A = load <4 x i8>** %ptr
+	%lA = load <4 x i8>* %A, align 4
+	%inc = getelementptr <4 x i8>* %A, i38 4
+        store <4 x i8>* %inc, <4 x i8>** %ptr
+        %zlA = zext <4 x i8> %lA to <4 x i32>
+	ret <4 x i32> %zlA
+}
diff --git a/test/CodeGen/ARM/vector-promotion.ll b/test/CodeGen/ARM/vector-promotion.ll
new file mode 100644
index 000000000000..42ceb60c47f5
--- /dev/null
+++ b/test/CodeGen/ARM/vector-promotion.ll
@@ -0,0 +1,403 @@
+; RUN: opt -codegenprepare -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon -S | FileCheck --check-prefix=IR-BOTH --check-prefix=IR-NORMAL %s
+; RUN: opt -codegenprepare -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon -S -stress-cgp-store-extract | FileCheck --check-prefix=IR-BOTH --check-prefix=IR-STRESS %s
+; RUN: llc -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon | FileCheck --check-prefix=ASM %s
+
+; IR-BOTH-LABEL: @simpleOneInstructionPromotion
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH-NEXT: [[VECTOR_OR:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], <i32 undef, i32 1>
+; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[VECTOR_OR]], i32 1
+; IR-BOTH-NEXT: store i32 [[EXTRACT]], i32* %dest
+; IR-BOTH-NEXT: ret
+;
+; Make sure we got rid of any expensive vmov.32 instructions.
+; ASM-LABEL: simpleOneInstructionPromotion:
+; ASM: vldr [[LOAD:d[0-9]+]], [r0]
+; ASM-NEXT: vorr.i32 [[LOAD]], #0x1
+; ASM-NEXT: vst1.32 {[[LOAD]][1]}, [r1:32]
+; ASM-NEXT: bx
+define void @simpleOneInstructionPromotion(<2 x i32>* %addr1, i32* %dest) {
+  %in1 = load <2 x i32>* %addr1, align 8
+  %extract = extractelement <2 x i32> %in1, i32 1
+  %out = or i32 %extract, 1
+  store i32 %out, i32* %dest, align 4
+  ret void
+}
+
+; IR-BOTH-LABEL: @unsupportedInstructionForPromotion
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 0
+; IR-BOTH-NEXT: [[CMP:%[a-zA-Z_0-9-]+]] = icmp eq i32 [[EXTRACT]], %in2
+; IR-BOTH-NEXT: store i1 [[CMP]], i1* %dest
+; IR-BOTH-NEXT: ret
+;
+; ASM-LABEL: unsupportedInstructionForPromotion:
+; ASM: vldr [[LOAD:d[0-9]+]], [r0]
+; ASM: vmov.32 {{r[0-9]+}}, [[LOAD]]
+; ASM: bx
+define void @unsupportedInstructionForPromotion(<2 x i32>* %addr1, i32 %in2, i1* %dest) {
+  %in1 = load <2 x i32>* %addr1, align 8
+  %extract = extractelement <2 x i32> %in1, i32 0
+  %out = icmp eq i32 %extract, %in2
+  store i1 %out, i1* %dest, align 4
+  ret void
+}
+
+
+; IR-BOTH-LABEL: @unsupportedChainInDifferentBBs
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 0
+; IR-BOTH-NEXT: br i1 %bool, label %bb2, label %end
+; BB2
+; IR-BOTH: [[OR:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1
+; IR-BOTH-NEXT: store i32 [[OR]], i32* %dest, align 4
+; IR-BOTH: ret
+;
+; ASM-LABEL: unsupportedChainInDifferentBBs:
+; ASM: vldrne [[LOAD:d[0-9]+]], [r0]
+; ASM: vmovne.32 {{r[0-9]+}}, [[LOAD]]
+; ASM: bx
+define void @unsupportedChainInDifferentBBs(<2 x i32>* %addr1, i32* %dest, i1 %bool) {
+bb1:
+  %in1 = load <2 x i32>* %addr1, align 8
+  %extract = extractelement <2 x i32> %in1, i32 0
+  br i1 %bool, label %bb2, label %end
+bb2: 
+  %out = or i32 %extract, 1
+  store i32 %out, i32* %dest, align 4
+  br label %end
+end:
+  ret void
+}
+
+; IR-LABEL: @chainOfInstructionsToPromote
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH-NEXT: [[VECTOR_OR1:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], <i32 1, i32 undef>
+; IR-BOTH-NEXT: [[VECTOR_OR2:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR1]], <i32 1, i32 undef>
+; IR-BOTH-NEXT: [[VECTOR_OR3:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR2]], <i32 1, i32 undef>
+; IR-BOTH-NEXT: [[VECTOR_OR4:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR3]], <i32 1, i32 undef>
+; IR-BOTH-NEXT: [[VECTOR_OR5:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR4]], <i32 1, i32 undef>
+; IR-BOTH-NEXT: [[VECTOR_OR6:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR5]], <i32 1, i32 undef>
+; IR-BOTH-NEXT: [[VECTOR_OR7:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR6]], <i32 1, i32 undef>
+; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[VECTOR_OR7]], i32 0
+; IR-BOTH-NEXT: store i32 [[EXTRACT]], i32* %dest
+; IR-BOTH-NEXT: ret
+;
+; ASM-LABEL: chainOfInstructionsToPromote:
+; ASM: vldr [[LOAD:d[0-9]+]], [r0]
+; ASM-NOT: vmov.32 {{r[0-9]+}}, [[LOAD]]
+; ASM: bx
+define void @chainOfInstructionsToPromote(<2 x i32>* %addr1, i32* %dest) {
+  %in1 = load <2 x i32>* %addr1, align 8
+  %extract = extractelement <2 x i32> %in1, i32 0
+  %out1 = or i32 %extract, 1
+  %out2 = or i32 %out1, 1
+  %out3 = or i32 %out2, 1
+  %out4 = or i32 %out3, 1
+  %out5 = or i32 %out4, 1
+  %out6 = or i32 %out5, 1
+  %out7 = or i32 %out6, 1
+  store i32 %out7, i32* %dest, align 4
+  ret void
+}
+
+; IR-BOTH-LABEL: @unsupportedMultiUses
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
+; IR-BOTH-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1
+; IR-BOTH-NEXT: store i32 [[OR]], i32* %dest
+; IR-BOTH-NEXT: ret i32 [[OR]]
+;
+; ASM-LABEL: unsupportedMultiUses:
+; ASM: vldr [[LOAD:d[0-9]+]], [r0]
+; ASM: vmov.32 {{r[0-9]+}}, [[LOAD]]
+; ASM: bx
+define i32 @unsupportedMultiUses(<2 x i32>* %addr1, i32* %dest) {
+  %in1 = load <2 x i32>* %addr1, align 8
+  %extract = extractelement <2 x i32> %in1, i32 1
+  %out = or i32 %extract, 1
+  store i32 %out, i32* %dest, align 4
+  ret i32 %out
+}
+
+; Check that we promote we a splat constant when this is a division.
+; The NORMAL mode does not promote anything as divisions are not legal.
+; IR-BOTH-LABEL: @udivCase
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; Scalar version:
+; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
+; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = udiv i32 [[EXTRACT]], 7
+; Vector version:
+; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = udiv <2 x i32> [[LOAD]], <i32 7, i32 7>
+; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1
+;
+; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
+; IR-BOTH-NEXT: ret
+define void @udivCase(<2 x i32>* %addr1, i32* %dest) {
+  %in1 = load <2 x i32>* %addr1, align 8
+  %extract = extractelement <2 x i32> %in1, i32 1
+  %out = udiv i32 %extract, 7
+  store i32 %out, i32* %dest, align 4
+  ret void
+}
+
+; IR-BOTH-LABEL: @uremCase
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; Scalar version:
+; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
+; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = urem i32 [[EXTRACT]], 7
+; Vector version:
+; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = urem <2 x i32> [[LOAD]], <i32 7, i32 7>
+; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1
+;
+; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
+; IR-BOTH-NEXT: ret 
+define void @uremCase(<2 x i32>* %addr1, i32* %dest) {
+  %in1 = load <2 x i32>* %addr1, align 8
+  %extract = extractelement <2 x i32> %in1, i32 1
+  %out = urem i32 %extract, 7
+  store i32 %out, i32* %dest, align 4
+  ret void
+}
+
+; IR-BOTH-LABEL: @sdivCase
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; Scalar version:
+; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
+; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sdiv i32 [[EXTRACT]], 7
+; Vector version:
+; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = sdiv <2 x i32> [[LOAD]], <i32 7, i32 7>
+; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1
+;
+; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
+; IR-BOTH-NEXT: ret 
+define void @sdivCase(<2 x i32>* %addr1, i32* %dest) {
+  %in1 = load <2 x i32>* %addr1, align 8
+  %extract = extractelement <2 x i32> %in1, i32 1
+  %out = sdiv i32 %extract, 7
+  store i32 %out, i32* %dest, align 4
+  ret void
+}
+
+; IR-BOTH-LABEL: @sremCase
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; Scalar version:
+; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
+; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = srem i32 [[EXTRACT]], 7
+; Vector version:
+; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = srem <2 x i32> [[LOAD]], <i32 7, i32 7>
+; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[DIV]], i32 1
+;
+; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
+; IR-BOTH-NEXT: ret 
+define void @sremCase(<2 x i32>* %addr1, i32* %dest) {
+  %in1 = load <2 x i32>* %addr1, align 8
+  %extract = extractelement <2 x i32> %in1, i32 1
+  %out = srem i32 %extract, 7
+  store i32 %out, i32* %dest, align 4
+  ret void
+}
+
+; IR-BOTH-LABEL: @fdivCase
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1
+; Scalar version:  
+; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
+; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = fdiv float [[EXTRACT]], 7.0
+; Vector version:
+; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = fdiv <2 x float> [[LOAD]], <float 7.000000e+00, float 7.000000e+00>
+; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1
+;
+; IR-BOTH-NEXT: store float [[RES]], float* %dest
+; IR-BOTH-NEXT: ret
+define void @fdivCase(<2 x float>* %addr1, float* %dest) {
+  %in1 = load <2 x float>* %addr1, align 8   
+  %extract = extractelement <2 x float> %in1, i32 1
+  %out = fdiv float %extract, 7.0
+  store float %out, float* %dest, align 4
+  ret void
+}
+
+; IR-BOTH-LABEL: @fremCase
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1
+; Scalar version:  
+; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
+; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem float [[EXTRACT]], 7.0
+; Vector version:
+; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = frem <2 x float> [[LOAD]], <float 7.000000e+00, float 7.000000e+00>
+; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1
+;
+; IR-BOTH-NEXT: store float [[RES]], float* %dest
+; IR-BOTH-NEXT: ret
+define void @fremCase(<2 x float>* %addr1, float* %dest) {
+  %in1 = load <2 x float>* %addr1, align 8   
+  %extract = extractelement <2 x float> %in1, i32 1
+  %out = frem float %extract, 7.0
+  store float %out, float* %dest, align 4
+  ret void
+}
+
+; Check that we do not promote when we may introduce undefined behavior
+; like division by zero.
+; IR-BOTH-LABEL: @undefDivCase
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
+; IR-BOTH-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = udiv i32 7, [[EXTRACT]]
+; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
+; IR-BOTH-NEXT: ret
+define void @undefDivCase(<2 x i32>* %addr1, i32* %dest) {
+  %in1 = load <2 x i32>* %addr1, align 8
+  %extract = extractelement <2 x i32> %in1, i32 1
+  %out = udiv i32 7, %extract
+  store i32 %out, i32* %dest, align 4
+  ret void
+}
+
+
+; Check that we do not promote when we may introduce undefined behavior
+; like division by zero.
+; IR-BOTH-LABEL: @undefRemCase
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
+; IR-BOTH-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = srem i32 7, [[EXTRACT]]
+; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
+; IR-BOTH-NEXT: ret
+define void @undefRemCase(<2 x i32>* %addr1, i32* %dest) {
+  %in1 = load <2 x i32>* %addr1, align 8
+  %extract = extractelement <2 x i32> %in1, i32 1
+  %out = srem i32 7, %extract
+  store i32 %out, i32* %dest, align 4
+  ret void
+}
+
+; Check that we use an undef mask for undefined behavior if the fast-math
+; flag is set.
+; IR-BOTH-LABEL: @undefConstantFRemCaseWithFastMath
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1
+; Scalar version:  
+; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
+; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem nnan float [[EXTRACT]], 7.0
+; Vector version:
+; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = frem nnan <2 x float> [[LOAD]], <float undef, float 7.000000e+00>
+; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1
+;
+; IR-BOTH-NEXT: store float [[RES]], float* %dest
+; IR-BOTH-NEXT: ret
+define void @undefConstantFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest) {
+  %in1 = load <2 x float>* %addr1, align 8   
+  %extract = extractelement <2 x float> %in1, i32 1
+  %out = frem nnan float %extract, 7.0
+  store float %out, float* %dest, align 4
+  ret void
+}
+
+; Check that we use an undef mask for undefined behavior if the fast-math
+; flag is set.
+; IR-BOTH-LABEL: @undefVectorFRemCaseWithFastMath
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1
+; Scalar version:  
+; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
+; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem nnan float 7.000000e+00, [[EXTRACT]]
+; Vector version:
+; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = frem nnan <2 x float> <float undef, float 7.000000e+00>, [[LOAD]]
+; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1
+;
+; IR-BOTH-NEXT: store float [[RES]], float* %dest
+; IR-BOTH-NEXT: ret
+define void @undefVectorFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest) {
+  %in1 = load <2 x float>* %addr1, align 8   
+  %extract = extractelement <2 x float> %in1, i32 1
+  %out = frem nnan float 7.0, %extract
+  store float %out, float* %dest, align 4
+  ret void
+}
+
+; Check that we are able to promote floating point value.
+; This requires the STRESS mode, as floating point value are
+; not promote on armv7.
+; IR-BOTH-LABEL: @simpleOneInstructionPromotionFloat
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1
+; Scalar version: 
+; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
+; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = fadd float [[EXTRACT]], 1.0
+; Vector version:
+; IR-STRESS-NEXT: [[DIV:%[a-zA-Z_0-9-]+]] = fadd <2 x float> [[LOAD]], <float undef, float 1.000000e+00>
+; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[DIV]], i32 1
+;
+; IR-BOTH-NEXT: store float [[RES]], float* %dest
+; IR-BOTH-NEXT: ret
+define void @simpleOneInstructionPromotionFloat(<2 x float>* %addr1, float* %dest) {
+  %in1 = load <2 x float>* %addr1, align 8
+  %extract = extractelement <2 x float> %in1, i32 1
+  %out = fadd float %extract, 1.0
+  store float %out, float* %dest, align 4
+  ret void
+}
+
+; Check that we correctly use a splat constant when we cannot
+; determine at compile time the index of the extract.
+; This requires the STRESS modes, as variable index are expensive
+; to lower.
+; IR-BOTH-LABEL: @simpleOneInstructionPromotionVariableIdx
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; Scalar version:
+; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 %idx
+; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1
+; Vector version:
+; IR-STRESS-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], <i32 1, i32 1>
+; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[OR]], i32 %idx
+;
+; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
+; IR-BOTH-NEXT: ret
+define void @simpleOneInstructionPromotionVariableIdx(<2 x i32>* %addr1, i32* %dest, i32 %idx) {
+  %in1 = load <2 x i32>* %addr1, align 8
+  %extract = extractelement <2 x i32> %in1, i32 %idx
+  %out = or i32 %extract, 1
+  store i32 %out, i32* %dest, align 4
+  ret void
+}
+
+; Check a vector with more than 2 elements.
+; This requires the STRESS mode because currently 'or v8i8' is not marked
+; as legal or custom, althought the actual assembly is better if we were
+; promoting it.
+; IR-BOTH-LABEL: @simpleOneInstructionPromotion8x8
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <8 x i8>* %addr1
+; Scalar version:  
+; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <8 x i8> [[LOAD]], i32 1
+; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = or i8 [[EXTRACT]], 1
+; Vector version:  
+; IR-STRESS-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or <8 x i8> [[LOAD]], <i8 undef, i8 1, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
+; IR-STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = extractelement <8 x i8> [[OR]], i32 1
+;
+; IR-BOTH-NEXT: store i8 [[RES]], i8* %dest
+; IR-BOTH-NEXT: ret
+define void @simpleOneInstructionPromotion8x8(<8 x i8>* %addr1, i8* %dest) {
+  %in1 = load <8 x i8>* %addr1, align 8
+  %extract = extractelement <8 x i8> %in1, i32 1
+  %out = or i8 %extract, 1
+  store i8 %out, i8* %dest, align 4
+  ret void
+}
+
+; Check that we optimized the sequence correctly when it can be
+; lowered on a Q register.
+; IR-BOTH-LABEL: @simpleOneInstructionPromotion
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <4 x i32>* %addr1
+; IR-BOTH-NEXT: [[VECTOR_OR:%[a-zA-Z_0-9-]+]] = or <4 x i32> [[LOAD]], <i32 undef, i32 1, i32 undef, i32 undef>
+; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <4 x i32> [[VECTOR_OR]], i32 1
+; IR-BOTH-NEXT: store i32 [[EXTRACT]], i32* %dest
+; IR-BOTH-NEXT: ret
+;
+; Make sure we got rid of any expensive vmov.32 instructions.
+; ASM-LABEL: simpleOneInstructionPromotion4x32:
+; ASM: vld1.64 {[[LOAD:d[0-9]+]], d{{[0-9]+}}}, [r0]
+; The Q register used here must be [[LOAD]] / 2, but we cannot express that.
+; ASM-NEXT: vorr.i32 q{{[[0-9]+}}, #0x1
+; ASM-NEXT: vst1.32 {[[LOAD]][1]}, [r1]
+; ASM-NEXT: bx
+define void @simpleOneInstructionPromotion4x32(<4 x i32>* %addr1, i32* %dest) {
+  %in1 = load <4 x i32>* %addr1, align 8
+  %extract = extractelement <4 x i32> %in1, i32 1
+  %out = or i32 %extract, 1
+  store i32 %out, i32* %dest, align 1
+  ret void
+}
diff --git a/test/CodeGen/ARM/vector-store.ll b/test/CodeGen/ARM/vector-store.ll
new file mode 100644
index 000000000000..9036a31d141d
--- /dev/null
+++ b/test/CodeGen/ARM/vector-store.ll
@@ -0,0 +1,258 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7s-apple-ios8.0.0"
+
+define void @store_v8i8(<8 x i8>** %ptr, <8 x i8> %val) {
+;CHECK-LABEL: store_v8i8:
+;CHECK: str r1, [r0]
+	%A = load <8 x i8>** %ptr
+	store  <8 x i8> %val, <8 x i8>* %A, align 1
+	ret void
+}
+
+define void @store_v8i8_update(<8 x i8>** %ptr, <8 x i8> %val) {
+;CHECK-LABEL: store_v8i8_update:
+;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <8 x i8>** %ptr
+	store  <8 x i8> %val, <8 x i8>* %A, align 1
+	%inc = getelementptr <8 x i8>* %A, i38 1
+        store <8 x i8>* %inc, <8 x i8>** %ptr
+	ret void
+}
+
+define void @store_v4i16(<4 x i16>** %ptr, <4 x i16> %val) {
+;CHECK-LABEL: store_v4i16:
+;CHECK: str r1, [r0]
+	%A = load <4 x i16>** %ptr
+	store  <4 x i16> %val, <4 x i16>* %A, align 1
+	ret void
+}
+
+define void @store_v4i16_update(<4 x i16>** %ptr, <4 x i16> %val) {
+;CHECK-LABEL: store_v4i16_update:
+;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <4 x i16>** %ptr
+	store  <4 x i16> %val, <4 x i16>* %A, align 1
+	%inc = getelementptr <4 x i16>* %A, i34 1
+        store <4 x i16>* %inc, <4 x i16>** %ptr
+	ret void
+}
+
+define void @store_v2i32(<2 x i32>** %ptr, <2 x i32> %val) {
+;CHECK-LABEL: store_v2i32:
+;CHECK: str r1, [r0]
+	%A = load <2 x i32>** %ptr
+	store  <2 x i32> %val, <2 x i32>* %A, align 1
+	ret void
+}
+
+define void @store_v2i32_update(<2 x i32>** %ptr, <2 x i32> %val) {
+;CHECK-LABEL: store_v2i32_update:
+;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <2 x i32>** %ptr
+	store  <2 x i32> %val, <2 x i32>* %A, align 1
+	%inc = getelementptr <2 x i32>* %A, i32 1
+        store <2 x i32>* %inc, <2 x i32>** %ptr
+	ret void
+}
+
+define void @store_v2f32(<2 x float>** %ptr, <2 x float> %val) {
+;CHECK-LABEL: store_v2f32:
+;CHECK: str r1, [r0]
+	%A = load <2 x float>** %ptr
+	store  <2 x float> %val, <2 x float>* %A, align 1
+	ret void
+}
+
+define void @store_v2f32_update(<2 x float>** %ptr, <2 x float> %val) {
+;CHECK-LABEL: store_v2f32_update:
+;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <2 x float>** %ptr
+	store  <2 x float> %val, <2 x float>* %A, align 1
+	%inc = getelementptr <2 x float>* %A, i32 1
+        store <2 x float>* %inc, <2 x float>** %ptr
+	ret void
+}
+
+define void @store_v1i64(<1 x i64>** %ptr, <1 x i64> %val) {
+;CHECK-LABEL: store_v1i64:
+;CHECK: str r1, [r0]
+	%A = load <1 x i64>** %ptr
+	store  <1 x i64> %val, <1 x i64>* %A, align 1
+	ret void
+}
+
+define void @store_v1i64_update(<1 x i64>** %ptr, <1 x i64> %val) {
+;CHECK-LABEL: store_v1i64_update:
+;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <1 x i64>** %ptr
+	store  <1 x i64> %val, <1 x i64>* %A, align 1
+	%inc = getelementptr <1 x i64>* %A, i31 1
+        store <1 x i64>* %inc, <1 x i64>** %ptr
+	ret void
+}
+
+define void @store_v16i8(<16 x i8>** %ptr, <16 x i8> %val) {
+;CHECK-LABEL: store_v16i8:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+	%A = load <16 x i8>** %ptr
+	store  <16 x i8> %val, <16 x i8>* %A, align 1
+	ret void
+}
+
+define void @store_v16i8_update(<16 x i8>** %ptr, <16 x i8> %val) {
+;CHECK-LABEL: store_v16i8_update:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <16 x i8>** %ptr
+	store  <16 x i8> %val, <16 x i8>* %A, align 1
+	%inc = getelementptr <16 x i8>* %A, i316 1
+        store <16 x i8>* %inc, <16 x i8>** %ptr
+	ret void
+}
+
+define void @store_v8i16(<8 x i16>** %ptr, <8 x i16> %val) {
+;CHECK-LABEL: store_v8i16:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+	%A = load <8 x i16>** %ptr
+	store  <8 x i16> %val, <8 x i16>* %A, align 1
+	ret void
+}
+
+define void @store_v8i16_update(<8 x i16>** %ptr, <8 x i16> %val) {
+;CHECK-LABEL: store_v8i16_update:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <8 x i16>** %ptr
+	store  <8 x i16> %val, <8 x i16>* %A, align 1
+	%inc = getelementptr <8 x i16>* %A, i38 1
+        store <8 x i16>* %inc, <8 x i16>** %ptr
+	ret void
+}
+
+define void @store_v4i32(<4 x i32>** %ptr, <4 x i32> %val) {
+;CHECK-LABEL: store_v4i32:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+	%A = load <4 x i32>** %ptr
+	store  <4 x i32> %val, <4 x i32>* %A, align 1
+	ret void
+}
+
+define void @store_v4i32_update(<4 x i32>** %ptr, <4 x i32> %val) {
+;CHECK-LABEL: store_v4i32_update:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <4 x i32>** %ptr
+	store  <4 x i32> %val, <4 x i32>* %A, align 1
+	%inc = getelementptr <4 x i32>* %A, i34 1
+        store <4 x i32>* %inc, <4 x i32>** %ptr
+	ret void
+}
+
+define void @store_v4f32(<4 x float>** %ptr, <4 x float> %val) {
+;CHECK-LABEL: store_v4f32:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+	%A = load <4 x float>** %ptr
+	store  <4 x float> %val, <4 x float>* %A, align 1
+	ret void
+}
+
+define void @store_v4f32_update(<4 x float>** %ptr, <4 x float> %val) {
+;CHECK-LABEL: store_v4f32_update:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <4 x float>** %ptr
+	store  <4 x float> %val, <4 x float>* %A, align 1
+	%inc = getelementptr <4 x float>* %A, i34 1
+        store <4 x float>* %inc, <4 x float>** %ptr
+	ret void
+}
+
+define void @store_v2i64(<2 x i64>** %ptr, <2 x i64> %val) {
+;CHECK-LABEL: store_v2i64:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
+	%A = load <2 x i64>** %ptr
+	store  <2 x i64> %val, <2 x i64>* %A, align 1
+	ret void
+}
+
+define void @store_v2i64_update(<2 x i64>** %ptr, <2 x i64> %val) {
+;CHECK-LABEL: store_v2i64_update:
+;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <2 x i64>** %ptr
+	store  <2 x i64> %val, <2 x i64>* %A, align 1
+	%inc = getelementptr <2 x i64>* %A, i32 1
+        store <2 x i64>* %inc, <2 x i64>** %ptr
+	ret void
+}
+
+define void @store_v2i64_update_aligned2(<2 x i64>** %ptr, <2 x i64> %val) {
+;CHECK-LABEL: store_v2i64_update_aligned2:
+;CHECK: vst1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <2 x i64>** %ptr
+	store  <2 x i64> %val, <2 x i64>* %A, align 2
+	%inc = getelementptr <2 x i64>* %A, i32 1
+        store <2 x i64>* %inc, <2 x i64>** %ptr
+	ret void
+}
+
+define void @store_v2i64_update_aligned4(<2 x i64>** %ptr, <2 x i64> %val) {
+;CHECK-LABEL: store_v2i64_update_aligned4:
+;CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
+	%A = load <2 x i64>** %ptr
+	store  <2 x i64> %val, <2 x i64>* %A, align 4
+	%inc = getelementptr <2 x i64>* %A, i32 1
+        store <2 x i64>* %inc, <2 x i64>** %ptr
+	ret void
+}
+
+define void @store_v2i64_update_aligned8(<2 x i64>** %ptr, <2 x i64> %val) {
+;CHECK-LABEL: store_v2i64_update_aligned8:
+;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:64]!
+	%A = load <2 x i64>** %ptr
+	store  <2 x i64> %val, <2 x i64>* %A, align 8
+	%inc = getelementptr <2 x i64>* %A, i32 1
+        store <2 x i64>* %inc, <2 x i64>** %ptr
+	ret void
+}
+
+define void @store_v2i64_update_aligned16(<2 x i64>** %ptr, <2 x i64> %val) {
+;CHECK-LABEL: store_v2i64_update_aligned16:
+;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
+	%A = load <2 x i64>** %ptr
+	store  <2 x i64> %val, <2 x i64>* %A, align 16
+	%inc = getelementptr <2 x i64>* %A, i32 1
+        store <2 x i64>* %inc, <2 x i64>** %ptr
+	ret void
+}
+
+define void @truncstore_v4i32tov4i8(<4 x i8>** %ptr, <4 x i32> %val) {
+;CHECK-LABEL: truncstore_v4i32tov4i8:
+;CHECK: ldr.w   r9, [sp]
+;CHECK: vmov    {{d[0-9]+}}, r3, r9
+;CHECK: vmov    {{d[0-9]+}}, r1, r2
+;CHECK: vmovn.i32       [[VECLO:d[0-9]+]], {{q[0-9]+}}
+;CHECK: vuzp.8  [[VECLO]], {{d[0-9]+}}
+;CHECK: ldr     r[[PTRREG:[0-9]+]], [r0]
+;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32]
+	%A = load <4 x i8>** %ptr
+        %trunc = trunc <4 x i32> %val to <4 x i8>
+	store  <4 x i8> %trunc, <4 x i8>* %A, align 4
+	ret void
+}
+
+define void @truncstore_v4i32tov4i8_fake_update(<4 x i8>** %ptr, <4 x i32> %val) {
+;CHECK-LABEL: truncstore_v4i32tov4i8_fake_update:
+;CHECK: ldr.w   r9, [sp]
+;CHECK: vmov    {{d[0-9]+}}, r3, r9
+;CHECK: vmov    {{d[0-9]+}}, r1, r2
+;CHECK: movs    [[IMM16:r[0-9]+]], #16
+;CHECK: vmovn.i32       [[VECLO:d[0-9]+]], {{q[0-9]+}}
+;CHECK: vuzp.8  [[VECLO]], {{d[0-9]+}}
+;CHECK: ldr     r[[PTRREG:[0-9]+]], [r0]
+;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32], [[IMM16]]
+;CHECK: str     r[[PTRREG]], [r0]
+	%A = load <4 x i8>** %ptr
+        %trunc = trunc <4 x i32> %val to <4 x i8>
+	store  <4 x i8> %trunc, <4 x i8>* %A, align 4
+	%inc = getelementptr <4 x i8>* %A, i38 4
+        store <4 x i8>* %inc, <4 x i8>** %ptr
+	ret void
+}
diff --git a/test/CodeGen/ARM/vfp-regs-dwarf.ll b/test/CodeGen/ARM/vfp-regs-dwarf.ll
index 49767294ad28..b67f770bfd13 100644
--- a/test/CodeGen/ARM/vfp-regs-dwarf.ll
+++ b/test/CodeGen/ARM/vfp-regs-dwarf.ll
@@ -31,14 +31,14 @@ define void @stack_offsets() {
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8, !9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/Users/tim/llvm/build/tmp.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"tmp.c", metadata !"/Users/tim/llvm/build"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @stack_offsets, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [bar]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/Users/tim/llvm/build/tmp.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/Users/tim/llvm/build/tmp.c] [DW_LANG_C99]
+!1 = !{!"tmp.c", !"/Users/tim/llvm/build"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00bar\00bar\00\001\000\001\000\006\000\000\001", !1, !5, !6, null, void ()* @stack_offsets, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [bar]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/Users/tim/llvm/build/tmp.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 2}
 
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index caeeada90ff5..db640f54b0e6 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -119,6 +119,14 @@ define <2 x i64> @vld1Qi64(i64* %A) nounwind {
 	ret <2 x i64> %tmp1
 }
 
+define <2 x double> @vld1Qf64(double* %A) nounwind {
+;CHECK-LABEL: vld1Qf64:
+;CHECK: vld1.64
+	%tmp0 = bitcast double* %A to i8*
+	%tmp1 = call <2 x double> @llvm.arm.neon.vld1.v2f64(i8* %tmp0, i32 1)
+	ret <2 x double> %tmp1
+}
+
 declare <8 x i8>  @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly
 declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) nounwind readonly
 declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) nounwind readonly
@@ -130,6 +138,7 @@ declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly
 declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly
 declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
 declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly
+declare <2 x double> @llvm.arm.neon.vld1.v2f64(i8*, i32) nounwind readonly
 
 ; Radar 8355607
 ; Do not crash if the vld1 result is not used.
diff --git a/test/CodeGen/ARM/vldm-sched-a9.ll b/test/CodeGen/ARM/vldm-sched-a9.ll
index 64f3770e3d21..e5e7bc08fa4a 100644
--- a/test/CodeGen/ARM/vldm-sched-a9.ll
+++ b/test/CodeGen/ARM/vldm-sched-a9.ll
@@ -6,8 +6,8 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 ; the changes for PR:18825 prevent that spilling.
 
 ; CHECK: test:
-; CHECK-NOT: vstmia
-; CHECK-NOT: vldmia
+; CHECK: vstmia
+; CHECK: vldmia
 define void @test(i64* %src) #0 {
 entry:
   %arrayidx39 = getelementptr inbounds i64* %src, i32 13
diff --git a/test/CodeGen/ARM/vminmaxnm.ll b/test/CodeGen/ARM/vminmaxnm.ll
index f6ce64c54a39..39289a0bafb3 100644
--- a/test/CodeGen/ARM/vminmaxnm.ll
+++ b/test/CodeGen/ARM/vminmaxnm.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -mtriple armv8 -mattr=+neon,+fp-armv8 -enable-unsafe-fp-math | FileCheck %s --check-prefix=CHECK-FAST
 
 define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
-; CHECK: vmaxnmq
+; CHECK-LABEL: vmaxnmq:
 ; CHECK: vmaxnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
   %tmp1 = load <4 x float>* %A
   %tmp2 = load <4 x float>* %B
@@ -11,7 +11,7 @@ define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
 }
 
 define <2 x float> @vmaxnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
-; CHECK: vmaxnmd
+; CHECK-LABEL: vmaxnmd:
 ; CHECK: vmaxnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
   %tmp1 = load <2 x float>* %A
   %tmp2 = load <2 x float>* %B
@@ -20,7 +20,7 @@ define <2 x float> @vmaxnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define <4 x float> @vminnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
-; CHECK: vminnmq
+; CHECK-LABEL: vminnmq:
 ; CHECK: vminnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
   %tmp1 = load <4 x float>* %A
   %tmp2 = load <4 x float>* %B
@@ -29,7 +29,7 @@ define <4 x float> @vminnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
 }
 
 define <2 x float> @vminnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
-; CHECK: vminnmd
+; CHECK-LABEL: vminnmd:
 ; CHECK: vminnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
   %tmp1 = load <2 x float>* %A
   %tmp2 = load <2 x float>* %B
@@ -38,49 +38,93 @@ define <2 x float> @vminnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
 }
 
 define float @fp-armv8_vminnm_o(float %a, float %b) {
-; CHECK-FAST: fp-armv8_vminnm_o
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_o":
 ; CHECK-FAST-NOT: vcmp
 ; CHECK-FAST: vminnm.f32
-; CHECK: fp-armv8_vminnm_o
+; CHECK-LABEL: "fp-armv8_vminnm_o":
 ; CHECK-NOT: vminnm.f32
   %cmp = fcmp olt float %a, %b
   %cond = select i1 %cmp, float %a, float %b
   ret float %cond
 }
 
+define float @fp-armv8_vminnm_o_rev(float %a, float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_o_rev":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f32
+; CHECK-LABEL: "fp-armv8_vminnm_o_rev":
+; CHECK-NOT: vminnm.f32
+  %cmp = fcmp ogt float %a, %b
+  %cond = select i1 %cmp, float %b, float %a
+  ret float %cond
+}
+
 define float @fp-armv8_vminnm_u(float %a, float %b) {
-; CHECK-FAST: fp-armv8_vminnm_u
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_u":
 ; CHECK-FAST-NOT: vcmp
 ; CHECK-FAST: vminnm.f32
-; CHECK: fp-armv8_vminnm_u
+; CHECK-LABEL: "fp-armv8_vminnm_u":
 ; CHECK-NOT: vminnm.f32
   %cmp = fcmp ult float %a, %b
   %cond = select i1 %cmp, float %a, float %b
   ret float %cond
 }
 
+define float @fp-armv8_vminnm_u_rev(float %a, float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vminnm_u_rev":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vminnm.f32
+; CHECK-LABEL: "fp-armv8_vminnm_u_rev":
+; CHECK-NOT: vminnm.f32
+  %cmp = fcmp ugt float %a, %b
+  %cond = select i1 %cmp, float %b, float %a
+  ret float %cond
+}
+
 define float @fp-armv8_vmaxnm_o(float %a, float %b) {
-; CHECK-FAST: fp-armv8_vmaxnm_o
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_o":
 ; CHECK-FAST-NOT: vcmp
 ; CHECK-FAST: vmaxnm.f32
-; CHECK: fp-armv8_vmaxnm_o
+; CHECK-LABEL: "fp-armv8_vmaxnm_o":
 ; CHECK-NOT: vmaxnm.f32
   %cmp = fcmp ogt float %a, %b
   %cond = select i1 %cmp, float %a, float %b
   ret float %cond
 }
 
+define float @fp-armv8_vmaxnm_o_rev(float %a, float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_o_rev":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_o_rev":
+; CHECK-NOT: vmaxnm.f32
+  %cmp = fcmp olt float %a, %b
+  %cond = select i1 %cmp, float %b, float %a
+  ret float %cond
+}
+
 define float @fp-armv8_vmaxnm_u(float %a, float %b) {
-; CHECK-FAST: fp-armv8_vmaxnm_u
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_u":
 ; CHECK-FAST-NOT: vcmp
 ; CHECK-FAST: vmaxnm.f32
-; CHECK: fp-armv8_vmaxnm_u
+; CHECK-LABEL: "fp-armv8_vmaxnm_u":
 ; CHECK-NOT: vmaxnm.f32
   %cmp = fcmp ugt float %a, %b
   %cond = select i1 %cmp, float %a, float %b
   ret float %cond
 }
 
+define float @fp-armv8_vmaxnm_u_rev(float %a, float %b) {
+; CHECK-FAST-LABEL: "fp-armv8_vmaxnm_u_rev":
+; CHECK-FAST-NOT: vcmp
+; CHECK-FAST: vmaxnm.f32
+; CHECK-LABEL: "fp-armv8_vmaxnm_u_rev":
+; CHECK-NOT: vmaxnm.f32
+  %cmp = fcmp ult float %a, %b
+  %cond = select i1 %cmp, float %b, float %a
+  ret float %cond
+}
+
 
 declare <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone
 declare <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
index 14f3ff066301..a6bcf7d8ead3 100644
--- a/test/CodeGen/ARM/vst1.ll
+++ b/test/CodeGen/ARM/vst1.ll
@@ -117,6 +117,15 @@ define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
 	ret void
 }
 
+define void @vst1Qf64(double* %A, <2 x double>* %B) nounwind {
+;CHECK-LABEL: vst1Qf64:
+;CHECK: vst1.64
+	%tmp0 = bitcast double* %A to i8*
+	%tmp1 = load <2 x double>* %B
+	call void @llvm.arm.neon.vst1.v2f64(i8* %tmp0, <2 x double> %tmp1, i32 1)
+	ret void
+}
+
 declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
 declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind
 declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind
@@ -128,3 +137,4 @@ declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
 declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind
 declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
 declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32) nounwind
+declare void @llvm.arm.neon.vst1.v2f64(i8*, <2 x double>, i32) nounwind
diff --git a/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll b/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll
new file mode 100644
index 000000000000..7ecd25283108
--- /dev/null
+++ b/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mcpu=cortex-a9 -O1 -filetype=obj %s -o - | llvm-objdump -arch thumb -mcpu=cortex-a9 -d - | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv7--linux-gnueabi"
+
+declare i8* @llvm.returnaddress(i32)
+
+define i32* @wrong-t2stmia-size-reduction(i32* %addr, i32 %val0) minsize {
+  store i32 %val0, i32* %addr
+  %addr1 = getelementptr i32* %addr, i32 1
+  %lr = call i8* @llvm.returnaddress(i32 0)
+  %lr32 = ptrtoint i8* %lr to i32
+  store i32 %lr32, i32* %addr1
+  %addr2 = getelementptr i32* %addr1, i32 1
+  ret i32* %addr2
+}
+
+; Check that stm writes two registers.  The bug caused one of registers (LR,
+; which invalid for Thumb1 form of STMIA instruction) to be dropped.
+; CHECK: stm{{[^,]*}}, {{{.*,.*}}}
diff --git a/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll b/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
index 3f17ce1e0b16..eaaeb37eebb4 100644
--- a/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
+++ b/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
@@ -30,20 +30,20 @@
 	%"struct.qdesigner_internal::GridLayout" = type { %"struct.qdesigner_internal::Layout", %"struct.QPair<int,int>", %"struct.qdesigner_internal::Grid"* }
 	%"struct.qdesigner_internal::Layout" = type { %struct.QObject, %"struct.QList<QAbstractExtensionFactory*>", %struct.QWidget*, %"struct.QHash<QString,QList<QAbstractExtensionFactory*> >", %struct.QWidget*, %struct.QDesignerFormWindowInterface*, i8, %"struct.QPair<int,int>", %struct.QRect, i8 }
 
-@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once		; <i32 (i32*, void ()*)*> [#uses=0]
-@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific		; <i8* (i32)*> [#uses=0]
-@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific		; <i32 (i32, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create		; <i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i64)* @pthread_cancel		; <i32 (i64)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%struct.pthread_mutex_t*, %struct.Alignment*)* @pthread_mutex_init		; <i32 (%struct.pthread_mutex_t*, %struct.Alignment*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create		; <i32 (i32*, void (i8*)*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete		; <i32 (i32)*> [#uses=0]
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%struct.Alignment*)* @pthread_mutexattr_init		; <i32 (%struct.Alignment*)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%struct.Alignment*, i32)* @pthread_mutexattr_settype		; <i32 (%struct.Alignment*, i32)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%struct.Alignment*)* @pthread_mutexattr_destroy		; <i32 (%struct.Alignment*)*> [#uses=0]
+@_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (i32*, void ()*)* @pthread_once		; <i32 (i32*, void ()*)*> [#uses=0]
+@_ZL27__gthrw_pthread_getspecificj = weak alias i8* (i32)* @pthread_getspecific		; <i8* (i32)*> [#uses=0]
+@_ZL27__gthrw_pthread_setspecificjPKv = weak alias i32 (i32, i8*)* @pthread_setspecific		; <i32 (i32, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = weak alias i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create		; <i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_cancelm = weak alias i32 (i64)* @pthread_cancel		; <i32 (i64)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = weak alias i32 (%struct.pthread_mutex_t*, %struct.Alignment*)* @pthread_mutex_init		; <i32 (%struct.pthread_mutex_t*, %struct.Alignment*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_createPjPFvPvE = weak alias i32 (i32*, void (i8*)*)* @pthread_key_create		; <i32 (i32*, void (i8*)*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_deletej = weak alias i32 (i32)* @pthread_key_delete		; <i32 (i32)*> [#uses=0]
+@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = weak alias i32 (%struct.Alignment*)* @pthread_mutexattr_init		; <i32 (%struct.Alignment*)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = weak alias i32 (%struct.Alignment*, i32)* @pthread_mutexattr_settype		; <i32 (%struct.Alignment*, i32)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = weak alias i32 (%struct.Alignment*)* @pthread_mutexattr_destroy		; <i32 (%struct.Alignment*)*> [#uses=0]
 
 define void @_ZN18qdesigner_internal10GridLayout9buildGridEv(%"struct.qdesigner_internal::GridLayout"* %this) nounwind {
 entry:
diff --git a/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll b/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll
index da26504b13d1..cd446d57d8a5 100644
--- a/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll
+++ b/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll
@@ -3,9 +3,6 @@
 ; the uses of a copy to a physical register without ignoring non-data
 ; dependence, PR10220.
 
-; The ARM backend can't handle i256 math at the moment.
-; XFAIL: arm
-
 define void @f(i256* nocapture %a, i256* nocapture %b, i256* nocapture %cc, i256* nocapture %dd) nounwind uwtable noinline ssp {
 entry:
   %c = load i256* %cc
diff --git a/test/CodeGen/Generic/MachineBranchProb.ll b/test/CodeGen/Generic/MachineBranchProb.ll
index 0e98280694c4..83277c98989d 100644
--- a/test/CodeGen/Generic/MachineBranchProb.ll
+++ b/test/CodeGen/Generic/MachineBranchProb.ll
@@ -32,4 +32,4 @@ return:
   ret i32 %retval.0
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64}
+!0 = !{!"branch_weights", i32 7, i32 6, i32 4, i32 4, i32 64}
diff --git a/test/CodeGen/Generic/PBQP.ll b/test/CodeGen/Generic/PBQP.ll
new file mode 100644
index 000000000000..91fcfba1a905
--- /dev/null
+++ b/test/CodeGen/Generic/PBQP.ll
@@ -0,0 +1,29 @@
+; RUN: llc -regalloc=pbqp < %s
+
+define i32 @foo() {
+entry:
+  %call = tail call i32 (...)* @baz()
+  %call1 = tail call i32 (...)* @baz()
+  %call2 = tail call i32 (...)* @baz()
+  %call3 = tail call i32 (...)* @baz()
+  %call4 = tail call i32 (...)* @baz()
+  %call5 = tail call i32 (...)* @baz()
+  %call6 = tail call i32 (...)* @baz()
+  %call7 = tail call i32 (...)* @baz()
+  %call8 = tail call i32 (...)* @baz()
+  %call9 = tail call i32 (...)* @baz()
+  %call10 = tail call i32 (...)* @baz()
+  %call11 = tail call i32 (...)* @baz()
+  %call12 = tail call i32 (...)* @baz()
+  %call13 = tail call i32 (...)* @baz()
+  %call14 = tail call i32 (...)* @baz()
+  %call15 = tail call i32 (...)* @baz()
+  %call16 = tail call i32 (...)* @baz()
+  %call17 = tail call i32 @bar(i32 %call, i32 %call1, i32 %call2, i32 %call3, i32 %call4, i32 %call5, i32 %call6, i32 %call7, i32 %call8, i32 %call9, i32 %call10, i32 %call11, i32 %call12, i32 %call13, i32 %call14, i32 %call15, i32 %call16)
+  ret i32 %call17
+}
+
+declare i32 @baz(...)
+
+declare i32 @bar(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+
diff --git a/test/CodeGen/Generic/assume.ll b/test/CodeGen/Generic/assume.ll
new file mode 100644
index 000000000000..bb045b38465d
--- /dev/null
+++ b/test/CodeGen/Generic/assume.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s
+
+define void @main() {
+        call void @llvm.assume(i1 1)
+        ret void
+}
+
+declare void @llvm.assume(i1) nounwind
+
diff --git a/test/CodeGen/Generic/dbg_value.ll b/test/CodeGen/Generic/dbg_value.ll
index 840eeb0cbf31..ed7bdbad1822 100644
--- a/test/CodeGen/Generic/dbg_value.ll
+++ b/test/CodeGen/Generic/dbg_value.ll
@@ -4,11 +4,11 @@
 %0 = type { i32, i32 }
 
 define void @t(%0*, i32, i32, i32, i32) nounwind {
-  tail call void @llvm.dbg.value(metadata !{%0* %0}, i64 0, metadata !0)
+  tail call void @llvm.dbg.value(metadata %0* %0, i64 0, metadata !0, metadata !{!"0x102"})
   unreachable
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 ; !0 should conform to the format of DIVariable.
-!0 = metadata !{i32 786689, null, metadata !"a", null, i32 0, null, i32 0, i32 0} ;
+!0 = !{!"0x101\00a\000\000", null, null, null} ; [ DW_TAG_arg_variable ]
diff --git a/test/CodeGen/Generic/empty-insertvalue.ll b/test/CodeGen/Generic/empty-insertvalue.ll
new file mode 100644
index 000000000000..e4cc27c2ffcf
--- /dev/null
+++ b/test/CodeGen/Generic/empty-insertvalue.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s
+
+define void @f() {
+entry:
+  %0 = insertvalue { [0 x { i8*, i8* }], [0 x { i8*, i64 }] } undef, [0 x { i8*, i8* }] undef, 0
+  ret void
+}
diff --git a/test/CodeGen/Generic/empty-phi.ll b/test/CodeGen/Generic/empty-phi.ll
new file mode 100644
index 000000000000..8d5f3b96941c
--- /dev/null
+++ b/test/CodeGen/Generic/empty-phi.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s
+
+define void @f() {
+entry:
+  br label %bb1
+
+bb1:
+  %0 = phi [0 x { i8*, i64, i64 }] [ %load, %bb2 ], [ undef, %entry ]
+  store [0 x { i8*, i64, i64 }] %0, [0 x { i8*, i64, i64 }]* undef, align 8
+  %1 = icmp eq i64 undef, 0
+  br i1 %1, label %bb2, label %bb3
+
+bb2:
+  %load = load [0 x { i8*, i64, i64 }]* undef, align 8
+  br label %bb1
+
+bb3:
+  ret void
+}
diff --git a/test/CodeGen/Generic/print-machineinstrs.ll b/test/CodeGen/Generic/print-machineinstrs.ll
index 75dceb5b2623..26bccaae572c 100644
--- a/test/CodeGen/Generic/print-machineinstrs.ll
+++ b/test/CodeGen/Generic/print-machineinstrs.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -O3 -debug-pass=Structure -print-machineinstrs= -o /dev/null 2>&1 | FileCheck %s
 
 define i64 @foo(i64 %a, i64 %b) nounwind {
-; CHECK: -branch-folder -print-machineinstrs
+; CHECK: -branch-folder  -machineinstr-printer
 ; CHECK: Control Flow Optimizer
 ; CHECK-NEXT: MachineFunction Printer
 ; CHECK: Machine code for function foo:
diff --git a/test/CodeGen/Hexagon/BranchPredict.ll b/test/CodeGen/Hexagon/BranchPredict.ll
index 4ab1966bf04d..5d564493e507 100644
--- a/test/CodeGen/Hexagon/BranchPredict.ll
+++ b/test/CodeGen/Hexagon/BranchPredict.ll
@@ -72,5 +72,5 @@ return:                                           ; preds = %if.else, %if.then
   ret i32 %retval.0
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
-!1 = metadata !{metadata !"branch_weights", i32 4, i32 64}
+!0 = !{!"branch_weights", i32 64, i32 4}
+!1 = !{!"branch_weights", i32 4, i32 64}
diff --git a/test/CodeGen/Hexagon/block-addr.ll b/test/CodeGen/Hexagon/block-addr.ll
index 54a12bf48448..dc0d6e60fd28 100644
--- a/test/CodeGen/Hexagon/block-addr.ll
+++ b/test/CodeGen/Hexagon/block-addr.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=hexagon < %s | FileCheck %s
 
 ; CHECK: r{{[0-9]+}} = CONST32(#.LJTI{{[0-9]+_[0-9]+}})
-; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}}+r{{[0-9]+<<#[0-9]+}})
+; CHECK: r{{[0-9]+}} = memw(r{{[0-9]+}} + r{{[0-9]+<<#[0-9]+}})
 ; CHECK: jumpr r{{[0-9]+}}
 
 define void @main() #0 {
diff --git a/test/CodeGen/Hexagon/cext-check.ll b/test/CodeGen/Hexagon/cext-check.ll
index 7c4b19e5a402..b7181d803f71 100644
--- a/test/CodeGen/Hexagon/cext-check.ll
+++ b/test/CodeGen/Hexagon/cext-check.ll
@@ -2,9 +2,9 @@
 ; Check that we constant extended instructions only when necessary.
 
 define i32 @cext_test1(i32* %a) nounwind {
-; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}+##8000)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}{{ *}}+{{ *}}##8000)
 ; CHECK: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300000)
-; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}+##4092)
+; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}{{ *}}+{{ *}}##4092)
 ; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300)
 entry:
   %0 = load i32* %a, align 4
@@ -29,9 +29,9 @@ return:
 }
 
 define i32 @cext_test2(i8* %a) nounwind {
-; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+##1023)
+; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+{{ *}}##1023)
 ; CHECK: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300000)
-; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+##1024)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}{{ *}}+{{ *}}##1024)
 ; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##6000)
 entry:
   %tobool = icmp ne i8* %a, null
diff --git a/test/CodeGen/Hexagon/cmp-not.ll b/test/CodeGen/Hexagon/cmp-not.ll
new file mode 100644
index 000000000000..abcddc38b23b
--- /dev/null
+++ b/test/CodeGen/Hexagon/cmp-not.ll
@@ -0,0 +1,50 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; Check that we generate matching compare insn.
+
+; Function Attrs: nounwind
+define i32 @neqi(i32 %argc) #0 {
+entry:
+  %p = alloca i8, align 1
+  %0 = tail call i1 @llvm.hexagon.C4.cmpneqi(i32 %argc, i32 512)
+  %conv = zext i1 %0 to i8
+  store volatile i8 %conv, i8* %p, align 1
+  %p.0.p.0. = load volatile i8* %p, align 1
+  %conv1 = zext i8 %p.0.p.0. to i32
+  ret i32 %conv1
+}
+; CHECK:	p{{[0-3]}}{{ *}} = !cmp.eq(r{{[0-9]+}}, ##512)
+
+; Function Attrs: nounwind readnone
+declare i1 @llvm.hexagon.C4.cmpneqi(i32, i32) #1
+
+; Function Attrs: nounwind
+define i32 @ngti(i32 %argc) #0 {
+entry:
+  %p = alloca i8, align 1
+  %0 = tail call i1 @llvm.hexagon.C4.cmpltei(i32 %argc, i32 4)
+  %conv = zext i1 %0 to i8
+  store volatile i8 %conv, i8* %p, align 1
+  %p.0.p.0. = load volatile i8* %p, align 1
+  %conv1 = zext i8 %p.0.p.0. to i32
+  ret i32 %conv1
+}
+; CHECK:	p{{[0-3]}}{{ *}} = !cmp.gt(r{{[0-9]+}}, #4)
+
+; Function Attrs: nounwind readnone
+declare i1 @llvm.hexagon.C4.cmpltei(i32, i32) #1
+
+; Function Attrs: nounwind
+define i32 @ngtui(i32 %argc) #0 {
+entry:
+  %p = alloca i8, align 1
+  %0 = tail call i1 @llvm.hexagon.C4.cmplteui(i32 %argc, i32 4)
+  %conv = zext i1 %0 to i8
+  store volatile i8 %conv, i8* %p, align 1
+  %p.0.p.0. = load volatile i8* %p, align 1
+  %conv1 = zext i8 %p.0.p.0. to i32
+  ret i32 %conv1
+}
+; CHECK: 	p{{[0-3]}}{{ *}} = !cmp.gtu(r{{[0-9]+}}, #4)
+
+; Function Attrs: nounwind readnone
+declare i1 @llvm.hexagon.C4.cmplteui(i32, i32) #1
diff --git a/test/CodeGen/Hexagon/ctor.ll b/test/CodeGen/Hexagon/ctor.ll
new file mode 100644
index 000000000000..2e2fc519118f
--- /dev/null
+++ b/test/CodeGen/Hexagon/ctor.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=hexagon < %s  | FileCheck -check-prefix=INITARRAY %s
+; RUN: llc -march=hexagon < %s  -use-ctors | FileCheck -check-prefix=CTOR %s
+
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_P10066.ii, i8* null }]
+define internal void @_GLOBAL__sub_I_P10066.ii() {
+entry:
+  ret void
+}
+
+;CTOR: .section	.ctors
+;CTOR-NOT:  section	.init_array
+
+;INITARRAY: section	.init_array
+;INITARRAY-NOT: .section	.ctors
diff --git a/test/CodeGen/Hexagon/hwloop-dbg.ll b/test/CodeGen/Hexagon/hwloop-dbg.ll
index 9537489b03d3..3c05884f6a7d 100644
--- a/test/CodeGen/Hexagon/hwloop-dbg.ll
+++ b/test/CodeGen/Hexagon/hwloop-dbg.ll
@@ -5,9 +5,9 @@ target triple = "hexagon"
 
 define void @foo(i32* nocapture %a, i32* nocapture %b) nounwind {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32* %a}, i64 0, metadata !13), !dbg !17
-  tail call void @llvm.dbg.value(metadata !{i32* %b}, i64 0, metadata !14), !dbg !18
-  tail call void @llvm.dbg.value(metadata !30, i64 0, metadata !15), !dbg !19
+  tail call void @llvm.dbg.value(metadata i32* %a, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !17
+  tail call void @llvm.dbg.value(metadata i32* %b, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !18
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !19
   br label %for.body, !dbg !19
 
 for.body:                                         ; preds = %for.body, %entry
@@ -18,11 +18,11 @@ for.body:                                         ; preds = %for.body, %entry
   %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %b.addr.01 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.body ]
   %incdec.ptr = getelementptr inbounds i32* %b.addr.01, i32 1, !dbg !21
-  tail call void @llvm.dbg.value(metadata !{i32* %incdec.ptr}, i64 0, metadata !14), !dbg !21
+  tail call void @llvm.dbg.value(metadata i32* %incdec.ptr, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !21
   %0 = load i32* %b.addr.01, align 4, !dbg !21
   store i32 %0, i32* %arrayidx.phi, align 4, !dbg !21
   %inc = add nsw i32 %i.02, 1, !dbg !26
-  tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !15), !dbg !26
+  tail call void @llvm.dbg.value(metadata i32 %inc, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !26
   %exitcond = icmp eq i32 %inc, 10, !dbg !19
   %arrayidx.inc = getelementptr i32* %arrayidx.phi, i32 1
   br i1 %exitcond, label %for.end, label %for.body, !dbg !19
@@ -31,34 +31,34 @@ for.end:                                          ; preds = %for.body
   ret void, !dbg !27
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!29}
 
-!0 = metadata !{i32 786449, metadata !28, i32 12, metadata !"QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] [DW_LANG_C99]
-!2 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !28, null, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*)* @foo, null, null, metadata !11, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9, metadata !9}
-!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from int]
-!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!11 = metadata !{metadata !13, metadata !14, metadata !15}
-!13 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 1]
-!14 = metadata !{i32 786689, metadata !5, metadata !"b", metadata !6, i32 33554433, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 1]
-!15 = metadata !{i32 786688, metadata !16, metadata !"i", metadata !6, i32 2, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2]
-!16 = metadata !{i32 786443, metadata !28, metadata !5, i32 1, i32 26, i32 0} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
-!17 = metadata !{i32 1, i32 15, metadata !5, null}
-!18 = metadata !{i32 1, i32 23, metadata !5, null}
-!19 = metadata !{i32 3, i32 8, metadata !20, null}
-!20 = metadata !{i32 786443, metadata !28, metadata !16, i32 3, i32 3, i32 1} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
-!21 = metadata !{i32 4, i32 5, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !28, metadata !20, i32 3, i32 28, i32 2} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
-!26 = metadata !{i32 3, i32 23, metadata !20, null}
-!27 = metadata !{i32 6, i32 1, metadata !16, null}
-!28 = metadata !{metadata !"hwloop-dbg.c", metadata !"/usr2/kparzysz/s.hex/t"}
-!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!30 = metadata !{i32 0}
+!0 = !{!"0x11\0012\00QuIC LLVM Hexagon Clang version 6.1-pre-unknown, (git://git-hexagon-aus.quicinc.com/llvm/clang-mainline.git e9382867661454cdf44addb39430741578e9765c) (llvm/llvm-mainline.git 36412bb1fcf03ed426d4437b41198bae066675ac)\001\00\000\00\001", !28, !2, !2, !3, !2, null} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c] [DW_LANG_C99]
+!2 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\001\001", !28, null, !7, null, void (i32*, i32*)* @foo, null, null, !11} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!6 = !{!"0x29", !28} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9, !9}
+!9 = !{!"0xf\00\000\0032\0032\000\000", null, null, !10} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from int]
+!10 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!11 = !{!13, !14, !15}
+!13 = !{!"0x101\00a\0016777217\000", !5, !6, !9} ; [ DW_TAG_arg_variable ] [a] [line 1]
+!14 = !{!"0x101\00b\0033554433\000", !5, !6, !9} ; [ DW_TAG_arg_variable ] [b] [line 1]
+!15 = !{!"0x100\00i\002\000", !16, !6, !10} ; [ DW_TAG_auto_variable ] [i] [line 2]
+!16 = !{!"0xb\001\0026\000", !28, !5} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
+!17 = !MDLocation(line: 1, column: 15, scope: !5)
+!18 = !MDLocation(line: 1, column: 23, scope: !5)
+!19 = !MDLocation(line: 3, column: 8, scope: !20)
+!20 = !{!"0xb\003\003\001", !28, !16} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
+!21 = !MDLocation(line: 4, column: 5, scope: !22)
+!22 = !{!"0xb\003\0028\002", !28, !20} ; [ DW_TAG_lexical_block ] [/usr2/kparzysz/s.hex/t/hwloop-dbg.c]
+!26 = !MDLocation(line: 3, column: 23, scope: !20)
+!27 = !MDLocation(line: 6, column: 1, scope: !16)
+!28 = !{!"hwloop-dbg.c", !"/usr2/kparzysz/s.hex/t"}
+!29 = !{i32 1, !"Debug Info Version", i32 2}
+!30 = !{i32 0}
diff --git a/test/CodeGen/Hexagon/idxload-with-zero-offset.ll b/test/CodeGen/Hexagon/idxload-with-zero-offset.ll
index ca6df88a5529..729d79f55a6e 100644
--- a/test/CodeGen/Hexagon/idxload-with-zero-offset.ll
+++ b/test/CodeGen/Hexagon/idxload-with-zero-offset.ll
@@ -4,7 +4,7 @@
 ; load word
 
 define i32 @load_w(i32* nocapture %a, i32 %n) nounwind {
-; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}<<#0)
 entry:
   %tmp = shl i32 %n, 4
   %scevgep9 = getelementptr i32* %a, i32 %tmp
@@ -15,7 +15,7 @@ entry:
 ; load unsigned half word
 
 define i16 @load_uh(i16* nocapture %a, i32 %n) nounwind {
-; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memuh(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memuh(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}<<#0)
 entry:
   %tmp = shl i32 %n, 4
   %scevgep9 = getelementptr i16* %a, i32 %tmp
@@ -26,7 +26,7 @@ entry:
 ; load signed half word
 
 define i32 @load_h(i16* nocapture %a, i32 %n) nounwind {
-; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memh(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memh(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}<<#0)
 entry:
   %tmp = shl i32 %n, 4
   %scevgep9 = getelementptr i16* %a, i32 %tmp
@@ -38,7 +38,7 @@ entry:
 ; load unsigned byte
 
 define i8 @load_ub(i8* nocapture %a, i32 %n) nounwind {
-; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memub(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}<<#0)
 entry:
   %tmp = shl i32 %n, 4
   %scevgep9 = getelementptr i8* %a, i32 %tmp
@@ -49,7 +49,7 @@ entry:
 ; load signed byte
 
 define i32 @foo_2(i8* nocapture %a, i32 %n) nounwind {
-; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memb(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memb(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}<<#0)
 entry:
   %tmp = shl i32 %n, 4
   %scevgep9 = getelementptr i8* %a, i32 %tmp
@@ -61,7 +61,7 @@ entry:
 ; load doubleword
 
 define i64 @load_d(i64* nocapture %a, i32 %n) nounwind {
-; CHECK: r{{[0-9]+}}:{{[0-9]+}}{{ *}}={{ *}}memd(r{{[0-9]+}}+r{{[0-9]+}}<<#0)
+; CHECK: r{{[0-9]+}}:{{[0-9]+}}{{ *}}={{ *}}memd(r{{[0-9]+}}{{ *}}+{{ *}}r{{[0-9]+}}<<#0)
 entry:
   %tmp = shl i32 %n, 4
   %scevgep9 = getelementptr i64* %a, i32 %tmp
diff --git a/test/CodeGen/Hexagon/newvaluestore.ll b/test/CodeGen/Hexagon/newvaluestore.ll
index 186e39378854..93cf3479ab5e 100644
--- a/test/CodeGen/Hexagon/newvaluestore.ll
+++ b/test/CodeGen/Hexagon/newvaluestore.ll
@@ -7,7 +7,7 @@
 
 define i32 @main() nounwind {
 entry:
-; CHECK: memw(r{{[0-9]+}} + #{{[0-9]+}}) = r{{[0-9]+}}.new
+; CHECK: memw(r{{[0-9]+}}+#{{[0-9]+}}) = r{{[0-9]+}}.new
   %number1 = alloca i32, align 4
   %number2 = alloca i32, align 4
   %number3 = alloca i32, align 4
diff --git a/test/CodeGen/Hexagon/pred-absolute-store.ll b/test/CodeGen/Hexagon/pred-absolute-store.ll
index b1b09f414a54..64635b176daf 100644
--- a/test/CodeGen/Hexagon/pred-absolute-store.ll
+++ b/test/CodeGen/Hexagon/pred-absolute-store.ll
@@ -2,7 +2,7 @@
 ; Check that we are able to predicate instructions with abosolute
 ; addressing mode.
 
-; CHECK: if{{ *}}(p{{[0-3]+}}){{ *}}memw(##gvar){{ *}}={{ *}}r{{[0-9]+}}
+; CHECK: if{{ *}}(p{{[0-3]+}}.new){{ *}}memw(##gvar){{ *}}={{ *}}r{{[0-9]+}}
 
 @gvar = external global i32
 define i32 @test2(i32 %a, i32 %b) nounwind {
diff --git a/test/CodeGen/Hexagon/struct_args_large.ll b/test/CodeGen/Hexagon/struct_args_large.ll
index f09fd10cc84d..db87d9e81db1 100644
--- a/test/CodeGen/Hexagon/struct_args_large.ll
+++ b/test/CodeGen/Hexagon/struct_args_large.ll
@@ -1,6 +1,6 @@
 ; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; CHECK: r[[T0:[0-9]+]] = CONST32(#s2)
-; CHECK: memw(r29 + #0) = r{{.}}
+; CHECK: memw(r29+#0) = r{{.}}
 ; CHECK: memw(r29+#8) = r{{.}}
 
 %struct.large = type { i64, i64 }
diff --git a/test/CodeGen/Inputs/DbgValueOtherTargets.ll b/test/CodeGen/Inputs/DbgValueOtherTargets.ll
index 953e576af85c..d21a4eebe247 100644
--- a/test/CodeGen/Inputs/DbgValueOtherTargets.ll
+++ b/test/CodeGen/Inputs/DbgValueOtherTargets.ll
@@ -3,28 +3,28 @@
 define i32 @main() nounwind ssp {
 entry:
 ; CHECK: DEBUG_VALUE
-  call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
+  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !7, metadata !{!"0x102"}), !dbg !9
   ret i32 0, !dbg !10
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!13}
 
-!0 = metadata !{i32 786478, metadata !12, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang version 2.9 (trunk 120996)", i1 false, metadata !"", i32 0, metadata !6, metadata !6, metadata !11, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !12, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 786688, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 786443, metadata !12, metadata !0, i32 2, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-!11 = metadata !{metadata !0}
-!12 = metadata !{metadata !"/tmp/x.c", metadata !"/Users/manav"}
-!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00main\00main\00\002\000\001\000\006\000\000\000", !12, !1, !3, null, i32 ()* @main, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !12} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 2.9 (trunk 120996)\000\00\000\00\000", !12, !6, !6, !11, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !12, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", !12, !2} ; [ DW_TAG_base_type ]
+!6 = !{i32 0}
+!7 = !{!"0x100\00i\003\000", !8, !1, !5} ; [ DW_TAG_auto_variable ]
+!8 = !{!"0xb\002\0012\000", !12, !0} ; [ DW_TAG_lexical_block ]
+!9 = !MDLocation(line: 3, column: 11, scope: !8)
+!10 = !MDLocation(line: 4, column: 2, scope: !8)
+!11 = !{!0}
+!12 = !{!"/tmp/x.c", !"/Users/manav"}
+!13 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/MSP430/asm-clobbers.ll b/test/CodeGen/MSP430/asm-clobbers.ll
new file mode 100644
index 000000000000..216a3fe40189
--- /dev/null
+++ b/test/CodeGen/MSP430/asm-clobbers.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:16:16-i32:16:32-a:16-n8:16"
+target triple = "msp430---elf"
+
+define void @test() {
+entry:
+; CHECK-LABEL: test:
+; CHECK: push.w r10
+  call void asm sideeffect "", "~{r10}"()
+; CHECK: pop.w r10
+  ret void
+}
diff --git a/test/CodeGen/MSP430/memset.ll b/test/CodeGen/MSP430/memset.ll
new file mode 100644
index 000000000000..bf105446f52d
--- /dev/null
+++ b/test/CodeGen/MSP430/memset.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"
+target triple = "msp430---elf"
+
+@buf = external global i8*
+
+; Function Attrs: nounwind
+define void @test() nounwind {
+entry:
+; CHECK-LABEL: test:
+  %0 = load i8** @buf, align 2
+; CHECK: mov.w &buf, r15
+; CHECK-NEXT: mov.w #5, r14
+; CHECK-NEXT: mov.w #128, r13
+; CHECK-NEXT: call #memset
+  call void @llvm.memset.p0i8.i16(i8* %0, i8 5, i16 128, i32 1, i1 false)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memset.p0i8.i16(i8* nocapture, i8, i16, i32, i1) nounwind
+
diff --git a/test/CodeGen/Mips/Fast-ISel/br1.ll b/test/CodeGen/Mips/Fast-ISel/br1.ll
new file mode 100644
index 000000000000..579a77f88fef
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/br1.ll
@@ -0,0 +1,34 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s
+
+@b = global i32 1, align 4
+@i = global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+; Function Attrs: nounwind
+define void @br() #0 {
+entry:
+  %0 = load i32* @b, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 6754, i32* @i, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void
+; FIXME: This instruction is redundant.
+; CHECK:  xor  $[[REG1:[0-9]+]], ${{[0-9]+}}, $zero
+; CHECK:  sltiu  $[[REG2:[0-9]+]], $[[REG1]], 1
+; CHECK:  bgtz  $[[REG2]], $BB[[BL:[0-9]+_[0-9]+]]
+; CHECK:  nop
+; CHECK:  addiu  ${{[0-9]+}}, $zero, 6754
+; CHECK:  sw  ${{[0-9]+}}, 0(${{[0-9]+}})
+; CHECK: $BB[[BL]]:
+
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Mips/Fast-ISel/callabi.ll b/test/CodeGen/Mips/Fast-ISel/callabi.ll
new file mode 100644
index 000000000000..e76d7a74bd0e
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/callabi.ll
@@ -0,0 +1,477 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s -check-prefix=mips32r2
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s -check-prefix=mips32
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s -check-prefix=CHECK2
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s -check-prefix=CHECK2
+
+
+@c1 = global i8 -45, align 1
+@uc1 = global i8 27, align 1
+@s1 = global i16 -1789, align 2
+@us1 = global i16 1256, align 2
+
+; Function Attrs: nounwind
+define void @cxi() #0 {
+entry:
+; CHECK-LABEL:  cxi
+  call void @xi(i32 10)
+; CHECK-DAG:    addiu   $4, $zero, 10
+; CHECK-DAG:    lw      $25, %got(xi)(${{[0-9]+}})
+; CHECK:        jalr    $25
+
+  ret void
+}
+
+declare void @xi(i32) #1
+
+; Function Attrs: nounwind
+define void @cxii() #0 {
+entry:
+; CHECK-LABEL:  cxii
+  call void @xii(i32 746, i32 892)
+; CHECK-DAG:    addiu   $4, $zero, 746
+; CHECK-DAG:    addiu   $5, $zero, 892
+; CHECK-DAG:    lw      $25, %got(xii)(${{[0-9]+}})
+; CHECK:        jalr    $25
+
+  ret void
+}
+
+declare void @xii(i32, i32) #1
+
+; Function Attrs: nounwind
+define void @cxiii() #0 {
+entry:
+; CHECK-LABEL:  cxiii
+  call void @xiii(i32 88, i32 44, i32 11)
+; CHECK-DAG:    addiu   $4, $zero, 88
+; CHECK-DAG:    addiu   $5, $zero, 44
+; CHECK-DAG:    addiu   $6, $zero, 11
+; CHECK-DAG:    lw      $25, %got(xiii)(${{[0-9]+}})
+; CHECK:        jalr    $25
+  ret void
+}
+
+declare void @xiii(i32, i32, i32) #1
+
+; Function Attrs: nounwind
+define void @cxiiii() #0 {
+entry:
+; CHECK-LABEL:  cxiiii
+  call void @xiiii(i32 167, i32 320, i32 97, i32 14)
+; CHECK-DAG:    addiu   $4, $zero, 167
+; CHECK-DAG:    addiu   $5, $zero, 320
+; CHECK-DAG:    addiu   $6, $zero, 97
+; CHECK-DAG:    addiu   $7, $zero, 14
+; CHECK-DAG:    lw      $25, %got(xiiii)(${{[0-9]+}})
+; CHECK:        jalr    $25
+
+  ret void
+}
+
+declare void @xiiii(i32, i32, i32, i32) #1
+
+; Function Attrs: nounwind
+define void @cxiiiiconv() #0 {
+entry:
+; CHECK-LABEL: cxiiiiconv
+; mips32r2-LABEL:  cxiiiiconv
+; mips32-LABEL:  cxiiiiconv
+  %0 = load i8* @c1, align 1
+  %conv = sext i8 %0 to i32
+  %1 = load i8* @uc1, align 1
+  %conv1 = zext i8 %1 to i32
+  %2 = load i16* @s1, align 2
+  %conv2 = sext i16 %2 to i32
+  %3 = load i16* @us1, align 2
+  %conv3 = zext i16 %3 to i32
+  call void @xiiii(i32 %conv, i32 %conv1, i32 %conv2, i32 %conv3)
+; CHECK:        addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; mips32r2:     addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; mips32:       addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; mips32r2-DAG:         lw      $[[REG_C1_ADDR:[0-9]+]], %got(c1)($[[REG_GP]])
+; mips32r2-DAG: lbu     $[[REG_C1:[0-9]+]], 0($[[REG_C1_ADDR]])
+; mips32r2-DAG  seb     $3, $[[REG_C1]]
+; mips32-DAG:   lw      $[[REG_C1_ADDR:[0-9]+]], %got(c1)($[[REG_GP]])
+; mips32-DAG:   lbu     $[[REG_C1:[0-9]+]], 0($[[REG_C1_ADDR]])
+; mips32-DAG:   sll     $[[REG_C1_1:[0-9]+]], $[[REG_C1]], 24
+; mips32-DAG:   sra     $4, $[[REG_C1_1]], 24
+; CHECK-DAG:    lw      $[[REG_UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]])
+; CHECK-DAG:    lbu     $[[REG_UC1:[0-9]+]], 0($[[REG_UC1_ADDR]])
+; FIXME andi is superfulous
+; CHECK-DAG:    andi    $5, $[[REG_UC1]], 255
+; mips32r2-DAG:         lw      $[[REG_S1_ADDR:[0-9]+]], %got(s1)($[[REG_GP]])
+; mips32r2-DAG: lhu     $[[REG_S1:[0-9]+]], 0($[[REG_S1_ADDR]])
+; mips32r2-DAG: seh     $6, $[[REG_S1]]
+; mips32-DAG:   lw      $[[REG_S1_ADDR:[0-9]+]], %got(s1)($[[REG_GP]])
+; mips32-DAG:   lhu     $[[REG_S1:[0-9]+]], 0($[[REG_S1_ADDR]])
+; mips32-DAG:   sll     $[[REG_S1_1:[0-9]+]], $[[REG_S1]], 16
+; mips32-DAG:   sra     $6, $[[REG_S1_1]], 16
+; CHECK-DAG:    lw      $[[REG_US1_ADDR:[0-9]+]], %got(us1)($[[REG_GP]])
+; CHECK-DAG:    lhu     $[[REG_US1:[0-9]+]], 0($[[REG_US1_ADDR]])
+; FIXME andi is superfulous
+; CHECK-DAG:    andi    $7, $[[REG_US1]], 65535
+; mips32r2:     jalr    $25
+; mips32r2:     jalr    $25
+; CHECK:        jalr    $25
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @cxf() #0 {
+entry:
+; CHECK-LABEL:  cxf
+  call void @xf(float 0x40BBC85560000000)
+; CHECK:        addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; CHECK:        lui     $[[REG_FPCONST_1:[0-9]+]], 17886
+; CHECK:        ori     $[[REG_FPCONST:[0-9]+]], $[[REG_FPCONST_1]], 17067
+; CHECK: mtc1   $[[REG_FPCONST]], $f12
+; CHECK:        lw      $25, %got(xf)($[[REG_GP]])
+; CHECK:        jalr    $25
+  ret void
+}
+
+declare void @xf(float) #1
+
+; Function Attrs: nounwind
+define void @cxff() #0 {
+entry:
+; CHECK-LABEL:  cxff
+  call void @xff(float 0x3FF74A6CA0000000, float 0x401A2C0840000000)
+; CHECK:        addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; CHECK-DAG:    lui     $[[REG_FPCONST_1:[0-9]+]], 16314
+; CHECK-DAG:    ori     $[[REG_FPCONST:[0-9]+]], $[[REG_FPCONST_1]], 21349
+; CHECK-DAG: mtc1       $[[REG_FPCONST]], $f12
+; CHECK-DAG:    lui     $[[REG_FPCONST_2:[0-9]+]], 16593
+; CHECK-DAG:    ori     $[[REG_FPCONST_3:[0-9]+]], $[[REG_FPCONST_2]], 24642
+; CHECK-DAG: mtc1       $[[REG_FPCONST_3]], $f14
+; CHECK:        lw      $25, %got(xff)($[[REG_GP]])
+; CHECK:        jalr    $25
+  ret void
+}
+
+declare void @xff(float, float) #1
+
+; Function Attrs: nounwind
+define void @cxfi() #0 {
+entry:
+; CHECK-LABEL: cxfi
+  call void @xfi(float 0x4013906240000000, i32 102)
+; CHECK:        addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; CHECK-DAG:    lui     $[[REG_FPCONST_1:[0-9]+]], 16540
+; CHECK-DAG:    ori     $[[REG_FPCONST:[0-9]+]], $[[REG_FPCONST_1]], 33554
+; CHECK-DAG: mtc1       $[[REG_FPCONST]], $f12
+; CHECK-DAG:    addiu   $5, $zero, 102
+; CHECK:        lw      $25, %got(xfi)($[[REG_GP]])
+; CHECK:        jalr    $25
+
+  ret void
+}
+
+declare void @xfi(float, i32) #1
+
+; Function Attrs: nounwind
+define void @cxfii() #0 {
+entry:
+; CHECK-LABEL: cxfii
+  call void @xfii(float 0x405EC7EE00000000, i32 9993, i32 10922)
+; CHECK:        addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; CHECK-DAG:    lui     $[[REG_FPCONST_1:[0-9]+]], 17142
+; CHECK-DAG:    ori     $[[REG_FPCONST:[0-9]+]], $[[REG_FPCONST_1]], 16240
+; CHECK-DAG: mtc1       $[[REG_FPCONST]], $f12
+; CHECK-DAG:    addiu   $5, $zero, 9993
+; CHECK-DAG:    addiu   $6, $zero, 10922
+; CHECK:        lw      $25, %got(xfii)($[[REG_GP]])
+; CHECK:        jalr    $25
+  ret void
+}
+
+declare void @xfii(float, i32, i32) #1
+
+; Function Attrs: nounwind
+define void @cxfiii() #0 {
+entry:
+; CHECK-LABEL: cxfiii
+  call void @xfiii(float 0x405C072B20000000, i32 3948, i32 89011, i32 111222)
+; CHECK:        addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; CHECK-DAG:    lui     $[[REG_FPCONST_1:[0-9]+]], 17120
+; CHECK-DAG:    ori     $[[REG_FPCONST:[0-9]+]], $[[REG_FPCONST_1]], 14681
+; CHECK-DAG: mtc1       $[[REG_FPCONST]], $f12
+; CHECK-DAG:    addiu   $5, $zero, 3948
+; CHECK-DAG:    lui     $[[REG_I_1:[0-9]+]], 1
+; CHECK-DAG:    ori     $6, $[[REG_I_1]], 23475
+; CHECK-DAG:    lui     $[[REG_I_2:[0-9]+]], 1
+; CHECK-DAG:    ori     $7, $[[REG_I_2]], 45686
+; CHECK:        lw      $25, %got(xfiii)($[[REG_GP]])
+; CHECK:        jalr    $25
+  ret void
+}
+
+declare void @xfiii(float, i32, i32, i32) #1
+
+; Function Attrs: nounwind
+define void @cxd() #0 {
+entry:
+; mips32r2-LABEL: cxd:
+; mips32-LABEL: cxd:
+  call void @xd(double 5.994560e+02)
+; mips32:       addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; mips32-DAG:   lui     $[[REG_FPCONST_1:[0-9]+]], 16514
+; mips32-DAG:   ori     $[[REG_FPCONST_2:[0-9]+]], $[[REG_FPCONST_1]], 48037
+; mips32-DAG:   lui     $[[REG_FPCONST_3:[0-9]+]], 58195
+; mips32-DAG:   ori     $[[REG_FPCONST_4:[0-9]+]], $[[REG_FPCONST_3]], 63439
+; mips32-DAG:    mtc1   $[[REG_FPCONST_4]], $f12
+; mips32-DAG:       mtc1        $[[REG_FPCONST_2]], $f13
+; mips32-DAG:   lw      $25, %got(xd)($[[REG_GP]])
+; mips32:       jalr    $25
+; mips32r2:     addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; mips32r2-DAG: lui     $[[REG_FPCONST_1:[0-9]+]], 16514
+; mips32r2-DAG: ori     $[[REG_FPCONST_2:[0-9]+]], $[[REG_FPCONST_1]], 48037
+; mips32r2-DAG: lui     $[[REG_FPCONST_3:[0-9]+]], 58195
+; mips32r2-DAG: ori     $[[REG_FPCONST_4:[0-9]+]], $[[REG_FPCONST_3]], 63439
+; mips32r2-DAG: mtc1    $[[REG_FPCONST_4]], $f12
+; mips32r2-DAG: mthc1   $[[REG_FPCONST_2]], $f12
+; mips32r2-DAG: lw      $25, %got(xd)($[[REG_GP]])
+; mips32r2 :    jalr    $25
+  ret void
+}
+
+declare void @xd(double) #1
+
+; Function Attrs: nounwind
+define void @cxdd() #0 {
+; mips32r2-LABEL: cxdd:
+; mips32-LABEL: cxdd:
+entry:
+  call void @xdd(double 1.234980e+03, double 0x40F5B331F7CED917)
+; mips32:       addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; mips32-DAG:   lui     $[[REG_FPCONST_1:[0-9]+]], 16531
+; mips32-DAG:   ori     $[[REG_FPCONST_2:[0-9]+]], $[[REG_FPCONST_1]], 19435
+; mips32-DAG:   lui     $[[REG_FPCONST_3:[0-9]+]], 34078
+; mips32-DAG:   ori     $[[REG_FPCONST_4:[0-9]+]], $[[REG_FPCONST_3]], 47186
+; mips32-DAG:   mtc1    $[[REG_FPCONST_4]], $f12
+; mips32-DAG:   mtc1    $[[REG_FPCONST_2]], $f13
+; mips32-DAG:   lui     $[[REG_FPCONST_1:[0-9]+]], 16629
+; mips32-DAG:   ori     $[[REG_FPCONST_2:[0-9]+]], $[[REG_FPCONST_1]], 45873
+; mips32-DAG:   lui     $[[REG_FPCONST_3:[0-9]+]], 63438
+; mips32-DAG:   ori     $[[REG_FPCONST_4:[0-9]+]], $[[REG_FPCONST_3]], 55575
+; mips32-DAG:   mtc1    $[[REG_FPCONST_4]], $f14
+; mips32-DAG:   mtc1    $[[REG_FPCONST_2]], $f15
+; mips32-DAG:   lw      $25, %got(xdd)($[[REG_GP]])
+; mips32:       jalr    $25
+; mips32r2:     addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; mips32r2-DAG: lui     $[[REG_FPCONST_1:[0-9]+]], 16531
+; mips32r2-DAG: ori     $[[REG_FPCONST_2:[0-9]+]], $[[REG_FPCONST_1]], 19435
+; mips32r2-DAG: lui     $[[REG_FPCONST_3:[0-9]+]], 34078
+; mips32r2-DAG: ori     $[[REG_FPCONST_4:[0-9]+]], $[[REG_FPCONST_3]], 47186
+; mips32r2-DAG: mtc1    $[[REG_FPCONST_4]], $f12
+; mips32r2-DAG: mthc1   $[[REG_FPCONST_2]], $f12
+; mips32r2-DAG: lui     $[[REG_FPCONST_1:[0-9]+]], 16629
+; mips32r2-DAG: ori     $[[REG_FPCONST_2:[0-9]+]], $[[REG_FPCONST_1]], 45873
+; mips32r2-DAG: lui     $[[REG_FPCONST_3:[0-9]+]], 63438
+; mips32r2-DAG: ori     $[[REG_FPCONST_4:[0-9]+]], $[[REG_FPCONST_3]], 55575
+; mips32r2-DAG: mtc1    $[[REG_FPCONST_4]], $f14
+; mips32r2-DAG: mthc1   $[[REG_FPCONST_2]], $f14
+; mips32r2-DAG: lw      $25, %got(xdd)($[[REG_GP]])
+; mips32r2 :    jalr    $25
+  ret void
+}
+
+declare void @xdd(double, double) #1
+
+; Function Attrs: nounwind
+define void @cxif() #0 {
+entry:
+; CHECK-LABEL: cxif:
+  call void @xif(i32 345, float 0x407BCE5A20000000)
+; CHECK-DAG:    addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; CHECK-DAG:    addiu   $4, $zero, 345
+; CHECK-DAG:    lui     $[[REGF_1:[0-9]+]], 17374
+; CHECK-DAG:    ori     $[[REGF_2:[0-9]+]], $[[REGF_1]], 29393
+; CHECK-DAG:    mtc1    $[[REGF_2]], $f[[REGF_3:[0-9]+]]
+; CHECK-DAG:    mfc1    $5, $f[[REGF_3]]
+; CHECK-DAG:    lw      $25, %got(xif)($[[REG_GP]])
+; CHECK:        jalr    $25
+
+  ret void
+}
+
+declare void @xif(i32, float) #1
+
+; Function Attrs: nounwind
+define void @cxiff() #0 {
+entry:
+; CHECK-LABEL: cxiff:
+; CHECK2-LABEL: cxiff:
+  call void @xiff(i32 12239, float 0x408EDB3340000000, float 0x4013FFE5C0000000)
+; We need to do the two floating point parameters in a separate
+; check because we can't control the ordering of parts of the sequence
+;;
+; CHECK:        addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; CHECK:        addiu   $4, $zero, 12239
+; CHECK2:       addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; CHECK2:       addiu   $4, $zero, 12239
+; CHECK:        lui     $[[REGF_1:[0-9]+]], 17526
+; CHECK:        ori     $[[REGF_2:[0-9]+]], $[[REGF_1]], 55706
+; CHECK:        mtc1    $[[REGF_2]], $f[[REGF_3:[0-9]+]]
+; CHECK:        mfc1    $5, $f[[REGF_3]]
+; CHECK2:       lui     $[[REGF2_1:[0-9]+]], 16543
+; CHECK2:       ori     $[[REGF2_2:[0-9]+]], $[[REGF2_1]], 65326
+; CHECK2:       mtc1    $[[REGF2_2]], $f[[REGF2_3:[0-9]+]]
+; CHECK2:       mfc1    $6, $f[[REGF2_3]]
+; CHECK:        lw      $25, %got(xiff)($[[REG_GP]])
+; CHECK2:       lw      $25, %got(xiff)($[[REG_GP]])
+; CHECK:        jalr    $25
+; CHECK2:       jalr    $25
+  ret void
+}
+
+declare void @xiff(i32, float, float) #1
+
+; Function Attrs: nounwind
+define void @cxifi() #0 {
+entry:
+; CHECK: cxifi:
+  call void @xifi(i32 887, float 0x402277CEE0000000, i32 888)
+; CHECK-DAG:    addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; CHECK-DAG:    addiu   $4, $zero, 887
+; CHECK-DAG:    lui     $[[REGF_1:[0-9]+]], 16659
+; CHECK-DAG:    ori     $[[REGF_2:[0-9]+]], $[[REGF_1]], 48759
+; CHECK-DAG:    mtc1    $[[REGF_2]], $f[[REGF_3:[0-9]+]]
+; CHECK-DAG:    mfc1    $5, $f[[REGF_3]]
+; CHECk-DAG:    addiu   $6, $zero, 888
+; CHECK-DAG:    lw      $25, %got(xifi)($[[REG_GP]])
+; CHECK:        jalr    $25
+
+  ret void
+}
+
+declare void @xifi(i32, float, i32) #1
+
+; Function Attrs: nounwind
+define void @cxifif() #0 {
+entry:
+; CHECK: cxifif:
+; CHECK2: cxifif:
+  call void @xifif(i32 67774, float 0x408EE0FBE0000000, i32 9991, float 0x40B15C8CC0000000)
+; CHECK-DAG:    addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; CHECK-DAG:    addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; CHECK-DAG:    lui     $[[REGI:[0-9]+]], 1
+; CHECK-DAG:    ori     $4, $[[REGI]], 2238
+; CHECK-DAG:    lui     $[[REGF_1:[0-9]+]], 17527
+; CHECK-DAG:    ori     $[[REGF_2:[0-9]+]], $[[REGF_1]], 2015
+; CHECK-DAG:    mtc1    $[[REGF_2]], $f[[REGF_3:[0-9]+]]
+; CHECK-DAG:    mfc1    $5, $f[[REGF_3]]
+; CHECk-DAG:    addiu   $6, $zero, 888
+; CHECK2:       lui     $[[REGF2_1:[0-9]+]], 17802
+; CHECK2:       ori     $[[REGF2_2:[0-9]+]], $[[REGF2_1]], 58470
+; CHECK2:       mtc1    $[[REGF2_2]], $f[[REGF2_3:[0-9]+]]
+; CHECK2:       mfc1    $7, $f[[REGF2_3]]
+; CHECK:        lw      $25, %got(xifif)($[[REG_GP]])
+; CHECK2:       lw      $25, %got(xifif)($[[REG_GP]])
+; CHECK2:       jalr    $25
+; CHECK:        jalr    $25
+
+  ret void
+}
+
+declare void @xifif(i32, float, i32, float) #1
+
+; Function Attrs: nounwind
+define void @cxiffi() #0 {
+entry:
+; CHECK-label: cxiffi:
+; CHECK2-label: cxiffi:
+  call void @xiffi(i32 45, float 0x3FF6666660000000, float 0x408F333340000000, i32 234)
+; CHECK-DAG:    addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; CHECK-DAG:    addiu   $4, $zero, 45
+; CHECK2-DAG:   addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; CHECK2-DAG:   addiu   $4, $zero, 45
+; CHECK-DAG:    lui     $[[REGF_1:[0-9]+]], 16307
+; CHECK-DAG:    ori     $[[REGF_2:[0-9]+]], $[[REGF_1]], 13107
+; CHECK-DAG:    mtc1    $[[REGF_2]], $f[[REGF_3:[0-9]+]]
+; CHECK-DAG:    mfc1    $5, $f[[REGF_3]]
+; CHECK2:       lui     $[[REGF2_1:[0-9]+]], 17529
+; CHECK2:       ori     $[[REGF2_2:[0-9]+]], $[[REGF2_1]], 39322
+; CHECK2:       mtc1    $[[REGF2_2]], $f[[REGF2_3:[0-9]+]]
+; CHECK2:       mfc1    $6, $f[[REGF2_3]]
+; CHECK-DAG:    lw      $25, %got(xiffi)($[[REG_GP]])
+; CHECK-DAG:    addiu   $7, $zero, 234
+; CHECK2-DAG:   lw      $25, %got(xiffi)($[[REG_GP]])
+; CHECK:        jalr    $25
+; CHECK2:       jalr    $25
+
+  ret void
+}
+
+declare void @xiffi(i32, float, float, i32) #1
+
+; Function Attrs: nounwind
+define void @cxifii() #0 {
+entry:
+; CHECK-DAG:    cxifii:
+  call void @xifii(i32 12239, float 0x408EDB3340000000, i32 998877, i32 1234)
+; CHECK-DAG:    addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
+; CHECK-DAG:    addiu   $4, $zero, 12239
+; CHECK-DAG:    lui     $[[REGF_1:[0-9]+]], 17526
+; CHECK-DAG:    ori     $[[REGF_2:[0-9]+]], $[[REGF_1]], 55706
+; CHECK-DAG:    mtc1    $[[REGF_2]], $f[[REGF_3:[0-9]+]]
+; CHECK-DAG:    mfc1    $5, $f[[REGF_3]]
+; CHECK-DAG:    lui     $[[REGI2:[0-9]+]], 15
+; CHECK-DAG:    ori     $6, $[[REGI2]], 15837
+; CHECk-DAG:    addiu   $7, $zero, 1234
+; CHECK-DAG:    lw      $25, %got(xifii)($[[REG_GP]])
+; CHECK:        jalr    $25
+  ret void
+}
+
+declare void @xifii(i32, float, i32, i32) #1
+
+; FIXME: this function will not pass yet. 
+; Function Attrs: nounwind
+; define void @cxfid() #0 {
+;entry:
+;  call void @xfid(float 0x4013B851E0000000, i32 811123, double 0x40934BFF487FCB92)
+;  ret void
+;}
+
+declare void @xfid(float, i32, double) #1
+
+; Function Attrs: nounwind
+define void @g() #0 {
+entry:
+  call void @cxi()
+  call void @cxii()
+  call void @cxiii()
+  call void @cxiiii()
+  call void @cxiiiiconv()
+  call void @cxf()
+  call void @cxff()
+  call void @cxd()
+  call void @cxfi()
+  call void @cxfii()
+  call void @cxfiii()
+  call void @cxdd()
+  call void @cxif()
+  call void @cxiff()
+  call void @cxifi()
+  call void @cxifii()
+  call void @cxifif()
+  call void @cxiffi()
+  ret void
+}
+
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.6.0 (gitosis@dmz-portal.mips.com:clang 43992fe7b17de5553ac06d323cb80cc6723a9ae3) (gitosis@dmz-portal.mips.com:llvm.git 0834e6839eb170197c81bb02e916258d1527e312)"}
diff --git a/test/CodeGen/Mips/Fast-ISel/fpcmpa.ll b/test/CodeGen/Mips/Fast-ISel/fpcmpa.ll
new file mode 100644
index 000000000000..c72b1e70c718
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/fpcmpa.ll
@@ -0,0 +1,254 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s
+
+@f1 = common global float 0.000000e+00, align 4
+@f2 = common global float 0.000000e+00, align 4
+@b1 = common global i32 0, align 4
+@d1 = common global double 0.000000e+00, align 8
+@d2 = common global double 0.000000e+00, align 8
+
+; Function Attrs: nounwind
+define void @feq1()  {
+entry:
+  %0 = load float* @f1, align 4
+  %1 = load float* @f2, align 4
+  %cmp = fcmp oeq float %0, %1
+; CHECK-LABEL:  feq1:
+; CHECK-DAG:    lw      $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
+; CHECK-DAG:    lw      $[[REG_F1_GOT:[0-9]+]], %got(f1)(${{[0-9]+}})
+; CHECK-DAG:    lwc1    $f[[REG_F2:[0-9]+]], 0($[[REG_F2_GOT]])
+; CHECK-DAG:    lwc1    $f[[REG_F1:[0-9]+]], 0($[[REG_F1_GOT]])
+; CHECK-DAG:    addiu   $[[REG_ZERO:[0-9]+]], $zero, 0
+; CHECK-DAG:    addiu   $[[REG_ONE:[0-9]+]], $zero, 1
+; CHECK:        c.eq.s  $f[[REG_F1]], $f[[REG_F2]]
+; CHECK:        movt  $[[REG_ZERO]], $[[REG_ONE]], $fcc0
+
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @fne1()  {
+entry:
+  %0 = load float* @f1, align 4
+  %1 = load float* @f2, align 4
+  %cmp = fcmp une float %0, %1
+; CHECK-LABEL:  fne1:
+; CHECK-DAG:    lw      $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
+; CHECK-DAG:    lw      $[[REG_F1_GOT:[0-9]+]], %got(f1)(${{[0-9]+}})
+; CHECK-DAG:    lwc1    $f[[REG_F2:[0-9]+]], 0($[[REG_F2_GOT]])
+; CHECK-DAG:    lwc1    $f[[REG_F1:[0-9]+]], 0($[[REG_F1_GOT]])
+; CHECK-DAG:    addiu   $[[REG_ZERO:[0-9]+]], $zero, 0
+; CHECK-DAG:    addiu   $[[REG_ONE:[0-9]+]], $zero, 1
+; CHECK:        c.eq.s  $f[[REG_F1]], $f[[REG_F2]]
+; CHECK:        movf  $[[REG_ZERO]], $[[REG_ONE]], $fcc0
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @flt1()  {
+entry:
+  %0 = load float* @f1, align 4
+  %1 = load float* @f2, align 4
+  %cmp = fcmp olt float %0, %1
+; CHECK-LABEL:  flt1:
+; CHECK-DAG:    lw      $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
+; CHECK-DAG:    lw      $[[REG_F1_GOT:[0-9]+]], %got(f1)(${{[0-9]+}})
+; CHECK-DAG:    lwc1    $f[[REG_F2:[0-9]+]], 0($[[REG_F2_GOT]])
+; CHECK-DAG:    lwc1    $f[[REG_F1:[0-9]+]], 0($[[REG_F1_GOT]])
+; CHECK-DAG:    addiu   $[[REG_ZERO:[0-9]+]], $zero, 0
+; CHECK-DAG:    addiu   $[[REG_ONE:[0-9]+]], $zero, 1
+; CHECK:        c.olt.s  $f[[REG_F1]], $f[[REG_F2]]
+; CHECK:        movt  $[[REG_ZERO]], $[[REG_ONE]], $fcc0
+
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @fgt1()  {
+entry:
+  %0 = load float* @f1, align 4
+  %1 = load float* @f2, align 4
+  %cmp = fcmp ogt float %0, %1
+; CHECK-LABEL: fgt1:
+; CHECK-DAG:    lw      $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
+; CHECK-DAG:    lw      $[[REG_F1_GOT:[0-9]+]], %got(f1)(${{[0-9]+}})
+; CHECK-DAG:    lwc1    $f[[REG_F2:[0-9]+]], 0($[[REG_F2_GOT]])
+; CHECK-DAG:    lwc1    $f[[REG_F1:[0-9]+]], 0($[[REG_F1_GOT]])
+; CHECK-DAG:    addiu   $[[REG_ZERO:[0-9]+]], $zero, 0
+; CHECK-DAG:    addiu   $[[REG_ONE:[0-9]+]], $zero, 1
+; CHECK:        c.ule.s  $f[[REG_F1]], $f[[REG_F2]]
+; CHECK:        movf  $[[REG_ZERO]], $[[REG_ONE]], $fcc0
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @fle1()  {
+entry:
+  %0 = load float* @f1, align 4
+  %1 = load float* @f2, align 4
+  %cmp = fcmp ole float %0, %1
+; CHECK-LABEL:  fle1:
+; CHECK-DAG:    lw      $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
+; CHECK-DAG:    lw      $[[REG_F1_GOT:[0-9]+]], %got(f1)(${{[0-9]+}})
+; CHECK-DAG:    lwc1    $f[[REG_F2:[0-9]+]], 0($[[REG_F2_GOT]])
+; CHECK-DAG:    lwc1    $f[[REG_F1:[0-9]+]], 0($[[REG_F1_GOT]])
+; CHECK-DAG:    addiu   $[[REG_ZERO:[0-9]+]], $zero, 0
+; CHECK-DAG:    addiu   $[[REG_ONE:[0-9]+]], $zero, 1
+; CHECK:        c.ole.s  $f[[REG_F1]], $f[[REG_F2]]
+; CHECK:        movt  $[[REG_ZERO]], $[[REG_ONE]], $fcc0
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @fge1()  {
+entry:
+  %0 = load float* @f1, align 4
+  %1 = load float* @f2, align 4
+  %cmp = fcmp oge float %0, %1
+; CHECK-LABEL:  fge1:
+; CHECK-DAG:    lw      $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
+; CHECK-DAG:    lw      $[[REG_F1_GOT:[0-9]+]], %got(f1)(${{[0-9]+}})
+; CHECK-DAG:    lwc1    $f[[REG_F2:[0-9]+]], 0($[[REG_F2_GOT]])
+; CHECK-DAG:    lwc1    $f[[REG_F1:[0-9]+]], 0($[[REG_F1_GOT]])
+; CHECK-DAG:    addiu   $[[REG_ZERO:[0-9]+]], $zero, 0
+; CHECK-DAG:    addiu   $[[REG_ONE:[0-9]+]], $zero, 1
+; CHECK:        c.ult.s  $f[[REG_F1]], $f[[REG_F2]]
+; CHECK:        movf  $[[REG_ZERO]], $[[REG_ONE]], $fcc0
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @deq1()  {
+entry:
+  %0 = load double* @d1, align 8
+  %1 = load double* @d2, align 8
+  %cmp = fcmp oeq double %0, %1
+; CHECK-LABEL:  deq1:
+; CHECK-DAG:    lw      $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
+; CHECK-DAG:    lw      $[[REG_D1_GOT:[0-9]+]], %got(d1)(${{[0-9]+}})
+; CHECK-DAG:    ldc1    $f[[REG_D2:[0-9]+]], 0($[[REG_D2_GOT]])
+; CHECK-DAG:    ldc1    $f[[REG_D1:[0-9]+]], 0($[[REG_D1_GOT]])
+; CHECK-DAG:    addiu   $[[REG_ZERO:[0-9]+]], $zero, 0
+; CHECK-DAG:    addiu   $[[REG_ONE:[0-9]+]], $zero, 1
+; CHECK:        c.eq.d  $f[[REG_D1]], $f[[REG_D2]]
+; CHECK:        movt  $[[REG_ZERO]], $[[REG_ONE]], $fcc0
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @dne1()  {
+entry:
+  %0 = load double* @d1, align 8
+  %1 = load double* @d2, align 8
+  %cmp = fcmp une double %0, %1
+; CHECK-LABEL:  dne1:
+; CHECK-DAG:    lw      $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
+; CHECK-DAG:    lw      $[[REG_D1_GOT:[0-9]+]], %got(d1)(${{[0-9]+}})
+; CHECK-DAG:    ldc1    $f[[REG_D2:[0-9]+]], 0($[[REG_D2_GOT]])
+; CHECK-DAG:    ldc1    $f[[REG_D1:[0-9]+]], 0($[[REG_D1_GOT]])
+; CHECK-DAG:    addiu   $[[REG_ZERO:[0-9]+]], $zero, 0
+; CHECK-DAG:    addiu   $[[REG_ONE:[0-9]+]], $zero, 1
+; CHECK:        c.eq.d  $f[[REG_D1]], $f[[REG_D2]]
+; CHECK:        movf  $[[REG_ZERO]], $[[REG_ONE]], $fcc0
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @dlt1()  {
+entry:
+  %0 = load double* @d1, align 8
+  %1 = load double* @d2, align 8
+  %cmp = fcmp olt double %0, %1
+; CHECK-LABEL:  dlt1:
+; CHECK-DAG:    lw      $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
+; CHECK-DAG:    lw      $[[REG_D1_GOT:[0-9]+]], %got(d1)(${{[0-9]+}})
+; CHECK-DAG:    ldc1    $f[[REG_D2:[0-9]+]], 0($[[REG_D2_GOT]])
+; CHECK-DAG:    ldc1    $f[[REG_D1:[0-9]+]], 0($[[REG_D1_GOT]])
+; CHECK-DAG:    addiu   $[[REG_ZERO:[0-9]+]], $zero, 0
+; CHECK-DAG:    addiu   $[[REG_ONE:[0-9]+]], $zero, 1
+; CHECK:        c.olt.d  $f[[REG_D1]], $f[[REG_D2]]
+; CHECK:        movt  $[[REG_ZERO]], $[[REG_ONE]], $fcc0
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @dgt1()  {
+entry:
+  %0 = load double* @d1, align 8
+  %1 = load double* @d2, align 8
+  %cmp = fcmp ogt double %0, %1
+; CHECK-LABEL:  dgt1:
+; CHECK-DAG:    lw      $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
+; CHECK-DAG:    lw      $[[REG_D1_GOT:[0-9]+]], %got(d1)(${{[0-9]+}})
+; CHECK-DAG:    ldc1    $f[[REG_D2:[0-9]+]], 0($[[REG_D2_GOT]])
+; CHECK-DAG:    ldc1    $f[[REG_D1:[0-9]+]], 0($[[REG_D1_GOT]])
+; CHECK-DAG:    addiu   $[[REG_ZERO:[0-9]+]], $zero, 0
+; CHECK-DAG:    addiu   $[[REG_ONE:[0-9]+]], $zero, 1
+; CHECK:        c.ule.d  $f[[REG_D1]], $f[[REG_D2]]
+; CHECK:        movf  $[[REG_ZERO]], $[[REG_ONE]], $fcc0
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @dle1()  {
+entry:
+  %0 = load double* @d1, align 8
+  %1 = load double* @d2, align 8
+  %cmp = fcmp ole double %0, %1
+; CHECK-LABEL:  dle1:
+; CHECK-DAG:    lw      $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
+; CHECK-DAG:    lw      $[[REG_D1_GOT:[0-9]+]], %got(d1)(${{[0-9]+}})
+; CHECK-DAG:    ldc1    $f[[REG_D2:[0-9]+]], 0($[[REG_D2_GOT]])
+; CHECK-DAG:    ldc1    $f[[REG_D1:[0-9]+]], 0($[[REG_D1_GOT]])
+; CHECK-DAG:    addiu   $[[REG_ZERO:[0-9]+]], $zero, 0
+; CHECK-DAG:    addiu   $[[REG_ONE:[0-9]+]], $zero, 1
+; CHECK:        c.ole.d  $f[[REG_D1]], $f[[REG_D2]]
+; CHECK:        movt  $[[REG_ZERO]], $[[REG_ONE]], $fcc0
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @dge1()  {
+entry:
+  %0 = load double* @d1, align 8
+  %1 = load double* @d2, align 8
+  %cmp = fcmp oge double %0, %1
+; CHECK-LABEL:  dge1:
+; CHECK-DAG:    lw      $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
+; CHECK-DAG:    lw      $[[REG_D1_GOT:[0-9]+]], %got(d1)(${{[0-9]+}})
+; CHECK-DAG:    ldc1    $f[[REG_D2:[0-9]+]], 0($[[REG_D2_GOT]])
+; CHECK-DAG:    ldc1    $f[[REG_D1:[0-9]+]], 0($[[REG_D1_GOT]])
+; CHECK-DAG:    addiu   $[[REG_ZERO:[0-9]+]], $zero, 0
+; CHECK-DAG:    addiu   $[[REG_ONE:[0-9]+]], $zero, 1
+; CHECK:        c.ult.d  $f[[REG_D1]], $f[[REG_D2]]
+; CHECK:        movf  $[[REG_ZERO]], $[[REG_ONE]], $fcc0
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+
diff --git a/test/CodeGen/Mips/Fast-ISel/fpext.ll b/test/CodeGen/Mips/Fast-ISel/fpext.ll
new file mode 100644
index 000000000000..98aca756c58f
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/fpext.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s
+
+@f = global float 0x40147E6B80000000, align 4
+@d_f = common global double 0.000000e+00, align 8
+@.str = private unnamed_addr constant [6 x i8] c"%f  \0A\00", align 1
+
+; Function Attrs: nounwind
+define void @dv() #0 {
+entry:
+  %0 = load float* @f, align 4
+  %conv = fpext float %0 to double
+; CHECK: cvt.d.s  $f{{[0-9]+}}, $f{{[0-9]+}}
+  store double %conv, double* @d_f, align 8
+  ret void
+}
+
+
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/Mips/Fast-ISel/fpintconv.ll b/test/CodeGen/Mips/Fast-ISel/fpintconv.ll
new file mode 100644
index 000000000000..846726a868b3
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/fpintconv.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s
+
+
+@f = global float 0x40D6E83280000000, align 4
+@d = global double 0x4132D68780000000, align 8
+@i_f = common global i32 0, align 4
+@i_d = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%i \0A\00", align 1
+
+; Function Attrs: nounwind
+define void @ifv() {
+entry:
+; CHECK-LABEL:   .ent  ifv
+  %0 = load float* @f, align 4
+  %conv = fptosi float %0 to i32
+; CHECK:   trunc.w.s  $f[[REG:[0-9]+]], $f{{[0-9]+}}
+; CHECK:   mfc1	${{[0-9]+}}, $f[[REG]]
+  store i32 %conv, i32* @i_f, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @idv() {
+entry:
+; CHECK-LABEL:   .ent  idv
+  %0 = load double* @d, align 8
+  %conv = fptosi double %0 to i32
+; CHECK:   trunc.w.d  $f[[REG:[0-9]+]], $f{{[0-9]+}}
+; CHECK:   mfc1	${{[0-9]+}}, $f[[REG]]
+  store i32 %conv, i32* @i_d, align 4
+  ret void
+}
diff --git a/test/CodeGen/Mips/Fast-ISel/fptrunc.ll b/test/CodeGen/Mips/Fast-ISel/fptrunc.ll
new file mode 100644
index 000000000000..d843dee5a8c9
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/fptrunc.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s
+
+@d = global double 0x40147E6B74DF0446, align 8
+@f = common global float 0.000000e+00, align 4
+@.str = private unnamed_addr constant [6 x i8] c"%f  \0A\00", align 1
+
+; Function Attrs: nounwind
+define void @fv() #0 {
+entry:
+  %0 = load double* @d, align 8
+  %conv = fptrunc double %0 to float
+; CHECK: cvt.s.d  $f{{[0-9]+}}, $f{{[0-9]+}}
+  store float %conv, float* @f, align 4
+  ret void
+}
+
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/Mips/Fast-ISel/icmpa.ll b/test/CodeGen/Mips/Fast-ISel/icmpa.ll
new file mode 100644
index 000000000000..bd41a2911dc4
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/icmpa.ll
@@ -0,0 +1,210 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s
+
+@c = global i32 4, align 4
+@d = global i32 9, align 4
+@uc = global i32 4, align 4
+@ud = global i32 9, align 4
+@b1 = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define void @eq()  {
+entry:
+; CHECK-LABEL:  .ent  eq
+
+  %0 = load i32* @c, align 4
+  %1 = load i32* @d, align 4
+  %cmp = icmp eq i32 %0, %1
+  %conv = zext i1 %cmp to i32
+; CHECK-DAG:  lw	$[[REG_D_GOT:[0-9+]]], %got(d)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_C_GOT:[0-9+]]], %got(c)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_D:[0-9]+]], 0($[[REG_D_GOT]])
+; CHECK-DAG:  lw	$[[REG_C:[0-9]+]], 0($[[REG_C_GOT]])
+; CHECK:  xor  $[[REG1:[0-9]+]], $[[REG_C]], $[[REG_D]]
+; CHECK:  sltiu  $[[REG2:[0-9]+]], $[[REG1]], 1
+; FIXME: This instruction is redundant. The sltiu can only produce 0 and 1.
+; CHECK:  andi  ${{[0-9]+}}, $[[REG2]], 1
+
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @ne()  {
+entry:
+; CHECK-LABEL:  .ent  ne
+  %0 = load i32* @c, align 4
+  %1 = load i32* @d, align 4
+  %cmp = icmp ne i32 %0, %1
+  %conv = zext i1 %cmp to i32
+; CHECK-DAG:  lw	$[[REG_D_GOT:[0-9+]]], %got(d)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_C_GOT:[0-9+]]], %got(c)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_D:[0-9]+]], 0($[[REG_D_GOT]])
+; CHECK-DAG:  lw	$[[REG_C:[0-9]+]], 0($[[REG_C_GOT]])
+; CHECK:  xor  $[[REG1:[0-9]+]], $[[REG_C]], $[[REG_D]]
+; CHECK:  sltu  $[[REG2:[0-9]+]], $zero, $[[REG1]]
+; FIXME: This instruction is redundant. The sltu can only produce 0 and 1.
+; CHECK:  andi  ${{[0-9]+}}, $[[REG2]], 1
+
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @ugt()  {
+entry:
+; CHECK-LABEL:  .ent  ugt
+  %0 = load i32* @uc, align 4
+  %1 = load i32* @ud, align 4
+  %cmp = icmp ugt i32 %0, %1
+  %conv = zext i1 %cmp to i32
+; CHECK-DAG:  lw	$[[REG_UD_GOT:[0-9+]]], %got(ud)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_UC_GOT:[0-9+]]], %got(uc)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_UD:[0-9]+]], 0($[[REG_UD_GOT]])
+; CHECK-DAG:  lw	$[[REG_UC:[0-9]+]], 0($[[REG_UC_GOT]])
+; CHECK:  sltu  $[[REG1:[0-9]+]], $[[REG_UD]], $[[REG_UC]]
+; FIXME: This instruction is redundant. The sltu can only produce 0 and 1.
+; CHECK:  andi  ${{[0-9]+}}, $[[REG1]], 1
+
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @ult()  {
+entry:
+; CHECK-LABEL:  .ent  ult
+  %0 = load i32* @uc, align 4
+  %1 = load i32* @ud, align 4
+  %cmp = icmp ult i32 %0, %1
+  %conv = zext i1 %cmp to i32
+; CHECK-DAG:  lw	$[[REG_UD_GOT:[0-9+]]], %got(ud)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_UC_GOT:[0-9+]]], %got(uc)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_UD:[0-9]+]], 0($[[REG_UD_GOT]])
+; CHECK-DAG:  lw	$[[REG_UC:[0-9]+]], 0($[[REG_UC_GOT]])
+; CHECK:  sltu  $[[REG1:[0-9]+]], $[[REG_UC]], $[[REG_UD]]
+; FIXME: This instruction is redundant. The sltu can only produce 0 and 1.
+; CHECK:  andi  ${{[0-9]+}}, $[[REG1]], 1
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @uge()  {
+entry:
+; CHECK-LABEL:  .ent  uge
+  %0 = load i32* @uc, align 4
+  %1 = load i32* @ud, align 4
+  %cmp = icmp uge i32 %0, %1
+  %conv = zext i1 %cmp to i32
+; CHECK-DAG:  lw	$[[REG_UD_GOT:[0-9+]]], %got(ud)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_UC_GOT:[0-9+]]], %got(uc)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_UD:[0-9]+]], 0($[[REG_UD_GOT]])
+; CHECK-DAG:  lw	$[[REG_UC:[0-9]+]], 0($[[REG_UC_GOT]])
+; CHECK:  sltu  $[[REG1:[0-9]+]], $[[REG_UC]], $[[REG_UD]]
+; CHECK:  xori  $[[REG2:[0-9]+]], $[[REG1]], 1
+; FIXME: This instruction is redundant. The sltu can only produce 0 and 1.
+; CHECK:  andi  ${{[0-9]+}}, $[[REG2]], 1
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @ule()  {
+entry:
+; CHECK-LABEL:  .ent  ule
+  %0 = load i32* @uc, align 4
+  %1 = load i32* @ud, align 4
+  %cmp = icmp ule i32 %0, %1
+  %conv = zext i1 %cmp to i32
+; CHECK-DAG:  lw	$[[REG_UD_GOT:[0-9+]]], %got(ud)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_UC_GOT:[0-9+]]], %got(uc)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_UD:[0-9]+]], 0($[[REG_UD_GOT]])
+; CHECK-DAG:  lw	$[[REG_UC:[0-9]+]], 0($[[REG_UC_GOT]])
+; CHECK:  sltu  $[[REG1:[0-9]+]], $[[REG_UD]], $[[REG_UC]]
+; CHECK:  xori  $[[REG2:[0-9]+]], $[[REG1]], 1
+; FIXME: This instruction is redundant. The sltu can only produce 0 and 1.
+; CHECK:  andi  ${{[0-9]+}}, $[[REG2]], 1
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @sgt()  {
+entry:
+; CHECK-LABEL:  .ent sgt
+  %0 = load i32* @c, align 4
+  %1 = load i32* @d, align 4
+  %cmp = icmp sgt i32 %0, %1
+  %conv = zext i1 %cmp to i32
+; CHECK-DAG:  lw	$[[REG_D_GOT:[0-9+]]], %got(d)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_C_GOT:[0-9+]]], %got(c)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_D:[0-9]+]], 0($[[REG_D_GOT]])
+; CHECK-DAG:  lw	$[[REG_C:[0-9]+]], 0($[[REG_C_GOT]])
+; CHECK:  slt  $[[REG1:[0-9]+]], $[[REG_D]], $[[REG_C]]
+; FIXME: This instruction is redundant. The slt can only produce 0 and 1.
+; CHECK:  andi  ${{[0-9]+}}, $[[REG1]], 1
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @slt()  {
+entry:
+; CHECK-LABEL:  .ent slt
+  %0 = load i32* @c, align 4
+  %1 = load i32* @d, align 4
+  %cmp = icmp slt i32 %0, %1
+  %conv = zext i1 %cmp to i32
+; CHECK-DAG:  lw	$[[REG_D_GOT:[0-9+]]], %got(d)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_C_GOT:[0-9+]]], %got(c)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_D:[0-9]+]], 0($[[REG_D_GOT]])
+; CHECK-DAG:  lw	$[[REG_C:[0-9]+]], 0($[[REG_C_GOT]])
+; CHECK:  slt  $[[REG1:[0-9]+]], $[[REG_C]], $[[REG_D]]
+; FIXME: This instruction is redundant. The slt can only produce 0 and 1.
+; CHECK:  andi  ${{[0-9]+}}, $[[REG1]], 1
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @sge()  {
+entry:
+; CHECK-LABEL:  .ent sge
+  %0 = load i32* @c, align 4
+  %1 = load i32* @d, align 4
+  %cmp = icmp sge i32 %0, %1
+  %conv = zext i1 %cmp to i32
+  store i32 %conv, i32* @b1, align 4
+; CHECK-DAG:  lw	$[[REG_D_GOT:[0-9+]]], %got(d)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_C_GOT:[0-9+]]], %got(c)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_D:[0-9]+]], 0($[[REG_D_GOT]])
+; CHECK-DAG:  lw	$[[REG_C:[0-9]+]], 0($[[REG_C_GOT]])
+; CHECK:  slt  $[[REG1:[0-9]+]], $[[REG_C]], $[[REG_D]]
+; CHECK:  xori  $[[REG2:[0-9]+]], $[[REG1]], 1
+; FIXME: This instruction is redundant. The slt can only produce 0 and 1.
+; CHECK:  andi  ${{[0-9]+}}, $[[REG2]], 1
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @sle()  {
+entry:
+; CHECK-LABEL:  .ent sle
+  %0 = load i32* @c, align 4
+  %1 = load i32* @d, align 4
+  %cmp = icmp sle i32 %0, %1
+  %conv = zext i1 %cmp to i32
+; CHECK-DAG:  lw	$[[REG_D_GOT:[0-9+]]], %got(d)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_C_GOT:[0-9+]]], %got(c)(${{[0-9]+}})
+; CHECK-DAG:  lw	$[[REG_D:[0-9]+]], 0($[[REG_D_GOT]])
+; CHECK-DAG:  lw	$[[REG_C:[0-9]+]], 0($[[REG_C_GOT]])
+; CHECK:        slt     $[[REG1:[0-9]+]], $[[REG_D]], $[[REG_C]]
+; CHECK:        xori    $[[REG2:[0-9]+]], $[[REG1]], 1
+; FIXME: This instruction is redundant. The slt can only produce 0 and 1.
+; CHECK:        andi    ${{[0-9]+}}, $[[REG2]], 1
+  store i32 %conv, i32* @b1, align 4
+  ret void
+}
diff --git a/test/CodeGen/Mips/Fast-ISel/loadstore2.ll b/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
index f113a0eb1d54..d84478b9c5a9 100644
--- a/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
+++ b/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
@@ -6,6 +6,8 @@ target triple = "mips--linux-gnu"
 @c1 = common global i8 0, align 1
 ; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
 ; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s
 
 @s2 = common global i16 0, align 2
 @s1 = common global i16 0, align 2
diff --git a/test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll b/test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll
new file mode 100644
index 000000000000..f7f2c6481b3c
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll
@@ -0,0 +1,179 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s -check-prefix=mips32r2
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s -check-prefix=mips32
+
+@b2 = global i8 0, align 1
+@b1 = global i8 1, align 1
+@uc1 = global i8 0, align 1
+@uc2 = global i8 -1, align 1
+@sc1 = global i8 -128, align 1
+@sc2 = global i8 127, align 1
+@ss1 = global i16 -32768, align 2
+@ss2 = global i16 32767, align 2
+@us1 = global i16 0, align 2
+@us2 = global i16 -1, align 2
+@ssi = global i16 0, align 2
+@ssj = global i16 0, align 2
+@i = global i32 0, align 4
+@j = global i32 0, align 4
+@.str = private unnamed_addr constant [4 x i8] c"%i\0A\00", align 1
+@.str1 = private unnamed_addr constant [7 x i8] c"%i %i\0A\00", align 1
+
+; Function Attrs: nounwind
+define void @_Z3b_iv()  {
+entry:
+; CHECK-LABEL:   .ent  _Z3b_iv
+  %0 = load i8* @b1, align 1
+  %tobool = trunc i8 %0 to i1
+  %frombool = zext i1 %tobool to i8
+  store i8 %frombool, i8* @b2, align 1
+  %1 = load i8* @b2, align 1
+  %tobool1 = trunc i8 %1 to i1
+  %conv = zext i1 %tobool1 to i32
+  store i32 %conv, i32* @i, align 4
+; CHECK:  lbu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
+; CHECK:  andi  $[[REG2:[0-9]+]], $[[REG1]], 1
+; CHECK:  sb  $[[REG2]], 0(${{[0-9]+}})
+
+
+
+  ret void
+; CHECK:   .end  _Z3b_iv
+}
+
+; Function Attrs: nounwind
+define void @_Z4uc_iv()  {
+entry:
+; CHECK-LABEL:  .ent  _Z4uc_iv
+
+  %0 = load i8* @uc1, align 1
+  %conv = zext i8 %0 to i32
+  store i32 %conv, i32* @i, align 4
+  %1 = load i8* @uc2, align 1
+  %conv1 = zext i8 %1 to i32
+; CHECK:   lbu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
+; CHECK:  andi  ${{[0-9]+}}, $[[REG1]], 255
+
+  store i32 %conv1, i32* @j, align 4
+  ret void
+; CHECK:  .end  _Z4uc_iv
+
+}
+
+; Function Attrs: nounwind
+define void @_Z4sc_iv()  {
+entry:
+; mips32r2-LABEL:  .ent  _Z4sc_iv
+; mips32-LABEL:  .ent  _Z4sc_iv
+
+  %0 = load i8* @sc1, align 1
+  %conv = sext i8 %0 to i32
+  store i32 %conv, i32* @i, align 4
+  %1 = load i8* @sc2, align 1
+  %conv1 = sext i8 %1 to i32
+  store i32 %conv1, i32* @j, align 4
+; mips32r2:  lbu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
+; mips32r2:  seb  ${{[0-9]+}}, $[[REG1]]
+; mips32:  lbu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
+; mips32:    sll  $[[REG2:[0-9]+]], $[[REG1]], 24
+; mips32:    sra  ${{[0-9]+}}, $[[REG2]], 24
+
+  ret void
+; CHECK:  .end  _Z4sc_iv
+}
+
+; Function Attrs: nounwind
+define void @_Z4us_iv()  {
+entry:
+; CHECK-LABEL:  .ent  _Z4us_iv
+  %0 = load i16* @us1, align 2
+  %conv = zext i16 %0 to i32
+  store i32 %conv, i32* @i, align 4
+  %1 = load i16* @us2, align 2
+  %conv1 = zext i16 %1 to i32
+  store i32 %conv1, i32* @j, align 4
+  ret void
+; CHECK:  lhu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
+; CHECK:  andi  ${{[0-9]+}}, $[[REG1]], 65535
+; CHECK:  .end  _Z4us_iv
+}
+
+; Function Attrs: nounwind
+define void @_Z4ss_iv()  {
+entry:
+; mips32r2-LABEL:  .ent  _Z4ss_iv
+; mips32=LABEL:  .ent  _Z4ss_iv
+
+  %0 = load i16* @ss1, align 2
+  %conv = sext i16 %0 to i32
+  store i32 %conv, i32* @i, align 4
+  %1 = load i16* @ss2, align 2
+  %conv1 = sext i16 %1 to i32
+  store i32 %conv1, i32* @j, align 4
+; mips32r2:  lhu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
+; mips32r2:  seh  ${{[0-9]+}}, $[[REG1]]
+; mips32:    lhu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
+; mips32:    sll  $[[REG2:[0-9]+]], $[[REG1]], 16
+; mips32:    sra  ${{[0-9]+}}, $[[REG2]], 16
+
+  ret void
+; CHECK:  .end  _Z4ss_iv
+}
+
+; Function Attrs: nounwind
+define void @_Z4b_ssv()  {
+entry:
+; CHECK-LABEL:  .ent  _Z4b_ssv
+  %0 = load i8* @b2, align 1
+  %tobool = trunc i8 %0 to i1
+  %conv = zext i1 %tobool to i16
+  store i16 %conv, i16* @ssi, align 2
+  ret void
+; CHECK:  lbu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
+; CHECK:  andi  ${{[0-9]+}}, $[[REG1]], 1
+; CHECK:  .end  _Z4b_ssv
+}
+
+; Function Attrs: nounwind
+define void @_Z5uc_ssv()  {
+entry:
+; CHECK-LABEL:  .ent  _Z5uc_ssv
+  %0 = load i8* @uc1, align 1
+  %conv = zext i8 %0 to i16
+  store i16 %conv, i16* @ssi, align 2
+  %1 = load i8* @uc2, align 1
+  %conv1 = zext i8 %1 to i16
+; CHECK:   lbu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
+; CHECK:  andi  ${{[0-9]+}}, $[[REG1]], 255
+
+  store i16 %conv1, i16* @ssj, align 2
+  ret void
+; CHECK:  .end  _Z5uc_ssv
+}
+
+; Function Attrs: nounwind
+define void @_Z5sc_ssv()  {
+entry:
+; mips32r2-LABEL:  .ent  _Z5sc_ssv
+; mips32-LABEL:  .ent  _Z5sc_ssv
+  %0 = load i8* @sc1, align 1
+  %conv = sext i8 %0 to i16
+  store i16 %conv, i16* @ssi, align 2
+  %1 = load i8* @sc2, align 1
+  %conv1 = sext i8 %1 to i16
+  store i16 %conv1, i16* @ssj, align 2
+; mips32r2:  lbu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
+; mips32r2:  seb  ${{[0-9]+}}, $[[REG1]]
+; mips32:  lbu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
+; mips32:    sll  $[[REG2:[0-9]+]], $[[REG1]], 24
+; mips32:    sra  ${{[0-9]+}}, $[[REG2]], 24
+
+  ret void
+; CHECK:  .end  _Z5sc_ssv
+}
+
diff --git a/test/CodeGen/Mips/Fast-ISel/loadstrconst.ll b/test/CodeGen/Mips/Fast-ISel/loadstrconst.ll
new file mode 100644
index 000000000000..93cf4c15a2f5
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/loadstrconst.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s
+
+@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
+@s = common global i8* null, align 4
+
+; Function Attrs: nounwind
+define void @foo() #0 {
+entry:
+  store i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i8** @s, align 4
+  ret void
+; CHECK:        .ent    foo
+; CHECK:        lw      $[[REG1:[0-9]+]], %got($.str)(${{[0-9]+}})
+; CHECK:        addiu   ${{[0-9]+}}, $[[REG1]], %lo($.str)
+
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
diff --git a/test/CodeGen/Mips/Fast-ISel/nullvoid.ll b/test/CodeGen/Mips/Fast-ISel/nullvoid.ll
index eeaff878bf54..c847561d0278 100644
--- a/test/CodeGen/Mips/Fast-ISel/nullvoid.ll
+++ b/test/CodeGen/Mips/Fast-ISel/nullvoid.ll
@@ -1,5 +1,7 @@
 ; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
 ; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s
 
 ; Function Attrs: nounwind
 define void @foo() {
diff --git a/test/CodeGen/Mips/Fast-ISel/shift.ll b/test/CodeGen/Mips/Fast-ISel/shift.ll
new file mode 100644
index 000000000000..18fd5ac32d22
--- /dev/null
+++ b/test/CodeGen/Mips/Fast-ISel/shift.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -O1 -fast-isel=true -mips-fast-isel -filetype=obj %s -o - \
+; RUN:   | llvm-objdump -arch mipsel -mcpu=mips32r2 -d - | FileCheck %s
+
+; This test checks that encoding for srl is correct when fast-isel for mips32r2 is used.
+
+%struct.s = type { [4 x i8], i32 }
+
+define i32 @main() nounwind uwtable {
+entry:
+  %foo = alloca %struct.s, align 4
+  %0 = bitcast %struct.s* %foo to i32*
+  %bf.load = load i32* %0, align 4
+  %bf.lshr = lshr i32 %bf.load, 2
+  %cmp = icmp ne i32 %bf.lshr, 2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  unreachable
+
+if.end:
+  ret i32 0
+}
+
+; CHECK: srl    ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
diff --git a/test/CodeGen/Mips/Fast-ISel/simplestore.ll b/test/CodeGen/Mips/Fast-ISel/simplestore.ll
index 5d52481dfdf3..83e3f3f24274 100644
--- a/test/CodeGen/Mips/Fast-ISel/simplestore.ll
+++ b/test/CodeGen/Mips/Fast-ISel/simplestore.ll
@@ -1,5 +1,7 @@
 ; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
 ; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s
 
 @abcd = external global i32
 
diff --git a/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll b/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll
index 6759c01c774b..74723ae1beeb 100644
--- a/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll
+++ b/test/CodeGen/Mips/Fast-ISel/simplestorefp1.ll
@@ -1,5 +1,11 @@
 ; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s 
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
 ; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
+; RUN:     < %s | FileCheck %s -check-prefix=mips32r2 
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s -check-prefix=mips32
 
 @f = common global float 0.000000e+00, align 4
 @de = common global double 0.000000e+00, align 8
@@ -23,15 +29,25 @@ entry:
 define void @d1() #0 {
 entry:
   store double 1.234567e+00, double* @de, align 8
-; CHECK:  .ent  d1
-; CHECK:  lui  $[[REG1a:[0-9]+]], 16371
-; CHECK:  ori  $[[REG2a:[0-9]+]], $[[REG1a]], 49353
-; CHECK:  lui  $[[REG1b:[0-9]+]], 21403
-; CHECK:  ori  $[[REG2b:[0-9]+]], $[[REG1b]], 34951
-; CHECK:  mtc1  $[[REG2b]], $f[[REG3:[0-9]+]]
-; CHECK:  mthc1  $[[REG2a]], $f[[REG3]]
-; CHECK:  sdc1  $f[[REG3]], 0(${{[0-9]+}})
-; CHECK:  .end  d1
+; mip32r2:  .ent  d1
+; mips32r2:  lui  $[[REG1a:[0-9]+]], 16371
+; mips32r2:  ori  $[[REG2a:[0-9]+]], $[[REG1a]], 49353
+; mips32r2:  lui  $[[REG1b:[0-9]+]], 21403
+; mips32r2:  ori  $[[REG2b:[0-9]+]], $[[REG1b]], 34951
+; mips32r2:  mtc1  $[[REG2b]], $f[[REG3:[0-9]+]]
+; mips32r2:  mthc1  $[[REG2a]], $f[[REG3]]
+; mips32r2:  sdc1  $f[[REG3]], 0(${{[0-9]+}})
+; mips32r2:  .end  d1
+; mips32:  .ent  d1
+; mips32:  lui  $[[REG1a:[0-9]+]], 16371
+; mips32:  ori  $[[REG2a:[0-9]+]], $[[REG1a]], 49353
+; mips32:  lui  $[[REG1b:[0-9]+]], 21403
+; mips32:  ori  $[[REG2b:[0-9]+]], $[[REG1b]], 34951
+; mips32:  mtc1  $[[REG2b]], $f[[REG3:[0-9]+]]
+; mips32:  mtc1  $[[REG2a]], $f{{[0-9]+}}
+; mips32:  sdc1  $f[[REG3]], 0(${{[0-9]+}})
+; mips32:  .end  d1
+
   ret void
 }
 
diff --git a/test/CodeGen/Mips/Fast-ISel/simplestorei.ll b/test/CodeGen/Mips/Fast-ISel/simplestorei.ll
index 7d2c8e73c352..128e1de9cad0 100644
--- a/test/CodeGen/Mips/Fast-ISel/simplestorei.ll
+++ b/test/CodeGen/Mips/Fast-ISel/simplestorei.ll
@@ -1,5 +1,7 @@
 ; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32r2 \
 ; RUN:     < %s | FileCheck %s
+; RUN: llc -march=mipsel -relocation-model=pic -O0 -mips-fast-isel -fast-isel-abort -mcpu=mips32 \
+; RUN:     < %s | FileCheck %s
 
 @ijk = external global i32
 
diff --git a/test/CodeGen/Mips/atomic.ll b/test/CodeGen/Mips/atomic.ll
index 78fd8296178e..ccfeb00967e3 100644
--- a/test/CodeGen/Mips/atomic.ll
+++ b/test/CodeGen/Mips/atomic.ll
@@ -1,14 +1,15 @@
-; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32   < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=NO-SEB-SEH  -check-prefix=CHECK-EL
-; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL
-; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL
-; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips4    < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=NO-SEB-SEH  -check-prefix=CHECK-EL
-; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64   < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=NO-SEB-SEH  -check-prefix=CHECK-EL
-; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL
-; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL
+; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32   < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=NO-SEB-SEH  -check-prefix=CHECK-EL -check-prefix=NOT-MICROMIPS
+; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL -check-prefix=NOT-MICROMIPS
+; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL -check-prefix=NOT-MICROMIPS
+; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips4    < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=NO-SEB-SEH  -check-prefix=CHECK-EL -check-prefix=NOT-MICROMIPS
+; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64   < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=NO-SEB-SEH  -check-prefix=CHECK-EL -check-prefix=NOT-MICROMIPS
+; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64r2 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL -check-prefix=NOT-MICROMIPS
+; RUN: llc -march=mips64el --disable-machine-licm -mcpu=mips64r6 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS64-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL -check-prefix=NOT-MICROMIPS
+; RUN: llc -march=mipsel --disable-machine-licm -mcpu=mips32r2 -mattr=micromips < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=HAS-SEB-SEH -check-prefix=CHECK-EL -check-prefix=MICROMIPS
 
 ; Keep one big-endian check so that we don't reduce testing, but don't add more
 ; since endianness doesn't affect the body of the atomic operations.
-; RUN: llc -march=mips   --disable-machine-licm -mcpu=mips32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=NO-SEB-SEH -check-prefix=CHECK-EB
+; RUN: llc -march=mips   --disable-machine-licm -mcpu=mips32 < %s | FileCheck %s -check-prefix=ALL -check-prefix=MIPS32-ANY -check-prefix=NO-SEB-SEH -check-prefix=CHECK-EB -check-prefix=NOT-MICROMIPS
 
 @x = common global i32 0, align 4
 
@@ -26,7 +27,8 @@ entry:
 ; ALL:           ll      $[[R1:[0-9]+]], 0($[[R0]])
 ; ALL:           addu    $[[R2:[0-9]+]], $[[R1]], $4
 ; ALL:           sc      $[[R2]], 0($[[R0]])
-; ALL:           beqz    $[[R2]], $[[BB0]]
+; NOT-MICROMIPS: beqz    $[[R2]], $[[BB0]]
+; MICROMIPS:     beqzc   $[[R2]], $[[BB0]]
 }
 
 define i32 @AtomicLoadNand32(i32 signext %incr) nounwind {
@@ -44,7 +46,8 @@ entry:
 ; ALL:           and     $[[R3:[0-9]+]], $[[R1]], $4
 ; ALL:           nor     $[[R2:[0-9]+]], $zero, $[[R3]]
 ; ALL:           sc      $[[R2]], 0($[[R0]])
-; ALL:           beqz    $[[R2]], $[[BB0]]
+; NOT-MICROMIPS: beqz    $[[R2]], $[[BB0]]
+; MICROMIPS:     beqzc   $[[R2]], $[[BB0]]
 }
 
 define i32 @AtomicSwap32(i32 signext %newval) nounwind {
@@ -63,7 +66,8 @@ entry:
 ; ALL:       $[[BB0:[A-Z_0-9]+]]:
 ; ALL:           ll      ${{[0-9]+}}, 0($[[R0]])
 ; ALL:           sc      $[[R2:[0-9]+]], 0($[[R0]])
-; ALL:           beqz    $[[R2]], $[[BB0]]
+; NOT-MICROMIPS: beqz    $[[R2]], $[[BB0]]
+; MICROMIPS:     beqzc   $[[R2]], $[[BB0]]
 }
 
 define i32 @AtomicCmpSwap32(i32 signext %oldval, i32 signext %newval) nounwind {
@@ -84,7 +88,8 @@ entry:
 ; ALL:           ll      $2, 0($[[R0]])
 ; ALL:           bne     $2, $4, $[[BB1:[A-Z_0-9]+]]
 ; ALL:           sc      $[[R2:[0-9]+]], 0($[[R0]])
-; ALL:           beqz    $[[R2]], $[[BB0]]
+; NOT-MICROMIPS: beqz    $[[R2]], $[[BB0]]
+; MICROMIPS:     beqzc   $[[R2]], $[[BB0]]
 ; ALL:       $[[BB1]]:
 }
 
@@ -120,7 +125,8 @@ entry:
 ; ALL:           and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
 ; ALL:           or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
 ; ALL:           sc      $[[R14]], 0($[[R2]])
-; ALL:           beqz    $[[R14]], $[[BB0]]
+; NOT-MICROMIPS: beqz    $[[R14]], $[[BB0]]
+; MICROMIPS:     beqzc   $[[R14]], $[[BB0]]
 
 ; ALL:           and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
 ; ALL:           srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
@@ -159,7 +165,8 @@ entry:
 ; ALL:        and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
 ; ALL:        or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
 ; ALL:        sc      $[[R14]], 0($[[R2]])
-; ALL:        beqz    $[[R14]], $[[BB0]]
+; NOT-MICROMIPS: beqz    $[[R14]], $[[BB0]]
+; MICROMIPS:  beqzc   $[[R14]], $[[BB0]]
 
 ; ALL:        and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
 ; ALL:        srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
@@ -199,7 +206,8 @@ entry:
 ; ALL:           and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
 ; ALL:           or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
 ; ALL:           sc      $[[R14]], 0($[[R2]])
-; ALL:           beqz    $[[R14]], $[[BB0]]
+; NOT-MICROMIPS: beqz    $[[R14]], $[[BB0]]
+; MICROMIPS:     beqzc   $[[R14]], $[[BB0]]
 
 ; ALL:           and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
 ; ALL:           srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
@@ -237,7 +245,8 @@ entry:
 ; ALL:           and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
 ; ALL:           or      $[[R14:[0-9]+]], $[[R13]], $[[R18]]
 ; ALL:           sc      $[[R14]], 0($[[R2]])
-; ALL:           beqz    $[[R14]], $[[BB0]]
+; NOT-MICROMIPS: beqz    $[[R14]], $[[BB0]]
+; MICROMIPS:     beqzc   $[[R14]], $[[BB0]]
 
 ; ALL:           and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
 ; ALL:           srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
@@ -282,7 +291,8 @@ entry:
 ; ALL:           and     $[[R15:[0-9]+]], $[[R13]], $[[R8]]
 ; ALL:           or      $[[R16:[0-9]+]], $[[R15]], $[[R12]]
 ; ALL:           sc      $[[R16]], 0($[[R2]])
-; ALL:           beqz    $[[R16]], $[[BB0]]
+; NOT-MICROMIPS: beqz    $[[R16]], $[[BB0]]
+; MICROMIPS:     beqzc   $[[R16]], $[[BB0]]
 
 ; ALL:       $[[BB1]]:
 ; ALL:           srlv    $[[R17:[0-9]+]], $[[R14]], $[[R5]]
@@ -322,7 +332,8 @@ entry:
 ; ALL:           and     $[[R15:[0-9]+]], $[[R13]], $[[R8]]
 ; ALL:           or      $[[R16:[0-9]+]], $[[R15]], $[[R12]]
 ; ALL:           sc      $[[R16]], 0($[[R2]])
-; ALL:           beqz    $[[R16]], $[[BB0]]
+; NOT-MICROMIPS: beqz    $[[R16]], $[[BB0]]
+; MICROMIPS:     beqzc   $[[R16]], $[[BB0]]
 
 ; ALL:       $[[BB1]]:
 ; ALL:           srlv    $[[R17:[0-9]+]], $[[R14]], $[[R5]]
@@ -367,7 +378,8 @@ entry:
 ; ALL:           and     $[[R13:[0-9]+]], $[[R10]], $[[R8]]
 ; ALL:           or      $[[R14:[0-9]+]], $[[R13]], $[[R12]]
 ; ALL:           sc      $[[R14]], 0($[[R2]])
-; ALL:           beqz    $[[R14]], $[[BB0]]
+; NOT-MICROMIPS: beqz    $[[R14]], $[[BB0]]
+; MICROMIPS:     beqzc   $[[R14]], $[[BB0]]
 
 ; ALL:           and     $[[R15:[0-9]+]], $[[R10]], $[[R7]]
 ; ALL:           srlv    $[[R16:[0-9]+]], $[[R15]], $[[R5]]
@@ -430,5 +442,6 @@ entry:
 ; ALL:           ll      $[[R1:[0-9]+]], 0($[[PTR]])
 ; ALL:           addu    $[[R2:[0-9]+]], $[[R1]], $4
 ; ALL:           sc      $[[R2]], 0($[[PTR]])
-; ALL:           beqz    $[[R2]], $[[BB0]]
+; NOT-MICROMIPS: beqz    $[[R2]], $[[BB0]]
+; MICROMIPS:     beqzc   $[[R2]], $[[BB0]]
 }
diff --git a/test/CodeGen/Mips/brsize3.ll b/test/CodeGen/Mips/brsize3.ll
index 7b1f44001a9a..3620868bb2f6 100644
--- a/test/CodeGen/Mips/brsize3.ll
+++ b/test/CodeGen/Mips/brsize3.ll
@@ -30,4 +30,4 @@ x:                                                ; preds = %x, %entry
 attributes #0 = { noreturn nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
 attributes #1 = { nounwind }
 
-!1 = metadata !{i32 45}
+!1 = !{i32 45}
diff --git a/test/CodeGen/Mips/brsize3a.ll b/test/CodeGen/Mips/brsize3a.ll
index 6382fa228e19..f05e21191925 100644
--- a/test/CodeGen/Mips/brsize3a.ll
+++ b/test/CodeGen/Mips/brsize3a.ll
@@ -23,4 +23,4 @@ x:                                                ; preds = %x, %entry
 attributes #0 = { noreturn nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
 attributes #1 = { nounwind }
 
-!1 = metadata !{i32 45}
+!1 = !{i32 45}
diff --git a/test/CodeGen/Mips/ci2.ll b/test/CodeGen/Mips/ci2.ll
index 7187f0c75888..e2068fdf14e1 100644
--- a/test/CodeGen/Mips/ci2.ll
+++ b/test/CodeGen/Mips/ci2.ll
@@ -36,4 +36,4 @@ if.end:                                           ; preds = %if.else, %if.then
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind }
 
-!1 = metadata !{i32 103}
+!1 = !{i32 103}
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index e548049ab346..b12c2df97c19 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -757,24 +757,9 @@ define i32 @slti6(i32 signext %a) nounwind readnone {
 
 ; ALL-LABEL: slti6:
 
-; 32-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
-; 32-CMOV-DAG: xori [[R1]], [[R1]], 1
-; 32-CMOV-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
-; 32-CMOV-NOT: movn
-
-; 32-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
-; 32-CMP-DAG:  xori [[R1]], [[R1]], 1
-; 32-CMP-DAG:  addiu [[R2:\$[0-9]+]], [[R1]], 3
-; 32-CMP-NOT:  seleqz
-; 32-CMP-NOT:  selnez
-
-; 64-CMOV-DAG: slti [[R1:\$[0-9]+]], $4, 7
-; 64-CMOV-DAG: xori [[R1]], [[R1]], 1
-; 64-CMOV-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
-; 64-CMOV-NOT: movn
-
-; 64-CMP-DAG:  slti [[R1:\$[0-9]+]], $4, 7
-; 64-CMP-DAG:  xori [[R1]], [[R1]], 1
-; 64-CMP-DAG:  addiu [[R2:\$[0-9]+]], [[R1]], 3
-; 64-CMP-NOT:  seleqz
-; 64-CMP-NOT:  selnez
+; ALL-DAG: addiu [[R1:\$[0-9]+]], $zero, 6
+; ALL-DAG: slt [[R1]], [[R1]], $4
+; ALL-DAG: addiu [[R2:\$[0-9]+]], [[R1]], 3
+; ALL-NOT: movn
+; ALL-NOT:  seleqz
+; ALL-NOT:  selnez
diff --git a/test/CodeGen/Mips/const1.ll b/test/CodeGen/Mips/const1.ll
index cb2bacaf17a5..f32ce244cf40 100644
--- a/test/CodeGen/Mips/const1.ll
+++ b/test/CodeGen/Mips/const1.ll
@@ -32,4 +32,4 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"clang version 3.4 (gitosis@dmz-portal.mips.com:clang.git b754974ec32ab712ea7d8b52cd8037b24e7d6ed3) (gitosis@dmz-portal.mips.com:llvm.git 8e211187b501bc73edb938fde0019c9a20bcffd5)"}
+!0 = !{!"clang version 3.4 (gitosis@dmz-portal.mips.com:clang.git b754974ec32ab712ea7d8b52cd8037b24e7d6ed3) (gitosis@dmz-portal.mips.com:llvm.git 8e211187b501bc73edb938fde0019c9a20bcffd5)"}
diff --git a/test/CodeGen/Mips/const4a.ll b/test/CodeGen/Mips/const4a.ll
index b4c509fcd8c0..ac6795b2c833 100644
--- a/test/CodeGen/Mips/const4a.ll
+++ b/test/CodeGen/Mips/const4a.ll
@@ -177,4 +177,4 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"clang version 3.4 (gitosis@dmz-portal.mips.com:clang.git b310439121c875937d78cc49cc969bc1197fc025) (gitosis@dmz-portal.mips.com:llvm.git 7fc0ca9656ebec8dad61f72f5a5ddfb232c070fd)"}
+!0 = !{!"clang version 3.4 (gitosis@dmz-portal.mips.com:clang.git b310439121c875937d78cc49cc969bc1197fc025) (gitosis@dmz-portal.mips.com:llvm.git 7fc0ca9656ebec8dad61f72f5a5ddfb232c070fd)"}
diff --git a/test/CodeGen/Mips/const6.ll b/test/CodeGen/Mips/const6.ll
index 3f02ab907e1e..c26e02f2ebba 100644
--- a/test/CodeGen/Mips/const6.ll
+++ b/test/CodeGen/Mips/const6.ll
@@ -159,6 +159,6 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"clang version 3.4 (gitosis@dmz-portal.mips.com:clang.git b310439121c875937d78cc49cc969bc1197fc025) (gitosis@dmz-portal.mips.com:llvm.git 7fc0ca9656ebec8dad61f72f5a5ddfb232c070fd)"}
+!0 = !{!"clang version 3.4 (gitosis@dmz-portal.mips.com:clang.git b310439121c875937d78cc49cc969bc1197fc025) (gitosis@dmz-portal.mips.com:llvm.git 7fc0ca9656ebec8dad61f72f5a5ddfb232c070fd)"}
 
 
diff --git a/test/CodeGen/Mips/const6a.ll b/test/CodeGen/Mips/const6a.ll
index d34239058734..aff1357c3a8f 100644
--- a/test/CodeGen/Mips/const6a.ll
+++ b/test/CodeGen/Mips/const6a.ll
@@ -26,4 +26,4 @@ entry:
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
 attributes #1 = { nounwind }
 
-!1 = metadata !{i32 121}
+!1 = !{i32 121}
diff --git a/test/CodeGen/Mips/ctlz-v.ll b/test/CodeGen/Mips/ctlz-v.ll
new file mode 100644
index 000000000000..3d580e5771f4
--- /dev/null
+++ b/test/CodeGen/Mips/ctlz-v.ll
@@ -0,0 +1,19 @@
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=MIPS32
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64
+
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
+
+define <2 x i32> @ctlzv2i32(<2 x i32> %x) {
+entry:
+; MIPS32: clz     $2, $4
+; MIPS32: clz     $3, $5
+
+; MIPS64-DAG: sll $[[A0:[0-9]+]], $4, 0
+; MIPS64-DAG: clz $2, $[[A0]]
+; MIPS64-DAG: sll $[[A1:[0-9]+]], $5, 0
+; MIPS64-DAG: clz $3, $[[A1]]
+
+  %ret = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 true)
+  ret <2 x i32> %ret
+}
+
diff --git a/test/CodeGen/Mips/cttz-v.ll b/test/CodeGen/Mips/cttz-v.ll
new file mode 100644
index 000000000000..85f69f9a17d9
--- /dev/null
+++ b/test/CodeGen/Mips/cttz-v.ll
@@ -0,0 +1,39 @@
+; RUN: llc  < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=MIPS32
+; RUN: llc  < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64
+
+declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
+
+define <2 x i32> @cttzv2i32(<2 x i32> %x) {
+entry:
+; MIPS32-DAG: addiu   $[[R0:[0-9]+]], $4, -1
+; MIPS32-DAG: not     $[[R1:[0-9]+]], $4
+; MIPS32-DAG: and     $[[R2:[0-9]+]], $[[R1]], $[[R0]]
+; MIPS32-DAG: clz     $[[R3:[0-9]+]], $[[R2]]
+; MIPS32-DAG: addiu   $[[R4:[0-9]+]], $zero, 32
+; MIPS32-DAG: subu    $2, $[[R4]], $[[R3]]
+; MIPS32-DAG: addiu   $[[R5:[0-9]+]], $5, -1
+; MIPS32-DAG: not     $[[R6:[0-9]+]], $5
+; MIPS32-DAG: and     $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; MIPS32-DAG: clz     $[[R8:[0-9]+]], $[[R7]]
+; MIPS32-DAG: jr      $ra
+; MIPS32-DAG: subu    $3, $[[R4]], $[[R8]]
+
+; MIPS64-DAG: sll     $[[A0:[0-9]+]], $4, 0
+; MIPS64-DAG: addiu   $[[R0:[0-9]+]], $[[A0]], -1
+; MIPS64-DAG: not     $[[R1:[0-9]+]], $[[A0]]
+; MIPS64-DAG: and     $[[R2:[0-9]+]], $[[R1]], $[[R0]]
+; MIPS64-DAG: clz     $[[R3:[0-9]+]], $[[R2]]
+; MIPS64-DAG: addiu   $[[R4:[0-9]+]], $zero, 32
+; MIPS64-DAG: subu    $2, $[[R4]], $[[R3]]
+; MIPS64-DAG: sll     $[[A1:[0-9]+]], $5, 0
+; MIPS64-DAG: addiu   $[[R5:[0-9]+]], $[[A1]], -1
+; MIPS64-DAG: not     $[[R6:[0-9]+]], $[[A1]]
+; MIPS64-DAG: and     $[[R7:[0-9]+]], $[[R6]], $[[R5]]
+; MIPS64-DAG: clz     $[[R8:[0-9]+]], $[[R7]]
+; MIPS64-DAG: jr      $ra
+; MIPS64-DAG: subu    $3, $[[R4]], $[[R8]]
+
+  %ret = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 true)
+  ret <2 x i32> %ret
+}
+
diff --git a/test/CodeGen/Mips/fp16instrinsmc.ll b/test/CodeGen/Mips/fp16instrinsmc.ll
index 7ced36c016f7..84d3814ee8b8 100644
--- a/test/CodeGen/Mips/fp16instrinsmc.ll
+++ b/test/CodeGen/Mips/fp16instrinsmc.ll
@@ -385,7 +385,7 @@ entry:
 ; Function Attrs: nounwind
 declare double @exp2(double) #0
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
 attributes #2 = { nounwind readnone }
 attributes #3 = { nounwind }
diff --git a/test/CodeGen/Mips/fptr2.ll b/test/CodeGen/Mips/fptr2.ll
deleted file mode 100644
index c8b5e0d1771e..000000000000
--- a/test/CodeGen/Mips/fptr2.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips16 -relocation-model=static  < %s | FileCheck %s -check-prefix=static16
-
-; Function Attrs: nounwind
-define double @my_mul(double %a, double %b) #0 {
-entry:
-  %a.addr = alloca double, align 8
-  %b.addr = alloca double, align 8
-  store double %a, double* %a.addr, align 8
-  store double %b, double* %b.addr, align 8
-  %0 = load double* %a.addr, align 8
-  %1 = load double* %b.addr, align 8
-  %mul = fmul double %0, %1
-  ret double %mul
-}
-
-; static16: 	        .ent	__fn_stub_my_mul
-; static16:     	.set reorder
-; static16-NEXT:	#NO_APP
-; static16: 	        .end __fn_stub_my_mul
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
diff --git a/test/CodeGen/Mips/gpreg-lazy-binding.ll b/test/CodeGen/Mips/gpreg-lazy-binding.ll
index 88e596b3bb0d..3a636d82533f 100644
--- a/test/CodeGen/Mips/gpreg-lazy-binding.ll
+++ b/test/CodeGen/Mips/gpreg-lazy-binding.ll
@@ -25,3 +25,11 @@ entry:
   ret void
 }
 
+define void @no_lazy(void (i32)* %pf) {
+
+; CHECK-LABEL:  no_lazy
+; CHECK-NOT:    gp_disp
+
+  tail call void %pf(i32 1)
+  ret void
+}
diff --git a/test/CodeGen/Mips/hfptrcall.ll b/test/CodeGen/Mips/hfptrcall.ll
index 9df8d900693c..683952d0e4ec 100644
--- a/test/CodeGen/Mips/hfptrcall.ll
+++ b/test/CodeGen/Mips/hfptrcall.ll
@@ -118,8 +118,8 @@ entry:
 
 declare i32 @printf(i8*, ...) #1
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="true" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
 
 
diff --git a/test/CodeGen/Mips/init-array.ll b/test/CodeGen/Mips/init-array.ll
index f96ce2647289..1ca182dae7a5 100644
--- a/test/CodeGen/Mips/init-array.ll
+++ b/test/CodeGen/Mips/init-array.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple mipsel-unknown-linux -use-init-array < %s | FileCheck  %s
+; RUN: llc -mtriple mipsel-unknown-linux < %s | FileCheck  %s
 
 target triple = "mipsel-unknown-linux"
 
diff --git a/test/CodeGen/Mips/inlineasm-assembler-directives.ll b/test/CodeGen/Mips/inlineasm-assembler-directives.ll
new file mode 100644
index 000000000000..e4a6d1e26c69
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm-assembler-directives.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=mips < %s | FileCheck %s
+
+; Check for the emission of appropriate assembler directives before and
+; after the inline assembly code.
+define void @f() nounwind {
+entry:
+; CHECK:      #APP
+; CHECK-NEXT: .set  push
+; CHECK-NEXT: .set  at
+; CHECK-NEXT: .set  macro
+; CHECK-NEXT: .set  reorder
+; CHECK:      addi $9, ${{[2-9][0-9]?}}, 8
+; CHECK:      subi ${{[2-9][0-9]?}}, $9, 6
+; CHECK:      .set  pop
+; CHECK-NEXT: #NO_APP
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  store i32 20, i32* %a, align 4
+  %0 = load i32* %a, align 4
+  %1 = call i32 asm sideeffect "addi $$9, $1, 8\0A\09subi $0, $$9, 6", "=r,r,~{$1}"(i32 %0)
+  store i32 %1, i32* %b, align 4
+  ret void
+}
diff --git a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
index a67ddce222ae..41991d07a4fe 100644
--- a/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
+++ b/test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll
@@ -32,10 +32,10 @@ entry:
 
 ; Now l with 1024: make sure register lo is picked. We do this by checking the instruction
 ; after the inline expression for a mflo to pull the value out of lo.
-; CHECK: #APP
-; CHECK-NEXT:  mtlo ${{[0-9]+}} 
+; CHECK:       #APP
+; CHECK:       mtlo ${{[0-9]+}}
 ; CHECK-NEXT:  madd ${{[0-9]+}},${{[0-9]+}}
-; CHECK-NEXT: #NO_APP	
+; CHECK:       #NO_APP
 ; CHECK-NEXT:  mflo	${{[0-9]+}}
   %bosco = alloca i32, align 4
   call i32 asm sideeffect "\09mtlo $3 \0A\09\09madd $1,$2 ", "=l,r,r,r"(i32 7, i32 6, i32 44) nounwind
diff --git a/test/CodeGen/Mips/inlineasmmemop.ll b/test/CodeGen/Mips/inlineasmmemop.ll
index a08a0243b8b9..5518520c5491 100644
--- a/test/CodeGen/Mips/inlineasmmemop.ll
+++ b/test/CodeGen/Mips/inlineasmmemop.ll
@@ -5,6 +5,7 @@
 
 define i32 @f1(i32 %x) nounwind {
 entry:
+; CHECK-LABEL: f1:
 ; CHECK: addiu $[[T0:[0-9]+]], $sp
 ; CHECK: #APP
 ; CHECK: sw $4, 0($[[T0]])
@@ -22,42 +23,26 @@ entry:
   ret i32 %0
 }
 
-; "D": Second word of double word. This works for any memory element
+; CHECK-LABEL: main:
+; "D": Second word of a double word. This works for any memory element
 ; double or single.
 ; CHECK: #APP
-; CHECK-NEXT: lw ${{[0-9]+}},4(${{[0-9]+}});
-; CHECK-NEXT: #NO_APP
+; CHECK: lw ${{[0-9]+}},4(${{[0-9]+}});
+; CHECK: #NO_APP
 
-; No "D": First word of double word. This works for any memory element 
+; No "D": First word of a double word. This works for any memory element
 ; double or single.
 ; CHECK: #APP
-; CHECK-NEXT: lw ${{[0-9]+}},0(${{[0-9]+}});
-; CHECK-NEXT: #NO_APP
-
-;int b[8] = {0,1,2,3,4,5,6,7};
-;int main()
-;{
-;  int i;
-; 
-;  // The first word. Notice, no 'D'
-;  { asm (
-;    "lw    %0,%1;\n"
-;    : "=r" (i) : "m" (*(b+4)));}
-; 
-;  // The second word
-;  { asm (
-;    "lw    %0,%D1;\n"
-;    : "=r" (i) "m" (*(b+4)));}
-;}
+; CHECK: lw ${{[0-9]+}},0(${{[0-9]+}});
+; CHECK: #NO_APP
 
 @b = common global [20 x i32] zeroinitializer, align 4
 
 define void @main() {
 entry:
+; Second word:
   tail call void asm sideeffect "    lw    $0,${1:D};", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32]* @b, i32 0, i32 3))
+; First word. Notice, no 'D':
   tail call void asm sideeffect "    lw    $0,${1};", "r,*m,~{$11}"(i32 undef, i32* getelementptr inbounds ([20 x i32]* @b, i32 0, i32 3))
   ret void
 }
-
-attributes #0 = { nounwind }
-
diff --git a/test/CodeGen/Mips/lcb2.ll b/test/CodeGen/Mips/lcb2.ll
index 715584b6797d..59b96e64e95e 100644
--- a/test/CodeGen/Mips/lcb2.ll
+++ b/test/CodeGen/Mips/lcb2.ll
@@ -120,14 +120,14 @@ attributes #1 = { nounwind }
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"clang version 3.5 (gitosis@dmz-portal.mips.com:clang.git ed197d08c90d82e1119774e10920e6f7a841c8ec) (gitosis@dmz-portal.mips.com:llvm.git b9235a363fa2dddb26ac01cbaed58efbc9eff392)"}
-!1 = metadata !{metadata !2, metadata !2, i64 0}
-!2 = metadata !{metadata !"int", metadata !3, i64 0}
-!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
-!4 = metadata !{metadata !"Simple C/C++ TBAA"}
-!5 = metadata !{i32 59}
-!6 = metadata !{i32 156}
-!7 = metadata !{i32 210}
-!8 = metadata !{i32 299}
-!9 = metadata !{i32 340}
-!10 = metadata !{i32 412}
+!0 = !{!"clang version 3.5 (gitosis@dmz-portal.mips.com:clang.git ed197d08c90d82e1119774e10920e6f7a841c8ec) (gitosis@dmz-portal.mips.com:llvm.git b9235a363fa2dddb26ac01cbaed58efbc9eff392)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{i32 59}
+!6 = !{i32 156}
+!7 = !{i32 210}
+!8 = !{i32 299}
+!9 = !{i32 340}
+!10 = !{i32 412}
diff --git a/test/CodeGen/Mips/lcb3c.ll b/test/CodeGen/Mips/lcb3c.ll
index 72a0b8cf5cea..eb8329145421 100644
--- a/test/CodeGen/Mips/lcb3c.ll
+++ b/test/CodeGen/Mips/lcb3c.ll
@@ -55,5 +55,5 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
 attributes #1 = { nounwind }
 
 
-!1 = metadata !{i32 65}
-!2 = metadata !{i32 167}
+!1 = !{i32 65}
+!2 = !{i32 167}
diff --git a/test/CodeGen/Mips/lcb4a.ll b/test/CodeGen/Mips/lcb4a.ll
index e37feca78179..fbcadd2552f8 100644
--- a/test/CodeGen/Mips/lcb4a.ll
+++ b/test/CodeGen/Mips/lcb4a.ll
@@ -59,11 +59,11 @@ attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointe
 attributes #1 = { nounwind }
 
 
-!1 = metadata !{metadata !2, metadata !2, i64 0}
-!2 = metadata !{metadata !"int", metadata !3, i64 0}
-!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
-!4 = metadata !{metadata !"Simple C/C++ TBAA"}
-!5 = metadata !{i32 58}
-!6 = metadata !{i32 108}
-!7 = metadata !{i32 190}
-!8 = metadata !{i32 243}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{i32 58}
+!6 = !{i32 108}
+!7 = !{i32 190}
+!8 = !{i32 243}
diff --git a/test/CodeGen/Mips/lcb5.ll b/test/CodeGen/Mips/lcb5.ll
index 0a89c804945f..b2a8d1d33ef6 100644
--- a/test/CodeGen/Mips/lcb5.ll
+++ b/test/CodeGen/Mips/lcb5.ll
@@ -220,21 +220,21 @@ attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointe
 attributes #1 = { nounwind }
 
 
-!1 = metadata !{metadata !2, metadata !2, i64 0}
-!2 = metadata !{metadata !"int", metadata !3, i64 0}
-!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
-!4 = metadata !{metadata !"Simple C/C++ TBAA"}
-!5 = metadata !{i32 57}
-!6 = metadata !{i32 107}
-!7 = metadata !{i32 188}
-!8 = metadata !{i32 241}
-!9 = metadata !{i32 338}
-!10 = metadata !{i32 391}
-!11 = metadata !{i32 477}
-!12 = metadata !{i32 533}
-!13 = metadata !{i32 621}
-!14 = metadata !{i32 663}
-!15 = metadata !{i32 747}
-!16 = metadata !{i32 792}
-!17 = metadata !{i32 867}
-!18 = metadata !{i32 953}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{i32 57}
+!6 = !{i32 107}
+!7 = !{i32 188}
+!8 = !{i32 241}
+!9 = !{i32 338}
+!10 = !{i32 391}
+!11 = !{i32 477}
+!12 = !{i32 533}
+!13 = !{i32 621}
+!14 = !{i32 663}
+!15 = !{i32 747}
+!16 = !{i32 792}
+!17 = !{i32 867}
+!18 = !{i32 953}
diff --git a/test/CodeGen/Mips/llvm-ir/mul.ll b/test/CodeGen/Mips/llvm-ir/mul.ll
new file mode 100644
index 000000000000..167412407cdc
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/mul.ll
@@ -0,0 +1,181 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=M2
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=32R1-R2 -check-prefix=32R1
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=32R1-R2 -check-prefix=32R2
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=32R6
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=M4
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=64R1-R2
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=64R1-R2
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN:     -check-prefix=ALL -check-prefix=64R6
+
+define signext i1 @mul_i1(i1 signext %a, i1 signext %b) {
+entry:
+; ALL-LABEL: mul_i1:
+
+  ; M2:         mult    $4, $5
+  ; M2:         mflo    $[[T0:[0-9]+]]
+  ; M2:         sll     $[[T0]], $[[T0]], 31
+  ; M2:         sra     $2, $[[T0]], 31
+
+  ; 32R1-R2:    mul     $[[T0:[0-9]+]], $4, $5
+  ; 32R1-R2:    sll     $[[T0]], $[[T0]], 31
+  ; 32R1-R2:    sra     $2, $[[T0]], 31
+
+  ; 32R6:       mul     $[[T0:[0-9]+]], $4, $5
+  ; 32R6:       sll     $[[T0]], $[[T0]], 31
+  ; 32R6:       sra     $2, $[[T0]], 31
+
+  ; M4:         mult    $4, $5
+  ; M4:         mflo    $[[T0:[0-9]+]]
+  ; M4:         sll     $[[T0]], $[[T0]], 31
+  ; M4:         sra     $2, $[[T0]], 31
+
+  ; 64R1-R2:    mul     $[[T0:[0-9]+]], $4, $5
+  ; 64R1-R2:    sll     $[[T0]], $[[T0]], 31
+  ; 64R1-R2:    sra     $2, $[[T0]], 31
+
+  ; 64R6:       mul     $[[T0:[0-9]+]], $4, $5
+  ; 64R6:       sll     $[[T0]], $[[T0]], 31
+  ; 64R6:       sra     $2, $[[T0]], 31
+
+  %r = mul i1 %a, %b
+  ret i1 %r
+}
+
+define signext i8 @mul_i8(i8 signext %a, i8 signext %b) {
+entry:
+; ALL-LABEL: mul_i8:
+
+  ; M2:         mult    $4, $5
+  ; M2:         mflo    $[[T0:[0-9]+]]
+  ; M2:         sll     $[[T0]], $[[T0]], 24
+  ; M2:         sra     $2, $[[T0]], 24
+
+  ; 32R1:       mul     $[[T0:[0-9]+]], $4, $5
+  ; 32R1:       sll     $[[T0]], $[[T0]], 24
+  ; 32R1:       sra     $2, $[[T0]], 24
+
+  ; 32R2:       mul     $[[T0:[0-9]+]], $4, $5
+  ; 32R2:       seb     $2, $[[T0]]
+
+  ; 32R6:       mul     $[[T0:[0-9]+]], $4, $5
+  ; 32R6:       seb     $2, $[[T0]]
+
+  ; M4:         mult    $4, $5
+  ; M4:         mflo    $[[T0:[0-9]+]]
+  ; M4:         sll     $[[T0]], $[[T0]], 24
+  ; M4:         sra     $2, $[[T0]], 24
+
+  ; 64R1:       mul     $[[T0:[0-9]+]], $4, $5
+  ; 64R1:       sll     $[[T0]], $[[T0]], 24
+  ; 64R1:       sra     $2, $[[T0]], 24
+
+  ; 64R2:       mul     $[[T0:[0-9]+]], $4, $5
+  ; 64R2:       seb     $2, $[[T0]]
+
+  ; 64R6:       mul     $[[T0:[0-9]+]], $4, $5
+  ; 64R6:       seb     $2, $[[T0]]
+  %r = mul i8 %a, %b
+  ret i8 %r
+}
+
+define signext i16 @mul_i16(i16 signext %a, i16 signext %b) {
+entry:
+; ALL-LABEL: mul_i16:
+
+  ; M2:         mult    $4, $5
+  ; M2:         mflo    $[[T0:[0-9]+]]
+  ; M2:         sll     $[[T0]], $[[T0]], 16
+  ; M2:         sra     $2, $[[T0]], 16
+
+  ; 32R1:       mul     $[[T0:[0-9]+]], $4, $5
+  ; 32R1:       sll     $[[T0]], $[[T0]], 16
+  ; 32R1:       sra     $2, $[[T0]], 16
+
+  ; 32R2:       mul     $[[T0:[0-9]+]], $4, $5
+  ; 32R2:       seh     $2, $[[T0]]
+
+  ; 32R6:       mul     $[[T0:[0-9]+]], $4, $5
+  ; 32R6:       seh     $2, $[[T0]]
+
+  ; M4:         mult    $4, $5
+  ; M4:         mflo    $[[T0:[0-9]+]]
+  ; M4:         sll     $[[T0]], $[[T0]], 16
+  ; M4:         sra     $2, $[[T0]], 16
+
+  ; 64R1:       mul     $[[T0:[0-9]+]], $4, $5
+  ; 64R1:       sll     $[[T0]], $[[T0]], 16
+  ; 64R1:       sra     $2, $[[T0]], 16
+
+  ; 64R2:       mul     $[[T0:[0-9]+]], $4, $5
+  ; 64R2:       seh     $2, $[[T0]]
+
+  ; 64R6:       mul     $[[T0:[0-9]+]], $4, $5
+  ; 64R6:       seh     $2, $[[T0]]
+  %r = mul i16 %a, %b
+  ret i16 %r
+}
+
+define signext i32 @mul_i32(i32 signext %a, i32 signext %b) {
+entry:
+; ALL-LABEL: mul_i32:
+
+  ; M2:         mult    $4, $5
+  ; M2:         mflo    $2
+
+  ; 32R1-R2:    mul     $2, $4, $5
+  ; 32R6:       mul     $2, $4, $5
+
+  ; 64R1-R2:    mul     $2, $4, $5
+  ; 64R6:       mul     $2, $4, $5
+  %r = mul i32 %a, %b
+  ret i32 %r
+}
+
+define signext i64 @mul_i64(i64 signext %a, i64 signext %b) {
+entry:
+; ALL-LABEL: mul_i64:
+
+  ; M2:         mult    $4, $7
+  ; M2:         mflo    $[[T0:[0-9]+]]
+  ; M2:         mult    $5, $6
+  ; M2:         mflo    $[[T1:[0-9]+]]
+  ; M2:         multu   $5, $7
+  ; M2:         mflo    $3
+  ; M2:         mfhi    $4
+  ; M2:         addu    $[[T2:[0-9]+]], $4, $[[T1]]
+  ; M2:         addu    $2, $[[T2]], $[[T0]]
+
+  ; 32R1-R2:    multu   $5, $7
+  ; 32R1-R2:    mflo    $3
+  ; 32R1-R2:    mfhi    $[[T0:[0-9]+]]
+  ; 32R1-R2:    mul     $[[T1:[0-9]+]], $4, $7
+  ; 32R1-R2:    mul     $[[T2:[0-9]+]], $5, $6
+  ; 32R1-R2:    addu    $[[T0]], $[[T0]], $[[T2:[0-9]+]]
+  ; 32R1-R2:    addu    $2, $[[T0]], $[[T1]]
+
+  ; 32R6:       mul     $[[T0:[0-9]+]], $5, $6
+  ; 32R6:       muhu    $[[T1:[0-9]+]], $5, $7
+  ; 32R6:       addu    $[[T0]], $[[T1]], $[[T0]]
+  ; 32R6:       mul     $[[T2:[0-9]+]], $4, $7
+  ; 32R6:       addu    $2, $[[T0]], $[[T2]]
+  ; 32R6:       mul     $3, $5, $7
+
+  ; M4:         dmult   $4, $5
+  ; M4:         mflo    $2
+
+  ; 64R1-R2:    dmult   $4, $5
+  ; 64R1-R2:    mflo    $2
+
+  ; 64R6:       dmul    $2, $4, $5
+
+  %r = mul i64 %a, %b
+  ret i64 %r
+}
diff --git a/test/CodeGen/Mips/llvm-ir/select.ll b/test/CodeGen/Mips/llvm-ir/select.ll
new file mode 100644
index 000000000000..736bc579088d
--- /dev/null
+++ b/test/CodeGen/Mips/llvm-ir/select.ll
@@ -0,0 +1,702 @@
+; RUN: llc < %s -march=mips -mcpu=mips2 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=M2 -check-prefix=M2-M3
+; RUN: llc < %s -march=mips -mcpu=mips32 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=CMOV \
+; RUN:    -check-prefix=CMOV-32 -check-prefix=CMOV-32R1
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=CMOV \
+; RUN:    -check-prefix=CMOV-32 -check-prefix=CMOV-32R2
+; RUN: llc < %s -march=mips -mcpu=mips32r6 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=SEL -check-prefix=SEL-32
+; RUN: llc < %s -march=mips64 -mcpu=mips3 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=M3 -check-prefix=M2-M3
+; RUN: llc < %s -march=mips64 -mcpu=mips4 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=CMOV -check-prefix=CMOV-64
+; RUN: llc < %s -march=mips64 -mcpu=mips64 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=CMOV -check-prefix=CMOV-64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=CMOV -check-prefix=CMOV-64
+; RUN: llc < %s -march=mips64 -mcpu=mips64r6 | FileCheck %s \
+; RUN:    -check-prefix=ALL -check-prefix=SEL -check-prefix=SEL-64
+
+define signext i1 @tst_select_i1_i1(i1 signext %s,
+                                    i1 signext %x, i1 signext %y) {
+entry:
+  ; ALL-LABEL: tst_select_i1_i1:
+
+  ; M2-M3:  andi    $[[T0:[0-9]+]], $4, 1
+  ; M2-M3:  bnez    $[[T0]], $[[BB0:BB[0-9_]+]]
+  ; M2-M3:  nop
+  ; M2-M3:  move    $5, $6
+  ; M2-M3:  $[[BB0]]:
+  ; M2-M3:  jr      $ra
+  ; M2-M3:  move    $2, $5
+
+  ; CMOV:   andi    $[[T0:[0-9]+]], $4, 1
+  ; CMOV:   movn    $6, $5, $[[T0]]
+  ; CMOV:   move    $2, $6
+
+  ; SEL:    andi    $[[T0:[0-9]+]], $4, 1
+  ; SEL:    seleqz  $[[T1:[0-9]+]], $6, $[[T0]]
+  ; SEL:    selnez  $[[T2:[0-9]+]], $5, $[[T0]]
+  ; SEL:    or      $2, $[[T2]], $[[T1]]
+  %r = select i1 %s, i1 %x, i1 %y
+  ret i1 %r
+}
+
+define signext i8 @tst_select_i1_i8(i1 signext %s,
+                                    i8 signext %x, i8 signext %y) {
+entry:
+  ; ALL-LABEL: tst_select_i1_i8:
+
+  ; M2-M3:  andi    $[[T0:[0-9]+]], $4, 1
+  ; M2-M3:  bnez    $[[T0]], $[[BB0:BB[0-9_]+]]
+  ; M2-M3:  nop
+  ; M2-M3:  move    $5, $6
+  ; M2-M3:  $[[BB0]]:
+  ; M2-M3:  jr      $ra
+  ; M2-M3:  move    $2, $5
+
+  ; CMOV:   andi    $[[T0:[0-9]+]], $4, 1
+  ; CMOV:   movn    $6, $5, $[[T0]]
+  ; CMOV:   move    $2, $6
+
+  ; SEL:    andi    $[[T0:[0-9]+]], $4, 1
+  ; SEL:    seleqz  $[[T1:[0-9]+]], $6, $[[T0]]
+  ; SEL:    selnez  $[[T2:[0-9]+]], $5, $[[T0]]
+  ; SEL:    or      $2, $[[T2]], $[[T1]]
+  %r = select i1 %s, i8 %x, i8 %y
+  ret i8 %r
+}
+
+define signext i32 @tst_select_i1_i32(i1 signext %s,
+                                      i32 signext %x, i32 signext %y) {
+entry:
+  ; ALL-LABEL: tst_select_i1_i32:
+
+  ; M2-M3:  andi    $[[T0:[0-9]+]], $4, 1
+  ; M2-M3:  bnez    $[[T0]], $[[BB0:BB[0-9_]+]]
+  ; M2-M3:  nop
+  ; M2-M3:  move    $5, $6
+  ; M2-M3:  $[[BB0]]:
+  ; M2-M3:  jr      $ra
+  ; M2-M3:  move    $2, $5
+
+  ; CMOV:   andi    $[[T0:[0-9]+]], $4, 1
+  ; CMOV:   movn    $6, $5, $[[T0]]
+  ; CMOV:   move    $2, $6
+
+  ; SEL:    andi    $[[T0:[0-9]+]], $4, 1
+  ; SEL:    seleqz  $[[T1:[0-9]+]], $6, $[[T0]]
+  ; SEL:    selnez  $[[T2:[0-9]+]], $5, $[[T0]]
+  ; SEL:    or      $2, $[[T2]], $[[T1]]
+  %r = select i1 %s, i32 %x, i32 %y
+  ret i32 %r
+}
+
+define signext i64 @tst_select_i1_i64(i1 signext %s,
+                                      i64 signext %x, i64 signext %y) {
+entry:
+  ; ALL-LABEL: tst_select_i1_i64:
+
+  ; M2:     andi    $[[T0:[0-9]+]], $4, 1
+  ; M2:     bnez    $[[T0]], $[[BB0:BB[0-9_]+]]
+  ; M2:     nop
+  ; M2:     lw      $[[T1:[0-9]+]], 16($sp)
+  ; M2:     $[[BB0]]:
+  ; FIXME: This branch is redundant
+  ; M2:     bnez    $[[T0]], $[[BB1:BB[0-9_]+]]
+  ; M2:     nop
+  ; M2:     lw      $[[T2:[0-9]+]], 20($sp)
+  ; M2:     $[[BB1]]:
+  ; M2:     move    $2, $[[T1]]
+  ; M2:     jr      $ra
+  ; M2:     move    $3, $[[T2]]
+
+  ; CMOV-32:    andi    $[[T0:[0-9]+]], $4, 1
+  ; CMOV-32:    lw      $2, 16($sp)
+  ; CMOV-32:    movn    $2, $6, $[[T0]]
+  ; CMOV-32:    lw      $3, 20($sp)
+  ; CMOV-32:    movn    $3, $7, $[[T0]]
+
+  ; SEL-32:     andi    $[[T0:[0-9]+]], $4, 1
+  ; SEL-32:     selnez  $[[T1:[0-9]+]], $6, $[[T0]]
+  ; SEL-32:     lw      $[[T2:[0-9]+]], 16($sp)
+  ; SEL-32:     seleqz  $[[T3:[0-9]+]], $[[T2]], $[[T0]]
+  ; SEL-32:     or      $2, $[[T1]], $[[T3]]
+  ; SEL-32:     selnez  $[[T4:[0-9]+]], $7, $[[T0]]
+  ; SEL-32:     lw      $[[T5:[0-9]+]], 20($sp)
+  ; SEL-32:     seleqz  $[[T6:[0-9]+]], $[[T5]], $[[T0]]
+  ; SEL-32:     or      $3, $[[T4]], $[[T6]]
+
+  ; M3:         andi    $[[T0:[0-9]+]], $4, 1
+  ; M3:         bnez    $[[T0]], $[[BB0:BB[0-9_]+]]
+  ; M3:         nop
+  ; M3:         move    $5, $6
+  ; M3:         $[[BB0]]:
+  ; M3:         jr      $ra
+  ; M3:         move    $2, $5
+
+  ; CMOV-64:    andi    $[[T0:[0-9]+]], $4, 1
+  ; CMOV-64:    movn    $6, $5, $[[T0]]
+  ; CMOV-64:    move    $2, $6
+
+  ; SEL-64:     andi    $[[T0:[0-9]+]], $4, 1
+  ; FIXME: This shift is redundant
+  ; SEL-64:     sll     $[[T0]], $[[T0]], 0
+  ; SEL-64:     seleqz  $[[T1:[0-9]+]], $6, $[[T0]]
+  ; SEL-64:     selnez  $[[T0]], $5, $[[T0]]
+  ; SEL-64:     or      $2, $[[T0]], $[[T1]]
+  %r = select i1 %s, i64 %x, i64 %y
+  ret i64 %r
+}
+
+define float @tst_select_i1_float(i1 signext %s, float %x, float %y) {
+entry:
+  ; ALL-LABEL: tst_select_i1_float:
+
+  ; M2-M3:      andi    $[[T0:[0-9]+]], $4, 1
+  ; M2-M3:      bnez    $[[T0]], $[[BB0:BB[0-9_]+]]
+  ; M2-M3:      nop
+  ; M2:         jr      $ra
+  ; M2:         mtc1    $6, $f0
+  ; M3:         mov.s   $f13, $f14
+  ; M2-M3:      $[[BB0]]:
+  ; M2-M3:      jr      $ra
+  ; M2:         mtc1    $5, $f0
+  ; M3:         mov.s   $f0, $f13
+
+  ; CMOV-32:    mtc1    $6, $f0
+  ; CMOV-32:    mtc1    $5, $f1
+  ; CMOV-32:    andi    $[[T0:[0-9]+]], $4, 1
+  ; CMOV-32:    movn.s  $f0, $f1, $[[T0]]
+
+  ; SEL-32:     mtc1    $5, $[[F0:f[0-9]+]]
+  ; SEL-32:     mtc1    $6, $[[F1:f[0-9]+]]
+  ; SEL-32:     mtc1    $4, $f0
+  ; SEL-32:     sel.s   $f0, $[[F1]], $[[F0]]
+
+  ; CMOV-64:    andi    $[[T0:[0-9]+]], $4, 1
+  ; CMOV-64:    movn.s  $f14, $f13, $[[T0]]
+  ; CMOV-64:    mov.s   $f0, $f14
+
+  ; SEL-64:     mtc1    $4, $f0
+  ; SEL-64:     sel.s   $f0, $f14, $f13
+  %r = select i1 %s, float %x, float %y
+  ret float %r
+}
+
+define float @tst_select_i1_float_reordered(float %x, float %y,
+                                            i1 signext %s) {
+entry:
+  ; ALL-LABEL: tst_select_i1_float_reordered:
+
+  ; M2-M3:      andi    $[[T0:[0-9]+]], $6, 1
+  ; M2-M3:      bnez    $[[T0]], $[[BB0:BB[0-9_]+]]
+  ; M2-M3:      nop
+  ; M2:         mov.s   $f12, $f14
+  ; M3:         mov.s   $f12, $f13
+  ; M2-M3:      $[[BB0]]:
+  ; M2-M3:      jr      $ra
+  ; M2-M3:      mov.s   $f0, $f12
+
+  ; CMOV-32:    andi    $[[T0:[0-9]+]], $6, 1
+  ; CMOV-32:    movn.s  $f14, $f12, $[[T0]]
+  ; CMOV-32:    mov.s   $f0, $f14
+
+  ; SEL-32:     mtc1    $6, $f0
+  ; SEL-32:     sel.s   $f0, $f14, $f12
+
+  ; CMOV-64:    andi    $[[T0:[0-9]+]], $6, 1
+  ; CMOV-64:    movn.s  $f13, $f12, $[[T0]]
+  ; CMOV-64:    mov.s   $f0, $f13
+
+  ; SEL-64:     mtc1    $6, $f0
+  ; SEL-64:     sel.s   $f0, $f13, $f12
+  %r = select i1 %s, float %x, float %y
+  ret float %r
+}
+
+define double @tst_select_i1_double(i1 signext %s, double %x, double %y) {
+entry:
+  ; ALL-LABEL: tst_select_i1_double:
+
+  ; M2:         andi    $[[T0:[0-9]+]], $4, 1
+  ; M2:         bnez    $[[T0]], $[[BB0:BB[0-9_]+]]
+  ; M2:         nop
+  ; M2:         ldc1    $f0, 16($sp)
+  ; M2:         jr      $ra
+  ; M2:         nop
+  ; M2:         $[[BB0]]:
+  ; M2:         mtc1    $7, $f0
+  ; M2:         jr      $ra
+  ; M2:         mtc1    $6, $f1
+
+  ; CMOV-32:    mtc1    $7, $[[F0:f[0-9]+]]
+  ; CMOV-32R1:  mtc1    $6, $f{{[0-9]+}}
+  ; CMOV-32R2   mthc1   $6, $[[F0]]
+  ; CMOV-32:    andi    $[[T0:[0-9]+]], $4, 1
+  ; CMOV-32:    ldc1    $f0, 16($sp)
+  ; CMOV-32:    movn.d  $f0, $[[F0]], $[[T0]]
+
+  ; SEL-32:     mtc1    $7, $[[F0:f[0-9]+]]
+  ; SEL-32:     mthc1   $6, $[[F0]]
+  ; SEL-32:     ldc1    $[[F1:f[0-9]+]], 16($sp)
+  ; SEL-32:     mtc1    $4, $f0
+  ; SEL-32:     sel.d   $f0, $[[F1]], $[[F0]]
+
+  ; M3:         andi    $[[T0:[0-9]+]], $4, 1
+  ; M3:         bnez    $[[T0]], $[[BB0:BB[0-9_]+]]
+  ; M3:         nop
+  ; M3:         mov.d   $f13, $f14
+  ; M3:         $[[BB0]]:
+  ; M3:         jr      $ra
+  ; M3:         mov.d   $f0, $f13
+
+  ; CMOV-64:    andi    $[[T0:[0-9]+]], $4, 1
+  ; CMOV-64:    movn.d  $f14, $f13, $[[T0]]
+  ; CMOV-64:    mov.d   $f0, $f14
+
+  ; SEL-64:     mtc1    $4, $f0
+  ; SEL-64:     sel.d   $f0, $f14, $f13
+  %r = select i1 %s, double %x, double %y
+  ret double %r
+}
+
+define double @tst_select_i1_double_reordered(double %x, double %y,
+                                              i1 signext %s) {
+entry:
+  ; ALL-LABEL: tst_select_i1_double_reordered:
+
+  ; M2:         lw      $[[T0:[0-9]+]], 16($sp)
+  ; M2:         andi    $[[T1:[0-9]+]], $[[T0]], 1
+  ; M2:         bnez    $[[T1]], $[[BB0:BB[0-9_]+]]
+  ; M2:         nop
+  ; M2:         mov.d   $f12, $f14
+  ; M2:         $[[BB0]]:
+  ; M2:         jr      $ra
+  ; M2:         mov.d   $f0, $f12
+
+  ; CMOV-32:    lw      $[[T0:[0-9]+]], 16($sp)
+  ; CMOV-32:    andi    $[[T1:[0-9]+]], $[[T0]], 1
+  ; CMOV-32:    movn.d  $f14, $f12, $[[T1]]
+  ; CMOV-32:    mov.d   $f0, $f14
+
+  ; SEL-32:     lw      $[[T0:[0-9]+]], 16($sp)
+  ; SEL-32:     mtc1    $[[T0]], $f0
+  ; SEL-32:     sel.d   $f0, $f14, $f12
+
+  ; M3:         andi    $[[T0:[0-9]+]], $6, 1
+  ; M3:         bnez    $[[T0]], $[[BB0:BB[0-9_]+]]
+  ; M3:         nop
+  ; M3:         mov.d   $f12, $f13
+  ; M3:         $[[BB0]]:
+  ; M3:         jr      $ra
+  ; M3:         mov.d   $f0, $f12
+
+  ; CMOV-64:    andi    $[[T0:[0-9]+]], $6, 1
+  ; CMOV-64:    movn.d  $f13, $f12, $[[T0]]
+  ; CMOV-64:    mov.d   $f0, $f13
+
+  ; SEL-64:     mtc1    $6, $f0
+  ; SEL-64:     sel.d   $f0, $f13, $f12
+  %r = select i1 %s, double %x, double %y
+  ret double %r
+}
+
+define float @tst_select_fcmp_olt_float(float %x, float %y) {
+entry:
+  ; ALL-LABEL: tst_select_fcmp_olt_float:
+
+  ; M2:         c.olt.s   $f12, $f14
+  ; M3:         c.olt.s   $f12, $f13
+  ; M2-M3:      bc1t      $[[BB0:BB[0-9_]+]]
+  ; M2-M3:      nop
+  ; M2:         mov.s     $f12, $f14
+  ; M3:         mov.s     $f12, $f13
+  ; M2-M3:      $[[BB0]]:
+  ; M2-M3:      jr        $ra
+  ; M2-M3:      mov.s     $f0, $f12
+
+  ; CMOV-32:    c.olt.s   $f12, $f14
+  ; CMOV-32:    movt.s    $f14, $f12, $fcc0
+  ; CMOV-32:    mov.s     $f0, $f14
+
+  ; SEL-32:     cmp.lt.s  $f0, $f12, $f14
+  ; SEL-32:     sel.s     $f0, $f14, $f12
+
+  ; CMOV-64:    c.olt.s   $f12, $f13
+  ; CMOV-64:    movt.s    $f13, $f12, $fcc0
+  ; CMOV-64:    mov.s     $f0, $f13
+
+  ; SEL-64:     cmp.lt.s  $f0, $f12, $f13
+  ; SEL-64:     sel.s     $f0, $f13, $f12
+  %s = fcmp olt float %x, %y
+  %r = select i1 %s, float %x, float %y
+  ret float %r
+}
+
+define float @tst_select_fcmp_ole_float(float %x, float %y) {
+entry:
+  ; ALL-LABEL: tst_select_fcmp_ole_float:
+
+  ; M2:         c.ole.s   $f12, $f14
+  ; M3:         c.ole.s   $f12, $f13
+  ; M2-M3:      bc1t      $[[BB0:BB[0-9_]+]]
+  ; M2-M3:      nop
+  ; M2:         mov.s     $f12, $f14
+  ; M3:         mov.s     $f12, $f13
+  ; M2-M3:      $[[BB0]]:
+  ; M2-M3:      jr        $ra
+  ; M2-M3:      mov.s     $f0, $f12
+
+  ; CMOV-32:    c.ole.s   $f12, $f14
+  ; CMOV-32:    movt.s    $f14, $f12, $fcc0
+  ; CMOV-32:    mov.s     $f0, $f14
+
+  ; SEL-32:     cmp.le.s  $f0, $f12, $f14
+  ; SEL-32:     sel.s     $f0, $f14, $f12
+
+  ; CMOV-64:    c.ole.s   $f12, $f13
+  ; CMOV-64:    movt.s    $f13, $f12, $fcc0
+  ; CMOV-64:    mov.s     $f0, $f13
+
+  ; SEL-64:     cmp.le.s  $f0, $f12, $f13
+  ; SEL-64:     sel.s     $f0, $f13, $f12
+  %s = fcmp ole float %x, %y
+  %r = select i1 %s, float %x, float %y
+  ret float %r
+}
+
+define float @tst_select_fcmp_ogt_float(float %x, float %y) {
+entry:
+  ; ALL-LABEL: tst_select_fcmp_ogt_float:
+
+  ; M2:         c.ule.s   $f12, $f14
+  ; M3:         c.ule.s   $f12, $f13
+  ; M2-M3:      bc1f      $[[BB0:BB[0-9_]+]]
+  ; M2-M3:      nop
+  ; M2:         mov.s     $f12, $f14
+  ; M3:         mov.s     $f12, $f13
+  ; M2-M3:      $[[BB0]]:
+  ; M2-M3:      jr        $ra
+  ; M2-M3:      mov.s     $f0, $f12
+
+  ; CMOV-32:    c.ule.s   $f12, $f14
+  ; CMOV-32:    movf.s    $f14, $f12, $fcc0
+  ; CMOV-32:    mov.s     $f0, $f14
+
+  ; SEL-32:     cmp.lt.s  $f0, $f14, $f12
+  ; SEL-32:     sel.s     $f0, $f14, $f12
+
+  ; CMOV-64:    c.ule.s   $f12, $f13
+  ; CMOV-64:    movf.s    $f13, $f12, $fcc0
+  ; CMOV-64:    mov.s     $f0, $f13
+
+  ; SEL-64:     cmp.lt.s  $f0, $f13, $f12
+  ; SEL-64:     sel.s     $f0, $f13, $f12
+  %s = fcmp ogt float %x, %y
+  %r = select i1 %s, float %x, float %y
+  ret float %r
+}
+
+define float @tst_select_fcmp_oge_float(float %x, float %y) {
+entry:
+  ; ALL-LABEL: tst_select_fcmp_oge_float:
+
+  ; M2:         c.ult.s   $f12, $f14
+  ; M3:         c.ult.s   $f12, $f13
+  ; M2-M3:      bc1f      $[[BB0:BB[0-9_]+]]
+  ; M2-M3:      nop
+  ; M2:         mov.s     $f12, $f14
+  ; M3:         mov.s     $f12, $f13
+  ; M2-M3:      $[[BB0]]:
+  ; M2-M3:      jr        $ra
+  ; M2-M3:      mov.s     $f0, $f12
+
+  ; CMOV-32:    c.ult.s   $f12, $f14
+  ; CMOV-32:    movf.s    $f14, $f12, $fcc0
+  ; CMOV-32:    mov.s     $f0, $f14
+
+  ; SEL-32:     cmp.le.s  $f0, $f14, $f12
+  ; SEL-32:     sel.s     $f0, $f14, $f12
+
+  ; CMOV-64:    c.ult.s   $f12, $f13
+  ; CMOV-64:    movf.s    $f13, $f12, $fcc0
+  ; CMOV-64:    mov.s     $f0, $f13
+
+  ; SEL-64:     cmp.le.s  $f0, $f13, $f12
+  ; SEL-64:     sel.s     $f0, $f13, $f12
+  %s = fcmp oge float %x, %y
+  %r = select i1 %s, float %x, float %y
+  ret float %r
+}
+
+define float @tst_select_fcmp_oeq_float(float %x, float %y) {
+entry:
+  ; ALL-LABEL: tst_select_fcmp_oeq_float:
+
+  ; M2:         c.eq.s    $f12, $f14
+  ; M3:         c.eq.s    $f12, $f13
+  ; M2-M3:      bc1t      $[[BB0:BB[0-9_]+]]
+  ; M2-M3:      nop
+  ; M2:         mov.s     $f12, $f14
+  ; M3:         mov.s     $f12, $f13
+  ; M2-M3:      $[[BB0]]:
+  ; M2-M3:      jr        $ra
+  ; M2-M3:      mov.s     $f0, $f12
+
+  ; CMOV-32:    c.eq.s    $f12, $f14
+  ; CMOV-32:    movt.s    $f14, $f12, $fcc0
+  ; CMOV-32:    mov.s     $f0, $f14
+
+  ; SEL-32:     cmp.eq.s  $f0, $f12, $f14
+  ; SEL-32:     sel.s     $f0, $f14, $f12
+
+  ; CMOV-64:    c.eq.s    $f12, $f13
+  ; CMOV-64:    movt.s    $f13, $f12, $fcc0
+  ; CMOV-64:    mov.s     $f0, $f13
+
+  ; SEL-64:     cmp.eq.s  $f0, $f12, $f13
+  ; SEL-64:     sel.s     $f0, $f13, $f12
+  %s = fcmp oeq float %x, %y
+  %r = select i1 %s, float %x, float %y
+  ret float %r
+}
+
+define float @tst_select_fcmp_one_float(float %x, float %y) {
+entry:
+  ; ALL-LABEL: tst_select_fcmp_one_float:
+
+  ; M2:         c.ueq.s   $f12, $f14
+  ; M3:         c.ueq.s   $f12, $f13
+  ; M2-M3:      bc1f      $[[BB0:BB[0-9_]+]]
+  ; M2-M3:      nop
+  ; M2:         mov.s     $f12, $f14
+  ; M3:         mov.s     $f12, $f13
+  ; M2-M3:      $[[BB0]]:
+  ; M2-M3:      jr        $ra
+  ; M2-M3:      mov.s     $f0, $f12
+
+  ; CMOV-32:    c.ueq.s   $f12, $f14
+  ; CMOV-32:    movf.s    $f14, $f12, $fcc0
+  ; CMOV-32:    mov.s     $f0, $f14
+
+  ; SEL-32:     cmp.ueq.s $f0, $f12, $f14
+  ; SEL-32:     mfc1      $[[T0:[0-9]+]], $f0
+  ; SEL-32:     not       $[[T0]], $[[T0]]
+  ; SEL-32:     mtc1      $[[T0:[0-9]+]], $f0
+  ; SEL-32:     sel.s     $f0, $f14, $f12
+
+  ; CMOV-64:    c.ueq.s   $f12, $f13
+  ; CMOV-64:    movf.s    $f13, $f12, $fcc0
+  ; CMOV-64:    mov.s     $f0, $f13
+
+  ; SEL-64:     cmp.ueq.s $f0, $f12, $f13
+  ; SEL-64:     mfc1      $[[T0:[0-9]+]], $f0
+  ; SEL-64:     not       $[[T0]], $[[T0]]
+  ; SEL-64:     mtc1      $[[T0:[0-9]+]], $f0
+  ; SEL-64:     sel.s     $f0, $f13, $f12
+
+  %s = fcmp one float %x, %y
+  %r = select i1 %s, float %x, float %y
+  ret float %r
+}
+
+define double @tst_select_fcmp_olt_double(double %x, double %y) {
+entry:
+  ; ALL-LABEL: tst_select_fcmp_olt_double:
+
+  ; M2:         c.olt.d   $f12, $f14
+  ; M3:         c.olt.d   $f12, $f13
+  ; M2-M3:      bc1t      $[[BB0:BB[0-9_]+]]
+  ; M2-M3:      nop
+  ; M2:         mov.d     $f12, $f14
+  ; M3:         mov.d     $f12, $f13
+  ; M2-M3:      $[[BB0]]:
+  ; M2-M3:      jr        $ra
+  ; M2-M3:      mov.d     $f0, $f12
+
+  ; CMOV-32:    c.olt.d   $f12, $f14
+  ; CMOV-32:    movt.d    $f14, $f12, $fcc0
+  ; CMOV-32:    mov.d     $f0, $f14
+
+  ; SEL-32:     cmp.lt.d  $f0, $f12, $f14
+  ; SEL-32:     sel.d     $f0, $f14, $f12
+
+  ; CMOV-64:    c.olt.d   $f12, $f13
+  ; CMOV-64:    movt.d    $f13, $f12, $fcc0
+  ; CMOV-64:    mov.d     $f0, $f13
+
+  ; SEL-64:     cmp.lt.d  $f0, $f12, $f13
+  ; SEL-64:     sel.d     $f0, $f13, $f12
+  %s = fcmp olt double %x, %y
+  %r = select i1 %s, double %x, double %y
+  ret double %r
+}
+
+define double @tst_select_fcmp_ole_double(double %x, double %y) {
+entry:
+  ; ALL-LABEL: tst_select_fcmp_ole_double:
+
+  ; M2:         c.ole.d   $f12, $f14
+  ; M3:         c.ole.d   $f12, $f13
+  ; M2-M3:      bc1t      $[[BB0:BB[0-9_]+]]
+  ; M2-M3:      nop
+  ; M2:         mov.d     $f12, $f14
+  ; M3:         mov.d     $f12, $f13
+  ; M2-M3:      $[[BB0]]:
+  ; M2-M3:      jr        $ra
+  ; M2-M3:      mov.d     $f0, $f12
+
+  ; CMOV-32:    c.ole.d   $f12, $f14
+  ; CMOV-32:    movt.d    $f14, $f12, $fcc0
+  ; CMOV-32:    mov.d     $f0, $f14
+
+  ; SEL-32:     cmp.le.d  $f0, $f12, $f14
+  ; SEL-32:     sel.d     $f0, $f14, $f12
+
+  ; CMOV-64:    c.ole.d   $f12, $f13
+  ; CMOV-64:    movt.d    $f13, $f12, $fcc0
+  ; CMOV-64:    mov.d     $f0, $f13
+
+  ; SEL-64:     cmp.le.d  $f0, $f12, $f13
+  ; SEL-64:     sel.d     $f0, $f13, $f12
+  %s = fcmp ole double %x, %y
+  %r = select i1 %s, double %x, double %y
+  ret double %r
+}
+
+define double @tst_select_fcmp_ogt_double(double %x, double %y) {
+entry:
+  ; ALL-LABEL: tst_select_fcmp_ogt_double:
+
+  ; M2:         c.ule.d   $f12, $f14
+  ; M3:         c.ule.d   $f12, $f13
+  ; M2-M3:      bc1f      $[[BB0:BB[0-9_]+]]
+  ; M2-M3:      nop
+  ; M2:         mov.d     $f12, $f14
+  ; M3:         mov.d     $f12, $f13
+  ; M2-M3:      $[[BB0]]:
+  ; M2-M3:      jr        $ra
+  ; M2-M3:      mov.d     $f0, $f12
+
+  ; CMOV-32:    c.ule.d   $f12, $f14
+  ; CMOV-32:    movf.d    $f14, $f12, $fcc0
+  ; CMOV-32:    mov.d     $f0, $f14
+
+  ; SEL-32:     cmp.lt.d  $f0, $f14, $f12
+  ; SEL-32:     sel.d     $f0, $f14, $f12
+
+  ; CMOV-64:    c.ule.d   $f12, $f13
+  ; CMOV-64:    movf.d    $f13, $f12, $fcc0
+  ; CMOV-64:    mov.d     $f0, $f13
+
+  ; SEL-64:     cmp.lt.d  $f0, $f13, $f12
+  ; SEL-64:     sel.d     $f0, $f13, $f12
+  %s = fcmp ogt double %x, %y
+  %r = select i1 %s, double %x, double %y
+  ret double %r
+}
+
+define double @tst_select_fcmp_oge_double(double %x, double %y) {
+entry:
+  ; ALL-LABEL: tst_select_fcmp_oge_double:
+
+  ; M2:         c.ult.d   $f12, $f14
+  ; M3:         c.ult.d   $f12, $f13
+  ; M2-M3:      bc1f      $[[BB0:BB[0-9_]+]]
+  ; M2-M3:      nop
+  ; M2:         mov.d     $f12, $f14
+  ; M3:         mov.d     $f12, $f13
+  ; M2-M3:      $[[BB0]]:
+  ; M2-M3:      jr        $ra
+  ; M2-M3:      mov.d     $f0, $f12
+
+  ; CMOV-32:    c.ult.d   $f12, $f14
+  ; CMOV-32:    movf.d    $f14, $f12, $fcc0
+  ; CMOV-32:    mov.d     $f0, $f14
+
+  ; SEL-32:     cmp.le.d  $f0, $f14, $f12
+  ; SEL-32:     sel.d     $f0, $f14, $f12
+
+  ; CMOV-64:    c.ult.d   $f12, $f13
+  ; CMOV-64:    movf.d    $f13, $f12, $fcc0
+  ; CMOV-64:    mov.d     $f0, $f13
+
+  ; SEL-64:     cmp.le.d  $f0, $f13, $f12
+  ; SEL-64:     sel.d     $f0, $f13, $f12
+  %s = fcmp oge double %x, %y
+  %r = select i1 %s, double %x, double %y
+  ret double %r
+}
+
+define double @tst_select_fcmp_oeq_double(double %x, double %y) {
+entry:
+  ; ALL-LABEL: tst_select_fcmp_oeq_double:
+
+  ; M2:         c.eq.d    $f12, $f14
+  ; M3:         c.eq.d    $f12, $f13
+  ; M2-M3:      bc1t      $[[BB0:BB[0-9_]+]]
+  ; M2-M3:      nop
+  ; M2:         mov.d     $f12, $f14
+  ; M3:         mov.d     $f12, $f13
+  ; M2-M3:      $[[BB0]]:
+  ; M2-M3:      jr        $ra
+  ; M2-M3:      mov.d     $f0, $f12
+
+  ; CMOV-32:    c.eq.d    $f12, $f14
+  ; CMOV-32:    movt.d    $f14, $f12, $fcc0
+  ; CMOV-32:    mov.d     $f0, $f14
+
+  ; SEL-32:     cmp.eq.d  $f0, $f12, $f14
+  ; SEL-32:     sel.d     $f0, $f14, $f12
+
+  ; CMOV-64:    c.eq.d    $f12, $f13
+  ; CMOV-64:    movt.d    $f13, $f12, $fcc0
+  ; CMOV-64:    mov.d     $f0, $f13
+
+  ; SEL-64:     cmp.eq.d  $f0, $f12, $f13
+  ; SEL-64:     sel.d     $f0, $f13, $f12
+  %s = fcmp oeq double %x, %y
+  %r = select i1 %s, double %x, double %y
+  ret double %r
+}
+
+define double @tst_select_fcmp_one_double(double %x, double %y) {
+entry:
+  ; ALL-LABEL: tst_select_fcmp_one_double:
+
+  ; M2:         c.ueq.d   $f12, $f14
+  ; M3:         c.ueq.d   $f12, $f13
+  ; M2-M3:      bc1f      $[[BB0:BB[0-9_]+]]
+  ; M2-M3:      nop
+  ; M2:         mov.d     $f12, $f14
+  ; M3:         mov.d     $f12, $f13
+  ; M2-M3:      $[[BB0]]:
+  ; M2-M3:      jr        $ra
+  ; M2-M3:      mov.d     $f0, $f12
+
+  ; CMOV-32:    c.ueq.d   $f12, $f14
+  ; CMOV-32:    movf.d    $f14, $f12, $fcc0
+  ; CMOV-32:    mov.d     $f0, $f14
+
+  ; SEL-32:     cmp.ueq.d $f0, $f12, $f14
+  ; SEL-32:     mfc1      $[[T0:[0-9]+]], $f0
+  ; SEL-32:     not       $[[T0]], $[[T0]]
+  ; SEL-32:     mtc1      $[[T0:[0-9]+]], $f0
+  ; SEL-32:     sel.d     $f0, $f14, $f12
+
+  ; CMOV-64:    c.ueq.d   $f12, $f13
+  ; CMOV-64:    movf.d    $f13, $f12, $fcc0
+  ; CMOV-64:    mov.d     $f0, $f13
+
+  ; SEL-64:     cmp.ueq.d $f0, $f12, $f13
+  ; SEL-64:     mfc1      $[[T0:[0-9]+]], $f0
+  ; SEL-64:     not       $[[T0]], $[[T0]]
+  ; SEL-64:     mtc1      $[[T0:[0-9]+]], $f0
+  ; SEL-64:     sel.d     $f0, $f13, $f12
+  %s = fcmp one double %x, %y
+  %r = select i1 %s, double %x, double %y
+  ret double %r
+}
diff --git a/test/CodeGen/Mips/longbranch.ll b/test/CodeGen/Mips/longbranch.ll
index b9b52be01dad..ad0235eb0af4 100644
--- a/test/CodeGen/Mips/longbranch.ll
+++ b/test/CodeGen/Mips/longbranch.ll
@@ -123,7 +123,7 @@ end:
 
 ; MICROMIPS:   $[[BB0]]:
 ; MICROMIPS:        lw      $[[R1:[0-9]+]], %got(x)($[[GP]])
-; MICROMIPS:        addiu   $[[R2:[0-9]+]], $zero, 1
+; MICROMIPS:        li16    $[[R2:[0-9]+]], 1
 ; MICROMIPS:        sw      $[[R2]], 0($[[R1]])
 ; MICROMIPS:   $[[BB2]]:
 ; MICROMIPS:        jr      $ra
diff --git a/test/CodeGen/Mips/mbrsize4a.ll b/test/CodeGen/Mips/mbrsize4a.ll
index c80299166ab4..15e1f47ce29e 100644
--- a/test/CodeGen/Mips/mbrsize4a.ll
+++ b/test/CodeGen/Mips/mbrsize4a.ll
@@ -34,4 +34,4 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #2 = { nounwind }
 
-!1 = metadata !{i32 68}
+!1 = !{i32 68}
diff --git a/test/CodeGen/Mips/micromips-addiu.ll b/test/CodeGen/Mips/micromips-addiu.ll
new file mode 100644
index 000000000000..c5bee34028c8
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-addiu.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips \
+; RUN:   -relocation-model=pic -O3 < %s | FileCheck %s
+
+@x = global i32 65504, align 4
+@y = global i32 60929, align 4
+@z = global i32 60929, align 4
+@.str = private unnamed_addr constant [7 x i8] c"%08x \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @x, align 4
+  %addiu1 = add i32 %0, -7
+  %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
+                                  ([7 x i8]* @.str, i32 0, i32 0), i32 %addiu1)
+
+  %1 = load i32* @y, align 4
+  %addiu2 = add i32 %1, 55
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
+                                  ([7 x i8]* @.str, i32 0, i32 0), i32 %addiu2)
+
+  %2 = load i32* @z, align 4
+  %addiu3 = add i32 %2, 24
+  %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
+                                  ([7 x i8]* @.str, i32 0, i32 0), i32 %addiu3)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
+
+; CHECK: addius5  ${{[0-9]+}}, -7
+; CHECK: addiu    ${{[0-9]+}}, ${{[0-9]+}}, 55
+; CHECK: addiur2  ${{[2-7]|16|17}}, ${{[2-7]|16|17}}, 24
diff --git a/test/CodeGen/Mips/micromips-andi.ll b/test/CodeGen/Mips/micromips-andi.ll
new file mode 100644
index 000000000000..b82d2b09eae4
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-andi.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips \
+; RUN:   -relocation-model=pic -O3 < %s | FileCheck %s
+
+@x = global i32 65504, align 4
+@y = global i32 60929, align 4
+@.str = private unnamed_addr constant [7 x i8] c"%08x \0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+  %0 = load i32* @x, align 4
+  %and1 = and i32 %0, 4
+  %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
+                                  ([7 x i8]* @.str, i32 0, i32 0), i32 %and1)
+
+  %1 = load i32* @y, align 4
+  %and2 = and i32 %1, 5
+  %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
+                                  ([7 x i8]* @.str, i32 0, i32 0), i32 %and2)
+  ret i32 0
+}
+
+declare i32 @printf(i8*, ...)
+
+; CHECK: andi16 ${{[2-7]|16|17}}, ${{[2-7]|16|17}}
+; CHECK: andi   ${{[0-9]+}}, ${{[0-9]+}}
diff --git a/test/CodeGen/Mips/micromips-atomic.ll b/test/CodeGen/Mips/micromips-atomic.ll
index a50e0b7850c3..82eee4bd84b5 100644
--- a/test/CodeGen/Mips/micromips-atomic.ll
+++ b/test/CodeGen/Mips/micromips-atomic.ll
@@ -14,5 +14,5 @@ entry:
 ; CHECK:   ll      $[[R1:[0-9]+]], 0($[[R0]])
 ; CHECK:   addu    $[[R2:[0-9]+]], $[[R1]], $4
 ; CHECK:   sc      $[[R2]], 0($[[R0]])
-; CHECK:   beqz    $[[R2]], $[[BB0]]
+; CHECK:   beqzc   $[[R2]], $[[BB0]]
 }
diff --git a/test/CodeGen/Mips/micromips-atomic1.ll b/test/CodeGen/Mips/micromips-atomic1.ll
new file mode 100644
index 000000000000..37c3d7682e4f
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-atomic1.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=mipsel -filetype=obj --disable-machine-licm -mattr=micromips < %s -o - \
+; RUN:   | llvm-objdump -no-show-raw-insn -arch mipsel -mcpu=mips32r2 -mattr=micromips -d - \
+; RUN:   | FileCheck %s -check-prefix=MICROMIPS
+
+; Use llvm-objdump to check wheter the encodings of microMIPS atomic instructions are correct.
+; While emitting assembly files directly when in microMIPS mode, it is possible to emit a mips32r2
+; instruction instead of microMIPS instruction, and since many mips32r2 and microMIPS
+; instructions have identical assembly formats, invalid instruction cannot be detected.
+
+@y = common global i8 0, align 1
+
+define signext i8 @AtomicLoadAdd8(i8 signext %incr) nounwind {
+entry:
+  %0 = atomicrmw add i8* @y, i8 %incr monotonic
+  ret i8 %0
+
+; MICROMIPS:     ll      ${{[0-9]+}}, 0(${{[0-9]+}})
+; MICROMIPS:     sc      ${{[0-9]+}}, 0(${{[0-9]+}})
+}
+
+define signext i8 @AtomicCmpSwap8(i8 signext %oldval, i8 signext %newval) nounwind {
+entry:
+  %pair0 = cmpxchg i8* @y, i8 %oldval, i8 %newval monotonic monotonic
+  %0 = extractvalue { i8, i1 } %pair0, 0
+  ret i8 %0
+
+; MICROMIPS:     ll      ${{[0-9]+}}, 0(${{[0-9]+}})
+; MICROMIPS:     sc      ${{[0-9]+}}, 0(${{[0-9]+}})
+}
diff --git a/test/CodeGen/Mips/micromips-compact-branches.ll b/test/CodeGen/Mips/micromips-compact-branches.ll
new file mode 100644
index 000000000000..670f9a05064f
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-compact-branches.ll
@@ -0,0 +1,19 @@
+; RUN: llc %s -march=mipsel -mattr=micromips -filetype=asm -O3 \
+; RUN: -disable-mips-delay-filler -relocation-model=pic -o - | FileCheck %s
+
+define void @main() nounwind uwtable {
+entry:
+  %x = alloca i32, align 4
+  %0 = load i32* %x, align 4
+  %cmp = icmp eq i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  store i32 10, i32* %x, align 4
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK: bnezc
diff --git a/test/CodeGen/Mips/micromips-delay-slot-jr.ll b/test/CodeGen/Mips/micromips-delay-slot-jr.ll
new file mode 100644
index 000000000000..df593b35e2a6
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-delay-slot-jr.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips \
+; RUN:   -relocation-model=static -O2 < %s | FileCheck %s
+
+@main.L = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@main, %L1), i8* blockaddress(@main, %L2), i8* null], align 4
+@str = private unnamed_addr constant [2 x i8] c"A\00"
+@str2 = private unnamed_addr constant [2 x i8] c"B\00"
+
+define i32 @main() #0 {
+entry:
+  br label %L1
+
+L1:                                               ; preds = %entry, %L1
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %L1 ]
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str, i32 0, i32 0))
+  %inc = add i32 %i.0, 1
+  %arrayidx = getelementptr inbounds [3 x i8*]* @main.L, i32 0, i32 %i.0
+  %0 = load i8** %arrayidx, align 4, !tbaa !1
+  indirectbr i8* %0, [label %L1, label %L2]
+
+L2:                                               ; preds = %L1
+  %puts2 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str2, i32 0, i32 0))
+  ret i32 0
+}
+
+declare i32 @puts(i8* nocapture readonly) #1
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"any pointer", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+
+; CHECK:      jr
+; CHECK-NEXT: nop
+
+%struct.foostruct = type { [3 x float] }
+%struct.barstruct = type { %struct.foostruct, float }
+@bar_ary = common global [4 x %struct.barstruct] zeroinitializer, align 4
+define float* @spooky(i32 signext %i) #0 {
+
+  %safe = getelementptr inbounds [4 x %struct.barstruct]* @bar_ary, i32 0, i32 %i, i32 1
+  store float 1.420000e+02, float* %safe, align 4, !tbaa !1
+  ret float* %safe
+}
+
+; CHECK:      spooky:
+; CHECK:      jr $ra
+; CHECK-NEXT: nop
+
diff --git a/test/CodeGen/Mips/micromips-delay-slot.ll b/test/CodeGen/Mips/micromips-delay-slot.ll
new file mode 100644
index 000000000000..b5f6c56235bc
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-delay-slot.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips \
+; RUN:   -relocation-model=static -O2 < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define i32 @foo(i32 signext %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  %0 = load i32* %a.addr, align 4
+  %shl = shl i32 %0, 2
+  %call = call i32 @bar(i32 signext %shl)
+  ret i32 %call
+}
+
+declare i32 @bar(i32 signext) #1
+
+; CHECK:      jals
+; CHECK-NEXT: sll16
diff --git a/test/CodeGen/Mips/micromips-li.ll b/test/CodeGen/Mips/micromips-li.ll
new file mode 100644
index 000000000000..ac315f938251
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-li.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips \
+; RUN:   -relocation-model=pic -O3 < %s | FileCheck %s
+
+@x = external global i32
+@y = external global i32
+@z = external global i32
+
+define i32 @main() nounwind {
+entry:
+  store i32 1, i32* @x, align 4
+  store i32 2148, i32* @y, align 4
+  store i32 33332, i32* @z, align 4
+  ret i32 0
+}
+
+; CHECK: li16   ${{[2-7]|16|17}}, 1
+; CHECK: addiu  ${{[0-9]+}}, $zero, 2148
+; CHECK: ori ${{[0-9]+}}, $zero, 33332
diff --git a/test/CodeGen/Mips/micromips-rdhwr-directives.ll b/test/CodeGen/Mips/micromips-rdhwr-directives.ll
new file mode 100644
index 000000000000..af40a8796824
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-rdhwr-directives.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=mipsel -mcpu=mips32 -relocation-model=static < %s \
+; RUN:   -mattr=+micromips | FileCheck %s
+
+@a = external thread_local global i32
+
+define i32 @foo() nounwind readonly {
+entry:
+; CHECK: .set  push
+; CHECK: .set  mips32r2
+; CHECK: rdhwr
+; CHECK: .set  pop
+
+  %0 = load i32* @a, align 4
+  ret i32 %0
+}
diff --git a/test/CodeGen/Mips/micromips-shift.ll b/test/CodeGen/Mips/micromips-shift.ll
new file mode 100644
index 000000000000..8215010bfc78
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-shift.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips \
+; RUN:   -relocation-model=pic -O3 < %s | FileCheck %s
+
+@a = global i32 10, align 4
+@b = global i32 0, align 4
+@c = global i32 10, align 4
+@d = global i32 0, align 4
+
+define i32 @shift_left() nounwind {
+entry:
+  %0 = load i32* @a, align 4
+  %shl = shl i32 %0, 4
+  store i32 %shl, i32* @b, align 4
+
+  %1 = load i32* @c, align 4
+  %shl1 = shl i32 %1, 10
+  store i32 %shl1, i32* @d, align 4
+
+  ret i32 0
+}
+
+; CHECK: sll16  ${{[2-7]|16|17}}, ${{[2-7]|16|17}}, {{[0-7]}}
+; CHECK: sll    ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
+
+@i = global i32 10654, align 4
+@j = global i32 0, align 4
+@m = global i32 10, align 4
+@n = global i32 0, align 4
+
+define i32 @shift_right() nounwind {
+entry:
+  %0 = load i32* @i, align 4
+  %shr = lshr i32 %0, 4
+  store i32 %shr, i32* @j, align 4
+
+  %1 = load i32* @m, align 4
+  %shr1 = lshr i32 %1, 10
+  store i32 %shr1, i32* @n, align 4
+
+  ret i32 0
+}
+
+; CHECK: srl16  ${{[2-7]|16|17}}, ${{[2-7]|16|17}}, {{[0-7]}}
+; CHECK: srl    ${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
diff --git a/test/CodeGen/Mips/mips16-hf-attr-2.ll b/test/CodeGen/Mips/mips16-hf-attr-2.ll
new file mode 100644
index 000000000000..60c6eaad8f76
--- /dev/null
+++ b/test/CodeGen/Mips/mips16-hf-attr-2.ll
@@ -0,0 +1,45 @@
+; Check that stubs generation for mips16 hard-float mode does not depend
+; on the function 'use-soft-float' attribute's value.
+; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel \
+; RUN:     -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s
+
+define void @bar_sf() #1 {
+; CHECK: bar_sf:
+entry:
+  %call1 = call float @foo(float 1.000000e+00)
+; CHECK: lw $3, %call16(foo)($2)
+; CHECK-NOT: lw $5, %got(__mips16_call_stub_sf_1)($3)
+  ret void
+}
+
+define void @bar_hf() #0 {
+; CHECK: bar_hf:
+entry:
+  %call1 = call float @foo(float 1.000000e+00)
+; CHECK: lw $2, %call16(foo)($3)
+; CHECK: lw $5, %got(__mips16_call_stub_sf_1)($3)
+  ret void
+}
+
+declare float @foo(float) #2
+
+attributes #0 = {
+  nounwind
+  "less-precise-fpmad"="false" "no-frame-pointer-elim"="true"
+  "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false"
+  "no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
+  "unsafe-fp-math"="false" "use-soft-float"="false"
+}
+attributes #1 = {
+  nounwind
+  "less-precise-fpmad"="false" "no-frame-pointer-elim"="true"
+  "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false"
+  "no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
+  "unsafe-fp-math"="false" "use-soft-float"="true"
+}
+attributes #2 = {
+  "less-precise-fpmad"="false" "no-frame-pointer-elim"="true"
+  "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false"
+  "no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
+  "unsafe-fp-math"="false" "use-soft-float"="true"
+}
diff --git a/test/CodeGen/Mips/mips16-hf-attr.ll b/test/CodeGen/Mips/mips16-hf-attr.ll
index d9ad6295bef8..c6ad442fdea2 100644
--- a/test/CodeGen/Mips/mips16-hf-attr.ll
+++ b/test/CodeGen/Mips/mips16-hf-attr.ll
@@ -3,8 +3,8 @@
 ; RUN: llc -mtriple=mipsel-linux-gnu -march=mipsel \
 ; RUN:     -mcpu=mips16 -relocation-model=pic < %s | FileCheck %s
 
-define void @bar_sf() #0 {
-; CHECK: bar_sf:
+define void @bar_hf() #0 {
+; CHECK: bar_hf:
 entry:
   %call1 = call float @foo(float 1.000000e+00)
 ; CHECK: lw $2, %call16(foo)($3)
@@ -12,12 +12,12 @@ entry:
   ret void
 }
 
-define void @bar_hf() #1 {
-; CHECK: bar_hf:
+define void @bar_sf() #1 {
+; CHECK: bar_sf:
 entry:
   %call1 = call float @foo(float 1.000000e+00)
-; CHECK: lw $2, %call16(foo)($3)
-; CHECK: lw $5, %got(__mips16_call_stub_sf_1)($3)
+; CHECK: lw $3, %call16(foo)($2)
+; CHECK-NOT: lw $5, %got(__mips16_call_stub_sf_1)($3)
   ret void
 }
 
diff --git a/test/CodeGen/Mips/mips64-f128.ll b/test/CodeGen/Mips/mips64-f128.ll
index f0cbbd08d79a..6987d4ab0734 100644
--- a/test/CodeGen/Mips/mips64-f128.ll
+++ b/test/CodeGen/Mips/mips64-f128.ll
@@ -545,7 +545,7 @@ entry:
 
 ; ALL-LABEL: load_LD_float:
 ; ALL: ld   $[[R0:[0-9]+]], %got_disp(gf1)
-; ALL: lw   $4, 0($[[R0]])
+; ALL: lwu  $4, 0($[[R0]])
 ; ALL: ld   $25, %call16(__extendsftf2)
 ; ALL: jalr $25
 
diff --git a/test/CodeGen/Mips/msa/arithmetic_float.ll b/test/CodeGen/Mips/msa/arithmetic_float.ll
index 86e57ac85a3b..9aae284fe535 100644
--- a/test/CodeGen/Mips/msa/arithmetic_float.ll
+++ b/test/CodeGen/Mips/msa/arithmetic_float.ll
@@ -276,8 +276,8 @@ define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
   %3 = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %2
-  ; CHECK-DAG: lui [[R3:\$[0-9]+]], 16384
-  ; CHECK-DAG: fill.w [[R4:\$w[0-9]+]], [[R3]]
+  ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
+  ; CHECK-DAG: ffint_u.w [[R4:\$w[0-9]+]], [[R3]]
   ; CHECK-DAG: fexp2.w [[R5:\$w[0-9]+]], [[R4]], [[R1]]
   store <4 x float> %3, <4 x float>* %c
   ; CHECK-DAG: st.w [[R5]], 0($4)
@@ -287,16 +287,14 @@ define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
 }
 
 define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
-  ; CHECK:      .8byte 4611686018427387904
-  ; CHECK-NEXT: .8byte 4611686018427387904
   ; CHECK: fexp2_v2f64_2:
 
   %1 = load <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
   %3 = fmul <2 x double> <double 2.0, double 2.0>, %2
-  ; CHECK-DAG: addiu [[G_PTR:\$[0-9]+]], {{.*}}, %lo($
-  ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[G_PTR]])
+  ; CHECK-DAG: ldi.d [[R2:\$w[0-9]+]], 1
+  ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R2]]
   ; CHECK-DAG: fexp2.d [[R4:\$w[0-9]+]], [[R3]], [[R1]]
   store <2 x double> %3, <2 x double>* %c
   ; CHECK-DAG: st.d [[R4]], 0($4)
diff --git a/test/CodeGen/Mips/named-register-n32.ll b/test/CodeGen/Mips/named-register-n32.ll
new file mode 100644
index 000000000000..1e5f53ac5c9c
--- /dev/null
+++ b/test/CodeGen/Mips/named-register-n32.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=mips64 -relocation-model=static -mattr=+noabicalls,-n64,+n32 < %s | FileCheck %s
+
+define i32* @get_gp() {
+entry:
+  %0 = call i64 @llvm.read_register.i64(metadata !0)
+  %1 = trunc i64 %0 to i32
+  %2 = inttoptr i32 %1 to i32*
+  ret i32* %2
+}
+
+; CHECK-LABEL: get_gp:
+; CHECK:           sll $2, $gp, 0
+
+declare i64 @llvm.read_register.i64(metadata)
+
+!llvm.named.register.$28 = !{!0}
+
+!0 = !{!"$28"}
diff --git a/test/CodeGen/Mips/named-register-n64.ll b/test/CodeGen/Mips/named-register-n64.ll
new file mode 100644
index 000000000000..31987726169e
--- /dev/null
+++ b/test/CodeGen/Mips/named-register-n64.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=mips64 -relocation-model=static -mattr=+noabicalls < %s | FileCheck %s
+
+define i32* @get_gp() {
+entry:
+  %0 = call i64 @llvm.read_register.i64(metadata !0)
+  %1 = inttoptr i64 %0 to i32*
+  ret i32* %1
+}
+
+; CHECK-LABEL: get_gp:
+; CHECK:           move $2, $gp
+
+declare i64 @llvm.read_register.i64(metadata)
+
+!llvm.named.register.$28 = !{!0}
+
+!0 = !{!"$28"}
diff --git a/test/CodeGen/Mips/named-register-o32.ll b/test/CodeGen/Mips/named-register-o32.ll
new file mode 100644
index 000000000000..0890c66283c3
--- /dev/null
+++ b/test/CodeGen/Mips/named-register-o32.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=mips -relocation-model=static -mattr=+noabicalls < %s | FileCheck %s
+
+define i32* @get_gp() {
+entry:
+  %0 = call i32 @llvm.read_register.i32(metadata !0)
+  %1 = inttoptr i32 %0 to i32*
+  ret i32* %1
+}
+
+; CHECK-LABEL: get_gp:
+; CHECK:           move $2, $gp
+
+declare i32 @llvm.read_register.i32(metadata)
+
+!llvm.named.register.$28 = !{!0}
+
+!0 = !{!"$28"}
diff --git a/test/CodeGen/Mips/nomips16.ll b/test/CodeGen/Mips/nomips16.ll
index 0affb16ac7c2..5f7d74e41979 100644
--- a/test/CodeGen/Mips/nomips16.ll
+++ b/test/CodeGen/Mips/nomips16.ll
@@ -33,6 +33,6 @@ entry:
 ; CHECK: 	.end	nofoo
 
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
-attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "mips16" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "nomips16" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
diff --git a/test/CodeGen/Mips/octeon.ll b/test/CodeGen/Mips/octeon.ll
index d5ff9bdf3608..9d82b74f5b7e 100644
--- a/test/CodeGen/Mips/octeon.ll
+++ b/test/CodeGen/Mips/octeon.ll
@@ -27,3 +27,69 @@ entry:
   %res = mul i64 %a, %b
   ret i64 %res
 }
+
+define i64 @cmpeq(i64 %a, i64 %b) nounwind {
+entry:
+; OCTEON-LABEL: cmpeq:
+; OCTEON: jr     $ra
+; OCTEON: seq    $2, $4, $5
+; MIPS64-LABEL: cmpeq:
+; MIPS64: xor    $1, $4, $5
+; MIPS64: sltiu  $1, $1, 1
+; MIPS64: dsll   $1, $1, 32
+; MIPS64: jr     $ra
+; MIPS64: dsrl   $2, $1, 32
+  %res = icmp eq i64 %a, %b
+  %res2 = zext i1 %res to i64
+  ret i64 %res2
+}
+
+define i64 @cmpeqi(i64 %a) nounwind {
+entry:
+; OCTEON-LABEL: cmpeqi:
+; OCTEON: jr     $ra
+; OCTEON: seqi   $2, $4, 42
+; MIPS64-LABEL: cmpeqi:
+; MIPS64: daddiu $1, $zero, 42
+; MIPS64: xor    $1, $4, $1
+; MIPS64: sltiu  $1, $1, 1
+; MIPS64: dsll   $1, $1, 32
+; MIPS64: jr     $ra
+; MIPS64: dsrl   $2, $1, 32
+  %res = icmp eq i64 %a, 42
+  %res2 = zext i1 %res to i64
+  ret i64 %res2
+}
+
+define i64 @cmpne(i64 %a, i64 %b) nounwind {
+entry:
+; OCTEON-LABEL: cmpne:
+; OCTEON: jr     $ra
+; OCTEON: sne    $2, $4, $5
+; MIPS64-LABEL: cmpne:
+; MIPS64: xor    $1, $4, $5
+; MIPS64: sltu   $1, $zero, $1
+; MIPS64: dsll   $1, $1, 32
+; MIPS64: jr     $ra
+; MIPS64: dsrl   $2, $1, 32
+  %res = icmp ne i64 %a, %b
+  %res2 = zext i1 %res to i64
+  ret i64 %res2
+}
+
+define i64 @cmpnei(i64 %a) nounwind {
+entry:
+; OCTEON-LABEL: cmpnei:
+; OCTEON: jr     $ra
+; OCTEON: snei   $2, $4, 42
+; MIPS64-LABEL: cmpnei:
+; MIPS64: daddiu $1, $zero, 42
+; MIPS64: xor    $1, $4, $1
+; MIPS64: sltu   $1, $zero, $1
+; MIPS64: dsll   $1, $1, 32
+; MIPS64: jr     $ra
+; MIPS64: dsrl   $2, $1, 32
+  %res = icmp ne i64 %a, 42
+  %res2 = zext i1 %res to i64
+  ret i64 %res2
+}
diff --git a/test/CodeGen/Mips/powif64_16.ll b/test/CodeGen/Mips/powif64_16.ll
index 48757276bb8c..33ec8c40c610 100644
--- a/test/CodeGen/Mips/powif64_16.ll
+++ b/test/CodeGen/Mips/powif64_16.ll
@@ -20,7 +20,7 @@ define double @foo_pow_f64(double %y, i32 %p)  {
 attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="true" }
 attributes #1 = { nounwind readonly }
 
-!0 = metadata !{metadata !"double", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"int", metadata !1}
+!0 = !{!"double", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"int", !1}
diff --git a/test/CodeGen/Mips/prevent-hoisting.ll b/test/CodeGen/Mips/prevent-hoisting.ll
index da665c210909..210fe3b0f6d8 100644
--- a/test/CodeGen/Mips/prevent-hoisting.ll
+++ b/test/CodeGen/Mips/prevent-hoisting.ll
@@ -10,16 +10,19 @@
 
 ; CHECK-LABEL: readLumaCoeff8x8_CABAC
 
-; The check for "addiu" instruction is added so that we can match the correct "b" instruction.
+; The check for first "addiu" instruction is added so that we can match the correct "b" instruction.
 ; CHECK:           addiu ${{[0-9]+}}, $zero, -1
 ; CHECK:           b $[[BB0:BB[0-9_]+]]
+; CHECK-NEXT:      addiu ${{[0-9]+}}, $zero, 0
 
-; Check that sll instruction that writes to $1 starts basic block.
-; CHECK:       {{BB[0-9_#]+}}: 
+; Check that at the start of a fallthrough block there is a instruction that writes to $1.
+; CHECK-NEXT:  {{BB[0-9_#]+}}: 
+; CHECK-NEXT:      lw      $[[R1:[0-9]+]], %got(assignSE2partition)($[[R2:[0-9]+]])
 ; CHECK-NEXT:      sll $1, $[[R0:[0-9]+]], 4
 
-; Check that identical sll instruction starts another basic block.
+; Check that identical instructions are at the start of a target block.
 ; CHECK:       [[BB0]]:
+; CHECK-NEXT:      lw      $[[R1]], %got(assignSE2partition)($[[R2]])
 ; CHECK-NEXT:      sll $1, $[[R0]], 4
 
 
diff --git a/test/CodeGen/Mips/seleq.ll b/test/CodeGen/Mips/seleq.ll
index 190baad0b1db..9af422fa1bdb 100644
--- a/test/CodeGen/Mips/seleq.ll
+++ b/test/CodeGen/Mips/seleq.ll
@@ -10,7 +10,7 @@
 @z3 = common global i32 0, align 4
 @z4 = common global i32 0, align 4
 
-define void @calc_seleq() nounwind "target-cpu"="mips32" "target-features"="+o32,+mips32" {
+define void @calc_seleq() nounwind {
 entry:
   %0 = load i32* @a, align 4
   %1 = load i32* @b, align 4
diff --git a/test/CodeGen/Mips/small-section-reserve-gp.ll b/test/CodeGen/Mips/small-section-reserve-gp.ll
index 03503fb2ae18..cbf0681c78e5 100644
--- a/test/CodeGen/Mips/small-section-reserve-gp.ll
+++ b/test/CodeGen/Mips/small-section-reserve-gp.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=mipsel-sde-elf -march=mipsel -relocation-model=static < %s \
+; RUN: llc -mtriple=mipsel-sde-elf -march=mipsel -relocation-model=static -mattr=+noabicalls -mgpopt < %s \
 ; RUN: | FileCheck %s
 
 @i = internal unnamed_addr global i32 0, align 4
diff --git a/test/CodeGen/Mips/start-asm-file.ll b/test/CodeGen/Mips/start-asm-file.ll
index 88724643166c..9dc501ce10b4 100644
--- a/test/CodeGen/Mips/start-asm-file.ll
+++ b/test/CodeGen/Mips/start-asm-file.ll
@@ -1,7 +1,4 @@
 ; Check the emission of directives at the start of an asm file.
-; This test is XFAILED until we fix the emission of '.option pic0' on
-; N32. At the moment we check if subtarget is Mips64 when we should be
-; checking the Subtarget's ABI.
 
 ; ### O32 ABI ###
 ; RUN: llc -filetype=asm -mtriple mips-unknown-linux -mcpu=mips32 \
diff --git a/test/CodeGen/NVPTX/annotations.ll b/test/CodeGen/NVPTX/annotations.ll
index 39d52d382663..2341377d75a6 100644
--- a/test/CodeGen/NVPTX/annotations.ll
+++ b/test/CodeGen/NVPTX/annotations.ll
@@ -33,21 +33,14 @@ define void @kernel_func_minctasm(float* %a) {
 
 !nvvm.annotations = !{!1, !2, !3, !4, !5, !6, !7, !8}
 
-!1 = metadata !{void (float*)* @kernel_func_maxntid, metadata !"kernel", i32 1}
-!2 = metadata !{void (float*)* @kernel_func_maxntid,
-                metadata !"maxntidx", i32 10,
-                metadata !"maxntidy", i32 20,
-                metadata !"maxntidz", i32 30}
-
-!3 = metadata !{void (float*)* @kernel_func_reqntid, metadata !"kernel", i32 1}
-!4 = metadata !{void (float*)* @kernel_func_reqntid,
-                metadata !"reqntidx", i32 11,
-                metadata !"reqntidy", i32 22,
-                metadata !"reqntidz", i32 33}
-
-!5 = metadata !{void (float*)* @kernel_func_minctasm, metadata !"kernel", i32 1}
-!6 = metadata !{void (float*)* @kernel_func_minctasm,
-                metadata !"minctasm", i32 42}
-
-!7 = metadata !{i64 addrspace(1)* @texture, metadata !"texture", i32 1}
-!8 = metadata !{i64 addrspace(1)* @surface, metadata !"surface", i32 1}
+!1 = !{void (float*)* @kernel_func_maxntid, !"kernel", i32 1}
+!2 = !{void (float*)* @kernel_func_maxntid, !"maxntidx", i32 10, !"maxntidy", i32 20, !"maxntidz", i32 30}
+
+!3 = !{void (float*)* @kernel_func_reqntid, !"kernel", i32 1}
+!4 = !{void (float*)* @kernel_func_reqntid, !"reqntidx", i32 11, !"reqntidy", i32 22, !"reqntidz", i32 33}
+
+!5 = !{void (float*)* @kernel_func_minctasm, !"kernel", i32 1}
+!6 = !{void (float*)* @kernel_func_minctasm, !"minctasm", i32 42}
+
+!7 = !{i64 addrspace(1)* @texture, !"texture", i32 1}
+!8 = !{i64 addrspace(1)* @surface, !"surface", i32 1}
diff --git a/test/CodeGen/NVPTX/bug21465.ll b/test/CodeGen/NVPTX/bug21465.ll
new file mode 100644
index 000000000000..cacffceac517
--- /dev/null
+++ b/test/CodeGen/NVPTX/bug21465.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -nvptx-lower-struct-args -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+target triple = "nvptx64-unknown-unknown"
+
+%struct.S = type { i32, i32 }
+
+; Function Attrs: nounwind
+define void @_Z11TakesStruct1SPi(%struct.S* byval nocapture readonly %input, i32* nocapture %output) #0 {
+entry:
+; CHECK-LABEL @_Z22TakesStruct1SPi
+; CHECK:   bitcast %struct.S* %input to i8*
+; CHECK:   call i8 addrspace(101)* @llvm.nvvm.ptr.gen.to.param.p101i8.p0i8
+  %b = getelementptr inbounds %struct.S* %input, i64 0, i32 1
+  %0 = load i32* %b, align 4
+  store i32 %0, i32* %output, align 4
+  ret void
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!nvvm.annotations = !{!0}
+
+!0 = !{void (%struct.S*, i32*)* @_Z11TakesStruct1SPi, !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index 83d491637041..8483112381f1 100644
--- a/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -63,4 +63,4 @@ declare void @callee(float*, i8*)
 
 !nvvm.annotations = !{!0}
 
-!0 = metadata !{void (float*)* @kernel_func, metadata !"kernel", i32 1}
+!0 = !{void (float*)* @kernel_func, !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/calling-conv.ll b/test/CodeGen/NVPTX/calling-conv.ll
index 190a1462adbc..3b03442ad8bd 100644
--- a/test/CodeGen/NVPTX/calling-conv.ll
+++ b/test/CodeGen/NVPTX/calling-conv.ll
@@ -27,4 +27,4 @@ define void @metadata_kernel(float* %a) {
 
 !nvvm.annotations = !{!1}
 
-!1 = metadata !{void (float*)* @metadata_kernel, metadata !"kernel", i32 1}
+!1 = !{void (float*)* @metadata_kernel, !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/fma-assoc.ll b/test/CodeGen/NVPTX/fma-assoc.ll
new file mode 100644
index 000000000000..fc04c61dd691
--- /dev/null
+++ b/test/CodeGen/NVPTX/fma-assoc.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s
+
+define ptx_device float @t1_f32(float %x, float %y, float %z,
+                                float %u, float %v) {
+; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+  %a = fmul float %x, %y
+  %b = fmul float %u, %v
+  %c = fadd float %a, %b
+  %d = fadd float %c, %z
+  ret float %d
+}
+
+define ptx_device double @t1_f64(double %x, double %y, double %z,
+                                 double %u, double %v) {
+; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
+; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
+; CHECK: ret;
+  %a = fmul double %x, %y
+  %b = fmul double %u, %v
+  %c = fadd double %a, %b
+  %d = fadd double %c, %z
+  ret double %d
+}
diff --git a/test/CodeGen/NVPTX/fma.ll b/test/CodeGen/NVPTX/fma.ll
index 14b5c45b87d8..6785a01827e2 100644
--- a/test/CodeGen/NVPTX/fma.ll
+++ b/test/CodeGen/NVPTX/fma.ll
@@ -1,5 +1,8 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast | FileCheck %s
 
+declare float @dummy_f32(float, float) #0
+declare double @dummy_f64(double, double) #0
+
 define ptx_device float @t1_f32(float %x, float %y, float %z) {
 ; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
 ; CHECK: ret;
@@ -8,6 +11,17 @@ define ptx_device float @t1_f32(float %x, float %y, float %z) {
   ret float %b
 }
 
+define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) {
+; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
+; CHECK: ret;
+  %a = fmul float %x, %y
+  %b = fadd float %a, %z
+  %c = fadd float %a, %w
+  %d = call float @dummy_f32(float %b, float %c)
+  ret float %d
+}
+
 define ptx_device double @t1_f64(double %x, double %y, double %z) {
 ; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
 ; CHECK: ret;
@@ -15,3 +29,14 @@ define ptx_device double @t1_f64(double %x, double %y, double %z) {
   %b = fadd double %a, %z
   ret double %b
 }
+
+define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) {
+; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
+; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
+; CHECK: ret;
+  %a = fmul double %x, %y
+  %b = fadd double %a, %z
+  %c = fadd double %a, %w
+  %d = call double @dummy_f64(double %b, double %c)
+  ret double %d
+}
diff --git a/test/CodeGen/NVPTX/generic-to-nvvm.ll b/test/CodeGen/NVPTX/generic-to-nvvm.ll
index 2a527989e410..fb63d6ed575f 100644
--- a/test/CodeGen/NVPTX/generic-to-nvvm.ll
+++ b/test/CodeGen/NVPTX/generic-to-nvvm.ll
@@ -23,4 +23,4 @@ define void @foo(i32* %a, i32* %b) {
 
 
 !nvvm.annotations = !{!0}
-!0 = metadata !{void (i32*, i32*)* @foo, metadata !"kernel", i32 1}
+!0 = !{void (i32*, i32*)* @foo, !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/i1-global.ll b/test/CodeGen/NVPTX/i1-global.ll
index 1dd8ae40db4f..e3fe08e5f874 100644
--- a/test/CodeGen/NVPTX/i1-global.ll
+++ b/test/CodeGen/NVPTX/i1-global.ll
@@ -16,4 +16,4 @@ define void @foo(i1 %p, i32* %out) {
 
 
 !nvvm.annotations = !{!0}
-!0 = metadata !{void (i1, i32*)* @foo, metadata !"kernel", i32 1}
+!0 = !{void (i1, i32*)* @foo, !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/i1-param.ll b/test/CodeGen/NVPTX/i1-param.ll
index f4df87439322..aac71960551f 100644
--- a/test/CodeGen/NVPTX/i1-param.ll
+++ b/test/CodeGen/NVPTX/i1-param.ll
@@ -16,4 +16,4 @@ define void @foo(i1 %p, i32* %out) {
 
 
 !nvvm.annotations = !{!0}
-!0 = metadata !{void (i1, i32*)* @foo, metadata !"kernel", i32 1}
+!0 = !{void (i1, i32*)* @foo, !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/ldu-i8.ll b/test/CodeGen/NVPTX/ldu-i8.ll
index 9cc667557906..36c99b30425d 100644
--- a/test/CodeGen/NVPTX/ldu-i8.ll
+++ b/test/CodeGen/NVPTX/ldu-i8.ll
@@ -2,15 +2,13 @@
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
 
-declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*)
+declare i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8*, i32)
 
 define i8 @foo(i8* %a) {
 ; Ensure we properly truncate off the high-order 24 bits
 ; CHECK:        ldu.global.u8
 ; CHECK:        cvt.u32.u16
 ; CHECK:        and.b32         %r{{[0-9]+}}, %r{{[0-9]+}}, 255
-  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a), !align !0
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p0i8(i8* %a, i32 4)
   ret i8 %val
 }
-
-!0 = metadata !{i32 4}
diff --git a/test/CodeGen/NVPTX/ldu-ldg.ll b/test/CodeGen/NVPTX/ldu-ldg.ll
index 3b0619ff5175..4bfd68c22428 100644
--- a/test/CodeGen/NVPTX/ldu-ldg.ll
+++ b/test/CodeGen/NVPTX/ldu-ldg.ll
@@ -1,40 +1,36 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 
 
-declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
-declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
-declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr)
-declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr)
+declare i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align)
+declare i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 %align)
+declare i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 %align)
 
 
 ; CHECK: func0
 define i8 @func0(i8 addrspace(1)* %ptr) {
 ; ldu.global.u8
-  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
+  %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4)
   ret i8 %val
 }
 
 ; CHECK: func1
 define i32 @func1(i32 addrspace(1)* %ptr) {
 ; ldu.global.u32
-  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
+  %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4)
   ret i32 %val
 }
 
 ; CHECK: func2
 define i8 @func2(i8 addrspace(1)* %ptr) {
 ; ld.global.nc.u8
-  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr), !align !0
+  %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1i8(i8 addrspace(1)* %ptr, i32 4)
   ret i8 %val
 }
 
 ; CHECK: func3
 define i32 @func3(i32 addrspace(1)* %ptr) {
 ; ld.global.nc.u32
-  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr), !align !0
+  %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1i32(i32 addrspace(1)* %ptr, i32 4)
   ret i32 %val
 }
-
-
-
-!0 = metadata !{i32 4}
diff --git a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
index 55707ea85106..fd35a7503901 100644
--- a/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
+++ b/test/CodeGen/NVPTX/ldu-reg-plus-offset.ll
@@ -7,15 +7,13 @@ define void @reg_plus_offset(i32* %a) {
 ; CHECK:        ldu.global.u32  %r{{[0-9]+}}, [%r{{[0-9]+}}+32];
 ; CHECK:        ldu.global.u32  %r{{[0-9]+}}, [%r{{[0-9]+}}+36];
   %p2 = getelementptr i32* %a, i32 8
-  %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2), !align !1
+  %t1 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p2, i32 4)
   %p3 = getelementptr i32* %a, i32 9
-  %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3), !align !1
+  %t2 = call i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32* %p3, i32 4)
   %t3 = mul i32 %t1, %t2
   store i32 %t3, i32* %a
   ret void
 }
 
-!1 = metadata !{ i32 4 }
-
-declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*)
+declare i32 @llvm.nvvm.ldu.global.i.i32.p0i32(i32*, i32)
 declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
diff --git a/test/CodeGen/NVPTX/machine-sink.ll b/test/CodeGen/NVPTX/machine-sink.ll
new file mode 100644
index 000000000000..3614bea16534
--- /dev/null
+++ b/test/CodeGen/NVPTX/machine-sink.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
+
+@scalar1 = internal addrspace(3) global float 0.000000e+00, align 4
+@scalar2 = internal addrspace(3) global float 0.000000e+00, align 4
+
+; We shouldn't sink mul.rn.f32 to BB %merge because BB %merge post-dominates
+; BB %entry. Over-sinking created more register pressure on this example. The
+; backend would sink the fmuls to BB %merge, but not the loads for being
+; conservative on sinking memory accesses. As a result, the loads and
+; the two fmuls would be separated to two basic blocks, causing two
+; cross-BB live ranges.
+define float @post_dominate(float %x, i1 %cond) {
+; CHECK-LABEL: post_dominate(
+entry:
+  %0 = load float* addrspacecast (float addrspace(3)* @scalar1 to float*), align 4
+  %1 = load float* addrspacecast (float addrspace(3)* @scalar2 to float*), align 4
+; CHECK: ld.shared.f32
+; CHECK: ld.shared.f32
+  %2 = fmul float %0, %0
+  %3 = fmul float %1, %2
+; CHECK-NOT: bra
+; CHECK: mul.rn.f32
+; CHECK: mul.rn.f32
+  br i1 %cond, label %then, label %merge
+
+then:
+  %z = fadd float %x, %x
+  br label %then2
+
+then2:
+  %z2 = fadd float %z, %z
+  br label %merge
+
+merge:
+  %y = phi float [ 0.0, %entry ], [ %z2, %then2 ]
+  %w = fadd float %y, %3
+  ret float %w
+}
diff --git a/test/CodeGen/NVPTX/managed.ll b/test/CodeGen/NVPTX/managed.ll
index 4d7e7817f77b..d3f1604dbd36 100644
--- a/test/CodeGen/NVPTX/managed.ll
+++ b/test/CodeGen/NVPTX/managed.ll
@@ -8,4 +8,4 @@
 
 
 !nvvm.annotations = !{!0}
-!0 = metadata !{i32 addrspace(1)* @managed_g, metadata !"managed", i32 1}
+!0 = !{i32 addrspace(1)* @managed_g, !"managed", i32 1}
diff --git a/test/CodeGen/NVPTX/mulwide.ll b/test/CodeGen/NVPTX/mulwide.ll
index 43bb63098f67..1ddf9739e202 100644
--- a/test/CodeGen/NVPTX/mulwide.ll
+++ b/test/CodeGen/NVPTX/mulwide.ll
@@ -23,6 +23,28 @@ define i32 @mulwideu16(i16 %a, i16 %b) {
   ret i32 %val2
 }
 
+; OPT-LABEL: @mulwide8
+; NOOPT-LABEL: @mulwide8
+define i32 @mulwide8(i8 %a, i8 %b) {
+; OPT: mul.wide.s16
+; NOOPT: mul.lo.s32
+  %val0 = sext i8 %a to i32
+  %val1 = sext i8 %b to i32
+  %val2 = mul i32 %val0, %val1
+  ret i32 %val2
+}
+
+; OPT-LABEL: @mulwideu8
+; NOOPT-LABEL: @mulwideu8
+define i32 @mulwideu8(i8 %a, i8 %b) {
+; OPT: mul.wide.u16
+; NOOPT: mul.lo.s32
+  %val0 = zext i8 %a to i32
+  %val1 = zext i8 %b to i32
+  %val2 = mul i32 %val0, %val1
+  ret i32 %val2
+}
+
 ; OPT-LABEL: @mulwide32
 ; NOOPT-LABEL: @mulwide32
 define i64 @mulwide32(i32 %a, i32 %b) {
@@ -44,3 +66,25 @@ define i64 @mulwideu32(i32 %a, i32 %b) {
   %val2 = mul i64 %val0, %val1
   ret i64 %val2
 }
+
+; OPT-LABEL: @mulwideu7
+; NOOPT-LABEL: @mulwideu7
+define i64 @mulwideu7(i7 %a, i7 %b) {
+; OPT: mul.wide.u32
+; NOOPT: mul.lo.s64
+  %val0 = zext i7 %a to i64
+  %val1 = zext i7 %b to i64
+  %val2 = mul i64 %val0, %val1
+  ret i64 %val2
+}
+
+; OPT-LABEL: @mulwides7
+; NOOPT-LABEL: @mulwides7
+define i64 @mulwides7(i7 %a, i7 %b) {
+; OPT: mul.wide.s32
+; NOOPT: mul.lo.s64
+  %val0 = sext i7 %a to i64
+  %val1 = sext i7 %b to i64
+  %val2 = mul i64 %val0, %val1
+  ret i64 %val2
+}
diff --git a/test/CodeGen/NVPTX/noduplicate-syncthreads.ll b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
index 64745fcba3ba..841bbc3a517c 100644
--- a/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
+++ b/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
@@ -70,5 +70,5 @@ if.end17:                                         ; preds = %if.else13, %if.then
 ; Function Attrs: noduplicate nounwind
 declare void @llvm.cuda.syncthreads() #2
 
-!0 = metadata !{void (float*)* @foo, metadata !"kernel", i32 1}
-!1 = metadata !{null, metadata !"align", i32 8}
+!0 = !{void (float*)* @foo, !"kernel", i32 1}
+!1 = !{null, !"align", i32 8}
diff --git a/test/CodeGen/NVPTX/nvcl-param-align.ll b/test/CodeGen/NVPTX/nvcl-param-align.ll
new file mode 100644
index 000000000000..c1a489f1fc42
--- /dev/null
+++ b/test/CodeGen/NVPTX/nvcl-param-align.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+target triple = "nvptx-unknown-nvcl"
+
+; CHECK-LABEL: .entry foo(
+define void @foo(i64 %img, i64 %sampler, <5 x float>* %v) {
+; The parameter alignment should be the next power of 2 of 5xsizeof(float),
+; which is 32.
+; CHECK: .param .u32 .ptr .align 32 foo_param_2
+  ret void
+}
+
+!nvvm.annotations = !{!1, !2, !3}
+!1 = !{void (i64, i64, <5 x float>*)* @foo, !"kernel", i32 1}
+!2 = !{void (i64, i64, <5 x float>*)* @foo, !"rdoimage", i32 0}
+!3 = !{void (i64, i64, <5 x float>*)* @foo, !"sampler", i32 1}
diff --git a/test/CodeGen/NVPTX/refl1.ll b/test/CodeGen/NVPTX/refl1.ll
index 4aeff0924955..e8782ea3aa27 100644
--- a/test/CodeGen/NVPTX/refl1.ll
+++ b/test/CodeGen/NVPTX/refl1.ll
@@ -36,4 +36,4 @@ attributes #2 = { alwaysinline inlinehint nounwind readnone }
 
 !nvvm.annotations = !{!0}
 
-!0 = metadata !{void (float*)* @foo, metadata !"kernel", i32 1}
+!0 = !{void (float*)* @foo, !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/simple-call.ll b/test/CodeGen/NVPTX/simple-call.ll
index ab6f423cd80a..1b41361cf7ed 100644
--- a/test/CodeGen/NVPTX/simple-call.ll
+++ b/test/CodeGen/NVPTX/simple-call.ll
@@ -23,4 +23,4 @@ define void @kernel_func(float* %a) {
 
 !nvvm.annotations = !{!1}
 
-!1 = metadata !{void (float*)* @kernel_func, metadata !"kernel", i32 1}
+!1 = !{void (float*)* @kernel_func, !"kernel", i32 1}
diff --git a/test/CodeGen/NVPTX/surf-read-cuda.ll b/test/CodeGen/NVPTX/surf-read-cuda.ll
index 10a1ecc4c473..ed021346c0f9 100644
--- a/test/CodeGen/NVPTX/surf-read-cuda.ll
+++ b/test/CodeGen/NVPTX/surf-read-cuda.ll
@@ -47,7 +47,7 @@ define void @bar(float* %red, i32 %idx) {
 
 
 !nvvm.annotations = !{!1, !2, !3}
-!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
-!2 = metadata !{void (float*, i32)* @bar, metadata !"kernel", i32 1}
-!3 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1}
+!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1}
+!2 = !{void (float*, i32)* @bar, !"kernel", i32 1}
+!3 = !{i64 addrspace(1)* @surf0, !"surface", i32 1}
 
diff --git a/test/CodeGen/NVPTX/surf-read.ll b/test/CodeGen/NVPTX/surf-read.ll
index a69d03efe0d2..7383722a3596 100644
--- a/test/CodeGen/NVPTX/surf-read.ll
+++ b/test/CodeGen/NVPTX/surf-read.ll
@@ -16,5 +16,5 @@ define void @foo(i64 %img, float* %red, i32 %idx) {
 }
 
 !nvvm.annotations = !{!1, !2}
-!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
-!2 = metadata !{void (i64, float*, i32)* @foo, metadata !"rdwrimage", i32 0}
+!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1}
+!2 = !{void (i64, float*, i32)* @foo, !"rdwrimage", i32 0}
diff --git a/test/CodeGen/NVPTX/surf-write-cuda.ll b/test/CodeGen/NVPTX/surf-write-cuda.ll
index 654c47f46957..da55a242bba6 100644
--- a/test/CodeGen/NVPTX/surf-write-cuda.ll
+++ b/test/CodeGen/NVPTX/surf-write-cuda.ll
@@ -36,7 +36,7 @@ define void @bar(i32 %val, i32 %idx) {
 
 
 !nvvm.annotations = !{!1, !2, !3}
-!1 = metadata !{void (i64, i32, i32)* @foo, metadata !"kernel", i32 1}
-!2 = metadata !{void (i32, i32)* @bar, metadata !"kernel", i32 1}
-!3 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1}
+!1 = !{void (i64, i32, i32)* @foo, !"kernel", i32 1}
+!2 = !{void (i32, i32)* @bar, !"kernel", i32 1}
+!3 = !{i64 addrspace(1)* @surf0, !"surface", i32 1}
 
diff --git a/test/CodeGen/NVPTX/surf-write.ll b/test/CodeGen/NVPTX/surf-write.ll
index 880231f96599..5098d2ae9e1c 100644
--- a/test/CodeGen/NVPTX/surf-write.ll
+++ b/test/CodeGen/NVPTX/surf-write.ll
@@ -12,5 +12,5 @@ define void @foo(i64 %img, i32 %val, i32 %idx) {
 }
 
 !nvvm.annotations = !{!1, !2}
-!1 = metadata !{void (i64, i32, i32)* @foo, metadata !"kernel", i32 1}
-!2 = metadata !{void (i64, i32, i32)* @foo, metadata !"wroimage", i32 0}
+!1 = !{void (i64, i32, i32)* @foo, !"kernel", i32 1}
+!2 = !{void (i64, i32, i32)* @foo, !"wroimage", i32 0}
diff --git a/test/CodeGen/NVPTX/tex-read-cuda.ll b/test/CodeGen/NVPTX/tex-read-cuda.ll
index ee0cefa919b1..c5b5600de874 100644
--- a/test/CodeGen/NVPTX/tex-read-cuda.ll
+++ b/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -41,6 +41,6 @@ define void @bar(float* %red, i32 %idx) {
 }
 
 !nvvm.annotations = !{!1, !2, !3}
-!1 = metadata !{void (i64, float*, i32)* @foo, metadata !"kernel", i32 1}
-!2 = metadata !{void (float*, i32)* @bar, metadata !"kernel", i32 1}
-!3 = metadata !{i64 addrspace(1)* @tex0, metadata !"texture", i32 1}
+!1 = !{void (i64, float*, i32)* @foo, !"kernel", i32 1}
+!2 = !{void (float*, i32)* @bar, !"kernel", i32 1}
+!3 = !{i64 addrspace(1)* @tex0, !"texture", i32 1}
diff --git a/test/CodeGen/NVPTX/tex-read.ll b/test/CodeGen/NVPTX/tex-read.ll
index 55e4bfc9e453..6e0fda69e4f5 100644
--- a/test/CodeGen/NVPTX/tex-read.ll
+++ b/test/CodeGen/NVPTX/tex-read.ll
@@ -15,6 +15,6 @@ define void @foo(i64 %img, i64 %sampler, float* %red, i32 %idx) {
 }
 
 !nvvm.annotations = !{!1, !2, !3}
-!1 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"kernel", i32 1}
-!2 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"rdoimage", i32 0}
-!3 = metadata !{void (i64, i64, float*, i32)* @foo, metadata !"sampler", i32 1}
+!1 = !{void (i64, i64, float*, i32)* @foo, !"kernel", i32 1}
+!2 = !{void (i64, i64, float*, i32)* @foo, !"rdoimage", i32 0}
+!3 = !{void (i64, i64, float*, i32)* @foo, !"sampler", i32 1}
diff --git a/test/CodeGen/NVPTX/texsurf-queries.ll b/test/CodeGen/NVPTX/texsurf-queries.ll
index c7637ccff77a..e56eb5dea18f 100644
--- a/test/CodeGen/NVPTX/texsurf-queries.ll
+++ b/test/CodeGen/NVPTX/texsurf-queries.ll
@@ -99,5 +99,5 @@ define i32 @s3() {
 
 
 !nvvm.annotations = !{!1, !2}
-!1 = metadata !{i64 addrspace(1)* @tex0, metadata !"texture", i32 1}
-!2 = metadata !{i64 addrspace(1)* @surf0, metadata !"surface", i32 1}
+!1 = !{i64 addrspace(1)* @tex0, !"texture", i32 1}
+!2 = !{i64 addrspace(1)* @surf0, !"surface", i32 1}
diff --git a/test/CodeGen/NVPTX/vector-global.ll b/test/CodeGen/NVPTX/vector-global.ll
new file mode 100644
index 000000000000..a463bee3a479
--- /dev/null
+++ b/test/CodeGen/NVPTX/vector-global.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+@g1 = external global <4 x i32> ; external global variable
+; CHECK: .extern .global .align 16 .b8 g1[16];
+@g2 = global <4 x i32> zeroinitializer ; module-level global variable
+; CHECK: .visible .global .align 16 .b8 g2[16];
diff --git a/test/CodeGen/NVPTX/vector-return.ll b/test/CodeGen/NVPTX/vector-return.ll
new file mode 100644
index 000000000000..15e50f8e1443
--- /dev/null
+++ b/test/CodeGen/NVPTX/vector-return.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s
+
+declare <2 x float> @bar(<2 x float> %input)
+
+define void @foo(<2 x float> %input, <2 x float>* %output) {
+; CHECK-LABEL: @foo
+entry:
+  %call = tail call <2 x float> @bar(<2 x float> %input)
+; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: ld.param.v2.f32 {[[ELEM1:%f[0-9]+]], [[ELEM2:%f[0-9]+]]}, [retval0+0];
+  store <2 x float> %call, <2 x float>* %output, align 8
+; CHECK: st.v2.f32 [{{%rd[0-9]+}}], {[[ELEM1]], [[ELEM2]]}
+  ret void
+}
diff --git a/test/CodeGen/NVPTX/weak-linkage.ll b/test/CodeGen/NVPTX/weak-linkage.ll
index 7a1335783642..5df57b29249e 100644
--- a/test/CodeGen/NVPTX/weak-linkage.ll
+++ b/test/CodeGen/NVPTX/weak-linkage.ll
@@ -1,11 +1,17 @@
 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
 
-
+; CHECK: // .weak foo
 ; CHECK: .weak .func foo
 define weak void @foo() {
   ret void
 }
 
+; CHECK: // .weak baz
+; CHECK: .weak .func baz
+define weak_odr void @baz() {
+  ret void
+}
+
 ; CHECK: .visible .func bar
 define void @bar() {
   ret void
diff --git a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
index 3620b0e6340a..3624b5109301 100644
--- a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
+++ b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=ppc64 -mcpu=g5 | grep cntlzd
+; RUN: llc -mcpu=g5 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
 
 define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) nounwind {
         %tmp19 = load i64* %t
@@ -7,6 +9,12 @@ define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) nounwind {
         %tmp89 = add i32 %tmp23, -64          ; <i32> [#uses=1]
         %tmp90 = add i32 %tmp89, 0            ; <i32> [#uses=1]
         ret i32 %tmp90
+
+; CHECK-LABEL: @_ZNK4llvm5APInt17countLeadingZerosEv
+; CHECK: ld [[REG1:[0-9]+]], 0(3)
+; CHECK-NEXT: cntlzd [[REG2:[0-9]+]], [[REG1]]
+; CHECK-NEXT: addi 3, [[REG2]], -64
+; CHECK-NEXT: blr
 }
 
 declare i64 @llvm.ctlz.i64(i64, i1)
diff --git a/test/CodeGen/PowerPC/2007-09-08-unaligned.ll b/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
index 898c470b1726..bdd91f345718 100644
--- a/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
+++ b/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s | grep stfd | count 3
-; RUN: llc < %s | grep stfs | count 1
-; RUN: llc < %s | grep lfd | count 2
-; RUN: llc < %s | grep lfs | count 2
+; RUN: llc -mattr=-vsx < %s | grep stfd | count 3
+; RUN: llc -mattr=-vsx < %s | grep stfs | count 1
+; RUN: llc -mattr=-vsx < %s | grep lfd | count 2
+; RUN: llc -mattr=-vsx < %s | grep lfs | count 2
 ; ModuleID = 'foo.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
index 3acd01dcb273..e7bc5bfa37ec 100644
--- a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
+++ b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
@@ -183,4 +183,4 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
 
 declare i32 @puts(i8* nocapture) nounwind
 
-!3 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!3 = !{!"branch_weights", i32 64, i32 4}
diff --git a/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
index 4a1a51237ffd..a6223d41cc3f 100644
--- a/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
+++ b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
@@ -217,4 +217,4 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
 
 declare i32 @puts(i8* nocapture) nounwind
 
-!3 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!3 = !{!"branch_weights", i32 64, i32 4}
diff --git a/test/CodeGen/PowerPC/2012-10-12-bitcast.ll b/test/CodeGen/PowerPC/2012-10-12-bitcast.ll
index f841c5fb92e4..fdacef2cdd4d 100644
--- a/test/CodeGen/PowerPC/2012-10-12-bitcast.ll
+++ b/test/CodeGen/PowerPC/2012-10-12-bitcast.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mattr=+altivec < %s | FileCheck %s
+; RUN: llc -mattr=-vsx -mattr=+altivec -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mattr=+vsx -mattr=+altivec -mcpu=pwr7 < %s | FileCheck -check-prefix=CHECK-VSX %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -18,3 +19,7 @@ entry:
 ; CHECK: lwz 3, -16(1)
 ; CHECK: blr
 
+; CHECK-VSX: addi [[REGISTER:[0-9]+]], 1, -16
+; CHECK-VSX: stxvd2x 34, 0, [[REGISTER]]
+; CHECK-VSX: lwz 3, -16(1)
+; CHECK-VSX: blr
diff --git a/test/CodeGen/PowerPC/Atomics-32.ll b/test/CodeGen/PowerPC/Atomics-32.ll
deleted file mode 100644
index b7f23b1dd83e..000000000000
--- a/test/CodeGen/PowerPC/Atomics-32.ll
+++ /dev/null
@@ -1,715 +0,0 @@
-; RUN: llc < %s -march=ppc32
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
-target triple = "powerpc-apple-darwin9"
-
-@sc = common global i8 0
-@uc = common global i8 0
-@ss = common global i16 0
-@us = common global i16 0
-@si = common global i32 0
-@ui = common global i32 0
-@sl = common global i32 0
-@ul = common global i32 0
-@sll = common global i64 0, align 8
-@ull = common global i64 0, align 8
-
-define void @test_op_ignore() nounwind {
-entry:
-  %0 = atomicrmw add i8* @sc, i8 1 monotonic
-  %1 = atomicrmw add i8* @uc, i8 1 monotonic
-  %2 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %3 = atomicrmw add i16* %2, i16 1 monotonic
-  %4 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %5 = atomicrmw add i16* %4, i16 1 monotonic
-  %6 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %7 = atomicrmw add i32* %6, i32 1 monotonic
-  %8 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %9 = atomicrmw add i32* %8, i32 1 monotonic
-  %10 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %11 = atomicrmw add i32* %10, i32 1 monotonic
-  %12 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %13 = atomicrmw add i32* %12, i32 1 monotonic
-  %14 = atomicrmw sub i8* @sc, i8 1 monotonic
-  %15 = atomicrmw sub i8* @uc, i8 1 monotonic
-  %16 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %17 = atomicrmw sub i16* %16, i16 1 monotonic
-  %18 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %19 = atomicrmw sub i16* %18, i16 1 monotonic
-  %20 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %21 = atomicrmw sub i32* %20, i32 1 monotonic
-  %22 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %23 = atomicrmw sub i32* %22, i32 1 monotonic
-  %24 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %25 = atomicrmw sub i32* %24, i32 1 monotonic
-  %26 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %27 = atomicrmw sub i32* %26, i32 1 monotonic
-  %28 = atomicrmw or i8* @sc, i8 1 monotonic
-  %29 = atomicrmw or i8* @uc, i8 1 monotonic
-  %30 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %31 = atomicrmw or i16* %30, i16 1 monotonic
-  %32 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %33 = atomicrmw or i16* %32, i16 1 monotonic
-  %34 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %35 = atomicrmw or i32* %34, i32 1 monotonic
-  %36 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %37 = atomicrmw or i32* %36, i32 1 monotonic
-  %38 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %39 = atomicrmw or i32* %38, i32 1 monotonic
-  %40 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %41 = atomicrmw or i32* %40, i32 1 monotonic
-  %42 = atomicrmw xor i8* @sc, i8 1 monotonic
-  %43 = atomicrmw xor i8* @uc, i8 1 monotonic
-  %44 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %45 = atomicrmw xor i16* %44, i16 1 monotonic
-  %46 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %47 = atomicrmw xor i16* %46, i16 1 monotonic
-  %48 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %49 = atomicrmw xor i32* %48, i32 1 monotonic
-  %50 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %51 = atomicrmw xor i32* %50, i32 1 monotonic
-  %52 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %53 = atomicrmw xor i32* %52, i32 1 monotonic
-  %54 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %55 = atomicrmw xor i32* %54, i32 1 monotonic
-  %56 = atomicrmw and i8* @sc, i8 1 monotonic
-  %57 = atomicrmw and i8* @uc, i8 1 monotonic
-  %58 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %59 = atomicrmw and i16* %58, i16 1 monotonic
-  %60 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %61 = atomicrmw and i16* %60, i16 1 monotonic
-  %62 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %63 = atomicrmw and i32* %62, i32 1 monotonic
-  %64 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %65 = atomicrmw and i32* %64, i32 1 monotonic
-  %66 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %67 = atomicrmw and i32* %66, i32 1 monotonic
-  %68 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %69 = atomicrmw and i32* %68, i32 1 monotonic
-  %70 = atomicrmw nand i8* @sc, i8 1 monotonic
-  %71 = atomicrmw nand i8* @uc, i8 1 monotonic
-  %72 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %73 = atomicrmw nand i16* %72, i16 1 monotonic
-  %74 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %75 = atomicrmw nand i16* %74, i16 1 monotonic
-  %76 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %77 = atomicrmw nand i32* %76, i32 1 monotonic
-  %78 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %79 = atomicrmw nand i32* %78, i32 1 monotonic
-  %80 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %81 = atomicrmw nand i32* %80, i32 1 monotonic
-  %82 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %83 = atomicrmw nand i32* %82, i32 1 monotonic
-  br label %return
-
-return:                                           ; preds = %entry
-  ret void
-}
-
-define void @test_fetch_and_op() nounwind {
-entry:
-  %0 = atomicrmw add i8* @sc, i8 11 monotonic
-  store i8 %0, i8* @sc, align 1
-  %1 = atomicrmw add i8* @uc, i8 11 monotonic
-  store i8 %1, i8* @uc, align 1
-  %2 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %3 = atomicrmw add i16* %2, i16 11 monotonic
-  store i16 %3, i16* @ss, align 2
-  %4 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %5 = atomicrmw add i16* %4, i16 11 monotonic
-  store i16 %5, i16* @us, align 2
-  %6 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %7 = atomicrmw add i32* %6, i32 11 monotonic
-  store i32 %7, i32* @si, align 4
-  %8 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %9 = atomicrmw add i32* %8, i32 11 monotonic
-  store i32 %9, i32* @ui, align 4
-  %10 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %11 = atomicrmw add i32* %10, i32 11 monotonic
-  store i32 %11, i32* @sl, align 4
-  %12 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %13 = atomicrmw add i32* %12, i32 11 monotonic
-  store i32 %13, i32* @ul, align 4
-  %14 = atomicrmw sub i8* @sc, i8 11 monotonic
-  store i8 %14, i8* @sc, align 1
-  %15 = atomicrmw sub i8* @uc, i8 11 monotonic
-  store i8 %15, i8* @uc, align 1
-  %16 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %17 = atomicrmw sub i16* %16, i16 11 monotonic
-  store i16 %17, i16* @ss, align 2
-  %18 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %19 = atomicrmw sub i16* %18, i16 11 monotonic
-  store i16 %19, i16* @us, align 2
-  %20 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %21 = atomicrmw sub i32* %20, i32 11 monotonic
-  store i32 %21, i32* @si, align 4
-  %22 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %23 = atomicrmw sub i32* %22, i32 11 monotonic
-  store i32 %23, i32* @ui, align 4
-  %24 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %25 = atomicrmw sub i32* %24, i32 11 monotonic
-  store i32 %25, i32* @sl, align 4
-  %26 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %27 = atomicrmw sub i32* %26, i32 11 monotonic
-  store i32 %27, i32* @ul, align 4
-  %28 = atomicrmw or i8* @sc, i8 11 monotonic
-  store i8 %28, i8* @sc, align 1
-  %29 = atomicrmw or i8* @uc, i8 11 monotonic
-  store i8 %29, i8* @uc, align 1
-  %30 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %31 = atomicrmw or i16* %30, i16 11 monotonic
-  store i16 %31, i16* @ss, align 2
-  %32 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %33 = atomicrmw or i16* %32, i16 11 monotonic
-  store i16 %33, i16* @us, align 2
-  %34 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %35 = atomicrmw or i32* %34, i32 11 monotonic
-  store i32 %35, i32* @si, align 4
-  %36 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %37 = atomicrmw or i32* %36, i32 11 monotonic
-  store i32 %37, i32* @ui, align 4
-  %38 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %39 = atomicrmw or i32* %38, i32 11 monotonic
-  store i32 %39, i32* @sl, align 4
-  %40 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %41 = atomicrmw or i32* %40, i32 11 monotonic
-  store i32 %41, i32* @ul, align 4
-  %42 = atomicrmw xor i8* @sc, i8 11 monotonic
-  store i8 %42, i8* @sc, align 1
-  %43 = atomicrmw xor i8* @uc, i8 11 monotonic
-  store i8 %43, i8* @uc, align 1
-  %44 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %45 = atomicrmw xor i16* %44, i16 11 monotonic
-  store i16 %45, i16* @ss, align 2
-  %46 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %47 = atomicrmw xor i16* %46, i16 11 monotonic
-  store i16 %47, i16* @us, align 2
-  %48 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %49 = atomicrmw xor i32* %48, i32 11 monotonic
-  store i32 %49, i32* @si, align 4
-  %50 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %51 = atomicrmw xor i32* %50, i32 11 monotonic
-  store i32 %51, i32* @ui, align 4
-  %52 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %53 = atomicrmw xor i32* %52, i32 11 monotonic
-  store i32 %53, i32* @sl, align 4
-  %54 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %55 = atomicrmw xor i32* %54, i32 11 monotonic
-  store i32 %55, i32* @ul, align 4
-  %56 = atomicrmw and i8* @sc, i8 11 monotonic
-  store i8 %56, i8* @sc, align 1
-  %57 = atomicrmw and i8* @uc, i8 11 monotonic
-  store i8 %57, i8* @uc, align 1
-  %58 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %59 = atomicrmw and i16* %58, i16 11 monotonic
-  store i16 %59, i16* @ss, align 2
-  %60 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %61 = atomicrmw and i16* %60, i16 11 monotonic
-  store i16 %61, i16* @us, align 2
-  %62 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %63 = atomicrmw and i32* %62, i32 11 monotonic
-  store i32 %63, i32* @si, align 4
-  %64 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %65 = atomicrmw and i32* %64, i32 11 monotonic
-  store i32 %65, i32* @ui, align 4
-  %66 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %67 = atomicrmw and i32* %66, i32 11 monotonic
-  store i32 %67, i32* @sl, align 4
-  %68 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %69 = atomicrmw and i32* %68, i32 11 monotonic
-  store i32 %69, i32* @ul, align 4
-  %70 = atomicrmw nand i8* @sc, i8 11 monotonic
-  store i8 %70, i8* @sc, align 1
-  %71 = atomicrmw nand i8* @uc, i8 11 monotonic
-  store i8 %71, i8* @uc, align 1
-  %72 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %73 = atomicrmw nand i16* %72, i16 11 monotonic
-  store i16 %73, i16* @ss, align 2
-  %74 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %75 = atomicrmw nand i16* %74, i16 11 monotonic
-  store i16 %75, i16* @us, align 2
-  %76 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %77 = atomicrmw nand i32* %76, i32 11 monotonic
-  store i32 %77, i32* @si, align 4
-  %78 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %79 = atomicrmw nand i32* %78, i32 11 monotonic
-  store i32 %79, i32* @ui, align 4
-  %80 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %81 = atomicrmw nand i32* %80, i32 11 monotonic
-  store i32 %81, i32* @sl, align 4
-  %82 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %83 = atomicrmw nand i32* %82, i32 11 monotonic
-  store i32 %83, i32* @ul, align 4
-  br label %return
-
-return:                                           ; preds = %entry
-  ret void
-}
-
-define void @test_op_and_fetch() nounwind {
-entry:
-  %0 = load i8* @uc, align 1
-  %1 = atomicrmw add i8* @sc, i8 %0 monotonic
-  %2 = add i8 %1, %0
-  store i8 %2, i8* @sc, align 1
-  %3 = load i8* @uc, align 1
-  %4 = atomicrmw add i8* @uc, i8 %3 monotonic
-  %5 = add i8 %4, %3
-  store i8 %5, i8* @uc, align 1
-  %6 = load i8* @uc, align 1
-  %7 = zext i8 %6 to i16
-  %8 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %9 = atomicrmw add i16* %8, i16 %7 monotonic
-  %10 = add i16 %9, %7
-  store i16 %10, i16* @ss, align 2
-  %11 = load i8* @uc, align 1
-  %12 = zext i8 %11 to i16
-  %13 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %14 = atomicrmw add i16* %13, i16 %12 monotonic
-  %15 = add i16 %14, %12
-  store i16 %15, i16* @us, align 2
-  %16 = load i8* @uc, align 1
-  %17 = zext i8 %16 to i32
-  %18 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %19 = atomicrmw add i32* %18, i32 %17 monotonic
-  %20 = add i32 %19, %17
-  store i32 %20, i32* @si, align 4
-  %21 = load i8* @uc, align 1
-  %22 = zext i8 %21 to i32
-  %23 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %24 = atomicrmw add i32* %23, i32 %22 monotonic
-  %25 = add i32 %24, %22
-  store i32 %25, i32* @ui, align 4
-  %26 = load i8* @uc, align 1
-  %27 = zext i8 %26 to i32
-  %28 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %29 = atomicrmw add i32* %28, i32 %27 monotonic
-  %30 = add i32 %29, %27
-  store i32 %30, i32* @sl, align 4
-  %31 = load i8* @uc, align 1
-  %32 = zext i8 %31 to i32
-  %33 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %34 = atomicrmw add i32* %33, i32 %32 monotonic
-  %35 = add i32 %34, %32
-  store i32 %35, i32* @ul, align 4
-  %36 = load i8* @uc, align 1
-  %37 = atomicrmw sub i8* @sc, i8 %36 monotonic
-  %38 = sub i8 %37, %36
-  store i8 %38, i8* @sc, align 1
-  %39 = load i8* @uc, align 1
-  %40 = atomicrmw sub i8* @uc, i8 %39 monotonic
-  %41 = sub i8 %40, %39
-  store i8 %41, i8* @uc, align 1
-  %42 = load i8* @uc, align 1
-  %43 = zext i8 %42 to i16
-  %44 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %45 = atomicrmw sub i16* %44, i16 %43 monotonic
-  %46 = sub i16 %45, %43
-  store i16 %46, i16* @ss, align 2
-  %47 = load i8* @uc, align 1
-  %48 = zext i8 %47 to i16
-  %49 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %50 = atomicrmw sub i16* %49, i16 %48 monotonic
-  %51 = sub i16 %50, %48
-  store i16 %51, i16* @us, align 2
-  %52 = load i8* @uc, align 1
-  %53 = zext i8 %52 to i32
-  %54 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %55 = atomicrmw sub i32* %54, i32 %53 monotonic
-  %56 = sub i32 %55, %53
-  store i32 %56, i32* @si, align 4
-  %57 = load i8* @uc, align 1
-  %58 = zext i8 %57 to i32
-  %59 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %60 = atomicrmw sub i32* %59, i32 %58 monotonic
-  %61 = sub i32 %60, %58
-  store i32 %61, i32* @ui, align 4
-  %62 = load i8* @uc, align 1
-  %63 = zext i8 %62 to i32
-  %64 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %65 = atomicrmw sub i32* %64, i32 %63 monotonic
-  %66 = sub i32 %65, %63
-  store i32 %66, i32* @sl, align 4
-  %67 = load i8* @uc, align 1
-  %68 = zext i8 %67 to i32
-  %69 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %70 = atomicrmw sub i32* %69, i32 %68 monotonic
-  %71 = sub i32 %70, %68
-  store i32 %71, i32* @ul, align 4
-  %72 = load i8* @uc, align 1
-  %73 = atomicrmw or i8* @sc, i8 %72 monotonic
-  %74 = or i8 %73, %72
-  store i8 %74, i8* @sc, align 1
-  %75 = load i8* @uc, align 1
-  %76 = atomicrmw or i8* @uc, i8 %75 monotonic
-  %77 = or i8 %76, %75
-  store i8 %77, i8* @uc, align 1
-  %78 = load i8* @uc, align 1
-  %79 = zext i8 %78 to i16
-  %80 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %81 = atomicrmw or i16* %80, i16 %79 monotonic
-  %82 = or i16 %81, %79
-  store i16 %82, i16* @ss, align 2
-  %83 = load i8* @uc, align 1
-  %84 = zext i8 %83 to i16
-  %85 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %86 = atomicrmw or i16* %85, i16 %84 monotonic
-  %87 = or i16 %86, %84
-  store i16 %87, i16* @us, align 2
-  %88 = load i8* @uc, align 1
-  %89 = zext i8 %88 to i32
-  %90 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %91 = atomicrmw or i32* %90, i32 %89 monotonic
-  %92 = or i32 %91, %89
-  store i32 %92, i32* @si, align 4
-  %93 = load i8* @uc, align 1
-  %94 = zext i8 %93 to i32
-  %95 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %96 = atomicrmw or i32* %95, i32 %94 monotonic
-  %97 = or i32 %96, %94
-  store i32 %97, i32* @ui, align 4
-  %98 = load i8* @uc, align 1
-  %99 = zext i8 %98 to i32
-  %100 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %101 = atomicrmw or i32* %100, i32 %99 monotonic
-  %102 = or i32 %101, %99
-  store i32 %102, i32* @sl, align 4
-  %103 = load i8* @uc, align 1
-  %104 = zext i8 %103 to i32
-  %105 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %106 = atomicrmw or i32* %105, i32 %104 monotonic
-  %107 = or i32 %106, %104
-  store i32 %107, i32* @ul, align 4
-  %108 = load i8* @uc, align 1
-  %109 = atomicrmw xor i8* @sc, i8 %108 monotonic
-  %110 = xor i8 %109, %108
-  store i8 %110, i8* @sc, align 1
-  %111 = load i8* @uc, align 1
-  %112 = atomicrmw xor i8* @uc, i8 %111 monotonic
-  %113 = xor i8 %112, %111
-  store i8 %113, i8* @uc, align 1
-  %114 = load i8* @uc, align 1
-  %115 = zext i8 %114 to i16
-  %116 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %117 = atomicrmw xor i16* %116, i16 %115 monotonic
-  %118 = xor i16 %117, %115
-  store i16 %118, i16* @ss, align 2
-  %119 = load i8* @uc, align 1
-  %120 = zext i8 %119 to i16
-  %121 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %122 = atomicrmw xor i16* %121, i16 %120 monotonic
-  %123 = xor i16 %122, %120
-  store i16 %123, i16* @us, align 2
-  %124 = load i8* @uc, align 1
-  %125 = zext i8 %124 to i32
-  %126 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %127 = atomicrmw xor i32* %126, i32 %125 monotonic
-  %128 = xor i32 %127, %125
-  store i32 %128, i32* @si, align 4
-  %129 = load i8* @uc, align 1
-  %130 = zext i8 %129 to i32
-  %131 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %132 = atomicrmw xor i32* %131, i32 %130 monotonic
-  %133 = xor i32 %132, %130
-  store i32 %133, i32* @ui, align 4
-  %134 = load i8* @uc, align 1
-  %135 = zext i8 %134 to i32
-  %136 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %137 = atomicrmw xor i32* %136, i32 %135 monotonic
-  %138 = xor i32 %137, %135
-  store i32 %138, i32* @sl, align 4
-  %139 = load i8* @uc, align 1
-  %140 = zext i8 %139 to i32
-  %141 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %142 = atomicrmw xor i32* %141, i32 %140 monotonic
-  %143 = xor i32 %142, %140
-  store i32 %143, i32* @ul, align 4
-  %144 = load i8* @uc, align 1
-  %145 = atomicrmw and i8* @sc, i8 %144 monotonic
-  %146 = and i8 %145, %144
-  store i8 %146, i8* @sc, align 1
-  %147 = load i8* @uc, align 1
-  %148 = atomicrmw and i8* @uc, i8 %147 monotonic
-  %149 = and i8 %148, %147
-  store i8 %149, i8* @uc, align 1
-  %150 = load i8* @uc, align 1
-  %151 = zext i8 %150 to i16
-  %152 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %153 = atomicrmw and i16* %152, i16 %151 monotonic
-  %154 = and i16 %153, %151
-  store i16 %154, i16* @ss, align 2
-  %155 = load i8* @uc, align 1
-  %156 = zext i8 %155 to i16
-  %157 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %158 = atomicrmw and i16* %157, i16 %156 monotonic
-  %159 = and i16 %158, %156
-  store i16 %159, i16* @us, align 2
-  %160 = load i8* @uc, align 1
-  %161 = zext i8 %160 to i32
-  %162 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %163 = atomicrmw and i32* %162, i32 %161 monotonic
-  %164 = and i32 %163, %161
-  store i32 %164, i32* @si, align 4
-  %165 = load i8* @uc, align 1
-  %166 = zext i8 %165 to i32
-  %167 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %168 = atomicrmw and i32* %167, i32 %166 monotonic
-  %169 = and i32 %168, %166
-  store i32 %169, i32* @ui, align 4
-  %170 = load i8* @uc, align 1
-  %171 = zext i8 %170 to i32
-  %172 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %173 = atomicrmw and i32* %172, i32 %171 monotonic
-  %174 = and i32 %173, %171
-  store i32 %174, i32* @sl, align 4
-  %175 = load i8* @uc, align 1
-  %176 = zext i8 %175 to i32
-  %177 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %178 = atomicrmw and i32* %177, i32 %176 monotonic
-  %179 = and i32 %178, %176
-  store i32 %179, i32* @ul, align 4
-  %180 = load i8* @uc, align 1
-  %181 = atomicrmw nand i8* @sc, i8 %180 monotonic
-  %182 = xor i8 %181, -1
-  %183 = and i8 %182, %180
-  store i8 %183, i8* @sc, align 1
-  %184 = load i8* @uc, align 1
-  %185 = atomicrmw nand i8* @uc, i8 %184 monotonic
-  %186 = xor i8 %185, -1
-  %187 = and i8 %186, %184
-  store i8 %187, i8* @uc, align 1
-  %188 = load i8* @uc, align 1
-  %189 = zext i8 %188 to i16
-  %190 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %191 = atomicrmw nand i16* %190, i16 %189 monotonic
-  %192 = xor i16 %191, -1
-  %193 = and i16 %192, %189
-  store i16 %193, i16* @ss, align 2
-  %194 = load i8* @uc, align 1
-  %195 = zext i8 %194 to i16
-  %196 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %197 = atomicrmw nand i16* %196, i16 %195 monotonic
-  %198 = xor i16 %197, -1
-  %199 = and i16 %198, %195
-  store i16 %199, i16* @us, align 2
-  %200 = load i8* @uc, align 1
-  %201 = zext i8 %200 to i32
-  %202 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %203 = atomicrmw nand i32* %202, i32 %201 monotonic
-  %204 = xor i32 %203, -1
-  %205 = and i32 %204, %201
-  store i32 %205, i32* @si, align 4
-  %206 = load i8* @uc, align 1
-  %207 = zext i8 %206 to i32
-  %208 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %209 = atomicrmw nand i32* %208, i32 %207 monotonic
-  %210 = xor i32 %209, -1
-  %211 = and i32 %210, %207
-  store i32 %211, i32* @ui, align 4
-  %212 = load i8* @uc, align 1
-  %213 = zext i8 %212 to i32
-  %214 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %215 = atomicrmw nand i32* %214, i32 %213 monotonic
-  %216 = xor i32 %215, -1
-  %217 = and i32 %216, %213
-  store i32 %217, i32* @sl, align 4
-  %218 = load i8* @uc, align 1
-  %219 = zext i8 %218 to i32
-  %220 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %221 = atomicrmw nand i32* %220, i32 %219 monotonic
-  %222 = xor i32 %221, -1
-  %223 = and i32 %222, %219
-  store i32 %223, i32* @ul, align 4
-  br label %return
-
-return:                                           ; preds = %entry
-  ret void
-}
-
-define void @test_compare_and_swap() nounwind {
-entry:
-  %0 = load i8* @uc, align 1
-  %1 = load i8* @sc, align 1
-  %pair2 = cmpxchg i8* @sc, i8 %0, i8 %1 monotonic monotonic
-  %2 = extractvalue { i8, i1 } %pair2, 0
-  store i8 %2, i8* @sc, align 1
-  %3 = load i8* @uc, align 1
-  %4 = load i8* @sc, align 1
-  %pair5 = cmpxchg i8* @uc, i8 %3, i8 %4 monotonic monotonic
-  %5 = extractvalue { i8, i1 } %pair5, 0
-  store i8 %5, i8* @uc, align 1
-  %6 = load i8* @uc, align 1
-  %7 = zext i8 %6 to i16
-  %8 = load i8* @sc, align 1
-  %9 = sext i8 %8 to i16
-  %10 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %pair11 = cmpxchg i16* %10, i16 %7, i16 %9 monotonic monotonic
-  %11 = extractvalue { i16, i1 } %pair11, 0
-  store i16 %11, i16* @ss, align 2
-  %12 = load i8* @uc, align 1
-  %13 = zext i8 %12 to i16
-  %14 = load i8* @sc, align 1
-  %15 = sext i8 %14 to i16
-  %16 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %pair17 = cmpxchg i16* %16, i16 %13, i16 %15 monotonic monotonic
-  %17 = extractvalue { i16, i1 } %pair17, 0
-  store i16 %17, i16* @us, align 2
-  %18 = load i8* @uc, align 1
-  %19 = zext i8 %18 to i32
-  %20 = load i8* @sc, align 1
-  %21 = sext i8 %20 to i32
-  %22 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %pair23 = cmpxchg i32* %22, i32 %19, i32 %21 monotonic monotonic
-  %23 = extractvalue { i32, i1 } %pair23, 0
-  store i32 %23, i32* @si, align 4
-  %24 = load i8* @uc, align 1
-  %25 = zext i8 %24 to i32
-  %26 = load i8* @sc, align 1
-  %27 = sext i8 %26 to i32
-  %28 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %pair29 = cmpxchg i32* %28, i32 %25, i32 %27 monotonic monotonic
-  %29 = extractvalue { i32, i1 } %pair29, 0
-  store i32 %29, i32* @ui, align 4
-  %30 = load i8* @uc, align 1
-  %31 = zext i8 %30 to i32
-  %32 = load i8* @sc, align 1
-  %33 = sext i8 %32 to i32
-  %34 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %pair35 = cmpxchg i32* %34, i32 %31, i32 %33 monotonic monotonic
-  %35 = extractvalue { i32, i1 } %pair35, 0
-  store i32 %35, i32* @sl, align 4
-  %36 = load i8* @uc, align 1
-  %37 = zext i8 %36 to i32
-  %38 = load i8* @sc, align 1
-  %39 = sext i8 %38 to i32
-  %40 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %pair41 = cmpxchg i32* %40, i32 %37, i32 %39 monotonic monotonic
-  %41 = extractvalue { i32, i1 } %pair41, 0
-  store i32 %41, i32* @ul, align 4
-  %42 = load i8* @uc, align 1
-  %43 = load i8* @sc, align 1
-  %pair44 = cmpxchg i8* @sc, i8 %42, i8 %43 monotonic monotonic
-  %44 = extractvalue { i8, i1 } %pair44, 0
-  %45 = icmp eq i8 %44, %42
-  %46 = zext i1 %45 to i32
-  store i32 %46, i32* @ui, align 4
-  %47 = load i8* @uc, align 1
-  %48 = load i8* @sc, align 1
-  %pair49 = cmpxchg i8* @uc, i8 %47, i8 %48 monotonic monotonic
-  %49 = extractvalue { i8, i1 } %pair49, 0
-  %50 = icmp eq i8 %49, %47
-  %51 = zext i1 %50 to i32
-  store i32 %51, i32* @ui, align 4
-  %52 = load i8* @uc, align 1
-  %53 = zext i8 %52 to i16
-  %54 = load i8* @sc, align 1
-  %55 = sext i8 %54 to i16
-  %56 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %pair57 = cmpxchg i16* %56, i16 %53, i16 %55 monotonic monotonic
-  %57 = extractvalue { i16, i1 } %pair57, 0
-  %58 = icmp eq i16 %57, %53
-  %59 = zext i1 %58 to i32
-  store i32 %59, i32* @ui, align 4
-  %60 = load i8* @uc, align 1
-  %61 = zext i8 %60 to i16
-  %62 = load i8* @sc, align 1
-  %63 = sext i8 %62 to i16
-  %64 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %pair65 = cmpxchg i16* %64, i16 %61, i16 %63 monotonic monotonic
-  %65 = extractvalue { i16, i1 } %pair65, 0
-  %66 = icmp eq i16 %65, %61
-  %67 = zext i1 %66 to i32
-  store i32 %67, i32* @ui, align 4
-  %68 = load i8* @uc, align 1
-  %69 = zext i8 %68 to i32
-  %70 = load i8* @sc, align 1
-  %71 = sext i8 %70 to i32
-  %72 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %pair73 = cmpxchg i32* %72, i32 %69, i32 %71 monotonic monotonic
-  %73 = extractvalue { i32, i1 } %pair73, 0
-  %74 = icmp eq i32 %73, %69
-  %75 = zext i1 %74 to i32
-  store i32 %75, i32* @ui, align 4
-  %76 = load i8* @uc, align 1
-  %77 = zext i8 %76 to i32
-  %78 = load i8* @sc, align 1
-  %79 = sext i8 %78 to i32
-  %80 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %pair81 = cmpxchg i32* %80, i32 %77, i32 %79 monotonic monotonic
-  %81 = extractvalue { i32, i1 } %pair81, 0
-  %82 = icmp eq i32 %81, %77
-  %83 = zext i1 %82 to i32
-  store i32 %83, i32* @ui, align 4
-  %84 = load i8* @uc, align 1
-  %85 = zext i8 %84 to i32
-  %86 = load i8* @sc, align 1
-  %87 = sext i8 %86 to i32
-  %88 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %pair89 = cmpxchg i32* %88, i32 %85, i32 %87 monotonic monotonic
-  %89 = extractvalue { i32, i1 } %pair89, 0
-  %90 = icmp eq i32 %89, %85
-  %91 = zext i1 %90 to i32
-  store i32 %91, i32* @ui, align 4
-  %92 = load i8* @uc, align 1
-  %93 = zext i8 %92 to i32
-  %94 = load i8* @sc, align 1
-  %95 = sext i8 %94 to i32
-  %96 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %pair97 = cmpxchg i32* %96, i32 %93, i32 %95 monotonic monotonic
-  %97 = extractvalue { i32, i1 } %pair97, 0
-  %98 = icmp eq i32 %97, %93
-  %99 = zext i1 %98 to i32
-  store i32 %99, i32* @ui, align 4
-  br label %return
-
-return:                                           ; preds = %entry
-  ret void
-}
-
-define void @test_lock() nounwind {
-entry:
-  %0 = atomicrmw xchg i8* @sc, i8 1 monotonic
-  store i8 %0, i8* @sc, align 1
-  %1 = atomicrmw xchg i8* @uc, i8 1 monotonic
-  store i8 %1, i8* @uc, align 1
-  %2 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  %3 = atomicrmw xchg i16* %2, i16 1 monotonic
-  store i16 %3, i16* @ss, align 2
-  %4 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  %5 = atomicrmw xchg i16* %4, i16 1 monotonic
-  store i16 %5, i16* @us, align 2
-  %6 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  %7 = atomicrmw xchg i32* %6, i32 1 monotonic
-  store i32 %7, i32* @si, align 4
-  %8 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  %9 = atomicrmw xchg i32* %8, i32 1 monotonic
-  store i32 %9, i32* @ui, align 4
-  %10 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  %11 = atomicrmw xchg i32* %10, i32 1 monotonic
-  store i32 %11, i32* @sl, align 4
-  %12 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  %13 = atomicrmw xchg i32* %12, i32 1 monotonic
-  store i32 %13, i32* @ul, align 4
-  fence seq_cst
-  store volatile i8 0, i8* @sc, align 1
-  store volatile i8 0, i8* @uc, align 1
-  %14 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
-  store volatile i16 0, i16* %14, align 2
-  %15 = bitcast i8* bitcast (i16* @us to i8*) to i16*
-  store volatile i16 0, i16* %15, align 2
-  %16 = bitcast i8* bitcast (i32* @si to i8*) to i32*
-  store volatile i32 0, i32* %16, align 4
-  %17 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
-  store volatile i32 0, i32* %17, align 4
-  %18 = bitcast i8* bitcast (i32* @sl to i8*) to i32*
-  store volatile i32 0, i32* %18, align 4
-  %19 = bitcast i8* bitcast (i32* @ul to i8*) to i32*
-  store volatile i32 0, i32* %19, align 4
-  %20 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
-  store volatile i64 0, i64* %20, align 8
-  %21 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
-  store volatile i64 0, i64* %21, align 8
-  br label %return
-
-return:                                           ; preds = %entry
-  ret void
-}
diff --git a/test/CodeGen/PowerPC/Frames-large.ll b/test/CodeGen/PowerPC/Frames-large.ll
index 0ccea42619af..5b8aef42e76a 100644
--- a/test/CodeGen/PowerPC/Frames-large.ll
+++ b/test/CodeGen/PowerPC/Frames-large.ll
@@ -1,9 +1,8 @@
-; RUN: llvm-as < %s > %t.bc
-; RUN: llc < %t.bc -march=ppc32 | FileCheck %s -check-prefix=PPC32-NOFP
-; RUN: llc < %t.bc -march=ppc32 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-FP
+; RUN: llc < %s -march=ppc32 | FileCheck %s -check-prefix=PPC32-NOFP
+; RUN: llc < %s -march=ppc32 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-FP
 
-; RUN: llc < %t.bc -march=ppc64 | FileCheck %s -check-prefix=PPC64-NOFP
-; RUN: llc < %t.bc -march=ppc64 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-FP
+; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=PPC64-NOFP
+; RUN: llc < %s -march=ppc64 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-FP
 
 
 target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/aa-tbaa.ll b/test/CodeGen/PowerPC/aa-tbaa.ll
index 1939841f1f7e..0e7ff3d72765 100644
--- a/test/CodeGen/PowerPC/aa-tbaa.ll
+++ b/test/CodeGen/PowerPC/aa-tbaa.ll
@@ -35,7 +35,7 @@ next:
 ; CHECK: blr
 }
 
-!0 = metadata !{ metadata !"root" }
-!1 = metadata !{ metadata !"set1", metadata !0 }
-!2 = metadata !{ metadata !"set2", metadata !0 }
+!0 = !{ !"root" }
+!1 = !{ !"set1", !0 }
+!2 = !{ !"set2", !0 }
 
diff --git a/test/CodeGen/PowerPC/add-fi.ll b/test/CodeGen/PowerPC/add-fi.ll
new file mode 100644
index 000000000000..18892c8cdf5e
--- /dev/null
+++ b/test/CodeGen/PowerPC/add-fi.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32* @test1() {
+        %X = alloca { i32, i32 }
+        %Y = getelementptr {i32,i32}* %X, i32 0, i32 1
+        ret i32* %Y
+
+; CHECK-LABEL: @test1
+; CHECK: addi 3, 1, -4
+; CHECK: blr
+}
+
+define i32* @test2() {
+        %X = alloca { i32, i32, i32, i32 }
+        %Y = getelementptr {i32,i32,i32,i32}* %X, i32 0, i32 3
+        ret i32* %Y
+
+; CHECK-LABEL: @test2
+; CHECK: addi 3, 1, -4
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/addi-licm.ll b/test/CodeGen/PowerPC/addi-licm.ll
new file mode 100644
index 000000000000..b52cb678a969
--- /dev/null
+++ b/test/CodeGen/PowerPC/addi-licm.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define double @foo() #1 {
+entry:
+  %x = alloca [2048 x float], align 4
+  %y = alloca [2048 x float], align 4
+  %0 = bitcast [2048 x float]* %x to i8*
+  call void @llvm.lifetime.start(i64 8192, i8* %0) #2
+  %1 = bitcast [2048 x float]* %y to i8*
+  call void @llvm.lifetime.start(i64 8192, i8* %1) #2
+  br label %for.body.i
+
+; CHECK-LABEL: @foo
+; CHECK: addi [[REG1:[0-9]+]], 1,
+; CHECK: addi [[REG2:[0-9]+]], 1,
+; CHECK: %for.body.i
+; CHECK-DAG: lfsx {{[0-9]+}}, [[REG1]],
+; CHECK-DAG: lfsx {{[0-9]+}}, [[REG2]],
+; CHECK: blr
+
+for.body.i:                                       ; preds = %for.body.i.preheader, %for.body.i
+  %accumulator.09.i = phi double [ %add.i, %for.body.i ], [ 0.000000e+00, %entry ]
+  %i.08.i = phi i64 [ %inc.i, %for.body.i ], [ 0, %entry ]
+  %arrayidx.i = getelementptr inbounds [2048 x float]* %x, i64 0, i64 %i.08.i
+  %v14 = load float* %arrayidx.i, align 4
+  %conv.i = fpext float %v14 to double
+  %arrayidx1.i = getelementptr inbounds [2048 x float]* %y, i64 0, i64 %i.08.i
+  %v15 = load float* %arrayidx1.i, align 4
+  %conv2.i = fpext float %v15 to double
+  %mul.i = fmul double %conv.i, %conv2.i
+  %add.i = fadd double %accumulator.09.i, %mul.i
+  %inc.i = add nuw nsw i64 %i.08.i, 1
+  %exitcond.i = icmp eq i64 %i.08.i, 2047
+  br i1 %exitcond.i, label %loop.exit, label %for.body.i
+
+loop.exit:                                        ; preds = %for.body.i
+  ret double %accumulator.09.i
+}
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #2
+
+declare void @bar(float*, float*)
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #2
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind }
+
+
diff --git a/test/CodeGen/PowerPC/arr-fp-arg-no-copy.ll b/test/CodeGen/PowerPC/arr-fp-arg-no-copy.ll
new file mode 100644
index 000000000000..fd430a68e781
--- /dev/null
+++ b/test/CodeGen/PowerPC/arr-fp-arg-no-copy.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define void @bar() #0 {
+entry:
+  tail call void @xxx([2 x i64] [i64 4607182418800017408, i64 4611686018427387904]) #0
+  ret void
+
+; CHECK-LABEL: @bar
+; CHECK-DAG: li [[REG1:[0-9]+]], 1023
+; CHECK-DAG: li [[REG2:[0-9]+]], {{1$}}
+; CHECK-DAG: sldi 3, [[REG1]], 52
+; CHECK-DAG: sldi 4, [[REG2]], 62
+; CHECK: bl xxx
+; CHECK: blr
+}
+
+declare void @xxx([2 x i64])
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/asm-Zy.ll b/test/CodeGen/PowerPC/asm-Zy.ll
index 691165f23788..6d1ab0ed9d68 100644
--- a/test/CodeGen/PowerPC/asm-Zy.ll
+++ b/test/CodeGen/PowerPC/asm-Zy.ll
@@ -10,5 +10,5 @@ entry:
 ; CHECK: lwbrx 3, 0,
 }
 
-!0 = metadata !{i32 101688}
+!0 = !{i32 101688}
 
diff --git a/test/CodeGen/PowerPC/asm-constraints.ll b/test/CodeGen/PowerPC/asm-constraints.ll
new file mode 100644
index 000000000000..9bf8b75e0ace
--- /dev/null
+++ b/test/CodeGen/PowerPC/asm-constraints.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mcpu=pwr8 | FileCheck %s
+
+; Generated from following C code:
+;
+; void foo (int result, char *addr) {
+;   __asm__ __volatile__ (
+;     "ld%U1%X1 %0,%1\n"
+;     "cmpw %0,%0\n"
+;     "bne- 1f\n"
+;     "1: isync\n"
+;     : "=r" (result)
+;     : "m"(*addr) : "memory", "cr0");
+; }
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+; Check that we accept 'U' and 'X' constraints.
+define void @foo(i32 signext %result, i8* %addr) #0 {
+entry:
+  %result.addr = alloca i32, align 4
+  %addr.addr = alloca i8*, align 8
+  store i32 %result, i32* %result.addr, align 4
+  store i8* %addr, i8** %addr.addr, align 8
+  %0 = load i8** %addr.addr, align 8
+  %1 = call i32 asm sideeffect "ld${1:U}${1:X} $0,$1\0Acmpw $0,$0\0Abne- 1f\0A1: isync\0A", "=r,*m,~{memory},~{cr0}"(i8* %0) #1, !srcloc !1
+  store i32 %1, i32* %result.addr, align 4
+  ret void
+}
+
+; CHECK-LABEL: @foo
+; CHECK: ld [[REG:[0-9]+]], 0(4)
+; CHECK: cmpw 0, [[REG]], [[REG]]
+; CHECK: bne- 0, .Ltmp[[TMP:[0-9]+]]
+; CHECK: .Ltmp[[TMP]]:
+; CHECK: isync
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.6.0 (trunk 217557)"}
+!1 = !{i32 67, i32 91, i32 110, i32 126}
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 843250f10b4f..9cb0fa5be5c9 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -30,8 +30,9 @@ define void @atomic_store(i64* %mem, i64 %val) nounwind {
 entry:
 ; CHECK: @atomic_store
   store atomic i64 %val, i64* %mem release, align 64
-; CHECK: ldarx
-; CHECK: stdcx.
+; CHECK: sync 1
+; CHECK-NOT: stdcx
+; CHECK: std
   ret void
 }
 
@@ -39,9 +40,9 @@ define i64 @atomic_load(i64* %mem) nounwind {
 entry:
 ; CHECK: @atomic_load
   %tmp = load atomic i64* %mem acquire, align 64
-; CHECK: ldarx
-; CHECK: stdcx.
-; CHECK: stdcx.
+; CHECK-NOT: ldarx
+; CHECK: ld
+; CHECK: sync 1
   ret i64 %tmp
 }
 
diff --git a/test/CodeGen/PowerPC/atomics-fences.ll b/test/CodeGen/PowerPC/atomics-fences.ll
new file mode 100644
index 000000000000..862bd173fdaf
--- /dev/null
+++ b/test/CodeGen/PowerPC/atomics-fences.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc64 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=440 | FileCheck %s --check-prefix=PPC440
+
+; Fences
+define void @fence_acquire() {
+; CHECK-LABEL: fence_acquire
+; CHECK: sync 1
+; PPC440-NOT: sync 1
+; PPC440: msync
+  fence acquire
+  ret void
+}
+define void @fence_release() {
+; CHECK-LABEL: fence_release
+; CHECK: sync 1
+; PPC440-NOT: sync 1
+; PPC440: msync
+  fence release
+  ret void
+}
+define void @fence_seq_cst() {
+; CHECK-LABEL: fence_seq_cst
+; CHECK: sync 0
+; PPC440-NOT: sync 0
+; PPC440: msync
+  fence seq_cst
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/atomics-indexed.ll b/test/CodeGen/PowerPC/atomics-indexed.ll
new file mode 100644
index 000000000000..bb9ca0401966
--- /dev/null
+++ b/test/CodeGen/PowerPC/atomics-indexed.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32
+; FIXME: -verify-machineinstrs currently fail on ppc64 (mismatched register/instruction).
+; This is already checked for in Atomics-64.ll
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc64 | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64
+
+; In this file, we check that atomic load/store can make use of the indexed
+; versions of the instructions.
+
+; Indexed version of loads
+define i8 @load_x_i8_seq_cst([100000 x i8]* %mem) {
+; CHECK-LABEL: load_x_i8_seq_cst
+; CHECK: sync 0
+; CHECK: lbzx
+; CHECK: sync 1
+  %ptr = getelementptr inbounds [100000 x i8]* %mem, i64 0, i64 90000
+  %val = load atomic i8* %ptr seq_cst, align 1
+  ret i8 %val
+}
+define i16 @load_x_i16_acquire([100000 x i16]* %mem) {
+; CHECK-LABEL: load_x_i16_acquire
+; CHECK: lhzx
+; CHECK: sync 1
+  %ptr = getelementptr inbounds [100000 x i16]* %mem, i64 0, i64 90000
+  %val = load atomic i16* %ptr acquire, align 2
+  ret i16 %val
+}
+define i32 @load_x_i32_monotonic([100000 x i32]* %mem) {
+; CHECK-LABEL: load_x_i32_monotonic
+; CHECK: lwzx
+; CHECK-NOT: sync
+  %ptr = getelementptr inbounds [100000 x i32]* %mem, i64 0, i64 90000
+  %val = load atomic i32* %ptr monotonic, align 4
+  ret i32 %val
+}
+define i64 @load_x_i64_unordered([100000 x i64]* %mem) {
+; CHECK-LABEL: load_x_i64_unordered
+; PPC32: __sync_
+; PPC64-NOT: __sync_
+; PPC64: ldx
+; CHECK-NOT: sync
+  %ptr = getelementptr inbounds [100000 x i64]* %mem, i64 0, i64 90000
+  %val = load atomic i64* %ptr unordered, align 8
+  ret i64 %val
+}
+
+; Indexed version of stores
+define void @store_x_i8_seq_cst([100000 x i8]* %mem) {
+; CHECK-LABEL: store_x_i8_seq_cst
+; CHECK: sync 0
+; CHECK: stbx
+  %ptr = getelementptr inbounds [100000 x i8]* %mem, i64 0, i64 90000
+  store atomic i8 42, i8* %ptr seq_cst, align 1
+  ret void
+}
+define void @store_x_i16_release([100000 x i16]* %mem) {
+; CHECK-LABEL: store_x_i16_release
+; CHECK: sync 1
+; CHECK: sthx
+  %ptr = getelementptr inbounds [100000 x i16]* %mem, i64 0, i64 90000
+  store atomic i16 42, i16* %ptr release, align 2
+  ret void
+}
+define void @store_x_i32_monotonic([100000 x i32]* %mem) {
+; CHECK-LABEL: store_x_i32_monotonic
+; CHECK-NOT: sync
+; CHECK: stwx
+  %ptr = getelementptr inbounds [100000 x i32]* %mem, i64 0, i64 90000
+  store atomic i32 42, i32* %ptr monotonic, align 4
+  ret void
+}
+define void @store_x_i64_unordered([100000 x i64]* %mem) {
+; CHECK-LABEL: store_x_i64_unordered
+; CHECK-NOT: sync 0
+; CHECK-NOT: sync 1
+; PPC32: __sync_
+; PPC64-NOT: __sync_
+; PPC64: stdx
+  %ptr = getelementptr inbounds [100000 x i64]* %mem, i64 0, i64 90000
+  store atomic i64 42, i64* %ptr unordered, align 8
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/atomics.ll b/test/CodeGen/PowerPC/atomics.ll
new file mode 100644
index 000000000000..5f6a6a4dcdf1
--- /dev/null
+++ b/test/CodeGen/PowerPC/atomics.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc32 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=PPC32
+; FIXME: -verify-machineinstrs currently fail on ppc64 (mismatched register/instruction).
+; This is already checked for in Atomics-64.ll
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -march=ppc64 | FileCheck %s --check-prefix=CHECK --check-prefix=PPC64
+
+; FIXME: we don't currently check for the operations themselves with CHECK-NEXT,
+;   because they are implemented in a very messy way with lwarx/stwcx.
+;   It should be fixed soon in another patch.
+
+; We first check loads, for all sizes from i8 to i64.
+; We also vary orderings to check for barriers.
+define i8 @load_i8_unordered(i8* %mem) {
+; CHECK-LABEL: load_i8_unordered
+; CHECK: lbz
+; CHECK-NOT: sync
+  %val = load atomic i8* %mem unordered, align 1
+  ret i8 %val
+}
+define i16 @load_i16_monotonic(i16* %mem) {
+; CHECK-LABEL: load_i16_monotonic
+; CHECK: lhz
+; CHECK-NOT: sync
+  %val = load atomic i16* %mem monotonic, align 2
+  ret i16 %val
+}
+define i32 @load_i32_acquire(i32* %mem) {
+; CHECK-LABEL: load_i32_acquire
+; CHECK: lwz
+  %val = load atomic i32* %mem acquire, align 4
+; CHECK: sync 1
+  ret i32 %val
+}
+define i64 @load_i64_seq_cst(i64* %mem) {
+; CHECK-LABEL: load_i64_seq_cst
+; CHECK: sync 0
+; PPC32: __sync_
+; PPC64-NOT: __sync_
+; PPC64: ld
+  %val = load atomic i64* %mem seq_cst, align 8
+; CHECK: sync 1
+  ret i64 %val
+}
+
+; Stores
+define void @store_i8_unordered(i8* %mem) {
+; CHECK-LABEL: store_i8_unordered
+; CHECK-NOT: sync
+; CHECK: stb
+  store atomic i8 42, i8* %mem unordered, align 1
+  ret void
+}
+define void @store_i16_monotonic(i16* %mem) {
+; CHECK-LABEL: store_i16_monotonic
+; CHECK-NOT: sync
+; CHECK: sth
+  store atomic i16 42, i16* %mem monotonic, align 2
+  ret void
+}
+define void @store_i32_release(i32* %mem) {
+; CHECK-LABEL: store_i32_release
+; CHECK: sync 1
+; CHECK: stw
+  store atomic i32 42, i32* %mem release, align 4
+  ret void
+}
+define void @store_i64_seq_cst(i64* %mem) {
+; CHECK-LABEL: store_i64_seq_cst
+; CHECK: sync 0
+; PPC32: __sync_
+; PPC64-NOT: __sync_
+; PPC64: std
+  store atomic i64 42, i64* %mem seq_cst, align 8
+  ret void
+}
+
+; Atomic CmpXchg
+define i8 @cas_strong_i8_sc_sc(i8* %mem) {
+; CHECK-LABEL: cas_strong_i8_sc_sc
+; CHECK: sync 0
+  %val = cmpxchg i8* %mem, i8 0, i8 1 seq_cst seq_cst
+; CHECK: sync 1
+  %loaded = extractvalue { i8, i1} %val, 0
+  ret i8 %loaded
+}
+define i16 @cas_weak_i16_acquire_acquire(i16* %mem) {
+; CHECK-LABEL: cas_weak_i16_acquire_acquire
+;CHECK-NOT: sync
+  %val = cmpxchg weak i16* %mem, i16 0, i16 1 acquire acquire
+; CHECK: sync 1
+  %loaded = extractvalue { i16, i1} %val, 0
+  ret i16 %loaded
+}
+define i32 @cas_strong_i32_acqrel_acquire(i32* %mem) {
+; CHECK-LABEL: cas_strong_i32_acqrel_acquire
+; CHECK: sync 1
+  %val = cmpxchg i32* %mem, i32 0, i32 1 acq_rel acquire
+; CHECK: sync 1
+  %loaded = extractvalue { i32, i1} %val, 0
+  ret i32 %loaded
+}
+define i64 @cas_weak_i64_release_monotonic(i64* %mem) {
+; CHECK-LABEL: cas_weak_i64_release_monotonic
+; CHECK: sync 1
+  %val = cmpxchg weak i64* %mem, i64 0, i64 1 release monotonic
+; CHECK-NOT: [sync ]
+  %loaded = extractvalue { i64, i1} %val, 0
+  ret i64 %loaded
+}
+
+; AtomicRMW
+define i8 @add_i8_monotonic(i8* %mem, i8 %operand) {
+; CHECK-LABEL: add_i8_monotonic
+; CHECK-NOT: sync
+  %val = atomicrmw add i8* %mem, i8 %operand monotonic
+  ret i8 %val
+}
+define i16 @xor_i16_seq_cst(i16* %mem, i16 %operand) {
+; CHECK-LABEL: xor_i16_seq_cst
+; CHECK: sync 0
+  %val = atomicrmw xor i16* %mem, i16 %operand seq_cst
+; CHECK: sync 1
+  ret i16 %val
+}
+define i32 @xchg_i32_acq_rel(i32* %mem, i32 %operand) {
+; CHECK-LABEL: xchg_i32_acq_rel
+; CHECK: sync 1
+  %val = atomicrmw xchg i32* %mem, i32 %operand acq_rel
+; CHECK: sync 1
+  ret i32 %val
+}
+define i64 @and_i64_release(i64* %mem, i64 %operand) {
+; CHECK-LABEL: and_i64_release
+; CHECK: sync 1
+  %val = atomicrmw and i64* %mem, i64 %operand release
+; CHECK-NOT: [sync ]
+  ret i64 %val
+}
diff --git a/test/CodeGen/PowerPC/bperm.ll b/test/CodeGen/PowerPC/bperm.ll
new file mode 100644
index 000000000000..c489c1f90a8f
--- /dev/null
+++ b/test/CodeGen/PowerPC/bperm.ll
@@ -0,0 +1,279 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define zeroext i32 @bs4(i32 zeroext %a) #0 {
+entry:
+  %0 = tail call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %0
+
+; CHECK-LABEL: @bs4
+; CHECK: rlwinm [[REG1:[0-9]+]], 3, 8, 0, 31
+; CHECK: rlwimi [[REG1]], 3, 24, 16, 23
+; CHECK: rlwimi [[REG1]], 3, 24, 0, 7
+; CHECK: mr 3, [[REG1]]
+; CHECK: blr
+}
+
+define i64 @bs8(i64 %x) #0 {
+entry:
+  %0 = tail call i64 @llvm.bswap.i64(i64 %x)
+  ret i64 %0
+
+; CHECK-LABEL: @bs8
+; CHECK-DAG: rldicl [[REG1:[0-9]+]], 3, 16, 0
+; CHECK-DAG: rldicl [[REG2:[0-9]+]], 3, 8, 0
+; CHECK-DAG: rldicl [[REG3:[0-9]+]], 3, 24, 0
+; CHECK-DAG: rldimi [[REG2]], [[REG1]], 8, 48
+; CHECK-DAG: rldicl [[REG4:[0-9]+]], 3, 32, 0
+; CHECK-DAG: rldimi [[REG2]], [[REG3]], 16, 40
+; CHECK-DAG: rldicl [[REG5:[0-9]+]], 3, 48, 0
+; CHECK-DAG: rldimi [[REG2]], [[REG4]], 24, 32
+; CHECK-DAG: rldicl [[REG6:[0-9]+]], 3, 56, 0
+; CHECK-DAG: rldimi [[REG2]], [[REG5]], 40, 16
+; CHECK-DAG: rldimi [[REG2]], [[REG6]], 48, 8
+; CHECK-DAG: rldimi [[REG2]], 3, 56, 0
+; CHECK: mr 3, [[REG2]]
+; CHECK: blr
+}
+
+define i64 @test1(i64 %i0, i64 %i1) #0 {
+entry:
+  %0 = lshr i64 %i1, 8
+  %and = and i64 %0, 5963776000
+  ret i64 %and
+
+; CHECK-LABEL: @test1
+; CHECK-DAG: li [[REG1:[0-9]+]], 11375
+; CHECK-DAG: rldicl [[REG3:[0-9]+]], 4, 56, 0
+; CHECK-DAG: sldi [[REG2:[0-9]+]], [[REG1]], 19
+; CHECK: and 3, [[REG3]], [[REG2]]
+; CHECK: blr
+}
+
+define i64 @test2(i64 %i0, i64 %i1) #0 {
+entry:
+  %0 = lshr i64 %i1, 6
+  %and = and i64 %0, 133434808670355456
+  ret i64 %and
+
+; CHECK-LABEL: @test2
+; CHECK-DAG: lis [[REG1:[0-9]+]], 474
+; CHECK-DAG: rldicl [[REG5:[0-9]+]], 4, 58, 0
+; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 3648
+; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 32
+; CHECK-DAG: oris [[REG4:[0-9]+]], [[REG3]], 25464
+; CHECK: and 3, [[REG5]], [[REG4]]
+; CHECK: blr
+}
+
+define i64 @test3(i64 %i0, i64 %i1) #0 {
+entry:
+  %0 = shl i64 %i0, 34
+  %and = and i64 %0, 191795733152661504
+  ret i64 %and
+
+; CHECK-LABEL: @test3
+; CHECK-DAG: lis [[REG1:[0-9]+]], 170
+; CHECK-DAG: rldicl [[REG4:[0-9]+]], 3, 34, 0
+; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 22861
+; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 34
+; CHECK: and 3, [[REG4]], [[REG3]]
+; CHECK: blr
+}
+
+define i64 @test4(i64 %i0, i64 %i1) #0 {
+entry:
+  %0 = lshr i64 %i1, 15
+  %and = and i64 %0, 58195968
+  ret i64 %and
+
+; CHECK-LABEL: @test4
+; CHECK: rldicl [[REG1:[0-9]+]], 4, 49, 0
+; CHECK: andis. 3, [[REG1]], 888
+; CHECK: blr
+}
+
+define i64 @test5(i64 %i0, i64 %i1) #0 {
+entry:
+  %0 = shl i64 %i1, 12
+  %and = and i64 %0, 127252959854592
+  ret i64 %and
+
+; CHECK-LABEL: @test5
+; CHECK-DAG: lis [[REG1:[0-9]+]], 3703
+; CHECK-DAG: rldicl [[REG4:[0-9]+]], 4, 12, 0
+; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 35951
+; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 19
+; CHECK: and 3, [[REG4]], [[REG3]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define zeroext i32 @test6(i32 zeroext %x) #0 {
+entry:
+  %and = lshr i32 %x, 16
+  %shr = and i32 %and, 255
+  %and1 = shl i32 %x, 16
+  %shl = and i32 %and1, 16711680
+  %or = or i32 %shr, %shl
+  ret i32 %or
+
+; CHECK-LABEL: @test6
+; CHECK: rlwinm [[REG1:[0-9]+]], 3, 16, 24, 31
+; CHECK: rlwimi [[REG1]], 3, 16, 8, 15
+; CHECK: mr 3, [[REG1]]
+; CHECK: blr
+}
+
+define i64 @test7(i64 %i0, i64 %i1) #0 {
+entry:
+  %0 = lshr i64 %i0, 5
+  %and = and i64 %0, 58195968
+  ret i64 %and
+
+; CHECK-LABEL: @test7
+; CHECK: rlwinm [[REG1:[0-9]+]], 3, 27, 9, 12
+; CHECK: rlwimi [[REG1]], 3, 27, 6, 7
+; CHECK: mr 3, [[REG1]]
+; CHECK: blr
+}
+
+define i64 @test8(i64 %i0, i64 %i1) #0 {
+entry:
+  %0 = lshr i64 %i0, 1
+  %and = and i64 %0, 169172533248
+  ret i64 %and
+
+; CHECK-LABEL: @test8
+; CHECK-DAG: lis [[REG1:[0-9]+]], 4
+; CHECK-DAG: rldicl [[REG4:[0-9]+]], 3, 63, 0
+; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 60527
+; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 19
+; CHECK: and 3, [[REG4]], [[REG3]]
+; CHECK: blr
+}
+
+define i64 @test9(i64 %i0, i64 %i1) #0 {
+entry:
+  %0 = lshr i64 %i1, 14
+  %and = and i64 %0, 18848677888
+  %1 = shl i64 %i1, 51
+  %and3 = and i64 %1, 405323966463344640
+  %or4 = or i64 %and, %and3
+  ret i64 %or4
+
+; CHECK-LABEL: @test9
+; CHECK-DAG: lis [[REG1:[0-9]+]], 1440
+; CHECK-DAG: rldicl [[REG5:[0-9]+]], 4, 62, 0
+; CHECK-DAG: rldicl [[REG6:[0-9]+]], 4, 50, 0
+; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 4
+; CHECK-DAG: rldimi [[REG6]], [[REG5]], 53, 0
+; CHECK-DAG: sldi [[REG3:[0-9]+]], [[REG2]], 32
+; CHECK-DAG: oris [[REG4:[0-9]+]], [[REG3]], 25464
+; CHECK: and 3, [[REG6]], [[REG4]]
+; CHECK: blr
+}
+
+define i64 @test10(i64 %i0, i64 %i1) #0 {
+entry:
+  %0 = shl i64 %i0, 37
+  %and = and i64 %0, 15881483390550016
+  %1 = shl i64 %i0, 25
+  %and3 = and i64 %1, 2473599172608
+  %or4 = or i64 %and, %and3
+  ret i64 %or4
+
+; CHECK-LABEL: @test10
+; CHECK-DAG: lis [[REG1:[0-9]+]], 1
+; CHECK-DAG: rldicl [[REG6:[0-9]+]], 3, 25, 0
+; CHECK-DAG: rldicl [[REG7:[0-9]+]], 3, 37, 0
+; CHECK-DAG: ori [[REG2:[0-9]+]], [[REG1]], 8183
+; CHECK-DAG: ori [[REG3:[0-9]+]], [[REG1]], 50017
+; CHECK-DAG: sldi [[REG4:[0-9]+]], [[REG2]], 25
+; CHECK-DAG: sldi [[REG5:[0-9]+]], [[REG3]], 37
+; CHECK-DAG: and [[REG8:[0-9]+]], [[REG6]], [[REG4]]
+; CHECK-DAG: and [[REG9:[0-9]+]], [[REG7]], [[REG5]]
+; CHECK: or 3, [[REG9]], [[REG8]]
+; CHECK: blr
+}
+
+define i64 @test11(i64 %x) #0 {
+entry:
+  %and = and i64 %x, 4294967295
+  %shl = shl i64 %x, 32
+  %or = or i64 %and, %shl
+  ret i64 %or
+
+; CHECK-LABEL: @test11
+; CHECK: rlwinm 3, 3, 0, 1, 0
+; CHECK: blr
+}
+
+define i64 @test12(i64 %x) #0 {
+entry:
+  %and = and i64 %x, 4294905855
+  %shl = shl i64 %x, 32
+  %or = or i64 %and, %shl
+  ret i64 %or
+
+; CHECK-LABEL: @test12
+; CHECK: rlwinm 3, 3, 0, 20, 15
+; CHECK: blr
+}
+
+define i64 @test13(i64 %x) #0 {
+entry:
+  %shl = shl i64 %x, 4
+  %and = and i64 %shl, 240
+  %shr = lshr i64 %x, 28
+  %and1 = and i64 %shr, 15
+  %or = or i64 %and, %and1
+  ret i64 %or
+
+; CHECK-LABEL: @test13
+; CHECK: rlwinm 3, 3, 4, 24, 31
+; CHECK: blr
+}
+
+define i64 @test14(i64 %x) #0 {
+entry:
+  %shl = shl i64 %x, 4
+  %and = and i64 %shl, 240
+  %shr = lshr i64 %x, 28
+  %and1 = and i64 %shr, 15
+  %and2 = and i64 %x, -4294967296
+  %or = or i64 %and1, %and2
+  %or3 = or i64 %or, %and
+  ret i64 %or3
+
+; CHECK-LABEL: @test14
+; CHECK: rldicr [[REG1:[0-9]+]], 3, 0, 31
+; CHECK: rlwimi [[REG1]], 3, 4, 24, 31
+; CHECK: mr 3, [[REG1]]
+; CHECK: blr
+}
+
+define i64 @test15(i64 %x) #0 {
+entry:
+  %shl = shl i64 %x, 4
+  %and = and i64 %shl, 240
+  %shr = lshr i64 %x, 28
+  %and1 = and i64 %shr, 15
+  %and2 = and i64 %x, -256
+  %or = or i64 %and1, %and2
+  %or3 = or i64 %or, %and
+  ret i64 %or3
+
+; CHECK-LABEL: @test15
+; CHECK: rlwimi 3, 3, 4, 24, 31
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.bswap.i32(i32) #0
+declare i64 @llvm.bswap.i64(i64) #0
+
+attributes #0 = { nounwind readnone }
+
diff --git a/test/CodeGen/PowerPC/buildvec_canonicalize.ll b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
index e155a35c4da0..b70671bfd5cb 100644
--- a/test/CodeGen/PowerPC/buildvec_canonicalize.ll
+++ b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mattr=+altivec --enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mattr=-vsx -march=ppc32 -mattr=+altivec --enable-unsafe-fp-math | FileCheck %s
 
 define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
         %tmp = load <4 x float>* %P3            ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/PowerPC/byval-aliased.ll b/test/CodeGen/PowerPC/byval-aliased.ll
new file mode 100644
index 000000000000..9ef2f02f036a
--- /dev/null
+++ b/test/CodeGen/PowerPC/byval-aliased.ll
@@ -0,0 +1,30 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+target datalayout = "E-m:o-p:32:32-f64:32:64-n32"
+target triple = "powerpc-apple-macosx10.5.0"
+ 
+%struct.sm = type { i8, i8 }
+ 
+; Function Attrs: nounwind ssp
+define void @foo(%struct.sm* byval %s) #0 {
+entry:
+  %a = getelementptr inbounds %struct.sm* %s, i32 0, i32 0
+  %0 = load i8* %a, align 1
+  %conv2 = zext i8 %0 to i32
+  %add = add nuw nsw i32 %conv2, 3
+  %conv1 = trunc i32 %add to i8
+  store i8 %conv1, i8* %a, align 1
+  call void @bar(%struct.sm* byval %s, %struct.sm* byval %s) #1
+  ret void
+}
+
+; CHECK-LABEL: @foo
+; CHECK: stb {{r[0-9]+}}, [[OFF:[0-9]+]]({{r[3]?1}})
+; CHECK: lhz r3, [[OFF]]({{r[3]?1}})
+; CHECK: bl _bar
+; CHECK: blr
+ 
+declare void @bar(%struct.sm* byval, %struct.sm* byval)
+ 
+attributes #0 = { nounwind ssp }
+attributes #1 = { nounwind }
+ 
diff --git a/test/CodeGen/PowerPC/cmpb-ppc32.ll b/test/CodeGen/PowerPC/cmpb-ppc32.ll
new file mode 100644
index 000000000000..639ed887b978
--- /dev/null
+++ b/test/CodeGen/PowerPC/cmpb-ppc32.ll
@@ -0,0 +1,50 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-p:32:32-i64:64-n32"
+target triple = "powerpc-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define zeroext i16 @test16(i16 zeroext %x, i16 zeroext %y) #0 {
+entry:
+  %0 = xor i16 %y, %x
+  %1 = and i16 %0, 255
+  %cmp = icmp eq i16 %1, 0
+  %cmp20 = icmp ult i16 %0, 256
+  %conv25 = select i1 %cmp, i32 255, i32 0
+  %conv27 = select i1 %cmp20, i32 65280, i32 0
+  %or = or i32 %conv25, %conv27
+  %conv29 = trunc i32 %or to i16
+  ret i16 %conv29
+
+; CHECK-LABEL: @test16
+; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
+; CHECK: rlwinm 3, [[REG1]], 0, 16, 31
+; CHECK: blr
+}
+
+define i32 @test32(i32 %x, i32 %y) #0 {
+entry:
+  %0 = xor i32 %y, %x
+  %1 = and i32 %0, 255
+  %cmp = icmp eq i32 %1, 0
+  %2 = and i32 %0, 65280
+  %cmp28 = icmp eq i32 %2, 0
+  %3 = and i32 %0, 16711680
+  %cmp34 = icmp eq i32 %3, 0
+  %cmp40 = icmp ult i32 %0, 16777216
+  %conv44 = select i1 %cmp, i32 255, i32 0
+  %conv45 = select i1 %cmp28, i32 65280, i32 0
+  %conv47 = select i1 %cmp34, i32 16711680, i32 0
+  %conv50 = select i1 %cmp40, i32 -16777216, i32 0
+  %or = or i32 %conv45, %conv50
+  %or49 = or i32 %or, %conv44
+  %or52 = or i32 %or49, %conv47
+  ret i32 %or52
+
+; CHECK-LABEL: @test32
+; CHECK: cmpb 3, 4, 3
+; CHECK-NOT: rlwinm
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readnone }
+
diff --git a/test/CodeGen/PowerPC/cmpb.ll b/test/CodeGen/PowerPC/cmpb.ll
new file mode 100644
index 000000000000..7d0c0ab3316b
--- /dev/null
+++ b/test/CodeGen/PowerPC/cmpb.ll
@@ -0,0 +1,204 @@
+; RUN: llc -mcpu pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define zeroext i16 @test16(i16 zeroext %x, i16 zeroext %y) #0 {
+entry:
+  %0 = xor i16 %y, %x
+  %1 = and i16 %0, 255
+  %cmp = icmp eq i16 %1, 0
+  %cmp20 = icmp ult i16 %0, 256
+  %conv25 = select i1 %cmp, i32 255, i32 0
+  %conv27 = select i1 %cmp20, i32 65280, i32 0
+  %or = or i32 %conv25, %conv27
+  %conv29 = trunc i32 %or to i16
+  ret i16 %conv29
+
+; CHECK-LABEL: @test16
+; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
+; CHECK: rldicl 3, [[REG1]], 0, 48
+; CHECK: blr
+}
+
+define zeroext i16 @test16p1(i16 zeroext %x, i16 zeroext %y) #0 {
+entry:
+  %0 = xor i16 %y, %x
+  %1 = and i16 %0, 255
+  %cmp = icmp eq i16 %1, 0
+  %cmp20 = icmp ult i16 %0, 256
+  %conv28 = select i1 %cmp, i32 5, i32 0
+  %conv30 = select i1 %cmp20, i32 65280, i32 0
+  %or = or i32 %conv28, %conv30
+  %conv32 = trunc i32 %or to i16
+  ret i16 %conv32
+
+; CHECK-LABEL: @test16p1
+; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
+; CHECK: andi. 3, [[REG1]], 65285
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define zeroext i16 @test16p2(i16 zeroext %x, i16 zeroext %y) #0 {
+entry:
+  %0 = xor i16 %y, %x
+  %1 = and i16 %0, 255
+  %cmp = icmp eq i16 %1, 0
+  %cmp20 = icmp ult i16 %0, 256
+  %conv28 = select i1 %cmp, i32 255, i32 0
+  %conv30 = select i1 %cmp20, i32 1280, i32 0
+  %or = or i32 %conv28, %conv30
+  %conv32 = trunc i32 %or to i16
+  ret i16 %conv32
+
+; CHECK-LABEL: @test16p2
+; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
+; CHECK: andi. 3, [[REG1]], 1535
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define zeroext i16 @test16p3(i16 zeroext %x, i16 zeroext %y) #0 {
+entry:
+  %0 = xor i16 %y, %x
+  %1 = and i16 %0, 255
+  %cmp = icmp eq i16 %1, 0
+  %cmp20 = icmp ult i16 %0, 256
+  %conv27 = select i1 %cmp, i32 255, i32 0
+  %conv29 = select i1 %cmp20, i32 1024, i32 1280
+  %or = or i32 %conv27, %conv29
+  %conv31 = trunc i32 %or to i16
+  ret i16 %conv31
+
+; CHECK-LABEL: @test16p3
+; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
+; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 0, 55
+; CHECK: xori 3, [[REG2]], 1280
+; CHECK: blr
+}
+
+define zeroext i32 @test32(i32 zeroext %x, i32 zeroext %y) #0 {
+entry:
+  %0 = xor i32 %y, %x
+  %1 = and i32 %0, 255
+  %cmp = icmp eq i32 %1, 0
+  %2 = and i32 %0, 65280
+  %cmp28 = icmp eq i32 %2, 0
+  %3 = and i32 %0, 16711680
+  %cmp34 = icmp eq i32 %3, 0
+  %cmp40 = icmp ult i32 %0, 16777216
+  %conv44 = select i1 %cmp, i32 255, i32 0
+  %conv45 = select i1 %cmp28, i32 65280, i32 0
+  %conv47 = select i1 %cmp34, i32 16711680, i32 0
+  %conv50 = select i1 %cmp40, i32 -16777216, i32 0
+  %or = or i32 %conv45, %conv50
+  %or49 = or i32 %or, %conv44
+  %or52 = or i32 %or49, %conv47
+  ret i32 %or52
+
+; CHECK-LABEL: @test32
+; CHECK: cmpb [[REG1:[0-9]+]], 4, 3
+; CHECK: rldicl 3, [[REG1]], 0, 32
+; CHECK: blr
+}
+
+define zeroext i32 @test32p1(i32 zeroext %x, i32 zeroext %y) #0 {
+entry:
+  %0 = xor i32 %y, %x
+  %1 = and i32 %0, 255
+  %cmp = icmp eq i32 %1, 0
+  %2 = and i32 %0, 65280
+  %cmp28 = icmp eq i32 %2, 0
+  %3 = and i32 %0, 16711680
+  %cmp34 = icmp eq i32 %3, 0
+  %cmp40 = icmp ult i32 %0, 16777216
+  %conv47 = select i1 %cmp, i32 255, i32 0
+  %conv48 = select i1 %cmp28, i32 65280, i32 0
+  %conv50 = select i1 %cmp34, i32 458752, i32 0
+  %conv53 = select i1 %cmp40, i32 -16777216, i32 0
+  %or = or i32 %conv48, %conv53
+  %or52 = or i32 %or, %conv47
+  %or55 = or i32 %or52, %conv50
+  ret i32 %or55
+
+; CHECK-LABEL: @test32p1
+; CHECK: li [[REG1:[0-9]+]], 0
+; CHECK: cmpb [[REG4:[0-9]+]], 4, 3
+; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65287
+; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
+; CHECK: and 3, [[REG4]], [[REG3]]
+; CHECK: blr
+}
+
+define zeroext i32 @test32p2(i32 zeroext %x, i32 zeroext %y) #0 {
+entry:
+  %0 = xor i32 %y, %x
+  %1 = and i32 %0, 255
+  %cmp = icmp eq i32 %1, 0
+  %2 = and i32 %0, 65280
+  %cmp22 = icmp eq i32 %2, 0
+  %cmp28 = icmp ult i32 %0, 16777216
+  %conv32 = select i1 %cmp, i32 255, i32 0
+  %conv33 = select i1 %cmp22, i32 65280, i32 0
+  %conv35 = select i1 %cmp28, i32 -16777216, i32 0
+  %or = or i32 %conv33, %conv35
+  %or37 = or i32 %or, %conv32
+  ret i32 %or37
+
+; CHECK-LABEL: @test32p2
+; CHECK: li [[REG1:[0-9]+]], 0
+; CHECK: cmpb [[REG4:[0-9]+]], 4, 3
+; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65280
+; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535
+; CHECK: and 3, [[REG4]], [[REG3]]
+; CHECK: blr
+}
+
+define i64 @test64(i64 %x, i64 %y) #0 {
+entry:
+  %shr19 = lshr i64 %x, 56
+  %conv21 = trunc i64 %shr19 to i32
+  %shr43 = lshr i64 %y, 56
+  %conv45 = trunc i64 %shr43 to i32
+  %0 = xor i64 %y, %x
+  %1 = and i64 %0, 255
+  %cmp = icmp eq i64 %1, 0
+  %2 = and i64 %0, 65280
+  %cmp52 = icmp eq i64 %2, 0
+  %3 = and i64 %0, 16711680
+  %cmp58 = icmp eq i64 %3, 0
+  %4 = and i64 %0, 4278190080
+  %cmp64 = icmp eq i64 %4, 0
+  %5 = and i64 %0, 1095216660480
+  %cmp70 = icmp eq i64 %5, 0
+  %6 = and i64 %0, 280375465082880
+  %cmp76 = icmp eq i64 %6, 0
+  %7 = and i64 %0, 71776119061217280
+  %cmp82 = icmp eq i64 %7, 0
+  %cmp88 = icmp eq i32 %conv21, %conv45
+  %conv92 = select i1 %cmp, i64 255, i64 0
+  %conv93 = select i1 %cmp52, i64 65280, i64 0
+  %or = or i64 %conv92, %conv93
+  %conv95 = select i1 %cmp58, i64 16711680, i64 0
+  %or97 = or i64 %or, %conv95
+  %conv98 = select i1 %cmp64, i64 4278190080, i64 0
+  %or100 = or i64 %or97, %conv98
+  %conv101 = select i1 %cmp70, i64 1095216660480, i64 0
+  %or103 = or i64 %or100, %conv101
+  %conv104 = select i1 %cmp76, i64 280375465082880, i64 0
+  %or106 = or i64 %or103, %conv104
+  %conv107 = select i1 %cmp82, i64 71776119061217280, i64 0
+  %or109 = or i64 %or106, %conv107
+  %conv110 = select i1 %cmp88, i64 -72057594037927936, i64 0
+  %or112 = or i64 %or109, %conv110
+  ret i64 %or112
+
+; CHECK-LABEL: @test64
+; CHECK: cmpb 3, 3, 4
+; CHECK-NOT: rldicl
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readnone }
+
diff --git a/test/CodeGen/PowerPC/code-align.ll b/test/CodeGen/PowerPC/code-align.ll
new file mode 100644
index 000000000000..306230be5005
--- /dev/null
+++ b/test/CodeGen/PowerPC/code-align.ll
@@ -0,0 +1,104 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s -check-prefix=GENERIC
+; RUN: llc -mcpu=970 < %s | FileCheck %s -check-prefix=PWR
+; RUN: llc -mcpu=a2 < %s | FileCheck %s -check-prefix=BASIC
+; RUN: llc -mcpu=e500mc < %s | FileCheck %s -check-prefix=BASIC
+; RUN: llc -mcpu=e5500 < %s | FileCheck %s -check-prefix=BASIC
+; RUN: llc -mcpu=pwr4 < %s | FileCheck %s -check-prefix=PWR
+; RUN: llc -mcpu=pwr5 < %s | FileCheck %s -check-prefix=PWR
+; RUN: llc -mcpu=pwr5x < %s | FileCheck %s -check-prefix=PWR
+; RUN: llc -mcpu=pwr6 < %s | FileCheck %s -check-prefix=PWR
+; RUN: llc -mcpu=pwr6x < %s | FileCheck %s -check-prefix=PWR
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PWR
+; RUN: llc -mcpu=pwr8 < %s | FileCheck %s -check-prefix=PWR
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo(i32 signext %x) #0 {
+entry:
+  %mul = shl nsw i32 %x, 1
+  ret i32 %mul
+
+; GENERIC-LABEL: .globl  foo
+; BASIC-LABEL: .globl  foo
+; PWR-LABEL: .globl  foo
+; GENERIC: .align  2
+; BASIC: .align  4
+; PWR: .align  4
+; GENERIC: @foo
+; BASIC: @foo
+; PWR: @foo
+}
+
+; Function Attrs: nounwind
+define void @loop(i32 signext %x, i32* nocapture %a) #1 {
+entry:
+  br label %vector.body
+
+; GENERIC-LABEL: @loop
+; BASIC-LABEL: @loop
+; PWR-LABEL: @loop
+; GENERIC: mtctr
+; BASIC: mtctr
+; PWR: mtctr
+; GENERIC-NOT: .align
+; BASIC: .align  4
+; PWR: .align  4
+; GENERIC: bdnz
+; BASIC: bdnz
+; PWR: bdnz
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %induction45 = or i64 %index, 1
+  %0 = getelementptr inbounds i32* %a, i64 %index
+  %1 = getelementptr inbounds i32* %a, i64 %induction45
+  %2 = load i32* %0, align 4
+  %3 = load i32* %1, align 4
+  %4 = add nsw i32 %2, 4
+  %5 = add nsw i32 %3, 4
+  store i32 %4, i32* %0, align 4
+  store i32 %5, i32* %1, align 4
+  %index.next = add i64 %index, 2
+  %6 = icmp eq i64 %index.next, 2048
+  br i1 %6, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @sloop(i32 signext %x, i32* nocapture %a) #1 {
+entry:
+  br label %for.body
+
+; GENERIC-LABEL: @sloop
+; BASIC-LABEL: @sloop
+; PWR-LABEL: @sloop
+; GENERIC: mtctr
+; BASIC: mtctr
+; PWR: mtctr
+; GENERIC-NOT: .align
+; BASIC: .align  4
+; PWR: .align  5
+; GENERIC: bdnz
+; BASIC: bdnz
+; PWR: bdnz
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, 4
+  store i32 %add, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 2048
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/complex-return.ll b/test/CodeGen/PowerPC/complex-return.ll
index 8a6adaee5556..9d25e619d2e5 100644
--- a/test/CodeGen/PowerPC/complex-return.ll
+++ b/test/CodeGen/PowerPC/complex-return.ll
@@ -24,10 +24,10 @@ entry:
 }
 
 ; CHECK-LABEL: foo:
-; CHECK: lfd 3
-; CHECK: lfd 4
 ; CHECK: lfd 1
 ; CHECK: lfd 2
+; CHECK: lfd 3
+; CHECK: lfd 4
 
 define { float, float } @oof() nounwind {
 entry:
diff --git a/test/CodeGen/PowerPC/constants-i64.ll b/test/CodeGen/PowerPC/constants-i64.ll
new file mode 100644
index 000000000000..5f2815e57691
--- /dev/null
+++ b/test/CodeGen/PowerPC/constants-i64.ll
@@ -0,0 +1,84 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define i64 @cn1() #0 {
+entry:
+  ret i64 281474976710655
+
+; CHECK-LABEL: @cn1
+; CHECK: lis [[REG1:[0-9]+]], -1
+; CHECK: rldicr 3, [[REG1]], 48, 63
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define i64 @cnb() #0 {
+entry:
+  ret i64 281474976710575
+
+; CHECK-LABEL: @cnb
+; CHECK: lis [[REG1:[0-9]+]], -81
+; CHECK: rldicr 3, [[REG1]], 48, 63
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define i64 @f2(i64 %x) #0 {
+entry:
+  ret i64 -68719476736
+
+; CHECK-LABEL: @f2
+; CHECK: li [[REG1:[0-9]+]], -1
+; CHECK: sldi 3, [[REG1]], 36
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define i64 @f2a(i64 %x) #0 {
+entry:
+  ret i64 -361850994688
+
+; CHECK-LABEL: @f2a
+; CHECK: li [[REG1:[0-9]+]], -337
+; CHECK: sldi 3, [[REG1]], 30
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define i64 @f2n(i64 %x) #0 {
+entry:
+  ret i64 68719476735
+
+; CHECK-LABEL: @f2n
+; CHECK: lis [[REG1:[0-9]+]], -4096
+; CHECK: rldicr 3, [[REG1]], 36, 63
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define i64 @f3(i64 %x) #0 {
+entry:
+  ret i64 8589934591
+
+; CHECK-LABEL: @f3
+; CHECK: lis [[REG1:[0-9]+]], -32768
+; CHECK: rldicr 3, [[REG1]], 33, 63
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define i64 @cn2n() #0 {
+entry:
+  ret i64 -1407374887747585
+
+; CHECK-LABEL: @cn2n
+; CHECK: lis [[REG1:[0-9]+]], -5121
+; CHECK: ori [[REG2:[0-9]+]], [[REG1]], 65534
+; CHECK: rldicr 3, [[REG2]], 22, 63
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readnone }
+
diff --git a/test/CodeGen/PowerPC/copysignl.ll b/test/CodeGen/PowerPC/copysignl.ll
index 4b801b791d62..e280f832ce01 100644
--- a/test/CodeGen/PowerPC/copysignl.ll
+++ b/test/CodeGen/PowerPC/copysignl.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | FileCheck %s -check-prefix=CHECK-VSX
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -11,6 +12,9 @@ entry:
 ; CHECK-LABEL: @foo_d_ll
 ; CHECK: fcpsgn 1, 3, 1
 ; CHECK: blr
+; CHECK-VSX-LABEL: @foo_d_ll
+; CHECK-VSX: xscpsgndp 1, 3, 1
+; CHECK-VSX: blr
 }
 
 declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) #0
@@ -24,6 +28,9 @@ entry:
 ; CHECK-LABEL: @foo_dl
 ; CHECK: fcpsgn 1, 2, 1
 ; CHECK: blr
+; CHECK-VSX-LABEL: @foo_dl
+; CHECK-VSX: xscpsgndp 1, 2, 1
+; CHECK-VSX: blr
 }
 
 declare double @copysign(double, double) #0
@@ -37,6 +44,9 @@ entry:
 ; CHECK-LABEL: @foo_ll
 ; CHECK: bl copysignl
 ; CHECK: blr
+; CHECK-VSX-LABEL: @foo_ll
+; CHECK-VSX: bl copysignl
+; CHECK-VSX: blr
 }
 
 define ppc_fp128 @foo_ld(double %a, double %b) #0 {
@@ -49,6 +59,9 @@ entry:
 ; CHECK-LABEL: @foo_ld
 ; CHECK: bl copysignl
 ; CHECK: blr
+; CHECK-VSX-LABEL: @foo_ld
+; CHECK-VSX: bl copysignl
+; CHECK-VSX: blr
 }
 
 define ppc_fp128 @foo_lf(double %a, float %b) #0 {
@@ -61,6 +74,9 @@ entry:
 ; CHECK-LABEL: @foo_lf
 ; CHECK: bl copysignl
 ; CHECK: blr
+; CHECK-VSX-LABEL: @foo_lf
+; CHECK-VSX: bl copysignl
+; CHECK-VSX: blr
 }
 
 attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/PowerPC/crsave.ll b/test/CodeGen/PowerPC/crsave.ll
index a9b4b3607830..602ba94dc094 100644
--- a/test/CodeGen/PowerPC/crsave.ll
+++ b/test/CodeGen/PowerPC/crsave.ll
@@ -6,7 +6,7 @@ declare void @foo()
 define i32 @test_cr2() nounwind uwtable {
 entry:
   %ret = alloca i32, align 4
-  %0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmp 2,$2,$1\0A\09mfcr $0", "=r,r,r,r,r,~{cr2}"(i32 1, i32 2, i32 3, i32 0) nounwind
+  %0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmpw 2,$2,$1\0A\09mfcr $0", "=r,r,r,r,r,~{cr2}"(i32 1, i32 2, i32 3, i32 0) nounwind
   store i32 %0, i32* %ret, align 4
   call void @foo()
   %1 = load i32* %ret, align 4
@@ -35,7 +35,7 @@ entry:
 define i32 @test_cr234() nounwind {
 entry:
   %ret = alloca i32, align 4
-  %0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmp 2,$2,$1\0A\09cmp 3,$2,$2\0A\09cmp 4,$2,$3\0A\09mfcr $0", "=r,r,r,r,r,~{cr2},~{cr3},~{cr4}"(i32 1, i32 2, i32 3, i32 0) nounwind
+  %0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmpw 2,$2,$1\0A\09cmpw 3,$2,$2\0A\09cmpw 4,$2,$3\0A\09mfcr $0", "=r,r,r,r,r,~{cr2},~{cr3},~{cr4}"(i32 1, i32 2, i32 3, i32 0) nounwind
   store i32 %0, i32* %ret, align 4
   call void @foo()
   %1 = load i32* %ret, align 4
diff --git a/test/CodeGen/PowerPC/ctrloops.ll b/test/CodeGen/PowerPC/ctrloops.ll
index ca00f687aa4e..ccab7cb7a0ba 100644
--- a/test/CodeGen/PowerPC/ctrloops.ll
+++ b/test/CodeGen/PowerPC/ctrloops.ll
@@ -1,6 +1,6 @@
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-freebsd10.0"
-; RUN: llc < %s -march=ppc64 | FileCheck %s
+; RUN: llc < %s -march=ppc64 -relocation-model=pic | FileCheck %s
 
 @a = common global i32 0, align 4
 
@@ -73,3 +73,26 @@ for.end:                                          ; preds = %for.body, %entry
 ; CHECK-NOT: cmplwi
 ; CHECK: bdnz
 }
+
+@tls_var = external thread_local global i8
+
+define i32 @test4() {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %phi = phi i32 [ %dec, %for.body ], [ undef, %entry ]
+  %load = ptrtoint i8* @tls_var to i32
+  %dec = add i32 %phi, -1
+  %cmp = icmp sgt i32 %phi, 1
+  br i1 %cmp, label %for.body, label %return
+
+return:                                           ; preds = %for.body
+  ret i32 %load
+; CHECK-LABEL: @test4
+; CHECK-NOT: mtctr
+; CHECK: addi {{[0-9]+}}
+; CHECK: cmpwi
+; CHECK-NOT: bdnz
+; CHECK: bgt
+}
diff --git a/test/CodeGen/PowerPC/cttz-ctlz-spec.ll b/test/CodeGen/PowerPC/cttz-ctlz-spec.ll
new file mode 100644
index 000000000000..13b017a746ec
--- /dev/null
+++ b/test/CodeGen/PowerPC/cttz-ctlz-spec.ll
@@ -0,0 +1,41 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i64 @test1(i64 %A) {
+; CHECK-LABEL: @test1(
+; CHECK: [[CTLZ:%[A-Za-z0-9]+]] = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
+; CHECK-NEXT: ret i64 [[CTLZ]]
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
+  ret i64 %cond
+}
+
+define i64 @test1b(i64 %A) {
+; CHECK-LABEL: @test1b(
+; CHECK: [[CTTZ:%[A-Za-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+; CHECK-NEXT: ret i64 [[CTTZ]]
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
+  ret i64 %cond
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
index 6beea558c0db..bd153674eab1 100644
--- a/test/CodeGen/PowerPC/dbg.ll
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -6,34 +6,34 @@ target triple = "powerpc64-unknown-linux-gnu"
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readnone {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !15), !dbg !17
-  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !16), !dbg !18
+  tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !17
+  tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !18
   %add = add nsw i32 %argc, 1, !dbg !19
   ret i32 %add, !dbg !19
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!22}
 
-!0 = metadata !{i32 720913, metadata !21, i32 12, metadata !"clang version 3.1", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !"", metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !21, null, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13, i32 0} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !21} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !9, metadata !10}
-!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!13 = metadata !{metadata !15, metadata !16}
-!15 = metadata !{i32 721153, metadata !5, metadata !"argc", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!16 = metadata !{i32 721153, metadata !5, metadata !"argv", metadata !6, i32 33554433, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 1, i32 14, metadata !5, null}
-!18 = metadata !{i32 1, i32 26, metadata !5, null}
-!19 = metadata !{i32 2, i32 3, metadata !20, null}
-!20 = metadata !{i32 720907, metadata !21, metadata !5, i32 1, i32 34, i32 0} ; [ DW_TAG_lexical_block ]
-!21 = metadata !{metadata !"dbg.c", metadata !"/src"}
-!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.1\001\00\000\00\000", !21, !1, !1, !3, !1, !""} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00main\00main\00\001\000\001\000\006\00256\001\000", !21, null, !7, null, i32 (i32, i8**)* @main, null, null, !13} ; [ DW_TAG_subprogram ]
+!6 = !{!"0x29", !21} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !9, !10}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!10 = !{!"0xf\00\000\0064\0064\000\000", null, null, !11} ; [ DW_TAG_pointer_type ]
+!11 = !{!"0xf\00\000\0064\0064\000\000", null, null, !12} ; [ DW_TAG_pointer_type ]
+!12 = !{!"0x24\00char\000\008\008\000\000\008", null, null} ; [ DW_TAG_base_type ]
+!13 = !{!15, !16}
+!15 = !{!"0x101\00argc\0016777217\000", !5, !6, !9} ; [ DW_TAG_arg_variable ]
+!16 = !{!"0x101\00argv\0033554433\000", !5, !6, !10} ; [ DW_TAG_arg_variable ]
+!17 = !MDLocation(line: 1, column: 14, scope: !5)
+!18 = !MDLocation(line: 1, column: 26, scope: !5)
+!19 = !MDLocation(line: 2, column: 3, scope: !20)
+!20 = !{!"0xb\001\0034\000", !21, !5} ; [ DW_TAG_lexical_block ]
+!21 = !{!"dbg.c", !"/src"}
+!22 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/PowerPC/early-ret2.ll b/test/CodeGen/PowerPC/early-ret2.ll
index 17847770a831..f9758d3f7d4c 100644
--- a/test/CodeGen/PowerPC/early-ret2.ll
+++ b/test/CodeGen/PowerPC/early-ret2.ll
@@ -25,5 +25,5 @@ while.end:                                        ; preds = %while.body, %while.
 
 attributes #0 = { noinline nounwind }
 
-!0 = metadata !{}
+!0 = !{}
 
diff --git a/test/CodeGen/PowerPC/empty-functions.ll b/test/CodeGen/PowerPC/empty-functions.ll
index 3a2907d5d7b9..e32a8472b835 100644
--- a/test/CodeGen/PowerPC/empty-functions.ll
+++ b/test/CodeGen/PowerPC/empty-functions.ll
@@ -1,12 +1,43 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck -check-prefix=CHECK-NO-FP %s
-; RUN: llc < %s -mtriple=powerpc-apple-darwin -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck -check-prefix=CHECK-MACHO %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -disable-fp-elim | FileCheck -check-prefix=CHECK-MACHO %s
+; RUN: llc < %s -mtriple=powerpc-linux-gnu | FileCheck -check-prefix=LINUX-NO-FP %s
+; RUN: llc < %s -mtriple=powerpc-linux-gnu -disable-fp-elim | FileCheck -check-prefix=LINUX-FP %s
 
 define void @func() {
 entry:
   unreachable
 }
-; CHECK-NO-FP:     _func:
-; CHECK-NO-FP:     nop
 
-; CHECK-FP:      _func:
-; CHECK-FP:      nop
+; MachO cannot handle an empty function.
+; CHECK-MACHO:     _func:
+; CHECK-MACHO-NEXT: .cfi_startproc
+; CHECK-MACHO-NEXT: {{^}};
+; CHECK-MACHO-NEXT:     nop
+; CHECK-MACHO-NEXT: .cfi_endproc
+
+; An empty function is perfectly fine on ELF.
+; LINUX-NO-FP: func:
+; LINUX-NO-FP-NEXT: .cfi_startproc
+; LINUX-NO-FP-NEXT: {{^}}#
+; LINUX-NO-FP-NEXT: {{^}}.L{{.*}}:{{$}}
+; LINUX-NO-FP-NEXT: .size   func, .L{{.*}}-func
+; LINUX-NO-FP-NEXT: .cfi_endproc
+
+; A cfi directive can point to the end of a function. It (and in fact the
+; entire body) could be optimized out because of the unreachable, but we
+; don't do it right now.
+; LINUX-FP: func:
+; LINUX-FP-NEXT: .cfi_startproc
+; LINUX-FP-NEXT: {{^}}#
+; LINUX-FP-NEXT: stw 31, -4(1)
+; LINUX-FP-NEXT: stwu 1, -16(1)
+; LINUX-FP-NEXT: {{^}}.L{{.*}}:{{$}}
+; LINUX-FP-NEXT:  .cfi_def_cfa_offset 16
+; LINUX-FP-NEXT: {{^}}.L{{.*}}:{{$}}
+; LINUX-FP-NEXT: .cfi_offset r31, -4
+; LINUX-FP-NEXT: mr 31, 1
+; LINUX-FP-NEXT:{{^}}.L{{.*}}:{{$}}
+; LINUX-FP-NEXT: .cfi_def_cfa_register r31
+; LINUX-FP-NEXT:{{^}}.L{{.*}}:{{$}}
+; LINUX-FP-NEXT: .size   func, .Ltmp3-func
+; LINUX-FP-NEXT: .cfi_endproc
diff --git a/test/CodeGen/PowerPC/fabs.ll b/test/CodeGen/PowerPC/fabs.ll
index ddcce745084a..36aa23d03550 100644
--- a/test/CodeGen/PowerPC/fabs.ll
+++ b/test/CodeGen/PowerPC/fabs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | grep "fabs f1, f1"
+; RUN: llc < %s -mattr=-vsx -march=ppc32 -mtriple=powerpc-apple-darwin | grep "fabs f1, f1"
 
 define double @fabs(double %f) {
 entry:
diff --git a/test/CodeGen/PowerPC/fast-isel-call.ll b/test/CodeGen/PowerPC/fast-isel-call.ll
index 33a8ba903e3d..b2cc75e26114 100644
--- a/test/CodeGen/PowerPC/fast-isel-call.ll
+++ b/test/CodeGen/PowerPC/fast-isel-call.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; FIXME: FastISel currently returns false if it hits code that uses VSX
+; registers and with -fast-isel-abort turned on the test case will then fail.
+; When fastisel better supports VSX fix up this test case.
+;
+; RUN: llc < %s -O0 -verify-machineinstrs -mattr=-vsx -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
 
 define i32 @t1(i8 signext %a) nounwind {
   %1 = sext i8 %a to i32
@@ -57,11 +61,11 @@ entry:
 ; ELF64: t10
   %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
 ; ELF64: li 3, 0
-; ELF64: li 4, 248
-; ELF64: li 5, 187
+; ELF64: li 4, -8
+; ELF64: li 5, -69
 ; ELF64: li 6, 28
 ; ELF64: li 7, 40
-; ELF64: li 8, 186
+; ELF64: li 8, -70
 ; ELF64: rldicl 3, 3, 0, 56
 ; ELF64: rldicl 4, 4, 0, 56
 ; ELF64: rldicl 5, 5, 0, 56
diff --git a/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll b/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll
index 33f7a79783cc..c1f6b6327a44 100644
--- a/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll
+++ b/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll
@@ -1,5 +1,8 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
-
+; FIXME: FastISel currently returns false if it hits code that uses VSX
+; registers and with -fast-isel-abort turned on the test case will then fail.
+; When fastisel better supports VSX fix up this test case.
+;
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
 define void @t1a(float %a) uwtable ssp {
 entry:
 ; ELF64: t1a
diff --git a/test/CodeGen/PowerPC/fast-isel-const.ll b/test/CodeGen/PowerPC/fast-isel-const.ll
new file mode 100644
index 000000000000..1057d0a0ce2b
--- /dev/null
+++ b/test/CodeGen/PowerPC/fast-isel-const.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
+
+define zeroext i1 @testi1(i8 %in) nounwind uwtable ssp {
+entry:
+  %c = icmp eq i8 %in, 5
+  br i1 %c, label %true, label %false
+
+; ELF64-LABEL: @testi1
+
+true:
+  br label %end
+
+; ELF64-NOT: li {{[0-9]+}}, -1
+; ELF64: li {{[0-9]+}}, 1
+
+false:
+  br label %end
+
+; ELF64: li {{[0-9]+}}, 0
+
+end:
+  %r = phi i1 [ 0, %false], [ 1, %true ]
+  ret i1 %r
+
+; ELF64: blr
+}
+
diff --git a/test/CodeGen/PowerPC/fast-isel-conversion.ll b/test/CodeGen/PowerPC/fast-isel-conversion.ll
index 71611060ed7a..b0e29c1274a2 100644
--- a/test/CodeGen/PowerPC/fast-isel-conversion.ll
+++ b/test/CodeGen/PowerPC/fast-isel-conversion.ll
@@ -1,6 +1,10 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s --check-prefix=ELF64LE
-; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 | FileCheck %s --check-prefix=PPC970
+; FIXME: FastISel currently returns false if it hits code that uses VSX
+; registers and with -fast-isel-abort turned on the test case will then fail.
+; When fastisel better supports VSX fix up this test case.
+;
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx | FileCheck %s --check-prefix=ELF64LE
+; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 -mattr=-vsx | FileCheck %s --check-prefix=PPC970
 
 ;; Tests for 970 don't use -fast-isel-abort because we intentionally punt
 ;; to SelectionDAG in some cases.
diff --git a/test/CodeGen/PowerPC/fast-isel-load-store.ll b/test/CodeGen/PowerPC/fast-isel-load-store.ll
index 026b15fe5e4e..ef702e21d6a1 100644
--- a/test/CodeGen/PowerPC/fast-isel-load-store.ll
+++ b/test/CodeGen/PowerPC/fast-isel-load-store.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; FIXME: FastISel currently returns false if it hits code that uses VSX
+; registers and with -fast-isel-abort turned on the test case will then fail.
+; When fastisel better supports VSX fix up this test case.
+;
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel -fast-isel-abort -mattr=-vsx -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
 
 ; This test verifies that load/store instructions are properly generated,
 ; and that they pass MI verification.
diff --git a/test/CodeGen/PowerPC/fast-isel-ret.ll b/test/CodeGen/PowerPC/fast-isel-ret.ll
index f82de70c9286..ae34fbf7bfe1 100644
--- a/test/CodeGen/PowerPC/fast-isel-ret.ll
+++ b/test/CodeGen/PowerPC/fast-isel-ret.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64
+; FIXME: FastISel currently returns false if it hits code that uses VSX
+; registers and with -fast-isel-abort turned on the test case will then fail.
+; When fastisel better supports VSX fix up this test case.
+;
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64
 
 define zeroext i1 @rettrue() nounwind uwtable ssp {
 entry:
diff --git a/test/CodeGen/PowerPC/fcpsgn.ll b/test/CodeGen/PowerPC/fcpsgn.ll
index f4699816340a..4d4afc64d9e5 100644
--- a/test/CodeGen/PowerPC/fcpsgn.ll
+++ b/test/CodeGen/PowerPC/fcpsgn.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -10,6 +11,9 @@ entry:
 ; CHECK-LABEL: @foo_dd
 ; CHECK: fcpsgn 1, 2, 1
 ; CHECK: blr
+; CHECK-VSX-LABEL: @foo_dd
+; CHECK-VSX: xscpsgndp 1, 2, 1
+; CHECK-VSX: blr
 }
 
 declare double @copysign(double, double) #0
@@ -22,6 +26,9 @@ entry:
 ; CHECK-LABEL: @foo_ss
 ; CHECK: fcpsgn 1, 2, 1
 ; CHECK: blr
+; CHECK-VSX-LABEL: @foo_ss
+; CHECK-VSX: fcpsgn 1, 2, 1
+; CHECK-VSX: blr
 }
 
 declare float @copysignf(float, float) #0
@@ -35,6 +42,9 @@ entry:
 ; CHECK-LABEL: @foo_sd
 ; CHECK: fcpsgn 1, 2, 1
 ; CHECK: blr
+; CHECK-VSX-LABEL: @foo_sd
+; CHECK-VSX: fcpsgn 1, 2, 1
+; CHECK-VSX: blr
 }
 
 define double @foo_ds(double %a, float %b) #0 {
@@ -46,6 +56,9 @@ entry:
 ; CHECK-LABEL: @foo_ds
 ; CHECK: fcpsgn 1, 2, 1
 ; CHECK: blr
+; CHECK-VSX-LABEL: @foo_ds
+; CHECK-VSX: fcpsgn 1, 2, 1
+; CHECK-VSX: blr
 }
 
 attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/PowerPC/fdiv-combine.ll b/test/CodeGen/PowerPC/fdiv-combine.ll
new file mode 100644
index 000000000000..d3dc3fe913fd
--- /dev/null
+++ b/test/CodeGen/PowerPC/fdiv-combine.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Following test case checks:
+;   a / D; b / D; c / D;
+;                =>
+;   recip = 1.0 / D; a * recip; b * recip; c * recip;
+
+define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
+; CHECK-LABEL: three_fdiv_double:
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fdiv
+; CHECK: fmul
+; CHECK: fmul
+; CHECK: fmul
+  %div = fdiv double %a, %D
+  %div1 = fdiv double %b, %D
+  %div2 = fdiv double %c, %D
+  tail call void @foo_3d(double %div, double %div1, double %div2)
+  ret void
+}
+
+define void @two_fdiv_double(double %D, double %a, double %b) #0 {
+; CHECK-LABEL: two_fdiv_double:
+; CHECK: fdiv
+; CHECK: fdiv
+; CHECK-NEXT-NOT: fmul
+  %div = fdiv double %a, %D
+  %div1 = fdiv double %b, %D
+  tail call void @foo_2d(double %div, double %div1)
+  ret void
+}
+
+declare void @foo_3d(double, double, double)
+declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
+declare void @foo_2d(double, double)
+
+attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/PowerPC/fma-assoc.ll b/test/CodeGen/PowerPC/fma-assoc.ll
new file mode 100644
index 000000000000..dc1316e5e24f
--- /dev/null
+++ b/test/CodeGen/PowerPC/fma-assoc.ll
@@ -0,0 +1,79 @@
+; RUN: llc < %s -march=ppc32 -fp-contract=fast -mattr=-vsx | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 | FileCheck -check-prefix=CHECK-VSX %s
+
+define double @test_FMADD_ASSOC1(double %A, double %B, double %C,
+                                 double %D, double %E) {
+	%F = fmul double %A, %B         ; <double> [#uses=1]
+	%G = fmul double %C, %D         ; <double> [#uses=1]
+	%H = fadd double %F, %G         ; <double> [#uses=1]
+	%I = fadd double %H, %E         ; <double> [#uses=1]
+	ret double %I
+; CHECK-LABEL: test_FMADD_ASSOC1:
+; CHECK: fmadd
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMADD_ASSOC1:
+; CHECK-VSX: xsmaddmdp
+; CHECK-VSX-NEXT: xsmaddadp
+; CHECK-VSX-NEXT: fmr
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMADD_ASSOC2(double %A, double %B, double %C,
+                                 double %D, double %E) {
+	%F = fmul double %A, %B         ; <double> [#uses=1]
+	%G = fmul double %C, %D         ; <double> [#uses=1]
+	%H = fadd double %F, %G         ; <double> [#uses=1]
+	%I = fadd double %E, %H         ; <double> [#uses=1]
+	ret double %I
+; CHECK-LABEL: test_FMADD_ASSOC2:
+; CHECK: fmadd
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMADD_ASSOC2:
+; CHECK-VSX: xsmaddmdp
+; CHECK-VSX-NEXT: xsmaddadp
+; CHECK-VSX-NEXT: fmr
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMSUB_ASSOC1(double %A, double %B, double %C,
+                                 double %D, double %E) {
+	%F = fmul double %A, %B         ; <double> [#uses=1]
+	%G = fmul double %C, %D         ; <double> [#uses=1]
+	%H = fadd double %F, %G         ; <double> [#uses=1]
+	%I = fsub double %H, %E         ; <double> [#uses=1]
+	ret double %I
+; CHECK-LABEL: test_FMSUB_ASSOC1:
+; CHECK: fmsub
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMSUB_ASSOC1:
+; CHECK-VSX: xsmsubmdp
+; CHECK-VSX-NEXT: xsmaddadp
+; CHECK-VSX-NEXT: fmr
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMSUB_ASSOC2(double %A, double %B, double %C,
+                                 double %D, double %E) {
+	%F = fmul double %A, %B         ; <double> [#uses=1]
+	%G = fmul double %C, %D         ; <double> [#uses=1]
+	%H = fadd double %F, %G         ; <double> [#uses=1]
+	%I = fsub double %E, %H         ; <double> [#uses=1]
+	ret double %I
+; CHECK-LABEL: test_FMSUB_ASSOC2:
+; CHECK: fnmsub
+; CHECK-NEXT: fnmsub
+; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMSUB_ASSOC2:
+; CHECK-VSX: xsnmsubmdp
+; CHECK-VSX-NEXT: xsnmsubadp
+; CHECK-VSX-NEXT: fmr
+; CHECK-VSX-NEXT: blr
+}
+
diff --git a/test/CodeGen/PowerPC/fma-ext.ll b/test/CodeGen/PowerPC/fma-ext.ll
new file mode 100644
index 000000000000..56825ce8f227
--- /dev/null
+++ b/test/CodeGen/PowerPC/fma-ext.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -march=ppc32 -fp-contract=fast -mattr=-vsx | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 | FileCheck -check-prefix=CHECK-VSX %s
+
+define double @test_FMADD_EXT1(float %A, float %B, double %C) {
+    %D = fmul float %A, %B          ; <float> [#uses=1]
+    %E = fpext float %D to double   ; <double> [#uses=1]
+    %F = fadd double %E, %C         ; <double> [#uses=1]
+    ret double %F
+; CHECK-LABEL: test_FMADD_EXT1:
+; CHECK: fmadd
+; CHECK-NEXT: blr
+                                
+; CHECK-VSX-LABEL: test_FMADD_EXT1:
+; CHECK-VSX: xsmaddmdp
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMADD_EXT2(float %A, float %B, double %C) {
+    %D = fmul float %A, %B          ; <float> [#uses=1]
+    %E = fpext float %D to double   ; <double> [#uses=1]
+    %F = fadd double %C, %E         ; <double> [#uses=1]
+    ret double %F
+; CHECK-LABEL: test_FMADD_EXT2:
+; CHECK: fmadd
+; CHECK-NEXT: blr
+                                
+; CHECK-VSX-LABEL: test_FMADD_EXT2:
+; CHECK-VSX: xsmaddmdp
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMSUB_EXT1(float %A, float %B, double %C) {
+    %D = fmul float %A, %B          ; <float> [#uses=1]
+    %E = fpext float %D to double   ; <double> [#uses=1]
+    %F = fsub double %E, %C         ; <double> [#uses=1]
+    ret double %F
+; CHECK-LABEL: test_FMSUB_EXT1:
+; CHECK: fmsub
+; CHECK-NEXT: blr
+                                
+; CHECK-VSX-LABEL: test_FMSUB_EXT1:
+; CHECK-VSX: xsmsubmdp
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMSUB_EXT2(float %A, float %B, double %C) {
+    %D = fmul float %A, %B          ; <float> [#uses=1]
+    %E = fpext float %D to double   ; <double> [#uses=1]
+    %F = fsub double %C, %E         ; <double> [#uses=1]
+    ret double %F
+; CHECK-LABEL: test_FMSUB_EXT2:
+; CHECK: fnmsub
+; CHECK-NEXT: blr
+                                
+; CHECK-VSX-LABEL: test_FMSUB_EXT2:
+; CHECK-VSX: xsnmsubmdp
+; CHECK-VSX-NEXT: fmr
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMSUB_EXT3(float %A, float %B, double %C) {
+    %D = fmul float %A, %B          ; <float> [#uses=1]
+    %E = fsub float -0.000000e+00, %D ;		<float> [#uses=1]
+    %F = fpext float %E to double   ; <double> [#uses=1]
+    %G = fsub double %F, %C         ; <double> [#uses=1]
+    ret double %G
+; CHECK-LABEL: test_FMSUB_EXT3:
+; CHECK: fneg
+; CHECK-NEXT: fmsub
+; CHECK-NEXT: blr
+                                
+; CHECK-VSX-LABEL: test_FMSUB_EXT3:
+; CHECK-VSX: xsnegdp
+; CHECK-VSX-NEXT: xsmsubmdp
+; CHECK-VSX-NEXT: blr
+}
+    
+define double @test_FMSUB_EXT4(float %A, float %B, double %C) {
+    %D = fmul float %A, %B          ; <float> [#uses=1]
+    %E = fpext float %D to double   ; <double> [#uses=1]
+    %F = fsub double -0.000000e+00, %E ;		<double> [#uses=1]
+    %G = fsub double %F, %C         ; <double> [#uses=1]
+    ret double %G
+; CHECK-LABEL: test_FMSUB_EXT4:
+; CHECK: fneg
+; CHECK-NEXT: fmsub
+; CHECK-NEXT: blr
+                                
+; CHECK-VSX-LABEL: test_FMSUB_EXT4:
+; CHECK-VSX: xsnegdp
+; CHECK-VSX-NEXT: xsmsubmdp
+; CHECK-VSX-NEXT: blr
+}  
+\ No newline at end of file
diff --git a/test/CodeGen/PowerPC/fma-mutate.ll b/test/CodeGen/PowerPC/fma-mutate.ll
new file mode 100644
index 000000000000..1a391f4c2305
--- /dev/null
+++ b/test/CodeGen/PowerPC/fma-mutate.ll
@@ -0,0 +1,21 @@
+; Test several VSX FMA mutation opportunities.  The first one isn't a
+; reasonable transformation because the killed product register is the
+; same as the FMA target register.  The second one is legal.  The third
+; one doesn't fit the feeding-copy pattern.
+
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=+vsx | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare double @llvm.sqrt.f64(double)
+
+define double @foo3(double %a) nounwind {
+  %r = call double @llvm.sqrt.f64(double %a)
+  ret double %r
+
+; CHECK: @foo3
+; CHECK: xsnmsubadp [[REG:[0-9]+]], {{[0-9]+}}, [[REG]]
+; CHECK: xsmaddmdp
+; CHECK: xsmaddadp
+}
+
diff --git a/test/CodeGen/PowerPC/fma.ll b/test/CodeGen/PowerPC/fma.ll
index db19761b431c..ab5251b2a554 100644
--- a/test/CodeGen/PowerPC/fma.ll
+++ b/test/CodeGen/PowerPC/fma.ll
@@ -1,12 +1,21 @@
-; RUN: llc < %s -march=ppc32 -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -march=ppc32 -fp-contract=fast -mattr=-vsx | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 | FileCheck -check-prefix=CHECK-VSX %s
+
+declare double @dummy1(double) #0
+declare double @dummy2(double, double) #0
+declare double @dummy3(double, double, double) #0
 
 define double @test_FMADD1(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
-	%E = fadd double %D, %C		; <double> [#uses=1]
+	%E = fadd double %C, %D		; <double> [#uses=1]
 	ret double %E
 ; CHECK-LABEL: test_FMADD1:
 ; CHECK: fmadd
 ; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMADD1:
+; CHECK-VSX: xsmaddmdp
+; CHECK-VSX-NEXT: blr
 }
 
 define double @test_FMADD2(double %A, double %B, double %C) {
@@ -16,15 +25,38 @@ define double @test_FMADD2(double %A, double %B, double %C) {
 ; CHECK-LABEL: test_FMADD2:
 ; CHECK: fmadd
 ; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMADD2:
+; CHECK-VSX: xsmaddmdp
+; CHECK-VSX-NEXT: blr
 }
 
-define double @test_FMSUB(double %A, double %B, double %C) {
+define double @test_FMSUB1(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
 	%E = fsub double %D, %C		; <double> [#uses=1]
 	ret double %E
-; CHECK-LABEL: test_FMSUB:
+; CHECK-LABEL: test_FMSUB1:
 ; CHECK: fmsub
 ; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FMSUB1:
+; CHECK-VSX: xsmsubmdp
+; CHECK-VSX-NEXT: blr
+}
+
+define double @test_FMSUB2(double %A, double %B, double %C, double %D) {
+	%E = fmul double %A, %B 	; <double> [#uses=2]
+	%F = fadd double %E, %C 	; <double> [#uses=1]
+	%G = fsub double %E, %D 	; <double> [#uses=1]
+	%H = call double @dummy2(double %F, double %G)      ; <double> [#uses=1]
+	ret double %H
+; CHECK-LABEL: test_FMSUB2:
+; CHECK: fmadd
+; CHECK-NEXT: fmsub
+
+; CHECK-VSX-LABEL: test_FMSUB2:
+; CHECK-VSX: xsmaddadp
+; CHECK-VSX-NEXT: xsmsubmdp
 }
 
 define double @test_FNMADD1(double %A, double %B, double %C) {
@@ -35,6 +67,10 @@ define double @test_FNMADD1(double %A, double %B, double %C) {
 ; CHECK-LABEL: test_FNMADD1:
 ; CHECK: fnmadd
 ; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FNMADD1:
+; CHECK-VSX: xsnmaddmdp
+; CHECK-VSX-NEXT: blr
 }
 
 define double @test_FNMADD2(double %A, double %B, double %C) {
@@ -45,6 +81,10 @@ define double @test_FNMADD2(double %A, double %B, double %C) {
 ; CHECK-LABEL: test_FNMADD2:
 ; CHECK: fnmadd
 ; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FNMADD2:
+; CHECK-VSX: xsnmaddmdp
+; CHECK-VSX-NEXT: blr
 }
 
 define double @test_FNMSUB1(double %A, double %B, double %C) {
@@ -54,6 +94,9 @@ define double @test_FNMSUB1(double %A, double %B, double %C) {
 ; CHECK-LABEL: test_FNMSUB1:
 ; CHECK: fnmsub
 ; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FNMSUB1:
+; CHECK-VSX: xsnmsubmdp
 }
 
 define double @test_FNMSUB2(double %A, double %B, double %C) {
@@ -64,6 +107,10 @@ define double @test_FNMSUB2(double %A, double %B, double %C) {
 ; CHECK-LABEL: test_FNMSUB2:
 ; CHECK: fnmsub
 ; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FNMSUB2:
+; CHECK-VSX: xsnmsubmdp
+; CHECK-VSX-NEXT: blr
 }
 
 define float @test_FNMSUBS(float %A, float %B, float %C) {
@@ -74,4 +121,8 @@ define float @test_FNMSUBS(float %A, float %B, float %C) {
 ; CHECK-LABEL: test_FNMSUBS:
 ; CHECK: fnmsubs
 ; CHECK-NEXT: blr
+
+; CHECK-VSX-LABEL: test_FNMSUBS:
+; CHECK-VSX: fnmsubs
+; CHECK-VSX-NEXT: blr
 }
diff --git a/test/CodeGen/PowerPC/fmaxnum.ll b/test/CodeGen/PowerPC/fmaxnum.ll
new file mode 100644
index 000000000000..182585029071
--- /dev/null
+++ b/test/CodeGen/PowerPC/fmaxnum.ll
@@ -0,0 +1,86 @@
+; RUN: llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s
+
+declare float @fmaxf(float, float)
+declare double @fmax(double, double)
+declare ppc_fp128 @fmaxl(ppc_fp128, ppc_fp128)
+declare float @llvm.maxnum.f32(float, float)
+declare double @llvm.maxnum.f64(double, double)
+declare ppc_fp128 @llvm.maxnum.ppcf128(ppc_fp128, ppc_fp128)
+
+declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
+declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>)
+
+; CHECK-LABEL: @test_fmaxf
+; CHECK: bl fmaxf
+define float @test_fmaxf(float %x, float %y) {
+  %z = call float @fmaxf(float %x, float %y) readnone
+  ret float %z
+}
+
+; CHECK-LABEL: @test_fmax
+; CHECK: bl fmax
+define double @test_fmax(double %x, double %y) {
+  %z = call double @fmax(double %x, double %y) readnone
+  ret double %z
+}
+
+; CHECK-LABEL: @test_fmaxl
+; CHECK: bl fmaxl
+define ppc_fp128 @test_fmaxl(ppc_fp128 %x, ppc_fp128 %y) {
+  %z = call ppc_fp128 @fmaxl(ppc_fp128 %x, ppc_fp128 %y) readnone
+  ret ppc_fp128 %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmaxf
+; CHECK: bl fmaxf
+define float @test_intrinsic_fmaxf(float %x, float %y) {
+  %z = call float @llvm.maxnum.f32(float %x, float %y) readnone
+  ret float %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmax
+; CHECK: bl fmax
+define double @test_intrinsic_fmax(double %x, double %y) {
+  %z = call double @llvm.maxnum.f64(double %x, double %y) readnone
+  ret double %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmaxl
+; CHECK: bl fmaxl
+define ppc_fp128 @test_intrinsic_fmaxl(ppc_fp128 %x, ppc_fp128 %y) {
+  %z = call ppc_fp128 @llvm.maxnum.ppcf128(ppc_fp128 %x, ppc_fp128 %y) readnone
+  ret ppc_fp128 %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmaxf_v2f32
+; CHECK: bl fmaxf
+; CHECK: bl fmaxf
+define <2 x float> @test_intrinsic_fmaxf_v2f32(<2 x float> %x, <2 x float> %y) {
+  %z = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
+  ret <2 x float> %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmaxf_v4f32
+; CHECK: bl fmaxf
+; CHECK: bl fmaxf
+; CHECK: bl fmaxf
+; CHECK: bl fmaxf
+define <4 x float> @test_intrinsic_fmaxf_v4f32(<4 x float> %x, <4 x float> %y) {
+  %z = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
+  ret <4 x float> %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmaxf_v8f32
+; CHECK: bl fmaxf
+; CHECK: bl fmaxf
+; CHECK: bl fmaxf
+; CHECK: bl fmaxf
+; CHECK: bl fmaxf
+; CHECK: bl fmaxf
+; CHECK: bl fmaxf
+; CHECK: bl fmaxf
+define <8 x float> @test_intrinsic_fmaxf_v8f32(<8 x float> %x, <8 x float> %y) {
+  %z = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %x, <8 x float> %y) readnone
+  ret <8 x float> %z
+}
diff --git a/test/CodeGen/PowerPC/fminnum.ll b/test/CodeGen/PowerPC/fminnum.ll
new file mode 100644
index 000000000000..fe91284cdb72
--- /dev/null
+++ b/test/CodeGen/PowerPC/fminnum.ll
@@ -0,0 +1,86 @@
+; RUN: llc -march=ppc32 -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s
+
+declare float @fminf(float, float)
+declare double @fmin(double, double)
+declare ppc_fp128 @fminl(ppc_fp128, ppc_fp128)
+declare float @llvm.minnum.f32(float, float)
+declare double @llvm.minnum.f64(double, double)
+declare ppc_fp128 @llvm.minnum.ppcf128(ppc_fp128, ppc_fp128)
+
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
+declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>)
+
+; CHECK-LABEL: @test_fminf
+; CHECK: bl fminf
+define float @test_fminf(float %x, float %y) {
+  %z = call float @fminf(float %x, float %y) readnone
+  ret float %z
+}
+
+; CHECK-LABEL: @test_fmin
+; CHECK: bl fmin
+define double @test_fmin(double %x, double %y) {
+  %z = call double @fmin(double %x, double %y) readnone
+  ret double %z
+}
+
+; CHECK-LABEL: @test_fminl
+; CHECK: bl fminl
+define ppc_fp128 @test_fminl(ppc_fp128 %x, ppc_fp128 %y) {
+  %z = call ppc_fp128 @fminl(ppc_fp128 %x, ppc_fp128 %y) readnone
+  ret ppc_fp128 %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmin_f32
+; CHECK: bl fminf
+define float @test_intrinsic_fmin_f32(float %x, float %y) {
+  %z = call float @llvm.minnum.f32(float %x, float %y) readnone
+  ret float %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmin_f64
+; CHECK: bl fmin
+define double @test_intrinsic_fmin_f64(double %x, double %y) {
+  %z = call double @llvm.minnum.f64(double %x, double %y) readnone
+  ret double %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmin_f128
+; CHECK: bl fminl
+define ppc_fp128 @test_intrinsic_fmin_f128(ppc_fp128 %x, ppc_fp128 %y) {
+  %z = call ppc_fp128 @llvm.minnum.ppcf128(ppc_fp128 %x, ppc_fp128 %y) readnone
+  ret ppc_fp128 %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fminf_v2f32
+; CHECK: bl fminf
+; CHECK: bl fminf
+define <2 x float> @test_intrinsic_fminf_v2f32(<2 x float> %x, <2 x float> %y) {
+  %z = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
+  ret <2 x float> %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmin_v4f32
+; CHECK: bl fminf
+; CHECK: bl fminf
+; CHECK: bl fminf
+; CHECK: bl fminf
+define <4 x float> @test_intrinsic_fmin_v4f32(<4 x float> %x, <4 x float> %y) {
+  %z = call <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
+  ret <4 x float> %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmin_v8f32
+; CHECK: bl fminf
+; CHECK: bl fminf
+; CHECK: bl fminf
+; CHECK: bl fminf
+; CHECK: bl fminf
+; CHECK: bl fminf
+; CHECK: bl fminf
+; CHECK: bl fminf
+define <8 x float> @test_intrinsic_fmin_v8f32(<8 x float> %x, <8 x float> %y) {
+  %z = call <8 x float> @llvm.minnum.v8f32(<8 x float> %x, <8 x float> %y) readnone
+  ret <8 x float> %z
+}
diff --git a/test/CodeGen/PowerPC/fnabs.ll b/test/CodeGen/PowerPC/fnabs.ll
index 9fa2dcb2909c..fc6a04e00941 100644
--- a/test/CodeGen/PowerPC/fnabs.ll
+++ b/test/CodeGen/PowerPC/fnabs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 | grep fnabs
+; RUN: llc < %s -mattr=-vsx -march=ppc32 | grep fnabs
 
 declare double @fabs(double)
 
diff --git a/test/CodeGen/PowerPC/fp-branch.ll b/test/CodeGen/PowerPC/fp-branch.ll
index 673da027e229..f5857563745b 100644
--- a/test/CodeGen/PowerPC/fp-branch.ll
+++ b/test/CodeGen/PowerPC/fp-branch.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 | grep fcmp | count 1
+; RUN: llc < %s -mattr=-vsx -march=ppc32 | grep fcmp | count 1
 
 declare i1 @llvm.isunordered.f64(double, double)
 
diff --git a/test/CodeGen/PowerPC/fp-to-int-ext.ll b/test/CodeGen/PowerPC/fp-to-int-ext.ll
new file mode 100644
index 000000000000..bfacd89ca1a2
--- /dev/null
+++ b/test/CodeGen/PowerPC/fp-to-int-ext.ll
@@ -0,0 +1,69 @@
+; RUN: llc -mcpu=a2 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define double @foo1(i32* %x) #0 {
+entry:
+  %0 = load i32* %x, align 4
+  %conv = sext i32 %0 to i64
+  %conv1 = sitofp i64 %conv to double
+  ret double %conv1
+
+; CHECK-LABEL: @foo1
+; CHECK: lfiwax [[REG1:[0-9]+]], 0, 3
+; CHECK: fcfid 1, [[REG1]]
+; CHECK: blr
+}
+
+define double @foo2(i32* %x) #0 {
+entry:
+  %0 = load i32* %x, align 4
+  %conv = zext i32 %0 to i64
+  %conv1 = sitofp i64 %conv to double
+  ret double %conv1
+
+; CHECK-LABEL: @foo2
+; CHECK: lfiwzx [[REG1:[0-9]+]], 0, 3
+; CHECK: fcfid 1, [[REG1]]
+; CHECK: blr
+}
+
+define double @foo3(i32* %x) #0 {
+entry:
+  %0 = load i32* %x, align 4
+  %1 = add i32 %0, 8
+  %conv = zext i32 %1 to i64
+  %conv1 = sitofp i64 %conv to double
+  ret double %conv1
+
+; CHECK-LABEL: @foo3
+; CHECK-DAG: lwz [[REG1:[0-9]+]], 0(3)
+; CHECK-DAG: addi [[REG3:[0-9]+]], 1,
+; CHECK-DAG: addi [[REG2:[0-9]+]], [[REG1]], 8
+; CHECK-DAG: stw [[REG2]],
+; CHECK: lfiwzx [[REG4:[0-9]+]], 0, [[REG3]]
+; CHECK: fcfid 1, [[REG4]]
+; CHECK: blr
+}
+
+define double @foo4(i32* %x) #0 {
+entry:
+  %0 = load i32* %x, align 4
+  %1 = add i32 %0, 8
+  %conv = sext i32 %1 to i64
+  %conv1 = sitofp i64 %conv to double
+  ret double %conv1
+
+; CHECK-LABEL: @foo4
+; CHECK-DAG: lwz [[REG1:[0-9]+]], 0(3)
+; CHECK-DAG: addi [[REG3:[0-9]+]], 1,
+; CHECK-DAG: addi [[REG2:[0-9]+]], [[REG1]], 8
+; CHECK-DAG: stw [[REG2]],
+; CHECK: lfiwax [[REG4:[0-9]+]], 0, [[REG3]]
+; CHECK: fcfid 1, [[REG4]]
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/fp-to-int-to-fp.ll b/test/CodeGen/PowerPC/fp-to-int-to-fp.ll
new file mode 100644
index 000000000000..f56b9b38eadf
--- /dev/null
+++ b/test/CodeGen/PowerPC/fp-to-int-to-fp.ll
@@ -0,0 +1,70 @@
+; RUN: llc -mcpu=a2 < %s | FileCheck %s -check-prefix=FPCVT
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s -check-prefix=PPC64
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define float @fool(float %X) #0 {
+entry:
+  %conv = fptosi float %X to i64
+  %conv1 = sitofp i64 %conv to float
+  ret float %conv1
+
+; FPCVT-LABEL: @fool
+; FPCVT: fctidz [[REG1:[0-9]+]], 1
+; FPCVT: fcfids 1, [[REG1]]
+; FPCVT: blr
+
+; PPC64-LABEL: @fool
+; PPC64: fctidz [[REG1:[0-9]+]], 1
+; PPC64: fcfid [[REG2:[0-9]+]], [[REG1]]
+; PPC64: frsp 1, [[REG2]]
+; PPC64: blr
+}
+
+; Function Attrs: nounwind readnone
+define double @foodl(double %X) #0 {
+entry:
+  %conv = fptosi double %X to i64
+  %conv1 = sitofp i64 %conv to double
+  ret double %conv1
+
+; FPCVT-LABEL: @foodl
+; FPCVT: fctidz [[REG1:[0-9]+]], 1
+; FPCVT: fcfid 1, [[REG1]]
+; FPCVT: blr
+
+; PPC64-LABEL: @foodl
+; PPC64: fctidz [[REG1:[0-9]+]], 1
+; PPC64: fcfid 1, [[REG1]]
+; PPC64: blr
+}
+
+; Function Attrs: nounwind readnone
+define float @fooul(float %X) #0 {
+entry:
+  %conv = fptoui float %X to i64
+  %conv1 = uitofp i64 %conv to float
+  ret float %conv1
+
+; FPCVT-LABEL: @fooul
+; FPCVT: fctiduz [[REG1:[0-9]+]], 1
+; FPCVT: fcfidus 1, [[REG1]]
+; FPCVT: blr
+}
+
+; Function Attrs: nounwind readnone
+define double @fooudl(double %X) #0 {
+entry:
+  %conv = fptoui double %X to i64
+  %conv1 = uitofp i64 %conv to double
+  ret double %conv1
+
+; FPCVT-LABEL: @fooudl
+; FPCVT: fctiduz [[REG1:[0-9]+]], 1
+; FPCVT: fcfidu 1, [[REG1]]
+; FPCVT: blr
+}
+
+attributes #0 = { nounwind readnone }
+
diff --git a/test/CodeGen/PowerPC/fp_to_uint.ll b/test/CodeGen/PowerPC/fp_to_uint.ll
index 1360b62d273b..187d2d6ee1e9 100644
--- a/test/CodeGen/PowerPC/fp_to_uint.ll
+++ b/test/CodeGen/PowerPC/fp_to_uint.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=ppc32 | grep fctiwz | count 1
+; RUN: llc < %s -mattr=-vsx -march=ppc32 | grep fctiwz | count 1
+
 
 define i16 @foo(float %a) {
 entry:
diff --git a/test/CodeGen/PowerPC/fsel.ll b/test/CodeGen/PowerPC/fsel.ll
index 8cd43e616bf6..afceb63d355d 100644
--- a/test/CodeGen/PowerPC/fsel.ll
+++ b/test/CodeGen/PowerPC/fsel.ll
@@ -1,5 +1,6 @@
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-no-infs-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=CHECK-FM %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-no-infs-fp-math -enable-no-nans-fp-math -mattr=-vsx | FileCheck -check-prefix=CHECK-FM %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-no-infs-fp-math -enable-no-nans-fp-math -mattr=+vsx | FileCheck -check-prefix=CHECK-FM-VSX %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -16,6 +17,10 @@ entry:
 ; CHECK-FM: @zerocmp1
 ; CHECK-FM: fsel 1, 1, 2, 3
 ; CHECK-FM: blr
+
+; CHECK-FM-VSX: @zerocmp1
+; CHECK-FM-VSX: fsel 1, 1, 2, 3
+; CHECK-FM-VSX: blr
 }
 
 define double @zerocmp2(double %a, double %y, double %z) #0 {
@@ -32,6 +37,11 @@ entry:
 ; CHECK-FM: fneg [[REG:[0-9]+]], 1
 ; CHECK-FM: fsel 1, [[REG]], 3, 2
 ; CHECK-FM: blr
+
+; CHECK-FM-VSX: @zerocmp2
+; CHECK-FM-VSX: xsnegdp [[REG:[0-9]+]], 1
+; CHECK-FM-VSX: fsel 1, [[REG]], 3, 2
+; CHECK-FM-VSX: blr
 }
 
 define double @zerocmp3(double %a, double %y, double %z) #0 {
@@ -49,6 +59,12 @@ entry:
 ; CHECK-FM: fneg [[REG2:[0-9]+]], 1
 ; CHECK-FM: fsel 1, [[REG2]], [[REG]], 3
 ; CHECK-FM: blr
+
+; CHECK-FM-VSX: @zerocmp3
+; CHECK-FM-VSX: xsnegdp [[REG2:[0-9]+]], 1
+; CHECK-FM-VSX: fsel [[REG:[0-9]+]], 1, 2, 3
+; CHECK-FM-VSX: fsel 1, [[REG2]], [[REG]], 3
+; CHECK-FM-VSX: blr
 }
 
 define double @min1(double %a, double %b) #0 {
@@ -65,6 +81,11 @@ entry:
 ; CHECK-FM: fsub [[REG:[0-9]+]], 2, 1
 ; CHECK-FM: fsel 1, [[REG]], 1, 2
 ; CHECK-FM: blr
+
+; CHECK-FM-VSX: @min1
+; CHECK-FM-VSX: xssubdp [[REG:[0-9]+]], 2, 1
+; CHECK-FM-VSX: fsel 1, [[REG]], 1, 2
+; CHECK-FM-VSX: blr
 }
 
 define double @max1(double %a, double %b) #0 {
@@ -81,6 +102,11 @@ entry:
 ; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2
 ; CHECK-FM: fsel 1, [[REG]], 1, 2
 ; CHECK-FM: blr
+
+; CHECK-FM-VSX: @max1
+; CHECK-FM-VSX: xssubdp [[REG:[0-9]+]], 1, 2
+; CHECK-FM-VSX: fsel 1, [[REG]], 1, 2
+; CHECK-FM-VSX: blr
 }
 
 define double @cmp1(double %a, double %b, double %y, double %z) #0 {
@@ -97,6 +123,11 @@ entry:
 ; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2
 ; CHECK-FM: fsel 1, [[REG]], 3, 4
 ; CHECK-FM: blr
+
+; CHECK-FM-VSX: @cmp1
+; CHECK-FM-VSX: xssubdp [[REG:[0-9]+]], 1, 2
+; CHECK-FM-VSX: fsel 1, [[REG]], 3, 4
+; CHECK-FM-VSX: blr
 }
 
 define double @cmp2(double %a, double %b, double %y, double %z) #0 {
@@ -113,6 +144,11 @@ entry:
 ; CHECK-FM: fsub [[REG:[0-9]+]], 2, 1
 ; CHECK-FM: fsel 1, [[REG]], 4, 3
 ; CHECK-FM: blr
+
+; CHECK-FM-VSX: @cmp2
+; CHECK-FM-VSX: xssubdp [[REG:[0-9]+]], 2, 1
+; CHECK-FM-VSX: fsel 1, [[REG]], 4, 3
+; CHECK-FM-VSX: blr
 }
 
 define double @cmp3(double %a, double %b, double %y, double %z) #0 {
@@ -131,6 +167,13 @@ entry:
 ; CHECK-FM: fneg [[REG3:[0-9]+]], [[REG]]
 ; CHECK-FM: fsel 1, [[REG3]], [[REG2]], 4
 ; CHECK-FM: blr
+
+; CHECK-FM-VSX: @cmp3
+; CHECK-FM-VSX: xssubdp [[REG:[0-9]+]], 1, 2
+; CHECK-FM-VSX: xsnegdp [[REG3:[0-9]+]], [[REG]]
+; CHECK-FM-VSX: fsel [[REG2:[0-9]+]], [[REG]], 3, 4
+; CHECK-FM-VSX: fsel 1, [[REG3]], [[REG2]], 4
+; CHECK-FM-VSX: blr
 }
 
 attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/PowerPC/fsqrt.ll b/test/CodeGen/PowerPC/fsqrt.ll
index bf8c4a22c95f..019dfa4e67b6 100644
--- a/test/CodeGen/PowerPC/fsqrt.ll
+++ b/test/CodeGen/PowerPC/fsqrt.ll
@@ -1,13 +1,13 @@
 ; fsqrt should be generated when the fsqrt feature is enabled, but not 
 ; otherwise.
 
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \
+; RUN: llc < %s -mattr=-vsx -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \
 ; RUN:   grep "fsqrt f1, f1"
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
+; RUN: llc < %s -mattr=-vsx -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \
 ; RUN:   grep "fsqrt f1, f1"
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \
+; RUN: llc < %s -mattr=-vsx -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \
 ; RUN:   not grep "fsqrt f1, f1"
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \
+; RUN: llc < %s -mattr=-vsx -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \
 ; RUN:   not grep "fsqrt f1, f1"
 
 declare double @llvm.sqrt.f64(double)
diff --git a/test/CodeGen/PowerPC/glob-comp-aa-crash.ll b/test/CodeGen/PowerPC/glob-comp-aa-crash.ll
index f97d0ff6268c..2ea036f83496 100644
--- a/test/CodeGen/PowerPC/glob-comp-aa-crash.ll
+++ b/test/CodeGen/PowerPC/glob-comp-aa-crash.ll
@@ -130,10 +130,10 @@ attributes #4 = { optsize }
 attributes #5 = { nounwind optsize }
 attributes #6 = { noreturn optsize }
 
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"bool", metadata !1}
-!4 = metadata !{i8 0, i8 2}
-!5 = metadata !{metadata !0, metadata !0, i64 0}
-!6 = metadata !{metadata !3, metadata !3, i64 0}
+!0 = !{!"any pointer", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"bool", !1}
+!4 = !{i8 0, i8 2}
+!5 = !{!0, !0, i64 0}
+!6 = !{!3, !3, i64 0}
diff --git a/test/CodeGen/PowerPC/i1-ext-fold.ll b/test/CodeGen/PowerPC/i1-ext-fold.ll
new file mode 100644
index 000000000000..19bd8ff65555
--- /dev/null
+++ b/test/CodeGen/PowerPC/i1-ext-fold.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo(i32 signext %a, i32 signext %b) #0 {
+entry:
+  %cmp = icmp slt i32 %a, %b
+  %conv = zext i1 %cmp to i32
+  %shl = shl nuw nsw i32 %conv, 4
+  ret i32 %shl
+
+; CHECK-LABEL: @foo
+; CHECK-DAG: cmpw
+; CHECK-DAG: li [[REG1:[0-9]+]], 0
+; CHECK-DAG: li [[REG2:[0-9]+]], 16
+; CHECK: isel 3, [[REG2]], [[REG1]],
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo2(i32 signext %a, i32 signext %b) #0 {
+entry:
+  %cmp = icmp slt i32 %a, %b
+  %conv = zext i1 %cmp to i32
+  %shl = shl nuw nsw i32 %conv, 4
+  %add1 = or i32 %shl, 5
+  ret i32 %add1
+
+; CHECK-LABEL: @foo2
+; CHECK-DAG: cmpw
+; CHECK-DAG: li [[REG1:[0-9]+]], 5
+; CHECK-DAG: li [[REG2:[0-9]+]], 21
+; CHECK: isel 3, [[REG2]], [[REG1]],
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo3(i32 signext %a, i32 signext %b) #0 {
+entry:
+  %cmp = icmp sle i32 %a, %b
+  %conv = zext i1 %cmp to i32
+  %shl = shl nuw nsw i32 %conv, 4
+  ret i32 %shl
+
+; CHECK-LABEL: @foo3
+; CHECK-DAG: cmpw
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK: isel 3, 0, [[REG1]],
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readnone }
+
diff --git a/test/CodeGen/PowerPC/i64_fp.ll b/test/CodeGen/PowerPC/i64_fp.ll
index d53c94878409..67f4e0bc4b69 100644
--- a/test/CodeGen/PowerPC/i64_fp.ll
+++ b/test/CodeGen/PowerPC/i64_fp.ll
@@ -1,21 +1,21 @@
 ; fcfid and fctid should be generated when the 64bit feature is enabled, but not
 ; otherwise.
 
-; RUN: llc < %s -march=ppc32 -mattr=+64bit | \
+; RUN: llc < %s -mattr=-vsx -march=ppc32 -mattr=+64bit | \
 ; RUN:   grep fcfid
-; RUN: llc < %s -march=ppc32 -mattr=+64bit | \
+; RUN: llc < %s -mattr=-vsx -march=ppc32 -mattr=+64bit | \
 ; RUN:   grep fctidz
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | \
+; RUN: llc < %s -mattr=-vsx -march=ppc32 -mcpu=g5 | \
 ; RUN:   grep fcfid
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | \
+; RUN: llc < %s -mattr=-vsx -march=ppc32 -mcpu=g5 | \
 ; RUN:   grep fctidz
-; RUN: llc < %s -march=ppc32 -mattr=-64bit | \
+; RUN: llc < %s -mattr=-vsx -march=ppc32 -mattr=-64bit | \
 ; RUN:   not grep fcfid
-; RUN: llc < %s -march=ppc32 -mattr=-64bit | \
+; RUN: llc < %s -mattr=-vsx -march=ppc32 -mattr=-64bit | \
 ; RUN:   not grep fctidz
-; RUN: llc < %s -march=ppc32 -mcpu=g4 | \
+; RUN: llc < %s -mattr=-vsx -march=ppc32 -mcpu=g4 | \
 ; RUN:   not grep fcfid
-; RUN: llc < %s -march=ppc32 -mcpu=g4 | \
+; RUN: llc < %s -mattr=-vsx -march=ppc32 -mcpu=g4 | \
 ; RUN:   not grep fctidz
 
 define double @X(double %Y) {
diff --git a/test/CodeGen/PowerPC/ia-neg-const.ll b/test/CodeGen/PowerPC/ia-neg-const.ll
index 165fc1339d0b..556ab80e2c03 100644
--- a/test/CodeGen/PowerPC/ia-neg-const.ll
+++ b/test/CodeGen/PowerPC/ia-neg-const.ll
@@ -14,8 +14,8 @@ entry:
 }
 
 ; CHECK: ld
-; CHECK-NOT: addi   3,3,4294967295
-; CHECK: addi   3,3,-1
+; CHECK-NOT: addi   3, 3, 4294967295
+; CHECK: addi   3, 3, -1
 ; CHECK: blr
 
 ; Function Attrs: nounwind
diff --git a/test/CodeGen/PowerPC/in-asm-f64-reg.ll b/test/CodeGen/PowerPC/in-asm-f64-reg.ll
index 1321dfce2027..08b1a2c876e0 100644
--- a/test/CodeGen/PowerPC/in-asm-f64-reg.ll
+++ b/test/CodeGen/PowerPC/in-asm-f64-reg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -no-integrated-as | FileCheck %s
 
 define void @f() {
 ; CHECK: @f
diff --git a/test/CodeGen/PowerPC/inlineasm-i64-reg.ll b/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
index 5e31cd58301c..4d8e704f07a0 100644
--- a/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
+++ b/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
@@ -105,4 +105,4 @@ if.end40:
 attributes #0 = { alwaysinline inlinehint nounwind }
 attributes #1 = { nounwind }
 
-!0 = metadata !{i32 -2146895770}
+!0 = !{i32 -2146895770}
diff --git a/test/CodeGen/PowerPC/lbz-from-ld-shift.ll b/test/CodeGen/PowerPC/lbz-from-ld-shift.ll
new file mode 100644
index 000000000000..3eacd6a45fb4
--- /dev/null
+++ b/test/CodeGen/PowerPC/lbz-from-ld-shift.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readonly
+define signext i32 @test(i32* nocapture readonly %P) #0 {
+entry:
+  %0 = load i32* %P, align 4
+  %shr = lshr i32 %0, 24
+  ret i32 %shr
+
+; CHECK-LABEL: @test
+; CHECK: lbz 3, 0(3)
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readonly }
+
diff --git a/test/CodeGen/PowerPC/ld-st-upd.ll b/test/CodeGen/PowerPC/ld-st-upd.ll
new file mode 100644
index 000000000000..24f31aca05ad
--- /dev/null
+++ b/test/CodeGen/PowerPC/ld-st-upd.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+define i32* @test4(i32* readonly %X, i32* nocapture %dest) #0 {
+  %Y = getelementptr i32* %X, i64 4
+  %A = load i32* %Y, align 4
+  store i32 %A, i32* %dest, align 4
+  ret i32* %Y
+
+; CHECK-LABEL: @test4
+; CHECK: lwzu [[REG1:[0-9]+]], 16(3)
+; CHECK: stw [[REG1]], 0(4)
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/mcm-10.ll b/test/CodeGen/PowerPC/mcm-10.ll
index c3ab74725ce6..9565ebc780bf 100644
--- a/test/CodeGen/PowerPC/mcm-10.ll
+++ b/test/CodeGen/PowerPC/mcm-10.ll
@@ -22,5 +22,4 @@ entry:
 ; CHECK-NOT: extsw
 ; CHECK: stw {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
 ; CHECK: .type [[VAR]],@object
-; CHECK: .local [[VAR]]
-; CHECK: .comm [[VAR]],4,4
+; CHECK: .lcomm [[VAR]],4,4
diff --git a/test/CodeGen/PowerPC/mcm-12.ll b/test/CodeGen/PowerPC/mcm-12.ll
index b31b6053fca0..668b54f5557b 100644
--- a/test/CodeGen/PowerPC/mcm-12.ll
+++ b/test/CodeGen/PowerPC/mcm-12.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O1 -code-model=medium -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O1 -code-model=medium -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
 
 ; Test peephole optimization for medium code model (32-bit TOC offsets)
 ; for loading a value from the constant pool (TOC-relative).
@@ -16,3 +17,10 @@ entry:
 ; CHECK-LABEL: test_double_const:
 ; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
 ; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]])
+
+; CHECK-VSX: [[VAR:[a-z0-9A-Z_.]+]]:
+; CHECK-VSX: .quad 4562098671269285104
+; CHECK-VSX-LABEL: test_double_const:
+; CHECK-VSX: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; CHECK-VSX: addi [[REG1]], {{[0-9]+}}, [[VAR]]@toc@l
+; CHECK-VSX: lxsdx {{[0-9]+}}, 0, [[REG1]]
diff --git a/test/CodeGen/PowerPC/mcm-2.ll b/test/CodeGen/PowerPC/mcm-2.ll
index fee98d838ff1..811600ecdbf6 100644
--- a/test/CodeGen/PowerPC/mcm-2.ll
+++ b/test/CodeGen/PowerPC/mcm-2.ll
@@ -23,8 +23,7 @@ entry:
 ; MEDIUM: lwz {{[0-9]+}}, 0([[REG2]])
 ; MEDIUM: stw {{[0-9]+}}, 0([[REG2]])
 ; MEDIUM: .type [[VAR]],@object
-; MEDIUM: .local [[VAR]]
-; MEDIUM: .comm [[VAR]],4,4
+; MEDIUM: .lcomm [[VAR]],4,4
 
 ; LARGE-LABEL: test_fn_static:
 ; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
@@ -34,6 +33,5 @@ entry:
 ; LARGE: [[VAR]]:
 ; LARGE: .tc [[VAR2:[a-z0-9A-Z_.]+]][TC],[[VAR2]]
 ; LARGE: .type [[VAR2]],@object
-; LARGE: .local [[VAR2]]
-; LARGE: .comm [[VAR2]],4,4
+; LARGE: .lcomm [[VAR2]],4,4
 
diff --git a/test/CodeGen/PowerPC/mcm-4.ll b/test/CodeGen/PowerPC/mcm-4.ll
index 73dd902cd028..e4ceb3ad6da7 100644
--- a/test/CodeGen/PowerPC/mcm-4.ll
+++ b/test/CodeGen/PowerPC/mcm-4.ll
@@ -1,5 +1,7 @@
-; RUN: llc -mcpu=pwr7 -O0 -code-model=medium -fast-isel=false <%s | FileCheck -check-prefix=MEDIUM %s
-; RUN: llc -mcpu=pwr7 -O0 -code-model=large -fast-isel=false <%s | FileCheck -check-prefix=LARGE %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=MEDIUM %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=MEDIUM-VSX %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=LARGE %s
+; RUN: llc -mcpu=pwr7 -O0 -code-model=large -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=LARGE-VSX %s
 
 ; Test correct code generation for medium and large code model
 ; for loading a value from the constant pool (TOC-relative).
@@ -19,9 +21,23 @@ entry:
 ; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
 ; MEDIUM: lfd {{[0-9]+}}, 0([[REG2]])
 
+; MEDIUM-VSX: [[VAR:[a-z0-9A-Z_.]+]]:
+; MEDIUM-VSX: .quad 4562098671269285104
+; MEDIUM-VSX-LABEL: test_double_const:
+; MEDIUM-VSX: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; MEDIUM-VSX: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; MEDIUM-VSX: lxsdx {{[0-9]+}}, 0, [[REG2]]
+
 ; LARGE: [[VAR:[a-z0-9A-Z_.]+]]:
 ; LARGE: .quad 4562098671269285104
 ; LARGE-LABEL: test_double_const:
 ; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR2:[a-z0-9A-Z_.]+]]@toc@ha
 ; LARGE: ld [[REG2:[0-9]+]], [[VAR2]]@toc@l([[REG1]])
 ; LARGE: lfd {{[0-9]+}}, 0([[REG2]])
+
+; LARGE-VSX: [[VAR:[a-z0-9A-Z_.]+]]:
+; LARGE-VSX: .quad 4562098671269285104
+; LARGE-VSX-LABEL: test_double_const:
+; LARGE-VSX: addis [[REG1:[0-9]+]], 2, [[VAR2:[a-z0-9A-Z_.]+]]@toc@ha
+; LARGE-VSX: ld [[REG2:[0-9]+]], [[VAR2]]@toc@l([[REG1]])
+; LARGE-VSX: lxsdx {{[0-9]+}}, 0, [[REG2]]
diff --git a/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll b/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll
index 659cdf74d026..743cc62ddba7 100644
--- a/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll
+++ b/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32
+; RUN: llc < %s -march=ppc32 -no-integrated-as
 ; ModuleID = 'mult-alt-generic.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
 target triple = "powerpc"
diff --git a/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll b/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll
index 3da06f65db83..29a57867f567 100644
--- a/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll
+++ b/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64
+; RUN: llc < %s -march=ppc64 -no-integrated-as
 ; ModuleID = 'mult-alt-generic.c'
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64"
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r0.ll b/test/CodeGen/PowerPC/named-reg-alloc-r0.ll
index e683f99bd422..b669c351afa5 100644
--- a/test/CodeGen/PowerPC/named-reg-alloc-r0.ll
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r0.ll
@@ -12,4 +12,4 @@ entry:
 
 declare i32 @llvm.read_register.i32(metadata) nounwind
 
-!0 = metadata !{metadata !"r0\00"}
+!0 = !{!"r0\00"}
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r1-64.ll b/test/CodeGen/PowerPC/named-reg-alloc-r1-64.ll
index b047f9f92588..419e12c02074 100644
--- a/test/CodeGen/PowerPC/named-reg-alloc-r1-64.ll
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r1-64.ll
@@ -15,4 +15,4 @@ entry:
 
 declare i64 @llvm.read_register.i64(metadata) nounwind
 
-!0 = metadata !{metadata !"r1\00"}
+!0 = !{!"r1\00"}
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r1.ll b/test/CodeGen/PowerPC/named-reg-alloc-r1.ll
index 9d0eb34caa5b..3ccab8ceca08 100644
--- a/test/CodeGen/PowerPC/named-reg-alloc-r1.ll
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r1.ll
@@ -17,4 +17,4 @@ entry:
 
 declare i32 @llvm.read_register.i32(metadata) nounwind
 
-!0 = metadata !{metadata !"r1\00"}
+!0 = !{!"r1\00"}
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r13-64.ll b/test/CodeGen/PowerPC/named-reg-alloc-r13-64.ll
index df5085bbf7df..74e31fdd136a 100644
--- a/test/CodeGen/PowerPC/named-reg-alloc-r13-64.ll
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r13-64.ll
@@ -15,4 +15,4 @@ entry:
 
 declare i64 @llvm.read_register.i64(metadata) nounwind
 
-!0 = metadata !{metadata !"r13\00"}
+!0 = !{!"r13\00"}
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r13.ll b/test/CodeGen/PowerPC/named-reg-alloc-r13.ll
index 900ebb2f4854..314f5d5f65b0 100644
--- a/test/CodeGen/PowerPC/named-reg-alloc-r13.ll
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r13.ll
@@ -15,4 +15,4 @@ entry:
 
 declare i32 @llvm.read_register.i32(metadata) nounwind
 
-!0 = metadata !{metadata !"r13\00"}
+!0 = !{!"r13\00"}
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r2-64.ll b/test/CodeGen/PowerPC/named-reg-alloc-r2-64.ll
index 0da33fa5f19a..d4ed05b9e50a 100644
--- a/test/CodeGen/PowerPC/named-reg-alloc-r2-64.ll
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r2-64.ll
@@ -14,4 +14,4 @@ entry:
 
 declare i64 @llvm.read_register.i64(metadata) nounwind
 
-!0 = metadata !{metadata !"r2\00"}
+!0 = !{!"r2\00"}
diff --git a/test/CodeGen/PowerPC/named-reg-alloc-r2.ll b/test/CodeGen/PowerPC/named-reg-alloc-r2.ll
index 51e7e3ee0339..262d034e16bd 100644
--- a/test/CodeGen/PowerPC/named-reg-alloc-r2.ll
+++ b/test/CodeGen/PowerPC/named-reg-alloc-r2.ll
@@ -15,4 +15,4 @@ entry:
 
 declare i32 @llvm.read_register.i32(metadata) nounwind
 
-!0 = metadata !{metadata !"r2\00"}
+!0 = !{!"r2\00"}
diff --git a/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll b/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll
new file mode 100644
index 000000000000..6beee253a2ec
--- /dev/null
+++ b/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll
@@ -0,0 +1,96 @@
+; RUN: llc -mcpu=a2 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readonly
+define double @test1(i64* nocapture readonly %x) #0 {
+entry:
+  %0 = load i64* %x, align 8
+  %conv = sitofp i64 %0 to double
+  ret double %conv
+
+; CHECK-LABEL: @test1
+; CHECK: lfd [[REG1:[0-9]+]], 0(3)
+; CHECK: fcfid 1, [[REG1]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readonly
+define double @test2(i32* nocapture readonly %x) #0 {
+entry:
+  %0 = load i32* %x, align 4
+  %conv = sitofp i32 %0 to double
+  ret double %conv
+
+; CHECK-LABEL: @test2
+; CHECK: lfiwax [[REG1:[0-9]+]], 0, 3
+; CHECK: fcfid 1, [[REG1]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define float @foo(float %X) #0 {
+entry:
+  %conv = fptosi float %X to i32
+  %conv1 = sitofp i32 %conv to float
+  ret float %conv1
+
+; CHECK-LABEL: @foo
+; CHECK-DAG: fctiwz [[REG2:[0-9]+]], 1
+; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
+; CHECK: stfiwx [[REG2]], 0, [[REG1]]
+; CHECK: lfiwax [[REG3:[0-9]+]], 0, [[REG1]]
+; CHECK: fcfids 1, [[REG3]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define double @food(double %X) #0 {
+entry:
+  %conv = fptosi double %X to i32
+  %conv1 = sitofp i32 %conv to double
+  ret double %conv1
+
+; CHECK-LABEL: @food
+; CHECK-DAG: fctiwz [[REG2:[0-9]+]], 1
+; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
+; CHECK: stfiwx [[REG2]], 0, [[REG1]]
+; CHECK: lfiwax [[REG3:[0-9]+]], 0, [[REG1]]
+; CHECK: fcfid 1, [[REG3]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define float @foou(float %X) #0 {
+entry:
+  %conv = fptoui float %X to i32
+  %conv1 = uitofp i32 %conv to float
+  ret float %conv1
+
+; CHECK-LABEL: @foou
+; CHECK-DAG: fctiwuz [[REG2:[0-9]+]], 1
+; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
+; CHECK: stfiwx [[REG2]], 0, [[REG1]]
+; CHECK: lfiwzx [[REG3:[0-9]+]], 0, [[REG1]]
+; CHECK: fcfidus 1, [[REG3]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define double @fooud(double %X) #0 {
+entry:
+  %conv = fptoui double %X to i32
+  %conv1 = uitofp i32 %conv to double
+  ret double %conv1
+
+; CHECK-LABEL: @fooud
+; CHECK-DAG: fctiwuz [[REG2:[0-9]+]], 1
+; CHECK-DAG: addi [[REG1:[0-9]+]], 1,
+; CHECK: stfiwx [[REG2]], 0, [[REG1]]
+; CHECK: lfiwzx [[REG3:[0-9]+]], 0, [[REG1]]
+; CHECK: fcfidu 1, [[REG3]]
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readonly }
+
diff --git a/test/CodeGen/PowerPC/post-ra-ec.ll b/test/CodeGen/PowerPC/post-ra-ec.ll
new file mode 100644
index 000000000000..9c61677650ba
--- /dev/null
+++ b/test/CodeGen/PowerPC/post-ra-ec.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.inode.0.12.120 = type { i8* }
+%struct.kstat2.1.13.121 = type { i32 }
+%struct.task_struct.4.16.124 = type { i8*, %struct.atomic_t.2.14.122, %struct.signal_struct.3.15.123* }
+%struct.atomic_t.2.14.122 = type { i32 }
+%struct.signal_struct.3.15.123 = type { i64 }
+%struct.pid.5.17.125 = type { i8* }
+
+; Function Attrs: nounwind
+define signext i32 @proc_task_getattr(%struct.inode.0.12.120* nocapture readonly %inode, %struct.kstat2.1.13.121* nocapture %stat) #0 {
+entry:
+  %call1.i = tail call %struct.task_struct.4.16.124* @get_pid_task(%struct.pid.5.17.125* undef, i32 zeroext 0) #0
+  br i1 undef, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %0 = load i64* undef, align 8
+  %conv.i = trunc i64 %0 to i32
+  %1 = load i32* null, align 4
+  %add = add i32 %1, %conv.i
+  store i32 %add, i32* null, align 4
+  %counter.i.i = getelementptr inbounds %struct.task_struct.4.16.124* %call1.i, i64 0, i32 1, i32 0
+  %2 = tail call i32 asm sideeffect "\09lwsync\0A1:\09lwarx\09$0,0,$1\09\09# atomic_dec_return\0A\09addic\09$0,$0,-1\0A\09stwcx.\09$0,0,$1\0A\09bne-\091b\0A\09sync\0A", "=&r,r,~{cr0},~{xer},~{memory}"(i32* %counter.i.i) #0
+  %cmp.i = icmp eq i32 %2, 0
+  br i1 %cmp.i, label %if.then.i, label %if.end
+
+; CHECK-LABEL: @proc_task_getattr
+; CHECK-NOT: stwcx. [[REG:[0-9]+]],0,[[REG]]
+; CHECK: blr
+
+if.then.i:                                        ; preds = %if.then
+  %3 = bitcast %struct.task_struct.4.16.124* %call1.i to i8*
+  tail call void @foo(i8* %3) #0
+  unreachable
+
+if.end:                                           ; preds = %if.then, %entry
+  ret i32 0
+}
+
+declare void @foo(i8*)
+
+declare %struct.task_struct.4.16.124* @get_pid_task(%struct.pid.5.17.125*, i32 zeroext)
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/ppc32-cyclecounter.ll b/test/CodeGen/PowerPC/ppc32-cyclecounter.ll
new file mode 100644
index 000000000000..9e2cd0b12880
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc32-cyclecounter.ll
@@ -0,0 +1,20 @@
+target datalayout = "E-m:e-p:32:32-i64:64-n32"
+target triple = "powerpc"
+; RUN: llc -mcpu=ppc < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+
+define i64 @test1() nounwind {
+entry:
+  %r = call i64 @llvm.readcyclecounter()
+  ret i64 %r
+}
+
+; CHECK: @test1
+; CHECK: mfspr 3, 269
+; CHECK: mfspr 4, 268
+; CHECK: mfspr [[REG:[0-9]+]], 269
+; CHECK: cmpw [[CR:[0-9]+]], 3, [[REG]]
+; CHECK: bne [[CR]], .LBB
+
+declare i64 @llvm.readcyclecounter()
+
diff --git a/test/CodeGen/PowerPC/ppc32-lshrti3.ll b/test/CodeGen/PowerPC/ppc32-lshrti3.ll
index 6e76feaf1b34..f773cce81be3 100644
--- a/test/CodeGen/PowerPC/ppc32-lshrti3.ll
+++ b/test/CodeGen/PowerPC/ppc32-lshrti3.ll
@@ -36,4 +36,4 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"clang version 3.5.0 (213754)"}
+!0 = !{!"clang version 3.5.0 (213754)"}
diff --git a/test/CodeGen/PowerPC/ppc32-pic-large.ll b/test/CodeGen/PowerPC/ppc32-pic-large.ll
new file mode 100644
index 000000000000..bb906ec78d11
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc32-pic-large.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -relocation-model=pic | FileCheck -check-prefix=LARGE-BSS %s
+@bar = common global i32 0, align 4
+
+declare i32 @call_foo(i32, ...)
+
+define i32 @foo() {
+entry:
+  %0 = load i32* @bar, align 4
+  %call = call i32 (i32, ...)* @call_foo(i32 %0, i32 0, i32 1, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64)
+  ret i32 %0
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"PIC Level", i32 2}
+; LARGE-BSS:       [[POFF:\.L[0-9]+\$poff]]:
+; LARGE-BSS-NEXT:    .long .LTOC-[[PB:\.L[0-9]+\$pb]]
+; LARGE-BSS-NEXT:  foo:
+; LARGE-BSS:         stw 30, -8(1)
+; LARGE-BSS:         bl [[PB]]
+; LARGE-BSS-NEXT:  [[PB]]:
+; LARGE-BSS:         mflr 30
+; LARGE-BSS:         lwz [[REG:[0-9]+]], [[POFF]]-[[PB]](30)
+; LARGE-BSS-NEXT:    add 30, [[REG]], 30
+; LARGE-BSS-DAG:     lwz [[VREG:[0-9]+]], [[VREF:\.LC[0-9]+]]-.LTOC(30)
+; LARGE-BSS-DAG:     lwz {{[0-9]+}}, 0([[VREG]])
+; LARGE-BSS-DAG:     stw {{[0-9]+}}, 8(1)
+; LARGE-BSS:         lwz 30, -8(1)
+; LARGE-BSS:       [[VREF]]:
+; LARGE-BSS-NEXT:    .long bar
diff --git a/test/CodeGen/PowerPC/ppc32-pic.ll b/test/CodeGen/PowerPC/ppc32-pic.ll
index 5bb78a4655ae..abc136757177 100644
--- a/test/CodeGen/PowerPC/ppc32-pic.ll
+++ b/test/CodeGen/PowerPC/ppc32-pic.ll
@@ -1,21 +1,24 @@
-; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -relocation-model=pic | FileCheck %s
-@foobar = common global i32 0, align 4
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -relocation-model=pic | FileCheck -check-prefix=SMALL-BSS %s
+@bar = common global i32 0, align 4
+
+declare i32 @call_foo(i32, ...)
 
 define i32 @foo() {
 entry:
-  %0 = load i32* @foobar, align 4
-  ret i32 %0
+  %0 = load i32* @bar, align 4
+  %call = call i32 (i32, ...)* @call_foo(i32 %0, i32 0, i32 1, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64)
+  ret i32 0
 }
 
-; CHECK:       [[POFF:\.L[0-9]+\$poff]]:
-; CHECK-NEXT:    .long .L.TOC.-[[PB:\.L[0-9]+\$pb]]
-; CHECK-NEXT:  foo:
-; CHECK:         bl [[PB]]
-; CHECK-NEXT:  [[PB]]:
-; CHECK:         mflr 30
-; CHECK:         lwz [[REG:[0-9]+]], [[POFF]]-[[PB]](30)
-; CHECK-NEXT:    add 30, [[REG]], 30
-; CHECK:         lwz [[VREG:[0-9]+]], [[VREF:\.LC[0-9]+]]-.L.TOC.(30)
-; CHECK:         lwz {{[0-9]+}}, 0([[VREG]])
-; CHECK:       [[VREF]]:
-; CHECK-NEXT:    .long foobar
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"PIC Level", i32 1}
+; SMALL-BSS-LABEL:foo:
+; SMALL-BSS:         stw 30, -8(1)
+; SMALL-BSS:         stwu 1, -32(1)
+; SMALL-BSS:         bl _GLOBAL_OFFSET_TABLE_@local-4
+; SMALL-BSS:         mflr 30
+; SMALL-BSS-DAG:     stw {{[0-9]+}}, 8(1)
+; SMALL-BSS-DAG:     lwz [[VREG:[0-9]+]], bar@GOT(30)
+; SMALL-BSS-DAG:     lwz {{[0-9]+}}, 0([[VREG]])
+; SMALL-BSS:         bl call_foo@PLT
+; SMALL-BSS:         lwz 30, -8(1)
diff --git a/test/CodeGen/PowerPC/ppc440-msync.ll b/test/CodeGen/PowerPC/ppc440-msync.ll
index 1274173256cf..3f4e7fd0d2de 100644
--- a/test/CodeGen/PowerPC/ppc440-msync.ll
+++ b/test/CodeGen/PowerPC/ppc440-msync.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=ppc32 | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck %s
 ; RUN: llc < %s -march=ppc32 -mcpu=440 | FileCheck %s -check-prefix=BE-CHK
 
 define i32 @has_a_fence(i32 %a, i32 %b) nounwind {
diff --git a/test/CodeGen/PowerPC/ppc64-align-long-double.ll b/test/CodeGen/PowerPC/ppc64-align-long-double.ll
index 764d3ce5fd45..5ed029cc9702 100644
--- a/test/CodeGen/PowerPC/ppc64-align-long-double.ll
+++ b/test/CodeGen/PowerPC/ppc64-align-long-double.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mcpu=pwr7 -O0 -fast-isel=false < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -fast-isel=false -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
 
 ; Verify internal alignment of long double in a struct.  The double
 ; argument comes in in GPR3; GPR4 is skipped; GPRs 5 and 6 contain
@@ -24,3 +25,12 @@ entry:
 ; CHECK: lfd 1, 64(1)
 ; CHECK: lfd 2, 72(1)
 
+; CHECK-VSX: std 6, 72(1)
+; CHECK-VSX: std 5, 64(1)
+; CHECK-VSX: std 4, 56(1)
+; CHECK-VSX: std 3, 48(1)
+; CHECK-VSX: li 3, 16
+; CHECK-VSX: addi 4, 1, 48
+; CHECK-VSX: lxsdx 1, 4, 3
+; CHECK-VSX: li 3, 24
+; CHECK-VSX: lxsdx 2, 4, 3
diff --git a/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll b/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll
new file mode 100644
index 000000000000..479c7a7af25f
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-anyregcc-crash.ll
@@ -0,0 +1,19 @@
+; RUN: not llc < %s -mtriple=powerpc64-unknown-linux-gnu 2>&1 | FileCheck %s
+;
+; Check that misuse of anyregcc results in a compile time error.
+
+; CHECK: LLVM ERROR: ran out of registers during register allocation
+define i64 @anyreglimit(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8,
+                        i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16,
+                        i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
+                        i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32) {
+entry:
+  %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 15, i8* inttoptr (i64 0 to i8*), i32 32,
+                i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5, i64 %v6, i64 %v7, i64 %v8,
+                i64 %v9, i64 %v10, i64 %v11, i64 %v12, i64 %v13, i64 %v14, i64 %v15, i64 %v16,
+                i64 %v17, i64 %v18, i64 %v19, i64 %v20, i64 %v21, i64 %v22, i64 %v23, i64 %v24,
+                i64 %v25, i64 %v26, i64 %v27, i64 %v28, i64 %v29, i64 %v30, i64 %v31, i64 %v32)
+  ret i64 %result
+}
+
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
diff --git a/test/CodeGen/PowerPC/ppc64-anyregcc.ll b/test/CodeGen/PowerPC/ppc64-anyregcc.ll
new file mode 100644
index 000000000000..7cd3c4b38200
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-anyregcc.ll
@@ -0,0 +1,367 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Stackmap Header: no constants - 6 callsites
+; CHECK-LABEL: .section	.llvm_stackmaps
+; CHECK-NEXT:  __LLVM_StackMaps:
+; Header
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 0
+; Num Functions
+; CHECK-NEXT:   .long 8
+; Num LargeConstants
+; CHECK-NEXT:   .long 0
+; Num Callsites
+; CHECK-NEXT:   .long 8
+
+; Functions and stack size
+; CHECK-NEXT:   .quad test
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad property_access1
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad property_access2
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad property_access3
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad anyreg_test1
+; CHECK-NEXT:   .quad 160
+; CHECK-NEXT:   .quad anyreg_test2
+; CHECK-NEXT:   .quad 160
+; CHECK-NEXT:   .quad patchpoint_spilldef
+; CHECK-NEXT:   .quad 256
+; CHECK-NEXT:   .quad patchpoint_spillargs
+; CHECK-NEXT:   .quad 288
+
+
+; test
+; CHECK-LABEL:  .long   .L{{.*}}-.L.test
+; CHECK-NEXT:   .short  0
+; 3 locations
+; CHECK-NEXT:   .short  3
+; Loc 0: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 2: Constant 3
+; CHECK-NEXT:   .byte 4
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long 3
+define i64 @test() nounwind ssp uwtable {
+entry:
+  call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 0, i32 24, i8* null, i32 2, i32 1, i32 2, i64 3)
+  ret i64 0
+}
+
+; property access 1 - %obj is an anyreg call argument and should therefore be in a register
+; CHECK-LABEL:  .long   .L{{.*}}-.L.property_access1
+; CHECK-NEXT:   .short  0
+; 2 locations
+; CHECK-NEXT:   .short  2
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+define i64 @property_access1(i8* %obj) nounwind ssp uwtable {
+entry:
+  %f = inttoptr i64 281474417671919 to i8*
+  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 1, i32 24, i8* %f, i32 1, i8* %obj)
+  ret i64 %ret
+}
+
+; property access 2 - %obj is an anyreg call argument and should therefore be in a register
+; CHECK-LABEL:  .long   .L{{.*}}-.L.property_access2
+; CHECK-NEXT:   .short  0
+; 2 locations
+; CHECK-NEXT:   .short  2
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+define i64 @property_access2() nounwind ssp uwtable {
+entry:
+  %obj = alloca i64, align 8
+  %f = inttoptr i64 281474417671919 to i8*
+  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 24, i8* %f, i32 1, i64* %obj)
+  ret i64 %ret
+}
+
+; property access 3 - %obj is a frame index
+; CHECK-LABEL:  .long   .L{{.*}}-.L.property_access3
+; CHECK-NEXT:   .short  0
+; 2 locations
+; CHECK-NEXT:   .short  2
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Direct FP - 8
+; CHECK-NEXT:   .byte 2
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short 31
+; CHECK-NEXT:   .long 112
+define i64 @property_access3() nounwind ssp uwtable {
+entry:
+  %obj = alloca i64, align 8
+  %f = inttoptr i64 281474417671919 to i8*
+  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 3, i32 24, i8* %f, i32 0, i64* %obj)
+  ret i64 %ret
+}
+
+; anyreg_test1
+; CHECK-LABEL:  .long   .L{{.*}}-.L.anyreg_test1
+; CHECK-NEXT:   .short  0
+; 14 locations
+; CHECK-NEXT:   .short  14
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 2: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 3: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 4: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 5: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 6: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 7: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 8: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 9: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 10: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 11: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 12: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 13: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+define i64 @anyreg_test1(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
+entry:
+  %f = inttoptr i64 281474417671919 to i8*
+  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 4, i32 24, i8* %f, i32 13, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+  ret i64 %ret
+}
+
+; anyreg_test2
+; CHECK-LABEL:  .long   .L{{.*}}-.L.anyreg_test2
+; CHECK-NEXT:   .short  0
+; 14 locations
+; CHECK-NEXT:   .short  14
+; Loc 0: Register <-- this is the return register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 1: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 2: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 3: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 4: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 5: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 6: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 7: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 8: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 9: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 10: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 11: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 12: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+; Loc 13: Register
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short {{[0-9]+}}
+; CHECK-NEXT:   .long 0
+define i64 @anyreg_test2(i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13) nounwind ssp uwtable {
+entry:
+  %f = inttoptr i64 281474417671919 to i8*
+  %ret = call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 24, i8* %f, i32 8, i8* %a1, i8* %a2, i8* %a3, i8* %a4, i8* %a5, i8* %a6, i8* %a7, i8* %a8, i8* %a9, i8* %a10, i8* %a11, i8* %a12, i8* %a13)
+  ret i64 %ret
+}
+
+; Test spilling the return value of an anyregcc call.
+;
+; <rdar://problem/15432754> [JS] Assertion: "Folded a def to a non-store!"
+;
+; CHECK-LABEL: .long .L{{.*}}-.L.patchpoint_spilldef
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .short 3
+; Loc 0: Register (some register that will be spilled to the stack)
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+; Loc 1: Register
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+; Loc 1: Register
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+define i64 @patchpoint_spilldef(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+  %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 12, i32 24, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2)
+  tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17
+},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
+  ret i64 %result
+}
+
+; Test spilling the arguments of an anyregcc call.
+;
+; <rdar://problem/15487687> [JS] AnyRegCC argument ends up being spilled
+;
+; CHECK-LABEL: .long .L{{.*}}-.L.patchpoint_spillargs
+; CHECK-NEXT: .short 0
+; CHECK-NEXT: .short 5
+; Loc 0: Return a register
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+; Loc 1: Arg0 in a Register
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+; Loc 2: Arg1 in a Register
+; CHECK-NEXT: .byte  1
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short {{[0-9]+}}
+; CHECK-NEXT: .long  0
+; Loc 3: Arg2 spilled to FP -96
+; CHECK-NEXT: .byte  3
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short 31
+; CHECK-NEXT: .long 128
+; Loc 4: Arg3 spilled to FP - 88
+; CHECK-NEXT: .byte  3
+; CHECK-NEXT: .byte  8
+; CHECK-NEXT: .short 31
+; CHECK-NEXT: .long 136
+define i64 @patchpoint_spillargs(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+  tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17
+},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
+  %result = tail call anyregcc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 13, i32 24, i8* inttoptr (i64 0 to i8*), i32 2, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+  ret i64 %result
+}
+
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
diff --git a/test/CodeGen/PowerPC/ppc64-calls.ll b/test/CodeGen/PowerPC/ppc64-calls.ll
index 31794be25beb..707ba95235f2 100644
--- a/test/CodeGen/PowerPC/ppc64-calls.ll
+++ b/test/CodeGen/PowerPC/ppc64-calls.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 | FileCheck %s
+; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -67,3 +67,20 @@ define double @test_external(double %x) nounwind {
 ; CHECK-NEXT: nop
   ret double %call
 }
+
+; The 'ld 2, 40(1)' really must always come directly after the bctrl to make
+; the unwinding code in libgcc happy.
+@g = external global void ()*
+declare void @h(i64)
+define void @test_indir_toc_reload(i64 %x) {
+  %1 = load void ()** @g
+  call void %1()
+  call void @h(i64 %x)
+  ret void
+
+; CHECK-LABEL: @test_indir_toc_reload
+; CHECK: bctrl
+; CHECK-NEXT: ld 2, 40(1)
+; CHECK: blr
+}
+
diff --git a/test/CodeGen/PowerPC/ppc64-elf-abi.ll b/test/CodeGen/PowerPC/ppc64-elf-abi.ll
new file mode 100644
index 000000000000..d82122d58ee5
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-elf-abi.ll
@@ -0,0 +1,10 @@
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefix=CHECK-ELFv1
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mattr=+elfv1 < %s | FileCheck %s -check-prefix=CHECK-ELFv1
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mattr=+elfv2 < %s | FileCheck %s -check-prefix=CHECK-ELFv2
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefix=CHECK-ELFv2
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mattr=+elfv1 < %s | FileCheck %s -check-prefix=CHECK-ELFv1
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mattr=+elfv2 < %s | FileCheck %s -check-prefix=CHECK-ELFv2
+
+; CHECK-ELFv2: .abiversion 2
+; CHECK-ELFv1-NOT: .abiversion 2
+
diff --git a/test/CodeGen/PowerPC/ppc64-gep-opt.ll b/test/CodeGen/PowerPC/ppc64-gep-opt.ll
new file mode 100644
index 000000000000..14cf9a7e8382
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-gep-opt.ll
@@ -0,0 +1,157 @@
+; RUN: llc -O3 -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -O3 -print-after=codegenprepare -mcpu=ppc64 < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
+; RUN: llc -O3 -print-after=codegenprepare -mcpu=pwr7  < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Following test cases test enabling SeparateConstOffsetFromGEP pass in the PPC
+; backend. If useAA() returns true, it will lower a GEP with multiple indices
+; into GEPs with a single index, otherwise it will lower it into a
+; "ptrtoint+arithmetics+inttoptr" form.
+
+%struct = type { i32, i32, i32, i32, [20 x i32] }
+
+; Check that when two complex GEPs are used in two basic blocks, LLVM can
+; elimilate the common subexpression for the second use.
+define void @test_GEP_CSE([240 x %struct]* %string, i32* %adj, i32 %lib, i64 %idxprom) {
+  %liberties = getelementptr [240 x %struct]* %string, i64 1, i64 %idxprom, i32 3
+  %1 = load i32* %liberties, align 4
+  %cmp = icmp eq i32 %1, %lib
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %origin = getelementptr [240 x %struct]* %string, i64 1, i64 %idxprom, i32 2
+  %2 = load i32* %origin, align 4
+  store i32 %2, i32* %adj, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  ret void
+}
+
+; CHECK-NoAA-LABEL: @test_GEP_CSE(
+; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint [240 x %struct]* %string to i64
+; CHECK-NoAA: [[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
+; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]]
+; CHECK-NoAA: add i64 [[PTR2]], 23052
+; CHECK-NoAA: inttoptr
+; CHECK-NoAA: if.then:
+; CHECK-NoAA-NOT: ptrtoint
+; CHECK-NoAA-NOT: mul
+; CHECK-NoAA: add i64 [[PTR2]], 23048
+; CHECK-NoAA: inttoptr
+
+; CHECK-UseAA-LABEL: @test_GEP_CSE(
+; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = bitcast [240 x %struct]* %string to i8*
+; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
+; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8* [[PTR0]], i64 [[IDX]]
+; CHECK-UseAA: getelementptr i8* [[PTR1]], i64 23052
+; CHECK-UseAA: bitcast
+; CHECK-UseAA: if.then:
+; CHECK-UseAA: getelementptr i8* [[PTR1]], i64 23048
+; CHECK-UseAA: bitcast
+
+%class.my = type { i32, [128 x i32], i32, [256 x %struct.pt]}
+%struct.pt = type { %struct.point*, i32, i32 }
+%struct.point = type { i32, i32 }
+
+; Check when a GEP is used across two basic block, LLVM can sink the address
+; calculation and code gen can generate a better addressing mode for the second
+; use.
+define void @test_GEP_across_BB(%class.my* %this, i64 %idx) {
+  %1 = getelementptr %class.my* %this, i64 0, i32 3, i64 %idx, i32 1
+  %2 = load i32* %1, align 4
+  %3 = getelementptr %class.my* %this, i64 0, i32 3, i64 %idx, i32 2
+  %4 = load i32* %3, align 4
+  %5 = icmp eq i32 %2, %4
+  br i1 %5, label %if.true, label %exit
+
+if.true:
+  %6 = shl i32 %4, 1
+  store i32 %6, i32* %3, align 4
+  br label %exit
+
+exit:
+  %7 = add nsw i32 %4, 1
+  store i32 %7, i32* %1, align 4
+  ret void
+}
+; CHECK-LABEL: test_GEP_across_BB:
+; CHECK-NOT: lwzu
+; CHECK: blr
+
+; CHECK-NoAA-LABEL: test_GEP_across_BB(
+; CHECK-NoAA: add i64 [[TMP:%[a-zA-Z0-9]+]], 528
+; CHECK-NoAA: add i64 [[TMP]], 532
+; CHECK-NoAA: if.true:
+; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 [[TMP]], 532
+; CHECK-NoAA: exit:
+; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 [[TMP]], 528
+
+; CHECK-UseAA-LABEL: test_GEP_across_BB(
+; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = getelementptr
+; CHECK-UseAA: getelementptr i8* [[PTR0]], i64 528
+; CHECK-UseAA: getelementptr i8* [[PTR0]], i64 532
+; CHECK-UseAA: if.true:
+; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8* [[PTR0]], i64 532
+; CHECK-UseAA: exit:
+; CHECK-UseAA: {{%sunk[a-zA-Z0-9]+}} = getelementptr i8* [[PTR0]], i64 528
+
+%struct.S = type { float, double }
+@struct_array = global [1024 x %struct.S] zeroinitializer, align 16
+
+; The following two test cases check we can extract constant from indices of
+; struct type.
+; The constant offsets are from indices "i64 %idxprom" and "i32 1". As the
+; alloca size of %struct.S is 16, and "i32 1" is the 2rd element whose field
+; offset is 8, the total constant offset is (5 * 16 + 8) = 88.
+define double* @test-struct_1(i32 %i) {
+entry:
+  %add = add nsw i32 %i, 5
+  %idxprom = sext i32 %add to i64
+  %p = getelementptr [1024 x %struct.S]* @struct_array, i64 0, i64 %idxprom, i32 1
+  ret double* %p
+}
+; CHECK-NoAA-LABEL: @test-struct_1(
+; CHECK-NoAA-NOT: getelementptr
+; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, 88
+
+; CHECK-UseAA-LABEL: @test-struct_1(
+; CHECK-UseAA: getelementptr i8* %{{[a-zA-Z0-9]+}}, i64 88
+
+%struct3 = type { i64, i32 }
+%struct2 = type { %struct3, i32 }
+%struct1 = type { i64, %struct2 }
+%struct0 = type { i32, i32, i64*, [100 x %struct1] }
+
+; The constant offsets are from indices "i32 3", "i64 %arrayidx" and "i32 1".
+; "i32 3" is the 4th element whose field offset is 16. The alloca size of
+; %struct1 is 32. "i32 1" is the 2rd element whose field offset is 8. So the
+; total constant offset is 16 + (-2 * 32) + 8 = -40
+define %struct2* @test-struct_2(%struct0* %ptr, i64 %idx) {
+entry:
+  %arrayidx = add nsw i64 %idx, -2
+  %ptr2 = getelementptr %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
+  ret %struct2* %ptr2
+}
+; CHECK-NoAA-LABEL: @test-struct_2(
+; CHECK-NoAA-NOT: = getelementptr
+; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, -40
+
+; CHECK-UseAA-LABEL: @test-struct_2(
+; CHECK-UseAA: getelementptr i8* %{{[a-zA-Z0-9]+}}, i64 -40
+
+; Test that when a index is added from two constant, SeparateConstOffsetFromGEP
+; pass does not generate incorrect result.
+define void @test_const_add([3 x i32]* %in) {
+  %inc = add nsw i32 2, 1
+  %idxprom = sext i32 %inc to i64
+  %arrayidx = getelementptr [3 x i32]* %in, i64 %idxprom, i64 2
+  store i32 0, i32* %arrayidx, align 4
+  ret void
+}
+; CHECK-LABEL: test_const_add:
+; CHECK: li [[REG:[0-9]+]], 0
+; CHECK: stw [[REG]], 44(3)
+; CHECK: blr
+
diff --git a/test/CodeGen/PowerPC/ppc64-nonfunc-calls.ll b/test/CodeGen/PowerPC/ppc64-nonfunc-calls.ll
new file mode 100644
index 000000000000..9f56f0102b7c
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-nonfunc-calls.ll
@@ -0,0 +1,69 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%struct.cd = type { i64, i64, i64 }
+
+@something = global [33 x i8] c"this is not really code, but...\0A\00", align 1
+@tls_something = thread_local global %struct.cd zeroinitializer, align 8
+@extern_something = external global %struct.cd
+
+; Function Attrs: nounwind
+define void @foo() #0 {
+entry:
+  tail call void bitcast ([33 x i8]* @something to void ()*)() #0
+  ret void
+
+; CHECK-LABEL: @foo
+; CHECK-DAG: addis [[REG1:[0-9]+]], 2, something@toc@ha
+; CHECK-DAG: std 2, 40(1)
+; CHECK-DAG: addi [[REG3:[0-9]+]], [[REG1]], something@toc@l
+; CHECK-DAG: ld [[REG2:[0-9]+]], 0([[REG3]])
+; CHECK-DAG: ld 11, 16([[REG3]])
+; CHECK-DAG: ld 2, 8([[REG3]])
+; CHECK-DAG: mtctr [[REG2]]
+; CHECK: bctrl
+; CHECK: ld 2, 40(1)
+; CHECK: blr
+}
+
+; Function Attrs: nounwind
+define void @bar() #0 {
+entry:
+  tail call void bitcast (%struct.cd* @tls_something to void ()*)() #0
+  ret void
+
+; CHECK-LABEL: @bar
+; CHECK-DAG: addis [[REG1:[0-9]+]], 13, tls_something@tprel@ha
+; CHECK-DAG: std 2, 40(1)
+; CHECK-DAG: addi [[REG3:[0-9]+]], [[REG1]], tls_something@tprel@l
+; CHECK-DAG: ld [[REG2:[0-9]+]], 0([[REG3]])
+; CHECK-DAG: ld 11, 16([[REG3]])
+; CHECK-DAG: ld 2, 8([[REG3]])
+; CHECK-DAG: mtctr [[REG2]]
+; CHECK: bctrl
+; CHECK: ld 2, 40(1)
+; CHECK: blr
+}
+
+; Function Attrs: nounwind
+define void @ext() #0 {
+entry:
+  tail call void bitcast (%struct.cd* @extern_something to void ()*)() #0
+  ret void
+
+; CHECK-LABEL: @ext
+; CHECK-DAG: addis [[REG1:[0-9]+]], 2, [[NAME:[._A-Za-z0-9]+]]@toc@ha
+; CHECK-DAG: std 2, 40(1)
+; CHECK-DAG: ld [[REG3:[0-9]+]], [[NAME]]@toc@l(3)
+; CHECK-DAG: ld [[REG2:[0-9]+]], 0([[REG3]])
+; CHECK-DAG: ld 11, 16([[REG3]])
+; CHECK-DAG: ld 2, 8([[REG3]])
+; CHECK-DAG: mtctr [[REG2]]
+; CHECK: bctrl
+; CHECK: ld 2, 40(1)
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/PowerPC/ppc64-patchpoint.ll b/test/CodeGen/PowerPC/ppc64-patchpoint.ll
new file mode 100644
index 000000000000..5e58fdab2168
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-patchpoint.ll
@@ -0,0 +1,93 @@
+; RUN: llc                             < %s | FileCheck %s
+; RUN: llc -fast-isel -fast-isel-abort < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Trivial patchpoint codegen
+;
+define i64 @trivial_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+; CHECK-LABEL: trivial_patchpoint_codegen:
+
+; CHECK: li 11, -8531
+; CHECK-NEXT: rldic 11, 11, 32, 16
+; CHECK-NEXT: oris 11, 11, 48879
+; CHECK-NEXT: ori 11, 11, 51966
+; CHECK-NEXT: mtctr 11
+; CHECK-NEXT: bctrl
+
+; CHECK: li 11, -8531
+; CHECK-NEXT: rldic 11, 11, 32, 16
+; CHECK-NEXT: oris 11, 11, 48879
+; CHECK-NEXT: ori 11, 11, 51967
+; CHECK-NEXT: mtctr 11
+; CHECK-NEXT: bctrl
+
+; CHECK: blr
+
+  %resolveCall2 = inttoptr i64 244837814094590 to i8*
+  %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 24, i8* %resolveCall2, i32 4, i64 %p1, i64 %p2, i64 %p3, i64 %p4)
+  %resolveCall3 = inttoptr i64 244837814094591 to i8*
+  tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 3, i32 24, i8* %resolveCall3, i32 2, i64 %p1, i64 %result)
+  ret i64 %result
+}
+
+; Caller frame metadata with stackmaps. This should not be optimized
+; as a leaf function.
+;
+; CHECK-LABEL: caller_meta_leaf
+; CHECK: stdu 1, -80(1)
+; CHECK: Ltmp
+; CHECK: addi 1, 1, 80
+; CHECK: blr
+
+define void @caller_meta_leaf() {
+entry:
+  %metadata = alloca i64, i32 3, align 8
+  store i64 11, i64* %metadata
+  store i64 12, i64* %metadata
+  store i64 13, i64* %metadata
+  call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata)
+  ret void
+}
+
+; Test patchpoints reusing the same TargetConstant.
+; <rdar:15390785> Assertion failed: (CI.getNumArgOperands() >= NumArgs + 4)
+; There is no way to verify this, since it depends on memory allocation.
+; But I think it's useful to include as a working example.
+define i64 @testLowerConstant(i64 %arg, i64 %tmp2, i64 %tmp10, i64* %tmp33, i64 %tmp79) {
+entry:
+  %tmp80 = add i64 %tmp79, -16
+  %tmp81 = inttoptr i64 %tmp80 to i64*
+  %tmp82 = load i64* %tmp81, align 8
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
+  tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
+  %tmp83 = load i64* %tmp33, align 8
+  %tmp84 = add i64 %tmp83, -24
+  %tmp85 = inttoptr i64 %tmp84 to i64*
+  %tmp86 = load i64* %tmp85, align 8
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
+  tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
+  ret i64 10
+}
+
+; Test small patchpoints that don't emit calls.
+define void @small_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+; CHECK-LABEL: small_patchpoint_codegen:
+; CHECK:      Ltmp
+; CHECK:      nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NOT:  nop
+; CHECK: blr
+  %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 20, i8* null, i32 2, i64 %p1, i64 %p2)
+  ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
+
diff --git a/test/CodeGen/PowerPC/ppc64-prefetch.ll b/test/CodeGen/PowerPC/ppc64-prefetch.ll
index b2f37097f920..b2f6e7d66520 100644
--- a/test/CodeGen/PowerPC/ppc64-prefetch.ll
+++ b/test/CodeGen/PowerPC/ppc64-prefetch.ll
@@ -1,15 +1,34 @@
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -mcpu=a2 < %s | FileCheck %s
 
 define void @test1(i8* %a, ...) nounwind {
 entry:
   call void @llvm.prefetch(i8* %a, i32 0, i32 3, i32 1)
   ret void
+
+; CHECK-LABEL: @test1
+; CHECK: dcbt
 }
 
 declare void @llvm.prefetch(i8*, i32, i32, i32)
 
-; CHECK: @test1
-; CHECK: dcbt
+define void @test2(i8* %a, ...) nounwind {
+entry:
+  call void @llvm.prefetch(i8* %a, i32 1, i32 3, i32 1)
+  ret void
+
+; CHECK-LABEL: @test2
+; CHECK: dcbtst
+}
+
+define void @test3(i8* %a, ...) nounwind {
+entry:
+  call void @llvm.prefetch(i8* %a, i32 0, i32 3, i32 0)
+  ret void
+
+; CHECK-LABEL: @test3
+; CHECK: icbt
+}
+
 
diff --git a/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll b/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll
new file mode 100644
index 000000000000..368ddc5c8335
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-stackmap-nops.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-gnu-linux | FileCheck %s
+
+define void @test_shadow_optimization() {
+entry:
+; Expect 12 bytes worth of nops here rather than 32: With the shadow optimization
+; in place, 20 bytes will be consumed by the frame teardown and return instr.
+; CHECK-LABEL: test_shadow_optimization:
+
+; CHECK:      nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NOT:  nop
+; CHECK: addi 1, 1, 64
+; CHECK: ld [[REG1:[0-9]+]], 16(1)
+; CHECK: ld 31, -8(1)
+; CHECK: mtlr [[REG1]]
+; CHECK: blr
+
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64  0, i32  32)
+  ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+
diff --git a/test/CodeGen/PowerPC/ppc64-stackmap.ll b/test/CodeGen/PowerPC/ppc64-stackmap.ll
new file mode 100644
index 000000000000..9be8d0c8ad44
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-stackmap.ll
@@ -0,0 +1,289 @@
+; RUN: llc                             < %s | FileCheck %s
+;
+; Note: Print verbose stackmaps using -debug-only=stackmaps.
+
+; We are not getting the correct stack alignment when cross compiling for arm64.
+; So specify a datalayout here.
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; CHECK-LABEL:  .section  .llvm_stackmaps
+; CHECK-NEXT:  __LLVM_StackMaps:
+; Header
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 0
+; Num Functions
+; CHECK-NEXT:   .long 11
+; Num LargeConstants
+; CHECK-NEXT:   .long 2
+; Num Callsites
+; CHECK-NEXT:   .long 11
+
+; Functions and stack size
+; CHECK-NEXT:   .quad constantargs
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad osrinline
+; CHECK-NEXT:   .quad 144
+; CHECK-NEXT:   .quad osrcold
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad propertyRead
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad propertyWrite
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad jsVoidCall
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad jsIntCall
+; CHECK-NEXT:   .quad 128
+; CHECK-NEXT:   .quad spilledValue
+; CHECK-NEXT:   .quad 320
+; CHECK-NEXT:   .quad spilledStackMapValue
+; CHECK-NEXT:   .quad 224
+; CHECK-NEXT:   .quad liveConstant
+; CHECK-NEXT:   .quad 64
+; CHECK-NEXT:   .quad clobberLR
+; CHECK-NEXT:   .quad 208
+
+; Num LargeConstants
+; CHECK-NEXT:   .quad   4294967295
+; CHECK-NEXT:   .quad   4294967296
+
+; Constant arguments
+;
+; CHECK-NEXT:   .quad   1
+; CHECK-NEXT:   .long   .L{{.*}}-.L.constantargs
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  4
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   65535
+; SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   65536
+; SmallConstant
+; CHECK-NEXT:   .byte   5
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   0
+; LargeConstant at index 0
+; CHECK-NEXT:   .byte   5
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   1
+
+define void @constantargs() {
+entry:
+  %0 = inttoptr i64 244837814094590 to i8*
+  tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 1, i32 24, i8* %0, i32 0, i64 65535, i64 65536, i64 4294967295, i64 4294967296)
+  ret void
+}
+
+; Inline OSR Exit
+;
+; CHECK-LABEL:  .long   .L{{.*}}-.L.osrinline
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long  0
+define void @osrinline(i64 %a, i64 %b) {
+entry:
+  ; Runtime void->void call.
+  call void inttoptr (i64 244837814094590 to void ()*)()
+  ; Followed by inline OSR patchpoint with 12-byte shadow and 2 live vars.
+  call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 3, i32 12, i64 %a, i64 %b)
+  ret void
+}
+
+; Cold OSR Exit
+;
+; 2 live variables in register.
+;
+; CHECK-LABEL:  .long   .L{{.*}}-.L.osrcold
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long  0
+define void @osrcold(i64 %a, i64 %b) {
+entry:
+  %test = icmp slt i64 %a, %b
+  br i1 %test, label %ret, label %cold
+cold:
+  ; OSR patchpoint with 12-byte nop-slide and 2 live vars.
+  %thunk = inttoptr i64 244837814094590 to i8*
+  call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 4, i32 24, i8* %thunk, i32 0, i64 %a, i64 %b)
+  unreachable
+ret:
+  ret void
+}
+
+; Property Read
+; CHECK-LABEL:  .long   .L{{.*}}-.L.propertyRead
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  0
+;
+; FIXME: There are currently no stackmap entries. After moving to
+; AnyRegCC, we will have entries for the object and return value.
+define i64 @propertyRead(i64* %obj) {
+entry:
+  %resolveRead = inttoptr i64 244837814094590 to i8*
+  %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 24, i8* %resolveRead, i32 1, i64* %obj)
+  %add = add i64 %result, 3
+  ret i64 %add
+}
+
+; Property Write
+; CHECK-LABEL:  .long   .L{{.*}}-.L.propertyWrite
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+define void @propertyWrite(i64 %dummy1, i64* %obj, i64 %dummy2, i64 %a) {
+entry:
+  %resolveWrite = inttoptr i64 244837814094590 to i8*
+  call anyregcc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 24, i8* %resolveWrite, i32 2, i64* %obj, i64 %a)
+  ret void
+}
+
+; Void JS Call
+;
+; 2 live variables in registers.
+;
+; CHECK-LABEL:  .long   .L{{.*}}-.L.jsVoidCall
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+define void @jsVoidCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
+entry:
+  %resolveCall = inttoptr i64 244837814094590 to i8*
+  call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 7, i32 24, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+  ret void
+}
+
+; i64 JS Call
+;
+; 2 live variables in registers.
+;
+; CHECK-LABEL:  .long   .L{{.*}}-.L.jsIntCall
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .short  2
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+; CHECK-NEXT:   .byte   1
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  {{[0-9]+}}
+; CHECK-NEXT:   .long   0
+define i64 @jsIntCall(i64 %dummy1, i64* %obj, i64 %arg, i64 %l1, i64 %l2) {
+entry:
+  %resolveCall = inttoptr i64 244837814094590 to i8*
+  %result = call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 8, i32 24, i8* %resolveCall, i32 2, i64* %obj, i64 %arg, i64 %l1, i64 %l2)
+  %add = add i64 %result, 3
+  ret i64 %add
+}
+
+; Spilled stack map values.
+;
+; Verify 28 stack map entries.
+;
+; CHECK-LABEL:  .long .L{{.*}}-.L.spilledValue
+; CHECK-NEXT:   .short 0
+; CHECK-NEXT:   .short 28
+;
+; Check that at least one is a spilled entry from r31.
+; Location: Indirect FP + ...
+; CHECK:        .byte 3
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short 31
+define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) {
+entry:
+  call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 11, i32 24, i8* null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27)
+  ret void
+}
+
+; Spilled stack map values.
+;
+; Verify 30 stack map entries.
+;
+; CHECK-LABEL:  .long .L{{.*}}-.L.spilledStackMapValue
+; CHECK-NEXT:   .short 0
+; CHECK-NEXT:   .short 30
+;
+; Check that at least one is a spilled entry from r31.
+; Location: Indirect FP + ...
+; CHECK:        .byte 3
+; CHECK-NEXT:   .byte 8
+; CHECK-NEXT:   .short 31
+define webkit_jscc void @spilledStackMapValue(i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29) {
+entry:
+  call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 12, i32 16, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27, i64 %l28, i64 %l29)
+  ret void
+}
+
+
+; Map a constant value.
+;
+; CHECK-LABEL:  .long .L{{.*}}-.L.liveConstant
+; CHECK-NEXT:   .short 0
+; 1 location
+; CHECK-NEXT:   .short 1
+; Loc 0: SmallConstant
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .byte   8
+; CHECK-NEXT:   .short  0
+; CHECK-NEXT:   .long   33
+
+define void @liveConstant() {
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 8, i32 33)
+  ret void
+}
+
+; Map a value when LR is the only free register.
+;
+; CHECK-LABEL:  .long .L{{.*}}-.L.clobberLR
+; CHECK-NEXT:   .short 0
+; 1 location
+; CHECK-NEXT:   .short 1
+; Loc 0: Indirect FP (r31) - offset
+; CHECK-NEXT:   .byte   3
+; CHECK-NEXT:   .byte   4
+; CHECK-NEXT:   .short  31
+; CHECK-NEXT:   .long   {{[0-9]+}}
+define void @clobberLR(i32 %a) {
+  tail call void asm sideeffect "nop", "~{r0},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r14},~{r15},~{r16},~{r17},~{r18},~{r19},~{r20},~{r21},~{r22},~{r23},~{r24},~{r25},~{r26},~{r27},~{r28},~{r29},~{r30},~{r31}"() nounwind
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 8, i32 %a)
+  ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
diff --git a/test/CodeGen/PowerPC/ppc64-vaarg-int.ll b/test/CodeGen/PowerPC/ppc64-vaarg-int.ll
index 5a63b01badc9..c9a4f91fddea 100644
--- a/test/CodeGen/PowerPC/ppc64-vaarg-int.ll
+++ b/test/CodeGen/PowerPC/ppc64-vaarg-int.ll
@@ -16,5 +16,5 @@ declare void @llvm.va_start(i8*) nounwind
 
 ; CHECK: @intvaarg
 ; Make sure that the va pointer is incremented by 8 (not 4).
-; CHECK: addi{{.*}}, 8
+; CHECK: addi{{.*}}, 1, 64 
 
diff --git a/test/CodeGen/PowerPC/ppc64le-aggregates.ll b/test/CodeGen/PowerPC/ppc64le-aggregates.ll
index 9eed623bacaa..4edd8d59e526 100644
--- a/test/CodeGen/PowerPC/ppc64le-aggregates.ll
+++ b/test/CodeGen/PowerPC/ppc64le-aggregates.ll
@@ -1,4 +1,8 @@
-; RUN: llc < %s -march=ppc64le -mcpu=pwr8 -mattr=+altivec | FileCheck %s
+; RUN: llc < %s -march=ppc64le -mcpu=pwr8 -mattr=+altivec -mattr=-vsx | FileCheck %s
+
+; Currently VSX support is disabled for this test because we generate lxsdx
+; instead of lfd, and stxsdx instead of stfd.  That is a poor choice when we
+; have reg+imm addressing, and is on the list of things to be fixed.
 
 target datalayout = "e-m:e-i64:64-n32:64"
 target triple = "powerpc64le-unknown-linux-gnu"
diff --git a/test/CodeGen/PowerPC/ppcf128-1.ll b/test/CodeGen/PowerPC/ppcf128-1.ll
index 1047fe5d3ba9..2cec934c66fd 100644
--- a/test/CodeGen/PowerPC/ppcf128-1.ll
+++ b/test/CodeGen/PowerPC/ppcf128-1.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -std-compile-opts | llc > %t
+; RUN: opt < %s -O3 | llc > %t
 ; ModuleID = 'ld3.c'
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/ppcf128-endian.ll b/test/CodeGen/PowerPC/ppcf128-endian.ll
index 2a5f13a5c3da..180fedf5c9f4 100644
--- a/test/CodeGen/PowerPC/ppcf128-endian.ll
+++ b/test/CodeGen/PowerPC/ppcf128-endian.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=pwr7 -mattr=+altivec < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mattr=+altivec -mattr=-vsx < %s | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-n32:64"
 target triple = "powerpc64le-unknown-linux-gnu"
diff --git a/test/CodeGen/PowerPC/pr15630.ll b/test/CodeGen/PowerPC/pr15630.ll
index c5ba8a4d4f04..3c1b604f0090 100644
--- a/test/CodeGen/PowerPC/pr15630.ll
+++ b/test/CodeGen/PowerPC/pr15630.ll
@@ -13,4 +13,5 @@ entry:
   ret void
 }
 
-; CHECK: stwcx.
+; CHECK: sync
+; CHECK: stb
diff --git a/test/CodeGen/PowerPC/pr17168.ll b/test/CodeGen/PowerPC/pr17168.ll
index 24bcda02b3a5..62a9ede0200b 100644
--- a/test/CodeGen/PowerPC/pr17168.ll
+++ b/test/CodeGen/PowerPC/pr17168.ll
@@ -25,7 +25,7 @@ for.cond968.preheader:                            ; preds = %for.cond968.prehead
 for.end1042:                                      ; preds = %for.cond968.preheader, %for.cond964.preheader, %entry
   %0 = phi i32 [ undef, %for.cond964.preheader ], [ undef, %for.cond968.preheader ], [ undef, %entry ]
   %1 = load i32* getelementptr inbounds ([3 x i32]* @grid_points, i64 0, i64 0), align 4, !dbg !443, !tbaa !444
-  tail call void @llvm.dbg.value(metadata !447, i64 0, metadata !119), !dbg !448
+  tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !119, metadata !{!"0x102"}), !dbg !448
   %sub10454270 = add nsw i32 %0, -1, !dbg !448
   %cmp10464271 = icmp sgt i32 %sub10454270, 1, !dbg !448
   %sub11134263 = add nsw i32 %1, -1, !dbg !450
@@ -46,7 +46,7 @@ for.cond1816.preheader.for.inc1898_crit_edge:     ; preds = %for.cond1816.prehea
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -54,468 +54,468 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!438, !464}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 190311)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !298, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"bt.c", metadata !"/home/hfinkel/src/NPB2.3-omp-C/BT"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !82, metadata !102, metadata !114, metadata !132, metadata !145, metadata !154, metadata !155, metadata !162, metadata !183, metadata !200, metadata !201, metadata !207, metadata !208, metadata !215, metadata !221, metadata !230, metadata !238, metadata !246, metadata !255, metadata !260, metadata !261, metadata !268, metadata !274, metadata !279, metadata !280, metadata !287, metadata !293}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 74, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !12, i32 74} ; [ DW_TAG_subprogram ] [line 74] [def] [main]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !8, metadata !9}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
-!11 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_unsigned_char]
-!12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16, metadata !17, metadata !18, metadata !19, metadata !21, metadata !22, metadata !23, metadata !25, metadata !26}
-!13 = metadata !{i32 786689, metadata !4, metadata !"argc", metadata !5, i32 16777290, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 74]
-!14 = metadata !{i32 786689, metadata !4, metadata !"argv", metadata !5, i32 33554506, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 74]
-!15 = metadata !{i32 786688, metadata !4, metadata !"niter", metadata !5, i32 76, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [niter] [line 76]
-!16 = metadata !{i32 786688, metadata !4, metadata !"step", metadata !5, i32 76, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [step] [line 76]
-!17 = metadata !{i32 786688, metadata !4, metadata !"n3", metadata !5, i32 76, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [n3] [line 76]
-!18 = metadata !{i32 786688, metadata !4, metadata !"nthreads", metadata !5, i32 77, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [nthreads] [line 77]
-!19 = metadata !{i32 786688, metadata !4, metadata !"navg", metadata !5, i32 78, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [navg] [line 78]
-!20 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
-!21 = metadata !{i32 786688, metadata !4, metadata !"mflops", metadata !5, i32 78, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [mflops] [line 78]
-!22 = metadata !{i32 786688, metadata !4, metadata !"tmax", metadata !5, i32 80, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [tmax] [line 80]
-!23 = metadata !{i32 786688, metadata !4, metadata !"verified", metadata !5, i32 81, metadata !24, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [verified] [line 81]
-!24 = metadata !{i32 786454, metadata !1, null, metadata !"boolean", i32 12, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] [boolean] [line 12, size 0, align 0, offset 0] [from int]
-!25 = metadata !{i32 786688, metadata !4, metadata !"class", metadata !5, i32 82, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [class] [line 82]
-!26 = metadata !{i32 786688, metadata !4, metadata !"fp", metadata !5, i32 83, metadata !27, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [fp] [line 83]
-!27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !28} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from FILE]
-!28 = metadata !{i32 786454, metadata !1, null, metadata !"FILE", i32 49, i64 0, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_typedef ] [FILE] [line 49, size 0, align 0, offset 0] [from _IO_FILE]
-!29 = metadata !{i32 786451, metadata !30, null, metadata !"_IO_FILE", i32 271, i64 1728, i64 64, i32 0, i32 0, null, metadata !31, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [_IO_FILE] [line 271, size 1728, align 64, offset 0] [def] [from ]
-!30 = metadata !{metadata !"/usr/include/libio.h", metadata !"/home/hfinkel/src/NPB2.3-omp-C/BT"}
-!31 = metadata !{metadata !32, metadata !33, metadata !34, metadata !35, metadata !36, metadata !37, metadata !38, metadata !39, metadata !40, metadata !41, metadata !42, metadata !43, metadata !44, metadata !52, metadata !53, metadata !54, metadata !55, metadata !58, metadata !60, metadata !62, metadata !66, metadata !68, metadata !70, metadata !71, metadata !72, metadata !73, metadata !74, metadata !77, metadata !78}
-!32 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_flags", i32 272, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [_flags] [line 272, size 32, align 32, offset 0] [from int]
-!33 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_read_ptr", i32 277, i64 64, i64 64, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_read_ptr] [line 277, size 64, align 64, offset 64] [from ]
-!34 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_read_end", i32 278, i64 64, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_read_end] [line 278, size 64, align 64, offset 128] [from ]
-!35 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_read_base", i32 279, i64 64, i64 64, i64 192, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_read_base] [line 279, size 64, align 64, offset 192] [from ]
-!36 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_write_base", i32 280, i64 64, i64 64, i64 256, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_write_base] [line 280, size 64, align 64, offset 256] [from ]
-!37 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_write_ptr", i32 281, i64 64, i64 64, i64 320, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_write_ptr] [line 281, size 64, align 64, offset 320] [from ]
-!38 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_write_end", i32 282, i64 64, i64 64, i64 384, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_write_end] [line 282, size 64, align 64, offset 384] [from ]
-!39 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_buf_base", i32 283, i64 64, i64 64, i64 448, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_buf_base] [line 283, size 64, align 64, offset 448] [from ]
-!40 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_buf_end", i32 284, i64 64, i64 64, i64 512, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_buf_end] [line 284, size 64, align 64, offset 512] [from ]
-!41 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_save_base", i32 286, i64 64, i64 64, i64 576, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_save_base] [line 286, size 64, align 64, offset 576] [from ]
-!42 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_backup_base", i32 287, i64 64, i64 64, i64 640, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_backup_base] [line 287, size 64, align 64, offset 640] [from ]
-!43 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_IO_save_end", i32 288, i64 64, i64 64, i64 704, i32 0, metadata !10} ; [ DW_TAG_member ] [_IO_save_end] [line 288, size 64, align 64, offset 704] [from ]
-!44 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_markers", i32 290, i64 64, i64 64, i64 768, i32 0, metadata !45} ; [ DW_TAG_member ] [_markers] [line 290, size 64, align 64, offset 768] [from ]
-!45 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !46} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _IO_marker]
-!46 = metadata !{i32 786451, metadata !30, null, metadata !"_IO_marker", i32 186, i64 192, i64 64, i32 0, i32 0, null, metadata !47, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [_IO_marker] [line 186, size 192, align 64, offset 0] [def] [from ]
-!47 = metadata !{metadata !48, metadata !49, metadata !51}
-!48 = metadata !{i32 786445, metadata !30, metadata !46, metadata !"_next", i32 187, i64 64, i64 64, i64 0, i32 0, metadata !45} ; [ DW_TAG_member ] [_next] [line 187, size 64, align 64, offset 0] [from ]
-!49 = metadata !{i32 786445, metadata !30, metadata !46, metadata !"_sbuf", i32 188, i64 64, i64 64, i64 64, i32 0, metadata !50} ; [ DW_TAG_member ] [_sbuf] [line 188, size 64, align 64, offset 64] [from ]
-!50 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _IO_FILE]
-!51 = metadata !{i32 786445, metadata !30, metadata !46, metadata !"_pos", i32 192, i64 32, i64 32, i64 128, i32 0, metadata !8} ; [ DW_TAG_member ] [_pos] [line 192, size 32, align 32, offset 128] [from int]
-!52 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_chain", i32 292, i64 64, i64 64, i64 832, i32 0, metadata !50} ; [ DW_TAG_member ] [_chain] [line 292, size 64, align 64, offset 832] [from ]
-!53 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_fileno", i32 294, i64 32, i64 32, i64 896, i32 0, metadata !8} ; [ DW_TAG_member ] [_fileno] [line 294, size 32, align 32, offset 896] [from int]
-!54 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_flags2", i32 298, i64 32, i64 32, i64 928, i32 0, metadata !8} ; [ DW_TAG_member ] [_flags2] [line 298, size 32, align 32, offset 928] [from int]
-!55 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_old_offset", i32 300, i64 64, i64 64, i64 960, i32 0, metadata !56} ; [ DW_TAG_member ] [_old_offset] [line 300, size 64, align 64, offset 960] [from __off_t]
-!56 = metadata !{i32 786454, metadata !30, null, metadata !"__off_t", i32 141, i64 0, i64 0, i64 0, i32 0, metadata !57} ; [ DW_TAG_typedef ] [__off_t] [line 141, size 0, align 0, offset 0] [from long int]
-!57 = metadata !{i32 786468, null, null, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
-!58 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_cur_column", i32 304, i64 16, i64 16, i64 1024, i32 0, metadata !59} ; [ DW_TAG_member ] [_cur_column] [line 304, size 16, align 16, offset 1024] [from unsigned short]
-!59 = metadata !{i32 786468, null, null, metadata !"unsigned short", i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned short] [line 0, size 16, align 16, offset 0, enc DW_ATE_unsigned]
-!60 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_vtable_offset", i32 305, i64 8, i64 8, i64 1040, i32 0, metadata !61} ; [ DW_TAG_member ] [_vtable_offset] [line 305, size 8, align 8, offset 1040] [from signed char]
-!61 = metadata !{i32 786468, null, null, metadata !"signed char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [signed char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!62 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_shortbuf", i32 306, i64 8, i64 8, i64 1048, i32 0, metadata !63} ; [ DW_TAG_member ] [_shortbuf] [line 306, size 8, align 8, offset 1048] [from ]
-!63 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 8, i64 8, i32 0, i32 0, metadata !11, metadata !64, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 8, align 8, offset 0] [from char]
-!64 = metadata !{metadata !65}
-!65 = metadata !{i32 786465, i64 0, i64 1}        ; [ DW_TAG_subrange_type ] [0, 0]
-!66 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_lock", i32 310, i64 64, i64 64, i64 1088, i32 0, metadata !67} ; [ DW_TAG_member ] [_lock] [line 310, size 64, align 64, offset 1088] [from ]
-!67 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!68 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_offset", i32 319, i64 64, i64 64, i64 1152, i32 0, metadata !69} ; [ DW_TAG_member ] [_offset] [line 319, size 64, align 64, offset 1152] [from __off64_t]
-!69 = metadata !{i32 786454, metadata !30, null, metadata !"__off64_t", i32 142, i64 0, i64 0, i64 0, i32 0, metadata !57} ; [ DW_TAG_typedef ] [__off64_t] [line 142, size 0, align 0, offset 0] [from long int]
-!70 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"__pad1", i32 328, i64 64, i64 64, i64 1216, i32 0, metadata !67} ; [ DW_TAG_member ] [__pad1] [line 328, size 64, align 64, offset 1216] [from ]
-!71 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"__pad2", i32 329, i64 64, i64 64, i64 1280, i32 0, metadata !67} ; [ DW_TAG_member ] [__pad2] [line 329, size 64, align 64, offset 1280] [from ]
-!72 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"__pad3", i32 330, i64 64, i64 64, i64 1344, i32 0, metadata !67} ; [ DW_TAG_member ] [__pad3] [line 330, size 64, align 64, offset 1344] [from ]
-!73 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"__pad4", i32 331, i64 64, i64 64, i64 1408, i32 0, metadata !67} ; [ DW_TAG_member ] [__pad4] [line 331, size 64, align 64, offset 1408] [from ]
-!74 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"__pad5", i32 332, i64 64, i64 64, i64 1472, i32 0, metadata !75} ; [ DW_TAG_member ] [__pad5] [line 332, size 64, align 64, offset 1472] [from size_t]
-!75 = metadata !{i32 786454, metadata !30, null, metadata !"size_t", i32 42, i64 0, i64 0, i64 0, i32 0, metadata !76} ; [ DW_TAG_typedef ] [size_t] [line 42, size 0, align 0, offset 0] [from long unsigned int]
-!76 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
-!77 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_mode", i32 334, i64 32, i64 32, i64 1536, i32 0, metadata !8} ; [ DW_TAG_member ] [_mode] [line 334, size 32, align 32, offset 1536] [from int]
-!78 = metadata !{i32 786445, metadata !30, metadata !29, metadata !"_unused2", i32 336, i64 160, i64 8, i64 1568, i32 0, metadata !79} ; [ DW_TAG_member ] [_unused2] [line 336, size 160, align 8, offset 1568] [from ]
-!79 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 160, i64 8, i32 0, i32 0, metadata !11, metadata !80, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char]
-!80 = metadata !{metadata !81}
-!81 = metadata !{i32 786465, i64 0, i64 20}       ; [ DW_TAG_subrange_type ] [0, 19]
-!82 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"verify", metadata !"verify", metadata !"", i32 2388, metadata !83, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !86, i32 2388} ; [ DW_TAG_subprogram ] [line 2388] [local] [def] [verify]
-!83 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !84, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!84 = metadata !{null, metadata !8, metadata !10, metadata !85}
-!85 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from boolean]
-!86 = metadata !{metadata !87, metadata !88, metadata !89, metadata !90, metadata !94, metadata !95, metadata !96, metadata !97, metadata !98, metadata !99, metadata !100, metadata !101}
-!87 = metadata !{i32 786689, metadata !82, metadata !"no_time_steps", metadata !5, i32 16779604, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [no_time_steps] [line 2388]
-!88 = metadata !{i32 786689, metadata !82, metadata !"class", metadata !5, i32 33556820, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [class] [line 2388]
-!89 = metadata !{i32 786689, metadata !82, metadata !"verified", metadata !5, i32 50334036, metadata !85, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [verified] [line 2388]
-!90 = metadata !{i32 786688, metadata !82, metadata !"xcrref", metadata !5, i32 2397, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xcrref] [line 2397]
-!91 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 320, i64 64, i32 0, i32 0, metadata !20, metadata !92, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 320, align 64, offset 0] [from double]
-!92 = metadata !{metadata !93}
-!93 = metadata !{i32 786465, i64 0, i64 5}        ; [ DW_TAG_subrange_type ] [0, 4]
-!94 = metadata !{i32 786688, metadata !82, metadata !"xceref", metadata !5, i32 2397, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xceref] [line 2397]
-!95 = metadata !{i32 786688, metadata !82, metadata !"xcrdif", metadata !5, i32 2397, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xcrdif] [line 2397]
-!96 = metadata !{i32 786688, metadata !82, metadata !"xcedif", metadata !5, i32 2397, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xcedif] [line 2397]
-!97 = metadata !{i32 786688, metadata !82, metadata !"epsilon", metadata !5, i32 2398, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [epsilon] [line 2398]
-!98 = metadata !{i32 786688, metadata !82, metadata !"xce", metadata !5, i32 2398, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xce] [line 2398]
-!99 = metadata !{i32 786688, metadata !82, metadata !"xcr", metadata !5, i32 2398, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xcr] [line 2398]
-!100 = metadata !{i32 786688, metadata !82, metadata !"dtref", metadata !5, i32 2398, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [dtref] [line 2398]
-!101 = metadata !{i32 786688, metadata !82, metadata !"m", metadata !5, i32 2399, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 2399]
-!102 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"rhs_norm", metadata !"rhs_norm", metadata !"", i32 266, metadata !103, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !106, i32 266} ; [ DW_TAG_subprogram ] [line 266] [local] [def] [rhs_norm]
-!103 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !104, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!104 = metadata !{null, metadata !105}
-!105 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from double]
-!106 = metadata !{metadata !107, metadata !108, metadata !109, metadata !110, metadata !111, metadata !112, metadata !113}
-!107 = metadata !{i32 786689, metadata !102, metadata !"rms", metadata !5, i32 16777482, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [rms] [line 266]
-!108 = metadata !{i32 786688, metadata !102, metadata !"i", metadata !5, i32 271, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 271]
-!109 = metadata !{i32 786688, metadata !102, metadata !"j", metadata !5, i32 271, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 271]
-!110 = metadata !{i32 786688, metadata !102, metadata !"k", metadata !5, i32 271, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 271]
-!111 = metadata !{i32 786688, metadata !102, metadata !"d", metadata !5, i32 271, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 271]
-!112 = metadata !{i32 786688, metadata !102, metadata !"m", metadata !5, i32 271, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 271]
-!113 = metadata !{i32 786688, metadata !102, metadata !"add", metadata !5, i32 272, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [add] [line 272]
-!114 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"compute_rhs", metadata !"compute_rhs", metadata !"", i32 1767, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @compute_rhs, null, null, metadata !117, i32 1767} ; [ DW_TAG_subprogram ] [line 1767] [local] [def] [compute_rhs]
-!115 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !116, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!116 = metadata !{null}
-!117 = metadata !{metadata !118, metadata !119, metadata !120, metadata !121, metadata !122, metadata !123, metadata !124, metadata !125, metadata !126, metadata !127, metadata !128, metadata !129, metadata !130, metadata !131}
-!118 = metadata !{i32 786688, metadata !114, metadata !"i", metadata !5, i32 1769, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 1769]
-!119 = metadata !{i32 786688, metadata !114, metadata !"j", metadata !5, i32 1769, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 1769]
-!120 = metadata !{i32 786688, metadata !114, metadata !"k", metadata !5, i32 1769, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 1769]
-!121 = metadata !{i32 786688, metadata !114, metadata !"m", metadata !5, i32 1769, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 1769]
-!122 = metadata !{i32 786688, metadata !114, metadata !"rho_inv", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [rho_inv] [line 1770]
-!123 = metadata !{i32 786688, metadata !114, metadata !"uijk", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [uijk] [line 1770]
-!124 = metadata !{i32 786688, metadata !114, metadata !"up1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [up1] [line 1770]
-!125 = metadata !{i32 786688, metadata !114, metadata !"um1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [um1] [line 1770]
-!126 = metadata !{i32 786688, metadata !114, metadata !"vijk", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vijk] [line 1770]
-!127 = metadata !{i32 786688, metadata !114, metadata !"vp1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vp1] [line 1770]
-!128 = metadata !{i32 786688, metadata !114, metadata !"vm1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [vm1] [line 1770]
-!129 = metadata !{i32 786688, metadata !114, metadata !"wijk", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [wijk] [line 1770]
-!130 = metadata !{i32 786688, metadata !114, metadata !"wp1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [wp1] [line 1770]
-!131 = metadata !{i32 786688, metadata !114, metadata !"wm1", metadata !5, i32 1770, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [wm1] [line 1770]
-!132 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"error_norm", metadata !"error_norm", metadata !"", i32 225, metadata !103, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !133, i32 225} ; [ DW_TAG_subprogram ] [line 225] [local] [def] [error_norm]
-!133 = metadata !{metadata !134, metadata !135, metadata !136, metadata !137, metadata !138, metadata !139, metadata !140, metadata !141, metadata !142, metadata !143, metadata !144}
-!134 = metadata !{i32 786689, metadata !132, metadata !"rms", metadata !5, i32 16777441, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [rms] [line 225]
-!135 = metadata !{i32 786688, metadata !132, metadata !"i", metadata !5, i32 232, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 232]
-!136 = metadata !{i32 786688, metadata !132, metadata !"j", metadata !5, i32 232, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 232]
-!137 = metadata !{i32 786688, metadata !132, metadata !"k", metadata !5, i32 232, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 232]
-!138 = metadata !{i32 786688, metadata !132, metadata !"m", metadata !5, i32 232, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 232]
-!139 = metadata !{i32 786688, metadata !132, metadata !"d", metadata !5, i32 232, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 232]
-!140 = metadata !{i32 786688, metadata !132, metadata !"xi", metadata !5, i32 233, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xi] [line 233]
-!141 = metadata !{i32 786688, metadata !132, metadata !"eta", metadata !5, i32 233, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [eta] [line 233]
-!142 = metadata !{i32 786688, metadata !132, metadata !"zeta", metadata !5, i32 233, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [zeta] [line 233]
-!143 = metadata !{i32 786688, metadata !132, metadata !"u_exact", metadata !5, i32 233, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [u_exact] [line 233]
-!144 = metadata !{i32 786688, metadata !132, metadata !"add", metadata !5, i32 233, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [add] [line 233]
-!145 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"exact_solution", metadata !"exact_solution", metadata !"", i32 643, metadata !146, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !148, i32 644} ; [ DW_TAG_subprogram ] [line 643] [local] [def] [scope 644] [exact_solution]
-!146 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !147, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!147 = metadata !{null, metadata !20, metadata !20, metadata !20, metadata !105}
-!148 = metadata !{metadata !149, metadata !150, metadata !151, metadata !152, metadata !153}
-!149 = metadata !{i32 786689, metadata !145, metadata !"xi", metadata !5, i32 16777859, metadata !20, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [xi] [line 643]
-!150 = metadata !{i32 786689, metadata !145, metadata !"eta", metadata !5, i32 33555075, metadata !20, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [eta] [line 643]
-!151 = metadata !{i32 786689, metadata !145, metadata !"zeta", metadata !5, i32 50332291, metadata !20, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [zeta] [line 643]
-!152 = metadata !{i32 786689, metadata !145, metadata !"dtemp", metadata !5, i32 67109508, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [dtemp] [line 644]
-!153 = metadata !{i32 786688, metadata !145, metadata !"m", metadata !5, i32 653, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 653]
-!154 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"set_constants", metadata !"set_constants", metadata !"", i32 2191, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 2191} ; [ DW_TAG_subprogram ] [line 2191] [local] [def] [set_constants]
-!155 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"lhsinit", metadata !"lhsinit", metadata !"", i32 855, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !156, i32 855} ; [ DW_TAG_subprogram ] [line 855] [local] [def] [lhsinit]
-!156 = metadata !{metadata !157, metadata !158, metadata !159, metadata !160, metadata !161}
-!157 = metadata !{i32 786688, metadata !155, metadata !"i", metadata !5, i32 857, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 857]
-!158 = metadata !{i32 786688, metadata !155, metadata !"j", metadata !5, i32 857, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 857]
-!159 = metadata !{i32 786688, metadata !155, metadata !"k", metadata !5, i32 857, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 857]
-!160 = metadata !{i32 786688, metadata !155, metadata !"m", metadata !5, i32 857, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 857]
-!161 = metadata !{i32 786688, metadata !155, metadata !"n", metadata !5, i32 857, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [n] [line 857]
-!162 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"initialize", metadata !"initialize", metadata !"", i32 669, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !163, i32 669} ; [ DW_TAG_subprogram ] [line 669] [local] [def] [initialize]
-!163 = metadata !{metadata !164, metadata !165, metadata !166, metadata !167, metadata !168, metadata !169, metadata !170, metadata !171, metadata !172, metadata !173, metadata !174, metadata !179, metadata !180, metadata !181, metadata !182}
-!164 = metadata !{i32 786688, metadata !162, metadata !"i", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 679]
-!165 = metadata !{i32 786688, metadata !162, metadata !"j", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 679]
-!166 = metadata !{i32 786688, metadata !162, metadata !"k", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 679]
-!167 = metadata !{i32 786688, metadata !162, metadata !"m", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 679]
-!168 = metadata !{i32 786688, metadata !162, metadata !"ix", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ix] [line 679]
-!169 = metadata !{i32 786688, metadata !162, metadata !"iy", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [iy] [line 679]
-!170 = metadata !{i32 786688, metadata !162, metadata !"iz", metadata !5, i32 679, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [iz] [line 679]
-!171 = metadata !{i32 786688, metadata !162, metadata !"xi", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xi] [line 680]
-!172 = metadata !{i32 786688, metadata !162, metadata !"eta", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [eta] [line 680]
-!173 = metadata !{i32 786688, metadata !162, metadata !"zeta", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [zeta] [line 680]
-!174 = metadata !{i32 786688, metadata !162, metadata !"Pface", metadata !5, i32 680, metadata !175, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [Pface] [line 680]
-!175 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1920, i64 64, i32 0, i32 0, metadata !20, metadata !176, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1920, align 64, offset 0] [from double]
-!176 = metadata !{metadata !177, metadata !178, metadata !93}
-!177 = metadata !{i32 786465, i64 0, i64 2}       ; [ DW_TAG_subrange_type ] [0, 1]
-!178 = metadata !{i32 786465, i64 0, i64 3}       ; [ DW_TAG_subrange_type ] [0, 2]
-!179 = metadata !{i32 786688, metadata !162, metadata !"Pxi", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [Pxi] [line 680]
-!180 = metadata !{i32 786688, metadata !162, metadata !"Peta", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [Peta] [line 680]
-!181 = metadata !{i32 786688, metadata !162, metadata !"Pzeta", metadata !5, i32 680, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [Pzeta] [line 680]
-!182 = metadata !{i32 786688, metadata !162, metadata !"temp", metadata !5, i32 680, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [temp] [line 680]
-!183 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"exact_rhs", metadata !"exact_rhs", metadata !"", i32 301, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !184, i32 301} ; [ DW_TAG_subprogram ] [line 301] [local] [def] [exact_rhs]
-!184 = metadata !{metadata !185, metadata !186, metadata !187, metadata !188, metadata !189, metadata !190, metadata !191, metadata !192, metadata !193, metadata !194, metadata !195, metadata !196, metadata !197, metadata !198, metadata !199}
-!185 = metadata !{i32 786688, metadata !183, metadata !"dtemp", metadata !5, i32 310, metadata !91, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [dtemp] [line 310]
-!186 = metadata !{i32 786688, metadata !183, metadata !"xi", metadata !5, i32 310, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xi] [line 310]
-!187 = metadata !{i32 786688, metadata !183, metadata !"eta", metadata !5, i32 310, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [eta] [line 310]
-!188 = metadata !{i32 786688, metadata !183, metadata !"zeta", metadata !5, i32 310, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [zeta] [line 310]
-!189 = metadata !{i32 786688, metadata !183, metadata !"dtpp", metadata !5, i32 310, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [dtpp] [line 310]
-!190 = metadata !{i32 786688, metadata !183, metadata !"m", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 311]
-!191 = metadata !{i32 786688, metadata !183, metadata !"i", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 311]
-!192 = metadata !{i32 786688, metadata !183, metadata !"j", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 311]
-!193 = metadata !{i32 786688, metadata !183, metadata !"k", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 311]
-!194 = metadata !{i32 786688, metadata !183, metadata !"ip1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ip1] [line 311]
-!195 = metadata !{i32 786688, metadata !183, metadata !"im1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [im1] [line 311]
-!196 = metadata !{i32 786688, metadata !183, metadata !"jp1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [jp1] [line 311]
-!197 = metadata !{i32 786688, metadata !183, metadata !"jm1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [jm1] [line 311]
-!198 = metadata !{i32 786688, metadata !183, metadata !"km1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [km1] [line 311]
-!199 = metadata !{i32 786688, metadata !183, metadata !"kp1", metadata !5, i32 311, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [kp1] [line 311]
-!200 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"adi", metadata !"adi", metadata !"", i32 210, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 210} ; [ DW_TAG_subprogram ] [line 210] [local] [def] [adi]
-!201 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"add", metadata !"add", metadata !"", i32 187, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !202, i32 187} ; [ DW_TAG_subprogram ] [line 187] [local] [def] [add]
-!202 = metadata !{metadata !203, metadata !204, metadata !205, metadata !206}
-!203 = metadata !{i32 786688, metadata !201, metadata !"i", metadata !5, i32 193, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 193]
-!204 = metadata !{i32 786688, metadata !201, metadata !"j", metadata !5, i32 193, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 193]
-!205 = metadata !{i32 786688, metadata !201, metadata !"k", metadata !5, i32 193, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 193]
-!206 = metadata !{i32 786688, metadata !201, metadata !"m", metadata !5, i32 193, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 193]
-!207 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"z_solve", metadata !"z_solve", metadata !"", i32 3457, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 3457} ; [ DW_TAG_subprogram ] [line 3457] [local] [def] [z_solve]
-!208 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"z_backsubstitute", metadata !"z_backsubstitute", metadata !"", i32 3480, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !209, i32 3480} ; [ DW_TAG_subprogram ] [line 3480] [local] [def] [z_backsubstitute]
-!209 = metadata !{metadata !210, metadata !211, metadata !212, metadata !213, metadata !214}
-!210 = metadata !{i32 786688, metadata !208, metadata !"i", metadata !5, i32 3492, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 3492]
-!211 = metadata !{i32 786688, metadata !208, metadata !"j", metadata !5, i32 3492, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 3492]
-!212 = metadata !{i32 786688, metadata !208, metadata !"k", metadata !5, i32 3492, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 3492]
-!213 = metadata !{i32 786688, metadata !208, metadata !"m", metadata !5, i32 3492, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 3492]
-!214 = metadata !{i32 786688, metadata !208, metadata !"n", metadata !5, i32 3492, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [n] [line 3492]
-!215 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"z_solve_cell", metadata !"z_solve_cell", metadata !"", i32 3512, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !216, i32 3512} ; [ DW_TAG_subprogram ] [line 3512] [local] [def] [z_solve_cell]
-!216 = metadata !{metadata !217, metadata !218, metadata !219, metadata !220}
-!217 = metadata !{i32 786688, metadata !215, metadata !"i", metadata !5, i32 3527, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 3527]
-!218 = metadata !{i32 786688, metadata !215, metadata !"j", metadata !5, i32 3527, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 3527]
-!219 = metadata !{i32 786688, metadata !215, metadata !"k", metadata !5, i32 3527, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 3527]
-!220 = metadata !{i32 786688, metadata !215, metadata !"ksize", metadata !5, i32 3527, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ksize] [line 3527]
-!221 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"binvrhs", metadata !"binvrhs", metadata !"", i32 3154, metadata !222, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !225, i32 3154} ; [ DW_TAG_subprogram ] [line 3154] [local] [def] [binvrhs]
-!222 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !223, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!223 = metadata !{null, metadata !224, metadata !105}
-!224 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!225 = metadata !{metadata !226, metadata !227, metadata !228, metadata !229}
-!226 = metadata !{i32 786689, metadata !221, metadata !"lhs", metadata !5, i32 16780370, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [lhs] [line 3154]
-!227 = metadata !{i32 786689, metadata !221, metadata !"r", metadata !5, i32 33557586, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [r] [line 3154]
-!228 = metadata !{i32 786688, metadata !221, metadata !"pivot", metadata !5, i32 3159, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [pivot] [line 3159]
-!229 = metadata !{i32 786688, metadata !221, metadata !"coeff", metadata !5, i32 3159, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [coeff] [line 3159]
-!230 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"matmul_sub", metadata !"matmul_sub", metadata !"", i32 2841, metadata !231, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !233, i32 2842} ; [ DW_TAG_subprogram ] [line 2841] [local] [def] [scope 2842] [matmul_sub]
-!231 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !232, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!232 = metadata !{null, metadata !224, metadata !224, metadata !224}
-!233 = metadata !{metadata !234, metadata !235, metadata !236, metadata !237}
-!234 = metadata !{i32 786689, metadata !230, metadata !"ablock", metadata !5, i32 16780057, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ablock] [line 2841]
-!235 = metadata !{i32 786689, metadata !230, metadata !"bblock", metadata !5, i32 33557273, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [bblock] [line 2841]
-!236 = metadata !{i32 786689, metadata !230, metadata !"cblock", metadata !5, i32 50334490, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [cblock] [line 2842]
-!237 = metadata !{i32 786688, metadata !230, metadata !"j", metadata !5, i32 2851, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 2851]
-!238 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"matvec_sub", metadata !"matvec_sub", metadata !"", i32 2814, metadata !239, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !241, i32 2814} ; [ DW_TAG_subprogram ] [line 2814] [local] [def] [matvec_sub]
-!239 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !240, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!240 = metadata !{null, metadata !224, metadata !105, metadata !105}
-!241 = metadata !{metadata !242, metadata !243, metadata !244, metadata !245}
-!242 = metadata !{i32 786689, metadata !238, metadata !"ablock", metadata !5, i32 16780030, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ablock] [line 2814]
-!243 = metadata !{i32 786689, metadata !238, metadata !"avec", metadata !5, i32 33557246, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [avec] [line 2814]
-!244 = metadata !{i32 786689, metadata !238, metadata !"bvec", metadata !5, i32 50334462, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [bvec] [line 2814]
-!245 = metadata !{i32 786688, metadata !238, metadata !"i", metadata !5, i32 2823, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2823]
-!246 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"binvcrhs", metadata !"binvcrhs", metadata !"", i32 2885, metadata !247, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !249, i32 2885} ; [ DW_TAG_subprogram ] [line 2885] [local] [def] [binvcrhs]
-!247 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !248, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!248 = metadata !{null, metadata !224, metadata !224, metadata !105}
-!249 = metadata !{metadata !250, metadata !251, metadata !252, metadata !253, metadata !254}
-!250 = metadata !{i32 786689, metadata !246, metadata !"lhs", metadata !5, i32 16780101, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [lhs] [line 2885]
-!251 = metadata !{i32 786689, metadata !246, metadata !"c", metadata !5, i32 33557317, metadata !224, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [c] [line 2885]
-!252 = metadata !{i32 786689, metadata !246, metadata !"r", metadata !5, i32 50334533, metadata !105, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [r] [line 2885]
-!253 = metadata !{i32 786688, metadata !246, metadata !"pivot", metadata !5, i32 2890, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [pivot] [line 2890]
-!254 = metadata !{i32 786688, metadata !246, metadata !"coeff", metadata !5, i32 2890, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [coeff] [line 2890]
-!255 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"lhsz", metadata !"lhsz", metadata !"", i32 1475, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !256, i32 1475} ; [ DW_TAG_subprogram ] [line 1475] [local] [def] [lhsz]
-!256 = metadata !{metadata !257, metadata !258, metadata !259}
-!257 = metadata !{i32 786688, metadata !255, metadata !"i", metadata !5, i32 1484, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 1484]
-!258 = metadata !{i32 786688, metadata !255, metadata !"j", metadata !5, i32 1484, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 1484]
-!259 = metadata !{i32 786688, metadata !255, metadata !"k", metadata !5, i32 1484, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 1484]
-!260 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"y_solve", metadata !"y_solve", metadata !"", i32 3299, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 3299} ; [ DW_TAG_subprogram ] [line 3299] [local] [def] [y_solve]
-!261 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"y_backsubstitute", metadata !"y_backsubstitute", metadata !"", i32 3323, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !262, i32 3323} ; [ DW_TAG_subprogram ] [line 3323] [local] [def] [y_backsubstitute]
-!262 = metadata !{metadata !263, metadata !264, metadata !265, metadata !266, metadata !267}
-!263 = metadata !{i32 786688, metadata !261, metadata !"i", metadata !5, i32 3335, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 3335]
-!264 = metadata !{i32 786688, metadata !261, metadata !"j", metadata !5, i32 3335, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 3335]
-!265 = metadata !{i32 786688, metadata !261, metadata !"k", metadata !5, i32 3335, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 3335]
-!266 = metadata !{i32 786688, metadata !261, metadata !"m", metadata !5, i32 3335, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 3335]
-!267 = metadata !{i32 786688, metadata !261, metadata !"n", metadata !5, i32 3335, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [n] [line 3335]
-!268 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"y_solve_cell", metadata !"y_solve_cell", metadata !"", i32 3355, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !269, i32 3355} ; [ DW_TAG_subprogram ] [line 3355] [local] [def] [y_solve_cell]
-!269 = metadata !{metadata !270, metadata !271, metadata !272, metadata !273}
-!270 = metadata !{i32 786688, metadata !268, metadata !"i", metadata !5, i32 3370, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 3370]
-!271 = metadata !{i32 786688, metadata !268, metadata !"j", metadata !5, i32 3370, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 3370]
-!272 = metadata !{i32 786688, metadata !268, metadata !"k", metadata !5, i32 3370, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 3370]
-!273 = metadata !{i32 786688, metadata !268, metadata !"jsize", metadata !5, i32 3370, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [jsize] [line 3370]
-!274 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"lhsy", metadata !"lhsy", metadata !"", i32 1181, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !275, i32 1181} ; [ DW_TAG_subprogram ] [line 1181] [local] [def] [lhsy]
-!275 = metadata !{metadata !276, metadata !277, metadata !278}
-!276 = metadata !{i32 786688, metadata !274, metadata !"i", metadata !5, i32 1190, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 1190]
-!277 = metadata !{i32 786688, metadata !274, metadata !"j", metadata !5, i32 1190, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 1190]
-!278 = metadata !{i32 786688, metadata !274, metadata !"k", metadata !5, i32 1190, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 1190]
-!279 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"x_solve", metadata !"x_solve", metadata !"", i32 2658, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 2658} ; [ DW_TAG_subprogram ] [line 2658] [local] [def] [x_solve]
-!280 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"x_backsubstitute", metadata !"x_backsubstitute", metadata !"", i32 2684, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !281, i32 2684} ; [ DW_TAG_subprogram ] [line 2684] [local] [def] [x_backsubstitute]
-!281 = metadata !{metadata !282, metadata !283, metadata !284, metadata !285, metadata !286}
-!282 = metadata !{i32 786688, metadata !280, metadata !"i", metadata !5, i32 2696, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2696]
-!283 = metadata !{i32 786688, metadata !280, metadata !"j", metadata !5, i32 2696, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 2696]
-!284 = metadata !{i32 786688, metadata !280, metadata !"k", metadata !5, i32 2696, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 2696]
-!285 = metadata !{i32 786688, metadata !280, metadata !"m", metadata !5, i32 2696, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [m] [line 2696]
-!286 = metadata !{i32 786688, metadata !280, metadata !"n", metadata !5, i32 2696, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [n] [line 2696]
-!287 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"x_solve_cell", metadata !"x_solve_cell", metadata !"", i32 2716, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !288, i32 2716} ; [ DW_TAG_subprogram ] [line 2716] [local] [def] [x_solve_cell]
-!288 = metadata !{metadata !289, metadata !290, metadata !291, metadata !292}
-!289 = metadata !{i32 786688, metadata !287, metadata !"i", metadata !5, i32 2728, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2728]
-!290 = metadata !{i32 786688, metadata !287, metadata !"j", metadata !5, i32 2728, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 2728]
-!291 = metadata !{i32 786688, metadata !287, metadata !"k", metadata !5, i32 2728, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 2728]
-!292 = metadata !{i32 786688, metadata !287, metadata !"isize", metadata !5, i32 2728, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [isize] [line 2728]
-!293 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"lhsx", metadata !"lhsx", metadata !"", i32 898, metadata !115, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !294, i32 898} ; [ DW_TAG_subprogram ] [line 898] [local] [def] [lhsx]
-!294 = metadata !{metadata !295, metadata !296, metadata !297}
-!295 = metadata !{i32 786688, metadata !293, metadata !"i", metadata !5, i32 907, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 907]
-!296 = metadata !{i32 786688, metadata !293, metadata !"j", metadata !5, i32 907, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 907]
-!297 = metadata !{i32 786688, metadata !293, metadata !"k", metadata !5, i32 907, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k] [line 907]
-!298 = metadata !{metadata !299, metadata !304, metadata !305, metadata !309, metadata !310, metadata !311, metadata !312, metadata !313, metadata !314, metadata !315, metadata !316, metadata !317, metadata !318, metadata !319, metadata !320, metadata !321, metadata !322, metadata !323, metadata !324, metadata !325, metadata !326, metadata !327, metadata !328, metadata !329, metadata !330, metadata !331, metadata !332, metadata !333, metadata !334, metadata !335, metadata !336, metadata !337, metadata !338, metadata !339, metadata !340, metadata !341, metadata !342, metadata !343, metadata !347, metadata !350, metadata !351, metadata !352, metadata !353, metadata !354, metadata !355, metadata !356, metadata !360, metadata !361, metadata !362, metadata !363, metadata !364, metadata !365, metadata !366, metadata !367, metadata !368, metadata !369, metadata !370, metadata !371, metadata !372, metadata !373, metadata !374, metadata !375, metadata !376, metadata !377, metadata !378, metadata !379, metadata !380, metadata !381, metadata !382, metadata !383, metadata !384, metadata !385, metadata !386, metadata !387, metadata !388, metadata !389, metadata !390, metadata !391, metadata !392, metadata !393, metadata !394, metadata !395, metadata !396, metadata !397, metadata !398, metadata !399, metadata !400, metadata !401, metadata !402, metadata !403, metadata !404, metadata !405, metadata !406, metadata !407, metadata !408, metadata !409, metadata !410, metadata !411, metadata !412, metadata !413, metadata !414, metadata !415, metadata !416, metadata !417, metadata !418, metadata !419, metadata !422, metadata !426, metadata !427, metadata !430, metadata !431, metadata !434, metadata !435, metadata !436, metadata !437}
-!299 = metadata !{i32 786484, i32 0, null, metadata !"grid_points", metadata !"grid_points", metadata !"", metadata !300, i32 28, metadata !302, i32 1, i32 1, [3 x i32]* @grid_points, null} ; [ DW_TAG_variable ] [grid_points] [line 28] [local] [def]
-!300 = metadata !{i32 786473, metadata !301}      ; [ DW_TAG_file_type ] [/home/hfinkel/src/NPB2.3-omp-C/BT/./header.h]
-!301 = metadata !{metadata !"./header.h", metadata !"/home/hfinkel/src/NPB2.3-omp-C/BT"}
-!302 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 96, i64 32, i32 0, i32 0, metadata !8, metadata !303, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 96, align 32, offset 0] [from int]
-!303 = metadata !{metadata !178}
-!304 = metadata !{i32 786484, i32 0, null, metadata !"dt", metadata !"dt", metadata !"", metadata !300, i32 35, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dt] [line 35] [local] [def]
-!305 = metadata !{i32 786484, i32 0, null, metadata !"rhs", metadata !"rhs", metadata !"", metadata !300, i32 68, metadata !306, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [rhs] [line 68] [local] [def]
-!306 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1385839040, i64 64, i32 0, i32 0, metadata !20, metadata !307, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1385839040, align 64, offset 0] [from double]
-!307 = metadata !{metadata !308, metadata !308, metadata !308, metadata !93}
-!308 = metadata !{i32 786465, i64 0, i64 163}     ; [ DW_TAG_subrange_type ] [0, 162]
-!309 = metadata !{i32 786484, i32 0, null, metadata !"zzcon5", metadata !"zzcon5", metadata !"", metadata !300, i32 42, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [zzcon5] [line 42] [local] [def]
-!310 = metadata !{i32 786484, i32 0, null, metadata !"zzcon4", metadata !"zzcon4", metadata !"", metadata !300, i32 42, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [zzcon4] [line 42] [local] [def]
-!311 = metadata !{i32 786484, i32 0, null, metadata !"zzcon3", metadata !"zzcon3", metadata !"", metadata !300, i32 42, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [zzcon3] [line 42] [local] [def]
-!312 = metadata !{i32 786484, i32 0, null, metadata !"dz5tz1", metadata !"dz5tz1", metadata !"", metadata !300, i32 43, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz5tz1] [line 43] [local] [def]
-!313 = metadata !{i32 786484, i32 0, null, metadata !"dz4tz1", metadata !"dz4tz1", metadata !"", metadata !300, i32 43, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz4tz1] [line 43] [local] [def]
-!314 = metadata !{i32 786484, i32 0, null, metadata !"dz3tz1", metadata !"dz3tz1", metadata !"", metadata !300, i32 43, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz3tz1] [line 43] [local] [def]
-!315 = metadata !{i32 786484, i32 0, null, metadata !"zzcon2", metadata !"zzcon2", metadata !"", metadata !300, i32 42, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [zzcon2] [line 42] [local] [def]
-!316 = metadata !{i32 786484, i32 0, null, metadata !"dz2tz1", metadata !"dz2tz1", metadata !"", metadata !300, i32 43, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz2tz1] [line 43] [local] [def]
-!317 = metadata !{i32 786484, i32 0, null, metadata !"tz2", metadata !"tz2", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tz2] [line 31] [local] [def]
-!318 = metadata !{i32 786484, i32 0, null, metadata !"dz1tz1", metadata !"dz1tz1", metadata !"", metadata !300, i32 43, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz1tz1] [line 43] [local] [def]
-!319 = metadata !{i32 786484, i32 0, null, metadata !"yycon5", metadata !"yycon5", metadata !"", metadata !300, i32 40, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [yycon5] [line 40] [local] [def]
-!320 = metadata !{i32 786484, i32 0, null, metadata !"yycon4", metadata !"yycon4", metadata !"", metadata !300, i32 40, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [yycon4] [line 40] [local] [def]
-!321 = metadata !{i32 786484, i32 0, null, metadata !"yycon3", metadata !"yycon3", metadata !"", metadata !300, i32 40, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [yycon3] [line 40] [local] [def]
-!322 = metadata !{i32 786484, i32 0, null, metadata !"dy5ty1", metadata !"dy5ty1", metadata !"", metadata !300, i32 41, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy5ty1] [line 41] [local] [def]
-!323 = metadata !{i32 786484, i32 0, null, metadata !"dy4ty1", metadata !"dy4ty1", metadata !"", metadata !300, i32 41, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy4ty1] [line 41] [local] [def]
-!324 = metadata !{i32 786484, i32 0, null, metadata !"dy3ty1", metadata !"dy3ty1", metadata !"", metadata !300, i32 41, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy3ty1] [line 41] [local] [def]
-!325 = metadata !{i32 786484, i32 0, null, metadata !"yycon2", metadata !"yycon2", metadata !"", metadata !300, i32 40, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [yycon2] [line 40] [local] [def]
-!326 = metadata !{i32 786484, i32 0, null, metadata !"dy2ty1", metadata !"dy2ty1", metadata !"", metadata !300, i32 41, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy2ty1] [line 41] [local] [def]
-!327 = metadata !{i32 786484, i32 0, null, metadata !"ty2", metadata !"ty2", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ty2] [line 31] [local] [def]
-!328 = metadata !{i32 786484, i32 0, null, metadata !"dy1ty1", metadata !"dy1ty1", metadata !"", metadata !300, i32 41, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy1ty1] [line 41] [local] [def]
-!329 = metadata !{i32 786484, i32 0, null, metadata !"dssp", metadata !"dssp", metadata !"", metadata !300, i32 35, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dssp] [line 35] [local] [def]
-!330 = metadata !{i32 786484, i32 0, null, metadata !"c1", metadata !"c1", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c1] [line 45] [local] [def]
-!331 = metadata !{i32 786484, i32 0, null, metadata !"xxcon5", metadata !"xxcon5", metadata !"", metadata !300, i32 38, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [xxcon5] [line 38] [local] [def]
-!332 = metadata !{i32 786484, i32 0, null, metadata !"xxcon4", metadata !"xxcon4", metadata !"", metadata !300, i32 38, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [xxcon4] [line 38] [local] [def]
-!333 = metadata !{i32 786484, i32 0, null, metadata !"xxcon3", metadata !"xxcon3", metadata !"", metadata !300, i32 38, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [xxcon3] [line 38] [local] [def]
-!334 = metadata !{i32 786484, i32 0, null, metadata !"dx5tx1", metadata !"dx5tx1", metadata !"", metadata !300, i32 39, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx5tx1] [line 39] [local] [def]
-!335 = metadata !{i32 786484, i32 0, null, metadata !"dx4tx1", metadata !"dx4tx1", metadata !"", metadata !300, i32 39, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx4tx1] [line 39] [local] [def]
-!336 = metadata !{i32 786484, i32 0, null, metadata !"dx3tx1", metadata !"dx3tx1", metadata !"", metadata !300, i32 39, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx3tx1] [line 39] [local] [def]
-!337 = metadata !{i32 786484, i32 0, null, metadata !"c2", metadata !"c2", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c2] [line 45] [local] [def]
-!338 = metadata !{i32 786484, i32 0, null, metadata !"con43", metadata !"con43", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [con43] [line 48] [local] [def]
-!339 = metadata !{i32 786484, i32 0, null, metadata !"xxcon2", metadata !"xxcon2", metadata !"", metadata !300, i32 38, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [xxcon2] [line 38] [local] [def]
-!340 = metadata !{i32 786484, i32 0, null, metadata !"dx2tx1", metadata !"dx2tx1", metadata !"", metadata !300, i32 39, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx2tx1] [line 39] [local] [def]
-!341 = metadata !{i32 786484, i32 0, null, metadata !"tx2", metadata !"tx2", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tx2] [line 31] [local] [def]
-!342 = metadata !{i32 786484, i32 0, null, metadata !"dx1tx1", metadata !"dx1tx1", metadata !"", metadata !300, i32 39, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx1tx1] [line 39] [local] [def]
-!343 = metadata !{i32 786484, i32 0, null, metadata !"forcing", metadata !"forcing", metadata !"", metadata !300, i32 66, metadata !344, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [forcing] [line 66] [local] [def]
-!344 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1663006848, i64 64, i32 0, i32 0, metadata !20, metadata !345, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1663006848, align 64, offset 0] [from double]
-!345 = metadata !{metadata !308, metadata !308, metadata !308, metadata !346}
-!346 = metadata !{i32 786465, i64 0, i64 6}       ; [ DW_TAG_subrange_type ] [0, 5]
-!347 = metadata !{i32 786484, i32 0, null, metadata !"qs", metadata !"qs", metadata !"", metadata !300, i32 63, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [qs] [line 63] [local] [def]
-!348 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 277167808, i64 64, i32 0, i32 0, metadata !20, metadata !349, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 277167808, align 64, offset 0] [from double]
-!349 = metadata !{metadata !308, metadata !308, metadata !308}
-!350 = metadata !{i32 786484, i32 0, null, metadata !"square", metadata !"square", metadata !"", metadata !300, i32 65, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [square] [line 65] [local] [def]
-!351 = metadata !{i32 786484, i32 0, null, metadata !"ws", metadata !"ws", metadata !"", metadata !300, i32 62, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ws] [line 62] [local] [def]
-!352 = metadata !{i32 786484, i32 0, null, metadata !"vs", metadata !"vs", metadata !"", metadata !300, i32 61, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [vs] [line 61] [local] [def]
-!353 = metadata !{i32 786484, i32 0, null, metadata !"us", metadata !"us", metadata !"", metadata !300, i32 60, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [us] [line 60] [local] [def]
-!354 = metadata !{i32 786484, i32 0, null, metadata !"rho_i", metadata !"rho_i", metadata !"", metadata !300, i32 64, metadata !348, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [rho_i] [line 64] [local] [def]
-!355 = metadata !{i32 786484, i32 0, null, metadata !"u", metadata !"u", metadata !"", metadata !300, i32 67, metadata !306, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [u] [line 67] [local] [def]
-!356 = metadata !{i32 786484, i32 0, null, metadata !"ce", metadata !"ce", metadata !"", metadata !300, i32 36, metadata !357, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ce] [line 36] [local] [def]
-!357 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 4160, i64 64, i32 0, i32 0, metadata !20, metadata !358, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 4160, align 64, offset 0] [from double]
-!358 = metadata !{metadata !93, metadata !359}
-!359 = metadata !{i32 786465, i64 0, i64 13}      ; [ DW_TAG_subrange_type ] [0, 12]
-!360 = metadata !{i32 786484, i32 0, null, metadata !"dnzm1", metadata !"dnzm1", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dnzm1] [line 44] [local] [def]
-!361 = metadata !{i32 786484, i32 0, null, metadata !"dnym1", metadata !"dnym1", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dnym1] [line 44] [local] [def]
-!362 = metadata !{i32 786484, i32 0, null, metadata !"dnxm1", metadata !"dnxm1", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dnxm1] [line 44] [local] [def]
-!363 = metadata !{i32 786484, i32 0, null, metadata !"zzcon1", metadata !"zzcon1", metadata !"", metadata !300, i32 42, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [zzcon1] [line 42] [local] [def]
-!364 = metadata !{i32 786484, i32 0, null, metadata !"yycon1", metadata !"yycon1", metadata !"", metadata !300, i32 40, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [yycon1] [line 40] [local] [def]
-!365 = metadata !{i32 786484, i32 0, null, metadata !"xxcon1", metadata !"xxcon1", metadata !"", metadata !300, i32 38, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [xxcon1] [line 38] [local] [def]
-!366 = metadata !{i32 786484, i32 0, null, metadata !"con16", metadata !"con16", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [con16] [line 48] [local] [def]
-!367 = metadata !{i32 786484, i32 0, null, metadata !"c2iv", metadata !"c2iv", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c2iv] [line 48] [local] [def]
-!368 = metadata !{i32 786484, i32 0, null, metadata !"c3c4tz3", metadata !"c3c4tz3", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c3c4tz3] [line 48] [local] [def]
-!369 = metadata !{i32 786484, i32 0, null, metadata !"c3c4ty3", metadata !"c3c4ty3", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c3c4ty3] [line 48] [local] [def]
-!370 = metadata !{i32 786484, i32 0, null, metadata !"c3c4tx3", metadata !"c3c4tx3", metadata !"", metadata !300, i32 48, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c3c4tx3] [line 48] [local] [def]
-!371 = metadata !{i32 786484, i32 0, null, metadata !"comz6", metadata !"comz6", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [comz6] [line 47] [local] [def]
-!372 = metadata !{i32 786484, i32 0, null, metadata !"comz5", metadata !"comz5", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [comz5] [line 47] [local] [def]
-!373 = metadata !{i32 786484, i32 0, null, metadata !"comz4", metadata !"comz4", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [comz4] [line 47] [local] [def]
-!374 = metadata !{i32 786484, i32 0, null, metadata !"comz1", metadata !"comz1", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [comz1] [line 47] [local] [def]
-!375 = metadata !{i32 786484, i32 0, null, metadata !"dtdssp", metadata !"dtdssp", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dtdssp] [line 45] [local] [def]
-!376 = metadata !{i32 786484, i32 0, null, metadata !"c2dttz1", metadata !"c2dttz1", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c2dttz1] [line 47] [local] [def]
-!377 = metadata !{i32 786484, i32 0, null, metadata !"c2dtty1", metadata !"c2dtty1", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c2dtty1] [line 47] [local] [def]
-!378 = metadata !{i32 786484, i32 0, null, metadata !"c2dttx1", metadata !"c2dttx1", metadata !"", metadata !300, i32 47, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c2dttx1] [line 47] [local] [def]
-!379 = metadata !{i32 786484, i32 0, null, metadata !"dttz2", metadata !"dttz2", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dttz2] [line 46] [local] [def]
-!380 = metadata !{i32 786484, i32 0, null, metadata !"dttz1", metadata !"dttz1", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dttz1] [line 46] [local] [def]
-!381 = metadata !{i32 786484, i32 0, null, metadata !"dtty2", metadata !"dtty2", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dtty2] [line 46] [local] [def]
-!382 = metadata !{i32 786484, i32 0, null, metadata !"dtty1", metadata !"dtty1", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dtty1] [line 46] [local] [def]
-!383 = metadata !{i32 786484, i32 0, null, metadata !"dttx2", metadata !"dttx2", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dttx2] [line 46] [local] [def]
-!384 = metadata !{i32 786484, i32 0, null, metadata !"dttx1", metadata !"dttx1", metadata !"", metadata !300, i32 46, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dttx1] [line 46] [local] [def]
-!385 = metadata !{i32 786484, i32 0, null, metadata !"c5dssp", metadata !"c5dssp", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c5dssp] [line 45] [local] [def]
-!386 = metadata !{i32 786484, i32 0, null, metadata !"c4dssp", metadata !"c4dssp", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c4dssp] [line 45] [local] [def]
-!387 = metadata !{i32 786484, i32 0, null, metadata !"dzmax", metadata !"dzmax", metadata !"", metadata !300, i32 37, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dzmax] [line 37] [local] [def]
-!388 = metadata !{i32 786484, i32 0, null, metadata !"dymax", metadata !"dymax", metadata !"", metadata !300, i32 37, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dymax] [line 37] [local] [def]
-!389 = metadata !{i32 786484, i32 0, null, metadata !"dxmax", metadata !"dxmax", metadata !"", metadata !300, i32 37, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dxmax] [line 37] [local] [def]
-!390 = metadata !{i32 786484, i32 0, null, metadata !"dz5", metadata !"dz5", metadata !"", metadata !300, i32 34, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz5] [line 34] [local] [def]
-!391 = metadata !{i32 786484, i32 0, null, metadata !"dz4", metadata !"dz4", metadata !"", metadata !300, i32 34, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz4] [line 34] [local] [def]
-!392 = metadata !{i32 786484, i32 0, null, metadata !"dz3", metadata !"dz3", metadata !"", metadata !300, i32 34, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz3] [line 34] [local] [def]
-!393 = metadata !{i32 786484, i32 0, null, metadata !"dz2", metadata !"dz2", metadata !"", metadata !300, i32 34, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz2] [line 34] [local] [def]
-!394 = metadata !{i32 786484, i32 0, null, metadata !"dz1", metadata !"dz1", metadata !"", metadata !300, i32 34, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dz1] [line 34] [local] [def]
-!395 = metadata !{i32 786484, i32 0, null, metadata !"dy5", metadata !"dy5", metadata !"", metadata !300, i32 33, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy5] [line 33] [local] [def]
-!396 = metadata !{i32 786484, i32 0, null, metadata !"dy4", metadata !"dy4", metadata !"", metadata !300, i32 33, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy4] [line 33] [local] [def]
-!397 = metadata !{i32 786484, i32 0, null, metadata !"dy3", metadata !"dy3", metadata !"", metadata !300, i32 33, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy3] [line 33] [local] [def]
-!398 = metadata !{i32 786484, i32 0, null, metadata !"dy2", metadata !"dy2", metadata !"", metadata !300, i32 33, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy2] [line 33] [local] [def]
-!399 = metadata !{i32 786484, i32 0, null, metadata !"dy1", metadata !"dy1", metadata !"", metadata !300, i32 33, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dy1] [line 33] [local] [def]
-!400 = metadata !{i32 786484, i32 0, null, metadata !"dx5", metadata !"dx5", metadata !"", metadata !300, i32 32, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx5] [line 32] [local] [def]
-!401 = metadata !{i32 786484, i32 0, null, metadata !"dx4", metadata !"dx4", metadata !"", metadata !300, i32 32, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx4] [line 32] [local] [def]
-!402 = metadata !{i32 786484, i32 0, null, metadata !"dx3", metadata !"dx3", metadata !"", metadata !300, i32 32, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx3] [line 32] [local] [def]
-!403 = metadata !{i32 786484, i32 0, null, metadata !"dx2", metadata !"dx2", metadata !"", metadata !300, i32 32, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx2] [line 32] [local] [def]
-!404 = metadata !{i32 786484, i32 0, null, metadata !"dx1", metadata !"dx1", metadata !"", metadata !300, i32 32, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [dx1] [line 32] [local] [def]
-!405 = metadata !{i32 786484, i32 0, null, metadata !"tz3", metadata !"tz3", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tz3] [line 31] [local] [def]
-!406 = metadata !{i32 786484, i32 0, null, metadata !"tz1", metadata !"tz1", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tz1] [line 31] [local] [def]
-!407 = metadata !{i32 786484, i32 0, null, metadata !"ty3", metadata !"ty3", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ty3] [line 31] [local] [def]
-!408 = metadata !{i32 786484, i32 0, null, metadata !"ty1", metadata !"ty1", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ty1] [line 31] [local] [def]
-!409 = metadata !{i32 786484, i32 0, null, metadata !"tx3", metadata !"tx3", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tx3] [line 31] [local] [def]
-!410 = metadata !{i32 786484, i32 0, null, metadata !"tx1", metadata !"tx1", metadata !"", metadata !300, i32 31, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tx1] [line 31] [local] [def]
-!411 = metadata !{i32 786484, i32 0, null, metadata !"conz1", metadata !"conz1", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [conz1] [line 45] [local] [def]
-!412 = metadata !{i32 786484, i32 0, null, metadata !"c1345", metadata !"c1345", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c1345] [line 44] [local] [def]
-!413 = metadata !{i32 786484, i32 0, null, metadata !"c3c4", metadata !"c3c4", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c3c4] [line 44] [local] [def]
-!414 = metadata !{i32 786484, i32 0, null, metadata !"c1c5", metadata !"c1c5", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c1c5] [line 44] [local] [def]
-!415 = metadata !{i32 786484, i32 0, null, metadata !"c1c2", metadata !"c1c2", metadata !"", metadata !300, i32 44, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c1c2] [line 44] [local] [def]
-!416 = metadata !{i32 786484, i32 0, null, metadata !"c5", metadata !"c5", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c5] [line 45] [local] [def]
-!417 = metadata !{i32 786484, i32 0, null, metadata !"c4", metadata !"c4", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c4] [line 45] [local] [def]
-!418 = metadata !{i32 786484, i32 0, null, metadata !"c3", metadata !"c3", metadata !"", metadata !300, i32 45, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [c3] [line 45] [local] [def]
-!419 = metadata !{i32 786484, i32 0, null, metadata !"lhs", metadata !"lhs", metadata !"", metadata !300, i32 69, metadata !420, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [lhs] [line 69] [local] [def]
-!420 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 20787585600, i64 64, i32 0, i32 0, metadata !20, metadata !421, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 20787585600, align 64, offset 0] [from double]
-!421 = metadata !{metadata !308, metadata !308, metadata !308, metadata !178, metadata !93, metadata !93}
-!422 = metadata !{i32 786484, i32 0, null, metadata !"q", metadata !"q", metadata !"", metadata !300, i32 73, metadata !423, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [q] [line 73] [local] [def]
-!423 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 10368, i64 64, i32 0, i32 0, metadata !20, metadata !424, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 10368, align 64, offset 0] [from double]
-!424 = metadata !{metadata !425}
-!425 = metadata !{i32 786465, i64 0, i64 162}     ; [ DW_TAG_subrange_type ] [0, 161]
-!426 = metadata !{i32 786484, i32 0, null, metadata !"cuf", metadata !"cuf", metadata !"", metadata !300, i32 72, metadata !423, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [cuf] [line 72] [local] [def]
-!427 = metadata !{i32 786484, i32 0, null, metadata !"buf", metadata !"buf", metadata !"", metadata !300, i32 75, metadata !428, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [buf] [line 75] [local] [def]
-!428 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 51840, i64 64, i32 0, i32 0, metadata !20, metadata !429, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 51840, align 64, offset 0] [from double]
-!429 = metadata !{metadata !425, metadata !93}
-!430 = metadata !{i32 786484, i32 0, null, metadata !"ue", metadata !"ue", metadata !"", metadata !300, i32 74, metadata !428, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [ue] [line 74] [local] [def]
-!431 = metadata !{i32 786484, i32 0, null, metadata !"njac", metadata !"njac", metadata !"", metadata !300, i32 86, metadata !432, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [njac] [line 86] [local] [def]
-!432 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 6886684800, i64 64, i32 0, i32 0, metadata !20, metadata !433, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 6886684800, align 64, offset 0] [from double]
-!433 = metadata !{metadata !308, metadata !308, metadata !425, metadata !93, metadata !93}
-!434 = metadata !{i32 786484, i32 0, null, metadata !"fjac", metadata !"fjac", metadata !"", metadata !300, i32 84, metadata !432, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [fjac] [line 84] [local] [def]
-!435 = metadata !{i32 786484, i32 0, null, metadata !"tmp3", metadata !"tmp3", metadata !"", metadata !300, i32 88, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tmp3] [line 88] [local] [def]
-!436 = metadata !{i32 786484, i32 0, null, metadata !"tmp2", metadata !"tmp2", metadata !"", metadata !300, i32 88, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tmp2] [line 88] [local] [def]
-!437 = metadata !{i32 786484, i32 0, null, metadata !"tmp1", metadata !"tmp1", metadata !"", metadata !300, i32 88, metadata !20, i32 1, i32 1, null, null} ; [ DW_TAG_variable ] [tmp1] [line 88] [local] [def]
-!438 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!439 = metadata !{i32 1898, i32 0, metadata !440, null}
-!440 = metadata !{i32 786443, metadata !1, metadata !114, i32 1898, i32 0, i32 107} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!441 = metadata !{i32 1913, i32 0, metadata !442, null}
-!442 = metadata !{i32 786443, metadata !1, metadata !114, i32 1913, i32 0, i32 115} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!443 = metadata !{i32 1923, i32 0, metadata !114, null}
-!444 = metadata !{metadata !"int", metadata !445}
-!445 = metadata !{metadata !"omnipotent char", metadata !446}
-!446 = metadata !{metadata !"Simple C/C++ TBAA"}
-!447 = metadata !{i32 1}
-!448 = metadata !{i32 1925, i32 0, metadata !449, null}
-!449 = metadata !{i32 786443, metadata !1, metadata !114, i32 1925, i32 0, i32 121} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!450 = metadata !{i32 1939, i32 0, metadata !451, null}
-!451 = metadata !{i32 786443, metadata !1, metadata !114, i32 1939, i32 0, i32 127} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!452 = metadata !{i32 1940, i32 0, metadata !453, null}
-!453 = metadata !{i32 786443, metadata !1, metadata !454, i32 1940, i32 0, i32 129} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!454 = metadata !{i32 786443, metadata !1, metadata !451, i32 1939, i32 0, i32 128} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!455 = metadata !{i32 1941, i32 0, metadata !456, null}
-!456 = metadata !{i32 786443, metadata !1, metadata !457, i32 1941, i32 0, i32 131} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!457 = metadata !{i32 786443, metadata !1, metadata !453, i32 1940, i32 0, i32 130} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!458 = metadata !{i32 2020, i32 0, metadata !459, null}
-!459 = metadata !{i32 786443, metadata !1, metadata !460, i32 2020, i32 0, i32 149} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!460 = metadata !{i32 786443, metadata !1, metadata !461, i32 2019, i32 0, i32 148} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!461 = metadata !{i32 786443, metadata !1, metadata !462, i32 2019, i32 0, i32 147} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!462 = metadata !{i32 786443, metadata !1, metadata !463, i32 2018, i32 0, i32 146} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!463 = metadata !{i32 786443, metadata !1, metadata !114, i32 2018, i32 0, i32 145} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
-!464 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 (trunk 190311)\001\00\000\00\000", !1, !2, !2, !3, !298, !2} ; [ DW_TAG_compile_unit ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c] [DW_LANG_C99]
+!1 = !{!"bt.c", !"/home/hfinkel/src/NPB2.3-omp-C/BT"}
+!2 = !{}
+!3 = !{!4, !82, !102, !114, !132, !145, !154, !155, !162, !183, !200, !201, !207, !208, !215, !221, !230, !238, !246, !255, !260, !261, !268, !274, !279, !280, !287, !293}
+!4 = !{!"0x2e\00main\00main\00\0074\000\001\000\006\00256\001\0074", !1, !5, !6, null, null, null, null, !12} ; [ DW_TAG_subprogram ] [line 74] [def] [main]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8, !9}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0xf\00\000\0064\0064\000\000", null, null, !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!10 = !{!"0xf\00\000\0064\0064\000\000", null, null, !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!11 = !{!"0x24\00char\000\008\008\000\000\008", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_unsigned_char]
+!12 = !{!13, !14, !15, !16, !17, !18, !19, !21, !22, !23, !25, !26}
+!13 = !{!"0x101\00argc\0016777290\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [argc] [line 74]
+!14 = !{!"0x101\00argv\0033554506\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [argv] [line 74]
+!15 = !{!"0x100\00niter\0076\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [niter] [line 76]
+!16 = !{!"0x100\00step\0076\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [step] [line 76]
+!17 = !{!"0x100\00n3\0076\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [n3] [line 76]
+!18 = !{!"0x100\00nthreads\0077\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [nthreads] [line 77]
+!19 = !{!"0x100\00navg\0078\000", !4, !5, !20} ; [ DW_TAG_auto_variable ] [navg] [line 78]
+!20 = !{!"0x24\00double\000\0064\0064\000\000\004", null, null} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!21 = !{!"0x100\00mflops\0078\000", !4, !5, !20} ; [ DW_TAG_auto_variable ] [mflops] [line 78]
+!22 = !{!"0x100\00tmax\0080\000", !4, !5, !20} ; [ DW_TAG_auto_variable ] [tmax] [line 80]
+!23 = !{!"0x100\00verified\0081\000", !4, !5, !24} ; [ DW_TAG_auto_variable ] [verified] [line 81]
+!24 = !{!"0x16\00boolean\0012\000\000\000\000", !1, null, !8} ; [ DW_TAG_typedef ] [boolean] [line 12, size 0, align 0, offset 0] [from int]
+!25 = !{!"0x100\00class\0082\000", !4, !5, !11} ; [ DW_TAG_auto_variable ] [class] [line 82]
+!26 = !{!"0x100\00fp\0083\000", !4, !5, !27} ; [ DW_TAG_auto_variable ] [fp] [line 83]
+!27 = !{!"0xf\00\000\0064\0064\000\000", null, null, !28} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from FILE]
+!28 = !{!"0x16\00FILE\0049\000\000\000\000", !1, null, !29} ; [ DW_TAG_typedef ] [FILE] [line 49, size 0, align 0, offset 0] [from _IO_FILE]
+!29 = !{!"0x13\00_IO_FILE\00271\001728\0064\000\000\000", !30, null, null, !31, null, null, null} ; [ DW_TAG_structure_type ] [_IO_FILE] [line 271, size 1728, align 64, offset 0] [def] [from ]
+!30 = !{!"/usr/include/libio.h", !"/home/hfinkel/src/NPB2.3-omp-C/BT"}
+!31 = !{!32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !52, !53, !54, !55, !58, !60, !62, !66, !68, !70, !71, !72, !73, !74, !77, !78}
+!32 = !{!"0xd\00_flags\00272\0032\0032\000\000", !30, !29, !8} ; [ DW_TAG_member ] [_flags] [line 272, size 32, align 32, offset 0] [from int]
+!33 = !{!"0xd\00_IO_read_ptr\00277\0064\0064\0064\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_read_ptr] [line 277, size 64, align 64, offset 64] [from ]
+!34 = !{!"0xd\00_IO_read_end\00278\0064\0064\00128\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_read_end] [line 278, size 64, align 64, offset 128] [from ]
+!35 = !{!"0xd\00_IO_read_base\00279\0064\0064\00192\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_read_base] [line 279, size 64, align 64, offset 192] [from ]
+!36 = !{!"0xd\00_IO_write_base\00280\0064\0064\00256\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_write_base] [line 280, size 64, align 64, offset 256] [from ]
+!37 = !{!"0xd\00_IO_write_ptr\00281\0064\0064\00320\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_write_ptr] [line 281, size 64, align 64, offset 320] [from ]
+!38 = !{!"0xd\00_IO_write_end\00282\0064\0064\00384\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_write_end] [line 282, size 64, align 64, offset 384] [from ]
+!39 = !{!"0xd\00_IO_buf_base\00283\0064\0064\00448\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_buf_base] [line 283, size 64, align 64, offset 448] [from ]
+!40 = !{!"0xd\00_IO_buf_end\00284\0064\0064\00512\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_buf_end] [line 284, size 64, align 64, offset 512] [from ]
+!41 = !{!"0xd\00_IO_save_base\00286\0064\0064\00576\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_save_base] [line 286, size 64, align 64, offset 576] [from ]
+!42 = !{!"0xd\00_IO_backup_base\00287\0064\0064\00640\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_backup_base] [line 287, size 64, align 64, offset 640] [from ]
+!43 = !{!"0xd\00_IO_save_end\00288\0064\0064\00704\000", !30, !29, !10} ; [ DW_TAG_member ] [_IO_save_end] [line 288, size 64, align 64, offset 704] [from ]
+!44 = !{!"0xd\00_markers\00290\0064\0064\00768\000", !30, !29, !45} ; [ DW_TAG_member ] [_markers] [line 290, size 64, align 64, offset 768] [from ]
+!45 = !{!"0xf\00\000\0064\0064\000\000", null, null, !46} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _IO_marker]
+!46 = !{!"0x13\00_IO_marker\00186\00192\0064\000\000\000", !30, null, null, !47, null, null, null} ; [ DW_TAG_structure_type ] [_IO_marker] [line 186, size 192, align 64, offset 0] [def] [from ]
+!47 = !{!48, !49, !51}
+!48 = !{!"0xd\00_next\00187\0064\0064\000\000", !30, !46, !45} ; [ DW_TAG_member ] [_next] [line 187, size 64, align 64, offset 0] [from ]
+!49 = !{!"0xd\00_sbuf\00188\0064\0064\0064\000", !30, !46, !50} ; [ DW_TAG_member ] [_sbuf] [line 188, size 64, align 64, offset 64] [from ]
+!50 = !{!"0xf\00\000\0064\0064\000\000", null, null, !29} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _IO_FILE]
+!51 = !{!"0xd\00_pos\00192\0032\0032\00128\000", !30, !46, !8} ; [ DW_TAG_member ] [_pos] [line 192, size 32, align 32, offset 128] [from int]
+!52 = !{!"0xd\00_chain\00292\0064\0064\00832\000", !30, !29, !50} ; [ DW_TAG_member ] [_chain] [line 292, size 64, align 64, offset 832] [from ]
+!53 = !{!"0xd\00_fileno\00294\0032\0032\00896\000", !30, !29, !8} ; [ DW_TAG_member ] [_fileno] [line 294, size 32, align 32, offset 896] [from int]
+!54 = !{!"0xd\00_flags2\00298\0032\0032\00928\000", !30, !29, !8} ; [ DW_TAG_member ] [_flags2] [line 298, size 32, align 32, offset 928] [from int]
+!55 = !{!"0xd\00_old_offset\00300\0064\0064\00960\000", !30, !29, !56} ; [ DW_TAG_member ] [_old_offset] [line 300, size 64, align 64, offset 960] [from __off_t]
+!56 = !{!"0x16\00__off_t\00141\000\000\000\000", !30, null, !57} ; [ DW_TAG_typedef ] [__off_t] [line 141, size 0, align 0, offset 0] [from long int]
+!57 = !{!"0x24\00long int\000\0064\0064\000\000\005", null, null} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
+!58 = !{!"0xd\00_cur_column\00304\0016\0016\001024\000", !30, !29, !59} ; [ DW_TAG_member ] [_cur_column] [line 304, size 16, align 16, offset 1024] [from unsigned short]
+!59 = !{!"0x24\00unsigned short\000\0016\0016\000\000\007", null, null} ; [ DW_TAG_base_type ] [unsigned short] [line 0, size 16, align 16, offset 0, enc DW_ATE_unsigned]
+!60 = !{!"0xd\00_vtable_offset\00305\008\008\001040\000", !30, !29, !61} ; [ DW_TAG_member ] [_vtable_offset] [line 305, size 8, align 8, offset 1040] [from signed char]
+!61 = !{!"0x24\00signed char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [signed char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!62 = !{!"0xd\00_shortbuf\00306\008\008\001048\000", !30, !29, !63} ; [ DW_TAG_member ] [_shortbuf] [line 306, size 8, align 8, offset 1048] [from ]
+!63 = !{!"0x1\00\000\008\008\000\000", null, null, !11, !64, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 8, align 8, offset 0] [from char]
+!64 = !{!65}
+!65 = !{!"0x21\000\001"}        ; [ DW_TAG_subrange_type ] [0, 0]
+!66 = !{!"0xd\00_lock\00310\0064\0064\001088\000", !30, !29, !67} ; [ DW_TAG_member ] [_lock] [line 310, size 64, align 64, offset 1088] [from ]
+!67 = !{!"0xf\00\000\0064\0064\000\000", null, null, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!68 = !{!"0xd\00_offset\00319\0064\0064\001152\000", !30, !29, !69} ; [ DW_TAG_member ] [_offset] [line 319, size 64, align 64, offset 1152] [from __off64_t]
+!69 = !{!"0x16\00__off64_t\00142\000\000\000\000", !30, null, !57} ; [ DW_TAG_typedef ] [__off64_t] [line 142, size 0, align 0, offset 0] [from long int]
+!70 = !{!"0xd\00__pad1\00328\0064\0064\001216\000", !30, !29, !67} ; [ DW_TAG_member ] [__pad1] [line 328, size 64, align 64, offset 1216] [from ]
+!71 = !{!"0xd\00__pad2\00329\0064\0064\001280\000", !30, !29, !67} ; [ DW_TAG_member ] [__pad2] [line 329, size 64, align 64, offset 1280] [from ]
+!72 = !{!"0xd\00__pad3\00330\0064\0064\001344\000", !30, !29, !67} ; [ DW_TAG_member ] [__pad3] [line 330, size 64, align 64, offset 1344] [from ]
+!73 = !{!"0xd\00__pad4\00331\0064\0064\001408\000", !30, !29, !67} ; [ DW_TAG_member ] [__pad4] [line 331, size 64, align 64, offset 1408] [from ]
+!74 = !{!"0xd\00__pad5\00332\0064\0064\001472\000", !30, !29, !75} ; [ DW_TAG_member ] [__pad5] [line 332, size 64, align 64, offset 1472] [from size_t]
+!75 = !{!"0x16\00size_t\0042\000\000\000\000", !30, null, !76} ; [ DW_TAG_typedef ] [size_t] [line 42, size 0, align 0, offset 0] [from long unsigned int]
+!76 = !{!"0x24\00long unsigned int\000\0064\0064\000\000\007", null, null} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!77 = !{!"0xd\00_mode\00334\0032\0032\001536\000", !30, !29, !8} ; [ DW_TAG_member ] [_mode] [line 334, size 32, align 32, offset 1536] [from int]
+!78 = !{!"0xd\00_unused2\00336\00160\008\001568\000", !30, !29, !79} ; [ DW_TAG_member ] [_unused2] [line 336, size 160, align 8, offset 1568] [from ]
+!79 = !{!"0x1\00\000\00160\008\000\000", null, null, !11, !80, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char]
+!80 = !{!81}
+!81 = !{!"0x21\000\0020"}       ; [ DW_TAG_subrange_type ] [0, 19]
+!82 = !{!"0x2e\00verify\00verify\00\002388\001\001\000\006\00256\001\002388", !1, !5, !83, null, null, null, null, !86} ; [ DW_TAG_subprogram ] [line 2388] [local] [def] [verify]
+!83 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !84, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!84 = !{null, !8, !10, !85}
+!85 = !{!"0xf\00\000\0064\0064\000\000", null, null, !24} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from boolean]
+!86 = !{!87, !88, !89, !90, !94, !95, !96, !97, !98, !99, !100, !101}
+!87 = !{!"0x101\00no_time_steps\0016779604\000", !82, !5, !8} ; [ DW_TAG_arg_variable ] [no_time_steps] [line 2388]
+!88 = !{!"0x101\00class\0033556820\000", !82, !5, !10} ; [ DW_TAG_arg_variable ] [class] [line 2388]
+!89 = !{!"0x101\00verified\0050334036\000", !82, !5, !85} ; [ DW_TAG_arg_variable ] [verified] [line 2388]
+!90 = !{!"0x100\00xcrref\002397\000", !82, !5, !91} ; [ DW_TAG_auto_variable ] [xcrref] [line 2397]
+!91 = !{!"0x1\00\000\00320\0064\000\000", null, null, !20, !92, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 320, align 64, offset 0] [from double]
+!92 = !{!93}
+!93 = !{!"0x21\000\005"}        ; [ DW_TAG_subrange_type ] [0, 4]
+!94 = !{!"0x100\00xceref\002397\000", !82, !5, !91} ; [ DW_TAG_auto_variable ] [xceref] [line 2397]
+!95 = !{!"0x100\00xcrdif\002397\000", !82, !5, !91} ; [ DW_TAG_auto_variable ] [xcrdif] [line 2397]
+!96 = !{!"0x100\00xcedif\002397\000", !82, !5, !91} ; [ DW_TAG_auto_variable ] [xcedif] [line 2397]
+!97 = !{!"0x100\00epsilon\002398\000", !82, !5, !20} ; [ DW_TAG_auto_variable ] [epsilon] [line 2398]
+!98 = !{!"0x100\00xce\002398\000", !82, !5, !91} ; [ DW_TAG_auto_variable ] [xce] [line 2398]
+!99 = !{!"0x100\00xcr\002398\000", !82, !5, !91} ; [ DW_TAG_auto_variable ] [xcr] [line 2398]
+!100 = !{!"0x100\00dtref\002398\000", !82, !5, !20} ; [ DW_TAG_auto_variable ] [dtref] [line 2398]
+!101 = !{!"0x100\00m\002399\000", !82, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 2399]
+!102 = !{!"0x2e\00rhs_norm\00rhs_norm\00\00266\001\001\000\006\00256\001\00266", !1, !5, !103, null, null, null, null, !106} ; [ DW_TAG_subprogram ] [line 266] [local] [def] [rhs_norm]
+!103 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !104, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!104 = !{null, !105}
+!105 = !{!"0xf\00\000\0064\0064\000\000", null, null, !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from double]
+!106 = !{!107, !108, !109, !110, !111, !112, !113}
+!107 = !{!"0x101\00rms\0016777482\000", !102, !5, !105} ; [ DW_TAG_arg_variable ] [rms] [line 266]
+!108 = !{!"0x100\00i\00271\000", !102, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 271]
+!109 = !{!"0x100\00j\00271\000", !102, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 271]
+!110 = !{!"0x100\00k\00271\000", !102, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 271]
+!111 = !{!"0x100\00d\00271\000", !102, !5, !8} ; [ DW_TAG_auto_variable ] [d] [line 271]
+!112 = !{!"0x100\00m\00271\000", !102, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 271]
+!113 = !{!"0x100\00add\00272\000", !102, !5, !20} ; [ DW_TAG_auto_variable ] [add] [line 272]
+!114 = !{!"0x2e\00compute_rhs\00compute_rhs\00\001767\001\001\000\006\00256\001\001767", !1, !5, !115, null, void ()* @compute_rhs, null, null, !117} ; [ DW_TAG_subprogram ] [line 1767] [local] [def] [compute_rhs]
+!115 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !116, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!116 = !{null}
+!117 = !{!118, !119, !120, !121, !122, !123, !124, !125, !126, !127, !128, !129, !130, !131}
+!118 = !{!"0x100\00i\001769\000", !114, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 1769]
+!119 = !{!"0x100\00j\001769\000", !114, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 1769]
+!120 = !{!"0x100\00k\001769\000", !114, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 1769]
+!121 = !{!"0x100\00m\001769\000", !114, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 1769]
+!122 = !{!"0x100\00rho_inv\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [rho_inv] [line 1770]
+!123 = !{!"0x100\00uijk\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [uijk] [line 1770]
+!124 = !{!"0x100\00up1\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [up1] [line 1770]
+!125 = !{!"0x100\00um1\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [um1] [line 1770]
+!126 = !{!"0x100\00vijk\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [vijk] [line 1770]
+!127 = !{!"0x100\00vp1\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [vp1] [line 1770]
+!128 = !{!"0x100\00vm1\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [vm1] [line 1770]
+!129 = !{!"0x100\00wijk\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [wijk] [line 1770]
+!130 = !{!"0x100\00wp1\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [wp1] [line 1770]
+!131 = !{!"0x100\00wm1\001770\000", !114, !5, !20} ; [ DW_TAG_auto_variable ] [wm1] [line 1770]
+!132 = !{!"0x2e\00error_norm\00error_norm\00\00225\001\001\000\006\00256\001\00225", !1, !5, !103, null, null, null, null, !133} ; [ DW_TAG_subprogram ] [line 225] [local] [def] [error_norm]
+!133 = !{!134, !135, !136, !137, !138, !139, !140, !141, !142, !143, !144}
+!134 = !{!"0x101\00rms\0016777441\000", !132, !5, !105} ; [ DW_TAG_arg_variable ] [rms] [line 225]
+!135 = !{!"0x100\00i\00232\000", !132, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 232]
+!136 = !{!"0x100\00j\00232\000", !132, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 232]
+!137 = !{!"0x100\00k\00232\000", !132, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 232]
+!138 = !{!"0x100\00m\00232\000", !132, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 232]
+!139 = !{!"0x100\00d\00232\000", !132, !5, !8} ; [ DW_TAG_auto_variable ] [d] [line 232]
+!140 = !{!"0x100\00xi\00233\000", !132, !5, !20} ; [ DW_TAG_auto_variable ] [xi] [line 233]
+!141 = !{!"0x100\00eta\00233\000", !132, !5, !20} ; [ DW_TAG_auto_variable ] [eta] [line 233]
+!142 = !{!"0x100\00zeta\00233\000", !132, !5, !20} ; [ DW_TAG_auto_variable ] [zeta] [line 233]
+!143 = !{!"0x100\00u_exact\00233\000", !132, !5, !91} ; [ DW_TAG_auto_variable ] [u_exact] [line 233]
+!144 = !{!"0x100\00add\00233\000", !132, !5, !20} ; [ DW_TAG_auto_variable ] [add] [line 233]
+!145 = !{!"0x2e\00exact_solution\00exact_solution\00\00643\001\001\000\006\00256\001\00644", !1, !5, !146, null, null, null, null, !148} ; [ DW_TAG_subprogram ] [line 643] [local] [def] [scope 644] [exact_solution]
+!146 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !147, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!147 = !{null, !20, !20, !20, !105}
+!148 = !{!149, !150, !151, !152, !153}
+!149 = !{!"0x101\00xi\0016777859\000", !145, !5, !20} ; [ DW_TAG_arg_variable ] [xi] [line 643]
+!150 = !{!"0x101\00eta\0033555075\000", !145, !5, !20} ; [ DW_TAG_arg_variable ] [eta] [line 643]
+!151 = !{!"0x101\00zeta\0050332291\000", !145, !5, !20} ; [ DW_TAG_arg_variable ] [zeta] [line 643]
+!152 = !{!"0x101\00dtemp\0067109508\000", !145, !5, !105} ; [ DW_TAG_arg_variable ] [dtemp] [line 644]
+!153 = !{!"0x100\00m\00653\000", !145, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 653]
+!154 = !{!"0x2e\00set_constants\00set_constants\00\002191\001\001\000\006\00256\001\002191", !1, !5, !115, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 2191] [local] [def] [set_constants]
+!155 = !{!"0x2e\00lhsinit\00lhsinit\00\00855\001\001\000\006\00256\001\00855", !1, !5, !115, null, null, null, null, !156} ; [ DW_TAG_subprogram ] [line 855] [local] [def] [lhsinit]
+!156 = !{!157, !158, !159, !160, !161}
+!157 = !{!"0x100\00i\00857\000", !155, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 857]
+!158 = !{!"0x100\00j\00857\000", !155, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 857]
+!159 = !{!"0x100\00k\00857\000", !155, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 857]
+!160 = !{!"0x100\00m\00857\000", !155, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 857]
+!161 = !{!"0x100\00n\00857\000", !155, !5, !8} ; [ DW_TAG_auto_variable ] [n] [line 857]
+!162 = !{!"0x2e\00initialize\00initialize\00\00669\001\001\000\006\00256\001\00669", !1, !5, !115, null, null, null, null, !163} ; [ DW_TAG_subprogram ] [line 669] [local] [def] [initialize]
+!163 = !{!164, !165, !166, !167, !168, !169, !170, !171, !172, !173, !174, !179, !180, !181, !182}
+!164 = !{!"0x100\00i\00679\000", !162, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 679]
+!165 = !{!"0x100\00j\00679\000", !162, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 679]
+!166 = !{!"0x100\00k\00679\000", !162, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 679]
+!167 = !{!"0x100\00m\00679\000", !162, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 679]
+!168 = !{!"0x100\00ix\00679\000", !162, !5, !8} ; [ DW_TAG_auto_variable ] [ix] [line 679]
+!169 = !{!"0x100\00iy\00679\000", !162, !5, !8} ; [ DW_TAG_auto_variable ] [iy] [line 679]
+!170 = !{!"0x100\00iz\00679\000", !162, !5, !8} ; [ DW_TAG_auto_variable ] [iz] [line 679]
+!171 = !{!"0x100\00xi\00680\000", !162, !5, !20} ; [ DW_TAG_auto_variable ] [xi] [line 680]
+!172 = !{!"0x100\00eta\00680\000", !162, !5, !20} ; [ DW_TAG_auto_variable ] [eta] [line 680]
+!173 = !{!"0x100\00zeta\00680\000", !162, !5, !20} ; [ DW_TAG_auto_variable ] [zeta] [line 680]
+!174 = !{!"0x100\00Pface\00680\000", !162, !5, !175} ; [ DW_TAG_auto_variable ] [Pface] [line 680]
+!175 = !{!"0x1\00\000\001920\0064\000\000", null, null, !20, !176, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1920, align 64, offset 0] [from double]
+!176 = !{!177, !178, !93}
+!177 = !{!"0x21\000\002"}       ; [ DW_TAG_subrange_type ] [0, 1]
+!178 = !{!"0x21\000\003"}       ; [ DW_TAG_subrange_type ] [0, 2]
+!179 = !{!"0x100\00Pxi\00680\000", !162, !5, !20} ; [ DW_TAG_auto_variable ] [Pxi] [line 680]
+!180 = !{!"0x100\00Peta\00680\000", !162, !5, !20} ; [ DW_TAG_auto_variable ] [Peta] [line 680]
+!181 = !{!"0x100\00Pzeta\00680\000", !162, !5, !20} ; [ DW_TAG_auto_variable ] [Pzeta] [line 680]
+!182 = !{!"0x100\00temp\00680\000", !162, !5, !91} ; [ DW_TAG_auto_variable ] [temp] [line 680]
+!183 = !{!"0x2e\00exact_rhs\00exact_rhs\00\00301\001\001\000\006\00256\001\00301", !1, !5, !115, null, null, null, null, !184} ; [ DW_TAG_subprogram ] [line 301] [local] [def] [exact_rhs]
+!184 = !{!185, !186, !187, !188, !189, !190, !191, !192, !193, !194, !195, !196, !197, !198, !199}
+!185 = !{!"0x100\00dtemp\00310\000", !183, !5, !91} ; [ DW_TAG_auto_variable ] [dtemp] [line 310]
+!186 = !{!"0x100\00xi\00310\000", !183, !5, !20} ; [ DW_TAG_auto_variable ] [xi] [line 310]
+!187 = !{!"0x100\00eta\00310\000", !183, !5, !20} ; [ DW_TAG_auto_variable ] [eta] [line 310]
+!188 = !{!"0x100\00zeta\00310\000", !183, !5, !20} ; [ DW_TAG_auto_variable ] [zeta] [line 310]
+!189 = !{!"0x100\00dtpp\00310\000", !183, !5, !20} ; [ DW_TAG_auto_variable ] [dtpp] [line 310]
+!190 = !{!"0x100\00m\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 311]
+!191 = !{!"0x100\00i\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 311]
+!192 = !{!"0x100\00j\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 311]
+!193 = !{!"0x100\00k\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 311]
+!194 = !{!"0x100\00ip1\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [ip1] [line 311]
+!195 = !{!"0x100\00im1\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [im1] [line 311]
+!196 = !{!"0x100\00jp1\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [jp1] [line 311]
+!197 = !{!"0x100\00jm1\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [jm1] [line 311]
+!198 = !{!"0x100\00km1\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [km1] [line 311]
+!199 = !{!"0x100\00kp1\00311\000", !183, !5, !8} ; [ DW_TAG_auto_variable ] [kp1] [line 311]
+!200 = !{!"0x2e\00adi\00adi\00\00210\001\001\000\006\00256\001\00210", !1, !5, !115, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 210] [local] [def] [adi]
+!201 = !{!"0x2e\00add\00add\00\00187\001\001\000\006\00256\001\00187", !1, !5, !115, null, null, null, null, !202} ; [ DW_TAG_subprogram ] [line 187] [local] [def] [add]
+!202 = !{!203, !204, !205, !206}
+!203 = !{!"0x100\00i\00193\000", !201, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 193]
+!204 = !{!"0x100\00j\00193\000", !201, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 193]
+!205 = !{!"0x100\00k\00193\000", !201, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 193]
+!206 = !{!"0x100\00m\00193\000", !201, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 193]
+!207 = !{!"0x2e\00z_solve\00z_solve\00\003457\001\001\000\006\00256\001\003457", !1, !5, !115, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 3457] [local] [def] [z_solve]
+!208 = !{!"0x2e\00z_backsubstitute\00z_backsubstitute\00\003480\001\001\000\006\00256\001\003480", !1, !5, !115, null, null, null, null, !209} ; [ DW_TAG_subprogram ] [line 3480] [local] [def] [z_backsubstitute]
+!209 = !{!210, !211, !212, !213, !214}
+!210 = !{!"0x100\00i\003492\000", !208, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 3492]
+!211 = !{!"0x100\00j\003492\000", !208, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 3492]
+!212 = !{!"0x100\00k\003492\000", !208, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 3492]
+!213 = !{!"0x100\00m\003492\000", !208, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 3492]
+!214 = !{!"0x100\00n\003492\000", !208, !5, !8} ; [ DW_TAG_auto_variable ] [n] [line 3492]
+!215 = !{!"0x2e\00z_solve_cell\00z_solve_cell\00\003512\001\001\000\006\00256\001\003512", !1, !5, !115, null, null, null, null, !216} ; [ DW_TAG_subprogram ] [line 3512] [local] [def] [z_solve_cell]
+!216 = !{!217, !218, !219, !220}
+!217 = !{!"0x100\00i\003527\000", !215, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 3527]
+!218 = !{!"0x100\00j\003527\000", !215, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 3527]
+!219 = !{!"0x100\00k\003527\000", !215, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 3527]
+!220 = !{!"0x100\00ksize\003527\000", !215, !5, !8} ; [ DW_TAG_auto_variable ] [ksize] [line 3527]
+!221 = !{!"0x2e\00binvrhs\00binvrhs\00\003154\001\001\000\006\00256\001\003154", !1, !5, !222, null, null, null, null, !225} ; [ DW_TAG_subprogram ] [line 3154] [local] [def] [binvrhs]
+!222 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !223, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!223 = !{null, !224, !105}
+!224 = !{!"0xf\00\000\0064\0064\000\000", null, null, !91} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!225 = !{!226, !227, !228, !229}
+!226 = !{!"0x101\00lhs\0016780370\000", !221, !5, !224} ; [ DW_TAG_arg_variable ] [lhs] [line 3154]
+!227 = !{!"0x101\00r\0033557586\000", !221, !5, !105} ; [ DW_TAG_arg_variable ] [r] [line 3154]
+!228 = !{!"0x100\00pivot\003159\000", !221, !5, !20} ; [ DW_TAG_auto_variable ] [pivot] [line 3159]
+!229 = !{!"0x100\00coeff\003159\000", !221, !5, !20} ; [ DW_TAG_auto_variable ] [coeff] [line 3159]
+!230 = !{!"0x2e\00matmul_sub\00matmul_sub\00\002841\001\001\000\006\00256\001\002842", !1, !5, !231, null, null, null, null, !233} ; [ DW_TAG_subprogram ] [line 2841] [local] [def] [scope 2842] [matmul_sub]
+!231 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !232, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!232 = !{null, !224, !224, !224}
+!233 = !{!234, !235, !236, !237}
+!234 = !{!"0x101\00ablock\0016780057\000", !230, !5, !224} ; [ DW_TAG_arg_variable ] [ablock] [line 2841]
+!235 = !{!"0x101\00bblock\0033557273\000", !230, !5, !224} ; [ DW_TAG_arg_variable ] [bblock] [line 2841]
+!236 = !{!"0x101\00cblock\0050334490\000", !230, !5, !224} ; [ DW_TAG_arg_variable ] [cblock] [line 2842]
+!237 = !{!"0x100\00j\002851\000", !230, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 2851]
+!238 = !{!"0x2e\00matvec_sub\00matvec_sub\00\002814\001\001\000\006\00256\001\002814", !1, !5, !239, null, null, null, null, !241} ; [ DW_TAG_subprogram ] [line 2814] [local] [def] [matvec_sub]
+!239 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !240, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!240 = !{null, !224, !105, !105}
+!241 = !{!242, !243, !244, !245}
+!242 = !{!"0x101\00ablock\0016780030\000", !238, !5, !224} ; [ DW_TAG_arg_variable ] [ablock] [line 2814]
+!243 = !{!"0x101\00avec\0033557246\000", !238, !5, !105} ; [ DW_TAG_arg_variable ] [avec] [line 2814]
+!244 = !{!"0x101\00bvec\0050334462\000", !238, !5, !105} ; [ DW_TAG_arg_variable ] [bvec] [line 2814]
+!245 = !{!"0x100\00i\002823\000", !238, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 2823]
+!246 = !{!"0x2e\00binvcrhs\00binvcrhs\00\002885\001\001\000\006\00256\001\002885", !1, !5, !247, null, null, null, null, !249} ; [ DW_TAG_subprogram ] [line 2885] [local] [def] [binvcrhs]
+!247 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !248, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!248 = !{null, !224, !224, !105}
+!249 = !{!250, !251, !252, !253, !254}
+!250 = !{!"0x101\00lhs\0016780101\000", !246, !5, !224} ; [ DW_TAG_arg_variable ] [lhs] [line 2885]
+!251 = !{!"0x101\00c\0033557317\000", !246, !5, !224} ; [ DW_TAG_arg_variable ] [c] [line 2885]
+!252 = !{!"0x101\00r\0050334533\000", !246, !5, !105} ; [ DW_TAG_arg_variable ] [r] [line 2885]
+!253 = !{!"0x100\00pivot\002890\000", !246, !5, !20} ; [ DW_TAG_auto_variable ] [pivot] [line 2890]
+!254 = !{!"0x100\00coeff\002890\000", !246, !5, !20} ; [ DW_TAG_auto_variable ] [coeff] [line 2890]
+!255 = !{!"0x2e\00lhsz\00lhsz\00\001475\001\001\000\006\00256\001\001475", !1, !5, !115, null, null, null, null, !256} ; [ DW_TAG_subprogram ] [line 1475] [local] [def] [lhsz]
+!256 = !{!257, !258, !259}
+!257 = !{!"0x100\00i\001484\000", !255, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 1484]
+!258 = !{!"0x100\00j\001484\000", !255, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 1484]
+!259 = !{!"0x100\00k\001484\000", !255, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 1484]
+!260 = !{!"0x2e\00y_solve\00y_solve\00\003299\001\001\000\006\00256\001\003299", !1, !5, !115, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 3299] [local] [def] [y_solve]
+!261 = !{!"0x2e\00y_backsubstitute\00y_backsubstitute\00\003323\001\001\000\006\00256\001\003323", !1, !5, !115, null, null, null, null, !262} ; [ DW_TAG_subprogram ] [line 3323] [local] [def] [y_backsubstitute]
+!262 = !{!263, !264, !265, !266, !267}
+!263 = !{!"0x100\00i\003335\000", !261, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 3335]
+!264 = !{!"0x100\00j\003335\000", !261, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 3335]
+!265 = !{!"0x100\00k\003335\000", !261, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 3335]
+!266 = !{!"0x100\00m\003335\000", !261, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 3335]
+!267 = !{!"0x100\00n\003335\000", !261, !5, !8} ; [ DW_TAG_auto_variable ] [n] [line 3335]
+!268 = !{!"0x2e\00y_solve_cell\00y_solve_cell\00\003355\001\001\000\006\00256\001\003355", !1, !5, !115, null, null, null, null, !269} ; [ DW_TAG_subprogram ] [line 3355] [local] [def] [y_solve_cell]
+!269 = !{!270, !271, !272, !273}
+!270 = !{!"0x100\00i\003370\000", !268, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 3370]
+!271 = !{!"0x100\00j\003370\000", !268, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 3370]
+!272 = !{!"0x100\00k\003370\000", !268, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 3370]
+!273 = !{!"0x100\00jsize\003370\000", !268, !5, !8} ; [ DW_TAG_auto_variable ] [jsize] [line 3370]
+!274 = !{!"0x2e\00lhsy\00lhsy\00\001181\001\001\000\006\00256\001\001181", !1, !5, !115, null, null, null, null, !275} ; [ DW_TAG_subprogram ] [line 1181] [local] [def] [lhsy]
+!275 = !{!276, !277, !278}
+!276 = !{!"0x100\00i\001190\000", !274, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 1190]
+!277 = !{!"0x100\00j\001190\000", !274, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 1190]
+!278 = !{!"0x100\00k\001190\000", !274, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 1190]
+!279 = !{!"0x2e\00x_solve\00x_solve\00\002658\001\001\000\006\00256\001\002658", !1, !5, !115, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 2658] [local] [def] [x_solve]
+!280 = !{!"0x2e\00x_backsubstitute\00x_backsubstitute\00\002684\001\001\000\006\00256\001\002684", !1, !5, !115, null, null, null, null, !281} ; [ DW_TAG_subprogram ] [line 2684] [local] [def] [x_backsubstitute]
+!281 = !{!282, !283, !284, !285, !286}
+!282 = !{!"0x100\00i\002696\000", !280, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 2696]
+!283 = !{!"0x100\00j\002696\000", !280, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 2696]
+!284 = !{!"0x100\00k\002696\000", !280, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 2696]
+!285 = !{!"0x100\00m\002696\000", !280, !5, !8} ; [ DW_TAG_auto_variable ] [m] [line 2696]
+!286 = !{!"0x100\00n\002696\000", !280, !5, !8} ; [ DW_TAG_auto_variable ] [n] [line 2696]
+!287 = !{!"0x2e\00x_solve_cell\00x_solve_cell\00\002716\001\001\000\006\00256\001\002716", !1, !5, !115, null, null, null, null, !288} ; [ DW_TAG_subprogram ] [line 2716] [local] [def] [x_solve_cell]
+!288 = !{!289, !290, !291, !292}
+!289 = !{!"0x100\00i\002728\000", !287, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 2728]
+!290 = !{!"0x100\00j\002728\000", !287, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 2728]
+!291 = !{!"0x100\00k\002728\000", !287, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 2728]
+!292 = !{!"0x100\00isize\002728\000", !287, !5, !8} ; [ DW_TAG_auto_variable ] [isize] [line 2728]
+!293 = !{!"0x2e\00lhsx\00lhsx\00\00898\001\001\000\006\00256\001\00898", !1, !5, !115, null, null, null, null, !294} ; [ DW_TAG_subprogram ] [line 898] [local] [def] [lhsx]
+!294 = !{!295, !296, !297}
+!295 = !{!"0x100\00i\00907\000", !293, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 907]
+!296 = !{!"0x100\00j\00907\000", !293, !5, !8} ; [ DW_TAG_auto_variable ] [j] [line 907]
+!297 = !{!"0x100\00k\00907\000", !293, !5, !8} ; [ DW_TAG_auto_variable ] [k] [line 907]
+!298 = !{!299, !304, !305, !309, !310, !311, !312, !313, !314, !315, !316, !317, !318, !319, !320, !321, !322, !323, !324, !325, !326, !327, !328, !329, !330, !331, !332, !333, !334, !335, !336, !337, !338, !339, !340, !341, !342, !343, !347, !350, !351, !352, !353, !354, !355, !356, !360, !361, !362, !363, !364, !365, !366, !367, !368, !369, !370, !371, !372, !373, !374, !375, !376, !377, !378, !379, !380, !381, !382, !383, !384, !385, !386, !387, !388, !389, !390, !391, !392, !393, !394, !395, !396, !397, !398, !399, !400, !401, !402, !403, !404, !405, !406, !407, !408, !409, !410, !411, !412, !413, !414, !415, !416, !417, !418, !419, !422, !426, !427, !430, !431, !434, !435, !436, !437}
+!299 = !{!"0x34\00grid_points\00grid_points\00\0028\001\001", null, !300, !302, [3 x i32]* @grid_points, null} ; [ DW_TAG_variable ] [grid_points] [line 28] [local] [def]
+!300 = !{!"0x29", !301}      ; [ DW_TAG_file_type ] [/home/hfinkel/src/NPB2.3-omp-C/BT/./header.h]
+!301 = !{!"./header.h", !"/home/hfinkel/src/NPB2.3-omp-C/BT"}
+!302 = !{!"0x1\00\000\0096\0032\000\000", null, null, !8, !303, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 96, align 32, offset 0] [from int]
+!303 = !{!178}
+!304 = !{!"0x34\00dt\00dt\00\0035\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dt] [line 35] [local] [def]
+!305 = !{!"0x34\00rhs\00rhs\00\0068\001\001", null, !300, !306, null, null} ; [ DW_TAG_variable ] [rhs] [line 68] [local] [def]
+!306 = !{!"0x1\00\000\001385839040\0064\000\000", null, null, !20, !307, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1385839040, align 64, offset 0] [from double]
+!307 = !{!308, !308, !308, !93}
+!308 = !{!"0x21\000\00163"}     ; [ DW_TAG_subrange_type ] [0, 162]
+!309 = !{!"0x34\00zzcon5\00zzcon5\00\0042\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [zzcon5] [line 42] [local] [def]
+!310 = !{!"0x34\00zzcon4\00zzcon4\00\0042\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [zzcon4] [line 42] [local] [def]
+!311 = !{!"0x34\00zzcon3\00zzcon3\00\0042\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [zzcon3] [line 42] [local] [def]
+!312 = !{!"0x34\00dz5tz1\00dz5tz1\00\0043\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz5tz1] [line 43] [local] [def]
+!313 = !{!"0x34\00dz4tz1\00dz4tz1\00\0043\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz4tz1] [line 43] [local] [def]
+!314 = !{!"0x34\00dz3tz1\00dz3tz1\00\0043\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz3tz1] [line 43] [local] [def]
+!315 = !{!"0x34\00zzcon2\00zzcon2\00\0042\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [zzcon2] [line 42] [local] [def]
+!316 = !{!"0x34\00dz2tz1\00dz2tz1\00\0043\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz2tz1] [line 43] [local] [def]
+!317 = !{!"0x34\00tz2\00tz2\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tz2] [line 31] [local] [def]
+!318 = !{!"0x34\00dz1tz1\00dz1tz1\00\0043\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz1tz1] [line 43] [local] [def]
+!319 = !{!"0x34\00yycon5\00yycon5\00\0040\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [yycon5] [line 40] [local] [def]
+!320 = !{!"0x34\00yycon4\00yycon4\00\0040\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [yycon4] [line 40] [local] [def]
+!321 = !{!"0x34\00yycon3\00yycon3\00\0040\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [yycon3] [line 40] [local] [def]
+!322 = !{!"0x34\00dy5ty1\00dy5ty1\00\0041\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy5ty1] [line 41] [local] [def]
+!323 = !{!"0x34\00dy4ty1\00dy4ty1\00\0041\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy4ty1] [line 41] [local] [def]
+!324 = !{!"0x34\00dy3ty1\00dy3ty1\00\0041\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy3ty1] [line 41] [local] [def]
+!325 = !{!"0x34\00yycon2\00yycon2\00\0040\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [yycon2] [line 40] [local] [def]
+!326 = !{!"0x34\00dy2ty1\00dy2ty1\00\0041\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy2ty1] [line 41] [local] [def]
+!327 = !{!"0x34\00ty2\00ty2\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [ty2] [line 31] [local] [def]
+!328 = !{!"0x34\00dy1ty1\00dy1ty1\00\0041\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy1ty1] [line 41] [local] [def]
+!329 = !{!"0x34\00dssp\00dssp\00\0035\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dssp] [line 35] [local] [def]
+!330 = !{!"0x34\00c1\00c1\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c1] [line 45] [local] [def]
+!331 = !{!"0x34\00xxcon5\00xxcon5\00\0038\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [xxcon5] [line 38] [local] [def]
+!332 = !{!"0x34\00xxcon4\00xxcon4\00\0038\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [xxcon4] [line 38] [local] [def]
+!333 = !{!"0x34\00xxcon3\00xxcon3\00\0038\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [xxcon3] [line 38] [local] [def]
+!334 = !{!"0x34\00dx5tx1\00dx5tx1\00\0039\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx5tx1] [line 39] [local] [def]
+!335 = !{!"0x34\00dx4tx1\00dx4tx1\00\0039\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx4tx1] [line 39] [local] [def]
+!336 = !{!"0x34\00dx3tx1\00dx3tx1\00\0039\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx3tx1] [line 39] [local] [def]
+!337 = !{!"0x34\00c2\00c2\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c2] [line 45] [local] [def]
+!338 = !{!"0x34\00con43\00con43\00\0048\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [con43] [line 48] [local] [def]
+!339 = !{!"0x34\00xxcon2\00xxcon2\00\0038\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [xxcon2] [line 38] [local] [def]
+!340 = !{!"0x34\00dx2tx1\00dx2tx1\00\0039\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx2tx1] [line 39] [local] [def]
+!341 = !{!"0x34\00tx2\00tx2\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tx2] [line 31] [local] [def]
+!342 = !{!"0x34\00dx1tx1\00dx1tx1\00\0039\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx1tx1] [line 39] [local] [def]
+!343 = !{!"0x34\00forcing\00forcing\00\0066\001\001", null, !300, !344, null, null} ; [ DW_TAG_variable ] [forcing] [line 66] [local] [def]
+!344 = !{!"0x1\00\000\001663006848\0064\000\000", null, null, !20, !345, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1663006848, align 64, offset 0] [from double]
+!345 = !{!308, !308, !308, !346}
+!346 = !{!"0x21\000\006"}       ; [ DW_TAG_subrange_type ] [0, 5]
+!347 = !{!"0x34\00qs\00qs\00\0063\001\001", null, !300, !348, null, null} ; [ DW_TAG_variable ] [qs] [line 63] [local] [def]
+!348 = !{!"0x1\00\000\00277167808\0064\000\000", null, null, !20, !349, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 277167808, align 64, offset 0] [from double]
+!349 = !{!308, !308, !308}
+!350 = !{!"0x34\00square\00square\00\0065\001\001", null, !300, !348, null, null} ; [ DW_TAG_variable ] [square] [line 65] [local] [def]
+!351 = !{!"0x34\00ws\00ws\00\0062\001\001", null, !300, !348, null, null} ; [ DW_TAG_variable ] [ws] [line 62] [local] [def]
+!352 = !{!"0x34\00vs\00vs\00\0061\001\001", null, !300, !348, null, null} ; [ DW_TAG_variable ] [vs] [line 61] [local] [def]
+!353 = !{!"0x34\00us\00us\00\0060\001\001", null, !300, !348, null, null} ; [ DW_TAG_variable ] [us] [line 60] [local] [def]
+!354 = !{!"0x34\00rho_i\00rho_i\00\0064\001\001", null, !300, !348, null, null} ; [ DW_TAG_variable ] [rho_i] [line 64] [local] [def]
+!355 = !{!"0x34\00u\00u\00\0067\001\001", null, !300, !306, null, null} ; [ DW_TAG_variable ] [u] [line 67] [local] [def]
+!356 = !{!"0x34\00ce\00ce\00\0036\001\001", null, !300, !357, null, null} ; [ DW_TAG_variable ] [ce] [line 36] [local] [def]
+!357 = !{!"0x1\00\000\004160\0064\000\000", null, null, !20, !358, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 4160, align 64, offset 0] [from double]
+!358 = !{!93, !359}
+!359 = !{!"0x21\000\0013"}      ; [ DW_TAG_subrange_type ] [0, 12]
+!360 = !{!"0x34\00dnzm1\00dnzm1\00\0044\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dnzm1] [line 44] [local] [def]
+!361 = !{!"0x34\00dnym1\00dnym1\00\0044\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dnym1] [line 44] [local] [def]
+!362 = !{!"0x34\00dnxm1\00dnxm1\00\0044\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dnxm1] [line 44] [local] [def]
+!363 = !{!"0x34\00zzcon1\00zzcon1\00\0042\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [zzcon1] [line 42] [local] [def]
+!364 = !{!"0x34\00yycon1\00yycon1\00\0040\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [yycon1] [line 40] [local] [def]
+!365 = !{!"0x34\00xxcon1\00xxcon1\00\0038\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [xxcon1] [line 38] [local] [def]
+!366 = !{!"0x34\00con16\00con16\00\0048\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [con16] [line 48] [local] [def]
+!367 = !{!"0x34\00c2iv\00c2iv\00\0048\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c2iv] [line 48] [local] [def]
+!368 = !{!"0x34\00c3c4tz3\00c3c4tz3\00\0048\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c3c4tz3] [line 48] [local] [def]
+!369 = !{!"0x34\00c3c4ty3\00c3c4ty3\00\0048\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c3c4ty3] [line 48] [local] [def]
+!370 = !{!"0x34\00c3c4tx3\00c3c4tx3\00\0048\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c3c4tx3] [line 48] [local] [def]
+!371 = !{!"0x34\00comz6\00comz6\00\0047\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [comz6] [line 47] [local] [def]
+!372 = !{!"0x34\00comz5\00comz5\00\0047\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [comz5] [line 47] [local] [def]
+!373 = !{!"0x34\00comz4\00comz4\00\0047\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [comz4] [line 47] [local] [def]
+!374 = !{!"0x34\00comz1\00comz1\00\0047\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [comz1] [line 47] [local] [def]
+!375 = !{!"0x34\00dtdssp\00dtdssp\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dtdssp] [line 45] [local] [def]
+!376 = !{!"0x34\00c2dttz1\00c2dttz1\00\0047\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c2dttz1] [line 47] [local] [def]
+!377 = !{!"0x34\00c2dtty1\00c2dtty1\00\0047\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c2dtty1] [line 47] [local] [def]
+!378 = !{!"0x34\00c2dttx1\00c2dttx1\00\0047\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c2dttx1] [line 47] [local] [def]
+!379 = !{!"0x34\00dttz2\00dttz2\00\0046\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dttz2] [line 46] [local] [def]
+!380 = !{!"0x34\00dttz1\00dttz1\00\0046\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dttz1] [line 46] [local] [def]
+!381 = !{!"0x34\00dtty2\00dtty2\00\0046\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dtty2] [line 46] [local] [def]
+!382 = !{!"0x34\00dtty1\00dtty1\00\0046\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dtty1] [line 46] [local] [def]
+!383 = !{!"0x34\00dttx2\00dttx2\00\0046\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dttx2] [line 46] [local] [def]
+!384 = !{!"0x34\00dttx1\00dttx1\00\0046\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dttx1] [line 46] [local] [def]
+!385 = !{!"0x34\00c5dssp\00c5dssp\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c5dssp] [line 45] [local] [def]
+!386 = !{!"0x34\00c4dssp\00c4dssp\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c4dssp] [line 45] [local] [def]
+!387 = !{!"0x34\00dzmax\00dzmax\00\0037\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dzmax] [line 37] [local] [def]
+!388 = !{!"0x34\00dymax\00dymax\00\0037\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dymax] [line 37] [local] [def]
+!389 = !{!"0x34\00dxmax\00dxmax\00\0037\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dxmax] [line 37] [local] [def]
+!390 = !{!"0x34\00dz5\00dz5\00\0034\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz5] [line 34] [local] [def]
+!391 = !{!"0x34\00dz4\00dz4\00\0034\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz4] [line 34] [local] [def]
+!392 = !{!"0x34\00dz3\00dz3\00\0034\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz3] [line 34] [local] [def]
+!393 = !{!"0x34\00dz2\00dz2\00\0034\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz2] [line 34] [local] [def]
+!394 = !{!"0x34\00dz1\00dz1\00\0034\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dz1] [line 34] [local] [def]
+!395 = !{!"0x34\00dy5\00dy5\00\0033\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy5] [line 33] [local] [def]
+!396 = !{!"0x34\00dy4\00dy4\00\0033\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy4] [line 33] [local] [def]
+!397 = !{!"0x34\00dy3\00dy3\00\0033\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy3] [line 33] [local] [def]
+!398 = !{!"0x34\00dy2\00dy2\00\0033\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy2] [line 33] [local] [def]
+!399 = !{!"0x34\00dy1\00dy1\00\0033\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dy1] [line 33] [local] [def]
+!400 = !{!"0x34\00dx5\00dx5\00\0032\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx5] [line 32] [local] [def]
+!401 = !{!"0x34\00dx4\00dx4\00\0032\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx4] [line 32] [local] [def]
+!402 = !{!"0x34\00dx3\00dx3\00\0032\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx3] [line 32] [local] [def]
+!403 = !{!"0x34\00dx2\00dx2\00\0032\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx2] [line 32] [local] [def]
+!404 = !{!"0x34\00dx1\00dx1\00\0032\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [dx1] [line 32] [local] [def]
+!405 = !{!"0x34\00tz3\00tz3\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tz3] [line 31] [local] [def]
+!406 = !{!"0x34\00tz1\00tz1\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tz1] [line 31] [local] [def]
+!407 = !{!"0x34\00ty3\00ty3\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [ty3] [line 31] [local] [def]
+!408 = !{!"0x34\00ty1\00ty1\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [ty1] [line 31] [local] [def]
+!409 = !{!"0x34\00tx3\00tx3\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tx3] [line 31] [local] [def]
+!410 = !{!"0x34\00tx1\00tx1\00\0031\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tx1] [line 31] [local] [def]
+!411 = !{!"0x34\00conz1\00conz1\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [conz1] [line 45] [local] [def]
+!412 = !{!"0x34\00c1345\00c1345\00\0044\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c1345] [line 44] [local] [def]
+!413 = !{!"0x34\00c3c4\00c3c4\00\0044\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c3c4] [line 44] [local] [def]
+!414 = !{!"0x34\00c1c5\00c1c5\00\0044\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c1c5] [line 44] [local] [def]
+!415 = !{!"0x34\00c1c2\00c1c2\00\0044\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c1c2] [line 44] [local] [def]
+!416 = !{!"0x34\00c5\00c5\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c5] [line 45] [local] [def]
+!417 = !{!"0x34\00c4\00c4\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c4] [line 45] [local] [def]
+!418 = !{!"0x34\00c3\00c3\00\0045\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [c3] [line 45] [local] [def]
+!419 = !{!"0x34\00lhs\00lhs\00\0069\001\001", null, !300, !420, null, null} ; [ DW_TAG_variable ] [lhs] [line 69] [local] [def]
+!420 = !{!"0x1\00\000\0020787585600\0064\000\000", null, null, !20, !421, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 20787585600, align 64, offset 0] [from double]
+!421 = !{!308, !308, !308, !178, !93, !93}
+!422 = !{!"0x34\00q\00q\00\0073\001\001", null, !300, !423, null, null} ; [ DW_TAG_variable ] [q] [line 73] [local] [def]
+!423 = !{!"0x1\00\000\0010368\0064\000\000", null, null, !20, !424, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 10368, align 64, offset 0] [from double]
+!424 = !{!425}
+!425 = !{!"0x21\000\00162"}     ; [ DW_TAG_subrange_type ] [0, 161]
+!426 = !{!"0x34\00cuf\00cuf\00\0072\001\001", null, !300, !423, null, null} ; [ DW_TAG_variable ] [cuf] [line 72] [local] [def]
+!427 = !{!"0x34\00buf\00buf\00\0075\001\001", null, !300, !428, null, null} ; [ DW_TAG_variable ] [buf] [line 75] [local] [def]
+!428 = !{!"0x1\00\000\0051840\0064\000\000", null, null, !20, !429, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 51840, align 64, offset 0] [from double]
+!429 = !{!425, !93}
+!430 = !{!"0x34\00ue\00ue\00\0074\001\001", null, !300, !428, null, null} ; [ DW_TAG_variable ] [ue] [line 74] [local] [def]
+!431 = !{!"0x34\00njac\00njac\00\0086\001\001", null, !300, !432, null, null} ; [ DW_TAG_variable ] [njac] [line 86] [local] [def]
+!432 = !{!"0x1\00\000\006886684800\0064\000\000", null, null, !20, !433, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 6886684800, align 64, offset 0] [from double]
+!433 = !{!308, !308, !425, !93, !93}
+!434 = !{!"0x34\00fjac\00fjac\00\0084\001\001", null, !300, !432, null, null} ; [ DW_TAG_variable ] [fjac] [line 84] [local] [def]
+!435 = !{!"0x34\00tmp3\00tmp3\00\0088\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tmp3] [line 88] [local] [def]
+!436 = !{!"0x34\00tmp2\00tmp2\00\0088\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tmp2] [line 88] [local] [def]
+!437 = !{!"0x34\00tmp1\00tmp1\00\0088\001\001", null, !300, !20, null, null} ; [ DW_TAG_variable ] [tmp1] [line 88] [local] [def]
+!438 = !{i32 2, !"Dwarf Version", i32 4}
+!439 = !MDLocation(line: 1898, scope: !440)
+!440 = !{!"0xb\001898\000\00107", !1, !114} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!441 = !MDLocation(line: 1913, scope: !442)
+!442 = !{!"0xb\001913\000\00115", !1, !114} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!443 = !MDLocation(line: 1923, scope: !114)
+!444 = !{!"int", !445}
+!445 = !{!"omnipotent char", !446}
+!446 = !{!"Simple C/C++ TBAA"}
+!447 = !{i32 1}
+!448 = !MDLocation(line: 1925, scope: !449)
+!449 = !{!"0xb\001925\000\00121", !1, !114} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!450 = !MDLocation(line: 1939, scope: !451)
+!451 = !{!"0xb\001939\000\00127", !1, !114} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!452 = !MDLocation(line: 1940, scope: !453)
+!453 = !{!"0xb\001940\000\00129", !1, !454} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!454 = !{!"0xb\001939\000\00128", !1, !451} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!455 = !MDLocation(line: 1941, scope: !456)
+!456 = !{!"0xb\001941\000\00131", !1, !457} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!457 = !{!"0xb\001940\000\00130", !1, !453} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!458 = !MDLocation(line: 2020, scope: !459)
+!459 = !{!"0xb\002020\000\00149", !1, !460} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!460 = !{!"0xb\002019\000\00148", !1, !461} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!461 = !{!"0xb\002019\000\00147", !1, !462} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!462 = !{!"0xb\002018\000\00146", !1, !463} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!463 = !{!"0xb\002018\000\00145", !1, !114} ; [ DW_TAG_lexical_block ] [/home/hfinkel/src/NPB2.3-omp-C/BT/bt.c]
+!464 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/PowerPC/recipest.ll b/test/CodeGen/PowerPC/recipest.ll
index 891e801dd3b5..cd77548b281b 100644
--- a/test/CodeGen/PowerPC/recipest.ll
+++ b/test/CodeGen/PowerPC/recipest.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck -check-prefix=CHECK-SAFE %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=-vsx | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck -check-prefix=CHECK-SAFE %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -8,7 +8,6 @@ declare float @llvm.sqrt.f32(float)
 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
 
 define double @foo(double %a, double %b) nounwind {
-entry:
   %x = call double @llvm.sqrt.f64(double %b)
   %r = fdiv double %a, %x
   ret double %r
@@ -17,12 +16,12 @@ entry:
 ; CHECK-DAG: frsqrte
 ; CHECK-DAG: fnmsub
 ; CHECK: fmul
-; CHECK: fmadd
-; CHECK: fmul
-; CHECK: fmul
-; CHECK: fmadd
-; CHECK: fmul
-; CHECK: fmul
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
 ; CHECK: blr
 
 ; CHECK-SAFE: @foo
@@ -32,7 +31,6 @@ entry:
 }
 
 define double @foof(double %a, float %b) nounwind {
-entry:
   %x = call float @llvm.sqrt.f32(float %b)
   %y = fpext float %x to double
   %r = fdiv double %a, %y
@@ -42,10 +40,10 @@ entry:
 ; CHECK-DAG: frsqrtes
 ; CHECK-DAG: fnmsubs
 ; CHECK: fmuls
-; CHECK: fmadds
-; CHECK: fmuls
-; CHECK: fmul
-; CHECK: blr
+; CHECK-NEXT: fmadds
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: fmul
+; CHECK-NEXT: blr
 
 ; CHECK-SAFE: @foof
 ; CHECK-SAFE: fsqrts
@@ -54,7 +52,6 @@ entry:
 }
 
 define float @food(float %a, double %b) nounwind {
-entry:
   %x = call double @llvm.sqrt.f64(double %b)
   %y = fptrunc double %x to float
   %r = fdiv float %a, %y
@@ -64,14 +61,14 @@ entry:
 ; CHECK-DAG: frsqrte
 ; CHECK-DAG: fnmsub
 ; CHECK: fmul
-; CHECK: fmadd
-; CHECK: fmul
-; CHECK: fmul
-; CHECK: fmadd
-; CHECK: fmul
-; CHECK: frsp
-; CHECK: fmuls
-; CHECK: blr
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: fmul
+; CHECK-NEXT: frsp
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: blr
 
 ; CHECK-SAFE: @foo
 ; CHECK-SAFE: fsqrt
@@ -80,7 +77,6 @@ entry:
 }
 
 define float @goo(float %a, float %b) nounwind {
-entry:
   %x = call float @llvm.sqrt.f32(float %b)
   %r = fdiv float %a, %x
   ret float %r
@@ -89,10 +85,10 @@ entry:
 ; CHECK-DAG: frsqrtes
 ; CHECK-DAG: fnmsubs
 ; CHECK: fmuls
-; CHECK: fmadds
-; CHECK: fmuls
-; CHECK: fmuls
-; CHECK: blr
+; CHECK-NEXT: fmadds
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: blr
 
 ; CHECK-SAFE: @goo
 ; CHECK-SAFE: fsqrts
@@ -100,8 +96,35 @@ entry:
 ; CHECK-SAFE: blr
 }
 
+; Recognize that this is rsqrt(a) * rcp(b) * c, 
+; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
+define float @rsqrt_fmul(float %a, float %b, float %c) {
+  %x = call float @llvm.sqrt.f32(float %a)
+  %y = fmul float %x, %b 
+  %z = fdiv float %c, %y
+  ret float %z
+
+; CHECK: @rsqrt_fmul
+; CHECK-DAG: frsqrtes
+; CHECK-DAG: fres
+; CHECK-DAG: fnmsubs
+; CHECK-DAG: fmuls
+; CHECK-DAG: fnmsubs
+; CHECK-DAG: fmadds
+; CHECK-DAG: fmadds
+; CHECK: fmuls
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: blr
+
+; CHECK-SAFE: @rsqrt_fmul
+; CHECK-SAFE: fsqrts
+; CHECK-SAFE: fmuls
+; CHECK-SAFE: fdivs
+; CHECK-SAFE: blr
+}
+
 define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind {
-entry:
   %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b)
   %r = fdiv <4 x float> %a, %x
   ret <4 x float> %r
@@ -115,7 +138,6 @@ entry:
 }
 
 define double @foo2(double %a, double %b) nounwind {
-entry:
   %r = fdiv double %a, %b
   ret double %r
 
@@ -123,10 +145,10 @@ entry:
 ; CHECK-DAG: fre
 ; CHECK-DAG: fnmsub
 ; CHECK: fmadd
-; CHECK: fnmsub
-; CHECK: fmadd
-; CHECK: fmul
-; CHECK: blr
+; CHECK-NEXT: fnmsub
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: fmul
+; CHECK-NEXT: blr
 
 ; CHECK-SAFE: @foo2
 ; CHECK-SAFE: fdiv
@@ -134,7 +156,6 @@ entry:
 }
 
 define float @goo2(float %a, float %b) nounwind {
-entry:
   %r = fdiv float %a, %b
   ret float %r
 
@@ -142,8 +163,8 @@ entry:
 ; CHECK-DAG: fres
 ; CHECK-DAG: fnmsubs
 ; CHECK: fmadds
-; CHECK: fmuls
-; CHECK: blr
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: blr
 
 ; CHECK-SAFE: @goo2
 ; CHECK-SAFE: fdivs
@@ -151,7 +172,6 @@ entry:
 }
 
 define <4 x float> @hoo2(<4 x float> %a, <4 x float> %b) nounwind {
-entry:
   %r = fdiv <4 x float> %a, %b
   ret <4 x float> %r
 
@@ -164,7 +184,6 @@ entry:
 }
 
 define double @foo3(double %a) nounwind {
-entry:
   %r = call double @llvm.sqrt.f64(double %a)
   ret double %r
 
@@ -173,16 +192,12 @@ entry:
 ; CHECK-DAG: frsqrte
 ; CHECK-DAG: fnmsub
 ; CHECK: fmul
-; CHECK: fmadd
-; CHECK: fmul
-; CHECK: fmul
-; CHECK: fmadd
-; CHECK: fmul
-; CHECK: fre
-; CHECK: fnmsub
-; CHECK: fmadd
-; CHECK: fnmsub
-; CHECK: fmadd
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
+; CHECK-NEXT: fmadd
+; CHECK-NEXT: fmul
+; CHECK-NEXT: fmul
 ; CHECK: blr
 
 ; CHECK-SAFE: @foo3
@@ -191,7 +206,6 @@ entry:
 }
 
 define float @goo3(float %a) nounwind {
-entry:
   %r = call float @llvm.sqrt.f32(float %a)
   ret float %r
 
@@ -200,11 +214,9 @@ entry:
 ; CHECK-DAG: frsqrtes
 ; CHECK-DAG: fnmsubs
 ; CHECK: fmuls
-; CHECK: fmadds
-; CHECK: fmuls
-; CHECK: fres
-; CHECK: fnmsubs
-; CHECK: fmadds
+; CHECK-NEXT: fmadds
+; CHECK-NEXT: fmuls
+; CHECK-NEXT: fmuls
 ; CHECK: blr
 
 ; CHECK-SAFE: @goo3
@@ -213,13 +225,11 @@ entry:
 }
 
 define <4 x float> @hoo3(<4 x float> %a) nounwind {
-entry:
   %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
   ret <4 x float> %r
 
 ; CHECK: @hoo3
 ; CHECK: vrsqrtefp
-; CHECK-DAG: vrefp
 ; CHECK-DAG: vcmpeqfp
 
 ; CHECK-SAFE: @hoo3
diff --git a/test/CodeGen/PowerPC/retaddr2.ll b/test/CodeGen/PowerPC/retaddr2.ll
new file mode 100644
index 000000000000..8fa3b4d13b7e
--- /dev/null
+++ b/test/CodeGen/PowerPC/retaddr2.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define i8* @test1() #0 {
+entry:
+  %0 = tail call i8* @llvm.returnaddress(i32 0)
+  ret i8* %0
+}
+
+; CHECK-LABEL: @test1
+; CHECK: mflr 0
+; CHECK: std 0, 16(1)
+; FIXME: These next two lines don't both need to load the same value.
+; CHECK-DAG: ld 3, 16(1)
+; CHECK-DAG: ld 0, 16(1)
+; CHECK: mtlr 0
+; CHECK: blr
+
+; Function Attrs: nounwind readnone
+declare i8* @llvm.returnaddress(i32) #0
+
+attributes #0 = { nounwind readnone }
+
diff --git a/test/CodeGen/PowerPC/rlwimi-and.ll b/test/CodeGen/PowerPC/rlwimi-and.ll
index 213363ee819f..9433f8e3dee2 100644
--- a/test/CodeGen/PowerPC/rlwimi-and.ll
+++ b/test/CodeGen/PowerPC/rlwimi-and.ll
@@ -28,11 +28,9 @@ codeRepl17:                                       ; preds = %codeRepl4
   store i16 %rvml38.sroa.0.0.insert.insert, i16* undef, align 2
   unreachable
 
-; FIXME: the SLWI could be folded into the RLWIMI to give a rotate of 8.
 ; CHECK: @test
-; CHECK-DAG: slwi [[R1:[0-9]+]], {{[0-9]+}}, 31
-; CHECK-DAG: rlwinm [[R2:[0-9]+]], {{[0-9]+}}, 0, 31, 31
-; CHECK: rlwimi [[R2]], [[R1]], 9, 23, 23
+; CHECK: rlwinm [[R1:[0-9]+]], {{[0-9]+}}, 0, 31, 31
+; CHECK: rlwimi [[R1]], {{[0-9]+}}, 8, 23, 23
 
 codeRepl29:                                       ; preds = %codeRepl1
   unreachable
diff --git a/test/CodeGen/PowerPC/rlwimi2.ll b/test/CodeGen/PowerPC/rlwimi2.ll
index 1bee4e03f1b0..7978718b5a38 100644
--- a/test/CodeGen/PowerPC/rlwimi2.ll
+++ b/test/CodeGen/PowerPC/rlwimi2.ll
@@ -1,7 +1,7 @@
 ; All of these ands and shifts should be folded into rlwimi's
 ; RUN: llc < %s -march=ppc32 -o %t
-; RUN: grep rlwimi %t | count 3
-; RUN: grep srwi   %t | count 1
+; RUN: grep rlwimi %t | count 4
+; RUN: not grep srwi %t
 ; RUN: not grep slwi %t
 
 define i16 @test1(i32 %srcA, i32 %srcB, i32 %alpha) nounwind {
diff --git a/test/CodeGen/PowerPC/rm-zext.ll b/test/CodeGen/PowerPC/rm-zext.ll
new file mode 100644
index 000000000000..33995e114d27
--- /dev/null
+++ b/test/CodeGen/PowerPC/rm-zext.ll
@@ -0,0 +1,89 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo(i32 signext %a) #0 {
+entry:
+  %mul = mul nsw i32 %a, %a
+  %shr2 = lshr i32 %mul, 5
+  ret i32 %shr2
+
+; CHECK-LABEL @foo
+; CHECK-NOT: rldicl 3, {{[0-9]+}}, 0, 32
+; CHECK: blr
+}
+
+define zeroext i32 @test6(i32 zeroext %x) #0 {
+entry:
+  %and = lshr i32 %x, 16
+  %shr = and i32 %and, 255
+  %and1 = shl i32 %x, 16
+  %shl = and i32 %and1, 16711680
+  %or = or i32 %shr, %shl
+  ret i32 %or
+
+; CHECK-LABEL @test6
+; CHECK-NOT: rldicl 3, {{[0-9]+}}, 0, 32
+; CHECK: blr
+}
+
+define zeroext i32 @min(i32 zeroext %a, i32 zeroext %b) #0 {
+entry:
+  %cmp = icmp ule i32 %a, %b
+  %cond = select i1 %cmp, i32 %a, i32 %b
+  ret i32 %cond
+
+; CHECK-LABEL @min
+; CHECK-NOT: rldicl 3, {{[0-9]+}}, 0, 32
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.bswap.i32(i32) #0
+
+; Function Attrs: nounwind readonly
+define zeroext i32 @bs32(i32* nocapture readonly %x) #1 {
+entry:
+  %0 = load i32* %x, align 4
+  %1 = tail call i32 @llvm.bswap.i32(i32 %0)
+  ret i32 %1
+
+; CHECK-LABEL: @bs32
+; CHECK-NOT: rldicl 3, {{[0-9]+}}, 0, 32
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readonly
+define zeroext i16 @bs16(i16* nocapture readonly %x) #1 {
+entry:
+  %0 = load i16* %x, align 2
+  %1 = tail call i16 @llvm.bswap.i16(i16 %0)
+  ret i16 %1
+
+; CHECK-LABEL: @bs16
+; CHECK-NOT: rldicl 3, {{[0-9]+}}, 0, 32
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+declare i16 @llvm.bswap.i16(i16) #0
+
+; Function Attrs: nounwind readnone
+define zeroext i32 @ctlz32(i32 zeroext %x) #0 {
+entry:
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+  ret i32 %0
+
+; CHECK-LABEL: @ctlz32
+; CHECK-NOT: rldicl 3, {{[0-9]+}}, 0, 32
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) #0
+
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind readonly }
+
diff --git a/test/CodeGen/PowerPC/rounding-ops.ll b/test/CodeGen/PowerPC/rounding-ops.ll
index bf0a6415df67..42f12364eff3 100644
--- a/test/CodeGen/PowerPC/rounding-ops.ll
+++ b/test/CodeGen/PowerPC/rounding-ops.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -8,6 +9,8 @@ define float @test1(float %x) nounwind  {
 
 ; CHECK-LABEL: test1:
 ; CHECK: frim 1, 1
+; CHECK-VSX-LABEL: test1:
+; CHECK-VSX: frim 1, 1
 }
 
 declare float @floorf(float) nounwind readnone
@@ -18,6 +21,8 @@ define double @test2(double %x) nounwind  {
 
 ; CHECK-LABEL: test2:
 ; CHECK: frim 1, 1
+; CHECK-VSX-LABEL: test2:
+; CHECK-VSX: xsrdpim 1, 1
 }
 
 declare double @floor(double) nounwind readnone
@@ -28,6 +33,8 @@ define float @test3(float %x) nounwind  {
 
 ; CHECK-LABEL: test3:
 ; CHECK: frin 1, 1
+; CHECK-VSX-LABEL: test3:
+; CHECK-VSX: frin 1, 1
 }
 
 declare float @roundf(float) nounwind readnone
@@ -38,6 +45,8 @@ define double @test4(double %x) nounwind  {
 
 ; CHECK-LABEL: test4:
 ; CHECK: frin 1, 1
+; CHECK-VSX-LABEL: test4:
+; CHECK-VSX: xsrdpi 1, 1
 }
 
 declare double @round(double) nounwind readnone
@@ -48,6 +57,8 @@ define float @test5(float %x) nounwind  {
 
 ; CHECK-LABEL: test5:
 ; CHECK: frip 1, 1
+; CHECK-VSX-LABEL: test5:
+; CHECK-VSX: frip 1, 1
 }
 
 declare float @ceilf(float) nounwind readnone
@@ -58,6 +69,8 @@ define double @test6(double %x) nounwind  {
 
 ; CHECK-LABEL: test6:
 ; CHECK: frip 1, 1
+; CHECK-VSX-LABEL: test6:
+; CHECK-VSX: xsrdpip 1, 1
 }
 
 declare double @ceil(double) nounwind readnone
@@ -68,6 +81,8 @@ define float @test9(float %x) nounwind  {
 
 ; CHECK-LABEL: test9:
 ; CHECK: friz 1, 1
+; CHECK-VSX-LABEL: test9:
+; CHECK-VSX: friz 1, 1
 }
 
 declare float @truncf(float) nounwind readnone
@@ -78,6 +93,8 @@ define double @test10(double %x) nounwind  {
 
 ; CHECK-LABEL: test10:
 ; CHECK: friz 1, 1
+; CHECK-VSX-LABEL: test10:
+; CHECK-VSX: xsrdpiz 1, 1
 }
 
 declare double @trunc(double) nounwind readnone
diff --git a/test/CodeGen/PowerPC/sdiv-pow2.ll b/test/CodeGen/PowerPC/sdiv-pow2.ll
new file mode 100644
index 000000000000..5ec019dfb4af
--- /dev/null
+++ b/test/CodeGen/PowerPC/sdiv-pow2.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc-unknown-linux-gnu -mcpu=ppc < %s | FileCheck -check-prefix=CHECK-32 %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo4(i32 signext %a) #0 {
+entry:
+  %div = sdiv i32 %a, 8
+  ret i32 %div
+
+; CHECK-LABEL @foo4
+; CHECK: srawi [[REG1:[0-9]+]], 3, 3
+; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
+; CHECK: extsw 3, [[REG2]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define i64 @foo8(i64 %a) #0 {
+entry:
+  %div = sdiv i64 %a, 8
+  ret i64 %div
+
+; CHECK-LABEL @foo8
+; CHECK: sradi [[REG1:[0-9]+]], 3, 3
+; CHECK: addze 3, [[REG1]]
+; CHECK: blr
+
+; CHECK-32-LABEL @foo8
+; CHECK-32-NOT: sradi
+; CHECK-32: blr
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo4n(i32 signext %a) #0 {
+entry:
+  %div = sdiv i32 %a, -8
+  ret i32 %div
+
+; CHECK-LABEL: @foo4n
+; CHECK: srawi [[REG1:[0-9]+]], 3, 3
+; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
+; CHECK: neg [[REG3:[0-9]+]], [[REG2]]
+; CHECK: extsw 3, [[REG3]]
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define i64 @foo8n(i64 %a) #0 {
+entry:
+  %div = sdiv i64 %a, -8
+  ret i64 %div
+
+; CHECK-LABEL: @foo8n
+; CHECK: sradi [[REG1:[0-9]+]], 3, 3
+; CHECK: addze [[REG2:[0-9]+]], [[REG1]]
+; CHECK: neg 3, [[REG2]]
+; CHECK: blr
+
+; CHECK-32-LABEL @foo8n
+; CHECK-32-NOT: sradi
+; CHECK-32: blr
+}
+
+attributes #0 = { nounwind readnone }
+
diff --git a/test/CodeGen/PowerPC/sections.ll b/test/CodeGen/PowerPC/sections.ll
index d77dfddd0f90..37a8d1648f1b 100644
--- a/test/CodeGen/PowerPC/sections.ll
+++ b/test/CodeGen/PowerPC/sections.ll
@@ -1,12 +1,7 @@
 ; Test to make sure that bss sections are printed with '.section' directive.
 ; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=PIC
 
 @A = global i32 0
 
 ; CHECK:  .section  .bss,"aw",@nobits
 ; CHECK:  .globl A
-
-; PIC:    .section  .got2,"aw",@progbits
-; PIC:    .section  .bss,"aw",@nobits
-; PIC:    .globl A
diff --git a/test/CodeGen/PowerPC/split-index-tc.ll b/test/CodeGen/PowerPC/split-index-tc.ll
new file mode 100644
index 000000000000..03aff243b231
--- /dev/null
+++ b/test/CodeGen/PowerPC/split-index-tc.ll
@@ -0,0 +1,82 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+%"class.llvm::MachineOperand" = type { i8, [3 x i8], i64, i64*, i64 }
+
+; Function Attrs: nounwind
+define void @_ZN4llvm17ScheduleDAGInstrs14addPhysRegDepsEPNS_5SUnitEj() #0 align 2 {
+
+; If we were able to split out the indexing, the load with update should be
+; removed (resulting in a nearly-empty output).
+; CHECK-LABEL: @_ZN4llvm17ScheduleDAGInstrs14addPhysRegDepsEPNS_5SUnitEj
+; CHECK-NOT: lhzu
+
+entry:
+  %0 = load %"class.llvm::MachineOperand"** undef, align 8
+  br i1 undef, label %_ZNK4llvm14MachineOperand6getRegEv.exit, label %cond.false.i123
+
+cond.false.i123:                                  ; preds = %_ZN4llvm12MachineInstr10getOperandEj.exit
+  unreachable
+
+_ZNK4llvm14MachineOperand6getRegEv.exit:          ; preds = %_ZN4llvm12MachineInstr10getOperandEj.exit
+  %IsDef.i = getelementptr inbounds %"class.llvm::MachineOperand"* %0, i64 undef, i32 1
+  %1 = bitcast [3 x i8]* %IsDef.i to i24*
+  %bf.load.i = load i24* %1, align 1
+  %2 = and i24 %bf.load.i, 128
+  br i1 undef, label %for.cond.cleanup, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %_ZNK4llvm14MachineOperand6getRegEv.exit
+  %3 = zext i24 %2 to i32
+  br i1 undef, label %cond.false.i134, label %_ZNK4llvm18MCRegAliasIteratordeEv.exit
+
+for.cond.cleanup:                                 ; preds = %_ZNK4llvm14MachineOperand6getRegEv.exit
+  br i1 undef, label %_ZNK4llvm14MachineOperand5isDefEv.exit, label %cond.false.i129
+
+cond.false.i129:                                  ; preds = %for.cond.cleanup
+  unreachable
+
+_ZNK4llvm14MachineOperand5isDefEv.exit:           ; preds = %for.cond.cleanup
+  br i1 undef, label %_ZNK4llvm14MachineOperand6getRegEv.exit247, label %cond.false.i244
+
+cond.false.i134:                                  ; preds = %for.body.lr.ph
+  unreachable
+
+_ZNK4llvm18MCRegAliasIteratordeEv.exit:           ; preds = %for.body.lr.ph
+  unreachable
+
+cond.false.i244:                                  ; preds = %_ZNK4llvm14MachineOperand5isDefEv.exit
+  unreachable
+
+_ZNK4llvm14MachineOperand6getRegEv.exit247:       ; preds = %_ZNK4llvm14MachineOperand5isDefEv.exit
+  br i1 undef, label %if.then53, label %if.end55
+
+if.then53:                                        ; preds = %_ZNK4llvm14MachineOperand6getRegEv.exit247
+  unreachable
+
+if.end55:                                         ; preds = %_ZNK4llvm14MachineOperand6getRegEv.exit247
+  br i1 undef, label %_ZNK4llvm14MachineOperand6isDeadEv.exit262, label %cond.false.i257
+
+cond.false.i257:                                  ; preds = %if.end55
+  unreachable
+
+_ZNK4llvm14MachineOperand6isDeadEv.exit262:       ; preds = %if.end55
+  %bf.load.i259 = load i24* %1, align 1
+  br i1 undef, label %if.then57, label %if.else59
+
+if.then57:                                        ; preds = %_ZNK4llvm14MachineOperand6isDeadEv.exit262
+  unreachable
+
+if.else59:                                        ; preds = %_ZNK4llvm14MachineOperand6isDeadEv.exit262
+  br i1 undef, label %if.end89, label %if.then62
+
+if.then62:                                        ; preds = %if.else59
+  unreachable
+
+if.end89:                                         ; preds = %if.else59
+  unreachable
+}
+
+attributes #0 = { nounwind }
+
+
diff --git a/test/CodeGen/PowerPC/subsumes-pred-regs.ll b/test/CodeGen/PowerPC/subsumes-pred-regs.ll
index da637cd2548b..c510e36cb413 100644
--- a/test/CodeGen/PowerPC/subsumes-pred-regs.ll
+++ b/test/CodeGen/PowerPC/subsumes-pred-regs.ll
@@ -35,7 +35,7 @@ if.then9.i39:                                     ; preds = %if.end7.i37
   br i1 %lnot.i.i16.i23, label %return, label %lor.rhs.i.i49
 
 ; CHECK: .LBB0_7:
-; CHECK:	beq 1, .LBB0_10
+; CHECK:	bne 1, .LBB0_10
 ; CHECK:	beq 0, .LBB0_10
 ; CHECK: .LBB0_9:
 
diff --git a/test/CodeGen/PowerPC/toc-load-sched-bug.ll b/test/CodeGen/PowerPC/toc-load-sched-bug.ll
index d437915e6c3f..e92c4f4018b1 100644
--- a/test/CodeGen/PowerPC/toc-load-sched-bug.ll
+++ b/test/CodeGen/PowerPC/toc-load-sched-bug.ll
@@ -484,51 +484,51 @@ attributes #7 = { noreturn nounwind }
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"clang version 3.6.0 (trunk 215115) (llvm/trunk 215117)"}
-!1 = metadata !{metadata !2, metadata !4, i64 0}
-!2 = metadata !{metadata !"_ZTSSs", metadata !3, i64 0}
-!3 = metadata !{metadata !"_ZTSNSs12_Alloc_hiderE", metadata !4, i64 0}
-!4 = metadata !{metadata !"any pointer", metadata !5, i64 0}
-!5 = metadata !{metadata !"omnipotent char", metadata !6, i64 0}
-!6 = metadata !{metadata !"Simple C/C++ TBAA"}
-!7 = metadata !{metadata !8, metadata !9, i64 0}
-!8 = metadata !{metadata !"_ZTSNSs9_Rep_baseE", metadata !9, i64 0, metadata !9, i64 8, metadata !10, i64 16}
-!9 = metadata !{metadata !"long", metadata !5, i64 0}
-!10 = metadata !{metadata !"int", metadata !5, i64 0}
-!11 = metadata !{metadata !12, metadata !12, i64 0}
-!12 = metadata !{metadata !"vtable pointer", metadata !6, i64 0}
-!13 = metadata !{metadata !3, metadata !4, i64 0}
-!14 = metadata !{metadata !15, metadata !10, i64 24}
-!15 = metadata !{metadata !"_ZTSN4llvm12SMDiagnosticE", metadata !4, i64 0, metadata !16, i64 8, metadata !2, i64 16, metadata !10, i64 24, metadata !10, i64 28, metadata !17, i64 32, metadata !2, i64 40, metadata !2, i64 48, metadata !18, i64 56, metadata !19, i64 80}
-!16 = metadata !{metadata !"_ZTSN4llvm5SMLocE", metadata !4, i64 0}
-!17 = metadata !{metadata !"_ZTSN4llvm9SourceMgr8DiagKindE", metadata !5, i64 0}
-!18 = metadata !{metadata !"_ZTSSt6vectorISt4pairIjjESaIS1_EE"}
-!19 = metadata !{metadata !"_ZTSN4llvm11SmallVectorINS_7SMFixItELj4EEE", metadata !20, i64 48}
-!20 = metadata !{metadata !"_ZTSN4llvm18SmallVectorStorageINS_7SMFixItELj4EEE", metadata !5, i64 0}
-!21 = metadata !{metadata !15, metadata !10, i64 28}
-!22 = metadata !{metadata !15, metadata !17, i64 32}
-!23 = metadata !{metadata !24, metadata !4, i64 0}
-!24 = metadata !{metadata !"_ZTSN4llvm15SmallVectorBaseE", metadata !4, i64 0, metadata !4, i64 8, metadata !4, i64 16}
-!25 = metadata !{metadata !24, metadata !4, i64 8}
-!26 = metadata !{metadata !24, metadata !4, i64 16}
-!27 = metadata !{metadata !4, metadata !4, i64 0}
-!28 = metadata !{metadata !"branch_weights", i32 64, i32 4}
-!29 = metadata !{metadata !10, metadata !10, i64 0}
-!30 = metadata !{metadata !31, metadata !4, i64 8}
-!31 = metadata !{metadata !"_ZTSN4llvm12MemoryBufferE", metadata !4, i64 8, metadata !4, i64 16}
-!32 = metadata !{metadata !31, metadata !4, i64 16}
-!33 = metadata !{metadata !5, metadata !5, i64 0}
-!34 = metadata !{metadata !35, metadata !4, i64 0}
-!35 = metadata !{metadata !"_ZTSSt12_Vector_baseISt4pairIjjESaIS1_EE", metadata !36, i64 0}
-!36 = metadata !{metadata !"_ZTSNSt12_Vector_baseISt4pairIjjESaIS1_EE12_Vector_implE", metadata !4, i64 0, metadata !4, i64 8, metadata !4, i64 16}
-!37 = metadata !{metadata !38, metadata !38, i64 0}
-!38 = metadata !{metadata !"bool", metadata !5, i64 0}
-!39 = metadata !{i8 0, i8 2}
-!40 = metadata !{metadata !41, metadata !4, i64 0}
-!41 = metadata !{metadata !"_ZTSN4llvm10TimeRegionE", metadata !4, i64 0}
-!42 = metadata !{metadata !43, metadata !44, i64 32}
-!43 = metadata !{metadata !"_ZTSN4llvm11raw_ostreamE", metadata !4, i64 8, metadata !4, i64 16, metadata !4, i64 24, metadata !44, i64 32}
-!44 = metadata !{metadata !"_ZTSN4llvm11raw_ostream10BufferKindE", metadata !5, i64 0}
-!45 = metadata !{metadata !43, metadata !4, i64 24}
-!46 = metadata !{metadata !43, metadata !4, i64 8}
-!47 = metadata !{i64 0, i64 8, metadata !27, i64 8, i64 8, metadata !27}
+!0 = !{!"clang version 3.6.0 (trunk 215115) (llvm/trunk 215117)"}
+!1 = !{!2, !4, i64 0}
+!2 = !{!"_ZTSSs", !3, i64 0}
+!3 = !{!"_ZTSNSs12_Alloc_hiderE", !4, i64 0}
+!4 = !{!"any pointer", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!8, !9, i64 0}
+!8 = !{!"_ZTSNSs9_Rep_baseE", !9, i64 0, !9, i64 8, !10, i64 16}
+!9 = !{!"long", !5, i64 0}
+!10 = !{!"int", !5, i64 0}
+!11 = !{!12, !12, i64 0}
+!12 = !{!"vtable pointer", !6, i64 0}
+!13 = !{!3, !4, i64 0}
+!14 = !{!15, !10, i64 24}
+!15 = !{!"_ZTSN4llvm12SMDiagnosticE", !4, i64 0, !16, i64 8, !2, i64 16, !10, i64 24, !10, i64 28, !17, i64 32, !2, i64 40, !2, i64 48, !18, i64 56, !19, i64 80}
+!16 = !{!"_ZTSN4llvm5SMLocE", !4, i64 0}
+!17 = !{!"_ZTSN4llvm9SourceMgr8DiagKindE", !5, i64 0}
+!18 = !{!"_ZTSSt6vectorISt4pairIjjESaIS1_EE"}
+!19 = !{!"_ZTSN4llvm11SmallVectorINS_7SMFixItELj4EEE", !20, i64 48}
+!20 = !{!"_ZTSN4llvm18SmallVectorStorageINS_7SMFixItELj4EEE", !5, i64 0}
+!21 = !{!15, !10, i64 28}
+!22 = !{!15, !17, i64 32}
+!23 = !{!24, !4, i64 0}
+!24 = !{!"_ZTSN4llvm15SmallVectorBaseE", !4, i64 0, !4, i64 8, !4, i64 16}
+!25 = !{!24, !4, i64 8}
+!26 = !{!24, !4, i64 16}
+!27 = !{!4, !4, i64 0}
+!28 = !{!"branch_weights", i32 64, i32 4}
+!29 = !{!10, !10, i64 0}
+!30 = !{!31, !4, i64 8}
+!31 = !{!"_ZTSN4llvm12MemoryBufferE", !4, i64 8, !4, i64 16}
+!32 = !{!31, !4, i64 16}
+!33 = !{!5, !5, i64 0}
+!34 = !{!35, !4, i64 0}
+!35 = !{!"_ZTSSt12_Vector_baseISt4pairIjjESaIS1_EE", !36, i64 0}
+!36 = !{!"_ZTSNSt12_Vector_baseISt4pairIjjESaIS1_EE12_Vector_implE", !4, i64 0, !4, i64 8, !4, i64 16}
+!37 = !{!38, !38, i64 0}
+!38 = !{!"bool", !5, i64 0}
+!39 = !{i8 0, i8 2}
+!40 = !{!41, !4, i64 0}
+!41 = !{!"_ZTSN4llvm10TimeRegionE", !4, i64 0}
+!42 = !{!43, !44, i64 32}
+!43 = !{!"_ZTSN4llvm11raw_ostreamE", !4, i64 8, !4, i64 16, !4, i64 24, !44, i64 32}
+!44 = !{!"_ZTSN4llvm11raw_ostream10BufferKindE", !5, i64 0}
+!45 = !{!43, !4, i64 24}
+!46 = !{!43, !4, i64 8}
+!47 = !{i64 0, i64 8, !27, i64 8, i64 8, !27}
diff --git a/test/CodeGen/PowerPC/unal-altivec-wint.ll b/test/CodeGen/PowerPC/unal-altivec-wint.ll
new file mode 100644
index 000000000000..7e0963f54b33
--- /dev/null
+++ b/test/CodeGen/PowerPC/unal-altivec-wint.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <4 x i32> @llvm.ppc.altivec.lvx(i8*) #1
+
+define <4 x i32> @test1(<4 x i32>* %h) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  %vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv)
+
+  %v0 = load <4 x i32>* %h, align 8
+
+  %a = add <4 x i32> %v0, %vl
+  ret <4 x i32> %a
+
+; CHECK-LABEL: @test1
+; CHECK: li [[REG:[0-9]+]], 16
+; CHECK-NOT: li {{[0-9]+}}, 15
+; CHECK-DAG: lvx {{[0-9]+}}, 0, 3
+; CHECK-DAG: lvx {{[0-9]+}}, 3, [[REG]]
+; CHECK: blr
+}
+
+declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*) #0
+
+define <4 x i32> @test2(<4 x i32>* %h, <4 x i32> %d) #0 {
+entry:
+  %h1 = getelementptr <4 x i32>* %h, i64 1
+  %hv = bitcast <4 x i32>* %h1 to i8*
+  call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
+
+  %v0 = load <4 x i32>* %h, align 8
+
+  ret <4 x i32> %v0
+
+; CHECK-LABEL: @test2
+; CHECK: li [[REG:[0-9]+]], 16
+; CHECK-NOT: li {{[0-9]+}}, 15
+; CHECK-DAG: lvx {{[0-9]+}}, 0, 3
+; CHECK-DAG: lvx {{[0-9]+}}, 3, [[REG]]
+; CHECK: blr
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readonly }
+
diff --git a/test/CodeGen/PowerPC/unal4-std.ll b/test/CodeGen/PowerPC/unal4-std.ll
index 9f29e31cb902..e91109911161 100644
--- a/test/CodeGen/PowerPC/unal4-std.ll
+++ b/test/CodeGen/PowerPC/unal4-std.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mcpu=pwr7 -mattr=-vsx| FileCheck %s
+; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -22,6 +23,9 @@ if.end210:                                        ; preds = %entry
 ; a multiple of 4).
 ; CHECK: @copy_to_conceal
 ; CHECK: stdx {{[0-9]+}}, 0,
+
+; CHECK-VSX: @copy_to_conceal
+; CHECK-VSX: stxvw4x {{[0-9]+}}, 0,
 }
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/PowerPC/unaligned.ll b/test/CodeGen/PowerPC/unaligned.ll
index d469c62f2f05..64c03cdda35e 100644
--- a/test/CodeGen/PowerPC/unaligned.ll
+++ b/test/CodeGen/PowerPC/unaligned.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
 
 define void @foo1(i16* %p, i16* %r) nounwind {
@@ -10,6 +12,10 @@ entry:
 ; CHECK: @foo1
 ; CHECK: lhz
 ; CHECK: sth
+
+; CHECK-VSX: @foo1
+; CHECK-VSX: lhz
+; CHECK-VSX: sth
 }
 
 define void @foo2(i32* %p, i32* %r) nounwind {
@@ -21,6 +27,10 @@ entry:
 ; CHECK: @foo2
 ; CHECK: lwz
 ; CHECK: stw
+
+; CHECK-VSX: @foo2
+; CHECK-VSX: lwz
+; CHECK-VSX: stw
 }
 
 define void @foo3(i64* %p, i64* %r) nounwind {
@@ -32,6 +42,10 @@ entry:
 ; CHECK: @foo3
 ; CHECK: ld
 ; CHECK: std
+
+; CHECK-VSX: @foo3
+; CHECK-VSX: ld
+; CHECK-VSX: std
 }
 
 define void @foo4(float* %p, float* %r) nounwind {
@@ -43,6 +57,10 @@ entry:
 ; CHECK: @foo4
 ; CHECK: lfs
 ; CHECK: stfs
+
+; CHECK-VSX: @foo4
+; CHECK-VSX: lfs
+; CHECK-VSX: stfs
 }
 
 define void @foo5(double* %p, double* %r) nounwind {
@@ -54,6 +72,10 @@ entry:
 ; CHECK: @foo5
 ; CHECK: lfd
 ; CHECK: stfd
+
+; CHECK-VSX: @foo5
+; CHECK-VSX: lxsdx
+; CHECK-VSX: stxsdx
 }
 
 define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind {
@@ -69,5 +91,15 @@ entry:
 ; CHECK-DAG: ld
 ; CHECK-DAG: stdx
 ; CHECK: stdx
+
+; For VSX on P7, unaligned loads and stores are preferable to aligned
+; stack slots, but lvsl/vperm is better still.  (On P8 lxvw4x is preferable.)
+; Using unaligned stxvw4x is preferable on both machines.
+; CHECK-VSX: @foo6
+; CHECK-VSX-DAG: lvsl
+; CHECK-VSX-DAG: lvx
+; CHECK-VSX-DAG: lvx
+; CHECK-VSX: vperm
+; CHECK-VSX: stxvw4x
 }
 
diff --git a/test/CodeGen/PowerPC/unsafe-math.ll b/test/CodeGen/PowerPC/unsafe-math.ll
index b0bdcc28d28e..f6430270eac7 100644
--- a/test/CodeGen/PowerPC/unsafe-math.ll
+++ b/test/CodeGen/PowerPC/unsafe-math.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=ppc32 | grep fmul | count 2
-; RUN: llc < %s -march=ppc32 -enable-unsafe-fp-math | \
+; RUN: llc < %s -mattr=-vsx -march=ppc32 | grep fmul | count 2
+; RUN: llc < %s -mattr=-vsx -march=ppc32 -enable-unsafe-fp-math | \
 ; RUN:   grep fmul | count 1
 
 define double @foo(double %X) nounwind {
diff --git a/test/CodeGen/PowerPC/unwind-dw2-g.ll b/test/CodeGen/PowerPC/unwind-dw2-g.ll
index 24b52070f4f7..4ae6ff24a038 100644
--- a/test/CodeGen/PowerPC/unwind-dw2-g.ll
+++ b/test/CodeGen/PowerPC/unwind-dw2-g.ll
@@ -21,15 +21,15 @@ attributes #0 = { nounwind }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8, !11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/unwind-dw2.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"/tmp/unwind-dw2.c", metadata !"/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/unwind-dw2.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!9 = metadata !{i32 2, i32 0, metadata !4, null}
-!10 = metadata !{i32 3, i32 0, metadata !4, null}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4\000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/unwind-dw2.c] [DW_LANG_C99]
+!1 = !{!"/tmp/unwind-dw2.c", !"/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\000\000\001", !1, !5, !6, null, void ()* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/unwind-dw2.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{i32 2, !"Dwarf Version", i32 3}
+!9 = !MDLocation(line: 2, scope: !4)
+!10 = !MDLocation(line: 3, scope: !4)
+!11 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/PowerPC/vec-abi-align.ll b/test/CodeGen/PowerPC/vec-abi-align.ll
index 3239cf6c06ab..5075ff2b8c07 100644
--- a/test/CodeGen/PowerPC/vec-abi-align.ll
+++ b/test/CodeGen/PowerPC/vec-abi-align.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -16,6 +17,10 @@ entry:
 ; CHECK-LABEL: @test1
 ; CHECK: stvx 2,
 ; CHECK: blr
+
+; CHECK-VSX-LABEL: @test1
+; CHECK-VSX: stxvw4x 34,
+; CHECK-VSX: blr
 }
 
 ; Function Attrs: nounwind
@@ -35,6 +40,13 @@ entry:
 ; CHECK: addi [[REGB:[0-9]+]], 1, 112
 ; CHECK: lvx 2, [[REGB]], [[REG16]]
 ; CHECK: blr
+
+; CHECK-VSX-LABEL: @test2
+; CHECK-VSX: ld {{[0-9]+}}, 112(1)
+; CHECK-VSX: li [[REG16:[0-9]+]], 16
+; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 112
+; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
+; CHECK-VSX: blr
 }
 
 ; Function Attrs: nounwind
@@ -54,6 +66,13 @@ entry:
 ; CHECK: addi [[REGB:[0-9]+]], 1, 128
 ; CHECK: lvx 2, [[REGB]], [[REG16]]
 ; CHECK: blr
+
+; CHECK-VSX-LABEL: @test3
+; CHECK-VSX: ld {{[0-9]+}}, 128(1)
+; CHECK-VSX: li [[REG16:[0-9]+]], 16
+; CHECK-VSX: addi [[REGB:[0-9]+]], 1, 128
+; CHECK-VSX: lxvw4x {{[0-9]+}}, [[REGB]], [[REG16]]
+; CHECK-VSX: blr
 }
 
 attributes #0 = { nounwind }
diff --git a/test/CodeGen/PowerPC/vec_misaligned.ll b/test/CodeGen/PowerPC/vec_misaligned.ll
index 304a84d49a9d..49f11e4e2604 100644
--- a/test/CodeGen/PowerPC/vec_misaligned.ll
+++ b/test/CodeGen/PowerPC/vec_misaligned.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=ppc32 -mcpu=g5 | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec | FileCheck %s -check-prefix=CHECK-LE
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -mattr=-vsx -mattr=-power8-vector | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec -mattr=-vsx -mattr=-power8-vector | FileCheck %s -check-prefix=CHECK-LE
 
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
 target triple = "powerpc-apple-darwin8"
diff --git a/test/CodeGen/PowerPC/vec_mul.ll b/test/CodeGen/PowerPC/vec_mul.ll
index 8a448156c98e..86596d4b0a87 100644
--- a/test/CodeGen/PowerPC/vec_mul.ll
+++ b/test/CodeGen/PowerPC/vec_mul.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -march=ppc32 -mattr=+altivec | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -march=ppc64 -mattr=+altivec | FileCheck %s
-; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -march=ppc64 -mattr=+altivec | FileCheck %s -check-prefix=CHECK-LE
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -march=ppc32 -mattr=+altivec -mattr=-vsx | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -march=ppc64 -mattr=+altivec -mattr=-vsx -mcpu=pwr7 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -march=ppc64 -mattr=+altivec -mattr=-vsx -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-LE
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -march=ppc64 -mattr=+altivec -mattr=+vsx -mcpu=pwr7 | FileCheck %s -check-prefix=CHECK-VSX
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -march=ppc64 -mattr=+altivec -mattr=+vsx -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-LE-VSX
 
 define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
 	%tmp = load <4 x i32>* %X		; <<4 x i32>> [#uses=1]
@@ -14,6 +16,12 @@ define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
 ; CHECK-LE-LABEL: test_v4i32:
 ; CHECK-LE: vmsumuhm
 ; CHECK-LE-NOT: mullw
+; CHECK-VSX-LABEL: test_v4i32:
+; CHECK-VSX: vmsumuhm
+; CHECK-VSX-NOT: mullw
+; CHECK-LE-VSX-LABEL: test_v4i32:
+; CHECK-LE-VSX: vmsumuhm
+; CHECK-LE-VSX-NOT: mullw
 
 define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
 	%tmp = load <8 x i16>* %X		; <<8 x i16>> [#uses=1]
@@ -27,6 +35,12 @@ define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
 ; CHECK-LE-LABEL: test_v8i16:
 ; CHECK-LE: vmladduhm
 ; CHECK-LE-NOT: mullw
+; CHECK-VSX-LABEL: test_v8i16:
+; CHECK-VSX: vmladduhm
+; CHECK-VSX-NOT: mullw
+; CHECK-LE-VSX-LABEL: test_v8i16:
+; CHECK-LE-VSX: vmladduhm
+; CHECK-LE-VSX-NOT: mullw
 
 define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
 	%tmp = load <16 x i8>* %X		; <<16 x i8>> [#uses=1]
@@ -43,6 +57,15 @@ define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
 ; CHECK-LE: vmuleub [[REG2:[0-9]+]]
 ; CHECK-LE: vperm {{[0-9]+}}, [[REG2]], [[REG1]]
 ; CHECK-LE-NOT: mullw
+; CHECK-VSX-LABEL: test_v16i8:
+; CHECK-VSX: vmuloub
+; CHECK-VSX: vmuleub
+; CHECK-VSX-NOT: mullw
+; CHECK-LE-VSX-LABEL: test_v16i8:
+; CHECK-LE-VSX: vmuloub [[REG1:[0-9]+]]
+; CHECK-LE-VSX: vmuleub [[REG2:[0-9]+]]
+; CHECK-LE-VSX: vperm {{[0-9]+}}, [[REG2]], [[REG1]]
+; CHECK-LE-VSX-NOT: mullw
 
 define <4 x float> @test_float(<4 x float>* %X, <4 x float>* %Y) {
 	%tmp = load <4 x float>* %X
@@ -61,3 +84,7 @@ define <4 x float> @test_float(<4 x float>* %X, <4 x float>* %Y) {
 ; CHECK-LE: vspltisw [[ZNEG:[0-9]+]], -1
 ; CHECK-LE: vslw     {{[0-9]+}}, [[ZNEG]], [[ZNEG]]
 ; CHECK-LE: vmaddfp
+; CHECK-VSX-LABEL: test_float:
+; CHECK-VSX: xvmulsp
+; CHECK-LE-VSX-LABEL: test_float:
+; CHECK-LE-VSX: xvmulsp
diff --git a/test/CodeGen/PowerPC/vec_shuffle_le.ll b/test/CodeGen/PowerPC/vec_shuffle_le.ll
index a4b2119f6ebc..c7fc1c60c5ea 100644
--- a/test/CodeGen/PowerPC/vec_shuffle_le.ll
+++ b/test/CodeGen/PowerPC/vec_shuffle_le.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec -mattr=-vsx -mcpu=pwr7 | FileCheck %s
 
 define void @VPKUHUM_xy(<16 x i8>* %A, <16 x i8>* %B) {
 entry:
diff --git a/test/CodeGen/PowerPC/vrspill.ll b/test/CodeGen/PowerPC/vrspill.ll
index c3d1bf8f1ead..b55e12960fa6 100644
--- a/test/CodeGen/PowerPC/vrspill.ll
+++ b/test/CodeGen/PowerPC/vrspill.ll
@@ -1,5 +1,6 @@
-; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -verify-machineinstrs -fast-isel=false < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc-unknown-linux-gnu -mattr=+altivec -mattr=-vsx -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -mattr=-vsx -verify-machineinstrs -fast-isel=false -mcpu=pwr7 < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -mattr=+vsx -verify-machineinstrs -fast-isel=false -mcpu=pwr7 < %s | FileCheck -check-prefix=CHECK-VSX %s
 
 ; This verifies that we generate correct spill/reload code for vector regs.
 
@@ -15,4 +16,9 @@ entry:
 
 ; CHECK: stvx 2,
 
+; We would prefer to test for "stxvw4x 34," but current -O0 code
+; needlessly generates "vor 3,2,2 / stxvw4x 35,0,3", so we'll settle for
+; the opcode.
+; CHECK-VSX: stxvw4x
+
 declare void @foo(i32*)
diff --git a/test/CodeGen/PowerPC/vsx-args.ll b/test/CodeGen/PowerPC/vsx-args.ll
index 520aeb5fa909..2b53c0abf09f 100644
--- a/test/CodeGen/PowerPC/vsx-args.ll
+++ b/test/CodeGen/PowerPC/vsx-args.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx | FileCheck %s
+; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
diff --git a/test/CodeGen/PowerPC/vsx-div.ll b/test/CodeGen/PowerPC/vsx-div.ll
new file mode 100644
index 000000000000..8a9578e5ed80
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-div.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mcpu=pwr7 -mattr=+vsx -O1 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+
+@vf = global <4 x float> <float -1.500000e+00, float 2.500000e+00, float -3.500000e+00, float 4.500000e+00>, align 16
+@vd = global <2 x double> <double 3.500000e+00, double -7.500000e+00>, align 16
+@vf_res = common global <4 x float> zeroinitializer, align 16
+@vd_res = common global <2 x double> zeroinitializer, align 16
+
+define void @test1() {
+entry:
+  %0 = load <4 x float>* @vf, align 16
+  %1 = tail call <4 x float> @llvm.ppc.vsx.xvdivsp(<4 x float> %0, <4 x float> %0)
+  store <4 x float> %1, <4 x float>* @vf_res, align 16
+  ret void
+}
+; CHECK-LABEL: @test1
+; CHECK: xvdivsp
+
+define void @test2() {
+entry:
+  %0 = load <2 x double>* @vd, align 16
+  %1 = tail call <2 x double> @llvm.ppc.vsx.xvdivdp(<2 x double> %0, <2 x double> %0)
+  store <2 x double> %1, <2 x double>* @vd_res, align 16
+  ret void
+}
+; CHECK-LABEL: @test2
+; CHECK: xvdivdp
+
+declare <2 x double> @llvm.ppc.vsx.xvdivdp(<2 x double>, <2 x double>)
+declare <4 x float> @llvm.ppc.vsx.xvdivsp(<4 x float>, <4 x float>)
diff --git a/test/CodeGen/PowerPC/vsx-fma-m.ll b/test/CodeGen/PowerPC/vsx-fma-m.ll
index da4a20481e62..c492e169e10f 100644
--- a/test/CodeGen/PowerPC/vsx-fma-m.ll
+++ b/test/CodeGen/PowerPC/vsx-fma-m.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx | FileCheck %s
+; RUN: llc < %s -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 | FileCheck -check-prefix=CHECK-FISL %s
 
 ; Also run with -schedule-ppc-vsx-fma-mutation-early as a stress test for the
 ; live-interval-updating logic.
@@ -22,6 +23,15 @@ entry:
 ; CHECK-DAG: stxsdx 3, 0, 7
 ; CHECK-DAG: stxsdx 1, 7, [[C1]]
 ; CHECK: blr
+
+; CHECK-FISL-LABEL: @test1
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
+; CHECK-FISL-DAG: stxsdx 0, 0, 7
+; CHECK-FISL-DAG: xsmaddadp 1, 2, 4
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsdx 1, 7, [[C1]]
+; CHECK-FISL: blr
 }
 
 define void @test2(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
@@ -46,6 +56,19 @@ entry:
 ; CHECK-DAG: stxsdx 4, 8, [[C1]]
 ; CHECK-DAG: stxsdx 1, 8, [[C2]]
 ; CHECK: blr
+
+; CHECK-FISL-LABEL: @test2
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
+; CHECK-FISL-DAG: stxsdx 0, 0, 8
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddadp 0, 2, 4
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsdx 0, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
+; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
+; CHECK-FISL: blr
 }
 
 define void @test3(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
@@ -81,6 +104,20 @@ entry:
 ; CHECK-DAG: stxsdx 1, 8, [[C2]]
 ; CHECK-DAG: stxsdx 4, 8, [[C3]]
 ; CHECK: blr
+
+; CHECK-FISL-LABEL: @test3
+; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 4
+; CHECK-FISL-DAG: fmr 4, [[F1]]
+; CHECK-FISL-DAG: xsmaddadp 4, 2, 3
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 24
+; CHECK-FISL-DAG: stxsdx 4, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
+; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
+; CHECK-FISL-DAG: li [[C3:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsdx 0, 8, [[C3]]
+; CHECK-FISL: blr
 }
 
 define void @test4(double %a, double %b, double %c, double %e, double %f, double* nocapture %d) #0 {
@@ -116,6 +153,22 @@ entry:
 ; CHECK-DAG: stxsdx 4, 8, [[C3]]
 ; CHECK-DAG: stxsdx 1, 8, [[C2]]
 ; CHECK: blr
+
+; CHECK-FISL-LABEL: @test4
+; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 3
+; CHECK-FISL-DAG: stxsdx 0, 0, 8
+; CHECK-FISL-DAG: fmr [[F1]], 1
+; CHECK-FISL-DAG: xsmaddadp [[F1]], 2, 4
+; CHECK-FISL-DAG: li [[C3:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsdx 0, 8, [[C3]]
+; CHECK-FISL-DAG: xsmaddadp 0, 2, 3
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 24
+; CHECK-FISL-DAG: stxsdx 0, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddadp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 16
+; CHECK-FISL-DAG: stxsdx 1, 8, [[C2]]
+; CHECK-FISL: blr
 }
 
 declare double @llvm.fma.f64(double, double, double) #0
@@ -136,6 +189,15 @@ entry:
 ; CHECK-DAG: stxvd2x 36, 0, 3
 ; CHECK-DAG: stxvd2x 34, 3, [[C1:[0-9]+]]
 ; CHECK: blr
+
+; CHECK-FISL-LABEL: @testv1
+; CHECK-FISL-DAG: xxlor 0, 34, 34
+; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
+; CHECK-FISL-DAG: stxvd2x 0, 0, 3
+; CHECK-FISL-DAG: xvmaddadp 34, 35, 37
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
+; CHECK-FISL-DAG: stxvd2x 34, 3, [[C1:[0-9]+]]
+; CHECK-FISL: blr
 }
 
 define void @testv2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
@@ -160,6 +222,19 @@ entry:
 ; CHECK-DAG: stxvd2x 37, 3, [[C1:[0-9]+]]
 ; CHECK-DAG: stxvd2x 34, 3, [[C2:[0-9]+]]
 ; CHECK: blr
+
+; CHECK-FISL-LABEL: @testv2
+; CHECK-FISL-DAG: xxlor 0, 34, 34
+; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
+; CHECK-FISL-DAG: stxvd2x 0, 0, 3
+; CHECK-FISL-DAG: xxlor 0, 34, 34
+; CHECK-FISL-DAG: xvmaddadp 0, 35, 37
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
+; CHECK-FISL-DAG: stxvd2x 0, 3, [[C1:[0-9]+]]
+; CHECK-FISL-DAG: xvmaddadp 34, 35, 38
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
+; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2:[0-9]+]]
+; CHECK-FISL: blr
 }
 
 define void @testv3(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
@@ -177,24 +252,47 @@ entry:
   store <2 x double> %1, <2 x double>* %arrayidx3, align 8
   ret void
 
+; Note: There is some unavoidable changeability in this variant.  If the
+; FMAs are reordered differently, the algorithm can pick a different
+; multiplicand to destroy, changing the register assignment.  There isn't
+; a good way to express this possibility, so hopefully this doesn't change
+; too often.
+
 ; CHECK-LABEL: @testv3
 ; CHECK-DAG: xxlor [[V1:[0-9]+]], 34, 34
-; CHECK-DAG: xvmaddmdp 37, 35, 34
 ; CHECK-DAG: li [[C1:[0-9]+]], 48
 ; CHECK-DAG: li [[C2:[0-9]+]], 32
-; CHECK-DAG: xvmaddadp 34, 35, 38
+; CHECK-DAG: xvmaddmdp 37, 35, 34
 ; CHECK-DAG: li [[C3:[0-9]+]], 16
 
 ; Note: We could convert this next FMA to M-type as well, but it would require
 ; re-ordering the instructions.
 ; CHECK-DAG: xvmaddadp [[V1]], 35, 36
 
-; CHECK-DAG: xvmaddmdp 35, 36, 37
+; CHECK-DAG: xvmaddmdp 36, 35, 37
+; CHECK-DAG: xvmaddadp 34, 35, 38
 ; CHECK-DAG: stxvd2x 32, 0, 3
-; CHECK-DAG: stxvd2x 35, 3, [[C1]]
+; CHECK-DAG: stxvd2x 36, 3, [[C1]]
 ; CHECK-DAG: stxvd2x 34, 3, [[C2]]
 ; CHECK-DAG: stxvd2x 37, 3, [[C3]]
 ; CHECK: blr
+
+; CHECK-FISL-LABEL: @testv3
+; CHECK-FISL-DAG: xxlor [[V1:[0-9]+]], 34, 34
+; CHECK-FISL-DAG: xvmaddadp [[V1]], 35, 36
+; CHECK-FISL-DAG: stxvd2x [[V1]], 0, 3
+; CHECK-FISL-DAG: xxlor [[V2:[0-9]+]], 34, 34
+; CHECK-FISL-DAG: xvmaddadp [[V2]], 35, 37
+; CHECK-FISL-DAG: xxlor [[V3:[0-9]+]], 0, 0
+; CHECK-FISL-DAG: xvmaddadp [[V3]], 35, 36
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 48
+; CHECK-FISL-DAG: stxvd2x [[V3]], 3, [[C1]]
+; CHECK-FISL-DAG: xvmaddadp 34, 35, 38
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
+; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
+; CHECK-FISL-DAG: li [[C3:[0-9]+]], 16
+; CHECK-FISL-DAG: stxvd2x 0, 3, [[C3]]
+; CHECK-FISL: blr
 }
 
 define void @testv4(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %e, <2 x double> %f, <2 x double>* nocapture %d) #0 {
@@ -230,6 +328,22 @@ entry:
 ; CHECK-DAG: stxvd2x 37, 3, [[C3]]
 ; CHECK-DAG: stxvd2x 34, 3, [[C2]]
 ; CHECK: blr
+
+; CHECK-FISL-LABEL: @testv4
+; CHECK-FISL-DAG: xxlor [[V1:[0-9]+]], 34, 34
+; CHECK-FISL-DAG: xvmaddadp [[V1]], 35, 36
+; CHECK-FISL-DAG: stxvd2x 0, 0, 3
+; CHECK-FISL-DAG: xxlor [[V2:[0-9]+]], 34, 34
+; CHECK-FISL-DAG: xvmaddadp [[V2]], 35, 37
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 16
+; CHECK-FISL-DAG: stxvd2x 0, 3, [[C1]]
+; CHECK-FISL-DAG: xvmaddadp 0, 35, 37
+; CHECK-FISL-DAG: li [[C3:[0-9]+]], 48
+; CHECK-FISL-DAG: stxvd2x 0, 3, [[C3]]
+; CHECK-FISL-DAG: xvmaddadp 0, 35, 36
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 32
+; CHECK-FISL-DAG: stxvd2x 34, 3, [[C2]]
+; CHECK-FISL: blr
 }
 
 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
diff --git a/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll b/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll
new file mode 100644
index 000000000000..7367672eab8b
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll
@@ -0,0 +1,172 @@
+; RUN: llc -mcpu=pwr8 -mattr=+vsx -O2 -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
+; RUN: grep lxvd2x < %t | count 18
+; RUN: grep stxvd2x < %t | count 18
+; RUN: grep xxpermdi < %t | count 36
+
+@vf = global <4 x float> <float -1.500000e+00, float 2.500000e+00, float -3.500000e+00, float 4.500000e+00>, align 16
+@vd = global <2 x double> <double 3.500000e+00, double -7.500000e+00>, align 16
+@vsi = global <4 x i32> <i32 -1, i32 2, i32 -3, i32 4>, align 16
+@vui = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@vsll = global <2 x i64> <i64 255, i64 -937>, align 16
+@vull = global <2 x i64> <i64 1447, i64 2894>, align 16
+@res_vsi = common global <4 x i32> zeroinitializer, align 16
+@res_vui = common global <4 x i32> zeroinitializer, align 16
+@res_vf = common global <4 x float> zeroinitializer, align 16
+@res_vsll = common global <2 x i64> zeroinitializer, align 16
+@res_vull = common global <2 x i64> zeroinitializer, align 16
+@res_vd = common global <2 x double> zeroinitializer, align 16
+
+define void @test1() {
+entry:
+; CHECK-LABEL: test1
+  %__a.addr.i31 = alloca i32, align 4
+  %__b.addr.i32 = alloca <4 x i32>*, align 8
+  %__a.addr.i29 = alloca i32, align 4
+  %__b.addr.i30 = alloca <4 x float>*, align 8
+  %__a.addr.i27 = alloca i32, align 4
+  %__b.addr.i28 = alloca <2 x i64>*, align 8
+  %__a.addr.i25 = alloca i32, align 4
+  %__b.addr.i26 = alloca <2 x i64>*, align 8
+  %__a.addr.i23 = alloca i32, align 4
+  %__b.addr.i24 = alloca <2 x double>*, align 8
+  %__a.addr.i20 = alloca <4 x i32>, align 16
+  %__b.addr.i21 = alloca i32, align 4
+  %__c.addr.i22 = alloca <4 x i32>*, align 8
+  %__a.addr.i17 = alloca <4 x i32>, align 16
+  %__b.addr.i18 = alloca i32, align 4
+  %__c.addr.i19 = alloca <4 x i32>*, align 8
+  %__a.addr.i14 = alloca <4 x float>, align 16
+  %__b.addr.i15 = alloca i32, align 4
+  %__c.addr.i16 = alloca <4 x float>*, align 8
+  %__a.addr.i11 = alloca <2 x i64>, align 16
+  %__b.addr.i12 = alloca i32, align 4
+  %__c.addr.i13 = alloca <2 x i64>*, align 8
+  %__a.addr.i8 = alloca <2 x i64>, align 16
+  %__b.addr.i9 = alloca i32, align 4
+  %__c.addr.i10 = alloca <2 x i64>*, align 8
+  %__a.addr.i6 = alloca <2 x double>, align 16
+  %__b.addr.i7 = alloca i32, align 4
+  %__c.addr.i = alloca <2 x double>*, align 8
+  %__a.addr.i = alloca i32, align 4
+  %__b.addr.i = alloca <4 x i32>*, align 8
+  store i32 0, i32* %__a.addr.i, align 4
+  store <4 x i32>* @vsi, <4 x i32>** %__b.addr.i, align 8
+  %0 = load i32* %__a.addr.i, align 4
+  %1 = load <4 x i32>** %__b.addr.i, align 8
+  %2 = bitcast <4 x i32>* %1 to i8*
+  %3 = getelementptr i8* %2, i32 %0
+  %4 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %3)
+  store <4 x i32> %4, <4 x i32>* @res_vsi, align 16
+  store i32 0, i32* %__a.addr.i31, align 4
+  store <4 x i32>* @vui, <4 x i32>** %__b.addr.i32, align 8
+  %5 = load i32* %__a.addr.i31, align 4
+  %6 = load <4 x i32>** %__b.addr.i32, align 8
+  %7 = bitcast <4 x i32>* %6 to i8*
+  %8 = getelementptr i8* %7, i32 %5
+  %9 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %8)
+  store <4 x i32> %9, <4 x i32>* @res_vui, align 16
+  store i32 0, i32* %__a.addr.i29, align 4
+  store <4 x float>* @vf, <4 x float>** %__b.addr.i30, align 8
+  %10 = load i32* %__a.addr.i29, align 4
+  %11 = load <4 x float>** %__b.addr.i30, align 8
+  %12 = bitcast <4 x float>* %11 to i8*
+  %13 = getelementptr i8* %12, i32 %10
+  %14 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %13)
+  %15 = bitcast <4 x i32> %14 to <4 x float>
+  store <4 x float> %15, <4 x float>* @res_vf, align 16
+  store i32 0, i32* %__a.addr.i27, align 4
+  store <2 x i64>* @vsll, <2 x i64>** %__b.addr.i28, align 8
+  %16 = load i32* %__a.addr.i27, align 4
+  %17 = load <2 x i64>** %__b.addr.i28, align 8
+  %18 = bitcast <2 x i64>* %17 to i8*
+  %19 = getelementptr i8* %18, i32 %16
+  %20 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %19)
+  %21 = bitcast <2 x double> %20 to <2 x i64>
+  store <2 x i64> %21, <2 x i64>* @res_vsll, align 16
+  store i32 0, i32* %__a.addr.i25, align 4
+  store <2 x i64>* @vull, <2 x i64>** %__b.addr.i26, align 8
+  %22 = load i32* %__a.addr.i25, align 4
+  %23 = load <2 x i64>** %__b.addr.i26, align 8
+  %24 = bitcast <2 x i64>* %23 to i8*
+  %25 = getelementptr i8* %24, i32 %22
+  %26 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %25)
+  %27 = bitcast <2 x double> %26 to <2 x i64>
+  store <2 x i64> %27, <2 x i64>* @res_vull, align 16
+  store i32 0, i32* %__a.addr.i23, align 4
+  store <2 x double>* @vd, <2 x double>** %__b.addr.i24, align 8
+  %28 = load i32* %__a.addr.i23, align 4
+  %29 = load <2 x double>** %__b.addr.i24, align 8
+  %30 = bitcast <2 x double>* %29 to i8*
+  %31 = getelementptr i8* %30, i32 %28
+  %32 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %31)
+  store <2 x double> %32, <2 x double>* @res_vd, align 16
+  %33 = load <4 x i32>* @vsi, align 16
+  store <4 x i32> %33, <4 x i32>* %__a.addr.i20, align 16
+  store i32 0, i32* %__b.addr.i21, align 4
+  store <4 x i32>* @res_vsi, <4 x i32>** %__c.addr.i22, align 8
+  %34 = load <4 x i32>* %__a.addr.i20, align 16
+  %35 = load i32* %__b.addr.i21, align 4
+  %36 = load <4 x i32>** %__c.addr.i22, align 8
+  %37 = bitcast <4 x i32>* %36 to i8*
+  %38 = getelementptr i8* %37, i32 %35
+  call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %34, i8* %38)
+  %39 = load <4 x i32>* @vui, align 16
+  store <4 x i32> %39, <4 x i32>* %__a.addr.i17, align 16
+  store i32 0, i32* %__b.addr.i18, align 4
+  store <4 x i32>* @res_vui, <4 x i32>** %__c.addr.i19, align 8
+  %40 = load <4 x i32>* %__a.addr.i17, align 16
+  %41 = load i32* %__b.addr.i18, align 4
+  %42 = load <4 x i32>** %__c.addr.i19, align 8
+  %43 = bitcast <4 x i32>* %42 to i8*
+  %44 = getelementptr i8* %43, i32 %41
+  call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %40, i8* %44)
+  %45 = load <4 x float>* @vf, align 16
+  store <4 x float> %45, <4 x float>* %__a.addr.i14, align 16
+  store i32 0, i32* %__b.addr.i15, align 4
+  store <4 x float>* @res_vf, <4 x float>** %__c.addr.i16, align 8
+  %46 = load <4 x float>* %__a.addr.i14, align 16
+  %47 = bitcast <4 x float> %46 to <4 x i32>
+  %48 = load i32* %__b.addr.i15, align 4
+  %49 = load <4 x float>** %__c.addr.i16, align 8
+  %50 = bitcast <4 x float>* %49 to i8*
+  %51 = getelementptr i8* %50, i32 %48
+  call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %47, i8* %51) #1
+  %52 = load <2 x i64>* @vsll, align 16
+  store <2 x i64> %52, <2 x i64>* %__a.addr.i11, align 16
+  store i32 0, i32* %__b.addr.i12, align 4
+  store <2 x i64>* @res_vsll, <2 x i64>** %__c.addr.i13, align 8
+  %53 = load <2 x i64>* %__a.addr.i11, align 16
+  %54 = bitcast <2 x i64> %53 to <2 x double>
+  %55 = load i32* %__b.addr.i12, align 4
+  %56 = load <2 x i64>** %__c.addr.i13, align 8
+  %57 = bitcast <2 x i64>* %56 to i8*
+  %58 = getelementptr i8* %57, i32 %55
+  call void @llvm.ppc.vsx.stxvd2x(<2 x double> %54, i8* %58)
+  %59 = load <2 x i64>* @vull, align 16
+  store <2 x i64> %59, <2 x i64>* %__a.addr.i8, align 16
+  store i32 0, i32* %__b.addr.i9, align 4
+  store <2 x i64>* @res_vull, <2 x i64>** %__c.addr.i10, align 8
+  %60 = load <2 x i64>* %__a.addr.i8, align 16
+  %61 = bitcast <2 x i64> %60 to <2 x double>
+  %62 = load i32* %__b.addr.i9, align 4
+  %63 = load <2 x i64>** %__c.addr.i10, align 8
+  %64 = bitcast <2 x i64>* %63 to i8*
+  %65 = getelementptr i8* %64, i32 %62
+  call void @llvm.ppc.vsx.stxvd2x(<2 x double> %61, i8* %65)
+  %66 = load <2 x double>* @vd, align 16
+  store <2 x double> %66, <2 x double>* %__a.addr.i6, align 16
+  store i32 0, i32* %__b.addr.i7, align 4
+  store <2 x double>* @res_vd, <2 x double>** %__c.addr.i, align 8
+  %67 = load <2 x double>* %__a.addr.i6, align 16
+  %68 = load i32* %__b.addr.i7, align 4
+  %69 = load <2 x double>** %__c.addr.i, align 8
+  %70 = bitcast <2 x double>* %69 to i8*
+  %71 = getelementptr i8* %70, i32 %68
+  call void @llvm.ppc.vsx.stxvd2x(<2 x double> %67, i8* %71)
+  ret void
+}
+
+declare void @llvm.ppc.vsx.stxvd2x(<2 x double>, i8*)
+declare void @llvm.ppc.vsx.stxvw4x(<4 x i32>, i8*)
+declare <2 x double> @llvm.ppc.vsx.lxvd2x(i8*)
+declare <4 x i32> @llvm.ppc.vsx.lxvw4x(i8*)
diff --git a/test/CodeGen/PowerPC/vsx-ldst.ll b/test/CodeGen/PowerPC/vsx-ldst.ll
new file mode 100644
index 000000000000..688187d1fcb6
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-ldst.ll
@@ -0,0 +1,46 @@
+; RUN: llc -mcpu=pwr8 -mattr=+vsx -O2 -mtriple=powerpc64-unknown-linux-gnu < %s > %t
+; RUN: grep lxvw4x < %t | count 3
+; RUN: grep lxvd2x < %t | count 3
+; RUN: grep stxvw4x < %t | count 3
+; RUN: grep stxvd2x < %t | count 3
+; RUN: llc -mcpu=pwr8 -mattr=+vsx -O0 -fast-isel=1 -mtriple=powerpc64-unknown-linux-gnu < %s > %t
+; RUN: grep lxvw4x < %t | count 3
+; RUN: grep lxvd2x < %t | count 3
+; RUN: grep stxvw4x < %t | count 3
+; RUN: grep stxvd2x < %t | count 3
+
+; RUN: llc -mcpu=pwr8 -mattr=+vsx -O2 -mtriple=powerpc64le-unknown-linux-gnu < %s > %t
+; RUN: grep lxvd2x < %t | count 6
+; RUN: grep stxvd2x < %t | count 6
+; RUN: grep xxpermdi < %t | count 12
+
+@vsi = global <4 x i32> <i32 -1, i32 2, i32 -3, i32 4>, align 16
+@vui = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
+@vf = global <4 x float> <float -1.500000e+00, float 2.500000e+00, float -3.500000e+00, float 4.500000e+00>, align 16
+@vsll = global <2 x i64> <i64 255, i64 -937>, align 16
+@vull = global <2 x i64> <i64 1447, i64 2894>, align 16
+@vd = global <2 x double> <double 3.500000e+00, double -7.500000e+00>, align 16
+@res_vsi = common global <4 x i32> zeroinitializer, align 16
+@res_vui = common global <4 x i32> zeroinitializer, align 16
+@res_vf = common global <4 x float> zeroinitializer, align 16
+@res_vsll = common global <2 x i64> zeroinitializer, align 16
+@res_vull = common global <2 x i64> zeroinitializer, align 16
+@res_vd = common global <2 x double> zeroinitializer, align 16
+
+; Function Attrs: nounwind
+define void @test1() {
+entry:
+  %0 = load <4 x i32>* @vsi, align 16
+  %1 = load <4 x i32>* @vui, align 16
+  %2 = load <4 x i32>* bitcast (<4 x float>* @vf to <4 x i32>*), align 16
+  %3 = load <2 x double>* bitcast (<2 x i64>* @vsll to <2 x double>*), align 16
+  %4 = load <2 x double>* bitcast (<2 x i64>* @vull to <2 x double>*), align 16
+  %5 = load <2 x double>* @vd, align 16
+  store <4 x i32> %0, <4 x i32>* @res_vsi, align 16
+  store <4 x i32> %1, <4 x i32>* @res_vui, align 16
+  store <4 x i32> %2, <4 x i32>* bitcast (<4 x float>* @res_vf to <4 x i32>*), align 16
+  store <2 x double> %3, <2 x double>* bitcast (<2 x i64>* @res_vsll to <2 x double>*), align 16
+  store <2 x double> %4, <2 x double>* bitcast (<2 x i64>* @res_vull to <2 x double>*), align 16
+  store <2 x double> %5, <2 x double>* @res_vd, align 16
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/vsx-minmax.ll b/test/CodeGen/PowerPC/vsx-minmax.ll
new file mode 100644
index 000000000000..47f50abbc2a2
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-minmax.ll
@@ -0,0 +1,98 @@
+; RUN: llc -mcpu=pwr7 -mattr=+vsx -O0 -fast-isel=0 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@vf = global <4 x float> <float -1.500000e+00, float 2.500000e+00, float -3.500000e+00, float 4.500000e+00>, align 16
+@vd = global <2 x double> <double 3.500000e+00, double -7.500000e+00>, align 16
+@d = global double 2.340000e+01, align 8
+@vf1 = common global <4 x float> zeroinitializer, align 16
+@vd1 = common global <2 x double> zeroinitializer, align 16
+@vf2 = common global <4 x float> zeroinitializer, align 16
+@vf3 = common global <4 x float> zeroinitializer, align 16
+@vd2 = common global <2 x double> zeroinitializer, align 16
+@vf4 = common global <4 x float> zeroinitializer, align 16
+@d1 = common global double 0.000000e+00, align 8
+@d2 = common global double 0.000000e+00, align 8
+
+; Function Attrs: nounwind
+define void @test1() #0 {
+; CHECK-LABEL: @test1
+entry:
+  %0 = load volatile <4 x float>* @vf, align 16
+  %1 = load volatile <4 x float>* @vf, align 16
+  %2 = tail call <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float> %0, <4 x float> %1)
+; CHECK: xvmaxsp
+  store <4 x float> %2, <4 x float>* @vf1, align 16
+  %3 = load <2 x double>* @vd, align 16
+  %4 = tail call <2 x double> @llvm.ppc.vsx.xvmaxdp(<2 x double> %3, <2 x double> %3)
+; CHECK: xvmaxdp
+  store <2 x double> %4, <2 x double>* @vd1, align 16
+  %5 = load volatile <4 x float>* @vf, align 16
+  %6 = load volatile <4 x float>* @vf, align 16
+  %7 = tail call <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float> %5, <4 x float> %6)
+; CHECK: xvmaxsp
+  store <4 x float> %7, <4 x float>* @vf2, align 16
+  %8 = load volatile <4 x float>* @vf, align 16
+  %9 = load volatile <4 x float>* @vf, align 16
+  %10 = tail call <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float> %8, <4 x float> %9)
+; CHECK: xvminsp
+  store <4 x float> %10, <4 x float>* @vf3, align 16
+  %11 = load <2 x double>* @vd, align 16
+  %12 = tail call <2 x double> @llvm.ppc.vsx.xvmindp(<2 x double> %11, <2 x double> %11)
+; CHECK: xvmindp
+  store <2 x double> %12, <2 x double>* @vd2, align 16
+  %13 = load volatile <4 x float>* @vf, align 16
+  %14 = load volatile <4 x float>* @vf, align 16
+  %15 = tail call <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float> %13, <4 x float> %14)
+; CHECK: xvminsp
+  store <4 x float> %15, <4 x float>* @vf4, align 16
+  %16 = load double* @d, align 8
+  %17 = tail call double @llvm.ppc.vsx.xsmaxdp(double %16, double %16)
+; CHECK: xsmaxdp
+  store double %17, double* @d1, align 8
+  %18 = tail call double @llvm.ppc.vsx.xsmindp(double %16, double %16)
+; CHECK: xsmindp
+  store double %18, double* @d2, align 8
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare double @llvm.ppc.vsx.xsmaxdp(double, double)
+
+; Function Attrs: nounwind readnone
+declare double @llvm.ppc.vsx.xsmindp(double, double)
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float>, <4 x float>)
+
+; Function Attrs: nounwind readnone
+declare <2 x double> @llvm.ppc.vsx.xvmindp(<2 x double>, <2 x double>)
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float>, <4 x float>)
+
+; Function Attrs: nounwind readnone
+declare <2 x double> @llvm.ppc.vsx.xvmaxdp(<2 x double>, <2 x double>)
+
+; Generated from C source:
+
+; % clang -O1 -maltivec -mvsx -S -emit-llvm vsx-minmax.c
+;
+;volatile vector float vf = { -1.5, 2.5, -3.5, 4.5 };
+;vector double vd = { 3.5, -7.5 };
+;double d = 23.4;
+;
+;vector float vf1, vf2, vf3, vf4;
+;vector double vd1, vd2;
+;double d1, d2;
+;
+;void test1() {
+;  vf1 = vec_max(vf, vf);
+;  vd1 = vec_max(vd, vd);
+;  vf2 = vec_vmaxfp(vf, vf);
+;  vf3 = vec_min(vf, vf);
+;  vd2 = vec_min(vd, vd);
+;  vf4 = vec_vminfp(vf, vf);
+;  d1 = __builtin_vsx_xsmaxdp(d, d);
+;  d2 = __builtin_vsx_xsmindp(d, d);
+;}
diff --git a/test/CodeGen/PowerPC/vsx-p8.ll b/test/CodeGen/PowerPC/vsx-p8.ll
new file mode 100644
index 000000000000..d5a19059c60d
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx-p8.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mcpu=pwr8 -mattr=+power8-vector < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mattr=+power8-vector < %s | FileCheck -check-prefix=CHECK-REG %s
+; RUN: llc -mcpu=pwr8 -mattr=+power8-vector -fast-isel -O0 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mattr=+power8-vector -fast-isel -O0 < %s | FileCheck -check-prefix=CHECK-FISL %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Unaligned loads/stores on P8 and later should use VSX where possible.
+
+define <2 x double> @test28u(<2 x double>* %a) {
+  %v = load <2 x double>* %a, align 8
+  ret <2 x double> %v
+
+; CHECK-LABEL: @test28u
+; CHECK: lxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define void @test29u(<2 x double>* %a, <2 x double> %b) {
+  store <2 x double> %b, <2 x double>* %a, align 8
+  ret void
+
+; CHECK-LABEL: @test29u
+; CHECK: stxvd2x 34, 0, 3
+; CHECK: blr
+}
+
+define <4 x float> @test32u(<4 x float>* %a) {
+  %v = load <4 x float>* %a, align 8
+  ret <4 x float> %v
+
+; CHECK-REG-LABEL: @test32u
+; CHECK-REG: lxvw4x 34, 0, 3
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test32u
+; CHECK-FISL: lxvw4x 0, 0, 3
+; CHECK-FISL: xxlor 34, 0, 0
+; CHECK-FISL: blr
+}
+
+define void @test33u(<4 x float>* %a, <4 x float> %b) {
+  store <4 x float> %b, <4 x float>* %a, align 8
+  ret void
+
+; CHECK-REG-LABEL: @test33u
+; CHECK-REG: stxvw4x 34, 0, 3
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test33u
+; CHECK-FISL: vor 3, 2, 2
+; CHECK-FISL: stxvw4x 35, 0, 3
+; CHECK-FISL: blr
+}
+
diff --git a/test/CodeGen/PowerPC/vsx-self-copy.ll b/test/CodeGen/PowerPC/vsx-self-copy.ll
index 23615ca10c1e..787ac4b77164 100644
--- a/test/CodeGen/PowerPC/vsx-self-copy.ll
+++ b/test/CodeGen/PowerPC/vsx-self-copy.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
diff --git a/test/CodeGen/PowerPC/vsx-spill.ll b/test/CodeGen/PowerPC/vsx-spill.ll
index 29bc6fcc7100..032bcf6cc3fe 100644
--- a/test/CodeGen/PowerPC/vsx-spill.ll
+++ b/test/CodeGen/PowerPC/vsx-spill.ll
@@ -1,4 +1,7 @@
 ; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-REG %s
+; RUN: llc -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | FileCheck -check-prefix=CHECK-FISL %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -7,10 +10,16 @@ entry:
   call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() nounwind
   br label %return
 
-; CHECK: @foo1
-; CHECK: xxlor [[R1:[0-9]+]], 1, 1
-; CHECK: xxlor 1, [[R1]], [[R1]]
-; CHECK: blr
+; CHECK-REG: @foo1
+; CHECK-REG: xxlor [[R1:[0-9]+]], 1, 1
+; CHECK-REG: xxlor 1, [[R1]], [[R1]]
+; CHECK-REG: blr
+
+; CHECK-FISL: @foo1
+; CHECK-FISL: lis 0, -1
+; CHECK-FISL: ori 0, 0, 65384
+; CHECK-FISL: stxsdx 1, 1, 0
+; CHECK-FISL: blr
 
 return:                                           ; preds = %entry
   ret double %a
@@ -22,10 +31,16 @@ entry:
   call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() nounwind
   br label %return
 
-; CHECK: @foo2
-; CHECK: {{xxlor|xsadddp}} [[R1:[0-9]+]], 1, 1
-; CHECK: {{xxlor|xsadddp}} 1, [[R1]], [[R1]]
-; CHECK: blr
+; CHECK-REG: @foo2
+; CHECK-REG: {{xxlor|xsadddp}} [[R1:[0-9]+]], 1, 1
+; CHECK-REG: {{xxlor|xsadddp}} 1, [[R1]], [[R1]]
+; CHECK-REG: blr
+
+; CHECK-FISL: @foo2
+; CHECK-FISL: xsadddp [[R1:[0-9]+]], 1, 1
+; CHECK-FISL: stxsdx [[R1]], [[R1]], 0
+; CHECK-FISL: lxsdx [[R1]], [[R1]], 0
+; CHECK-FISL: blr
 
 return:                                           ; preds = %entry
   ret double %b
diff --git a/test/CodeGen/PowerPC/vsx.ll b/test/CodeGen/PowerPC/vsx.ll
index 2f226e1f614c..f91ffdb960bb 100644
--- a/test/CodeGen/PowerPC/vsx.ll
+++ b/test/CodeGen/PowerPC/vsx.ll
@@ -1,4 +1,7 @@
 ; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-REG %s
+; RUN: llc -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | FileCheck %s
+; RUN: llc -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | FileCheck -check-prefix=CHECK-FISL %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -47,9 +50,16 @@ entry:
   %v = xor <4 x i32> %a, %b
   ret <4 x i32> %v
 
-; CHECK-LABEL: @test5
-; CHECK: xxlxor 34, 34, 35
-; CHECK: blr
+; CHECK-REG-LABEL: @test5
+; CHECK-REG: xxlxor 34, 34, 35
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test5
+; CHECK-FISL: vor 4, 2, 2
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: xxlxor 36, 36, 37
+; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: blr
 }
 
 define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
@@ -57,9 +67,16 @@ entry:
   %v = xor <8 x i16> %a, %b
   ret <8 x i16> %v
 
-; CHECK-LABEL: @test6
-; CHECK: xxlxor 34, 34, 35
-; CHECK: blr
+; CHECK-REG-LABEL: @test6
+; CHECK-REG: xxlxor 34, 34, 35
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test6
+; CHECK-FISL: vor 4, 2, 2
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: xxlxor 36, 36, 37
+; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: blr
 }
 
 define <16 x i8> @test7(<16 x i8> %a, <16 x i8> %b) {
@@ -67,9 +84,16 @@ entry:
   %v = xor <16 x i8> %a, %b
   ret <16 x i8> %v
 
-; CHECK-LABEL: @test7
-; CHECK: xxlxor 34, 34, 35
-; CHECK: blr
+; CHECK-REG-LABEL: @test7
+; CHECK-REG: xxlxor 34, 34, 35
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test7
+; CHECK-FISL: vor 4, 2, 2
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: xxlxor 36, 36, 37
+; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: blr
 }
 
 define <4 x i32> @test8(<4 x i32> %a, <4 x i32> %b) {
@@ -77,9 +101,16 @@ entry:
   %v = or <4 x i32> %a, %b
   ret <4 x i32> %v
 
-; CHECK-LABEL: @test8
-; CHECK: xxlor 34, 34, 35
-; CHECK: blr
+; CHECK-REG-LABEL: @test8
+; CHECK-REG: xxlor 34, 34, 35
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test8
+; CHECK-FISL: vor 4, 2, 2
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: xxlor 36, 36, 37
+; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: blr
 }
 
 define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) {
@@ -87,9 +118,16 @@ entry:
   %v = or <8 x i16> %a, %b
   ret <8 x i16> %v
 
-; CHECK-LABEL: @test9
-; CHECK: xxlor 34, 34, 35
-; CHECK: blr
+; CHECK-REG-LABEL: @test9
+; CHECK-REG: xxlor 34, 34, 35
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test9
+; CHECK-FISL: vor 4, 2, 2
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: xxlor 36, 36, 37
+; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: blr
 }
 
 define <16 x i8> @test10(<16 x i8> %a, <16 x i8> %b) {
@@ -97,9 +135,16 @@ entry:
   %v = or <16 x i8> %a, %b
   ret <16 x i8> %v
 
-; CHECK-LABEL: @test10
-; CHECK: xxlor 34, 34, 35
-; CHECK: blr
+; CHECK-REG-LABEL: @test10
+; CHECK-REG: xxlor 34, 34, 35
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test10
+; CHECK-FISL: vor 4, 2, 2
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: xxlor 36, 36, 37
+; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: blr
 }
 
 define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
@@ -107,9 +152,16 @@ entry:
   %v = and <4 x i32> %a, %b
   ret <4 x i32> %v
 
-; CHECK-LABEL: @test11
-; CHECK: xxland 34, 34, 35
-; CHECK: blr
+; CHECK-REG-LABEL: @test11
+; CHECK-REG: xxland 34, 34, 35
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test11
+; CHECK-FISL: vor 4, 2, 2
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: xxland 36, 36, 37
+; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: blr
 }
 
 define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) {
@@ -117,9 +169,16 @@ entry:
   %v = and <8 x i16> %a, %b
   ret <8 x i16> %v
 
-; CHECK-LABEL: @test12
-; CHECK: xxland 34, 34, 35
-; CHECK: blr
+; CHECK-REG-LABEL: @test12
+; CHECK-REG: xxland 34, 34, 35
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test12
+; CHECK-FISL: vor 4, 2, 2
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: xxland 36, 36, 37
+; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: blr
 }
 
 define <16 x i8> @test13(<16 x i8> %a, <16 x i8> %b) {
@@ -127,9 +186,16 @@ entry:
   %v = and <16 x i8> %a, %b
   ret <16 x i8> %v
 
-; CHECK-LABEL: @test13
-; CHECK: xxland 34, 34, 35
-; CHECK: blr
+; CHECK-REG-LABEL: @test13
+; CHECK-REG: xxland 34, 34, 35
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test13
+; CHECK-FISL: vor 4, 2, 2
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: xxland 36, 36, 37
+; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: blr
 }
 
 define <4 x i32> @test14(<4 x i32> %a, <4 x i32> %b) {
@@ -138,9 +204,23 @@ entry:
   %w = xor <4 x i32> %v, <i32 -1, i32 -1, i32 -1, i32 -1>
   ret <4 x i32> %w
 
-; CHECK-LABEL: @test14
-; CHECK: xxlnor 34, 34, 35
-; CHECK: blr
+; CHECK-REG-LABEL: @test14
+; CHECK-REG: xxlnor 34, 34, 35
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test14
+; CHECK-FISL: vor 4, 2, 2
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: xxlor 36, 36, 37
+; CHECK-FISL: vor 0, 4, 4
+; CHECK-FISL: vor 4, 2, 2
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: xxlnor 36, 36, 37
+; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: lis 0, -1
+; CHECK-FISL: ori 0, 0, 65520
+; CHECK-FISL: stvx 0, 1, 0
+; CHECK-FISL: blr
 }
 
 define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
@@ -149,9 +229,23 @@ entry:
   %w = xor <8 x i16> %v, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
   ret <8 x i16> %w
 
-; CHECK-LABEL: @test15
-; CHECK: xxlnor 34, 34, 35
-; CHECK: blr
+; CHECK-REG-LABEL: @test15
+; CHECK-REG: xxlnor 34, 34, 35
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test15
+; CHECK-FISL: vor 4, 2, 2
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: xxlor 36, 36, 37
+; CHECK-FISL: vor 0, 4, 4
+; CHECK-FISL: vor 4, 2, 2
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: xxlnor 36, 36, 37
+; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: lis 0, -1
+; CHECK-FISL: ori 0, 0, 65520
+; CHECK-FISL: stvx 0, 1, 0
+; CHECK-FISL: blr
 }
 
 define <16 x i8> @test16(<16 x i8> %a, <16 x i8> %b) {
@@ -160,9 +254,23 @@ entry:
   %w = xor <16 x i8> %v, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   ret <16 x i8> %w
 
-; CHECK-LABEL: @test16
-; CHECK: xxlnor 34, 34, 35
-; CHECK: blr
+; CHECK-REG-LABEL: @test16
+; CHECK-REG: xxlnor 34, 34, 35
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test16
+; CHECK-FISL: vor 4, 2, 2
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: xxlor 36, 36, 37
+; CHECK-FISL: vor 0, 4, 4
+; CHECK-FISL: vor 4, 2, 2
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: xxlnor 36, 36, 37
+; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: lis 0, -1
+; CHECK-FISL: ori 0, 0, 65520
+; CHECK-FISL: stvx 0, 1, 0
+; CHECK-FISL: blr
 }
 
 define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
@@ -171,9 +279,21 @@ entry:
   %v = and <4 x i32> %a, %w
   ret <4 x i32> %v
 
-; CHECK-LABEL: @test17
-; CHECK: xxlandc 34, 34, 35
-; CHECK: blr
+; CHECK-REG-LABEL: @test17
+; CHECK-REG: xxlandc 34, 34, 35
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test17
+; CHECK-FISL: vspltisb 4, -1
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: vor 0, 4, 4
+; CHECK-FISL: xxlxor 37, 37, 32
+; CHECK-FISL: vor 3, 5, 5
+; CHECK-FISL: vor 5, 2, 2
+; CHECK-FISL: vor 0, 3, 3
+; CHECK-FISL: xxland 37, 37, 32
+; CHECK-FISL: vor 2, 5, 5
+; CHECK-FISL: blr
 }
 
 define <8 x i16> @test18(<8 x i16> %a, <8 x i16> %b) {
@@ -182,9 +302,24 @@ entry:
   %v = and <8 x i16> %a, %w
   ret <8 x i16> %v
 
-; CHECK-LABEL: @test18
-; CHECK: xxlandc 34, 34, 35
-; CHECK: blr
+; CHECK-REG-LABEL: @test18
+; CHECK-REG: xxlandc 34, 34, 35
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test18
+; CHECK-FISL: vspltisb 4, -1
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: vor 0, 4, 4
+; CHECK-FISL: xxlxor 37, 37, 32
+; CHECK-FISL: vor 4, 5, 5
+; CHECK-FISL: vor 5, 2, 2
+; CHECK-FISL: vor 0, 3, 3
+; CHECK-FISL: xxlandc 37, 37, 32
+; CHECK-FISL: vor 2, 5, 5
+; CHECK-FISL: lis 0, -1
+; CHECK-FISL: ori 0, 0, 65520
+; CHECK-FISL: stvx 4, 1, 0
+; CHECK-FISL: blr
 }
 
 define <16 x i8> @test19(<16 x i8> %a, <16 x i8> %b) {
@@ -193,9 +328,24 @@ entry:
   %v = and <16 x i8> %a, %w
   ret <16 x i8> %v
 
-; CHECK-LABEL: @test19
-; CHECK: xxlandc 34, 34, 35
-; CHECK: blr
+; CHECK-REG-LABEL: @test19
+; CHECK-REG: xxlandc 34, 34, 35
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test19
+; CHECK-FISL: vspltisb 4, -1
+; CHECK-FISL: vor 5, 3, 3
+; CHECK-FISL: vor 0, 4, 4
+; CHECK-FISL: xxlxor 37, 37, 32
+; CHECK-FISL: vor 4, 5, 5
+; CHECK-FISL: vor 5, 2, 2
+; CHECK-FISL: vor 0, 3, 3
+; CHECK-FISL: xxlandc 37, 37, 32
+; CHECK-FISL: vor 2, 5, 5
+; CHECK-FISL: lis 0, -1
+; CHECK-FISL: ori 0, 0, 65520
+; CHECK-FISL: stvx 4, 1, 0
+; CHECK-FISL: blr
 }
 
 define <4 x i32> @test20(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
@@ -204,10 +354,19 @@ entry:
   %v = select <4 x i1> %m, <4 x i32> %a, <4 x i32> %b
   ret <4 x i32> %v
 
-; CHECK-LABEL: @test20
-; CHECK: vcmpequw {{[0-9]+}}, 4, 5
-; CHECK: xxsel 34, 35, 34, {{[0-9]+}}
-; CHECK: blr
+; CHECK-REG-LABEL: @test20
+; CHECK-REG: vcmpequw {{[0-9]+}}, 4, 5
+; CHECK-REG: xxsel 34, 35, 34, {{[0-9]+}}
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test20
+; CHECK-FISL: vcmpequw 4, 4, 5
+; CHECK-FISL: vor 0, 3, 3
+; CHECK-FISL: vor 1, 2, 2
+; CHECK-FISL: vor 6, 4, 4
+; CHECK-FISL: xxsel 32, 32, 33, 38
+; CHECK-FISL: vor 2, 0, 0
+; CHECK-FISL: blr
 }
 
 define <4 x float> @test21(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
@@ -216,10 +375,20 @@ entry:
   %v = select <4 x i1> %m, <4 x float> %a, <4 x float> %b
   ret <4 x float> %v
 
-; CHECK-LABEL: @test21
-; CHECK: xvcmpeqsp [[V1:[0-9]+]], 36, 37
-; CHECK: xxsel 34, 35, 34, [[V1]]
-; CHECK: blr
+; CHECK-REG-LABEL: @test21
+; CHECK-REG: xvcmpeqsp [[V1:[0-9]+]], 36, 37
+; CHECK-REG: xxsel 34, 35, 34, [[V1]]
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test21
+; CHECK-FISL: vor 0, 5, 5
+; CHECK-FISL: vor 1, 4, 4
+; CHECK-FISL: vor 6, 3, 3
+; CHECK-FISL: vor 7, 2, 2
+; CHECK-FISL: xvcmpeqsp 32, 33, 32
+; CHECK-FISL: xxsel 32, 38, 39, 32
+; CHECK-FISL: vor 2, 0, 0
+; CHECK-FISL: blr
 }
 
 define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) {
@@ -228,16 +397,27 @@ entry:
   %v = select <4 x i1> %m, <4 x float> %a, <4 x float> %b
   ret <4 x float> %v
 
-; CHECK-LABEL: @test22
-; CHECK-DAG: xvcmpeqsp {{[0-9]+}}, 37, 37
-; CHECK-DAG: xvcmpeqsp {{[0-9]+}}, 36, 36
-; CHECK-DAG: xvcmpeqsp {{[0-9]+}}, 36, 37
-; CHECK-DAG: xxlnor
-; CHECK-DAG: xxlnor
-; CHECK-DAG: xxlor
-; CHECK-DAG: xxlor
-; CHECK: xxsel 34, 35, 34, {{[0-9]+}}
-; CHECK: blr
+; CHECK-REG-LABEL: @test22
+; CHECK-REG-DAG: xvcmpeqsp {{[0-9]+}}, 37, 37
+; CHECK-REG-DAG: xvcmpeqsp {{[0-9]+}}, 36, 36
+; CHECK-REG-DAG: xvcmpeqsp {{[0-9]+}}, 36, 37
+; CHECK-REG-DAG: xxlnor
+; CHECK-REG-DAG: xxlnor
+; CHECK-REG-DAG: xxlor
+; CHECK-REG-DAG: xxlor
+; CHECK-REG: xxsel 34, 35, 34, {{[0-9]+}}
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test22
+; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 33, 32
+; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 32, 32
+; CHECK-FISL-DAG: xvcmpeqsp {{[0-9]+}}, 33, 33
+; CHECK-FISL-DAG: xxlnor
+; CHECK-FISL-DAG: xxlnor
+; CHECK-FISL-DAG: xxlor
+; CHECK-FISL-DAG: xxlor
+; CHECK-FISL: xxsel 0, 38, 39, {{[0-9]+}}
+; CHECK-FISL: blr
 }
 
 define <8 x i16> @test23(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d) {
@@ -246,10 +426,19 @@ entry:
   %v = select <8 x i1> %m, <8 x i16> %a, <8 x i16> %b
   ret <8 x i16> %v
 
-; CHECK-LABEL: @test23
-; CHECK: vcmpequh {{[0-9]+}}, 4, 5
-; CHECK: xxsel 34, 35, 34, {{[0-9]+}}
-; CHECK: blr
+; CHECK-REG-LABEL: @test23
+; CHECK-REG: vcmpequh {{[0-9]+}}, 4, 5
+; CHECK-REG: xxsel 34, 35, 34, {{[0-9]+}}
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test23
+; CHECK-FISL: vcmpequh 4, 4, 5
+; CHECK-FISL: vor 0, 3, 3
+; CHECK-FISL: vor 1, 2, 2
+; CHECK-FISL: vor 6, 4, 4
+; CHECK-FISL: xxsel 32, 32, 33, 38
+; CHECK-FISL: vor 2, 0, 
+; CHECK-FISL: blr
 }
 
 define <16 x i8> @test24(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
@@ -258,10 +447,19 @@ entry:
   %v = select <16 x i1> %m, <16 x i8> %a, <16 x i8> %b
   ret <16 x i8> %v
 
-; CHECK-LABEL: @test24
-; CHECK: vcmpequb {{[0-9]+}}, 4, 5
-; CHECK: xxsel 34, 35, 34, {{[0-9]+}}
-; CHECK: blr
+; CHECK-REG-LABEL: @test24
+; CHECK-REG: vcmpequb {{[0-9]+}}, 4, 5
+; CHECK-REG: xxsel 34, 35, 34, {{[0-9]+}}
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test24
+; CHECK-FISL: vcmpequb 4, 4, 5
+; CHECK-FISL: vor 0, 3, 3
+; CHECK-FISL: vor 1, 2, 2
+; CHECK-FISL: vor 6, 4, 4
+; CHECK-FISL: xxsel 32, 32, 33, 38
+; CHECK-FISL: vor 2, 0, 0
+; CHECK-FISL: blr
 }
 
 define <2 x double> @test25(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) {
@@ -342,9 +540,16 @@ define <2 x i64> @test30(<2 x i64>* %a) {
   %v = load <2 x i64>* %a, align 16
   ret <2 x i64> %v
 
-; CHECK-LABEL: @test30
-; CHECK: lxvd2x 34, 0, 3
-; CHECK: blr
+; CHECK-REG-LABEL: @test30
+; CHECK-REG: lxvd2x 34, 0, 3
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test30
+; CHECK-FISL: lxvd2x 0, 0, 3
+; CHECK-FISL: xxlor 34, 0, 0
+; CHECK-FISL: vor 3, 2, 2
+; CHECK-FISL: vor 2, 3, 3
+; CHECK-FISL: blr
 }
 
 define void @test31(<2 x i64>* %a, <2 x i64> %b) {
@@ -356,6 +561,90 @@ define void @test31(<2 x i64>* %a, <2 x i64> %b) {
 ; CHECK: blr
 }
 
+define <4 x float> @test32(<4 x float>* %a) {
+  %v = load <4 x float>* %a, align 16
+  ret <4 x float> %v
+
+; CHECK-REG-LABEL: @test32
+; CHECK-REG: lxvw4x 34, 0, 3
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test32
+; CHECK-FISL: lxvw4x 0, 0, 3
+; CHECK-FISL: xxlor 34, 0, 0
+; CHECK-FISL: blr
+}
+
+define void @test33(<4 x float>* %a, <4 x float> %b) {
+  store <4 x float> %b, <4 x float>* %a, align 16
+  ret void
+
+; CHECK-REG-LABEL: @test33
+; CHECK-REG: stxvw4x 34, 0, 3
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test33
+; CHECK-FISL: vor 3, 2, 2
+; CHECK-FISL: stxvw4x 35, 0, 3
+; CHECK-FISL: blr
+}
+
+define <4 x float> @test32u(<4 x float>* %a) {
+  %v = load <4 x float>* %a, align 8
+  ret <4 x float> %v
+
+; CHECK-LABEL: @test32u
+; CHECK-DAG: lvsl
+; CHECK-DAG: lvx
+; CHECK-DAG: lvx
+; CHECK: vperm 2,
+; CHECK: blr
+}
+
+define void @test33u(<4 x float>* %a, <4 x float> %b) {
+  store <4 x float> %b, <4 x float>* %a, align 8
+  ret void
+
+; CHECK-REG-LABEL: @test33u
+; CHECK-REG: stxvw4x 34, 0, 3
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test33u
+; CHECK-FISL: vor 3, 2, 2
+; CHECK-FISL: stxvw4x 35, 0, 3
+; CHECK-FISL: blr
+}
+
+define <4 x i32> @test34(<4 x i32>* %a) {
+  %v = load <4 x i32>* %a, align 16
+  ret <4 x i32> %v
+
+; CHECK-REG-LABEL: @test34
+; CHECK-REG: lxvw4x 34, 0, 3
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test34
+; CHECK-FISL: lxvw4x 0, 0, 3
+; CHECK-FISL: xxlor 34, 0, 0
+; CHECK-FISL: vor 3, 2, 2
+; CHECK-FISL: vor 2, 3, 3
+; CHECK-FISL: blr
+}
+
+define void @test35(<4 x i32>* %a, <4 x i32> %b) {
+  store <4 x i32> %b, <4 x i32>* %a, align 16
+  ret void
+
+; CHECK-REG-LABEL: @test35
+; CHECK-REG: stxvw4x 34, 0, 3
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test35
+; CHECK-FISL: vor 3, 2, 2
+; CHECK-FISL: stxvw4x 35, 0, 3
+; CHECK-FISL: blr
+}
+
 define <2 x double> @test40(<2 x i64> %a) {
   %v = uitofp <2 x i64> %a to <2 x double>
   ret <2 x double> %v
@@ -539,37 +828,60 @@ define double @test63(<2 x double> %a) {
   %v = extractelement <2 x double> %a, i32 0
   ret double %v
 
-; CHECK-LABEL: @test63
-; CHECK: xxlor 1, 34, 34
-; CHECK: blr
+; CHECK-REG-LABEL: @test63
+; CHECK-REG: xxlor 1, 34, 34
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test63
+; CHECK-FISL: xxlor 0, 34, 34
+; CHECK-FISL: fmr 1, 0
+; CHECK-FISL: blr
 }
 
 define double @test64(<2 x double> %a) {
   %v = extractelement <2 x double> %a, i32 1
   ret double %v
 
-; CHECK-LABEL: @test64
-; CHECK: xxpermdi 1, 34, 34, 2
-; CHECK: blr
+; CHECK-REG-LABEL: @test64
+; CHECK-REG: xxpermdi 1, 34, 34, 2
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test64
+; CHECK-FISL: xxpermdi 34, 34, 34, 2
+; CHECK-FISL: xxlor 0, 34, 34
+; CHECK-FISL: fmr 1, 0
+; CHECK-FISL: blr
 }
 
 define <2 x i1> @test65(<2 x i64> %a, <2 x i64> %b) {
   %w = icmp eq <2 x i64> %a, %b
   ret <2 x i1> %w
 
-; CHECK-LABEL: @test65
-; CHECK: vcmpequw 2, 2, 3
-; CHECK: blr
+; CHECK-REG-LABEL: @test65
+; CHECK-REG: vcmpequw 2, 2, 3
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test65
+; CHECK-FISL: vor 4, 3, 3
+; CHECK-FISL: vor 5, 2, 2
+; CHECK-FISL: vcmpequw 4, 5, 4
+; CHECK-FISL: vor 2, 4, 4
+; CHECK-FISL: blr
 }
 
 define <2 x i1> @test66(<2 x i64> %a, <2 x i64> %b) {
   %w = icmp ne <2 x i64> %a, %b
   ret <2 x i1> %w
 
-; CHECK-LABEL: @test66
-; CHECK: vcmpequw {{[0-9]+}}, 2, 3
-; CHECK: xxlnor 34, {{[0-9]+}}, {{[0-9]+}}
-; CHECK: blr
+; CHECK-REG-LABEL: @test66
+; CHECK-REG: vcmpequw {{[0-9]+}}, 2, 3
+; CHECK-REG: xxlnor 34, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test66
+; CHECK-FISL: vcmpequw {{[0-9]+}}, 5, 4
+; CHECK-FISL: xxlnor 34, {{[0-9]+}}, {{[0-9]+}}
+; CHECK-FISL: blr
 }
 
 define <2 x i1> @test67(<2 x i64> %a, <2 x i64> %b) {
@@ -603,7 +915,7 @@ define <2 x double> @test69(<2 x i16> %a) {
 ; CHECK-LABEL: @test69
 ; CHECK: vspltisw [[V1:[0-9]+]], 8
 ; CHECK: vadduwm [[V2:[0-9]+]], [[V1]], [[V1]]
-; CHECK: vslw [[V3:[0-9]+]], 2, [[V2]]
+; CHECK: vslw [[V3:[0-9]+]], {{[0-9]+}}, [[V2]]
 ; CHECK: vsraw {{[0-9]+}}, [[V3]], [[V2]]
 ; CHECK: xxsldwi [[V4:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, 1
 ; CHECK: xvcvsxwdp 34, [[V4]]
@@ -617,7 +929,7 @@ define <2 x double> @test70(<2 x i8> %a) {
 ; CHECK-LABEL: @test70
 ; CHECK: vspltisw [[V1:[0-9]+]], 12
 ; CHECK: vadduwm [[V2:[0-9]+]], [[V1]], [[V1]]
-; CHECK: vslw [[V3:[0-9]+]], 2, [[V2]]
+; CHECK: vslw [[V3:[0-9]+]], {{[0-9]+}}, [[V2]]
 ; CHECK: vsraw {{[0-9]+}}, [[V3]], [[V2]]
 ; CHECK: xxsldwi [[V4:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, 1
 ; CHECK: xvcvsxwdp 34, [[V4]]
@@ -630,15 +942,24 @@ define <2 x i32> @test80(i32 %v) {
   %i = add <2 x i32> %b2, <i32 2, i32 3>
   ret <2 x i32> %i
 
-; CHECK-LABEL: @test80
-; CHECK-DAG: addi [[R1:[0-9]+]], 3, 3
-; CHECK-DAG: addi [[R2:[0-9]+]], 1, -16
-; CHECK-DAG: addi [[R3:[0-9]+]], 3, 2
-; CHECK: std [[R1]], -8(1)
-; CHECK: std [[R3]], -16(1)
-; CHECK: lxvd2x 34, 0, [[R2]]
-; CHECK-NOT: stxvd2x
-; CHECK: blr
+; CHECK-REG-LABEL: @test80
+; CHECK-REG-DAG: addi [[R1:[0-9]+]], 3, 3
+; CHECK-REG-DAG: addi [[R2:[0-9]+]], 1, -16
+; CHECK-REG-DAG: addi [[R3:[0-9]+]], 3, 2
+; CHECK-REG: std [[R1]], -8(1)
+; CHECK-REG: std [[R3]], -16(1)
+; CHECK-REG: lxvd2x 34, 0, [[R2]]
+; CHECK-REG-NOT: stxvd2x
+; CHECK-REG: blr
+
+; CHECK-FISL-LABEL: @test80
+; CHECK-FISL-DAG: addi [[R1:[0-9]+]], 3, 3
+; CHECK-FISL-DAG: addi [[R2:[0-9]+]], 1, -16
+; CHECK-FISL-DAG: addi [[R3:[0-9]+]], 3, 2
+; CHECK-FISL-DAG: std [[R1]], -8(1)
+; CHECK-FISL-DAG: std [[R3]], -16(1)
+; CHECK-FISL-DAG: lxvd2x 0, 0, [[R2]]
+; CHECK-FISL: blr
 }
 
 define <2 x double> @test81(<4 x float> %b) {
@@ -649,3 +970,17 @@ define <2 x double> @test81(<4 x float> %b) {
 ; CHECK: blr
 }
 
+define double @test82(double %a, double %b, double %c, double %d) {
+entry:
+  %m = fcmp oeq double %c, %d
+  %v = select i1 %m, double %a, double %b
+  ret double %v
+
+; CHECK-REG-LABEL: @test82
+; CHECK-REG: xscmpudp [[REG:[0-9]+]], 3, 4
+; CHECK-REG: beqlr [[REG]]
+
+; CHECK-FISL-LABEL: @test82
+; CHECK-FISL: xscmpudp [[REG:[0-9]+]], 3, 4
+; CHECK-FISL: beq [[REG]], {{.*}}
+}
diff --git a/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
new file mode 100644
index 000000000000..0a9df3779116
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
@@ -0,0 +1,52 @@
+; RUN: llc -mcpu=pwr8 -mattr=+vsx -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
+  %v = load <2 x double>* %p1
+  %s = load double* %p2
+  %r = insertelement <2 x double> %v, double %s, i32 0
+  ret <2 x double> %r
+
+; CHECK-LABEL: testi0
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxsdx 34, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 34, 34, 0
+; CHECK: xxpermdi 34, 0, 1, 1
+}
+
+define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
+  %v = load <2 x double>* %p1
+  %s = load double* %p2
+  %r = insertelement <2 x double> %v, double %s, i32 1
+  ret <2 x double> %r
+
+; CHECK-LABEL: testi1
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxsdx 34, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 34, 34, 0
+; CHECK: xxpermdi 34, 1, 0, 3
+}
+
+define double @teste0(<2 x double>* %p1) {
+  %v = load <2 x double>* %p1
+  %r = extractelement <2 x double> %v, i32 0
+  ret double %r
+
+; FIXME: Swap optimization will collapse this into lxvd2x 1, 0, 3.
+
+; CHECK-LABEL: teste0
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 0, 0, 2
+}
+
+define double @teste1(<2 x double>* %p1) {
+  %v = load <2 x double>* %p1
+  %r = extractelement <2 x double> %v, i32 1
+  ret double %r
+
+; CHECK-LABEL: teste1
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: xxpermdi 1, 0, 0, 2
+}
diff --git a/test/CodeGen/PowerPC/vsx_shuffle_le.ll b/test/CodeGen/PowerPC/vsx_shuffle_le.ll
new file mode 100644
index 000000000000..588cfdad7853
--- /dev/null
+++ b/test/CodeGen/PowerPC/vsx_shuffle_le.ll
@@ -0,0 +1,207 @@
+; RUN: llc -mcpu=pwr8 -mattr=+vsx -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+define <2 x double> @test00(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 0>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: test00
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 34, 0, 0, 3
+}
+
+define <2 x double> @test01(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 1>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: test01
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: xxpermdi 34, 0, 0, 2
+}
+
+define <2 x double> @test02(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 2>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test02
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 1, 0, 3
+}
+
+define <2 x double> @test03(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 3>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test03
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 1, 0, 1
+}
+
+define <2 x double> @test10(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 0>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test10
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 34, 0, 0, 2
+}
+
+define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 1>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test11
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 34, 0, 0, 0
+}
+
+define <2 x double> @test12(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 2>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test12
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 1, 0, 2
+}
+
+define <2 x double> @test13(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 3>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test13
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 1, 0, 0
+}
+
+define <2 x double> @test20(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 0>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test20
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 0, 1, 3
+}
+
+define <2 x double> @test21(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 1>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test21
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 0, 1, 1
+}
+
+define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 2>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test22
+; CHECK: lxvd2x 0, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 34, 0, 0, 3
+}
+
+define <2 x double> @test23(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 3>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test23
+; CHECK: lxvd2x 0, 0, 4
+; CHECK: xxpermdi 34, 0, 0, 2
+}
+
+define <2 x double> @test30(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 0>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test30
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 0, 1, 2
+}
+
+define <2 x double> @test31(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 1>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test31
+; CHECK: lxvd2x 0, 0, 3
+; CHECK: lxvd2x 1, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 1, 1, 1, 2
+; CHECK: xxpermdi 34, 0, 1, 0
+}
+
+define <2 x double> @test32(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 2>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test32
+; CHECK: lxvd2x 0, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 34, 0, 0, 2
+}
+
+define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
+  %v1 = load <2 x double>* %p1
+  %v2 = load <2 x double>* %p2
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 3>
+  ret <2 x double> %v3
+
+; CHECK-LABEL: @test33
+; CHECK: lxvd2x 0, 0, 4
+; CHECK: xxpermdi 0, 0, 0, 2
+; CHECK: xxpermdi 34, 0, 0, 0
+}
diff --git a/test/CodeGen/PowerPC/zext-free.ll b/test/CodeGen/PowerPC/zext-free.ll
new file mode 100644
index 000000000000..080dbaa58da1
--- /dev/null
+++ b/test/CodeGen/PowerPC/zext-free.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: noreturn nounwind
+define signext i32 @_Z1fRPc(i8** nocapture dereferenceable(8) %p) #0 {
+entry:
+  %.pre = load i8** %p, align 8
+  br label %loop
+
+loop:                                             ; preds = %loop.backedge, %entry
+  %0 = phi i8* [ %.pre, %entry ], [ %.be, %loop.backedge ]
+  %1 = load i8* %0, align 1
+  %tobool = icmp eq i8 %1, 0
+  %incdec.ptr = getelementptr inbounds i8* %0, i64 1
+  store i8* %incdec.ptr, i8** %p, align 8
+  %2 = load i8* %incdec.ptr, align 1
+  %tobool2 = icmp ne i8 %2, 0
+  %or.cond = and i1 %tobool, %tobool2
+  br i1 %or.cond, label %if.then3, label %loop.backedge
+
+if.then3:                                         ; preds = %loop
+  %incdec.ptr4 = getelementptr inbounds i8* %0, i64 2
+  store i8* %incdec.ptr4, i8** %p, align 8
+  br label %loop.backedge
+
+loop.backedge:                                    ; preds = %if.then3, %loop
+  %.be = phi i8* [ %incdec.ptr4, %if.then3 ], [ %incdec.ptr, %loop ]
+  br label %loop
+
+; CHECK-LABEL: @_Z1fRPc
+; CHECK-NOT: rlwinm {{[0-9]+}}, {{[0-9]+}}, 0, 24, 31
+; CHECK-NOT: clrlwi {{[0-9]+}}, {{[0-9]+}}, 24
+}
+
+attributes #0 = { noreturn nounwind }
+
diff --git a/test/CodeGen/R600/128bit-kernel-args.ll b/test/CodeGen/R600/128bit-kernel-args.ll
index 3c4fcf740c39..3b3fee05af7f 100644
--- a/test/CodeGen/R600/128bit-kernel-args.ll
+++ b/test/CodeGen/R600/128bit-kernel-args.ll
@@ -1,27 +1,27 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
-; R600-CHECK: @v4i32_kernel_arg
+; R600-CHECK: {{^}}v4i32_kernel_arg:
 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
-; SI-CHECK: @v4i32_kernel_arg
-; SI-CHECK: BUFFER_STORE_DWORDX4
+; SI-CHECK: {{^}}v4i32_kernel_arg:
+; SI-CHECK: buffer_store_dwordx4
 define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32>  %in) {
 entry:
   store <4 x i32> %in, <4 x i32> addrspace(1)* %out
   ret void
 }
 
-; R600-CHECK: @v4f32_kernel_arg
+; R600-CHECK: {{^}}v4f32_kernel_arg:
 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
 ; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
-; SI-CHECK: @v4f32_kernel_arg
-; SI-CHECK: BUFFER_STORE_DWORDX4
-define void @v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float>  %in) {
+; SI-CHECK: {{^}}v4f32_kernel_arg:
+; SI-CHECK: buffer_store_dwordx4
+define void @v4f32_kernel_arg(<4 x float> addrspace(1)* %out, <4 x float>  %in) {
 entry:
   store <4 x float> %in, <4 x float> addrspace(1)* %out
   ret void
diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll
index 7dec42637421..6ab0c08d505f 100644
--- a/test/CodeGen/R600/32-bit-local-address-space.ll
+++ b/test/CodeGen/R600/32-bit-local-address-space.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 ; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and
 ; the global address space(1) uses 64-bit pointers.  These tests check to make sure
@@ -9,9 +9,9 @@
 ; Instructions with B32, U32, and I32 in their name take 32-bit operands, while
 ; instructions with B64, U64, and I64 take 64-bit operands.
 
-; CHECK-LABEL: @local_address_load
-; CHECK: V_MOV_B32_e{{32|64}} [[PTR:v[0-9]]]
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[PTR]]
+; FUNC-LABEL: {{^}}local_address_load:
+; SI: v_mov_b32_e{{32|64}} [[PTR:v[0-9]]]
+; SI: ds_read_b32 v{{[0-9]+}}, [[PTR]]
 define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
   %0 = load i32 addrspace(3)* %in
@@ -19,10 +19,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @local_address_gep
-; CHECK: S_ADD_I32 [[SPTR:s[0-9]]]
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; CHECK: DS_READ_B32 [[VPTR]]
+; FUNC-LABEL: {{^}}local_address_gep:
+; SI: s_add_i32 [[SPTR:s[0-9]]]
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_read_b32 [[VPTR]]
 define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) {
 entry:
   %0 = getelementptr i32 addrspace(3)* %in, i32 %offset
@@ -31,9 +31,9 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @local_address_gep_const_offset
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VPTR]], 0x4,
+; FUNC-LABEL: {{^}}local_address_gep_const_offset:
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
+; SI: ds_read_b32 v{{[0-9]+}}, [[VPTR]] offset:4
 define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
   %0 = getelementptr i32 addrspace(3)* %in, i32 1
@@ -43,10 +43,10 @@ entry:
 }
 
 ; Offset too large, can't fold into 16-bit immediate offset.
-; CHECK-LABEL: @local_address_gep_large_const_offset
-; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; CHECK: DS_READ_B32 [[VPTR]]
+; FUNC-LABEL: {{^}}local_address_gep_large_const_offset:
+; SI: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_read_b32 [[VPTR]]
 define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
   %0 = getelementptr i32 addrspace(3)* %in, i32 16385
@@ -55,10 +55,10 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @null_32bit_lds_ptr:
-; CHECK: V_CMP_NE_I32
-; CHECK-NOT: V_CMP_NE_I32
-; CHECK: V_CNDMASK_B32
+; FUNC-LABEL: {{^}}null_32bit_lds_ptr:
+; SI: v_cmp_ne_i32
+; SI-NOT: v_cmp_ne_i32
+; SI: v_cndmask_b32
 define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds) nounwind {
   %cmp = icmp ne i32 addrspace(3)* %lds, null
   %x = select i1 %cmp, i32 123, i32 456
@@ -66,10 +66,10 @@ define void @null_32bit_lds_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %lds)
   ret void
 }
 
-; CHECK-LABEL: @mul_32bit_ptr:
-; CHECK: V_MUL_LO_I32
-; CHECK-NEXT: V_ADD_I32_e32
-; CHECK-NEXT: DS_READ_B32
+; FUNC-LABEL: {{^}}mul_32bit_ptr:
+; SI: s_mul_i32
+; SI-NEXT: s_add_i32
+; SI: ds_read_b32
 define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %lds, i32 %tid) {
   %ptr = getelementptr [3 x float] addrspace(3)* %lds, i32 %tid, i32 0
   %val = load float addrspace(3)* %ptr
@@ -77,11 +77,11 @@ define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %
   ret void
 }
 
-@g_lds = addrspace(3) global float zeroinitializer, align 4
+@g_lds = addrspace(3) global float undef, align 4
 
-; CHECK-LABEL: @infer_ptr_alignment_global_offset:
-; CHECK: V_MOV_B32_e32 [[REG:v[0-9]+]], 0
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[REG]]
+; FUNC-LABEL: {{^}}infer_ptr_alignment_global_offset:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; SI: ds_read_b32 v{{[0-9]+}}, [[REG]]
 define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) {
   %val = load float addrspace(3)* @g_lds
   store float %val, float addrspace(1)* %out
@@ -89,37 +89,37 @@ define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %ti
 }
 
 
-@ptr = addrspace(3) global i32 addrspace(3)* null
-@dst = addrspace(3) global [16384 x i32] zeroinitializer
+@ptr = addrspace(3) global i32 addrspace(3)* undef
+@dst = addrspace(3) global [16384 x i32] undef
 
-; CHECK-LABEL: @global_ptr:
-; CHECK: DS_WRITE_B32
+; FUNC-LABEL: {{^}}global_ptr:
+; SI: ds_write_b32
 define void @global_ptr() nounwind {
   store i32 addrspace(3)* getelementptr ([16384 x i32] addrspace(3)* @dst, i32 0, i32 16), i32 addrspace(3)* addrspace(3)* @ptr
   ret void
 }
 
-; CHECK-LABEL: @local_address_store
-; CHECK: DS_WRITE_B32
+; FUNC-LABEL: {{^}}local_address_store:
+; SI: ds_write_b32
 define void @local_address_store(i32 addrspace(3)* %out, i32 %val) {
   store i32 %val, i32 addrspace(3)* %out
   ret void
 }
 
-; CHECK-LABEL: @local_address_gep_store
-; CHECK: S_ADD_I32 [[SADDR:s[0-9]+]],
-; CHECK: V_MOV_B32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
-; CHECK: DS_WRITE_B32 [[ADDR]], v{{[0-9]+}},
+; FUNC-LABEL: {{^}}local_address_gep_store:
+; SI: s_add_i32 [[SADDR:s[0-9]+]],
+; SI: v_mov_b32_e32 [[ADDR:v[0-9]+]], [[SADDR]]
+; SI: ds_write_b32 [[ADDR]], v{{[0-9]+}}
 define void @local_address_gep_store(i32 addrspace(3)* %out, i32, i32 %val, i32 %offset) {
   %gep = getelementptr i32 addrspace(3)* %out, i32 %offset
   store i32 %val, i32 addrspace(3)* %gep, align 4
   ret void
 }
 
-; CHECK-LABEL: @local_address_gep_const_offset_store
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
-; CHECK: V_MOV_B32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
-; CHECK: DS_WRITE_B32 [[VPTR]], [[VAL]], 0x4
+; FUNC-LABEL: {{^}}local_address_gep_const_offset_store:
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], s{{[0-9]+}}
+; SI: v_mov_b32_e32 [[VAL:v[0-9]+]], s{{[0-9]+}}
+; SI: ds_write_b32 [[VPTR]], [[VAL]] offset:4
 define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
   %gep = getelementptr i32 addrspace(3)* %out, i32 1
   store i32 %val, i32 addrspace(3)* %gep, align 4
@@ -127,10 +127,10 @@ define void @local_address_gep_const_offset_store(i32 addrspace(3)* %out, i32 %v
 }
 
 ; Offset too large, can't fold into 16-bit immediate offset.
-; CHECK-LABEL: @local_address_gep_large_const_offset_store
-; CHECK: S_ADD_I32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
-; CHECK: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; CHECK: DS_WRITE_B32 [[VPTR]], v{{[0-9]+}}, 0
+; FUNC-LABEL: {{^}}local_address_gep_large_const_offset_store:
+; SI: s_add_i32 [[SPTR:s[0-9]]], s{{[0-9]+}}, 0x10004
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_write_b32 [[VPTR]], v{{[0-9]+}} [M0]{{$}}
 define void @local_address_gep_large_const_offset_store(i32 addrspace(3)* %out, i32 %val) {
   %gep = getelementptr i32 addrspace(3)* %out, i32 16385
   store i32 %val, i32 addrspace(3)* %gep, align 4
diff --git a/test/CodeGen/R600/64bit-kernel-args.ll b/test/CodeGen/R600/64bit-kernel-args.ll
index 2d82c1e53919..02b6f34e9419 100644
--- a/test/CodeGen/R600/64bit-kernel-args.ll
+++ b/test/CodeGen/R600/64bit-kernel-args.ll
@@ -1,9 +1,9 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
-; SI-CHECK: @f64_kernel_arg
-; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
-; SI-CHECK-DAG: S_LOAD_DWORDX2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
-; SI-CHECK: BUFFER_STORE_DWORDX2
+; SI-CHECK: {{^}}f64_kernel_arg:
+; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
+; SI-CHECK-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
+; SI-CHECK: buffer_store_dwordx2
 define void @f64_kernel_arg(double addrspace(1)* %out, double  %in) {
 entry:
   store double %in, double addrspace(1)* %out
diff --git a/test/CodeGen/R600/add-debug.ll b/test/CodeGen/R600/add-debug.ll
new file mode 100644
index 000000000000..85e9451d4a9b
--- /dev/null
+++ b/test/CodeGen/R600/add-debug.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -debug
+; REQUIRES: asserts
+
+; Check that SelectionDAGDumper does not crash on int_SI_if.
+define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
+entry:
+  %0 = icmp eq i64 %a, 0
+  br i1 %0, label %if, label %else
+
+if:
+  %1 = load i64 addrspace(1)* %in
+  br label %endif
+
+else:
+  %2 = add i64 %a, %b
+  br label %endif
+
+endif:
+  %3 = phi i64 [%1, %if], [%2, %else]
+  store i64 %3, i64 addrspace(1)* %out
+  ret void
+}
+
diff --git a/test/CodeGen/R600/add.ll b/test/CodeGen/R600/add.ll
index 711a2bc41774..d95853a61048 100644
--- a/test/CodeGen/R600/add.ll
+++ b/test/CodeGen/R600/add.ll
@@ -1,12 +1,12 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK --check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
 
-;FUNC-LABEL: @test1:
+;FUNC-LABEL: {{^}}test1:
 ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK: V_ADD_I32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
 ;SI-CHECK-NOT: [[REG]]
-;SI-CHECK: BUFFER_STORE_DWORD [[REG]],
+;SI-CHECK: buffer_store_dword [[REG]],
 define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
   %a = load i32 addrspace(1)* %in
@@ -16,12 +16,12 @@ define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   ret void
 }
 
-;FUNC-LABEL: @test2:
+;FUNC-LABEL: {{^}}test2:
 ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -32,16 +32,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   ret void
 }
 
-;FUNC-LABEL: @test4:
+;FUNC-LABEL: {{^}}test4:
 ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ADD_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -52,7 +52,7 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   ret void
 }
 
-; FUNC-LABEL: @test8
+; FUNC-LABEL: {{^}}test8:
 ; EG-CHECK: ADD_INT
 ; EG-CHECK: ADD_INT
 ; EG-CHECK: ADD_INT
@@ -61,14 +61,14 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
 ; EG-CHECK: ADD_INT
 ; EG-CHECK: ADD_INT
 ; EG-CHECK: ADD_INT
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
 define void @test8(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
 entry:
   %0 = add <8 x i32> %a, %b
@@ -76,7 +76,7 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @test16
+; FUNC-LABEL: {{^}}test16:
 ; EG-CHECK: ADD_INT
 ; EG-CHECK: ADD_INT
 ; EG-CHECK: ADD_INT
@@ -93,22 +93,22 @@ entry:
 ; EG-CHECK: ADD_INT
 ; EG-CHECK: ADD_INT
 ; EG-CHECK: ADD_INT
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADD_I32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
+; SI-CHECK: s_add_i32
 define void @test16(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
 entry:
   %0 = add <16 x i32> %a, %b
@@ -116,9 +116,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @add64
-; SI-CHECK: S_ADD_I32
-; SI-CHECK: S_ADDC_U32
+; FUNC-LABEL: {{^}}add64:
+; SI-CHECK: s_add_u32
+; SI-CHECK: s_addc_u32
 define void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
 entry:
   %0 = add i64 %a, %b
@@ -126,13 +126,13 @@ entry:
   ret void
 }
 
-; The V_ADDC_U32 and V_ADD_I32 instruction can't read SGPRs, because they
+; The v_addc_u32 and v_add_i32 instruction can't read SGPRs, because they
 ; use VCC.  The test is designed so that %a will be stored in an SGPR and
 ; %0 will be stored in a VGPR, so the comiler will be forced to copy %a
 ; to a VGPR before doing the add.
 
-; FUNC-LABEL: @add64_sgpr_vgpr
-; SI-CHECK-NOT: V_ADDC_U32_e32 s
+; FUNC-LABEL: {{^}}add64_sgpr_vgpr:
+; SI-CHECK-NOT: v_addc_u32_e32 s
 define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
 entry:
   %0 = load i64 addrspace(1)* %in
@@ -141,12 +141,10 @@ entry:
   ret void
 }
 
-; Test i64 add inside a branch.  We don't allow SALU instructions inside of
-; branches.
-; FIXME: We are being conservative here.  We could allow this in some cases.
-; FUNC-LABEL: @add64_in_branch
-; SI-CHECK-NOT: S_ADD_I32
-; SI-CHECK-NOT: S_ADDC_U32
+; Test i64 add inside a branch.
+; FUNC-LABEL: {{^}}add64_in_branch:
+; SI-CHECK: s_add_u32
+; SI-CHECK: s_addc_u32
 define void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
 entry:
   %0 = icmp eq i64 %a, 0
diff --git a/test/CodeGen/R600/add_i64.ll b/test/CodeGen/R600/add_i64.ll
index f733d9040421..1769409f5ef1 100644
--- a/test/CodeGen/R600/add_i64.ll
+++ b/test/CodeGen/R600/add_i64.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
 
 
 declare i32 @llvm.r600.read.tidig.x() readnone
 
-; SI-LABEL: @test_i64_vreg:
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; SI-LABEL: {{^}}test_i64_vreg:
+; SI: v_add_i32
+; SI: v_addc_u32
 define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) {
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %a_ptr = getelementptr i64 addrspace(1)* %inA, i32 %tid
@@ -18,9 +18,9 @@ define void @test_i64_vreg(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa
 }
 
 ; Check that the SGPR add operand is correctly moved to a VGPR.
-; SI-LABEL: @sgpr_operand:
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; SI-LABEL: {{^}}sgpr_operand:
+; SI: v_add_i32
+; SI: v_addc_u32
 define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
   %foo = load i64 addrspace(1)* %in, align 8
   %result = add i64 %foo, %a
@@ -31,9 +31,9 @@ define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noal
 ; Swap the arguments. Check that the SGPR -> VGPR copy works with the
 ; SGPR as other operand.
 ;
-; SI-LABEL: @sgpr_operand_reversed:
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; SI-LABEL: {{^}}sgpr_operand_reversed:
+; SI: v_add_i32
+; SI: v_addc_u32
 define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
   %foo = load i64 addrspace(1)* %in, align 8
   %result = add i64 %a, %foo
@@ -42,22 +42,22 @@ define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace
 }
 
 
-; SI-LABEL: @test_v2i64_sreg:
-; SI: S_ADD_I32
-; SI: S_ADDC_U32
-; SI: S_ADD_I32
-; SI: S_ADDC_U32
+; SI-LABEL: {{^}}test_v2i64_sreg:
+; SI: s_add_u32
+; SI: s_addc_u32
+; SI: s_add_u32
+; SI: s_addc_u32
 define void @test_v2i64_sreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %a, <2 x i64> %b) {
   %result = add <2 x i64> %a, %b
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
 }
 
-; SI-LABEL: @test_v2i64_vreg:
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; SI-LABEL: {{^}}test_v2i64_vreg:
+; SI: v_add_i32
+; SI: v_addc_u32
+; SI: v_add_i32
+; SI: v_addc_u32
 define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid
@@ -69,13 +69,13 @@ define void @test_v2i64_vreg(<2 x i64> addrspace(1)* noalias %out, <2 x i64> add
   ret void
 }
 
-; SI-LABEL: @trunc_i64_add_to_i32
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]]
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]]
-; SI: S_ADD_I32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
-; SI-NOT: ADDC
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
+; SI-LABEL: {{^}}trunc_i64_add_to_i32:
+; SI: s_load_dword s[[SREG0:[0-9]+]]
+; SI: s_load_dword s[[SREG1:[0-9]+]]
+; SI: s_add_i32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
+; SI-NOT: addc
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
 define void @trunc_i64_add_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
   %add = add i64 %b, %a
   %trunc = trunc i64 %add to i32
diff --git a/test/CodeGen/R600/address-space.ll b/test/CodeGen/R600/address-space.ll
index f75a8ac5e6a5..1106f4f99623 100644
--- a/test/CodeGen/R600/address-space.ll
+++ b/test/CodeGen/R600/address-space.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
 
 ; Test that codegenprepare understands address space sizes
 
@@ -7,11 +7,11 @@
 ; FIXME: Extra V_MOV from SGPR to VGPR for second read. The address is
 ; already in a VGPR after the first read.
 
-; CHECK-LABEL: @do_as_ptr_calcs:
-; CHECK: S_LOAD_DWORD [[SREG1:s[0-9]+]],
-; CHECK: V_MOV_B32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
-; CHECK: DS_READ_B32 v{{[0-9]+}}, [[VREG1]], 0x14
-; CHECK: DS_READ_B32 v{{[0-9]+}}, v{{[0-9]+}}, 0xc
+; CHECK-LABEL: {{^}}do_as_ptr_calcs:
+; CHECK: s_load_dword [[SREG1:s[0-9]+]],
+; CHECK: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
+; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG1]] offset:12
+; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:20
 define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
 entry:
   %x = getelementptr inbounds %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
diff --git a/test/CodeGen/R600/and.ll b/test/CodeGen/R600/and.ll
index e20037e6bb67..bfdf8734eabb 100644
--- a/test/CodeGen/R600/and.ll
+++ b/test/CodeGen/R600/and.ll
@@ -1,12 +1,12 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @test2
+; FUNC-LABEL: {{^}}test2:
 ; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -17,16 +17,16 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   ret void
 }
 
-; FUNC-LABEL: @test4
+; FUNC-LABEL: {{^}}test4:
 ; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: AND_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_AND_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_and_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -37,24 +37,24 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   ret void
 }
 
-; FUNC-LABEL: @s_and_i32
-; SI: S_AND_B32
+; FUNC-LABEL: {{^}}s_and_i32:
+; SI: s_and_b32
 define void @s_and_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
   %and = and i32 %a, %b
   store i32 %and, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @s_and_constant_i32
-; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
+; FUNC-LABEL: {{^}}s_and_constant_i32:
+; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x12d687
 define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) {
   %and = and i32 %a, 1234567
   store i32 %and, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @v_and_i32
-; SI: V_AND_B32
+; FUNC-LABEL: {{^}}v_and_i32:
+; SI: v_and_b32
 define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
   %a = load i32 addrspace(1)* %aptr, align 4
   %b = load i32 addrspace(1)* %bptr, align 4
@@ -63,8 +63,8 @@ define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addr
   ret void
 }
 
-; FUNC-LABEL: @v_and_constant_i32
-; SI: V_AND_B32
+; FUNC-LABEL: {{^}}v_and_constant_i32:
+; SI: v_and_b32
 define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
   %a = load i32 addrspace(1)* %aptr, align 4
   %and = and i32 %a, 1234567
@@ -72,8 +72,8 @@ define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr)
   ret void
 }
 
-; FUNC-LABEL: @s_and_i64
-; SI: S_AND_B64
+; FUNC-LABEL: {{^}}s_and_i64:
+; SI: s_and_b64
 define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
   %and = and i64 %a, %b
   store i64 %and, i64 addrspace(1)* %out, align 8
@@ -81,25 +81,25 @@ define void @s_and_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
 }
 
 ; FIXME: Should use SGPRs
-; FUNC-LABEL: @s_and_i1
-; SI: V_AND_B32
+; FUNC-LABEL: {{^}}s_and_i1:
+; SI: v_and_b32
 define void @s_and_i1(i1 addrspace(1)* %out, i1 %a, i1 %b) {
   %and = and i1 %a, %b
   store i1 %and, i1 addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @s_and_constant_i64
-; SI: S_AND_B64
+; FUNC-LABEL: {{^}}s_and_constant_i64:
+; SI: s_and_b64
 define void @s_and_constant_i64(i64 addrspace(1)* %out, i64 %a) {
   %and = and i64 %a, 281474976710655
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @v_and_i64
-; SI: V_AND_B32
-; SI: V_AND_B32
+; FUNC-LABEL: {{^}}v_and_i64:
+; SI: v_and_b32
+; SI: v_and_b32
 define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
   %a = load i64 addrspace(1)* %aptr, align 8
   %b = load i64 addrspace(1)* %bptr, align 8
@@ -108,12 +108,51 @@ define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addr
   ret void
 }
 
-; FUNC-LABEL: @v_and_constant_i64
-; SI: V_AND_B32
-; SI: V_AND_B32
+; FUNC-LABEL: {{^}}v_and_i64_br:
+; SI: v_and_b32
+; SI: v_and_b32
+define void @v_and_i64_br(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i32 %cond) {
+entry:
+  %tmp0 = icmp eq i32 %cond, 0
+  br i1 %tmp0, label %if, label %endif
+
+if:
+  %a = load i64 addrspace(1)* %aptr, align 8
+  %b = load i64 addrspace(1)* %bptr, align 8
+  %and = and i64 %a, %b
+  br label %endif
+
+endif:
+  %tmp1 = phi i64 [%and, %if], [0, %entry]
+  store i64 %tmp1, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_and_constant_i64:
+; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
   %a = load i64 addrspace(1)* %aptr, align 8
   %and = and i64 %a, 1234567
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
 }
+
+; FIXME: Replace and 0 with mov 0
+; FUNC-LABEL: {{^}}v_and_inline_imm_i64:
+; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
+; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
+define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+  %a = load i64 addrspace(1)* %aptr, align 8
+  %and = and i64 %a, 64
+  store i64 %and, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_and_inline_imm_i64:
+; SI: s_and_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
+define void @s_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+  %and = and i64 %a, 64
+  store i64 %and, i64 addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/anyext.ll b/test/CodeGen/R600/anyext.ll
index bbe5d0a393e6..8336ebcaca3b 100644
--- a/test/CodeGen/R600/anyext.ll
+++ b/test/CodeGen/R600/anyext.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-; CHECK-LABEL: @anyext_i1_i32
-; CHECK: V_CNDMASK_B32_e64
+; CHECK-LABEL: {{^}}anyext_i1_i32:
+; CHECK: v_cndmask_b32_e64
 define void @anyext_i1_i32(i32 addrspace(1)* %out, i32 %cond) {
 entry:
   %0 = icmp eq i32 %cond, 0
diff --git a/test/CodeGen/R600/array-ptr-calc-i32.ll b/test/CodeGen/R600/array-ptr-calc-i32.ll
index a2b697823519..33a8aee0164d 100644
--- a/test/CodeGen/R600/array-ptr-calc-i32.ll
+++ b/test/CodeGen/R600/array-ptr-calc-i32.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
 
 declare i32 @llvm.SI.tid() nounwind readnone
 declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
@@ -9,21 +9,21 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
 ; 64-bit pointer add. This should work since private pointers should
 ; be 32-bits.
 
-; SI-LABEL: @test_private_array_ptr_calc:
+; SI-LABEL: {{^}}test_private_array_ptr_calc:
 
 ; FIXME: We end up with zero argument for ADD, because
 ; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index
 ; with the appropriate offset.  We should fold this into the store.
-; SI-ALLOCA: V_ADD_I32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
-; SI-ALLOCA: BUFFER_STORE_DWORD {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], [[PTRREG]]
+; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
+; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]
 ;
 ; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
 ; alloca to a vector.  It currently fails because it does not know how
 ; to interpret:
 ; getelementptr [4 x i32]* %alloca, i32 1, i32 %b
 
-; SI-PROMOTE: V_ADD_I32_e32 [[PTRREG:v[0-9]+]]
-; SI-PROMOTE: DS_WRITE_B32 {{v[0-9]+}}, [[PTRREG]]
+; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], 16
+; SI-PROMOTE: ds_write_b32 [[PTRREG]]
 define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
   %alloca = alloca [4 x i32], i32 4, align 16
   %tid = call i32 @llvm.SI.tid() readnone
diff --git a/test/CodeGen/R600/array-ptr-calc-i64.ll b/test/CodeGen/R600/array-ptr-calc-i64.ll
index e254c5f64637..32e657db7bc6 100644
--- a/test/CodeGen/R600/array-ptr-calc-i64.ll
+++ b/test/CodeGen/R600/array-ptr-calc-i64.ll
@@ -1,13 +1,13 @@
-; XFAIL: *
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
 declare i32 @llvm.SI.tid() readnone
 
-
-; SI-LABEL: @test_array_ptr_calc(
-define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [16 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
+; SI-LABEL: {{^}}test_array_ptr_calc:
+; SI: v_mul_lo_i32
+; SI: v_mul_hi_i32
+define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
   %tid = call i32 @llvm.SI.tid() readnone
-  %a_ptr = getelementptr [16 x i32] addrspace(1)* %inA, i32 1, i32 %tid
+  %a_ptr = getelementptr [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0
   %b_ptr = getelementptr i32 addrspace(1)* %inB, i32 %tid
   %a = load i32 addrspace(1)* %a_ptr
   %b = load i32 addrspace(1)* %b_ptr
@@ -15,4 +15,3 @@ define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [16 x i32] addr
   store i32 %result, i32 addrspace(1)* %out
   ret void
 }
-
diff --git a/test/CodeGen/R600/atomic_cmp_swap_local.ll b/test/CodeGen/R600/atomic_cmp_swap_local.ll
index eb9539eec516..0d5ece4b0e0b 100644
--- a/test/CodeGen/R600/atomic_cmp_swap_local.ll
+++ b/test/CodeGen/R600/atomic_cmp_swap_local.ll
@@ -1,13 +1,14 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i32_offset:
-; SI: S_LOAD_DWORD [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: V_MOV_B32_e32 [[VCMP:v[0-9]+]], 7
-; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: V_MOV_B32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
-; SI: DS_CMPST_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]], 0x10, [M0]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_offset:
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
+; SI: ds_cmpst_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
+; SI: s_endpgm
 define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap) nounwind {
   %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
   %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
@@ -16,18 +17,17 @@ define void @lds_atomic_cmpxchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrs
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_cmpxchg_ret_i64_offset:
-; SI: S_LOAD_DWORDX2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: S_MOV_B64  s{{\[}}[[LOSCMP:[0-9]+]]:[[HISCMP:[0-9]+]]{{\]}}, 7
-; SI-DAG: V_MOV_B32_e32 v[[LOVCMP:[0-9]+]], s[[LOSCMP]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVCMP:[0-9]+]], s[[HISCMP]]
-; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI-DAG: V_MOV_B32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
-; SI-DAG: V_MOV_B32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
-; SI: DS_CMPST_RTN_B64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}}, 0x20, [M0]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i64_offset:
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
+; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
+; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
+; SI: ds_cmpst_rtn_b64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
 define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr, i64 %swap) nounwind {
   %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
   %pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
@@ -35,3 +35,49 @@ define void @lds_atomic_cmpxchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrs
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
+
+; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_ret_i32_bad_si_offset
+; SI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; CI: ds_cmpst_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
+; SI: s_endpgm
+define void @lds_atomic_cmpxchg_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %swap, i32 %a, i32 %b) nounwind {
+  %sub = sub i32 %a, %b
+  %add = add i32 %sub, 4
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+  %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
+  %result = extractvalue { i32, i1 } %pair, 0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i32_offset:
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SI: s_load_dword [[SWAP:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xa
+; SI-DAG: v_mov_b32_e32 [[VCMP:v[0-9]+]], 7
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI-DAG: v_mov_b32_e32 [[VSWAP:v[0-9]+]], [[SWAP]]
+; SI: ds_cmpst_b32 [[VPTR]], [[VCMP]], [[VSWAP]] offset:16 [M0]
+; SI: s_endpgm
+define void @lds_atomic_cmpxchg_noret_i32_offset(i32 addrspace(3)* %ptr, i32 %swap) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %pair = cmpxchg i32 addrspace(3)* %gep, i32 7, i32 %swap seq_cst monotonic
+  %result = extractvalue { i32, i1 } %pair, 0
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_cmpxchg_noret_i64_offset:
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SI: s_load_dwordx2 s{{\[}}[[LOSWAP:[0-9]+]]:[[HISWAP:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: v_mov_b32_e32 v[[LOVCMP:[0-9]+]], 7
+; SI-DAG: v_mov_b32_e32 v[[HIVCMP:[0-9]+]], 0
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI-DAG: v_mov_b32_e32 v[[LOSWAPV:[0-9]+]], s[[LOSWAP]]
+; SI-DAG: v_mov_b32_e32 v[[HISWAPV:[0-9]+]], s[[HISWAP]]
+; SI: ds_cmpst_b64 [[VPTR]], v{{\[}}[[LOVCMP]]:[[HIVCMP]]{{\]}}, v{{\[}}[[LOSWAPV]]:[[HISWAPV]]{{\]}} offset:32 [M0]
+; SI: s_endpgm
+define void @lds_atomic_cmpxchg_noret_i64_offset(i64 addrspace(3)* %ptr, i64 %swap) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %pair = cmpxchg i64 addrspace(3)* %gep, i64 7, i64 %swap seq_cst monotonic
+  %result = extractvalue { i64, i1 } %pair, 0
+  ret void
+}
diff --git a/test/CodeGen/R600/atomic_load_add.ll b/test/CodeGen/R600/atomic_load_add.ll
index c26f9cd80eaf..6dd1c51afb4a 100644
--- a/test/CodeGen/R600/atomic_load_add.ll
+++ b/test/CodeGen/R600/atomic_load_add.ll
@@ -1,35 +1,35 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 
-; FUNC-LABEL: @atomic_add_local
+; FUNC-LABEL: {{^}}atomic_add_local:
 ; R600: LDS_ADD *
-; SI: DS_ADD_RTN_U32
+; SI: ds_add_u32
 define void @atomic_add_local(i32 addrspace(3)* %local) {
    %unused = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
    ret void
 }
 
-; FUNC-LABEL: @atomic_add_local_const_offset
+; FUNC-LABEL: {{^}}atomic_add_local_const_offset:
 ; R600: LDS_ADD *
-; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
 define void @atomic_add_local_const_offset(i32 addrspace(3)* %local) {
   %gep = getelementptr i32 addrspace(3)* %local, i32 4
   %val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
   ret void
 }
 
-; FUNC-LABEL: @atomic_add_ret_local
+; FUNC-LABEL: {{^}}atomic_add_ret_local:
 ; R600: LDS_ADD_RET *
-; SI: DS_ADD_RTN_U32
+; SI: ds_add_rtn_u32
 define void @atomic_add_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
   %val = atomicrmw volatile add i32 addrspace(3)* %local, i32 5 seq_cst
   store i32 %val, i32 addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @atomic_add_ret_local_const_offset
+; FUNC-LABEL: {{^}}atomic_add_ret_local_const_offset:
 ; R600: LDS_ADD_RET *
-; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
+; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
 define void @atomic_add_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
   %gep = getelementptr i32 addrspace(3)* %local, i32 5
   %val = atomicrmw volatile add i32 addrspace(3)* %gep, i32 5 seq_cst
diff --git a/test/CodeGen/R600/atomic_load_sub.ll b/test/CodeGen/R600/atomic_load_sub.ll
index 3569d91e08dc..5d47185421a0 100644
--- a/test/CodeGen/R600/atomic_load_sub.ll
+++ b/test/CodeGen/R600/atomic_load_sub.ll
@@ -1,35 +1,35 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @atomic_sub_local
+; FUNC-LABEL: {{^}}atomic_sub_local:
 ; R600: LDS_SUB *
-; SI: DS_SUB_RTN_U32
+; SI: ds_sub_u32
 define void @atomic_sub_local(i32 addrspace(3)* %local) {
    %unused = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
    ret void
 }
 
-; FUNC-LABEL: @atomic_sub_local_const_offset
+; FUNC-LABEL: {{^}}atomic_sub_local_const_offset:
 ; R600: LDS_SUB *
-; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
+; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
 define void @atomic_sub_local_const_offset(i32 addrspace(3)* %local) {
   %gep = getelementptr i32 addrspace(3)* %local, i32 4
   %val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
   ret void
 }
 
-; FUNC-LABEL: @atomic_sub_ret_local
+; FUNC-LABEL: {{^}}atomic_sub_ret_local:
 ; R600: LDS_SUB_RET *
-; SI: DS_SUB_RTN_U32
+; SI: ds_sub_rtn_u32
 define void @atomic_sub_ret_local(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
   %val = atomicrmw volatile sub i32 addrspace(3)* %local, i32 5 seq_cst
   store i32 %val, i32 addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @atomic_sub_ret_local_const_offset
+; FUNC-LABEL: {{^}}atomic_sub_ret_local_const_offset:
 ; R600: LDS_SUB_RET *
-; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x14
+; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:20
 define void @atomic_sub_ret_local_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %local) {
   %gep = getelementptr i32 addrspace(3)* %local, i32 5
   %val = atomicrmw volatile sub i32 addrspace(3)* %gep, i32 5 seq_cst
diff --git a/test/CodeGen/R600/basic-branch.ll b/test/CodeGen/R600/basic-branch.ll
index d084132d4fcc..42ddddd2ed84 100644
--- a/test/CodeGen/R600/basic-branch.ll
+++ b/test/CodeGen/R600/basic-branch.ll
@@ -1,7 +1,7 @@
 ; XFAIL: *
-; RUN: llc -O0 -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
 
-; CHECK-LABEL: @test_branch(
+; CHECK-LABEL: {{^}}test_branch(
 define void @test_branch(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind {
   %cmp = icmp ne i32 %val, 0
   br i1 %cmp, label %store, label %end
diff --git a/test/CodeGen/R600/basic-loop.ll b/test/CodeGen/R600/basic-loop.ll
index 6d0ff0743b85..72737ae273e6 100644
--- a/test/CodeGen/R600/basic-loop.ll
+++ b/test/CodeGen/R600/basic-loop.ll
@@ -1,7 +1,7 @@
 ; XFAIL: *
-; RUN: llc -O0 -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s
+; RUN: llc -O0 -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s
 
-; CHECK-LABEL: @test_loop:
+; CHECK-LABEL: {{^}}test_loop:
 define void @test_loop(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) nounwind {
 entry:
   br label %loop.body
diff --git a/test/CodeGen/R600/bfe_uint.ll b/test/CodeGen/R600/bfe_uint.ll
index fe466e6ad5fd..6fe23e912952 100644
--- a/test/CodeGen/R600/bfe_uint.ll
+++ b/test/CodeGen/R600/bfe_uint.ll
@@ -2,7 +2,7 @@
 
 ; XFAIL: *
 
-; CHECK: @bfe_def
+; CHECK: {{^}}bfe_def:
 ; CHECK: BFE_UINT
 define void @bfe_def(i32 addrspace(1)* %out, i32 %x) {
 entry:
@@ -17,7 +17,7 @@ entry:
 ; implmented with a LSHR instruction, which is better, because LSHR has less
 ; operands and requires less constants.
 
-; CHECK: @bfe_shift
+; CHECK: {{^}}bfe_shift:
 ; CHECK-NOT: BFE_UINT
 define void @bfe_shift(i32 addrspace(1)* %out, i32 %x) {
 entry:
diff --git a/test/CodeGen/R600/bfi_int.ll b/test/CodeGen/R600/bfi_int.ll
index d18702a1de98..988a2f85e0ea 100644
--- a/test/CodeGen/R600/bfi_int.ll
+++ b/test/CodeGen/R600/bfi_int.ll
@@ -1,13 +1,13 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
 ; BFI_INT Definition pattern from ISA docs
 ; (y & x) | (z & ~x)
 ;
-; R600-CHECK: @bfi_def
+; R600-CHECK: {{^}}bfi_def:
 ; R600-CHECK: BFI_INT
 ; SI-CHECK:   @bfi_def
-; SI-CHECK:   V_BFI_B32
+; SI-CHECK:   v_bfi_b32
 define void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
 entry:
   %0 = xor i32 %x, -1
@@ -20,10 +20,10 @@ entry:
 
 ; SHA-256 Ch function
 ; z ^ (x & (y ^ z))
-; R600-CHECK: @bfi_sha256_ch
+; R600-CHECK: {{^}}bfi_sha256_ch:
 ; R600-CHECK: BFI_INT
 ; SI-CHECK:   @bfi_sha256_ch
-; SI-CHECK:   V_BFI_B32
+; SI-CHECK:   v_bfi_b32
 define void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
 entry:
   %0 = xor i32 %y, %z
@@ -35,11 +35,11 @@ entry:
 
 ; SHA-256 Ma function
 ; ((x & z) | (y & (x | z)))
-; R600-CHECK: @bfi_sha256_ma
+; R600-CHECK: {{^}}bfi_sha256_ma:
 ; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
 ; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
-; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{[sv][0-9]+, v[0-9]+}}
-; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{[sv][0-9]+, [sv][0-9]+}}
+; SI-CHECK: v_xor_b32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
+; SI-CHECK: v_bfi_b32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
 
 define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
 entry:
diff --git a/test/CodeGen/R600/big_alu.ll b/test/CodeGen/R600/big_alu.ll
index 511e8ef62951..28be216e76f2 100644
--- a/test/CodeGen/R600/big_alu.ll
+++ b/test/CodeGen/R600/big_alu.ll
@@ -1,5 +1,4 @@
 ;RUN: llc < %s -march=r600 -mcpu=cedar
-;REQUIRES: asserts
 
 ;This test ensures that R600 backend can handle ifcvt properly
 ;and do not generate ALU clauses with more than 128 instructions.
diff --git a/test/CodeGen/R600/bitcast.ll b/test/CodeGen/R600/bitcast.ll
index 0be79e658f5c..3607d519f013 100644
--- a/test/CodeGen/R600/bitcast.ll
+++ b/test/CodeGen/R600/bitcast.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 ; This test just checks that the compiler doesn't crash.
 
 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
 
-; FUNC-LABEL: @v32i8_to_v8i32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v32i8_to_v8i32:
+; SI: s_endpgm
 define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
 entry:
   %1 = load <32 x i8> addrspace(2)* %0
@@ -17,8 +17,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i8ptr_v16i8ptr
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
+; SI: s_endpgm
 define void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
   %0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
@@ -55,8 +55,8 @@ define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nou
   ret void
 }
 
-; FUNC-LABEL: @bitcast_v2i32_to_f64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
+; SI: s_endpgm
 define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %val = load <2 x i32> addrspace(1)* %in, align 8
   %add = add <2 x i32> %val, <i32 4, i32 9>
@@ -65,8 +65,8 @@ define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace
   ret void
 }
 
-; FUNC-LABEL: @bitcast_f64_to_v2i32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
+; SI: s_endpgm
 define void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
   %val = load double addrspace(1)* %in, align 8
   %add = fadd double %val, 4.0
diff --git a/test/CodeGen/R600/bswap.ll b/test/CodeGen/R600/bswap.ll
index 6aebe851366c..65998f5f1151 100644
--- a/test/CodeGen/R600/bswap.ll
+++ b/test/CodeGen/R600/bswap.ll
@@ -1,12 +1,21 @@
-; RUN: llc -march=r600 -mcpu=SI < %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare i32 @llvm.bswap.i32(i32) nounwind readnone
 declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) nounwind readnone
 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
+declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>) nounwind readnone
 declare i64 @llvm.bswap.i64(i64) nounwind readnone
 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) nounwind readnone
 declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone
 
+; FUNC-LABEL: @test_bswap_i32
+; SI: buffer_load_dword [[VAL:v[0-9]+]]
+; SI-DAG: v_alignbit_b32 [[TMP0:v[0-9]+]], [[VAL]], [[VAL]], 8
+; SI-DAG: v_alignbit_b32 [[TMP1:v[0-9]+]], [[VAL]], [[VAL]], 24
+; SI-DAG: s_mov_b32 [[K:s[0-9]+]], 0xff00ff
+; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[K]], [[TMP1]], [[TMP0]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
 define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %val = load i32 addrspace(1)* %in, align 4
   %bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone
@@ -14,6 +23,14 @@ define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
   ret void
 }
 
+; FUNC-LABEL: @test_bswap_v2i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI: s_endpgm
 define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
   %val = load <2 x i32> addrspace(1)* %in, align 8
   %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone
@@ -21,6 +38,20 @@ define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(
   ret void
 }
 
+; FUNC-LABEL: @test_bswap_v4i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI: s_endpgm
 define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
   %val = load <4 x i32> addrspace(1)* %in, align 16
   %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone
@@ -28,6 +59,39 @@ define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(
   ret void
 }
 
+; FUNC-LABEL: @test_bswap_v8i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_bfi_b32
+; SI: s_endpgm
+define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind {
+  %val = load <8 x i32> addrspace(1)* %in, align 32
+  %bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %val) nounwind readnone
+  store <8 x i32> %bswap, <8 x i32> addrspace(1)* %out, align 32
+  ret void
+}
+
 define void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
   %val = load i64 addrspace(1)* %in, align 8
   %bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone
diff --git a/test/CodeGen/R600/build_vector.ll b/test/CodeGen/R600/build_vector.ll
index 8179de13e869..a0ebe089bd5a 100644
--- a/test/CodeGen/R600/build_vector.ll
+++ b/test/CodeGen/R600/build_vector.ll
@@ -1,32 +1,32 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
-; R600-CHECK: @build_vector2
+; R600-CHECK: {{^}}build_vector2:
 ; R600-CHECK: MOV
 ; R600-CHECK: MOV
 ; R600-CHECK-NOT: MOV
-; SI-CHECK: @build_vector2
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]]], 5
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[Y:[0-9]]], 6
-; SI-CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[X]]:[[Y]]{{\]}}
+; SI-CHECK: {{^}}build_vector2:
+; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
+; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
+; SI-CHECK: buffer_store_dwordx2 v{{\[}}[[X]]:[[Y]]{{\]}}
 define void @build_vector2 (<2 x i32> addrspace(1)* %out) {
 entry:
   store <2 x i32> <i32 5, i32 6>, <2 x i32> addrspace(1)* %out
   ret void
 }
 
-; R600-CHECK: @build_vector4
+; R600-CHECK: {{^}}build_vector4:
 ; R600-CHECK: MOV
 ; R600-CHECK: MOV
 ; R600-CHECK: MOV
 ; R600-CHECK: MOV
 ; R600-CHECK-NOT: MOV
-; SI-CHECK: @build_vector4
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[X:[0-9]]], 5
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[Y:[0-9]]], 6
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[Z:[0-9]]], 7
-; SI-CHECK-DAG: V_MOV_B32_e32 v[[W:[0-9]]], 8
-; SI-CHECK: BUFFER_STORE_DWORDX4 v{{\[}}[[X]]:[[W]]{{\]}}
+; SI-CHECK: {{^}}build_vector4:
+; SI-CHECK-DAG: v_mov_b32_e32 v[[X:[0-9]]], 5
+; SI-CHECK-DAG: v_mov_b32_e32 v[[Y:[0-9]]], 6
+; SI-CHECK-DAG: v_mov_b32_e32 v[[Z:[0-9]]], 7
+; SI-CHECK-DAG: v_mov_b32_e32 v[[W:[0-9]]], 8
+; SI-CHECK: buffer_store_dwordx4 v{{\[}}[[X]]:[[W]]{{\]}}
 define void @build_vector4 (<4 x i32> addrspace(1)* %out) {
 entry:
   store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> addrspace(1)* %out
diff --git a/test/CodeGen/R600/call.ll b/test/CodeGen/R600/call.ll
index d80347490b39..1afe98ba5951 100644
--- a/test/CodeGen/R600/call.ll
+++ b/test/CodeGen/R600/call.ll
@@ -1,7 +1,7 @@
-; RUN: not llc -march=r600 -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s
+; RUN: not llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s 2>&1 | FileCheck %s
 ; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s
 
-; CHECK: error: unsupported call to function defined_function in test_call
+; CHECK: error: unsupported call to function external_function in test_call_external
 
 
 declare i32 @external_function(i32) nounwind
diff --git a/test/CodeGen/R600/call_fs.ll b/test/CodeGen/R600/call_fs.ll
index f7c4e5b22cb1..7df22402cff9 100644
--- a/test/CodeGen/R600/call_fs.ll
+++ b/test/CodeGen/R600/call_fs.ll
@@ -2,10 +2,10 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood -show-mc-encoding -o - | FileCheck --check-prefix=EG-CHECK %s
 ; RUN: llc < %s -march=r600 -mcpu=rv710 -show-mc-encoding -o - | FileCheck --check-prefix=R600-CHECK %s
 
-; EG-CHECK: @call_fs
+; EG-CHECK: {{^}}call_fs:
 ; EG-CHECK: .long 257
 ; EG-CHECK: CALL_FS  ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0xc0,0x84]
-; R600-CHECK: @call_fs
+; R600-CHECK: {{^}}call_fs:
 ; R600-CHECK: .long 257
 ; R600-CHECK:CALL_FS ; encoding: [0x00,0x00,0x00,0x00,0x00,0x00,0x80,0x89]
 
diff --git a/test/CodeGen/R600/cayman-loop-bug.ll b/test/CodeGen/R600/cayman-loop-bug.ll
index a87352895eb3..c7b8c4037316 100644
--- a/test/CodeGen/R600/cayman-loop-bug.ll
+++ b/test/CodeGen/R600/cayman-loop-bug.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
 
-; CHECK-LABEL: @main
+; CHECK-LABEL: {{^}}main:
 ; CHECK: LOOP_START_DX10
 ; CHECK: ALU_PUSH_BEFORE
 ; CHECK: LOOP_START_DX10
diff --git a/test/CodeGen/R600/cf-stack-bug.ll b/test/CodeGen/R600/cf-stack-bug.ll
index c3a4612e6ac9..02c87d76bb20 100644
--- a/test/CodeGen/R600/cf-stack-bug.ll
+++ b/test/CodeGen/R600/cf-stack-bug.ll
@@ -17,7 +17,7 @@
 ; BUG64-NOT: Applying bug work-around
 ; BUG32-NOT: Applying bug work-around
 ; NOBUG-NOT: Applying bug work-around
-; FUNC-LABEL: @nested3
+; FUNC-LABEL: {{^}}nested3:
 define void @nested3(i32 addrspace(1)* %out, i32 %cond) {
 entry:
   %0 = icmp sgt i32 %cond, 0
@@ -50,7 +50,7 @@ end:
 ; BUG64: Applying bug work-around
 ; BUG32-NOT: Applying bug work-around
 ; NOBUG-NOT: Applying bug work-around
-; FUNC-LABEL: @nested4
+; FUNC-LABEL: {{^}}nested4:
 define void @nested4(i32 addrspace(1)* %out, i32 %cond) {
 entry:
   %0 = icmp sgt i32 %cond, 0
@@ -91,7 +91,7 @@ end:
 ; BUG64: Applying bug work-around
 ; BUG32-NOT: Applying bug work-around
 ; NOBUG-NOT: Applying bug work-around
-; FUNC-LABEL: @nested7
+; FUNC-LABEL: {{^}}nested7:
 define void @nested7(i32 addrspace(1)* %out, i32 %cond) {
 entry:
   %0 = icmp sgt i32 %cond, 0
@@ -156,7 +156,7 @@ end:
 ; BUG64: Applying bug work-around
 ; BUG32: Applying bug work-around
 ; NOBUG-NOT: Applying bug work-around
-; FUNC-LABEL: @nested8
+; FUNC-LABEL: {{^}}nested8:
 define void @nested8(i32 addrspace(1)* %out, i32 %cond) {
 entry:
   %0 = icmp sgt i32 %cond, 0
diff --git a/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll b/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
index f8b4a61a7db5..e16a397bb5a4 100644
--- a/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
+++ b/test/CodeGen/R600/codegen-prepare-addrmode-sext.ll
@@ -1,14 +1,15 @@
-; RUN: opt -codegenprepare -S -o - %s | FileCheck --check-prefix=OPT --check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-LLC --check-prefix=FUNC %s
+; RUN: opt -codegenprepare -S -o - %s | FileCheck --check-prefix=OPT %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-LLC %s
 
 target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
 target triple = "r600--"
 
-; FUNC-LABEL: @test
+; OPT-LABEL: @test
 ; OPT: mul nsw i32
 ; OPT-NEXT: sext
-; SI-LLC: V_MUL_LO_I32
-; SI-LLC-NOT: V_MUL_HI
+; SI-LLC-LABEL: {{^}}test:
+; SI-LLC: s_mul_i32
+; SI-LLC-NOT: mul
 define void @test(i8 addrspace(1)* nocapture readonly %in, i32 %a, i8 %b) {
 entry:
   %0 = mul nsw i32 %a, 3
diff --git a/test/CodeGen/R600/combine_vloads.ll b/test/CodeGen/R600/combine_vloads.ll
index f8ec712c1ec8..38420b25cba9 100644
--- a/test/CodeGen/R600/combine_vloads.ll
+++ b/test/CodeGen/R600/combine_vloads.ll
@@ -9,7 +9,7 @@
 
 
 ; 128-bit loads instead of many 8-bit
-; EG-LABEL: @combine_vloads:
+; EG-LABEL: {{^}}combine_vloads:
 ; EG: VTX_READ_128
 ; EG: VTX_READ_128
 define void @combine_vloads(<8 x i8> addrspace(1)* nocapture %src, <8 x i8> addrspace(1)* nocapture %result) nounwind {
diff --git a/test/CodeGen/R600/commute_modifiers.ll b/test/CodeGen/R600/commute_modifiers.ll
new file mode 100644
index 000000000000..6fddb6d595c9
--- /dev/null
+++ b/test/CodeGen/R600/commute_modifiers.ll
@@ -0,0 +1,181 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() #1
+declare float @llvm.fabs.f32(float) #1
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+
+; FUNC-LABEL: @commute_add_imm_fabs_f32
+; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, |[[X]]|
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %x = load float addrspace(1)* %gep.0
+  %x.fabs = call float @llvm.fabs.f32(float %x) #1
+  %z = fadd float 2.0, %x.fabs
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @commute_mul_imm_fneg_fabs_f32
+; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: v_mul_f32_e64 [[REG:v[0-9]+]], -4.0, |[[X]]|
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %x = load float addrspace(1)* %gep.0
+  %x.fabs = call float @llvm.fabs.f32(float %x) #1
+  %x.fneg.fabs = fsub float -0.000000e+00, %x.fabs
+  %z = fmul float 4.0, %x.fneg.fabs
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @commute_mul_imm_fneg_f32
+; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: v_mul_f32_e32 [[REG:v[0-9]+]], -4.0, [[X]]
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %x = load float addrspace(1)* %gep.0
+  %x.fneg = fsub float -0.000000e+00, %x
+  %z = fmul float 4.0, %x.fneg
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+; FIXME: Should use SGPR for literal.
+; FUNC-LABEL: @commute_add_lit_fabs_f32
+; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: v_mov_b32_e32 [[K:v[0-9]+]], 0x44800000
+; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, [[K]]
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %x = load float addrspace(1)* %gep.0
+  %x.fabs = call float @llvm.fabs.f32(float %x) #1
+  %z = fadd float 1024.0, %x.fabs
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @commute_add_fabs_f32
+; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_add_f32_e64 [[REG:v[0-9]+]], [[X]], |[[Y]]|
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_add_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %x = load float addrspace(1)* %gep.0
+  %y = load float addrspace(1)* %gep.1
+  %y.fabs = call float @llvm.fabs.f32(float %y) #1
+  %z = fadd float %x, %y.fabs
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @commute_mul_fneg_f32
+; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -[[Y]]
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_mul_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %x = load float addrspace(1)* %gep.0
+  %y = load float addrspace(1)* %gep.1
+  %y.fneg = fsub float -0.000000e+00, %y
+  %z = fmul float %x, %y.fneg
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @commute_mul_fabs_fneg_f32
+; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_mul_f32_e64 [[REG:v[0-9]+]], [[X]], -|[[Y]]|
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_mul_fabs_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %x = load float addrspace(1)* %gep.0
+  %y = load float addrspace(1)* %gep.1
+  %y.fabs = call float @llvm.fabs.f32(float %y) #1
+  %y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
+  %z = fmul float %x, %y.fabs.fneg
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+; There's no reason to commute this.
+; FUNC-LABEL: @commute_mul_fabs_x_fabs_y_f32
+; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, |[[Y]]|
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_mul_fabs_x_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %x = load float addrspace(1)* %gep.0
+  %y = load float addrspace(1)* %gep.1
+  %x.fabs = call float @llvm.fabs.f32(float %x) #1
+  %y.fabs = call float @llvm.fabs.f32(float %y) #1
+  %z = fmul float %x.fabs, %y.fabs
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @commute_mul_fabs_x_fneg_fabs_y_f32
+; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_mul_f32_e64 [[REG:v[0-9]+]], |[[X]]|, -|[[Y]]|
+; SI-NEXT: buffer_store_dword [[REG]]
+define void @commute_mul_fabs_x_fneg_fabs_y_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %x = load float addrspace(1)* %gep.0
+  %y = load float addrspace(1)* %gep.1
+  %x.fabs = call float @llvm.fabs.f32(float %x) #1
+  %y.fabs = call float @llvm.fabs.f32(float %y) #1
+  %y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
+  %z = fmul float %x.fabs, %y.fabs.fneg
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+; Make sure we commute the multiply part for the constant in src0 even
+; though we have negate modifier on src2.
+
+; SI-LABEL: {{^}}fma_a_2.0_neg_b_f32
+; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], |[[R2]]|
+; SI: buffer_store_dword [[RESULT]]
+define void @fma_a_2.0_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+  %r1 = load float addrspace(1)* %gep.0
+  %r2 = load float addrspace(1)* %gep.1
+
+  %r2.fabs = call float @llvm.fabs.f32(float %r2)
+
+  %r3 = tail call float @llvm.fma.f32(float %r1, float 2.0, float %r2.fabs)
+  store float %r3, float addrspace(1)* %gep.out
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/complex-folding.ll b/test/CodeGen/R600/complex-folding.ll
index 99f0d99b3529..a5399a71324c 100644
--- a/test/CodeGen/R600/complex-folding.ll
+++ b/test/CodeGen/R600/complex-folding.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-; CHECK: @main
+; CHECK: {{^}}main:
 ; CHECK-NOT: MOV
 define void @main(<4 x float> inreg %reg0) #0 {
 entry:
diff --git a/test/CodeGen/R600/concat_vectors.ll b/test/CodeGen/R600/concat_vectors.ll
index 9abc5a627c1c..4c5b9c959516 100644
--- a/test/CodeGen/R600/concat_vectors.ll
+++ b/test/CodeGen/R600/concat_vectors.ll
@@ -1,247 +1,282 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @test_concat_v1i32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v1i32:
+; 0x80f000 is the high 32 bits of the resource descriptor used by MUBUF
+; instructions that access scratch memory.  Bit 23, which is the add_tid_enable
+; bit, is only set for scratch access, so we can check for the absence of this
+; value if we want to ensure scratch memory is not being used.
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v1i32(<2 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind {
   %concat = shufflevector <1 x i32> %a, <1 x i32> %b, <2 x i32> <i32 0, i32 1>
   store <2 x i32> %concat, <2 x i32> addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v2i32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v2i32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v2i32(<4 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
   %concat = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   store <4 x i32> %concat, <4 x i32> addrspace(1)* %out, align 16
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v4i32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v4i32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v4i32(<8 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind {
   %concat = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   store <8 x i32> %concat, <8 x i32> addrspace(1)* %out, align 32
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v8i32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v8i32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v8i32(<16 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) nounwind {
   %concat = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   store <16 x i32> %concat, <16 x i32> addrspace(1)* %out, align 64
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v16i32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v16i32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v16i32(<32 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) nounwind {
   %concat = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   store <32 x i32> %concat, <32 x i32> addrspace(1)* %out, align 128
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v1f32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v1f32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v1f32(<2 x float> addrspace(1)* %out, <1 x float> %a, <1 x float> %b) nounwind {
   %concat = shufflevector <1 x float> %a, <1 x float> %b, <2 x i32> <i32 0, i32 1>
   store <2 x float> %concat, <2 x float> addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v2f32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v2f32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v2f32(<4 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
   %concat = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   store <4 x float> %concat, <4 x float> addrspace(1)* %out, align 16
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v4f32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v4f32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v4f32(<8 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
   %concat = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   store <8 x float> %concat, <8 x float> addrspace(1)* %out, align 32
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v8f32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v8f32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v8f32(<16 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
   %concat = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   store <16 x float> %concat, <16 x float> addrspace(1)* %out, align 64
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v16f32
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v16f32:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v16f32(<32 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
   %concat = shufflevector <16 x float> %a, <16 x float> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   store <32 x float> %concat, <32 x float> addrspace(1)* %out, align 128
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v1i64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v1i64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v1i64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
   %concat = shufflevector <1 x double> %a, <1 x double> %b, <2 x i32> <i32 0, i32 1>
   store <2 x double> %concat, <2 x double> addrspace(1)* %out, align 16
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v2i64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v2i64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v2i64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
   %concat = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   store <4 x double> %concat, <4 x double> addrspace(1)* %out, align 32
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v4i64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v4i64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v4i64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
   %concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   store <8 x double> %concat, <8 x double> addrspace(1)* %out, align 64
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v8i64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v8i64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v8i64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
   %concat = shufflevector <8 x double> %a, <8 x double> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   store <16 x double> %concat, <16 x double> addrspace(1)* %out, align 128
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v16i64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v16i64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v16i64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
   %concat = shufflevector <16 x double> %a, <16 x double> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   store <32 x double> %concat, <32 x double> addrspace(1)* %out, align 256
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v1f64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v1f64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v1f64(<2 x double> addrspace(1)* %out, <1 x double> %a, <1 x double> %b) nounwind {
   %concat = shufflevector <1 x double> %a, <1 x double> %b, <2 x i32> <i32 0, i32 1>
   store <2 x double> %concat, <2 x double> addrspace(1)* %out, align 16
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v2f64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v2f64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v2f64(<4 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
   %concat = shufflevector <2 x double> %a, <2 x double> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   store <4 x double> %concat, <4 x double> addrspace(1)* %out, align 32
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v4f64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v4f64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v4f64(<8 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
   %concat = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   store <8 x double> %concat, <8 x double> addrspace(1)* %out, align 64
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v8f64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v8f64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v8f64(<16 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
   %concat = shufflevector <8 x double> %a, <8 x double> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   store <16 x double> %concat, <16 x double> addrspace(1)* %out, align 128
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v16f64
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v16f64:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v16f64(<32 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
   %concat = shufflevector <16 x double> %a, <16 x double> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   store <32 x double> %concat, <32 x double> addrspace(1)* %out, align 256
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v1i1
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v1i1:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v1i1(<2 x i1> addrspace(1)* %out, <1 x i1> %a, <1 x i1> %b) nounwind {
   %concat = shufflevector <1 x i1> %a, <1 x i1> %b, <2 x i32> <i32 0, i32 1>
   store <2 x i1> %concat, <2 x i1> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v2i1
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v2i1:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v2i1(<4 x i1> addrspace(1)* %out, <2 x i1> %a, <2 x i1> %b) nounwind {
   %concat = shufflevector <2 x i1> %a, <2 x i1> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   store <4 x i1> %concat, <4 x i1> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v4i1
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v4i1:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v4i1(<8 x i1> addrspace(1)* %out, <4 x i1> %a, <4 x i1> %b) nounwind {
   %concat = shufflevector <4 x i1> %a, <4 x i1> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   store <8 x i1> %concat, <8 x i1> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v8i1
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v8i1:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v8i1(<16 x i1> addrspace(1)* %out, <8 x i1> %a, <8 x i1> %b) nounwind {
   %concat = shufflevector <8 x i1> %a, <8 x i1> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   store <16 x i1> %concat, <16 x i1> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v16i1
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v16i1:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v16i1(<32 x i1> addrspace(1)* %out, <16 x i1> %a, <16 x i1> %b) nounwind {
   %concat = shufflevector <16 x i1> %a, <16 x i1> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   store <32 x i1> %concat, <32 x i1> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v32i1
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v32i1:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v32i1(<64 x i1> addrspace(1)* %out, <32 x i1> %a, <32 x i1> %b) nounwind {
   %concat = shufflevector <32 x i1> %a, <32 x i1> %b, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
   store <64 x i1> %concat, <64 x i1> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v1i16
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v1i16:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v1i16(<2 x i16> addrspace(1)* %out, <1 x i16> %a, <1 x i16> %b) nounwind {
   %concat = shufflevector <1 x i16> %a, <1 x i16> %b, <2 x i32> <i32 0, i32 1>
   store <2 x i16> %concat, <2 x i16> addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v2i16
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v2i16:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v2i16(<4 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) nounwind {
   %concat = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   store <4 x i16> %concat, <4 x i16> addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v4i16
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v4i16:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v4i16(<8 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind {
   %concat = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   store <8 x i16> %concat, <8 x i16> addrspace(1)* %out, align 16
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v8i16
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v8i16:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v8i16(<16 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind {
   %concat = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   store <16 x i16> %concat, <16 x i16> addrspace(1)* %out, align 32
   ret void
 }
 
-; FUNC-LABEL: @test_concat_v16i16
-; SI-NOT: MOVREL
+; FUNC-LABEL: {{^}}test_concat_v16i16:
+; SI-NOT: s_mov_b32 s{{[0-9]}}, 0x80f000
+; SI-NOT: movrel
 define void @test_concat_v16i16(<32 x i16> addrspace(1)* %out, <16 x i16> %a, <16 x i16> %b) nounwind {
   %concat = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   store <32 x i16> %concat, <32 x i16> addrspace(1)* %out, align 64
diff --git a/test/CodeGen/R600/copy-illegal-type.ll b/test/CodeGen/R600/copy-illegal-type.ll
index f7c2321ae8fe..2dff24c432b1 100644
--- a/test/CodeGen/R600/copy-illegal-type.ll
+++ b/test/CodeGen/R600/copy-illegal-type.ll
@@ -1,20 +1,20 @@
-; RUN: llc -march=r600 -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @test_copy_v4i8
-; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copy_v4i8:
+; SI: buffer_load_dword [[REG:v[0-9]+]]
+; SI: buffer_store_dword [[REG]]
+; SI: s_endpgm
 define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
   %val = load <4 x i8> addrspace(1)* %in, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @test_copy_v4i8_x2
-; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copy_v4i8_x2:
+; SI: buffer_load_dword [[REG:v[0-9]+]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: s_endpgm
 define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
   %val = load <4 x i8> addrspace(1)* %in, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
@@ -22,12 +22,12 @@ define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
   ret void
 }
 
-; FUNC-LABEL: @test_copy_v4i8_x3
-; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copy_v4i8_x3:
+; SI: buffer_load_dword [[REG:v[0-9]+]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: s_endpgm
 define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
   %val = load <4 x i8> addrspace(1)* %in, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
@@ -36,13 +36,13 @@ define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
   ret void
 }
 
-; FUNC-LABEL: @test_copy_v4i8_x4
-; SI: BUFFER_LOAD_DWORD [[REG:v[0-9]+]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copy_v4i8_x4:
+; SI: buffer_load_dword [[REG:v[0-9]+]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: buffer_store_dword [[REG]]
+; SI: s_endpgm
 define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %out3, <4 x i8> addrspace(1)* %in) nounwind {
   %val = load <4 x i8> addrspace(1)* %in, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
@@ -52,34 +52,34 @@ define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(
   ret void
 }
 
-; FUNC-LABEL: @test_copy_v4i8_extra_use
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI_DAG: BUFFER_STORE_BYTE
+; FUNC-LABEL: {{^}}test_copy_v4i8_extra_use:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI_DAG: buffer_store_byte
 
 ; After scalarizing v4i8 loads is fixed.
-; XSI: BUFFER_LOAD_DWORD
+; XSI: buffer_load_dword
 ; XSI: V_BFE
 ; XSI: V_ADD
 ; XSI: V_ADD
 ; XSI: V_ADD
-; XSI: BUFFER_STORE_DWORD
-; XSI: BUFFER_STORE_DWORD
+; XSI: buffer_store_dword
+; XSI: buffer_store_dword
 
-; SI: S_ENDPGM
+; SI: s_endpgm
 define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
   %val = load <4 x i8> addrspace(1)* %in, align 4
   %add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
@@ -88,36 +88,36 @@ define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> add
   ret void
 }
 
-; FUNC-LABEL: @test_copy_v4i8_x2_extra_use
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: V_ADD
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI_DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI-DAG: BUFFER_STORE_BYTE
-; SI_DAG: BUFFER_STORE_BYTE
+; FUNC-LABEL: {{^}}test_copy_v4i8_x2_extra_use:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: v_add
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI_DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_store_byte
+; SI_DAG: buffer_store_byte
 
-; XSI: BUFFER_LOAD_DWORD
+; XSI: buffer_load_dword
 ; XSI: BFE
-; XSI: BUFFER_STORE_DWORD
+; XSI: buffer_store_dword
 ; XSI: V_ADD
-; XSI: BUFFER_STORE_DWORD
-; XSI-NEXT: BUFFER_STORE_DWORD
+; XSI: buffer_store_dword
+; XSI-NEXT: buffer_store_dword
 
-; SI: S_ENDPGM
+; SI: s_endpgm
 define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
   %val = load <4 x i8> addrspace(1)* %in, align 4
   %add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
@@ -127,38 +127,38 @@ define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8>
   ret void
 }
 
-; FUNC-LABEL: @test_copy_v3i8
-; SI-NOT: BFE
-; SI-NOT: BFI
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copy_v3i8:
+; SI-NOT: bfe
+; SI-NOT: bfi
+; SI: s_endpgm
 define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
   %val = load <3 x i8> addrspace(1)* %in, align 4
   store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @test_copy_v4i8_volatile_load
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_load:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: s_endpgm
 define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
   %val = load volatile <4 x i8> addrspace(1)* %in, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @test_copy_v4i8_volatile_store
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_STORE_BYTE
-; SI: BUFFER_STORE_BYTE
-; SI: BUFFER_STORE_BYTE
-; SI: BUFFER_STORE_BYTE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copy_v4i8_volatile_store:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: buffer_store_byte
+; SI: s_endpgm
 define void @test_copy_v4i8_volatile_store(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
   %val = load <4 x i8> addrspace(1)* %in, align 4
   store volatile <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/copy-to-reg.ll b/test/CodeGen/R600/copy-to-reg.ll
new file mode 100644
index 000000000000..4a4143567102
--- /dev/null
+++ b/test/CodeGen/R600/copy-to-reg.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s
+
+; Test that CopyToReg instructions don't have non-register operands prior
+; to being emitted.
+
+; Make sure this doesn't crash
+; CHECK-LABEL: {{^}}copy_to_reg_frameindex:
+define void @copy_to_reg_frameindex(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
+entry:
+  %alloca = alloca [16 x i32]
+  br label %loop
+
+loop:
+  %inc = phi i32 [0, %entry], [%inc.i, %loop]
+  %ptr = getelementptr [16 x i32]* %alloca, i32 0, i32 %inc
+  store i32 %inc, i32* %ptr
+  %inc.i = add i32 %inc, 1
+  %cnd = icmp uge i32 %inc.i, 16
+  br i1 %cnd, label %done, label %loop
+
+done:
+  %tmp0 = getelementptr [16 x i32]* %alloca, i32 0, i32 0
+  %tmp1 = load i32* %tmp0
+  store i32 %tmp1, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/ctlz_zero_undef.ll b/test/CodeGen/R600/ctlz_zero_undef.ll
index 1340ef98c605..090610d4aac2 100644
--- a/test/CodeGen/R600/ctlz_zero_undef.ll
+++ b/test/CodeGen/R600/ctlz_zero_undef.ll
@@ -1,16 +1,16 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
 declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
 
-; FUNC-LABEL: @s_ctlz_zero_undef_i32:
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
-; SI: S_FLBIT_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i32:
+; SI: s_load_dword [[VAL:s[0-9]+]],
+; SI: s_flbit_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
@@ -19,11 +19,11 @@ define void @s_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
   ret void
 }
 
-; FUNC-LABEL: @v_ctlz_zero_undef_i32:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_FFBH_U32_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
@@ -33,12 +33,12 @@ define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
   ret void
 }
 
-; FUNC-LABEL: @v_ctlz_zero_undef_v2i32:
-; SI: BUFFER_LOAD_DWORDX2
-; SI: V_FFBH_U32_e32
-; SI: V_FFBH_U32_e32
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v2i32:
+; SI: buffer_load_dwordx2
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
@@ -49,14 +49,14 @@ define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
   ret void
 }
 
-; FUNC-LABEL: @v_ctlz_zero_undef_v4i32:
-; SI: BUFFER_LOAD_DWORDX4
-; SI: V_FFBH_U32_e32
-; SI: V_FFBH_U32_e32
-; SI: V_FFBH_U32_e32
-; SI: V_FFBH_U32_e32
-; SI: BUFFER_STORE_DWORDX4
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_v4i32:
+; SI: buffer_load_dwordx4
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll
index 22a3022145f1..a47bc876cb96 100644
--- a/test/CodeGen/R600/ctpop.ll
+++ b/test/CodeGen/R600/ctpop.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
@@ -7,12 +7,12 @@ declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
 declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
 declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
 
-; FUNC-LABEL: @s_ctpop_i32:
-; SI: S_LOAD_DWORD [[SVAL:s[0-9]+]],
-; SI: S_BCNT1_I32_B32 [[SRESULT:s[0-9]+]], [[SVAL]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_ctpop_i32:
+; SI: s_load_dword [[SVAL:s[0-9]+]],
+; SI: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
 
 ; EG: BCNT_INT
 define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
@@ -22,12 +22,12 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
 }
 
 ; XXX - Why 0 in register?
-; FUNC-LABEL: @v_ctpop_i32:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
-; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i32:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
+; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@@ -37,15 +37,14 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
   ret void
 }
 
-; FUNC-LABEL: @v_ctpop_add_chain_i32
-; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
-; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]],
-; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
-; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
-; SI-NOT: ADD
-; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_add_chain_i32:
+; SI: buffer_load_dword [[VAL0:v[0-9]+]],
+; SI: buffer_load_dword [[VAL1:v[0-9]+]],
+; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
+; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
+; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 
 ; EG: BCNT_INT
 ; EG: BCNT_INT
@@ -59,10 +58,24 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace
   ret void
 }
 
-; FUNC-LABEL: @v_ctpop_v2i32:
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_add_sgpr_i32:
+; SI: buffer_load_dword [[VAL0:v[0-9]+]],
+; SI-NEXT: s_waitcnt
+; SI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
+; SI-NEXT: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
+  %val0 = load i32 addrspace(1)* %in0, align 4
+  %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
+  %add = add i32 %ctpop0, %sval
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctpop_v2i32:
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: s_endpgm
 
 ; EG: BCNT_INT
 ; EG: BCNT_INT
@@ -73,12 +86,12 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs
   ret void
 }
 
-; FUNC-LABEL: @v_ctpop_v4i32:
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_v4i32:
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: s_endpgm
 
 ; EG: BCNT_INT
 ; EG: BCNT_INT
@@ -91,16 +104,16 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs
   ret void
 }
 
-; FUNC-LABEL: @v_ctpop_v8i32:
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_v8i32:
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: s_endpgm
 
 ; EG: BCNT_INT
 ; EG: BCNT_INT
@@ -117,24 +130,24 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs
   ret void
 }
 
-; FUNC-LABEL: @v_ctpop_v16i32:
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: V_BCNT_U32_B32_e32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_v16i32:
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: v_bcnt_u32_b32_e32
+; SI: s_endpgm
 
 ; EG: BCNT_INT
 ; EG: BCNT_INT
@@ -159,11 +172,11 @@ define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> ad
   ret void
 }
 
-; FUNC-LABEL: @v_ctpop_i32_add_inline_constant:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@@ -174,11 +187,11 @@ define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32
   ret void
 }
 
-; FUNC-LABEL: @v_ctpop_i32_add_inline_constant_inv:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i32_add_inline_constant_inv:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
@@ -189,12 +202,12 @@ define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out,
   ret void
 }
 
-; FUNC-LABEL: @v_ctpop_i32_add_literal:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_MOV_B32_e32 [[LIT:v[0-9]+]], 0x1869f
-; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i32_add_literal:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_mov_b32_e32 [[LIT:v[0-9]+]], 0x1869f
+; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[LIT]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
   %val = load i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
@@ -203,12 +216,12 @@ define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspa
   ret void
 }
 
-; FUNC-LABEL: @v_ctpop_i32_add_var:
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
-; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i32_add_var:
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
+; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
+; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
@@ -219,12 +232,12 @@ define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1
   ret void
 }
 
-; FUNC-LABEL: @v_ctpop_i32_add_var_inv:
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI-DAG: S_LOAD_DWORD [[VAR:s[0-9]+]],
-; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i32_add_var_inv:
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]],
+; SI-DAG: s_load_dword [[VAR:s[0-9]+]],
+; SI: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
@@ -235,12 +248,12 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa
   ret void
 }
 
-; FUNC-LABEL: @v_ctpop_i32_add_vvar_inv
-; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], {{.*}} + 0x0
-; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], {{.*}} + 0x10
-; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i32_add_vvar_inv:
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], {{0$}}
+; SI-DAG: buffer_load_dword [[VAR:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0 offset:16
+; SI: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
@@ -256,29 +269,29 @@ define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrsp
 ; FIXME: We currently disallow SALU instructions in all branches,
 ; but there are some cases when the should be allowed.
 
-; FUNC-LABEL: @ctpop_i32_in_br
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 0
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
-; EG: BCNT_INT
-define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond) {
+; FUNC-LABEL: {{^}}ctpop_i32_in_br:
+; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xd
+; SI: s_bcnt1_i32_b32  [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[RESULT]], [[SRESULT]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+; EG: BCNT_INT
+define void @ctpop_i32_in_br(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %ctpop_arg, i32 %cond) {
 entry:
-  %0 = icmp eq i32 %cond, 0
-  br i1 %0, label %if, label %else
+  %tmp0 = icmp eq i32 %cond, 0
+  br i1 %tmp0, label %if, label %else
 
 if:
-  %1 = load i32 addrspace(1)* %in
-  %2 = call i32 @llvm.ctpop.i32(i32 %1)
+  %tmp2 = call i32 @llvm.ctpop.i32(i32 %ctpop_arg)
   br label %endif
 
 else:
-  %3 = getelementptr i32 addrspace(1)* %in, i32 1
-  %4 = load i32 addrspace(1)* %3
+  %tmp3 = getelementptr i32 addrspace(1)* %in, i32 1
+  %tmp4 = load i32 addrspace(1)* %tmp3
   br label %endif
 
 endif:
-  %5 = phi i32 [%2, %if], [%4, %else]
-  store i32 %5, i32 addrspace(1)* %out
+  %tmp5 = phi i32 [%tmp2, %if], [%tmp4, %else]
+  store i32 %tmp5, i32 addrspace(1)* %out
   ret void
 }
diff --git a/test/CodeGen/R600/ctpop64.ll b/test/CodeGen/R600/ctpop64.ll
index b36ecc68d895..8dfe571d3477 100644
--- a/test/CodeGen/R600/ctpop64.ll
+++ b/test/CodeGen/R600/ctpop64.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare i64 @llvm.ctpop.i64(i64) nounwind readnone
 declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
@@ -6,12 +6,12 @@ declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) nounwind readnone
 declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>) nounwind readnone
 declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone
 
-; FUNC-LABEL: @s_ctpop_i64:
-; SI: S_LOAD_DWORDX2 [[SVAL:s\[[0-9]+:[0-9]+\]]],
-; SI: S_BCNT1_I32_B64 [[SRESULT:s[0-9]+]], [[SVAL]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_ctpop_i64:
+; SI: s_load_dwordx2 [[SVAL:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_bcnt1_i32_b64 [[SRESULT:s[0-9]+]], [[SVAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
 define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
   %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
   %truncctpop = trunc i64 %ctpop to i32
@@ -19,13 +19,13 @@ define void @s_ctpop_i64(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @v_ctpop_i64:
-; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
-; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
-; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]]
-; SI-NEXT: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_i64:
+; SI: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}},
+; SI: v_mov_b32_e32 [[VZERO:v[0-9]+]], 0
+; SI: v_bcnt_u32_b32_e32 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], [[VZERO]]
+; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
   %val = load i64 addrspace(1)* %in, align 8
   %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
@@ -34,10 +34,10 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali
   ret void
 }
 
-; FUNC-LABEL: @s_ctpop_v2i64:
-; SI: S_BCNT1_I32_B64
-; SI: S_BCNT1_I32_B64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_ctpop_v2i64:
+; SI: s_bcnt1_i32_b64
+; SI: s_bcnt1_i32_b64
+; SI: s_endpgm
 define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val) nounwind {
   %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
   %truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
@@ -45,12 +45,12 @@ define void @s_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> %val)
   ret void
 }
 
-; FUNC-LABEL: @s_ctpop_v4i64:
-; SI: S_BCNT1_I32_B64
-; SI: S_BCNT1_I32_B64
-; SI: S_BCNT1_I32_B64
-; SI: S_BCNT1_I32_B64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_ctpop_v4i64:
+; SI: s_bcnt1_i32_b64
+; SI: s_bcnt1_i32_b64
+; SI: s_bcnt1_i32_b64
+; SI: s_bcnt1_i32_b64
+; SI: s_endpgm
 define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val) nounwind {
   %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
   %truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
@@ -58,12 +58,12 @@ define void @s_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> %val)
   ret void
 }
 
-; FUNC-LABEL: @v_ctpop_v2i64:
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_v2i64:
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: s_endpgm
 define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
   %val = load <2 x i64> addrspace(1)* %in, align 16
   %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
@@ -72,16 +72,16 @@ define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrs
   ret void
 }
 
-; FUNC-LABEL: @v_ctpop_v4i64:
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: V_BCNT_U32_B32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_ctpop_v4i64:
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: v_bcnt_u32_b32
+; SI: s_endpgm
 define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
   %val = load <4 x i64> addrspace(1)* %in, align 32
   %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
@@ -93,30 +93,29 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs
 ; FIXME: We currently disallow SALU instructions in all branches,
 ; but there are some cases when the should be allowed.
 
-; FUNC-LABEL: @ctpop_i64_in_br
-; SI: V_BCNT_U32_B32_e64 [[BCNT_LO:v[0-9]+]], v{{[0-9]+}}, 0
-; SI: V_BCNT_U32_B32_e32 v[[BCNT:[0-9]+]], v{{[0-9]+}}, [[BCNT_LO]]
-; SI: V_MOV_B32_e32 v[[ZERO:[0-9]+]], 0
-; SI: BUFFER_STORE_DWORDX2 v[
-; SI: [[BCNT]]:[[ZERO]]]
-; SI: S_ENDPGM
-define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i32 %cond) {
+; FUNC-LABEL: {{^}}ctpop_i64_in_br:
+; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd
+; SI: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}}
+; SI: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]]
+; SI: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]]
+; SI: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}}
+; SI: s_endpgm
+define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) {
 entry:
-  %0 = icmp eq i32 %cond, 0
-  br i1 %0, label %if, label %else
+  %tmp0 = icmp eq i32 %cond, 0
+  br i1 %tmp0, label %if, label %else
 
 if:
-  %1 = load i64 addrspace(1)* %in
-  %2 = call i64 @llvm.ctpop.i64(i64 %1)
+  %tmp2 = call i64 @llvm.ctpop.i64(i64 %ctpop_arg)
   br label %endif
 
 else:
-  %3 = getelementptr i64 addrspace(1)* %in, i32 1
-  %4 = load i64 addrspace(1)* %3
+  %tmp3 = getelementptr i64 addrspace(1)* %in, i32 1
+  %tmp4 = load i64 addrspace(1)* %tmp3
   br label %endif
 
 endif:
-  %5 = phi i64 [%2, %if], [%4, %else]
-  store i64 %5, i64 addrspace(1)* %out
+  %tmp5 = phi i64 [%tmp2, %if], [%tmp4, %else]
+  store i64 %tmp5, i64 addrspace(1)* %out
   ret void
 }
diff --git a/test/CodeGen/R600/cttz-ctlz.ll b/test/CodeGen/R600/cttz-ctlz.ll
new file mode 100644
index 000000000000..6be06d243eaa
--- /dev/null
+++ b/test/CodeGen/R600/cttz-ctlz.ll
@@ -0,0 +1,224 @@
+; RUN: opt -S -codegenprepare -mtriple=r600-unknown-unknown -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=ALL %s
+
+
+define i64 @test1(i64 %A) {
+; ALL-LABEL: @test1(
+; SI: [[CTLZ:%[A-Za-z0-9]+]] = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
+; SI-NEXT: ret i64 [[CTLZ]]
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
+  ret i64 %cond
+}
+
+
+define i32 @test2(i32 %A) {
+; ALL-LABEL: @test2(
+; SI: [[CTLZ:%[A-Za-z0-9]+]] = call i32 @llvm.ctlz.i32(i32 %A, i1 false)
+; SI-NEXT: ret i32 [[CTLZ]]
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+  ret i32 %cond
+}
+
+
+define signext i16 @test3(i16 signext %A) {
+; ALL-LABEL: @test3(
+; SI: [[CTLZ:%[A-Za-z0-9]+]] = call i16 @llvm.ctlz.i16(i16 %A, i1 false)
+; SI-NEXT: ret i16 [[CTLZ]]
+entry:
+  %tobool = icmp eq i16 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
+  ret i16 %cond
+}
+
+
+define i64 @test1b(i64 %A) {
+; ALL-LABEL: @test1b(
+; SI: [[CTTZ:%[A-Za-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+; SI-NEXT: ret i64 [[CTTZ]]
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
+  ret i64 %cond
+}
+
+
+define i32 @test2b(i32 %A) {
+; ALL-LABEL: @test2b(
+; SI: [[CTTZ:%[A-Za-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %A, i1 false)
+; SI-NEXT: ret i32 [[CTTZ]]
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+  ret i32 %cond
+}
+
+
+define signext i16 @test3b(i16 signext %A) {
+; ALL-LABEL: @test3b(
+; SI: [[CTTZ:%[A-Za-z0-9]+]] = call i16 @llvm.cttz.i16(i16 %A, i1 false)
+; SI-NEXT: ret i16 [[CTTZ]]
+entry:
+  %tobool = icmp eq i16 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
+  ret i16 %cond
+}
+
+
+define i64 @test1c(i64 %A) {
+; ALL-LABEL: @test1c(
+; ALL: icmp eq i64 %A, 0
+; ALL: call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
+  ret i64 %cond
+}
+
+define i32 @test2c(i32 %A) {
+; ALL-LABEL: @test2c(
+; ALL: icmp eq i32 %A, 0
+; ALL: call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
+  ret i32 %cond
+}
+
+
+define signext i16 @test3c(i16 signext %A) {
+; ALL-LABEL: @test3c(
+; ALL: icmp eq i16 %A, 0
+; ALL: call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+entry:
+  %tobool = icmp eq i16 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
+  ret i16 %cond
+}
+
+
+define i64 @test1d(i64 %A) {
+; ALL-LABEL: @test1d(
+; ALL: icmp eq i64 %A, 0
+; ALL: call i64 @llvm.cttz.i64(i64 %A, i1 true)
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
+  ret i64 %cond
+}
+
+
+define i32 @test2d(i32 %A) {
+; ALL-LABEL: @test2d(
+; ALL: icmp eq i32 %A, 0
+; ALL: call i32 @llvm.cttz.i32(i32 %A, i1 true)
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
+  ret i32 %cond
+}
+
+
+define signext i16 @test3d(i16 signext %A) {
+; ALL-LABEL: @test3d(
+; ALL: icmp eq i16 %A, 0
+; ALL: call i16 @llvm.cttz.i16(i16 %A, i1 true)
+entry:
+  %tobool = icmp eq i16 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
+  ret i16 %cond
+}
+
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
diff --git a/test/CodeGen/R600/cttz_zero_undef.ll b/test/CodeGen/R600/cttz_zero_undef.ll
index 9c4a3558d094..ab59360694bf 100644
--- a/test/CodeGen/R600/cttz_zero_undef.ll
+++ b/test/CodeGen/R600/cttz_zero_undef.ll
@@ -1,16 +1,16 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
 declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
 declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone
 
-; FUNC-LABEL: @s_cttz_zero_undef_i32:
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
-; SI: S_FF1_I32_B32 [[SRESULT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_cttz_zero_undef_i32:
+; SI: s_load_dword [[VAL:s[0-9]+]],
+; SI: s_ff1_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
@@ -19,11 +19,11 @@ define void @s_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 %val) nou
   ret void
 }
 
-; FUNC-LABEL: @v_cttz_zero_undef_i32:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_FFBL_B32_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_cttz_zero_undef_i32:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_ffbl_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
@@ -33,12 +33,12 @@ define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace
   ret void
 }
 
-; FUNC-LABEL: @v_cttz_zero_undef_v2i32:
-; SI: BUFFER_LOAD_DWORDX2
-; SI: V_FFBL_B32_e32
-; SI: V_FFBL_B32_e32
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_cttz_zero_undef_v2i32:
+; SI: buffer_load_dwordx2
+; SI: v_ffbl_b32_e32
+; SI: v_ffbl_b32_e32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
@@ -49,14 +49,14 @@ define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x
   ret void
 }
 
-; FUNC-LABEL: @v_cttz_zero_undef_v4i32:
-; SI: BUFFER_LOAD_DWORDX4
-; SI: V_FFBL_B32_e32
-; SI: V_FFBL_B32_e32
-; SI: V_FFBL_B32_e32
-; SI: V_FFBL_B32_e32
-; SI: BUFFER_STORE_DWORDX4
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_cttz_zero_undef_v4i32:
+; SI: buffer_load_dwordx4
+; SI: v_ffbl_b32_e32
+; SI: v_ffbl_b32_e32
+; SI: v_ffbl_b32_e32
+; SI: v_ffbl_b32_e32
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
diff --git a/test/CodeGen/R600/cvt_f32_ubyte.ll b/test/CodeGen/R600/cvt_f32_ubyte.ll
index 06a601065c3e..e26ee12f6f6d 100644
--- a/test/CodeGen/R600/cvt_f32_ubyte.ll
+++ b/test/CodeGen/R600/cvt_f32_ubyte.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-; SI-LABEL: @load_i8_to_f32:
-; SI: BUFFER_LOAD_UBYTE [[LOADREG:v[0-9]+]],
-; SI-NOT: BFE
-; SI-NOT: LSHR
-; SI: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
-; SI: BUFFER_STORE_DWORD [[CONV]],
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: {{^}}load_i8_to_f32:
+; SI: buffer_load_ubyte [[LOADREG:v[0-9]+]],
+; SI-NOT: bfe
+; SI-NOT: lshr
+; SI: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
+; SI: buffer_store_dword [[CONV]],
 define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
   %load = load i8 addrspace(1)* %in, align 1
   %cvt = uitofp i8 %load to float
@@ -13,76 +13,96 @@ define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* n
   ret void
 }
 
-; SI-LABEL: @load_v2i8_to_v2f32:
-; SI: BUFFER_LOAD_USHORT [[LOADREG:v[0-9]+]],
-; SI-NOT: BFE
-; SI-NOT: LSHR
-; SI-NOT: AND
-; SI-DAG: V_CVT_F32_UBYTE1_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
-; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
-; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+; SI-LABEL: {{^}}load_v2i8_to_v2f32:
+; SI: buffer_load_ushort [[LOADREG:v[0-9]+]],
+; SI-NOT: bfe
+; SI-NOT: lshr
+; SI-NOT: and
+; SI-DAG: v_cvt_f32_ubyte1_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
 define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <2 x i8> addrspace(1)* %in, align 1
+  %load = load <2 x i8> addrspace(1)* %in, align 2
   %cvt = uitofp <2 x i8> %load to <2 x float>
   store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
   ret void
 }
 
-; SI-LABEL: @load_v3i8_to_v3f32:
-; SI-NOT: BFE
-; SI-NOT: V_CVT_F32_UBYTE3_e32
-; SI-DAG: V_CVT_F32_UBYTE2_e32
-; SI-DAG: V_CVT_F32_UBYTE1_e32
-; SI-DAG: V_CVT_F32_UBYTE0_e32
-; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+; SI-LABEL: {{^}}load_v3i8_to_v3f32:
+; SI-NOT: bfe
+; SI-NOT: v_cvt_f32_ubyte3_e32
+; SI-DAG: v_cvt_f32_ubyte2_e32
+; SI-DAG: v_cvt_f32_ubyte1_e32
+; SI-DAG: v_cvt_f32_ubyte0_e32
+; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
 define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <3 x i8> addrspace(1)* %in, align 1
+  %load = load <3 x i8> addrspace(1)* %in, align 4
   %cvt = uitofp <3 x i8> %load to <3 x float>
   store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
   ret void
 }
 
-; SI-LABEL: @load_v4i8_to_v4f32:
-; We can't use BUFFER_LOAD_DWORD here, because the load is byte aligned, and
-; BUFFER_LOAD_DWORD requires dword alignment.
-; SI: BUFFER_LOAD_USHORT
-; SI: BUFFER_LOAD_USHORT
-; SI: V_OR_B32_e32 [[LOADREG:v[0-9]+]]
-; SI-NOT: BFE
-; SI-NOT: LSHR
-; SI-DAG: V_CVT_F32_UBYTE3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
-; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, [[LOADREG]]
-; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, [[LOADREG]]
-; SI-DAG: V_CVT_F32_UBYTE0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
-; SI: BUFFER_STORE_DWORDX4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+; SI-LABEL: {{^}}load_v4i8_to_v4f32:
+; SI: buffer_load_dword [[LOADREG:v[0-9]+]]
+; SI-NOT: bfe
+; SI-NOT: lshr
+; SI-DAG: v_cvt_f32_ubyte3_e32 v[[HIRESULT:[0-9]+]], [[LOADREG]]
+; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, [[LOADREG]]
+; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, [[LOADREG]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
+; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
 define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
+  %load = load <4 x i8> addrspace(1)* %in, align 4
+  %cvt = uitofp <4 x i8> %load to <4 x float>
+  store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
+; This should not be adding instructions to shift into the correct
+; position in the word for the component.
+
+; SI-LABEL: {{^}}load_v4i8_to_v4f32_unaligned:
+; SI: buffer_load_ubyte [[LOADREG0:v[0-9]+]]
+; SI: buffer_load_ubyte [[LOADREG1:v[0-9]+]]
+; SI: buffer_load_ubyte [[LOADREG2:v[0-9]+]]
+; SI: buffer_load_ubyte [[LOADREG3:v[0-9]+]]
+; SI-NOT: v_lshlrev_b32
+; SI-NOT: v_or_b32
+
+; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG0]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG1]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, [[LOADREG2]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v[[HIRESULT:[0-9]+]], [[LOADREG3]]
+
+; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
+define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
   %load = load <4 x i8> addrspace(1)* %in, align 1
   %cvt = uitofp <4 x i8> %load to <4 x float>
   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
   ret void
 }
 
-; XXX - This should really still be able to use the V_CVT_F32_UBYTE0
+; XXX - This should really still be able to use the v_cvt_f32_ubyte0
 ; for each component, but computeKnownBits doesn't handle vectors very
 ; well.
 
-; SI-LABEL: @load_v4i8_to_v4f32_2_uses:
-; SI: BUFFER_LOAD_UBYTE
-; SI: V_CVT_F32_UBYTE0_e32
-; SI: BUFFER_LOAD_UBYTE
-; SI: V_CVT_F32_UBYTE0_e32
-; SI: BUFFER_LOAD_UBYTE
-; SI: V_CVT_F32_UBYTE0_e32
-; SI: BUFFER_LOAD_UBYTE
-; SI: V_CVT_F32_UBYTE0_e32
+; SI-LABEL: {{^}}load_v4i8_to_v4f32_2_uses:
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: buffer_load_ubyte
+; SI: v_cvt_f32_ubyte0_e32
+; SI: v_cvt_f32_ubyte0_e32
+; SI: v_cvt_f32_ubyte0_e32
+; SI: v_cvt_f32_ubyte0_e32
 
 ; XXX - replace with this when v4i8 loads aren't scalarized anymore.
-; XSI: BUFFER_LOAD_DWORD
-; XSI: V_CVT_F32_U32_e32
-; XSI: V_CVT_F32_U32_e32
-; XSI: V_CVT_F32_U32_e32
-; XSI: V_CVT_F32_U32_e32
-; SI: S_ENDPGM
+; XSI: buffer_load_dword
+; XSI: v_cvt_f32_u32_e32
+; XSI: v_cvt_f32_u32_e32
+; XSI: v_cvt_f32_u32_e32
+; XSI: v_cvt_f32_u32_e32
+; SI: s_endpgm
 define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
   %load = load <4 x i8> addrspace(1)* %in, align 4
   %cvt = uitofp <4 x i8> %load to <4 x float>
@@ -93,8 +113,8 @@ define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <
 }
 
 ; Make sure this doesn't crash.
-; SI-LABEL: @load_v7i8_to_v7f32:
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}load_v7i8_to_v7f32:
+; SI: s_endpgm
 define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
   %load = load <7 x i8> addrspace(1)* %in, align 1
   %cvt = uitofp <7 x i8> %load to <7 x float>
@@ -102,28 +122,28 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8>
   ret void
 }
 
-; SI-LABEL: @load_v8i8_to_v8f32:
-; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
-; SI-NOT: BFE
-; SI-NOT: LSHR
-; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[LOLOAD]]
-; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[LOLOAD]]
-; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[LOLOAD]]
-; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[LOLOAD]]
-; SI-DAG: V_CVT_F32_UBYTE3_e32 v{{[0-9]+}}, v[[HILOAD]]
-; SI-DAG: V_CVT_F32_UBYTE2_e32 v{{[0-9]+}}, v[[HILOAD]]
-; SI-DAG: V_CVT_F32_UBYTE1_e32 v{{[0-9]+}}, v[[HILOAD]]
-; SI-DAG: V_CVT_F32_UBYTE0_e32 v{{[0-9]+}}, v[[HILOAD]]
-; SI-NOT: BFE
-; SI-NOT: LSHR
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
+; SI-LABEL: {{^}}load_v8i8_to_v8f32:
+; SI: buffer_load_dwordx2 v{{\[}}[[LOLOAD:[0-9]+]]:[[HILOAD:[0-9]+]]{{\]}},
+; SI-NOT: bfe
+; SI-NOT: lshr
+; SI-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[LOLOAD]]
+; SI-DAG: v_cvt_f32_ubyte3_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-DAG: v_cvt_f32_ubyte2_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-DAG: v_cvt_f32_ubyte1_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[HILOAD]]
+; SI-NOT: bfe
+; SI-NOT: lshr
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
 define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
   %load = load <8 x i8> addrspace(1)* %in, align 1
   %cvt = uitofp <8 x i8> %load to <8 x float>
@@ -131,11 +151,11 @@ define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8>
   ret void
 }
 
-; SI-LABEL: @i8_zext_inreg_i32_to_f32:
-; SI: BUFFER_LOAD_DWORD [[LOADREG:v[0-9]+]],
-; SI: V_ADD_I32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
-; SI-NEXT: V_CVT_F32_UBYTE0_e32 [[CONV:v[0-9]+]], [[ADD]]
-; SI: BUFFER_STORE_DWORD [[CONV]],
+; SI-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
+; SI: buffer_load_dword [[LOADREG:v[0-9]+]],
+; SI: v_add_i32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
+; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
+; SI: buffer_store_dword [[CONV]],
 define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
   %load = load i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 2
@@ -145,7 +165,7 @@ define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addr
   ret void
 }
 
-; SI-LABEL: @i8_zext_inreg_hi1_to_f32:
+; SI-LABEL: {{^}}i8_zext_inreg_hi1_to_f32:
 define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
   %load = load i32 addrspace(1)* %in, align 4
   %inreg = and i32 %load, 65280
diff --git a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
index 6607c1218b56..1e47bfa0c779 100644
--- a/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
+++ b/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
@@ -7,7 +7,7 @@
 ; ISD::UINT_TO_FP and ISD::SINT_TO_FP opcodes.
 
 
-; CHECK: @sint
+; CHECK: {{^}}sint:
 ; CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
@@ -21,7 +21,7 @@ entry:
   ret void
 }
 
-;CHECK: @uint
+;CHECK: {{^}}uint:
 ;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/default-fp-mode.ll b/test/CodeGen/R600/default-fp-mode.ll
index b24a7a246fda..6b6d49996eb1 100644
--- a/test/CodeGen/R600/default-fp-mode.ll
+++ b/test/CodeGen/R600/default-fp-mode.ll
@@ -1,12 +1,12 @@
-; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=FP64-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=FP32-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals,+fp64-denormals < %s | FileCheck -check-prefix=BOTH-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals,-fp64-denormals < %s | FileCheck -check-prefix=NO-DENORMAL -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=+fp64-denormals < %s | FileCheck -check-prefix=DEFAULT -check-prefix=FUNC %s
 
-; FUNC-LABEL: @test_kernel
+; FUNC-LABEL: {{^}}test_kernel:
 
 ; DEFAULT: FloatMode: 192
 ; DEFAULT: IeeeMode: 0
diff --git a/test/CodeGen/R600/disconnected-predset-break-bug.ll b/test/CodeGen/R600/disconnected-predset-break-bug.ll
index 012c17b8fe4b..858e4b98f3ab 100644
--- a/test/CodeGen/R600/disconnected-predset-break-bug.ll
+++ b/test/CodeGen/R600/disconnected-predset-break-bug.ll
@@ -4,7 +4,7 @@
 ; result.  This tests that there are no instructions between the PRED_SET*
 ; and the PREDICATE_BREAK in this loop.
 
-; CHECK: @loop_ge
+; CHECK: {{^}}loop_ge:
 ; CHECK: LOOP_START_DX10
 ; CHECK: ALU_PUSH_BEFORE
 ; CHECK-NEXT: JUMP
diff --git a/test/CodeGen/R600/dot4-folding.ll b/test/CodeGen/R600/dot4-folding.ll
index 3e8330f9b3ed..dca6a59c6e6a 100644
--- a/test/CodeGen/R600/dot4-folding.ll
+++ b/test/CodeGen/R600/dot4-folding.ll
@@ -2,7 +2,7 @@
 
 ; Exactly one constant vector can be folded into dot4, which means exactly
 ; 4 MOV instructions
-; CHECK: @main
+; CHECK: {{^}}main:
 ; CHECK: MOV
 ; CHECK: MOV
 ; CHECK: MOV
diff --git a/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
new file mode 100644
index 000000000000..41afd503ef88
--- /dev/null
+++ b/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
@@ -0,0 +1,69 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI --check-prefix=CHECK %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare void @llvm.AMDGPU.barrier.local() #1
+
+; Function Attrs: nounwind
+; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop:
+; CHECK: BB0_1:
+; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]],
+; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
+; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR4]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
+; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
+; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x84]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
+; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
+
+; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:0 offset1:1
+; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset0:32 offset1:33
+; CI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] offset:256
+; CHECK: s_endpgm
+define void @signed_ds_offset_addressing_loop(float addrspace(1)* noalias nocapture %out, float addrspace(3)* noalias nocapture readonly %lptr, i32 %n) #2 {
+entry:
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #0
+  %mul = shl nsw i32 %x.i, 1
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %sum.03 = phi float [ 0.000000e+00, %entry ], [ %add13, %for.body ]
+  %offset.02 = phi i32 [ %mul, %entry ], [ %add14, %for.body ]
+  %k.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  tail call void @llvm.AMDGPU.barrier.local() #1
+  %arrayidx = getelementptr inbounds float addrspace(3)* %lptr, i32 %offset.02
+  %tmp = load float addrspace(3)* %arrayidx, align 4
+  %add1 = add nsw i32 %offset.02, 1
+  %arrayidx2 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add1
+  %tmp1 = load float addrspace(3)* %arrayidx2, align 4
+  %add3 = add nsw i32 %offset.02, 32
+  %arrayidx4 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add3
+  %tmp2 = load float addrspace(3)* %arrayidx4, align 4
+  %add5 = add nsw i32 %offset.02, 33
+  %arrayidx6 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add5
+  %tmp3 = load float addrspace(3)* %arrayidx6, align 4
+  %add7 = add nsw i32 %offset.02, 64
+  %arrayidx8 = getelementptr inbounds float addrspace(3)* %lptr, i32 %add7
+  %tmp4 = load float addrspace(3)* %arrayidx8, align 4
+  %add9 = fadd float %tmp, %tmp1
+  %add10 = fadd float %add9, %tmp2
+  %add11 = fadd float %add10, %tmp3
+  %add12 = fadd float %add11, %tmp4
+  %add13 = fadd float %sum.03, %add12
+  %inc = add nsw i32 %k.01, 1
+  %add14 = add nsw i32 %offset.02, 97
+  %exitcond = icmp eq i32 %inc, 8
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  %tmp5 = sext i32 %x.i to i64
+  %arrayidx15 = getelementptr inbounds float addrspace(1)* %out, i64 %tmp5
+  store float %add13, float addrspace(1)* %arrayidx15, align 4
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { noduplicate nounwind }
+attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/R600/ds_read2.ll b/test/CodeGen/R600/ds_read2.ll
new file mode 100644
index 000000000000..c06b0b1392e2
--- /dev/null
+++ b/test/CodeGen/R600/ds_read2.ll
@@ -0,0 +1,515 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s
+
+; FIXME: We don't get cases where the address was an SGPR because we
+; get a copy to the address register for each one.
+
+@lds = addrspace(3) global [512 x float] undef, align 4
+ @lds.f64 = addrspace(3) global [512 x double] undef, align 8
+
+; SI-LABEL: @simple_read2_f32
+; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:8
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2_f32(float addrspace(1)* %out) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_read2_f32_max_offset
+; SI: ds_read2_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:255
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 255
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_read2_f32_too_far
+; SI-NOT ds_read2_b32
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
+; SI: s_endpgm
+define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 257
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_read2_f32_x2
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
+; SI: s_endpgm
+define void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 0
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+
+  %idx.1 = add nsw i32 %tid.x, 8
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %sum.0 = fadd float %val0, %val1
+
+  %idx.2 = add nsw i32 %tid.x, 11
+  %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+  %val2 = load float addrspace(3)* %arrayidx2, align 4
+
+  %idx.3 = add nsw i32 %tid.x, 27
+  %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+  %val3 = load float addrspace(3)* %arrayidx3, align 4
+  %sum.1 = fadd float %val2, %val3
+
+  %sum = fadd float %sum.0, %sum.1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %idx.0
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; Make sure there is an instruction between the two sets of reads.
+; SI-LABEL: @simple_read2_f32_x2_barrier
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:0 offset1:8
+; SI: s_barrier
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
+; SI: s_endpgm
+define void @simple_read2_f32_x2_barrier(float addrspace(1)* %out) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 0
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+
+  %idx.1 = add nsw i32 %tid.x, 8
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %sum.0 = fadd float %val0, %val1
+
+  call void @llvm.AMDGPU.barrier.local() #2
+
+  %idx.2 = add nsw i32 %tid.x, 11
+  %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+  %val2 = load float addrspace(3)* %arrayidx2, align 4
+
+  %idx.3 = add nsw i32 %tid.x, 27
+  %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+  %val3 = load float addrspace(3)* %arrayidx3, align 4
+  %sum.1 = fadd float %val2, %val3
+
+  %sum = fadd float %sum.0, %sum.1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %idx.0
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; For some reason adding something to the base address for the first
+; element results in only folding the inner pair.
+
+; SI-LABEL: @simple_read2_f32_x2_nonzero_base
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR:v[0-9]+]] offset0:2 offset1:8
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASEADDR]] offset0:11 offset1:27
+; SI: s_endpgm
+define void @simple_read2_f32_x2_nonzero_base(float addrspace(1)* %out) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 2
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+
+  %idx.1 = add nsw i32 %tid.x, 8
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %sum.0 = fadd float %val0, %val1
+
+  %idx.2 = add nsw i32 %tid.x, 11
+  %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+  %val2 = load float addrspace(3)* %arrayidx2, align 4
+
+  %idx.3 = add nsw i32 %tid.x, 27
+  %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+  %val3 = load float addrspace(3)* %arrayidx3, align 4
+  %sum.1 = fadd float %val2, %val3
+
+  %sum = fadd float %sum.0, %sum.1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %idx.0
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; Be careful of vectors of pointers. We don't know if the 2 pointers
+; in the vectors are really the same base, so this is not safe to
+; merge.
+; Base pointers come from different subregister of same super
+; register. We can't safely merge this.
+
+; SI-LABEL: @read2_ptr_is_subreg_arg_f32
+; SI-NOT: ds_read2_b32
+; SI: ds_read_b32
+; SI: ds_read_b32
+; SI: s_endpgm
+define void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
+  %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
+  %gep = getelementptr inbounds <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
+  %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
+  %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
+  %val0 = load float addrspace(3)* %gep.0, align 4
+  %val1 = load float addrspace(3)* %gep.1, align 4
+  %add.x = add nsw i32 %x.i, 8
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; Apply a constant scalar offset after the pointer vector extract.  We
+; are rejecting merges that have the same, constant 0 offset, so make
+; sure we are really rejecting it because of the different
+; subregisters.
+
+; SI-LABEL: @read2_ptr_is_subreg_arg_offset_f32
+; SI-NOT: ds_read2_b32
+; SI: ds_read_b32
+; SI: ds_read_b32
+; SI: s_endpgm
+define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
+  %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
+  %gep = getelementptr inbounds <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
+  %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
+  %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
+
+  ; Apply an additional offset after the vector that will be more obviously folded.
+  %gep.1.offset = getelementptr float addrspace(3)* %gep.1, i32 8
+
+  %val0 = load float addrspace(3)* %gep.0, align 4
+  %val1 = load float addrspace(3)* %gep.1.offset, align 4
+  %add.x = add nsw i32 %x.i, 8
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; We should be able to merge in this case, but probably not worth the effort.
+; SI-NOT: ds_read2_b32
+; SI: ds_read_b32
+; SI: ds_read_b32
+; SI: s_endpgm
+define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %ptr.0 = insertelement <2 x [512 x float] addrspace(3)*> undef, [512 x float] addrspace(3)* @lds, i32 0
+  %ptr.1 = insertelement <2 x [512 x float] addrspace(3)*> %ptr.0, [512 x float] addrspace(3)* @lds, i32 1
+  %x.i.v.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
+  %x.i.v.1 = insertelement <2 x i32> %x.i.v.0, i32 %x.i, i32 1
+  %idx = add <2 x i32> %x.i.v.1, <i32 0, i32 8>
+  %gep = getelementptr inbounds <2 x [512 x float] addrspace(3)*> %ptr.1, <2 x i32> <i32 0, i32 0>, <2 x i32> %idx
+  %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
+  %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
+  %val0 = load float addrspace(3)* %gep.0, align 4
+  %val1 = load float addrspace(3)* %gep.1, align 4
+  %add.x = add nsw i32 %x.i, 8
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_read2_f32_volatile_0
+; SI-NOT ds_read2_b32
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
+; SI: s_endpgm
+define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  %val0 = load volatile float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_read2_f32_volatile_1
+; SI-NOT ds_read2_b32
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:32
+; SI: s_endpgm
+define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  %val1 = load volatile float addrspace(3)* %arrayidx1, align 4
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; Can't fold since not correctly aligned.
+; XXX: This isn't really testing anything useful now. I think CI
+; allows unaligned LDS accesses, which would be a problem here.
+; SI-LABEL: @unaligned_read2_f32
+; SI-NOT: ds_read2_b32
+; SI: s_endpgm
+define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
+  %val0 = load float addrspace(3)* %arrayidx0, align 1
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x
+  %val1 = load float addrspace(3)* %arrayidx1, align 1
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; SI-LABEL: @misaligned_2_simple_read2_f32
+; SI-NOT: ds_read2_b32
+; SI: s_endpgm
+define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
+  %val0 = load float addrspace(3)* %arrayidx0, align 2
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x
+  %val1 = load float addrspace(3)* %arrayidx1, align 2
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_read2_f64
+; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, {{v[0-9]+}}
+; SI: ds_read2_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, [[VPTR]] offset0:0 offset1:8
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2_f64(double addrspace(1)* %out) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %sum = fadd double %val0, %val1
+  %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+  store double %sum, double addrspace(1)* %out.gep, align 8
+  ret void
+}
+
+; SI-LABEL: @simple_read2_f64_max_offset
+; SI: ds_read2_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:255
+; SI: s_endpgm
+define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %add.x = add nsw i32 %x.i, 255
+  %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %sum = fadd double %val0, %val1
+  %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+  store double %sum, double addrspace(1)* %out.gep, align 8
+  ret void
+}
+
+; SI-LABEL: @simple_read2_f64_too_far
+; SI-NOT ds_read2_b64
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:2056
+; SI: s_endpgm
+define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %add.x = add nsw i32 %x.i, 257
+  %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %sum = fadd double %val0, %val1
+  %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+  store double %sum, double addrspace(1)* %out.gep, align 8
+  ret void
+}
+
+; Alignment only 4
+; SI-LABEL: @misaligned_read2_f64
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:14 offset1:15
+; SI: s_endpgm
+define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
+  %val0 = load double addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 7
+  %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
+  %val1 = load double addrspace(3)* %arrayidx1, align 4
+  %sum = fadd double %val0, %val1
+  %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+  store double %sum, double addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+@foo = addrspace(3) global [4 x i32] undef, align 4
+
+; SI-LABEL: @load_constant_adjacent_offsets
+; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
+define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
+  %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+  %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
+  %sum = add i32 %val0, %val1
+  store i32 %sum, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @load_constant_disjoint_offsets
+; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
+define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
+  %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+  %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
+  %sum = add i32 %val0, %val1
+  store i32 %sum, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+@bar = addrspace(3) global [4 x i64] undef, align 4
+
+; SI-LABEL: @load_misaligned64_constant_offsets
+; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
+define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
+  %val0 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
+  %val1 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
+  %sum = add i64 %val0, %val1
+  store i64 %sum, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+@bar.large = addrspace(3) global [4096 x i64] undef, align 4
+
+; SI-LABEL: @load_misaligned64_constant_large_offsets
+; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
+; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000
+; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE0]] offset0:0 offset1:1
+; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
+; SI: s_endpgm
+define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
+  %val0 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
+  %val1 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
+  %sum = add i64 %val0, %val1
+  store i64 %sum, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
+@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
+
+define void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tgid.x() #1
+  %y.i = tail call i32 @llvm.r600.read.tidig.y() #1
+  %arrayidx44 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
+  %tmp16 = load float addrspace(3)* %arrayidx44, align 4
+  %add47 = add nsw i32 %x.i, 1
+  %arrayidx48 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
+  %tmp17 = load float addrspace(3)* %arrayidx48, align 4
+  %add51 = add nsw i32 %x.i, 16
+  %arrayidx52 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
+  %tmp18 = load float addrspace(3)* %arrayidx52, align 4
+  %add55 = add nsw i32 %x.i, 17
+  %arrayidx56 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
+  %tmp19 = load float addrspace(3)* %arrayidx56, align 4
+  %arrayidx60 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
+  %tmp20 = load float addrspace(3)* %arrayidx60, align 4
+  %add63 = add nsw i32 %y.i, 1
+  %arrayidx64 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
+  %tmp21 = load float addrspace(3)* %arrayidx64, align 4
+  %add67 = add nsw i32 %y.i, 32
+  %arrayidx68 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
+  %tmp22 = load float addrspace(3)* %arrayidx68, align 4
+  %add71 = add nsw i32 %y.i, 33
+  %arrayidx72 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
+  %tmp23 = load float addrspace(3)* %arrayidx72, align 4
+  %add75 = add nsw i32 %y.i, 64
+  %arrayidx76 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
+  %tmp24 = load float addrspace(3)* %arrayidx76, align 4
+  %add79 = add nsw i32 %y.i, 65
+  %arrayidx80 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
+  %tmp25 = load float addrspace(3)* %arrayidx80, align 4
+  %sum.0 = fadd float %tmp16, %tmp17
+  %sum.1 = fadd float %sum.0, %tmp18
+  %sum.2 = fadd float %sum.1, %tmp19
+  %sum.3 = fadd float %sum.2, %tmp20
+  %sum.4 = fadd float %sum.3, %tmp21
+  %sum.5 = fadd float %sum.4, %tmp22
+  %sum.6 = fadd float %sum.5, %tmp23
+  %sum.7 = fadd float %sum.6, %tmp24
+  %sum.8 = fadd float %sum.7, %tmp25
+  store float %sum.8, float addrspace(1)* %C, align 4
+  ret void
+}
+
+define void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(3)* %in) #0 {
+  %load = load <2 x i32> addrspace(3)* %in, align 4
+  store <2 x i32> %load, <2 x i32> addrspace(1)* %out, align 8
+  ret void
+}
+
+define void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) #0 {
+  %load = load i64 addrspace(3)* %in, align 4
+  store i64 %load, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.y() #1
+
+; Function Attrs: noduplicate nounwind
+declare void @llvm.AMDGPU.barrier.local() #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { noduplicate nounwind }
diff --git a/test/CodeGen/R600/ds_read2_offset_order.ll b/test/CodeGen/R600/ds_read2_offset_order.ll
new file mode 100644
index 000000000000..bdbe22ff2348
--- /dev/null
+++ b/test/CodeGen/R600/ds_read2_offset_order.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s
+
+; XFAIL: *
+
+@lds = addrspace(3) global [512 x float] undef, align 4
+
+; SI-LABEL: {{^}}offset_order:
+
+; SI: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:56
+; SI: ds_read2st64_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:0 offset1:4
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:2 offset1:3
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:11 offset1:1
+
+define void @offset_order(float addrspace(1)* %out) {
+entry:
+  %ptr0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 0
+  %val0 = load float addrspace(3)* %ptr0
+
+  %ptr1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 256
+  %val1 = load float addrspace(3)* %ptr1
+  %add1 = fadd float %val0, %val1
+
+  %ptr2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 3
+  %val2 = load float addrspace(3)* %ptr2
+  %add2 = fadd float %add1, %val2
+
+  %ptr3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 2
+  %val3 = load float addrspace(3)* %ptr3
+  %add3 = fadd float %add2, %val3
+
+  %ptr4 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 12
+  %val4 = load float addrspace(3)* %ptr4
+  %add4 = fadd float %add3, %val4
+
+  %ptr5 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 14
+  %val5 = load float addrspace(3)* %ptr5
+  %add5 = fadd float %add4, %val5
+
+  %ptr6 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 11
+  %val6 = load float addrspace(3)* %ptr6
+  %add6 = fadd float %add5, %val6
+  store float %add6, float addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/ds_read2st64.ll b/test/CodeGen/R600/ds_read2st64.ll
new file mode 100644
index 000000000000..24834af20404
--- /dev/null
+++ b/test/CodeGen/R600/ds_read2st64.ll
@@ -0,0 +1,272 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
+
+@lds = addrspace(3) global [512 x float] undef, align 4
+@lds.f64 = addrspace(3) global [512 x double] undef, align 8
+
+
+; SI-LABEL: @simple_read2st64_f32_0_1
+; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 64
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_read2st64_f32_1_2
+; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2st64_f32_1_2(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %add.x.0 = add nsw i32 %x.i, 64
+  %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.0
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %add.x.1 = add nsw i32 %x.i, 128
+  %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.1
+  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_read2st64_f32_max_offset
+; SI: ds_read2st64_b32 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:255
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], v[[HI_VREG]], v[[LO_VREG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %add.x.0 = add nsw i32 %x.i, 64
+  %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.0
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %add.x.1 = add nsw i32 %x.i, 16320
+  %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.1
+  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_read2st64_f32_over_max_offset
+; SI-NOT: ds_read2st64_b32
+; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
+; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
+; SI: s_endpgm
+define void @simple_read2st64_f32_over_max_offset(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %add.x.0 = add nsw i32 %x.i, 64
+  %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.0
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %add.x.1 = add nsw i32 %x.i, 16384
+  %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x.1
+  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; SI-LABEL: @odd_invalid_read2st64_f32_0
+; SI-NOT: ds_read2st64_b32
+; SI: s_endpgm
+define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 63
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; SI-LABEL: @odd_invalid_read2st64_f32_1
+; SI-NOT: ds_read2st64_b32
+; SI: s_endpgm
+define void @odd_invalid_read2st64_f32_1(float addrspace(1)* %out) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %add.x.0 = add nsw i32 %x.i, 64
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %add.x.1 = add nsw i32 %x.i, 127
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
+  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %sum = fadd float %val0, %val1
+  %out.gep = getelementptr inbounds float addrspace(1)* %out, i32 %x.i
+  store float %sum, float addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_read2st64_f64_0_1
+; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %add.x = add nsw i32 %x.i, 64
+  %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %sum = fadd double %val0, %val1
+  %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+  store double %sum, double addrspace(1)* %out.gep, align 8
+  ret void
+}
+
+; SI-LABEL: @simple_read2st64_f64_1_2
+; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:2
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2st64_f64_1_2(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %add.x.0 = add nsw i32 %x.i, 64
+  %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
+  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %add.x.1 = add nsw i32 %x.i, 128
+  %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
+  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %sum = fadd double %val0, %val1
+  %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+  store double %sum, double addrspace(1)* %out.gep, align 8
+  ret void
+}
+
+; Alignment only
+
+; SI-LABEL: @misaligned_read2st64_f64
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:0 offset1:1
+; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset0:128 offset1:129
+; SI: s_endpgm
+define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
+  %val0 = load double addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 64
+  %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
+  %val1 = load double addrspace(3)* %arrayidx1, align 4
+  %sum = fadd double %val0, %val1
+  %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+  store double %sum, double addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; The maximum is not the usual 0xff because 0xff * 8 * 64 > 0xffff
+; SI-LABEL: @simple_read2st64_f64_max_offset
+; SI: ds_read2st64_b64 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:4 offset1:127
+; SI: s_waitcnt lgkmcnt(0)
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[LO_VREG]]:{{[0-9]+\]}}, v{{\[[0-9]+}}:[[HI_VREG]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %add.x.0 = add nsw i32 %x.i, 256
+  %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
+  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %add.x.1 = add nsw i32 %x.i, 8128
+  %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
+  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %sum = fadd double %val0, %val1
+  %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+  store double %sum, double addrspace(1)* %out.gep, align 8
+  ret void
+}
+
+; SI-LABEL: @simple_read2st64_f64_over_max_offset
+; SI-NOT: ds_read2st64_b64
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
+; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
+; SI: s_endpgm
+define void @simple_read2st64_f64_over_max_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %add.x.0 = add nsw i32 %x.i, 64
+  %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
+  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %add.x.1 = add nsw i32 %x.i, 8192
+  %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
+  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %sum = fadd double %val0, %val1
+  %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+  store double %sum, double addrspace(1)* %out.gep, align 8
+  ret void
+}
+
+; SI-LABEL: @invalid_read2st64_f64_odd_offset
+; SI-NOT: ds_read2st64_b64
+; SI: s_endpgm
+define void @invalid_read2st64_f64_odd_offset(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %add.x.0 = add nsw i32 %x.i, 64
+  %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
+  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %add.x.1 = add nsw i32 %x.i, 8129
+  %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
+  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %sum = fadd double %val0, %val1
+  %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+  store double %sum, double addrspace(1)* %out.gep, align 8
+  ret void
+}
+
+; The stride of 8 elements is 8 * 8 bytes. We need to make sure the
+; stride in elements, not bytes, is a multiple of 64.
+
+; SI-LABEL: @byte_size_only_divisible_64_read2_f64
+; SI-NOT: ds_read2st_b64
+; SI: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:0 offset1:8
+; SI: s_endpgm
+define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
+  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
+  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %sum = fadd double %val0, %val1
+  %out.gep = getelementptr inbounds double addrspace(1)* %out, i32 %x.i
+  store double %sum, double addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.y() #1
+
+; Function Attrs: noduplicate nounwind
+declare void @llvm.AMDGPU.barrier.local() #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { noduplicate nounwind }
diff --git a/test/CodeGen/R600/ds_write2.ll b/test/CodeGen/R600/ds_write2.ll
new file mode 100644
index 000000000000..27273e7c674d
--- /dev/null
+++ b/test/CodeGen/R600/ds_write2.ll
@@ -0,0 +1,425 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -strict-whitespace -check-prefix=SI %s
+
+@lds = addrspace(3) global [512 x float] undef, align 4
+@lds.f64 = addrspace(3) global [512 x double] undef, align 8
+
+
+; SI-LABEL: @simple_write2_one_val_f32
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
+define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
+  %val = load float addrspace(1)* %in.gep, align 4
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  store float %val, float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  store float %val, float addrspace(3)* %arrayidx1, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_f32
+; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
+define void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
+  %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
+  %val0 = load float addrspace(1)* %in.gep.0, align 4
+  %val1 = load float addrspace(1)* %in.gep.1, align 4
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  store float %val0, float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  store float %val1, float addrspace(3)* %arrayidx1, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_f32_volatile_0
+; SI-NOT: ds_write2_b32
+; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
+; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
+; SI: s_endpgm
+define void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
+  %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
+  %val0 = load float addrspace(1)* %in0.gep, align 4
+  %val1 = load float addrspace(1)* %in1.gep, align 4
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  store volatile float %val0, float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  store float %val1, float addrspace(3)* %arrayidx1, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_f32_volatile_1
+; SI-NOT: ds_write2_b32
+; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}}
+; SI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:32
+; SI: s_endpgm
+define void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
+  %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
+  %val0 = load float addrspace(1)* %in0.gep, align 4
+  %val1 = load float addrspace(1)* %in1.gep, align 4
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  store float %val0, float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  store volatile float %val1, float addrspace(3)* %arrayidx1, align 4
+  ret void
+}
+
+; 2 data subregisters from different super registers.
+; SI-LABEL: @simple_write2_two_val_subreg2_mixed_f32
+; SI: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:{{[0-9]+\]}}
+; SI: buffer_load_dwordx2 v{{\[[0-9]+}}:[[VAL1:[0-9]+]]{{\]}}
+; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
+define void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in.gep.0 = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
+  %in.gep.1 = getelementptr <2 x float> addrspace(1)* %in.gep.0, i32 1
+  %val0 = load <2 x float> addrspace(1)* %in.gep.0, align 8
+  %val1 = load <2 x float> addrspace(1)* %in.gep.1, align 8
+  %val0.0 = extractelement <2 x float> %val0, i32 0
+  %val1.1 = extractelement <2 x float> %val1, i32 1
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  store float %val0.0, float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  store float %val1.1, float addrspace(3)* %arrayidx1, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_subreg2_f32
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
+define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in.gep = getelementptr <2 x float> addrspace(1)* %in, i32 %x.i
+  %val = load <2 x float> addrspace(1)* %in.gep, align 8
+  %val0 = extractelement <2 x float> %val, i32 0
+  %val1 = extractelement <2 x float> %val, i32 1
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  store float %val0, float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  store float %val1, float addrspace(3)* %arrayidx1, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_subreg4_f32
+; SI-DAG: buffer_load_dwordx4 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
+define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in.gep = getelementptr <4 x float> addrspace(1)* %in, i32 %x.i
+  %val = load <4 x float> addrspace(1)* %in.gep, align 16
+  %val0 = extractelement <4 x float> %val, i32 0
+  %val1 = extractelement <4 x float> %val, i32 3
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  store float %val0, float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  store float %val1, float addrspace(3)* %arrayidx1, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_max_offset_f32
+; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
+; SI: s_endpgm
+define void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
+  %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
+  %val0 = load float addrspace(1)* %in.gep.0, align 4
+  %val1 = load float addrspace(1)* %in.gep.1, align 4
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  store float %val0, float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 255
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  store float %val1, float addrspace(3)* %arrayidx1, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_too_far_f32
+; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}}
+; SI: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:1028
+; SI: s_endpgm
+define void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
+  %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
+  %val0 = load float addrspace(1)* %in0.gep, align 4
+  %val1 = load float addrspace(1)* %in1.gep, align 4
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  store float %val0, float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 257
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  store float %val1, float addrspace(3)* %arrayidx1, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_f32_x2
+; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:0 offset1:8
+; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
+; SI: s_endpgm
+define void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
+  %in1.gep = getelementptr float addrspace(1)* %in1, i32 %tid.x
+  %val0 = load float addrspace(1)* %in0.gep, align 4
+  %val1 = load float addrspace(1)* %in1.gep, align 4
+
+  %idx.0 = add nsw i32 %tid.x, 0
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+  store float %val0, float addrspace(3)* %arrayidx0, align 4
+
+  %idx.1 = add nsw i32 %tid.x, 8
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+  store float %val1, float addrspace(3)* %arrayidx1, align 4
+
+  %idx.2 = add nsw i32 %tid.x, 11
+  %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+  store float %val0, float addrspace(3)* %arrayidx2, align 4
+
+  %idx.3 = add nsw i32 %tid.x, 27
+  %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+  store float %val1, float addrspace(3)* %arrayidx3, align 4
+
+  ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_f32_x2_nonzero_base
+; SI: ds_write2_b32 [[BASEADDR:v[0-9]+]], [[VAL0:v[0-9]+]], [[VAL1:v[0-9]+]] offset0:3 offset1:8
+; SI-NEXT: ds_write2_b32 [[BASEADDR]], [[VAL0]], [[VAL1]] offset0:11 offset1:27
+; SI: s_endpgm
+define void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in0.gep = getelementptr float addrspace(1)* %in0, i32 %tid.x
+  %in1.gep = getelementptr float addrspace(1)* %in1, i32 %tid.x
+  %val0 = load float addrspace(1)* %in0.gep, align 4
+  %val1 = load float addrspace(1)* %in1.gep, align 4
+
+  %idx.0 = add nsw i32 %tid.x, 3
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
+  store float %val0, float addrspace(3)* %arrayidx0, align 4
+
+  %idx.1 = add nsw i32 %tid.x, 8
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
+  store float %val1, float addrspace(3)* %arrayidx1, align 4
+
+  %idx.2 = add nsw i32 %tid.x, 11
+  %arrayidx2 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
+  store float %val0, float addrspace(3)* %arrayidx2, align 4
+
+  %idx.3 = add nsw i32 %tid.x, 27
+  %arrayidx3 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
+  store float %val1, float addrspace(3)* %arrayidx3, align 4
+
+  ret void
+}
+
+; SI-LABEL: @write2_ptr_subreg_arg_two_val_f32
+; SI-NOT: ds_write2_b32
+; SI: ds_write_b32
+; SI: ds_write_b32
+; SI: s_endpgm
+define void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in0.gep = getelementptr float addrspace(1)* %in0, i32 %x.i
+  %in1.gep = getelementptr float addrspace(1)* %in1, i32 %x.i
+  %val0 = load float addrspace(1)* %in0.gep, align 4
+  %val1 = load float addrspace(1)* %in1.gep, align 4
+
+  %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
+  %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
+  %gep = getelementptr inbounds <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
+  %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
+  %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
+
+  ; Apply an additional offset after the vector that will be more obviously folded.
+  %gep.1.offset = getelementptr float addrspace(3)* %gep.1, i32 8
+  store float %val0, float addrspace(3)* %gep.0, align 4
+
+  %add.x = add nsw i32 %x.i, 8
+  store float %val1, float addrspace(3)* %gep.1.offset, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_write2_one_val_f64
+; SI: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]],
+; SI: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
+; SI: ds_write2_b64 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
+define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
+  %val = load double addrspace(1)* %in.gep, align 8
+  %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+  store double %val, double addrspace(3)* %arrayidx0, align 8
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+  store double %val, double addrspace(3)* %arrayidx1, align 8
+  ret void
+}
+
+; SI-LABEL: @misaligned_simple_write2_one_val_f64
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[VAL0:[0-9]+]]:[[VAL1:[0-9]+]]{{\]}}
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:0 offset1:1 [M0]
+; SI: ds_write2_b32 [[VPTR]], v[[VAL0]], v[[VAL1]] offset0:14 offset1:15 [M0]
+; SI: s_endpgm
+define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
+  %val = load double addrspace(1)* %in.gep, align 8
+  %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
+  store double %val, double addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 7
+  %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
+  store double %val, double addrspace(3)* %arrayidx1, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_write2_two_val_f64
+; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 3, v{{[0-9]+}}
+; SI: ds_write2_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:8 [M0]
+; SI: s_endpgm
+define void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
+  %in.gep.1 = getelementptr double addrspace(1)* %in.gep.0, i32 1
+  %val0 = load double addrspace(1)* %in.gep.0, align 8
+  %val1 = load double addrspace(1)* %in.gep.1, align 8
+  %arrayidx0 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
+  store double %val0, double addrspace(3)* %arrayidx0, align 8
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
+  store double %val1, double addrspace(3)* %arrayidx1, align 8
+  ret void
+}
+
+@foo = addrspace(3) global [4 x i32] undef, align 4
+
+; SI-LABEL: @store_constant_adjacent_offsets
+; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+define void @store_constant_adjacent_offsets() {
+  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
+  ret void
+}
+
+; SI-LABEL: @store_constant_disjoint_offsets
+; SI-DAG: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b{{$}}
+; SI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_write2_b32 [[ZERO]], [[VAL]], [[VAL]] offset0:0 offset1:2
+define void @store_constant_disjoint_offsets() {
+  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+  store i32 123, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
+  ret void
+}
+
+@bar = addrspace(3) global [4 x i64] undef, align 4
+
+; SI-LABEL: @store_misaligned64_constant_offsets
+; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
+; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
+define void @store_misaligned64_constant_offsets() {
+  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
+  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
+  ret void
+}
+
+@bar.large = addrspace(3) global [4096 x i64] undef, align 4
+
+; SI-LABEL: @store_misaligned64_constant_large_offsets
+; SI-DAG: v_mov_b32_e32 [[BASE0:v[0-9]+]], 0x7ff8{{$}}
+; SI-DAG: v_mov_b32_e32 [[BASE1:v[0-9]+]], 0x4000{{$}}
+; SI-DAG: ds_write2_b32 [[BASE0]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI-DAG: ds_write2_b32 [[BASE1]], v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: s_endpgm
+define void @store_misaligned64_constant_large_offsets() {
+  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
+  store i64 123, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
+  ret void
+}
+
+@sgemm.lA = internal unnamed_addr addrspace(3) global [264 x float] undef, align 4
+@sgemm.lB = internal unnamed_addr addrspace(3) global [776 x float] undef, align 4
+
+define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tgid.x() #1
+  %y.i = tail call i32 @llvm.r600.read.tidig.y() #1
+  %val = load float addrspace(1)* %in
+  %arrayidx44 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
+  store float %val, float addrspace(3)* %arrayidx44, align 4
+  %add47 = add nsw i32 %x.i, 1
+  %arrayidx48 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
+  store float %val, float addrspace(3)* %arrayidx48, align 4
+  %add51 = add nsw i32 %x.i, 16
+  %arrayidx52 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
+  store float %val, float addrspace(3)* %arrayidx52, align 4
+  %add55 = add nsw i32 %x.i, 17
+  %arrayidx56 = getelementptr inbounds [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
+  store float %val, float addrspace(3)* %arrayidx56, align 4
+  %arrayidx60 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
+  store float %val, float addrspace(3)* %arrayidx60, align 4
+  %add63 = add nsw i32 %y.i, 1
+  %arrayidx64 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
+  store float %val, float addrspace(3)* %arrayidx64, align 4
+  %add67 = add nsw i32 %y.i, 32
+  %arrayidx68 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
+  store float %val, float addrspace(3)* %arrayidx68, align 4
+  %add71 = add nsw i32 %y.i, 33
+  %arrayidx72 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
+  store float %val, float addrspace(3)* %arrayidx72, align 4
+  %add75 = add nsw i32 %y.i, 64
+  %arrayidx76 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
+  store float %val, float addrspace(3)* %arrayidx76, align 4
+  %add79 = add nsw i32 %y.i, 65
+  %arrayidx80 = getelementptr inbounds [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
+  store float %val, float addrspace(3)* %arrayidx80, align 4
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.y() #1
+
+; Function Attrs: noduplicate nounwind
+declare void @llvm.AMDGPU.barrier.local() #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { noduplicate nounwind }
diff --git a/test/CodeGen/R600/ds_write2st64.ll b/test/CodeGen/R600/ds_write2st64.ll
new file mode 100644
index 000000000000..de5f4efcbcd3
--- /dev/null
+++ b/test/CodeGen/R600/ds_write2st64.ll
@@ -0,0 +1,119 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
+
+
+@lds = addrspace(3) global [512 x float] undef, align 4
+
+
+; SI-LABEL: @simple_write2st64_one_val_f32_0_1
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL]], [[VAL]] offset0:0 offset1:1 [M0]
+; SI: s_endpgm
+define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in.gep = getelementptr float addrspace(1)* %in, i32 %x.i
+  %val = load float addrspace(1)* %in.gep, align 4
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
+  store float %val, float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 64
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
+  store float %val, float addrspace(3)* %arrayidx1, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_write2st64_two_val_f32_2_5
+; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:2 offset1:5 [M0]
+; SI: s_endpgm
+define void @simple_write2st64_two_val_f32_2_5(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
+  %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
+  %val0 = load float addrspace(1)* %in.gep.0, align 4
+  %val1 = load float addrspace(1)* %in.gep.1, align 4
+  %add.x.0 = add nsw i32 %x.i, 128
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
+  store float %val0, float addrspace(3)* %arrayidx0, align 4
+  %add.x.1 = add nsw i32 %x.i, 320
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
+  store float %val1, float addrspace(3)* %arrayidx1, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_write2st64_two_val_max_offset_f32
+; SI-DAG: buffer_load_dword [[VAL0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[VAL1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-DAG: v_lshlrev_b32_e32 [[VPTR:v[0-9]+]], 2, v{{[0-9]+}}
+; SI: ds_write2st64_b32 [[VPTR]], [[VAL0]], [[VAL1]] offset0:0 offset1:255 [M0]
+; SI: s_endpgm
+define void @simple_write2st64_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in, float addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in.gep.0 = getelementptr float addrspace(1)* %in, i32 %x.i
+  %in.gep.1 = getelementptr float addrspace(1)* %in.gep.0, i32 1
+  %val0 = load float addrspace(1)* %in.gep.0, align 4
+  %val1 = load float addrspace(1)* %in.gep.1, align 4
+  %arrayidx0 = getelementptr inbounds float addrspace(3)* %lds, i32 %x.i
+  store float %val0, float addrspace(3)* %arrayidx0, align 4
+  %add.x = add nsw i32 %x.i, 16320
+  %arrayidx1 = getelementptr inbounds float addrspace(3)* %lds, i32 %add.x
+  store float %val1, float addrspace(3)* %arrayidx1, align 4
+  ret void
+}
+
+; SI-LABEL: @simple_write2st64_two_val_max_offset_f64
+; SI-DAG: buffer_load_dwordx2 [[VAL0:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dwordx2 [[VAL1:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
+; SI-DAG: v_add_i32_e32 [[VPTR:v[0-9]+]],
+; SI: ds_write2st64_b64 [[VPTR]], [[VAL0]], [[VAL1]] offset0:4 offset1:127 [M0]
+; SI: s_endpgm
+define void @simple_write2st64_two_val_max_offset_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in.gep.0 = getelementptr double addrspace(1)* %in, i32 %x.i
+  %in.gep.1 = getelementptr double addrspace(1)* %in.gep.0, i32 1
+  %val0 = load double addrspace(1)* %in.gep.0, align 8
+  %val1 = load double addrspace(1)* %in.gep.1, align 8
+  %add.x.0 = add nsw i32 %x.i, 256
+  %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.0
+  store double %val0, double addrspace(3)* %arrayidx0, align 8
+  %add.x.1 = add nsw i32 %x.i, 8128
+  %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x.1
+  store double %val1, double addrspace(3)* %arrayidx1, align 8
+  ret void
+}
+
+; SI-LABEL: @byte_size_only_divisible_64_write2st64_f64
+; SI-NOT: ds_write2st64_b64
+; SI: ds_write2_b64 {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:0 offset1:8
+; SI: s_endpgm
+define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
+  %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
+  %in.gep = getelementptr double addrspace(1)* %in, i32 %x.i
+  %val = load double addrspace(1)* %in.gep, align 8
+  %arrayidx0 = getelementptr inbounds double addrspace(3)* %lds, i32 %x.i
+  store double %val, double addrspace(3)* %arrayidx0, align 8
+  %add.x = add nsw i32 %x.i, 8
+  %arrayidx1 = getelementptr inbounds double addrspace(3)* %lds, i32 %add.x
+  store double %val, double addrspace(3)* %arrayidx1, align 8
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.y() #1
+
+; Function Attrs: noduplicate nounwind
+declare void @llvm.AMDGPU.barrier.local() #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { noduplicate nounwind }
diff --git a/test/CodeGen/R600/elf.ll b/test/CodeGen/R600/elf.ll
index 93851504bd4e..ec28ed9c1dcd 100644
--- a/test/CodeGen/R600/elf.ll
+++ b/test/CodeGen/R600/elf.ll
@@ -1,10 +1,16 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s - | FileCheck --check-prefix=ELF-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG-CHECK %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -filetype=obj | llvm-readobj -s -symbols - | FileCheck --check-prefix=ELF-CHECK %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs -o - | FileCheck --check-prefix=CONFIG-CHECK %s
 
 ; ELF-CHECK: Format: ELF32
 ; ELF-CHECK: Name: .AMDGPU.config
 ; ELF-CHECK: Type: SHT_PROGBITS
 
+; ELF-CHECK: Symbol {
+; ELF-CHECK: Name: test
+; ELF-CHECK: Binding: Global
+
+; CONFIG-CHECK: .align 256
+; CONFIG-CHECK: test:
 ; CONFIG-CHECK: .section .AMDGPU.config
 ; CONFIG-CHECK-NEXT: .long   45096
 ; CONFIG-CHECK-NEXT: .long   0
diff --git a/test/CodeGen/R600/empty-function.ll b/test/CodeGen/R600/empty-function.ll
new file mode 100644
index 000000000000..4b81d971d06b
--- /dev/null
+++ b/test/CodeGen/R600/empty-function.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; Make sure we don't assert on empty functions
+
+; SI-LABEL: {{^}}empty_function_ret:
+; SI: .text
+; SI: s_endpgm
+; SI: codeLenInByte = 4
+define void @empty_function_ret() #0 {
+  ret void
+}
+
+; SI-LABEL: {{^}}empty_function_unreachable:
+; SI: .text
+; SI: codeLenInByte = 0
+define void @empty_function_unreachable() #0 {
+  unreachable
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/R600/extload.ll b/test/CodeGen/R600/extload.ll
index dc056e0ecdd5..4a94acaba0b5 100644
--- a/test/CodeGen/R600/extload.ll
+++ b/test/CodeGen/R600/extload.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @anyext_load_i8:
+; FUNC-LABEL: {{^}}anyext_load_i8:
 ; EG: AND_INT
 ; EG: 255
 define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
@@ -13,7 +13,7 @@ define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspac
   ret void
 }
 
-; FUNC-LABEL: @anyext_load_i16:
+; FUNC-LABEL: {{^}}anyext_load_i16:
 ; EG: AND_INT
 ; EG: AND_INT
 ; EG-DAG: 65535
@@ -27,7 +27,7 @@ define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrs
   ret void
 }
 
-; FUNC-LABEL: @anyext_load_lds_i8:
+; FUNC-LABEL: {{^}}anyext_load_lds_i8:
 ; EG: AND_INT
 ; EG: 255
 define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
@@ -39,7 +39,7 @@ define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addr
   ret void
 }
 
-; FUNC-LABEL: @anyext_load_lds_i16:
+; FUNC-LABEL: {{^}}anyext_load_lds_i16:
 ; EG: AND_INT
 ; EG: AND_INT
 ; EG-DAG: 65535
@@ -52,72 +52,3 @@ define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 a
   store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1
   ret void
 }
-
-; FUNC-LABEL: @sextload_global_i8_to_i64
-; SI: BUFFER_LOAD_SBYTE [[LOAD:v[0-9]+]],
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
-; SI: BUFFER_STORE_DWORDX2
-define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %a = load i8 addrspace(1)* %in, align 8
-  %ext = sext i8 %a to i64
-  store i64 %ext, i64 addrspace(1)* %out, align 8
-  ret void
-}
-
-; FUNC-LABEL: @sextload_global_i16_to_i64
-; SI: BUFFER_LOAD_SSHORT [[LOAD:v[0-9]+]],
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
-; SI: BUFFER_STORE_DWORDX2
-define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
-  %a = load i16 addrspace(1)* %in, align 8
-  %ext = sext i16 %a to i64
-  store i64 %ext, i64 addrspace(1)* %out, align 8
-  ret void
-}
-
-; FUNC-LABEL: @sextload_global_i32_to_i64
-; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, [[LOAD]]
-; SI: BUFFER_STORE_DWORDX2
-define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %a = load i32 addrspace(1)* %in, align 8
-  %ext = sext i32 %a to i64
-  store i64 %ext, i64 addrspace(1)* %out, align 8
-  ret void
-}
-
-; FUNC-LABEL: @zextload_global_i8_to_i64
-; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
-; SI: BUFFER_LOAD_UBYTE [[LOAD:v[0-9]+]],
-; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
-; SI: BUFFER_STORE_DWORDX2
-define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %a = load i8 addrspace(1)* %in, align 8
-  %ext = zext i8 %a to i64
-  store i64 %ext, i64 addrspace(1)* %out, align 8
-  ret void
-}
-
-; FUNC-LABEL: @zextload_global_i16_to_i64
-; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
-; SI: BUFFER_LOAD_USHORT [[LOAD:v[0-9]+]],
-; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
-; SI: BUFFER_STORE_DWORDX2
-define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
-  %a = load i16 addrspace(1)* %in, align 8
-  %ext = zext i16 %a to i64
-  store i64 %ext, i64 addrspace(1)* %out, align 8
-  ret void
-}
-
-; FUNC-LABEL: @zextload_global_i32_to_i64
-; SI: S_MOV_B32 [[ZERO:s[0-9]+]], 0
-; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
-; SI: V_MOV_B32_e32 {{v[0-9]+}}, [[ZERO]]
-; SI: BUFFER_STORE_DWORDX2
-define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %a = load i32 addrspace(1)* %in, align 8
-  %ext = zext i32 %a to i64
-  store i64 %ext, i64 addrspace(1)* %out, align 8
-  ret void
-}
diff --git a/test/CodeGen/R600/extract_vector_elt_i16.ll b/test/CodeGen/R600/extract_vector_elt_i16.ll
index 5cd1b04bd1de..04c375a89ae3 100644
--- a/test/CodeGen/R600/extract_vector_elt_i16.ll
+++ b/test/CodeGen/R600/extract_vector_elt_i16.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @extract_vector_elt_v2i16
-; SI: BUFFER_LOAD_USHORT
-; SI: BUFFER_STORE_SHORT
-; SI: BUFFER_LOAD_USHORT
-; SI: BUFFER_STORE_SHORT
+; FUNC-LABEL: {{^}}extract_vector_elt_v2i16:
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: buffer_store_short
+; SI: buffer_store_short
 define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) nounwind {
   %p0 = extractelement <2 x i16> %foo, i32 0
   %p1 = extractelement <2 x i16> %foo, i32 1
@@ -14,11 +14,11 @@ define void @extract_vector_elt_v2i16(i16 addrspace(1)* %out, <2 x i16> %foo) no
   ret void
 }
 
-; FUNC-LABEL: @extract_vector_elt_v4i16
-; SI: BUFFER_LOAD_USHORT
-; SI: BUFFER_STORE_SHORT
-; SI: BUFFER_LOAD_USHORT
-; SI: BUFFER_STORE_SHORT
+; FUNC-LABEL: {{^}}extract_vector_elt_v4i16:
+; SI: buffer_load_ushort
+; SI: buffer_load_ushort
+; SI: buffer_store_short
+; SI: buffer_store_short
 define void @extract_vector_elt_v4i16(i16 addrspace(1)* %out, <4 x i16> %foo) nounwind {
   %p0 = extractelement <4 x i16> %foo, i32 0
   %p1 = extractelement <4 x i16> %foo, i32 2
diff --git a/test/CodeGen/R600/fabs.f64.ll b/test/CodeGen/R600/fabs.f64.ll
new file mode 100644
index 000000000000..d87c08260b4c
--- /dev/null
+++ b/test/CodeGen/R600/fabs.f64.ll
@@ -0,0 +1,97 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+declare double @fabs(double) readnone
+declare double @llvm.fabs.f64(double) readnone
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone
+declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
+
+; FUNC-LABEL: {{^}}v_fabs_f64:
+; SI: v_and_b32
+; SI: s_endpgm
+define void @v_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tidext = sext i32 %tid to i64
+  %gep = getelementptr double addrspace(1)* %in, i64 %tidext
+  %val = load double addrspace(1)* %gep, align 8
+  %fabs = call double @llvm.fabs.f64(double %val)
+  store double %fabs, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_f64:
+; SI: v_and_b32
+; SI-NOT: v_and_b32
+; SI: s_endpgm
+define void @fabs_f64(double addrspace(1)* %out, double %in) {
+  %fabs = call double @llvm.fabs.f64(double %in)
+  store double %fabs, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_v2f64:
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: s_endpgm
+define void @fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+  %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
+  store <2 x double> %fabs, <2 x double> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_v4f64:
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: s_endpgm
+define void @fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+  %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
+  store <4 x double> %fabs, <4 x double> addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}fabs_fold_f64:
+; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NOT: and
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
+; SI: s_endpgm
+define void @fabs_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
+  %fabs = call double @llvm.fabs.f64(double %in0)
+  %fmul = fmul double %fabs, %in1
+  store double %fmul, double addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}fabs_fn_fold_f64:
+; SI: s_load_dwordx2 [[ABS_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NOT: and
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, |[[ABS_VALUE]]|, {{v\[[0-9]+:[0-9]+\]}}
+; SI: s_endpgm
+define void @fabs_fn_fold_f64(double addrspace(1)* %out, double %in0, double %in1) {
+  %fabs = call double @fabs(double %in0)
+  %fmul = fmul double %fabs, %in1
+  store double %fmul, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_free_f64:
+; SI: v_and_b32
+; SI: s_endpgm
+define void @fabs_free_f64(double addrspace(1)* %out, i64 %in) {
+  %bc= bitcast i64 %in to double
+  %fabs = call double @llvm.fabs.f64(double %bc)
+  store double %fabs, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_fn_free_f64:
+; SI: v_and_b32
+; SI: s_endpgm
+define void @fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
+  %bc= bitcast i64 %in to double
+  %fabs = call double @fabs(double %bc)
+  store double %fabs, double addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fabs.ll b/test/CodeGen/R600/fabs.ll
index b87ce2254095..add6b75d22ae 100644
--- a/test/CodeGen/R600/fabs.ll
+++ b/test/CodeGen/R600/fabs.ll
@@ -1,65 +1,98 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
 
 ; DAGCombiner will transform:
 ; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
 ; unless isFabsFree returns true
 
-; R600-CHECK-LABEL: @fabs_free
-; R600-CHECK-NOT: AND
-; R600-CHECK: |PV.{{[XYZW]}}|
-; SI-CHECK-LABEL: @fabs_free
-; SI-CHECK: V_AND_B32
+; FUNC-LABEL: {{^}}fabs_fn_free:
+; R600-NOT: AND
+; R600: |PV.{{[XYZW]}}|
+
+; SI: v_and_b32
+
+define void @fabs_fn_free(float addrspace(1)* %out, i32 %in) {
+  %bc= bitcast i32 %in to float
+  %fabs = call float @fabs(float %bc)
+  store float %fabs, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_free:
+; R600-NOT: AND
+; R600: |PV.{{[XYZW]}}|
+
+; SI: v_and_b32
 
 define void @fabs_free(float addrspace(1)* %out, i32 %in) {
-entry:
-  %0 = bitcast i32 %in to float
-  %1 = call float @fabs(float %0)
-  store float %1, float addrspace(1)* %out
+  %bc= bitcast i32 %in to float
+  %fabs = call float @llvm.fabs.f32(float %bc)
+  store float %fabs, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_f32:
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+
+; SI: v_and_b32
+define void @fabs_f32(float addrspace(1)* %out, float %in) {
+  %fabs = call float @llvm.fabs.f32(float %in)
+  store float %fabs, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fabs_v2f32:
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+
+; SI: v_and_b32
+; SI: v_and_b32
+define void @fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
+  store <2 x float> %fabs, <2 x float> addrspace(1)* %out
   ret void
 }
 
-; R600-CHECK-LABEL: @fabs_v2
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; SI-CHECK-LABEL: @fabs_v2
-; SI-CHECK: V_AND_B32
-; SI-CHECK: V_AND_B32
-define void @fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
-entry:
-  %0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
-  store <2 x float> %0, <2 x float> addrspace(1)* %out
+; FUNC-LABEL: {{^}}fabs_v4f32:
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: v_and_b32
+; SI: v_and_b32
+define void @fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+  %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
+  store <4 x float> %fabs, <4 x float> addrspace(1)* %out
   ret void
 }
 
-; R600-CHECK-LABEL: @fabs_v4
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; SI-CHECK-LABEL: @fabs_v4
-; SI-CHECK: V_AND_B32
-; SI-CHECK: V_AND_B32
-; SI-CHECK: V_AND_B32
-; SI-CHECK: V_AND_B32
-define void @fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
-entry:
-  %0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
-  store <4 x float> %0, <4 x float> addrspace(1)* %out
+; SI-LABEL: {{^}}fabs_fn_fold:
+; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; SI-NOT: and
+; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
+define void @fabs_fn_fold(float addrspace(1)* %out, float %in0, float %in1) {
+  %fabs = call float @fabs(float %in0)
+  %fmul = fmul float %fabs, %in1
+  store float %fmul, float addrspace(1)* %out
   ret void
 }
 
-; SI-CHECK-LABEL: @fabs_fold
-; SI-CHECK-NOT: V_AND_B32_e32
-; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, |v{{[0-9]+}}|
+; SI-LABEL: {{^}}fabs_fold:
+; SI: s_load_dword [[ABS_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; SI-NOT: and
+; SI: v_mul_f32_e64 v{{[0-9]+}}, |[[ABS_VALUE]]|, v{{[0-9]+}}
 define void @fabs_fold(float addrspace(1)* %out, float %in0, float %in1) {
-entry:
-  %0 = call float @fabs(float %in0)
-  %1 = fmul float %0, %in1
-  store float %1, float addrspace(1)* %out
+  %fabs = call float @llvm.fabs.f32(float %in0)
+  %fmul = fmul float %fabs, %in1
+  store float %fmul, float addrspace(1)* %out
   ret void
 }
 
-declare float @fabs(float ) readnone
-declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone
-declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone
+declare float @fabs(float) readnone
+declare float @llvm.fabs.f32(float) readnone
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone
diff --git a/test/CodeGen/R600/fadd.ll b/test/CodeGen/R600/fadd.ll
index 5d2b806039a2..9d29c0629628 100644
--- a/test/CodeGen/R600/fadd.ll
+++ b/test/CodeGen/R600/fadd.ll
@@ -1,66 +1,63 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
 
-; FUNC-LABEL: @fadd_f32
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
-; SI-CHECK: V_ADD_F32
+; FUNC-LABEL: {{^}}fadd_f32:
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
+; SI: v_add_f32
 define void @fadd_f32(float addrspace(1)* %out, float %a, float %b) {
-entry:
-   %0 = fadd float %a, %b
-   store float %0, float addrspace(1)* %out
+   %add = fadd float %a, %b
+   store float %add, float addrspace(1)* %out, align 4
    ret void
 }
 
-; FUNC-LABEL: @fadd_v2f32
-; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
-; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
+; FUNC-LABEL: {{^}}fadd_v2f32:
+; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
+; R600-DAG: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
+; SI: v_add_f32
+; SI: v_add_f32
 define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
-entry:
-  %0 = fadd <2 x float> %a, %b
-  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  %add = fadd <2 x float> %a, %b
+  store <2 x float> %add, <2 x float> addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @fadd_v4f32
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
+; FUNC-LABEL: {{^}}fadd_v4f32:
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
 define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float> addrspace(1) * %in
-  %b = load <4 x float> addrspace(1) * %b_ptr
+  %a = load <4 x float> addrspace(1)* %in, align 16
+  %b = load <4 x float> addrspace(1)* %b_ptr, align 16
   %result = fadd <4 x float> %a, %b
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
   ret void
 }
 
-; FUNC-LABEL: @fadd_v8f32
-; R600-CHECK: ADD
-; R600-CHECK: ADD
-; R600-CHECK: ADD
-; R600-CHECK: ADD
-; R600-CHECK: ADD
-; R600-CHECK: ADD
-; R600-CHECK: ADD
-; R600-CHECK: ADD
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
-; SI-CHECK: V_ADD_F32
+; FUNC-LABEL: {{^}}fadd_v8f32:
+; R600: ADD
+; R600: ADD
+; R600: ADD
+; R600: ADD
+; R600: ADD
+; R600: ADD
+; R600: ADD
+; R600: ADD
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
+; SI: v_add_f32
 define void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) {
-entry:
-  %0 = fadd <8 x float> %a, %b
-  store <8 x float> %0, <8 x float> addrspace(1)* %out
+  %add = fadd <8 x float> %a, %b
+  store <8 x float> %add, <8 x float> addrspace(1)* %out, align 32
   ret void
 }
diff --git a/test/CodeGen/R600/fadd64.ll b/test/CodeGen/R600/fadd64.ll
index 48cd3cfc8dfb..389c754c9e8d 100644
--- a/test/CodeGen/R600/fadd64.ll
+++ b/test/CodeGen/R600/fadd64.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
 
-; CHECK: @fadd_f64
-; CHECK: V_ADD_F64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}
+; CHECK: {{^}}fadd_f64:
+; CHECK: v_add_f64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}
 
 define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                       double addrspace(1)* %in2) {
diff --git a/test/CodeGen/R600/fceil.ll b/test/CodeGen/R600/fceil.ll
index 458363adc1e3..7c7a7e36295c 100644
--- a/test/CodeGen/R600/fceil.ll
+++ b/test/CodeGen/R600/fceil.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare float @llvm.ceil.f32(float) nounwind readnone
@@ -8,8 +8,8 @@ declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
 declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone
 declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone
 
-; FUNC-LABEL: @fceil_f32:
-; SI: V_CEIL_F32_e32
+; FUNC-LABEL: {{^}}fceil_f32:
+; SI: v_ceil_f32_e32
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
 ; EG: CEIL {{\*? *}}[[RESULT]]
 define void @fceil_f32(float addrspace(1)* %out, float %x) {
@@ -18,9 +18,9 @@ define void @fceil_f32(float addrspace(1)* %out, float %x) {
   ret void
 }
 
-; FUNC-LABEL: @fceil_v2f32:
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
+; FUNC-LABEL: {{^}}fceil_v2f32:
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
 ; EG: CEIL {{\*? *}}[[RESULT]]
 ; EG: CEIL {{\*? *}}[[RESULT]]
@@ -30,10 +30,10 @@ define void @fceil_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
   ret void
 }
 
-; FUNC-LABEL: @fceil_v3f32:
-; FIXME-SI: V_CEIL_F32_e32
-; FIXME-SI: V_CEIL_F32_e32
-; FIXME-SI: V_CEIL_F32_e32
+; FUNC-LABEL: {{^}}fceil_v3f32:
+; FIXME-SI: v_ceil_f32_e32
+; FIXME-SI: v_ceil_f32_e32
+; FIXME-SI: v_ceil_f32_e32
 ; FIXME-EG: v3 is treated as v2 and v1, hence 2 stores
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
@@ -46,11 +46,11 @@ define void @fceil_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
   ret void
 }
 
-; FUNC-LABEL: @fceil_v4f32:
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
+; FUNC-LABEL: {{^}}fceil_v4f32:
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+]]{{\.[XYZW]}}
 ; EG: CEIL {{\*? *}}[[RESULT]]
 ; EG: CEIL {{\*? *}}[[RESULT]]
@@ -62,15 +62,15 @@ define void @fceil_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
   ret void
 }
 
-; FUNC-LABEL: @fceil_v8f32:
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
+; FUNC-LABEL: {{^}}fceil_v8f32:
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
 ; EG-DAG: CEIL {{\*? *}}[[RESULT1]]
@@ -87,23 +87,23 @@ define void @fceil_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
   ret void
 }
 
-; FUNC-LABEL: @fceil_v16f32:
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
-; SI: V_CEIL_F32_e32
+; FUNC-LABEL: {{^}}fceil_v16f32:
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
+; SI: v_ceil_f32_e32
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT1:T[0-9]+]]{{\.[XYZW]}}
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT2:T[0-9]+]]{{\.[XYZW]}}
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT3:T[0-9]+]]{{\.[XYZW]}}
diff --git a/test/CodeGen/R600/fceil64.ll b/test/CodeGen/R600/fceil64.ll
index b42aefa17328..77cd8eae402c 100644
--- a/test/CodeGen/R600/fceil64.ll
+++ b/test/CodeGen/R600/fceil64.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare double @llvm.ceil.f64(double) nounwind readnone
 declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
@@ -8,94 +8,95 @@ declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
 declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
 declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
 
-; FUNC-LABEL: @fceil_f64:
-; CI: V_CEIL_F64_e32
-; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
-; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
-; SI: S_LSHR_B64
-; SI: S_NOT_B64
-; SI: S_AND_B64
-; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI: CMP_LT_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: CMP_GT_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: CMP_GT_F64
-; SI: CNDMASK_B32
-; SI: CMP_NE_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: V_ADD_F64
+; FUNC-LABEL: {{^}}fceil_f64:
+; CI: v_ceil_f64_e32
+; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
+; SI: s_lshr_b64
+; SI: s_not_b64
+; SI: s_and_b64
+; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI-DAG: cmp_lt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: cmp_gt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: v_cmp_lg_f64
+; SI: v_cmp_gt_f64
+; SI: s_and_b64
+; SI: v_cndmask_b32
+; SI: v_cndmask_b32
+; SI: v_add_f64
+; SI: s_endpgm
 define void @fceil_f64(double addrspace(1)* %out, double %x) {
   %y = call double @llvm.ceil.f64(double %x) nounwind readnone
   store double %y, double addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @fceil_v2f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
+; FUNC-LABEL: {{^}}fceil_v2f64:
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
 define void @fceil_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
   %y = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) nounwind readnone
   store <2 x double> %y, <2 x double> addrspace(1)* %out
   ret void
 }
 
-; FIXME-FUNC-LABEL: @fceil_v3f64:
-; FIXME-CI: V_CEIL_F64_e32
-; FIXME-CI: V_CEIL_F64_e32
-; FIXME-CI: V_CEIL_F64_e32
+; FIXME-FUNC-LABEL: {{^}}fceil_v3f64:
+; FIXME-CI: v_ceil_f64_e32
+; FIXME-CI: v_ceil_f64_e32
+; FIXME-CI: v_ceil_f64_e32
 ; define void @fceil_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
 ;   %y = call <3 x double> @llvm.ceil.v3f64(<3 x double> %x) nounwind readnone
 ;   store <3 x double> %y, <3 x double> addrspace(1)* %out
 ;   ret void
 ; }
 
-; FUNC-LABEL: @fceil_v4f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
+; FUNC-LABEL: {{^}}fceil_v4f64:
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
 define void @fceil_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
   %y = call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone
   store <4 x double> %y, <4 x double> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @fceil_v8f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
+; FUNC-LABEL: {{^}}fceil_v8f64:
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
 define void @fceil_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
   %y = call <8 x double> @llvm.ceil.v8f64(<8 x double> %x) nounwind readnone
   store <8 x double> %y, <8 x double> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @fceil_v16f64:
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
-; CI: V_CEIL_F64_e32
+; FUNC-LABEL: {{^}}fceil_v16f64:
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
+; CI: v_ceil_f64_e32
 define void @fceil_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
   %y = call <16 x double> @llvm.ceil.v16f64(<16 x double> %x) nounwind readnone
   store <16 x double> %y, <16 x double> addrspace(1)* %out
diff --git a/test/CodeGen/R600/fcmp.ll b/test/CodeGen/R600/fcmp.ll
index c76a75876565..33992181e0d9 100644
--- a/test/CodeGen/R600/fcmp.ll
+++ b/test/CodeGen/R600/fcmp.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-; CHECK: @fcmp_sext
+; CHECK: {{^}}fcmp_sext:
 ; CHECK: SETE_DX10  T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) {
@@ -18,7 +18,7 @@ entry:
 ; SET*_DX10 instruction.  Previously we were lowering this to:
 ; SET* + FP_TO_SINT
 
-; CHECK: @fcmp_br
+; CHECK: {{^}}fcmp_br:
 ; CHECK: SET{{[N]*}}E_DX10 * T{{[0-9]+\.[XYZW],}}
 ; CHECK-NEXT {{[0-9]+(5.0}}
 
diff --git a/test/CodeGen/R600/fcmp64.ll b/test/CodeGen/R600/fcmp64.ll
index 8cbe9f686648..032a4e416dc1 100644
--- a/test/CodeGen/R600/fcmp64.ll
+++ b/test/CodeGen/R600/fcmp64.ll
@@ -1,60 +1,55 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
 
-; CHECK: @flt_f64
-; CHECK: V_CMP_LT_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
-define void @flt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+; CHECK-LABEL: {{^}}flt_f64:
+; CHECK: v_cmp_nge_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+define void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
    %r0 = load double addrspace(1)* %in1
    %r1 = load double addrspace(1)* %in2
    %r2 = fcmp ult double %r0, %r1
-   %r3 = select i1 %r2, double %r0, double %r1
-   store double %r3, double addrspace(1)* %out
+   %r3 = zext i1 %r2 to i32
+   store i32 %r3, i32 addrspace(1)* %out
    ret void
 }
 
-; CHECK: @fle_f64
-; CHECK: V_CMP_LE_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
-define void @fle_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+; CHECK-LABEL: {{^}}fle_f64:
+; CHECK: v_cmp_ngt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+define void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
    %r0 = load double addrspace(1)* %in1
    %r1 = load double addrspace(1)* %in2
    %r2 = fcmp ule double %r0, %r1
-   %r3 = select i1 %r2, double %r0, double %r1
-   store double %r3, double addrspace(1)* %out
+   %r3 = zext i1 %r2 to i32
+   store i32 %r3, i32 addrspace(1)* %out
    ret void
 }
 
-; CHECK: @fgt_f64
-; CHECK: V_CMP_GT_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
-define void @fgt_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+; CHECK-LABEL: {{^}}fgt_f64:
+; CHECK: v_cmp_nle_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+define void @fgt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
    %r0 = load double addrspace(1)* %in1
    %r1 = load double addrspace(1)* %in2
    %r2 = fcmp ugt double %r0, %r1
-   %r3 = select i1 %r2, double %r0, double %r1
-   store double %r3, double addrspace(1)* %out
+   %r3 = zext i1 %r2 to i32
+   store i32 %r3, i32 addrspace(1)* %out
    ret void
 }
 
-; CHECK: @fge_f64
-; CHECK: V_CMP_GE_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
-define void @fge_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+; CHECK-LABEL: {{^}}fge_f64:
+; CHECK: v_cmp_nlt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+define void @fge_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
    %r0 = load double addrspace(1)* %in1
    %r1 = load double addrspace(1)* %in2
    %r2 = fcmp uge double %r0, %r1
-   %r3 = select i1 %r2, double %r0, double %r1
-   store double %r3, double addrspace(1)* %out
+   %r3 = zext i1 %r2 to i32
+   store i32 %r3, i32 addrspace(1)* %out
    ret void
 }
 
-; CHECK: @fne_f64
-; CHECK: V_CMP_NEQ_F64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
+; CHECK-LABEL: {{^}}fne_f64:
+; CHECK: v_cmp_neq_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
    %r0 = load double addrspace(1)* %in1
@@ -65,9 +60,8 @@ define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
    ret void
 }
 
-; CHECK: @feq_f64
-; CHECK: V_CMP_EQ_F64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
-
+; CHECK-LABEL: {{^}}feq_f64:
+; CHECK: v_cmp_nlg_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
    %r0 = load double addrspace(1)* %in1
diff --git a/test/CodeGen/R600/fconst64.ll b/test/CodeGen/R600/fconst64.ll
index 9c3a7e3d2e93..f3bc399972d5 100644
--- a/test/CodeGen/R600/fconst64.ll
+++ b/test/CodeGen/R600/fconst64.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
 
-; CHECK: @fconst_f64
-; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0x40140000
-; CHECK-DAG: S_MOV_B32 {{s[0-9]+}}, 0
+; CHECK: {{^}}fconst_f64:
+; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0x40140000
+; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0
 
 define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
    %r1 = load double addrspace(1)* %in
diff --git a/test/CodeGen/R600/fcopysign.f32.ll b/test/CodeGen/R600/fcopysign.f32.ll
index 7b4425bed724..4bc5145bd4de 100644
--- a/test/CodeGen/R600/fcopysign.f32.ll
+++ b/test/CodeGen/R600/fcopysign.f32.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 
@@ -7,15 +7,15 @@ declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nounwind read
 declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind readnone
 
 ; Try to identify arg based on higher address.
-; FUNC-LABEL: @test_copysign_f32:
-; SI: S_LOAD_DWORD [[SSIGN:s[0-9]+]], {{.*}} 0xc
-; SI: V_MOV_B32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
-; SI-DAG: S_LOAD_DWORD [[SMAG:s[0-9]+]], {{.*}} 0xb
-; SI-DAG: V_MOV_B32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
-; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
-; SI: V_BFI_B32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copysign_f32:
+; SI: s_load_dword [[SMAG:s[0-9]+]], {{.*}} 0xb
+; SI: s_load_dword [[SSIGN:s[0-9]+]], {{.*}} 0xc
+; SI-DAG: v_mov_b32_e32 [[VSIGN:v[0-9]+]], [[SSIGN]]
+; SI-DAG: v_mov_b32_e32 [[VMAG:v[0-9]+]], [[SMAG]]
+; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; SI: v_bfi_b32 [[RESULT:v[0-9]+]], [[SCONST]], [[VMAG]], [[VSIGN]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 
 ; EG: BFI_INT
 define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign) nounwind {
@@ -24,8 +24,8 @@ define void @test_copysign_f32(float addrspace(1)* %out, float %mag, float %sign
   ret void
 }
 
-; FUNC-LABEL: @test_copysign_v2f32:
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copysign_v2f32:
+; SI: s_endpgm
 
 ; EG: BFI_INT
 ; EG: BFI_INT
@@ -35,8 +35,8 @@ define void @test_copysign_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %ma
   ret void
 }
 
-; FUNC-LABEL: @test_copysign_v4f32:
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copysign_v4f32:
+; SI: s_endpgm
 
 ; EG: BFI_INT
 ; EG: BFI_INT
diff --git a/test/CodeGen/R600/fcopysign.f64.ll b/test/CodeGen/R600/fcopysign.f64.ll
index ea7a6db67f34..a14a493f72c8 100644
--- a/test/CodeGen/R600/fcopysign.f64.ll
+++ b/test/CodeGen/R600/fcopysign.f64.ll
@@ -1,35 +1,35 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare double @llvm.copysign.f64(double, double) nounwind readnone
 declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) nounwind readnone
 declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) nounwind readnone
 
-; FUNC-LABEL: @test_copysign_f64:
-; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI: V_MOV_B32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
-; SI-DAG: S_LOAD_DWORDX2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: V_MOV_B32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
-; SI-DAG: S_MOV_B32 [[SCONST:s[0-9]+]], 0x7fffffff
-; SI: V_BFI_B32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
-; SI: V_MOV_B32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
-; SI: BUFFER_STORE_DWORDX2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copysign_f64:
+; SI-DAG: s_load_dwordx2 s{{\[}}[[SMAG_LO:[0-9]+]]:[[SMAG_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dwordx2 s{{\[}}[[SSIGN_LO:[0-9]+]]:[[SSIGN_HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 v[[VSIGN_HI:[0-9]+]], s[[SSIGN_HI]]
+; SI-DAG: v_mov_b32_e32 v[[VMAG_HI:[0-9]+]], s[[SMAG_HI]]
+; SI-DAG: s_mov_b32 [[SCONST:s[0-9]+]], 0x7fffffff
+; SI: v_bfi_b32 v[[VRESULT_HI:[0-9]+]], [[SCONST]], v[[VMAG_HI]], v[[VSIGN_HI]]
+; SI: v_mov_b32_e32 v[[VMAG_LO:[0-9]+]], s[[SMAG_LO]]
+; SI: buffer_store_dwordx2 v{{\[}}[[VMAG_LO]]:[[VRESULT_HI]]{{\]}}
+; SI: s_endpgm
 define void @test_copysign_f64(double addrspace(1)* %out, double %mag, double %sign) nounwind {
   %result = call double @llvm.copysign.f64(double %mag, double %sign)
   store double %result, double addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @test_copysign_v2f64:
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copysign_v2f64:
+; SI: s_endpgm
 define void @test_copysign_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %mag, <2 x double> %sign) nounwind {
   %result = call <2 x double> @llvm.copysign.v2f64(<2 x double> %mag, <2 x double> %sign)
   store <2 x double> %result, <2 x double> addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @test_copysign_v4f64:
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}test_copysign_v4f64:
+; SI: s_endpgm
 define void @test_copysign_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %mag, <4 x double> %sign) nounwind {
   %result = call <4 x double> @llvm.copysign.v4f64(<4 x double> %mag, <4 x double> %sign)
   store <4 x double> %result, <4 x double> addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/fdiv.ll b/test/CodeGen/R600/fdiv.ll
index 20db65c5eb60..f83b88ee1c9d 100644
--- a/test/CodeGen/R600/fdiv.ll
+++ b/test/CodeGen/R600/fdiv.ll
@@ -1,18 +1,18 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
 ; These tests check that fdiv is expanded correctly and also test that the
 ; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
 ; instruction groups.
 
-; FUNC-LABEL: @fdiv_f32
+; FUNC-LABEL: {{^}}fdiv_f32:
 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
 
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
 define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fdiv float %a, %b
@@ -22,16 +22,16 @@ entry:
 
 
 
-; FUNC-LABEL: @fdiv_v2f32
+; FUNC-LABEL: {{^}}fdiv_v2f32:
 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS
 
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
 define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
 entry:
   %0 = fdiv <2 x float> %a, %b
@@ -39,7 +39,7 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @fdiv_v4f32
+; FUNC-LABEL: {{^}}fdiv_v4f32:
 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@@ -49,14 +49,14 @@ entry:
 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
 ; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
 
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
-; SI-DAG: V_RCP_F32
-; SI-DAG: V_MUL_F32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
+; SI-DAG: v_rcp_f32
+; SI-DAG: v_mul_f32
 define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
   %a = load <4 x float> addrspace(1) * %in
diff --git a/test/CodeGen/R600/fdiv64.ll b/test/CodeGen/R600/fdiv64.ll
index 79b5c8bb96ee..0611b153f5cc 100644
--- a/test/CodeGen/R600/fdiv64.ll
+++ b/test/CodeGen/R600/fdiv64.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
 
-; CHECK: @fdiv_f64
-; CHECK: V_RCP_F64_e32 {{v\[[0-9]+:[0-9]+\]}}
-; CHECK: V_MUL_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
+; CHECK: {{^}}fdiv_f64:
+; CHECK: v_rcp_f64_e32 {{v\[[0-9]+:[0-9]+\]}}
+; CHECK: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
 
 define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                       double addrspace(1)* %in2) {
diff --git a/test/CodeGen/R600/fetch-limits.r600.ll b/test/CodeGen/R600/fetch-limits.r600.ll
index f78d1d968e5d..d35573e818d4 100644
--- a/test/CodeGen/R600/fetch-limits.r600.ll
+++ b/test/CodeGen/R600/fetch-limits.r600.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -march=r600 -mcpu=rv670 | FileCheck %s
 
 ; R600 supports 8 fetches in a clause
-; CHECK: @fetch_limits_r600
+; CHECK: {{^}}fetch_limits_r600:
 ; CHECK: Fetch clause
 ; CHECK: Fetch clause
 
diff --git a/test/CodeGen/R600/fetch-limits.r700+.ll b/test/CodeGen/R600/fetch-limits.r700+.ll
index 1a8a43fccc72..17760a05caa4 100644
--- a/test/CodeGen/R600/fetch-limits.r700+.ll
+++ b/test/CodeGen/R600/fetch-limits.r700+.ll
@@ -12,7 +12,7 @@
 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
 
 ; r700+ supports 16 fetches in a clause
-; CHECK: @fetch_limits_r700
+; CHECK: {{^}}fetch_limits_r700:
 ; CHECK: Fetch clause
 ; CHECK: Fetch clause
 
diff --git a/test/CodeGen/R600/ffloor.ll b/test/CodeGen/R600/ffloor.ll
index 31c6116988e6..194d0aaa1819 100644
--- a/test/CodeGen/R600/ffloor.ll
+++ b/test/CodeGen/R600/ffloor.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare double @llvm.floor.f64(double) nounwind readnone
 declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
@@ -8,95 +8,96 @@ declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
 declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
 declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
 
-; FUNC-LABEL: @ffloor_f64:
-; CI: V_FLOOR_F64_e32
+; FUNC-LABEL: {{^}}ffloor_f64:
+; CI: v_floor_f64_e32
 
-; SI: S_BFE_I32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
-; SI: S_ADD_I32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
-; SI: S_LSHR_B64
-; SI: S_NOT_B64
-; SI: S_AND_B64
-; SI: S_AND_B32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
-; SI: CMP_LT_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: CMP_GT_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: CMP_LT_F64
-; SI: CNDMASK_B32
-; SI: CMP_NE_I32
-; SI: CNDMASK_B32
-; SI: CNDMASK_B32
-; SI: V_ADD_F64
+; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
+; SI: s_lshr_b64
+; SI: s_not_b64
+; SI: s_and_b64
+; SI-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI-DAG: cmp_lt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: cmp_gt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: v_cmp_lg_f64
+; SI: v_cmp_lt_f64
+; SI: s_and_b64
+; SI: v_cndmask_b32
+; SI: v_cndmask_b32
+; SI: v_add_f64
+; SI: s_endpgm
 define void @ffloor_f64(double addrspace(1)* %out, double %x) {
   %y = call double @llvm.floor.f64(double %x) nounwind readnone
   store double %y, double addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @ffloor_v2f64:
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
+; FUNC-LABEL: {{^}}ffloor_v2f64:
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
 define void @ffloor_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
   %y = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) nounwind readnone
   store <2 x double> %y, <2 x double> addrspace(1)* %out
   ret void
 }
 
-; FIXME-FUNC-LABEL: @ffloor_v3f64:
-; FIXME-CI: V_FLOOR_F64_e32
-; FIXME-CI: V_FLOOR_F64_e32
-; FIXME-CI: V_FLOOR_F64_e32
+; FIXME-FUNC-LABEL: {{^}}ffloor_v3f64:
+; FIXME-CI: v_floor_f64_e32
+; FIXME-CI: v_floor_f64_e32
+; FIXME-CI: v_floor_f64_e32
 ; define void @ffloor_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
 ;   %y = call <3 x double> @llvm.floor.v3f64(<3 x double> %x) nounwind readnone
 ;   store <3 x double> %y, <3 x double> addrspace(1)* %out
 ;   ret void
 ; }
 
-; FUNC-LABEL: @ffloor_v4f64:
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
+; FUNC-LABEL: {{^}}ffloor_v4f64:
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
 define void @ffloor_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
   %y = call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone
   store <4 x double> %y, <4 x double> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @ffloor_v8f64:
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
+; FUNC-LABEL: {{^}}ffloor_v8f64:
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
 define void @ffloor_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
   %y = call <8 x double> @llvm.floor.v8f64(<8 x double> %x) nounwind readnone
   store <8 x double> %y, <8 x double> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @ffloor_v16f64:
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
-; CI: V_FLOOR_F64_e32
+; FUNC-LABEL: {{^}}ffloor_v16f64:
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
+; CI: v_floor_f64_e32
 define void @ffloor_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
   %y = call <16 x double> @llvm.floor.v16f64(<16 x double> %x) nounwind readnone
   store <16 x double> %y, <16 x double> addrspace(1)* %out
diff --git a/test/CodeGen/R600/flat-address-space.ll b/test/CodeGen/R600/flat-address-space.ll
new file mode 100644
index 000000000000..99ef41b24e4f
--- /dev/null
+++ b/test/CodeGen/R600/flat-address-space.ll
@@ -0,0 +1,182 @@
+; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
+; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s
+
+; Disable optimizations in case there are optimizations added that
+; specialize away generic pointer accesses.
+
+
+; CHECK-LABEL: {{^}}branch_use_flat_i32:
+; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, [M0, FLAT_SCRATCH]
+; CHECK: s_endpgm
+define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
+entry:
+  %cmp = icmp ne i32 %c, 0
+  br i1 %cmp, label %local, label %global
+
+local:
+  %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
+  br label %end
+
+global:
+  %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
+  br label %end
+
+end:
+  %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
+  store i32 %x, i32 addrspace(4)* %fptr, align 4
+;  %val = load i32 addrspace(4)* %fptr, align 4
+;  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+
+
+; These testcases might become useless when there are optimizations to
+; remove generic pointers.
+
+; CHECK-LABEL: {{^}}store_flat_i32:
+; CHECK: v_mov_b32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}}
+; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}}
+; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}}
+; CHECK: flat_store_dword v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
+  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
+  store i32 %x, i32 addrspace(4)* %fptr, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_flat_i64:
+; CHECK: flat_store_dwordx2
+define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
+  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
+  store i64 %x, i64 addrspace(4)* %fptr, align 8
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_flat_v4i32:
+; CHECK: flat_store_dwordx4
+define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
+  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
+  store <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_flat_trunc_i16:
+; CHECK: flat_store_short
+define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
+  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
+  %y = trunc i32 %x to i16
+  store i16 %y, i16 addrspace(4)* %fptr, align 2
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_flat_trunc_i8:
+; CHECK: flat_store_byte
+define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
+  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
+  %y = trunc i32 %x to i8
+  store i8 %y, i8 addrspace(4)* %fptr, align 2
+  ret void
+}
+
+
+
+; CHECK-LABEL @load_flat_i32:
+; CHECK: flat_load_dword
+define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
+  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
+  %fload = load i32 addrspace(4)* %fptr, align 4
+  store i32 %fload, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL @load_flat_i64:
+; CHECK: flat_load_dwordx2
+define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
+  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
+  %fload = load i64 addrspace(4)* %fptr, align 4
+  store i64 %fload, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; CHECK-LABEL @load_flat_v4i32:
+; CHECK: flat_load_dwordx4
+define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
+  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
+  %fload = load <4 x i32> addrspace(4)* %fptr, align 4
+  store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
+  ret void
+}
+
+; CHECK-LABEL @sextload_flat_i8:
+; CHECK: flat_load_sbyte
+define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
+  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
+  %fload = load i8 addrspace(4)* %fptr, align 4
+  %ext = sext i8 %fload to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL @zextload_flat_i8:
+; CHECK: flat_load_ubyte
+define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
+  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
+  %fload = load i8 addrspace(4)* %fptr, align 4
+  %ext = zext i8 %fload to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL @sextload_flat_i16:
+; CHECK: flat_load_sshort
+define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
+  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
+  %fload = load i16 addrspace(4)* %fptr, align 4
+  %ext = sext i16 %fload to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL @zextload_flat_i16:
+; CHECK: flat_load_ushort
+define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
+  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
+  %fload = load i16 addrspace(4)* %fptr, align 4
+  %ext = zext i16 %fload to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+
+
+; TODO: This should not be zero when registers are used for small
+; scratch allocations again.
+
+; Check for prologue initializing special SGPRs pointing to scratch.
+; CHECK-LABEL: {{^}}store_flat_scratch:
+; CHECK: s_movk_i32 flat_scratch_lo, 0
+; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 0x28{{$}}
+; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0x0{{$}}
+; CHECK: flat_store_dword
+; CHECK: s_barrier
+; CHECK: flat_load_dword
+define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
+  %alloca = alloca i32, i32 9, align 4
+  %x = call i32 @llvm.r600.read.tidig.x() #3
+  %pptr = getelementptr i32* %alloca, i32 %x
+  %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
+  store i32 %x, i32 addrspace(4)* %fptr
+  ; Dummy call
+  call void @llvm.AMDGPU.barrier.local() #1
+  %reload = load i32 addrspace(4)* %fptr, align 4
+  store i32 %reload, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+declare void @llvm.AMDGPU.barrier.local() #1
+declare i32 @llvm.r600.read.tidig.x() #3
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind noduplicate }
+attributes #3 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fma.f64.ll b/test/CodeGen/R600/fma.f64.ll
new file mode 100644
index 000000000000..48b1093ecd0b
--- /dev/null
+++ b/test/CodeGen/R600/fma.f64.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+
+
+; FUNC-LABEL: {{^}}fma_f64:
+; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
+   %r0 = load double addrspace(1)* %in1
+   %r1 = load double addrspace(1)* %in2
+   %r2 = load double addrspace(1)* %in3
+   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
+   store double %r3, double addrspace(1)* %out
+   ret void
+}
+
+; FUNC-LABEL: {{^}}fma_v2f64:
+; SI: v_fma_f64
+; SI: v_fma_f64
+define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
+                       <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
+   %r0 = load <2 x double> addrspace(1)* %in1
+   %r1 = load <2 x double> addrspace(1)* %in2
+   %r2 = load <2 x double> addrspace(1)* %in3
+   %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
+   store <2 x double> %r3, <2 x double> addrspace(1)* %out
+   ret void
+}
+
+; FUNC-LABEL: {{^}}fma_v4f64:
+; SI: v_fma_f64
+; SI: v_fma_f64
+; SI: v_fma_f64
+; SI: v_fma_f64
+define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
+                       <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
+   %r0 = load <4 x double> addrspace(1)* %in1
+   %r1 = load <4 x double> addrspace(1)* %in2
+   %r2 = load <4 x double> addrspace(1)* %in3
+   %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
+   store <4 x double> %r3, <4 x double> addrspace(1)* %out
+   ret void
+}
diff --git a/test/CodeGen/R600/fma.ll b/test/CodeGen/R600/fma.ll
index d72ffeceb921..f3861ffa2835 100644
--- a/test/CodeGen/R600/fma.ll
+++ b/test/CodeGen/R600/fma.ll
@@ -1,89 +1,92 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare float @llvm.fma.f32(float, float, float) nounwind readnone
 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 
-declare double @llvm.fma.f64(double, double, double) nounwind readnone
-declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 
-; FUNC-LABEL: @fma_f32
-; SI: V_FMA_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
+; FUNC-LABEL: {{^}}fma_f32:
+; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
+
+; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}},
+; EG: FMA {{\*? *}}[[RES]]
 define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                      float addrspace(1)* %in2, float addrspace(1)* %in3) {
-   %r0 = load float addrspace(1)* %in1
-   %r1 = load float addrspace(1)* %in2
-   %r2 = load float addrspace(1)* %in3
-   %r3 = tail call float @llvm.fma.f32(float %r0, float %r1, float %r2)
-   store float %r3, float addrspace(1)* %out
-   ret void
+  %r0 = load float addrspace(1)* %in1
+  %r1 = load float addrspace(1)* %in2
+  %r2 = load float addrspace(1)* %in3
+  %r3 = tail call float @llvm.fma.f32(float %r0, float %r1, float %r2)
+  store float %r3, float addrspace(1)* %out
+  ret void
 }
 
-; FUNC-LABEL: @fma_v2f32
-; SI: V_FMA_F32
-; SI: V_FMA_F32
+; FUNC-LABEL: {{^}}fma_v2f32:
+; SI: v_fma_f32
+; SI: v_fma_f32
+
+; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]]].[[CHLO:[XYZW]]][[CHHI:[XYZW]]], {{T[0-9]\.[XYZW]}},
+; EG-DAG: FMA {{\*? *}}[[RES]].[[CHLO]]
+; EG-DAG: FMA {{\*? *}}[[RES]].[[CHHI]]
 define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
                        <2 x float> addrspace(1)* %in2, <2 x float> addrspace(1)* %in3) {
-   %r0 = load <2 x float> addrspace(1)* %in1
-   %r1 = load <2 x float> addrspace(1)* %in2
-   %r2 = load <2 x float> addrspace(1)* %in3
-   %r3 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2)
-   store <2 x float> %r3, <2 x float> addrspace(1)* %out
-   ret void
+  %r0 = load <2 x float> addrspace(1)* %in1
+  %r1 = load <2 x float> addrspace(1)* %in2
+  %r2 = load <2 x float> addrspace(1)* %in3
+  %r3 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2)
+  store <2 x float> %r3, <2 x float> addrspace(1)* %out
+  ret void
 }
 
-; FUNC-LABEL: @fma_v4f32
-; SI: V_FMA_F32
-; SI: V_FMA_F32
-; SI: V_FMA_F32
-; SI: V_FMA_F32
+; FUNC-LABEL: {{^}}fma_v4f32:
+; SI: v_fma_f32
+; SI: v_fma_f32
+; SI: v_fma_f32
+; SI: v_fma_f32
+
+; EG: MEM_RAT_{{.*}} STORE_{{.*}} [[RES:T[0-9]]].{{[XYZW][XYZW][XYZW][XYZW]}}, {{T[0-9]\.[XYZW]}},
+; EG-DAG: FMA {{\*? *}}[[RES]].X
+; EG-DAG: FMA {{\*? *}}[[RES]].Y
+; EG-DAG: FMA {{\*? *}}[[RES]].Z
+; EG-DAG: FMA {{\*? *}}[[RES]].W
 define void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1,
                        <4 x float> addrspace(1)* %in2, <4 x float> addrspace(1)* %in3) {
-   %r0 = load <4 x float> addrspace(1)* %in1
-   %r1 = load <4 x float> addrspace(1)* %in2
-   %r2 = load <4 x float> addrspace(1)* %in3
-   %r3 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %r0, <4 x float> %r1, <4 x float> %r2)
-   store <4 x float> %r3, <4 x float> addrspace(1)* %out
-   ret void
+  %r0 = load <4 x float> addrspace(1)* %in1
+  %r1 = load <4 x float> addrspace(1)* %in2
+  %r2 = load <4 x float> addrspace(1)* %in3
+  %r3 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %r0, <4 x float> %r1, <4 x float> %r2)
+  store <4 x float> %r3, <4 x float> addrspace(1)* %out
+  ret void
 }
 
-; FUNC-LABEL: @fma_f64
-; SI: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
-define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
-                     double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
-   %r2 = load double addrspace(1)* %in3
-   %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
-   store double %r3, double addrspace(1)* %out
-   ret void
-}
+; FUNC-LABEL: @fma_commute_mul_inline_imm_f32
+; SI: v_fma_f32 {{v[0-9]+}}, 2.0, {{v[0-9]+}}, {{v[0-9]+}}
+define void @fma_commute_mul_inline_imm_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) nounwind {
+  %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid
+  %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid
+  %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
 
-; FUNC-LABEL: @fma_v2f64
-; SI: V_FMA_F64
-; SI: V_FMA_F64
-define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
-                       <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
-   %r0 = load <2 x double> addrspace(1)* %in1
-   %r1 = load <2 x double> addrspace(1)* %in2
-   %r2 = load <2 x double> addrspace(1)* %in3
-   %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
-   store <2 x double> %r3, <2 x double> addrspace(1)* %out
-   ret void
+  %a = load float addrspace(1)* %in.a.gep, align 4
+  %b = load float addrspace(1)* %in.b.gep, align 4
+
+  %fma = call float @llvm.fma.f32(float %a, float 2.0, float %b)
+  store float %fma, float addrspace(1)* %out.gep, align 4
+  ret void
 }
 
-; FUNC-LABEL: @fma_v4f64
-; SI: V_FMA_F64
-; SI: V_FMA_F64
-; SI: V_FMA_F64
-; SI: V_FMA_F64
-define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
-                       <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
-   %r0 = load <4 x double> addrspace(1)* %in1
-   %r1 = load <4 x double> addrspace(1)* %in2
-   %r2 = load <4 x double> addrspace(1)* %in3
-   %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
-   store <4 x double> %r3, <4 x double> addrspace(1)* %out
-   ret void
+; FUNC-LABEL: @fma_commute_mul_s_f32
+define void @fma_commute_mul_s_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b, float %b) nounwind {
+  %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %in.a.gep = getelementptr float addrspace(1)* %in.a, i32 %tid
+  %in.b.gep = getelementptr float addrspace(1)* %in.b, i32 %tid
+  %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+
+  %a = load float addrspace(1)* %in.a.gep, align 4
+  %c = load float addrspace(1)* %in.b.gep, align 4
+
+  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
+  store float %fma, float addrspace(1)* %out.gep, align 4
+  ret void
 }
diff --git a/test/CodeGen/R600/fmax3.ll b/test/CodeGen/R600/fmax3.ll
new file mode 100644
index 000000000000..6f95bf20f73b
--- /dev/null
+++ b/test/CodeGen/R600/fmax3.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+
+; SI-LABEL: {{^}}test_fmax3_olt_0:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+  %a = load float addrspace(1)* %aptr, align 4
+  %b = load float addrspace(1)* %bptr, align 4
+  %c = load float addrspace(1)* %cptr, align 4
+  %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
+  %f1 = call float @llvm.maxnum.f32(float %f0, float %c) nounwind readnone
+  store float %f1, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; Commute operand of second fmax
+; SI-LABEL: {{^}}test_fmax3_olt_1:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmax3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+  %a = load float addrspace(1)* %aptr, align 4
+  %b = load float addrspace(1)* %bptr, align 4
+  %c = load float addrspace(1)* %cptr, align 4
+  %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
+  %f1 = call float @llvm.maxnum.f32(float %c, float %f0) nounwind readnone
+  store float %f1, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/fmax_legacy.f64.ll b/test/CodeGen/R600/fmax_legacy.f64.ll
new file mode 100644
index 000000000000..a615825a45d3
--- /dev/null
+++ b/test/CodeGen/R600/fmax_legacy.f64.ll
@@ -0,0 +1,67 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; Make sure we don't try to form FMAX_LEGACY nodes with f64
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; FUNC-LABEL: @test_fmax_legacy_uge_f64
+define void @test_fmax_legacy_uge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+  %a = load double addrspace(1)* %gep.0, align 8
+  %b = load double addrspace(1)* %gep.1, align 8
+
+  %cmp = fcmp uge double %a, %b
+  %val = select i1 %cmp, double %a, double %b
+  store double %val, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_oge_f64
+define void @test_fmax_legacy_oge_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+  %a = load double addrspace(1)* %gep.0, align 8
+  %b = load double addrspace(1)* %gep.1, align 8
+
+  %cmp = fcmp oge double %a, %b
+  %val = select i1 %cmp, double %a, double %b
+  store double %val, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_ugt_f64
+define void @test_fmax_legacy_ugt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+  %a = load double addrspace(1)* %gep.0, align 8
+  %b = load double addrspace(1)* %gep.1, align 8
+
+  %cmp = fcmp ugt double %a, %b
+  %val = select i1 %cmp, double %a, double %b
+  store double %val, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_ogt_f64
+define void @test_fmax_legacy_ogt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+  %a = load double addrspace(1)* %gep.0, align 8
+  %b = load double addrspace(1)* %gep.1, align 8
+
+  %cmp = fcmp ogt double %a, %b
+  %val = select i1 %cmp, double %a, double %b
+  store double %val, double addrspace(1)* %out, align 8
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fmax_legacy.ll b/test/CodeGen/R600/fmax_legacy.ll
new file mode 100644
index 000000000000..46f0e9831e6a
--- /dev/null
+++ b/test/CodeGen/R600/fmax_legacy.ll
@@ -0,0 +1,116 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
+; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FIXME: Should replace unsafe-fp-math with no signed zeros.
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; FUNC-LABEL: @test_fmax_legacy_uge_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+
+; EG: MAX
+define void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+  %a = load float addrspace(1)* %gep.0, align 4
+  %b = load float addrspace(1)* %gep.1, align 4
+
+  %cmp = fcmp uge float %a, %b
+  %val = select i1 %cmp, float %a, float %b
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_oge_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; EG: MAX
+define void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+  %a = load float addrspace(1)* %gep.0, align 4
+  %b = load float addrspace(1)* %gep.1, align 4
+
+  %cmp = fcmp oge float %a, %b
+  %val = select i1 %cmp, float %a, float %b
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_ugt_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; EG: MAX
+define void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+  %a = load float addrspace(1)* %gep.0, align 4
+  %b = load float addrspace(1)* %gep.1, align 4
+
+  %cmp = fcmp ugt float %a, %b
+  %val = select i1 %cmp, float %a, float %b
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_fmax_legacy_ogt_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; EG: MAX
+define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+  %a = load float addrspace(1)* %gep.0, align 4
+  %b = load float addrspace(1)* %gep.1, align 4
+
+  %cmp = fcmp ogt float %a, %b
+  %val = select i1 %cmp, float %a, float %b
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+
+; FUNC-LABEL: @test_fmax_legacy_ogt_f32_multi_use
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-NOT: v_max_
+; SI: v_cmp_gt_f32
+; SI-NEXT: v_cndmask_b32
+; SI-NOT: v_max_
+
+; EG: MAX
+define void @test_fmax_legacy_ogt_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+  %a = load float addrspace(1)* %gep.0, align 4
+  %b = load float addrspace(1)* %gep.1, align 4
+
+  %cmp = fcmp ogt float %a, %b
+  %val = select i1 %cmp, float %a, float %b
+  store float %val, float addrspace(1)* %out0, align 4
+  store i1 %cmp, i1addrspace(1)* %out1
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fmaxnum.f64.ll b/test/CodeGen/R600/fmaxnum.f64.ll
new file mode 100644
index 000000000000..e92996aa2b1f
--- /dev/null
+++ b/test/CodeGen/R600/fmaxnum.f64.ll
@@ -0,0 +1,75 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare double @llvm.maxnum.f64(double, double) #0
+declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) #0
+declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>) #0
+declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>) #0
+declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>) #0
+
+; FUNC-LABEL: @test_fmax_f64
+; SI: v_max_f64
+define void @test_fmax_f64(double addrspace(1)* %out, double %a, double %b) nounwind {
+  %val = call double @llvm.maxnum.f64(double %a, double %b) #0
+  store double %val, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_fmax_v2f64
+; SI: v_max_f64
+; SI: v_max_f64
+define void @test_fmax_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
+  %val = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b) #0
+  store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16
+  ret void
+}
+
+; FUNC-LABEL: @test_fmax_v4f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+define void @test_fmax_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
+  %val = call <4 x double> @llvm.maxnum.v4f64(<4 x double> %a, <4 x double> %b) #0
+  store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32
+  ret void
+}
+
+; FUNC-LABEL: @test_fmax_v8f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+define void @test_fmax_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
+  %val = call <8 x double> @llvm.maxnum.v8f64(<8 x double> %a, <8 x double> %b) #0
+  store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64
+  ret void
+}
+
+; FUNC-LABEL: @test_fmax_v16f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+; SI: v_max_f64
+define void @test_fmax_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
+  %val = call <16 x double> @llvm.maxnum.v16f64(<16 x double> %a, <16 x double> %b) #0
+  store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fmaxnum.ll b/test/CodeGen/R600/fmaxnum.ll
new file mode 100644
index 000000000000..473184af214b
--- /dev/null
+++ b/test/CodeGen/R600/fmaxnum.ll
@@ -0,0 +1,191 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare float @llvm.maxnum.f32(float, float) #0
+declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #0
+declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #0
+declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #0
+
+declare double @llvm.maxnum.f64(double, double)
+
+; FUNC-LABEL: @test_fmax_f32
+; SI: v_max_f32_e32
+define void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+  %val = call float @llvm.maxnum.f32(float %a, float %b) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_fmax_v2f32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+define void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
+  %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b) #0
+  store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_fmax_v4f32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+define void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
+  %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b) #0
+  store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
+; FUNC-LABEL: @test_fmax_v8f32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+define void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
+  %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b) #0
+  store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
+  ret void
+}
+
+; FUNC-LABEL: @test_fmax_v16f32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+; SI: v_max_f32_e32
+define void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
+  %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b) #0
+  store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32_nan_nan
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32_val_nan
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32_nan_val
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32_p0_p0
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32_p0_n0
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32_n0_p0
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmax_f32_n0_n0
+; SI-NOT: v_max_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @fmax_var_immediate_f32
+; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+define void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
+  %val = call float @llvm.maxnum.f32(float %a, float 2.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @fmax_immediate_var_f32
+; SI: v_max_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+define void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
+  %val = call float @llvm.maxnum.f32(float 2.0, float %a) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @fmax_var_literal_f32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+define void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
+  %val = call float @llvm.maxnum.f32(float %a, float 99.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @fmax_literal_var_f32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; SI: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+define void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
+  %val = call float @llvm.maxnum.f32(float 99.0, float %a) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fmin3.ll b/test/CodeGen/R600/fmin3.ll
new file mode 100644
index 000000000000..aeeed1c7dd39
--- /dev/null
+++ b/test/CodeGen/R600/fmin3.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+
+; SI-LABEL: {{^}}test_fmin3_olt_0:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+  %a = load float addrspace(1)* %aptr, align 4
+  %b = load float addrspace(1)* %bptr, align 4
+  %c = load float addrspace(1)* %cptr, align 4
+  %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
+  %f1 = call float @llvm.minnum.f32(float %f0, float %c) nounwind readnone
+  store float %f1, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; Commute operand of second fmin
+; SI-LABEL: {{^}}test_fmin3_olt_1:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_fmin3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
+  %a = load float addrspace(1)* %aptr, align 4
+  %b = load float addrspace(1)* %bptr, align 4
+  %c = load float addrspace(1)* %cptr, align 4
+  %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
+  %f1 = call float @llvm.minnum.f32(float %c, float %f0) nounwind readnone
+  store float %f1, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/fmin_legacy.f64.ll b/test/CodeGen/R600/fmin_legacy.f64.ll
new file mode 100644
index 000000000000..51dcd06f9397
--- /dev/null
+++ b/test/CodeGen/R600/fmin_legacy.f64.ll
@@ -0,0 +1,77 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; FUNC-LABEL: @test_fmin_legacy_f64
+define void @test_fmin_legacy_f64(<4 x double> addrspace(1)* %out, <4 x double> inreg %reg0) #0 {
+   %r0 = extractelement <4 x double> %reg0, i32 0
+   %r1 = extractelement <4 x double> %reg0, i32 1
+   %r2 = fcmp uge double %r0, %r1
+   %r3 = select i1 %r2, double %r1, double %r0
+   %vec = insertelement <4 x double> undef, double %r3, i32 0
+   store <4 x double> %vec, <4 x double> addrspace(1)* %out, align 16
+   ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ule_f64
+define void @test_fmin_legacy_ule_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+  %a = load double addrspace(1)* %gep.0, align 8
+  %b = load double addrspace(1)* %gep.1, align 8
+
+  %cmp = fcmp ule double %a, %b
+  %val = select i1 %cmp, double %a, double %b
+  store double %val, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ole_f64
+define void @test_fmin_legacy_ole_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+  %a = load double addrspace(1)* %gep.0, align 8
+  %b = load double addrspace(1)* %gep.1, align 8
+
+  %cmp = fcmp ole double %a, %b
+  %val = select i1 %cmp, double %a, double %b
+  store double %val, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_olt_f64
+define void @test_fmin_legacy_olt_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+  %a = load double addrspace(1)* %gep.0, align 8
+  %b = load double addrspace(1)* %gep.1, align 8
+
+  %cmp = fcmp olt double %a, %b
+  %val = select i1 %cmp, double %a, double %b
+  store double %val, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ult_f64
+define void @test_fmin_legacy_ult_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+  %a = load double addrspace(1)* %gep.0, align 8
+  %b = load double addrspace(1)* %gep.1, align 8
+
+  %cmp = fcmp ult double %a, %b
+  %val = select i1 %cmp, double %a, double %b
+  store double %val, double addrspace(1)* %out, align 8
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fmin_legacy.ll b/test/CodeGen/R600/fmin_legacy.ll
new file mode 100644
index 000000000000..5014f6c55329
--- /dev/null
+++ b/test/CodeGen/R600/fmin_legacy.ll
@@ -0,0 +1,123 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -enable-no-nans-fp-math -enable-unsafe-fp-math  -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FIXME: Should replace unsafe-fp-math with no signed zeros.
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; FUNC-LABEL: @test_fmin_legacy_f32
+; EG: MIN *
+; SI-SAFE: v_min_legacy_f32_e32
+; SI-NONAN: v_min_f32_e32
+define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> inreg %reg0) #0 {
+   %r0 = extractelement <4 x float> %reg0, i32 0
+   %r1 = extractelement <4 x float> %reg0, i32 1
+   %r2 = fcmp uge float %r0, %r1
+   %r3 = select i1 %r2, float %r1, float %r0
+   %vec = insertelement <4 x float> undef, float %r3, i32 0
+   store <4 x float> %vec, <4 x float> addrspace(1)* %out, align 16
+   ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ule_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+define void @test_fmin_legacy_ule_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+  %a = load float addrspace(1)* %gep.0, align 4
+  %b = load float addrspace(1)* %gep.1, align 4
+
+  %cmp = fcmp ule float %a, %b
+  %val = select i1 %cmp, float %a, float %b
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ole_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+define void @test_fmin_legacy_ole_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+  %a = load float addrspace(1)* %gep.0, align 4
+  %b = load float addrspace(1)* %gep.1, align 4
+
+  %cmp = fcmp ole float %a, %b
+  %val = select i1 %cmp, float %a, float %b
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_olt_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]]
+; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+define void @test_fmin_legacy_olt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+  %a = load float addrspace(1)* %gep.0, align 4
+  %b = load float addrspace(1)* %gep.1, align 4
+
+  %cmp = fcmp olt float %a, %b
+  %val = select i1 %cmp, float %a, float %b
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ult_f32
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]]
+define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+  %a = load float addrspace(1)* %gep.0, align 4
+  %b = load float addrspace(1)* %gep.1, align 4
+
+  %cmp = fcmp ult float %a, %b
+  %val = select i1 %cmp, float %a, float %b
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_fmin_legacy_ole_f32_multi_use
+; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-NOT: v_min
+; SI: v_cmp_le_f32
+; SI-NEXT: v_cndmask_b32
+; SI-NOT: v_min
+; SI: s_endpgm
+define void @test_fmin_legacy_ole_f32_multi_use(float addrspace(1)* %out0, i1 addrspace(1)* %out1, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+  %a = load float addrspace(1)* %gep.0, align 4
+  %b = load float addrspace(1)* %gep.1, align 4
+
+  %cmp = fcmp ole float %a, %b
+  %val0 = select i1 %cmp, float %a, float %b
+  store float %val0, float addrspace(1)* %out0, align 4
+  store i1 %cmp, i1 addrspace(1)* %out1
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fminnum.f64.ll b/test/CodeGen/R600/fminnum.f64.ll
new file mode 100644
index 000000000000..b8476f98bab8
--- /dev/null
+++ b/test/CodeGen/R600/fminnum.f64.ll
@@ -0,0 +1,75 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare double @llvm.minnum.f64(double, double) #0
+declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0
+declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>) #0
+declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>) #0
+declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>) #0
+
+; FUNC-LABEL: @test_fmin_f64
+; SI: v_min_f64
+define void @test_fmin_f64(double addrspace(1)* %out, double %a, double %b) nounwind {
+  %val = call double @llvm.minnum.f64(double %a, double %b) #0
+  store double %val, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_fmin_v2f64
+; SI: v_min_f64
+; SI: v_min_f64
+define void @test_fmin_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b) nounwind {
+  %val = call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b) #0
+  store <2 x double> %val, <2 x double> addrspace(1)* %out, align 16
+  ret void
+}
+
+; FUNC-LABEL: @test_fmin_v4f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+define void @test_fmin_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b) nounwind {
+  %val = call <4 x double> @llvm.minnum.v4f64(<4 x double> %a, <4 x double> %b) #0
+  store <4 x double> %val, <4 x double> addrspace(1)* %out, align 32
+  ret void
+}
+
+; FUNC-LABEL: @test_fmin_v8f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+define void @test_fmin_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b) nounwind {
+  %val = call <8 x double> @llvm.minnum.v8f64(<8 x double> %a, <8 x double> %b) #0
+  store <8 x double> %val, <8 x double> addrspace(1)* %out, align 64
+  ret void
+}
+
+; FUNC-LABEL: @test_fmin_v16f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+; SI: v_min_f64
+define void @test_fmin_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %a, <16 x double> %b) nounwind {
+  %val = call <16 x double> @llvm.minnum.v16f64(<16 x double> %a, <16 x double> %b) #0
+  store <16 x double> %val, <16 x double> addrspace(1)* %out, align 128
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fminnum.ll b/test/CodeGen/R600/fminnum.ll
new file mode 100644
index 000000000000..cd1a948707e8
--- /dev/null
+++ b/test/CodeGen/R600/fminnum.ll
@@ -0,0 +1,189 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare float @llvm.minnum.f32(float, float) #0
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #0
+declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #0
+declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #0
+
+; FUNC-LABEL: @test_fmin_f32
+; SI: v_min_f32_e32
+define void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) nounwind {
+  %val = call float @llvm.minnum.f32(float %a, float %b) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @test_fmin_v2f32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+define void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) nounwind {
+  %val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b) #0
+  store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @test_fmin_v4f32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+define void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) nounwind {
+  %val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b) #0
+  store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
+; FUNC-LABEL: @test_fmin_v8f32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+define void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) nounwind {
+  %val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b) #0
+  store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
+  ret void
+}
+
+; FUNC-LABEL: @test_fmin_v16f32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+; SI: v_min_f32_e32
+define void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) nounwind {
+  %val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b) #0
+  store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.minnum.f32(float 1.0, float 2.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32_nan_nan
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x7fc00000
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32_val_nan
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32_nan_val
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32_p0_p0
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.minnum.f32(float 0.0, float 0.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32_p0_n0
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.minnum.f32(float 0.0, float -0.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32_n0_p0
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.minnum.f32(float -0.0, float 0.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @constant_fold_fmin_f32_n0_n0
+; SI-NOT: v_min_f32_e32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; SI: buffer_store_dword [[REG]]
+define void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) nounwind {
+  %val = call float @llvm.minnum.f32(float -0.0, float -0.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @fmin_var_immediate_f32
+; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+define void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) nounwind {
+  %val = call float @llvm.minnum.f32(float %a, float 2.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @fmin_immediate_var_f32
+; SI: v_min_f32_e64 {{v[0-9]+}}, 2.0, {{s[0-9]+}}
+define void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) nounwind {
+  %val = call float @llvm.minnum.f32(float 2.0, float %a) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @fmin_var_literal_f32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+define void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) nounwind {
+  %val = call float @llvm.minnum.f32(float %a, float 99.0) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @fmin_literal_var_f32
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
+; SI: v_min_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
+define void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) nounwind {
+  %val = call float @llvm.minnum.f32(float 99.0, float %a) #0
+  store float %val, float addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fmul.ll b/test/CodeGen/R600/fmul.ll
index 2a7825f9ecf7..7296a8760be2 100644
--- a/test/CodeGen/R600/fmul.ll
+++ b/test/CodeGen/R600/fmul.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 
-; R600-CHECK: @fmul_f32
-; R600-CHECK: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
-; SI-CHECK: @fmul_f32
-; SI-CHECK: V_MUL_F32
+
+; FUNC-LABEL: {{^}}fmul_f32:
+; R600: MUL_IEEE {{\** *}}{{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].W
+
+; SI: v_mul_f32
 define void @fmul_f32(float addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fmul float %a, %b
@@ -16,12 +17,12 @@ declare float @llvm.R600.load.input(i32) readnone
 
 declare void @llvm.AMDGPU.store.output(float, i32)
 
-; R600-CHECK: @fmul_v2f32
-; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
-; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
-; SI-CHECK: @fmul_v2f32
-; SI-CHECK: V_MUL_F32
-; SI-CHECK: V_MUL_F32
+; FUNC-LABEL: {{^}}fmul_v2f32:
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}
+
+; SI: v_mul_f32
+; SI: v_mul_f32
 define void @fmul_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
 entry:
   %0 = fmul <2 x float> %a, %b
@@ -29,16 +30,16 @@ entry:
   ret void
 }
 
-; R600-CHECK: @fmul_v4f32
-; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI-CHECK: @fmul_v4f32
-; SI-CHECK: V_MUL_F32
-; SI-CHECK: V_MUL_F32
-; SI-CHECK: V_MUL_F32
-; SI-CHECK: V_MUL_F32
+; FUNC-LABEL: {{^}}fmul_v4f32:
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+; SI: v_mul_f32
+; SI: v_mul_f32
+; SI: v_mul_f32
+; SI: v_mul_f32
 define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
   %a = load <4 x float> addrspace(1) * %in
@@ -47,3 +48,28 @@ define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: {{^}}test_mul_2_k:
+; SI: v_mul_f32
+; SI-NOT: v_mul_f32
+; SI: s_endpgm
+define void @test_mul_2_k(float addrspace(1)* %out, float %x) #0 {
+  %y = fmul float %x, 2.0
+  %z = fmul float %y, 3.0
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_mul_2_k_inv:
+; SI: v_mul_f32
+; SI-NOT: v_mul_f32
+; SI-NOT: v_mad_f32
+; SI: s_endpgm
+define void @test_mul_2_k_inv(float addrspace(1)* %out, float %x) #0 {
+  %y = fmul float %x, 3.0
+  %z = fmul float %y, 2.0
+  store float %z, float addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/R600/fmul64.ll b/test/CodeGen/R600/fmul64.ll
index 7c7bf041496b..882307ef458b 100644
--- a/test/CodeGen/R600/fmul64.ll
+++ b/test/CodeGen/R600/fmul64.ll
@@ -1,8 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
-
-; CHECK: @fmul_f64
-; CHECK: V_MUL_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
 
+; FUNC-LABEL: {{^}}fmul_f64:
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                       double addrspace(1)* %in2) {
    %r0 = load double addrspace(1)* %in1
@@ -11,3 +10,29 @@ define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
    store double %r2, double addrspace(1)* %out
    ret void
 }
+
+; FUNC-LABEL: {{^}}fmul_v2f64:
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
+                        <2 x double> addrspace(1)* %in2) {
+   %r0 = load <2 x double> addrspace(1)* %in1
+   %r1 = load <2 x double> addrspace(1)* %in2
+   %r2 = fmul <2 x double> %r0, %r1
+   store <2 x double> %r2, <2 x double> addrspace(1)* %out
+   ret void
+}
+
+; FUNC-LABEL: {{^}}fmul_v4f64:
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+define void @fmul_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
+                        <4 x double> addrspace(1)* %in2) {
+   %r0 = load <4 x double> addrspace(1)* %in1
+   %r1 = load <4 x double> addrspace(1)* %in2
+   %r2 = fmul <4 x double> %r0, %r1
+   store <4 x double> %r2, <4 x double> addrspace(1)* %out
+   ret void
+}
diff --git a/test/CodeGen/R600/fmuladd.ll b/test/CodeGen/R600/fmuladd.ll
index 48944f629482..2b708639b122 100644
--- a/test/CodeGen/R600/fmuladd.ll
+++ b/test/CodeGen/R600/fmuladd.ll
@@ -1,7 +1,12 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
 
-; CHECK: @fmuladd_f32
-; CHECK: V_MAD_F32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
+declare float @llvm.fmuladd.f32(float, float, float)
+declare double @llvm.fmuladd.f64(double, double, double)
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+declare float @llvm.fabs.f32(float) nounwind readnone
+
+; CHECK-LABEL: {{^}}fmuladd_f32:
+; CHECK: v_mad_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                          float addrspace(1)* %in2, float addrspace(1)* %in3) {
@@ -13,10 +18,8 @@ define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
    ret void
 }
 
-declare float @llvm.fmuladd.f32(float, float, float)
-
-; CHECK: @fmuladd_f64
-; CHECK: V_FMA_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; CHECK-LABEL: {{^}}fmuladd_f64:
+; CHECK: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 
 define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                          double addrspace(1)* %in2, double addrspace(1)* %in3) {
@@ -28,4 +31,169 @@ define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
    ret void
 }
 
-declare double @llvm.fmuladd.f64(double, double, double)
+; CHECK-LABEL: {{^}}fmuladd_2.0_a_b_f32
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+  %r1 = load float addrspace(1)* %gep.0
+  %r2 = load float addrspace(1)* %gep.1
+
+  %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2)
+  store float %r3, float addrspace(1)* %gep.out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}fmuladd_a_2.0_b_f32
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+  %r1 = load float addrspace(1)* %gep.0
+  %r2 = load float addrspace(1)* %gep.1
+
+  %r3 = tail call float @llvm.fmuladd.f32(float %r1, float 2.0, float %r2)
+  store float %r3, float addrspace(1)* %gep.out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}fadd_a_a_b_f32:
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fadd_a_a_b_f32(float addrspace(1)* %out,
+                            float addrspace(1)* %in1,
+                            float addrspace(1)* %in2) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+  %r0 = load float addrspace(1)* %gep.0
+  %r1 = load float addrspace(1)* %gep.1
+
+  %add.0 = fadd float %r0, %r0
+  %add.1 = fadd float %add.0, %r1
+  store float %add.1, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}fadd_b_a_a_f32:
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fadd_b_a_a_f32(float addrspace(1)* %out,
+                            float addrspace(1)* %in1,
+                            float addrspace(1)* %in2) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+  %r0 = load float addrspace(1)* %gep.0
+  %r1 = load float addrspace(1)* %gep.1
+
+  %add.0 = fadd float %r0, %r0
+  %add.1 = fadd float %r1, %add.0
+  store float %add.1, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+  %r1 = load float addrspace(1)* %gep.0
+  %r2 = load float addrspace(1)* %gep.1
+
+  %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1, float %r2)
+  store float %r3, float addrspace(1)* %gep.out
+  ret void
+}
+
+
+; CHECK-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+  %r1 = load float addrspace(1)* %gep.0
+  %r2 = load float addrspace(1)* %gep.1
+
+  %r1.fneg = fsub float -0.000000e+00, %r1
+
+  %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1.fneg, float %r2)
+  store float %r3, float addrspace(1)* %gep.out
+  ret void
+}
+
+
+; CHECK-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+  %r1 = load float addrspace(1)* %gep.0
+  %r2 = load float addrspace(1)* %gep.1
+
+  %r1.fneg = fsub float -0.000000e+00, %r1
+
+  %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1.fneg, float %r2)
+  store float %r3, float addrspace(1)* %gep.out
+  ret void
+}
+
+
+; CHECK-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32
+; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
+; CHECK: buffer_store_dword [[RESULT]]
+define void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+  %r1 = load float addrspace(1)* %gep.0
+  %r2 = load float addrspace(1)* %gep.1
+
+  %r2.fneg = fsub float -0.000000e+00, %r2
+
+  %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2.fneg)
+  store float %r3, float addrspace(1)* %gep.out
+  ret void
+}
diff --git a/test/CodeGen/R600/fnearbyint.ll b/test/CodeGen/R600/fnearbyint.ll
index 1c1d7315189f..30bc67689e1c 100644
--- a/test/CodeGen/R600/fnearbyint.ll
+++ b/test/CodeGen/R600/fnearbyint.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s
-; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s
 
 ; This should have the exactly the same output as the test for rint,
 ; so no need to check anything.
diff --git a/test/CodeGen/R600/fneg-fabs.f64.ll b/test/CodeGen/R600/fneg-fabs.f64.ll
new file mode 100644
index 000000000000..04a87e377857
--- /dev/null
+++ b/test/CodeGen/R600/fneg-fabs.f64.ll
@@ -0,0 +1,101 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FIXME: Check something here. Currently it seems fabs + fneg aren't
+; into 2 modifiers, although theoretically that should work.
+
+; FUNC-LABEL: {{^}}fneg_fabs_fadd_f64:
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x7fffffff
+; SI: v_and_b32_e32 v[[FABS:[0-9]+]], {{s[0-9]+}}, [[IMMREG]]
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -v{{\[[0-9]+}}:[[FABS]]{{\]}}
+define void @fneg_fabs_fadd_f64(double addrspace(1)* %out, double %x, double %y) {
+  %fabs = call double @llvm.fabs.f64(double %x)
+  %fsub = fsub double -0.000000e+00, %fabs
+  %fadd = fadd double %y, %fsub
+  store double %fadd, double addrspace(1)* %out, align 8
+  ret void
+}
+
+define void @v_fneg_fabs_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %xptr, double addrspace(1)* %yptr) {
+  %x = load double addrspace(1)* %xptr, align 8
+  %y = load double addrspace(1)* %xptr, align 8
+  %fabs = call double @llvm.fabs.f64(double %x)
+  %fsub = fsub double -0.000000e+00, %fabs
+  %fadd = fadd double %y, %fsub
+  store double %fadd, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_fmul_f64:
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, -|{{v\[[0-9]+:[0-9]+\]}}|
+define void @fneg_fabs_fmul_f64(double addrspace(1)* %out, double %x, double %y) {
+  %fabs = call double @llvm.fabs.f64(double %x)
+  %fsub = fsub double -0.000000e+00, %fabs
+  %fmul = fmul double %y, %fsub
+  store double %fmul, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_free_f64:
+define void @fneg_fabs_free_f64(double addrspace(1)* %out, i64 %in) {
+  %bc = bitcast i64 %in to double
+  %fabs = call double @llvm.fabs.f64(double %bc)
+  %fsub = fsub double -0.000000e+00, %fabs
+  store double %fsub, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_fn_free_f64:
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_fn_free_f64(double addrspace(1)* %out, i64 %in) {
+  %bc = bitcast i64 %in to double
+  %fabs = call double @fabs(double %bc)
+  %fsub = fsub double -0.000000e+00, %fabs
+  store double %fsub, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_f64:
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2 s{{\[}}[[LO_X:[0-9]+]]:[[HI_X:[0-9]+]]{{\]}}
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI-DAG: v_or_b32_e32 v[[HI_V:[0-9]+]], s[[HI_X]], [[IMMREG]]
+; SI-DAG: v_mov_b32_e32 v[[LO_V:[0-9]+]], s[[LO_X]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LO_V]]:[[HI_V]]{{\]}}
+define void @fneg_fabs_f64(double addrspace(1)* %out, double %in) {
+  %fabs = call double @llvm.fabs.f64(double %in)
+  %fsub = fsub double -0.000000e+00, %fabs
+  store double %fsub, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_v2f64:
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI-NOT: 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
+  %fabs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %in)
+  %fsub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %fabs
+  store <2 x double> %fsub, <2 x double> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_v4f64:
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI-NOT: 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
+  %fabs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %in)
+  %fsub = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %fabs
+  store <4 x double> %fsub, <4 x double> addrspace(1)* %out
+  ret void
+}
+
+declare double @fabs(double) readnone
+declare double @llvm.fabs.f64(double) readnone
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone
+declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
diff --git a/test/CodeGen/R600/fneg-fabs.ll b/test/CodeGen/R600/fneg-fabs.ll
index d95e1311bc10..94e8256cd261 100644
--- a/test/CodeGen/R600/fneg-fabs.ll
+++ b/test/CodeGen/R600/fneg-fabs.ll
@@ -1,55 +1,117 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}fneg_fabs_fadd_f32:
+; SI-NOT: and
+; SI: v_sub_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, |{{v[0-9]+}}|
+define void @fneg_fabs_fadd_f32(float addrspace(1)* %out, float %x, float %y) {
+  %fabs = call float @llvm.fabs.f32(float %x)
+  %fsub = fsub float -0.000000e+00, %fabs
+  %fadd = fadd float %y, %fsub
+  store float %fadd, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_fmul_f32:
+; SI-NOT: and
+; SI: v_mul_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, -|{{v[0-9]+}}|
+; SI-NOT: and
+define void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) {
+  %fabs = call float @llvm.fabs.f32(float %x)
+  %fsub = fsub float -0.000000e+00, %fabs
+  %fmul = fmul float %y, %fsub
+  store float %fmul, float addrspace(1)* %out, align 4
+  ret void
+}
 
 ; DAGCombiner will transform:
 ; (fabs (f32 bitcast (i32 a))) => (f32 bitcast (and (i32 a), 0x7FFFFFFF))
 ; unless isFabsFree returns true
 
-; R600-CHECK-LABEL: @fneg_fabs_free
-; R600-CHECK-NOT: AND
-; R600-CHECK: |PV.{{[XYZW]}}|
-; R600-CHECK: -PV
-; SI-CHECK-LABEL: @fneg_fabs_free
-; SI-CHECK: V_OR_B32
-
-define void @fneg_fabs_free(float addrspace(1)* %out, i32 %in) {
-entry:
-  %0 = bitcast i32 %in to float
-  %1 = call float @fabs(float %0)
-  %2 = fsub float -0.000000e+00, %1
-  store float %2, float addrspace(1)* %out
-  ret void
-}
-
-; R600-CHECK-LABEL: @fneg_fabs_v2
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600-CHECK: -PV
-; R600-CHECK: |{{(PV|T[0-9])\.[XYZW]}}|
-; R600-CHECK: -PV
-; SI-CHECK-LABEL: @fneg_fabs_v2
-; SI-CHECK: V_OR_B32
-; SI-CHECK: V_OR_B32
-define void @fneg_fabs_v2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
-entry:
-  %0 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
-  %1 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %0
-  store <2 x float> %1, <2 x float> addrspace(1)* %out
-  ret void
-}
-
-; SI-CHECK-LABEL: @fneg_fabs_v4
-; SI-CHECK: V_OR_B32
-; SI-CHECK: V_OR_B32
-; SI-CHECK: V_OR_B32
-; SI-CHECK: V_OR_B32
-define void @fneg_fabs_v4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
-entry:
-  %0 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
-  %1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
-  store <4 x float> %1, <4 x float> addrspace(1)* %out
-  ret void
-}
-
-declare float @fabs(float ) readnone
-declare <2 x float> @llvm.fabs.v2f32(<2 x float> ) readnone
-declare <4 x float> @llvm.fabs.v4f32(<4 x float> ) readnone
+; FUNC-LABEL: {{^}}fneg_fabs_free_f32:
+; R600-NOT: AND
+; R600: |PV.{{[XYZW]}}|
+; R600: -PV
+
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) {
+  %bc = bitcast i32 %in to float
+  %fabs = call float @llvm.fabs.f32(float %bc)
+  %fsub = fsub float -0.000000e+00, %fabs
+  store float %fsub, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_fn_free_f32:
+; R600-NOT: AND
+; R600: |PV.{{[XYZW]}}|
+; R600: -PV
+
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) {
+  %bc = bitcast i32 %in to float
+  %fabs = call float @fabs(float %bc)
+  %fsub = fsub float -0.000000e+00, %fabs
+  store float %fsub, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_f32:
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_f32(float addrspace(1)* %out, float %in) {
+  %fabs = call float @llvm.fabs.f32(float %in)
+  %fsub = fsub float -0.000000e+00, %fabs
+  store float %fsub, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_fneg_fabs_f32:
+; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
+define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %val = load float addrspace(1)* %in, align 4
+  %fabs = call float @llvm.fabs.f32(float %val)
+  %fsub = fsub float -0.000000e+00, %fabs
+  store float %fsub, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_fabs_v2f32:
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600: -PV
+; R600: |{{(PV|T[0-9])\.[XYZW]}}|
+; R600: -PV
+
+; FIXME: SGPR should be used directly for first src operand.
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI-NOT: 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
+  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
+  %fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
+  store <2 x float> %fsub, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+; FIXME: SGPR should be used directly for first src operand.
+; FUNC-LABEL: {{^}}fneg_fabs_v4f32:
+; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000
+; SI-NOT: 0x80000000
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]]
+define void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
+  %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
+  %fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
+  store <4 x float> %fsub, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+declare float @fabs(float) readnone
+declare float @llvm.fabs.f32(float) readnone
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone
diff --git a/test/CodeGen/R600/fneg.f64.ll b/test/CodeGen/R600/fneg.f64.ll
new file mode 100644
index 000000000000..eb2eb08b88b1
--- /dev/null
+++ b/test/CodeGen/R600/fneg.f64.ll
@@ -0,0 +1,59 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}fneg_f64:
+; SI: v_xor_b32
+define void @fneg_f64(double addrspace(1)* %out, double %in) {
+  %fneg = fsub double -0.000000e+00, %in
+  store double %fneg, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_v2f64:
+; SI: v_xor_b32
+; SI: v_xor_b32
+define void @fneg_v2f64(<2 x double> addrspace(1)* nocapture %out, <2 x double> %in) {
+  %fneg = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %in
+  store <2 x double> %fneg, <2 x double> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fneg_v4f64:
+; R600: -PV
+; R600: -T
+; R600: -PV
+; R600: -PV
+
+; SI: v_xor_b32
+; SI: v_xor_b32
+; SI: v_xor_b32
+; SI: v_xor_b32
+define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> %in) {
+  %fneg = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %in
+  store <4 x double> %fneg, <4 x double> addrspace(1)* %out
+  ret void
+}
+
+; DAGCombiner will transform:
+; (fneg (f64 bitcast (i64 a))) => (f64 bitcast (xor (i64 a), 0x80000000))
+; unless the target returns true for isNegFree()
+
+; FUNC-LABEL: {{^}}fneg_free_f64:
+; FIXME: Unnecessary copy to VGPRs
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}}
+define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) {
+  %bc = bitcast i64 %in to double
+  %fsub = fsub double 0.0, %bc
+  store double %fsub, double addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}fneg_fold_f64:
+; SI: s_load_dwordx2 [[NEG_VALUE:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NOT: xor
+; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\]}}, -[[NEG_VALUE]], [[NEG_VALUE]]
+define void @fneg_fold_f64(double addrspace(1)* %out, double %in) {
+  %fsub = fsub double -0.0, %in
+  %fmul = fmul double %fsub, %in
+  store double %fmul, double addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll
index 4cddc7378956..ca3350dd7f48 100644
--- a/test/CodeGen/R600/fneg.ll
+++ b/test/CodeGen/R600/fneg.ll
@@ -1,44 +1,41 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 
-; R600-CHECK-LABEL: @fneg
-; R600-CHECK: -PV
-; SI-CHECK-LABEL: @fneg
-; SI-CHECK: V_XOR_B32
-define void @fneg(float addrspace(1)* %out, float %in) {
-entry:
-  %0 = fsub float -0.000000e+00, %in
-  store float %0, float addrspace(1)* %out
+; FUNC-LABEL: {{^}}fneg_f32:
+; R600: -PV
+
+; SI: v_xor_b32
+define void @fneg_f32(float addrspace(1)* %out, float %in) {
+  %fneg = fsub float -0.000000e+00, %in
+  store float %fneg, float addrspace(1)* %out
   ret void
 }
 
-; R600-CHECK-LABEL: @fneg_v2
-; R600-CHECK: -PV
-; R600-CHECK: -PV
-; SI-CHECK-LABEL: @fneg_v2
-; SI-CHECK: V_XOR_B32
-; SI-CHECK: V_XOR_B32
-define void @fneg_v2(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
-entry:
-  %0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
-  store <2 x float> %0, <2 x float> addrspace(1)* %out
+; FUNC-LABEL: {{^}}fneg_v2f32:
+; R600: -PV
+; R600: -PV
+
+; SI: v_xor_b32
+; SI: v_xor_b32
+define void @fneg_v2f32(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) {
+  %fneg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %in
+  store <2 x float> %fneg, <2 x float> addrspace(1)* %out
   ret void
 }
 
-; R600-CHECK-LABEL: @fneg_v4
-; R600-CHECK: -PV
-; R600-CHECK: -T
-; R600-CHECK: -PV
-; R600-CHECK: -PV
-; SI-CHECK-LABEL: @fneg_v4
-; SI-CHECK: V_XOR_B32
-; SI-CHECK: V_XOR_B32
-; SI-CHECK: V_XOR_B32
-; SI-CHECK: V_XOR_B32
-define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
-entry:
-  %0 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
-  store <4 x float> %0, <4 x float> addrspace(1)* %out
+; FUNC-LABEL: {{^}}fneg_v4f32:
+; R600: -PV
+; R600: -T
+; R600: -PV
+; R600: -PV
+
+; SI: v_xor_b32
+; SI: v_xor_b32
+; SI: v_xor_b32
+; SI: v_xor_b32
+define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {
+  %fneg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %in
+  store <4 x float> %fneg, <4 x float> addrspace(1)* %out
   ret void
 }
 
@@ -46,27 +43,26 @@ entry:
 ; (fneg (f32 bitcast (i32 a))) => (f32 bitcast (xor (i32 a), 0x80000000))
 ; unless the target returns true for isNegFree()
 
-; R600-CHECK-LABEL: @fneg_free
-; R600-CHECK-NOT: XOR
-; R600-CHECK: -KC0[2].Z
-; SI-CHECK-LABEL: @fneg_free
-; XXX: We could use V_ADD_F32_e64 with the negate bit here instead.
-; SI-CHECK: V_SUB_F32_e64 v{{[0-9]}}, 0.000000e+00, s{{[0-9]}}, 0, 0
-define void @fneg_free(float addrspace(1)* %out, i32 %in) {
-entry:
-  %0 = bitcast i32 %in to float
-  %1 = fsub float 0.0, %0
-  store float %1, float addrspace(1)* %out
+; FUNC-LABEL: {{^}}fneg_free_f32:
+; R600-NOT: XOR
+; R600: -KC0[2].Z
+
+; XXX: We could use v_add_f32_e64 with the negate bit here instead.
+; SI: v_sub_f32_e64 v{{[0-9]}}, 0, s{{[0-9]+$}}
+define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) {
+  %bc = bitcast i32 %in to float
+  %fsub = fsub float 0.0, %bc
+  store float %fsub, float addrspace(1)* %out
   ret void
 }
 
-; SI-CHECK-LABEL: @fneg_fold
-; SI-CHECK-NOT: V_XOR_B32
-; SI-CHECK: V_MUL_F32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
-define void @fneg_fold(float addrspace(1)* %out, float %in) {
-entry:
-  %0 = fsub float -0.0, %in
-  %1 = fmul float %0, %in
-  store float %1, float addrspace(1)* %out
+; FUNC-LABEL: {{^}}fneg_fold_f32:
+; SI: s_load_dword [[NEG_VALUE:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
+; SI-NOT: xor
+; SI: v_mul_f32_e64 v{{[0-9]+}}, -[[NEG_VALUE]], [[NEG_VALUE]]
+define void @fneg_fold_f32(float addrspace(1)* %out, float %in) {
+  %fsub = fsub float -0.0, %in
+  %fmul = fmul float %fsub, %in
+  store float %fmul, float addrspace(1)* %out
   ret void
 }
diff --git a/test/CodeGen/R600/fp-classify.ll b/test/CodeGen/R600/fp-classify.ll
new file mode 100644
index 000000000000..a1b2f08eddeb
--- /dev/null
+++ b/test/CodeGen/R600/fp-classify.ll
@@ -0,0 +1,130 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare i1 @llvm.AMDGPU.class.f32(float, i32) #1
+declare i1 @llvm.AMDGPU.class.f64(double, i32) #1
+declare i32 @llvm.r600.read.tidig.x() #1
+declare float @llvm.fabs.f32(float) #1
+declare double @llvm.fabs.f64(double) #1
+
+; SI-LABEL: {{^}}test_isinf_pattern:
+; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x204{{$}}
+; SI: v_cmp_class_f32_e32 vcc, s{{[0-9]+}}, [[MASK]]
+; SI-NOT: v_cmp
+; SI: s_endpgm
+define void @test_isinf_pattern(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %fabs = tail call float @llvm.fabs.f32(float %x) #1
+  %cmp = fcmp oeq float %fabs, 0x7FF0000000000000
+  %ext = zext i1 %cmp to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_not_isinf_pattern_0:
+; SI-NOT: v_cmp_class
+; SI: s_endpgm
+define void @test_not_isinf_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %fabs = tail call float @llvm.fabs.f32(float %x) #1
+  %cmp = fcmp ueq float %fabs, 0x7FF0000000000000
+  %ext = zext i1 %cmp to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_not_isinf_pattern_1:
+; SI-NOT: v_cmp_class
+; SI: s_endpgm
+define void @test_not_isinf_pattern_1(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %fabs = tail call float @llvm.fabs.f32(float %x) #1
+  %cmp = fcmp oeq float %fabs, 0xFFF0000000000000
+  %ext = zext i1 %cmp to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_isfinite_pattern_0:
+; SI-NOT: v_cmp
+; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1f8{{$}}
+; SI: v_cmp_class_f32_e32 vcc, s{{[0-9]+}}, [[MASK]]
+; SI-NOT: v_cmp
+; SI: s_endpgm
+define void @test_isfinite_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %ord = fcmp ord float %x, 0.000000e+00
+  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+  %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; Use negative infinity
+; SI-LABEL: {{^}}test_isfinite_not_pattern_0:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %ord = fcmp ord float %x, 0.000000e+00
+  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+  %ninf = fcmp une float %x.fabs, 0xFFF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; No fabs
+; SI-LABEL: {{^}}test_isfinite_not_pattern_1:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_1(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %ord = fcmp ord float %x, 0.000000e+00
+  %ninf = fcmp une float %x, 0x7FF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; fabs of different value
+; SI-LABEL: {{^}}test_isfinite_not_pattern_2:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_2(i32 addrspace(1)* nocapture %out, float %x, float %y) #0 {
+  %ord = fcmp ord float %x, 0.000000e+00
+  %x.fabs = tail call float @llvm.fabs.f32(float %y) #1
+  %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; Wrong ordered compare type
+; SI-LABEL: {{^}}test_isfinite_not_pattern_3:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_3(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %ord = fcmp uno float %x, 0.000000e+00
+  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+  %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; Wrong unordered compare
+; SI-LABEL: {{^}}test_isfinite_not_pattern_4:
+; SI-NOT: v_cmp_class_f32
+; SI: s_endpgm
+define void @test_isfinite_not_pattern_4(i32 addrspace(1)* nocapture %out, float %x) #0 {
+  %ord = fcmp ord float %x, 0.000000e+00
+  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
+  %ninf = fcmp one float %x.fabs, 0x7FF0000000000000
+  %and = and i1 %ord, %ninf
+  %ext = zext i1 %and to i32
+  store i32 %ext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/fp16_to_fp.ll b/test/CodeGen/R600/fp16_to_fp.ll
index 777eadc34ead..be84582a73a6 100644
--- a/test/CodeGen/R600/fp16_to_fp.ll
+++ b/test/CodeGen/R600/fp16_to_fp.ll
@@ -1,12 +1,12 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
 declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
 declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
 
-; SI-LABEL: @test_convert_fp16_to_fp32:
-; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
-; SI: V_CVT_F32_F16_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; SI-LABEL: {{^}}test_convert_fp16_to_fp32:
+; SI: buffer_load_ushort [[VAL:v[0-9]+]]
+; SI: v_cvt_f32_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]]
 define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
   %val = load i16 addrspace(1)* %in, align 2
   %cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
@@ -15,11 +15,11 @@ define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 add
 }
 
 
-; SI-LABEL: @test_convert_fp16_to_fp64:
-; SI: BUFFER_LOAD_USHORT [[VAL:v[0-9]+]]
-; SI: V_CVT_F32_F16_e32 [[RESULT32:v[0-9]+]], [[VAL]]
-; SI: V_CVT_F64_F32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
+; SI-LABEL: {{^}}test_convert_fp16_to_fp64:
+; SI: buffer_load_ushort [[VAL:v[0-9]+]]
+; SI: v_cvt_f32_f16_e32 [[RESULT32:v[0-9]+]], [[VAL]]
+; SI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
+; SI: buffer_store_dwordx2 [[RESULT]]
 define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
   %val = load i16 addrspace(1)* %in, align 2
   %cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
diff --git a/test/CodeGen/R600/fp32_to_fp16.ll b/test/CodeGen/R600/fp32_to_fp16.ll
index 6b5ff00b5f60..43dd09b5ec05 100644
--- a/test/CodeGen/R600/fp32_to_fp16.ll
+++ b/test/CodeGen/R600/fp32_to_fp16.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
 declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
 
-; SI-LABEL: @test_convert_fp32_to_fp16:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]]
-; SI: V_CVT_F16_F32_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
+; SI-LABEL: {{^}}test_convert_fp32_to_fp16:
+; SI: buffer_load_dword [[VAL:v[0-9]+]]
+; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_short [[RESULT]]
 define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
   %val = load float addrspace(1)* %in, align 4
   %cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
diff --git a/test/CodeGen/R600/fp64_to_sint.ll b/test/CodeGen/R600/fp64_to_sint.ll
deleted file mode 100644
index 185e21c9caa3..000000000000
--- a/test/CodeGen/R600/fp64_to_sint.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
-
-; CHECK: @fp64_to_sint
-; CHECK: V_CVT_I32_F64_e32
-define void @fp64_to_sint(i32 addrspace(1)* %out, double %in) {
-  %result = fptosi double %in to i32
-  store i32 %result, i32 addrspace(1)* %out
-  ret void
-}
diff --git a/test/CodeGen/R600/fp_to_sint.f64.ll b/test/CodeGen/R600/fp_to_sint.f64.ll
new file mode 100644
index 000000000000..e6418477a9b4
--- /dev/null
+++ b/test/CodeGen/R600/fp_to_sint.f64.ll
@@ -0,0 +1,56 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @fp_to_sint_f64_i32
+; SI: v_cvt_i32_f64_e32
+define void @fp_to_sint_f64_i32(i32 addrspace(1)* %out, double %in) {
+  %result = fptosi double %in to i32
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fp_to_sint_v2f64_v2i32
+; SI: v_cvt_i32_f64_e32
+; SI: v_cvt_i32_f64_e32
+define void @fp_to_sint_v2f64_v2i32(<2 x i32> addrspace(1)* %out, <2 x double> %in) {
+  %result = fptosi <2 x double> %in to <2 x i32>
+  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fp_to_sint_v4f64_v4i32
+; SI: v_cvt_i32_f64_e32
+; SI: v_cvt_i32_f64_e32
+; SI: v_cvt_i32_f64_e32
+; SI: v_cvt_i32_f64_e32
+define void @fp_to_sint_v4f64_v4i32(<4 x i32> addrspace(1)* %out, <4 x double> %in) {
+  %result = fptosi <4 x double> %in to <4 x i32>
+  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @fp_to_sint_i64_f64
+; CI-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
+; CI-DAG: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
+; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}}
+; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000
+
+; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
+; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
+
+; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000
+
+; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
+; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
+; CI-DAG: v_cvt_i32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
+; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep = getelementptr double addrspace(1)* %in, i32 %tid
+  %val = load double addrspace(1)* %gep, align 8
+  %cast = fptosi double %val to i64
+  store i64 %cast, i64 addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/fp_to_sint.ll b/test/CodeGen/R600/fp_to_sint.ll
index 235045aaaaaa..35cfb03d39b4 100644
--- a/test/CodeGen/R600/fp_to_sint.ll
+++ b/test/CodeGen/R600/fp_to_sint.ll
@@ -1,26 +1,36 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC
 
-; FUNC-LABEL: @fp_to_sint_v2i32
+; FUNC-LABEL: {{^}}fp_to_sint_i32:
+; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; SI: v_cvt_i32_f32_e32
+; SI: s_endpgm
+define void @fp_to_sint_i32 (i32 addrspace(1)* %out, float %in) {
+  %conv = fptosi float %in to i32
+  store i32 %conv, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fp_to_sint_v2i32:
 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; SI: V_CVT_I32_F32_e32
-; SI: V_CVT_I32_F32_e32
+; SI: v_cvt_i32_f32_e32
+; SI: v_cvt_i32_f32_e32
 define void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
   %result = fptosi <2 x float> %in to <2 x i32>
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @fp_to_sint_v4i32
+; FUNC-LABEL: {{^}}fp_to_sint_v4i32:
 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW]}}
 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ; EG: FLT_TO_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; SI: V_CVT_I32_F32_e32
-; SI: V_CVT_I32_F32_e32
-; SI: V_CVT_I32_F32_e32
-; SI: V_CVT_I32_F32_e32
+; SI: v_cvt_i32_f32_e32
+; SI: v_cvt_i32_f32_e32
+; SI: v_cvt_i32_f32_e32
+; SI: v_cvt_i32_f32_e32
 define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %value = load <4 x float> addrspace(1) * %in
   %result = fptosi <4 x float> %value to <4 x i32>
@@ -28,7 +38,7 @@ define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac
   ret void
 }
 
-; FUNC-LABEL: @fp_to_sint_i64
+; FUNC-LABEL: {{^}}fp_to_sint_i64:
 
 ; EG-DAG: AND_INT
 ; EG-DAG: LSHR
@@ -49,11 +59,11 @@ define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac
 ; EG-DAG: XOR_INT
 ; EG: SUB_INT
 ; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 
 ; Check that the compiler doesn't crash with a "cannot select" error
-; SI: S_ENDPGM
+; SI: s_endpgm
 define void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) {
 entry:
   %0 = fptosi float %in to i64
@@ -61,7 +71,7 @@ entry:
   ret void
 }
 
-; FUNC: @fp_to_sint_v2i64
+; FUNC: {{^}}fp_to_sint_v2i64:
 ; EG-DAG: AND_INT
 ; EG-DAG: LSHR
 ; EG-DAG: SUB_INT
@@ -81,8 +91,8 @@ entry:
 ; EG-DAG: XOR_INT
 ; EG-DAG: SUB_INT
 ; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 ; EG-DAG: AND_INT
 ; EG-DAG: LSHR
 ; EG-DAG: SUB_INT
@@ -102,17 +112,17 @@ entry:
 ; EG-DAG: XOR_INT
 ; EG-DAG: SUB_INT
 ; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 
-; SI: S_ENDPGM
+; SI: s_endpgm
 define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
   %conv = fptosi <2 x float> %x to <2 x i64>
   store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
   ret void
 }
 
-; FUNC: @fp_to_sint_v4i64
+; FUNC: {{^}}fp_to_sint_v4i64:
 ; EG-DAG: AND_INT
 ; EG-DAG: LSHR
 ; EG-DAG: SUB_INT
@@ -132,8 +142,8 @@ define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
 ; EG-DAG: XOR_INT
 ; EG-DAG: SUB_INT
 ; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 ; EG-DAG: AND_INT
 ; EG-DAG: LSHR
 ; EG-DAG: SUB_INT
@@ -153,8 +163,8 @@ define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
 ; EG-DAG: XOR_INT
 ; EG-DAG: SUB_INT
 ; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 ; EG-DAG: AND_INT
 ; EG-DAG: LSHR
 ; EG-DAG: SUB_INT
@@ -174,8 +184,8 @@ define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
 ; EG-DAG: XOR_INT
 ; EG-DAG: SUB_INT
 ; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 ; EG-DAG: AND_INT
 ; EG-DAG: LSHR
 ; EG-DAG: SUB_INT
@@ -195,10 +205,10 @@ define void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
 ; EG-DAG: XOR_INT
 ; EG-DAG: SUB_INT
 ; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 
-; SI: S_ENDPGM
+; SI: s_endpgm
 define void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
   %conv = fptosi <4 x float> %x to <4 x i64>
   store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
diff --git a/test/CodeGen/R600/fp_to_uint.f64.ll b/test/CodeGen/R600/fp_to_uint.f64.ll
index bf607cef0884..1ffe2faadf33 100644
--- a/test/CodeGen/R600/fp_to_uint.f64.ll
+++ b/test/CodeGen/R600/fp_to_uint.f64.ll
@@ -1,9 +1,70 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 
-; SI-LABEL: @fp_to_uint_i32_f64
-; SI: V_CVT_U32_F64_e32
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; SI-LABEL: {{^}}fp_to_uint_i32_f64:
+; SI: v_cvt_u32_f64_e32
 define void @fp_to_uint_i32_f64(i32 addrspace(1)* %out, double %in) {
   %cast = fptoui double %in to i32
   store i32 %cast, i32 addrspace(1)* %out, align 4
   ret void
 }
+
+; SI-LABEL: @fp_to_uint_v2i32_v2f64
+; SI: v_cvt_u32_f64_e32
+; SI: v_cvt_u32_f64_e32
+define void @fp_to_uint_v2i32_v2f64(<2 x i32> addrspace(1)* %out, <2 x double> %in) {
+  %cast = fptoui <2 x double> %in to <2 x i32>
+  store <2 x i32> %cast, <2 x i32> addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: @fp_to_uint_v4i32_v4f64
+; SI: v_cvt_u32_f64_e32
+; SI: v_cvt_u32_f64_e32
+; SI: v_cvt_u32_f64_e32
+; SI: v_cvt_u32_f64_e32
+define void @fp_to_uint_v4i32_v4f64(<4 x i32> addrspace(1)* %out, <4 x double> %in) {
+  %cast = fptoui <4 x double> %in to <4 x i32>
+  store <4 x i32> %cast, <4 x i32> addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: @fp_to_uint_i64_f64
+; CI-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]]
+; CI-DAG: v_trunc_f64_e32 [[TRUNC:v\[[0-9]+:[0-9]+\]]], [[VAL]]
+; CI-DAG: s_mov_b32 s[[K0_LO:[0-9]+]], 0{{$}}
+; CI-DAG: s_mov_b32 s[[K0_HI:[0-9]+]], 0x3df00000
+
+; CI-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], [[VAL]], s{{\[}}[[K0_LO]]:[[K0_HI]]{{\]}}
+; CI-DAG: v_floor_f64_e32 [[FLOOR:v\[[0-9]+:[0-9]+\]]], [[MUL]]
+
+; CI-DAG: s_mov_b32 s[[K1_HI:[0-9]+]], 0xc1f00000
+
+; CI-DAG: v_fma_f64 [[FMA:v\[[0-9]+:[0-9]+\]]], [[FLOOR]], s{{\[[0-9]+}}:[[K1_HI]]{{\]}}, [[TRUNC]]
+; CI-DAG: v_cvt_u32_f64_e32 v[[LO:[0-9]+]], [[FMA]]
+; CI-DAG: v_cvt_u32_f64_e32 v[[HI:[0-9]+]], [[FLOOR]]
+; CI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @fp_to_uint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep = getelementptr double addrspace(1)* %in, i32 %tid
+  %val = load double addrspace(1)* %gep, align 8
+  %cast = fptoui double %val to i64
+  store i64 %cast, i64 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: @fp_to_uint_v2i64_v2f64
+define void @fp_to_uint_v2i64_v2f64(<2 x i64> addrspace(1)* %out, <2 x double> %in) {
+  %cast = fptoui <2 x double> %in to <2 x i64>
+  store <2 x i64> %cast, <2 x i64> addrspace(1)* %out, align 16
+  ret void
+}
+
+; SI-LABEL: @fp_to_uint_v4i64_v4f64
+define void @fp_to_uint_v4i64_v4f64(<4 x i64> addrspace(1)* %out, <4 x double> %in) {
+  %cast = fptoui <4 x double> %in to <4 x i64>
+  store <4 x i64> %cast, <4 x i64> addrspace(1)* %out, align 32
+  ret void
+}
diff --git a/test/CodeGen/R600/fp_to_uint.ll b/test/CodeGen/R600/fp_to_uint.ll
index a13018bdfecf..5970adf999c9 100644
--- a/test/CodeGen/R600/fp_to_uint.ll
+++ b/test/CodeGen/R600/fp_to_uint.ll
@@ -1,38 +1,47 @@
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck %s --check-prefix=SI --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=EG -check-prefix=FUNC
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s -check-prefix=SI -check-prefix=FUNC
 
-; FUNC-LABEL: @fp_to_uint_v2i32
+; FUNC-LABEL: {{^}}fp_to_uint_f32_to_i32:
+; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+
+; SI: v_cvt_u32_f32_e32
+; SI: s_endpgm
+define void @fp_to_uint_f32_to_i32 (i32 addrspace(1)* %out, float %in) {
+  %conv = fptoui float %in to i32
+  store i32 %conv, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fp_to_uint_v2f32_to_v2i32:
 ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: V_CVT_U32_F32_e32
-; SI: V_CVT_U32_F32_e32
 
-define void @fp_to_uint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
+; SI: v_cvt_u32_f32_e32
+; SI: v_cvt_u32_f32_e32
+define void @fp_to_uint_v2f32_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
   %result = fptoui <2 x float> %in to <2 x i32>
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @fp_to_uint_v4i32
+; FUNC-LABEL: {{^}}fp_to_uint_v4f32_to_v4i32:
 ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ; EG: FLT_TO_UINT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-; SI: V_CVT_U32_F32_e32
-; SI: V_CVT_U32_F32_e32
-; SI: V_CVT_U32_F32_e32
-; SI: V_CVT_U32_F32_e32
+; SI: v_cvt_u32_f32_e32
+; SI: v_cvt_u32_f32_e32
+; SI: v_cvt_u32_f32_e32
+; SI: v_cvt_u32_f32_e32
 
-define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+define void @fp_to_uint_v4f32_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %value = load <4 x float> addrspace(1) * %in
   %result = fptoui <4 x float> %value to <4 x i32>
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
 
-; FUNC: @fp_to_uint_i64
+; FUNC: {{^}}fp_to_uint_f32_to_i64:
 ; EG-DAG: AND_INT
 ; EG-DAG: LSHR
 ; EG-DAG: SUB_INT
@@ -52,17 +61,17 @@ define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspac
 ; EG-DAG: XOR_INT
 ; EG: SUB_INT
 ; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 
-; SI: S_ENDPGM
-define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) {
+; SI: s_endpgm
+define void @fp_to_uint_f32_to_i64(i64 addrspace(1)* %out, float %x) {
   %conv = fptoui float %x to i64
   store i64 %conv, i64 addrspace(1)* %out
   ret void
 }
 
-; FUNC: @fp_to_uint_v2i64
+; FUNC: {{^}}fp_to_uint_v2f32_to_v2i64:
 ; EG-DAG: AND_INT
 ; EG-DAG: LSHR
 ; EG-DAG: SUB_INT
@@ -82,8 +91,8 @@ define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) {
 ; EG-DAG: XOR_INT
 ; EG-DAG: SUB_INT
 ; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 ; EG-DAG: AND_INT
 ; EG-DAG: LSHR
 ; EG-DAG: SUB_INT
@@ -103,17 +112,17 @@ define void @fp_to_uint_i64(i64 addrspace(1)* %out, float %x) {
 ; EG-DAG: XOR_INT
 ; EG-DAG: SUB_INT
 ; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 
-; SI: S_ENDPGM
-define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
+; SI: s_endpgm
+define void @fp_to_uint_v2f32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
   %conv = fptoui <2 x float> %x to <2 x i64>
   store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
   ret void
 }
 
-; FUNC: @fp_to_uint_v4i64
+; FUNC: {{^}}fp_to_uint_v4f32_to_v4i64:
 ; EG-DAG: AND_INT
 ; EG-DAG: LSHR
 ; EG-DAG: SUB_INT
@@ -133,8 +142,8 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
 ; EG-DAG: XOR_INT
 ; EG-DAG: SUB_INT
 ; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 ; EG-DAG: AND_INT
 ; EG-DAG: LSHR
 ; EG-DAG: SUB_INT
@@ -154,8 +163,8 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
 ; EG-DAG: XOR_INT
 ; EG-DAG: SUB_INT
 ; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 ; EG-DAG: AND_INT
 ; EG-DAG: LSHR
 ; EG-DAG: SUB_INT
@@ -175,8 +184,8 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
 ; EG-DAG: XOR_INT
 ; EG-DAG: SUB_INT
 ; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 ; EG-DAG: AND_INT
 ; EG-DAG: LSHR
 ; EG-DAG: SUB_INT
@@ -196,11 +205,11 @@ define void @fp_to_uint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
 ; EG-DAG: XOR_INT
 ; EG-DAG: SUB_INT
 ; EG-DAG: SUB_INT
-; EG-DAG: CNDGE_INT
-; EG-DAG: CNDGE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 
-; SI: S_ENDPGM
-define void @fp_to_uint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
+; SI: s_endpgm
+define void @fp_to_uint_v4f32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
   %conv = fptoui <4 x float> %x to <4 x i64>
   store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
   ret void
diff --git a/test/CodeGen/R600/fpext.ll b/test/CodeGen/R600/fpext.ll
index 143ee79fa151..320545edf56b 100644
--- a/test/CodeGen/R600/fpext.ll
+++ b/test/CodeGen/R600/fpext.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
 
-; CHECK: @fpext
-; CHECK: V_CVT_F64_F32_e32
+; CHECK: {{^}}fpext:
+; CHECK: v_cvt_f64_f32_e32
 define void @fpext(double addrspace(1)* %out, float %in) {
   %result = fpext float %in to double
   store double %result, double addrspace(1)* %out
diff --git a/test/CodeGen/R600/fptrunc.ll b/test/CodeGen/R600/fptrunc.ll
index 20a8c00ba498..15ae4e18ff38 100644
--- a/test/CodeGen/R600/fptrunc.ll
+++ b/test/CodeGen/R600/fptrunc.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=CHECK
 
-; CHECK: @fptrunc
-; CHECK: V_CVT_F32_F64_e32
+; CHECK: {{^}}fptrunc:
+; CHECK: v_cvt_f32_f64_e32
 define void @fptrunc(float addrspace(1)* %out, double %in) {
   %result = fptrunc double %in to float
   store float %result, float addrspace(1)* %out
diff --git a/test/CodeGen/R600/frem.ll b/test/CodeGen/R600/frem.ll
new file mode 100644
index 000000000000..50d6687abeec
--- /dev/null
+++ b/test/CodeGen/R600/frem.ll
@@ -0,0 +1,103 @@
+; RUN: llc -march=amdgcn -mcpu=SI -enable-misched < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}frem_f32:
+; SI-DAG: buffer_load_dword [[X:v[0-9]+]], {{.*$}}
+; SI-DAG: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
+; SI-DAG: v_cmp
+; SI-DAG: v_mul_f32
+; SI: v_rcp_f32_e32
+; SI: v_mul_f32_e32
+; SI: v_mul_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_mad_f32
+; SI: s_endpgm
+define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
+                      float addrspace(1)* %in2) #0 {
+   %gep2 = getelementptr float addrspace(1)* %in2, i32 4
+   %r0 = load float addrspace(1)* %in1, align 4
+   %r1 = load float addrspace(1)* %gep2, align 4
+   %r2 = frem float %r0, %r1
+   store float %r2, float addrspace(1)* %out, align 4
+   ret void
+}
+
+; FUNC-LABEL: {{^}}unsafe_frem_f32:
+; SI: buffer_load_dword [[Y:v[0-9]+]], {{.*}} offset:16
+; SI: buffer_load_dword [[X:v[0-9]+]], {{.*}}
+; SI: v_rcp_f32_e32 [[INVY:v[0-9]+]], [[Y]]
+; SI: v_mul_f32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]]
+; SI: v_trunc_f32_e32 [[TRUNC:v[0-9]+]], [[DIV]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
+                             float addrspace(1)* %in2) #1 {
+   %gep2 = getelementptr float addrspace(1)* %in2, i32 4
+   %r0 = load float addrspace(1)* %in1, align 4
+   %r1 = load float addrspace(1)* %gep2, align 4
+   %r2 = frem float %r0, %r1
+   store float %r2, float addrspace(1)* %out, align 4
+   ret void
+}
+
+; TODO: This should check something when f64 fdiv is implemented
+; correctly
+
+; FUNC-LABEL: {{^}}frem_f64:
+; SI: s_endpgm
+define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                      double addrspace(1)* %in2) #0 {
+   %r0 = load double addrspace(1)* %in1, align 8
+   %r1 = load double addrspace(1)* %in2, align 8
+   %r2 = frem double %r0, %r1
+   store double %r2, double addrspace(1)* %out, align 8
+   ret void
+}
+
+; FUNC-LABEL: {{^}}unsafe_frem_f64:
+; SI: v_rcp_f64_e32
+; SI: v_mul_f64
+; SI: v_bfe_u32
+; SI: v_fma_f64
+; SI: s_endpgm
+define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
+                             double addrspace(1)* %in2) #1 {
+   %r0 = load double addrspace(1)* %in1, align 8
+   %r1 = load double addrspace(1)* %in2, align 8
+   %r2 = frem double %r0, %r1
+   store double %r2, double addrspace(1)* %out, align 8
+   ret void
+}
+
+define void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
+                        <2 x float> addrspace(1)* %in2) #0 {
+   %gep2 = getelementptr <2 x float> addrspace(1)* %in2, i32 4
+   %r0 = load <2 x float> addrspace(1)* %in1, align 8
+   %r1 = load <2 x float> addrspace(1)* %gep2, align 8
+   %r2 = frem <2 x float> %r0, %r1
+   store <2 x float> %r2, <2 x float> addrspace(1)* %out, align 8
+   ret void
+}
+
+define void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1,
+                        <4 x float> addrspace(1)* %in2) #0 {
+   %gep2 = getelementptr <4 x float> addrspace(1)* %in2, i32 4
+   %r0 = load <4 x float> addrspace(1)* %in1, align 16
+   %r1 = load <4 x float> addrspace(1)* %gep2, align 16
+   %r2 = frem <4 x float> %r0, %r1
+   store <4 x float> %r2, <4 x float> addrspace(1)* %out, align 16
+   ret void
+}
+
+define void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
+                        <2 x double> addrspace(1)* %in2) #0 {
+   %gep2 = getelementptr <2 x double> addrspace(1)* %in2, i32 4
+   %r0 = load <2 x double> addrspace(1)* %in1, align 16
+   %r1 = load <2 x double> addrspace(1)* %gep2, align 16
+   %r2 = frem <2 x double> %r0, %r1
+   store <2 x double> %r2, <2 x double> addrspace(1)* %out, align 16
+   ret void
+}
+
+attributes #0 = { nounwind "unsafe-fp-math"="false" }
+attributes #1 = { nounwind "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/R600/fsqrt.ll b/test/CodeGen/R600/fsqrt.ll
index ae50b17d38fc..0d1095cf2683 100644
--- a/test/CodeGen/R600/fsqrt.ll
+++ b/test/CodeGen/R600/fsqrt.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck %s
 
-; CHECK: @fsqrt_f32
-; CHECK: V_SQRT_F32_e32 {{v[0-9]+, v[0-9]+}}
+; Run with unsafe-fp-math to make sure nothing tries to turn this into 1 / rsqrt(x)
+
+; CHECK: {{^}}fsqrt_f32:
+; CHECK: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}}
 
 define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
    %r0 = load float addrspace(1)* %in
@@ -10,8 +13,8 @@ define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
    ret void
 }
 
-; CHECK: @fsqrt_f64
-; CHECK: V_SQRT_F64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
+; CHECK: {{^}}fsqrt_f64:
+; CHECK: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 
 define void @fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
    %r0 = load double addrspace(1)* %in
diff --git a/test/CodeGen/R600/fsub.ll b/test/CodeGen/R600/fsub.ll
index 4f74efba4d8b..4fe47e7badf3 100644
--- a/test/CodeGen/R600/fsub.ll
+++ b/test/CodeGen/R600/fsub.ll
@@ -1,14 +1,25 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-
-; R600-CHECK: @fsub_f32
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W
-; SI-CHECK: @fsub_f32
-; SI-CHECK: V_SUB_F32
-define void @fsub_f32(float addrspace(1)* %out, float %a, float %b) {
-entry:
-  %0 = fsub float %a, %b
-  store float %0, float addrspace(1)* %out
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}v_fsub_f32:
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+define void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %b_ptr = getelementptr float addrspace(1)* %in, i32 1
+  %a = load float addrspace(1)* %in, align 4
+  %b = load float addrspace(1)* %b_ptr, align 4
+  %result = fsub float %a, %b
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_fsub_f32:
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, -KC0[2].W
+
+; SI: v_sub_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+define void @s_fsub_f32(float addrspace(1)* %out, float %a, float %b) {
+  %sub = fsub float %a, %b
+  store float %sub, float addrspace(1)* %out, align 4
   ret void
 }
 
@@ -16,34 +27,48 @@ declare float @llvm.R600.load.input(i32) readnone
 
 declare void @llvm.AMDGPU.store.output(float, i32)
 
-; R600-CHECK: @fsub_v2f32
-; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
-; R600-CHECK-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
-; SI-CHECK: @fsub_v2f32
-; SI-CHECK: V_SUB_F32
-; SI-CHECK: V_SUB_F32
+; FUNC-LABEL: {{^}}fsub_v2f32:
+; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
+; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
+
+; FIXME: Should be using SGPR directly for first operand
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
 define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
-entry:
-  %0 = fsub <2 x float> %a, %b
-  store <2 x float> %0, <2 x float> addrspace(1)* %out
+  %sub = fsub <2 x float> %a, %b
+  store <2 x float> %sub, <2 x float> addrspace(1)* %out, align 8
   ret void
 }
 
-; R600-CHECK: @fsub_v4f32
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-; R600-CHECK: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
-; SI-CHECK: @fsub_v4f32
-; SI-CHECK: V_SUB_F32
-; SI-CHECK: V_SUB_F32
-; SI-CHECK: V_SUB_F32
-; SI-CHECK: V_SUB_F32
-define void @fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+; FUNC-LABEL: {{^}}v_fsub_v4f32:
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
+; R600: ADD {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], -T[0-9]+\.[XYZW]}}
+
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+define void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float> addrspace(1) * %in
-  %b = load <4 x float> addrspace(1) * %b_ptr
+  %a = load <4 x float> addrspace(1)* %in, align 16
+  %b = load <4 x float> addrspace(1)* %b_ptr, align 16
+  %result = fsub <4 x float> %a, %b
+  store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
+  ret void
+}
+
+; FIXME: Should be using SGPR directly for first operand
+
+; FUNC-LABEL: {{^}}s_fsub_v4f32:
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: s_endpgm
+define void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
   %result = fsub <4 x float> %a, %b
-  store <4 x float> %result, <4 x float> addrspace(1)* %out
+  store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
   ret void
 }
diff --git a/test/CodeGen/R600/fsub64.ll b/test/CodeGen/R600/fsub64.ll
index f5e5708f1b41..d0f894607a61 100644
--- a/test/CodeGen/R600/fsub64.ll
+++ b/test/CodeGen/R600/fsub64.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=r600 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-; SI-LABEL: @fsub_f64:
-; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
+; SI-LABEL: {{^}}fsub_f64:
+; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
 define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                       double addrspace(1)* %in2) {
    %r0 = load double addrspace(1)* %in1
diff --git a/test/CodeGen/R600/ftrunc.f64.ll b/test/CodeGen/R600/ftrunc.f64.ll
new file mode 100644
index 000000000000..2c7217ef0561
--- /dev/null
+++ b/test/CodeGen/R600/ftrunc.f64.ll
@@ -0,0 +1,110 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare double @llvm.trunc.f64(double) nounwind readnone
+declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone
+declare <3 x double> @llvm.trunc.v3f64(<3 x double>) nounwind readnone
+declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
+declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone
+declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone
+
+; FUNC-LABEL: {{^}}v_ftrunc_f64:
+; CI: v_trunc_f64
+; SI: v_bfe_u32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11
+; SI: s_endpgm
+define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
+  %x = load double addrspace(1)* %in, align 8
+  %y = call double @llvm.trunc.f64(double %x) nounwind readnone
+  store double %y, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}ftrunc_f64:
+; CI: v_trunc_f64_e32
+
+; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
+; SI: s_add_i32 s{{[0-9]+}}, [[SEXP]], 0xfffffc01
+; SI: s_lshr_b64
+; SI: s_not_b64
+; SI: s_and_b64
+; SI: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000
+; SI: cmp_lt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: cmp_gt_i32
+; SI: cndmask_b32
+; SI: cndmask_b32
+; SI: s_endpgm
+define void @ftrunc_f64(double addrspace(1)* %out, double %x) {
+  %y = call double @llvm.trunc.f64(double %x) nounwind readnone
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}ftrunc_v2f64:
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+define void @ftrunc_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %x) {
+  %y = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) nounwind readnone
+  store <2 x double> %y, <2 x double> addrspace(1)* %out
+  ret void
+}
+
+; FIXME-FUNC-LABEL: {{^}}ftrunc_v3f64:
+; FIXME-CI: v_trunc_f64_e32
+; FIXME-CI: v_trunc_f64_e32
+; FIXME-CI: v_trunc_f64_e32
+; define void @ftrunc_v3f64(<3 x double> addrspace(1)* %out, <3 x double> %x) {
+;   %y = call <3 x double> @llvm.trunc.v3f64(<3 x double> %x) nounwind readnone
+;   store <3 x double> %y, <3 x double> addrspace(1)* %out
+;   ret void
+; }
+
+; FUNC-LABEL: {{^}}ftrunc_v4f64:
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+define void @ftrunc_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %x) {
+  %y = call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone
+  store <4 x double> %y, <4 x double> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}ftrunc_v8f64:
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+define void @ftrunc_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %x) {
+  %y = call <8 x double> @llvm.trunc.v8f64(<8 x double> %x) nounwind readnone
+  store <8 x double> %y, <8 x double> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}ftrunc_v16f64:
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+; CI: v_trunc_f64_e32
+define void @ftrunc_v16f64(<16 x double> addrspace(1)* %out, <16 x double> %x) {
+  %y = call <16 x double> @llvm.trunc.v16f64(<16 x double> %x) nounwind readnone
+  store <16 x double> %y, <16 x double> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/ftrunc.ll b/test/CodeGen/R600/ftrunc.ll
index 0d7d4679fe3d..39eb2b5accbf 100644
--- a/test/CodeGen/R600/ftrunc.ll
+++ b/test/CodeGen/R600/ftrunc.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG --check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s
 
 declare float @llvm.trunc.f32(float) nounwind readnone
 declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind readnone
@@ -8,55 +8,55 @@ declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
 declare <8 x float> @llvm.trunc.v8f32(<8 x float>) nounwind readnone
 declare <16 x float> @llvm.trunc.v16f32(<16 x float>) nounwind readnone
 
-; FUNC-LABEL: @ftrunc_f32:
+; FUNC-LABEL: {{^}}ftrunc_f32:
 ; EG: TRUNC
-; SI: V_TRUNC_F32_e32
+; SI: v_trunc_f32_e32
 define void @ftrunc_f32(float addrspace(1)* %out, float %x) {
   %y = call float @llvm.trunc.f32(float %x) nounwind readnone
   store float %y, float addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @ftrunc_v2f32:
+; FUNC-LABEL: {{^}}ftrunc_v2f32:
 ; EG: TRUNC
 ; EG: TRUNC
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
 define void @ftrunc_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %x) {
   %y = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x) nounwind readnone
   store <2 x float> %y, <2 x float> addrspace(1)* %out
   ret void
 }
 
-; FIXME-FUNC-LABEL: @ftrunc_v3f32:
+; FIXME-FUNC-LABEL: {{^}}ftrunc_v3f32:
 ; FIXME-EG: TRUNC
 ; FIXME-EG: TRUNC
 ; FIXME-EG: TRUNC
-; FIXME-SI: V_TRUNC_F32_e32
-; FIXME-SI: V_TRUNC_F32_e32
-; FIXME-SI: V_TRUNC_F32_e32
+; FIXME-SI: v_trunc_f32_e32
+; FIXME-SI: v_trunc_f32_e32
+; FIXME-SI: v_trunc_f32_e32
 ; define void @ftrunc_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %x) {
 ;   %y = call <3 x float> @llvm.trunc.v3f32(<3 x float> %x) nounwind readnone
 ;   store <3 x float> %y, <3 x float> addrspace(1)* %out
 ;   ret void
 ; }
 
-; FUNC-LABEL: @ftrunc_v4f32:
+; FUNC-LABEL: {{^}}ftrunc_v4f32:
 ; EG: TRUNC
 ; EG: TRUNC
 ; EG: TRUNC
 ; EG: TRUNC
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
 define void @ftrunc_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
   %y = call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone
   store <4 x float> %y, <4 x float> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @ftrunc_v8f32:
+; FUNC-LABEL: {{^}}ftrunc_v8f32:
 ; EG: TRUNC
 ; EG: TRUNC
 ; EG: TRUNC
@@ -65,21 +65,21 @@ define void @ftrunc_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %x) {
 ; EG: TRUNC
 ; EG: TRUNC
 ; EG: TRUNC
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
 define void @ftrunc_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
   %y = call <8 x float> @llvm.trunc.v8f32(<8 x float> %x) nounwind readnone
   store <8 x float> %y, <8 x float> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @ftrunc_v16f32:
+; FUNC-LABEL: {{^}}ftrunc_v16f32:
 ; EG: TRUNC
 ; EG: TRUNC
 ; EG: TRUNC
@@ -96,22 +96,22 @@ define void @ftrunc_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %x) {
 ; EG: TRUNC
 ; EG: TRUNC
 ; EG: TRUNC
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
-; SI: V_TRUNC_F32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
+; SI: v_trunc_f32_e32
 define void @ftrunc_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %x) {
   %y = call <16 x float> @llvm.trunc.v16f32(<16 x float> %x) nounwind readnone
   store <16 x float> %y, <16 x float> addrspace(1)* %out
diff --git a/test/CodeGen/R600/gep-address-space.ll b/test/CodeGen/R600/gep-address-space.ll
index ab2c0bf92fe3..2d1892534dc5 100644
--- a/test/CodeGen/R600/gep-address-space.ll
+++ b/test/CodeGen/R600/gep-address-space.ll
@@ -1,29 +1,33 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=CHECK %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=CHECK %s
 
 define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind {
-; CHECK-LABEL: @use_gep_address_space:
-; CHECK: V_MOV_B32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
-; CHECK: DS_WRITE_B32 [[PTR]], v{{[0-9]+}}, 0x40
+; CHECK-LABEL: {{^}}use_gep_address_space:
+; CHECK: v_mov_b32_e32 [[PTR:v[0-9]+]], s{{[0-9]+}}
+; CHECK: ds_write_b32 [[PTR]], v{{[0-9]+}} offset:64
   %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16
   store i32 99, i32 addrspace(3)* %p
   ret void
 }
 
 define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %array) nounwind {
-; CHECK-LABEL: @use_gep_address_space_large_offset:
-; CHECK: S_ADD_I32
-; CHECK: DS_WRITE_B32
+; CHECK-LABEL: {{^}}use_gep_address_space_large_offset:
+; The LDS offset will be 65536 bytes, which is larger than the size of LDS on
+; SI, which is why it is being OR'd with the base pointer.
+; SI: s_or_b32
+; CI: s_add_i32
+; CHECK: ds_write_b32
   %p = getelementptr [1024 x i32] addrspace(3)* %array, i16 0, i16 16384
   store i32 99, i32 addrspace(3)* %p
   ret void
 }
 
 define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind {
-; CHECK-LABEL: @gep_as_vector_v4:
-; CHECK: S_ADD_I32
-; CHECK: S_ADD_I32
-; CHECK: S_ADD_I32
-; CHECK: S_ADD_I32
+; CHECK-LABEL: {{^}}gep_as_vector_v4:
+; CHECK: s_add_i32
+; CHECK: s_add_i32
+; CHECK: s_add_i32
+; CHECK: s_add_i32
   %p = getelementptr <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
   %p0 = extractelement <4 x i32 addrspace(3)*> %p, i32 0
   %p1 = extractelement <4 x i32 addrspace(3)*> %p, i32 1
@@ -37,9 +41,9 @@ define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind
 }
 
 define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind {
-; CHECK-LABEL: @gep_as_vector_v2:
-; CHECK: S_ADD_I32
-; CHECK: S_ADD_I32
+; CHECK-LABEL: {{^}}gep_as_vector_v2:
+; CHECK: s_add_i32
+; CHECK: s_add_i32
   %p = getelementptr <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> <i16 16, i16 16>
   %p0 = extractelement <2 x i32 addrspace(3)*> %p, i32 0
   %p1 = extractelement <2 x i32 addrspace(3)*> %p, i32 1
diff --git a/test/CodeGen/R600/global-directive.ll b/test/CodeGen/R600/global-directive.ll
new file mode 100644
index 000000000000..189510ad08fe
--- /dev/null
+++ b/test/CodeGen/R600/global-directive.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; Make sure the GlobalDirective isn't merged with the function name
+
+; SI:	.globl	foo
+; SI: {{^}}foo:
+define void @foo(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %a = load i32 addrspace(1)* %in
+  %b = load i32 addrspace(1)* %b_ptr
+  %result = add i32 %a, %b
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/global-extload-i1.ll b/test/CodeGen/R600/global-extload-i1.ll
new file mode 100644
index 000000000000..940911e73453
--- /dev/null
+++ b/test/CodeGen/R600/global-extload-i1.ll
@@ -0,0 +1,301 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; FIXME: Evergreen broken
+
+; FUNC-LABEL: {{^}}zextload_global_i1_to_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %a = load i1 addrspace(1)* %in
+  %ext = zext i1 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i1_to_i32:
+; SI: buffer_load_ubyte
+; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1{{$}}
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %a = load i1 addrspace(1)* %in
+  %ext = sext i1 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i32:
+; SI: s_endpgm
+define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <1 x i1> addrspace(1)* %in
+  %ext = zext <1 x i1> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i32:
+; SI: s_endpgm
+define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <1 x i1> addrspace(1)* %in
+  %ext = sext <1 x i1> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i32:
+; SI: s_endpgm
+define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <2 x i1> addrspace(1)* %in
+  %ext = zext <2 x i1> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i32:
+; SI: s_endpgm
+define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <2 x i1> addrspace(1)* %in
+  %ext = sext <2 x i1> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i32:
+; SI: s_endpgm
+define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <4 x i1> addrspace(1)* %in
+  %ext = zext <4 x i1> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i32:
+; SI: s_endpgm
+define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <4 x i1> addrspace(1)* %in
+  %ext = sext <4 x i1> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i32:
+; SI: s_endpgm
+define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <8 x i1> addrspace(1)* %in
+  %ext = zext <8 x i1> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i32:
+; SI: s_endpgm
+define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <8 x i1> addrspace(1)* %in
+  %ext = sext <8 x i1> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i32:
+; SI: s_endpgm
+define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <16 x i1> addrspace(1)* %in
+  %ext = zext <16 x i1> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i32:
+; SI: s_endpgm
+define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <16 x i1> addrspace(1)* %in
+  %ext = sext <16 x i1> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i32:
+; XSI: s_endpgm
+; define void @zextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <32 x i1> addrspace(1)* %in
+;   %ext = zext <32 x i1> %load to <32 x i32>
+;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i32:
+; XSI: s_endpgm
+; define void @sextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <32 x i1> addrspace(1)* %in
+;   %ext = sext <32 x i1> %load to <32 x i32>
+;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i32:
+; XSI: s_endpgm
+; define void @zextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <64 x i1> addrspace(1)* %in
+;   %ext = zext <64 x i1> %load to <64 x i32>
+;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i32:
+; XSI: s_endpgm
+; define void @sextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <64 x i1> addrspace(1)* %in
+;   %ext = sext <64 x i1> %load to <64 x i32>
+;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+;   ret void
+; }
+
+; FUNC-LABEL: {{^}}zextload_global_i1_to_i64:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
+; SI: buffer_store_dwordx2
+define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %a = load i1 addrspace(1)* %in
+  %ext = zext i1 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i1_to_i64:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]],
+; SI: v_bfe_i32 [[BFE:v[0-9]+]], {{v[0-9]+}}, 0, 1{{$}}
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
+; SI: buffer_store_dwordx2
+define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %a = load i1 addrspace(1)* %in
+  %ext = sext i1 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i64:
+; SI: s_endpgm
+define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <1 x i1> addrspace(1)* %in
+  %ext = zext <1 x i1> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i64:
+; SI: s_endpgm
+define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <1 x i1> addrspace(1)* %in
+  %ext = sext <1 x i1> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i64:
+; SI: s_endpgm
+define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <2 x i1> addrspace(1)* %in
+  %ext = zext <2 x i1> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i64:
+; SI: s_endpgm
+define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <2 x i1> addrspace(1)* %in
+  %ext = sext <2 x i1> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i64:
+; SI: s_endpgm
+define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <4 x i1> addrspace(1)* %in
+  %ext = zext <4 x i1> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i64:
+; SI: s_endpgm
+define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <4 x i1> addrspace(1)* %in
+  %ext = sext <4 x i1> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i64:
+; SI: s_endpgm
+define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <8 x i1> addrspace(1)* %in
+  %ext = zext <8 x i1> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i64:
+; SI: s_endpgm
+define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <8 x i1> addrspace(1)* %in
+  %ext = sext <8 x i1> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i64:
+; SI: s_endpgm
+define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <16 x i1> addrspace(1)* %in
+  %ext = zext <16 x i1> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i64:
+; SI: s_endpgm
+define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
+  %load = load <16 x i1> addrspace(1)* %in
+  %ext = sext <16 x i1> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i64:
+; XSI: s_endpgm
+; define void @zextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <32 x i1> addrspace(1)* %in
+;   %ext = zext <32 x i1> %load to <32 x i64>
+;   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i64:
+; XSI: s_endpgm
+; define void @sextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <32 x i1> addrspace(1)* %in
+;   %ext = sext <32 x i1> %load to <32 x i64>
+;   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i64:
+; XSI: s_endpgm
+; define void @zextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <64 x i1> addrspace(1)* %in
+;   %ext = zext <64 x i1> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i64:
+; XSI: s_endpgm
+; define void @sextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <64 x i1> addrspace(1)* %in
+;   %ext = sext <64 x i1> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+;   ret void
+; }
diff --git a/test/CodeGen/R600/global-extload-i16.ll b/test/CodeGen/R600/global-extload-i16.ll
new file mode 100644
index 000000000000..838068470ff2
--- /dev/null
+++ b/test/CodeGen/R600/global-extload-i16.ll
@@ -0,0 +1,301 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; FIXME: cypress is broken because the bigger testcases spill and it's not implemented
+
+; FUNC-LABEL: {{^}}zextload_global_i16_to_i32:
+; SI: buffer_load_ushort
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+  %a = load i16 addrspace(1)* %in
+  %ext = zext i16 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i16_to_i32:
+; SI: buffer_load_sshort
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+  %a = load i16 addrspace(1)* %in
+  %ext = sext i16 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i32:
+; SI: buffer_load_ushort
+; SI: s_endpgm
+define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <1 x i16> addrspace(1)* %in
+  %ext = zext <1 x i16> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i32:
+; SI: buffer_load_sshort
+; SI: s_endpgm
+define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <1 x i16> addrspace(1)* %in
+  %ext = sext <1 x i16> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32:
+; SI: s_endpgm
+define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <2 x i16> addrspace(1)* %in
+  %ext = zext <2 x i16> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32:
+; SI: s_endpgm
+define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <2 x i16> addrspace(1)* %in
+  %ext = sext <2 x i16> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32:
+; SI: s_endpgm
+define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <4 x i16> addrspace(1)* %in
+  %ext = zext <4 x i16> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32:
+; SI: s_endpgm
+define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <4 x i16> addrspace(1)* %in
+  %ext = sext <4 x i16> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32:
+; SI: s_endpgm
+define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <8 x i16> addrspace(1)* %in
+  %ext = zext <8 x i16> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32:
+; SI: s_endpgm
+define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <8 x i16> addrspace(1)* %in
+  %ext = sext <8 x i16> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32:
+; SI: s_endpgm
+define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <16 x i16> addrspace(1)* %in
+  %ext = zext <16 x i16> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i32:
+; SI: s_endpgm
+define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <16 x i16> addrspace(1)* %in
+  %ext = sext <16 x i16> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32:
+; SI: s_endpgm
+define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <32 x i16> addrspace(1)* %in
+  %ext = zext <32 x i16> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32:
+; SI: s_endpgm
+define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <32 x i16> addrspace(1)* %in
+  %ext = sext <32 x i16> %load to <32 x i32>
+  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32:
+; SI: s_endpgm
+define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <64 x i16> addrspace(1)* %in
+  %ext = zext <64 x i16> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32:
+; SI: s_endpgm
+define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <64 x i16> addrspace(1)* %in
+  %ext = sext <64 x i16> %load to <64 x i32>
+  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_i16_to_i64:
+; SI: buffer_load_ushort v[[LO:[0-9]+]],
+; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+  %a = load i16 addrspace(1)* %in
+  %ext = zext i16 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i16_to_i64:
+; SI: buffer_load_sshort [[LOAD:v[0-9]+]],
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: buffer_store_dwordx2
+define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+  %a = load i16 addrspace(1)* %in
+  %ext = sext i16 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64:
+; SI: s_endpgm
+define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <1 x i16> addrspace(1)* %in
+  %ext = zext <1 x i16> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64:
+; SI: s_endpgm
+define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <1 x i16> addrspace(1)* %in
+  %ext = sext <1 x i16> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64:
+; SI: s_endpgm
+define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <2 x i16> addrspace(1)* %in
+  %ext = zext <2 x i16> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64:
+; SI: s_endpgm
+define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <2 x i16> addrspace(1)* %in
+  %ext = sext <2 x i16> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64:
+; SI: s_endpgm
+define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <4 x i16> addrspace(1)* %in
+  %ext = zext <4 x i16> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64:
+; SI: s_endpgm
+define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <4 x i16> addrspace(1)* %in
+  %ext = sext <4 x i16> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64:
+; SI: s_endpgm
+define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <8 x i16> addrspace(1)* %in
+  %ext = zext <8 x i16> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64:
+; SI: s_endpgm
+define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <8 x i16> addrspace(1)* %in
+  %ext = sext <8 x i16> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64:
+; SI: s_endpgm
+define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <16 x i16> addrspace(1)* %in
+  %ext = zext <16 x i16> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64:
+; SI: s_endpgm
+define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <16 x i16> addrspace(1)* %in
+  %ext = sext <16 x i16> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64:
+; SI: s_endpgm
+define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <32 x i16> addrspace(1)* %in
+  %ext = zext <32 x i16> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64:
+; SI: s_endpgm
+define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <32 x i16> addrspace(1)* %in
+  %ext = sext <32 x i16> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64:
+; SI: s_endpgm
+define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <64 x i16> addrspace(1)* %in
+  %ext = zext <64 x i16> %load to <64 x i64>
+  store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64:
+; SI: s_endpgm
+define void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
+  %load = load <64 x i16> addrspace(1)* %in
+  %ext = sext <64 x i16> %load to <64 x i64>
+  store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/global-extload-i32.ll b/test/CodeGen/R600/global-extload-i32.ll
new file mode 100644
index 000000000000..ce78c446c3ba
--- /dev/null
+++ b/test/CodeGen/R600/global-extload-i32.ll
@@ -0,0 +1,456 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
+; SI: buffer_load_dword v[[LO:[0-9]+]],
+; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %a = load i32 addrspace(1)* %in
+  %ext = zext i32 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
+; SI: buffer_load_dword [[LOAD:v[0-9]+]],
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: buffer_store_dwordx2
+define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %a = load i32 addrspace(1)* %in
+  %ext = sext i32 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i32_to_v1i64:
+; SI: buffer_load_dword
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
+  %load = load <1 x i32> addrspace(1)* %in
+  %ext = zext <1 x i32> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i32_to_v1i64:
+; SI: buffer_load_dword
+; SI: v_ashrrev_i32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
+  %load = load <1 x i32> addrspace(1)* %in
+  %ext = sext <1 x i32> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64:
+; SI: buffer_load_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
+  %load = load <2 x i32> addrspace(1)* %in
+  %ext = zext <2 x i32> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i32_to_v2i64:
+; SI: buffer_load_dwordx2
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI: s_endpgm
+define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
+  %load = load <2 x i32> addrspace(1)* %in
+  %ext = sext <2 x i32> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64:
+; SI: buffer_load_dwordx4
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
+  %load = load <4 x i32> addrspace(1)* %in
+  %ext = zext <4 x i32> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i32_to_v4i64:
+; SI: buffer_load_dwordx4
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI: s_endpgm
+define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
+  %load = load <4 x i32> addrspace(1)* %in
+  %ext = sext <4 x i32> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI: s_endpgm
+define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
+  %load = load <8 x i32> addrspace(1)* %in
+  %ext = zext <8 x i32> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI: s_endpgm
+define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
+  %load = load <8 x i32> addrspace(1)* %in
+  %ext = sext <8 x i32> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI: s_endpgm
+define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
+  %load = load <16 x i32> addrspace(1)* %in
+  %ext = sext <16 x i32> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+
+; SI: s_endpgm
+define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
+  %load = load <16 x i32> addrspace(1)* %in
+  %ext = zext <16 x i32> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+; SI-DAG: v_ashrrev_i32
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI: s_endpgm
+define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
+  %load = load <32 x i32> addrspace(1)* %in
+  %ext = sext <32 x i32> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+; SI-DAG: buffer_store_dwordx2
+
+; SI: s_endpgm
+define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
+  %load = load <32 x i32> addrspace(1)* %in
+  %ext = zext <32 x i32> %load to <32 x i64>
+  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/global-extload-i8.ll b/test/CodeGen/R600/global-extload-i8.ll
new file mode 100644
index 000000000000..8d6042f1de02
--- /dev/null
+++ b/test/CodeGen/R600/global-extload-i8.ll
@@ -0,0 +1,298 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}zextload_global_i8_to_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+  %a = load i8 addrspace(1)* %in
+  %ext = zext i8 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i8_to_i32:
+; SI: buffer_load_sbyte
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+  %a = load i8 addrspace(1)* %in
+  %ext = sext i8 %a to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i32:
+; SI: s_endpgm
+define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <1 x i8> addrspace(1)* %in
+  %ext = zext <1 x i8> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i32:
+; SI: s_endpgm
+define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <1 x i8> addrspace(1)* %in
+  %ext = sext <1 x i8> %load to <1 x i32>
+  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i32:
+; SI: s_endpgm
+define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <2 x i8> addrspace(1)* %in
+  %ext = zext <2 x i8> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i32:
+; SI: s_endpgm
+define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <2 x i8> addrspace(1)* %in
+  %ext = sext <2 x i8> %load to <2 x i32>
+  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i32:
+; SI: s_endpgm
+define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <4 x i8> addrspace(1)* %in
+  %ext = zext <4 x i8> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i32:
+; SI: s_endpgm
+define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <4 x i8> addrspace(1)* %in
+  %ext = sext <4 x i8> %load to <4 x i32>
+  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i32:
+; SI: s_endpgm
+define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <8 x i8> addrspace(1)* %in
+  %ext = zext <8 x i8> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i32:
+; SI: s_endpgm
+define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <8 x i8> addrspace(1)* %in
+  %ext = sext <8 x i8> %load to <8 x i32>
+  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i32:
+; SI: s_endpgm
+define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <16 x i8> addrspace(1)* %in
+  %ext = zext <16 x i8> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i32:
+; SI: s_endpgm
+define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <16 x i8> addrspace(1)* %in
+  %ext = sext <16 x i8> %load to <16 x i32>
+  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
+  ret void
+}
+
+; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i32:
+; XSI: s_endpgm
+; define void @zextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <32 x i8> addrspace(1)* %in
+;   %ext = zext <32 x i8> %load to <32 x i32>
+;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i32:
+; XSI: s_endpgm
+; define void @sextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <32 x i8> addrspace(1)* %in
+;   %ext = sext <32 x i8> %load to <32 x i32>
+;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i32:
+; XSI: s_endpgm
+; define void @zextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <64 x i8> addrspace(1)* %in
+;   %ext = zext <64 x i8> %load to <64 x i32>
+;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i32:
+; XSI: s_endpgm
+; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <64 x i8> addrspace(1)* %in
+;   %ext = sext <64 x i8> %load to <64 x i32>
+;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
+;   ret void
+; }
+
+; FUNC-LABEL: {{^}}zextload_global_i8_to_i64:
+; SI: buffer_load_ubyte v[[LO:[0-9]+]],
+; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
+define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+  %a = load i8 addrspace(1)* %in
+  %ext = zext i8 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_i8_to_i64:
+; SI: buffer_load_sbyte [[LOAD:v[0-9]+]],
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
+; SI: buffer_store_dwordx2
+define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+  %a = load i8 addrspace(1)* %in
+  %ext = sext i8 %a to i64
+  store i64 %ext, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i64:
+; SI: s_endpgm
+define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <1 x i8> addrspace(1)* %in
+  %ext = zext <1 x i8> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i64:
+; SI: s_endpgm
+define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <1 x i8> addrspace(1)* %in
+  %ext = sext <1 x i8> %load to <1 x i64>
+  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i64:
+; SI: s_endpgm
+define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <2 x i8> addrspace(1)* %in
+  %ext = zext <2 x i8> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i64:
+; SI: s_endpgm
+define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <2 x i8> addrspace(1)* %in
+  %ext = sext <2 x i8> %load to <2 x i64>
+  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i64:
+; SI: s_endpgm
+define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <4 x i8> addrspace(1)* %in
+  %ext = zext <4 x i8> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i64:
+; SI: s_endpgm
+define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <4 x i8> addrspace(1)* %in
+  %ext = sext <4 x i8> %load to <4 x i64>
+  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i64:
+; SI: s_endpgm
+define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <8 x i8> addrspace(1)* %in
+  %ext = zext <8 x i8> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i64:
+; SI: s_endpgm
+define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <8 x i8> addrspace(1)* %in
+  %ext = sext <8 x i8> %load to <8 x i64>
+  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i64:
+; SI: s_endpgm
+define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <16 x i8> addrspace(1)* %in
+  %ext = zext <16 x i8> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i64:
+; SI: s_endpgm
+define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
+  %load = load <16 x i8> addrspace(1)* %in
+  %ext = sext <16 x i8> %load to <16 x i64>
+  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
+  ret void
+}
+
+; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64:
+; XSI: s_endpgm
+; define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <32 x i8> addrspace(1)* %in
+;   %ext = zext <32 x i8> %load to <32 x i64>
+;   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64:
+; XSI: s_endpgm
+; define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <32 x i8> addrspace(1)* %in
+;   %ext = sext <32 x i8> %load to <32 x i64>
+;   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64:
+; XSI: s_endpgm
+; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <64 x i8> addrspace(1)* %in
+;   %ext = zext <64 x i8> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+;   ret void
+; }
+
+; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64:
+; XSI: s_endpgm
+; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
+;   %load = load <64 x i8> addrspace(1)* %in
+;   %ext = sext <64 x i8> %load to <64 x i64>
+;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
+;   ret void
+; }
diff --git a/test/CodeGen/R600/global-zero-initializer.ll b/test/CodeGen/R600/global-zero-initializer.ll
new file mode 100644
index 000000000000..031df59cd1e1
--- /dev/null
+++ b/test/CodeGen/R600/global-zero-initializer.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s
+
+; CHECK: error: unsupported initializer for address space in load_init_global_global
+
+@lds = addrspace(1) global [256 x i32] zeroinitializer
+
+define void @load_init_global_global(i32 addrspace(1)* %out, i1 %p) {
+ %gep = getelementptr [256 x i32] addrspace(1)* @lds, i32 0, i32 10
+  %ld = load i32 addrspace(1)* %gep
+  store i32 %ld, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/global_atomics.ll b/test/CodeGen/R600/global_atomics.ll
new file mode 100644
index 000000000000..5a07a028f44f
--- /dev/null
+++ b/test/CodeGen/R600/global_atomics.ll
@@ -0,0 +1,801 @@
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}atomic_add_i32_offset:
+; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset:
+; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset:
+; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
+; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_add_i32:
+; SI: buffer_atomic_add v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %0  = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_add_i32_ret:
+; SI: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %0  = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_add_i32_addr64:
+; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_add_i32_ret_addr64:
+; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32_offset:
+; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset:
+; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset:
+; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
+; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32:
+; SI: buffer_atomic_and v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %0  = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32_ret:
+; SI: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %0  = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32_addr64:
+; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_and_i32_ret_addr64:
+; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32_offset:
+; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset:
+; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset:
+; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
+; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32:
+; SI: buffer_atomic_sub v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %0  = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32_ret:
+; SI: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %0  = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32_addr64:
+; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_sub_i32_ret_addr64:
+; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32_offset:
+; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset:
+; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset:
+; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
+; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32:
+; SI: buffer_atomic_smax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %0  = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32_ret:
+; SI: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %0  = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32_addr64:
+; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_max_i32_ret_addr64:
+; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32_offset:
+; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset:
+; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset:
+; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
+; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32:
+; SI: buffer_atomic_umax v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %0  = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32_ret:
+; SI: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %0  = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32_addr64:
+; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umax_i32_ret_addr64:
+; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32_offset:
+; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset:
+; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset:
+; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
+; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32:
+; SI: buffer_atomic_smin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %0  = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32_ret:
+; SI: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %0  = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32_addr64:
+; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_min_i32_ret_addr64:
+; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32_offset:
+; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset:
+; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset:
+; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
+; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32:
+; SI: buffer_atomic_umin v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %0  = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32_ret:
+; SI: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %0  = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32_addr64:
+; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_umin_i32_ret_addr64:
+; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32_offset:
+; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset:
+; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset:
+; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
+; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32:
+; SI: buffer_atomic_or v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %0  = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32_ret:
+; SI: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %0  = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32_addr64:
+; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_or_i32_ret_addr64:
+; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32_offset:
+; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset:
+; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
+; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
+; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32:
+; SI: buffer_atomic_swap v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %0  = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32_ret:
+; SI: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %0  = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64:
+; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
+; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32_offset:
+; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
+define void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset:
+; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %gep = getelementptr i32 addrspace(1)* %out, i32 4
+  %0  = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset:
+; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
+define void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
+; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 4
+  %0  = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32:
+; SI: buffer_atomic_xor v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %0  = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32_ret:
+; SI: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %0  = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32_addr64:
+; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}atomic_xor_i32_ret_addr64:
+; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: buffer_store_dword [[RET]]
+define void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0  = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
diff --git a/test/CodeGen/R600/gv-const-addrspace-fail.ll b/test/CodeGen/R600/gv-const-addrspace-fail.ll
index ebd781107627..af0df413ca58 100644
--- a/test/CodeGen/R600/gv-const-addrspace-fail.ll
+++ b/test/CodeGen/R600/gv-const-addrspace-fail.ll
@@ -1,14 +1,13 @@
-; XFAIL: *
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; XUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 
 @a = internal addrspace(2) constant [1 x i8] [ i8 7 ], align 1
 
-; FUNC-LABEL: @test_i8
+; FUNC-LABEL: {{^}}test_i8:
 ; EG: CF_END
-; SI: BUFFER_STORE_BYTE
-; SI: S_ENDPGM
+; SI: buffer_store_byte
+; SI: s_endpgm
 define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
   %arrayidx = getelementptr inbounds [1 x i8] addrspace(2)* @a, i32 0, i32 %s
   %1 = load i8 addrspace(2)* %arrayidx, align 1
@@ -18,10 +17,10 @@ define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
 
 @b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
 
-; FUNC-LABEL: @test_i16
+; FUNC-LABEL: {{^}}test_i16:
 ; EG: CF_END
-; SI: BUFFER_STORE_SHORT
-; SI: S_ENDPGM
+; SI: buffer_store_short
+; SI: s_endpgm
 define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
   %arrayidx = getelementptr inbounds [1 x i16] addrspace(2)* @b, i32 0, i32 %s
   %1 = load i16 addrspace(2)* %arrayidx, align 2
@@ -32,9 +31,9 @@ define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
 %struct.bar = type { float, [5 x i8] }
 
 ; The illegal i8s aren't handled
-@struct_bar_gv = internal addrspace(2) unnamed_addr constant [1 x %struct.bar] [ %struct.bar { float 16.0, [5 x i8] [i8 0, i8 1, i8 2, i8 3, i8 4] } ]
+@struct_bar_gv = internal addrspace(2) constant [1 x %struct.bar] [ %struct.bar { float 16.0, [5 x i8] [i8 0, i8 1, i8 2, i8 3, i8 4] } ]
 
-; FUNC-LABEL: @struct_bar_gv_load
+; FUNC-LABEL: {{^}}struct_bar_gv_load:
 define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) {
   %gep = getelementptr inbounds [1 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 0, i32 1, i32 %index
   %load = load i8 addrspace(2)* %gep, align 1
@@ -49,7 +48,7 @@ define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) {
                                                                     <4 x i32> <i32 9, i32 10, i32 11, i32 12>,
                                                                     <4 x i32> <i32 13, i32 14, i32 15, i32 16> ]
 
-; FUNC-LABEL: @array_vector_gv_load
+; FUNC-LABEL: {{^}}array_vector_gv_load:
 define void @array_vector_gv_load(<4 x i32> addrspace(1)* %out, i32 %index) {
   %gep = getelementptr inbounds [4 x <4 x i32>] addrspace(2)* @array_vector_gv, i32 0, i32 %index
   %load = load <4 x i32> addrspace(2)* %gep, align 16
diff --git a/test/CodeGen/R600/gv-const-addrspace.ll b/test/CodeGen/R600/gv-const-addrspace.ll
index e0ac317f9986..c58e5846d98c 100644
--- a/test/CodeGen/R600/gv-const-addrspace.ll
+++ b/test/CodeGen/R600/gv-const-addrspace.ll
@@ -1,14 +1,14 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 
 @b = internal addrspace(2) constant [1 x i16] [ i16 7 ], align 2
 
 @float_gv = internal unnamed_addr addrspace(2) constant [5 x float] [float 0.0, float 1.0, float 2.0, float 3.0, float 4.0], align 4
 
-; FUNC-LABEL: @float
-; FIXME: We should be using S_LOAD_DWORD here.
-; SI: BUFFER_LOAD_DWORD
+; FUNC-LABEL: {{^}}float:
+; FIXME: We should be using s_load_dword here.
+; SI: buffer_load_dword
 
 ; EG-DAG: MOV {{\** *}}T2.X
 ; EG-DAG: MOV {{\** *}}T3.X
@@ -27,10 +27,10 @@ entry:
 
 @i32_gv = internal unnamed_addr addrspace(2) constant [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4], align 4
 
-; FUNC-LABEL: @i32
+; FUNC-LABEL: {{^}}i32:
 
-; FIXME: We should be using S_LOAD_DWORD here.
-; SI: BUFFER_LOAD_DWORD
+; FIXME: We should be using s_load_dword here.
+; SI: buffer_load_dword
 
 ; EG-DAG: MOV {{\** *}}T2.X
 ; EG-DAG: MOV {{\** *}}T3.X
@@ -52,8 +52,8 @@ entry:
 
 @struct_foo_gv = internal unnamed_addr addrspace(2) constant [1 x %struct.foo] [ %struct.foo { float 16.0, [5 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4] } ]
 
-; FUNC-LABEL: @struct_foo_gv_load
-; SI: S_LOAD_DWORD
+; FUNC-LABEL: {{^}}struct_foo_gv_load:
+; SI: s_load_dword
 
 define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
   %gep = getelementptr inbounds [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
@@ -67,9 +67,9 @@ define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
                                                                 <1 x i32> <i32 3>,
                                                                 <1 x i32> <i32 4> ]
 
-; FUNC-LABEL: @array_v1_gv_load
-; FIXME: We should be using S_LOAD_DWORD here.
-; SI: BUFFER_LOAD_DWORD
+; FUNC-LABEL: {{^}}array_v1_gv_load:
+; FIXME: We should be using s_load_dword here.
+; SI: buffer_load_dword
 define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
   %gep = getelementptr inbounds [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
   %load = load <1 x i32> addrspace(2)* %gep, align 4
diff --git a/test/CodeGen/R600/half.ll b/test/CodeGen/R600/half.ll
index 42aa4faa99f4..cb7a94a0b859 100644
--- a/test/CodeGen/R600/half.ll
+++ b/test/CodeGen/R600/half.ll
@@ -1,18 +1,18 @@
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s
 
 define void @test_load_store(half addrspace(1)* %in, half addrspace(1)* %out) {
-; CHECK-LABEL: @test_load_store
-; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
-; CHECK: BUFFER_STORE_SHORT [[TMP]]
+; CHECK-LABEL: {{^}}test_load_store:
+; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
+; CHECK: buffer_store_short [[TMP]]
   %val = load half addrspace(1)* %in
   store half %val, half addrspace(1) * %out
   ret void
 }
 
 define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) {
-; CHECK-LABEL: @test_bitcast_from_half
-; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
-; CHECK: BUFFER_STORE_SHORT [[TMP]]
+; CHECK-LABEL: {{^}}test_bitcast_from_half:
+; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
+; CHECK: buffer_store_short [[TMP]]
   %val = load half addrspace(1) * %in
   %val_int = bitcast half %val to i16
   store i16 %val_int, i16 addrspace(1)* %out
@@ -20,9 +20,9 @@ define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %o
 }
 
 define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) {
-; CHECK-LABEL: @test_bitcast_to_half
-; CHECK: BUFFER_LOAD_USHORT [[TMP:v[0-9]+]]
-; CHECK: BUFFER_STORE_SHORT [[TMP]]
+; CHECK-LABEL: {{^}}test_bitcast_to_half:
+; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
+; CHECK: buffer_store_short [[TMP]]
   %val = load i16 addrspace(1)* %in
   %val_fp = bitcast i16 %val to half
   store half %val_fp, half addrspace(1)* %out
@@ -30,8 +30,8 @@ define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in
 }
 
 define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
-; CHECK-LABEL: @test_extend32
-; CHECK: V_CVT_F32_F16_e32
+; CHECK-LABEL: {{^}}test_extend32:
+; CHECK: v_cvt_f32_f16_e32
 
   %val16 = load half addrspace(1)* %in
   %val32 = fpext half %val16 to float
@@ -40,9 +40,9 @@ define void @test_extend32(half addrspace(1)* %in, float addrspace(1)* %out) {
 }
 
 define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
-; CHECK-LABEL: @test_extend64
-; CHECK: V_CVT_F32_F16_e32
-; CHECK: V_CVT_F64_F32_e32
+; CHECK-LABEL: {{^}}test_extend64:
+; CHECK: v_cvt_f32_f16_e32
+; CHECK: v_cvt_f64_f32_e32
 
   %val16 = load half addrspace(1)* %in
   %val64 = fpext half %val16 to double
@@ -51,8 +51,8 @@ define void @test_extend64(half addrspace(1)* %in, double addrspace(1)* %out) {
 }
 
 define void @test_trunc32(float addrspace(1)* %in, half addrspace(1)* %out) {
-; CHECK-LABEL: @test_trunc32
-; CHECK: V_CVT_F16_F32_e32
+; CHECK-LABEL: {{^}}test_trunc32:
+; CHECK: v_cvt_f16_f32_e32
 
   %val32 = load float addrspace(1)* %in
   %val16 = fptrunc float %val32 to half
diff --git a/test/CodeGen/R600/hsa.ll b/test/CodeGen/R600/hsa.ll
new file mode 100644
index 000000000000..2e79866362ac
--- /dev/null
+++ b/test/CodeGen/R600/hsa.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=r600--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
+
+; HSA: {{^}}simple:
+; Make sure we are setting the ATC bit:
+; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
+; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
+
+define void @simple(i32 addrspace(1)* %out) {
+entry:
+  store i32 0, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/i1-copy-implicit-def.ll b/test/CodeGen/R600/i1-copy-implicit-def.ll
new file mode 100644
index 000000000000..51e230196bf6
--- /dev/null
+++ b/test/CodeGen/R600/i1-copy-implicit-def.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SILowerI1Copies was not handling IMPLICIT_DEF
+; SI-LABEL: {{^}}br_implicit_def:
+; SI: BB#0:
+; SI-NEXT: s_and_saveexec_b64
+; SI-NEXT: s_xor_b64
+; SI-NEXT: BB#1:
+define void @br_implicit_def(i32 addrspace(1)* %out, i32 %arg) #0 {
+bb:
+  br i1 undef, label %bb1, label %bb2
+
+bb1:
+  store volatile i32 123, i32 addrspace(1)* %out
+  ret void
+
+bb2:
+  ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/R600/i1-copy-phi.ll b/test/CodeGen/R600/i1-copy-phi.ll
new file mode 100644
index 000000000000..8b761710f9d2
--- /dev/null
+++ b/test/CodeGen/R600/i1-copy-phi.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: {{^}}br_i1_phi:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; SI: s_and_saveexec_b64
+; SI: s_xor_b64
+; SI: v_mov_b32_e32 [[REG]], -1{{$}}
+; SI: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[REG]], 0
+; SI: s_and_saveexec_b64
+; SI: s_xor_b64
+; SI: s_endpgm
+define void @br_i1_phi(i32 %arg, i1 %arg1) #0 {
+bb:
+  br i1 %arg1, label %bb2, label %bb3
+
+bb2:                                              ; preds = %bb
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  %tmp = phi i1 [ true, %bb2 ], [ false, %bb ]
+  br i1 %tmp, label %bb4, label %bb6
+
+bb4:                                              ; preds = %bb3
+  %tmp5 = mul i32 undef, %arg
+  br label %bb6
+
+bb6:                                              ; preds = %bb4, %bb3
+  ret void
+}
diff --git a/test/CodeGen/R600/icmp64.ll b/test/CodeGen/R600/icmp64.ll
index c9e62ff934ee..ed0f221b87b1 100644
--- a/test/CodeGen/R600/icmp64.ll
+++ b/test/CodeGen/R600/icmp64.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-; SI-LABEL: @test_i64_eq:
-; SI: V_CMP_EQ_I64
+; SI-LABEL: {{^}}test_i64_eq:
+; SI: v_cmp_eq_i64
 define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %cmp = icmp eq i64 %a, %b
   %result = sext i1 %cmp to i32
@@ -9,8 +9,8 @@ define void @test_i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   ret void
 }
 
-; SI-LABEL: @test_i64_ne:
-; SI: V_CMP_NE_I64
+; SI-LABEL: {{^}}test_i64_ne:
+; SI: v_cmp_ne_i64
 define void @test_i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %cmp = icmp ne i64 %a, %b
   %result = sext i1 %cmp to i32
@@ -18,8 +18,8 @@ define void @test_i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   ret void
 }
 
-; SI-LABEL: @test_i64_slt:
-; SI: V_CMP_LT_I64
+; SI-LABEL: {{^}}test_i64_slt:
+; SI: v_cmp_lt_i64
 define void @test_i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %cmp = icmp slt i64 %a, %b
   %result = sext i1 %cmp to i32
@@ -27,8 +27,8 @@ define void @test_i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   ret void
 }
 
-; SI-LABEL: @test_i64_ult:
-; SI: V_CMP_LT_U64
+; SI-LABEL: {{^}}test_i64_ult:
+; SI: v_cmp_lt_u64
 define void @test_i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %cmp = icmp ult i64 %a, %b
   %result = sext i1 %cmp to i32
@@ -36,8 +36,8 @@ define void @test_i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   ret void
 }
 
-; SI-LABEL: @test_i64_sle:
-; SI: V_CMP_LE_I64
+; SI-LABEL: {{^}}test_i64_sle:
+; SI: v_cmp_le_i64
 define void @test_i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %cmp = icmp sle i64 %a, %b
   %result = sext i1 %cmp to i32
@@ -45,8 +45,8 @@ define void @test_i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   ret void
 }
 
-; SI-LABEL: @test_i64_ule:
-; SI: V_CMP_LE_U64
+; SI-LABEL: {{^}}test_i64_ule:
+; SI: v_cmp_le_u64
 define void @test_i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %cmp = icmp ule i64 %a, %b
   %result = sext i1 %cmp to i32
@@ -54,8 +54,8 @@ define void @test_i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   ret void
 }
 
-; SI-LABEL: @test_i64_sgt:
-; SI: V_CMP_GT_I64
+; SI-LABEL: {{^}}test_i64_sgt:
+; SI: v_cmp_gt_i64
 define void @test_i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %cmp = icmp sgt i64 %a, %b
   %result = sext i1 %cmp to i32
@@ -63,8 +63,8 @@ define void @test_i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   ret void
 }
 
-; SI-LABEL: @test_i64_ugt:
-; SI: V_CMP_GT_U64
+; SI-LABEL: {{^}}test_i64_ugt:
+; SI: v_cmp_gt_u64
 define void @test_i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %cmp = icmp ugt i64 %a, %b
   %result = sext i1 %cmp to i32
@@ -72,8 +72,8 @@ define void @test_i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   ret void
 }
 
-; SI-LABEL: @test_i64_sge:
-; SI: V_CMP_GE_I64
+; SI-LABEL: {{^}}test_i64_sge:
+; SI: v_cmp_ge_i64
 define void @test_i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %cmp = icmp sge i64 %a, %b
   %result = sext i1 %cmp to i32
@@ -81,8 +81,8 @@ define void @test_i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   ret void
 }
 
-; SI-LABEL: @test_i64_uge:
-; SI: V_CMP_GE_U64
+; SI-LABEL: {{^}}test_i64_uge:
+; SI: v_cmp_ge_u64
 define void @test_i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %cmp = icmp uge i64 %a, %b
   %result = sext i1 %cmp to i32
diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll
index b047315be71a..6e4fa3cc60cb 100644
--- a/test/CodeGen/R600/imm.ll
+++ b/test/CodeGen/R600/imm.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
 
 ; Use a 64-bit value with lo bits that can be represented as an inline constant
-; CHECK: @i64_imm_inline_lo
-; CHECK: S_MOV_B32 [[LO:s[0-9]+]], 5
-; CHECK: V_MOV_B32_e32 v[[LO_VGPR:[0-9]+]], [[LO]]
-; CHECK: BUFFER_STORE_DWORDX2 v{{\[}}[[LO_VGPR]]:
+; CHECK-LABEL: {{^}}i64_imm_inline_lo:
+; CHECK: s_mov_b32 [[LO:s[0-9]+]], 5
+; CHECK: v_mov_b32_e32 v[[LO_VGPR:[0-9]+]], [[LO]]
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VGPR]]:
 define void @i64_imm_inline_lo(i64 addrspace(1) *%out) {
 entry:
   store i64 1311768464867721221, i64 addrspace(1) *%out ; 0x1234567800000005
@@ -12,12 +12,473 @@ entry:
 }
 
 ; Use a 64-bit value with hi bits that can be represented as an inline constant
-; CHECK: @i64_imm_inline_hi
-; CHECK: S_MOV_B32 [[HI:s[0-9]+]], 5
-; CHECK: V_MOV_B32_e32 v[[HI_VGPR:[0-9]+]], [[HI]]
-; CHECK: BUFFER_STORE_DWORDX2 v{{\[[0-9]+:}}[[HI_VGPR]]
+; CHECK-LABEL: {{^}}i64_imm_inline_hi:
+; CHECK: s_mov_b32 [[HI:s[0-9]+]], 5
+; CHECK: v_mov_b32_e32 v[[HI_VGPR:[0-9]+]], [[HI]]
+; CHECK: buffer_store_dwordx2 v{{\[[0-9]+:}}[[HI_VGPR]]
 define void @i64_imm_inline_hi(i64 addrspace(1) *%out) {
 entry:
   store i64 21780256376, i64 addrspace(1) *%out ; 0x0000000512345678
   ret void
 }
+
+; CHECK-LABEL: {{^}}store_inline_imm_0.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_0.0_f32(float addrspace(1)* %out) {
+  store float 0.0, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_imm_neg_0.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80000000
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_imm_neg_0.0_f32(float addrspace(1)* %out) {
+  store float -0.0, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_0.5_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0.5{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_0.5_f32(float addrspace(1)* %out) {
+  store float 0.5, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_m_0.5_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -0.5{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_m_0.5_f32(float addrspace(1)* %out) {
+  store float -0.5, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_1.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 1.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_1.0_f32(float addrspace(1)* %out) {
+  store float 1.0, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_m_1.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -1.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_m_1.0_f32(float addrspace(1)* %out) {
+  store float -1.0, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_2.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 2.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_2.0_f32(float addrspace(1)* %out) {
+  store float 2.0, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_m_2.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -2.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_m_2.0_f32(float addrspace(1)* %out) {
+  store float -2.0, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_4.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 4.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_4.0_f32(float addrspace(1)* %out) {
+  store float 4.0, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_inline_imm_m_4.0_f32
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], -4.0{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_inline_imm_m_4.0_f32(float addrspace(1)* %out) {
+  store float -4.0, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_literal_imm_f32:
+; CHECK: v_mov_b32_e32 [[REG:v[0-9]+]], 0x45800000
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @store_literal_imm_f32(float addrspace(1)* %out) {
+  store float 4096.0, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_0.0_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, 0.0
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_0.5_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, 0.5
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, -0.5
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_1.0_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, 1.0
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, -1.0
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_2.0_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, 2.0
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, -2.0
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_4.0_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, 4.0
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, -4.0
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: @commute_add_inline_imm_0.5_f32
+; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
+; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %x = load float addrspace(1)* %in
+  %y = fadd float %x, 0.5
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: @commute_add_literal_f32
+; CHECK: buffer_load_dword [[VAL:v[0-9]+]]
+; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %x = load float addrspace(1)* %in
+  %y = fadd float %x, 1024.0
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_1_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 1, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_1_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, 0x36a0000000000000
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_2_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 2, [[VAL]]{{$}}
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_2_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, 0x36b0000000000000
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_16_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 16, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_16_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, 0x36e0000000000000
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -1, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_neg_1_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, 0xffffffffe0000000
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -2, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_neg_2_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, 0xffffffffc0000000
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], -16, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_neg_16_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, 0xfffffffe00000000
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_63_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 63, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_63_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, 0x36ff800000000000
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_64_f32
+; CHECK: s_load_dword [[VAL:s[0-9]+]]
+; CHECK: v_add_f32_e64 [[REG:v[0-9]+]], 64, [[VAL]]
+; CHECK-NEXT: buffer_store_dword [[REG]]
+define void @add_inline_imm_64_f32(float addrspace(1)* %out, float %x) {
+  %y = fadd float %x, 0x3700000000000000
+  store float %y, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_0.0_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_0.0_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, 0.0
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_0.5_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 0.5, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_0.5_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, 0.5
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_0.5_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -0.5, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_neg_0.5_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, -0.5
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_1.0_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1.0, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_1.0_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, 1.0
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_1.0_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1.0, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_neg_1.0_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, -1.0
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_2.0_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2.0, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_2.0_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, 2.0
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_2.0_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2.0, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_neg_2.0_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, -2.0
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_4.0_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 4.0, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_4.0_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, 4.0
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_4.0_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -4.0, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_neg_4.0_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, -4.0
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+
+; CHECK-LABEL: {{^}}add_inline_imm_1_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 1, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_1_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, 0x0000000000000001
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_2_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 2, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_2_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, 0x0000000000000002
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_16_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 16, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_16_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, 0x0000000000000010
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_1_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -1, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_neg_1_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, 0xffffffffffffffff
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_2_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -2, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_neg_2_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, 0xfffffffffffffffe
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_neg_16_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], -16, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_neg_16_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, 0xfffffffffffffff0
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_63_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 63, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_63_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, 0x000000000000003F
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: {{^}}add_inline_imm_64_f64
+; CHECK: s_load_dwordx2 [[VAL:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; CHECK: v_add_f64 [[REG:v\[[0-9]+:[0-9]+\]]], 64, [[VAL]]
+; CHECK-NEXT: buffer_store_dwordx2 [[REG]]
+define void @add_inline_imm_64_f64(double addrspace(1)* %out, double %x) {
+  %y = fadd double %x, 0x0000000000000040
+  store double %y, double addrspace(1)* %out
+  ret void
+}
+
+
+; CHECK-LABEL: {{^}}store_inline_imm_0.0_f64
+; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0
+; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0
+; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) {
+  store double 0.0, double addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/indirect-addressing-si.ll b/test/CodeGen/R600/indirect-addressing-si.ll
index 169d69b7c25c..db597a363775 100644
--- a/test/CodeGen/R600/indirect-addressing-si.ll
+++ b/test/CodeGen/R600/indirect-addressing-si.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
 
 ; Tests for indirect addressing on SI, which is implemented using dynamic
 ; indexing of vectors.
 
 ; CHECK: extract_w_offset
-; CHECK: S_MOV_B32 m0
-; CHECK-NEXT: V_MOVRELS_B32_e32
+; CHECK: s_mov_b32 m0
+; CHECK-NEXT: v_movrels_b32_e32
 define void @extract_w_offset(float addrspace(1)* %out, i32 %in) {
 entry:
   %0 = add i32 %in, 1
@@ -15,8 +15,8 @@ entry:
 }
 
 ; CHECK: extract_wo_offset
-; CHECK: S_MOV_B32 m0
-; CHECK-NEXT: V_MOVRELS_B32_e32
+; CHECK: s_mov_b32 m0
+; CHECK-NEXT: v_movrels_b32_e32
 define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) {
 entry:
   %0 = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %in
@@ -25,8 +25,8 @@ entry:
 }
 
 ; CHECK: insert_w_offset
-; CHECK: S_MOV_B32 m0
-; CHECK-NEXT: V_MOVRELD_B32_e32
+; CHECK: s_mov_b32 m0
+; CHECK-NEXT: v_movreld_b32_e32
 define void @insert_w_offset(float addrspace(1)* %out, i32 %in) {
 entry:
   %0 = add i32 %in, 1
@@ -37,8 +37,8 @@ entry:
 }
 
 ; CHECK: insert_wo_offset
-; CHECK: S_MOV_B32 m0
-; CHECK-NEXT: V_MOVRELD_B32_e32
+; CHECK: s_mov_b32 m0
+; CHECK-NEXT: v_movreld_b32_e32
 define void @insert_wo_offset(float addrspace(1)* %out, i32 %in) {
 entry:
   %0 = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
diff --git a/test/CodeGen/R600/indirect-private-64.ll b/test/CodeGen/R600/indirect-private-64.ll
index 5747434935b3..24006f8799b2 100644
--- a/test/CodeGen/R600/indirect-private-64.ll
+++ b/test/CodeGen/R600/indirect-private-64.ll
@@ -1,16 +1,16 @@
-; RUN: llc -march=r600 -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
 
 
 declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
 
-; SI-LABEL: @private_access_f64_alloca:
+; SI-LABEL: {{^}}private_access_f64_alloca:
 
-; SI-ALLOCA: BUFFER_STORE_DWORDX2
-; SI-ALLOCA: BUFFER_LOAD_DWORDX2
+; SI-ALLOCA: buffer_store_dwordx2
+; SI-ALLOCA: buffer_load_dwordx2
 
-; SI-PROMOTE: DS_WRITE_B64
-; SI-PROMOTE: DS_READ_B64
+; SI-PROMOTE: ds_write_b64
+; SI-PROMOTE: ds_read_b64
 define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
   %val = load double addrspace(1)* %in, align 8
   %array = alloca double, i32 16, align 8
@@ -22,19 +22,19 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double
   ret void
 }
 
-; SI-LABEL: @private_access_v2f64_alloca:
+; SI-LABEL: {{^}}private_access_v2f64_alloca:
 
-; SI-ALLOCA: BUFFER_STORE_DWORDX4
-; SI-ALLOCA: BUFFER_LOAD_DWORDX4
+; SI-ALLOCA: buffer_store_dwordx4
+; SI-ALLOCA: buffer_load_dwordx4
 
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
 define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
   %val = load <2 x double> addrspace(1)* %in, align 16
   %array = alloca <2 x double>, i32 16, align 16
@@ -46,13 +46,13 @@ define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out
   ret void
 }
 
-; SI-LABEL: @private_access_i64_alloca:
+; SI-LABEL: {{^}}private_access_i64_alloca:
 
-; SI-ALLOCA: BUFFER_STORE_DWORDX2
-; SI-ALLOCA: BUFFER_LOAD_DWORDX2
+; SI-ALLOCA: buffer_store_dwordx2
+; SI-ALLOCA: buffer_load_dwordx2
 
-; SI-PROMOTE: DS_WRITE_B64
-; SI-PROMOTE: DS_READ_B64
+; SI-PROMOTE: ds_write_b64
+; SI-PROMOTE: ds_read_b64
 define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
   %val = load i64 addrspace(1)* %in, align 8
   %array = alloca i64, i32 16, align 8
@@ -64,19 +64,19 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs
   ret void
 }
 
-; SI-LABEL: @private_access_v2i64_alloca:
+; SI-LABEL: {{^}}private_access_v2i64_alloca:
 
-; SI-ALLOCA: BUFFER_STORE_DWORDX4
-; SI-ALLOCA: BUFFER_LOAD_DWORDX4
+; SI-ALLOCA: buffer_store_dwordx4
+; SI-ALLOCA: buffer_load_dwordx4
 
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
 define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
   %val = load <2 x i64> addrspace(1)* %in, align 16
   %array = alloca <2 x i64>, i32 16, align 16
diff --git a/test/CodeGen/R600/infinite-loop.ll b/test/CodeGen/R600/infinite-loop.ll
index 68ffaae1c428..0f82a7df6098 100644
--- a/test/CodeGen/R600/infinite-loop.ll
+++ b/test/CodeGen/R600/infinite-loop.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-; SI-LABEL: @infinite_loop:
-; SI: V_MOV_B32_e32 [[REG:v[0-9]+]], 0x3e7
+; SI-LABEL: {{^}}infinite_loop:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
 ; SI: BB0_1:
-; SI: BUFFER_STORE_DWORD [[REG]]
-; SI: S_WAITCNT vmcnt(0) expcnt(0)
-; SI: S_BRANCH BB0_1
+; SI: buffer_store_dword [[REG]]
+; SI: s_waitcnt vmcnt(0) expcnt(0)
+; SI: s_branch BB0_1
 define void @infinite_loop(i32 addrspace(1)* %out) {
 entry:
   br label %for.body
diff --git a/test/CodeGen/R600/inline-asm.ll b/test/CodeGen/R600/inline-asm.ll
new file mode 100644
index 000000000000..6f1f977de2a4
--- /dev/null
+++ b/test/CodeGen/R600/inline-asm.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+
+; CHECK: {{^}}inline_asm:
+; CHECK: s_endpgm
+; CHECK: s_endpgm
+define void @inline_asm(i32 addrspace(1)* %out) {
+entry:
+  store i32 5, i32 addrspace(1)* %out
+  call void asm sideeffect "s_endpgm", ""()
+  ret void
+}
diff --git a/test/CodeGen/R600/inline-calls.ll b/test/CodeGen/R600/inline-calls.ll
new file mode 100644
index 000000000000..b8700d55e155
--- /dev/null
+++ b/test/CodeGen/R600/inline-calls.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck  %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-NOT: {{^}}func:
+define internal fastcc i32 @func(i32 %a) {
+entry:
+  %tmp0 = add i32 %a, 1
+  ret i32 %tmp0
+}
+
+; CHECK: {{^}}kernel:
+define void @kernel(i32 addrspace(1)* %out) {
+entry:
+  %tmp0 = call i32 @func(i32 1)
+  store i32 %tmp0, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK: {{^}}kernel2:
+define void @kernel2(i32 addrspace(1)* %out) {
+entry:
+  call void @kernel(i32 addrspace(1)* %out)
+  ret void
+}
diff --git a/test/CodeGen/R600/input-mods.ll b/test/CodeGen/R600/input-mods.ll
index 13bfbab85695..e3e94995fc95 100644
--- a/test/CodeGen/R600/input-mods.ll
+++ b/test/CodeGen/R600/input-mods.ll
@@ -1,9 +1,9 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
 ;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK
 
-;EG-CHECK-LABEL: @test
+;EG-CHECK-LABEL: {{^}}test:
 ;EG-CHECK: EXP_IEEE *
-;CM-CHECK-LABEL: @test
+;CM-CHECK-LABEL: {{^}}test:
 ;CM-CHECK: EXP_IEEE T{{[0-9]+}}.X, -|T{{[0-9]+}}.X|
 ;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Y (MASKED), -|T{{[0-9]+}}.X|
 ;CM-CHECK: EXP_IEEE T{{[0-9]+}}.Z (MASKED), -|T{{[0-9]+}}.X|
diff --git a/test/CodeGen/R600/insert_subreg.ll b/test/CodeGen/R600/insert_subreg.ll
new file mode 100644
index 000000000000..dfb58d54796c
--- /dev/null
+++ b/test/CodeGen/R600/insert_subreg.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-promote-alloca -verify-machineinstrs < %s
+
+; Test that INSERT_SUBREG instructions don't have non-register operands after
+; instruction selection.
+
+; Make sure this doesn't crash
+; CHECK-LABEL: test:
+define void @test(i64 addrspace(1)* %out) {
+entry:
+  %tmp0 = alloca [16 x i32]
+  %tmp1 = ptrtoint [16 x i32]* %tmp0 to i32
+  %tmp2 = sext i32 %tmp1 to i64
+  store i64 %tmp2, i64 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/insert_vector_elt.ll b/test/CodeGen/R600/insert_vector_elt.ll
index 43b4efc93377..2442c868444f 100644
--- a/test/CodeGen/R600/insert_vector_elt.ll
+++ b/test/CodeGen/R600/insert_vector_elt.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s
 
 ; FIXME: Broken on evergreen
 ; FIXME: For some reason the 8 and 16 vectors are being stored as
@@ -8,116 +8,116 @@
 ; FIXME: Why is the constant moved into the intermediate register and
 ; not just directly into the vector component?
 
-; SI-LABEL: @insertelement_v4f32_0:
-; S_LOAD_DWORDX4 s{{[}}[[LOW_REG:[0-9]+]]:
-; V_MOV_B32_e32
-; V_MOV_B32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
-; V_MOV_B32_e32 v[[LOW_REG]], [[CONSTREG]]
-; BUFFER_STORE_DWORDX4 v{{[}}[[LOW_REG]]:
+; SI-LABEL: {{^}}insertelement_v4f32_0:
+; s_load_dwordx4 s{{[}}[[LOW_REG:[0-9]+]]:
+; v_mov_b32_e32
+; v_mov_b32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
+; v_mov_b32_e32 v[[LOW_REG]], [[CONSTREG]]
+; buffer_store_dwordx4 v{{[}}[[LOW_REG]]:
 define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
   %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
   store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
   ret void
 }
 
-; SI-LABEL: @insertelement_v4f32_1:
+; SI-LABEL: {{^}}insertelement_v4f32_1:
 define void @insertelement_v4f32_1(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
   %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 1
   store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
   ret void
 }
 
-; SI-LABEL: @insertelement_v4f32_2:
+; SI-LABEL: {{^}}insertelement_v4f32_2:
 define void @insertelement_v4f32_2(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
   %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 2
   store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
   ret void
 }
 
-; SI-LABEL: @insertelement_v4f32_3:
+; SI-LABEL: {{^}}insertelement_v4f32_3:
 define void @insertelement_v4f32_3(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
   %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 3
   store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
   ret void
 }
 
-; SI-LABEL: @insertelement_v4i32_0:
+; SI-LABEL: {{^}}insertelement_v4i32_0:
 define void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <4 x i32> %a) nounwind {
   %vecins = insertelement <4 x i32> %a, i32 999, i32 0
   store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
   ret void
 }
 
-; SI-LABEL: @dynamic_insertelement_v2f32:
-; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
-; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
-; SI: BUFFER_STORE_DWORDX2 {{v\[}}[[LOW_RESULT_REG]]:
+; SI-LABEL: {{^}}dynamic_insertelement_v2f32:
+; SI: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
+; SI: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
+; SI: buffer_store_dwordx2 {{v\[}}[[LOW_RESULT_REG]]:
 define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {
   %vecins = insertelement <2 x float> %a, float 5.000000e+00, i32 %b
   store <2 x float> %vecins, <2 x float> addrspace(1)* %out, align 8
   ret void
 }
 
-; SI-LABEL: @dynamic_insertelement_v4f32:
-; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
-; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
-; SI: BUFFER_STORE_DWORDX4 {{v\[}}[[LOW_RESULT_REG]]:
+; SI-LABEL: {{^}}dynamic_insertelement_v4f32:
+; SI: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
+; SI: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
+; SI: buffer_store_dwordx4 {{v\[}}[[LOW_RESULT_REG]]:
 define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind {
   %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %b
   store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
   ret void
 }
 
-; SI-LABEL: @dynamic_insertelement_v8f32:
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}dynamic_insertelement_v8f32:
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
 define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
   %vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
   store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
   ret void
 }
 
-; SI-LABEL: @dynamic_insertelement_v16f32:
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}dynamic_insertelement_v16f32:
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
 define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
   %vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b
   store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64
   ret void
 }
 
-; SI-LABEL: @dynamic_insertelement_v2i32:
-; SI: BUFFER_STORE_DWORDX2
+; SI-LABEL: {{^}}dynamic_insertelement_v2i32:
+; SI: buffer_store_dwordx2
 define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind {
   %vecins = insertelement <2 x i32> %a, i32 5, i32 %b
   store <2 x i32> %vecins, <2 x i32> addrspace(1)* %out, align 8
   ret void
 }
 
-; SI-LABEL: @dynamic_insertelement_v4i32:
-; SI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}dynamic_insertelement_v4i32:
+; SI: buffer_store_dwordx4
 define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b) nounwind {
   %vecins = insertelement <4 x i32> %a, i32 5, i32 %b
   store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
   ret void
 }
 
-; SI-LABEL: @dynamic_insertelement_v8i32:
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}dynamic_insertelement_v8i32:
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
 define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind {
   %vecins = insertelement <8 x i32> %a, i32 5, i32 %b
   store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32
   ret void
 }
 
-; SI-LABEL: @dynamic_insertelement_v16i32:
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
-; FIXMESI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}dynamic_insertelement_v16i32:
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
+; FIXMESI: buffer_store_dwordx4
 define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {
   %vecins = insertelement <16 x i32> %a, i32 5, i32 %b
   store <16 x i32> %vecins, <16 x i32> addrspace(1)* %out, align 64
@@ -125,16 +125,16 @@ define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i
 }
 
 
-; SI-LABEL: @dynamic_insertelement_v2i16:
-; FIXMESI: BUFFER_STORE_DWORDX2
+; SI-LABEL: {{^}}dynamic_insertelement_v2i16:
+; FIXMESI: buffer_store_dwordx2
 define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind {
   %vecins = insertelement <2 x i16> %a, i16 5, i32 %b
   store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out, align 8
   ret void
 }
 
-; SI-LABEL: @dynamic_insertelement_v4i16:
-; FIXMESI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}dynamic_insertelement_v4i16:
+; FIXMESI: buffer_store_dwordx4
 define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind {
   %vecins = insertelement <4 x i16> %a, i16 5, i32 %b
   store <4 x i16> %vecins, <4 x i16> addrspace(1)* %out, align 16
@@ -142,7 +142,7 @@ define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16>
 }
 
 
-; SI-LABEL: @dynamic_insertelement_v2i8:
+; SI-LABEL: {{^}}dynamic_insertelement_v2i8:
 ; FIXMESI: BUFFER_STORE_USHORT
 define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a, i32 %b) nounwind {
   %vecins = insertelement <2 x i8> %a, i8 5, i32 %b
@@ -150,24 +150,24 @@ define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a
   ret void
 }
 
-; SI-LABEL: @dynamic_insertelement_v4i8:
-; FIXMESI: BUFFER_STORE_DWORD
+; SI-LABEL: {{^}}dynamic_insertelement_v4i8:
+; FIXMESI: buffer_store_dword
 define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind {
   %vecins = insertelement <4 x i8> %a, i8 5, i32 %b
   store <4 x i8> %vecins, <4 x i8> addrspace(1)* %out, align 16
   ret void
 }
 
-; SI-LABEL: @dynamic_insertelement_v8i8:
-; FIXMESI: BUFFER_STORE_DWORDX2
+; SI-LABEL: {{^}}dynamic_insertelement_v8i8:
+; FIXMESI: buffer_store_dwordx2
 define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a, i32 %b) nounwind {
   %vecins = insertelement <8 x i8> %a, i8 5, i32 %b
   store <8 x i8> %vecins, <8 x i8> addrspace(1)* %out, align 16
   ret void
 }
 
-; SI-LABEL: @dynamic_insertelement_v16i8:
-; FIXMESI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}dynamic_insertelement_v16i8:
+; FIXMESI: buffer_store_dwordx4
 define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind {
   %vecins = insertelement <16 x i8> %a, i8 5, i32 %b
   store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16
@@ -176,7 +176,7 @@ define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8>
 
 ; This test requires handling INSERT_SUBREG in SIFixSGPRCopies.  Check that
 ; the compiler doesn't crash.
-; SI-LABEL: @insert_split_bb
+; SI-LABEL: {{^}}insert_split_bb:
 define void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) {
 entry:
   %0 = insertelement <2 x i32> undef, i32 %a, i32 0
@@ -199,3 +199,53 @@ endif:
   store <2 x i32> %7, <2 x i32> addrspace(1)* %out
   ret void
 }
+
+; SI-LABEL: {{^}}dynamic_insertelement_v2f64:
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind {
+  %vecins = insertelement <2 x double> %a, double 8.0, i32 %b
+  store <2 x double> %vecins, <2 x double> addrspace(1)* %out, align 16
+  ret void
+}
+
+; SI-LABEL: {{^}}dynamic_insertelement_v2i64:
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind {
+  %vecins = insertelement <2 x i64> %a, i64 5, i32 %b
+  store <2 x i64> %vecins, <2 x i64> addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: {{^}}dynamic_insertelement_v4f64:
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind {
+  %vecins = insertelement <4 x double> %a, double 8.0, i32 %b
+  store <4 x double> %vecins, <4 x double> addrspace(1)* %out, align 16
+  ret void
+}
+
+; SI-LABEL: {{^}}dynamic_insertelement_v8f64:
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) nounwind {
+  %vecins = insertelement <8 x double> %a, double 8.0, i32 %b
+  store <8 x double> %vecins, <8 x double> addrspace(1)* %out, align 16
+  ret void
+}
diff --git a/test/CodeGen/R600/insert_vector_elt_f64.ll b/test/CodeGen/R600/insert_vector_elt_f64.ll
deleted file mode 100644
index 595bc59655ac..000000000000
--- a/test/CodeGen/R600/insert_vector_elt_f64.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; REQUIRES: asserts
-; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
-
-
-; SI-LABEL: @dynamic_insertelement_v2f64:
-; SI: BUFFER_STORE_DWORDX4
-define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind {
-  %vecins = insertelement <2 x double> %a, double 8.0, i32 %b
-  store <2 x double> %vecins, <2 x double> addrspace(1)* %out, align 16
-  ret void
-}
-
-; SI-LABEL: @dynamic_insertelement_v2f64:
-; SI: BUFFER_STORE_DWORDX4
-define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind {
-  %vecins = insertelement <2 x i64> %a, i64 5, i32 %b
-  store <2 x i64> %vecins, <2 x i64> addrspace(1)* %out, align 8
-  ret void
-}
-
-; SI-LABEL: @dynamic_insertelement_v4f64:
-; SI: BUFFER_STORE_DWORDX4
-define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind {
-  %vecins = insertelement <4 x double> %a, double 8.0, i32 %b
-  store <4 x double> %vecins, <4 x double> addrspace(1)* %out, align 16
-  ret void
-}
-
-; SI-LABEL: @dynamic_insertelement_v8f64:
-; SI: BUFFER_STORE_DWORDX4
-define void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) nounwind {
-  %vecins = insertelement <8 x double> %a, double 8.0, i32 %b
-  store <8 x double> %vecins, <8 x double> addrspace(1)* %out, align 16
-  ret void
-}
diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll
index 0baa3cd3e1a3..27840b2e1609 100644
--- a/test/CodeGen/R600/kcache-fold.ll
+++ b/test/CodeGen/R600/kcache-fold.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-; CHECK: @main1
+; CHECK: {{^}}main1:
 ; CHECK: MOV * T{{[0-9]+\.[XYZW], KC0}}
 define void @main1() {
 main_body:
@@ -48,7 +48,7 @@ main_body:
   ret void
 }
 
-; CHECK: @main2
+; CHECK: {{^}}main2:
 ; CHECK-NOT: MOV
 define void @main2() {
 main_body:
diff --git a/test/CodeGen/R600/kernel-args.ll b/test/CodeGen/R600/kernel-args.ll
index 6fc69792fd3d..1984d333d4d5 100644
--- a/test/CodeGen/R600/kernel-args.ll
+++ b/test/CodeGen/R600/kernel-args.ll
@@ -1,11 +1,11 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK
 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
-; EG-CHECK-LABEL: @i8_arg
+; EG-CHECK-LABEL: {{^}}i8_arg:
 ; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @i8_arg
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK-LABEL: {{^}}i8_arg:
+; SI-CHECK: buffer_load_ubyte
 
 define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
 entry:
@@ -14,10 +14,10 @@ entry:
   ret void
 }
 
-; EG-CHECK-LABEL: @i8_zext_arg
+; EG-CHECK-LABEL: {{^}}i8_zext_arg:
 ; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @i8_zext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}i8_zext_arg:
+; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
 
 define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
 entry:
@@ -26,10 +26,10 @@ entry:
   ret void
 }
 
-; EG-CHECK-LABEL: @i8_sext_arg
+; EG-CHECK-LABEL: {{^}}i8_sext_arg:
 ; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @i8_sext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}i8_sext_arg:
+; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
 
 define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
 entry:
@@ -38,10 +38,10 @@ entry:
   ret void
 }
 
-; EG-CHECK-LABEL: @i16_arg
+; EG-CHECK-LABEL: {{^}}i16_arg:
 ; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @i16_arg
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK-LABEL: {{^}}i16_arg:
+; SI-CHECK: buffer_load_ushort
 
 define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
 entry:
@@ -50,10 +50,10 @@ entry:
   ret void
 }
 
-; EG-CHECK-LABEL: @i16_zext_arg
+; EG-CHECK-LABEL: {{^}}i16_zext_arg:
 ; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @i16_zext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}i16_zext_arg:
+; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
 
 define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
 entry:
@@ -62,10 +62,10 @@ entry:
   ret void
 }
 
-; EG-CHECK-LABEL: @i16_sext_arg
+; EG-CHECK-LABEL: {{^}}i16_sext_arg:
 ; EG-CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @i16_sext_arg
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}i16_sext_arg:
+; SI-CHECK: s_load_dword s{{[0-9]}}, s[0:1], 0xb
 
 define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
 entry:
@@ -74,176 +74,176 @@ entry:
   ret void
 }
 
-; EG-CHECK-LABEL: @i32_arg
+; EG-CHECK-LABEL: {{^}}i32_arg:
 ; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @i32_arg
-; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}i32_arg:
+; s_load_dword s{{[0-9]}}, s[0:1], 0xb
 define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
 entry:
   store i32 %in, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; EG-CHECK-LABEL: @f32_arg
+; EG-CHECK-LABEL: {{^}}f32_arg:
 ; EG-CHECK: T{{[0-9]\.[XYZW]}}, KC0[2].Z
-; SI-CHECK-LABEL: @f32_arg
-; S_LOAD_DWORD s{{[0-9]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}f32_arg:
+; s_load_dword s{{[0-9]}}, s[0:1], 0xb
 define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
 entry:
   store float %in, float addrspace(1)* %out, align 4
   ret void
 }
 
-; EG-CHECK-LABEL: @v2i8_arg
+; EG-CHECK-LABEL: {{^}}v2i8_arg:
 ; EG-CHECK: VTX_READ_8
 ; EG-CHECK: VTX_READ_8
-; SI-CHECK-LABEL: @v2i8_arg
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK-LABEL: {{^}}v2i8_arg:
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
 define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
 entry:
   store <2 x i8> %in, <2 x i8> addrspace(1)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @v2i16_arg
+; EG-CHECK-LABEL: {{^}}v2i16_arg:
 ; EG-CHECK: VTX_READ_16
 ; EG-CHECK: VTX_READ_16
-; SI-CHECK-LABEL: @v2i16_arg
-; SI-CHECK-DAG: BUFFER_LOAD_USHORT
-; SI-CHECK-DAG: BUFFER_LOAD_USHORT
+; SI-CHECK-LABEL: {{^}}v2i16_arg:
+; SI-CHECK-DAG: buffer_load_ushort
+; SI-CHECK-DAG: buffer_load_ushort
 define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
 entry:
   store <2 x i16> %in, <2 x i16> addrspace(1)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @v2i32_arg
+; EG-CHECK-LABEL: {{^}}v2i32_arg:
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
-; SI-CHECK-LABEL: @v2i32_arg
-; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}v2i32_arg:
+; SI-CHECK: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
 define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
 entry:
   store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
   ret void
 }
 
-; EG-CHECK-LABEL: @v2f32_arg
+; EG-CHECK-LABEL: {{^}}v2f32_arg:
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
-; SI-CHECK-LABEL: @v2f32_arg
-; SI-CHECK: S_LOAD_DWORDX2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
+; SI-CHECK-LABEL: {{^}}v2f32_arg:
+; SI-CHECK: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
 define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
 entry:
   store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
   ret void
 }
 
-; EG-CHECK-LABEL: @v3i8_arg
+; EG-CHECK-LABEL: {{^}}v3i8_arg:
 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
 ; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
-; SI-CHECK-LABEL: @v3i8_arg
+; SI-CHECK-LABEL: {{^}}v3i8_arg:
 define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
 entry:
   store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
   ret void
 }
 
-; EG-CHECK-LABEL: @v3i16_arg
+; EG-CHECK-LABEL: {{^}}v3i16_arg:
 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
 ; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
-; SI-CHECK-LABEL: @v3i16_arg
+; SI-CHECK-LABEL: {{^}}v3i16_arg:
 define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
 entry:
   store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
   ret void
 }
-; EG-CHECK-LABEL: @v3i32_arg
+; EG-CHECK-LABEL: {{^}}v3i32_arg:
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
-; SI-CHECK-LABEL: @v3i32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
+; SI-CHECK-LABEL: {{^}}v3i32_arg:
+; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
 define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
 entry:
   store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
   ret void
 }
 
-; EG-CHECK-LABEL: @v3f32_arg
+; EG-CHECK-LABEL: {{^}}v3f32_arg:
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
-; SI-CHECK-LABEL: @v3f32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
+; SI-CHECK-LABEL: {{^}}v3f32_arg:
+; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
 define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
 entry:
   store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
   ret void
 }
 
-; EG-CHECK-LABEL: @v4i8_arg
+; EG-CHECK-LABEL: {{^}}v4i8_arg:
 ; EG-CHECK: VTX_READ_8
 ; EG-CHECK: VTX_READ_8
 ; EG-CHECK: VTX_READ_8
 ; EG-CHECK: VTX_READ_8
-; SI-CHECK-LABEL: @v4i8_arg
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK-LABEL: {{^}}v4i8_arg:
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
 define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
 entry:
   store <4 x i8> %in, <4 x i8> addrspace(1)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @v4i16_arg
+; EG-CHECK-LABEL: {{^}}v4i16_arg:
 ; EG-CHECK: VTX_READ_16
 ; EG-CHECK: VTX_READ_16
 ; EG-CHECK: VTX_READ_16
 ; EG-CHECK: VTX_READ_16
-; SI-CHECK-LABEL: @v4i16_arg
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK-LABEL: {{^}}v4i16_arg:
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
 define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
 entry:
   store <4 x i16> %in, <4 x i16> addrspace(1)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @v4i32_arg
+; EG-CHECK-LABEL: {{^}}v4i32_arg:
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
-; SI-CHECK-LABEL: @v4i32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
+; SI-CHECK-LABEL: {{^}}v4i32_arg:
+; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
 define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
 entry:
   store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
   ret void
 }
 
-; EG-CHECK-LABEL: @v4f32_arg
+; EG-CHECK-LABEL: {{^}}v4f32_arg:
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
-; SI-CHECK-LABEL: @v4f32_arg
-; SI-CHECK: S_LOAD_DWORDX4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
+; SI-CHECK-LABEL: {{^}}v4f32_arg:
+; SI-CHECK: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
 define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
 entry:
   store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
   ret void
 }
 
-; EG-CHECK-LABEL: @v8i8_arg
+; EG-CHECK-LABEL: {{^}}v8i8_arg:
 ; EG-CHECK: VTX_READ_8
 ; EG-CHECK: VTX_READ_8
 ; EG-CHECK: VTX_READ_8
@@ -252,21 +252,21 @@ entry:
 ; EG-CHECK: VTX_READ_8
 ; EG-CHECK: VTX_READ_8
 ; EG-CHECK: VTX_READ_8
-; SI-CHECK-LABEL: @v8i8_arg
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK-LABEL: {{^}}v8i8_arg:
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
 define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
 entry:
   store <8 x i8> %in, <8 x i8> addrspace(1)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @v8i16_arg
+; EG-CHECK-LABEL: {{^}}v8i16_arg:
 ; EG-CHECK: VTX_READ_16
 ; EG-CHECK: VTX_READ_16
 ; EG-CHECK: VTX_READ_16
@@ -275,22 +275,22 @@ entry:
 ; EG-CHECK: VTX_READ_16
 ; EG-CHECK: VTX_READ_16
 ; EG-CHECK: VTX_READ_16
-; SI-CHECK-LABEL: @v8i16_arg
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK-LABEL: {{^}}v8i16_arg:
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
 define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
 entry:
   store <8 x i16> %in, <8 x i16> addrspace(1)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @v8i32_arg
+; EG-CHECK-LABEL: {{^}}v8i32_arg:
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
@@ -299,15 +299,15 @@ entry:
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
-; SI-CHECK-LABEL: @v8i32_arg
-; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
+; SI-CHECK-LABEL: {{^}}v8i32_arg:
+; SI-CHECK: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
 define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
 entry:
   store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
   ret void
 }
 
-; EG-CHECK-LABEL: @v8f32_arg
+; EG-CHECK-LABEL: {{^}}v8f32_arg:
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
@@ -316,15 +316,15 @@ entry:
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
-; SI-CHECK-LABEL: @v8f32_arg
-; SI-CHECK: S_LOAD_DWORDX8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
+; SI-CHECK-LABEL: {{^}}v8f32_arg:
+; SI-CHECK: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
 define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
 entry:
   store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
   ret void
 }
 
-; EG-CHECK-LABEL: @v16i8_arg
+; EG-CHECK-LABEL: {{^}}v16i8_arg:
 ; EG-CHECK: VTX_READ_8
 ; EG-CHECK: VTX_READ_8
 ; EG-CHECK: VTX_READ_8
@@ -341,30 +341,30 @@ entry:
 ; EG-CHECK: VTX_READ_8
 ; EG-CHECK: VTX_READ_8
 ; EG-CHECK: VTX_READ_8
-; SI-CHECK-LABEL: @v16i8_arg
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK-LABEL: {{^}}v16i8_arg:
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
 define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
 entry:
   store <16 x i8> %in, <16 x i8> addrspace(1)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @v16i16_arg
+; EG-CHECK-LABEL: {{^}}v16i16_arg:
 ; EG-CHECK: VTX_READ_16
 ; EG-CHECK: VTX_READ_16
 ; EG-CHECK: VTX_READ_16
@@ -381,30 +381,30 @@ entry:
 ; EG-CHECK: VTX_READ_16
 ; EG-CHECK: VTX_READ_16
 ; EG-CHECK: VTX_READ_16
-; SI-CHECK-LABEL: @v16i16_arg
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK-LABEL: {{^}}v16i16_arg:
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
 define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
 entry:
   store <16 x i16> %in, <16 x i16> addrspace(1)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @v16i32_arg
+; EG-CHECK-LABEL: {{^}}v16i32_arg:
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
@@ -421,15 +421,15 @@ entry:
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
-; SI-CHECK-LABEL: @v16i32_arg
-; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
+; SI-CHECK-LABEL: {{^}}v16i32_arg:
+; SI-CHECK: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
 define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
 entry:
   store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
   ret void
 }
 
-; EG-CHECK-LABEL: @v16f32_arg
+; EG-CHECK-LABEL: {{^}}v16f32_arg:
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
@@ -446,10 +446,28 @@ entry:
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
 ; EG-CHECK-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
-; SI-CHECK-LABEL: @v16f32_arg
-; SI-CHECK: S_LOAD_DWORDX16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
+; SI-CHECK-LABEL: {{^}}v16f32_arg:
+; SI-CHECK: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
 define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
 entry:
   store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
   ret void
 }
+
+; FUNC-LABEL: {{^}}kernel_arg_i64:
+; SI: s_load_dwordx2
+; SI: s_load_dwordx2
+; SI: buffer_store_dwordx2
+define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
+  store i64 %a, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
+; XSI: s_load_dwordx2
+; XSI: s_load_dwordx2
+; XSI: buffer_store_dwordx2
+; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
+;   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
+;   ret void
+; }
diff --git a/test/CodeGen/R600/large-alloca.ll b/test/CodeGen/R600/large-alloca.ll
index d8be6d40f310..0a5e592ae893 100644
--- a/test/CodeGen/R600/large-alloca.ll
+++ b/test/CodeGen/R600/large-alloca.ll
@@ -1,6 +1,6 @@
 ; XFAIL: *
 ; REQUIRES: asserts
-; RUN: llc -march=r600 -mcpu=SI < %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s
 
 define void @large_alloca(i32 addrspace(1)* %out, i32 %x, i32 %y) nounwind {
   %large = alloca [8192 x i32], align 4
diff --git a/test/CodeGen/R600/large-constant-initializer.ll b/test/CodeGen/R600/large-constant-initializer.ll
index 191b5c3de912..c11e82e76e65 100644
--- a/test/CodeGen/R600/large-constant-initializer.ll
+++ b/test/CodeGen/R600/large-constant-initializer.ll
@@ -1,5 +1,5 @@
-; RUN: llc -march=r600 -mcpu=SI < %s
-; CHECK: S_ENDPGM
+; RUN: llc -march=amdgcn -mcpu=SI < %s
+; CHECK: s_endpgm
 
 @gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4
 
diff --git a/test/CodeGen/R600/lds-initializer.ll b/test/CodeGen/R600/lds-initializer.ll
new file mode 100644
index 000000000000..9a209e50ca14
--- /dev/null
+++ b/test/CodeGen/R600/lds-initializer.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s
+
+; CHECK: error: unsupported initializer for address space in load_init_lds_global
+
+@lds = addrspace(3) global [8 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8]
+
+define void @load_init_lds_global(i32 addrspace(1)* %out, i1 %p) {
+ %gep = getelementptr [8 x i32] addrspace(3)* @lds, i32 0, i32 10
+  %ld = load i32 addrspace(3)* %gep
+  store i32 %ld, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/lds-oqap-crash.ll b/test/CodeGen/R600/lds-oqap-crash.ll
index 79591506e8cb..fbcd778de2c2 100644
--- a/test/CodeGen/R600/lds-oqap-crash.ll
+++ b/test/CodeGen/R600/lds-oqap-crash.ll
@@ -9,7 +9,7 @@
 ; because the LDS instructions are pseudo instructions and the OQAP
 ; reads and writes are bundled together in the same instruction.
 
-; CHECK: @lds_crash
+; CHECK: {{^}}lds_crash:
 define void @lds_crash(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %a, i32 %b, i32 %c) {
 entry:
   %0 = load i32 addrspace(3)* %in
diff --git a/test/CodeGen/R600/lds-output-queue.ll b/test/CodeGen/R600/lds-output-queue.ll
index d5dc061964e9..cda75b0e0ccc 100644
--- a/test/CodeGen/R600/lds-output-queue.ll
+++ b/test/CodeGen/R600/lds-output-queue.ll
@@ -3,12 +3,12 @@
 ; This test checks that the lds input queue will is empty at the end of
 ; the ALU clause.
 
-; CHECK-LABEL: @lds_input_queue
+; CHECK-LABEL: {{^}}lds_input_queue:
 ; CHECK: LDS_READ_RET * OQAP
 ; CHECK-NOT: ALU clause
 ; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
 
-@local_mem = internal unnamed_addr addrspace(3) global [2 x i32] [i32 1, i32 2], align 4
+@local_mem = internal unnamed_addr addrspace(3) global [2 x i32] undef, align 4
 
 define void @lds_input_queue(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %index) {
 entry:
@@ -84,7 +84,7 @@ declare void @llvm.AMDGPU.barrier.local()
 ; analysis, we should be able to keep these instructions sparate before
 ; scheduling.
 ;
-; CHECK-LABEL: @local_global_alias
+; CHECK-LABEL: {{^}}local_global_alias:
 ; CHECK: LDS_READ_RET
 ; CHECK-NOT: ALU clause
 ; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
diff --git a/test/CodeGen/R600/lds-size.ll b/test/CodeGen/R600/lds-size.ll
index 9182e2561500..5287723ce191 100644
--- a/test/CodeGen/R600/lds-size.ll
+++ b/test/CodeGen/R600/lds-size.ll
@@ -3,10 +3,10 @@
 ; This test makes sure we do not double count global values when they are
 ; used in different basic blocks.
 
-; CHECK-LABEL: @test
+; CHECK-LABEL: {{^}}test:
 ; CHECK: .long   166120
 ; CHECK-NEXT: .long   1
-@lds = internal unnamed_addr addrspace(3) global i32 zeroinitializer, align 4
+@lds = internal unnamed_addr addrspace(3) global i32 undef, align 4
 
 define void @test(i32 addrspace(1)* %out, i32 %cond) {
 entry:
diff --git a/test/CodeGen/R600/lds-zero-initializer.ll b/test/CodeGen/R600/lds-zero-initializer.ll
new file mode 100644
index 000000000000..87e2c334879b
--- /dev/null
+++ b/test/CodeGen/R600/lds-zero-initializer.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s
+
+; CHECK: error: unsupported initializer for address space in load_zeroinit_lds_global
+
+@lds = addrspace(3) global [256 x i32] zeroinitializer
+
+define void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 %p) {
+ %gep = getelementptr [256 x i32] addrspace(3)* @lds, i32 0, i32 10
+  %ld = load i32 addrspace(3)* %gep
+  store i32 %ld, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
index 1aae7f9f91f4..b9fa8e938ae4 100644
--- a/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
+++ b/test/CodeGen/R600/legalizedag-bug-expand-setcc.ll
@@ -8,7 +8,7 @@
 ; instructions, when only one is needed.
 ;
 
-; CHECK: @setcc_expand
+; CHECK: {{^}}setcc_expand:
 ; CHECK: SET
 ; CHECK-NOT: CND
 define void @setcc_expand(i32 addrspace(1)* %out, i32 %in) {
diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll
index 47191e0a27fb..cff1c24f89d6 100644
--- a/test/CodeGen/R600/literals.ll
+++ b/test/CodeGen/R600/literals.ll
@@ -6,7 +6,7 @@
 ; or
 ; ADD_INT literal.x KC0[2].Z, 5
 
-; CHECK: @i32_literal
+; CHECK: {{^}}i32_literal:
 ; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 5
@@ -23,7 +23,7 @@ entry:
 ; or
 ; ADD literal.x KC0[2].Z, 5.0
 
-; CHECK: @float_literal
+; CHECK: {{^}}float_literal:
 ; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.0
@@ -35,7 +35,7 @@ entry:
 }
 
 ; Make sure inline literals are folded into REG_SEQUENCE instructions.
-; CHECK: @inline_literal_reg_sequence
+; CHECK: {{^}}inline_literal_reg_sequence:
 ; CHECK: MOV {{\** *}}T[[GPR:[0-9]]].X, 0.0
 ; CHECK-NEXT: MOV {{\** *}}T[[GPR]].Y, 0.0
 ; CHECK-NEXT: MOV {{\** *}}T[[GPR]].Z, 0.0
@@ -47,7 +47,7 @@ entry:
   ret void
 }
 
-; CHECK: @inline_literal_dot4
+; CHECK: {{^}}inline_literal_dot4:
 ; CHECK: DOT4 T[[GPR:[0-9]]].X, 1.0
 ; CHECK-NEXT: DOT4 T[[GPR]].Y (MASKED), 1.0
 ; CHECK-NEXT: DOT4 T[[GPR]].Z (MASKED), 1.0
diff --git a/test/CodeGen/R600/llvm.AMDGPU.abs.ll b/test/CodeGen/R600/llvm.AMDGPU.abs.ll
index a0a47b7c4701..f143fd640989 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.abs.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.abs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone
@@ -6,10 +6,10 @@ declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone
 ; Legacy name
 declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone
 
-; FUNC-LABEL: @s_abs_i32
-; SI: S_SUB_I32
-; SI: S_MAX_I32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_abs_i32:
+; SI: s_sub_i32
+; SI: s_max_i32
+; SI: s_endpgm
 
 ; EG: SUB_INT
 ; EG: MAX_INT
@@ -19,10 +19,10 @@ define void @s_abs_i32(i32 addrspace(1)* %out, i32 %src) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @v_abs_i32
-; SI: V_SUB_I32_e32
-; SI: V_MAX_I32_e32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_abs_i32:
+; SI: v_sub_i32_e32
+; SI: v_max_i32_e32
+; SI: s_endpgm
 
 ; EG: SUB_INT
 ; EG: MAX_INT
@@ -33,10 +33,10 @@ define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind
   ret void
 }
 
-; FUNC-LABEL: @abs_i32_legacy_amdil
-; SI: V_SUB_I32_e32
-; SI: V_MAX_I32_e32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}abs_i32_legacy_amdil:
+; SI: v_sub_i32_e32
+; SI: v_max_i32_e32
+; SI: s_endpgm
 
 ; EG: SUB_INT
 ; EG: MAX_INT
diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll b/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
index 47f5255e5012..a11d9ae7af08 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
@@ -1,9 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
-; FUNC-LABEL: @test_barrier_global
+; FUNC-LABEL: {{^}}test_barrier_global:
 ; EG: GROUP_BARRIER
-; SI: S_BARRIER
+; SI: buffer_store_dword
+; SI: s_waitcnt
+; SI: s_barrier
 
 define void @test_barrier_global(i32 addrspace(1)* %out) {
 entry:
diff --git a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
index 7203675bb47b..76c2453d089f 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
@@ -1,9 +1,12 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
-; FUNC-LABEL: @test_barrier_local
+; FUNC-LABEL: {{^}}test_barrier_local:
 ; EG: GROUP_BARRIER
-; SI: S_BARRIER
+
+; SI: buffer_store_dword
+; SI: s_waitcnt
+; SI: s_barrier
 
 define void @test_barrier_local(i32 addrspace(1)* %out) {
 entry:
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
index eb5094232825..de8b928aa3a0 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
 
-; FUNC-LABEL: @bfe_i32_arg_arg_arg
-; SI: V_BFE_I32
+; FUNC-LABEL: {{^}}bfe_i32_arg_arg_arg:
+; SI: v_bfe_i32
 ; EG: BFE_INT
 ; EG: encoding: [{{[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+,[x0-9a-f]+}},0xac
 define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
@@ -13,8 +13,8 @@ define void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_arg_arg_imm
-; SI: V_BFE_I32
+; FUNC-LABEL: {{^}}bfe_i32_arg_arg_imm:
+; SI: v_bfe_i32
 ; EG: BFE_INT
 define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 123) nounwind readnone
@@ -22,8 +22,8 @@ define void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) n
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_arg_imm_arg
-; SI: V_BFE_I32
+; FUNC-LABEL: {{^}}bfe_i32_arg_imm_arg:
+; SI: v_bfe_i32
 ; EG: BFE_INT
 define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 123, i32 %src2) nounwind readnone
@@ -31,8 +31,8 @@ define void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) n
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_imm_arg_arg
-; SI: V_BFE_I32
+; FUNC-LABEL: {{^}}bfe_i32_imm_arg_arg:
+; SI: v_bfe_i32
 ; EG: BFE_INT
 define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 123, i32 %src1, i32 %src2) nounwind readnone
@@ -40,8 +40,8 @@ define void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n
   ret void
 }
 
-; FUNC-LABEL: @v_bfe_print_arg
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
+; FUNC-LABEL: {{^}}v_bfe_print_arg:
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
 define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
   %load = load i32 addrspace(1)* %src0, align 4
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
@@ -49,9 +49,9 @@ define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) no
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_arg_0_width_reg_offset
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_reg_offset:
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 %src1, i32 0) nounwind readnone
@@ -59,9 +59,9 @@ define void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_arg_0_width_imm_offset
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_arg_0_width_imm_offset:
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %src0, i32 8, i32 0) nounwind readnone
@@ -69,10 +69,10 @@ define void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_test_6
-; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_6:
+; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI: s_endpgm
 define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
@@ -81,12 +81,12 @@ define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_test_7
-; SI-NOT: SHL
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_7:
+; SI-NOT: shl
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
@@ -96,10 +96,10 @@ define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
 }
 
 ; FIXME: The shifts should be 1 BFE
-; FUNC-LABEL: @bfe_i32_test_8
-; SI: BUFFER_LOAD_DWORD
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_8:
+; SI: buffer_load_dword
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
+; SI: s_endpgm
 define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
@@ -108,11 +108,11 @@ define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_test_9
-; SI-NOT: BFE
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_9:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
@@ -120,11 +120,11 @@ define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_test_10
-; SI-NOT: BFE
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_10:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
@@ -132,11 +132,11 @@ define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_test_11
-; SI-NOT: BFE
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_11:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
@@ -144,11 +144,11 @@ define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_test_12
-; SI-NOT: BFE
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_12:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
@@ -156,10 +156,10 @@ define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_test_13
-; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_13:
+; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %shl = ashr i32 %x, 31
@@ -167,10 +167,10 @@ define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
   store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
 }
 
-; FUNC-LABEL: @bfe_i32_test_14
-; SI-NOT: LSHR
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_test_14:
+; SI-NOT: lshr
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %shl = lshr i32 %x, 31
@@ -178,11 +178,11 @@ define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
   store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_0
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_0:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 0) nounwind readnone
@@ -190,11 +190,11 @@ define void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_1
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_1:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 12334, i32 0, i32 0) nounwind readnone
@@ -202,11 +202,11 @@ define void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_2
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_2:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 0, i32 0, i32 1) nounwind readnone
@@ -214,11 +214,11 @@ define void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_3
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_3:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 1, i32 0, i32 1) nounwind readnone
@@ -226,11 +226,11 @@ define void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_4
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_4:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 0, i32 1) nounwind readnone
@@ -238,11 +238,11 @@ define void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_5
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_5:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 7, i32 1) nounwind readnone
@@ -250,11 +250,11 @@ define void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_6
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0xffffff80
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_6:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0xffffff80
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 128, i32 0, i32 8) nounwind readnone
@@ -262,11 +262,11 @@ define void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_7
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_7:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 0, i32 8) nounwind readnone
@@ -274,11 +274,11 @@ define void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_8
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_8:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 127, i32 6, i32 8) nounwind readnone
@@ -286,11 +286,11 @@ define void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_9
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_9:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65536, i32 16, i32 8) nounwind readnone
@@ -298,11 +298,11 @@ define void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_10
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_10:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 65535, i32 16, i32 16) nounwind readnone
@@ -310,11 +310,11 @@ define void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_11
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -6
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_11:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -6
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 4) nounwind readnone
@@ -322,11 +322,11 @@ define void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_12
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_12:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 31, i32 1) nounwind readnone
@@ -334,11 +334,11 @@ define void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_13
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_13:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 131070, i32 16, i32 16) nounwind readnone
@@ -346,11 +346,11 @@ define void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_14
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_14:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 2, i32 30) nounwind readnone
@@ -358,11 +358,11 @@ define void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_15
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_15:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 160, i32 4, i32 28) nounwind readnone
@@ -370,11 +370,11 @@ define void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_16
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_16:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 4294967295, i32 1, i32 7) nounwind readnone
@@ -382,11 +382,11 @@ define void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_17
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_17:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 1, i32 31) nounwind readnone
@@ -394,11 +394,11 @@ define void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_i32_constant_fold_test_18
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_i32_constant_fold_test_18:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 255, i32 31, i32 1) nounwind readnone
@@ -408,14 +408,14 @@ define void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
 
 ; XXX - This should really be a single BFE, but the sext_inreg of the
 ; extended type i24 is never custom lowered.
-; FUNC-LABEL: @bfe_sext_in_reg_i24
-; SI: BUFFER_LOAD_DWORD [[LOAD:v[0-9]+]],
-; SI: V_LSHLREV_B32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
-; SI: V_ASHRREV_I32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
-; XSI: V_BFE_I32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
+; FUNC-LABEL: {{^}}bfe_sext_in_reg_i24:
+; SI: buffer_load_dword [[LOAD:v[0-9]+]],
+; SI: v_lshlrev_b32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
+; SI: v_ashrrev_i32_e32 {{v[0-9]+}}, 8, {{v[0-9]+}}
+; XSI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 8
 ; XSI-NOT: SHL
 ; XSI-NOT: SHR
-; XSI: BUFFER_STORE_DWORD [[BFE]],
+; XSI: buffer_store_dword [[BFE]],
 define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
@@ -424,3 +424,18 @@ define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
   store i32 %ashr, i32 addrspace(1)* %out, align 4
   ret void
 }
+
+; FUNC-LABEL: @simplify_demanded_bfe_sdiv
+; SI: buffer_load_dword [[LOAD:v[0-9]+]]
+; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
+; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
+; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]]
+; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
+; SI: buffer_store_dword [[TMP2]]
+define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %src = load i32 addrspace(1)* %in, align 4
+  %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %src, i32 1, i32 16) nounwind readnone
+  %div = sdiv i32 %bfe, 2
+  store i32 %div, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
index 1a62253eeb74..85bf831534a8 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare i32 @llvm.AMDGPU.bfe.u32(i32, i32, i32) nounwind readnone
 
-; FUNC-LABEL: @bfe_u32_arg_arg_arg
-; SI: V_BFE_U32
+; FUNC-LABEL: {{^}}bfe_u32_arg_arg_arg:
+; SI: v_bfe_u32
 ; EG: BFE_UINT
 define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
@@ -12,8 +12,8 @@ define void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_arg_arg_imm
-; SI: V_BFE_U32
+; FUNC-LABEL: {{^}}bfe_u32_arg_arg_imm:
+; SI: v_bfe_u32
 ; EG: BFE_UINT
 define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 123) nounwind readnone
@@ -21,8 +21,8 @@ define void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) n
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_arg_imm_arg
-; SI: V_BFE_U32
+; FUNC-LABEL: {{^}}bfe_u32_arg_imm_arg:
+; SI: v_bfe_u32
 ; EG: BFE_UINT
 define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 123, i32 %src2) nounwind readnone
@@ -30,8 +30,8 @@ define void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) n
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_imm_arg_arg
-; SI: V_BFE_U32
+; FUNC-LABEL: {{^}}bfe_u32_imm_arg_arg:
+; SI: v_bfe_u32
 ; EG: BFE_UINT
 define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 123, i32 %src1, i32 %src2) nounwind readnone
@@ -39,9 +39,9 @@ define void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) n
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_arg_0_width_reg_offset
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_reg_offset:
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 %src1, i32 0) nounwind readnone
@@ -49,9 +49,9 @@ define void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_arg_0_width_imm_offset
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_arg_0_width_imm_offset:
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %src0, i32 8, i32 0) nounwind readnone
@@ -59,10 +59,10 @@ define void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_zextload_i8
-; SI: BUFFER_LOAD_UBYTE
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_zextload_i8:
+; SI: buffer_load_ubyte
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
   %load = load i8 addrspace(1)* %in
   %ext = zext i8 %load to i32
@@ -71,12 +71,12 @@ define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) n
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_zext_in_reg_i8
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI-NEXT: V_AND_B32_e32
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8:
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI-NEXT: v_and_b32_e32
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %load = load i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
@@ -86,12 +86,12 @@ define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %i
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_zext_in_reg_i16
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI-NEXT: V_AND_B32_e32
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16:
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI-NEXT: v_and_b32_e32
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %load = load i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
@@ -101,11 +101,11 @@ define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_1
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1:
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI: bfe
+; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %load = load i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
@@ -115,12 +115,12 @@ define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspa
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_3
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0xf8
-; SI-NEXT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3:
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8
+; SI-NEXT: bfe
+; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %load = load i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
@@ -130,12 +130,12 @@ define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspa
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_zext_in_reg_i8_offset_7
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI-NEXT: V_AND_B32_e32 {{v[0-9]+}}, 0x80
-; SI-NEXT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7:
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80
+; SI-NEXT: bfe
+; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %load = load i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
@@ -145,11 +145,11 @@ define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspa
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_zext_in_reg_i16_offset_8
-; SI: BUFFER_LOAD_DWORD
-; SI: V_ADD_I32
-; SI-NEXT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8:
+; SI: buffer_load_dword
+; SI: v_add_i32
+; SI-NEXT: bfe
+; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %load = load i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
@@ -159,10 +159,10 @@ define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrsp
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_test_1
-; SI: BUFFER_LOAD_DWORD
-; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_1:
+; SI: buffer_load_dword
+; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI: s_endpgm
 ; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
 define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
@@ -187,13 +187,13 @@ define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_test_4
-; SI-NOT: LSHL
-; SI-NOT: SHR
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_4:
+; SI-NOT: lshl
+; SI-NOT: shr
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
@@ -203,12 +203,12 @@ define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_test_5
-; SI: BUFFER_LOAD_DWORD
-; SI-NOT: LSHL
-; SI-NOT: SHR
-; SI: V_BFE_I32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_5:
+; SI: buffer_load_dword
+; SI-NOT: lshl
+; SI-NOT: shr
+; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
+; SI: s_endpgm
 define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
@@ -218,10 +218,10 @@ define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_test_6
-; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_6:
+; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI: s_endpgm
 define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
@@ -230,10 +230,10 @@ define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_test_7
-; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_7:
+; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
@@ -242,11 +242,11 @@ define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_test_8
-; SI-NOT: BFE
-; SI: V_AND_B32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_8:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
@@ -255,11 +255,11 @@ define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_test_9
-; SI-NOT: BFE
-; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_9:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
@@ -267,11 +267,11 @@ define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounw
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_test_10
-; SI-NOT: BFE
-; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_10:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
@@ -279,11 +279,11 @@ define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_test_11
-; SI-NOT: BFE
-; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_11:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
@@ -291,11 +291,11 @@ define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_test_12
-; SI-NOT: BFE
-; SI: V_LSHRREV_B32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_12:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
@@ -303,10 +303,10 @@ define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_test_13
+; FUNC-LABEL: {{^}}bfe_u32_test_13:
 ; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %shl = ashr i32 %x, 31
@@ -314,10 +314,10 @@ define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
   store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
 }
 
-; FUNC-LABEL: @bfe_u32_test_14
-; SI-NOT: LSHR
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_test_14:
+; SI-NOT: lshr
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %shl = lshr i32 %x, 31
@@ -325,11 +325,11 @@ define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) noun
   store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_0
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_0:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 0) nounwind readnone
@@ -337,11 +337,11 @@ define void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_1
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_1:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 12334, i32 0, i32 0) nounwind readnone
@@ -349,11 +349,11 @@ define void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_2
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_2:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 0, i32 0, i32 1) nounwind readnone
@@ -361,11 +361,11 @@ define void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_3
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_3:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 1, i32 0, i32 1) nounwind readnone
@@ -373,11 +373,11 @@ define void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_4
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], -1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_4:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 0, i32 1) nounwind readnone
@@ -385,11 +385,11 @@ define void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_5
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_5:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 7, i32 1) nounwind readnone
@@ -397,11 +397,11 @@ define void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_6
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x80
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_6:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x80
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 128, i32 0, i32 8) nounwind readnone
@@ -409,11 +409,11 @@ define void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_7
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_7:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 0, i32 8) nounwind readnone
@@ -421,11 +421,11 @@ define void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_8
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_8:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 127, i32 6, i32 8) nounwind readnone
@@ -433,11 +433,11 @@ define void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_9
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_9:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFEfppppppppppppp
 define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65536, i32 16, i32 8) nounwind readnone
@@ -445,11 +445,11 @@ define void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_10
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_10:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 65535, i32 16, i32 16) nounwind readnone
@@ -457,11 +457,11 @@ define void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_11
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_11:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 4) nounwind readnone
@@ -469,11 +469,11 @@ define void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_12
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_12:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 31, i32 1) nounwind readnone
@@ -481,11 +481,11 @@ define void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_13
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 1
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_13:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 131070, i32 16, i32 16) nounwind readnone
@@ -493,11 +493,11 @@ define void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_14
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 40
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_14:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 2, i32 30) nounwind readnone
@@ -505,11 +505,11 @@ define void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_15
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 10
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_15:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 160, i32 4, i32 28) nounwind readnone
@@ -517,11 +517,11 @@ define void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_16
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_16:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 4294967295, i32 1, i32 7) nounwind readnone
@@ -529,11 +529,11 @@ define void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_17
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0x7f
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_17:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 1, i32 31) nounwind readnone
@@ -541,14 +541,36 @@ define void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfe_u32_constant_fold_test_18
-; SI-NOT: BFE
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], 0
-; SI: BUFFER_STORE_DWORD [[VREG]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_u32_constant_fold_test_18:
+; SI-NOT: {{[^@]}}bfe
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; SI: buffer_store_dword [[VREG]],
+; SI: s_endpgm
 ; EG-NOT: BFE
 define void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) nounwind {
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 255, i32 31, i32 1) nounwind readnone
   store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
   ret void
 }
+
+; Make sure that SimplifyDemandedBits doesn't cause the and to be
+; reduced to the bits demanded by the bfe.
+
+; XXX: The operand to v_bfe_u32 could also just directly be the load register.
+; FUNC-LABEL: {{^}}simplify_bfe_u32_multi_use_arg:
+; SI: buffer_load_dword [[ARG:v[0-9]+]]
+; SI: v_and_b32_e32 [[AND:v[0-9]+]], 63, [[ARG]]
+; SI: v_bfe_u32 [[BFE:v[0-9]+]], [[AND]], 2, 2
+; SI-DAG: buffer_store_dword [[AND]]
+; SI-DAG: buffer_store_dword [[BFE]]
+; SI: s_endpgm
+define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
+                                            i32 addrspace(1)* %out1,
+                                            i32 addrspace(1)* %in) nounwind {
+  %src = load i32 addrspace(1)* %in, align 4
+  %and = and i32 %src, 63
+  %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
+  store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
+  store i32 %and, i32 addrspace(1)* %out1, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfi.ll b/test/CodeGen/R600/llvm.AMDGPU.bfi.ll
index e1de45b4ba29..b3da24657f49 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfi.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfi.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare i32 @llvm.AMDGPU.bfi(i32, i32, i32) nounwind readnone
 
-; FUNC-LABEL: @bfi_arg_arg_arg
-; SI: V_BFI_B32
+; FUNC-LABEL: {{^}}bfi_arg_arg_arg:
+; SI: v_bfi_b32
 ; EG: BFI_INT
 define void @bfi_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
   %bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 %src1, i32 %src1) nounwind readnone
@@ -12,8 +12,8 @@ define void @bfi_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %
   ret void
 }
 
-; FUNC-LABEL: @bfi_arg_arg_imm
-; SI: V_BFI_B32
+; FUNC-LABEL: {{^}}bfi_arg_arg_imm:
+; SI: v_bfi_b32
 ; EG: BFI_INT
 define void @bfi_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
   %bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 %src1, i32 123) nounwind readnone
@@ -21,8 +21,8 @@ define void @bfi_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounw
   ret void
 }
 
-; FUNC-LABEL: @bfi_arg_imm_arg
-; SI: V_BFI_B32
+; FUNC-LABEL: {{^}}bfi_arg_imm_arg:
+; SI: v_bfi_b32
 ; EG: BFI_INT
 define void @bfi_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounwind {
   %bfi = call i32 @llvm.AMDGPU.bfi(i32 %src0, i32 123, i32 %src2) nounwind readnone
@@ -30,8 +30,8 @@ define void @bfi_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) nounw
   ret void
 }
 
-; FUNC-LABEL: @bfi_imm_arg_arg
-; SI: V_BFI_B32
+; FUNC-LABEL: {{^}}bfi_imm_arg_arg:
+; SI: v_bfi_b32
 ; EG: BFI_INT
 define void @bfi_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) nounwind {
   %bfi = call i32 @llvm.AMDGPU.bfi(i32 123, i32 %src1, i32 %src2) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.bfm.ll b/test/CodeGen/R600/llvm.AMDGPU.bfm.ll
index ef8721e4a978..80aecc743182 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.bfm.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.bfm.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare i32 @llvm.AMDGPU.bfm(i32, i32) nounwind readnone
 
-; FUNC-LABEL: @bfm_arg_arg
-; SI: V_BFM
+; FUNC-LABEL: {{^}}bfm_arg_arg:
+; SI: v_bfm
 ; EG: BFM_INT
 define void @bfm_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
   %bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 %src1) nounwind readnone
@@ -12,8 +12,8 @@ define void @bfm_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind
   ret void
 }
 
-; FUNC-LABEL: @bfm_arg_imm
-; SI: V_BFM
+; FUNC-LABEL: {{^}}bfm_arg_imm:
+; SI: v_bfm
 ; EG: BFM_INT
 define void @bfm_arg_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
   %bfm = call i32 @llvm.AMDGPU.bfm(i32 %src0, i32 123) nounwind readnone
@@ -21,8 +21,8 @@ define void @bfm_arg_imm(i32 addrspace(1)* %out, i32 %src0) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfm_imm_arg
-; SI: V_BFM
+; FUNC-LABEL: {{^}}bfm_imm_arg:
+; SI: v_bfm
 ; EG: BFM_INT
 define void @bfm_imm_arg(i32 addrspace(1)* %out, i32 %src1) nounwind {
   %bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 %src1) nounwind readnone
@@ -30,8 +30,8 @@ define void @bfm_imm_arg(i32 addrspace(1)* %out, i32 %src1) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @bfm_imm_imm
-; SI: V_BFM
+; FUNC-LABEL: {{^}}bfm_imm_imm:
+; SI: v_bfm
 ; EG: BFM_INT
 define void @bfm_imm_imm(i32 addrspace(1)* %out) nounwind {
   %bfm = call i32 @llvm.AMDGPU.bfm(i32 123, i32 456) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.brev.ll b/test/CodeGen/R600/llvm.AMDGPU.brev.ll
index 68a5ad0649c2..dd4459be536b 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.brev.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.brev.ll
@@ -1,24 +1,24 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare i32 @llvm.AMDGPU.brev(i32) nounwind readnone
 
-; FUNC-LABEL: @s_brev_i32:
-; SI: S_LOAD_DWORD [[VAL:s[0-9]+]],
-; SI: S_BREV_B32 [[SRESULT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}s_brev_i32:
+; SI: s_load_dword [[VAL:s[0-9]+]],
+; SI: s_brev_b32 [[SRESULT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
 define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
   %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
   store i32 %ctlz, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @v_brev_i32:
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_BFREV_B32_e32 [[RESULT:v[0-9]+]], [[VAL]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}v_brev_i32:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
   %val = load i32 addrspace(1)* %valptr, align 4
   %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.clamp.ll b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll
index d608953a0dd2..b9d5f6fbac07 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.clamp.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll
@@ -1,14 +1,15 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
+declare float @llvm.fabs.f32(float) nounwind readnone
 declare float @llvm.AMDGPU.clamp.f32(float, float, float) nounwind readnone
 declare float @llvm.AMDIL.clamp.f32(float, float, float) nounwind readnone
 
-; FUNC-LABEL: @clamp_0_1_f32
-; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
-; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0, 1, 0
-; SI: BUFFER_STORE_DWORD [[RESULT]]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}clamp_0_1_f32:
+; SI: s_load_dword [[ARG:s[0-9]+]],
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
 
 ; EG: MOV_SAT
 define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
@@ -17,10 +18,47 @@ define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @clamp_0_1_amdil_legacy_f32
-; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
-; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[ARG]], 0, 1, 0
-; SI: BUFFER_STORE_DWORD [[RESULT]]
+; FUNC-LABEL: {{^}}clamp_fabs_0_1_f32:
+; SI: s_load_dword [[ARG:s[0-9]+]],
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, |[[ARG]]| clamp{{$}}
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @clamp_fabs_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
+  %src.fabs = call float @llvm.fabs.f32(float %src) nounwind readnone
+  %clamp = call float @llvm.AMDGPU.clamp.f32(float %src.fabs, float 0.0, float 1.0) nounwind readnone
+  store float %clamp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}clamp_fneg_0_1_f32:
+; SI: s_load_dword [[ARG:s[0-9]+]],
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, -[[ARG]] clamp{{$}}
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @clamp_fneg_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
+  %src.fneg = fsub float -0.0, %src
+  %clamp = call float @llvm.AMDGPU.clamp.f32(float %src.fneg, float 0.0, float 1.0) nounwind readnone
+  store float %clamp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}clamp_fneg_fabs_0_1_f32:
+; SI: s_load_dword [[ARG:s[0-9]+]],
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, -|[[ARG]]| clamp{{$}}
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @clamp_fneg_fabs_0_1_f32(float addrspace(1)* %out, float %src) nounwind {
+  %src.fabs = call float @llvm.fabs.f32(float %src) nounwind readnone
+  %src.fneg.fabs = fsub float -0.0, %src.fabs
+  %clamp = call float @llvm.AMDGPU.clamp.f32(float %src.fneg.fabs, float 0.0, float 1.0) nounwind readnone
+  store float %clamp, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}clamp_0_1_amdil_legacy_f32:
+; SI: s_load_dword [[ARG:s[0-9]+]],
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}}
+; SI: buffer_store_dword [[RESULT]]
 define void @clamp_0_1_amdil_legacy_f32(float addrspace(1)* %out, float %src) nounwind {
   %clamp = call float @llvm.AMDIL.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone
   store float %clamp, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/llvm.AMDGPU.class.ll b/test/CodeGen/R600/llvm.AMDGPU.class.ll
new file mode 100644
index 000000000000..974e3c71e622
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.class.ll
@@ -0,0 +1,497 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare i1 @llvm.AMDGPU.class.f32(float, i32) #1
+declare i1 @llvm.AMDGPU.class.f64(double, i32) #1
+declare i32 @llvm.r600.read.tidig.x() #1
+declare float @llvm.fabs.f32(float) #1
+declare double @llvm.fabs.f64(double) #1
+
+; SI-LABEL: {{^}}test_class_f32:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
+  %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 %b) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_fabs_f32:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SA]]|, [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
+  %a.fabs = call float @llvm.fabs.f32(float %a) #1
+  %result = call i1 @llvm.AMDGPU.class.f32(float %a.fabs, i32 %b) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_fneg_f32:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -[[SA]], [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_fneg_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
+  %a.fneg = fsub float -0.0, %a
+  %result = call i1 @llvm.AMDGPU.class.f32(float %a.fneg, i32 %b) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_fneg_fabs_f32:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|[[SA]]|, [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_fneg_fabs_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
+  %a.fabs = call float @llvm.fabs.f32(float %a) #1
+  %a.fneg.fabs = fsub float -0.0, %a.fabs
+  %result = call i1 @llvm.AMDGPU.class.f32(float %a.fneg.fabs, i32 %b) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_1_f32:
+; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: v_cmp_class_f32_e64 [[COND:s\[[0-9]+:[0-9]+\]]], [[SA]], 1{{$}}
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_1_f32(i32 addrspace(1)* %out, float %a) #0 {
+  %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_64_f32:
+; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: v_cmp_class_f32_e64 [[COND:s\[[0-9]+:[0-9]+\]]], [[SA]], 64{{$}}
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_64_f32(i32 addrspace(1)* %out, float %a) #0 {
+  %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 64) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; Set all 10 bits of mask
+; SI-LABEL: {{^}}test_class_full_mask_f32:
+; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x3ff{{$}}
+; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[MASK]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_full_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
+  %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1023) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_9bit_mask_f32:
+; SI: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
+; SI: v_cmp_class_f32_e32 vcc, [[SA]], [[MASK]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
+  %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}v_test_class_full_mask_f32:
+; SI-DAG: buffer_load_dword [[VA:v[0-9]+]]
+; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
+; SI: v_cmp_class_f32_e32 vcc, [[VA]], [[MASK]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load float addrspace(1)* %gep.in
+
+  %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_inline_imm_constant_dynamic_mask_f32:
+; SI-DAG: buffer_load_dword [[VB:v[0-9]+]]
+; SI: v_cmp_class_f32_e32 vcc, 1.0, [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %b = load i32 addrspace(1)* %gep.in
+
+  %result = call i1 @llvm.AMDGPU.class.f32(float 1.0, i32 %b) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+  ret void
+}
+
+; FIXME: Why isn't this using a literal constant operand?
+; SI-LABEL: {{^}}test_class_lit_constant_dynamic_mask_f32:
+; SI-DAG: buffer_load_dword [[VB:v[0-9]+]]
+; SI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; SI: v_cmp_class_f32_e32 vcc, [[VK]], [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %b = load i32 addrspace(1)* %gep.in
+
+  %result = call i1 @llvm.AMDGPU.class.f32(float 1024.0, i32 %b) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_f64:
+; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
+  %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 %b) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_fabs_f64:
+; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], |[[SA]]|, [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
+  %a.fabs = call double @llvm.fabs.f64(double %a) #1
+  %result = call i1 @llvm.AMDGPU.class.f64(double %a.fabs, i32 %b) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_fneg_f64:
+; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -[[SA]], [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_fneg_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
+  %a.fneg = fsub double -0.0, %a
+  %result = call i1 @llvm.AMDGPU.class.f64(double %a.fneg, i32 %b) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_fneg_fabs_f64:
+; SI-DAG: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_cmp_class_f64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], -|[[SA]]|, [[VB]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_fneg_fabs_f64(i32 addrspace(1)* %out, double %a, i32 %b) #0 {
+  %a.fabs = call double @llvm.fabs.f64(double %a) #1
+  %a.fneg.fabs = fsub double -0.0, %a.fabs
+  %result = call i1 @llvm.AMDGPU.class.f64(double %a.fneg.fabs, i32 %b) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_1_f64:
+; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 1{{$}}
+; SI: s_endpgm
+define void @test_class_1_f64(i32 addrspace(1)* %out, double %a) #0 {
+  %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 1) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_64_f64:
+; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 64{{$}}
+; SI: s_endpgm
+define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 {
+  %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 64) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; Set all 9 bits of mask
+; SI-LABEL: {{^}}test_class_full_mask_f64:
+; SI: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
+; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[MASK]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 {
+  %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}v_test_class_full_mask_f64:
+; SI-DAG: buffer_load_dwordx2 [[VA:v\[[0-9]+:[0-9]+\]]]
+; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
+; SI: v_cmp_class_f64_e32 vcc, [[VA]], [[MASK]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.in = getelementptr double addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load double addrspace(1)* %in
+
+  %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_inline_imm_constant_dynamic_mask_f64:
+; XSI: v_cmp_class_f64_e32 vcc, 1.0,
+; SI: v_cmp_class_f64_e32 vcc,
+; SI: s_endpgm
+define void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %b = load i32 addrspace(1)* %gep.in
+
+  %result = call i1 @llvm.AMDGPU.class.f64(double 1.0, i32 %b) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_lit_constant_dynamic_mask_f64:
+; SI: v_cmp_class_f64_e32 vcc, s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
+; SI: s_endpgm
+define void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %b = load i32 addrspace(1)* %gep.in
+
+  %result = call i1 @llvm.AMDGPU.class.f64(double 1024.0, i32 %b) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_fold_or_class_f32_0:
+; SI-NOT: v_cmp_class
+; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 3{{$}}
+; SI-NOT: v_cmp_class
+; SI: s_endpgm
+define void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load float addrspace(1)* %gep.in
+
+  %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
+  %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 3) #1
+  %or = or i1 %class0, %class1
+
+  %sext = sext i1 %or to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_fold_or3_class_f32_0:
+; SI-NOT: v_cmp_class
+; SI: v_cmp_class_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}}
+; SI-NOT: v_cmp_class
+; SI: s_endpgm
+define void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load float addrspace(1)* %gep.in
+
+  %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
+  %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
+  %class2 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
+  %or.0 = or i1 %class0, %class1
+  %or.1 = or i1 %or.0, %class2
+
+  %sext = sext i1 %or.1 to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_fold_or_all_tests_class_f32_0:
+; SI-NOT: v_cmp_class
+; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x3ff{{$}}
+; SI: v_cmp_class_f32_e32 vcc, v{{[0-9]+}}, [[MASK]]{{$}}
+; SI-NOT: v_cmp_class
+; SI: s_endpgm
+define void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load float addrspace(1)* %gep.in
+
+  %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
+  %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
+  %class2 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
+  %class3 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1
+  %class4 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 16) #1
+  %class5 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 32) #1
+  %class6 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 64) #1
+  %class7 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 128) #1
+  %class8 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 256) #1
+  %class9 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 512) #1
+  %or.0 = or i1 %class0, %class1
+  %or.1 = or i1 %or.0, %class2
+  %or.2 = or i1 %or.1, %class3
+  %or.3 = or i1 %or.2, %class4
+  %or.4 = or i1 %or.3, %class5
+  %or.5 = or i1 %or.4, %class6
+  %or.6 = or i1 %or.5, %class7
+  %or.7 = or i1 %or.6, %class8
+  %or.8 = or i1 %or.7, %class9
+  %sext = sext i1 %or.8 to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_fold_or_class_f32_1:
+; SI-NOT: v_cmp_class
+; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 12{{$}}
+; SI-NOT: v_cmp_class
+; SI: s_endpgm
+define void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load float addrspace(1)* %gep.in
+
+  %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
+  %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1
+  %or = or i1 %class0, %class1
+
+  %sext = sext i1 %or to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_fold_or_class_f32_2:
+; SI-NOT: v_cmp_class
+; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}}
+; SI-NOT: v_cmp_class
+; SI: s_endpgm
+define void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load float addrspace(1)* %gep.in
+
+  %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
+  %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
+  %or = or i1 %class0, %class1
+
+  %sext = sext i1 %or to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_no_fold_or_class_f32_0:
+; SI-DAG: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 4{{$}}
+; SI-DAG: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 8{{$}}
+; SI: s_or_b64
+; SI: s_endpgm
+define void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in, float %b) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() #1
+  %gep.in = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load float addrspace(1)* %gep.in
+
+  %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
+  %class1 = call i1 @llvm.AMDGPU.class.f32(float %b, i32 8) #1
+  %or = or i1 %class0, %class1
+
+  %sext = sext i1 %or to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_0_f32:
+; SI-NOT: v_cmp_class
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; SI-NEXT: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_0_f32(i32 addrspace(1)* %out, float %a) #0 {
+  %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 0) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_class_0_f64:
+; SI-NOT: v_cmp_class
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @test_class_0_f64(i32 addrspace(1)* %out, double %a) #0 {
+  %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 0) #1
+  %sext = sext i1 %result to i32
+  store i32 %sext, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/llvm.AMDGPU.cube.ll b/test/CodeGen/R600/llvm.AMDGPU.cube.ll
index 110bbfde68b9..aa07afdebea6 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.cube.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.cube.ll
@@ -1,7 +1,7 @@
 
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-; CHECK: @cube
+; CHECK: {{^}}cube:
 ; CHECK: CUBE T{{[0-9]}}.X
 ; CHECK: CUBE T{{[0-9]}}.Y
 ; CHECK: CUBE T{{[0-9]}}.Z
diff --git a/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll b/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
index 6facb4782e98..4d187d5193c5 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
@@ -1,12 +1,12 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s
 
 declare float @llvm.AMDGPU.cvt.f32.ubyte0(i32) nounwind readnone
 declare float @llvm.AMDGPU.cvt.f32.ubyte1(i32) nounwind readnone
 declare float @llvm.AMDGPU.cvt.f32.ubyte2(i32) nounwind readnone
 declare float @llvm.AMDGPU.cvt.f32.ubyte3(i32) nounwind readnone
 
-; SI-LABEL: @test_unpack_byte0_to_float:
-; SI: V_CVT_F32_UBYTE0
+; SI-LABEL: {{^}}test_unpack_byte0_to_float:
+; SI: v_cvt_f32_ubyte0
 define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %val = load i32 addrspace(1)* %in, align 4
   %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte0(i32 %val) nounwind readnone
@@ -14,8 +14,8 @@ define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(
   ret void
 }
 
-; SI-LABEL: @test_unpack_byte1_to_float:
-; SI: V_CVT_F32_UBYTE1
+; SI-LABEL: {{^}}test_unpack_byte1_to_float:
+; SI: v_cvt_f32_ubyte1
 define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %val = load i32 addrspace(1)* %in, align 4
   %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte1(i32 %val) nounwind readnone
@@ -23,8 +23,8 @@ define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(
   ret void
 }
 
-; SI-LABEL: @test_unpack_byte2_to_float:
-; SI: V_CVT_F32_UBYTE2
+; SI-LABEL: {{^}}test_unpack_byte2_to_float:
+; SI: v_cvt_f32_ubyte2
 define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %val = load i32 addrspace(1)* %in, align 4
   %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte2(i32 %val) nounwind readnone
@@ -32,8 +32,8 @@ define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(
   ret void
 }
 
-; SI-LABEL: @test_unpack_byte3_to_float:
-; SI: V_CVT_F32_UBYTE3
+; SI-LABEL: {{^}}test_unpack_byte3_to_float:
+; SI: v_cvt_f32_ubyte3
 define void @test_unpack_byte3_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %val = load i32 addrspace(1)* %in, align 4
   %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte3(i32 %val) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
index c8c73573e073..52d0519ef277 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_fixup.ll
@@ -1,25 +1,25 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
 declare float @llvm.AMDGPU.div.fixup.f32(float, float, float) nounwind readnone
 declare double @llvm.AMDGPU.div.fixup.f64(double, double, double) nounwind readnone
 
-; SI-LABEL: @test_div_fixup_f32:
-; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]]
-; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: V_DIV_FIXUP_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}test_div_fixup_f32:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 define void @test_div_fixup_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
   %result = call float @llvm.AMDGPU.div.fixup.f32(float %a, float %b, float %c) nounwind readnone
   store float %result, float addrspace(1)* %out, align 4
   ret void
 }
 
-; SI-LABEL: @test_div_fixup_f64:
-; SI: V_DIV_FIXUP_F64
+; SI-LABEL: {{^}}test_div_fixup_f64:
+; SI: v_div_fixup_f64
 define void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
   %result = call double @llvm.AMDGPU.div.fixup.f64(double %a, double %b, double %c) nounwind readnone
   store double %result, double addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
index 4f1e827c2cbd..2c9085e926dd 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
@@ -1,27 +1,27 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-declare float @llvm.AMDGPU.div.fmas.f32(float, float, float) nounwind readnone
-declare double @llvm.AMDGPU.div.fmas.f64(double, double, double) nounwind readnone
+declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone
+declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone
 
-; SI-LABEL: @test_div_fmas_f32:
-; SI-DAG: S_LOAD_DWORD [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI-DAG: S_LOAD_DWORD [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
-; SI-DAG: V_MOV_B32_e32 [[VC:v[0-9]+]], [[SC]]
-; SI-DAG: S_LOAD_DWORD [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
-; SI: V_MOV_B32_e32 [[VB:v[0-9]+]], [[SB]]
-; SI: V_DIV_FMAS_F32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
-define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
-  %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c) nounwind readnone
+; SI-LABEL: {{^}}test_div_fmas_f32:
+; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SC:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI-DAG: v_mov_b32_e32 [[VC:v[0-9]+]], [[SC]]
+; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @test_div_fmas_f32(float addrspace(1)* %out, float %a, float %b, float %c, i1 %d) nounwind {
+  %result = call float @llvm.AMDGPU.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone
   store float %result, float addrspace(1)* %out, align 4
   ret void
 }
 
-; SI-LABEL: @test_div_fmas_f64:
-; SI: V_DIV_FMAS_F64
-define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind {
-  %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c) nounwind readnone
+; SI-LABEL: {{^}}test_div_fmas_f64:
+; SI: v_div_fmas_f64
+define void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind {
+  %result = call double @llvm.AMDGPU.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
   store double %result, double addrspace(1)* %out, align 8
   ret void
 }
diff --git a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
index 527c8da10a3c..32e6eaa1a7a9 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
@@ -1,13 +1,23 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone
 declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone
 
 ; SI-LABEL @test_div_scale_f32_1:
-; SI: V_DIV_SCALE_F32
-define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind {
-  %a = load float addrspace(1)* %aptr, align 4
-  %b = load float addrspace(1)* %bptr, align 4
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+  %a = load float addrspace(1)* %gep.0, align 4
+  %b = load float addrspace(1)* %gep.1, align 4
+
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
   store float %result0, float addrspace(1)* %out, align 4
@@ -15,10 +25,19 @@ define void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)*
 }
 
 ; SI-LABEL @test_div_scale_f32_2:
-; SI: V_DIV_SCALE_F32
-define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr) nounwind {
-  %a = load float addrspace(1)* %aptr, align 4
-  %b = load float addrspace(1)* %bptr, align 4
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+
+  %a = load float addrspace(1)* %gep.0, align 4
+  %b = load float addrspace(1)* %gep.1, align 4
+
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
   store float %result0, float addrspace(1)* %out, align 4
@@ -26,10 +45,19 @@ define void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)*
 }
 
 ; SI-LABEL @test_div_scale_f64_1:
-; SI: V_DIV_SCALE_F64
-define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr, double addrspace(1)* %cptr) nounwind {
-  %a = load double addrspace(1)* %aptr, align 8
-  %b = load double addrspace(1)* %bptr, align 8
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+  %a = load double addrspace(1)* %gep.0, align 8
+  %b = load double addrspace(1)* %gep.1, align 8
+
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
   store double %result0, double addrspace(1)* %out, align 8
@@ -37,10 +65,221 @@ define void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)
 }
 
 ; SI-LABEL @test_div_scale_f64_1:
-; SI: V_DIV_SCALE_F64
-define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %bptr, double addrspace(1)* %cptr) nounwind {
-  %a = load double addrspace(1)* %aptr, align 8
-  %b = load double addrspace(1)* %bptr, align 8
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.0 = getelementptr double addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr double addrspace(1)* %gep.0, i32 1
+
+  %a = load double addrspace(1)* %gep.0, align 8
+  %b = load double addrspace(1)* %gep.1, align 8
+
+  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+  %result0 = extractvalue { double, i1 } %result, 0
+  store double %result0, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f32_scalar_num_1:
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
+; SI-DAG: s_load_dword [[A:s[0-9]+]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep = getelementptr float addrspace(1)* %in, i32 %tid
+
+  %b = load float addrspace(1)* %gep, align 4
+
+  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+  %result0 = extractvalue { float, i1 } %result, 0
+  store float %result0, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f32_scalar_num_2:
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]]
+; SI-DAG: s_load_dword [[A:s[0-9]+]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep = getelementptr float addrspace(1)* %in, i32 %tid
+
+  %b = load float addrspace(1)* %gep, align 4
+
+  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+  %result0 = extractvalue { float, i1 } %result, 0
+  store float %result0, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f32_scalar_den_1:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
+; SI-DAG: s_load_dword [[B:s[0-9]+]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep = getelementptr float addrspace(1)* %in, i32 %tid
+
+  %a = load float addrspace(1)* %gep, align 4
+
+  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+  %result0 = extractvalue { float, i1 } %result, 0
+  store float %result0, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f32_scalar_den_2:
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]]
+; SI-DAG: s_load_dword [[B:s[0-9]+]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep = getelementptr float addrspace(1)* %in, i32 %tid
+
+  %a = load float addrspace(1)* %gep, align 4
+
+  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+  %result0 = extractvalue { float, i1 } %result, 0
+  store float %result0, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f64_scalar_num_1:
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep = getelementptr double addrspace(1)* %in, i32 %tid
+
+  %b = load double addrspace(1)* %gep, align 8
+
+  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+  %result0 = extractvalue { double, i1 } %result, 0
+  store double %result0, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f64_scalar_num_2:
+; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep = getelementptr double addrspace(1)* %in, i32 %tid
+
+  %b = load double addrspace(1)* %gep, align 8
+
+  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+  %result0 = extractvalue { double, i1 } %result, 0
+  store double %result0, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f64_scalar_den_1:
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep = getelementptr double addrspace(1)* %in, i32 %tid
+
+  %a = load double addrspace(1)* %gep, align 8
+
+  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+  %result0 = extractvalue { double, i1 } %result, 0
+  store double %result0, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f64_scalar_den_2:
+; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep = getelementptr double addrspace(1)* %in, i32 %tid
+
+  %a = load double addrspace(1)* %gep, align 8
+
+  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
+  %result0 = extractvalue { double, i1 } %result, 0
+  store double %result0, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f32_all_scalar_1:
+; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VA:v[0-9]+]], [[A]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[VA]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, float %a, float %b) nounwind {
+  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
+  %result0 = extractvalue { float, i1 } %result, 0
+  store float %result0, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f32_all_scalar_2:
+; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
+; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[VB]], [[A]]
+; SI: buffer_store_dword [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, float %a, float %b) nounwind {
+  %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
+  %result0 = extractvalue { float, i1 } %result, 0
+  store float %result0, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f64_all_scalar_1:
+; SI-DAG: s_load_dwordx2 s{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 v[[VA_LO:[0-9]+]], s[[A_LO]]
+; SI-DAG: v_mov_b32_e32 v[[VA_HI:[0-9]+]], s[[A_HI]]
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], v{{\[}}[[VA_LO]]:[[VA_HI]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, double %a, double %b) nounwind {
+  %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
+  %result0 = extractvalue { double, i1 } %result, 0
+  store double %result0, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL @test_div_scale_f64_all_scalar_2:
+; SI-DAG: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]], {{s\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dwordx2 s{{\[}}[[B_LO:[0-9]+]]:[[B_HI:[0-9]+]]{{\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-DAG: v_mov_b32_e32 v[[VB_LO:[0-9]+]], s[[B_LO]]
+; SI-DAG: v_mov_b32_e32 v[[VB_HI:[0-9]+]], s[[B_HI]]
+; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], v{{\[}}[[VB_LO]]:[[VB_HI]]{{\]}}, [[A]]
+; SI: buffer_store_dwordx2 [[RESULT0]]
+; SI: s_endpgm
+define void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, double %a, double %b) nounwind {
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
   store double %result0, double addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/llvm.AMDGPU.fract.ll b/test/CodeGen/R600/llvm.AMDGPU.fract.ll
index 72ec1c57571e..df43b0d9063d 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.fract.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.fract.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare float @llvm.AMDGPU.fract.f32(float) nounwind readnone
@@ -6,8 +6,8 @@ declare float @llvm.AMDGPU.fract.f32(float) nounwind readnone
 ; Legacy name
 declare float @llvm.AMDIL.fraction.f32(float) nounwind readnone
 
-; FUNC-LABEL: @fract_f32
-; SI: V_FRACT_F32
+; FUNC-LABEL: {{^}}fract_f32:
+; SI: v_fract_f32
 ; EG: FRACT
 define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
   %val = load float addrspace(1)* %src, align 4
@@ -16,8 +16,8 @@ define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounw
   ret void
 }
 
-; FUNC-LABEL: @fract_f32_legacy_amdil
-; SI: V_FRACT_F32
+; FUNC-LABEL: {{^}}fract_f32_legacy_amdil:
+; SI: v_fract_f32
 ; EG: FRACT
 define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
   %val = load float addrspace(1)* %src, align 4
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll
index 95795ea63b93..26a370436fd3 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.imad24.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.imad24.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 ; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
@@ -8,8 +8,8 @@
 
 declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) nounwind readnone
 
-; FUNC-LABEL: @test_imad24
-; SI: V_MAD_I32_I24
+; FUNC-LABEL: {{^}}test_imad24:
+; SI: v_mad_i32_i24
 ; CM: MULADD_INT24
 ; R600: MULLO_INT
 ; R600: ADD_INT
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imax.ll b/test/CodeGen/R600/llvm.AMDGPU.imax.ll
index 01c9f435b9fc..ec8f001a1a63 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.imax.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.imax.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
 
-; SI-LABEL: @vector_imax
-; SI: V_MAX_I32_e32
+; SI-LABEL: {{^}}vector_imax:
+; SI: v_max_i32_e32
 define void @vector_imax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
 main_body:
   %load = load i32 addrspace(1)* %in, align 4
@@ -11,8 +11,8 @@ main_body:
   ret void
 }
 
-; SI-LABEL: @scalar_imax
-; SI: S_MAX_I32
+; SI-LABEL: {{^}}scalar_imax:
+; SI: s_max_i32
 define void @scalar_imax(i32 %p0, i32 %p1) #0 {
 entry:
   %max = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %p1)
@@ -29,4 +29,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imin.ll b/test/CodeGen/R600/llvm.AMDGPU.imin.ll
index 565bf3444081..07a0fe78e2a7 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.imin.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.imin.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
 
-; SI-LABEL: @vector_imin
-; SI: V_MIN_I32_e32
+; SI-LABEL: {{^}}vector_imin:
+; SI: v_min_i32_e32
 define void @vector_imin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
 main_body:
   %load = load i32 addrspace(1)* %in, align 4
@@ -11,8 +11,8 @@ main_body:
   ret void
 }
 
-; SI-LABEL: @scalar_imin
-; SI: S_MIN_I32
+; SI-LABEL: {{^}}scalar_imin:
+; SI: s_min_i32
 define void @scalar_imin(i32 %p0, i32 %p1) #0 {
 entry:
   %min = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %p1)
@@ -29,4 +29,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.imul24.ll b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll
index 8ee3520daeae..1a9806d4b97a 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.imul24.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.imul24.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 
 declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone
 
-; FUNC-LABEL: @test_imul24
-; SI: V_MUL_I32_I24
+; FUNC-LABEL: {{^}}test_imul24:
+; SI: v_mul_i32_i24
 ; CM: MUL_INT24
 ; R600: MULLO_INT
 define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
diff --git a/test/CodeGen/R600/llvm.AMDGPU.kill.ll b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
index 1f82ffb53f1d..3995c9a742d6 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.kill.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.kill.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-; SI-LABEL: @kill_gs_const
-; SI-NOT: V_CMPX_LE_F32
-; SI: S_MOV_B64 exec, 0
+; SI-LABEL: {{^}}kill_gs_const:
+; SI-NOT: v_cmpx_le_f32
+; SI: s_mov_b64 exec, 0
 
 define void @kill_gs_const() #0 {
 main_body:
@@ -19,4 +19,4 @@ declare void @llvm.AMDGPU.kill(float)
 
 attributes #0 = { "ShaderType"="2" }
 
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.ldexp.ll b/test/CodeGen/R600/llvm.AMDGPU.ldexp.ll
new file mode 100644
index 000000000000..f0b8dace17ff
--- /dev/null
+++ b/test/CodeGen/R600/llvm.AMDGPU.ldexp.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.AMDGPU.ldexp.f32(float, i32) nounwind readnone
+declare double @llvm.AMDGPU.ldexp.f64(double, i32) nounwind readnone
+
+; SI-LABEL: {{^}}test_ldexp_f32:
+; SI: v_ldexp_f32
+; SI: s_endpgm
+define void @test_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind {
+  %result = call float @llvm.AMDGPU.ldexp.f32(float %a, i32 %b) nounwind readnone
+  store float %result, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_ldexp_f64:
+; SI: v_ldexp_f64
+; SI: s_endpgm
+define void @test_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind {
+  %result = call double @llvm.AMDGPU.ldexp.f64(double %a, i32 %b) nounwind readnone
+  store double %result, double addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll b/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll
index 51964eefa64f..4cafd563685e 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.legacy.rsq.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare float @llvm.AMDGPU.legacy.rsq(float) nounwind readnone
 
-; FUNC-LABEL: @rsq_legacy_f32
-; SI: V_RSQ_LEGACY_F32_e32
+; FUNC-LABEL: {{^}}rsq_legacy_f32:
+; SI: v_rsq_legacy_f32_e32
 ; EG: RECIPSQRT_IEEE
 define void @rsq_legacy_f32(float addrspace(1)* %out, float %src) nounwind {
   %rsq = call float @llvm.AMDGPU.legacy.rsq(float %src) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll b/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll
index b5dda0ce81f9..68412950b05a 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.rcp.f64.ll
@@ -1,27 +1,27 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
 declare double @llvm.sqrt.f64(double) nounwind readnone
 
-; FUNC-LABEL: @rcp_f64
-; SI: V_RCP_F64_e32
+; FUNC-LABEL: {{^}}rcp_f64:
+; SI: v_rcp_f64_e32
 define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind {
   %rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone
   store double %rcp, double addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @rcp_pat_f64
-; SI: V_RCP_F64_e32
+; FUNC-LABEL: {{^}}rcp_pat_f64:
+; SI: v_rcp_f64_e32
 define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
   %rcp = fdiv double 1.0, %src
   store double %rcp, double addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @rsq_rcp_pat_f64
-; SI-UNSAFE: V_RSQ_F64_e32
-; SI-SAFE-NOT: V_RSQ_F64_e32
+; FUNC-LABEL: {{^}}rsq_rcp_pat_f64:
+; SI-UNSAFE: v_rsq_f64_e32
+; SI-SAFE-NOT: v_rsq_f64_e32
 define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
   %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone
   %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll
index 8d5d66e149ba..4979a14ccdfc 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.rcp.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.rcp.ll
@@ -1,65 +1,47 @@
-; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
+; XUN: llc -march=amdgcn -mcpu=SI -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
 
-; XUN: llc -march=r600 -mcpu=SI -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE-SPDENORM -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
 declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
 
-
 declare float @llvm.sqrt.f32(float) nounwind readnone
-declare double @llvm.sqrt.f64(double) nounwind readnone
 
-; FUNC-LABEL: @rcp_f32
-; SI: V_RCP_F32_e32
+; FUNC-LABEL: {{^}}rcp_f32:
+; SI: v_rcp_f32_e32
+; EG: RECIP_IEEE
 define void @rcp_f32(float addrspace(1)* %out, float %src) nounwind {
   %rcp = call float @llvm.AMDGPU.rcp.f32(float %src) nounwind readnone
   store float %rcp, float addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @rcp_f64
-; SI: V_RCP_F64_e32
-define void @rcp_f64(double addrspace(1)* %out, double %src) nounwind {
-  %rcp = call double @llvm.AMDGPU.rcp.f64(double %src) nounwind readnone
-  store double %rcp, double addrspace(1)* %out, align 8
-  ret void
-}
+; FIXME: Evergreen only ever does unsafe fp math.
+; FUNC-LABEL: {{^}}rcp_pat_f32:
+
+; SI-SAFE: v_rcp_f32_e32
+; XSI-SAFE-SPDENORM-NOT: v_rcp_f32_e32
+
+; EG: RECIP_IEEE
 
-; FUNC-LABEL: @rcp_pat_f32
-; SI-SAFE: V_RCP_F32_e32
-; XSI-SAFE-SPDENORM-NOT: V_RCP_F32_e32
 define void @rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
   %rcp = fdiv float 1.0, %src
   store float %rcp, float addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @rcp_pat_f64
-; SI: V_RCP_F64_e32
-define void @rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
-  %rcp = fdiv double 1.0, %src
-  store double %rcp, double addrspace(1)* %out, align 8
-  ret void
-}
+; FUNC-LABEL: {{^}}rsq_rcp_pat_f32:
+; SI-UNSAFE: v_rsq_f32_e32
+; SI-SAFE: v_sqrt_f32_e32
+; SI-SAFE: v_rcp_f32_e32
 
-; FUNC-LABEL: @rsq_rcp_pat_f32
-; SI-UNSAFE: V_RSQ_F32_e32
-; SI-SAFE: V_SQRT_F32_e32
-; SI-SAFE: V_RCP_F32_e32
+; EG: RECIPSQRT_IEEE
 define void @rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) nounwind {
   %sqrt = call float @llvm.sqrt.f32(float %src) nounwind readnone
   %rcp = call float @llvm.AMDGPU.rcp.f32(float %sqrt) nounwind readnone
   store float %rcp, float addrspace(1)* %out, align 4
   ret void
 }
-
-; FUNC-LABEL: @rsq_rcp_pat_f64
-; SI-UNSAFE: V_RSQ_F64_e32
-; SI-SAFE-NOT: V_RSQ_F64_e32
-define void @rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) nounwind {
-  %sqrt = call double @llvm.sqrt.f64(double %src) nounwind readnone
-  %rcp = call double @llvm.AMDGPU.rcp.f64(double %sqrt) nounwind readnone
-  store double %rcp, double addrspace(1)* %out, align 8
-  ret void
-}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll
index 100d6ff77707..4318aeaac786 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.f64.ll
@@ -1,9 +1,9 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare double @llvm.AMDGPU.rsq.clamped.f64(double) nounwind readnone
 
-; FUNC-LABEL: @rsq_clamped_f64
-; SI: V_RSQ_CLAMP_F64_e32
+; FUNC-LABEL: {{^}}rsq_clamped_f64:
+; SI: v_rsq_clamp_f64_e32
 define void @rsq_clamped_f64(double addrspace(1)* %out, double %src) nounwind {
   %rsq_clamped = call double @llvm.AMDGPU.rsq.clamped.f64(double %src) nounwind readnone
   store double %rsq_clamped, double addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll
index 683df7355ac6..9336baffc97f 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.clamped.ll
@@ -1,11 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 
 declare float @llvm.AMDGPU.rsq.clamped.f32(float) nounwind readnone
 
-; FUNC-LABEL: @rsq_clamped_f32
-; SI: V_RSQ_CLAMP_F32_e32
+; FUNC-LABEL: {{^}}rsq_clamped_f32:
+; SI: v_rsq_clamp_f32_e32
 ; EG: RECIPSQRT_CLAMPED
 define void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind {
   %rsq_clamped = call float @llvm.AMDGPU.rsq.clamped.f32(float %src) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.rsq.ll b/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
index 27cf6b28fd66..f987ef3995de 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
@@ -1,13 +1,32 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare float @llvm.AMDGPU.rsq.f32(float) nounwind readnone
 
-; FUNC-LABEL: @rsq_f32
-; SI: V_RSQ_F32_e32
+; FUNC-LABEL: {{^}}rsq_f32:
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
 ; EG: RECIPSQRT_IEEE
 define void @rsq_f32(float addrspace(1)* %out, float %src) nounwind {
   %rsq = call float @llvm.AMDGPU.rsq.f32(float %src) nounwind readnone
   store float %rsq, float addrspace(1)* %out, align 4
   ret void
 }
+
+; TODO: Really these should be constant folded
+; FUNC-LABEL: {{^}}rsq_f32_constant_4.0
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, 4.0
+; EG: RECIPSQRT_IEEE
+define void @rsq_f32_constant_4.0(float addrspace(1)* %out) nounwind {
+  %rsq = call float @llvm.AMDGPU.rsq.f32(float 4.0) nounwind readnone
+  store float %rsq, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}rsq_f32_constant_100.0
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, 0x42c80000
+; EG: RECIPSQRT_IEEE
+define void @rsq_f32_constant_100.0(float addrspace(1)* %out) nounwind {
+  %rsq = call float @llvm.AMDGPU.rsq.f32(float 100.0) nounwind readnone
+  store float %rsq, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
index 1c736d447ea9..0b1342ee45ce 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
@@ -1,13 +1,13 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
 declare double @llvm.AMDGPU.trig.preop.f64(double, i32) nounwind readnone
 
-; SI-LABEL: @test_trig_preop_f64:
-; SI-DAG: BUFFER_LOAD_DWORD [[SEG:v[0-9]+]]
-; SI-DAG: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]],
-; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], [[SEG]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}test_trig_preop_f64:
+; SI-DAG: buffer_load_dword [[SEG:v[0-9]+]]
+; SI-DAG: buffer_load_dwordx2 [[SRC:v\[[0-9]+:[0-9]+\]]],
+; SI: v_trig_preop_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], [[SEG]]
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
 define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
   %a = load double addrspace(1)* %aptr, align 8
   %b = load i32 addrspace(1)* %bptr, align 4
@@ -16,11 +16,11 @@ define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)*
   ret void
 }
 
-; SI-LABEL: @test_trig_preop_f64_imm_segment:
-; SI: BUFFER_LOAD_DWORDX2 [[SRC:v\[[0-9]+:[0-9]+\]]],
-; SI: V_TRIG_PREOP_F64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], 7
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}test_trig_preop_f64_imm_segment:
+; SI: buffer_load_dwordx2 [[SRC:v\[[0-9]+:[0-9]+\]]],
+; SI: v_trig_preop_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], 7
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
 define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
   %a = load double addrspace(1)* %aptr, align 8
   %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone
diff --git a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
index e6bb2c4e9455..9c5c74e23315 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
@@ -1,10 +1,10 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-; R600-CHECK: @amdgpu_trunc
+; R600-CHECK: {{^}}amdgpu_trunc:
 ; R600-CHECK: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-; SI-CHECK: @amdgpu_trunc
-; SI-CHECK: V_TRUNC_F32
+; SI-CHECK: {{^}}amdgpu_trunc:
+; SI-CHECK: v_trunc_f32
 
 define void @amdgpu_trunc(float addrspace(1)* %out, float %x) {
 entry:
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
index afdfb18a563b..88613db2161f 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
@@ -1,13 +1,14 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 ; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 ; XUN: llc -march=r600 -mcpu=rv770 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 
 declare i32 @llvm.AMDGPU.umad24(i32, i32, i32) nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 
-; FUNC-LABEL: @test_umad24
-; SI: V_MAD_U32_U24
+; FUNC-LABEL: {{^}}test_umad24:
+; SI: v_mad_u32_u24
 ; EG: MULADD_UINT24
 ; R600: MULLO_UINT
 ; R600: ADD_INT
@@ -17,3 +18,21 @@ define void @test_umad24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2
   ret void
 }
 
+; FUNC-LABEL: {{^}}commute_umad24:
+; SI-DAG: buffer_load_dword [[SRC0:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[SRC2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_mad_u32_u24 [[RESULT:v[0-9]+]], 4, [[SRC0]], [[SRC2]]
+; SI: buffer_store_dword [[RESULT]]
+define void @commute_umad24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %out.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %src0.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %src2.gep = getelementptr i32 addrspace(1)* %src0.gep, i32 1
+
+  %src0 = load i32 addrspace(1)* %src0.gep, align 4
+  %src2 = load i32 addrspace(1)* %src2.gep, align 4
+  %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 4, i32 %src2) nounwind readnone
+  store i32 %mad, i32 addrspace(1)* %out.gep, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umax.ll b/test/CodeGen/R600/llvm.AMDGPU.umax.ll
index 1b8da2e15534..094ecaca2b4c 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umax.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umax.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
 
-; SI-LABEL: @vector_umax
-; SI: V_MAX_U32_e32
+; SI-LABEL: {{^}}vector_umax:
+; SI: v_max_u32_e32
 define void @vector_umax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
 main_body:
   %load = load i32 addrspace(1)* %in, align 4
@@ -11,8 +11,8 @@ main_body:
   ret void
 }
 
-; SI-LABEL: @scalar_umax
-; SI: S_MAX_U32
+; SI-LABEL: {{^}}scalar_umax:
+; SI: s_max_u32
 define void @scalar_umax(i32 %p0, i32 %p1) #0 {
 entry:
   %max = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %p1)
@@ -21,11 +21,11 @@ entry:
   ret void
 }
 
-; SI-LABEL: @trunc_zext_umax
-; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
-; SI: V_MAX_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
-; SI-NOT: AND
-; SI: BUFFER_STORE_SHORT [[RESULT]],
+; SI-LABEL: {{^}}trunc_zext_umax:
+; SI: buffer_load_ubyte [[VREG:v[0-9]+]],
+; SI: v_max_u32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
+; SI-NOT: and
+; SI: buffer_store_short [[RESULT]],
 define void @trunc_zext_umax(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
   %tmp5 = load i8 addrspace(1)* %src, align 1
   %tmp2 = zext i8 %tmp5 to i32
@@ -44,4 +44,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umin.ll b/test/CodeGen/R600/llvm.AMDGPU.umin.ll
index 08397f8356c9..97fc40a72339 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umin.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umin.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=SI %s
 
-; SI-LABEL: @vector_umin
-; SI: V_MIN_U32_e32
+; SI-LABEL: {{^}}vector_umin:
+; SI: v_min_u32_e32
 define void @vector_umin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
 main_body:
   %load = load i32 addrspace(1)* %in, align 4
@@ -11,8 +11,8 @@ main_body:
   ret void
 }
 
-; SI-LABEL: @scalar_umin
-; SI: S_MIN_U32
+; SI-LABEL: {{^}}scalar_umin:
+; SI: s_min_u32
 define void @scalar_umin(i32 %p0, i32 %p1) #0 {
 entry:
   %min = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %p1)
@@ -21,11 +21,11 @@ entry:
   ret void
 }
 
-; SI-LABEL: @trunc_zext_umin
-; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
-; SI: V_MIN_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
-; SI-NOT: AND
-; SI: BUFFER_STORE_SHORT [[RESULT]],
+; SI-LABEL: {{^}}trunc_zext_umin:
+; SI: buffer_load_ubyte [[VREG:v[0-9]+]],
+; SI: v_min_u32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
+; SI-NOT: and
+; SI: buffer_store_short [[RESULT]],
 define void @trunc_zext_umin(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
   %tmp5 = load i8 addrspace(1)* %src, align 1
   %tmp2 = zext i8 %tmp5 to i32
@@ -44,4 +44,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umul24.ll b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll
index 72a36029fb31..5a849c224d22 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umul24.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umul24.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 ; XUN: llc -march=r600 -mcpu=r600 -verify-machineinstrs < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
@@ -6,8 +6,8 @@
 
 declare i32 @llvm.AMDGPU.umul24(i32, i32) nounwind readnone
 
-; FUNC-LABEL: @test_umul24
-; SI: V_MUL_U32_U24
+; FUNC-LABEL: {{^}}test_umul24:
+; SI: v_mul_u32_u24
 ; R600: MUL_UINT24
 ; R600: MULLO_UINT
 define void @test_umul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1) nounwind {
diff --git a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
index 0438eccc8862..a7454ef34937 100644
--- a/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
+++ b/test/CodeGen/R600/llvm.SI.fs.interp.constant.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK: S_MOV_B32
-;CHECK-NEXT: V_INTERP_MOV_F32
+;CHECK: s_mov_b32
+;CHECK-NEXT: v_interp_mov_f32
 
 define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
 main_body:
diff --git a/test/CodeGen/R600/llvm.SI.gather4.ll b/test/CodeGen/R600/llvm.SI.gather4.ll
index 8402faaa4dca..cdf34ca3e7ff 100644
--- a/test/CodeGen/R600/llvm.SI.gather4.ll
+++ b/test/CodeGen/R600/llvm.SI.gather4.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK-LABEL: @gather4_v2
-;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_v2:
+;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_v2() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -13,8 +13,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4
-;CHECK: IMAGE_GATHER4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4:
+;CHECK: image_gather4 {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -26,8 +26,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_cl
-;CHECK: IMAGE_GATHER4_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_cl:
+;CHECK: image_gather4_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -39,8 +39,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_l
-;CHECK: IMAGE_GATHER4_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_l:
+;CHECK: image_gather4_l {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_l() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -52,8 +52,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_b
-;CHECK: IMAGE_GATHER4_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_b:
+;CHECK: image_gather4_b {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_b() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -65,8 +65,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_b_cl
-;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_b_cl:
+;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_b_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.b.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -78,8 +78,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_b_cl_v8
-;CHECK: IMAGE_GATHER4_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_b_cl_v8:
+;CHECK: image_gather4_b_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_b_cl_v8() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -91,8 +91,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_lz_v2
-;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_lz_v2:
+;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_lz_v2() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -104,8 +104,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_lz
-;CHECK: IMAGE_GATHER4_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_lz:
+;CHECK: image_gather4_lz {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_lz() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -119,8 +119,8 @@ main_body:
 
 
 
-;CHECK-LABEL: @gather4_o
-;CHECK: IMAGE_GATHER4_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_o:
+;CHECK: image_gather4_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_o() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -132,8 +132,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_cl_o
-;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_cl_o:
+;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_cl_o() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.cl.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -145,8 +145,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_cl_o_v8
-;CHECK: IMAGE_GATHER4_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_cl_o_v8:
+;CHECK: image_gather4_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_cl_o_v8() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -158,8 +158,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_l_o
-;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_l_o:
+;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_l_o() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.l.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -171,8 +171,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_l_o_v8
-;CHECK: IMAGE_GATHER4_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_l_o_v8:
+;CHECK: image_gather4_l_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_l_o_v8() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -184,8 +184,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_b_o
-;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_b_o:
+;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_b_o() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.b.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -197,8 +197,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_b_o_v8
-;CHECK: IMAGE_GATHER4_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_b_o_v8:
+;CHECK: image_gather4_b_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_b_o_v8() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -210,8 +210,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_b_cl_o
-;CHECK: IMAGE_GATHER4_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_b_cl_o:
+;CHECK: image_gather4_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_b_cl_o() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -223,8 +223,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_lz_o
-;CHECK: IMAGE_GATHER4_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_lz_o:
+;CHECK: image_gather4_lz_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_lz_o() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -238,8 +238,8 @@ main_body:
 
 
 
-;CHECK-LABEL: @gather4_c
-;CHECK: IMAGE_GATHER4_C {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c:
+;CHECK: image_gather4_c {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -251,8 +251,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_c_cl
-;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_cl:
+;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.cl.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -264,8 +264,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_c_cl_v8
-;CHECK: IMAGE_GATHER4_C_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_cl_v8:
+;CHECK: image_gather4_c_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_cl_v8() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -277,8 +277,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_c_l
-;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_l:
+;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_l() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.l.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -290,8 +290,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_c_l_v8
-;CHECK: IMAGE_GATHER4_C_L {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_l_v8:
+;CHECK: image_gather4_c_l {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_l_v8() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.l.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -303,8 +303,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_c_b
-;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_b:
+;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_b() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.b.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -316,8 +316,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_c_b_v8
-;CHECK: IMAGE_GATHER4_C_B {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_b_v8:
+;CHECK: image_gather4_c_b {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_b_v8() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.b.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -329,8 +329,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_c_b_cl
-;CHECK: IMAGE_GATHER4_C_B_CL {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_b_cl:
+;CHECK: image_gather4_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_b_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.b.cl.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -342,8 +342,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_c_lz
-;CHECK: IMAGE_GATHER4_C_LZ {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_lz:
+;CHECK: image_gather4_c_lz {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_lz() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.lz.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -357,8 +357,8 @@ main_body:
 
 
 
-;CHECK-LABEL: @gather4_c_o
-;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_o:
+;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_o() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -370,8 +370,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_c_o_v8
-;CHECK: IMAGE_GATHER4_C_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_o_v8:
+;CHECK: image_gather4_c_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_o_v8() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -383,8 +383,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_c_cl_o
-;CHECK: IMAGE_GATHER4_C_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_cl_o:
+;CHECK: image_gather4_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_cl_o() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -396,8 +396,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_c_l_o
-;CHECK: IMAGE_GATHER4_C_L_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_l_o:
+;CHECK: image_gather4_c_l_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_l_o() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.l.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -409,8 +409,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_c_b_o
-;CHECK: IMAGE_GATHER4_C_B_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_b_o:
+;CHECK: image_gather4_c_b_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_b_o() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.b.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -422,8 +422,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_c_b_cl_o
-;CHECK: IMAGE_GATHER4_C_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_b_cl_o:
+;CHECK: image_gather4_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_b_cl_o() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.b.cl.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -435,8 +435,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_c_lz_o
-;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_lz_o:
+;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_lz_o() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -448,8 +448,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @gather4_c_lz_o_v8
-;CHECK: IMAGE_GATHER4_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}gather4_c_lz_o_v8:
+;CHECK: image_gather4_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 1, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @gather4_c_lz_o_v8() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.gather4.c.lz.o.v8i32(<8 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
diff --git a/test/CodeGen/R600/llvm.SI.getlod.ll b/test/CodeGen/R600/llvm.SI.getlod.ll
index a7a17ec3fffa..775dd3cd7569 100644
--- a/test/CodeGen/R600/llvm.SI.getlod.ll
+++ b/test/CodeGen/R600/llvm.SI.getlod.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK-LABEL: @getlod
-;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}getlod:
+;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @getlod() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.getlod.i32(i32 undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -11,8 +11,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @getlod_v2
-;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}getlod_v2:
+;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @getlod_v2() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.getlod.v2i32(<2 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
@@ -22,8 +22,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @getlod_v4
-;CHECK: IMAGE_GET_LOD {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}getlod_v4:
+;CHECK: image_get_lod {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, -1, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @getlod_v4() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.getlod.v4i32(<4 x i32> undef, <32 x i8> undef, <16 x i8> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
diff --git a/test/CodeGen/R600/llvm.SI.image.ll b/test/CodeGen/R600/llvm.SI.image.ll
index eac0b8eead3a..7c9af7b40f67 100644
--- a/test/CodeGen/R600/llvm.SI.image.ll
+++ b/test/CodeGen/R600/llvm.SI.image.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK-LABEL: @image_load
-;CHECK: IMAGE_LOAD {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}image_load:
+;CHECK: image_load {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @image_load() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -13,8 +13,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @image_load_mip
-;CHECK: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}image_load_mip:
+;CHECK: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @image_load_mip() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.load.mip.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -26,8 +26,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @getresinfo
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}getresinfo:
+;CHECK: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @getresinfo() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.getresinfo.i32(i32 undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
diff --git a/test/CodeGen/R600/llvm.SI.image.sample.ll b/test/CodeGen/R600/llvm.SI.image.sample.ll
index 14dff7eb5fea..779c8cc573b8 100644
--- a/test/CodeGen/R600/llvm.SI.image.sample.ll
+++ b/test/CodeGen/R600/llvm.SI.image.sample.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK-LABEL: @sample
-;CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample:
+;CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -13,8 +13,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_cl
-;CHECK: IMAGE_SAMPLE_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_cl:
+;CHECK: image_sample_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -26,8 +26,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_d
-;CHECK: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_d:
+;CHECK: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_d() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.d.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -39,8 +39,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_d_cl
-;CHECK: IMAGE_SAMPLE_D_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_d_cl:
+;CHECK: image_sample_d_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_d_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.d.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -52,8 +52,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_l
-;CHECK: IMAGE_SAMPLE_L {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_l:
+;CHECK: image_sample_l {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_l() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -65,8 +65,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_b
-;CHECK: IMAGE_SAMPLE_B {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_b:
+;CHECK: image_sample_b {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_b() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -78,8 +78,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_b_cl
-;CHECK: IMAGE_SAMPLE_B_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_b_cl:
+;CHECK: image_sample_b_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_b_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -91,8 +91,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_lz
-;CHECK: IMAGE_SAMPLE_LZ {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_lz:
+;CHECK: image_sample_lz {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_lz() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -104,8 +104,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_cd
-;CHECK: IMAGE_SAMPLE_CD {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_cd:
+;CHECK: image_sample_cd {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_cd() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.cd.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -117,8 +117,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_cd_cl
-;CHECK: IMAGE_SAMPLE_CD_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_cd_cl:
+;CHECK: image_sample_cd_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_cd_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.cd.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -130,8 +130,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c
-;CHECK: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c:
+;CHECK: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -143,8 +143,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_cl
-;CHECK: IMAGE_SAMPLE_C_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_cl:
+;CHECK: image_sample_c_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -156,8 +156,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_d
-;CHECK: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_d:
+;CHECK: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_d() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.d.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -169,8 +169,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_d_cl
-;CHECK: IMAGE_SAMPLE_C_D_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_d_cl:
+;CHECK: image_sample_c_d_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_d_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.d.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -182,8 +182,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_l
-;CHECK: IMAGE_SAMPLE_C_L {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_l:
+;CHECK: image_sample_c_l {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_l() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.l.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -195,8 +195,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_b
-;CHECK: IMAGE_SAMPLE_C_B {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_b:
+;CHECK: image_sample_c_b {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_b() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.b.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -208,8 +208,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_b_cl
-;CHECK: IMAGE_SAMPLE_C_B_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_b_cl:
+;CHECK: image_sample_c_b_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_b_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.b.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -221,8 +221,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_lz
-;CHECK: IMAGE_SAMPLE_C_LZ {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_lz:
+;CHECK: image_sample_c_lz {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_lz() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.lz.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -234,8 +234,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_cd
-;CHECK: IMAGE_SAMPLE_C_CD {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_cd:
+;CHECK: image_sample_c_cd {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_cd() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.cd.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -247,8 +247,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_cd_cl
-;CHECK: IMAGE_SAMPLE_C_CD_CL {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_cd_cl:
+;CHECK: image_sample_c_cd_cl {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_cd_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.cd.cl.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
diff --git a/test/CodeGen/R600/llvm.SI.image.sample.o.ll b/test/CodeGen/R600/llvm.SI.image.sample.o.ll
index ed3ef9140143..7bfb5501206c 100644
--- a/test/CodeGen/R600/llvm.SI.image.sample.o.ll
+++ b/test/CodeGen/R600/llvm.SI.image.sample.o.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK-LABEL: @sample
-;CHECK: IMAGE_SAMPLE_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample:
+;CHECK: image_sample_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -13,8 +13,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_cl
-;CHECK: IMAGE_SAMPLE_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_cl:
+;CHECK: image_sample_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -26,8 +26,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_d
-;CHECK: IMAGE_SAMPLE_D_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_d:
+;CHECK: image_sample_d_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_d() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.d.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -39,8 +39,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_d_cl
-;CHECK: IMAGE_SAMPLE_D_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_d_cl:
+;CHECK: image_sample_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_d_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.d.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -52,8 +52,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_l
-;CHECK: IMAGE_SAMPLE_L_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_l:
+;CHECK: image_sample_l_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_l() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -65,8 +65,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_b
-;CHECK: IMAGE_SAMPLE_B_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_b:
+;CHECK: image_sample_b_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_b() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -78,8 +78,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_b_cl
-;CHECK: IMAGE_SAMPLE_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_b_cl:
+;CHECK: image_sample_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_b_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.b.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -91,8 +91,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_lz
-;CHECK: IMAGE_SAMPLE_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_lz:
+;CHECK: image_sample_lz_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_lz() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -104,8 +104,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_cd
-;CHECK: IMAGE_SAMPLE_CD_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_cd:
+;CHECK: image_sample_cd_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_cd() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.cd.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -117,8 +117,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_cd_cl
-;CHECK: IMAGE_SAMPLE_CD_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_cd_cl:
+;CHECK: image_sample_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_cd_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.cd.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -130,8 +130,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c
-;CHECK: IMAGE_SAMPLE_C_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c:
+;CHECK: image_sample_c_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -143,8 +143,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_cl
-;CHECK: IMAGE_SAMPLE_C_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_cl:
+;CHECK: image_sample_c_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -156,8 +156,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_d
-;CHECK: IMAGE_SAMPLE_C_D_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_d:
+;CHECK: image_sample_c_d_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_d() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.d.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -169,8 +169,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_d_cl
-;CHECK: IMAGE_SAMPLE_C_D_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_d_cl:
+;CHECK: image_sample_c_d_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_d_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.d.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -182,8 +182,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_l
-;CHECK: IMAGE_SAMPLE_C_L_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_l:
+;CHECK: image_sample_c_l_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_l() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.l.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -195,8 +195,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_b
-;CHECK: IMAGE_SAMPLE_C_B_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_b:
+;CHECK: image_sample_c_b_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_b() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.b.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -208,8 +208,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_b_cl
-;CHECK: IMAGE_SAMPLE_C_B_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_b_cl:
+;CHECK: image_sample_c_b_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_b_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.b.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -221,8 +221,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_lz
-;CHECK: IMAGE_SAMPLE_C_LZ_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_lz:
+;CHECK: image_sample_c_lz_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_lz() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.lz.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -234,8 +234,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_cd
-;CHECK: IMAGE_SAMPLE_C_CD_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_cd:
+;CHECK: image_sample_c_cd_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_cd() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.cd.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
@@ -247,8 +247,8 @@ main_body:
   ret void
 }
 
-;CHECK-LABEL: @sample_c_cd_cl
-;CHECK: IMAGE_SAMPLE_C_CD_CL_O {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
+;CHECK-LABEL: {{^}}sample_c_cd_cl:
+;CHECK: image_sample_c_cd_cl_o {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
 define void @sample_c_cd_cl() #0 {
 main_body:
   %r = call <4 x float> @llvm.SI.image.sample.c.cd.cl.o.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
diff --git a/test/CodeGen/R600/llvm.SI.imageload.ll b/test/CodeGen/R600/llvm.SI.imageload.ll
index 59e00f01c96b..dba6e8f3f0c7 100644
--- a/test/CodeGen/R600/llvm.SI.imageload.ll
+++ b/test/CodeGen/R600/llvm.SI.imageload.ll
@@ -1,15 +1,15 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK-DAG: IMAGE_LOAD {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
-;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 2, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 1, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 4, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 8, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
-;CHECK-DAG: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
-;CHECK-DAG: IMAGE_LOAD_MIP {{v[0-9]+}}, 8, 0, 0, -1
+;CHECK-DAG: image_load {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
+;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v[0-9]+}}, 2, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v[0-9]+}}, 1, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v[0-9]+}}, 4, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v[0-9]+}}, 8, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
+;CHECK-DAG: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
+;CHECK-DAG: image_load_mip {{v[0-9]+}}, 8, 0, 0, -1
 
 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
    %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
@@ -84,7 +84,7 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
 
 ; Test that ccordinates are stored in vgprs and not sgprs
 ; CHECK: vgpr_coords
-; CHECK: IMAGE_LOAD_MIP {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}
+; CHECK: image_load_mip {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, 0, 0, 0, 0, 0, {{v\[[0-9]+:[0-9]+\]}}
 define void @vgpr_coords(float addrspace(2)* addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr float addrspace(2)* addrspace(2)* %0, i32 0
@@ -126,6 +126,6 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
 attributes #0 = { "ShaderType"="0" }
 attributes #1 = { nounwind readnone }
 
-!0 = metadata !{metadata !"const", null}
-!1 = metadata !{}
-!2 = metadata !{metadata !0, metadata !0, i64 0, i32 1}
+!0 = !{!"const", null}
+!1 = !{}
+!2 = !{!0, !0, i64 0, i32 1}
diff --git a/test/CodeGen/R600/llvm.SI.load.dword.ll b/test/CodeGen/R600/llvm.SI.load.dword.ll
index a6227755b72e..ebd16e376302 100644
--- a/test/CodeGen/R600/llvm.SI.load.dword.ll
+++ b/test/CodeGen/R600/llvm.SI.load.dword.ll
@@ -1,13 +1,13 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
 ; Example of a simple geometry shader loading vertex attributes from the
 ; ESGS ring buffer
 
-; CHECK-LABEL: @main
-; CHECK: BUFFER_LOAD_DWORD
-; CHECK: BUFFER_LOAD_DWORD
-; CHECK: BUFFER_LOAD_DWORD
-; CHECK: BUFFER_LOAD_DWORD
+; CHECK-LABEL: {{^}}main:
+; CHECK: buffer_load_dword
+; CHECK: buffer_load_dword
+; CHECK: buffer_load_dword
+; CHECK: buffer_load_dword
 
 define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [2 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32, i32, i32, i32) #0 {
 main_body:
@@ -37,4 +37,4 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
 attributes #0 = { "ShaderType"="1" }
 attributes #1 = { nounwind readonly }
 
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.SI.resinfo.ll b/test/CodeGen/R600/llvm.SI.resinfo.ll
index af3afc1e1d92..278e05558c6e 100644
--- a/test/CodeGen/R600/llvm.SI.resinfo.ll
+++ b/test/CodeGen/R600/llvm.SI.resinfo.ll
@@ -1,21 +1,21 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
 
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 2, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 1, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 4, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 8, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 9, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 6, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 10, 0, 0, -1
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 11, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 13, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v\[[0-9]+:[0-9]+\]}}, 14, 0, 0, 0
-;CHECK: IMAGE_GET_RESINFO {{v[0-9]+}}, 8, 0, 0, -1
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 15, 0, 0, -1
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 3, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 2, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 1, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 4, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 8, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 5, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 9, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 6, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 10, 0, 0, -1
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 12, 0, 0, -1
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 7, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 11, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 13, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v\[[0-9]+:[0-9]+\]}}, 14, 0, 0, 0
+; CHECK-DAG: image_get_resinfo {{v[0-9]+}}, 8, 0, 0, -1
 
 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8,
 		  i32 %a9, i32 %a10, i32 %a11, i32 %a12, i32 %a13, i32 %a14, i32 %a15, i32 %a16) {
diff --git a/test/CodeGen/R600/llvm.SI.sample-masked.ll b/test/CodeGen/R600/llvm.SI.sample-masked.ll
index 445359a4ced7..071938f2c249 100644
--- a/test/CodeGen/R600/llvm.SI.sample-masked.ll
+++ b/test/CodeGen/R600/llvm.SI.sample-masked.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde | FileCheck %s
 
-; CHECK-LABEL: @v1
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 13
+; CHECK-LABEL: {{^}}v1:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 13
 define void @v1(i32 %a1) #0 {
 entry:
   %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
@@ -13,8 +13,8 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @v2
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 11
+; CHECK-LABEL: {{^}}v2:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 11
 define void @v2(i32 %a1) #0 {
 entry:
   %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
@@ -26,8 +26,8 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @v3
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 14
+; CHECK-LABEL: {{^}}v3:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
 define void @v3(i32 %a1) #0 {
 entry:
   %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
@@ -39,8 +39,8 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @v4
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 7
+; CHECK-LABEL: {{^}}v4:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 7
 define void @v4(i32 %a1) #0 {
 entry:
   %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
@@ -52,8 +52,8 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @v5
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 10
+; CHECK-LABEL: {{^}}v5:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
 define void @v5(i32 %a1) #0 {
 entry:
   %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
@@ -64,8 +64,8 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @v6
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 6
+; CHECK-LABEL: {{^}}v6:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 6
 define void @v6(i32 %a1) #0 {
 entry:
   %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
@@ -76,8 +76,8 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @v7
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 9
+; CHECK-LABEL: {{^}}v7:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 9
 define void @v7(i32 %a1) #0 {
 entry:
   %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
diff --git a/test/CodeGen/R600/llvm.SI.sample.ll b/test/CodeGen/R600/llvm.SI.sample.ll
index 24e8f640d90e..2c2905aaa0c0 100644
--- a/test/CodeGen/R600/llvm.SI.sample.ll
+++ b/test/CodeGen/R600/llvm.SI.sample.ll
@@ -1,21 +1,21 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 15
-;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 3
-;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 2
-;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 1
-;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 4
-;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 8
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 5
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 9
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 6
-;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 10
-;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 12
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 7
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 11
-;CHECK-DAG: IMAGE_SAMPLE_C {{v\[[0-9]+:[0-9]+\]}}, 13
-;CHECK-DAG: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 14
-;CHECK-DAG: IMAGE_SAMPLE {{v[0-9]+}}, 8
+;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
+;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 3
+;CHECK-DAG: image_sample {{v[0-9]+}}, 2
+;CHECK-DAG: image_sample {{v[0-9]+}}, 1
+;CHECK-DAG: image_sample {{v[0-9]+}}, 4
+;CHECK-DAG: image_sample {{v[0-9]+}}, 8
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 5
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 9
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 6
+;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 10
+;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 12
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 7
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 11
+;CHECK-DAG: image_sample_c {{v\[[0-9]+:[0-9]+\]}}, 13
+;CHECK-DAG: image_sample {{v\[[0-9]+:[0-9]+\]}}, 14
+;CHECK-DAG: image_sample {{v[0-9]+}}, 8
 
 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
    %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
@@ -135,8 +135,8 @@ define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
    ret void
 }
 
-; CHECK: @v1
-; CHECK: IMAGE_SAMPLE {{v\[[0-9]+:[0-9]+\]}}, 15
+; CHECK: {{^}}v1:
+; CHECK: image_sample {{v\[[0-9]+:[0-9]+\]}}, 15
 define void @v1(i32 %a1) #0 {
 entry:
   %0 = insertelement <1 x i32> undef, i32 %a1, i32 0
diff --git a/test/CodeGen/R600/llvm.SI.sampled.ll b/test/CodeGen/R600/llvm.SI.sampled.ll
index 366456f44e6c..e42a48ee7852 100644
--- a/test/CodeGen/R600/llvm.SI.sampled.ll
+++ b/test/CodeGen/R600/llvm.SI.sampled.ll
@@ -1,21 +1,21 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 15
-;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 3
-;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 2
-;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 1
-;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 4
-;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 8
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 5
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 9
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 6
-;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 10
-;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 12
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 7
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 11
-;CHECK-DAG: IMAGE_SAMPLE_C_D {{v\[[0-9]+:[0-9]+\]}}, 13
-;CHECK-DAG: IMAGE_SAMPLE_D {{v\[[0-9]+:[0-9]+\]}}, 14
-;CHECK-DAG: IMAGE_SAMPLE_D {{v[0-9]+}}, 8
+;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 15
+;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 3
+;CHECK-DAG: image_sample_d {{v[0-9]+}}, 2
+;CHECK-DAG: image_sample_d {{v[0-9]+}}, 1
+;CHECK-DAG: image_sample_d {{v[0-9]+}}, 4
+;CHECK-DAG: image_sample_d {{v[0-9]+}}, 8
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 5
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 9
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 6
+;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 10
+;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 12
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 7
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 11
+;CHECK-DAG: image_sample_c_d {{v\[[0-9]+:[0-9]+\]}}, 13
+;CHECK-DAG: image_sample_d {{v\[[0-9]+:[0-9]+\]}}, 14
+;CHECK-DAG: image_sample_d {{v[0-9]+}}, 8
 
 define void @test(i32 %a1, i32 %a2, i32 %a3, i32 %a4) #0 {
    %v1 = insertelement <4 x i32> undef, i32 %a1, i32 0
diff --git a/test/CodeGen/R600/llvm.SI.sendmsg.ll b/test/CodeGen/R600/llvm.SI.sendmsg.ll
index 581d422b0952..d94b137bc012 100644
--- a/test/CodeGen/R600/llvm.SI.sendmsg.ll
+++ b/test/CodeGen/R600/llvm.SI.sendmsg.ll
@@ -1,10 +1,10 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-; CHECK-LABEL: @main
-; CHECK: S_SENDMSG Gs(emit stream 0)
-; CHECK: S_SENDMSG Gs(cut stream 1)
-; CHECK: S_SENDMSG Gs(emit-cut stream 2)
-; CHECK: S_SENDMSG Gs_done(nop)
+; CHECK-LABEL: {{^}}main:
+; CHECK: s_sendmsg Gs(emit stream 0)
+; CHECK: s_sendmsg Gs(cut stream 1)
+; CHECK: s_sendmsg Gs(emit-cut stream 2)
+; CHECK: s_sendmsg Gs_done(nop)
 
 define void @main() {
 main_body:
diff --git a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
index 740581a69666..320597ec8475 100644
--- a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
+++ b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK-LABEL: @test1
-;CHECK: TBUFFER_STORE_FORMAT_XYZW {{v\[[0-9]+:[0-9]+\]}}, 0x20, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK-LABEL: {{^}}test1:
+;CHECK: tbuffer_store_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, 0x20, -1, 0, -1, 0, 14, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
 define void @test1(i32 %a1, i32 %vaddr) #0 {
     %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
     call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
@@ -10,8 +10,8 @@ define void @test1(i32 %a1, i32 %vaddr) #0 {
     ret void
 }
 
-;CHECK-LABEL: @test2
-;CHECK: TBUFFER_STORE_FORMAT_XYZ {{v\[[0-9]+:[0-9]+\]}}, 0x18, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK-LABEL: {{^}}test2:
+;CHECK: tbuffer_store_format_xyz {{v\[[0-9]+:[0-9]+\]}}, 0x18, -1, 0, -1, 0, 13, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
 define void @test2(i32 %a1, i32 %vaddr) #0 {
     %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
     call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
@@ -20,8 +20,8 @@ define void @test2(i32 %a1, i32 %vaddr) #0 {
     ret void
 }
 
-;CHECK-LABEL: @test3
-;CHECK: TBUFFER_STORE_FORMAT_XY {{v\[[0-9]+:[0-9]+\]}}, 0x10, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK-LABEL: {{^}}test3:
+;CHECK: tbuffer_store_format_xy {{v\[[0-9]+:[0-9]+\]}}, 0x10, -1, 0, -1, 0, 11, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
 define void @test3(i32 %a1, i32 %vaddr) #0 {
     %vdata = insertelement <2 x i32> undef, i32 %a1, i32 0
     call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata,
@@ -30,8 +30,8 @@ define void @test3(i32 %a1, i32 %vaddr) #0 {
     ret void
 }
 
-;CHECK-LABEL: @test4
-;CHECK: TBUFFER_STORE_FORMAT_X {{v[0-9]+}}, 0x8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
+;CHECK-LABEL: {{^}}test4:
+;CHECK: tbuffer_store_format_x {{v[0-9]+}}, 0x8, -1, 0, -1, 0, 4, 4, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, -1, 0, 0
 define void @test4(i32 %vdata, i32 %vaddr) #0 {
     call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata,
         i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1,
diff --git a/test/CodeGen/R600/llvm.SI.tid.ll b/test/CodeGen/R600/llvm.SI.tid.ll
index fe17304732ad..64efd2daf338 100644
--- a/test/CodeGen/R600/llvm.SI.tid.ll
+++ b/test/CodeGen/R600/llvm.SI.tid.ll
@@ -1,7 +1,7 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK: V_MBCNT_LO_U32_B32_e64
-;CHECK: V_MBCNT_HI_U32_B32_e32
+;CHECK: v_mbcnt_lo_u32_b32_e64
+;CHECK: v_mbcnt_hi_u32_b32_e32
 
 define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
 main_body:
diff --git a/test/CodeGen/R600/llvm.amdgpu.kilp.ll b/test/CodeGen/R600/llvm.amdgpu.kilp.ll
index 1b8b1bfd2089..cca42bf35393 100644
--- a/test/CodeGen/R600/llvm.amdgpu.kilp.ll
+++ b/test/CodeGen/R600/llvm.amdgpu.kilp.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-; SI-LABEL: @kilp_gs_const
-; SI: S_MOV_B64 exec, 0
+; SI-LABEL: {{^}}kilp_gs_const:
+; SI: s_mov_b64 exec, 0
 define void @kilp_gs_const() #0 {
 main_body:
   %0 = icmp ule i32 0, 3
@@ -17,4 +17,4 @@ declare void @llvm.AMDGPU.kilp(float)
 
 attributes #0 = { "ShaderType"="2" }
 
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
diff --git a/test/CodeGen/R600/llvm.amdgpu.lrp.ll b/test/CodeGen/R600/llvm.amdgpu.lrp.ll
index c493a016e330..a1b7bf0ec43c 100644
--- a/test/CodeGen/R600/llvm.amdgpu.lrp.ll
+++ b/test/CodeGen/R600/llvm.amdgpu.lrp.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare float @llvm.AMDGPU.lrp(float, float, float) nounwind readnone
 
-; FUNC-LABEL: @test_lrp
-; SI: V_SUB_F32
-; SI: V_MAD_F32
+; FUNC-LABEL: {{^}}test_lrp:
+; SI: v_sub_f32
+; SI: v_mad_f32
 define void @test_lrp(float addrspace(1)* %out, float %src0, float %src1, float %src2) nounwind {
   %mad = call float @llvm.AMDGPU.lrp(float %src0, float %src1, float %src2) nounwind readnone
   store float %mad, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/llvm.cos.ll b/test/CodeGen/R600/llvm.cos.ll
index 9e7a4deda69b..9be81807be21 100644
--- a/test/CodeGen/R600/llvm.cos.ll
+++ b/test/CodeGen/R600/llvm.cos.ll
@@ -1,5 +1,5 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s -check-prefix=EG -check-prefix=FUNC
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC
+;RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s -check-prefix=SI -check-prefix=FUNC
 
 ;FUNC-LABEL: test
 ;EG: MULADD_IEEE *
@@ -7,8 +7,8 @@
 ;EG: ADD *
 ;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ;EG-NOT: COS
-;SI: V_COS_F32
-;SI-NOT: V_COS_F32
+;SI: v_cos_f32
+;SI-NOT: v_cos_f32
 
 define void @test(float addrspace(1)* %out, float %x) #1 {
    %cos = call float @llvm.cos.f32(float %x)
@@ -22,11 +22,11 @@ define void @test(float addrspace(1)* %out, float %x) #1 {
 ;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ;EG: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ;EG-NOT: COS
-;SI: V_COS_F32
-;SI: V_COS_F32
-;SI: V_COS_F32
-;SI: V_COS_F32
-;SI-NOT: V_COS_F32
+;SI: v_cos_f32
+;SI: v_cos_f32
+;SI: v_cos_f32
+;SI: v_cos_f32
+;SI-NOT: v_cos_f32
 
 define void @testv(<4 x float> addrspace(1)* %out, <4 x float> inreg %vx) #1 {
    %cos = call <4 x float> @llvm.cos.v4f32(<4 x float> %vx)
diff --git a/test/CodeGen/R600/llvm.exp2.ll b/test/CodeGen/R600/llvm.exp2.ll
index 119d5ef49a5e..9e78134b084f 100644
--- a/test/CodeGen/R600/llvm.exp2.ll
+++ b/test/CodeGen/R600/llvm.exp2.ll
@@ -1,14 +1,14 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK --check-prefix=FUNC
 ;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK --check-prefix=FUNC
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
+;RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
 
-;FUNC-LABEL: @test
+;FUNC-LABEL: {{^}}test:
 ;EG-CHECK: EXP_IEEE
 ;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
 ;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
 ;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
 ;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_EXP_F32
+;SI-CHECK: v_exp_f32
 
 define void @test(float addrspace(1)* %out, float %in) {
 entry:
@@ -17,7 +17,7 @@ entry:
    ret void
 }
 
-;FUNC-LABEL: @testv2
+;FUNC-LABEL: {{^}}testv2:
 ;EG-CHECK: EXP_IEEE
 ;EG-CHECK: EXP_IEEE
 ; FIXME: We should be able to merge these packets together on Cayman so we
@@ -30,8 +30,8 @@ entry:
 ;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
 ;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
 ;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_EXP_F32
-;SI-CHECK: V_EXP_F32
+;SI-CHECK: v_exp_f32
+;SI-CHECK: v_exp_f32
 
 define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
 entry:
@@ -40,7 +40,7 @@ entry:
   ret void
 }
 
-;FUNC-LABEL: @testv4
+;FUNC-LABEL: {{^}}testv4:
 ;EG-CHECK: EXP_IEEE
 ;EG-CHECK: EXP_IEEE
 ;EG-CHECK: EXP_IEEE
@@ -63,10 +63,10 @@ entry:
 ;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
 ;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
 ;CM-CHECK-DAG: EXP_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_EXP_F32
-;SI-CHECK: V_EXP_F32
-;SI-CHECK: V_EXP_F32
-;SI-CHECK: V_EXP_F32
+;SI-CHECK: v_exp_f32
+;SI-CHECK: v_exp_f32
+;SI-CHECK: v_exp_f32
+;SI-CHECK: v_exp_f32
 define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
 entry:
   %0 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %in)
diff --git a/test/CodeGen/R600/llvm.floor.ll b/test/CodeGen/R600/llvm.floor.ll
index f7071cd9b879..1016ff75ce9b 100644
--- a/test/CodeGen/R600/llvm.floor.ll
+++ b/test/CodeGen/R600/llvm.floor.ll
@@ -1,10 +1,10 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
-; R600-CHECK: @f32
+; R600-CHECK: {{^}}f32:
 ; R600-CHECK: FLOOR
-; SI-CHECK: @f32
-; SI-CHECK: V_FLOOR_F32_e32
+; SI-CHECK: {{^}}f32:
+; SI-CHECK: v_floor_f32_e32
 define void @f32(float addrspace(1)* %out, float %in) {
 entry:
   %0 = call float @llvm.floor.f32(float %in)
@@ -12,12 +12,12 @@ entry:
   ret void
 }
 
-; R600-CHECK: @v2f32
+; R600-CHECK: {{^}}v2f32:
 ; R600-CHECK: FLOOR
 ; R600-CHECK: FLOOR
-; SI-CHECK: @v2f32
-; SI-CHECK: V_FLOOR_F32_e32
-; SI-CHECK: V_FLOOR_F32_e32
+; SI-CHECK: {{^}}v2f32:
+; SI-CHECK: v_floor_f32_e32
+; SI-CHECK: v_floor_f32_e32
 define void @v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
 entry:
   %0 = call <2 x float> @llvm.floor.v2f32(<2 x float> %in)
@@ -25,16 +25,16 @@ entry:
   ret void
 }
 
-; R600-CHECK: @v4f32
+; R600-CHECK: {{^}}v4f32:
 ; R600-CHECK: FLOOR
 ; R600-CHECK: FLOOR
 ; R600-CHECK: FLOOR
 ; R600-CHECK: FLOOR
-; SI-CHECK: @v4f32
-; SI-CHECK: V_FLOOR_F32_e32
-; SI-CHECK: V_FLOOR_F32_e32
-; SI-CHECK: V_FLOOR_F32_e32
-; SI-CHECK: V_FLOOR_F32_e32
+; SI-CHECK: {{^}}v4f32:
+; SI-CHECK: v_floor_f32_e32
+; SI-CHECK: v_floor_f32_e32
+; SI-CHECK: v_floor_f32_e32
+; SI-CHECK: v_floor_f32_e32
 define void @v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
 entry:
   %0 = call <4 x float> @llvm.floor.v4f32(<4 x float> %in)
diff --git a/test/CodeGen/R600/llvm.log2.ll b/test/CodeGen/R600/llvm.log2.ll
index 4cba2d44a5c3..2373c6b036d2 100644
--- a/test/CodeGen/R600/llvm.log2.ll
+++ b/test/CodeGen/R600/llvm.log2.ll
@@ -1,14 +1,14 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG-CHECK --check-prefix=FUNC
 ;RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM-CHECK --check-prefix=FUNC
-;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
+;RUN: llc < %s -march=amdgcn -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
 
-;FUNC-LABEL: @test
+;FUNC-LABEL: {{^}}test:
 ;EG-CHECK: LOG_IEEE
 ;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
 ;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
 ;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
 ;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_LOG_F32
+;SI-CHECK: v_log_f32
 
 define void @test(float addrspace(1)* %out, float %in) {
 entry:
@@ -17,7 +17,7 @@ entry:
    ret void
 }
 
-;FUNC-LABEL: @testv2
+;FUNC-LABEL: {{^}}testv2:
 ;EG-CHECK: LOG_IEEE
 ;EG-CHECK: LOG_IEEE
 ; FIXME: We should be able to merge these packets together on Cayman so we
@@ -30,8 +30,8 @@ entry:
 ;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}} (MASKED)
 ;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
 ;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_LOG_F32
-;SI-CHECK: V_LOG_F32
+;SI-CHECK: v_log_f32
+;SI-CHECK: v_log_f32
 
 define void @testv2(<2 x float> addrspace(1)* %out, <2 x float> %in) {
 entry:
@@ -40,7 +40,7 @@ entry:
   ret void
 }
 
-;FUNC-LABEL: @testv4
+;FUNC-LABEL: {{^}}testv4:
 ;EG-CHECK: LOG_IEEE
 ;EG-CHECK: LOG_IEEE
 ;EG-CHECK: LOG_IEEE
@@ -63,10 +63,10 @@ entry:
 ;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
 ;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
 ;CM-CHECK-DAG: LOG_IEEE T{{[0-9]+\.[XYZW]}}
-;SI-CHECK: V_LOG_F32
-;SI-CHECK: V_LOG_F32
-;SI-CHECK: V_LOG_F32
-;SI-CHECK: V_LOG_F32
+;SI-CHECK: v_log_f32
+;SI-CHECK: v_log_f32
+;SI-CHECK: v_log_f32
+;SI-CHECK: v_log_f32
 define void @testv4(<4 x float> addrspace(1)* %out, <4 x float> %in) {
 entry:
   %0 = call <4 x float> @llvm.log2.v4f32(<4 x float> %in)
diff --git a/test/CodeGen/R600/llvm.memcpy.ll b/test/CodeGen/R600/llvm.memcpy.ll
new file mode 100644
index 000000000000..9771062ed0dd
--- /dev/null
+++ b/test/CodeGen/R600/llvm.memcpy.ll
@@ -0,0 +1,364 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
+
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_write_b8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+; SI: ds_write_b8
+
+; SI: s_endpgm
+define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
+  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
+  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 1, i1 false) nounwind
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align2:
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+; SI: ds_read_u16
+
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+; SI: ds_write_b16
+
+; SI: s_endpgm
+define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
+  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
+  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 2, i1 false) nounwind
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4:
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI: s_endpgm
+define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
+  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
+  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 4, i1 false) nounwind
+  ret void
+}
+
+; FIXME: Use 64-bit ops
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8:
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: ds_read_b32
+; SI-DAG: ds_write_b32
+
+; SI-DAG: s_endpgm
+define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
+  %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
+  %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
+  call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align1:
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+; SI-DAG: buffer_load_ubyte
+; SI-DAG: buffer_store_byte
+
+; SI: s_endpgm
+define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
+  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 1, i1 false) nounwind
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align2:
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+; SI-DAG: buffer_load_ushort
+
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+; SI-DAG: buffer_store_short
+
+; SI: s_endpgm
+define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
+  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 2, i1 false) nounwind
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align4:
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
+define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
+  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 4, i1 false) nounwind
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align8:
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
+define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
+  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 8, i1 false) nounwind
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align16:
+; SI: buffer_load_dwordx4
+; SI: buffer_load_dwordx4
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
+define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+  %bcin = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
+  %bcout = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
+  call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %bcout, i8 addrspace(1)* %bcin, i64 32, i32 16, i1 false) nounwind
+  ret void
+}
diff --git a/test/CodeGen/R600/llvm.rint.f64.ll b/test/CodeGen/R600/llvm.rint.f64.ll
index 3e2884b7ce02..2c926341f78a 100644
--- a/test/CodeGen/R600/llvm.rint.f64.ll
+++ b/test/CodeGen/R600/llvm.rint.f64.ll
@@ -1,15 +1,15 @@
-; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @rint_f64
-; CI: V_RNDNE_F64_e32
+; FUNC-LABEL: {{^}}rint_f64:
+; CI: v_rndne_f64_e32
 
-; SI-DAG: V_ADD_F64
-; SI-DAG: V_ADD_F64
-; SI-DAG V_CMP_GT_F64_e64
-; SI: V_CNDMASK_B32
-; SI: V_CNDMASK_B32
-; SI: S_ENDPGM
+; SI-DAG: v_add_f64
+; SI-DAG: v_add_f64
+; SI-DAG v_cmp_gt_f64_e64
+; SI: v_cndmask_b32
+; SI: v_cndmask_b32
+; SI: s_endpgm
 define void @rint_f64(double addrspace(1)* %out, double %in) {
 entry:
   %0 = call double @llvm.rint.f64(double %in)
@@ -17,9 +17,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @rint_v2f64
-; CI: V_RNDNE_F64_e32
-; CI: V_RNDNE_F64_e32
+; FUNC-LABEL: {{^}}rint_v2f64:
+; CI: v_rndne_f64_e32
+; CI: v_rndne_f64_e32
 define void @rint_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %in) {
 entry:
   %0 = call <2 x double> @llvm.rint.v2f64(<2 x double> %in)
@@ -27,11 +27,11 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @rint_v4f64
-; CI: V_RNDNE_F64_e32
-; CI: V_RNDNE_F64_e32
-; CI: V_RNDNE_F64_e32
-; CI: V_RNDNE_F64_e32
+; FUNC-LABEL: {{^}}rint_v4f64:
+; CI: v_rndne_f64_e32
+; CI: v_rndne_f64_e32
+; CI: v_rndne_f64_e32
+; CI: v_rndne_f64_e32
 define void @rint_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %in) {
 entry:
   %0 = call <4 x double> @llvm.rint.v4f64(<4 x double> %in)
diff --git a/test/CodeGen/R600/llvm.rint.ll b/test/CodeGen/R600/llvm.rint.ll
index 209bb4358fd5..496cf07e7ca8 100644
--- a/test/CodeGen/R600/llvm.rint.ll
+++ b/test/CodeGen/R600/llvm.rint.ll
@@ -1,10 +1,10 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @rint_f32
+; FUNC-LABEL: {{^}}rint_f32:
 ; R600: RNDNE
 
-; SI: V_RNDNE_F32_e32
+; SI: v_rndne_f32_e32
 define void @rint_f32(float addrspace(1)* %out, float %in) {
 entry:
   %0 = call float @llvm.rint.f32(float %in) #0
@@ -12,12 +12,12 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @rint_v2f32
+; FUNC-LABEL: {{^}}rint_v2f32:
 ; R600: RNDNE
 ; R600: RNDNE
 
-; SI: V_RNDNE_F32_e32
-; SI: V_RNDNE_F32_e32
+; SI: v_rndne_f32_e32
+; SI: v_rndne_f32_e32
 define void @rint_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
 entry:
   %0 = call <2 x float> @llvm.rint.v2f32(<2 x float> %in) #0
@@ -25,16 +25,16 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @rint_v4f32
+; FUNC-LABEL: {{^}}rint_v4f32:
 ; R600: RNDNE
 ; R600: RNDNE
 ; R600: RNDNE
 ; R600: RNDNE
 
-; SI: V_RNDNE_F32_e32
-; SI: V_RNDNE_F32_e32
-; SI: V_RNDNE_F32_e32
-; SI: V_RNDNE_F32_e32
+; SI: v_rndne_f32_e32
+; SI: v_rndne_f32_e32
+; SI: v_rndne_f32_e32
+; SI: v_rndne_f32_e32
 define void @rint_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
 entry:
   %0 = call <4 x float> @llvm.rint.v4f32(<4 x float> %in) #0
@@ -42,10 +42,10 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @legacy_amdil_round_nearest_f32
+; FUNC-LABEL: {{^}}legacy_amdil_round_nearest_f32:
 ; R600: RNDNE
 
-; SI: V_RNDNE_F32_e32
+; SI: v_rndne_f32_e32
 define void @legacy_amdil_round_nearest_f32(float addrspace(1)* %out, float %in) {
 entry:
   %0 = call float @llvm.AMDIL.round.nearest.f32(float %in) #0
diff --git a/test/CodeGen/R600/llvm.round.ll b/test/CodeGen/R600/llvm.round.ll
index e06d45d4a373..bedf4ba72ae4 100644
--- a/test/CodeGen/R600/llvm.round.ll
+++ b/test/CodeGen/R600/llvm.round.ll
@@ -1,11 +1,13 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600 --check-prefix=FUNC
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefix=R600 --check-prefix=FUNC
 
-; FUNC-LABEL: @f32
-; R600: FRACT
-; R600-DAG: ADD
-; R600-DAG: CEIL
-; R600-DAG: FLOOR
-; R600: CNDGE
+; FUNC-LABEL: {{^}}f32:
+; R600: FRACT {{.*}}, [[ARG:KC[0-9]\[[0-9]+\]\.[XYZW]]]
+; R600-DAG: ADD  {{.*}}, -0.5
+; R600-DAG: CEIL {{.*}} [[ARG]]
+; R600-DAG: FLOOR {{.*}} [[ARG]]
+; R600-DAG: CNDGE
+; R600-DAG: CNDGT
+; R600: CNDGE {{[^,]+}}, [[ARG]]
 define void @f32(float addrspace(1)* %out, float %in) {
 entry:
   %0 = call float @llvm.round.f32(float %in)
diff --git a/test/CodeGen/R600/llvm.sin.ll b/test/CodeGen/R600/llvm.sin.ll
index 53006bad5c4b..d63d698b5554 100644
--- a/test/CodeGen/R600/llvm.sin.ll
+++ b/test/CodeGen/R600/llvm.sin.ll
@@ -1,53 +1,84 @@
-;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-;RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
-;RUN: llc -march=r600 -mcpu=SI -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
-
-;FUNC-LABEL: test
-;EG: MULADD_IEEE *
-;EG: FRACT *
-;EG: ADD *
-;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG-NOT: SIN
-;SI: V_MUL_F32
-;SI: V_FRACT_F32
-;SI: V_SIN_F32
-;SI-NOT: V_SIN_F32
-
-define void @test(float addrspace(1)* %out, float %x) #1 {
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=SI-SAFE -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI -check-prefix=SI-UNSAFE -check-prefix=FUNC %s
+
+; FUNC-LABEL: sin_f32
+; EG: MULADD_IEEE *
+; EG: FRACT *
+; EG: ADD *
+; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; EG-NOT: SIN
+; SI: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+
+define void @sin_f32(float addrspace(1)* %out, float %x) #1 {
    %sin = call float @llvm.sin.f32(float %x)
    store float %sin, float addrspace(1)* %out
    ret void
 }
 
-;FUNC-LABEL: testf
-;SI-UNSAFE: 4.774
-;SI-UNSAFE: V_MUL_F32
-;SI-SAFE: V_MUL_F32
-;SI-SAFE: V_MUL_F32
-;SI: V_FRACT_F32
-;SI: V_SIN_F32
-;SI-NOT: V_SIN_F32
-
-define void @testf(float addrspace(1)* %out, float %x) #1 {
-   %y = fmul float 3.0, %x
+; FUNC-LABEL: {{^}}sin_3x_f32:
+; SI-UNSAFE-NOT: v_add_f32
+; SI-UNSAFE: 0x3ef47644
+; SI-UNSAFE: v_mul_f32
+; SI-SAFE: v_mul_f32
+; SI-SAFE: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+define void @sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
+  %y = fmul float 3.0, %x
+  %sin = call float @llvm.sin.f32(float %y)
+  store float %sin, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sin_2x_f32:
+; SI-UNSAFE-NOT: v_add_f32
+; SI-UNSAFE: 0x3ea2f983
+; SI-UNSAFE: v_mul_f32
+; SI-SAFE: v_add_f32
+; SI-SAFE: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+define void @sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
+  %y = fmul float 2.0, %x
+  %sin = call float @llvm.sin.f32(float %y)
+  store float %sin, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_2sin_f32:
+; SI-UNSAFE: 0x3ea2f983
+; SI-UNSAFE: v_mul_f32
+; SI-SAFE: v_add_f32
+; SI-SAFE: v_mul_f32
+; SI: v_fract_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+define void @test_2sin_f32(float addrspace(1)* %out, float %x) #1 {
+   %y = fmul float 2.0, %x
    %sin = call float @llvm.sin.f32(float %y)
    store float %sin, float addrspace(1)* %out
    ret void
 }
 
-;FUNC-LABEL: testv
-;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
-;EG-NOT: SIN
-;SI: V_SIN_F32
-;SI: V_SIN_F32
-;SI: V_SIN_F32
-;SI: V_SIN_F32
-;SI-NOT: V_SIN_F32
-
-define void @testv(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
+; FUNC-LABEL: {{^}}sin_v4f32:
+; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
+; EG-NOT: SIN
+; SI: v_sin_f32
+; SI: v_sin_f32
+; SI: v_sin_f32
+; SI: v_sin_f32
+; SI-NOT: v_sin_f32
+
+define void @sin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %vx) #1 {
    %sin = call <4 x float> @llvm.sin.v4f32( <4 x float> %vx)
    store <4 x float> %sin, <4 x float> addrspace(1)* %out
    ret void
diff --git a/test/CodeGen/R600/llvm.sqrt.ll b/test/CodeGen/R600/llvm.sqrt.ll
index 4eee37ffbe21..c0392256c202 100644
--- a/test/CodeGen/R600/llvm.sqrt.ll
+++ b/test/CodeGen/R600/llvm.sqrt.ll
@@ -1,11 +1,11 @@
 ; RUN: llc < %s -march=r600 --mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
 ; RUN: llc < %s -march=r600 --mcpu=SI -verify-machineinstrs| FileCheck %s --check-prefix=SI-CHECK
 
-; R600-CHECK-LABEL: @sqrt_f32
+; R600-CHECK-LABEL: {{^}}sqrt_f32:
 ; R600-CHECK: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].Z
 ; R600-CHECK: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].Z, PS
-; SI-CHECK-LABEL: @sqrt_f32
-; SI-CHECK: V_SQRT_F32_e32
+; SI-CHECK-LABEL: {{^}}sqrt_f32:
+; SI-CHECK: v_sqrt_f32_e32
 define void @sqrt_f32(float addrspace(1)* %out, float %in) {
 entry:
   %0 = call float @llvm.sqrt.f32(float %in)
@@ -13,14 +13,14 @@ entry:
   ret void
 }
 
-; R600-CHECK-LABEL: @sqrt_v2f32
+; R600-CHECK-LABEL: {{^}}sqrt_v2f32:
 ; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[2].W
 ; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[2].W, PS
 ; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].X
 ; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].X, PS
-; SI-CHECK-LABEL: @sqrt_v2f32
-; SI-CHECK: V_SQRT_F32_e32
-; SI-CHECK: V_SQRT_F32_e32
+; SI-CHECK-LABEL: {{^}}sqrt_v2f32:
+; SI-CHECK: v_sqrt_f32_e32
+; SI-CHECK: v_sqrt_f32_e32
 define void @sqrt_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) {
 entry:
   %0 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
@@ -28,7 +28,7 @@ entry:
   ret void
 }
 
-; R600-CHECK-LABEL: @sqrt_v4f32
+; R600-CHECK-LABEL: {{^}}sqrt_v4f32:
 ; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Y
 ; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].Y, PS
 ; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[3].Z
@@ -37,11 +37,11 @@ entry:
 ; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[3].W, PS
 ; R600-CHECK-DAG: RECIPSQRT_CLAMPED * T{{[0-9]\.[XYZW]}}, KC0[4].X
 ; R600-CHECK-DAG: MUL NON-IEEE T{{[0-9]\.[XYZW]}}, KC0[4].X, PS
-; SI-CHECK-LABEL: @sqrt_v4f32
-; SI-CHECK: V_SQRT_F32_e32
-; SI-CHECK: V_SQRT_F32_e32
-; SI-CHECK: V_SQRT_F32_e32
-; SI-CHECK: V_SQRT_F32_e32
+; SI-CHECK-LABEL: {{^}}sqrt_v4f32:
+; SI-CHECK: v_sqrt_f32_e32
+; SI-CHECK: v_sqrt_f32_e32
+; SI-CHECK: v_sqrt_f32_e32
+; SI-CHECK: v_sqrt_f32_e32
 define void @sqrt_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) {
 entry:
   %0 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %in)
diff --git a/test/CodeGen/R600/llvm.trunc.ll b/test/CodeGen/R600/llvm.trunc.ll
index fa6fb9906dde..5585477ef294 100644
--- a/test/CodeGen/R600/llvm.trunc.ll
+++ b/test/CodeGen/R600/llvm.trunc.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-; CHECK-LABEL: @trunc_f32
+; CHECK-LABEL: {{^}}trunc_f32:
 ; CHECK: TRUNC
 
 define void @trunc_f32(float addrspace(1)* %out, float %in) {
diff --git a/test/CodeGen/R600/load-i1.ll b/test/CodeGen/R600/load-i1.ll
index 9ba81b85f59b..85ec5eb9ec8a 100644
--- a/test/CodeGen/R600/load-i1.ll
+++ b/test/CodeGen/R600/load-i1.ll
@@ -1,21 +1,57 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
+; FUNC-LABEL: {{^}}global_copy_i1_to_i1:
+; SI: buffer_load_ubyte
+; SI: v_and_b32_e32 v{{[0-9]+}}, 1
+; SI: buffer_store_byte
+; SI: s_endpgm
 
-; SI-LABEL: @global_copy_i1_to_i1
-; SI: BUFFER_LOAD_UBYTE
-; SI: V_AND_B32_e32 v{{[0-9]+}}, 1
-; SI: BUFFER_STORE_BYTE
-; SI: S_ENDPGM
+; EG: VTX_READ_8
+; EG: AND_INT
 define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
   %load = load i1 addrspace(1)* %in
   store i1 %load, i1 addrspace(1)* %out, align 1
   ret void
 }
 
-; SI-LABEL: @global_sextload_i1_to_i32
-; XSI: BUFFER_LOAD_BYTE
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}local_copy_i1_to_i1:
+; SI: ds_read_u8
+; SI: v_and_b32_e32 v{{[0-9]+}}, 1
+; SI: ds_write_b8
+; SI: s_endpgm
+
+; EG: LDS_UBYTE_READ_RET
+; EG: AND_INT
+; EG: LDS_BYTE_WRITE
+define void @local_copy_i1_to_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) nounwind {
+  %load = load i1 addrspace(3)* %in
+  store i1 %load, i1 addrspace(3)* %out, align 1
+  ret void
+}
+
+; FUNC-LABEL: {{^}}constant_copy_i1_to_i1:
+; SI: buffer_load_ubyte
+; SI: v_and_b32_e32 v{{[0-9]+}}, 1
+; SI: buffer_store_byte
+; SI: s_endpgm
+
+; EG: VTX_READ_8
+; EG: AND_INT
+define void @constant_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in) nounwind {
+  %load = load i1 addrspace(2)* %in
+  store i1 %load, i1 addrspace(1)* %out, align 1
+  ret void
+}
+
+; FUNC-LABEL: {{^}}global_sextload_i1_to_i32:
+; SI: buffer_load_ubyte
+; SI: v_bfe_i32
+; SI: buffer_store_dword
+; SI: s_endpgm
+
+; EG: VTX_READ_8
+; EG: BFE_INT
 define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
   %load = load i1 addrspace(1)* %in
   %ext = sext i1 %load to i32
@@ -23,10 +59,11 @@ define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
   ret void
 }
 
-; SI-LABEL: @global_zextload_i1_to_i32
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}global_zextload_i1_to_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
+
 define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
   %load = load i1 addrspace(1)* %in
   %ext = zext i1 %load to i32
@@ -34,10 +71,11 @@ define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)*
   ret void
 }
 
-; SI-LABEL: @global_sextload_i1_to_i64
-; XSI: BUFFER_LOAD_BYTE
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}global_sextload_i1_to_i64:
+; SI: buffer_load_ubyte
+; SI: v_bfe_i32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
 define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
   %load = load i1 addrspace(1)* %in
   %ext = sext i1 %load to i64
@@ -45,10 +83,11 @@ define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)*
   ret void
 }
 
-; SI-LABEL: @global_zextload_i1_to_i64
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}global_zextload_i1_to_i64:
+; SI: buffer_load_ubyte
+; SI: v_mov_b32_e32 {{v[0-9]+}}, 0
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
 define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
   %load = load i1 addrspace(1)* %in
   %ext = zext i1 %load to i64
@@ -56,50 +95,52 @@ define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)*
   ret void
 }
 
-; SI-LABEL: @i1_arg
-; SI: BUFFER_LOAD_UBYTE
-; SI: V_AND_B32_e32
-; SI: BUFFER_STORE_BYTE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}i1_arg:
+; SI: buffer_load_ubyte
+; SI: v_and_b32_e32
+; SI: buffer_store_byte
+; SI: s_endpgm
 define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
   store i1 %x, i1 addrspace(1)* %out, align 1
   ret void
 }
 
-; SI-LABEL: @i1_arg_zext_i32
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}i1_arg_zext_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
 define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
   %ext = zext i1 %x to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; SI-LABEL: @i1_arg_zext_i64
-; SI: BUFFER_LOAD_UBYTE
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}i1_arg_zext_i64:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
 define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
   %ext = zext i1 %x to i64
   store i64 %ext, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; SI-LABEL: @i1_arg_sext_i32
-; XSI: BUFFER_LOAD_BYTE
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}i1_arg_sext_i32:
+; SI: buffer_load_ubyte
+; SI: buffer_store_dword
+; SI: s_endpgm
 define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
   %ext = sext i1 %x to i32
   store i32 %ext, i32addrspace(1)* %out, align 4
   ret void
 }
 
-; SI-LABEL: @i1_arg_sext_i64
-; XSI: BUFFER_LOAD_BYTE
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}i1_arg_sext_i64:
+; SI: buffer_load_ubyte
+; SI: v_bfe_i32
+; SI: v_ashrrev_i32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
 define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
   %ext = sext i1 %x to i64
   store i64 %ext, i64 addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/load-input-fold.ll b/test/CodeGen/R600/load-input-fold.ll
index ca86d0e36907..265fa9bfeb42 100644
--- a/test/CodeGen/R600/load-input-fold.ll
+++ b/test/CodeGen/R600/load-input-fold.ll
@@ -1,5 +1,4 @@
 ;RUN: llc < %s -march=r600 -mcpu=cayman
-;REQUIRES: asserts
 
 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
 main_body:
diff --git a/test/CodeGen/R600/load.ll b/test/CodeGen/R600/load.ll
index 8905fbd3aeb6..5d043b423871 100644
--- a/test/CodeGen/R600/load.ll
+++ b/test/CodeGen/R600/load.ll
@@ -1,16 +1,16 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
 
 ;===------------------------------------------------------------------------===;
 ; GLOBAL ADDRESS SPACE
 ;===------------------------------------------------------------------------===;
 
 ; Load an i8 value from the global address space.
-; FUNC-LABEL: @load_i8
+; FUNC-LABEL: {{^}}load_i8:
 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
 
-; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
+; SI-CHECK: buffer_load_ubyte v{{[0-9]+}},
 define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
   %1 = load i8 addrspace(1)* %in
   %2 = zext i8 %1 to i32
@@ -18,13 +18,13 @@ define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
   ret void
 }
 
-; FUNC-LABEL: @load_i8_sext
+; FUNC-LABEL: {{^}}load_i8_sext:
 ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
 ; R600-CHECK: 24
 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
 ; R600-CHECK: 24
-; SI-CHECK: BUFFER_LOAD_SBYTE
+; SI-CHECK: buffer_load_sbyte
 define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
   %0 = load i8 addrspace(1)* %in
@@ -33,11 +33,11 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v2i8
+; FUNC-LABEL: {{^}}load_v2i8:
 ; R600-CHECK: VTX_READ_8
 ; R600-CHECK: VTX_READ_8
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
 define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
 entry:
   %0 = load <2 x i8> addrspace(1)* %in
@@ -46,7 +46,7 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v2i8_sext
+; FUNC-LABEL: {{^}}load_v2i8_sext:
 ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
@@ -57,8 +57,8 @@ entry:
 ; R600-CHECK-DAG: 24
 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
 ; R600-CHECK-DAG: 24
-; SI-CHECK: BUFFER_LOAD_SBYTE
-; SI-CHECK: BUFFER_LOAD_SBYTE
+; SI-CHECK: buffer_load_sbyte
+; SI-CHECK: buffer_load_sbyte
 define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
 entry:
   %0 = load <2 x i8> addrspace(1)* %in
@@ -67,15 +67,15 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v4i8
+; FUNC-LABEL: {{^}}load_v4i8:
 ; R600-CHECK: VTX_READ_8
 ; R600-CHECK: VTX_READ_8
 ; R600-CHECK: VTX_READ_8
 ; R600-CHECK: VTX_READ_8
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
-; SI-CHECK: BUFFER_LOAD_UBYTE
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
+; SI-CHECK: buffer_load_ubyte
 define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
 entry:
   %0 = load <4 x i8> addrspace(1)* %in
@@ -84,7 +84,7 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v4i8_sext
+; FUNC-LABEL: {{^}}load_v4i8_sext:
 ; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
 ; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
@@ -105,10 +105,10 @@ entry:
 ; R600-CHECK-DAG: 24
 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
 ; R600-CHECK-DAG: 24
-; SI-CHECK: BUFFER_LOAD_SBYTE
-; SI-CHECK: BUFFER_LOAD_SBYTE
-; SI-CHECK: BUFFER_LOAD_SBYTE
-; SI-CHECK: BUFFER_LOAD_SBYTE
+; SI-CHECK: buffer_load_sbyte
+; SI-CHECK: buffer_load_sbyte
+; SI-CHECK: buffer_load_sbyte
+; SI-CHECK: buffer_load_sbyte
 define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
 entry:
   %0 = load <4 x i8> addrspace(1)* %in
@@ -118,9 +118,9 @@ entry:
 }
 
 ; Load an i16 value from the global address space.
-; FUNC-LABEL: @load_i16
+; FUNC-LABEL: {{^}}load_i16:
 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
 define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
 entry:
   %0 = load i16	 addrspace(1)* %in
@@ -129,13 +129,13 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_i16_sext
+; FUNC-LABEL: {{^}}load_i16_sext:
 ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
 ; R600-CHECK: 16
 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
 ; R600-CHECK: 16
-; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: buffer_load_sshort
 define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
 entry:
   %0 = load i16 addrspace(1)* %in
@@ -144,11 +144,11 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v2i16
+; FUNC-LABEL: {{^}}load_v2i16:
 ; R600-CHECK: VTX_READ_16
 ; R600-CHECK: VTX_READ_16
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
 define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
 entry:
   %0 = load <2 x i16> addrspace(1)* %in
@@ -157,7 +157,7 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v2i16_sext
+; FUNC-LABEL: {{^}}load_v2i16_sext:
 ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
 ; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
@@ -168,8 +168,8 @@ entry:
 ; R600-CHECK-DAG: 16
 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
 ; R600-CHECK-DAG: 16
-; SI-CHECK: BUFFER_LOAD_SSHORT
-; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: buffer_load_sshort
+; SI-CHECK: buffer_load_sshort
 define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
 entry:
   %0 = load <2 x i16> addrspace(1)* %in
@@ -178,15 +178,15 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v4i16
+; FUNC-LABEL: {{^}}load_v4i16:
 ; R600-CHECK: VTX_READ_16
 ; R600-CHECK: VTX_READ_16
 ; R600-CHECK: VTX_READ_16
 ; R600-CHECK: VTX_READ_16
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
+; SI-CHECK: buffer_load_ushort
 define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
 entry:
   %0 = load <4 x i16> addrspace(1)* %in
@@ -195,7 +195,7 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v4i16_sext
+; FUNC-LABEL: {{^}}load_v4i16_sext:
 ; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
 ; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
@@ -216,10 +216,10 @@ entry:
 ; R600-CHECK-DAG: 16
 ; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
 ; R600-CHECK-DAG: 16
-; SI-CHECK: BUFFER_LOAD_SSHORT
-; SI-CHECK: BUFFER_LOAD_SSHORT
-; SI-CHECK: BUFFER_LOAD_SSHORT
-; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: buffer_load_sshort
+; SI-CHECK: buffer_load_sshort
+; SI-CHECK: buffer_load_sshort
+; SI-CHECK: buffer_load_sshort
 define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
 entry:
   %0 = load <4 x i16> addrspace(1)* %in
@@ -229,10 +229,10 @@ entry:
 }
 
 ; load an i32 value from the global address space.
-; FUNC-LABEL: @load_i32
+; FUNC-LABEL: {{^}}load_i32:
 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
 
-; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
+; SI-CHECK: buffer_load_dword v{{[0-9]+}}
 define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = load i32 addrspace(1)* %in
@@ -241,10 +241,10 @@ entry:
 }
 
 ; load a f32 value from the global address space.
-; FUNC-LABEL: @load_f32
+; FUNC-LABEL: {{^}}load_f32:
 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
 
-; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
+; SI-CHECK: buffer_load_dword v{{[0-9]+}}
 define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
 entry:
   %0 = load float addrspace(1)* %in
@@ -253,10 +253,10 @@ entry:
 }
 
 ; load a v2f32 value from the global address space
-; FUNC-LABEL: @load_v2f32
+; FUNC-LABEL: {{^}}load_v2f32:
 ; R600-CHECK: MEM_RAT
 ; R600-CHECK: VTX_READ_64
-; SI-CHECK: BUFFER_LOAD_DWORDX2
+; SI-CHECK: buffer_load_dwordx2
 define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
 entry:
   %0 = load <2 x float> addrspace(1)* %in
@@ -264,9 +264,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_i64
+; FUNC-LABEL: {{^}}load_i64:
 ; R600-CHECK: VTX_READ_64
-; SI-CHECK: BUFFER_LOAD_DWORDX2
+; SI-CHECK: buffer_load_dwordx2
 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
 entry:
   %0 = load i64 addrspace(1)* %in
@@ -274,12 +274,12 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_i64_sext
+; FUNC-LABEL: {{^}}load_i64_sext:
 ; R600-CHECK: MEM_RAT
 ; R600-CHECK: MEM_RAT
 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.x
 ; R600-CHECK: 31
-; SI-CHECK: BUFFER_LOAD_DWORD
+; SI-CHECK: buffer_load_dword
 
 define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
@@ -289,7 +289,7 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_i64_zext
+; FUNC-LABEL: {{^}}load_i64_zext:
 ; R600-CHECK: MEM_RAT
 ; R600-CHECK: MEM_RAT
 define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
@@ -300,18 +300,18 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v8i32
+; FUNC-LABEL: {{^}}load_v8i32:
 ; R600-CHECK: VTX_READ_128
 ; R600-CHECK: VTX_READ_128
 ; XXX: We should be using DWORDX4 instructions on SI.
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
 define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
 entry:
   %0 = load <8 x i32> addrspace(1)* %in
@@ -319,28 +319,28 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v16i32
+; FUNC-LABEL: {{^}}load_v16i32:
 ; R600-CHECK: VTX_READ_128
 ; R600-CHECK: VTX_READ_128
 ; R600-CHECK: VTX_READ_128
 ; R600-CHECK: VTX_READ_128
 ; XXX: We should be using DWORDX4 instructions on SI.
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
-; SI-CHECK: BUFFER_LOAD_DWORD
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
+; SI-CHECK: buffer_load_dword
 define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
 entry:
   %0 = load <16 x i32> addrspace(1)* %in
@@ -353,13 +353,13 @@ entry:
 ;===------------------------------------------------------------------------===;
 
 ; Load a sign-extended i8 value
-; FUNC-LABEL: @load_const_i8_sext
+; FUNC-LABEL: {{^}}load_const_i8_sext:
 ; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
 ; R600-CHECK: 24
 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
 ; R600-CHECK: 24
-; SI-CHECK: BUFFER_LOAD_SBYTE v{{[0-9]+}},
+; SI-CHECK: buffer_load_sbyte v{{[0-9]+}},
 define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
 entry:
   %0 = load i8 addrspace(2)* %in
@@ -369,9 +369,9 @@ entry:
 }
 
 ; Load an aligned i8 value
-; FUNC-LABEL: @load_const_i8_aligned
+; FUNC-LABEL: {{^}}load_const_i8_aligned:
 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
+; SI-CHECK: buffer_load_ubyte v{{[0-9]+}},
 define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
 entry:
   %0 = load i8 addrspace(2)* %in
@@ -381,9 +381,9 @@ entry:
 }
 
 ; Load an un-aligned i8 value
-; FUNC-LABEL: @load_const_i8_unaligned
+; FUNC-LABEL: {{^}}load_const_i8_unaligned:
 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
+; SI-CHECK: buffer_load_ubyte v{{[0-9]+}},
 define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
 entry:
   %0 = getelementptr i8 addrspace(2)* %in, i32 1
@@ -394,13 +394,13 @@ entry:
 }
 
 ; Load a sign-extended i16 value
-; FUNC-LABEL: @load_const_i16_sext
+; FUNC-LABEL: {{^}}load_const_i16_sext:
 ; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
 ; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
 ; R600-CHECK: 16
 ; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
 ; R600-CHECK: 16
-; SI-CHECK: BUFFER_LOAD_SSHORT
+; SI-CHECK: buffer_load_sshort
 define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
 entry:
   %0 = load i16 addrspace(2)* %in
@@ -410,9 +410,9 @@ entry:
 }
 
 ; Load an aligned i16 value
-; FUNC-LABEL: @load_const_i16_aligned
+; FUNC-LABEL: {{^}}load_const_i16_aligned:
 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
 define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
 entry:
   %0 = load i16 addrspace(2)* %in
@@ -422,9 +422,9 @@ entry:
 }
 
 ; Load an un-aligned i16 value
-; FUNC-LABEL: @load_const_i16_unaligned
+; FUNC-LABEL: {{^}}load_const_i16_unaligned:
 ; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK: BUFFER_LOAD_USHORT
+; SI-CHECK: buffer_load_ushort
 define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
 entry:
   %0 = getelementptr i16 addrspace(2)* %in, i32 1
@@ -435,10 +435,10 @@ entry:
 }
 
 ; Load an i32 value from the constant address space.
-; FUNC-LABEL: @load_const_addrspace_i32
+; FUNC-LABEL: {{^}}load_const_addrspace_i32:
 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
 
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
+; SI-CHECK: s_load_dword s{{[0-9]+}}
 define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
 entry:
   %0 = load i32 addrspace(2)* %in
@@ -447,10 +447,10 @@ entry:
 }
 
 ; Load a f32 value from the constant address space.
-; FUNC-LABEL: @load_const_addrspace_f32
+; FUNC-LABEL: {{^}}load_const_addrspace_f32:
 ; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
 
-; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
+; SI-CHECK: s_load_dword s{{[0-9]+}}
 define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
   %1 = load float addrspace(2)* %in
   store float %1, float addrspace(1)* %out
@@ -462,11 +462,11 @@ define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(
 ;===------------------------------------------------------------------------===;
 
 ; Load an i8 value from the local address space.
-; FUNC-LABEL: @load_i8_local
+; FUNC-LABEL: {{^}}load_i8_local:
 ; R600-CHECK: LDS_UBYTE_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u8
 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
   %1 = load i8 addrspace(3)* %in
   %2 = zext i8 %1 to i32
@@ -474,12 +474,12 @@ define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
   ret void
 }
 
-; FUNC-LABEL: @load_i8_sext_local
+; FUNC-LABEL: {{^}}load_i8_sext_local:
 ; R600-CHECK: LDS_UBYTE_READ_RET
 ; R600-CHECK: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i8
 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
 entry:
   %0 = load i8 addrspace(3)* %in
@@ -488,13 +488,13 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v2i8_local
+; FUNC-LABEL: {{^}}load_v2i8_local:
 ; R600-CHECK: LDS_UBYTE_READ_RET
 ; R600-CHECK: LDS_UBYTE_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U8
-; SI-CHECK: DS_READ_U8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u8
+; SI-CHECK: ds_read_u8
 define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
 entry:
   %0 = load <2 x i8> addrspace(3)* %in
@@ -503,15 +503,15 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v2i8_sext_local
+; FUNC-LABEL: {{^}}load_v2i8_sext_local:
 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
 ; R600-CHECK-DAG: ASHR
 ; R600-CHECK-DAG: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I8
-; SI-CHECK: DS_READ_I8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i8
+; SI-CHECK: ds_read_i8
 define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
 entry:
   %0 = load <2 x i8> addrspace(3)* %in
@@ -520,17 +520,17 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v4i8_local
+; FUNC-LABEL: {{^}}load_v4i8_local:
 ; R600-CHECK: LDS_UBYTE_READ_RET
 ; R600-CHECK: LDS_UBYTE_READ_RET
 ; R600-CHECK: LDS_UBYTE_READ_RET
 ; R600-CHECK: LDS_UBYTE_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U8
-; SI-CHECK: DS_READ_U8
-; SI-CHECK: DS_READ_U8
-; SI-CHECK: DS_READ_U8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u8
+; SI-CHECK: ds_read_u8
+; SI-CHECK: ds_read_u8
+; SI-CHECK: ds_read_u8
 define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
 entry:
   %0 = load <4 x i8> addrspace(3)* %in
@@ -539,7 +539,7 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v4i8_sext_local
+; FUNC-LABEL: {{^}}load_v4i8_sext_local:
 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
 ; R600-CHECK-DAG: LDS_UBYTE_READ_RET
@@ -548,12 +548,12 @@ entry:
 ; R600-CHECK-DAG: ASHR
 ; R600-CHECK-DAG: ASHR
 ; R600-CHECK-DAG: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I8
-; SI-CHECK: DS_READ_I8
-; SI-CHECK: DS_READ_I8
-; SI-CHECK: DS_READ_I8
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i8
+; SI-CHECK: ds_read_i8
+; SI-CHECK: ds_read_i8
+; SI-CHECK: ds_read_i8
 define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
 entry:
   %0 = load <4 x i8> addrspace(3)* %in
@@ -563,11 +563,11 @@ entry:
 }
 
 ; Load an i16 value from the local address space.
-; FUNC-LABEL: @load_i16_local
+; FUNC-LABEL: {{^}}load_i16_local:
 ; R600-CHECK: LDS_USHORT_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u16
 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
 entry:
   %0 = load i16	 addrspace(3)* %in
@@ -576,12 +576,12 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_i16_sext_local
+; FUNC-LABEL: {{^}}load_i16_sext_local:
 ; R600-CHECK: LDS_USHORT_READ_RET
 ; R600-CHECK: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i16
 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
 entry:
   %0 = load i16 addrspace(3)* %in
@@ -590,13 +590,13 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v2i16_local
+; FUNC-LABEL: {{^}}load_v2i16_local:
 ; R600-CHECK: LDS_USHORT_READ_RET
 ; R600-CHECK: LDS_USHORT_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U16
-; SI-CHECK: DS_READ_U16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u16
+; SI-CHECK: ds_read_u16
 define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
 entry:
   %0 = load <2 x i16> addrspace(3)* %in
@@ -605,15 +605,15 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v2i16_sext_local
+; FUNC-LABEL: {{^}}load_v2i16_sext_local:
 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
 ; R600-CHECK-DAG: ASHR
 ; R600-CHECK-DAG: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I16
-; SI-CHECK: DS_READ_I16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i16
+; SI-CHECK: ds_read_i16
 define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
 entry:
   %0 = load <2 x i16> addrspace(3)* %in
@@ -622,17 +622,17 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v4i16_local
+; FUNC-LABEL: {{^}}load_v4i16_local:
 ; R600-CHECK: LDS_USHORT_READ_RET
 ; R600-CHECK: LDS_USHORT_READ_RET
 ; R600-CHECK: LDS_USHORT_READ_RET
 ; R600-CHECK: LDS_USHORT_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_U16
-; SI-CHECK: DS_READ_U16
-; SI-CHECK: DS_READ_U16
-; SI-CHECK: DS_READ_U16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_u16
+; SI-CHECK: ds_read_u16
+; SI-CHECK: ds_read_u16
+; SI-CHECK: ds_read_u16
 define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
 entry:
   %0 = load <4 x i16> addrspace(3)* %in
@@ -641,7 +641,7 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @load_v4i16_sext_local
+; FUNC-LABEL: {{^}}load_v4i16_sext_local:
 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
 ; R600-CHECK-DAG: LDS_USHORT_READ_RET
@@ -650,12 +650,12 @@ entry:
 ; R600-CHECK-DAG: ASHR
 ; R600-CHECK-DAG: ASHR
 ; R600-CHECK-DAG: ASHR
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_I16
-; SI-CHECK: DS_READ_I16
-; SI-CHECK: DS_READ_I16
-; SI-CHECK: DS_READ_I16
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_i16
+; SI-CHECK: ds_read_i16
+; SI-CHECK: ds_read_i16
+; SI-CHECK: ds_read_i16
 define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
 entry:
   %0 = load <4 x i16> addrspace(3)* %in
@@ -665,11 +665,11 @@ entry:
 }
 
 ; load an i32 value from the local address space.
-; FUNC-LABEL: @load_i32_local
+; FUNC-LABEL: {{^}}load_i32_local:
 ; R600-CHECK: LDS_READ_RET
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_B32
+; SI-CHECK-NOT: s_wqm_b64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_b32
 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
   %0 = load i32 addrspace(3)* %in
@@ -678,10 +678,10 @@ entry:
 }
 
 ; load a f32 value from the local address space.
-; FUNC-LABEL: @load_f32_local
+; FUNC-LABEL: {{^}}load_f32_local:
 ; R600-CHECK: LDS_READ_RET
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_B32
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_b32
 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
 entry:
   %0 = load float addrspace(3)* %in
@@ -690,14 +690,50 @@ entry:
 }
 
 ; load a v2f32 value from the local address space
-; FUNC-LABEL: @load_v2f32_local
+; FUNC-LABEL: {{^}}load_v2f32_local:
 ; R600-CHECK: LDS_READ_RET
 ; R600-CHECK: LDS_READ_RET
-; SI-CHECK: S_MOV_B32 m0
-; SI-CHECK: DS_READ_B64
+; SI-CHECK: s_mov_b32 m0
+; SI-CHECK: ds_read_b64
 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
 entry:
   %0 = load <2 x float> addrspace(3)* %in
   store <2 x float> %0, <2 x float> addrspace(1)* %out
   ret void
 }
+
+; Test loading a i32 and v2i32 value from the same base pointer.
+; FUNC-LABEL: {{^}}load_i32_v2i32_local:
+; R600-CHECK: LDS_READ_RET
+; R600-CHECK: LDS_READ_RET
+; R600-CHECK: LDS_READ_RET
+; SI-CHECK-DAG: ds_read_b32
+; SI-CHECK-DAG: ds_read2_b32
+define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
+  %scalar = load i32 addrspace(3)* %in
+  %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
+  %vec_ptr = getelementptr <2 x i32> addrspace(3)* %tmp0, i32 2
+  %vec0 = load <2 x i32> addrspace(3)* %vec_ptr, align 4
+  %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
+  %vec = add <2 x i32> %vec0, %vec1
+  store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
+  ret void
+}
+
+
+@lds = addrspace(3) global [512 x i32] undef, align 4
+
+; On SI we need to make sure that the base offset is a register and not
+; an immediate.
+; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
+; SI-CHECK: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
+; SI-CHECK: ds_read_b32 v0, v[[ZERO]] offset:4
+; R600-CHECK: LDS_READ_RET
+define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
+entry:
+  %tmp0 = getelementptr [512 x i32] addrspace(3)* @lds, i32 0, i32 1
+  %tmp1 = load i32 addrspace(3)* %tmp0
+  %tmp2 = getelementptr i32 addrspace(1)* %out, i32 1
+  store i32 %tmp1, i32 addrspace(1)* %tmp2
+  ret void
+}
diff --git a/test/CodeGen/R600/load.vec.ll b/test/CodeGen/R600/load.vec.ll
index 81a6310bbcc9..bdd35d8f2f35 100644
--- a/test/CodeGen/R600/load.vec.ll
+++ b/test/CodeGen/R600/load.vec.ll
@@ -1,11 +1,11 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK  %s
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK  %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK  %s
 
 ; load a v2i32 value from the global address space.
-; EG-CHECK: @load_v2i32
+; EG-CHECK: {{^}}load_v2i32:
 ; EG-CHECK: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0
-; SI-CHECK: @load_v2i32
-; SI-CHECK: BUFFER_LOAD_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+; SI-CHECK: {{^}}load_v2i32:
+; SI-CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
 define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %a = load <2 x i32> addrspace(1) * %in
   store <2 x i32> %a, <2 x i32> addrspace(1)* %out
@@ -13,10 +13,10 @@ define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
 }
 
 ; load a v4i32 value from the global address space.
-; EG-CHECK: @load_v4i32
+; EG-CHECK: {{^}}load_v4i32:
 ; EG-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0
-; SI-CHECK: @load_v4i32
-; SI-CHECK: BUFFER_LOAD_DWORDX4 v[{{[0-9]+:[0-9]+}}]
+; SI-CHECK: {{^}}load_v4i32:
+; SI-CHECK: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}]
 define void @load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %a = load <4 x i32> addrspace(1) * %in
   store <4 x i32> %a, <4 x i32> addrspace(1)* %out
diff --git a/test/CodeGen/R600/load64.ll b/test/CodeGen/R600/load64.ll
index a117557e98e0..b32d2d56267e 100644
--- a/test/CodeGen/R600/load64.ll
+++ b/test/CodeGen/R600/load64.ll
@@ -1,18 +1,18 @@
-; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s
 
 ; load a f64 value from the global address space.
-; CHECK-LABEL: @load_f64:
-; CHECK: BUFFER_LOAD_DWORDX2 v[{{[0-9]+:[0-9]+}}]
-; CHECK: BUFFER_STORE_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+; CHECK-LABEL: {{^}}load_f64:
+; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
+; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
 define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
   %1 = load double addrspace(1)* %in
   store double %1, double addrspace(1)* %out
   ret void
 }
 
-; CHECK-LABEL: @load_i64:
-; CHECK: BUFFER_LOAD_DWORDX2 v[{{[0-9]+:[0-9]+}}]
-; CHECK: BUFFER_STORE_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+; CHECK-LABEL: {{^}}load_i64:
+; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
+; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   %tmp = load i64 addrspace(1)* %in
   store i64 %tmp, i64 addrspace(1)* %out, align 8
@@ -20,9 +20,9 @@ define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
 }
 
 ; Load a f64 value from the constant address space.
-; CHECK-LABEL: @load_const_addrspace_f64:
-; CHECK: S_LOAD_DWORDX2 s[{{[0-9]+:[0-9]+}}]
-; CHECK: BUFFER_STORE_DWORDX2 v[{{[0-9]+:[0-9]+}}]
+; CHECK-LABEL: {{^}}load_const_addrspace_f64:
+; CHECK: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
+; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
 define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) {
   %1 = load double addrspace(2)* %in
   store double %1, double addrspace(1)* %out
diff --git a/test/CodeGen/R600/local-64.ll b/test/CodeGen/R600/local-64.ll
index c52b41bb1b5a..f975bc1f56b0 100644
--- a/test/CodeGen/R600/local-64.ll
+++ b/test/CodeGen/R600/local-64.ll
@@ -1,8 +1,9 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck --check-prefix=SI --check-prefix=BOTH %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck --check-prefix=CI --check-prefix=BOTH %s
 
-; SI-LABEL: @local_i32_load
-; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x1c, [M0]
-; SI: BUFFER_STORE_DWORD [[REG]],
+; BOTH-LABEL: {{^}}local_i32_load
+; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} offset:28 [M0]
+; BOTH: buffer_store_dword [[REG]],
 define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
   %gep = getelementptr i32 addrspace(3)* %in, i32 7
   %val = load i32 addrspace(3)* %gep, align 4
@@ -10,19 +11,19 @@ define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounw
   ret void
 }
 
-; SI-LABEL: @local_i32_load_0_offset
-; SI: DS_READ_B32 [[REG:v[0-9]+]], v{{[0-9]+}}, 0x0, [M0]
-; SI: BUFFER_STORE_DWORD [[REG]],
+; BOTH-LABEL: {{^}}local_i32_load_0_offset
+; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}} [M0]
+; BOTH: buffer_store_dword [[REG]],
 define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
   %val = load i32 addrspace(3)* %in, align 4
   store i32 %val, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; SI-LABEL: @local_i8_load_i16_max_offset
-; SI-NOT: ADD
-; SI: DS_READ_U8 [[REG:v[0-9]+]], {{v[0-9]+}}, 0xffff, [M0]
-; SI: BUFFER_STORE_BYTE [[REG]],
+; BOTH-LABEL: {{^}}local_i8_load_i16_max_offset:
+; BOTH-NOT: ADD
+; BOTH: ds_read_u8 [[REG:v[0-9]+]], {{v[0-9]+}} offset:65535 [M0]
+; BOTH: buffer_store_byte [[REG]],
 define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
   %gep = getelementptr i8 addrspace(3)* %in, i32 65535
   %val = load i8 addrspace(3)* %gep, align 4
@@ -30,11 +31,14 @@ define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)
   ret void
 }
 
-; SI-LABEL: @local_i8_load_over_i16_max_offset
-; SI: S_ADD_I32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
-; SI: V_MOV_B32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
-; SI: DS_READ_U8 [[REG:v[0-9]+]], [[VREGADDR]], 0x0, [M0]
-; SI: BUFFER_STORE_BYTE [[REG]],
+; BOTH-LABEL: {{^}}local_i8_load_over_i16_max_offset:
+; The LDS offset will be 65536 bytes, which is larger than the size of LDS on
+; SI, which is why it is being OR'd with the base pointer.
+; SI: s_or_b32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
+; CI: s_add_i32 [[ADDR:s[0-9]+]], s{{[0-9]+}}, 0x10000
+; BOTH: v_mov_b32_e32 [[VREGADDR:v[0-9]+]], [[ADDR]]
+; BOTH: ds_read_u8 [[REG:v[0-9]+]], [[VREGADDR]] [M0]
+; BOTH: buffer_store_byte [[REG]],
 define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
   %gep = getelementptr i8 addrspace(3)* %in, i32 65536
   %val = load i8 addrspace(3)* %gep, align 4
@@ -42,10 +46,10 @@ define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspa
   ret void
 }
 
-; SI-LABEL: @local_i64_load
-; SI-NOT: ADD
-; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
-; SI: BUFFER_STORE_DWORDX2 [[REG]],
+; BOTH-LABEL: {{^}}local_i64_load:
+; BOTH-NOT: ADD
+; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
+; BOTH: buffer_store_dwordx2 [[REG]],
 define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
   %gep = getelementptr i64 addrspace(3)* %in, i32 7
   %val = load i64 addrspace(3)* %gep, align 8
@@ -53,19 +57,19 @@ define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounw
   ret void
 }
 
-; SI-LABEL: @local_i64_load_0_offset
-; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
-; SI: BUFFER_STORE_DWORDX2 [[REG]],
+; BOTH-LABEL: {{^}}local_i64_load_0_offset
+; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
+; BOTH: buffer_store_dwordx2 [[REG]],
 define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
   %val = load i64 addrspace(3)* %in, align 8
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; SI-LABEL: @local_f64_load
-; SI-NOT: ADD
-; SI: DS_READ_B64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}}, 0x38, [M0]
-; SI: BUFFER_STORE_DWORDX2 [[REG]],
+; BOTH-LABEL: {{^}}local_f64_load:
+; BOTH-NOT: ADD
+; BOTH: ds_read_b64 [[REG:v[[0-9]+:[0-9]+]]], v{{[0-9]+}} offset:56 [M0]
+; BOTH: buffer_store_dwordx2 [[REG]],
 define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
   %gep = getelementptr double addrspace(3)* %in, i32 7
   %val = load double addrspace(3)* %gep, align 8
@@ -73,85 +77,89 @@ define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in)
   ret void
 }
 
-; SI-LABEL: @local_f64_load_0_offset
-; SI: DS_READ_B64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0x0, [M0]
-; SI: BUFFER_STORE_DWORDX2 [[REG]],
+; BOTH-LABEL: {{^}}local_f64_load_0_offset
+; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}} [M0]
+; BOTH: buffer_store_dwordx2 [[REG]],
 define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
   %val = load double addrspace(3)* %in, align 8
   store double %val, double addrspace(1)* %out, align 8
   ret void
 }
 
-; SI-LABEL: @local_i64_store
-; SI-NOT: ADD
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
+; BOTH-LABEL: {{^}}local_i64_store:
+; BOTH-NOT: ADD
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
 define void @local_i64_store(i64 addrspace(3)* %out) nounwind {
   %gep = getelementptr i64 addrspace(3)* %out, i32 7
   store i64 5678, i64 addrspace(3)* %gep, align 8
   ret void
 }
 
-; SI-LABEL: @local_i64_store_0_offset
-; SI-NOT: ADD
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
+; BOTH-LABEL: {{^}}local_i64_store_0_offset:
+; BOTH-NOT: ADD
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
 define void @local_i64_store_0_offset(i64 addrspace(3)* %out) nounwind {
   store i64 1234, i64 addrspace(3)* %out, align 8
   ret void
 }
 
-; SI-LABEL: @local_f64_store
-; SI-NOT: ADD
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x38 [M0]
+; BOTH-LABEL: {{^}}local_f64_store:
+; BOTH-NOT: ADD
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:56 [M0]
 define void @local_f64_store(double addrspace(3)* %out) nounwind {
   %gep = getelementptr double addrspace(3)* %out, i32 7
   store double 16.0, double addrspace(3)* %gep, align 8
   ret void
 }
 
-; SI-LABEL: @local_f64_store_0_offset
-; SI: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
+; BOTH-LABEL: {{^}}local_f64_store_0_offset
+; BOTH: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
 define void @local_f64_store_0_offset(double addrspace(3)* %out) nounwind {
   store double 20.0, double addrspace(3)* %out, align 8
   ret void
 }
 
-; SI-LABEL: @local_v2i64_store
-; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x78 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x70 [M0]
+; BOTH-LABEL: {{^}}local_v2i64_store:
+; BOTH-NOT: ADD
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:120 [M0]
+; BOTH: s_endpgm
 define void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
   %gep = getelementptr <2 x i64> addrspace(3)* %out, i32 7
   store <2 x i64> <i64 5678, i64 5678>, <2 x i64> addrspace(3)* %gep, align 16
   ret void
 }
 
-; SI-LABEL: @local_v2i64_store_0_offset
-; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
+; BOTH-LABEL: {{^}}local_v2i64_store_0_offset:
+; BOTH-NOT: ADD
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
+; BOTH: s_endpgm
 define void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
   store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16
   ret void
 }
 
-; SI-LABEL: @local_v4i64_store
-; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf8 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xf0 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe8 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0xe0 [M0]
+; BOTH-LABEL: {{^}}local_v4i64_store:
+; BOTH-NOT: ADD
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:232 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:248 [M0]
+; BOTH: s_endpgm
 define void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
   %gep = getelementptr <4 x i64> addrspace(3)* %out, i32 7
   store <4 x i64> <i64 5678, i64 5678, i64 5678, i64 5678>, <4 x i64> addrspace(3)* %gep, align 16
   ret void
 }
 
-; SI-LABEL: @local_v4i64_store_0_offset
-; SI-NOT: ADD
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x18 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x10 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x8 [M0]
-; SI-DAG: DS_WRITE_B64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, 0x0 [M0]
+; BOTH-LABEL: {{^}}local_v4i64_store_0_offset:
+; BOTH-NOT: ADD
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16 [M0]
+; BOTH-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24 [M0]
+; BOTH: s_endpgm
 define void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
   store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16
   ret void
diff --git a/test/CodeGen/R600/local-atomics.ll b/test/CodeGen/R600/local-atomics.ll
index 5a44951055ea..16d3173f3692 100644
--- a/test/CodeGen/R600/local-atomics.ll
+++ b/test/CodeGen/R600/local-atomics.ll
@@ -1,21 +1,25 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-; FUNC-LABEL: @lds_atomic_xchg_ret_i32:
-; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
-; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
-; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: DS_WRXCHG_RTN_B32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=CI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32:
+; EG: LDS_WRXCHG_RET *
+; SI: s_load_dword [[SPTR:s[0-9]+]],
+; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 define void @lds_atomic_xchg_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
   store i32 %result, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_xchg_ret_i32_offset:
-; SI: DS_WRXCHG_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i32_offset:
+; EG: LDS_WRXCHG_RET *
+; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
 define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
   %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -24,22 +28,24 @@ define void @lds_atomic_xchg_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
 }
 
 ; XXX - Is it really necessary to load 4 into VGPR?
-; FUNC-LABEL: @lds_atomic_add_ret_i32:
-; SI: S_LOAD_DWORD [[SPTR:s[0-9]+]],
-; SI: V_MOV_B32_e32 [[DATA:v[0-9]+]], 4
-; SI: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
-; SI: DS_ADD_RTN_U32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]], 0x0, [M0]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32:
+; EG: LDS_ADD_RET *
+; SI: s_load_dword [[SPTR:s[0-9]+]],
+; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_add_rtn_u32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 define void @lds_atomic_add_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
   store i32 %result, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_add_ret_i32_offset:
-; SI: DS_ADD_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_offset:
+; EG: LDS_ADD_RET *
+; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
 define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
   %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -47,22 +53,36 @@ define void @lds_atomic_add_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_inc_ret_i32:
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_add_ret_i32_bad_si_offset:
+; EG: LDS_ADD_RET *
+; SI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
+; CI: ds_add_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_add_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+  %sub = sub i32 %a, %b
+  %add = add i32 %sub, 4
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32:
+; EG: LDS_ADD_RET *
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
+; SI: s_endpgm
 define void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
   store i32 %result, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_inc_ret_i32_offset:
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_INC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_offset:
+; EG: LDS_ADD_RET *
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
+; SI: s_endpgm
 define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
   %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
@@ -70,18 +90,34 @@ define void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_sub_ret_i32:
-; SI: DS_SUB_RTN_U32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i32_bad_si_offset:
+; EG: LDS_ADD_RET *
+; SI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} [M0]
+; CI: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_inc_ret_i32_bad_si_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+  %sub = sub i32 %a, %b
+  %add = add i32 %sub, 4
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32:
+; EG: LDS_SUB_RET *
+; SI: ds_sub_rtn_u32
+; SI: s_endpgm
 define void @lds_atomic_sub_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
   store i32 %result, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_sub_ret_i32_offset:
-; SI: DS_SUB_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i32_offset:
+; EG: LDS_SUB_RET *
+; SI: ds_sub_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
 define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
   %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -89,22 +125,22 @@ define void @lds_atomic_sub_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_dec_ret_i32:
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_DEC_RTN_U32  v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x0
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32:
+; EG: LDS_SUB_RET *
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_dec_rtn_u32  v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] [M0]
+; SI: s_endpgm
 define void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
   store i32 %result, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_dec_ret_i32_offset:
-; SI: S_MOV_B32 [[SNEGONE:s[0-9]+]], -1
-; SI: V_MOV_B32_e32 [[NEGONE:v[0-9]+]], [[SNEGONE]]
-; SI: DS_DEC_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]], 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i32_offset:
+; EG: LDS_SUB_RET *
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[NEGONE]] offset:16
+; SI: s_endpgm
 define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
   %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
@@ -112,18 +148,20 @@ define void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_and_ret_i32:
-; SI: DS_AND_RTN_B32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32:
+; EG: LDS_AND_RET *
+; SI: ds_and_rtn_b32
+; SI: s_endpgm
 define void @lds_atomic_and_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
   store i32 %result, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_and_ret_i32_offset:
-; SI: DS_AND_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_and_ret_i32_offset:
+; EG: LDS_AND_RET *
+; SI: ds_and_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
 define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
   %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -131,18 +169,20 @@ define void @lds_atomic_and_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_or_ret_i32:
-; SI: DS_OR_RTN_B32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32:
+; EG: LDS_OR_RET *
+; SI: ds_or_rtn_b32
+; SI: s_endpgm
 define void @lds_atomic_or_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
   store i32 %result, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_or_ret_i32_offset:
-; SI: DS_OR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_or_ret_i32_offset:
+; EG: LDS_OR_RET *
+; SI: ds_or_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
 define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
   %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -150,18 +190,20 @@ define void @lds_atomic_or_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_xor_ret_i32:
-; SI: DS_XOR_RTN_B32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32:
+; EG: LDS_XOR_RET *
+; SI: ds_xor_rtn_b32
+; SI: s_endpgm
 define void @lds_atomic_xor_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
   store i32 %result, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_xor_ret_i32_offset:
-; SI: DS_XOR_RTN_B32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i32_offset:
+; EG: LDS_XOR_RET *
+; SI: ds_xor_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
 define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
   %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -170,25 +212,27 @@ define void @lds_atomic_xor_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
 }
 
 ; FIXME: There is no atomic nand instr
-; XFUNC-LABEL: @lds_atomic_nand_ret_i32:uction, so we somehow need to expand this.
+; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i32:uction, so we somehow need to expand this.
 ; define void @lds_atomic_nand_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
 ;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
 ;   store i32 %result, i32 addrspace(1)* %out, align 4
 ;   ret void
 ; }
 
-; FUNC-LABEL: @lds_atomic_min_ret_i32:
-; SI: DS_MIN_RTN_I32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32:
+; EG: LDS_MIN_INT_RET *
+; SI: ds_min_rtn_i32
+; SI: s_endpgm
 define void @lds_atomic_min_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
   store i32 %result, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_min_ret_i32_offset:
-; SI: DS_MIN_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_min_ret_i32_offset:
+; EG: LDS_MIN_INT_RET *
+; SI: ds_min_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
 define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
   %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -196,18 +240,20 @@ define void @lds_atomic_min_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_max_ret_i32:
-; SI: DS_MAX_RTN_I32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32:
+; EG: LDS_MAX_INT_RET *
+; SI: ds_max_rtn_i32
+; SI: s_endpgm
 define void @lds_atomic_max_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
   store i32 %result, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_max_ret_i32_offset:
-; SI: DS_MAX_RTN_I32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_max_ret_i32_offset:
+; EG: LDS_MAX_INT_RET *
+; SI: ds_max_rtn_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
 define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
   %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -215,18 +261,20 @@ define void @lds_atomic_max_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_umin_ret_i32:
-; SI: DS_MIN_RTN_U32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32:
+; EG: LDS_MIN_UINT_RET *
+; SI: ds_min_rtn_u32
+; SI: s_endpgm
 define void @lds_atomic_umin_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
   store i32 %result, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_umin_ret_i32_offset:
-; SI: DS_MIN_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i32_offset:
+; EG: LDS_MIN_UINT_RET *
+; SI: ds_min_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
 define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
   %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
@@ -234,21 +282,269 @@ define void @lds_atomic_umin_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspac
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_umax_ret_i32:
-; SI: DS_MAX_RTN_U32
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32:
+; EG: LDS_MAX_UINT_RET *
+; SI: ds_max_rtn_u32
+; SI: s_endpgm
 define void @lds_atomic_umax_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
   store i32 %result, i32 addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_umax_ret_i32_offset:
-; SI: DS_MAX_RTN_U32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, 0x10
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i32_offset:
+; EG: LDS_MAX_UINT_RET *
+; SI: ds_max_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
 define void @lds_atomic_umax_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
   %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
   store i32 %result, i32 addrspace(1)* %out, align 4
   ret void
 }
+
+; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32:
+; SI: s_load_dword [[SPTR:s[0-9]+]],
+; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_wrxchg_rtn_b32 [[RESULT:v[0-9]+]], [[VPTR]], [[DATA]] [M0]
+; SI: s_endpgm
+define void @lds_atomic_xchg_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw xchg i32 addrspace(3)* %ptr, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i32_offset:
+; SI: ds_wrxchg_rtn_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_xchg_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw xchg i32 addrspace(3)* %gep, i32 4 seq_cst
+  ret void
+}
+
+; XXX - Is it really necessary to load 4 into VGPR?
+; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32:
+; SI: s_load_dword [[SPTR:s[0-9]+]],
+; SI: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
+; SI: ds_add_u32 [[VPTR]], [[DATA]] [M0]
+; SI: s_endpgm
+define void @lds_atomic_add_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_offset:
+; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_add_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_add_noret_i32_bad_si_offset
+; SI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} [M0]
+; CI: ds_add_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16 [M0]
+; SI: s_endpgm
+define void @lds_atomic_add_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+  %sub = sub i32 %a, %b
+  %add = add i32 %sub, 4
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+  %result = atomicrmw add i32 addrspace(3)* %gep, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32:
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] [M0]
+; SI: s_endpgm
+define void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw add i32 addrspace(3)* %ptr, i32 1 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_offset:
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_inc_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
+; SI: s_endpgm
+define void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i32_bad_si_offset:
+; SI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}}
+; CI: ds_inc_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_inc_noret_i32_bad_si_offset(i32 addrspace(3)* %ptr, i32 %a, i32 %b) nounwind {
+  %sub = sub i32 %a, %b
+  %add = add i32 %sub, 4
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 %add
+  %result = atomicrmw add i32 addrspace(3)* %gep, i32 1 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32:
+; SI: ds_sub_u32
+; SI: s_endpgm
+define void @lds_atomic_sub_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i32_offset:
+; SI: ds_sub_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_sub_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32:
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_dec_u32  v{{[0-9]+}}, [[NEGONE]]
+; SI: s_endpgm
+define void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw sub i32 addrspace(3)* %ptr, i32 1 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i32_offset:
+; SI: v_mov_b32_e32 [[NEGONE:v[0-9]+]], -1
+; SI: ds_dec_u32 v{{[0-9]+}}, [[NEGONE]] offset:16
+; SI: s_endpgm
+define void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw sub i32 addrspace(3)* %gep, i32 1 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32:
+; SI: ds_and_b32
+; SI: s_endpgm
+define void @lds_atomic_and_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw and i32 addrspace(3)* %ptr, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_and_noret_i32_offset:
+; SI: ds_and_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_and_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw and i32 addrspace(3)* %gep, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32:
+; SI: ds_or_b32
+; SI: s_endpgm
+define void @lds_atomic_or_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw or i32 addrspace(3)* %ptr, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_or_noret_i32_offset:
+; SI: ds_or_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_or_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw or i32 addrspace(3)* %gep, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32:
+; SI: ds_xor_b32
+; SI: s_endpgm
+define void @lds_atomic_xor_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw xor i32 addrspace(3)* %ptr, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i32_offset:
+; SI: ds_xor_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_xor_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw xor i32 addrspace(3)* %gep, i32 4 seq_cst
+  ret void
+}
+
+; FIXME: There is no atomic nand instr
+; XFUNC-LABEL: {{^}}lds_atomic_nand_noret_i32:uction, so we somehow need to expand this.
+; define void @lds_atomic_nand_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+;   %result = atomicrmw nand i32 addrspace(3)* %ptr, i32 4 seq_cst
+;   ret void
+; }
+
+; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32:
+; SI: ds_min_i32
+; SI: s_endpgm
+define void @lds_atomic_min_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw min i32 addrspace(3)* %ptr, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_min_noret_i32_offset:
+; SI: ds_min_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_min_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw min i32 addrspace(3)* %gep, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32:
+; SI: ds_max_i32
+; SI: s_endpgm
+define void @lds_atomic_max_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw max i32 addrspace(3)* %ptr, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_max_noret_i32_offset:
+; SI: ds_max_i32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_max_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw max i32 addrspace(3)* %gep, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32:
+; SI: ds_min_u32
+; SI: s_endpgm
+define void @lds_atomic_umin_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw umin i32 addrspace(3)* %ptr, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i32_offset:
+; SI: ds_min_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_umin_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw umin i32 addrspace(3)* %gep, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32:
+; SI: ds_max_u32
+; SI: s_endpgm
+define void @lds_atomic_umax_noret_i32(i32 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw umax i32 addrspace(3)* %ptr, i32 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i32_offset:
+; SI: ds_max_u32 v{{[0-9]+}}, v{{[0-9]+}} offset:16
+; SI: s_endpgm
+define void @lds_atomic_umax_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i32 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw umax i32 addrspace(3)* %gep, i32 4 seq_cst
+  ret void
+}
diff --git a/test/CodeGen/R600/local-atomics64.ll b/test/CodeGen/R600/local-atomics64.ll
index 849b033d84a9..ce6ddbd66265 100644
--- a/test/CodeGen/R600/local-atomics64.ll
+++ b/test/CodeGen/R600/local-atomics64.ll
@@ -1,17 +1,17 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
 
-; FUNC-LABEL: @lds_atomic_xchg_ret_i64:
-; SI: DS_WRXCHG_RTN_B64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64:
+; SI: ds_wrxchg_rtn_b64
+; SI: s_endpgm
 define void @lds_atomic_xchg_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_xchg_ret_i64_offset:
-; SI: DS_WRXCHG_RTN_B64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
+; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32
+; SI: s_endpgm
 define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
   %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -19,24 +19,23 @@ define void @lds_atomic_xchg_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_add_ret_i64:
-; SI: DS_ADD_RTN_U64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64:
+; SI: ds_add_rtn_u64
+; SI: s_endpgm
 define void @lds_atomic_add_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_add_ret_i64_offset:
-; SI: S_LOAD_DWORD [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, 9
-; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
-; SI-DAG: V_MOV_B32_e32 [[VPTR:v[0-9]+]], [[PTR]]
-; SI: DS_ADD_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}, 0x20, [M0]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
+; SI-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
+; SI-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
 define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i64 addrspace(3)* %ptr, i64 4
   %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
@@ -44,22 +43,21 @@ define void @lds_atomic_add_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_inc_ret_i64:
-; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
-; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
-; SI: DS_INC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}},
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64:
+; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; SI: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
 define void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_inc_ret_i64_offset:
-; SI: DS_INC_RTN_U64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
+; SI: ds_inc_rtn_u64 {{.*}} offset:32
+; SI: s_endpgm
 define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
   %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
@@ -67,18 +65,18 @@ define void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_sub_ret_i64:
-; SI: DS_SUB_RTN_U64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64:
+; SI: ds_sub_rtn_u64
+; SI: s_endpgm
 define void @lds_atomic_sub_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_sub_ret_i64_offset:
-; SI: DS_SUB_RTN_U64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
+; SI: ds_sub_rtn_u64 {{.*}} offset:32
+; SI: s_endpgm
 define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
   %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -86,22 +84,21 @@ define void @lds_atomic_sub_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_dec_ret_i64:
-; SI: S_MOV_B64 s{{\[}}[[LOSDATA:[0-9]+]]:[[HISDATA:[0-9]+]]{{\]}}, -1
-; SI-DAG: V_MOV_B32_e32 v[[LOVDATA:[0-9]+]], s[[LOSDATA]]
-; SI-DAG: V_MOV_B32_e32 v[[HIVDATA:[0-9]+]], s[[HISDATA]]
-; SI: DS_DEC_RTN_U64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}},
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64:
+; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; SI: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; SI: buffer_store_dwordx2 [[RESULT]],
+; SI: s_endpgm
 define void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_dec_ret_i64_offset:
-; SI: DS_DEC_RTN_U64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
+; SI: ds_dec_rtn_u64 {{.*}} offset:32
+; SI: s_endpgm
 define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
   %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
@@ -109,18 +106,18 @@ define void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_and_ret_i64:
-; SI: DS_AND_RTN_B64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64:
+; SI: ds_and_rtn_b64
+; SI: s_endpgm
 define void @lds_atomic_and_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_and_ret_i64_offset:
-; SI: DS_AND_RTN_B64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
+; SI: ds_and_rtn_b64 {{.*}} offset:32
+; SI: s_endpgm
 define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
   %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -128,18 +125,18 @@ define void @lds_atomic_and_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_or_ret_i64:
-; SI: DS_OR_RTN_B64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64:
+; SI: ds_or_rtn_b64
+; SI: s_endpgm
 define void @lds_atomic_or_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_or_ret_i64_offset:
-; SI: DS_OR_RTN_B64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
+; SI: ds_or_rtn_b64 {{.*}} offset:32
+; SI: s_endpgm
 define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
   %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -147,18 +144,18 @@ define void @lds_atomic_or_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_xor_ret_i64:
-; SI: DS_XOR_RTN_B64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64:
+; SI: ds_xor_rtn_b64
+; SI: s_endpgm
 define void @lds_atomic_xor_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_xor_ret_i64_offset:
-; SI: DS_XOR_RTN_B64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
+; SI: ds_xor_rtn_b64 {{.*}} offset:32
+; SI: s_endpgm
 define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
   %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -167,25 +164,25 @@ define void @lds_atomic_xor_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
 }
 
 ; FIXME: There is no atomic nand instr
-; XFUNC-LABEL: @lds_atomic_nand_ret_i64:uction, so we somehow need to expand this.
+; XFUNC-LABEL: {{^}}lds_atomic_nand_ret_i64:uction, so we somehow need to expand this.
 ; define void @lds_atomic_nand_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
 ;   %result = atomicrmw nand i64 addrspace(3)* %ptr, i32 4 seq_cst
 ;   store i64 %result, i64 addrspace(1)* %out, align 8
 ;   ret void
 ; }
 
-; FUNC-LABEL: @lds_atomic_min_ret_i64:
-; SI: DS_MIN_RTN_I64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64:
+; SI: ds_min_rtn_i64
+; SI: s_endpgm
 define void @lds_atomic_min_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_min_ret_i64_offset:
-; SI: DS_MIN_RTN_I64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
+; SI: ds_min_rtn_i64 {{.*}} offset:32
+; SI: s_endpgm
 define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
   %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -193,18 +190,18 @@ define void @lds_atomic_min_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_max_ret_i64:
-; SI: DS_MAX_RTN_I64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64:
+; SI: ds_max_rtn_i64
+; SI: s_endpgm
 define void @lds_atomic_max_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_max_ret_i64_offset:
-; SI: DS_MAX_RTN_I64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
+; SI: ds_max_rtn_i64 {{.*}} offset:32
+; SI: s_endpgm
 define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
   %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -212,18 +209,18 @@ define void @lds_atomic_max_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_umin_ret_i64:
-; SI: DS_MIN_RTN_U64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64:
+; SI: ds_min_rtn_u64
+; SI: s_endpgm
 define void @lds_atomic_umin_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_umin_ret_i64_offset:
-; SI: DS_MIN_RTN_U64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
+; SI: ds_min_rtn_u64 {{.*}} offset:32
+; SI: s_endpgm
 define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
   %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
@@ -231,21 +228,240 @@ define void @lds_atomic_umin_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspac
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_umax_ret_i64:
-; SI: DS_MAX_RTN_U64
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64:
+; SI: ds_max_rtn_u64
+; SI: s_endpgm
 define void @lds_atomic_umax_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @lds_atomic_umax_ret_i64_offset:
-; SI: DS_MAX_RTN_U64 {{.*}} 0x20
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
+; SI: ds_max_rtn_u64 {{.*}} offset:32
+; SI: s_endpgm
 define void @lds_atomic_umax_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) nounwind {
   %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
   %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
+
+; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64:
+; SI: ds_wrxchg_rtn_b64
+; SI: s_endpgm
+define void @lds_atomic_xchg_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw xchg i64 addrspace(3)* %ptr, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
+; SI: ds_wrxchg_rtn_b64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_xchg_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw xchg i64 addrspace(3)* %gep, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64:
+; SI: ds_add_u64
+; SI: s_endpgm
+define void @lds_atomic_add_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
+; SI: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SI: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
+; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
+; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
+; SI: ds_add_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}} offset:32 [M0]
+; SI: s_endpgm
+define void @lds_atomic_add_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i64 4
+  %result = atomicrmw add i64 addrspace(3)* %gep, i64 9 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64:
+; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; SI: ds_inc_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; SI: s_endpgm
+define void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw add i64 addrspace(3)* %ptr, i64 1 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_inc_noret_i64_offset:
+; SI: ds_inc_u64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw add i64 addrspace(3)* %gep, i64 1 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64:
+; SI: ds_sub_u64
+; SI: s_endpgm
+define void @lds_atomic_sub_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
+; SI: ds_sub_u64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_sub_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64:
+; SI: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], -1
+; SI: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], -1
+; SI: ds_dec_u64 [[VPTR]], v{{\[}}[[LOVDATA]]:[[HIVDATA]]{{\]}}
+; SI: s_endpgm
+define void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw sub i64 addrspace(3)* %ptr, i64 1 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_dec_noret_i64_offset:
+; SI: ds_dec_u64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw sub i64 addrspace(3)* %gep, i64 1 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64:
+; SI: ds_and_b64
+; SI: s_endpgm
+define void @lds_atomic_and_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw and i64 addrspace(3)* %ptr, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
+; SI: ds_and_b64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_and_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw and i64 addrspace(3)* %gep, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64:
+; SI: ds_or_b64
+; SI: s_endpgm
+define void @lds_atomic_or_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw or i64 addrspace(3)* %ptr, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
+; SI: ds_or_b64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_or_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw or i64 addrspace(3)* %gep, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64:
+; SI: ds_xor_b64
+; SI: s_endpgm
+define void @lds_atomic_xor_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw xor i64 addrspace(3)* %ptr, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
+; SI: ds_xor_b64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_xor_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw xor i64 addrspace(3)* %gep, i64 4 seq_cst
+  ret void
+}
+
+; FIXME: There is no atomic nand instr
+; XFUNC-LABEL: {{^}}lds_atomic_nand_noret_i64:uction, so we somehow need to expand this.
+; define void @lds_atomic_nand_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+;   %result = atomicrmw nand i64 addrspace(3)* %ptr, i32 4 seq_cst
+;   ret void
+; }
+
+; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64:
+; SI: ds_min_i64
+; SI: s_endpgm
+define void @lds_atomic_min_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw min i64 addrspace(3)* %ptr, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
+; SI: ds_min_i64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_min_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw min i64 addrspace(3)* %gep, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64:
+; SI: ds_max_i64
+; SI: s_endpgm
+define void @lds_atomic_max_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw max i64 addrspace(3)* %ptr, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
+; SI: ds_max_i64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_max_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw max i64 addrspace(3)* %gep, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64:
+; SI: ds_min_u64
+; SI: s_endpgm
+define void @lds_atomic_umin_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw umin i64 addrspace(3)* %ptr, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
+; SI: ds_min_u64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_umin_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw umin i64 addrspace(3)* %gep, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64:
+; SI: ds_max_u64
+; SI: s_endpgm
+define void @lds_atomic_umax_noret_i64(i64 addrspace(3)* %ptr) nounwind {
+  %result = atomicrmw umax i64 addrspace(3)* %ptr, i64 4 seq_cst
+  ret void
+}
+
+; FUNC-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
+; SI: ds_max_u64 {{.*}} offset:32
+; SI: s_endpgm
+define void @lds_atomic_umax_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i64 addrspace(3)* %ptr, i32 4
+  %result = atomicrmw umax i64 addrspace(3)* %gep, i64 4 seq_cst
+  ret void
+}
diff --git a/test/CodeGen/R600/local-memory-two-objects.ll b/test/CodeGen/R600/local-memory-two-objects.ll
index e29e4cc88fd9..5c77ad5e6ca1 100644
--- a/test/CodeGen/R600/local-memory-two-objects.ll
+++ b/test/CodeGen/R600/local-memory-two-objects.ll
@@ -1,24 +1,25 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=SI %s
+; RUN: llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=CI %s
 
-@local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4
-@local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] zeroinitializer, align 4
+@local_memory_two_objects.local_mem0 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
+@local_memory_two_objects.local_mem1 = internal unnamed_addr addrspace(3) global [4 x i32] undef, align 4
 
-; EG-CHECK: @local_memory_two_objects
+; EG-CHECK: {{^}}local_memory_two_objects:
 
 ; Check that the LDS size emitted correctly
 ; EG-CHECK: .long 166120
 ; EG-CHECK-NEXT: .long 8
 ; SI-CHECK: .long 47180
-; SI-CHECK-NEXT: .long 32768
+; SI-CHECK-NEXT: .long 38792
 
 ; We would like to check the the lds writes are using different
 ; addresses, but due to variations in the scheduler, we can't do
 ; this consistently on evergreen GPUs.
 ; EG-CHECK: LDS_WRITE
 ; EG-CHECK: LDS_WRITE
-; SI-CHECK: DS_WRITE_B32 {{v[0-9]*}}, v[[ADDRW:[0-9]*]]
-; SI-CHECK-NOT: DS_WRITE_B32 {{v[0-9]*}}, v[[ADDRW]]
+; SI-CHECK: ds_write_b32 {{v[0-9]*}}, v[[ADDRW:[0-9]*]]
+; SI-CHECK-NOT: ds_write_b32 {{v[0-9]*}}, v[[ADDRW]]
 
 ; GROUP_BARRIER must be the last instruction in a clause
 ; EG-CHECK: GROUP_BARRIER
@@ -28,8 +29,10 @@
 ; constant offsets.
 ; EG-CHECK: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
 ; EG-CHECK-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
-; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]], 0x10
-; SI-CHECK: DS_READ_B32 {{v[0-9]+}}, [[ADDRR]], 0x0,
+; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
+; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]] [M0]
+; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16 [M0]
+; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] [M0]
 
 define void @local_memory_two_objects(i32 addrspace(1)* %out) {
 entry:
diff --git a/test/CodeGen/R600/local-memory.ll b/test/CodeGen/R600/local-memory.ll
index 51af4844cb5a..68e72c556f66 100644
--- a/test/CodeGen/R600/local-memory.ll
+++ b/test/CodeGen/R600/local-memory.ll
@@ -1,32 +1,30 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=CI-CHECK %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 
-@local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] zeroinitializer, align 4
+@local_memory.local_mem = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
 
-; EG-CHECK-LABEL: @local_memory
-; SI-CHECK-LABEL: @local_memory
-; CI-CHECK-LABEL: @local_memory
+; FUNC-LABEL: {{^}}local_memory:
 
 ; Check that the LDS size emitted correctly
-; EG-CHECK: .long 166120
-; EG-CHECK-NEXT: .long 128
-; SI-CHECK: .long 47180
-; SI-CHECK-NEXT: .long 65536
-; CI-CHECK: .long 47180
-; CI-CHECK-NEXT: .long 32768
+; EG: .long 166120
+; EG-NEXT: .long 128
+; SI: .long 47180
+; SI-NEXT: .long 71560
+; CI: .long 47180
+; CI-NEXT: .long 38792
 
-; EG-CHECK: LDS_WRITE
-; SI-CHECK-NOT: S_WQM_B64
-; SI-CHECK: DS_WRITE_B32
+; EG: LDS_WRITE
+; SI-NOT: s_wqm_b64
+; SI: ds_write_b32
 
 ; GROUP_BARRIER must be the last instruction in a clause
-; EG-CHECK: GROUP_BARRIER
-; EG-CHECK-NEXT: ALU clause
-; SI-CHECK: S_BARRIER
+; EG: GROUP_BARRIER
+; EG-NEXT: ALU clause
+; SI: s_barrier
 
-; EG-CHECK: LDS_READ_RET
-; SI-CHECK: DS_READ_B32 {{v[0-9]+}},
+; EG: LDS_READ_RET
+; SI: ds_read_b32 {{v[0-9]+}},
 
 define void @local_memory(i32 addrspace(1)* %out) {
 entry:
diff --git a/test/CodeGen/R600/loop-address.ll b/test/CodeGen/R600/loop-address.ll
index b46d8e9dfb04..03e0f011fffc 100644
--- a/test/CodeGen/R600/loop-address.ll
+++ b/test/CodeGen/R600/loop-address.ll
@@ -31,7 +31,7 @@ attributes #0 = { nounwind "fp-contract-model"="standard" "relocation-model"="pi
 
 !opencl.kernels = !{!0, !1, !2, !3}
 
-!0 = metadata !{void (i32 addrspace(1)*, i32)* @loop_ge}
-!1 = metadata !{null}
-!2 = metadata !{null}
-!3 = metadata !{null}
+!0 = !{void (i32 addrspace(1)*, i32)* @loop_ge}
+!1 = !{null}
+!2 = !{null}
+!3 = !{null}
diff --git a/test/CodeGen/R600/loop-idiom.ll b/test/CodeGen/R600/loop-idiom.ll
index 128f661077ea..847a34bc4cda 100644
--- a/test/CodeGen/R600/loop-idiom.ll
+++ b/test/CodeGen/R600/loop-idiom.ll
@@ -1,5 +1,5 @@
 ; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
-; RUN: opt -basicaa -loop-idiom -S < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: opt -basicaa -loop-idiom -S < %s -march=amdgcn -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
 
 target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
 target triple = "r600--"
@@ -10,8 +10,8 @@ target triple = "r600--"
 ; implementations of these for R600.
 
 ; FUNC: @no_memcpy
-; R600-NOT: @llvm.memcpy
-; SI-NOT: @llvm.memcpy
+; R600-NOT: {{^}}llvm.memcpy
+; SI-NOT: {{^}}llvm.memcpy
 define void @no_memcpy(i8 addrspace(3)* %in, i32 %size) {
 entry:
   %dest = alloca i8, i32 32
@@ -32,10 +32,10 @@ for.end:
 }
 
 ; FUNC: @no_memset
-; R600-NOT: @llvm.memset
-; R600-NOT: @memset_pattern16
-; SI-NOT: @llvm.memset
-; SI-NOT: @memset_pattern16
+; R600-NOT: {{^}}llvm.memset
+; R600-NOT: {{^}}memset_pattern16:
+; SI-NOT: {{^}}llvm.memset
+; SI-NOT: {{^}}memset_pattern16:
 define void @no_memset(i32 %size) {
 entry:
   %dest = alloca i8, i32 32
diff --git a/test/CodeGen/R600/lshl.ll b/test/CodeGen/R600/lshl.ll
index 216283910009..66f7cbf1d5a6 100644
--- a/test/CodeGen/R600/lshl.ll
+++ b/test/CodeGen/R600/lshl.ll
@@ -1,6 +1,6 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK: S_LSHL_B32 s{{[0-9]}}, s{{[0-9]}}, 1
+;CHECK: s_lshl_b32 s{{[0-9]}}, s{{[0-9]}}, 1
 
 define void @test(i32 %p) {
    %i = mul i32 %p, 2
diff --git a/test/CodeGen/R600/lshr.ll b/test/CodeGen/R600/lshr.ll
index 886d1c4854df..ff6e6fd14890 100644
--- a/test/CodeGen/R600/lshr.ll
+++ b/test/CodeGen/R600/lshr.ll
@@ -1,6 +1,6 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK: S_LSHR_B32 s{{[0-9]}}, s{{[0-9]}}, 1
+;CHECK: s_lshr_b32 s{{[0-9]}}, s{{[0-9]}}, 1
 
 define void @test(i32 %p) {
    %i = udiv i32 %p, 2
diff --git a/test/CodeGen/R600/m0-spill.ll b/test/CodeGen/R600/m0-spill.ll
new file mode 100644
index 000000000000..dc9206ed3f95
--- /dev/null
+++ b/test/CodeGen/R600/m0-spill.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
+
+@lds = external addrspace(3) global [64 x float]
+
+; CHECK-LABEL: {{^}}main:
+; CHECK-NOT: v_readlane_b32 m0
+define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" {
+main_body:
+  %4 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
+  %cmp = fcmp ueq float 0.0, %4
+  br i1 %cmp, label %if, label %else
+
+if:
+  %lds_ptr = getelementptr [64 x float] addrspace(3)* @lds, i32 0, i32 0
+  %lds_data = load float addrspace(3)* %lds_ptr
+  br label %endif
+
+else:
+  %interp = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %3)
+  br label %endif
+
+endif:
+  %export = phi float [%lds_data, %if], [%interp, %else]
+  %5 = call i32 @llvm.SI.packf16(float %export, float %export)
+  %6 = bitcast i32 %5 to float
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %6, float %6, float %6, float %6)
+  ret void
+}
+
+declare float @llvm.SI.fs.constant(i32, i32, i32) readnone
+
+declare i32 @llvm.SI.packf16(float, float) readnone
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
diff --git a/test/CodeGen/R600/mad-sub.ll b/test/CodeGen/R600/mad-sub.ll
new file mode 100644
index 000000000000..7b4020d2973c
--- /dev/null
+++ b/test/CodeGen/R600/mad-sub.ll
@@ -0,0 +1,215 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() #0
+declare float @llvm.fabs.f32(float) #0
+
+; FUNC-LABEL: {{^}}mad_sub_f32:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -[[REGC]]
+; SI: buffer_store_dword [[RESULT]]
+define void @mad_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
+  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+  %tid.ext = sext i32 %tid to i64
+  %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+  %add1 = add i64 %tid.ext, 1
+  %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+  %add2 = add i64 %tid.ext, 2
+  %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
+  %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
+  %a = load float addrspace(1)* %gep0, align 4
+  %b = load float addrspace(1)* %gep1, align 4
+  %c = load float addrspace(1)* %gep2, align 4
+  %mul = fmul float %a, %b
+  %sub = fsub float %mul, %c
+  store float %sub, float addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}mad_sub_inv_f32:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], [[REGC]]
+; SI: buffer_store_dword [[RESULT]]
+define void @mad_sub_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
+  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+  %tid.ext = sext i32 %tid to i64
+  %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+  %add1 = add i64 %tid.ext, 1
+  %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+  %add2 = add i64 %tid.ext, 2
+  %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
+  %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
+  %a = load float addrspace(1)* %gep0, align 4
+  %b = load float addrspace(1)* %gep1, align 4
+  %c = load float addrspace(1)* %gep2, align 4
+  %mul = fmul float %a, %b
+  %sub = fsub float %c, %mul
+  store float %sub, float addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}mad_sub_f64:
+; SI: v_mul_f64
+; SI: v_add_f64
+define void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double addrspace(1)* noalias nocapture readonly %ptr) #1 {
+  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+  %tid.ext = sext i32 %tid to i64
+  %gep0 = getelementptr double addrspace(1)* %ptr, i64 %tid.ext
+  %add1 = add i64 %tid.ext, 1
+  %gep1 = getelementptr double addrspace(1)* %ptr, i64 %add1
+  %add2 = add i64 %tid.ext, 2
+  %gep2 = getelementptr double addrspace(1)* %ptr, i64 %add2
+  %outgep = getelementptr double addrspace(1)* %out, i64 %tid.ext
+  %a = load double addrspace(1)* %gep0, align 8
+  %b = load double addrspace(1)* %gep1, align 8
+  %c = load double addrspace(1)* %gep2, align 8
+  %mul = fmul double %a, %b
+  %sub = fsub double %mul, %c
+  store double %sub, double addrspace(1)* %outgep, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}mad_sub_fabs_f32:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], -|[[REGC]]|
+; SI: buffer_store_dword [[RESULT]]
+define void @mad_sub_fabs_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
+  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+  %tid.ext = sext i32 %tid to i64
+  %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+  %add1 = add i64 %tid.ext, 1
+  %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+  %add2 = add i64 %tid.ext, 2
+  %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
+  %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
+  %a = load float addrspace(1)* %gep0, align 4
+  %b = load float addrspace(1)* %gep1, align 4
+  %c = load float addrspace(1)* %gep2, align 4
+  %c.abs = call float @llvm.fabs.f32(float %c) #0
+  %mul = fmul float %a, %b
+  %sub = fsub float %mul, %c.abs
+  store float %sub, float addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}mad_sub_fabs_inv_f32:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], -[[REGA]], [[REGB]], |[[REGC]]|
+; SI: buffer_store_dword [[RESULT]]
+define void @mad_sub_fabs_inv_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
+  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+  %tid.ext = sext i32 %tid to i64
+  %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+  %add1 = add i64 %tid.ext, 1
+  %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+  %add2 = add i64 %tid.ext, 2
+  %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
+  %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
+  %a = load float addrspace(1)* %gep0, align 4
+  %b = load float addrspace(1)* %gep1, align 4
+  %c = load float addrspace(1)* %gep2, align 4
+  %c.abs = call float @llvm.fabs.f32(float %c) #0
+  %mul = fmul float %a, %b
+  %sub = fsub float %c.abs, %mul
+  store float %sub, float addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}neg_neg_mad_f32:
+; SI: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+define void @neg_neg_mad_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
+  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+  %tid.ext = sext i32 %tid to i64
+  %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+  %add1 = add i64 %tid.ext, 1
+  %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+  %add2 = add i64 %tid.ext, 2
+  %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
+  %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
+  %a = load float addrspace(1)* %gep0, align 4
+  %b = load float addrspace(1)* %gep1, align 4
+  %c = load float addrspace(1)* %gep2, align 4
+  %nega = fsub float -0.000000e+00, %a
+  %negb = fsub float -0.000000e+00, %b
+  %mul = fmul float %nega, %negb
+  %sub = fadd float %mul, %c
+  store float %sub, float addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}mad_fabs_sub_f32:
+; SI: buffer_load_dword [[REGA:v[0-9]+]]
+; SI: buffer_load_dword [[REGB:v[0-9]+]]
+; SI: buffer_load_dword [[REGC:v[0-9]+]]
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], [[REGA]], |[[REGB]]|, -[[REGC]]
+; SI: buffer_store_dword [[RESULT]]
+define void @mad_fabs_sub_f32(float addrspace(1)* noalias nocapture %out, float addrspace(1)* noalias nocapture readonly %ptr) #1 {
+  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
+  %tid.ext = sext i32 %tid to i64
+  %gep0 = getelementptr float addrspace(1)* %ptr, i64 %tid.ext
+  %add1 = add i64 %tid.ext, 1
+  %gep1 = getelementptr float addrspace(1)* %ptr, i64 %add1
+  %add2 = add i64 %tid.ext, 2
+  %gep2 = getelementptr float addrspace(1)* %ptr, i64 %add2
+  %outgep = getelementptr float addrspace(1)* %out, i64 %tid.ext
+  %a = load float addrspace(1)* %gep0, align 4
+  %b = load float addrspace(1)* %gep1, align 4
+  %c = load float addrspace(1)* %gep2, align 4
+  %b.abs = call float @llvm.fabs.f32(float %b) #0
+  %mul = fmul float %a, %b.abs
+  %sub = fsub float %mul, %c
+  store float %sub, float addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fsub_c_fadd_a_a:
+; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
+; SI: buffer_store_dword [[RESULT]]
+define void @fsub_c_fadd_a_a(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+  %r1 = load float addrspace(1)* %gep.0
+  %r2 = load float addrspace(1)* %gep.1
+
+  %add = fadd float %r1, %r1
+  %r3 = fsub float %r2, %add
+
+  store float %r3, float addrspace(1)* %gep.out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}fsub_fadd_a_a_c:
+; SI-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
+; SI: buffer_store_dword [[RESULT]]
+define void @fsub_fadd_a_a_c(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.0 = getelementptr float addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr float addrspace(1)* %out, i32 %tid
+
+  %r1 = load float addrspace(1)* %gep.0
+  %r2 = load float addrspace(1)* %gep.1
+
+  %add = fadd float %r1, %r1
+  %r3 = fsub float %add, %r2
+
+  store float %r3, float addrspace(1)* %gep.out
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/R600/mad_int24.ll b/test/CodeGen/R600/mad_int24.ll
index abb52907b9b8..60f6e15f2f57 100644
--- a/test/CodeGen/R600/mad_int24.ll
+++ b/test/CodeGen/R600/mad_int24.ll
@@ -1,15 +1,17 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
 
-; FUNC-LABEL: @i32_mad24
+declare i32 @llvm.AMDGPU.imul24(i32, i32) nounwind readnone
+
+; FUNC-LABEL: {{^}}i32_mad24:
 ; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
 ; EG: MULLO_INT
 ; Make sure we aren't masking the inputs.
 ; CM-NOT: AND
 ; CM: MULADD_INT24
-; SI-NOT: AND
-; SI: V_MAD_I32_I24
+; SI-NOT: and
+; SI: v_mad_i32_i24
 define void @i32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
 entry:
   %0 = shl i32 %a, 8
@@ -21,3 +23,12 @@ entry:
   store i32 %3, i32 addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: @test_imul24
+; SI: v_mad_i32_i24
+define void @test_imul24(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) nounwind {
+  %mul = call i32 @llvm.AMDGPU.imul24(i32 %src0, i32 %src1) nounwind readnone
+  %add = add i32 %mul, %src2
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/mad_uint24.ll b/test/CodeGen/R600/mad_uint24.ll
index 0f0893bd53c4..db776521151a 100644
--- a/test/CodeGen/R600/mad_uint24.ll
+++ b/test/CodeGen/R600/mad_uint24.ll
@@ -1,10 +1,10 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
 
-; FUNC-LABEL: @u32_mad24
+; FUNC-LABEL: {{^}}u32_mad24:
 ; EG: MULADD_UINT24
-; SI: V_MAD_U32_U24
+; SI: v_mad_u32_u24
 
 define void @u32_mad24(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
 entry:
@@ -18,14 +18,14 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i16_mad24
+; FUNC-LABEL: {{^}}i16_mad24:
 ; The order of A and B does not matter.
 ; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
 ; The result must be sign-extended
 ; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
 ; EG: 16
-; SI: V_MAD_U32_U24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: V_BFE_I32 v{{[0-9]}}, [[MAD]], 0, 16
+; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: v_bfe_i32 v{{[0-9]}}, [[MAD]], 0, 16
 
 define void @i16_mad24(i32 addrspace(1)* %out, i16 %a, i16 %b, i16 %c) {
 entry:
@@ -36,13 +36,13 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i8_mad24
+; FUNC-LABEL: {{^}}i8_mad24:
 ; EG: MULADD_UINT24 {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
 ; The result must be sign-extended
 ; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
 ; EG: 8
-; SI: V_MAD_U32_U24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8
+; SI: v_mad_u32_u24 [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8
 
 define void @i8_mad24(i32 addrspace(1)* %out, i8 %a, i8 %b, i8 %c) {
 entry:
@@ -60,9 +60,9 @@ entry:
 ; 24-bit mad pattern wasn't being matched.
 
 ; Check that the select instruction is not deleted.
-; FUNC-LABEL: @i24_i32_i32_mad
+; FUNC-LABEL: {{^}}i24_i32_i32_mad:
 ; EG: CNDE_INT
-; SI: V_CNDMASK
+; SI: v_cndmask
 define void @i24_i32_i32_mad(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
 entry:
   %0 = ashr i32 %a, 8
diff --git a/test/CodeGen/R600/max-literals.ll b/test/CodeGen/R600/max-literals.ll
index 65a6d2b5fc95..c357524b140f 100644
--- a/test/CodeGen/R600/max-literals.ll
+++ b/test/CodeGen/R600/max-literals.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-; CHECK: @main
+; CHECK-LABEL: {{^}}main:
 ; CHECK: ADD *
 
 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
@@ -29,7 +29,7 @@ main_body:
   ret void
 }
 
-; CHECK: @main
+; CHECK-LABEL: {{^}}main2:
 ; CHECK-NOT: ADD *
 
 define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
diff --git a/test/CodeGen/R600/max.ll b/test/CodeGen/R600/max.ll
new file mode 100644
index 000000000000..20af99332453
--- /dev/null
+++ b/test/CodeGen/R600/max.ll
@@ -0,0 +1,99 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @v_test_imax_sge_i32
+; SI: v_max_i32_e32
+define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp sge i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_imax_sge_i32
+; SI: s_max_i32
+define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp sge i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_imax_sgt_i32
+; SI: v_max_i32_e32
+define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp sgt i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_imax_sgt_i32
+; SI: s_max_i32
+define void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp sgt i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_umax_uge_i32
+; SI: v_max_u32_e32
+define void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp uge i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_umax_uge_i32
+; SI: s_max_u32
+define void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp uge i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_umax_ugt_i32
+; SI: v_max_u32_e32
+define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp ugt i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_umax_ugt_i32
+; SI: s_max_u32
+define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp ugt i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/max3.ll b/test/CodeGen/R600/max3.ll
new file mode 100644
index 000000000000..f905e171b334
--- /dev/null
+++ b/test/CodeGen/R600/max3.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @v_test_imax3_sgt_i32
+; SI: v_max3_i32
+define void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %c = load i32 addrspace(1)* %gep2, align 4
+  %icmp0 = icmp sgt i32 %a, %b
+  %i0 = select i1 %icmp0, i32 %a, i32 %b
+  %icmp1 = icmp sgt i32 %i0, %c
+  %i1 = select i1 %icmp1, i32 %i0, i32 %c
+  store i32 %i1, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_umax3_ugt_i32
+; SI: v_max3_u32
+define void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %c = load i32 addrspace(1)* %gep2, align 4
+  %icmp0 = icmp ugt i32 %a, %b
+  %i0 = select i1 %icmp0, i32 %a, i32 %b
+  %icmp1 = icmp ugt i32 %i0, %c
+  %i1 = select i1 %icmp1, i32 %i0, i32 %c
+  store i32 %i1, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/min.ll b/test/CodeGen/R600/min.ll
new file mode 100644
index 000000000000..00ba5c6cddb4
--- /dev/null
+++ b/test/CodeGen/R600/min.ll
@@ -0,0 +1,120 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @v_test_imin_sle_i32
+; SI: v_min_i32_e32
+define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp sle i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_imin_sle_i32
+; SI: s_min_i32
+define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp sle i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_imin_slt_i32
+; SI: v_min_i32_e32
+define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp slt i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_imin_slt_i32
+; SI: s_min_i32
+define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp slt i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_umin_ule_i32
+; SI: v_min_u32_e32
+define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp ule i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_umin_ule_i32
+; SI: s_min_u32
+define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp ule i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_umin_ult_i32
+; SI: v_min_u32_e32
+define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp ult i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_umin_ult_i32
+; SI: s_min_u32
+define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp ult i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_umin_ult_i32_multi_use
+; SI-NOT: v_min
+; SI: v_cmp_lt_u32
+; SI-NEXT: v_cndmask_b32
+; SI-NOT: v_min
+; SI: s_endpgm
+define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace(1)* %out1, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep0 = getelementptr i32 addrspace(1)* %out0, i32 %tid
+  %outgep1 = getelementptr i1 addrspace(1)* %out1, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp ult i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep0, align 4
+  store i1 %cmp, i1 addrspace(1)* %outgep1
+  ret void
+}
diff --git a/test/CodeGen/R600/min3.ll b/test/CodeGen/R600/min3.ll
new file mode 100644
index 000000000000..6c11a650fcbb
--- /dev/null
+++ b/test/CodeGen/R600/min3.ll
@@ -0,0 +1,111 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @v_test_imin3_slt_i32
+; SI: v_min3_i32
+define void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %c = load i32 addrspace(1)* %gep2, align 4
+  %icmp0 = icmp slt i32 %a, %b
+  %i0 = select i1 %icmp0, i32 %a, i32 %b
+  %icmp1 = icmp slt i32 %i0, %c
+  %i1 = select i1 %icmp1, i32 %i0, i32 %c
+  store i32 %i1, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_umin3_ult_i32
+; SI: v_min3_u32
+define void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %c = load i32 addrspace(1)* %gep2, align 4
+  %icmp0 = icmp ult i32 %a, %b
+  %i0 = select i1 %icmp0, i32 %a, i32 %b
+  %icmp1 = icmp ult i32 %i0, %c
+  %i1 = select i1 %icmp1, i32 %i0, i32 %c
+  store i32 %i1, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_umin_umin_umin
+; SI: v_min_i32
+; SI: v_min3_i32
+define void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tid2 = mul i32 %tid, 2
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+
+  %gep3 = getelementptr i32 addrspace(1)* %aptr, i32 %tid2
+  %gep4 = getelementptr i32 addrspace(1)* %bptr, i32 %tid2
+  %gep5 = getelementptr i32 addrspace(1)* %cptr, i32 %tid2
+
+  %outgep0 = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %outgep1 = getelementptr i32 addrspace(1)* %out, i32 %tid2
+
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %c = load i32 addrspace(1)* %gep2, align 4
+  %d = load i32 addrspace(1)* %gep3, align 4
+
+  %icmp0 = icmp slt i32 %a, %b
+  %i0 = select i1 %icmp0, i32 %a, i32 %b
+
+  %icmp1 = icmp slt i32 %c, %d
+  %i1 = select i1 %icmp1, i32 %c, i32 %d
+
+  %icmp2 = icmp slt i32 %i0, %i1
+  %i2 = select i1 %icmp2, i32 %i0, i32 %i1
+
+  store i32 %i2, i32 addrspace(1)* %outgep1, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_umin3_2_uses
+; SI-NOT: v_min3
+define void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %tid2 = mul i32 %tid, 2
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
+
+  %gep3 = getelementptr i32 addrspace(1)* %aptr, i32 %tid2
+  %gep4 = getelementptr i32 addrspace(1)* %bptr, i32 %tid2
+  %gep5 = getelementptr i32 addrspace(1)* %cptr, i32 %tid2
+
+  %outgep0 = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %outgep1 = getelementptr i32 addrspace(1)* %out, i32 %tid2
+
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %c = load i32 addrspace(1)* %gep2, align 4
+  %d = load i32 addrspace(1)* %gep3, align 4
+
+  %icmp0 = icmp slt i32 %a, %b
+  %i0 = select i1 %icmp0, i32 %a, i32 %b
+
+  %icmp1 = icmp slt i32 %c, %d
+  %i1 = select i1 %icmp1, i32 %c, i32 %d
+
+  %icmp2 = icmp slt i32 %i0, %c
+  %i2 = select i1 %icmp2, i32 %i0, i32 %c
+
+  store i32 %i2, i32 addrspace(1)* %outgep0, align 4
+  store i32 %i0, i32 addrspace(1)* %outgep1, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/missing-store.ll b/test/CodeGen/R600/missing-store.ll
new file mode 100644
index 000000000000..8ddef35a694a
--- /dev/null
+++ b/test/CodeGen/R600/missing-store.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
+
+@ptr_load = addrspace(3) global i32 addrspace(2)* undef, align 8
+
+; Make sure when the load from %ptr2 is folded the chain isn't lost,
+; resulting in losing the store to gptr
+
+; FUNC-LABEL: {{^}}missing_store_reduced:
+; SI: ds_read_b64
+; SI: buffer_store_dword
+; SI: buffer_load_dword
+; SI: buffer_store_dword
+; SI: s_endpgm
+define void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
+  %ptr0 = load i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
+  %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
+
+  store i32 99, i32 addrspace(1)* %gptr, align 4
+  %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+
+  store i32 %tmp2, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/CodeGen/R600/mubuf.ll b/test/CodeGen/R600/mubuf.ll
index f465d3dad8f0..9c2a17ce04f4 100644
--- a/test/CodeGen/R600/mubuf.ll
+++ b/test/CodeGen/R600/mubuf.ll
@@ -1,12 +1,14 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s
+
+declare i32 @llvm.r600.read.tidig.x() readnone
 
 ;;;==========================================================================;;;
 ;;; MUBUF LOAD TESTS
 ;;;==========================================================================;;;
 
 ; MUBUF load with an immediate byte offset that fits into 12-bits
-; CHECK-LABEL: @mubuf_load0
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
+; CHECK-LABEL: {{^}}mubuf_load0:
+; CHECK: buffer_load_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x30,0xe0
 define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %in, i64 1
@@ -16,8 +18,8 @@ entry:
 }
 
 ; MUBUF load with the largest possible immediate offset
-; CHECK-LABEL: @mubuf_load1
-; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0xfff ; encoding: [0xff,0x8f
+; CHECK-LABEL: {{^}}mubuf_load1:
+; CHECK: buffer_load_ubyte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x20,0xe0
 define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i8 addrspace(1)* %in, i64 4095
@@ -27,8 +29,8 @@ entry:
 }
 
 ; MUBUF load with an immediate byte offset that doesn't fit into 12-bits
-; CHECK-LABEL: @mubuf_load2
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x0 ; encoding: [0x00,0x80
+; CHECK-LABEL: {{^}}mubuf_load2:
+; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 ; encoding: [0x00,0x80,0x30,0xe0
 define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %in, i64 1024
@@ -38,9 +40,9 @@ entry:
 }
 
 ; MUBUF load with a 12-bit immediate offset and a register offset
-; CHECK-LABEL: @mubuf_load3
+; CHECK-LABEL: {{^}}mubuf_load3:
 ; CHECK-NOT: ADD
-; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
+; CHECK: buffer_load_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x30,0xe0
 define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %in, i64 %offset
@@ -50,13 +52,52 @@ entry:
   ret void
 }
 
+; CHECK-LABEL: {{^}}soffset_max_imm:
+; CHECK: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 64 offen glc
+define void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
+main_body:
+  %tmp0 = getelementptr [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
+  %tmp1 = load <16 x i8> addrspace(2)* %tmp0
+  %tmp2 = shl i32 %6, 2
+  %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
+  %tmp4 = add i32 %6, 16
+  %tmp5 = bitcast float 0.0 to i32
+  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
+  ret void
+}
+
+; Make sure immediates that aren't inline constants don't get folded into
+; the soffset operand.
+; FIXME: for this test we should be smart enough to shift the immediate into
+; the offset field.
+; CHECK-LABEL: {{^}}soffset_no_fold:
+; CHECK: s_movk_i32 [[SOFFSET:s[0-9]+]], 0x41
+; CHECK: buffer_load_dword v{{[0-9+]}}, v{{[0-9+]}}, s[{{[0-9]+}}:{{[0-9]+}}], [[SOFFSET]] offen glc
+define void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
+main_body:
+  %tmp0 = getelementptr [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
+  %tmp1 = load <16 x i8> addrspace(2)* %tmp0
+  %tmp2 = shl i32 %6, 2
+  %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
+  %tmp4 = add i32 %6, 16
+  %tmp5 = bitcast float 0.0 to i32
+  call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
+  ret void
+}
+
+declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #3
+declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+
+attributes #1 = { "ShaderType"="2" "unsafe-fp-math"="true" }
+attributes #3 = { nounwind readonly }
+
 ;;;==========================================================================;;;
 ;;; MUBUF STORE TESTS
 ;;;==========================================================================;;;
 
 ; MUBUF store with an immediate byte offset that fits into 12-bits
-; CHECK-LABEL: @mubuf_store0
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
+; CHECK-LABEL: {{^}}mubuf_store0:
+; CHECK: buffer_store_dword v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4 ; encoding: [0x04,0x00,0x70,0xe0
 define void @mubuf_store0(i32 addrspace(1)* %out) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %out, i64 1
@@ -65,8 +106,8 @@ entry:
 }
 
 ; MUBUF store with the largest possible immediate offset
-; CHECK-LABEL: @mubuf_store1
-; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0xfff ; encoding: [0xff,0x8f
+; CHECK-LABEL: {{^}}mubuf_store1:
+; CHECK: buffer_store_byte v{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0 offset:4095 ; encoding: [0xff,0x0f,0x60,0xe0
 
 define void @mubuf_store1(i8 addrspace(1)* %out) {
 entry:
@@ -76,8 +117,8 @@ entry:
 }
 
 ; MUBUF store with an immediate byte offset that doesn't fit into 12-bits
-; CHECK-LABEL: @mubuf_store2
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x0 ; encoding: [0x00,0x80
+; CHECK-LABEL: {{^}}mubuf_store2:
+; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]:[0-9]}}], 0 addr64 ; encoding: [0x00,0x80,0x70,0xe0
 define void @mubuf_store2(i32 addrspace(1)* %out) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %out, i64 1024
@@ -86,9 +127,9 @@ entry:
 }
 
 ; MUBUF store with a 12-bit immediate offset and a register offset
-; CHECK-LABEL: @mubuf_store3
+; CHECK-LABEL: {{^}}mubuf_store3:
 ; CHECK-NOT: ADD
-; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
+; CHECK: buffer_store_dword v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:4 ; encoding: [0x04,0x80,0x70,0xe0
 define void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) {
 entry:
   %0 = getelementptr i32 addrspace(1)* %out, i64 %offset
@@ -96,3 +137,35 @@ entry:
   store i32 0, i32 addrspace(1)* %1
   ret void
 }
+
+; CHECK-LABEL: {{^}}store_sgpr_ptr:
+; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0
+define void @store_sgpr_ptr(i32 addrspace(1)* %out) #0 {
+  store i32 99, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_sgpr_ptr_offset:
+; CHECK: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:40
+define void @store_sgpr_ptr_offset(i32 addrspace(1)* %out) #0 {
+  %out.gep = getelementptr i32 addrspace(1)* %out, i32 10
+  store i32 99, i32 addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_sgpr_ptr_large_offset:
+; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
+define void @store_sgpr_ptr_large_offset(i32 addrspace(1)* %out) #0 {
+  %out.gep = getelementptr i32 addrspace(1)* %out, i32 32768
+  store i32 99, i32 addrspace(1)* %out.gep, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}store_vgpr_ptr:
+; CHECK: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64
+define void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 {
+  %tid = call i32 @llvm.r600.read.tidig.x() readnone
+  %out.gep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  store i32 99, i32 addrspace(1)* %out.gep, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/mul.ll b/test/CodeGen/R600/mul.ll
index d231e92e27fa..bdf18e3ff9ef 100644
--- a/test/CodeGen/R600/mul.ll
+++ b/test/CodeGen/R600/mul.ll
@@ -1,16 +1,16 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s -check-prefix=FUNC
-; RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 ; mul24 and mad24 are affected
 
-; FUNC-LABEL: @test2
+; FUNC-LABEL: {{^}}test_mul_v2i32:
 ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
-define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+define void @test_mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
   %a = load <2 x i32> addrspace(1) * %in
   %b = load <2 x i32> addrspace(1) * %b_ptr
@@ -19,18 +19,18 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   ret void
 }
 
-; FUNC-LABEL: @test4
+; FUNC-LABEL: {{^}}v_mul_v4i32:
 ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: MULLO_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_MUL_LO_I32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_mul_lo_i32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
-define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define void @v_mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
   %a = load <4 x i32> addrspace(1) * %in
   %b = load <4 x i32> addrspace(1) * %b_ptr
@@ -39,12 +39,26 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   ret void
 }
 
-; FUNC-LABEL: @trunc_i64_mul_to_i32
-; SI: S_LOAD_DWORD
-; SI: S_LOAD_DWORD
-; SI: V_MUL_LO_I32
-; SI: BUFFER_STORE_DWORD
-define void @trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+; FUNC-LABEL: {{^}}s_trunc_i64_mul_to_i32:
+; SI: s_load_dword
+; SI: s_load_dword
+; SI: s_mul_i32
+; SI: buffer_store_dword
+define void @s_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+  %mul = mul i64 %b, %a
+  %trunc = trunc i64 %mul to i32
+  store i32 %trunc, i32 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_trunc_i64_mul_to_i32:
+; SI: s_load_dword
+; SI: s_load_dword
+; SI: v_mul_lo_i32
+; SI: buffer_store_dword
+define void @v_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+  %a = load i64 addrspace(1)* %aptr, align 8
+  %b = load i64 addrspace(1)* %bptr, align 8
   %mul = mul i64 %b, %a
   %trunc = trunc i64 %mul to i32
   store i32 %trunc, i32 addrspace(1)* %out, align 8
@@ -53,11 +67,11 @@ define void @trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
 
 ; This 64-bit multiply should just use MUL_HI and MUL_LO, since the top
 ; 32-bits of both arguments are sign bits.
-; FUNC-LABEL: @mul64_sext_c
+; FUNC-LABEL: {{^}}mul64_sext_c:
 ; EG-DAG: MULLO_INT
 ; EG-DAG: MULHI_INT
-; SI-DAG: V_MUL_LO_I32
-; SI-DAG: V_MUL_HI_I32
+; SI-DAG: s_mul_i32
+; SI-DAG: v_mul_hi_i32
 define void @mul64_sext_c(i64 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = sext i32 %in to i64
@@ -66,16 +80,120 @@ entry:
   ret void
 }
 
+; FUNC-LABEL: {{^}}v_mul64_sext_c:
+; EG-DAG: MULLO_INT
+; EG-DAG: MULHI_INT
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_mul_hi_i32
+; SI: s_endpgm
+define void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %val = load i32 addrspace(1)* %in, align 4
+  %ext = sext i32 %val to i64
+  %mul = mul i64 %ext, 80
+  store i64 %mul, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_mul64_sext_inline_imm:
+; SI-DAG: v_mul_lo_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
+; SI-DAG: v_mul_hi_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
+; SI: s_endpgm
+define void @v_mul64_sext_inline_imm(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %val = load i32 addrspace(1)* %in, align 4
+  %ext = sext i32 %val to i64
+  %mul = mul i64 %ext, 9
+  store i64 %mul, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_mul_i32:
+; SI: s_load_dword [[SRC0:s[0-9]+]],
+; SI: s_load_dword [[SRC1:s[0-9]+]],
+; SI: s_mul_i32 [[SRESULT:s[0-9]+]], [[SRC0]], [[SRC1]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
+; SI: s_endpgm
+define void @s_mul_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %mul = mul i32 %a, %b
+  store i32 %mul, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_mul_i32:
+; SI: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @v_mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %a = load i32 addrspace(1)* %in
+  %b = load i32 addrspace(1)* %b_ptr
+  %result = mul i32 %a, %b
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
 ; A standard 64-bit multiply.  The expansion should be around 6 instructions.
 ; It would be difficult to match the expansion correctly without writing
 ; a really complicated list of FileCheck expressions.  I don't want
 ; to confuse people who may 'break' this test with a correct optimization,
 ; so this test just uses FUNC-LABEL to make sure the compiler does not
 ; crash with a 'failed to select' error.
-; FUNC-LABEL: @mul64
-define void @mul64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+
+; FUNC-LABEL: {{^}}s_mul_i64:
+define void @s_mul_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
+  %mul = mul i64 %a, %b
+  store i64 %mul, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_mul_i64:
+; SI: v_mul_lo_i32
+define void @v_mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
+  %a = load i64 addrspace(1)* %aptr, align 8
+  %b = load i64 addrspace(1)* %bptr, align 8
+  %mul = mul i64 %a, %b
+  store i64 %mul, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}mul32_in_branch:
+; SI: s_mul_i32
+define void @mul32_in_branch(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b, i32 %c) {
+entry:
+  %0 = icmp eq i32 %a, 0
+  br i1 %0, label %if, label %else
+
+if:
+  %1 = load i32 addrspace(1)* %in
+  br label %endif
+
+else:
+  %2 = mul i32 %a, %b
+  br label %endif
+
+endif:
+  %3 = phi i32 [%1, %if], [%2, %else]
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}mul64_in_branch:
+; SI-DAG: s_mul_i32
+; SI-DAG: v_mul_hi_u32
+; SI: s_endpgm
+define void @mul64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
 entry:
-  %0 = mul i64 %a, %b
-  store i64 %0, i64 addrspace(1)* %out
+  %0 = icmp eq i64 %a, 0
+  br i1 %0, label %if, label %else
+
+if:
+  %1 = load i64 addrspace(1)* %in
+  br label %endif
+
+else:
+  %2 = mul i64 %a, %b
+  br label %endif
+
+endif:
+  %3 = phi i64 [%1, %if], [%2, %else]
+  store i64 %3, i64 addrspace(1)* %out
   ret void
 }
diff --git a/test/CodeGen/R600/mul_int24.ll b/test/CodeGen/R600/mul_int24.ll
index 046911ba147d..eecde0d73af4 100644
--- a/test/CodeGen/R600/mul_int24.ll
+++ b/test/CodeGen/R600/mul_int24.ll
@@ -1,15 +1,15 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
 
-; FUNC-LABEL: @i32_mul24
+; FUNC-LABEL: {{^}}i32_mul24:
 ; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
 ; EG: MULLO_INT
 ; Make sure we are not masking the inputs
 ; CM-NOT: AND
 ; CM: MUL_INT24
-; SI-NOT: AND
-; SI: V_MUL_I32_I24
+; SI-NOT: and
+; SI: v_mul_i32_i24
 define void @i32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 entry:
   %0 = shl i32 %a, 8
diff --git a/test/CodeGen/R600/mul_uint24.ll b/test/CodeGen/R600/mul_uint24.ll
index 419f2751b83e..6d526c4b5d78 100644
--- a/test/CodeGen/R600/mul_uint24.ll
+++ b/test/CodeGen/R600/mul_uint24.ll
@@ -1,10 +1,10 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=FUNC
 
-; FUNC-LABEL: @u32_mul24
+; FUNC-LABEL: {{^}}u32_mul24:
 ; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, KC0[2].W
-; SI: V_MUL_U32_U24
+; SI: v_mul_u32_u24
 
 define void @u32_mul24(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 entry:
@@ -17,13 +17,13 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i16_mul24
+; FUNC-LABEL: {{^}}i16_mul24:
 ; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
 ; The result must be sign-extended
 ; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
 ; EG: 16
-; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 16,
+; SI: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 16
 define void @i16_mul24(i32 addrspace(1)* %out, i16 %a, i16 %b) {
 entry:
   %0 = mul i16 %a, %b
@@ -32,12 +32,12 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i8_mul24
+; FUNC-LABEL: {{^}}i8_mul24:
 ; EG: MUL_UINT24 {{[* ]*}}T{{[0-9]}}.[[MUL_CHAN:[XYZW]]]
 ; The result must be sign-extended
 ; EG: BFE_INT {{[* ]*}}T{{[0-9]}}.{{[XYZW]}}, PV.[[MUL_CHAN]], 0.0, literal.x
-; SI: V_MUL_U32_U24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: V_BFE_I32 v{{[0-9]}}, [[MUL]], 0, 8,
+; SI: v_mul_u32_u24_e{{(32|64)}} [[MUL:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
+; SI: v_bfe_i32 v{{[0-9]}}, [[MUL]], 0, 8
 
 define void @i8_mul24(i32 addrspace(1)* %out, i8 %a, i8 %b) {
 entry:
@@ -48,12 +48,12 @@ entry:
 }
 
 ; Multiply with 24-bit inputs and 64-bit output
-; FUNC_LABEL: @mul24_i64
+; FUNC_LABEL: {{^}}mul24_i64:
 ; EG; MUL_UINT24
 ; EG: MULHI
-; SI: V_MUL_U32_U24
+; SI: v_mul_u32_u24
 ; FIXME: SI support 24-bit mulhi
-; SI: V_MUL_HI_U32
+; SI: v_mul_hi_u32
 define void @mul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
 entry:
   %0 = shl i64 %a, 40
diff --git a/test/CodeGen/R600/mulhu.ll b/test/CodeGen/R600/mulhu.ll
index 864012748118..865971712306 100644
--- a/test/CodeGen/R600/mulhu.ll
+++ b/test/CodeGen/R600/mulhu.ll
@@ -1,8 +1,8 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK: V_MOV_B32_e32 v{{[0-9]+}}, 0xaaaaaaab
-;CHECK: V_MUL_HI_U32 v0, {{[sv][0-9]+}}, {{v[0-9]+}}
-;CHECK-NEXT: V_LSHRREV_B32_e32 v0, 1, v0
+;CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
+;CHECK: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}}
+;CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
 
 define void @test(i32 %p) {
    %i = udiv i32 %p, 3
diff --git a/test/CodeGen/R600/no-initializer-constant-addrspace.ll b/test/CodeGen/R600/no-initializer-constant-addrspace.ll
index ab82e7ee7993..3c5e127625d6 100644
--- a/test/CodeGen/R600/no-initializer-constant-addrspace.ll
+++ b/test/CodeGen/R600/no-initializer-constant-addrspace.ll
@@ -1,9 +1,9 @@
-; RUN: llc -march=r600 -mcpu=SI -o /dev/null %s
+; RUN: llc -march=amdgcn -mcpu=SI -o /dev/null %s
 ; RUN: llc -march=r600 -mcpu=cypress -o /dev/null %s
 
 @extern_const_addrspace = external unnamed_addr addrspace(2) constant [5 x i32], align 4
 
-; FUNC-LABEL: @load_extern_const_init
+; FUNC-LABEL: {{^}}load_extern_const_init:
 define void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
   %val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
   store i32 %val, i32 addrspace(1)* %out, align 4
@@ -12,7 +12,7 @@ define void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
 
 @undef_const_addrspace = unnamed_addr addrspace(2) constant [5 x i32] undef, align 4
 
-; FUNC-LABEL: @load_undef_const_init
+; FUNC-LABEL: {{^}}load_undef_const_init:
 define void @load_undef_const_init(i32 addrspace(1)* %out) nounwind {
   %val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
   store i32 %val, i32 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/no-shrink-extloads.ll b/test/CodeGen/R600/no-shrink-extloads.ll
new file mode 100644
index 000000000000..135d22d3036d
--- /dev/null
+++ b/test/CodeGen/R600/no-shrink-extloads.ll
@@ -0,0 +1,191 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; Make sure we don't turn the 32-bit argument load into a 16-bit
+; load. There aren't extending scalar lods, so that would require
+; using a buffer_load instruction.
+
+; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i16:
+; SI: s_load_dword s
+; SI: buffer_store_short v
+define void @truncate_kernarg_i32_to_i16(i16 addrspace(1)* %out, i32 %arg) nounwind {
+  %trunc = trunc i32 %arg to i16
+  store i16 %trunc, i16 addrspace(1)* %out
+  ret void
+}
+
+; It should be OK (and probably performance neutral) to reduce this,
+; but we don't know if the load is uniform yet.
+
+; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i16:
+; SI: buffer_load_dword v
+; SI: buffer_store_short v
+define void @truncate_buffer_load_i32_to_i16(i16 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i16 addrspace(1)* %out, i32 %tid
+  %load = load i32 addrspace(1)* %gep.in
+  %trunc = trunc i32 %load to i16
+  store i16 %trunc, i16 addrspace(1)* %gep.out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i8:
+; SI: s_load_dword s
+; SI: buffer_store_byte v
+define void @truncate_kernarg_i32_to_i8(i8 addrspace(1)* %out, i32 %arg) nounwind {
+  %trunc = trunc i32 %arg to i8
+  store i8 %trunc, i8 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i8:
+; SI: buffer_load_dword v
+; SI: buffer_store_byte v
+define void @truncate_buffer_load_i32_to_i8(i8 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
+  %load = load i32 addrspace(1)* %gep.in
+  %trunc = trunc i32 %load to i8
+  store i8 %trunc, i8 addrspace(1)* %gep.out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_kernarg_i32_to_i1:
+; SI: s_load_dword s
+; SI: buffer_store_byte v
+define void @truncate_kernarg_i32_to_i1(i1 addrspace(1)* %out, i32 %arg) nounwind {
+  %trunc = trunc i32 %arg to i1
+  store i1 %trunc, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_buffer_load_i32_to_i1:
+; SI: buffer_load_dword v
+; SI: buffer_store_byte v
+define void @truncate_buffer_load_i32_to_i1(i1 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.in = getelementptr i32 addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i1 addrspace(1)* %out, i32 %tid
+  %load = load i32 addrspace(1)* %gep.in
+  %trunc = trunc i32 %load to i1
+  store i1 %trunc, i1 addrspace(1)* %gep.out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i32:
+; SI: s_load_dword s
+; SI: buffer_store_dword v
+define void @truncate_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
+  %trunc = trunc i64 %arg to i32
+  store i32 %trunc, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i32:
+; SI: buffer_load_dword v
+; SI: buffer_store_dword v
+define void @truncate_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %load = load i64 addrspace(1)* %gep.in
+  %trunc = trunc i64 %load to i32
+  store i32 %trunc, i32 addrspace(1)* %gep.out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i32:
+; SI: s_load_dword s
+; SI: buffer_store_dword v
+define void @srl_kernarg_i64_to_i32(i32 addrspace(1)* %out, i64 %arg) nounwind {
+  %srl = lshr i64 %arg, 32
+  %trunc = trunc i64 %srl to i32
+  store i32 %trunc, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i32:
+; SI: buffer_load_dword v
+; SI: buffer_store_dword v
+define void @srl_buffer_load_i64_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %load = load i64 addrspace(1)* %gep.in
+  %srl = lshr i64 %load, 32
+  %trunc = trunc i64 %srl to i32
+  store i32 %trunc, i32 addrspace(1)* %gep.out
+  ret void
+}
+
+; Might as well reduce to 8-bit loads.
+; FUNC-LABEL: {{^}}truncate_kernarg_i16_to_i8:
+; SI: s_load_dword s
+; SI: buffer_store_byte v
+define void @truncate_kernarg_i16_to_i8(i8 addrspace(1)* %out, i16 %arg) nounwind {
+  %trunc = trunc i16 %arg to i8
+  store i8 %trunc, i8 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_buffer_load_i16_to_i8:
+; SI: buffer_load_ubyte v
+; SI: buffer_store_byte v
+define void @truncate_buffer_load_i16_to_i8(i8 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.in = getelementptr i16 addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
+  %load = load i16 addrspace(1)* %gep.in
+  %trunc = trunc i16 %load to i8
+  store i8 %trunc, i8 addrspace(1)* %gep.out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}srl_kernarg_i64_to_i8:
+; SI: s_load_dword s
+; SI: buffer_store_byte v
+define void @srl_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
+  %srl = lshr i64 %arg, 32
+  %trunc = trunc i64 %srl to i8
+  store i8 %trunc, i8 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}srl_buffer_load_i64_to_i8:
+; SI: buffer_load_dword v
+; SI: buffer_store_byte v
+define void @srl_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
+  %load = load i64 addrspace(1)* %gep.in
+  %srl = lshr i64 %load, 32
+  %trunc = trunc i64 %srl to i8
+  store i8 %trunc, i8 addrspace(1)* %gep.out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_kernarg_i64_to_i8:
+; SI: s_load_dword s
+; SI: buffer_store_byte v
+define void @truncate_kernarg_i64_to_i8(i8 addrspace(1)* %out, i64 %arg) nounwind {
+  %trunc = trunc i64 %arg to i8
+  store i8 %trunc, i8 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}truncate_buffer_load_i64_to_i8:
+; SI: buffer_load_dword v
+; SI: buffer_store_byte v
+define void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.in = getelementptr i64 addrspace(1)* %in, i32 %tid
+  %gep.out = getelementptr i8 addrspace(1)* %out, i32 %tid
+  %load = load i64 addrspace(1)* %gep.in
+  %trunc = trunc i64 %load to i8
+  store i8 %trunc, i8 addrspace(1)* %gep.out
+  ret void
+}
diff --git a/test/CodeGen/R600/operand-folding.ll b/test/CodeGen/R600/operand-folding.ll
new file mode 100644
index 000000000000..88a8145dcd62
--- /dev/null
+++ b/test/CodeGen/R600/operand-folding.ll
@@ -0,0 +1,113 @@
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
+
+; CHECK-LABEL: {{^}}fold_sgpr:
+; CHECK: v_add_i32_e32 v{{[0-9]+}}, s
+define void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) {
+entry:
+  %tmp0 = icmp ne i32 %fold, 0
+  br i1 %tmp0, label %if, label %endif
+
+if:
+  %id = call i32 @llvm.r600.read.tidig.x()
+  %offset = add i32 %fold, %id
+  %tmp1 = getelementptr i32 addrspace(1)* %out, i32 %offset
+  store i32 0, i32 addrspace(1)* %tmp1
+  br label %endif
+
+endif:
+  ret void
+}
+
+; CHECK-LABEL: {{^}}fold_imm:
+; CHECK v_or_i32_e32 v{{[0-9]+}}, 5
+define void @fold_imm(i32 addrspace(1)* %out, i32 %cmp) {
+entry:
+  %fold = add i32 3, 2
+  %tmp0 = icmp ne i32 %cmp, 0
+  br i1 %tmp0, label %if, label %endif
+
+if:
+  %id = call i32 @llvm.r600.read.tidig.x()
+  %val = or i32 %id, %fold
+  store i32 %val, i32 addrspace(1)* %out
+  br label %endif
+
+endif:
+  ret void
+}
+
+; CHECK-LABEL: {{^}}fold_64bit_constant_add:
+; CHECK-NOT: s_mov_b64
+; FIXME: It would be better if we could use v_add here and drop the extra
+; v_mov_b32 instructions.
+; CHECK-DAG: s_add_u32 [[LO:s[0-9]+]], s{{[0-9]+}}, 1
+; CHECK-DAG: s_addc_u32 [[HI:s[0-9]+]], s{{[0-9]+}}, 0
+; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[HI]]
+; CHECK: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}},
+
+define void @fold_64bit_constant_add(i64 addrspace(1)* %out, i32 %cmp, i64 %val) {
+entry:
+  %tmp0 = add i64 %val, 1
+  store i64 %tmp0, i64 addrspace(1)* %out
+  ret void
+}
+
+; Inline constants should always be folded.
+
+; CHECK-LABEL: {{^}}vector_inline:
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 5, v{{[0-9]+}}
+
+define void @vector_inline(<4 x i32> addrspace(1)* %out) {
+entry:
+  %tmp0 = call i32 @llvm.r600.read.tidig.x()
+  %tmp1 = add i32 %tmp0, 1
+  %tmp2 = add i32 %tmp0, 2
+  %tmp3 = add i32 %tmp0, 3
+  %vec0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0
+  %vec1 = insertelement <4 x i32> %vec0, i32 %tmp1, i32 1
+  %vec2 = insertelement <4 x i32> %vec1, i32 %tmp2, i32 2
+  %vec3 = insertelement <4 x i32> %vec2, i32 %tmp3, i32 3
+  %tmp4 = xor <4 x i32> <i32 5, i32 5, i32 5, i32 5>, %vec3
+  store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; Immediates with one use should be folded
+; CHECK-LABEL: {{^}}imm_one_use:
+; CHECK: v_xor_b32_e32 v{{[0-9]+}}, 0x64, v{{[0-9]+}}
+
+define void @imm_one_use(i32 addrspace(1)* %out) {
+entry:
+  %tmp0 = call i32 @llvm.r600.read.tidig.x()
+  %tmp1 = xor i32 %tmp0, 100
+  store i32 %tmp1, i32 addrspace(1)* %out
+  ret void
+}
+; CHECK-LABEL: {{^}}vector_imm:
+; CHECK: s_movk_i32 [[IMM:s[0-9]+]], 0x64
+; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
+; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
+; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
+; CHECK: v_xor_b32_e32 v{{[0-9]}}, [[IMM]], v{{[0-9]}}
+
+define void @vector_imm(<4 x i32> addrspace(1)* %out) {
+entry:
+  %tmp0 = call i32 @llvm.r600.read.tidig.x()
+  %tmp1 = add i32 %tmp0, 1
+  %tmp2 = add i32 %tmp0, 2
+  %tmp3 = add i32 %tmp0, 3
+  %vec0 = insertelement <4 x i32> undef, i32 %tmp0, i32 0
+  %vec1 = insertelement <4 x i32> %vec0, i32 %tmp1, i32 1
+  %vec2 = insertelement <4 x i32> %vec1, i32 %tmp2, i32 2
+  %vec3 = insertelement <4 x i32> %vec2, i32 %tmp3, i32 3
+  %tmp4 = xor <4 x i32> <i32 100, i32 100, i32 100, i32 100>, %vec3
+  store <4 x i32> %tmp4, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #0
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/operand-spacing.ll b/test/CodeGen/R600/operand-spacing.ll
new file mode 100644
index 000000000000..dd9f25aad7f2
--- /dev/null
+++ b/test/CodeGen/R600/operand-spacing.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
+
+; Make sure there isn't an extra space between the instruction name and first operands.
+
+; SI-LABEL: {{^}}add_f32:
+; SI-DAG: s_load_dword [[SREGA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[SREGB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VREGB:v[0-9]+]], [[SREGB]]
+; SI: v_add_f32_e32 [[RESULT:v[0-9]+]], [[SREGA]], [[VREGB]]
+; SI: buffer_store_dword [[RESULT]],
+define void @add_f32(float addrspace(1)* %out, float %a, float %b) {
+  %result = fadd float %a, %b
+  store float %result, float addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/or.ll b/test/CodeGen/R600/or.ll
index 3c3b475d077c..d7dfb7ab1c2e 100644
--- a/test/CodeGen/R600/or.ll
+++ b/test/CodeGen/R600/or.ll
@@ -1,13 +1,13 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
 
-; EG-LABEL: @or_v2i32
+; EG-LABEL: {{^}}or_v2i32:
 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI-LABEL: @or_v2i32
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI-LABEL: {{^}}or_v2i32:
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -18,17 +18,17 @@ define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in)
   ret void
 }
 
-; EG-LABEL: @or_v4i32
+; EG-LABEL: {{^}}or_v4i32:
 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: OR_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI-LABEL: @or_v4i32
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: V_OR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI-LABEL: {{^}}or_v4i32:
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -39,16 +39,16 @@ define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in)
   ret void
 }
 
-; SI-LABEL: @scalar_or_i32
-; SI: S_OR_B32
+; SI-LABEL: {{^}}scalar_or_i32:
+; SI: s_or_b32
 define void @scalar_or_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
   %or = or i32 %a, %b
   store i32 %or, i32 addrspace(1)* %out
   ret void
 }
 
-; SI-LABEL: @vector_or_i32
-; SI: V_OR_B32_e32 v{{[0-9]}}
+; SI-LABEL: {{^}}vector_or_i32:
+; SI: v_or_b32_e32 v{{[0-9]}}
 define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
   %loada = load i32 addrspace(1)* %a
   %or = or i32 %loada, %b
@@ -56,20 +56,46 @@ define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b)
   ret void
 }
 
-; EG-LABEL: @scalar_or_i64
+; SI-LABEL: {{^}}scalar_or_literal_i32:
+; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x1869f
+define void @scalar_or_literal_i32(i32 addrspace(1)* %out, i32 %a) {
+  %or = or i32 %a, 99999
+  store i32 %or, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}vector_or_literal_i32:
+; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
+define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
+  %loada = load i32 addrspace(1)* %a, align 4
+  %or = or i32 %loada, 65535
+  store i32 %or, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}vector_or_inline_immediate_i32:
+; SI: v_or_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
+define void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
+  %loada = load i32 addrspace(1)* %a, align 4
+  %or = or i32 %loada, 4
+  store i32 %or, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; EG-LABEL: {{^}}scalar_or_i64:
 ; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[2].W, KC0[3].Y
 ; EG-DAG: OR_INT * T{{[0-9]\.[XYZW]}}, KC0[3].X, KC0[3].Z
-; SI-LABEL: @scalar_or_i64
-; SI: S_OR_B64
+; SI-LABEL: {{^}}scalar_or_i64:
+; SI: s_or_b64
 define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
   %or = or i64 %a, %b
   store i64 %or, i64 addrspace(1)* %out
   ret void
 }
 
-; SI-LABEL: @vector_or_i64
-; SI: V_OR_B32_e32 v{{[0-9]}}
-; SI: V_OR_B32_e32 v{{[0-9]}}
+; SI-LABEL: {{^}}vector_or_i64:
+; SI: v_or_b32_e32 v{{[0-9]}}
+; SI: v_or_b32_e32 v{{[0-9]}}
 define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
   %loada = load i64 addrspace(1)* %a, align 8
   %loadb = load i64 addrspace(1)* %a, align 8
@@ -78,9 +104,9 @@ define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add
   ret void
 }
 
-; SI-LABEL: @scalar_vector_or_i64
-; SI: V_OR_B32_e32 v{{[0-9]}}
-; SI: V_OR_B32_e32 v{{[0-9]}}
+; SI-LABEL: {{^}}scalar_vector_or_i64:
+; SI: v_or_b32_e32 v{{[0-9]}}
+; SI: v_or_b32_e32 v{{[0-9]}}
 define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
   %loada = load i64 addrspace(1)* %a
   %or = or i64 %loada, %b
@@ -88,13 +114,13 @@ define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a,
   ret void
 }
 
-; SI-LABEL: @vector_or_i64_loadimm
-; SI-DAG: S_MOV_B32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
-; SI-DAG: S_MOV_B32 [[HI_S_IMM:s[0-9]+]], 0x146f
-; SI-DAG: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
-; SI-DAG: V_OR_B32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}vector_or_i64_loadimm:
+; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xdf77987f
+; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x146f
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
 define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
   %loada = load i64 addrspace(1)* %a, align 8
   %or = or i64 %loada, 22470723082367
@@ -103,11 +129,11 @@ define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a,
 }
 
 ; FIXME: The or 0 should really be removed.
-; SI-LABEL: @vector_or_i64_imm
-; SI: BUFFER_LOAD_DWORDX2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
-; SI: V_OR_B32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]]
-; SI: V_OR_B32_e32 {{v[0-9]+}}, 0, {{.*}}
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}vector_or_i64_imm:
+; SI: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI: v_or_b32_e32 {{v[0-9]+}}, 8, v[[LO_VREG]]
+; SI: v_or_b32_e32 {{v[0-9]+}}, 0, {{.*}}
+; SI: s_endpgm
 define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
   %loada = load i64 addrspace(1)* %a, align 8
   %or = or i64 %loada, 8
@@ -115,12 +141,12 @@ define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64
   ret void
 }
 
-; SI-LABEL: @trunc_i64_or_to_i32
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG0:[0-9]+]]
-; SI: S_LOAD_DWORDX2 s{{\[}}[[SREG1:[0-9]+]]
-; SI: S_OR_B32 [[SRESULT:s[0-9]+]], s[[SREG1]], s[[SREG0]]
-; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]]
-; SI: BUFFER_STORE_DWORD [[VRESULT]],
+; SI-LABEL: {{^}}trunc_i64_or_to_i32:
+; SI: s_load_dword s[[SREG0:[0-9]+]]
+; SI: s_load_dword s[[SREG1:[0-9]+]]
+; SI: s_or_b32 s[[SRESULT:[0-9]+]], s[[SREG1]], s[[SREG0]]
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], s[[SRESULT]]
+; SI: buffer_store_dword [[VRESULT]],
 define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
   %add = or i64 %b, %a
   %trunc = trunc i64 %add to i32
@@ -128,11 +154,11 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
   ret void
 }
 
-; EG-CHECK: @or_i1
+; EG-CHECK: {{^}}or_i1:
 ; EG-CHECK: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
 
-; SI-CHECK: @or_i1
-; SI-CHECK: S_OR_B64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
+; SI-CHECK: {{^}}or_i1:
+; SI-CHECK: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
 define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
   %a = load float addrspace(1) * %in0
   %b = load float addrspace(1) * %in1
diff --git a/test/CodeGen/R600/packetizer.ll b/test/CodeGen/R600/packetizer.ll
index 0a405c57ea93..49a7c0df748f 100644
--- a/test/CodeGen/R600/packetizer.ll
+++ b/test/CodeGen/R600/packetizer.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
 
-; CHECK: @test
+; CHECK: {{^}}test:
 ; CHECK: BIT_ALIGN_INT T{{[0-9]}}.X
 ; CHECK: BIT_ALIGN_INT T{{[0-9]}}.Y
 ; CHECK: BIT_ALIGN_INT T{{[0-9]}}.Z
diff --git a/test/CodeGen/R600/parallelandifcollapse.ll b/test/CodeGen/R600/parallelandifcollapse.ll
index 8a269e0cb43a..82b11501e865 100644
--- a/test/CodeGen/R600/parallelandifcollapse.ll
+++ b/test/CodeGen/R600/parallelandifcollapse.ll
@@ -1,5 +1,5 @@
 ; Function Attrs: nounwind
-; RUN: llc < %s -march=r600 -mcpu=redwood  | FileCheck %s
+; RUN: llc -march=r600 -mcpu=redwood -mattr=-promote-alloca < %s | FileCheck %s
 ;
 ; CFG flattening should use parallel-and mode to generate branch conditions and
 ; then merge if-regions with the same bodies.
@@ -11,7 +11,6 @@
 ; FIXME: For some reason having the allocas here allowed the flatten cfg pass
 ; to do its transfomation, however now that we are using local memory for
 ; allocas, the transformation isn't happening.
-; XFAIL: *
 
 define void @_Z9chk1D_512v() #0 {
 entry:
diff --git a/test/CodeGen/R600/predicate-dp4.ll b/test/CodeGen/R600/predicate-dp4.ll
index e48d6a7aa9a8..6bc187594359 100644
--- a/test/CodeGen/R600/predicate-dp4.ll
+++ b/test/CodeGen/R600/predicate-dp4.ll
@@ -1,6 +1,6 @@
 ;RUN: llc < %s -march=r600 -mcpu=cayman
 
-; CHECK-LABEL: @main
+; CHECK-LABEL: {{^}}main:
 ; CHECK: PRED_SETE_INT * Pred,
 ; CHECK: DOT4 T{{[0-9]+}}.X, T0.X, T0.X, Pred_sel_one
 define void @main(<4 x float> inreg) #0 {
diff --git a/test/CodeGen/R600/predicates.ll b/test/CodeGen/R600/predicates.ll
index 902508ff9e05..0ce74d97ba8e 100644
--- a/test/CodeGen/R600/predicates.ll
+++ b/test/CodeGen/R600/predicates.ll
@@ -3,7 +3,7 @@
 ; These tests make sure the compiler is optimizing branches using predicates
 ; when it is legal to do so.
 
-; CHECK: @simple_if
+; CHECK: {{^}}simple_if:
 ; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
 ; CHECK: LSHL * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
 define void @simple_if(i32 addrspace(1)* %out, i32 %in) {
@@ -21,7 +21,7 @@ ENDIF:
   ret void
 }
 
-; CHECK: @simple_if_else
+; CHECK: {{^}}simple_if_else:
 ; CHECK: PRED_SET{{[EGN][ET]*}}_INT * Pred,
 ; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
 ; CHECK: LSH{{[LR] \* T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, Pred_sel
@@ -44,7 +44,7 @@ ENDIF:
   ret void
 }
 
-; CHECK: @nested_if
+; CHECK: {{^}}nested_if:
 ; CHECK: ALU_PUSH_BEFORE
 ; CHECK: JUMP
 ; CHECK: POP
@@ -71,7 +71,7 @@ ENDIF:
   ret void
 }
 
-; CHECK: @nested_if_else
+; CHECK: {{^}}nested_if_else:
 ; CHECK: ALU_PUSH_BEFORE
 ; CHECK: JUMP
 ; CHECK: POP
diff --git a/test/CodeGen/R600/private-memory-atomics.ll b/test/CodeGen/R600/private-memory-atomics.ll
index def4f9dee521..765563783831 100644
--- a/test/CodeGen/R600/private-memory-atomics.ll
+++ b/test/CodeGen/R600/private-memory-atomics.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s
 
 ; This works because promote allocas pass replaces these with LDS atomics.
 
diff --git a/test/CodeGen/R600/private-memory-broken.ll b/test/CodeGen/R600/private-memory-broken.ll
index 40860858eb0f..173309496135 100644
--- a/test/CodeGen/R600/private-memory-broken.ll
+++ b/test/CodeGen/R600/private-memory-broken.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -verify-machineinstrs -march=r600 -mcpu=SI %s -o /dev/null 2>&1 | FileCheck %s
+; RUN: not llc -verify-machineinstrs -march=amdgcn -mcpu=SI %s -o /dev/null 2>&1 | FileCheck %s
 
 ; Make sure promote alloca pass doesn't crash
 
diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll
index 124d9fa64505..15153c69a48d 100644
--- a/test/CodeGen/R600/private-memory.ll
+++ b/test/CodeGen/R600/private-memory.ll
@@ -1,23 +1,23 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
-; RUN: llc -mattr=+promote-alloca -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
-; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC
+; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC
 
 declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 
-; FUNC-LABEL: @mova_same_clause
+; FUNC-LABEL: {{^}}mova_same_clause:
 
 ; R600: LDS_WRITE
 ; R600: LDS_WRITE
 ; R600: LDS_READ
 ; R600: LDS_READ
 
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_WRITE_B32
-; SI-PROMOTE: DS_READ_B32
-; SI-PROMOTE: DS_READ_B32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_write_b32
+; SI-PROMOTE: ds_read_b32
+; SI-PROMOTE: ds_read_b32
 
-; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
-; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
+; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
+; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0
 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
 entry:
   %stack = alloca [5 x i32], align 4
@@ -45,10 +45,10 @@ entry:
 ; XXX: This generated code has unnecessary MOVs, we should be able to optimize
 ; this.
 
-; FUNC-LABEL: @multiple_structs
+; FUNC-LABEL: {{^}}multiple_structs:
 ; R600-NOT: MOVA_INT
-; SI-NOT: V_MOVREL
-; SI-NOT: V_MOVREL
+; SI-NOT: v_movrel
+; SI-NOT: v_movrel
 %struct.point = type { i32, i32 }
 
 define void @multiple_structs(i32 addrspace(1)* %out) {
@@ -76,9 +76,9 @@ entry:
 ; loads and stores should be lowered to copies, so there shouldn't be any
 ; MOVA instructions.
 
-; FUNC-LABEL: @direct_loop
+; FUNC-LABEL: {{^}}direct_loop:
 ; R600-NOT: MOVA_INT
-; SI-NOT: V_MOVREL
+; SI-NOT: v_movrel
 
 define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
@@ -112,14 +112,13 @@ for.end:
   ret void
 }
 
-; FUNC-LABEL: @short_array
+; FUNC-LABEL: {{^}}short_array:
 
 ; R600: MOVA_INT
 
-; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
-; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
-; SI-PROMOTE-NOT: MOVREL
-; SI-PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] + v{{[0-9]+}} + s{{[0-9]+}}
+; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x68,0xe0
+; SI-PROMOTE-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:2 ; encoding: [0x02,0x10,0x68,0xe0
+; SI-PROMOTE: buffer_load_sshort v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
 define void @short_array(i32 addrspace(1)* %out, i32 %index) {
 entry:
   %0 = alloca [2 x i16]
@@ -134,12 +133,12 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @char_array
+; FUNC-LABEL: {{^}}char_array:
 
 ; R600: MOVA_INT
 
-; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}, 0x0
-; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}, 0x1
+; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x60,0xe0
+; SI-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:1 ; encoding: [0x01,0x10,0x60,0xe0
 define void @char_array(i32 addrspace(1)* %out, i32 %index) {
 entry:
   %0 = alloca [2 x i8]
@@ -157,12 +156,12 @@ entry:
 
 ; Make sure we don't overwrite workitem information with private memory
 
-; FUNC-LABEL: @work_item_info
+; FUNC-LABEL: {{^}}work_item_info:
 ; R600-NOT: MOV T0.X
 ; Additional check in case the move ends up in the last slot
 ; R600-NOT: MOV * TO.X
 
-; SI-NOT: V_MOV_B32_e{{(32|64)}} v0
+; SI-NOT: v_mov_b32_e{{(32|64)}} v0
 define void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = alloca [2 x i32]
@@ -180,11 +179,11 @@ entry:
 
 ; Test that two stack objects are not stored in the same register
 ; The second stack object should be in T3.X
-; FUNC-LABEL: @no_overlap
+; FUNC-LABEL: {{^}}no_overlap:
 ; R600_CHECK: MOV
 ; R600_CHECK: [[CHAN:[XYZW]]]+
 ; R600-NOT: [[CHAN]]+
-; SI: V_MOV_B32_e32 v3
+; SI: v_mov_b32_e32 v3
 define void @no_overlap(i32 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = alloca [3 x i8], align 1
@@ -291,3 +290,22 @@ entry:
   ret void
 }
 
+; AMDGPUPromoteAlloca does not know how to handle ptrtoint.  When it
+; finds one, it should stop trying to promote.
+
+; FUNC-LABEL: ptrtoint:
+; SI-NOT: ds_write
+; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen
+; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:5
+define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+  %alloca = alloca [16 x i32]
+  %tmp0 = getelementptr [16 x i32]* %alloca, i32 0, i32 %a
+  store i32 5, i32* %tmp0
+  %tmp1 = ptrtoint [16 x i32]* %alloca to i32
+  %tmp2 = add i32 %tmp1, 5
+  %tmp3 = inttoptr i32 %tmp2 to i32*
+  %tmp4 = getelementptr i32* %tmp3, i32 %b
+  %tmp5 = load i32* %tmp4
+  store i32 %tmp5, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll
index 55eb56d3fb1d..1908f15949a2 100644
--- a/test/CodeGen/R600/pv.ll
+++ b/test/CodeGen/R600/pv.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=r600 | FileCheck %s
 
-;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
-;CHECK: MAX T{{[0-9].[XYZW]}}, PV.X, 0.0
+; CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
+; CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
 
 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 {
 main_body:
diff --git a/test/CodeGen/R600/r600-encoding.ll b/test/CodeGen/R600/r600-encoding.ll
index b760c882f4e3..112cdac0a1b8 100644
--- a/test/CodeGen/R600/r600-encoding.ll
+++ b/test/CodeGen/R600/r600-encoding.ll
@@ -4,10 +4,10 @@
 ; The earliest R600 GPUs have a slightly different encoding than the rest of
 ; the VLIW4/5 GPUs.
 
-; EG-CHECK: @test
+; EG-CHECK: {{^}}test:
 ; EG-CHECK: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x01,0x[0-9a-f]+,0x[0-9a-f]+}}]
 
-; R600-CHECK: @test
+; R600-CHECK: {{^}}test:
 ; R600-CHECK: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}]
 
 define void @test(<4 x float> inreg %reg0) #0 {
diff --git a/test/CodeGen/R600/r600-export-fix.ll b/test/CodeGen/R600/r600-export-fix.ll
index 73bc0635ab21..7d7285632078 100644
--- a/test/CodeGen/R600/r600-export-fix.ll
+++ b/test/CodeGen/R600/r600-export-fix.ll
@@ -3,9 +3,9 @@
 ;CHECK:	EXPORT T{{[0-9]}}.XYZW
 ;CHECK:	EXPORT T{{[0-9]}}.0000
 ;CHECK: EXPORT T{{[0-9]}}.0000
-;CHECK: EXPORT T{{[0-9]}}.0XZW
+;CHECK: EXPORT T{{[0-9]}}.0XYZ
 ;CHECK: EXPORT T{{[0-9]}}.XYZW
-;CHECK: EXPORT T{{[0-9]}}.YX00
+;CHECK: EXPORT T{{[0-9]}}.YZ00
 ;CHECK: EXPORT T{{[0-9]}}.0000
 ;CHECK: EXPORT T{{[0-9]}}.0000
 
diff --git a/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll b/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll
index c89398f00662..f388f8ffe293 100644
--- a/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll
+++ b/test/CodeGen/R600/r600-infinite-loop-bug-while-reorganizing-vector.ll
@@ -1,5 +1,4 @@
 ;RUN: llc < %s -march=r600 -mcpu=cayman
-;REQUIRES: asserts
 
 define void @main(<4 x float> inreg, <4 x float> inreg) #0 {
 main_body:
diff --git a/test/CodeGen/R600/r600cfg.ll b/test/CodeGen/R600/r600cfg.ll
index 6dee3ef89cf5..dddc9de7e963 100644
--- a/test/CodeGen/R600/r600cfg.ll
+++ b/test/CodeGen/R600/r600cfg.ll
@@ -1,5 +1,4 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood
-;REQUIRES: asserts
 
 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
 main_body:
diff --git a/test/CodeGen/R600/register-count-comments.ll b/test/CodeGen/R600/register-count-comments.ll
index 329077cde57d..2b49f977def7 100644
--- a/test/CodeGen/R600/register-count-comments.ll
+++ b/test/CodeGen/R600/register-count-comments.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s
 
 declare i32 @llvm.SI.tid() nounwind readnone
 
-; SI-LABEL: @foo:
+; SI-LABEL: {{^}}foo:
 ; SI: .section	.AMDGPU.csdata
 ; SI: ; Kernel info:
 ; SI: ; NumSgprs: {{[0-9]+}}
@@ -18,3 +18,10 @@ define void @foo(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %abase, i32 a
   store i32 %result, i32 addrspace(1)* %outptr, align 4
   ret void
 }
+
+; SI-LABEL: {{^}}one_vgpr_used:
+; SI: NumVgprs: 1
+define void @one_vgpr_used(i32 addrspace(1)* %out, i32 %x) nounwind {
+  store i32 %x, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/reorder-stores.ll b/test/CodeGen/R600/reorder-stores.ll
index be2fcc6849fb..81e424aab21e 100644
--- a/test/CodeGen/R600/reorder-stores.ll
+++ b/test/CodeGen/R600/reorder-stores.ll
@@ -1,15 +1,15 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s
 
-; SI-LABEL: @no_reorder_v2f64_global_load_store
-; SI: BUFFER_LOAD_DWORDX2
-; SI: BUFFER_LOAD_DWORDX2
-; SI: BUFFER_LOAD_DWORDX2
-; SI: BUFFER_LOAD_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}no_reorder_v2f64_global_load_store:
+; SI: buffer_load_dwordx2
+; SI: buffer_load_dwordx2
+; SI: buffer_load_dwordx2
+; SI: buffer_load_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
 define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
   %tmp1 = load <2 x double> addrspace(1)* %x, align 16
   %tmp4 = load <2 x double> addrspace(1)* %y, align 16
@@ -18,12 +18,12 @@ define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocap
   ret void
 }
 
-; SI-LABEL: @no_reorder_scalarized_v2f64_local_load_store
-; SI: DS_READ_B64
-; SI: DS_READ_B64
-; SI: DS_WRITE_B64
-; SI: DS_WRITE_B64
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}no_reorder_scalarized_v2f64_local_load_store:
+; SI: ds_read_b64
+; SI: ds_read_b64
+; SI: ds_write_b64
+; SI: ds_write_b64
+; SI: s_endpgm
 define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind {
   %tmp1 = load <2 x double> addrspace(3)* %x, align 16
   %tmp4 = load <2 x double> addrspace(3)* %y, align 16
@@ -32,48 +32,48 @@ define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace
   ret void
 }
 
-; SI-LABEL: @no_reorder_split_v8i32_global_load_store
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
+; SI-LABEL: {{^}}no_reorder_split_v8i32_global_load_store:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
 
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
 
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
 
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
-; SI: BUFFER_LOAD_DWORD
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+; SI: buffer_load_dword
 
 
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
 
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
 
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
 
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: buffer_store_dword
+; SI: s_endpgm
 define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
   %tmp1 = load <8 x i32> addrspace(1)* %x, align 32
   %tmp4 = load <8 x i32> addrspace(1)* %y, align 32
@@ -82,13 +82,13 @@ define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* no
   ret void
 }
 
-; SI-LABEL: @no_reorder_extload_64
-; SI: DS_READ_B64
-; SI: DS_READ_B64
-; SI: DS_WRITE_B64
-; SI-NOT: DS_READ
-; SI: DS_WRITE_B64
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}no_reorder_extload_64:
+; SI: ds_read_b64
+; SI: ds_read_b64
+; SI: ds_write_b64
+; SI-NOT: ds_read
+; SI: ds_write_b64
+; SI: s_endpgm
 define void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind {
   %tmp1 = load <2 x i32> addrspace(3)* %x, align 8
   %tmp4 = load <2 x i32> addrspace(3)* %y, align 8
diff --git a/test/CodeGen/R600/rotl.i64.ll b/test/CodeGen/R600/rotl.i64.ll
index bda0b6694a8d..13f251ee63e1 100644
--- a/test/CodeGen/R600/rotl.i64.ll
+++ b/test/CodeGen/R600/rotl.i64.ll
@@ -1,10 +1,11 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @s_rotl_i64:
-; SI: S_LSHL_B64
-; SI: S_SUB_I32
-; SI: S_LSHR_B64
-; SI: S_OR_B64
+; FUNC-LABEL: {{^}}s_rotl_i64:
+; SI-DAG: s_lshl_b64
+; SI-DAG: s_sub_i32
+; SI-DAG: s_lshr_b64
+; SI: s_or_b64
+; SI: s_endpgm
 define void @s_rotl_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
 entry:
   %0 = shl i64 %x, %y
@@ -15,12 +16,13 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @v_rotl_i64:
-; SI: V_LSHL_B64
-; SI: V_SUB_I32
-; SI: V_LSHR_B64
-; SI: V_OR_B32
-; SI: V_OR_B32
+; FUNC-LABEL: {{^}}v_rotl_i64:
+; SI-DAG: v_lshl_b64
+; SI-DAG: v_sub_i32
+; SI: v_lshr_b64
+; SI: v_or_b32
+; SI: v_or_b32
+; SI: s_endpgm
 define void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
 entry:
   %x = load i64 addrspace(1)* %xptr, align 8
diff --git a/test/CodeGen/R600/rotl.ll b/test/CodeGen/R600/rotl.ll
index 83f657fd4cce..bcf8890beeba 100644
--- a/test/CodeGen/R600/rotl.ll
+++ b/test/CodeGen/R600/rotl.ll
@@ -1,14 +1,14 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @rotl_i32:
+; FUNC-LABEL: {{^}}rotl_i32:
 ; R600: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x
 ; R600-NEXT: 32
 ; R600: BIT_ALIGN_INT {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}}
 
-; SI: S_SUB_I32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
-; SI: V_MOV_B32_e32 [[VDST:v[0-9]+]], [[SDST]]
-; SI: V_ALIGNBIT_B32 {{v[0-9]+, [s][0-9]+, v[0-9]+}}, [[VDST]]
+; SI: s_sub_i32 [[SDST:s[0-9]+]], 32, {{[s][0-9]+}}
+; SI: v_mov_b32_e32 [[VDST:v[0-9]+]], [[SDST]]
+; SI: v_alignbit_b32 {{v[0-9]+, [s][0-9]+, s[0-9]+}}, [[VDST]]
 define void @rotl_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
 entry:
   %0 = shl i32 %x, %y
@@ -19,11 +19,12 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @rotl_v2i32
-; SI: S_SUB_I32
-; SI: V_ALIGNBIT_B32
-; SI: S_SUB_I32
-; SI: V_ALIGNBIT_B32
+; FUNC-LABEL: {{^}}rotl_v2i32:
+; SI-DAG: s_sub_i32
+; SI-DAG: s_sub_i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: v_alignbit_b32
+; SI: s_endpgm
 define void @rotl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
 entry:
   %0 = shl <2 x i32> %x, %y
@@ -34,15 +35,16 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @rotl_v4i32
-; SI: S_SUB_I32
-; SI: V_ALIGNBIT_B32
-; SI: S_SUB_I32
-; SI: V_ALIGNBIT_B32
-; SI: S_SUB_I32
-; SI: V_ALIGNBIT_B32
-; SI: S_SUB_I32
-; SI: V_ALIGNBIT_B32
+; FUNC-LABEL: {{^}}rotl_v4i32:
+; SI-DAG: s_sub_i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: s_sub_i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: s_sub_i32
+; SI-DAG: v_alignbit_b32
+; SI-DAG: s_sub_i32
+; SI-DAG: v_alignbit_b32
+; SI: s_endpgm
 define void @rotl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
 entry:
   %0 = shl <4 x i32> %x, %y
diff --git a/test/CodeGen/R600/rotr.i64.ll b/test/CodeGen/R600/rotr.i64.ll
index c264751baeb1..4568859b1ae9 100644
--- a/test/CodeGen/R600/rotr.i64.ll
+++ b/test/CodeGen/R600/rotr.i64.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @s_rotr_i64
-; SI: S_LSHR_B64
-; SI: S_SUB_I32
-; SI: S_LSHL_B64
-; SI: S_OR_B64
+; FUNC-LABEL: {{^}}s_rotr_i64:
+; SI-DAG: s_sub_i32
+; SI-DAG: s_lshr_b64
+; SI-DAG: s_lshl_b64
+; SI: s_or_b64
 define void @s_rotr_i64(i64 addrspace(1)* %in, i64 %x, i64 %y) {
 entry:
   %tmp0 = sub i64 64, %y
@@ -15,12 +15,12 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @v_rotr_i64
-; SI: V_LSHR_B64
-; SI: V_SUB_I32
-; SI: V_LSHL_B64
-; SI: V_OR_B32
-; SI: V_OR_B32
+; FUNC-LABEL: {{^}}v_rotr_i64:
+; SI-DAG: v_sub_i32
+; SI-DAG: v_lshr_b64
+; SI-DAG: v_lshl_b64
+; SI: v_or_b32
+; SI: v_or_b32
 define void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
 entry:
   %x = load i64 addrspace(1)* %xptr, align 8
@@ -33,7 +33,7 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @s_rotr_v2i64
+; FUNC-LABEL: {{^}}s_rotr_v2i64:
 define void @s_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> %x, <2 x i64> %y) {
 entry:
   %tmp0 = sub <2 x i64> <i64 64, i64 64>, %y
@@ -44,7 +44,7 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @v_rotr_v2i64
+; FUNC-LABEL: {{^}}v_rotr_v2i64:
 define void @v_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> addrspace(1)* %xptr, <2 x i64> addrspace(1)* %yptr) {
 entry:
   %x = load <2 x i64> addrspace(1)* %xptr, align 8
diff --git a/test/CodeGen/R600/rotr.ll b/test/CodeGen/R600/rotr.ll
index a5a4da480738..dcd036e450da 100644
--- a/test/CodeGen/R600/rotr.ll
+++ b/test/CodeGen/R600/rotr.ll
@@ -1,10 +1,10 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @rotr_i32:
+; FUNC-LABEL: {{^}}rotr_i32:
 ; R600: BIT_ALIGN_INT
 
-; SI: V_ALIGNBIT_B32
+; SI: v_alignbit_b32
 define void @rotr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y) {
 entry:
   %tmp0 = sub i32 32, %y
@@ -15,12 +15,12 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @rotr_v2i32:
+; FUNC-LABEL: {{^}}rotr_v2i32:
 ; R600: BIT_ALIGN_INT
 ; R600: BIT_ALIGN_INT
 
-; SI: V_ALIGNBIT_B32
-; SI: V_ALIGNBIT_B32
+; SI: v_alignbit_b32
+; SI: v_alignbit_b32
 define void @rotr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
 entry:
   %tmp0 = sub <2 x i32> <i32 32, i32 32>, %y
@@ -31,16 +31,16 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @rotr_v4i32:
+; FUNC-LABEL: {{^}}rotr_v4i32:
 ; R600: BIT_ALIGN_INT
 ; R600: BIT_ALIGN_INT
 ; R600: BIT_ALIGN_INT
 ; R600: BIT_ALIGN_INT
 
-; SI: V_ALIGNBIT_B32
-; SI: V_ALIGNBIT_B32
-; SI: V_ALIGNBIT_B32
-; SI: V_ALIGNBIT_B32
+; SI: v_alignbit_b32
+; SI: v_alignbit_b32
+; SI: v_alignbit_b32
+; SI: v_alignbit_b32
 define void @rotr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
 entry:
   %tmp0 = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %y
diff --git a/test/CodeGen/R600/rsq.ll b/test/CodeGen/R600/rsq.ll
index 3069f62724b7..b8a23df63d83 100644
--- a/test/CodeGen/R600/rsq.ll
+++ b/test/CodeGen/R600/rsq.ll
@@ -1,12 +1,13 @@
-; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s
-; RUN: llc -march=r600 -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=SI-UNSAFE -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -mattr=-fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI %s
 
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 declare float @llvm.sqrt.f32(float) nounwind readnone
 declare double @llvm.sqrt.f64(double) nounwind readnone
 
-; SI-LABEL: @rsq_f32
-; SI: V_RSQ_F32_e32
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}rsq_f32:
+; SI: v_rsq_f32_e32
+; SI: s_endpgm
 define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
   %val = load float addrspace(1)* %in, align 4
   %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
@@ -15,10 +16,10 @@ define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noali
   ret void
 }
 
-; SI-LABEL: @rsq_f64
-; SI-UNSAFE: V_RSQ_F64_e32
-; SI-SAFE: V_SQRT_F64_e32
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}rsq_f64:
+; SI-UNSAFE: v_rsq_f64_e32
+; SI-SAFE: v_sqrt_f64_e32
+; SI: s_endpgm
 define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
   %val = load double addrspace(1)* %in, align 4
   %sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
@@ -26,3 +27,48 @@ define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noa
   store double %div, double addrspace(1)* %out, align 4
   ret void
 }
+
+; SI-LABEL: {{^}}rsq_f32_sgpr:
+; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+; SI: s_endpgm
+define void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind {
+  %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
+  %div = fdiv float 1.0, %sqrt
+  store float %div, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; Recognize that this is rsqrt(a) * rcp(b) * c,
+; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
+
+; SI-LABEL: @rsqrt_fmul
+; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
+; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
+; SI-DAG: buffer_load_dword [[C:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8
+
+; SI-UNSAFE-DAG: v_rsq_f32_e32 [[RSQA:v[0-9]+]], [[A]]
+; SI-UNSAFE-DAG: v_rcp_f32_e32 [[RCPB:v[0-9]+]], [[B]]
+; SI-UNSAFE-DAG: v_mul_f32_e32 [[TMP:v[0-9]+]], [[RCPB]], [[RSQA]]
+; SI-UNSAFE: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[C]]
+; SI-UNSAFE: buffer_store_dword [[RESULT]]
+
+; SI-SAFE-NOT: v_rsq_f32
+
+; SI: s_endpgm
+define void @rsqrt_fmul(float addrspace(1)* %out, float addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %out.gep = getelementptr float addrspace(1)* %out, i32 %tid
+  %gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
+  %gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
+  %gep.2 = getelementptr float addrspace(1)* %gep.0, i32 2
+
+  %a = load float addrspace(1)* %gep.0
+  %b = load float addrspace(1)* %gep.1
+  %c = load float addrspace(1)* %gep.2
+
+  %x = call float @llvm.sqrt.f32(float %a)
+  %y = fmul float %x, %b
+  %z = fdiv float %c, %y
+  store float %z, float addrspace(1)* %out.gep
+  ret void
+}
diff --git a/test/CodeGen/R600/s_movk_i32.ll b/test/CodeGen/R600/s_movk_i32.ll
new file mode 100644
index 000000000000..469a6ba3a07e
--- /dev/null
+++ b/test/CodeGen/R600/s_movk_i32.ll
@@ -0,0 +1,184 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; SI-LABEL: {{^}}s_movk_i32_k0:
+; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32)
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k1:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32)
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k2:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 64{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32)
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k3:
+; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32)
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k4:
+; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x20000{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32)
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k5:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0xffef{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0xff00ffff{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k6:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x41{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 63{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 270582939713 ; 65 | (63 << 32)
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k7:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x2000{{$}}
+; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x4000{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 70368744185856; ((1 << 13)) | ((1 << 14) << 32)
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+
+; SI-LABEL: {{^}}s_movk_i32_k8:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k9:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8001{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k10:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8888{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k11:
+; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8fff{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_movk_i32_k12:
+; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff7001{{$}}
+; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
+; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
+; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
+; SI: s_endpgm
+define void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
+  %loada = load i64 addrspace(1)* %a, align 4
+  %or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001
+  store i64 %or, i64 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/saddo.ll b/test/CodeGen/R600/saddo.ll
index c80480e85512..2f5f9af838fa 100644
--- a/test/CodeGen/R600/saddo.ll
+++ b/test/CodeGen/R600/saddo.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s
 
 declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
 declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
 
-; FUNC-LABEL: @saddo_i64_zext
+; FUNC-LABEL: {{^}}saddo_i64_zext:
 define void @saddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %sadd, 0
@@ -15,7 +15,7 @@ define void @saddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @s_saddo_i32
+; FUNC-LABEL: {{^}}s_saddo_i32:
 define void @s_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
   %sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
   %val = extractvalue { i32, i1 } %sadd, 0
@@ -25,7 +25,7 @@ define void @s_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
   ret void
 }
 
-; FUNC-LABEL: @v_saddo_i32
+; FUNC-LABEL: {{^}}v_saddo_i32:
 define void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
   %a = load i32 addrspace(1)* %aptr, align 4
   %b = load i32 addrspace(1)* %bptr, align 4
@@ -37,7 +37,7 @@ define void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
   ret void
 }
 
-; FUNC-LABEL: @s_saddo_i64
+; FUNC-LABEL: {{^}}s_saddo_i64:
 define void @s_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
   %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %sadd, 0
@@ -47,9 +47,9 @@ define void @s_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
   ret void
 }
 
-; FUNC-LABEL: @v_saddo_i64
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; FUNC-LABEL: {{^}}v_saddo_i64:
+; SI: v_add_i32
+; SI: v_addc_u32
 define void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
   %a = load i64 addrspace(1)* %aptr, align 4
   %b = load i64 addrspace(1)* %bptr, align 4
diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll
index e7719b6feb3b..dfb181da1801 100644
--- a/test/CodeGen/R600/salu-to-valu.ll
+++ b/test/CodeGen/R600/salu-to-valu.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
 
 ; In this test both the pointer and the offset operands to the
 ; BUFFER_LOAD instructions end up being stored in vgprs.  This
@@ -7,15 +7,15 @@
 ; sgpr register pair and use that for the pointer operand
 ; (low 64-bits of srsrc).
 
-; CHECK-LABEL: @mubuf
+; CHECK-LABEL: {{^}}mubuf:
 
-; Make sure we aren't using VGPRs for the source operand of S_MOV_B64
-; CHECK-NOT: S_MOV_B64 s[{{[0-9]+:[0-9]+}}], v
+; Make sure we aren't using VGPRs for the source operand of s_mov_b64
+; CHECK-NOT: s_mov_b64 s[{{[0-9]+:[0-9]+}}], v
 
 ; Make sure we aren't using VGPR's for the srsrc operand of BUFFER_LOAD_*
 ; instructions
-; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
-; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
+; CHECK: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
+; CHECK: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
 define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
   %0 = call i32 @llvm.r600.read.tidig.x() #1
@@ -49,9 +49,9 @@ attributes #1 = { nounwind readnone }
 
 ; Test moving an SMRD instruction to the VALU
 
-; CHECK-LABEL: @smrd_valu
-; CHECK: BUFFER_LOAD_DWORD [[OUT:v[0-9]+]]
-; CHECK: BUFFER_STORE_DWORD [[OUT]]
+; CHECK-LABEL: {{^}}smrd_valu:
+; CHECK: buffer_load_dword [[OUT:v[0-9]+]]
+; CHECK: buffer_store_dword [[OUT]]
 
 define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 addrspace(1)* %out) {
 entry:
@@ -77,8 +77,8 @@ endif:
 
 ; Test moving ann SMRD with an immediate offset to the VALU
 
-; CHECK-LABEL: @smrd_valu2
-; CHECK: BUFFER_LOAD_DWORD
+; CHECK-LABEL: {{^}}smrd_valu2:
+; CHECK: buffer_load_dword
 define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) {
 entry:
   %0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone
@@ -88,3 +88,31 @@ entry:
   store i32 %3, i32 addrspace(1)* %out
   ret void
 }
+
+; CHECK-LABEL: {{^}}s_load_imm_v8i32:
+; CHECK: buffer_load_dwordx4
+; CHECK: buffer_load_dwordx4
+define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
+entry:
+  %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tmp1 = getelementptr inbounds i32 addrspace(2)* %in, i32 %tmp0
+  %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
+  %tmp3 = load <8 x i32> addrspace(2)* %tmp2, align 4
+  store <8 x i32> %tmp3, <8 x i32> addrspace(1)* %out, align 32
+  ret void
+}
+
+; CHECK-LABEL: {{^}}s_load_imm_v16i32:
+; CHECK: buffer_load_dwordx4
+; CHECK: buffer_load_dwordx4
+; CHECK: buffer_load_dwordx4
+; CHECK: buffer_load_dwordx4
+define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) {
+entry:
+  %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
+  %tmp1 = getelementptr inbounds i32 addrspace(2)* %in, i32 %tmp0
+  %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
+  %tmp3 = load <16 x i32> addrspace(2)* %tmp2, align 4
+  store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32
+  ret void
+}
diff --git a/test/CodeGen/R600/scalar_to_vector.ll b/test/CodeGen/R600/scalar_to_vector.ll
index bcccb065818e..7ad964eb1623 100644
--- a/test/CodeGen/R600/scalar_to_vector.ll
+++ b/test/CodeGen/R600/scalar_to_vector.ll
@@ -1,14 +1,14 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 
-; FUNC-LABEL: @scalar_to_vector_v2i32
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_LSHRREV_B32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}scalar_to_vector_v2i32:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: s_endpgm
 define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %tmp1 = load i32 addrspace(1)* %in, align 4
   %bc = bitcast i32 %tmp1 to <2 x i16>
@@ -17,14 +17,14 @@ define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(
   ret void
 }
 
-; FUNC-LABEL: @scalar_to_vector_v2f32
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_LSHRREV_B32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: BUFFER_STORE_SHORT [[RESULT]]
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}scalar_to_vector_v2f32:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 16, [[VAL]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: buffer_store_short [[RESULT]]
+; SI: s_endpgm
 define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
   %tmp1 = load float addrspace(1)* %in, align 4
   %bc = bitcast float %tmp1 to <2 x i16>
diff --git a/test/CodeGen/R600/schedule-global-loads.ll b/test/CodeGen/R600/schedule-global-loads.ll
new file mode 100644
index 000000000000..b6437d25b8cb
--- /dev/null
+++ b/test/CodeGen/R600/schedule-global-loads.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
+
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; FIXME: This currently doesn't do a great job of clustering the
+; loads, which end up with extra moves between them. Right now, it
+; seems the only things areLoadsFromSameBasePtr is accomplishing is
+; ordering the loads so that the lower address loads come first.
+
+; FUNC-LABEL: {{^}}cluster_global_arg_loads:
+; SI-DAG: buffer_load_dword [[REG0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
+; SI-DAG: buffer_load_dword [[REG1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4
+; SI: buffer_store_dword [[REG0]]
+; SI: buffer_store_dword [[REG1]]
+define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 {
+  %load0 = load i32 addrspace(1)* %ptr, align 4
+  %gep = getelementptr i32 addrspace(1)* %ptr, i32 1
+  %load1 = load i32 addrspace(1)* %gep, align 4
+  store i32 %load0, i32 addrspace(1)* %out0, align 4
+  store i32 %load1, i32 addrspace(1)* %out1, align 4
+  ret void
+}
+
+; Test for a crach in SIInstrInfo::areLoadsFromSameBasePtr() when checking
+; an MUBUF load which does not have a vaddr operand.
+; FUNC-LABEL: {{^}}same_base_ptr_crash:
+; SI: buffer_load_dword
+; SI: buffer_load_dword
+define void @same_base_ptr_crash(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
+entry:
+  %out1 = getelementptr i32 addrspace(1)* %out, i32 %offset
+  %tmp0 = load i32 addrspace(1)* %out
+  %tmp1 = load i32 addrspace(1)* %out1
+  %tmp2 = add i32 %tmp0, %tmp1
+  store i32 %tmp2, i32 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/schedule-kernel-arg-loads.ll b/test/CodeGen/R600/schedule-kernel-arg-loads.ll
new file mode 100644
index 000000000000..215ebfccf5b4
--- /dev/null
+++ b/test/CodeGen/R600/schedule-kernel-arg-loads.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=FUNC -check-prefix=SI %s
+
+; FUNC-LABEL: {{^}}cluster_arg_loads:
+; SI: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x9
+; SI-NEXT: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-NEXT: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xe
+define void @cluster_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) nounwind {
+  store i32 %x, i32 addrspace(1)* %out0, align 4
+  store i32 %y, i32 addrspace(1)* %out1, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
index 3d2142d53ecf..dee90f311f15 100644
--- a/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
+++ b/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
@@ -1,11 +1,11 @@
 ; XFAIL: *
 ; REQUIRES: asserts
-; RUN: llc -O0 -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
+; RUN: llc -O0 -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI
 
 declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
 
 
-; SI-LABEL: @main(
+; SI-LABEL: {{^}}main(
 define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
 main_body:
   %0 = extractelement <4 x float> %reg1, i32 0
diff --git a/test/CodeGen/R600/sdiv.ll b/test/CodeGen/R600/sdiv.ll
index e922d5c18680..c635c0569e84 100644
--- a/test/CodeGen/R600/sdiv.ll
+++ b/test/CodeGen/R600/sdiv.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 ; The code generated by sdiv is long and complex and may frequently change.
@@ -10,7 +10,7 @@
 ; This was fixed by adding an additional pattern in R600Instructions.td to
 ; match this pattern with a CNDGE_INT.
 
-; FUNC-LABEL: @sdiv_i32
+; FUNC-LABEL: {{^}}sdiv_i32:
 ; EG: CF_END
 define void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
@@ -21,7 +21,7 @@ define void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   ret void
 }
 
-; FUNC-LABEL: @sdiv_i32_4
+; FUNC-LABEL: {{^}}sdiv_i32_4:
 define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %num = load i32 addrspace(1) * %in
   %result = sdiv i32 %num, 4
@@ -32,16 +32,16 @@ define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 ; Multiply by a weird constant to make sure setIntDivIsCheap is
 ; working.
 
-; FUNC-LABEL: @slow_sdiv_i32_3435
-; SI: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]],
-; SI: V_MOV_B32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
-; SI: V_MUL_HI_I32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]]
-; SI: V_ADD_I32
-; SI: V_LSHRREV_B32
-; SI: V_ASHRREV_I32
-; SI: V_ADD_I32
-; SI: BUFFER_STORE_DWORD
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}slow_sdiv_i32_3435:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
+; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[MAGIC]], [[VAL]]
+; SI: v_add_i32
+; SI: v_lshrrev_b32
+; SI: v_ashrrev_i32
+; SI: v_add_i32
+; SI: buffer_store_dword
+; SI: s_endpgm
 define void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %num = load i32 addrspace(1) * %in
   %result = sdiv i32 %num, 3435
diff --git a/test/CodeGen/R600/sdivrem24.ll b/test/CodeGen/R600/sdivrem24.ll
new file mode 100644
index 000000000000..bb90343dbfe6
--- /dev/null
+++ b/test/CodeGen/R600/sdivrem24.ll
@@ -0,0 +1,238 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}sdiv24_i8:
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
+
+; EG: INT_TO_FLT
+; EG-DAG: INT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_INT
+define void @sdiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
+  %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
+  %num = load i8 addrspace(1) * %in
+  %den = load i8 addrspace(1) * %den_ptr
+  %result = sdiv i8 %num, %den
+  store i8 %result, i8 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sdiv24_i16:
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
+
+; EG: INT_TO_FLT
+; EG-DAG: INT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_INT
+define void @sdiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+  %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
+  %num = load i16 addrspace(1) * %in, align 2
+  %den = load i16 addrspace(1) * %den_ptr, align 2
+  %result = sdiv i16 %num, %den
+  store i16 %result, i16 addrspace(1)* %out, align 2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sdiv24_i32:
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
+
+; EG: INT_TO_FLT
+; EG-DAG: INT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_INT
+define void @sdiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 8
+  %den.i24.0 = shl i32 %den, 8
+  %num.i24 = ashr i32 %num.i24.0, 8
+  %den.i24 = ashr i32 %den.i24.0, 8
+  %result = sdiv i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sdiv25_i32:
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: INT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @sdiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 7
+  %den.i24.0 = shl i32 %den, 7
+  %num.i24 = ashr i32 %num.i24.0, 7
+  %den.i24 = ashr i32 %den.i24.0, 7
+  %result = sdiv i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_sdiv24_i32_1:
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: INT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_sdiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 8
+  %den.i24.0 = shl i32 %den, 7
+  %num.i24 = ashr i32 %num.i24.0, 8
+  %den.i24 = ashr i32 %den.i24.0, 7
+  %result = sdiv i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_sdiv24_i32_2:
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: INT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_sdiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 7
+  %den.i24.0 = shl i32 %den, 8
+  %num.i24 = ashr i32 %num.i24.0, 7
+  %den.i24 = ashr i32 %den.i24.0, 8
+  %result = sdiv i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}srem24_i8:
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
+
+; EG: INT_TO_FLT
+; EG-DAG: INT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_INT
+define void @srem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
+  %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
+  %num = load i8 addrspace(1) * %in
+  %den = load i8 addrspace(1) * %den_ptr
+  %result = srem i8 %num, %den
+  store i8 %result, i8 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}srem24_i16:
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
+
+; EG: INT_TO_FLT
+; EG-DAG: INT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_INT
+define void @srem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+  %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
+  %num = load i16 addrspace(1) * %in, align 2
+  %den = load i16 addrspace(1) * %den_ptr, align 2
+  %result = srem i16 %num, %den
+  store i16 %result, i16 addrspace(1)* %out, align 2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}srem24_i32:
+; SI: v_cvt_f32_i32
+; SI: v_cvt_f32_i32
+; SI: v_rcp_f32
+; SI: v_cvt_i32_f32
+
+; EG: INT_TO_FLT
+; EG-DAG: INT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_INT
+define void @srem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 8
+  %den.i24.0 = shl i32 %den, 8
+  %num.i24 = ashr i32 %num.i24.0, 8
+  %den.i24 = ashr i32 %den.i24.0, 8
+  %result = srem i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}srem25_i32:
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: INT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @srem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 7
+  %den.i24.0 = shl i32 %den, 7
+  %num.i24 = ashr i32 %num.i24.0, 7
+  %den.i24 = ashr i32 %den.i24.0, 7
+  %result = srem i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_srem24_i32_1:
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: INT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_srem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 8
+  %den.i24.0 = shl i32 %den, 7
+  %num.i24 = ashr i32 %num.i24.0, 8
+  %den.i24 = ashr i32 %den.i24.0, 7
+  %result = srem i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_srem24_i32_2:
+; SI-NOT: v_cvt_f32_i32
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: INT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_srem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 7
+  %den.i24.0 = shl i32 %den, 8
+  %num.i24 = ashr i32 %num.i24.0, 7
+  %den.i24 = ashr i32 %den.i24.0, 8
+  %result = srem i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/select-i1.ll b/test/CodeGen/R600/select-i1.ll
index 009dd7f68dea..bb778dd264d2 100644
--- a/test/CodeGen/R600/select-i1.ll
+++ b/test/CodeGen/R600/select-i1.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 ; FIXME: This should go in existing select.ll test, except the current testcase there is broken on SI
 
-; FUNC-LABEL: @select_i1
-; SI: V_CNDMASK_B32
-; SI-NOT: V_CNDMASK_B32
+; FUNC-LABEL: {{^}}select_i1:
+; SI: v_cndmask_b32
+; SI-NOT: v_cndmask_b32
 define void @select_i1(i1 addrspace(1)* %out, i32 %cond, i1 %a, i1 %b) nounwind {
   %cmp = icmp ugt i32 %cond, 5
   %sel = select i1 %cmp, i1 %a, i1 %b
diff --git a/test/CodeGen/R600/select-vectors.ll b/test/CodeGen/R600/select-vectors.ll
index 94605fe08ad8..d982603335f2 100644
--- a/test/CodeGen/R600/select-vectors.ll
+++ b/test/CodeGen/R600/select-vectors.ll
@@ -1,14 +1,14 @@
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 ; Test expansion of scalar selects on vectors.
 ; Evergreen not enabled since it seems to be having problems with doubles.
 
 
-; FUNC-LABEL: @select_v4i8
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; FUNC-LABEL: {{^}}select_v4i8:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
 define void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b, i8 %c) nounwind {
   %cmp = icmp eq i8 %c, 0
   %select = select i1 %cmp, <4 x i8> %a, <4 x i8> %b
@@ -16,11 +16,11 @@ define void @select_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b,
   ret void
 }
 
-; FUNC-LABEL: @select_v4i16
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; FUNC-LABEL: {{^}}select_v4i16:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
 define void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b, i32 %c) nounwind {
   %cmp = icmp eq i32 %c, 0
   %select = select i1 %cmp, <4 x i16> %a, <4 x i16> %b
@@ -28,10 +28,10 @@ define void @select_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16>
   ret void
 }
 
-; FUNC-LABEL: @select_v2i32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}select_v2i32:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: buffer_store_dwordx2
 define void @select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b, i32 %c) nounwind {
   %cmp = icmp eq i32 %c, 0
   %select = select i1 %cmp, <2 x i32> %a, <2 x i32> %b
@@ -39,12 +39,12 @@ define void @select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32>
   ret void
 }
 
-; FUNC-LABEL: @select_v4i32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: BUFFER_STORE_DWORDX4
+; FUNC-LABEL: {{^}}select_v4i32:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: buffer_store_dwordx4
 define void @select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b, i32 %c) nounwind {
   %cmp = icmp eq i32 %c, 0
   %select = select i1 %cmp, <4 x i32> %a, <4 x i32> %b
@@ -52,15 +52,15 @@ define void @select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32>
   ret void
 }
 
-; FUNC-LABEL: @select_v8i32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; FUNC-LABEL: {{^}}select_v8i32:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
 define void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b, i32 %c) nounwind {
   %cmp = icmp eq i32 %c, 0
   %select = select i1 %cmp, <8 x i32> %a, <8 x i32> %b
@@ -68,8 +68,8 @@ define void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32>
   ret void
 }
 
-; FUNC-LABEL: @select_v2f32
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}select_v2f32:
+; SI: buffer_store_dwordx2
 define void @select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) nounwind {
   %cmp = icmp eq i32 %c, 0
   %select = select i1 %cmp, <2 x float> %a, <2 x float> %b
@@ -77,8 +77,8 @@ define void @select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x f
   ret void
 }
 
-; FUNC-LABEL: @select_v4f32
-; SI: BUFFER_STORE_DWORDX4
+; FUNC-LABEL: {{^}}select_v4f32:
+; SI: buffer_store_dwordx4
 define void @select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b, i32 %c) nounwind {
   %cmp = icmp eq i32 %c, 0
   %select = select i1 %cmp, <4 x float> %a, <4 x float> %b
@@ -86,15 +86,15 @@ define void @select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x f
   ret void
 }
 
-; FUNC-LABEL: @select_v8f32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; FUNC-LABEL: {{^}}select_v8f32:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
 define void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b, i32 %c) nounwind {
   %cmp = icmp eq i32 %c, 0
   %select = select i1 %cmp, <8 x float> %a, <8 x float> %b
@@ -102,11 +102,11 @@ define void @select_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x f
   ret void
 }
 
-; FUNC-LABEL: @select_v2f64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; FUNC-LABEL: {{^}}select_v2f64:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
 define void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x double> %b, i32 %c) nounwind {
   %cmp = icmp eq i32 %c, 0
   %select = select i1 %cmp, <2 x double> %a, <2 x double> %b
@@ -114,15 +114,15 @@ define void @select_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, <2 x
   ret void
 }
 
-; FUNC-LABEL: @select_v4f64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; FUNC-LABEL: {{^}}select_v4f64:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
 define void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x double> %b, i32 %c) nounwind {
   %cmp = icmp eq i32 %c, 0
   %select = select i1 %cmp, <4 x double> %a, <4 x double> %b
@@ -130,23 +130,23 @@ define void @select_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, <4 x
   ret void
 }
 
-; FUNC-LABEL: @select_v8f64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
+; FUNC-LABEL: {{^}}select_v8f64:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
 define void @select_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, <8 x double> %b, i32 %c) nounwind {
   %cmp = icmp eq i32 %c, 0
   %select = select i1 %cmp, <8 x double> %a, <8 x double> %b
diff --git a/test/CodeGen/R600/select.ll b/test/CodeGen/R600/select.ll
index 7d5156834b9d..45f3cd5a7ac5 100644
--- a/test/CodeGen/R600/select.ll
+++ b/test/CodeGen/R600/select.ll
@@ -7,7 +7,7 @@
 ; In order to avoid the select_cc optimization, this test case calculates the
 ; condition for the select in a separate basic block.
 
-; FUNC-LABEL: @select
+; FUNC-LABEL: {{^}}select:
 ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.X
 ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.X
 ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
diff --git a/test/CodeGen/R600/select64.ll b/test/CodeGen/R600/select64.ll
index dba25e3bd21e..f48ec2135de4 100644
--- a/test/CodeGen/R600/select64.ll
+++ b/test/CodeGen/R600/select64.ll
@@ -1,11 +1,11 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
 
-; CHECK-LABEL: @select0
+; CHECK-LABEL: {{^}}select0:
 ; i64 select should be split into two i32 selects, and we shouldn't need
 ; to use a shfit to extract the hi dword of the input.
-; CHECK-NOT: S_LSHR_B64
-; CHECK: V_CNDMASK
-; CHECK: V_CNDMASK
+; CHECK-NOT: s_lshr_b64
+; CHECK: v_cndmask
+; CHECK: v_cndmask
 define void @select0(i64 addrspace(1)* %out, i32 %cond, i64 %in) {
 entry:
   %0 = icmp ugt i32 %cond, 5
@@ -14,9 +14,9 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @select_trunc_i64
-; CHECK: V_CNDMASK_B32
-; CHECK-NOT: V_CNDMASK_B32
+; CHECK-LABEL: {{^}}select_trunc_i64:
+; CHECK: v_cndmask_b32
+; CHECK-NOT: v_cndmask_b32
 define void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwind {
   %cmp = icmp ugt i32 %cond, 5
   %sel = select i1 %cmp, i64 0, i64 %in
@@ -25,9 +25,9 @@ define void @select_trunc_i64(i32 addrspace(1)* %out, i32 %cond, i64 %in) nounwi
   ret void
 }
 
-; CHECK-LABEL: @select_trunc_i64_2
-; CHECK: V_CNDMASK_B32
-; CHECK-NOT: V_CNDMASK_B32
+; CHECK-LABEL: {{^}}select_trunc_i64_2:
+; CHECK: v_cndmask_b32
+; CHECK-NOT: v_cndmask_b32
 define void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %b) nounwind {
   %cmp = icmp ugt i32 %cond, 5
   %sel = select i1 %cmp, i64 %a, i64 %b
@@ -36,9 +36,9 @@ define void @select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 %a, i64 %
   ret void
 }
 
-; CHECK-LABEL: @v_select_trunc_i64_2
-; CHECK: V_CNDMASK_B32
-; CHECK-NOT: V_CNDMASK_B32
+; CHECK-LABEL: {{^}}v_select_trunc_i64_2:
+; CHECK: v_cndmask_b32
+; CHECK-NOT: v_cndmask_b32
 define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
   %cmp = icmp ugt i32 %cond, 5
   %a = load i64 addrspace(1)* %aptr, align 8
diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll
index bdb6867850ba..feaea87592b6 100644
--- a/test/CodeGen/R600/selectcc-opt.ll
+++ b/test/CodeGen/R600/selectcc-opt.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 
-; FUNC-LABEL: @test_a
+; FUNC-LABEL: {{^}}test_a:
 ; EG-NOT: CND
 ; EG: SET{{[NEQGTL]+}}_DX10
 
@@ -30,7 +30,7 @@ ENDIF:
 ; Same as test_a, but the branch labels are swapped to produce the inverse cc
 ; for the icmp instruction
 
-; EG-LABEL: @test_b
+; EG-LABEL: {{^}}test_b:
 ; EG: SET{{[GTEQN]+}}_DX10
 ; EG-NEXT: PRED_
 ; EG-NEXT: ALU clause starting
@@ -56,7 +56,7 @@ ENDIF:
 }
 
 ; Test a CND*_INT instruction with float true/false values
-; EG-LABEL: @test_c
+; EG-LABEL: {{^}}test_c:
 ; EG: CND{{[GTE]+}}_INT
 define void @test_c(float addrspace(1)* %out, i32 %in) {
 entry:
@@ -66,11 +66,11 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @selectcc_bool
-; SI: V_CMP_NE_I32
-; SI-NEXT: V_CNDMASK_B32_e64
-; SI-NOT: CMP
-; SI-NOT: CNDMASK
+; FUNC-LABEL: {{^}}selectcc_bool:
+; SI: v_cmp_ne_i32
+; SI-NEXT: v_cndmask_b32_e64
+; SI-NOT: cmp
+; SI-NOT: cndmask
 define void @selectcc_bool(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
   %icmp0 = icmp ne i32 %a, %b
   %ext = select i1 %icmp0, i32 -1, i32 0
diff --git a/test/CodeGen/R600/selectcc.ll b/test/CodeGen/R600/selectcc.ll
index a8f57cf1b572..d0ae5bfe91ae 100644
--- a/test/CodeGen/R600/selectcc.ll
+++ b/test/CodeGen/R600/selectcc.ll
@@ -1,15 +1,15 @@
 ; RUN: llc -verify-machineinstrs -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @selectcc_i64
+; FUNC-LABEL: {{^}}selectcc_i64:
 ; EG: XOR_INT
 ; EG: XOR_INT
 ; EG: OR_INT
 ; EG: CNDE_INT
 ; EG: CNDE_INT
-; SI: V_CMP_EQ_I64
-; SI: V_CNDMASK
-; SI: V_CNDMASK
+; SI: v_cmp_eq_i64
+; SI: v_cndmask
+; SI: v_cndmask
 define void @selectcc_i64(i64 addrspace(1) * %out, i64 %lhs, i64 %rhs, i64 %true, i64 %false) {
 entry:
   %0 = icmp eq i64 %lhs, %rhs
diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll
index 5c7d4998d07c..53694dcffa66 100644
--- a/test/CodeGen/R600/set-dx10.ll
+++ b/test/CodeGen/R600/set-dx10.ll
@@ -4,7 +4,7 @@
 ; to store integer true (-1) and false (0) values are lowered to one of the
 ; SET*DX10 instructions.
 
-; CHECK: @fcmp_une_select_fptosi
+; CHECK: {{^}}fcmp_une_select_fptosi:
 ; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -18,7 +18,7 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_une_select_i32
+; CHECK: {{^}}fcmp_une_select_i32:
 ; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -30,7 +30,7 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_oeq_select_fptosi
+; CHECK: {{^}}fcmp_oeq_select_fptosi:
 ; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -44,7 +44,7 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_oeq_select_i32
+; CHECK: {{^}}fcmp_oeq_select_i32:
 ; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -56,7 +56,7 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_ogt_select_fptosi
+; CHECK: {{^}}fcmp_ogt_select_fptosi:
 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -70,7 +70,7 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_ogt_select_i32
+; CHECK: {{^}}fcmp_ogt_select_i32:
 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -82,7 +82,7 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_oge_select_fptosi
+; CHECK: {{^}}fcmp_oge_select_fptosi:
 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -96,7 +96,7 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_oge_select_i32
+; CHECK: {{^}}fcmp_oge_select_i32:
 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -108,7 +108,7 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_ole_select_fptosi
+; CHECK: {{^}}fcmp_ole_select_fptosi:
 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -122,7 +122,7 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_ole_select_i32
+; CHECK: {{^}}fcmp_ole_select_i32:
 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -134,7 +134,7 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_olt_select_fptosi
+; CHECK: {{^}}fcmp_olt_select_fptosi:
 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -148,7 +148,7 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_olt_select_i32
+; CHECK: {{^}}fcmp_olt_select_i32:
 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
diff --git a/test/CodeGen/R600/setcc-equivalent.ll b/test/CodeGen/R600/setcc-equivalent.ll
index f796748fcefe..11ea793650c4 100644
--- a/test/CodeGen/R600/setcc-equivalent.ll
+++ b/test/CodeGen/R600/setcc-equivalent.ll
@@ -1,7 +1,6 @@
 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
-; XFAIL: *
 
-; EG-LABEL: @and_setcc_setcc_i32
+; EG-LABEL: {{^}}and_setcc_setcc_i32:
 ; EG: AND_INT
 ; EG-NEXT: SETE_INT
 define void @and_setcc_setcc_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
@@ -13,7 +12,7 @@ define void @and_setcc_setcc_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
   ret void
 }
 
-; EG-LABEL: @and_setcc_setcc_v4i32
+; EG-LABEL: {{^}}and_setcc_setcc_v4i32:
 ; EG: AND_INT
 ; EG: AND_INT
 ; EG: SETE_INT
@@ -28,4 +27,4 @@ define void @and_setcc_setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <
   %ext = sext <4 x i1> %and to <4 x i32>
   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out, align 4
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/R600/setcc-opt.ll b/test/CodeGen/R600/setcc-opt.ll
index 8e831e409191..a44c89f72cf5 100644
--- a/test/CodeGen/R600/setcc-opt.ll
+++ b/test/CodeGen/R600/setcc-opt.ll
@@ -1,15 +1,233 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
-; SI-LABEL: @sext_bool_icmp_ne
-; SI: V_CMP_NE_I32
-; SI-NEXT: V_CNDMASK_B32
-; SI-NOT: V_CMP_NE_I32
-; SI-NOT: V_CNDMASK_B32
-; SI: S_ENDPGM
-define void @sext_bool_icmp_ne(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
+; SI-NOT: v_cmp
+; SI: v_cmp_ne_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT:buffer_store_byte [[RESULT]]
+; SI-NEXT: s_endpgm
+
+; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
+; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
+define void @sext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %icmp0 = icmp eq i32 %a, %b
+  %ext = sext i1 %icmp0 to i32
+  %icmp1 = icmp eq i32 %ext, 0
+  store i1 %icmp1, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
+; SI-NOT: v_cmp
+; SI: v_cmp_ne_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI-NEXT: s_endpgm
+
+; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
+; EG: AND_INT T{{[0-9]+.[XYZW]}}, PS, 1
+define void @sext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
   %icmp0 = icmp ne i32 %a, %b
   %ext = sext i1 %icmp0 to i32
   %icmp1 = icmp ne i32 %ext, 0
   store i1 %icmp1, i1 addrspace(1)* %out
   ret void
 }
+
+; This really folds away to false
+; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
+; SI: v_cmp_eq_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
+; SI-NEXT: v_cmp_eq_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
+; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
+; SI-NEXT: buffer_store_byte [[TMP]]
+; SI-NEXT: s_endpgm
+define void @sext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %icmp0 = icmp eq i32 %a, %b
+  %ext = sext i1 %icmp0 to i32
+  %icmp1 = icmp eq i32 %ext, 1
+  store i1 %icmp1, i1 addrspace(1)* %out
+  ret void
+}
+
+; This really folds away to true
+; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
+; SI: v_cmp_ne_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, vcc
+; SI-NEXT: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[TMP]], 1{{$}}
+; SI-NEXT: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1,
+; SI-NEXT: buffer_store_byte [[TMP]]
+; SI-NEXT: s_endpgm
+define void @sext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %icmp0 = icmp ne i32 %a, %b
+  %ext = sext i1 %icmp0 to i32
+  %icmp1 = icmp ne i32 %ext, 1
+  store i1 %icmp1, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
+; SI-NOT: v_cmp
+; SI: v_cmp_ne_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI-NEXT: s_endpgm
+define void @zext_bool_icmp_eq_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %icmp0 = icmp eq i32 %a, %b
+  %ext = zext i1 %icmp0 to i32
+  %icmp1 = icmp eq i32 %ext, 0
+  store i1 %icmp1, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
+; SI-NOT: v_cmp
+; SI: v_cmp_ne_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI-NEXT: s_endpgm
+define void @zext_bool_icmp_ne_0(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %icmp0 = icmp ne i32 %a, %b
+  %ext = zext i1 %icmp0 to i32
+  %icmp1 = icmp ne i32 %ext, 0
+  store i1 %icmp1, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
+; SI-NOT: v_cmp
+; SI: v_cmp_eq_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI-NEXT: s_endpgm
+define void @zext_bool_icmp_eq_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %icmp0 = icmp eq i32 %a, %b
+  %ext = zext i1 %icmp0 to i32
+  %icmp1 = icmp eq i32 %ext, 1
+  store i1 %icmp1, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
+; SI-NOT: v_cmp
+; SI: v_cmp_eq_i32_e32 vcc,
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @zext_bool_icmp_ne_1(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %icmp0 = icmp ne i32 %a, %b
+  %ext = zext i1 %icmp0 to i32
+  %icmp1 = icmp ne i32 %ext, 1
+  store i1 %icmp1, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
+; SI-DAG: s_load_dword [[A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[B:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
+; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[VB]], 2{{$}}
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
+; SI: buffer_store_byte
+; SI: s_endpgm
+define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %icmp0 = icmp ne i32 %a, %b
+  %ext = sext i1 %icmp0 to i32
+  %icmp1 = icmp ne i32 %ext, 2
+  store i1 %icmp1, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
+; SI: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
+; SI: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}}
+; SI: v_cmp_ne_i32_e32 vcc, [[B]], [[K255]]
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI: s_endpgm
+define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind {
+  %b.ext = zext i8 %b to i32
+  %icmp0 = icmp ne i32 %b.ext, 255
+  store i1 %icmp0, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
+; SI: buffer_load_sbyte [[B:v[0-9]+]]
+; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI: s_endpgm
+define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
+  %b = load i8 addrspace(1)* %b.ptr
+  %b.ext = sext i8 %b to i32
+  %icmp0 = icmp ne i32 %b.ext, -1
+  store i1 %icmp0, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_sext_arg:
+; SI: s_load_dword [[B:s[0-9]+]]
+; SI: v_cmp_ne_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[B]], -1{{$}}
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI: s_endpgm
+define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) nounwind {
+  %b.ext = sext i8 %b to i32
+  %icmp0 = icmp ne i32 %b.ext, -1
+  store i1 %icmp0, i1 addrspace(1)* %out
+  ret void
+}
+
+; FIXME: This ends up doing a buffer_load_ubyte, and and compare to
+; 255. Seems to be because of ordering problems when not allowing load widths to be reduced.
+; Should do a buffer_load_sbyte and compare with -1
+
+; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
+; SI-DAG: buffer_load_ubyte [[B:v[0-9]+]]
+; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}}
+; SI: v_cmp_ne_i32_e32 vcc, [[B]], [[K]]{{$}}
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI: s_endpgm
+define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind {
+  %b.ext = sext i8 %b to i32
+  %icmp0 = icmp ne i32 %b.ext, -1
+  store i1 %icmp0, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI: s_endpgm
+define void @cmp_zext_k_neg1(i1 addrspace(1)* %out, i8 %b) nounwind {
+  %b.ext = zext i8 %b to i32
+  %icmp0 = icmp ne i32 %b.ext, -1
+  store i1 %icmp0, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI-NEXT: s_endpgm
+define void @zext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %icmp0 = icmp ne i32 %a, %b
+  %ext = zext i1 %icmp0 to i32
+  %icmp1 = icmp ne i32 %ext, 2
+  store i1 %icmp1, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; SI-NEXT: buffer_store_byte [[RESULT]]
+; SI-NEXT: s_endpgm
+define void @zext_bool_icmp_eq_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %icmp0 = icmp ne i32 %a, %b
+  %ext = zext i1 %icmp0 to i32
+  %icmp1 = icmp eq i32 %ext, 2
+  store i1 %icmp1, i1 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/R600/setcc.ll
index 5bd95b79c0f0..f9c7e4f36128 100644
--- a/test/CodeGen/R600/setcc.ll
+++ b/test/CodeGen/R600/setcc.ll
@@ -1,7 +1,9 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
-;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @setcc_v2i32
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: {{^}}setcc_v2i32:
 ; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
 ; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[2].W, KC0[3].Y
 
@@ -12,7 +14,7 @@ define void @setcc_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %
   ret void
 }
 
-; FUNC-LABEL: @setcc_v4i32
+; FUNC-LABEL: {{^}}setcc_v4i32:
 ; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
@@ -32,9 +34,9 @@ define void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %
 ;; Float comparisons
 ;;;==========================================================================;;;
 
-; FUNC-LABEL: @f32_oeq
+; FUNC-LABEL: {{^}}f32_oeq:
 ; R600: SETE_DX10
-; SI: V_CMP_EQ_F32
+; SI: v_cmp_eq_f32
 define void @f32_oeq(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp oeq float %a, %b
@@ -43,9 +45,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f32_ogt
+; FUNC-LABEL: {{^}}f32_ogt:
 ; R600: SETGT_DX10
-; SI: V_CMP_GT_F32
+; SI: v_cmp_gt_f32
 define void @f32_ogt(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp ogt float %a, %b
@@ -54,9 +56,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f32_oge
+; FUNC-LABEL: {{^}}f32_oge:
 ; R600: SETGE_DX10
-; SI: V_CMP_GE_F32
+; SI: v_cmp_ge_f32
 define void @f32_oge(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp oge float %a, %b
@@ -65,9 +67,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f32_olt
+; FUNC-LABEL: {{^}}f32_olt:
 ; R600: SETGT_DX10
-; SI: V_CMP_LT_F32
+; SI: v_cmp_lt_f32
 define void @f32_olt(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp olt float %a, %b
@@ -76,9 +78,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f32_ole
+; FUNC-LABEL: {{^}}f32_ole:
 ; R600: SETGE_DX10
-; SI: V_CMP_LE_F32
+; SI: v_cmp_le_f32
 define void @f32_ole(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp ole float %a, %b
@@ -87,18 +89,16 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f32_one
+; FUNC-LABEL: {{^}}f32_one:
 ; R600-DAG: SETE_DX10
 ; R600-DAG: SETE_DX10
 ; R600-DAG: AND_INT
 ; R600-DAG: SETNE_DX10
 ; R600-DAG: AND_INT
 ; R600-DAG: SETNE_INT
-; SI: V_CMP_O_F32
-; SI: V_CMP_NEQ_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_AND_B32_e32
+
+; SI: v_cmp_lg_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f32_one(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp one float %a, %b
@@ -107,12 +107,12 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f32_ord
+; FUNC-LABEL: {{^}}f32_ord:
 ; R600-DAG: SETE_DX10
 ; R600-DAG: SETE_DX10
 ; R600-DAG: AND_INT
 ; R600-DAG: SETNE_INT
-; SI: V_CMP_O_F32
+; SI: v_cmp_o_f32
 define void @f32_ord(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp ord float %a, %b
@@ -121,18 +121,16 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f32_ueq
+; FUNC-LABEL: {{^}}f32_ueq:
 ; R600-DAG: SETNE_DX10
 ; R600-DAG: SETNE_DX10
 ; R600-DAG: OR_INT
 ; R600-DAG: SETE_DX10
 ; R600-DAG: OR_INT
 ; R600-DAG: SETNE_INT
-; SI: V_CMP_U_F32
-; SI: V_CMP_EQ_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+
+; SI: v_cmp_nlg_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp ueq float %a, %b
@@ -141,14 +139,11 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f32_ugt
+; FUNC-LABEL: {{^}}f32_ugt:
 ; R600: SETGE
 ; R600: SETE_DX10
-; SI: V_CMP_U_F32
-; SI: V_CMP_GT_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; SI: v_cmp_nle_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp ugt float %a, %b
@@ -157,14 +152,12 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f32_uge
+; FUNC-LABEL: {{^}}f32_uge:
 ; R600: SETGT
 ; R600: SETE_DX10
-; SI: V_CMP_U_F32
-; SI: V_CMP_GE_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+
+; SI: v_cmp_nlt_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp uge float %a, %b
@@ -173,14 +166,12 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f32_ult
+; FUNC-LABEL: {{^}}f32_ult:
 ; R600: SETGE
 ; R600: SETE_DX10
-; SI: V_CMP_U_F32
-; SI: V_CMP_LT_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+
+; SI: v_cmp_nge_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp ult float %a, %b
@@ -189,14 +180,12 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f32_ule
+; FUNC-LABEL: {{^}}f32_ule:
 ; R600: SETGT
 ; R600: SETE_DX10
-; SI: V_CMP_U_F32
-; SI: V_CMP_LE_F32
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+
+; SI: v_cmp_ngt_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp ule float %a, %b
@@ -205,9 +194,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f32_une
+; FUNC-LABEL: {{^}}f32_une:
 ; R600: SETNE_DX10
-; SI: V_CMP_NEQ_F32
+; SI: v_cmp_neq_f32
 define void @f32_une(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp une float %a, %b
@@ -216,12 +205,12 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f32_uno
+; FUNC-LABEL: {{^}}f32_uno:
 ; R600: SETNE_DX10
 ; R600: SETNE_DX10
 ; R600: OR_INT
 ; R600: SETNE_INT
-; SI: V_CMP_U_F32
+; SI: v_cmp_u_f32
 define void @f32_uno(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp uno float %a, %b
@@ -234,9 +223,9 @@ entry:
 ;; 32-bit integer comparisons
 ;;;==========================================================================;;;
 
-; FUNC-LABEL: @i32_eq
+; FUNC-LABEL: {{^}}i32_eq:
 ; R600: SETE_INT
-; SI: V_CMP_EQ_I32
+; SI: v_cmp_eq_i32
 define void @i32_eq(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 entry:
   %0 = icmp eq i32 %a, %b
@@ -245,9 +234,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i32_ne
+; FUNC-LABEL: {{^}}i32_ne:
 ; R600: SETNE_INT
-; SI: V_CMP_NE_I32
+; SI: v_cmp_ne_i32
 define void @i32_ne(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 entry:
   %0 = icmp ne i32 %a, %b
@@ -256,9 +245,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i32_ugt
+; FUNC-LABEL: {{^}}i32_ugt:
 ; R600: SETGT_UINT
-; SI: V_CMP_GT_U32
+; SI: v_cmp_gt_u32
 define void @i32_ugt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 entry:
   %0 = icmp ugt i32 %a, %b
@@ -267,9 +256,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i32_uge
+; FUNC-LABEL: {{^}}i32_uge:
 ; R600: SETGE_UINT
-; SI: V_CMP_GE_U32
+; SI: v_cmp_ge_u32
 define void @i32_uge(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 entry:
   %0 = icmp uge i32 %a, %b
@@ -278,9 +267,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i32_ult
+; FUNC-LABEL: {{^}}i32_ult:
 ; R600: SETGT_UINT
-; SI: V_CMP_LT_U32
+; SI: v_cmp_lt_u32
 define void @i32_ult(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 entry:
   %0 = icmp ult i32 %a, %b
@@ -289,9 +278,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i32_ule
+; FUNC-LABEL: {{^}}i32_ule:
 ; R600: SETGE_UINT
-; SI: V_CMP_LE_U32
+; SI: v_cmp_le_u32
 define void @i32_ule(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 entry:
   %0 = icmp ule i32 %a, %b
@@ -300,9 +289,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i32_sgt
+; FUNC-LABEL: {{^}}i32_sgt:
 ; R600: SETGT_INT
-; SI: V_CMP_GT_I32
+; SI: v_cmp_gt_i32
 define void @i32_sgt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 entry:
   %0 = icmp sgt i32 %a, %b
@@ -311,9 +300,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i32_sge
+; FUNC-LABEL: {{^}}i32_sge:
 ; R600: SETGE_INT
-; SI: V_CMP_GE_I32
+; SI: v_cmp_ge_i32
 define void @i32_sge(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 entry:
   %0 = icmp sge i32 %a, %b
@@ -322,9 +311,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i32_slt
+; FUNC-LABEL: {{^}}i32_slt:
 ; R600: SETGT_INT
-; SI: V_CMP_LT_I32
+; SI: v_cmp_lt_i32
 define void @i32_slt(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 entry:
   %0 = icmp slt i32 %a, %b
@@ -333,9 +322,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i32_sle
+; FUNC-LABEL: {{^}}i32_sle:
 ; R600: SETGE_INT
-; SI: V_CMP_LE_I32
+; SI: v_cmp_le_i32
 define void @i32_sle(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 entry:
   %0 = icmp sle i32 %a, %b
@@ -343,3 +332,46 @@ entry:
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
+
+; FIXME: This does 4 compares
+; FUNC-LABEL: {{^}}v3i32_eq:
+; SI-DAG: v_cmp_eq_i32
+; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
+; SI-DAG: v_cmp_eq_i32
+; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
+; SI-DAG: v_cmp_eq_i32
+; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
+; SI: s_endpgm
+define void @v3i32_eq(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %ptra, <3 x i32> addrspace(1)* %ptrb) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.a = getelementptr <3 x i32> addrspace(1)* %ptra, i32 %tid
+  %gep.b = getelementptr <3 x i32> addrspace(1)* %ptrb, i32 %tid
+  %gep.out = getelementptr <3 x i32> addrspace(1)* %out, i32 %tid
+  %a = load <3 x i32> addrspace(1)* %gep.a
+  %b = load <3 x i32> addrspace(1)* %gep.b
+  %cmp = icmp eq <3 x i32> %a, %b
+  %ext = sext <3 x i1> %cmp to <3 x i32>
+  store <3 x i32> %ext, <3 x i32> addrspace(1)* %gep.out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v3i8_eq:
+; SI-DAG: v_cmp_eq_i32
+; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
+; SI-DAG: v_cmp_eq_i32
+; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
+; SI-DAG: v_cmp_eq_i32
+; SI-DAG: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
+; SI: s_endpgm
+define void @v3i8_eq(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %ptra, <3 x i8> addrspace(1)* %ptrb) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep.a = getelementptr <3 x i8> addrspace(1)* %ptra, i32 %tid
+  %gep.b = getelementptr <3 x i8> addrspace(1)* %ptrb, i32 %tid
+  %gep.out = getelementptr <3 x i8> addrspace(1)* %out, i32 %tid
+  %a = load <3 x i8> addrspace(1)* %gep.a
+  %b = load <3 x i8> addrspace(1)* %gep.b
+  %cmp = icmp eq <3 x i8> %a, %b
+  %ext = sext <3 x i1> %cmp to <3 x i8>
+  store <3 x i8> %ext, <3 x i8> addrspace(1)* %gep.out
+  ret void
+}
diff --git a/test/CodeGen/R600/setcc64.ll b/test/CodeGen/R600/setcc64.ll
index 54a33b30940a..c0632198efdc 100644
--- a/test/CodeGen/R600/setcc64.ll
+++ b/test/CodeGen/R600/setcc64.ll
@@ -1,4 +1,4 @@
-;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
 
 ; XXX: Merge this into setcc, once R600 supports 64-bit operations
 
@@ -6,8 +6,8 @@
 ;; Double comparisons
 ;;;==========================================================================;;;
 
-; FUNC-LABEL: @f64_oeq
-; SI: V_CMP_EQ_F64
+; FUNC-LABEL: {{^}}f64_oeq:
+; SI: v_cmp_eq_f64
 define void @f64_oeq(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp oeq double %a, %b
@@ -16,8 +16,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f64_ogt
-; SI: V_CMP_GT_F64
+; FUNC-LABEL: {{^}}f64_ogt:
+; SI: v_cmp_gt_f64
 define void @f64_ogt(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp ogt double %a, %b
@@ -26,8 +26,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f64_oge
-; SI: V_CMP_GE_F64
+; FUNC-LABEL: {{^}}f64_oge:
+; SI: v_cmp_ge_f64
 define void @f64_oge(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp oge double %a, %b
@@ -36,8 +36,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f64_olt
-; SI: V_CMP_LT_F64
+; FUNC-LABEL: {{^}}f64_olt:
+; SI: v_cmp_lt_f64
 define void @f64_olt(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp olt double %a, %b
@@ -46,8 +46,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f64_ole
-; SI: V_CMP_LE_F64
+; FUNC-LABEL: {{^}}f64_ole:
+; SI: v_cmp_le_f64
 define void @f64_ole(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp ole double %a, %b
@@ -56,12 +56,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f64_one
-; SI: V_CMP_O_F64
-; SI: V_CMP_NEQ_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_AND_B32_e32
+; FUNC-LABEL: {{^}}f64_one:
+; SI: v_cmp_lg_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f64_one(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp one double %a, %b
@@ -70,8 +67,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f64_ord
-; SI: V_CMP_O_F64
+; FUNC-LABEL: {{^}}f64_ord:
+; SI: v_cmp_o_f64
 define void @f64_ord(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp ord double %a, %b
@@ -80,12 +77,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f64_ueq
-; SI: V_CMP_U_F64
-; SI: V_CMP_EQ_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; FUNC-LABEL: {{^}}f64_ueq:
+; SI: v_cmp_nlg_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp ueq double %a, %b
@@ -94,12 +88,10 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f64_ugt
-; SI: V_CMP_U_F64
-; SI: V_CMP_GT_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; FUNC-LABEL: {{^}}f64_ugt:
+
+; SI: v_cmp_nle_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp ugt double %a, %b
@@ -108,12 +100,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f64_uge
-; SI: V_CMP_U_F64
-; SI: V_CMP_GE_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; FUNC-LABEL: {{^}}f64_uge:
+; SI: v_cmp_nlt_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp uge double %a, %b
@@ -122,12 +111,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f64_ult
-; SI: V_CMP_U_F64
-; SI: V_CMP_LT_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; FUNC-LABEL: {{^}}f64_ult:
+; SI: v_cmp_nge_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp ult double %a, %b
@@ -136,12 +122,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f64_ule
-; SI: V_CMP_U_F64
-; SI: V_CMP_LE_F64
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: V_OR_B32_e32
+; FUNC-LABEL: {{^}}f64_ule:
+; SI: v_cmp_ngt_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp ule double %a, %b
@@ -150,8 +133,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f64_une
-; SI: V_CMP_NEQ_F64
+; FUNC-LABEL: {{^}}f64_une:
+; SI: v_cmp_neq_f64
 define void @f64_une(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp une double %a, %b
@@ -160,8 +143,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @f64_uno
-; SI: V_CMP_U_F64
+; FUNC-LABEL: {{^}}f64_uno:
+; SI: v_cmp_u_f64
 define void @f64_uno(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp uno double %a, %b
@@ -174,8 +157,8 @@ entry:
 ;; 64-bit integer comparisons
 ;;;==========================================================================;;;
 
-; FUNC-LABEL: @i64_eq
-; SI: V_CMP_EQ_I64
+; FUNC-LABEL: {{^}}i64_eq:
+; SI: v_cmp_eq_i64
 define void @i64_eq(i32 addrspace(1)* %out, i64 %a, i64 %b) {
 entry:
   %0 = icmp eq i64 %a, %b
@@ -184,8 +167,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i64_ne
-; SI: V_CMP_NE_I64
+; FUNC-LABEL: {{^}}i64_ne:
+; SI: v_cmp_ne_i64
 define void @i64_ne(i32 addrspace(1)* %out, i64 %a, i64 %b) {
 entry:
   %0 = icmp ne i64 %a, %b
@@ -194,8 +177,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i64_ugt
-; SI: V_CMP_GT_U64
+; FUNC-LABEL: {{^}}i64_ugt:
+; SI: v_cmp_gt_u64
 define void @i64_ugt(i32 addrspace(1)* %out, i64 %a, i64 %b) {
 entry:
   %0 = icmp ugt i64 %a, %b
@@ -204,8 +187,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i64_uge
-; SI: V_CMP_GE_U64
+; FUNC-LABEL: {{^}}i64_uge:
+; SI: v_cmp_ge_u64
 define void @i64_uge(i32 addrspace(1)* %out, i64 %a, i64 %b) {
 entry:
   %0 = icmp uge i64 %a, %b
@@ -214,8 +197,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i64_ult
-; SI: V_CMP_LT_U64
+; FUNC-LABEL: {{^}}i64_ult:
+; SI: v_cmp_lt_u64
 define void @i64_ult(i32 addrspace(1)* %out, i64 %a, i64 %b) {
 entry:
   %0 = icmp ult i64 %a, %b
@@ -224,8 +207,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i64_ule
-; SI: V_CMP_LE_U64
+; FUNC-LABEL: {{^}}i64_ule:
+; SI: v_cmp_le_u64
 define void @i64_ule(i32 addrspace(1)* %out, i64 %a, i64 %b) {
 entry:
   %0 = icmp ule i64 %a, %b
@@ -234,8 +217,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i64_sgt
-; SI: V_CMP_GT_I64
+; FUNC-LABEL: {{^}}i64_sgt:
+; SI: v_cmp_gt_i64
 define void @i64_sgt(i32 addrspace(1)* %out, i64 %a, i64 %b) {
 entry:
   %0 = icmp sgt i64 %a, %b
@@ -244,8 +227,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i64_sge
-; SI: V_CMP_GE_I64
+; FUNC-LABEL: {{^}}i64_sge:
+; SI: v_cmp_ge_i64
 define void @i64_sge(i32 addrspace(1)* %out, i64 %a, i64 %b) {
 entry:
   %0 = icmp sge i64 %a, %b
@@ -254,8 +237,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i64_slt
-; SI: V_CMP_LT_I64
+; FUNC-LABEL: {{^}}i64_slt:
+; SI: v_cmp_lt_i64
 define void @i64_slt(i32 addrspace(1)* %out, i64 %a, i64 %b) {
 entry:
   %0 = icmp slt i64 %a, %b
@@ -264,8 +247,8 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @i64_sle
-; SI: V_CMP_LE_I64
+; FUNC-LABEL: {{^}}i64_sle:
+; SI: v_cmp_le_i64
 define void @i64_sle(i32 addrspace(1)* %out, i64 %a, i64 %b) {
 entry:
   %0 = icmp sle i64 %a, %b
diff --git a/test/CodeGen/R600/seto.ll b/test/CodeGen/R600/seto.ll
index cc942c10a91e..c6265a451c5f 100644
--- a/test/CodeGen/R600/seto.ll
+++ b/test/CodeGen/R600/seto.ll
@@ -1,8 +1,8 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: @main
-;CHECK: V_CMP_O_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
 
+; CHECK-LABEL: {{^}}main:
+; CHECK: v_cmp_o_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
+; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
 define void @main(float %p) {
 main_body:
   %c = fcmp oeq float %p, %p
diff --git a/test/CodeGen/R600/setuo.ll b/test/CodeGen/R600/setuo.ll
index 33007fc754b8..f2113096e3a7 100644
--- a/test/CodeGen/R600/setuo.ll
+++ b/test/CodeGen/R600/setuo.ll
@@ -1,8 +1,8 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
-
-;CHECK-LABEL: @main
-;CHECK: V_CMP_U_F32_e32 vcc, {{[sv][0-9]+, v[0-9]+}}
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
 
+; CHECK-LABEL: {{^}}main:
+; CHECK: v_cmp_u_f32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[SREG:s[0-9]+]], [[SREG]]
+; CHECK-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1.0, [[CMP]]
 define void @main(float %p) {
 main_body:
   %c = fcmp une float %p, %p
diff --git a/test/CodeGen/R600/sext-eliminate.ll b/test/CodeGen/R600/sext-eliminate.ll
new file mode 100644
index 000000000000..7dc6eb87f6b5
--- /dev/null
+++ b/test/CodeGen/R600/sext-eliminate.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}sext_in_reg_i1_i32_add:
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: SUB_INT {{[* ]*}}[[RES]]
+; EG-NOT: BFE
+define void @sext_in_reg_i1_i32_add(i32 addrspace(1)* %out, i1 %a, i32 %b) {
+  %sext = sext i1 %a to i32
+  %res = add i32 %b, %sext
+  store i32 %res, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sext_in_reg_i1_i32_sub:
+
+; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
+; EG: ADD_INT {{[* ]*}}[[RES]]
+; EG-NOT: BFE
+define void @sext_in_reg_i1_i32_sub(i32 addrspace(1)* %out, i1 %a, i32 %b) {
+  %sext = sext i1 %a to i32
+  %res = sub i32 %b, %sext
+  store i32 %res, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/sext-in-reg.ll b/test/CodeGen/R600/sext-in-reg.ll
index 1b02e4bf8015..3260179921f9 100644
--- a/test/CodeGen/R600/sext-in-reg.ll
+++ b/test/CodeGen/R600/sext-in-reg.ll
@@ -1,14 +1,15 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
 declare i32 @llvm.AMDGPU.imax(i32, i32) nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 
 
-; FUNC-LABEL: @sext_in_reg_i1_i32
-; SI: S_LOAD_DWORD [[ARG:s[0-9]+]],
-; SI: S_BFE_I32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
-; SI: V_MOV_B32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
-; SI: BUFFER_STORE_DWORD [[EXTRACT]],
+; FUNC-LABEL: {{^}}sext_in_reg_i1_i32:
+; SI: s_load_dword [[ARG:s[0-9]+]],
+; SI: s_bfe_i32 [[SEXTRACT:s[0-9]+]], [[ARG]], 0x10000
+; SI: v_mov_b32_e32 [[EXTRACT:v[0-9]+]], [[SEXTRACT]]
+; SI: buffer_store_dword [[EXTRACT]],
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
 ; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
@@ -20,11 +21,11 @@ define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_i8_to_i32
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
-; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32:
+; SI: s_add_i32 [[VAL:s[0-9]+]],
+; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: buffer_store_dword [[VEXTRACT]],
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
 ; EG: ADD_INT
@@ -38,11 +39,11 @@ define void @sext_in_reg_i8_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounw
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_i16_to_i32
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
-; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i32:
+; SI: s_add_i32 [[VAL:s[0-9]+]],
+; SI: s_sext_i32_i16 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: buffer_store_dword [[VEXTRACT]],
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
 ; EG: ADD_INT
@@ -56,11 +57,11 @@ define void @sext_in_reg_i16_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) noun
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_i8_to_v1i32
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: V_MOV_B32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
-; SI: BUFFER_STORE_DWORD [[VEXTRACT]],
+; FUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i32:
+; SI: s_add_i32 [[VAL:s[0-9]+]],
+; SI: s_sext_i32_i8 [[EXTRACT:s[0-9]+]], [[VAL]]
+; SI: v_mov_b32_e32 [[VEXTRACT:v[0-9]+]], [[EXTRACT]]
+; SI: buffer_store_dword [[VEXTRACT]],
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
 ; EG: ADD_INT
@@ -74,29 +75,31 @@ define void @sext_in_reg_i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a,
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_i1_to_i64
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_BFE_I32 s{{[0-9]+}}, s{{[0-9]+}}, 0x10000
-; SI: S_MOV_B32 {{s[0-9]+}}, -1
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}sext_in_reg_i1_to_i64:
+; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x10000
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
-  %c = add i64 %a, %b
+  %c = shl i64 %a, %b
   %shl = shl i64 %c, 63
   %ashr = ashr i64 %shl, 63
   store i64 %ashr, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_i8_to_i64
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_SEXT_I32_I8 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: S_MOV_B32 {{s[0-9]+}}, -1
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i64:
+; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x80000
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
-; EG: ADD_INT
-; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
+; EG: LSHL
+; EG: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
 ; EG: ASHR [[RES_HI]]
 ; EG-NOT: BFE_INT
 ; EG: LSHR
@@ -104,23 +107,24 @@ define void @sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
 ;; TODO Check address computation, using | with variables in {{}} does not work,
 ;; also the _LO/_HI order might be different
 define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
-  %c = add i64 %a, %b
+  %c = shl i64 %a, %b
   %shl = shl i64 %c, 56
   %ashr = ashr i64 %shl, 56
   store i64 %ashr, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_i16_to_i64
-; SI: S_ADD_I32 [[VAL:s[0-9]+]],
-; SI: S_SEXT_I32_I16 [[EXTRACT:s[0-9]+]], [[VAL]]
-; SI: S_MOV_B32 {{s[0-9]+}}, -1
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}sext_in_reg_i16_to_i64:
+; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x100000
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
-; EG: ADD_INT
-; EG-NEXT: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
+; EG: LSHL
+; EG: BFE_INT {{\*?}} [[RES_LO]], {{.*}}, 0.0, literal
 ; EG: ASHR [[RES_HI]]
 ; EG-NOT: BFE_INT
 ; EG: LSHR
@@ -128,32 +132,32 @@ define void @sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounw
 ;; TODO Check address computation, using | with variables in {{}} does not work,
 ;; also the _LO/_HI order might be different
 define void @sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
-  %c = add i64 %a, %b
+  %c = shl i64 %a, %b
   %shl = shl i64 %c, 48
   %ashr = ashr i64 %shl, 48
   store i64 %ashr, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_i32_to_i64
-; SI: S_LOAD_DWORD
-; SI: S_LOAD_DWORD
-; SI: S_ADD_I32 [[ADD:s[0-9]+]],
-; SI: S_ASHR_I32 s{{[0-9]+}}, [[ADD]], 31
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}sext_in_reg_i32_to_i64:
+; SI: s_lshl_b64 [[VAL:s\[[0-9]+:[0-9]+\]]]
+; SI-DAG: s_bfe_i64 s{{\[}}[[SLO:[0-9]+]]:[[SHI:[0-9]+]]{{\]}}, [[VAL]], 0x200000
+; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_LO:T[0-9]+\.[XYZW]]], [[ADDR_LO:T[0-9]+.[XYZW]]]
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES_HI:T[0-9]+\.[XYZW]]], [[ADDR_HI:T[0-9]+.[XYZW]]]
 ; EG-NOT: BFE_INT
-; EG: ADD_INT {{\*?}} [[RES_LO]]
+
 ; EG: ASHR [[RES_HI]]
-; EG: ADD_INT
+
 ; EG: LSHR
 ; EG: LSHR
 ;; TODO Check address computation, using | with variables in {{}} does not work,
 ;; also the _LO/_HI order might be different
 define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
-  %c = add i64 %a, %b
+  %c = shl i64 %a, %b
   %shl = shl i64 %c, 32
   %ashr = ashr i64 %shl, 32
   store i64 %ashr, i64 addrspace(1)* %out, align 8
@@ -161,10 +165,10 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
 }
 
 ; This is broken on Evergreen for some reason related to the <1 x i64> kernel arguments.
-; XFUNC-LABEL: @sext_in_reg_i8_to_v1i64
-; XSI: S_BFE_I32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
-; XSI: S_ASHR_I32 {{v[0-9]+}}, [[EXTRACT]], 31
-; XSI: BUFFER_STORE_DWORD
+; XFUNC-LABEL: {{^}}sext_in_reg_i8_to_v1i64:
+; XSI: s_bfe_i32 [[EXTRACT:s[0-9]+]], {{s[0-9]+}}, 524288
+; XSI: s_ashr_i32 {{v[0-9]+}}, [[EXTRACT]], 31
+; XSI: buffer_store_dword
 ; XEG: BFE_INT
 ; XEG: ASHR
 ; define void @sext_in_reg_i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a, <1 x i64> %b) nounwind {
@@ -175,10 +179,93 @@ define void @sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) noun
 ;   ret void
 ; }
 
-; FUNC-LABEL: @sext_in_reg_i1_in_i32_other_amount
-; SI-NOT: BFE
-; SI: S_LSHL_B32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
-; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG]], 7
+; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64:
+; SI: buffer_load_dwordx2
+; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1
+; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @v_sext_in_reg_i1_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+  %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+  %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
+  %a = load i64 addrspace(1)* %a.gep, align 8
+  %b = load i64 addrspace(1)* %b.gep, align 8
+
+  %c = shl i64 %a, %b
+  %shl = shl i64 %c, 63
+  %ashr = ashr i64 %shl, 63
+  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_sext_in_reg_i8_to_i64:
+; SI: buffer_load_dwordx2
+; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 8
+; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @v_sext_in_reg_i8_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+  %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+  %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
+  %a = load i64 addrspace(1)* %a.gep, align 8
+  %b = load i64 addrspace(1)* %b.gep, align 8
+
+  %c = shl i64 %a, %b
+  %shl = shl i64 %c, 56
+  %ashr = ashr i64 %shl, 56
+  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_sext_in_reg_i16_to_i64:
+; SI: buffer_load_dwordx2
+; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}}
+; SI: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 16
+; SI: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @v_sext_in_reg_i16_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+  %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+  %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
+  %a = load i64 addrspace(1)* %a.gep, align 8
+  %b = load i64 addrspace(1)* %b.gep, align 8
+
+  %c = shl i64 %a, %b
+  %shl = shl i64 %c, 48
+  %ashr = ashr i64 %shl, 48
+  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64:
+; SI: buffer_load_dwordx2
+; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}},
+; SI: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]]
+; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[SHR]]{{\]}}
+define void @v_sext_in_reg_i32_to_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x()
+  %a.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+  %b.gep = getelementptr i64 addrspace(1)* %aptr, i32 %tid
+  %out.gep = getelementptr i64 addrspace(1)* %out, i32 %tid
+  %a = load i64 addrspace(1)* %a.gep, align 8
+  %b = load i64 addrspace(1)* %b.gep, align 8
+
+  %c = shl i64 %a, %b
+  %shl = shl i64 %c, 32
+  %ashr = ashr i64 %shl, 32
+  store i64 %ashr, i64 addrspace(1)* %out.gep, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sext_in_reg_i1_in_i32_other_amount:
+; SI-NOT: {{[^@]}}bfe
+; SI: s_lshl_b32 [[REG:s[0-9]+]], {{s[0-9]+}}, 6
+; SI: s_ashr_i32 {{s[0-9]+}}, [[REG]], 7
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
 ; EG-NOT: BFE
@@ -194,11 +281,12 @@ define void @sext_in_reg_i1_in_i32_other_amount(i32 addrspace(1)* %out, i32 %a,
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_v2i1_in_v2i32_other_amount
-; SI: S_LSHL_B32 [[REG0:s[0-9]+]], {{s[0-9]}}, 6
-; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG0]], 7
-; SI: S_LSHL_B32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
-; SI: S_ASHR_I32 {{s[0-9]+}}, [[REG1]], 7
+; FUNC-LABEL: {{^}}sext_in_reg_v2i1_in_v2i32_other_amount:
+; SI-DAG: s_lshl_b32 [[REG0:s[0-9]+]], {{s[0-9]}}, 6
+; SI-DAG: s_ashr_i32 {{s[0-9]+}}, [[REG0]], 7
+; SI-DAG: s_lshl_b32 [[REG1:s[0-9]+]], {{s[0-9]}}, 6
+; SI-DAG: s_ashr_i32 {{s[0-9]+}}, [[REG1]], 7
+; SI: s_endpgm
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
 ; EG-NOT: BFE
@@ -217,10 +305,10 @@ define void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out
 }
 
 
-; FUNC-LABEL: @sext_in_reg_v2i1_to_v2i32
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}sext_in_reg_v2i1_to_v2i32:
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: buffer_store_dwordx2
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
 ; EG: BFE_INT [[RES]]
@@ -234,12 +322,12 @@ define void @sext_in_reg_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_v4i1_to_v4i32
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: S_BFE_I32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
-; SI: BUFFER_STORE_DWORDX4
+; FUNC-LABEL: {{^}}sext_in_reg_v4i1_to_v4i32:
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: s_bfe_i32 {{s[0-9]+}}, {{s[0-9]+}}, 0x10000
+; SI: buffer_store_dwordx4
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
 ; EG: BFE_INT [[RES]]
@@ -255,10 +343,10 @@ define void @sext_in_reg_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_v2i8_to_v2i32
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}sext_in_reg_v2i8_to_v2i32:
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: buffer_store_dwordx2
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
 ; EG: BFE_INT [[RES]]
@@ -272,12 +360,12 @@ define void @sext_in_reg_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_v4i8_to_v4i32
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: S_SEXT_I32_I8 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: BUFFER_STORE_DWORDX4
+; FUNC-LABEL: {{^}}sext_in_reg_v4i8_to_v4i32:
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_sext_i32_i8 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: buffer_store_dwordx4
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW][XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
 ; EG: BFE_INT [[RES]]
@@ -293,10 +381,10 @@ define void @sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_v2i16_to_v2i32
-; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: S_SEXT_I32_I16 {{s[0-9]+}}, {{s[0-9]+}}
-; SI: BUFFER_STORE_DWORDX2
+; FUNC-LABEL: {{^}}sext_in_reg_v2i16_to_v2i32:
+; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_sext_i32_i16 {{s[0-9]+}}, {{s[0-9]+}}
+; SI: buffer_store_dwordx2
 
 ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+]]{{\.[XYZW][XYZW]}}, [[ADDR:T[0-9]+.[XYZW]]]
 ; EG: BFE_INT [[RES]]
@@ -310,7 +398,7 @@ define void @sext_in_reg_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32>
   ret void
 }
 
-; FUNC-LABEL: @testcase
+; FUNC-LABEL: {{^}}testcase:
 define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
   %and_a_1 = and i8 %a, 1
   %cmp_eq = icmp eq i8 %and_a_1, 0
@@ -322,7 +410,7 @@ define void @testcase(i8 addrspace(1)* %out, i8 %a) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @testcase_3
+; FUNC-LABEL: {{^}}testcase_3:
 define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
   %and_a_1 = and i8 %a, 1
   %cmp_eq = icmp eq i8 %and_a_1, 0
@@ -334,11 +422,11 @@ define void @testcase_3(i8 addrspace(1)* %out, i8 %a) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @vgpr_sext_in_reg_v4i8_to_v4i32
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i8_to_v4i32:
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
 define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
   %loada = load <4 x i32> addrspace(1)* %a, align 16
   %loadb = load <4 x i32> addrspace(1)* %b, align 16
@@ -349,9 +437,9 @@ define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i
   ret void
 }
 
-; FUNC-LABEL: @vgpr_sext_in_reg_v4i16_to_v4i32
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
-; SI: V_BFE_I32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
+; FUNC-LABEL: {{^}}vgpr_sext_in_reg_v4i16_to_v4i32:
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
+; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
 define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
   %loada = load <4 x i32> addrspace(1)* %a, align 16
   %loadb = load <4 x i32> addrspace(1)* %b, align 16
@@ -365,11 +453,11 @@ define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x
 ; FIXME: The BFE should really be eliminated. I think it should happen
 ; when computeKnownBitsForTargetNode is implemented for imax.
 
-; FUNC-LABEL: @sext_in_reg_to_illegal_type
-; SI: BUFFER_LOAD_SBYTE
-; SI: V_MAX_I32
-; SI: V_BFE_I32
-; SI: BUFFER_STORE_SHORT
+; FUNC-LABEL: {{^}}sext_in_reg_to_illegal_type:
+; SI: buffer_load_sbyte
+; SI: v_max_i32
+; SI: v_bfe_i32
+; SI: buffer_store_short
 define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
   %tmp5 = load i8 addrspace(1)* %src, align 1
   %tmp2 = sext i8 %tmp5 to i32
@@ -382,9 +470,9 @@ define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 ad
 
 declare i32 @llvm.AMDGPU.bfe.i32(i32, i32, i32) nounwind readnone
 
-; FUNC-LABEL: @bfe_0_width
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_0_width:
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
   %load = load i32 addrspace(1)* %ptr, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
@@ -392,10 +480,10 @@ define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwin
   ret void
 }
 
-; FUNC-LABEL: @bfe_8_bfe_8
-; SI: V_BFE_I32
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_8_bfe_8:
+; SI: v_bfe_i32
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
   %load = load i32 addrspace(1)* %ptr, align 4
   %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
@@ -404,9 +492,9 @@ define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwin
   ret void
 }
 
-; FUNC-LABEL: @bfe_8_bfe_16
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_8_bfe_16:
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
+; SI: s_endpgm
 define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
   %load = load i32 addrspace(1)* %ptr, align 4
   %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
@@ -416,10 +504,10 @@ define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwi
 }
 
 ; This really should be folded into 1
-; FUNC-LABEL: @bfe_16_bfe_8
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}bfe_16_bfe_8:
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
   %load = load i32 addrspace(1)* %ptr, align 4
   %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
@@ -429,10 +517,10 @@ define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwi
 }
 
 ; Make sure there isn't a redundant BFE
-; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe
-; SI: S_SEXT_I32_I8 s{{[0-9]+}}, s{{[0-9]+}}
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe:
+; SI: s_sext_i32_i8 s{{[0-9]+}}, s{{[0-9]+}}
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
   %c = add i32 %a, %b ; add to prevent folding into extload
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 0, i32 8) nounwind readnone
@@ -442,7 +530,7 @@ define void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) n
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_i8_to_i32_bfe_wrong
+; FUNC-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
 define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
   %c = add i32 %a, %b ; add to prevent folding into extload
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %c, i32 8, i32 0) nounwind readnone
@@ -452,10 +540,10 @@ define void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32
   ret void
 }
 
-; FUNC-LABEL: @sextload_i8_to_i32_bfe
-; SI: BUFFER_LOAD_SBYTE
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe:
+; SI: buffer_load_sbyte
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
   %load = load i8 addrspace(1)* %ptr, align 1
   %sext = sext i8 %load to i32
@@ -466,9 +554,10 @@ define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %pt
   ret void
 }
 
-; FUNC-LABEL: @sextload_i8_to_i32_bfe_0:
-; SI-NOT: BFE
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sextload_i8_to_i32_bfe_0:
+; SI: .text
+; SI-NOT: {{[^@]}}bfe
+; SI: s_endpgm
 define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
   %load = load i8 addrspace(1)* %ptr, align 1
   %sext = sext i8 %load to i32
@@ -479,11 +568,11 @@ define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_0:
-; SI-NOT: SHR
-; SI-NOT: SHL
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_0:
+; SI-NOT: shr
+; SI-NOT: shl
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
+; SI: s_endpgm
 define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
@@ -493,12 +582,12 @@ define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_i1_bfe_offset_1
-; SI: BUFFER_LOAD_DWORD
-; SI-NOT: SHL
-; SI-NOT: SHR
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sext_in_reg_i1_bfe_offset_1:
+; SI: buffer_load_dword
+; SI-NOT: shl
+; SI-NOT: shr
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
+; SI: s_endpgm
 define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 30
@@ -508,12 +597,12 @@ define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1
   ret void
 }
 
-; FUNC-LABEL: @sext_in_reg_i2_bfe_offset_1:
-; SI: BUFFER_LOAD_DWORD
-; SI: V_LSHLREV_B32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
-; SI: V_ASHRREV_I32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
-; SI: V_BFE_I32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sext_in_reg_i2_bfe_offset_1:
+; SI: buffer_load_dword
+; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 30, v{{[0-9]+}}
+; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
+; SI: s_endpgm
 define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %x = load i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 30
diff --git a/test/CodeGen/R600/sgpr-control-flow.ll b/test/CodeGen/R600/sgpr-control-flow.ll
index 06ad24d959cf..f0236acc6daa 100644
--- a/test/CodeGen/R600/sgpr-control-flow.ll
+++ b/test/CodeGen/R600/sgpr-control-flow.ll
@@ -1,12 +1,17 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
 ;
 ;
 ; Most SALU instructions ignore control flow, so we need to make sure
 ; they don't overwrite values from other blocks.
 
-; SI-NOT: S_ADD
+; If the branch decision is made based on a value in an SGPR then all
+; threads will execute the same code paths, so we don't need to worry
+; about instructions in different blocks overwriting each other.
+; SI-LABEL: {{^}}sgpr_if_else_salu_br:
+; SI: s_add
+; SI: s_add
 
-define void @sgpr_if_else(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+define void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
 entry:
   %0 = icmp eq i32 %a, 0
   br i1 %0, label %if, label %else
@@ -25,3 +30,76 @@ endif:
   store i32 %4, i32 addrspace(1)* %out
   ret void
 }
+
+; The two S_ADD instructions should write to different registers, since
+; different threads will take different control flow paths.
+
+; SI-LABEL: {{^}}sgpr_if_else_valu_br:
+; SI: s_add_i32 [[SGPR:s[0-9]+]]
+; SI-NOT: s_add_i32 [[SGPR]]
+
+define void @sgpr_if_else_valu_br(i32 addrspace(1)* %out, float %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+entry:
+  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tid_f = uitofp i32 %tid to float
+  %tmp1 = fcmp ueq float %tid_f, 0.0
+  br i1 %tmp1, label %if, label %else
+
+if:
+  %tmp2 = add i32 %b, %c
+  br label %endif
+
+else:
+  %tmp3 = add i32 %d, %e
+  br label %endif
+
+endif:
+  %tmp4 = phi i32 [%tmp2, %if], [%tmp3, %else]
+  store i32 %tmp4, i32 addrspace(1)* %out
+  ret void
+}
+
+; FIXME: Should write to different SGPR pairs instead of copying to
+; VALU for i1 phi.
+
+; SI-LABEL: {{^}}sgpr_if_else_valu_cmp_phi_br:
+; SI: buffer_load_dword [[AVAL:v[0-9]+]]
+; SI: v_cmp_lt_i32_e64 [[CMP_IF:s\[[0-9]+:[0-9]+\]]], [[AVAL]], 0
+; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]]
+
+; SI: BB2_1:
+; SI: buffer_load_dword [[AVAL:v[0-9]+]]
+; SI: v_cmp_eq_i32_e64 [[CMP_ELSE:s\[[0-9]+:[0-9]+\]]], [[AVAL]], 0
+; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]]
+
+; SI: v_cmp_ne_i32_e64 [[CMP_CMP:s\[[0-9]+:[0-9]+\]]], [[V_CMP]], 0
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP_CMP]]
+; SI: buffer_store_dword [[RESULT]]
+define void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
+entry:
+  %tid = call i32 @llvm.r600.read.tidig.x() #0
+  %tmp1 = icmp eq i32 %tid, 0
+  br i1 %tmp1, label %if, label %else
+
+if:
+  %gep.if = getelementptr i32 addrspace(1)* %a, i32 %tid
+  %a.val = load i32 addrspace(1)* %gep.if
+  %cmp.if = icmp eq i32 %a.val, 0
+  br label %endif
+
+else:
+  %gep.else = getelementptr i32 addrspace(1)* %b, i32 %tid
+  %b.val = load i32 addrspace(1)* %gep.else
+  %cmp.else = icmp slt i32 %b.val, 0
+  br label %endif
+
+endif:
+  %tmp4 = phi i1 [%cmp.if, %if], [%cmp.else, %else]
+  %ext = sext i1 %tmp4 to i32
+  store i32 %ext, i32 addrspace(1)* %out
+  ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #0
+
+attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
index 9d8a623125fe..ad5e0a7765af 100644
--- a/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
+++ b/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
 
 ; Copy VGPR -> SGPR used twice as an instruction operand, which is then
 ; used in an REG_SEQUENCE that also needs to be handled.
 
-; SI-LABEL: @test_dup_operands:
-; SI: V_ADD_I32_e32
+; SI-LABEL: {{^}}test_dup_operands:
+; SI: v_add_i32_e32
 define void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) {
   %a = load <2 x i32> addrspace(1)* %in
   %lo = extractelement <2 x i32> %a, i32 0
diff --git a/test/CodeGen/R600/sgpr-copy.ll b/test/CodeGen/R600/sgpr-copy.ll
index c7d5bf90644e..db11af5cdabb 100644
--- a/test/CodeGen/R600/sgpr-copy.ll
+++ b/test/CodeGen/R600/sgpr-copy.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
 
 ; This test checks that no VGPR to SGPR copies are created by the register
 ; allocator.
-; CHECK-LABEL: @phi1
-; CHECK: S_BUFFER_LOAD_DWORD [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0
-; CHECK: V_MOV_B32_e32 v{{[0-9]}}, [[DST]]
+; CHECK-LABEL: {{^}}phi1:
+; CHECK: s_buffer_load_dword [[DST:s[0-9]]], {{s\[[0-9]+:[0-9]+\]}}, 0x0
+; CHECK: v_mov_b32_e32 v{{[0-9]}}, [[DST]]
 
 define void @phi1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
@@ -29,7 +29,7 @@ ENDIF:                                            ; preds = %main_body, %ELSE
 }
 
 ; Make sure this program doesn't crash
-; CHECK-LABEL: @phi2
+; CHECK-LABEL: {{^}}phi2:
 define void @phi2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
@@ -149,7 +149,7 @@ ENDIF24:                                          ; preds = %ENDIF, %IF25
 }
 
 ; We just want ot make sure the program doesn't crash
-; CHECK-LABEL: @loop
+; CHECK-LABEL: {{^}}loop:
 
 define void @loop(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
@@ -202,8 +202,8 @@ attributes #2 = { readonly }
 attributes #3 = { readnone }
 attributes #4 = { nounwind readonly }
 
-!0 = metadata !{metadata !"const", null}
-!1 = metadata !{metadata !0, metadata !0, i64 0, i32 1}
+!0 = !{!"const", null}
+!1 = !{!0, !0, i64 0, i32 1}
 
 ; Function Attrs: nounwind readnone
 declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
@@ -227,11 +227,11 @@ declare i32 @llvm.SI.packf16(float, float) #1
 ; registers were being identified as an SGPR regclass which was causing
 ; an assertion failure.
 
-; CHECK-LABEL: @sample_v3
-; CHECK: IMAGE_SAMPLE
-; CHECK: IMAGE_SAMPLE
-; CHECK: EXP
-; CHECK: S_ENDPGM
+; CHECK-LABEL: {{^}}sample_v3:
+; CHECK: image_sample
+; CHECK: image_sample
+; CHECK: exp
+; CHECK: s_endpgm
 define void @sample_v3([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 
 entry:
@@ -267,12 +267,12 @@ endif:
   ret void
 }
 
-!2 = metadata !{metadata !"const", null, i32 1}
+!2 = !{!"const", null, i32 1}
 
-; CHECK-LABEL: @copy1
-; CHECK: BUFFER_LOAD_DWORD
-; CHECK: V_ADD
-; CHECK: S_ENDPGM
+; CHECK-LABEL: {{^}}copy1:
+; CHECK: buffer_load_dword
+; CHECK: v_add
+; CHECK: s_endpgm
 define void @copy1(float addrspace(1)* %out, float addrspace(1)* %in0) {
 entry:
   %0 = load float addrspace(1)* %in0
@@ -296,8 +296,8 @@ endif:
 }
 
 ; This test is just checking that we don't crash / assertion fail.
-; CHECK-LABEL: @copy2
-; CHECK: S_ENDPGM
+; CHECK-LABEL: {{^}}copy2:
+; CHECK: s_endpgm
 
 define void @copy2([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 entry:
@@ -325,3 +325,54 @@ ENDIF69:
 
 attributes #0 = { "ShaderType"="0" }
 
+; This test checks that image_sample resource descriptors aren't loaded into
+; vgprs.  The verifier will fail if this happens.
+; CHECK-LABEL:{{^}}sample_rsrc:
+; CHECK: image_sample
+; CHECK: image_sample
+; CHECK: s_endpgm
+define void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
+bb:
+  %tmp = getelementptr [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
+  %tmp22 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0
+  %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp22, i32 16)
+  %tmp25 = getelementptr [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
+  %tmp26 = load <8 x i32> addrspace(2)* %tmp25, !tbaa !0
+  %tmp27 = getelementptr [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
+  %tmp28 = load <4 x i32> addrspace(2)* %tmp27, !tbaa !0
+  %tmp29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg5, <2 x i32> %arg7)
+  %tmp30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %arg5, <2 x i32> %arg7)
+  %tmp31 = bitcast float %tmp23 to i32
+  %tmp36 = icmp ne i32 %tmp31, 0
+  br i1 %tmp36, label %bb38, label %bb80
+
+bb38:                                             ; preds = %bb
+  %tmp52 = bitcast float %tmp29 to i32
+  %tmp53 = bitcast float %tmp30 to i32
+  %tmp54 = insertelement <2 x i32> undef, i32 %tmp52, i32 0
+  %tmp55 = insertelement <2 x i32> %tmp54, i32 %tmp53, i32 1
+  %tmp56 = bitcast <8 x i32> %tmp26 to <32 x i8>
+  %tmp57 = bitcast <4 x i32> %tmp28 to <16 x i8>
+  %tmp58 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp55, <32 x i8> %tmp56, <16 x i8> %tmp57, i32 2)
+  br label %bb71
+
+bb80:                                             ; preds = %bb
+  %tmp81 = bitcast float %tmp29 to i32
+  %tmp82 = bitcast float %tmp30 to i32
+  %tmp82.2 = add i32 %tmp82, 1
+  %tmp83 = insertelement <2 x i32> undef, i32 %tmp81, i32 0
+  %tmp84 = insertelement <2 x i32> %tmp83, i32 %tmp82.2, i32 1
+  %tmp85 = bitcast <8 x i32> %tmp26 to <32 x i8>
+  %tmp86 = bitcast <4 x i32> %tmp28 to <16 x i8>
+  %tmp87 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %tmp84, <32 x i8> %tmp85, <16 x i8> %tmp86, i32 2)
+  br label %bb71
+
+bb71:                                             ; preds = %bb80, %bb38
+  %tmp72 = phi <4 x float> [ %tmp58, %bb38 ], [ %tmp87, %bb80 ]
+  %tmp88 = extractelement <4 x float> %tmp72, i32 0
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp88, float %tmp88, float %tmp88, float %tmp88)
+  ret void
+}
+
+attributes #0 = { "ShaderType"="0" "unsafe-fp-math"="true" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/shared-op-cycle.ll b/test/CodeGen/R600/shared-op-cycle.ll
index 0484fc9a8563..f52a9baf4d18 100644
--- a/test/CodeGen/R600/shared-op-cycle.ll
+++ b/test/CodeGen/R600/shared-op-cycle.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
-; CHECK: @main
+; CHECK: {{^}}main:
 ; CHECK: MULADD_IEEE *
 ; CHECK-NOT: MULADD_IEEE *
 
diff --git a/test/CodeGen/R600/shl.ll b/test/CodeGen/R600/shl.ll
index 43fab2a39dcc..98d9494e4e11 100644
--- a/test/CodeGen/R600/shl.ll
+++ b/test/CodeGen/R600/shl.ll
@@ -1,13 +1,13 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-;EG-CHECK: @shl_v2i32
+;EG-CHECK: {{^}}shl_v2i32:
 ;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK: @shl_v2i32
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: {{^}}shl_v2i32:
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -18,17 +18,17 @@ define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in
   ret void
 }
 
-;EG-CHECK: @shl_v4i32
+;EG-CHECK: {{^}}shl_v4i32:
 ;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK: @shl_v4i32
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHL_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: {{^}}shl_v4i32:
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -39,7 +39,7 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
   ret void
 }
 
-;EG-CHECK: @shl_i64
+;EG-CHECK: {{^}}shl_i64:
 ;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
 ;EG-CHECK: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
 ;EG-CHECK: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
@@ -51,8 +51,8 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
 ;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
 ;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
 
-;SI-CHECK: @shl_i64
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: {{^}}shl_i64:
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 
 define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
@@ -63,7 +63,7 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   ret void
 }
 
-;EG-CHECK: @shl_v2i64
+;EG-CHECK: {{^}}shl_v2i64:
 ;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
 ;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
 ;EG-CHECK-DAG: LSHR {{\*? *}}[[COMPSHA]]
@@ -85,9 +85,9 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
 ;EG-CHECK-DAG: CNDE_INT
 ;EG-CHECK-DAG: CNDE_INT
 
-;SI-CHECK: @shl_v2i64
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: {{^}}shl_v2i64:
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 
 define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
@@ -98,7 +98,7 @@ define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in
   ret void
 }
 
-;EG-CHECK: @shl_v4i64
+;EG-CHECK: {{^}}shl_v4i64:
 ;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
 ;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
 ;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
@@ -140,11 +140,11 @@ define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in
 ;EG-CHECK-DAG: CNDE_INT
 ;EG-CHECK-DAG: CNDE_INT
 
-;SI-CHECK: @shl_v4i64
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHL_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: {{^}}shl_v4i64:
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 
 define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
diff --git a/test/CodeGen/R600/shl_add_constant.ll b/test/CodeGen/R600/shl_add_constant.ll
new file mode 100644
index 000000000000..6915495beece
--- /dev/null
+++ b/test/CodeGen/R600/shl_add_constant.ll
@@ -0,0 +1,90 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Test with inline immediate
+
+; FUNC-LABEL: {{^}}shl_2_add_9_i32:
+; SI: v_lshlrev_b32_e32  [[REG:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 36, [[REG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
+  %val = load i32 addrspace(1)* %ptr, align 4
+  %add = add i32 %val, 9
+  %result = shl i32 %add, 2
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}shl_2_add_9_i32_2_add_uses:
+; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], 9, {{v[0-9]+}}
+; SI-DAG: v_lshlrev_b32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}}
+; SI-DAG: buffer_store_dword [[ADDREG]]
+; SI-DAG: buffer_store_dword [[SHLREG]]
+; SI: s_endpgm
+define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
+  %val = load i32 addrspace(1)* %ptr, align 4
+  %add = add i32 %val, 9
+  %result = shl i32 %add, 2
+  store i32 %result, i32 addrspace(1)* %out0, align 4
+  store i32 %add, i32 addrspace(1)* %out1, align 4
+  ret void
+}
+
+; Test with add literal constant
+
+; FUNC-LABEL: {{^}}shl_2_add_999_i32:
+; SI: v_lshlrev_b32_e32  [[REG:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 0xf9c, [[REG]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %ptr = getelementptr i32 addrspace(1)* %in, i32 %tid.x
+  %val = load i32 addrspace(1)* %ptr, align 4
+  %shl = add i32 %val, 999
+  %result = shl i32 %shl, 2
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_add_shl_add_constant:
+; SI-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3
+; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
+; SI: s_add_i32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
+; SI: buffer_store_dword [[VRESULT]]
+define void @test_add_shl_add_constant(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+  %add.0 = add i32 %x, 123
+  %shl = shl i32 %add.0, 3
+  %add.1 = add i32 %shl, %y
+   store i32 %add.1, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_add_shl_add_constant_inv:
+; SI-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3
+; SI: s_add_i32 [[TMP:s[0-9]+]], [[SHL3]], [[Y]]
+; SI: s_add_i32 [[RESULT:s[0-9]+]], [[TMP]], 0x3d8
+; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
+; SI: buffer_store_dword [[VRESULT]]
+
+define void @test_add_shl_add_constant_inv(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+  %add.0 = add i32 %x, 123
+  %shl = shl i32 %add.0, 3
+  %add.1 = add i32 %y, %shl
+  store i32 %add.1, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/shl_add_ptr.ll b/test/CodeGen/R600/shl_add_ptr.ll
new file mode 100644
index 000000000000..cef48e227ef4
--- /dev/null
+++ b/test/CodeGen/R600/shl_add_ptr.ll
@@ -0,0 +1,283 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck -check-prefix=SI %s
+
+; Test that doing a shift of a pointer with a constant add will be
+; folded into the constant offset addressing mode even if the add has
+; multiple uses. This is relevant to accessing 2 separate, adjacent
+; LDS globals.
+
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+@lds0 = addrspace(3) global [512 x float] undef, align 4
+@lds1 = addrspace(3) global [512 x float] undef, align 4
+
+
+; Make sure the (add tid, 2) << 2 gets folded into the ds's offset as (tid << 2) + 8
+
+; SI-LABEL: {{^}}load_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8 [M0]
+; SI: s_endpgm
+define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 2
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+  store float %val0, float addrspace(1)* %out
+  ret void
+}
+
+; Make sure once the first use is folded into the addressing mode, the
+; remaining add use goes through the normal shl + add constant fold.
+
+; SI-LABEL: {{^}}load_shl_base_lds_1:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 [M0]
+; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
+; SI-DAG: buffer_store_dword [[RESULT]]
+; SI-DAG: buffer_store_dword [[ADDUSE]]
+; SI: s_endpgm
+define void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 2
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %shl_add_use = shl i32 %idx.0, 2
+  store i32 %shl_add_use, i32 addrspace(1)* %add_use, align 4
+  store float %val0, float addrspace(1)* %out
+  ret void
+}
+
+@maxlds = addrspace(3) global [65536 x i8] undef, align 4
+
+; SI-LABEL: {{^}}load_shl_base_lds_max_offset
+; SI: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
+; SI: s_endpgm
+define void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 65535
+  %arrayidx0 = getelementptr inbounds [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
+  %val0 = load i8 addrspace(3)* %arrayidx0
+  store i32 %idx.0, i32 addrspace(1)* %add_use
+  store i8 %val0, i8 addrspace(1)* %out
+  ret void
+}
+
+; The two globals are placed adjacent in memory, so the same base
+; pointer can be used with an offset into the second one.
+
+; SI-LABEL: {{^}}load_shl_base_lds_2:
+; SI: s_mov_b32 m0, -1
+; SI-NEXT: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI-NEXT: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9 [M0]
+; SI: s_endpgm
+define void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 64
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %arrayidx1 = getelementptr inbounds [512 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
+  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %sum = fadd float %val0, %val1
+  store float %sum, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}store_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8 [M0]
+; SI: s_endpgm
+define void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 2
+  %arrayidx0 = getelementptr inbounds [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
+  store float 1.0, float addrspace(3)* %arrayidx0, align 4
+  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+  ret void
+}
+
+
+; --------------------------------------------------------------------------------
+; Atomics.
+
+@lds2 = addrspace(3) global [512 x i32] undef, align 4
+
+; define void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+;   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+;   %idx.0 = add nsw i32 %tid.x, 2
+;   %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+;   %val = load atomic i32 addrspace(3)* %arrayidx0 seq_cst, align 4
+;   store i32 %val, i32 addrspace(1)* %out, align 4
+;   store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+;   ret void
+; }
+
+
+; SI-LABEL: {{^}}atomic_cmpxchg_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 2
+  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+  %pair = cmpxchg i32 addrspace(3)* %arrayidx0, i32 7, i32 %swap seq_cst monotonic
+  %result = extractvalue { i32, i1 } %pair, 0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}atomic_swap_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 2
+  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+  %val = atomicrmw xchg i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}atomic_add_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 2
+  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+  %val = atomicrmw add i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}atomic_sub_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 2
+  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+  %val = atomicrmw sub i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}atomic_and_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 2
+  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+  %val = atomicrmw and i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}atomic_or_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 2
+  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+  %val = atomicrmw or i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}atomic_xor_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 2
+  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+  %val = atomicrmw xor i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+  ret void
+}
+
+; define void @atomic_nand_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+;   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+;   %idx.0 = add nsw i32 %tid.x, 2
+;   %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+;   %val = atomicrmw nand i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+;   store i32 %val, i32 addrspace(1)* %out, align 4
+;   store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+;   ret void
+; }
+
+; SI-LABEL: {{^}}atomic_min_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 2
+  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+  %val = atomicrmw min i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}atomic_max_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 2
+  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+  %val = atomicrmw max i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}atomic_umin_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 2
+  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+  %val = atomicrmw umin i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}atomic_umax_shl_base_lds_0:
+; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
+; SI: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
+; SI: s_endpgm
+define void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+  %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
+  %idx.0 = add nsw i32 %tid.x, 2
+  %arrayidx0 = getelementptr inbounds [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
+  %val = atomicrmw umax i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/si-annotate-cf-assertion.ll b/test/CodeGen/R600/si-annotate-cf-assertion.ll
index daa4667150bc..515064f3076b 100644
--- a/test/CodeGen/R600/si-annotate-cf-assertion.ll
+++ b/test/CodeGen/R600/si-annotate-cf-assertion.ll
@@ -1,10 +1,10 @@
 ; REQUIRES: asserts
 ; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs-asm-verbose=false < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs-asm-verbose=false < %s | FileCheck %s
 
 
 define void @test(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
-; CHECK-LABEL: @test:
+; CHECK-LABEL: {{^}}test:
 
 entry:
   switch i32 %x, label %sw.default [
diff --git a/test/CodeGen/R600/si-lod-bias.ll b/test/CodeGen/R600/si-lod-bias.ll
index 8d7a79cdbda0..2e2f2ce5fec8 100644
--- a/test/CodeGen/R600/si-lod-bias.ll
+++ b/test/CodeGen/R600/si-lod-bias.ll
@@ -1,10 +1,10 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
 ; This shader has the potential to generated illegal VGPR to SGPR copies if
 ; the wrong register class is used for the REG_SEQUENCE instructions.
 
-; CHECK: @main
-; CHECK: IMAGE_SAMPLE_B v{{\[[0-9]:[0-9]\]}}, 15, 0, 0, 0, 0, 0, 0, 0, v{{\[[0-9]:[0-9]\]}}
+; CHECK: {{^}}main:
+; CHECK: image_sample_b v{{\[[0-9]:[0-9]\]}}, 15, 0, 0, 0, 0, 0, 0, 0, v{{\[[0-9]:[0-9]\]}}
 
 define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
@@ -47,5 +47,5 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
 attributes #0 = { "ShaderType"="0" }
 attributes #1 = { nounwind readnone }
 
-!0 = metadata !{metadata !"const", null}
-!1 = metadata !{metadata !0, metadata !0, i64 0, i32 1}
+!0 = !{!"const", null}
+!1 = !{!0, !0, i64 0, i32 1}
diff --git a/test/CodeGen/R600/si-sgpr-spill.ll b/test/CodeGen/R600/si-sgpr-spill.ll
index 53a096513bbc..f1c20fe89773 100644
--- a/test/CodeGen/R600/si-sgpr-spill.ll
+++ b/test/CodeGen/R600/si-sgpr-spill.ll
@@ -1,12 +1,12 @@
-; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck %s
 
 ; These tests check that the compiler won't crash when it needs to spill
 ; SGPRs.
 
-; CHECK-LABEL: @main
+; CHECK-LABEL: {{^}}main:
 ; Writing to M0 from an SMRD instruction will hang the GPU.
-; CHECK-NOT: S_BUFFER_LOAD_DWORD m0
-; CHECK: S_ENDPGM
+; CHECK-NOT: s_buffer_load_dword m0
+; CHECK: s_endpgm
 @ddxy_lds = external addrspace(3) global [64 x i32]
 
 define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
@@ -686,10 +686,10 @@ attributes #2 = { readnone }
 attributes #3 = { readonly }
 attributes #4 = { nounwind readonly }
 
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!"const", null, i32 1}
 
-; CHECK-LABEL: @main1
-; CHECK: S_ENDPGM
+; CHECK-LABEL: {{^}}main1:
+; CHECK: s_endpgm
 define void @main1([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %21 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
diff --git a/test/CodeGen/R600/si-triv-disjoint-mem-access.ll b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
new file mode 100644
index 000000000000..b2f4a9ff05e1
--- /dev/null
+++ b/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
@@ -0,0 +1,238 @@
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -enable-misched -enable-aa-sched-mi < %s | FileCheck -check-prefix=FUNC -check-prefix=CI %s
+
+declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+declare void @llvm.AMDGPU.barrier.local() #2
+
+
+@stored_lds_ptr = addrspace(3) global i32 addrspace(3)* undef, align 4
+@stored_constant_ptr = addrspace(3) global i32 addrspace(2)* undef, align 8
+@stored_global_ptr = addrspace(3) global i32 addrspace(1)* undef, align 8
+
+; FUNC-LABEL: @reorder_local_load_global_store_local_load
+; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
+; CI-NEXT: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
+; CI: buffer_store_dword
+define void @reorder_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
+  %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+
+  %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
+  %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
+
+  %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+  store i32 99, i32 addrspace(1)* %gptr, align 4
+  %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+
+  %add = add nsw i32 %tmp1, %tmp2
+
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @no_reorder_local_load_volatile_global_store_local_load
+; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
+; CI: buffer_store_dword
+; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
+define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
+  %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+
+  %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
+  %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
+
+  %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+  store volatile i32 99, i32 addrspace(1)* %gptr, align 4
+  %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+
+  %add = add nsw i32 %tmp1, %tmp2
+
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @no_reorder_barrier_local_load_global_store_local_load
+; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:4
+; CI: buffer_store_dword
+; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
+define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
+  %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+
+  %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
+  %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
+
+  %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+  store i32 99, i32 addrspace(1)* %gptr, align 4
+  call void @llvm.AMDGPU.barrier.local() #2
+  %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+
+  %add = add nsw i32 %tmp1, %tmp2
+
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; Technically we could reorder these, but just comparing the
+; instruction type of the load is insufficient.
+
+; FUNC-LABEL: @no_reorder_constant_load_global_store_constant_load
+; CI: buffer_load_dword
+; CI: buffer_store_dword
+; CI: buffer_load_dword
+; CI: buffer_store_dword
+define void @no_reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
+  %ptr0 = load i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
+
+  %ptr1 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 1
+  %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
+
+  %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+  store i32 99, i32 addrspace(1)* %gptr, align 4
+  %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+
+  %add = add nsw i32 %tmp1, %tmp2
+
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; XXX: Should be able to reorder this, but the laods count as ordered
+
+; FUNC-LABEL: @reorder_constant_load_local_store_constant_load
+; CI: buffer_load_dword
+; CI: ds_write_b32
+; CI: buffer_load_dword
+; CI: buffer_store_dword
+define void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 {
+  %ptr0 = load i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
+
+  %ptr1 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 1
+  %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
+
+  %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+  store i32 99, i32 addrspace(3)* %lptr, align 4
+  %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+
+  %add = add nsw i32 %tmp1, %tmp2
+
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @reorder_smrd_load_local_store_smrd_load
+; CI: s_load_dword
+; CI: s_load_dword
+; CI: s_load_dword
+; CI: ds_write_b32
+; CI: buffer_store_dword
+define void @reorder_smrd_load_local_store_smrd_load(i32 addrspace(1)* %out, i32 addrspace(3)* noalias %lptr, i32 addrspace(2)* %ptr0) #0 {
+  %ptr1 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 1
+  %ptr2 = getelementptr inbounds i32 addrspace(2)* %ptr0, i64 2
+
+  %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+  store i32 99, i32 addrspace(3)* %lptr, align 4
+  %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+
+  %add = add nsw i32 %tmp1, %tmp2
+
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @reorder_global_load_local_store_global_load
+; CI: buffer_load_dword
+; CI: buffer_load_dword
+; CI: ds_write_b32
+; CI: buffer_store_dword
+define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr, i32 addrspace(1)* %ptr0) #0 {
+  %ptr1 = getelementptr inbounds i32 addrspace(1)* %ptr0, i64 1
+  %ptr2 = getelementptr inbounds i32 addrspace(1)* %ptr0, i64 2
+
+  %tmp1 = load i32 addrspace(1)* %ptr1, align 4
+  store i32 99, i32 addrspace(3)* %lptr, align 4
+  %tmp2 = load i32 addrspace(1)* %ptr2, align 4
+
+  %add = add nsw i32 %tmp1, %tmp2
+
+  store i32 %add, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @reorder_local_offsets
+; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
+; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
+; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
+; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
+; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
+; CI: buffer_store_dword
+; CI: s_endpgm
+define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(3)* noalias nocapture %ptr0) #0 {
+  %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 3
+  %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 100
+  %ptr3 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 101
+
+  store i32 123, i32 addrspace(3)* %ptr1, align 4
+  %tmp1 = load i32 addrspace(3)* %ptr2, align 4
+  %tmp2 = load i32 addrspace(3)* %ptr3, align 4
+  store i32 123, i32 addrspace(3)* %ptr2, align 4
+  %tmp3 = load i32 addrspace(3)* %ptr1, align 4
+  store i32 789, i32 addrspace(3)* %ptr3, align 4
+
+  %add.0 = add nsw i32 %tmp2, %tmp1
+  %add.1 = add nsw i32 %add.0, %tmp3
+  store i32 %add.1, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @reorder_global_offsets
+; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
+; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
+; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
+; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
+; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
+; CI: buffer_store_dword
+; CI: s_endpgm
+define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* noalias nocapture readnone %gptr, i32 addrspace(1)* noalias nocapture %ptr0) #0 {
+  %ptr1 = getelementptr inbounds i32 addrspace(1)* %ptr0, i32 3
+  %ptr2 = getelementptr inbounds i32 addrspace(1)* %ptr0, i32 100
+  %ptr3 = getelementptr inbounds i32 addrspace(1)* %ptr0, i32 101
+
+  store i32 123, i32 addrspace(1)* %ptr1, align 4
+  %tmp1 = load i32 addrspace(1)* %ptr2, align 4
+  %tmp2 = load i32 addrspace(1)* %ptr3, align 4
+  store i32 123, i32 addrspace(1)* %ptr2, align 4
+  %tmp3 = load i32 addrspace(1)* %ptr1, align 4
+  store i32 789, i32 addrspace(1)* %ptr3, align 4
+
+  %add.0 = add nsw i32 %tmp2, %tmp1
+  %add.1 = add nsw i32 %add.0, %tmp3
+  store i32 %add.1, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; XFUNC-LABEL: @reorder_local_load_tbuffer_store_local_load
+; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x4
+; XCI: TBUFFER_STORE_FORMAT
+; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x8
+; define void @reorder_local_load_tbuffer_store_local_load(i32 addrspace(1)* %out, i32 %a1, i32 %vaddr) #1 {
+;   %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+
+;   %ptr1 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 1
+;   %ptr2 = getelementptr inbounds i32 addrspace(3)* %ptr0, i32 2
+
+;   %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+
+;   %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
+;   call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
+;         i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
+;         i32 1, i32 0)
+
+;   %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+
+;   %add = add nsw i32 %tmp1, %tmp2
+
+;   store i32 %add, i32 addrspace(1)* %out, align 4
+;   ret void
+; }
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #1 = { "ShaderType"="1" nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #2 = { nounwind noduplicate }
diff --git a/test/CodeGen/R600/si-vector-hang.ll b/test/CodeGen/R600/si-vector-hang.ll
index 093234f71958..8cbb4914c857 100644
--- a/test/CodeGen/R600/si-vector-hang.ll
+++ b/test/CodeGen/R600/si-vector-hang.ll
@@ -1,14 +1,14 @@
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-; CHECK: @test_8_min_char
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
-; CHECK: BUFFER_STORE_BYTE
+; CHECK: {{^}}test_8_min_char:
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
+; CHECK: buffer_store_byte
 ; ModuleID = 'radeon'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
 target triple = "r600--"
@@ -96,12 +96,12 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
 
 !opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
 
-!0 = metadata !{null}
-!1 = metadata !{null}
-!2 = metadata !{null}
-!3 = metadata !{void (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*)* @test_8_min_char}
-!4 = metadata !{null}
-!5 = metadata !{null}
-!6 = metadata !{null}
-!7 = metadata !{null}
-!8 = metadata !{null}
+!0 = !{null}
+!1 = !{null}
+!2 = !{null}
+!3 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i8 addrspace(1)*)* @test_8_min_char}
+!4 = !{null}
+!5 = !{null}
+!6 = !{null}
+!7 = !{null}
+!8 = !{null}
diff --git a/test/CodeGen/R600/sign_extend.ll b/test/CodeGen/R600/sign_extend.ll
index e3bee507de67..1d90fdbec853 100644
--- a/test/CodeGen/R600/sign_extend.ll
+++ b/test/CodeGen/R600/sign_extend.ll
@@ -1,8 +1,8 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-; SI-LABEL: @s_sext_i1_to_i32:
-; SI: V_CNDMASK_B32_e64
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}s_sext_i1_to_i32:
+; SI: v_cndmask_b32_e64
+; SI: s_endpgm
 define void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
   %cmp = icmp eq i32 %a, %b
   %sext = sext i1 %cmp to i32
@@ -10,10 +10,10 @@ define void @s_sext_i1_to_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
   ret void
 }
 
-; SI-LABEL: @test:
-; SI: V_ASHR
-; SI: S_ENDPG
-define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
+; SI-LABEL: {{^}}test_s_sext_i32_to_i64:
+; SI: s_ashr_i32
+; SI: s_endpg
+define void @test_s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) nounwind {
 entry:
   %mul = mul i32 %a, %b
   %add = add i32 %mul, %c
@@ -22,10 +22,10 @@ entry:
   ret void
 }
 
-; SI-LABEL: @s_sext_i1_to_i64:
-; SI: V_CNDMASK_B32_e64
-; SI: V_CNDMASK_B32_e64
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}s_sext_i1_to_i64:
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: s_endpgm
 define void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
   %cmp = icmp eq i32 %a, %b
   %sext = sext i1 %cmp to i64
@@ -33,18 +33,18 @@ define void @s_sext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
   ret void
 }
 
-; SI-LABEL: @s_sext_i32_to_i64:
-; SI: S_ASHR_I32
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}s_sext_i32_to_i64:
+; SI: s_ashr_i32
+; SI: s_endpgm
 define void @s_sext_i32_to_i64(i64 addrspace(1)* %out, i32 %a) nounwind {
   %sext = sext i32 %a to i64
   store i64 %sext, i64 addrspace(1)* %out, align 8
   ret void
 }
 
-; SI-LABEL: @v_sext_i32_to_i64:
-; SI: V_ASHR
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}v_sext_i32_to_i64:
+; SI: v_ashr
+; SI: s_endpgm
 define void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
   %val = load i32 addrspace(1)* %in, align 4
   %sext = sext i32 %val to i64
@@ -52,8 +52,8 @@ define void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) no
   ret void
 }
 
-; SI-LABEL: @s_sext_i16_to_i64:
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}s_sext_i16_to_i64:
+; SI: s_endpgm
 define void @s_sext_i16_to_i64(i64 addrspace(1)* %out, i16 %a) nounwind {
   %sext = sext i16 %a to i64
   store i64 %sext, i64 addrspace(1)* %out, align 8
diff --git a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
index e6f8ce8ef0ee..e02350cc23f7 100644
--- a/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
+++ b/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
@@ -1,5 +1,5 @@
 ; XFAIL: *
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI %s
 
 ; 64-bit select was originally lowered with a build_pair, and this
 ; could be simplified to 1 cndmask instead of 2, but that broken when
@@ -14,10 +14,10 @@ define void @trunc_select_i64(i32 addrspace(1)* %out, i64 %a, i64 %b, i32 %c) {
 }
 
 ; FIXME: Fix truncating store for local memory
-; SI-LABEL: @trunc_load_alloca_i64:
-; SI: V_MOVRELS_B32
-; SI-NOT: V_MOVRELS_B32
-; SI: S_ENDPGM
+; SI-LABEL: {{^}}trunc_load_alloca_i64:
+; SI: v_movrels_b32
+; SI-NOT: v_movrels_b32
+; SI: s_endpgm
 define void @trunc_load_alloca_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) {
   %idx = add i32 %a, %b
   %alloca = alloca i64, i32 4
diff --git a/test/CodeGen/R600/sint_to_fp.f64.ll b/test/CodeGen/R600/sint_to_fp.f64.ll
new file mode 100644
index 000000000000..77844a6aa384
--- /dev/null
+++ b/test/CodeGen/R600/sint_to_fp.f64.ll
@@ -0,0 +1,60 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; SI-LABEL: {{^}}sint_to_fp_i32_to_f64
+; SI: v_cvt_f64_i32_e32
+define void @sint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
+  %result = sitofp i32 %in to double
+  store double %result, double addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}sint_to_fp_i1_f64:
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
+; uses an SGPR for [[CMP]]
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
+  %cmp = icmp eq i32 %in, 0
+  %fp = sitofp i1 %cmp to double
+  store double %fp, double addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}sint_to_fp_i1_f64_load:
+; SI: v_cndmask_b32_e64 [[IRESULT:v[0-9]]], 0, -1
+; SI-NEXT: v_cvt_f64_i32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
+  %fp = sitofp i1 %in to double
+  store double %fp, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: @s_sint_to_fp_i64_to_f64
+define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
+  %result = sitofp i64 %in to double
+  store double %result, double addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: @v_sint_to_fp_i64_to_f64
+; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; SI: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
+; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
+; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep = getelementptr i64 addrspace(1)* %in, i32 %tid
+  %val = load i64 addrspace(1)* %gep, align 8
+  %result = sitofp i64 %val to double
+  store double %result, double addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/sint_to_fp.ll b/test/CodeGen/R600/sint_to_fp.ll
index b27dfda8aea6..dd541bb36a24 100644
--- a/test/CodeGen/R600/sint_to_fp.ll
+++ b/test/CodeGen/R600/sint_to_fp.ll
@@ -1,28 +1,38 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
-
-; R600-CHECK: @sint_to_fp_v2i32
-; R600-CHECK-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
-; R600-CHECK-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
-; SI-CHECK: @sint_to_fp_v2i32
-; SI-CHECK: V_CVT_F32_I32_e32
-; SI-CHECK: V_CVT_F32_I32_e32
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}s_sint_to_fp_i32_to_f32:
+; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{s[0-9]+$}}
+define void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) {
+  %result = sitofp i32 %in to float
+  store float %result, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sint_to_fp_v2i32:
+; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
+; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
+
+; SI: v_cvt_f32_i32_e32
+; SI: v_cvt_f32_i32_e32
 define void @sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
   %result = sitofp <2 x i32> %in to <2 x float>
   store <2 x float> %result, <2 x float> addrspace(1)* %out
   ret void
 }
 
-; R600-CHECK: @sint_to_fp_v4i32
-; R600-CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600-CHECK: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI-CHECK: @sint_to_fp_v4i32
-; SI-CHECK: V_CVT_F32_I32_e32
-; SI-CHECK: V_CVT_F32_I32_e32
-; SI-CHECK: V_CVT_F32_I32_e32
-; SI-CHECK: V_CVT_F32_I32_e32
+; FUNC-LABEL: {{^}}sint_to_fp_v4i32:
+; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+; SI: v_cvt_f32_i32_e32
+; SI: v_cvt_f32_i32_e32
+; SI: v_cvt_f32_i32_e32
+; SI: v_cvt_f32_i32_e32
 define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %value = load <4 x i32> addrspace(1) * %in
   %result = sitofp <4 x i32> %value to <4 x float>
@@ -30,11 +40,11 @@ define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspac
   ret void
 }
 
-; FUNC-LABEL: @sint_to_fp_i1_f32:
-; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; SI-NEXT: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, -1.000000e+00, [[CMP]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sint_to_fp_i1_f32:
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 define void @sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
   %cmp = icmp eq i32 %in, 0
   %fp = uitofp i1 %cmp to float
@@ -42,10 +52,10 @@ define void @sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
   ret void
 }
 
-; FUNC-LABEL: @sint_to_fp_i1_f32_load:
-; SI: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, -1.000000e+00
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
+; FUNC-LABEL: {{^}}sint_to_fp_i1_f32_load:
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
 define void @sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) {
   %fp = sitofp i1 %in to float
   store float %fp, float addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/sint_to_fp64.ll b/test/CodeGen/R600/sint_to_fp64.ll
deleted file mode 100644
index 12b8cf57cf5f..000000000000
--- a/test/CodeGen/R600/sint_to_fp64.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
-
-; SI: @sint_to_fp64
-; SI: V_CVT_F64_I32_e32
-define void @sint_to_fp64(double addrspace(1)* %out, i32 %in) {
-  %result = sitofp i32 %in to double
-  store double %result, double addrspace(1)* %out
-  ret void
-}
-
-; SI-LABEL: @sint_to_fp_i1_f64:
-; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; FIXME: We should the VGPR sources for V_CNDMASK are copied from SGPRs,
-; we should be able to fold the SGPRs into the V_CNDMASK instructions.
-; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
-; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
-define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
-  %cmp = icmp eq i32 %in, 0
-  %fp = sitofp i1 %cmp to double
-  store double %fp, double addrspace(1)* %out, align 4
-  ret void
-}
-
-; SI-LABEL: @sint_to_fp_i1_f64_load:
-; SI: V_CNDMASK_B32_e64 [[IRESULT:v[0-9]]], 0, -1
-; SI-NEXT: V_CVT_F64_I32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
-; SI: S_ENDPGM
-define void @sint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
-  %fp = sitofp i1 %in to double
-  store double %fp, double addrspace(1)* %out, align 8
-  ret void
-}
diff --git a/test/CodeGen/R600/smrd.ll b/test/CodeGen/R600/smrd.ll
index dec61855b018..a66ad0201bf9 100644
--- a/test/CodeGen/R600/smrd.ll
+++ b/test/CodeGen/R600/smrd.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck %s
 
 ; SMRD load with an immediate offset.
-; CHECK-LABEL: @smrd0
-; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
+; CHECK-LABEL: {{^}}smrd0:
+; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
 define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
   %0 = getelementptr i32 addrspace(2)* %ptr, i64 1
@@ -12,8 +12,8 @@ entry:
 }
 
 ; SMRD load with the largest possible immediate offset.
-; CHECK-LABEL: @smrd1
-; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; CHECK-LABEL: {{^}}smrd1:
+; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
 define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
   %0 = getelementptr i32 addrspace(2)* %ptr, i64 255
@@ -23,9 +23,10 @@ entry:
 }
 
 ; SMRD load with an offset greater than the largest possible immediate.
-; CHECK-LABEL: @smrd2
-; CHECK: S_MOV_B32 s[[OFFSET:[0-9]]], 0x400
-; CHECK: S_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; CHECK-LABEL: {{^}}smrd2:
+; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
+; CHECK: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; CHECK: s_endpgm
 define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
   %0 = getelementptr i32 addrspace(2)* %ptr, i64 256
@@ -35,15 +36,15 @@ entry:
 }
 
 ; SMRD load with a 64-bit offset
-; CHECK-LABEL: @smrd3
-; CHECK-DAG: S_MOV_B32 s[[SHI:[0-9]+]], 4
-; CHECK-DAG: S_MOV_B32 s[[SLO:[0-9]+]], 0
+; CHECK-LABEL: {{^}}smrd3:
+; CHECK-DAG: s_mov_b32 s[[SHI:[0-9]+]], 4
+; CHECK-DAG: s_mov_b32 s[[SLO:[0-9]+]], 0 ;
 ; FIXME: We don't need to copy these values to VGPRs
-; CHECK-DAG: V_MOV_B32_e32 v[[VHI:[0-9]+]], s[[SHI]]
-; CHECK-DAG: V_MOV_B32_e32 v[[VLO:[0-9]+]], s[[SLO]]
-; FIXME: We should be able to use S_LOAD_DWORD here
-; BUFFER_LOAD_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] + v[[[VLO]]:[[VHI]]] + 0x0
-
+; CHECK-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]]
+; CHECK-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; FIXME: We should be able to use s_load_dword here
+; CHECK: buffer_load_dword v{{[0-9]+}}, v{{\[}}[[VLO]]:[[VHI]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64
+; CHECK: s_endpgm
 define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
   %0 = getelementptr i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
@@ -53,8 +54,8 @@ entry:
 }
 
 ; SMRD load using the load.const intrinsic with an immediate offset
-; CHECK-LABEL: @smrd_load_const0
-; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
+; CHECK-LABEL: {{^}}smrd_load_const0:
+; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
 define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
@@ -64,10 +65,10 @@ main_body:
   ret void
 }
 
-; SMRD load using the load.const intrinsic with an offset greater largest possible
-; immediate offset.
-; CHECK-LABEL: @smrd_load_const1
-; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; SMRD load using the load.const intrinsic with the largest possible immediate
+; offset.
+; CHECK-LABEL: {{^}}smrd_load_const1:
+; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
 define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
@@ -76,10 +77,12 @@ main_body:
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
   ret void
 }
-; SMRD load using the load.const intrinsic with the largetst possible
+; SMRD load using the load.const intrinsic with an offset greater than the
+; largets possible immediate.
 ; immediate offset.
-; CHECK-LABEL: @smrd_load_const2
-; CHECK: S_BUFFER_LOAD_DWORD s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; CHECK-LABEL: {{^}}smrd_load_const2:
+; CHECK: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
+; CHECK: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
 define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8> addrspace(2)* %0, i32 0
diff --git a/test/CodeGen/R600/split-scalar-i64-add.ll b/test/CodeGen/R600/split-scalar-i64-add.ll
new file mode 100644
index 000000000000..ec50fd9f4c1e
--- /dev/null
+++ b/test/CodeGen/R600/split-scalar-i64-add.ll
@@ -0,0 +1,48 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() readnone
+
+; This is broken because the low half of the 64-bit add remains on the
+; SALU, but the upper half does not. The addc expects the carry bit
+; set in vcc, which is undefined since the low scalar half add sets
+; scc instead.
+
+; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_0:
+; SI: v_add_i32
+; SI: v_addc_u32
+define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
+  %vec.0 = insertelement <2 x i32> undef, i32 %val, i32 0
+  %vec.1 = insertelement <2 x i32> %vec.0, i32 999999, i32 1
+  %bc = bitcast <2 x i32> %vec.1 to i64
+  %add = add i64 %bc, 399
+  store i64 %add, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_1:
+; SI: v_add_i32
+; SI: v_addc_u32
+define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64 %val1) {
+  %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
+  %vec.1 = insertelement <2 x i32> %vec.0, i32 99999, i32 1
+  %bc = bitcast <2 x i32> %vec.1 to i64
+  %add = add i64 %bc, %val1
+  store i64 %add, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; Doesn't use constants
+; FUNC-LABEL @imp_def_vcc_split_i64_add_2
+; SI: v_add_i32
+; SI: v_addc_u32
+define void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
+  %tid = call i32 @llvm.r600.read.tidig.x() readnone
+  %gep = getelementptr i32 addrspace(1)* %in, i32 %tid
+  %load = load i32 addrspace(1)* %gep
+  %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
+  %vec.1 = insertelement <2 x i32> %vec.0, i32 %load, i32 1
+  %bc = bitcast <2 x i32> %vec.1 to i64
+  %add = add i64 %bc, %val1
+  store i64 %add, i64 addrspace(1)* %out, align 8
+  ret void
+}
diff --git a/test/CodeGen/R600/sra.ll b/test/CodeGen/R600/sra.ll
index 9eb3dc544074..d463fc20b9c5 100644
--- a/test/CodeGen/R600/sra.ll
+++ b/test/CodeGen/R600/sra.ll
@@ -1,13 +1,13 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-;EG-CHECK-LABEL: @ashr_v2i32
+;EG-CHECK-LABEL: {{^}}ashr_v2i32:
 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK-LABEL: @ashr_v2i32
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK-LABEL: {{^}}ashr_v2i32:
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -18,17 +18,17 @@ define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %i
   ret void
 }
 
-;EG-CHECK-LABEL: @ashr_v4i32
+;EG-CHECK-LABEL: {{^}}ashr_v4i32:
 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: ASHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK-LABEL: @ashr_v4i32
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_ASHR_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK-LABEL: {{^}}ashr_v4i32:
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI-CHECK: v_ashr_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
@@ -39,11 +39,11 @@ define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
   ret void
 }
 
-;EG-CHECK-LABEL: @ashr_i64
+;EG-CHECK-LABEL: {{^}}ashr_i64:
 ;EG-CHECK: ASHR
 
-;SI-CHECK-LABEL: @ashr_i64
-;SI-CHECK: S_ASHR_I64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
+;SI-CHECK-LABEL: {{^}}ashr_i64:
+;SI-CHECK: s_ashr_i64 s[{{[0-9]}}:{{[0-9]}}], s[{{[0-9]}}:{{[0-9]}}], 8
 define void @ashr_i64(i64 addrspace(1)* %out, i32 %in) {
 entry:
   %0 = sext i32 %in to i64
@@ -52,7 +52,7 @@ entry:
   ret void
 }
 
-;EG-CHECK-LABEL: @ashr_i64_2
+;EG-CHECK-LABEL: {{^}}ashr_i64_2:
 ;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
 ;EG-CHECK: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
 ;EG-CHECK: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
@@ -66,8 +66,8 @@ entry:
 ;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
 ;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
 
-;SI-CHECK-LABEL: @ashr_i64_2
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK-LABEL: {{^}}ashr_i64_2:
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
 entry:
   %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
@@ -78,7 +78,7 @@ entry:
   ret void
 }
 
-;EG-CHECK-LABEL: @ashr_v2i64
+;EG-CHECK-LABEL: {{^}}ashr_v2i64:
 ;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
 ;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
 ;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]]
@@ -104,9 +104,9 @@ entry:
 ;EG-CHECK-DAG: CNDE_INT
 ;EG-CHECK-DAG: CNDE_INT
 
-;SI-CHECK-LABEL: @ashr_v2i64
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK-LABEL: {{^}}ashr_v2i64:
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 
 define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
@@ -117,7 +117,7 @@ define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
   ret void
 }
 
-;EG-CHECK-LABEL: @ashr_v4i64
+;EG-CHECK-LABEL: {{^}}ashr_v4i64:
 ;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
 ;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
 ;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
@@ -167,11 +167,11 @@ define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %i
 ;EG-CHECK-DAG: CNDE_INT
 ;EG-CHECK-DAG: CNDE_INT
 
-;SI-CHECK-LABEL: @ashr_v4i64
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_ASHR_I64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK-LABEL: {{^}}ashr_v4i64:
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+;SI-CHECK: v_ashr_i64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
 
 define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
diff --git a/test/CodeGen/R600/srem.ll b/test/CodeGen/R600/srem.ll
index 65e33952d29e..759c8b4ef722 100644
--- a/test/CodeGen/R600/srem.ll
+++ b/test/CodeGen/R600/srem.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=r600 -mcpu=SI < %s
+; RUN: llc -march=amdgcn -mcpu=SI < %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s
 
 define void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
diff --git a/test/CodeGen/R600/srl.ll b/test/CodeGen/R600/srl.ll
index 44ad73f073ed..9e7b35e8338a 100644
--- a/test/CodeGen/R600/srl.ll
+++ b/test/CodeGen/R600/srl.ll
@@ -1,166 +1,166 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
-
-;EG-CHECK: @lshr_v2i32
-;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}lshr_i32:
+; SI: v_lshrrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %a = load i32 addrspace(1)* %in
+  %b = load i32 addrspace(1)* %b_ptr
+  %result = lshr i32 %a, %b
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
 
-;SI-CHECK: @lshr_v2i32
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; FUNC-LABEL: {{^}}lshr_v2i32:
+; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
+; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32> addrspace(1)* %in
+  %b = load <2 x i32> addrspace(1)* %b_ptr
   %result = lshr <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
 }
 
+; FUNC-LABEL: {{^}}lshr_v4i32:
+; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_lshr_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
-;EG-CHECK: @lshr_v4i32
-;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
-;SI-CHECK: @lshr_v4i32
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_LSHR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-
+; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32> addrspace(1)* %in
+  %b = load <4 x i32> addrspace(1)* %b_ptr
   %result = lshr <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
 
-;EG-CHECK: @lshr_i64
-;EG-CHECK: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
-;EG-CHECK: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
-;EG-CHECK: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
-;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
-;EG-CHECK-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
-;EG-CHECK-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
-;EG-CHECK-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
-;EG-CHECK-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
-;EG-CHECK-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
-;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
-;EG-CHECK-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
-
-;SI-CHECK: @lshr_i64
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-
+; FUNC-LABEL: {{^}}lshr_i64:
+; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
+; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
+; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
+; EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
+; EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+; EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
+; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
+; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
+; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
+; EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
+; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
 define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
-  %a = load i64 addrspace(1) * %in
-  %b = load i64 addrspace(1) * %b_ptr
+  %a = load i64 addrspace(1)* %in
+  %b = load i64 addrspace(1)* %b_ptr
   %result = lshr i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-;EG-CHECK: @lshr_v2i64
-;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
-;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
-;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]]
-;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]]
-;EG-CHECK-DAG: LSHL {{.*}}, 1
-;EG-CHECK-DAG: LSHL {{.*}}, 1
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]]
-;EG-CHECK-DAG: OR_INT
-;EG-CHECK-DAG: OR_INT
-;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
-;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
-;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
-;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
-;EG-CHECK-DAG: CNDE_INT
-;EG-CHECK-DAG: CNDE_INT
-
-;SI-CHECK: @lshr_v2i64
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-
+; FUNC-LABEL: {{^}}lshr_v2i64:
+; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
+; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
+; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
+; EG-DAG: LSHL {{\*? *}}[[COMPSHA]]
+; EG-DAG: LSHL {{\*? *}}[[COMPSHB]]
+; EG-DAG: LSHL {{.*}}, 1
+; EG-DAG: LSHL {{.*}}, 1
+; EG-DAG: LSHR {{.*}}, [[SHA]]
+; EG-DAG: LSHR {{.*}}, [[SHB]]
+; EG-DAG: LSHR {{.*}}, [[SHA]]
+; EG-DAG: LSHR {{.*}}, [[SHB]]
+; EG-DAG: OR_INT
+; EG-DAG: OR_INT
+; EG-DAG: ADD_INT  {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
+; EG-DAG: ADD_INT  {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
+; EG-DAG: LSHR
+; EG-DAG: LSHR
+; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
+; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
+; EG-DAG: CNDE_INT {{.*}}, 0.0
+; EG-DAG: CNDE_INT {{.*}}, 0.0
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
-  %a = load <2 x i64> addrspace(1) * %in
-  %b = load <2 x i64> addrspace(1) * %b_ptr
+  %a = load <2 x i64> addrspace(1)* %in
+  %b = load <2 x i64> addrspace(1)* %b_ptr
   %result = lshr <2 x i64> %a, %b
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
 }
 
-
-;EG-CHECK: @lshr_v4i64
-;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
-;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
-;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
-;EG-CHECK-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]]
-;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHA]]
-;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHB]]
-;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHC]]
-;EG-CHECK-DAG: LSHL {{\*? *}}[[COMPSHD]]
-;EG-CHECK-DAG: LSHL {{.*}}, 1
-;EG-CHECK-DAG: LSHL {{.*}}, 1
-;EG-CHECK-DAG: LSHL {{.*}}, 1
-;EG-CHECK-DAG: LSHL {{.*}}, 1
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHC]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHD]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHA]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHB]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHC]]
-;EG-CHECK-DAG: LSHR {{.*}}, [[SHD]]
-;EG-CHECK-DAG: OR_INT
-;EG-CHECK-DAG: OR_INT
-;EG-CHECK-DAG: OR_INT
-;EG-CHECK-DAG: OR_INT
-;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-CHECK-DAG: ADD_INT  {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: LSHR
-;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
-;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
-;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal
-;EG-CHECK-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal
-;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
-;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
-;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
-;EG-CHECK-DAG: CNDE_INT {{.*}}, 0.0
-;EG-CHECK-DAG: CNDE_INT
-;EG-CHECK-DAG: CNDE_INT
-;EG-CHECK-DAG: CNDE_INT
-;EG-CHECK-DAG: CNDE_INT
-
-;SI-CHECK: @lshr_v4i64
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-;SI-CHECK: V_LSHR_B64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
-
+; FUNC-LABEL: {{^}}lshr_v4i64:
+; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+; SI: v_lshr_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
+
+; EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
+; EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
+; EG-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
+; EG-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]]
+; EG-DAG: LSHL {{\*? *}}[[COMPSHA]]
+; EG-DAG: LSHL {{\*? *}}[[COMPSHB]]
+; EG-DAG: LSHL {{\*? *}}[[COMPSHC]]
+; EG-DAG: LSHL {{\*? *}}[[COMPSHD]]
+; EG-DAG: LSHL {{.*}}, 1
+; EG-DAG: LSHL {{.*}}, 1
+; EG-DAG: LSHL {{.*}}, 1
+; EG-DAG: LSHL {{.*}}, 1
+; EG-DAG: LSHR {{.*}}, [[SHA]]
+; EG-DAG: LSHR {{.*}}, [[SHB]]
+; EG-DAG: LSHR {{.*}}, [[SHC]]
+; EG-DAG: LSHR {{.*}}, [[SHD]]
+; EG-DAG: LSHR {{.*}}, [[SHA]]
+; EG-DAG: LSHR {{.*}}, [[SHB]]
+; EG-DAG: LSHR {{.*}}, [[SHC]]
+; EG-DAG: LSHR {{.*}}, [[SHD]]
+; EG-DAG: OR_INT
+; EG-DAG: OR_INT
+; EG-DAG: OR_INT
+; EG-DAG: OR_INT
+; EG-DAG: ADD_INT  {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
+; EG-DAG: ADD_INT  {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
+; EG-DAG: ADD_INT  {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal
+; EG-DAG: ADD_INT  {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal
+; EG-DAG: LSHR
+; EG-DAG: LSHR
+; EG-DAG: LSHR
+; EG-DAG: LSHR
+; EG-DAG: LSHR
+; EG-DAG: LSHR
+; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
+; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
+; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal
+; EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal
+; EG-DAG: CNDE_INT {{.*}}, 0.0
+; EG-DAG: CNDE_INT {{.*}}, 0.0
+; EG-DAG: CNDE_INT {{.*}}, 0.0
+; EG-DAG: CNDE_INT {{.*}}, 0.0
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
+; EG-DAG: CNDE_INT
 define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
-  %a = load <4 x i64> addrspace(1) * %in
-  %b = load <4 x i64> addrspace(1) * %b_ptr
+  %a = load <4 x i64> addrspace(1)* %in
+  %b = load <4 x i64> addrspace(1)* %b_ptr
   %result = lshr <4 x i64> %a, %b
   store <4 x i64> %result, <4 x i64> addrspace(1)* %out
   ret void
diff --git a/test/CodeGen/R600/ssubo.ll b/test/CodeGen/R600/ssubo.ll
index b330276ae9e7..ccf76ca29c75 100644
--- a/test/CodeGen/R600/ssubo.ll
+++ b/test/CodeGen/R600/ssubo.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s
 
 declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
 declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
 
-; FUNC-LABEL: @ssubo_i64_zext
+; FUNC-LABEL: {{^}}ssubo_i64_zext:
 define void @ssubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %ssub, 0
@@ -15,7 +15,7 @@ define void @ssubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @s_ssubo_i32
+; FUNC-LABEL: {{^}}s_ssubo_i32:
 define void @s_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
   %ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
   %val = extractvalue { i32, i1 } %ssub, 0
@@ -25,7 +25,7 @@ define void @s_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
   ret void
 }
 
-; FUNC-LABEL: @v_ssubo_i32
+; FUNC-LABEL: {{^}}v_ssubo_i32:
 define void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
   %a = load i32 addrspace(1)* %aptr, align 4
   %b = load i32 addrspace(1)* %bptr, align 4
@@ -37,9 +37,9 @@ define void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
   ret void
 }
 
-; FUNC-LABEL: @s_ssubo_i64
-; SI: S_SUB_I32
-; SI: S_SUBB_U32
+; FUNC-LABEL: {{^}}s_ssubo_i64:
+; SI: s_sub_u32
+; SI: s_subb_u32
 define void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
   %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %ssub, 0
@@ -49,9 +49,9 @@ define void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
   ret void
 }
 
-; FUNC-LABEL: @v_ssubo_i64
-; SI: V_SUB_I32_e32
-; SI: V_SUBB_U32_e32
+; FUNC-LABEL: {{^}}v_ssubo_i64:
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
 define void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
   %a = load i64 addrspace(1)* %aptr, align 4
   %b = load i64 addrspace(1)* %bptr, align 4
diff --git a/test/CodeGen/R600/store-barrier.ll b/test/CodeGen/R600/store-barrier.ll
new file mode 100644
index 000000000000..350b006ba5e0
--- /dev/null
+++ b/test/CodeGen/R600/store-barrier.ll
@@ -0,0 +1,42 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck  --check-prefix=CHECK %s
+; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt -enable-misched < %s | FileCheck  --check-prefix=CHECK %s
+
+; This test is for a bug in the machine scheduler where stores without
+; an underlying object would be moved across the barrier.  In this
+; test, the <2 x i8> store will be split into two i8 stores, so they
+; won't have an underlying object.
+
+; CHECK-LABEL: {{^}}test:
+; CHECK: ds_write_b8
+; CHECK: ds_write_b8
+; CHECK: s_barrier
+; CHECK: s_endpgm
+; Function Attrs: nounwind
+define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) {
+bb:
+  %tmp10 = getelementptr inbounds i32 addrspace(1)* %arg2, i64 %tmp9
+  %tmp13 = load i32 addrspace(1)* %tmp10, align 2
+  %tmp14 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp13
+  %tmp15 = load <2 x i8> addrspace(3)* %tmp14, align 2
+  %tmp16 = add i32 %tmp13, 1
+  %tmp17 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp16
+  store <2 x i8> %tmp15, <2 x i8> addrspace(3)* %tmp17, align 2
+  tail call void @llvm.AMDGPU.barrier.local() #2
+  %tmp25 = load i32 addrspace(1)* %tmp10, align 4
+  %tmp26 = sext i32 %tmp25 to i64
+  %tmp27 = sext i32 %arg4 to i64
+  %tmp28 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 %arg4
+  %tmp29 = load i8 addrspace(3)* %tmp28, align 1
+  %tmp30 = getelementptr inbounds <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 %tmp27
+  store i8 %tmp29, i8 addrspace(1)* %tmp30, align 1
+  %tmp32 = getelementptr inbounds <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 0
+  %tmp33 = load i8 addrspace(3)* %tmp32, align 1
+  %tmp35 = getelementptr inbounds <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 0
+  store i8 %tmp33, i8 addrspace(1)* %tmp35, align 1
+  ret void
+}
+
+; Function Attrs: noduplicate nounwind
+declare void @llvm.AMDGPU.barrier.local() #2
+
+attributes #2 = { noduplicate nounwind }
diff --git a/test/CodeGen/R600/store-v3i32.ll b/test/CodeGen/R600/store-v3i32.ll
index 33578035daa2..42132f6ddba0 100644
--- a/test/CodeGen/R600/store-v3i32.ll
+++ b/test/CodeGen/R600/store-v3i32.ll
@@ -1,11 +1,11 @@
 ; XFAIL: *
-; RUN: llc -verify-machineinstrs -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI %s
 
 ; 3 vectors have the same size and alignment as 4 vectors, so this
 ; should be done in a single store.
 
-; SI-LABEL: @store_v3i32:
-; SI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}store_v3i32:
+; SI: buffer_store_dwordx4
 define void @store_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a) nounwind {
   store <3 x i32> %a, <3 x i32> addrspace(1)* %out, align 16
   ret void
diff --git a/test/CodeGen/R600/store-v3i64.ll b/test/CodeGen/R600/store-v3i64.ll
index 58d28b567bd1..82d427e4fa76 100644
--- a/test/CodeGen/R600/store-v3i64.ll
+++ b/test/CodeGen/R600/store-v3i64.ll
@@ -1,27 +1,27 @@
 ; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI
 
-; SI-LABEL: @global_store_v3i64:
-; SI: BUFFER_STORE_DWORDX4
-; SI: BUFFER_STORE_DWORDX4
+; SI-LABEL: {{^}}global_store_v3i64:
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
 define void @global_store_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
   store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 32
   ret void
 }
 
-; SI-LABEL: @global_store_v3i64_unaligned:
+; SI-LABEL: {{^}}global_store_v3i64_unaligned:
 define void @global_store_v3i64_unaligned(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
   store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 1
   ret void
 }
 
-; SI-LABEL: @local_store_v3i64:
+; SI-LABEL: {{^}}local_store_v3i64:
 define void @local_store_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
   store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 32
   ret void
 }
 
-; SI-LABEL: @local_store_v3i64_unaligned:
+; SI-LABEL: {{^}}local_store_v3i64_unaligned:
 define void @local_store_v3i64_unaligned(<3 x i64> addrspace(1)* %out, <3 x i64> %x) {
   store <3 x i64> %x, <3 x i64> addrspace(1)* %out, align 1
   ret void
diff --git a/test/CodeGen/R600/store-vector-ptrs.ll b/test/CodeGen/R600/store-vector-ptrs.ll
index 41c5edc280d7..868b68d10efa 100644
--- a/test/CodeGen/R600/store-vector-ptrs.ll
+++ b/test/CodeGen/R600/store-vector-ptrs.ll
@@ -1,9 +1,11 @@
-; REQUIRES: asserts
-; XFAIL: *
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s
 
+; This tests for a bug that caused a crash in
+; AMDGPUDAGToDAGISel::SelectMUBUFScratch() which is used for selecting
+; scratch loads and stores.
+; CHECK-LABEL: {{^}}store_vector_ptrs:
 define void @store_vector_ptrs(<4 x i32*>* %out, <4 x [1024 x i32]*> %array) nounwind {
   %p = getelementptr <4 x [1024 x i32]*> %array, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
   store <4 x i32*> %p, <4 x i32*>* %out
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/R600/store.ll b/test/CodeGen/R600/store.ll
index dd275338d7c0..8e5cb2a4e6d6 100644
--- a/test/CodeGen/R600/store.ll
+++ b/test/CodeGen/R600/store.ll
@@ -1,13 +1,13 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK --check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=CM-CHECK --check-prefix=FUNC %s
-; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI-CHECK -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG-CHECK -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM-CHECK -check-prefix=FUNC %s
 
 ;===------------------------------------------------------------------------===;
 ; Global Address Space
 ;===------------------------------------------------------------------------===;
-; FUNC-LABEL: @store_i1
+; FUNC-LABEL: {{^}}store_i1:
 ; EG-CHECK: MEM_RAT MSKOR
-; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK: buffer_store_byte
 define void @store_i1(i1 addrspace(1)* %out) {
 entry:
   store i1 true, i1 addrspace(1)* %out
@@ -15,18 +15,20 @@ entry:
 }
 
 ; i8 store
-; EG-CHECK-LABEL: @store_i8
+; EG-CHECK-LABEL: {{^}}store_i8:
 ; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
-; EG-CHECK: VTX_READ_8 [[VAL:T[0-9]\.X]], [[VAL]]
+
 ; IG 0: Get the byte index and truncate the value
-; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
-; EG-CHECK-NEXT: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y
+; EG-CHECK: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
+; EG-CHECK: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
+; EG-CHECK: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y
 ; EG-CHECK-NEXT: 3(4.203895e-45), 255(3.573311e-43)
+
+
 ; IG 1: Truncate the calculated the shift amount for the mask
-; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
-; EG-CHECK-NEXT: 3
+
 ; IG 2: Shift the value and the mask
-; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]]
+; EG-CHECK: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]]
 ; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
 ; EG-CHECK-NEXT: 255
 ; IG 3: Initialize the Y and Z channels to zero
@@ -34,8 +36,8 @@ entry:
 ; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0
 ; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0
 
-; SI-CHECK-LABEL: @store_i8
-; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK-LABEL: {{^}}store_i8:
+; SI-CHECK: buffer_store_byte
 
 define void @store_i8(i8 addrspace(1)* %out, i8 %in) {
 entry:
@@ -44,18 +46,23 @@ entry:
 }
 
 ; i16 store
-; EG-CHECK-LABEL: @store_i16
+; EG-CHECK-LABEL: {{^}}store_i16:
 ; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
-; EG-CHECK: VTX_READ_16 [[VAL:T[0-9]\.X]], [[VAL]]
+
 ; IG 0: Get the byte index and truncate the value
-; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
-; EG-CHECK: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y
+
+
+; EG-CHECK: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
+; EG-CHECK-NEXT: 3(4.203895e-45),
+
+; EG-CHECK: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
+; EG-CHECK: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y
+
 ; EG-CHECK-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
 ; IG 1: Truncate the calculated the shift amount for the mask
-; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
-; EG-CHECK: 3
+
 ; IG 2: Shift the value and the mask
-; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]]
+; EG-CHECK: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]]
 ; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
 ; EG-CHECK-NEXT: 65535
 ; IG 3: Initialize the Y and Z channels to zero
@@ -63,20 +70,20 @@ entry:
 ; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0
 ; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0
 
-; SI-CHECK-LABEL: @store_i16
-; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK-LABEL: {{^}}store_i16:
+; SI-CHECK: buffer_store_short
 define void @store_i16(i16 addrspace(1)* %out, i16 %in) {
 entry:
   store i16 %in, i16 addrspace(1)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @store_v2i8
+; EG-CHECK-LABEL: {{^}}store_v2i8:
 ; EG-CHECK: MEM_RAT MSKOR
 ; EG-CHECK-NOT: MEM_RAT MSKOR
-; SI-CHECK-LABEL: @store_v2i8
-; SI-CHECK: BUFFER_STORE_BYTE
-; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK-LABEL: {{^}}store_v2i8:
+; SI-CHECK: buffer_store_byte
+; SI-CHECK: buffer_store_byte
 define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
 entry:
   %0 = trunc <2 x i32> %in to <2 x i8>
@@ -85,13 +92,13 @@ entry:
 }
 
 
-; EG-CHECK-LABEL: @store_v2i16
+; EG-CHECK-LABEL: {{^}}store_v2i16:
 ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK-LABEL: @store_v2i16
+; CM-CHECK-LABEL: {{^}}store_v2i16:
 ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK-LABEL: @store_v2i16
-; SI-CHECK: BUFFER_STORE_SHORT
-; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK-LABEL: {{^}}store_v2i16:
+; SI-CHECK: buffer_store_short
+; SI-CHECK: buffer_store_short
 define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
 entry:
   %0 = trunc <2 x i32> %in to <2 x i16>
@@ -99,15 +106,15 @@ entry:
   ret void
 }
 
-; EG-CHECK-LABEL: @store_v4i8
+; EG-CHECK-LABEL: {{^}}store_v4i8:
 ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK-LABEL: @store_v4i8
+; CM-CHECK-LABEL: {{^}}store_v4i8:
 ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK-LABEL: @store_v4i8
-; SI-CHECK: BUFFER_STORE_BYTE
-; SI-CHECK: BUFFER_STORE_BYTE
-; SI-CHECK: BUFFER_STORE_BYTE
-; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK-LABEL: {{^}}store_v4i8:
+; SI-CHECK: buffer_store_byte
+; SI-CHECK: buffer_store_byte
+; SI-CHECK: buffer_store_byte
+; SI-CHECK: buffer_store_byte
 define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
 entry:
   %0 = trunc <4 x i32> %in to <4 x i8>
@@ -116,30 +123,30 @@ entry:
 }
 
 ; floating-point store
-; EG-CHECK-LABEL: @store_f32
+; EG-CHECK-LABEL: {{^}}store_f32:
 ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
-; CM-CHECK-LABEL: @store_f32
+; CM-CHECK-LABEL: {{^}}store_f32:
 ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
-; SI-CHECK-LABEL: @store_f32
-; SI-CHECK: BUFFER_STORE_DWORD
+; SI-CHECK-LABEL: {{^}}store_f32:
+; SI-CHECK: buffer_store_dword
 
 define void @store_f32(float addrspace(1)* %out, float %in) {
   store float %in, float addrspace(1)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @store_v4i16
+; EG-CHECK-LABEL: {{^}}store_v4i16:
 ; EG-CHECK: MEM_RAT MSKOR
 ; EG-CHECK: MEM_RAT MSKOR
 ; EG-CHECK: MEM_RAT MSKOR
 ; EG-CHECK: MEM_RAT MSKOR
 ; EG-CHECK-NOT: MEM_RAT MSKOR
-; SI-CHECK-LABEL: @store_v4i16
-; SI-CHECK: BUFFER_STORE_SHORT
-; SI-CHECK: BUFFER_STORE_SHORT
-; SI-CHECK: BUFFER_STORE_SHORT
-; SI-CHECK: BUFFER_STORE_SHORT
-; SI-CHECK-NOT: BUFFER_STORE_BYTE
+; SI-CHECK-LABEL: {{^}}store_v4i16:
+; SI-CHECK: buffer_store_short
+; SI-CHECK: buffer_store_short
+; SI-CHECK: buffer_store_short
+; SI-CHECK: buffer_store_short
+; SI-CHECK-NOT: buffer_store_byte
 define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
 entry:
   %0 = trunc <4 x i32> %in to <4 x i16>
@@ -148,12 +155,12 @@ entry:
 }
 
 ; vec2 floating-point stores
-; EG-CHECK-LABEL: @store_v2f32
+; EG-CHECK-LABEL: {{^}}store_v2f32:
 ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK-LABEL: @store_v2f32
+; CM-CHECK-LABEL: {{^}}store_v2f32:
 ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK-LABEL: @store_v2f32
-; SI-CHECK: BUFFER_STORE_DWORDX2
+; SI-CHECK-LABEL: {{^}}store_v2f32:
+; SI-CHECK: buffer_store_dwordx2
 
 define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
 entry:
@@ -163,23 +170,23 @@ entry:
   ret void
 }
 
-; EG-CHECK-LABEL: @store_v4i32
+; EG-CHECK-LABEL: {{^}}store_v4i32:
 ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
 ; EG-CHECK-NOT: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK-LABEL: @store_v4i32
+; CM-CHECK-LABEL: {{^}}store_v4i32:
 ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
 ; CM-CHECK-NOT: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK-LABEL: @store_v4i32
-; SI-CHECK: BUFFER_STORE_DWORDX4
+; SI-CHECK-LABEL: {{^}}store_v4i32:
+; SI-CHECK: buffer_store_dwordx4
 define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
 entry:
   store <4 x i32> %in, <4 x i32> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @store_i64_i8
+; FUNC-LABEL: {{^}}store_i64_i8:
 ; EG-CHECK: MEM_RAT MSKOR
-; SI-CHECK: BUFFER_STORE_BYTE
+; SI-CHECK: buffer_store_byte
 define void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) {
 entry:
   %0 = trunc i64 %in to i8
@@ -187,9 +194,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @store_i64_i16
+; FUNC-LABEL: {{^}}store_i64_i16:
 ; EG-CHECK: MEM_RAT MSKOR
-; SI-CHECK: BUFFER_STORE_SHORT
+; SI-CHECK: buffer_store_short
 define void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) {
 entry:
   %0 = trunc i64 %in to i16
@@ -201,99 +208,99 @@ entry:
 ; Local Address Space
 ;===------------------------------------------------------------------------===;
 
-; FUNC-LABEL: @store_local_i1
+; FUNC-LABEL: {{^}}store_local_i1:
 ; EG-CHECK: LDS_BYTE_WRITE
-; SI-CHECK: DS_WRITE_B8
+; SI-CHECK: ds_write_b8
 define void @store_local_i1(i1 addrspace(3)* %out) {
 entry:
   store i1 true, i1 addrspace(3)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @store_local_i8
+; EG-CHECK-LABEL: {{^}}store_local_i8:
 ; EG-CHECK: LDS_BYTE_WRITE
-; SI-CHECK-LABEL: @store_local_i8
-; SI-CHECK: DS_WRITE_B8
+; SI-CHECK-LABEL: {{^}}store_local_i8:
+; SI-CHECK: ds_write_b8
 define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
   store i8 %in, i8 addrspace(3)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @store_local_i16
+; EG-CHECK-LABEL: {{^}}store_local_i16:
 ; EG-CHECK: LDS_SHORT_WRITE
-; SI-CHECK-LABEL: @store_local_i16
-; SI-CHECK: DS_WRITE_B16
+; SI-CHECK-LABEL: {{^}}store_local_i16:
+; SI-CHECK: ds_write_b16
 define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
   store i16 %in, i16 addrspace(3)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @store_local_v2i16
+; EG-CHECK-LABEL: {{^}}store_local_v2i16:
 ; EG-CHECK: LDS_WRITE
-; CM-CHECK-LABEL: @store_local_v2i16
+; CM-CHECK-LABEL: {{^}}store_local_v2i16:
 ; CM-CHECK: LDS_WRITE
-; SI-CHECK-LABEL: @store_local_v2i16
-; SI-CHECK: DS_WRITE_B16
-; SI-CHECK: DS_WRITE_B16
+; SI-CHECK-LABEL: {{^}}store_local_v2i16:
+; SI-CHECK: ds_write_b16
+; SI-CHECK: ds_write_b16
 define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
 entry:
   store <2 x i16> %in, <2 x i16> addrspace(3)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @store_local_v4i8
+; EG-CHECK-LABEL: {{^}}store_local_v4i8:
 ; EG-CHECK: LDS_WRITE
-; CM-CHECK-LABEL: @store_local_v4i8
+; CM-CHECK-LABEL: {{^}}store_local_v4i8:
 ; CM-CHECK: LDS_WRITE
-; SI-CHECK-LABEL: @store_local_v4i8
-; SI-CHECK: DS_WRITE_B8
-; SI-CHECK: DS_WRITE_B8
-; SI-CHECK: DS_WRITE_B8
-; SI-CHECK: DS_WRITE_B8
+; SI-CHECK-LABEL: {{^}}store_local_v4i8:
+; SI-CHECK: ds_write_b8
+; SI-CHECK: ds_write_b8
+; SI-CHECK: ds_write_b8
+; SI-CHECK: ds_write_b8
 define void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
 entry:
   store <4 x i8> %in, <4 x i8> addrspace(3)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @store_local_v2i32
+; EG-CHECK-LABEL: {{^}}store_local_v2i32:
 ; EG-CHECK: LDS_WRITE
 ; EG-CHECK: LDS_WRITE
-; CM-CHECK-LABEL: @store_local_v2i32
+; CM-CHECK-LABEL: {{^}}store_local_v2i32:
 ; CM-CHECK: LDS_WRITE
 ; CM-CHECK: LDS_WRITE
-; SI-CHECK-LABEL: @store_local_v2i32
-; SI-CHECK: DS_WRITE_B64
+; SI-CHECK-LABEL: {{^}}store_local_v2i32:
+; SI-CHECK: ds_write_b64
 define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
 entry:
   store <2 x i32> %in, <2 x i32> addrspace(3)* %out
   ret void
 }
 
-; EG-CHECK-LABEL: @store_local_v4i32
+; EG-CHECK-LABEL: {{^}}store_local_v4i32:
 ; EG-CHECK: LDS_WRITE
 ; EG-CHECK: LDS_WRITE
 ; EG-CHECK: LDS_WRITE
 ; EG-CHECK: LDS_WRITE
-; CM-CHECK-LABEL: @store_local_v4i32
+; CM-CHECK-LABEL: {{^}}store_local_v4i32:
 ; CM-CHECK: LDS_WRITE
 ; CM-CHECK: LDS_WRITE
 ; CM-CHECK: LDS_WRITE
 ; CM-CHECK: LDS_WRITE
-; SI-CHECK-LABEL: @store_local_v4i32
-; SI-CHECK: DS_WRITE_B32
-; SI-CHECK: DS_WRITE_B32
-; SI-CHECK: DS_WRITE_B32
-; SI-CHECK: DS_WRITE_B32
+; SI-CHECK-LABEL: {{^}}store_local_v4i32:
+; SI-CHECK: ds_write_b32
+; SI-CHECK: ds_write_b32
+; SI-CHECK: ds_write_b32
+; SI-CHECK: ds_write_b32
 define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
 entry:
   store <4 x i32> %in, <4 x i32> addrspace(3)* %out
   ret void
 }
 
-; FUNC-LABEL: @store_local_i64_i8
+; FUNC-LABEL: {{^}}store_local_i64_i8:
 ; EG-CHECK: LDS_BYTE_WRITE
-; SI-CHECK: DS_WRITE_B8
+; SI-CHECK: ds_write_b8
 define void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) {
 entry:
   %0 = trunc i64 %in to i8
@@ -301,9 +308,9 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @store_local_i64_i16
+; FUNC-LABEL: {{^}}store_local_i64_i16:
 ; EG-CHECK: LDS_SHORT_WRITE
-; SI-CHECK: DS_WRITE_B16
+; SI-CHECK: ds_write_b16
 define void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) {
 entry:
   %0 = trunc i64 %in to i16
@@ -318,12 +325,12 @@ entry:
 ; Evergreen / Northern Islands don't support 64-bit stores yet, so there should
 ; be two 32-bit stores.
 
-; EG-CHECK-LABEL: @vecload2
+; EG-CHECK-LABEL: {{^}}vecload2:
 ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
-; CM-CHECK-LABEL: @vecload2
+; CM-CHECK-LABEL: {{^}}vecload2:
 ; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
-; SI-CHECK-LABEL: @vecload2
-; SI-CHECK: BUFFER_STORE_DWORDX2
+; SI-CHECK-LABEL: {{^}}vecload2:
+; SI-CHECK: buffer_store_dwordx2
 define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
 entry:
   %0 = load i32 addrspace(2)* %mem, align 4
@@ -339,7 +346,7 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
 
 ; When i128 was a legal type this program generated cannot select errors:
 
-; FUNC-LABEL: @i128-const-store
+; FUNC-LABEL: {{^}}"i128-const-store":
 ; FIXME: We should be able to to this with one store instruction
 ; EG-CHECK: STORE_RAW
 ; EG-CHECK: STORE_RAW
@@ -349,8 +356,8 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
 ; CM-CHECK: STORE_DWORD
 ; CM-CHECK: STORE_DWORD
 ; CM-CHECK: STORE_DWORD
-; SI: BUFFER_STORE_DWORDX2
-; SI: BUFFER_STORE_DWORDX2
+; SI: buffer_store_dwordx2
+; SI: buffer_store_dwordx2
 define void @i128-const-store(i32 addrspace(1)* %out) {
 entry:
   store i32 1, i32 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/store.r600.ll b/test/CodeGen/R600/store.r600.ll
index 00589a0c6c86..3df30d4c6696 100644
--- a/test/CodeGen/R600/store.r600.ll
+++ b/test/CodeGen/R600/store.r600.ll
@@ -3,7 +3,7 @@
 ; XXX: Merge this test into store.ll once it is supported on SI
 
 ; v4i32 store
-; EG-CHECK: @store_v4i32
+; EG-CHECK: {{^}}store_v4i32:
 ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
 
 define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
@@ -13,7 +13,7 @@ define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %
 }
 
 ; v4f32 store
-; EG-CHECK: @store_v4f32
+; EG-CHECK: {{^}}store_v4f32:
 ; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
 define void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %1 = load <4 x float> addrspace(1) * %in
diff --git a/test/CodeGen/R600/structurize.ll b/test/CodeGen/R600/structurize.ll
index c2acd938ad05..02e592e9a559 100644
--- a/test/CodeGen/R600/structurize.ll
+++ b/test/CodeGen/R600/structurize.ll
@@ -13,7 +13,7 @@
 ;
 ;
 
-; CHECK-LABEL: @branch_into_diamond
+; CHECK-LABEL: {{^}}branch_into_diamond:
 ; === entry block:
 ; CHECK: ALU_PUSH_BEFORE
 ; === Branch instruction (IF):
diff --git a/test/CodeGen/R600/structurize1.ll b/test/CodeGen/R600/structurize1.ll
index 8c10301a1686..77432c1f9d2b 100644
--- a/test/CodeGen/R600/structurize1.ll
+++ b/test/CodeGen/R600/structurize1.ll
@@ -16,7 +16,7 @@
 ;   }
 ; }
 
-; CHECK-LABEL: @if_inside_loop
+; CHECK-LABEL: {{^}}if_inside_loop:
 ; CHECK: LOOP_START_DX10
 ; CHECK: END_LOOP
 define void @if_inside_loop(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d) {
diff --git a/test/CodeGen/R600/sub.ll b/test/CodeGen/R600/sub.ll
index 8e64148142d2..be48e186e870 100644
--- a/test/CodeGen/R600/sub.ll
+++ b/test/CodeGen/R600/sub.ll
@@ -1,16 +1,31 @@
-;RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
-;RUN: llc -march=r600 -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
 
 declare i32 @llvm.r600.read.tidig.x() readnone
 
-;FUNC-LABEL: @test2
-;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; FUNC-LABEL: {{^}}test_sub_i32:
+; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+; SI: v_subrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %a = load i32 addrspace(1)* %in
+  %b = load i32 addrspace(1)* %b_ptr
+  %result = sub i32 %a, %b
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
 
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
-define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+; FUNC-LABEL: {{^}}test_sub_v2i32:
+; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+
+; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+
+define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
   %a = load <2 x i32> addrspace(1) * %in
   %b = load <2 x i32> addrspace(1) * %b_ptr
@@ -19,18 +34,18 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   ret void
 }
 
-;FUNC-LABEL: @test4
-;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; FUNC-LABEL: {{^}}test_sub_v4i32:
+; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: V_SUB_I32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
-define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
   %a = load <4 x i32> addrspace(1) * %in
   %b = load <4 x i32> addrspace(1) * %b_ptr
@@ -39,9 +54,9 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   ret void
 }
 
-; FUNC-LABEL: @s_sub_i64:
-; SI: S_SUB_I32
-; SI: S_SUBB_U32
+; FUNC-LABEL: {{^}}s_sub_i64:
+; SI: s_sub_u32
+; SI: s_subb_u32
 
 ; EG-DAG: SETGE_UINT
 ; EG-DAG: CNDE_INT
@@ -54,9 +69,9 @@ define void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind
   ret void
 }
 
-; FUNC-LABEL: @v_sub_i64:
-; SI: V_SUB_I32_e32
-; SI: V_SUBB_U32_e32
+; FUNC-LABEL: {{^}}v_sub_i64:
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
 
 ; EG-DAG: SETGE_UINT
 ; EG-DAG: CNDE_INT
@@ -73,3 +88,39 @@ define void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
+
+; FUNC-LABEL: {{^}}v_test_sub_v2i64:
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
+define void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
+  %tid = call i32 @llvm.r600.read.tidig.x() readnone
+  %a_ptr = getelementptr <2 x i64> addrspace(1)* %inA, i32 %tid
+  %b_ptr = getelementptr <2 x i64> addrspace(1)* %inB, i32 %tid
+  %a = load <2 x i64> addrspace(1)* %a_ptr
+  %b = load <2 x i64> addrspace(1)* %b_ptr
+  %result = sub <2 x i64> %a, %b
+  store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_test_sub_v4i64:
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
+; SI: v_sub_i32_e32
+; SI: v_subb_u32_e32
+define void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) {
+  %tid = call i32 @llvm.r600.read.tidig.x() readnone
+  %a_ptr = getelementptr <4 x i64> addrspace(1)* %inA, i32 %tid
+  %b_ptr = getelementptr <4 x i64> addrspace(1)* %inB, i32 %tid
+  %a = load <4 x i64> addrspace(1)* %a_ptr
+  %b = load <4 x i64> addrspace(1)* %b_ptr
+  %result = sub <4 x i64> %a, %b
+  store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/subreg-coalescer-crash.ll b/test/CodeGen/R600/subreg-coalescer-crash.ll
new file mode 100644
index 000000000000..e841637b5047
--- /dev/null
+++ b/test/CodeGen/R600/subreg-coalescer-crash.ll
@@ -0,0 +1,45 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -o - %s
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+target triple = "r600--"
+
+; SI: s_endpgm
+; Function Attrs: nounwind
+define void @row_filter_C1_D0() #0 {
+entry:
+  br i1 undef, label %for.inc.1, label %do.body.preheader
+
+do.body.preheader:                                ; preds = %entry
+  %0 = insertelement <4 x i32> zeroinitializer, i32 undef, i32 1
+  br i1 undef, label %do.body56.1, label %do.body90
+
+do.body90:                                        ; preds = %do.body56.2, %do.body56.1, %do.body.preheader
+  %1 = phi <4 x i32> [ %6, %do.body56.2 ], [ %5, %do.body56.1 ], [ %0, %do.body.preheader ]
+  %2 = insertelement <4 x i32> %1, i32 undef, i32 2
+  %3 = insertelement <4 x i32> %2, i32 undef, i32 3
+  br i1 undef, label %do.body124.1, label %do.body.1562.preheader
+
+do.body.1562.preheader:                           ; preds = %do.body124.1, %do.body90
+  %storemerge = phi <4 x i32> [ %3, %do.body90 ], [ %7, %do.body124.1 ]
+  %4 = insertelement <4 x i32> undef, i32 undef, i32 1
+  br label %for.inc.1
+
+do.body56.1:                                      ; preds = %do.body.preheader
+  %5 = insertelement <4 x i32> %0, i32 undef, i32 1
+  %or.cond472.1 = or i1 undef, undef
+  br i1 %or.cond472.1, label %do.body56.2, label %do.body90
+
+do.body56.2:                                      ; preds = %do.body56.1
+  %6 = insertelement <4 x i32> %5, i32 undef, i32 1
+  br label %do.body90
+
+do.body124.1:                                     ; preds = %do.body90
+  %7 = insertelement <4 x i32> %3, i32 undef, i32 3
+  br label %do.body.1562.preheader
+
+for.inc.1:                                        ; preds = %do.body.1562.preheader, %entry
+  %storemerge591 = phi <4 x i32> [ zeroinitializer, %entry ], [ %storemerge, %do.body.1562.preheader ]
+  %add.i495 = add <4 x i32> %storemerge591, undef
+  unreachable
+}
+
diff --git a/test/CodeGen/R600/swizzle-export.ll b/test/CodeGen/R600/swizzle-export.ll
index 16c3f191935c..3e6f7a732630 100644
--- a/test/CodeGen/R600/swizzle-export.ll
+++ b/test/CodeGen/R600/swizzle-export.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
 
-;EG-CHECK: @main
+;EG-CHECK: {{^}}main:
 ;EG-CHECK: EXPORT T{{[0-9]+}}.XYXX
 ;EG-CHECK: EXPORT T{{[0-9]+}}.ZXXX
 ;EG-CHECK: EXPORT T{{[0-9]+}}.XXWX
@@ -92,9 +92,9 @@ main_body:
   ret void
 }
 
-; EG-CHECK: @main2
+; EG-CHECK: {{^}}main2:
 ; EG-CHECK: T{{[0-9]+}}.XY__
-; EG-CHECK: T{{[0-9]+}}.YXZ0
+; EG-CHECK: T{{[0-9]+}}.ZXY0
 
 define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
 main_body:
diff --git a/test/CodeGen/R600/trunc-cmp-constant.ll b/test/CodeGen/R600/trunc-cmp-constant.ll
new file mode 100644
index 000000000000..73c35512d77c
--- /dev/null
+++ b/test/CodeGen/R600/trunc-cmp-constant.ll
@@ -0,0 +1,169 @@
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+; FUNC-LABEL {{^}}sextload_i1_to_i32_trunc_cmp_eq_0:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]+\]]], [[TMP]], 1{{$}}
+; SI: s_xor_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, -1{{$}}
+; SI: v_cndmask_b32_e64
+; SI: buffer_store_byte
+define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %load = load i1 addrspace(1)* %in
+  %ext = sext i1 %load to i32
+  %cmp = icmp eq i32 %ext, 0
+  store i1 %cmp, i1 addrspace(1)* %out
+  ret void
+}
+
+; FIXME: The negate should be inverting the compare.
+; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_0:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
+; SI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 1{{$}}
+; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], [[CMP0]], -1
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @zextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %load = load i1 addrspace(1)* %in
+  %ext = zext i1 %load to i32
+  %cmp = icmp eq i32 %ext, 0
+  store i1 %cmp, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_1:
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @sextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %load = load i1 addrspace(1)* %in
+  %ext = sext i1 %load to i32
+  %cmp = icmp eq i32 %ext, 1
+  store i1 %cmp, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_1:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %load = load i1 addrspace(1)* %in
+  %ext = zext i1 %load to i32
+  %cmp = icmp eq i32 %ext, 1
+  store i1 %cmp, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_neg1:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @sextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %load = load i1 addrspace(1)* %in
+  %ext = sext i1 %load to i32
+  %cmp = icmp eq i32 %ext, -1
+  store i1 %cmp, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_eq_neg1:
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @zextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %load = load i1 addrspace(1)* %in
+  %ext = zext i1 %load to i32
+  %cmp = icmp eq i32 %ext, -1
+  store i1 %cmp, i1 addrspace(1)* %out
+  ret void
+}
+
+
+; FUNC-LABEL {{^}}sextload_i1_to_i32_trunc_cmp_ne_0:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %load = load i1 addrspace(1)* %in
+  %ext = sext i1 %load to i32
+  %cmp = icmp ne i32 %ext, 0
+  store i1 %cmp, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_0:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %load = load i1 addrspace(1)* %in
+  %ext = zext i1 %load to i32
+  %cmp = icmp ne i32 %ext, 0
+  store i1 %cmp, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_1:
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %load = load i1 addrspace(1)* %in
+  %ext = sext i1 %load to i32
+  %cmp = icmp ne i32 %ext, 1
+  store i1 %cmp, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_1:
+; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
+; SI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 1{{$}}
+; SI-NEXT: s_xor_b64 [[NEG:s\[[0-9]+:[0-9]+\]]], [[CMP0]], -1
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %load = load i1 addrspace(1)* %in
+  %ext = zext i1 %load to i32
+  %cmp = icmp ne i32 %ext, 1
+  store i1 %cmp, i1 addrspace(1)* %out
+  ret void
+}
+
+; FIXME: This should be one compare.
+; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_ne_neg1:
+; XSI: buffer_load_ubyte [[LOAD:v[0-9]+]]
+; XSI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
+; XSI: v_cmp_eq_i32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], [[TMP]], 0{{$}}
+; XSI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP0]]
+; XSI-NEXT: buffer_store_byte [[RESULT]]
+define void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %load = load i1 addrspace(1)* %in
+  %ext = sext i1 %load to i32
+  %cmp = icmp ne i32 %ext, -1
+  store i1 %cmp, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}zextload_i1_to_i32_trunc_cmp_ne_neg1:
+; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
+; SI-NEXT: buffer_store_byte [[RESULT]]
+define void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
+  %load = load i1 addrspace(1)* %in
+  %ext = zext i1 %load to i32
+  %cmp = icmp ne i32 %ext, -1
+  store i1 %cmp, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}masked_load_i1_to_i32_trunc_cmp_ne_neg1:
+; SI: buffer_load_sbyte [[LOAD:v[0-9]+]]
+; SI: v_cmp_ne_i32_e64 {{s\[[0-9]+:[0-9]+\]}}, [[LOAD]], -1{{$}}
+; SI-NEXT: v_cndmask_b32_e64
+; SI-NEXT: buffer_store_byte
+define void @masked_load_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
+  %load = load i8 addrspace(1)* %in
+  %masked = and i8 %load, 255
+  %ext = sext i8 %masked to i32
+  %cmp = icmp ne i32 %ext, -1
+  store i1 %cmp, i1 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/trunc-store-i1.ll b/test/CodeGen/R600/trunc-store-i1.ll
index a3975c8b8e4c..83b546f93a4a 100644
--- a/test/CodeGen/R600/trunc-store-i1.ll
+++ b/test/CodeGen/R600/trunc-store-i1.ll
@@ -1,30 +1,30 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
 
 
-; SI-LABEL: @global_truncstore_i32_to_i1
-; SI: S_LOAD_DWORD [[LOAD:s[0-9]+]],
-; SI: S_AND_B32 [[SREG:s[0-9]+]], [[LOAD]], 1
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], [[SREG]]
-; SI: BUFFER_STORE_BYTE [[VREG]],
+; SI-LABEL: {{^}}global_truncstore_i32_to_i1:
+; SI: s_load_dword [[LOAD:s[0-9]+]],
+; SI: s_and_b32 [[SREG:s[0-9]+]], [[LOAD]], 1
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
+; SI: buffer_store_byte [[VREG]],
 define void @global_truncstore_i32_to_i1(i1 addrspace(1)* %out, i32 %val) nounwind {
   %trunc = trunc i32 %val to i1
   store i1 %trunc, i1 addrspace(1)* %out, align 1
   ret void
 }
 
-; SI-LABEL: @global_truncstore_i64_to_i1
-; SI: BUFFER_STORE_BYTE
+; SI-LABEL: {{^}}global_truncstore_i64_to_i1:
+; SI: buffer_store_byte
 define void @global_truncstore_i64_to_i1(i1 addrspace(1)* %out, i64 %val) nounwind {
   %trunc = trunc i64 %val to i1
   store i1 %trunc, i1 addrspace(1)* %out, align 1
   ret void
 }
 
-; SI-LABEL: @global_truncstore_i16_to_i1
-; SI: S_LOAD_DWORD [[LOAD:s[0-9]+]],
-; SI: S_AND_B32 [[SREG:s[0-9]+]], [[LOAD]], 1
-; SI: V_MOV_B32_e32 [[VREG:v[0-9]+]], [[SREG]]
-; SI: BUFFER_STORE_BYTE [[VREG]],
+; SI-LABEL: {{^}}global_truncstore_i16_to_i1:
+; SI: s_load_dword [[LOAD:s[0-9]+]],
+; SI: s_and_b32 [[SREG:s[0-9]+]], [[LOAD]], 1
+; SI: v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
+; SI: buffer_store_byte [[VREG]],
 define void @global_truncstore_i16_to_i1(i1 addrspace(1)* %out, i16 %val) nounwind {
   %trunc = trunc i16 %val to i1
   store i1 %trunc, i1 addrspace(1)* %out, align 1
diff --git a/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll b/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll
index ec959c21798a..878ea3f48995 100644
--- a/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll
+++ b/test/CodeGen/R600/trunc-vector-store-assertion-failure.ll
@@ -4,7 +4,7 @@
 ; vector stores at the end of a basic block were not being added to the
 ; LegalizedNodes list, which triggered an assertion failure.
 
-; CHECK-LABEL: @test
+; CHECK-LABEL: {{^}}test:
 ; CHECK: MEM_RAT_CACHELESS STORE_RAW
 define void @test(<4 x i8> addrspace(1)* %out, i32 %cond, <4 x i8> %in) {
 entry:
diff --git a/test/CodeGen/R600/trunc.ll b/test/CodeGen/R600/trunc.ll
index 31cdfcd1a884..a6f902f24976 100644
--- a/test/CodeGen/R600/trunc.ll
+++ b/test/CodeGen/R600/trunc.ll
@@ -1,13 +1,13 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
 
 define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) {
-; SI-LABEL: @trunc_i64_to_i32_store
-; SI: S_LOAD_DWORD s0, s[0:1], 0xb
-; SI: V_MOV_B32_e32 v0, s0
-; SI: BUFFER_STORE_DWORD v0
+; SI-LABEL: {{^}}trunc_i64_to_i32_store:
+; SI: s_load_dword [[SLOAD:s[0-9]+]], s[0:1], 0xb
+; SI: v_mov_b32_e32 [[VLOAD:v[0-9]+]], [[SLOAD]]
+; SI: buffer_store_dword [[VLOAD]]
 
-; EG-LABEL: @trunc_i64_to_i32_store
+; EG-LABEL: {{^}}trunc_i64_to_i32_store:
 ; EG: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
 ; EG: LSHR
 ; EG-NEXT: 2(
@@ -16,12 +16,12 @@ define void @trunc_i64_to_i32_store(i32 addrspace(1)* %out, i64 %in) {
   ret void
 }
 
-; SI-LABEL: @trunc_load_shl_i64:
-; SI-DAG: S_LOAD_DWORDX2
-; SI-DAG: S_LOAD_DWORD [[SREG:s[0-9]+]],
-; SI: S_LSHL_B32 [[SHL:s[0-9]+]], [[SREG]], 2
-; SI: V_MOV_B32_e32 [[VSHL:v[0-9]+]], [[SHL]]
-; SI: BUFFER_STORE_DWORD [[VSHL]],
+; SI-LABEL: {{^}}trunc_load_shl_i64:
+; SI-DAG: s_load_dwordx2
+; SI-DAG: s_load_dword [[SREG:s[0-9]+]],
+; SI: s_lshl_b32 [[SHL:s[0-9]+]], [[SREG]], 2
+; SI: v_mov_b32_e32 [[VSHL:v[0-9]+]], [[SHL]]
+; SI: buffer_store_dword [[VSHL]],
 define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) {
   %b = shl i64 %a, 2
   %result = trunc i64 %b to i32
@@ -29,12 +29,13 @@ define void @trunc_load_shl_i64(i32 addrspace(1)* %out, i64 %a) {
   ret void
 }
 
-; SI-LABEL: @trunc_shl_i64:
-; SI: S_LOAD_DWORDX2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}},
-; SI: S_ADD_I32 s[[LO_ADD:[0-9]+]], s[[LO_SREG]],
-; SI: S_LSHL_B64 s{{\[}}[[LO_SREG2:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_ADD]]:{{[0-9]+\]}}, 2
-; SI: V_MOV_B32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
-; SI: BUFFER_STORE_DWORD v[[LO_VREG]],
+; SI-LABEL: {{^}}trunc_shl_i64:
+; SI: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:{{[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI: s_lshl_b64 s{{\[}}[[LO_SHL:[0-9]+]]:{{[0-9]+\]}}, s{{\[}}[[LO_SREG]]:{{[0-9]+\]}}, 2
+; SI: s_add_u32 s[[LO_SREG2:[0-9]+]], s[[LO_SHL]],
+; SI: s_addc_u32
+; SI: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG2]]
+; SI: buffer_store_dword v[[LO_VREG]],
 define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64 %a) {
   %aa = add i64 %a, 234 ; Prevent shrinking store.
   %b = shl i64 %aa, 2
@@ -44,10 +45,21 @@ define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64
   ret void
 }
 
-; SI-LABEL: @trunc_i32_to_i1:
-; SI: V_AND_B32
-; SI: V_CMP_EQ_I32
-define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
+; SI-LABEL: {{^}}trunc_i32_to_i1:
+; SI: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
+; SI: v_cmp_eq_i32
+define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
+  %a = load i32 addrspace(1)* %ptr, align 4
+  %trunc = trunc i32 %a to i1
+  %result = select i1 %trunc, i32 1, i32 0
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}sgpr_trunc_i32_to_i1:
+; SI: v_and_b32_e64 v{{[0-9]+}}, 1, s{{[0-9]+}}
+; SI: v_cmp_eq_i32
+define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
   %trunc = trunc i32 %a to i1
   %result = select i1 %trunc, i32 1, i32 0
   store i32 %result, i32 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/R600/uaddo.ll b/test/CodeGen/R600/uaddo.ll
index a80e502eef2a..ac2960412503 100644
--- a/test/CodeGen/R600/uaddo.ll
+++ b/test/CodeGen/R600/uaddo.ll
@@ -1,13 +1,13 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s
 
 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
 declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
 
-; FUNC-LABEL: @uaddo_i64_zext
-; SI: ADD
-; SI: ADDC
-; SI: ADDC
+; FUNC-LABEL: {{^}}uaddo_i64_zext:
+; SI: add
+; SI: addc
+; SI: addc
 define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %uadd, 0
@@ -18,8 +18,8 @@ define void @uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @s_uaddo_i32
-; SI: S_ADD_I32
+; FUNC-LABEL: {{^}}s_uaddo_i32:
+; SI: s_add_i32
 define void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
   %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind
   %val = extractvalue { i32, i1 } %uadd, 0
@@ -29,8 +29,8 @@ define void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
   ret void
 }
 
-; FUNC-LABEL: @v_uaddo_i32
-; SI: V_ADD_I32
+; FUNC-LABEL: {{^}}v_uaddo_i32:
+; SI: v_add_i32
 define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
   %a = load i32 addrspace(1)* %aptr, align 4
   %b = load i32 addrspace(1)* %bptr, align 4
@@ -42,9 +42,9 @@ define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
   ret void
 }
 
-; FUNC-LABEL: @s_uaddo_i64
-; SI: S_ADD_I32
-; SI: S_ADDC_U32
+; FUNC-LABEL: {{^}}s_uaddo_i64:
+; SI: s_add_u32
+; SI: s_addc_u32
 define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
   %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %uadd, 0
@@ -54,9 +54,9 @@ define void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
   ret void
 }
 
-; FUNC-LABEL: @v_uaddo_i64
-; SI: V_ADD_I32
-; SI: V_ADDC_U32
+; FUNC-LABEL: {{^}}v_uaddo_i64:
+; SI: v_add_i32
+; SI: v_addc_u32
 define void @v_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
   %a = load i64 addrspace(1)* %aptr, align 4
   %b = load i64 addrspace(1)* %bptr, align 4
diff --git a/test/CodeGen/R600/udiv.ll b/test/CodeGen/R600/udiv.ll
index 53713217a1c0..e9a6155da6fb 100644
--- a/test/CodeGen/R600/udiv.ll
+++ b/test/CodeGen/R600/udiv.ll
@@ -1,7 +1,7 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-;EG-CHECK-LABEL: @test
+;EG-CHECK-LABEL: {{^}}test:
 ;EG-CHECK-NOT: SETGE_INT
 ;EG-CHECK: CF_END
 
@@ -18,10 +18,10 @@ define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 ;The goal of this test is to make sure the ISel doesn't fail when it gets
 ;a v4i32 udiv
 
-;EG-CHECK-LABEL: @test2
+;EG-CHECK-LABEL: {{^}}test2:
 ;EG-CHECK: CF_END
-;SI-CHECK-LABEL: @test2
-;SI-CHECK: S_ENDPGM
+;SI-CHECK-LABEL: {{^}}test2:
+;SI-CHECK: s_endpgm
 
 define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
@@ -32,10 +32,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   ret void
 }
 
-;EG-CHECK-LABEL: @test4
+;EG-CHECK-LABEL: {{^}}test4:
 ;EG-CHECK: CF_END
-;SI-CHECK-LABEL: @test4
-;SI-CHECK: S_ENDPGM
+;SI-CHECK-LABEL: {{^}}test4:
+;SI-CHECK: s_endpgm
 
 define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
diff --git a/test/CodeGen/R600/udivrem.ll b/test/CodeGen/R600/udivrem.ll
index 5f5753adca3f..2254a2abd5b9 100644
--- a/test/CodeGen/R600/udivrem.ll
+++ b/test/CodeGen/R600/udivrem.ll
@@ -1,7 +1,7 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck --check-prefix=SI --check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
 
-; FUNC-LABEL: @test_udivrem
+; FUNC-LABEL: {{^}}test_udivrem:
 ; EG: RECIP_UINT
 ; EG-DAG: MULHI
 ; EG-DAG: MULLO_INT
@@ -26,30 +26,30 @@
 ; EG-DAG: CNDE_INT
 ; EG-DAG: CNDE_INT
 
-; SI: V_RCP_IFLAG_F32_e32 [[RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[RCP_HI:v[0-9]+]], [[RCP]]
-; SI-DAG: V_MUL_LO_I32 [[RCP_LO:v[0-9]+]], [[RCP]]
-; SI-DAG: V_SUB_I32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
-; SI: V_CNDMASK_B32_e64
-; SI: V_MUL_HI_U32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
-; SI-DAG: V_ADD_I32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
-; SI: V_CNDMASK_B32_e64
-; SI: V_MUL_HI_U32 [[Quotient:v[0-9]+]]
-; SI: V_MUL_LO_I32 [[Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI: V_AND_B32_e32 [[Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI: S_ENDPGM
+; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
+; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
+; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
+; SI: v_cndmask_b32_e64
+; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
+; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
+; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
+; SI: v_cndmask_b32_e64
+; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
+; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI: s_endpgm
 define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
   %result0 = udiv i32 %x, %y
   store i32 %result0, i32 addrspace(1)* %out
@@ -58,7 +58,7 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
   ret void
 }
 
-; FUNC-LABEL: @test_udivrem_v2
+; FUNC-LABEL: {{^}}test_udivrem_v2:
 ; EG-DAG: RECIP_UINT
 ; EG-DAG: MULHI
 ; EG-DAG: MULLO_INT
@@ -106,53 +106,53 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
 ; EG-DAG: CNDE_INT
 ; EG-DAG: CNDE_INT
 
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[FIRST_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FIRST_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[FIRST_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[SECOND_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[SECOND_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[SECOND_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI: S_ENDPGM
+; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
+; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
+; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI: s_endpgm
 define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) {
   %result0 = udiv <2 x i32> %x, %y
   store <2 x i32> %result0, <2 x i32> addrspace(1)* %out
@@ -162,7 +162,7 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3
 }
 
 
-; FUNC-LABEL: @test_udivrem_v4
+; FUNC-LABEL: {{^}}test_udivrem_v4:
 ; EG-DAG: RECIP_UINT
 ; EG-DAG: MULHI
 ; EG-DAG: MULLO_INT
@@ -256,99 +256,99 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3
 ; EG-DAG: CNDE_INT
 ; EG-DAG: CNDE_INT
 
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[FIRST_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FIRST_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[FIRST_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[SECOND_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[SECOND_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[SECOND_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[THIRD_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[THIRD_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[THIRD_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[THIRD_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[THIRD_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[THIRD_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[THIRD_Quotient_A_One:v[0-9]+]], {{.*}}, [[THIRD_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[THIRD_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[THIRD_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[THIRD_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_RCP_IFLAG_F32_e32 [[FOURTH_RCP:v[0-9]+]]
-; SI-DAG: V_MUL_HI_U32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: V_MUL_LO_I32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: V_SUB_I32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
-; SI-DAG: V_ADD_I32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
-; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_MUL_HI_U32 [[FOURTH_Quotient:v[0-9]+]]
-; SI-DAG: V_MUL_LO_I32 [[FOURTH_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: V_SUB_I32_e32 [[FOURTH_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FOURTH_Num_S_Remainder]]
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_AND_B32_e32 [[FOURTH_Tmp1:v[0-9]+]]
-; SI-DAG: V_ADD_I32_e32 [[FOURTH_Quotient_A_One:v[0-9]+]], {{.*}}, [[FOURTH_Quotient]]
-; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_Quotient_S_One:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_ADD_I32_e32 [[FOURTH_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: V_SUBREV_I32_e32 [[FOURTH_Remainder_S_Den:v[0-9]+]],
-; SI-DAG: V_CNDMASK_B32_e64
-; SI-DAG: V_CNDMASK_B32_e64
-; SI: S_ENDPGM
+; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
+; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FIRST_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
+; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[SECOND_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_rcp_iflag_f32_e32 [[THIRD_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
+; SI-DAG: v_mul_lo_i32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
+; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
+; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[THIRD_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[THIRD_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[THIRD_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[THIRD_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[THIRD_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[THIRD_Quotient_A_One:v[0-9]+]], {{.*}}, [[THIRD_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[THIRD_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[THIRD_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_rcp_iflag_f32_e32 [[FOURTH_RCP:v[0-9]+]]
+; SI-DAG: v_mul_hi_u32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
+; SI-DAG: v_mul_lo_i32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
+; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
+; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_mul_hi_u32 [[FOURTH_Quotient:v[0-9]+]]
+; SI-DAG: v_mul_lo_i32 [[FOURTH_Num_S_Remainder:v[0-9]+]]
+; SI-DAG: v_sub_i32_e32 [[FOURTH_Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[FOURTH_Num_S_Remainder]]
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_and_b32_e32 [[FOURTH_Tmp1:v[0-9]+]]
+; SI-DAG: v_add_i32_e32 [[FOURTH_Quotient_A_One:v[0-9]+]], {{.*}}, [[FOURTH_Quotient]]
+; SI-DAG: v_subrev_i32_e32 [[FOURTH_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_add_i32_e32 [[FOURTH_Remainder_A_Den:v[0-9]+]],
+; SI-DAG: v_subrev_i32_e32 [[FOURTH_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_cndmask_b32_e64
+; SI-DAG: v_cndmask_b32_e64
+; SI: s_endpgm
 define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
   %result0 = udiv <4 x i32> %x, %y
   store <4 x i32> %result0, <4 x i32> addrspace(1)* %out
diff --git a/test/CodeGen/R600/udivrem24.ll b/test/CodeGen/R600/udivrem24.ll
new file mode 100644
index 000000000000..0e15c0730c04
--- /dev/null
+++ b/test/CodeGen/R600/udivrem24.ll
@@ -0,0 +1,244 @@
+; RUN: llc -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}udiv24_i8:
+; SI: v_cvt_f32_ubyte
+; SI: v_cvt_f32_ubyte
+; SI: v_rcp_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
+  %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
+  %num = load i8 addrspace(1) * %in
+  %den = load i8 addrspace(1) * %den_ptr
+  %result = udiv i8 %num, %den
+  store i8 %result, i8 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}udiv24_i16:
+; SI: v_cvt_f32_u32
+; SI: v_cvt_f32_u32
+; SI: v_rcp_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+  %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
+  %num = load i16 addrspace(1) * %in, align 2
+  %den = load i16 addrspace(1) * %den_ptr, align 2
+  %result = udiv i16 %num, %den
+  store i16 %result, i16 addrspace(1)* %out, align 2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}udiv24_i32:
+; SI: v_cvt_f32_u32
+; SI-DAG: v_cvt_f32_u32
+; SI-DAG: v_rcp_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 8
+  %den.i24.0 = shl i32 %den, 8
+  %num.i24 = lshr i32 %num.i24.0, 8
+  %den.i24 = lshr i32 %den.i24.0, 8
+  %result = udiv i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}udiv25_i32:
+; RCP_IFLAG is for URECIP in the full 32b alg
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: UINT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 7
+  %den.i24.0 = shl i32 %den, 7
+  %num.i24 = lshr i32 %num.i24.0, 7
+  %den.i24 = lshr i32 %den.i24.0, 7
+  %result = udiv i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_udiv24_i32_1:
+; RCP_IFLAG is for URECIP in the full 32b alg
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: UINT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 8
+  %den.i24.0 = shl i32 %den, 7
+  %num.i24 = lshr i32 %num.i24.0, 8
+  %den.i24 = lshr i32 %den.i24.0, 7
+  %result = udiv i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_udiv24_i32_2:
+; RCP_IFLAG is for URECIP in the full 32b alg
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: UINT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 7
+  %den.i24.0 = shl i32 %den, 8
+  %num.i24 = lshr i32 %num.i24.0, 7
+  %den.i24 = lshr i32 %den.i24.0, 8
+  %result = udiv i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}urem24_i8:
+; SI: v_cvt_f32_ubyte
+; SI: v_cvt_f32_ubyte
+; SI: v_rcp_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
+  %den_ptr = getelementptr i8 addrspace(1)* %in, i8 1
+  %num = load i8 addrspace(1) * %in
+  %den = load i8 addrspace(1) * %den_ptr
+  %result = urem i8 %num, %den
+  store i8 %result, i8 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}urem24_i16:
+; SI: v_cvt_f32_u32
+; SI: v_cvt_f32_u32
+; SI: v_rcp_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
+  %den_ptr = getelementptr i16 addrspace(1)* %in, i16 1
+  %num = load i16 addrspace(1) * %in, align 2
+  %den = load i16 addrspace(1) * %den_ptr, align 2
+  %result = urem i16 %num, %den
+  store i16 %result, i16 addrspace(1)* %out, align 2
+  ret void
+}
+
+; FUNC-LABEL: {{^}}urem24_i32:
+; SI: v_cvt_f32_u32
+; SI: v_cvt_f32_u32
+; SI: v_rcp_f32
+; SI: v_cvt_u32_f32
+
+; EG: UINT_TO_FLT
+; EG-DAG: UINT_TO_FLT
+; EG-DAG: RECIP_IEEE
+; EG: FLT_TO_UINT
+define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 8
+  %den.i24.0 = shl i32 %den, 8
+  %num.i24 = lshr i32 %num.i24.0, 8
+  %den.i24 = lshr i32 %den.i24.0, 8
+  %result = urem i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}urem25_i32:
+; RCP_IFLAG is for URECIP in the full 32b alg
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: UINT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 7
+  %den.i24.0 = shl i32 %den, 7
+  %num.i24 = lshr i32 %num.i24.0, 7
+  %den.i24 = lshr i32 %den.i24.0, 7
+  %result = urem i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_urem24_i32_1:
+; RCP_IFLAG is for URECIP in the full 32b alg
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: UINT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 8
+  %den.i24.0 = shl i32 %den, 7
+  %num.i24 = lshr i32 %num.i24.0, 8
+  %den.i24 = lshr i32 %den.i24.0, 7
+  %result = urem i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_no_urem24_i32_2:
+; RCP_IFLAG is for URECIP in the full 32b alg
+; SI: v_rcp_iflag
+; SI-NOT: v_rcp_f32
+
+; EG-NOT: UINT_TO_FLT
+; EG-NOT: RECIP_IEEE
+define void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %num = load i32 addrspace(1) * %in, align 4
+  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num.i24.0 = shl i32 %num, 7
+  %den.i24.0 = shl i32 %den, 8
+  %num.i24 = lshr i32 %num.i24.0, 7
+  %den.i24 = lshr i32 %den.i24.0, 8
+  %result = urem i32 %num.i24, %den.i24
+  store i32 %result, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/udivrem64.ll b/test/CodeGen/R600/udivrem64.ll
index a71315a12d80..a04745859879 100644
--- a/test/CodeGen/R600/udivrem64.ll
+++ b/test/CodeGen/R600/udivrem64.ll
@@ -1,7 +1,7 @@
-;XUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
+;XUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs| FileCheck --check-prefix=SI --check-prefix=FUNC %s
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
 
-;FUNC-LABEL: @test_udiv
+;FUNC-LABEL: {{^}}test_udiv:
 ;EG: RECIP_UINT
 ;EG: LSHL {{.*}}, 1,
 ;EG: BFE_UINT
@@ -34,14 +34,14 @@
 ;EG: BFE_UINT
 ;EG: BFE_UINT
 ;EG: BFE_UINT
-;SI: S_ENDPGM
+;SI: s_endpgm
 define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
   %result = udiv i64 %x, %y
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-;FUNC-LABEL: @test_urem
+;FUNC-LABEL: {{^}}test_urem:
 ;EG: RECIP_UINT
 ;EG: BFE_UINT
 ;EG: BFE_UINT
@@ -74,7 +74,7 @@ define void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: BFE_UINT
 ;EG: BFE_UINT
 ;EG: AND_INT {{.*}}, 1,
-;SI: S_ENDPGM
+;SI: s_endpgm
 define void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
   %result = urem i64 %x, %y
   store i64 %result, i64 addrspace(1)* %out
diff --git a/test/CodeGen/R600/uint_to_fp.f64.ll b/test/CodeGen/R600/uint_to_fp.f64.ll
index 9a41796a06bf..09e987dd14da 100644
--- a/test/CodeGen/R600/uint_to_fp.f64.ll
+++ b/test/CodeGen/R600/uint_to_fp.f64.ll
@@ -1,35 +1,96 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-; SI-LABEL: @uint_to_fp_f64_i32
-; SI: V_CVT_F64_U32_e32
-; SI: S_ENDPGM
-define void @uint_to_fp_f64_i32(double addrspace(1)* %out, i32 %in) {
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; SI-LABEL: {{^}}v_uint_to_fp_i64_to_f64
+; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; SI: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
+; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
+; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep = getelementptr i64 addrspace(1)* %in, i32 %tid
+  %val = load i64 addrspace(1)* %gep, align 8
+  %result = uitofp i64 %val to double
+  store double %result, double addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}s_uint_to_fp_i64_to_f64
+define void @s_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
+  %cast = uitofp i64 %in to double
+  store double %cast, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: {{^}}s_uint_to_fp_v2i64_to_v2f64
+define void @s_uint_to_fp_v2i64_to_v2f64(<2 x double> addrspace(1)* %out, <2 x i64> %in) {
+  %cast = uitofp <2 x i64> %in to <2 x double>
+  store <2 x double> %cast, <2 x double> addrspace(1)* %out, align 16
+  ret void
+}
+
+; SI-LABEL: {{^}}s_uint_to_fp_v4i64_to_v4f64
+define void @s_uint_to_fp_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %in) {
+  %cast = uitofp <4 x i64> %in to <4 x double>
+  store <4 x double> %cast, <4 x double> addrspace(1)* %out, align 16
+  ret void
+}
+
+; SI-LABEL: {{^}}s_uint_to_fp_i32_to_f64
+; SI: v_cvt_f64_u32_e32
+; SI: s_endpgm
+define void @s_uint_to_fp_i32_to_f64(double addrspace(1)* %out, i32 %in) {
   %cast = uitofp i32 %in to double
   store double %cast, double addrspace(1)* %out, align 8
   ret void
 }
 
-; SI-LABEL: @uint_to_fp_i1_f64:
-; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; FIXME: We should the VGPR sources for V_CNDMASK are copied from SGPRs,
-; we should be able to fold the SGPRs into the V_CNDMASK instructions.
-; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
-; SI: V_CNDMASK_B32_e64 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, [[CMP]]
-; SI: BUFFER_STORE_DWORDX2
-; SI: S_ENDPGM
-define void @uint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
+; SI-LABEL: {{^}}s_uint_to_fp_v2i32_to_v2f64
+; SI: v_cvt_f64_u32_e32
+; SI: v_cvt_f64_u32_e32
+; SI: s_endpgm
+define void @s_uint_to_fp_v2i32_to_v2f64(<2 x double> addrspace(1)* %out, <2 x i32> %in) {
+  %cast = uitofp <2 x i32> %in to <2 x double>
+  store <2 x double> %cast, <2 x double> addrspace(1)* %out, align 16
+  ret void
+}
+
+; SI-LABEL: {{^}}s_uint_to_fp_v4i32_to_v4f64
+; SI: v_cvt_f64_u32_e32
+; SI: v_cvt_f64_u32_e32
+; SI: v_cvt_f64_u32_e32
+; SI: v_cvt_f64_u32_e32
+; SI: s_endpgm
+define void @s_uint_to_fp_v4i32_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i32> %in) {
+  %cast = uitofp <4 x i32> %in to <4 x double>
+  store <4 x double> %cast, <4 x double> addrspace(1)* %out, align 16
+  ret void
+}
+
+; SI-LABEL: {{^}}uint_to_fp_i1_to_f64:
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
+; uses an SGPR for [[CMP]]
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
+; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+define void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) {
   %cmp = icmp eq i32 %in, 0
   %fp = uitofp i1 %cmp to double
   store double %fp, double addrspace(1)* %out, align 4
   ret void
 }
 
-; SI-LABEL: @uint_to_fp_i1_f64_load:
-; SI: V_CNDMASK_B32_e64 [[IRESULT:v[0-9]]], 0, 1
-; SI-NEXT: V_CVT_F64_U32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
-; SI: BUFFER_STORE_DWORDX2 [[RESULT]]
-; SI: S_ENDPGM
-define void @uint_to_fp_i1_f64_load(double addrspace(1)* %out, i1 %in) {
+; SI-LABEL: {{^}}uint_to_fp_i1_to_f64_load:
+; SI: v_cndmask_b32_e64 [[IRESULT:v[0-9]]], 0, 1
+; SI-NEXT: v_cvt_f64_u32_e32 [[RESULT:v\[[0-9]+:[0-9]\]]], [[IRESULT]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @uint_to_fp_i1_to_f64_load(double addrspace(1)* %out, i1 %in) {
   %fp = uitofp i1 %in to double
   store double %fp, double addrspace(1)* %out, align 8
   ret void
diff --git a/test/CodeGen/R600/uint_to_fp.ll b/test/CodeGen/R600/uint_to_fp.ll
index 8f5d42d42c6b..1c5e487724bc 100644
--- a/test/CodeGen/R600/uint_to_fp.ll
+++ b/test/CodeGen/R600/uint_to_fp.ll
@@ -1,69 +1,80 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
 
-; FUNC-LABEL: @uint_to_fp_v2i32
+; FUNC-LABEL: {{^}}uint_to_fp_i32_to_f32:
+; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+
+; SI: v_cvt_f32_u32_e32
+; SI: s_endpgm
+define void @uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) {
+  %result = uitofp i32 %in to float
+  store float %result, float addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}uint_to_fp_v2i32_to_v2f32:
 ; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
 ; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
 
-; SI: V_CVT_F32_U32_e32
-; SI: V_CVT_F32_U32_e32
-; SI: S_ENDPGM
-define void @uint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: s_endpgm
+define void @uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
   %result = uitofp <2 x i32> %in to <2 x float>
   store <2 x float> %result, <2 x float> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @uint_to_fp_v4i32
+; FUNC-LABEL: {{^}}uint_to_fp_v4i32_to_v4f32:
 ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI: V_CVT_F32_U32_e32
-; SI: V_CVT_F32_U32_e32
-; SI: V_CVT_F32_U32_e32
-; SI: V_CVT_F32_U32_e32
-; SI: S_ENDPGM
-define void @uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: s_endpgm
+define void @uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %value = load <4 x i32> addrspace(1) * %in
   %result = uitofp <4 x i32> %value to <4 x float>
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @uint_to_fp_i64_f32
+; FUNC-LABEL: {{^}}uint_to_fp_i64_to_f32:
 ; R600: UINT_TO_FLT
 ; R600: UINT_TO_FLT
 ; R600: MULADD_IEEE
-; SI: V_CVT_F32_U32_e32
-; SI: V_CVT_F32_U32_e32
-; SI: V_MAD_F32
-; SI: S_ENDPGM
-define void @uint_to_fp_i64_f32(float addrspace(1)* %out, i64 %in) {
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+; SI: v_mad_f32
+; SI: s_endpgm
+define void @uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) {
 entry:
   %0 = uitofp i64 %in to float
   store float %0, float addrspace(1)* %out
   ret void
 }
 
-; FUNC-LABEL: @uint_to_fp_i1_f32:
-; SI: V_CMP_EQ_I32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; SI-NEXT: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, 1.000000e+00, [[CMP]]
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
-define void @uint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
+; FUNC-LABEL: {{^}}uint_to_fp_i1_to_f32:
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32 %in) {
   %cmp = icmp eq i32 %in, 0
   %fp = uitofp i1 %cmp to float
   store float %fp, float addrspace(1)* %out, align 4
   ret void
 }
 
-; FUNC-LABEL: @uint_to_fp_i1_f32_load:
-; SI: V_CNDMASK_B32_e64 [[RESULT:v[0-9]+]], 0, 1.000000e+00
-; SI: BUFFER_STORE_DWORD [[RESULT]],
-; SI: S_ENDPGM
-define void @uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) {
+; FUNC-LABEL: {{^}}uint_to_fp_i1_to_f32_load:
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @uint_to_fp_i1_to_f32_load(float addrspace(1)* %out, i1 %in) {
   %fp = uitofp i1 %in to float
   store float %fp, float addrspace(1)* %out, align 4
   ret void
diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll
index 4df69d1e5f16..47fba78544d9 100644
--- a/test/CodeGen/R600/unaligned-load-store.ll
+++ b/test/CodeGen/R600/unaligned-load-store.ll
@@ -1,17 +1,112 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
 
-; SI-LABEL: @unaligned_load_store_i32:
-; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]
+; SI-LABEL: {{^}}unaligned_load_store_i32:
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_write_b32
+; SI: s_endpgm
 define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
   %v = load i32 addrspace(3)* %p, align 1
   store i32 %v, i32 addrspace(3)* %r, align 1
   ret void
 }
 
-; SI-LABEL: @unaligned_load_store_v4i32:
-; DS_READ_U32 {{v[0-9]+}}, 0, [[REG]]
+; SI-LABEL: {{^}}unaligned_load_store_v4i32:
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+; SI: ds_read_u8
+
+; SI: ds_write_b32
+; SI: ds_write_b32
+; SI: ds_write_b32
+; SI: ds_write_b32
+; SI: s_endpgm
 define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
   %v = load <4 x i32> addrspace(3)* %p, align 1
   store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
   ret void
 }
+
+; SI-LABEL: {{^}}load_lds_i64_align_4:
+; SI: ds_read2_b32
+; SI: s_endpgm
+define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+  %val = load i64 addrspace(3)* %in, align 4
+  store i64 %val, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: {{^}}load_lds_i64_align_4_with_offset
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
+; SI: s_endpgm
+define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+  %ptr = getelementptr i64 addrspace(3)* %in, i32 4
+  %val = load i64 addrspace(3)* %ptr, align 4
+  store i64 %val, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: {{^}}load_lds_i64_align_4_with_split_offset:
+; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
+; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:0 offset1:1
+; SI: s_endpgm
+define void @load_lds_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+  %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
+  %ptr255 = getelementptr i32 addrspace(3)* %ptr, i32 255
+  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
+  %val = load i64 addrspace(3)* %ptri64, align 4
+  store i64 %val, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FIXME: Need to fix this case.
+; define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
+;   %val = load i64 addrspace(3)* %in, align 1
+;   store i64 %val, i64 addrspace(1)* %out, align 8
+;   ret void
+; }
+
+; SI-LABEL: {{^}}store_lds_i64_align_4:
+; SI: ds_write2_b32
+; SI: s_endpgm
+define void @store_lds_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
+  store i64 %val, i64 addrspace(3)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}store_lds_i64_align_4_with_offset
+; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
+; SI: s_endpgm
+define void @store_lds_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
+  %ptr = getelementptr i64 addrspace(3)* %out, i32 4
+  store i64 0, i64 addrspace(3)* %ptr, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}store_lds_i64_align_4_with_split_offset:
+; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
+; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:0 offset1:1
+; SI: s_endpgm
+define void @store_lds_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
+  %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
+  %ptr255 = getelementptr i32 addrspace(3)* %ptr, i32 255
+  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
+  store i64 0, i64 addrspace(3)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/unhandled-loop-condition-assertion.ll b/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
index e4129c511230..9faeed6d6565 100644
--- a/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
+++ b/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
@@ -1,11 +1,11 @@
 ; REQUIRES: asserts
 ; XFAIL: *
-; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s
+; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=amdgcn -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=COMMON %s
 ; RUN: llc -O0 -verify-machineinstrs -asm-verbose=0 -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=COMMON %s
 
 ; SI hits an assertion at -O0, evergreen hits a not implemented unreachable.
 
-; COMMON-LABEL: @branch_true:
+; COMMON-LABEL: {{^}}branch_true:
 define void @branch_true(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 {
 entry:
   br i1 true, label %for.end, label %for.body.lr.ph
@@ -39,9 +39,9 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
-; COMMON-LABEL: @branch_false:
+; COMMON-LABEL: {{^}}branch_false:
 ; SI: .text
-; SI-NEXT: S_ENDPGM
+; SI-NEXT: s_endpgm
 define void @branch_false(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 {
 entry:
   br i1 false, label %for.end, label %for.body.lr.ph
@@ -75,9 +75,9 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
-; COMMON-LABEL: @branch_undef:
+; COMMON-LABEL: {{^}}branch_undef:
 ; SI: .text
-; SI-NEXT: S_ENDPGM
+; SI-NEXT: s_endpgm
 define void @branch_undef(i8 addrspace(1)* nocapture %main, i32 %main_stride) #0 {
 entry:
   br i1 undef, label %for.end, label %for.body.lr.ph
diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll
index f986a0251dce..8ab4faf2f145 100644
--- a/test/CodeGen/R600/unsupported-cc.ll
+++ b/test/CodeGen/R600/unsupported-cc.ll
@@ -2,7 +2,7 @@
 
 ; These tests are for condition codes that are not supported by the hardware
 
-; CHECK-LABEL: @slt
+; CHECK-LABEL: {{^}}slt:
 ; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 5(7.006492e-45)
@@ -14,7 +14,7 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @ult_i32
+; CHECK-LABEL: {{^}}ult_i32:
 ; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 5(7.006492e-45)
@@ -26,7 +26,7 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @ult_float
+; CHECK-LABEL: {{^}}ult_float:
 ; CHECK: SETGE * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
 ; CHECK-NEXT: 1084227584(5.000000e+00)
 ; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
@@ -39,7 +39,7 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @ult_float_native
+; CHECK-LABEL: {{^}}ult_float_native:
 ; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
 ; CHECK-NEXT: LSHR *
 ; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -51,7 +51,7 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @olt
+; CHECK-LABEL: {{^}}olt:
 ; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
 ; CHECK-NEXT: LSHR *
 ; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -63,7 +63,7 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @sle
+; CHECK-LABEL: {{^}}sle:
 ; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 6(8.407791e-45)
@@ -75,7 +75,7 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @ule_i32
+; CHECK-LABEL: {{^}}ule_i32:
 ; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 6(8.407791e-45)
@@ -87,7 +87,7 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @ule_float
+; CHECK-LABEL: {{^}}ule_float:
 ; CHECK: SETGT * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
 ; CHECK-NEXT: 1084227584(5.000000e+00)
 ; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
@@ -100,7 +100,7 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @ule_float_native
+; CHECK-LABEL: {{^}}ule_float_native:
 ; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
 ; CHECK-NEXT: LSHR *
 ; CHECK-NEXT: 1084227584(5.000000e+00)
@@ -112,7 +112,7 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @ole
+; CHECK-LABEL: {{^}}ole:
 ; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z
 ; CHECK-NEXT: LSHR *
 ; CHECK-NEXT:1084227584(5.000000e+00)
diff --git a/test/CodeGen/R600/urecip.ll b/test/CodeGen/R600/urecip.ll
index e808e3d2cf19..132f0009cda1 100644
--- a/test/CodeGen/R600/urecip.ll
+++ b/test/CodeGen/R600/urecip.ll
@@ -1,6 +1,6 @@
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
+;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 
-;CHECK: V_RCP_IFLAG_F32_e32
+;CHECK: v_rcp_iflag_f32_e32
 
 define void @test(i32 %p, i32 %q) {
    %i = udiv i32 %p, %q
diff --git a/test/CodeGen/R600/urem.ll b/test/CodeGen/R600/urem.ll
index 8045145bd10d..daa32446146b 100644
--- a/test/CodeGen/R600/urem.ll
+++ b/test/CodeGen/R600/urem.ll
@@ -1,34 +1,78 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
-;The code generated by urem is long and complex and may frequently change.
-;The goal of this test is to make sure the ISel doesn't fail when it gets
-;a v2i32/v4i32 urem
+; The code generated by urem is long and complex and may frequently
+; change.  The goal of this test is to make sure the ISel doesn't fail
+; when it gets a v2i32/v4i32 urem
 
-;EG-CHECK: @test2
-;EG-CHECK: CF_END
-;SI-CHECK: @test2
-;SI-CHECK: S_ENDPGM
+; FUNC-LABEL: {{^}}test_urem_i32:
+; SI: s_endpgm
+; EG: CF_END
+define void @test_urem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+  %b_ptr = getelementptr i32 addrspace(1)* %in, i32 1
+  %a = load i32 addrspace(1)* %in
+  %b = load i32 addrspace(1)* %b_ptr
+  %result = urem i32 %a, %b
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
 
-define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+; FUNC-LABEL: {{^}}test_urem_v2i32:
+; SI: s_endpgm
+; EG: CF_END
+define void @test_urem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32> addrspace(1)* %in
+  %b = load <2 x i32> addrspace(1)* %b_ptr
   %result = urem <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
 }
 
-;EG-CHECK: @test4
-;EG-CHECK: CF_END
-;SI-CHECK: @test4
-;SI-CHECK: S_ENDPGM
-
-define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+; FUNC-LABEL: {{^}}test_urem_v4i32:
+; SI: s_endpgm
+; EG: CF_END
+define void @test_urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32> addrspace(1)* %in
+  %b = load <4 x i32> addrspace(1)* %b_ptr
   %result = urem <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: {{^}}test_urem_i64:
+; SI: s_endpgm
+; EG: CF_END
+define void @test_urem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
+  %b_ptr = getelementptr i64 addrspace(1)* %in, i64 1
+  %a = load i64 addrspace(1)* %in
+  %b = load i64 addrspace(1)* %b_ptr
+  %result = urem i64 %a, %b
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_urem_v2i64:
+; SI: s_endpgm
+; EG: CF_END
+define void @test_urem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
+  %b_ptr = getelementptr <2 x i64> addrspace(1)* %in, i64 1
+  %a = load <2 x i64> addrspace(1)* %in
+  %b = load <2 x i64> addrspace(1)* %b_ptr
+  %result = urem <2 x i64> %a, %b
+  store <2 x i64> %result, <2 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}test_urem_v4i64:
+; SI: s_endpgm
+; EG: CF_END
+define void @test_urem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
+  %b_ptr = getelementptr <4 x i64> addrspace(1)* %in, i64 1
+  %a = load <4 x i64> addrspace(1)* %in
+  %b = load <4 x i64> addrspace(1)* %b_ptr
+  %result = urem <4 x i64> %a, %b
+  store <4 x i64> %result, <4 x i64> addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/R600/use-sgpr-multiple-times.ll b/test/CodeGen/R600/use-sgpr-multiple-times.ll
new file mode 100644
index 000000000000..97d73ba74bc5
--- /dev/null
+++ b/test/CodeGen/R600/use-sgpr-multiple-times.ll
@@ -0,0 +1,96 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+declare float @llvm.fma.f32(float, float, float) #1
+declare float @llvm.fmuladd.f32(float, float, float) #1
+declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1
+
+
+; SI-LABEL: {{^}}test_sgpr_use_twice_binop:
+; SI: s_load_dword [[SGPR:s[0-9]+]],
+; SI: v_add_f32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
+  %dbl = fadd float %a, %a
+  store float %dbl, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_sgpr_use_three_ternary_op:
+; SI: s_load_dword [[SGPR:s[0-9]+]],
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
+  %fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1
+  store float %fma, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_b:
+; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
+  %fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1
+  store float %fma, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
+; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
+  %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
+  store float %fma, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_b_a_a:
+; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
+; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
+  %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
+  store float %fma, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_imm:
+; SI: s_load_dword [[SGPR:s[0-9]+]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
+  %fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1
+  store float %fma, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_imm_a:
+; SI: s_load_dword [[SGPR:s[0-9]+]]
+; SI: v_fma_f32 [[RESULT:v[0-9]+]], 2.0, [[SGPR]], [[SGPR]]
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
+  %fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
+  store float %fma, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; Don't use fma since fma c, x, y is canonicalized to fma x, c, y
+; SI-LABEL: {{^}}test_sgpr_use_twice_ternary_op_imm_a_a:
+; SI: s_load_dword [[SGPR:s[0-9]+]]
+; SI: v_mad_i32_i24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
+; SI: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
+  %fma = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1
+  store i32 %fma, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/usubo.ll b/test/CodeGen/R600/usubo.ll
index d57a2c7f773e..1af595421b84 100644
--- a/test/CodeGen/R600/usubo.ll
+++ b/test/CodeGen/R600/usubo.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs< %s
 
 declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
 declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
 
-; FUNC-LABEL: @usubo_i64_zext
+; FUNC-LABEL: {{^}}usubo_i64_zext:
 define void @usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %usub, 0
@@ -15,8 +15,8 @@ define void @usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b) nounwind {
   ret void
 }
 
-; FUNC-LABEL: @s_usubo_i32
-; SI: S_SUB_I32
+; FUNC-LABEL: {{^}}s_usubo_i32:
+; SI: s_sub_i32
 define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 %a, i32 %b) nounwind {
   %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind
   %val = extractvalue { i32, i1 } %usub, 0
@@ -26,8 +26,8 @@ define void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
   ret void
 }
 
-; FUNC-LABEL: @v_usubo_i32
-; SI: V_SUBREV_I32_e32
+; FUNC-LABEL: {{^}}v_usubo_i32:
+; SI: v_subrev_i32_e32
 define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
   %a = load i32 addrspace(1)* %aptr, align 4
   %b = load i32 addrspace(1)* %bptr, align 4
@@ -39,9 +39,9 @@ define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32
   ret void
 }
 
-; FUNC-LABEL: @s_usubo_i64
-; SI: S_SUB_I32
-; SI: S_SUBB_U32
+; FUNC-LABEL: {{^}}s_usubo_i64:
+; SI: s_sub_u32
+; SI: s_subb_u32
 define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a, i64 %b) nounwind {
   %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %usub, 0
@@ -51,9 +51,9 @@ define void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64
   ret void
 }
 
-; FUNC-LABEL: @v_usubo_i64
-; SI: V_SUB_I32
-; SI: V_SUBB_U32
+; FUNC-LABEL: {{^}}v_usubo_i64:
+; SI: v_sub_i32
+; SI: v_subb_u32
 define void @v_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
   %a = load i64 addrspace(1)* %aptr, align 4
   %b = load i64 addrspace(1)* %bptr, align 4
diff --git a/test/CodeGen/R600/v1i64-kernel-arg.ll b/test/CodeGen/R600/v1i64-kernel-arg.ll
index 2aa1221b366e..31755125c03b 100644
--- a/test/CodeGen/R600/v1i64-kernel-arg.ll
+++ b/test/CodeGen/R600/v1i64-kernel-arg.ll
@@ -2,14 +2,14 @@
 ; XFAIL: *
 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s
 
-; CHECK-LABEL: @kernel_arg_i64
+; CHECK-LABEL: {{^}}kernel_arg_i64:
 define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
   store i64 %a, i64 addrspace(1)* %out, align 8
   ret void
 }
 
 ; i64 arg works, v1i64 arg does not.
-; CHECK-LABEL: @kernel_arg_v1i64
+; CHECK-LABEL: {{^}}kernel_arg_v1i64:
 define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
   ret void
diff --git a/test/CodeGen/R600/v_cndmask.ll b/test/CodeGen/R600/v_cndmask.ll
index 84087ee78d59..410e1b5a47a7 100644
--- a/test/CodeGen/R600/v_cndmask.ll
+++ b/test/CodeGen/R600/v_cndmask.ll
@@ -1,14 +1,38 @@
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-; SI: @v_cnd_nan
-; SI: V_CNDMASK_B32_e64 v{{[0-9]}},
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; SI-LABEL: {{^}}v_cnd_nan_nosgpr:
+; SI: v_cndmask_b32_e64 v{{[0-9]}}, v{{[0-9]}}, -1, s{{\[[0-9]+:[0-9]+\]}}
 ; SI-DAG: v{{[0-9]}}
 ; All nan values are converted to 0xffffffff
-; SI-DAG: -1
-define void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) {
-entry:
-  %0 = icmp ne i32 %c, 0
-  %1 = select i1 %0, float 0xFFFFFFFFE0000000, float %f
-  store float %1, float addrspace(1)* %out
+; SI: s_endpgm
+define void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
+  %idx = call i32 @llvm.r600.read.tidig.x() #1
+  %f.gep = getelementptr float addrspace(1)* %fptr, i32 %idx
+  %f = load float addrspace(1)* %fptr
+  %setcc = icmp ne i32 %c, 0
+  %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
+  store float %select, float addrspace(1)* %out
   ret void
 }
+
+
+; This requires slightly trickier SGPR operand legalization since the
+; single constant bus SGPR usage is the last operand, and it should
+; never be moved.
+
+; SI-LABEL: {{^}}v_cnd_nan:
+; SI: v_cndmask_b32_e64 v{{[0-9]}}, v{{[0-9]}}, -1, s{{\[[0-9]+:[0-9]+\]}}
+; SI-DAG: v{{[0-9]}}
+; All nan values are converted to 0xffffffff
+; SI: s_endpgm
+define void @v_cnd_nan(float addrspace(1)* %out, i32 %c, float %f) #0 {
+  %setcc = icmp ne i32 %c, 0
+  %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
+  store float %select, float addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/R600/valu-i1.ll b/test/CodeGen/R600/valu-i1.ll
index 5d5e3ff63a47..a4027178431b 100644
--- a/test/CodeGen/R600/valu-i1.ll
+++ b/test/CodeGen/R600/valu-i1.ll
@@ -1,10 +1,13 @@
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s
 
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; SI-LABEL: @test_if
 ; Make sure the i1 values created by the cfg structurizer pass are
 ; moved using VALU instructions
-; SI-NOT: S_MOV_B64 s[{{[0-9]:[0-9]}}], -1
-; SI: V_MOV_B32_e32 v{{[0-9]}}, -1
-define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) {
+; SI-NOT: s_mov_b64 s[{{[0-9]:[0-9]}}], -1
+; SI: v_mov_b32_e32 v{{[0-9]}}, -1
+define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 {
 entry:
   switch i32 %a, label %default [
     i32 0, label %case0
@@ -37,3 +40,150 @@ else:
 end:
   ret void
 }
+
+; SI-LABEL: @simple_test_v_if
+; SI: v_cmp_ne_i32_e64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
+; SI: s_and_saveexec_b64 [[BR_SREG]], [[BR_SREG]]
+; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
+
+; SI: ; BB#1
+; SI: buffer_store_dword
+; SI: s_endpgm
+
+; SI: BB1_2:
+; SI: s_or_b64 exec, exec, [[BR_SREG]]
+; SI: s_endpgm
+define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %is.0 = icmp ne i32 %tid, 0
+  br i1 %is.0, label %store, label %exit
+
+store:
+  %gep = getelementptr i32 addrspace(1)* %dst, i32 %tid
+  store i32 999, i32 addrspace(1)* %gep
+  ret void
+
+exit:
+  ret void
+}
+
+; SI-LABEL: @simple_test_v_loop
+; SI: v_cmp_ne_i32_e64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0
+; SI: s_and_saveexec_b64 [[BR_SREG]], [[BR_SREG]]
+; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]]
+; SI: s_cbranch_execz BB2_2
+
+; SI: ; BB#1:
+; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
+
+; SI: BB2_3:
+; SI: buffer_load_dword
+; SI: buffer_store_dword
+; SI: v_cmp_eq_i32_e32 vcc,
+; SI: s_or_b64 [[OR_SREG:s\[[0-9]+:[0-9]+\]]]
+; SI: v_add_i32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI: s_andn2_b64 exec, exec, [[OR_SREG]]
+; SI: s_cbranch_execnz BB2_3
+
+define void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 {
+entry:
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %is.0 = icmp ne i32 %tid, 0
+  %limit = add i32 %tid, 64
+  br i1 %is.0, label %loop, label %exit
+
+loop:
+  %i = phi i32 [%tid, %entry], [%i.inc, %loop]
+  %gep.src = getelementptr i32 addrspace(1)* %src, i32 %i
+  %gep.dst = getelementptr i32 addrspace(1)* %dst, i32 %i
+  %load = load i32 addrspace(1)* %src
+  store i32 %load, i32 addrspace(1)* %gep.dst
+  %i.inc = add nsw i32 %i, 1
+  %cmp = icmp eq i32 %limit, %i.inc
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; SI-LABEL: @multi_vcond_loop
+
+; Load loop limit from buffer
+; Branch to exit if uniformly not taken
+; SI: ; BB#0:
+; SI: buffer_load_dword [[VBOUND:v[0-9]+]]
+; SI: v_cmp_gt_i32_e64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]]
+; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG]], [[OUTER_CMP_SREG]]
+; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]]
+; SI: s_cbranch_execz BB3_2
+
+; Initialize inner condition to false
+; SI: ; BB#1:
+; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}}
+; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]]
+
+; Clear exec bits for workitems that load -1s
+; SI: BB3_3:
+; SI: buffer_load_dword [[A:v[0-9]+]]
+; SI: buffer_load_dword [[B:v[0-9]+]]
+; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], [[A]], -1
+; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_1:s\[[0-9]+:[0-9]+\]]], [[B]], -1
+; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
+; SI: s_and_saveexec_b64 [[ORNEG1]], [[ORNEG1]]
+; SI: s_xor_b64 [[ORNEG1]], exec, [[ORNEG1]]
+; SI: s_cbranch_execz BB3_5
+
+; SI: BB#4:
+; SI: buffer_store_dword
+; SI: v_cmp_ge_i64_e32 vcc
+; SI: s_or_b64 [[COND_STATE]], vcc, [[COND_STATE]]
+
+; SI: BB3_5:
+; SI: s_or_b64 exec, exec, [[ORNEG1]]
+; SI: s_or_b64 [[COND_STATE]], [[ORNEG1]], [[COND_STATE]]
+; SI: s_andn2_b64 exec, exec, [[COND_STATE]]
+; SI: s_cbranch_execnz BB3_3
+
+; SI: BB#6
+; SI: s_or_b64 exec, exec, [[COND_STATE]]
+
+; SI: BB3_2:
+; SI-NOT: [[COND_STATE]]
+; SI: s_endpgm
+
+define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 {
+bb:
+  %tmp = tail call i32 @llvm.r600.read.tidig.x() #0
+  %tmp4 = sext i32 %tmp to i64
+  %tmp5 = getelementptr inbounds i32 addrspace(1)* %arg3, i64 %tmp4
+  %tmp6 = load i32 addrspace(1)* %tmp5, align 4
+  %tmp7 = icmp sgt i32 %tmp6, 0
+  %tmp8 = sext i32 %tmp6 to i64
+  br i1 %tmp7, label %bb10, label %bb26
+
+bb10:                                             ; preds = %bb, %bb20
+  %tmp11 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb ]
+  %tmp12 = add nsw i64 %tmp11, %tmp4
+  %tmp13 = getelementptr inbounds i32 addrspace(1)* %arg1, i64 %tmp12
+  %tmp14 = load i32 addrspace(1)* %tmp13, align 4
+  %tmp15 = getelementptr inbounds i32 addrspace(1)* %arg2, i64 %tmp12
+  %tmp16 = load i32 addrspace(1)* %tmp15, align 4
+  %tmp17 = icmp ne i32 %tmp14, -1
+  %tmp18 = icmp ne i32 %tmp16, -1
+  %tmp19 = and i1 %tmp17, %tmp18
+  br i1 %tmp19, label %bb20, label %bb26
+
+bb20:                                             ; preds = %bb10
+  %tmp21 = add nsw i32 %tmp16, %tmp14
+  %tmp22 = getelementptr inbounds i32 addrspace(1)* %arg, i64 %tmp12
+  store i32 %tmp21, i32 addrspace(1)* %tmp22, align 4
+  %tmp23 = add nuw nsw i64 %tmp11, 1
+  %tmp24 = icmp slt i64 %tmp23, %tmp8
+  br i1 %tmp24, label %bb10, label %bb26
+
+bb26:                                             ; preds = %bb10, %bb20, %bb
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
diff --git a/test/CodeGen/R600/vector-alloca.ll b/test/CodeGen/R600/vector-alloca.ll
index ec1995f68089..8f73f657ccd8 100644
--- a/test/CodeGen/R600/vector-alloca.ll
+++ b/test/CodeGen/R600/vector-alloca.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
 
-; FUNC-LABEL: @vector_read
+; FUNC-LABEL: {{^}}vector_read:
 ; EG: MOV
 ; EG: MOV
 ; EG: MOV
@@ -25,7 +25,7 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: @vector_write
+; FUNC-LABEL: {{^}}vector_write:
 ; EG: MOV
 ; EG: MOV
 ; EG: MOV
@@ -53,7 +53,7 @@ entry:
 
 ; This test should be optimize to:
 ; store i32 0, i32 addrspace(1)* %out
-; FUNC-LABEL: @bitcast_gep
+; FUNC-LABEL: {{^}}bitcast_gep:
 ; EG: STORE_RAW
 define void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
 entry:
diff --git a/test/CodeGen/R600/vertex-fetch-encoding.ll b/test/CodeGen/R600/vertex-fetch-encoding.ll
index 7ea7a5c079cf..e24744e5ea4a 100644
--- a/test/CodeGen/R600/vertex-fetch-encoding.ll
+++ b/test/CodeGen/R600/vertex-fetch-encoding.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=barts | FileCheck --check-prefix=NI-CHECK %s
 ; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=cayman | FileCheck --check-prefix=CM-CHECK %s
 
-; NI-CHECK: @vtx_fetch32
+; NI-CHECK: {{^}}vtx_fetch32:
 ; NI-CHECK: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
-; CM-CHECK: @vtx_fetch32
+; CM-CHECK: {{^}}vtx_fetch32:
 ; CM-CHECK: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0 ; encoding: [0x40,0x01,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00
 
 define void @vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
@@ -13,7 +13,7 @@ entry:
   ret void
 }
 
-; NI-CHECK: @vtx_fetch128
+; NI-CHECK: {{^}}vtx_fetch128:
 ; NI-CHECK: VTX_READ_128 T[[DST:[0-9]]].XYZW, T[[SRC:[0-9]]].X, 0 ; encoding: [0x40,0x01,0x0[[SRC]],0x40,0x0[[DST]],0x10,0x8d,0x18,0x00,0x00,0x08,0x00
 ; XXX: Add a case for Cayman when v4i32 stores are supported.
 
diff --git a/test/CodeGen/R600/vop-shrink.ll b/test/CodeGen/R600/vop-shrink.ll
index 54e588d80842..11d91293c5b4 100644
--- a/test/CodeGen/R600/vop-shrink.ll
+++ b/test/CodeGen/R600/vop-shrink.ll
@@ -1,14 +1,9 @@
-; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-; XXX: This testis for a bug in the SIShrinkInstruction pass and it will be
-;       relevant once we are selecting 64-bit instructions.  We are
-;       currently selecting mostly 32-bit instruction, so the
-;       SIShrinkInstructions pass isn't doing much.
-; XFAIL: *
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 ; Test that we correctly commute a sub instruction
-; FUNC-LABEL: @sub_rev
-; SI-NOT: V_SUB_I32_e32 v{{[0-9]+}}, s
-; SI: V_SUBREV_I32_e32 v{{[0-9]+}}, s
+; FUNC-LABEL: {{^}}sub_rev:
+; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, s
+; SI: v_subrev_i32_e32 v{{[0-9]+}}, s
 
 ; ModuleID = 'vop-shrink.ll'
 
@@ -34,6 +29,20 @@ endif:                                            ; preds = %else, %if
   ret void
 }
 
+; Test that we fold an immediate that was illegal for a 64-bit op into the
+; 32-bit op when we shrink it.
+
+; FUNC-LABEL: {{^}}add_fold:
+; SI: v_add_f32_e32 v{{[0-9]+}}, 0x44800000
+define void @add_fold(float addrspace(1)* %out) {
+entry:
+  %tmp = call i32 @llvm.r600.read.tidig.x()
+  %tmp1 = uitofp i32 %tmp to float
+  %tmp2 = fadd float %tmp1, 1.024000e+03
+  store float %tmp2, float addrspace(1)* %out
+  ret void
+}
+
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.r600.read.tidig.x() #0
 
diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll
index dca7b067b26e..1fa8d4a49e10 100644
--- a/test/CodeGen/R600/vselect.ll
+++ b/test/CodeGen/R600/vselect.ll
@@ -1,13 +1,13 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+;RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
 
-;EG-CHECK: @test_select_v2i32
+;EG-CHECK: {{^}}test_select_v2i32:
 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK: @test_select_v2i32
-;SI-CHECK: V_CNDMASK_B32_e64
-;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: {{^}}test_select_v2i32:
+;SI-CHECK: v_cndmask_b32_e64
+;SI-CHECK: v_cndmask_b32_e64
 
 define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
 entry:
@@ -19,13 +19,13 @@ entry:
   ret void
 }
 
-;EG-CHECK: @test_select_v2f32
+;EG-CHECK: {{^}}test_select_v2f32:
 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK: @test_select_v2f32
-;SI-CHECK: V_CNDMASK_B32_e64
-;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: {{^}}test_select_v2f32:
+;SI-CHECK: v_cndmask_b32_e64
+;SI-CHECK: v_cndmask_b32_e64
 
 define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) {
 entry:
@@ -37,17 +37,17 @@ entry:
   ret void
 }
 
-;EG-CHECK: @test_select_v4i32
+;EG-CHECK: {{^}}test_select_v4i32:
 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK: @test_select_v4i32
-;SI-CHECK: V_CNDMASK_B32_e64
-;SI-CHECK: V_CNDMASK_B32_e64
-;SI-CHECK: V_CNDMASK_B32_e64
-;SI-CHECK: V_CNDMASK_B32_e64
+;SI-CHECK: {{^}}test_select_v4i32:
+;SI-CHECK: v_cndmask_b32_e64
+;SI-CHECK: v_cndmask_b32_e64
+;SI-CHECK: v_cndmask_b32_e64
+;SI-CHECK: v_cndmask_b32_e64
 
 define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
 entry:
@@ -59,7 +59,7 @@ entry:
   ret void
 }
 
-;EG-CHECK: @test_select_v4f32
+;EG-CHECK: {{^}}test_select_v4f32:
 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
diff --git a/test/CodeGen/R600/vselect64.ll b/test/CodeGen/R600/vselect64.ll
index 604695b4fa6b..ef85ebe7899f 100644
--- a/test/CodeGen/R600/vselect64.ll
+++ b/test/CodeGen/R600/vselect64.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck  %s
 ; XXX: Merge this test into vselect.ll once SI supports 64-bit select.
 
-; CHECK-LABEL: @test_select_v4i64
+; CHECK-LABEL: {{^}}test_select_v4i64:
 ; Make sure the vectors aren't being stored on the stack.  We know they are
 ; being stored on the stack if the shaders uses at leat 10 registers.
 ; CHECK-NOT: {{\**}} MOV T{{[0-9][0-9]}}.X
diff --git a/test/CodeGen/R600/vtx-fetch-branch.ll b/test/CodeGen/R600/vtx-fetch-branch.ll
index 0fc99dee0dbe..bcbe34ea543b 100644
--- a/test/CodeGen/R600/vtx-fetch-branch.ll
+++ b/test/CodeGen/R600/vtx-fetch-branch.ll
@@ -6,7 +6,7 @@
 ; after the fetch clause.
 
 
-; CHECK-LABEL: @test
+; CHECK-LABEL: {{^}}test:
 ; CHECK-NOT: ALU_POP_AFTER
 ; CHECK: TEX
 ; CHECK-NEXT: POP
diff --git a/test/CodeGen/R600/vtx-schedule.ll b/test/CodeGen/R600/vtx-schedule.ll
index ce852c5efeff..8254c9923477 100644
--- a/test/CodeGen/R600/vtx-schedule.ll
+++ b/test/CodeGen/R600/vtx-schedule.ll
@@ -4,7 +4,7 @@
 ; the result of another VTX_READ instruction were being grouped in the
 ; same fetch clasue.
 
-; CHECK: @test
+; CHECK: {{^}}test:
 ; CHECK: Fetch clause
 ; CHECK: VTX_READ_32 [[IN0:T[0-9]+\.X]], [[IN0]], 0
 ; CHECK: Fetch clause
diff --git a/test/CodeGen/R600/wait.ll b/test/CodeGen/R600/wait.ll
index 2cf88fe9f735..b30f86ccd0d6 100644
--- a/test/CodeGen/R600/wait.ll
+++ b/test/CodeGen/R600/wait.ll
@@ -1,37 +1,45 @@
-; RUN: llc < %s -march=r600 -mcpu=SI --verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace %s
 
-;CHECK-LABEL: @main
-;CHECK: S_WAITCNT lgkmcnt(0)
-;CHECK: S_WAITCNT vmcnt(0)
-;CHECK: S_WAITCNT expcnt(0) lgkmcnt(0)
-
-define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 {
+; CHECK-LABEL: {{^}}main:
+; CHECK: s_load_dwordx4
+; CHECK: s_load_dwordx4
+; CHECK: s_waitcnt lgkmcnt(0){{$}}
+; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK: s_waitcnt expcnt(0) lgkmcnt(0){{$}}
+define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
 main_body:
-  %10 = getelementptr <16 x i8> addrspace(2)* %3, i32 0
-  %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0
-  %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6)
-  %13 = extractelement <4 x float> %12, i32 0
-  %14 = extractelement <4 x float> %12, i32 1
-  %15 = extractelement <4 x float> %12, i32 2
-  %16 = extractelement <4 x float> %12, i32 3
-  %17 = getelementptr <16 x i8> addrspace(2)* %3, i32 1
-  %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0
-  %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %6)
-  %20 = extractelement <4 x float> %19, i32 0
-  %21 = extractelement <4 x float> %19, i32 1
-  %22 = extractelement <4 x float> %19, i32 2
-  %23 = extractelement <4 x float> %19, i32 3
-  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23)
-  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float %16)
+  %tmp = getelementptr <16 x i8> addrspace(2)* %arg3, i32 0
+  %tmp10 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0
+  %tmp11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp10, i32 0, i32 %arg6)
+  %tmp12 = extractelement <4 x float> %tmp11, i32 0
+  %tmp13 = extractelement <4 x float> %tmp11, i32 1
+  call void @llvm.AMDGPU.barrier.global() #1
+  %tmp14 = extractelement <4 x float> %tmp11, i32 2
+;  %tmp15 = extractelement <4 x float> %tmp11, i32 3
+  %tmp15 = load float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
+  %tmp16 = getelementptr <16 x i8> addrspace(2)* %arg3, i32 1
+  %tmp17 = load <16 x i8> addrspace(2)* %tmp16, !tbaa !0
+  %tmp18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp17, i32 0, i32 %arg6)
+  %tmp19 = extractelement <4 x float> %tmp18, i32 0
+  %tmp20 = extractelement <4 x float> %tmp18, i32 1
+  %tmp21 = extractelement <4 x float> %tmp18, i32 2
+  %tmp22 = extractelement <4 x float> %tmp18, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %tmp19, float %tmp20, float %tmp21, float %tmp22)
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp12, float %tmp13, float %tmp14, float %tmp15)
   ret void
 }
 
+; Function Attrs: noduplicate nounwind
+declare void @llvm.AMDGPU.barrier.global() #1
+
 ; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
+declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #2
 
 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
 
 attributes #0 = { "ShaderType"="1" }
-attributes #1 = { nounwind readnone }
+attributes #1 = { noduplicate nounwind }
+attributes #2 = { nounwind readnone }
 
-!0 = metadata !{metadata !"const", null, i32 1}
+!0 = !{!1, !1, i64 0, i32 1}
+!1 = !{!"const", null}
diff --git a/test/CodeGen/R600/work-item-intrinsics.ll b/test/CodeGen/R600/work-item-intrinsics.ll
index 01236590742a..37c0e0f304ce 100644
--- a/test/CodeGen/R600/work-item-intrinsics.ll
+++ b/test/CodeGen/R600/work-item-intrinsics.ll
@@ -1,13 +1,14 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK %s
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
-
-; R600-CHECK: @ngroups_x
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[0].X
-; SI-CHECK: @ngroups_x
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+
+; FUNC-LABEL: {{^}}ngroups_x:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[0].X
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
 define void @ngroups_x (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.ngroups.x() #0
@@ -15,13 +16,13 @@ entry:
   ret void
 }
 
-; R600-CHECK: @ngroups_y
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[0].Y
-; SI-CHECK: @ngroups_y
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x1
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}ngroups_y:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[0].Y
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
 define void @ngroups_y (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.ngroups.y() #0
@@ -29,13 +30,13 @@ entry:
   ret void
 }
 
-; R600-CHECK: @ngroups_z
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[0].Z
-; SI-CHECK: @ngroups_z
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x2
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}ngroups_z:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[0].Z
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
 define void @ngroups_z (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.ngroups.z() #0
@@ -43,13 +44,13 @@ entry:
   ret void
 }
 
-; R600-CHECK: @global_size_x
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[0].W
-; SI-CHECK: @global_size_x
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x3
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}global_size_x:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[0].W
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
 define void @global_size_x (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.global.size.x() #0
@@ -57,13 +58,13 @@ entry:
   ret void
 }
 
-; R600-CHECK: @global_size_y
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[1].X
-; SI-CHECK: @global_size_y
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x4
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}global_size_y:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[1].X
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
 define void @global_size_y (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.global.size.y() #0
@@ -71,13 +72,13 @@ entry:
   ret void
 }
 
-; R600-CHECK: @global_size_z
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[1].Y
-; SI-CHECK: @global_size_z
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x5
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}global_size_z:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[1].Y
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
 define void @global_size_z (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.global.size.z() #0
@@ -85,13 +86,13 @@ entry:
   ret void
 }
 
-; R600-CHECK: @local_size_x
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[1].Z
-; SI-CHECK: @local_size_x
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x6
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}local_size_x:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[1].Z
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
 define void @local_size_x (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.local.size.x() #0
@@ -99,13 +100,13 @@ entry:
   ret void
 }
 
-; R600-CHECK: @local_size_y
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[1].W
-; SI-CHECK: @local_size_y
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x7
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}local_size_y:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[1].W
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
 define void @local_size_y (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.local.size.y() #0
@@ -113,13 +114,13 @@ entry:
   ret void
 }
 
-; R600-CHECK: @local_size_z
-; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
-; R600-CHECK: MOV [[VAL]], KC0[2].X
-; SI-CHECK: @local_size_z
-; SI-CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]], s[0:1], 0x8
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], [[VAL]]
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}local_size_z:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[2].X
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
 define void @local_size_z (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.local.size.z() #0
@@ -127,13 +128,27 @@ entry:
   ret void
 }
 
+; FUNC-LABEL: {{^}}get_work_dim:
+; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
+; EG: MOV [[VAL]], KC0[2].Z
+
+; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[VVAL]]
+define void @get_work_dim (i32 addrspace(1)* %out) {
+entry:
+  %0 = call i32 @llvm.AMDGPU.read.workdim() #0
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
 ; The tgid values are stored in sgprs offset by the number of user sgprs.
 ; Currently we always use exactly 2 user sgprs for the pointer to the
 ; kernel arguments, but this may change in the future.
 
-; SI-CHECK: @tgid_x
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s4
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}tgid_x:
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
+; SI: buffer_store_dword [[VVAL]]
 define void @tgid_x (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tgid.x() #0
@@ -141,9 +156,9 @@ entry:
   ret void
 }
 
-; SI-CHECK: @tgid_y
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s5
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}tgid_y:
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
+; SI: buffer_store_dword [[VVAL]]
 define void @tgid_y (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tgid.y() #0
@@ -151,9 +166,9 @@ entry:
   ret void
 }
 
-; SI-CHECK: @tgid_z
-; SI-CHECK: V_MOV_B32_e32 [[VVAL:v[0-9]+]], s6
-; SI-CHECK: BUFFER_STORE_DWORD [[VVAL]]
+; FUNC-LABEL: {{^}}tgid_z:
+; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
+; SI: buffer_store_dword [[VVAL]]
 define void @tgid_z (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tgid.z() #0
@@ -161,8 +176,8 @@ entry:
   ret void
 }
 
-; SI-CHECK: @tidig_x
-; SI-CHECK: BUFFER_STORE_DWORD v0
+; FUNC-LABEL: {{^}}tidig_x:
+; SI: buffer_store_dword v0
 define void @tidig_x (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tidig.x() #0
@@ -170,8 +185,8 @@ entry:
   ret void
 }
 
-; SI-CHECK: @tidig_y
-; SI-CHECK: BUFFER_STORE_DWORD v1
+; FUNC-LABEL: {{^}}tidig_y:
+; SI: buffer_store_dword v1
 define void @tidig_y (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tidig.y() #0
@@ -179,8 +194,8 @@ entry:
   ret void
 }
 
-; SI-CHECK: @tidig_z
-; SI-CHECK: BUFFER_STORE_DWORD v2
+; FUNC-LABEL: {{^}}tidig_z:
+; SI: buffer_store_dword v2
 define void @tidig_z (i32 addrspace(1)* %out) {
 entry:
   %0 = call i32 @llvm.r600.read.tidig.z() #0
@@ -208,4 +223,6 @@ declare i32 @llvm.r600.read.tidig.x() #0
 declare i32 @llvm.r600.read.tidig.y() #0
 declare i32 @llvm.r600.read.tidig.z() #0
 
+declare i32 @llvm.AMDGPU.read.workdim() #0
+
 attributes #0 = { readnone }
diff --git a/test/CodeGen/R600/wrong-transalu-pos-fix.ll b/test/CodeGen/R600/wrong-transalu-pos-fix.ll
index b1cbe3ffbc41..4e77c07c0ea1 100644
--- a/test/CodeGen/R600/wrong-transalu-pos-fix.ll
+++ b/test/CodeGen/R600/wrong-transalu-pos-fix.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
 ; We want all MULLO_INT inst to be last in their instruction group
-;CHECK: @fill3d
+;CHECK: {{^}}fill3d:
 ;CHECK-NOT: MULLO_INT T[0-9]+
 
 ; ModuleID = 'radeon'
@@ -81,6 +81,6 @@ attributes #1 = { nounwind readnone }
 
 !opencl.kernels = !{!0, !1, !2}
 
-!0 = metadata !{null}
-!1 = metadata !{null}
-!2 = metadata !{void (i32 addrspace(1)*)* @fill3d}
+!0 = !{null}
+!1 = !{null}
+!2 = !{void (i32 addrspace(1)*)* @fill3d}
diff --git a/test/CodeGen/R600/xor.ll b/test/CodeGen/R600/xor.ll
index e14bd7127231..af6196dabe90 100644
--- a/test/CodeGen/R600/xor.ll
+++ b/test/CodeGen/R600/xor.ll
@@ -1,14 +1,13 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK %s
-;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
-;EG-CHECK: @xor_v2i32
-;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK: @xor_v2i32
-;SI-CHECK: V_XOR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_XOR_B32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; FUNC-LABEL: {{^}}xor_v2i32:
+; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
+; SI: v_xor_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_xor_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
   %a = load <2 x i32> addrspace(1) * %in0
@@ -18,17 +17,16 @@ define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in
   ret void
 }
 
-;EG-CHECK: @xor_v4i32
-;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; FUNC-LABEL: {{^}}xor_v4i32:
+; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-;SI-CHECK: @xor_v4i32
-;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
-;SI-CHECK: V_XOR_B32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
   %a = load <4 x i32> addrspace(1) * %in0
@@ -38,25 +36,42 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
   ret void
 }
 
-;EG-CHECK: @xor_i1
-;EG-CHECK: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
-
-;SI-CHECK: @xor_i1
-;SI-CHECK: V_XOR_B32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; FUNC-LABEL: {{^}}xor_i1:
+; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
 
+; SI-DAG: v_cmp_ge_f32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 0
+; SI-DAG: v_cmp_ge_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 1.0
+; SI: s_xor_b64 [[XOR:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]]
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[XOR]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
 define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
   %a = load float addrspace(1) * %in0
   %b = load float addrspace(1) * %in1
   %acmp = fcmp oge float %a, 0.000000e+00
-  %bcmp = fcmp oge float %b, 0.000000e+00
+  %bcmp = fcmp oge float %b, 1.000000e+00
   %xor = xor i1 %acmp, %bcmp
   %result = select i1 %xor, float %a, float %b
   store float %result, float addrspace(1)* %out
   ret void
 }
 
-; SI-CHECK-LABEL: @vector_xor_i32
-; SI-CHECK: V_XOR_B32_e32
+; FUNC-LABEL: {{^}}v_xor_i1:
+; SI: buffer_load_ubyte [[A:v[0-9]+]]
+; SI: buffer_load_ubyte [[B:v[0-9]+]]
+; SI: v_xor_b32_e32 [[XOR:v[0-9]+]], [[A]], [[B]]
+; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[XOR]]
+; SI: buffer_store_byte [[RESULT]]
+define void @v_xor_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace(1)* %in1) {
+  %a = load i1 addrspace(1)* %in0
+  %b = load i1 addrspace(1)* %in1
+  %xor = xor i1 %a, %b
+  store i1 %xor, i1 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}vector_xor_i32:
+; SI: v_xor_b32_e32
 define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
   %a = load i32 addrspace(1)* %in0
   %b = load i32 addrspace(1)* %in1
@@ -65,24 +80,24 @@ define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32
   ret void
 }
 
-; SI-CHECK-LABEL: @scalar_xor_i32
-; SI-CHECK: S_XOR_B32
+; FUNC-LABEL: {{^}}scalar_xor_i32:
+; SI: s_xor_b32
 define void @scalar_xor_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
   %result = xor i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
 }
 
-; SI-CHECK-LABEL: @scalar_not_i32
-; SI-CHECK: S_NOT_B32
+; FUNC-LABEL: {{^}}scalar_not_i32:
+; SI: s_not_b32
 define void @scalar_not_i32(i32 addrspace(1)* %out, i32 %a) {
   %result = xor i32 %a, -1
   store i32 %result, i32 addrspace(1)* %out
   ret void
 }
 
-; SI-CHECK-LABEL: @vector_not_i32
-; SI-CHECK: V_NOT_B32
+; FUNC-LABEL: {{^}}vector_not_i32:
+; SI: v_not_b32
 define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
   %a = load i32 addrspace(1)* %in0
   %b = load i32 addrspace(1)* %in1
@@ -91,10 +106,10 @@ define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32
   ret void
 }
 
-; SI-CHECK-LABEL: @vector_xor_i64
-; SI-CHECK: V_XOR_B32_e32
-; SI-CHECK: V_XOR_B32_e32
-; SI-CHECK: S_ENDPGM
+; FUNC-LABEL: {{^}}vector_xor_i64:
+; SI: v_xor_b32_e32
+; SI: v_xor_b32_e32
+; SI: s_endpgm
 define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
   %a = load i64 addrspace(1)* %in0
   %b = load i64 addrspace(1)* %in1
@@ -103,26 +118,26 @@ define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64
   ret void
 }
 
-; SI-CHECK-LABEL: @scalar_xor_i64
-; SI-CHECK: S_XOR_B64
-; SI-CHECK: S_ENDPGM
+; FUNC-LABEL: {{^}}scalar_xor_i64:
+; SI: s_xor_b64
+; SI: s_endpgm
 define void @scalar_xor_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
   %result = xor i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; SI-CHECK-LABEL: @scalar_not_i64
-; SI-CHECK: S_NOT_B64
+; FUNC-LABEL: {{^}}scalar_not_i64:
+; SI: s_not_b64
 define void @scalar_not_i64(i64 addrspace(1)* %out, i64 %a) {
   %result = xor i64 %a, -1
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
-; SI-CHECK-LABEL: @vector_not_i64
-; SI-CHECK: V_NOT_B32
-; SI-CHECK: V_NOT_B32
+; FUNC-LABEL: {{^}}vector_not_i64:
+; SI: v_not_b32
+; SI: v_not_b32
 define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
   %a = load i64 addrspace(1)* %in0
   %b = load i64 addrspace(1)* %in1
@@ -135,9 +150,8 @@ define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64
 ; Note that in the future the backend may be smart enough to
 ; use an SALU instruction for this.
 
-; SI-CHECK-LABEL: @xor_cf
-; SI-CHECK: V_XOR
-; SI-CHECK: V_XOR
+; FUNC-LABEL: {{^}}xor_cf:
+; SI: s_xor_b64
 define void @xor_cf(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b) {
 entry:
   %0 = icmp eq i64 %a, 0
diff --git a/test/CodeGen/R600/zero_extend.ll b/test/CodeGen/R600/zero_extend.ll
index 8585d4ab191e..7df4b48c2b9d 100644
--- a/test/CodeGen/R600/zero_extend.ll
+++ b/test/CodeGen/R600/zero_extend.ll
@@ -1,14 +1,14 @@
 ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI-CHECK
 
-; R600-CHECK: @test
+; R600-CHECK: {{^}}test:
 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW
 ; R600-CHECK: MEM_RAT_CACHELESS STORE_RAW
 
-; SI-CHECK: @test
-; SI-CHECK: S_MOV_B32 [[ZERO:s[0-9]]], 0
-; SI-CHECK: V_MOV_B32_e32 v[[V_ZERO:[0-9]]], [[ZERO]]
-; SI-CHECK: BUFFER_STORE_DWORDX2 v[0:[[V_ZERO]]{{\]}}
+; SI-CHECK: {{^}}test:
+; SI-CHECK: s_mov_b32 [[ZERO:s[0-9]]], 0{{$}}
+; SI-CHECK: v_mov_b32_e32 v[[V_ZERO:[0-9]]], [[ZERO]]
+; SI-CHECK: buffer_store_dwordx2 v[0:[[V_ZERO]]{{\]}}
 define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) {
 entry:
   %0 = mul i32 %a, %b
@@ -18,8 +18,8 @@ entry:
   ret void
 }
 
-; SI-CHECK-LABEL: @testi1toi32
-; SI-CHECK: V_CNDMASK_B32
+; SI-CHECK-LABEL: {{^}}testi1toi32:
+; SI-CHECK: v_cndmask_b32
 define void @testi1toi32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
 entry:
   %0 = icmp eq i32 %a, %b
@@ -28,10 +28,10 @@ entry:
   ret void
 }
 
-; SI-CHECK-LABEL: @zext_i1_to_i64
-; SI-CHECK: V_CMP_EQ_I32
-; SI-CHECK: V_CNDMASK_B32
-; SI-CHECK: S_MOV_B32 s{{[0-9]+}}, 0
+; SI-CHECK-LABEL: {{^}}zext_i1_to_i64:
+; SI-CHECK: v_cmp_eq_i32
+; SI-CHECK: v_cndmask_b32
+; SI-CHECK: s_mov_b32 s{{[0-9]+}}, 0
 define void @zext_i1_to_i64(i64 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
   %cmp = icmp eq i32 %a, %b
   %ext = zext i1 %cmp to i64
diff --git a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
index e8315f17ebb6..373a1967307a 100644
--- a/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
+++ b/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=sparc
+; RUN: llc < %s -march=sparc -no-integrated-as
 ; PR 1557
 
 target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
diff --git a/test/CodeGen/SPARC/empty-functions.ll b/test/CodeGen/SPARC/empty-functions.ll
new file mode 100644
index 000000000000..38d288903d4a
--- /dev/null
+++ b/test/CodeGen/SPARC/empty-functions.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -mtriple=sparc-linux-gnu | FileCheck -check-prefix=LINUX-NO-FP %s
+; RUN: llc < %s -mtriple=sparc-linux-gnu -disable-fp-elim | FileCheck -check-prefix=LINUX-FP %s
+
+define void @func() {
+entry:
+  unreachable
+}
+
+; An empty function is perfectly fine on ELF.
+; LINUX-NO-FP: func:
+; LINUX-NO-FP-NEXT: .cfi_startproc
+; LINUX-NO-FP-NEXT: {{^}}!
+; LINUX-NO-FP-NEXT: {{^}}.L{{.*}}:{{$}}
+; LINUX-NO-FP-NEXT: .size   func, .L{{.*}}-func
+; LINUX-NO-FP-NEXT: .cfi_endproc
+
+; A cfi directive can point to the end of a function. It (and in fact the
+; entire body) could be optimized out because of the unreachable, but we
+; don't do it right now.
+; LINUX-FP: func:
+; LINUX-FP-NEXT: .cfi_startproc
+; LINUX-FP-NEXT: {{^}}!
+; LINUX-FP-NEXT: save %sp, -96, %sp
+; LINUX-FP-NEXT: {{^}}.L{{.*}}:{{$}}
+; LINUX-FP-NEXT: .cfi_def_cfa_register %fp
+; LINUX-FP-NEXT: {{^}}.L{{.*}}:{{$}}
+; LINUX-FP-NEXT: .cfi_window_save
+; LINUX-FP-NEXT: {{^}}.L{{.*}}:{{$}}
+; LINUX-FP-NEXT: .cfi_register 15, 31
+; LINUX-FP-NEXT: {{^}}.L{{.*}}:{{$}}
+; LINUX-FP-NEXT: .size   func, .Ltmp3-func
+; LINUX-FP-NEXT: .cfi_endproc
diff --git a/test/CodeGen/SPARC/inlineasm.ll b/test/CodeGen/SPARC/inlineasm.ll
index 2650533b7fec..526cde8de8b4 100644
--- a/test/CodeGen/SPARC/inlineasm.ll
+++ b/test/CodeGen/SPARC/inlineasm.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=sparc <%s | FileCheck %s
+; RUN: llc -march=sparc -no-integrated-as <%s | FileCheck %s
 
 ; CHECK-LABEL: test_constraint_r
 ; CHECK:       add %o1, %o0, %o0
diff --git a/test/CodeGen/SPARC/mult-alt-generic-sparc.ll b/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
index 6013b17d9372..6a67616d53be 100644
--- a/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
+++ b/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=sparc
+; RUN: llc < %s -march=sparc -no-integrated-as
 ; ModuleID = 'mult-alt-generic.c'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32"
 target triple = "sparc"
diff --git a/test/CodeGen/SPARC/setjmp.ll b/test/CodeGen/SPARC/setjmp.ll
index a31cd7016731..17afb36ca74c 100644
--- a/test/CodeGen/SPARC/setjmp.ll
+++ b/test/CodeGen/SPARC/setjmp.ll
@@ -65,8 +65,8 @@ attributes #0 = { nounwind }
 attributes #1 = { noreturn nounwind }
 attributes #2 = { nounwind returns_twice }
 
-!0 = metadata !{metadata !"alias set 6: struct.jmpbuf_env*", metadata !1}
-!1 = metadata !{metadata !1}
-!2 = metadata !{metadata !"alias set 3: int", metadata !1}
-!3 = metadata !{metadata !0, metadata !0, i64 0}
-!4 = metadata !{metadata !2, metadata !2, i64 0}
+!0 = !{!"alias set 6: struct.jmpbuf_env*", !1}
+!1 = !{!1}
+!2 = !{!"alias set 3: int", !1}
+!3 = !{!0, !0, i64 0}
+!4 = !{!2, !2, i64 0}
diff --git a/test/CodeGen/SystemZ/alias-01.ll b/test/CodeGen/SystemZ/alias-01.ll
index 8839aade7a0e..89a731830187 100644
--- a/test/CodeGen/SystemZ/alias-01.ll
+++ b/test/CodeGen/SystemZ/alias-01.ll
@@ -14,6 +14,6 @@ define void @f1(<16 x i32> *%src1, <16 x float> *%dest) {
   ret void
 }
 
-!0 = metadata !{ metadata !"root" }
-!1 = metadata !{ metadata !"set1", metadata !0 }
-!2 = metadata !{ metadata !"set2", metadata !0 }
+!0 = !{ !"root" }
+!1 = !{ !"set1", !0 }
+!2 = !{ !"set2", !0 }
diff --git a/test/CodeGen/SystemZ/and-08.ll b/test/CodeGen/SystemZ/and-08.ll
index 7ded115aedff..a328c4ea2046 100644
--- a/test/CodeGen/SystemZ/and-08.ll
+++ b/test/CodeGen/SystemZ/and-08.ll
@@ -371,8 +371,8 @@ define void @f26(i64 *%ptr1, i64 *%ptr2) {
   ret void
 }
 
-!0 = metadata !{ metadata !"root" }
-!1 = metadata !{ metadata !"set1", metadata !0 }
-!2 = metadata !{ metadata !"set2", metadata !0 }
-!3 = metadata !{ metadata !1, metadata !1, i64 0}
-!4 = metadata !{ metadata !2, metadata !2, i64 0}
+!0 = !{ !"root" }
+!1 = !{ !"set1", !0 }
+!2 = !{ !"set2", !0 }
+!3 = !{ !1, !1, i64 0}
+!4 = !{ !2, !2, i64 0}
diff --git a/test/CodeGen/SystemZ/asm-01.ll b/test/CodeGen/SystemZ/asm-01.ll
index 801378c5fcbd..3dbc8ac268b7 100644
--- a/test/CodeGen/SystemZ/asm-01.ll
+++ b/test/CodeGen/SystemZ/asm-01.ll
@@ -1,7 +1,7 @@
 ; Test the "Q" asm constraint, which accepts addresses that have a base
 ; and a 12-bit displacement.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 ; Check the lowest range.
 define void @f1(i64 %base) {
diff --git a/test/CodeGen/SystemZ/asm-02.ll b/test/CodeGen/SystemZ/asm-02.ll
index ad1e35bb3621..458bfeb49753 100644
--- a/test/CodeGen/SystemZ/asm-02.ll
+++ b/test/CodeGen/SystemZ/asm-02.ll
@@ -1,7 +1,7 @@
 ; Test the "R" asm constraint, which accepts addresses that have a base,
 ; an index and a 12-bit displacement.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 ; Check the lowest range.
 define void @f1(i64 %base) {
diff --git a/test/CodeGen/SystemZ/asm-03.ll b/test/CodeGen/SystemZ/asm-03.ll
index fa3e1a7d01d8..2e60ad61ef40 100644
--- a/test/CodeGen/SystemZ/asm-03.ll
+++ b/test/CodeGen/SystemZ/asm-03.ll
@@ -1,7 +1,7 @@
 ; Test the "S" asm constraint, which accepts addresses that have a base
 ; and a 20-bit displacement.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 define void @f1(i64 %base) {
 ; CHECK-LABEL: f1:
diff --git a/test/CodeGen/SystemZ/asm-04.ll b/test/CodeGen/SystemZ/asm-04.ll
index af7ea9fdef94..b212253dbd9c 100644
--- a/test/CodeGen/SystemZ/asm-04.ll
+++ b/test/CodeGen/SystemZ/asm-04.ll
@@ -1,7 +1,7 @@
 ; Test the "T" asm constraint, which accepts addresses that have a base,
 ; an index and a 20-bit displacement.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 define void @f1(i64 %base) {
 ; CHECK-LABEL: f1:
diff --git a/test/CodeGen/SystemZ/asm-05.ll b/test/CodeGen/SystemZ/asm-05.ll
index e18cb757b142..db99b10853ed 100644
--- a/test/CodeGen/SystemZ/asm-05.ll
+++ b/test/CodeGen/SystemZ/asm-05.ll
@@ -1,6 +1,6 @@
 ; Test the "m" asm constraint, which is equivalent to "T".
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 define void @f1(i64 %base) {
 ; CHECK-LABEL: f1:
diff --git a/test/CodeGen/SystemZ/asm-06.ll b/test/CodeGen/SystemZ/asm-06.ll
index f9848a2df6fc..73c938f9fcf8 100644
--- a/test/CodeGen/SystemZ/asm-06.ll
+++ b/test/CodeGen/SystemZ/asm-06.ll
@@ -1,6 +1,6 @@
 ; Test the GPR constraint "a", which forbids %r0.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 define i64 @f1() {
 ; CHECK-LABEL: f1:
diff --git a/test/CodeGen/SystemZ/asm-07.ll b/test/CodeGen/SystemZ/asm-07.ll
index bf63150cd818..42b89e6115bf 100644
--- a/test/CodeGen/SystemZ/asm-07.ll
+++ b/test/CodeGen/SystemZ/asm-07.ll
@@ -1,6 +1,6 @@
 ; Test the GPR constraint "r".
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 define i64 @f1() {
 ; CHECK-LABEL: f1:
diff --git a/test/CodeGen/SystemZ/asm-08.ll b/test/CodeGen/SystemZ/asm-08.ll
index 166233752db2..4185108e73c5 100644
--- a/test/CodeGen/SystemZ/asm-08.ll
+++ b/test/CodeGen/SystemZ/asm-08.ll
@@ -1,6 +1,6 @@
 ; Test the GPR constraint "d", which is equivalent to "r".
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 define i64 @f1() {
 ; CHECK-LABEL: f1:
diff --git a/test/CodeGen/SystemZ/asm-09.ll b/test/CodeGen/SystemZ/asm-09.ll
index 5cd7efb94009..b9d86cfbfebb 100644
--- a/test/CodeGen/SystemZ/asm-09.ll
+++ b/test/CodeGen/SystemZ/asm-09.ll
@@ -1,6 +1,6 @@
 ; Test matching operands with the GPR constraint "r".
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 define void @f1(i32 *%dst) {
 ; CHECK-LABEL: f1:
diff --git a/test/CodeGen/SystemZ/asm-10.ll b/test/CodeGen/SystemZ/asm-10.ll
index 0eccc1972187..b71db8350781 100644
--- a/test/CodeGen/SystemZ/asm-10.ll
+++ b/test/CodeGen/SystemZ/asm-10.ll
@@ -1,6 +1,6 @@
 ; Test the FPR constraint "f".
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 define float @f1() {
 ; CHECK-LABEL: f1:
diff --git a/test/CodeGen/SystemZ/asm-11.ll b/test/CodeGen/SystemZ/asm-11.ll
index 8aeb784134a3..8a4cdbb6a932 100644
--- a/test/CodeGen/SystemZ/asm-11.ll
+++ b/test/CodeGen/SystemZ/asm-11.ll
@@ -1,6 +1,6 @@
 ; Test the "I" constraint (8-bit unsigned constants).
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 ; Test 1 below the first valid value.
 define i32 @f1() {
diff --git a/test/CodeGen/SystemZ/asm-12.ll b/test/CodeGen/SystemZ/asm-12.ll
index feecbacf09e8..115092cb6aa5 100644
--- a/test/CodeGen/SystemZ/asm-12.ll
+++ b/test/CodeGen/SystemZ/asm-12.ll
@@ -1,6 +1,6 @@
 ; Test the "J" constraint (12-bit unsigned constants).
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 ; Test 1 below the first valid value.
 define i32 @f1() {
diff --git a/test/CodeGen/SystemZ/asm-13.ll b/test/CodeGen/SystemZ/asm-13.ll
index b88170079ecc..83454eab1cb1 100644
--- a/test/CodeGen/SystemZ/asm-13.ll
+++ b/test/CodeGen/SystemZ/asm-13.ll
@@ -1,6 +1,6 @@
 ; Test the "K" constraint (16-bit signed constants).
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 ; Test 1 below the first valid value.
 define i32 @f1() {
diff --git a/test/CodeGen/SystemZ/asm-14.ll b/test/CodeGen/SystemZ/asm-14.ll
index bcd8b1ebc3df..41b8f40e09d4 100644
--- a/test/CodeGen/SystemZ/asm-14.ll
+++ b/test/CodeGen/SystemZ/asm-14.ll
@@ -1,6 +1,6 @@
 ; Test the "L" constraint (20-bit signed constants).
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 ; Test 1 below the first valid value.
 define i32 @f1() {
diff --git a/test/CodeGen/SystemZ/asm-15.ll b/test/CodeGen/SystemZ/asm-15.ll
index 886ee0e897dc..8361b6849923 100644
--- a/test/CodeGen/SystemZ/asm-15.ll
+++ b/test/CodeGen/SystemZ/asm-15.ll
@@ -1,6 +1,6 @@
 ; Test the "M" constraint (0x7fffffff)
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 ; Test 1 below the valid value.
 define i32 @f1() {
diff --git a/test/CodeGen/SystemZ/asm-16.ll b/test/CodeGen/SystemZ/asm-16.ll
index 886ee0e897dc..8361b6849923 100644
--- a/test/CodeGen/SystemZ/asm-16.ll
+++ b/test/CodeGen/SystemZ/asm-16.ll
@@ -1,6 +1,6 @@
 ; Test the "M" constraint (0x7fffffff)
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 ; Test 1 below the valid value.
 define i32 @f1() {
diff --git a/test/CodeGen/SystemZ/asm-17.ll b/test/CodeGen/SystemZ/asm-17.ll
index 7bc9da32ea95..533b5e90d62d 100644
--- a/test/CodeGen/SystemZ/asm-17.ll
+++ b/test/CodeGen/SystemZ/asm-17.ll
@@ -1,6 +1,6 @@
 ; Test explicit register names.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 ; Test i32 GPRs.
 define i32 @f1() {
diff --git a/test/CodeGen/SystemZ/asm-18.ll b/test/CodeGen/SystemZ/asm-18.ll
index d60654b7863d..71e145a285ff 100644
--- a/test/CodeGen/SystemZ/asm-18.ll
+++ b/test/CodeGen/SystemZ/asm-18.ll
@@ -1,7 +1,7 @@
 ; Test high-word operations, using "h" constraints to force a high
 ; register and "r" constraints to force a low register.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 -no-integrated-as | FileCheck %s
 
 ; Test loads and stores involving mixtures of high and low registers.
 define void @f1(i32 *%ptr1, i32 *%ptr2) {
diff --git a/test/CodeGen/SystemZ/fp-cmp-04.ll b/test/CodeGen/SystemZ/fp-cmp-04.ll
index 781a3beb4d4d..1637ccb0791b 100644
--- a/test/CodeGen/SystemZ/fp-cmp-04.ll
+++ b/test/CodeGen/SystemZ/fp-cmp-04.ll
@@ -1,7 +1,7 @@
 ; Test that floating-point compares are omitted if CC already has the
 ; right value.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -no-integrated-as | FileCheck %s
 
 declare float @llvm.fabs.f32(float %f)
 
diff --git a/test/CodeGen/SystemZ/int-cmp-44.ll b/test/CodeGen/SystemZ/int-cmp-44.ll
index f065e6421295..30c1c4f1ed6c 100644
--- a/test/CodeGen/SystemZ/int-cmp-44.ll
+++ b/test/CodeGen/SystemZ/int-cmp-44.ll
@@ -1,7 +1,7 @@
 ; Test that compares are omitted if CC already has the right value
 ; (z10 version).
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -no-integrated-as | FileCheck %s
 
 declare void @foo()
 
diff --git a/test/CodeGen/SystemZ/int-cmp-45.ll b/test/CodeGen/SystemZ/int-cmp-45.ll
index 9c9c49c05df1..c9affa672d54 100644
--- a/test/CodeGen/SystemZ/int-cmp-45.ll
+++ b/test/CodeGen/SystemZ/int-cmp-45.ll
@@ -1,7 +1,7 @@
 ; Test that compares are omitted if CC already has the right value
 ; (z196 version).
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 -no-integrated-as | FileCheck %s
 
 ; Addition provides enough for equality comparisons with zero.  First teest
 ; the EQ case with LOC.
diff --git a/test/CodeGen/SystemZ/memchr-02.ll b/test/CodeGen/SystemZ/memchr-02.ll
index 982b3964f190..8986627a6057 100644
--- a/test/CodeGen/SystemZ/memchr-02.ll
+++ b/test/CodeGen/SystemZ/memchr-02.ll
@@ -1,6 +1,6 @@
 ; Test memchr using SRST, with the correct prototype.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -no-integrated-as | FileCheck %s
 
 declare i8 *@memchr(i8 *%src, i32 %char, i64 %len)
 
diff --git a/test/CodeGen/SystemZ/memcpy-02.ll b/test/CodeGen/SystemZ/memcpy-02.ll
index 2b010911f88e..776cfee50be9 100644
--- a/test/CodeGen/SystemZ/memcpy-02.ll
+++ b/test/CodeGen/SystemZ/memcpy-02.ll
@@ -385,8 +385,8 @@ define void @f32(i64 *%ptr1, i64 *%ptr2) {
   ret void
 }
 
-!0 = metadata !{ metadata !"root" }
-!1 = metadata !{ metadata !3, metadata !3, i64 0 }
-!2 = metadata !{ metadata !4, metadata !4, i64 0 }
-!3 = metadata !{ metadata !"set1", metadata !0 }
-!4 = metadata !{ metadata !"set2", metadata !0 }
+!0 = !{ !"root" }
+!1 = !{ !3, !3, i64 0 }
+!2 = !{ !4, !4, i64 0 }
+!3 = !{ !"set1", !0 }
+!4 = !{ !"set2", !0 }
diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
index ffc9584199cf..622f55d994f6 100644
--- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
+++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -25,7 +25,7 @@ define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind {
   %storemerge = phi double [ -1.000000e+00, %4 ], [ 1.000000e+00, %3 ], [ 1.000000e+00, %3 ] ; <double> [#uses=1]
   %v_6 = icmp slt i32 %1, 2                         ; <i1> [#uses=1]
   %storemerge1 = select i1 %v_6, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=3]
-  call void @llvm.dbg.value(metadata !{double %storemerge}, i64 0, metadata !91), !dbg !0
+  call void @llvm.dbg.value(metadata double %storemerge, i64 0, metadata !91, metadata !{!"0x102"}), !dbg !0
   %v_7 = icmp eq i32 %2, 1, !dbg !92                ; <i1> [#uses=1]
   %storemerge2 = select i1 %v_7, double 1.000000e+00, double -1.000000e+00 ; <double> [#uses=3]
   %v_8 = getelementptr inbounds %0* %0, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
@@ -40,116 +40,116 @@ define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind {
   ret void, !dbg !98
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare double @sqrt(double) nounwind readonly
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!5}
 !llvm.module.flags = !{!104}
-!0 = metadata !{i32 46, i32 0, metadata !1, null}
-!1 = metadata !{i32 524299, metadata !101, metadata !2, i32 44, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 524299, metadata !101, metadata !3, i32 44, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!3 = metadata !{i32 524334, metadata !101, null, metadata !"getClosestDiagonal3", metadata !"getClosestDiagonal3", metadata !"_Z19getClosestDiagonal3ii", i32 44, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!4 = metadata !{i32 524329, metadata !101} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 524305, metadata !101, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)", i1 true, metadata !"", i32 0, metadata !102, metadata !102, metadata !103, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!6 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !22, metadata !22}
-!8 = metadata !{i32 524307, metadata !99, null, metadata !"ggVector3", i32 66, i64 192, i64 32, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [ggVector3] [line 66, size 192, align 32, offset 0] [def] [from ]
-!9 = metadata !{i32 524329, metadata !"ggVector3.h", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", metadata !5} ; [ DW_TAG_file_type ]
-!99 = metadata !{metadata !"ggVector3.h", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src"}
-!10 = metadata !{metadata !11, metadata !16, metadata !23, metadata !26, metadata !29, metadata !30, metadata !35, metadata !36, metadata !37, metadata !41, metadata !42, metadata !43, metadata !46, metadata !47, metadata !48, metadata !52, metadata !53, metadata !54, metadata !57, metadata !60, metadata !63, metadata !66, metadata !70, metadata !71, metadata !74, metadata !75, metadata !76, metadata !77, metadata !78, metadata !81, metadata !82, metadata !83, metadata !84, metadata !85, metadata !88, metadata !89, metadata !90}
-!11 = metadata !{i32 524301, metadata !99, metadata !8, metadata !"e", i32 160, i64 192, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]
-!12 = metadata !{i32 524289, metadata !101, metadata !4, metadata !"", i32 0, i64 192, i64 32, i64 0, i32 0, metadata !13, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 192, align 32, offset 0] [from double]
-!13 = metadata !{i32 524324, metadata !101, metadata !4, metadata !"double", i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!14 = metadata !{metadata !15}
-!15 = metadata !{i32 524321, i64 0, i64 3}        ; [ DW_TAG_subrange_type ]
-!16 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", i32 72, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!18 = metadata !{null, metadata !19, metadata !20}
-!19 = metadata !{i32 524303, metadata !101, metadata !4, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ]
-!20 = metadata !{i32 524310, metadata !100, null, metadata !"ggBoolean", i32 478, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_typedef ]
-!21 = metadata !{i32 524329, metadata !"math.h", metadata !"/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm", metadata !5} ; [ DW_TAG_file_type ]
-!100 = metadata !{metadata !"math.h", metadata !"/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm"}
-!22 = metadata !{i32 524324, metadata !101, metadata !4, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!23 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", i32 73, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!24 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!25 = metadata !{null, metadata !19}
-!26 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", i32 74, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!27 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!28 = metadata !{null, metadata !19, metadata !13, metadata !13, metadata !13}
-!29 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"Set", metadata !"Set", metadata !"_ZN9ggVector33SetEddd", i32 81, metadata !27, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!30 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"x", metadata !"x", metadata !"_ZNK9ggVector31xEv", i32 82, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!31 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !32, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!32 = metadata !{metadata !13, metadata !33}
-!33 = metadata !{i32 524303, metadata !101, metadata !4, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 64, metadata !34} ; [ DW_TAG_pointer_type ]
-!34 = metadata !{i32 524326, metadata !101, metadata !4, metadata !"", i32 0, i64 192, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ]
-!35 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"y", metadata !"y", metadata !"_ZNK9ggVector31yEv", i32 83, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!36 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"z", metadata !"z", metadata !"_ZNK9ggVector31zEv", i32 84, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!37 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"x", metadata !"x", metadata !"_ZN9ggVector31xEv", i32 85, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!38 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !39, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!39 = metadata !{metadata !40, metadata !19}
-!40 = metadata !{i32 524304, metadata !101, metadata !4, metadata !"double", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !13} ; [ DW_TAG_reference_type ]
-!41 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"y", metadata !"y", metadata !"_ZN9ggVector31yEv", i32 86, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!42 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"z", metadata !"z", metadata !"_ZN9ggVector31zEv", i32 87, metadata !38, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!43 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"SetX", metadata !"SetX", metadata !"_ZN9ggVector34SetXEd", i32 88, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!44 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !45, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!45 = metadata !{null, metadata !19, metadata !13}
-!46 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"SetY", metadata !"SetY", metadata !"_ZN9ggVector34SetYEd", i32 89, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!47 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"SetZ", metadata !"SetZ", metadata !"_ZN9ggVector34SetZEd", i32 90, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!48 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", i32 92, metadata !49, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!49 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !50, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!50 = metadata !{null, metadata !19, metadata !51}
-!51 = metadata !{i32 524304, metadata !101, metadata !4, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !34} ; [ DW_TAG_reference_type ]
-!52 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZNK9ggVector39toleranceEv", i32 100, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!53 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"tolerance", metadata !"tolerance", metadata !"_ZN9ggVector39toleranceEv", i32 101, metadata !38, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!54 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator+", metadata !"operator+", metadata !"_ZNK9ggVector3psEv", i32 107, metadata !55, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!55 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !56, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!56 = metadata !{metadata !51, metadata !33}
-!57 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator-", metadata !"operator-", metadata !"_ZNK9ggVector3ngEv", i32 108, metadata !58, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!58 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !59, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!59 = metadata !{metadata !8, metadata !33}
-!60 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZNK9ggVector3ixEi", i32 290, metadata !61, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!61 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !62, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!62 = metadata !{metadata !13, metadata !33, metadata !22}
-!63 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator[]", metadata !"operator[]", metadata !"_ZN9ggVector3ixEi", i32 278, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!64 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!65 = metadata !{metadata !40, metadata !19, metadata !22}
-!66 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator+=", metadata !"operator+=", metadata !"_ZN9ggVector3pLERKS_", i32 303, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!67 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !68, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!68 = metadata !{metadata !69, metadata !19, metadata !51}
-!69 = metadata !{i32 524304, metadata !101, metadata !4, metadata !"ggVector3", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_reference_type ]
-!70 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator-=", metadata !"operator-=", metadata !"_ZN9ggVector3mIERKS_", i32 310, metadata !67, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!71 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator*=", metadata !"operator*=", metadata !"_ZN9ggVector3mLEd", i32 317, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!72 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !73, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!73 = metadata !{metadata !69, metadata !19, metadata !13}
-!74 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"operator/=", metadata !"operator/=", metadata !"_ZN9ggVector3dVEd", i32 324, metadata !72, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!75 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"length", metadata !"length", metadata !"_ZNK9ggVector36lengthEv", i32 121, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!76 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"squaredLength", metadata !"squaredLength", metadata !"_ZNK9ggVector313squaredLengthEv", i32 122, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!77 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"MakeUnitVector", metadata !"MakeUnitVector", metadata !"_ZN9ggVector314MakeUnitVectorEv", i32 217, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!78 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"Perturb", metadata !"Perturb", metadata !"_ZNK9ggVector37PerturbEdd", i32 126, metadata !79, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!79 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !80, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!80 = metadata !{metadata !8, metadata !33, metadata !13, metadata !13}
-!81 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"maxComponent", metadata !"maxComponent", metadata !"_ZNK9ggVector312maxComponentEv", i32 128, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!82 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"minComponent", metadata !"minComponent", metadata !"_ZNK9ggVector312minComponentEv", i32 129, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!83 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"maxAbsComponent", metadata !"maxAbsComponent", metadata !"_ZNK9ggVector315maxAbsComponentEv", i32 131, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!84 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"minAbsComponent", metadata !"minAbsComponent", metadata !"_ZNK9ggVector315minAbsComponentEv", i32 132, metadata !31, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!85 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"indexOfMinComponent", metadata !"indexOfMinComponent", metadata !"_ZNK9ggVector319indexOfMinComponentEv", i32 133, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!86 = metadata !{i32 524309, metadata !101, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !87, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!87 = metadata !{metadata !22, metadata !33}
-!88 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"indexOfMinAbsComponent", metadata !"indexOfMinAbsComponent", metadata !"_ZNK9ggVector322indexOfMinAbsComponentEv", i32 137, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!89 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"indexOfMaxComponent", metadata !"indexOfMaxComponent", metadata !"_ZNK9ggVector319indexOfMaxComponentEv", i32 146, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!90 = metadata !{i32 524334, metadata !9, metadata !8, metadata !"indexOfMaxAbsComponent", metadata !"indexOfMaxAbsComponent", metadata !"_ZNK9ggVector322indexOfMaxAbsComponentEv", i32 150, metadata !86, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!91 = metadata !{i32 524544, metadata !1, metadata !"vx", metadata !4, i32 46, metadata !13} ; [ DW_TAG_auto_variable ]
-!92 = metadata !{i32 48, i32 0, metadata !1, null}
-!93 = metadata !{i32 218, i32 0, metadata !94, metadata !96}
-!94 = metadata !{i32 524299, metadata !101, metadata !95, i32 217, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!95 = metadata !{i32 524299, metadata !101, metadata !77, i32 217, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!96 = metadata !{i32 51, i32 0, metadata !1, null}
-!97 = metadata !{i32 227, i32 0, metadata !94, metadata !96}
-!98 = metadata !{i32 52, i32 0, metadata !1, null}
-!101 = metadata !{metadata !"ggEdgeDiscrepancy.cc", metadata !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src"}
-!102 = metadata !{i32 0}
-!103 = metadata !{metadata !3, metadata !77}
-!104 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !MDLocation(line: 46, scope: !1)
+!1 = !{!"0xb\0044\000\000", !101, !2} ; [ DW_TAG_lexical_block ]
+!2 = !{!"0xb\0044\000\000", !101, !3} ; [ DW_TAG_lexical_block ]
+!3 = !{!"0x2e\00getClosestDiagonal3\00getClosestDiagonal3\00_Z19getClosestDiagonal3ii\0044\000\001\000\006\000\000\000", !101, null, !6, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!4 = !{!"0x29", !101} ; [ DW_TAG_file_type ]
+!5 = !{!"0x11\004\004.2.1 (Based on Apple Inc. build 5658) (LLVM build 00)\001\00\000\00\000", !101, !102, !102, !103, null, null} ; [ DW_TAG_compile_unit ]
+!6 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !22, !22}
+!8 = !{!"0x13\00ggVector3\0066\00192\0032\000\000\000", !99, null, null, !10, null, null, null} ; [ DW_TAG_structure_type ] [ggVector3] [line 66, size 192, align 32, offset 0] [def] [from ]
+!9 = !{!"0x29", !"ggVector3.h", !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src", !5} ; [ DW_TAG_file_type ]
+!99 = !{!"ggVector3.h", !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src"}
+!10 = !{!11, !16, !23, !26, !29, !30, !35, !36, !37, !41, !42, !43, !46, !47, !48, !52, !53, !54, !57, !60, !63, !66, !70, !71, !74, !75, !76, !77, !78, !81, !82, !83, !84, !85, !88, !89, !90}
+!11 = !{!"0xd\00e\00160\00192\0032\000\000", !99, !8, !12} ; [ DW_TAG_member ]
+!12 = !{!"0x1\00\000\00192\0032\000\000", !101, !4, !13, !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 192, align 32, offset 0] [from double]
+!13 = !{!"0x24\00double\000\0064\0032\000\000\004", !101, !4} ; [ DW_TAG_base_type ]
+!14 = !{!15}
+!15 = !{!"0x21\000\003"}        ; [ DW_TAG_subrange_type ]
+!16 = !{!"0x2e\00ggVector3\00ggVector3\00\0072\000\000\000\006\000\000\000", !9, !8, !17, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!17 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !18, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = !{null, !19, !20}
+!19 = !{!"0xf\00\000\0032\0032\000\0064", !101, !4, !8} ; [ DW_TAG_pointer_type ]
+!20 = !{!"0x16\00ggBoolean\00478\000\000\000\000", !100, null, !22} ; [ DW_TAG_typedef ]
+!21 = !{!"0x29", !"math.h", !"/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm", !5} ; [ DW_TAG_file_type ]
+!100 = !{!"math.h", !"/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS4.2.Internal.sdk/usr/include/architecture/arm"}
+!22 = !{!"0x24\00int\000\0032\0032\000\000\005", !101, !4} ; [ DW_TAG_base_type ]
+!23 = !{!"0x2e\00ggVector3\00ggVector3\00\0073\000\000\000\006\000\000\000", !9, !8, !24, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!24 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !25, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!25 = !{null, !19}
+!26 = !{!"0x2e\00ggVector3\00ggVector3\00\0074\000\000\000\006\000\000\000", !9, !8, !27, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!27 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !28, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!28 = !{null, !19, !13, !13, !13}
+!29 = !{!"0x2e\00Set\00Set\00_ZN9ggVector33SetEddd\0081\000\000\000\006\000\000\000", !9, !8, !27, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!30 = !{!"0x2e\00x\00x\00_ZNK9ggVector31xEv\0082\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!31 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !32, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!32 = !{!13, !33}
+!33 = !{!"0xf\00\000\0032\0032\000\0064", !101, !4, !34} ; [ DW_TAG_pointer_type ]
+!34 = !{!"0x26\00\000\00192\0032\000\000", !101, !4, !8} ; [ DW_TAG_const_type ]
+!35 = !{!"0x2e\00y\00y\00_ZNK9ggVector31yEv\0083\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!36 = !{!"0x2e\00z\00z\00_ZNK9ggVector31zEv\0084\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!37 = !{!"0x2e\00x\00x\00_ZN9ggVector31xEv\0085\000\001\000\006\000\000\000", !9, !8, !38, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!38 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !39, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!39 = !{!40, !19}
+!40 = !{!"0x10\00double\000\0032\0032\000\000", !101, !4, !13} ; [ DW_TAG_reference_type ]
+!41 = !{!"0x2e\00y\00y\00_ZN9ggVector31yEv\0086\000\001\000\006\000\000\000", !9, !8, !38, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!42 = !{!"0x2e\00z\00z\00_ZN9ggVector31zEv\0087\000\001\000\006\000\000\000", !9, !8, !38, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!43 = !{!"0x2e\00SetX\00SetX\00_ZN9ggVector34SetXEd\0088\000\000\000\006\000\000\000", !9, !8, !44, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!44 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !45, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!45 = !{null, !19, !13}
+!46 = !{!"0x2e\00SetY\00SetY\00_ZN9ggVector34SetYEd\0089\000\000\000\006\000\000\000", !9, !8, !44, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!47 = !{!"0x2e\00SetZ\00SetZ\00_ZN9ggVector34SetZEd\0090\000\000\000\006\000\000\000", !9, !8, !44, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!48 = !{!"0x2e\00ggVector3\00ggVector3\00\0092\000\000\000\006\000\000\000", !9, !8, !49, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!49 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !50, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!50 = !{null, !19, !51}
+!51 = !{!"0x10\00\000\0032\0032\000\000", !101, !4, !34} ; [ DW_TAG_reference_type ]
+!52 = !{!"0x2e\00tolerance\00tolerance\00_ZNK9ggVector39toleranceEv\00100\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!53 = !{!"0x2e\00tolerance\00tolerance\00_ZN9ggVector39toleranceEv\00101\000\000\000\006\000\000\000", !9, !8, !38, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!54 = !{!"0x2e\00operator+\00operator+\00_ZNK9ggVector3psEv\00107\000\000\000\006\000\000\000", !9, !8, !55, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!55 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !56, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!56 = !{!51, !33}
+!57 = !{!"0x2e\00operator-\00operator-\00_ZNK9ggVector3ngEv\00108\000\000\000\006\000\000\000", !9, !8, !58, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!58 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !59, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!59 = !{!8, !33}
+!60 = !{!"0x2e\00operator[]\00operator[]\00_ZNK9ggVector3ixEi\00290\000\000\000\006\000\000\000", !9, !8, !61, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!61 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !62, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!62 = !{!13, !33, !22}
+!63 = !{!"0x2e\00operator[]\00operator[]\00_ZN9ggVector3ixEi\00278\000\000\000\006\000\000\000", !9, !8, !64, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!64 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !65, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!65 = !{!40, !19, !22}
+!66 = !{!"0x2e\00operator+=\00operator+=\00_ZN9ggVector3pLERKS_\00303\000\000\000\006\000\000\000", !9, !8, !67, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!67 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !68, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!68 = !{!69, !19, !51}
+!69 = !{!"0x10\00ggVector3\000\0032\0032\000\000", !101, !4, !8} ; [ DW_TAG_reference_type ]
+!70 = !{!"0x2e\00operator-=\00operator-=\00_ZN9ggVector3mIERKS_\00310\000\000\000\006\000\000\000", !9, !8, !67, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!71 = !{!"0x2e\00operator*=\00operator*=\00_ZN9ggVector3mLEd\00317\000\000\000\006\000\000\000", !9, !8, !72, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!72 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !73, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!73 = !{!69, !19, !13}
+!74 = !{!"0x2e\00operator/=\00operator/=\00_ZN9ggVector3dVEd\00324\000\000\000\006\000\000\000", !9, !8, !72, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!75 = !{!"0x2e\00length\00length\00_ZNK9ggVector36lengthEv\00121\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!76 = !{!"0x2e\00squaredLength\00squaredLength\00_ZNK9ggVector313squaredLengthEv\00122\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!77 = !{!"0x2e\00MakeUnitVector\00MakeUnitVector\00_ZN9ggVector314MakeUnitVectorEv\00217\000\001\000\006\000\000\000", !9, !8, !24, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!78 = !{!"0x2e\00Perturb\00Perturb\00_ZNK9ggVector37PerturbEdd\00126\000\000\000\006\000\000\000", !9, !8, !79, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!79 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !80, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!80 = !{!8, !33, !13, !13}
+!81 = !{!"0x2e\00maxComponent\00maxComponent\00_ZNK9ggVector312maxComponentEv\00128\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!82 = !{!"0x2e\00minComponent\00minComponent\00_ZNK9ggVector312minComponentEv\00129\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!83 = !{!"0x2e\00maxAbsComponent\00maxAbsComponent\00_ZNK9ggVector315maxAbsComponentEv\00131\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!84 = !{!"0x2e\00minAbsComponent\00minAbsComponent\00_ZNK9ggVector315minAbsComponentEv\00132\000\000\000\006\000\000\000", !9, !8, !31, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!85 = !{!"0x2e\00indexOfMinComponent\00indexOfMinComponent\00_ZNK9ggVector319indexOfMinComponentEv\00133\000\000\000\006\000\000\000", !9, !8, !86, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!86 = !{!"0x15\00\000\000\000\000\000\000", !101, !4, null, !87, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!87 = !{!22, !33}
+!88 = !{!"0x2e\00indexOfMinAbsComponent\00indexOfMinAbsComponent\00_ZNK9ggVector322indexOfMinAbsComponentEv\00137\000\000\000\006\000\000\000", !9, !8, !86, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!89 = !{!"0x2e\00indexOfMaxComponent\00indexOfMaxComponent\00_ZNK9ggVector319indexOfMaxComponentEv\00146\000\000\000\006\000\000\000", !9, !8, !86, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!90 = !{!"0x2e\00indexOfMaxAbsComponent\00indexOfMaxAbsComponent\00_ZNK9ggVector322indexOfMaxAbsComponentEv\00150\000\000\000\006\000\000\000", !9, !8, !86, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!91 = !{!"0x100\00vx\0046\000", !1, !4, !13} ; [ DW_TAG_auto_variable ]
+!92 = !MDLocation(line: 48, scope: !1)
+!93 = !MDLocation(line: 218, scope: !94, inlinedAt: !96)
+!94 = !{!"0xb\00217\000\000", !101, !95} ; [ DW_TAG_lexical_block ]
+!95 = !{!"0xb\00217\000\000", !101, !77} ; [ DW_TAG_lexical_block ]
+!96 = !MDLocation(line: 51, scope: !1)
+!97 = !MDLocation(line: 227, scope: !94, inlinedAt: !96)
+!98 = !MDLocation(line: 52, scope: !1)
+!101 = !{!"ggEdgeDiscrepancy.cc", !"/Volumes/Home/grosbaj/sources/llvm-externals/speccpu2000/benchspec/CINT2000/252.eon/src"}
+!102 = !{i32 0}
+!103 = !{!3, !77}
+!104 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll b/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll
index b39978b9d44e..369ac96a205f 100644
--- a/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll
+++ b/test/CodeGen/Thumb/2012-04-26-M0ISelBug.ll
@@ -5,7 +5,7 @@
 define i32 @t(i32 %a) nounwind {
 ; CHECK-LABEL: t:
 ; CHECK: asrs [[REG1:(r[0-9]+)]], [[REG2:(r[0-9]+)]], #31
-; CHECK: eors [[REG1]], [[REG2]]
+; CHECK: eors [[REG2]], [[REG1]]
   %tmp0 = ashr i32 %a, 31
   %tmp1 = xor i32 %tmp0, %a
   ret i32 %tmp1
diff --git a/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll b/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll
index ae663697ebeb..cfa1159bda2c 100644
--- a/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll
+++ b/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll
@@ -1,12 +1,11 @@
-; RUN: llc < %s -mtriple=thumbv6m-eabi -o - | FileCheck %s
-; XFAIL: *
+; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs -o - | FileCheck %s
 
 define void @foo(i32* %A) #0 {
 entry:
 ; CHECK-LABEL: foo:
 ; CHECK: push {r7, lr}
-; CHECK: ldm [[REG0:r[0-9]]]!,
-; CHECK-NEXT: subs [[REG0]]
+; CHECK: ldm
+; CHECK-NEXT: subs
 ; CHECK-NEXT: bl
   %0 = load i32* %A, align 4
   %arrayidx1 = getelementptr inbounds i32* %A, i32 1
diff --git a/test/CodeGen/Thumb/copy_thumb.ll b/test/CodeGen/Thumb/copy_thumb.ll
new file mode 100644
index 000000000000..528f54bd84e6
--- /dev/null
+++ b/test/CodeGen/Thumb/copy_thumb.ll
@@ -0,0 +1,38 @@
+; RUN: llc -mtriple=armv4-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
+; RUN: llc -mtriple=armv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
+; RUN: llc -mtriple=armv5-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
+; RUN: llc -mtriple=armv6-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
+; RUN: llc -mtriple=armv7-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
+; RUN: llc -mtriple=thumbv6-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
+; RUN: llc -mtriple=thumbv7-none--eabi < %s | FileCheck %s --check-prefix=CHECK-LOLOMOV
+; CHECK-LOLOMOV-LABEL:  foo
+; CHECK-LOLOMOV:        mov [[TMP:r[0-7]]], [[SRC1:r[01]]]
+; CHECK-LOLOMOV-NEXT:   mov [[SRC1]], [[SRC2:r[01]]]
+; CHECK-LOLOMOV-NEXT:   mov [[SRC2]], [[TMP]]
+; CHECK-LOLOMOV-LABEL:  bar
+; CHECK-LOLOMOV-LABEL:  fnend
+; 
+; 'MOV lo, lo' in Thumb mode produces undefined results on pre-v6 hardware
+; RUN: llc -mtriple=thumbv4t-none--eabi < %s | FileCheck %s --check-prefix=CHECK-NOLOLOMOV
+; RUN: llc -mtriple=thumbv5-none--eabi < %s | FileCheck %s --check-prefix=CHECK-NOLOLOMOV
+; CHECK-NOLOLOMOV-LABEL: foo
+; CHECK-NOLOLOMOV-NOT:   mov [[TMP:r[0-7]]], [[SRC1:r[01]]]
+; CHECK-NOLOLOMOV:       push  {[[SRC1:r[01]]]}
+; CHECK-NOLOLOMOV-NEXT:  pop {[[TMP:r[0-7]]]}
+; CHECK-NOLOLOMOV-NOT:   mov [[TMP:r[0-7]]], [[SRC1:r[01]]]
+; CHECK-NOLOLOMOV:       push  {[[SRC2:r[01]]]}
+; CHECK-NOLOLOMOV-NEXT:  pop {[[SRC1]]}
+; CHECK-NOLOLOMOV-NOT:   mov [[TMP:r[0-7]]], [[SRC1:r[01]]]
+; CHECK-NOLOLOMOV:       push  {[[TMP]]}
+; CHECK-NOLOLOMOV-NEXT:  pop {[[SRC2]]}
+; CHECK-NOLOLOMOV-LABEL: bar
+; CHECK-NOLOLOMOV-LABEL: fnend
+
+declare void @bar(i32, i32)
+
+define void @foo(i32 %a, i32 %b) {
+entry:
+  call void @bar(i32 %b, i32 %a);
+  ret void
+}
+
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index 6c6de55347a4..309d80217c11 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra | FileCheck %s
-; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=RA_GREEDY
+; RUN: llc < %s -mtriple=thumb-apple-darwin -disable-cgp-branch-opts -disable-post-ra -regalloc=basic -verify-machineinstrs | FileCheck %s -check-prefix=CHECK -check-prefix=RA_BASIC
 
 	%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
 	%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
@@ -45,7 +45,8 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
 ; CHECK: sub sp, #
 ; CHECK: mov r[[R0:[0-9]+]], sp
 ; CHECK: str r{{[0-9+]}}, [r[[R0]]
-; CHECK: str r{{[0-9+]}}, [r[[R0]]
+; RA_GREEDY: str r{{[0-9+]}}, [r[[R0]]
+; RA_BASIC: stm r[[R0]]!
 ; CHECK-NOT: ldr r0, [sp
 ; CHECK: mov r[[R1:[0-9]+]], sp
 ; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r{{[0-9]+}}
diff --git a/test/CodeGen/Thumb/fastcc.ll b/test/CodeGen/Thumb/fastcc.ll
index 98ff684d2ec6..1a012465f877 100644
--- a/test/CodeGen/Thumb/fastcc.ll
+++ b/test/CodeGen/Thumb/fastcc.ll
@@ -33,4 +33,4 @@ attributes #0 = { optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"clang version 3.5.0 "}
+!0 = !{!"clang version 3.5.0 "}
diff --git a/test/CodeGen/Thumb/iabs.ll b/test/CodeGen/Thumb/iabs.ll
index 76224bc5348c..ecd4a6b96fa6 100644
--- a/test/CodeGen/Thumb/iabs.ll
+++ b/test/CodeGen/Thumb/iabs.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=thumb-unknown-unknown -filetype=obj -o %t.o
-; RUN: llvm-objdump -disassemble -arch=thumb %t.o | FileCheck %s
+; RUN: llvm-objdump -disassemble -arch-name=thumb %t.o | FileCheck %s
 
 define i32 @test(i32 %a) {
         %tmp1neg = sub i32 0, %a
diff --git a/test/CodeGen/Thumb/inlineasm-thumb.ll b/test/CodeGen/Thumb/inlineasm-thumb.ll
index 2547ce8d6beb..cfaf2ba7cb43 100644
--- a/test/CodeGen/Thumb/inlineasm-thumb.ll
+++ b/test/CodeGen/Thumb/inlineasm-thumb.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=thumb-eabi %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumb-eabi -no-integrated-as %s -o - | FileCheck %s
 
 define i32 @t1(i32 %x, i32 %y) nounwind {
 entry:
@@ -6,3 +6,14 @@ entry:
   %0 = tail call i32 asm "mov $0, $1", "=l,h"(i32 %y) nounwind
   ret i32 %0
 }
+
+; CHECK-LABEL: constraint_r:
+; CHECK: foo2 r{{[0-7]+}}, r{{[0-7]+}}
+
+define i32 @constraint_r() {
+entry:
+  %0 = tail call i32 asm sideeffect "movs $0, #1", "=r"()
+  tail call void asm sideeffect "foo1", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7}"()
+  %1 = tail call i32 asm sideeffect "foo2 $0, $1", "=r,r"(i32 %0)
+  ret i32 %1
+}
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index fb6daa478651..269bdd938d0a 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -1,31 +1,57 @@
-; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s --check-prefix=CHECK --check-prefix=IOS
+; RUN: llc < %s -mtriple=thumb-none-eabi | FileCheck %s --check-prefix=CHECK --check-prefix=EABI
+; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-apple-ios
+; RUN: llvm-objdump -triple=thumbv6-apple-ios -d %t | FileCheck %s --check-prefix=CHECK --check-prefix=IOS
+; RUN: llc < %s -o %t -filetype=obj -mtriple=thumbv6-none-eabi
+; RUN: llvm-objdump -triple=thumbv6-none-eabi -d %t | FileCheck %s --check-prefix=CHECK --check-prefix=EABI
 
+; Largest stack for which a single tADDspi/tSUBspi is enough
 define void @test1() {
 ; CHECK-LABEL: test1:
-; CHECK: sub sp, #256
-; CHECK: add sp, #256
-    %tmp = alloca [ 64 x i32 ] , align 4
+; CHECK: sub sp, #508
+; CHECK: add sp, #508
+    %tmp = alloca [ 508 x i8 ] , align 4
     ret void
 }
 
+; Largest stack for which three tADDspi/tSUBspis are enough
+define void @test100() {
+; CHECK-LABEL: test100:
+; CHECK: sub sp, #508
+; CHECK: sub sp, #508
+; CHECK: sub sp, #508
+; EABI: add sp, #508
+; EABI: add sp, #508
+; EABI: add sp, #508
+; IOS: subs r4, r7, #4
+; IOS: mov sp, r4
+    %tmp = alloca [ 1524 x i8 ] , align 4
+    ret void
+}
+
+; Smallest stack for which we use a constant pool
 define void @test2() {
 ; CHECK-LABEL: test2:
-; CHECK: ldr r0, LCPI
+; CHECK: ldr r0,
 ; CHECK: add sp, r0
-; CHECK: subs r4, r7, #4
-; CHECK: mov sp, r4
-    %tmp = alloca [ 4168 x i8 ] , align 4
+; EABI: ldr r0,
+; EABI: add sp, r0
+; IOS: subs r4, r7, #4
+; IOS: mov sp, r4
+    %tmp = alloca [ 1528 x i8 ] , align 4
     ret void
 }
 
 define i32 @test3() {
 ; CHECK-LABEL: test3:
-; CHECK: ldr r1, LCPI
+; CHECK: ldr r1,
 ; CHECK: add sp, r1
-; CHECK: ldr r1, LCPI
+; CHECK: ldr r1,
 ; CHECK: add r1, sp
-; CHECK: subs r4, r7, #4
-; CHECK: mov sp, r4
+; EABI: ldr r1,
+; EABI: add sp, r1
+; IOS: subs r4, r7, #4
+; IOS: mov sp, r4
     %retval = alloca i32, align 4
     %tmp = alloca i32, align 4
     %a = alloca [805306369 x i8], align 16
@@ -33,3 +59,22 @@ define i32 @test3() {
     %tmp1 = load i32* %tmp
     ret i32 %tmp1
 }
+
+; Here, the adds get optimized out because they are dead, but the calculation
+; of the address of stack_a is dead but not optimized out. When the address
+; calculation gets expanded to two instructions, we need to avoid reading a
+; dead register.
+; No CHECK lines (just test for crashes), as we hope this will be optimised
+; better in future.
+define i32 @test4() {
+entry:
+  %stack_a = alloca i8, align 1
+  %stack_b = alloca [256 x i32*], align 4
+  %int = ptrtoint i8* %stack_a to i32
+  %add = add i32 %int, 1
+  br label %block2
+
+block2:
+  %add2 = add i32 %add, 1
+  ret i32 0
+}
diff --git a/test/CodeGen/Thumb/ldm-merge-call.ll b/test/CodeGen/Thumb/ldm-merge-call.ll
new file mode 100644
index 000000000000..febc96b07ce6
--- /dev/null
+++ b/test/CodeGen/Thumb/ldm-merge-call.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m--linux-gnueabi"
+
+; Function Attrs: nounwind optsize
+define void @foo(i32* nocapture readonly %A) #0 {
+entry:
+; CHECK-LABEL: foo:
+; CHECK: ldm r[[BASE:[0-9]]]!,
+; CHECK-NEXT: mov r[[BASE]],
+  %0 = load i32* %A, align 4
+  %arrayidx1 = getelementptr inbounds i32* %A, i32 1
+  %1 = load i32* %arrayidx1, align 4
+  %call = tail call i32 @bar(i32 %0, i32 %1, i32 %0, i32 %1) #2
+  %call2 = tail call i32 @bar(i32 %0, i32 %1, i32 %0, i32 %1) #2
+  ret void
+}
+
+; Function Attrs: optsize
+declare i32 @bar(i32, i32, i32, i32) #1
+
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind optsize }
diff --git a/test/CodeGen/Thumb/ldm-merge-struct.ll b/test/CodeGen/Thumb/ldm-merge-struct.ll
new file mode 100644
index 000000000000..2f732e00bf74
--- /dev/null
+++ b/test/CodeGen/Thumb/ldm-merge-struct.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-none--eabi"
+
+%struct.S = type { i32, i32 }
+
+@s = common global %struct.S zeroinitializer, align 4
+
+define i32 @f() {
+entry:
+; CHECK-LABEL: f:
+; CHECK: ldm r[[BASE:[0-9]]],
+; CHECK-NEXT-NOT: subs r[[BASE]]
+  %0 = load i32* getelementptr inbounds (%struct.S* @s, i32 0, i32 0), align 4
+  %1 = load i32* getelementptr inbounds (%struct.S* @s, i32 0, i32 1), align 4
+  %cmp = icmp sgt i32 %0, %1
+  %2 = sub i32 0, %1
+  %cond.p = select i1 %cmp, i32 %1, i32 %2
+  %cond = add i32 %cond.p, %0
+  ret i32 %cond
+}
diff --git a/test/CodeGen/Thumb/ldm-stm-base-materialization.ll b/test/CodeGen/Thumb/ldm-stm-base-materialization.ll
new file mode 100644
index 000000000000..6382c25b60fe
--- /dev/null
+++ b/test/CodeGen/Thumb/ldm-stm-base-materialization.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs -o - | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m-none--eabi"
+
+@a = external global i32*
+@b = external global i32*
+
+; Function Attrs: nounwind
+define void @foo() #0 {
+entry:
+; CHECK-LABEL: foo:
+; CHECK: ldr r[[SB:[0-9]]], .LCPI
+; CHECK: ldr r[[LB:[0-9]]], .LCPI
+; CHECK: adds r[[NLB:[0-9]]], r[[LB]], #4
+; CHECK-NEXT: ldm r[[NLB]],
+; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
+; CHECK-NEXT: stm r[[NSB]]
+  %0 = load i32** @a, align 4
+  %arrayidx = getelementptr inbounds i32* %0, i32 1
+  %1 = bitcast i32* %arrayidx to i8*
+  %2 = load i32** @b, align 4
+  %arrayidx1 = getelementptr inbounds i32* %2, i32 1
+  %3 = bitcast i32* %arrayidx1 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 24, i32 4, i1 false)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #1
diff --git a/test/CodeGen/Thumb/pop.ll b/test/CodeGen/Thumb/pop.ll
index 1e45c7f37bcb..3c539c690170 100644
--- a/test/CodeGen/Thumb/pop.ll
+++ b/test/CodeGen/Thumb/pop.ll
@@ -7,7 +7,9 @@ define void @t(i8* %a, ...) nounwind {
 ; CHECK-NEXT: add sp, #12
 ; CHECK-NEXT: bx r3
 entry:
-  %a.addr = alloca i8*
-  store i8* %a, i8** %a.addr
+  %a.addr = alloca i8, i32 4
+  call void @llvm.va_start(i8* %a.addr)
   ret void
 }
+
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/Thumb/stack_guard_remat.ll b/test/CodeGen/Thumb/stack_guard_remat.ll
new file mode 100644
index 000000000000..e949cc181f15
--- /dev/null
+++ b/test/CodeGen/Thumb/stack_guard_remat.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic -no-integrated-as | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=static -no-integrated-as | FileCheck %s -check-prefix=NO-PIC  -check-prefix=STATIC
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=dynamic-no-pic -no-integrated-as | FileCheck %s  -check-prefix=NO-PIC -check-prefix=DYNAMIC-NO-PIC
+
+;PIC:   foo2
+;PIC:   ldr [[R0:r[0-9]+]], [[LABEL0:LCPI[0-9_]+]]
+;PIC: [[LABEL1:LPC[0-9_]+]]:
+;PIC:   add [[R0]], pc
+;PIC:   ldr [[R1:r[0-9]+]], {{\[}}[[R0]]{{\]}}
+;PIC:   ldr [[R1:r[0-9]+]], {{\[}}[[R1]]{{\]}}
+
+;PIC:      [[LABEL0]]:
+;PIC-NEXT:   .long L___stack_chk_guard$non_lazy_ptr-([[LABEL1]]+4)
+
+;NO-PIC:   foo2
+;NO-PIC:   ldr [[R0:r[0-9]+]], [[LABEL0:LCPI[0-9_]+]]
+;NO-PIC-NOT: LPC
+;NO-PIC:   ldr {{r[0-9]+}}, {{\[}}[[R0]]{{\]}}
+
+;STATIC:      [[LABEL0]]:
+;STATIC-NEXT:   .long ___stack_chk_guard
+
+;DYNAMIC-NO-PIC:      [[LABEL0]]:
+;DYNAMIC-NO-PIC-NEXT:   .long L___stack_chk_guard$non_lazy_ptr
+
+; Function Attrs: nounwind ssp
+define i32 @test_stack_guard_remat() #0 {
+  %a1 = alloca [256 x i32], align 4
+  %1 = bitcast [256 x i32]* %a1 to i8*
+  call void @llvm.lifetime.start(i64 1024, i8* %1)
+  %2 = getelementptr inbounds [256 x i32]* %a1, i32 0, i32 0
+  call void @foo3(i32* %2) #3
+  call void asm sideeffect "foo2", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{sp},~{lr}"()
+  call void @llvm.lifetime.end(i64 1024, i8* %1)
+  ret i32 0
+}
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+
+declare void @foo3(i32*)
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Thumb/stm-merge.ll b/test/CodeGen/Thumb/stm-merge.ll
new file mode 100644
index 000000000000..76e71f4da652
--- /dev/null
+++ b/test/CodeGen/Thumb/stm-merge.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv6m--linux-gnueabi"
+
+@d = internal unnamed_addr global i32 0, align 4
+@c = internal global i32* null, align 4
+@e = internal unnamed_addr global i32* null, align 4
+
+; Function Attrs: nounwind optsize
+define void @fn1() #0 {
+entry:
+; CHECK-LABEL: fn1:
+; CHECK: stm r[[BASE:[0-9]]]!, {{.*}}
+; CHECK-NOT: {{.*}} r[[BASE]]
+; CHECK: ldr r[[BASE]], {{.*}}
+  %g = alloca i32, align 4
+  %h = alloca i32, align 4
+  store i32 1, i32* %g, align 4
+  store i32 0, i32* %h, align 4
+  %.pr = load i32* @d, align 4
+  %cmp11 = icmp slt i32 %.pr, 1
+  br i1 %cmp11, label %for.inc.lr.ph, label %for.body5
+
+for.inc.lr.ph:                                    ; preds = %entry
+  store i32 1, i32* @d, align 4
+  br label %for.body5
+
+for.body5:                                        ; preds = %entry, %for.inc.lr.ph, %for.body5
+  %f.010 = phi i32 [ %inc7, %for.body5 ], [ 0, %for.inc.lr.ph ], [ 0, %entry ]
+  store volatile i32* %g, i32** @c, align 4
+  %inc7 = add nsw i32 %f.010, 1
+  %exitcond = icmp eq i32 %inc7, 2
+  br i1 %exitcond, label %for.end8, label %for.body5
+
+for.end8:                                         ; preds = %for.body5
+  store i32* %h, i32** @e, align 4
+  ret void
+}
+
+attributes #0 = { nounwind optsize "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Thumb/thumb-ldm.ll b/test/CodeGen/Thumb/thumb-ldm.ll
index 95f3edc9c4c3..7e9560eec8d5 100644
--- a/test/CodeGen/Thumb/thumb-ldm.ll
+++ b/test/CodeGen/Thumb/thumb-ldm.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv6m-eabi -o - | FileCheck %s
-; XFAIL: *
+; RUN: llc < %s -mtriple=thumbv6m-eabi -verify-machineinstrs -o - | FileCheck %s
 
 @X = external global [0 x i32]          ; <[0 x i32]*> [#uses=5]
 
diff --git a/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll b/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll
index dedc82bf68ce..da2f3f09b281 100644
--- a/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll
+++ b/test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll
@@ -1,35 +1,33 @@
-; RUN: llc -mtriple=thumbv6m-eabi %s -o - | FileCheck %s
-; XFAIL: *
-
+; RUN: llc -mtriple=thumbv6m-eabi -verify-machineinstrs %s -o - | FileCheck %s
 @d = external global [64 x i32]
 @s = external global [64 x i32]
 
 ; Function Attrs: nounwind
 define void @t1() #0 {
 entry:
-; CHECK: ldr [[REG0:r[0-9]]],
-; CHECK: ldm [[REG0]]!,
-; CHECK: ldr [[REG1:r[0-9]]],
-; CHECK: stm [[REG1]]!,
-; CHECK: subs [[REG0]], #32
-; CHECK-NEXT: ldrb
-; CHECK: subs [[REG1]], #32
-; CHECK-NEXT: strb
-    tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 33, i32 4, i1 false)
+; CHECK-LABEL: t1:
+; CHECK: ldr r[[LB:[0-9]]],
+; CHECK-NEXT: ldm r[[LB]]!,
+; CHECK-NEXT: ldr r[[SB:[0-9]]],
+; CHECK-NEXT: stm r[[SB]]!,
+; CHECK-NEXT: ldrb {{.*}}, [r[[LB]]]
+; CHECK-NEXT: strb {{.*}}, [r[[SB]]]
+    tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 17, i32 4, i1 false)
     ret void
 }
 
 ; Function Attrs: nounwind
 define void @t2() #0 {
 entry:
-; CHECK: ldr [[REG0:r[0-9]]],
-; CHECK: ldm [[REG0]]!,
-; CHECK: ldr [[REG1:r[0-9]]],
-; CHECK: stm [[REG1]]!,
-; CHECK: ldrh
-; CHECK: ldrb
-; CHECK: strb
-; CHECK: strh
+; CHECK-LABEL: t2:
+; CHECK: ldr r[[LB:[0-9]]],
+; CHECK-NEXT: ldm r[[LB]]!,
+; CHECK-NEXT: ldr r[[SB:[0-9]]],
+; CHECK-NEXT: stm r[[SB]]!,
+; CHECK-NEXT: ldrh {{.*}}, [r[[LB]]]
+; CHECK-NEXT: ldrb {{.*}}, [r[[LB]], #2]
+; CHECK-NEXT: strb {{.*}}, [r[[SB]], #2]
+; CHECK-NEXT: strh {{.*}}, [r[[SB]]]
     tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([64 x i32]* @s to i8*), i8* bitcast ([64 x i32]* @d to i8*), i32 15, i32 4, i1 false)
     ret void
 }
diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
index c8eac8d4d094..59c236732118 100644
--- a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll
@@ -13,6 +13,7 @@ entry:
 ; CHECK-NOT: mov sp, r7
 ; CHECK: add sp, #8
 	call void @__gcov_flush() nounwind
+	call void @llvm.va_start(i8* null)
 	br i1 undef, label %bb5, label %bb
 
 bb:		; preds = %bb, %entry
@@ -27,3 +28,5 @@ bb5:		; preds = %bb, %entry
 declare hidden void @__gcov_flush()
 
 declare i32 @execvp(i8*, i8**) nounwind
+
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
index 524e5a6b7b68..89b7148f5ecd 100644
--- a/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
+++ b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -std-compile-opts | \
+; RUN: opt < %s -O3 | \
 ; RUN:   llc -mtriple=thumbv7-apple-darwin10 -mattr=+neon | FileCheck %s
 
 define void @fred(i32 %three_by_three, i8* %in, double %dt1, i32 %x_size, i32 %y_size, i8* %bp) nounwind {
diff --git a/test/CodeGen/Thumb2/aapcs.ll b/test/CodeGen/Thumb2/aapcs.ll
new file mode 100644
index 000000000000..21af8c119b04
--- /dev/null
+++ b/test/CodeGen/Thumb2/aapcs.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mtriple=thumbv7-none-eabi   -mcpu=cortex-m4 -mattr=-vfp2             | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 -mattr=+vfp4,+fp-only-sp | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 -mattr=+vfp3             | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
+
+define float @float_in_reg(float %a, float %b) {
+entry:
+; CHECK-LABEL: float_in_reg:
+; SOFT: mov r0, r1
+; HARD: vmov.f32  s0, s1
+; CHECK-NEXT: bx lr
+  ret float %b
+}
+
+define double @double_in_reg(double %a, double %b) {
+entry:
+; CHECK-LABEL: double_in_reg:
+; SOFT: mov r0, r2
+; SOFT: mov r1, r3
+; SP: vmov.f32  s0, s2
+; SP: vmov.f32  s1, s3
+; DP: vmov.f64  d0, d1
+; CHECK-NEXT: bx lr
+  ret double %b
+}
+
+define float @float_on_stack(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, float %i) {
+; CHECK-LABEL: float_on_stack:
+; SOFT: ldr r0, [sp, #48]
+; HARD: vldr s0, [sp]
+; CHECK-NEXT: bx lr
+  ret float %i
+}
+
+define double @double_on_stack(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) {
+; CHECK-LABEL: double_on_stack:
+; SOFT: ldr r0, [sp, #48]
+; SOFT: ldr r1, [sp, #52]
+; HARD: vldr d0, [sp]
+; CHECK-NEXT: bx lr
+  ret double %i
+}
+
+define double @double_not_split(double %a, double %b, double %c, double %d, double %e, double %f, double %g, float %h, double %i) {
+; CHECK-LABEL: double_not_split:
+; SOFT: ldr r0, [sp, #48]
+; SOFT: ldr r1, [sp, #52]
+; HARD: vldr d0, [sp]
+; CHECK-NEXT: bx lr
+  ret double %i
+}
diff --git a/test/CodeGen/Thumb2/aligned-spill.ll b/test/CodeGen/Thumb2/aligned-spill.ll
index 3a2803f91f16..4ef294bdf5ff 100644
--- a/test/CodeGen/Thumb2/aligned-spill.ll
+++ b/test/CodeGen/Thumb2/aligned-spill.ll
@@ -9,7 +9,7 @@ target triple = "thumbv7-apple-ios"
 ;
 ; The caller-saved r4 is used as a scratch register for stack realignment.
 ; CHECK: push {r4, r7, lr}
-; CHECK: bic r4, r4, #7
+; CHECK: bfc r4, #0, #3
 ; CHECK: mov sp, r4
 define void @f(double* nocapture %p) nounwind ssp {
 entry:
@@ -23,7 +23,7 @@ entry:
 ; NEON: f
 ; NEON: push {r4, r7, lr}
 ; NEON: sub.w r4, sp, #64
-; NEON: bic r4, r4, #15
+; NEON: bfc r4, #0, #4
 ; Stack pointer must be updated before the spills.
 ; NEON: mov sp, r4
 ; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
@@ -54,7 +54,7 @@ entry:
 ; NEON: f7
 ; NEON: push {r4, r7, lr}
 ; NEON: sub.w r4, sp, #56
-; NEON: bic r4, r4, #15
+; NEON: bfc r4, #0, #4
 ; Stack pointer must be updated before the spills.
 ; NEON: mov sp, r4
 ; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
@@ -81,7 +81,7 @@ entry:
 ; NEON: push {r4, r7, lr}
 ; NEON: vpush {d12, d13, d14, d15}
 ; NEON: sub.w r4, sp, #24
-; NEON: bic r4, r4, #15
+; NEON: bfc r4, #0, #4
 ; Stack pointer must be updated before the spills.
 ; NEON: mov sp, r4
 ; NEON: vst1.64 {d8, d9}, [r4:128]
diff --git a/test/CodeGen/Thumb2/constant-islands-jump-table.ll b/test/CodeGen/Thumb2/constant-islands-jump-table.ll
new file mode 100644
index 000000000000..0dd7092291ba
--- /dev/null
+++ b/test/CodeGen/Thumb2/constant-islands-jump-table.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabihf -O1 %s -o - | FileCheck %s
+
+; CHECK-LABEL: test_jump_table:
+; CHECK: b .LBB
+; CHECK-NOT: tbh
+
+define i32 @test_jump_table(i32 %x, float %in) {
+
+h1:
+
+ %b0 = fadd float %in, 1234.5
+ %b1 = fptoui float %b0 to i32
+  
+  switch i32 %x, label %h2 [
+    i32 0, label %h3
+    i32 2, label %h4
+    i32 4, label %h5
+    i32 6, label %h6
+  ]
+
+h2:
+  %a0 = add i32 %x, 5
+  br label %h3
+
+h3:
+  %d2 = phi i32 [%b1, %h1], [%a0, %h2]
+  %d3 = add i32 %d2, 3
+  br label %h4
+
+h4:
+  %c2 = phi i32 [%b1, %h1], [%d3, %h3]
+  %c3 = add i32 %c2, 5
+  br label %h5
+
+h5:
+  %a2 = phi i32 [%b1, %h1], [%c3, %h4]
+  %a3 = add i32 %a2, 6
+  br label %h6
+
+h6:
+  %y = phi i32 [0, %h1], [%a3, %h5]
+  call i32 @llvm.arm.space(i32 2000, i32 undef)
+  ret i32 %y
+  
+}
+
+declare i32 @llvm.arm.space(i32, i32)
diff --git a/test/CodeGen/Thumb2/constant-islands-new-island-padding.ll b/test/CodeGen/Thumb2/constant-islands-new-island-padding.ll
new file mode 100644
index 000000000000..991b043f0bdc
--- /dev/null
+++ b/test/CodeGen/Thumb2/constant-islands-new-island-padding.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios %s -o - | FileCheck %s
+
+@g0 = common global i32 0, align 4
+@d0 = common global double 0.000000e+00, align 8
+@f0 = common global float 0.000000e+00, align 4
+@g1 = common global i32 0, align 4
+
+declare i32 @llvm.arm.space(i32, i32)
+
+; Check that the constant island pass moves the float constant pool entry inside
+; the function.
+
+; CHECK: .long 1067320814 @ float 1.23455596
+; CHECK: {{.*}} %do.end
+
+define i32 @testpadding(i32 %a) {
+entry:
+  %0 = load i32* @g0, align 4
+  %add = add nsw i32 %0, 12
+  store i32 %add, i32* @g0, align 4
+  %1 = load double* @d0, align 8
+  %add1 = fadd double %1, 0x3FF3C0B8ED46EACB
+  store double %add1, double* @d0, align 8
+  %tmpcall11 = call i32 @llvm.arm.space(i32 28, i32 undef)
+  call void @foo20(i32 191)
+  %2 = load float* @f0, align 4
+  %add2 = fadd float %2, 0x3FF3C0BDC0000000
+  store float %add2, float* @f0, align 4
+  br label %do.body
+
+do.body:                                          ; preds = %do.body, %entry
+  tail call void @foo20(i32 19)
+  %3 = load i32* @g1, align 4
+  %tobool = icmp eq i32 %3, 0
+  br i1 %tobool, label %do.end, label %do.body
+
+do.end:                                           ; preds = %do.body
+  %tmpcall111 = call i32 @llvm.arm.space(i32 954, i32 undef)
+  ret i32 10
+}
+
+declare void @foo20(i32)
diff --git a/test/CodeGen/Thumb2/constant-islands-new-island.ll b/test/CodeGen/Thumb2/constant-islands-new-island.ll
new file mode 100644
index 000000000000..8ed657ef1f2a
--- /dev/null
+++ b/test/CodeGen/Thumb2/constant-islands-new-island.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabihf %s -o - | FileCheck %s
+
+; Check that new water is created by splitting the basic block right after the
+; load instruction. Previously, new water was created before the load
+; instruction, which caused the pass to fail to converge.
+
+define void @test(i1 %tst) {
+; CHECK-LABEL: test:
+; CHECK: vldr  {{s[0-9]+}}, [[CONST:\.LCPI[0-9]+_[0-9]+]]
+; CHECK-NEXT: b.w [[CONTINUE:\.LBB[0-9]+_[0-9]+]]
+
+; CHECK: [[CONST]]:
+; CHECK-NEXT: .long
+
+; CHECK: [[CONTINUE]]:
+
+entry:
+  call i32 @llvm.arm.space(i32 2000, i32 undef)
+  br i1 %tst, label %true, label %false
+
+true:
+  %val = phi float [12345.0, %entry], [undef, %false]
+  call void @bar(float %val)
+  ret void
+
+false:
+  br label %true
+}
+
+declare void @bar(float)
+declare i32 @llvm.arm.space(i32, i32)
diff --git a/test/CodeGen/Thumb2/cortex-fp.ll b/test/CodeGen/Thumb2/cortex-fp.ll
index e63970a97e1f..5548492ed098 100644
--- a/test/CodeGen/Thumb2/cortex-fp.ll
+++ b/test/CodeGen/Thumb2/cortex-fp.ll
@@ -1,13 +1,15 @@
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=CORTEXM3
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=CORTEXM4
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=CORTEXM7
 ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -march=thumb -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=CORTEXA8
 
 
 define float @foo(float %a, float %b) {
 entry:
 ; CHECK-LABEL: foo:
-; CORTEXM3: blx ___mulsf3
+; CORTEXM3: bl ___mulsf3
 ; CORTEXM4: vmul.f32  s
+; CORTEXM7: vmul.f32  s
 ; CORTEXA8: vmul.f32  d
   %0 = fmul float %a, %b
   ret float %0
@@ -17,8 +19,9 @@ define double @bar(double %a, double %b) {
 entry:
 ; CHECK-LABEL: bar:
   %0 = fmul double %a, %b
-; CORTEXM3: blx ___muldf3
-; CORTEXM4: blx ___muldf3
+; CORTEXM3: bl ___muldf3
+; CORTEXM4: {{bl|b.w}} ___muldf3
+; CORTEXM7: vmul.f64  d
 ; CORTEXA8: vmul.f64  d
   ret double %0
 }
diff --git a/test/CodeGen/Thumb2/float-cmp.ll b/test/CodeGen/Thumb2/float-cmp.ll
new file mode 100644
index 000000000000..88d6c3b0adb8
--- /dev/null
+++ b/test/CodeGen/Thumb2/float-cmp.ll
@@ -0,0 +1,301 @@
+; RUN: llc < %s -mtriple=thumbv7-none-eabi   -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
+
+
+
+define i1 @cmp_f_false(float %a, float %b) {
+; CHECK-LABEL: cmp_f_false:
+; NONE: movs r0, #0
+; HARD: movs r0, #0
+  %1 = fcmp false float %a, %b
+  ret i1 %1
+}
+define i1 @cmp_f_oeq(float %a, float %b) {
+; CHECK-LABEL: cmp_f_oeq:
+; NONE: bl __aeabi_fcmpeq
+; HARD: vcmpe.f32
+; HARD: moveq r0, #1
+  %1 = fcmp oeq float %a, %b
+  ret i1 %1
+}
+define i1 @cmp_f_ogt(float %a, float %b) {
+; CHECK-LABEL: cmp_f_ogt:
+; NONE: bl __aeabi_fcmpgt
+; HARD: vcmpe.f32
+; HARD: movgt r0, #1
+  %1 = fcmp ogt float %a, %b
+  ret i1 %1
+}
+define i1 @cmp_f_oge(float %a, float %b) {
+; CHECK-LABEL: cmp_f_oge:
+; NONE: bl __aeabi_fcmpge
+; HARD: vcmpe.f32
+; HARD: movge r0, #1
+  %1 = fcmp oge float %a, %b
+  ret i1 %1
+}
+define i1 @cmp_f_olt(float %a, float %b) {
+; CHECK-LABEL: cmp_f_olt:
+; NONE: bl __aeabi_fcmplt
+; HARD: vcmpe.f32
+; HARD: movmi r0, #1
+  %1 = fcmp olt float %a, %b
+  ret i1 %1
+}
+define i1 @cmp_f_ole(float %a, float %b) {
+; CHECK-LABEL: cmp_f_ole:
+; NONE: bl __aeabi_fcmple
+; HARD: vcmpe.f32
+; HARD: movls r0, #1
+  %1 = fcmp ole float %a, %b
+  ret i1 %1
+}
+define i1 @cmp_f_one(float %a, float %b) {
+; CHECK-LABEL: cmp_f_one:
+; NONE: bl __aeabi_fcmpgt
+; NONE: bl __aeabi_fcmplt
+; HARD: vcmpe.f32
+; HARD: movmi r0, #1
+; HARD: movgt r0, #1
+  %1 = fcmp one float %a, %b
+  ret i1 %1
+}
+define i1 @cmp_f_ord(float %a, float %b) {
+; CHECK-LABEL: cmp_f_ord:
+; NONE: bl __aeabi_fcmpun
+; HARD: vcmpe.f32
+; HARD: movvc r0, #1
+  %1 = fcmp ord float %a, %b
+  ret i1 %1
+}define i1 @cmp_f_ueq(float %a, float %b) {
+; CHECK-LABEL: cmp_f_ueq:
+; NONE: bl __aeabi_fcmpeq
+; NONE: bl __aeabi_fcmpun
+; HARD: vcmpe.f32
+; HARD: moveq r0, #1
+; HARD: movvs r0, #1
+  %1 = fcmp ueq float %a, %b
+  ret i1 %1
+}
+define i1 @cmp_f_ugt(float %a, float %b) {
+; CHECK-LABEL: cmp_f_ugt:
+; NONE: bl __aeabi_fcmpgt
+; NONE: bl __aeabi_fcmpun
+; HARD: vcmpe.f32
+; HARD: movhi r0, #1
+  %1 = fcmp ugt float %a, %b
+  ret i1 %1
+}
+define i1 @cmp_f_uge(float %a, float %b) {
+; CHECK-LABEL: cmp_f_uge:
+; NONE: bl __aeabi_fcmpge
+; NONE: bl __aeabi_fcmpun
+; HARD: vcmpe.f32
+; HARD: movpl r0, #1
+  %1 = fcmp uge float %a, %b
+  ret i1 %1
+}
+define i1 @cmp_f_ult(float %a, float %b) {
+; CHECK-LABEL: cmp_f_ult:
+; NONE: bl __aeabi_fcmplt
+; NONE: bl __aeabi_fcmpun
+; HARD: vcmpe.f32
+; HARD: movlt r0, #1
+  %1 = fcmp ult float %a, %b
+  ret i1 %1
+}
+define i1 @cmp_f_ule(float %a, float %b) {
+; CHECK-LABEL: cmp_f_ule:
+; NONE: bl __aeabi_fcmple
+; NONE: bl __aeabi_fcmpun
+; HARD: vcmpe.f32
+; HARD: movle r0, #1
+  %1 = fcmp ule float %a, %b
+  ret i1 %1
+}
+define i1 @cmp_f_une(float %a, float %b) {
+; CHECK-LABEL: cmp_f_une:
+; NONE: bl __aeabi_fcmpeq
+; HARD: vcmpe.f32
+; HARD: movne r0, #1
+  %1 = fcmp une float %a, %b
+  ret i1 %1
+}
+define i1 @cmp_f_uno(float %a, float %b) {
+; CHECK-LABEL: cmp_f_uno:
+; NONE: bl __aeabi_fcmpun
+; HARD: vcmpe.f32
+; HARD: movvs r0, #1
+  %1 = fcmp uno float %a, %b
+  ret i1 %1
+}
+define i1 @cmp_f_true(float %a, float %b) {
+; CHECK-LABEL: cmp_f_true:
+; NONE: movs r0, #1
+; HARD: movs r0, #1
+  %1 = fcmp true float %a, %b
+  ret i1 %1
+}
+
+define i1 @cmp_d_false(double %a, double %b) {
+; CHECK-LABEL: cmp_d_false:
+; NONE: movs r0, #0
+; HARD: movs r0, #0
+  %1 = fcmp false double %a, %b
+  ret i1 %1
+}
+define i1 @cmp_d_oeq(double %a, double %b) {
+; CHECK-LABEL: cmp_d_oeq:
+; NONE: bl __aeabi_dcmpeq
+; SP: bl __aeabi_dcmpeq
+; DP: vcmpe.f64
+; DP: moveq r0, #1
+  %1 = fcmp oeq double %a, %b
+  ret i1 %1
+}
+define i1 @cmp_d_ogt(double %a, double %b) {
+; CHECK-LABEL: cmp_d_ogt:
+; NONE: bl __aeabi_dcmpgt
+; SP: bl __aeabi_dcmpgt
+; DP: vcmpe.f64
+; DP: movgt r0, #1
+  %1 = fcmp ogt double %a, %b
+  ret i1 %1
+}
+define i1 @cmp_d_oge(double %a, double %b) {
+; CHECK-LABEL: cmp_d_oge:
+; NONE: bl __aeabi_dcmpge
+; SP: bl __aeabi_dcmpge
+; DP: vcmpe.f64
+; DP: movge r0, #1
+  %1 = fcmp oge double %a, %b
+  ret i1 %1
+}
+define i1 @cmp_d_olt(double %a, double %b) {
+; CHECK-LABEL: cmp_d_olt:
+; NONE: bl __aeabi_dcmplt
+; SP: bl __aeabi_dcmplt
+; DP: vcmpe.f64
+; DP: movmi r0, #1
+  %1 = fcmp olt double %a, %b
+  ret i1 %1
+}
+define i1 @cmp_d_ole(double %a, double %b) {
+; CHECK-LABEL: cmp_d_ole:
+; NONE: bl __aeabi_dcmple
+; SP: bl __aeabi_dcmple
+; DP: vcmpe.f64
+; DP: movls r0, #1
+  %1 = fcmp ole double %a, %b
+  ret i1 %1
+}
+define i1 @cmp_d_one(double %a, double %b) {
+; CHECK-LABEL: cmp_d_one:
+; NONE: bl __aeabi_dcmpgt
+; NONE: bl __aeabi_dcmplt
+; SP: bl __aeabi_dcmpgt
+; SP: bl __aeabi_dcmplt
+; DP: vcmpe.f64
+; DP: movmi r0, #1
+; DP: movgt r0, #1
+  %1 = fcmp one double %a, %b
+  ret i1 %1
+}
+define i1 @cmp_d_ord(double %a, double %b) {
+; CHECK-LABEL: cmp_d_ord:
+; NONE: bl __aeabi_dcmpun
+; SP: bl __aeabi_dcmpun
+; DP: vcmpe.f64
+; DP: movvc r0, #1
+  %1 = fcmp ord double %a, %b
+  ret i1 %1
+}
+define i1 @cmp_d_ugt(double %a, double %b) {
+; CHECK-LABEL: cmp_d_ugt:
+; NONE: bl __aeabi_dcmpgt
+; NONE: bl __aeabi_dcmpun
+; SP: bl __aeabi_dcmpgt
+; SP: bl __aeabi_dcmpun
+; DP: vcmpe.f64
+; DP: movhi r0, #1
+  %1 = fcmp ugt double %a, %b
+  ret i1 %1
+}
+
+define i1 @cmp_d_ult(double %a, double %b) {
+; CHECK-LABEL: cmp_d_ult:
+; NONE: bl __aeabi_dcmplt
+; NONE: bl __aeabi_dcmpun
+; SP: bl __aeabi_dcmplt
+; SP: bl __aeabi_dcmpun
+; DP: vcmpe.f64
+; DP: movlt r0, #1
+  %1 = fcmp ult double %a, %b
+  ret i1 %1
+}
+
+
+define i1 @cmp_d_uno(double %a, double %b) {
+; CHECK-LABEL: cmp_d_uno:
+; NONE: bl __aeabi_dcmpun
+; SP: bl __aeabi_dcmpun
+; DP: vcmpe.f64
+; DP: movvs r0, #1
+  %1 = fcmp uno double %a, %b
+  ret i1 %1
+}
+define i1 @cmp_d_true(double %a, double %b) {
+; CHECK-LABEL: cmp_d_true:
+; NONE: movs r0, #1
+; HARD: movs r0, #1
+  %1 = fcmp true double %a, %b
+  ret i1 %1
+}
+define i1 @cmp_d_ueq(double %a, double %b) {
+; CHECK-LABEL: cmp_d_ueq:
+; NONE: bl __aeabi_dcmpeq
+; NONE: bl __aeabi_dcmpun
+; SP: bl __aeabi_dcmpeq
+; SP: bl __aeabi_dcmpun
+; DP: vcmpe.f64
+; DP: moveq r0, #1
+; DP: movvs r0, #1
+  %1 = fcmp ueq double %a, %b
+  ret i1 %1
+}
+
+define i1 @cmp_d_uge(double %a, double %b) {
+; CHECK-LABEL: cmp_d_uge:
+; NONE: bl __aeabi_dcmpge
+; NONE: bl __aeabi_dcmpun
+; SP: bl __aeabi_dcmpge
+; SP: bl __aeabi_dcmpun
+; DP: vcmpe.f64
+; DP: movpl r0, #1
+  %1 = fcmp uge double %a, %b
+  ret i1 %1
+}
+
+define i1 @cmp_d_ule(double %a, double %b) {
+; CHECK-LABEL: cmp_d_ule:
+; NONE: bl __aeabi_dcmple
+; NONE: bl __aeabi_dcmpun
+; SP: bl __aeabi_dcmple
+; SP: bl __aeabi_dcmpun
+; DP: vcmpe.f64
+; DP: movle r0, #1
+  %1 = fcmp ule double %a, %b
+  ret i1 %1
+}
+
+define i1 @cmp_d_une(double %a, double %b) {
+; CHECK-LABEL: cmp_d_une:
+; NONE: bl __aeabi_dcmpeq
+; SP: bl __aeabi_dcmpeq
+; DP: vcmpe.f64
+; DP: movne r0, #1
+  %1 = fcmp une double %a, %b
+  ret i1 %1
+}
diff --git a/test/CodeGen/Thumb2/float-intrinsics-double.ll b/test/CodeGen/Thumb2/float-intrinsics-double.ll
new file mode 100644
index 000000000000..01a23bd0fe69
--- /dev/null
+++ b/test/CodeGen/Thumb2/float-intrinsics-double.ll
@@ -0,0 +1,228 @@
+; RUN: llc < %s -mtriple=thumbv7-none-eabi   -mcpu=cortex-m3                    | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4                    | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=SP
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP  -check-prefix=FP-ARMv8
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 -mattr=+fp-only-sp | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=SP
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=VFP4
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a57                   | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=FP-ARMv8
+
+declare double     @llvm.sqrt.f64(double %Val)
+define double @sqrt_d(double %a) {
+; CHECK-LABEL: sqrt_d:
+; SOFT: {{(bl|b)}} sqrt
+; HARD: vsqrt.f64 d0, d0
+  %1 = call double @llvm.sqrt.f64(double %a)
+  ret double %1
+}
+
+declare double     @llvm.powi.f64(double %Val, i32 %power)
+define double @powi_d(double %a, i32 %b) {
+; CHECK-LABEL: powi_d:
+; SOFT: {{(bl|b)}} __powidf2
+; HARD: b __powidf2
+  %1 = call double @llvm.powi.f64(double %a, i32 %b)
+  ret double %1
+}
+
+declare double     @llvm.sin.f64(double %Val)
+define double @sin_d(double %a) {
+; CHECK-LABEL: sin_d:
+; SOFT: {{(bl|b)}} sin
+; HARD: b sin
+  %1 = call double @llvm.sin.f64(double %a)
+  ret double %1
+}
+
+declare double     @llvm.cos.f64(double %Val)
+define double @cos_d(double %a) {
+; CHECK-LABEL: cos_d:
+; SOFT: {{(bl|b)}} cos
+; HARD: b cos
+  %1 = call double @llvm.cos.f64(double %a)
+  ret double %1
+}
+
+declare double     @llvm.pow.f64(double %Val, double %power)
+define double @pow_d(double %a, double %b) {
+; CHECK-LABEL: pow_d:
+; SOFT: {{(bl|b)}} pow
+; HARD: b pow
+  %1 = call double @llvm.pow.f64(double %a, double %b)
+  ret double %1
+}
+
+declare double     @llvm.exp.f64(double %Val)
+define double @exp_d(double %a) {
+; CHECK-LABEL: exp_d:
+; SOFT: {{(bl|b)}} exp
+; HARD: b exp
+  %1 = call double @llvm.exp.f64(double %a)
+  ret double %1
+}
+
+declare double     @llvm.exp2.f64(double %Val)
+define double @exp2_d(double %a) {
+; CHECK-LABEL: exp2_d:
+; SOFT: {{(bl|b)}} exp2
+; HARD: b exp2
+  %1 = call double @llvm.exp2.f64(double %a)
+  ret double %1
+}
+
+declare double     @llvm.log.f64(double %Val)
+define double @log_d(double %a) {
+; CHECK-LABEL: log_d:
+; SOFT: {{(bl|b)}} log
+; HARD: b log
+  %1 = call double @llvm.log.f64(double %a)
+  ret double %1
+}
+
+declare double     @llvm.log10.f64(double %Val)
+define double @log10_d(double %a) {
+; CHECK-LABEL: log10_d:
+; SOFT: {{(bl|b)}} log10
+; HARD: b log10
+  %1 = call double @llvm.log10.f64(double %a)
+  ret double %1
+}
+
+declare double     @llvm.log2.f64(double %Val)
+define double @log2_d(double %a) {
+; CHECK-LABEL: log2_d:
+; SOFT: {{(bl|b)}} log2
+; HARD: b log2
+  %1 = call double @llvm.log2.f64(double %a)
+  ret double %1
+}
+
+declare double     @llvm.fma.f64(double %a, double %b, double %c)
+define double @fma_d(double %a, double %b, double %c) {
+; CHECK-LABEL: fma_d:
+; SOFT: {{(bl|b)}} fma
+; HARD: vfma.f64
+  %1 = call double @llvm.fma.f64(double %a, double %b, double %c)
+  ret double %1
+}
+
+; FIXME: the FPv4-SP version is less efficient than the no-FPU version
+declare double     @llvm.fabs.f64(double %Val)
+define double @abs_d(double %a) {
+; CHECK-LABEL: abs_d:
+; NONE: bic r1, r1, #-2147483648
+; SP: bl __aeabi_dcmpgt
+; SP: bl __aeabi_dcmpun
+; SP: bl __aeabi_dsub
+; DP: vabs.f64 d0, d0
+  %1 = call double @llvm.fabs.f64(double %a)
+  ret double %1
+}
+
+declare double     @llvm.copysign.f64(double  %Mag, double  %Sgn)
+define double @copysign_d(double %a, double %b) {
+; CHECK-LABEL: copysign_d:
+; SOFT: lsrs [[REG:r[0-9]+]], r3, #31
+; SOFT: bfi r1, [[REG]], #31, #1
+; VFP: lsrs [[REG:r[0-9]+]], r3, #31
+; VFP: bfi r1, [[REG]], #31, #1
+; NEON: vmov.i32 [[REG:d[0-9]+]], #0x80000000
+; NEON: vshl.i64 [[REG]], [[REG]], #32
+; NEON: vbsl [[REG]], d
+  %1 = call double @llvm.copysign.f64(double %a, double %b)
+  ret double %1
+}
+
+declare double     @llvm.floor.f64(double %Val)
+define double @floor_d(double %a) {
+; CHECK-LABEL: floor_d:
+; SOFT: {{(bl|b)}} floor
+; VFP4: b floor
+; FP-ARMv8: vrintm.f64
+  %1 = call double @llvm.floor.f64(double %a)
+  ret double %1
+}
+
+declare double     @llvm.ceil.f64(double %Val)
+define double @ceil_d(double %a) {
+; CHECK-LABEL: ceil_d:
+; SOFT: {{(bl|b)}} ceil
+; VFP4: b ceil
+; FP-ARMv8: vrintp.f64
+  %1 = call double @llvm.ceil.f64(double %a)
+  ret double %1
+}
+
+declare double     @llvm.trunc.f64(double %Val)
+define double @trunc_d(double %a) {
+; CHECK-LABEL: trunc_d:
+; SOFT: {{(bl|b)}} trunc
+; FFP4: b trunc
+; FP-ARMv8: vrintz.f64
+  %1 = call double @llvm.trunc.f64(double %a)
+  ret double %1
+}
+
+declare double     @llvm.rint.f64(double %Val)
+define double @rint_d(double %a) {
+; CHECK-LABEL: rint_d:
+; SOFT: {{(bl|b)}} rint
+; VFP4: b rint
+; FP-ARMv8: vrintx.f64
+  %1 = call double @llvm.rint.f64(double %a)
+  ret double %1
+}
+
+declare double     @llvm.nearbyint.f64(double %Val)
+define double @nearbyint_d(double %a) {
+; CHECK-LABEL: nearbyint_d:
+; SOFT: {{(bl|b)}} nearbyint
+; VFP4: b nearbyint
+; FP-ARMv8: vrintr.f64
+  %1 = call double @llvm.nearbyint.f64(double %a)
+  ret double %1
+}
+
+declare double     @llvm.round.f64(double %Val)
+define double @round_d(double %a) {
+; CHECK-LABEL: round_d:
+; SOFT: {{(bl|b)}} round
+; VFP4: b round
+; FP-ARMv8: vrinta.f64
+  %1 = call double @llvm.round.f64(double %a)
+  ret double %1
+}
+
+declare double     @llvm.fmuladd.f64(double %a, double %b, double %c)
+define double @fmuladd_d(double %a, double %b, double %c) {
+; CHECK-LABEL: fmuladd_d:
+; SOFT: bl __aeabi_dmul
+; SOFT: bl __aeabi_dadd
+; VFP4: vmul.f64
+; VFP4: vadd.f64
+; FP-ARMv8: vmla.f64
+  %1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c)
+  ret double %1
+}
+
+declare i16 @llvm.convert.to.fp16.f64(double %a)
+define i16 @d_to_h(double %a) {
+; CHECK-LABEL: d_to_h:
+; SOFT: bl __aeabi_d2h
+; VFP4: bl __aeabi_d2h
+; FP-ARMv8: vcvt{{[bt]}}.f16.f64
+  %1 = call i16 @llvm.convert.to.fp16.f64(double %a)
+  ret i16 %1
+}
+
+declare double @llvm.convert.from.fp16.f64(i16 %a)
+define double @h_to_d(i16 %a) {
+; CHECK-LABEL: h_to_d:
+; NONE: bl __gnu_h2f_ieee
+; NONE: bl __aeabi_f2d
+; SP: vcvt{{[bt]}}.f32.f16
+; SP: bl __aeabi_f2d
+; VFPv4: vcvt{{[bt]}}.f32.f16
+; VFPv4: vcvt.f64.f32
+; FP-ARMv8: vcvt{{[bt]}}.f64.f16
+  %1 = call double @llvm.convert.from.fp16.f64(i16 %a)
+  ret double %1
+}
diff --git a/test/CodeGen/Thumb2/float-intrinsics-float.ll b/test/CodeGen/Thumb2/float-intrinsics-float.ll
new file mode 100644
index 000000000000..ec1bcd3708ac
--- /dev/null
+++ b/test/CodeGen/Thumb2/float-intrinsics-float.ll
@@ -0,0 +1,221 @@
+; RUN: llc < %s -mtriple=thumbv7-none-eabi   -mcpu=cortex-m3                    | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=VMLA
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP  -check-prefix=FP-ARMv8  -check-prefix=VMLA
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 -mattr=+fp-only-sp | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=FP-ARMv8 -check-prefix=VMLA
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=VFP4 -check-prefix=NO-VMLA
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a57                   | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=FP-ARMv8 -check-prefix=VMLA
+
+declare float     @llvm.sqrt.f32(float %Val)
+define float @sqrt_f(float %a) {
+; CHECK-LABEL: sqrt_f:
+; SOFT: bl sqrtf
+; HARD: vsqrt.f32 s0, s0
+  %1 = call float @llvm.sqrt.f32(float %a)
+  ret float %1
+}
+
+declare float     @llvm.powi.f32(float %Val, i32 %power)
+define float @powi_f(float %a, i32 %b) {
+; CHECK-LABEL: powi_f:
+; SOFT: bl __powisf2
+; HARD: b __powisf2
+  %1 = call float @llvm.powi.f32(float %a, i32 %b)
+  ret float %1
+}
+
+declare float     @llvm.sin.f32(float %Val)
+define float @sin_f(float %a) {
+; CHECK-LABEL: sin_f:
+; SOFT: bl sinf
+; HARD: b sinf
+  %1 = call float @llvm.sin.f32(float %a)
+  ret float %1
+}
+
+declare float     @llvm.cos.f32(float %Val)
+define float @cos_f(float %a) {
+; CHECK-LABEL: cos_f:
+; SOFT: bl cosf
+; HARD: b cosf
+  %1 = call float @llvm.cos.f32(float %a)
+  ret float %1
+}
+
+declare float     @llvm.pow.f32(float %Val, float %power)
+define float @pow_f(float %a, float %b) {
+; CHECK-LABEL: pow_f:
+; SOFT: bl powf
+; HARD: b powf
+  %1 = call float @llvm.pow.f32(float %a, float %b)
+  ret float %1
+}
+
+declare float     @llvm.exp.f32(float %Val)
+define float @exp_f(float %a) {
+; CHECK-LABEL: exp_f:
+; SOFT: bl expf
+; HARD: b expf
+  %1 = call float @llvm.exp.f32(float %a)
+  ret float %1
+}
+
+declare float     @llvm.exp2.f32(float %Val)
+define float @exp2_f(float %a) {
+; CHECK-LABEL: exp2_f:
+; SOFT: bl exp2f
+; HARD: b exp2f
+  %1 = call float @llvm.exp2.f32(float %a)
+  ret float %1
+}
+
+declare float     @llvm.log.f32(float %Val)
+define float @log_f(float %a) {
+; CHECK-LABEL: log_f:
+; SOFT: bl logf
+; HARD: b logf
+  %1 = call float @llvm.log.f32(float %a)
+  ret float %1
+}
+
+declare float     @llvm.log10.f32(float %Val)
+define float @log10_f(float %a) {
+; CHECK-LABEL: log10_f:
+; SOFT: bl log10f
+; HARD: b log10f
+  %1 = call float @llvm.log10.f32(float %a)
+  ret float %1
+}
+
+declare float     @llvm.log2.f32(float %Val)
+define float @log2_f(float %a) {
+; CHECK-LABEL: log2_f:
+; SOFT: bl log2f
+; HARD: b log2f
+  %1 = call float @llvm.log2.f32(float %a)
+  ret float %1
+}
+
+declare float     @llvm.fma.f32(float %a, float %b, float %c)
+define float @fma_f(float %a, float %b, float %c) {
+; CHECK-LABEL: fma_f:
+; SOFT: bl fmaf
+; HARD: vfma.f32
+  %1 = call float @llvm.fma.f32(float %a, float %b, float %c)
+  ret float %1
+}
+
+declare float     @llvm.fabs.f32(float %Val)
+define float @abs_f(float %a) {
+; CHECK-LABEL: abs_f:
+; SOFT: bic r0, r0, #-2147483648
+; HARD: vabs.f32
+  %1 = call float @llvm.fabs.f32(float %a)
+  ret float %1
+}
+
+declare float     @llvm.copysign.f32(float  %Mag, float  %Sgn)
+define float @copysign_f(float %a, float %b) {
+; CHECK-LABEL: copysign_f:
+; NONE: lsrs [[REG:r[0-9]+]], r{{[0-9]+}}, #31
+; NONE: bfi r{{[0-9]+}}, [[REG]], #31, #1
+; SP: lsrs [[REG:r[0-9]+]], r{{[0-9]+}}, #31
+; SP: bfi r{{[0-9]+}}, [[REG]], #31, #1
+; VFP: lsrs [[REG:r[0-9]+]], r{{[0-9]+}}, #31
+; VFP: bfi r{{[0-9]+}}, [[REG]], #31, #1
+; NEON: vmov.i32 [[REG:d[0-9]+]], #0x80000000
+; NEON: vbsl [[REG]], d
+  %1 = call float @llvm.copysign.f32(float %a, float %b)
+  ret float %1
+}
+
+declare float     @llvm.floor.f32(float %Val)
+define float @floor_f(float %a) {
+; CHECK-LABEL: floor_f:
+; SOFT: bl floorf
+; VFP4: b floorf
+; FP-ARMv8: vrintm.f32
+  %1 = call float @llvm.floor.f32(float %a)
+  ret float %1
+}
+
+declare float     @llvm.ceil.f32(float %Val)
+define float @ceil_f(float %a) {
+; CHECK-LABEL: ceil_f:
+; SOFT: bl ceilf
+; VFP4: b ceilf
+; FP-ARMv8: vrintp.f32
+  %1 = call float @llvm.ceil.f32(float %a)
+  ret float %1
+}
+
+declare float     @llvm.trunc.f32(float %Val)
+define float @trunc_f(float %a) {
+; CHECK-LABEL: trunc_f:
+; SOFT: bl truncf
+; VFP4: b truncf
+; FP-ARMv8: vrintz.f32
+  %1 = call float @llvm.trunc.f32(float %a)
+  ret float %1
+}
+
+declare float     @llvm.rint.f32(float %Val)
+define float @rint_f(float %a) {
+; CHECK-LABEL: rint_f:
+; SOFT: bl rintf
+; VFP4: b rintf
+; FP-ARMv8: vrintx.f32
+  %1 = call float @llvm.rint.f32(float %a)
+  ret float %1
+}
+
+declare float     @llvm.nearbyint.f32(float %Val)
+define float @nearbyint_f(float %a) {
+; CHECK-LABEL: nearbyint_f:
+; SOFT: bl nearbyintf
+; VFP4: b nearbyintf
+; FP-ARMv8: vrintr.f32
+  %1 = call float @llvm.nearbyint.f32(float %a)
+  ret float %1
+}
+
+declare float     @llvm.round.f32(float %Val)
+define float @round_f(float %a) {
+; CHECK-LABEL: round_f:
+; SOFT: bl roundf
+; VFP4: b roundf
+; FP-ARMv8: vrinta.f32
+  %1 = call float @llvm.round.f32(float %a)
+  ret float %1
+}
+
+; FIXME: why does cortex-m4 use vmla, while cortex-a7 uses vmul+vadd?
+; (these should be equivalent, even the rounding is the same)
+declare float     @llvm.fmuladd.f32(float %a, float %b, float %c)
+define float @fmuladd_f(float %a, float %b, float %c) {
+; CHECK-LABEL: fmuladd_f:
+; SOFT: bl __aeabi_fmul
+; SOFT: bl __aeabi_fadd
+; VMLA: vmla.f32
+; NO-VMLA: vmul.f32
+; NO-VMLA: vadd.f32
+  %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
+  ret float %1
+}
+
+declare i16 @llvm.convert.to.fp16.f32(float %a)
+define i16 @f_to_h(float %a) {
+; CHECK-LABEL: f_to_h:
+; SOFT: bl __gnu_f2h_ieee
+; HARD: vcvt{{[bt]}}.f16.f32
+  %1 = call i16 @llvm.convert.to.fp16.f32(float %a)
+  ret i16 %1
+}
+
+declare float @llvm.convert.from.fp16.f32(i16 %a)
+define float @h_to_f(i16 %a) {
+; CHECK-LABEL: h_to_f:
+; SOFT: bl __gnu_h2f_ieee
+; HARD: vcvt{{[bt]}}.f32.f16
+  %1 = call float @llvm.convert.from.fp16.f32(i16 %a)
+  ret float %1
+}
diff --git a/test/CodeGen/Thumb2/float-ops.ll b/test/CodeGen/Thumb2/float-ops.ll
new file mode 100644
index 000000000000..d383065cd53e
--- /dev/null
+++ b/test/CodeGen/Thumb2/float-ops.ll
@@ -0,0 +1,293 @@
+; RUN: llc < %s -mtriple=thumbv7-none-eabi   -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=VFP4-ALL
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=FP-ARMv8
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP4-ALL -check-prefix=VFP4-DP
+
+define float @add_f(float %a, float %b) {
+entry:
+; CHECK-LABEL: add_f:
+; NONE: bl __aeabi_fadd
+; HARD: vadd.f32  s0, s0, s1
+  %0 = fadd float %a, %b
+  ret float %0
+}
+
+define double @add_d(double %a, double %b) {
+entry:
+; CHECK-LABEL: add_d:
+; NONE: bl __aeabi_dadd
+; SP: bl __aeabi_dadd
+; DP: vadd.f64  d0, d0, d1
+  %0 = fadd double %a, %b
+  ret double %0
+}
+
+define float @sub_f(float %a, float %b) {
+entry:
+; CHECK-LABEL: sub_f:
+; NONE: bl __aeabi_fsub
+; HARD: vsub.f32  s
+  %0 = fsub float %a, %b
+  ret float %0
+}
+
+define double @sub_d(double %a, double %b) {
+entry:
+; CHECK-LABEL: sub_d:
+; NONE: bl __aeabi_dsub
+; SP: bl __aeabi_dsub
+; DP: vsub.f64  d0, d0, d1
+  %0 = fsub double %a, %b
+  ret double %0
+}
+
+define float @mul_f(float %a, float %b) {
+entry:
+; CHECK-LABEL: mul_f:
+; NONE: bl __aeabi_fmul
+; HARD: vmul.f32  s
+  %0 = fmul float %a, %b
+  ret float %0
+}
+
+define double @mul_d(double %a, double %b) {
+entry:
+; CHECK-LABEL: mul_d:
+; NONE: bl __aeabi_dmul
+; SP: bl __aeabi_dmul
+; DP: vmul.f64  d0, d0, d1
+  %0 = fmul double %a, %b
+  ret double %0
+}
+
+define float @div_f(float %a, float %b) {
+entry:
+; CHECK-LABEL: div_f:
+; NONE: bl __aeabi_fdiv
+; HARD: vdiv.f32  s
+  %0 = fdiv float %a, %b
+  ret float %0
+}
+
+define double @div_d(double %a, double %b) {
+entry:
+; CHECK-LABEL: div_d:
+; NONE: bl __aeabi_ddiv
+; SP: bl __aeabi_ddiv
+; DP: vdiv.f64  d0, d0, d1
+  %0 = fdiv double %a, %b
+  ret double %0
+}
+
+define float @rem_f(float %a, float %b) {
+entry:
+; CHECK-LABEL: rem_f:
+; NONE: bl fmodf
+; HARD: b fmodf
+  %0 = frem float %a, %b
+  ret float %0
+}
+
+define double @rem_d(double %a, double %b) {
+entry:
+; CHECK-LABEL: rem_d:
+; NONE: bl fmod
+; HARD: b fmod
+  %0 = frem double %a, %b
+  ret double %0
+}
+
+define float @load_f(float* %a) {
+entry:
+; CHECK-LABEL: load_f:
+; NONE: ldr r0, [r0]
+; HARD: vldr s0, [r0]
+  %0 = load float* %a, align 4
+  ret float %0
+}
+
+define double @load_d(double* %a) {
+entry:
+; CHECK-LABEL: load_d:
+; NONE: ldm.w r0, {r0, r1}
+; HARD: vldr d0, [r0]
+  %0 = load double* %a, align 8
+  ret double %0
+}
+
+define void @store_f(float* %a, float %b) {
+entry:
+; CHECK-LABEL: store_f:
+; NONE: str r1, [r0]
+; HARD: vstr s0, [r0]
+  store float %b, float* %a, align 4
+  ret void
+}
+
+define void @store_d(double* %a, double %b) {
+entry:
+; CHECK-LABEL: store_d:
+; NONE: mov r1, r3
+; NONE: str r2, [r0]
+; NONE: str r1, [r0, #4]
+; HARD: vstr d0, [r0]
+  store double %b, double* %a, align 8
+  ret void
+}
+
+define double @f_to_d(float %a) {
+; CHECK-LABEL: f_to_d:
+; NONE: bl __aeabi_f2d
+; SP: bl __aeabi_f2d
+; DP: vcvt.f64.f32 d0, s0
+  %1 = fpext float %a to double
+  ret double %1
+}
+
+define float @d_to_f(double %a) {
+; CHECK-LABEL: d_to_f:
+; NONE: bl __aeabi_d2f
+; SP: bl __aeabi_d2f
+; DP: vcvt.f32.f64 s0, d0
+  %1 = fptrunc double %a to float
+  ret float %1
+}
+
+define i32 @f_to_si(float %a) {
+; CHECK-LABEL: f_to_si:
+; NONE: bl __aeabi_f2iz
+; HARD: vcvt.s32.f32 s0, s0
+; HARD: vmov r0, s0
+  %1 = fptosi float %a to i32
+  ret i32 %1
+}
+
+define i32 @d_to_si(double %a) {
+; CHECK-LABEL: d_to_si:
+; NONE: bl __aeabi_d2iz
+; SP: vmov r0, r1, d0
+; SP: bl __aeabi_d2iz
+; DP: vcvt.s32.f64 s0, d0
+; DP: vmov r0, s0
+  %1 = fptosi double %a to i32
+  ret i32 %1
+}
+
+define i32 @f_to_ui(float %a) {
+; CHECK-LABEL: f_to_ui:
+; NONE: bl __aeabi_f2uiz
+; HARD: vcvt.u32.f32 s0, s0
+; HARD: vmov r0, s0
+  %1 = fptoui float %a to i32
+  ret i32 %1
+}
+
+define i32 @d_to_ui(double %a) {
+; CHECK-LABEL: d_to_ui:
+; NONE: bl __aeabi_d2uiz
+; SP: vmov r0, r1, d0
+; SP: bl __aeabi_d2uiz
+; DP: vcvt.u32.f64 s0, d0
+; DP: vmov r0, s0
+  %1 = fptoui double %a to i32
+  ret i32 %1
+}
+
+define float @si_to_f(i32 %a) {
+; CHECK-LABEL: si_to_f:
+; NONE: bl __aeabi_i2f
+; HARD: vcvt.f32.s32 s0, s0
+  %1 = sitofp i32 %a to float
+  ret float %1
+}
+
+define double @si_to_d(i32 %a) {
+; CHECK-LABEL: si_to_d:
+; NONE: bl __aeabi_i2d
+; SP: bl __aeabi_i2d
+; DP: vcvt.f64.s32 d0, s0
+  %1 = sitofp i32 %a to double
+  ret double %1
+}
+
+define float @ui_to_f(i32 %a) {
+; CHECK-LABEL: ui_to_f:
+; NONE: bl __aeabi_ui2f
+; HARD: vcvt.f32.u32 s0, s0
+  %1 = uitofp i32 %a to float
+  ret float %1
+}
+
+define double @ui_to_d(i32 %a) {
+; CHECK-LABEL: ui_to_d:
+; NONE: bl __aeabi_ui2d
+; SP: bl __aeabi_ui2d
+; DP: vcvt.f64.u32 d0, s0
+  %1 = uitofp i32 %a to double
+  ret double %1
+}
+
+define float @bitcast_i_to_f(i32 %a) {
+; CHECK-LABEL: bitcast_i_to_f:
+; NONE-NOT: mov
+; HARD: vmov s0, r0
+  %1 = bitcast i32 %a to float
+  ret float %1
+}
+
+define double @bitcast_i_to_d(i64 %a) {
+; CHECK-LABEL: bitcast_i_to_d:
+; NONE-NOT: mov
+; HARD: vmov d0, r0, r1
+  %1 = bitcast i64 %a to double
+  ret double %1
+}
+
+define i32 @bitcast_f_to_i(float %a) {
+; CHECK-LABEL: bitcast_f_to_i:
+; NONE-NOT: mov
+; HARD: vmov r0, s0
+  %1 = bitcast float %a to i32
+  ret i32 %1
+}
+
+define i64 @bitcast_d_to_i(double %a) {
+; CHECK-LABEL: bitcast_d_to_i:
+; NONE-NOT: mov
+; HARD: vmov r0, r1, d0
+  %1 = bitcast double %a to i64
+  ret i64 %1
+}
+
+define float @select_f(float %a, float %b, i1 %c) {
+; CHECK-LABEL: select_f:
+; NONE: tst.w   r2, #1
+; NONE: moveq   r0, r1
+; HARD: tst.w   r0, #1
+; VFP4-ALL: vmovne.f32      s1, s0
+; VFP4-ALL: vmov.f32        s0, s1
+; FP-ARMv8: vseleq.f32 s0, s1, s0
+  %1 = select i1 %c, float %a, float %b
+  ret float %1
+}
+
+define double @select_d(double %a, double %b, i1 %c) {
+; CHECK-LABEL: select_d:
+; NONE: ldr.w   [[REG:r[0-9]+]], [sp]
+; NONE: ands    [[REG]], [[REG]], #1
+; NONE: moveq   r0, r2
+; NONE: moveq   r1, r3
+; SP: ands r0, r0, #1
+; SP-DAG: vmov [[ALO:r[0-9]+]], [[AHI:r[0-9]+]], d0
+; SP-DAG: vmov [[BLO:r[0-9]+]], [[BHI:r[0-9]+]], d1
+; SP: itt ne
+; SP-DAG: movne [[BLO]], [[ALO]]
+; SP-DAG: movne [[BHI]], [[AHI]]
+; SP: vmov d0, [[BLO]], [[BHI]]
+; DP: tst.w   r0, #1
+; VFP4-DP: vmovne.f64      d1, d0
+; VFP4-DP: vmov.f64        d0, d1
+; FP-ARMV8: vseleq.f64      d0, d1, d0
+  %1 = select i1 %c, double %a, double %b
+  ret double %1
+}
diff --git a/test/CodeGen/Thumb2/stack_guard_remat.ll b/test/CodeGen/Thumb2/stack_guard_remat.ll
new file mode 100644
index 000000000000..c8ea8714d317
--- /dev/null
+++ b/test/CodeGen/Thumb2/stack_guard_remat.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -no-integrated-as | FileCheck %s -check-prefix=PIC
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=static -no-integrated-as | FileCheck %s -check-prefix=STATIC
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=dynamic-no-pic -no-integrated-as | FileCheck %s  -check-prefix=DYNAMIC-NO-PIC
+
+;PIC:   foo2
+;PIC:   movw  [[R0:r[0-9]+]], :lower16:(L___stack_chk_guard$non_lazy_ptr-([[LABEL0:LPC[0-9_]+]]+4))
+;PIC:   movt  [[R0]], :upper16:(L___stack_chk_guard$non_lazy_ptr-([[LABEL0]]+4))
+;PIC: [[LABEL0]]:
+;PIC:   add [[R0]], pc
+;PIC:   ldr [[R1:r[0-9]+]], {{\[}}[[R0]]{{\]}}
+;PIC:   ldr {{r[0-9]+}}, {{\[}}[[R1]]{{\]}}
+
+;STATIC:   foo2
+;STATIC:   movw  [[R0:r[0-9]+]], :lower16:___stack_chk_guard
+;STATIC:   movt  [[R0]], :upper16:___stack_chk_guard
+;STATIC:   ldr {{r[0-9]+}}, {{\[}}[[R0]]{{\]}}
+
+;DYNAMIC-NO-PIC:   foo2
+;DYNAMIC-NO-PIC:   movw  [[R0:r[0-9]+]], :lower16:L___stack_chk_guard$non_lazy_ptr
+;DYNAMIC-NO-PIC:   movt  [[R0]], :upper16:L___stack_chk_guard$non_lazy_ptr
+;DYNAMIC-NO-PIC:   ldr {{r[0-9]+}}, {{\[}}[[R0]]{{\]}}
+
+; Function Attrs: nounwind ssp
+define i32 @test_stack_guard_remat() #0 {
+  %a1 = alloca [256 x i32], align 4
+  %1 = bitcast [256 x i32]* %a1 to i8*
+  call void @llvm.lifetime.start(i64 1024, i8* %1)
+  %2 = getelementptr inbounds [256 x i32]* %a1, i32 0, i32 0
+  call void @foo3(i32* %2) #3
+  call void asm sideeffect "foo2", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{sp},~{lr}"()
+  call void @llvm.lifetime.end(i64 1024, i8* %1)
+  ret i32 0
+}
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+
+declare void @foo3(i32*)
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+
+attributes #0 = { nounwind ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/Thumb2/thumb2-cmn.ll b/test/CodeGen/Thumb2/thumb2-cmn.ll
index efa150529ad6..0f361d75e52c 100644
--- a/test/CodeGen/Thumb2/thumb2-cmn.ll
+++ b/test/CodeGen/Thumb2/thumb2-cmn.ll
@@ -79,7 +79,7 @@ define void @f9(i32 %a, i32 %b) nounwind optsize {
   ret void
 }
 
-!0 = metadata !{i32 81}
+!0 = !{i32 81}
 
 ; CHECK-LABEL: f9:
 ; CHECK: 	cmn.w	r0, r1
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index 94f472593b3c..d1deb461574f 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -11,7 +11,7 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
 
 define void @aaa(%quuz* %this, i8* %block) {
 ; CHECK-LABEL: aaa:
-; CHECK: bic r4, r4, #15
+; CHECK: bfc r4, #0, #4
 ; CHECK: vst1.64 {{.*}}[{{.*}}:128]
 ; CHECK: vld1.64 {{.*}}[{{.*}}:128]
 entry:
diff --git a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
index cef3490e2a38..02a8c47ea48b 100644
--- a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
@@ -2,15 +2,15 @@
 ; RUN:  | FileCheck %s
 
 define i32 @test0(i8 %A) {
-; CHECK: test0
+; CHECK-LABEL: test0:
 ; CHECK: sxtb r0, r0
         %B = sext i8 %A to i32
 	ret i32 %B
 }
 
 define signext i8 @test1(i32 %A)  {
-; CHECK: test1
-; CHECK: sxtb.w r0, r0, ror #8
+; CHECK-LABEL: test1:
+; CHECK: sbfx r0, r0, #8, #8
 	%B = lshr i32 %A, 8
 	%C = shl i32 %A, 24
 	%D = or i32 %B, %C
@@ -19,9 +19,8 @@ define signext i8 @test1(i32 %A)  {
 }
 
 define signext i32 @test2(i32 %A, i32 %X)  {
-; CHECK: test2
-; CHECK: lsrs r0, r0, #8
-; CHECK: sxtab  r0, r1, r0
+; CHECK-LABEL: test2:
+; CHECK: sxtab  r0, r1, r0, ror #8
 	%B = lshr i32 %A, 8
 	%C = shl i32 %A, 24
 	%D = or i32 %B, %C
@@ -30,3 +29,14 @@ define signext i32 @test2(i32 %A, i32 %X)  {
         %G = add i32 %F, %X
 	ret i32 %G
 }
+
+define i32 @test3(i32 %A, i32 %X) {
+; CHECK-LABEL: test3:
+; CHECK: sxtah r0, r0, r1, ror #8
+  %X.hi = lshr i32 %X, 8
+  %X.trunc = trunc i32 %X.hi to i16
+  %addend = sext i16 %X.trunc to i32
+
+  %sum = add i32 %A, %addend
+  ret i32 %sum
+}
diff --git a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
index bcd4a0fa38ff..4afea894aebc 100644
--- a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
@@ -24,8 +24,8 @@ define zeroext i32 @test2(i32 %A.u, i32 %B.u)  {
 }
 
 define zeroext i32 @test3(i32 %A.u)  {
-; A8: test3
-; A8: uxth.w r0, r0, ror #8
+; A8-LABEL: test3
+; A8: ubfx  r0, r0, #8, #16
     %B.u = lshr i32 %A.u, 8
     %C.u = shl i32 %A.u, 24
     %D.u = or i32 %B.u, %C.u
@@ -33,3 +33,25 @@ define zeroext i32 @test3(i32 %A.u)  {
     %F.u = zext i16 %E.u to i32
     ret i32 %F.u
 }
+
+define i32 @test4(i32 %A, i32 %X) {
+; A8-LABEL: test4:
+; A8: uxtab r0, r0, r1, ror #16
+  %X.hi = lshr i32 %X, 16
+  %X.trunc = trunc i32 %X.hi to i8
+  %addend = zext i8 %X.trunc to i32
+
+  %sum = add i32 %A, %addend
+  ret i32 %sum
+}
+
+define i32 @test5(i32 %A, i32 %X) {
+; A8-LABEL: test5:
+; A8: uxtah r0, r0, r1, ror #8
+  %X.hi = lshr i32 %X, 8
+  %X.trunc = trunc i32 %X.hi to i16
+  %addend = zext i16 %X.trunc to i32
+
+  %sum = add i32 %A, %addend
+  ret i32 %sum
+}
diff --git a/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
index 638d399056a2..62c503da35a6 100644
--- a/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
+++ b/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s
 
-@__gthrw_pthread_once = alias weak i32 (i32*, void ()*)* @pthread_once		; <i32 (i32*, void ()*)*> [#uses=0]
+@__gthrw_pthread_once = weak alias i32 (i32*, void ()*)* @pthread_once		; <i32 (i32*, void ()*)*> [#uses=0]
 
 define weak i32 @pthread_once(i32*, void ()*) {
   ret i32 0
diff --git a/test/CodeGen/X86/2008-06-18-BadShuffle.ll b/test/CodeGen/X86/2008-06-18-BadShuffle.ll
deleted file mode 100644
index 66f9065799e5..000000000000
--- a/test/CodeGen/X86/2008-06-18-BadShuffle.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=i386 -mattr=+sse2 | grep pinsrw
-
-; Test to make sure we actually insert the bottom element of the vector
-define <8 x i16> @a(<8 x i16> %a) nounwind  {
-entry:
-	shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> < i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8 >
-	%add = add <8 x i16> %0, %a
-	ret <8 x i16> %add
-}
-
diff --git a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
index 296f0ca135b8..6c8e3b5a8fdc 100644
--- a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
+++ b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
@@ -14,9 +14,9 @@ entry:
   %2 = alloca i64                                 ; <i64*> [#uses=1]
   %3 = alloca i64                                 ; <i64*> [#uses=6]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{i8** %s1_addr}, metadata !0), !dbg !7
+  call void @llvm.dbg.declare(metadata i8** %s1_addr, metadata !0, metadata !{!"0x102"}), !dbg !7
   store i8* %s1, i8** %s1_addr
-  call void @llvm.dbg.declare(metadata !{[0 x i8]** %str.0}, metadata !8), !dbg !7
+  call void @llvm.dbg.declare(metadata [0 x i8]** %str.0, metadata !8, metadata !{!"0x102"}), !dbg !7
   %4 = call i8* @llvm.stacksave(), !dbg !7        ; <i8*> [#uses=1]
   store i8* %4, i8** %saved_stack.1, align 8, !dbg !7
   %5 = load i8** %s1_addr, align 8, !dbg !13      ; <i8*> [#uses=1]
@@ -58,7 +58,7 @@ return:                                           ; preds = %entry
   ret i8 %retval12, !dbg !16
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare i8* @llvm.stacksave() nounwind
 
@@ -66,22 +66,22 @@ declare i64 @strlen(i8*) nounwind readonly
 
 declare void @llvm.stackrestore(i8*) nounwind
 
-!0 = metadata !{i32 459009, metadata !1, metadata !"s1", metadata !2, i32 2, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 458798, i32 0, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 458769, metadata !17, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !18, metadata !18, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 458773, null, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5, metadata !6}
-!5 = metadata !{i32 458788, null, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458767, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 2, i32 0, metadata !1, null}
-!8 = metadata !{i32 459008, metadata !1, metadata !"str.0", metadata !2, i32 3, metadata !9} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458767, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 458753, null, metadata !2, metadata !"", i32 0, i64 8, i64 8, i64 0, i32 0, metadata !5, metadata !11, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 8, align 8, offset 0] [from char]
-!11 = metadata !{metadata !12}
-!12 = metadata !{i32 458785, i64 0, i64 1}        ; [ DW_TAG_subrange_type ]
-!13 = metadata !{i32 3, i32 0, metadata !14, null}
-!14 = metadata !{i32 458763, metadata !17, metadata !1, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!15 = metadata !{i32 4, i32 0, metadata !14, null}
-!16 = metadata !{i32 5, i32 0, metadata !14, null}
-!17 = metadata !{metadata !"vla.c", metadata !"/tmp/"}
-!18 = metadata !{i32 0}
+!0 = !{!"0x101\00s1\002\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00foo\00foo\00foo\002\000\001\000\006\000\000\000", i32 0, !2, !3, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", !17, !18, !18, null, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", null, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5, !6}
+!5 = !{!"0x24\00char\000\008\008\000\000\006", null, !2} ; [ DW_TAG_base_type ]
+!6 = !{!"0xf\00\000\0064\0064\000\000", null, !2, !5} ; [ DW_TAG_pointer_type ]
+!7 = !MDLocation(line: 2, scope: !1)
+!8 = !{!"0x100\00str.0\003\000", !1, !2, !9} ; [ DW_TAG_auto_variable ]
+!9 = !{!"0xf\00\000\0064\0064\000\0064", null, !2, !10} ; [ DW_TAG_pointer_type ]
+!10 = !{!"0x1\00\000\008\008\000\000", null, !2, !5, !11, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 8, align 8, offset 0] [from char]
+!11 = !{!12}
+!12 = !{!"0x21\000\001"}        ; [ DW_TAG_subrange_type ]
+!13 = !MDLocation(line: 3, scope: !14)
+!14 = !{!"0xb\000\000\000", !17, !1} ; [ DW_TAG_lexical_block ]
+!15 = !MDLocation(line: 4, scope: !14)
+!16 = !MDLocation(line: 5, scope: !14)
+!17 = !{!"vla.c", !"/tmp/"}
+!18 = !{i32 0}
diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index 764c2cdd6d99..e046b966921f 100644
--- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "4 machine-licm"
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn -stats 2>&1 | grep "7 machine-licm"
 ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse4.1 -mcpu=penryn | FileCheck %s
 ; rdar://6627786
 ; rdar://7792037
diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
deleted file mode 100644
index e1930e012dd8..000000000000
--- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \
-; RUN:     -mcpu=generic -disable-fp-elim -mattr=-sse4.1,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
-; RUN:   FileCheck %s
-; rdar://6808032
-
-; CHECK: pextrw $14
-; CHECK-NEXT: shrl $8
-; CHECK-NEXT: pinsrw
-
-define void @update(i8** %args_list) nounwind {
-entry:
-	%cmp.i = icmp eq i32 0, 0		; <i1> [#uses=1]
-	br i1 %cmp.i, label %if.then.i, label %test_cl.exit
-
-if.then.i:		; preds = %entry
-	%val = load <16 x i8> addrspace(1)* null		; <<16 x i8>> [#uses=8]
-	%tmp10.i = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 undef, i8 0, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef>, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 4, i32 undef, i32 6, i32 undef, i32 29, i32 undef, i32 10, i32 11, i32 12, i32 undef, i32 undef, i32 undef>		; <<16 x i8>> [#uses=1]
-	%tmp17.i = shufflevector <16 x i8> %tmp10.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 18, i32 4, i32 undef, i32 6, i32 undef, i32 8, i32 undef, i32 10, i32 11, i32 12, i32 undef, i32 undef, i32 undef>		; <<16 x i8>> [#uses=1]
-	%tmp24.i = shufflevector <16 x i8> %tmp17.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 24, i32 6, i32 undef, i32 8, i32 undef, i32 10, i32 11, i32 12, i32 undef, i32 undef, i32 undef>		; <<16 x i8>> [#uses=1]
-	%tmp31.i = shufflevector <16 x i8> %tmp24.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef, i32 8, i32 undef, i32 10, i32 11, i32 12, i32 21, i32 undef, i32 undef>		; <<16 x i8>> [#uses=1]
-	%tmp38.i = shufflevector <16 x i8> %tmp31.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 27, i32 8, i32 undef, i32 10, i32 11, i32 12, i32 13, i32 undef, i32 undef>		; <<16 x i8>> [#uses=1]
-	%tmp45.i = shufflevector <16 x i8> %tmp38.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 undef, i32 10, i32 11, i32 12, i32 13, i32 29, i32 undef>		; <<16 x i8>> [#uses=1]
-	%tmp52.i = shufflevector <16 x i8> %tmp45.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 21, i32 10, i32 11, i32 12, i32 13, i32 14, i32 undef>		; <<16 x i8>> [#uses=1]
-	%tmp59.i = shufflevector <16 x i8> %tmp52.i, <16 x i8> %val, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 20>		; <<16 x i8>> [#uses=1]
-	store <16 x i8> %tmp59.i, <16 x i8> addrspace(1)* null
-	ret void
-
-test_cl.exit:		; preds = %entry
-	ret void
-}
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
index 50c62dfb73b8..ffbe02c71356 100644
--- a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
+++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -1,9 +1,11 @@
-; RUN: llc < %s -march=x86 -mcpu=core2 > %t1
-; RUN: grep movzwl %t1 | count 2
-; RUN: grep movzbl %t1 | count 1
-; RUN: grep movd %t1 | count 4
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
 
 define <4 x i16> @a(i32* %x1) nounwind {
+; CHECK-LABEL: a:
+; CHECK:         shrl %[[R:[^,]+]]
+; CHECK-NEXT:    movd %[[R]], %xmm0
+; CHECK-NEXT:    retl
+
   %x2 = load i32* %x1
   %x3 = lshr i32 %x2, 1
   %x = trunc i32 %x3 to i16
@@ -12,6 +14,12 @@ define <4 x i16> @a(i32* %x1) nounwind {
 }
 
 define <8 x i16> @b(i32* %x1) nounwind {
+; CHECK-LABEL: b:
+; CHECK:         shrl %e[[R:.]]x
+; CHECK-NEXT:    movzwl %[[R]]x, %e[[R]]x
+; CHECK-NEXT:    movd %e[[R]]x, %xmm0
+; CHECK-NEXT:    retl
+
   %x2 = load i32* %x1
   %x3 = lshr i32 %x2, 1
   %x = trunc i32 %x3 to i16
@@ -20,6 +28,12 @@ define <8 x i16> @b(i32* %x1) nounwind {
 }
 
 define <8 x i8> @c(i32* %x1) nounwind {
+; CHECK-LABEL: c:
+; CHECK:         shrl %e[[R:.]]x
+; CHECK-NEXT:    movzwl %[[R]]x, %e[[R]]x
+; CHECK-NEXT:    movd %e[[R]]x, %xmm0
+; CHECK-NEXT:    retl
+
   %x2 = load i32* %x1
   %x3 = lshr i32 %x2, 1
   %x = trunc i32 %x3 to i8
@@ -28,6 +42,12 @@ define <8 x i8> @c(i32* %x1) nounwind {
 }
 
 define <16 x i8> @d(i32* %x1) nounwind {
+; CHECK-LABEL: d:
+; CHECK:         shrl %e[[R:.]]x
+; CHECK-NEXT:    movzbl %[[R]]l, %e[[R]]x
+; CHECK-NEXT:    movd %e[[R]]x, %xmm0
+; CHECK-NEXT:    retl
+
   %x2 = load i32* %x1
   %x3 = lshr i32 %x2, 1
   %x = trunc i32 %x3 to i8
diff --git a/test/CodeGen/X86/2009-10-16-Scope.ll b/test/CodeGen/X86/2009-10-16-Scope.ll
index a936edc120d8..e75d594e6682 100644
--- a/test/CodeGen/X86/2009-10-16-Scope.ll
+++ b/test/CodeGen/X86/2009-10-16-Scope.ll
@@ -9,7 +9,7 @@ entry:
   br label %do.body, !dbg !0
 
 do.body:                                          ; preds = %entry
-  call void @llvm.dbg.declare(metadata !{i32* %count_}, metadata !4)
+  call void @llvm.dbg.declare(metadata i32* %count_, metadata !4, metadata !{!"0x102"})
   %conv = ptrtoint i32* %count_ to i32, !dbg !0   ; <i32> [#uses=1]
   %call = call i32 @foo(i32 %conv) ssp, !dbg !0   ; <i32> [#uses=0]
   br label %do.end, !dbg !0
@@ -18,17 +18,17 @@ do.end:                                           ; preds = %do.body
   ret void, !dbg !7
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare i32 @foo(i32) ssp
 
-!0 = metadata !{i32 5, i32 2, metadata !1, null}
-!1 = metadata !{i32 458763, null, metadata !2, i32 1, i32 1, i32 0}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", i32 4, null, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!3 = metadata !{i32 458769, metadata !8, i32 12, metadata !"clang 1.1", i1 true, metadata !"", i32 0, null, metadata !9, null, null, null, metadata !""}; [DW_TAG_compile_unit ]
-!4 = metadata !{i32 459008, metadata !5, metadata !"count_", metadata !3, i32 5, metadata !6}; [ DW_TAG_auto_variable ]
-!5 = metadata !{i32 458763, null, metadata !1, i32 1, i32 1, i32 0}; [DW_TAG_lexical_block ]
-!6 = metadata !{i32 458788, null, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
-!7 = metadata !{i32 6, i32 1, metadata !2, null}
-!8 = metadata !{metadata !"genmodes.i", metadata !"/Users/yash/Downloads"}
-!9 = metadata !{i32 0}
+!0 = !MDLocation(line: 5, column: 2, scope: !1)
+!1 = !{!"0xb\001\001\000", null, !2}; [DW_TAG_lexical_block ]
+!2 = !{!"0x2e\00bar\00bar\00bar\004\000\001\000\006\000\000\000", i32 0, !3, null, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!3 = !{!"0x11\0012\00clang 1.1\001\00\000\00\000", !8, null, !9, null, null, null}; [DW_TAG_compile_unit ]
+!4 = !{!"0x100\00count_\005\000", !5, !3, !6}; [ DW_TAG_auto_variable ]
+!5 = !{!"0xb\001\001\000", null, !1}; [DW_TAG_lexical_block ]
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !3}; [DW_TAG_base_type ]
+!7 = !MDLocation(line: 6, column: 1, scope: !2)
+!8 = !{!"genmodes.i", !"/Users/yash/Downloads"}
+!9 = !{i32 0}
diff --git a/test/CodeGen/X86/2010-01-18-DbgValue.ll b/test/CodeGen/X86/2010-01-18-DbgValue.ll
index f99e68242811..b21846d39494 100644
--- a/test/CodeGen/X86/2010-01-18-DbgValue.ll
+++ b/test/CodeGen/X86/2010-01-18-DbgValue.ll
@@ -12,7 +12,7 @@ entry:
   %retval = alloca double                         ; <double*> [#uses=2]
   %0 = alloca double                              ; <double*> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{%struct.Rect* %my_r0}, metadata !0), !dbg !15
+  call void @llvm.dbg.declare(metadata %struct.Rect* %my_r0, metadata !0, metadata !{!"0x102"}), !dbg !15
   %1 = getelementptr inbounds %struct.Rect* %my_r0, i32 0, i32 0, !dbg !16 ; <%struct.Pt*> [#uses=1]
   %2 = getelementptr inbounds %struct.Pt* %1, i32 0, i32 0, !dbg !16 ; <double*> [#uses=1]
   %3 = load double* %2, align 8, !dbg !16         ; <double> [#uses=1]
@@ -26,30 +26,30 @@ return:                                           ; preds = %entry
   ret double %retval1, !dbg !16
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!21}
 
-!0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !19, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !19, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !20, metadata !20, metadata !18, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{metadata !6, metadata !7}
-!6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Rect] [line 6, size 256, align 64, offset 0] [def] [from ]
-!8 = metadata !{metadata !9, metadata !14}
-!9 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P1", i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Pt] [line 1, size 128, align 64, offset 0] [def] [from ]
-!11 = metadata !{metadata !12, metadata !13}
-!12 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"x", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!13 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"y", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
-!14 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P2", i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
-!15 = metadata !{i32 11, i32 0, metadata !1, null}
-!16 = metadata !{i32 12, i32 0, metadata !17, null}
-!17 = metadata !{i32 786443, metadata !19, metadata !1, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!18 = metadata !{metadata !1}
-!19 = metadata !{metadata !"b2.c", metadata !"/tmp/"}
-!20 = metadata !{i32 0}
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x101\00my_r0\0011\000", !1, !2, !7} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00foo\00foo\00foo\0011\000\001\000\006\000\000\0011", !19, !2, !4, null, double (%struct.Rect*)* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !19} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\000\00\000\00\000", !19, !20, !20, !18, null, null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !19, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{!6, !7}
+!6 = !{!"0x24\00double\000\0064\0064\000\000\004", !19, !2} ; [ DW_TAG_base_type ]
+!7 = !{!"0x13\00Rect\006\00256\0064\000\000\000", !19, !2, null, !8, null, null, null} ; [ DW_TAG_structure_type ] [Rect] [line 6, size 256, align 64, offset 0] [def] [from ]
+!8 = !{!9, !14}
+!9 = !{!"0xd\00P1\007\00128\0064\000\000", !19, !7, !10} ; [ DW_TAG_member ]
+!10 = !{!"0x13\00Pt\001\00128\0064\000\000\000", !19, !2, null, !11, null, null, null} ; [ DW_TAG_structure_type ] [Pt] [line 1, size 128, align 64, offset 0] [def] [from ]
+!11 = !{!12, !13}
+!12 = !{!"0xd\00x\002\0064\0064\000\000", !19, !10, !6} ; [ DW_TAG_member ]
+!13 = !{!"0xd\00y\003\0064\0064\0064\000", !19, !10, !6} ; [ DW_TAG_member ]
+!14 = !{!"0xd\00P2\008\00128\0064\00128\000", !19, !7, !10} ; [ DW_TAG_member ]
+!15 = !MDLocation(line: 11, scope: !1)
+!16 = !MDLocation(line: 12, scope: !17)
+!17 = !{!"0xb\0011\000\000", !19, !1} ; [ DW_TAG_lexical_block ]
+!18 = !{!1}
+!19 = !{!"b2.c", !"/tmp/"}
+!20 = !{i32 0}
+!21 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
index 4d4e8c197d87..b85f1afea0cf 100644
--- a/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
+++ b/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
@@ -8,28 +8,28 @@
 
 define i32 @"main(tart.core.String[])->int32"(i32 %args) {
 entry:
-  tail call void @llvm.dbg.value(metadata !14, i64 0, metadata !8)
+  tail call void @llvm.dbg.value(metadata %tart.reflect.ComplexType* @.type.SwitchStmtTest, i64 0, metadata !8, metadata !{!"0x102"})
   tail call void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType* @.type.SwitchStmtTest) ; <%tart.core.Object*> [#uses=2]
   ret i32 3
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 declare void @"tart.reflect.ComplexType.create->tart.core.Object"(%tart.reflect.ComplexType*) nounwind readnone
 
-!0 = metadata !{i32 458769, metadata !15, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !16, metadata !16, null, null, null, i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 458790, metadata !15, metadata !0, metadata !"", i32 0, i64 192, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!2 = metadata !{i32 458771, metadata !15, metadata !0, metadata !"C", i32 1, i64 192, i64 64, i64 0, i32 0, null, metadata !3, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 192, align 64, offset 0] [def] [from ]
-!3 = metadata !{metadata !4, metadata !6, metadata !7}
-!4 = metadata !{i32 458765, metadata !15, metadata !2, metadata !"x", i32 1, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!5 = metadata !{i32 458788, metadata !15, metadata !0, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 458765, metadata !15, metadata !2, metadata !"y", i32 1, i64 64, i64 64, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!7 = metadata !{i32 458765, metadata !15, metadata !2, metadata !"z", i32 1, i64 64, i64 64, i64 128, i32 0, metadata !5} ; [ DW_TAG_member ]
-!8 = metadata !{i32 459008, metadata !9, metadata !"t", metadata !0, i32 5, metadata !2} ; [ DW_TAG_auto_variable ]
-!9 = metadata !{i32 458763, null, metadata !10, i32 0, i32 0, i32 0}        ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 458798, i32 0, metadata !0, metadata !"foo", metadata !"foo", metadata !"foo", i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 458773, metadata !15, metadata !0, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 458788, metadata !15, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
-!15 = metadata !{metadata !"sm.c", metadata !""}
-!16 = metadata !{i32 0}
+!0 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", !15, !16, !16, null, null, null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"0x26\00\000\00192\0064\000\000", !15, !0, !2} ; [ DW_TAG_const_type ]
+!2 = !{!"0x13\00C\001\00192\0064\000\000\000", !15, !0, null, !3, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 192, align 64, offset 0] [def] [from ]
+!3 = !{!4, !6, !7}
+!4 = !{!"0xd\00x\001\0064\0064\000\000", !15, !2, !5} ; [ DW_TAG_member ]
+!5 = !{!"0x24\00double\000\0064\0064\000\000\004", !15, !0} ; [ DW_TAG_base_type ]
+!6 = !{!"0xd\00y\001\0064\0064\0064\000", !15, !2, !5} ; [ DW_TAG_member ]
+!7 = !{!"0xd\00z\001\0064\0064\00128\000", !15, !2, !5} ; [ DW_TAG_member ]
+!8 = !{!"0x100\00t\005\000", !9, !0, !2} ; [ DW_TAG_auto_variable ]
+!9 = !{!"0xb\000\000\000", null, !10}        ; [ DW_TAG_lexical_block ]
+!10 = !{!"0x2e\00foo\00foo\00foo\004\000\001\000\006\000\000\000", i32 0, !0, !11, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!11 = !{!"0x15\00\000\000\000\000\000\000", !15, !0, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!13}
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", !15, !0} ; [ DW_TAG_base_type ]
+!14 = !{%tart.reflect.ComplexType* @.type.SwitchStmtTest}
+!15 = !{!"sm.c", !""}
+!16 = !{i32 0}
diff --git a/test/CodeGen/X86/2010-02-11-NonTemporal.ll b/test/CodeGen/X86/2010-02-11-NonTemporal.ll
index 5789a0b9847f..f9cca8c70c76 100644
--- a/test/CodeGen/X86/2010-02-11-NonTemporal.ll
+++ b/test/CodeGen/X86/2010-02-11-NonTemporal.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
 
-!0 = metadata !{ i32 1 }
+!0 = !{ i32 1 }
 
 define void @sub_(i32* noalias %n) {
 "file movnt.f90, line 2, bb1":
diff --git a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
index 5372bc522785..60025bfcdc81 100644
--- a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
+++ b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
@@ -7,7 +7,7 @@ entry:
         %tmp1 = bitcast double %a to <8 x i8>
         %tmp2 = bitcast double %b to <8 x i8>
         %tmp3 = add <8 x i8> %tmp1, %tmp2
-; CHECK:  paddw
+; CHECK:  paddb
         store <8 x i8> %tmp3, <8 x i8>* null
         ret void
 }
@@ -18,7 +18,7 @@ entry:
         %tmp1 = bitcast double %a to <4 x i16>
         %tmp2 = bitcast double %b to <4 x i16>
         %tmp3 = add <4 x i16> %tmp1, %tmp2
-; CHECK:  paddd
+; CHECK:  paddw
         store <4 x i16> %tmp3, <4 x i16>* null
         ret void
 }
@@ -29,7 +29,7 @@ entry:
         %tmp1 = bitcast double %a to <2 x i32>
         %tmp2 = bitcast double %b to <2 x i32>
         %tmp3 = add <2 x i32> %tmp1, %tmp2
-; CHECK:  paddq
+; CHECK:  paddd
         store <2 x i32> %tmp3, <2 x i32>* null
         ret void
 }
diff --git a/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll b/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll
index fc8c895af5b4..86be390b8228 100644
--- a/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll
+++ b/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll
@@ -29,4 +29,4 @@ entry:
   ret i8* %1
 }
 
-!0 = metadata !{i32 79}
+!0 = !{i32 79}
diff --git a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
index 7faee993a7d1..0d30a3f88eb9 100644
--- a/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
@@ -2,8 +2,7 @@
 ; RUN: llc -mtriple=x86_64-pc-linux -O2 -regalloc=basic < %s | FileCheck %s
 ; Test to check .debug_loc support. This test case emits many debug_loc entries.
 
-; CHECK: Loc expr size
-; CHECK-NEXT: .short
+; CHECK: .short {{.*}} # Loc expr size
 ; CHECK-NEXT: .Ltmp
 ; CHECK-NEXT: DW_OP_reg
 
@@ -11,10 +10,10 @@
 
 define hidden %0 @__divsc3(float %a, float %b, float %c, float %d) nounwind readnone {
 entry:
-  tail call void @llvm.dbg.value(metadata !{float %a}, i64 0, metadata !0)
-  tail call void @llvm.dbg.value(metadata !{float %b}, i64 0, metadata !11)
-  tail call void @llvm.dbg.value(metadata !{float %c}, i64 0, metadata !12)
-  tail call void @llvm.dbg.value(metadata !{float %d}, i64 0, metadata !13)
+  tail call void @llvm.dbg.value(metadata float %a, i64 0, metadata !0, metadata !{!"0x102"})
+  tail call void @llvm.dbg.value(metadata float %b, i64 0, metadata !11, metadata !{!"0x102"})
+  tail call void @llvm.dbg.value(metadata float %c, i64 0, metadata !12, metadata !{!"0x102"})
+  tail call void @llvm.dbg.value(metadata float %d, i64 0, metadata !13, metadata !{!"0x102"})
   %0 = tail call float @fabsf(float %c) nounwind readnone, !dbg !19 ; <float> [#uses=1]
   %1 = tail call float @fabsf(float %d) nounwind readnone, !dbg !19 ; <float> [#uses=1]
   %2 = fcmp olt float %0, %1, !dbg !19            ; <i1> [#uses=1]
@@ -22,34 +21,34 @@ entry:
 
 bb:                                               ; preds = %entry
   %3 = fdiv float %c, %d, !dbg !20                ; <float> [#uses=3]
-  tail call void @llvm.dbg.value(metadata !{float %3}, i64 0, metadata !16), !dbg !20
+  tail call void @llvm.dbg.value(metadata float %3, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !20
   %4 = fmul float %3, %c, !dbg !21                ; <float> [#uses=1]
   %5 = fadd float %4, %d, !dbg !21                ; <float> [#uses=2]
-  tail call void @llvm.dbg.value(metadata !{float %5}, i64 0, metadata !14), !dbg !21
+  tail call void @llvm.dbg.value(metadata float %5, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !21
   %6 = fmul float %3, %a, !dbg !22                ; <float> [#uses=1]
   %7 = fadd float %6, %b, !dbg !22                ; <float> [#uses=1]
   %8 = fdiv float %7, %5, !dbg !22                ; <float> [#uses=1]
-  tail call void @llvm.dbg.value(metadata !{float %8}, i64 0, metadata !17), !dbg !22
+  tail call void @llvm.dbg.value(metadata float %8, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !22
   %9 = fmul float %3, %b, !dbg !23                ; <float> [#uses=1]
   %10 = fsub float %9, %a, !dbg !23               ; <float> [#uses=1]
   %11 = fdiv float %10, %5, !dbg !23              ; <float> [#uses=1]
-  tail call void @llvm.dbg.value(metadata !{float %11}, i64 0, metadata !18), !dbg !23
+  tail call void @llvm.dbg.value(metadata float %11, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !23
   br label %bb2, !dbg !23
 
 bb1:                                              ; preds = %entry
   %12 = fdiv float %d, %c, !dbg !24               ; <float> [#uses=3]
-  tail call void @llvm.dbg.value(metadata !{float %12}, i64 0, metadata !16), !dbg !24
+  tail call void @llvm.dbg.value(metadata float %12, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !24
   %13 = fmul float %12, %d, !dbg !25              ; <float> [#uses=1]
   %14 = fadd float %13, %c, !dbg !25              ; <float> [#uses=2]
-  tail call void @llvm.dbg.value(metadata !{float %14}, i64 0, metadata !14), !dbg !25
+  tail call void @llvm.dbg.value(metadata float %14, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !25
   %15 = fmul float %12, %b, !dbg !26              ; <float> [#uses=1]
   %16 = fadd float %15, %a, !dbg !26              ; <float> [#uses=1]
   %17 = fdiv float %16, %14, !dbg !26             ; <float> [#uses=1]
-  tail call void @llvm.dbg.value(metadata !{float %17}, i64 0, metadata !17), !dbg !26
+  tail call void @llvm.dbg.value(metadata float %17, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !26
   %18 = fmul float %12, %a, !dbg !27              ; <float> [#uses=1]
   %19 = fsub float %b, %18, !dbg !27              ; <float> [#uses=1]
   %20 = fdiv float %19, %14, !dbg !27             ; <float> [#uses=1]
-  tail call void @llvm.dbg.value(metadata !{float %20}, i64 0, metadata !18), !dbg !27
+  tail call void @llvm.dbg.value(metadata float %20, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !27
   br label %bb2, !dbg !27
 
 bb2:                                              ; preds = %bb1, %bb
@@ -75,9 +74,9 @@ bb6:                                              ; preds = %bb4
 bb8:                                              ; preds = %bb6
   %27 = tail call float @copysignf(float 0x7FF0000000000000, float %c) nounwind readnone, !dbg !30 ; <float> [#uses=2]
   %28 = fmul float %27, %a, !dbg !30              ; <float> [#uses=1]
-  tail call void @llvm.dbg.value(metadata !{float %28}, i64 0, metadata !17), !dbg !30
+  tail call void @llvm.dbg.value(metadata float %28, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !30
   %29 = fmul float %27, %b, !dbg !31              ; <float> [#uses=1]
-  tail call void @llvm.dbg.value(metadata !{float %29}, i64 0, metadata !18), !dbg !31
+  tail call void @llvm.dbg.value(metadata float %29, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !31
   br label %bb46, !dbg !31
 
 bb9:                                              ; preds = %bb6, %bb4
@@ -107,24 +106,24 @@ bb15:                                             ; preds = %bb14
 bb16:                                             ; preds = %bb15
   %iftmp.0.0 = select i1 %33, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
   %42 = tail call float @copysignf(float %iftmp.0.0, float %a) nounwind readnone, !dbg !33 ; <float> [#uses=2]
-  tail call void @llvm.dbg.value(metadata !{float %42}, i64 0, metadata !0), !dbg !33
+  tail call void @llvm.dbg.value(metadata float %42, i64 0, metadata !0, metadata !{!"0x102"}), !dbg !33
   %43 = fcmp ord float %b, 0.000000e+00           ; <i1> [#uses=1]
   %44 = fsub float %b, %b, !dbg !34               ; <float> [#uses=1]
   %45 = fcmp uno float %44, 0.000000e+00          ; <i1> [#uses=1]
   %46 = and i1 %43, %45, !dbg !34                 ; <i1> [#uses=1]
   %iftmp.1.0 = select i1 %46, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
   %47 = tail call float @copysignf(float %iftmp.1.0, float %b) nounwind readnone, !dbg !34 ; <float> [#uses=2]
-  tail call void @llvm.dbg.value(metadata !{float %47}, i64 0, metadata !11), !dbg !34
+  tail call void @llvm.dbg.value(metadata float %47, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !34
   %48 = fmul float %42, %c, !dbg !35              ; <float> [#uses=1]
   %49 = fmul float %47, %d, !dbg !35              ; <float> [#uses=1]
   %50 = fadd float %48, %49, !dbg !35             ; <float> [#uses=1]
   %51 = fmul float %50, 0x7FF0000000000000, !dbg !35 ; <float> [#uses=1]
-  tail call void @llvm.dbg.value(metadata !{float %51}, i64 0, metadata !17), !dbg !35
+  tail call void @llvm.dbg.value(metadata float %51, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !35
   %52 = fmul float %47, %c, !dbg !36              ; <float> [#uses=1]
   %53 = fmul float %42, %d, !dbg !36              ; <float> [#uses=1]
   %54 = fsub float %52, %53, !dbg !36             ; <float> [#uses=1]
   %55 = fmul float %54, 0x7FF0000000000000, !dbg !36 ; <float> [#uses=1]
-  tail call void @llvm.dbg.value(metadata !{float %55}, i64 0, metadata !18), !dbg !36
+  tail call void @llvm.dbg.value(metadata float %55, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !36
   br label %bb46, !dbg !36
 
 bb27:                                             ; preds = %bb15, %bb14, %bb11
@@ -155,24 +154,24 @@ bb34:                                             ; preds = %bb33, %bb30
 bb35:                                             ; preds = %bb34
   %iftmp.2.0 = select i1 %59, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
   %67 = tail call float @copysignf(float %iftmp.2.0, float %c) nounwind readnone, !dbg !38 ; <float> [#uses=2]
-  tail call void @llvm.dbg.value(metadata !{float %67}, i64 0, metadata !12), !dbg !38
+  tail call void @llvm.dbg.value(metadata float %67, i64 0, metadata !12, metadata !{!"0x102"}), !dbg !38
   %68 = fcmp ord float %d, 0.000000e+00           ; <i1> [#uses=1]
   %69 = fsub float %d, %d, !dbg !39               ; <float> [#uses=1]
   %70 = fcmp uno float %69, 0.000000e+00          ; <i1> [#uses=1]
   %71 = and i1 %68, %70, !dbg !39                 ; <i1> [#uses=1]
   %iftmp.3.0 = select i1 %71, float 1.000000e+00, float 0.000000e+00 ; <float> [#uses=1]
   %72 = tail call float @copysignf(float %iftmp.3.0, float %d) nounwind readnone, !dbg !39 ; <float> [#uses=2]
-  tail call void @llvm.dbg.value(metadata !{float %72}, i64 0, metadata !13), !dbg !39
+  tail call void @llvm.dbg.value(metadata float %72, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !39
   %73 = fmul float %67, %a, !dbg !40              ; <float> [#uses=1]
   %74 = fmul float %72, %b, !dbg !40              ; <float> [#uses=1]
   %75 = fadd float %73, %74, !dbg !40             ; <float> [#uses=1]
   %76 = fmul float %75, 0.000000e+00, !dbg !40    ; <float> [#uses=1]
-  tail call void @llvm.dbg.value(metadata !{float %76}, i64 0, metadata !17), !dbg !40
+  tail call void @llvm.dbg.value(metadata float %76, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !40
   %77 = fmul float %67, %b, !dbg !41              ; <float> [#uses=1]
   %78 = fmul float %72, %a, !dbg !41              ; <float> [#uses=1]
   %79 = fsub float %77, %78, !dbg !41             ; <float> [#uses=1]
   %80 = fmul float %79, 0.000000e+00, !dbg !41    ; <float> [#uses=1]
-  tail call void @llvm.dbg.value(metadata !{float %80}, i64 0, metadata !18), !dbg !41
+  tail call void @llvm.dbg.value(metadata float %80, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !41
   br label %bb46, !dbg !41
 
 bb46:                                             ; preds = %bb35, %bb34, %bb33, %bb30, %bb16, %bb8, %bb2
@@ -196,57 +195,57 @@ declare float @fabsf(float)
 
 declare float @copysignf(float, float) nounwind readnone
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!48}
 
-!0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !45, metadata !2, metadata !"__divsc3", metadata !"__divsc3", metadata !"__divsc3", i32 1922, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, %0 (float, float, float, float)* @__divsc3, null, null, metadata !43, i32 1922} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !45} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !45, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !47, metadata !47, metadata !44, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !45, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{metadata !6, metadata !9, metadata !9, metadata !9, metadata !9}
-!6 = metadata !{i32 786454, metadata !46, metadata !7, metadata !"SCtype", i32 170, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ]
-!7 = metadata !{i32 786473, metadata !46} ; [ DW_TAG_file_type ]
-!8 = metadata !{i32 786468, metadata !45, metadata !2, metadata !"complex float", i32 0, i64 64, i64 32, i64 0, i32 0, i32 3} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786454, metadata !46, metadata !7, metadata !"SFtype", i32 167, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
-!10 = metadata !{i32 786468, metadata !45, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!11 = metadata !{i32 786689, metadata !1, metadata !"b", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
-!12 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
-!13 = metadata !{i32 786689, metadata !1, metadata !"d", metadata !2, i32 1921, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
-!14 = metadata !{i32 786688, metadata !15, metadata !"denom", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 786443, metadata !45, metadata !1, i32 1922, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 786688, metadata !15, metadata !"ratio", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
-!17 = metadata !{i32 786688, metadata !15, metadata !"x", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
-!18 = metadata !{i32 786688, metadata !15, metadata !"y", metadata !2, i32 1923, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
-!19 = metadata !{i32 1929, i32 0, metadata !15, null}
-!20 = metadata !{i32 1931, i32 0, metadata !15, null}
-!21 = metadata !{i32 1932, i32 0, metadata !15, null}
-!22 = metadata !{i32 1933, i32 0, metadata !15, null}
-!23 = metadata !{i32 1934, i32 0, metadata !15, null}
-!24 = metadata !{i32 1938, i32 0, metadata !15, null}
-!25 = metadata !{i32 1939, i32 0, metadata !15, null}
-!26 = metadata !{i32 1940, i32 0, metadata !15, null}
-!27 = metadata !{i32 1941, i32 0, metadata !15, null}
-!28 = metadata !{i32 1946, i32 0, metadata !15, null}
-!29 = metadata !{i32 1948, i32 0, metadata !15, null}
-!30 = metadata !{i32 1950, i32 0, metadata !15, null}
-!31 = metadata !{i32 1951, i32 0, metadata !15, null}
-!32 = metadata !{i32 1953, i32 0, metadata !15, null}
-!33 = metadata !{i32 1955, i32 0, metadata !15, null}
-!34 = metadata !{i32 1956, i32 0, metadata !15, null}
-!35 = metadata !{i32 1957, i32 0, metadata !15, null}
-!36 = metadata !{i32 1958, i32 0, metadata !15, null}
-!37 = metadata !{i32 1960, i32 0, metadata !15, null}
-!38 = metadata !{i32 1962, i32 0, metadata !15, null}
-!39 = metadata !{i32 1963, i32 0, metadata !15, null}
-!40 = metadata !{i32 1964, i32 0, metadata !15, null}
-!41 = metadata !{i32 1965, i32 0, metadata !15, null}
-!42 = metadata !{i32 1969, i32 0, metadata !15, null}
-!43 = metadata !{metadata !0, metadata !11, metadata !12, metadata !13, metadata !14, metadata !16, metadata !17, metadata !18}
-!44 = metadata !{metadata !1}
-!45 = metadata !{metadata !"libgcc2.c", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"}
-!46 = metadata !{metadata !"libgcc2.h", metadata !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"}
-!47 = metadata !{i32 0}
-!48 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x101\00a\001921\000", !1, !2, !9} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00__divsc3\00__divsc3\00__divsc3\001922\000\001\000\006\000\001\001922", !45, !2, !4, null, %0 (float, float, float, float)* @__divsc3, null, null, !43} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !45} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\001", !45, !47, !47, !44, null,  null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !45, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{!6, !9, !9, !9, !9}
+!6 = !{!"0x16\00SCtype\00170\000\000\000\000", !46, !7, !8} ; [ DW_TAG_typedef ]
+!7 = !{!"0x29", !46} ; [ DW_TAG_file_type ]
+!8 = !{!"0x24\00complex float\000\0064\0032\000\000\003", !45, !2} ; [ DW_TAG_base_type ]
+!9 = !{!"0x16\00SFtype\00167\000\000\000\000", !46, !7, !10} ; [ DW_TAG_typedef ]
+!10 = !{!"0x24\00float\000\0032\0032\000\000\004", !45, !2} ; [ DW_TAG_base_type ]
+!11 = !{!"0x101\00b\001921\000", !1, !2, !9} ; [ DW_TAG_arg_variable ]
+!12 = !{!"0x101\00c\001921\000", !1, !2, !9} ; [ DW_TAG_arg_variable ]
+!13 = !{!"0x101\00d\001921\000", !1, !2, !9} ; [ DW_TAG_arg_variable ]
+!14 = !{!"0x100\00denom\001923\000", !15, !2, !9} ; [ DW_TAG_auto_variable ]
+!15 = !{!"0xb\001922\000\000", !45, !1} ; [ DW_TAG_lexical_block ]
+!16 = !{!"0x100\00ratio\001923\000", !15, !2, !9} ; [ DW_TAG_auto_variable ]
+!17 = !{!"0x100\00x\001923\000", !15, !2, !9} ; [ DW_TAG_auto_variable ]
+!18 = !{!"0x100\00y\001923\000", !15, !2, !9} ; [ DW_TAG_auto_variable ]
+!19 = !MDLocation(line: 1929, scope: !15)
+!20 = !MDLocation(line: 1931, scope: !15)
+!21 = !MDLocation(line: 1932, scope: !15)
+!22 = !MDLocation(line: 1933, scope: !15)
+!23 = !MDLocation(line: 1934, scope: !15)
+!24 = !MDLocation(line: 1938, scope: !15)
+!25 = !MDLocation(line: 1939, scope: !15)
+!26 = !MDLocation(line: 1940, scope: !15)
+!27 = !MDLocation(line: 1941, scope: !15)
+!28 = !MDLocation(line: 1946, scope: !15)
+!29 = !MDLocation(line: 1948, scope: !15)
+!30 = !MDLocation(line: 1950, scope: !15)
+!31 = !MDLocation(line: 1951, scope: !15)
+!32 = !MDLocation(line: 1953, scope: !15)
+!33 = !MDLocation(line: 1955, scope: !15)
+!34 = !MDLocation(line: 1956, scope: !15)
+!35 = !MDLocation(line: 1957, scope: !15)
+!36 = !MDLocation(line: 1958, scope: !15)
+!37 = !MDLocation(line: 1960, scope: !15)
+!38 = !MDLocation(line: 1962, scope: !15)
+!39 = !MDLocation(line: 1963, scope: !15)
+!40 = !MDLocation(line: 1964, scope: !15)
+!41 = !MDLocation(line: 1965, scope: !15)
+!42 = !MDLocation(line: 1969, scope: !15)
+!43 = !{!0, !11, !12, !13, !14, !16, !17, !18}
+!44 = !{!1}
+!45 = !{!"libgcc2.c", !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"}
+!46 = !{!"libgcc2.h", !"/Users/yash/clean/LG.D/gcc/../../llvmgcc/gcc"}
+!47 = !{i32 0}
+!48 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index e11b5382c23f..9915a706e5ee 100644
--- a/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-apple-darwin10"
 
 define i8* @bar(%struct.a* %myvar) nounwind optsize noinline ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !{%struct.a* %myvar}, i64 0, metadata !8)
+  tail call void @llvm.dbg.value(metadata %struct.a* %myvar, i64 0, metadata !8, metadata !{!"0x102"})
   %0 = getelementptr inbounds %struct.a* %myvar, i64 0, i32 0, !dbg !28 ; <i32*> [#uses=1]
   %1 = load i32* %0, align 8, !dbg !28            ; <i32> [#uses=1]
   tail call void @foo(i32 %1) nounwind optsize noinline ssp, !dbg !28
@@ -19,49 +19,49 @@ entry:
 
 declare void @foo(i32) nounwind optsize noinline ssp
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!38}
 
-!0 = metadata !{i32 786484, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 7, metadata !3, i1 false, i1 true, null, null} ; [ DW_TAG_variable ]
-!1 = metadata !{i32 786473, metadata !36} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !36, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !37, metadata !37, metadata !32, metadata !31,  metadata !37, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786468, metadata !36, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!4 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !1, i32 12, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ]
-!5 = metadata !{i32 786478, metadata !36, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo, null, null, metadata !33, i32 13} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786453, metadata !36, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null, metadata !3}
-!8 = metadata !{i32 786689, metadata !9, metadata !"myvar", metadata !1, i32 17, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
-!9 = metadata !{i32 786478, metadata !36, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", i32 17, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar, null, null, metadata !34, i32 17} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 786453, metadata !36, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!11 = metadata !{metadata !12, metadata !13}
-!12 = metadata !{i32 786447, metadata !36, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!13 = metadata !{i32 786447, metadata !36, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
-!14 = metadata !{i32 786451, metadata !36, metadata !1, metadata !"a", i32 2, i64 128, i64 64, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [a] [line 2, size 128, align 64, offset 0] [def] [from ]
-!15 = metadata !{metadata !16, metadata !17}
-!16 = metadata !{i32 786445, metadata !36, metadata !14, metadata !"c", i32 3, i64 32, i64 32, i64 0, i32 0, metadata !3} ; [ DW_TAG_member ]
-!17 = metadata !{i32 786445, metadata !36, metadata !14, metadata !"d", i32 4, i64 64, i64 64, i64 64, i32 0, metadata !13} ; [ DW_TAG_member ]
-!18 = metadata !{i32 786689, metadata !19, metadata !"argc", metadata !1, i32 22, metadata !3, i32 0, null} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 786478, metadata !36, metadata !1, metadata !"main", metadata !"main", metadata !"main", i32 22, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !35, i32 22} ; [ DW_TAG_subprogram ]
-!20 = metadata !{i32 786453, metadata !36, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!21 = metadata !{metadata !3, metadata !3, metadata !22}
-!22 = metadata !{i32 786447, metadata !36, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
-!23 = metadata !{i32 786447, metadata !36, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ]
-!24 = metadata !{i32 786468, metadata !36, metadata !1, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!25 = metadata !{i32 786689, metadata !19, metadata !"argv", metadata !1, i32 22, metadata !22, i32 0, null} ; [ DW_TAG_arg_variable ]
-!26 = metadata !{i32 786688, metadata !27, metadata !"e", metadata !1, i32 23, metadata !14, i32 0, null} ; [ DW_TAG_auto_variable ]
-!27 = metadata !{i32 786443, metadata !36, metadata !19, i32 22, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!28 = metadata !{i32 18, i32 0, metadata !29, null}
-!29 = metadata !{i32 786443, metadata !36, metadata !9, i32 17, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
-!30 = metadata !{i32 19, i32 0, metadata !29, null}
-!31 = metadata !{metadata !0}
-!32 = metadata !{metadata !5, metadata !9, metadata !19}
-!33 = metadata !{metadata !4}
-!34 = metadata !{metadata !8}
-!35 = metadata !{metadata !18, metadata !25, metadata !26}
-!36 = metadata !{metadata !"foo.c", metadata !"/tmp/"}
-!37 = metadata !{}
+!0 = !{!"0x34\00ret\00ret\00\007\000\001", !1, !1, !3, null, null} ; [ DW_TAG_variable ]
+!1 = !{!"0x29", !36} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\001", !36, !37, !37, !32, !31,  !37} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x24\00int\000\0032\0032\000\000\005", !36, !1} ; [ DW_TAG_base_type ]
+!4 = !{!"0x101\00x\0012\000", !5, !1, !3} ; [ DW_TAG_arg_variable ]
+!5 = !{!"0x2e\00foo\00foo\00foo\0013\000\001\000\006\000\001\0013", !36, !1, !6, null, void (i32)* @foo, null, null, !33} ; [ DW_TAG_subprogram ]
+!6 = !{!"0x15\00\000\000\000\000\000\000", !36, !1, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !3}
+!8 = !{!"0x101\00myvar\0017\000", !9, !1, !13} ; [ DW_TAG_arg_variable ]
+!9 = !{!"0x2e\00bar\00bar\00bar\0017\000\001\000\006\000\001\0017", !36, !1, !10, null, i8* (%struct.a*)* @bar, null, null, !34} ; [ DW_TAG_subprogram ]
+!10 = !{!"0x15\00\000\000\000\000\000\000", !36, !1, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !{!12, !13}
+!12 = !{!"0xf\00\000\0064\0064\000\000", !36, !1, null} ; [ DW_TAG_pointer_type ]
+!13 = !{!"0xf\00\000\0064\0064\000\000", !36, !1, !14} ; [ DW_TAG_pointer_type ]
+!14 = !{!"0x13\00a\002\00128\0064\000\000\000", !36, !1, null, !15, null, null, null} ; [ DW_TAG_structure_type ] [a] [line 2, size 128, align 64, offset 0] [def] [from ]
+!15 = !{!16, !17}
+!16 = !{!"0xd\00c\003\0032\0032\000\000", !36, !14, !3} ; [ DW_TAG_member ]
+!17 = !{!"0xd\00d\004\0064\0064\0064\000", !36, !14, !13} ; [ DW_TAG_member ]
+!18 = !{!"0x101\00argc\0022\000", !19, !1, !3} ; [ DW_TAG_arg_variable ]
+!19 = !{!"0x2e\00main\00main\00main\0022\000\001\000\006\000\001\0022", !36, !1, !20, null, null, null, null, !35} ; [ DW_TAG_subprogram ]
+!20 = !{!"0x15\00\000\000\000\000\000\000", !36, !1, null, !21, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!21 = !{!3, !3, !22}
+!22 = !{!"0xf\00\000\0064\0064\000\000", !36, !1, !23} ; [ DW_TAG_pointer_type ]
+!23 = !{!"0xf\00\000\0064\0064\000\000", !36, !1, !24} ; [ DW_TAG_pointer_type ]
+!24 = !{!"0x24\00char\000\008\008\000\000\006", !36, !1} ; [ DW_TAG_base_type ]
+!25 = !{!"0x101\00argv\0022\000", !19, !1, !22} ; [ DW_TAG_arg_variable ]
+!26 = !{!"0x100\00e\0023\000", !27, !1, !14} ; [ DW_TAG_auto_variable ]
+!27 = !{!"0xb\0022\000\000", !36, !19} ; [ DW_TAG_lexical_block ]
+!28 = !MDLocation(line: 18, scope: !29)
+!29 = !{!"0xb\0017\000\001", !36, !9} ; [ DW_TAG_lexical_block ]
+!30 = !MDLocation(line: 19, scope: !29)
+!31 = !{!0}
+!32 = !{!5, !9, !19}
+!33 = !{!4}
+!34 = !{!8}
+!35 = !{!18, !25, !26}
+!36 = !{!"foo.c", !"/tmp/"}
+!37 = !{}
 
 ; The variable bar:myvar changes registers after the first movq.
 ; It is cobbered by popq %rbx
@@ -73,18 +73,22 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 
 ; CHECK: Ldebug_loc0:
-; CHECK-NEXT: .quad   Lfunc_begin0
-; CHECK-NEXT: .quad   [[LABEL]]
+; CHECK-NEXT: [[SET1:.*]] = Lfunc_begin0-Lfunc_begin0
+; CHECK-NEXT: .quad   [[SET1]]
+; CHECK-NEXT: [[SET2:.*]] = [[LABEL]]-Lfunc_begin0
+; CHECK-NEXT: .quad   [[SET2]]
 ; CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}}               ## Loc expr size
 ; CHECK-NEXT: .short  Lset{{.*}}
 ; CHECK-NEXT: Ltmp{{.*}}:
 ; CHECK-NEXT: .byte   85
 ; CHECK-NEXT: Ltmp{{.*}}:
-; CHECK-NEXT: .quad   [[LABEL]]
-; CHECK-NEXT: .quad   [[CLOBBER]]
+; CHECK-NEXT: [[SET3:.*]] = [[LABEL]]-Lfunc_begin0
+; CHECK-NEXT: .quad   [[SET3]]
+; CHECK-NEXT: [[SET4:.*]] = [[CLOBBER]]-Lfunc_begin0
+; CHECK-NEXT: .quad   [[SET4]]
 ; CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}}               ## Loc expr size
 ; CHECK-NEXT: .short  Lset{{.*}}
 ; CHECK-NEXT: Ltmp{{.*}}:
 ; CHECK-NEXT: .byte   83
 ; CHECK-NEXT: Ltmp{{.*}}:
-!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!38 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/2010-05-28-Crash.ll b/test/CodeGen/X86/2010-05-28-Crash.ll
index 1114c8dc87bb..7adacf5e0176 100644
--- a/test/CodeGen/X86/2010-05-28-Crash.ll
+++ b/test/CodeGen/X86/2010-05-28-Crash.ll
@@ -4,19 +4,19 @@
 
 define i32 @foo(i32 %y) nounwind optsize ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %y}, i64 0, metadata !0)
+  tail call void @llvm.dbg.value(metadata i32 %y, i64 0, metadata !0, metadata !{!"0x102"})
   %0 = tail call i32 (...)* @zoo(i32 %y) nounwind, !dbg !9 ; <i32> [#uses=1]
   ret i32 %0, !dbg !9
 }
 
 declare i32 @zoo(...)
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 define i32 @bar(i32 %x) nounwind optsize ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !7)
-  tail call void @llvm.dbg.value(metadata !11, i64 0, metadata !0) nounwind
+  tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !7, metadata !{!"0x102"})
+  tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !0, metadata !{!"0x102"}) nounwind
   %0 = tail call i32 (...)* @zoo(i32 1) nounwind, !dbg !12 ; <i32> [#uses=1]
   %1 = add nsw i32 %0, %x, !dbg !13               ; <i32> [#uses=1]
   ret i32 %1, !dbg !13
@@ -25,28 +25,28 @@ entry:
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!20}
 
-!0 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, metadata !15, i32 2} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !18, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !19, metadata !19, metadata !17, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{metadata !6, metadata !6}
-!6 = metadata !{i32 786468, metadata !18, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !2, i32 6, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", i32 6, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @bar, null, null, metadata !16, i32 6} ; [ DW_TAG_subprogram ]
-!9 = metadata !{i32 3, i32 0, metadata !10, null}
-!10 = metadata !{i32 786443, metadata !18, metadata !1, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!11 = metadata !{i32 1}
-!12 = metadata !{i32 3, i32 0, metadata !10, metadata !13}
-!13 = metadata !{i32 7, i32 0, metadata !14, null}
-!14 = metadata !{i32 786443, metadata !18, metadata !8, i32 6, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!15 = metadata !{metadata !0}
-!16 = metadata !{metadata !7}
-!17 = metadata !{metadata !1, metadata !8}
-!18 = metadata !{metadata !"f.c", metadata !"/tmp"}
-!19 = metadata !{i32 0}
+!0 = !{!"0x101\00y\002\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00foo\00foo\00foo\002\000\001\000\006\000\001\002", !18, !2, !4, null, i32 (i32)* @foo, null, null, !15} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !18} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\001", !18, !19, !19, !17, null,  null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !18, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{!6, !6}
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", !18, !2} ; [ DW_TAG_base_type ]
+!7 = !{!"0x101\00x\006\000", !8, !2, !6} ; [ DW_TAG_arg_variable ]
+!8 = !{!"0x2e\00bar\00bar\00bar\006\000\001\000\006\000\001\006", !18, !2, !4, null, i32 (i32)* @bar, null, null, !16} ; [ DW_TAG_subprogram ]
+!9 = !MDLocation(line: 3, scope: !10)
+!10 = !{!"0xb\002\000\000", !18, !1} ; [ DW_TAG_lexical_block ]
+!11 = !{i32 1}
+!12 = !MDLocation(line: 3, scope: !10, inlinedAt: !13)
+!13 = !MDLocation(line: 7, scope: !14)
+!14 = !{!"0xb\006\000\000", !18, !8} ; [ DW_TAG_lexical_block ]
+!15 = !{!0}
+!16 = !{!7}
+!17 = !{!1, !8}
+!18 = !{!"f.c", !"/tmp"}
+!19 = !{i32 0}
 
 ;CHECK: DEBUG_VALUE: bar:x <- E
 ;CHECK: Ltmp
 ;CHECK:	DEBUG_VALUE: foo:y <- 1{{$}}
-!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!20 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
index b45ac226a650..3687b828c4a4 100644
--- a/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
+++ b/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
@@ -10,51 +10,51 @@ target triple = "x86_64-apple-darwin10.2"
 define i32 @_ZN3foo3bazEi(%struct.foo* nocapture %this, i32 %x) nounwind readnone optsize noinline ssp align 2 {
 ;CHECK: DEBUG_VALUE: baz:this <- RDI{{$}}
 entry:
-  tail call void @llvm.dbg.value(metadata !{%struct.foo* %this}, i64 0, metadata !15)
-  tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !16)
+  tail call void @llvm.dbg.value(metadata %struct.foo* %this, i64 0, metadata !15, metadata !{!"0x102"})
+  tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !16, metadata !{!"0x102"})
   %0 = mul nsw i32 %x, 7, !dbg !29                ; <i32> [#uses=1]
   %1 = add nsw i32 %0, 1, !dbg !29                ; <i32> [#uses=1]
   ret i32 %1, !dbg !29
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!4}
 !llvm.module.flags = !{!34}
 !llvm.dbg.lv = !{!0, !14, !15, !16, !17, !24, !25, !28}
 
-!0 = metadata !{i32 786689, metadata !1, metadata !"this", metadata !3, i32 11, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !31, metadata !2, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEi", i32 11, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786451, metadata !31, metadata !3, metadata !"foo", i32 3, i64 32, i64 32, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 3, size 32, align 32, offset 0] [def] [from ]
-!3 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 786449, metadata !31, i32 4, metadata !"4.2.1 LLVM build", i1 true, metadata !"", i32 0, metadata !32, metadata !32, metadata !33, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{metadata !6, metadata !1, metadata !8}
-!6 = metadata !{i32 786445, metadata !31, metadata !2, metadata !"y", i32 8, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ]
-!7 = metadata !{i32 786468, metadata !31, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 786478, metadata !31, metadata !2, metadata !"baz", metadata !"baz", metadata !"_ZN3foo3bazEi", i32 15, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null, i32 15} ; [ DW_TAG_subprogram ]
-!9 = metadata !{i32 786453, metadata !31, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!10 = metadata !{metadata !7, metadata !11, metadata !7}
-!11 = metadata !{i32 786447, metadata !31, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !2} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 786470, metadata !31, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !13} ; [ DW_TAG_const_type ]
-!13 = metadata !{i32 786447, metadata !31, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !2} ; [ DW_TAG_pointer_type ]
-!14 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !3, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 786689, metadata !8, metadata !"this", metadata !3, i32 15, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ]
-!16 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !3, i32 15, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 786689, metadata !18, metadata !"argc", metadata !3, i32 19, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!18 = metadata !{i32 786478, metadata !31, metadata !3, metadata !"main", metadata !"main", metadata !"main", i32 19, metadata !19, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, null, null, null, null, i32 19} ; [ DW_TAG_subprogram ]
-!19 = metadata !{i32 786453, metadata !31, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!20 = metadata !{metadata !7, metadata !7, metadata !21}
-!21 = metadata !{i32 786447, metadata !31, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ]
-!22 = metadata !{i32 786447, metadata !31, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ]
-!23 = metadata !{i32 786468, metadata !31, metadata !3, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!24 = metadata !{i32 786689, metadata !18, metadata !"argv", metadata !3, i32 19, metadata !21, i32 0, null} ; [ DW_TAG_arg_variable ]
-!25 = metadata !{i32 786688, metadata !26, metadata !"a", metadata !3, i32 20, metadata !2, i32 0, null} ; [ DW_TAG_auto_variable ]
-!26 = metadata !{i32 786443, metadata !31, metadata !27, i32 19, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!27 = metadata !{i32 786443, metadata !31, metadata !18, i32 19, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!28 = metadata !{i32 786688, metadata !26, metadata !"b", metadata !3, i32 21, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
-!29 = metadata !{i32 16, i32 0, metadata !30, null}
-!30 = metadata !{i32 786443, metadata !31, metadata !8, i32 15, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!31 = metadata !{metadata !"foo.cp", metadata !"/tmp/"}
-!32 = metadata !{i32 0}
-!33 = metadata !{metadata !1, metadata !8, metadata !18}
-!34 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x101\00this\0011\000", !1, !3, !12} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00bar\00bar\00_ZN3foo3barEi\0011\000\001\000\006\000\001\0011", !31, !2, !9, null, i32 (%struct.foo*, i32)* null, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x13\00foo\003\0032\0032\000\000\000", !31, !3, null, !5, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 3, size 32, align 32, offset 0] [def] [from ]
+!3 = !{!"0x29", !31} ; [ DW_TAG_file_type ]
+!4 = !{!"0x11\004\004.2.1 LLVM build\001\00\000\00\000", !31, !32, !32, !33, null, null} ; [ DW_TAG_compile_unit ]
+!5 = !{!6, !1, !8}
+!6 = !{!"0xd\00y\008\0032\0032\000\000", !31, !2, !7} ; [ DW_TAG_member ]
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", !31, !3} ; [ DW_TAG_base_type ]
+!8 = !{!"0x2e\00baz\00baz\00_ZN3foo3bazEi\0015\000\001\000\006\000\001\0015", !31, !2, !9, null, i32 (%struct.foo*, i32)* @_ZN3foo3bazEi, null, null, null} ; [ DW_TAG_subprogram ]
+!9 = !{!"0x15\00\000\000\000\000\000\000", !31, !3, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !{!7, !11, !7}
+!11 = !{!"0xf\00\000\0064\0064\000\0064", !31, !3, !2} ; [ DW_TAG_pointer_type ]
+!12 = !{!"0x26\00\000\0064\0064\000\0064", !31, !3, !13} ; [ DW_TAG_const_type ]
+!13 = !{!"0xf\00\000\0064\0064\000\000", !31, !3, !2} ; [ DW_TAG_pointer_type ]
+!14 = !{!"0x101\00x\0011\000", !1, !3, !7} ; [ DW_TAG_arg_variable ]
+!15 = !{!"0x101\00this\0015\000", !8, !3, !12} ; [ DW_TAG_arg_variable ]
+!16 = !{!"0x101\00x\0015\000", !8, !3, !7} ; [ DW_TAG_arg_variable ]
+!17 = !{!"0x101\00argc\0019\000", !18, !3, !7} ; [ DW_TAG_arg_variable ]
+!18 = !{!"0x2e\00main\00main\00main\0019\000\001\000\006\000\001\0019", !31, !3, !19, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!19 = !{!"0x15\00\000\000\000\000\000\000", !31, !3, null, !20, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!20 = !{!7, !7, !21}
+!21 = !{!"0xf\00\000\0064\0064\000\000", !31, !3, !22} ; [ DW_TAG_pointer_type ]
+!22 = !{!"0xf\00\000\0064\0064\000\000", !31, !3, !23} ; [ DW_TAG_pointer_type ]
+!23 = !{!"0x24\00char\000\008\008\000\000\006", !31, !3} ; [ DW_TAG_base_type ]
+!24 = !{!"0x101\00argv\0019\000", !18, !3, !21} ; [ DW_TAG_arg_variable ]
+!25 = !{!"0x100\00a\0020\000", !26, !3, !2} ; [ DW_TAG_auto_variable ]
+!26 = !{!"0xb\0019\000\000", !31, !27} ; [ DW_TAG_lexical_block ]
+!27 = !{!"0xb\0019\000\000", !31, !18} ; [ DW_TAG_lexical_block ]
+!28 = !{!"0x100\00b\0021\000", !26, !3, !7} ; [ DW_TAG_auto_variable ]
+!29 = !MDLocation(line: 16, scope: !30)
+!30 = !{!"0xb\0015\000\000", !31, !8} ; [ DW_TAG_lexical_block ]
+!31 = !{!"foo.cp", !"/tmp/"}
+!32 = !{i32 0}
+!33 = !{!1, !8, !18}
+!34 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll b/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
index 0f8855d1267e..74a7610e6597 100644
--- a/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
+++ b/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
@@ -26,4 +26,4 @@ entry:
 
 declare i32 @printf(i8*, ...)
 
-!0 = metadata !{i32 191}
+!0 = !{i32 191}
diff --git a/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll b/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll
index 0df9dc1cb769..3470a06a543b 100644
--- a/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll
+++ b/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll
@@ -16,4 +16,4 @@ entry:
 
 declare x86_stdcallcc void @RtlUnwind(...)
 
-!0 = metadata !{i32 215}
+!0 = !{i32 215}
diff --git a/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll b/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll
index d7bc21f6393a..7cffdc545e02 100644
--- a/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll
+++ b/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll
@@ -19,4 +19,4 @@ entry:
   ret i32 %asmresult
 }
 
-!0 = metadata !{i32 108}
+!0 = !{i32 108}
diff --git a/test/CodeGen/X86/2010-07-06-DbgCrash.ll b/test/CodeGen/X86/2010-07-06-DbgCrash.ll
index b49aec3af87a..457c49852dca 100644
--- a/test/CodeGen/X86/2010-07-06-DbgCrash.ll
+++ b/test/CodeGen/X86/2010-07-06-DbgCrash.ll
@@ -3,29 +3,29 @@
 @.str = private constant [4 x i8] c"one\00", align 1 ; <[4 x i8]*> [#uses=1]
 @.str1 = private constant [4 x i8] c"two\00", align 1 ; <[5 x i8]*> [#uses=1]
 @C.9.2167 = internal constant [2 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0)]
-!38 = metadata !{i32 524329, metadata !109} ; [ DW_TAG_file_type ]
-!39 = metadata !{i32 524305, metadata !109, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, metadata !"", i32 0, metadata !108, metadata !108, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!46 = metadata !{i32 524303, metadata !109, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !47} ; [ DW_TAG_pointer_type ]
-!47 = metadata !{i32 524324, metadata !109, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!97 = metadata !{i32 524334, i32 0, metadata !39, metadata !"main", metadata !"main", metadata !"main", i32 73, metadata !98, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!98 = metadata !{i32 524309, metadata !109, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !99, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!99 = metadata !{metadata !100}
-!100 = metadata !{i32 524324, metadata !109, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!101 = metadata !{[2 x i8*]* @C.9.2167}
-!102 = metadata !{i32 524544, metadata !103, metadata !"find_strings", metadata !38, i32 75, metadata !104, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!103 = metadata !{i32 524299, null, metadata !97, i32 73, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!104 = metadata !{i32 524289, metadata !109, null, metadata !"", i32 0, i64 85312, i64 64, i64 0, i32 0, metadata !46, metadata !105, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 85312, align 64, offset 0] [from ]
-!105 = metadata !{metadata !106}
-!106 = metadata !{i32 524321, i64 0, i64 1333}    ; [ DW_TAG_subrange_type ]
-!107 = metadata !{i32 73, i32 0, metadata !103, null}
-!108 = metadata !{i32 0}
-!109 = metadata !{metadata !"pbmsrch.c", metadata !"/Users/grawp/LLVM/test-suite/MultiSource/Benchmarks/MiBench/office-stringsearch"}
+!38 = !{!"0x29", !109} ; [ DW_TAG_file_type ]
+!39 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)\001\00\000\00\000", !109, !108, !108, null, null, null} ; [ DW_TAG_compile_unit ]
+!46 = !{!"0xf\00\000\0064\0064\000\000", !109, null, !47} ; [ DW_TAG_pointer_type ]
+!47 = !{!"0x24\00char\000\008\008\000\000\006", !109, null} ; [ DW_TAG_base_type ]
+!97 = !{!"0x2e\00main\00main\00main\0073\000\001\000\006\000\000\000", i32 0, !39, !98, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!98 = !{!"0x15\00\000\000\000\000\000\000", !109, null, null, !99, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!99 = !{!100}
+!100 = !{!"0x24\00int\000\0032\0032\000\000\005", !109, null} ; [ DW_TAG_base_type ]
+!101 = !{[2 x i8*]* @C.9.2167}
+!102 = !{!"0x100\00find_strings\0075\000", !103, !38, !104} ; [ DW_TAG_auto_variable ]
+!103 = !{!"0xb\0073\000\000", null, !97} ; [ DW_TAG_lexical_block ]
+!104 = !{!"0x1\00\000\0085312\0064\000\000", !109, null, !46, !105, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 85312, align 64, offset 0] [from ]
+!105 = !{!106}
+!106 = !{!"0x21\000\001333"}    ; [ DW_TAG_subrange_type ]
+!107 = !MDLocation(line: 73, scope: !103)
+!108 = !{i32 0}
+!109 = !{!"pbmsrch.c", !"/Users/grawp/LLVM/test-suite/MultiSource/Benchmarks/MiBench/office-stringsearch"}
 
 define i32 @main() nounwind ssp {
 bb.nph:
-  tail call void @llvm.dbg.declare(metadata !101, metadata !102), !dbg !107
+  tail call void @llvm.dbg.declare(metadata [2 x i8*]* @C.9.2167, metadata !102, metadata !{!"0x102"}), !dbg !107
   ret i32 0, !dbg !107
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
diff --git a/test/CodeGen/X86/2010-08-04-StackVariable.ll b/test/CodeGen/X86/2010-08-04-StackVariable.ll
index 09e34ef6b7f5..e3decf0c889a 100644
--- a/test/CodeGen/X86/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/X86/2010-08-04-StackVariable.ll
@@ -6,8 +6,8 @@
 define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp {
 entry:
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !23), !dbg !24
-  call void @llvm.dbg.value(metadata !{%struct.SVal* %location}, i64 0, metadata !25), !dbg !24
+  call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !23, metadata !{!"0x102"}), !dbg !24
+  call void @llvm.dbg.value(metadata %struct.SVal* %location, i64 0, metadata !25, metadata !{!"0x102"}), !dbg !24
   %0 = icmp ne i32 %i, 0, !dbg !27                ; <i1> [#uses=1]
   br i1 %0, label %bb, label %bb1, !dbg !27
 
@@ -34,7 +34,7 @@ return:                                           ; preds = %bb2
 define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 {
 entry:
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.value(metadata !{%struct.SVal* %this}, i64 0, metadata !31), !dbg !34
+  call void @llvm.dbg.value(metadata %struct.SVal* %this, i64 0, metadata !31, metadata !{!"0x102"}), !dbg !34
   %0 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 0, !dbg !34 ; <i8**> [#uses=1]
   store i8* null, i8** %0, align 8, !dbg !34
   %1 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 1, !dbg !34 ; <i32*> [#uses=1]
@@ -45,14 +45,14 @@ return:                                           ; preds = %entry
   ret void, !dbg !35
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define i32 @main() nounwind ssp {
 entry:
   %0 = alloca %struct.SVal                        ; <%struct.SVal*> [#uses=3]
   %v = alloca %struct.SVal                        ; <%struct.SVal*> [#uses=4]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{%struct.SVal* %v}, metadata !38), !dbg !41
+  call void @llvm.dbg.declare(metadata %struct.SVal* %v, metadata !38, metadata !{!"0x102"}), !dbg !41
   call void @_ZN4SValC1Ev(%struct.SVal* %v) nounwind, !dbg !41
   %1 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !42 ; <i32*> [#uses=1]
   store i32 1, i32* %1, align 8, !dbg !42
@@ -65,65 +65,65 @@ entry:
   %7 = load i32* %6, align 8, !dbg !43            ; <i32> [#uses=1]
   store i32 %7, i32* %5, align 8, !dbg !43
   %8 = call i32 @_Z3fooi4SVal(i32 2, %struct.SVal* noalias %0) nounwind, !dbg !43 ; <i32> [#uses=0]
-  call void @llvm.dbg.value(metadata !{i32 %8}, i64 0, metadata !44), !dbg !43
+  call void @llvm.dbg.value(metadata i32 %8, i64 0, metadata !44, metadata !{!"0x102"}), !dbg !43
   br label %return, !dbg !45
 
 return:                                           ; preds = %entry
   ret i32 0, !dbg !45
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!49}
-!46 = metadata !{metadata !16, metadata !17, metadata !20}
+!46 = !{!16, !17, !20}
 
-!0 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786451, metadata !47, metadata !2, metadata !"SVal", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [SVal] [line 1, size 128, align 64, offset 0] [def] [from ]
-!2 = metadata !{i32 786473, metadata !47} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !47, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !48, metadata !48, metadata !46, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9}
-!5 = metadata !{i32 786445, metadata !47, metadata !1, metadata !"Data", i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!6 = metadata !{i32 786447, metadata !47, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 786445, metadata !47, metadata !1, metadata !"Kind", i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ]
-!8 = metadata !{i32 786468, metadata !47, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 12} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!11 = metadata !{null, metadata !12, metadata !13}
-!12 = metadata !{i32 786447, metadata !47, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
-!13 = metadata !{i32 786468, metadata !47, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!15 = metadata !{null, metadata !12}
-!16 = metadata !{i32 786478, metadata !47, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null, i32 11} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"foo", metadata !"foo", metadata !"_Z3fooi4SVal", i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null, i32 16} ; [ DW_TAG_subprogram ]
-!18 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!19 = metadata !{metadata !13, metadata !13, metadata !1}
-!20 = metadata !{i32 786478, metadata !47, metadata !2, metadata !"main", metadata !"main", metadata !"main", i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main, null, null, null, i32 23} ; [ DW_TAG_subprogram ]
-!21 = metadata !{i32 786453, metadata !47, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!22 = metadata !{metadata !13}
-!23 = metadata !{i32 786689, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
-!24 = metadata !{i32 16, i32 0, metadata !17, null}
-!25 = metadata !{i32 786689, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26, i32 0, null} ; [ DW_TAG_arg_variable ]
-!26 = metadata !{i32 786448, metadata !47, metadata !2, metadata !"SVal", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
-!27 = metadata !{i32 17, i32 0, metadata !28, null}
-!28 = metadata !{i32 786443, metadata !47, metadata !17, i32 16, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
-!29 = metadata !{i32 18, i32 0, metadata !28, null}
-!30 = metadata !{i32 20, i32 0, metadata !28, null}
-!31 = metadata !{i32 786689, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32, i32 0, null} ; [ DW_TAG_arg_variable ]
-!32 = metadata !{i32 786470, metadata !47, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ]
-!33 = metadata !{i32 786447, metadata !47, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_pointer_type ]
-!34 = metadata !{i32 11, i32 0, metadata !16, null}
-!35 = metadata !{i32 11, i32 0, metadata !36, null}
-!36 = metadata !{i32 786443, metadata !47, metadata !37, i32 11, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
-!37 = metadata !{i32 786443, metadata !47, metadata !16, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!38 = metadata !{i32 786688, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1, i32 0, null} ; [ DW_TAG_auto_variable ]
-!39 = metadata !{i32 786443, metadata !47, metadata !40, i32 23, i32 0, i32 4} ; [ DW_TAG_lexical_block ]
-!40 = metadata !{i32 786443, metadata !47, metadata !20, i32 23, i32 0, i32 3} ; [ DW_TAG_lexical_block ]
-!41 = metadata !{i32 24, i32 0, metadata !39, null}
-!42 = metadata !{i32 25, i32 0, metadata !39, null}
-!43 = metadata !{i32 26, i32 0, metadata !39, null}
-!44 = metadata !{i32 786688, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13, i32 0, null} ; [ DW_TAG_auto_variable ]
-!45 = metadata !{i32 27, i32 0, metadata !39, null}
-!47 = metadata !{metadata !"small.cc", metadata !"/Users/manav/R8248330"}
-!48 = metadata !{i32 0}
-!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00SVal\00SVal\00\0011\000\000\000\006\000\000\0011", !47, !1, !14, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x13\00SVal\001\00128\0064\000\000\000", !47, !2, null, !4, null, null, null} ; [ DW_TAG_structure_type ] [SVal] [line 1, size 128, align 64, offset 0] [def] [from ]
+!2 = !{!"0x29", !47} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\004\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\000\00\000\00\001", !47, !48, !48, !46, null,  null} ; [ DW_TAG_compile_unit ]
+!4 = !{!5, !7, !0, !9}
+!5 = !{!"0xd\00Data\007\0064\0064\000\000", !47, !1, !6} ; [ DW_TAG_member ]
+!6 = !{!"0xf\00\000\0064\0064\000\000", !47, !2, null} ; [ DW_TAG_pointer_type ]
+!7 = !{!"0xd\00Kind\008\0032\0032\0064\000", !47, !1, !8} ; [ DW_TAG_member ]
+!8 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", !47, !2} ; [ DW_TAG_base_type ]
+!9 = !{!"0x2e\00~SVal\00~SVal\00\0012\000\000\000\006\000\000\0012", !47, !1, !10, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!10 = !{!"0x15\00\000\000\000\000\000\000", !47, !2, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !{null, !12, !13}
+!12 = !{!"0xf\00\000\0064\0064\000\0064", !47, !2, !1} ; [ DW_TAG_pointer_type ]
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", !47, !2} ; [ DW_TAG_base_type ]
+!14 = !{!"0x15\00\000\000\000\000\000\000", !47, !2, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = !{null, !12}
+!16 = !{!"0x2e\00SVal\00SVal\00_ZN4SValC1Ev\0011\000\001\000\006\000\000\0011", !47, !1, !14, null, void (%struct.SVal*)* @_ZN4SValC1Ev, null, null, null} ; [ DW_TAG_subprogram ]
+!17 = !{!"0x2e\00foo\00foo\00_Z3fooi4SVal\0016\000\001\000\006\000\000\0016", !47, !2, !18, null, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal, null, null, null} ; [ DW_TAG_subprogram ]
+!18 = !{!"0x15\00\000\000\000\000\000\000", !47, !2, null, !19, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!19 = !{!13, !13, !1}
+!20 = !{!"0x2e\00main\00main\00main\0023\000\001\000\006\000\000\0023", !47, !2, !21, null, i32 ()* @main, null, null, null} ; [ DW_TAG_subprogram ]
+!21 = !{!"0x15\00\000\000\000\000\000\000", !47, !2, null, !22, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!22 = !{!13}
+!23 = !{!"0x101\00i\0016\000", !17, !2, !13} ; [ DW_TAG_arg_variable ]
+!24 = !MDLocation(line: 16, scope: !17)
+!25 = !{!"0x101\00location\0016\000", !17, !2, !26} ; [ DW_TAG_arg_variable ]
+!26 = !{!"0x10\00SVal\000\0064\0064\000\000", !47, !2, !1} ; [ DW_TAG_reference_type ]
+!27 = !MDLocation(line: 17, scope: !28)
+!28 = !{!"0xb\0016\000\002", !47, !17} ; [ DW_TAG_lexical_block ]
+!29 = !MDLocation(line: 18, scope: !28)
+!30 = !MDLocation(line: 20, scope: !28)
+!31 = !{!"0x101\00this\0011\000", !16, !2, !32} ; [ DW_TAG_arg_variable ]
+!32 = !{!"0x26\00\000\0064\0064\000\0064", !47, !2, !33} ; [ DW_TAG_const_type ]
+!33 = !{!"0xf\00\000\0064\0064\000\000", !47, !2, !1} ; [ DW_TAG_pointer_type ]
+!34 = !MDLocation(line: 11, scope: !16)
+!35 = !MDLocation(line: 11, scope: !36)
+!36 = !{!"0xb\0011\000\001", !47, !37} ; [ DW_TAG_lexical_block ]
+!37 = !{!"0xb\0011\000\000", !47, !16} ; [ DW_TAG_lexical_block ]
+!38 = !{!"0x100\00v\0024\000", !39, !2, !1} ; [ DW_TAG_auto_variable ]
+!39 = !{!"0xb\0023\000\004", !47, !40} ; [ DW_TAG_lexical_block ]
+!40 = !{!"0xb\0023\000\003", !47, !20} ; [ DW_TAG_lexical_block ]
+!41 = !MDLocation(line: 24, scope: !39)
+!42 = !MDLocation(line: 25, scope: !39)
+!43 = !MDLocation(line: 26, scope: !39)
+!44 = !{!"0x100\00k\0026\000", !39, !2, !13} ; [ DW_TAG_auto_variable ]
+!45 = !MDLocation(line: 27, scope: !39)
+!47 = !{!"small.cc", !"/Users/manav/R8248330"}
+!48 = !{i32 0}
+!49 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
index a65b632691ae..cf9897ac03ad 100644
--- a/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
+++ b/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
@@ -15,21 +15,21 @@ entry:
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!17}
 
-!0 = metadata !{i32 786478, metadata !14, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 53, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !15, i32 12, metadata !"clang version 2.9 (trunk 114084)", i1 false, metadata !"", i32 0, metadata !16, metadata !16, metadata !13, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !14, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !14, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !15, metadata !7, metadata !"bar", metadata !"bar", metadata !"bar", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
-!8 = metadata !{i32 53, i32 13, metadata !9, null}
-!9 = metadata !{i32 786443, metadata !14, metadata !0, i32 53, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 4, i32 13, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !15, metadata !12, i32 4, i32 13, i32 2} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 786443, metadata !15, metadata !6, i32 4, i32 11, i32 1} ; [ DW_TAG_lexical_block ]
-!13 = metadata !{metadata !0, metadata !6}
-!14 = metadata !{metadata !"", metadata !"/private/tmp"}
-!15 = metadata !{metadata !"bug.c", metadata !"/private/tmp"}
-!16 = metadata !{i32 0}
-!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00foo\00foo\00foo\0053\000\001\000\006\000\000\000", !14, !1, !3, null, i32 ()* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !14} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 2.9 (trunk 114084)\000\00\000\00\000", !15, !16, !16, !13, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !14, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", !14, !1} ; [ DW_TAG_base_type ]
+!6 = !{!"0x2e\00bar\00bar\00bar\004\000\001\000\006\000\000\000", !15, !7, !3, null, i32 ()* @bar, null, null, null} ; [ DW_TAG_subprogram ]
+!7 = !{!"0x29", !15} ; [ DW_TAG_file_type ]
+!8 = !MDLocation(line: 53, column: 13, scope: !9)
+!9 = !{!"0xb\0053\0011\000", !14, !0} ; [ DW_TAG_lexical_block ]
+!10 = !MDLocation(line: 4, column: 13, scope: !11)
+!11 = !{!"0xb\004\0013\002", !15, !12} ; [ DW_TAG_lexical_block ]
+!12 = !{!"0xb\004\0011\001", !15, !6} ; [ DW_TAG_lexical_block ]
+!13 = !{!0, !6}
+!14 = !{!"", !"/private/tmp"}
+!15 = !{!"bug.c", !"/private/tmp"}
+!16 = !{i32 0}
+!17 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/2010-09-16-asmcrash.ll b/test/CodeGen/X86/2010-09-16-asmcrash.ll
index 9bbd6919421f..7aa9f32d41c4 100644
--- a/test/CodeGen/X86/2010-09-16-asmcrash.ll
+++ b/test/CodeGen/X86/2010-09-16-asmcrash.ll
@@ -53,4 +53,4 @@ return:                                           ; preds = %while.end, %while.b
   ret void
 }
 
-!0 = metadata !{i32 158484}
+!0 = !{i32 158484}
diff --git a/test/CodeGen/X86/2010-11-02-DbgParameter.ll b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
index 21ac7c9079e8..df3aa1f2ab37 100644
--- a/test/CodeGen/X86/2010-11-02-DbgParameter.ll
+++ b/test/CodeGen/X86/2010-11-02-DbgParameter.ll
@@ -9,32 +9,32 @@ target triple = "i386-apple-darwin11.0.0"
 define i32 @foo(%struct.bar* nocapture %i) nounwind readnone optsize noinline ssp {
 ; CHECK: TAG_formal_parameter
 entry:
-  tail call void @llvm.dbg.value(metadata !{%struct.bar* %i}, i64 0, metadata !6), !dbg !12
+  tail call void @llvm.dbg.value(metadata %struct.bar* %i, i64 0, metadata !6, metadata !{!"0x102"}), !dbg !12
   ret i32 1, !dbg !13
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!19}
 
-!0 = metadata !{i32 786478, metadata !17, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.bar*)* @foo, null, null, metadata !16, i32 3} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !17, i32 12, metadata !"clang version 2.9 (trunk 117922)", i1 true, metadata !"", i32 0, metadata !18, metadata !18, metadata !15, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !17, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !17, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786689, metadata !0, metadata !"i", metadata !1, i32 3, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!7 = metadata !{i32 786447, metadata !17, metadata !1, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 786451, metadata !17, metadata !1, metadata !"bar", i32 2, i64 64, i64 32, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [bar] [line 2, size 64, align 32, offset 0] [def] [from ]
-!9 = metadata !{metadata !10, metadata !11}
-!10 = metadata !{i32 786445, metadata !17,  metadata !1, metadata !"x", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!11 = metadata !{i32 786445, metadata !17, metadata !1, metadata !"y", i32 2, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ]
-!12 = metadata !{i32 3, i32 47, metadata !0, null}
-!13 = metadata !{i32 4, i32 2, metadata !14, null}
-!14 = metadata !{i32 786443, metadata !17, metadata !0, i32 3, i32 50, i32 0} ; [ DW_TAG_lexical_block ]
-!15 = metadata !{metadata !0}
-!16 = metadata !{metadata !6}
-!17 = metadata !{metadata !"one.c", metadata !"/private/tmp"}
-!18 = metadata !{i32 0}
-!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00foo\00foo\00\003\000\001\000\006\00256\001\003", !17, !1, !3, null, i32 (%struct.bar*)* @foo, null, null, !16} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !17} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 2.9 (trunk 117922)\001\00\000\00\000", !17, !18, !18, !15, null,  null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !17, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", !17, !2} ; [ DW_TAG_base_type ]
+!6 = !{!"0x101\00i\003\000", !0, !1, !7} ; [ DW_TAG_arg_variable ]
+!7 = !{!"0xf\00\000\0032\0032\000\000", !17, !1, !8} ; [ DW_TAG_pointer_type ]
+!8 = !{!"0x13\00bar\002\0064\0032\000\000\000", !17, !1, null, !9, null, null, null} ; [ DW_TAG_structure_type ] [bar] [line 2, size 64, align 32, offset 0] [def] [from ]
+!9 = !{!10, !11}
+!10 = !{!"0xd\00x\002\0032\0032\000\000", !17,  !1, !5} ; [ DW_TAG_member ]
+!11 = !{!"0xd\00y\002\0032\0032\0032\000", !17, !1, !5} ; [ DW_TAG_member ]
+!12 = !MDLocation(line: 3, column: 47, scope: !0)
+!13 = !MDLocation(line: 4, column: 2, scope: !14)
+!14 = !{!"0xb\003\0050\000", !17, !0} ; [ DW_TAG_lexical_block ]
+!15 = !{!0}
+!16 = !{!6}
+!17 = !{!"one.c", !"/private/tmp"}
+!18 = !{i32 0}
+!19 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
index 625a35161c11..8404020c91f1 100644
--- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -22,8 +22,8 @@ target triple = "x86_64-apple-darwin10.0.0"
 
 define i64 @gcd(i64 %a, i64 %b) nounwind readnone optsize noinline ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i64 %a}, i64 0, metadata !10), !dbg !18
-  tail call void @llvm.dbg.value(metadata !{i64 %b}, i64 0, metadata !11), !dbg !19
+  tail call void @llvm.dbg.value(metadata i64 %a, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !18
+  tail call void @llvm.dbg.value(metadata i64 %b, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !19
   br label %while.body, !dbg !20
 
 while.body:                                       ; preds = %while.body, %entry
@@ -34,14 +34,14 @@ while.body:                                       ; preds = %while.body, %entry
   br i1 %cmp, label %if.then, label %while.body, !dbg !23
 
 if.then:                                          ; preds = %while.body
-  tail call void @llvm.dbg.value(metadata !{i64 %rem}, i64 0, metadata !12), !dbg !21
+  tail call void @llvm.dbg.value(metadata i64 %rem, i64 0, metadata !12, metadata !{!"0x102"}), !dbg !21
   ret i64 %b.addr.0, !dbg !23
 }
 
 define i32 @main() nounwind optsize ssp {
 entry:
   %call = tail call i32 @rand() nounwind optsize, !dbg !24
-  tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !14), !dbg !24
+  tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !24
   %cmp = icmp ugt i32 %call, 21, !dbg !25
   br i1 %cmp, label %cond.true, label %cond.end, !dbg !25
 
@@ -51,7 +51,7 @@ cond.true:                                        ; preds = %entry
 
 cond.end:                                         ; preds = %entry, %cond.true
   %cond = phi i32 [ %call1, %cond.true ], [ %call, %entry ], !dbg !25
-  tail call void @llvm.dbg.value(metadata !{i32 %cond}, i64 0, metadata !17), !dbg !25
+  tail call void @llvm.dbg.value(metadata i32 %cond, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !25
   %conv = sext i32 %cond to i64, !dbg !26
   %conv5 = zext i32 %call to i64, !dbg !26
   %call6 = tail call i64 @gcd(i64 %conv, i64 %conv5) optsize, !dbg !26
@@ -71,44 +71,44 @@ declare i32 @rand() optsize
 
 declare i32 @printf(i8* nocapture, ...) nounwind optsize
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 declare i32 @puts(i8* nocapture) nounwind
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!33}
 
-!0 = metadata !{i32 786478, metadata !31, metadata !1, metadata !"gcd", metadata !"gcd", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i64 (i64, i64)* @gcd, null, null, metadata !29, i32 0} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 0] [gcd]
-!1 = metadata !{i32 786473, metadata !31} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !31, i32 12, metadata !"clang version 2.9 (trunk 124117)", i1 true, metadata !"", i32 0, metadata !32, metadata !32, metadata !28, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !31, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, null, metadata !2, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !31, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 25, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @main, null, null, metadata !30, i32 0} ; [ DW_TAG_subprogram ] [line 25] [def] [scope 0] [main]
-!7 = metadata !{i32 786453, metadata !31, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786689, metadata !0, metadata !"a", metadata !1, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!11 = metadata !{i32 786689, metadata !0, metadata !"b", metadata !1, i32 5, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!12 = metadata !{i32 786688, metadata !13, metadata !"c", metadata !1, i32 6, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!13 = metadata !{i32 786443, metadata !31, metadata !0, i32 5, i32 52, i32 0} ; [ DW_TAG_lexical_block ]
-!14 = metadata !{i32 786688, metadata !15, metadata !"m", metadata !1, i32 26, metadata !16, i32 0, null} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 786443, metadata !31, metadata !6, i32 25, i32 12, i32 2} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!17 = metadata !{i32 786688, metadata !15, metadata !"z_s", metadata !1, i32 27, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
-!18 = metadata !{i32 5, i32 41, metadata !0, null}
-!19 = metadata !{i32 5, i32 49, metadata !0, null}
-!20 = metadata !{i32 7, i32 5, metadata !13, null}
-!21 = metadata !{i32 8, i32 9, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !31, metadata !13, i32 7, i32 14, i32 1} ; [ DW_TAG_lexical_block ]
-!23 = metadata !{i32 9, i32 9, metadata !22, null}
-!24 = metadata !{i32 26, i32 38, metadata !15, null}
-!25 = metadata !{i32 27, i32 38, metadata !15, null}
-!26 = metadata !{i32 28, i32 9, metadata !15, null}
-!27 = metadata !{i32 30, i32 1, metadata !15, null}
-!28 = metadata !{metadata !0, metadata !6}
-!29 = metadata !{metadata !10, metadata !11, metadata !12}
-!30 = metadata !{metadata !14, metadata !17}
-!31 = metadata !{metadata !"rem_small.c", metadata !"/private/tmp"}
-!32 = metadata !{i32 0}
-!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00gcd\00gcd\00\005\000\001\000\006\00256\001\000", !31, !1, !3, null, i64 (i64, i64)* @gcd, null, null, !29} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 0] [gcd]
+!1 = !{!"0x29", !31} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 2.9 (trunk 124117)\001\00\000\00\001", !31, !32, !32, !28, null,  null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !31, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00long int\000\0064\0064\000\000\005", null, !2} ; [ DW_TAG_base_type ]
+!6 = !{!"0x2e\00main\00main\00\0025\000\001\000\006\000\001\000", !31, !1, !7, null, i32 ()* @main, null, null, !30} ; [ DW_TAG_subprogram ] [line 25] [def] [scope 0] [main]
+!7 = !{!"0x15\00\000\000\000\000\000\000", !31, !1, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
+!10 = !{!"0x101\00a\005\000", !0, !1, !5} ; [ DW_TAG_arg_variable ]
+!11 = !{!"0x101\00b\005\000", !0, !1, !5} ; [ DW_TAG_arg_variable ]
+!12 = !{!"0x100\00c\006\000", !13, !1, !5} ; [ DW_TAG_auto_variable ]
+!13 = !{!"0xb\005\0052\000", !31, !0} ; [ DW_TAG_lexical_block ]
+!14 = !{!"0x100\00m\0026\000", !15, !1, !16} ; [ DW_TAG_auto_variable ]
+!15 = !{!"0xb\0025\0012\002", !31, !6} ; [ DW_TAG_lexical_block ]
+!16 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, !2} ; [ DW_TAG_base_type ]
+!17 = !{!"0x100\00z_s\0027\000", !15, !1, !9} ; [ DW_TAG_auto_variable ]
+!18 = !MDLocation(line: 5, column: 41, scope: !0)
+!19 = !MDLocation(line: 5, column: 49, scope: !0)
+!20 = !MDLocation(line: 7, column: 5, scope: !13)
+!21 = !MDLocation(line: 8, column: 9, scope: !22)
+!22 = !{!"0xb\007\0014\001", !31, !13} ; [ DW_TAG_lexical_block ]
+!23 = !MDLocation(line: 9, column: 9, scope: !22)
+!24 = !MDLocation(line: 26, column: 38, scope: !15)
+!25 = !MDLocation(line: 27, column: 38, scope: !15)
+!26 = !MDLocation(line: 28, column: 9, scope: !15)
+!27 = !MDLocation(line: 30, column: 1, scope: !15)
+!28 = !{!0, !6}
+!29 = !{!10, !11, !12}
+!30 = !{!14, !17}
+!31 = !{!"rem_small.c", !"/private/tmp"}
+!32 = !{i32 0}
+!33 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/2011-06-14-mmx-inlineasm.ll b/test/CodeGen/X86/2011-06-14-mmx-inlineasm.ll
index 445fc01231e4..b764da1ab585 100644
--- a/test/CodeGen/X86/2011-06-14-mmx-inlineasm.ll
+++ b/test/CodeGen/X86/2011-06-14-mmx-inlineasm.ll
@@ -41,5 +41,5 @@ entry:
 
 declare void @llvm.x86.mmx.emms() nounwind
 
-!0 = metadata !{i32 888, i32 917, i32 945, i32 973, i32 1001, i32 1029, i32 1057}
-!1 = metadata !{i32 1390, i32 1430, i32 1469, i32 1508, i32 1547, i32 1586, i32 1625, i32 1664}
+!0 = !{i32 888, i32 917, i32 945, i32 973, i32 1001, i32 1029, i32 1057}
+!1 = !{i32 1390, i32 1430, i32 1469, i32 1508, i32 1547, i32 1586, i32 1625, i32 1664}
diff --git a/test/CodeGen/X86/2011-08-29-InitOrder.ll b/test/CodeGen/X86/2011-08-29-InitOrder.ll
index a95dcb580702..b278ad674152 100644
--- a/test/CodeGen/X86/2011-08-29-InitOrder.ll
+++ b/test/CodeGen/X86/2011-08-29-InitOrder.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-linux-gnu | FileCheck %s --check-prefix=CHECK-DEFAULT
+; RUN: llc < %s -mtriple=i386-linux-gnu -use-ctors | FileCheck %s --check-prefix=CHECK-DEFAULT
 ; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s --check-prefix=CHECK-DARWIN
 ; PR5329
 
diff --git a/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
index cd8a16f5732a..b78c13f9d4e6 100644
--- a/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
+++ b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-linux -mattr=-sse | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-linux -mattr=-sse | FileCheck %s
 ; PR11768
 
 @ptr = external global i8*
diff --git a/test/CodeGen/X86/2012-04-26-sdglue.ll b/test/CodeGen/X86/2012-04-26-sdglue.ll
index 16706ae957f2..6651af705551 100644
--- a/test/CodeGen/X86/2012-04-26-sdglue.ll
+++ b/test/CodeGen/X86/2012-04-26-sdglue.ll
@@ -8,7 +8,7 @@
 ;CHECK: vpxor
 ;CHECK: vinserti128
 ;CHECK: vpshufd
-;CHECK: vpshufd
+;CHECK: vpbroadcastd
 ;CHECK: vmulps
 ;CHECK: vmulps
 ;CHECK: ret
diff --git a/test/CodeGen/X86/2012-05-19-avx2-store.ll b/test/CodeGen/X86/2012-05-19-avx2-store.ll
deleted file mode 100644
index 1c1e8e2f0a21..000000000000
--- a/test/CodeGen/X86/2012-05-19-avx2-store.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx2 | FileCheck %s
-
-define void @double_save(<4 x i32>* %Ap, <4 x i32>* %Bp, <8 x i32>* %P) nounwind ssp {
-entry:
-  ; CHECK: vmovaps
-  ; CHECK: vinsertf128 $1, ([[A0:%rdi|%rsi]]),
-  ; CHECK: vmovups
-  %A = load <4 x i32>* %Ap
-  %B = load <4 x i32>* %Bp
-  %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  store <8 x i32> %Z, <8 x i32>* %P, align 16
-  ret void
-}
diff --git a/test/CodeGen/X86/2012-07-15-broadcastfold.ll b/test/CodeGen/X86/2012-07-15-broadcastfold.ll
index 1c39c747cdc8..519c7cac736f 100644
--- a/test/CodeGen/X86/2012-07-15-broadcastfold.ll
+++ b/test/CodeGen/X86/2012-07-15-broadcastfold.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx2 | FileCheck %s
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s
 
 declare x86_fastcallcc i64 @barrier()
 
diff --git a/test/CodeGen/X86/2012-10-02-DAGCycle.ll b/test/CodeGen/X86/2012-10-02-DAGCycle.ll
index 8d914db3315f..403d21ae9733 100644
--- a/test/CodeGen/X86/2012-10-02-DAGCycle.ll
+++ b/test/CodeGen/X86/2012-10-02-DAGCycle.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=i386-apple-macosx -relocation-model=pic < %s
-; RUN: llc -mtriple=x86_64-apple-macosx -relocation-model=pic < %s
+; RUN: llc -mtriple=i386-apple-macosx -relocation-model=pic < %s > /dev/null
+; RUN: llc -mtriple=x86_64-apple-macosx -relocation-model=pic < %s > /dev/null
 
 ; rdar://12393897
 
diff --git a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
index 62ee1e15fda0..c33b48dfecb5 100644
--- a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
@@ -12,11 +12,11 @@
 %struct.hgstruct.2.29 = type { %struct.bnode.1.28*, [3 x double], double, [3 x double] }
 %struct.bnode.1.28 = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode.1.28*, %struct.bnode.1.28* }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define signext i16 @subdivp(%struct.node.0.27* nocapture %p, double %dsq, double %tolsq, %struct.hgstruct.2.29* nocapture byval align 8 %hg) nounwind uwtable readonly ssp {
 entry:
-  call void @llvm.dbg.declare(metadata !{%struct.hgstruct.2.29* %hg}, metadata !4)
+  call void @llvm.dbg.declare(metadata %struct.hgstruct.2.29* %hg, metadata !4, metadata !{!"0x102"})
   %type = getelementptr inbounds %struct.node.0.27* %p, i64 0, i32 0
   %0 = load i16* %type, align 2
   %cmp = icmp eq i16 %0, 1
@@ -33,16 +33,20 @@ return:                                           ; preds = %for.cond.preheader,
   ret i16 %retval.0
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!12}
 
-!0 = metadata !{i32 786449, metadata !11, i32 12, metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Olden/bh/newbh.c] [DW_LANG_C99]
-!2 = metadata !{}
-!4 = metadata !{i32 786689, null, metadata !"hg", metadata !5, i32 67109589, metadata !6, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [hg] [line 725]
-!5 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ]
-!6 = metadata !{i32 786454, metadata !11, null, metadata !"hgstruct", i32 492, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] [hgstruct] [line 492, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 786451, metadata !11, null, metadata !"", i32 487, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [def] [from ]
-!11 = metadata !{metadata !"MultiSource/Benchmarks/Olden/bh/newbh.c", metadata !"MultiSource/Benchmarks/Olden/bh"}
-!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.3 (trunk 168918) (llvm/trunk 168920)\001\00\000\00\000", !11, !2, !2, !13, !2, null} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Olden/bh/newbh.c] [DW_LANG_C99]
+!2 = !{}
+!4 = !{!"0x101\00hg\0067109589\000", null, !5, !6} ; [ DW_TAG_arg_variable ] [hg] [line 725]
+!5 = !{!"0x29", !11} ; [ DW_TAG_file_type ]
+!6 = !{!"0x16\00hgstruct\00492\000\000\000\000", !11, null, !7} ; [ DW_TAG_typedef ] [hgstruct] [line 492, size 0, align 0, offset 0] [from ]
+!7 = !{!"0x13\00\00487\00512\0064\000\000\000", !11, null, null, null, null, i32 0, null} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [def] [from ]
+!11 = !{!"MultiSource/Benchmarks/Olden/bh/newbh.c", !"MultiSource/Benchmarks/Olden/bh"}
+!12 = !{i32 1, !"Debug Info Version", i32 2}
+!13 = !{!14}
+!14 = !{!"0x2e\00subdivp\00subdivp\00\000\000\001\000\006\00256\001\001", !11, !5, !15, null, i16 (%struct.node.0.27*, double, double, %struct.hgstruct.2.29* )* @subdivp, null, null, null} ; [ DW_TAG_subprogram ] [def] [subdivp]
+!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{null}
diff --git a/test/CodeGen/X86/2012-11-30-misched-dbg.ll b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
index 36667def6110..28ceb2fad2fc 100644
--- a/test/CodeGen/X86/2012-11-30-misched-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
@@ -12,7 +12,7 @@
 
 @.str15 = external hidden unnamed_addr constant [6 x i8], align 1
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define i32 @AttachGalley(%union.rec** nocapture %suspend_pt) nounwind uwtable ssp {
 entry:
@@ -43,7 +43,7 @@ if.then3344:
   br label %if.then4073
 
 if.then4073:                                      ; preds = %if.then3344
-  call void @llvm.dbg.declare(metadata !{[20 x i8]* %num14075}, metadata !4)
+  call void @llvm.dbg.declare(metadata [20 x i8]* %num14075, metadata !4, metadata !{!"0x102"})
   %arraydecay4078 = getelementptr inbounds [20 x i8]* %num14075, i64 0, i64 0
   %0 = load i32* undef, align 4
   %add4093 = add nsw i32 %0, 0
@@ -65,25 +65,30 @@ declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...)
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!35}
 
-!0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/MiBench/consumer-typeset/MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] [DW_LANG_C99]
-!1 = metadata !{metadata !2}
-!2 = metadata !{}
-!4 = metadata !{i32 786688, metadata !5, metadata !"num1", metadata !14, i32 815, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [num1] [line 815]
-!5 = metadata !{i32 786443, metadata !14, metadata !6, i32 815, i32 0, i32 177} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!6 = metadata !{i32 786443, metadata !14, metadata !7, i32 812, i32 0, i32 176} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!7 = metadata !{i32 786443, metadata !14, metadata !8, i32 807, i32 0, i32 175} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!8 = metadata !{i32 786443, metadata !14, metadata !9, i32 440, i32 0, i32 94} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!9 = metadata !{i32 786443, metadata !14, metadata !10, i32 435, i32 0, i32 91} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!10 = metadata !{i32 786443, metadata !14, metadata !11, i32 434, i32 0, i32 90} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!11 = metadata !{i32 786443, metadata !14, metadata !12, i32 250, i32 0, i32 24} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!12 = metadata !{i32 786443, metadata !14, metadata !13, i32 249, i32 0, i32 23} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!13 = metadata !{i32 786443, metadata !14, metadata !2, i32 221, i32 0, i32 19} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
-!14 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 160, i64 8, i32 0, i32 0, metadata !16, metadata !17, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char]
-!16 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!17 = metadata !{metadata !18}
-!18 = metadata !{i32 786465, i64 0, i64 20}       ; [ DW_TAG_subrange_type ] [0, 19]
-!19 = metadata !{metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset"}
+!0 = !{!"0x11\0012\00clang version 3.3 (trunk 168918) (llvm/trunk 168920)\001\00\000\00\000", !19, !2, !2, !20, !2, null} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/MiBench/consumer-typeset/MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] [DW_LANG_C99]
+!1 = !{!2}
+!2 = !{}
+!4 = !{!"0x100\00num1\00815\000", !5, !14, !15} ; [ DW_TAG_auto_variable ] [num1] [line 815]
+!5 = !{!"0xb\00815\000\00177", !14, !6} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!6 = !{!"0xb\00812\000\00176", !14, !7} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!7 = !{!"0xb\00807\000\00175", !14, !8} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!8 = !{!"0xb\00440\000\0094", !14, !9} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!9 = !{!"0xb\00435\000\0091", !14, !10} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!10 = !{!"0xb\00434\000\0090", !14, !11} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!11 = !{!"0xb\00250\000\0024", !14, !12} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!12 = !{!"0xb\00249\000\0023", !14, !13} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!13 = !{!"0xb\00221\000\0019", !14, !2} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!14 = !{!"0x29", !19} ; [ DW_TAG_file_type ]
+!15 = !{!"0x1\00\000\00160\008\000\000", null, null, !16, !17, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char]
+!16 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!17 = !{!18}
+!18 = !{!"0x21\000\0020"}       ; [ DW_TAG_subrange_type ] [0, 19]
+!19 = !{!"MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", !"MultiSource/Benchmarks/MiBench/consumer-typeset"}
+
+!20 = !{!21}
+!21 = !{!"0x2e\00AttachGalley\00AttachGalley\00\000\000\001\000\006\00256\001\001", !19, !14, !22, null, i32 (%union.rec**)* @AttachGalley, null, null, null} ; [ DW_TAG_subprogram ] [def] [AttachGalley]
+!22 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !23, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!23 = !{null}
 
 ; Test DebugValue uses visited by RegisterPressureTracker findUseBetween().
 ;
@@ -103,7 +108,7 @@ cond.true:                                        ; preds = %entry
   unreachable
 
 cond.end:                                         ; preds = %entry
-  call void @llvm.dbg.declare(metadata !{%"class.__gnu_cxx::hash_map"* %X}, metadata !31)
+  call void @llvm.dbg.declare(metadata %"class.__gnu_cxx::hash_map"* %X, metadata !31, metadata !{!"0x102"})
   %_M_num_elements.i.i.i.i = getelementptr inbounds %"class.__gnu_cxx::hash_map"* %X, i64 0, i32 0, i32 5
   invoke void @_Znwm()
           to label %exit.i unwind label %lpad2.i.i.i.i
@@ -129,9 +134,11 @@ declare void @_Znwm()
 
 !llvm.dbg.cu = !{!30}
 
-!30 = metadata !{i32 786449, metadata !34, i32 4, metadata !"clang version 3.3 (trunk 169129) (llvm/trunk 169135)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] [SingleSource/Benchmarks/Shootout-C++/hash.cpp] [DW_LANG_C_plus_plus]
-!31 = metadata !{i32 786688, null, metadata !"X", null, i32 29, metadata !32, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [X] [line 29]
-!32 = metadata !{i32 786454, metadata !34, null, metadata !"HM", i32 28, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_typedef ] [HM] [line 28, size 0, align 0, offset 0] [from ]
-!33 = metadata !{i32 786473, metadata !34} ; [ DW_TAG_file_type ]
-!34 = metadata !{metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++"}
-!35 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!30 = !{!"0x11\004\00clang version 3.3 (trunk 169129) (llvm/trunk 169135)\001\00\000\00\000", !34, !2, !2, !36, null, null} ; [ DW_TAG_compile_unit ] [SingleSource/Benchmarks/Shootout-C++/hash.cpp] [DW_LANG_C_plus_plus]
+!31 = !{!"0x100\00X\0029\000", null, null, !32} ; [ DW_TAG_auto_variable ] [X] [line 29]
+!32 = !{!"0x16\00HM\0028\000\000\000\000", !34, null, null} ; [ DW_TAG_typedef ] [HM] [line 28, size 0, align 0, offset 0] [from ]
+!33 = !{!"0x29", !34} ; [ DW_TAG_file_type ]
+!34 = !{!"SingleSource/Benchmarks/Shootout-C++/hash.cpp", !"SingleSource/Benchmarks/Shootout-C++"}
+!35 = !{i32 1, !"Debug Info Version", i32 2}
+!36 = !{!37}
+!37 = !{!"0x2e\00main\00main\00\000\000\001\000\006\00256\001\001", !19, !14, !22, null, void ()* @main, null, null, null} ; [ DW_TAG_subprogram ] [def] [main]
diff --git a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
index 5aec3d92c70f..04b31749ce58 100644
--- a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
+++ b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
@@ -9,7 +9,7 @@
 
 %struct.btCompoundLeafCallback = type { i32, i32 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define void @test() unnamed_addr uwtable ssp align 2 {
 entry:
@@ -20,7 +20,7 @@ if.then:                                          ; preds = %entry
   unreachable
 
 if.end:                                           ; preds = %entry
-  call void @llvm.dbg.declare(metadata !{%struct.btCompoundLeafCallback* %callback}, metadata !3)
+  call void @llvm.dbg.declare(metadata %struct.btCompoundLeafCallback* %callback, metadata !3, metadata !{!"0x102"})
   %m = getelementptr inbounds %struct.btCompoundLeafCallback* %callback, i64 0, i32 1
   store i32 0, i32* undef, align 8
   %cmp12447 = icmp sgt i32 undef, 0
@@ -36,11 +36,13 @@ invoke.cont44:                                    ; preds = %if.end
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8}
 
-!0 = metadata !{i32 786449, metadata !6, i32 4, metadata !"clang version 3.3 (trunk 168984) (llvm/trunk 168983)", i1 true, metadata !"", i32 0, metadata !2, metadata !7, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Bullet/MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp] [DW_LANG_C_plus_plus]
-!2 = metadata !{null}
-!3 = metadata !{i32 786688, null, metadata !"callback", null, i32 214, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [callback] [line 214]
-!4 = metadata !{i32 786451, metadata !6, null, metadata !"btCompoundLeafCallback", i32 90, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [btCompoundLeafCallback] [line 90, size 512, align 64, offset 0] [def] [from ]
-!5 = metadata !{i32 786473, metadata !6} ; [ DW_TAG_file_type ]
-!6 = metadata !{metadata !"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", metadata !"MultiSource/Benchmarks/Bullet"}
-!7 = metadata !{i32 0}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.3 (trunk 168984) (llvm/trunk 168983)\001\00\000\00\000", !6, null, null, !1, null, null} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Bullet/MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!2}
+!2 = !{!"0x2e\00test\00test\00\000\000\001\000\006\00256\001\001", !6, !5, !7, null, void ()* @test, null, null, null} ; [ DW_TAG_subprogram ] [def] [test]
+!3 = !{!"0x100\00callback\00214\000", null, null, !4} ; [ DW_TAG_auto_variable ] [callback] [line 214]
+!4 = !{!"0x13\00btCompoundLeafCallback\0090\00512\0064\000\000\000", !6, null, null, null, null, null, null} ; [ DW_TAG_structure_type ] [btCompoundLeafCallback] [line 90, size 512, align 64, offset 0] [def] [from ]
+!5 = !{!"0x29", !6} ; [ DW_TAG_file_type ]
+!6 = !{!"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", !"MultiSource/Benchmarks/Bullet"}
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !9, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{i32 1, !"Debug Info Version", i32 2}
+!9 = !{null}
diff --git a/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll b/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
index bbba796eed24..9cd150a2f56d 100644
--- a/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
+++ b/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
@@ -6,7 +6,7 @@
 ; we may reference variables that were not live across basic blocks
 ; resulting in undefined virtual registers.
 ;
-; In this example, this is illustrated by a the spill/reload of the
+; In this example, this is illustrated by a spill/reload of the
 ; LOADED_PTR_SLOT.
 ;
 ; Before this patch, the compiler was accessing two different spill
@@ -41,7 +41,7 @@ entry:
     i1 false, label %label_end
   ]
 default:
-  unreachable
+  br label %label_end
 
 label_true:
   br label %label_end
@@ -80,7 +80,7 @@ entry:
     i1 false, label %label_end
   ]
 default:
-  unreachable
+  br label %label_end
 
 label_true:
   br label %label_end
@@ -119,7 +119,7 @@ entry:
     i1 false, label %label_end
   ]
 default:
-  unreachable
+  br label %label_end
 
 label_true:
   br label %label_end
diff --git a/test/CodeGen/X86/2014-08-29-CompactUnwind.ll b/test/CodeGen/X86/2014-08-29-CompactUnwind.ll
new file mode 100644
index 000000000000..f65d7c9d2e05
--- /dev/null
+++ b/test/CodeGen/X86/2014-08-29-CompactUnwind.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -disable-fp-elim -mtriple x86_64-apple-darwin11 -mcpu corei7 -filetype=obj -o - | llvm-objdump -d -unwind-info -s - | FileCheck %s
+; Regression test for http://llvm.org/bugs/show_bug.cgi?id=20800.
+
+; ModuleID = 'asan_report.ii'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+@.str = private unnamed_addr constant [3 x i8] c"=>\00", align 1
+@.str1 = private unnamed_addr constant [3 x i8] c"  \00", align 1
+@.str2 = private unnamed_addr constant [6 x i8] c"%s%p:\00", align 1
+
+; CHECK: ___asan_report_error:
+
+; subq instruction starts at 0x0a, so the second byte of the compact encoding
+; (UNWIND_X86_64_FRAMELESS_STACK_SIZE in mach-o/compact_unwind_encoding.h)
+; must be 0x0d.
+; CHECK: {{a:.*subq.*%rsp}}
+
+; CHECK: Contents of __compact_unwind section
+; CHECK: ___asan_report_error
+
+; Because of incorrect push instruction size in X86AsmBackend.cpp the stack
+; size was also calculated incorrectly.
+; CHECK-NOT: {{compact encoding:.*0x0309f800}}
+; CHECK: {{compact encoding:.*0x030df800}}
+
+define void @__asan_report_error() #0 {
+  %str.i = alloca i64, align 8
+  %stack = alloca [256 x i64], align 8
+  br label %print_shadow_bytes.exit.i
+
+print_shadow_bytes.exit.i: ; preds = %print_shadow_bytes.exit.i, %0
+  %iv.i = phi i64 [ -5, %0 ], [ %iv.next.i, %print_shadow_bytes.exit.i ]
+  %reg15 = icmp eq i64 %iv.i, 0
+  %.str..str1.i = select i1 %reg15, [3 x i8]* @.str, [3 x i8]* @.str1
+  %reg16 = getelementptr inbounds [3 x i8]* %.str..str1.i, i64 0, i64 0
+  %reg17 = shl i64 %iv.i, 1
+  %reg19 = inttoptr i64 %reg17 to i8*
+  call void (i64*, i8*, ...)* @append(i64* %str.i, i8* getelementptr inbounds ([6 x i8]* @.str2, i64 0, i64 0), i8* %reg16, i8* %reg19)
+  %iv.next.i = add nsw i64 %iv.i, 0
+  br label %print_shadow_bytes.exit.i
+}
+
+declare void @append(i64*, i8*, ...)
+
+attributes #0 = { "no-frame-pointer-elim"="false" }
diff --git a/test/CodeGen/X86/MachineBranchProb.ll b/test/CodeGen/X86/MachineBranchProb.ll
index a8931527ea6d..cf41ef2ea3ad 100644
--- a/test/CodeGen/X86/MachineBranchProb.ll
+++ b/test/CodeGen/X86/MachineBranchProb.ll
@@ -31,4 +31,4 @@ for.inc20:                                        ; preds = %for.cond2
   ret void
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 112017436, i32 -735157296}
+!0 = !{!"branch_weights", i32 112017436, i32 -735157296}
diff --git a/test/CodeGen/X86/MachineSink-DbgValue.ll b/test/CodeGen/X86/MachineSink-DbgValue.ll
index 4ce2fb3dcafb..3a2c58f97e8c 100644
--- a/test/CodeGen/X86/MachineSink-DbgValue.ll
+++ b/test/CodeGen/X86/MachineSink-DbgValue.ll
@@ -4,10 +4,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.7.0"
 
 define i32 @foo(i32 %i, i32* nocapture %c) nounwind uwtable readonly ssp {
-  tail call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !6), !dbg !12
+  tail call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !6, metadata !{!"0x102"}), !dbg !12
   %ab = load i32* %c, align 1, !dbg !14
-  tail call void @llvm.dbg.value(metadata !{i32* %c}, i64 0, metadata !7), !dbg !13
-  tail call void @llvm.dbg.value(metadata !{i32 %ab}, i64 0, metadata !10), !dbg !14
+  tail call void @llvm.dbg.value(metadata i32* %c, i64 0, metadata !7, metadata !{!"0x102"}), !dbg !13
+  tail call void @llvm.dbg.value(metadata i32 %ab, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !14
   %cd = icmp eq i32 %i, 42, !dbg !15
   br i1 %cd, label %bb1, label %bb2, !dbg !15
 
@@ -23,31 +23,31 @@ bb2:
   ret i32 %.0, !dbg !17
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!22}
 
-!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, metadata !18, null,  null, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i32*)* @foo, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
-!2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777218, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!7 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 33554434, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
-!9 = metadata !{i32 786468, null, metadata !0, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786688, metadata !11, metadata !"a", metadata !2, i32 3, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 786443, metadata !20, metadata !1, i32 2, i32 25, i32 0} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 2, i32 13, metadata !1, null}
-!13 = metadata !{i32 2, i32 22, metadata !1, null}
-!14 = metadata !{i32 3, i32 14, metadata !11, null}
-!15 = metadata !{i32 4, i32 3, metadata !11, null}
-!16 = metadata !{i32 5, i32 5, metadata !11, null}
-!17 = metadata !{i32 7, i32 1, metadata !11, null}
-!18 = metadata !{metadata !1}
-!19 = metadata !{metadata !6, metadata !7, metadata !10}
-!20 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
-!21 = metadata !{i32 0}
-!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00Apple clang version 3.0 (tags/Apple/clang-211.10.1) (based on LLVM 3.0svn)\001\00\000\00\001", !20, !21, !21, !18, null,  null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"0x2e\00foo\00foo\00\002\000\001\000\006\00256\001\000", !20, !2, !3, null, i32 (i32, i32*)* @foo, null, null, !19} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
+!2 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !20, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
+!6 = !{!"0x101\00i\0016777218\000", !1, !2, !5} ; [ DW_TAG_arg_variable ]
+!7 = !{!"0x101\00c\0033554434\000", !1, !2, !8} ; [ DW_TAG_arg_variable ]
+!8 = !{!"0xf\00\000\0064\0064\000\000", null, !0, !9} ; [ DW_TAG_pointer_type ]
+!9 = !{!"0x24\00char\000\008\008\000\000\006", null, !0} ; [ DW_TAG_base_type ]
+!10 = !{!"0x100\00a\003\000", !11, !2, !9} ; [ DW_TAG_auto_variable ]
+!11 = !{!"0xb\002\0025\000", !20, !1} ; [ DW_TAG_lexical_block ]
+!12 = !MDLocation(line: 2, column: 13, scope: !1)
+!13 = !MDLocation(line: 2, column: 22, scope: !1)
+!14 = !MDLocation(line: 3, column: 14, scope: !11)
+!15 = !MDLocation(line: 4, column: 3, scope: !11)
+!16 = !MDLocation(line: 5, column: 5, scope: !11)
+!17 = !MDLocation(line: 7, column: 1, scope: !11)
+!18 = !{!1}
+!19 = !{!6, !7, !10}
+!20 = !{!"a.c", !"/private/tmp"}
+!21 = !{i32 0}
+!22 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll
index f6d68520b7b4..dfdaea523fdf 100644
--- a/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -148,12 +148,12 @@ define void @merge_nonconst_store(i32 %count, i8 %zz, %struct.A* nocapture %p) n
 }
 
 
-;CHECK-LABEL: merge_loads_i16:
-; load:
-;CHECK: movw
-; store:
-;CHECK: movw
-;CHECK: ret
+; CHECK-LABEL: merge_loads_i16:
+;  load:
+; CHECK: movw
+;  store:
+; CHECK: movw
+; CHECK: ret
 define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
   %1 = icmp sgt i32 %count, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
@@ -181,13 +181,13 @@ define void @merge_loads_i16(i32 %count, %struct.A* noalias nocapture %q, %struc
   ret void
 }
 
-; The loads and the stores are interleved. Can't merge them.
-;CHECK-LABEL: no_merge_loads:
-;CHECK: movb
-;CHECK: movb
-;CHECK: movb
-;CHECK: movb
-;CHECK: ret
+; The loads and the stores are interleaved. Can't merge them.
+; CHECK-LABEL: no_merge_loads:
+; CHECK: movb
+; CHECK: movb
+; CHECK: movb
+; CHECK: movb
+; CHECK: ret
 define void @no_merge_loads(i32 %count, %struct.A* noalias nocapture %q, %struct.A* noalias nocapture %p) nounwind uwtable noinline ssp {
   %1 = icmp sgt i32 %count, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
@@ -216,12 +216,12 @@ a4:                                       ; preds = %4, %.lr.ph
 }
 
 
-;CHECK-LABEL: merge_loads_integer:
-; load:
-;CHECK: movq
-; store:
-;CHECK: movq
-;CHECK: ret
+; CHECK-LABEL: merge_loads_integer:
+;  load:
+; CHECK: movq
+;  store:
+; CHECK: movq
+; CHECK: ret
 define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
   %1 = icmp sgt i32 %count, 0
   br i1 %1, label %.lr.ph, label %._crit_edge
@@ -250,12 +250,12 @@ define void @merge_loads_integer(i32 %count, %struct.B* noalias nocapture %q, %s
 }
 
 
-;CHECK-LABEL: merge_loads_vector:
-; load:
-;CHECK: movups
-; store:
-;CHECK: movups
-;CHECK: ret
+; CHECK-LABEL: merge_loads_vector:
+;  load:
+; CHECK: movups
+;  store:
+; CHECK: movups
+; CHECK: ret
 define void @merge_loads_vector(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
   %a1 = icmp sgt i32 %count, 0
   br i1 %a1, label %.lr.ph, label %._crit_edge
@@ -291,18 +291,18 @@ block4:                                       ; preds = %4, %.lr.ph
   ret void
 }
 
-;CHECK-LABEL: merge_loads_no_align:
-; load:
-;CHECK: movl
-;CHECK: movl
-;CHECK: movl
-;CHECK: movl
-; store:
-;CHECK: movl
-;CHECK: movl
-;CHECK: movl
-;CHECK: movl
-;CHECK: ret
+; CHECK-LABEL: merge_loads_no_align:
+;  load:
+; CHECK: movl
+; CHECK: movl
+; CHECK: movl
+; CHECK: movl
+;  store:
+; CHECK: movl
+; CHECK: movl
+; CHECK: movl
+; CHECK: movl
+; CHECK: ret
 define void @merge_loads_no_align(i32 %count, %struct.B* noalias nocapture %q, %struct.B* noalias nocapture %p) nounwind uwtable noinline ssp {
   %a1 = icmp sgt i32 %count, 0
   br i1 %a1, label %.lr.ph, label %._crit_edge
diff --git a/test/CodeGen/X86/StackColoring-dbg.ll b/test/CodeGen/X86/StackColoring-dbg.ll
index 51d0d1775c67..498ad7edaa9d 100644
--- a/test/CodeGen/X86/StackColoring-dbg.ll
+++ b/test/CodeGen/X86/StackColoring-dbg.ll
@@ -5,7 +5,7 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define void @foo() nounwind uwtable ssp {
 entry:
@@ -17,7 +17,7 @@ entry:
 for.body:
   call void @llvm.lifetime.end(i64 -1, i8* %0) nounwind
   call void @llvm.lifetime.start(i64 -1, i8* %x.i) nounwind
-  call void @llvm.dbg.declare(metadata !{i8* %x.i}, metadata !22) nounwind
+  call void @llvm.dbg.declare(metadata i8* %x.i, metadata !22, metadata !{!"0x102"}) nounwind
   br label %for.body
 }
 
@@ -27,9 +27,9 @@ declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!23}
-!0 = metadata !{i32 524305, metadata !1, i32 1, metadata !"clang", i1 true, metadata !"", i32 0, metadata !2, metadata !2, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !"t.c", metadata !""}
-!16 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6}
-!2 = metadata !{i32 0}
-!22 = metadata !{i32 786688, null, metadata !"x", metadata !2, i32 16, metadata !16, i32 0, i32 0}
-!23 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\001\00clang\001\00\000\00\000", !1, !2, !2, null, null, null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"t.c", !""}
+!16 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ]
+!2 = !{i32 0}
+!22 = !{!"0x100\00x\0016\000", null, !2, !16} ; [ DW_TAG_auto_variable ]
+!23 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/SwizzleShuff.ll b/test/CodeGen/X86/SwizzleShuff.ll
index 100817a676e8..a435272dca44 100644
--- a/test/CodeGen/X86/SwizzleShuff.ll
+++ b/test/CodeGen/X86/SwizzleShuff.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx -x86-experimental-vector-widening-legalization | FileCheck %s
 
 ; Check that we perform a scalar XOR on i32.
 
diff --git a/test/CodeGen/X86/TruncAssertZext.ll b/test/CodeGen/X86/TruncAssertZext.ll
new file mode 100644
index 000000000000..8c664127f92a
--- /dev/null
+++ b/test/CodeGen/X86/TruncAssertZext.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -O2 -march=x86-64 | FileCheck %s
+; Checks that a zeroing mov is inserted for the trunc/zext pair even when
+; the source of the zext is an AssertSext node
+; PR20494
+
+define i64 @main(i64 %a) {
+; CHECK-LABEL: main
+; CHECK: movl %e{{..}}, %eax
+; CHECK: ret
+  %or = or i64 %a, -2
+  %trunc = trunc i64 %or to i32
+  br label %l
+l:
+  %ext = zext i32 %trunc to i64
+  ret i64 %ext
+}
diff --git a/test/CodeGen/X86/add_shl_constant.ll b/test/CodeGen/X86/add_shl_constant.ll
new file mode 100644
index 000000000000..33074e4780e6
--- /dev/null
+++ b/test/CodeGen/X86/add_shl_constant.ll
@@ -0,0 +1,49 @@
+; RUN: llc -march=x86-64 -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+
+; CHECK-LABEL: add_shl_add_constant_1_i32
+; CHECK: leal 984(%rsi,%rdi,8), %eax
+; CHECK-NEXT: retq
+define i32 @add_shl_add_constant_1_i32(i32 %x, i32 %y) nounwind {
+  %add.0 = add i32 %x, 123
+  %shl = shl i32 %add.0, 3
+  %add.1 = add i32 %shl, %y
+  ret i32 %add.1
+}
+
+; CHECK-LABEL: add_shl_add_constant_2_i32
+; CHECK: leal 984(%rsi,%rdi,8), %eax
+; CHECK-NEXT: retq
+define i32 @add_shl_add_constant_2_i32(i32 %x, i32 %y) nounwind {
+  %add.0 = add i32 %x, 123
+  %shl = shl i32 %add.0, 3
+  %add.1 = add i32 %y, %shl
+  ret i32 %add.1
+}
+
+; CHECK: LCPI2_0:
+; CHECK: .long 984
+; CHECK: _add_shl_add_constant_1_v4i32
+; CHECK: pslld $3, %[[REG:xmm[0-9]+]]
+; CHECK: paddd %xmm1, %[[REG]]
+; CHECK: paddd LCPI2_0(%rip), %[[REG:xmm[0-9]+]]
+; CHECK: retq
+define <4 x i32> @add_shl_add_constant_1_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
+  %add.0 = add <4 x i32> %x, <i32 123, i32 123, i32 123, i32 123>
+  %shl = shl <4 x i32> %add.0, <i32 3, i32 3, i32 3, i32 3>
+  %add.1 = add <4 x i32> %shl, %y
+  ret <4 x i32> %add.1
+}
+
+; CHECK: LCPI3_0:
+; CHECK: .long 984
+; CHECK: _add_shl_add_constant_2_v4i32
+; CHECK: pslld $3, %[[REG:xmm[0-9]+]]
+; CHECK: paddd %xmm1, %[[REG]]
+; CHECK: paddd LCPI3_0(%rip), %[[REG:xmm[0-9]+]]
+; CHECK: retq
+define <4 x i32> @add_shl_add_constant_2_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
+  %add.0 = add <4 x i32> %x, <i32 123, i32 123, i32 123, i32 123>
+  %shl = shl <4 x i32> %add.0, <i32 3, i32 3, i32 3, i32 3>
+  %add.1 = add <4 x i32> %y, %shl
+  ret <4 x i32> %add.1
+}
diff --git a/test/CodeGen/X86/addr-mode-matcher.ll b/test/CodeGen/X86/addr-mode-matcher.ll
new file mode 100644
index 000000000000..d5920910f289
--- /dev/null
+++ b/test/CodeGen/X86/addr-mode-matcher.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s | FileCheck %s
+
+; This testcase used to hit an assert during ISel.  For details, see the big
+; comment inside the function.
+
+; CHECK-LABEL: foo:
+; The AND should be turned into a subreg access.
+; CHECK-NOT: and
+; The shift (leal) should be folded into the scale of the address in the load.
+; CHECK-NOT: leal
+; CHECK: movl {{.*}},4),
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.6.0"
+
+define void @foo(i32 %a) {
+bb:
+  br label %bb1692
+
+bb1692:
+  %tmp1694 = phi i32 [ 0, %bb ], [ %tmp1745, %bb1692 ]
+  %xor = xor i32 0, %tmp1694
+
+; %load1 = (load (and (shl %xor, 2), 1020))
+  %tmp1701 = shl i32 %xor, 2
+  %tmp1702 = and i32 %tmp1701, 1020
+  %tmp1703 = getelementptr inbounds [1028 x i8]* null, i32 0, i32 %tmp1702
+  %tmp1704 = bitcast i8* %tmp1703 to i32*
+  %load1 = load i32* %tmp1704, align 4
+
+; %load2 = (load (shl (and %xor, 255), 2))
+  %tmp1698 = and i32 %xor, 255
+  %tmp1706 = shl i32 %tmp1698, 2
+  %tmp1707 = getelementptr inbounds [1028 x i8]* null, i32 0, i32 %tmp1706
+  %tmp1708 = bitcast i8* %tmp1707 to i32*
+  %load2 = load i32* %tmp1708, align 4
+
+  %tmp1710 = or i32 %load2, %a
+
+; While matching xor we address-match %load1.  The and-of-shift reassocication
+; in address matching transform this into into a shift-of-and and the resuting
+; node becomes identical to %load2.  CSE replaces %load1 which leaves its
+; references in MatchScope and RecordedNodes stale.
+  %tmp1711 = xor i32 %load1, %tmp1710
+
+  %tmp1744 = getelementptr inbounds [256 x i32]* null, i32 0, i32 %tmp1711
+  store i32 0, i32* %tmp1744, align 4
+  %tmp1745 = add i32 %tmp1694, 1
+  indirectbr i8* undef, [label %bb1756, label %bb1692]
+
+bb1756:
+  br label %bb2705
+
+bb2705:
+  indirectbr i8* undef, [label %bb5721, label %bb5736]
+
+bb5721:
+  br label %bb2705
+
+bb5736:
+  ret void
+}
diff --git a/test/CodeGen/X86/adx-intrinsics.ll b/test/CodeGen/X86/adx-intrinsics.ll
new file mode 100644
index 000000000000..0498177a9c12
--- /dev/null
+++ b/test/CodeGen/X86/adx-intrinsics.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 --show-mc-encoding| FileCheck %s --check-prefix=NOADX --check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell --show-mc-encoding| FileCheck %s --check-prefix=ADX --check-prefix=CHECK
+
+declare i8 @llvm.x86.addcarryx.u32(i8, i32, i32, i8*)
+
+define i8 @test_addcarryx_u32(i8 %c, i32 %a, i32 %b, i8* %ptr) {
+; CHECK-LABEL: test_addcarryx_u32
+; CHECK: addb
+; ADX: adcxl
+; CHECK: setb
+; CHECK: retq
+  %ret = tail call i8 @llvm.x86.addcarryx.u32(i8 %c, i32 %a, i32 %b, i8* %ptr)
+  ret i8 %ret;
+}
+
+declare i8 @llvm.x86.addcarryx.u64(i8, i64, i64, i8*)
+
+define i8 @test_addcarryx_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) {
+; CHECK-LABEL: test_addcarryx_u64
+; CHECK: addb
+; ADX: adcxq
+; CHECK: setb
+; CHECK: retq
+  %ret = tail call i8 @llvm.x86.addcarryx.u64(i8 %c, i64 %a, i64 %b, i8* %ptr)
+  ret i8 %ret;
+}
+
+declare i8 @llvm.x86.addcarry.u32(i8, i32, i32, i8*)
+
+define i8 @test_addcarry_u32(i8 %c, i32 %a, i32 %b, i8* %ptr) {
+; CHECK-LABEL: test_addcarry_u32
+; CHECK: addb
+; ADX: adcxl
+; NOADX: adcl
+; CHECK: setb
+; CHECK: retq
+  %ret = tail call i8 @llvm.x86.addcarry.u32(i8 %c, i32 %a, i32 %b, i8* %ptr)
+  ret i8 %ret;
+}
+
+declare i8 @llvm.x86.addcarry.u64(i8, i64, i64, i8*)
+
+define i8 @test_addcarry_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) {
+; CHECK-LABEL: test_addcarry_u64
+; CHECK: addb
+; ADX: adcxq
+; NOADX: adcq
+; CHECK: setb
+; CHECK: retq
+  %ret = tail call i8 @llvm.x86.addcarry.u64(i8 %c, i64 %a, i64 %b, i8* %ptr)
+  ret i8 %ret;
+}
+
+declare i8 @llvm.x86.subborrow.u32(i8, i32, i32, i8*)
+
+define i8 @test_subborrow_u32(i8 %c, i32 %a, i32 %b, i8* %ptr) {
+; CHECK-LABEL: test_subborrow_u32
+; CHECK: addb
+; CHECK: sbbl
+; CHECK: setb
+; CHECK: retq
+  %ret = tail call i8 @llvm.x86.subborrow.u32(i8 %c, i32 %a, i32 %b, i8* %ptr)
+  ret i8 %ret;
+}
+
+declare i8 @llvm.x86.subborrow.u64(i8, i64, i64, i8*)
+
+define i8 @test_subborrow_u64(i8 %c, i64 %a, i64 %b, i8* %ptr) {
+; CHECK-LABEL: test_subborrow_u64
+; CHECK: addb
+; CHECK: sbbq
+; CHECK: setb
+; CHECK: retq
+  %ret = tail call i8 @llvm.x86.subborrow.u64(i8 %c, i64 %a, i64 %b, i8* %ptr)
+  ret i8 %ret;
+}
+
diff --git a/test/CodeGen/X86/aliases.ll b/test/CodeGen/X86/aliases.ll
index bf55644de41e..82a8e482b7fa 100644
--- a/test/CodeGen/X86/aliases.ll
+++ b/test/CodeGen/X86/aliases.ll
@@ -30,12 +30,12 @@ define i32 @foo_f() {
   ret i32 0
 }
 ; CHECK-DAG: .weak	bar_f
-@bar_f = alias weak %FunTy* @foo_f
+@bar_f = weak alias %FunTy* @foo_f
 
-@bar_l = alias linkonce_odr i32* @bar
+@bar_l = linkonce_odr alias i32* @bar
 ; CHECK-DAG: .weak	bar_l
 
-@bar_i = alias internal i32* @bar
+@bar_i = internal alias i32* @bar
 
 ; CHECK-DAG: .globl	A
 @A = alias bitcast (i32* @bar to i64*)
diff --git a/test/CodeGen/X86/aligned-variadic.ll b/test/CodeGen/X86/aligned-variadic.ll
new file mode 100644
index 000000000000..e2155fe4b373
--- /dev/null
+++ b/test/CodeGen/X86/aligned-variadic.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-apple-darwin   | FileCheck %s -check-prefix=X32
+
+%struct.Baz = type { [17 x i8] }
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+; Function Attrs: nounwind uwtable
+define void @bar(%struct.Baz* byval nocapture readnone align 8 %x, ...) {
+entry:
+  %va = alloca [1 x %struct.__va_list_tag], align 16
+  %arraydecay = getelementptr inbounds [1 x %struct.__va_list_tag]* %va, i64 0, i64 0
+  %arraydecay1 = bitcast [1 x %struct.__va_list_tag]* %va to i8*
+  call void @llvm.va_start(i8* %arraydecay1)
+  %overflow_arg_area_p = getelementptr inbounds [1 x %struct.__va_list_tag]* %va, i64 0, i64 0, i32 2
+  %overflow_arg_area = load i8** %overflow_arg_area_p, align 8
+  %overflow_arg_area.next = getelementptr i8* %overflow_arg_area, i64 24
+  store i8* %overflow_arg_area.next, i8** %overflow_arg_area_p, align 8
+; X32: leal    68(%esp), [[REG:%.*]]
+; X32: movl    [[REG]], 16(%esp)
+; X64: leaq    232(%rsp), [[REG:%.*]]
+; X64: movq    [[REG]], 184(%rsp)
+; X64: leaq    176(%rsp), %rdi
+  call void @qux(%struct.__va_list_tag* %arraydecay)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.va_start(i8*)
+
+declare void @qux(%struct.__va_list_tag*)
diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll
index 74b9470db752..9d8b6cfa6730 100644
--- a/test/CodeGen/X86/alloca-align-rounding.ll
+++ b/test/CodeGen/X86/alloca-align-rounding.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux -enable-misched=false | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-linux-gnux32 -enable-misched=false | FileCheck %s -check-prefix=X32ABI
 
 declare void @bar(<2 x i64>* %n)
 
@@ -6,15 +7,29 @@ define void @foo(i64 %h) {
   %p = alloca <2 x i64>, i64 %h
   call void @bar(<2 x i64>* %p)
   ret void
-; CHECK: foo
+; CHECK-LABEL: foo
 ; CHECK-NOT: andq $-32, %rax
+; X32ABI-LABEL: foo
+; X32ABI-NOT: andl $-32, %eax
 }
 
 define void @foo2(i64 %h) {
   %p = alloca <2 x i64>, i64 %h, align 32
   call void @bar(<2 x i64>* %p)
   ret void
-; CHECK: foo2
+; CHECK-LABEL: foo2
 ; CHECK: andq $-32, %rsp
 ; CHECK: andq $-32, %rax
+; X32ABI-LABEL: foo2
+; X32ABI: andl $-32, %esp
+; X32ABI: andl $-32, %eax
+}
+
+define void @foo3(i64 %h) {
+  %p = alloca <2 x i64>, i64 %h
+  ret void
+; CHECK-LABEL: foo3
+; CHECK: movq %rbp, %rsp
+; X32ABI-LABEL: foo3
+; X32ABI: movl %ebp, %esp
 }
diff --git a/test/CodeGen/X86/asm-block-labels.ll b/test/CodeGen/X86/asm-block-labels.ll
index 6dbfb16a6d50..93524386c6ba 100644
--- a/test/CodeGen/X86/asm-block-labels.ll
+++ b/test/CodeGen/X86/asm-block-labels.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -std-compile-opts | llc -no-integrated-as
+; RUN: opt < %s -O3 | llc -no-integrated-as
 ; ModuleID = 'block12.c'
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i686-apple-darwin8"
diff --git a/test/CodeGen/X86/asm-label.ll b/test/CodeGen/X86/asm-label.ll
index 1fc6e2eaf2b7..1da66e74d34f 100644
--- a/test/CodeGen/X86/asm-label.ll
+++ b/test/CodeGen/X86/asm-label.ll
@@ -24,7 +24,7 @@ if.end:                                           ; preds = %if.then
   br label %cleanup
 
 cleanup:                                          ; preds = %if.end, %if.then9
-  switch i32 undef, label %unreachable [
+  switch i32 undef, label %default [
     i32 0, label %cleanup.cont
     i32 1, label %if.end11
   ]
@@ -35,6 +35,6 @@ cleanup.cont:                                     ; preds = %cleanup
 if.end11:                                         ; preds = %cleanup.cont, %cleanup, %land.lhs.true, %entry
   ret void
 
-unreachable:                                      ; preds = %cleanup
-  unreachable
+default:                                          ; preds = %cleanup
+  br label %if.end11
 }
diff --git a/test/CodeGen/X86/atomic-load-store-wide.ll b/test/CodeGen/X86/atomic-load-store-wide.ll
index 7352d5a58006..ad1a5c6d0267 100644
--- a/test/CodeGen/X86/atomic-load-store-wide.ll
+++ b/test/CodeGen/X86/atomic-load-store-wide.ll
@@ -4,16 +4,18 @@
 ; FIXME: The generated code can be substantially improved.
 
 define void @test1(i64* %ptr, i64 %val1) {
-; CHECK: test1
-; CHECK: cmpxchg8b
+; CHECK-LABEL: test1
+; CHECK: lock
+; CHECK-NEXT: cmpxchg8b
 ; CHECK-NEXT: jne
   store atomic i64 %val1, i64* %ptr seq_cst, align 8
   ret void
 }
 
 define i64 @test2(i64* %ptr) {
-; CHECK: test2
-; CHECK: cmpxchg8b
+; CHECK-LABEL: test2
+; CHECK: lock
+; CHECK-NEXT: cmpxchg8b
   %val = load atomic i64* %ptr seq_cst, align 8
   ret i64 %val
 }
diff --git a/test/CodeGen/X86/atomic16.ll b/test/CodeGen/X86/atomic16.ll
index faaa4c49d39b..f6892de43d89 100644
--- a/test/CodeGen/X86/atomic16.ll
+++ b/test/CodeGen/X86/atomic16.ll
@@ -15,17 +15,17 @@ entry:
 ; X32:       incw
   %t2 = atomicrmw add  i16* @sc16, i16 3 acquire
 ; X64:       lock
-; X64:       addw $3, {{.*}} # encoding: [0xf0,0x66
+; X64:       addw $3, {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       addw $3
   %t3 = atomicrmw add  i16* @sc16, i16 5 acquire
 ; X64:       lock
-; X64:       xaddw {{.*}} # encoding: [0xf0,0x66
+; X64:       xaddw {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       xaddw
   %t4 = atomicrmw add  i16* @sc16, i16 %t3 acquire
 ; X64:       lock
-; X64:       addw {{.*}} # encoding: [0xf0,0x66
+; X64:       addw {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       addw
   ret void
@@ -43,17 +43,17 @@ define void @atomic_fetch_sub16() nounwind {
 ; X32:       decw
   %t2 = atomicrmw sub  i16* @sc16, i16 3 acquire
 ; X64:       lock
-; X64:       subw $3, {{.*}} # encoding: [0xf0,0x66
+; X64:       subw $3, {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       subw $3
   %t3 = atomicrmw sub  i16* @sc16, i16 5 acquire
 ; X64:       lock
-; X64:       xaddw {{.*}} # encoding: [0xf0,0x66
+; X64:       xaddw {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       xaddw
   %t4 = atomicrmw sub  i16* @sc16, i16 %t3 acquire
 ; X64:       lock
-; X64:       subw {{.*}} # encoding: [0xf0,0x66
+; X64:       subw {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       subw
   ret void
@@ -66,7 +66,7 @@ define void @atomic_fetch_and16() nounwind {
 ; X32-LABEL:   atomic_fetch_and16
   %t1 = atomicrmw and  i16* @sc16, i16 3 acquire
 ; X64:       lock
-; X64:       andw $3, {{.*}} # encoding: [0xf0,0x66
+; X64:       andw $3, {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       andw $3
   %t2 = atomicrmw and  i16* @sc16, i16 5 acquire
@@ -78,7 +78,7 @@ define void @atomic_fetch_and16() nounwind {
 ; X32:       cmpxchgw
   %t3 = atomicrmw and  i16* @sc16, i16 %t2 acquire
 ; X64:       lock
-; X64:       andw {{.*}} # encoding: [0xf0,0x66
+; X64:       andw {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       andw
   ret void
@@ -91,7 +91,7 @@ define void @atomic_fetch_or16() nounwind {
 ; X32-LABEL:   atomic_fetch_or16
   %t1 = atomicrmw or   i16* @sc16, i16 3 acquire
 ; X64:       lock
-; X64:       orw $3, {{.*}} # encoding: [0xf0,0x66
+; X64:       orw $3, {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       orw $3
   %t2 = atomicrmw or   i16* @sc16, i16 5 acquire
@@ -103,7 +103,7 @@ define void @atomic_fetch_or16() nounwind {
 ; X32:       cmpxchgw
   %t3 = atomicrmw or   i16* @sc16, i16 %t2 acquire
 ; X64:       lock
-; X64:       orw {{.*}} # encoding: [0xf0,0x66
+; X64:       orw {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       orw
   ret void
@@ -116,7 +116,7 @@ define void @atomic_fetch_xor16() nounwind {
 ; X32-LABEL:   atomic_fetch_xor16
   %t1 = atomicrmw xor  i16* @sc16, i16 3 acquire
 ; X64:       lock
-; X64:       xorw $3, {{.*}} # encoding: [0xf0,0x66
+; X64:       xorw $3, {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       xorw $3
   %t2 = atomicrmw xor  i16* @sc16, i16 5 acquire
@@ -128,7 +128,7 @@ define void @atomic_fetch_xor16() nounwind {
 ; X32:       cmpxchgw
   %t3 = atomicrmw xor  i16* @sc16, i16 %t2 acquire
 ; X64:       lock
-; X64:       xorw {{.*}} # encoding: [0xf0,0x66
+; X64:       xorw {{.*}} # encoding: [0x66,0xf0
 ; X32:       lock
 ; X32:       xorw
   ret void
diff --git a/test/CodeGen/X86/atomic_add.ll b/test/CodeGen/X86/atomic_add.ll
index bdd25e6a2a56..f60212de5339 100644
--- a/test/CodeGen/X86/atomic_add.ll
+++ b/test/CodeGen/X86/atomic_add.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC
 
 ; rdar://7103704
 
@@ -14,6 +15,8 @@ define void @inc4(i64* nocapture %p) nounwind ssp {
 entry:
 ; CHECK-LABEL: inc4:
 ; CHECK: incq
+; SLOW_INC-LABEL: inc4:
+; SLOW_INC-NOT: incq
   %0 = atomicrmw add i64* %p, i64 1 monotonic
   ret void
 }
@@ -39,6 +42,8 @@ define void @inc3(i8* nocapture %p) nounwind ssp {
 entry:
 ; CHECK-LABEL: inc3:
 ; CHECK: incb
+; SLOW_INC-LABEL: inc3:
+; SLOW_INC-NOT: incb
   %0 = atomicrmw add i8* %p, i8 1 monotonic
   ret void
 }
@@ -64,6 +69,8 @@ define void @inc2(i16* nocapture %p) nounwind ssp {
 entry:
 ; CHECK-LABEL: inc2:
 ; CHECK: incw
+; SLOW_INC-LABEL: inc2:
+; SLOW_INC-NOT: incw
   %0 = atomicrmw add i16* %p, i16 1 monotonic
   ret void
 }
@@ -89,6 +96,8 @@ define void @inc1(i32* nocapture %p) nounwind ssp {
 entry:
 ; CHECK-LABEL: inc1:
 ; CHECK: incl
+; SLOW_INC-LABEL: inc1:
+; SLOW_INC-NOT: incl
   %0 = atomicrmw add i32* %p, i32 1 monotonic
   ret void
 }
@@ -113,6 +122,8 @@ define void @dec4(i64* nocapture %p) nounwind ssp {
 entry:
 ; CHECK-LABEL: dec4:
 ; CHECK: decq
+; SLOW_INC-LABEL: dec4:
+; SLOW_INC-NOT: decq
   %0 = atomicrmw sub i64* %p, i64 1 monotonic
   ret void
 }
@@ -138,6 +149,8 @@ define void @dec3(i8* nocapture %p) nounwind ssp {
 entry:
 ; CHECK-LABEL: dec3:
 ; CHECK: decb
+; SLOW_INC-LABEL: dec3:
+; SLOW_INC-NOT: decb
   %0 = atomicrmw sub i8* %p, i8 1 monotonic
   ret void
 }
@@ -163,6 +176,8 @@ define void @dec2(i16* nocapture %p) nounwind ssp {
 entry:
 ; CHECK-LABEL: dec2:
 ; CHECK: decw
+; SLOW_INC-LABEL: dec2:
+; SLOW_INC-NOT: decw
   %0 = atomicrmw sub i16* %p, i16 1 monotonic
   ret void
 }
@@ -189,6 +204,8 @@ define void @dec1(i32* nocapture %p) nounwind ssp {
 entry:
 ; CHECK-LABEL: dec1:
 ; CHECK: decl
+; SLOW_INC-LABEL: dec1:
+; SLOW_INC-NOT: decl
   %0 = atomicrmw sub i32* %p, i32 1 monotonic
   ret void
 }
diff --git a/test/CodeGen/X86/atomic_idempotent.ll b/test/CodeGen/X86/atomic_idempotent.ll
new file mode 100644
index 000000000000..1afc535133d6
--- /dev/null
+++ b/test/CodeGen/X86/atomic_idempotent.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X64
+; RUN: llc < %s -march=x86 -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X32
+
+; On x86, an atomic rmw operation that does not modify the value in memory
+; (such as atomic add 0) can be replaced by an mfence followed by a mov.
+; This is explained (with the motivation for such an optimization) in
+; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
+
+define i8 @add8(i8* %p) {
+; CHECK-LABEL: add8
+; CHECK: mfence
+; CHECK: movb
+  %1 = atomicrmw add i8* %p, i8 0 monotonic
+  ret i8 %1
+}
+
+define i16 @or16(i16* %p) {
+; CHECK-LABEL: or16
+; CHECK: mfence
+; CHECK: movw
+  %1 = atomicrmw or i16* %p, i16 0 acquire
+  ret i16 %1
+}
+
+define i32 @xor32(i32* %p) {
+; CHECK-LABEL: xor32
+; CHECK: mfence
+; CHECK: movl
+  %1 = atomicrmw xor i32* %p, i32 0 release
+  ret i32 %1
+}
+
+define i64 @sub64(i64* %p) {
+; CHECK-LABEL: sub64
+; X64: mfence
+; X64: movq
+; X32-NOT: mfence
+  %1 = atomicrmw sub i64* %p, i64 0 seq_cst
+  ret i64 %1
+}
+
+define i128 @or128(i128* %p) {
+; CHECK-LABEL: or128
+; CHECK-NOT: mfence
+  %1 = atomicrmw or i128* %p, i128 0 monotonic
+  ret i128 %1
+}
+
+; For 'and', the idempotent value is (-1)
+define i32 @and32 (i32* %p) {
+; CHECK-LABEL: and32
+; CHECK: mfence
+; CHECK: movl
+  %1 = atomicrmw and i32* %p, i32 -1 acq_rel
+  ret i32 %1
+}
diff --git a/test/CodeGen/X86/atomic_mi.ll b/test/CodeGen/X86/atomic_mi.ll
new file mode 100644
index 000000000000..19e019eaddcd
--- /dev/null
+++ b/test/CodeGen/X86/atomic_mi.ll
@@ -0,0 +1,525 @@
+; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s --check-prefix X64
+; RUN: llc < %s -march=x86 -verify-machineinstrs | FileCheck %s --check-prefix X32
+; RUN: llc < %s -march=x86-64 -mattr=slow-incdec -verify-machineinstrs | FileCheck %s --check-prefix SLOW_INC
+
+; This file checks that atomic (non-seq_cst) stores of immediate values are
+; done in one mov instruction and not 2. More precisely, it makes sure that the
+; immediate is not first copied uselessly into a register.
+
+; Similarily, it checks that a binary operation of an immediate with an atomic
+; variable that is stored back in that variable is done as a single instruction.
+; For example: x.store(42 + x.load(memory_order_acquire), memory_order_release)
+; should be just an add instruction, instead of loading x into a register, doing
+; an add and storing the result back.
+; The binary operations supported are currently add, and, or, xor.
+; sub is not supported because they are translated by an addition of the
+; negated immediate.
+; Finally, we also check the same kind of pattern for inc/dec
+
+; seq_cst stores are left as (lock) xchgl, but we try to check every other
+; attribute at least once.
+
+; Please note that these operations do not require the lock prefix: only
+; sequentially consistent stores require this kind of protection on X86.
+; And even for seq_cst operations, llvm uses the xchg instruction which has
+; an implicit lock prefix, so making it explicit is not required.
+
+define void @store_atomic_imm_8(i8* %p) {
+; X64-LABEL: store_atomic_imm_8
+; X64: movb
+; X64-NOT: movb
+; X32-LABEL: store_atomic_imm_8
+; X32: movb
+; X32-NOT: movb
+  store atomic i8 42, i8* %p release, align 1
+  ret void
+}
+
+define void @store_atomic_imm_16(i16* %p) {
+; X64-LABEL: store_atomic_imm_16
+; X64: movw
+; X64-NOT: movw
+; X32-LABEL: store_atomic_imm_16
+; X32: movw
+; X32-NOT: movw
+  store atomic i16 42, i16* %p monotonic, align 2
+  ret void
+}
+
+define void @store_atomic_imm_32(i32* %p) {
+; X64-LABEL: store_atomic_imm_32
+; X64: movl
+; X64-NOT: movl
+;   On 32 bits, there is an extra movl for each of those functions
+;   (probably for alignment reasons).
+; X32-LABEL: store_atomic_imm_32
+; X32: movl 4(%esp), %eax
+; X32: movl
+; X32-NOT: movl
+  store atomic i32 42, i32* %p release, align 4
+  ret void
+}
+
+define void @store_atomic_imm_64(i64* %p) {
+; X64-LABEL: store_atomic_imm_64
+; X64: movq
+; X64-NOT: movq
+;   These are implemented with a CAS loop on 32 bit architectures, and thus
+;   cannot be optimized in the same way as the others.
+; X32-LABEL: store_atomic_imm_64
+; X32: cmpxchg8b
+  store atomic i64 42, i64* %p release, align 8
+  ret void
+}
+
+; If an immediate is too big to fit in 32 bits, it cannot be store in one mov,
+; even on X64, one must use movabsq that can only target a register.
+define void @store_atomic_imm_64_big(i64* %p) {
+; X64-LABEL: store_atomic_imm_64_big
+; X64: movabsq
+; X64: movq
+  store atomic i64 100000000000, i64* %p monotonic, align 8
+  ret void
+}
+
+; It would be incorrect to replace a lock xchgl by a movl
+define void @store_atomic_imm_32_seq_cst(i32* %p) {
+; X64-LABEL: store_atomic_imm_32_seq_cst
+; X64: xchgl
+; X32-LABEL: store_atomic_imm_32_seq_cst
+; X32: xchgl
+  store atomic i32 42, i32* %p seq_cst, align 4
+  ret void
+}
+
+; ----- ADD -----
+
+define void @add_8(i8* %p) {
+; X64-LABEL: add_8
+; X64-NOT: lock
+; X64: addb
+; X64-NOT: movb
+; X32-LABEL: add_8
+; X32-NOT: lock
+; X32: addb
+; X32-NOT: movb
+  %1 = load atomic i8* %p seq_cst, align 1
+  %2 = add i8 %1, 2
+  store atomic i8 %2, i8* %p release, align 1
+  ret void
+}
+
+define void @add_16(i16* %p) {
+;   Currently the transformation is not done on 16 bit accesses, as the backend
+;   treat 16 bit arithmetic as expensive on X86/X86_64.
+; X64-LABEL: add_16
+; X64-NOT: addw
+; X32-LABEL: add_16
+; X32-NOT: addw
+  %1 = load atomic i16* %p acquire, align 2
+  %2 = add i16 %1, 2
+  store atomic i16 %2, i16* %p release, align 2
+  ret void
+}
+
+define void @add_32(i32* %p) {
+; X64-LABEL: add_32
+; X64-NOT: lock
+; X64: addl
+; X64-NOT: movl
+; X32-LABEL: add_32
+; X32-NOT: lock
+; X32: addl
+; X32-NOT: movl
+  %1 = load atomic i32* %p acquire, align 4
+  %2 = add i32 %1, 2
+  store atomic i32 %2, i32* %p monotonic, align 4
+  ret void
+}
+
+define void @add_64(i64* %p) {
+; X64-LABEL: add_64
+; X64-NOT: lock
+; X64: addq
+; X64-NOT: movq
+;   We do not check X86-32 as it cannot do 'addq'.
+; X32-LABEL: add_64
+  %1 = load atomic i64* %p acquire, align 8
+  %2 = add i64 %1, 2
+  store atomic i64 %2, i64* %p release, align 8
+  ret void
+}
+
+define void @add_32_seq_cst(i32* %p) {
+; X64-LABEL: add_32_seq_cst
+; X64: xchgl
+; X32-LABEL: add_32_seq_cst
+; X32: xchgl
+  %1 = load atomic i32* %p monotonic, align 4
+  %2 = add i32 %1, 2
+  store atomic i32 %2, i32* %p seq_cst, align 4
+  ret void
+}
+
+; ----- AND -----
+
+define void @and_8(i8* %p) {
+; X64-LABEL: and_8
+; X64-NOT: lock
+; X64: andb
+; X64-NOT: movb
+; X32-LABEL: and_8
+; X32-NOT: lock
+; X32: andb
+; X32-NOT: movb
+  %1 = load atomic i8* %p monotonic, align 1
+  %2 = and i8 %1, 2
+  store atomic i8 %2, i8* %p release, align 1
+  ret void
+}
+
+define void @and_16(i16* %p) {
+;   Currently the transformation is not done on 16 bit accesses, as the backend
+;   treat 16 bit arithmetic as expensive on X86/X86_64.
+; X64-LABEL: and_16
+; X64-NOT: andw
+; X32-LABEL: and_16
+; X32-NOT: andw
+  %1 = load atomic i16* %p acquire, align 2
+  %2 = and i16 %1, 2
+  store atomic i16 %2, i16* %p release, align 2
+  ret void
+}
+
+define void @and_32(i32* %p) {
+; X64-LABEL: and_32
+; X64-NOT: lock
+; X64: andl
+; X64-NOT: movl
+; X32-LABEL: and_32
+; X32-NOT: lock
+; X32: andl
+; X32-NOT: movl
+  %1 = load atomic i32* %p acquire, align 4
+  %2 = and i32 %1, 2
+  store atomic i32 %2, i32* %p release, align 4
+  ret void
+}
+
+define void @and_64(i64* %p) {
+; X64-LABEL: and_64
+; X64-NOT: lock
+; X64: andq
+; X64-NOT: movq
+;   We do not check X86-32 as it cannot do 'andq'.
+; X32-LABEL: and_64
+  %1 = load atomic i64* %p acquire, align 8
+  %2 = and i64 %1, 2
+  store atomic i64 %2, i64* %p release, align 8
+  ret void
+}
+
+define void @and_32_seq_cst(i32* %p) {
+; X64-LABEL: and_32_seq_cst
+; X64: xchgl
+; X32-LABEL: and_32_seq_cst
+; X32: xchgl
+  %1 = load atomic i32* %p monotonic, align 4
+  %2 = and i32 %1, 2
+  store atomic i32 %2, i32* %p seq_cst, align 4
+  ret void
+}
+
+; ----- OR -----
+
+define void @or_8(i8* %p) {
+; X64-LABEL: or_8
+; X64-NOT: lock
+; X64: orb
+; X64-NOT: movb
+; X32-LABEL: or_8
+; X32-NOT: lock
+; X32: orb
+; X32-NOT: movb
+  %1 = load atomic i8* %p acquire, align 1
+  %2 = or i8 %1, 2
+  store atomic i8 %2, i8* %p release, align 1
+  ret void
+}
+
+define void @or_16(i16* %p) {
+; X64-LABEL: or_16
+; X64-NOT: orw
+; X32-LABEL: or_16
+; X32-NOT: orw
+  %1 = load atomic i16* %p acquire, align 2
+  %2 = or i16 %1, 2
+  store atomic i16 %2, i16* %p release, align 2
+  ret void
+}
+
+define void @or_32(i32* %p) {
+; X64-LABEL: or_32
+; X64-NOT: lock
+; X64: orl
+; X64-NOT: movl
+; X32-LABEL: or_32
+; X32-NOT: lock
+; X32: orl
+; X32-NOT: movl
+  %1 = load atomic i32* %p acquire, align 4
+  %2 = or i32 %1, 2
+  store atomic i32 %2, i32* %p release, align 4
+  ret void
+}
+
+define void @or_64(i64* %p) {
+; X64-LABEL: or_64
+; X64-NOT: lock
+; X64: orq
+; X64-NOT: movq
+;   We do not check X86-32 as it cannot do 'orq'.
+; X32-LABEL: or_64
+  %1 = load atomic i64* %p acquire, align 8
+  %2 = or i64 %1, 2
+  store atomic i64 %2, i64* %p release, align 8
+  ret void
+}
+
+define void @or_32_seq_cst(i32* %p) {
+; X64-LABEL: or_32_seq_cst
+; X64: xchgl
+; X32-LABEL: or_32_seq_cst
+; X32: xchgl
+  %1 = load atomic i32* %p monotonic, align 4
+  %2 = or i32 %1, 2
+  store atomic i32 %2, i32* %p seq_cst, align 4
+  ret void
+}
+
+; ----- XOR -----
+
+define void @xor_8(i8* %p) {
+; X64-LABEL: xor_8
+; X64-NOT: lock
+; X64: xorb
+; X64-NOT: movb
+; X32-LABEL: xor_8
+; X32-NOT: lock
+; X32: xorb
+; X32-NOT: movb
+  %1 = load atomic i8* %p acquire, align 1
+  %2 = xor i8 %1, 2
+  store atomic i8 %2, i8* %p release, align 1
+  ret void
+}
+
+define void @xor_16(i16* %p) {
+; X64-LABEL: xor_16
+; X64-NOT: xorw
+; X32-LABEL: xor_16
+; X32-NOT: xorw
+  %1 = load atomic i16* %p acquire, align 2
+  %2 = xor i16 %1, 2
+  store atomic i16 %2, i16* %p release, align 2
+  ret void
+}
+
+define void @xor_32(i32* %p) {
+; X64-LABEL: xor_32
+; X64-NOT: lock
+; X64: xorl
+; X64-NOT: movl
+; X32-LABEL: xor_32
+; X32-NOT: lock
+; X32: xorl
+; X32-NOT: movl
+  %1 = load atomic i32* %p acquire, align 4
+  %2 = xor i32 %1, 2
+  store atomic i32 %2, i32* %p release, align 4
+  ret void
+}
+
+define void @xor_64(i64* %p) {
+; X64-LABEL: xor_64
+; X64-NOT: lock
+; X64: xorq
+; X64-NOT: movq
+;   We do not check X86-32 as it cannot do 'xorq'.
+; X32-LABEL: xor_64
+  %1 = load atomic i64* %p acquire, align 8
+  %2 = xor i64 %1, 2
+  store atomic i64 %2, i64* %p release, align 8
+  ret void
+}
+
+define void @xor_32_seq_cst(i32* %p) {
+; X64-LABEL: xor_32_seq_cst
+; X64: xchgl
+; X32-LABEL: xor_32_seq_cst
+; X32: xchgl
+  %1 = load atomic i32* %p monotonic, align 4
+  %2 = xor i32 %1, 2
+  store atomic i32 %2, i32* %p seq_cst, align 4
+  ret void
+}
+
+; ----- INC -----
+
+define void @inc_8(i8* %p) {
+; X64-LABEL: inc_8
+; X64-NOT: lock
+; X64: incb
+; X64-NOT: movb
+; X32-LABEL: inc_8
+; X32-NOT: lock
+; X32: incb
+; X32-NOT: movb
+; SLOW_INC-LABEL: inc_8
+; SLOW_INC-NOT: incb
+; SLOW_INC-NOT: movb
+  %1 = load atomic i8* %p seq_cst, align 1
+  %2 = add i8 %1, 1
+  store atomic i8 %2, i8* %p release, align 1
+  ret void
+}
+
+define void @inc_16(i16* %p) {
+;   Currently the transformation is not done on 16 bit accesses, as the backend
+;   treat 16 bit arithmetic as expensive on X86/X86_64.
+; X64-LABEL: inc_16
+; X64-NOT: incw
+; X32-LABEL: inc_16
+; X32-NOT: incw
+; SLOW_INC-LABEL: inc_16
+; SLOW_INC-NOT: incw
+  %1 = load atomic i16* %p acquire, align 2
+  %2 = add i16 %1, 1
+  store atomic i16 %2, i16* %p release, align 2
+  ret void
+}
+
+define void @inc_32(i32* %p) {
+; X64-LABEL: inc_32
+; X64-NOT: lock
+; X64: incl
+; X64-NOT: movl
+; X32-LABEL: inc_32
+; X32-NOT: lock
+; X32: incl
+; X32-NOT: movl
+; SLOW_INC-LABEL: inc_32
+; SLOW_INC-NOT: incl
+; SLOW_INC-NOT: movl
+  %1 = load atomic i32* %p acquire, align 4
+  %2 = add i32 %1, 1
+  store atomic i32 %2, i32* %p monotonic, align 4
+  ret void
+}
+
+define void @inc_64(i64* %p) {
+; X64-LABEL: inc_64
+; X64-NOT: lock
+; X64: incq
+; X64-NOT: movq
+;   We do not check X86-32 as it cannot do 'incq'.
+; X32-LABEL: inc_64
+; SLOW_INC-LABEL: inc_64
+; SLOW_INC-NOT: incq
+; SLOW_INC-NOT: movq
+  %1 = load atomic i64* %p acquire, align 8
+  %2 = add i64 %1, 1
+  store atomic i64 %2, i64* %p release, align 8
+  ret void
+}
+
+define void @inc_32_seq_cst(i32* %p) {
+; X64-LABEL: inc_32_seq_cst
+; X64: xchgl
+; X32-LABEL: inc_32_seq_cst
+; X32: xchgl
+  %1 = load atomic i32* %p monotonic, align 4
+  %2 = add i32 %1, 1
+  store atomic i32 %2, i32* %p seq_cst, align 4
+  ret void
+}
+
+; ----- DEC -----
+
+define void @dec_8(i8* %p) {
+; X64-LABEL: dec_8
+; X64-NOT: lock
+; X64: decb
+; X64-NOT: movb
+; X32-LABEL: dec_8
+; X32-NOT: lock
+; X32: decb
+; X32-NOT: movb
+; SLOW_INC-LABEL: dec_8
+; SLOW_INC-NOT: decb
+; SLOW_INC-NOT: movb
+  %1 = load atomic i8* %p seq_cst, align 1
+  %2 = sub i8 %1, 1
+  store atomic i8 %2, i8* %p release, align 1
+  ret void
+}
+
+define void @dec_16(i16* %p) {
+;   Currently the transformation is not done on 16 bit accesses, as the backend
+;   treat 16 bit arithmetic as expensive on X86/X86_64.
+; X64-LABEL: dec_16
+; X64-NOT: decw
+; X32-LABEL: dec_16
+; X32-NOT: decw
+; SLOW_INC-LABEL: dec_16
+; SLOW_INC-NOT: decw
+  %1 = load atomic i16* %p acquire, align 2
+  %2 = sub i16 %1, 1
+  store atomic i16 %2, i16* %p release, align 2
+  ret void
+}
+
+define void @dec_32(i32* %p) {
+; X64-LABEL: dec_32
+; X64-NOT: lock
+; X64: decl
+; X64-NOT: movl
+; X32-LABEL: dec_32
+; X32-NOT: lock
+; X32: decl
+; X32-NOT: movl
+; SLOW_INC-LABEL: dec_32
+; SLOW_INC-NOT: decl
+; SLOW_INC-NOT: movl
+  %1 = load atomic i32* %p acquire, align 4
+  %2 = sub i32 %1, 1
+  store atomic i32 %2, i32* %p monotonic, align 4
+  ret void
+}
+
+define void @dec_64(i64* %p) {
+; X64-LABEL: dec_64
+; X64-NOT: lock
+; X64: decq
+; X64-NOT: movq
+;   We do not check X86-32 as it cannot do 'decq'.
+; X32-LABEL: dec_64
+; SLOW_INC-LABEL: dec_64
+; SLOW_INC-NOT: decq
+; SLOW_INC-NOT: movq
+  %1 = load atomic i64* %p acquire, align 8
+  %2 = sub i64 %1, 1
+  store atomic i64 %2, i64* %p release, align 8
+  ret void
+}
+
+define void @dec_32_seq_cst(i32* %p) {
+; X64-LABEL: dec_32_seq_cst
+; X64: xchgl
+; X32-LABEL: dec_32_seq_cst
+; X32: xchgl
+  %1 = load atomic i32* %p monotonic, align 4
+  %2 = sub i32 %1, 1
+  store atomic i32 %2, i32* %p seq_cst, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/avoid_complex_am.ll b/test/CodeGen/X86/avoid_complex_am.ll
index 7f095190ab8f..e5e7bd23a641 100644
--- a/test/CodeGen/X86/avoid_complex_am.ll
+++ b/test/CodeGen/X86/avoid_complex_am.ll
@@ -22,7 +22,7 @@ for.body:                                         ; preds = %for.body, %entry
   %arrayidx = getelementptr inbounds double* %b, i64 %tmp
   %tmp1 = load double* %arrayidx, align 8
 ; The induction variable should carry the scaling factor: 1.
-; CHECK: [[IVNEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK: [[IVNEXT]] = add nuw i64 [[IV]], 1
   %indvars.iv.next = add i64 %indvars.iv, 1
   %arrayidx2 = getelementptr inbounds double* %c, i64 %indvars.iv.next
   %tmp2 = load double* %arrayidx2, align 8
diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll
index 1fd9085838df..02ea173c8032 100644
--- a/test/CodeGen/X86/avx-basic.ll
+++ b/test/CodeGen/X86/avx-basic.ll
@@ -51,46 +51,6 @@ entry:
   ret <4 x i64> %shuffle
 }
 
-;;;
-;;; Check that some 256-bit vectors are xformed into 128 ops
-; CHECK: _A
-; CHECK: vshufpd $1
-; CHECK-NEXT: vextractf128 $1
-; CHECK-NEXT: vshufpd $1
-; CHECK-NEXT: vinsertf128 $1
-define <4 x i64> @A(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
-  ret <4 x i64> %shuffle
-}
-
-; CHECK: _B
-; CHECK: vshufpd $1, %ymm
-define <4 x i64> @B(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 undef, i32 undef, i32 6>
-  ret <4 x i64> %shuffle
-}
-
-; CHECK: movlhps
-; CHECK-NEXT: vextractf128  $1
-; CHECK-NEXT: movlhps
-; CHECK-NEXT: vinsertf128 $1
-define <4 x i64> @C(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 undef, i32 0, i32 undef, i32 6>
-  ret <4 x i64> %shuffle
-}
-
-; CHECK: vpshufd $-96
-; CHECK: vpshufd $-6
-; CHECK: vinsertf128 $1
-define <8 x i32> @D(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 10, i32 10, i32 11, i32 11>
-  ret <8 x i32> %shuffle
-}
-
 ;;; Don't crash on movd
 ; CHECK: _VMOVZQI2PQI
 ; CHECK: vmovd (%
diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll
deleted file mode 100644
index d2a22d709474..000000000000
--- a/test/CodeGen/X86/avx-blend.ll
+++ /dev/null
@@ -1,202 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx  -mattr=+avx | FileCheck %s
-
-; AVX128 tests:
-
-;CHECK-LABEL: vsel_float:
-; select mask is <i1 true, i1 false, i1 true, i1 false>.
-; Big endian representation is 0101 = 5.
-; '1' means takes the first argument, '0' means takes the second argument.
-; This is the opposite of the intel syntax, thus we expect
-; the inverted mask: 1010 = 10.
-; According to the ABI:
-; v1 is in xmm0 => first argument is xmm0.
-; v2 is in xmm1 => second argument is xmm1.
-; result is in xmm0 => destination argument.
-;CHECK: vblendps    $10, %xmm1, %xmm0, %xmm0
-;CHECK: ret
-define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
-  ret <4 x float> %vsel
-}
-
-
-;CHECK-LABEL: vsel_i32:
-;CHECK: vblendps   $10, %xmm1, %xmm0, %xmm0
-;CHECK: ret
-define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
-  ret <4 x i32> %vsel
-}
-
-
-;CHECK-LABEL: vsel_double:
-;CHECK: vmovsd
-;CHECK: ret
-define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
-  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %v1, <2 x double> %v2
-  ret <2 x double> %vsel
-}
-
-
-;CHECK-LABEL: vsel_i64:
-;CHECK: vmovsd
-;CHECK: ret
-define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
-  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v1, <2 x i64> %v2
-  ret <2 x i64> %vsel
-}
-
-
-;CHECK-LABEL: vsel_i8:
-;CHECK: vpblendvb
-;CHECK: ret
-define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
-  %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
-  ret <16 x i8> %vsel
-}
-
-
-; AVX256 tests:
-
-
-;CHECK-LABEL: vsel_float8:
-;CHECK-NOT: vinsertf128
-; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
-; which translates into the boolean mask (big endian representation):
-; 00010001 = 17.
-; '1' means takes the first argument, '0' means takes the second argument.
-; This is the opposite of the intel syntax, thus we expect
-; the inverted mask: 11101110 = 238.
-;CHECK: vblendps    $238, %ymm1, %ymm0, %ymm0
-;CHECK: ret
-define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
-  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
-  ret <8 x float> %vsel
-}
-
-;CHECK-LABEL: vsel_i328:
-;CHECK-NOT: vinsertf128
-;CHECK: vblendps    $238, %ymm1, %ymm0, %ymm0
-;CHECK-NEXT: ret
-define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
-  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
-  ret <8 x i32> %vsel
-}
-
-;CHECK-LABEL: vsel_double8:
-; select mask is 2x: 0001 => intel mask: ~0001 = 14
-; ABI:
-; v1 is in ymm0 and ymm1.
-; v2 is in ymm2 and ymm3.
-; result is in ymm0 and ymm1.
-; Compute the low part: res.low = blend v1.low, v2.low, blendmask
-;CHECK: vblendpd    $14, %ymm2, %ymm0, %ymm0
-; Compute the high part.
-;CHECK: vblendpd    $14, %ymm3, %ymm1, %ymm1
-;CHECK: ret
-define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
-  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
-  ret <8 x double> %vsel
-}
-
-;CHECK-LABEL: vsel_i648:
-;CHECK: vblendpd    $14, %ymm2, %ymm0, %ymm0
-;CHECK: vblendpd    $14, %ymm3, %ymm1, %ymm1
-;CHECK: ret
-define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
-  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
-  ret <8 x i64> %vsel
-}
-
-;CHECK-LABEL: vsel_double4:
-;CHECK-NOT: vinsertf128
-;CHECK: vblendpd $10
-;CHECK-NEXT: ret
-define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
-  ret <4 x double> %vsel
-}
-
-;; TEST blend + compares
-; CHECK: testa
-define <2 x double> @testa(<2 x double> %x, <2 x double> %y) {
-  ; CHECK: vcmplepd
-  ; CHECK: vblendvpd
-  %max_is_x = fcmp oge <2 x double> %x, %y
-  %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
-  ret <2 x double> %max
-}
-
-; CHECK: testb
-define <2 x double> @testb(<2 x double> %x, <2 x double> %y) {
-  ; CHECK: vcmpnlepd
-  ; CHECK: vblendvpd
-  %min_is_x = fcmp ult <2 x double> %x, %y
-  %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
-  ret <2 x double> %min
-}
-
-; If we can figure out a blend has a constant mask, we should emit the
-; blend instruction with an immediate mask
-define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
-; CHECK-LABEL: constant_blendvpd_avx:
-; CHECK-NOT: mov
-; CHECK: vblendpd
-; CHECK: ret
-  %1 = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %xy, <4 x double> %ab
-  ret <4 x double> %1
-}
-
-define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
-; CHECK-LABEL: constant_blendvps_avx:
-; CHECK-NOT: mov
-; CHECK: vblendps
-; CHECK: ret
-  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %xyzw, <8 x float> %abcd
-  ret <8 x float> %1
-}
-
-declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
-declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)
-
-;; 4 tests for shufflevectors that optimize to blend + immediate
-; CHECK-LABEL: @blend_shufflevector_4xfloat
-define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) {
-; Equivalent select mask is <i1 true, i1 false, i1 true, i1 false>.
-; Big endian representation is 0101 = 5.
-; '1' means takes the first argument, '0' means takes the second argument.
-; This is the opposite of the intel syntax, thus we expect
-; Inverted mask: 1010 = 10.
-; According to the ABI:
-; a is in xmm0 => first argument is xmm0.
-; b is in xmm1 => second argument is xmm1.
-; Result is in xmm0 => destination argument.
-; CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
-; CHECK: ret
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x float> %1
-}
-
-; CHECK-LABEL: @blend_shufflevector_8xfloat
-define <8 x float> @blend_shufflevector_8xfloat(<8 x float> %a, <8 x float> %b) {
-; CHECK: vblendps $190, %ymm1, %ymm0, %ymm0
-; CHECK: ret
-  %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 6, i32 15>
-  ret <8 x float> %1
-}
-
-; CHECK-LABEL: @blend_shufflevector_4xdouble
-define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
-; CHECK: vblendpd $2, %ymm1, %ymm0, %ymm0
-; CHECK: ret
-  %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
-  ret <4 x double> %1
-}
-
-; CHECK-LABEL: @blend_shufflevector_4xi64
-define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) {
-; CHECK: vblendpd $13, %ymm1, %ymm0, %ymm0
-; CHECK: ret
-  %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
-  ret <4 x i64> %1
-}
diff --git a/test/CodeGen/X86/avx-intel-ocl.ll b/test/CodeGen/X86/avx-intel-ocl.ll
index 3e051bff768d..70ec1248cdd7 100644
--- a/test/CodeGen/X86/avx-intel-ocl.ll
+++ b/test/CodeGen/X86/avx-intel-ocl.ll
@@ -89,23 +89,23 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
 ; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
 
 ; X64-LABEL: test_prolog_epilog
-; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
-; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
-; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
-; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
-; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
-; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
-; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
-; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Spill
+; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Spill
 ; X64: call
-; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
-; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
-; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
-; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
-; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
-; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
-; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
-; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Folded Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
+; X64: vmovups {{.*}}(%rsp), {{%ymm([8-9]|1[0-5])}} ## 32-byte Reload
 define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
    %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)
    ret <16 x float> %c
diff --git a/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
new file mode 100644
index 000000000000..d2b44cd64efb
--- /dev/null
+++ b/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7-avx | FileCheck %s
+
+define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
+  ; CHECK: vblendpd
+  %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
+  ; CHECK: vblendps
+  %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+
+
+define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
+  ; CHECK: vdpps
+  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+
+
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index ce31161dbbcd..bb9354cff038 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -458,7 +458,7 @@ declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
 
 
 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
-  ; CHECK: vpslldq
+  ; CHECK: vpslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
   %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
@@ -466,7 +466,7 @@ declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
 
 
 define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
-  ; CHECK: vpslldq
+  ; CHECK: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
   %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
@@ -554,7 +554,7 @@ declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
 
 
 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
-  ; CHECK: vpsrldq
+  ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
   %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
@@ -562,7 +562,7 @@ declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
 
 
 define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
-  ; CHECK: vpsrldq
+  ; CHECK: vpsrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
   %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
@@ -818,18 +818,18 @@ declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
 
 define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
   ; CHECK: vblendpd
-  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
 }
-declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
 
 
 define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
   ; CHECK: vblendps
-  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
-declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
 
 
 define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
@@ -850,35 +850,35 @@ declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x floa
 
 define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
   ; CHECK: vdppd
-  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
 }
-declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
+declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
 
 
 define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
   ; CHECK: vdpps
-  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
-declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
 
 
 define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
   ; CHECK: vinsertps
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
-declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
 
 
 
 define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: vmpsadbw
-  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
   ret <8 x i16> %res
 }
-declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
 
 
 define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
@@ -899,10 +899,10 @@ declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) noun
 
 define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
   ; CHECK: vpblendw
-  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
   ret <8 x i16> %res
 }
-declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
 
 
 define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
@@ -1770,18 +1770,18 @@ declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwi
 
 define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
   ; CHECK: vblendpd
-  %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
+  %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i8 7) ; <<4 x double>> [#uses=1]
   ret <4 x double> %res
 }
-declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
+declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
 
 
 define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
   ; CHECK: vblendps
-  %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+  %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   ret <8 x float> %res
 }
-declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
 
 
 define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
@@ -1950,10 +1950,10 @@ declare <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float>) nounwind readnone
 
 define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
   ; CHECK: vdpps
-  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
+  %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
   ret <8 x float> %res
 }
-declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
+declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
 
 
 define <4 x double> @test_x86_avx_hadd_pd_256(<4 x double> %a0, <4 x double> %a1) {
@@ -2309,7 +2309,7 @@ declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) noun
 
 define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
   ; CHECK: vpermilpd
-  %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 7) ; <<2 x double>> [#uses=1]
+  %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
 }
 declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
@@ -2324,7 +2324,7 @@ declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind rea
 
 
 define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
-  ; CHECK: vpshufd
+  ; CHECK: vpermilps
   %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
diff --git a/test/CodeGen/X86/avx-movdup.ll b/test/CodeGen/X86/avx-movdup.ll
deleted file mode 100644
index 42d84def98a0..000000000000
--- a/test/CodeGen/X86/avx-movdup.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-; CHECK: vmovsldup
-define <8 x float> @movdupA(<8 x float> %src) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <8 x float> %src, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
-  ret <8 x float> %shuffle.i
-}
-
-; CHECK: vmovshdup
-define <8 x float> @movdupB(<8 x float> %src) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <8 x float> %src, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
-  ret <8 x float> %shuffle.i
-}
-
-; CHECK: vmovsldup
-define <4 x i64> @movdupC(<4 x i64> %src) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <4 x i64> %src to <8 x float>
-  %shuffle.i = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
-  %1 = bitcast <8 x float> %shuffle.i to <4 x i64>
-  ret <4 x i64> %1
-}
-
-; CHECK: vmovshdup
-define <4 x i64> @movdupD(<4 x i64> %src) nounwind uwtable readnone ssp {
-entry:
-  %0 = bitcast <4 x i64> %src to <8 x float>
-  %shuffle.i = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
-  %1 = bitcast <8 x float> %shuffle.i to <4 x i64>
-  ret <4 x i64> %1
-}
-
diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll
deleted file mode 100755
index fb2287f52892..000000000000
--- a/test/CodeGen/X86/avx-sext.ll
+++ /dev/null
@@ -1,199 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSSE3
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=pentium4 | FileCheck %s -check-prefix=SSE2
-
-define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
-; AVX: sext_8i16_to_8i32
-; AVX: vpmovsxwd
-
-  %B = sext <8 x i16> %A to <8 x i32>
-  ret <8 x i32>%B
-}
-
-define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
-; AVX: sext_4i32_to_4i64
-; AVX: vpmovsxdq
-
-  %B = sext <4 x i32> %A to <4 x i64>
-  ret <4 x i64>%B
-}
-
-; AVX: load_sext_test1
-; AVX: vpmovsxwd (%r{{[^,]*}}), %xmm{{.*}}
-; AVX: ret
-
-; SSSE3: load_sext_test1
-; SSSE3: movq
-; SSSE3: punpcklwd %xmm{{.*}}, %xmm{{.*}}
-; SSSE3: psrad $16
-; SSSE3: ret
-
-; SSE2: load_sext_test1
-; SSE2: movq
-; SSE2: punpcklwd %xmm{{.*}}, %xmm{{.*}}
-; SSE2: psrad $16
-; SSE2: ret
-define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) {
- %X = load <4 x i16>* %ptr
- %Y = sext <4 x i16> %X to <4 x i32>
- ret <4 x i32>%Y
-}
-
-; AVX: load_sext_test2
-; AVX: vpmovsxbd (%r{{[^,]*}}), %xmm{{.*}}
-; AVX: ret
-
-; SSSE3: load_sext_test2
-; SSSE3: movd
-; SSSE3: pshufb
-; SSSE3: psrad $24
-; SSSE3: ret
-
-; SSE2: load_sext_test2
-; SSE2: movl
-; SSE2: psrad $24
-; SSE2: ret
-define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) {
- %X = load <4 x i8>* %ptr
- %Y = sext <4 x i8> %X to <4 x i32>
- ret <4 x i32>%Y
-}
-
-; AVX: load_sext_test3
-; AVX: vpmovsxbq (%r{{[^,]*}}), %xmm{{.*}}
-; AVX: ret
-
-; SSSE3: load_sext_test3
-; SSSE3: movsbq
-; SSSE3: movsbq
-; SSSE3: punpcklqdq
-; SSSE3: ret
-
-; SSE2: load_sext_test3
-; SSE2: movsbq
-; SSE2: movsbq
-; SSE2: punpcklqdq
-; SSE2: ret
-define <2 x i64> @load_sext_test3(<2 x i8> *%ptr) {
- %X = load <2 x i8>* %ptr
- %Y = sext <2 x i8> %X to <2 x i64>
- ret <2 x i64>%Y
-}
-
-; AVX: load_sext_test4
-; AVX: vpmovsxwq (%r{{[^,]*}}), %xmm{{.*}}
-; AVX: ret
-
-; SSSE3: load_sext_test4
-; SSSE3: movswq
-; SSSE3: movswq
-; SSSE3: punpcklqdq
-; SSSE3: ret
-
-; SSE2: load_sext_test4
-; SSE2: movswq
-; SSE2: movswq
-; SSE2: punpcklqdq
-; SSE2: ret
-define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) {
- %X = load <2 x i16>* %ptr
- %Y = sext <2 x i16> %X to <2 x i64>
- ret <2 x i64>%Y
-}
-
-; AVX: load_sext_test5
-; AVX: vpmovsxdq (%r{{[^,]*}}), %xmm{{.*}}
-; AVX: ret
-
-; SSSE3: load_sext_test5
-; SSSE3: movslq
-; SSSE3: movslq
-; SSSE3: punpcklqdq
-; SSSE3: ret
-
-; SSE2: load_sext_test5
-; SSE2: movslq
-; SSE2: movslq
-; SSE2: punpcklqdq
-; SSE2: ret
-define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) {
- %X = load <2 x i32>* %ptr
- %Y = sext <2 x i32> %X to <2 x i64>
- ret <2 x i64>%Y
-}
-
-; AVX: load_sext_test6
-; AVX: vpmovsxbw (%r{{[^,]*}}), %xmm{{.*}}
-; AVX: ret
-
-; SSSE3: load_sext_test6
-; SSSE3: movq
-; SSSE3: punpcklbw
-; SSSE3: psraw $8
-; SSSE3: ret
-
-; SSE2: load_sext_test6
-; SSE2: movq
-; SSE2: punpcklbw
-; SSE2: psraw $8
-; SSE2: ret
-define <8 x i16> @load_sext_test6(<8 x i8> *%ptr) {
- %X = load <8 x i8>* %ptr
- %Y = sext <8 x i8> %X to <8 x i16>
- ret <8 x i16>%Y
-}
-
-; AVX: sext_4i1_to_4i64
-; AVX: vpslld  $31
-; AVX: vpsrad  $31
-; AVX: vpmovsxdq
-; AVX: vpmovsxdq
-; AVX: ret
-define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
-  %extmask = sext <4 x i1> %mask to <4 x i64>
-  ret <4 x i64> %extmask
-}
-
-; AVX-LABEL: sext_16i8_to_16i16
-; AVX: vpmovsxbw
-; AVX: vmovhlps
-; AVX: vpmovsxbw
-; AVX: ret
-define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) {
- %X = load <16 x i8>* %ptr
- %Y = sext <16 x i8> %X to <16 x i16>
- ret <16 x i16> %Y
-}
-
-; AVX: sext_4i8_to_4i64
-; AVX: vpslld  $24
-; AVX: vpsrad  $24
-; AVX: vpmovsxdq
-; AVX: vpmovsxdq
-; AVX: ret
-define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
-  %extmask = sext <4 x i8> %mask to <4 x i64>
-  ret <4 x i64> %extmask
-}
-
-; AVX: sext_4i8_to_4i64
-; AVX: vpmovsxbd
-; AVX: vpmovsxdq
-; AVX: vpmovsxdq
-; AVX: ret
-define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) {
- %X = load <4 x i8>* %ptr
- %Y = sext <4 x i8> %X to <4 x i64>
- ret <4 x i64>%Y
-}
-
-; AVX: sext_4i16_to_4i64
-; AVX: vpmovsxwd
-; AVX: vpmovsxdq
-; AVX: vpmovsxdq
-; AVX: ret
-define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) {
- %X = load <4 x i16>* %ptr
- %Y = sext <4 x i16> %X to <4 x i64>
- ret <4 x i64>%Y
-}
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
deleted file mode 100644
index 4a996d79815c..000000000000
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ /dev/null
@@ -1,336 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-; PR11102
-define <4 x float> @test1(<4 x float> %a) nounwind {
-  %b = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 5, i32 undef, i32 undef>
-  ret <4 x float> %b
-; CHECK-LABEL: test1:
-;; TODO: This test could be improved by removing the xor instruction and
-;; having vinsertps zero out the needed elements.
-; CHECK: vxorps
-; CHECK: vinsertps
-}
-
-; rdar://10538417
-define <3 x i64> @test2(<2 x i64> %v) nounwind readnone {
-; CHECK-LABEL: test2:
-; CHECK: vinsertf128
-  %1 = shufflevector <2 x i64> %v, <2 x i64> %v, <3 x i32> <i32 0, i32 1, i32 undef>
-  %2 = shufflevector <3 x i64> zeroinitializer, <3 x i64> %1, <3 x i32> <i32 3, i32 4, i32 2>
-  ret <3 x i64> %2
-; CHECK: ret
-}
-
-define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind {
-  %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 undef>
-  ret <4 x i64> %c
-; CHECK-LABEL: test3:
-; CHECK: vblendpd
-; CHECK: ret
-}
-
-define <8 x float> @test4(float %a) nounwind {
-  %b = insertelement <8 x float> zeroinitializer, float %a, i32 0
-  ret <8 x float> %b
-; CHECK-LABEL: test4:
-; CHECK: vinsertf128
-}
-
-; rdar://10594409
-define <8 x float> @test5(float* nocapture %f) nounwind uwtable readonly ssp {
-entry:
-  %0 = bitcast float* %f to <4 x float>*
-  %1 = load <4 x float>* %0, align 16
-; CHECK: test5
-; CHECK: vmovaps
-; CHECK-NOT: vxorps
-; CHECK-NOT: vinsertf128
-  %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
-  ret <8 x float> %shuffle.i
-}
-
-define <4 x double> @test6(double* nocapture %d) nounwind uwtable readonly ssp {
-entry:
-  %0 = bitcast double* %d to <2 x double>*
-  %1 = load <2 x double>* %0, align 16
-; CHECK: test6
-; CHECK: vmovaps
-; CHECK-NOT: vxorps
-; CHECK-NOT: vinsertf128
-  %shuffle.i = shufflevector <2 x double> %1, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
-  ret <4 x double> %shuffle.i
-}
-
-define <16 x i16> @test7(<4 x i16> %a) nounwind {
-; CHECK: test7
-  %b = shufflevector <4 x i16> %a, <4 x i16> undef, <16 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK: ret
-  ret <16 x i16> %b
-}
-
-; CHECK: test8
-define void @test8() {
-entry:
-  %0 = load <16 x i64> addrspace(1)* null, align 128
-  %1 = shufflevector <16 x i64> <i64 undef, i64 undef, i64 0, i64 undef, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i64> %0, <16 x i32> <i32 17, i32 18, i32 2, i32 undef, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 26>
-  %2 = shufflevector <16 x i64> %1, <16 x i64> %0, <16 x i32> <i32 0, i32 1, i32 2, i32 30, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 undef, i32 11, i32 undef, i32 22, i32 20, i32 15>
-  store <16 x i64> %2, <16 x i64> addrspace(1)* undef, align 128
-; CHECK: ret
-  ret void
-}
-
-; Extract a value from a shufflevector..
-define i32 @test9(<4 x i32> %a) nounwind {
-; CHECK: test9
-; CHECK: vpextrd
-  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4>
-  %r = extractelement <8 x i32> %b, i32 2
-; CHECK: ret
-  ret i32 %r
-}
-
-; Extract a value which is the result of an undef mask.
-define i32 @test10(<4 x i32> %a) nounwind {
-; CHECK: @test10
-; CHECK-NOT: {{^[^#]*[a-z]}}
-; CHECK: ret
-  %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %r = extractelement <8 x i32> %b, i32 2
-  ret i32 %r
-}
-
-define <4 x float> @test11(<4 x float> %a) nounwind  {
-; CHECK: test11
-; CHECK: vpshufd $27
-  %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  ret <4 x float> %tmp1
-}
-
-define <4 x float> @test12(<4 x float>* %a) nounwind  {
-; CHECK: test12
-; CHECK: vpshufd
-  %tmp0 = load <4 x float>* %a
-  %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  ret <4 x float> %tmp1
-}
-
-define <4 x i32> @test13(<4 x i32> %a) nounwind  {
-; CHECK: test13
-; CHECK: vpshufd $27
-  %tmp1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  ret <4 x i32> %tmp1
-}
-
-define <4 x i32> @test14(<4 x i32>* %a) nounwind  {
-; CHECK: test14
-; CHECK: vpshufd $27, (
-  %tmp0 = load <4 x i32>* %a
-  %tmp1 = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-  ret <4 x i32> %tmp1
-}
-
-; CHECK: test15
-; CHECK: vpshufd $8
-; CHECK: ret
-define <4 x i32> @test15(<2 x i32>%x) nounwind readnone {
-  %x1 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-  ret <4 x i32>%x1
-}
-
-; rdar://10974078
-define <8 x float> @test16(float* nocapture %f) nounwind uwtable readonly ssp {
-entry:
-  %0 = bitcast float* %f to <4 x float>*
-  %1 = load <4 x float>* %0, align 8
-; CHECK: test16
-; CHECK: vmovups
-; CHECK-NOT: vxorps
-; CHECK-NOT: vinsertf128
-  %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
-  ret <8 x float> %shuffle.i
-}
-
-; PR12413
-; CHECK: shuf1
-; CHECK: vpshufb
-; CHECK: vpshufb
-; CHECK: vpshufb
-; CHECK: vpshufb
-define <32 x i8> @shuf1(<32 x i8> %inval1, <32 x i8> %inval2) {
-entry:
- %0 = shufflevector <32 x i8> %inval1, <32 x i8> %inval2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
- ret <32 x i8> %0
-}
-
-; handle the case where only half of the 256-bits is splittable
-; CHECK: shuf2
-; CHECK: vpshufb
-; CHECK: vpshufb
-; CHECK: vpextrb
-; CHECK: vpextrb
-define <32 x i8> @shuf2(<32 x i8> %inval1, <32 x i8> %inval2) {
-entry:
- %0 = shufflevector <32 x i8> %inval1, <32 x i8> %inval2, <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 31, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
- ret <32 x i8> %0
-}
-
-; CHECK: blend1
-; CHECK: vblendps
-; CHECK: ret
-define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
-  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-  ret <4 x i32> %t
-}
-
-; CHECK: blend2
-; CHECK: vblendps
-; CHECK: ret
-define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
-  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x i32> %t
-}
-
-; CHECK: blend2a
-; CHECK: vblendps
-; CHECK: ret
-define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline {
-  %t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x float> %t
-}
-
-; CHECK: blend3
-; CHECK-NOT: vblendps
-; CHECK: ret
-define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
-  %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
-  ret <4 x i32> %t
-}
-
-; CHECK: blend4
-; CHECK: vblendpd
-; CHECK: ret
-define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
-  %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-  ret <4 x i64> %t
-}
-
-; CHECK: narrow
-; CHECK: vpermilps
-; CHECK: ret
-define <16 x i16> @narrow(<16 x i16> %a) nounwind alwaysinline {
-  %t = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 undef, i32 14, i32 15, i32 undef, i32 undef>
-  ret <16 x i16> %t
-}
-
-;CHECK-LABEL: test17:
-;CHECK-NOT: vinsertf128
-;CHECK: ret
-define   <8 x float> @test17(<4 x float> %y) {
-  %x = shufflevector <4 x float> %y, <4 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  ret <8 x float> %x
-}
-
-; CHECK: test18
-; CHECK: vmovshdup
-; CHECK: vblendps
-; CHECK: ret
-define <8 x float> @test18(<8 x float> %A, <8 x float>%B) nounwind {
-  %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-  ret <8 x float>%S
-}
-
-; CHECK: test19
-; CHECK: vmovsldup
-; CHECK: vblendps
-; CHECK: ret
-define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
-  %S = shufflevector <8 x float> %A, <8 x float> %B, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-  ret <8 x float>%S
-}
-
-; rdar://12684358
-; Make sure loads happen before stores.
-; CHECK: swap8doubles
-; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
-; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
-; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}}
-; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}}
-; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
-; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
-; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
-; CHECK: vextractf128
-; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)
-; CHECK: vextractf128
-; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
-; CHECK: vmovaps %ymm{{[0-9]+}}, {{[0-9]*}}(%rsi)
-define void @swap8doubles(double* nocapture %A, double* nocapture %C) nounwind uwtable ssp {
-entry:
-  %add.ptr = getelementptr inbounds double* %A, i64 2
-  %v.i = bitcast double* %A to <2 x double>*
-  %0 = load <2 x double>* %v.i, align 1
-  %shuffle.i.i = shufflevector <2 x double> %0, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
-  %v1.i = bitcast double* %add.ptr to <2 x double>*
-  %1 = load <2 x double>* %v1.i, align 1
-  %2 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i, <2 x double> %1, i8 1) nounwind
-  %add.ptr1 = getelementptr inbounds double* %A, i64 6
-  %add.ptr2 = getelementptr inbounds double* %A, i64 4
-  %v.i27 = bitcast double* %add.ptr2 to <2 x double>*
-  %3 = load <2 x double>* %v.i27, align 1
-  %shuffle.i.i28 = shufflevector <2 x double> %3, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
-  %v1.i29 = bitcast double* %add.ptr1 to <2 x double>*
-  %4 = load <2 x double>* %v1.i29, align 1
-  %5 = tail call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %shuffle.i.i28, <2 x double> %4, i8 1) nounwind
-  %6 = bitcast double* %C to <4 x double>*
-  %7 = load <4 x double>* %6, align 32
-  %add.ptr5 = getelementptr inbounds double* %C, i64 4
-  %8 = bitcast double* %add.ptr5 to <4 x double>*
-  %9 = load <4 x double>* %8, align 32
-  %shuffle.i26 = shufflevector <4 x double> %7, <4 x double> undef, <2 x i32> <i32 0, i32 1>
-  %10 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %7, i8 1)
-  %shuffle.i = shufflevector <4 x double> %9, <4 x double> undef, <2 x i32> <i32 0, i32 1>
-  %11 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %9, i8 1)
-  store <2 x double> %shuffle.i26, <2 x double>* %v.i, align 16
-  store <2 x double> %10, <2 x double>* %v1.i, align 16
-  store <2 x double> %shuffle.i, <2 x double>* %v.i27, align 16
-  store <2 x double> %11, <2 x double>* %v1.i29, align 16
-  store <4 x double> %2, <4 x double>* %6, align 32
-  store <4 x double> %5, <4 x double>* %8, align 32
-  ret void
-}
-declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
-declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
-
-; this test case just should not fail
-define void @test20() {
-  %a0 = insertelement <3 x double> <double 0.000000e+00, double 0.000000e+00, double undef>, double 0.000000e+00, i32 2
-  store <3 x double> %a0, <3 x double>* undef, align 1
-  %a1 = insertelement <3 x double> <double 0.000000e+00, double 0.000000e+00, double undef>, double undef, i32 2
-  store <3 x double> %a1, <3 x double>* undef, align 1
-  ret void
-}
-
-define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
-; CHECK-LABEL: test_insert_64_zext
-; CHECK-NOT: xor
-; CHECK: vmovq
-  %1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
-  ret <2 x i64> %1
-}
-
-;; Ensure we don't use insertps from non v4x32 vectors.
-;; On SSE4.1 it works because bigger vectors use more than 1 register.
-;; On AVX they get passed in a single register.
-;; FIXME: We could probably optimize this case, if we're only using the
-;; first 4 indices.
-define <4 x i32> @insert_from_diff_size(<8 x i32> %x) {
-; CHECK-LABEL: insert_from_diff_size:
-; CHECK-NOT: insertps
-; CHECK: ret
-  %vecext = extractelement <8 x i32> %x, i32 0
-  %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
-  %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
-  %vecinit2 = insertelement <4 x i32> %vecinit1, i32 0, i32 2
-  %a.0 = extractelement <8 x i32> %x, i32 0
-  %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %a.0, i32 3
-  ret <4 x i32> %vecinit3
-}
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index b1b2f8b97a73..98c1645b9080 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -1,9 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
 
 
-; CHECK: vpunpcklbw %xmm
-; CHECK-NEXT: vpunpckhbw %xmm
-; CHECK-NEXT: vpshufd $85
+; CHECK: vpshufb {{.*}} ## xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
 ; CHECK-NEXT: vinsertf128 $1
 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
 entry:
@@ -11,8 +9,7 @@ entry:
   ret <32 x i8> %shuffle
 }
 
-; CHECK: vpunpckhwd %xmm
-; CHECK-NEXT: vpshufd $85
+; CHECK: vpshufb {{.*}} ## xmm0 = xmm0[10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11]
 ; CHECK-NEXT: vinsertf128 $1
 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
 entry:
@@ -21,7 +18,7 @@ entry:
 }
 
 ; CHECK: vmovq
-; CHECK-NEXT: vmovlhps %xmm
+; CHECK-NEXT: vunpcklpd %xmm
 ; CHECK-NEXT: vinsertf128 $1
 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
 entry:
@@ -32,7 +29,7 @@ entry:
   ret <4 x i64> %vecinit6.i
 }
 
-; CHECK: vpermilpd $0
+; CHECK: vunpcklpd %xmm
 ; CHECK-NEXT: vinsertf128 $1
 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
 entry:
@@ -72,7 +69,7 @@ __load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_ex
   ret <8 x float> %load_broadcast12281250
 }
 
-; CHECK: vpshufd $0
+; CHECK: vpermilps $4
 ; CHECK-NEXT: vinsertf128 $1
 define <8 x float> @funcF(i32 %val) nounwind {
   %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
@@ -81,7 +78,7 @@ define <8 x float> @funcF(i32 %val) nounwind {
   ret <8 x float> %tmp
 }
 
-; CHECK: vpshufd  $0
+; CHECK: vpermilps $0
 ; CHECK-NEXT: vinsertf128  $1
 define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
 entry:
@@ -90,7 +87,7 @@ entry:
 }
 
 ; CHECK: vextractf128  $1
-; CHECK-NEXT: vpshufd
+; CHECK-NEXT: vpermilps $85
 ; CHECK-NEXT: vinsertf128  $1
 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/avx-vmovddup.ll b/test/CodeGen/X86/avx-vmovddup.ll
deleted file mode 100644
index 1c56fe2b1a01..000000000000
--- a/test/CodeGen/X86/avx-vmovddup.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-; CHECK: vmovddup %ymm
-define <4 x i64> @A(<4 x i64> %a) {
-  %c = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
-  ret <4 x i64> %c
-}
-
-; CHECK: vmovddup (%
-define <4 x i64> @B(<4 x i64>* %ptr) {
-  %a = load <4 x i64>* %ptr
-  %c = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
-  ret <4 x i64> %c
-}
diff --git a/test/CodeGen/X86/avx-vperm2f128.ll b/test/CodeGen/X86/avx-vperm2f128.ll
deleted file mode 100644
index c20775bacad2..000000000000
--- a/test/CodeGen/X86/avx-vperm2f128.ll
+++ /dev/null
@@ -1,69 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-; CHECK: _A
-; CHECK: vperm2f128 $1
-define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
-  ret <8 x float> %shuffle
-}
-
-; CHECK: _B
-; CHECK: vblendps $240
-define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
-  ret <8 x float> %shuffle
-}
-
-; CHECK: _C
-; CHECK: vperm2f128 $0
-define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-  ret <8 x float> %shuffle
-}
-
-; CHECK: _D
-; CHECK: vperm2f128 $17
-define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
-  ret <8 x float> %shuffle
-}
-
-; CHECK: _E
-; CHECK: vperm2f128 $17
-define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-  ret <32 x i8> %shuffle
-}
-
-; CHECK: _E2
-; CHECK: vperm2f128 $3
-define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
-  ret <4 x i64> %shuffle
-}
-
-;;;; Cases with undef indicies mixed in the mask
-
-; CHECK: _F
-; CHECK: vperm2f128 $33
-define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 9, i32 undef, i32 11>
-  ret <8 x float> %shuffle
-}
-
-;;;; Cases we must not select vperm2f128
-
-; CHECK: _G
-; CHECK-NOT: vperm2f128
-define <8 x float> @G(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 12, i32 undef, i32 15>
-  ret <8 x float> %shuffle
-}
diff --git a/test/CodeGen/X86/avx-vperm2x128.ll b/test/CodeGen/X86/avx-vperm2x128.ll
new file mode 100644
index 000000000000..43303ca57c4f
--- /dev/null
+++ b/test/CodeGen/X86/avx-vperm2x128.ll
@@ -0,0 +1,193 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+
+define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+; ALL-LABEL: A:
+; ALL:       ## BB#0: ## %entry
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; ALL-NEXT:    retq
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+; ALL-LABEL: B:
+; ALL:       ## BB#0: ## %entry
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; ALL-NEXT:    retq
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+; ALL-LABEL: C:
+; ALL:       ## BB#0: ## %entry
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    retq
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+; ALL-LABEL: D:
+; ALL:       ## BB#0: ## %entry
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; ALL-NEXT:    retq
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %shuffle
+}
+
+define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
+; ALL-LABEL: E:
+; ALL:       ## BB#0: ## %entry
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; ALL-NEXT:    retq
+entry:
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %shuffle
+}
+
+define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+; ALL-LABEL: E2:
+; ALL:       ## BB#0: ## %entry
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
+; ALL-NEXT:    retq
+entry:
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  ret <4 x i64> %shuffle
+}
+
+define <32 x i8> @Ei(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
+; AVX1-LABEL: Ei:
+; AVX1:       ## BB#0: ## %entry
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpaddb {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: Ei:
+; AVX2:       ## BB#0: ## %entry
+; AVX2-NEXT:    vpaddb {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
+; AVX2-NEXT:    retq
+entry:
+  ; add forces execution domain
+  %a2 = add <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %shuffle
+}
+
+define <4 x i64> @E2i(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+; AVX1-LABEL: E2i:
+; AVX1:       ## BB#0: ## %entry
+; AVX1-NEXT:    vpaddq {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: E2i:
+; AVX2:       ## BB#0: ## %entry
+; AVX2-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm2
+; AVX2-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[0,1]
+; AVX2-NEXT:    retq
+entry:
+  ; add forces execution domain
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  ret <4 x i64> %shuffle
+}
+
+define <8 x i32> @E3i(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
+; AVX1-LABEL: E3i:
+; AVX1:       ## BB#0: ## %entry
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: E3i:
+; AVX2:       ## BB#0: ## %entry
+; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
+; AVX2-NEXT:    vpaddd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX2-NEXT:    retq
+entry:
+  ; add forces execution domain
+  %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> <i32 undef, i32 5, i32 undef, i32 7, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i32> %shuffle
+}
+
+define <16 x i16> @E4i(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
+; AVX1-LABEL: E4i:
+; AVX1:       ## BB#0: ## %entry
+; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: E4i:
+; AVX2:       ## BB#0: ## %entry
+; AVX2-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+entry:
+  ; add forces execution domain
+  %a2 = add <16 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @E5i(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp {
+; AVX1-LABEL: E5i:
+; AVX1:       ## BB#0: ## %entry
+; AVX1-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX1-NEXT:    vpaddw {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vmovaps (%rsi), %ymm1
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: E5i:
+; AVX2:       ## BB#0: ## %entry
+; AVX2-NEXT:    vmovdqa (%rdi), %ymm0
+; AVX2-NEXT:    vmovdqa (%rsi), %ymm1
+; AVX2-NEXT:    vpaddw {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+entry:
+  %c = load <16 x i16>* %a
+  %d = load <16 x i16>* %b
+  %c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <16 x i16> %shuffle
+}
+
+;;;; Cases with undef indicies mixed in the mask
+
+define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+; ALL-LABEL: F:
+; ALL:       ## BB#0: ## %entry
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[0,1,0,1]
+; ALL-NEXT:    retq
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 9, i32 undef, i32 11>
+  ret <8 x float> %shuffle
+}
+
+;;;; Cases we must not select vperm2f128
+
+define <8 x float> @G(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+; ALL-LABEL: G:
+; ALL:       ## BB#0: ## %entry
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
+; ALL-NEXT:    retq
+entry:
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 12, i32 undef, i32 15>
+  ret <8 x float> %shuffle
+}
diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll
deleted file mode 100644
index b7f8d72e58c9..000000000000
--- a/test/CodeGen/X86/avx-vpermil.ll
+++ /dev/null
@@ -1,54 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-; CHECK: vpermilps
-define <8 x float> @funcA(<8 x float> %a) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 1, i32 5, i32 6, i32 7, i32 5>
-  ret <8 x float> %shuffle
-}
-
-; CHECK: vpermilpd
-define <4 x double> @funcB(<4 x double> %a) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
-  ret <4 x double> %shuffle
-}
-
-; CHECK: vpermilps
-define <8 x i32> @funcC(<8 x i32> %a) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 1, i32 5, i32 6, i32 7, i32 5>
-  ret <8 x i32> %shuffle
-}
-
-; CHECK: vpermilpd
-define <4 x i64> @funcD(<4 x i64> %a) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
-  ret <4 x i64> %shuffle
-}
-
-; CHECK: vpermilpd
-define <4 x i64> @funcQ(<4 x i64>* %a) nounwind uwtable readnone ssp {
-entry:
-  %a2 = load <4 x i64>* %a
-  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
-  ret <4 x i64> %shuffle
-}
-
-; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the
-; target specific mask was correctly generated.
-; CHECK: vpermilps $-100
-define <8 x float> @funcE(<8 x float> %a) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 8, i32 3, i32 1, i32 2, i32 4, i32 8, i32 5, i32 6>
-  ret <8 x float> %shuffle
-}
-
-; CHECK: palignr $8
-; CHECK: palignr $8
-define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
-  ret <8 x float> %shuffle
-}
diff --git a/test/CodeGen/X86/avx-vshufp.ll b/test/CodeGen/X86/avx-vshufp.ll
deleted file mode 100644
index ad3dbc1ed893..000000000000
--- a/test/CodeGen/X86/avx-vshufp.ll
+++ /dev/null
@@ -1,157 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-; CHECK: vshufps  $-53, %ymm
-define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
-  ret <8 x float> %shuffle
-}
-
-; CHECK: vshufps  $-53, (%{{.*}}), %ymm
-define <8 x float> @A2(<8 x float>* %a, <8 x float>* %b) nounwind uwtable readnone ssp {
-entry:
-  %a2 = load <8 x float>* %a
-  %b2 = load <8 x float>* %b
-  %shuffle = shufflevector <8 x float> %a2, <8 x float> %b2, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
-  ret <8 x float> %shuffle
-}
-
-; CHECK: vshufps  $-53, %ymm
-define <8 x i32> @A3(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
-  ret <8 x i32> %shuffle
-}
-
-; CHECK: vshufps  $-53, (%{{.*}}), %ymm
-define <8 x i32> @A4(<8 x i32>* %a, <8 x i32>* %b) nounwind uwtable readnone ssp {
-entry:
-  %a2 = load <8 x i32>* %a
-  %b2 = load <8 x i32>* %b
-  %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b2, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
-  ret <8 x i32> %shuffle
-}
-
-; CHECK: vblendpd  $10, %ymm
-define <4 x double> @B(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x double> %shuffle
-}
-
-; CHECK: vblendpd  $10, (%{{.*}}), %ymm
-define <4 x double> @B2(<4 x double>* %a, <4 x double>* %b) nounwind uwtable readnone ssp {
-entry:
-  %a2 = load <4 x double>* %a
-  %b2 = load <4 x double>* %b
-  %shuffle = shufflevector <4 x double> %a2, <4 x double> %b2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x double> %shuffle
-}
-
-; CHECK: vblendpd  $10, %ymm
-define <4 x i64> @B3(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x i64> %shuffle
-}
-
-; CHECK: vblendpd  $10, (%{{.*}}), %ymm
-define <4 x i64> @B4(<4 x i64>* %a, <4 x i64>* %b) nounwind uwtable readnone ssp {
-entry:
-  %a2 = load <4 x i64>* %a
-  %b2 = load <4 x i64>* %b
-  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x i64> %shuffle
-}
-
-; CHECK: vshufps  $-53, %ymm
-define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 undef, i32 undef, i32 11, i32 undef, i32 6, i32 12, i32 undef>
-  ret <8 x float> %shuffle
-}
-
-; CHECK: vblendpd  $2, %ymm
-define <4 x double> @D(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 undef>
-  ret <4 x double> %shuffle
-}
-
-; CHECK: vshufps $-55, %ymm
-define <8 x float> @E(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 10, i32 0, i32 3, i32 13, i32 14, i32 4, i32 7>
-  ret <8 x float> %shuffle
-}
-
-; CHECK: vshufpd  $8, %ymm
-define <4 x double> @F(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 7>
-  ret <4 x double> %shuffle
-}
-
-; CHECK: vshufps  $-53, %xmm
-define <4 x float> @A128(<4 x float> %a, <4 x float> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
-  ret <4 x float> %shuffle
-}
-
-; CHECK: vshufps  $-53, (%{{.*}}), %xmm
-define <4 x float> @A2128(<4 x float>* %a, <4 x float>* %b) nounwind uwtable readnone ssp {
-entry:
-  %a2 = load <4 x float>* %a
-  %b2 = load <4 x float>* %b
-  %shuffle = shufflevector <4 x float> %a2, <4 x float> %b2, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
-  ret <4 x float> %shuffle
-}
-
-; CHECK: vshufps  $-53, %xmm
-define <4 x i32> @A3128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
-  ret <4 x i32> %shuffle
-}
-
-; CHECK: vshufps  $-53, (%{{.*}}), %xmm
-define <4 x i32> @A4128(<4 x i32>* %a, <4 x i32>* %b) nounwind uwtable readnone ssp {
-entry:
-  %a2 = load <4 x i32>* %a
-  %b2 = load <4 x i32>* %b
-  %shuffle = shufflevector <4 x i32> %a2, <4 x i32> %b2, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
-  ret <4 x i32> %shuffle
-}
-
-; CHECK: vshufpd  $1, %xmm
-define <2 x double> @B128(<2 x double> %a, <2 x double> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
-  ret <2 x double> %shuffle
-}
-
-; CHECK: vshufpd  $1, (%{{.*}}), %xmm
-define <2 x double> @B2128(<2 x double>* %a, <2 x double>* %b) nounwind uwtable readnone ssp {
-entry:
-  %a2 = load <2 x double>* %a
-  %b2 = load <2 x double>* %b
-  %shuffle = shufflevector <2 x double> %a2, <2 x double> %b2, <2 x i32> <i32 1, i32 2>
-  ret <2 x double> %shuffle
-}
-
-; CHECK: vshufpd  $1, %xmm
-define <2 x i64> @B3128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
-entry:
-  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
-  ret <2 x i64> %shuffle
-}
-
-; CHECK: vshufpd  $1, (%{{.*}}), %xmm
-define <2 x i64> @B4128(<2 x i64>* %a, <2 x i64>* %b) nounwind uwtable readnone ssp {
-entry:
-  %a2 = load <2 x i64>* %a
-  %b2 = load <2 x i64>* %b
-  %shuffle = shufflevector <2 x i64> %a2, <2 x i64> %b2, <2 x i32> <i32 1, i32 2>
-  ret <2 x i64> %shuffle
-}
diff --git a/test/CodeGen/X86/avx-zext.ll b/test/CodeGen/X86/avx-zext.ll
deleted file mode 100755
index 75117463bc39..000000000000
--- a/test/CodeGen/X86/avx-zext.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-
-define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
-;CHECK-LABEL: zext_8i16_to_8i32:
-;CHECK: vpunpckhwd
-;CHECK: ret
-
-  %B = zext <8 x i16> %A to <8 x i32>
-  ret <8 x i32>%B
-}
-
-define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
-;CHECK-LABEL: zext_4i32_to_4i64:
-;CHECK: vpunpckhdq
-;CHECK: ret
-
-  %B = zext <4 x i32> %A to <4 x i64>
-  ret <4 x i64>%B
-}
-
-define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
-;CHECK-LABEL: zext_8i8_to_8i32:
-;CHECK: vpunpckhwd
-;CHECK: vpmovzxwd
-;CHECK: vinsertf128
-;CHECK: ret
-  %t = zext <8 x i8> %z to <8 x i32>
-  ret <8 x i32> %t
-}
-
-; PR17654
-define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
-; CHECK-LABEL: zext_16i8_to_16i16:
-; CHECK: vpxor
-; CHECK: vpunpckhbw
-; CHECK: vpunpcklbw
-; CHECK: vinsertf128
-; CHECK: ret
-  %t = zext <16 x i8> %z to <16 x i16>
-  ret <16 x i16> %t
-}
diff --git a/test/CodeGen/X86/avx.ll b/test/CodeGen/X86/avx.ll
index 6069c14f0d80..cba6d98f5a84 100644
--- a/test/CodeGen/X86/avx.ll
+++ b/test/CodeGen/X86/avx.ll
@@ -60,7 +60,7 @@ define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x floa
 ; X32: movl    8(%esp), %ecx
 ; CHECK-NOT: mov
 ;; Try to match a bit more of the instr, since we need the load's offset.
-; CHECK: vinsertps    $192, 12(%{{...}},%{{...}}), %
+; CHECK: vinsertps    $-64, 12(%{{...}},%{{...}}), %
 ; CHECK-NEXT: ret
   %1 = getelementptr inbounds <4 x float>* %pb, i64 %index
   %2 = load <4 x float>* %1, align 16
diff --git a/test/CodeGen/X86/avx1-stack-reload-folding.ll b/test/CodeGen/X86/avx1-stack-reload-folding.ll
new file mode 100644
index 000000000000..54c192583d6e
--- /dev/null
+++ b/test/CodeGen/X86/avx1-stack-reload-folding.ll
@@ -0,0 +1,83 @@
+; RUN: llc -O3 -disable-peephole -mcpu=corei7-avx -mattr=+avx < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; Stack reload folding tests - we use the 'big vectors' pattern to guarantee spilling to stack.
+;
+; Many of these tests are primarily to check memory folding with specific instructions. Using a basic
+; load/cvt/store pattern to test for this would mean that it wouldn't be the memory folding code thats
+; being tested - the load-execute version of the instruction from the tables would be matched instead.
+
+define void @stack_fold_vmulpd(<64 x double>* %a, <64 x double>* %b, <64 x double>* %c) {
+  ;CHECK-LABEL: stack_fold_vmulpd
+  ;CHECK:       vmulpd {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+
+  %1 = load <64 x double>* %a
+  %2 = load <64 x double>* %b
+  %3 = fadd <64 x double> %1, %2
+  %4 = fsub <64 x double> %1, %2
+  %5 = fmul <64 x double> %3, %4
+  store <64 x double> %5, <64 x double>* %c
+  ret void
+}
+
+define void @stack_fold_cvtdq2ps(<128 x i32>* %a, <128 x i32>* %b, <128 x float>* %c) {
+  ;CHECK-LABEL: stack_fold_cvtdq2ps
+  ;CHECK:   vcvtdq2ps {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+
+  %1 = load <128 x i32>* %a
+  %2 = load <128 x i32>* %b
+  %3 = and <128 x i32> %1, %2
+  %4 = xor <128 x i32> %1, %2
+  %5 = sitofp <128 x i32> %3 to <128 x float>
+  %6 = sitofp <128 x i32> %4 to <128 x float>
+  %7 = fadd <128 x float> %5, %6
+  store <128 x float> %7, <128 x float>* %c
+  ret void
+}
+
+define void @stack_fold_cvtpd2ps(<128 x double>* %a, <128 x double>* %b, <128 x float>* %c) {
+  ;CHECK-LABEL: stack_fold_cvtpd2ps
+  ;CHECK:   vcvtpd2psy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+
+  %1 = load <128 x double>* %a
+  %2 = load <128 x double>* %b
+  %3 = fadd <128 x double> %1, %2
+  %4 = fsub <128 x double> %1, %2
+  %5 = fptrunc <128 x double> %3 to <128 x float>
+  %6 = fptrunc <128 x double> %4 to <128 x float>
+  %7 = fadd <128 x float> %5, %6
+  store <128 x float> %7, <128 x float>* %c
+  ret void
+}
+
+define void @stack_fold_cvttpd2dq(<64 x double>* %a, <64 x double>* %b, <64 x i32>* %c) #0 {
+  ;CHECK-LABEL: stack_fold_cvttpd2dq
+  ;CHECK:  vcvttpd2dqy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+
+  %1 = load <64 x double>* %a
+  %2 = load <64 x double>* %b
+  %3 = fadd <64 x double> %1, %2
+  %4 = fsub <64 x double> %1, %2
+  %5 = fptosi <64 x double> %3 to <64 x i32>
+  %6 = fptosi <64 x double> %4 to <64 x i32>
+  %7 = or <64 x i32> %5, %6
+  store <64 x i32> %7, <64 x i32>* %c
+  ret void
+}
+
+define void @stack_fold_cvttps2dq(<128 x float>* %a, <128 x float>* %b, <128 x i32>* %c) #0 {
+  ;CHECK-LABEL: stack_fold_cvttps2dq
+  ;CHECK:   vcvttps2dq {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+
+  %1 = load <128 x float>* %a
+  %2 = load <128 x float>* %b
+  %3 = fadd <128 x float> %1, %2
+  %4 = fsub <128 x float> %1, %2
+  %5 = fptosi <128 x float> %3 to <128 x i32>
+  %6 = fptosi <128 x float> %4 to <128 x i32>
+  %7 = or <128 x i32> %5, %6
+  store <128 x i32> %7, <128 x i32>* %c
+  ret void
+}
diff --git a/test/CodeGen/X86/avx2-blend.ll b/test/CodeGen/X86/avx2-blend.ll
deleted file mode 100644
index b02442b6fadd..000000000000
--- a/test/CodeGen/X86/avx2-blend.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s
-
-define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
-; CHECK-LABEL: constant_pblendvb_avx2:
-; CHECK: vmovdqa
-; CHECK: vpblendvb
-  %1 = select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %xyzw, <32 x i8> %abcd
-  ret <32 x i8> %1
-}
-
-declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
new file mode 100644
index 000000000000..ac2c73bb9321
--- /dev/null
+++ b/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
+
+define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
+  ; CHECK: vpblendw
+  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK: vpblendd
+  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
+  ; CHECK: vpblendd
+  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
+  ; CHECK: vmpsadbw
+  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
+
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index ab3d591e1d9f..79a3361bfe86 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -161,7 +161,7 @@ declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
 
 
 define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
-  ; CHECK: vpslldq
+  ; CHECK: vpslldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
   %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
   ret <4 x i64> %res
 }
@@ -169,7 +169,7 @@ declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
 
 
 define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
-  ; CHECK: vpslldq
+  ; CHECK: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8],zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24]
   %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
   ret <4 x i64> %res
 }
@@ -257,7 +257,7 @@ declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
 
 
 define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
-  ; CHECK: vpsrldq
+  ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
   %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
   ret <4 x i64> %res
 }
@@ -265,7 +265,7 @@ declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
 
 
 define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
-  ; CHECK: vpsrldq
+  ; CHECK: vpsrldq {{.*#+}} ymm0 = ymm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,ymm0[23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero
   %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
   ret <4 x i64> %res
 }
@@ -475,10 +475,10 @@ declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
 
 define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
   ; CHECK: vmpsadbw
-  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
   ret <16 x i16> %res
 }
-declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
+declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
 
 
 define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
@@ -499,10 +499,10 @@ declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounw
 
 define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
   ; CHECK: vpblendw
-  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7) ; <<16 x i16>> [#uses=1]
   ret <16 x i16> %res
 }
-declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
+declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone
 
 
 define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
@@ -706,18 +706,18 @@ declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind re
 
 define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
   ; CHECK: vpblendd
-  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
+  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1]
   ret <4 x i32> %res
 }
-declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
+declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone
 
 
 define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
   ; CHECK: vpblendd
-  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
+  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
   ret <8 x i32> %res
 }
-declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
+declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
 
 
 define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
diff --git a/test/CodeGen/X86/avx2-nontemporal.ll b/test/CodeGen/X86/avx2-nontemporal.ll
index 0768aae48e8c..4d28a979712a 100644
--- a/test/CodeGen/X86/avx2-nontemporal.ll
+++ b/test/CodeGen/X86/avx2-nontemporal.ll
@@ -19,4 +19,4 @@ define void @f(<8 x float> %A, i8* %B, <4 x double> %C, i32 %D, <4 x i64> %E) {
   ret void
 }
 
-!0 = metadata !{i32 1}
+!0 = !{i32 1}
diff --git a/test/CodeGen/X86/avx2-palignr.ll b/test/CodeGen/X86/avx2-palignr.ll
deleted file mode 100644
index 83573dc7b260..000000000000
--- a/test/CodeGen/X86/avx2-palignr.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
-
-define <8 x i32> @test1(<8 x i32> %A, <8 x i32> %B) nounwind {
-; CHECK-LABEL: test1:
-; CHECK: vpalignr $4
-  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
-  ret <8 x i32> %C
-}
-
-define <8 x i32> @test2(<8 x i32> %A, <8 x i32> %B) nounwind {
-; CHECK-LABEL: test2:
-; CHECK: vpalignr $4
-  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 undef, i32 12>
-  ret <8 x i32> %C
-}
-
-define <8 x i32> @test3(<8 x i32> %A, <8 x i32> %B) nounwind {
-; CHECK-LABEL: test3:
-; CHECK: vpalignr $4
-  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
-  ret <8 x i32> %C
-}
-;
-define <8 x i32> @test4(<8 x i32> %A, <8 x i32> %B) nounwind {
-; CHECK-LABEL: test4:
-; CHECK: vpalignr $8
-  %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 10, i32 11, i32 undef, i32 1, i32 14, i32 15, i32 4, i32 5>
-  ret <8 x i32> %C
-}
-
-define <16 x i16> @test5(<16 x i16> %A, <16 x i16> %B) nounwind {
-; CHECK-LABEL: test5:
-; CHECK: vpalignr $6
-  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 undef, i32 6, i32 7, i32 16, i32 17, i32 18, i32 11, i32 12, i32 13, i32 undef, i32 15, i32 24, i32 25, i32 26>
-  ret <16 x i16> %C
-}
-
-define <16 x i16> @test6(<16 x i16> %A, <16 x i16> %B) nounwind {
-; CHECK-LABEL: test6:
-; CHECK: vpalignr $6
-  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 12, i32 13, i32 undef, i32 15, i32 24, i32 25, i32 26>
-  ret <16 x i16> %C
-}
-
-define <16 x i16> @test7(<16 x i16> %A, <16 x i16> %B) nounwind {
-; CHECK-LABEL: test7:
-; CHECK: vpalignr $6
-  %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  ret <16 x i16> %C
-}
-
-define <32 x i8> @test8(<32 x i8> %A, <32 x i8> %B) nounwind {
-; CHECK-LABEL: test8:
-; CHECK: vpalignr $5
-  %C = shufflevector <32 x i8> %A, <32 x i8> %B, <32 x i32> <i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52>
-  ret <32 x i8> %C
-}
diff --git a/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll b/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll
new file mode 100644
index 000000000000..44eb42adb9f8
--- /dev/null
+++ b/test/CodeGen/X86/avx2-pmovx-256-old-shuffle.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -x86-experimental-vector-shuffle-lowering=false -mattr=+avx2 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+; PR21876
+; The old shuffle lowering sometimes generates VZEXT nodes with both input
+; and output same-sized types, here 256-bits.  For instance, a v8i8 to v8i32
+; zero-extend would become a (v8i32 (VZEXT v32i8)) node, which can't happen
+; otherwise.  The companion commit r223996 added those patterns temporarily.
+; This test, along with the VR256 for AVX2 PMOVXrr instructions, should be
+; removed once the old vector shuffle lowering goes away.
+
+define void @test_avx2_pmovx_256(<8 x i8>* %tmp64, <8 x float>* %tmp75) {
+; CHECK-LABEL: test_avx2_pmovx_256
+; We really don't care about the generated code.
+; CHECK: vpmovzxbd
+; CHECK: vpbroadcastd
+; CHECK: vpand
+; CHECK: vcvtdq2ps
+; CHECK: vmovups
+; CHECK: vzeroupper
+; CHECK: retq
+
+  %wide.load458 = load <8 x i8>* %tmp64, align 1
+  %tmp68 = uitofp <8 x i8> %wide.load458 to <8 x float>
+  store <8 x float> %tmp68, <8 x float>* %tmp75, align 4
+  ret void
+}
diff --git a/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll b/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll
new file mode 100644
index 000000000000..7301b7cbfc4e
--- /dev/null
+++ b/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll
@@ -0,0 +1,110 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx2 | FileCheck %s
+
+define <16 x i16> @test_lvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {
+; CHECK-LABEL: test_lvm_x86_avx2_pmovsxbw
+; CHECK: vpmovsxbw (%rdi), %ymm0
+  %1 = load <16 x i8>* %a, align 1
+  %2 = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %1)
+  ret <16 x i16> %2
+}
+
+define <8 x i32> @test_llvm_x86_avx2_pmovsxbd(<16 x i8>* %a) {
+; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbd
+; CHECK: vpmovsxbd (%rdi), %ymm0
+  %1 = load <16 x i8>* %a, align 1
+  %2 = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %1)
+  ret <8 x i32> %2
+}
+
+define <4 x i64> @test_llvm_x86_avx2_pmovsxbq(<16 x i8>* %a) {
+; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbq
+; CHECK: vpmovsxbq (%rdi), %ymm0
+  %1 = load <16 x i8>* %a, align 1
+  %2 = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %1)
+  ret <4 x i64> %2
+}
+
+define <8 x i32> @test_llvm_x86_avx2_pmovsxwd(<8 x i16>* %a) {
+; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwd
+; CHECK: vpmovsxwd (%rdi), %ymm0
+  %1 = load <8 x i16>* %a, align 1
+  %2 = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %1)
+  ret <8 x i32> %2
+}
+
+define <4 x i64> @test_llvm_x86_avx2_pmovsxwq(<8 x i16>* %a) {
+; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwq
+; CHECK: vpmovsxwq (%rdi), %ymm0
+  %1 = load <8 x i16>* %a, align 1
+  %2 = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %1)
+  ret <4 x i64> %2
+}
+
+define <4 x i64> @test_llvm_x86_avx2_pmovsxdq(<4 x i32>* %a) {
+; CHECK-LABEL: test_llvm_x86_avx2_pmovsxdq
+; CHECK: vpmovsxdq (%rdi), %ymm0
+  %1 = load <4 x i32>* %a, align 1
+  %2 = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %1)
+  ret <4 x i64> %2
+}
+
+define <16 x i16> @test_lvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {
+; CHECK-LABEL: test_lvm_x86_avx2_pmovzxbw
+; CHECK: vpmovzxbw (%rdi), %ymm0
+  %1 = load <16 x i8>* %a, align 1
+  %2 = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %1)
+  ret <16 x i16> %2
+}
+
+define <8 x i32> @test_llvm_x86_avx2_pmovzxbd(<16 x i8>* %a) {
+; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbd
+; CHECK: vpmovzxbd (%rdi), %ymm0
+  %1 = load <16 x i8>* %a, align 1
+  %2 = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %1)
+  ret <8 x i32> %2
+}
+
+define <4 x i64> @test_llvm_x86_avx2_pmovzxbq(<16 x i8>* %a) {
+; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbq
+; CHECK: vpmovzxbq (%rdi), %ymm0
+  %1 = load <16 x i8>* %a, align 1
+  %2 = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %1)
+  ret <4 x i64> %2
+}
+
+define <8 x i32> @test_llvm_x86_avx2_pmovzxwd(<8 x i16>* %a) {
+; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwd
+; CHECK: vpmovzxwd (%rdi), %ymm0
+  %1 = load <8 x i16>* %a, align 1
+  %2 = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %1)
+  ret <8 x i32> %2
+}
+
+define <4 x i64> @test_llvm_x86_avx2_pmovzxwq(<8 x i16>* %a) {
+; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwq
+; CHECK: vpmovzxwq (%rdi), %ymm0
+  %1 = load <8 x i16>* %a, align 1
+  %2 = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %1)
+  ret <4 x i64> %2
+}
+
+define <4 x i64> @test_llvm_x86_avx2_pmovzxdq(<4 x i32>* %a) {
+; CHECK-LABEL: test_llvm_x86_avx2_pmovzxdq
+; CHECK: vpmovzxdq (%rdi), %ymm0
+  %1 = load <4 x i32>* %a, align 1
+  %2 = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %1)
+  ret <4 x i64> %2
+}
+
+declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>)
+declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>)
+declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>)
+declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>)
+declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>)
+declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>)
+declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>)
+declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>)
+declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>)
+declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>)
+declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>)
+declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>)
diff --git a/test/CodeGen/X86/avx2-shuffle.ll b/test/CodeGen/X86/avx2-shuffle.ll
deleted file mode 100644
index 185b989458ae..000000000000
--- a/test/CodeGen/X86/avx2-shuffle.ll
+++ /dev/null
@@ -1,127 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
-
-; Make sure that we don't match this shuffle using the vpblendw YMM instruction.
-; The mask for the vpblendw instruction needs to be identical for both halves
-; of the YMM. Need to use two vpblendw instructions.
-
-; CHECK: vpblendw_test1
-; mask = 10010110,b = 150,d
-; CHECK: vpblendw  $150, %ymm
-; CHECK: ret
-define <16 x i16> @vpblendw_test1(<16 x i16> %a, <16 x i16> %b) nounwind alwaysinline {
-  %t = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 18, i32 3,  i32 20, i32 5,  i32 6,  i32 23, 
-                                                               i32 8, i32 25, i32 26, i32 11, i32 28, i32 13, i32 14, i32 31>
-  ret <16 x i16> %t
-}
-
-; CHECK: vpblendw_test2
-; mask1 = 00010110 = 22
-; mask2 = 10000000 = 128
-; CHECK: vpblendw  $128, %xmm
-; CHECK: vpblendw  $22, %xmm
-; CHECK: vinserti128
-; CHECK: ret
-define <16 x i16> @vpblendw_test2(<16 x i16> %a, <16 x i16> %b) nounwind alwaysinline {
-  %t = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 18, i32 3, i32 20, i32 5, i32 6, i32 7, 
-                                                               i32 8, i32 9,  i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
-  ret <16 x i16> %t
-}
-
-; CHECK: blend_test1
-; CHECK: vpblendd
-; CHECK: ret
-define <8 x i32> @blend_test1(<8 x i32> %a, <8 x i32> %b) nounwind alwaysinline {
-  %t = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 12, i32 5, i32 6, i32 7>
-  ret <8 x i32> %t
-}
-
-; CHECK: blend_test2
-; CHECK: vpblendd
-; CHECK: ret
-define <8 x i32> @blend_test2(<8 x i32> %a, <8 x i32> %b) nounwind alwaysinline {
-  %t = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 12, i32 5, i32 6, i32 7>
-  ret <8 x i32> %t
-}
-
-
-; CHECK: blend_test3
-; CHECK: vblendps
-; CHECK: ret
-define <8 x float> @blend_test3(<8 x float> %a, <8 x float> %b) nounwind alwaysinline {
-  %t = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 12, i32 5, i32 6, i32 7>
-  ret <8 x float> %t
-}
-
-; CHECK: blend_test4
-; CHECK: vblendpd
-; CHECK: ret
-define <4 x i64> @blend_test4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
-  %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-  ret <4 x i64> %t
-}
-
-;; 2 tests for shufflevectors that optimize to blend + immediate
-; CHECK-LABEL: @blend_test5
-; CHECK: vpblendd $10, %xmm1, %xmm0, %xmm0
-; CHECK: ret
-define <4 x i32> @blend_test5(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x i32> %1
-}
-
-; CHECK-LABEL: @blend_test6
-; CHECK: vpblendw $134, %ymm1, %ymm0, %ymm0
-; CHECK: ret
-define <16 x i16> @blend_test6(<16 x i16> %a, <16 x i16> %b) {
-  %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 18, i32  3, i32  4, i32  5, i32  6, i32 23,
-                                                               i32 8, i32 25, i32 26, i32 11, i32 12, i32 13, i32 14, i32 31>
-  ret <16 x i16> %1
-}
-
-; CHECK: vpshufhw $27, %ymm
-define <16 x i16> @vpshufhw(<16 x i16> %src1) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 13, i32 12>
-  ret <16 x i16> %shuffle.i
-}
-
-; CHECK: vpshuflw $27, %ymm
-define <16 x i16> @vpshuflw(<16 x i16> %src1) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 3, i32 undef, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
-  ret <16 x i16> %shuffle.i
-}
-
-; CHECK: vpshufb_test
-; CHECK: vpshufb {{.*\(%r.*}}, %ymm
-; CHECK: ret
-define <32 x i8> @vpshufb_test(<32 x i8> %a) nounwind {
-  %S = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15, 
-                                                                i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15,  
-                                                                i32 18, i32 19, i32 30, i32 16, i32 25, i32 23, i32 17, i32 25, 
-                                                                i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18>
-  ret <32 x i8>%S
-}
-
-; CHECK: vpshufb1_test
-; CHECK: vpshufb {{.*\(%r.*}}, %ymm
-; CHECK: ret
-define <32 x i8> @vpshufb1_test(<32 x i8> %a) nounwind {
-  %S = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15, 
-                                                                i32 1, i32 9, i32 36, i32 11, i32 5, i32 13, i32 7, i32 15,  
-                                                                i32 18, i32 49, i32 30, i32 16, i32 25, i32 23, i32 17, i32 25, 
-                                                                i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18>
-  ret <32 x i8>%S
-}
-
-
-; CHECK: vpshufb2_test
-; CHECK: vpshufb {{.*\(%r.*}}, %ymm
-; CHECK: ret
-define <32 x i8> @vpshufb2_test(<32 x i8> %a) nounwind {
-  %S = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a, <32 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15, 
-                                                                i32 1, i32 9, i32 36, i32 11, i32 5, i32 13, i32 7, i32 15,  
-                                                                i32 18, i32 49, i32 30, i32 16, i32 25, i32 23, i32 17, i32 25, 
-                                                                i32 20, i32 19, i32 31, i32 17, i32 23, i32 undef, i32 29, i32 18>
-  ret <32 x i8>%S
-}
diff --git a/test/CodeGen/X86/avx2-unpack.ll b/test/CodeGen/X86/avx2-unpack.ll
deleted file mode 100644
index 6d17443489ae..000000000000
--- a/test/CodeGen/X86/avx2-unpack.ll
+++ /dev/null
@@ -1,86 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
-
-; CHECK: vpunpckhdq
-define <8 x i32> @unpackhidq1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
-  ret <8 x i32> %shuffle.i
-}
-
-; CHECK: vpunpckhqdq
-define <4 x i64> @unpackhiqdq1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-  ret <4 x i64> %shuffle.i
-}
-
-; CHECK: vpunpckldq
-define <8 x i32> @unpacklodq1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
-  ret <8 x i32> %shuffle.i
-}
-
-; CHECK: vpunpcklqdq
-define <4 x i64> @unpacklqdq1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-  ret <4 x i64> %shuffle.i
-}
-
-; CHECK: vpunpckhwd
-define <16 x i16> @unpackhwd(<16 x i16> %src1, <16 x i16> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-  ret <16 x i16> %shuffle.i
-}
-
-; CHECK: vpunpcklwd
-define <16 x i16> @unpacklwd(<16 x i16> %src1, <16 x i16> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
-  ret <16 x i16> %shuffle.i
-}
-
-; CHECK: vpunpckhbw
-define <32 x i8> @unpackhbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
-  ret <32 x i8> %shuffle.i
-}
-
-; CHECK: vpunpcklbw
-define <32 x i8> @unpacklbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
-  ret <32 x i8> %shuffle.i
-}
-
-; CHECK: vpunpckhdq
-define <8 x i32> @unpackhidq1_undef(<8 x i32> %src1) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
-  ret <8 x i32> %shuffle.i
-}
-
-; CHECK: vpunpckhqdq
-define <4 x i64> @unpackhiqdq1_undef(<4 x i64> %src1) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-  ret <4 x i64> %shuffle.i
-}
-
-; CHECK: vpunpckhwd
-define <16 x i16> @unpackhwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
-  ret <16 x i16> %shuffle.i
-}
-
-; CHECK: vpunpcklwd
-define <16 x i16> @unpacklwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
-entry:
-  %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
-  ret <16 x i16> %shuffle.i
-}
-
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
index 66f586d23d14..924c06eba768 100644
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -317,7 +317,7 @@ define   <4 x double> @_inreg4xdouble(<4 x double> %a) {
 }
 
 ;CHECK-LABEL: _inreg2xdouble:
-;CHECK: vpbroadcastq
+;CHECK: vunpcklpd
 ;CHECK: ret
 define   <2 x double> @_inreg2xdouble(<2 x double> %a) {
   %b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
diff --git a/test/CodeGen/X86/avx2-vperm2i128.ll b/test/CodeGen/X86/avx2-vperm2i128.ll
deleted file mode 100644
index 1937db5d7c16..000000000000
--- a/test/CodeGen/X86/avx2-vperm2i128.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
-
-; CHECK: vperm2i128 $17
-define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
-entry:
-  ; add forces execution domain
-  %a2 = add <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-  %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-  ret <32 x i8> %shuffle
-}
-
-; CHECK: vperm2i128 $3
-define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
-entry:
-  ; add forces execution domain
-  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
-  %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
-  ret <4 x i64> %shuffle
-}
-
-; CHECK: vperm2i128 $49
-define <8 x i32> @E3(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
-entry:
-  ; add forces execution domain
-  %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-  %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> <i32 undef, i32 5, i32 undef, i32 7, i32 12, i32 13, i32 14, i32 15>
-  ret <8 x i32> %shuffle
-}
-
-; CHECK: vperm2i128 $2
-define <16 x i16> @E4(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
-entry:
-  ; add forces execution domain
-  %a2 = add <16 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
-  %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  ret <16 x i16> %shuffle
-}
-
-; CHECK: vperm2i128 $2, (%
-define <16 x i16> @E5(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp {
-entry:
-  %c = load <16 x i16>* %a
-  %d = load <16 x i16>* %b
-  %c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
-  %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  ret <16 x i16> %shuffle
-}
diff --git a/test/CodeGen/X86/avx512-arith.ll b/test/CodeGen/X86/avx512-arith.ll
index 4d1c9f7cd973..94b08215b896 100644
--- a/test/CodeGen/X86/avx512-arith.ll
+++ b/test/CodeGen/X86/avx512-arith.ll
@@ -1,189 +1,217 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
 
-; CHECK-LABEL: addpd512
-; CHECK: vaddpd
-; CHECK: ret
 define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
+; CHECK-LABEL: addpd512:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %add.i = fadd <8 x double> %x, %y
   ret <8 x double> %add.i
 }
 
-; CHECK-LABEL: addpd512fold
-; CHECK: vaddpd LCP{{.*}}(%rip)
-; CHECK: ret
 define <8 x double> @addpd512fold(<8 x double> %y) {
+; CHECK-LABEL: addpd512fold:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
   ret <8 x double> %add.i
 }
 
-; CHECK-LABEL: addps512
-; CHECK: vaddps
-; CHECK: ret
 define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
+; CHECK-LABEL: addps512:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %add.i = fadd <16 x float> %x, %y
   ret <16 x float> %add.i
 }
 
-; CHECK-LABEL: addps512fold
-; CHECK: vaddps LCP{{.*}}(%rip)
-; CHECK: ret
 define <16 x float> @addps512fold(<16 x float> %y) {
+; CHECK-LABEL: addps512fold:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000,  float 0x4002666660000000, float 0x3FF3333340000000>
   ret <16 x float> %add.i
 }
 
-; CHECK-LABEL: subpd512
-; CHECK: vsubpd
-; CHECK: ret
 define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
+; CHECK-LABEL: subpd512:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vsubpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %sub.i = fsub <8 x double> %x, %y
   ret <8 x double> %sub.i
 }
 
-; CHECK-LABEL: @subpd512fold
-; CHECK: vsubpd (%
-; CHECK: ret
 define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
+; CHECK-LABEL: subpd512fold:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vsubpd (%rdi), %zmm0, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %tmp2 = load <8 x double>* %x, align 8
   %sub.i = fsub <8 x double> %y, %tmp2
   ret <8 x double> %sub.i
 }
 
-; CHECK-LABEL: @subps512
-; CHECK: vsubps
-; CHECK: ret
 define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
+; CHECK-LABEL: subps512:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vsubps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %sub.i = fsub <16 x float> %x, %y
   ret <16 x float> %sub.i
 }
 
-; CHECK-LABEL: subps512fold
-; CHECK: vsubps (%
-; CHECK: ret
 define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
+; CHECK-LABEL: subps512fold:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vsubps (%rdi), %zmm0, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %tmp2 = load <16 x float>* %x, align 4
   %sub.i = fsub <16 x float> %y, %tmp2
   ret <16 x float> %sub.i
 }
 
-; CHECK-LABEL: imulq512
-; CHECK: vpmuludq
-; CHECK: vpmuludq
-; CHECK: ret
 define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
+; CHECK-LABEL: imulq512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpmuludq %zmm0, %zmm1, %zmm2
+; CHECK-NEXT:    vpsrlq $32, %zmm0, %zmm3
+; CHECK-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
+; CHECK-NEXT:    vpsllq $32, %zmm3, %zmm3
+; CHECK-NEXT:    vpaddq %zmm3, %zmm2, %zmm2
+; CHECK-NEXT:    vpsrlq $32, %zmm1, %zmm1
+; CHECK-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    vpsllq $32, %zmm0, %zmm0
+; CHECK-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT:    retq
   %z = mul <8 x i64>%x, %y
   ret <8 x i64>%z
 }
 
-; CHECK-LABEL: mulpd512
-; CHECK: vmulpd
-; CHECK: ret
 define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
+; CHECK-LABEL: mulpd512:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %mul.i = fmul <8 x double> %x, %y
   ret <8 x double> %mul.i
 }
 
-; CHECK-LABEL: mulpd512fold
-; CHECK: vmulpd LCP{{.*}}(%rip)
-; CHECK: ret
 define <8 x double> @mulpd512fold(<8 x double> %y) {
+; CHECK-LABEL: mulpd512fold:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vmulpd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
   ret <8 x double> %mul.i
 }
 
-; CHECK-LABEL: mulps512
-; CHECK: vmulps
-; CHECK: ret
 define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
+; CHECK-LABEL: mulps512:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vmulps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %mul.i = fmul <16 x float> %x, %y
   ret <16 x float> %mul.i
 }
 
-; CHECK-LABEL: mulps512fold
-; CHECK: vmulps LCP{{.*}}(%rip)
-; CHECK: ret
 define <16 x float> @mulps512fold(<16 x float> %y) {
+; CHECK-LABEL: mulps512fold:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
   ret <16 x float> %mul.i
 }
 
-; CHECK-LABEL: divpd512
-; CHECK: vdivpd
-; CHECK: ret
 define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
+; CHECK-LABEL: divpd512:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vdivpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %div.i = fdiv <8 x double> %x, %y
   ret <8 x double> %div.i
 }
 
-; CHECK-LABEL: divpd512fold
-; CHECK: vdivpd LCP{{.*}}(%rip)
-; CHECK: ret
 define <8 x double> @divpd512fold(<8 x double> %y) {
+; CHECK-LABEL: divpd512fold:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vdivpd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
   ret <8 x double> %div.i
 }
 
-; CHECK-LABEL: divps512
-; CHECK: vdivps
-; CHECK: ret
 define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
+; CHECK-LABEL: divps512:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vdivps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %div.i = fdiv <16 x float> %x, %y
   ret <16 x float> %div.i
 }
 
-; CHECK-LABEL: divps512fold
-; CHECK: vdivps LCP{{.*}}(%rip)
-; CHECK: ret
 define <16 x float> @divps512fold(<16 x float> %y) {
+; CHECK-LABEL: divps512fold:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vdivps {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
   ret <16 x float> %div.i
 }
 
-; CHECK-LABEL: vpaddq_test
-; CHECK: vpaddq %zmm
-; CHECK: ret
 define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
+; CHECK-LABEL: vpaddq_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %x = add <8 x i64> %i, %j
   ret <8 x i64> %x
 }
 
-; CHECK-LABEL: vpaddq_fold_test
-; CHECK: vpaddq (%
-; CHECK: ret
 define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
+; CHECK-LABEL: vpaddq_fold_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpaddq (%rdi), %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %tmp = load <8 x i64>* %j, align 4
   %x = add <8 x i64> %i, %tmp
   ret <8 x i64> %x
 }
 
-; CHECK-LABEL: vpaddq_broadcast_test
-; CHECK: vpaddq LCP{{.*}}(%rip){1to8}
-; CHECK: ret
 define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
+; CHECK-LABEL: vpaddq_broadcast_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %x = add <8 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
   ret <8 x i64> %x
 }
 
-; CHECK-LABEL: vpaddq_broadcast2_test
-; CHECK: vpaddq (%rdi){1to8}
-; CHECK: ret
 define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
+; CHECK-LABEL: vpaddq_broadcast2_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %tmp = load i64* %j
   %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
   %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
@@ -197,55 +225,67 @@ define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
   ret <8 x i64> %x
 }
 
-; CHECK-LABEL: vpaddd_test
-; CHECK: vpaddd %zmm
-; CHECK: ret
 define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
+; CHECK-LABEL: vpaddd_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %x = add <16 x i32> %i, %j
   ret <16 x i32> %x
 }
 
-; CHECK-LABEL: vpaddd_fold_test
-; CHECK: vpaddd (%
-; CHECK: ret
 define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
+; CHECK-LABEL: vpaddd_fold_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %tmp = load <16 x i32>* %j, align 4
   %x = add <16 x i32> %i, %tmp
   ret <16 x i32> %x
 }
 
-; CHECK-LABEL: vpaddd_broadcast_test
-; CHECK: vpaddd LCP{{.*}}(%rip){1to16}
-; CHECK: ret
 define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
+; CHECK-LABEL: vpaddd_broadcast_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   ret <16 x i32> %x
 }
 
-; CHECK-LABEL: vpaddd_mask_test
-; CHECK: vpaddd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
-; CHECK: ret
 define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_mask_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
+; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
+; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1}
+; CHECK-NEXT:    retq
   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   %x = add <16 x i32> %i, %j
   %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
   ret <16 x i32> %r
 }
 
-; CHECK-LABEL: vpaddd_maskz_test
-; CHECK: vpaddd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z} }}
-; CHECK: ret
 define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_maskz_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpxord %zmm3, %zmm3, %zmm3
+; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1
+; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   %x = add <16 x i32> %i, %j
   %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
   ret <16 x i32> %r
 }
 
-; CHECK-LABEL: vpaddd_mask_fold_test
-; CHECK: vpaddd (%rdi), {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
-; CHECK: ret
 define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_mask_fold_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
+; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
+; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1}
+; CHECK-NEXT:    retq
   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   %j = load <16 x i32>* %j.ptr
   %x = add <16 x i32> %i, %j
@@ -253,20 +293,26 @@ define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16
   ret <16 x i32> %r
 }
 
-; CHECK-LABEL: vpaddd_mask_broadcast_test
-; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} }}
-; CHECK: ret
 define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_mask_broadcast_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
+; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
+; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
+; CHECK-NEXT:    retq
   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
   ret <16 x i32> %r
 }
 
-; CHECK-LABEL: vpaddd_maskz_fold_test
-; CHECK: vpaddd (%rdi), {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} {z}
-; CHECK: ret
 define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_maskz_fold_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
+; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
+; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   %j = load <16 x i32>* %j.ptr
   %x = add <16 x i32> %i, %j
@@ -274,125 +320,141 @@ define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16
   ret <16 x i32> %r
 }
 
-; CHECK-LABEL: vpaddd_maskz_broadcast_test
-; CHECK: vpaddd LCP{{.*}}(%rip){1to16}, {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}} {z}
-; CHECK: ret
 define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
+; CHECK-LABEL: vpaddd_maskz_broadcast_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpxord %zmm2, %zmm2, %zmm2
+; CHECK-NEXT:    vpcmpneqd %zmm2, %zmm1, %k1
+; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   %x = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
   ret <16 x i32> %r
 }
 
-; CHECK-LABEL: vpsubq_test
-; CHECK: vpsubq %zmm
-; CHECK: ret
 define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
+; CHECK-LABEL: vpsubq_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %x = sub <8 x i64> %i, %j
   ret <8 x i64> %x
 }
 
-; CHECK-LABEL: vpsubd_test
-; CHECK: vpsubd
-; CHECK: ret
 define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
+; CHECK-LABEL: vpsubd_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %x = sub <16 x i32> %i, %j
   ret <16 x i32> %x
 }
 
-; CHECK-LABEL: vpmulld_test
-; CHECK: vpmulld %zmm
-; CHECK: ret
 define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
+; CHECK-LABEL: vpmulld_test:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpmulld %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %x = mul <16 x i32> %i, %j
   ret <16 x i32> %x
 }
 
-; CHECK-LABEL: sqrtA
-; CHECK: vsqrtss {{.*}} encoding: [0x62
-; CHECK: ret
 declare float @sqrtf(float) readnone
 define float @sqrtA(float %a) nounwind uwtable readnone ssp {
+; CHECK-LABEL: sqrtA:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    retq
 entry:
   %conv1 = tail call float @sqrtf(float %a) nounwind readnone
   ret float %conv1
 }
 
-; CHECK-LABEL: sqrtB
-; CHECK: vsqrtsd {{.*}}## encoding: [0x62
-; CHECK: ret
 declare double @sqrt(double) readnone
 define double @sqrtB(double %a) nounwind uwtable readnone ssp {
+; CHECK-LABEL: sqrtB:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    retq
 entry:
   %call = tail call double @sqrt(double %a) nounwind readnone
   ret double %call
 }
 
-; CHECK-LABEL: sqrtC
-; CHECK: vsqrtss {{.*}}## encoding: [0x62
-; CHECK: ret
 declare float @llvm.sqrt.f32(float)
 define float @sqrtC(float %a) nounwind {
+; CHECK-LABEL: sqrtC:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    retq
   %b = call float @llvm.sqrt.f32(float %a)
   ret float %b
 }
 
-; CHECK-LABEL: sqrtD
-; CHECK: vsqrtps {{.*}}
-; CHECK: ret
 declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
 define <16 x float> @sqrtD(<16 x float> %a) nounwind {
+; CHECK-LABEL: sqrtD:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vsqrtps %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
   ret <16 x float> %b
 }
 
-; CHECK-LABEL: sqrtE
-; CHECK: vsqrtpd {{.*}}
-; CHECK: ret
 declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
 define <8 x double> @sqrtE(<8 x double> %a) nounwind {
+; CHECK-LABEL: sqrtE:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
   ret <8 x double> %b
 }
 
-; CHECK-LABEL: fadd_broadcast
-; CHECK: LCP{{.*}}(%rip){1to16}, %zmm0, %zmm0
-; CHECK: ret
 define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
+; CHECK-LABEL: fadd_broadcast:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
   ret <16 x float> %b
 }
 
-; CHECK-LABEL: addq_broadcast
-; CHECK: vpaddq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
-; CHECK: ret
 define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
+; CHECK-LABEL: addq_broadcast:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
   ret <8 x i64> %b
 }
 
-; CHECK-LABEL: orq_broadcast
-; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
-; CHECK: ret
 define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
+; CHECK-LABEL: orq_broadcast:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
   ret <8 x i64> %b
 }
 
-; CHECK-LABEL: andd512fold
-; CHECK: vpandd (%
-; CHECK: ret
 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
+; CHECK-LABEL: andd512fold:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vpandd (%rdi), %zmm0, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %a = load <16 x i32>* %x, align 4
   %b = and <16 x i32> %y, %a
   ret <16 x i32> %b
 }
 
-; CHECK-LABEL: andqbrst
-; CHECK: vpandq  (%rdi){1to8}, %zmm
-; CHECK: ret
 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
+; CHECK-LABEL: andqbrst:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
+; CHECK-NEXT:    retq
 entry:
   %a = load i64* %ap, align 8
   %b = insertelement <8 x i64> undef, i64 %a, i32 0
@@ -400,3 +462,193 @@ entry:
   %d = and <8 x i64> %p1, %c
   ret <8 x i64>%d
 }
+
+; CHECK-LABEL: test_mask_vaddps
+; CHECK: vaddps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
+                                     <16 x float> %j, <16 x i32> %mask1)
+                                     nounwind readnone {
+  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+  %x = fadd <16 x float> %i, %j
+  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
+  ret <16 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmulps
+; CHECK: vmulps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
+                                     <16 x float> %j, <16 x i32> %mask1)
+                                     nounwind readnone {
+  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+  %x = fmul <16 x float> %i, %j
+  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
+  ret <16 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vminps
+; CHECK: vminps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
+                                     <16 x float> %j, <16 x i32> %mask1)
+                                     nounwind readnone {
+  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+  %cmp_res = fcmp olt <16 x float> %i, %j
+  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
+  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
+  ret <16 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vminpd
+; CHECK: vminpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
+                                     <8 x double> %j, <8 x i32> %mask1)
+                                     nounwind readnone {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %cmp_res = fcmp olt <8 x double> %i, %j
+  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
+  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
+  ret <8 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vmaxps
+; CHECK: vmaxps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
+                                     <16 x float> %j, <16 x i32> %mask1)
+                                     nounwind readnone {
+  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+  %cmp_res = fcmp ogt <16 x float> %i, %j
+  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
+  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
+  ret <16 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmaxpd
+; CHECK: vmaxpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
+                                     <8 x double> %j, <8 x i32> %mask1)
+                                     nounwind readnone {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %cmp_res = fcmp ogt <8 x double> %i, %j
+  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
+  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
+  ret <8 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vsubps
+; CHECK: vsubps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
+                                     <16 x float> %j, <16 x i32> %mask1)
+                                     nounwind readnone {
+  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+  %x = fsub <16 x float> %i, %j
+  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
+  ret <16 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vdivps
+; CHECK: vdivps {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i,
+                                     <16 x float> %j, <16 x i32> %mask1)
+                                     nounwind readnone {
+  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+  %x = fdiv <16 x float> %i, %j
+  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
+  ret <16 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vaddpd
+; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i,
+                                     <8 x double> %j, <8 x i64> %mask1)
+                                     nounwind readnone {
+  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+  %x = fadd <8 x double> %i, %j
+  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
+  ret <8 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_vaddpd
+; CHECK: vaddpd {{%zmm[0-9]{1,2}, %zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}}}
+; CHECK: ret
+define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j,
+                                      <8 x i64> %mask1) nounwind readnone {
+  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+  %x = fadd <8 x double> %i, %j
+  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
+  ret <8 x double> %r
+}
+
+; CHECK-LABEL: test_mask_fold_vaddpd
+; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]}.*}}
+; CHECK: ret
+define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i,
+                                     <8 x double>* %j,  <8 x i64> %mask1)
+                                     nounwind {
+  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+  %tmp = load <8 x double>* %j, align 8
+  %x = fadd <8 x double> %i, %tmp
+  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
+  ret <8 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_fold_vaddpd
+; CHECK: vaddpd (%rdi), {{.*%zmm[0-9]{1,2}, %zmm[0-9]{1,2} {%k[1-7]} {z}.*}}
+; CHECK: ret
+define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
+                                      <8 x i64> %mask1) nounwind {
+  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+  %tmp = load <8 x double>* %j, align 8
+  %x = fadd <8 x double> %i, %tmp
+  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
+  ret <8 x double> %r
+}
+
+; CHECK-LABEL: test_broadcast_vaddpd
+; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*}}
+; CHECK: ret
+define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
+  %tmp = load double* %j
+  %b = insertelement <8 x double> undef, double %tmp, i32 0
+  %c = shufflevector <8 x double> %b, <8 x double> undef,
+                     <8 x i32> zeroinitializer
+  %x = fadd <8 x double> %c, %i
+  ret <8 x double> %x
+}
+
+; CHECK-LABEL: test_mask_broadcast_vaddpd
+; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]}.*}}
+; CHECK: ret
+define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
+                                      double* %j, <8 x i64> %mask1) nounwind {
+  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+  %tmp = load double* %j
+  %b = insertelement <8 x double> undef, double %tmp, i32 0
+  %c = shufflevector <8 x double> %b, <8 x double> undef,
+                     <8 x i32> zeroinitializer
+  %x = fadd <8 x double> %c, %i
+  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
+  ret <8 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_broadcast_vaddpd
+; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*{%k[1-7]} {z}.*}}
+; CHECK: ret
+define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
+                                       <8 x i64> %mask1) nounwind {
+  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+  %tmp = load double* %j
+  %b = insertelement <8 x double> undef, double %tmp, i32 0
+  %c = shufflevector <8 x double> %b, <8 x double> undef,
+                     <8 x i32> zeroinitializer
+  %x = fadd <8 x double> %c, %i
+  %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
+  ret <8 x double> %r
+}
diff --git a/test/CodeGen/X86/avx512-build-vector.ll b/test/CodeGen/X86/avx512-build-vector.ll
index b5a2aa80ce16..9e9ad31c916f 100644
--- a/test/CodeGen/X86/avx512-build-vector.ll
+++ b/test/CodeGen/X86/avx512-build-vector.ll
@@ -1,30 +1,43 @@
 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
 
-; CHECK-LABEL: test1
-; CHECK: vpxord
-; CHECK: ret
 define <16 x i32> @test1(i32* %x) {
+; CHECK-LABEL: test1:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vmovd (%rdi), %xmm0
+; CHECK-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4],ymm1[5,6,7]
+; CHECK-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT:    retq
    %y = load i32* %x, align 4
    %res = insertelement <16 x i32>zeroinitializer, i32 %y, i32 4
    ret <16 x i32>%res
 }
 
-; CHECK-LABEL: test2
-; CHECK: vpaddd LCP{{.*}}(%rip){1to16}
-; CHECK: ret
 define <16 x i32> @test2(<16 x i32> %x) {
+; CHECK-LABEL: test2:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT:    retq
    %res = add <16 x i32><i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %x
    ret <16 x i32>%res
 }
 
-; CHECK-LABEL: test3
-; CHECK: vinsertf128
-; CHECK: vinsertf64x4
-; CHECK: ret
 define <16 x float> @test3(<4 x float> %a) {
+; CHECK-LABEL: test3:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vmovss %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    vmovss %xmm1, %xmm2, %xmm1
+; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[1,0],xmm0[0,1]
+; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; CHECK-NEXT:    vxorps %ymm1, %ymm1, %ymm1
+; CHECK-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT:    retq
   %b = extractelement <4 x float> %a, i32 2
   %c = insertelement <16 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %b, i32 5
   %b1 = extractelement <4 x float> %a, i32 0
   %c1 = insertelement <16 x float> %c, float %b1, i32 6
   ret <16 x float>%c1
-}
-\ No newline at end of file
+}
diff --git a/test/CodeGen/X86/avx512-cmp.ll b/test/CodeGen/X86/avx512-cmp.ll
index 47e50a93796a..6e0d18558c51 100644
--- a/test/CodeGen/X86/avx512-cmp.ll
+++ b/test/CodeGen/X86/avx512-cmp.ll
@@ -28,10 +28,9 @@ l2:
   ret float %c1
 }
 
+; FIXME: Can use vcmpeqss and extract from the mask here in AVX512.
 ; CHECK-LABEL: test3
-; CHECK: vcmpeqss
-; CHECK: kmov
-; CHECK: ret
+; CHECK: vucomiss {{.*}}encoding: [0x62
 define i32 @test3(float %a, float %b) {
 
   %cmp10.i = fcmp oeq float %a, %b
@@ -86,3 +85,17 @@ define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
   %res = select i1 %tmp5, i32 1, i32 %a3
   ret i32 %res
 }
+
+; CHECK-LABEL: test9
+; CHECK: testb
+; CHECK-NOT: kmov
+; CHECK: ret
+define i32 @test9(i64 %a) {
+ %b = and i64 %a, 1
+ %cmp10.i = icmp eq i64 %b, 0
+ br i1 %cmp10.i, label %A, label %B
+A:
+ ret i32 6
+B:
+ ret i32 7
+}
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index f5cda96b99fa..2b672a72d539 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -255,3 +255,56 @@ define double @uitofp03(i32 %a) nounwind {
   %b = uitofp i32 %a to double
   ret double %b
 }
+
+; CHECK-LABEL: @sitofp_16i1_float
+; CHECK: vpbroadcastd
+; CHECK: vcvtdq2ps
+define <16 x float> @sitofp_16i1_float(<16 x i32> %a) {
+  %mask = icmp slt <16 x i32> %a, zeroinitializer
+  %1 = sitofp <16 x i1> %mask to <16 x float>
+  ret <16 x float> %1
+}
+
+; CHECK-LABEL: @sitofp_16i8_float
+; CHECK: vpmovsxbd
+; CHECK: vcvtdq2ps
+define <16 x float> @sitofp_16i8_float(<16 x i8> %a) {
+  %1 = sitofp <16 x i8> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+; CHECK-LABEL: @sitofp_16i16_float
+; CHECK: vpmovsxwd
+; CHECK: vcvtdq2ps
+define <16 x float> @sitofp_16i16_float(<16 x i16> %a) {
+  %1 = sitofp <16 x i16> %a to <16 x float>
+  ret <16 x float> %1
+}
+
+; CHECK-LABEL: @sitofp_8i16_double
+; CHECK: vpmovsxwd
+; CHECK: vcvtdq2pd
+define <8 x double> @sitofp_8i16_double(<8 x i16> %a) {
+  %1 = sitofp <8 x i16> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+; CHECK-LABEL: sitofp_8i8_double
+; CHECK: vpmovzxwd
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK: vcvtdq2pd
+define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
+  %1 = sitofp <8 x i8> %a to <8 x double>
+  ret <8 x double> %1
+}
+
+
+; CHECK-LABEL: @sitofp_8i1_double
+; CHECK: vpbroadcastq
+; CHECK: vcvtdq2pd
+define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
+  %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
+  %1 = sitofp <8 x i1> %cmpres to <8 x double>
+  ret <8 x double> %1
+}
diff --git a/test/CodeGen/X86/avx512-fma-intrinsics.ll b/test/CodeGen/X86/avx512-fma-intrinsics.ll
index ce3d7590f396..116531d5af9e 100644
--- a/test/CodeGen/X86/avx512-fma-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-fma-intrinsics.ll
@@ -1,97 +1,184 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s
 
 define <16 x float> @test_x86_vfmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK-LABEL: test_x86_vfmadd_ps_z
   ; CHECK: vfmadd213ps %zmm
-  %res = call <16 x float> @llvm.x86.fma.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
+  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
+
+define <16 x float> @test_mask_vfmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
+  ; CHECK-LABEL: test_mask_vfmadd_ps
+  ; CHECK: vfmadd213ps %zmm
+  %res = call <16 x float> @llvm.x86.fma.mask.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
   ret <16 x float> %res
 }
-declare <16 x float> @llvm.x86.fma.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
 
 define <8 x double> @test_x86_vfmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_x86_vfmadd_pd_z
   ; CHECK: vfmadd213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
+  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+  ret <8 x double> %res
+}
+
+define <8 x double> @test_mask_fmadd_pd(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
+; CHECK-LABEL: test_mask_fmadd_pd:
+; CHECK: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa8,0xc2]
+  %res = call <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4)
   ret <8 x double> %res
 }
-declare <8 x double> @llvm.x86.fma.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
+
+declare <8 x double> @llvm.x86.fma.mask.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
 
 define <16 x float> @test_x86_vfmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK-LABEL: test_x86_vfmsubps_z
   ; CHECK: vfmsub213ps %zmm
-  %res = call <16 x float> @llvm.x86.fma.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
+  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
+
+define <16 x float> @test_mask_vfmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
+  ; CHECK-LABEL: test_mask_vfmsub_ps
+  ; CHECK: vfmsub213ps %zmm
+  %res = call <16 x float> @llvm.x86.fma.mask.vfmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
   ret <16 x float> %res
 }
-declare <16 x float> @llvm.x86.fma.vfmsub.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
 
 define <8 x double> @test_x86_vfmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_x86_vfmsubpd_z
   ; CHECK: vfmsub213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
+  %res = call <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
+
+define <8 x double> @test_mask_vfmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmsub_pd
+  ; CHECK: vfmsub213pd %zmm
+  %res = call <8 x double> @llvm.x86.fma.mask.vfmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
   ret <8 x double> %res
 }
-declare <8 x double> @llvm.x86.fma.vfmsub.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
 
 define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK-LABEL: test_x86_vfnmadd_ps_z
   ; CHECK: vfnmadd213ps %zmm
-  %res = call <16 x float> @llvm.x86.fma.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
+  %res = call <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
+
+define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
+  ; CHECK-LABEL: test_mask_vfnmadd_ps
+  ; CHECK: vfnmadd213ps %zmm
+  %res = call <16 x float> @llvm.x86.fma.mask.vfnmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
   ret <16 x float> %res
 }
-declare <16 x float> @llvm.x86.fma.vfnmadd.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
 
 define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_x86_vfnmadd_pd_z
   ; CHECK: vfnmadd213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
+  %res = call <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
+
+define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfnmadd_pd
+  ; CHECK: vfnmadd213pd %zmm
+  %res = call <8 x double> @llvm.x86.fma.mask.vfnmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
   ret <8 x double> %res
 }
-declare <8 x double> @llvm.x86.fma.vfnmadd.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
 
 define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK-LABEL: test_x86_vfnmsubps_z
   ; CHECK: vfnmsub213ps %zmm
-  %res = call <16 x float> @llvm.x86.fma.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
+  %res = call <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
+
+define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
+  ; CHECK-LABEL: test_mask_vfnmsub_ps
+  ; CHECK: vfnmsub213ps %zmm
+  %res = call <16 x float> @llvm.x86.fma.mask.vfnmsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
   ret <16 x float> %res
 }
-declare <16 x float> @llvm.x86.fma.vfnmsub.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
 
 define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_x86_vfnmsubpd_z
   ; CHECK: vfnmsub213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
+  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
+
+define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfnmsub_pd
+  ; CHECK: vfnmsub213pd %zmm
+  %res = call <8 x double> @llvm.x86.fma.mask.vfnmsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
   ret <8 x double> %res
 }
-declare <8 x double> @llvm.x86.fma.vfnmsub.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
 
 define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK-LABEL: test_x86_vfmaddsubps_z
   ; CHECK: vfmaddsub213ps %zmm
-  %res = call <16 x float> @llvm.x86.fma.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
+  %res = call <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
+  ret <16 x float> %res
+}
+
+define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
+; CHECK-LABEL: test_mask_fmaddsub_ps:
+; CHECK: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0xa6,0xc2]
+  %res = call <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
   ret <16 x float> %res
 }
-declare <16 x float> @llvm.x86.fma.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
+
+declare <16 x float> @llvm.x86.fma.mask.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
 
 define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_x86_vfmaddsubpd_z
   ; CHECK: vfmaddsub213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
+  %res = call <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
+
+define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmaddsub_pd
+  ; CHECK: vfmaddsub213pd %zmm
+  %res = call <8 x double> @llvm.x86.fma.mask.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
   ret <8 x double> %res
 }
-declare <8 x double> @llvm.x86.fma.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
 
 define <16 x float> @test_x86_vfmsubaddps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   ; CHECK-LABEL: test_x86_vfmsubaddps_z
   ; CHECK: vfmsubadd213ps %zmm
-  %res = call <16 x float> @llvm.x86.fma.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) nounwind
+  %res = call <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 -1, i32 4) nounwind
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) nounwind readnone
+
+define <16 x float> @test_mask_vfmsubadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
+  ; CHECK-LABEL: test_mask_vfmsubadd_ps
+  ; CHECK: vfmsubadd213ps %zmm
+  %res = call <16 x float> @llvm.x86.fma.mask.vfmsubadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask, i32 4) nounwind
   ret <16 x float> %res
 }
-declare <16 x float> @llvm.x86.fma.vfmsubadd.ps.512(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
 
 define <8 x double> @test_x86_vfmsubaddpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
   ; CHECK-LABEL: test_x86_vfmsubaddpd_z
   ; CHECK: vfmsubadd213pd %zmm
-  %res = call <8 x double> @llvm.x86.fma.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) nounwind
+  %res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 -1, i32 4) nounwind
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
+
+define <8 x double> @test_mask_vfmsubadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmsubadd_pd
+  ; CHECK: vfmsubadd213pd %zmm
+  %res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
   ret <8 x double> %res
 }
-declare <8 x double> @llvm.x86.fma.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>) nounwind readnone
+
diff --git a/test/CodeGen/X86/avx512-i1test.ll b/test/CodeGen/X86/avx512-i1test.ll
new file mode 100755
index 000000000000..4814314a6442
--- /dev/null
+++ b/test/CodeGen/X86/avx512-i1test.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+; ModuleID = 'bugpoint-reduced-simplified.bc'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: func
+; CHECK: kortestw
+; CHECK: kortestw
+define void @func() {
+bb1:
+  br i1 undef, label %L_10, label %L_10
+
+L_10:                                             ; preds = %bb1, %bb1
+  br i1 undef, label %L_30, label %bb56
+
+bb56:                                             ; preds = %L_10
+  br label %bb33
+
+bb33:                                             ; preds = %bb51, %bb56
+  %r111 = load i64* undef, align 8
+  br i1 undef, label %bb51, label %bb35
+
+bb35:                                             ; preds = %bb33
+  br i1 undef, label %L_19, label %bb37
+
+bb37:                                             ; preds = %bb35
+  %r128 = and i64 %r111, 576460752303423488
+  %phitmp = icmp eq i64 %r128, 0
+  br label %L_19
+
+L_19:                                             ; preds = %bb37, %bb35
+  %"$V_S25.0" = phi i1 [ %phitmp, %bb37 ], [ true, %bb35 ]
+  br i1 undef, label %bb51, label %bb42
+
+bb42:                                             ; preds = %L_19
+  %r136 = select i1 %"$V_S25.0", i32* undef, i32* undef
+  br label %bb51
+
+bb51:                                             ; preds = %bb42, %L_19, %bb33
+  br i1 false, label %L_30, label %bb33
+
+L_30:                                             ; preds = %bb51, %L_10
+  ret void
+}
diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll
index b360c716b004..eba895ebf565 100644
--- a/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/test/CodeGen/X86/avx512-insert-extract.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefix=KNL --check-prefix=CHECK %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX --check-prefix=CHECK %s
 
 ;CHECK-LABEL: test1:
 ;CHECK: vinsertps
@@ -12,9 +13,11 @@ define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
 }
 
 ;CHECK-LABEL: test2:
-;CHECK: vinsertf32x4
-;CHECK: vextractf32x4
-;CHECK: vinsertf32x4
+;KNL: vinsertf32x4 $0
+;SKX: vinsertf64x2 $0
+;CHECK: vextractf32x4 $3
+;KNL: vinsertf32x4 $3
+;SKX: vinsertf64x2 $3
 ;CHECK: ret
 define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
   %rrr = load double* %br
@@ -24,8 +27,8 @@ define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
 }
 
 ;CHECK-LABEL: test3:
-;CHECK: vextractf32x4
-;CHECK: vinsertf32x4
+;CHECK: vextractf32x4 $1
+;CHECK: vinsertf32x4 $0
 ;CHECK: ret
 define <16 x float> @test3(<16 x float> %x) nounwind {
   %eee = extractelement <16 x float> %x, i32 4
@@ -34,8 +37,9 @@ define <16 x float> @test3(<16 x float> %x) nounwind {
 }
 
 ;CHECK-LABEL: test4:
-;CHECK: vextracti32x4
-;CHECK: vinserti32x4
+;CHECK: vextracti32x4 $2
+;KNL: vinserti32x4 $0
+;SKX: vinserti64x2 $0
 ;CHECK: ret
 define <8 x i64> @test4(<8 x i64> %x) nounwind {
   %eee = extractelement <8 x i64> %x, i32 4
@@ -186,12 +190,13 @@ define i16 @test16(i1 *%addr, i16 %a) {
 ;CHECK-LABEL: test17
 ;CHECK: kshiftlw
 ;CHECK: kshiftrw
-;CHECK: korw
+;KNL: korw
+;SKX: korb
 ;CHECK: ret
 define i8 @test17(i1 *%addr, i8 %a) {
   %x = load i1 * %addr, align 128
   %a1 = bitcast i8 %a to <8 x i1>
-  %x1 = insertelement <8 x i1> %a1, i1 %x, i32 10
+  %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
   %x2 = bitcast <8 x i1>%x1 to i8
   ret i8 %x2
 }
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index 18cfcfe78b05..7cd01683fa98 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -60,20 +60,6 @@ define <8 x double> @test_rcp_pd_512(<8 x double> %a0) {
 }
 declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
 
-define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) {
-  ; CHECK: vrcp28ps {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0]
-  %res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) ; <<16 x float>> [#uses=1]
-  ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
-
-define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
-  ; CHECK: vrcp28pd {sae}, {{.*}}encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0]
-  %res = call <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8) ; <<8 x double>> [#uses=1]
-  ret <8 x double> %res
-}
-declare <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
-
 declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
 
 define <8 x double> @test7(<8 x double> %a) {
@@ -97,13 +83,6 @@ define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) {
 }
 declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
 
-define <16 x float> @test_rsqrt28_ps_512(<16 x float> %a0) {
-  ; CHECK: vrsqrt28ps {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0]
-  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) ; <<16 x float>> [#uses=1]
-  ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
-
 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
   ; CHECK: vrsqrt14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4f,0xc0]
   %res = call <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
@@ -111,13 +90,6 @@ define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
 }
 declare <4 x float> @llvm.x86.avx512.rsqrt14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
 
-define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
-  ; CHECK: vrsqrt28ss {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
-  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
-
 define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
   ; CHECK: vrcp14ss {{.*}}encoding: [0x62,0xf2,0x7d,0x08,0x4d,0xc0]
   %res = call <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1) ; <<4 x float>> [#uses=1]
@@ -125,26 +97,19 @@ define <4 x float> @test_rcp14_ss(<4 x float> %a0) {
 }
 declare <4 x float> @llvm.x86.avx512.rcp14.ss(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
 
-define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
-  ; CHECK: vrcp28ss {sae}, {{.*}}encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
-  %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
-
 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
   ; CHECK: vsqrtpd
-  %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
+  %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0,  <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1]
   ret <8 x double> %res
 }
-declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
 
 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
   ; CHECK: vsqrtps
-  %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
+  %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1]
   ret <16 x float> %res
 }
-declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>) nounwind readnone
+declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
 
 define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
   ; CHECK: vsqrtss {{.*}}encoding: [0x62
@@ -375,7 +340,7 @@ define <8 x i64> @test_ctlz_q(<8 x i64> %a) {
 declare <8 x i64> @llvm.ctlz.v8i64(<8 x i64>, i1) nounwind readonly
 
 define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
-  ; CHECK: vblendmps
+  ; CHECK: vblendmps %zmm1, %zmm0
   %res = call <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float> %a1, <16 x float> %a2, i16 %a0) ; <<16 x float>> [#uses=1]
   ret <16 x float> %res
 }
@@ -383,7 +348,7 @@ define <16 x float> @test_x86_mask_blend_ps_512(i16 %a0, <16 x float> %a1, <16 x
 declare <16 x float> @llvm.x86.avx512.mask.blend.ps.512(<16 x float>, <16 x float>, i16) nounwind readonly
 
 define <8 x double> @test_x86_mask_blend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
-  ; CHECK: vblendmpd
+  ; CHECK: vblendmpd %zmm1, %zmm0
   %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a1, <8 x double> %a2, i8 %a0) ; <<8 x double>> [#uses=1]
   ret <8 x double> %res
 }
@@ -611,3 +576,800 @@ define <8 x i64> @test_vmovntdqa(i8 *%x) {
 }
 
 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
+
+define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
+; CHECK-LABEL: test_valign_q:
+; CHECK: valignq $2, %zmm1, %zmm0, %zmm0
+  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
+; CHECK-LABEL: test_mask_valign_q:
+; CHECK: valignq $2, %zmm1, %zmm0, %zmm2 {%k1}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i8 2, <8 x i64> %src, i8 %mask)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i8, <8 x i64>, i8)
+
+define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
+; CHECK-LABEL: test_maskz_valign_d:
+; CHECK: valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
+  %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i8 5, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i8, <16 x i32>, i16)
+
+define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
+ ; CHECK-LABEL: test_mask_store_ss
+ ; CHECK: vmovss %xmm0, (%rdi) {%k1}     ## encoding: [0x62,0xf1,0x7e,0x09,0x11,0x07]
+ call void @llvm.x86.avx512.mask.store.ss(i8* %ptr, <4 x float> %data, i8 %mask)
+ ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.ss(i8*, <4 x float>, i8 )
+
+define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
+; CHECK-LABEL: test_pcmpeq_d
+; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
+  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
+  ret i16 %res
+}
+
+define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
+; CHECK-LABEL: test_mask_pcmpeq_d
+; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
+  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
+  ret i16 %res
+}
+
+declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
+
+define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
+; CHECK-LABEL: test_pcmpeq_q
+; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
+  ret i8 %res
+}
+
+define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
+; CHECK-LABEL: test_mask_pcmpeq_q
+; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
+  ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
+
+define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
+; CHECK-LABEL: test_pcmpgt_d
+; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 ##
+  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
+  ret i16 %res
+}
+
+define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
+; CHECK-LABEL: test_mask_pcmpgt_d
+; CHECK: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ##
+  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
+  ret i16 %res
+}
+
+declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
+
+define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
+; CHECK-LABEL: test_pcmpgt_q
+; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
+  ret i8 %res
+}
+
+define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
+; CHECK-LABEL: test_mask_pcmpgt_q
+; CHECK: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
+  ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
+
+define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK_LABEL: test_cmp_d_512
+; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
+  %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
+  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
+; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
+  %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
+  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
+; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
+  %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
+  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
+; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
+  %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
+  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
+; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
+  %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
+  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
+; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
+  %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
+  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
+; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
+  %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
+  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
+; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
+  %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
+  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
+  ret <8 x i16> %vec7
+}
+
+define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+; CHECK_LABEL: test_mask_cmp_d_512
+; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
+  %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
+  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
+; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
+  %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
+  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
+; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
+  %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
+  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
+; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
+  %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
+  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
+; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
+  %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
+  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
+; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
+  %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
+  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
+; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
+  %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
+  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
+; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
+  %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
+  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
+  ret <8 x i16> %vec7
+}
+
+declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
+
+define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
+; CHECK_LABEL: test_ucmp_d_512
+; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
+  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
+  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
+; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
+  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
+  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
+; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
+  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
+  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
+; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
+  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
+  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
+; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
+  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
+  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
+; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
+  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
+  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
+; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
+  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
+  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
+; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
+  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
+  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
+  ret <8 x i16> %vec7
+}
+
+define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+; CHECK_LABEL: test_mask_ucmp_d_512
+; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
+  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
+  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
+; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
+  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
+  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
+; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
+  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
+  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
+; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
+  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
+  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
+; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
+  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
+  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
+; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
+  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
+  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
+; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
+  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
+  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
+; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
+  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
+  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
+  ret <8 x i16> %vec7
+}
+
+declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
+
+define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK_LABEL: test_cmp_q_512
+; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
+; CHECK_LABEL: test_mask_cmp_q_512
+; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
+
+define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
+; CHECK_LABEL: test_ucmp_q_512
+; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
+; CHECK_LABEL: test_mask_ucmp_q_512
+; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
+
+define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
+; CHECK-LABEL: test_mask_vextractf32x4:
+; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
+  %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
+  ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
+
+define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
+; CHECK-LABEL: test_mask_vextracti64x4:
+; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
+  %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
+  ret <4 x i64> %res
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)
+
+define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
+; CHECK-LABEL: test_maskz_vextracti32x4:
+; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
+  %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
+  ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)
+
+define <4 x double> @test_vextractf64x4(<8 x double> %a) {
+; CHECK-LABEL: test_vextractf64x4:
+; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
+  %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
+  ret <4 x double> %res
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)
+
+define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
+  ; CHECK-LABEL: test_x86_avx512_pslli_d
+  ; CHECK: vpslld
+  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_pslli_d 
+  ; CHECK: vpslld $7, %zmm0, %zmm1 {%k1}  
+  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_pslli_d
+  ; CHECK: vpslld $7, %zmm0, %zmm0 {%k1} {z}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
+  ; CHECK-LABEL: test_x86_avx512_pslli_q
+  ; CHECK: vpsllq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_pslli_q
+  ; CHECK: vpsllq $7, %zmm0, %zmm1 {%k1}   
+  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_pslli_q
+  ; CHECK: vpsllq $7, %zmm0, %zmm0 {%k1} {z} 
+  %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
+  ; CHECK-LABEL: test_x86_avx512_psrli_d
+  ; CHECK: vpsrld
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psrli_d
+  ; CHECK: vpsrld $7, %zmm0, %zmm1 {%k1}  
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psrli_d
+  ; CHECK: vpsrld $7, %zmm0, %zmm0 {%k1} {z}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
+  ; CHECK-LABEL: test_x86_avx512_psrli_q
+  ; CHECK: vpsrlq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psrli_q
+  ; CHECK: vpsrlq $7, %zmm0, %zmm1 {%k1}  
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psrli_q
+  ; CHECK: vpsrlq $7, %zmm0, %zmm0 {%k1} {z}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
+  ; CHECK-LABEL: test_x86_avx512_psrai_d
+  ; CHECK: vpsrad
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psrai_d
+  ; CHECK: vpsrad $7, %zmm0, %zmm1 {%k1}  
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psrai_d
+  ; CHECK: vpsrad $7, %zmm0, %zmm0 {%k1} {z}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
+  ; CHECK-LABEL: test_x86_avx512_psrai_q
+  ; CHECK: vpsraq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psrai_q
+  ; CHECK: vpsraq $7, %zmm0, %zmm1 {%k1}   
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psrai_q
+  ; CHECK: vpsraq $7, %zmm0, %zmm0 {%k1} {z}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK-LABEL: test_x86_avx512_psll_d
+  ; CHECK: vpslld
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psll_d
+  ; CHECK: vpslld %xmm1, %zmm0, %zmm2 {%k1}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psll_d
+  ; CHECK: vpslld %xmm1, %zmm0, %zmm0 {%k1} {z}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK-LABEL: test_x86_avx512_psll_q
+  ; CHECK: vpsllq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psll_q
+  ; CHECK: vpsllq %xmm1, %zmm0, %zmm2 {%k1}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psll_q
+  ; CHECK: vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK-LABEL: test_x86_avx512_psrl_d
+  ; CHECK: vpsrld
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psrl_d
+  ; CHECK: vpsrld %xmm1, %zmm0, %zmm2 {%k1}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psrl_d
+  ; CHECK: vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK-LABEL: test_x86_avx512_psrl_q
+  ; CHECK: vpsrlq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psrl_q
+  ; CHECK: vpsrlq %xmm1, %zmm0, %zmm2 {%k1}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psrl_q
+  ; CHECK: vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
+  ; CHECK-LABEL: test_x86_avx512_psra_d
+  ; CHECK: vpsrad
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psra_d
+  ; CHECK: vpsrad %xmm1, %zmm0, %zmm2 {%k1}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psra_d
+  ; CHECK: vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
+  ; CHECK-LABEL: test_x86_avx512_psra_q
+  ; CHECK: vpsraq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psra_q
+  ; CHECK: vpsraq %xmm1, %zmm0, %zmm2 {%k1}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psra_q
+  ; CHECK: vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
+  ; CHECK-LABEL: test_x86_avx512_psllv_d
+  ; CHECK: vpsllvd
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psllv_d
+  ; CHECK: vpsllvd %zmm1, %zmm0, %zmm2 {%k1}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psllv_d
+  ; CHECK: vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
+  ; CHECK-LABEL: test_x86_avx512_psllv_q
+  ; CHECK: vpsllvq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psllv_q
+  ; CHECK: vpsllvq %zmm1, %zmm0, %zmm2 {%k1}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psllv_q
+  ; CHECK: vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
+
+
+define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
+  ; CHECK-LABEL: test_x86_avx512_psrav_d
+  ; CHECK: vpsravd
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psrav_d
+  ; CHECK: vpsravd %zmm1, %zmm0, %zmm2 {%k1}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psrav_d
+  ; CHECK: vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
+  ; CHECK-LABEL: test_x86_avx512_psrav_q
+  ; CHECK: vpsravq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psrav_q
+  ; CHECK: vpsravq %zmm1, %zmm0, %zmm2 {%k1}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psrav_q
+  ; CHECK: vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
+  ; CHECK-LABEL: test_x86_avx512_psrlv_d
+  ; CHECK: vpsrlvd
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d
+  ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm2 {%k1}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
+  ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_d
+  ; CHECK: vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z}
+  %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
+  ret <16 x i32> %res
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
+  ; CHECK-LABEL: test_x86_avx512_psrlv_q
+  ; CHECK: vpsrlvq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q
+  ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm2 {%k1}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
+  ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
+  ; CHECK-LABEL: test_x86_avx512_maskz_psrlv_q
+  ; CHECK: vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z}
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
+  ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
+  ; CHECK: vpsrlvq (%
+  %b = load <8 x i64>* %ptr
+  %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
+  ret <8 x i64> %res
+}
diff --git a/test/CodeGen/X86/avx512-logic.ll b/test/CodeGen/X86/avx512-logic.ll
new file mode 100644
index 000000000000..bee4f52b3216
--- /dev/null
+++ b/test/CodeGen/X86/avx512-logic.ll
@@ -0,0 +1,101 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+; CHECK-LABEL: vpandd
+; CHECK: vpandd %zmm
+; CHECK: ret
+define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
+                            i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %x = and <16 x i32> %a2, %b
+  ret <16 x i32> %x
+}
+
+; CHECK-LABEL: vpord
+; CHECK: vpord %zmm
+; CHECK: ret
+define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
+                            i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %x = or <16 x i32> %a2, %b
+  ret <16 x i32> %x
+}
+
+; CHECK-LABEL: vpxord
+; CHECK: vpxord %zmm
+; CHECK: ret
+define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
+                            i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %x = xor <16 x i32> %a2, %b
+  ret <16 x i32> %x
+}
+
+; CHECK-LABEL: vpandq
+; CHECK: vpandq %zmm
+; CHECK: ret
+define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+  %x = and <8 x i64> %a2, %b
+  ret <8 x i64> %x
+}
+
+; CHECK-LABEL: vporq
+; CHECK: vporq %zmm
+; CHECK: ret
+define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+  %x = or <8 x i64> %a2, %b
+  ret <8 x i64> %x
+}
+
+; CHECK-LABEL: vpxorq
+; CHECK: vpxorq %zmm
+; CHECK: ret
+define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+  %x = xor <8 x i64> %a2, %b
+  ret <8 x i64> %x
+}
+
+
+; CHECK-LABEL: orq_broadcast
+; CHECK: vporq LCP{{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK: ret
+define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
+  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+  ret <8 x i64> %b
+}
+
+; CHECK-LABEL: andd512fold
+; CHECK: vpandd (%
+; CHECK: ret
+define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
+entry:
+  %a = load <16 x i32>* %x, align 4
+  %b = and <16 x i32> %y, %a
+  ret <16 x i32> %b
+}
+
+; CHECK-LABEL: andqbrst
+; CHECK: vpandq  (%rdi){1to8}, %zmm
+; CHECK: ret
+define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
+entry:
+  %a = load i64* %ap, align 8
+  %b = insertelement <8 x i64> undef, i64 %a, i32 0
+  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
+  %d = and <8 x i64> %p1, %c
+  ret <8 x i64>%d
+}
diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll
index dd33ffdb640d..35d334813fa8 100644
--- a/test/CodeGen/X86/avx512-mask-op.ll
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -1,12 +1,14 @@
-; RUN: llc < %s -march=x86-64 -mcpu=knl | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
 
 define i16 @mask16(i16 %x) {
   %m0 = bitcast i16 %x to <16 x i1>
   %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
   %ret = bitcast <16 x i1> %m1 to i16
   ret i16 %ret
-; CHECK: mask16
-; CHECK: knotw
+; CHECK-LABEL: mask16
+; CHECK: kmovw
+; CHECK-NEXT: knotw
+; CHECK-NEXT: kmovw
 ; CHECK: ret
 }
 
@@ -15,8 +17,38 @@ define i8 @mask8(i8 %x) {
   %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
   %ret = bitcast <8 x i1> %m1 to i8
   ret i8 %ret
-; CHECK: mask8
-; CHECK: knotw
+; CHECK-LABEL: mask8
+; CHECK: kmovw
+; CHECK-NEXT: knotw
+; CHECK-NEXT: kmovw
+; CHECK: ret
+}
+
+define void @mask16_mem(i16* %ptr) {
+  %x = load i16* %ptr, align 4
+  %m0 = bitcast i16 %x to <16 x i1>
+  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
+  %ret = bitcast <16 x i1> %m1 to i16
+  store i16 %ret, i16* %ptr, align 4
+  ret void
+; CHECK-LABEL: mask16_mem
+; CHECK: kmovw ([[ARG1:%rdi|%rcx]]), %k{{[0-7]}}
+; CHECK-NEXT: knotw
+; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
+; CHECK: ret
+}
+
+define void @mask8_mem(i8* %ptr) {
+  %x = load i8* %ptr, align 4
+  %m0 = bitcast i8 %x to <8 x i1>
+  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
+  %ret = bitcast <8 x i1> %m1 to i8
+  store i8 %ret, i8* %ptr, align 4
+  ret void
+; CHECK-LABEL: mask8_mem
+; CHECK: kmovw ([[ARG1]]), %k{{[0-7]}}
+; CHECK-NEXT: knotw
+; CHECK-NEXT: kmovw %k{{[0-7]}}, ([[ARG1]])
 ; CHECK: ret
 }
 
diff --git a/test/CodeGen/X86/avx512-mov.ll b/test/CodeGen/X86/avx512-mov.ll
index 009802f1742d..93875e839e22 100644
--- a/test/CodeGen/X86/avx512-mov.ll
+++ b/test/CodeGen/X86/avx512-mov.ll
@@ -153,31 +153,295 @@ define void @test18(i8 * %addr, <8 x i64> %data) {
   ret void
 }
 
-; CHECK-LABEL: store_i1_1
-; CHECK: movb
-; CHECK: movb
+; CHECK-LABEL: test19
+; CHECK: vmovdqu32
+; CHECK: ret
+define void @test19(i8 * %addr, <16 x i32> %data) {
+  %vaddr = bitcast i8* %addr to <16 x i32>*
+  store <16 x i32>%data, <16 x i32>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test20
+; CHECK: vmovdqa32
+; CHECK: ret
+define void @test20(i8 * %addr, <16 x i32> %data) {
+  %vaddr = bitcast i8* %addr to <16 x i32>*
+  store <16 x i32>%data, <16 x i32>* %vaddr, align 64
+  ret void
+}
+
+; CHECK-LABEL: test21
+; CHECK: vmovdqa64
 ; CHECK: ret
-define void @store_i1_1() {
-  store i1 true, i1 addrspace(3)* undef, align 128
-  store i1 false, i1 addrspace(2)* undef, align 128
+define  <8 x i64> @test21(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <8 x i64>*
+  %res = load <8 x i64>* %vaddr, align 64
+  ret <8 x i64>%res
+}
+
+; CHECK-LABEL: test22
+; CHECK: vmovdqu64
+; CHECK: ret
+define void @test22(i8 * %addr, <8 x i64> %data) {
+  %vaddr = bitcast i8* %addr to <8 x i64>*
+  store <8 x i64>%data, <8 x i64>* %vaddr, align 1
   ret void
 }
 
-; CHECK-LABEL: store_i1_2
-; CHECK: movb
+; CHECK-LABEL: test23
+; CHECK: vmovdqu64
 ; CHECK: ret
-define void @store_i1_2(i64 %a, i64 %b) {
-  %res = icmp eq i64 %a, %b
-  store i1 %res, i1 addrspace(3)* undef, align 128
+define <8 x i64> @test23(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <8 x i64>*
+  %res = load <8 x i64>* %vaddr, align 1
+  ret <8 x i64>%res
+}
+
+; CHECK-LABEL: test24
+; CHECK: vmovapd
+; CHECK: ret
+define void @test24(i8 * %addr, <8 x double> %data) {
+  %vaddr = bitcast i8* %addr to <8 x double>*
+  store <8 x double>%data, <8 x double>* %vaddr, align 64
   ret void
 }
 
-; CHECK-LABEL: store_i1_3
-; CHECK: kmovw
+; CHECK-LABEL: test25
+; CHECK: vmovapd
+; CHECK: ret
+define <8 x double> @test25(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <8 x double>*
+  %res = load <8 x double>* %vaddr, align 64
+  ret <8 x double>%res
+}
+
+; CHECK-LABEL: test26
+; CHECK: vmovaps
 ; CHECK: ret
-define void @store_i1_3(i16 %a) {
-  %a_vec = bitcast i16 %a to <16 x i1>
-  %res = extractelement <16 x i1> %a_vec, i32 4
-  store i1 %res, i1 addrspace(3)* undef, align 128
+define void @test26(i8 * %addr, <16 x float> %data) {
+  %vaddr = bitcast i8* %addr to <16 x float>*
+  store <16 x float>%data, <16 x float>* %vaddr, align 64
   ret void
 }
+
+; CHECK-LABEL: test27
+; CHECK: vmovaps
+; CHECK: ret
+define <16 x float> @test27(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <16 x float>*
+  %res = load <16 x float>* %vaddr, align 64
+  ret <16 x float>%res
+}
+
+; CHECK-LABEL: test28
+; CHECK: vmovupd
+; CHECK: ret
+define void @test28(i8 * %addr, <8 x double> %data) {
+  %vaddr = bitcast i8* %addr to <8 x double>*
+  store <8 x double>%data, <8 x double>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test29
+; CHECK: vmovupd
+; CHECK: ret
+define <8 x double> @test29(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <8 x double>*
+  %res = load <8 x double>* %vaddr, align 1
+  ret <8 x double>%res
+}
+
+; CHECK-LABEL: test30
+; CHECK: vmovups
+; CHECK: ret
+define void @test30(i8 * %addr, <16 x float> %data) {
+  %vaddr = bitcast i8* %addr to <16 x float>*
+  store <16 x float>%data, <16 x float>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test31
+; CHECK: vmovups
+; CHECK: ret
+define <16 x float> @test31(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <16 x float>*
+  %res = load <16 x float>* %vaddr, align 1
+  ret <16 x float>%res
+}
+
+; CHECK-LABEL: test32
+; CHECK: vmovdqa32{{.*{%k[1-7]} }}
+; CHECK: ret
+define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
+  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <16 x i32>*
+  %r = load <16 x i32>* %vaddr, align 64
+  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
+  ret <16 x i32>%res
+}
+
+; CHECK-LABEL: test33
+; CHECK: vmovdqu32{{.*{%k[1-7]} }}
+; CHECK: ret
+define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
+  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <16 x i32>*
+  %r = load <16 x i32>* %vaddr, align 1
+  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
+  ret <16 x i32>%res
+}
+
+; CHECK-LABEL: test34
+; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) {
+  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <16 x i32>*
+  %r = load <16 x i32>* %vaddr, align 64
+  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
+  ret <16 x i32>%res
+}
+
+; CHECK-LABEL: test35
+; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) {
+  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <16 x i32>*
+  %r = load <16 x i32>* %vaddr, align 1
+  %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
+  ret <16 x i32>%res
+}
+
+; CHECK-LABEL: test36
+; CHECK: vmovdqa64{{.*{%k[1-7]} }}
+; CHECK: ret
+define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
+  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x i64>*
+  %r = load <8 x i64>* %vaddr, align 64
+  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
+  ret <8 x i64>%res
+}
+
+; CHECK-LABEL: test37
+; CHECK: vmovdqu64{{.*{%k[1-7]} }}
+; CHECK: ret
+define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
+  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x i64>*
+  %r = load <8 x i64>* %vaddr, align 1
+  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
+  ret <8 x i64>%res
+}
+
+; CHECK-LABEL: test38
+; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) {
+  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x i64>*
+  %r = load <8 x i64>* %vaddr, align 64
+  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
+  ret <8 x i64>%res
+}
+
+; CHECK-LABEL: test39
+; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) {
+  %mask = icmp ne <8 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x i64>*
+  %r = load <8 x i64>* %vaddr, align 1
+  %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
+  ret <8 x i64>%res
+}
+
+; CHECK-LABEL: test40
+; CHECK: vmovaps{{.*{%k[1-7]} }}
+; CHECK: ret
+define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
+  %mask = fcmp one <16 x float> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <16 x float>*
+  %r = load <16 x float>* %vaddr, align 64
+  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
+  ret <16 x float>%res
+}
+
+; CHECK-LABEL: test41
+; CHECK: vmovups{{.*{%k[1-7]} }}
+; CHECK: ret
+define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
+  %mask = fcmp one <16 x float> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <16 x float>*
+  %r = load <16 x float>* %vaddr, align 1
+  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
+  ret <16 x float>%res
+}
+
+; CHECK-LABEL: test42
+; CHECK: vmovaps{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) {
+  %mask = fcmp one <16 x float> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <16 x float>*
+  %r = load <16 x float>* %vaddr, align 64
+  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
+  ret <16 x float>%res
+}
+
+; CHECK-LABEL: test43
+; CHECK: vmovups{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) {
+  %mask = fcmp one <16 x float> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <16 x float>*
+  %r = load <16 x float>* %vaddr, align 1
+  %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
+  ret <16 x float>%res
+}
+
+; CHECK-LABEL: test44
+; CHECK: vmovapd{{.*{%k[1-7]} }}
+; CHECK: ret
+define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
+  %mask = fcmp one <8 x double> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x double>*
+  %r = load <8 x double>* %vaddr, align 64
+  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
+  ret <8 x double>%res
+}
+
+; CHECK-LABEL: test45
+; CHECK: vmovupd{{.*{%k[1-7]} }}
+; CHECK: ret
+define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
+  %mask = fcmp one <8 x double> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x double>*
+  %r = load <8 x double>* %vaddr, align 1
+  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
+  ret <8 x double>%res
+}
+
+; CHECK-LABEL: test46
+; CHECK: vmovapd{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) {
+  %mask = fcmp one <8 x double> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x double>*
+  %r = load <8 x double>* %vaddr, align 64
+  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
+  ret <8 x double>%res
+}
+
+; CHECK-LABEL: test47
+; CHECK: vmovupd{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <8 x double> @test47(i8 * %addr, <8 x double> %mask1) {
+  %mask = fcmp one <8 x double> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x double>*
+  %r = load <8 x double>* %vaddr, align 1
+  %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
+  ret <8 x double>%res
+}
diff --git a/test/CodeGen/X86/avx512-nontemporal.ll b/test/CodeGen/X86/avx512-nontemporal.ll
index ef50cdb82831..bf57d021acab 100644
--- a/test/CodeGen/X86/avx512-nontemporal.ll
+++ b/test/CodeGen/X86/avx512-nontemporal.ll
@@ -16,4 +16,4 @@ define void @f(<16 x float> %A, <16 x float> %AA, i8* %B, <8 x double> %C, <8 x
   ret void
 }
 
-!0 = metadata !{i32 1}
+!0 = !{i32 1}
diff --git a/test/CodeGen/X86/avx512-select.ll b/test/CodeGen/X86/avx512-select.ll
index 83f46984781f..0dbf286d3c5d 100644
--- a/test/CodeGen/X86/avx512-select.ll
+++ b/test/CodeGen/X86/avx512-select.ll
@@ -39,3 +39,56 @@ define double @select03(double %a, double %b, double %c, double %eps) {
   %cond = select i1 %cmp, double %c, double %b
   ret double %cond
 }
+
+; CHECK-LABEL: @select04
+; CHECK: vmovaps %zmm3, %zmm1
+; CHECK-NEXT: ret
+; PR20677
+define <16 x double> @select04(<16 x double> %a, <16 x double> %b) {
+  %sel = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x double> %a, <16 x double> %b
+  ret <16 x double> %sel
+}
+
+; CHECK-LABEL: select05
+; CHECK: kmovw   %esi, %k0
+; CHECK-NEXT: kmovw   %edi, %k1
+; CHECK-NEXT: korw    %k1, %k0, %k0
+; CHECK-NEXT: kmovw   %k0, %eax
+define i8 @select05(i8 %a.0, i8 %m) {
+  %mask = bitcast i8 %m to <8 x i1>
+  %a = bitcast i8 %a.0 to <8 x i1>
+  %r = select <8 x i1> %mask, <8 x i1> <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>, <8 x i1> %a
+  %res = bitcast <8 x i1> %r to i8
+  ret i8 %res;
+}
+
+; CHECK-LABEL: select06
+; CHECK: kmovw   %esi, %k0
+; CHECK-NEXT: kmovw   %edi, %k1
+; CHECK-NEXT: kandw    %k1, %k0, %k0
+; CHECK-NEXT: kmovw   %k0, %eax
+define i8 @select06(i8 %a.0, i8 %m) {
+  %mask = bitcast i8 %m to <8 x i1>
+  %a = bitcast i8 %a.0 to <8 x i1>
+  %r = select <8 x i1> %mask, <8 x i1> %a, <8 x i1> zeroinitializer
+  %res = bitcast <8 x i1> %r to i8
+  ret i8 %res;
+}
+
+; CHECK-LABEL: select07
+; CHECK-DAG:  kmovw   %edx, %k0
+; CHECK-DAG:  kmovw   %edi, %k1
+; CHECK-DAG:  kmovw   %esi, %k2
+; CHECK: kandw %k0, %k1, %k1
+; CHECK-NEXT: knotw    %k0, %k0
+; CHECK-NEXT: kandw    %k0, %k2, %k0
+; CHECK-NEXT: korw %k0, %k1, %k0
+; CHECK-NEXT: kmovw   %k0, %eax
+define i8 @select07(i8 %a.0, i8 %b.0, i8 %m) {
+  %mask = bitcast i8 %m to <8 x i1>
+  %a = bitcast i8 %a.0 to <8 x i1>
+  %b = bitcast i8 %b.0 to <8 x i1>
+  %r = select <8 x i1> %mask, <8 x i1> %a, <8 x i1> %b
+  %res = bitcast <8 x i1> %r to i8
+  ret i8 %res;
+}
diff --git a/test/CodeGen/X86/avx512-shuffle.ll b/test/CodeGen/X86/avx512-shuffle.ll
deleted file mode 100644
index b99e89a9a546..000000000000
--- a/test/CodeGen/X86/avx512-shuffle.ll
+++ /dev/null
@@ -1,314 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
-; CHECK: LCP
-; CHECK: .long 2
-; CHECK: .long 5
-; CHECK: .long 0
-; CHECK: .long 0
-; CHECK: .long 7
-; CHECK: .long 0
-; CHECK: .long 10
-; CHECK: .long 1
-; CHECK: .long 0
-; CHECK: .long 5
-; CHECK: .long 0
-; CHECK: .long 4
-; CHECK: .long 7
-; CHECK: .long 0
-; CHECK: .long 10
-; CHECK: .long 1
-; CHECK-LABEL: test1:
-; CHECK: vpermps
-; CHECK: ret
-define <16 x float> @test1(<16 x float> %a) nounwind {
-  %c = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1,  i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
-  ret <16 x float> %c
-}
-
-; CHECK-LABEL: test2:
-; CHECK: vpermd
-; CHECK: ret
-define <16 x i32> @test2(<16 x i32> %a) nounwind {
-  %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 2, i32 5, i32 undef, i32 undef, i32 7, i32 undef, i32 10, i32 1,  i32 0, i32 5, i32 undef, i32 4, i32 7, i32 undef, i32 10, i32 1>
-  ret <16 x i32> %c
-}
-
-; CHECK-LABEL: test3:
-; CHECK: vpermq
-; CHECK: ret
-define <8 x i64> @test3(<8 x i64> %a) nounwind {
-  %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 5, i32 1, i32 undef, i32 7, i32 undef, i32 3, i32 1>
-  ret <8 x i64> %c
-}
-
-; CHECK-LABEL: test4:
-; CHECK: vpermpd
-; CHECK: ret
-define <8 x double> @test4(<8 x double> %a) nounwind {
-  %c = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  ret <8 x double> %c
-}
-
-; CHECK-LABEL: test5:
-; CHECK: vpermt2pd
-; CHECK: ret
-define <8 x double> @test5(<8 x double> %a, <8 x double> %b) nounwind {
-  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
-  ret <8 x double> %c
-}
-
-; The reg variant of vpermt2 with a writemask
-; CHECK-LABEL: test5m:
-; CHECK: vpermt2pd {{.* {%k[1-7]} {z}}}
-define <8 x double> @test5m(<8 x double> %a, <8 x double> %b, i8 %mask) nounwind {
-  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
-  %m = bitcast i8 %mask to <8 x i1>
-  %res = select <8 x i1> %m, <8 x double> %c, <8 x double> zeroinitializer
-  ret <8 x double> %res
-}
-
-; CHECK-LABEL: test6:
-; CHECK: vpermq $30
-; CHECK: ret
-define <8 x i64> @test6(<8 x i64> %a) nounwind {
-  %c = shufflevector <8 x i64> %a, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
-  ret <8 x i64> %c
-}
-
-; CHECK-LABEL: test7:
-; CHECK: vpermt2q
-; CHECK: ret
-define <8 x i64> @test7(<8 x i64> %a, <8 x i64> %b) nounwind {
-  %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
-  ret <8 x i64> %c
-}
-
-; The reg variant of vpermt2 with a writemask
-; CHECK-LABEL: test7m:
-; CHECK: vpermt2q {{.* {%k[1-7]} {z}}}
-define <8 x i64> @test7m(<8 x i64> %a, <8 x i64> %b, i8 %mask) nounwind {
-  %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
-  %m = bitcast i8 %mask to <8 x i1>
-  %res = select <8 x i1> %m, <8 x i64> %c, <8 x i64> zeroinitializer
-  ret <8 x i64> %res
-}
-
-; The mem variant of vpermt2 with a writemask
-; CHECK-LABEL: test7mm:
-; CHECK: vpermt2q {{\(.*\).* {%k[1-7]} {z}}}
-define <8 x i64> @test7mm(<8 x i64> %a, <8 x i64> *%pb, i8 %mask) nounwind {
-  %b = load <8 x i64>* %pb
-  %c = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 8, i32 0, i32 1, i32 6, i32 10, i32 4, i32 5>
-  %m = bitcast i8 %mask to <8 x i1>
-  %res = select <8 x i1> %m, <8 x i64> %c, <8 x i64> zeroinitializer
-  ret <8 x i64> %res
-}
-
-; CHECK-LABEL: test8:
-; CHECK: vpermt2d
-; CHECK: ret
-define <16 x i32> @test8(<16 x i32> %a, <16 x i32> %b) nounwind {
-  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
-  ret <16 x i32> %c
-}
-
-; The reg variant of vpermt2 with a writemask
-; CHECK-LABEL: test8m:
-; CHECK: vpermt2d {{.* {%k[1-7]} {z}}}
-define <16 x i32> @test8m(<16 x i32> %a, <16 x i32> %b, i16 %mask) nounwind {
-  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
-  %m = bitcast i16 %mask to <16 x i1>
-  %res = select <16 x i1> %m, <16 x i32> %c, <16 x i32> zeroinitializer
-  ret <16 x i32> %res
-}
-
-; The mem variant of vpermt2 with a writemask
-; CHECK-LABEL: test8mm:
-; CHECK: vpermt2d {{\(.*\).* {%k[1-7]} {z}}}
-define <16 x i32> @test8mm(<16 x i32> %a, <16 x i32> *%pb, i16 %mask) nounwind {
-  %b = load <16 x i32> * %pb
-  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
-  %m = bitcast i16 %mask to <16 x i1>
-  %res = select <16 x i1> %m, <16 x i32> %c, <16 x i32> zeroinitializer
-  ret <16 x i32> %res
-}
-
-; CHECK-LABEL: test9:
-; CHECK: vpermt2ps
-; CHECK: ret
-define <16 x float> @test9(<16 x float> %a, <16 x float> %b) nounwind {
-  %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
-  ret <16 x float> %c
-}
-
-; The reg variant of vpermt2 with a writemask
-; CHECK-LABEL: test9m:
-; CHECK: vpermt2ps {{.*}} {%k{{.}}} {z}
-define <16 x float> @test9m(<16 x float> %a, <16 x float> %b, i16 %mask) nounwind {
-  %c = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
-  %m = bitcast i16 %mask to <16 x i1>
-  %res = select <16 x i1> %m, <16 x float> %c, <16 x float> zeroinitializer
-  ret <16 x float> %res
-}
-
-; CHECK-LABEL: test10:
-; CHECK: vpermt2ps (
-; CHECK: ret
-define <16 x float> @test10(<16 x float> %a, <16 x float>* %b) nounwind {
-  %c = load <16 x float>* %b
-  %d = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
-  ret <16 x float> %d
-}
-
-; CHECK-LABEL: test11:
-; CHECK: vpermt2d 
-; CHECK: ret
-define <16 x i32> @test11(<16 x i32> %a, <16 x i32>* %b) nounwind {
-  %c = load <16 x i32>* %b
-  %d = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32> <i32 15, i32 31, i32 14, i32 22, i32 13, i32 29, i32 4, i32 28, i32 11, i32 27, i32 10, i32 26, i32 9, i32 25, i32 8, i32 24>
-  ret <16 x i32> %d
-}
-
-; CHECK-LABEL: test12
-; CHECK: vmovlhps {{.*}}## encoding: [0x62
-; CHECK: ret
-define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) nounwind {
-  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-  ret <4 x i32> %c
-}
-
-; CHECK-LABEL: test13
-; CHECK: vpermilps $-79, %zmm
-; CHECK: ret
-define <16 x float> @test13(<16 x float> %a) {
- %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
- ret <16 x float> %b
-}
-
-; CHECK-LABEL: test14
-; CHECK: vpermilpd $-53, %zmm
-; CHECK: ret
-define <8 x double> @test14(<8 x double> %a) {
- %b = shufflevector <8 x double> %a, <8 x double> undef, <8 x i32><i32 1, i32 1, i32 2, i32 3, i32 4, i32 4, i32 7, i32 7>
- ret <8 x double> %b
-}
-
-; CHECK-LABEL: test15
-; CHECK: vpshufd $-79, %zmm
-; CHECK: ret
-define <16 x i32> @test15(<16 x i32> %a) {
- %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32><i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
- ret <16 x i32> %b
-}
-; CHECK-LABEL: test16
-; CHECK: valignq $2, %zmm0, %zmm1
-; CHECK: ret
-define <8 x double> @test16(<8 x double> %a, <8 x double> %b) nounwind {
-  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
-  ret <8 x double> %c
-}
-
-; CHECK-LABEL: test17
-; CHECK: vshufpd $19, %zmm1, %zmm0
-; CHECK: ret
-define <8 x double> @test17(<8 x double> %a, <8 x double> %b) nounwind {
-  %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 9, i32 2, i32 10, i32 5, i32 undef, i32 undef, i32 undef>
-  ret <8 x double> %c
-}
-
-; CHECK-LABEL: test18
-; CHECK: vpunpckhdq %zmm
-; CHECK: ret
-define <16 x i32> @test18(<16 x i32> %a, <16 x i32> %c) {
- %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15, i32 18, i32 26, i32 19, i32 27, i32 22, i32 30, i32 23, i32 31>
- ret <16 x i32> %b
-}
-
-; CHECK-LABEL: test19
-; CHECK: vpunpckldq %zmm
-; CHECK: ret
-define <16 x i32> @test19(<16 x i32> %a, <16 x i32> %c) {
- %b = shufflevector <16 x i32> %a, <16 x i32> %c, <16 x i32><i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13, i32 16, i32 24, i32 17, i32 25, i32 20, i32 28, i32 21, i32 29>
- ret <16 x i32> %b
-}
-
-; CHECK-LABEL: test20
-; CHECK: vpunpckhqdq  %zmm
-; CHECK: ret
-define <8 x i64> @test20(<8 x i64> %a, <8 x i64> %c) {
- %b = shufflevector <8 x i64> %a, <8 x i64> %c, <8 x i32><i32 1, i32 5, i32 3, i32 7, i32 9, i32 13, i32 11, i32 15>
- ret <8 x i64> %b
-}
-
-; CHECK-LABEL: test21
-; CHECK: vunpcklps %zmm
-; CHECK: ret
-define <16 x float> @test21(<16 x float> %a, <16 x float> %c) {
- %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13, i32 16, i32 24, i32 17, i32 25, i32 20, i32 28, i32 21, i32 29>
- ret <16 x float> %b
-}
-
-; CHECK-LABEL: test22
-; CHECK: vmovhlps {{.*}}## encoding: [0x62
-; CHECK: ret
-define <4 x i32> @test22(<4 x i32> %a, <4 x i32> %b) nounwind {
-  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
-  ret <4 x i32> %c
-}
-
-; CHECK-LABEL: @test23
-; CHECK: vshufps $-112, %zmm
-; CHECK: ret
-define <16 x float> @test23(<16 x float> %a, <16 x float> %c) {
- %b = shufflevector <16 x float> %a, <16 x float> %c, <16 x i32><i32 0, i32 0, i32 17, i32 18, i32 4, i32 4, i32 21, i32 22, i32 8, i32 8, i32 25, i32 26, i32 12, i32 12, i32 29, i32 30>
- ret <16 x float> %b
-}
-
-; CHECK-LABEL: @test24
-; CHECK: vpermt2d
-; CHECK: ret
-define <16 x i32> @test24(<16 x i32> %a, <16 x i32> %b) nounwind {
-  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  ret <16 x i32> %c
-}
-
-; CHECK-LABEL: @test25
-; CHECK: vshufps  $52
-; CHECK: ret
-define <16 x i32> @test25(<16 x i32> %a, <16 x i32> %b) nounwind {
-  %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 19, i32 undef, i32 4, i32 5, i32 23, i32 undef, i32 8, i32 9, i32 27, i32 undef, i32 12, i32 13, i32 undef, i32 undef>
-  ret <16 x i32> %c
-}
-
-; CHECK-LABEL: @test26
-; CHECK: vmovshdup
-; CHECK: ret
-define <16 x i32> @test26(<16 x i32> %a) nounwind {
-  %c = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 undef, i32 9, i32 9, i32 undef, i32 11, i32 13, i32 undef, i32 undef, i32 undef>
-  ret <16 x i32> %c
-}
-
-; CHECK-LABEL: @test27
-; CHECK: ret
-define <16 x i32> @test27(<4 x i32>%a) {
- %res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
- ret <16 x i32> %res
-}
-
-; CHECK-LABEL: @test28
-; CHECK: vinserti64x4 $1
-; CHECK: ret
-define <16 x i32> @test28(<16 x i32>%x, <16 x i32>%y) {
- %res = shufflevector <16 x i32>%x, <16 x i32>%y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
-                                                              i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
- ret <16 x i32> %res
-}
-
-; CHECK-LABEL: @test29
-; CHECK: vinserti64x4 $0
-; CHECK: ret
-define <16 x i32> @test29(<16 x i32>%x, <16 x i32>%y) {
- %res = shufflevector <16 x i32>%x, <16 x i32>%y, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
-                                                              i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i32> %res
-}
-
diff --git a/test/CodeGen/X86/avx512-trunc-ext.ll b/test/CodeGen/X86/avx512-trunc-ext.ll
index 5e097be04cdf..91ef5d58f438 100644
--- a/test/CodeGen/X86/avx512-trunc-ext.ll
+++ b/test/CodeGen/X86/avx512-trunc-ext.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
 
 ; CHECK-LABEL: trunc_16x32_to_16x8
 ; CHECK: vpmovdb
@@ -118,6 +119,7 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
 
 ; CHECK-LABEL: sext_8i1_8i32
 ; CHECK: vpbroadcastq  LCP{{.*}}(%rip), %zmm0 {%k1} {z}
+; SKX: vpmovm2d
 ; CHECK: ret
 define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
   %x = icmp slt <8 x i32> %a1, %a2
@@ -135,9 +137,8 @@ define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) {
 }
 
 ; CHECK-LABEL: trunc_i32_to_i1
-; CHECK: andl
-; CHECK: kmov
-; CHECK: kortest
+; CHECK: testb
+; CHECK: setne
 ; CKECK: orl
 ; CHECK: ret
 define i16 @trunc_i32_to_i1(i32 %a) {
@@ -146,3 +147,30 @@ define i16 @trunc_i32_to_i1(i32 %a) {
   %res = bitcast <16 x i1> %maskv to i16
   ret i16 %res
 }
+
+; CHECK-LABEL: sext_8i1_8i16
+; SKX: vpmovm2w
+; CHECK: ret
+define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
+  %x = icmp slt <8 x i32> %a1, %a2
+  %y = sext <8 x i1> %x to <8 x i16>
+  ret <8 x i16> %y
+}
+
+; CHECK-LABEL: sext_16i1_16i32
+; SKX: vpmovm2d
+; CHECK: ret
+define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
+  %x = icmp slt <16 x i32> %a1, %a2
+  %y = sext <16 x i1> %x to <16 x i32>
+  ret <16 x i32> %y
+}
+
+; CHECK-LABEL: sext_8i1_8i64
+; SKX: vpmovm2q
+; CHECK: ret
+define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
+  %x = icmp slt <8 x i32> %a1, %a2
+  %y = sext <8 x i1> %x to <8 x i64>
+  ret <8 x i64> %y
+}
diff --git a/test/CodeGen/X86/avx512-vbroadcast.ll b/test/CodeGen/X86/avx512-vbroadcast.ll
index 9c6db11d8f45..5bb82338d087 100644
--- a/test/CodeGen/X86/avx512-vbroadcast.ll
+++ b/test/CodeGen/X86/avx512-vbroadcast.ll
@@ -1,59 +1,192 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
 
-;CHECK-LABEL: _inreg16xi32:
-;CHECK: vpbroadcastd {{.*}}, %zmm
-;CHECK: ret
 define   <16 x i32> @_inreg16xi32(i32 %a) {
+; CHECK-LABEL: _inreg16xi32:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpbroadcastd %edi, %zmm0
+; CHECK-NEXT:    retq
   %b = insertelement <16 x i32> undef, i32 %a, i32 0
   %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
   ret <16 x i32> %c
 }
 
-;CHECK-LABEL: _inreg8xi64:
-;CHECK: vpbroadcastq {{.*}}, %zmm
-;CHECK: ret
 define   <8 x i64> @_inreg8xi64(i64 %a) {
+; CHECK-LABEL: _inreg8xi64:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0
+; CHECK-NEXT:    retq
   %b = insertelement <8 x i64> undef, i64 %a, i32 0
   %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
   ret <8 x i64> %c
 }
 
-;CHECK-LABEL: _inreg16xfloat:
-;CHECK: vbroadcastss {{.*}}, %zmm
+;CHECK-LABEL: _ss16xfloat_v4
+;CHECK: vbroadcastss %xmm0, %zmm0
 ;CHECK: ret
+define   <16 x float> @_ss16xfloat_v4(<4 x float> %a) {
+  %b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer
+  ret <16 x float> %b
+}
+
 define   <16 x float> @_inreg16xfloat(float %a) {
+; CHECK-LABEL: _inreg16xfloat:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0
+; CHECK-NEXT:    retq
   %b = insertelement <16 x float> undef, float %a, i32 0
   %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
   ret <16 x float> %c
 }
 
-;CHECK-LABEL: _inreg8xdouble:
-;CHECK: vbroadcastsd {{.*}}, %zmm
+;CHECK-LABEL: _ss16xfloat_mask:
+;CHECK: vbroadcastss %xmm0, %zmm1 {%k1}
+;CHECK: ret
+define   <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) {
+  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+  %b = insertelement <16 x float> undef, float %a, i32 0
+  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
+  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
+  ret <16 x float> %r
+}
+
+;CHECK-LABEL: _ss16xfloat_maskz:
+;CHECK: vbroadcastss %xmm0, %zmm0 {%k1} {z}
 ;CHECK: ret
+define   <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
+  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+  %b = insertelement <16 x float> undef, float %a, i32 0
+  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
+  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
+  ret <16 x float> %r
+}
+
+;CHECK-LABEL: _ss16xfloat_load:
+;CHECK: vbroadcastss (%{{.*}}, %zmm
+;CHECK: ret
+define   <16 x float> @_ss16xfloat_load(float* %a.ptr) {
+  %a = load float* %a.ptr
+  %b = insertelement <16 x float> undef, float %a, i32 0
+  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
+  ret <16 x float> %c
+}
+
+;CHECK-LABEL: _ss16xfloat_mask_load:
+;CHECK: vbroadcastss (%rdi), %zmm0 {%k1}
+;CHECK: ret
+define   <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
+  %a = load float* %a.ptr
+  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+  %b = insertelement <16 x float> undef, float %a, i32 0
+  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
+  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
+  ret <16 x float> %r
+}
+
+;CHECK-LABEL: _ss16xfloat_maskz_load:
+;CHECK: vbroadcastss (%rdi), %zmm0 {%k1} {z}
+;CHECK: ret
+define   <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
+  %a = load float* %a.ptr
+  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
+  %b = insertelement <16 x float> undef, float %a, i32 0
+  %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
+  %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
+  ret <16 x float> %r
+}
+
 define   <8 x double> @_inreg8xdouble(double %a) {
+; CHECK-LABEL: _inreg8xdouble:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
+; CHECK-NEXT:    retq
   %b = insertelement <8 x double> undef, double %a, i32 0
   %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
   ret <8 x double> %c
 }
 
-;CHECK-LABEL: _xmm16xi32
-;CHECK: vpbroadcastd
+;CHECK-LABEL: _sd8xdouble_mask:
+;CHECK: vbroadcastsd %xmm0, %zmm1 {%k1}
+;CHECK: ret
+define   <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %b = insertelement <8 x double> undef, double %a, i32 0
+  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
+  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
+  ret <8 x double> %r
+}
+
+;CHECK-LABEL: _sd8xdouble_maskz:
+;CHECK: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
 ;CHECK: ret
+define   <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %b = insertelement <8 x double> undef, double %a, i32 0
+  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
+  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
+  ret <8 x double> %r
+}
+
+;CHECK-LABEL: _sd8xdouble_load:
+;CHECK: vbroadcastsd (%rdi), %zmm
+;CHECK: ret
+define   <8 x double> @_sd8xdouble_load(double* %a.ptr) {
+  %a = load double* %a.ptr
+  %b = insertelement <8 x double> undef, double %a, i32 0
+  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
+  ret <8 x double> %c
+}
+
+;CHECK-LABEL: _sd8xdouble_mask_load:
+;CHECK: vbroadcastsd (%rdi), %zmm0 {%k1}
+;CHECK: ret
+define   <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
+  %a = load double* %a.ptr
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %b = insertelement <8 x double> undef, double %a, i32 0
+  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
+  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
+  ret <8 x double> %r
+}
+
+define   <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) {
+; CHECK-LABEL: _sd8xdouble_maskz_load:
+; CHECK:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
+; CHECK:    ret
+  %a = load double* %a.ptr
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %b = insertelement <8 x double> undef, double %a, i32 0
+  %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
+  %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
+  ret <8 x double> %r
+}
+
 define   <16 x i32> @_xmm16xi32(<16 x i32> %a) {
+; CHECK-LABEL: _xmm16xi32:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpbroadcastd %xmm0, %zmm0
+; CHECK-NEXT:    retq
   %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
   ret <16 x i32> %b
 }
 
-;CHECK-LABEL: _xmm16xfloat
-;CHECK: vbroadcastss {{.*}}## encoding: [0x62
-;CHECK: ret
 define   <16 x float> @_xmm16xfloat(<16 x float> %a) {
+; CHECK-LABEL: _xmm16xfloat:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0
+; CHECK-NEXT:    retq
   %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer
   ret <16 x float> %b
 }
 
 define <16 x i32> @test_vbroadcast() {
-  ; CHECK: vpbroadcastd
+; CHECK-LABEL: test_vbroadcast:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vpxord %zmm0, %zmm0, %zmm0
+; CHECK-NEXT:    vcmpunordps %zmm0, %zmm0, %k1
+; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; CHECK-NEXT:    knotw %k1, %k1
+; CHECK-NEXT:    vmovdqu32 %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
 entry:
   %0 = sext <16 x i1> zeroinitializer to <16 x i32>
   %1 = fcmp uno <16 x float> undef, zeroinitializer
@@ -62,3 +195,108 @@ entry:
   ret <16 x i32> %3
 }
 
+; We implement the set1 intrinsics with vector initializers.  Verify that the
+; IR generated will produce broadcasts at the end.
+define <8 x double> @test_set1_pd(double %d) #2 {
+; CHECK-LABEL: test_set1_pd:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
+; CHECK-NEXT:    retq
+entry:
+  %vecinit.i = insertelement <8 x double> undef, double %d, i32 0
+  %vecinit1.i = insertelement <8 x double> %vecinit.i, double %d, i32 1
+  %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %d, i32 2
+  %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %d, i32 3
+  %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %d, i32 4
+  %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %d, i32 5
+  %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %d, i32 6
+  %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %d, i32 7
+  ret <8 x double> %vecinit7.i
+}
+
+define <8 x i64> @test_set1_epi64(i64 %d) #2 {
+; CHECK-LABEL: test_set1_epi64:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0
+; CHECK-NEXT:    retq
+entry:
+  %vecinit.i = insertelement <8 x i64> undef, i64 %d, i32 0
+  %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %d, i32 1
+  %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %d, i32 2
+  %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %d, i32 3
+  %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %d, i32 4
+  %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %d, i32 5
+  %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %d, i32 6
+  %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %d, i32 7
+  ret <8 x i64> %vecinit7.i
+}
+
+define <16 x float> @test_set1_ps(float %f) #2 {
+; CHECK-LABEL: test_set1_ps:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0
+; CHECK-NEXT:    retq
+entry:
+  %vecinit.i = insertelement <16 x float> undef, float %f, i32 0
+  %vecinit1.i = insertelement <16 x float> %vecinit.i, float %f, i32 1
+  %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %f, i32 2
+  %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %f, i32 3
+  %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %f, i32 4
+  %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %f, i32 5
+  %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %f, i32 6
+  %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %f, i32 7
+  %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %f, i32 8
+  %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %f, i32 9
+  %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %f, i32 10
+  %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %f, i32 11
+  %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %f, i32 12
+  %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %f, i32 13
+  %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %f, i32 14
+  %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %f, i32 15
+  ret <16 x float> %vecinit15.i
+}
+
+define <16 x i32> @test_set1_epi32(i32 %f) #2 {
+; CHECK-LABEL: test_set1_epi32:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vpbroadcastd %edi, %zmm0
+; CHECK-NEXT:    retq
+entry:
+  %vecinit.i = insertelement <16 x i32> undef, i32 %f, i32 0
+  %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %f, i32 1
+  %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %f, i32 2
+  %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %f, i32 3
+  %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %f, i32 4
+  %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %f, i32 5
+  %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %f, i32 6
+  %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %f, i32 7
+  %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %f, i32 8
+  %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %f, i32 9
+  %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %f, i32 10
+  %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %f, i32 11
+  %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %f, i32 12
+  %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %f, i32 13
+  %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %f, i32 14
+  %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %f, i32 15
+  ret <16 x i32> %vecinit15.i
+}
+
+; We implement the scalar broadcast intrinsics with vector initializers.
+; Verify that the IR generated will produce the broadcast at the end.
+define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) {
+; CHECK-LABEL: test_mm512_broadcastsd_pd:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
+; CHECK-NEXT:    retq
+entry:
+  %0 = extractelement <2 x double> %a, i32 0
+  %vecinit.i = insertelement <8 x double> undef, double %0, i32 0
+  %vecinit1.i = insertelement <8 x double> %vecinit.i, double %0, i32 1
+  %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %0, i32 2
+  %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %0, i32 3
+  %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %0, i32 4
+  %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %0, i32 5
+  %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %0, i32 6
+  %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %0, i32 7
+  ret <8 x double> %vecinit7.i
+}
diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll
index d762f0083e35..b16f5c9663c6 100644
--- a/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -1,145 +1,176 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
 
-; CHECK-LABEL: test1
-; CHECK: vcmpleps
-; CHECK: vmovups
-; CHECK: ret
 define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
+; CHECK-LABEL: test1:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcmpleps %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
   %mask = fcmp ole <16 x float> %x, %y
   %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
   ret <16 x float> %max
 }
 
-; CHECK-LABEL: test2
-; CHECK: vcmplepd
-; CHECK: vmovupd
-; CHECK: ret
 define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
+; CHECK-LABEL: test2:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcmplepd %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vmovapd %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
   %mask = fcmp ole <8 x double> %x, %y
   %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
   ret <8 x double> %max
 }
 
-; CHECK-LABEL: test3
-; CHECK: vpcmpeqd  (%rdi)
-; CHECK: vmovdqu32
-; CHECK: ret
 define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwind {
+; CHECK-LABEL: test3:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpeqd (%rdi), %zmm0, %k1
+; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
   %y = load <16 x i32>* %yp, align 4
   %mask = icmp eq <16 x i32> %x, %y
   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
   ret <16 x i32> %max
 }
 
-; CHECK-LABEL: @test4_unsigned
-; CHECK: vpcmpnltud
-; CHECK: vmovdqu32
-; CHECK: ret
-define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y) nounwind {
+define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
+; CHECK-LABEL: test4_unsigned:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpnltud %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
   %mask = icmp uge <16 x i32> %x, %y
-  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
+  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
   ret <16 x i32> %max
 }
 
-; CHECK-LABEL: test5
-; CHECK: vpcmpeqq {{.*}}%k1
-; CHECK: vmovdqu64 {{.*}}%k1
-; CHECK: ret
 define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
+; CHECK-LABEL: test5:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
   %mask = icmp eq <8 x i64> %x, %y
   %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
   ret <8 x i64> %max
 }
 
-; CHECK-LABEL: test6_unsigned
-; CHECK: vpcmpnleuq {{.*}}%k1
-; CHECK: vmovdqu64 {{.*}}%k1
-; CHECK: ret
-define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y) nounwind {
+define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
+; CHECK-LABEL: test6_unsigned:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
   %mask = icmp ugt <8 x i64> %x, %y
-  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
+  %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
   ret <8 x i64> %max
 }
 
-; CHECK-LABEL: test7
-; CHECK: xor
-; CHECK: vcmpltps
-; CHECK: vblendvps
-; CHECK: ret
 define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test7:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vcmpltps %xmm2, %xmm0, %xmm2
+; CHECK-NEXT:    vblendvps %xmm2, %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %mask = fcmp olt <4 x float> %a, zeroinitializer
   %c = select <4 x i1>%mask, <4 x float>%a, <4 x float>%b
   ret <4 x float>%c
 }
 
-; CHECK-LABEL: test8
-; CHECK: xor
-; CHECK: vcmpltpd
-; CHECK: vblendvpd
-; CHECK: ret
 define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test8:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
+; CHECK-NEXT:    vcmpltpd %xmm2, %xmm0, %xmm2
+; CHECK-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %mask = fcmp olt <2 x double> %a, zeroinitializer
   %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
   ret <2 x double>%c
 }
 
-; CHECK-LABEL: test9
-; CHECK: vpcmpeqd
-; CHECK: vpblendmd
-; CHECK: ret
 define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
+; CHECK-LABEL: test9:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:      ## kill: YMM1<def> YMM1<kill> ZMM1<def>
+; CHECK-NEXT:      ## kill: YMM0<def> YMM0<kill> ZMM0<def>
+; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT:      ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
+; CHECK-NEXT:    retq
   %mask = icmp eq <8 x i32> %x, %y
   %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
   ret <8 x i32> %max
 }
 
-; CHECK-LABEL: test10
-; CHECK: vcmpeqps
-; CHECK: vblendmps
-; CHECK: ret
 define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
+; CHECK-LABEL: test10:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:      ## kill: YMM1<def> YMM1<kill> ZMM1<def>
+; CHECK-NEXT:      ## kill: YMM0<def> YMM0<kill> ZMM0<def>
+; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vblendmps %zmm0, %zmm1, %zmm0 {%k1}
+; CHECK-NEXT:      ## kill: YMM0<def> YMM0<kill> ZMM0<kill>
+; CHECK-NEXT:    retq
   %mask = fcmp oeq <8 x float> %x, %y
   %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
   ret <8 x float> %max
 }
 
-; CHECK-LABEL: test11_unsigned
-; CHECK: vpmaxud
-; CHECK: ret
 define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind {
+; CHECK-LABEL: test11_unsigned:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    retq
   %mask = icmp ugt <8 x i32> %x, %y
   %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
   ret <8 x i32> %max
 }
 
-; CHECK-LABEL: test12
-; CHECK: vpcmpeqq        %zmm2, %zmm0, [[LO:%k[0-7]]]
-; CHECK: vpcmpeqq        %zmm3, %zmm1, [[HI:%k[0-7]]]
-; CHECK: kunpckbw        [[LO]], [[HI]], {{%k[0-7]}}
 
 define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind {
+; CHECK-LABEL: test12:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpeqq %zmm2, %zmm0, %k0
+; CHECK-NEXT:    vpcmpeqq %zmm3, %zmm1, %k1
+; CHECK-NEXT:    kunpckbw %k0, %k1, %k0
+; CHECK-NEXT:    kmovw %k0, %eax
+; CHECK-NEXT:      ## kill: AX<def> AX<kill> EAX<kill>
+; CHECK-NEXT:    retq
   %res = icmp eq <16 x i64> %a, %b
   %res1 = bitcast <16 x i1> %res to i16
   ret i16 %res1
 }
 
-; CHECK-LABEL: test13
-; CHECK: vcmpeqps        %zmm
-; CHECK: vpbroadcastd
-; CHECK: ret
 define <16 x i32> @test13(<16 x float>%a, <16 x float>%b)
+; CHECK-LABEL: test13:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
 {
   %cmpvector_i = fcmp oeq <16 x float> %a, %b
   %conv = zext <16 x i1> %cmpvector_i to <16 x i32>
   ret <16 x i32> %conv
 }
 
-; CHECK-LABEL: test14
-; CHECK: vpcmp
-; CHECK-NOT: vpcmp
-; CHECK: vmovdqu32 {{.*}}{%k1} {z}
-; CHECK: ret
 define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
+; CHECK-LABEL: test14:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm1
+; CHECK-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
+; CHECK-NEXT:    knotw %k0, %k0
+; CHECK-NEXT:    knotw %k0, %k1
+; CHECK-NEXT:    vmovdqu32 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
   %sub_r = sub <16 x i32> %a, %b
   %cmp.i2.i = icmp sgt <16 x i32> %sub_r, %a
   %sext.i3.i = sext <16 x i1> %cmp.i2.i to <16 x i32>
@@ -148,12 +179,15 @@ define <16 x i32> @test14(<16 x i32>%a, <16 x i32>%b) {
   ret <16 x i32>%res
 }
 
-; CHECK-LABEL: test15
-; CHECK: vpcmpgtq
-; CHECK-NOT: vpcmp
-; CHECK: vmovdqu64 {{.*}}{%k1} {z}
-; CHECK: ret
 define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
+; CHECK-LABEL: test15:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm1
+; CHECK-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
+; CHECK-NEXT:    knotw %k0, %k0
+; CHECK-NEXT:    knotw %k0, %k1
+; CHECK-NEXT:    vmovdqu64 %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
   %sub_r = sub <8 x i64> %a, %b
   %cmp.i2.i = icmp sgt <8 x i64> %sub_r, %a
   %sext.i3.i = sext <8 x i1> %cmp.i2.i to <8 x i64>
@@ -162,3 +196,181 @@ define <8 x i64> @test15(<8 x i64>%a, <8 x i64>%b) {
   ret <8 x i64>%res
 }
 
+define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
+; CHECK-LABEL: test16:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpled %zmm0, %zmm1, %k1
+; CHECK-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %mask = icmp sge <16 x i32> %x, %y
+  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
+  ret <16 x i32> %max
+}
+
+define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
+; CHECK-LABEL: test17:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpgtd (%rdi), %zmm0, %k1
+; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %y = load <16 x i32>* %y.ptr, align 4
+  %mask = icmp sgt <16 x i32> %x, %y
+  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
+  ret <16 x i32> %max
+}
+
+define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
+; CHECK-LABEL: test18:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpled (%rdi), %zmm0, %k1
+; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %y = load <16 x i32>* %y.ptr, align 4
+  %mask = icmp sle <16 x i32> %x, %y
+  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
+  ret <16 x i32> %max
+}
+
+define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nounwind {
+; CHECK-LABEL: test19:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpleud (%rdi), %zmm0, %k1
+; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %y = load <16 x i32>* %y.ptr, align 4
+  %mask = icmp ule <16 x i32> %x, %y
+  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
+  ret <16 x i32> %max
+}
+
+define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i32> %y1) nounwind {
+; CHECK-LABEL: test20:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
+; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %mask1 = icmp eq <16 x i32> %x1, %y1
+  %mask0 = icmp eq <16 x i32> %x, %y
+  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
+  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
+  ret <16 x i32> %max
+}
+
+define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y1) nounwind {
+; CHECK-LABEL: test21:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k1
+; CHECK-NEXT:    vpcmpleq %zmm2, %zmm3, %k1 {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vmovaps %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %mask1 = icmp sge <8 x i64> %x1, %y1
+  %mask0 = icmp sle <8 x i64> %x, %y
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
+  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
+  ret <8 x i64> %max
+}
+
+define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
+; CHECK-LABEL: test22:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpgtq %zmm2, %zmm1, %k1
+; CHECK-NEXT:    vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %mask1 = icmp sgt <8 x i64> %x1, %y1
+  %y = load <8 x i64>* %y.ptr, align 4
+  %mask0 = icmp sgt <8 x i64> %x, %y
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
+  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
+  ret <8 x i64> %max
+}
+
+define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
+; CHECK-LABEL: test23:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpled %zmm1, %zmm2, %k1
+; CHECK-NEXT:    vpcmpleud (%rdi), %zmm0, %k1 {%k1}
+; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %mask1 = icmp sge <16 x i32> %x1, %y1
+  %y = load <16 x i32>* %y.ptr, align 4
+  %mask0 = icmp ule <16 x i32> %x, %y
+  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
+  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
+  ret <16 x i32> %max
+}
+
+define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
+; CHECK-LABEL: test24:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpeqq (%rdi){1to8}, %zmm0, %k1
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %yb = load i64* %yb.ptr, align 4
+  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
+  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
+  %mask = icmp eq <8 x i64> %x, %y
+  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
+  ret <8 x i64> %max
+}
+
+define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind {
+; CHECK-LABEL: test25:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpled (%rdi){1to16}, %zmm0, %k1
+; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %yb = load i32* %yb.ptr, align 4
+  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
+  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
+  %mask = icmp sle <16 x i32> %x, %y
+  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
+  ret <16 x i32> %max
+}
+
+define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32> %y1) nounwind {
+; CHECK-LABEL: test26:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpled %zmm1, %zmm2, %k1
+; CHECK-NEXT:    vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
+; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %mask1 = icmp sge <16 x i32> %x1, %y1
+  %yb = load i32* %yb.ptr, align 4
+  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
+  %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
+  %mask0 = icmp sgt <16 x i32> %x, %y
+  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
+  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
+  ret <16 x i32> %max
+}
+
+define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y1) nounwind {
+; CHECK-LABEL: test27:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpcmpleq        %zmm1, %zmm2, %k1
+; CHECK-NEXT:    vpcmpleq        (%rdi){1to8}, %zmm0, %k1 {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovaps %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %mask1 = icmp sge <8 x i64> %x1, %y1
+  %yb = load i64* %yb.ptr, align 4
+  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
+  %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
+  %mask0 = icmp sle <8 x i64> %x, %y
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
+  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
+  ret <8 x i64> %max
+}
diff --git a/test/CodeGen/X86/avx512-zext-load-crash.ll b/test/CodeGen/X86/avx512-zext-load-crash.ll
deleted file mode 100644
index 07ded13a0e3c..000000000000
--- a/test/CodeGen/X86/avx512-zext-load-crash.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
-
-define <8 x i16> @test_zext_load() {
-  ; CHECK: vmovq
-entry:
-  %0 = load <2 x i16> ** undef, align 8
-  %1 = getelementptr inbounds <2 x i16>* %0, i64 1
-  %2 = load <2 x i16>* %0, align 1
-  %3 = shufflevector <2 x i16> %2, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %4 = load <2 x i16>* %1, align 1
-  %5 = shufflevector <2 x i16> %4, <2 x i16> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %6 = shufflevector <8 x i16> %3, <8 x i16> %5, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
-  ret <8 x i16> %6
-}
diff --git a/test/CodeGen/X86/avx512bw-arith.ll b/test/CodeGen/X86/avx512bw-arith.ll
new file mode 100644
index 000000000000..94f68a2ddc28
--- /dev/null
+++ b/test/CodeGen/X86/avx512bw-arith.ll
@@ -0,0 +1,102 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw| FileCheck %s
+
+; CHECK-LABEL: vpaddb512_test
+; CHECK: vpaddb %zmm{{.*}}
+; CHECK: ret
+define <64 x i8> @vpaddb512_test(<64 x i8> %i, <64 x i8> %j) nounwind readnone {
+  %x = add <64 x i8> %i, %j
+  ret <64 x i8> %x
+}
+
+; CHECK-LABEL: vpaddb512_fold_test
+; CHECK: vpaddb (%rdi), %zmm{{.*}}
+; CHECK: ret
+define <64 x i8> @vpaddb512_fold_test(<64 x i8> %i, <64 x i8>* %j) nounwind {
+  %tmp = load <64 x i8>* %j, align 4
+  %x = add <64 x i8> %i, %tmp
+  ret <64 x i8> %x
+}
+
+; CHECK-LABEL: vpaddw512_test
+; CHECK: vpaddw %zmm{{.*}}
+; CHECK: ret
+define <32 x i16> @vpaddw512_test(<32 x i16> %i, <32 x i16> %j) nounwind readnone {
+  %x = add <32 x i16> %i, %j
+  ret <32 x i16> %x
+}
+
+; CHECK-LABEL: vpaddw512_fold_test
+; CHECK: vpaddw (%rdi), %zmm{{.*}}
+; CHECK: ret
+define <32 x i16> @vpaddw512_fold_test(<32 x i16> %i, <32 x i16>* %j) nounwind {
+  %tmp = load <32 x i16>* %j, align 4
+  %x = add <32 x i16> %i, %tmp
+  ret <32 x i16> %x
+}
+
+; CHECK-LABEL: vpaddw512_mask_test
+; CHECK: vpaddw %zmm{{.*%k[1-7].*}}
+; CHECK: ret
+define <32 x i16> @vpaddw512_mask_test(<32 x i16> %i, <32 x i16> %j, <32 x i16> %mask1) nounwind readnone {
+  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
+  %x = add <32 x i16> %i, %j
+  %r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %i
+  ret <32 x i16> %r
+}
+
+; CHECK-LABEL: vpaddw512_maskz_test
+; CHECK: vpaddw %zmm{{.*{%k[1-7]} {z}.*}}
+; CHECK: ret
+define <32 x i16> @vpaddw512_maskz_test(<32 x i16> %i, <32 x i16> %j, <32 x i16> %mask1) nounwind readnone {
+  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
+  %x = add <32 x i16> %i, %j
+  %r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+  ret <32 x i16> %r
+}
+
+; CHECK-LABEL: vpaddw512_mask_fold_test
+; CHECK: vpaddw (%rdi), %zmm{{.*%k[1-7]}}
+; CHECK: ret
+define <32 x i16> @vpaddw512_mask_fold_test(<32 x i16> %i, <32 x i16>* %j.ptr, <32 x i16> %mask1) nounwind readnone {
+  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
+  %j = load <32 x i16>* %j.ptr
+  %x = add <32 x i16> %i, %j
+  %r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %i
+  ret <32 x i16> %r
+}
+
+; CHECK-LABEL: vpaddw512_maskz_fold_test
+; CHECK: vpaddw (%rdi), %zmm{{.*{%k[1-7]} {z}}}
+; CHECK: ret
+define <32 x i16> @vpaddw512_maskz_fold_test(<32 x i16> %i, <32 x i16>* %j.ptr, <32 x i16> %mask1) nounwind readnone {
+  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
+  %j = load <32 x i16>* %j.ptr
+  %x = add <32 x i16> %i, %j
+  %r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+  ret <32 x i16> %r
+}
+
+; CHECK-LABEL: vpsubb512_test
+; CHECK: vpsubb %zmm{{.*}}
+; CHECK: ret
+define <64 x i8> @vpsubb512_test(<64 x i8> %i, <64 x i8> %j) nounwind readnone {
+  %x = sub <64 x i8> %i, %j
+  ret <64 x i8> %x
+}
+
+; CHECK-LABEL: vpsubw512_test
+; CHECK: vpsubw %zmm{{.*}}
+; CHECK: ret
+define <32 x i16> @vpsubw512_test(<32 x i16> %i, <32 x i16> %j) nounwind readnone {
+  %x = sub <32 x i16> %i, %j
+  ret <32 x i16> %x
+}
+
+; CHECK-LABEL: vpmullw512_test
+; CHECK: vpmullw %zmm{{.*}}
+; CHECK: ret
+define <32 x i16> @vpmullw512_test(<32 x i16> %i, <32 x i16> %j) {
+  %x = mul <32 x i16> %i, %j
+  ret <32 x i16> %x
+}
+
diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll
new file mode 100644
index 000000000000..c807d222ce53
--- /dev/null
+++ b/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -0,0 +1,353 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding| FileCheck %s
+
+define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
+; CHECK-LABEL: test_pcmpeq_b
+; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
+  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
+  ret i64 %res
+}
+
+define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
+; CHECK-LABEL: test_mask_pcmpeq_b
+; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
+  %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
+  ret i64 %res
+}
+
+declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
+
+define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
+; CHECK-LABEL: test_pcmpeq_w
+; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
+  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
+  ret i32 %res
+}
+
+define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
+; CHECK-LABEL: test_mask_pcmpeq_w
+; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
+  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
+  ret i32 %res
+}
+
+declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
+
+define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
+; CHECK-LABEL: test_pcmpgt_b
+; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 ##
+  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
+  ret i64 %res
+}
+
+define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
+; CHECK-LABEL: test_mask_pcmpgt_b
+; CHECK: vpcmpgtb %zmm1, %zmm0, %k0 {%k1} ##
+  %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
+  ret i64 %res
+}
+
+declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
+
+define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
+; CHECK-LABEL: test_pcmpgt_w
+; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 ##
+  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
+  ret i32 %res
+}
+
+define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
+; CHECK-LABEL: test_mask_pcmpgt_w
+; CHECK: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} ##
+  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
+  ret i32 %res
+}
+
+declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
+
+define <8 x i64> @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
+; CHECK_LABEL: test_cmp_b_512
+; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
+  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
+  %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
+; CHECK: vpcmpltb %zmm1, %zmm0, %k0 ##
+  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
+  %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
+; CHECK: vpcmpleb %zmm1, %zmm0, %k0 ##
+  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
+  %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
+; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 ##
+  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
+  %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
+; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 ##
+  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
+  %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
+; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 ##
+  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
+  %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
+; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 ##
+  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
+  %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
+; CHECK: vpcmpordb %zmm1, %zmm0, %k0 ##
+  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
+  %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
+  ret <8 x i64> %vec7
+}
+
+define <8 x i64> @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
+; CHECK_LABEL: test_mask_cmp_b_512
+; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
+  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
+  %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
+; CHECK: vpcmpltb %zmm1, %zmm0, %k0 {%k1} ##
+  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
+  %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
+; CHECK: vpcmpleb %zmm1, %zmm0, %k0 {%k1} ##
+  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
+  %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
+; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} ##
+  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
+  %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
+; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} ##
+  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
+  %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
+; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} ##
+  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
+  %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
+; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} ##
+  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
+  %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
+; CHECK: vpcmpordb %zmm1, %zmm0, %k0 {%k1} ##
+  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
+  %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
+  ret <8 x i64> %vec7
+}
+
+declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
+
+define <8 x i64> @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
+; CHECK_LABEL: test_ucmp_b_512
+; CHECK: vpcmpequb %zmm1, %zmm0, %k0 ##
+  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
+  %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
+; CHECK: vpcmpltub %zmm1, %zmm0, %k0 ##
+  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
+  %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
+; CHECK: vpcmpleub %zmm1, %zmm0, %k0 ##
+  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
+  %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
+; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 ##
+  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
+  %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
+; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 ##
+  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
+  %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
+; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 ##
+  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
+  %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
+; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 ##
+  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
+  %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
+; CHECK: vpcmpordub %zmm1, %zmm0, %k0 ##
+  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
+  %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
+  ret <8 x i64> %vec7
+}
+
+define <8 x i64> @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
+; CHECK_LABEL: test_mask_ucmp_b_512
+; CHECK: vpcmpequb %zmm1, %zmm0, %k0 {%k1} ##
+  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
+  %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
+; CHECK: vpcmpltub %zmm1, %zmm0, %k0 {%k1} ##
+  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
+  %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
+; CHECK: vpcmpleub %zmm1, %zmm0, %k0 {%k1} ##
+  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
+  %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
+; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} ##
+  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
+  %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
+; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} ##
+  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
+  %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
+; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} ##
+  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
+  %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
+; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} ##
+  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
+  %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
+; CHECK: vpcmpordub %zmm1, %zmm0, %k0 {%k1} ##
+  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
+  %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
+  ret <8 x i64> %vec7
+}
+
+declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
+
+define <8 x i32> @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
+; CHECK_LABEL: test_cmp_w_512
+; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
+  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
+  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
+; CHECK: vpcmpltw %zmm1, %zmm0, %k0 ##
+  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
+  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
+; CHECK: vpcmplew %zmm1, %zmm0, %k0 ##
+  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
+  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
+; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 ##
+  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
+  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
+; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 ##
+  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
+  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
+; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 ##
+  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
+  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
+; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 ##
+  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
+  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
+; CHECK: vpcmpordw %zmm1, %zmm0, %k0 ##
+  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
+  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
+  ret <8 x i32> %vec7
+}
+
+define <8 x i32> @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
+; CHECK_LABEL: test_mask_cmp_w_512
+; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
+  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
+  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
+; CHECK: vpcmpltw %zmm1, %zmm0, %k0 {%k1} ##
+  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
+  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
+; CHECK: vpcmplew %zmm1, %zmm0, %k0 {%k1} ##
+  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
+  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
+; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} ##
+  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
+  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
+; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} ##
+  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
+  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
+; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} ##
+  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
+  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
+; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} ##
+  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
+  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
+; CHECK: vpcmpordw %zmm1, %zmm0, %k0 {%k1} ##
+  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
+  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
+  ret <8 x i32> %vec7
+}
+
+declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
+
+define <8 x i32> @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
+; CHECK_LABEL: test_ucmp_w_512
+; CHECK: vpcmpequw %zmm1, %zmm0, %k0 ##
+  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
+  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
+; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 ##
+  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
+  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
+; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 ##
+  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
+  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
+; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 ##
+  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
+  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
+; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 ##
+  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
+  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
+; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 ##
+  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
+  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
+; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 ##
+  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
+  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
+; CHECK: vpcmporduw %zmm1, %zmm0, %k0 ##
+  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
+  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
+  ret <8 x i32> %vec7
+}
+
+define <8 x i32> @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
+; CHECK_LABEL: test_mask_ucmp_w_512
+; CHECK: vpcmpequw %zmm1, %zmm0, %k0 {%k1} ##
+  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
+  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
+; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} ##
+  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
+  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
+; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} ##
+  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
+  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
+; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} ##
+  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
+  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
+; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} ##
+  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
+  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
+; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} ##
+  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
+  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
+; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} ##
+  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
+  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
+; CHECK: vpcmporduw %zmm1, %zmm0, %k0 {%k1} ##
+  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
+  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
+  ret <8 x i32> %vec7
+}
+
+declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
+
+; CHECK-LABEL: test_x86_mask_blend_b_256
+; CHECK: vpblendmb
+define <32 x i8> @test_x86_mask_blend_b_256(i32 %a0, <32 x i8> %a1, <32 x i8> %a2) {
+  %res = call <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8> %a1, <32 x i8> %a2, i32 %a0) ; <<32 x i8>> [#uses=1]
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx512.mask.blend.b.256(<32 x i8>, <32 x i8>, i32) nounwind readonly
+
+; CHECK-LABEL: test_x86_mask_blend_w_256
+define <16 x i16> @test_x86_mask_blend_w_256(i16 %mask, <16 x i16> %a1, <16 x i16> %a2) {
+  ; CHECK: vpblendmw
+  %res = call <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16> %a1, <16 x i16> %a2, i16 %mask) ; <<16 x i16>> [#uses=1]
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx512.mask.blend.w.256(<16 x i16>, <16 x i16>, i16) nounwind readonly
+
+; CHECK-LABEL: test_x86_mask_blend_b_512
+; CHECK: vpblendmb
+define <64 x i8> @test_x86_mask_blend_b_512(i64 %a0, <64 x i8> %a1, <64 x i8> %a2) {
+  %res = call <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8> %a1, <64 x i8> %a2, i64 %a0) ; <<64 x i8>> [#uses=1]
+  ret <64 x i8> %res
+}
+declare <64 x i8> @llvm.x86.avx512.mask.blend.b.512(<64 x i8>, <64 x i8>, i64) nounwind readonly
+
+; CHECK-LABEL: test_x86_mask_blend_w_512
+define <32 x i16> @test_x86_mask_blend_w_512(i32 %mask, <32 x i16> %a1, <32 x i16> %a2) {
+  ; CHECK: vpblendmw
+  %res = call <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16> %a1, <32 x i16> %a2, i32 %mask) ; <<32 x i16>> [#uses=1]
+  ret <32 x i16> %res
+}
+declare <32 x i16> @llvm.x86.avx512.mask.blend.w.512(<32 x i16>, <32 x i16>, i32) nounwind readonly
+
+; CHECK-LABEL: test_x86_mask_blend_b_128
+; CHECK: vpblendmb
+define <16 x i8> @test_x86_mask_blend_b_128(i16 %a0, <16 x i8> %a1, <16 x i8> %a2) {
+  %res = call <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8> %a1, <16 x i8> %a2, i16 %a0) ; <<16 x i8>> [#uses=1]
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.avx512.mask.blend.b.128(<16 x i8>, <16 x i8>, i16) nounwind readonly
+
+; CHECK-LABEL: test_x86_mask_blend_w_128
+define <8 x i16> @test_x86_mask_blend_w_128(i8 %mask, <8 x i16> %a1, <8 x i16> %a2) {
+  ; CHECK: vpblendmw
+  %res = call <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16> %a1, <8 x i16> %a2, i8 %mask) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.avx512.mask.blend.w.128(<8 x i16>, <8 x i16>, i8) nounwind readonly
diff --git a/test/CodeGen/X86/avx512bw-mask-op.ll b/test/CodeGen/X86/avx512bw-mask-op.ll
new file mode 100644
index 000000000000..9d7630c5d0ad
--- /dev/null
+++ b/test/CodeGen/X86/avx512bw-mask-op.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
+
+define i32 @mask32(i32 %x) {
+  %m0 = bitcast i32 %x to <32 x i1>
+  %m1 = xor <32 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
+  %ret = bitcast <32 x i1> %m1 to i32
+  ret i32 %ret
+; CHECK-LABEL: mask32
+; CHECK: kmovd
+; CHECK-NEXT: knotd
+; CHECK-NEXT: kmovd
+; CHECK_NEXT: ret
+}
+
+define i64 @mask64(i64 %x) {
+  %m0 = bitcast i64 %x to <64 x i1>
+  %m1 = xor <64 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
+  %ret = bitcast <64 x i1> %m1 to i64
+  ret i64 %ret
+; CHECK-LABEL: mask64
+; CHECK: kmovq
+; CHECK-NEXT: knotq
+; CHECK-NEXT: kmovq
+; CHECK_NEXT: ret
+}
+
+define void @mask32_mem(i32* %ptr) {
+  %x = load i32* %ptr, align 4
+  %m0 = bitcast i32 %x to <32 x i1>
+  %m1 = xor <32 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
+  %ret = bitcast <32 x i1> %m1 to i32
+  store i32 %ret, i32* %ptr, align 4
+  ret void
+; CHECK-LABEL: mask32_mem
+; CHECK: kmovd ([[ARG1:%rdi|%rcx]]), %k{{[0-7]}}
+; CHECK-NEXT: knotd
+; CHECK-NEXT: kmovd %k{{[0-7]}}, ([[ARG1]])
+; CHECK_NEXT: ret
+}
+
+define void @mask64_mem(i64* %ptr) {
+  %x = load i64* %ptr, align 4
+  %m0 = bitcast i64 %x to <64 x i1>
+  %m1 = xor <64 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
+                            i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
+  %ret = bitcast <64 x i1> %m1 to i64
+  store i64 %ret, i64* %ptr, align 4
+  ret void
+; CHECK-LABEL: mask64_mem
+; CHECK: kmovq ([[ARG1]]), %k{{[0-7]}}
+; CHECK-NEXT: knotq
+; CHECK-NEXT: kmovq %k{{[0-7]}}, ([[ARG1]])
+; CHECK_NEXT: ret
+}
+
+define i32 @mand32(i32 %x, i32 %y) {
+  %ma = bitcast i32 %x to <32 x i1>
+  %mb = bitcast i32 %y to <32 x i1>
+  %mc = and <32 x i1> %ma, %mb
+  %md = xor <32 x i1> %ma, %mb
+  %me = or <32 x i1> %mc, %md
+  %ret = bitcast <32 x i1> %me to i32
+; CHECK: kandd
+; CHECK: kxord
+; CHECK: kord
+  ret i32 %ret
+}
+
+define i64 @mand64(i64 %x, i64 %y) {
+  %ma = bitcast i64 %x to <64 x i1>
+  %mb = bitcast i64 %y to <64 x i1>
+  %mc = and <64 x i1> %ma, %mb
+  %md = xor <64 x i1> %ma, %mb
+  %me = or <64 x i1> %mc, %md
+  %ret = bitcast <64 x i1> %me to i64
+; CHECK: kandq
+; CHECK: kxorq
+; CHECK: korq
+  ret i64 %ret
+}
diff --git a/test/CodeGen/X86/avx512bw-mov.ll b/test/CodeGen/X86/avx512bw-mov.ll
new file mode 100644
index 000000000000..2ff6d280ab8f
--- /dev/null
+++ b/test/CodeGen/X86/avx512bw-mov.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s
+
+; CHECK-LABEL: test1
+; CHECK: vmovdqu8
+; CHECK: ret
+define <64 x i8> @test1(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <64 x i8>*
+  %res = load <64 x i8>* %vaddr, align 1
+  ret <64 x i8>%res
+}
+
+; CHECK-LABEL: test2
+; CHECK: vmovdqu8
+; CHECK: ret
+define void @test2(i8 * %addr, <64 x i8> %data) {
+  %vaddr = bitcast i8* %addr to <64 x i8>*
+  store <64 x i8>%data, <64 x i8>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test3
+; CHECK: vmovdqu8{{.*{%k[1-7]}}}
+; CHECK: ret
+define <64 x i8> @test3(i8 * %addr, <64 x i8> %old, <64 x i8> %mask1) {
+  %mask = icmp ne <64 x i8> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <64 x i8>*
+  %r = load <64 x i8>* %vaddr, align 1
+  %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> %old
+  ret <64 x i8>%res
+}
+
+; CHECK-LABEL: test4
+; CHECK: vmovdqu8{{.*{%k[1-7]} {z}}}
+; CHECK: ret
+define <64 x i8> @test4(i8 * %addr, <64 x i8> %mask1) {
+  %mask = icmp ne <64 x i8> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <64 x i8>*
+  %r = load <64 x i8>* %vaddr, align 1
+  %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> zeroinitializer
+  ret <64 x i8>%res
+}
+
+; CHECK-LABEL: test5
+; CHECK: vmovdqu16
+; CHECK: ret
+define <32 x i16> @test5(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <32 x i16>*
+  %res = load <32 x i16>* %vaddr, align 1
+  ret <32 x i16>%res
+}
+
+; CHECK-LABEL: test6
+; CHECK: vmovdqu16
+; CHECK: ret
+define void @test6(i8 * %addr, <32 x i16> %data) {
+  %vaddr = bitcast i8* %addr to <32 x i16>*
+  store <32 x i16>%data, <32 x i16>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test7
+; CHECK: vmovdqu16{{.*{%k[1-7]}}}
+; CHECK: ret
+define <32 x i16> @test7(i8 * %addr, <32 x i16> %old, <32 x i16> %mask1) {
+  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <32 x i16>*
+  %r = load <32 x i16>* %vaddr, align 1
+  %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> %old
+  ret <32 x i16>%res
+}
+
+; CHECK-LABEL: test8
+; CHECK: vmovdqu16{{.*{%k[1-7]} {z}}}
+; CHECK: ret
+define <32 x i16> @test8(i8 * %addr, <32 x i16> %mask1) {
+  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <32 x i16>*
+  %r = load <32 x i16>* %vaddr, align 1
+  %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> zeroinitializer
+  ret <32 x i16>%res
+}
diff --git a/test/CodeGen/X86/avx512bw-vec-cmp.ll b/test/CodeGen/X86/avx512bw-vec-cmp.ll
new file mode 100644
index 000000000000..6ba4db68662e
--- /dev/null
+++ b/test/CodeGen/X86/avx512bw-vec-cmp.ll
@@ -0,0 +1,135 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
+
+; CHECK-LABEL: test1
+; CHECK: vpcmpeqb {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <64 x i8> @test1(<64 x i8> %x, <64 x i8> %y) nounwind {
+  %mask = icmp eq <64 x i8> %x, %y
+  %max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %y
+  ret <64 x i8> %max
+}
+
+; CHECK-LABEL: test2
+; CHECK: vpcmpgtb {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <64 x i8> @test2(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1) nounwind {
+  %mask = icmp sgt <64 x i8> %x, %y
+  %max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y
+  ret <64 x i8> %max
+}
+
+; CHECK-LABEL: @test3
+; CHECK: vpcmplew {{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <32 x i16> @test3(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1) nounwind {
+  %mask = icmp sge <32 x i16> %x, %y
+  %max = select <32 x i1> %mask, <32 x i16> %x1, <32 x i16> %y
+  ret <32 x i16> %max
+}
+
+; CHECK-LABEL: test4
+; CHECK: vpcmpnleub {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <64 x i8> @test4(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1) nounwind {
+  %mask = icmp ugt <64 x i8> %x, %y
+  %max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y
+  ret <64 x i8> %max
+}
+
+; CHECK-LABEL: test5
+; CHECK: vpcmpeqw  (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <32 x i16> @test5(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %yp) nounwind {
+  %y = load <32 x i16>* %yp, align 4
+  %mask = icmp eq <32 x i16> %x, %y
+  %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
+  ret <32 x i16> %max
+}
+
+; CHECK-LABEL: @test6
+; CHECK: vpcmpgtw (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <32 x i16> @test6(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
+  %y = load <32 x i16>* %y.ptr, align 4
+  %mask = icmp sgt <32 x i16> %x, %y
+  %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
+  ret <32 x i16> %max
+}
+
+; CHECK-LABEL: @test7
+; CHECK: vpcmplew (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <32 x i16> @test7(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
+  %y = load <32 x i16>* %y.ptr, align 4
+  %mask = icmp sle <32 x i16> %x, %y
+  %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
+  ret <32 x i16> %max
+}
+
+; CHECK-LABEL: @test8
+; CHECK: vpcmpleuw (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <32 x i16> @test8(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
+  %y = load <32 x i16>* %y.ptr, align 4
+  %mask = icmp ule <32 x i16> %x, %y
+  %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
+  ret <32 x i16> %max
+}
+
+; CHECK-LABEL: @test9
+; CHECK: vpcmpeqw %zmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <32 x i16> @test9(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1, <32 x i16> %y1) nounwind {
+  %mask1 = icmp eq <32 x i16> %x1, %y1
+  %mask0 = icmp eq <32 x i16> %x, %y
+  %mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
+  %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %y
+  ret <32 x i16> %max
+}
+
+; CHECK-LABEL: @test10
+; CHECK: vpcmpleb %zmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqu8
+; CHECK: ret
+define <64 x i8> @test10(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1, <64 x i8> %y1) nounwind {
+  %mask1 = icmp sge <64 x i8> %x1, %y1
+  %mask0 = icmp sle <64 x i8> %x, %y
+  %mask = select <64 x i1> %mask0, <64 x i1> %mask1, <64 x i1> zeroinitializer
+  %max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %x1
+  ret <64 x i8> %max
+}
+
+; CHECK-LABEL: @test11
+; CHECK: vpcmpgtb (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqu8
+; CHECK: ret
+define <64 x i8> @test11(<64 x i8> %x, <64 x i8>* %y.ptr, <64 x i8> %x1, <64 x i8> %y1) nounwind {
+  %mask1 = icmp sgt <64 x i8> %x1, %y1
+  %y = load <64 x i8>* %y.ptr, align 4
+  %mask0 = icmp sgt <64 x i8> %x, %y
+  %mask = select <64 x i1> %mask0, <64 x i1> %mask1, <64 x i1> zeroinitializer
+  %max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %x1
+  ret <64 x i8> %max
+}
+
+; CHECK-LABEL: @test12
+; CHECK: vpcmpleuw (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <32 x i16> @test12(<32 x i16> %x, <32 x i16>* %y.ptr, <32 x i16> %x1, <32 x i16> %y1) nounwind {
+  %mask1 = icmp sge <32 x i16> %x1, %y1
+  %y = load <32 x i16>* %y.ptr, align 4
+  %mask0 = icmp ule <32 x i16> %x, %y
+  %mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
+  %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
+  ret <32 x i16> %max
+}
diff --git a/test/CodeGen/X86/avx512bwvl-arith.ll b/test/CodeGen/X86/avx512bwvl-arith.ll
new file mode 100644
index 000000000000..96f01409f5be
--- /dev/null
+++ b/test/CodeGen/X86/avx512bwvl-arith.ll
@@ -0,0 +1,206 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl| FileCheck %s
+
+; 256-bit
+
+; CHECK-LABEL: vpaddb256_test
+; CHECK: vpaddb %ymm{{.*}}
+; CHECK: ret
+define <32 x i8> @vpaddb256_test(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %x = add <32 x i8> %i, %j
+  ret <32 x i8> %x
+}
+
+; CHECK-LABEL: vpaddb256_fold_test
+; CHECK: vpaddb (%rdi), %ymm{{.*}}
+; CHECK: ret
+define <32 x i8> @vpaddb256_fold_test(<32 x i8> %i, <32 x i8>* %j) nounwind {
+  %tmp = load <32 x i8>* %j, align 4
+  %x = add <32 x i8> %i, %tmp
+  ret <32 x i8> %x
+}
+
+; CHECK-LABEL: vpaddw256_test
+; CHECK: vpaddw %ymm{{.*}}
+; CHECK: ret
+define <16 x i16> @vpaddw256_test(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %x = add <16 x i16> %i, %j
+  ret <16 x i16> %x
+}
+
+; CHECK-LABEL: vpaddw256_fold_test
+; CHECK: vpaddw (%rdi), %ymm{{.*}}
+; CHECK: ret
+define <16 x i16> @vpaddw256_fold_test(<16 x i16> %i, <16 x i16>* %j) nounwind {
+  %tmp = load <16 x i16>* %j, align 4
+  %x = add <16 x i16> %i, %tmp
+  ret <16 x i16> %x
+}
+
+; CHECK-LABEL: vpaddw256_mask_test
+; CHECK: vpaddw %ymm{{.*%k[1-7].*}}
+; CHECK: ret
+define <16 x i16> @vpaddw256_mask_test(<16 x i16> %i, <16 x i16> %j, <16 x i16> %mask1) nounwind readnone {
+  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
+  %x = add <16 x i16> %i, %j
+  %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %i
+  ret <16 x i16> %r
+}
+
+; CHECK-LABEL: vpaddw256_maskz_test
+; CHECK: vpaddw %ymm{{.*{%k[1-7]} {z}.*}}
+; CHECK: ret
+define <16 x i16> @vpaddw256_maskz_test(<16 x i16> %i, <16 x i16> %j, <16 x i16> %mask1) nounwind readnone {
+  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
+  %x = add <16 x i16> %i, %j
+  %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
+  ret <16 x i16> %r
+}
+
+; CHECK-LABEL: vpaddw256_mask_fold_test
+; CHECK: vpaddw (%rdi), %ymm{{.*%k[1-7]}}
+; CHECK: ret
+define <16 x i16> @vpaddw256_mask_fold_test(<16 x i16> %i, <16 x i16>* %j.ptr, <16 x i16> %mask1) nounwind readnone {
+  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
+  %j = load <16 x i16>* %j.ptr
+  %x = add <16 x i16> %i, %j
+  %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %i
+  ret <16 x i16> %r
+}
+
+; CHECK-LABEL: vpaddw256_maskz_fold_test
+; CHECK: vpaddw (%rdi), %ymm{{.*{%k[1-7]} {z}}}
+; CHECK: ret
+define <16 x i16> @vpaddw256_maskz_fold_test(<16 x i16> %i, <16 x i16>* %j.ptr, <16 x i16> %mask1) nounwind readnone {
+  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
+  %j = load <16 x i16>* %j.ptr
+  %x = add <16 x i16> %i, %j
+  %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
+  ret <16 x i16> %r
+}
+
+; CHECK-LABEL: vpsubb256_test
+; CHECK: vpsubb %ymm{{.*}}
+; CHECK: ret
+define <32 x i8> @vpsubb256_test(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+  %x = sub <32 x i8> %i, %j
+  ret <32 x i8> %x
+}
+
+; CHECK-LABEL: vpsubw256_test
+; CHECK: vpsubw %ymm{{.*}}
+; CHECK: ret
+define <16 x i16> @vpsubw256_test(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+  %x = sub <16 x i16> %i, %j
+  ret <16 x i16> %x
+}
+
+; CHECK-LABEL: vpmullw256_test
+; CHECK: vpmullw %ymm{{.*}}
+; CHECK: ret
+define <16 x i16> @vpmullw256_test(<16 x i16> %i, <16 x i16> %j) {
+  %x = mul <16 x i16> %i, %j
+  ret <16 x i16> %x
+}
+
+; 128-bit
+
+; CHECK-LABEL: vpaddb128_test
+; CHECK: vpaddb %xmm{{.*}}
+; CHECK: ret
+define <16 x i8> @vpaddb128_test(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
+  %x = add <16 x i8> %i, %j
+  ret <16 x i8> %x
+}
+
+; CHECK-LABEL: vpaddb128_fold_test
+; CHECK: vpaddb (%rdi), %xmm{{.*}}
+; CHECK: ret
+define <16 x i8> @vpaddb128_fold_test(<16 x i8> %i, <16 x i8>* %j) nounwind {
+  %tmp = load <16 x i8>* %j, align 4
+  %x = add <16 x i8> %i, %tmp
+  ret <16 x i8> %x
+}
+
+; CHECK-LABEL: vpaddw128_test
+; CHECK: vpaddw %xmm{{.*}}
+; CHECK: ret
+define <8 x i16> @vpaddw128_test(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+  %x = add <8 x i16> %i, %j
+  ret <8 x i16> %x
+}
+
+; CHECK-LABEL: vpaddw128_fold_test
+; CHECK: vpaddw (%rdi), %xmm{{.*}}
+; CHECK: ret
+define <8 x i16> @vpaddw128_fold_test(<8 x i16> %i, <8 x i16>* %j) nounwind {
+  %tmp = load <8 x i16>* %j, align 4
+  %x = add <8 x i16> %i, %tmp
+  ret <8 x i16> %x
+}
+
+; CHECK-LABEL: vpaddw128_mask_test
+; CHECK: vpaddw %xmm{{.*%k[1-7].*}}
+; CHECK: ret
+define <8 x i16> @vpaddw128_mask_test(<8 x i16> %i, <8 x i16> %j, <8 x i16> %mask1) nounwind readnone {
+  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
+  %x = add <8 x i16> %i, %j
+  %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %i
+  ret <8 x i16> %r
+}
+
+; CHECK-LABEL: vpaddw128_maskz_test
+; CHECK: vpaddw %xmm{{.*{%k[1-7]} {z}.*}}
+; CHECK: ret
+define <8 x i16> @vpaddw128_maskz_test(<8 x i16> %i, <8 x i16> %j, <8 x i16> %mask1) nounwind readnone {
+  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
+  %x = add <8 x i16> %i, %j
+  %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
+  ret <8 x i16> %r
+}
+
+; CHECK-LABEL: vpaddw128_mask_fold_test
+; CHECK: vpaddw (%rdi), %xmm{{.*%k[1-7]}}
+; CHECK: ret
+define <8 x i16> @vpaddw128_mask_fold_test(<8 x i16> %i, <8 x i16>* %j.ptr, <8 x i16> %mask1) nounwind readnone {
+  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
+  %j = load <8 x i16>* %j.ptr
+  %x = add <8 x i16> %i, %j
+  %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %i
+  ret <8 x i16> %r
+}
+
+; CHECK-LABEL: vpaddw128_maskz_fold_test
+; CHECK: vpaddw (%rdi), %xmm{{.*{%k[1-7]} {z}}}
+; CHECK: ret
+define <8 x i16> @vpaddw128_maskz_fold_test(<8 x i16> %i, <8 x i16>* %j.ptr, <8 x i16> %mask1) nounwind readnone {
+  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
+  %j = load <8 x i16>* %j.ptr
+  %x = add <8 x i16> %i, %j
+  %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
+  ret <8 x i16> %r
+}
+
+; CHECK-LABEL: vpsubb128_test
+; CHECK: vpsubb %xmm{{.*}}
+; CHECK: ret
+define <16 x i8> @vpsubb128_test(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
+  %x = sub <16 x i8> %i, %j
+  ret <16 x i8> %x
+}
+
+; CHECK-LABEL: vpsubw128_test
+; CHECK: vpsubw %xmm{{.*}}
+; CHECK: ret
+define <8 x i16> @vpsubw128_test(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
+  %x = sub <8 x i16> %i, %j
+  ret <8 x i16> %x
+}
+
+; CHECK-LABEL: vpmullw128_test
+; CHECK: vpmullw %xmm{{.*}}
+; CHECK: ret
+define <8 x i16> @vpmullw128_test(<8 x i16> %i, <8 x i16> %j) {
+  %x = mul <8 x i16> %i, %j
+  ret <8 x i16> %x
+}
+
diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
new file mode 100644
index 000000000000..678f252dea42
--- /dev/null
+++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -0,0 +1,998 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s
+
+; 256-bit
+
+define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: test_pcmpeq_b_256
+; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 ##
+  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
+  ret i32 %res
+}
+
+define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
+; CHECK-LABEL: test_mask_pcmpeq_b_256
+; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ##
+  %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
+  ret i32 %res
+}
+
+declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32)
+
+define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: test_pcmpeq_w_256
+; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 ##
+  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
+  ret i16 %res
+}
+
+define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
+; CHECK-LABEL: test_mask_pcmpeq_w_256
+; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ##
+  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
+  ret i16 %res
+}
+
+declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16)
+
+define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: test_pcmpgt_b_256
+; CHECK: vpcmpgtb %ymm1, %ymm0, %k0 ##
+  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
+  ret i32 %res
+}
+
+define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
+; CHECK-LABEL: test_mask_pcmpgt_b_256
+; CHECK: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ##
+  %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
+  ret i32 %res
+}
+
+declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32)
+
+define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: test_pcmpgt_w_256
+; CHECK: vpcmpgtw %ymm1, %ymm0, %k0 ##
+  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
+  ret i16 %res
+}
+
+define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
+; CHECK-LABEL: test_mask_pcmpgt_w_256
+; CHECK: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ##
+  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
+  ret i16 %res
+}
+
+declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16)
+
+define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
+; CHECK_LABEL: test_cmp_b_256
+; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 ##
+  %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
+  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
+; CHECK: vpcmpltb %ymm1, %ymm0, %k0 ##
+  %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
+  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
+; CHECK: vpcmpleb %ymm1, %ymm0, %k0 ##
+  %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
+  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
+; CHECK: vpcmpunordb %ymm1, %ymm0, %k0 ##
+  %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
+  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
+; CHECK: vpcmpneqb %ymm1, %ymm0, %k0 ##
+  %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
+  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
+; CHECK: vpcmpnltb %ymm1, %ymm0, %k0 ##
+  %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
+  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
+; CHECK: vpcmpnleb %ymm1, %ymm0, %k0 ##
+  %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
+  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
+; CHECK: vpcmpordb %ymm1, %ymm0, %k0 ##
+  %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
+  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
+  ret <8 x i32> %vec7
+}
+
+define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
+; CHECK_LABEL: test_mask_cmp_b_256
+; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ##
+  %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
+  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
+; CHECK: vpcmpltb %ymm1, %ymm0, %k0 {%k1} ##
+  %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
+  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
+; CHECK: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ##
+  %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
+  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
+; CHECK: vpcmpunordb %ymm1, %ymm0, %k0 {%k1} ##
+  %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
+  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
+; CHECK: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ##
+  %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
+  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
+; CHECK: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} ##
+  %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
+  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
+; CHECK: vpcmpnleb %ymm1, %ymm0, %k0 {%k1} ##
+  %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
+  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
+; CHECK: vpcmpordb %ymm1, %ymm0, %k0 {%k1} ##
+  %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
+  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
+  ret <8 x i32> %vec7
+}
+
+declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
+
+define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
+; CHECK_LABEL: test_ucmp_b_256
+; CHECK: vpcmpequb %ymm1, %ymm0, %k0 ##
+  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
+  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
+; CHECK: vpcmpltub %ymm1, %ymm0, %k0 ##
+  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
+  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
+; CHECK: vpcmpleub %ymm1, %ymm0, %k0 ##
+  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
+  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
+; CHECK: vpcmpunordub %ymm1, %ymm0, %k0 ##
+  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
+  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
+; CHECK: vpcmpnequb %ymm1, %ymm0, %k0 ##
+  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
+  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
+; CHECK: vpcmpnltub %ymm1, %ymm0, %k0 ##
+  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
+  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
+; CHECK: vpcmpnleub %ymm1, %ymm0, %k0 ##
+  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
+  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
+; CHECK: vpcmpordub %ymm1, %ymm0, %k0 ##
+  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
+  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
+  ret <8 x i32> %vec7
+}
+
+define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
+; CHECK_LABEL: test_mask_ucmp_b_256
+; CHECK: vpcmpequb %ymm1, %ymm0, %k0 {%k1} ##
+  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
+  %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
+; CHECK: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ##
+  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
+  %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
+; CHECK: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ##
+  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
+  %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
+; CHECK: vpcmpunordub %ymm1, %ymm0, %k0 {%k1} ##
+  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
+  %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
+; CHECK: vpcmpnequb %ymm1, %ymm0, %k0 {%k1} ##
+  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
+  %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
+; CHECK: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ##
+  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
+  %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
+; CHECK: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ##
+  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
+  %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
+; CHECK: vpcmpordub %ymm1, %ymm0, %k0 {%k1} ##
+  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
+  %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
+  ret <8 x i32> %vec7
+}
+
+declare i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
+
+define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
+; CHECK_LABEL: test_cmp_w_256
+; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 ##
+  %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
+  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
+; CHECK: vpcmpltw %ymm1, %ymm0, %k0 ##
+  %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
+  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
+; CHECK: vpcmplew %ymm1, %ymm0, %k0 ##
+  %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
+  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
+; CHECK: vpcmpunordw %ymm1, %ymm0, %k0 ##
+  %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
+  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
+; CHECK: vpcmpneqw %ymm1, %ymm0, %k0 ##
+  %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
+  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
+; CHECK: vpcmpnltw %ymm1, %ymm0, %k0 ##
+  %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
+  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
+; CHECK: vpcmpnlew %ymm1, %ymm0, %k0 ##
+  %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
+  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
+; CHECK: vpcmpordw %ymm1, %ymm0, %k0 ##
+  %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
+  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
+  ret <8 x i16> %vec7
+}
+
+define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
+; CHECK_LABEL: test_mask_cmp_w_256
+; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ##
+  %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
+  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
+; CHECK: vpcmpltw %ymm1, %ymm0, %k0 {%k1} ##
+  %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
+  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
+; CHECK: vpcmplew %ymm1, %ymm0, %k0 {%k1} ##
+  %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
+  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
+; CHECK: vpcmpunordw %ymm1, %ymm0, %k0 {%k1} ##
+  %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
+  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
+; CHECK: vpcmpneqw %ymm1, %ymm0, %k0 {%k1} ##
+  %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
+  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
+; CHECK: vpcmpnltw %ymm1, %ymm0, %k0 {%k1} ##
+  %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
+  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
+; CHECK: vpcmpnlew %ymm1, %ymm0, %k0 {%k1} ##
+  %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
+  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
+; CHECK: vpcmpordw %ymm1, %ymm0, %k0 {%k1} ##
+  %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
+  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
+  ret <8 x i16> %vec7
+}
+
+declare i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
+
+define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
+; CHECK_LABEL: test_ucmp_w_256
+; CHECK: vpcmpequw %ymm1, %ymm0, %k0 ##
+  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
+  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
+; CHECK: vpcmpltuw %ymm1, %ymm0, %k0 ##
+  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
+  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
+; CHECK: vpcmpleuw %ymm1, %ymm0, %k0 ##
+  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
+  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
+; CHECK: vpcmpunorduw %ymm1, %ymm0, %k0 ##
+  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
+  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
+; CHECK: vpcmpnequw %ymm1, %ymm0, %k0 ##
+  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
+  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
+; CHECK: vpcmpnltuw %ymm1, %ymm0, %k0 ##
+  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
+  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
+; CHECK: vpcmpnleuw %ymm1, %ymm0, %k0 ##
+  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
+  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
+; CHECK: vpcmporduw %ymm1, %ymm0, %k0 ##
+  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
+  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
+  ret <8 x i16> %vec7
+}
+
+define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
+; CHECK_LABEL: test_mask_ucmp_w_256
+; CHECK: vpcmpequw %ymm1, %ymm0, %k0 {%k1} ##
+  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
+  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
+; CHECK: vpcmpltuw %ymm1, %ymm0, %k0 {%k1} ##
+  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
+  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
+; CHECK: vpcmpleuw %ymm1, %ymm0, %k0 {%k1} ##
+  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
+  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
+; CHECK: vpcmpunorduw %ymm1, %ymm0, %k0 {%k1} ##
+  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
+  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
+; CHECK: vpcmpnequw %ymm1, %ymm0, %k0 {%k1} ##
+  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
+  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
+; CHECK: vpcmpnltuw %ymm1, %ymm0, %k0 {%k1} ##
+  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
+  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
+; CHECK: vpcmpnleuw %ymm1, %ymm0, %k0 {%k1} ##
+  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
+  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
+; CHECK: vpcmporduw %ymm1, %ymm0, %k0 {%k1} ##
+  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
+  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
+  ret <8 x i16> %vec7
+}
+
+declare i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
+
+; 128-bit
+
+define i16 @test_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_pcmpeq_b_128
+; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 ##
+  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
+  ret i16 %res
+}
+
+define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
+; CHECK-LABEL: test_mask_pcmpeq_b_128
+; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ##
+  %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
+  ret i16 %res
+}
+
+declare i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8>, <16 x i8>, i16)
+
+define i8 @test_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_pcmpeq_w_128
+; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
+  ret i8 %res
+}
+
+define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
+; CHECK-LABEL: test_mask_pcmpeq_w_128
+; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
+  ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16>, <8 x i16>, i8)
+
+define i16 @test_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_pcmpgt_b_128
+; CHECK: vpcmpgtb %xmm1, %xmm0, %k0 ##
+  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
+  ret i16 %res
+}
+
+define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
+; CHECK-LABEL: test_mask_pcmpgt_b_128
+; CHECK: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} ##
+  %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
+  ret i16 %res
+}
+
+declare i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8>, <16 x i8>, i16)
+
+define i8 @test_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_pcmpgt_w_128
+; CHECK: vpcmpgtw %xmm1, %xmm0, %k0 ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
+  ret i8 %res
+}
+
+define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
+; CHECK-LABEL: test_mask_pcmpgt_w_128
+; CHECK: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
+  ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16>, <8 x i16>, i8)
+
+define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK_LABEL: test_cmp_b_128
+; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 ##
+  %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
+  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
+; CHECK: vpcmpltb %xmm1, %xmm0, %k0 ##
+  %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
+  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
+; CHECK: vpcmpleb %xmm1, %xmm0, %k0 ##
+  %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
+  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
+; CHECK: vpcmpunordb %xmm1, %xmm0, %k0 ##
+  %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
+  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
+; CHECK: vpcmpneqb %xmm1, %xmm0, %k0 ##
+  %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
+  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
+; CHECK: vpcmpnltb %xmm1, %xmm0, %k0 ##
+  %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
+  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
+; CHECK: vpcmpnleb %xmm1, %xmm0, %k0 ##
+  %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
+  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
+; CHECK: vpcmpordb %xmm1, %xmm0, %k0 ##
+  %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
+  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
+  ret <8 x i16> %vec7
+}
+
+define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
+; CHECK_LABEL: test_mask_cmp_b_128
+; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ##
+  %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
+  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
+; CHECK: vpcmpltb %xmm1, %xmm0, %k0 {%k1} ##
+  %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
+  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
+; CHECK: vpcmpleb %xmm1, %xmm0, %k0 {%k1} ##
+  %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
+  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
+; CHECK: vpcmpunordb %xmm1, %xmm0, %k0 {%k1} ##
+  %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
+  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
+; CHECK: vpcmpneqb %xmm1, %xmm0, %k0 {%k1} ##
+  %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
+  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
+; CHECK: vpcmpnltb %xmm1, %xmm0, %k0 {%k1} ##
+  %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
+  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
+; CHECK: vpcmpnleb %xmm1, %xmm0, %k0 {%k1} ##
+  %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
+  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
+; CHECK: vpcmpordb %xmm1, %xmm0, %k0 {%k1} ##
+  %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
+  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
+  ret <8 x i16> %vec7
+}
+
+declare i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
+
+define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
+; CHECK_LABEL: test_ucmp_b_128
+; CHECK: vpcmpequb %xmm1, %xmm0, %k0 ##
+  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
+  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
+; CHECK: vpcmpltub %xmm1, %xmm0, %k0 ##
+  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
+  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
+; CHECK: vpcmpleub %xmm1, %xmm0, %k0 ##
+  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
+  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
+; CHECK: vpcmpunordub %xmm1, %xmm0, %k0 ##
+  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
+  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
+; CHECK: vpcmpnequb %xmm1, %xmm0, %k0 ##
+  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
+  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
+; CHECK: vpcmpnltub %xmm1, %xmm0, %k0 ##
+  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
+  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
+; CHECK: vpcmpnleub %xmm1, %xmm0, %k0 ##
+  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
+  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
+; CHECK: vpcmpordub %xmm1, %xmm0, %k0 ##
+  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
+  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
+  ret <8 x i16> %vec7
+}
+
+define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
+; CHECK_LABEL: test_mask_ucmp_b_128
+; CHECK: vpcmpequb %xmm1, %xmm0, %k0 {%k1} ##
+  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
+  %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
+; CHECK: vpcmpltub %xmm1, %xmm0, %k0 {%k1} ##
+  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
+  %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
+; CHECK: vpcmpleub %xmm1, %xmm0, %k0 {%k1} ##
+  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
+  %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
+; CHECK: vpcmpunordub %xmm1, %xmm0, %k0 {%k1} ##
+  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
+  %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
+; CHECK: vpcmpnequb %xmm1, %xmm0, %k0 {%k1} ##
+  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
+  %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
+; CHECK: vpcmpnltub %xmm1, %xmm0, %k0 {%k1} ##
+  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
+  %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
+; CHECK: vpcmpnleub %xmm1, %xmm0, %k0 {%k1} ##
+  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
+  %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
+; CHECK: vpcmpordub %xmm1, %xmm0, %k0 {%k1} ##
+  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
+  %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
+  ret <8 x i16> %vec7
+}
+
+declare i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
+
+define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK_LABEL: test_cmp_w_128
+; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 ##
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltw %xmm1, %xmm0, %k0 ##
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmplew %xmm1, %xmm0, %k0 ##
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordw %xmm1, %xmm0, %k0 ##
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpneqw %xmm1, %xmm0, %k0 ##
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltw %xmm1, %xmm0, %k0 ##
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnlew %xmm1, %xmm0, %k0 ##
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordw %xmm1, %xmm0, %k0 ##
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
+; CHECK_LABEL: test_mask_cmp_w_128
+; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ##
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltw %xmm1, %xmm0, %k0 {%k1} ##
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmplew %xmm1, %xmm0, %k0 {%k1} ##
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordw %xmm1, %xmm0, %k0 {%k1} ##
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpneqw %xmm1, %xmm0, %k0 {%k1} ##
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltw %xmm1, %xmm0, %k0 {%k1} ##
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnlew %xmm1, %xmm0, %k0 {%k1} ##
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordw %xmm1, %xmm0, %k0 {%k1} ##
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+declare i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
+
+define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
+; CHECK_LABEL: test_ucmp_w_128
+; CHECK: vpcmpequw %xmm1, %xmm0, %k0 ##
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltuw %xmm1, %xmm0, %k0 ##
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleuw %xmm1, %xmm0, %k0 ##
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunorduw %xmm1, %xmm0, %k0 ##
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpnequw %xmm1, %xmm0, %k0 ##
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltuw %xmm1, %xmm0, %k0 ##
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleuw %xmm1, %xmm0, %k0 ##
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmporduw %xmm1, %xmm0, %k0 ##
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
+; CHECK_LABEL: test_mask_ucmp_w_128
+; CHECK: vpcmpequw %xmm1, %xmm0, %k0 {%k1} ##
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} ##
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleuw %xmm1, %xmm0, %k0 {%k1} ##
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunorduw %xmm1, %xmm0, %k0 {%k1} ##
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpnequw %xmm1, %xmm0, %k0 {%k1} ##
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltuw %xmm1, %xmm0, %k0 {%k1} ##
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleuw %xmm1, %xmm0, %k0 {%k1} ##
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmporduw %xmm1, %xmm0, %k0 {%k1} ##
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
+
+declare <8 x float> @llvm.x86.fma.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+
+define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmadd256_ps
+  ; CHECK: vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa8,0xc2]
+  %res = call <8 x float> @llvm.x86.fma.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
+  ret <8 x float> %res
+}
+
+declare <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmadd128_ps
+  ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
+  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+  ret <4 x float> %res
+}
+
+declare <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
+
+define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
+; CHECK-LABEL: test_mask_fmadd256_pd:
+; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
+  %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
+  ret <4 x double> %res
+}
+
+declare <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
+
+define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
+; CHECK-LABEL: test_mask_fmadd128_pd:
+; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
+  %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
+  ret <2 x double> %res
+}
+
+declare <8 x float> @llvm.x86.fma.mask.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+
+define <8 x float> @test_mask_vfmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmsub256_ps
+  ; CHECK: vfmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xaa,0xc2]
+  %res = call <8 x float> @llvm.x86.fma.mask.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
+  ret <8 x float> %res
+}
+
+declare <4 x float> @llvm.x86.fma.mask.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <4 x float> @test_mask_vfmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmsub128_ps
+  ; CHECK: vfmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaa,0xc2]
+  %res = call <4 x float> @llvm.x86.fma.mask.vfmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+  ret <4 x float> %res
+}
+
+declare <4 x double> @llvm.x86.fma.mask.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+
+define <4 x double> @test_mask_vfmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmsub256_pd
+  ; CHECK: vfmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xaa,0xc2]
+  %res = call <4 x double> @llvm.x86.fma.mask.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+  ret <4 x double> %res
+}
+
+declare <2 x double> @llvm.x86.fma.mask.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
+define <2 x double> @test_mask_vfmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmsub128_pd
+  ; CHECK: vfmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaa,0xc2]
+  %res = call <2 x double> @llvm.x86.fma.mask.vfmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+  ret <2 x double> %res
+}
+
+declare <8 x float> @llvm.x86.fma.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+
+define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfnmadd256_ps
+  ; CHECK: vfnmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xac,0xc2]
+  %res = call <8 x float> @llvm.x86.fma.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
+  ret <8 x float> %res
+}
+
+declare <4 x float> @llvm.x86.fma.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfnmadd128_ps
+  ; CHECK: vfnmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xac,0xc2]
+  %res = call <4 x float> @llvm.x86.fma.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+  ret <4 x float> %res
+}
+
+declare <4 x double> @llvm.x86.fma.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+
+define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfnmadd256_pd
+  ; CHECK: vfnmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xac,0xc2]
+  %res = call <4 x double> @llvm.x86.fma.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+  ret <4 x double> %res
+}
+
+declare <2 x double> @llvm.x86.fma.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
+define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfnmadd128_pd
+  ; CHECK: vfnmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xac,0xc2]
+  %res = call <2 x double> @llvm.x86.fma.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+  ret <2 x double> %res
+}
+
+declare <8 x float> @llvm.x86.fma.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+
+define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfnmsub256_ps
+  ; CHECK: vfnmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xae,0xc2]
+  %res = call <8 x float> @llvm.x86.fma.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
+  ret <8 x float> %res
+}
+
+declare <4 x float> @llvm.x86.fma.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfnmsub128_ps
+  ; CHECK: vfnmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xae,0xc2]
+  %res = call <4 x float> @llvm.x86.fma.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+  ret <4 x float> %res
+}
+
+declare <4 x double> @llvm.x86.fma.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+
+define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfnmsub256_pd
+  ; CHECK: vfnmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xae,0xc2]
+  %res = call <4 x double> @llvm.x86.fma.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+  ret <4 x double> %res
+}
+
+declare <2 x double> @llvm.x86.fma.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
+define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfnmsub128_pd
+  ; CHECK: vfnmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xae,0xc2]
+  %res = call <2 x double> @llvm.x86.fma.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+  ret <2 x double> %res
+}
+
+declare <8 x float> @llvm.x86.fma.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+
+define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
+; CHECK-LABEL: test_mask_fmaddsub256_ps:
+; CHECK: vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa6,0xc2]
+  %res = call <8 x float> @llvm.x86.fma.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
+  ret <8 x float> %res
+}
+
+declare <4 x float> @llvm.x86.fma.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
+; CHECK-LABEL: test_mask_fmaddsub128_ps:
+; CHECK: vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa6,0xc2]
+  %res = call <4 x float> @llvm.x86.fma.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
+  ret <4 x float> %res
+}
+
+declare <4 x double> @llvm.x86.fma.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+
+define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmaddsub256_pd
+  ; CHECK: vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa6,0xc2]
+  %res = call <4 x double> @llvm.x86.fma.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+  ret <4 x double> %res
+}
+
+declare <2 x double> @llvm.x86.fma.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
+define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmaddsub128_pd
+  ; CHECK: vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa6,0xc2]
+  %res = call <2 x double> @llvm.x86.fma.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+  ret <2 x double> %res
+}
+
+declare <8 x float> @llvm.x86.fma.mask.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+
+define <8 x float> @test_mask_vfmsubadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmsubadd256_ps
+  ; CHECK: vfmsubadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa7,0xc2]
+  %res = call <8 x float> @llvm.x86.fma.mask.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
+  ret <8 x float> %res
+}
+
+declare <4 x float> @llvm.x86.fma.mask.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <4 x float> @test_mask_vfmsubadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmsubadd128_ps
+  ; CHECK: vfmsubadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa7,0xc2]
+  %res = call <4 x float> @llvm.x86.fma.mask.vfmsubadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+  ret <4 x float> %res
+}
+
+declare <4 x double> @llvm.x86.fma.mask.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+
+define <4 x double> @test_mask_vfmsubadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmsubadd256_pd
+  ; CHECK: vfmsubadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa7,0xc2]
+  %res = call <4 x double> @llvm.x86.fma.mask.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+  ret <4 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
+define <2 x double> @test_mask_vfmsubadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmsubadd128_pd
+  ; CHECK: vfmsubadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0xc2]
+  %res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_vfmsubadd128rm_pd(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmsubadd128rm_pd
+  ; CHECK: vfmsubadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0x07]
+  %a2 = load <2 x double>* %ptr_a2
+  %res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+  ret <2 x double> %res
+}
+declare <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) nounwind readnone
+define <8 x double> @test_mask_vfmsubaddrm_pd(<8 x double> %a0, <8 x double> %a1, <8 x double>* %ptr_a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmsubaddrm_pd
+  ; CHECK: vfmsubadd213pd  (%rdi), %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa7,0x07]
+  %a2 = load <8 x double>* %ptr_a2, align 8
+  %res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
+  ret <8 x double> %res
+}
+
+define <4 x float> @test_mask_vfmadd128_ps_r(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmadd128_ps_r
+  ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
+  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_vfmadd128_ps_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+  ; CHECK-LABEL: test_mask_vfmadd128_ps_rz
+  ; CHECK: vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2]
+  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmadd128_ps_rmk
+  ; CHECK: vfmadd213ps	(%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
+  %a2 = load <4 x float>* %ptr_a2
+  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmadd128_ps_rmka
+  ; CHECK: vfmadd213ps     (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
+  %a2 = load <4 x float>* %ptr_a2, align 8
+  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
+  ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz
+  ; CHECK: vfmadd213ps	(%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
+  %a2 = load <4 x float>* %ptr_a2
+  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
+  ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza
+  ; CHECK: vfmadd213ps	(%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
+  %a2 = load <4 x float>* %ptr_a2, align 4
+  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmadd128_ps_rmb
+  ; CHECK: vfmadd213ps	(%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
+  %q = load float* %ptr_a2
+  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
+  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
+  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
+  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmadd128_ps_rmba
+  ; CHECK: vfmadd213ps	(%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
+  %q = load float* %ptr_a2, align 4
+  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
+  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
+  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
+  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
+  ; CHECK-LABEL: test_mask_vfmadd128_ps_rmbz
+  ; CHECK: vfmadd213ps	(%rdi){1to4}, %xmm1, %xmm0  ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
+  %q = load float* %ptr_a2
+  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
+  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
+  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
+  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
+  ; CHECK-LABEL: test_mask_vfmadd128_ps_rmbza
+  ; CHECK: vfmadd213ps	(%rdi){1to4}, %xmm1, %xmm0  ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
+  %q = load float* %ptr_a2, align 4
+  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
+  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
+  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
+  %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
+  ret <4 x float> %res
+}
+
+define <2 x double> @test_mask_vfmadd128_pd_r(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmadd128_pd_r
+  ; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
+  %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_vfmadd128_pd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+  ; CHECK-LABEL: test_mask_vfmadd128_pd_rz
+  ; CHECK: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2]
+  %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmadd128_pd_rmk
+  ; CHECK: vfmadd213pd	(%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
+  %a2 = load <2 x double>* %ptr_a2
+  %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
+  ; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz
+  ; CHECK: vfmadd213pd	(%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
+  %a2 = load <2 x double>* %ptr_a2
+  %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
+  ret <2 x double> %res
+}
+
+define <4 x double> @test_mask_vfmadd256_pd_r(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmadd256_pd_r
+  ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
+  %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+  ret <4 x double> %res
+}
+
+define <4 x double> @test_mask_vfmadd256_pd_rz(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+  ; CHECK-LABEL: test_mask_vfmadd256_pd_rz
+  ; CHECK: vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2]
+  %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
+  ret <4 x double> %res
+}
+
+define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) {
+  ; CHECK-LABEL: test_mask_vfmadd256_pd_rmk
+  ; CHECK: vfmadd213pd	(%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
+  %a2 = load <4 x double>* %ptr_a2
+  %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
+  ret <4 x double> %res
+}
+
+define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
+  ; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz
+  ; CHECK: vfmadd213pd	(%rdi), %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
+  %a2 = load <4 x double>* %ptr_a2
+  %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
+  ret <4 x double> %res
+}
diff --git a/test/CodeGen/X86/avx512bwvl-mov.ll b/test/CodeGen/X86/avx512bwvl-mov.ll
new file mode 100644
index 000000000000..835844fc821c
--- /dev/null
+++ b/test/CodeGen/X86/avx512bwvl-mov.ll
@@ -0,0 +1,162 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s
+
+; CHECK-LABEL: test_256_1
+; CHECK: vmovdqu8 {{.*}} ## encoding: [0x62
+; CHECK: ret
+define <32 x i8> @test_256_1(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <32 x i8>*
+  %res = load <32 x i8>* %vaddr, align 1
+  ret <32 x i8>%res
+}
+
+; CHECK-LABEL: test_256_2
+; CHECK: vmovdqu8{{.*}} ## encoding: [0x62
+; CHECK: ret
+define void @test_256_2(i8 * %addr, <32 x i8> %data) {
+  %vaddr = bitcast i8* %addr to <32 x i8>*
+  store <32 x i8>%data, <32 x i8>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test_256_3
+; CHECK: vmovdqu8{{.*{%k[1-7]} }}## encoding: [0x62
+; CHECK: ret
+define <32 x i8> @test_256_3(i8 * %addr, <32 x i8> %old, <32 x i8> %mask1) {
+  %mask = icmp ne <32 x i8> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <32 x i8>*
+  %r = load <32 x i8>* %vaddr, align 1
+  %res = select <32 x i1> %mask, <32 x i8> %r, <32 x i8> %old
+  ret <32 x i8>%res
+}
+
+; CHECK-LABEL: test_256_4
+; CHECK: vmovdqu8{{.*{%k[1-7]} {z} }}## encoding: [0x62
+; CHECK: ret
+define <32 x i8> @test_256_4(i8 * %addr, <32 x i8> %mask1) {
+  %mask = icmp ne <32 x i8> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <32 x i8>*
+  %r = load <32 x i8>* %vaddr, align 1
+  %res = select <32 x i1> %mask, <32 x i8> %r, <32 x i8> zeroinitializer
+  ret <32 x i8>%res
+}
+
+; CHECK-LABEL: test_256_5
+; CHECK: vmovdqu16{{.*}} ## encoding: [0x62
+; CHECK: ret
+define <16 x i16> @test_256_5(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <16 x i16>*
+  %res = load <16 x i16>* %vaddr, align 1
+  ret <16 x i16>%res
+}
+
+; CHECK-LABEL: test_256_6
+; CHECK: vmovdqu16{{.*}} ## encoding: [0x62
+; CHECK: ret
+define void @test_256_6(i8 * %addr, <16 x i16> %data) {
+  %vaddr = bitcast i8* %addr to <16 x i16>*
+  store <16 x i16>%data, <16 x i16>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test_256_7
+; CHECK: vmovdqu16{{.*{%k[1-7]} }}## encoding: [0x62
+; CHECK: ret
+define <16 x i16> @test_256_7(i8 * %addr, <16 x i16> %old, <16 x i16> %mask1) {
+  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <16 x i16>*
+  %r = load <16 x i16>* %vaddr, align 1
+  %res = select <16 x i1> %mask, <16 x i16> %r, <16 x i16> %old
+  ret <16 x i16>%res
+}
+
+; CHECK-LABEL: test_256_8
+; CHECK: vmovdqu16{{.*{%k[1-7]} {z} }}## encoding: [0x62
+; CHECK: ret
+define <16 x i16> @test_256_8(i8 * %addr, <16 x i16> %mask1) {
+  %mask = icmp ne <16 x i16> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <16 x i16>*
+  %r = load <16 x i16>* %vaddr, align 1
+  %res = select <16 x i1> %mask, <16 x i16> %r, <16 x i16> zeroinitializer
+  ret <16 x i16>%res
+}
+
+; CHECK-LABEL: test_128_1
+; CHECK: vmovdqu8 {{.*}} ## encoding: [0x62
+; CHECK: ret
+define <16 x i8> @test_128_1(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <16 x i8>*
+  %res = load <16 x i8>* %vaddr, align 1
+  ret <16 x i8>%res
+}
+
+; CHECK-LABEL: test_128_2
+; CHECK: vmovdqu8{{.*}} ## encoding: [0x62
+; CHECK: ret
+define void @test_128_2(i8 * %addr, <16 x i8> %data) {
+  %vaddr = bitcast i8* %addr to <16 x i8>*
+  store <16 x i8>%data, <16 x i8>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test_128_3
+; CHECK: vmovdqu8{{.*{%k[1-7]} }}## encoding: [0x62
+; CHECK: ret
+define <16 x i8> @test_128_3(i8 * %addr, <16 x i8> %old, <16 x i8> %mask1) {
+  %mask = icmp ne <16 x i8> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <16 x i8>*
+  %r = load <16 x i8>* %vaddr, align 1
+  %res = select <16 x i1> %mask, <16 x i8> %r, <16 x i8> %old
+  ret <16 x i8>%res
+}
+
+; CHECK-LABEL: test_128_4
+; CHECK: vmovdqu8{{.*{%k[1-7]} {z} }}## encoding: [0x62
+; CHECK: ret
+define <16 x i8> @test_128_4(i8 * %addr, <16 x i8> %mask1) {
+  %mask = icmp ne <16 x i8> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <16 x i8>*
+  %r = load <16 x i8>* %vaddr, align 1
+  %res = select <16 x i1> %mask, <16 x i8> %r, <16 x i8> zeroinitializer
+  ret <16 x i8>%res
+}
+
+; CHECK-LABEL: test_128_5
+; CHECK: vmovdqu16{{.*}} ## encoding: [0x62
+; CHECK: ret
+define <8 x i16> @test_128_5(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <8 x i16>*
+  %res = load <8 x i16>* %vaddr, align 1
+  ret <8 x i16>%res
+}
+
+; CHECK-LABEL: test_128_6
+; CHECK: vmovdqu16{{.*}} ## encoding: [0x62
+; CHECK: ret
+define void @test_128_6(i8 * %addr, <8 x i16> %data) {
+  %vaddr = bitcast i8* %addr to <8 x i16>*
+  store <8 x i16>%data, <8 x i16>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test_128_7
+; CHECK: vmovdqu16{{.*{%k[1-7]} }}## encoding: [0x62
+; CHECK: ret
+define <8 x i16> @test_128_7(i8 * %addr, <8 x i16> %old, <8 x i16> %mask1) {
+  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x i16>*
+  %r = load <8 x i16>* %vaddr, align 1
+  %res = select <8 x i1> %mask, <8 x i16> %r, <8 x i16> %old
+  ret <8 x i16>%res
+}
+
+; CHECK-LABEL: test_128_8
+; CHECK: vmovdqu16{{.*{%k[1-7]} {z} }}## encoding: [0x62
+; CHECK: ret
+define <8 x i16> @test_128_8(i8 * %addr, <8 x i16> %mask1) {
+  %mask = icmp ne <8 x i16> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x i16>*
+  %r = load <8 x i16>* %vaddr, align 1
+  %res = select <8 x i1> %mask, <8 x i16> %r, <8 x i16> zeroinitializer
+  ret <8 x i16>%res
+}
+
diff --git a/test/CodeGen/X86/avx512bwvl-vec-cmp.ll b/test/CodeGen/X86/avx512bwvl-vec-cmp.ll
new file mode 100644
index 000000000000..2d13a166a725
--- /dev/null
+++ b/test/CodeGen/X86/avx512bwvl-vec-cmp.ll
@@ -0,0 +1,269 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
+
+; CHECK-LABEL: test256_1
+; CHECK: vpcmpeqb {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <32 x i8> @test256_1(<32 x i8> %x, <32 x i8> %y) nounwind {
+  %mask = icmp eq <32 x i8> %x, %y
+  %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %y
+  ret <32 x i8> %max
+}
+
+; CHECK-LABEL: test256_2
+; CHECK: vpcmpgtb {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <32 x i8> @test256_2(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind {
+  %mask = icmp sgt <32 x i8> %x, %y
+  %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
+  ret <32 x i8> %max
+}
+
+; CHECK-LABEL: @test256_3
+; CHECK: vpcmplew {{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <16 x i16> @test256_3(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1) nounwind {
+  %mask = icmp sge <16 x i16> %x, %y
+  %max = select <16 x i1> %mask, <16 x i16> %x1, <16 x i16> %y
+  ret <16 x i16> %max
+}
+
+; CHECK-LABEL: test256_4
+; CHECK: vpcmpnleub {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <32 x i8> @test256_4(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind {
+  %mask = icmp ugt <32 x i8> %x, %y
+  %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
+  ret <32 x i8> %max
+}
+
+; CHECK-LABEL: test256_5
+; CHECK: vpcmpeqw  (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <16 x i16> @test256_5(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %yp) nounwind {
+  %y = load <16 x i16>* %yp, align 4
+  %mask = icmp eq <16 x i16> %x, %y
+  %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
+  ret <16 x i16> %max
+}
+
+; CHECK-LABEL: @test256_6
+; CHECK: vpcmpgtw (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <16 x i16> @test256_6(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
+  %y = load <16 x i16>* %y.ptr, align 4
+  %mask = icmp sgt <16 x i16> %x, %y
+  %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
+  ret <16 x i16> %max
+}
+
+; CHECK-LABEL: @test256_7
+; CHECK: vpcmplew (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <16 x i16> @test256_7(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
+  %y = load <16 x i16>* %y.ptr, align 4
+  %mask = icmp sle <16 x i16> %x, %y
+  %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
+  ret <16 x i16> %max
+}
+
+; CHECK-LABEL: @test256_8
+; CHECK: vpcmpleuw (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <16 x i16> @test256_8(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
+  %y = load <16 x i16>* %y.ptr, align 4
+  %mask = icmp ule <16 x i16> %x, %y
+  %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
+  ret <16 x i16> %max
+}
+
+; CHECK-LABEL: @test256_9
+; CHECK: vpcmpeqw %ymm{{.*{%k[1-7]}}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <16 x i16> @test256_9(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1, <16 x i16> %y1) nounwind {
+  %mask1 = icmp eq <16 x i16> %x1, %y1
+  %mask0 = icmp eq <16 x i16> %x, %y
+  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
+  %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %y
+  ret <16 x i16> %max
+}
+
+; CHECK-LABEL: @test256_10
+; CHECK: vpcmpleb %ymm{{.*{%k[1-7]}}}
+; CHECK: vmovdqu8
+; CHECK: ret
+define <32 x i8> @test256_10(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1, <32 x i8> %y1) nounwind {
+  %mask1 = icmp sge <32 x i8> %x1, %y1
+  %mask0 = icmp sle <32 x i8> %x, %y
+  %mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
+  %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
+  ret <32 x i8> %max
+}
+
+; CHECK-LABEL: @test256_11
+; CHECK: vpcmpgtb (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqu8
+; CHECK: ret
+define <32 x i8> @test256_11(<32 x i8> %x, <32 x i8>* %y.ptr, <32 x i8> %x1, <32 x i8> %y1) nounwind {
+  %mask1 = icmp sgt <32 x i8> %x1, %y1
+  %y = load <32 x i8>* %y.ptr, align 4
+  %mask0 = icmp sgt <32 x i8> %x, %y
+  %mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
+  %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
+  ret <32 x i8> %max
+}
+
+; CHECK-LABEL: @test256_12
+; CHECK: vpcmpleuw (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <16 x i16> @test256_12(<16 x i16> %x, <16 x i16>* %y.ptr, <16 x i16> %x1, <16 x i16> %y1) nounwind {
+  %mask1 = icmp sge <16 x i16> %x1, %y1
+  %y = load <16 x i16>* %y.ptr, align 4
+  %mask0 = icmp ule <16 x i16> %x, %y
+  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
+  %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
+  ret <16 x i16> %max
+}
+
+; CHECK-LABEL: test128_1
+; CHECK: vpcmpeqb {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <16 x i8> @test128_1(<16 x i8> %x, <16 x i8> %y) nounwind {
+  %mask = icmp eq <16 x i8> %x, %y
+  %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %y
+  ret <16 x i8> %max
+}
+
+; CHECK-LABEL: test128_2
+; CHECK: vpcmpgtb {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <16 x i8> @test128_2(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind {
+  %mask = icmp sgt <16 x i8> %x, %y
+  %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
+  ret <16 x i8> %max
+}
+
+; CHECK-LABEL: @test128_3
+; CHECK: vpcmplew {{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <8 x i16> @test128_3(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1) nounwind {
+  %mask = icmp sge <8 x i16> %x, %y
+  %max = select <8 x i1> %mask, <8 x i16> %x1, <8 x i16> %y
+  ret <8 x i16> %max
+}
+
+; CHECK-LABEL: test128_4
+; CHECK: vpcmpnleub {{.*%k[0-7]}}
+; CHECK: vmovdqu8 {{.*}}%k1
+; CHECK: ret
+define <16 x i8> @test128_4(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind {
+  %mask = icmp ugt <16 x i8> %x, %y
+  %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
+  ret <16 x i8> %max
+}
+
+; CHECK-LABEL: test128_5
+; CHECK: vpcmpeqw  (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <8 x i16> @test128_5(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %yp) nounwind {
+  %y = load <8 x i16>* %yp, align 4
+  %mask = icmp eq <8 x i16> %x, %y
+  %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
+  ret <8 x i16> %max
+}
+
+; CHECK-LABEL: @test128_6
+; CHECK: vpcmpgtw (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <8 x i16> @test128_6(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
+  %y = load <8 x i16>* %y.ptr, align 4
+  %mask = icmp sgt <8 x i16> %x, %y
+  %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
+  ret <8 x i16> %max
+}
+
+; CHECK-LABEL: @test128_7
+; CHECK: vpcmplew (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <8 x i16> @test128_7(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
+  %y = load <8 x i16>* %y.ptr, align 4
+  %mask = icmp sle <8 x i16> %x, %y
+  %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
+  ret <8 x i16> %max
+}
+
+; CHECK-LABEL: @test128_8
+; CHECK: vpcmpleuw (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <8 x i16> @test128_8(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
+  %y = load <8 x i16>* %y.ptr, align 4
+  %mask = icmp ule <8 x i16> %x, %y
+  %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
+  ret <8 x i16> %max
+}
+
+; CHECK-LABEL: @test128_9
+; CHECK: vpcmpeqw %xmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <8 x i16> @test128_9(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1, <8 x i16> %y1) nounwind {
+  %mask1 = icmp eq <8 x i16> %x1, %y1
+  %mask0 = icmp eq <8 x i16> %x, %y
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
+  %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %y
+  ret <8 x i16> %max
+}
+
+; CHECK-LABEL: @test128_10
+; CHECK: vpcmpleb %xmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqu8
+; CHECK: ret
+define <16 x i8> @test128_10(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1, <16 x i8> %y1) nounwind {
+  %mask1 = icmp sge <16 x i8> %x1, %y1
+  %mask0 = icmp sle <16 x i8> %x, %y
+  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
+  %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
+  ret <16 x i8> %max
+}
+
+; CHECK-LABEL: @test128_11
+; CHECK: vpcmpgtb (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqu8
+; CHECK: ret
+define <16 x i8> @test128_11(<16 x i8> %x, <16 x i8>* %y.ptr, <16 x i8> %x1, <16 x i8> %y1) nounwind {
+  %mask1 = icmp sgt <16 x i8> %x1, %y1
+  %y = load <16 x i8>* %y.ptr, align 4
+  %mask0 = icmp sgt <16 x i8> %x, %y
+  %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
+  %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
+  ret <16 x i8> %max
+}
+
+; CHECK-LABEL: @test128_12
+; CHECK: vpcmpleuw (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqu16
+; CHECK: ret
+define <8 x i16> @test128_12(<8 x i16> %x, <8 x i16>* %y.ptr, <8 x i16> %x1, <8 x i16> %y1) nounwind {
+  %mask1 = icmp sge <8 x i16> %x1, %y1
+  %y = load <8 x i16>* %y.ptr, align 4
+  %mask0 = icmp ule <8 x i16> %x, %y
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
+  %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
+  ret <8 x i16> %max
+}
diff --git a/test/CodeGen/X86/avx512dq-mask-op.ll b/test/CodeGen/X86/avx512dq-mask-op.ll
new file mode 100644
index 000000000000..32a2633f8d06
--- /dev/null
+++ b/test/CodeGen/X86/avx512dq-mask-op.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
+
+define i8 @mask8(i8 %x) {
+  %m0 = bitcast i8 %x to <8 x i1>
+  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
+  %ret = bitcast <8 x i1> %m1 to i8
+  ret i8 %ret
+; CHECK: mask8
+; CHECK: knotb
+; CHECK: ret
+}
+
+define void @mask8_mem(i8* %ptr) {
+  %x = load i8* %ptr, align 4
+  %m0 = bitcast i8 %x to <8 x i1>
+  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
+  %ret = bitcast <8 x i1> %m1 to i8
+  store i8 %ret, i8* %ptr, align 4
+  ret void
+; CHECK-LABEL: mask8_mem
+; CHECK: kmovb ([[ARG1:%rdi|%rcx]]), %k{{[0-7]}}
+; CHECK-NEXT: knotb
+; CHECK-NEXT: kmovb %k{{[0-7]}}, ([[ARG1]])
+; CHECK: ret
+}
+
+define i8 @mand8(i8 %x, i8 %y) {
+  %ma = bitcast i8 %x to <8 x i1>
+  %mb = bitcast i8 %y to <8 x i1>
+  %mc = and <8 x i1> %ma, %mb
+  %md = xor <8 x i1> %ma, %mb
+  %me = or <8 x i1> %mc, %md
+  %ret = bitcast <8 x i1> %me to i8
+; CHECK: kandb
+; CHECK: kxorb
+; CHECK: korb
+  ret i8 %ret
+}
diff --git a/test/CodeGen/X86/avx512er-intrinsics.ll b/test/CodeGen/X86/avx512er-intrinsics.ll
new file mode 100644
index 000000000000..fa4352e64dce
--- /dev/null
+++ b/test/CodeGen/X86/avx512er-intrinsics.ll
@@ -0,0 +1,116 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=knl --show-mc-encoding| FileCheck %s
+
+define <16 x float> @test_rsqrt28_ps(<16 x float> %a0) {
+  ; CHECK: vrsqrt28ps %zmm0, %zmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0]
+  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
+  ret <16 x float> %res
+}
+
+define <16 x float> @test1_rsqrt28_ps(<16 x float> %a0, <16 x float> %a1) {
+  ; CHECK: kmovw
+  ; CHECK: vrsqrt28ps %zmm0, %zmm1 {%k1}{sae} # encoding: [0x62,0xf2,0x7d,0x19,0xcc,0xc8]
+  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> %a1, i16 6, i32 8)
+  ret <16 x float> %res
+}
+
+define <16 x float> @test2_rsqrt28_ps(<16 x float> %a0) {
+  ; CHECK: vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0]
+  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 4)
+  ret <16 x float> %res
+}
+
+define <16 x float> @test3_rsqrt28_ps(<16 x float> %a0) {
+  ; CHECK: kmovw
+  ; CHECK: vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0]
+  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 6, i32 4)
+  ret <16 x float> %res
+}
+
+define <16 x float> @test4_rsqrt28_ps(<16 x float> %a0) {
+  ; CHECK: vrsqrt28ps %zmm0, %zmm0 {%k1} {z}{sae} # encoding: [0x62,0xf2,0x7d,0x99,0xcc,0xc0]
+  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 8) 
+  ret <16 x float> %res
+}
+
+
+declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
+
+define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) {
+  ; CHECK: vrcp28ps %zmm0, %zmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0]
+  %res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
+
+define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
+  ; CHECK: vrcp28pd %zmm0, %zmm0 {sae} # encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0]
+  %res = call <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8) 
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
+
+define <16 x float> @test_exp2_ps_512(<16 x float> %a0) {
+  ; CHECK: vexp2ps %zmm0, %zmm0 {sae}      # encoding: [0x62,0xf2,0x7d,0x18,0xc8,0xc0]
+  %res = call <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
+
+define <8 x double> @test_exp2_pd_512(<8 x double> %a0) {
+  ; CHECK: vexp2pd %zmm0, %zmm0 {sae}      # encoding: [0x62,0xf2,0xfd,0x18,0xc8,0xc0]
+  %res = call <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
+  ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
+
+define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
+  ; CHECK: vrsqrt28ss %xmm0, %xmm0, %xmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
+  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
+
+define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
+  ; CHECK: vrcp28ss %xmm0, %xmm0, %xmm0 {sae} # encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
+  %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
+
+define <4 x float> @test_rsqrt28_ss_maskz(<4 x float> %a0) {
+  ; CHECK: vrsqrt28ss %xmm0, %xmm0, %xmm0 {%k1} {z}{sae} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
+  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 7, i32 8) ; 
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_rsqrt28_ss_mask(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0) {
+  ; CHECK: vrsqrt28ss %xmm1, %xmm0, %xmm2 {%k1}{sae} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
+  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 7, i32 8) ;
+  ret <4 x float> %res
+}
+
+define <2 x double> @test_rsqrt28_sd_maskz(<2 x double> %a0) {
+  ; CHECK: vrsqrt28sd %xmm0, %xmm0, %xmm0 {%k1} {z}{sae} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
+  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 7, i32 8) ; 
+  ret <2 x double> %res
+}
+
+declare <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
+
+define <2 x double> @test_rsqrt28_sd_maskz_mem(<2 x double> %a0, double* %ptr ) {
+  ; CHECK: vrsqrt28sd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x07]
+  %mem = load double * %ptr, align 8
+  %mem_v = insertelement <2 x double> undef, double %mem, i32 0
+  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 7, i32 4) ; 
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_rsqrt28_sd_maskz_mem_offset(<2 x double> %a0, double* %ptr ) {
+  ; CHECK: vrsqrt28sd 144(%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x47,0x12]
+  %ptr1 = getelementptr double* %ptr, i32 18
+  %mem = load double * %ptr1, align 8
+  %mem_v = insertelement <2 x double> undef, double %mem, i32 0
+  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 7, i32 4) ;
+  ret <2 x double> %res
+}
+
diff --git a/test/CodeGen/X86/avx512vl-arith.ll b/test/CodeGen/X86/avx512vl-arith.ll
new file mode 100644
index 000000000000..1f7da7814cc9
--- /dev/null
+++ b/test/CodeGen/X86/avx512vl-arith.ll
@@ -0,0 +1,794 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl| FileCheck %s
+
+; 256-bit
+
+; CHECK-LABEL: vpaddq256_test
+; CHECK: vpaddq %ymm{{.*}}
+; CHECK: ret
+define <4 x i64> @vpaddq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %x = add <4 x i64> %i, %j
+  ret <4 x i64> %x
+}
+
+; CHECK-LABEL: vpaddq256_fold_test
+; CHECK: vpaddq (%rdi), %ymm{{.*}}
+; CHECK: ret
+define <4 x i64> @vpaddq256_fold_test(<4 x i64> %i, <4 x i64>* %j) nounwind {
+  %tmp = load <4 x i64>* %j, align 4
+  %x = add <4 x i64> %i, %tmp
+  ret <4 x i64> %x
+}
+
+; CHECK-LABEL: vpaddq256_broadcast_test
+; CHECK: vpaddq LCP{{.*}}(%rip){1to4}, %ymm{{.*}}
+; CHECK: ret
+define <4 x i64> @vpaddq256_broadcast_test(<4 x i64> %i) nounwind {
+  %x = add <4 x i64> %i, <i64 1, i64 1, i64 1, i64 1>
+  ret <4 x i64> %x
+}
+
+; CHECK-LABEL: vpaddq256_broadcast2_test
+; CHECK: vpaddq (%rdi){1to4}, %ymm{{.*}}
+; CHECK: ret
+define <4 x i64> @vpaddq256_broadcast2_test(<4 x i64> %i, i64* %j.ptr) nounwind {
+  %j = load i64* %j.ptr
+  %j.0 = insertelement <4 x i64> undef, i64 %j, i32 0
+  %j.v = shufflevector <4 x i64> %j.0, <4 x i64> undef, <4 x i32> zeroinitializer
+  %x = add <4 x i64> %i, %j.v
+  ret <4 x i64> %x
+}
+
+; CHECK-LABEL: vpaddd256_test
+; CHECK: vpaddd %ymm{{.*}}
+; CHECK: ret
+define <8 x i32> @vpaddd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %x = add <8 x i32> %i, %j
+  ret <8 x i32> %x
+}
+
+; CHECK-LABEL: vpaddd256_fold_test
+; CHECK: vpaddd (%rdi), %ymm{{.*}}
+; CHECK: ret
+define <8 x i32> @vpaddd256_fold_test(<8 x i32> %i, <8 x i32>* %j) nounwind {
+  %tmp = load <8 x i32>* %j, align 4
+  %x = add <8 x i32> %i, %tmp
+  ret <8 x i32> %x
+}
+
+; CHECK-LABEL: vpaddd256_broadcast_test
+; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*}}
+; CHECK: ret
+define <8 x i32> @vpaddd256_broadcast_test(<8 x i32> %i) nounwind {
+  %x = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i32> %x
+}
+
+; CHECK-LABEL: vpaddd256_mask_test
+; CHECK: vpaddd %ymm{{.*%k[1-7].*}}
+; CHECK: ret
+define <8 x i32> @vpaddd256_mask_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %x = add <8 x i32> %i, %j
+  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
+  ret <8 x i32> %r
+}
+
+; CHECK-LABEL: vpaddd256_maskz_test
+; CHECK: vpaddd %ymm{{.*{%k[1-7]} {z}.*}}
+; CHECK: ret
+define <8 x i32> @vpaddd256_maskz_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %x = add <8 x i32> %i, %j
+  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+  ret <8 x i32> %r
+}
+
+; CHECK-LABEL: vpaddd256_mask_fold_test
+; CHECK: vpaddd (%rdi), %ymm{{.*%k[1-7]}}
+; CHECK: ret
+define <8 x i32> @vpaddd256_mask_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind readnone {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %j = load <8 x i32>* %j.ptr
+  %x = add <8 x i32> %i, %j
+  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
+  ret <8 x i32> %r
+}
+
+; CHECK-LABEL: vpaddd256_mask_broadcast_test
+; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*{%k[1-7]}}}
+; CHECK: ret
+define <8 x i32> @vpaddd256_mask_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %x = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
+  ret <8 x i32> %r
+}
+
+; CHECK-LABEL: vpaddd256_maskz_fold_test
+; CHECK: vpaddd (%rdi), %ymm{{.*{%k[1-7]} {z}}}
+; CHECK: ret
+define <8 x i32> @vpaddd256_maskz_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind readnone {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %j = load <8 x i32>* %j.ptr
+  %x = add <8 x i32> %i, %j
+  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+  ret <8 x i32> %r
+}
+
+; CHECK-LABEL: vpaddd256_maskz_broadcast_test
+; CHECK: vpaddd LCP{{.*}}(%rip){1to8}, %ymm{{.*{%k[1-7]} {z}}}
+; CHECK: ret
+define <8 x i32> @vpaddd256_maskz_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %x = add <8 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+  ret <8 x i32> %r
+}
+
+; CHECK-LABEL: vpsubq256_test
+; CHECK: vpsubq %ymm{{.*}}
+; CHECK: ret
+define <4 x i64> @vpsubq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+  %x = sub <4 x i64> %i, %j
+  ret <4 x i64> %x
+}
+
+; CHECK-LABEL: vpsubd256_test
+; CHECK: vpsubd %ymm{{.*}}
+; CHECK: ret
+define <8 x i32> @vpsubd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+  %x = sub <8 x i32> %i, %j
+  ret <8 x i32> %x
+}
+
+; CHECK-LABEL: vpmulld256_test
+; CHECK: vpmulld %ymm{{.*}}
+; CHECK: ret
+define <8 x i32> @vpmulld256_test(<8 x i32> %i, <8 x i32> %j) {
+  %x = mul <8 x i32> %i, %j
+  ret <8 x i32> %x
+}
+
+; CHECK-LABEL: test_vaddpd_256
+; CHECK: vaddpd{{.*}}
+; CHECK: ret
+define <4 x double> @test_vaddpd_256(<4 x double> %y, <4 x double> %x) {
+entry:
+  %add.i = fadd <4 x double> %x, %y
+  ret <4 x double> %add.i
+}
+
+; CHECK-LABEL: test_fold_vaddpd_256
+; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
+; CHECK: ret
+define <4 x double> @test_fold_vaddpd_256(<4 x double> %y) {
+entry:
+  %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 4.500000e+00, double 5.600000e+00>
+  ret <4 x double> %add.i
+}
+
+; CHECK-LABEL: test_broadcast_vaddpd_256
+; CHECK: LCP{{.*}}(%rip){1to8}, %ymm0, %ymm0
+; CHECK: ret
+define <8 x float> @test_broadcast_vaddpd_256(<8 x float> %a) nounwind {
+  %b = fadd <8 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
+  ret <8 x float> %b
+}
+
+; CHECK-LABEL: test_mask_vaddps_256
+; CHECK: vaddps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x float> @test_mask_vaddps_256(<8 x float> %dst, <8 x float> %i,
+                                        <8 x float> %j, <8 x i32> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %x = fadd <8 x float> %i, %j
+  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
+  ret <8 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmulps_256
+; CHECK: vmulps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x float> @test_mask_vmulps_256(<8 x float> %dst, <8 x float> %i,
+                                        <8 x float> %j, <8 x i32> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %x = fmul <8 x float> %i, %j
+  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
+  ret <8 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vminps_256
+; CHECK: vminps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x float> @test_mask_vminps_256(<8 x float> %dst, <8 x float> %i,
+                                        <8 x float> %j, <8 x i32> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %cmp_res = fcmp olt <8 x float> %i, %j
+  %min = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
+  %r = select <8 x i1> %mask, <8 x float> %min, <8 x float> %dst
+  ret <8 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmaxps_256
+; CHECK: vmaxps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x float> @test_mask_vmaxps_256(<8 x float> %dst, <8 x float> %i,
+                                        <8 x float> %j, <8 x i32> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %cmp_res = fcmp ogt <8 x float> %i, %j
+  %max = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
+  %r = select <8 x i1> %mask, <8 x float> %max, <8 x float> %dst
+  ret <8 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vsubps_256
+; CHECK: vsubps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x float> @test_mask_vsubps_256(<8 x float> %dst, <8 x float> %i,
+                                        <8 x float> %j, <8 x i32> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %x = fsub <8 x float> %i, %j
+  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
+  ret <8 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vdivps_256
+; CHECK: vdivps {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <8 x float> @test_mask_vdivps_256(<8 x float> %dst, <8 x float> %i,
+                                        <8 x float> %j, <8 x i32> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %x = fdiv <8 x float> %i, %j
+  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
+  ret <8 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmulpd_256
+; CHECK: vmulpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x double> @test_mask_vmulpd_256(<4 x double> %dst, <4 x double> %i,
+                                        <4 x double> %j, <4 x i64> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %x = fmul <4 x double> %i, %j
+  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
+  ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vminpd_256
+; CHECK: vminpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x double> @test_mask_vminpd_256(<4 x double> %dst, <4 x double> %i,
+                                        <4 x double> %j, <4 x i64> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %cmp_res = fcmp olt <4 x double> %i, %j
+  %min = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
+  %r = select <4 x i1> %mask, <4 x double> %min, <4 x double> %dst
+  ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vmaxpd_256
+; CHECK: vmaxpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x double> @test_mask_vmaxpd_256(<4 x double> %dst, <4 x double> %i,
+                                        <4 x double> %j, <4 x i64> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %cmp_res = fcmp ogt <4 x double> %i, %j
+  %max = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
+  %r = select <4 x i1> %mask, <4 x double> %max, <4 x double> %dst
+  ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vsubpd_256
+; CHECK: vsubpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x double> @test_mask_vsubpd_256(<4 x double> %dst, <4 x double> %i,
+                                        <4 x double> %j, <4 x i64> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %x = fsub <4 x double> %i, %j
+  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
+  ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vdivpd_256
+; CHECK: vdivpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x double> @test_mask_vdivpd_256(<4 x double> %dst, <4 x double> %i,
+                                        <4 x double> %j, <4 x i64> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %x = fdiv <4 x double> %i, %j
+  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
+  ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vaddpd_256
+; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x double> @test_mask_vaddpd_256(<4 x double> %dst, <4 x double> %i,
+                                         <4 x double> %j, <4 x i64> %mask1)
+                                         nounwind readnone {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %x = fadd <4 x double> %i, %j
+  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
+  ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_vaddpd_256
+; CHECK: vaddpd {{%ymm[0-9]{1,2}, %ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}}}
+; CHECK: ret
+define <4 x double> @test_maskz_vaddpd_256(<4 x double> %i, <4 x double> %j,
+                                          <4 x i64> %mask1) nounwind readnone {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %x = fadd <4 x double> %i, %j
+  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
+  ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_mask_fold_vaddpd_256
+; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]}.*}}
+; CHECK: ret
+define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %i,
+                                         <4 x double>* %j,  <4 x i64> %mask1)
+                                         nounwind {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %tmp = load <4 x double>* %j
+  %x = fadd <4 x double> %i, %tmp
+  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
+  ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_fold_vaddpd_256
+; CHECK: vaddpd (%rdi), {{.*%ymm[0-9]{1,2}, %ymm[0-9]{1,2} {%k[1-7]} {z}.*}}
+; CHECK: ret
+define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, <4 x double>* %j,
+                                          <4 x i64> %mask1) nounwind {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %tmp = load <4 x double>* %j
+  %x = fadd <4 x double> %i, %tmp
+  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
+  ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_broadcast2_vaddpd_256
+; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*}}
+; CHECK: ret
+define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, double* %j) nounwind {
+  %tmp = load double* %j
+  %b = insertelement <4 x double> undef, double %tmp, i32 0
+  %c = shufflevector <4 x double> %b, <4 x double> undef,
+                     <4 x i32> zeroinitializer
+  %x = fadd <4 x double> %c, %i
+  ret <4 x double> %x
+}
+
+; CHECK-LABEL: test_mask_broadcast_vaddpd_256
+; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]}.*}}
+; CHECK: ret
+define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i,
+                                          double* %j, <4 x i64> %mask1) nounwind {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %tmp = load double* %j
+  %b = insertelement <4 x double> undef, double %tmp, i32 0
+  %c = shufflevector <4 x double> %b, <4 x double> undef,
+                     <4 x i32> zeroinitializer
+  %x = fadd <4 x double> %c, %i
+  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %i
+  ret <4 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_broadcast_vaddpd_256
+; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*{%k[1-7]} {z}.*}}
+; CHECK: ret
+define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, double* %j,
+                                           <4 x i64> %mask1) nounwind {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %tmp = load double* %j
+  %b = insertelement <4 x double> undef, double %tmp, i32 0
+  %c = shufflevector <4 x double> %b, <4 x double> undef,
+                     <4 x i32> zeroinitializer
+  %x = fadd <4 x double> %c, %i
+  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
+  ret <4 x double> %r
+}
+
+; 128-bit
+
+; CHECK-LABEL: vpaddq128_test
+; CHECK: vpaddq %xmm{{.*}}
+; CHECK: ret
+define <2 x i64> @vpaddq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
+  %x = add <2 x i64> %i, %j
+  ret <2 x i64> %x
+}
+
+; CHECK-LABEL: vpaddq128_fold_test
+; CHECK: vpaddq (%rdi), %xmm{{.*}}
+; CHECK: ret
+define <2 x i64> @vpaddq128_fold_test(<2 x i64> %i, <2 x i64>* %j) nounwind {
+  %tmp = load <2 x i64>* %j, align 4
+  %x = add <2 x i64> %i, %tmp
+  ret <2 x i64> %x
+}
+
+; CHECK-LABEL: vpaddq128_broadcast2_test
+; CHECK: vpaddq (%rdi){1to2}, %xmm{{.*}}
+; CHECK: ret
+define <2 x i64> @vpaddq128_broadcast2_test(<2 x i64> %i, i64* %j) nounwind {
+  %tmp = load i64* %j
+  %j.0 = insertelement <2 x i64> undef, i64 %tmp, i32 0
+  %j.1 = insertelement <2 x i64> %j.0, i64 %tmp, i32 1
+  %x = add <2 x i64> %i, %j.1
+  ret <2 x i64> %x
+}
+
+; CHECK-LABEL: vpaddd128_test
+; CHECK: vpaddd %xmm{{.*}}
+; CHECK: ret
+define <4 x i32> @vpaddd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
+  %x = add <4 x i32> %i, %j
+  ret <4 x i32> %x
+}
+
+; CHECK-LABEL: vpaddd128_fold_test
+; CHECK: vpaddd (%rdi), %xmm{{.*}}
+; CHECK: ret
+define <4 x i32> @vpaddd128_fold_test(<4 x i32> %i, <4 x i32>* %j) nounwind {
+  %tmp = load <4 x i32>* %j, align 4
+  %x = add <4 x i32> %i, %tmp
+  ret <4 x i32> %x
+}
+
+; CHECK-LABEL: vpaddd128_broadcast_test
+; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*}}
+; CHECK: ret
+define <4 x i32> @vpaddd128_broadcast_test(<4 x i32> %i) nounwind {
+  %x = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %x
+}
+
+; CHECK-LABEL: vpaddd128_mask_test
+; CHECK: vpaddd %xmm{{.*%k[1-7].*}}
+; CHECK: ret
+define <4 x i32> @vpaddd128_mask_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %x = add <4 x i32> %i, %j
+  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
+  ret <4 x i32> %r
+}
+
+; CHECK-LABEL: vpaddd128_maskz_test
+; CHECK: vpaddd %xmm{{.*{%k[1-7]} {z}.*}}
+; CHECK: ret
+define <4 x i32> @vpaddd128_maskz_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %x = add <4 x i32> %i, %j
+  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+  ret <4 x i32> %r
+}
+
+; CHECK-LABEL: vpaddd128_mask_fold_test
+; CHECK: vpaddd (%rdi), %xmm{{.*%k[1-7]}}
+; CHECK: ret
+define <4 x i32> @vpaddd128_mask_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind readnone {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %j = load <4 x i32>* %j.ptr
+  %x = add <4 x i32> %i, %j
+  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
+  ret <4 x i32> %r
+}
+
+; CHECK-LABEL: vpaddd128_mask_broadcast_test
+; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*{%k[1-7]}}}
+; CHECK: ret
+define <4 x i32> @vpaddd128_mask_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %x = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
+  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
+  ret <4 x i32> %r
+}
+
+; CHECK-LABEL: vpaddd128_maskz_fold_test
+; CHECK: vpaddd (%rdi), %xmm{{.*{%k[1-7]} {z}}}
+; CHECK: ret
+define <4 x i32> @vpaddd128_maskz_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind readnone {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %j = load <4 x i32>* %j.ptr
+  %x = add <4 x i32> %i, %j
+  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+  ret <4 x i32> %r
+}
+
+; CHECK-LABEL: vpaddd128_maskz_broadcast_test
+; CHECK: vpaddd LCP{{.*}}(%rip){1to4}, %xmm{{.*{%k[1-7]} {z}}}
+; CHECK: ret
+define <4 x i32> @vpaddd128_maskz_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %x = add <4 x i32> %i, <i32 1, i32 1, i32 1, i32 1>
+  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+  ret <4 x i32> %r
+}
+
+; CHECK-LABEL: vpsubq128_test
+; CHECK: vpsubq %xmm{{.*}}
+; CHECK: ret
+define <2 x i64> @vpsubq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
+  %x = sub <2 x i64> %i, %j
+  ret <2 x i64> %x
+}
+
+; CHECK-LABEL: vpsubd128_test
+; CHECK: vpsubd %xmm{{.*}}
+; CHECK: ret
+define <4 x i32> @vpsubd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
+  %x = sub <4 x i32> %i, %j
+  ret <4 x i32> %x
+}
+
+; CHECK-LABEL: vpmulld128_test
+; CHECK: vpmulld %xmm{{.*}}
+; CHECK: ret
+define <4 x i32> @vpmulld128_test(<4 x i32> %i, <4 x i32> %j) {
+  %x = mul <4 x i32> %i, %j
+  ret <4 x i32> %x
+}
+
+; CHECK-LABEL: test_vaddpd_128
+; CHECK: vaddpd{{.*}}
+; CHECK: ret
+define <2 x double> @test_vaddpd_128(<2 x double> %y, <2 x double> %x) {
+entry:
+  %add.i = fadd <2 x double> %x, %y
+  ret <2 x double> %add.i
+}
+
+; CHECK-LABEL: test_fold_vaddpd_128
+; CHECK: vaddpd LCP{{.*}}(%rip){{.*}}
+; CHECK: ret
+define <2 x double> @test_fold_vaddpd_128(<2 x double> %y) {
+entry:
+  %add.i = fadd <2 x double> %y, <double 4.500000e+00, double 3.400000e+00>
+  ret <2 x double> %add.i
+}
+
+; CHECK-LABEL: test_broadcast_vaddpd_128
+; CHECK: LCP{{.*}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK: ret
+define <4 x float> @test_broadcast_vaddpd_128(<4 x float> %a) nounwind {
+  %b = fadd <4 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
+  ret <4 x float> %b
+}
+
+; CHECK-LABEL: test_mask_vaddps_128
+; CHECK: vaddps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x float> @test_mask_vaddps_128(<4 x float> %dst, <4 x float> %i,
+                                        <4 x float> %j, <4 x i32> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %x = fadd <4 x float> %i, %j
+  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
+  ret <4 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmulps_128
+; CHECK: vmulps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x float> @test_mask_vmulps_128(<4 x float> %dst, <4 x float> %i,
+                                        <4 x float> %j, <4 x i32> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %x = fmul <4 x float> %i, %j
+  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
+  ret <4 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vminps_128
+; CHECK: vminps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x float> @test_mask_vminps_128(<4 x float> %dst, <4 x float> %i,
+                                        <4 x float> %j, <4 x i32> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %cmp_res = fcmp olt <4 x float> %i, %j
+  %min = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
+  %r = select <4 x i1> %mask, <4 x float> %min, <4 x float> %dst
+  ret <4 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmaxps_128
+; CHECK: vmaxps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x float> @test_mask_vmaxps_128(<4 x float> %dst, <4 x float> %i,
+                                        <4 x float> %j, <4 x i32> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %cmp_res = fcmp ogt <4 x float> %i, %j
+  %max = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
+  %r = select <4 x i1> %mask, <4 x float> %max, <4 x float> %dst
+  ret <4 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vsubps_128
+; CHECK: vsubps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x float> @test_mask_vsubps_128(<4 x float> %dst, <4 x float> %i,
+                                        <4 x float> %j, <4 x i32> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %x = fsub <4 x float> %i, %j
+  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
+  ret <4 x float> %r
+}
+
+
+; CHECK-LABEL: test_mask_vdivps_128
+; CHECK: vdivps {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <4 x float> @test_mask_vdivps_128(<4 x float> %dst, <4 x float> %i,
+                                        <4 x float> %j, <4 x i32> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %x = fdiv <4 x float> %i, %j
+  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
+  ret <4 x float> %r
+}
+
+; CHECK-LABEL: test_mask_vmulpd_128
+; CHECK: vmulpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <2 x double> @test_mask_vmulpd_128(<2 x double> %dst, <2 x double> %i,
+                                        <2 x double> %j, <2 x i64> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %x = fmul <2 x double> %i, %j
+  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
+  ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vminpd_128
+; CHECK: vminpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <2 x double> @test_mask_vminpd_128(<2 x double> %dst, <2 x double> %i,
+                                        <2 x double> %j, <2 x i64> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %cmp_res = fcmp olt <2 x double> %i, %j
+  %min = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
+  %r = select <2 x i1> %mask, <2 x double> %min, <2 x double> %dst
+  ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vmaxpd_128
+; CHECK: vmaxpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <2 x double> @test_mask_vmaxpd_128(<2 x double> %dst, <2 x double> %i,
+                                        <2 x double> %j, <2 x i64> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %cmp_res = fcmp ogt <2 x double> %i, %j
+  %max = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
+  %r = select <2 x i1> %mask, <2 x double> %max, <2 x double> %dst
+  ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vsubpd_128
+; CHECK: vsubpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <2 x double> @test_mask_vsubpd_128(<2 x double> %dst, <2 x double> %i,
+                                        <2 x double> %j, <2 x i64> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %x = fsub <2 x double> %i, %j
+  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
+  ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vdivpd_128
+; CHECK: vdivpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <2 x double> @test_mask_vdivpd_128(<2 x double> %dst, <2 x double> %i,
+                                        <2 x double> %j, <2 x i64> %mask1)
+                                        nounwind readnone {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %x = fdiv <2 x double> %i, %j
+  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
+  ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_mask_vaddpd_128
+; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}}}
+; CHECK: ret
+define <2 x double> @test_mask_vaddpd_128(<2 x double> %dst, <2 x double> %i,
+                                         <2 x double> %j, <2 x i64> %mask1)
+                                         nounwind readnone {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %x = fadd <2 x double> %i, %j
+  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
+  ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_vaddpd_128
+; CHECK: vaddpd {{%xmm[0-9]{1,2}, %xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}}}
+; CHECK: ret
+define <2 x double> @test_maskz_vaddpd_128(<2 x double> %i, <2 x double> %j,
+                                          <2 x i64> %mask1) nounwind readnone {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %x = fadd <2 x double> %i, %j
+  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
+  ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_mask_fold_vaddpd_128
+; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]}.*}}
+; CHECK: ret
+define <2 x double> @test_mask_fold_vaddpd_128(<2 x double> %dst, <2 x double> %i,
+                                         <2 x double>* %j,  <2 x i64> %mask1)
+                                         nounwind {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %tmp = load <2 x double>* %j
+  %x = fadd <2 x double> %i, %tmp
+  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
+  ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_fold_vaddpd_128
+; CHECK: vaddpd (%rdi), {{.*%xmm[0-9]{1,2}, %xmm[0-9]{1,2} {%k[1-7]} {z}.*}}
+; CHECK: ret
+define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, <2 x double>* %j,
+                                          <2 x i64> %mask1) nounwind {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %tmp = load <2 x double>* %j
+  %x = fadd <2 x double> %i, %tmp
+  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
+  ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_broadcast2_vaddpd_128
+; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*}}
+; CHECK: ret
+define <2 x double> @test_broadcast2_vaddpd_128(<2 x double> %i, double* %j) nounwind {
+  %tmp = load double* %j
+  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
+  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
+  %x = fadd <2 x double> %j.1, %i
+  ret <2 x double> %x
+}
+
+; CHECK-LABEL: test_mask_broadcast_vaddpd_128
+; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]}.*}}
+; CHECK: ret
+define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i,
+                                          double* %j, <2 x i64> %mask1)
+                                          nounwind {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %tmp = load double* %j
+  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
+  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
+  %x = fadd <2 x double> %j.1, %i
+  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %i
+  ret <2 x double> %r
+}
+
+; CHECK-LABEL: test_maskz_broadcast_vaddpd_128
+; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*{%k[1-7]} {z}.*}}
+; CHECK: ret
+define <2 x double> @test_maskz_broadcast_vaddpd_128(<2 x double> %i, double* %j,
+                                           <2 x i64> %mask1) nounwind {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %tmp = load double* %j
+  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
+  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
+  %x = fadd <2 x double> %j.1, %i
+  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
+  ret <2 x double> %r
+}
diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll
new file mode 100644
index 000000000000..d349f4f53786
--- /dev/null
+++ b/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -0,0 +1,864 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
+
+; 256-bit
+
+define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: test_pcmpeq_d_256
+; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
+  ret i8 %res
+}
+
+define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
+; CHECK-LABEL: test_mask_pcmpeq_d_256
+; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
+  ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)
+
+define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: test_pcmpeq_q_256
+; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
+  ret i8 %res
+}
+
+define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
+; CHECK-LABEL: test_mask_pcmpeq_q_256
+; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
+  ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)
+
+define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: test_pcmpgt_d_256
+; CHECK: vpcmpgtd %ymm1, %ymm0, %k0 ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
+  ret i8 %res
+}
+
+define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
+; CHECK-LABEL: test_mask_pcmpgt_d_256
+; CHECK: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
+  ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8)
+
+define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: test_pcmpgt_q_256
+; CHECK: vpcmpgtq %ymm1, %ymm0, %k0 ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
+  ret i8 %res
+}
+
+define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
+; CHECK-LABEL: test_mask_pcmpgt_q_256
+; CHECK: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
+  ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)
+
+define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: test_cmp_d_256
+; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 ##
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltd %ymm1, %ymm0, %k0 ##
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpled %ymm1, %ymm0, %k0 ##
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordd %ymm1, %ymm0, %k0 ##
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpneqd %ymm1, %ymm0, %k0 ##
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltd %ymm1, %ymm0, %k0 ##
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnled %ymm1, %ymm0, %k0 ##
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordd %ymm1, %ymm0, %k0 ##
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
+; CHECK-LABEL: test_mask_cmp_d_256
+; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ##
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltd %ymm1, %ymm0, %k0 {%k1} ##
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpled %ymm1, %ymm0, %k0 {%k1} ##
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordd %ymm1, %ymm0, %k0 {%k1} ##
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpneqd %ymm1, %ymm0, %k0 {%k1} ##
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltd %ymm1, %ymm0, %k0 {%k1} ##
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnled %ymm1, %ymm0, %k0 {%k1} ##
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordd %ymm1, %ymm0, %k0 {%k1} ##
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone
+
+define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
+; CHECK-LABEL: test_ucmp_d_256
+; CHECK: vpcmpequd %ymm1, %ymm0, %k0 ##
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltud %ymm1, %ymm0, %k0 ##
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleud %ymm1, %ymm0, %k0 ##
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordud %ymm1, %ymm0, %k0 ##
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpnequd %ymm1, %ymm0, %k0 ##
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltud %ymm1, %ymm0, %k0 ##
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleud %ymm1, %ymm0, %k0 ##
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordud %ymm1, %ymm0, %k0 ##
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
+; CHECK-LABEL: test_mask_ucmp_d_256
+; CHECK: vpcmpequd %ymm1, %ymm0, %k0 {%k1} ##
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltud %ymm1, %ymm0, %k0 {%k1} ##
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleud %ymm1, %ymm0, %k0 {%k1} ##
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordud %ymm1, %ymm0, %k0 {%k1} ##
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpnequd %ymm1, %ymm0, %k0 {%k1} ##
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltud %ymm1, %ymm0, %k0 {%k1} ##
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleud %ymm1, %ymm0, %k0 {%k1} ##
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordud %ymm1, %ymm0, %k0 {%k1} ##
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone
+
+define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: test_cmp_q_256
+; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 ##
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltq %ymm1, %ymm0, %k0 ##
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleq %ymm1, %ymm0, %k0 ##
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordq %ymm1, %ymm0, %k0 ##
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpneqq %ymm1, %ymm0, %k0 ##
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltq %ymm1, %ymm0, %k0 ##
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleq %ymm1, %ymm0, %k0 ##
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordq %ymm1, %ymm0, %k0 ##
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
+; CHECK-LABEL: test_mask_cmp_q_256
+; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ##
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltq %ymm1, %ymm0, %k0 {%k1} ##
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleq %ymm1, %ymm0, %k0 {%k1} ##
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordq %ymm1, %ymm0, %k0 {%k1} ##
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpneqq %ymm1, %ymm0, %k0 {%k1} ##
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltq %ymm1, %ymm0, %k0 {%k1} ##
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleq %ymm1, %ymm0, %k0 {%k1} ##
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordq %ymm1, %ymm0, %k0 {%k1} ##
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone
+
+define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
+; CHECK-LABEL: test_ucmp_q_256
+; CHECK: vpcmpequq %ymm1, %ymm0, %k0 ##
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltuq %ymm1, %ymm0, %k0 ##
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleuq %ymm1, %ymm0, %k0 ##
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunorduq %ymm1, %ymm0, %k0 ##
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpnequq %ymm1, %ymm0, %k0 ##
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltuq %ymm1, %ymm0, %k0 ##
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleuq %ymm1, %ymm0, %k0 ##
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmporduq %ymm1, %ymm0, %k0 ##
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
+; CHECK-LABEL: test_mask_ucmp_q_256
+; CHECK: vpcmpequq %ymm1, %ymm0, %k0 {%k1} ##
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} ##
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleuq %ymm1, %ymm0, %k0 {%k1} ##
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunorduq %ymm1, %ymm0, %k0 {%k1} ##
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpnequq %ymm1, %ymm0, %k0 {%k1} ##
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltuq %ymm1, %ymm0, %k0 {%k1} ##
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleuq %ymm1, %ymm0, %k0 {%k1} ##
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmporduq %ymm1, %ymm0, %k0 {%k1} ##
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone
+
+; 128-bit
+
+define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_pcmpeq_d_128
+; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
+  ret i8 %res
+}
+
+define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
+; CHECK-LABEL: test_mask_pcmpeq_d_128
+; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
+  ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)
+
+define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_pcmpeq_q_128
+; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
+  ret i8 %res
+}
+
+define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
+; CHECK-LABEL: test_mask_pcmpeq_q_128
+; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
+  ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)
+
+define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_pcmpgt_d_128
+; CHECK: vpcmpgtd %xmm1, %xmm0, %k0 ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
+  ret i8 %res
+}
+
+define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
+; CHECK-LABEL: test_mask_pcmpgt_d_128
+; CHECK: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
+  ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8)
+
+define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_pcmpgt_q_128
+; CHECK: vpcmpgtq %xmm1, %xmm0, %k0 ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
+  ret i8 %res
+}
+
+define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
+; CHECK-LABEL: test_mask_pcmpgt_q_128
+; CHECK: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ##
+  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
+  ret i8 %res
+}
+
+declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)
+
+define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: test_cmp_d_128
+; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 ##
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltd %xmm1, %xmm0, %k0 ##
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpled %xmm1, %xmm0, %k0 ##
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordd %xmm1, %xmm0, %k0 ##
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpneqd %xmm1, %xmm0, %k0 ##
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltd %xmm1, %xmm0, %k0 ##
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnled %xmm1, %xmm0, %k0 ##
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordd %xmm1, %xmm0, %k0 ##
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
+; CHECK-LABEL: test_mask_cmp_d_128
+; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ##
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltd %xmm1, %xmm0, %k0 {%k1} ##
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpled %xmm1, %xmm0, %k0 {%k1} ##
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordd %xmm1, %xmm0, %k0 {%k1} ##
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpneqd %xmm1, %xmm0, %k0 {%k1} ##
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltd %xmm1, %xmm0, %k0 {%k1} ##
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnled %xmm1, %xmm0, %k0 {%k1} ##
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordd %xmm1, %xmm0, %k0 {%k1} ##
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone
+
+define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
+; CHECK-LABEL: test_ucmp_d_128
+; CHECK: vpcmpequd %xmm1, %xmm0, %k0 ##
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltud %xmm1, %xmm0, %k0 ##
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleud %xmm1, %xmm0, %k0 ##
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordud %xmm1, %xmm0, %k0 ##
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpnequd %xmm1, %xmm0, %k0 ##
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltud %xmm1, %xmm0, %k0 ##
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleud %xmm1, %xmm0, %k0 ##
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordud %xmm1, %xmm0, %k0 ##
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
+; CHECK-LABEL: test_mask_ucmp_d_128
+; CHECK: vpcmpequd %xmm1, %xmm0, %k0 {%k1} ##
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltud %xmm1, %xmm0, %k0 {%k1} ##
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleud %xmm1, %xmm0, %k0 {%k1} ##
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordud %xmm1, %xmm0, %k0 {%k1} ##
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpnequd %xmm1, %xmm0, %k0 {%k1} ##
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltud %xmm1, %xmm0, %k0 {%k1} ##
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleud %xmm1, %xmm0, %k0 {%k1} ##
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordud %xmm1, %xmm0, %k0 {%k1} ##
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+declare i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone
+
+define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: test_cmp_q_128
+; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 ##
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltq %xmm1, %xmm0, %k0 ##
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleq %xmm1, %xmm0, %k0 ##
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordq %xmm1, %xmm0, %k0 ##
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpneqq %xmm1, %xmm0, %k0 ##
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltq %xmm1, %xmm0, %k0 ##
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleq %xmm1, %xmm0, %k0 ##
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordq %xmm1, %xmm0, %k0 ##
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
+; CHECK-LABEL: test_mask_cmp_q_128
+; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ##
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltq %xmm1, %xmm0, %k0 {%k1} ##
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleq %xmm1, %xmm0, %k0 {%k1} ##
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunordq %xmm1, %xmm0, %k0 {%k1} ##
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpneqq %xmm1, %xmm0, %k0 {%k1} ##
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} ##
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleq %xmm1, %xmm0, %k0 {%k1} ##
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmpordq %xmm1, %xmm0, %k0 {%k1} ##
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone
+
+define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
+; CHECK-LABEL: test_ucmp_q_128
+; CHECK: vpcmpequq %xmm1, %xmm0, %k0 ##
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltuq %xmm1, %xmm0, %k0 ##
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleuq %xmm1, %xmm0, %k0 ##
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunorduq %xmm1, %xmm0, %k0 ##
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpnequq %xmm1, %xmm0, %k0 ##
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltuq %xmm1, %xmm0, %k0 ##
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleuq %xmm1, %xmm0, %k0 ##
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmporduq %xmm1, %xmm0, %k0 ##
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
+; CHECK-LABEL: test_mask_ucmp_q_128
+; CHECK: vpcmpequq %xmm1, %xmm0, %k0 {%k1} ##
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
+  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
+; CHECK: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ##
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
+  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
+; CHECK: vpcmpleuq %xmm1, %xmm0, %k0 {%k1} ##
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
+  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
+; CHECK: vpcmpunorduq %xmm1, %xmm0, %k0 {%k1} ##
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
+  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
+; CHECK: vpcmpnequq %xmm1, %xmm0, %k0 {%k1} ##
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
+  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
+; CHECK: vpcmpnltuq %xmm1, %xmm0, %k0 {%k1} ##
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
+  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
+; CHECK: vpcmpnleuq %xmm1, %xmm0, %k0 {%k1} ##
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
+  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
+; CHECK: vpcmporduq %xmm1, %xmm0, %k0 {%k1} ##
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
+  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
+  ret <8 x i8> %vec7
+}
+
+declare i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone
+
+; CHECK-LABEL: compr1
+; CHECK: vcompresspd %zmm0
+define void @compr1(i8* %addr, <8 x double> %data, i8 %mask) {
+  call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
+
+; CHECK-LABEL: compr2
+; CHECK: vcompresspd %ymm0
+define void @compr2(i8* %addr, <4 x double> %data, i8 %mask) {
+  call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
+
+; CHECK-LABEL: compr3
+; CHECK: vcompressps %xmm0
+define void @compr3(i8* %addr, <4 x float> %data, i8 %mask) {
+  call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
+
+; CHECK-LABEL: compr4
+; CHECK: vcompresspd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0xc0]
+define <8 x double> @compr4(i8* %addr, <8 x double> %data, i8 %mask) {
+  %res = call <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
+  ret <8 x double> %res
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.compress.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
+
+; CHECK-LABEL: compr5
+; CHECK: vcompresspd %ymm0, %ymm1 {%k1}  ## encoding: [0x62,0xf2,0xfd,0x29,0x8a,0xc1]
+define <4 x double> @compr5(<4 x double> %data, <4 x double> %src0, i8 %mask) {
+  %res = call <4 x double> @llvm.x86.avx512.mask.compress.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask)
+  ret <4 x double> %res
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.compress.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
+
+; CHECK-LABEL: compr6
+; CHECK: vcompressps %xmm0
+define <4 x float> @compr6(<4 x float> %data, i8 %mask) {
+  %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 %mask)
+  ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
+
+; CHECK-LABEL: compr7
+; CHECK-NOT: vcompress
+; CHECK: vmovapd
+define void @compr7(i8* %addr, <8 x double> %data) {
+  call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
+  ret void
+}
+
+; CHECK-LABEL: compr8
+; CHECK-NOT: vcompressps %xmm0
+define <4 x float> @compr8(<4 x float> %data) {
+  %res = call <4 x float> @llvm.x86.avx512.mask.compress.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 -1)
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: compr9
+; CHECK: vpcompressq %zmm0, (%rdi) {%k1}  ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
+define void @compr9(i8* %addr, <8 x i64> %data, i8 %mask) {
+  call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
+
+; CHECK-LABEL: compr10
+; CHECK: vpcompressd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x8b,0xc0]
+define <4 x i32> @compr10(<4 x i32> %data, i8 %mask) {
+  %res = call <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask)
+  ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.compress.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
+
+; Expand
+
+; CHECK-LABEL: expand1
+; CHECK: vexpandpd (%rdi), %zmm0 {%k1}  ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
+define <8 x double> @expand1(i8* %addr, <8 x double> %data, i8 %mask) {
+  %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
+  ret <8 x double> %res
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
+
+; CHECK-LABEL: expand2
+; CHECK: vexpandpd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
+define <4 x double> @expand2(i8* %addr, <4 x double> %data, i8 %mask) {
+  %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
+  ret <4 x double> %res
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
+
+; CHECK-LABEL: expand3
+; CHECK: vexpandps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
+define <4 x float> @expand3(i8* %addr, <4 x float> %data, i8 %mask) {
+  %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
+  ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
+
+; CHECK-LABEL: expand4
+; CHECK: vexpandpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0xc0]
+define <8 x double> @expand4(i8* %addr, <8 x double> %data, i8 %mask) {
+  %res = call <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> zeroinitializer, i8 %mask)
+  ret <8 x double> %res
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.expand.pd.512(<8 x double> %data, <8 x double> %src0, i8 %mask)
+
+; CHECK-LABEL: expand5
+; CHECK: vexpandpd %ymm0, %ymm1 {%k1}  ## encoding: [0x62,0xf2,0xfd,0x29,0x88,0xc8]
+define <4 x double> @expand5(<4 x double> %data, <4 x double> %src0, i8 %mask) {
+  %res = call <4 x double> @llvm.x86.avx512.mask.expand.pd.256( <4 x double> %data, <4 x double> %src0, i8 %mask)
+  ret <4 x double> %res
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.expand.pd.256(<4 x double> %data, <4 x double> %src0, i8 %mask)
+
+; CHECK-LABEL: expand6
+; CHECK: vexpandps %xmm0
+define <4 x float> @expand6(<4 x float> %data, i8 %mask) {
+  %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 %mask)
+  ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float> %src0, i8 %mask)
+
+; CHECK-LABEL: expand7
+; CHECK-NOT: vexpand
+; CHECK: vmovapd
+define <8 x double> @expand7(i8* %addr, <8 x double> %data) {
+  %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
+  ret <8 x double> %res
+}
+
+; CHECK-LABEL: expand8
+; CHECK-NOT: vexpandps %xmm0
+define <4 x float> @expand8(<4 x float> %data) {
+  %res = call <4 x float> @llvm.x86.avx512.mask.expand.ps.128(<4 x float> %data, <4 x float>zeroinitializer, i8 -1)
+  ret <4 x float> %res
+}
+
+; CHECK-LABEL: expand9
+; CHECK: vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
+define <8 x i64> @expand9(i8* %addr, <8 x i64> %data, i8 %mask) {
+  %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
+  ret <8 x i64> %res
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
+
+; CHECK-LABEL: expand10
+; CHECK: vpexpandd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x89,0xc0]
+define <4 x i32> @expand10(<4 x i32> %data, i8 %mask) {
+  %res = call <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32>zeroinitializer, i8 %mask)
+  ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.expand.d.128(<4 x i32> %data, <4 x i32> %src0, i8 %mask)
+
+define <8 x float> @test_x86_mask_blend_ps_256(i8 %a0, <8 x float> %a1, <8 x float> %a2) {
+  ; CHECK: vblendmps %ymm1, %ymm0
+  %res = call <8 x float> @llvm.x86.avx512.mask.blend.ps.256(<8 x float> %a1, <8 x float> %a2, i8 %a0) ; <<8 x float>> [#uses=1]
+  ret <8 x float> %res
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.blend.ps.256(<8 x float>, <8 x float>, i8) nounwind readonly
+
+define <4 x double> @test_x86_mask_blend_pd_256(i8 %a0, <4 x double> %a1, <4 x double> %a2) {
+  ; CHECK: vblendmpd %ymm1, %ymm0
+  %res = call <4 x double> @llvm.x86.avx512.mask.blend.pd.256(<4 x double> %a1, <4 x double> %a2, i8 %a0) ; <<4 x double>> [#uses=1]
+  ret <4 x double> %res
+}
+
+define <4 x double> @test_x86_mask_blend_pd_256_memop(<4 x double> %a, <4 x double>* %ptr, i8 %mask) {
+  ; CHECK-LABEL: test_x86_mask_blend_pd_256_memop
+  ; CHECK: vblendmpd (%
+  %b = load <4 x double>* %ptr
+  %res = call <4 x double> @llvm.x86.avx512.mask.blend.pd.256(<4 x double> %a, <4 x double> %b, i8 %mask) ; <<4 x double>> [#uses=1]
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx512.mask.blend.pd.256(<4 x double>, <4 x double>, i8) nounwind readonly
+
+; CHECK-LABEL: test_x86_mask_blend_d_256
+; CHECK: vpblendmd
+define <8 x i32> @test_x86_mask_blend_d_256(i8 %a0, <8 x i32> %a1, <8 x i32> %a2) {
+  %res = call <8 x i32> @llvm.x86.avx512.mask.blend.d.256(<8 x i32> %a1, <8 x i32> %a2, i8 %a0) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx512.mask.blend.d.256(<8 x i32>, <8 x i32>, i8) nounwind readonly
+
+define <4 x i64> @test_x86_mask_blend_q_256(i8 %a0, <4 x i64> %a1, <4 x i64> %a2) {
+  ; CHECK: vpblendmq
+  %res = call <4 x i64> @llvm.x86.avx512.mask.blend.q.256(<4 x i64> %a1, <4 x i64> %a2, i8 %a0) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx512.mask.blend.q.256(<4 x i64>, <4 x i64>, i8) nounwind readonly
+
+define <4 x float> @test_x86_mask_blend_ps_128(i8 %a0, <4 x float> %a1, <4 x float> %a2) {
+  ; CHECK: vblendmps %xmm1, %xmm0
+  %res = call <4 x float> @llvm.x86.avx512.mask.blend.ps.128(<4 x float> %a1, <4 x float> %a2, i8 %a0) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.blend.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly
+
+define <2 x double> @test_x86_mask_blend_pd_128(i8 %a0, <2 x double> %a1, <2 x double> %a2) {
+  ; CHECK: vblendmpd %xmm1, %xmm0
+  %res = call <2 x double> @llvm.x86.avx512.mask.blend.pd.128(<2 x double> %a1, <2 x double> %a2, i8 %a0) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_mask_blend_pd_128_memop(<2 x double> %a, <2 x double>* %ptr, i8 %mask) {
+  ; CHECK-LABEL: test_x86_mask_blend_pd_128_memop
+  ; CHECK: vblendmpd (%
+  %b = load <2 x double>* %ptr
+  %res = call <2 x double> @llvm.x86.avx512.mask.blend.pd.128(<2 x double> %a, <2 x double> %b, i8 %mask) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.avx512.mask.blend.pd.128(<2 x double>, <2 x double>, i8) nounwind readonly
+
+define <4 x i32> @test_x86_mask_blend_d_128(i8 %a0, <4 x i32> %a1, <4 x i32> %a2) {
+  ; CHECK: vpblendmd
+  %res = call <4 x i32> @llvm.x86.avx512.mask.blend.d.128(<4 x i32> %a1, <4 x i32> %a2, i8 %a0) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx512.mask.blend.d.128(<4 x i32>, <4 x i32>, i8) nounwind readonly
+
+define <2 x i64> @test_x86_mask_blend_q_128(i8 %a0, <2 x i64> %a1, <2 x i64> %a2) {
+  ; CHECK: vpblendmq
+  %res = call <2 x i64> @llvm.x86.avx512.mask.blend.q.128(<2 x i64> %a1, <2 x i64> %a2, i8 %a0) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx512.mask.blend.q.128(<2 x i64>, <2 x i64>, i8) nounwind readonly
diff --git a/test/CodeGen/X86/avx512vl-logic.ll b/test/CodeGen/X86/avx512vl-logic.ll
new file mode 100644
index 000000000000..02cb8f978656
--- /dev/null
+++ b/test/CodeGen/X86/avx512vl-logic.ll
@@ -0,0 +1,137 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl | FileCheck %s
+
+; 256-bit
+
+; CHECK-LABEL: vpandd256
+; CHECK: vpandd %ymm
+; CHECK: ret
+define <8 x i32> @vpandd256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %x = and <8 x i32> %a2, %b
+  ret <8 x i32> %x
+}
+
+; CHECK-LABEL: vpord256
+; CHECK: vpord %ymm
+; CHECK: ret
+define <8 x i32> @vpord256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %x = or <8 x i32> %a2, %b
+  ret <8 x i32> %x
+}
+
+; CHECK-LABEL: vpxord256
+; CHECK: vpxord %ymm
+; CHECK: ret
+define <8 x i32> @vpxord256(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %x = xor <8 x i32> %a2, %b
+  ret <8 x i32> %x
+}
+
+; CHECK-LABEL: vpandq256
+; CHECK: vpandq %ymm
+; CHECK: ret
+define <4 x i64> @vpandq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = and <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+; CHECK-LABEL: vporq256
+; CHECK: vporq %ymm
+; CHECK: ret
+define <4 x i64> @vporq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = or <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+; CHECK-LABEL: vpxorq256
+; CHECK: vpxorq %ymm
+; CHECK: ret
+define <4 x i64> @vpxorq256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+  %x = xor <4 x i64> %a2, %b
+  ret <4 x i64> %x
+}
+
+; 128-bit
+
+; CHECK-LABEL: vpandd128
+; CHECK: vpandd %xmm
+; CHECK: ret
+define <4 x i32> @vpandd128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i32> %a, <i32 1, i32 1, i32 1, i32 1>
+  %x = and <4 x i32> %a2, %b
+  ret <4 x i32> %x
+}
+
+; CHECK-LABEL: vpord128
+; CHECK: vpord %xmm
+; CHECK: ret
+define <4 x i32> @vpord128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i32> %a, <i32 1, i32 1, i32 1, i32 1>
+  %x = or <4 x i32> %a2, %b
+  ret <4 x i32> %x
+}
+
+; CHECK-LABEL: vpxord128
+; CHECK: vpxord %xmm
+; CHECK: ret
+define <4 x i32> @vpxord128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <4 x i32> %a, <i32 1, i32 1, i32 1, i32 1>
+  %x = xor <4 x i32> %a2, %b
+  ret <4 x i32> %x
+}
+
+; CHECK-LABEL: vpandq128
+; CHECK: vpandq %xmm
+; CHECK: ret
+define <2 x i64> @vpandq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %x = and <2 x i64> %a2, %b
+  ret <2 x i64> %x
+}
+
+; CHECK-LABEL: vporq128
+; CHECK: vporq %xmm
+; CHECK: ret
+define <2 x i64> @vporq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %x = or <2 x i64> %a2, %b
+  ret <2 x i64> %x
+}
+
+; CHECK-LABEL: vpxorq128
+; CHECK: vpxorq %xmm
+; CHECK: ret
+define <2 x i64> @vpxorq128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %x = xor <2 x i64> %a2, %b
+  ret <2 x i64> %x
+}
diff --git a/test/CodeGen/X86/avx512vl-mov.ll b/test/CodeGen/X86/avx512vl-mov.ll
new file mode 100644
index 000000000000..32246568ac2e
--- /dev/null
+++ b/test/CodeGen/X86/avx512vl-mov.ll
@@ -0,0 +1,642 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
+
+; CHECK-LABEL: test_256_1
+; CHECK: vmovdqu32
+; CHECK: ret
+define <8 x i32> @test_256_1(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  %res = load <8 x i32>* %vaddr, align 1
+  ret <8 x i32>%res
+}
+
+; CHECK-LABEL: test_256_2
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test_256_2(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  %res = load <8 x i32>* %vaddr, align 32
+  ret <8 x i32>%res
+}
+
+; CHECK-LABEL: test_256_3
+; CHECK: vmovdqa64
+; CHECK: ret
+define void @test_256_3(i8 * %addr, <4 x i64> %data) {
+  %vaddr = bitcast i8* %addr to <4 x i64>*
+  store <4 x i64>%data, <4 x i64>* %vaddr, align 32
+  ret void
+}
+
+; CHECK-LABEL: test_256_4
+; CHECK: vmovdqu32
+; CHECK: ret
+define void @test_256_4(i8 * %addr, <8 x i32> %data) {
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  store <8 x i32>%data, <8 x i32>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test_256_5
+; CHECK: vmovdqa32
+; CHECK: ret
+define void @test_256_5(i8 * %addr, <8 x i32> %data) {
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  store <8 x i32>%data, <8 x i32>* %vaddr, align 32
+  ret void
+}
+
+; CHECK-LABEL: test_256_6
+; CHECK: vmovdqa64
+; CHECK: ret
+define  <4 x i64> @test_256_6(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <4 x i64>*
+  %res = load <4 x i64>* %vaddr, align 32
+  ret <4 x i64>%res
+}
+
+; CHECK-LABEL: test_256_7
+; CHECK: vmovdqu64
+; CHECK: ret
+define void @test_256_7(i8 * %addr, <4 x i64> %data) {
+  %vaddr = bitcast i8* %addr to <4 x i64>*
+  store <4 x i64>%data, <4 x i64>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test_256_8
+; CHECK: vmovdqu64
+; CHECK: ret
+define <4 x i64> @test_256_8(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <4 x i64>*
+  %res = load <4 x i64>* %vaddr, align 1
+  ret <4 x i64>%res
+}
+
+; CHECK-LABEL: test_256_9
+; CHECK: vmovapd {{.*}} ## encoding: [0x62
+; CHECK: ret
+define void @test_256_9(i8 * %addr, <4 x double> %data) {
+  %vaddr = bitcast i8* %addr to <4 x double>*
+  store <4 x double>%data, <4 x double>* %vaddr, align 32
+  ret void
+}
+
+; CHECK-LABEL: test_256_10
+; CHECK: vmovapd {{.*}} ## encoding: [0x62
+; CHECK: ret
+define <4 x double> @test_256_10(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <4 x double>*
+  %res = load <4 x double>* %vaddr, align 32
+  ret <4 x double>%res
+}
+
+; CHECK-LABEL: test_256_11
+; CHECK: vmovaps {{.*}} ## encoding: [0x62
+; CHECK: ret
+define void @test_256_11(i8 * %addr, <8 x float> %data) {
+  %vaddr = bitcast i8* %addr to <8 x float>*
+  store <8 x float>%data, <8 x float>* %vaddr, align 32
+  ret void
+}
+
+; CHECK-LABEL: test_256_12
+; CHECK: vmovaps {{.*}} ## encoding: [0x62
+; CHECK: ret
+define <8 x float> @test_256_12(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <8 x float>*
+  %res = load <8 x float>* %vaddr, align 32
+  ret <8 x float>%res
+}
+
+; CHECK-LABEL: test_256_13
+; CHECK: vmovupd {{.*}} ## encoding: [0x62
+; CHECK: ret
+define void @test_256_13(i8 * %addr, <4 x double> %data) {
+  %vaddr = bitcast i8* %addr to <4 x double>*
+  store <4 x double>%data, <4 x double>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test_256_14
+; CHECK: vmovupd {{.*}} ## encoding: [0x62
+; CHECK: ret
+define <4 x double> @test_256_14(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <4 x double>*
+  %res = load <4 x double>* %vaddr, align 1
+  ret <4 x double>%res
+}
+
+; CHECK-LABEL: test_256_15
+; CHECK: vmovups {{.*}} ## encoding: [0x62
+; CHECK: ret
+define void @test_256_15(i8 * %addr, <8 x float> %data) {
+  %vaddr = bitcast i8* %addr to <8 x float>*
+  store <8 x float>%data, <8 x float>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test_256_16
+; CHECK: vmovups {{.*}} ## encoding: [0x62
+; CHECK: ret
+define <8 x float> @test_256_16(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <8 x float>*
+  %res = load <8 x float>* %vaddr, align 1
+  ret <8 x float>%res
+}
+
+; CHECK-LABEL: test_256_17
+; CHECK: vmovdqa32{{.*{%k[1-7]} }}
+; CHECK: ret
+define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  %r = load <8 x i32>* %vaddr, align 32
+  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
+  ret <8 x i32>%res
+}
+
+; CHECK-LABEL: test_256_18
+; CHECK: vmovdqu32{{.*{%k[1-7]} }}
+; CHECK: ret
+define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  %r = load <8 x i32>* %vaddr, align 1
+  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
+  ret <8 x i32>%res
+}
+
+; CHECK-LABEL: test_256_19
+; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  %r = load <8 x i32>* %vaddr, align 32
+  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
+  ret <8 x i32>%res
+}
+
+; CHECK-LABEL: test_256_20
+; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) {
+  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x i32>*
+  %r = load <8 x i32>* %vaddr, align 1
+  %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
+  ret <8 x i32>%res
+}
+
+; CHECK-LABEL: test_256_21
+; CHECK: vmovdqa64{{.*{%k[1-7]} }}
+; CHECK: ret
+define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x i64>*
+  %r = load <4 x i64>* %vaddr, align 32
+  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
+  ret <4 x i64>%res
+}
+
+; CHECK-LABEL: test_256_22
+; CHECK: vmovdqu64{{.*{%k[1-7]} }}
+; CHECK: ret
+define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x i64>*
+  %r = load <4 x i64>* %vaddr, align 1
+  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
+  ret <4 x i64>%res
+}
+
+; CHECK-LABEL: test_256_23
+; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x i64>*
+  %r = load <4 x i64>* %vaddr, align 32
+  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
+  ret <4 x i64>%res
+}
+
+; CHECK-LABEL: test_256_24
+; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x i64>*
+  %r = load <4 x i64>* %vaddr, align 1
+  %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
+  ret <4 x i64>%res
+}
+
+; CHECK-LABEL: test_256_25
+; CHECK: vmovaps{{.*{%k[1-7]} }}
+; CHECK: ret
+define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
+  %mask = fcmp one <8 x float> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x float>*
+  %r = load <8 x float>* %vaddr, align 32
+  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
+  ret <8 x float>%res
+}
+
+; CHECK-LABEL: test_256_26
+; CHECK: vmovups{{.*{%k[1-7]} }}
+; CHECK: ret
+define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
+  %mask = fcmp one <8 x float> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x float>*
+  %r = load <8 x float>* %vaddr, align 1
+  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
+  ret <8 x float>%res
+}
+
+; CHECK-LABEL: test_256_27
+; CHECK: vmovaps{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
+  %mask = fcmp one <8 x float> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x float>*
+  %r = load <8 x float>* %vaddr, align 32
+  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
+  ret <8 x float>%res
+}
+
+; CHECK-LABEL: test_256_28
+; CHECK: vmovups{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
+  %mask = fcmp one <8 x float> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <8 x float>*
+  %r = load <8 x float>* %vaddr, align 1
+  %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
+  ret <8 x float>%res
+}
+
+; CHECK-LABEL: test_256_29
+; CHECK: vmovapd{{.*{%k[1-7]} }}
+; CHECK: ret
+define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x double>*
+  %r = load <4 x double>* %vaddr, align 32
+  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
+  ret <4 x double>%res
+}
+
+; CHECK-LABEL: test_256_30
+; CHECK: vmovupd{{.*{%k[1-7]} }}
+; CHECK: ret
+define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x double>*
+  %r = load <4 x double>* %vaddr, align 1
+  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
+  ret <4 x double>%res
+}
+
+; CHECK-LABEL: test_256_31
+; CHECK: vmovapd{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x double>*
+  %r = load <4 x double>* %vaddr, align 32
+  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
+  ret <4 x double>%res
+}
+
+; CHECK-LABEL: test_256_32
+; CHECK: vmovupd{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
+  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x double>*
+  %r = load <4 x double>* %vaddr, align 1
+  %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
+  ret <4 x double>%res
+}
+
+; CHECK-LABEL: test_128_1
+; CHECK: vmovdqu32
+; CHECK: ret
+define <4 x i32> @test_128_1(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <4 x i32>*
+  %res = load <4 x i32>* %vaddr, align 1
+  ret <4 x i32>%res
+}
+
+; CHECK-LABEL: test_128_2
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test_128_2(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <4 x i32>*
+  %res = load <4 x i32>* %vaddr, align 16
+  ret <4 x i32>%res
+}
+
+; CHECK-LABEL: test_128_3
+; CHECK: vmovdqa64
+; CHECK: ret
+define void @test_128_3(i8 * %addr, <2 x i64> %data) {
+  %vaddr = bitcast i8* %addr to <2 x i64>*
+  store <2 x i64>%data, <2 x i64>* %vaddr, align 16
+  ret void
+}
+
+; CHECK-LABEL: test_128_4
+; CHECK: vmovdqu32
+; CHECK: ret
+define void @test_128_4(i8 * %addr, <4 x i32> %data) {
+  %vaddr = bitcast i8* %addr to <4 x i32>*
+  store <4 x i32>%data, <4 x i32>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test_128_5
+; CHECK: vmovdqa32
+; CHECK: ret
+define void @test_128_5(i8 * %addr, <4 x i32> %data) {
+  %vaddr = bitcast i8* %addr to <4 x i32>*
+  store <4 x i32>%data, <4 x i32>* %vaddr, align 16
+  ret void
+}
+
+; CHECK-LABEL: test_128_6
+; CHECK: vmovdqa64
+; CHECK: ret
+define  <2 x i64> @test_128_6(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <2 x i64>*
+  %res = load <2 x i64>* %vaddr, align 16
+  ret <2 x i64>%res
+}
+
+; CHECK-LABEL: test_128_7
+; CHECK: vmovdqu64
+; CHECK: ret
+define void @test_128_7(i8 * %addr, <2 x i64> %data) {
+  %vaddr = bitcast i8* %addr to <2 x i64>*
+  store <2 x i64>%data, <2 x i64>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test_128_8
+; CHECK: vmovdqu64
+; CHECK: ret
+define <2 x i64> @test_128_8(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <2 x i64>*
+  %res = load <2 x i64>* %vaddr, align 1
+  ret <2 x i64>%res
+}
+
+; CHECK-LABEL: test_128_9
+; CHECK: vmovapd {{.*}} ## encoding: [0x62
+; CHECK: ret
+define void @test_128_9(i8 * %addr, <2 x double> %data) {
+  %vaddr = bitcast i8* %addr to <2 x double>*
+  store <2 x double>%data, <2 x double>* %vaddr, align 16
+  ret void
+}
+
+; CHECK-LABEL: test_128_10
+; CHECK: vmovapd {{.*}} ## encoding: [0x62
+; CHECK: ret
+define <2 x double> @test_128_10(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <2 x double>*
+  %res = load <2 x double>* %vaddr, align 16
+  ret <2 x double>%res
+}
+
+; CHECK-LABEL: test_128_11
+; CHECK: vmovaps {{.*}} ## encoding: [0x62
+; CHECK: ret
+define void @test_128_11(i8 * %addr, <4 x float> %data) {
+  %vaddr = bitcast i8* %addr to <4 x float>*
+  store <4 x float>%data, <4 x float>* %vaddr, align 16
+  ret void
+}
+
+; CHECK-LABEL: test_128_12
+; CHECK: vmovaps {{.*}} ## encoding: [0x62
+; CHECK: ret
+define <4 x float> @test_128_12(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <4 x float>*
+  %res = load <4 x float>* %vaddr, align 16
+  ret <4 x float>%res
+}
+
+; CHECK-LABEL: test_128_13
+; CHECK: vmovupd {{.*}} ## encoding: [0x62
+; CHECK: ret
+define void @test_128_13(i8 * %addr, <2 x double> %data) {
+  %vaddr = bitcast i8* %addr to <2 x double>*
+  store <2 x double>%data, <2 x double>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test_128_14
+; CHECK: vmovupd {{.*}} ## encoding: [0x62
+; CHECK: ret
+define <2 x double> @test_128_14(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <2 x double>*
+  %res = load <2 x double>* %vaddr, align 1
+  ret <2 x double>%res
+}
+
+; CHECK-LABEL: test_128_15
+; CHECK: vmovups {{.*}} ## encoding: [0x62
+; CHECK: ret
+define void @test_128_15(i8 * %addr, <4 x float> %data) {
+  %vaddr = bitcast i8* %addr to <4 x float>*
+  store <4 x float>%data, <4 x float>* %vaddr, align 1
+  ret void
+}
+
+; CHECK-LABEL: test_128_16
+; CHECK: vmovups {{.*}} ## encoding: [0x62
+; CHECK: ret
+define <4 x float> @test_128_16(i8 * %addr) {
+  %vaddr = bitcast i8* %addr to <4 x float>*
+  %res = load <4 x float>* %vaddr, align 1
+  ret <4 x float>%res
+}
+
+; CHECK-LABEL: test_128_17
+; CHECK: vmovdqa32{{.*{%k[1-7]} }}
+; CHECK: ret
+define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x i32>*
+  %r = load <4 x i32>* %vaddr, align 16
+  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
+  ret <4 x i32>%res
+}
+
+; CHECK-LABEL: test_128_18
+; CHECK: vmovdqu32{{.*{%k[1-7]} }}
+; CHECK: ret
+define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x i32>*
+  %r = load <4 x i32>* %vaddr, align 1
+  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
+  ret <4 x i32>%res
+}
+
+; CHECK-LABEL: test_128_19
+; CHECK: vmovdqa32{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x i32>*
+  %r = load <4 x i32>* %vaddr, align 16
+  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
+  ret <4 x i32>%res
+}
+
+; CHECK-LABEL: test_128_20
+; CHECK: vmovdqu32{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x i32>*
+  %r = load <4 x i32>* %vaddr, align 1
+  %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
+  ret <4 x i32>%res
+}
+
+; CHECK-LABEL: test_128_21
+; CHECK: vmovdqa64{{.*{%k[1-7]} }}
+; CHECK: ret
+define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <2 x i64>*
+  %r = load <2 x i64>* %vaddr, align 16
+  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
+  ret <2 x i64>%res
+}
+
+; CHECK-LABEL: test_128_22
+; CHECK: vmovdqu64{{.*{%k[1-7]} }}
+; CHECK: ret
+define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <2 x i64>*
+  %r = load <2 x i64>* %vaddr, align 1
+  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
+  ret <2 x i64>%res
+}
+
+; CHECK-LABEL: test_128_23
+; CHECK: vmovdqa64{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <2 x i64>*
+  %r = load <2 x i64>* %vaddr, align 16
+  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
+  ret <2 x i64>%res
+}
+
+; CHECK-LABEL: test_128_24
+; CHECK: vmovdqu64{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <2 x i64>*
+  %r = load <2 x i64>* %vaddr, align 1
+  %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
+  ret <2 x i64>%res
+}
+
+; CHECK-LABEL: test_128_25
+; CHECK: vmovaps{{.*{%k[1-7]} }}
+; CHECK: ret
+define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x float>*
+  %r = load <4 x float>* %vaddr, align 16
+  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
+  ret <4 x float>%res
+}
+
+; CHECK-LABEL: test_128_26
+; CHECK: vmovups{{.*{%k[1-7]} }}
+; CHECK: ret
+define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x float>*
+  %r = load <4 x float>* %vaddr, align 1
+  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
+  ret <4 x float>%res
+}
+
+; CHECK-LABEL: test_128_27
+; CHECK: vmovaps{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x float>*
+  %r = load <4 x float>* %vaddr, align 16
+  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
+  ret <4 x float>%res
+}
+
+; CHECK-LABEL: test_128_28
+; CHECK: vmovups{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) {
+  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <4 x float>*
+  %r = load <4 x float>* %vaddr, align 1
+  %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
+  ret <4 x float>%res
+}
+
+; CHECK-LABEL: test_128_29
+; CHECK: vmovapd{{.*{%k[1-7]} }}
+; CHECK: ret
+define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <2 x double>*
+  %r = load <2 x double>* %vaddr, align 16
+  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
+  ret <2 x double>%res
+}
+
+; CHECK-LABEL: test_128_30
+; CHECK: vmovupd{{.*{%k[1-7]} }}
+; CHECK: ret
+define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <2 x double>*
+  %r = load <2 x double>* %vaddr, align 1
+  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
+  ret <2 x double>%res
+}
+
+; CHECK-LABEL: test_128_31
+; CHECK: vmovapd{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <2 x double>*
+  %r = load <2 x double>* %vaddr, align 16
+  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
+  ret <2 x double>%res
+}
+
+; CHECK-LABEL: test_128_32
+; CHECK: vmovupd{{.*{%k[1-7]} {z} }}
+; CHECK: ret
+define <2 x double> @test_128_32(i8 * %addr, <2 x i64> %mask1) {
+  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
+  %vaddr = bitcast i8* %addr to <2 x double>*
+  %r = load <2 x double>* %vaddr, align 1
+  %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
+  ret <2 x double>%res
+}
+
diff --git a/test/CodeGen/X86/avx512vl-nontemporal.ll b/test/CodeGen/X86/avx512vl-nontemporal.ll
new file mode 100644
index 000000000000..fdafb35807e4
--- /dev/null
+++ b/test/CodeGen/X86/avx512vl-nontemporal.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s  -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding | FileCheck %s
+
+define void @f256(<8 x float> %A, <8 x float> %AA, i8* %B, <4 x double> %C, <4 x double> %CC, i32 %D, <4 x i64> %E, <4 x i64> %EE) {
+; CHECK: vmovntps %ymm{{.*}} ## encoding: [0x62
+  %cast = bitcast i8* %B to <8 x float>*
+  %A2 = fadd <8 x float> %A, %AA
+  store <8 x float> %A2, <8 x float>* %cast, align 64, !nontemporal !0
+; CHECK: vmovntdq %ymm{{.*}} ## encoding: [0x62
+  %cast1 = bitcast i8* %B to <4 x i64>*
+  %E2 = add <4 x i64> %E, %EE
+  store <4 x i64> %E2, <4 x i64>* %cast1, align 64, !nontemporal !0
+; CHECK: vmovntpd %ymm{{.*}} ## encoding: [0x62
+  %cast2 = bitcast i8* %B to <4 x double>*
+  %C2 = fadd <4 x double> %C, %CC
+  store <4 x double> %C2, <4 x double>* %cast2, align 64, !nontemporal !0
+  ret void
+}
+
+define void @f128(<4 x float> %A, <4 x float> %AA, i8* %B, <2 x double> %C, <2 x double> %CC, i32 %D, <2 x i64> %E, <2 x i64> %EE) {
+; CHECK: vmovntps %xmm{{.*}} ## encoding: [0x62
+  %cast = bitcast i8* %B to <4 x float>*
+  %A2 = fadd <4 x float> %A, %AA
+  store <4 x float> %A2, <4 x float>* %cast, align 64, !nontemporal !0
+; CHECK: vmovntdq %xmm{{.*}} ## encoding: [0x62
+  %cast1 = bitcast i8* %B to <2 x i64>*
+  %E2 = add <2 x i64> %E, %EE
+  store <2 x i64> %E2, <2 x i64>* %cast1, align 64, !nontemporal !0
+; CHECK: vmovntpd %xmm{{.*}} ## encoding: [0x62
+  %cast2 = bitcast i8* %B to <2 x double>*
+  %C2 = fadd <2 x double> %C, %CC
+  store <2 x double> %C2, <2 x double>* %cast2, align 64, !nontemporal !0
+  ret void
+}
+!0 = !{i32 1}
diff --git a/test/CodeGen/X86/avx512vl-vec-cmp.ll b/test/CodeGen/X86/avx512vl-vec-cmp.ll
new file mode 100644
index 000000000000..b6b508559ca3
--- /dev/null
+++ b/test/CodeGen/X86/avx512vl-vec-cmp.ll
@@ -0,0 +1,381 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s
+
+; CHECK-LABEL: test256_1
+; CHECK: vpcmpeqq {{.*%k[0-7]}}
+; CHECK: vmovdqa64 {{.*}}%k1
+; CHECK: ret
+define <4 x i64> @test256_1(<4 x i64> %x, <4 x i64> %y) nounwind {
+  %mask = icmp eq <4 x i64> %x, %y
+  %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y
+  ret <4 x i64> %max
+}
+
+; CHECK-LABEL: test256_2
+; CHECK: vpcmpgtq {{.*%k[0-7]}}
+; CHECK: vmovdqa64 {{.*}}%k1
+; CHECK: ret
+define <4 x i64> @test256_2(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind {
+  %mask = icmp sgt <4 x i64> %x, %y
+  %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y
+  ret <4 x i64> %max
+}
+
+; CHECK-LABEL: @test256_3
+; CHECK: vpcmpled {{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_3(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1) nounwind {
+  %mask = icmp sge <8 x i32> %x, %y
+  %max = select <8 x i1> %mask, <8 x i32> %x1, <8 x i32> %y
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: test256_4
+; CHECK: vpcmpnleuq {{.*%k[0-7]}}
+; CHECK: vmovdqa64 {{.*}}%k1
+; CHECK: ret
+define <4 x i64> @test256_4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind {
+  %mask = icmp ugt <4 x i64> %x, %y
+  %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y
+  ret <4 x i64> %max
+}
+
+; CHECK-LABEL: test256_5
+; CHECK: vpcmpeqd  (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_5(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwind {
+  %y = load <8 x i32>* %yp, align 4
+  %mask = icmp eq <8 x i32> %x, %y
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: @test256_6
+; CHECK: vpcmpgtd (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_6(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
+  %y = load <8 x i32>* %y.ptr, align 4
+  %mask = icmp sgt <8 x i32> %x, %y
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: @test256_7
+; CHECK: vpcmpled (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_7(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
+  %y = load <8 x i32>* %y.ptr, align 4
+  %mask = icmp sle <8 x i32> %x, %y
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: @test256_8
+; CHECK: vpcmpleud (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_8(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
+  %y = load <8 x i32>* %y.ptr, align 4
+  %mask = icmp ule <8 x i32> %x, %y
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: @test256_9
+; CHECK: vpcmpeqd %ymm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32> %y1) nounwind {
+  %mask1 = icmp eq <8 x i32> %x1, %y1
+  %mask0 = icmp eq <8 x i32> %x, %y
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: @test256_10
+; CHECK: vpcmpleq %ymm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <4 x i64> @test256_10(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) nounwind {
+  %mask1 = icmp sge <4 x i64> %x1, %y1
+  %mask0 = icmp sle <4 x i64> %x, %y
+  %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
+  %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %x1
+  ret <4 x i64> %max
+}
+
+; CHECK-LABEL: @test256_11
+; CHECK: vpcmpgtq (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <4 x i64> @test256_11(<4 x i64> %x, <4 x i64>* %y.ptr, <4 x i64> %x1, <4 x i64> %y1) nounwind {
+  %mask1 = icmp sgt <4 x i64> %x1, %y1
+  %y = load <4 x i64>* %y.ptr, align 4
+  %mask0 = icmp sgt <4 x i64> %x, %y
+  %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
+  %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %x1
+  ret <4 x i64> %max
+}
+
+; CHECK-LABEL: @test256_12
+; CHECK: vpcmpleud (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_12(<8 x i32> %x, <8 x i32>* %y.ptr, <8 x i32> %x1, <8 x i32> %y1) nounwind {
+  %mask1 = icmp sge <8 x i32> %x1, %y1
+  %y = load <8 x i32>* %y.ptr, align 4
+  %mask0 = icmp ule <8 x i32> %x, %y
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: test256_13
+; CHECK: vpcmpeqq  (%rdi){1to4}, %ymm
+; CHECK: vmovdqa64
+; CHECK: ret
+define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind {
+  %yb = load i64* %yb.ptr, align 4
+  %y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0
+  %y = shufflevector <4 x i64> %y.0, <4 x i64> undef, <4 x i32> zeroinitializer
+  %mask = icmp eq <4 x i64> %x, %y
+  %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %x1
+  ret <4 x i64> %max
+}
+
+; CHECK-LABEL: test256_14
+; CHECK: vpcmpled  (%rdi){1to8}, %ymm
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind {
+  %yb = load i32* %yb.ptr, align 4
+  %y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0
+  %y = shufflevector <8 x i32> %y.0, <8 x i32> undef, <8 x i32> zeroinitializer
+  %mask = icmp sle <8 x i32> %x, %y
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: test256_15
+; CHECK: vpcmpgtd  (%rdi){1to8}, %ymm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32> %y1) nounwind {
+  %mask1 = icmp sge <8 x i32> %x1, %y1
+  %yb = load i32* %yb.ptr, align 4
+  %y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0
+  %y = shufflevector <8 x i32> %y.0, <8 x i32> undef, <8 x i32> zeroinitializer
+  %mask0 = icmp sgt <8 x i32> %x, %y
+  %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
+  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
+  ret <8 x i32> %max
+}
+
+; CHECK-LABEL: test256_16
+; CHECK: vpcmpgtq  (%rdi){1to4}, %ymm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64> %y1) nounwind {
+  %mask1 = icmp sge <4 x i64> %x1, %y1
+  %yb = load i64* %yb.ptr, align 4
+  %y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0
+  %y = shufflevector <4 x i64> %y.0, <4 x i64> undef, <4 x i32> zeroinitializer
+  %mask0 = icmp sgt <4 x i64> %x, %y
+  %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
+  %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %x1
+  ret <4 x i64> %max
+}
+
+; CHECK-LABEL: test128_1
+; CHECK: vpcmpeqq {{.*%k[0-7]}}
+; CHECK: vmovdqa64 {{.*}}%k1
+; CHECK: ret
+define <2 x i64> @test128_1(<2 x i64> %x, <2 x i64> %y) nounwind {
+  %mask = icmp eq <2 x i64> %x, %y
+  %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y
+  ret <2 x i64> %max
+}
+
+; CHECK-LABEL: test128_2
+; CHECK: vpcmpgtq {{.*%k[0-7]}}
+; CHECK: vmovdqa64 {{.*}}%k1
+; CHECK: ret
+define <2 x i64> @test128_2(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind {
+  %mask = icmp sgt <2 x i64> %x, %y
+  %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y
+  ret <2 x i64> %max
+}
+
+; CHECK-LABEL: @test128_3
+; CHECK: vpcmpled {{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_3(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1) nounwind {
+  %mask = icmp sge <4 x i32> %x, %y
+  %max = select <4 x i1> %mask, <4 x i32> %x1, <4 x i32> %y
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: test128_4
+; CHECK: vpcmpnleuq {{.*%k[0-7]}}
+; CHECK: vmovdqa64 {{.*}}%k1
+; CHECK: ret
+define <2 x i64> @test128_4(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind {
+  %mask = icmp ugt <2 x i64> %x, %y
+  %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y
+  ret <2 x i64> %max
+}
+
+; CHECK-LABEL: test128_5
+; CHECK: vpcmpeqd  (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_5(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %yp) nounwind {
+  %y = load <4 x i32>* %yp, align 4
+  %mask = icmp eq <4 x i32> %x, %y
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: @test128_6
+; CHECK: vpcmpgtd (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_6(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
+  %y = load <4 x i32>* %y.ptr, align 4
+  %mask = icmp sgt <4 x i32> %x, %y
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: @test128_7
+; CHECK: vpcmpled (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_7(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
+  %y = load <4 x i32>* %y.ptr, align 4
+  %mask = icmp sle <4 x i32> %x, %y
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: @test128_8
+; CHECK: vpcmpleud (%rdi){{.*%k[0-7]}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_8(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
+  %y = load <4 x i32>* %y.ptr, align 4
+  %mask = icmp ule <4 x i32> %x, %y
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: @test128_9
+; CHECK: vpcmpeqd %xmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1, <4 x i32> %y1) nounwind {
+  %mask1 = icmp eq <4 x i32> %x1, %y1
+  %mask0 = icmp eq <4 x i32> %x, %y
+  %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %y
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: @test128_10
+; CHECK: vpcmpleq %xmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <2 x i64> @test128_10(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) nounwind {
+  %mask1 = icmp sge <2 x i64> %x1, %y1
+  %mask0 = icmp sle <2 x i64> %x, %y
+  %mask = select <2 x i1> %mask0, <2 x i1> %mask1, <2 x i1> zeroinitializer
+  %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %x1
+  ret <2 x i64> %max
+}
+
+; CHECK-LABEL: @test128_11
+; CHECK: vpcmpgtq (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <2 x i64> @test128_11(<2 x i64> %x, <2 x i64>* %y.ptr, <2 x i64> %x1, <2 x i64> %y1) nounwind {
+  %mask1 = icmp sgt <2 x i64> %x1, %y1
+  %y = load <2 x i64>* %y.ptr, align 4
+  %mask0 = icmp sgt <2 x i64> %x, %y
+  %mask = select <2 x i1> %mask0, <2 x i1> %mask1, <2 x i1> zeroinitializer
+  %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %x1
+  ret <2 x i64> %max
+}
+
+; CHECK-LABEL: @test128_12
+; CHECK: vpcmpleud (%rdi){{.*{%k[1-7]}}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_12(<4 x i32> %x, <4 x i32>* %y.ptr, <4 x i32> %x1, <4 x i32> %y1) nounwind {
+  %mask1 = icmp sge <4 x i32> %x1, %y1
+  %y = load <4 x i32>* %y.ptr, align 4
+  %mask0 = icmp ule <4 x i32> %x, %y
+  %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: test128_13
+; CHECK: vpcmpeqq  (%rdi){1to2}, %xmm
+; CHECK: vmovdqa64
+; CHECK: ret
+define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind {
+  %yb = load i64* %yb.ptr, align 4
+  %y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0
+  %y = insertelement <2 x i64> %y.0, i64 %yb, i32 1
+  %mask = icmp eq <2 x i64> %x, %y
+  %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %x1
+  ret <2 x i64> %max
+}
+
+; CHECK-LABEL: test128_14
+; CHECK: vpcmpled  (%rdi){1to4}, %xmm
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind {
+  %yb = load i32* %yb.ptr, align 4
+  %y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0
+  %y = shufflevector <4 x i32> %y.0, <4 x i32> undef, <4 x i32> zeroinitializer
+  %mask = icmp sle <4 x i32> %x, %y
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: test128_15
+; CHECK: vpcmpgtd  (%rdi){1to4}, %xmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa32
+; CHECK: ret
+define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32> %y1) nounwind {
+  %mask1 = icmp sge <4 x i32> %x1, %y1
+  %yb = load i32* %yb.ptr, align 4
+  %y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0
+  %y = shufflevector <4 x i32> %y.0, <4 x i32> undef, <4 x i32> zeroinitializer
+  %mask0 = icmp sgt <4 x i32> %x, %y
+  %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
+  %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
+  ret <4 x i32> %max
+}
+
+; CHECK-LABEL: test128_16
+; CHECK: vpcmpgtq  (%rdi){1to2}, %xmm{{.*{%k[1-7]}}}
+; CHECK: vmovdqa64
+; CHECK: ret
+define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x i64> %x1, <2 x i64> %y1) nounwind {
+  %mask1 = icmp sge <2 x i64> %x1, %y1
+  %yb = load i64* %yb.ptr, align 4
+  %y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0
+  %y = insertelement <2 x i64> %y.0, i64 %yb, i32 1
+  %mask0 = icmp sgt <2 x i64> %x, %y
+  %mask = select <2 x i1> %mask0, <2 x i1> %mask1, <2 x i1> zeroinitializer
+  %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %x1
+  ret <2 x i64> %max
+}
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
deleted file mode 100644
index 34aaf2c31ace..000000000000
--- a/test/CodeGen/X86/blend-msb.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
-
-
-; Verify that we produce movss instead of blendvps when possible.
-
-;CHECK-LABEL: vsel_float:
-;CHECK-NOT: blend
-;CHECK: movss
-;CHECK: ret
-define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
-  ret <4 x float> %vsel
-}
-
-;CHECK-LABEL: vsel_4xi8:
-;CHECK-NOT: blend
-;CHECK: movss
-;CHECK: ret
-define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i8> %v1, <4 x i8> %v2
-  ret <4 x i8> %vsel
-}
-
-;CHECK-LABEL: vsel_8xi16:
-; The select mask is
-; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
-; which translates into the boolean mask (big endian representation):
-; 00010001 = 17.
-; '1' means takes the first argument, '0' means takes the second argument.
-; This is the opposite of the intel syntax, thus we expect
-; the inverted mask: 11101110 = 238.
-; According to the ABI:
-; v1 is in xmm0 => first argument is xmm0.
-; v2 is in xmm1 => second argument is xmm1.
-;CHECK: pblendw $238, %xmm1, %xmm0
-;CHECK: ret
-define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
-  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
-  ret <8 x i16> %vsel
-}
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
index 2681c109ef5d..e35be6ae654f 100644
--- a/test/CodeGen/X86/block-placement.ll
+++ b/test/CodeGen/X86/block-placement.ll
@@ -124,7 +124,7 @@ exit:
   ret i32 %sum
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 4, i32 64}
+!0 = !{!"branch_weights", i32 4, i32 64}
 
 define i32 @test_loop_early_exits(i32 %i, i32* %a) {
 ; Check that we sink early exit blocks out of loop bodies.
@@ -237,44 +237,6 @@ exit:
   ret i32 %base
 }
 
-define void @test_loop_rotate_reversed_blocks() {
-; This test case (greatly reduced from an Olden bencmark) ensures that the loop
-; rotate implementation doesn't assume that loops are laid out in a particular
-; order. The first loop will get split into two basic blocks, with the loop
-; header coming after the loop latch.
-;
-; CHECK: test_loop_rotate_reversed_blocks
-; CHECK: %entry
-; Look for a jump into the middle of the loop, and no branches mid-way.
-; CHECK: jmp
-; CHECK: %loop1
-; CHECK-NOT: j{{\w*}} .LBB{{.*}}
-; CHECK: %loop1
-; CHECK: je
-
-entry:
-  %cond1 = load volatile i1* undef
-  br i1 %cond1, label %loop2.preheader, label %loop1
-
-loop1:
-  call i32 @f()
-  %cond2 = load volatile i1* undef
-  br i1 %cond2, label %loop2.preheader, label %loop1
-
-loop2.preheader:
-  call i32 @f()
-  %cond3 = load volatile i1* undef
-  br i1 %cond3, label %exit, label %loop2
-
-loop2:
-  call i32 @f()
-  %cond4 = load volatile i1* undef
-  br i1 %cond4, label %exit, label %loop2
-
-exit:
-  ret void
-}
-
 define i32 @test_loop_align(i32 %i, i32* %a) {
 ; Check that we provide basic loop body alignment with the block placement
 ; pass.
@@ -544,7 +506,7 @@ if.end:
   ret void
 }
 
-!1 = metadata !{metadata !"branch_weights", i32 1000, i32 1}
+!1 = !{!"branch_weights", i32 1000, i32 1}
 
 declare i32 @f()
 declare i32 @g()
@@ -580,7 +542,7 @@ exit:
   ret i32 %result
 }
 
-!2 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+!2 = !{!"branch_weights", i32 3, i32 1}
 
 declare i32 @__gxx_personality_v0(...)
 
diff --git a/test/CodeGen/X86/break-avx-dep.ll b/test/CodeGen/X86/break-avx-dep.ll
deleted file mode 100644
index 210bda136b57..000000000000
--- a/test/CodeGen/X86/break-avx-dep.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
-;
-; rdar:15221834 False AVX register dependencies cause 5x slowdown on
-; flops-6. Make sure the unused register read by vcvtsi2sdq is zeroed
-; to avoid cyclic dependence on a write to the same register in a
-; previous iteration.
-
-; CHECK-LABEL: t1:
-; CHECK-LABEL: %loop
-; CHECK: vxorps %[[REG:xmm.]], %{{xmm.}}, %{{xmm.}}
-; CHECK: vcvtsi2sdq %{{r[0-9a-x]+}}, %[[REG]], %{{xmm.}}
-define i64 @t1(i64* nocapture %x, double* nocapture %y) nounwind {
-entry:
-  %vx = load i64* %x
-  br label %loop
-loop:
-  %i = phi i64 [ 1, %entry ], [ %inc, %loop ]
-  %s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
-  %fi = sitofp i64 %i to double
-  %vy = load double* %y
-  %fipy = fadd double %fi, %vy
-  %iipy = fptosi double %fipy to i64
-  %s2 = add i64 %s1, %iipy
-  %inc = add nsw i64 %i, 1
-  %exitcond = icmp eq i64 %inc, 156250000
-  br i1 %exitcond, label %ret, label %loop
-ret:
-  ret i64 %s2
-}
diff --git a/test/CodeGen/X86/break-false-dep.ll b/test/CodeGen/X86/break-false-dep.ll
new file mode 100644
index 000000000000..7034fae5e8bd
--- /dev/null
+++ b/test/CodeGen/X86/break-false-dep.ll
@@ -0,0 +1,201 @@
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 -mcpu=nehalem | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2 -mcpu=nehalem | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx -mcpu=corei7-avx | FileCheck %s --check-prefix=AVX
+
+define double @t1(float* nocapture %x) nounwind readonly ssp {
+entry:
+; SSE-LABEL: t1:
+; SSE: movss ([[A0:%rdi|%rcx]]), %xmm0
+; SSE: cvtss2sd %xmm0, %xmm0
+
+  %0 = load float* %x, align 4
+  %1 = fpext float %0 to double
+  ret double %1
+}
+
+define float @t2(double* nocapture %x) nounwind readonly ssp optsize {
+entry:
+; SSE-LABEL: t2:
+; SSE: cvtsd2ss ([[A0]]), %xmm0
+  %0 = load double* %x, align 8
+  %1 = fptrunc double %0 to float
+  ret float %1
+}
+
+define float @squirtf(float* %x) nounwind {
+entry:
+; SSE-LABEL: squirtf:
+; SSE: movss ([[A0]]), %xmm0
+; SSE: sqrtss %xmm0, %xmm0
+  %z = load float* %x
+  %t = call float @llvm.sqrt.f32(float %z)
+  ret float %t
+}
+
+define double @squirt(double* %x) nounwind {
+entry:
+; SSE-LABEL: squirt:
+; SSE: movsd ([[A0]]), %xmm0
+; SSE: sqrtsd %xmm0, %xmm0
+  %z = load double* %x
+  %t = call double @llvm.sqrt.f64(double %z)
+  ret double %t
+}
+
+define float @squirtf_size(float* %x) nounwind optsize {
+entry:
+; SSE-LABEL: squirtf_size:
+; SSE: sqrtss ([[A0]]), %xmm0
+  %z = load float* %x
+  %t = call float @llvm.sqrt.f32(float %z)
+  ret float %t
+}
+
+define double @squirt_size(double* %x) nounwind optsize {
+entry:
+; SSE-LABEL: squirt_size:
+; SSE: sqrtsd ([[A0]]), %xmm0
+  %z = load double* %x
+  %t = call double @llvm.sqrt.f64(double %z)
+  ret double %t
+}
+
+declare float @llvm.sqrt.f32(float)
+declare double @llvm.sqrt.f64(double)
+
+; SSE-LABEL: loopdep1
+; SSE: for.body
+;
+; This loop contains two cvtsi2ss instructions that update the same xmm
+; register.  Verify that the execution dependency fix pass breaks those
+; dependencies by inserting xorps instructions.
+;
+; If the register allocator chooses different registers for the two cvtsi2ss
+; instructions, they are still dependent on themselves.
+; SSE: xorps [[XMM1:%xmm[0-9]+]]
+; SSE: , [[XMM1]]
+; SSE: cvtsi2ssl %{{.*}}, [[XMM1]]
+; SSE: xorps [[XMM2:%xmm[0-9]+]]
+; SSE: , [[XMM2]]
+; SSE: cvtsi2ssl %{{.*}}, [[XMM2]]
+;
+define float @loopdep1(i32 %m) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i32 %m, 0
+  br i1 %tobool3, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %m.addr.07 = phi i32 [ %dec, %for.body ], [ %m, %entry ]
+  %s1.06 = phi float [ %add, %for.body ], [ 0.000000e+00, %entry ]
+  %s2.05 = phi float [ %add2, %for.body ], [ 0.000000e+00, %entry ]
+  %n.04 = phi i32 [ %inc, %for.body ], [ 1, %entry ]
+  %conv = sitofp i32 %n.04 to float
+  %add = fadd float %s1.06, %conv
+  %conv1 = sitofp i32 %m.addr.07 to float
+  %add2 = fadd float %s2.05, %conv1
+  %inc = add nsw i32 %n.04, 1
+  %dec = add nsw i32 %m.addr.07, -1
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  %s1.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %s2.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add2, %for.body ]
+  %sub = fsub float %s1.0.lcssa, %s2.0.lcssa
+  ret float %sub
+}
+
+; rdar:15221834 False AVX register dependencies cause 5x slowdown on
+; flops-6. Make sure the unused register read by vcvtsi2sdq is zeroed
+; to avoid cyclic dependence on a write to the same register in a
+; previous iteration.
+
+; AVX-LABEL: loopdep2:
+; AVX-LABEL: %loop
+; AVX: vxorps %[[REG:xmm.]], %{{xmm.}}, %{{xmm.}}
+; AVX: vcvtsi2sdq %{{r[0-9a-x]+}}, %[[REG]], %{{xmm.}}
+; SSE-LABEL: loopdep2:
+; SSE-LABEL: %loop
+; SSE: xorps %[[REG:xmm.]], %[[REG]]
+; SSE: cvtsi2sdq %{{r[0-9a-x]+}}, %[[REG]]
+define i64 @loopdep2(i64* nocapture %x, double* nocapture %y) nounwind {
+entry:
+  %vx = load i64* %x
+  br label %loop
+loop:
+  %i = phi i64 [ 1, %entry ], [ %inc, %loop ]
+  %s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
+  %fi = sitofp i64 %i to double
+  %vy = load double* %y
+  %fipy = fadd double %fi, %vy
+  %iipy = fptosi double %fipy to i64
+  %s2 = add i64 %s1, %iipy
+  %inc = add nsw i64 %i, 1
+  %exitcond = icmp eq i64 %inc, 156250000
+  br i1 %exitcond, label %ret, label %loop
+ret:
+  ret i64 %s2
+}
+
+; This loop contains a cvtsi2sd instruction that has a loop-carried
+; false dependency on an xmm that is modified by other scalar instructions
+; that follow it in the loop. Additionally, the source of convert is a 
+; memory operand. Verify the execution dependency fix pass breaks this
+; dependency by inserting a xor before the convert.
+@x = common global [1024 x double] zeroinitializer, align 16
+@y = common global [1024 x double] zeroinitializer, align 16
+@z = common global [1024 x double] zeroinitializer, align 16
+@w = common global [1024 x double] zeroinitializer, align 16
+@v = common global [1024 x i32] zeroinitializer, align 16
+
+define void @loopdep3() {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.inc14, %entry
+  %i.025 = phi i32 [ 0, %entry ], [ %inc15, %for.inc14 ]
+  br label %for.body3
+
+for.body3:
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx = getelementptr inbounds [1024 x i32]* @v, i64 0, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %conv = sitofp i32 %0 to double
+  %arrayidx5 = getelementptr inbounds [1024 x double]* @x, i64 0, i64 %indvars.iv
+  %1 = load double* %arrayidx5, align 8
+  %mul = fmul double %conv, %1
+  %arrayidx7 = getelementptr inbounds [1024 x double]* @y, i64 0, i64 %indvars.iv
+  %2 = load double* %arrayidx7, align 8
+  %mul8 = fmul double %mul, %2
+  %arrayidx10 = getelementptr inbounds [1024 x double]* @z, i64 0, i64 %indvars.iv
+  %3 = load double* %arrayidx10, align 8
+  %mul11 = fmul double %mul8, %3
+  %arrayidx13 = getelementptr inbounds [1024 x double]* @w, i64 0, i64 %indvars.iv
+  store double %mul11, double* %arrayidx13, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.inc14, label %for.body3
+
+for.inc14:                                        ; preds = %for.body3
+  %inc15 = add nsw i32 %i.025, 1
+  %exitcond26 = icmp eq i32 %inc15, 100000
+  br i1 %exitcond26, label %for.end16, label %for.cond1.preheader
+
+for.end16:                                        ; preds = %for.inc14
+  ret void
+
+;SSE-LABEL:@loopdep3
+;SSE: xorps [[XMM0:%xmm[0-9]+]], [[XMM0]]
+;SSE-NEXT: cvtsi2sdl {{.*}}, [[XMM0]]
+;SSE-NEXT: mulsd {{.*}}, [[XMM0]]
+;SSE-NEXT: mulsd {{.*}}, [[XMM0]]
+;SSE-NEXT: mulsd {{.*}}, [[XMM0]]
+;SSE-NEXT: movsd [[XMM0]],
+;AVX-LABEL:@loopdep3
+;AVX: vxorps [[XMM0:%xmm[0-9]+]], [[XMM0]]
+;AVX-NEXT: vcvtsi2sdl {{.*}}, [[XMM0]], [[XMM0]]
+;AVX-NEXT: vmulsd {{.*}}, [[XMM0]], [[XMM0]]
+;AVX-NEXT: vmulsd {{.*}}, [[XMM0]], [[XMM0]]
+;AVX-NEXT: vmulsd {{.*}}, [[XMM0]], [[XMM0]]
+;AVX-NEXT: vmovsd [[XMM0]],
+}
diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll
deleted file mode 100644
index 8124d6f52263..000000000000
--- a/test/CodeGen/X86/break-sse-dep.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 -mcpu=nehalem | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2 -mcpu=nehalem | FileCheck %s
-
-define double @t1(float* nocapture %x) nounwind readonly ssp {
-entry:
-; CHECK-LABEL: t1:
-; CHECK: movss ([[A0:%rdi|%rcx]]), %xmm0
-; CHECK: cvtss2sd %xmm0, %xmm0
-
-  %0 = load float* %x, align 4
-  %1 = fpext float %0 to double
-  ret double %1
-}
-
-define float @t2(double* nocapture %x) nounwind readonly ssp optsize {
-entry:
-; CHECK-LABEL: t2:
-; CHECK: cvtsd2ss ([[A0]]), %xmm0
-  %0 = load double* %x, align 8
-  %1 = fptrunc double %0 to float
-  ret float %1
-}
-
-define float @squirtf(float* %x) nounwind {
-entry:
-; CHECK-LABEL: squirtf:
-; CHECK: movss ([[A0]]), %xmm0
-; CHECK: sqrtss %xmm0, %xmm0
-  %z = load float* %x
-  %t = call float @llvm.sqrt.f32(float %z)
-  ret float %t
-}
-
-define double @squirt(double* %x) nounwind {
-entry:
-; CHECK-LABEL: squirt:
-; CHECK: sqrtsd ([[A0]]), %xmm0
-  %z = load double* %x
-  %t = call double @llvm.sqrt.f64(double %z)
-  ret double %t
-}
-
-define float @squirtf_size(float* %x) nounwind optsize {
-entry:
-; CHECK-LABEL: squirtf_size:
-; CHECK: sqrtss ([[A0]]), %xmm0
-  %z = load float* %x
-  %t = call float @llvm.sqrt.f32(float %z)
-  ret float %t
-}
-
-define double @squirt_size(double* %x) nounwind optsize {
-entry:
-; CHECK-LABEL: squirt_size:
-; CHECK: sqrtsd ([[A0]]), %xmm0
-  %z = load double* %x
-  %t = call double @llvm.sqrt.f64(double %z)
-  ret double %t
-}
-
-declare float @llvm.sqrt.f32(float)
-declare double @llvm.sqrt.f64(double)
diff --git a/test/CodeGen/X86/byval-callee-cleanup.ll b/test/CodeGen/X86/byval-callee-cleanup.ll
new file mode 100644
index 000000000000..8e059d433446
--- /dev/null
+++ b/test/CodeGen/X86/byval-callee-cleanup.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -mtriple=i686-win32 | FileCheck %s
+
+; Previously we would forget to align to stack slot alignment after placing a
+; byval argument.  Subsequent arguments would align themselves, but if it was
+; the last argument, the argument size would not be a multiple of stack slot
+; size. This resulted in retl $6 in callee-cleanup functions, as well as subtle
+; varargs bugs.
+
+%struct.Six = type { [6 x i8] }
+
+define x86_stdcallcc void @f(%struct.Six* byval %a) {
+  ret void
+}
+; CHECK-LABEL: _f@8:
+; CHECK: retl $8
+
+define x86_thiscallcc void @g(i8* %this, %struct.Six* byval %a) {
+  ret void
+}
+; CHECK-LABEL: _g:
+; CHECK: retl $8
+
+define x86_fastcallcc void @h(i32 inreg %x, i32 inreg %y, %struct.Six* byval %a) {
+  ret void
+}
+; CHECK-LABEL: @h@16:
+; CHECK: retl $8
diff --git a/test/CodeGen/X86/cfi_enforcing.ll b/test/CodeGen/X86/cfi_enforcing.ll
new file mode 100644
index 000000000000..bcad8c168f24
--- /dev/null
+++ b/test/CodeGen/X86/cfi_enforcing.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=i386-unknown-linux-gnu -fcfi -cfi-enforcing <%s | FileCheck --check-prefix=X86 %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -fcfi -cfi-enforcing <%s | FileCheck --check-prefix=X86-64 %s
+
+define void @indirect_fun() unnamed_addr jumptable {
+  ret void
+}
+
+define i32 @m(void ()* %fun) {
+  call void ()* %fun()
+; CHECK: subl
+; X86-64: andq    $8,
+; X86-64: leaq    __llvm_jump_instr_table_0_1({{%[a-z0-9]+}}), [[REG:%[a-z0-9]+]]
+; X86-64-NOT: callq __llvm_cfi_pointer_warning
+; X86-64: callq   *[[REG]]
+; X86: andl    $8,
+; X86: leal    __llvm_jump_instr_table_0_1({{%[a-z0-9]+}}), [[REG:%[a-z0-9]+]]
+; X86-NOT: calll __llvm_cfi_pointer_warning
+; X86: calll   *[[REG]]
+  ret i32 0
+}
+
+define void ()* @get_fun() {
+  ret void ()* @indirect_fun
+}
+
+define i32 @main(i32 %argc, i8** %argv) {
+  %f = call void ()* ()* @get_fun()
+  %a = call i32 @m(void ()* %f)
+  ret i32 %a
+}
+
+; CHECK: .align 8
+; CHECK: __llvm_jump_instr_table_0_1:
+; CHECK: jmp indirect_fun@PLT
diff --git a/test/CodeGen/X86/cfi_invoke.ll b/test/CodeGen/X86/cfi_invoke.ll
new file mode 100644
index 000000000000..dd0d42a59c3a
--- /dev/null
+++ b/test/CodeGen/X86/cfi_invoke.ll
@@ -0,0 +1,35 @@
+; RUN: llc <%s -fcfi -cfi-type=sub | FileCheck %s
+; ModuleID = 'test.cc'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i32 @__gxx_personality_v0(...)
+
+@_ZTIPKc = external constant i8*
+@_ZTIi = external constant i8*
+
+define void @f() unnamed_addr jumptable {
+  ret void
+}
+
+@a = global void ()* @f
+
+; Make sure invoke gets targeted as well as regular calls
+define void @_Z3foov(void ()* %f) uwtable ssp {
+; CHECK-LABEL: _Z3foov:
+ entry:
+   invoke void %f()
+           to label %try.cont unwind label %lpad
+; CHECK: callq __llvm_cfi_pointer_warning
+; CHECK: callq *%rbx
+
+ lpad:
+   %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+                                  catch i8* bitcast (i8** @_ZTIi to i8*)
+                                  filter [1 x i8*] [i8* bitcast (i8** @_ZTIPKc to i8*)]
+   ret void
+
+ try.cont:
+   ret void
+}
+
diff --git a/test/CodeGen/X86/cfi_non_default_function.ll b/test/CodeGen/X86/cfi_non_default_function.ll
new file mode 100644
index 000000000000..29774a1d4425
--- /dev/null
+++ b/test/CodeGen/X86/cfi_non_default_function.ll
@@ -0,0 +1,27 @@
+; RUN: llc -fcfi -cfi-func-name=cfi_new_failure <%s | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+define void @indirect_fun() unnamed_addr jumptable {
+  ret void
+}
+
+define i32 @m(void ()* %fun) {
+; CHECK-LABEL: @m
+  call void ()* %fun()
+; CHECK: callq cfi_new_failure
+  ret i32 0
+}
+
+define void ()* @get_fun() {
+  ret void ()* @indirect_fun
+}
+
+define i32 @main(i32 %argc, i8** %argv) {
+  %f = call void ()* ()* @get_fun()
+  %a = call i32 @m(void ()* %f)
+  ret i32 %a
+}
+
+; CHECK: .align 8
+; CHECK: __llvm_jump_instr_table_0_1:
+; CHECK: jmp indirect_fun@PLT
diff --git a/test/CodeGen/X86/cfi_simple_indirect_call.ll b/test/CodeGen/X86/cfi_simple_indirect_call.ll
new file mode 100644
index 000000000000..0ee118d984ea
--- /dev/null
+++ b/test/CodeGen/X86/cfi_simple_indirect_call.ll
@@ -0,0 +1,43 @@
+; RUN: llc -fcfi -cfi-type=sub <%s | FileCheck --check-prefix=SUB %s
+; RUN: llc -fcfi -cfi-type=add <%s | FileCheck --check-prefix=ADD %s
+; RUN: llc -fcfi -cfi-type=ror <%s | FileCheck --check-prefix=ROR %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @indirect_fun() unnamed_addr jumptable {
+  ret void
+}
+
+define i32 @m(void ()* %fun) {
+  call void ()* %fun()
+; SUB: subl    
+; SUB: andq    $8
+; SUB-LABEL: leaq    __llvm_jump_instr_table_0_1
+; SUB-LABEL: callq   __llvm_cfi_pointer_warning
+
+; ROR: subq
+; ROR: rolq    $61
+; ROR: testq
+; ROR-LABEL: callq   __llvm_cfi_pointer_warning
+
+; ADD: andq    $8
+; ADD-LABEL: leaq    __llvm_jump_instr_table_0_1
+; ADD: cmpq
+; ADD-LABEL: callq   __llvm_cfi_pointer_warning
+ret i32 0
+}
+
+define void ()* @get_fun() {
+  ret void ()* @indirect_fun
+}
+
+define i32 @main(i32 %argc, i8** %argv) {
+  %f = call void ()* ()* @get_fun()
+  %a = call i32 @m(void ()* %f)
+  ret i32 %a
+}
+; SUB: .text
+; SUB: .align 8
+; SUB-LABEL: .type __llvm_jump_instr_table_0_1,@function
+; SUB-LABEL:__llvm_jump_instr_table_0_1:
+; SUB-LABEL: jmp indirect_fun@PLT
diff --git a/test/CodeGen/X86/chain_order.ll b/test/CodeGen/X86/chain_order.ll
index c88726e75a81..72e6f78bdef7 100644
--- a/test/CodeGen/X86/chain_order.ll
+++ b/test/CodeGen/X86/chain_order.ll
@@ -1,13 +1,13 @@
 ; RUN: llc < %s -mcpu=corei7-avx -mtriple=x86_64-linux | FileCheck %s
 
-;CHECK-LABEL: cftx020:
-;CHECK: vmovsd  (%rdi), %xmm{{.*}}
-;CHECK: vmovsd  16(%rdi), %xmm{{.*}}
-;CHECK: vmovsd  24(%rdi), %xmm{{.*}}
-;CHECK: vmovhpd  8(%rdi), %xmm{{.*}}
-;CHECK: vmovupd %xmm{{.*}}, (%rdi)
-;CHECK: vmovupd %xmm{{.*}}, 16(%rdi)
-;CHECK: ret
+; CHECK-LABEL: cftx020:
+; CHECK: vmovsd  (%rdi), %xmm{{.*}}
+; CHECK-NEXT: vmovsd  16(%rdi), %xmm{{.*}}
+; CHECK-NEXT: vmovhpd  24(%rdi), %xmm{{.*}}
+; CHECK-NEXT: vmovhpd  8(%rdi), %xmm{{.*}}
+; CHECK: vmovupd %xmm{{.*}}, (%rdi)
+; CHECK-NEXT: vmovupd %xmm{{.*}}, 16(%rdi)
+; CHECK: ret
 
 ; A test from pifft (after SLP-vectorization) that fails when we drop the chain on newly merged loads.
 define void @cftx020(double* nocapture %a) {
diff --git a/test/CodeGen/X86/clobber-fi0.ll b/test/CodeGen/X86/clobber-fi0.ll
index 38a42dbf1aa1..4876c351a413 100644
--- a/test/CodeGen/X86/clobber-fi0.ll
+++ b/test/CodeGen/X86/clobber-fi0.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.7.0"
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll
index d38d2b430ccb..355c6b4165b9 100644
--- a/test/CodeGen/X86/cmov.ll
+++ b/test/CodeGen/X86/cmov.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -disable-cgp-select2branch | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-apple-darwin10 -disable-cgp-select2branch | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 
 define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
diff --git a/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/test/CodeGen/X86/cmpxchg-clobber-flags.ll
new file mode 100644
index 000000000000..b7995dbdf259
--- /dev/null
+++ b/test/CodeGen/X86/cmpxchg-clobber-flags.ll
@@ -0,0 +1,87 @@
+; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s
+
+declare i32 @bar()
+
+define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) {
+; CHECK-LABEL: test_intervening_call:
+; CHECK: cmpxchg
+; CHECK: pushf[[LQ:[lq]]]
+; CHECK-NEXT: pop[[LQ]] [[FLAGS:%.*]]
+
+; CHECK-NEXT: call[[LQ]] bar
+
+; CHECK-NEXT: push[[LQ]] [[FLAGS]]
+; CHECK-NEXT: popf[[LQ]]
+; CHECK-NEXT: jne
+  %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
+  %p = extractvalue { i64, i1 } %cx, 1
+  call i32 @bar()
+  br i1 %p, label %t, label %f
+
+t:
+  ret i64 42
+
+f:
+  ret i64 0
+}
+
+; Interesting in producing a clobber without any function calls.
+define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) {
+; CHECK-LABEL: test_control_flow:
+
+; CHECK: cmpxchg
+; CHECK-NEXT: jne
+entry:
+  %cmp = icmp sgt i32 %i, %j
+  br i1 %cmp, label %loop_start, label %cond.end
+
+loop_start:
+  br label %while.condthread-pre-split.i
+
+while.condthread-pre-split.i:
+  %.pr.i = load i32* %p, align 4
+  br label %while.cond.i
+
+while.cond.i:
+  %0 = phi i32 [ %.pr.i, %while.condthread-pre-split.i ], [ 0, %while.cond.i ]
+  %tobool.i = icmp eq i32 %0, 0
+  br i1 %tobool.i, label %while.cond.i, label %while.body.i
+
+while.body.i:
+  %.lcssa = phi i32 [ %0, %while.cond.i ]
+  %1 = cmpxchg i32* %p, i32 %.lcssa, i32 %.lcssa seq_cst seq_cst
+  %2 = extractvalue { i32, i1 } %1, 1
+  br i1 %2, label %cond.end.loopexit, label %while.condthread-pre-split.i
+
+cond.end.loopexit:
+  br label %cond.end
+
+cond.end:
+  %cond = phi i32 [ %i, %entry ], [ 0, %cond.end.loopexit ]
+  ret i32 %cond
+}
+
+; This one is an interesting case because CMOV doesn't have a chain
+; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here.
+define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: test_feed_cmov:
+
+; CHECK: cmpxchg
+; CHECK: pushf[[LQ:[lq]]]
+; CHECK-NEXT: pop[[LQ]] [[FLAGS:%.*]]
+
+; CHECK-NEXT: call[[LQ]] bar
+
+; CHECK-NEXT: push[[LQ]] [[FLAGS]]
+; CHECK-NEXT: popf[[LQ]]
+  %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+  %success = extractvalue { i32, i1 } %res, 1
+
+  %rhs = call i32 @bar()
+
+  %ret = select i1 %success, i32 %new, i32 %rhs
+  ret i32 %ret
+}
diff --git a/test/CodeGen/X86/coalesce_commute_subreg.ll b/test/CodeGen/X86/coalesce_commute_subreg.ll
new file mode 100644
index 000000000000..8d0a20cfebbd
--- /dev/null
+++ b/test/CodeGen/X86/coalesce_commute_subreg.ll
@@ -0,0 +1,51 @@
+; RUN: llc -mtriple="x86_64-apple-darwin" -o - -verify-machineinstrs %s
+
+define void @make_wanted() #0 {
+entry:
+  br i1 undef, label %for.end20, label %for.cond1.preheader.lr.ph
+
+for.cond1.preheader.lr.ph:
+  br label %for.body3
+
+for.body3:
+  %cmp20.i = icmp eq i32 undef, 0
+  %.col.057 = select i1 %cmp20.i, i32 0, i32 undef
+  br i1 undef, label %while.cond.i, label %for.body5.lr.ph.i
+
+for.body5.lr.ph.i:
+  %0 = sext i32 %.col.057 to i64
+  %1 = sub i32 0, %.col.057
+  %2 = zext i32 %1 to i64
+  %3 = add nuw nsw i64 %2, 1
+  %n.vec110 = and i64 %3, 8589934588
+  %end.idx.rnd.down111 = add nsw i64 %n.vec110, %0
+  br i1 undef, label %middle.block105, label %vector.ph103
+
+vector.ph103:
+  br i1 undef, label %middle.block105, label %vector.body104
+
+vector.body104:
+  %4 = icmp eq i64 undef, %end.idx.rnd.down111
+  br i1 %4, label %middle.block105, label %vector.body104
+
+middle.block105:
+  %resume.val114 = phi i64 [ %0, %for.body5.lr.ph.i ], [ %end.idx.rnd.down111, %vector.body104 ], [ %end.idx.rnd.down111, %vector.ph103 ]
+  %cmp.n116 = icmp eq i64 undef, %resume.val114
+  br i1 %cmp.n116, label %while.cond.i, label %for.body5.i.preheader
+
+for.body5.i.preheader:
+  %lcmp.or182 = or i1 undef, undef
+  br i1 %lcmp.or182, label %for.body5.i.prol, label %while.cond.i
+
+for.body5.i.prol:
+  br i1 undef, label %for.body5.i.prol, label %while.cond.i
+
+while.cond.i:
+  br i1 undef, label %while.cond.i, label %if.then
+
+if.then:
+  br label %for.body3
+
+for.end20:
+  ret void
+}
diff --git a/test/CodeGen/X86/coalescer-dce.ll b/test/CodeGen/X86/coalescer-dce.ll
index 7f72e3d8667e..208d70660faa 100644
--- a/test/CodeGen/X86/coalescer-dce.ll
+++ b/test/CodeGen/X86/coalescer-dce.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -disable-fp-elim -disable-machine-dce -verify-coalescing
+; RUN: llc < %s -verify-machineinstrs -disable-fp-elim -disable-machine-dce -verify-coalescing
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.7.0"
 
diff --git a/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll b/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll
index 78e1dd287f6e..85bfff2757e6 100644
--- a/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll
+++ b/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -S -codegenprepare %s -o - | FileCheck %s
 ; RUN: opt -S -codegenprepare -addr-sink-using-gep=1 %s -o - | FileCheck -check-prefix=CHECK-GEP %s
 ; This file tests the different cases what are involved when codegen prepare
-; tries to get sign extension out of the way of addressing mode.
+; tries to get sign/zero extension out of the way of addressing mode.
 ; This tests require an actual target as addressing mode decisions depends
 ; on the target.
 
@@ -67,6 +67,43 @@ define i8 @oneArgPromotion(i32 %arg1, i8* %base) {
   ret i8 %res
 }
 
+; Check that we are able to merge a sign extension with a zero extension.
+; CHECK-LABEL: @oneArgPromotionZExt
+; CHECK: [[ARG1ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 %arg1 to i64
+; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1ZEXT]], 1
+; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: ret
+define i8 @oneArgPromotionZExt(i8 %arg1, i8* %base) {
+  %zext = zext i8 %arg1 to i32
+  %add = add nsw i32 %zext, 1 
+  %sextadd = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
+  %res = load i8* %arrayidx
+  ret i8 %res
+}
+
+; When promoting a constant zext, the IR builder returns a constant,
+; not an instruction. Make sure this is properly handled. This used
+; to crash.
+; Note: The constant zext is promoted, but does not help matching
+; more thing in the addressing mode. Therefore the modification is
+; rolled back.
+; Still, this test case exercises the desired code path.
+; CHECK-LABEL: @oneArgPromotionCstZExt
+; CHECK: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i16 undef to i32
+; CHECK: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i32 [[ZEXT]] to i64
+; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXT]], 1
+; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: ret
+define i8 @oneArgPromotionCstZExt(i8* %base) {
+  %cst = zext i16 undef to i32
+  %add = add nsw i32 %cst, 1
+  %sextadd = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds i8* %base, i64 %sextadd
+  %res = load i8* %arrayidx
+  ret i8 %res
+}
+
 ; Check that we do not promote truncate when we cannot determine the
 ; bits that are dropped.
 ; CHECK-LABEL: @oneArgPromotionBlockTrunc1
@@ -321,3 +358,177 @@ end:
   %final = load i32* %addr
   ret i32 %final
 }
+
+%struct.dns_packet = type { i32, i32, %union.anon }
+%union.anon = type { i32 }
+
+@a = common global i32 0, align 4
+@b = common global i16 0, align 2
+
+; We used to crash on this function because we did not return the right
+; promoted instruction for %conv.i.
+; Make sure we generate the right code now.
+; CHECK-LABEL: @fn3
+; %conv.i is used twice and only one of its use is being promoted.
+; Use it at the starting point for the matching.
+; CHECK: %conv.i = zext i16 [[PLAIN_OPND:%[.a-zA-Z_0-9-]+]] to i32
+; CHECK-NEXT: [[PROMOTED_CONV:%[.a-zA-Z_0-9-]+]] = zext i16 [[PLAIN_OPND]] to i64
+; CHECK-NEXT: [[BASE:%[a-zA-Z_0-9-]+]] = ptrtoint %struct.dns_packet* %P to i64
+; CHECK-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add i64 [[BASE]], [[PROMOTED_CONV]]
+; CHECK-NEXT: [[ADDR:%[a-zA-Z_0-9-]+]] = add i64 [[ADD]], 7
+; CHECK-NEXT: [[CAST:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[ADDR]] to i8*
+; CHECK-NEXT: load i8* [[CAST]], align 1
+define signext i16 @fn3(%struct.dns_packet* nocapture readonly %P) {
+entry:
+  %tmp = getelementptr inbounds %struct.dns_packet* %P, i64 0, i32 2
+  %data.i.i = bitcast %union.anon* %tmp to [0 x i8]*
+  br label %while.body.i.i
+
+while.body.i.i:                                   ; preds = %while.body.i.i, %entry
+  %src.addr.0.i.i = phi i16 [ 0, %entry ], [ %inc.i.i, %while.body.i.i ]
+  %inc.i.i = add i16 %src.addr.0.i.i, 1
+  %idxprom.i.i = sext i16 %src.addr.0.i.i to i64
+  %arrayidx.i.i = getelementptr inbounds [0 x i8]* %data.i.i, i64 0, i64 %idxprom.i.i
+  %tmp1 = load i8* %arrayidx.i.i, align 1
+  %conv2.i.i = zext i8 %tmp1 to i32
+  %and.i.i = and i32 %conv2.i.i, 15
+  store i32 %and.i.i, i32* @a, align 4
+  %tobool.i.i = icmp eq i32 %and.i.i, 0
+  br i1 %tobool.i.i, label %while.body.i.i, label %fn1.exit.i
+
+fn1.exit.i:                                       ; preds = %while.body.i.i
+  %inc.i.i.lcssa = phi i16 [ %inc.i.i, %while.body.i.i ]
+  %conv.i = zext i16 %inc.i.i.lcssa to i32
+  %sub.i = add nsw i32 %conv.i, -1
+  %idxprom.i = sext i32 %sub.i to i64
+  %arrayidx.i = getelementptr inbounds [0 x i8]* %data.i.i, i64 0, i64 %idxprom.i
+  %tmp2 = load i8* %arrayidx.i, align 1
+  %conv2.i = sext i8 %tmp2 to i16
+  store i16 %conv2.i, i16* @b, align 2
+  %sub4.i = sub nsw i32 0, %conv.i
+  %conv5.i = zext i16 %conv2.i to i32
+  %cmp.i = icmp sgt i32 %conv5.i, %sub4.i
+  br i1 %cmp.i, label %if.then.i, label %fn2.exit
+
+if.then.i:                                        ; preds = %fn1.exit.i
+  %end.i = getelementptr inbounds %struct.dns_packet* %P, i64 0, i32 1
+  %tmp3 = load i32* %end.i, align 4
+  %sub7.i = add i32 %tmp3, 65535
+  %conv8.i = trunc i32 %sub7.i to i16
+  br label %fn2.exit
+
+fn2.exit:                                         ; preds = %if.then.i, %fn1.exit.i
+  %retval.0.i = phi i16 [ %conv8.i, %if.then.i ], [ undef, %fn1.exit.i ]
+  ret i16 %retval.0.i
+}
+
+; Check that we do not promote an extension if the non-wrapping flag does not
+; match the kind of the extension.
+; CHECK-LABEL: @noPromotionFlag
+; CHECK: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32 %arg1, %arg2
+; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = zext i32 [[ADD]] to i64
+; CHECK: inttoptr i64 [[PROMOTED]] to i8*
+; CHECK: ret
+define i8 @noPromotionFlag(i32 %arg1, i32 %arg2) {
+  %add = add nsw i32 %arg1, %arg2 
+  %zextadd = zext i32 %add to i64
+  %base = inttoptr i64 %zextadd to i8*
+  %res = load i8* %base
+  ret i8 %res
+}
+
+; Check that we correctly promote both operands of the promotable add with zext.
+; CHECK-LABEL: @twoArgsPromotionZExt
+; CHECK: [[ARG1ZEXT:%[a-zA-Z_0-9-]+]] = zext i32 %arg1 to i64
+; CHECK: [[ARG2ZEXT:%[a-zA-Z_0-9-]+]] = zext i32 %arg2 to i64
+; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ARG1ZEXT]], [[ARG2ZEXT]]
+; CHECK: inttoptr i64 [[PROMOTED]] to i8*
+; CHECK: ret
+define i8 @twoArgsPromotionZExt(i32 %arg1, i32 %arg2) {
+  %add = add nuw i32 %arg1, %arg2 
+  %zextadd = zext i32 %add to i64
+  %base = inttoptr i64 %zextadd to i8*
+  %res = load i8* %base
+  ret i8 %res
+}
+
+; Check that we correctly promote constant arguments.
+; CHECK-LABEL: @oneArgPromotionNegativeCstZExt
+; CHECK: [[ARG1ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 %arg1 to i64
+; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ARG1ZEXT]], 255
+; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: ret
+define i8 @oneArgPromotionNegativeCstZExt(i8 %arg1, i8* %base) {
+  %add = add nuw i8 %arg1, -1 
+  %zextadd = zext i8 %add to i64
+  %arrayidx = getelementptr inbounds i8* %base, i64 %zextadd
+  %res = load i8* %arrayidx
+  ret i8 %res
+}
+
+; Check that we are able to merge two zero extensions.
+; CHECK-LABEL: @oneArgPromotionZExtZExt
+; CHECK: [[ARG1ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 %arg1 to i64
+; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ARG1ZEXT]], 1
+; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: ret
+define i8 @oneArgPromotionZExtZExt(i8 %arg1, i8* %base) {
+  %zext = zext i8 %arg1 to i32
+  %add = add nuw i32 %zext, 1 
+  %zextadd = zext i32 %add to i64
+  %arrayidx = getelementptr inbounds i8* %base, i64 %zextadd
+  %res = load i8* %arrayidx
+  ret i8 %res
+}
+
+; Check that we do not promote truncate when the dropped bits
+; are of a different kind.
+; CHECK-LABEL: @oneArgPromotionBlockTruncZExt
+; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i1 %arg1 to i32
+; CHECK: [[ARG1TRUNC:%[a-zA-Z_0-9-]+]] = trunc i32 [[ARG1SEXT]] to i8
+; CHECK: [[ARG1ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[ARG1TRUNC]] to i64
+; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ARG1ZEXT]], 1
+; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: ret
+define i8 @oneArgPromotionBlockTruncZExt(i1 %arg1, i8* %base) {
+  %sextarg1 = sext i1 %arg1 to i32
+  %trunc = trunc i32 %sextarg1 to i8
+  %add = add nuw i8 %trunc, 1 
+  %zextadd = zext i8 %add to i64
+  %arrayidx = getelementptr inbounds i8* %base, i64 %zextadd
+  %res = load i8* %arrayidx
+  ret i8 %res
+}
+
+; Check that we are able to promote truncate when we know all the bits
+; that are dropped.
+; CHECK-LABEL: @oneArgPromotionPassTruncZExt
+; CHECK: [[ARG1ZEXT:%[a-zA-Z_0-9-]+]] = zext i1 %arg1 to i64
+; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ARG1ZEXT]], 1
+; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: ret
+define i8 @oneArgPromotionPassTruncZExt(i1 %arg1, i8* %base) {
+  %sextarg1 = zext i1 %arg1 to i32
+  %trunc = trunc i32 %sextarg1 to i8
+  %add = add nuw i8 %trunc, 1 
+  %zextadd = zext i8 %add to i64
+  %arrayidx = getelementptr inbounds i8* %base, i64 %zextadd
+  %res = load i8* %arrayidx
+  ret i8 %res
+}
+
+; Check that we do not promote sext with zext.
+; CHECK-LABEL: @oneArgPromotionBlockSExtZExt
+; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i1 %arg1 to i8
+; CHECK: [[ARG1ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[ARG1SEXT]] to i64
+; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nuw i64 [[ARG1ZEXT]], 1
+; CHECK: getelementptr inbounds i8* %base, i64 [[PROMOTED]]
+; CHECK: ret
+define i8 @oneArgPromotionBlockSExtZExt(i1 %arg1, i8* %base) {
+  %sextarg1 = sext i1 %arg1 to i8
+  %add = add nuw i8 %sextarg1, 1 
+  %zextadd = zext i8 %add to i64
+  %arrayidx = getelementptr inbounds i8* %base, i64 %zextadd
+  %res = load i8* %arrayidx
+  ret i8 %res
+}
diff --git a/test/CodeGen/X86/codegen-prepare-extload.ll b/test/CodeGen/X86/codegen-prepare-extload.ll
index 9320706d9728..9b27c33a80f9 100644
--- a/test/CodeGen/X86/codegen-prepare-extload.ll
+++ b/test/CodeGen/X86/codegen-prepare-extload.ll
@@ -1,12 +1,21 @@
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-win64 | FileCheck %s
-; rdar://7304838
+; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=NONSTRESS
+; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S -stress-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=OPT --check-prefix=STRESS
+; RUN: opt -codegenprepare < %s -mtriple=x86_64-apple-macosx -S -disable-cgp-ext-ld-promotion | FileCheck %s --check-prefix=OPTALL --check-prefix=DISABLE
 
+; rdar://7304838
 ; CodeGenPrepare should move the zext into the block with the load
 ; so that SelectionDAG can select it with the load.
-
+;
+; CHECK-LABEL: foo:
 ; CHECK: movsbl ({{%rdi|%rcx}}), %eax
-
+;
+; OPTALL-LABEL: @foo
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; OPTALL: store i32 [[ZEXT]], i32* %q
+; OPTALL: ret
 define void @foo(i8* %p, i32* %q) {
 entry:
   %t = load i8* %p
@@ -19,3 +28,336 @@ true:
 false:
   ret void
 }
+
+; Check that we manage to form a zextload is an operation with only one
+; argument to explicitly extend is in the the way.
+; OPTALL-LABEL: @promoteOneArg
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
+; Make sure the operation is not promoted when the promotion pass is disabled.
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], 2
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteOneArg(i8* %p, i32* %q) {
+entry:
+  %t = load i8* %p
+  %add = add nuw i8 %t, 2
+  %a = icmp slt i8 %t, 20
+  br i1 %a, label %true, label %false
+true:
+  %s = zext i8 %add to i32
+  store i32 %s, i32* %q
+  ret void
+false:
+  ret void
+}
+
+; Check that we manage to form a sextload is an operation with only one
+; argument to explicitly extend is in the the way.
+; Version with sext.
+; OPTALL-LABEL: @promoteOneArgSExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteOneArgSExt(i8* %p, i32* %q) {
+entry:
+  %t = load i8* %p
+  %add = add nsw i8 %t, 2
+  %a = icmp slt i8 %t, 20
+  br i1 %a, label %true, label %false
+true:
+  %s = sext i8 %add to i32
+  store i32 %s, i32* %q
+  ret void
+false:
+  ret void
+}
+
+; Check that we manage to form a zextload is an operation with two
+; arguments to explicitly extend is in the the way.
+; Extending %add will create two extensions:
+; 1. One for %b.
+; 2. One for %t.
+; #1 will not be removed as we do not know anything about %b.
+; #2 may not be merged with the load because %t is used in a comparison.
+; Since two extensions may be emitted in the end instead of one before the
+; transformation, the regular heuristic does not apply the optimization. 
+; 
+; OPTALL-LABEL: @promoteTwoArgZext
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
+;
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
+entry:
+  %t = load i8* %p
+  %add = add nuw i8 %t, %b
+  %a = icmp slt i8 %t, 20
+  br i1 %a, label %true, label %false
+true:
+  %s = zext i8 %add to i32
+  store i32 %s, i32* %q
+  ret void
+false:
+  ret void
+}
+
+; Check that we manage to form a sextload is an operation with two
+; arguments to explicitly extend is in the the way.
+; Version with sext.
+; OPTALL-LABEL: @promoteTwoArgSExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+;
+; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
+; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXTLD]], [[SEXTB]]
+;
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
+;
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i8 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
+entry:
+  %t = load i8* %p
+  %add = add nsw i8 %t, %b
+  %a = icmp slt i8 %t, 20
+  br i1 %a, label %true, label %false
+true:
+  %s = sext i8 %add to i32
+  store i32 %s, i32* %q
+  ret void
+false:
+  ret void
+}
+
+; Check that we do not a zextload if we need to introduce more than
+; one additional extension.
+; OPTALL-LABEL: @promoteThreeArgZext
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
+; STRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
+; STRESS-NEXT: [[ZEXTC:%[a-zA-Z_0-9-]+]] = zext i8 %c to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[TMP]], [[ZEXTC]]
+;
+; NONSTRESS-NEXT: [[TMP:%[a-zA-Z_0-9-]+]] = add nuw i8 [[LD]], %b
+; NONSTRESS-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8 [[TMP]], %c
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; DISABLE: add nuw i8
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i8
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i8 [[ADD]] to i32
+;
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
+entry:
+  %t = load i8* %p
+  %tmp = add nuw i8 %t, %b
+  %add = add nuw i8 %tmp, %c
+  %a = icmp slt i8 %t, 20
+  br i1 %a, label %true, label %false
+true:
+  %s = zext i8 %add to i32
+  store i32 %s, i32* %q
+  ret void
+false:
+  ret void
+}
+
+; Check that we manage to form a zextload after promoting and merging
+; two extensions.
+; OPTALL-LABEL: @promoteMergeExtArgZExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXTLD]], [[ZEXTB]]
+;
+; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
+;
+; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw i16 [[ZEXTLD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = zext i16 [[ADD]] to i32
+;
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
+entry:
+  %t = load i8* %p
+  %ext = zext i8 %t to i16
+  %add = add nuw i16 %ext, %b
+  %a = icmp slt i8 %t, 20
+  br i1 %a, label %true, label %false
+true:
+  %s = zext i16 %add to i32
+  store i32 %s, i32* %q
+  ret void
+false:
+  ret void
+}
+
+; Check that we manage to form a sextload after promoting and merging
+; two extensions.
+; Version with sext.
+; OPTALL-LABEL: @promoteMergeExtArgSExt
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+;
+; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
+; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
+; STRESS-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[ZEXTLD]], [[ZEXTB]]
+;
+; NONSTRESS: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; NONSTRESS: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
+; NONSTRESS: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
+;
+; DISABLE: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i16
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i16 [[ZEXTLD]], %b
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
+; OPTALL: store i32 [[RES]], i32* %q
+; OPTALL: ret
+define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
+entry:
+  %t = load i8* %p
+  %ext = zext i8 %t to i16
+  %add = add nsw i16 %ext, %b
+  %a = icmp slt i8 %t, 20
+  br i1 %a, label %true, label %false
+true:
+  %s = sext i16 %add to i32
+  store i32 %s, i32* %q
+  ret void
+false:
+  ret void
+}
+
+; Check that we manage to catch all the extload opportunities that are exposed
+; by the different iterations of codegen prepare.
+; Moreover, check that we do not promote more than we need to.
+; Here is what is happening in this test (not necessarly in this order):
+; 1. We try to promote the operand of %sextadd.
+;    a. This creates one sext of %ld2 and one of %zextld
+;    b. The sext of %ld2 can be combine with %ld2, so we remove one sext but
+;       introduced one. This is fine with the current heuristic: neutral.
+;    => We have one zext of %zextld left and we created one sext of %ld2.
+; 2. We try to promote the operand of %sextaddza.
+;    a. This creates one sext of %zexta and one of %zextld
+;    b. The sext of %zexta does not lead to any load, it stays here, even if it
+;       could have been combine with the zext of %a.
+;    c. The sext of %zextld leads to %ld and can be combined with it. This is
+;       done by promoting %zextld. This is fine with the current heuristic:
+;       neutral.
+;    => We have created a new zext of %ld and we created one sext of %zexta.
+; 3. We try to promote the operand of %sextaddb.
+;    a. This creates one sext of %b and one of %zextld
+;    b. The sext of %b is a dead-end, nothing to be done.
+;    c. Same thing as 2.c. happens.
+;    => We have created a new zext of %ld and we created one sext of %b.
+; 4. We try to promote the operand of the zext of %zextld introduced in #1.
+;    a. Same thing as 2.c. happens.
+;    b. %zextld does not have any other uses. It is dead coded.
+;    => We have created a new zext of %ld and we removed a zext of %zextld and
+;       a zext of %ld.
+; Currently we do not try to reuse existing extensions, so in the end we have
+; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
+;
+; OPTALL-LABEL: @severalPromotions
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %addr1
+; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
+; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32* %addr2
+; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_1]]
+; We do not combine this one: see 2.b.
+; OPT-NEXT: [[ZEXTA:%[a-zA-Z_0-9-]+]] = zext i8 %a to i32
+; OPT-NEXT: [[SEXTZEXTA:%[a-zA-Z_0-9-]+]] = sext i32 [[ZEXTA]] to i64
+; OPT-NEXT: [[RESZA:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTZEXTA]], [[ZEXTLD1_3]]
+; OPT-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i32 %b to i64
+; OPT-NEXT: [[RESB:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTB]], [[ZEXTLD1_2]]
+;
+; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i32
+; DISABLE: [[RES:%[a-zA-Z_0-9-]+]]  = sext i32 [[ADD]] to i64
+; DISABLE: [[ADDZA:%[a-zA-Z_0-9-]+]] = add nsw i32
+; DISABLE: [[RESZA:%[a-zA-Z_0-9-]+]]  = sext i32 [[ADDZA]] to i64
+; DISABLE: [[ADDB:%[a-zA-Z_0-9-]+]] = add nsw i32
+; DISABLE: [[RESB:%[a-zA-Z_0-9-]+]]  = sext i32 [[ADDB]] to i64
+;
+; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
+; OPTALL: ret
+define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
+  %ld = load i8* %addr1
+  %zextld = zext i8 %ld to i32
+  %ld2 = load i32* %addr2
+  %add = add nsw i32 %ld2, %zextld
+  %sextadd = sext i32 %add to i64
+  %zexta = zext i8 %a to i32
+  %addza = add nsw i32 %zexta, %zextld
+  %sextaddza = sext i32 %addza to i64
+  %addb = add nsw i32 %b, %zextld
+  %sextaddb = sext i32 %addb to i64
+  call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb)
+  ret void
+}
+
+declare void @dummy(i64, i64, i64)
+
+; Make sure we do not try to promote vector types since the type promotion
+; helper does not support them for now.
+; OPTALL-LABEL: @vectorPromotion
+; OPTALL: [[SHL:%[a-zA-Z_0-9-]+]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
+; OPTALL: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext <2 x i32> [[SHL]] to <2 x i64>
+; OPTALL: ret
+define void @vectorPromotion() {
+entry:
+  %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
+  %b = zext <2 x i32> %a to <2 x i64>
+  ret void
+}
+
+@a = common global i32 0, align 4
+@c = common global [2 x i32] zeroinitializer, align 4
+
+; PR21978.
+; Make sure we support promotion of operands that produces a Value as opposed
+; to an instruction.
+; This used to cause a crash.
+; OPTALL-LABEL: @promotionOfArgEndsUpInValue
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16* %addr
+
+; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
+; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
+;
+; DISABLE-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add nuw nsw i16 [[LD]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
+; DISABLE-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sext i16 [[ADD]] to i32
+;
+; OPTALL-NEXT: ret i32 [[RES]]
+define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
+entry:
+  %val = load i16* %addr
+  %add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
+  %conv3 = sext i16 %add to i32
+  ret i32 %conv3
+}
diff --git a/test/CodeGen/X86/coff-comdat.ll b/test/CodeGen/X86/coff-comdat.ll
index bf27b2fff1fa..dcbbe1097d53 100644
--- a/test/CodeGen/X86/coff-comdat.ll
+++ b/test/CodeGen/X86/coff-comdat.ll
@@ -1,58 +1,58 @@
 ; RUN: llc -mtriple i386-pc-win32 < %s | FileCheck %s
 
 $f1 = comdat any
-@v1 = global i32 0, comdat $f1
-define void @f1() comdat $f1 {
+@v1 = global i32 0, comdat($f1)
+define void @f1() comdat($f1) {
   ret void
 }
 
 $f2 = comdat exactmatch
-@v2 = global i32 0, comdat $f2
-define void @f2() comdat $f2 {
+@v2 = global i32 0, comdat($f2)
+define void @f2() comdat($f2) {
   ret void
 }
 
 $f3 = comdat largest
-@v3 = global i32 0, comdat $f3
-define void @f3() comdat $f3 {
+@v3 = global i32 0, comdat($f3)
+define void @f3() comdat($f3) {
   ret void
 }
 
 $f4 = comdat noduplicates
-@v4 = global i32 0, comdat $f4
-define void @f4() comdat $f4 {
+@v4 = global i32 0, comdat($f4)
+define void @f4() comdat($f4) {
   ret void
 }
 
 $f5 = comdat samesize
-@v5 = global i32 0, comdat $f5
-define void @f5() comdat $f5 {
+@v5 = global i32 0, comdat($f5)
+define void @f5() comdat($f5) {
   ret void
 }
 
 $f6 = comdat samesize
-@v6 = global i32 0, comdat $f6
-@f6 = global i32 0, comdat $f6
+@v6 = global i32 0, comdat($f6)
+@f6 = global i32 0, comdat($f6)
 
 $"\01@f7@0" = comdat any
-define x86_fastcallcc void @"\01@v7@0"() comdat $"\01@f7@0" {
+define x86_fastcallcc void @"\01@v7@0"() comdat($"\01@f7@0") {
   ret void
 }
-define x86_fastcallcc void @"\01@f7@0"() comdat $"\01@f7@0" {
+define x86_fastcallcc void @"\01@f7@0"() comdat($"\01@f7@0") {
   ret void
 }
 
 $f8 = comdat any
-define x86_fastcallcc void @v8() comdat $f8 {
+define x86_fastcallcc void @v8() comdat($f8) {
   ret void
 }
-define x86_fastcallcc void @f8() comdat $f8 {
+define x86_fastcallcc void @f8() comdat($f8) {
   ret void
 }
 
 $vftable = comdat largest
 
-@some_name = private unnamed_addr constant [2 x i8*] zeroinitializer, comdat $vftable
+@some_name = private unnamed_addr constant [2 x i8*] zeroinitializer, comdat($vftable)
 @vftable = alias getelementptr([2 x i8*]* @some_name, i32 0, i32 1)
 
 ; CHECK: .section        .text,"xr",discard,_f1
@@ -73,19 +73,19 @@ $vftable = comdat largest
 ; CHECK: .globl  @v8@0
 ; CHECK: .section        .text,"xr",discard,@f8@0
 ; CHECK: .globl  @f8@0
-; CHECK: .section        .bss,"bw",associative,_f1
+; CHECK: .section        .bss,"wb",associative,_f1
 ; CHECK: .globl  _v1
-; CHECK: .section        .bss,"bw",associative,_f2
+; CHECK: .section        .bss,"wb",associative,_f2
 ; CHECK: .globl  _v2
-; CHECK: .section        .bss,"bw",associative,_f3
+; CHECK: .section        .bss,"wb",associative,_f3
 ; CHECK: .globl  _v3
-; CHECK: .section        .bss,"bw",associative,_f4
+; CHECK: .section        .bss,"wb",associative,_f4
 ; CHECK: .globl  _v4
-; CHECK: .section        .bss,"bw",associative,_f5
+; CHECK: .section        .bss,"wb",associative,_f5
 ; CHECK: .globl  _v5
-; CHECK: .section        .bss,"bw",associative,_f6
+; CHECK: .section        .bss,"wb",associative,_f6
 ; CHECK: .globl  _v6
-; CHECK: .section        .bss,"bw",same_size,_f6
+; CHECK: .section        .bss,"wb",same_size,_f6
 ; CHECK: .globl  _f6
 ; CHECK: .section        .rdata,"rd",largest,_vftable
 ; CHECK: .globl  _vftable
diff --git a/test/CodeGen/X86/coff-comdat2.ll b/test/CodeGen/X86/coff-comdat2.ll
index 6744b5b02ad7..a417d096c47d 100644
--- a/test/CodeGen/X86/coff-comdat2.ll
+++ b/test/CodeGen/X86/coff-comdat2.ll
@@ -5,5 +5,5 @@ target triple = "i686-pc-windows-msvc"
 
 $foo = comdat largest
 @foo = global i32 0
-@bar = global i32 0, comdat $foo
-; CHECK: Associative COMDAT symbol 'foo' is not a key for it's COMDAT.
+@bar = global i32 0, comdat($foo)
+; CHECK: Associative COMDAT symbol 'foo' is not a key for its COMDAT.
diff --git a/test/CodeGen/X86/coff-comdat3.ll b/test/CodeGen/X86/coff-comdat3.ll
index 76e464b27547..01651ce4820a 100644
--- a/test/CodeGen/X86/coff-comdat3.ll
+++ b/test/CodeGen/X86/coff-comdat3.ll
@@ -4,5 +4,5 @@ target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
 target triple = "i686-pc-windows-msvc"
 
 $foo = comdat largest
-@bar = global i32 0, comdat $foo
+@bar = global i32 0, comdat($foo)
 ; CHECK: Associative COMDAT symbol 'foo' does not exist.
diff --git a/test/CodeGen/X86/combine-and.ll b/test/CodeGen/X86/combine-and.ll
new file mode 100644
index 000000000000..dace806b4bb9
--- /dev/null
+++ b/test/CodeGen/X86/combine-and.ll
@@ -0,0 +1,164 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s
+;
+; Verify that the DAGCombiner is able to fold a vector AND into a blend
+; if one of the operands to the AND is a vector of all constants, and each
+; constant element is either zero or all-ones.
+
+
+define <4 x i32> @test1(<4 x i32> %A) {
+  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 0>
+  ret <4 x i32> %1
+}
+; CHECK-LABEL: test1
+; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test2(<4 x i32> %A) {
+  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 0>
+  ret <4 x i32> %1
+}
+; CHECK-LABEL: test2
+; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test3(<4 x i32> %A) {
+  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 0>
+  ret <4 x i32> %1
+}
+; CHECK-LABEL: test3
+; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test4(<4 x i32> %A) {
+  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 0, i32 -1>
+  ret <4 x i32> %1
+}
+; CHECK-LABEL: test4
+; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5],xmm0[6,7]
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test5(<4 x i32> %A) {
+  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
+  ret <4 x i32> %1
+}
+; CHECK-LABEL: test5
+; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test6(<4 x i32> %A) {
+  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
+  ret <4 x i32> %1
+}
+; CHECK-LABEL: test6
+; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test7(<4 x i32> %A) {
+  %1 = and <4 x i32> %A, <i32 0, i32 0, i32 -1, i32 -1>
+  ret <4 x i32> %1
+}
+; CHECK-LABEL: test7
+; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test8(<4 x i32> %A) {
+  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 -1>
+  ret <4 x i32> %1
+}
+; CHECK-LABEL: test8
+; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5],xmm0[6,7]
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test9(<4 x i32> %A) {
+  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 0>
+  ret <4 x i32> %1
+}
+; CHECK-LABEL: test9
+; CHECK: movq %xmm0, %xmm0
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test10(<4 x i32> %A) {
+  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 0>
+  ret <4 x i32> %1
+}
+; CHECK-LABEL: test10
+; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5],xmm1[6,7]
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test11(<4 x i32> %A) {
+  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 -1>
+  ret <4 x i32> %1
+}
+; CHECK-LABEL: test11
+; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test12(<4 x i32> %A) {
+  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 -1, i32 0>
+  ret <4 x i32> %1
+}
+; CHECK-LABEL: test12
+; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test13(<4 x i32> %A) {
+  %1 = and <4 x i32> %A, <i32 -1, i32 -1, i32 0, i32 -1>
+  ret <4 x i32> %1
+}
+; CHECK-LABEL: test13
+; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test14(<4 x i32> %A) {
+  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
+  ret <4 x i32> %1
+}
+; CHECK-LABEL: test14
+; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test15(<4 x i32> %A, <4 x i32> %B) {
+  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 -1>
+  %2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 0>
+  %3 = or <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
+; CHECK-LABEL: test15
+; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test16(<4 x i32> %A, <4 x i32> %B) {
+  %1 = and <4 x i32> %A, <i32 -1, i32 0, i32 -1, i32 0>
+  %2 = and <4 x i32> %B, <i32 0, i32 -1, i32 0, i32 -1>
+  %3 = or <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
+; CHECK-LABEL: test16
+; CHECK: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; CHECK-NEXT: retq
+
+
+define <4 x i32> @test17(<4 x i32> %A, <4 x i32> %B) {
+  %1 = and <4 x i32> %A, <i32 0, i32 -1, i32 0, i32 -1>
+  %2 = and <4 x i32> %B, <i32 -1, i32 0, i32 -1, i32 0>
+  %3 = or <4 x i32> %1, %2
+  ret <4 x i32> %3
+}
+; CHECK-LABEL: test17
+; CHECK: pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
+; CHECK-NEXT: retq
diff --git a/test/CodeGen/X86/combine-or.ll b/test/CodeGen/X86/combine-or.ll
index df3b9015adda..280fcbc7a3a7 100644
--- a/test/CodeGen/X86/combine-or.ll
+++ b/test/CodeGen/X86/combine-or.ll
@@ -5,277 +5,290 @@
 ; instruction which performs a blend operation.
 
 define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
   %or = or <2 x i64> %shuf1, %shuf2
   ret <2 x i64> %or
 }
-; CHECK-LABEL: test1
-; CHECK-NOT: xorps
-; CHECK: movsd
-; CHECK-NOT: orps
-; CHECK: ret
 
 
 define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
   %or = or <4 x i32> %shuf1, %shuf2
   ret <4 x i32> %or
 }
-; CHECK-LABEL: test2
-; CHECK-NOT: xorps
-; CHECK: movsd
-; CHECK: ret
 
 
 define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
   %or = or <2 x i64> %shuf1, %shuf2
   ret <2 x i64> %or
 }
-; CHECK-LABEL: test3
-; CHECK-NOT: xorps
-; CHECK: movsd
-; CHECK-NEXT: ret
 
 
 define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test4:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
   %or = or <4 x i32> %shuf1, %shuf2
   ret <4 x i32> %or
 }
-; CHECK-LABEL: test4
-; CHECK-NOT: xorps
-; CHECK: movss
-; CHECK-NOT: orps
-; CHECK: ret
 
 
 define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test5:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
   %or = or <4 x i32> %shuf1, %shuf2
   ret <4 x i32> %or
 }
-; CHECK-LABEL: test5
-; CHECK-NOT: xorps
-; CHECK: movss
-; CHECK-NEXT: ret
 
 
 define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test6:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
   %or = or <4 x i32> %shuf1, %shuf2
   ret <4 x i32> %or
 }
-; CHECK-LABEL: test6
-; CHECK-NOT: xorps
-; CHECK: blendps $12
-; CHECK-NEXT: ret
 
 
 define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test7:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; CHECK-NEXT:    retq
   %and1 = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>
   %and2 = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>
   %or = or <4 x i32> %and1, %and2
   ret <4 x i32> %or
 }
-; CHECK-LABEL: test7
-; CHECK-NOT: xorps
-; CHECK: blendps $12
-; CHECK-NEXT: ret
 
 
 define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test8:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; CHECK-NEXT:    retq
   %and1 = and <2 x i64> %a, <i64 -1, i64 0>
   %and2 = and <2 x i64> %b, <i64 0, i64 -1>
   %or = or <2 x i64> %and1, %and2
   ret <2 x i64> %or
 }
-; CHECK-LABEL: test8
-; CHECK-NOT: xorps
-; CHECK: movsd
-; CHECK-NOT: orps
-; CHECK: ret
 
 
 define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test9:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT:    retq
   %and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
   %and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
   %or = or <4 x i32> %and1, %and2
   ret <4 x i32> %or
 }
-; CHECK-LABEL: test9
-; CHECK-NOT: xorps
-; CHECK: movsd
-; CHECK: ret
 
 
 define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test10:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT:    retq
   %and1 = and <2 x i64> %a, <i64 0, i64 -1>
   %and2 = and <2 x i64> %b, <i64 -1, i64 0>
   %or = or <2 x i64> %and1, %and2
   ret <2 x i64> %or
 }
-; CHECK-LABEL: test10
-; CHECK-NOT: xorps
-; CHECK: movsd
-; CHECK-NEXT: ret
 
 
 define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test11:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; CHECK-NEXT:    retq
   %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
   %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
   %or = or <4 x i32> %and1, %and2
   ret <4 x i32> %or
 }
-; CHECK-LABEL: test11
-; CHECK-NOT: xorps
-; CHECK: movss
-; CHECK-NOT: orps
-; CHECK: ret
 
 
 define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test12:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; CHECK-NEXT:    retq
   %and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
   %and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
   %or = or <4 x i32> %and1, %and2
   ret <4 x i32> %or
 }
-; CHECK-LABEL: test12
-; CHECK-NOT: xorps
-; CHECK: movss
-; CHECK-NEXT: ret
 
 
 ; Verify that the following test cases are folded into single shuffles.
 
 define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test13:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 1, i32 1, i32 4, i32 4>
   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
   %or = or <4 x i32> %shuf1, %shuf2
   ret <4 x i32> %or
 }
-; CHECK-LABEL: test13
-; CHECK-NOT: xorps
-; CHECK: shufps
-; CHECK-NEXT: ret
 
 
 define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test14:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
   %or = or <2 x i64> %shuf1, %shuf2
   ret <2 x i64> %or
 }
-; CHECK-LABEL: test14
-; CHECK-NOT: pslldq
-; CHECK-NOT: por
-; CHECK: punpcklqdq
-; CHECK-NEXT: ret
 
 
 define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test15:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,1],xmm0[2,1]
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 1>
   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 2, i32 1, i32 4, i32 4>
   %or = or <4 x i32> %shuf1, %shuf2
   ret <4 x i32> %or
 }
-; CHECK-LABEL: test15
-; CHECK-NOT: xorps
-; CHECK: shufps
-; CHECK-NOT: shufps
-; CHECK-NOT: orps
-; CHECK: ret
 
 
 define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test16:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
   %or = or <2 x i64> %shuf1, %shuf2
   ret <2 x i64> %or
 }
-; CHECK-LABEL: test16
-; CHECK-NOT: pslldq
-; CHECK-NOT: por
-; CHECK: punpcklqdq
-; CHECK: ret
 
 
 ; Verify that the dag-combiner does not fold a OR of two shuffles into a single
 ; shuffle instruction when the shuffle indexes are not compatible.
 
 define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test17:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    xorps %xmm2, %xmm2
+; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,0]
+; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,2]
+; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; CHECK-NEXT:    orps %xmm1, %xmm2
+; CHECK-NEXT:    movaps %xmm2, %xmm0
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
   %or = or <4 x i32> %shuf1, %shuf2
   ret <4 x i32> %or
 }
-; CHECK-LABEL: test17
-; CHECK: por
-; CHECK-NEXT: ret
 
 
 define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test18:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pxor %xmm2, %xmm2
+; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; CHECK-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4>
   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
   %or = or <4 x i32> %shuf1, %shuf2
   ret <4 x i32> %or
 }
-; CHECK-LABEL: test18
-; CHECK: orps
-; CHECK: ret
 
 
 define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test19:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    xorps %xmm2, %xmm2
+; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,3]
+; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; CHECK-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0],zero,xmm1[2,2]
+; CHECK-NEXT:    orps %xmm1, %xmm2
+; CHECK-NEXT:    movaps %xmm2, %xmm0
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3>
   %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 2, i32 2>
   %or = or <4 x i32> %shuf1, %shuf2
   ret <4 x i32> %or
 }
-; CHECK-LABEL: test19
-; CHECK: por
-; CHECK-NEXT: ret
 
 
 define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test20:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    orps %xmm1, %xmm0
+; CHECK-NEXT:    movq %xmm0, %xmm0
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
   %or = or <2 x i64> %shuf1, %shuf2
   ret <2 x i64> %or
 }
-; CHECK-LABEL: test20
-; CHECK-NOT: xorps
-; CHECK: orps
-; CHECK-NEXT: movq
-; CHECK-NEXT: ret
 
 
 define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test21:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
   %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
   %or = or <2 x i64> %shuf1, %shuf2
   ret <2 x i64> %or
 }
-; CHECK-LABEL: test21
-; CHECK: por
-; CHECK-NEXT: pslldq
-; CHECK-NEXT: ret
 
 ; Verify that the DAGCombiner doesn't crash in the attempt to check if a shuffle
 ; with illegal type has a legal mask. Method 'isShuffleMaskLegal' only knows how to
 ; handle legal vector value types.
 define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: test_crash:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; CHECK-NEXT:    retq
   %shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
   %shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
   %or = or <4 x i8> %shuf1, %shuf2
   ret <4 x i8> %or
 }
-; CHECK-LABEL: test_crash
-; CHECK: movsd
-; CHECK: ret
 
diff --git a/test/CodeGen/X86/combine-vec-shuffle-2.ll b/test/CodeGen/X86/combine-vec-shuffle-2.ll
deleted file mode 100644
index 877d38260d61..000000000000
--- a/test/CodeGen/X86/combine-vec-shuffle-2.ll
+++ /dev/null
@@ -1,253 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
-
-; Check that DAGCombiner correctly folds the following pairs of shuffles
-; using the following rules:
-;  1. shuffle(shuffle(x, y), undef) -> x
-;  2. shuffle(shuffle(x, y), undef) -> y
-;  3. shuffle(shuffle(x, y), undef) -> shuffle(x, undef)
-;  4. shuffle(shuffle(x, y), undef) -> shuffle(undef, y)
-;
-; Rules 3. and 4. are used only if the resulting shuffle mask is legal.
-
-define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test1
-; Mask: [3,0,0,1]
-; CHECK: pshufd $67
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test2
-; Mask: [2,0,0,3]
-; CHECK: pshufd $-62
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test3
-; Mask: [2,0,0,3]
-; CHECK: pshufd $-62
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 7, i32 1>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 4, i32 0, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test4
-; Mask: [0,0,0,1]
-; CHECK: pshufd $64
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test5(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 4, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test5
-; Mask: [1,1]
-; CHECK: movhlps
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test6(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 4>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test6
-; Mask: [2,0,0,0]
-; CHECK: pshufd $2
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test7(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test7
-; Mask: [0,2,0,2]
-; CHECK: pshufd $-120
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test8(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test8
-; Mask: [1,0,3,0]
-; CHECK: pshufd $49
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test9(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 2, i32 5>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test9
-; Mask: [1,3,0,2]
-; CHECK: pshufd $-115
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test10(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test10
-; Mask: [1,0,1,0]
-; CHECK: pshufd $17
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test11(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 2, i32 5, i32 4>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 0>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test11
-; Mask: [1,0,2,1]
-; CHECK: pshufd $97
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test12(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 0, i32 2, i32 4>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 0, i32 4>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test12
-; Mask: [0,0,0,0]
-; CHECK: pshufd $0
-; CHECK-NEXT: ret
-
-
-; The following pair of shuffles is folded into vector %A.
-define <4 x i32> @test13(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 4, i32 2, i32 6>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 0, i32 2, i32 4>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test13
-; CHECK-NOT: pshufd
-; CHECK: ret
-
-
-; The following pair of shuffles is folded into vector %B.
-define <4 x i32> @test14(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 4, i32 1, i32 4>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test14
-; CHECK-NOT: pshufd
-; CHECK: ret
-
-
-; Verify that we don't optimize the following cases. We expect more than one shuffle.
-
-define <4 x i32> @test15(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test15
-; CHECK: shufps $114
-; CHECK-NEXT: pshufd $-58
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test16(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test16
-; CHECK: blendps $10
-; CHECK-NEXT: pshufd $-58
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test17(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test17
-; CHECK: shufps $120
-; CHECK-NEXT: pshufd $-58
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test18(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test18
-; CHECK: blendps $11
-; CHECK-NEXT: pshufd $-59
-; CHECK-NEXT: ret
-
-define <4 x i32> @test19(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test19
-; CHECK: shufps $-104
-; CHECK-NEXT: pshufd $2
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test20(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 3, i32 2, i32 4, i32 4>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test20
-; CHECK: shufps $11
-; CHECK-NEXT: pshufd $-58
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test21(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test21
-; CHECK: shufps $120
-; CHECK-NEXT: pshufd $-60
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test22(<4 x i32> %A, <4 x i32> %B) {
-  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test22
-; CHECK: blendps $11
-; CHECK-NEXT: pshufd $-43
-; CHECK-NEXT: ret
-
diff --git a/test/CodeGen/X86/combine-vec-shuffle-3.ll b/test/CodeGen/X86/combine-vec-shuffle-3.ll
deleted file mode 100644
index bd2d34ca189a..000000000000
--- a/test/CodeGen/X86/combine-vec-shuffle-3.ll
+++ /dev/null
@@ -1,380 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
-
-define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test1
-; Mask: [0,1,2,3]
-; CHECK: movaps
-; CHECK: ret
-
-define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test2
-; Mask: [0,5,6,7]
-; CHECK: movss
-; CHECK: ret
-
-define <4 x float> @test3(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test3
-; Mask: [0,1,4,5]
-; CHECK: movlhps
-; CHECK: ret
-
-define <4 x float> @test4(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test4
-; Mask: [6,7,2,3]
-; CHECK: movhlps
-; CHECK-NEXT: ret
-
-define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test5
-; Mask: [4,1,6,7]
-; CHECK: blendps $13
-; CHECK: ret
-
-
-define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test6
-; Mask: [4,5,6,7]
-; CHECK: movaps
-; CHECK: ret
-
-define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test7
-; Mask: [0,5,6,7]
-; CHECK: movss
-; CHECK: ret
-
-define <4 x i32> @test8(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test8
-; Mask: [0,1,4,5]
-; CHECK: movlhps
-; CHECK: ret
-
-define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test9
-; Mask: [6,7,2,3]
-; CHECK: movhlps
-; CHECK-NEXT: ret
-
-define <4 x i32> @test10(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test10
-; Mask: [4,1,6,7]
-; CHECK: blendps
-; CHECK: ret
-
-define <4 x float> @test11(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test11
-; Mask: [0,1,2,3]
-; CHECK-NOT: movaps
-; CHECK-NOT: blendps
-; CHECK: ret
-
-define <4 x float> @test12(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test12
-; Mask: [0,5,6,7]
-; CHECK: movss
-; CHECK: ret
-
-define <4 x float> @test13(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test13
-; Mask: [0,1,4,5]
-; CHECK: movlhps
-; CHECK: ret
-
-define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
-  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test14
-; Mask: [6,7,2,3]
-; CHECK: movhlps
-; CHECK: ret
-
-define <4 x float> @test15(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
-  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test15
-; Mask: [4,1,6,7]
-; CHECK: blendps $13
-; CHECK: ret
-
-define <4 x i32> @test16(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test16
-; Mask: [0,1,2,3]
-; CHECK-NOT: movaps
-; CHECK-NOT: blendps
-; CHECK: ret
-
-define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test17
-; Mask: [0,5,6,7]
-; CHECK: movss
-; CHECK: ret
-
-define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test18
-; Mask: [0,1,4,5]
-; CHECK: movlhps
-; CHECK: ret
-
-define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test19
-; Mask: [6,7,2,3]
-; CHECK: movhlps
-; CHECK: ret
-
-define <4 x i32> @test20(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test20
-; Mask: [4,1,6,7]
-; CHECK: blendps $13
-; CHECK: ret
-
-; Check some negative cases.
-define <4 x float> @test1b(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 0>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test1b
-; CHECK: shufps
-; CHECK: shufps
-; CHECK: ret
-
-define <4 x float> @test2b(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 0, i32 5>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test2b
-; CHECK: shufps
-; CHECK: pshufd
-; CHECK: ret
-
-define <4 x float> @test3b(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 6, i32 3>
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 7>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test3b
-; CHECK: shufps
-; CHECK: shufps
-; CHECK: ret
-
-define <4 x float> @test4b(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 5, i32 5, i32 2, i32 7>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test4b
-; CHECK: shufps
-; CHECK: shufps
-; CHECK: ret
-
-
-; Verify that we correctly fold shuffles even when we use illegal vector types.
-define <4 x i8> @test1c(<4 x i8>* %a, <4 x i8>* %b) {
-  %A = load <4 x i8>* %a
-  %B = load <4 x i8>* %b
-  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-  ret <4 x i8> %2
-}
-; CHECK-LABEL: test1c
-; Mask: [0,5,6,7]
-; CHECK: movss
-; CHECK-NEXT: ret
-
-define <4 x i8> @test2c(<4 x i8>* %a, <4 x i8>* %b) {
-  %A = load <4 x i8>* %a
-  %B = load <4 x i8>* %b
-  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 1, i32 5>
-  %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
-  ret <4 x i8> %2
-}
-; CHECK-LABEL: test2c
-; Mask: [0,1,4,5]
-; CHECK: movlhps
-; CHECK-NEXT: ret
-
-define <4 x i8> @test3c(<4 x i8>* %a, <4 x i8>* %b) {
-  %A = load <4 x i8>* %a
-  %B = load <4 x i8>* %b
-  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
-  %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
-  ret <4 x i8> %2
-}
-; CHECK-LABEL: test3c
-; Mask: [6,7,2,3]
-; CHECK: movhlps
-; CHECK-NEXT: ret
-
-define <4 x i8> @test4c(<4 x i8>* %a, <4 x i8>* %b) {
-  %A = load <4 x i8>* %a
-  %B = load <4 x i8>* %b
-  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-  ret <4 x i8> %2
-}
-; CHECK-LABEL: test4c
-; Mask: [4,1,6,7]
-; CHECK: blendps $13
-; CHECK: ret
-
-; The following test cases are generated from this C++ code
-; 
-;__m128 blend_01(__m128 a, __m128 b)
-;{
-;  __m128 s = a;
-;  s = _mm_blend_ps( s, b, 1<<0 );
-;  s = _mm_blend_ps( s, b, 1<<1 );
-;  return s;
-;}
-;
-;__m128 blend_02(__m128 a, __m128 b)
-;{
-;  __m128 s = a;
-;  s = _mm_blend_ps( s, b, 1<<0 );
-;  s = _mm_blend_ps( s, b, 1<<2 );
-;  return s;
-;}
-;
-;__m128 blend_123(__m128 a, __m128 b)
-;{
-;  __m128 s = a;
-;  s = _mm_blend_ps( s, b, 1<<1 );
-;  s = _mm_blend_ps( s, b, 1<<2 );
-;  s = _mm_blend_ps( s, b, 1<<3 );
-;  return s;
-;}
-
-; Ideally, we should collapse the following shuffles into a single one.
-
-define <4 x float> @blend_01(<4 x float> %a, <4 x float> %b) {
-  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
-  %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
-  ret <4 x float> %shuffle6
-}
-; CHECK-LABEL: blend_01
-; CHECK: movsd
-; CHECK-NEXT: ret
-
-define <4 x float> @blend_02(<4 x float> %a, <4 x float> %b) {
-  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 undef, i32 3>
-  %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-  ret <4 x float> %shuffle6
-}
-; CHECK-LABEL: blend_02
-; CHECK: blendps $5
-; CHECK-NEXT: ret
-
-define <4 x float> @blend_123(<4 x float> %a, <4 x float> %b) {
-  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
-  %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
-  %shuffle12 = shufflevector <4 x float> %shuffle6, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-  ret <4 x float> %shuffle12
-}
-; CHECK-LABEL: blend_123
-; CHECK: movss
-; CHECK: ret
-
-define <4 x i32> @test_movhl_1(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 7, i32 5, i32 3>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 1, i32 0, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test_movhl_1
-; CHECK: movhlps
-; CHECK-NEXT: ret
-
-define <4 x i32> @test_movhl_2(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 0, i32 3, i32 6>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 3, i32 7, i32 0, i32 2>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test_movhl_2
-; CHECK: movhlps
-; CHECK-NEXT: ret
-
-define <4 x i32> @test_movhl_3(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 6, i32 3, i32 2>
-  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 0, i32 3, i32 2>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test_movhl_3
-; CHECK: movhlps
-; CHECK-NEXT: ret
-
diff --git a/test/CodeGen/X86/combine-vec-shuffle-4.ll b/test/CodeGen/X86/combine-vec-shuffle-4.ll
deleted file mode 100644
index 0ddec2c12fb5..000000000000
--- a/test/CodeGen/X86/combine-vec-shuffle-4.ll
+++ /dev/null
@@ -1,237 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
-
-; Verify that we fold shuffles according to rule:
-;  (shuffle(shuffle A, Undef, M0), B, M1) -> (shuffle A, B, M2)
-
-define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 1, i32 2>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test1
-; Mask: [4,5,2,3]
-; CHECK: movsd
-; CHECK: ret
-
-define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test2
-; Mask: [0,1,4,5]
-; CHECK: movlhps 
-; CHECK: ret
-
-define <4 x float> @test3(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test3
-; Mask: [0,1,4,u]
-; CHECK: movlhps
-; CHECK: ret
-
-define <4 x float> @test4(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test4
-; Mask: [6,7,2,3]
-; CHECK: movhlps
-; CHECK-NEXT: ret
-
-define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 6, i32 7>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test5
-; Mask: [0,1,6,7]
-; CHECK: blendps $12
-; CHECK: ret
-
-; Verify that we fold shuffles according to rule:
-;  (shuffle(shuffle A, Undef, M0), A, M1) -> (shuffle A, Undef, M2)
-
-define <4 x float> @test6(<4 x float> %a) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
-  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 1, i32 2>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test6
-; Mask: [0,1,2,3]
-; CHECK-NOT: pshufd
-; CHECK-NOT: shufps
-; CHECK-NOT: movlhps
-; CHECK: ret
-
-define <4 x float> @test7(<4 x float> %a) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
-  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test7
-; Mask: [0,1,0,1]
-; CHECK-NOT: pshufd
-; CHECK-NOT: shufps
-; CHECK: movlhps 
-; CHECK-NEXT: ret
-
-define <4 x float> @test8(<4 x float> %a) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
-  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test8
-; Mask: [0,1,0,u]
-; CHECK-NOT: pshufd
-; CHECK-NOT: shufps
-; CHECK: movlhps
-; CHECK-NEXT: ret
-
-define <4 x float> @test9(<4 x float> %a) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
-  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test9
-; Mask: [2,3,2,3]
-; CHECK-NOT: movlhps
-; CHECK-NOT: palignr
-; CHECK: movhlps
-; CHECK-NEXT: ret
-
-define <4 x float> @test10(<4 x float> %a) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
-  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 6, i32 7>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test10
-; Mask: [0,1,2,3]
-; CHECK-NOT: pshufd
-; CHECK-NOT: shufps
-; CHECK-NOT: movlhps
-; CHECK: ret
-
-define <4 x float> @test11(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
-  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 6>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test11
-; Mask: [4,5,2,3]
-; CHECK: movsd
-; CHECK: ret
-
-define <4 x float> @test12(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
-  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test12
-; Mask: [0,1,4,5]
-; CHECK: movlhps 
-; CHECK: ret
-
-define <4 x float> @test13(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
-  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 5, i32 0, i32 5>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test13
-; Mask: [0,1,4,u]
-; CHECK: movlhps
-; CHECK: ret
-
-define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
-  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test14
-; Mask: [6,7,2,3]
-; CHECK: movhlps
-; CHECK-NEXT: ret
-
-define <4 x float> @test15(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
-  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 2, i32 3>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test15
-; Mask: [0,1,6,7]
-; CHECK: blendps $12
-; CHECK: ret
-
-; Verify that shuffles are canonicalized according to rules:
-;  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
-;
-; This allows to trigger the following combine rule:
-;  (shuffle(shuffle A, Undef, M0), A, M1) -> (shuffle A, Undef, M2)
-;
-; As a result, all the shuffle pairs in each function below should be
-; combined into a single legal shuffle operation.
-
-define <4 x float> @test16(<4 x float> %a) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
-  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test16
-; Mask: [0,1,2,3]
-; CHECK-NOT: pshufd
-; CHECK-NOT: shufps
-; CHECK-NOT: movlhps
-; CHECK: ret
-
-define <4 x float> @test17(<4 x float> %a) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
-  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test17
-; Mask: [0,1,0,1]
-; CHECK-NOT: pshufd
-; CHECK-NOT: shufps
-; CHECK: movlhps 
-; CHECK-NEXT: ret
-
-define <4 x float> @test18(<4 x float> %a) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
-  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 0, i32 5>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test18
-; Mask: [0,1,0,u]
-; CHECK-NOT: pshufd
-; CHECK-NOT: shufps
-; CHECK: movlhps
-; CHECK-NEXT: ret
-
-define <4 x float> @test19(<4 x float> %a) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
-  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test19
-; Mask: [2,3,2,3]
-; CHECK-NOT: movlhps
-; CHECK-NOT: palignr
-; CHECK: movhlps
-; CHECK-NEXT: ret
-
-define <4 x float> @test20(<4 x float> %a) {
-  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
-  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 2, i32 3>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test20
-; Mask: [0,1,2,3]
-; CHECK-NOT: pshufd
-; CHECK-NOT: shufps
-; CHECK-NOT: movlhps
-; CHECK: ret
-
diff --git a/test/CodeGen/X86/combine-vec-shuffle-5.ll b/test/CodeGen/X86/combine-vec-shuffle-5.ll
deleted file mode 100644
index 16c45efe4be6..000000000000
--- a/test/CodeGen/X86/combine-vec-shuffle-5.ll
+++ /dev/null
@@ -1,257 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
-
-; Verify that the DAGCombiner correctly folds all the shufflevector pairs
-; into a single shuffle operation.
-
-define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test1
-; Mask: [0,1,2,3]
-; CHECK: movaps
-; CHECK: ret
-
-define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test2
-; Mask: [0,5,6,7]
-; CHECK: movss
-; CHECK: ret
-
-define <4 x float> @test3(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
-  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 0, i32 5>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test3
-; Mask: [0,1,4,5]
-; CHECK: movlhps
-; CHECK: ret
-
-define <4 x float> @test4(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
-  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test4
-; Mask: [6,7,2,3]
-; CHECK: movhlps
-; CHECK-NEXT: ret
-
-define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test5
-; Mask: [4,1,6,7]
-; CHECK: blendps $13
-; CHECK: ret
-
-
-define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x i32> %b, <4 x i32> %1, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test6
-; Mask: [4,5,6,7]
-; CHECK: movaps
-; CHECK: ret
-
-define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  %2 = shufflevector <4 x i32> %b, <4 x i32> %1, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test7
-; Mask: [0,5,6,7]
-; CHECK: movss
-; CHECK: ret
-
-define <4 x i32> @test8(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
-  %2 = shufflevector <4 x i32> %b, <4 x i32> %1, <4 x i32> <i32 4, i32 6, i32 0, i32 5>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test8
-; Mask: [0,1,4,5]
-; CHECK: movlhps
-; CHECK: ret
-
-define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
-  %2 = shufflevector <4 x i32> %b, <4 x i32> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test9
-; Mask: [6,7,2,3]
-; CHECK: movhlps
-; CHECK-NEXT: ret
-
-define <4 x i32> @test10(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x i32> %b, <4 x i32> %1, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test10
-; Mask: [4,1,6,7]
-; CHECK: blendps
-; CHECK: ret
-
-define <4 x float> @test11(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test11
-; Mask: [0,1,2,3]
-; CHECK-NOT: movaps
-; CHECK-NOT: blendps
-; CHECK: ret
-
-define <4 x float> @test12(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test12
-; Mask: [0,5,6,7]
-; CHECK: movss
-; CHECK: ret
-
-define <4 x float> @test13(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test13
-; Mask: [0,1,4,5]
-; CHECK: movlhps
-; CHECK: ret
-
-define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
-  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test14
-; Mask: [6,7,2,3]
-; CHECK: movhlps
-; CHECK: ret
-
-define <4 x float> @test15(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
-  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
-  ret <4 x float> %2
-}
-; CHECK-LABEL: test15
-; Mask: [4,1,6,7]
-; CHECK: blendps $13
-; CHECK: ret
-
-define <4 x i32> @test16(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x i32> %a, <4 x i32> %1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test16
-; Mask: [0,1,2,3]
-; CHECK-NOT: movaps
-; CHECK-NOT: blendps
-; CHECK: ret
-
-define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-  %2 = shufflevector <4 x i32> %a, <4 x i32> %1, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test17
-; Mask: [0,5,6,7]
-; CHECK: movss
-; CHECK: ret
-
-define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-  %2 = shufflevector <4 x i32> %a, <4 x i32> %1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test18
-; Mask: [0,1,4,5]
-; CHECK: movlhps
-; CHECK: ret
-
-define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
-  %2 = shufflevector <4 x i32> %a, <4 x i32> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test19
-; Mask: [6,7,2,3]
-; CHECK: movhlps
-; CHECK: ret
-
-define <4 x i32> @test20(<4 x i32> %a, <4 x i32> %b) {
-  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
-  %2 = shufflevector <4 x i32> %a, <4 x i32> %1, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
-  ret <4 x i32> %2
-}
-; CHECK-LABEL: test20
-; Mask: [4,1,6,7]
-; CHECK: blendps $13
-; CHECK: ret
-
-; Verify that we correctly fold shuffles even when we use illegal vector types.
-define <4 x i8> @test1c(<4 x i8>* %a, <4 x i8>* %b) {
-  %A = load <4 x i8>* %a
-  %B = load <4 x i8>* %b
-  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  %2 = shufflevector <4 x i8> %B, <4 x i8> %1, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
-  ret <4 x i8> %2
-}
-; CHECK-LABEL: test1c
-; Mask: [0,5,6,7]
-; CHECK: movss
-; CHECK-NEXT: ret
-
-define <4 x i8> @test2c(<4 x i8>* %a, <4 x i8>* %b) {
-  %A = load <4 x i8>* %a
-  %B = load <4 x i8>* %b
-  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 1, i32 5>
-  %2 = shufflevector <4 x i8> %B, <4 x i8> %1, <4 x i32> <i32 4, i32 6, i32 0, i32 5>
-  ret <4 x i8> %2
-}
-; CHECK-LABEL: test2c
-; Mask: [0,1,4,5]
-; CHECK: movlhps
-; CHECK-NEXT: ret
-
-define <4 x i8> @test3c(<4 x i8>* %a, <4 x i8>* %b) {
-  %A = load <4 x i8>* %a
-  %B = load <4 x i8>* %b
-  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
-  %2 = shufflevector <4 x i8> %B, <4 x i8> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-  ret <4 x i8> %2
-}
-; CHECK-LABEL: test3c
-; Mask: [6,7,2,3]
-; CHECK: movhlps
-; CHECK: ret
-
-define <4 x i8> @test4c(<4 x i8>* %a, <4 x i8>* %b) {
-  %A = load <4 x i8>* %a
-  %B = load <4 x i8>* %b
-  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
-  %2 = shufflevector <4 x i8> %B, <4 x i8> %1, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
-  ret <4 x i8> %2
-}
-; CHECK-LABEL: test4c
-; Mask: [4,1,6,7]
-; CHECK: blendps $13
-; CHECK: ret
-
diff --git a/test/CodeGen/X86/combine-vec-shuffle.ll b/test/CodeGen/X86/combine-vec-shuffle.ll
deleted file mode 100644
index 9e6ab892713b..000000000000
--- a/test/CodeGen/X86/combine-vec-shuffle.ll
+++ /dev/null
@@ -1,253 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s
-
-; Verify that the DAGCombiner correctly folds according to the following rules:
-
-; fold (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
-; fold (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
-; fold (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
-
-; fold (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
-; fold (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
-; fold (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
-
-
-
-define <4 x i32> @test1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
-  %and = and <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %and
-}
-; CHECK-LABEL: test1
-; CHECK-NOT: pshufd
-; CHECK: pand
-; CHECK-NEXT: pshufd
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
-  %or = or <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %or
-}
-; CHECK-LABEL: test2
-; CHECK-NOT: pshufd
-; CHECK: por
-; CHECK-NEXT: pshufd
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
-  %xor = xor <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %xor
-}
-; CHECK-LABEL: test3
-; CHECK-NOT: pshufd
-; CHECK: pxor
-; CHECK-NEXT: pshufd
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
-  %and = and <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %and
-}
-; CHECK-LABEL: test4
-; CHECK-NOT: pshufd
-; CHECK: pand
-; CHECK-NEXT: pshufd
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
-  %or = or <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %or
-}
-; CHECK-LABEL: test5
-; CHECK-NOT: pshufd
-; CHECK: por
-; CHECK-NEXT: pshufd
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
-  %xor = xor <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %xor
-}
-; CHECK-LABEL: test6
-; CHECK-NOT: pshufd
-; CHECK: pxor
-; CHECK-NEXT: pshufd
-; CHECK-NEXT: ret
-
-
-; Verify that DAGCombiner moves the shuffle after the xor/and/or even if shuffles
-; are not performing a swizzle operations.
-
-define <4 x i32> @test1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
-  %and = and <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %and
-}
-; CHECK-LABEL: test1b
-; CHECK-NOT: blendps
-; CHECK: andps
-; CHECK-NEXT: blendps
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
-  %or = or <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %or
-}
-; CHECK-LABEL: test2b
-; CHECK-NOT: blendps
-; CHECK: orps
-; CHECK-NEXT: blendps
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test3b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
-  %xor = xor <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %xor
-}
-; CHECK-LABEL: test3b
-; CHECK-NOT: blendps
-; CHECK: xorps
-; CHECK-NEXT: xorps
-; CHECK-NEXT: blendps
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test4b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
-  %and = and <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %and
-}
-; CHECK-LABEL: test4b
-; CHECK-NOT: blendps
-; CHECK: andps
-; CHECK-NEXT: blendps
-; CHECK: ret
-
-
-define <4 x i32> @test5b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
-  %or = or <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %or
-}
-; CHECK-LABEL: test5b
-; CHECK-NOT: blendps
-; CHECK: orps
-; CHECK-NEXT: blendps
-; CHECK: ret
-
-
-define <4 x i32> @test6b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
-  %xor = xor <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %xor
-}
-; CHECK-LABEL: test6b
-; CHECK-NOT: blendps
-; CHECK: xorps
-; CHECK-NEXT: xorps
-; CHECK-NEXT: blendps
-; CHECK: ret
-
-define <4 x i32> @test1c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
-  %and = and <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %and
-}
-; CHECK-LABEL: test1c
-; CHECK-NOT: shufps
-; CHECK: andps
-; CHECK-NEXT: shufps
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test2c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
-  %or = or <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %or
-}
-; CHECK-LABEL: test2c
-; CHECK-NOT: shufps
-; CHECK: orps
-; CHECK-NEXT: shufps
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test3c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
-  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
-  %xor = xor <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %xor
-}
-; CHECK-LABEL: test3c
-; CHECK-NOT: shufps
-; CHECK: xorps
-; CHECK-NEXT: xorps
-; CHECK-NEXT: shufps
-; CHECK-NEXT: ret
-
-
-define <4 x i32> @test4c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
-  %and = and <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %and
-}
-; CHECK-LABEL: test4c
-; CHECK-NOT: shufps
-; CHECK: andps
-; CHECK-NEXT: shufps
-; CHECK: ret
-
-
-define <4 x i32> @test5c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
-  %or = or <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %or
-}
-; CHECK-LABEL: test5c
-; CHECK-NOT: shufps
-; CHECK: orps
-; CHECK-NEXT: shufps
-; CHECK: ret
-
-
-define <4 x i32> @test6c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
-  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
-  %xor = xor <4 x i32> %shuf1, %shuf2
-  ret <4 x i32> %xor
-}
-; CHECK-LABEL: test6c
-; CHECK-NOT: shufps
-; CHECK: xorps
-; CHECK-NEXT: xorps
-; CHECK-NEXT: shufps
-; CHECK: ret
-
diff --git a/test/CodeGen/X86/commute-blend-avx2.ll b/test/CodeGen/X86/commute-blend-avx2.ll
new file mode 100644
index 000000000000..d06c6dad8dbf
--- /dev/null
+++ b/test/CodeGen/X86/commute-blend-avx2.ll
@@ -0,0 +1,89 @@
+; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=avx2 < %s | FileCheck %s
+
+define <8 x i16> @commute_fold_vpblendw_128(<8 x i16> %a, <8 x i16>* %b) #0 {
+  %1 = load <8 x i16>* %b
+  %2 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %1, <8 x i16> %a, i8 17)
+  ret <8 x i16> %2
+
+  ;LABEL:      commute_fold_vpblendw_128
+  ;CHECK:      vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1,2,3],xmm0[4],mem[5,6,7]
+  ;CHECK-NEXT: retq
+}
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+define <16 x i16> @commute_fold_vpblendw_256(<16 x i16> %a, <16 x i16>* %b) #0 {
+  %1 = load <16 x i16>* %b
+  %2 = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %1, <16 x i16> %a, i8 17)
+  ret <16 x i16> %2
+
+  ;LABEL:      commute_fold_vpblendw_256
+  ;CHECK:      vpblendw {{.*#+}} ymm0 = ymm0[0],mem[1,2,3],ymm0[4],mem[5,6,7],ymm0[8],mem[9,10,11],ymm0[12],mem[13,14,15]
+  ;CHECK-NEXT: retq
+}
+declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone
+
+define <4 x i32> @commute_fold_vpblendd_128(<4 x i32> %a, <4 x i32>* %b) #0 {
+  %1 = load <4 x i32>* %b
+  %2 = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %1, <4 x i32> %a, i8 1)
+  ret <4 x i32> %2
+
+  ;LABEL:      commute_fold_vpblendd_128
+  ;CHECK:      vpblendd {{.*#+}} xmm0 = xmm0[0],mem[1,2,3]
+  ;CHECK-NEXT: retq
+}
+declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <8 x i32> @commute_fold_vpblendd_256(<8 x i32> %a, <8 x i32>* %b) #0 {
+  %1 = load <8 x i32>* %b
+  %2 = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %1, <8 x i32> %a, i8 129)
+  ret <8 x i32> %2
+
+  ;LABEL:      commute_fold_vpblendd_256
+  ;CHECK:      vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1,2,3,4,5,6],ymm0[7]
+  ;CHECK-NEXT: retq
+}
+declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
+
+define <4 x float> @commute_fold_vblendps_128(<4 x float> %a, <4 x float>* %b) #0 {
+  %1 = load <4 x float>* %b
+  %2 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %1, <4 x float> %a, i8 3)
+  ret <4 x float> %2
+
+  ;LABEL:      commute_fold_vblendps_128
+  ;CHECK:      vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
+  ;CHECK-NEXT: retq
+}
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+define <8 x float> @commute_fold_vblendps_256(<8 x float> %a, <8 x float>* %b) #0 {
+  %1 = load <8 x float>* %b
+  %2 = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %1, <8 x float> %a, i8 7)
+  ret <8 x float> %2
+
+  ;LABEL:      commute_fold_vblendps_256
+  ;CHECK:      vblendps {{.*#+}} ymm0 = ymm0[0,1,2],mem[3,4,5,6,7]
+  ;CHECK-NEXT: retq
+}
+declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
+
+define <2 x double> @commute_fold_vblendpd_128(<2 x double> %a, <2 x double>* %b) #0 {
+  %1 = load <2 x double>* %b
+  %2 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %1, <2 x double> %a, i8 1)
+  ret <2 x double> %2
+
+  ;LABEL:      commute_fold_vblendpd_128
+  ;CHECK:      vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1]
+  ;CHECK-NEXT: retq
+}
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
+
+define <4 x double> @commute_fold_vblendpd_256(<4 x double> %a, <4 x double>* %b) #0 {
+  %1 = load <4 x double>* %b
+  %2 = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %1, <4 x double> %a, i8 7)
+  ret <4 x double> %2
+
+  ;LABEL:      commute_fold_vblendpd_256
+  ;CHECK:      vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],mem[3]
+  ;CHECK-NEXT: retq
+}
+declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
diff --git a/test/CodeGen/X86/commute-blend-sse41.ll b/test/CodeGen/X86/commute-blend-sse41.ll
new file mode 100644
index 000000000000..59fef8c3a29f
--- /dev/null
+++ b/test/CodeGen/X86/commute-blend-sse41.ll
@@ -0,0 +1,34 @@
+; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=corei7 < %s | FileCheck %s
+
+define <8 x i16> @commute_fold_pblendw(<8 x i16> %a, <8 x i16>* %b) #0 {
+  %1 = load <8 x i16>* %b
+  %2 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %1, <8 x i16> %a, i8 17)
+  ret <8 x i16> %2
+
+  ;LABEL:      commute_fold_pblendw
+  ;CHECK:      pblendw {{.*#+}} xmm0 = xmm0[0],mem[1,2,3],xmm0[4],mem[5,6,7]
+  ;CHECK-NEXT: retq
+}
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
+
+define <4 x float> @commute_fold_blendps(<4 x float> %a, <4 x float>* %b) #0 {
+  %1 = load <4 x float>* %b
+  %2 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %1, <4 x float> %a, i8 3)
+  ret <4 x float> %2
+
+  ;LABEL:      commute_fold_blendps
+  ;CHECK:      blendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
+  ;CHECK-NEXT: retq
+}
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
+
+define <2 x double> @commute_fold_blendpd(<2 x double> %a, <2 x double>* %b) #0 {
+  %1 = load <2 x double>* %b
+  %2 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %1, <2 x double> %a, i8 1)
+  ret <2 x double> %2
+
+  ;LABEL:      commute_fold_vblendpd
+  ;CHECK:      blendpd {{.*#+}} xmm0 = xmm0[0],mem[1]
+  ;CHECK-NEXT: retq
+}
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
diff --git a/test/CodeGen/X86/commuted-blend-mask.ll b/test/CodeGen/X86/commuted-blend-mask.ll
new file mode 100644
index 000000000000..e6322cbb7a14
--- /dev/null
+++ b/test/CodeGen/X86/commuted-blend-mask.ll
@@ -0,0 +1,13 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 < %s | FileCheck %s
+
+; When commuting the operands of a SSE blend, make sure that the resulting blend
+; mask can be encoded as a imm8.
+; Before, when commuting the operands to the shuffle in function @test, the backend
+; produced the following assembly:
+;   pblendw $4294967103, %xmm1, %xmm0
+
+define <4 x i32> @test(<4 x i32> %a, <4 x i32> %b) {
+  ;CHECK: pblendw $63, %xmm1, %xmm0
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
+  ret <4 x i32> %shuffle
+}
diff --git a/test/CodeGen/X86/compact-unwind.ll b/test/CodeGen/X86/compact-unwind.ll
index 9d3a1257288c..d3b89a54e0b8 100644
--- a/test/CodeGen/X86/compact-unwind.ll
+++ b/test/CodeGen/X86/compact-unwind.ll
@@ -1,12 +1,20 @@
 ; RUN: llc < %s -disable-fp-elim -mtriple x86_64-apple-darwin11 -mcpu corei7 | FileCheck -check-prefix=ASM %s
 ; RUN: llc < %s -disable-fp-elim -mtriple x86_64-apple-darwin11 -mcpu corei7 -filetype=obj -o - \
-; RUN:  | llvm-objdump -triple x86_64-apple-darwin11 -s - \
+; RUN:  | llvm-objdump -triple x86_64-apple-darwin11 -unwind-info - \
 ; RUN:  | FileCheck -check-prefix=CU %s
 ; RUN: llc < %s -disable-fp-elim -mtriple x86_64-apple-darwin11 -mcpu corei7 \
 ; RUN:  | llvm-mc -triple x86_64-apple-darwin11 -filetype=obj -o - \
-; RUN:  | llvm-objdump -triple x86_64-apple-darwin11 -s - \
+; RUN:  | llvm-objdump -triple x86_64-apple-darwin11 -unwind-info - \
 ; RUN:  | FileCheck -check-prefix=FROM-ASM %s
 
+; RUN: llc < %s -mtriple x86_64-apple-macosx10.8.0 -mcpu corei7 -filetype=obj -o - \
+; RUN:  | llvm-objdump -triple x86_64-apple-macosx10.8.0 -unwind-info - \
+; RUN:  | FileCheck -check-prefix=NOFP-CU %s
+; RUN: llc < %s -mtriple x86_64-apple-darwin11 -mcpu corei7 \
+; RUN:  | llvm-mc -triple x86_64-apple-darwin11 -filetype=obj -o - \
+; RUN:  | llvm-objdump -triple x86_64-apple-darwin11 -unwind-info - \
+; RUN:  | FileCheck -check-prefix=NOFP-FROM-ASM %s
+
 %ty = type { i8* }
 
 @gv = external global i32
@@ -17,15 +25,19 @@
 ; Even though we can't encode %rax into the compact unwind, We still want to be
 ; able to generate a compact unwind encoding in this particular case.
 
-; CU:      Contents of section __compact_unwind:
-; CU-NEXT: 0020 00000000 00000000 1e000000 01000101
-; CU-NEXT: 0030 00000000 00000000 00000000 00000000
+; CU:    Contents of __compact_unwind section:
+; CU-NEXT:      Entry at offset 0x0:
+; CU-NEXT:        start:                0x0 _test0
+; CU-NEXT:        length:               0x1e
+; CU-NEXT:        compact encoding:     0x01010001
 
-; FROM-ASM:      Contents of section __compact_unwind:
-; FROM-ASM-NEXT: 0020 00000000 00000000 1e000000 01000101
-; FROM-ASM-NEXT: 0030 00000000 00000000 00000000 00000000
+; FROM-ASM:    Contents of __compact_unwind section:
+; FROM-ASM-NEXT:      Entry at offset 0x0:
+; FROM-ASM-NEXT:        start:                0x0 _test0
+; FROM-ASM-NEXT:        length:               0x1e
+; FROM-ASM-NEXT:        compact encoding:     0x01010001
 
-define i8* @foo(i64 %size) {
+define i8* @test0(i64 %size) {
   %addr = alloca i64, align 8
   %tmp20 = load i32* @gv, align 4
   %tmp21 = call i32 @bar()
@@ -39,3 +51,61 @@ define i8* @foo(i64 %size) {
 }
 
 declare i32 @bar()
+
+%"struct.dyld::MappedRanges" = type { [400 x %struct.anon], %"struct.dyld::MappedRanges"* }
+%struct.anon = type { %class.ImageLoader*, i64, i64 }
+%class.ImageLoader = type { i32 (...)**, i8*, i8*, i32, i64, i64, i32, i32, %"struct.ImageLoader::recursive_lock"*, i16, i16, [4 x i8] }
+%"struct.ImageLoader::recursive_lock" = type { i32, i32 }
+
+@G1 = external hidden global %"struct.dyld::MappedRanges", align 8
+
+declare void @OSMemoryBarrier() optsize
+
+; Test the code below uses UNWIND_X86_64_MODE_STACK_IMMD compact unwind
+; encoding.
+
+; NOFP-CU:      Entry at offset 0x20:
+; NOFP-CU-NEXT:        start:                0x1d _test1
+; NOFP-CU-NEXT:        length:               0x42
+; NOFP-CU-NEXT:        compact encoding:     0x02040c0a
+
+; NOFP-FROM-ASM:      Entry at offset 0x20:
+; NOFP-FROM-ASM-NEXT:        start:                0x1d _test1
+; NOFP-FROM-ASM-NEXT:        length:               0x42
+; NOFP-FROM-ASM-NEXT:        compact encoding:     0x02040c0a
+
+define void @test1(%class.ImageLoader* %image) optsize ssp uwtable {
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.inc10, %entry
+  %p.019 = phi %"struct.dyld::MappedRanges"* [ @G1, %entry ], [ %1, %for.inc10 ]
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.inc, %for.cond1.preheader
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.inc ]
+  %image4 = getelementptr inbounds %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 0, i64 %indvars.iv, i32 0
+  %0 = load %class.ImageLoader** %image4, align 8
+  %cmp5 = icmp eq %class.ImageLoader* %0, %image
+  br i1 %cmp5, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body3
+  tail call void @OSMemoryBarrier() optsize
+  store %class.ImageLoader* null, %class.ImageLoader** %image4, align 8
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %for.body3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 400
+  br i1 %exitcond, label %for.inc10, label %for.body3
+
+for.inc10:                                        ; preds = %for.inc
+  %next = getelementptr inbounds %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 1
+  %1 = load %"struct.dyld::MappedRanges"** %next, align 8
+  %cmp = icmp eq %"struct.dyld::MappedRanges"* %1, null
+  br i1 %cmp, label %for.end11, label %for.cond1.preheader
+
+for.end11:                                        ; preds = %for.inc10
+  ret void
+}
diff --git a/test/CodeGen/X86/constructor.ll b/test/CodeGen/X86/constructor.ll
index b57889643e0d..7160dcc614c0 100644
--- a/test/CodeGen/X86/constructor.ll
+++ b/test/CodeGen/X86/constructor.ll
@@ -1,6 +1,8 @@
-; RUN: llc -mtriple x86_64-pc-linux < %s | FileCheck --check-prefix=CTOR %s
-; RUN: llc -mtriple x86_64-pc-linux -use-init-array < %s | FileCheck --check-prefix=INIT-ARRAY %s
-@llvm.global_ctors = appending global [2 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @f }, { i32, void ()* } { i32 15, void ()* @g }]
+; RUN: llc -mtriple x86_64-pc-linux -use-ctors < %s | FileCheck --check-prefix=CTOR %s
+; RUN: llc -mtriple x86_64-pc-linux < %s | FileCheck --check-prefix=INIT-ARRAY %s
+@llvm.global_ctors = appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* null}, { i32, void ()*, i8* } { i32 15, void ()* @g, i8* @v }]
+
+@v = weak_odr global i8 0
 
 define void @f() {
 entry:
@@ -12,14 +14,14 @@ entry:
   ret void
 }
 
-; CTOR:		.section	.ctors.65520,"aw",@progbits
+; CTOR:		.section	.ctors.65520,"aGw",@progbits,v,comdat
 ; CTOR-NEXT:	.align	8
 ; CTOR-NEXT:	.quad	g
 ; CTOR-NEXT:	.section	.ctors,"aw",@progbits
 ; CTOR-NEXT:	.align	8
 ; CTOR-NEXT:	.quad	f
 
-; INIT-ARRAY:		.section	.init_array.15,"aw",@init_array
+; INIT-ARRAY:		.section	.init_array.15,"aGw",@init_array,v,comdat
 ; INIT-ARRAY-NEXT:	.align	8
 ; INIT-ARRAY-NEXT:	.quad	g
 ; INIT-ARRAY-NEXT:	.section	.init_array,"aw",@init_array
diff --git a/test/CodeGen/X86/copysign-constant-magnitude.ll b/test/CodeGen/X86/copysign-constant-magnitude.ll
new file mode 100644
index 000000000000..537d6298ddf4
--- /dev/null
+++ b/test/CodeGen/X86/copysign-constant-magnitude.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+define void @test_copysign_const_magnitude_d(double %X) {
+; CHECK: [[SIGNMASK:L.+]]:
+; CHECK-NEXT:   .quad -9223372036854775808    ## double -0.000000e+00
+; CHECK-NEXT:   .quad 0                       ## double 0.000000e+00
+; CHECK: [[ZERO:L.+]]:
+; CHECK-NEXT:   .space 16
+; CHECK: [[ONE:L.+]]:
+; CHECK-NEXT:   .quad 4607182418800017408     ## double 1.000000e+00
+; CHECK-NEXT:   .quad 0                       ## double 0.000000e+00
+; CHECK-LABEL: test_copysign_const_magnitude_d:
+
+; CHECK: id
+  %iX = call double @id_d(double %X)
+
+; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0
+  %d0 = call double @copysign(double 0.000000e+00, double %iX)
+
+; CHECK-NEXT: id
+  %id0 = call double @id_d(double %d0)
+
+; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0
+; CHECK-NEXT: orpd [[ZERO]](%rip), %xmm0
+  %dn0 = call double @copysign(double -0.000000e+00, double %id0)
+
+; CHECK-NEXT: id
+  %idn0 = call double @id_d(double %dn0)
+
+; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0
+; CHECK-NEXT: orpd [[ONE]](%rip), %xmm0
+  %d1 = call double @copysign(double 1.000000e+00, double %idn0)
+
+; CHECK-NEXT: id
+  %id1 = call double @id_d(double %d1)
+
+; CHECK-NEXT: andpd [[SIGNMASK]](%rip), %xmm0
+; CHECK-NEXT: orpd [[ONE]](%rip), %xmm0
+  %dn1 = call double @copysign(double -1.000000e+00, double %id1)
+
+; CHECK-NEXT: id
+  %idn1 = call double @id_d(double %dn1)
+
+; CHECK: retq
+  ret void
+}
+
+define void @test_copysign_const_magnitude_f(float %X) {
+; CHECK: [[SIGNMASK:L.+]]:
+; CHECK-NEXT:   .long	2147483648              ## float -0.000000e+00
+; CHECK-NEXT:   .long	0                       ## float 0.000000e+00
+; CHECK-NEXT:   .long	0                       ## float 0.000000e+00
+; CHECK-NEXT:   .long	0                       ## float 0.000000e+00
+; CHECK: [[ZERO:L.+]]:
+; CHECK-NEXT:   .space 16
+; CHECK: [[ONE:L.+]]:
+; CHECK-NEXT:   .long	1065353216              ## float 1.000000e+00
+; CHECK-NEXT:   .long	0                       ## float 0.000000e+00
+; CHECK-NEXT:   .long	0                       ## float 0.000000e+00
+; CHECK-NEXT:   .long	0                       ## float 0.000000e+00
+; CHECK-LABEL: test_copysign_const_magnitude_f:
+
+; CHECK: id
+  %iX = call float @id_f(float %X)
+
+; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0
+  %d0 = call float @copysignf(float 0.000000e+00, float %iX)
+
+; CHECK-NEXT: id
+  %id0 = call float @id_f(float %d0)
+
+; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0
+; CHECK-NEXT: orps [[ZERO]](%rip), %xmm0
+  %dn0 = call float @copysignf(float -0.000000e+00, float %id0)
+
+; CHECK-NEXT: id
+  %idn0 = call float @id_f(float %dn0)
+
+; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0
+; CHECK-NEXT: orps [[ONE]](%rip), %xmm0
+  %d1 = call float @copysignf(float 1.000000e+00, float %idn0)
+
+; CHECK-NEXT: id
+  %id1 = call float @id_f(float %d1)
+
+; CHECK-NEXT: andps [[SIGNMASK]](%rip), %xmm0
+; CHECK-NEXT: orps [[ONE]](%rip), %xmm0
+  %dn1 = call float @copysignf(float -1.000000e+00, float %id1)
+
+; CHECK-NEXT: id
+  %idn1 = call float @id_f(float %dn1)
+
+; CHECK: retq
+  ret void
+}
+
+declare double @copysign(double, double) nounwind readnone
+declare float @copysignf(float, float) nounwind readnone
+
+; Dummy identity functions, so we always have xmm0, and prevent optimizations.
+declare double @id_d(double)
+declare float @id_f(float)
diff --git a/test/CodeGen/X86/copysign-zero.ll b/test/CodeGen/X86/copysign-zero.ll
deleted file mode 100644
index 47522d808058..000000000000
--- a/test/CodeGen/X86/copysign-zero.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s | not grep orpd
-; RUN: llc < %s | grep andpd | count 1
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-apple-darwin8"
-
-define double @test(double %X) nounwind  {
-entry:
-	%tmp2 = tail call double @copysign( double 0.000000e+00, double %X ) nounwind readnone 		; <double> [#uses=1]
-	ret double %tmp2
-}
-
-declare double @copysign(double, double) nounwind readnone 
-
diff --git a/test/CodeGen/X86/cpus.ll b/test/CodeGen/X86/cpus.ll
new file mode 100644
index 000000000000..ee1f7bb5295b
--- /dev/null
+++ b/test/CodeGen/X86/cpus.ll
@@ -0,0 +1,35 @@
+; Test that the CPU names work.
+;
+; First ensure the error message matches what we expect.
+; CHECK-ERROR: not a recognized processor for this target
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=foobar 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+;
+; Now ensure the error message doesn't occur for valid CPUs.
+; CHECK-NO-ERROR-NOT: not a recognized processor for this target
+;
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=penryn 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=nehalem 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=westmere 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=sandybridge 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=ivybridge 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=haswell 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=broadwell 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=bonnell 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=k8 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=opteron 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=athlon64 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=athlon-fx 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=k8-sse3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=opteron-sse3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=athlon64-sse3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
diff --git a/test/CodeGen/X86/crash-O0.ll b/test/CodeGen/X86/crash-O0.ll
index 956d43b4e895..df8eaaf442b7 100644
--- a/test/CodeGen/X86/crash-O0.ll
+++ b/test/CodeGen/X86/crash-O0.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -relocation-model=pic -disable-fp-elim < %s
+; RUN: llc -O0 -relocation-model=pic -disable-fp-elim < %s | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10"
 
@@ -29,3 +29,23 @@ entry:
 "41":                                             ; preds = %"39"
   unreachable
 }
+
+; When using fast isel, sdiv is lowered into a sequence of CQO + DIV64.
+; CQO defines implicitly AX and DIV64 uses it implicitly too.
+; When an instruction gets between those two, RegAllocFast was reusing
+; AX for the vreg defined in between and the compiler crashed.
+;
+; An instruction gets between CQO and DIV64 because the load is folded
+; into the division but it requires a sign extension.
+; PR21700
+; CHECK-LABEL: addressModeWith32bitIndex:
+; CHECK: cqto
+; CHECK-NEXT: movslq
+; CHECK-NEXT: idivq
+; CHECK: retq
+define i64 @addressModeWith32bitIndex(i32 %V) {
+  %gep = getelementptr i64* null, i32 %V
+  %load = load i64* %gep
+  %sdiv = sdiv i64 0, %load
+  ret i64 %sdiv
+}
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index ee73377dffde..6b3dd3675750 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -108,8 +108,8 @@ do.body92:                                        ; preds = %if.then66
   ret void
 }
 
-!0 = metadata !{i32 633550}
-!1 = metadata !{i32 634261}
+!0 = !{i32 633550}
+!1 = !{i32 634261}
 
 
 ; Crash during XOR optimization.
diff --git a/test/CodeGen/X86/critical-anti-dep-breaker.ll b/test/CodeGen/X86/critical-anti-dep-breaker.ll
new file mode 100644
index 000000000000..32d3f49c79cc
--- /dev/null
+++ b/test/CodeGen/X86/critical-anti-dep-breaker.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic -post-RA-scheduler=1 -break-anti-dependencies=critical  | FileCheck %s
+
+; PR20308 ( http://llvm.org/bugs/show_bug.cgi?id=20308 )
+; The critical-anti-dependency-breaker must not use register def information from a kill inst.
+; This test case expects such an instruction to appear as a comment with def info for RDI.
+; There is an anti-dependency (WAR) hazard using RAX using default reg allocation and scheduling.
+; The post-RA-scheduler and critical-anti-dependency breaker can eliminate that hazard using R10.
+; That is the first free register that isn't used as a param in the call to "@Image".
+
+@PartClass = external global i32
+@NullToken = external global i64
+
+; CHECK-LABEL: Part_Create:
+; CHECK-DAG: # kill: RDI<def> 
+; CHECK-DAG: movq PartClass@GOTPCREL(%rip), %r10
+define i32 @Part_Create(i64* %Anchor, i32 %TypeNum, i32 %F, i32 %Z, i32* %Status, i64* %PartTkn) {
+  %PartObj = alloca i64*, align 8
+  %Vchunk = alloca i64, align 8
+  %1 = load i64* @NullToken, align 4
+  store i64 %1, i64* %Vchunk, align 8
+  %2 = load i32* @PartClass, align 4
+  call i32 @Image(i64* %Anchor, i32 %2, i32 0, i32 0, i32* %Status, i64* %PartTkn, i64** %PartObj)
+  call i32 @Create(i64* %Anchor)
+  ret i32 %2
+}
+
+declare i32 @Image(i64*, i32, i32, i32, i32*, i64*, i64**)
+declare i32 @Create(i64*)
diff --git a/test/CodeGen/X86/cttz-ctlz.ll b/test/CodeGen/X86/cttz-ctlz.ll
new file mode 100644
index 000000000000..8717d4015954
--- /dev/null
+++ b/test/CodeGen/X86/cttz-ctlz.ll
@@ -0,0 +1,422 @@
+; RUN: opt -S -codegenprepare -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s --check-prefix=ALL --check-prefix=BMI
+; RUN: opt -S -codegenprepare -mtriple=x86_64-unknown-unknown -mattr=+lzcnt < %s | FileCheck %s --check-prefix=ALL --check-prefix=LZCNT
+; RUN: opt -S -codegenprepare -mtriple=x86_64-unknown-unknown < %s | FileCheck %s --check-prefix=ALL --check-prefix=GENERIC
+
+
+define i64 @test1(i64 %A) {
+; ALL-LABEL: @test1(
+; LZCNT: [[CTLZ:%[A-Za-z0-9]+]] = call i64 @llvm.ctlz.i64(i64 %A, i1 false)
+; LZCNT-NEXT: ret i64 [[CTLZ]]
+; BMI: icmp eq i64 %A, 0
+; BMI: call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+; GENERIC: icmp eq i64 %A, 0
+; GENERIC: call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
+  ret i64 %cond
+}
+
+
+define i32 @test2(i32 %A) {
+; ALL-LABEL: @test2(
+; LZCNT: [[CTLZ:%[A-Za-z0-9]+]] = call i32 @llvm.ctlz.i32(i32 %A, i1 false)
+; LZCNT-NEXT: ret i32 [[CTLZ]]
+; BMI: icmp eq i32 %A, 0
+; BMI: call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+; GENERIC: icmp eq i32 %A, 0
+; GENERIC: call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+  ret i32 %cond
+}
+
+
+define signext i16 @test3(i16 signext %A) {
+; ALL-LABEL: @test3(
+; LZCNT: [[CTLZ:%[A-Za-z0-9]+]] = call i16 @llvm.ctlz.i16(i16 %A, i1 false)
+; LZCNT-NEXT: ret i16 [[CTLZ]]
+; BMI: icmp eq i16 %A, 0
+; BMI: call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+; GENERIC: icmp eq i16 %A, 0
+; GENERIC: call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+entry:
+  %tobool = icmp eq i16 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
+  ret i16 %cond
+}
+
+
+define i64 @test1b(i64 %A) {
+; ALL-LABEL: @test1b(
+; LZCNT: icmp eq i64 %A, 0
+; LZCNT: call i64 @llvm.cttz.i64(i64 %A, i1 true)
+; BMI: [[CTTZ:%[A-Za-z0-9]+]] = call i64 @llvm.cttz.i64(i64 %A, i1 false)
+; BMI-NEXT: ret i64 [[CTTZ]]
+; GENERIC: icmp eq i64 %A, 0
+; GENERIC: call i64 @llvm.cttz.i64(i64 %A, i1 true)
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 64, %entry ]
+  ret i64 %cond
+}
+
+
+define i32 @test2b(i32 %A) {
+; ALL-LABEL: @test2b(
+; LZCNT: icmp eq i32 %A, 0
+; LZCNT: call i32 @llvm.cttz.i32(i32 %A, i1 true)
+; BMI: [[CTTZ:%[A-Za-z0-9]+]] = call i32 @llvm.cttz.i32(i32 %A, i1 false)
+; BMI-NEXT: ret i32 [[CTTZ]]
+; GENERIC: icmp eq i32 %A, 0
+; GENERIC: call i32 @llvm.cttz.i32(i32 %A, i1 true)
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %0, %cond.true ], [ 32, %entry ]
+  ret i32 %cond
+}
+
+
+define signext i16 @test3b(i16 signext %A) {
+; ALL-LABEL: @test3b(
+; LZCNT: icmp eq i16 %A, 0
+; LZCNT: call i16 @llvm.cttz.i16(i16 %A, i1 true)
+; BMI: [[CTTZ:%[A-Za-z0-9]+]] = call i16 @llvm.cttz.i16(i16 %A, i1 false)
+; BMI-NEXT: ret i16 [[CTTZ]]
+; GENERIC: icmp eq i16 %A, 0
+; GENERIC: call i16 @llvm.cttz.i16(i16 %A, i1 true)
+entry:
+  %tobool = icmp eq i16 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %0, %cond.true ], [ 16, %entry ]
+  ret i16 %cond
+}
+
+
+define i64 @test1c(i64 %A) {
+; ALL-LABEL: @test1c(
+; ALL: icmp eq i64 %A, 0
+; ALL: call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
+  ret i64 %cond
+}
+
+define i32 @test2c(i32 %A) {
+; ALL-LABEL: @test2c(
+; ALL: icmp eq i32 %A, 0
+; ALL: call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
+  ret i32 %cond
+}
+
+
+define signext i16 @test3c(i16 signext %A) {
+; ALL-LABEL: @test3c(
+; ALL: icmp eq i16 %A, 0
+; ALL: call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+entry:
+  %tobool = icmp eq i16 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i16 @llvm.ctlz.i16(i16 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
+  ret i16 %cond
+}
+
+
+define i64 @test1d(i64 %A) {
+; ALL-LABEL: @test1d(
+; ALL: icmp eq i64 %A, 0
+; ALL: call i64 @llvm.cttz.i64(i64 %A, i1 true)
+entry:
+  %tobool = icmp eq i64 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.cttz.i64(i64 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %0, %cond.true ], [ 63, %entry ]
+  ret i64 %cond
+}
+
+
+define i32 @test2d(i32 %A) {
+; ALL-LABEL: @test2d(
+; ALL: icmp eq i32 %A, 0
+; ALL: call i32 @llvm.cttz.i32(i32 %A, i1 true)
+entry:
+  %tobool = icmp eq i32 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.cttz.i32(i32 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %0, %cond.true ], [ 31, %entry ]
+  ret i32 %cond
+}
+
+
+define signext i16 @test3d(i16 signext %A) {
+; ALL-LABEL: @test3d(
+; ALL: icmp eq i16 %A, 0
+; ALL: call i16 @llvm.cttz.i16(i16 %A, i1 true)
+entry:
+  %tobool = icmp eq i16 %A, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i16 @llvm.cttz.i16(i16 %A, i1 true)
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %0, %cond.true ], [ 15, %entry ]
+  ret i16 %cond
+}
+
+; The following tests verify that calls to cttz/ctlz are speculated even if
+; basic block %cond.true has an extra zero extend/truncate which is "free"
+; for the target.
+
+define i64 @test1e(i32 %x) {
+; ALL-LABEL: @test1e(
+; LZCNT: icmp eq i32 %x, 0
+; LZCNT: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; BMI: call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+entry:
+  %tobool = icmp eq i32 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %phitmp2 = zext i32 %0 to i64
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %phitmp2, %cond.true ], [ 32, %entry ]
+  ret i64 %cond
+}
+
+define i32 @test2e(i64 %x) {
+; ALL-LABEL: @test2e(
+; LZCNT: icmp eq i64 %x, 0
+; LZCNT: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+; BMI: call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+entry:
+  %tobool = icmp eq i64 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i32
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %cast, %cond.true ], [ 64, %entry ]
+  ret i32 %cond
+}
+
+define i64 @test3e(i32 %x) {
+; ALL-LABEL: @test3e(
+; BMI: icmp eq i32 %x, 0
+; BMI: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+; LZCNT: call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+entry:
+  %tobool = icmp eq i32 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %phitmp2 = zext i32 %0 to i64
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i64 [ %phitmp2, %cond.true ], [ 32, %entry ]
+  ret i64 %cond
+}
+
+define i32 @test4e(i64 %x) {
+; ALL-LABEL: @test4e(
+; BMI: icmp eq i64 %x, 0
+; BMI: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+; LZCNT: call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+entry:
+  %tobool = icmp eq i64 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i32
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i32 [ %cast, %cond.true ], [ 64, %entry ]
+  ret i32 %cond
+}
+
+define i16 @test5e(i64 %x) {
+; ALL-LABEL: @test5e(
+; BMI: icmp eq i64 %x, 0
+; BMI: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+; LZCNT: call i64 @llvm.ctlz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+entry:
+  %tobool = icmp eq i64 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i16
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %cast, %cond.true ], [ 64, %entry ]
+  ret i16 %cond
+}
+
+define i16 @test6e(i32 %x) {
+; ALL-LABEL: @test6e(
+; BMI: icmp eq i32 %x, 0
+; BMI: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+; LZCNT: call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+entry:
+  %tobool = icmp eq i32 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+  %cast = trunc i32 %0 to i16
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %cast, %cond.true ], [ 32, %entry ]
+  ret i16 %cond
+}
+
+define i16 @test7e(i64 %x) {
+; ALL-LABEL: @test7e(
+; LZCNT: icmp eq i64 %x, 0
+; LZCNT: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+; BMI: call i64 @llvm.cttz.i64(i64 %x, i1 false)
+; GENERIC: icmp eq i64 %x, 0
+; GENERIC: call i64 @llvm.cttz.i64(i64 %x, i1 true)
+entry:
+  %tobool = icmp eq i64 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  %cast = trunc i64 %0 to i16
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %cast, %cond.true ], [ 64, %entry ]
+  ret i16 %cond
+}
+
+define i16 @test8e(i32 %x) {
+; ALL-LABEL: @test8e(
+; LZCNT: icmp eq i32 %x, 0
+; LZCNT: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+; BMI: call i32 @llvm.cttz.i32(i32 %x, i1 false)
+; GENERIC: icmp eq i32 %x, 0
+; GENERIC: call i32 @llvm.cttz.i32(i32 %x, i1 true)
+entry:
+  %tobool = icmp eq i32 %x, 0
+  br i1 %tobool, label %cond.end, label %cond.true
+
+cond.true:                                        ; preds = %entry
+  %0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  %cast = trunc i32 %0 to i16
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry, %cond.true
+  %cond = phi i16 [ %cast, %cond.true ], [ 32, %entry ]
+  ret i16 %cond
+}
+
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
diff --git a/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll b/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
index 23f83352eb2e..16d8f97c3a21 100644
--- a/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
+++ b/test/CodeGen/X86/dbg-changes-codegen-branch-folding.ll
@@ -52,58 +52,153 @@ define void @_Z3barii(i32 %param1, i32 %param2) #0 {
 entry:
   %var1 = alloca %struct.AAA3, align 1
   %var2 = alloca %struct.AAA3, align 1
-  %tobool = icmp eq i32 %param2, 0
-  br i1 %tobool, label %if.end, label %if.then
+  tail call void @llvm.dbg.value(metadata i32 %param1, i64 0, metadata !30, metadata !{!"0x102"}), !dbg !47
+  tail call void @llvm.dbg.value(metadata i32 %param2, i64 0, metadata !31, metadata !{!"0x102"}), !dbg !47
+  tail call void @llvm.dbg.value(metadata i8* null, i64 0, metadata !32, metadata !{!"0x102"}), !dbg !49
+  %tobool = icmp eq i32 %param2, 0, !dbg !50
+  br i1 %tobool, label %if.end, label %if.then, !dbg !50
 
 if.then:                                          ; preds = %entry
-  %call = call i8* @_Z5i2stri(i32 %param2)
-  br label %if.end
+  %call = tail call i8* @_Z5i2stri(i32 %param2), !dbg !52
+  tail call void @llvm.dbg.value(metadata i8* %call, i64 0, metadata !32, metadata !{!"0x102"}), !dbg !49
+  br label %if.end, !dbg !54
 
 if.end:                                           ; preds = %entry, %if.then
-  call void @llvm.dbg.value(metadata !{%struct.AAA3* %var1}, i64 0, metadata !60)
-  call void @llvm.dbg.value(metadata !62, i64 0, metadata !63)
-  %arraydecay.i = getelementptr inbounds %struct.AAA3* %var1, i64 0, i32 0, i64 0
-  call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0))
-  call void @llvm.dbg.declare(metadata !{%struct.AAA3* %var2}, metadata !38)
-  %arraydecay.i5 = getelementptr inbounds %struct.AAA3* %var2, i64 0, i32 0, i64 0
-  call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0))
-  %tobool1 = icmp eq i32 %param1, 0
-  br i1 %tobool1, label %if.else, label %if.then2
+  tail call void @llvm.dbg.value(metadata %struct.AAA3* %var1, i64 0, metadata !33, metadata !{!"0x102"}), !dbg !55
+  tail call void @llvm.dbg.value(metadata %struct.AAA3* %var1, i64 0, metadata !56, metadata !{!"0x102"}), !dbg !57
+  tail call void @llvm.dbg.value(metadata !58, i64 0, metadata !59, metadata !{!"0x102"}), !dbg !60
+  %arraydecay.i = getelementptr inbounds %struct.AAA3* %var1, i64 0, i32 0, i64 0, !dbg !61
+  call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)), !dbg !61
+  call void @llvm.dbg.value(metadata %struct.AAA3* %var2, i64 0, metadata !34, metadata !{!"0x102"}), !dbg !63
+  call void @llvm.dbg.value(metadata %struct.AAA3* %var2, i64 0, metadata !64, metadata !{!"0x102"}), !dbg !65
+  call void @llvm.dbg.value(metadata !58, i64 0, metadata !66, metadata !{!"0x102"}), !dbg !67
+  %arraydecay.i5 = getelementptr inbounds %struct.AAA3* %var2, i64 0, i32 0, i64 0, !dbg !68
+  call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)), !dbg !68
+  %tobool1 = icmp eq i32 %param1, 0, !dbg !69
+  call void @llvm.dbg.value(metadata %struct.AAA3* %var2, i64 0, metadata !34, metadata !{!"0x102"}), !dbg !63
+  br i1 %tobool1, label %if.else, label %if.then2, !dbg !69
 
 if.then2:                                         ; preds = %if.end
-  call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8]* @.str1, i64 0, i64 0))
-  br label %if.end3
+  call void @llvm.dbg.value(metadata %struct.AAA3* %var2, i64 0, metadata !71, metadata !{!"0x102"}), !dbg !73
+  call void @llvm.dbg.value(metadata !74, i64 0, metadata !75, metadata !{!"0x102"}), !dbg !76
+  call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8]* @.str1, i64 0, i64 0)), !dbg !76
+  br label %if.end3, !dbg !72
 
 if.else:                                          ; preds = %if.end
-  call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0))
+  call void @llvm.dbg.value(metadata %struct.AAA3* %var2, i64 0, metadata !77, metadata !{!"0x102"}), !dbg !79
+  call void @llvm.dbg.value(metadata !80, i64 0, metadata !81, metadata !{!"0x102"}), !dbg !82
+  call void @_Z3fooPcjPKc(i8* %arraydecay.i5, i32 4, i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0)), !dbg !82
   br label %if.end3
 
 if.end3:                                          ; preds = %if.else, %if.then2
-  call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0))
-  ret void
+  call void @llvm.dbg.value(metadata %struct.AAA3* %var1, i64 0, metadata !33, metadata !{!"0x102"}), !dbg !55
+  call void @llvm.dbg.value(metadata %struct.AAA3* %var1, i64 0, metadata !83, metadata !{!"0x102"}), !dbg !85
+  call void @llvm.dbg.value(metadata !58, i64 0, metadata !86, metadata !{!"0x102"}), !dbg !87
+  call void @_Z3fooPcjPKc(i8* %arraydecay.i, i32 4, i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)), !dbg !87
+  ret void, !dbg !88
 }
 
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
-
-declare i8* @_Z5i2stri(i32) #2
+declare i8* @_Z5i2stri(i32) #1
 
-declare void @_Z3fooPcjPKc(i8*, i32, i8*) #2
+declare void @_Z3fooPcjPKc(i8*, i32, i8*) #1
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
 
 attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.module.flags = !{!48, !49}
-!llvm.ident = !{!50}
-
-!38 = metadata !{i32 786688, null, metadata !"var2", null, i32 20, null, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [var2] [line 20]
-!48 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!49 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!50 = metadata !{metadata !"clang version 3.5 (202418)"}
-!60 = metadata !{i32 786689, null, metadata !"this", null, i32 16777216, null, i32 1088, null} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!62 = metadata !{i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)}
-!63 = metadata !{i32 786689, null, metadata !"value", null, i32 33554439, null, i32 0, null} ; [ DW_TAG_arg_variable ] [value] [line 7]
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!44, !45}
+!llvm.ident = !{!46}
+
+!0 = !{!"0x11\004\00clang version 3.5.0 \001\00\000\00\001", !1, !2, !3, !23, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"dbg-changes-codegen-branch-folding.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00AAA3\004\0032\008\000\000\000", !1, null, null, !5, null, null, !"_ZTS4AAA3"} ; [ DW_TAG_structure_type ] [AAA3] [line 4, size 32, align 8, offset 0] [def] [from ]
+!5 = !{!6, !11, !17, !18}
+!6 = !{!"0xd\00text\008\0032\008\000\000", !1, !"_ZTS4AAA3", !7} ; [ DW_TAG_member ] [text] [line 8, size 32, align 8, offset 0] [from ]
+!7 = !{!"0x1\00\000\0032\008\000\000", null, null, !8, !9, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char]
+!8 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!9 = !{!10}
+!10 = !{!"0x21\000\004"}        ; [ DW_TAG_subrange_type ] [0, 3]
+!11 = !{!"0x2e\00AAA3\00AAA3\00\005\000\000\000\006\00256\001\005", !1, !"_ZTS4AAA3", !12, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 5] [AAA3]
+!12 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !13, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = !{null, !14, !15}
+!14 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS4AAA3"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS4AAA3]
+!15 = !{!"0xf\00\000\0064\0064\000\000", null, null, !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!16 = !{!"0x26\00\000\000\000\000\000", null, null, !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from char]
+!17 = !{!"0x2e\00operator=\00operator=\00_ZN4AAA3aSEPKc\006\000\000\000\006\00256\001\006", !1, !"_ZTS4AAA3", !12, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 6] [operator=]
+!18 = !{!"0x2e\00operator const char *\00operator const char *\00_ZNK4AAA3cvPKcEv\007\000\000\000\006\00256\001\007", !1, !"_ZTS4AAA3", !19, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 7] [operator const char *]
+!19 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !20, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!20 = !{!15, !21}
+!21 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !22} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from ]
+!22 = !{!"0x26\00\000\000\000\000\000", null, null, !"_ZTS4AAA3"} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from _ZTS4AAA3]
+!23 = !{!24, !35, !40}
+!24 = !{!"0x2e\00bar\00bar\00_Z3barii\0011\000\001\000\006\00256\001\0011", !1, !25, !26, null, void (i32, i32)* @_Z3barii, null, null, !29} ; [ DW_TAG_subprogram ] [line 11] [def] [bar]
+!25 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp]
+!26 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !27, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!27 = !{null, !28, !28}
+!28 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!29 = !{!30, !31, !32, !33, !34}
+!30 = !{!"0x101\00param1\0016777227\000", !24, !25, !28} ; [ DW_TAG_arg_variable ] [param1] [line 11]
+!31 = !{!"0x101\00param2\0033554443\000", !24, !25, !28} ; [ DW_TAG_arg_variable ] [param2] [line 11]
+!32 = !{!"0x100\00temp\0012\000", !24, !25, !15} ; [ DW_TAG_auto_variable ] [temp] [line 12]
+!33 = !{!"0x100\00var1\0017\000", !24, !25, !"_ZTS4AAA3"} ; [ DW_TAG_auto_variable ] [var1] [line 17]
+!34 = !{!"0x100\00var2\0018\000", !24, !25, !"_ZTS4AAA3"} ; [ DW_TAG_auto_variable ] [var2] [line 18]
+!35 = !{!"0x2e\00operator=\00operator=\00_ZN4AAA3aSEPKc\006\000\001\000\006\00256\001\006", !1, !"_ZTS4AAA3", !12, null, null, null, !17, !36} ; [ DW_TAG_subprogram ] [line 6] [def] [operator=]
+!36 = !{!37, !39}
+!37 = !{!"0x101\00this\0016777216\001088", !35, null, !38} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!38 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS4AAA3"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS4AAA3]
+!39 = !{!"0x101\00value\0033554438\000", !35, !25, !15} ; [ DW_TAG_arg_variable ] [value] [line 6]
+!40 = !{!"0x2e\00AAA3\00AAA3\00_ZN4AAA3C2EPKc\005\000\001\000\006\00256\001\005", !1, !"_ZTS4AAA3", !12, null, null, null, !11, !41} ; [ DW_TAG_subprogram ] [line 5] [def] [AAA3]
+!41 = !{!42, !43}
+!42 = !{!"0x101\00this\0016777216\001088", !40, null, !38} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!43 = !{!"0x101\00value\0033554437\000", !40, !25, !15} ; [ DW_TAG_arg_variable ] [value] [line 5]
+!44 = !{i32 2, !"Dwarf Version", i32 4}
+!45 = !{i32 2, !"Debug Info Version", i32 2}
+!46 = !{!"clang version 3.5.0 "}
+!47 = !MDLocation(line: 11, scope: !24)
+!48 = !{i8* null}
+!49 = !MDLocation(line: 12, scope: !24)
+!50 = !MDLocation(line: 14, scope: !51)
+!51 = !{!"0xb\0014\000\000", !1, !24} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp]
+!52 = !MDLocation(line: 15, scope: !53)
+!53 = !{!"0xb\0014\000\000", !1, !51} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp]
+!54 = !MDLocation(line: 16, scope: !53)
+!55 = !MDLocation(line: 17, scope: !24)
+!56 = !{!"0x101\00this\0016777216\001088", !40, null, !38, !55} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!57 = !MDLocation(line: 0, scope: !40, inlinedAt: !55)
+!58 = !{i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0)}
+!59 = !{!"0x101\00value\0033554437\000", !40, !25, !15, !55} ; [ DW_TAG_arg_variable ] [value] [line 5]
+!60 = !MDLocation(line: 5, scope: !40, inlinedAt: !55)
+!61 = !MDLocation(line: 5, scope: !62, inlinedAt: !55)
+!62 = !{!"0xb\005\000\000", !1, !40} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp]
+!63 = !MDLocation(line: 18, scope: !24)
+!64 = !{!"0x101\00this\0016777216\001088", !40, null, !38, !63} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!65 = !MDLocation(line: 0, scope: !40, inlinedAt: !63)
+!66 = !{!"0x101\00value\0033554437\000", !40, !25, !15, !63} ; [ DW_TAG_arg_variable ] [value] [line 5]
+!67 = !MDLocation(line: 5, scope: !40, inlinedAt: !63)
+!68 = !MDLocation(line: 5, scope: !62, inlinedAt: !63)
+!69 = !MDLocation(line: 20, scope: !70)
+!70 = !{!"0xb\0020\000\000", !1, !24} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/dbg-changes-codegen-branch-folding.cpp]
+!71 = !{!"0x101\00this\0016777216\001088", !35, null, !38, !72} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!72 = !MDLocation(line: 21, scope: !70)
+!73 = !MDLocation(line: 0, scope: !35, inlinedAt: !72)
+!74 = !{i8* getelementptr inbounds ([2 x i8]* @.str1, i64 0, i64 0)}
+!75 = !{!"0x101\00value\0033554438\000", !35, !25, !15, !72} ; [ DW_TAG_arg_variable ] [value] [line 6]
+!76 = !MDLocation(line: 6, scope: !35, inlinedAt: !72)
+!77 = !{!"0x101\00this\0016777216\001088", !35, null, !38, !78} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!78 = !MDLocation(line: 23, scope: !70)
+!79 = !MDLocation(line: 0, scope: !35, inlinedAt: !78)
+!80 = !{i8* getelementptr inbounds ([2 x i8]* @.str2, i64 0, i64 0)}
+!81 = !{!"0x101\00value\0033554438\000", !35, !25, !15, !78} ; [ DW_TAG_arg_variable ] [value] [line 6]
+!82 = !MDLocation(line: 6, scope: !35, inlinedAt: !78)
+!83 = !{!"0x101\00this\0016777216\001088", !35, null, !38, !84} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!84 = !MDLocation(line: 24, scope: !24)
+!85 = !MDLocation(line: 0, scope: !35, inlinedAt: !84)
+!86 = !{!"0x101\00value\0033554438\000", !35, !25, !15, !84} ; [ DW_TAG_arg_variable ] [value] [line 6]
+!87 = !MDLocation(line: 6, scope: !35, inlinedAt: !84)
+!88 = !MDLocation(line: 25, scope: !24)
diff --git a/test/CodeGen/X86/dbg-changes-codegen.ll b/test/CodeGen/X86/dbg-changes-codegen.ll
index 0b17c455408b..2179667245f1 100644
--- a/test/CodeGen/X86/dbg-changes-codegen.ll
+++ b/test/CodeGen/X86/dbg-changes-codegen.ll
@@ -44,7 +44,7 @@
 define zeroext i1 @_ZN3Foo3batEv(%struct.Foo* %this) #0 align 2 {
 entry:
   %0 = load %struct.Foo** @pfoo, align 8
-  tail call void @llvm.dbg.value(metadata !{%struct.Foo* %0}, i64 0, metadata !62)
+  tail call void @llvm.dbg.value(metadata %struct.Foo* %0, i64 0, metadata !62, metadata !{!"0x102"})
   %cmp.i = icmp eq %struct.Foo* %0, %this
   ret i1 %cmp.i
 }
@@ -53,7 +53,7 @@ entry:
 define void @_Z3bazv() #1 {
 entry:
   %0 = load %struct.Wibble** @wibble1, align 8
-  tail call void @llvm.dbg.value(metadata !64, i64 0, metadata !65)
+  tail call void @llvm.dbg.value(metadata %struct.Flibble* undef, i64 0, metadata !65, metadata !{!"0x102"})
   %1 = load %struct.Wibble** @wibble2, align 8
   %cmp.i = icmp ugt %struct.Wibble* %1, %0
   br i1 %cmp.i, label %if.then.i, label %_ZN7Flibble3barEP6Wibble.exit
@@ -69,15 +69,15 @@ _ZN7Flibble3barEP6Wibble.exit:                    ; preds = %entry, %if.then.i
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #2
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
 
 attributes #0 = { nounwind readonly uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
 
 
-!17 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from Foo]
-!45 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Flibble]
-!62 = metadata !{i32 786689, null, metadata !"arg", null, i32 33554436, metadata !17, i32 0, null} ; [ DW_TAG_arg_variable ] [arg] [line 4]
-!64 = metadata !{%struct.Flibble* undef}
-!65 = metadata !{i32 786689, null, metadata !"this", null, i32 16777229, metadata !45, i32 1088, null} ; [ DW_TAG_arg_variable ] [this] [line 13]
+!17 = !{!"0x10\00\000\000\000\000\000", null, null, null} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from Foo]
+!45 = !{!"0xf\00\000\0064\0064\000\000", null, null, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Flibble]
+!62 = !{!"0x101\00arg\0033554436\000", null, null, !17} ; [ DW_TAG_arg_variable ] [arg] [line 4]
+!64 = !{%struct.Flibble* undef}
+!65 = !{!"0x101\00this\0016777229\001088", null, null, !45} ; [ DW_TAG_arg_variable ] [this] [line 13]
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index 21225e340826..fd07a3f55100 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -31,6 +31,7 @@ entry:
 ; CHECK-LABEL: test3:
 ; CHECK: movzbl  8(%esp), %eax
 ; CHECK-NEXT: imull	$171, %eax
+; CHECK-NEXT: andl $65024, %eax
 ; CHECK-NEXT: shrl	$9, %eax
 ; CHECK-NEXT: ret
 }
@@ -56,9 +57,10 @@ entry:
   %div = sdiv i16 %x, 10
   ret i16 %div
 ; CHECK-LABEL: test6:
-; CHECK: imull $26215, %eax, %ecx
-; CHECK: sarl $18, %ecx
-; CHECK: shrl $15, %eax
+; CHECK: imull $26215, %eax
+; CHECK: movl %eax, %ecx
+; CHECK: shrl $31, %ecx
+; CHECK: sarl $18, %eax
 }
 
 define i32 @test7(i32 %x) nounwind {
diff --git a/test/CodeGen/X86/divrem8_ext.ll b/test/CodeGen/X86/divrem8_ext.ll
new file mode 100644
index 000000000000..ec367c86526d
--- /dev/null
+++ b/test/CodeGen/X86/divrem8_ext.ll
@@ -0,0 +1,100 @@
+; RUN: llc -march=x86-64 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-64
+; RUN: llc -march=x86    < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-32
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+define zeroext i8 @test_udivrem_zext_ah(i8 %x, i8 %y) {
+; CHECK-LABEL: test_udivrem_zext_ah
+; CHECK:   divb
+; CHECK:   movzbl %ah, [[REG_REM:%[a-z0-9]+]]
+; CHECK:   movb   %al, ([[REG_ZPTR:%[a-z0-9]+]])
+; CHECK:   movl   [[REG_REM]], %eax
+; CHECK:   ret
+  %div = udiv i8 %x, %y
+  store i8 %div, i8* @z
+  %1 = urem i8 %x, %y
+  ret i8 %1
+}
+
+define zeroext i8 @test_urem_zext_ah(i8 %x, i8 %y) {
+; CHECK-LABEL: test_urem_zext_ah
+; CHECK:   divb
+; CHECK:   movzbl %ah, %eax
+; CHECK:   ret
+  %1 = urem i8 %x, %y
+  ret i8 %1
+}
+
+define i8 @test_urem_noext_ah(i8 %x, i8 %y) {
+; CHECK-LABEL: test_urem_noext_ah
+; CHECK:   divb   [[REG_X:%[a-z0-9]+]]
+; CHECK:   movzbl %ah, %eax
+; CHECK:   addb   [[REG_X]], %al
+; CHECK:   ret
+  %1 = urem i8 %x, %y
+  %2 = add i8 %1, %y
+  ret i8 %2
+}
+
+define i64 @test_urem_zext64_ah(i8 %x, i8 %y) {
+; CHECK-LABEL: test_urem_zext64_ah
+; CHECK:    divb
+; CHECK:    movzbl %ah, %eax
+; CHECK-32: xorl %edx, %edx
+; CHECK:    ret
+  %1 = urem i8 %x, %y
+  %2 = zext i8 %1 to i64
+  ret i64 %2
+}
+
+define signext i8 @test_sdivrem_sext_ah(i8 %x, i8 %y) {
+; CHECK-LABEL: test_sdivrem_sext_ah
+; CHECK:   cbtw
+; CHECK:   idivb
+; CHECK:   movsbl %ah, [[REG_REM:%[a-z0-9]+]]
+; CHECK:   movb   %al, ([[REG_ZPTR]])
+; CHECK:   movl   [[REG_REM]], %eax
+; CHECK:   ret
+  %div = sdiv i8 %x, %y
+  store i8 %div, i8* @z
+  %1 = srem i8 %x, %y
+  ret i8 %1
+}
+
+define signext i8 @test_srem_sext_ah(i8 %x, i8 %y) {
+; CHECK-LABEL: test_srem_sext_ah
+; CHECK:   cbtw
+; CHECK:   idivb
+; CHECK:   movsbl %ah, %eax
+; CHECK:   ret
+  %1 = srem i8 %x, %y
+  ret i8 %1
+}
+
+define i8 @test_srem_noext_ah(i8 %x, i8 %y) {
+; CHECK-LABEL: test_srem_noext_ah
+; CHECK:   cbtw
+; CHECK:   idivb [[REG_X:%[a-z0-9]+]]
+; CHECK:   movsbl %ah, %eax
+; CHECK:   addb   [[REG_X]], %al
+; CHECK:   ret
+  %1 = srem i8 %x, %y
+  %2 = add i8 %1, %y
+  ret i8 %2
+}
+
+define i64 @test_srem_sext64_ah(i8 %x, i8 %y) {
+; CHECK-LABEL: test_srem_sext64_ah
+; CHECK:    cbtw
+; CHECK:    idivb
+; CHECK:    movsbl %ah, %eax
+; CHECK-32: movl %eax, %edx
+; CHECK-32: sarl $31, %edx
+; CHECK-64: movsbq %al, %rax
+; CHECK:    ret
+  %1 = srem i8 %x, %y
+  %2 = sext i8 %1 to i64
+  ret i64 %2
+}
+
+@z = external global i8
diff --git a/test/CodeGen/X86/dllexport-x86_64.ll b/test/CodeGen/X86/dllexport-x86_64.ll
index 0d5afa1b1384..c673f5d485f9 100644
--- a/test/CodeGen/X86/dllexport-x86_64.ll
+++ b/test/CodeGen/X86/dllexport-x86_64.ll
@@ -70,7 +70,7 @@ define weak_odr dllexport void @weak1() {
 
 ; CHECK: .weak weak_alias
 ; CHECK: weak_alias = f1
-@weak_alias = dllexport alias weak_odr void()* @f1
+@weak_alias = weak_odr dllexport alias void()* @f1
 
 @blob = global [6 x i8] c"\B8*\00\00\00\C3", section ".text", align 16
 @blob_alias = dllexport alias bitcast ([6 x i8]* @blob to i32 ()*)
diff --git a/test/CodeGen/X86/dllexport.ll b/test/CodeGen/X86/dllexport.ll
index e2c3f131ee06..5035aa153301 100644
--- a/test/CodeGen/X86/dllexport.ll
+++ b/test/CodeGen/X86/dllexport.ll
@@ -89,7 +89,7 @@ define weak_odr dllexport void @weak1() {
 
 ; CHECK: .weak _weak_alias
 ; CHECK: _weak_alias = _f1
-@weak_alias = dllexport alias weak_odr void()* @f1
+@weak_alias = weak_odr dllexport alias void()* @f1
 
 
 ; CHECK: .section .drectve
diff --git a/test/CodeGen/X86/dllimport-x86_64.ll b/test/CodeGen/X86/dllimport-x86_64.ll
index 666409fd4c07..839bca4f3c31 100644
--- a/test/CodeGen/X86/dllimport-x86_64.ll
+++ b/test/CodeGen/X86/dllimport-x86_64.ll
@@ -4,7 +4,7 @@
 ; RUN: llc -mtriple x86_64-pc-mingw32 -O0 < %s | FileCheck %s -check-prefix=FAST
 ; PR6275
 ;
-; RUN: opt -mtriple x86_64-pc-win32 -std-compile-opts -S < %s | FileCheck %s -check-prefix=OPT
+; RUN: opt -mtriple x86_64-pc-win32 -O3 -S < %s | FileCheck %s -check-prefix=OPT
 
 @Var1 = external dllimport global i32
 @Var2 = available_externally dllimport unnamed_addr constant i32 1
diff --git a/test/CodeGen/X86/dllimport.ll b/test/CodeGen/X86/dllimport.ll
index 695bfce821bb..231ad65740b8 100644
--- a/test/CodeGen/X86/dllimport.ll
+++ b/test/CodeGen/X86/dllimport.ll
@@ -4,7 +4,7 @@
 ; RUN: llc -mtriple i386-pc-mingw32 -O0 < %s | FileCheck %s -check-prefix=FAST
 ; PR6275
 ;
-; RUN: opt -mtriple i386-pc-win32 -std-compile-opts -S < %s | FileCheck %s -check-prefix=OPT
+; RUN: opt -mtriple i386-pc-win32 -O3 -S < %s | FileCheck %s -check-prefix=OPT
 
 @Var1 = external dllimport global i32
 @Var2 = available_externally dllimport unnamed_addr constant i32 1
diff --git a/test/CodeGen/X86/dont-trunc-store-double-to-float.ll b/test/CodeGen/X86/dont-trunc-store-double-to-float.ll
new file mode 100644
index 000000000000..24d9533eba4a
--- /dev/null
+++ b/test/CodeGen/X86/dont-trunc-store-double-to-float.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=x86 < %s | FileCheck %s
+
+; CHECK-LABEL: @bar
+; CHECK: movl $1074339512,
+; CHECK: movl $1374389535,
+; CHECK: movl $1078523331,
+define void @bar() unnamed_addr {
+entry-block:
+  %a = alloca double
+  %b = alloca float
+
+  store double 3.140000e+00, double* %a
+  %0 = load double* %a
+
+  %1 = fptrunc double %0 to float
+
+  store float %1, float* %b
+
+  ret void
+}
diff --git a/test/CodeGen/X86/dwarf-comp-dir.ll b/test/CodeGen/X86/dwarf-comp-dir.ll
index c8d752771044..77eba63a83ec 100644
--- a/test/CodeGen/X86/dwarf-comp-dir.ll
+++ b/test/CodeGen/X86/dwarf-comp-dir.ll
@@ -7,15 +7,15 @@ target triple = "x86_64-unknown-linux-gnu"
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!5}
 
-!0 = metadata !{i32 720913, metadata !4, i32 12, metadata !"clang version 3.1 (trunk 143523)", i1 true, metadata !"", i32 0, metadata !2, metadata !7, metadata !2, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!2 = metadata !{}
-!3 = metadata !{i32 786473, metadata !4} ; [ DW_TAG_file_type ]
-!4 = metadata !{metadata !"empty.c", metadata !"/home/nlewycky"}
-!6 = metadata !{i32 786451, metadata !4, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
-!7 = metadata !{metadata !6}
+!0 = !{!"0x11\0012\00clang version 3.1 (trunk 143523)\001\00\000\00\000", !4, !2, !7, !2, !2, null} ; [ DW_TAG_compile_unit ]
+!2 = !{}
+!3 = !{!"0x29", !4} ; [ DW_TAG_file_type ]
+!4 = !{!"empty.c", !"/home/nlewycky"}
+!6 = !{!"0x13\00foo\001\008\008\000\000\000", !4, null, null, !2, null, null, !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
+!7 = !{!6}
 
 ; The important part of the following check is that dir = #0.
 ;                        Dir  Mod Time   File Len   File Name
 ;                        ---- ---------- ---------- ---------------------------
 ; CHECK: file_names[  1]    0 0x00000000 0x00000000 empty.c
-!5 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!5 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/dynamic-alloca-lifetime.ll b/test/CodeGen/X86/dynamic-alloca-lifetime.ll
new file mode 100644
index 000000000000..f019bed858c2
--- /dev/null
+++ b/test/CodeGen/X86/dynamic-alloca-lifetime.ll
@@ -0,0 +1,44 @@
+; RUN: llc -no-stack-coloring=false < %s | FileCheck %s
+
+; This test crashed in PEI because the stack protector was dead.
+; This was due to it being colored, which was in turn due to incorrect
+; lifetimes being applied to the stack protector frame index.
+
+; CHECK: stack_chk_guard
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.10.0"
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #0
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #0
+
+; Function Attrs: ssp
+define void @foo(i1 %cond1, i1 %cond2) #1 {
+entry:
+  %bitmapBuffer = alloca [8192 x i8], align 1
+  br i1 %cond1, label %end1, label %bb1
+
+bb1:
+  %bitmapBuffer229 = alloca [8192 x i8], align 1
+  br i1 %cond2, label %end1, label %if.else130
+
+end1:
+  ret void
+
+if.else130:                                       ; preds = %bb1
+  %tmp = getelementptr inbounds [8192 x i8]* %bitmapBuffer, i32 0, i32 0
+  call void @llvm.lifetime.start(i64 8192, i8* %tmp) #0
+  call void @llvm.lifetime.end(i64 8192, i8* %tmp) #0
+  %tmp25 = getelementptr inbounds [8192 x i8]* %bitmapBuffer229, i32 0, i32 0
+  call void @llvm.lifetime.start(i64 8192, i8* %tmp25) #0
+  call void @llvm.lifetime.end(i64 8192, i8* %tmp25) #0
+  br label %end1
+}
+
+declare void @bar()
+
+attributes #0 = { nounwind }
+attributes #1 = { ssp }
+\ No newline at end of file
diff --git a/test/CodeGen/X86/elf-comdat.ll b/test/CodeGen/X86/elf-comdat.ll
index c7e6df7d64f0..35d8d6f2d2af 100644
--- a/test/CodeGen/X86/elf-comdat.ll
+++ b/test/CodeGen/X86/elf-comdat.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -mtriple x86_64-pc-linux-gnu < %s | FileCheck %s
 
 $f = comdat any
-@v = global i32 0, comdat $f
-define void @f() comdat $f {
+@v = global i32 0, comdat($f)
+define void @f() comdat($f) {
   ret void
 }
 ; CHECK: .section        .text.f,"axG",@progbits,f,comdat
diff --git a/test/CodeGen/X86/elf-comdat2.ll b/test/CodeGen/X86/elf-comdat2.ll
index 209da39ed881..786cec78cc30 100644
--- a/test/CodeGen/X86/elf-comdat2.ll
+++ b/test/CodeGen/X86/elf-comdat2.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple x86_64-pc-linux-gnu < %s | FileCheck %s
 
 $foo = comdat any
-@bar = global i32 42, comdat $foo
+@bar = global i32 42, comdat($foo)
 @foo = global i32 42
 
 ; CHECK:      .type   bar,@object
diff --git a/test/CodeGen/X86/empty-functions.ll b/test/CodeGen/X86/empty-functions.ll
index ac5174db5fc5..42349688a710 100644
--- a/test/CodeGen/X86/empty-functions.ll
+++ b/test/CodeGen/X86/empty-functions.ll
@@ -1,10 +1,14 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck -check-prefix=CHECK-NO-FP %s
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=LINUX-NO-FP %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -disable-fp-elim | FileCheck -check-prefix=LINUX-FP %s
 
 define void @func() {
 entry:
   unreachable
 }
+
+; MachO cannot handle an empty function.
 ; CHECK-NO-FP:     _func:
 ; CHECK-NO-FP-NEXT: .cfi_startproc
 ; CHECK-NO-FP:     nop
@@ -21,5 +25,30 @@ entry:
 ; CHECK-FP-NEXT: movq %rsp, %rbp
 ; CHECK-FP-NEXT: :
 ; CHECK-FP-NEXT: .cfi_def_cfa_register %rbp
-; CHECK-FP-NEXT: nop
 ; CHECK-FP-NEXT: .cfi_endproc
+
+; An empty function is perfectly fine on ELF.
+; LINUX-NO-FP: func:
+; LINUX-NO-FP-NEXT: .cfi_startproc
+; LINUX-NO-FP-NEXT: {{^}}#
+; LINUX-NO-FP-NEXT: {{^}}.L{{.*}}:{{$}}
+; LINUX-NO-FP-NEXT: .size   func, .L{{.*}}-func
+; LINUX-NO-FP-NEXT: .cfi_endproc
+
+; A cfi directive can point to the end of a function. It (and in fact the
+; entire body) could be optimized out because of the unreachable, but we
+; don't do it right now.
+; LINUX-FP: func:
+; LINUX-FP-NEXT: .cfi_startproc
+; LINUX-FP-NEXT: {{^}}#
+; LINUX-FP-NEXT: pushq %rbp
+; LINUX-FP-NEXT: {{^}}.L{{.*}}:{{$}}
+; LINUX-FP-NEXT:  .cfi_def_cfa_offset 16
+; LINUX-FP-NEXT: {{^}}.L{{.*}}:{{$}}
+; LINUX-FP-NEXT: .cfi_offset %rbp, -16
+; LINUX-FP-NEXT: movq        %rsp, %rbp
+; LINUX-FP-NEXT:{{^}}.L{{.*}}:{{$}}
+; LINUX-FP-NEXT: .cfi_def_cfa_register %rbp
+; LINUX-FP-NEXT:{{^}}.L{{.*}}:{{$}}
+; LINUX-FP-NEXT: .size   func, .Ltmp3-func
+; LINUX-FP-NEXT: .cfi_endproc
diff --git a/test/CodeGen/X86/equiv_with_fndef.ll b/test/CodeGen/X86/equiv_with_fndef.ll
new file mode 100644
index 000000000000..efbb8ab3da69
--- /dev/null
+++ b/test/CodeGen/X86/equiv_with_fndef.ll
@@ -0,0 +1,10 @@
+; RUN: not llc < %s 2>&1 | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm ".equiv pselect, __pselect"
+
+define void @pselect() {
+  ret void
+}
+; CHECK: 'pselect' is a protected alias
diff --git a/test/CodeGen/X86/equiv_with_vardef.ll b/test/CodeGen/X86/equiv_with_vardef.ll
new file mode 100644
index 000000000000..29c19a107ec3
--- /dev/null
+++ b/test/CodeGen/X86/equiv_with_vardef.ll
@@ -0,0 +1,8 @@
+; RUN: not llc < %s 2>&1 | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm ".equiv var, __var"
+
+@var = global i32 0
+; CHECK: symbol 'var' is already defined
diff --git a/test/CodeGen/X86/exedepsfix-broadcast.ll b/test/CodeGen/X86/exedepsfix-broadcast.ll
index a18f75195631..ab92fe0d1d0c 100644
--- a/test/CodeGen/X86/exedepsfix-broadcast.ll
+++ b/test/CodeGen/X86/exedepsfix-broadcast.ll
@@ -93,10 +93,11 @@ define <4 x double> @ExeDepsFix_broadcastsd256(<4 x double> %arg, <4 x double> %
 
 
 ; CHECK-LABEL: ExeDepsFix_broadcastsd_inreg
-; ExeDepsFix works top down, thus it coalesces vmovlhps domain with
-; vandps and there is nothing more you can do to match vmaxpd.
-; CHECK: vmovlhps
-; CHECK: vandps
+; ExeDepsFix works top down, thus it coalesces vpunpcklqdq domain with
+; vpand and there is nothing more you can do to match vmaxpd.
+; CHECK: vmovq
+; CHECK: vpbroadcastq
+; CHECK: vpand
 ; CHECK: vmaxpd
 ; CHECK: ret
 define <2 x double> @ExeDepsFix_broadcastsd_inreg(<2 x double> %arg, <2 x double> %arg2, i64 %broadcastvalue) {
diff --git a/test/CodeGen/X86/extractelement-load.ll b/test/CodeGen/X86/extractelement-load.ll
index cadc0fb723f9..732f698f59ff 100644
--- a/test/CodeGen/X86/extractelement-load.ll
+++ b/test/CodeGen/X86/extractelement-load.ll
@@ -1,5 +1,8 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=yonah | FileCheck %s
 ; RUN: llc < %s -march=x86-64 -mattr=+sse2 -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+avx -mcpu=btver2 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define i32 @t(<2 x i64>* %val) nounwind  {
 ; CHECK-LABEL: t:
@@ -23,3 +26,39 @@ undef, i32 7, i32 9, i32 undef, i32 13, i32 15, i32 1, i32 3>
   %y = extractelement <8 x i32> %Shuff68, i32 0
   ret i32 %y
 }
+
+; This case could easily end up inf-looping in the DAG combiner due to an
+; low alignment load of the vector which prevents us from reliably forming a
+; narrow load.
+
+; The expected codegen is identical for the AVX case except
+; load/store instructions will have a leading 'v', so we don't
+; need to special-case the checks.
+
+define void @t3() {
+; CHECK-LABEL: t3:
+; CHECK: movupd
+; CHECK: movhpd
+
+bb:
+  %tmp13 = load <2 x double>* undef, align 1
+  %.sroa.3.24.vec.extract = extractelement <2 x double> %tmp13, i32 1
+  store double %.sroa.3.24.vec.extract, double* undef, align 8
+  unreachable
+}
+
+; Case where a load is unary shuffled, then bitcast (to a type with the same
+; number of elements) before extractelement.
+; This is testing for an assertion - the extraction was assuming that the undef
+; second shuffle operand was a post-bitcast type instead of a pre-bitcast type.
+define i64 @t4(<2 x double>* %a) {
+; CHECK-LABEL: t4:
+; CHECK: mov
+; CHECK: ret
+  %b = load <2 x double>* %a, align 16
+  %c = shufflevector <2 x double> %b, <2 x double> %b, <2 x i32> <i32 1, i32 0>
+  %d = bitcast <2 x double> %c to <2 x i64>
+  %e = extractelement <2 x i64> %d, i32 1
+  ret i64 %e
+}
+
diff --git a/test/CodeGen/X86/fast-isel-args-fail.ll b/test/CodeGen/X86/fast-isel-args-fail.ll
index 7467edd74f21..7e783d2891d4 100644
--- a/test/CodeGen/X86/fast-isel-args-fail.ll
+++ b/test/CodeGen/X86/fast-isel-args-fail.ll
@@ -1,7 +1,6 @@
 ; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-apple-darwin10
 ; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN32
 ; RUN: llc < %s -fast-isel -verify-machineinstrs -mtriple=x86_64-pc-win64 | FileCheck %s -check-prefix=WIN64
-; REQUIRES: asserts
 
 ; Previously, this would cause an assert.
 define i31 @t1(i31 %a, i31 %b, i31 %c) {
diff --git a/test/CodeGen/X86/fast-isel-branch_weights.ll b/test/CodeGen/X86/fast-isel-branch_weights.ll
index bc41395e1e83..d2b02aad182d 100644
--- a/test/CodeGen/X86/fast-isel-branch_weights.ll
+++ b/test/CodeGen/X86/fast-isel-branch_weights.ll
@@ -16,4 +16,4 @@ success:
   ret i64 0
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 0, i32 2147483647}
+!0 = !{!"branch_weights", i32 0, i32 2147483647}
diff --git a/test/CodeGen/X86/fast-isel-call-bool.ll b/test/CodeGen/X86/fast-isel-call-bool.ll
new file mode 100644
index 000000000000..5cdb2c941161
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-call-bool.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -fast-isel -mcpu=core2 -mtriple=x86_64-unknown-unknown -O1 | FileCheck %s
+; See PR21557
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare i64 @bar(i1)
+
+define i64 @foo(i8* %arg) {
+; CHECK-LABEL: foo:
+top:
+  %0 = load i8* %arg
+; CHECK: movb
+  %1 = trunc i8 %0 to i1
+; CHECK: andb $1,
+  %2 = call i64 @bar(i1 %1)
+; CHECK: callq
+  ret i64 %2
+}
diff --git a/test/CodeGen/X86/fast-isel-cmp-branch.ll b/test/CodeGen/X86/fast-isel-cmp-branch.ll
index 6e408f896663..684647ca9484 100644
--- a/test/CodeGen/X86/fast-isel-cmp-branch.ll
+++ b/test/CodeGen/X86/fast-isel-cmp-branch.ll
@@ -1,5 +1,5 @@
 ; RUN: llc -O0 -mtriple=x86_64-linux -asm-verbose=false < %s | FileCheck %s
-; RUN: llc -O0 -mtriple=x86_64-win32 -asm-verbose=false < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=x86_64-windows-itanium -asm-verbose=false < %s | FileCheck %s
 ; rdar://8337108
 
 ; Fast-isel shouldn't try to look through the compare because it's in a
diff --git a/test/CodeGen/X86/fast-isel-cmp-branch3.ll b/test/CodeGen/X86/fast-isel-cmp-branch3.ll
index a3f6851ca240..0df782d18ecf 100644
--- a/test/CodeGen/X86/fast-isel-cmp-branch3.ll
+++ b/test/CodeGen/X86/fast-isel-cmp-branch3.ll
@@ -351,7 +351,7 @@ bb1:
 define i32 @icmp_eq(i32 %x) {
 ; CHECK-LABEL: icmp_eq
 ; CHECK-NOT:   cmpl
-; CHECK:       movl $0, %eax
+; CHECK:       xorl %eax, %eax
   %1 = icmp eq i32 %x, %x
   br i1 %1, label %bb1, label %bb2
 bb2:
@@ -387,7 +387,7 @@ bb1:
 define i32 @icmp_uge(i32 %x) {
 ; CHECK-LABEL: icmp_uge
 ; CHECK-NOT:   cmpl
-; CHECK:       movl $0, %eax
+; CHECK:       xorl %eax, %eax
   %1 = icmp uge i32 %x, %x
   br i1 %1, label %bb1, label %bb2
 bb2:
@@ -411,7 +411,7 @@ bb1:
 define i32 @icmp_ule(i32 %x) {
 ; CHECK-LABEL: icmp_ule
 ; CHECK-NOT:   cmpl
-; CHECK:       movl $0, %eax
+; CHECK:       xorl %eax, %eax
   %1 = icmp ule i32 %x, %x
   br i1 %1, label %bb1, label %bb2
 bb2:
@@ -435,7 +435,7 @@ bb1:
 define i32 @icmp_sge(i32 %x) {
 ; CHECK-LABEL: icmp_sge
 ; CHECK-NOT:   cmpl
-; CHECK:       movl $0, %eax
+; CHECK:       xorl %eax, %eax
   %1 = icmp sge i32 %x, %x
   br i1 %1, label %bb1, label %bb2
 bb2:
@@ -459,7 +459,7 @@ bb1:
 define i32 @icmp_sle(i32 %x) {
 ; CHECK-LABEL: icmp_sle
 ; CHECK-NOT:   cmpl
-; CHECK:       movl $0, %eax
+; CHECK:       xorl %eax, %eax
   %1 = icmp sle i32 %x, %x
   br i1 %1, label %bb1, label %bb2
 bb2:
diff --git a/test/CodeGen/X86/fast-isel-constpool.ll b/test/CodeGen/X86/fast-isel-constpool.ll
index bbbaeb233919..4e6f7c0f9e8e 100644
--- a/test/CodeGen/X86/fast-isel-constpool.ll
+++ b/test/CodeGen/X86/fast-isel-constpool.ll
@@ -1,19 +1,23 @@
-; RUN: llc < %s -fast-isel | FileCheck %s
-; CHECK: LCPI0_0(%rip)
+; RUN: llc -mtriple=x86_64-apple-darwin -fast-isel -code-model=small < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin -fast-isel -code-model=large < %s | FileCheck %s --check-prefix=LARGE
 
-; Make sure fast isel uses rip-relative addressing when required.
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-apple-darwin9.0"
+; Make sure fast isel uses rip-relative addressing for the small code model.
+define float @constpool_float(float %x) {
+; CHECK-LABEL: constpool_float
+; CHECK:       LCPI0_0(%rip)
 
-define i32 @f0(double %x) nounwind {
-entry:
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%x.addr = alloca double		; <double*> [#uses=2]
-	store double %x, double* %x.addr
-	%tmp = load double* %x.addr		; <double> [#uses=1]
-	%cmp = fcmp olt double %tmp, 8.500000e-01		; <i1> [#uses=1]
-	%conv = zext i1 %cmp to i32		; <i32> [#uses=1]
-	store i32 %conv, i32* %retval
-	%0 = load i32* %retval		; <i32> [#uses=1]
-	ret i32 %0
+; LARGE-LABEL: constpool_float
+; LARGE:       movabsq  $LCPI0_0, %rax
+  %1 = fadd float %x, 16.50e+01
+  ret float %1
+}
+
+define double @constpool_double(double %x) nounwind {
+; CHECK-LABEL: constpool_double
+; CHECK:       LCPI1_0(%rip)
+
+; LARGE-LABEL: constpool_double
+; LARGE:       movabsq  $LCPI1_0, %rax
+  %1 = fadd double %x, 8.500000e-01
+  ret double %1
 }
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
index 4e47c7455c53..a65e0705f2b2 100644
--- a/test/CodeGen/X86/fast-isel-gep.ll
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-linux -O0 | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 -O0 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-windows-itanium -O0 | FileCheck %s --check-prefix=X64
 ; RUN: llc < %s -march=x86 -O0 | FileCheck %s --check-prefix=X32
 
 ; GEP indices are interpreted as signed integers, so they
diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll
index cd2dc1d02c8a..eca1ae9f02a3 100644
--- a/test/CodeGen/X86/fast-isel-mem.ll
+++ b/test/CodeGen/X86/fast-isel-mem.ll
@@ -36,11 +36,11 @@ entry:
 	store i32 (...)** getelementptr ([4 x i32 (...)*]* @LotsStuff, i32 0, i32 2), i32 (...)*** null, align 4
 	ret void
 ; CHECK: _t:
-; CHECK:	movl	$0, %eax
+; CHECK:	xorl    %eax, %eax
 ; CHECK:	movl	L_LotsStuff$non_lazy_ptr, %ecx
 
 ; ATOM: _t:
 ; ATOM:         movl    L_LotsStuff$non_lazy_ptr, %e{{..}}
-; ATOM:         movl    $0, %e{{..}}
+; ATOM:         xorl    %e{{..}}, %e{{..}}
 
 }
diff --git a/test/CodeGen/X86/fast-isel-tls.ll b/test/CodeGen/X86/fast-isel-tls.ll
index f71abd2fec01..686df43ac504 100644
--- a/test/CodeGen/X86/fast-isel-tls.ll
+++ b/test/CodeGen/X86/fast-isel-tls.ll
@@ -13,7 +13,7 @@ entry:
 ; CHECK: leal	v@TLSGD
 ; CHECK: __tls_get_addr
 
-@alias = alias internal i32* @v
+@alias = internal alias i32* @v
 define i32 @f_alias() nounwind {
 entry:
           %t = load i32* @v
diff --git a/test/CodeGen/X86/fast-isel-x32.ll b/test/CodeGen/X86/fast-isel-x32.ll
new file mode 100644
index 000000000000..d49a10801065
--- /dev/null
+++ b/test/CodeGen/X86/fast-isel-x32.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -fast-isel -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-nacl -fast-isel -fast-isel-abort | FileCheck %s
+
+; Test that alloca addresses are materialized with the right size instruction.
+
+declare void @bar(i32* %arg)
+
+; CHECK-LABEL: @foo
+define void @foo() {
+  %a = alloca i32
+; CHECK: leal {{.*}}, %edi
+  call void @bar(i32* %a)
+  ret void
+}
diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll
index f7d2750b5b81..3747d049424d 100644
--- a/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -144,7 +144,7 @@ if.end:                                           ; preds = %if.then, %entry
 ; CHECK-LABEL: test12:
 ; CHECK: testb	$1,
 ; CHECK-NEXT: je L
-; CHECK-NEXT: movl $0, %edi
+; CHECK-NEXT: xorl %edi, %edi
 ; CHECK-NEXT: callq
 }
 
@@ -154,7 +154,7 @@ define void @test13() nounwind {
   call void @test13f(i1 0)
   ret void
 ; CHECK-LABEL: test13:
-; CHECK: movl $0, %edi
+; CHECK: xorl %edi, %edi
 ; CHECK-NEXT: callq
 }
 
@@ -194,12 +194,10 @@ define void @test16() nounwind {
   br label %block2
 
 block2:
-; CHECK: movabsq $1
-; CHECK: cvtsi2sdq {{.*}} %xmm0
+; CHECK: movsd LCP{{.*}}_{{.*}}(%rip), %xmm0
 ; CHECK: movb $1, %al
 ; CHECK: callq _test16callee
 
-; AVX: movabsq $1
 ; AVX: vmovsd LCP{{.*}}_{{.*}}(%rip), %xmm0
 ; AVX: movb $1, %al
 ; AVX: callq _test16callee
@@ -280,7 +278,7 @@ entry:
   call void @foo22(i32 3)
   ret void
 ; CHECK-LABEL: test22:
-; CHECK: movl	$0, %edi
+; CHECK: xorl	%edi, %edi
 ; CHECK: callq	_foo22
 ; CHECK: movl	$1, %edi
 ; CHECK: callq	_foo22
@@ -304,3 +302,13 @@ define void @test23(i8* noalias sret %result) {
 }
 
 declare i8* @foo23()
+
+declare void @takesi32ptr(i32* %arg)
+
+; CHECK-LABEL: allocamaterialize
+define void @allocamaterialize() {
+  %a = alloca i32
+; CHECK: leaq {{.*}}, %rdi
+  call void @takesi32ptr(i32* %a)
+  ret void
+}
diff --git a/test/CodeGen/X86/fast-isel-x86.ll b/test/CodeGen/X86/fast-isel-x86.ll
index a212a7c6876e..61e9b98f6e7e 100644
--- a/test/CodeGen/X86/fast-isel-x86.ll
+++ b/test/CodeGen/X86/fast-isel-x86.ll
@@ -60,3 +60,21 @@ entry:
 ; CHECK: addl $28
 }
 declare fastcc void @test4fastccsret(%struct.a* sret)
+
+
+; Check that fast-isel cleans up when it fails to lower a call instruction.
+define void @test5() {
+entry:
+  %call = call i32 @test5dllimport(i32 42)
+  ret void
+; CHECK-LABEL: test5:
+; Local value area is still there:
+; CHECK: movl $42, {{%[a-z]+}}
+; Fast-ISel's arg push is not here:
+; CHECK-NOT: movl $42, (%esp)
+; SDag-ISel's arg push:
+; CHECK: movl %esp, [[REGISTER:%[a-z]+]]
+; CHECK: movl $42, ([[REGISTER]])
+; CHECK: movl __imp__test5dllimport
+}
+declare dllimport i32 @test5dllimport(i32)
diff --git a/test/CodeGen/X86/fastmath-optnone.ll b/test/CodeGen/X86/fastmath-optnone.ll
new file mode 100644
index 000000000000..0caadff89167
--- /dev/null
+++ b/test/CodeGen/X86/fastmath-optnone.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mcpu=corei7 -march=x86-64 -mattr=+sse2 | FileCheck %s
+; Verify that floating-point operations inside 'optnone' functions
+; are not optimized even if unsafe-fp-math is set.
+
+define float @foo(float %x) #0 {
+entry:
+  %add = fadd fast float %x, %x
+  %add1 = fadd fast float %add, %x
+  ret float %add1
+}
+
+; CHECK-LABEL: @foo
+; CHECK-NOT: add
+; CHECK: mul
+; CHECK-NOT: add
+; CHECK: ret
+
+define float @fooWithOptnone(float %x) #1 {
+entry:
+  %add = fadd fast float %x, %x
+  %add1 = fadd fast float %add, %x
+  ret float %add1
+}
+
+; CHECK-LABEL: @fooWithOptnone
+; CHECK-NOT: mul
+; CHECK: add
+; CHECK-NOT: mul
+; CHECK: add
+; CHECK-NOT: mul
+; CHECK: ret
+
+
+attributes #0 = { "unsafe-fp-math"="true" }
+attributes #1 = { noinline optnone "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/X86/fma-intrinsics-x86_64.ll b/test/CodeGen/X86/fma-intrinsics-x86_64.ll
new file mode 100644
index 000000000000..aadd7311bb89
--- /dev/null
+++ b/test/CodeGen/X86/fma-intrinsics-x86_64.ll
@@ -0,0 +1,278 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma | FileCheck %s --check-prefix=CHECK-FMA --check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=core-avx2 -mattr=+fma,+avx2 | FileCheck %s --check-prefix=CHECK-FMA --check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s --check-prefix=CHECK-FMA4 --check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s --check-prefix=CHECK-FMA4 --check-prefix=CHECK
+
+; VFMADD
+define < 4 x float > @test_x86_fma_vfmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK-FMA4: vfmaddss
+  ; CHECK-FMA: vfmadd213ss
+  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK-FMA4: vfmaddsd
+  ; CHECK-FMA: vfmadd213sd
+  %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK-FMA4: vfmaddps
+  ; CHECK-FMA: vfmadd213ps
+  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK-FMA4: vfmaddpd
+  ; CHECK-FMA: vfmadd213pd
+  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK-FMA4: vfmaddps
+  ; CHECK-FMA: vfmadd213ps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma.vfmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2)
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma.vfmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma_vfmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK-FMA4: vfmaddpd
+  ; CHECK-FMA: vfmadd213pd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma.vfmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma.vfmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFMSUB
+define < 4 x float > @test_x86_fma_vfmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK-FMA4: vfmsubss
+  ; CHECK-FMA: vfmsub213ss
+  %res = call < 4 x float > @llvm.x86.fma.vfmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma.vfmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma_vfmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK-FMA4: vfmsubsd
+  ; CHECK-FMA: vfmsub213sd
+  %res = call < 2 x double > @llvm.x86.fma.vfmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma.vfmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma_vfmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK-FMA4: vfmsubps
+  ; CHECK-FMA: vfmsub213ps
+  %res = call < 4 x float > @llvm.x86.fma.vfmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma.vfmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma_vfmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK-FMA4: vfmsubpd
+  ; CHECK-FMA: vfmsub213pd
+  %res = call < 2 x double > @llvm.x86.fma.vfmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma.vfmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma_vfmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK-FMA4: vfmsubps
+  ; CHECK-FMA: vfmsub213ps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma.vfmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2)
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma.vfmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma_vfmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK-FMA4: vfmsubpd
+  ; CHECK-FMA: vfmsub213pd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma.vfmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma.vfmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFNMADD
+define < 4 x float > @test_x86_fma_vfnmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK-FMA4: vfnmaddss
+  ; CHECK-FMA: vfnmadd213ss
+  %res = call < 4 x float > @llvm.x86.fma.vfnmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma.vfnmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma_vfnmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK-FMA4: vfnmaddsd
+  ; CHECK-FMA: vfnmadd213sd
+  %res = call < 2 x double > @llvm.x86.fma.vfnmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma.vfnmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma_vfnmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK-FMA4: vfnmaddps
+  ; CHECK-FMA: vfnmadd213ps
+  %res = call < 4 x float > @llvm.x86.fma.vfnmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma.vfnmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma_vfnmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK-FMA4: vfnmaddpd
+  ; CHECK-FMA: vfnmadd213pd
+  %res = call < 2 x double > @llvm.x86.fma.vfnmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma.vfnmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma_vfnmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK-FMA4: vfnmaddps
+  ; CHECK-FMA: vfnmadd213ps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma.vfnmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2)
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma.vfnmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma_vfnmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK-FMA4: vfnmaddpd
+  ; CHECK-FMA: vfnmadd213pd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma.vfnmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma.vfnmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFNMSUB
+define < 4 x float > @test_x86_fma_vfnmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK-FMA4: vfnmsubss
+  ; CHECK-FMA: vfnmsub213ss
+  %res = call < 4 x float > @llvm.x86.fma.vfnmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma.vfnmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma_vfnmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK-FMA4: vfnmsubsd
+  ; CHECK-FMA: vfnmsub213sd
+  %res = call < 2 x double > @llvm.x86.fma.vfnmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma.vfnmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma_vfnmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK-FMA4: vfnmsubps
+  ; CHECK-FMA: vfnmsub213ps
+  %res = call < 4 x float > @llvm.x86.fma.vfnmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma.vfnmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma_vfnmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK-FMA4: vfnmsubpd
+  ; CHECK-FMA: vfnmsub213pd
+  %res = call < 2 x double > @llvm.x86.fma.vfnmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma.vfnmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma_vfnmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK-FMA4: vfnmsubps
+  ; CHECK-FMA: vfnmsub213ps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma.vfnmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2)
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma.vfnmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma_vfnmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK-FMA4: vfnmsubpd
+  ; CHECK-FMA: vfnmsub213pd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma.vfnmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma.vfnmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFMADDSUB
+define < 4 x float > @test_x86_fma_vfmaddsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK-FMA4: vfmaddsubps
+  ; CHECK-FMA: vfmaddsub213ps
+  %res = call < 4 x float > @llvm.x86.fma.vfmaddsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma.vfmaddsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma_vfmaddsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK-FMA4: vfmaddsubpd
+  ; CHECK-FMA: vfmaddsub213pd
+  %res = call < 2 x double > @llvm.x86.fma.vfmaddsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma.vfmaddsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma_vfmaddsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK-FMA4: vfmaddsubps
+  ; CHECK-FMA: vfmaddsub213ps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma.vfmaddsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2)
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma.vfmaddsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma_vfmaddsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK-FMA4: vfmaddsubpd
+  ; CHECK-FMA: vfmaddsub213pd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma.vfmaddsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma.vfmaddsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFMSUBADD
+define < 4 x float > @test_x86_fma_vfmsubadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+  ; CHECK-FMA4: vfmsubaddps
+  ; CHECK-FMA: vfmsubadd213ps
+  %res = call < 4 x float > @llvm.x86.fma.vfmsubadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma.vfmsubadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma_vfmsubadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+  ; CHECK-FMA4: vfmsubaddpd
+  ; CHECK-FMA: vfmsubadd213pd
+  %res = call < 2 x double > @llvm.x86.fma.vfmsubadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma.vfmsubadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma_vfmsubadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+  ; CHECK-FMA4: vfmsubaddps
+  ; CHECK-FMA: vfmsubadd213ps
+  ; CHECK: ymm
+  %res = call < 8 x float > @llvm.x86.fma.vfmsubadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2)
+  ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma.vfmsubadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma_vfmsubadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+  ; CHECK-FMA4: vfmsubaddpd
+  ; CHECK-FMA: vfmsubadd213pd
+  ; CHECK: ymm
+  %res = call < 4 x double > @llvm.x86.fma.vfmsubadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
+  ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma.vfmsubadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
diff --git a/test/CodeGen/X86/fma-phi-213-to-231.ll b/test/CodeGen/X86/fma-phi-213-to-231.ll
new file mode 100644
index 000000000000..9715bc7b328b
--- /dev/null
+++ b/test/CodeGen/X86/fma-phi-213-to-231.ll
@@ -0,0 +1,246 @@
+; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; CHECK-LABEL: fmaddsubpd_loop
+; CHECK: [[BODYLBL:LBB.+]]:
+; CHECK:   vfmaddsub231pd        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+; CHECK: [[INCLBL:LBB.+]]:
+; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
+; CHECK:   cmpl  {{%.+}}, [[INDREG]]
+; CHECK:   jl    [[BODYLBL]]
+define <4 x double> @fmaddsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <4 x double> %c.addr.0
+}
+
+; CHECK-LABEL: fmsubaddpd_loop
+; CHECK: [[BODYLBL:LBB.+]]:
+; CHECK:   vfmsubadd231pd        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+; CHECK: [[INCLBL:LBB.+]]:
+; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
+; CHECK:   cmpl  {{%.+}}, [[INDREG]]
+; CHECK:   jl    [[BODYLBL]]
+define <4 x double> @fmsubaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <4 x double> %c.addr.0
+}
+
+; CHECK-LABEL: fmaddpd_loop
+; CHECK: [[BODYLBL:LBB.+]]:
+; CHECK:   vfmadd231pd        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+; CHECK: [[INCLBL:LBB.+]]:
+; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
+; CHECK:   cmpl  {{%.+}}, [[INDREG]]
+; CHECK:   jl    [[BODYLBL]]
+define <4 x double> @fmaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <4 x double> %c.addr.0
+}
+
+; CHECK-LABEL: fmsubpd_loop
+; CHECK: [[BODYLBL:LBB.+]]:
+; CHECK:   vfmsub231pd        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+; CHECK: [[INCLBL:LBB.+]]:
+; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
+; CHECK:   cmpl  {{%.+}}, [[INDREG]]
+; CHECK:   jl    [[BODYLBL]]
+define <4 x double> @fmsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <4 x double> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <4 x double> %c.addr.0
+}
+
+declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
+declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
+declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
+declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+
+; CHECK-LABEL: fmaddsubps_loop
+; CHECK: [[BODYLBL:LBB.+]]:
+; CHECK:   vfmaddsub231ps        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+; CHECK: [[INCLBL:LBB.+]]:
+; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
+; CHECK:   cmpl  {{%.+}}, [[INDREG]]
+; CHECK:   jl    [[BODYLBL]]
+define <8 x float> @fmaddsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <8 x float> %c.addr.0
+}
+
+; CHECK-LABEL: fmsubaddps_loop
+; CHECK: [[BODYLBL:LBB.+]]:
+; CHECK:   vfmsubadd231ps        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+; CHECK: [[INCLBL:LBB.+]]:
+; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
+; CHECK:   cmpl  {{%.+}}, [[INDREG]]
+; CHECK:   jl    [[BODYLBL]]
+define <8 x float> @fmsubaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <8 x float> %c.addr.0
+}
+
+; CHECK-LABEL: fmaddps_loop
+; CHECK: [[BODYLBL:LBB.+]]:
+; CHECK:   vfmadd231ps        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+; CHECK: [[INCLBL:LBB.+]]:
+; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
+; CHECK:   cmpl  {{%.+}}, [[INDREG]]
+; CHECK:   jl    [[BODYLBL]]
+define <8 x float> @fmaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <8 x float> %c.addr.0
+}
+
+; CHECK-LABEL: fmsubps_loop
+; CHECK: [[BODYLBL:LBB.+]]:
+; CHECK:   vfmsub231ps        %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+; CHECK: [[INCLBL:LBB.+]]:
+; CHECK:   addl  $1, [[INDREG:%[a-z0-9]+]]
+; CHECK:   cmpl  {{%.+}}, [[INDREG]]
+; CHECK:   jl    [[BODYLBL]]
+define <8 x float> @fmsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %c.addr.0 = phi <8 x float> [ %c, %entry ], [ %0, %for.inc ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i.0, %iter
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  br label %for.inc
+
+for.inc:
+  %0 = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c.addr.0)
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret <8 x float> %c.addr.0
+}
+
+declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
+declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
+declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
+declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll
new file mode 100644
index 000000000000..64a2068aea43
--- /dev/null
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s
+
+; VFMADD
+define < 4 x float > @test_x86_fma_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
+  ; CHECK: vfmaddss (%{{.*}})
+  %x = load float *%a2
+  %y = insertelement <4 x float> undef, float %x, i32 0
+  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y)
+  ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddss %{{.*}}, (%{{.*}})
+  %x = load float *%a1
+  %y = insertelement <4 x float> undef, float %x, i32 0
+  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2)
+  ret < 4 x float > %res
+}
+
+declare < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
+  ; CHECK: vfmaddsd (%{{.*}})
+  %x = load double *%a2
+  %y = insertelement <2 x double> undef, double %x, i32 0
+  %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y)
+  ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddsd %{{.*}}, (%{{.*}})
+  %x = load double *%a1
+  %y = insertelement <2 x double> undef, double %x, i32 0
+  %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2)
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+define < 4 x float > @test_x86_fma_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
+  ; CHECK: vfmaddps (%{{.*}})
+  %x = load <4 x float>* %a2
+  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x)
+  ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) {
+  ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
+  %x = load <4 x float>* %a1
+  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2)
+  ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+; To test execution dependency
+define < 4 x float > @test_x86_fma_vfmadd_ps_load3(< 4 x float >* %a0, < 4 x float >* %a1, < 4 x float > %a2) {
+  ; CHECK: vmovaps
+  ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
+  %x = load <4 x float>* %a0
+  %y = load <4 x float>* %a1
+  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %x, < 4 x float > %y, < 4 x float > %a2)
+  ret < 4 x float > %res
+}
+
+define < 2 x double > @test_x86_fma_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) {
+  ; CHECK: vfmaddpd (%{{.*}})
+  %x = load <2 x double>* %a2
+  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x)
+  ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) {
+  ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
+  %x = load <2 x double>* %a1
+  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2)
+  ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+; To test execution dependency
+define < 2 x double > @test_x86_fma_vfmadd_pd_load3(< 2 x double >* %a0, < 2 x double >* %a1, < 2 x double > %a2) {
+  ; CHECK: vmovapd
+  ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
+  %x = load <2 x double>* %a0
+  %y = load <2 x double>* %a1
+  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %x, < 2 x double > %y, < 2 x double > %a2)
+  ret < 2 x double > %res
+}
+
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
deleted file mode 100644
index 494cb28677a4..000000000000
--- a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
+++ /dev/null
@@ -1,316 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=corei7-avx -mattr=+fma4 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s
-
-; VFMADD
-define < 4 x float > @test_x86_fma_vfmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK: vfmaddss
-  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
-  ret < 4 x float > %res
-}
-define < 4 x float > @test_x86_fma_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
-  ; CHECK: vfmaddss (%{{.*}})
-  %x = load float *%a2
-  %y = insertelement <4 x float> undef, float %x, i32 0
-  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) ; <i64> [#uses=1]
-  ret < 4 x float > %res
-}
-define < 4 x float > @test_x86_fma_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
-  ; CHECK: vfmaddss %{{.*}}, (%{{.*}})
-  %x = load float *%a1
-  %y = insertelement <4 x float> undef, float %x, i32 0
-  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1]
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK: vfmaddsd
-  %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
-  ret < 2 x double > %res
-}
-define < 2 x double > @test_x86_fma_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
-  ; CHECK: vfmaddsd (%{{.*}})
-  %x = load double *%a2
-  %y = insertelement <2 x double> undef, double %x, i32 0
-  %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) ; <i64> [#uses=1]
-  ret < 2 x double > %res
-}
-define < 2 x double > @test_x86_fma_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
-  ; CHECK: vfmaddsd %{{.*}}, (%{{.*}})
-  %x = load double *%a1
-  %y = insertelement <2 x double> undef, double %x, i32 0
-  %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1]
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 4 x float > @test_x86_fma_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK: vfmaddps
-  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
-  ret < 4 x float > %res
-}
-define < 4 x float > @test_x86_fma_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
-  ; CHECK: vfmaddps (%{{.*}})
-  %x = load <4 x float>* %a2
-  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x) ; <i64> [#uses=1]
-  ret < 4 x float > %res
-}
-define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) {
-  ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
-  %x = load <4 x float>* %a1
-  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2) ; <i64> [#uses=1]
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-; To test execution dependency
-define < 4 x float > @test_x86_fma_vfmadd_ps_load3(< 4 x float >* %a0, < 4 x float >* %a1, < 4 x float > %a2) {
-  ; CHECK: vmovaps
-  ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
-  %x = load <4 x float>* %a0
-  %y = load <4 x float>* %a1
-  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %x, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1]
-  ret < 4 x float > %res
-}
-
-define < 2 x double > @test_x86_fma_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK: vfmaddpd
-  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
-  ret < 2 x double > %res
-}
-define < 2 x double > @test_x86_fma_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) {
-  ; CHECK: vfmaddpd (%{{.*}})
-  %x = load <2 x double>* %a2
-  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x) ; <i64> [#uses=1]
-  ret < 2 x double > %res
-}
-define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) {
-  ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
-  %x = load <2 x double>* %a1
-  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2) ; <i64> [#uses=1]
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-; To test execution dependency
-define < 2 x double > @test_x86_fma_vfmadd_pd_load3(< 2 x double >* %a0, < 2 x double >* %a1, < 2 x double > %a2) {
-  ; CHECK: vmovapd
-  ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
-  %x = load <2 x double>* %a0
-  %y = load <2 x double>* %a1
-  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %x, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1]
-  ret < 2 x double > %res
-}
-
-define < 8 x float > @test_x86_fma_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
-  ; CHECK: vfmaddps
-  ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma.vfmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
-  ret < 8 x float > %res
-}
-declare < 8 x float > @llvm.x86.fma.vfmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-
-define < 4 x double > @test_x86_fma_vfmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
-  ; CHECK: vfmaddpd
-  ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma.vfmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
-  ret < 4 x double > %res
-}
-declare < 4 x double > @llvm.x86.fma.vfmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
-
-; VFMSUB
-define < 4 x float > @test_x86_fma_vfmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK: vfmsubss
-  %res = call < 4 x float > @llvm.x86.fma.vfmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK: vfmsubsd
-  %res = call < 2 x double > @llvm.x86.fma.vfmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 4 x float > @test_x86_fma_vfmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK: vfmsubps
-  %res = call < 4 x float > @llvm.x86.fma.vfmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK: vfmsubpd
-  %res = call < 2 x double > @llvm.x86.fma.vfmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 8 x float > @test_x86_fma_vfmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
-  ; CHECK: vfmsubps
-  ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma.vfmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
-  ret < 8 x float > %res
-}
-declare < 8 x float > @llvm.x86.fma.vfmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-
-define < 4 x double > @test_x86_fma_vfmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
-  ; CHECK: vfmsubpd
-  ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma.vfmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
-  ret < 4 x double > %res
-}
-declare < 4 x double > @llvm.x86.fma.vfmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
-
-; VFNMADD
-define < 4 x float > @test_x86_fma_vfnmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK: vfnmaddss
-  %res = call < 4 x float > @llvm.x86.fma.vfnmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfnmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfnmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK: vfnmaddsd
-  %res = call < 2 x double > @llvm.x86.fma.vfnmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfnmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 4 x float > @test_x86_fma_vfnmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK: vfnmaddps
-  %res = call < 4 x float > @llvm.x86.fma.vfnmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfnmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfnmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK: vfnmaddpd
-  %res = call < 2 x double > @llvm.x86.fma.vfnmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfnmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 8 x float > @test_x86_fma_vfnmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
-  ; CHECK: vfnmaddps
-  ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma.vfnmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
-  ret < 8 x float > %res
-}
-declare < 8 x float > @llvm.x86.fma.vfnmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-
-define < 4 x double > @test_x86_fma_vfnmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
-  ; CHECK: vfnmaddpd
-  ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma.vfnmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
-  ret < 4 x double > %res
-}
-declare < 4 x double > @llvm.x86.fma.vfnmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
-
-; VFNMSUB
-define < 4 x float > @test_x86_fma_vfnmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK: vfnmsubss
-  %res = call < 4 x float > @llvm.x86.fma.vfnmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfnmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfnmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK: vfnmsubsd
-  %res = call < 2 x double > @llvm.x86.fma.vfnmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfnmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 4 x float > @test_x86_fma_vfnmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK: vfnmsubps
-  %res = call < 4 x float > @llvm.x86.fma.vfnmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfnmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfnmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK: vfnmsubpd
-  %res = call < 2 x double > @llvm.x86.fma.vfnmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfnmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 8 x float > @test_x86_fma_vfnmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
-  ; CHECK: vfnmsubps
-  ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma.vfnmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
-  ret < 8 x float > %res
-}
-declare < 8 x float > @llvm.x86.fma.vfnmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-
-define < 4 x double > @test_x86_fma_vfnmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
-  ; CHECK: vfnmsubpd
-  ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma.vfnmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
-  ret < 4 x double > %res
-}
-declare < 4 x double > @llvm.x86.fma.vfnmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
-
-; VFMADDSUB
-define < 4 x float > @test_x86_fma_vfmaddsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK: vfmaddsubps
-  %res = call < 4 x float > @llvm.x86.fma.vfmaddsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfmaddsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfmaddsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK: vfmaddsubpd
-  %res = call < 2 x double > @llvm.x86.fma.vfmaddsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfmaddsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 8 x float > @test_x86_fma_vfmaddsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
-  ; CHECK: vfmaddsubps
-  ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma.vfmaddsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
-  ret < 8 x float > %res
-}
-declare < 8 x float > @llvm.x86.fma.vfmaddsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-
-define < 4 x double > @test_x86_fma_vfmaddsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
-  ; CHECK: vfmaddsubpd
-  ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma.vfmaddsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
-  ret < 4 x double > %res
-}
-declare < 4 x double > @llvm.x86.fma.vfmaddsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
-
-; VFMSUBADD
-define < 4 x float > @test_x86_fma_vfmsubadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
-  ; CHECK: vfmsubaddps
-  %res = call < 4 x float > @llvm.x86.fma.vfmsubadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
-  ret < 4 x float > %res
-}
-declare < 4 x float > @llvm.x86.fma.vfmsubadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
-
-define < 2 x double > @test_x86_fma_vfmsubadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
-  ; CHECK: vfmsubaddpd
-  %res = call < 2 x double > @llvm.x86.fma.vfmsubadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
-  ret < 2 x double > %res
-}
-declare < 2 x double > @llvm.x86.fma.vfmsubadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
-
-define < 8 x float > @test_x86_fma_vfmsubadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
-  ; CHECK: vfmsubaddps
-  ; CHECK: ymm
-  %res = call < 8 x float > @llvm.x86.fma.vfmsubadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
-  ret < 8 x float > %res
-}
-declare < 8 x float > @llvm.x86.fma.vfmsubadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
-
-define < 4 x double > @test_x86_fma_vfmsubadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
-  ; CHECK: vfmsubaddpd
-  ; CHECK: ymm
-  %res = call < 4 x double > @llvm.x86.fma.vfmsubadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
-  ret < 4 x double > %res
-}
-declare < 4 x double > @llvm.x86.fma.vfmsubadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll
index cfb598df634c..9b52db9f14e9 100644
--- a/test/CodeGen/X86/fma_patterns.ll
+++ b/test/CodeGen/X86/fma_patterns.ll
@@ -184,7 +184,7 @@ define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) {
 
 ; CHECK: test_x86_fmadd_ps_load
 ; CHECK: vmovaps         (%rdi), %xmm2
-; CHECK: vfmadd213ps     %xmm1, %xmm0, %xmm2
+; CHECK: vfmadd213ps     %xmm1, %xmm2, %xmm0
 ; CHECK: ret
 ; CHECK_FMA4: test_x86_fmadd_ps_load
 ; CHECK_FMA4: vfmaddps     %xmm1, (%rdi), %xmm0, %xmm0
@@ -198,7 +198,7 @@ define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4
 
 ; CHECK: test_x86_fmsub_ps_load
 ; CHECK: vmovaps         (%rdi), %xmm2
-; CHECK: fmsub213ps     %xmm1, %xmm0, %xmm2
+; CHECK: fmsub213ps     %xmm1, %xmm2, %xmm0
 ; CHECK: ret
 ; CHECK_FMA4: test_x86_fmsub_ps_load
 ; CHECK_FMA4: vfmsubps     %xmm1, (%rdi), %xmm0, %xmm0
diff --git a/test/CodeGen/X86/fmaxnum.ll b/test/CodeGen/X86/fmaxnum.ll
new file mode 100644
index 000000000000..23678c46dba0
--- /dev/null
+++ b/test/CodeGen/X86/fmaxnum.ll
@@ -0,0 +1,50 @@
+; RUN: llc  -march=x86 -mtriple=i386-linux-gnu  < %s | FileCheck %s
+
+declare float @fmaxf(float, float)
+declare double @fmax(double, double)
+declare x86_fp80 @fmaxl(x86_fp80, x86_fp80)
+declare float @llvm.maxnum.f32(float, float)
+declare double @llvm.maxnum.f64(double, double)
+declare x86_fp80 @llvm.maxnum.f80(x86_fp80, x86_fp80)
+
+; CHECK-LABEL: @test_fmaxf
+; CHECK: calll fmaxf
+define float @test_fmaxf(float %x, float %y) {
+  %z = call float @fmaxf(float %x, float %y) readnone
+  ret float %z
+}
+
+; CHECK-LABEL: @test_fmax
+; CHECK: calll fmax
+define double @test_fmax(double %x, double %y) {
+  %z = call double @fmax(double %x, double %y) readnone
+  ret double %z
+}
+
+; CHECK-LABEL: @test_fmaxl
+; CHECK: calll fmaxl
+define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) {
+  %z = call x86_fp80 @fmaxl(x86_fp80 %x, x86_fp80 %y) readnone
+  ret x86_fp80 %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmaxf
+; CHECK: calll fmaxf
+define float @test_intrinsic_fmaxf(float %x, float %y) {
+  %z = call float @llvm.maxnum.f32(float %x, float %y) readnone
+  ret float %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmax
+; CHECK: calll fmax
+define double @test_intrinsic_fmax(double %x, double %y) {
+  %z = call double @llvm.maxnum.f64(double %x, double %y) readnone
+  ret double %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmaxl
+; CHECK: calll fmaxl
+define x86_fp80 @test_intrinsic_fmaxl(x86_fp80 %x, x86_fp80 %y) {
+  %z = call x86_fp80 @llvm.maxnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
+  ret x86_fp80 %z
+}
diff --git a/test/CodeGen/X86/fminnum.ll b/test/CodeGen/X86/fminnum.ll
new file mode 100644
index 000000000000..1e33cf4696af
--- /dev/null
+++ b/test/CodeGen/X86/fminnum.ll
@@ -0,0 +1,95 @@
+; RUN: llc  -march=x86 -mtriple=i386-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s
+
+declare float @fminf(float, float)
+declare double @fmin(double, double)
+declare x86_fp80 @fminl(x86_fp80, x86_fp80)
+declare float @llvm.minnum.f32(float, float)
+declare double @llvm.minnum.f64(double, double)
+declare x86_fp80 @llvm.minnum.f80(x86_fp80, x86_fp80)
+
+declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>)
+declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>)
+
+; CHECK-LABEL: @test_fminf
+; CHECK: jmp fminf
+define float @test_fminf(float %x, float %y) {
+  %z = call float @fminf(float %x, float %y) readnone
+  ret float %z
+}
+
+; CHECK-LABEL: @test_fmin
+; CHECK: jmp fmin
+define double @test_fmin(double %x, double %y) {
+  %z = call double @fmin(double %x, double %y) readnone
+  ret double %z
+}
+
+; CHECK-LABEL: @test_fminl
+; CHECK: calll fminl
+define x86_fp80 @test_fminl(x86_fp80 %x, x86_fp80 %y) {
+  %z = call x86_fp80 @fminl(x86_fp80 %x, x86_fp80 %y) readnone
+  ret x86_fp80 %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fminf
+; CHECK: jmp fminf
+define float @test_intrinsic_fminf(float %x, float %y) {
+  %z = call float @llvm.minnum.f32(float %x, float %y) readnone
+  ret float %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmin
+; CHECK: jmp fmin
+define double @test_intrinsic_fmin(double %x, double %y) {
+  %z = call double @llvm.minnum.f64(double %x, double %y) readnone
+  ret double %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fminl
+; CHECK: calll fminl
+define x86_fp80 @test_intrinsic_fminl(x86_fp80 %x, x86_fp80 %y) {
+  %z = call x86_fp80 @llvm.minnum.f80(x86_fp80 %x, x86_fp80 %y) readnone
+  ret x86_fp80 %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmin_v2f32
+; CHECK: calll fminf
+; CHECK: calll fminf
+define <2 x float> @test_intrinsic_fmin_v2f32(<2 x float> %x, <2 x float> %y) {
+  %z = call <2 x float> @llvm.minnum.v2f32(<2 x float> %x, <2 x float> %y) readnone
+  ret <2 x float> %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmin_v4f32
+; CHECK: calll fminf
+; CHECK: calll fminf
+; CHECK: calll fminf
+; CHECK: calll fminf
+define <4 x float> @test_intrinsic_fmin_v4f32(<4 x float> %x, <4 x float> %y) {
+  %z = call <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) readnone
+  ret <4 x float> %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmin_v2f64
+; CHECK: calll fmin
+; CHECK: calll fmin
+define <2 x double> @test_intrinsic_fmin_v2f64(<2 x double> %x, <2 x double> %y) {
+  %z = call <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y) readnone
+  ret <2 x double> %z
+}
+
+; CHECK-LABEL: @test_intrinsic_fmin_v8f64
+; CHECK: calll fmin
+; CHECK: calll fmin
+; CHECK: calll fmin
+; CHECK: calll fmin
+; CHECK: calll fmin
+; CHECK: calll fmin
+; CHECK: calll fmin
+; CHECK: calll fmin
+define <8 x double> @test_intrinsic_fmin_v8f64(<8 x double> %x, <8 x double> %y) {
+  %z = call <8 x double> @llvm.minnum.v8f64(<8 x double> %x, <8 x double> %y) readnone
+  ret <8 x double> %z
+}
diff --git a/test/CodeGen/X86/fmul-combines.ll b/test/CodeGen/X86/fmul-combines.ll
new file mode 100644
index 000000000000..703651153c11
--- /dev/null
+++ b/test/CodeGen/X86/fmul-combines.ll
@@ -0,0 +1,147 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -march=x86-64 < %s | FileCheck %s
+
+; CHECK-LABEL: fmul2_f32:
+; CHECK: addss %xmm0, %xmm0
+define float @fmul2_f32(float %x) {
+  %y = fmul float %x, 2.0
+  ret float %y
+}
+
+; fmul 2.0, x -> fadd x, x for vectors.
+
+; CHECK-LABEL: fmul2_v4f32:
+; CHECK: addps %xmm0, %xmm0
+; CHECK-NEXT: retq
+define <4 x float> @fmul2_v4f32(<4 x float> %x) {
+  %y = fmul <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
+  ret <4 x float> %y
+}
+
+; CHECK-LABEL: constant_fold_fmul_v4f32:
+; CHECK: movaps
+; CHECK-NEXT: ret
+define <4 x float> @constant_fold_fmul_v4f32(<4 x float> %x) {
+  %y = fmul <4 x float> <float 4.0, float 4.0, float 4.0, float 4.0>, <float 2.0, float 2.0, float 2.0, float 2.0>
+  ret <4 x float> %y
+}
+
+; CHECK-LABEL: fmul0_v4f32:
+; CHECK: xorps %xmm0, %xmm0
+; CHECK-NEXT: retq
+define <4 x float> @fmul0_v4f32(<4 x float> %x) #0 {
+  %y = fmul <4 x float> %x, <float 0.0, float 0.0, float 0.0, float 0.0>
+  ret <4 x float> %y
+}
+
+; CHECK-LABEL: fmul_c2_c4_v4f32:
+; CHECK-NOT: addps
+; CHECK: mulps
+; CHECK-NOT: mulps
+; CHECK-NEXT: ret
+define <4 x float> @fmul_c2_c4_v4f32(<4 x float> %x) #0 {
+  %y = fmul <4 x float> %x, <float 2.0, float 2.0, float 2.0, float 2.0>
+  %z = fmul <4 x float> %y, <float 4.0, float 4.0, float 4.0, float 4.0>
+  ret <4 x float> %z
+}
+
+; CHECK-LABEL: fmul_c3_c4_v4f32:
+; CHECK-NOT: addps
+; CHECK: mulps
+; CHECK-NOT: mulps
+; CHECK-NEXT: ret
+define <4 x float> @fmul_c3_c4_v4f32(<4 x float> %x) #0 {
+  %y = fmul <4 x float> %x, <float 3.0, float 3.0, float 3.0, float 3.0>
+  %z = fmul <4 x float> %y, <float 4.0, float 4.0, float 4.0, float 4.0>
+  ret <4 x float> %z
+}
+
+; We should be able to pre-multiply the two constant vectors.
+; CHECK: float 5.000000e+00
+; CHECK: float 1.200000e+01
+; CHECK: float 2.100000e+01
+; CHECK: float 3.200000e+01
+; CHECK-LABEL: fmul_v4f32_two_consts_no_splat:
+; CHECK: mulps
+; CHECK-NOT: mulps
+; CHECK-NEXT: ret
+define <4 x float> @fmul_v4f32_two_consts_no_splat(<4 x float> %x) #0 {
+  %y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+  %z = fmul <4 x float> %y, <float 5.0, float 6.0, float 7.0, float 8.0>
+  ret <4 x float> %z
+}
+
+; Same as above, but reverse operands to make sure non-canonical form is also handled.
+; CHECK: float 5.000000e+00
+; CHECK: float 1.200000e+01
+; CHECK: float 2.100000e+01
+; CHECK: float 3.200000e+01
+; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_non_canonical:
+; CHECK: mulps
+; CHECK-NOT: mulps
+; CHECK-NEXT: ret
+define <4 x float> @fmul_v4f32_two_consts_no_splat_non_canonical(<4 x float> %x) #0 {
+  %y = fmul <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
+  %z = fmul <4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>, %y
+  ret <4 x float> %z
+}
+
+; More than one use of a constant multiply should not inhibit the optimization.
+; Instead of a chain of 2 dependent mults, this test will have 2 independent mults. 
+; CHECK: float 5.000000e+00
+; CHECK: float 1.200000e+01
+; CHECK: float 2.100000e+01
+; CHECK: float 3.200000e+01
+; CHECK-LABEL: fmul_v4f32_two_consts_no_splat_multiple_use:
+; CHECK: mulps
+; CHECK: mulps
+; CHECK: addps
+; CHECK: ret
+define <4 x float> @fmul_v4f32_two_consts_no_splat_multiple_use(<4 x float> %x) #0 {
+  %y = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>
+  %z = fmul <4 x float> %y, <float 5.0, float 6.0, float 7.0, float 8.0>
+  %a = fadd <4 x float> %y, %z
+  ret <4 x float> %a
+}
+
+; CHECK-LABEL: fmul_c2_c4_f32:
+; CHECK-NOT: addss
+; CHECK: mulss
+; CHECK-NOT: mulss
+; CHECK-NEXT: ret
+define float @fmul_c2_c4_f32(float %x) #0 {
+  %y = fmul float %x, 2.0
+  %z = fmul float %y, 4.0
+  ret float %z
+}
+
+; CHECK-LABEL: fmul_c3_c4_f32:
+; CHECK-NOT: addss
+; CHECK: mulss
+; CHECK-NOT: mulss
+; CHECK-NET: ret
+define float @fmul_c3_c4_f32(float %x) #0 {
+  %y = fmul float %x, 3.0
+  %z = fmul float %y, 4.0
+  ret float %z
+}
+
+; CHECK-LABEL: fmul_fneg_fneg_f32:
+; CHECK: mulss %xmm1, %xmm0
+; CHECK-NEXT: retq
+define float @fmul_fneg_fneg_f32(float %x, float %y) {
+  %x.neg = fsub float -0.0, %x
+  %y.neg = fsub float -0.0, %y
+  %mul = fmul float %x.neg, %y.neg
+  ret float %mul
+}
+; CHECK-LABEL: fmul_fneg_fneg_v4f32:
+; CHECK: mulps {{%xmm1|\(%rdx\)}}, %xmm0
+; CHECK-NEXT: retq
+define <4 x float> @fmul_fneg_fneg_v4f32(<4 x float> %x, <4 x float> %y) {
+  %x.neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %x
+  %y.neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %y
+  %mul = fmul <4 x float> %x.neg, %y.neg
+  ret <4 x float> %mul
+}
+
+attributes #0 = { "less-precise-fpmad"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/X86/fnabs.ll b/test/CodeGen/X86/fnabs.ll
new file mode 100644
index 000000000000..19718d3ff926
--- /dev/null
+++ b/test/CodeGen/X86/fnabs.ll
@@ -0,0 +1,77 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx| FileCheck %s
+
+; Verify that we generate a single OR instruction for a scalar, vec128, and vec256
+; FNABS(x) operation -> FNEG (FABS(x)).
+; If the FABS() result isn't used, the AND instruction should be eliminated.
+; PR20578: http://llvm.org/bugs/show_bug.cgi?id=20578
+
+define float @scalar_no_abs(float %a) {
+; CHECK-LABEL: scalar_no_abs:
+; CHECK: vorps
+; CHECK-NEXT: retq
+  %fabs = tail call float @fabsf(float %a) #1
+  %fsub = fsub float -0.0, %fabs
+  ret float %fsub
+}
+
+define float @scalar_uses_abs(float %a) {
+; CHECK-LABEL: scalar_uses_abs:
+; CHECK-DAG: vandps
+; CHECK-DAG: vorps
+; CHECK: vmulss
+; CHECK-NEXT: retq
+  %fabs = tail call float @fabsf(float %a) #1
+  %fsub = fsub float -0.0, %fabs
+  %fmul = fmul float %fsub, %fabs
+  ret float %fmul
+}
+
+define <4 x float> @vector128_no_abs(<4 x float> %a) {
+; CHECK-LABEL: vector128_no_abs:
+; CHECK: vorps
+; CHECK-NEXT: retq
+  %fabs = tail call <4 x float> @llvm.fabs.v4f32(< 4 x float> %a) #1
+  %fsub = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %fabs
+  ret <4 x float> %fsub
+}
+
+define <4 x float> @vector128_uses_abs(<4 x float> %a) {
+; CHECK-LABEL: vector128_uses_abs:
+; CHECK-DAG: vandps
+; CHECK-DAG: vorps
+; CHECK: vmulps
+; CHECK-NEXT: retq
+  %fabs = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a) #1
+  %fsub = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %fabs
+  %fmul = fmul <4 x float> %fsub, %fabs
+  ret <4 x float> %fmul
+}
+
+define <8 x float> @vector256_no_abs(<8 x float> %a) {
+; CHECK-LABEL: vector256_no_abs:
+; CHECK: vorps
+; CHECK-NEXT: retq
+  %fabs = tail call <8 x float> @llvm.fabs.v8f32(< 8 x float> %a) #1
+  %fsub = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %fabs
+  ret <8 x float> %fsub
+}
+
+define <8 x float> @vector256_uses_abs(<8 x float> %a) {
+; CHECK-LABEL: vector256_uses_abs:
+; CHECK-DAG: vandps
+; CHECK-DAG: vorps
+; CHECK: vmulps
+; CHECK-NEXT: retq
+  %fabs = tail call <8 x float> @llvm.fabs.v8f32(<8 x float> %a) #1
+  %fsub = fsub <8 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %fabs
+  %fmul = fmul <8 x float> %fsub, %fabs
+  ret <8 x float> %fmul
+}
+
+declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
+declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
+
+declare float @fabsf(float)
+
+attributes #1 = { readnone }
+
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll
deleted file mode 100644
index 1d315ffe359b..000000000000
--- a/test/CodeGen/X86/fold-pcmpeqd-0.ll
+++ /dev/null
@@ -1,117 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=X86-64 %s
-; DISABLED: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
-
-; i386 test has been disabled when scheduler 2-addr hack is disabled.
-
-; This testcase shouldn't need to spill the -1 value,
-; so it should just use pcmpeqd to materialize an all-ones vector.
-; For i386, cp load of -1 are folded.
-
-; With -regalloc=greedy, the live range is split before spilling, so the first
-; pcmpeq doesn't get folded as a constant pool load.
-
-; I386-NOT: pcmpeqd
-; I386: orps LCPI0_2, %xmm
-; I386-NOT: pcmpeqd
-; I386: orps LCPI0_2, %xmm
-
-; X86-64: pcmpeqd
-; X86-64-NOT: pcmpeqd
-
-	%struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }>
-	%struct._cl_image_format_t = type <{ i32, i32, i32 }>
-	%struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }>
-
-define void @program_1(%struct._image2d_t* %dest, %struct._image2d_t* %t0, <4 x float> %p0, <4 x float> %p1, <4 x float> %p4, <4 x float> %p5, <4 x float> %p6) nounwind {
-entry:
-	%tmp3.i = load i32* null		; <i32> [#uses=1]
-	%cmp = icmp sgt i32 %tmp3.i, 200		; <i1> [#uses=1]
-	br i1 %cmp, label %forcond, label %ifthen
-
-ifthen:		; preds = %entry
-	ret void
-
-forcond:		; preds = %entry
-	%tmp3.i536 = load i32* null		; <i32> [#uses=1]
-	%cmp12 = icmp slt i32 0, %tmp3.i536		; <i1> [#uses=1]
-	br i1 %cmp12, label %forbody, label %afterfor
-
-forbody:		; preds = %forcond
-	%bitcast204.i313 = bitcast <4 x i32> zeroinitializer to <4 x float>		; <<4 x float>> [#uses=1]
-	%mul233 = fmul <4 x float> %bitcast204.i313, zeroinitializer		; <<4 x float>> [#uses=1]
-	%mul257 = fmul <4 x float> %mul233, zeroinitializer		; <<4 x float>> [#uses=1]
-	%mul275 = fmul <4 x float> %mul257, zeroinitializer		; <<4 x float>> [#uses=1]
-	%tmp51 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %mul275, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
-	%bitcast198.i182 = bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>> [#uses=0]
-	%bitcast204.i185 = bitcast <4 x i32> zeroinitializer to <4 x float>		; <<4 x float>> [#uses=1]
-	%tmp69 = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> zeroinitializer) nounwind		; <<4 x i32>> [#uses=1]
-	%tmp70 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %tmp69) nounwind		; <<4 x float>> [#uses=1]
-	%sub140.i78 = fsub <4 x float> zeroinitializer, %tmp70		; <<4 x float>> [#uses=2]
-	%mul166.i86 = fmul <4 x float> zeroinitializer, %sub140.i78		; <<4 x float>> [#uses=1]
-	%add167.i87 = fadd <4 x float> %mul166.i86, < float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000, float 0x3FE62ACB60000000 >		; <<4 x float>> [#uses=1]
-	%mul171.i88 = fmul <4 x float> %add167.i87, %sub140.i78		; <<4 x float>> [#uses=1]
-	%add172.i89 = fadd <4 x float> %mul171.i88, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >		; <<4 x float>> [#uses=1]
-	%bitcast176.i90 = bitcast <4 x float> %add172.i89 to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%andnps178.i92 = and <4 x i32> %bitcast176.i90, zeroinitializer		; <<4 x i32>> [#uses=1]
-	%bitcast179.i93 = bitcast <4 x i32> %andnps178.i92 to <4 x float>		; <<4 x float>> [#uses=1]
-	%mul186.i96 = fmul <4 x float> %bitcast179.i93, zeroinitializer		; <<4 x float>> [#uses=1]
-	%bitcast190.i98 = bitcast <4 x float> %mul186.i96 to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%andnps192.i100 = and <4 x i32> %bitcast190.i98, zeroinitializer		; <<4 x i32>> [#uses=1]
-	%xorps.i102 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
-	%orps203.i103 = or <4 x i32> %andnps192.i100, %xorps.i102		; <<4 x i32>> [#uses=1]
-	%bitcast204.i104 = bitcast <4 x i32> %orps203.i103 to <4 x float>		; <<4 x float>> [#uses=1]
-	%cmple.i = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> %tmp51, i8 2) nounwind		; <<4 x float>> [#uses=1]
-	%tmp80 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
-	%sub140.i = fsub <4 x float> zeroinitializer, %tmp80		; <<4 x float>> [#uses=1]
-	%bitcast148.i = bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%andnps150.i = and <4 x i32> %bitcast148.i, < i32 -2139095041, i32 -2139095041, i32 -2139095041, i32 -2139095041 >		; <<4 x i32>> [#uses=0]
-	%mul171.i = fmul <4 x float> zeroinitializer, %sub140.i		; <<4 x float>> [#uses=1]
-	%add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >		; <<4 x float>> [#uses=1]
-	%bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer		; <<4 x i32>> [#uses=1]
-	%bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float>		; <<4 x float>> [#uses=1]
-	%mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer		; <<4 x float>> [#uses=1]
-	%bitcast189.i = bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>> [#uses=0]
-	%bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer		; <<4 x i32>> [#uses=1]
-	%bitcast198.i = bitcast <4 x float> %cmple.i to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%xorps.i = xor <4 x i32> %bitcast198.i, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
-	%orps203.i = or <4 x i32> %andnps192.i, %xorps.i		; <<4 x i32>> [#uses=1]
-	%bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float>		; <<4 x float>> [#uses=1]
-	%mul307 = fmul <4 x float> %bitcast204.i185, zeroinitializer		; <<4 x float>> [#uses=1]
-	%mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer		; <<4 x float>> [#uses=2]
-	%mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer		; <<4 x float>> [#uses=1]
-	%tmp82 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul307, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
-	%bitcast11.i15 = bitcast <4 x float> %tmp82 to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%andnps.i17 = and <4 x i32> %bitcast11.i15, zeroinitializer		; <<4 x i32>> [#uses=1]
-	%orps.i18 = or <4 x i32> %andnps.i17, zeroinitializer		; <<4 x i32>> [#uses=1]
-	%bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float>		; <<4 x float>> [#uses=1]
-	%tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
-	%bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%bitcast6.i4 = bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>> [#uses=2]
-	%andps.i5 = and <4 x i32> %bitcast.i3, %bitcast6.i4		; <<4 x i32>> [#uses=1]
-	%bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%not.i7 = xor <4 x i32> %bitcast6.i4, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
-	%andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7		; <<4 x i32>> [#uses=1]
-	%orps.i9 = or <4 x i32> %andnps.i8, %andps.i5		; <<4 x i32>> [#uses=1]
-	%bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float>		; <<4 x float>> [#uses=1]
-	%bitcast.i = bitcast <4 x float> %mul313 to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%andps.i = and <4 x i32> %bitcast.i, zeroinitializer		; <<4 x i32>> [#uses=1]
-	%orps.i = or <4 x i32> zeroinitializer, %andps.i		; <<4 x i32>> [#uses=1]
-	%bitcast17.i = bitcast <4 x i32> %orps.i to <4 x float>		; <<4 x float>> [#uses=1]
-	call void null(<4 x float> %bitcast17.i19, <4 x float> %bitcast17.i10, <4 x float> %bitcast17.i, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
-	unreachable
-
-afterfor:		; preds = %forcond
-	ret void
-}
-
-declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
-
-declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
-
-declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone
-
-declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
-
-declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/fold-tied-op.ll b/test/CodeGen/X86/fold-tied-op.ll
new file mode 100644
index 000000000000..5bf5dbd1a9ce
--- /dev/null
+++ b/test/CodeGen/X86/fold-tied-op.ll
@@ -0,0 +1,84 @@
+; RUN: llc -verify-machineinstrs -mtriple=i386--netbsd < %s | FileCheck %s
+; Regression test for http://reviews.llvm.org/D5701
+
+; ModuleID = 'xxhash.i'
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386--netbsd"
+
+; CHECK-LABEL: fn1
+; CHECK:       shldl {{.*#+}} 4-byte Folded Spill
+; CHECK:       orl   {{.*#+}} 4-byte Folded Reload
+; CHECK:       shldl {{.*#+}} 4-byte Folded Spill
+; CHECK:       orl   {{.*#+}} 4-byte Folded Reload
+; CHECK:       addl  {{.*#+}} 4-byte Folded Reload
+; CHECK:       imull {{.*#+}} 4-byte Folded Reload
+; CHECK:       orl   {{.*#+}} 4-byte Folded Reload
+; CHECK:       retl
+
+%struct.XXH_state64_t = type { i32, i32, i64, i64, i64 }
+
+@a = common global i32 0, align 4
+@b = common global i64 0, align 8
+
+; Function Attrs: nounwind uwtable
+define i64 @fn1() #0 {
+entry:
+  %0 = load i32* @a, align 4, !tbaa !1
+  %1 = inttoptr i32 %0 to %struct.XXH_state64_t*
+  %total_len = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 0
+  %2 = load i32* %total_len, align 4, !tbaa !5
+  %tobool = icmp eq i32 %2, 0
+  br i1 %tobool, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  %v3 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 3
+  %3 = load i64* %v3, align 4, !tbaa !8
+  %v4 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 4
+  %4 = load i64* %v4, align 4, !tbaa !9
+  %v2 = getelementptr inbounds %struct.XXH_state64_t* %1, i32 0, i32 2
+  %5 = load i64* %v2, align 4, !tbaa !10
+  %shl = shl i64 %5, 1
+  %or = or i64 %shl, %5
+  %shl2 = shl i64 %3, 2
+  %shr = lshr i64 %3, 1
+  %or3 = or i64 %shl2, %shr
+  %add = add i64 %or, %or3
+  %mul = mul i64 %4, -4417276706812531889
+  %shl4 = mul i64 %4, -8834553413625063778
+  %shr5 = ashr i64 %mul, 3
+  %or6 = or i64 %shr5, %shl4
+  %mul7 = mul nsw i64 %or6, 1400714785074694791
+  %xor = xor i64 %add, %mul7
+  store i64 %xor, i64* @b, align 8, !tbaa !11
+  %mul8 = mul nsw i64 %xor, 1400714785074694791
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  %6 = load i64* @b, align 8, !tbaa !11
+  %xor10 = xor i64 %6, -4417276706812531889
+  %mul11 = mul nsw i64 %xor10, 400714785074694791
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %storemerge.in = phi i64 [ %mul11, %if.else ], [ %mul8, %if.then ]
+  %storemerge = add i64 %storemerge.in, -8796714831421723037
+  store i64 %storemerge, i64* @b, align 8, !tbaa !11
+  ret i64 undef
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.6 (trunk 219587)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !2, i64 0}
+!6 = !{!"XXH_state64_t", !2, i64 0, !2, i64 4, !7, i64 8, !7, i64 16, !7, i64 24}
+!7 = !{!"long long", !3, i64 0}
+!8 = !{!6, !7, i64 16}
+!9 = !{!6, !7, i64 24}
+!10 = !{!6, !7, i64 8}
+!11 = !{!7, !7, i64 0}
diff --git a/test/CodeGen/X86/force-align-stack-alloca.ll b/test/CodeGen/X86/force-align-stack-alloca.ll
index 95defc83db1f..bd9806943920 100644
--- a/test/CodeGen/X86/force-align-stack-alloca.ll
+++ b/test/CodeGen/X86/force-align-stack-alloca.ll
@@ -33,14 +33,14 @@ define i64 @g(i32 %i) nounwind {
 ; CHECK-NOT:         {{[^ ,]*}}, %esp
 ;
 ; Next we set up the memset call, and then undo it.
-; CHECK:      subl   $32, %esp
+; CHECK:      subl   $20, %esp
 ; CHECK-NOT:         {{[^ ,]*}}, %esp
 ; CHECK:      calll  memset
 ; CHECK-NEXT: addl   $32, %esp
 ; CHECK-NOT:         {{[^ ,]*}}, %esp
 ;
 ; Next we set up the call to 'f'.
-; CHECK:      subl   $32, %esp
+; CHECK:      subl   $28, %esp
 ; CHECK-NOT:         {{[^ ,]*}}, %esp
 ; CHECK:      calll  f
 ; CHECK-NEXT: addl   $32, %esp
diff --git a/test/CodeGen/X86/fp-load-trunc.ll b/test/CodeGen/X86/fp-load-trunc.ll
index a973befdafe7..e6c1e1adb59e 100644
--- a/test/CodeGen/X86/fp-load-trunc.ll
+++ b/test/CodeGen/X86/fp-load-trunc.ll
@@ -2,57 +2,87 @@
 ; RUN: llc < %s -march=x86 -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX
 
 define <1 x float> @test1(<1 x double>* %p) nounwind {
-; CHECK: test1
-; CHECK: cvtsd2ss
-; CHECK: ret
-; AVX:   test1
-; AVX:   vcvtsd2ss
-; AVX:   ret
+; CHECK-LABEL: test1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movsd (%eax), %xmm0
+; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
+; CHECK-NEXT:    movss %xmm0, (%esp)
+; CHECK-NEXT:    flds (%esp)
+; CHECK-NEXT:    popl %eax
+; CHECK-NEXT:    retl
+;
+; AVX-LABEL: test1:
+; AVX:       # BB#0:
+; AVX-NEXT:    pushl %eax
+; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX-NEXT:    vmovsd (%eax), %xmm0
+; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovss %xmm0, (%esp)
+; AVX-NEXT:    flds (%esp)
+; AVX-NEXT:    popl %eax
+; AVX-NEXT:    retl
   %x = load <1 x double>* %p
   %y = fptrunc <1 x double> %x to <1 x float>
   ret <1 x float> %y
 }
 
 define <2 x float> @test2(<2 x double>* %p) nounwind {
-; CHECK: test2
-; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
-; CHECK: ret
-; AVX:   test2
-; AVX:   vcvtpd2psx {{[0-9]*}}(%{{.*}})
-; AVX:   ret
+; CHECK-LABEL: test2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    cvtpd2ps (%eax), %xmm0
+; CHECK-NEXT:    retl
+;
+; AVX-LABEL: test2:
+; AVX:       # BB#0:
+; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX-NEXT:    vcvtpd2psx (%eax), %xmm0
+; AVX-NEXT:    retl
   %x = load <2 x double>* %p
   %y = fptrunc <2 x double> %x to <2 x float>
   ret <2 x float> %y
 }
 
 define <4 x float> @test3(<4 x double>* %p) nounwind {
-; CHECK: test3
-; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
-; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
-; CHECK: movlhps
-; CHECK: ret
-; AVX:   test3
-; AVX:   vcvtpd2psy {{[0-9]*}}(%{{.*}})
-; AVX:   ret
+; CHECK-LABEL: test3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    cvtpd2ps 16(%eax), %xmm1
+; CHECK-NEXT:    cvtpd2ps (%eax), %xmm0
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retl
+;
+; AVX-LABEL: test3:
+; AVX:       # BB#0:
+; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX-NEXT:    vcvtpd2psy (%eax), %xmm0
+; AVX-NEXT:    retl
   %x = load <4 x double>* %p
   %y = fptrunc <4 x double> %x to <4 x float>
   ret <4 x float> %y
 }
 
 define <8 x float> @test4(<8 x double>* %p) nounwind {
-; CHECK: test4
-; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
-; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
-; CHECK: movlhps
-; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
-; CHECK: cvtpd2ps {{[0-9]*}}(%{{.*}})
-; CHECK: movlhps
-; CHECK: ret
-; AVX:   test4
-; AVX:   vcvtpd2psy
-; AVX:   vcvtpd2psy
-; AVX:   vinsertf128
-; AVX:   ret
+; CHECK-LABEL: test4:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    cvtpd2ps 16(%eax), %xmm1
+; CHECK-NEXT:    cvtpd2ps (%eax), %xmm0
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    cvtpd2ps 48(%eax), %xmm2
+; CHECK-NEXT:    cvtpd2ps 32(%eax), %xmm1
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT:    retl
+;
+; AVX-LABEL: test4:
+; AVX:       # BB#0:
+; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX-NEXT:    vcvtpd2psy (%eax), %xmm0
+; AVX-NEXT:    vcvtpd2psy 32(%eax), %xmm1
+; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    retl
   %x = load <8 x double>* %p
   %y = fptrunc <8 x double> %x to <8 x float>
   ret <8 x float> %y
diff --git a/test/CodeGen/X86/fp-trunc.ll b/test/CodeGen/X86/fp-trunc.ll
index 25442fcadd23..6424bfc9c219 100644
--- a/test/CodeGen/X86/fp-trunc.ll
+++ b/test/CodeGen/X86/fp-trunc.ll
@@ -2,55 +2,77 @@
 ; RUN: llc < %s -march=x86 -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX
 
 define <1 x float> @test1(<1 x double> %x) nounwind {
-; CHECK: test1
-; CHECK: cvtsd2ss
-; CHECK: ret
-; AVX:   test1
-; AVX:   vcvtsd2ss
-; AVX:   ret
+; CHECK-LABEL: test1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    movsd {{[0-9]+}}(%esp), %xmm0
+; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
+; CHECK-NEXT:    movss %xmm0, (%esp)
+; CHECK-NEXT:    flds (%esp)
+; CHECK-NEXT:    popl %eax
+; CHECK-NEXT:    retl
+;
+; AVX-LABEL: test1:
+; AVX:       # BB#0:
+; AVX-NEXT:    pushl %eax
+; AVX-NEXT:    vmovsd {{[0-9]+}}(%esp), %xmm0
+; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovss %xmm0, (%esp)
+; AVX-NEXT:    flds (%esp)
+; AVX-NEXT:    popl %eax
+; AVX-NEXT:    retl
   %y = fptrunc <1 x double> %x to <1 x float>
   ret <1 x float> %y
 }
 
 define <2 x float> @test2(<2 x double> %x) nounwind {
-; CHECK: test2
-; CHECK: cvtpd2ps
-; CHECK: ret
-; AVX:   test2
-; AVX-NOT:  vcvtpd2psy
-; AVX:   vcvtpd2ps
-; AVX:   ret
+; CHECK-LABEL: test2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    cvtpd2ps %xmm0, %xmm0
+; CHECK-NEXT:    retl
+;
+; AVX-LABEL: test2:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0
+; AVX-NEXT:    retl
   %y = fptrunc <2 x double> %x to <2 x float>
   ret <2 x float> %y
 }
 
 define <4 x float> @test3(<4 x double> %x) nounwind {
-; CHECK: test3
-; CHECK: cvtpd2ps
-; CHECK: cvtpd2ps
-; CHECK: movlhps
-; CHECK: ret
-; AVX:   test3
-; AVX:   vcvtpd2psy
-; AVX:   ret
+; CHECK-LABEL: test3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    cvtpd2ps %xmm1, %xmm1
+; CHECK-NEXT:    cvtpd2ps %xmm0, %xmm0
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retl
+;
+; AVX-LABEL: test3:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcvtpd2psy %ymm0, %xmm0
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retl
   %y = fptrunc <4 x double> %x to <4 x float>
   ret <4 x float> %y
 }
 
 define <8 x float> @test4(<8 x double> %x) nounwind {
-; CHECK: test4
-; CHECK: cvtpd2ps
-; CHECK: cvtpd2ps
-; CHECK: movlhps
-; CHECK: cvtpd2ps
-; CHECK: cvtpd2ps
-; CHECK: movlhps
-; CHECK: ret
-; AVX:   test4
-; AVX:   vcvtpd2psy
-; AVX:   vcvtpd2psy
-; AVX:   vinsertf128
-; AVX:   ret
+; CHECK-LABEL: test4:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    cvtpd2ps %xmm1, %xmm1
+; CHECK-NEXT:    cvtpd2ps %xmm0, %xmm0
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    cvtpd2ps %xmm3, %xmm3
+; CHECK-NEXT:    cvtpd2ps %xmm2, %xmm1
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; CHECK-NEXT:    retl
+;
+; AVX-LABEL: test4:
+; AVX:       # BB#0:
+; AVX-NEXT:    vcvtpd2psy %ymm0, %xmm0
+; AVX-NEXT:    vcvtpd2psy %ymm1, %xmm1
+; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    retl
   %y = fptrunc <8 x double> %x to <8 x float>
   ret <8 x float> %y
 }
diff --git a/test/CodeGen/X86/fpstack-debuginstr-kill.ll b/test/CodeGen/X86/fpstack-debuginstr-kill.ll
new file mode 100644
index 000000000000..e3180f4e68a2
--- /dev/null
+++ b/test/CodeGen/X86/fpstack-debuginstr-kill.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin -no-integrated-as
+
+@g1 = global double 0.000000e+00, align 8
+@g2 = global i32 0, align 4
+
+define void @_Z16fpuop_arithmeticjj(i32, i32) {
+entry:
+  switch i32 undef, label %sw.bb.i1921 [
+  ]
+
+sw.bb261:                                         ; preds = %entry, %entry
+  unreachable
+
+sw.bb.i1921:                                      ; preds = %if.end504
+  switch i32 undef, label %if.end511 [
+    i32 1, label %sw.bb27.i
+  ]
+
+sw.bb27.i:                                        ; preds = %sw.bb.i1921
+  %conv.i.i1923 = fpext float undef to x86_fp80
+  br label %if.end511
+
+if.end511:                                        ; preds = %sw.bb27.i, %sw.bb13.i
+  %src.sroa.0.0.src.sroa.0.0.2280 = phi x86_fp80 [ %conv.i.i1923, %sw.bb27.i ], [ undef, %sw.bb.i1921 ]
+  switch i32 undef, label %sw.bb992 [
+    i32 3, label %sw.bb735
+    i32 18, label %if.end41.i2210
+  ]
+
+sw.bb735:                                         ; preds = %if.end511
+  %2 = call x86_fp80 asm sideeffect "frndint", "={st},0,~{dirflag},~{fpsr},~{flags}"(x86_fp80 %src.sroa.0.0.src.sroa.0.0.2280)
+  unreachable
+
+if.end41.i2210:                                   ; preds = %if.end511
+  call void @llvm.dbg.value(metadata x86_fp80 %src.sroa.0.0.src.sroa.0.0.2280, i64 0, metadata !20, metadata !{!"0x102"})
+  unreachable
+
+sw.bb992:                                         ; preds = %if.end511
+  ret void
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!24, !25}
+!0 = !{!"0x11\004\00clang version 3.6.0 (http://llvm.org/git/clang 8444ae7cfeaefae031f8fedf0d1435ca3b14d90b) (http://llvm.org/git/llvm 886f0101a7d176543b831f5efb74c03427244a55)\001\00\000\00\001", !1, !2, !2, !3, !21, !2} ; [ DW_TAG_compile_unit ] [x87stackifier/fpu_ieee.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"fpu_ieee.cpp", !"x87stackifier"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00fpuop_arithmetic\00fpuop_arithmetic\00_Z16fpuop_arithmeticjj\0011\000\001\000\006\00256\001\0013", !5, !6, !7, null, void (i32, i32)* @_Z16fpuop_arithmeticjj, null, null, !10} ; [ DW_TAG_subprogram ] [line 11] [def] [scope 13] [fpuop_arithmetic]
+!5 = !{!"f1.cpp", !"x87stackifier"}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [x87stackifier/f1.cpp]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9, !9}
+!9 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, null} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!10 = !{!11, !12, !13, !18, !20}
+!11 = !{!"0x101\00\0016777227\000", !4, !6, !9} ; [ DW_TAG_arg_variable ] [line 11]
+!12 = !{!"0x101\00\0033554443\000", !4, !6, !9} ; [ DW_TAG_arg_variable ] [line 11]
+!13 = !{!"0x100\00x\0014\000", !4, !6, !14} ; [ DW_TAG_auto_variable ] [x] [line 14]
+!14 = !{!"0x16\00fpu_extended\003\000\000\000\000", !5, null, !15} ; [ DW_TAG_typedef ] [fpu_extended] [line 3, size 0, align 0, offset 0] [from fpu_register]
+!15 = !{!"0x16\00fpu_register\002\000\000\000\000", !5, null, !16} ; [ DW_TAG_typedef ] [fpu_register] [line 2, size 0, align 0, offset 0] [from uae_f64]
+!16 = !{!"0x16\00uae_f64\001\000\000\000\000", !5, null, !17} ; [ DW_TAG_typedef ] [uae_f64] [line 1, size 0, align 0, offset 0] [from double]
+!17 = !{!"0x24\00double\000\0064\0064\000\000\004", null, null} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!18 = !{!"0x100\00a\0015\000", !4, !6, !19} ; [ DW_TAG_auto_variable ] [a] [line 15]
+!19 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!20 = !{!"0x100\00value\0016\000", !4, !6, !14} ; [ DW_TAG_auto_variable ] [value] [line 16]
+!21 = !{!22, !23}
+!22 = !{!"0x34\00g1\00g1\00\005\000\001", null, !6, !14, double* @g1, null} ; [ DW_TAG_variable ] [g1] [line 5] [def]
+!23 = !{!"0x34\00g2\00g2\00\006\000\001", null, !6, !19, i32* @g2, null} ; [ DW_TAG_variable ] [g2] [line 6] [def]
+!24 = !{i32 2, !"Dwarf Version", i32 2}
+!25 = !{i32 2, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/frameaddr.ll b/test/CodeGen/X86/frameaddr.ll
index 6c1ca252bb97..3e0f8bc34d64 100644
--- a/test/CodeGen/X86/frameaddr.ll
+++ b/test/CodeGen/X86/frameaddr.ll
@@ -2,6 +2,10 @@
 ; RUN: llc < %s -march=x86    -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-32
 ; RUN: llc < %s -march=x86-64                             | FileCheck %s --check-prefix=CHECK-64
 ; RUN: llc < %s -march=x86-64 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -mtriple=x86_64-gnux32                    | FileCheck %s --check-prefix=CHECK-X32ABI
+; RUN: llc < %s -mtriple=x86_64-gnux32 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-X32ABI
+; RUN: llc < %s -mtriple=x86_64-nacl                    | FileCheck %s --check-prefix=CHECK-NACL64
+; RUN: llc < %s -mtriple=x86_64-nacl -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-NACL64
 
 define i8* @test1() nounwind {
 entry:
@@ -17,6 +21,16 @@ entry:
 ; CHECK-64-NEXT:  movq %rbp, %rax
 ; CHECK-64-NEXT:  pop
 ; CHECK-64-NEXT:  ret
+; CHECK-X32ABI-LABEL: test1
+; CHECK-X32ABI:       pushq %rbp
+; CHECK-X32ABI-NEXT:  movl %esp, %ebp
+; CHECK-X32ABI-NEXT:  movl %ebp, %eax
+; CHECK-X32ABI-NEXT:  popq %rbp
+; CHECK-X32ABI-NEXT:  ret
+; CHECK-NACL64-LABEL: test1
+; CHECK-NACL64:       pushq %rbp
+; CHECK-NACL64-NEXT:  movq %rsp, %rbp
+; CHECK-NACL64-NEXT:  movl %ebp, %eax
   %0 = tail call i8* @llvm.frameaddress(i32 0)
   ret i8* %0
 }
@@ -37,6 +51,18 @@ entry:
 ; CHECK-64-NEXT:  movq (%rax), %rax
 ; CHECK-64-NEXT:  pop
 ; CHECK-64-NEXT:  ret
+; CHECK-X32ABI-LABEL: test2
+; CHECK-X32ABI:       pushq %rbp
+; CHECK-X32ABI-NEXT:  movl %esp, %ebp
+; CHECK-X32ABI-NEXT:  movl (%ebp), %eax
+; CHECK-X32ABI-NEXT:  movl (%eax), %eax
+; CHECK-X32ABI-NEXT:  popq %rbp
+; CHECK-X32ABI-NEXT:  ret
+; CHECK-NACL64-LABEL: test2
+; CHECK-NACL64:       pushq %rbp
+; CHECK-NACL64-NEXT:  movq %rsp, %rbp
+; CHECK-NACL64-NEXT:  movl (%ebp), %eax
+; CHECK-NACL64-NEXT:  movl (%eax), %eax
   %0 = tail call i8* @llvm.frameaddress(i32 2)
   ret i8* %0
 }
diff --git a/test/CodeGen/X86/frameallocate.ll b/test/CodeGen/X86/frameallocate.ll
new file mode 100644
index 000000000000..13d35b91937d
--- /dev/null
+++ b/test/CodeGen/X86/frameallocate.ll
@@ -0,0 +1,39 @@
+; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s
+
+declare i8* @llvm.frameallocate(i32)
+declare i8* @llvm.frameaddress(i32)
+declare i8* @llvm.framerecover(i8*, i8*)
+declare i32 @printf(i8*, ...)
+
+@str = internal constant [10 x i8] c"asdf: %d\0A\00"
+
+define void @print_framealloc_from_fp(i8* %fp) {
+  %alloc = call i8* @llvm.framerecover(i8* bitcast (void(i32*, i32*)* @alloc_func to i8*), i8* %fp)
+  %alloc_i32 = bitcast i8* %alloc to i32*
+  %r = load i32* %alloc_i32
+  call i32 (i8*, ...)* @printf(i8* getelementptr ([10 x i8]* @str, i32 0, i32 0), i32 %r)
+  ret void
+}
+
+; CHECK-LABEL: print_framealloc_from_fp:
+; CHECK: movabsq $.Lframeallocation_alloc_func, %[[offs:[a-z]+]]
+; CHECK: movl (%rcx,%[[offs]]), %edx
+; CHECK: leaq {{.*}}(%rip), %rcx
+; CHECK: callq printf
+; CHECK: retq
+
+define void @alloc_func(i32* %s, i32* %d) {
+  %alloc = call i8* @llvm.frameallocate(i32 16)
+  %alloc_i32 = bitcast i8* %alloc to i32*
+  store i32 42, i32* %alloc_i32
+  %fp = call i8* @llvm.frameaddress(i32 0)
+  call void @print_framealloc_from_fp(i8* %fp)
+  ret void
+}
+
+; CHECK-LABEL: alloc_func:
+; CHECK: .Lframeallocation_alloc_func = -[[offs:[0-9]+]]
+; CHECK: movl $42, -[[offs]](%rbp)
+; CHECK: movq %rbp, %rcx
+; CHECK: callq print_framealloc_from_fp
+; CHECK: retq
diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll
index 5f48b1e32b16..6d397b211481 100644
--- a/test/CodeGen/X86/gather-addresses.ll
+++ b/test/CodeGen/X86/gather-addresses.ll
@@ -1,35 +1,38 @@
 ; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN
 ; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=WIN
+; RUN: llc -mtriple=i686-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN32
 ; rdar://7398554
 
 ; When doing vector gather-scatter index calculation with 32-bit indices,
-; bounce the vector off of cache rather than shuffling each individual
+; use an efficient mov/shift sequence rather than shuffling each individual
 ; element out of the index vector.
 
-; CHECK: foo:
-; LIN: movaps	(%rsi), %xmm0
-; LIN: andps	(%rdx), %xmm0
-; LIN: movaps	%xmm0, -24(%rsp)
-; LIN: movslq	-24(%rsp), %[[REG1:r.+]]
-; LIN: movslq	-20(%rsp), %[[REG2:r.+]]
-; LIN: movslq	-16(%rsp), %[[REG3:r.+]]
-; LIN: movslq	-12(%rsp), %[[REG4:r.+]]
-; LIN: movsd	(%rdi,%[[REG1]],8), %xmm0
-; LIN: movhpd	(%rdi,%[[REG2]],8), %xmm0
-; LIN: movsd	(%rdi,%[[REG3]],8), %xmm1
-; LIN: movhpd	(%rdi,%[[REG4]],8), %xmm1
+; CHECK-LABEL: foo:
+; LIN: movdqa	(%rsi), %xmm0
+; LIN: pand 	(%rdx), %xmm0
+; LIN: pextrq	$1, %xmm0, %r[[REG4:.+]]
+; LIN: movd 	%xmm0, %r[[REG2:.+]]
+; LIN: movslq	%e[[REG2]], %r[[REG1:.+]]
+; LIN: sarq    $32, %r[[REG2]]
+; LIN: movslq	%e[[REG4]], %r[[REG3:.+]]
+; LIN: sarq    $32, %r[[REG4]]
+; LIN: movsd	(%rdi,%r[[REG1]],8), %xmm0
+; LIN: movhpd	(%rdi,%r[[REG2]],8), %xmm0
+; LIN: movsd	(%rdi,%r[[REG3]],8), %xmm1
+; LIN: movhpd	(%rdi,%r[[REG4]],8), %xmm1
 
-; WIN: movaps	(%rdx), %xmm0
-; WIN: andps	(%r8), %xmm0
-; WIN: movaps	%xmm0, (%rsp)
-; WIN: movslq	(%rsp), %[[REG1:r.+]]
-; WIN: movslq	4(%rsp), %[[REG2:r.+]]
-; WIN: movslq	8(%rsp), %[[REG3:r.+]]
-; WIN: movslq	12(%rsp), %[[REG4:r.+]]
-; WIN: movsd	(%rcx,%[[REG1]],8), %xmm0
-; WIN: movhpd	(%rcx,%[[REG2]],8), %xmm0
-; WIN: movsd	(%rcx,%[[REG3]],8), %xmm1
-; WIN: movhpd	(%rcx,%[[REG4]],8), %xmm1
+; WIN: movdqa	(%rdx), %xmm0
+; WIN: pand 	(%r8), %xmm0
+; WIN: pextrq	$1, %xmm0, %r[[REG4:.+]]
+; WIN: movd 	%xmm0, %r[[REG2:.+]]
+; WIN: movslq	%e[[REG2]], %r[[REG1:.+]]
+; WIN: sarq    $32, %r[[REG2]]
+; WIN: movslq	%e[[REG4]], %r[[REG3:.+]]
+; WIN: sarq    $32, %r[[REG4]]
+; WIN: movsd	(%rcx,%r[[REG1]],8), %xmm0
+; WIN: movhpd	(%rcx,%r[[REG2]],8), %xmm0
+; WIN: movsd	(%rcx,%r[[REG3]],8), %xmm1
+; WIN: movhpd	(%rcx,%r[[REG4]],8), %xmm1
 
 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
   %a = load <4 x i32>* %i
@@ -53,3 +56,35 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
   %v3 = insertelement <4 x double> %v2, double %r3, i32 3
   ret <4 x double> %v3
 }
+
+; Check that the sequence previously used above, which bounces the vector off the
+; cache works for x86-32. Note that in this case it will not be used for index
+; calculation, since indexes are 32-bit, not 64.
+; CHECK-LABEL: old:
+; LIN32: movaps	%xmm0, (%esp)
+; LIN32-DAG: {{(mov|and)}}l	(%esp),
+; LIN32-DAG: {{(mov|and)}}l	4(%esp),
+; LIN32-DAG: {{(mov|and)}}l	8(%esp),
+; LIN32-DAG: {{(mov|and)}}l	12(%esp),
+define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind {
+  %a = load <4 x i32>* %i
+  %b = load <4 x i32>* %h
+  %j = and <4 x i32> %a, %b
+  %d0 = extractelement <4 x i32> %j, i32 0
+  %d1 = extractelement <4 x i32> %j, i32 1
+  %d2 = extractelement <4 x i32> %j, i32 2
+  %d3 = extractelement <4 x i32> %j, i32 3
+  %q0 = zext i32 %d0 to i64
+  %q1 = zext i32 %d1 to i64
+  %q2 = zext i32 %d2 to i64
+  %q3 = zext i32 %d3 to i64  
+  %r0 = and i64 %q0, %f
+  %r1 = and i64 %q1, %f
+  %r2 = and i64 %q2, %f
+  %r3 = and i64 %q3, %f
+  %v0 = insertelement <4 x i64> undef, i64 %r0, i32 0
+  %v1 = insertelement <4 x i64> %v0, i64 %r1, i32 1
+  %v2 = insertelement <4 x i64> %v1, i64 %r2, i32 2
+  %v3 = insertelement <4 x i64> %v2, i64 %r3, i32 3
+  ret <4 x i64> %v3
+}
diff --git a/test/CodeGen/X86/gcc_except_table_functions.ll b/test/CodeGen/X86/gcc_except_table_functions.ll
new file mode 100644
index 000000000000..4a8168050e56
--- /dev/null
+++ b/test/CodeGen/X86/gcc_except_table_functions.ll
@@ -0,0 +1,53 @@
+; RUN: llc -mtriple x86_64-pc-linux-gnu < %s | FileCheck %s
+
+; This test demonstrates that it is possible to use functions for typeinfo
+; instead of global variables. While __gxx_personality_v0 would never know what
+; to do with them, other EH schemes such as SEH might use them.
+
+declare i32 @__gxx_personality_v0(...)
+declare void @filt0()
+declare void @filt1()
+declare void @_Z1fv()
+declare i32 @llvm.eh.typeid.for(i8*)
+
+define i32 @main() uwtable {
+entry:
+  invoke void @_Z1fv()
+          to label %try.cont unwind label %lpad
+
+try.cont:
+  ret i32 0
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* bitcast (void ()* @filt0 to i8*)
+          catch i8* bitcast (void ()* @filt1 to i8*)
+  %sel = extractvalue { i8*, i32 } %0, 1
+  %id0 = call i32 @llvm.eh.typeid.for(i8* bitcast (void ()* @filt0 to i8*))
+  %is_f0 = icmp eq i32 %sel, %id0
+  br i1 %is_f0, label %try.cont, label %check_f1
+
+check_f1:
+  %id1 = call i32 @llvm.eh.typeid.for(i8* bitcast (void ()* @filt1 to i8*))
+  %is_f1 = icmp eq i32 %sel, %id1
+  br i1 %is_f1, label %try.cont, label %eh.resume
+
+eh.resume:
+  resume { i8*, i32 } %0
+}
+
+; CHECK-LABEL: main:
+; CHECK: .cfi_startproc
+; CHECK: .cfi_personality 3, __gxx_personality_v0
+; CHECK: .cfi_lsda 3, .Lexception0
+; CHECK: .cfi_def_cfa_offset 16
+; CHECK: callq _Z1fv
+; CHECK: retq
+; CHECK: cmpl $2, %edx
+; CHECK: je
+; CHECK: cmpl $1, %edx
+; CHECK: je
+; CHECK: callq _Unwind_Resume
+; CHECK: .cfi_endproc
+; CHECK: GCC_except_table0:
+; CHECK: Lexception0:
diff --git a/test/CodeGen/X86/ghc-cc.ll b/test/CodeGen/X86/ghc-cc.ll
index 4dba2c086329..3ada8c8ce98e 100644
--- a/test/CodeGen/X86/ghc-cc.ll
+++ b/test/CodeGen/X86/ghc-cc.ll
@@ -12,13 +12,13 @@ entry:
   ; CHECK: movl {{[0-9]*}}(%esp), %ebx
   ; CHECK-NEXT: movl {{[0-9]*}}(%esp), %ebp
   ; CHECK-NEXT: calll addtwo
-  %0 = call cc 10 i32 @addtwo(i32 %a, i32 %b)
+  %0 = call ghccc i32 @addtwo(i32 %a, i32 %b)
   ; CHECK: calll foo
   call void @foo() nounwind
   ret void
 }
 
-define cc 10 i32 @addtwo(i32 %x, i32 %y) nounwind {
+define ghccc i32 @addtwo(i32 %x, i32 %y) nounwind {
 entry:
   ; CHECK: leal (%ebx,%ebp), %eax
   %0 = add i32 %x, %y
@@ -26,7 +26,7 @@ entry:
   ret i32 %0
 }
 
-define cc 10 void @foo() nounwind {
+define ghccc void @foo() nounwind {
 entry:
   ; CHECK:      movl r1, %esi
   ; CHECK-NEXT: movl hp, %edi
@@ -37,8 +37,8 @@ entry:
   %2 = load i32* @sp
   %3 = load i32* @base
   ; CHECK: jmp bar
-  tail call cc 10 void @bar( i32 %3, i32 %2, i32 %1, i32 %0 ) nounwind
+  tail call ghccc void @bar( i32 %3, i32 %2, i32 %1, i32 %0 ) nounwind
   ret void
 }
 
-declare cc 10 void @bar(i32, i32, i32, i32)
+declare ghccc void @bar(i32, i32, i32, i32)
diff --git a/test/CodeGen/X86/ghc-cc64.ll b/test/CodeGen/X86/ghc-cc64.ll
index 403391e81658..7251dd673b30 100644
--- a/test/CodeGen/X86/ghc-cc64.ll
+++ b/test/CodeGen/X86/ghc-cc64.ll
@@ -25,13 +25,13 @@ entry:
   ; CHECK:      movq %rdi, %r13
   ; CHECK-NEXT: movq %rsi, %rbp
   ; CHECK-NEXT: callq addtwo
-  %0 = call cc 10 i64 @addtwo(i64 %a, i64 %b)
+  %0 = call ghccc i64 @addtwo(i64 %a, i64 %b)
   ; CHECK:      callq foo
   call void @foo() nounwind
   ret void
 }
 
-define cc 10 i64 @addtwo(i64 %x, i64 %y) nounwind {
+define ghccc i64 @addtwo(i64 %x, i64 %y) nounwind {
 entry:
   ; CHECK:      leaq (%r13,%rbp), %rax
   %0 = add i64 %x, %y
@@ -39,7 +39,7 @@ entry:
   ret i64 %0
 }
 
-define cc 10 void @foo() nounwind {
+define ghccc void @foo() nounwind {
 entry:
   ; CHECK:      movsd d2(%rip), %xmm6
   ; CHECK-NEXT: movsd d1(%rip), %xmm5
@@ -74,12 +74,12 @@ entry:
   %14 = load i64* @sp
   %15 = load i64* @base
   ; CHECK: jmp bar
-  tail call cc 10 void @bar( i64 %15, i64 %14, i64 %13, i64 %12, i64 %11,
+  tail call ghccc void @bar( i64 %15, i64 %14, i64 %13, i64 %12, i64 %11,
                              i64 %10, i64 %9, i64 %8, i64 %7, i64 %6,
                              float %5, float %4, float %3, float %2, double %1,
                              double %0 ) nounwind
   ret void
 }
 
-declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64,
+declare ghccc void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64,
                         float, float, float, float, double, double)
diff --git a/test/CodeGen/X86/global-sections.ll b/test/CodeGen/X86/global-sections.ll
index c763f3947e59..fa1169d8a8e3 100644
--- a/test/CodeGen/X86/global-sections.ll
+++ b/test/CodeGen/X86/global-sections.ll
@@ -53,21 +53,20 @@ define void @F1() {
 
 
 ; _Complex long long const G4 = 34;
-@G4 = unnamed_addr constant {i64,i64} { i64 34, i64 0 }
+@G4 = private unnamed_addr constant {i64,i64} { i64 34, i64 0 }
 
 ; DARWIN: .section        __TEXT,__literal16,16byte_literals
-; DARWIN: _G4:
+; DARWIN: L_G4:
 ; DARWIN:     .long 34
 
 ; DARWIN-STATIC: .section        __TEXT,__literal16,16byte_literals
-; DARWIN-STATIC: _G4:
+; DARWIN-STATIC: L_G4:
 ; DARWIN-STATIC:     .long 34
 
 ; DARWIN64: .section        __TEXT,__literal16,16byte_literals
-; DARWIN64: _G4:
+; DARWIN64: L_G4:
 ; DARWIN64:     .quad 34
 
-
 ; int G5 = 47;
 @G5 = global i32 47
 
@@ -194,3 +193,23 @@ define void @F1() {
 ; WIN32-SECTIONS: L_G14:
 ; WIN32-SECTIONS:        .asciz  "foo"
 
+; cannot be merged on MachO, but can on other formats.
+@G15 = unnamed_addr constant i64 0
+
+; LINUX: .section        .rodata.cst8,"aM",@progbits,8
+; LINUX: G15:
+
+; DARWIN: .section      __TEXT,__const
+; DARWIN: _G15:
+
+; DARWIN-STATIC: .section       __TEXT,__const
+; DARWIN-STATIC: _G15:
+
+; DARWIN64: .section       __TEXT,__const
+; DARWIN64: _G15:
+
+; LINUX-SECTIONS: .section      .rodata.G15,"aM",@progbits,8
+; LINUX-SECTIONS: G15:
+
+; WIN32-SECTIONS: .section      .rdata,"rd",one_only,_G15
+; WIN32-SECTIONS: _G15:
diff --git a/test/CodeGen/X86/hoist-invariant-load.ll b/test/CodeGen/X86/hoist-invariant-load.ll
index 34191e3f9a31..c9e52903c79e 100644
--- a/test/CodeGen/X86/hoist-invariant-load.ll
+++ b/test/CodeGen/X86/hoist-invariant-load.ll
@@ -27,4 +27,4 @@ for.end:                                          ; preds = %for.body
 
 declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
 
-!0 = metadata !{}
+!0 = !{}
diff --git a/test/CodeGen/X86/i8-umulo.ll b/test/CodeGen/X86/i8-umulo.ll
deleted file mode 100644
index ba846f3e9be3..000000000000
--- a/test/CodeGen/X86/i8-umulo.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc -mcpu=generic -march=x86 < %s | FileCheck %s
-; PR19858
-
-declare {i8, i1} @llvm.umul.with.overflow.i8(i8 %a, i8 %b)
-define i8 @testumulo(i32 %argc) {
-; CHECK: imulw
-; CHECK: testb %{{.+}}, %{{.+}}
-; CHECK: je [[NOOVERFLOWLABEL:.+]]
-; CHECK: {{.*}}[[NOOVERFLOWLABEL]]:
-; CHECK-NEXT: movb
-; CHECK-NEXT: retl
-top:
-  %RHS = trunc i32 %argc to i8
-  %umul = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 25, i8 %RHS)
-  %ex = extractvalue { i8, i1 } %umul, 1
-  br i1 %ex, label %overflow, label %nooverlow
-
-overflow:
-  ret i8 %RHS
-
-nooverlow:
-  %umul.value = extractvalue { i8, i1 } %umul, 0
-  ret i8 %umul.value
-}
diff --git a/test/CodeGen/X86/ident-metadata.ll b/test/CodeGen/X86/ident-metadata.ll
index a5686730cee9..e08738f47df7 100644
--- a/test/CodeGen/X86/ident-metadata.ll
+++ b/test/CodeGen/X86/ident-metadata.ll
@@ -5,5 +5,5 @@
 ; CHECK: .ident  "clang version x.x"
 ; CHECK-NEXT: .ident  "something else"
 !llvm.ident = !{!0, !1}
-!0 = metadata !{metadata !"clang version x.x"}
-!1 = metadata !{metadata !"something else"}
+!0 = !{!"clang version x.x"}
+!1 = !{!"something else"}
diff --git a/test/CodeGen/X86/inalloca-ctor.ll b/test/CodeGen/X86/inalloca-ctor.ll
index 7cfa92913578..b1781d30f913 100644
--- a/test/CodeGen/X86/inalloca-ctor.ll
+++ b/test/CodeGen/X86/inalloca-ctor.ll
@@ -17,16 +17,16 @@ entry:
 ; CHECK: movl %esp,
   call void @Foo_ctor(%Foo* %c)
 ; CHECK: leal 12(%{{.*}}),
-; CHECK: subl $4, %esp
-; CHECK: calll _Foo_ctor
+; CHECK-NEXT: pushl
+; CHECK-NEXT: calll _Foo_ctor
 ; CHECK: addl $4, %esp
   %b = getelementptr %frame* %args, i32 0, i32 1
   store i32 42, i32* %b
 ; CHECK: movl $42,
   %a = getelementptr %frame* %args, i32 0, i32 0
   call void @Foo_ctor(%Foo* %a)
-; CHECK: subl $4, %esp
-; CHECK: calll _Foo_ctor
+; CHECK-NEXT: pushl
+; CHECK-NEXT: calll _Foo_ctor
 ; CHECK: addl $4, %esp
   call void @f(%frame* inalloca %args)
 ; CHECK: calll   _f
diff --git a/test/CodeGen/X86/inalloca-invoke.ll b/test/CodeGen/X86/inalloca-invoke.ll
index 6cff9ac0640c..b56f24d99628 100644
--- a/test/CodeGen/X86/inalloca-invoke.ll
+++ b/test/CodeGen/X86/inalloca-invoke.ll
@@ -37,7 +37,7 @@ blah:
 invoke.cont:
   call void @begin(%Iter* sret %beg)
 
-; CHECK:  movl %[[beg]],
+; CHECK:  pushl %[[beg]]
 ; CHECK:  calll _begin
 
   invoke void @reverse(%frame.reverse* inalloca align 4 %rev_args)
diff --git a/test/CodeGen/X86/inalloca-regparm.ll b/test/CodeGen/X86/inalloca-regparm.ll
new file mode 100644
index 000000000000..9dd916bfbb37
--- /dev/null
+++ b/test/CodeGen/X86/inalloca-regparm.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=i686-windows-msvc < %s -o /dev/null
+; RUN: not llc -mtriple=x86_64-windows-msvc %s -o /dev/null 2>&1 | FileCheck %s
+
+; This will compile successfully on x86 but not x86_64, because %b will become a
+; register parameter.
+
+declare x86_thiscallcc i32 @f(i32 %a, i32* inalloca %b)
+define void @g() {
+  %b = alloca inalloca i32
+  store i32 2, i32* %b
+  call x86_thiscallcc i32 @f(i32 0, i32* inalloca %b)
+  ret void
+}
+
+; CHECK: cannot use inalloca attribute on a register parameter
diff --git a/test/CodeGen/X86/inalloca-stdcall.ll b/test/CodeGen/X86/inalloca-stdcall.ll
index 54f97d99a9c7..e5b07e262c7b 100644
--- a/test/CodeGen/X86/inalloca-stdcall.ll
+++ b/test/CodeGen/X86/inalloca-stdcall.ll
@@ -19,7 +19,7 @@ define void @g() {
   call x86_stdcallcc void @f(%Foo* inalloca %b)
 ; CHECK: calll   _f@8
 ; CHECK-NOT: %esp
-; CHECK: subl $4, %esp
+; CHECK: pushl
 ; CHECK: calll   _i@4
   call x86_stdcallcc void @i(i32 0)
   ret void
diff --git a/test/CodeGen/X86/inline-asm-flag-clobber.ll b/test/CodeGen/X86/inline-asm-flag-clobber.ll
index bb7c33e422ed..0874b51af6a5 100644
--- a/test/CodeGen/X86/inline-asm-flag-clobber.ll
+++ b/test/CodeGen/X86/inline-asm-flag-clobber.ll
@@ -29,4 +29,4 @@ entry:
   ret i32 %1
 }
 
-!0 = metadata !{i64 935930}
+!0 = !{i64 935930}
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
index 91c477baaa51..bb3778a28116 100644
--- a/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -340,3 +340,65 @@ entry:
   %0 = tail call i32 asm "fcomi $2, $1; pushf; pop $0", "=r,{st},{st(1)},~{dirflag},~{fpsr},~{flags}"(double 2.000000e+00, double 2.000000e+00) nounwind
   ret i32 %0
 }
+
+; <rdar://problem/16952634>
+; X87 stackifier asserted when there was an ST register defined by an
+; inline-asm instruction and the ST register was live across another
+; inline-asm instruction.
+;
+; INLINEASM <es:frndint> [sideeffect] [attdialect], $0:[regdef], %ST0<imp-def,tied5>, $1:[reguse tiedto:$0], %ST0<tied3>, $2:[clobber], %EFLAGS<earlyclobber,imp-def,dead>
+; INLINEASM <es:fldcw $0> [sideeffect] [mayload] [attdialect], $0:[mem], %EAX<undef>, 1, %noreg, 0, %noreg, $1:[clobber], %EFLAGS<earlyclobber,imp-def,dead>
+; %FP0<def> = COPY %ST0
+
+; CHECK-LABEL: _test_live_st
+; CHECK: ## InlineAsm Start
+; CHECK: frndint
+; CHECK: ## InlineAsm End
+; CHECK: ## InlineAsm Start
+; CHECK: fldcw
+; CHECK: ## InlineAsm End
+
+%struct.fpu_t = type { [8 x x86_fp80], x86_fp80, %struct.anon1, %struct.anon2, i32, i8, [15 x i8] }
+%struct.anon1 = type { i32, i32, i32 }
+%struct.anon2 = type { i32, i32, i32, i32 }
+
+@fpu = external global %struct.fpu_t, align 16
+
+; Function Attrs: ssp
+define void @test_live_st(i32 %a1) {
+entry:
+  %0 = load x86_fp80* undef, align 16
+  %cond = icmp eq i32 %a1, 1
+  br i1 %cond, label %sw.bb4.i, label %_Z5tointRKe.exit
+
+sw.bb4.i:
+  %1 = call x86_fp80 asm sideeffect "frndint", "={st},0,~{dirflag},~{fpsr},~{flags}"(x86_fp80 %0)
+  call void asm sideeffect "fldcw $0", "*m,~{dirflag},~{fpsr},~{flags}"(i32* undef)
+  br label %_Z5tointRKe.exit
+
+_Z5tointRKe.exit:
+  %result.0.i = phi x86_fp80 [ %1, %sw.bb4.i ], [ %0, %entry ]
+  %conv.i1814 = fptosi x86_fp80 %result.0.i to i32
+  %conv626 = sitofp i32 %conv.i1814 to x86_fp80
+  store x86_fp80 %conv626, x86_fp80* getelementptr inbounds (%struct.fpu_t* @fpu, i32 0, i32 1)
+  br label %return
+
+return:
+  ret void
+}
+
+; Check that x87 stackifier is correctly rewriting FP registers to ST registers.
+;
+; CHECK-LABEL: _test_operand_rewrite
+; CHECK: ## InlineAsm Start
+; CHECK: foo %st(0), %st(1)
+; CHECK: ## InlineAsm End
+
+define double @test_operand_rewrite() {
+entry:
+  %0 = tail call { double, double } asm sideeffect "foo $0, $1", "={st},={st(1)},~{dirflag},~{fpsr},~{flags}"()
+  %asmresult = extractvalue { double, double } %0, 0
+  %asmresult1 = extractvalue { double, double } %0, 1
+  %sub = fsub double %asmresult, %asmresult1
+  ret double %sub
+}
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index d4174539f2f9..dfa8aed46463 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -284,7 +284,7 @@ entry:
 define i32 @func_test1(i32 %p1) nounwind uwtable {
 entry:
 ; CHECK-LABEL: func_test1:
-; CHECK: testb
+; CHECK: andb
 ; CHECK: j
 ; CHECK: ret
   %0 = load i32* @b, align 4
diff --git a/test/CodeGen/X86/jump_table_alias.ll b/test/CodeGen/X86/jump_table_alias.ll
index f3691fda221e..20622009e376 100644
--- a/test/CodeGen/X86/jump_table_alias.ll
+++ b/test/CodeGen/X86/jump_table_alias.ll
@@ -5,7 +5,7 @@ entry:
   ret i32 0
 }
 
-@i = alias internal i32 ()* @f
+@i = internal alias i32 ()* @f
 @j = alias i32 ()* @f
 
 define i32 @main(i32 %argc, i8** %argv) {
@@ -25,7 +25,6 @@ define i32 @main(i32 %argc, i8** %argv) {
 ; There should only be one table, even though there are two GlobalAliases,
 ; because they both alias the same value.
 
-; CHECK:         .globl  __llvm_jump_instr_table_0_1
 ; CHECK:         .align  8, 0x90
 ; CHECK:         .type   __llvm_jump_instr_table_0_1,@function
 ; CHECK: __llvm_jump_instr_table_0_1:
diff --git a/test/CodeGen/X86/jump_table_align.ll b/test/CodeGen/X86/jump_table_align.ll
new file mode 100644
index 000000000000..6ad48d1f54f7
--- /dev/null
+++ b/test/CodeGen/X86/jump_table_align.ll
@@ -0,0 +1,29 @@
+; RUN: llc -filetype=obj <%s -jump-table-type=single -o %t1
+; RUN: llvm-objdump -triple=x86_64-unknown-linux-gnu -d %t1 | FileCheck %s
+target triple = "x86_64-unknown-linux-gnu"
+define i32 @f() unnamed_addr jumptable {
+  ret i32 0
+}
+
+define i32 @g(i8* %a) unnamed_addr jumptable {
+  ret i32 0
+}
+
+define void @h(void ()* %func) unnamed_addr jumptable {
+  ret void
+}
+
+define i32 @main() {
+  %g = alloca i32 (...)*, align 8
+  store i32 (...)* bitcast (i32 ()* @f to i32 (...)*), i32 (...)** %g, align 8
+  %1 = load i32 (...)** %g, align 8
+  %call = call i32 (...)* %1()
+  call void (void ()*)* @h(void ()* bitcast (void (void ()*)* @h to void ()*))
+  %a = call i32 (i32*)* bitcast (i32 (i8*)* @g to i32(i32*)*)(i32* null)
+  ret i32 %a
+}
+
+; Make sure that the padding from getJumpInstrTableEntryBound is right.
+; CHECK: __llvm_jump_instr_table_0_1:
+; CHECK-NEXT: e9 00 00 00 00                                  jmp     0
+; CHECK-NEXT: 0f 1f 00                                        nopl    (%rax)
diff --git a/test/CodeGen/X86/jump_table_bitcast.ll b/test/CodeGen/X86/jump_table_bitcast.ll
index 33a798f7a6b7..749b77a166ea 100644
--- a/test/CodeGen/X86/jump_table_bitcast.ll
+++ b/test/CodeGen/X86/jump_table_bitcast.ll
@@ -15,12 +15,12 @@ define void @h(void ()* %func) unnamed_addr jumptable {
 define i32 @main() {
   %g = alloca i32 (...)*, align 8
   store i32 (...)* bitcast (i32 ()* @f to i32 (...)*), i32 (...)** %g, align 8
-; CHECK: movq    $__llvm_jump_instr_table_0_[[ENTRY:1|2|3]], (%rsp)
-; CHECK: movl    $__llvm_jump_instr_table_0_[[ENTRY]], %ecx
+; CHECK: movq    $__llvm_jump_instr_table_0_[[ENTRY:1|2|3]],
+; CHECK: movl    $__llvm_jump_instr_table_0_[[ENTRY]],
   %1 = load i32 (...)** %g, align 8
   %call = call i32 (...)* %1()
   call void (void ()*)* @h(void ()* bitcast (void (void ()*)* @h to void ()*))
-; CHECK: movl    $__llvm_jump_instr_table_0_{{1|2|3}}, %edi
+; CHECK: movl    $__llvm_jump_instr_table_0_{{1|2|3}},
 ; CHECK: callq   h
 
   %a = call i32 (i32*)* bitcast (i32 (i8*)* @g to i32(i32*)*)(i32* null)
@@ -28,17 +28,14 @@ define i32 @main() {
   ret i32 %a
 }
 
-; CHECK:         .globl  __llvm_jump_instr_table_0_1
 ; CHECK:         .align  8, 0x90
 ; CHECK:         .type   __llvm_jump_instr_table_0_1,@function
 ; CHECK: __llvm_jump_instr_table_0_1:
 ; CHECK:         jmp     {{f|g|h}}@PLT
-; CHECK:         .globl  __llvm_jump_instr_table_0_2
 ; CHECK:         .align  8, 0x90
 ; CHECK:         .type   __llvm_jump_instr_table_0_2,@function
 ; CHECK: __llvm_jump_instr_table_0_2:
 ; CHECK:         jmp     {{f|g|h}}@PLT
-; CHECK:         .globl  __llvm_jump_instr_table_0_3
 ; CHECK:         .align  8, 0x90
 ; CHECK:         .type   __llvm_jump_instr_table_0_3,@function
 ; CHECK: __llvm_jump_instr_table_0_3:
diff --git a/test/CodeGen/X86/jump_tables.ll b/test/CodeGen/X86/jump_tables.ll
index 5a0aed0c1761..485154eaa2a9 100644
--- a/test/CodeGen/X86/jump_tables.ll
+++ b/test/CodeGen/X86/jump_tables.ll
@@ -7,6 +7,20 @@ target triple = "x86_64-unknown-linux-gnu"
 
 %struct.fun_struct = type { i32 (...)* }
 
+@a = global [12 x i32 () *] [ i32 ()* bitcast (void ()* @indirect_fun to i32 ()*),
+			      i32 ()* bitcast (void ()* @indirect_fun_match to i32 ()*),
+			      i32 ()* bitcast (i32 ()* @indirect_fun_i32 to i32 ()*),
+			      i32 ()* bitcast (i32 (i32)* @indirect_fun_i32_1 to i32 ()*),
+			      i32 ()* bitcast (i32 (i32, i32)* @indirect_fun_i32_2 to i32 ()*),
+			      i32 ()* bitcast (i32* (i32*, i32)* @indirect_fun_i32S_2 to i32 ()*),
+			      i32 ()* bitcast (void (%struct.fun_struct)* @indirect_fun_struct to i32 ()*),
+			      i32 ()* bitcast (void (i32 (...)*, i32)* @indirect_fun_fun to i32 ()*),
+			      i32 ()* bitcast (i32 (i32 (...)*, i32)* @indirect_fun_fun_ret to i32 ()*),
+			      i32 ()* bitcast (void ([19 x i8])* @indirect_fun_array to i32 ()*),
+			      i32 ()* bitcast (void (<3 x i32>)* @indirect_fun_vec to i32 ()*),
+			      i32 ()* bitcast (void (<4 x float>)* @indirect_fun_vec_2 to i32 ()*)
+			    ]
+
 define void @indirect_fun() unnamed_addr jumptable {
   ret void
 }
@@ -74,62 +88,50 @@ define i32 @main(i32 %argc, i8** %argv) {
   ret i32 %a
 }
 
-; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_1
 ; SINGLE-DAG:         .align  8, 0x90
 ; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_1,@function
 ; SINGLE-DAG: __llvm_jump_instr_table_0_1:
 ; SINGLE-DAG:         jmp     indirect_fun_array@PLT
-; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_2
 ; SINGLE-DAG:         .align  8, 0x90
 ; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_2,@function
 ; SINGLE-DAG: __llvm_jump_instr_table_0_2:
 ; SINGLE-DAG:         jmp     indirect_fun_i32_2@PLT
-; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_3
 ; SINGLE-DAG:         .align  8, 0x90
 ; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_3,@function
 ; SINGLE-DAG: __llvm_jump_instr_table_0_3:
 ; SINGLE-DAG:         jmp     indirect_fun_vec_2@PLT
-; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_4
 ; SINGLE-DAG:         .align  8, 0x90
 ; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_4,@function
 ; SINGLE-DAG: __llvm_jump_instr_table_0_4:
 ; SINGLE-DAG:         jmp     indirect_fun_i32S_2@PLT
-; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_5
 ; SINGLE-DAG:         .align  8, 0x90
 ; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_5,@function
 ; SINGLE-DAG: __llvm_jump_instr_table_0_5:
 ; SINGLE-DAG:         jmp     indirect_fun_struct@PLT
-; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_6
 ; SINGLE-DAG:         .align  8, 0x90
 ; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_6,@function
 ; SINGLE-DAG: __llvm_jump_instr_table_0_6:
 ; SINGLE-DAG:         jmp     indirect_fun_i32_1@PLT
-; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_7
 ; SINGLE-DAG:         .align  8, 0x90
 ; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_7,@function
 ; SINGLE-DAG: __llvm_jump_instr_table_0_7:
 ; SINGLE-DAG:         jmp     indirect_fun_i32@PLT
-; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_8
 ; SINGLE-DAG:         .align  8, 0x90
 ; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_8,@function
 ; SINGLE-DAG: __llvm_jump_instr_table_0_8:
 ; SINGLE-DAG:         jmp     indirect_fun_fun@PLT
-; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_9
 ; SINGLE-DAG:         .align  8, 0x90
 ; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_9,@function
 ; SINGLE-DAG: __llvm_jump_instr_table_0_9:
 ; SINGLE-DAG:         jmp     indirect_fun_fun_ret@PLT
-; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_10
 ; SINGLE-DAG:         .align  8, 0x90
 ; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_10,@function
 ; SINGLE-DAG: __llvm_jump_instr_table_0_10:
 ; SINGLE-DAG:         jmp     indirect_fun@PLT
-; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_11
 ; SINGLE-DAG:         .align  8, 0x90
 ; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_11,@function
 ; SINGLE-DAG: __llvm_jump_instr_table_0_11:
 ; SINGLE-DAG:         jmp     indirect_fun_match@PLT
-; SINGLE-DAG:         .globl  __llvm_jump_instr_table_0_12
 ; SINGLE-DAG:         .align  8, 0x90
 ; SINGLE-DAG:         .type   __llvm_jump_instr_table_0_12,@function
 ; SINGLE-DAG: __llvm_jump_instr_table_0_12:
@@ -144,82 +146,69 @@ define i32 @main(i32 %argc, i8** %argv) {
 ; SINGLE-DAG:         ud2
 
 
-; ARITY-DAG:         .globl  __llvm_jump_instr_table_2_1
 ; ARITY-DAG:         .align  8, 0x90
 ; ARITY-DAG:         .type   __llvm_jump_instr_table_2_1,@function
 ; ARITY-DAG: __llvm_jump_instr_table_2_1:
 ; ARITY-DAG:         jmp     indirect_fun{{.*}}@PLT
 ; ARITY-DAG:         .align  8, 0x90
 ; ARITY-DAG:         ud2
-; ARITY-DAG:         .globl  __llvm_jump_instr_table_0_1
 ; ARITY-DAG:         .align  8, 0x90
 ; ARITY-DAG:         .type   __llvm_jump_instr_table_0_1,@function
 ; ARITY-DAG: __llvm_jump_instr_table_0_1:
 ; ARITY-DAG:         jmp     indirect_fun{{.*}}@PLT
-; ARITY-DAG:         .globl  __llvm_jump_instr_table_1_1
 ; ARITY-DAG:         .align  8, 0x90
 ; ARITY-DAG:         .type   __llvm_jump_instr_table_1_1,@function
 ; ARITY-DAG: __llvm_jump_instr_table_1_1:
 ; ARITY-DAG:         jmp     indirect_fun{{.*}}@PLT
 
-; SIMPL-DAG:         .globl  __llvm_jump_instr_table_2_1
 ; SIMPL-DAG:         .align  8, 0x90
 ; SIMPL-DAG:         .type   __llvm_jump_instr_table_2_1,@function
 ; SIMPL-DAG: __llvm_jump_instr_table_2_1:
 ; SIMPL-DAG:         jmp     indirect_fun{{.*}}@PLT
 ; SIMPL-DAG:         .align  8, 0x90
 ; SIMPL-DAG:         ud2
-; SIMPL-DAG:         .globl  __llvm_jump_instr_table_0_1
 ; SIMPL-DAG:         .align  8, 0x90
 ; SIMPL-DAG:         .type   __llvm_jump_instr_table_0_1,@function
 ; SIMPL-DAG: __llvm_jump_instr_table_0_1:
 ; SIMPL-DAG:         jmp     indirect_fun{{.*}}@PLT
-; SIMPL-DAG:         .globl  __llvm_jump_instr_table_1_1
 ; SIMPL-DAG:         .align  8, 0x90
 ; SIMPL-DAG:         .type   __llvm_jump_instr_table_1_1,@function
 ; SIMPL-DAG: __llvm_jump_instr_table_1_1:
 ; SIMPL-DAG:         jmp     indirect_fun{{.*}}@PLT
-; SIMPL-DAG:         .globl  __llvm_jump_instr_table_3_1
 ; SIMPL-DAG:         .align  8, 0x90
 ; SIMPL-DAG:         .type   __llvm_jump_instr_table_3_1,@function
 ; SIMPL-DAG: __llvm_jump_instr_table_3_1:
 ; SIMPL-DAG:         jmp     indirect_fun{{.*}}@PLT
-; SIMPL-DAG:         .globl  __llvm_jump_instr_table_4_1
 ; SIMPL-DAG:         .align  8, 0x90
 ; SIMPL-DAG:         .type   __llvm_jump_instr_table_4_1,@function
 ; SIMPL-DAG: __llvm_jump_instr_table_4_1:
 ; SIMPL-DAG:         jmp     indirect_fun{{.*}}@PLT
 
 
-; FULL-DAG:        .globl  __llvm_jump_instr_table_10_1
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        .type   __llvm_jump_instr_table_10_1,@function
 ; FULL-DAG:__llvm_jump_instr_table_10_1:
 ; FULL-DAG:        jmp     indirect_fun_i32_1@PLT
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        ud2
-; FULL-DAG:        .globl  __llvm_jump_instr_table_9_1
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        .type   __llvm_jump_instr_table_9_1,@function
 ; FULL-DAG:__llvm_jump_instr_table_9_1:
 ; FULL-DAG:        jmp     indirect_fun_i32_2@PLT
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        ud2
-; FULL-DAG:        .globl  __llvm_jump_instr_table_7_1
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        .type   __llvm_jump_instr_table_7_1,@function
 ; FULL-DAG:__llvm_jump_instr_table_7_1:
 ; FULL-DAG:        jmp     indirect_fun_i32S_2@PLT
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        ud2
-; FULL-DAG:        .globl  __llvm_jump_instr_table_3_1
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        .type   __llvm_jump_instr_table_3_1,@function
 ; FULL-DAG:__llvm_jump_instr_table_3_1:
 ; FULL-DAG:        jmp     indirect_fun_vec_2@PLT
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        ud2
-; FULL-DAG:        .globl  __llvm_jump_instr_table_2_1
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        .type   __llvm_jump_instr_table_2_1,@function
 ; FULL-DAG:__llvm_jump_instr_table_2_1:
@@ -228,42 +217,36 @@ define i32 @main(i32 %argc, i8** %argv) {
 ; FULL-DAG:        ud2
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        ud2
-; FULL-DAG:        .globl  __llvm_jump_instr_table_8_1
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        .type   __llvm_jump_instr_table_8_1,@function
 ; FULL-DAG:__llvm_jump_instr_table_8_1:
 ; FULL-DAG:        jmp     indirect_fun_i32@PLT
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        ud2
-; FULL-DAG:        .globl  __llvm_jump_instr_table_1_1
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        .type   __llvm_jump_instr_table_1_1,@function
 ; FULL-DAG:__llvm_jump_instr_table_1_1:
 ; FULL-DAG:        jmp     indirect_fun_array@PLT
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        ud2
-; FULL-DAG:        .globl  __llvm_jump_instr_table_0_1
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        .type   __llvm_jump_instr_table_0_1,@function
 ; FULL-DAG:__llvm_jump_instr_table_0_1:
 ; FULL-DAG:        jmp     indirect_fun_vec@PLT
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        ud2
-; FULL-DAG:        .globl  __llvm_jump_instr_table_6_1
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        .type   __llvm_jump_instr_table_6_1,@function
 ; FULL-DAG:__llvm_jump_instr_table_6_1:
 ; FULL-DAG:        jmp     indirect_fun_struct@PLT
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        ud2
-; FULL-DAG:        .globl  __llvm_jump_instr_table_5_1
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        .type   __llvm_jump_instr_table_5_1,@function
 ; FULL-DAG:__llvm_jump_instr_table_5_1:
 ; FULL-DAG:        jmp     indirect_fun_fun@PLT
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        ud2
-; FULL-DAG:        .globl  __llvm_jump_instr_table_4_1
 ; FULL-DAG:        .align  8, 0x90
 ; FULL-DAG:        .type   __llvm_jump_instr_table_4_1,@function
 ; FULL-DAG:__llvm_jump_instr_table_4_1:
diff --git a/test/CodeGen/X86/large-code-model-isel.ll b/test/CodeGen/X86/large-code-model-isel.ll
new file mode 100644
index 000000000000..3c283d934949
--- /dev/null
+++ b/test/CodeGen/X86/large-code-model-isel.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -code-model=large -mcpu=core2 -march=x86-64 -O0 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@.str10 = external unnamed_addr constant [2 x i8], align 1
+
+define void @foo() {
+; CHECK-LABEL: foo:
+entry:
+; CHECK: callq
+  %call = call i64* undef(i64* undef, i8* getelementptr inbounds ([2 x i8]* @.str10, i32 0, i32 0))
+  ret void
+}
diff --git a/test/CodeGen/X86/lea-2.ll b/test/CodeGen/X86/lea-2.ll
index 82cefb728c6e..98c57c7d090d 100644
--- a/test/CodeGen/X86/lea-2.ll
+++ b/test/CodeGen/X86/lea-2.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -x86-asm-syntax=intel | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -x86-asm-syntax=intel | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-nacl -x86-asm-syntax=intel | FileCheck %s
 
 define i32 @test1(i32 %A, i32 %B) {
   %tmp1 = shl i32 %A, 2
@@ -7,7 +10,7 @@ define i32 @test1(i32 %A, i32 %B) {
 ; The above computation of %tmp4 should match a single lea, without using
 ; actual add instructions.
 ; CHECK-NOT: add
-; CHECK: lea {{[a-z]+}}, dword ptr [{{[a-z]+}} + 4*{{[a-z]+}} - 5]
+; CHECK: lea {{[a-z]+}}, [{{[a-z]+}} + 4*{{[a-z]+}} - 5]
 
   ret i32 %tmp4
 }
diff --git a/test/CodeGen/X86/lea-3.ll b/test/CodeGen/X86/lea-3.ll
index c439ee1d06e3..a56403a24b03 100644
--- a/test/CodeGen/X86/lea-3.ll
+++ b/test/CodeGen/X86/lea-3.ll
@@ -1,4 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
 
 ; CHECK: leaq (,[[A0:%rdi|%rcx]],4), %rax
diff --git a/test/CodeGen/X86/lea-4.ll b/test/CodeGen/X86/lea-4.ll
index cef47264a583..00c2278c54bf 100644
--- a/test/CodeGen/X86/lea-4.ll
+++ b/test/CodeGen/X86/lea-4.ll
@@ -1,4 +1,7 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s
+
 
 define zeroext i16 @t1(i32 %on_off) nounwind {
 entry:
diff --git a/test/CodeGen/X86/lea-5.ll b/test/CodeGen/X86/lea-5.ll
new file mode 100644
index 000000000000..50d3aaf4c594
--- /dev/null
+++ b/test/CodeGen/X86/lea-5.ll
@@ -0,0 +1,59 @@
+; test for more complicated forms of lea operands which can be generated
+; in loop optimized cases.
+; See also http://llvm.org/bugs/show_bug.cgi?id=20016
+
+; RUN: llc < %s -mtriple=x86_64-linux -O2        | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -O2 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-nacl -O2 | FileCheck %s -check-prefix=X32
+
+; Function Attrs: nounwind readnone uwtable
+define void @foo(i32 %x, i32 %d) #0 {
+entry:
+  %a = alloca [8 x i32], align 16
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.cond, %entry
+  %d.addr.0 = phi i32 [ %d, %entry ], [ %inc, %while.cond ]
+  %arrayidx = getelementptr inbounds [8 x i32]* %a, i32 0, i32 %d.addr.0
+
+; CHECK: leaq	-40(%rsp,%r{{[^,]*}},4), %rax
+; X32:   leal	-40(%rsp,%r{{[^,]*}},4), %eax
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp eq i32 %0, 0
+  %inc = add nsw i32 %d.addr.0, 1
+
+; CHECK: leaq	4(%r{{[^,]*}}), %r{{[^,]*}}
+; X32:   leal	4(%r{{[^,]*}}), %e{{[^,]*}}
+  br i1 %cmp1, label %while.end, label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  ret void
+}
+
+; The same test as above but with enforsed stack realignment (%a aligned by 64)
+; to check one more case of correct lea generation.
+
+; Function Attrs: nounwind readnone uwtable
+define void @bar(i32 %x, i32 %d) #0 {
+entry:
+  %a = alloca [8 x i32], align 64
+  br label %while.cond
+
+while.cond:                                       ; preds = %while.cond, %entry
+  %d.addr.0 = phi i32 [ %d, %entry ], [ %inc, %while.cond ]
+  %arrayidx = getelementptr inbounds [8 x i32]* %a, i32 0, i32 %d.addr.0
+
+; CHECK: leaq	(%rsp,%r{{[^,]*}},4), %rax
+; X32:   leal	(%rsp,%r{{[^,]*}},4), %eax
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp eq i32 %0, 0
+  %inc = add nsw i32 %d.addr.0, 1
+
+; CHECK: leaq	4(%r{{[^,]*}}), %r{{[^,]*}}
+; X32:   leal	4(%r{{[^,]*}}), %e{{[^,]*}}
+  br i1 %cmp1, label %while.end, label %while.cond
+
+while.end:                                        ; preds = %while.cond
+  ret void
+}
+
diff --git a/test/CodeGen/X86/lea.ll b/test/CodeGen/X86/lea.ll
index 93cfe4611b44..9b6632c94693 100644
--- a/test/CodeGen/X86/lea.ll
+++ b/test/CodeGen/X86/lea.ll
@@ -1,5 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
 ; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-nacl | FileCheck %s
 
 define i32 @test1(i32 %x) nounwind {
         %tmp1 = shl i32 %x, 3
diff --git a/test/CodeGen/X86/long-extend.ll b/test/CodeGen/X86/long-extend.ll
deleted file mode 100644
index 5bbd41dad9d2..000000000000
--- a/test/CodeGen/X86/long-extend.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -mcpu=core-avx-i -mtriple=x86_64-linux -asm-verbose=0| FileCheck %s
-define void @test_long_extend(<16 x i8> %a, <16 x i32>* %p) nounwind {
-; CHECK-LABEL: test_long_extend
-; CHECK: vpunpcklbw	%xmm1, %xmm0, [[REG1:%xmm[0-9]+]]
-; CHECK: vpunpckhwd	%xmm1, [[REG1]], [[REG2:%xmm[0-9]+]]
-; CHECK: vpunpcklwd	%xmm1, [[REG1]], %x[[REG3:mm[0-9]+]]
-; CHECK: vinsertf128	$1, [[REG2]], %y[[REG3]], [[REG_result0:%ymm[0-9]+]]
-; CHECK: vpunpckhbw	%xmm1, %xmm0, [[REG4:%xmm[0-9]+]]
-; CHECK: vpunpckhwd	%xmm1, [[REG4]], [[REG5:%xmm[0-9]+]]
-; CHECK: vpunpcklwd	%xmm1, [[REG4]], %x[[REG6:mm[0-9]+]]
-; CHECK: vinsertf128	$1, [[REG5]], %y[[REG6]], [[REG_result1:%ymm[0-9]+]]
-; CHECK: vmovaps	[[REG_result1]], 32(%rdi)
-; CHECK: vmovaps	[[REG_result0]], (%rdi)
-
-  %tmp = zext <16 x i8> %a to <16 x i32>
-  store <16 x i32> %tmp, <16 x i32>*%p
-  ret void
-}
diff --git a/test/CodeGen/X86/loop-strength-reduce8.ll b/test/CodeGen/X86/loop-strength-reduce8.ll
index 1d042769b0ba..c36047c451ae 100644
--- a/test/CodeGen/X86/loop-strength-reduce8.ll
+++ b/test/CodeGen/X86/loop-strength-reduce8.ll
@@ -1,6 +1,9 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
 
-; CHECK: leal 16(%eax), %edx
+; FIXME: The first two instructions, movl and addl, should have been combined to
+; "leal 16(%eax), %edx" by the backend (PR20776).
+; CHECK: movl    %eax, %edx
+; CHECK: addl    $16, %edx
 ; CHECK: align
 ; CHECK: addl    $4, %edx
 ; CHECK: decl    %ecx
diff --git a/test/CodeGen/X86/lower-bitcast.ll b/test/CodeGen/X86/lower-bitcast.ll
index f47161e5520c..edb8433ec30c 100644
--- a/test/CodeGen/X86/lower-bitcast.ll
+++ b/test/CodeGen/X86/lower-bitcast.ll
@@ -68,13 +68,13 @@ define i64 @test4(i64 %A) {
   %2 = bitcast <2 x i32> %add to i64
   ret i64 %2
 }
-; FIXME: At the moment we still produce the sequence pshufd+paddq+pshufd.
+; FIXME: At the moment we still produce the sequence pshufd+paddd+pshufd.
 ; Ideally, we should fold that sequence into a single paddd. This is fixed with
 ; the widening legalization.
 ;
 ; CHECK-LABEL: test4
 ; CHECK: pshufd
-; CHECK-NEXT: paddq
+; CHECK-NEXT: paddd
 ; CHECK-NEXT: pshufd
 ; CHECK: ret
 ;
diff --git a/test/CodeGen/X86/lower-vec-shift-2.ll b/test/CodeGen/X86/lower-vec-shift-2.ll
new file mode 100644
index 000000000000..770775d32427
--- /dev/null
+++ b/test/CodeGen/X86/lower-vec-shift-2.ll
@@ -0,0 +1,149 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s -check-prefix=SSE2
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s -check-prefix=AVX
+
+define <8 x i16> @test1(<8 x i16> %A, <8 x i16> %B) {
+; SSE2-LABEL: test1:
+; SSE2:       # BB#0
+; SSE2-NEXT:    movd  %xmm1, %eax
+; SSE2-NEXT:    movzwl  %ax, %eax
+; SSE2-NEXT:    movd  %eax, %xmm1
+; SSE2-NEXT:    psllw  %xmm1, %xmm0
+; SSE2-NEXT:    retq
+; AVX-LABEL: test1:
+; AVX:       # BB#0
+; AVX-NEXT:    vpxor  %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpblendw  {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT:    vpsllw  %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %vecinit14 = shufflevector <8 x i16> %B, <8 x i16> undef, <8 x i32> zeroinitializer
+  %shl = shl <8 x i16> %A, %vecinit14
+  ret <8 x i16> %shl
+}
+
+define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
+; SSE2-LABEL: test2:
+; SSE2:       # BB#0
+; SSE2-NEXT:    xorps  %xmm2, %xmm2
+; SSE2-NEXT:    movss  %xmm1, %xmm2
+; SSE2-NEXT:    pslld  %xmm2, %xmm0
+; SSE2-NEXT:    retq
+; AVX-LABEL: test2:
+; AVX:       # BB#0
+; AVX-NEXT:    vpxor  %xmm2, %xmm2
+; AVX-NEXT:    vpblendw  {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT:    vpslld  %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %vecinit6 = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shl = shl <4 x i32> %A, %vecinit6
+  ret <4 x i32> %shl
+}
+
+define <2 x i64> @test3(<2 x i64> %A, <2 x i64> %B) {
+; SSE2-LABEL: test3:
+; SSE2:       # BB#0
+; SSE2-NEXT:    psllq  %xmm1, %xmm0
+; SSE2-NEXT:    retq
+; AVX-LABEL: test3:
+; AVX:       # BB#0
+; AVX-NEXT:    vpsllq  %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %vecinit2 = shufflevector <2 x i64> %B, <2 x i64> undef, <2 x i32> zeroinitializer
+  %shl = shl <2 x i64> %A, %vecinit2
+  ret <2 x i64> %shl
+}
+
+define <8 x i16> @test4(<8 x i16> %A, <8 x i16> %B) {
+; SSE2-LABEL: test4:
+; SSE2:       # BB#0
+; SSE2-NEXT:    movd  %xmm1, %eax
+; SSE2-NEXT:    movzwl  %ax, %eax
+; SSE2-NEXT:    movd  %eax, %xmm1
+; SSE2-NEXT:    psrlw  %xmm1, %xmm0
+; SSE2-NEXT:    retq
+; AVX-LABEL: test4:
+; AVX:       # BB#0
+; AVX-NEXT:    vpxor  %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpblendw  {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT:    vpsrlw  %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %vecinit14 = shufflevector <8 x i16> %B, <8 x i16> undef, <8 x i32> zeroinitializer
+  %shr = lshr <8 x i16> %A, %vecinit14
+  ret <8 x i16> %shr
+}
+
+define <4 x i32> @test5(<4 x i32> %A, <4 x i32> %B) {
+; SSE2-LABEL: test5:
+; SSE2:       # BB#0
+; SSE2-NEXT:    xorps  %xmm2, %xmm2
+; SSE2-NEXT:    movss  %xmm1, %xmm2
+; SSE2-NEXT:    psrld  %xmm2, %xmm0
+; SSE2-NEXT:    retq
+; AVX-LABEL: test5:
+; AVX:       # BB#0
+; AVX-NEXT:    vpxor  %xmm2, %xmm2
+; AVX-NEXT:    vpblendw  {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT:    vpsrld  %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %vecinit6 = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shr = lshr <4 x i32> %A, %vecinit6
+  ret <4 x i32> %shr
+}
+
+define <2 x i64> @test6(<2 x i64> %A, <2 x i64> %B) {
+; SSE2-LABEL: test6:
+; SSE2:       # BB#0
+; SSE2-NEXT:    psrlq  %xmm1, %xmm0
+; SSE2-NEXT:    retq
+; AVX-LABEL: test6:
+; AVX:       # BB#0
+; AVX-NEXT:    vpsrlq  %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %vecinit2 = shufflevector <2 x i64> %B, <2 x i64> undef, <2 x i32> zeroinitializer
+  %shr = lshr <2 x i64> %A, %vecinit2
+  ret <2 x i64> %shr
+}
+
+define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) {
+; SSE2-LABEL: test7:
+; SSE2:       # BB#0
+; SSE2-NEXT:    movd  %xmm1, %eax
+; SSE2-NEXT:    movzwl  %ax, %eax
+; SSE2-NEXT:    movd  %eax, %xmm1
+; SSE2-NEXT:    psraw  %xmm1, %xmm0
+; SSE2-NEXT:    retq
+; AVX-LABEL: test7:
+; AVX:       # BB#0
+; AVX-NEXT:    vpxor  %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vpblendw  {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7]
+; AVX-NEXT:    vpsraw  %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %vecinit14 = shufflevector <8 x i16> %B, <8 x i16> undef, <8 x i32> zeroinitializer
+  %shr = ashr <8 x i16> %A, %vecinit14
+  ret <8 x i16> %shr
+}
+
+define <4 x i32> @test8(<4 x i32> %A, <4 x i32> %B) {
+; SSE2-LABEL: test8:
+; SSE2:       # BB#0
+; SSE2-NEXT:    xorps  %xmm2, %xmm2
+; SSE2-NEXT:    movss  %xmm1, %xmm2
+; SSE2-NEXT:    psrad  %xmm2, %xmm0
+; SSE2-NEXT:    retq
+; AVX-LABEL: test8:
+; AVX:       # BB#0
+; AVX-NEXT:    vpxor  %xmm2, %xmm2
+; AVX-NEXT:    vpblendw  {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
+; AVX-NEXT:    vpsrad  %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %vecinit6 = shufflevector <4 x i32> %B, <4 x i32> undef, <4 x i32> zeroinitializer
+  %shr = ashr <4 x i32> %A, %vecinit6
+  ret <4 x i32> %shr
+}
diff --git a/test/CodeGen/X86/lzcnt-tzcnt.ll b/test/CodeGen/X86/lzcnt-tzcnt.ll
index 07e4b9d8ce61..e98764a0d787 100644
--- a/test/CodeGen/X86/lzcnt-tzcnt.ll
+++ b/test/CodeGen/X86/lzcnt-tzcnt.ll
@@ -437,6 +437,137 @@ define i64 @test18_cttz(i64* %ptr) {
 ; CHECK: tzcnt
 ; CHECK-NEXT: ret
 
+define i16 @test1b_ctlz(i16 %v) {
+  %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
+  %tobool = icmp ne i16 %v, 0
+  %cond = select i1 %tobool, i16 16, i16 %cnt
+  ret i16 %cond
+}
+; CHECK-LABEL: test1b_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test2b_ctlz(i32 %v) {
+  %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
+  %tobool = icmp ne i32 %v, 0
+  %cond = select i1 %tobool, i32 32, i32 %cnt
+  ret i32 %cond
+}
+; CHECK-LABEL: test2b_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test3b_ctlz(i64 %v) {
+  %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
+  %tobool = icmp ne i64 %v, 0
+  %cond = select i1 %tobool, i64 64, i64 %cnt
+  ret i64 %cond
+}
+; CHECK-LABEL: test3b_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test4b_ctlz(i16 %v) {
+  %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
+  %tobool = icmp ne i16 %v, 0
+  %cond = select i1 %tobool, i16 %cnt, i16 16
+  ret i16 %cond
+}
+; CHECK-LABEL: test4b_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test5b_ctlz(i32 %v) {
+  %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
+  %tobool = icmp ne i32 %v, 0
+  %cond = select i1 %tobool, i32 %cnt, i32 32
+  ret i32 %cond
+}
+; CHECK-LABEL: test5b_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test6b_ctlz(i64 %v) {
+  %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
+  %tobool = icmp ne i64 %v, 0
+  %cond = select i1 %tobool, i64 %cnt, i64 64
+  ret i64 %cond
+}
+; CHECK-LABEL: test6b_ctlz
+; CHECK: lzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test1b_cttz(i16 %v) {
+  %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
+  %tobool = icmp ne i16 %v, 0
+  %cond = select i1 %tobool, i16 16, i16 %cnt
+  ret i16 %cond
+}
+; CHECK-LABEL: test1b_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test2b_cttz(i32 %v) {
+  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+  %tobool = icmp ne i32 %v, 0
+  %cond = select i1 %tobool, i32 32, i32 %cnt
+  ret i32 %cond
+}
+; CHECK-LABEL: test2b_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test3b_cttz(i64 %v) {
+  %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+  %tobool = icmp ne i64 %v, 0
+  %cond = select i1 %tobool, i64 64, i64 %cnt
+  ret i64 %cond
+}
+; CHECK-LABEL: test3b_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i16 @test4b_cttz(i16 %v) {
+  %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
+  %tobool = icmp ne i16 %v, 0
+  %cond = select i1 %tobool, i16 %cnt, i16 16
+  ret i16 %cond
+}
+; CHECK-LABEL: test4b_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i32 @test5b_cttz(i32 %v) {
+  %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
+  %tobool = icmp ne i32 %v, 0
+  %cond = select i1 %tobool, i32 %cnt, i32 32
+  ret i32 %cond
+}
+; CHECK-LABEL: test5b_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
+
+define i64 @test6b_cttz(i64 %v) {
+  %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
+  %tobool = icmp ne i64 %v, 0
+  %cond = select i1 %tobool, i64 %cnt, i64 64
+  ret i64 %cond
+}
+; CHECK-LABEL: test6b_cttz
+; CHECK: tzcnt
+; CHECK-NEXT: ret
+
 
 declare i64 @llvm.cttz.i64(i64, i1)
 declare i32 @llvm.cttz.i32(i32, i1)
diff --git a/test/CodeGen/X86/macho-comdat.ll b/test/CodeGen/X86/macho-comdat.ll
index 3c2d997b4594..60560470ed5b 100644
--- a/test/CodeGen/X86/macho-comdat.ll
+++ b/test/CodeGen/X86/macho-comdat.ll
@@ -2,5 +2,5 @@
 ; RUN: FileCheck < %t %s
 
 $f = comdat any
-@v = global i32 0, comdat $f
+@v = global i32 0, comdat($f)
 ; CHECK: LLVM ERROR: MachO doesn't support COMDATs, 'f' cannot be lowered.
diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll
new file mode 100644
index 000000000000..726d7125a638
--- /dev/null
+++ b/test/CodeGen/X86/masked_memop.ll
@@ -0,0 +1,217 @@
+; RUN: llc -mtriple=x86_64-apple-darwin  -mcpu=knl < %s | FileCheck %s -check-prefix=AVX512
+; RUN: llc -mtriple=x86_64-apple-darwin  -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
+; RUN: opt -mtriple=x86_64-apple-darwin -codegenprepare -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=AVX_SCALAR
+
+; AVX512-LABEL: test1
+; AVX512: vmovdqu32       (%rdi), %zmm0 {%k1} {z}
+
+; AVX2-LABEL: test1
+; AVX2: vpmaskmovd      32(%rdi)
+; AVX2: vpmaskmovd      (%rdi)
+; AVX2-NOT: blend
+
+; AVX_SCALAR-LABEL: test1
+; AVX_SCALAR-NOT: masked
+; AVX_SCALAR: extractelement
+; AVX_SCALAR: insertelement
+; AVX_SCALAR: extractelement
+; AVX_SCALAR: insertelement
+define <16 x i32> @test1(<16 x i32> %trigger, <16 x i32>* %addr) {
+  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
+  %res = call <16 x i32> @llvm.masked.load.v16i32(<16 x i32>* %addr, i32 4, <16 x i1>%mask, <16 x i32>undef)
+  ret <16 x i32> %res
+}
+
+; AVX512-LABEL: test2
+; AVX512: vmovdqu32       (%rdi), %zmm0 {%k1} {z}
+
+; AVX2-LABEL: test2
+; AVX2: vpmaskmovd      {{.*}}(%rdi)
+; AVX2: vpmaskmovd      {{.*}}(%rdi)
+; AVX2-NOT: blend
+define <16 x i32> @test2(<16 x i32> %trigger, <16 x i32>* %addr) {
+  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
+  %res = call <16 x i32> @llvm.masked.load.v16i32(<16 x i32>* %addr, i32 4, <16 x i1>%mask, <16 x i32>zeroinitializer)
+  ret <16 x i32> %res
+}
+
+; AVX512-LABEL: test3
+; AVX512: vmovdqu32       %zmm1, (%rdi) {%k1}
+
+; AVX_SCALAR-LABEL: test3
+; AVX_SCALAR-NOT: masked
+; AVX_SCALAR: extractelement
+; AVX_SCALAR: store
+; AVX_SCALAR: extractelement
+; AVX_SCALAR: store
+; AVX_SCALAR: extractelement
+; AVX_SCALAR: store
+define void @test3(<16 x i32> %trigger, <16 x i32>* %addr, <16 x i32> %val) {
+  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
+  call void @llvm.masked.store.v16i32(<16 x i32>%val, <16 x i32>* %addr, i32 4, <16 x i1>%mask)
+  ret void
+}
+
+; AVX512-LABEL: test4
+; AVX512: vmovups       (%rdi), %zmm{{.*{%k[1-7]}}}
+
+; AVX2-LABEL: test4
+; AVX2: vmaskmovps      {{.*}}(%rdi)
+; AVX2: vmaskmovps      {{.*}}(%rdi)
+; AVX2: blend
+define <16 x float> @test4(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %dst) {
+  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
+  %res = call <16 x float> @llvm.masked.load.v16f32(<16 x float>* %addr, i32 4, <16 x i1>%mask, <16 x float> %dst)
+  ret <16 x float> %res
+}
+
+; AVX512-LABEL: test5
+; AVX512: vmovupd (%rdi), %zmm1 {%k1}
+
+; AVX2-LABEL: test5
+; AVX2: vmaskmovpd
+; AVX2: vblendvpd
+; AVX2: vmaskmovpd
+; AVX2: vblendvpd
+define <8 x double> @test5(<8 x i32> %trigger, <8 x double>* %addr, <8 x double> %dst) {
+  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+  %res = call <8 x double> @llvm.masked.load.v8f64(<8 x double>* %addr, i32 4, <8 x i1>%mask, <8 x double>%dst)
+  ret <8 x double> %res
+}
+
+; AVX2-LABEL: test6
+; AVX2: vmaskmovpd
+; AVX2: vblendvpd
+define <2 x double> @test6(<2 x i64> %trigger, <2 x double>* %addr, <2 x double> %dst) {
+  %mask = icmp eq <2 x i64> %trigger, zeroinitializer
+  %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)
+  ret <2 x double> %res
+}
+
+; AVX2-LABEL: test7
+; AVX2: vmaskmovps      {{.*}}(%rdi)
+; AVX2: blend
+define <4 x float> @test7(<4 x i32> %trigger, <4 x float>* %addr, <4 x float> %dst) {
+  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+  %res = call <4 x float> @llvm.masked.load.v4f32(<4 x float>* %addr, i32 4, <4 x i1>%mask, <4 x float>%dst)
+  ret <4 x float> %res
+}
+
+; AVX2-LABEL: test8
+; AVX2: vpmaskmovd      {{.*}}(%rdi)
+; AVX2: blend
+define <4 x i32> @test8(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
+  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+  %res = call <4 x i32> @llvm.masked.load.v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)
+  ret <4 x i32> %res
+}
+
+; AVX2-LABEL: test9
+; AVX2: vpmaskmovd %xmm
+define void @test9(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %val) {
+  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+  call void @llvm.masked.store.v4i32(<4 x i32>%val, <4 x i32>* %addr, i32 4, <4 x i1>%mask)
+  ret void
+}
+
+; AVX2-LABEL: test10
+; AVX2: vmaskmovpd    (%rdi), %ymm
+; AVX2: blend
+define <4 x double> @test10(<4 x i32> %trigger, <4 x double>* %addr, <4 x double> %dst) {
+  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
+  %res = call <4 x double> @llvm.masked.load.v4f64(<4 x double>* %addr, i32 4, <4 x i1>%mask, <4 x double>%dst)
+  ret <4 x double> %res
+}
+
+; AVX2-LABEL: test11
+; AVX2: vmaskmovps
+; AVX2: vblendvps
+define <8 x float> @test11(<8 x i32> %trigger, <8 x float>* %addr, <8 x float> %dst) {
+  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+  %res = call <8 x float> @llvm.masked.load.v8f32(<8 x float>* %addr, i32 4, <8 x i1>%mask, <8 x float>%dst)
+  ret <8 x float> %res
+}
+
+; AVX2-LABEL: test12
+; AVX2: vpmaskmovd %ymm
+define void @test12(<8 x i32> %trigger, <8 x i32>* %addr, <8 x i32> %val) {
+  %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+  call void @llvm.masked.store.v8i32(<8 x i32>%val, <8 x i32>* %addr, i32 4, <8 x i1>%mask)
+  ret void
+}
+
+; AVX512-LABEL: test13
+; AVX512: vmovups       %zmm1, (%rdi) {%k1}
+
+define void @test13(<16 x i32> %trigger, <16 x float>* %addr, <16 x float> %val) {
+  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
+  call void @llvm.masked.store.v16f32(<16 x float>%val, <16 x float>* %addr, i32 4, <16 x i1>%mask)
+  ret void
+}
+
+; AVX2-LABEL: test14
+; AVX2: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; AVX2: vmaskmovps
+define void @test14(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %val) {
+  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+  call void @llvm.masked.store.v2f32(<2 x float>%val, <2 x float>* %addr, i32 4, <2 x i1>%mask)
+  ret void
+}
+
+; AVX2-LABEL: test15
+; AVX2: vpmaskmovq
+define void @test15(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %val) {
+  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+  call void @llvm.masked.store.v2i32(<2 x i32>%val, <2 x i32>* %addr, i32 4, <2 x i1>%mask)
+  ret void
+}
+
+; AVX2-LABEL: test16
+; AVX2: vmaskmovps
+; AVX2: vblendvps
+define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %dst) {
+  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+  %res = call <2 x float> @llvm.masked.load.v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)
+  ret <2 x float> %res
+}
+
+; AVX2-LABEL: test17
+; AVX2: vpmaskmovq
+; AVX2: vblendvpd
+define <2 x i32> @test17(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %dst) {
+  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+  %res = call <2 x i32> @llvm.masked.load.v2i32(<2 x i32>* %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst)
+  ret <2 x i32> %res
+}
+
+; AVX2-LABEL: test18
+; AVX2: vmaskmovps
+; AVX2-NOT: blend
+define <2 x float> @test18(<2 x i32> %trigger, <2 x float>* %addr) {
+  %mask = icmp eq <2 x i32> %trigger, zeroinitializer
+  %res = call <2 x float> @llvm.masked.load.v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>undef)
+  ret <2 x float> %res
+}
+
+
+declare <16 x i32> @llvm.masked.load.v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
+declare <4 x i32> @llvm.masked.load.v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
+declare <2 x i32> @llvm.masked.load.v2i32(<2 x i32>*, i32, <2 x i1>, <2 x i32>)
+declare void @llvm.masked.store.v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)
+declare void @llvm.masked.store.v8i32(<8 x i32>, <8 x i32>*, i32, <8 x i1>)
+declare void @llvm.masked.store.v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)
+declare void @llvm.masked.store.v2f32(<2 x float>, <2 x float>*, i32, <2 x i1>)
+declare void @llvm.masked.store.v2i32(<2 x i32>, <2 x i32>*, i32, <2 x i1>)
+declare void @llvm.masked.store.v16f32(<16 x float>, <16 x float>*, i32, <16 x i1>)
+declare void @llvm.masked.store.v16f32p(<16 x float>*, <16 x float>**, i32, <16 x i1>)
+declare <16 x float> @llvm.masked.load.v16f32(<16 x float>*, i32, <16 x i1>, <16 x float>)
+declare <8 x float> @llvm.masked.load.v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)
+declare <4 x float> @llvm.masked.load.v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
+declare <2 x float> @llvm.masked.load.v2f32(<2 x float>*, i32, <2 x i1>, <2 x float>)
+declare <8 x double> @llvm.masked.load.v8f64(<8 x double>*, i32, <8 x i1>, <8 x double>)
+declare <4 x double> @llvm.masked.load.v4f64(<4 x double>*, i32, <4 x i1>, <4 x double>)
+declare <2 x double> @llvm.masked.load.v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)
+declare void @llvm.masked.store.v8f64(<8 x double>, <8 x double>*, i32, <8 x i1>)
+declare void @llvm.masked.store.v2f64(<2 x double>, <2 x double>*, i32, <2 x i1>)
+declare void @llvm.masked.store.v2i64(<2 x i64>, <2 x i64>*, i32, <2 x i1>)
+
diff --git a/test/CodeGen/X86/mem-intrin-base-reg.ll b/test/CodeGen/X86/mem-intrin-base-reg.ll
new file mode 100644
index 000000000000..9a6de3dd1d92
--- /dev/null
+++ b/test/CodeGen/X86/mem-intrin-base-reg.ll
@@ -0,0 +1,100 @@
+; RUN: llc -mtriple=i686-windows -mattr=+sse2 < %s | FileCheck %s
+
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+; There is a conflict between lowering the X86 memory intrinsics and the "base"
+; register used to address stack locals.  See X86RegisterInfo::hasBaseRegister
+; for when this is necessary. Typically, we chose ESI for the base register,
+; which all of the X86 string instructions use.
+
+; The pattern of vector icmp and extractelement is used in these tests because
+; it forces creation of an aligned stack temporary. Perhaps such temporaries
+; shouldn't be aligned.
+
+declare void @escape_vla_and_icmp(i8*, i1 zeroext)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
+
+define i32 @memcpy_novla_vector(<4 x i32>* %vp0, i8* %a, i8* %b, i32 %n, i1 zeroext %cond) {
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 128, i32 4, i1 false)
+  br i1 %cond, label %spill_vectors, label %no_vectors
+
+no_vectors:
+  ret i32 0
+
+spill_vectors:
+  %vp1 = getelementptr <4 x i32>* %vp0, i32 1
+  %v0 = load <4 x i32>* %vp0
+  %v1 = load <4 x i32>* %vp1
+  %vicmp = icmp slt <4 x i32> %v0, %v1
+  %icmp = extractelement <4 x i1> %vicmp, i32 0
+  call void @escape_vla_and_icmp(i8* null, i1 zeroext %icmp)
+  %r = extractelement <4 x i32> %v0, i32 0
+  ret i32 %r
+}
+
+; CHECK-LABEL: _memcpy_novla_vector:
+; CHECK: andl $-16, %esp
+; CHECK-DAG: movl $32, %ecx
+; CHECK-DAG: movl {{.*}}, %esi
+; CHECK-DAG: movl {{.*}}, %edi
+; CHECK: rep;movsl
+
+define i32 @memcpy_vla_vector(<4 x i32>* %vp0, i8* %a, i8* %b, i32 %n, i1 zeroext %cond) {
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %b, i32 128, i32 4, i1 false)
+  br i1 %cond, label %spill_vectors, label %no_vectors
+
+no_vectors:
+  ret i32 0
+
+spill_vectors:
+  %vp1 = getelementptr <4 x i32>* %vp0, i32 1
+  %v0 = load <4 x i32>* %vp0
+  %v1 = load <4 x i32>* %vp1
+  %vicmp = icmp slt <4 x i32> %v0, %v1
+  %icmp = extractelement <4 x i1> %vicmp, i32 0
+  %vla = alloca i8, i32 %n
+  call void @escape_vla_and_icmp(i8* %vla, i1 zeroext %icmp)
+  %r = extractelement <4 x i32> %v0, i32 0
+  ret i32 %r
+}
+
+; CHECK-LABEL: _memcpy_vla_vector:
+; CHECK: andl $-16, %esp
+; CHECK: movl %esp, %esi
+; CHECK: pushl $128
+; CHECK: calll _memcpy
+; CHECK: calll __chkstk
+
+; stosd doesn't clobber esi, so we can use it.
+
+define i32 @memset_vla_vector(<4 x i32>* %vp0, i8* %a, i32 %n, i1 zeroext %cond) {
+  call void @llvm.memset.p0i8.i32(i8* %a, i8 42, i32 128, i32 4, i1 false)
+  br i1 %cond, label %spill_vectors, label %no_vectors
+
+no_vectors:
+  ret i32 0
+
+spill_vectors:
+  %vp1 = getelementptr <4 x i32>* %vp0, i32 1
+  %v0 = load <4 x i32>* %vp0
+  %v1 = load <4 x i32>* %vp1
+  %vicmp = icmp slt <4 x i32> %v0, %v1
+  %icmp = extractelement <4 x i1> %vicmp, i32 0
+  %vla = alloca i8, i32 %n
+  call void @escape_vla_and_icmp(i8* %vla, i1 zeroext %icmp)
+  %r = extractelement <4 x i32> %v0, i32 0
+  ret i32 %r
+}
+
+; CHECK-LABEL: _memset_vla_vector:
+; CHECK: andl $-16, %esp
+; CHECK: movl %esp, %esi
+; CHECK-DAG: movl $707406378, %eax        # imm = 0x2A2A2A2A
+; CHECK-DAG: movl $32, %ecx
+; CHECK-DAG: movl {{.*}}, %edi
+; CHECK-NOT: movl {{.*}}, %esi
+; CHECK: rep;stosl
+
+; Add a test for memcmp if we ever add a special lowering for it.
diff --git a/test/CodeGen/X86/mem-promote-integers.ll b/test/CodeGen/X86/mem-promote-integers.ll
index 0015df0c1fac..ea38b95a864e 100644
--- a/test/CodeGen/X86/mem-promote-integers.ll
+++ b/test/CodeGen/X86/mem-promote-integers.ll
@@ -1,8 +1,8 @@
 ; Test the basic functionality of integer element promotions of different types.
 ; This tests checks passing of arguments, loading and storing to memory and
 ; basic arithmetic.
-; RUN: llc -march=x86 < %s
-; RUN: llc -march=x86-64 < %s
+; RUN: llc -march=x86 < %s > /dev/null
+; RUN: llc -march=x86-64 < %s > /dev/null
 
 define <1 x i8> @test_1xi8(<1 x i8> %x, <1 x i8>* %b) {
   %bb = load <1 x i8>* %b
diff --git a/test/CodeGen/X86/misched-code-difference-with-debug.ll b/test/CodeGen/X86/misched-code-difference-with-debug.ll
new file mode 100644
index 000000000000..fb2a986e561b
--- /dev/null
+++ b/test/CodeGen/X86/misched-code-difference-with-debug.ll
@@ -0,0 +1,90 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-unknown -mcpu=generic | FileCheck %s
+; Both functions should produce the same code. The presence of debug values
+; should not affect the scheduling strategy.
+; Generated from:
+; char argc;
+; class C {
+; public:
+;   int test(char ,char ,char ,...);
+; };
+; void foo() {
+;   C c;
+;   char lc = argc;
+;   c.test(0,argc,0,lc);
+;   c.test(0,argc,0,lc);
+; }
+;
+; with
+; clang -O2 -c test.cpp -emit-llvm -S
+; clang -O2 -c test.cpp -emit-llvm -S -g
+;
+
+
+%class.C = type { i8 }
+
+@argc = global i8 0, align 1
+
+declare i32 @test_function(%class.C*, i8 signext, i8 signext, i8 signext, ...)
+
+; CHECK-LABEL: test_without_debug
+; CHECK: movl [[A:%[a-z]+]], [[B:%[a-z]+]]
+; CHECK-NEXT: movl [[A]], [[C:%[a-z]+]]
+define void @test_without_debug() {
+entry:
+  %c = alloca %class.C, align 1
+  %0 = load i8* @argc, align 1
+  %conv = sext i8 %0 to i32
+  %call = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %0, i8 signext 0, i32 %conv)
+  %1 = load i8* @argc, align 1
+  %call2 = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %1, i8 signext 0, i32 %conv)
+  ret void
+}
+
+; CHECK-LABEL: test_with_debug
+; CHECK: movl [[A]], [[B]]
+; CHECK-NEXT: movl [[A]], [[C]]
+define void @test_with_debug() {
+entry:
+  %c = alloca %class.C, align 1
+  %0 = load i8* @argc, align 1
+  tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !19, metadata !29)
+  %conv = sext i8 %0 to i32
+  tail call void @llvm.dbg.value(metadata %class.C* %c, i64 0, metadata !18, metadata !29)
+  %call = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %0, i8 signext 0, i32 %conv)
+  %1 = load i8* @argc, align 1
+  call void @llvm.dbg.value(metadata %class.C* %c, i64 0, metadata !18, metadata !29)
+  %call2 = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %1, i8 signext 0, i32 %conv)
+  ret void
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22, !23}
+
+!0 = !{!"", !1, !2, !3, !12, !20, !2} ; [ DW_TAG_compile_unit ] [test.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"test.cpp", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2\00C\002\008\008\000\000\000", !1, null, null, !5, null, null, !"_ZTS1C"} ; [ DW_TAG_class_type ] [C] [line 2, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!6}
+!6 = !{!"", !1, !"_ZTS1C", !7, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 4] [public] [test]
+!7 = !{!"", null, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !10, !11, !11, !11, null}
+!9 = !{!"", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"", null, null, !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
+!11 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!12 = !{!13}
+!13 = !{!"0x2e\00test_with_debug\00test_with_debug\00test_with_debug\006\000\001\000\000\00256\001\006", !1, !14, !15, null, void ()* @test_with_debug, null, null, !17} ; [ DW_TAG_subprogram ] [line 6] [def] [test_with_debug]
+!14 = !{!"0x29", !1}
+!15 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{null}
+!17 = !{!18, !19}
+!18 = !{!"0x100\00c\007\000", !13, !14, !"_ZTS1C"} ; [ DW_TAG_auto_variable ] [c] [line 7]
+!19 = !{!"0x100\00lc\008\000", !13, !14, !11} ; [ DW_TAG_auto_variable ] [lc] [line 8]
+!20 = !{!21}
+!21 = !{!"0x34\00argc\00argc\00\001\000\001", null, !14, !11, i8* @argc, null} ; [ DW_TAG_variable ] [argc] [line 1] [def]
+!22 = !{i32 2, !"Dwarf Version", i32 4}
+!23 = !{i32 2, !"Debug Info Version", i32 2}
+!25 = !MDLocation(line: 8, column: 3, scope: !13)
+!29 = !{!"0x102"}               ; [ DW_TAG_expression ]
diff --git a/test/CodeGen/X86/misched-copy.ll b/test/CodeGen/X86/misched-copy.ll
index 4485b8a244a8..3e3729285d27 100644
--- a/test/CodeGen/X86/misched-copy.ll
+++ b/test/CodeGen/X86/misched-copy.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -march=x86 -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -march=x86 -mcpu=core2 -pre-RA-sched=source -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
 ;
 ; Test scheduling of copy instructions.
 ;
@@ -44,6 +44,6 @@ end:
 
 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
-!0 = metadata !{metadata !"float", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!0 = !{!"float", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/misched-crash.ll b/test/CodeGen/X86/misched-crash.ll
index 7644ee070878..21c3fa3510d6 100644
--- a/test/CodeGen/X86/misched-crash.ll
+++ b/test/CodeGen/X86/misched-crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -enable-misched -verify-misched
+; RUN: llc < %s -verify-machineinstrs -enable-misched -verify-misched
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10"
 
diff --git a/test/CodeGen/X86/misched-matmul.ll b/test/CodeGen/X86/misched-matmul.ll
index 3ea6512258d4..5454b7cf780a 100644
--- a/test/CodeGen/X86/misched-matmul.ll
+++ b/test/CodeGen/X86/misched-matmul.ll
@@ -10,7 +10,7 @@
 ; more complex cases.
 ;
 ; CHECK: @wrap_mul4
-; CHECK: 22 regalloc - Number of spills inserted
+; CHECK: 23 regalloc - Number of spills inserted
 
 define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 {
 entry:
diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll
index 71b0723c429e..96c5dbb8ea98 100644
--- a/test/CodeGen/X86/movgs.ll
+++ b/test/CodeGen/X86/movgs.ll
@@ -3,40 +3,58 @@
 ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -mattr=sse4.1 | FileCheck %s --check-prefix=X64
 
 define i32 @test1() nounwind readonly {
+; X32-LABEL: test1:
+; X32:       # BB#0: # %entry
+; X32-NEXT:    movl %gs:196, %eax
+; X32-NEXT:    movl (%eax), %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: test1:
+; X64:       # BB#0: # %entry
+; X64-NEXT:    movq %gs:320, %rax
+; X64-NEXT:    movl (%rax), %eax
+; X64-NEXT:    retq
 entry:
 	%tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31)		; <i32*> [#uses=1]
 	%tmp1 = load i32* %tmp		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
-; X32-LABEL: test1:
-; X32: 	movl	%gs:196, %eax
-; X32: 	movl	(%eax), %eax
-; X32: 	ret
-
-; X64-LABEL: test1:
-; X64: 	movq	%gs:320, %rax
-; X64: 	movl	(%rax), %eax
-; X64: 	ret
 
 define i64 @test2(void (i8*)* addrspace(256)* %tmp8) nounwind {
+; X32-LABEL: test2:
+; X32:       # BB#0: # %entry
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    calll *%gs:(%eax)
+; X32-NEXT:    xorl %eax, %eax
+; X32-NEXT:    xorl %edx, %edx
+; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    retl
+;
+; X64-LABEL: test2:
+; X64:       # BB#0: # %entry
+; X64-NEXT:    {{(subq.*%rsp|pushq)}}
+; X64-NEXT:    callq *%gs:(%{{(rcx|rdi)}})
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    {{(addq.*%rsp|popq)}}
+; X64-NEXT:    retq
 entry:
   %tmp9 = load void (i8*)* addrspace(256)* %tmp8, align 8
   tail call void %tmp9(i8* undef) nounwind optsize
   ret i64 0
 }
 
-; rdar://8453210
-; X32-LABEL: test2:
-; X32: movl	{{.*}}(%esp), %eax
-; X32: calll	*%gs:(%eax)
-
-; X64-LABEL: test2:
-; X64: callq	*%gs:([[A0:%rdi|%rcx]])
-
-
-
-
 define <2 x i64> @pmovsxwd_1(i64 addrspace(256)* %p) nounwind readonly {
+; X32-LABEL: pmovsxwd_1:
+; X32:       # BB#0: # %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    pmovsxwd %gs:(%eax), %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: pmovsxwd_1:
+; X64:       # BB#0: # %entry
+; X64-NEXT:    pmovsxwd %gs:(%{{(rcx|rdi)}}), %xmm0
+; X64-NEXT:    retq
 entry:
   %0 = load i64 addrspace(256)* %p
   %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0
@@ -44,20 +62,26 @@ entry:
   %2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone
   %3 = bitcast <4 x i32> %2 to <2 x i64>
   ret <2 x i64> %3
-  
-; X32-LABEL: pmovsxwd_1:
-; X32: 	movl	4(%esp), %eax
-; X32: 	pmovsxwd	%gs:(%eax), %xmm0
-; X32: 	ret
-
-; X64-LABEL: pmovsxwd_1:
-; X64:	pmovsxwd	%gs:([[A0]]), %xmm0
-; X64:	ret
 }
 
 ; The two loads here both look identical to selection DAG, except for their
 ; address spaces.  Make sure they aren't CSE'd.
 define i32 @test_no_cse() nounwind readonly {
+; X32-LABEL: test_no_cse:
+; X32:       # BB#0: # %entry
+; X32-NEXT:    movl %gs:196, %eax
+; X32-NEXT:    movl (%eax), %eax
+; X32-NEXT:    movl %fs:196, %ecx
+; X32-NEXT:    addl (%ecx), %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_no_cse:
+; X64:       # BB#0: # %entry
+; X64-NEXT:    movq %gs:320, %rax
+; X64-NEXT:    movl (%rax), %eax
+; X64-NEXT:    movq %fs:320, %rcx
+; X64-NEXT:    addl (%rcx), %eax
+; X64-NEXT:    retq
 entry:
 	%tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31)		; <i32*> [#uses=1]
 	%tmp1 = load i32* %tmp		; <i32> [#uses=1]
@@ -66,9 +90,5 @@ entry:
 	%tmp4 = add i32 %tmp1, %tmp3
 	ret i32 %tmp4
 }
-; X32-LABEL: test_no_cse:
-; X32: 	movl	%gs:196
-; X32: 	movl	%fs:196
-; X32: 	ret
 
 declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
diff --git a/test/CodeGen/X86/movntdq-no-avx.ll b/test/CodeGen/X86/movntdq-no-avx.ll
index 8b7e6ef15258..cc35e201e6b3 100644
--- a/test/CodeGen/X86/movntdq-no-avx.ll
+++ b/test/CodeGen/X86/movntdq-no-avx.ll
@@ -9,4 +9,4 @@ entry:
   ret void
 }
 
-!0 = metadata !{i32 1}
+!0 = !{i32 1}
diff --git a/test/CodeGen/X86/movtopush.ll b/test/CodeGen/X86/movtopush.ll
new file mode 100644
index 000000000000..cb48ed747be4
--- /dev/null
+++ b/test/CodeGen/X86/movtopush.ll
@@ -0,0 +1,112 @@
+; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
+; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED 
+declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
+declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
+
+; Here, we should have a reserved frame, so we don't expect pushes
+; NORMAL-LABEL: test1
+; NORMAL: subl    $16, %esp
+; NORMAL-NEXT: movl    $4, 12(%esp)
+; NORMAL-NEXT: movl    $3, 8(%esp)
+; NORMAL-NEXT: movl    $2, 4(%esp)
+; NORMAL-NEXT: movl    $1, (%esp)
+; NORMAL-NEXT: call
+define void @test1() {
+entry:
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; Here, we expect a sequence of 4 immediate pushes
+; NORMAL-LABEL: test2
+; NORMAL-NOT: subl {{.*}} %esp
+; NORMAL: pushl   $4
+; NORMAL-NEXT: pushl   $3
+; NORMAL-NEXT: pushl   $2
+; NORMAL-NEXT: pushl   $1
+; NORMAL-NEXT: call
+define void @test2(i32 %k) {
+entry:
+  %a = alloca i32, i32 %k
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; Again, we expect a sequence of 4 immediate pushes
+; Checks that we generate the right pushes for >8bit immediates
+; NORMAL-LABEL: test2b
+; NORMAL-NOT: subl {{.*}} %esp
+; NORMAL: pushl   $4096
+; NORMAL-NEXT: pushl   $3072
+; NORMAL-NEXT: pushl   $2048
+; NORMAL-NEXT: pushl   $1024
+; NORMAL-NEXT: call
+define void @test2b(i32 %k) {
+entry:
+  %a = alloca i32, i32 %k
+  call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
+  ret void
+}
+
+; The first push should push a register
+; NORMAL-LABEL: test3
+; NORMAL-NOT: subl {{.*}} %esp
+; NORMAL: pushl   $4
+; NORMAL-NEXT: pushl   $3
+; NORMAL-NEXT: pushl   $2
+; NORMAL-NEXT: pushl   %e{{..}}
+; NORMAL-NEXT: call
+define void @test3(i32 %k) {
+entry:
+  %a = alloca i32, i32 %k
+  call void @good(i32 %k, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; We don't support weird calling conventions
+; NORMAL-LABEL: test4
+; NORMAL: subl    $12, %esp
+; NORMAL-NEXT: movl    $4, 8(%esp)
+; NORMAL-NEXT: movl    $3, 4(%esp)
+; NORMAL-NEXT: movl    $1, (%esp)
+; NORMAL-NEXT: movl    $2, %eax
+; NORMAL-NEXT: call
+define void @test4(i32 %k) {
+entry:
+  %a = alloca i32, i32 %k
+  call void @inreg(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; Check that additional alignment is added when the pushes
+; don't add up to the required alignment.
+; ALIGNED-LABEL: test5
+; ALIGNED: subl    $16, %esp
+; ALIGNED-NEXT: pushl   $4
+; ALIGNED-NEXT: pushl   $3
+; ALIGNED-NEXT: pushl   $2
+; ALIGNED-NEXT: pushl   $1
+; ALIGNED-NEXT: call
+define void @test5(i32 %k) {
+entry:
+  %a = alloca i32, i32 %k
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; Check that pushing the addresses of globals (Or generally, things that 
+; aren't exactly immediates) isn't broken.
+; Fixes PR21878.
+; NORMAL-LABEL: test6
+; NORMAL: pushl    $_ext
+; NORMAL-NEXT: call
+declare void @f(i8*)
+@ext = external constant i8
+
+define void @test6() {
+  call void @f(i8* @ext)
+  br label %bb
+bb:
+  alloca i32
+  ret void
+}
diff --git a/test/CodeGen/X86/ms-inline-asm.ll b/test/CodeGen/X86/ms-inline-asm.ll
index 69105158906f..f0bdbba50ef3 100644
--- a/test/CodeGen/X86/ms-inline-asm.ll
+++ b/test/CodeGen/X86/ms-inline-asm.ll
@@ -110,7 +110,7 @@ define i32 @t31() {
 entry:
   %val = alloca i32, align 64
   store i32 -1, i32* %val, align 64
-  call void asm sideeffect inteldialect "mov dword ptr $0, esp", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %val) #1
+  call void asm sideeffect inteldialect "mov dword ptr $0, esp", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %val)
   %sp = load i32* %val, align 64
   ret i32 %sp
 ; CHECK-LABEL: t31:
@@ -125,3 +125,12 @@ entry:
 ; CHECK: movl (%esp), %eax
 ; CHECK: ret
 }
+
+declare hidden void @other_func()
+
+define void @naked() #0 {
+  call void asm sideeffect inteldialect "call dword ptr $0", "*m,~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{esp},~{ebp},~{dirflag},~{fpsr},~{flags}"(void()* @other_func)
+  unreachable
+}
+
+attributes #0 = { naked }
diff --git a/test/CodeGen/X86/musttail-fastcall.ll b/test/CodeGen/X86/musttail-fastcall.ll
new file mode 100644
index 000000000000..c7e5ffcfa877
--- /dev/null
+++ b/test/CodeGen/X86/musttail-fastcall.ll
@@ -0,0 +1,109 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
+; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
+
+; While we don't support varargs with fastcall, we do support forwarding.
+
+@asdf = internal constant [4 x i8] c"asdf"
+
+declare void @puts(i8*)
+
+define i32 @call_fast_thunk() {
+  %r = call x86_fastcallcc i32 (...)* @fast_thunk(i32 inreg 1, i32 inreg 2, i32 3)
+  ret i32 %r
+}
+
+define x86_fastcallcc i32 @fast_thunk(...) {
+  call void @puts(i8* getelementptr ([4 x i8]* @asdf, i32 0, i32 0))
+  %r = musttail call x86_fastcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @fast_target to i32 (...)*) (...)
+  ret i32 %r
+}
+
+; Check that we spill and fill around the call to puts.
+
+; CHECK-LABEL: @fast_thunk@0:
+; CHECK-DAG: movl %ecx, {{.*}}
+; CHECK-DAG: movl %edx, {{.*}}
+; CHECK: calll _puts
+; CHECK-DAG: movl {{.*}}, %ecx
+; CHECK-DAG: movl {{.*}}, %edx
+; CHECK: jmp @fast_target@12
+
+define x86_fastcallcc i32 @fast_target(i32 inreg %a, i32 inreg %b, i32 %c) {
+  %a0 = add i32 %a, %b
+  %a1 = add i32 %a0, %c
+  ret i32 %a1
+}
+
+; Repeat the test for vectorcall, which has XMM registers.
+
+define i32 @call_vector_thunk() {
+  %r = call x86_vectorcallcc i32 (...)* @vector_thunk(i32 inreg 1, i32 inreg 2, i32 3)
+  ret i32 %r
+}
+
+define x86_vectorcallcc i32 @vector_thunk(...) {
+  call void @puts(i8* getelementptr ([4 x i8]* @asdf, i32 0, i32 0))
+  %r = musttail call x86_vectorcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @vector_target to i32 (...)*) (...)
+  ret i32 %r
+}
+
+; Check that we spill and fill SSE registers around the call to puts.
+
+; CHECK-LABEL: vector_thunk@@0:
+; CHECK-DAG: movl %ecx, {{.*}}
+; CHECK-DAG: movl %edx, {{.*}}
+
+; SSE2-DAG: movups %xmm0, {{.*}}
+; SSE2-DAG: movups %xmm1, {{.*}}
+; SSE2-DAG: movups %xmm2, {{.*}}
+; SSE2-DAG: movups %xmm3, {{.*}}
+; SSE2-DAG: movups %xmm4, {{.*}}
+; SSE2-DAG: movups %xmm5, {{.*}}
+
+; AVX-DAG: vmovups %ymm0, {{.*}}
+; AVX-DAG: vmovups %ymm1, {{.*}}
+; AVX-DAG: vmovups %ymm2, {{.*}}
+; AVX-DAG: vmovups %ymm3, {{.*}}
+; AVX-DAG: vmovups %ymm4, {{.*}}
+; AVX-DAG: vmovups %ymm5, {{.*}}
+
+; AVX512-DAG: vmovups %zmm0, {{.*}}
+; AVX512-DAG: vmovups %zmm1, {{.*}}
+; AVX512-DAG: vmovups %zmm2, {{.*}}
+; AVX512-DAG: vmovups %zmm3, {{.*}}
+; AVX512-DAG: vmovups %zmm4, {{.*}}
+; AVX512-DAG: vmovups %zmm5, {{.*}}
+
+; CHECK: calll _puts
+
+; SSE2-DAG: movups {{.*}}, %xmm0
+; SSE2-DAG: movups {{.*}}, %xmm1
+; SSE2-DAG: movups {{.*}}, %xmm2
+; SSE2-DAG: movups {{.*}}, %xmm3
+; SSE2-DAG: movups {{.*}}, %xmm4
+; SSE2-DAG: movups {{.*}}, %xmm5
+
+; AVX-DAG: vmovups {{.*}}, %ymm0
+; AVX-DAG: vmovups {{.*}}, %ymm1
+; AVX-DAG: vmovups {{.*}}, %ymm2
+; AVX-DAG: vmovups {{.*}}, %ymm3
+; AVX-DAG: vmovups {{.*}}, %ymm4
+; AVX-DAG: vmovups {{.*}}, %ymm5
+
+; AVX512-DAG: vmovups {{.*}}, %zmm0
+; AVX512-DAG: vmovups {{.*}}, %zmm1
+; AVX512-DAG: vmovups {{.*}}, %zmm2
+; AVX512-DAG: vmovups {{.*}}, %zmm3
+; AVX512-DAG: vmovups {{.*}}, %zmm4
+; AVX512-DAG: vmovups {{.*}}, %zmm5
+
+; CHECK-DAG: movl {{.*}}, %ecx
+; CHECK-DAG: movl {{.*}}, %edx
+; CHECK: jmp vector_target@@12
+
+define x86_vectorcallcc i32 @vector_target(i32 inreg %a, i32 inreg %b, i32 %c) {
+  %a0 = add i32 %a, %b
+  %a1 = add i32 %a0, %c
+  ret i32 %a1
+}
diff --git a/test/CodeGen/X86/musttail-varargs.ll b/test/CodeGen/X86/musttail-varargs.ll
new file mode 100644
index 000000000000..7f105a13a6a0
--- /dev/null
+++ b/test/CodeGen/X86/musttail-varargs.ll
@@ -0,0 +1,140 @@
+; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-linux | FileCheck %s --check-prefix=LINUX
+; RUN: llc < %s -enable-tail-merge=0 -mtriple=x86_64-windows | FileCheck %s --check-prefix=WINDOWS
+; RUN: llc < %s -enable-tail-merge=0 -mtriple=i686-windows | FileCheck %s --check-prefix=X86
+
+; Test that we actually spill and reload all arguments in the variadic argument
+; pack. Doing a normal call will clobber all argument registers, and we will
+; spill around it. A simple adjustment should not require any XMM spills.
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void(i8*, ...)* @get_f(i8* %this)
+
+define void @f_thunk(i8* %this, ...) {
+  ; Use va_start so that we exercise the combination.
+  %ap = alloca [4 x i8*], align 16
+  %ap_i8 = bitcast [4 x i8*]* %ap to i8*
+  call void @llvm.va_start(i8* %ap_i8)
+
+  %fptr = call void(i8*, ...)*(i8*)* @get_f(i8* %this)
+  musttail call void (i8*, ...)* %fptr(i8* %this, ...)
+  ret void
+}
+
+; Save and restore 6 GPRs, 8 XMMs, and AL around the call.
+
+; LINUX-LABEL: f_thunk:
+; LINUX-DAG: movq %rdi, {{.*}}
+; LINUX-DAG: movq %rsi, {{.*}}
+; LINUX-DAG: movq %rdx, {{.*}}
+; LINUX-DAG: movq %rcx, {{.*}}
+; LINUX-DAG: movq %r8, {{.*}}
+; LINUX-DAG: movq %r9, {{.*}}
+; LINUX-DAG: movb %al, {{.*}}
+; LINUX-DAG: movaps %xmm0, {{[0-9]*}}(%rsp)
+; LINUX-DAG: movaps %xmm1, {{[0-9]*}}(%rsp)
+; LINUX-DAG: movaps %xmm2, {{[0-9]*}}(%rsp)
+; LINUX-DAG: movaps %xmm3, {{[0-9]*}}(%rsp)
+; LINUX-DAG: movaps %xmm4, {{[0-9]*}}(%rsp)
+; LINUX-DAG: movaps %xmm5, {{[0-9]*}}(%rsp)
+; LINUX-DAG: movaps %xmm6, {{[0-9]*}}(%rsp)
+; LINUX-DAG: movaps %xmm7, {{[0-9]*}}(%rsp)
+; LINUX: callq get_f
+; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm0
+; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm1
+; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm2
+; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm3
+; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm4
+; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm5
+; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm6
+; LINUX-DAG: movaps {{[0-9]*}}(%rsp), %xmm7
+; LINUX-DAG: movq {{.*}}, %rdi
+; LINUX-DAG: movq {{.*}}, %rsi
+; LINUX-DAG: movq {{.*}}, %rdx
+; LINUX-DAG: movq {{.*}}, %rcx
+; LINUX-DAG: movq {{.*}}, %r8
+; LINUX-DAG: movq {{.*}}, %r9
+; LINUX-DAG: movb {{.*}}, %al
+; LINUX: jmpq *{{.*}}  # TAILCALL
+
+; WINDOWS-LABEL: f_thunk:
+; WINDOWS-NOT: mov{{.}}ps
+; WINDOWS-DAG: movq %rdx, {{.*}}
+; WINDOWS-DAG: movq %rcx, {{.*}}
+; WINDOWS-DAG: movq %r8, {{.*}}
+; WINDOWS-DAG: movq %r9, {{.*}}
+; WINDOWS-NOT: mov{{.}}ps
+; WINDOWS: callq get_f
+; WINDOWS-NOT: mov{{.}}ps
+; WINDOWS-DAG: movq {{.*}}, %rdx
+; WINDOWS-DAG: movq {{.*}}, %rcx
+; WINDOWS-DAG: movq {{.*}}, %r8
+; WINDOWS-DAG: movq {{.*}}, %r9
+; WINDOWS-NOT: mov{{.}}ps
+; WINDOWS: jmpq *{{.*}} # TAILCALL
+
+; No regparms on normal x86 conventions.
+
+; X86-LABEL: _f_thunk:
+; X86: calll _get_f
+; X86: jmpl *{{.*}} # TAILCALL
+
+; This thunk shouldn't require any spills and reloads, assuming the register
+; allocator knows what it's doing.
+
+define void @g_thunk(i8* %fptr_i8, ...) {
+  %fptr = bitcast i8* %fptr_i8 to void (i8*, ...)*
+  musttail call void (i8*, ...)* %fptr(i8* %fptr_i8, ...)
+  ret void
+}
+
+; LINUX-LABEL: g_thunk:
+; LINUX-NOT: movq
+; LINUX: jmpq *%rdi  # TAILCALL
+
+; WINDOWS-LABEL: g_thunk:
+; WINDOWS-NOT: movq
+; WINDOWS: jmpq *%rcx # TAILCALL
+
+; X86-LABEL: _g_thunk:
+; X86: jmpl *%eax # TAILCALL
+
+; Do a simple multi-exit multi-bb test.
+
+%struct.Foo = type { i1, i8*, i8* }
+
+@g = external global i32
+
+define void @h_thunk(%struct.Foo* %this, ...) {
+  %cond_p = getelementptr %struct.Foo* %this, i32 0, i32 0
+  %cond = load i1* %cond_p
+  br i1 %cond, label %then, label %else
+
+then:
+  %a_p = getelementptr %struct.Foo* %this, i32 0, i32 1
+  %a_i8 = load i8** %a_p
+  %a = bitcast i8* %a_i8 to void (%struct.Foo*, ...)*
+  musttail call void (%struct.Foo*, ...)* %a(%struct.Foo* %this, ...)
+  ret void
+
+else:
+  %b_p = getelementptr %struct.Foo* %this, i32 0, i32 2
+  %b_i8 = load i8** %b_p
+  %b = bitcast i8* %b_i8 to void (%struct.Foo*, ...)*
+  store i32 42, i32* @g
+  musttail call void (%struct.Foo*, ...)* %b(%struct.Foo* %this, ...)
+  ret void
+}
+
+; LINUX-LABEL: h_thunk:
+; LINUX: jne
+; LINUX: jmpq *{{.*}} # TAILCALL
+; LINUX: jmpq *{{.*}} # TAILCALL
+; WINDOWS-LABEL: h_thunk:
+; WINDOWS: jne
+; WINDOWS: jmpq *{{.*}} # TAILCALL
+; WINDOWS: jmpq *{{.*}} # TAILCALL
+; X86-LABEL: _h_thunk:
+; X86: jne
+; X86: jmpl *{{.*}} # TAILCALL
+; X86: jmpl *{{.*}} # TAILCALL
diff --git a/test/CodeGen/X86/named-reg-alloc.ll b/test/CodeGen/X86/named-reg-alloc.ll
index 9463ea377a9d..c33b4eb75d04 100644
--- a/test/CodeGen/X86/named-reg-alloc.ll
+++ b/test/CodeGen/X86/named-reg-alloc.ll
@@ -11,4 +11,4 @@ entry:
 
 declare i32 @llvm.read_register.i32(metadata) nounwind
 
-!0 = metadata !{metadata !"eax\00"}
+!0 = !{!"eax\00"}
diff --git a/test/CodeGen/X86/named-reg-notareg.ll b/test/CodeGen/X86/named-reg-notareg.ll
index d85ddddbea85..18c517d87810 100644
--- a/test/CodeGen/X86/named-reg-notareg.ll
+++ b/test/CodeGen/X86/named-reg-notareg.ll
@@ -10,4 +10,4 @@ entry:
 
 declare i32 @llvm.read_register.i32(metadata) nounwind
 
-!0 = metadata !{metadata !"notareg\00"}
+!0 = !{!"notareg\00"}
diff --git a/test/CodeGen/X86/nancvt.ll b/test/CodeGen/X86/nancvt.ll
index 8036710b225a..8a665fa79cff 100644
--- a/test/CodeGen/X86/nancvt.ll
+++ b/test/CodeGen/X86/nancvt.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -std-compile-opts | llc > %t
+; RUN: opt < %s -O3 | llc > %t
 ; RUN: grep 2147027116 %t | count 3
 ; RUN: grep 2147228864 %t | count 3
 ; RUN: grep 2146502828 %t | count 3
diff --git a/test/CodeGen/X86/narrow-shl-load.ll b/test/CodeGen/X86/narrow-shl-load.ll
index 30387925b34d..5175bfc2bcb1 100644
--- a/test/CodeGen/X86/narrow-shl-load.ll
+++ b/test/CodeGen/X86/narrow-shl-load.ll
@@ -30,40 +30,6 @@ while.end:                                        ; preds = %while.cond
   ret void
 }
 
-
-; DAGCombiner shouldn't fold the sdiv (ashr) away.
-; rdar://8636812
-; CHECK-LABEL: test2:
-; CHECK:   sarl
-
-define i32 @test2() nounwind {
-entry:
-  %i = alloca i32, align 4
-  %j = alloca i8, align 1
-  store i32 127, i32* %i, align 4
-  store i8 0, i8* %j, align 1
-  %tmp3 = load i32* %i, align 4
-  %mul = mul nsw i32 %tmp3, 2
-  %conv4 = trunc i32 %mul to i8
-  %conv5 = sext i8 %conv4 to i32
-  %div6 = sdiv i32 %conv5, 2
-  %conv7 = trunc i32 %div6 to i8
-  %conv9 = sext i8 %conv7 to i32
-  %cmp = icmp eq i32 %conv9, -1
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:                                          ; preds = %entry
-  ret i32 0
-
-if.end:                                           ; preds = %entry
-  call void @abort() noreturn
-  unreachable
-}
-
-declare void @abort() noreturn
-
-declare void @exit(i32) noreturn
-
 ; DAG Combiner can't fold this into a load of the 1'th byte.
 ; PR8757
 define i32 @test3(i32 *%P) nounwind ssp {
diff --git a/test/CodeGen/X86/no-compact-unwind.ll b/test/CodeGen/X86/no-compact-unwind.ll
deleted file mode 100644
index 991cd4ed7363..000000000000
--- a/test/CodeGen/X86/no-compact-unwind.ll
+++ /dev/null
@@ -1,64 +0,0 @@
-; RUN: llc < %s -mtriple x86_64-apple-macosx10.8.0 -mcpu corei7 -filetype=obj -o - \
-; RUN:  | llvm-objdump -triple x86_64-apple-macosx10.8.0 -s - \
-; RUN:  | FileCheck -check-prefix=CU %s
-; RUN: llc < %s -mtriple x86_64-apple-darwin11 -mcpu corei7 \
-; RUN:  | llvm-mc -triple x86_64-apple-darwin11 -filetype=obj -o - \
-; RUN:  | llvm-objdump -triple x86_64-apple-darwin11 -s - \
-; RUN:  | FileCheck -check-prefix=FROM-ASM %s
-
-%"struct.dyld::MappedRanges" = type { [400 x %struct.anon], %"struct.dyld::MappedRanges"* }
-%struct.anon = type { %class.ImageLoader*, i64, i64 }
-%class.ImageLoader = type { i32 (...)**, i8*, i8*, i32, i64, i64, i32, i32, %"struct.ImageLoader::recursive_lock"*, i16, i16, [4 x i8] }
-%"struct.ImageLoader::recursive_lock" = type { i32, i32 }
-
-@G1 = external hidden global %"struct.dyld::MappedRanges", align 8
-
-declare void @OSMemoryBarrier() optsize
-
-; This compact unwind encoding indicates that we could not generate correct
-; compact unwind encodings for this function. This then defaults to using the
-; DWARF EH frame.
-
-; CU:      Contents of section __compact_unwind:
-; CU-NEXT: 0048 00000000 00000000 42000000 00000004
-; CU-NEXT: 0058 00000000 00000000 00000000 00000000
-
-; FROM-ASM:      Contents of section __compact_unwind:
-; FROM-ASM-NEXT: 0048 00000000 00000000 42000000 00000004
-; FROM-ASM-NEXT: 0058 00000000 00000000 00000000 00000000
-
-define void @func(%class.ImageLoader* %image) optsize ssp uwtable {
-entry:
-  br label %for.cond1.preheader
-
-for.cond1.preheader:                              ; preds = %for.inc10, %entry
-  %p.019 = phi %"struct.dyld::MappedRanges"* [ @G1, %entry ], [ %1, %for.inc10 ]
-  br label %for.body3
-
-for.body3:                                        ; preds = %for.inc, %for.cond1.preheader
-  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.inc ]
-  %image4 = getelementptr inbounds %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 0, i64 %indvars.iv, i32 0
-  %0 = load %class.ImageLoader** %image4, align 8
-  %cmp5 = icmp eq %class.ImageLoader* %0, %image
-  br i1 %cmp5, label %if.then, label %for.inc
-
-if.then:                                          ; preds = %for.body3
-  tail call void @OSMemoryBarrier() optsize
-  store %class.ImageLoader* null, %class.ImageLoader** %image4, align 8
-  br label %for.inc
-
-for.inc:                                          ; preds = %if.then, %for.body3
-  %indvars.iv.next = add i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, 400
-  br i1 %exitcond, label %for.inc10, label %for.body3
-
-for.inc10:                                        ; preds = %for.inc
-  %next = getelementptr inbounds %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 1
-  %1 = load %"struct.dyld::MappedRanges"** %next, align 8
-  %cmp = icmp eq %"struct.dyld::MappedRanges"* %1, null
-  br i1 %cmp, label %for.end11, label %for.cond1.preheader
-
-for.end11:                                        ; preds = %for.inc10
-  ret void
-}
diff --git a/test/CodeGen/X86/nonconst-static-ev.ll b/test/CodeGen/X86/nonconst-static-ev.ll
index f852caeeea21..5449791f3fa3 100644
--- a/test/CodeGen/X86/nonconst-static-ev.ll
+++ b/test/CodeGen/X86/nonconst-static-ev.ll
@@ -1,6 +1,5 @@
 ; RUN: not llc -march=x86 -mtriple=x86_64-linux-gnu < %s 2> %t
 ; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
-; REQUIRES: shell
 
 @0 = global i8 extractvalue ([1 x i8] select (i1 ptrtoint (i32* @1 to i1), [1 x i8] [ i8 1 ], [1 x i8] [ i8 2 ]), 0)
 @1 = external global i32
diff --git a/test/CodeGen/X86/nonconst-static-iv.ll b/test/CodeGen/X86/nonconst-static-iv.ll
index 8fad39bcbf72..30613ef383a3 100644
--- a/test/CodeGen/X86/nonconst-static-iv.ll
+++ b/test/CodeGen/X86/nonconst-static-iv.ll
@@ -1,6 +1,5 @@
 ; RUN: not llc -march=x86 -mtriple=x86_64-linux-gnu < %s 2> %t
 ; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
-; REQUIRES: shell
 
 @0 = global i8 insertvalue( { i8 } select (i1 ptrtoint (i32* @1 to i1), { i8 } { i8 1 }, { i8 } { i8 2 }), i8 0, 0)
 @1 = external global i32
diff --git a/test/CodeGen/X86/nontemporal-2.ll b/test/CodeGen/X86/nontemporal-2.ll
new file mode 100644
index 000000000000..f62f3725d7d8
--- /dev/null
+++ b/test/CodeGen/X86/nontemporal-2.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+
+
+; Make sure that we generate non-temporal stores for the test cases below.
+
+define void @test1(<4 x float>* %dst) {
+; CHECK-LABEL: test1:
+; SSE: movntps
+; AVX: vmovntps
+  store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test2(<4 x i32>* %dst) {
+; CHECK-LABEL: test2:
+; SSE: movntps
+; AVX: vmovntps
+  store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
+  ret void
+}
+
+define void @test3(<2 x double>* %dst) {
+; CHECK-LABEL: test3:
+; SSE: movntps
+; AVX: vmovntps
+  store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1
+  ret void
+}
+
+!1 = !{i32 1}
diff --git a/test/CodeGen/X86/nontemporal.ll b/test/CodeGen/X86/nontemporal.ll
index ae04435ac39c..f9385df36421 100644
--- a/test/CodeGen/X86/nontemporal.ll
+++ b/test/CodeGen/X86/nontemporal.ll
@@ -19,4 +19,4 @@ define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
   ret void
 }
 
-!0 = metadata !{i32 1}
+!0 = !{i32 1}
diff --git a/test/CodeGen/X86/norex-subreg.ll b/test/CodeGen/X86/norex-subreg.ll
index 2c529fdf1039..fb41dede287f 100644
--- a/test/CodeGen/X86/norex-subreg.ll
+++ b/test/CodeGen/X86/norex-subreg.ll
@@ -1,5 +1,5 @@
-; RUN: llc -O0 < %s
-; RUN: llc < %s
+; RUN: llc -O0 < %s -verify-machineinstrs
+; RUN: llc < %s -verify-machineinstrs
 target triple = "x86_64-apple-macosx10.7"
 
 ; This test case extracts a sub_8bit_hi sub-register:
diff --git a/test/CodeGen/X86/null-streamer.ll b/test/CodeGen/X86/null-streamer.ll
index fa77fcb1d138..f6eb0e15aabb 100644
--- a/test/CodeGen/X86/null-streamer.ll
+++ b/test/CodeGen/X86/null-streamer.ll
@@ -14,16 +14,16 @@ define void @f1() {
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!11, !13}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !" ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !2, metadata !""}
-!1 = metadata !{metadata !"", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"", metadata !"", metadata !"", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* null, null, null, metadata !2, i32 2}
-!5 = metadata !{i32 786473, metadata !1}
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null}
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786484, i32 0, null, metadata !"i", metadata !"i", metadata !"_ZL1i", metadata !5, i32 1, metadata !8, i32 1, i32 1, null, null}
-!11 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00 \001\00\000\00\000", !1, !2, !2, !3, !9, !2} ; [ DW_TAG_compile_unit ]
+!1 = !{!"", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00\00\00\002\000\001\000\006\00256\001\002", !1, !5, !6, null, i32 ()* null, null, null, !2} ; [ DW_TAG_subprogram ]
+!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ]
+!7 = !{!8}
+!8 = !{!"0x24\00\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!9 = !{!10}
+!10 = !{!"0x34\00i\00i\00_ZL1i\001\001\001", null, !5, !8, null, null} ; [ DW_TAG_variable ]
+!11 = !{i32 2, !"Dwarf Version", i32 3}
+!13 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/objc-gc-module-flags.ll b/test/CodeGen/X86/objc-gc-module-flags.ll
index 8cb2c036a4f7..f197510f2207 100644
--- a/test/CodeGen/X86/objc-gc-module-flags.ll
+++ b/test/CodeGen/X86/objc-gc-module-flags.ll
@@ -7,7 +7,7 @@
 
 !llvm.module.flags = !{!0, !1, !2, !3}
 
-!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!3 = metadata !{i32 1, metadata !"Objective-C Garbage Collection", i32 2}
+!0 = !{i32 1, !"Objective-C Version", i32 2}
+!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = !{i32 1, !"Objective-C Garbage Collection", i32 2}
diff --git a/test/CodeGen/X86/object-size.ll b/test/CodeGen/X86/object-size.ll
index ec35d2981a16..0610f0b6de2e 100644
--- a/test/CodeGen/X86/object-size.ll
+++ b/test/CodeGen/X86/object-size.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+; RUN: llc -O0 < %s -march=x86-64 | FileCheck %s
 
 ; ModuleID = 'ts.c'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -12,8 +12,8 @@ entry:
   %tmp = load i8** @p                             ; <i8*> [#uses=1]
   %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp, i1 0) ; <i64> [#uses=1]
   %cmp = icmp ne i64 %0, -1                       ; <i1> [#uses=1]
-; X64: movabsq $-1, [[RAX:%r..]]
-; X64: cmpq    $-1, [[RAX]]
+; CHECK: movq $-1, [[RAX:%r..]]
+; CHECK: cmpq $-1, [[RAX]]
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
diff --git a/test/CodeGen/X86/osx-private-labels.ll b/test/CodeGen/X86/osx-private-labels.ll
index 349ce7d0cc5e..e30cb4824aa7 100644
--- a/test/CodeGen/X86/osx-private-labels.ll
+++ b/test/CodeGen/X86/osx-private-labels.ll
@@ -69,3 +69,20 @@
 ; CHECK: .section	__DATA,__foobar,interposing
 ; CHECK-NEXT: .align	3
 ; CHECK-NEXT: L_private12:
+
+@private13 = private global i32 42, section "__DATA, __objc_classlist, regular, no_dead_strip"
+; CHECK: .section	__DATA,__objc_classlist,regular,no_dead_strip
+; CHECK-NEXT: .align	2
+; CHECK-NEXT: L_private13:
+
+@private14 = private global [4 x i8] c"zed\00", section "__TEXT,__objc_classname,cstring_literals"
+; CHECK: .section	__TEXT,__objc_classname,cstring_literals
+; CHECK-NEXT: L_private14:
+
+@private15 = private global [4 x i8] c"zed\00", section "__TEXT,__objc_methname,cstring_literals"
+; CHECK: .section	__TEXT,__objc_methname,cstring_literals
+; CHECK-NEXT: L_private15:
+
+@private16 = private global [4 x i8] c"zed\00", section "__TEXT,__objc_methtype,cstring_literals"
+; CHECK: .section	__TEXT,__objc_methtype,cstring_literals
+; CHECK-NEXT: L_private16:
diff --git a/test/CodeGen/X86/palignr.ll b/test/CodeGen/X86/palignr.ll
index ec6564d7e2eb..3efcc2e41215 100644
--- a/test/CodeGen/X86/palignr.ll
+++ b/test/CodeGen/X86/palignr.ll
@@ -3,58 +3,127 @@
 
 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind {
 ; CHECK-LABEL: test1:
-; CHECK: pshufd
-; CHECK-YONAH: pshufd
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
+; CHECK-NEXT:    retl
+;
+; CHECK-YONAH-LABEL: test1:
+; CHECK-YONAH:       # BB#0:
+; CHECK-YONAH-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,2,3,0]
+; CHECK-YONAH-NEXT:    retl
   %C = shufflevector <4 x i32> %A, <4 x i32> undef, <4 x i32> < i32 1, i32 2, i32 3, i32 0 >
 	ret <4 x i32> %C
 }
 
 define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) nounwind {
 ; CHECK-LABEL: test2:
-; CHECK: palignr
-; CHECK-YONAH: shufps
+; CHECK:       # BB#0:
+; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    retl
+;
+; CHECK-YONAH-LABEL: test2:
+; CHECK-YONAH:       # BB#0:
+; CHECK-YONAH-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
+; CHECK-YONAH-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
+; CHECK-YONAH-NEXT:    retl
   %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 3, i32 4 >
 	ret <4 x i32> %C
 }
 
 define <4 x i32> @test3(<4 x i32> %A, <4 x i32> %B) nounwind {
 ; CHECK-LABEL: test3:
-; CHECK: palignr
+; CHECK:       # BB#0:
+; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    retl
+;
+; CHECK-YONAH-LABEL: test3:
+; CHECK-YONAH:       # BB#0:
+; CHECK-YONAH-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
+; CHECK-YONAH-NEXT:    retl
   %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 1, i32 2, i32 undef, i32 4 >
 	ret <4 x i32> %C
 }
 
 define <4 x i32> @test4(<4 x i32> %A, <4 x i32> %B) nounwind {
 ; CHECK-LABEL: test4:
-; CHECK: palignr
+; CHECK:       # BB#0:
+; CHECK-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; CHECK-NEXT:    retl
+;
+; CHECK-YONAH-LABEL: test4:
+; CHECK-YONAH:       # BB#0:
+; CHECK-YONAH-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
+; CHECK-YONAH-NEXT:    movapd %xmm1, %xmm0
+; CHECK-YONAH-NEXT:    retl
   %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
 	ret <4 x i32> %C
 }
 
 define <4 x float> @test5(<4 x float> %A, <4 x float> %B) nounwind {
 ; CHECK-LABEL: test5:
-; CHECK: palignr
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
+; CHECK-NEXT:    movapd %xmm1, %xmm0
+; CHECK-NEXT:    retl
+;
+; CHECK-YONAH-LABEL: test5:
+; CHECK-YONAH:       # BB#0:
+; CHECK-YONAH-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
+; CHECK-YONAH-NEXT:    movapd %xmm1, %xmm0
+; CHECK-YONAH-NEXT:    retl
   %C = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 6, i32 7, i32 undef, i32 1 >
 	ret <4 x float> %C
 }
 
 define <8 x i16> @test6(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; CHECK-LABEL: test6:
-; CHECK: palignr
+; CHECK:       # BB#0:
+; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    retl
+;
+; CHECK-YONAH-LABEL: test6:
+; CHECK-YONAH:       # BB#0:
+; CHECK-YONAH-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; CHECK-YONAH-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
+; CHECK-YONAH-NEXT:    por %xmm1, %xmm0
+; CHECK-YONAH-NEXT:    retl
   %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 3, i32 4, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10 >
 	ret <8 x i16> %C
 }
 
 define <8 x i16> @test7(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; CHECK-LABEL: test7:
-; CHECK: palignr
+; CHECK:       # BB#0:
+; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    retl
+;
+; CHECK-YONAH-LABEL: test7:
+; CHECK-YONAH:       # BB#0:
+; CHECK-YONAH-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; CHECK-YONAH-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
+; CHECK-YONAH-NEXT:    por %xmm1, %xmm0
+; CHECK-YONAH-NEXT:    retl
   %C = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 undef, i32 6, i32 undef, i32 8, i32 9, i32 10, i32 11, i32 12 >
 	ret <8 x i16> %C
 }
 
 define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
 ; CHECK-LABEL: test8:
-; CHECK: palignr
+; CHECK:       # BB#0:
+; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm0[5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4]
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    retl
+;
+; CHECK-YONAH-LABEL: test8:
+; CHECK-YONAH:       # BB#0:
+; CHECK-YONAH-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
+; CHECK-YONAH-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
+; CHECK-YONAH-NEXT:    por %xmm1, %xmm0
+; CHECK-YONAH-NEXT:    retl
   %C = shufflevector <16 x i8> %A, <16 x i8> %B, <16 x i32> < i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20 >
 	ret <16 x i8> %C
 }
@@ -65,8 +134,19 @@ define <16 x i8> @test8(<16 x i8> %A, <16 x i8> %B) nounwind {
 ; was an UNDEF.)
 define <8 x i16> @test9(<8 x i16> %A, <8 x i16> %B) nounwind {
 ; CHECK-LABEL: test9:
-; CHECK-NOT: palignr
-; CHECK: pshufb
+; CHECK:       # BB#0:
+; CHECK-NEXT:    palignr {{.*#+}} xmm1 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1]
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    retl
+;
+; CHECK-YONAH-LABEL: test9:
+; CHECK-YONAH:       # BB#0:
+; CHECK-YONAH-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-YONAH-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero
+; CHECK-YONAH-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1]
+; CHECK-YONAH-NEXT:    por %xmm0, %xmm1
+; CHECK-YONAH-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-YONAH-NEXT:    retl
   %C = shufflevector <8 x i16> %B, <8 x i16> %A, <8 x i32> < i32 undef, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0 >
 	ret <8 x i16> %C
 }
diff --git a/test/CodeGen/X86/patchpoint-invoke.ll b/test/CodeGen/X86/patchpoint-invoke.ll
new file mode 100644
index 000000000000..192cacc908ab
--- /dev/null
+++ b/test/CodeGen/X86/patchpoint-invoke.ll
@@ -0,0 +1,63 @@
+; RUN: llc -mtriple=x86_64-unknown-linux -mcpu=corei7                             < %s | FileCheck %s
+
+; Test invoking of patchpoints
+;
+define i64 @patchpoint_invoke(i64 %p1, i64 %p2) {
+entry:
+; CHECK-LABEL: patchpoint_invoke:
+; CHECK-NEXT: .cfi_startproc
+; CHECK:      [[FUNC_BEGIN:.L.*]]:
+; CHECK:      .cfi_lsda 3, [[EXCEPTION_LABEL:.L[^ ]*]]
+; CHECK:      pushq %rbp
+
+; Unfortunately, hardcode the name of the label that begins the patchpoint:
+; CHECK:      .Ltmp0:
+; CHECK:      movabsq $-559038736, %r11
+; CHECK-NEXT: callq *%r11
+; CHECK-NEXT: xchgw %ax, %ax
+; CHECK-NEXT: [[PP_END:.L.*]]:
+; CHECK:      ret
+  %resolveCall = inttoptr i64 -559038736 to i8*
+  %result = invoke i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %resolveCall, i32 1, i64 %p1, i64 %p2)
+            to label %success unwind label %threw
+
+success:
+  ret i64 %result
+
+threw:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          catch i8* null
+  ret i64 0
+}
+
+; Verify that the exception table was emitted:
+; CHECK:      [[EXCEPTION_LABEL]]:
+; CHECK-NEXT: .byte 255
+; CHECK-NEXT: .byte 3
+; CHECK-NEXT: .byte 21
+; CHECK-NEXT: .byte 3
+; CHECK-NEXT: .byte 13
+; Verify that the unwind data covers the entire patchpoint region:
+; CHECK-NEXT: .long .Ltmp0-[[FUNC_BEGIN]]
+; CHECK-NEXT: .long [[PP_END]]-.Ltmp0
+
+
+; Verify that the stackmap section got emitted:
+; CHECK-LABEL: __LLVM_StackMaps:
+; Header
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 0
+; Num Functions
+; CHECK-NEXT:   .long 1
+; Num LargeConstants
+; CHECK-NEXT:   .long 0
+; Num Callsites
+; CHECK-NEXT:   .long 1
+; CHECK-NEXT:   .quad patchpoint_invoke
+
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/CodeGen/X86/patchpoint-webkit_jscc.ll b/test/CodeGen/X86/patchpoint-webkit_jscc.ll
new file mode 100644
index 000000000000..5e76bf8d4e60
--- /dev/null
+++ b/test/CodeGen/X86/patchpoint-webkit_jscc.ll
@@ -0,0 +1,88 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=corei7                             < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=corei7 -fast-isel -fast-isel-abort < %s | FileCheck %s --check-prefix=FAST
+
+; Test the webkit_jscc calling convention.
+; One argument will be passed in register, the other will be pushed on the stack.
+; Return value in $rax.
+define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
+entry:
+; CHECK-LABEL: jscall_patchpoint_codegen:
+; CHECK:      Ltmp
+; CHECK:      movq %r{{.+}}, (%rsp)
+; CHECK:      movq %r{{.+}}, %rax
+; CHECK:      Ltmp
+; CHECK-NEXT: movabsq $-559038736, %r11
+; CHECK-NEXT: callq *%r11
+; CHECK:      movq %rax, (%rsp)
+; CHECK:      callq
+; FAST-LABEL: jscall_patchpoint_codegen:
+; FAST:       Ltmp
+; FAST:       movq %r{{.+}}, (%rsp)
+; FAST:       movq %r{{.+}}, %rax
+; FAST:       Ltmp
+; FAST-NEXT:  movabsq $-559038736, %r11
+; FAST-NEXT:  callq *%r11
+; FAST:       movq %rax, (%rsp)
+; FAST:       callq
+  %resolveCall2 = inttoptr i64 -559038736 to i8*
+  %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2)
+  %resolveCall3 = inttoptr i64 -559038737 to i8*
+  tail call webkit_jscc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 15, i8* %resolveCall3, i32 2, i64 %p4, i64 %result)
+  ret void
+}
+
+; Test if the arguments are properly aligned and that we don't store undef arguments.
+define i64 @jscall_patchpoint_codegen2(i64 %callee) {
+entry:
+; CHECK-LABEL: jscall_patchpoint_codegen2:
+; CHECK:      Ltmp
+; CHECK:      movq $6, 24(%rsp)
+; CHECK-NEXT: movl $4, 16(%rsp)
+; CHECK-NEXT: movq $2, (%rsp)
+; CHECK:      Ltmp
+; CHECK-NEXT: movabsq $-559038736, %r11
+; CHECK-NEXT: callq *%r11
+; FAST-LABEL: jscall_patchpoint_codegen2:
+; FAST:       Ltmp
+; FAST:       movq $2, (%rsp)
+; FAST-NEXT:  movl $4, 16(%rsp)
+; FAST-NEXT:  movq $6, 24(%rsp)
+; FAST:       Ltmp
+; FAST-NEXT:  movabsq $-559038736, %r11
+; FAST-NEXT:  callq *%r11
+  %call = inttoptr i64 -559038736 to i8*
+  %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
+  ret i64 %result
+}
+
+; Test if the arguments are properly aligned and that we don't store undef arguments.
+define i64 @jscall_patchpoint_codegen3(i64 %callee) {
+entry:
+; CHECK-LABEL: jscall_patchpoint_codegen3:
+; CHECK:      Ltmp
+; CHECK:      movq $10, 48(%rsp)
+; CHECK-NEXT: movl  $8, 36(%rsp)
+; CHECK-NEXT: movq  $6, 24(%rsp)
+; CHECK-NEXT: movl  $4, 16(%rsp)
+; CHECK-NEXT: movq  $2, (%rsp)
+; CHECK:      Ltmp
+; CHECK-NEXT: movabsq $-559038736, %r11
+; CHECK-NEXT: callq *%r11
+; FAST-LABEL: jscall_patchpoint_codegen3:
+; FAST:       Ltmp
+; FAST:       movq  $2, (%rsp)
+; FAST-NEXT:  movl  $4, 16(%rsp)
+; FAST-NEXT:  movq  $6, 24(%rsp)
+; FAST-NEXT:  movl  $8, 36(%rsp)
+; FAST-NEXT:  movq $10, 48(%rsp)
+; FAST:       Ltmp
+; FAST-NEXT:  movabsq $-559038736, %r11
+; FAST-NEXT:  callq *%r11
+  %call = inttoptr i64 -559038736 to i8*
+  %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10)
+  ret i64 %result
+}
+
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
+declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
+
diff --git a/test/CodeGen/X86/patchpoint.ll b/test/CodeGen/X86/patchpoint.ll
index 62b12732ded4..07148f0329a2 100644
--- a/test/CodeGen/X86/patchpoint.ll
+++ b/test/CodeGen/X86/patchpoint.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -disable-fp-elim | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=corei7                             < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=corei7 -fast-isel -fast-isel-abort < %s | FileCheck %s
 
 ; Trivial patchpoint codegen
 ;
@@ -38,61 +39,6 @@ entry:
   ret void
 }
 
-; Test the webkit_jscc calling convention.
-; One argument will be passed in register, the other will be pushed on the stack.
-; Return value in $rax.
-define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) {
-entry:
-; CHECK-LABEL: jscall_patchpoint_codegen:
-; CHECK:      Ltmp
-; CHECK:      movq %r{{.+}}, (%rsp)
-; CHECK:      movq %r{{.+}}, %rax
-; CHECK:      Ltmp
-; CHECK-NEXT: movabsq $-559038736, %r11
-; CHECK-NEXT: callq *%r11
-; CHECK:      movq %rax, (%rsp)
-; CHECK:      callq
-  %resolveCall2 = inttoptr i64 -559038736 to i8*
-  %result = tail call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 5, i32 15, i8* %resolveCall2, i32 2, i64 %p4, i64 %p2)
-  %resolveCall3 = inttoptr i64 -559038737 to i8*
-  tail call webkit_jscc void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 6, i32 15, i8* %resolveCall3, i32 2, i64 %p4, i64 %result)
-  ret void
-}
-
-; Test if the arguments are properly aligned and that we don't store undef arguments.
-define i64 @jscall_patchpoint_codegen2(i64 %callee) {
-entry:
-; CHECK-LABEL: jscall_patchpoint_codegen2:
-; CHECK:      Ltmp
-; CHECK:      movq $6, 24(%rsp)
-; CHECK-NEXT: movl $4, 16(%rsp)
-; CHECK-NEXT: movq $2, (%rsp)
-; CHECK:      Ltmp
-; CHECK-NEXT: movabsq $-559038736, %r11
-; CHECK-NEXT: callq *%r11
-  %call = inttoptr i64 -559038736 to i8*
-  %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %call, i32 6, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6)
-  ret i64 %result
-}
-
-; Test if the arguments are properly aligned and that we don't store undef arguments.
-define i64 @jscall_patchpoint_codegen3(i64 %callee) {
-entry:
-; CHECK-LABEL: jscall_patchpoint_codegen3:
-; CHECK:      Ltmp
-; CHECK:      movq $10, 48(%rsp)
-; CHECK-NEXT: movl  $8, 36(%rsp)
-; CHECK-NEXT: movq  $6, 24(%rsp)
-; CHECK-NEXT: movl  $4, 16(%rsp)
-; CHECK-NEXT: movq  $2, (%rsp)
-; CHECK:      Ltmp
-; CHECK-NEXT: movabsq $-559038736, %r11
-; CHECK-NEXT: callq *%r11
-  %call = inttoptr i64 -559038736 to i8*
-  %result = call webkit_jscc i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 7, i32 15, i8* %call, i32 10, i64 %callee, i64 2, i64 undef, i32 4, i32 undef, i64 6, i32 undef, i32 8, i32 undef, i64 10)
-  ret i64 %result
-}
-
 ; Test patchpoints reusing the same TargetConstant.
 ; <rdar:15390785> Assertion failed: (CI.getNumArgOperands() >= NumArgs + 4)
 ; There is no way to verify this, since it depends on memory allocation.
@@ -125,6 +71,17 @@ entry:
   ret void
 }
 
+; Test large target address.
+define i64 @large_target_address_patchpoint_codegen() {
+entry:
+; CHECK-LABEL: large_target_address_patchpoint_codegen:
+; CHECK:      movabsq $6153737369414576827, %r11
+; CHECK-NEXT: callq *%r11
+  %resolveCall2 = inttoptr i64 6153737369414576827 to i8*
+  %result = tail call i64 (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.i64(i64 2, i32 15, i8* %resolveCall2, i32 0)
+  ret i64 %result
+}
+
 declare void @llvm.experimental.stackmap(i64, i32, ...)
 declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
 declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
diff --git a/test/CodeGen/X86/peep-test-2.ll b/test/CodeGen/X86/peep-test-2.ll
index e4bafbb6ffab..e43b8ef54cf5 100644
--- a/test/CodeGen/X86/peep-test-2.ll
+++ b/test/CodeGen/X86/peep-test-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -march=x86 | FileCheck %s
 
 ; CHECK: testl
 
diff --git a/test/CodeGen/X86/peep-vector-extract-concat.ll b/test/CodeGen/X86/peep-vector-extract-concat.ll
deleted file mode 100644
index f73ebb944dcd..000000000000
--- a/test/CodeGen/X86/peep-vector-extract-concat.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2,-sse4.1 | FileCheck %s
-; CHECK: pshufd $3, %xmm0, %xmm0
-
-; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2,-sse4.1 | FileCheck %s -check-prefix=WIN64
-; %a is passed indirectly on Win64.
-; WIN64: movss   12(%rcx), %xmm0
-
-define float @foo(<8 x float> %a) nounwind {
-  %c = extractelement <8 x float> %a, i32 3
-  ret float %c
-}
diff --git a/test/CodeGen/X86/peep-vector-extract-insert.ll b/test/CodeGen/X86/peep-vector-extract-insert.ll
deleted file mode 100644
index f958b6b2c069..000000000000
--- a/test/CodeGen/X86/peep-vector-extract-insert.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86-64 | grep "xorps	%xmm0, %xmm0" | count 2
-
-define float @foo(<4 x float> %a) {
-  %b = insertelement <4 x float> %a, float 0.0, i32 3
-  %c = extractelement <4 x float> %b, i32 3
-  ret float %c
-}
-define float @bar(float %a) {
-  %b = insertelement <4 x float> <float 0x400B333340000000, float 4.5, float 0.0, float 0x4022666660000000>, float %a, i32 3
-  %c = extractelement <4 x float> %b, i32 2
-  ret float %c
-}
diff --git a/test/CodeGen/X86/peephole-fold-movsd.ll b/test/CodeGen/X86/peephole-fold-movsd.ll
new file mode 100644
index 000000000000..09d9328815da
--- /dev/null
+++ b/test/CodeGen/X86/peephole-fold-movsd.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s
+;
+; Check that x86's peephole optimization doesn't fold a 64-bit load (movsd) into
+; addpd.
+; rdar://problem/18236850
+
+%struct.S1 = type { double, double }
+
+@g = common global %struct.S1 zeroinitializer, align 8
+
+declare void @foo3(%struct.S1*)
+
+; CHECK: movsd {{[0-9]*}}(%rsp), [[R0:%xmm[0-9]+]]
+; CHECK: addpd [[R0]], %xmm{{[0-9]+}}
+
+define void @foo1(double %a.coerce0, double %a.coerce1, double %b.coerce0, double %b.coerce1) {
+  %1 = alloca <2 x double>, align 16
+  %tmpcast = bitcast <2 x double>* %1 to %struct.S1*
+  call void @foo3(%struct.S1* %tmpcast) #2
+  %p2 = getelementptr inbounds %struct.S1* %tmpcast, i64 0, i32 0
+  %2 = load double* %p2, align 16
+  %p3 = getelementptr inbounds %struct.S1* %tmpcast, i64 0, i32 1
+  %3 = load double* %p3, align 8
+  %4 = insertelement <2 x double> undef, double %2, i32 0
+  %5 = insertelement <2 x double> %4, double 0.000000e+00, i32 1
+  %6 = insertelement <2 x double> undef, double %3, i32 1
+  %7 = insertelement <2 x double> %6, double 1.000000e+00, i32 0
+  %8 = fadd <2 x double> %5, %7
+  store <2 x double> %8, <2 x double>* bitcast (%struct.S1* @g to <2 x double>*), align 16
+  ret void
+}
diff --git a/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
index 6eb97c3cd7ab..12a3adfdfe98 100644
--- a/test/CodeGen/X86/phys_subreg_coalesce-3.ll
+++ b/test/CodeGen/X86/phys_subreg_coalesce-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=i386-apple-darwin -mcpu=corei7 | FileCheck %s
 ; rdar://5571034
 
 ; This requires physreg joining, %vreg13 is live everywhere:
diff --git a/test/CodeGen/X86/pmul.ll b/test/CodeGen/X86/pmul.ll
index 7bf8a618fa77..8937d6afa0ae 100644
--- a/test/CodeGen/X86/pmul.ll
+++ b/test/CodeGen/X86/pmul.ll
@@ -1,32 +1,96 @@
-; RUN: llc < %s -march=x86 -mattr=sse4.1 -mcpu=nehalem -stack-alignment=16 > %t
-; RUN: grep pmul %t | count 12
-; RUN: grep mov %t | count 14
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=ALL --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE41
 
 define <4 x i32> @a(<4 x i32> %i) nounwind  {
-        %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
-        ret <4 x i32> %A
+; SSE2-LABEL: a:
+; SSE2:         movdqa {{.*}}, %[[X1:xmm[0-9]+]]
+; SSE2-NEXT:    pshufd {{.*}} # [[X2:xmm[0-9]+]] = xmm0[1,1,3,3]
+; SSE2-NEXT:    pmuludq %[[X1]], %xmm0
+; SSE2-NEXT:    pmuludq %[[X1]], %[[X2]]
+; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2],[[X2]][0,2]
+; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: a:
+; SSE41:         pmulld
+; SSE41-NEXT:    retq
+entry:
+  %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >
+  ret <4 x i32> %A
 }
+
 define <2 x i64> @b(<2 x i64> %i) nounwind  {
-        %A = mul <2 x i64> %i, < i64 117, i64 117 >
-        ret <2 x i64> %A
+; ALL-LABEL: b:
+; ALL:         pmuludq
+; ALL:         pmuludq
+; ALL:         pmuludq
+entry:
+  %A = mul <2 x i64> %i, < i64 117, i64 117 >
+  ret <2 x i64> %A
 }
+
 define <4 x i32> @c(<4 x i32> %i, <4 x i32> %j) nounwind  {
-        %A = mul <4 x i32> %i, %j
-        ret <4 x i32> %A
+; SSE2-LABEL: c:
+; SSE2:         pshufd {{.*}} # [[X2:xmm[0-9]+]] = xmm0[1,1,3,3]
+; SSE2-NEXT:    pmuludq %xmm1, %xmm0
+; SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[1,1,3,3]
+; SSE2-NEXT:    pmuludq %[[X2]], %xmm1
+; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2],xmm1[0,2]
+; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: c:
+; SSE41:         pmulld
+; SSE41-NEXT:    retq
+entry:
+  %A = mul <4 x i32> %i, %j
+  ret <4 x i32> %A
 }
+
 define <2 x i64> @d(<2 x i64> %i, <2 x i64> %j) nounwind  {
-        %A = mul <2 x i64> %i, %j
-        ret <2 x i64> %A
+; ALL-LABEL: d:
+; ALL:         pmuludq
+; ALL:         pmuludq
+; ALL:         pmuludq
+entry:
+  %A = mul <2 x i64> %i, %j
+  ret <2 x i64> %A
 }
-; Use a call to force spills.
+
 declare void @foo()
+
 define <4 x i32> @e(<4 x i32> %i, <4 x i32> %j) nounwind  {
-        call void @foo()
-        %A = mul <4 x i32> %i, %j
-        ret <4 x i32> %A
+; SSE2-LABEL: e:
+; SSE2:         movdqa {{[0-9]*}}(%rsp), %xmm0
+; SSE2-NEXT:    pshufd {{.*}} # [[X1:xmm[0-9]+]] = xmm0[1,1,3,3]
+; SSE2-NEXT:    movdqa {{[0-9]*}}(%rsp), %[[X2:xmm[0-9]+]]
+; SSE2-NEXT:    pmuludq %[[X2]], %xmm0
+; SSE2-NEXT:    pshufd {{.*}} # [[X2]] = [[X2]][1,1,3,3]
+; SSE2-NEXT:    pmuludq %[[X1]], %[[X2]]
+; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2],[[X2]][0,2]
+; SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT:    addq ${{[0-9]+}}, %rsp
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: e:
+; SSE41:         pmulld {{[0-9]+}}(%rsp), %xmm
+; SSE41-NEXT:    addq ${{[0-9]+}}, %rsp
+; SSE41-NEXT:    retq
+entry:
+  ; Use a call to force spills.
+  call void @foo()
+  %A = mul <4 x i32> %i, %j
+  ret <4 x i32> %A
 }
+
 define <2 x i64> @f(<2 x i64> %i, <2 x i64> %j) nounwind  {
-        call void @foo()
-        %A = mul <2 x i64> %i, %j
-        ret <2 x i64> %A
+; ALL-LABEL: f:
+; ALL:         pmuludq
+; ALL:         pmuludq
+; ALL:         pmuludq
+entry:
+  ; Use a call to force spills.
+  call void @foo()
+  %A = mul <2 x i64> %i, %j
+  ret <2 x i64> %A
 }
diff --git a/test/CodeGen/X86/pr11334.ll b/test/CodeGen/X86/pr11334.ll
index e7e29e0d609c..0bdb0ec7cf44 100644
--- a/test/CodeGen/X86/pr11334.ll
+++ b/test/CodeGen/X86/pr11334.ll
@@ -15,7 +15,7 @@ define <3 x double> @v3f2d_ext_vec(<3 x float> %v1) nounwind {
 entry:
 ; CHECK: v3f2d_ext_vec
 ; CHECK: cvtps2pd
-; CHECK: movhlps
+; CHECK: shufpd
 ; CHECK: cvtps2pd
 ; AVX:   v3f2d_ext_vec
 ; AVX:   vcvtps2pd
@@ -28,7 +28,7 @@ define <4 x double> @v4f2d_ext_vec(<4 x float> %v1) nounwind {
 entry:
 ; CHECK: v4f2d_ext_vec
 ; CHECK: cvtps2pd
-; CHECK: movhlps
+; CHECK: shufpd
 ; CHECK: cvtps2pd
 ; AVX:   v4f2d_ext_vec
 ; AVX:   vcvtps2pd
@@ -42,9 +42,9 @@ entry:
 ; CHECK: v8f2d_ext_vec
 ; CHECK: cvtps2pd
 ; CHECK: cvtps2pd
-; CHECK: movhlps
+; CHECK: shufpd
 ; CHECK: cvtps2pd
-; CHECK: movhlps
+; CHECK: shufpd
 ; CHECK: cvtps2pd
 ; AVX:   v8f2d_ext_vec
 ; AVX:   vcvtps2pd
diff --git a/test/CodeGen/X86/pr11468.ll b/test/CodeGen/X86/pr11468.ll
index f7e9adb4a211..f721df11586b 100644
--- a/test/CodeGen/X86/pr11468.ll
+++ b/test/CodeGen/X86/pr11468.ll
@@ -29,5 +29,5 @@ entry:
 ; CHECK: popq %rbp
 }
 
-!0 = metadata !{i32 125}
+!0 = !{i32 125}
 
diff --git a/test/CodeGen/X86/pr12359.ll b/test/CodeGen/X86/pr12359.ll
deleted file mode 100644
index 024b163fa718..000000000000
--- a/test/CodeGen/X86/pr12359.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc -asm-verbose -mtriple=x86_64-unknown-unknown -mcpu=corei7 < %s | FileCheck %s
-define <16 x i8> @shuf(<16 x i8> %inval1) {
-entry:
-  %0 = shufflevector <16 x i8> %inval1, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 4, i32 3, i32 2, i32 16, i32 16, i32 3, i32 4, i32 0, i32 4, i32 3, i32 2, i32 16, i32 16, i32 3, i32 4>
-  ret <16 x i8> %0
-; CHECK: shuf
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: pshufb
-; CHECK-NEXT: ret
-}
diff --git a/test/CodeGen/X86/pr12360.ll b/test/CodeGen/X86/pr12360.ll
index 8b30596cd8ac..673403624589 100644
--- a/test/CodeGen/X86/pr12360.ll
+++ b/test/CodeGen/X86/pr12360.ll
@@ -22,7 +22,7 @@ entry:
   ret i1 %tobool
 }
 
-!0 = metadata !{i8 0, i8 2}
+!0 = !{i8 0, i8 2}
 
 
 ; check that we don't build a "trunc" from i1 to i1, which would assert.
diff --git a/test/CodeGen/X86/pr14161.ll b/test/CodeGen/X86/pr14161.ll
index ff4532eac3ac..c2bb8d3df8f3 100644
--- a/test/CodeGen/X86/pr14161.ll
+++ b/test/CodeGen/X86/pr14161.ll
@@ -3,6 +3,12 @@
 declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>)
 
 define <2 x i16> @good(<4 x i32>*, <4 x i8>*) {
+; CHECK-LABEL: good:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movdqa (%rdi), %xmm0
+; CHECK-NEXT:    pminud {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    pmovzxwq %xmm0, %xmm0
+; CHECK-NEXT:    retq
 entry:
   %2 = load <4 x i32>* %0, align 16
   %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
@@ -13,13 +19,17 @@ entry:
   %8 = bitcast i32 %4 to <2 x i16>
   %9 = bitcast i32 %5 to <2 x i16>
   ret <2 x i16> %8
-; CHECK: good
-; CHECK: pminud
-; CHECK-NEXT: pmovzxwq
-; CHECK: ret
 }
 
 define <2 x i16> @bad(<4 x i32>*, <4 x i8>*) {
+; CHECK-LABEL: bad:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movdqa (%rdi), %xmm0
+; CHECK-NEXT:    pminud {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    pextrd $1, %xmm0, %eax
+; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    pmovzxwq %xmm0, %xmm0
+; CHECK-NEXT:    retq
 entry:
   %2 = load <4 x i32>* %0, align 16
   %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
@@ -30,9 +40,4 @@ entry:
   %8 = bitcast i32 %4 to <2 x i16>
   %9 = bitcast i32 %5 to <2 x i16>
   ret <2 x i16> %9
-; CHECK: bad
-; CHECK: pminud
-; CHECK: pextrd
-; CHECK: pmovzxwq
-; CHECK: ret
 }
diff --git a/test/CodeGen/X86/pr15267.ll b/test/CodeGen/X86/pr15267.ll
index c8aaf327a7dd..b4dc5fd47168 100644
--- a/test/CodeGen/X86/pr15267.ll
+++ b/test/CodeGen/X86/pr15267.ll
@@ -48,19 +48,22 @@ define <4 x i64> @test3(<4 x i1>* %in) nounwind {
 
 ; CHECK: test3
 ; CHECK: movzbl
-; CHECK: shrl
-; CHECK: andl $1
-; CHECK: andl $1
-; CHECK: vmovd
-; CHECK: pinsrd $1
-; CHECK: shrl $2
-; CHECK: andl $1
-; CHECK: pinsrd $2
-; CHECK: shrl $3
-; CHECK: andl $1
-; CHECK: pinsrd $3
-; CHECK: pslld
-; CHECK: psrad
-; CHECK: pmovsxdq
-; CHECK: pmovsxdq
+; CHECK: movq
+; CHECK: shlq
+; CHECK: sarq
+; CHECK: vmovq
+; CHECK: movq
+; CHECK: shlq
+; CHECK: sarq
+; CHECK: vmovq
+; CHECK: vpunpcklqdq
+; CHECK: movq
+; CHECK: shlq
+; CHECK: sarq
+; CHECK: vmovq
+; CHECK: shlq
+; CHECK: sarq
+; CHECK: vmovq
+; CHECK: vpunpcklqdq
+; CHECK: vinsertf128
 ; CHECK: ret
diff --git a/test/CodeGen/X86/pr18846.ll b/test/CodeGen/X86/pr18846.ll
new file mode 100644
index 000000000000..c65bc79d6813
--- /dev/null
+++ b/test/CodeGen/X86/pr18846.ll
@@ -0,0 +1,139 @@
+; RUN: llc -O3 -disable-peephole -mcpu=corei7-avx -mattr=+avx < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; pr18846 - needless avx spill/reload
+; Test for unnecessary repeated spills due to eliminateRedundantSpills failing
+; to recognise unaligned ymm load/stores to the stack.
+; Bugpoint reduced testcase.
+
+;CHECK-LABEL: _Z16opt_kernel_cachePfS_S_
+;CHECK-NOT:   vmovups {{.*#+}} 32-byte Folded Spill
+;CHECK-NOT:   vmovups {{.*#+}} 32-byte Folded Reload
+
+; Function Attrs: uwtable
+define void @_Z16opt_kernel_cachePfS_S_() #0 {
+entry:
+  br label %for.body29
+
+for.body29:                                       ; preds = %for.body29, %entry
+  br i1 undef, label %for.body29, label %for.body65
+
+for.body65:                                       ; preds = %for.body29
+  %0 = load float* undef, align 4, !tbaa !1
+  %vecinit7.i4448 = insertelement <8 x float> undef, float %0, i32 7
+  %1 = load float* null, align 4, !tbaa !1
+  %vecinit7.i4304 = insertelement <8 x float> undef, float %1, i32 7
+  %2 = load float* undef, align 4, !tbaa !1
+  %vecinit7.i4196 = insertelement <8 x float> undef, float %2, i32 7
+  %3 = or i64 0, 16
+  %add.ptr111.sum4096 = add i64 %3, 0
+  %4 = load <8 x float>* null, align 16, !tbaa !5
+  %add.ptr162 = getelementptr inbounds [65536 x float]* null, i64 0, i64 %add.ptr111.sum4096
+  %__v.i4158 = bitcast float* %add.ptr162 to <8 x float>*
+  %5 = load <8 x float>* %__v.i4158, align 16, !tbaa !5
+  %add.ptr158.sum40975066 = or i64 %add.ptr111.sum4096, 8
+  %add.ptr183 = getelementptr inbounds [65536 x float]* null, i64 0, i64 %add.ptr158.sum40975066
+  %__v.i4162 = bitcast float* %add.ptr183 to <8 x float>*
+  %6 = load <8 x float>* %__v.i4162, align 16, !tbaa !5
+  %add.ptr200.sum40995067 = or i64 undef, 8
+  %add.ptr225 = getelementptr inbounds [65536 x float]* null, i64 0, i64 %add.ptr200.sum40995067
+  %__v.i4167 = bitcast float* %add.ptr225 to <8 x float>*
+  %7 = load <8 x float>* %__v.i4167, align 4, !tbaa !5
+  %8 = load <8 x float>* undef, align 16, !tbaa !5
+  %add.ptr242.sum41015068 = or i64 0, 8
+  %add.ptr267 = getelementptr inbounds [65536 x float]* null, i64 0, i64 %add.ptr242.sum41015068
+  %__v.i4171 = bitcast float* %add.ptr267 to <8 x float>*
+  %9 = load <8 x float>* %__v.i4171, align 4, !tbaa !5
+  %mul.i4690 = fmul <8 x float> %7, undef
+  %add.i4665 = fadd <8 x float> undef, undef
+  %mul.i4616 = fmul <8 x float> %8, undef
+  %mul.i4598 = fmul <8 x float> undef, undef
+  %add.i4597 = fadd <8 x float> undef, %mul.i4598
+  %mul.i4594 = fmul <8 x float> %6, undef
+  %add.i4593 = fadd <8 x float> undef, %mul.i4594
+  %mul.i4578 = fmul <8 x float> %9, undef
+  %add.i4577 = fadd <8 x float> %add.i4593, %mul.i4578
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4577) #1
+  %10 = load <8 x float>* null, align 16, !tbaa !5
+  %11 = load <8 x float>* undef, align 16, !tbaa !5
+  %mul.i4564 = fmul <8 x float> %4, undef
+  %add.i4563 = fadd <8 x float> %10, %mul.i4564
+  %mul.i4560 = fmul <8 x float> %5, undef
+  %add.i4559 = fadd <8 x float> %11, %mul.i4560
+  %add.i4547 = fadd <8 x float> %add.i4563, undef
+  %mul.i4546 = fmul <8 x float> %7, undef
+  %add.i4545 = fadd <8 x float> undef, %mul.i4546
+  %mul.i4544 = fmul <8 x float> %8, undef
+  %add.i4543 = fadd <8 x float> %add.i4559, %mul.i4544
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4547) #1
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4545) #1
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4543) #1
+  %add.i4455 = fadd <8 x float> undef, undef
+  %mul.i4454 = fmul <8 x float> undef, undef
+  %add.i4453 = fadd <8 x float> undef, %mul.i4454
+  %mul.i4440 = fmul <8 x float> zeroinitializer, %vecinit7.i4448
+  %add.i4439 = fadd <8 x float> %add.i4455, %mul.i4440
+  %mul.i4438 = fmul <8 x float> %7, %vecinit7.i4448
+  %add.i4437 = fadd <8 x float> %add.i4453, %mul.i4438
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4439) #1
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4437) #1
+  %add.i4413 = fadd <8 x float> zeroinitializer, undef
+  %mul.i4400 = fmul <8 x float> %8, undef
+  %add.i4399 = fadd <8 x float> undef, %mul.i4400
+  %add.i4397 = fadd <8 x float> %add.i4413, zeroinitializer
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> zeroinitializer) #1
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4399) #1
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4397) #1
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> undef) #1
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> undef) #1
+  %mul.i4330 = fmul <8 x float> %7, undef
+  %add.i4329 = fadd <8 x float> undef, %mul.i4330
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4329) #1
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> undef) #1
+  %mul.i4312 = fmul <8 x float> %4, undef
+  %add.i4311 = fadd <8 x float> undef, %mul.i4312
+  %mul.i4306 = fmul <8 x float> %6, undef
+  %add.i4305 = fadd <8 x float> undef, %mul.i4306
+  %add.i4295 = fadd <8 x float> %add.i4311, undef
+  %mul.i4294 = fmul <8 x float> %7, %vecinit7.i4304
+  %add.i4293 = fadd <8 x float> undef, %mul.i4294
+  %mul.i4292 = fmul <8 x float> %8, %vecinit7.i4304
+  %add.i4291 = fadd <8 x float> undef, %mul.i4292
+  %mul.i4290 = fmul <8 x float> %9, %vecinit7.i4304
+  %add.i4289 = fadd <8 x float> %add.i4305, %mul.i4290
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4295) #1
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4293) #1
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4291) #1
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4289) #1
+  %12 = load <8 x float>* undef, align 16, !tbaa !5
+  %mul.i4274 = fmul <8 x float> undef, undef
+  %add.i4273 = fadd <8 x float> %12, %mul.i4274
+  %mul.i4258 = fmul <8 x float> %7, undef
+  %add.i4257 = fadd <8 x float> %add.i4273, %mul.i4258
+  %mul.i4254 = fmul <8 x float> %9, undef
+  %add.i4253 = fadd <8 x float> undef, %mul.i4254
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4257) #1
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4253) #1
+  %mul.i = fmul <8 x float> %9, %vecinit7.i4196
+  %add.i = fadd <8 x float> undef, %mul.i
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> zeroinitializer) #1
+  call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i) #1
+  unreachable
+}
+
+; Function Attrs: nounwind
+declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) #1
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 3.5 "}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"float", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!3, !3, i64 0}
diff --git a/test/CodeGen/X86/pr21099.ll b/test/CodeGen/X86/pr21099.ll
new file mode 100644
index 000000000000..07292c125eea
--- /dev/null
+++ b/test/CodeGen/X86/pr21099.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -O2 -march=x86-64 -verify-machineinstrs | FileCheck %s
+
+define void @pr21099(i64* %p) {
+; CHECK-LABEL: pr21099
+; CHECK: lock
+; CHECK-NEXT: addq $-2147483648
+; This number is INT32_MIN: 0x80000000UL
+  %1 = atomicrmw add i64* %p, i64 -2147483648 seq_cst
+  ret void
+}
diff --git a/test/CodeGen/X86/pr21529.ll b/test/CodeGen/X86/pr21529.ll
new file mode 100644
index 000000000000..655bc844f503
--- /dev/null
+++ b/test/CodeGen/X86/pr21529.ll
@@ -0,0 +1,15 @@
+; RUN: llc -show-mc-encoding < %s | FileCheck %s
+
+; Test that the direct object emission selects the and variant with 8 bit
+; immediate.
+; We used to get this wrong when using direct object emission, but not when
+; reading assembly.
+
+; CHECK: andq    $-32, %rsp              # encoding: [0x48,0x83,0xe4,0xe0]
+
+target triple = "x86_64-pc-linux"
+
+define void @f() {
+  %foo = alloca i8, align 32
+  ret void
+}
diff --git a/test/CodeGen/X86/pr22019.ll b/test/CodeGen/X86/pr22019.ll
new file mode 100644
index 000000000000..4cee5d704d3a
--- /dev/null
+++ b/test/CodeGen/X86/pr22019.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm "pselect = __pselect"
+module asm "var = __var"
+module asm "alias = __alias"
+; CHECK: pselect = __pselect
+; CHECK: var = __var
+; CHECK: alias = __alias
+
+; CHECK: pselect:
+; CHECK: retq
+define void @pselect() {
+  ret void
+}
+
+; CHECK: var:
+; CHECK: .long 0
+@var = global i32 0
+
+; CHECK: alias = var
+@alias = alias i32* @var
diff --git a/test/CodeGen/X86/pr22103.ll b/test/CodeGen/X86/pr22103.ll
new file mode 100644
index 000000000000..77c0751e219d
--- /dev/null
+++ b/test/CodeGen/X86/pr22103.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s | FileCheck %s
+; Don't try to emit a direct call through a TLS global.
+; This fixes PR22103
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = external thread_local global i64
+
+; Function Attrs: nounwind
+define void @_Z1fv() {
+; CHECK-NOT: callq *$a
+; CHECK: movq %fs:0, [[RAX:%r..]]
+; CHECK-NEXT: addq    a@GOTTPOFF(%rip), [[RAX]]
+; CHECK-NEXT: callq *[[RAX]]
+entry:
+  call void bitcast (i64* @a to void ()*)()
+  ret void
+}
diff --git a/test/CodeGen/X86/pre-ra-sched.ll b/test/CodeGen/X86/pre-ra-sched.ll
index 70135d43f49b..bb4c1269b7cf 100644
--- a/test/CodeGen/X86/pre-ra-sched.ll
+++ b/test/CodeGen/X86/pre-ra-sched.ll
@@ -1,4 +1,4 @@
-; RUN-disabled: llc < %s -mtriple=x86_64-apple-macosx -pre-RA-sched=ilp -debug-only=pre-RA-sched \
+; RUN-disabled: llc < %s -verify-machineinstrs -mtriple=x86_64-apple-macosx -pre-RA-sched=ilp -debug-only=pre-RA-sched \
 ; RUN-disabled:     2>&1 | FileCheck %s
 ; RUN: true
 ; REQUIRES: asserts
diff --git a/test/CodeGen/X86/prefixdata.ll b/test/CodeGen/X86/prefixdata.ll
index 2ec1892dd183..9bb54a2a3977 100644
--- a/test/CodeGen/X86/prefixdata.ll
+++ b/test/CodeGen/X86/prefixdata.ll
@@ -2,16 +2,17 @@
 
 @i = linkonce_odr global i32 1
 
-; CHECK: f:
-; CHECK-NEXT: .cfi_startproc
+; CHECK: .type f,@function
 ; CHECK-NEXT: .long	1
+; CHECK-NEXT: # 0x1
+; CHECK-NEXT: f:
 define void @f() prefix i32 1 {
   ret void
 }
 
-; CHECK: g:
-; CHECK-NEXT: .cfi_startproc
+; CHECK: .type g,@function
 ; CHECK-NEXT: .quad	i
+; CHECK-NEXT: g:
 define void @g() prefix i32* @i {
   ret void
 }
diff --git a/test/CodeGen/X86/prologuedata.ll b/test/CodeGen/X86/prologuedata.ll
new file mode 100644
index 000000000000..6a50ddbfd140
--- /dev/null
+++ b/test/CodeGen/X86/prologuedata.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+@i = linkonce_odr global i32 1
+
+; CHECK: f:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: .long	1
+define void @f() prologue i32 1 {
+  ret void
+}
+
+; CHECK: g:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: .quad	i
+define void @g() prologue i32* @i {
+  ret void
+}
diff --git a/test/CodeGen/X86/pshufb-mask-comments.ll b/test/CodeGen/X86/pshufb-mask-comments.ll
new file mode 100644
index 000000000000..303c4a684761
--- /dev/null
+++ b/test/CodeGen/X86/pshufb-mask-comments.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=x86-64 -mattr=+ssse3 | FileCheck %s
+
+; Test that the pshufb mask comment is correct.
+
+define <16 x i8> @test1(<16 x i8> %V) {
+; CHECK-LABEL: test1:
+; CHECK: pshufb {{.*}}# xmm0 = xmm0[1,0,0,0,0,2,0,0,0,0,3,0,0,0,0,4]
+  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %V, <16 x i8> <i8 1, i8 0, i8 0, i8 0, i8 0, i8 2, i8 0, i8 0, i8 0, i8 0, i8 3, i8 0, i8 0, i8 0, i8 0, i8 4>)
+  ret <16 x i8> %1
+}
+
+; Test that indexes larger than the size of the vector are shown masked (bottom 4 bits).
+
+define <16 x i8> @test2(<16 x i8> %V) {
+; CHECK-LABEL: test2:
+; CHECK: pshufb {{.*}}# xmm0 = xmm0[15,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2]
+  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %V, <16 x i8> <i8 15, i8 0, i8 0, i8 0, i8 0, i8 16, i8 0, i8 0, i8 0, i8 0, i8 17, i8 0, i8 0, i8 0, i8 0, i8 50>)
+  ret <16 x i8> %1
+}
+
+; Test that indexes with bit seven set are shown as zero.
+
+define <16 x i8> @test3(<16 x i8> %V) {
+; CHECK-LABEL: test3:
+; CHECK: pshufb {{.*}}# xmm0 = xmm0[1,0,0,15,0,2,0,0],zero,xmm0[0,3,0,0],zero,xmm0[0,4]
+  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %V, <16 x i8> <i8 1, i8 0, i8 0, i8 127, i8 0, i8 2, i8 0, i8 0, i8 128, i8 0, i8 3, i8 0, i8 0, i8 255, i8 0, i8 4>)
+  ret <16 x i8> %1
+}
+
+; Test that we won't crash when the constant was reused for another instruction.
+
+define <16 x i8> @test4(<2 x i64>* %V) {
+; CHECK-LABEL: test4
+; CHECK: pshufb {{.*}}
+  store <2 x i64> <i64 1084818905618843912, i64 506097522914230528>, <2 x i64>* %V, align 16
+  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> undef, <16 x i8> <i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
+  ret <16 x i8> %1
+}
+
+declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
diff --git a/test/CodeGen/X86/ragreedy-bug.ll b/test/CodeGen/X86/ragreedy-bug.ll
index df9b41d6e90b..83ac274bba19 100644
--- a/test/CodeGen/X86/ragreedy-bug.ll
+++ b/test/CodeGen/X86/ragreedy-bug.ll
@@ -266,27 +266,27 @@ return:
   %retval.0 = phi i32 [ 0, %entry ], [ 1, %land.lhs.true52 ], [ 1, %land.lhs.true43 ], [ 0, %if.else123 ], [ 1, %while.cond59.preheader ], [ 1, %while.cond95.preheader ], [ 1, %while.cond130.preheader ], [ 1, %land.lhs.true28 ], [ 1, %if.then83 ], [ 0, %lor.lhs.false74 ], [ 1, %land.rhs ], [ 1, %if.then117 ], [ 0, %while.body104 ], [ 1, %land.rhs99 ], [ 1, %if.then152 ], [ 0, %while.body139 ], [ 1, %land.rhs134 ], [ 0, %while.body ]
   ret i32 %retval.0
 }
-!181 = metadata !{metadata !"branch_weights", i32 662038, i32 1}
-!988 = metadata !{metadata !"branch_weights", i32 12091450, i32 1916}
-!989 = metadata !{metadata !"branch_weights", i32 7564670, i32 4526781}
-!990 = metadata !{metadata !"branch_weights", i32 7484958, i32 13283499}
-!991 = metadata !{metadata !"branch_weights", i32 8677007, i32 4606493}
-!992 = metadata !{metadata !"branch_weights", i32 -1172426948, i32 145094705}
-!993 = metadata !{metadata !"branch_weights", i32 1468914, i32 5683688}
-!994 = metadata !{metadata !"branch_weights", i32 114025221, i32 -1217548794, i32 -1199521551, i32 87712616}
-!995 = metadata !{metadata !"branch_weights", i32 1853716452, i32 -444717951, i32 932776759}
-!996 = metadata !{metadata !"branch_weights", i32 1004870, i32 20259}
-!997 = metadata !{metadata !"branch_weights", i32 20071, i32 189}
-!998 = metadata !{metadata !"branch_weights", i32 -1020255939, i32 572177766}
-!999 = metadata !{metadata !"branch_weights", i32 2666513, i32 3466431}
-!1000 = metadata !{metadata !"branch_weights", i32 5117635, i32 1859780}
-!1001 = metadata !{metadata !"branch_weights", i32 354902465, i32 -1444604407}
-!1002 = metadata !{metadata !"branch_weights", i32 -1762419279, i32 1592770684}
-!1003 = metadata !{metadata !"branch_weights", i32 1435905930, i32 -1951930624}
-!1004 = metadata !{metadata !"branch_weights", i32 1, i32 504888}
-!1005 = metadata !{metadata !"branch_weights", i32 94662, i32 504888}
-!1006 = metadata !{metadata !"branch_weights", i32 -1897793104, i32 160196332}
-!1007 = metadata !{metadata !"branch_weights", i32 2074643678, i32 -29579071}
-!1008 = metadata !{metadata !"branch_weights", i32 1, i32 226163}
-!1009 = metadata !{metadata !"branch_weights", i32 58357, i32 226163}
-!1010 = metadata !{metadata !"branch_weights", i32 -2072848646, i32 92907517}
+!181 = !{!"branch_weights", i32 662038, i32 1}
+!988 = !{!"branch_weights", i32 12091450, i32 1916}
+!989 = !{!"branch_weights", i32 7564670, i32 4526781}
+!990 = !{!"branch_weights", i32 7484958, i32 13283499}
+!991 = !{!"branch_weights", i32 8677007, i32 4606493}
+!992 = !{!"branch_weights", i32 -1172426948, i32 145094705}
+!993 = !{!"branch_weights", i32 1468914, i32 5683688}
+!994 = !{!"branch_weights", i32 114025221, i32 -1217548794, i32 -1199521551, i32 87712616}
+!995 = !{!"branch_weights", i32 1853716452, i32 -444717951, i32 932776759}
+!996 = !{!"branch_weights", i32 1004870, i32 20259}
+!997 = !{!"branch_weights", i32 20071, i32 189}
+!998 = !{!"branch_weights", i32 -1020255939, i32 572177766}
+!999 = !{!"branch_weights", i32 2666513, i32 3466431}
+!1000 = !{!"branch_weights", i32 5117635, i32 1859780}
+!1001 = !{!"branch_weights", i32 354902465, i32 -1444604407}
+!1002 = !{!"branch_weights", i32 -1762419279, i32 1592770684}
+!1003 = !{!"branch_weights", i32 1435905930, i32 -1951930624}
+!1004 = !{!"branch_weights", i32 1, i32 504888}
+!1005 = !{!"branch_weights", i32 94662, i32 504888}
+!1006 = !{!"branch_weights", i32 -1897793104, i32 160196332}
+!1007 = !{!"branch_weights", i32 2074643678, i32 -29579071}
+!1008 = !{!"branch_weights", i32 1, i32 226163}
+!1009 = !{!"branch_weights", i32 58357, i32 226163}
+!1010 = !{!"branch_weights", i32 -2072848646, i32 92907517}
diff --git a/test/CodeGen/X86/ragreedy-hoist-spill.ll b/test/CodeGen/X86/ragreedy-hoist-spill.ll
index c6b28f71af46..57afb4152db5 100644
--- a/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -202,7 +202,6 @@ lor.rhs500:
   ; CHECK: lor.rhs500
   ; Make sure that we don't hoist the spill to outer loops.
   ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp)
-  ; CHECK: movq %r{{.*}}, {{[0-9]+}}(%rsp)
   ; CHECK: callq {{.*}}maskrune
   %call3.i.i2792 = call i32 @__maskrune(i32 undef, i64 256)
   br i1 undef, label %land.lhs.true504, label %do.body479.backedge
@@ -378,12 +377,12 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"clang version 3.5.0 (trunk 204257)"}
-!1 = metadata !{metadata !2, metadata !2, i64 0}
-!2 = metadata !{metadata !"int", metadata !3, i64 0}
-!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
-!4 = metadata !{metadata !"Simple C/C++ TBAA"}
-!5 = metadata !{metadata !3, metadata !3, i64 0}
-!6 = metadata !{metadata !7, metadata !8, i64 8}
-!7 = metadata !{metadata !"", metadata !8, i64 0, metadata !8, i64 8, metadata !3, i64 16}
-!8 = metadata !{metadata !"any pointer", metadata !3, i64 0}
+!0 = !{!"clang version 3.5.0 (trunk 204257)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!3, !3, i64 0}
+!6 = !{!7, !8, i64 8}
+!7 = !{!"", !8, i64 0, !8, i64 8, !3, i64 16}
+!8 = !{!"any pointer", !3, i64 0}
diff --git a/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll b/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
index d8e45727b9d2..49d58f437c21 100644
--- a/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
+++ b/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
@@ -2,10 +2,12 @@
 ; Without the last chance recoloring, this test fails with:
 ; "ran out of registers".
 
-; RUN: not llc -regalloc=greedy -relocation-model=pic -lcr-max-depth=0  < %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEPTH
+; NOTE: With the fix to PR18883, we don't actually run out of registers here
+; any more, and so those checks are disabled. This test remains only for general coverage.
+; XXX: not llc -regalloc=greedy -relocation-model=pic -lcr-max-depth=0  < %s 2>&1 | FileCheck %s --check-prefix=CHECK-DEPTH
 ; Test whether failure due to cutoff for depth is reported
 
-; RUN: not llc -regalloc=greedy -relocation-model=pic -lcr-max-interf=1  < %s 2>&1 | FileCheck %s --check-prefix=CHECK-INTERF
+; XXX: not llc -regalloc=greedy -relocation-model=pic -lcr-max-interf=1  < %s 2>&1 | FileCheck %s --check-prefix=CHECK-INTERF
 ; Test whether failure due to cutoff for interference is reported
 
 ; RUN: llc -regalloc=greedy -relocation-model=pic -lcr-max-interf=1 -lcr-max-depth=0 -exhaustive-register-search < %s > %t 2>&1
diff --git a/test/CodeGen/X86/recip-fastmath.ll b/test/CodeGen/X86/recip-fastmath.ll
new file mode 100644
index 000000000000..83b86accdb38
--- /dev/null
+++ b/test/CodeGen/X86/recip-fastmath.ll
@@ -0,0 +1,109 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+use-recip-est,+avx -x86-recip-refinement-steps=2 | FileCheck %s --check-prefix=REFINE
+
+; If the target's divss/divps instructions are substantially
+; slower than rcpss/rcpps with a Newton-Raphson refinement,
+; we should generate the estimate sequence.
+
+; See PR21385 ( http://llvm.org/bugs/show_bug.cgi?id=21385 )
+; for details about the accuracy, speed, and implementation
+; differences of x86 reciprocal estimates.
+
+define float @reciprocal_estimate(float %x) #0 {
+  %div = fdiv fast float 1.0, %x
+  ret float %div
+
+; CHECK-LABEL: reciprocal_estimate:
+; CHECK: movss
+; CHECK-NEXT: divss
+; CHECK-NEXT: movaps
+; CHECK-NEXT: retq
+
+; BTVER2-LABEL: reciprocal_estimate:
+; BTVER2: vrcpss
+; BTVER2: vmulss
+; BTVER2: vsubss
+; BTVER2: vmulss
+; BTVER2: vaddss
+; BTVER2-NEXT: retq
+
+; REFINE-LABEL: reciprocal_estimate:
+; REFINE: vrcpss
+; REFINE: vmulss
+; REFINE: vsubss
+; REFINE: vmulss
+; REFINE: vaddss
+; REFINE: vmulss
+; REFINE: vsubss
+; REFINE: vmulss
+; REFINE: vaddss
+; REFINE-NEXT: retq
+}
+
+define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
+  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
+  ret <4 x float> %div
+
+; CHECK-LABEL: reciprocal_estimate_v4f32:
+; CHECK: movaps
+; CHECK-NEXT: divps
+; CHECK-NEXT: movaps
+; CHECK-NEXT: retq
+
+; BTVER2-LABEL: reciprocal_estimate_v4f32:
+; BTVER2: vrcpps
+; BTVER2: vmulps
+; BTVER2: vsubps
+; BTVER2: vmulps
+; BTVER2: vaddps
+; BTVER2-NEXT: retq
+
+; REFINE-LABEL: reciprocal_estimate_v4f32:
+; REFINE: vrcpps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE-NEXT: retq
+}
+
+define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
+  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
+  ret <8 x float> %div
+
+; CHECK-LABEL: reciprocal_estimate_v8f32:
+; CHECK: movaps
+; CHECK: movaps
+; CHECK-NEXT: divps
+; CHECK-NEXT: divps
+; CHECK-NEXT: movaps
+; CHECK-NEXT: movaps
+; CHECK-NEXT: retq
+
+; BTVER2-LABEL: reciprocal_estimate_v8f32:
+; BTVER2: vrcpps
+; BTVER2: vmulps
+; BTVER2: vsubps
+; BTVER2: vmulps
+; BTVER2: vaddps
+; BTVER2-NEXT: retq
+
+; REFINE-LABEL: reciprocal_estimate_v8f32:
+; REFINE: vrcpps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE: vmulps
+; REFINE: vsubps
+; REFINE: vmulps
+; REFINE: vaddps
+; REFINE-NEXT: retq
+}
+
+attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/X86/regalloc-reconcile-broken-hints.ll b/test/CodeGen/X86/regalloc-reconcile-broken-hints.ll
new file mode 100644
index 000000000000..00679428ca63
--- /dev/null
+++ b/test/CodeGen/X86/regalloc-reconcile-broken-hints.ll
@@ -0,0 +1,145 @@
+; RUN: llc < %s -o - -mtriple=x86_64-apple-macosx | FileCheck %s
+; Test case for the recoloring of broken hints.
+; This is tricky to have something reasonably small to kick this optimization since
+; it requires that spliting and spilling occur.
+; The bottom line is that this test case is fragile.
+; This was reduced from the make_list function from the llvm-testsuite:
+; SingleSource/Benchmarks/McGill/chomp.c
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%struct._list = type { i32*, %struct._list* }
+
+@ncol = external global i32, align 4
+@nrow = external global i32, align 4
+
+declare noalias i32* @copy_data()
+
+declare noalias i8* @malloc(i64)
+
+declare i32 @get_value()
+
+declare i32 @in_wanted(i32* nocapture readonly)
+
+declare noalias i32* @make_data()
+
+; CHECK-LABEL: make_list:
+; Function prologue.
+; CHECK: pushq
+; CHECK: subq ${{[0-9]+}}, %rsp
+; Move the first argument (%data) into a temporary register.
+; It will not survive the call to malloc otherwise.
+; CHECK: movq %rdi, [[ARG1:%r[0-9a-z]+]]
+; CHECK: callq _malloc
+; Compute %data - 1 as used for load in land.rhs.i (via the variable  %indvars.iv.next.i).
+; CHECK: addq $-4, [[ARG1]]
+; We use to produce a useless copy here and move %data in another temporary register. 
+; CHECK-NOT: movq [[ARG1]]
+; End of the first basic block.
+; CHECK: .align
+; Now check that %data is used in an address computation.
+; CHECK: leaq ([[ARG1]]
+define %struct._list* @make_list(i32* nocapture readonly %data, i32* nocapture %value, i32* nocapture %all) {
+entry:
+  %call = tail call i8* @malloc(i64 16)
+  %next = getelementptr inbounds i8* %call, i64 8
+  %tmp = bitcast i8* %next to %struct._list**
+  %tmp2 = bitcast i8* %call to %struct._list*
+  %.pre78 = load i32* @ncol, align 4
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.inc32, %entry
+  %tmp4 = phi i32 [ %.pre78, %entry ], [ 0, %for.inc32 ]
+  %current.077 = phi %struct._list* [ %tmp2, %entry ], [ %current.1.lcssa, %for.inc32 ]
+  %cmp270 = icmp eq i32 %tmp4, 0
+  br i1 %cmp270, label %for.inc32, label %for.body3
+
+for.body3:                                        ; preds = %if.end31, %for.cond1.preheader
+  %current.173 = phi %struct._list* [ %current.2, %if.end31 ], [ %current.077, %for.cond1.preheader ]
+  %row.172 = phi i32 [ %row.3, %if.end31 ], [ 0, %for.cond1.preheader ]
+  %col.071 = phi i32 [ %inc, %if.end31 ], [ 0, %for.cond1.preheader ]
+  %call4 = tail call i32* @make_data()
+  %tmp5 = load i32* @ncol, align 4
+  %tobool14.i = icmp eq i32 %tmp5, 0
+  br i1 %tobool14.i, label %while.cond.i, label %while.body.lr.ph.i
+
+while.body.lr.ph.i:                               ; preds = %for.body3
+  %tmp6 = sext i32 %tmp5 to i64
+  br label %while.body.i
+
+while.body.i:                                     ; preds = %while.body.i, %while.body.lr.ph.i
+  %indvars.iv.i = phi i64 [ %tmp6, %while.body.lr.ph.i ], [ %indvars.iv.next.i, %while.body.i ]
+  %indvars.iv.next.i = add nsw i64 %indvars.iv.i, -1
+  %tmp9 = trunc i64 %indvars.iv.next.i to i32
+  %tobool.i = icmp eq i32 %tmp9, 0
+  br i1 %tobool.i, label %while.cond.i, label %while.body.i
+
+while.cond.i:                                     ; preds = %land.rhs.i, %while.body.i, %for.body3
+  %indvars.iv.i64 = phi i64 [ %indvars.iv.next.i65, %land.rhs.i ], [ 0, %for.body3 ], [ %tmp6, %while.body.i ]
+  %indvars.iv.next.i65 = add nsw i64 %indvars.iv.i64, -1
+  %tmp10 = trunc i64 %indvars.iv.i64 to i32
+  %tobool.i66 = icmp eq i32 %tmp10, 0
+  br i1 %tobool.i66, label %if.else, label %land.rhs.i
+
+land.rhs.i:                                       ; preds = %while.cond.i
+  %arrayidx.i67 = getelementptr inbounds i32* %call4, i64 %indvars.iv.next.i65
+  %tmp11 = load i32* %arrayidx.i67, align 4
+  %arrayidx2.i68 = getelementptr inbounds i32* %data, i64 %indvars.iv.next.i65
+  %tmp12 = load i32* %arrayidx2.i68, align 4
+  %cmp.i69 = icmp eq i32 %tmp11, %tmp12
+  br i1 %cmp.i69, label %while.cond.i, label %equal_data.exit
+
+equal_data.exit:                                  ; preds = %land.rhs.i
+  %cmp3.i = icmp slt i32 %tmp10, 1
+  br i1 %cmp3.i, label %if.else, label %if.then
+
+if.then:                                          ; preds = %equal_data.exit
+  %next7 = getelementptr inbounds %struct._list* %current.173, i64 0, i32 1
+  %tmp14 = load %struct._list** %next7, align 8
+  %next12 = getelementptr inbounds %struct._list* %tmp14, i64 0, i32 1
+  store %struct._list* null, %struct._list** %next12, align 8
+  %tmp15 = load %struct._list** %next7, align 8
+  %tmp16 = load i32* %value, align 4
+  %cmp14 = icmp eq i32 %tmp16, 1
+  %.tmp16 = select i1 %cmp14, i32 0, i32 %tmp16
+  %tmp18 = load i32* %all, align 4
+  %tmp19 = or i32 %tmp18, %.tmp16
+  %tmp20 = icmp eq i32 %tmp19, 0
+  br i1 %tmp20, label %if.then19, label %if.end31
+
+if.then19:                                        ; preds = %if.then
+  %call21 = tail call i32 @in_wanted(i32* %call4)
+  br label %if.end31
+
+if.else:                                          ; preds = %equal_data.exit, %while.cond.i
+  %cmp26 = icmp eq i32 %col.071, 0
+  %.row.172 = select i1 %cmp26, i32 0, i32 %row.172
+  %sub30 = add nsw i32 %tmp5, -1
+  br label %if.end31
+
+if.end31:                                         ; preds = %if.else, %if.then19, %if.then
+  %col.1 = phi i32 [ %sub30, %if.else ], [ 0, %if.then ], [ 0, %if.then19 ]
+  %row.3 = phi i32 [ %.row.172, %if.else ], [ %row.172, %if.then ], [ 0, %if.then19 ]
+  %current.2 = phi %struct._list* [ %current.173, %if.else ], [ %tmp15, %if.then ], [ %tmp15, %if.then19 ]
+  %inc = add nsw i32 %col.1, 1
+  %tmp25 = load i32* @ncol, align 4
+  %cmp2 = icmp eq i32 %inc, %tmp25
+  br i1 %cmp2, label %for.cond1.for.inc32_crit_edge, label %for.body3
+
+for.cond1.for.inc32_crit_edge:                    ; preds = %if.end31
+  %.pre79 = load i32* @nrow, align 4
+  br label %for.inc32
+
+for.inc32:                                        ; preds = %for.cond1.for.inc32_crit_edge, %for.cond1.preheader
+  %tmp26 = phi i32 [ %.pre79, %for.cond1.for.inc32_crit_edge ], [ 0, %for.cond1.preheader ]
+  %current.1.lcssa = phi %struct._list* [ %current.2, %for.cond1.for.inc32_crit_edge ], [ %current.077, %for.cond1.preheader ]
+  %row.1.lcssa = phi i32 [ %row.3, %for.cond1.for.inc32_crit_edge ], [ 0, %for.cond1.preheader ]
+  %inc33 = add nsw i32 %row.1.lcssa, 1
+  %cmp = icmp eq i32 %inc33, %tmp26
+  br i1 %cmp, label %for.end34, label %for.cond1.preheader
+
+for.end34:                                        ; preds = %for.inc32
+  %.pre = load %struct._list** %tmp, align 8
+  ret %struct._list* %.pre
+}
diff --git a/test/CodeGen/X86/remat-phys-dead.ll b/test/CodeGen/X86/remat-phys-dead.ll
index 4d7ee622a37e..6cdcd28eacd8 100644
--- a/test/CodeGen/X86/remat-phys-dead.ll
+++ b/test/CodeGen/X86/remat-phys-dead.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: llc -mtriple=x86_64-apple-darwin -debug -o /dev/null < %s 2>&1 | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-darwin -debug -o /dev/null < %s 2>&1 | FileCheck %s
 
 ; We need to make sure that rematerialization into a physical register marks the
 ; super- or sub-register as dead after this rematerialization since only the
diff --git a/test/CodeGen/X86/return_zeroext_i2.ll b/test/CodeGen/X86/return_zeroext_i2.ll
new file mode 100644
index 000000000000..d535b0c41267
--- /dev/null
+++ b/test/CodeGen/X86/return_zeroext_i2.ll
@@ -0,0 +1,7 @@
+; RUN: llc -mtriple=i386-pc-win32 < %s | FileCheck %s 
+; Check that the testcase does not crash
+define zeroext i2 @crash () {
+  ret i2 0
+}
+; CHECK: xorl	%eax, %eax
+; CHECK-NEXT: retl
diff --git a/test/CodeGen/X86/scev-interchange.ll b/test/CodeGen/X86/scev-interchange.ll
index 71a4d21a9b95..0e7047b4845d 100644
--- a/test/CodeGen/X86/scev-interchange.ll
+++ b/test/CodeGen/X86/scev-interchange.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -mtriple=x86_64-linux
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 	%"struct.DataOutBase::GmvFlags" = type { i32 }
diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll
index b82be41b8cbf..e34ba5412f07 100644
--- a/test/CodeGen/X86/segmented-stacks-dynamic.ll
+++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -1,7 +1,9 @@
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -verify-machineinstrs | FileCheck %s -check-prefix=X32ABI
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -filetype=obj
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
@@ -61,6 +63,26 @@ false:
 ; X64-NEXT: callq __morestack_allocate_stack_space
 ; X64:      movq %rax, %rdi
 
+; X32ABI-LABEL:      test_basic:
+
+; X32ABI:      cmpl %fs:64, %esp
+; X32ABI-NEXT: ja      .LBB0_2
+
+; X32ABI:      movl $24, %r10d
+; X32ABI-NEXT: movl $0, %r11d
+; X32ABI-NEXT: callq __morestack
+; X32ABI-NEXT: ret
+
+; X32ABI:      movl %esp, %[[EDI:edi|eax]]
+; X32ABI:      subl %{{.*}}, %[[EDI]]
+; X32ABI-NEXT: cmpl %[[EDI]], %fs:64
+
+; X32ABI:      movl %[[EDI]], %esp
+
+; X32ABI:      movl %{{.*}}, %edi
+; X32ABI-NEXT: callq __morestack_allocate_stack_space
+; X32ABI:      movl %eax, %edi
+
 }
 
 attributes #0 = { "split-stack" }
diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll
index 9dab3cd8d6d5..3e47121a380e 100644
--- a/test/CodeGen/X86/segmented-stacks.ll
+++ b/test/CodeGen/X86/segmented-stacks.ll
@@ -1,18 +1,25 @@
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux  -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -code-model=large -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux-Large
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -verify-machineinstrs | FileCheck %s -check-prefix=X32ABI
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -verify-machineinstrs | FileCheck %s -check-prefix=X64-FreeBSD
+; RUN: llc < %s -mcpu=generic -mtriple=i686-dragonfly -verify-machineinstrs | FileCheck %s -check-prefix=X32-DFlyBSD
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-dragonfly -verify-machineinstrs | FileCheck %s -check-prefix=X64-DFlyBSD
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -verify-machineinstrs | FileCheck %s -check-prefix=X64-MinGW
 
 ; We used to crash with filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-dragonfly -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-dragonfly -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -filetype=obj
 
 ; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-solaris 2> %t.log
@@ -51,6 +58,26 @@ define void @test_basic() #0 {
 ; X64-Linux-NEXT:  callq __morestack
 ; X64-Linux-NEXT:  ret
 
+; X64-Linux-Large-LABEL:       test_basic:
+
+; X64-Linux-Large:       cmpq %fs:112, %rsp
+; X64-Linux-Large-NEXT:  ja      .LBB0_2
+
+; X64-Linux-Large:       movabsq $40, %r10
+; X64-Linux-Large-NEXT:  movabsq $0, %r11
+; X64-Linux-Large-NEXT:  callq *__morestack_addr(%rip)
+; X64-Linux-Large-NEXT:  ret
+
+; X32ABI-LABEL:       test_basic:
+
+; X32ABI:       cmpl %fs:64, %esp
+; X32ABI-NEXT:  ja      .LBB0_2
+
+; X32ABI:       movl $40, %r10d
+; X32ABI-NEXT:  movl $0, %r11d
+; X32ABI-NEXT:  callq __morestack
+; X32ABI-NEXT:  ret
+
 ; X32-Darwin-LABEL:      test_basic:
 
 ; X32-Darwin:      movl $432, %ecx
@@ -102,6 +129,26 @@ define void @test_basic() #0 {
 ; X64-FreeBSD-NEXT:  callq __morestack
 ; X64-FreeBSD-NEXT:  ret
 
+; X32-DFlyBSD-LABEL:       test_basic:
+
+; X32-DFlyBSD:       cmpl %fs:16, %esp
+; X32-DFlyBSD-NEXT:  ja      .LBB0_2
+
+; X32-DFlyBSD:       pushl $0
+; X32-DFlyBSD-NEXT:  pushl $48
+; X32-DFlyBSD-NEXT:  calll __morestack
+; X32-DFlyBSD-NEXT:  ret
+
+; X64-DFlyBSD-LABEL:       test_basic:
+
+; X64-DFlyBSD:       cmpq %fs:32, %rsp
+; X64-DFlyBSD-NEXT:  ja      .LBB0_2
+
+; X64-DFlyBSD:       movabsq $40, %r10
+; X64-DFlyBSD-NEXT:  movabsq $0, %r11
+; X64-DFlyBSD-NEXT:  callq __morestack
+; X64-DFlyBSD-NEXT:  ret
+
 }
 
 define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
@@ -129,6 +176,16 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
 ; X64-Linux-NEXT:  ret
 ; X64-Linux-NEXT:  movq %rax, %r10
 
+; X32ABI:       cmpl %fs:64, %esp
+; X32ABI-NEXT:  ja      .LBB1_2
+
+; X32ABI:       movl %r10d, %eax
+; X32ABI-NEXT:  movl $56, %r10d
+; X32ABI-NEXT:  movl $0, %r11d
+; X32ABI-NEXT:  callq __morestack
+; X32ABI-NEXT:  ret
+; X32ABI-NEXT:  movq %rax, %r10
+
 ; X32-Darwin:      movl $432, %edx
 ; X32-Darwin-NEXT: cmpl %gs:(%edx), %esp
 ; X32-Darwin-NEXT: ja      LBB1_2
@@ -177,6 +234,24 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
 ; X64-FreeBSD-NEXT:  ret
 ; X64-FreeBSD-NEXT:  movq %rax, %r10
 
+; X32-DFlyBSD:       cmpl %fs:16, %esp
+; X32-DFlyBSD-NEXT:  ja      .LBB1_2
+
+; X32-DFlyBSD:       pushl $4
+; X32-DFlyBSD-NEXT:  pushl $52
+; X32-DFlyBSD-NEXT:  calll __morestack
+; X32-DFlyBSD-NEXT:  ret
+
+; X64-DFlyBSD:       cmpq %fs:32, %rsp
+; X64-DFlyBSD-NEXT:  ja      .LBB1_2
+
+; X64-DFlyBSD:       movq %r10, %rax
+; X64-DFlyBSD-NEXT:  movabsq $56, %r10
+; X64-DFlyBSD-NEXT:  movabsq $0, %r11
+; X64-DFlyBSD-NEXT:  callq __morestack
+; X64-DFlyBSD-NEXT:  ret
+; X64-DFlyBSD-NEXT:  movq %rax, %r10
+
 }
 
 define void @test_large() #0 {
@@ -202,6 +277,15 @@ define void @test_large() #0 {
 ; X64-Linux-NEXT:  callq __morestack
 ; X64-Linux-NEXT:  ret
 
+; X32ABI:       leal -40008(%rsp), %r11d
+; X32ABI-NEXT:  cmpl %fs:64, %r11d
+; X32ABI-NEXT:  ja      .LBB2_2
+
+; X32ABI:       movl $40008, %r10d
+; X32ABI-NEXT:  movl $0, %r11d
+; X32ABI-NEXT:  callq __morestack
+; X32ABI-NEXT:  ret
+
 ; X32-Darwin:      leal -40012(%esp), %ecx
 ; X32-Darwin-NEXT: movl $432, %eax
 ; X32-Darwin-NEXT: cmpl %gs:(%eax), %ecx
@@ -249,6 +333,24 @@ define void @test_large() #0 {
 ; X64-FreeBSD-NEXT:  callq __morestack
 ; X64-FreeBSD-NEXT:  ret
 
+; X32-DFlyBSD:       leal -40008(%esp), %ecx
+; X32-DFlyBSD-NEXT:  cmpl %fs:16, %ecx
+; X32-DFlyBSD-NEXT:  ja      .LBB2_2
+
+; X32-DFlyBSD:       pushl $0
+; X32-DFlyBSD-NEXT:  pushl $40008
+; X32-DFlyBSD-NEXT:  calll __morestack
+; X32-DFlyBSD-NEXT:  ret
+
+; X64-DFlyBSD:       leaq -40008(%rsp), %r11
+; X64-DFlyBSD-NEXT:  cmpq %fs:32, %r11
+; X64-DFlyBSD-NEXT:  ja      .LBB2_2
+
+; X64-DFlyBSD:       movabsq $40008, %r10
+; X64-DFlyBSD-NEXT:  movabsq $0, %r11
+; X64-DFlyBSD-NEXT:  callq __morestack
+; X64-DFlyBSD-NEXT:  ret
+
 }
 
 define fastcc void @test_fastcc() #0 {
@@ -276,6 +378,16 @@ define fastcc void @test_fastcc() #0 {
 ; X64-Linux-NEXT:  callq __morestack
 ; X64-Linux-NEXT:  ret
 
+; X32ABI-LABEL:       test_fastcc:
+
+; X32ABI:       cmpl %fs:64, %esp
+; X32ABI-NEXT:  ja      .LBB3_2
+
+; X32ABI:       movl $40, %r10d
+; X32ABI-NEXT:  movl $0, %r11d
+; X32ABI-NEXT:  callq __morestack
+; X32ABI-NEXT:  ret
+
 ; X32-Darwin-LABEL:      test_fastcc:
 
 ; X32-Darwin:      movl $432, %eax
@@ -327,6 +439,26 @@ define fastcc void @test_fastcc() #0 {
 ; X64-FreeBSD-NEXT:  callq __morestack
 ; X64-FreeBSD-NEXT:  ret
 
+; X32-DFlyBSD-LABEL:       test_fastcc:
+
+; X32-DFlyBSD:       cmpl %fs:16, %esp
+; X32-DFlyBSD-NEXT:  ja      .LBB3_2
+
+; X32-DFlyBSD:       pushl $0
+; X32-DFlyBSD-NEXT:  pushl $48
+; X32-DFlyBSD-NEXT:  calll __morestack
+; X32-DFlyBSD-NEXT:  ret
+
+; X64-DFlyBSD-LABEL:       test_fastcc:
+
+; X64-DFlyBSD:       cmpq %fs:32, %rsp
+; X64-DFlyBSD-NEXT:  ja      .LBB3_2
+
+; X64-DFlyBSD:       movabsq $40, %r10
+; X64-DFlyBSD-NEXT:  movabsq $0, %r11
+; X64-DFlyBSD-NEXT:  callq __morestack
+; X64-DFlyBSD-NEXT:  ret
+
 }
 
 define fastcc void @test_fastcc_large() #0 {
@@ -356,6 +488,17 @@ define fastcc void @test_fastcc_large() #0 {
 ; X64-Linux-NEXT:  callq __morestack
 ; X64-Linux-NEXT:  ret
 
+; X32ABI-LABEL:       test_fastcc_large:
+
+; X32ABI:       leal -40008(%rsp), %r11d
+; X32ABI-NEXT:  cmpl %fs:64, %r11d
+; X32ABI-NEXT:  ja      .LBB4_2
+
+; X32ABI:       movl $40008, %r10d
+; X32ABI-NEXT:  movl $0, %r11d
+; X32ABI-NEXT:  callq __morestack
+; X32ABI-NEXT:  ret
+
 ; X32-Darwin-LABEL:      test_fastcc_large:
 
 ; X32-Darwin:      leal -40012(%esp), %eax
@@ -412,6 +555,28 @@ define fastcc void @test_fastcc_large() #0 {
 ; X64-FreeBSD-NEXT:  callq __morestack
 ; X64-FreeBSD-NEXT:  ret
 
+; X32-DFlyBSD-LABEL:       test_fastcc_large:
+
+; X32-DFlyBSD:       leal -40008(%esp), %eax
+; X32-DFlyBSD-NEXT:  cmpl %fs:16, %eax
+; X32-DFlyBSD-NEXT:  ja      .LBB4_2
+
+; X32-DFlyBSD:       pushl $0
+; X32-DFlyBSD-NEXT:  pushl $40008
+; X32-DFlyBSD-NEXT:  calll __morestack
+; X32-DFlyBSD-NEXT:  ret
+
+; X64-DFlyBSD-LABEL:       test_fastcc_large:
+
+; X64-DFlyBSD:       leaq -40008(%rsp), %r11
+; X64-DFlyBSD-NEXT:  cmpq %fs:32, %r11
+; X64-DFlyBSD-NEXT:  ja      .LBB4_2
+
+; X64-DFlyBSD:       movabsq $40008, %r10
+; X64-DFlyBSD-NEXT:  movabsq $0, %r11
+; X64-DFlyBSD-NEXT:  callq __morestack
+; X64-DFlyBSD-NEXT:  ret
+
 }
 
 define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) #0 {
@@ -446,6 +611,9 @@ define void @test_nostack() #0 {
 ; X64-Linux-LABEL: test_nostack:
 ; X32-Linux-NOT:   callq __morestack
 
+; X32ABI-LABEL: test_nostack:
+; X32ABI-NOT:   callq __morestack
+
 ; X32-Darwin-LABEL: test_nostack:
 ; X32-Darwin-NOT:   calll __morestack
 
@@ -460,6 +628,16 @@ define void @test_nostack() #0 {
 
 ; X64-FreeBSD-LABEL: test_nostack:
 ; X64-FreeBSD-NOT:   callq __morestack
+
+; X32-DFlyBSD-LABEL: test_nostack:
+; X32-DFlyBSD-NOT:   calll __morestack
+
+; X64-DFlyBSD-LABEL: test_nostack:
+; X64-DFlyBSD-NOT:   callq __morestack
 }
 
 attributes #0 = { "split-stack" }
+
+; X64-Linux-Large: .rodata
+; X64-Linux-Large-NEXT: __morestack_addr:
+; X64-Linux-Large-NEXT: .quad	__morestack
diff --git a/test/CodeGen/X86/seh-basic.ll b/test/CodeGen/X86/seh-basic.ll
new file mode 100644
index 000000000000..69d70d70948c
--- /dev/null
+++ b/test/CodeGen/X86/seh-basic.ll
@@ -0,0 +1,175 @@
+; RUN: llc -mtriple x86_64-pc-windows-msvc < %s | FileCheck %s
+
+define void @two_invoke_merged() {
+entry:
+  invoke void @try_body()
+          to label %again unwind label %lpad
+
+again:
+  invoke void @try_body()
+          to label %done unwind label %lpad
+
+done:
+  ret void
+
+lpad:
+  %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+          catch i8* bitcast (i32 (i8*, i8*)* @filt0 to i8*)
+          catch i8* bitcast (i32 (i8*, i8*)* @filt1 to i8*)
+  %sel = extractvalue { i8*, i32 } %vals, 1
+  call void @use_selector(i32 %sel)
+  ret void
+}
+
+; Normal path code
+
+; CHECK-LABEL: {{^}}two_invoke_merged:
+; CHECK: .seh_proc two_invoke_merged
+; CHECK: .seh_handler __C_specific_handler, @unwind, @except
+; CHECK: .Ltmp0:
+; CHECK: callq try_body
+; CHECK-NEXT: .Ltmp1:
+; CHECK: .Ltmp2:
+; CHECK: callq try_body
+; CHECK-NEXT: .Ltmp3:
+; CHECK: retq
+
+; Landing pad code
+
+; CHECK: .Ltmp5:
+; CHECK: movl $1, %ecx
+; CHECK: jmp
+; CHECK: .Ltmp6:
+; CHECK: movl $2, %ecx
+; CHECK: callq use_selector
+
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .long .Ltmp3@IMGREL+1
+; CHECK-NEXT: .long filt0@IMGREL
+; CHECK-NEXT: .long .Ltmp5@IMGREL
+; CHECK-NEXT: .long .Ltmp0@IMGREL
+; CHECK-NEXT: .long .Ltmp3@IMGREL+1
+; CHECK-NEXT: .long filt1@IMGREL
+; CHECK-NEXT: .long .Ltmp6@IMGREL
+; CHECK: .text
+; CHECK: .seh_endproc
+
+define void @two_invoke_gap() {
+entry:
+  invoke void @try_body()
+          to label %again unwind label %lpad
+
+again:
+  call void @do_nothing_on_unwind()
+  invoke void @try_body()
+          to label %done unwind label %lpad
+
+done:
+  ret void
+
+lpad:
+  %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+          catch i8* bitcast (i32 (i8*, i8*)* @filt0 to i8*)
+  %sel = extractvalue { i8*, i32 } %vals, 1
+  call void @use_selector(i32 %sel)
+  ret void
+}
+
+; Normal path code
+
+; CHECK-LABEL: {{^}}two_invoke_gap:
+; CHECK: .seh_proc two_invoke_gap
+; CHECK: .seh_handler __C_specific_handler, @unwind, @except
+; CHECK: .Ltmp11:
+; CHECK: callq try_body
+; CHECK-NEXT: .Ltmp12:
+; CHECK: callq do_nothing_on_unwind
+; CHECK: .Ltmp13:
+; CHECK: callq try_body
+; CHECK-NEXT: .Ltmp14:
+; CHECK: retq
+
+; Landing pad code
+
+; CHECK: .Ltmp16:
+; CHECK: movl $1, %ecx
+; CHECK: callq use_selector
+
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .long 2
+; CHECK-NEXT: .long .Ltmp11@IMGREL
+; CHECK-NEXT: .long .Ltmp12@IMGREL+1
+; CHECK-NEXT: .long filt0@IMGREL
+; CHECK-NEXT: .long .Ltmp16@IMGREL
+; CHECK-NEXT: .long .Ltmp13@IMGREL
+; CHECK-NEXT: .long .Ltmp14@IMGREL+1
+; CHECK-NEXT: .long filt0@IMGREL
+; CHECK-NEXT: .long .Ltmp16@IMGREL
+; CHECK: .text
+; CHECK: .seh_endproc
+
+define void @two_invoke_nounwind_gap() {
+entry:
+  invoke void @try_body()
+          to label %again unwind label %lpad
+
+again:
+  call void @cannot_unwind()
+  invoke void @try_body()
+          to label %done unwind label %lpad
+
+done:
+  ret void
+
+lpad:
+  %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+          catch i8* bitcast (i32 (i8*, i8*)* @filt0 to i8*)
+  %sel = extractvalue { i8*, i32 } %vals, 1
+  call void @use_selector(i32 %sel)
+  ret void
+}
+
+; Normal path code
+
+; CHECK-LABEL: {{^}}two_invoke_nounwind_gap:
+; CHECK: .seh_proc two_invoke_nounwind_gap
+; CHECK: .seh_handler __C_specific_handler, @unwind, @except
+; CHECK: .Ltmp21:
+; CHECK: callq try_body
+; CHECK-NEXT: .Ltmp22:
+; CHECK: callq cannot_unwind
+; CHECK: .Ltmp23:
+; CHECK: callq try_body
+; CHECK-NEXT: .Ltmp24:
+; CHECK: retq
+
+; Landing pad code
+
+; CHECK: .Ltmp26:
+; CHECK: movl $1, %ecx
+; CHECK: callq use_selector
+
+; CHECK: .seh_handlerdata
+; CHECK-NEXT: .long 1
+; CHECK-NEXT: .long .Ltmp21@IMGREL
+; CHECK-NEXT: .long .Ltmp24@IMGREL+1
+; CHECK-NEXT: .long filt0@IMGREL
+; CHECK-NEXT: .long .Ltmp26@IMGREL
+; CHECK: .text
+; CHECK: .seh_endproc
+
+declare void @try_body()
+declare void @do_nothing_on_unwind()
+declare void @cannot_unwind() nounwind
+declare void @use_selector(i32)
+
+declare i32 @filt0(i8* %eh_info, i8* %rsp)
+declare i32 @filt1(i8* %eh_info, i8* %rsp)
+
+declare void @handler0()
+declare void @handler1()
+
+declare i32 @__C_specific_handler(...)
+declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind
diff --git a/test/CodeGen/X86/seh-safe-div.ll b/test/CodeGen/X86/seh-safe-div.ll
new file mode 100644
index 000000000000..e911df04ded4
--- /dev/null
+++ b/test/CodeGen/X86/seh-safe-div.ll
@@ -0,0 +1,196 @@
+; RUN: llc -mtriple x86_64-pc-windows-msvc < %s | FileCheck %s
+
+; This test case is also intended to be run manually as a complete functional
+; test. It should link, print something, and exit zero rather than crashing.
+; It is the hypothetical lowering of a C source program that looks like:
+;
+;   int safe_div(int *n, int *d) {
+;     int r;
+;     __try {
+;       __try {
+;         r = *n / *d;
+;       } __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION) {
+;         puts("EXCEPTION_ACCESS_VIOLATION");
+;         r = -1;
+;       }
+;     } __except(GetExceptionCode() == EXCEPTION_INT_DIVIDE_BY_ZERO) {
+;       puts("EXCEPTION_INT_DIVIDE_BY_ZERO");
+;       r = -2;
+;     }
+;     return r;
+;   }
+
+@str1 = internal constant [27 x i8] c"EXCEPTION_ACCESS_VIOLATION\00"
+@str2 = internal constant [29 x i8] c"EXCEPTION_INT_DIVIDE_BY_ZERO\00"
+
+define i32 @safe_div(i32* %n, i32* %d) {
+entry:
+  %r = alloca i32, align 4
+  invoke void @try_body(i32* %r, i32* %n, i32* %d)
+          to label %__try.cont unwind label %lpad
+
+lpad:
+  %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__C_specific_handler to i8*)
+          catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*)
+          catch i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*)
+  %ehptr = extractvalue { i8*, i32 } %vals, 0
+  %sel = extractvalue { i8*, i32 } %vals, 1
+  %filt0_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @safe_div_filt0 to i8*))
+  %is_filt0 = icmp eq i32 %sel, %filt0_val
+  br i1 %is_filt0, label %handler0, label %eh.dispatch1
+
+eh.dispatch1:
+  %filt1_val = call i32 @llvm.eh.typeid.for(i8* bitcast (i32 (i8*, i8*)* @safe_div_filt1 to i8*))
+  %is_filt1 = icmp eq i32 %sel, %filt1_val
+  br i1 %is_filt1, label %handler1, label %eh.resume
+
+handler0:
+  call void @puts(i8* getelementptr ([27 x i8]* @str1, i32 0, i32 0))
+  store i32 -1, i32* %r, align 4
+  br label %__try.cont
+
+handler1:
+  call void @puts(i8* getelementptr ([29 x i8]* @str2, i32 0, i32 0))
+  store i32 -2, i32* %r, align 4
+  br label %__try.cont
+
+eh.resume:
+  resume { i8*, i32 } %vals
+
+__try.cont:
+  %safe_ret = load i32* %r, align 4
+  ret i32 %safe_ret
+}
+
+; Normal path code
+
+; CHECK: {{^}}safe_div:
+; CHECK: .seh_proc safe_div
+; CHECK: .seh_handler __C_specific_handler, @unwind, @except
+; CHECK: .Ltmp0:
+; CHECK: leaq [[rloc:.*\(%rsp\)]], %rcx
+; CHECK: callq try_body
+; CHECK-NEXT: .Ltmp1
+; CHECK: .LBB0_7:
+; CHECK: movl [[rloc]], %eax
+; CHECK: retq
+
+; Landing pad code
+
+; CHECK: .Ltmp3:
+; CHECK: movl $1, %[[sel:[a-z]+]]
+; CHECK: .Ltmp4
+; CHECK: movl $2, %[[sel]]
+; CHECK: .L{{.*}}:
+; CHECK: cmpl $1, %[[sel]]
+
+; CHECK: # %handler0
+; CHECK: callq puts
+; CHECK: movl $-1, [[rloc]]
+; CHECK: jmp .LBB0_7
+
+; CHECK: cmpl $2, %[[sel]]
+
+; CHECK: # %handler1
+; CHECK: callq puts
+; CHECK: movl $-2, [[rloc]]
+; CHECK: jmp .LBB0_7
+
+; FIXME: EH preparation should not call _Unwind_Resume.
+; CHECK: callq _Unwind_Resume
+; CHECK: ud2
+
+; CHECK: .seh_handlerdata
+; CHECK: .long 2
+; CHECK: .long .Ltmp0@IMGREL
+; CHECK: .long .Ltmp1@IMGREL+1
+; CHECK: .long safe_div_filt0@IMGREL
+; CHECK: .long .Ltmp3@IMGREL
+; CHECK: .long .Ltmp0@IMGREL
+; CHECK: .long .Ltmp1@IMGREL+1
+; CHECK: .long safe_div_filt1@IMGREL
+; CHECK: .long .Ltmp4@IMGREL
+; CHECK: .text
+; CHECK: .seh_endproc
+
+
+define void @try_body(i32* %r, i32* %n, i32* %d) {
+entry:
+  %0 = load i32* %n, align 4
+  %1 = load i32* %d, align 4
+  %div = sdiv i32 %0, %1
+  store i32 %div, i32* %r, align 4
+  ret void
+}
+
+; The prototype of these filter functions is:
+; int filter(EXCEPTION_POINTERS *eh_ptrs, void *rbp);
+
+; The definition of EXCEPTION_POINTERS is:
+;   typedef struct _EXCEPTION_POINTERS {
+;     EXCEPTION_RECORD *ExceptionRecord;
+;     CONTEXT          *ContextRecord;
+;   } EXCEPTION_POINTERS;
+
+; The definition of EXCEPTION_RECORD is:
+;   typedef struct _EXCEPTION_RECORD {
+;     DWORD ExceptionCode;
+;     ...
+;   } EXCEPTION_RECORD;
+
+; The exception code can be retreived with two loads, one for the record
+; pointer and one for the code.  The values of local variables can be
+; accessed via rbp, but that would require additional not yet implemented LLVM
+; support.
+
+define i32 @safe_div_filt0(i8* %eh_ptrs, i8* %rbp) {
+  %eh_ptrs_c = bitcast i8* %eh_ptrs to i32**
+  %eh_rec = load i32** %eh_ptrs_c
+  %eh_code = load i32* %eh_rec
+  ; EXCEPTION_ACCESS_VIOLATION = 0xC0000005
+  %cmp = icmp eq i32 %eh_code, 3221225477
+  %filt.res = zext i1 %cmp to i32
+  ret i32 %filt.res
+}
+
+define i32 @safe_div_filt1(i8* %eh_ptrs, i8* %rbp) {
+  %eh_ptrs_c = bitcast i8* %eh_ptrs to i32**
+  %eh_rec = load i32** %eh_ptrs_c
+  %eh_code = load i32* %eh_rec
+  ; EXCEPTION_INT_DIVIDE_BY_ZERO = 0xC0000094
+  %cmp = icmp eq i32 %eh_code, 3221225620
+  %filt.res = zext i1 %cmp to i32
+  ret i32 %filt.res
+}
+
+@str_result = internal constant [21 x i8] c"safe_div result: %d\0A\00"
+
+define i32 @main() {
+  %d.addr = alloca i32, align 4
+  %n.addr = alloca i32, align 4
+
+  store i32 10, i32* %n.addr, align 4
+  store i32 2, i32* %d.addr, align 4
+  %r1 = call i32 @safe_div(i32* %n.addr, i32* %d.addr)
+  call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8]* @str_result, i32 0, i32 0), i32 %r1)
+
+  store i32 10, i32* %n.addr, align 4
+  store i32 0, i32* %d.addr, align 4
+  %r2 = call i32 @safe_div(i32* %n.addr, i32* %d.addr)
+  call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8]* @str_result, i32 0, i32 0), i32 %r2)
+
+  %r3 = call i32 @safe_div(i32* %n.addr, i32* null)
+  call void (i8*, ...)* @printf(i8* getelementptr ([21 x i8]* @str_result, i32 0, i32 0), i32 %r3)
+  ret i32 0
+}
+
+define void @_Unwind_Resume() {
+  call void @abort()
+  unreachable
+}
+
+declare i32 @__C_specific_handler(...)
+declare i32 @llvm.eh.typeid.for(i8*) readnone nounwind
+declare void @puts(i8*)
+declare void @printf(i8*, ...)
+declare void @abort()
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index 654e8652cfcb..7e6f15321415 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -364,4 +364,40 @@ define i32 @trunc_select_miscompile(i32 %a, i1 zeroext %cc) {
   %tmp1 = select i1 %cc, i32 3, i32 2
   %tmp2 = shl i32 %a, %tmp1
   ret i32 %tmp2
-}
-\ No newline at end of file
+}
+
+define void @test19() {
+; This is a massive reduction of an llvm-stress test case that generates
+; interesting chains feeding setcc and eventually a f32 select operation. This
+; is intended to exercise the SELECT formation in the DAG combine simplifying
+; a simplified select_cc node. If it it regresses and is no longer triggering
+; that code path, it can be deleted.
+;
+; CHECK-LABEL: @test19
+; CHECK: testb
+; CHECK: cmpl
+; CHECK: ucomiss
+
+BB:
+  br label %CF
+
+CF:
+  %Cmp10 = icmp ule i8 undef, undef
+  br i1 %Cmp10, label %CF, label %CF250
+
+CF250:
+  %E12 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 2
+  %Cmp32 = icmp ugt i1 %Cmp10, false
+  br i1 %Cmp32, label %CF, label %CF242
+
+CF242:
+  %Cmp38 = icmp uge i32 %E12, undef
+  %FC = uitofp i1 %Cmp38 to float
+  %Sl59 = select i1 %Cmp32, float %FC, float undef
+  %Cmp60 = fcmp ugt float undef, undef
+  br i1 %Cmp60, label %CF242, label %CF244
+
+CF244:
+  %B122 = fadd float %Sl59, undef
+  ret void
+}
diff --git a/test/CodeGen/X86/sext-i1.ll b/test/CodeGen/X86/sext-i1.ll
index 64de0aee70d3..1a575db11b83 100644
--- a/test/CodeGen/X86/sext-i1.ll
+++ b/test/CodeGen/X86/sext-i1.ll
@@ -61,3 +61,36 @@ if.end:                                           ; preds = %if.then, %entry
   %xor27 = xor i32 undef, %cond                   ; <i32> [#uses=0]
   ret i32 0
 }
+
+define i32 @t4(i64 %x) nounwind readnone ssp {
+entry:
+; 32-LABEL: t4:
+; 32: movl
+; 32: orl
+; 32: movl
+; 32: je
+; 32: xorl
+
+; 64-LABEL: t4:
+; 64: cmpq $1
+; 64: sbbl
+  %0 = icmp eq i64 %x, 0
+  %1 = sext i1 %0 to i32
+  ret i32 %1
+}
+
+define i64 @t5(i32 %x) nounwind readnone ssp {
+entry:
+; 32-LABEL: t5:
+; 32: cmpl $1
+; 32: sbbl
+; 32: movl
+
+; 64-LABEL: t5:
+; 64: cmpl $1
+; 64: sbbq
+  %0 = icmp eq i32 %x, 0
+  %1 = sext i1 %0 to i64
+  ret i64 %1
+}
+
diff --git a/test/CodeGen/X86/shrink-compare.ll b/test/CodeGen/X86/shrink-compare.ll
index fc7ee061f35d..4ddef4ca5351 100644
--- a/test/CodeGen/X86/shrink-compare.ll
+++ b/test/CodeGen/X86/shrink-compare.ll
@@ -89,3 +89,151 @@ if.end:
 ; CHECK-NOT: cmpl $1,{{.*}}x+4
 ; CHECK: ret
 }
+
+; CHECK-LABEL: test2_1:
+; CHECK: movzbl
+; CHECK: cmpl $256
+; CHECK: jne
+define void @test2_1(i32 %X) nounwind minsize {
+entry:
+  %and = and i32 %X, 255
+  %cmp = icmp eq i32 %and, 256
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: test_sext_i8_icmp_1:
+; CHECK: cmpb $1, %{{dil|cl}}
+define void @test_sext_i8_icmp_1(i8 %x) nounwind minsize {
+entry:
+  %sext = sext i8 %x to i32
+  %cmp = icmp eq i32 %sext, 1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: test_sext_i8_icmp_47:
+; CHECK: cmpb $47, %{{dil|cl}}
+define void @test_sext_i8_icmp_47(i8 %x) nounwind minsize {
+entry:
+  %sext = sext i8 %x to i32
+  %cmp = icmp eq i32 %sext, 47
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: test_sext_i8_icmp_127:
+; CHECK: cmpb $127, %{{dil|cl}}
+define void @test_sext_i8_icmp_127(i8 %x) nounwind minsize {
+entry:
+  %sext = sext i8 %x to i32
+  %cmp = icmp eq i32 %sext, 127
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: test_sext_i8_icmp_neg1:
+; CHECK: cmpb $-1, %{{dil|cl}}
+define void @test_sext_i8_icmp_neg1(i8 %x) nounwind minsize {
+entry:
+  %sext = sext i8 %x to i32
+  %cmp = icmp eq i32 %sext, -1
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: test_sext_i8_icmp_neg2:
+; CHECK: cmpb $-2, %{{dil|cl}}
+define void @test_sext_i8_icmp_neg2(i8 %x) nounwind minsize {
+entry:
+  %sext = sext i8 %x to i32
+  %cmp = icmp eq i32 %sext, -2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: test_sext_i8_icmp_neg127:
+; CHECK: cmpb $-127, %{{dil|cl}}
+define void @test_sext_i8_icmp_neg127(i8 %x) nounwind minsize {
+entry:
+  %sext = sext i8 %x to i32
+  %cmp = icmp eq i32 %sext, -127
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: test_sext_i8_icmp_neg128:
+; CHECK: cmpb $-128, %{{dil|cl}}
+define void @test_sext_i8_icmp_neg128(i8 %x) nounwind minsize {
+entry:
+  %sext = sext i8 %x to i32
+  %cmp = icmp eq i32 %sext, -128
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+}
+
+; CHECK-LABEL: test_sext_i8_icmp_255:
+; CHECK: movb $1,
+; CHECK: testb
+; CHECK: jne
+define void @test_sext_i8_icmp_255(i8 %x) nounwind minsize {
+entry:
+  %sext = sext i8 %x to i32
+  %cmp = icmp eq i32 %sext, 255
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  tail call void @bar() nounwind
+  br label %if.end
+
+if.end:
+  ret void
+}
diff --git a/test/CodeGen/X86/sibcall-4.ll b/test/CodeGen/X86/sibcall-4.ll
index 980b0f797ee1..2c7f51d28025 100644
--- a/test/CodeGen/X86/sibcall-4.ll
+++ b/test/CodeGen/X86/sibcall-4.ll
@@ -1,13 +1,13 @@
 ; RUN: llc < %s -mtriple=i386-pc-linux-gnu | FileCheck %s
 ; pr7610
 
-define cc10 void @t(i32* %Base_Arg, i32* %Sp_Arg, i32* %Hp_Arg, i32 %R1_Arg) nounwind {
+define ghccc void @t(i32* %Base_Arg, i32* %Sp_Arg, i32* %Hp_Arg, i32 %R1_Arg) nounwind {
 cm1:
 ; CHECK-LABEL: t:
 ; CHECK: jmpl *%eax
   %nm3 = getelementptr i32* %Sp_Arg, i32 1
   %nm9 = load i32* %Sp_Arg
   %nma = inttoptr i32 %nm9 to void (i32*, i32*, i32*, i32)*
-  tail call cc10 void %nma(i32* %Base_Arg, i32* %nm3, i32* %Hp_Arg, i32 %R1_Arg) nounwind
+  tail call ghccc void %nma(i32* %Base_Arg, i32* %nm3, i32* %Hp_Arg, i32 %R1_Arg) nounwind
   ret void
 }
diff --git a/test/CodeGen/X86/sibcall-5.ll b/test/CodeGen/X86/sibcall-5.ll
index c04af234b131..b065cce17b24 100644
--- a/test/CodeGen/X86/sibcall-5.ll
+++ b/test/CodeGen/X86/sibcall-5.ll
@@ -62,4 +62,4 @@ declare i8* @objc_msgSend(i8*, i8*, ...)
 
 declare double @floor(double) optsize
 
-!0 = metadata !{}
+!0 = !{}
diff --git a/test/CodeGen/X86/sincos-opt.ll b/test/CodeGen/X86/sincos-opt.ll
index 2dc8816f840f..1e34a2be10b3 100644
--- a/test/CodeGen/X86/sincos-opt.ll
+++ b/test/CodeGen/X86/sincos-opt.ll
@@ -15,7 +15,8 @@ entry:
 
 ; OSX_SINCOS-LABEL: test1:
 ; OSX_SINCOS: callq ___sincosf_stret
-; OSX_SINCOS: pshufd $1, %xmm0, %xmm1
+; OSX_SINCOS: movaps %xmm0, %xmm1
+; OSX_SINCOS: shufps {{.*}} ## xmm1 = xmm1[1,1,2,3]
 ; OSX_SINCOS: addss %xmm0, %xmm1
 
 ; OSX_NOOPT: test1
diff --git a/test/CodeGen/X86/sink-blockfreq.ll b/test/CodeGen/X86/sink-blockfreq.ll
new file mode 100644
index 000000000000..c2f0411901a7
--- /dev/null
+++ b/test/CodeGen/X86/sink-blockfreq.ll
@@ -0,0 +1,45 @@
+; RUN: llc -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
+; RUN: llc -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
+
+; Test that by changing BlockFrequencyInfo we change the order in which
+; machine-sink looks for sucessor blocks. By not using BFI, both G and B
+; have the same loop depth and no instructions is sinked - B is selected but
+; can't be used as to avoid breaking a non profitable critical edge. By using
+; BFI, "mul" is sinked into the less frequent block G.
+define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp {
+; MSINK_BFI-LABEL: sink_freqinfo
+; MSINK_BFI: jl
+; MSINK_BFI-NEXT: ## BB#
+; MSINK_BFI-NEXT: imull
+
+; MSINK_NOBFI-LABEL: sink_freqinfo
+; MSINK_NOBFI: imull
+; MSINK_NOBFI: jl
+entry:
+  br label %B
+
+B:
+  %ee = phi i32 [ 0, %entry ], [ %inc, %F ]
+  %xx = sub i32 %a, %ee
+  %cond0 = icmp slt i32 %xx, 0
+  br i1 %cond0, label %F, label %exit, !prof !0
+
+F:
+  %inc = add nsw i32 %xx, 2
+  %aa = mul nsw i32 %b, %inc
+  %exitcond = icmp slt i32 %inc, %a
+  br i1 %exitcond, label %B, label %G, !prof !1
+
+G:
+  %ii = add nsw i32 %aa, %a
+  %ll = add i32 %b, 45
+  %exitcond2 = icmp sge i32 %ii, %b
+  br i1 %exitcond2, label %G, label %exit, !prof !2
+
+exit:
+  ret i32 0
+}
+
+!0 = !{!"branch_weights", i32 4, i32 1}
+!1 = !{!"branch_weights", i32 128, i32 1}
+!2 = !{!"branch_weights", i32 1, i32 1}
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index 64f5311792db..455cf24bce1c 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -mcpu=nehalem -post-RA-scheduler=true -schedmodel=false | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -mcpu=nehalem -post-RA-scheduler=true -schedmodel=false | FileCheck %s
 
 ; Currently, floating-point selects are lowered to CFG triangles.
 ; This means that one side of the select is always unconditionally
diff --git a/test/CodeGen/X86/sink-out-of-loop.ll b/test/CodeGen/X86/sink-out-of-loop.ll
index c600f925a32b..6757f315b6da 100644
--- a/test/CodeGen/X86/sink-out-of-loop.ll
+++ b/test/CodeGen/X86/sink-out-of-loop.ll
@@ -5,7 +5,7 @@
 ; MOV32ri outside the loop.
 ; rdar://11980766
 define i32 @sink_succ(i32 %argc, i8** nocapture %argv) nounwind uwtable ssp {
-; CHECK: sink_succ
+; CHECK-LABEL: sink_succ
 ; CHECK: [[OUTER_LN1:LBB0_[0-9]+]]: ## %preheader
 ; CHECK: %exit
 ; CHECK-NOT: movl
@@ -52,3 +52,24 @@ for.body2:
 for.end20:
   ret i32 0
 }
+
+define i32 @sink_out_of_loop(i32 %n, i32* %output) {
+; CHECK-LABEL: sink_out_of_loop:
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i2, %loop ]
+  %j = mul i32 %i, %i
+  %addr = getelementptr i32* %output, i32 %i
+  store i32 %i, i32* %addr
+  %i2 = add i32 %i, 1
+  %exit_cond = icmp sge i32 %i2, %n
+  br i1 %exit_cond, label %exit, label %loop
+
+exit:
+; CHECK: BB#2
+; CHECK: imull %eax, %eax
+; CHECK: retq
+  ret i32 %j
+}
diff --git a/test/CodeGen/X86/sjlj-baseptr.ll b/test/CodeGen/X86/sjlj-baseptr.ll
new file mode 100644
index 000000000000..e439ff4dbd2f
--- /dev/null
+++ b/test/CodeGen/X86/sjlj-baseptr.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=i386-pc-linux -mcpu=corei7 -relocation-model=static | FileCheck --check-prefix=X86 %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 -relocation-model=static | FileCheck --check-prefix=X64 %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%Foo = type { [125 x i8] }
+
+declare i32 @llvm.eh.sjlj.setjmp(i8*) nounwind
+
+declare void @whatever(i64, %Foo*, i8**, i8*, i8*, i32)  #0
+
+attributes #0 = { nounwind uwtable "no-frame-pointer-elim"="true" }
+
+define i32 @test1(i64 %n, %Foo* byval nocapture readnone align 8 %f) #0 {
+entry:
+  %buf = alloca [5 x i8*], align 16
+  %p = alloca i8*, align 8
+  %q = alloca i8, align 64
+  %r = bitcast [5 x i8*]* %buf to i8*
+  %s = alloca i8, i64 %n, align 1
+  store i8* %s, i8** %p, align 8
+  %t = call i32 @llvm.eh.sjlj.setjmp(i8* %s)
+  call void @whatever(i64 %n, %Foo* %f, i8** %p, i8* %q, i8* %s, i32 %t) #1
+  ret i32 0
+; X86: movl    %esp, %esi
+; X86: movl    %esp, -16(%ebp)
+; X86: {{.LBB.*:}}
+; X86: movl    -16(%ebp), %esi
+; X86: {{.LBB.*:}}
+; X64: movq    %rsp, %rbx
+; X64: movq    %rsp, -48(%rbp)
+; X64: {{.LBB.*:}}
+; X64: movq    -48(%rbp), %rbx
+; X64: {{.LBB.*:}}
+}
+
+
diff --git a/test/CodeGen/X86/slow-div.ll b/test/CodeGen/X86/slow-div.ll
new file mode 100644
index 000000000000..52223824bf96
--- /dev/null
+++ b/test/CodeGen/X86/slow-div.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivl-to-divb < %s | FileCheck -check-prefix=DIV32 %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivq-to-divw < %s | FileCheck -check-prefix=DIV64 %s
+
+define i32 @div32(i32 %a, i32 %b) {
+entry:
+; DIV32-LABEL: div32:
+; DIV32: orl %{{.*}}, [[REG:%[a-z]+]]
+; DIV32: testl $-256, [[REG]]
+; DIV32: divb
+; DIV64-LABEL: div32:
+; DIV64-NOT: divb
+  %div = sdiv i32 %a, %b
+  ret i32 %div
+}
+
+define i64 @div64(i64 %a, i64 %b) {
+entry:
+; DIV32-LABEL: div64:
+; DIV32-NOT: divw
+; DIV64-LABEL: div64:
+; DIV64: orq %{{.*}}, [[REG:%[a-z]+]]
+; DIV64: testq   $-65536, [[REG]]
+; DIV64: divw
+  %div = sdiv i64 %a, %b
+  ret i64 %div
+}
+
+
diff --git a/test/CodeGen/X86/slow-incdec.ll b/test/CodeGen/X86/slow-incdec.ll
new file mode 100644
index 000000000000..323e3ae8c472
--- /dev/null
+++ b/test/CodeGen/X86/slow-incdec.ll
@@ -0,0 +1,80 @@
+; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=-slow-incdec < %s | FileCheck -check-prefix=INCDEC %s
+; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+slow-incdec < %s | FileCheck -check-prefix=ADD %s
+
+; check -mattr=-slow-incdec
+; INCDEC-NOT: addl $-1
+; INCDEC: dec
+; INCDEC-NOT: addl $1
+; INCDEC: inc
+
+; check -mattr=+slow-incdec
+; ADD: addl $-1
+; ADD-NOT: dec
+; ADD: addl $1
+; ADD-NOT: inc
+
+; Function Attrs: nounwind readonly
+define i32 @slow_1(i32* nocapture readonly %a, i32 %s) #0 {
+entry:
+  %cmp5 = icmp eq i32 %s, 0
+  br i1 %cmp5, label %for.end, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond:                                         ; preds = %for.body
+  %cmp = icmp eq i32 %dec, 0
+  br i1 %cmp, label %for.end.loopexit, label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.cond
+  %i.06 = phi i32 [ %dec, %for.cond ], [ %s, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %i.06
+  %0 = load i32* %arrayidx, align 4, !tbaa !1
+  %cmp1 = icmp eq i32 %0, 0
+;
+  %dec = add nsw i32 %i.06, -1
+  br i1 %cmp1, label %for.end.loopexit, label %for.cond
+
+for.end.loopexit:                                 ; preds = %for.cond, %for.body
+  %i.0.lcssa.ph = phi i32 [ 0, %for.cond ], [ %i.06, %for.body ]
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %i.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.lcssa.ph, %for.end.loopexit ]
+  ret i32 %i.0.lcssa
+}
+
+; Function Attrs: nounwind readonly
+define i32 @slow_2(i32* nocapture readonly %a, i32 %s) #0 {
+entry:
+  %cmp5 = icmp eq i32 %s, 0
+  br i1 %cmp5, label %for.end, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond:                                         ; preds = %for.body
+  %cmp = icmp eq i32 %inc, 0
+  br i1 %cmp, label %for.end.loopexit, label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.cond
+  %i.06 = phi i32 [ %inc, %for.cond ], [ %s, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32* %a, i32 %i.06
+  %0 = load i32* %arrayidx, align 4, !tbaa !1
+  %cmp1 = icmp eq i32 %0, 0
+  %inc = add nsw i32 %i.06, 1
+  br i1 %cmp1, label %for.end.loopexit, label %for.cond
+
+for.end.loopexit:                                 ; preds = %for.cond, %for.body
+  %i.0.lcssa.ph = phi i32 [ 0, %for.cond ], [ %i.06, %for.body ]
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %i.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.lcssa.ph, %for.end.loopexit ]
+  ret i32 %i.0.lcssa
+}
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/small-byval-memcpy.ll b/test/CodeGen/X86/small-byval-memcpy.ll
index 1b596b589899..3c03750199cb 100644
--- a/test/CodeGen/X86/small-byval-memcpy.ll
+++ b/test/CodeGen/X86/small-byval-memcpy.ll
@@ -1,20 +1,25 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2   | grep movsd  | count 8
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movups | count 2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s --check-prefix=CORE2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck %s --check-prefix=NEHALEM
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
 
-define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret  %agg.result, { x86_fp80, x86_fp80 }* byval align 4  %z) nounwind  {
-entry:
-	%iz = alloca { x86_fp80, x86_fp80 }		; <{ x86_fp80, x86_fp80 }*> [#uses=3]
-	%tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1		; <x86_fp80*> [#uses=1]
-	%tmp2 = load x86_fp80* %tmp1, align 16		; <x86_fp80> [#uses=1]
-	%tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2		; <x86_fp80> [#uses=1]
-	%tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1		; <x86_fp80*> [#uses=1]
-	%real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0		; <x86_fp80*> [#uses=1]
-	%tmp6 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0		; <x86_fp80*> [#uses=1]
-	%tmp7 = load x86_fp80* %tmp6, align 16		; <x86_fp80> [#uses=1]
-	store x86_fp80 %tmp3, x86_fp80* %real, align 16
-	store x86_fp80 %tmp7, x86_fp80* %tmp4, align 16
-	call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret  %agg.result, { x86_fp80, x86_fp80 }* byval align 4  %iz ) nounwind 
-	ret void
-}
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+
+define void @copy16bytes(i8* nocapture %a, i8* nocapture readonly %b) {
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 1, i1 false)
+  ret void
+
+  ; CHECK-LABEL: copy16bytes
+  ; CORE2: movq
+  ; CORE2-NEXT: movq
+  ; CORE2-NEXT: movq
+  ; CORE2-NEXT: movq
+  ; CORE2-NEXT: retq
 
-declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval align 4 ) nounwind 
+  ; NEHALEM: movups
+  ; NEHALEM-NEXT: movups
+  ; NEHALEM-NEXT: retq
+
+  ; BTVER2: movups
+  ; BTVER2-NEXT: movups
+  ; BTVER2-NEXT: retq
+}
diff --git a/test/CodeGen/X86/splat-for-size.ll b/test/CodeGen/X86/splat-for-size.ll
new file mode 100644
index 000000000000..c052ad2aa146
--- /dev/null
+++ b/test/CodeGen/X86/splat-for-size.ll
@@ -0,0 +1,141 @@
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx < %s | FileCheck %s -check-prefix=CHECK --check-prefix=AVX
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=avx2 < %s | FileCheck %s -check-prefix=CHECK --check-prefix=AVX2
+
+; Check constant loads of every 128-bit and 256-bit vector type 
+; for size optimization using splat ops available with AVX and AVX2.
+
+; There is no AVX broadcast from double to 128-bit vector because movddup has been around since SSE3 (grrr).
+define <2 x double> @splat_v2f64(<2 x double> %x) #0 {
+  %add = fadd <2 x double> %x, <double 1.0, double 1.0>
+  ret <2 x double> %add
+; CHECK-LABEL: splat_v2f64
+; CHECK: vmovddup
+; CHECK: vaddpd 
+; CHECK-NEXT: retq
+}
+
+define <4 x double> @splat_v4f64(<4 x double> %x) #0 {
+  %add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
+  ret <4 x double> %add
+; CHECK-LABEL: splat_v4f64
+; CHECK: vbroadcastsd 
+; CHECK-NEXT: vaddpd
+; CHECK-NEXT: retq
+}
+
+define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
+  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
+  ret <4 x float> %add
+; CHECK-LABEL: splat_v4f32
+; CHECK: vbroadcastss 
+; CHECK-NEXT: vaddps
+; CHECK-NEXT: retq
+}
+
+define <8 x float> @splat_v8f32(<8 x float> %x) #0 {
+  %add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
+  ret <8 x float> %add
+; CHECK-LABEL: splat_v8f32
+; CHECK: vbroadcastss 
+; CHECK-NEXT: vaddps
+; CHECK-NEXT: retq
+}
+
+; AVX can't do integer splats, so fake it: use vmovddup to splat 64-bit value.
+; We also generate vmovddup for AVX2 because it's one byte smaller than vpbroadcastq.
+define <2 x i64> @splat_v2i64(<2 x i64> %x) #0 {
+  %add = add <2 x i64> %x, <i64 1, i64 1>
+  ret <2 x i64> %add
+; CHECK-LABEL: splat_v2i64
+; CHECK: vmovddup 
+; CHECK: vpaddq
+; CHECK-NEXT: retq
+}
+
+; AVX can't do 256-bit integer ops, so we split this into two 128-bit vectors,
+; and then we fake it: use vmovddup to splat 64-bit value.
+define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
+  %add = add <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
+  ret <4 x i64> %add
+; CHECK-LABEL: splat_v4i64
+; AVX: vmovddup
+; AVX: vpaddq 
+; AVX: vpaddq 
+; AVX2: vpbroadcastq 
+; AVX2: vpaddq 
+; CHECK: retq
+}
+
+; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
+define <4 x i32> @splat_v4i32(<4 x i32> %x) #0 {
+  %add = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %add
+; CHECK-LABEL: splat_v4i32
+; AVX: vbroadcastss
+; AVX2: vpbroadcastd 
+; CHECK-NEXT: vpaddd 
+; CHECK-NEXT: retq
+}
+
+; AVX can't do integer splats, so fake it: use vbroadcastss to splat 32-bit value.
+define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
+  %add = add <8 x i32> %x, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i32> %add
+; CHECK-LABEL: splat_v8i32
+; AVX: vbroadcastss
+; AVX: vpaddd 
+; AVX: vpaddd 
+; AVX2: vpbroadcastd 
+; AVX2: vpaddd 
+; CHECK: retq
+}
+
+; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
+define <8 x i16> @splat_v8i16(<8 x i16> %x) #0 {
+  %add = add <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %add
+; CHECK-LABEL: splat_v8i16
+; AVX-NOT: broadcast
+; AVX2: vpbroadcastw 
+; CHECK: vpaddw 
+; CHECK-NEXT: retq
+}
+
+; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
+define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
+  %add = add <16 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <16 x i16> %add
+; CHECK-LABEL: splat_v16i16
+; AVX-NOT: broadcast
+; AVX: vpaddw 
+; AVX: vpaddw 
+; AVX2: vpbroadcastw 
+; AVX2: vpaddw 
+; CHECK: retq
+}
+
+; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
+define <16 x i8> @splat_v16i8(<16 x i8> %x) #0 {
+  %add = add <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %add
+; CHECK-LABEL: splat_v16i8
+; AVX-NOT: broadcast
+; AVX2: vpbroadcastb 
+; CHECK: vpaddb 
+; CHECK-NEXT: retq
+}
+
+; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
+define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
+  %add = add <32 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <32 x i8> %add
+; CHECK-LABEL: splat_v32i8
+; AVX-NOT: broadcast
+; AVX: vpaddb 
+; AVX: vpaddb 
+; AVX2: vpbroadcastb 
+; AVX2: vpaddb 
+; CHECK: retq
+}
+
+attributes #0 = { optsize }
diff --git a/test/CodeGen/X86/splat-scalar-load.ll b/test/CodeGen/X86/splat-scalar-load.ll
deleted file mode 100644
index 4d59b9cc2f63..000000000000
--- a/test/CodeGen/X86/splat-scalar-load.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -mcpu=nehalem | FileCheck %s
-; rdar://7434544
-
-define <2 x i64> @t2() nounwind {
-entry:
-; CHECK-LABEL: t2:
-; CHECK: pshufd	$85, (%esp), %xmm0
-  %array = alloca [8 x float], align 4
-  %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 1
-  %tmp2 = load float* %arrayidx
-  %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0
-  %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1
-  %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2
-  %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3
-  %0 = bitcast <4 x float> %vecinit9 to <2 x i64>
-  ret <2 x i64> %0
-}
diff --git a/test/CodeGen/X86/sqrt-fastmath.ll b/test/CodeGen/X86/sqrt-fastmath.ll
index fc79e31e72ee..24b175eed7a3 100644
--- a/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/test/CodeGen/X86/sqrt-fastmath.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
 
 ; generated using "clang -S -O2 -ffast-math -emit-llvm sqrt.c" from
 ; #include <math.h>
@@ -52,9 +53,80 @@ entry:
   ret x86_fp80 %call
 }
 
-; Function Attrs: nounwind readnone
 declare x86_fp80 @__sqrtl_finite(x86_fp80) #1
 
+declare float @llvm.sqrt.f32(float) #1
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #1
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #1
+
+; If the target's sqrtss and divss instructions are substantially
+; slower than rsqrtss with a Newton-Raphson refinement, we should
+; generate the estimate sequence.
+
+define float @reciprocal_square_root(float %x) #0 {
+  %sqrt = tail call float @llvm.sqrt.f32(float %x)
+  %div = fdiv fast float 1.0, %sqrt
+  ret float %div
+
+; CHECK-LABEL: reciprocal_square_root:
+; CHECK: sqrtss
+; CHECK-NEXT: movss
+; CHECK-NEXT: divss
+; CHECK-NEXT: retq
+; BTVER2-LABEL: reciprocal_square_root:
+; BTVER2: vrsqrtss
+; BTVER2-NEXT: vmulss
+; BTVER2-NEXT: vmulss
+; BTVER2-NEXT: vmulss
+; BTVER2-NEXT: vaddss
+; BTVER2-NEXT: vmulss
+; BTVER2-NEXT: retq
+}
+
+define <4 x float> @reciprocal_square_root_v4f32(<4 x float> %x) #0 {
+  %sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
+  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
+  ret <4 x float> %div
+
+; CHECK-LABEL: reciprocal_square_root_v4f32:
+; CHECK: sqrtps
+; CHECK-NEXT: movaps
+; CHECK-NEXT: divps
+; CHECK-NEXT: retq
+; BTVER2-LABEL: reciprocal_square_root_v4f32:
+; BTVER2: vrsqrtps
+; BTVER2-NEXT: vmulps
+; BTVER2-NEXT: vmulps
+; BTVER2-NEXT: vmulps
+; BTVER2-NEXT: vaddps
+; BTVER2-NEXT: vmulps
+; BTVER2-NEXT: retq
+}
+
+define <8 x float> @reciprocal_square_root_v8f32(<8 x float> %x) #0 {
+  %sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)
+  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
+  ret <8 x float> %div
+
+; CHECK-LABEL: reciprocal_square_root_v8f32:
+; CHECK: sqrtps
+; CHECK-NEXT: sqrtps
+; CHECK-NEXT: movaps
+; CHECK-NEXT: movaps
+; CHECK-NEXT: divps
+; CHECK-NEXT: divps
+; CHECK-NEXT: retq
+; BTVER2-LABEL: reciprocal_square_root_v8f32:
+; BTVER2: vrsqrtps
+; BTVER2-NEXT: vmulps
+; BTVER2-NEXT: vmulps
+; BTVER2-NEXT: vmulps
+; BTVER2-NEXT: vaddps
+; BTVER2-NEXT: vmulps
+; BTVER2-NEXT: retq
+}
+
+
 attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
 attributes #2 = { nounwind readnone }
diff --git a/test/CodeGen/X86/sse-align-12.ll b/test/CodeGen/X86/sse-align-12.ll
index 2351fd6fa77b..396da0f48956 100644
--- a/test/CodeGen/X86/sse-align-12.ll
+++ b/test/CodeGen/X86/sse-align-12.ll
@@ -1,9 +1,11 @@
-; RUN: llc < %s -march=x86-64 -mcpu=nehalem | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mcpu=nehalem | FileCheck %s
 
-; CHECK-LABEL: a:
-; CHECK: movdqu
-; CHECK: pshufd
 define <4 x float> @a(<4 x float>* %y) nounwind {
+; CHECK-LABEL: a:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movups (%rdi), %xmm0
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; CHECK-NEXT:    retq
   %x = load <4 x float>* %y, align 4
   %a = extractelement <4 x float> %x, i32 0
   %b = extractelement <4 x float> %x, i32 1
@@ -16,10 +18,12 @@ define <4 x float> @a(<4 x float>* %y) nounwind {
   ret <4 x float> %s
 }
 
-; CHECK-LABEL: b:
-; CHECK: movups
-; CHECK: unpckhps
 define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind {
+; CHECK-LABEL: b:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movups (%rdi), %xmm1
+; CHECK-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK-NEXT:    retq
   %x = load <4 x float>* %y, align 4
   %a = extractelement <4 x float> %x, i32 2
   %b = extractelement <4 x float> %x, i32 3
@@ -32,10 +36,12 @@ define <4 x float> @b(<4 x float>* %y, <4 x float> %z) nounwind {
   ret <4 x float> %s
 }
 
-; CHECK-LABEL: c:
-; CHECK: movupd
-; CHECK: shufpd
 define <2 x double> @c(<2 x double>* %y) nounwind {
+; CHECK-LABEL: c:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movupd (%rdi), %xmm0
+; CHECK-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; CHECK-NEXT:    retq
   %x = load <2 x double>* %y, align 8
   %a = extractelement <2 x double> %x, i32 0
   %c = extractelement <2 x double> %x, i32 1
@@ -44,10 +50,12 @@ define <2 x double> @c(<2 x double>* %y) nounwind {
   ret <2 x double> %r
 }
 
-; CHECK-LABEL: d:
-; CHECK: movupd
-; CHECK: unpckhpd
 define <2 x double> @d(<2 x double>* %y, <2 x double> %z) nounwind {
+; CHECK-LABEL: d:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movupd (%rdi), %xmm1
+; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; CHECK-NEXT:    retq
   %x = load <2 x double>* %y, align 8
   %a = extractelement <2 x double> %x, i32 1
   %c = extractelement <2 x double> %z, i32 1
diff --git a/test/CodeGen/X86/sse-domains.ll b/test/CodeGen/X86/sse-domains.ll
index 168959a5d653..8cf522dd3c15 100644
--- a/test/CodeGen/X86/sse-domains.ll
+++ b/test/CodeGen/X86/sse-domains.ll
@@ -43,45 +43,3 @@ while.body:
 while.end:
   ret void
 }
-
-; CHECK: f2
-; CHECK: for.body
-;
-; This loop contains two cvtsi2ss instructions that update the same xmm
-; register.  Verify that the execution dependency fix pass breaks those
-; dependencies by inserting xorps instructions.
-;
-; If the register allocator chooses different registers for the two cvtsi2ss
-; instructions, they are still dependent on themselves.
-; CHECK: xorps [[XMM1:%xmm[0-9]+]]
-; CHECK: , [[XMM1]]
-; CHECK: cvtsi2ssl %{{.*}}, [[XMM1]]
-; CHECK: xorps [[XMM2:%xmm[0-9]+]]
-; CHECK: , [[XMM2]]
-; CHECK: cvtsi2ssl %{{.*}}, [[XMM2]]
-;
-define float @f2(i32 %m) nounwind uwtable readnone ssp {
-entry:
-  %tobool3 = icmp eq i32 %m, 0
-  br i1 %tobool3, label %for.end, label %for.body
-
-for.body:                                         ; preds = %entry, %for.body
-  %m.addr.07 = phi i32 [ %dec, %for.body ], [ %m, %entry ]
-  %s1.06 = phi float [ %add, %for.body ], [ 0.000000e+00, %entry ]
-  %s2.05 = phi float [ %add2, %for.body ], [ 0.000000e+00, %entry ]
-  %n.04 = phi i32 [ %inc, %for.body ], [ 1, %entry ]
-  %conv = sitofp i32 %n.04 to float
-  %add = fadd float %s1.06, %conv
-  %conv1 = sitofp i32 %m.addr.07 to float
-  %add2 = fadd float %s2.05, %conv1
-  %inc = add nsw i32 %n.04, 1
-  %dec = add nsw i32 %m.addr.07, -1
-  %tobool = icmp eq i32 %dec, 0
-  br i1 %tobool, label %for.end, label %for.body
-
-for.end:                                          ; preds = %for.body, %entry
-  %s1.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
-  %s2.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add2, %for.body ]
-  %sub = fsub float %s1.0.lcssa, %s2.0.lcssa
-  ret float %sub
-}
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index 5122c44131a4..4dcb54ca4b0b 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -138,8 +138,7 @@ define double @ole_inverse(double %x, double %y) nounwind {
 ; CHECK-NEXT: ret
 ; UNSAFE-LABEL:      ogt_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
+; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      ogt_x:
 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
@@ -157,8 +156,7 @@ define double @ogt_x(double %x) nounwind {
 ; CHECK-NEXT: ret
 ; UNSAFE-LABEL:      olt_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; UNSAFE-NEXT: minsd %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
+; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      olt_x:
 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
@@ -177,8 +175,7 @@ define double @olt_x(double %x) nounwind {
 ; CHECK-NEXT: ret
 ; UNSAFE-LABEL:      ogt_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
-; UNSAFE-NEXT: minsd  %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
+; UNSAFE-NEXT: minsd  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      ogt_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
@@ -198,8 +195,7 @@ define double @ogt_inverse_x(double %x) nounwind {
 ; CHECK-NEXT: ret
 ; UNSAFE-LABEL:      olt_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd  %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
+; UNSAFE-NEXT: maxsd  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      olt_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
@@ -217,8 +213,7 @@ define double @olt_inverse_x(double %x) nounwind {
 ; CHECK-NEXT: andpd
 ; UNSAFE-LABEL:      oge_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
+; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      oge_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
@@ -235,8 +230,7 @@ define double @oge_x(double %x) nounwind {
 ; CHECK-NEXT: andpd
 ; UNSAFE-LABEL:      ole_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; UNSAFE-NEXT: minsd %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
+; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      ole_x:
 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
@@ -253,8 +247,7 @@ define double @ole_x(double %x) nounwind {
 ; CHECK-NEXT: andnpd
 ; UNSAFE-LABEL:      oge_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
-; UNSAFE-NEXT: minsd   %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
+; UNSAFE-NEXT: minsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      oge_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
@@ -271,8 +264,7 @@ define double @oge_inverse_x(double %x) nounwind {
 ; CHECK:      cmplesd %xmm
 ; UNSAFE-LABEL:      ole_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
+; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      ole_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
@@ -412,8 +404,7 @@ define double @ule_inverse(double %x, double %y) nounwind {
 ; CHECK-NEXT: andpd
 ; UNSAFE-LABEL:      ugt_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
+; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      ugt_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
@@ -430,8 +421,7 @@ define double @ugt_x(double %x) nounwind {
 ; CHECK-NEXT: andpd
 ; UNSAFE-LABEL:      ult_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
-; UNSAFE-NEXT: minsd   %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
+; UNSAFE-NEXT: minsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      ult_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
@@ -448,8 +438,7 @@ define double @ult_x(double %x) nounwind {
 ; CHECK-NEXT: andnpd
 ; UNSAFE-LABEL:      ugt_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
-; UNSAFE-NEXT: minsd   %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
+; UNSAFE-NEXT: minsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      ugt_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
@@ -467,8 +456,7 @@ define double @ugt_inverse_x(double %x) nounwind {
 ; CHECK-NEXT: andnpd
 ; UNSAFE-LABEL:      ult_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd   %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
+; UNSAFE-NEXT: maxsd   %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      ult_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}}   %xmm1, %xmm1
@@ -488,8 +476,7 @@ define double @ult_inverse_x(double %x) nounwind {
 ; CHECK-NEXT: ret
 ; UNSAFE-LABEL:      uge_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd  %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}}  %xmm1, %xmm0
+; UNSAFE-NEXT: maxsd  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      uge_x:
 ; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
@@ -508,8 +495,7 @@ define double @uge_x(double %x) nounwind {
 ; CHECK-NEXT: ret
 ; UNSAFE-LABEL:      ule_x:
 ; UNSAFE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
-; UNSAFE-NEXT: minsd  %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
+; UNSAFE-NEXT: minsd  %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      ule_x:
 ; FINITE-NEXT: xorp{{[sd]}}  %xmm1, %xmm1
@@ -527,8 +513,7 @@ define double @ule_x(double %x) nounwind {
 ; CHECK-NEXT: ret
 ; UNSAFE-LABEL:      uge_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; UNSAFE-NEXT: minsd %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
+; UNSAFE-NEXT: minsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      uge_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
@@ -547,8 +532,7 @@ define double @uge_inverse_x(double %x) nounwind {
 ; CHECK-NEXT: ret
 ; UNSAFE-LABEL:      ule_inverse_x:
 ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd %xmm0, %xmm1
-; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
+; UNSAFE-NEXT: maxsd %xmm1, %xmm0
 ; UNSAFE-NEXT: ret
 ; FINITE-LABEL:      ule_inverse_x:
 ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
@@ -819,11 +803,18 @@ define double @ule_inverse_y(double %x) nounwind {
 ; Test a few more misc. cases.
 
 ; CHECK-LABEL: clampTo3k_a:
-; CHECK: minsd
+; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: ret
 ; UNSAFE-LABEL: clampTo3k_a:
-; UNSAFE: minsd
+; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
 ; FINITE-LABEL: clampTo3k_a:
-; FINITE: minsd
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: ret
 define double @clampTo3k_a(double %x) nounwind readnone {
 entry:
   %0 = fcmp ogt double %x, 3.000000e+03           ; <i1> [#uses=1]
@@ -832,11 +823,16 @@ entry:
 }
 
 ; CHECK-LABEL: clampTo3k_b:
-; CHECK: minsd
+; CHECK-NEXT: minsd {{[^,]*}}, %xmm0
+; CHECK-NEXT: ret
 ; UNSAFE-LABEL: clampTo3k_b:
-; UNSAFE: minsd
+; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
 ; FINITE-LABEL: clampTo3k_b:
-; FINITE: minsd
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: ret
 define double @clampTo3k_b(double %x) nounwind readnone {
 entry:
   %0 = fcmp uge double %x, 3.000000e+03           ; <i1> [#uses=1]
@@ -845,11 +841,18 @@ entry:
 }
 
 ; CHECK-LABEL: clampTo3k_c:
-; CHECK: maxsd
+; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: ret
 ; UNSAFE-LABEL: clampTo3k_c:
-; UNSAFE: maxsd
+; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
 ; FINITE-LABEL: clampTo3k_c:
-; FINITE: maxsd
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: ret
 define double @clampTo3k_c(double %x) nounwind readnone {
 entry:
   %0 = fcmp olt double %x, 3.000000e+03           ; <i1> [#uses=1]
@@ -858,11 +861,16 @@ entry:
 }
 
 ; CHECK-LABEL: clampTo3k_d:
-; CHECK: maxsd
+; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0
+; CHECK-NEXT: ret
 ; UNSAFE-LABEL: clampTo3k_d:
-; UNSAFE: maxsd
+; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
 ; FINITE-LABEL: clampTo3k_d:
-; FINITE: maxsd
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: ret
 define double @clampTo3k_d(double %x) nounwind readnone {
 entry:
   %0 = fcmp ule double %x, 3.000000e+03           ; <i1> [#uses=1]
@@ -871,11 +879,18 @@ entry:
 }
 
 ; CHECK-LABEL: clampTo3k_e:
-; CHECK: maxsd
+; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
+; CHECK-NEXT: maxsd %xmm0, %xmm1
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: ret
 ; UNSAFE-LABEL: clampTo3k_e:
-; UNSAFE: maxsd
+; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
 ; FINITE-LABEL: clampTo3k_e:
-; FINITE: maxsd
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: ret
 define double @clampTo3k_e(double %x) nounwind readnone {
 entry:
   %0 = fcmp olt double %x, 3.000000e+03           ; <i1> [#uses=1]
@@ -884,11 +899,16 @@ entry:
 }
 
 ; CHECK-LABEL: clampTo3k_f:
-; CHECK: maxsd
+; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0
+; CHECK-NEXT: ret
 ; UNSAFE-LABEL: clampTo3k_f:
-; UNSAFE: maxsd
+; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
 ; FINITE-LABEL: clampTo3k_f:
-; FINITE: maxsd
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: maxsd %xmm0, %xmm1
+; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: ret
 define double @clampTo3k_f(double %x) nounwind readnone {
 entry:
   %0 = fcmp ule double %x, 3.000000e+03           ; <i1> [#uses=1]
@@ -897,11 +917,18 @@ entry:
 }
 
 ; CHECK-LABEL: clampTo3k_g:
-; CHECK: minsd
+; CHECK-NEXT: movsd {{[^,]*}}, %xmm1
+; CHECK-NEXT: minsd %xmm0, %xmm1
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: ret
 ; UNSAFE-LABEL: clampTo3k_g:
-; UNSAFE: minsd
+; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
 ; FINITE-LABEL: clampTo3k_g:
-; FINITE: minsd
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: ret
 define double @clampTo3k_g(double %x) nounwind readnone {
 entry:
   %0 = fcmp ogt double %x, 3.000000e+03           ; <i1> [#uses=1]
@@ -910,11 +937,16 @@ entry:
 }
 
 ; CHECK-LABEL: clampTo3k_h:
-; CHECK: minsd
+; CHECK-NEXT: minsd {{[^,]*}}, %xmm0
+; CHECK-NEXT: ret
 ; UNSAFE-LABEL: clampTo3k_h:
-; UNSAFE: minsd
+; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0
+; UNSAFE-NEXT: ret
 ; FINITE-LABEL: clampTo3k_h:
-; FINITE: minsd
+; FINITE-NEXT: movsd {{[^,]*}}, %xmm1
+; FINITE-NEXT: minsd %xmm0, %xmm1
+; FINITE-NEXT: movaps %xmm1, %xmm0
+; FINITE-NEXT: ret
 define double @clampTo3k_h(double %x) nounwind readnone {
 entry:
   %0 = fcmp uge double %x, 3.000000e+03           ; <i1> [#uses=1]
@@ -923,33 +955,73 @@ entry:
 }
 
 ; UNSAFE-LABEL: test_maxpd:
-; UNSAFE: maxpd
-define <2 x double> @test_maxpd(<2 x double> %x, <2 x double> %y) {
+; UNSAFE-NEXT: maxpd %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+define <2 x double> @test_maxpd(<2 x double> %x, <2 x double> %y) nounwind {
   %max_is_x = fcmp oge <2 x double> %x, %y
   %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
   ret <2 x double> %max
 }
 
 ; UNSAFE-LABEL: test_minpd:
-; UNSAFE: minpd
-define <2 x double> @test_minpd(<2 x double> %x, <2 x double> %y) {
+; UNSAFE-NEXT: minpd %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+define <2 x double> @test_minpd(<2 x double> %x, <2 x double> %y) nounwind {
   %min_is_x = fcmp ole <2 x double> %x, %y
   %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
   ret <2 x double> %min
 }
 
 ; UNSAFE-LABEL: test_maxps:
-; UNSAFE: maxps
-define <4 x float> @test_maxps(<4 x float> %x, <4 x float> %y) {
+; UNSAFE-NEXT: maxps %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+define <4 x float> @test_maxps(<4 x float> %x, <4 x float> %y) nounwind {
   %max_is_x = fcmp oge <4 x float> %x, %y
   %max = select <4 x i1> %max_is_x, <4 x float> %x, <4 x float> %y
   ret <4 x float> %max
 }
 
 ; UNSAFE-LABEL: test_minps:
-; UNSAFE: minps
-define <4 x float> @test_minps(<4 x float> %x, <4 x float> %y) {
+; UNSAFE-NEXT: minps %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+define <4 x float> @test_minps(<4 x float> %x, <4 x float> %y) nounwind {
   %min_is_x = fcmp ole <4 x float> %x, %y
   %min = select <4 x i1> %min_is_x, <4 x float> %x, <4 x float> %y
   ret <4 x float> %min
 }
+
+; UNSAFE-LABEL: test_maxps_illegal_v2f32:
+; UNSAFE-NEXT: maxps %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+define <2 x float> @test_maxps_illegal_v2f32(<2 x float> %x, <2 x float> %y) nounwind {
+  %max_is_x = fcmp oge <2 x float> %x, %y
+  %max = select <2 x i1> %max_is_x, <2 x float> %x, <2 x float> %y
+  ret <2 x float> %max
+}
+
+; UNSAFE-LABEL: test_minps_illegal_v2f32:
+; UNSAFE-NEXT: minps %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+define <2 x float> @test_minps_illegal_v2f32(<2 x float> %x, <2 x float> %y) nounwind {
+  %min_is_x = fcmp ole <2 x float> %x, %y
+  %min = select <2 x i1> %min_is_x, <2 x float> %x, <2 x float> %y
+  ret <2 x float> %min
+}
+
+; UNSAFE-LABEL: test_maxps_illegal_v3f32:
+; UNSAFE-NEXT: maxps %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+define <3 x float> @test_maxps_illegal_v3f32(<3 x float> %x, <3 x float> %y) nounwind {
+  %max_is_x = fcmp oge <3 x float> %x, %y
+  %max = select <3 x i1> %max_is_x, <3 x float> %x, <3 x float> %y
+  ret <3 x float> %max
+}
+
+; UNSAFE-LABEL: test_minps_illegal_v3f32:
+; UNSAFE-NEXT: minps %xmm1, %xmm0
+; UNSAFE-NEXT: ret
+define <3 x float> @test_minps_illegal_v3f32(<3 x float> %x, <3 x float> %y) nounwind {
+  %min_is_x = fcmp ole <3 x float> %x, %y
+  %min = select <3 x i1> %min_is_x, <3 x float> %x, <3 x float> %y
+  ret <3 x float> %min
+}
diff --git a/test/CodeGen/X86/sse-scalar-fp-arith-2.ll b/test/CodeGen/X86/sse-scalar-fp-arith-2.ll
deleted file mode 100644
index 600ee1b7b1e5..000000000000
--- a/test/CodeGen/X86/sse-scalar-fp-arith-2.ll
+++ /dev/null
@@ -1,423 +0,0 @@
-; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
-; RUN: llc -mtriple=x86_64-pc-linux -mattr=-sse4.1 -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
-; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7-avx < %s | FileCheck -check-prefix=CHECK -check-prefix=AVX %s
-
-; Ensure that the backend selects SSE/AVX scalar fp instructions
-; from a packed fp instrution plus a vector insert.
-
-
-define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fadd <4 x float> %a, %b
-  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test_add_ss
-; SSE2: addss   %xmm1, %xmm0
-; AVX: vaddss   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fsub <4 x float> %a, %b
-  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test_sub_ss
-; SSE2: subss   %xmm1, %xmm0
-; AVX: vsubss   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fmul <4 x float> %a, %b
-  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test_mul_ss
-; SSE2: mulss   %xmm1, %xmm0
-; AVX: vmulss   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fdiv <4 x float> %a, %b
-  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test_div_ss
-; SSE2: divss   %xmm1, %xmm0
-; AVX: vdivss   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fadd <2 x double> %a, %b
-  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test_add_sd
-; SSE2: addsd   %xmm1, %xmm0
-; AVX: vaddsd   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
-define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fsub <2 x double> %a, %b
-  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test_sub_sd
-; SSE2: subsd   %xmm1, %xmm0
-; AVX: vsubsd   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
-define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fmul <2 x double> %a, %b
-  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test_mul_sd
-; SSE2: mulsd   %xmm1, %xmm0
-; AVX: vmulsd   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
-define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fdiv <2 x double> %a, %b
-  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test_div_sd
-; SSE2: divsd   %xmm1, %xmm0
-; AVX: vdivsd   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
-define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fadd <4 x float> %b, %a
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test2_add_ss
-; SSE2: addss   %xmm0, %xmm1
-; AVX: vaddss   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fsub <4 x float> %b, %a
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test2_sub_ss
-; SSE2: subss   %xmm0, %xmm1
-; AVX: vsubss   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fmul <4 x float> %b, %a
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test2_mul_ss
-; SSE2: mulss   %xmm0, %xmm1
-; AVX: vmulss   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fdiv <4 x float> %b, %a
-  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test2_div_ss
-; SSE2: divss   %xmm0, %xmm1
-; AVX: vdivss   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fadd <2 x double> %b, %a
-  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test2_add_sd
-; SSE2: addsd   %xmm0, %xmm1
-; AVX: vaddsd   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
-define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fsub <2 x double> %b, %a
-  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test2_sub_sd
-; SSE2: subsd   %xmm0, %xmm1
-; AVX: vsubsd   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
-define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fmul <2 x double> %b, %a
-  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test2_mul_sd
-; SSE2: mulsd   %xmm0, %xmm1
-; AVX: vmulsd   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
-define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fdiv <2 x double> %b, %a
-  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test2_div_sd
-; SSE2: divsd   %xmm0, %xmm1
-; AVX: vdivsd   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
-define <4 x float> @test3_add_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fadd <4 x float> %a, %b
-  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test3_add_ss
-; SSE2: addss   %xmm1, %xmm0
-; AVX: vaddss   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <4 x float> @test3_sub_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fsub <4 x float> %a, %b
-  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test3_sub_ss
-; SSE2: subss   %xmm1, %xmm0
-; AVX: vsubss   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <4 x float> @test3_mul_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fmul <4 x float> %a, %b
-  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test3_mul_ss
-; SSE2: mulss   %xmm1, %xmm0
-; AVX: vmulss   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <4 x float> @test3_div_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fdiv <4 x float> %a, %b
-  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test3_div_ss
-; SSE2: divss   %xmm1, %xmm0
-; AVX: vdivss   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <2 x double> @test3_add_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fadd <2 x double> %a, %b
-  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test3_add_sd
-; SSE2: addsd   %xmm1, %xmm0
-; AVX: vaddsd   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
-define <2 x double> @test3_sub_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fsub <2 x double> %a, %b
-  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test3_sub_sd
-; SSE2: subsd   %xmm1, %xmm0
-; AVX: vsubsd   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
-define <2 x double> @test3_mul_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fmul <2 x double> %a, %b
-  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test3_mul_sd
-; SSE2: mulsd   %xmm1, %xmm0
-; AVX: vmulsd   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
-define <2 x double> @test3_div_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fdiv <2 x double> %a, %b
-  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test3_div_sd
-; SSE2: divsd   %xmm1, %xmm0
-; AVX: vdivsd   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
-define <4 x float> @test4_add_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fadd <4 x float> %b, %a
-  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test4_add_ss
-; SSE2: addss   %xmm0, %xmm1
-; AVX: vaddss   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <4 x float> @test4_sub_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fsub <4 x float> %b, %a
-  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test4_sub_ss
-; SSE2: subss   %xmm0, %xmm1
-; AVX: vsubss   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <4 x float> @test4_mul_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fmul <4 x float> %b, %a
-  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test4_mul_ss
-; SSE2: mulss   %xmm0, %xmm1
-; AVX: vmulss   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <4 x float> @test4_div_ss(<4 x float> %a, <4 x float> %b) {
-  %1 = fdiv <4 x float> %b, %a
-  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
-  ret <4 x float> %2
-}
-
-; CHECK-LABEL: test4_div_ss
-; SSE2: divss   %xmm0, %xmm1
-; AVX: vdivss   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
-define <2 x double> @test4_add_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fadd <2 x double> %b, %a
-  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test4_add_sd
-; SSE2: addsd   %xmm0, %xmm1
-; AVX: vaddsd   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
-define <2 x double> @test4_sub_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fsub <2 x double> %b, %a
-  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test4_sub_sd
-; SSE2: subsd   %xmm0, %xmm1
-; AVX: vsubsd   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
-define <2 x double> @test4_mul_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fmul <2 x double> %b, %a
-  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test4_mul_sd
-; SSE2: mulsd   %xmm0, %xmm1
-; AVX: vmulsd   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
-define <2 x double> @test4_div_sd(<2 x double> %a, <2 x double> %b) {
-  %1 = fdiv <2 x double> %b, %a
-  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
-  ret <2 x double> %2
-}
-
-; CHECK-LABEL: test4_div_sd
-; SSE2: divsd   %xmm0, %xmm1
-; AVX: vdivsd   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
diff --git a/test/CodeGen/X86/sse-scalar-fp-arith.ll b/test/CodeGen/X86/sse-scalar-fp-arith.ll
index 3949a835e67a..b122ef67544c 100644
--- a/test/CodeGen/X86/sse-scalar-fp-arith.ll
+++ b/test/CodeGen/X86/sse-scalar-fp-arith.ll
@@ -1,13 +1,23 @@
-; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
-; RUN: llc -mtriple=x86_64-pc-linux -mattr=-sse4.1 -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
-; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7-avx < %s | FileCheck -check-prefix=CHECK -check-prefix=AVX %s
+; RUN: llc -mcpu=x86-64 -mattr=+sse2 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE2 %s
+; RUN: llc -mcpu=x86-64 -mattr=+sse4.1 < %s | FileCheck --check-prefix=SSE --check-prefix=SSE41 %s
+; RUN: llc -mcpu=x86-64 -mattr=+avx < %s | FileCheck --check-prefix=AVX %s
+
+target triple = "x86_64-unknown-unknown"
 
 ; Ensure that the backend no longer emits unnecessary vector insert
 ; instructions immediately after SSE scalar fp instructions
 ; like addss or mulss.
 
-
 define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: test_add_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    addss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_add_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <4 x float> %b, i32 0
   %2 = extractelement <4 x float> %a, i32 0
   %add = fadd float %2, %1
@@ -15,14 +25,16 @@ define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %3
 }
 
-; CHECK-LABEL: test_add_ss
-; SSE2: addss   %xmm1, %xmm0
-; AVX: vaddss   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
 define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: test_sub_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    subss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_sub_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <4 x float> %b, i32 0
   %2 = extractelement <4 x float> %a, i32 0
   %sub = fsub float %2, %1
@@ -30,13 +42,16 @@ define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %3
 }
 
-; CHECK-LABEL: test_sub_ss
-; SSE2: subss   %xmm1, %xmm0
-; AVX: vsubss   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
 define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: test_mul_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    mulss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_mul_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <4 x float> %b, i32 0
   %2 = extractelement <4 x float> %a, i32 0
   %mul = fmul float %2, %1
@@ -44,14 +59,16 @@ define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %3
 }
 
-; CHECK-LABEL: test_mul_ss
-; SSE2: mulss   %xmm1, %xmm0
-; AVX: vmulss   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
 define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: test_div_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    divss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_div_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <4 x float> %b, i32 0
   %2 = extractelement <4 x float> %a, i32 0
   %div = fdiv float %2, %1
@@ -59,14 +76,16 @@ define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %3
 }
 
-; CHECK-LABEL: test_div_ss
-; SSE2: divss   %xmm1, %xmm0
-; AVX: vdivss   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
 define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: test_add_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    addsd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_add_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <2 x double> %b, i32 0
   %2 = extractelement <2 x double> %a, i32 0
   %add = fadd double %2, %1
@@ -74,14 +93,16 @@ define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
   ret <2 x double> %3
 }
 
-; CHECK-LABEL: test_add_sd
-; SSE2: addsd   %xmm1, %xmm0
-; AVX: vaddsd   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
 define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: test_sub_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    subsd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_sub_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <2 x double> %b, i32 0
   %2 = extractelement <2 x double> %a, i32 0
   %sub = fsub double %2, %1
@@ -89,14 +110,16 @@ define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
   ret <2 x double> %3
 }
 
-; CHECK-LABEL: test_sub_sd
-; SSE2: subsd   %xmm1, %xmm0
-; AVX: vsubsd   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
 define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: test_mul_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    mulsd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_mul_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <2 x double> %b, i32 0
   %2 = extractelement <2 x double> %a, i32 0
   %mul = fmul double %2, %1
@@ -104,14 +127,16 @@ define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
   ret <2 x double> %3
 }
 
-; CHECK-LABEL: test_mul_sd
-; SSE2: mulsd   %xmm1, %xmm0
-; AVX: vmulsd   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
 define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: test_div_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    divsd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_div_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <2 x double> %b, i32 0
   %2 = extractelement <2 x double> %a, i32 0
   %div = fdiv double %2, %1
@@ -119,14 +144,17 @@ define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
   ret <2 x double> %3
 }
 
-; CHECK-LABEL: test_div_sd
-; SSE2: divsd   %xmm1, %xmm0
-; AVX: vdivsd   %xmm1, %xmm0, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
 define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: test2_add_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    addss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test2_add_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <4 x float> %a, i32 0
   %2 = extractelement <4 x float> %b, i32 0
   %add = fadd float %1, %2
@@ -134,14 +162,17 @@ define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %3
 }
 
-; CHECK-LABEL: test2_add_ss
-; SSE2: addss   %xmm0, %xmm1
-; AVX: vaddss   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
 define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: test2_sub_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    subss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test2_sub_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <4 x float> %a, i32 0
   %2 = extractelement <4 x float> %b, i32 0
   %sub = fsub float %2, %1
@@ -149,14 +180,17 @@ define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %3
 }
 
-; CHECK-LABEL: test2_sub_ss
-; SSE2: subss   %xmm0, %xmm1
-; AVX: vsubss   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
 define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: test2_mul_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    mulss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test2_mul_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <4 x float> %a, i32 0
   %2 = extractelement <4 x float> %b, i32 0
   %mul = fmul float %1, %2
@@ -164,14 +198,17 @@ define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %3
 }
 
-; CHECK-LABEL: test2_mul_ss
-; SSE2: mulss   %xmm0, %xmm1
-; AVX: vmulss   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
 define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: test2_div_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    divss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test2_div_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <4 x float> %a, i32 0
   %2 = extractelement <4 x float> %b, i32 0
   %div = fdiv float %2, %1
@@ -179,14 +216,17 @@ define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %3
 }
 
-; CHECK-LABEL: test2_div_ss
-; SSE2: divss   %xmm0, %xmm1
-; AVX: vdivss   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movss
-; CHECK: ret
-
-
 define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: test2_add_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    addsd %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test2_add_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <2 x double> %a, i32 0
   %2 = extractelement <2 x double> %b, i32 0
   %add = fadd double %1, %2
@@ -194,14 +234,17 @@ define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
   ret <2 x double> %3
 }
 
-; CHECK-LABEL: test2_add_sd
-; SSE2: addsd   %xmm0, %xmm1
-; AVX: vaddsd   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
 define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: test2_sub_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    subsd %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test2_sub_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <2 x double> %a, i32 0
   %2 = extractelement <2 x double> %b, i32 0
   %sub = fsub double %2, %1
@@ -209,14 +252,17 @@ define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
   ret <2 x double> %3
 }
 
-; CHECK-LABEL: test2_sub_sd
-; SSE2: subsd   %xmm0, %xmm1
-; AVX: vsubsd   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
 define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: test2_mul_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    mulsd %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test2_mul_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <2 x double> %a, i32 0
   %2 = extractelement <2 x double> %b, i32 0
   %mul = fmul double %1, %2
@@ -224,14 +270,17 @@ define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
   ret <2 x double> %3
 }
 
-; CHECK-LABEL: test2_mul_sd
-; SSE2: mulsd   %xmm0, %xmm1
-; AVX: vmulsd   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
 define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: test2_div_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    divsd %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test2_div_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <2 x double> %a, i32 0
   %2 = extractelement <2 x double> %b, i32 0
   %div = fdiv double %2, %1
@@ -239,14 +288,18 @@ define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
   ret <2 x double> %3
 }
 
-; CHECK-LABEL: test2_div_sd
-; SSE2: divsd   %xmm0, %xmm1
-; AVX: vdivsd   %xmm0, %xmm1, %xmm0
-; CHECK-NOT: movsd
-; CHECK: ret
-
-
 define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: test_multiple_add_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    addss %xmm0, %xmm1
+; SSE-NEXT:    addss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_multiple_add_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm1
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <4 x float> %b, i32 0
   %2 = extractelement <4 x float> %a, i32 0
   %add = fadd float %2, %1
@@ -255,14 +308,19 @@ define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %3
 }
 
-; CHECK-LABEL: test_multiple_add_ss
-; CHECK: addss
-; CHECK: addss
-; CHECK-NOT: movss
-; CHECK: ret
-
-
 define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: test_multiple_sub_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    movaps %xmm0, %xmm2
+; SSE-NEXT:    subss %xmm1, %xmm2
+; SSE-NEXT:    subss %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_multiple_sub_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm1
+; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <4 x float> %b, i32 0
   %2 = extractelement <4 x float> %a, i32 0
   %sub = fsub float %2, %1
@@ -271,14 +329,18 @@ define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %3
 }
 
-; CHECK-LABEL: test_multiple_sub_ss
-; CHECK: subss
-; CHECK: subss
-; CHECK-NOT: movss
-; CHECK: ret
-
-
 define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: test_multiple_mul_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    mulss %xmm0, %xmm1
+; SSE-NEXT:    mulss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_multiple_mul_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm1
+; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <4 x float> %b, i32 0
   %2 = extractelement <4 x float> %a, i32 0
   %mul = fmul float %2, %1
@@ -287,13 +349,19 @@ define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %3
 }
 
-; CHECK-LABEL: test_multiple_mul_ss
-; CHECK: mulss
-; CHECK: mulss
-; CHECK-NOT: movss
-; CHECK: ret
-
 define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: test_multiple_div_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    movaps %xmm0, %xmm2
+; SSE-NEXT:    divss %xmm1, %xmm2
+; SSE-NEXT:    divss %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test_multiple_div_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm1
+; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %1 = extractelement <4 x float> %b, i32 0
   %2 = extractelement <4 x float> %a, i32 0
   %div = fdiv float %2, %1
@@ -302,9 +370,501 @@ define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
   ret <4 x float> %3
 }
 
-; CHECK-LABEL: test_multiple_div_ss
-; CHECK: divss
-; CHECK: divss
-; CHECK-NOT: movss
-; CHECK: ret
+; Ensure that the backend selects SSE/AVX scalar fp instructions
+; from a packed fp instrution plus a vector insert.
+
+define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test_add_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    addss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test_add_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fadd <4 x float> %a, %b
+  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x float> %2
+}
+
+define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test_sub_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    subss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test_sub_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fsub <4 x float> %a, %b
+  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x float> %2
+}
+
+define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test_mul_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    mulss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test_mul_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fmul <4 x float> %a, %b
+  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x float> %2
+}
+
+define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test_div_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    divss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test_div_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fdiv <4 x float> %a, %b
+  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x float> %2
+}
+
+define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test_add_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    addsd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test_add_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fadd <2 x double> %a, %b
+  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %2
+}
+
+define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test_sub_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    subsd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test_sub_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fsub <2 x double> %a, %b
+  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %2
+}
+
+define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test_mul_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    mulsd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test_mul_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fmul <2 x double> %a, %b
+  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %2
+}
+
+define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test_div_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    divsd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test_div_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fdiv <2 x double> %a, %b
+  %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %2
+}
+
+define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test2_add_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    addss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test2_add_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fadd <4 x float> %b, %a
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x float> %2
+}
+
+define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test2_sub_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    subss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test2_sub_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fsub <4 x float> %b, %a
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x float> %2
+}
+
+define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test2_mul_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    mulss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test2_mul_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fmul <4 x float> %b, %a
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x float> %2
+}
+
+define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test2_div_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    divss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test2_div_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fdiv <4 x float> %b, %a
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x float> %2
+}
+
+define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test2_add_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    addsd %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test2_add_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fadd <2 x double> %b, %a
+  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %2
+}
+
+define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test2_sub_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    subsd %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test2_sub_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fsub <2 x double> %b, %a
+  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %2
+}
+
+define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test2_mul_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    mulsd %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test2_mul_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fmul <2 x double> %b, %a
+  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %2
+}
+
+define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test2_div_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    divsd %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test2_div_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fdiv <2 x double> %b, %a
+  %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %2
+}
+
+define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test3_add_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    addss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test3_add_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fadd <4 x float> %a, %b
+  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
+  ret <4 x float> %2
+}
+
+define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test3_sub_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    subss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test3_sub_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fsub <4 x float> %a, %b
+  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
+  ret <4 x float> %2
+}
+
+define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test3_mul_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    mulss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test3_mul_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fmul <4 x float> %a, %b
+  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
+  ret <4 x float> %2
+}
 
+define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test3_div_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    divss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test3_div_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fdiv <4 x float> %a, %b
+  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %1
+  ret <4 x float> %2
+}
+
+define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test3_add_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    addsd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test3_add_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fadd <2 x double> %a, %b
+  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
+  ret <2 x double> %2
+}
+
+define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test3_sub_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    subsd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test3_sub_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fsub <2 x double> %a, %b
+  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
+  ret <2 x double> %2
+}
+
+define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test3_mul_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    mulsd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test3_mul_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fmul <2 x double> %a, %b
+  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
+  ret <2 x double> %2
+}
+
+define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test3_div_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    divsd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test3_div_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %1 = fdiv <2 x double> %a, %b
+  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %1
+  ret <2 x double> %2
+}
+
+define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test4_add_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    addss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test4_add_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fadd <4 x float> %b, %a
+  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
+  ret <4 x float> %2
+}
+
+define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test4_sub_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    subss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test4_sub_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsubss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fsub <4 x float> %b, %a
+  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
+  ret <4 x float> %2
+}
+
+define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test4_mul_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    mulss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test4_mul_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fmul <4 x float> %b, %a
+  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
+  ret <4 x float> %2
+}
+
+define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: insert_test4_div_ss:
+; SSE:       # BB#0:
+; SSE-NEXT:    divss %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test4_div_ss:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fdiv <4 x float> %b, %a
+  %2 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %b, <4 x float> %1
+  ret <4 x float> %2
+}
+
+define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test4_add_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    addsd %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test4_add_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fadd <2 x double> %b, %a
+  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
+  ret <2 x double> %2
+}
+
+define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test4_sub_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    subsd %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test4_sub_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsubsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fsub <2 x double> %b, %a
+  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
+  ret <2 x double> %2
+}
+
+define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test4_mul_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    mulsd %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test4_mul_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmulsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fmul <2 x double> %b, %a
+  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
+  ret <2 x double> %2
+}
+
+define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) {
+; SSE-LABEL: insert_test4_div_sd:
+; SSE:       # BB#0:
+; SSE-NEXT:    divsd %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_test4_div_sd:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = fdiv <2 x double> %b, %a
+  %2 = select <2 x i1> <i1 false, i1 true>, <2 x double> %b, <2 x double> %1
+  ret <2 x double> %2
+}
diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll
index 183297e4c306..fd35e75d71ae 100644
--- a/test/CodeGen/X86/sse1.ll
+++ b/test/CodeGen/X86/sse1.ll
@@ -1,17 +1,6 @@
 ; Tests for SSE1 and below, without SSE2+.
-; RUN: llc < %s -march=x86 -mcpu=pentium3 -O3 | FileCheck %s
-; RUN: llc < %s -march=x86-64 -mattr=-sse2,+sse -O3 | FileCheck %s
-
-define <8 x i16> @test1(<8 x i32> %a) nounwind {
-; CHECK: test1
-  ret <8 x i16> zeroinitializer
-}
-
-define <8 x i16> @test2(<8 x i32> %a) nounwind {
-; CHECK: test2
-  %c = trunc <8 x i32> %a to <8 x i16>            ; <<8 x i16>> [#uses=1]
-  ret <8 x i16> %c
-}
+; RUN: llc < %s -mtriple=i386-unknown-unknown -march=x86 -mcpu=pentium3 -O3 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=-sse2,+sse -O3 | FileCheck %s
 
 ; PR7993
 ;define <4 x i32> @test3(<4 x i16> %a) nounwind {
@@ -23,6 +12,15 @@ define <8 x i16> @test2(<8 x i32> %a) nounwind {
 ; vector that this ends up returning.
 ; rdar://8368414
 define <2 x float> @test4(<2 x float> %A, <2 x float> %B) nounwind {
+; CHECK-LABEL: test4:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movaps %xmm0, %xmm2
+; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; CHECK-NEXT:    addss %xmm1, %xmm0
+; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; CHECK-NEXT:    subss %xmm1, %xmm2
+; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; CHECK-NEXT:    ret
 entry:
   %tmp7 = extractelement <2 x float> %A, i32 0
   %tmp5 = extractelement <2 x float> %A, i32 1
@@ -33,15 +31,6 @@ entry:
   %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
   %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
   ret <2 x float> %tmp9
-; CHECK-LABEL: test4:
-; CHECK-NOT: shufps	$16
-; CHECK: shufps	$1, 
-; CHECK-NOT: shufps	$16
-; CHECK: shufps	$1, 
-; CHECK-NOT: shufps	$16
-; CHECK: unpcklps
-; CHECK-NOT: shufps	$16
-; CHECK: ret
 }
 
 ; We used to get stuck in type legalization for this example when lowering the
@@ -50,8 +39,9 @@ entry:
 ; condition operand and widening the resulting vselect for the v4f32 result.
 ; PR18036
 
-; CHECK-LABEL: vselect
 define <4 x float> @vselect(<4 x float>*%p, <4 x i32> %q) {
+; CHECK-LABEL: vselect:
+; CHECK:         ret
 entry:
   %a1 = icmp eq <4 x i32> %q, zeroinitializer
   %a14 = select <4 x i1> %a1, <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+0> , <4 x float> zeroinitializer
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
deleted file mode 100644
index c63ff72b4801..000000000000
--- a/test/CodeGen/X86/sse2-blend.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse4.1 | FileCheck %s
-
-; CHECK-LABEL: vsel_float
-; CHECK-NOT: xorps
-; CHECK: movss
-; CHECK-NOT: orps
-; CHECK: ret
-define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
-  %A = load <4 x float>* %v1
-  %B = load <4 x float>* %v2
-  %vsel = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %A, <4 x float> %B
-  store <4 x float > %vsel, <4 x float>* %v1
-  ret void
-}
-
-; CHECK-LABEL: vsel_i32
-; CHECK-NOT: xorps
-; CHECK: movss
-; CHECK-NOT: orps
-; CHECK: ret
-define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
-  %A = load <4 x i32>* %v1
-  %B = load <4 x i32>* %v2
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %A, <4 x i32> %B
-  store <4 x i32 > %vsel, <4 x i32>* %v1
-  ret void
-}
-
-; Without forcing instructions, fall back to the preferred PS domain.
-; CHECK-LABEL: vsel_i64
-; CHECK: andnps
-; CHECK: orps
-; CHECK: ret
-
-define void@vsel_i64(<2 x i64>* %v1, <2 x i64>* %v2) {
-  %A = load <2 x i64>* %v1
-  %B = load <2 x i64>* %v2
-  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %A, <2 x i64> %B
-  store <2 x i64 > %vsel, <2 x i64>* %v1
-  ret void
-}
-
-; Without forcing instructions, fall back to the preferred PS domain.
-; CHECK-LABEL: vsel_double
-; CHECK: andnps
-; CHECK: orps
-; CHECK: ret
-
-define void@vsel_double(<2 x double>* %v1, <2 x double>* %v2) {
-  %A = load <2 x double>* %v1
-  %B = load <2 x double>* %v2
-  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %A, <2 x double> %B
-  store <2 x double > %vsel, <2 x double>* %v1
-  ret void
-}
-
-
diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll
index c906ecdd60c1..ddb04211ec7b 100644
--- a/test/CodeGen/X86/sse2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll
@@ -411,7 +411,7 @@ declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
 
 
 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
-  ; CHECK: pslldq
+  ; CHECK: pslldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
   %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
@@ -419,7 +419,7 @@ declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
 
 
 define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
-  ; CHECK: pslldq
+  ; CHECK: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
   %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
@@ -507,7 +507,7 @@ declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
 
 
 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
-  ; CHECK: psrldq
+  ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
   %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
@@ -515,7 +515,7 @@ declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
 
 
 define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
-  ; CHECK: psrldq
+  ; CHECK: psrldq {{.*#+}} xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
   %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
   ret <2 x i64> %res
 }
diff --git a/test/CodeGen/X86/sse2-mul.ll b/test/CodeGen/X86/sse2-mul.ll
deleted file mode 100644
index e066368dc73e..000000000000
--- a/test/CodeGen/X86/sse2-mul.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mcpu=core2 | FileCheck %s
-
-define <4 x i32> @test1(<4 x i32> %x, <4 x i32> %y) {
-  %m = mul <4 x i32> %x, %y
-  ret <4 x i32> %m
-; CHECK-LABEL: test1:
-; CHECK: pshufd $49
-; CHECK: pmuludq
-; CHECK: pshufd $49
-; CHECK: pmuludq
-; CHECK: shufps $-120
-; CHECK: pshufd $-40
-; CHECK: ret
-}
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index e8d3d6f19ed7..b7db6cb56ef2 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -2,39 +2,48 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s
 
 define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
+; CHECK-LABEL: test1:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movapd (%ecx), %xmm0
+; CHECK-NEXT:    movlpd {{[0-9]+}}(%esp), %xmm0
+; CHECK-NEXT:    movapd %xmm0, (%eax)
+; CHECK-NEXT:    retl
 	%tmp3 = load <2 x double>* %A, align 16
 	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
 	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
 	store <2 x double> %tmp9, <2 x double>* %r, align 16
 	ret void
-
-; CHECK-LABEL: test1:
-; CHECK: 	movl	4(%esp), %eax
-; CHECK-NEXT: 	movl	8(%esp), %ecx
-; CHECK-NEXT: 	movapd	(%ecx), %xmm0
-; CHECK-NEXT: 	movlpd	12(%esp), %xmm0
-; CHECK-NEXT: 	movapd	%xmm0, (%eax)
-; CHECK-NEXT: 	ret
 }
 
 define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
+; CHECK-LABEL: test2:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movapd (%ecx), %xmm0
+; CHECK-NEXT:    movhpd {{[0-9]+}}(%esp), %xmm0
+; CHECK-NEXT:    movapd %xmm0, (%eax)
+; CHECK-NEXT:    retl
 	%tmp3 = load <2 x double>* %A, align 16
 	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
 	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
 	store <2 x double> %tmp9, <2 x double>* %r, align 16
 	ret void
-
-; CHECK-LABEL: test2:
-; CHECK: 	movl	4(%esp), %eax
-; CHECK: 	movl	8(%esp), %ecx
-; CHECK-NEXT: 	movapd	(%ecx), %xmm0
-; CHECK-NEXT: 	movhpd	12(%esp), %xmm0
-; CHECK-NEXT: 	movapd	%xmm0, (%eax)
-; CHECK-NEXT: 	ret
 }
 
 
 define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind {
+; CHECK-LABEL: test3:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT:    movaps (%edx), %xmm0
+; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
+; CHECK-NEXT:    movaps %xmm0, (%eax)
+; CHECK-NEXT:    retl
 	%tmp = load <4 x float>* %B		; <<4 x float>> [#uses=2]
 	%tmp3 = load <4 x float>* %A		; <<4 x float>> [#uses=2]
 	%tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0		; <float> [#uses=1]
@@ -47,24 +56,30 @@ define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind
 	%tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp13, <4 x float>* %res
 	ret void
-; CHECK: @test3
-; CHECK: 	unpcklps
 }
 
 define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
+; CHECK-LABEL: test4:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,1,3,3]
+; CHECK-NEXT:    movaps %xmm0, (%eax)
+; CHECK-NEXT:    retl
 	%tmp5 = shufflevector <4 x float> %X, <4 x float> undef, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp5, <4 x float>* %res
 	ret void
-; CHECK: @test4
-; CHECK: 	pshufd	$50, %xmm0, %xmm0
 }
 
 define <4 x i32> @test5(i8** %ptr) nounwind {
 ; CHECK-LABEL: test5:
-; CHECK: pxor
-; CHECK: punpcklbw
-; CHECK: punpcklwd
-
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl (%eax), %eax
+; CHECK-NEXT:    movss (%eax), %xmm1
+; CHECK-NEXT:    pxor %xmm0, %xmm0
+; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK-NEXT:    retl
 	%tmp = load i8** %ptr		; <i8*> [#uses=1]
 	%tmp.upgrd.1 = bitcast i8* %tmp to float*		; <float*> [#uses=1]
 	%tmp.upgrd.2 = load float* %tmp.upgrd.1		; <float> [#uses=1]
@@ -81,30 +96,39 @@ define <4 x i32> @test5(i8** %ptr) nounwind {
 }
 
 define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
-        %tmp1 = load <4 x float>* %A            ; <<4 x float>> [#uses=1]
-        %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >          ; <<4 x float>> [#uses=1]
-        store <4 x float> %tmp2, <4 x float>* %res
-        ret void
-
 ; CHECK-LABEL: test6:
-; CHECK: 	movaps	(%ecx), %xmm0
-; CHECK:	movaps	%xmm0, (%eax)
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movaps (%ecx), %xmm0
+; CHECK-NEXT:    movaps %xmm0, (%eax)
+; CHECK-NEXT:    retl
+  %tmp1 = load <4 x float>* %A            ; <<4 x float>> [#uses=1]
+  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >          ; <<4 x float>> [#uses=1]
+  store <4 x float> %tmp2, <4 x float>* %res
+  ret void
 }
 
 define void @test7() nounwind {
-        bitcast <4 x i32> zeroinitializer to <4 x float>                ; <<4 x float>>:1 [#uses=1]
-        shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer         ; <<4 x float>>:2 [#uses=1]
-        store <4 x float> %2, <4 x float>* null
-        ret void
-
 ; CHECK-LABEL: test7:
-; CHECK:	xorps	%xmm0, %xmm0
-; CHECK:	movaps	%xmm0, 0
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    movaps %xmm0, 0
+; CHECK-NEXT:    retl
+  bitcast <4 x i32> zeroinitializer to <4 x float>                ; <<4 x float>>:1 [#uses=1]
+  shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer         ; <<4 x float>>:2 [#uses=1]
+  store <4 x float> %2, <4 x float>* null
+  ret void
 }
 
 @x = external global [4 x i32]
 
 define <2 x i64> @test8() nounwind {
+; CHECK-LABEL: test8:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movl L_x$non_lazy_ptr, %eax
+; CHECK-NEXT:    movups (%eax), %xmm0
+; CHECK-NEXT:    retl
 	%tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0)		; <i32> [#uses=1]
 	%tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1)		; <i32> [#uses=1]
 	%tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2)		; <i32> [#uses=1]
@@ -115,90 +139,123 @@ define <2 x i64> @test8() nounwind {
 	%tmp15 = insertelement <4 x i32> %tmp14, i32 %tmp7, i32 3		; <<4 x i32>> [#uses=1]
 	%tmp16 = bitcast <4 x i32> %tmp15 to <2 x i64>		; <<2 x i64>> [#uses=1]
 	ret <2 x i64> %tmp16
-; CHECK-LABEL: test8:
-; CHECK: movups	(%eax), %xmm0
 }
 
 define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) nounwind {
+; CHECK-LABEL: test9:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movups {{[0-9]+}}(%esp), %xmm0
+; CHECK-NEXT:    retl
 	%tmp = insertelement <4 x float> undef, float %a, i32 0		; <<4 x float>> [#uses=1]
 	%tmp11 = insertelement <4 x float> %tmp, float %b, i32 1		; <<4 x float>> [#uses=1]
 	%tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2		; <<4 x float>> [#uses=1]
 	%tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3		; <<4 x float>> [#uses=1]
 	ret <4 x float> %tmp13
-; CHECK-LABEL: test9:
-; CHECK: movups	8(%esp), %xmm0
 }
 
 define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind {
+; CHECK-LABEL: test10:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movaps {{[0-9]+}}(%esp), %xmm0
+; CHECK-NEXT:    retl
 	%tmp = insertelement <4 x float> undef, float %a, i32 0		; <<4 x float>> [#uses=1]
 	%tmp11 = insertelement <4 x float> %tmp, float %b, i32 1		; <<4 x float>> [#uses=1]
 	%tmp12 = insertelement <4 x float> %tmp11, float %c, i32 2		; <<4 x float>> [#uses=1]
 	%tmp13 = insertelement <4 x float> %tmp12, float %d, i32 3		; <<4 x float>> [#uses=1]
 	ret <4 x float> %tmp13
-; CHECK-LABEL: test10:
-; CHECK: movaps	4(%esp), %xmm0
 }
 
 define <2 x double> @test11(double %a, double %b) nounwind {
+; CHECK-LABEL: test11:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movaps {{[0-9]+}}(%esp), %xmm0
+; CHECK-NEXT:    retl
 	%tmp = insertelement <2 x double> undef, double %a, i32 0		; <<2 x double>> [#uses=1]
 	%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1		; <<2 x double>> [#uses=1]
 	ret <2 x double> %tmp7
-; CHECK-LABEL: test11:
-; CHECK: movaps	4(%esp), %xmm0
 }
 
 define void @test12() nounwind {
-        %tmp1 = load <4 x float>* null          ; <<4 x float>> [#uses=2]
-        %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >             ; <<4 x float>> [#uses=1]
-        %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >                ; <<4 x float>> [#uses=1]
-        %tmp4 = fadd <4 x float> %tmp2, %tmp3            ; <<4 x float>> [#uses=1]
-        store <4 x float> %tmp4, <4 x float>* null
-        ret void
 ; CHECK-LABEL: test12:
-; CHECK: movhlps
-; CHECK: shufps
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movapd 0, %xmm0
+; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; CHECK-NEXT:    movsd %xmm0, %xmm1
+; CHECK-NEXT:    xorpd %xmm2, %xmm2
+; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
+; CHECK-NEXT:    addps %xmm1, %xmm0
+; CHECK-NEXT:    movaps %xmm0, 0
+; CHECK-NEXT:    retl
+  %tmp1 = load <4 x float>* null          ; <<4 x float>> [#uses=2]
+  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >             ; <<4 x float>> [#uses=1]
+  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >                ; <<4 x float>> [#uses=1]
+  %tmp4 = fadd <4 x float> %tmp2, %tmp3            ; <<4 x float>> [#uses=1]
+  store <4 x float> %tmp4, <4 x float>* null
+  ret void
 }
 
 define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
-        %tmp3 = load <4 x float>* %B            ; <<4 x float>> [#uses=1]
-        %tmp5 = load <4 x float>* %C            ; <<4 x float>> [#uses=1]
-        %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 >         ; <<4 x float>> [#uses=1]
-        store <4 x float> %tmp11, <4 x float>* %res
-        ret void
-; CHECK: test13
-; CHECK: shufps	$69, (%ecx), %xmm0
-; CHECK: pshufd	$-40, %xmm0, %xmm0
+; CHECK-LABEL: test13:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT:    movaps (%edx), %xmm0
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],mem[0,1]
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; CHECK-NEXT:    movaps %xmm0, (%eax)
+; CHECK-NEXT:    retl
+  %tmp3 = load <4 x float>* %B            ; <<4 x float>> [#uses=1]
+  %tmp5 = load <4 x float>* %C            ; <<4 x float>> [#uses=1]
+  %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 >         ; <<4 x float>> [#uses=1]
+  store <4 x float> %tmp11, <4 x float>* %res
+  ret void
 }
 
 define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
-        %tmp = load <4 x float>* %y             ; <<4 x float>> [#uses=2]
-        %tmp5 = load <4 x float>* %x            ; <<4 x float>> [#uses=2]
-        %tmp9 = fadd <4 x float> %tmp5, %tmp             ; <<4 x float>> [#uses=1]
-        %tmp21 = fsub <4 x float> %tmp5, %tmp            ; <<4 x float>> [#uses=1]
-        %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
-        ret <4 x float> %tmp27
 ; CHECK-LABEL: test14:
-; CHECK: 	addps	[[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
-; CHECK: 	subps	[[X1]], [[X2:%xmm[0-9]+]]
-; CHECK: 	movlhps	[[X2]], [[X0]]
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movaps (%ecx), %xmm1
+; CHECK-NEXT:    movaps (%eax), %xmm2
+; CHECK-NEXT:    movaps %xmm2, %xmm0
+; CHECK-NEXT:    addps %xmm1, %xmm0
+; CHECK-NEXT:    subps %xmm1, %xmm2
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT:    retl
+  %tmp = load <4 x float>* %y             ; <<4 x float>> [#uses=2]
+  %tmp5 = load <4 x float>* %x            ; <<4 x float>> [#uses=2]
+  %tmp9 = fadd <4 x float> %tmp5, %tmp             ; <<4 x float>> [#uses=1]
+  %tmp21 = fsub <4 x float> %tmp5, %tmp            ; <<4 x float>> [#uses=1]
+  %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
+  ret <4 x float> %tmp27
 }
 
 define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {
-entry:
-        %tmp = load <4 x float>* %y             ; <<4 x float>> [#uses=1]
-        %tmp3 = load <4 x float>* %x            ; <<4 x float>> [#uses=1]
-        %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >           ; <<4 x float>> [#uses=1]
-        ret <4 x float> %tmp4
 ; CHECK-LABEL: test15:
-; CHECK: 	movhlps	%xmm1, %xmm0
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movapd (%ecx), %xmm0
+; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
+; CHECK-NEXT:    retl
+entry:
+  %tmp = load <4 x float>* %y             ; <<4 x float>> [#uses=1]
+  %tmp3 = load <4 x float>* %x            ; <<4 x float>> [#uses=1]
+  %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >           ; <<4 x float>> [#uses=1]
+  ret <4 x float> %tmp4
 }
 
 ; PR8900
-; CHECK-LABEL: test16:
-; CHECK: unpcklpd
-; CHECK: ret
 
 define  <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocapture %dst) {
+; CHECK-LABEL: test16:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movapd 96(%eax), %xmm0
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    retl
   %i5 = getelementptr inbounds <4 x double>* %srcA, i32 3
   %i6 = load <4 x double>* %i5, align 32
   %i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2>
@@ -207,6 +264,11 @@ define  <2 x double> @test16(<4 x double> * nocapture %srcA, <2 x double>* nocap
 
 ; PR9009
 define fastcc void @test17() nounwind {
+; CHECK-LABEL: test17:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    movaps {{.*#+}} xmm0 = <u,u,32768,32768>
+; CHECK-NEXT:    movaps %xmm0, (%eax)
+; CHECK-NEXT:    retl
 entry:
   %0 = insertelement <4 x i32> undef, i32 undef, i32 1
   %1 = shufflevector <4 x i32> <i32 undef, i32 undef, i32 32768, i32 32768>, <4 x i32> %0, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
@@ -217,25 +279,48 @@ entry:
 
 ; PR9210
 define <4 x float> @f(<4 x double>) nounwind {
+; CHECK-LABEL: f:
+; CHECK:       ## BB#0: ## %entry
+; CHECK-NEXT:    cvtpd2ps %xmm1, %xmm1
+; CHECK-NEXT:    cvtpd2ps %xmm0, %xmm0
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retl
 entry:
  %double2float.i = fptrunc <4 x double> %0 to <4 x float>
  ret <4 x float> %double2float.i
 }
 
 define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
-; CHECK-LABEL: test_insert_64_zext
-; CHECK-NOT: xor
-; CHECK: movq
+; CHECK-LABEL: test_insert_64_zext:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    movq %xmm0, %xmm0
+; CHECK-NEXT:    retl
   %1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
   ret <2 x i64> %1
 }
 
 define <4 x i32> @PR19721(<4 x i32> %i) {
+; CHECK-LABEL: PR19721:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    movss %xmm1, %xmm0
+; CHECK-NEXT:    retl
   %bc = bitcast <4 x i32> %i to i128
   %insert = and i128 %bc, -4294967296
   %bc2 = bitcast i128 %insert to <4 x i32>
   ret <4 x i32> %bc2
+}
 
-; CHECK-LABEL: PR19721
-; CHECK: punpckldq
+define <4 x i32> @test_mul(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: test_mul:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; CHECK-NEXT:    pmuludq %xmm1, %xmm0
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-NEXT:    pmuludq %xmm2, %xmm1
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; CHECK-NEXT:    retl
+  %m = mul <4 x i32> %x, %y
+  ret <4 x i32> %m
 }
diff --git a/test/CodeGen/X86/sse3-avx-addsub-2.ll b/test/CodeGen/X86/sse3-avx-addsub-2.ll
index b7706cc34bb6..5b2de28c0f5d 100644
--- a/test/CodeGen/X86/sse3-avx-addsub-2.ll
+++ b/test/CodeGen/X86/sse3-avx-addsub-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
 ; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
 
 
diff --git a/test/CodeGen/X86/sse3-avx-addsub.ll b/test/CodeGen/X86/sse3-avx-addsub.ll
index 8b6674312b34..431588f90ab2 100644
--- a/test/CodeGen/X86/sse3-avx-addsub.ll
+++ b/test/CodeGen/X86/sse3-avx-addsub.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s -check-prefix=SSE -check-prefix=CHECK
+; RUN: llc < %s -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=SSE -check-prefix=CHECK
 ; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx | FileCheck %s -check-prefix=AVX -check-prefix=CHECK
 
 ; Test ADDSUB ISel patterns.
@@ -141,156 +141,3 @@ define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
 ; AVX: vaddsubpd
 ; CHECK-NEXT: ret
 
-; Functions below are obtained from the following source:
-;
-; float4 test1(float4 A, float4 B) {
-;   float4 X = A + B;
-;   float4 Y = A - B;
-;   return (float4){X[0], Y[1], X[2], Y[3]};
-; }
-;
-; float8 test2(float8 A, float8 B) {
-;   float8 X = A + B;
-;   float8 Y = A - B;
-;   return (float8){X[0], Y[1], X[2], Y[3], X[4], Y[5], X[6], Y[7]};
-; }
-;
-; double4 test3(double4 A, double4 B) {
-;   double4 X = A + B;
-;   double4 Y = A - B;
-;   return (double4){X[0], Y[1], X[2], Y[3]};
-; }
-;
-; double2 test4(double2 A, double2 B) {
-;   double2 X = A + B;
-;   double2 Y = A - B;
-;   return (double2){X[0], Y[1]};
-; }
-
-define <4 x float> @test5(<4 x float> %A, <4 x float> %B) {
-  %sub = fsub <4 x float> %A, %B
-  %add = fadd <4 x float> %A, %B
-  %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x float> %vecinit6
-}
-; CHECK-LABEL: test5
-; SSE: xorps
-; SSE-NEXT: addsubps
-; AVX: vxorps
-; AVX-NEXT: vaddsubps
-; CHECK: ret
-
-
-define <8 x float> @test6(<8 x float> %A, <8 x float> %B) {
-  %sub = fsub <8 x float> %A, %B
-  %add = fadd <8 x float> %A, %B
-  %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
-  ret <8 x float> %vecinit14
-}
-; CHECK-LABEL: test6
-; SSE: xorps
-; SSE-NEXT: addsubps
-; SSE: xorps
-; SSE-NEXT: addsubps
-; AVX: vxorps
-; AVX-NEXT: vaddsubps
-; AVX-NOT: vxorps
-; AVX-NOT: vaddsubps
-; CHECK: ret
-
-
-define <4 x double> @test7(<4 x double> %A, <4 x double> %B) {
-  %sub = fsub <4 x double> %A, %B
-  %add = fadd <4 x double> %A, %B
-  %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x double> %vecinit6
-}
-; CHECK-LABEL: test7
-; SSE: xorpd
-; SSE-NEXT: addsubpd
-; SSE: xorpd
-; SSE-NEXT: addsubpd
-; AVX: vxorpd
-; AVX-NEXT: vaddsubpd
-; AVX-NOT: vxorpd
-; AVX-NOT: vaddsubpd
-; CHECK: ret
-
-
-define <2 x double> @test8(<2 x double> %A, <2 x double> %B) #0 {
-  %add = fadd <2 x double> %A, %B
-  %sub = fsub <2 x double> %A, %B
-  %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 0, i32 3>
-  ret <2 x double> %vecinit2
-}
-; CHECK-LABEL: test8
-; SSE: xorpd
-; SSE-NEXT: addsubpd
-; AVX: vxorpd
-; AVX-NEXT: vaddsubpd
-; CHECK: ret
-
-
-define <4 x float> @test5b(<4 x float> %A, <4 x float> %B) {
-  %sub = fsub <4 x float> %A, %B
-  %add = fadd <4 x float> %B, %A
-  %vecinit6 = shufflevector <4 x float> %add, <4 x float> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x float> %vecinit6
-}
-; CHECK-LABEL: test5
-; SSE: xorps
-; SSE-NEXT: addsubps
-; AVX: vxorps
-; AVX-NEXT: vaddsubps
-; CHECK: ret
-
-
-define <8 x float> @test6b(<8 x float> %A, <8 x float> %B) {
-  %sub = fsub <8 x float> %A, %B
-  %add = fadd <8 x float> %B, %A
-  %vecinit14 = shufflevector <8 x float> %add, <8 x float> %sub, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
-  ret <8 x float> %vecinit14
-}
-; CHECK-LABEL: test6
-; SSE: xorps
-; SSE-NEXT: addsubps
-; SSE: xorps
-; SSE-NEXT: addsubps
-; AVX: vxorps
-; AVX-NEXT: vaddsubps
-; AVX-NOT: vxorps
-; AVX-NOT: vaddsubps
-; CHECK: ret
-
-
-define <4 x double> @test7b(<4 x double> %A, <4 x double> %B) {
-  %sub = fsub <4 x double> %A, %B
-  %add = fadd <4 x double> %B, %A
-  %vecinit6 = shufflevector <4 x double> %add, <4 x double> %sub, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-  ret <4 x double> %vecinit6
-}
-; CHECK-LABEL: test7
-; SSE: xorpd
-; SSE-NEXT: addsubpd
-; SSE: xorpd
-; SSE-NEXT: addsubpd
-; AVX: vxorpd
-; AVX-NEXT: vaddsubpd
-; AVX-NOT: vxorpd
-; AVX-NOT: vaddsubpd
-; CHECK: ret
-
-
-define <2 x double> @test8b(<2 x double> %A, <2 x double> %B) #0 {
-  %add = fadd <2 x double> %B, %A
-  %sub = fsub <2 x double> %A, %B
-  %vecinit2 = shufflevector <2 x double> %add, <2 x double> %sub, <2 x i32> <i32 0, i32 3>
-  ret <2 x double> %vecinit2
-}
-; CHECK-LABEL: test8
-; SSE: xorpd
-; SSE-NEXT: addsubpd
-; AVX: vxorpd
-; AVX-NEXT: vaddsubpd
-; CHECK: ret
-
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 18bdcb3912b1..0a5b0cab851c 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -1,99 +1,120 @@
 ; These are tests for SSE3 codegen.
 
-; RUN: llc < %s -march=x86-64 -mcpu=nocona -mtriple=i686-apple-darwin9 -O3 \
-; RUN:              | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -march=x86-64 -mcpu=nocona -mtriple=i686-apple-darwin9 -O3 | FileCheck %s --check-prefix=X64
 
 ; Test for v8xi16 lowering where we extract the first element of the vector and
 ; placed it in the second element of the result.
 
 define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {
+; X64-LABEL: t0:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    movl $1, %eax
+; X64-NEXT:    movd %eax, %xmm0
+; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X64-NEXT:    movdqa %xmm0, (%rdi)
+; X64-NEXT:    retq
 entry:
 	%tmp3 = load <8 x i16>* %old
 	%tmp6 = shufflevector <8 x i16> %tmp3,
-                <8 x i16> < i16 0, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
+                <8 x i16> < i16 1, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
                 <8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef  >
 	store <8 x i16> %tmp6, <8 x i16>* %dest
 	ret void
-
-; X64-LABEL: t0:
-; X64:	movdqa	(%rsi), %xmm0
-; X64:	pslldq	$2, %xmm0
-; X64:	movdqa	%xmm0, (%rdi)
-; X64:	ret
 }
 
 define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+; X64-LABEL: t1:
+; X64:       ## BB#0:
+; X64-NEXT:    movdqa (%rdi), %xmm0
+; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
+; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
+; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    retq
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
 	ret <8 x i16> %tmp3
 
-; X64-LABEL: t1:
-; X64: 	movdqa	(%rdi), %xmm0
-; X64: 	pinsrw	$0, (%rsi), %xmm0
-; X64: 	ret
 }
 
 define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind {
+; X64-LABEL: t2:
+; X64:       ## BB#0:
+; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,0,3,4,5,6,7]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; X64-NEXT:    retq
 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
 	ret <8 x i16> %tmp
-; X64-LABEL: t2:
-; X64:	pextrw	$1, %xmm1, %eax
-; X64:	pinsrw	$0, %eax, %xmm0
-; X64:	pinsrw	$3, %eax, %xmm0
-; X64:	ret
 }
 
 define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) nounwind {
+; X64-LABEL: t3:
+; X64:       ## BB#0:
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,5]
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
+; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; X64-NEXT:    retq
 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
 	ret <8 x i16> %tmp
-; X64-LABEL: t3:
-; X64: 	pextrw	$5, %xmm0, %eax
-; X64: 	pshuflw	$44, %xmm0, %xmm0
-; X64: 	pshufhw	$27, %xmm0, %xmm0
-; X64: 	pinsrw	$3, %eax, %xmm0
-; X64: 	ret
 }
 
 define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
+; X64-LABEL: t4:
+; X64:       ## BB#0:
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,7,4,7]
+; X64-NEXT:    retq
 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
 	ret <8 x i16> %tmp
-; X64-LABEL: t4:
-; X64: 	pextrw	$7, [[XMM0:%xmm[0-9]+]], %eax
-; X64: 	pshufhw	$100, [[XMM0]], [[XMM1:%xmm[0-9]+]]
-; X64: 	pinsrw	$1, %eax, [[XMM1]]
-; X64: 	pextrw	$1, [[XMM0]], %eax
-; X64: 	pinsrw	$4, %eax, %xmm{{[0-9]}}
-; X64: 	ret
 }
 
 define <8 x i16> @t5(<8 x i16> %A, <8 x i16> %B) nounwind {
+; X64-LABEL: t5:
+; X64:       ## BB#0:
+; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT:    movdqa %xmm1, %xmm0
+; X64-NEXT:    retq
 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
 	ret <8 x i16> %tmp
-; X64: 	t5:
-; X64: 		movlhps	%xmm1, %xmm0
-; X64: 		pshufd	$114, %xmm0, %xmm0
-; X64: 		ret
 }
 
 define <8 x i16> @t6(<8 x i16> %A, <8 x i16> %B) nounwind {
+; X64-LABEL: t6:
+; X64:       ## BB#0:
+; X64-NEXT:    movss %xmm1, %xmm0
+; X64-NEXT:    retq
 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
 	ret <8 x i16> %tmp
-; X64: 	t6:
-; X64: 		movss	%xmm1, %xmm0
-; X64: 		ret
 }
 
 define <8 x i16> @t7(<8 x i16> %A, <8 x i16> %B) nounwind {
+; X64-LABEL: t7:
+; X64:       ## BB#0:
+; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
+; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
+; X64-NEXT:    retq
 	%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 >
 	ret <8 x i16> %tmp
-; X64: 	t7:
-; X64: 		pshuflw	$-80, %xmm0, %xmm0
-; X64: 		pshufhw	$-56, %xmm0, %xmm0
-; X64: 		ret
 }
 
 define void @t8(<2 x i64>* %res, <2 x i64>* %A) nounwind {
+; X64-LABEL: t8:
+; X64:       ## BB#0:
+; X64-NEXT:    pshuflw {{.*#+}} xmm0 = mem[2,1,0,3,4,5,6,7]
+; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
+; X64-NEXT:    movdqa %xmm0, (%rdi)
+; X64-NEXT:    retq
 	%tmp = load <2 x i64>* %A
 	%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>
 	%tmp0 = extractelement <8 x i16> %tmp.upgrd.1, i32 0
@@ -115,14 +136,15 @@ define void @t8(<2 x i64>* %res, <2 x i64>* %A) nounwind {
 	%tmp15.upgrd.2 = bitcast <8 x i16> %tmp15 to <2 x i64>
 	store <2 x i64> %tmp15.upgrd.2, <2 x i64>* %res
 	ret void
-; X64: 	t8:
-; X64: 		pshuflw	$-58, (%rsi), %xmm0
-; X64: 		pshufhw	$-58, %xmm0, %xmm0
-; X64: 		movdqa	%xmm0, (%rdi)
-; X64: 		ret
 }
 
 define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
+; X64-LABEL: t9:
+; X64:       ## BB#0:
+; X64-NEXT:    movapd (%rdi), %xmm0
+; X64-NEXT:    movhpd (%rsi), %xmm0
+; X64-NEXT:    movapd %xmm0, (%rdi)
+; X64-NEXT:    retq
 	%tmp = load <4 x float>* %r
 	%tmp.upgrd.3 = bitcast <2 x i32>* %A to double*
 	%tmp.upgrd.4 = load double* %tmp.upgrd.3
@@ -139,11 +161,6 @@ define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
 	%tmp13 = insertelement <4 x float> %tmp12, float %tmp9, i32 3
 	store <4 x float> %tmp13, <4 x float>* %r
 	ret void
-; X64: 	t9:
-; X64: 		movaps	(%rdi), %xmm0
-; X64:	        movhps	(%rsi), %xmm0
-; X64:	        movaps	%xmm0, (%rdi)
-; X64: 		ret
 }
 
 
@@ -154,113 +171,121 @@ define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
 @g1 = external constant <4 x i32>
 @g2 = external constant <4 x i16>
 
-define internal void @t10() nounwind {
-        load <4 x i32>* @g1, align 16
-        bitcast <4 x i32> %1 to <8 x i16>
-        shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> < i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef >
-        bitcast <8 x i16> %3 to <2 x i64>
-        extractelement <2 x i64> %4, i32 0
-        bitcast i64 %5 to <4 x i16>
-        store <4 x i16> %6, <4 x i16>* @g2, align 8
-        ret void
-; X64: 	t10:
-; X64: 		pextrw	$4, [[X0:%xmm[0-9]+]], %e{{..}}
-; X64: 		pextrw	$6, [[X0]], %e{{..}}
-; X64: 		movlhps [[X0]], [[X0]]
-; X64: 		pshuflw	$8, [[X0]], [[X0]]
-; X64: 		pinsrw	$2, %e{{..}}, [[X0]]
-; X64: 		pinsrw	$3, %e{{..}}, [[X0]]
+define void @t10() nounwind {
+; X64-LABEL: t10:
+; X64:       ## BB#0:
+; X64-NEXT:    movq _g1@{{.*}}(%rip), %rax
+; X64-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,2,2,3,4,5,6,7]
+; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT:    movq _g2@{{.*}}(%rip), %rax
+; X64-NEXT:    movq %xmm0, (%rax)
+; X64-NEXT:    retq
+  load <4 x i32>* @g1, align 16
+  bitcast <4 x i32> %1 to <8 x i16>
+  shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> < i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef >
+  bitcast <8 x i16> %3 to <2 x i64>
+  extractelement <2 x i64> %4, i32 0
+  bitcast i64 %5 to <4 x i16>
+  store <4 x i16> %6, <4 x i16>* @g2, align 8
+  ret void
 }
 
-
 ; Pack various elements via shuffles.
 define <8 x i16> @t11(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+; X64-LABEL: t11:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
+; X64-NEXT:    retq
 entry:
 	%tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
 	ret <8 x i16> %tmp7
 
-; X64-LABEL: t11:
-; X64:	movd	%xmm1, %eax
-; X64:	movlhps	%xmm0, %xmm0
-; X64:	pshuflw	$1, %xmm0, %xmm0
-; X64:	pinsrw	$1, %eax, %xmm0
-; X64:	ret
 }
 
-
 define <8 x i16> @t12(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+; X64-LABEL: t12:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,6,7]
+; X64-NEXT:    retq
 entry:
 	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >
 	ret <8 x i16> %tmp9
 
-; X64-LABEL: t12:
-; X64: 	pextrw	$3, %xmm1, %eax
-; X64: 	movlhps	%xmm0, %xmm0
-; X64: 	pshufhw	$3, %xmm0, %xmm0
-; X64: 	pinsrw	$5, %eax, %xmm0
-; X64: 	ret
 }
 
-
 define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+; X64-LABEL: t13:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,6,7]
+; X64-NEXT:    retq
 entry:
 	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef >
 	ret <8 x i16> %tmp9
-; X64-LABEL: t13:
-; X64: 	punpcklqdq	%xmm0, %xmm1
-; X64: 	pextrw	$3, %xmm1, %eax
-; X64: 	pshufhw	$12, %xmm1, %xmm0
-; X64: 	pinsrw	$4, %eax, %xmm0
-; X64: 	ret
 }
 
-
 define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+; X64-LABEL: t14:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; X64-NEXT:    retq
 entry:
 	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef , i32 undef >
 	ret <8 x i16> %tmp9
-; X64-LABEL: t14:
-; X64: 	punpcklqdq	%xmm0, %xmm1
-; X64: 	pshufhw	$8, %xmm1, %xmm0
-; X64: 	ret
 }
 
-
 ; FIXME: t15 is worse off from disabling of scheduler 2-address hack.
 define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+; X64-LABEL: t15:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,2,3,4,5,6,7]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
+; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,0,2,4,5,6,7]
+; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
+; X64-NEXT:    retq
 entry:
-        %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
-        ret <8 x i16> %tmp8
-; X64: 	t15:
-; X64: 		pextrw	$7, %xmm0, %eax
-; X64: 		punpcklqdq	%xmm1, %xmm0
-; X64: 		pshuflw	$-128, %xmm0, %xmm0
-; X64: 		pinsrw	$2, %eax, %xmm0
-; X64: 		ret
+  %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
+  ret <8 x i16> %tmp8
 }
 
-
 ; Test yonah where we convert a shuffle to pextrw and pinrsw
 define <16 x i8> @t16(<16 x i8> %T0) nounwind readnone {
+; X64-LABEL: t16:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    movdqa {{.*#+}} xmm1 = [0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-NEXT:    pxor %xmm2, %xmm2
+; X64-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; X64-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; X64-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-NEXT:    packuswb %xmm0, %xmm0
+; X64-NEXT:    retq
 entry:
-        %tmp8 = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 0, i8 0, i8 0, i8 0,  i8 0, i8 0, i8 0, i8 0>, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
-        %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0,  <16 x i32> < i32 0, i32 1, i32 2, i32 17,  i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
-        ret <16 x i8> %tmp9
-; X64: 	t16:
-; X64: 		pextrw	$8, %xmm0, %eax
-; X64: 		pslldq	$2, %xmm0
-; X64: 		pextrw	$1, %xmm0, %ecx
-; X64: 		movzbl	%cl, %ecx
-; X64: 		orl	%eax, %ecx
-; X64: 		pinsrw	$1, %ecx, %xmm0
-; X64: 		ret
+  %tmp8 = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 0, i8 0, i8 0, i8 0,  i8 0, i8 0, i8 0, i8 0>, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+  %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0,  <16 x i32> < i32 0, i32 1, i32 2, i32 17,  i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
+  ret <16 x i8> %tmp9
 }
 
 ; rdar://8520311
 define <4 x i32> @t17() nounwind {
-entry:
 ; X64-LABEL: t17:
-; X64:          movddup (%rax), %xmm0
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    movddup (%rax), %xmm0
+; X64-NEXT:    andpd {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
+entry:
   %tmp1 = load <4 x float>* undef, align 16
   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
   %tmp3 = load <4 x float>* undef, align 16
diff --git a/test/CodeGen/X86/sse41-blend.ll b/test/CodeGen/X86/sse41-blend.ll
deleted file mode 100644
index 3a4812119f8a..000000000000
--- a/test/CodeGen/X86/sse41-blend.ll
+++ /dev/null
@@ -1,140 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
-
-;CHECK-LABEL: vsel_float:
-;CHECK: blendps
-;CHECK: ret
-define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x float> %v1, <4 x float> %v2
-  ret <4 x float> %vsel
-}
-
-
-;CHECK-LABEL: vsel_4xi8:
-;CHECK: blendps
-;CHECK: ret
-define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
-  %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
-  ret <4 x i8> %vsel
-}
-
-;CHECK-LABEL: vsel_4xi16:
-;CHECK: blendps
-;CHECK: ret
-define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
-  ret <4 x i16> %vsel
-}
-
-
-;CHECK-LABEL: vsel_i32:
-;CHECK: blendps
-;CHECK: ret
-define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
-  %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %v1, <4 x i32> %v2
-  ret <4 x i32> %vsel
-}
-
-
-;CHECK-LABEL: vsel_double:
-;CHECK: movsd
-;CHECK: ret
-define <4 x double> @vsel_double(<4 x double> %v1, <4 x double> %v2) {
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> %v1, <4 x double> %v2
-  ret <4 x double> %vsel
-}
-
-
-;CHECK-LABEL: vsel_i64:
-;CHECK: movsd
-;CHECK: ret
-define <4 x i64> @vsel_i64(<4 x i64> %v1, <4 x i64> %v2) {
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %v1, <4 x i64> %v2
-  ret <4 x i64> %vsel
-}
-
-
-;CHECK-LABEL: vsel_i8:
-;CHECK: pblendvb
-;CHECK: ret
-define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
-  %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
-  ret <16 x i8> %vsel
-}
-
-;; TEST blend + compares
-; CHECK: A
-define <2 x double> @A(<2 x double> %x, <2 x double> %y) {
-  ; CHECK: cmplepd
-  ; CHECK: blendvpd
-  %max_is_x = fcmp oge <2 x double> %x, %y
-  %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
-  ret <2 x double> %max
-}
-
-; CHECK: B
-define <2 x double> @B(<2 x double> %x, <2 x double> %y) {
-  ; CHECK: cmpnlepd
-  ; CHECK: blendvpd
-  %min_is_x = fcmp ult <2 x double> %x, %y
-  %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
-  ret <2 x double> %min
-}
-
-; CHECK: float_crash
-define void @float_crash() nounwind {
-entry:
-  %merge205vector_func.i = select <4 x i1> undef, <4 x double> undef, <4 x double> undef
-  %extract214vector_func.i = extractelement <4 x double> %merge205vector_func.i, i32 0
-  store double %extract214vector_func.i, double addrspace(1)* undef, align 8
-  ret void
-}
-
-; If we can figure out a blend has a constant mask, we should emit the
-; blend instruction with an immediate mask
-define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
-; In this case, we emit a simple movss
-; CHECK-LABEL: constant_blendvpd
-; CHECK: movsd
-; CHECK: ret
-  %1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %xy, <2 x double> %ab
-  ret <2 x double> %1
-}
-
-define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
-; CHECK-LABEL: constant_blendvps
-; CHECK-NOT: mov
-; CHECK: blendps $7
-; CHECK: ret
-  %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %xyzw, <4 x float> %abcd
-  ret <4 x float> %1
-}
-
-define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
-; CHECK-LABEL: constant_pblendvb:
-; CHECK: movaps
-; CHECK: pblendvb
-; CHECK: ret
-  %1 = select <16 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i8> %xyzw, <16 x i8> %abcd
-  ret <16 x i8> %1
-}
-
-declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
-declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
-declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
-
-;; 2 tests for shufflevectors that optimize to blend + immediate
-; CHECK-LABEL: @blend_shufflevector_4xfloat
-; CHECK: blendps $6, %xmm1, %xmm0
-; CHECK: ret
-define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) {
-  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-  ret <4 x float> %1
-}
-
-; CHECK-LABEL: @blend_shufflevector_8xi16
-; CHECK: pblendw $134, %xmm1, %xmm0
-; CHECK: ret
-define <8 x i16> @blend_shufflevector_8xi16(<8 x i16> %a, <8 x i16> %b) {
-  %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 15>
-  ret <8 x i16> %1
-}
diff --git a/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll b/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
new file mode 100644
index 000000000000..6fab98e70a89
--- /dev/null
+++ b/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse4.1 | FileCheck %s
+; This test works just like the non-upgrade one except that it only checks
+; forms which require auto-upgrading.
+
+define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: blendpd
+  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: blendps
+  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: dppd
+  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: dpps
+  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
+  ; CHECK: insertps
+  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+
+
+
+define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
+  ; CHECK: mpsadbw
+  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
+  ; CHECK: pblendw
+  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
+
+
diff --git a/test/CodeGen/X86/sse41-intrinsics-x86.ll b/test/CodeGen/X86/sse41-intrinsics-x86.ll
index 37eff43b28c4..5f25a16380de 100644
--- a/test/CodeGen/X86/sse41-intrinsics-x86.ll
+++ b/test/CodeGen/X86/sse41-intrinsics-x86.ll
@@ -2,18 +2,18 @@
 
 define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
   ; CHECK: blendpd
-  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+  %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
 }
-declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i32) nounwind readnone
+declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
 
 
 define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
   ; CHECK: blendps
-  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+  %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
-declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
 
 
 define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
@@ -34,35 +34,35 @@ declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x floa
 
 define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
   ; CHECK: dppd
-  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
+  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
 }
-declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i32) nounwind readnone
+declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
 
 
 define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
   ; CHECK: dpps
-  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
-declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
 
 
 define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
   ; CHECK: insertps
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
+  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
-declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
 
 
 
 define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
   ; CHECK: mpsadbw
-  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
   ret <8 x i16> %res
 }
-declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
 
 
 define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
@@ -83,10 +83,10 @@ declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) noun
 
 define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
   ; CHECK: pblendw
-  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+  %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
   ret <8 x i16> %res
 }
-declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
 
 
 define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
diff --git a/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll b/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
new file mode 100644
index 000000000000..55faf4d32b36
--- /dev/null
+++ b/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
@@ -0,0 +1,123 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+
+define <8 x i16> @test_llvm_x86_sse41_pmovsxbw(<16 x i8>* %a) {
+; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbw
+; SSE41: pmovsxbw (%rdi), %xmm0
+; AVX:  vpmovsxbw (%rdi), %xmm0
+  %1 = load <16 x i8>* %a, align 1
+  %2 = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %1)
+  ret <8 x i16> %2
+}
+
+define <4 x i32> @test_llvm_x86_sse41_pmovsxbd(<16 x i8>* %a) {
+; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbd
+; SSE41: pmovsxbd (%rdi), %xmm0
+; AVX:  vpmovsxbd (%rdi), %xmm0
+  %1 = load <16 x i8>* %a, align 1
+  %2 = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %1)
+  ret <4 x i32> %2
+}
+
+define <2 x i64> @test_llvm_x86_sse41_pmovsxbq(<16 x i8>* %a) {
+; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbq
+; SSE41: pmovsxbq (%rdi), %xmm0
+; AVX:  vpmovsxbq (%rdi), %xmm0
+  %1 = load <16 x i8>* %a, align 1
+  %2 = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %1)
+  ret <2 x i64> %2
+}
+
+define <4 x i32> @test_llvm_x86_sse41_pmovsxwd(<8 x i16>* %a) {
+; CHECK-LABEL: test_llvm_x86_sse41_pmovsxwd
+; SSE41: pmovsxwd (%rdi), %xmm0
+; AVX:  vpmovsxwd (%rdi), %xmm0
+  %1 = load <8 x i16>* %a, align 1
+  %2 = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1)
+  ret <4 x i32> %2
+}
+
+define <2 x i64> @test_llvm_x86_sse41_pmovsxwq(<8 x i16>* %a) {
+; CHECK-LABEL: test_llvm_x86_sse41_pmovsxwq
+; SSE41: pmovsxwq (%rdi), %xmm0
+; AVX:  vpmovsxwq (%rdi), %xmm0
+  %1 = load <8 x i16>* %a, align 1
+  %2 = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %1)
+  ret <2 x i64> %2
+}
+
+define <2 x i64> @test_llvm_x86_sse41_pmovsxdq(<4 x i32>* %a) {
+; CHECK-LABEL: test_llvm_x86_sse41_pmovsxdq
+; SSE41: pmovsxdq (%rdi), %xmm0
+; AVX:  vpmovsxdq (%rdi), %xmm0
+  %1 = load <4 x i32>* %a, align 1
+  %2 = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %1)
+  ret <2 x i64> %2
+}
+
+define <8 x i16> @test_llvm_x86_sse41_pmovzxbw(<16 x i8>* %a) {
+; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbw
+; SSE41: pmovzxbw (%rdi), %xmm0
+; AVX:  vpmovzxbw (%rdi), %xmm0
+  %1 = load <16 x i8>* %a, align 1
+  %2 = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %1)
+  ret <8 x i16> %2
+}
+
+define <4 x i32> @test_llvm_x86_sse41_pmovzxbd(<16 x i8>* %a) {
+; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbd
+; SSE41: pmovzxbd (%rdi), %xmm0
+; AVX:  vpmovzxbd (%rdi), %xmm0
+  %1 = load <16 x i8>* %a, align 1
+  %2 = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %1)
+  ret <4 x i32> %2
+}
+
+define <2 x i64> @test_llvm_x86_sse41_pmovzxbq(<16 x i8>* %a) {
+; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbq
+; SSE41: pmovzxbq (%rdi), %xmm0
+; AVX:  vpmovzxbq (%rdi), %xmm0
+  %1 = load <16 x i8>* %a, align 1
+  %2 = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %1)
+  ret <2 x i64> %2
+}
+
+define <4 x i32> @test_llvm_x86_sse41_pmovzxwd(<8 x i16>* %a) {
+; CHECK-LABEL: test_llvm_x86_sse41_pmovzxwd
+; SSE41: pmovzxwd (%rdi), %xmm0
+; AVX:  vpmovzxwd (%rdi), %xmm0
+  %1 = load <8 x i16>* %a, align 1
+  %2 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %1)
+  ret <4 x i32> %2
+}
+
+define <2 x i64> @test_llvm_x86_sse41_pmovzxwq(<8 x i16>* %a) {
+; CHECK-LABEL: test_llvm_x86_sse41_pmovzxwq
+; SSE41: pmovzxwq (%rdi), %xmm0
+; AVX:  vpmovzxwq (%rdi), %xmm0
+  %1 = load <8 x i16>* %a, align 1
+  %2 = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %1)
+  ret <2 x i64> %2
+}
+
+define <2 x i64> @test_llvm_x86_sse41_pmovzxdq(<4 x i32>* %a) {
+; CHECK-LABEL: test_llvm_x86_sse41_pmovzxdq
+; SSE41: pmovzxdq (%rdi), %xmm0
+; AVX:  vpmovzxdq (%rdi), %xmm0
+  %1 = load <4 x i32>* %a, align 1
+  %2 = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %1)
+  ret <2 x i64> %2
+}
+
+declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>)
+declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>)
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>)
+declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>)
+declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>)
+declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>)
+declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>)
+declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>)
+declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>)
+declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>)
+declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>)
+declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>)
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index 986488f531ec..9c0c2221cb7f 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -1,30 +1,47 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X32 --check-prefix=CHECK
-; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s -check-prefix=X64 --check-prefix=CHECK
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mattr=sse4.1 -mcpu=penryn | FileCheck %s --check-prefix=X64
 
 @g16 = external global i16
 
 define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind {
-        %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1
-        ret <4 x i32> %tmp1
 ; X32-LABEL: pinsrd_1:
-; X32:    pinsrd $1, 4(%esp), %xmm0
-
+; X32:       ## BB#0:
+; X32-NEXT:    pinsrd $1, {{[0-9]+}}(%esp), %xmm0
+; X32-NEXT:    retl
+;
 ; X64-LABEL: pinsrd_1:
-; X64:    pinsrd $1, %edi, %xmm0
+; X64:       ## BB#0:
+; X64-NEXT:    pinsrd $1, %edi, %xmm0
+; X64-NEXT:    retq
+  %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1
+  ret <4 x i32> %tmp1
 }
 
 define <16 x i8> @pinsrb_1(i8 %s, <16 x i8> %tmp) nounwind {
-        %tmp1 = insertelement <16 x i8> %tmp, i8 %s, i32 1
-        ret <16 x i8> %tmp1
 ; X32-LABEL: pinsrb_1:
-; X32:    pinsrb $1, 4(%esp), %xmm0
-
+; X32:       ## BB#0:
+; X32-NEXT:    pinsrb $1, {{[0-9]+}}(%esp), %xmm0
+; X32-NEXT:    retl
+;
 ; X64-LABEL: pinsrb_1:
-; X64:    pinsrb $1, %edi, %xmm0
+; X64:       ## BB#0:
+; X64-NEXT:    pinsrb $1, %edi, %xmm0
+; X64-NEXT:    retq
+  %tmp1 = insertelement <16 x i8> %tmp, i8 %s, i32 1
+  ret <16 x i8> %tmp1
 }
 
-
 define <2 x i64> @pmovsxbd_1(i32* %p) nounwind {
+; X32-LABEL: pmovsxbd_1:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    pmovsxbd (%eax), %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: pmovsxbd_1:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    pmovsxbd (%rdi), %xmm0
+; X64-NEXT:    retq
 entry:
 	%0 = load i32* %p, align 4
 	%1 = insertelement <4 x i32> undef, i32 %0, i32 0
@@ -35,16 +52,19 @@ entry:
 	%6 = tail call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %5) nounwind readnone
 	%7 = bitcast <4 x i32> %6 to <2 x i64>
 	ret <2 x i64> %7
-        
-; X32: _pmovsxbd_1:
-; X32:   movl      4(%esp), %eax
-; X32:   pmovsxbd   (%eax), %xmm0
-
-; X64: _pmovsxbd_1:
-; X64:   pmovsxbd   (%rdi), %xmm0
 }
 
 define <2 x i64> @pmovsxwd_1(i64* %p) nounwind readonly {
+; X32-LABEL: pmovsxwd_1:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    pmovsxwd (%eax), %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: pmovsxwd_1:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    pmovsxwd (%rdi), %xmm0
+; X64-NEXT:    retq
 entry:
 	%0 = load i64* %p		; <i64> [#uses=1]
 	%tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0		; <<2 x i64>> [#uses=1]
@@ -52,63 +72,59 @@ entry:
 	%2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone		; <<4 x i32>> [#uses=1]
 	%3 = bitcast <4 x i32> %2 to <2 x i64>		; <<2 x i64>> [#uses=1]
 	ret <2 x i64> %3
-        
-; X32: _pmovsxwd_1:
-; X32:   movl 4(%esp), %eax
-; X32:   pmovsxwd (%eax), %xmm0
-
-; X64: _pmovsxwd_1:
-; X64:   pmovsxwd (%rdi), %xmm0
 }
 
-
-
-
 define <2 x i64> @pmovzxbq_1() nounwind {
+; X32-LABEL: pmovzxbq_1:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movl L_g16$non_lazy_ptr, %eax
+; X32-NEXT:    pmovzxbq (%eax), %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: pmovzxbq_1:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    movq _g16@{{.*}}(%rip), %rax
+; X64-NEXT:    pmovzxbq (%rax), %xmm0
+; X64-NEXT:    retq
 entry:
 	%0 = load i16* @g16, align 2		; <i16> [#uses=1]
 	%1 = insertelement <8 x i16> undef, i16 %0, i32 0		; <<8 x i16>> [#uses=1]
 	%2 = bitcast <8 x i16> %1 to <16 x i8>		; <<16 x i8>> [#uses=1]
 	%3 = tail call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %2) nounwind readnone		; <<2 x i64>> [#uses=1]
 	ret <2 x i64> %3
-
-; X32: _pmovzxbq_1:
-; X32:   movl	L_g16$non_lazy_ptr, %eax
-; X32:   pmovzxbq	(%eax), %xmm0
-
-; X64: _pmovzxbq_1:
-; X64:   movq	_g16@GOTPCREL(%rip), %rax
-; X64:   pmovzxbq	(%rax), %xmm0
 }
 
 declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
 declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
 declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
 
-
-
-
 define i32 @extractps_1(<4 x float> %v) nounwind {
+; X32-LABEL: extractps_1:
+; X32:       ## BB#0:
+; X32-NEXT:    extractps $3, %xmm0, %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: extractps_1:
+; X64:       ## BB#0:
+; X64-NEXT:    extractps $3, %xmm0, %eax
+; X64-NEXT:    retq
   %s = extractelement <4 x float> %v, i32 3
   %i = bitcast float %s to i32
   ret i32 %i
-
-; X32: _extractps_1:  
-; X32:	  extractps	$3, %xmm0, %eax
-
-; X64: _extractps_1:  
-; X64:	  extractps	$3, %xmm0, %eax
 }
 define i32 @extractps_2(<4 x float> %v) nounwind {
+; X32-LABEL: extractps_2:
+; X32:       ## BB#0:
+; X32-NEXT:    extractps $3, %xmm0, %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: extractps_2:
+; X64:       ## BB#0:
+; X64-NEXT:    extractps $3, %xmm0, %eax
+; X64-NEXT:    retq
   %t = bitcast <4 x float> %v to <4 x i32>
   %s = extractelement <4 x i32> %t, i32 3
   ret i32 %s
-
-; X32: _extractps_2:
-; X32:	  extractps	$3, %xmm0, %eax
-
-; X64: _extractps_2:
-; X64:	  extractps	$3, %xmm0, %eax
 }
 
 
@@ -117,106 +133,152 @@ define i32 @extractps_2(<4 x float> %v) nounwind {
 ; is bitcasted to i32, but unsuitable for much of anything else.
 
 define float @ext_1(<4 x float> %v) nounwind {
+; X32-LABEL: ext_1:
+; X32:       ## BB#0:
+; X32-NEXT:    pushl %eax
+; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; X32-NEXT:    addss LCPI7_0, %xmm0
+; X32-NEXT:    movss %xmm0, (%esp)
+; X32-NEXT:    flds (%esp)
+; X32-NEXT:    popl %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: ext_1:
+; X64:       ## BB#0:
+; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; X64-NEXT:    addss {{.*}}(%rip), %xmm0
+; X64-NEXT:    retq
   %s = extractelement <4 x float> %v, i32 3
   %t = fadd float %s, 1.0
   ret float %t
-
-; X32: _ext_1:
-; X32:	  pshufd	$3, %xmm0, %xmm0
-; X32:	  addss	LCPI7_0, %xmm0
-
-; X64: _ext_1:
-; X64:	  pshufd	$3, %xmm0, %xmm0
-; X64:	  addss	LCPI7_0(%rip), %xmm0
 }
 define float @ext_2(<4 x float> %v) nounwind {
+; X32-LABEL: ext_2:
+; X32:       ## BB#0:
+; X32-NEXT:    pushl %eax
+; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; X32-NEXT:    movss %xmm0, (%esp)
+; X32-NEXT:    flds (%esp)
+; X32-NEXT:    popl %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: ext_2:
+; X64:       ## BB#0:
+; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; X64-NEXT:    retq
   %s = extractelement <4 x float> %v, i32 3
   ret float %s
-
-; X32: _ext_2:
-; X32:	  pshufd	$3, %xmm0, %xmm0
-
-; X64: _ext_2:
-; X64:	  pshufd	$3, %xmm0, %xmm0
 }
 define i32 @ext_3(<4 x i32> %v) nounwind {
+; X32-LABEL: ext_3:
+; X32:       ## BB#0:
+; X32-NEXT:    pextrd $3, %xmm0, %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: ext_3:
+; X64:       ## BB#0:
+; X64-NEXT:    pextrd $3, %xmm0, %eax
+; X64-NEXT:    retq
   %i = extractelement <4 x i32> %v, i32 3
   ret i32 %i
-
-; X32: _ext_3:
-; X32:	  pextrd	$3, %xmm0, %eax
-
-; X64: _ext_3:
-; X64:	  pextrd	$3, %xmm0, %eax
 }
 
 define <4 x float> @insertps_1(<4 x float> %t1, <4 x float> %t2) nounwind {
-        %tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 1) nounwind readnone
-        ret <4 x float> %tmp1
-; X32: _insertps_1:
-; X32:    insertps  $1, %xmm1, %xmm0
-
-; X64: _insertps_1:
-; X64:    insertps  $1, %xmm1, %xmm0
+; X32-LABEL: insertps_1:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[1,2,3]
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_1:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[1,2,3]
+; X64-NEXT:    retq
+  %tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 1) nounwind readnone
+  ret <4 x float> %tmp1
 }
 
 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
 
 define <4 x float> @insertps_2(<4 x float> %t1, float %t2) nounwind {
-        %tmp1 = insertelement <4 x float> %t1, float %t2, i32 0
-        ret <4 x float> %tmp1
-; X32: _insertps_2:
-; X32:    insertps  $0, 4(%esp), %xmm0
-
-; X64: _insertps_2:
-; X64:    insertps  $0, %xmm1, %xmm0        
+; X32-LABEL: insertps_2:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps $0, {{[0-9]+}}(%esp), %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_2:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X64-NEXT:    retq
+  %tmp1 = insertelement <4 x float> %t1, float %t2, i32 0
+  ret <4 x float> %tmp1
 }
-
 define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind {
-        %tmp2 = extractelement <4 x float> %t2, i32 0
-        %tmp1 = insertelement <4 x float> %t1, float %tmp2, i32 0
-        ret <4 x float> %tmp1
-; X32: _insertps_3:
-; X32:    insertps  $0, %xmm1, %xmm0        
-
-; X64: _insertps_3:
-; X64:    insertps  $0, %xmm1, %xmm0        
+; X32-LABEL: insertps_3:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_3:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
+; X64-NEXT:    retq
+  %tmp2 = extractelement <4 x float> %t2, i32 0
+  %tmp1 = insertelement <4 x float> %t1, float %tmp2, i32 0
+  ret <4 x float> %tmp1
 }
 
 define i32 @ptestz_1(<2 x i64> %t1, <2 x i64> %t2) nounwind {
-        %tmp1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
-        ret i32 %tmp1
-; X32: _ptestz_1:
-; X32:    ptest 	%xmm1, %xmm0
-; X32:    sete	%al
-
-; X64: _ptestz_1:
-; X64:    ptest 	%xmm1, %xmm0
-; X64:    sete	%al
+; X32-LABEL: ptestz_1:
+; X32:       ## BB#0:
+; X32-NEXT:    ptest %xmm1, %xmm0
+; X32-NEXT:    sete %al
+; X32-NEXT:    movzbl %al, %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: ptestz_1:
+; X64:       ## BB#0:
+; X64-NEXT:    ptest %xmm1, %xmm0
+; X64-NEXT:    sete %al
+; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    retq
+  %tmp1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
+  ret i32 %tmp1
 }
 
 define i32 @ptestz_2(<2 x i64> %t1, <2 x i64> %t2) nounwind {
-        %tmp1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
-        ret i32 %tmp1
-; X32: _ptestz_2:
-; X32:    ptest 	%xmm1, %xmm0
-; X32:    sbbl	%eax
-
-; X64: _ptestz_2:
-; X64:    ptest 	%xmm1, %xmm0
-; X64:    sbbl	%eax
+; X32-LABEL: ptestz_2:
+; X32:       ## BB#0:
+; X32-NEXT:    ptest %xmm1, %xmm0
+; X32-NEXT:    sbbl %eax, %eax
+; X32-NEXT:    andl $1, %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: ptestz_2:
+; X64:       ## BB#0:
+; X64-NEXT:    ptest %xmm1, %xmm0
+; X64-NEXT:    sbbl %eax, %eax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    retq
+  %tmp1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
+  ret i32 %tmp1
 }
 
 define i32 @ptestz_3(<2 x i64> %t1, <2 x i64> %t2) nounwind {
-        %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
-        ret i32 %tmp1
-; X32: _ptestz_3:
-; X32:    ptest 	%xmm1, %xmm0
-; X32:    seta	%al
-
-; X64: _ptestz_3:
-; X64:    ptest 	%xmm1, %xmm0
-; X64:    seta	%al
+; X32-LABEL: ptestz_3:
+; X32:       ## BB#0:
+; X32-NEXT:    ptest %xmm1, %xmm0
+; X32-NEXT:    seta %al
+; X32-NEXT:    movzbl %al, %eax
+; X32-NEXT:    retl
+;
+; X64-LABEL: ptestz_3:
+; X64:       ## BB#0:
+; X64-NEXT:    ptest %xmm1, %xmm0
+; X64-NEXT:    seta %al
+; X64-NEXT:    movzbl %al, %eax
+; X64-NEXT:    retq
+  %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
+  ret i32 %tmp1
 }
 
 
@@ -227,6 +289,25 @@ declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
 ; This used to compile to insertps $0  + insertps $16.  insertps $0 is always
 ; pointless.
 define <2 x float> @buildvector(<2 x float> %A, <2 x float> %B) nounwind  {
+; X32-LABEL: buildvector:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movaps %xmm0, %xmm2
+; X32-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; X32-NEXT:    addss %xmm1, %xmm0
+; X32-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X32-NEXT:    addss %xmm2, %xmm1
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; X32-NEXT:    retl
+;
+; X64-LABEL: buildvector:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    movaps %xmm0, %xmm2
+; X64-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
+; X64-NEXT:    addss %xmm1, %xmm0
+; X64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X64-NEXT:    addss %xmm2, %xmm1
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; X64-NEXT:    retq
 entry:
   %tmp7 = extractelement <2 x float> %A, i32 0
   %tmp5 = extractelement <2 x float> %A, i32 1
@@ -237,97 +318,124 @@ entry:
   %tmp11 = insertelement <2 x float> undef, float %add.r, i32 0
   %tmp9 = insertelement <2 x float> %tmp11, float %add.i, i32 1
   ret <2 x float> %tmp9
-; X32-LABEL: buildvector:
-; X32-NOT: insertps $0
-; X32: insertps $16
-; X32-NOT: insertps $0
-; X32: ret
-; X64-LABEL: buildvector:
-; X64-NOT: insertps $0
-; X64: insertps $16
-; X64-NOT: insertps $0
-; X64: ret
 }
 
 define <4 x float> @insertps_from_shufflevector_1(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
+; X32-LABEL: insertps_from_shufflevector_1:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    insertps $48, (%eax), %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_from_shufflevector_1:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    insertps $48, (%rdi), %xmm0
+; X64-NEXT:    retq
 entry:
   %0 = load <4 x float>* %pb, align 16
   %vecinit6 = shufflevector <4 x float> %a, <4 x float> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
   ret <4 x float> %vecinit6
-; CHECK-LABEL: insertps_from_shufflevector_1:
-; CHECK-NOT: movss
-; CHECK-NOT: shufps
-; CHECK: insertps    $48,
-; CHECK: ret
 }
 
 define <4 x float> @insertps_from_shufflevector_2(<4 x float> %a, <4 x float> %b) {
+; X32-LABEL: insertps_from_shufflevector_2:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[1],xmm0[3]
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_from_shufflevector_2:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[1],xmm0[3]
+; X64-NEXT:    retq
 entry:
   %vecinit6 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
   ret <4 x float> %vecinit6
-; CHECK-LABEL: insertps_from_shufflevector_2:
-; CHECK-NOT: shufps
-; CHECK: insertps    $96,
-; CHECK: ret
 }
 
 ; For loading an i32 from memory into an xmm register we use pinsrd
 ; instead of insertps
 define <4 x i32> @pinsrd_from_shufflevector_i32(<4 x i32> %a, <4 x i32>* nocapture readonly %pb) {
+; X32-LABEL: pinsrd_from_shufflevector_i32:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    insertps $48, (%eax), %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: pinsrd_from_shufflevector_i32:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    insertps $48, (%rdi), %xmm0
+; X64-NEXT:    retq
 entry:
   %0 = load <4 x i32>* %pb, align 16
   %vecinit6 = shufflevector <4 x i32> %a, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
   ret <4 x i32> %vecinit6
-; CHECK-LABEL: pinsrd_from_shufflevector_i32:
-; CHECK-NOT: movss
-; CHECK-NOT: shufps
-; CHECK: pinsrd  $3,
-; CHECK: ret
 }
 
 define <4 x i32> @insertps_from_shufflevector_i32_2(<4 x i32> %a, <4 x i32> %b) {
+; X32-LABEL: insertps_from_shufflevector_i32_2:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[3],xmm0[2,3]
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_from_shufflevector_i32_2:
+; X64:       ## BB#0: ## %entry
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[3],xmm0[2,3]
+; X64-NEXT:    retq
 entry:
   %vecinit6 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
   ret <4 x i32> %vecinit6
-; CHECK-LABEL: insertps_from_shufflevector_i32_2:
-; CHECK-NOT: shufps
-; CHECK-NOT: movaps
-; CHECK: insertps    $208,
-; CHECK: ret
 }
 
 define <4 x float> @insertps_from_load_ins_elt_undef(<4 x float> %a, float* %b) {
-; CHECK-LABEL: insertps_from_load_ins_elt_undef:
-; CHECK-NOT: movss
-; CHECK-NOT: shufps
-; CHECK: insertps    $16,
-; CHECK: ret
+; X32-LABEL: insertps_from_load_ins_elt_undef:
+; X32:       ## BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    insertps $16, (%eax), %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_from_load_ins_elt_undef:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps $16, (%rdi), %xmm0
+; X64-NEXT:    retq
   %1 = load float* %b, align 4
   %2 = insertelement <4 x float> undef, float %1, i32 0
   %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
   ret <4 x float> %result
 }
 
-define <4 x i32> @insertps_from_load_ins_elt_undef_i32(<4 x i32> %a, i32* %b) {
-; CHECK-LABEL: insertps_from_load_ins_elt_undef_i32:
 ; TODO: Like on pinsrd_from_shufflevector_i32, remove this mov instr
-;; aCHECK-NOT: movd
-; CHECK-NOT: shufps
-; CHECK: insertps    $32,
-; CHECK: ret
+define <4 x i32> @insertps_from_load_ins_elt_undef_i32(<4 x i32> %a, i32* %b) {
+; X32-LABEL: insertps_from_load_ins_elt_undef_i32:
+; X32:       ## BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movd (%eax), %xmm1
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_from_load_ins_elt_undef_i32:
+; X64:       ## BB#0:
+; X64-NEXT:    movd (%rdi), %xmm1
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; X64-NEXT:    retq
   %1 = load i32* %b, align 4
   %2 = insertelement <4 x i32> undef, i32 %1, i32 0
   %result = shufflevector <4 x i32> %a, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
   ret <4 x i32> %result
 }
 
-;;;;;; Shuffles optimizable with a single insertps instruction
+;;;;;; Shuffles optimizable with a single insertps or blend instruction
 define <4 x float> @shuf_XYZ0(<4 x float> %x, <4 x float> %a) {
-; CHECK-LABEL: shuf_XYZ0:
-; CHECK-NOT: pextrd
-; CHECK-NOT: punpckldq
-; CHECK: insertps    $8
-; CHECK: ret
+; X32-LABEL: shuf_XYZ0:
+; X32:       ## BB#0:
+; X32-NEXT:    xorps %xmm1, %xmm1
+; X32-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
+; X32-NEXT:    retl
+;
+; X64-LABEL: shuf_XYZ0:
+; X64:       ## BB#0:
+; X64-NEXT:    xorps %xmm1, %xmm1
+; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3]
+; X64-NEXT:    retq
   %vecext = extractelement <4 x float> %x, i32 0
   %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
   %vecext1 = extractelement <4 x float> %x, i32 1
@@ -339,11 +447,15 @@ define <4 x float> @shuf_XYZ0(<4 x float> %x, <4 x float> %a) {
 }
 
 define <4 x float> @shuf_XY00(<4 x float> %x, <4 x float> %a) {
-; CHECK-LABEL: shuf_XY00:
-; CHECK-NOT: pextrd
-; CHECK-NOT: punpckldq
-; CHECK: insertps    $12
-; CHECK: ret
+; X32-LABEL: shuf_XY00:
+; X32:       ## BB#0:
+; X32-NEXT:    movq %xmm0, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: shuf_XY00:
+; X64:       ## BB#0:
+; X64-NEXT:    movq %xmm0, %xmm0
+; X64-NEXT:    retq
   %vecext = extractelement <4 x float> %x, i32 0
   %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
   %vecext1 = extractelement <4 x float> %x, i32 1
@@ -354,11 +466,15 @@ define <4 x float> @shuf_XY00(<4 x float> %x, <4 x float> %a) {
 }
 
 define <4 x float> @shuf_XYY0(<4 x float> %x, <4 x float> %a) {
-; CHECK-LABEL: shuf_XYY0:
-; CHECK-NOT: pextrd
-; CHECK-NOT: punpckldq
-; CHECK: insertps    $104
-; CHECK: ret
+; X32-LABEL: shuf_XYY0:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,1],zero
+; X32-NEXT:    retl
+;
+; X64-LABEL: shuf_XYY0:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,1],zero
+; X64-NEXT:    retq
   %vecext = extractelement <4 x float> %x, i32 0
   %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
   %vecext1 = extractelement <4 x float> %x, i32 1
@@ -369,9 +485,15 @@ define <4 x float> @shuf_XYY0(<4 x float> %x, <4 x float> %a) {
 }
 
 define <4 x float> @shuf_XYW0(<4 x float> %x, <4 x float> %a) {
-; CHECK-LABEL: shuf_XYW0:
-; CHECK: insertps    $232
-; CHECK: ret
+; X32-LABEL: shuf_XYW0:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,3],zero
+; X32-NEXT:    retl
+;
+; X64-LABEL: shuf_XYW0:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,3],zero
+; X64-NEXT:    retq
   %vecext = extractelement <4 x float> %x, i32 0
   %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
   %vecext1 = extractelement <4 x float> %x, i32 1
@@ -383,11 +505,15 @@ define <4 x float> @shuf_XYW0(<4 x float> %x, <4 x float> %a) {
 }
 
 define <4 x float> @shuf_W00W(<4 x float> %x, <4 x float> %a) {
-; CHECK-LABEL: shuf_W00W:
-; CHECK-NOT: pextrd
-; CHECK-NOT: punpckldq
-; CHECK: insertps    $198
-; CHECK: ret
+; X32-LABEL: shuf_W00W:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[3],zero,zero,xmm0[3]
+; X32-NEXT:    retl
+;
+; X64-LABEL: shuf_W00W:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[3],zero,zero,xmm0[3]
+; X64-NEXT:    retq
   %vecext = extractelement <4 x float> %x, i32 3
   %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
   %vecinit2 = insertelement <4 x float> %vecinit, float 0.0, i32 1
@@ -397,11 +523,19 @@ define <4 x float> @shuf_W00W(<4 x float> %x, <4 x float> %a) {
 }
 
 define <4 x float> @shuf_X00A(<4 x float> %x, <4 x float> %a) {
-; CHECK-LABEL: shuf_X00A:
-; CHECK-NOT: movaps
-; CHECK-NOT: shufps
-; CHECK: insertps    $48
-; CHECK: ret
+; X32-LABEL: shuf_X00A:
+; X32:       ## BB#0:
+; X32-NEXT:    xorps %xmm2, %xmm2
+; X32-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; X32-NEXT:    retl
+;
+; X64-LABEL: shuf_X00A:
+; X64:       ## BB#0:
+; X64-NEXT:    xorps %xmm2, %xmm2
+; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; X64-NEXT:    retq
   %vecext = extractelement <4 x float> %x, i32 0
   %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
   %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1
@@ -411,11 +545,21 @@ define <4 x float> @shuf_X00A(<4 x float> %x, <4 x float> %a) {
 }
 
 define <4 x float> @shuf_X00X(<4 x float> %x, <4 x float> %a) {
-; CHECK-LABEL: shuf_X00X:
-; CHECK-NOT: movaps
-; CHECK-NOT: shufps
-; CHECK: insertps    $48
-; CHECK: ret
+; X32-LABEL: shuf_X00X:
+; X32:       ## BB#0:
+; X32-NEXT:    xorps %xmm1, %xmm1
+; X32-NEXT:    blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; X32-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0]
+; X32-NEXT:    movaps %xmm1, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: shuf_X00X:
+; X64:       ## BB#0:
+; X64-NEXT:    xorps %xmm1, %xmm1
+; X64-NEXT:    blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
+; X64-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0]
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    retq
   %vecext = extractelement <4 x float> %x, i32 0
   %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
   %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1
@@ -425,12 +569,23 @@ define <4 x float> @shuf_X00X(<4 x float> %x, <4 x float> %a) {
 }
 
 define <4 x float> @shuf_X0YC(<4 x float> %x, <4 x float> %a) {
-; CHECK-LABEL: shuf_X0YC:
-; CHECK: shufps
-; CHECK-NOT: movhlps
-; CHECK-NOT: shufps
-; CHECK: insertps    $176
-; CHECK: ret
+; X32-LABEL: shuf_X0YC:
+; X32:       ## BB#0:
+; X32-NEXT:    xorps %xmm2, %xmm2
+; X32-NEXT:    blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2,3]
+; X32-NEXT:    insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero
+; X32-NEXT:    insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2]
+; X32-NEXT:    movaps %xmm2, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: shuf_X0YC:
+; X64:       ## BB#0:
+; X64-NEXT:    xorps %xmm2, %xmm2
+; X64-NEXT:    blendps {{.*#+}} xmm2 = xmm0[0],xmm2[1],xmm0[2,3]
+; X64-NEXT:    insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero
+; X64-NEXT:    insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2]
+; X64-NEXT:    movaps %xmm2, %xmm0
+; X64-NEXT:    retq
   %vecext = extractelement <4 x float> %x, i32 0
   %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
   %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 1
@@ -440,11 +595,17 @@ define <4 x float> @shuf_X0YC(<4 x float> %x, <4 x float> %a) {
 }
 
 define <4 x i32> @i32_shuf_XYZ0(<4 x i32> %x, <4 x i32> %a) {
-; CHECK-LABEL: i32_shuf_XYZ0:
-; CHECK-NOT: pextrd
-; CHECK-NOT: punpckldq
-; CHECK: insertps    $8
-; CHECK: ret
+; X32-LABEL: i32_shuf_XYZ0:
+; X32:       ## BB#0:
+; X32-NEXT:    pxor %xmm1, %xmm1
+; X32-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
+; X32-NEXT:    retl
+;
+; X64-LABEL: i32_shuf_XYZ0:
+; X64:       ## BB#0:
+; X64-NEXT:    pxor %xmm1, %xmm1
+; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
+; X64-NEXT:    retq
   %vecext = extractelement <4 x i32> %x, i32 0
   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
   %vecext1 = extractelement <4 x i32> %x, i32 1
@@ -456,11 +617,15 @@ define <4 x i32> @i32_shuf_XYZ0(<4 x i32> %x, <4 x i32> %a) {
 }
 
 define <4 x i32> @i32_shuf_XY00(<4 x i32> %x, <4 x i32> %a) {
-; CHECK-LABEL: i32_shuf_XY00:
-; CHECK-NOT: pextrd
-; CHECK-NOT: punpckldq
-; CHECK: insertps    $12
-; CHECK: ret
+; X32-LABEL: i32_shuf_XY00:
+; X32:       ## BB#0:
+; X32-NEXT:    movq %xmm0, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: i32_shuf_XY00:
+; X64:       ## BB#0:
+; X64-NEXT:    movq %xmm0, %xmm0
+; X64-NEXT:    retq
   %vecext = extractelement <4 x i32> %x, i32 0
   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
   %vecext1 = extractelement <4 x i32> %x, i32 1
@@ -471,11 +636,15 @@ define <4 x i32> @i32_shuf_XY00(<4 x i32> %x, <4 x i32> %a) {
 }
 
 define <4 x i32> @i32_shuf_XYY0(<4 x i32> %x, <4 x i32> %a) {
-; CHECK-LABEL: i32_shuf_XYY0:
-; CHECK-NOT: pextrd
-; CHECK-NOT: punpckldq
-; CHECK: insertps    $104
-; CHECK: ret
+; X32-LABEL: i32_shuf_XYY0:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,1],zero
+; X32-NEXT:    retl
+;
+; X64-LABEL: i32_shuf_XYY0:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,1],zero
+; X64-NEXT:    retq
   %vecext = extractelement <4 x i32> %x, i32 0
   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
   %vecext1 = extractelement <4 x i32> %x, i32 1
@@ -486,11 +655,15 @@ define <4 x i32> @i32_shuf_XYY0(<4 x i32> %x, <4 x i32> %a) {
 }
 
 define <4 x i32> @i32_shuf_XYW0(<4 x i32> %x, <4 x i32> %a) {
-; CHECK-LABEL: i32_shuf_XYW0:
-; CHECK-NOT: pextrd
-; CHECK-NOT: punpckldq
-; CHECK: insertps    $232
-; CHECK: ret
+; X32-LABEL: i32_shuf_XYW0:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,3],zero
+; X32-NEXT:    retl
+;
+; X64-LABEL: i32_shuf_XYW0:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,3],zero
+; X64-NEXT:    retq
   %vecext = extractelement <4 x i32> %x, i32 0
   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
   %vecext1 = extractelement <4 x i32> %x, i32 1
@@ -502,11 +675,15 @@ define <4 x i32> @i32_shuf_XYW0(<4 x i32> %x, <4 x i32> %a) {
 }
 
 define <4 x i32> @i32_shuf_W00W(<4 x i32> %x, <4 x i32> %a) {
-; CHECK-LABEL: i32_shuf_W00W:
-; CHECK-NOT: pextrd
-; CHECK-NOT: punpckldq
-; CHECK: insertps    $198
-; CHECK: ret
+; X32-LABEL: i32_shuf_W00W:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[3],zero,zero,xmm0[3]
+; X32-NEXT:    retl
+;
+; X64-LABEL: i32_shuf_W00W:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[3],zero,zero,xmm0[3]
+; X64-NEXT:    retq
   %vecext = extractelement <4 x i32> %x, i32 3
   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
   %vecinit2 = insertelement <4 x i32> %vecinit, i32 0, i32 1
@@ -516,11 +693,19 @@ define <4 x i32> @i32_shuf_W00W(<4 x i32> %x, <4 x i32> %a) {
 }
 
 define <4 x i32> @i32_shuf_X00A(<4 x i32> %x, <4 x i32> %a) {
-; CHECK-LABEL: i32_shuf_X00A:
-; CHECK-NOT: movaps
-; CHECK-NOT: shufps
-; CHECK: insertps    $48
-; CHECK: ret
+; X32-LABEL: i32_shuf_X00A:
+; X32:       ## BB#0:
+; X32-NEXT:    pxor %xmm2, %xmm2
+; X32-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; X32-NEXT:    retl
+;
+; X64-LABEL: i32_shuf_X00A:
+; X64:       ## BB#0:
+; X64-NEXT:    pxor %xmm2, %xmm2
+; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; X64-NEXT:    retq
   %vecext = extractelement <4 x i32> %x, i32 0
   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
   %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
@@ -530,11 +715,21 @@ define <4 x i32> @i32_shuf_X00A(<4 x i32> %x, <4 x i32> %a) {
 }
 
 define <4 x i32> @i32_shuf_X00X(<4 x i32> %x, <4 x i32> %a) {
-; CHECK-LABEL: i32_shuf_X00X:
-; CHECK-NOT: movaps
-; CHECK-NOT: shufps
-; CHECK: insertps    $48
-; CHECK: ret
+; X32-LABEL: i32_shuf_X00X:
+; X32:       ## BB#0:
+; X32-NEXT:    pxor %xmm1, %xmm1
+; X32-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; X32-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0]
+; X32-NEXT:    movaps %xmm1, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: i32_shuf_X00X:
+; X64:       ## BB#0:
+; X64-NEXT:    pxor %xmm1, %xmm1
+; X64-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; X64-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[0]
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    retq
   %vecext = extractelement <4 x i32> %x, i32 0
   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
   %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
@@ -544,12 +739,21 @@ define <4 x i32> @i32_shuf_X00X(<4 x i32> %x, <4 x i32> %a) {
 }
 
 define <4 x i32> @i32_shuf_X0YC(<4 x i32> %x, <4 x i32> %a) {
-; CHECK-LABEL: i32_shuf_X0YC:
-; CHECK: shufps
-; CHECK-NOT: movhlps
-; CHECK-NOT: shufps
-; CHECK: insertps    $176
-; CHECK: ret
+; X32-LABEL: i32_shuf_X0YC:
+; X32:       ## BB#0:
+; X32-NEXT:    pmovzxdq %xmm0, %xmm2
+; X32-NEXT:    insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero
+; X32-NEXT:    insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2]
+; X32-NEXT:    movaps %xmm2, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: i32_shuf_X0YC:
+; X64:       ## BB#0:
+; X64-NEXT:    pmovzxdq %xmm0, %xmm2
+; X64-NEXT:    insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[1],zero
+; X64-NEXT:    insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[2]
+; X64-NEXT:    movaps %xmm2, %xmm0
+; X64-NEXT:    retq
   %vecext = extractelement <4 x i32> %x, i32 0
   %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
   %vecinit1 = insertelement <4 x i32> %vecinit, i32 0, i32 1
@@ -560,11 +764,19 @@ define <4 x i32> @i32_shuf_X0YC(<4 x i32> %x, <4 x i32> %a) {
 
 ;; Test for a bug in the first implementation of LowerBuildVectorv4x32
 define < 4 x float> @test_insertps_no_undef(<4 x float> %x) {
-; CHECK-LABEL: test_insertps_no_undef:
-; CHECK: movaps  %xmm0, %xmm1
-; CHECK-NEXT: insertps        $8, %xmm1, %xmm1
-; CHECK-NEXT: maxps   %xmm1, %xmm0
-; CHECK-NEXT: ret
+; X32-LABEL: test_insertps_no_undef:
+; X32:       ## BB#0:
+; X32-NEXT:    xorps %xmm1, %xmm1
+; X32-NEXT:    blendps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3]
+; X32-NEXT:    maxps %xmm1, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: test_insertps_no_undef:
+; X64:       ## BB#0:
+; X64-NEXT:    xorps %xmm1, %xmm1
+; X64-NEXT:    blendps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3]
+; X64-NEXT:    maxps %xmm1, %xmm0
+; X64-NEXT:    retq
   %vecext = extractelement <4 x float> %x, i32 0
   %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
   %vecext1 = extractelement <4 x float> %x, i32 1
@@ -578,48 +790,75 @@ define < 4 x float> @test_insertps_no_undef(<4 x float> %x) {
 }
 
 define <8 x i16> @blendvb_fallback(<8 x i1> %mask, <8 x i16> %x, <8 x i16> %y) {
-; CHECK-LABEL: blendvb_fallback
-; CHECK: blendvb
-; CHECK: ret
+; X32-LABEL: blendvb_fallback:
+; X32:       ## BB#0:
+; X32-NEXT:    psllw $15, %xmm0
+; X32-NEXT:    psraw $15, %xmm0
+; X32-NEXT:    pblendvb %xmm1, %xmm2
+; X32-NEXT:    movdqa %xmm2, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: blendvb_fallback:
+; X64:       ## BB#0:
+; X64-NEXT:    psllw $15, %xmm0
+; X64-NEXT:    psraw $15, %xmm0
+; X64-NEXT:    pblendvb %xmm1, %xmm2
+; X64-NEXT:    movdqa %xmm2, %xmm0
+; X64-NEXT:    retq
   %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %y
   ret <8 x i16> %ret
 }
 
-define <4 x float> @insertps_from_vector_load(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
-; CHECK-LABEL: insertps_from_vector_load:
 ; On X32, account for the argument's move to registers
-; X32: movl    4(%esp), %eax
-; CHECK-NOT: mov
-; CHECK: insertps    $48
-; CHECK-NEXT: ret
+define <4 x float> @insertps_from_vector_load(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
+; X32-LABEL: insertps_from_vector_load:
+; X32:       ## BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    insertps $48, (%eax), %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_from_vector_load:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps $48, (%rdi), %xmm0
+; X64-NEXT:    retq
   %1 = load <4 x float>* %pb, align 16
   %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48)
   ret <4 x float> %2
 }
 
 ;; Use a non-zero CountS for insertps
-define <4 x float> @insertps_from_vector_load_offset(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
-; CHECK-LABEL: insertps_from_vector_load_offset:
-; On X32, account for the argument's move to registers
-; X32: movl    4(%esp), %eax
-; CHECK-NOT: mov
 ;; Try to match a bit more of the instr, since we need the load's offset.
-; CHECK: insertps    $96, 4(%{{...}}), %
-; CHECK-NEXT: ret
+define <4 x float> @insertps_from_vector_load_offset(<4 x float> %a, <4 x float>* nocapture readonly %pb) {
+; X32-LABEL: insertps_from_vector_load_offset:
+; X32:       ## BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    insertps $96, 4(%eax), %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_from_vector_load_offset:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps $96, 4(%rdi), %xmm0
+; X64-NEXT:    retq
   %1 = load <4 x float>* %pb, align 16
   %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96)
   ret <4 x float> %2
 }
 
-define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x float>* nocapture readonly %pb, i64 %index) {
-; CHECK-LABEL: insertps_from_vector_load_offset_2:
-; On X32, account for the argument's move to registers
-; X32: movl    4(%esp), %eax
-; X32: movl    8(%esp), %ecx
-; CHECK-NOT: mov
 ;; Try to match a bit more of the instr, since we need the load's offset.
-; CHECK: insertps    $192, 12(%{{...}},%{{...}}), %
-; CHECK-NEXT: ret
+define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x float>* nocapture readonly %pb, i64 %index) {
+; X32-LABEL: insertps_from_vector_load_offset_2:
+; X32:       ## BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    shll $4, %ecx
+; X32-NEXT:    insertps $-64, 12(%eax,%ecx), %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_from_vector_load_offset_2:
+; X64:       ## BB#0:
+; X64-NEXT:    shlq $4, %rsi
+; X64-NEXT:    insertps $-64, 12(%rdi,%rsi), %xmm0
+; X64-NEXT:    retq
   %1 = getelementptr inbounds <4 x float>* %pb, i64 %index
   %2 = load <4 x float>* %1, align 16
   %3 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %2, i32 192)
@@ -627,13 +866,21 @@ define <4 x float> @insertps_from_vector_load_offset_2(<4 x float> %a, <4 x floa
 }
 
 define <4 x float> @insertps_from_broadcast_loadf32(<4 x float> %a, float* nocapture readonly %fb, i64 %index) {
-; CHECK-LABEL: insertps_from_broadcast_loadf32:
-; On X32, account for the arguments' move to registers
-; X32: movl    8(%esp), %eax
-; X32: movl    4(%esp), %ecx
-; CHECK-NOT: mov
-; CHECK: insertps    $48
-; CHECK-NEXT: ret
+; X32-LABEL: insertps_from_broadcast_loadf32:
+; X32:       ## BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movss (%ecx,%eax,4), %xmm1
+; X32-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_from_broadcast_loadf32:
+; X64:       ## BB#0:
+; X64-NEXT:    movss (%rdi,%rsi,4), %xmm1
+; X64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; X64-NEXT:    retq
   %1 = getelementptr inbounds float* %fb, i64 %index
   %2 = load float* %1, align 4
   %3 = insertelement <4 x float> undef, float %2, i32 0
@@ -645,12 +892,20 @@ define <4 x float> @insertps_from_broadcast_loadf32(<4 x float> %a, float* nocap
 }
 
 define <4 x float> @insertps_from_broadcast_loadv4f32(<4 x float> %a, <4 x float>* nocapture readonly %b) {
-; CHECK-LABEL: insertps_from_broadcast_loadv4f32:
-; On X32, account for the arguments' move to registers
-; X32: movl    4(%esp), %{{...}}
-; CHECK-NOT: mov
-; CHECK: insertps    $48
-; CHECK-NEXT: ret
+; X32-LABEL: insertps_from_broadcast_loadv4f32:
+; X32:       ## BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movups (%eax), %xmm1
+; X32-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_from_broadcast_loadv4f32:
+; X64:       ## BB#0:
+; X64-NEXT:    movups (%rdi), %xmm1
+; X64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; X64-NEXT:    retq
   %1 = load <4 x float>* %b, align 4
   %2 = extractelement <4 x float> %1, i32 0
   %3 = insertelement <4 x float> undef, float %2, i32 0
@@ -663,20 +918,33 @@ define <4 x float> @insertps_from_broadcast_loadv4f32(<4 x float> %a, <4 x float
 
 ;; FIXME: We're emitting an extraneous pshufd/vbroadcast.
 define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, float* nocapture readonly %fb, i64 %index) {
-; CHECK-LABEL: insertps_from_broadcast_multiple_use:
-; On X32, account for the arguments' move to registers
-; X32: movl    8(%esp), %eax
-; X32: movl    4(%esp), %ecx
-; CHECK: movss
-; CHECK-NOT: mov
-; CHECK: insertps    $48
-; CHECK: insertps    $48
-; CHECK: insertps    $48
-; CHECK: insertps    $48
-; CHECK: addps
-; CHECK: addps
-; CHECK: addps
-; CHECK-NEXT: ret
+; X32-LABEL: insertps_from_broadcast_multiple_use:
+; X32:       ## BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movss (%ecx,%eax,4), %xmm4
+; X32-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,0,0,0]
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0]
+; X32-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0]
+; X32-NEXT:    insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[0]
+; X32-NEXT:    insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[0]
+; X32-NEXT:    addps %xmm1, %xmm0
+; X32-NEXT:    addps %xmm2, %xmm3
+; X32-NEXT:    addps %xmm3, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_from_broadcast_multiple_use:
+; X64:       ## BB#0:
+; X64-NEXT:    movss (%rdi,%rsi,4), %xmm4
+; X64-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,0,0,0]
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0]
+; X64-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0]
+; X64-NEXT:    insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[0]
+; X64-NEXT:    insertps {{.*#+}} xmm3 = xmm3[0,1,2],xmm4[0]
+; X64-NEXT:    addps %xmm1, %xmm0
+; X64-NEXT:    addps %xmm2, %xmm3
+; X64-NEXT:    addps %xmm3, %xmm0
+; X64-NEXT:    retq
   %1 = getelementptr inbounds float* %fb, i64 %index
   %2 = load float* %1, align 4
   %3 = insertelement <4 x float> undef, float %2, i32 0
@@ -694,10 +962,20 @@ define <4 x float> @insertps_from_broadcast_multiple_use(<4 x float> %a, <4 x fl
 }
 
 define <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) {
-; CHECK-LABEL: insertps_with_undefs:
-; CHECK-NOT: shufps
-; CHECK: insertps    $32, %xmm0
-; CHECK: ret
+; X32-LABEL: insertps_with_undefs:
+; X32:       ## BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movss (%eax), %xmm1
+; X32-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0],zero,xmm0[0],xmm1[3]
+; X32-NEXT:    movaps %xmm1, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_with_undefs:
+; X64:       ## BB#0:
+; X64-NEXT:    movss (%rdi), %xmm1
+; X64-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0],zero,xmm0[0],xmm1[3]
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    retq
   %1 = load float* %b, align 4
   %2 = insertelement <4 x float> undef, float %1, i32 0
   %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 4, i32 undef, i32 0, i32 7>
@@ -707,9 +985,16 @@ define <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) {
 ; Test for a bug in X86ISelLowering.cpp:getINSERTPS where we were using
 ; the destination index to change the load, instead of the source index.
 define <4 x float> @pr20087(<4 x float> %a, <4 x float> *%ptr) {
-; CHECK-LABEL: pr20087:
-; CHECK: insertps  $48
-; CHECK: ret
+; X32-LABEL: pr20087:
+; X32:       ## BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    insertps $-78, 8(%eax), %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: pr20087:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps $-78, 8(%rdi), %xmm0
+; X64-NEXT:    retq
   %load = load <4 x float> *%ptr
   %ret = shufflevector <4 x float> %load, <4 x float> %a, <4 x i32> <i32 4, i32 undef, i32 6, i32 2>
   ret <4 x float> %ret
@@ -717,18 +1002,201 @@ define <4 x float> @pr20087(<4 x float> %a, <4 x float> *%ptr) {
 
 ; Edge case for insertps where we end up with a shuffle with mask=<0, 7, -1, -1>
 define void @insertps_pr20411(i32* noalias nocapture %RET) #1 {
-; CHECK-LABEL: insertps_pr20411:
-; CHECK: movaps  {{[^,]*}}, %[[REG1:xmm.]]
-; CHECK: pshufd  {{.*}} ## [[REG2:xmm.]] = mem[3,0,0,0]
-; CHECK: insertps  {{.*}} ## xmm1 = [[REG2]][0],[[REG1]][3]{{.*}}
-
+; X32-LABEL: insertps_pr20411:
+; X32:       ## BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,1,2,3]
+; X32-NEXT:    insertps $-36, LCPI49_1+12, %xmm0
+; X32-NEXT:    movups %xmm0, (%eax)
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_pr20411:
+; X64:       ## BB#0:
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = mem[3,1,2,3]
+; X64-NEXT:    insertps $-36, LCPI49_1+{{.*}}(%rip), %xmm0
+; X64-NEXT:    movups %xmm0, (%rdi)
+; X64-NEXT:    retq
   %gather_load = shufflevector <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %shuffle109 = shufflevector <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>  ; 4 5 6 7
-
   %shuffle116 = shufflevector <8 x i32> %gather_load, <8 x i32> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef> ; 3 x x x
   %shuffle117 = shufflevector <4 x i32> %shuffle109, <4 x i32> %shuffle116, <4 x i32> <i32 4, i32 3, i32 undef, i32 undef> ; 3 7 x x
-
   %ptrcast = bitcast i32* %RET to <4 x i32>*
   store <4 x i32> %shuffle117, <4 x i32>* %ptrcast, align 4
   ret void
 }
+
+define <4 x float> @insertps_4(<4 x float> %A, <4 x float> %B) {
+; X32-LABEL: insertps_4:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_4:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero
+; X64-NEXT:    retq
+entry:
+  %vecext = extractelement <4 x float> %A, i32 0
+  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
+  %vecinit1 = insertelement <4 x float> %vecinit, float 0.000000e+00, i32 1
+  %vecext2 = extractelement <4 x float> %B, i32 2
+  %vecinit3 = insertelement <4 x float> %vecinit1, float %vecext2, i32 2
+  %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
+  ret <4 x float> %vecinit4
+}
+
+define <4 x float> @insertps_5(<4 x float> %A, <4 x float> %B) {
+; X32-LABEL: insertps_5:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[1],zero,zero
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_5:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[1],zero,zero
+; X64-NEXT:    retq
+entry:
+  %vecext = extractelement <4 x float> %A, i32 0
+  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
+  %vecext1 = extractelement <4 x float> %B, i32 1
+  %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
+  %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 2
+  %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
+  ret <4 x float> %vecinit4
+}
+
+define <4 x float> @insertps_6(<4 x float> %A, <4 x float> %B) {
+; X32-LABEL: insertps_6:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[1],xmm1[2],zero
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_6:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[1],xmm1[2],zero
+; X64-NEXT:    retq
+entry:
+  %vecext = extractelement <4 x float> %A, i32 1
+  %vecinit = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %vecext, i32 1
+  %vecext1 = extractelement <4 x float> %B, i32 2
+  %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 2
+  %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 3
+  ret <4 x float> %vecinit3
+}
+
+define <4 x float> @insertps_7(<4 x float> %A, <4 x float> %B) {
+; X32-LABEL: insertps_7:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[1],zero
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_7:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[1],zero
+; X64-NEXT:    retq
+entry:
+  %vecext = extractelement <4 x float> %A, i32 0
+  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
+  %vecinit1 = insertelement <4 x float> %vecinit, float 0.000000e+00, i32 1
+  %vecext2 = extractelement <4 x float> %B, i32 1
+  %vecinit3 = insertelement <4 x float> %vecinit1, float %vecext2, i32 2
+  %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
+  ret <4 x float> %vecinit4
+}
+
+define <4 x float> @insertps_8(<4 x float> %A, <4 x float> %B) {
+; X32-LABEL: insertps_8:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_8:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; X64-NEXT:    retq
+entry:
+  %vecext = extractelement <4 x float> %A, i32 0
+  %vecinit = insertelement <4 x float> undef, float %vecext, i32 0
+  %vecext1 = extractelement <4 x float> %B, i32 0
+  %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 1
+  %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 2
+  %vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
+  ret <4 x float> %vecinit4
+}
+
+define <4 x float> @insertps_9(<4 x float> %A, <4 x float> %B) {
+; X32-LABEL: insertps_9:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm1 = zero,xmm0[0],xmm1[2],zero
+; X32-NEXT:    movaps %xmm1, %xmm0
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_9:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm1 = zero,xmm0[0],xmm1[2],zero
+; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    retq
+entry:
+  %vecext = extractelement <4 x float> %A, i32 0
+  %vecinit = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %vecext, i32 1
+  %vecext1 = extractelement <4 x float> %B, i32 2
+  %vecinit2 = insertelement <4 x float> %vecinit, float %vecext1, i32 2
+  %vecinit3 = insertelement <4 x float> %vecinit2, float 0.000000e+00, i32 3
+  ret <4 x float> %vecinit3
+}
+
+define <4 x float> @insertps_10(<4 x float> %A)
+{
+; X32-LABEL: insertps_10:
+; X32:       ## BB#0:
+; X32-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
+; X32-NEXT:    retl
+;
+; X64-LABEL: insertps_10:
+; X64:       ## BB#0:
+; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0],zero
+; X64-NEXT:    retq
+  %vecext = extractelement <4 x float> %A, i32 0
+  %vecbuild1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %vecext, i32 0
+  %vecbuild2 = insertelement <4 x float> %vecbuild1, float %vecext, i32 2
+  ret <4 x float> %vecbuild2
+}
+
+define <4 x float> @build_vector_to_shuffle_1(<4 x float> %A) {
+; X32-LABEL: build_vector_to_shuffle_1:
+; X32:       ## BB#0:
+; X32-NEXT:    xorps %xmm1, %xmm1
+; X32-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
+; X32-NEXT:    retl
+;
+; X64-LABEL: build_vector_to_shuffle_1:
+; X64:       ## BB#0:
+; X64-NEXT:    xorps %xmm1, %xmm1
+; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
+; X64-NEXT:    retq
+entry:
+  %vecext = extractelement <4 x float> %A, i32 1
+  %vecinit = insertelement <4 x float> zeroinitializer, float %vecext, i32 1
+  %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 2
+  %vecinit3 = shufflevector <4 x float> %vecinit1, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x float> %vecinit3
+}
+
+define <4 x float> @build_vector_to_shuffle_2(<4 x float> %A) {
+; X32-LABEL: build_vector_to_shuffle_2:
+; X32:       ## BB#0:
+; X32-NEXT:    xorps %xmm1, %xmm1
+; X32-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; X32-NEXT:    retl
+;
+; X64-LABEL: build_vector_to_shuffle_2:
+; X64:       ## BB#0:
+; X64-NEXT:    xorps %xmm1, %xmm1
+; X64-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; X64-NEXT:    retq
+entry:
+  %vecext = extractelement <4 x float> %A, i32 1
+  %vecinit = insertelement <4 x float> zeroinitializer, float %vecext, i32 1
+  %vecinit1 = insertelement <4 x float> %vecinit, float 0.0, i32 2
+  ret <4 x float> %vecinit1
+}
diff --git a/test/CodeGen/X86/sse_partial_update.ll b/test/CodeGen/X86/sse_partial_update.ll
index 2c16a554aebb..a88ab014641b 100644
--- a/test/CodeGen/X86/sse_partial_update.ll
+++ b/test/CodeGen/X86/sse_partial_update.ll
@@ -5,11 +5,18 @@
 ; There is a mismatch between the intrinsic and the actual instruction.
 ; The actual instruction has a partial update of dest, while the intrinsic
 ; passes through the upper FP values. Here, we make sure the source and
-; destination of rsqrtss are the same.
-define void @t1(<4 x float> %a) nounwind uwtable ssp {
+; destination of each scalar unary op are the same.
+
+define void @rsqrtss(<4 x float> %a) nounwind uwtable ssp {
 entry:
-; CHECK-LABEL: t1:
+; CHECK-LABEL: rsqrtss:
 ; CHECK: rsqrtss %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: shufps
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
+
   %0 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a) nounwind
   %a.addr.0.extract = extractelement <4 x float> %0, i32 0
   %conv = fpext float %a.addr.0.extract to double
@@ -21,10 +28,16 @@ entry:
 declare void @callee(double, double)
 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
 
-define void @t2(<4 x float> %a) nounwind uwtable ssp {
+define void @rcpss(<4 x float> %a) nounwind uwtable ssp {
 entry:
-; CHECK-LABEL: t2:
+; CHECK-LABEL: rcpss:
 ; CHECK: rcpss %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: shufps
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
+
   %0 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a) nounwind
   %a.addr.0.extract = extractelement <4 x float> %0, i32 0
   %conv = fpext float %a.addr.0.extract to double
@@ -34,3 +47,23 @@ entry:
   ret void
 }
 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
+
+define void @sqrtss(<4 x float> %a) nounwind uwtable ssp {
+entry:
+; CHECK-LABEL: sqrtss:
+; CHECK: sqrtss %xmm0, %xmm0
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: shufps
+; CHECK-NEXT: cvtss2sd %xmm0
+; CHECK-NEXT: movap
+; CHECK-NEXT: jmp
+
+  %0 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a) nounwind
+  %a.addr.0.extract = extractelement <4 x float> %0, i32 0
+  %conv = fpext float %a.addr.0.extract to double
+  %a.addr.4.extract = extractelement <4 x float> %0, i32 1
+  %conv3 = fpext float %a.addr.4.extract to double
+  tail call void @callee(double %conv, double %conv3) nounwind
+  ret void
+}
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/stack-probe-size.ll b/test/CodeGen/X86/stack-probe-size.ll
new file mode 100644
index 000000000000..21482c3abded
--- /dev/null
+++ b/test/CodeGen/X86/stack-probe-size.ll
@@ -0,0 +1,78 @@
+; This test is attempting to detect that the compiler correctly generates stack
+; probe calls when the size of the local variables exceeds the specified stack
+; probe size.
+;
+; Testing the default value of 4096 bytes makes sense, because the default
+; stack probe size equals the page size (4096 bytes for all x86 targets), and
+; this is unlikely to change in the future.
+;
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+define i32 @test1() "stack-probe-size"="0" {
+  %buffer = alloca [4095 x i8]
+
+  ret i32 0
+
+; CHECK-LABEL: _test1:
+; CHECK-NOT: subl $4095, %esp
+; CHECK: movl $4095, %eax
+; CHECK: calll __chkstk
+}
+
+define i32 @test2() {
+  %buffer = alloca [4095 x i8]
+
+  ret i32 0
+
+; CHECK-LABEL: _test2:
+; CHECK-NOT: movl $4095, %eax
+; CHECK: subl $4095, %esp
+; CHECK-NOT: calll __chkstk
+}
+
+define i32 @test3() "stack-probe-size"="8192" {
+  %buffer = alloca [4095 x i8]
+
+  ret i32 0
+
+; CHECK-LABEL: _test3:
+; CHECK-NOT: movl $4095, %eax
+; CHECK: subl $4095, %esp
+; CHECK-NOT: calll __chkstk
+}
+
+define i32 @test4() "stack-probe-size"="0" {
+  %buffer = alloca [4096 x i8]
+
+  ret i32 0
+
+; CHECK-LABEL: _test4:
+; CHECK-NOT: subl $4096, %esp
+; CHECK: movl $4096, %eax
+; CHECK: calll __chkstk
+}
+
+define i32 @test5() {
+  %buffer = alloca [4096 x i8]
+
+  ret i32 0
+
+; CHECK-LABEL: _test5:
+; CHECK-NOT: subl $4096, %esp
+; CHECK: movl $4096, %eax
+; CHECK: calll __chkstk
+}
+
+define i32 @test6() "stack-probe-size"="8192" {
+  %buffer = alloca [4096 x i8]
+
+  ret i32 0
+
+; CGECK-LABEL: _test6:
+; CGECK-NOT: movl $4096, %eax
+; CGECK: subl $4096, %esp
+; CGECK-NOT: calll __chkstk
+}
diff --git a/test/CodeGen/X86/stack-protector-dbginfo.ll b/test/CodeGen/X86/stack-protector-dbginfo.ll
index cf88ade9363d..a84b77eac5f6 100644
--- a/test/CodeGen/X86/stack-protector-dbginfo.ll
+++ b/test/CodeGen/X86/stack-protector-dbginfo.ll
@@ -10,88 +10,88 @@
 ; Function Attrs: nounwind sspreq
 define i32 @_Z18read_response_sizev() #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !22, i64 0, metadata !23), !dbg !39
+  tail call void @llvm.dbg.value(metadata !22, i64 0, metadata !23, metadata !{!"0x102"}), !dbg !39
   %0 = load i64* getelementptr inbounds ({ i64, [56 x i8] }* @a, i32 0, i32 0), align 8, !dbg !40
-  tail call void @llvm.dbg.value(metadata !63, i64 0, metadata !64), !dbg !71
+  tail call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !64, metadata !{!"0x102"}), !dbg !71
   %1 = trunc i64 %0 to i32
   ret i32 %1
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata)
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
 
 attributes #0 = { sspreq }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!21, !72}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 true, metadata !"", i32 0, metadata !2, metadata !5, metadata !8, metadata !20, metadata !5, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/matt/ryan_bug/<unknown>] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"<unknown>", metadata !"/Users/matt/ryan_bug"}
-!2 = metadata !{metadata !3}
-!3 = metadata !{i32 786436, metadata !1, metadata !4, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [def] [from ]
-!4 = metadata !{i32 786451, metadata !1, null, metadata !"C", i32 19, i64 8, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 19, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{}
-!6 = metadata !{metadata !7}
-!7 = metadata !{i32 786472, metadata !"max_frame_size", i64 0} ; [ DW_TAG_enumerator ] [max_frame_size :: 0]
-!8 = metadata !{metadata !9, metadata !24, metadata !41, metadata !65}
-!9 = metadata !{i32 786478, metadata !1, metadata !10, metadata !"read_response_size", metadata !"read_response_size", metadata !"_Z18read_response_sizev", i32 27, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @_Z18read_response_sizev, null, null, metadata !14, i32 27} ; [ DW_TAG_subprogram ] [line 27] [def] [read_response_size]
-!10 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/Users/matt/ryan_bug/<unknown>]
-!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!14 = metadata !{metadata !15, metadata !19}
-!15 = metadata !{i32 786688, metadata !9, metadata !"b", metadata !10, i32 28, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 28]
-!16 = metadata !{i32 786451, metadata !1, null, metadata !"B", i32 16, i64 32, i64 32, i32 0, i32 0, null, metadata !17, i32 0, null, null} ; [ DW_TAG_structure_type ] [B] [line 16, size 32, align 32, offset 0] [def] [from ]
-!17 = metadata !{metadata !18}
-!18 = metadata !{i32 786445, metadata !1, metadata !16, metadata !"end_of_file", i32 17, i64 32, i64 32, i64 0, i32 0, metadata !13} ; [ DW_TAG_member ] [end_of_file] [line 17, size 32, align 32, offset 0] [from int]
-!19 = metadata !{i32 786688, metadata !9, metadata !"c", metadata !10, i32 29, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [c] [line 29]
-!20 = metadata !{}
-!21 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!22 = metadata !{i64* getelementptr inbounds ({ i64, [56 x i8] }* @a, i32 0, i32 0)}
-!23 = metadata !{i32 786689, metadata !24, metadata !"p2", metadata !10, i32 33554444, metadata !32, i32 0, metadata !38} ; [ DW_TAG_arg_variable ] [p2] [line 12]
-!24 = metadata !{i32 786478, metadata !1, metadata !25, metadata !"min<unsigned long long>", metadata !"min<unsigned long long>", metadata !"_ZN3__13minIyEERKT_S3_RS1_", i32 12, metadata !27, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, metadata !33, null, metadata !35, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [min<unsigned long long>]
-!25 = metadata !{i32 786489, metadata !26, null, metadata !"__1", i32 1} ; [ DW_TAG_namespace ] [__1] [line 1]
-!26 = metadata !{metadata !"main.cpp", metadata !"/Users/matt/ryan_bug"}
-!27 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !28, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!28 = metadata !{metadata !29, metadata !29, metadata !32}
-!29 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !30} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
-!30 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !31} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from long long unsigned int]
-!31 = metadata !{i32 786468, null, null, metadata !"long long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
-!32 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !31} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from long long unsigned int]
-!33 = metadata !{metadata !34}
-!34 = metadata !{i32 786479, null, metadata !"_Tp", metadata !31, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!35 = metadata !{metadata !36, metadata !37}
-!36 = metadata !{i32 786689, metadata !24, metadata !"p1", metadata !10, i32 16777228, metadata !29, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p1] [line 12]
-!37 = metadata !{i32 786689, metadata !24, metadata !"p2", metadata !10, i32 33554444, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p2] [line 12]
-!38 = metadata !{i32 33, i32 0, metadata !9, null}
-!39 = metadata !{i32 12, i32 0, metadata !24, metadata !38}
-!40 = metadata !{i32 9, i32 0, metadata !41, metadata !59}
-!41 = metadata !{i32 786478, metadata !1, metadata !25, metadata !"min<unsigned long long, __1::A>", metadata !"min<unsigned long long, __1::A>", metadata !"_ZN3__13minIyNS_1AEEERKT_S4_RS2_T0_", i32 7, metadata !42, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, metadata !53, null, metadata !55, i32 8} ; [ DW_TAG_subprogram ] [line 7] [def] [scope 8] [min<unsigned long long, __1::A>]
-!42 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !43, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!43 = metadata !{metadata !29, metadata !29, metadata !32, metadata !44}
-!44 = metadata !{i32 786451, metadata !1, metadata !25, metadata !"A", i32 0, i64 8, i64 8, i32 0, i32 0, null, metadata !45, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [A] [line 0, size 8, align 8, offset 0] [def] [from ]
-!45 = metadata !{metadata !46}
-!46 = metadata !{i32 786478, metadata !1, metadata !44, metadata !"operator()", metadata !"operator()", metadata !"_ZN3__11AclERKiS2_", i32 1, metadata !47, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !52, i32 1} ; [ DW_TAG_subprogram ] [line 1] [operator()]
-!47 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !48, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!48 = metadata !{metadata !13, metadata !49, metadata !50, metadata !50}
-!49 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !44} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
-!50 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !51} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
-!51 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int]
-!52 = metadata !{i32 786468}
-!53 = metadata !{metadata !34, metadata !54}
-!54 = metadata !{i32 786479, null, metadata !"_Compare", metadata !44, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!55 = metadata !{metadata !56, metadata !57, metadata !58}
-!56 = metadata !{i32 786689, metadata !41, metadata !"p1", metadata !10, i32 16777223, metadata !29, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p1] [line 7]
-!57 = metadata !{i32 786689, metadata !41, metadata !"p2", metadata !10, i32 33554439, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p2] [line 7]
-!58 = metadata !{i32 786689, metadata !41, metadata !"p3", metadata !10, i32 50331656, metadata !44, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p3] [line 8]
-!59 = metadata !{i32 13, i32 0, metadata !24, metadata !38}
-!63 = metadata !{i32 undef}
-!64 = metadata !{i32 786689, metadata !65, metadata !"p1", metadata !10, i32 33554433, metadata !50, i32 0, metadata !40} ; [ DW_TAG_arg_variable ] [p1] [line 1]
-!65 = metadata !{i32 786478, metadata !1, metadata !25, metadata !"operator()", metadata !"operator()", metadata !"_ZN3__11AclERKiS2_", i32 1, metadata !47, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, metadata !46, metadata !66, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [operator()]
-!66 = metadata !{metadata !67, metadata !69, metadata !70}
-!67 = metadata !{i32 786689, metadata !65, metadata !"this", null, i32 16777216, metadata !68, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!68 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !44} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
-!69 = metadata !{i32 786689, metadata !65, metadata !"p1", metadata !10, i32 33554433, metadata !50, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p1] [line 1]
-!70 = metadata !{i32 786689, metadata !65, metadata !"", metadata !10, i32 50331650, metadata !50, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 2]
-!71 = metadata !{i32 1, i32 0, metadata !65, metadata !40}
-!72 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 \001\00\000\00\001", !1, !2, !5, !8, !20, !5} ; [ DW_TAG_compile_unit ] [/Users/matt/ryan_bug/<unknown>] [DW_LANG_C_plus_plus]
+!1 = !{!"<unknown>", !"/Users/matt/ryan_bug"}
+!2 = !{!3}
+!3 = !{!"0x4\00\0020\0032\0032\000\000\000", !1, !4, null, !6, null, null, null} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [def] [from ]
+!4 = !{!"0x13\00C\0019\008\008\000\000\000", !1, null, null, !5, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 19, size 8, align 8, offset 0] [def] [from ]
+!5 = !{}
+!6 = !{!7}
+!7 = !{!"0x28\00max_frame_size\000"} ; [ DW_TAG_enumerator ] [max_frame_size :: 0]
+!8 = !{!9, !24, !41, !65}
+!9 = !{!"0x2e\00read_response_size\00read_response_size\00_Z18read_response_sizev\0027\000\001\000\006\00256\001\0027", !1, !10, !11, null, i32 ()* @_Z18read_response_sizev, null, null, !14} ; [ DW_TAG_subprogram ] [line 27] [def] [read_response_size]
+!10 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/Users/matt/ryan_bug/<unknown>]
+!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!13}
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!14 = !{!15, !19}
+!15 = !{!"0x100\00b\0028\000", !9, !10, !16} ; [ DW_TAG_auto_variable ] [b] [line 28]
+!16 = !{!"0x13\00B\0016\0032\0032\000\000\000", !1, null, null, !17, null, null} ; [ DW_TAG_structure_type ] [B] [line 16, size 32, align 32, offset 0] [def] [from ]
+!17 = !{!18}
+!18 = !{!"0xd\00end_of_file\0017\0032\0032\000\000", !1, !16, !13} ; [ DW_TAG_member ] [end_of_file] [line 17, size 32, align 32, offset 0] [from int]
+!19 = !{!"0x100\00c\0029\000", !9, !10, !13} ; [ DW_TAG_auto_variable ] [c] [line 29]
+!20 = !{}
+!21 = !{i32 2, !"Dwarf Version", i32 2}
+!22 = !{i64* getelementptr inbounds ({ i64, [56 x i8] }* @a, i32 0, i32 0)}
+!23 = !{!"0x101\00p2\0033554444\000", !24, !10, !32, !38} ; [ DW_TAG_arg_variable ] [p2] [line 12]
+!24 = !{!"0x2e\00min<unsigned long long>\00min<unsigned long long>\00_ZN3__13minIyEERKT_S3_RS1_\0012\000\001\000\006\00256\001\0012", !1, !25, !27, null, null, !33, null, !35} ; [ DW_TAG_subprogram ] [line 12] [def] [min<unsigned long long>]
+!25 = !{!"0x39\00__1\001", !26, null} ; [ DW_TAG_namespace ] [__1] [line 1]
+!26 = !{!"main.cpp", !"/Users/matt/ryan_bug"}
+!27 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !28, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!28 = !{!29, !29, !32}
+!29 = !{!"0x10\00\000\000\000\000\000", null, null, !30} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
+!30 = !{!"0x26\00\000\000\000\000\000", null, null, !31} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from long long unsigned int]
+!31 = !{!"0x24\00long long unsigned int\000\0064\0064\000\000\007", null, null} ; [ DW_TAG_base_type ] [long long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!32 = !{!"0x10\00\000\000\000\000\000", null, null, !31} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from long long unsigned int]
+!33 = !{!34}
+!34 = !{!"0x2f\00_Tp\000\000", null, !31, null} ; [ DW_TAG_template_type_parameter ]
+!35 = !{!36, !37}
+!36 = !{!"0x101\00p1\0016777228\000", !24, !10, !29} ; [ DW_TAG_arg_variable ] [p1] [line 12]
+!37 = !{!"0x101\00p2\0033554444\000", !24, !10, !32} ; [ DW_TAG_arg_variable ] [p2] [line 12]
+!38 = !MDLocation(line: 33, scope: !9)
+!39 = !MDLocation(line: 12, scope: !24, inlinedAt: !38)
+!40 = !MDLocation(line: 9, scope: !41, inlinedAt: !59)
+!41 = !{!"0x2e\00min<unsigned long long, __1::A>\00min<unsigned long long, __1::A>\00_ZN3__13minIyNS_1AEEERKT_S4_RS2_T0_\007\000\001\000\006\00256\001\008", !1, !25, !42, null, null, !53, null, !55} ; [ DW_TAG_subprogram ] [line 7] [def] [scope 8] [min<unsigned long long, __1::A>]
+!42 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !43, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!43 = !{!29, !29, !32, !44}
+!44 = !{!"0x13\00A\000\008\008\000\000\000", !1, !25, null, !45, null, null, null} ; [ DW_TAG_structure_type ] [A] [line 0, size 8, align 8, offset 0] [def] [from ]
+!45 = !{!46}
+!46 = !{!"0x2e\00operator()\00operator()\00_ZN3__11AclERKiS2_\001\000\000\000\006\00256\001\001", !1, !44, !47, null, null, null, i32 0, !52} ; [ DW_TAG_subprogram ] [line 1] [operator()]
+!47 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !48, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!48 = !{!13, !49, !50, !50}
+!49 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !44} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
+!50 = !{!"0x10\00\000\000\000\000\000", null, null, !51} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
+!51 = !{!"0x26\00\000\000\000\000\000", null, null, !13} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int]
+!52 = !{i32 786468}
+!53 = !{!34, !54}
+!54 = !{!"0x2f\00_Compare\000\000", null, !44, null} ; [ DW_TAG_template_type_parameter ]
+!55 = !{!56, !57, !58}
+!56 = !{!"0x101\00p1\0016777223\000", !41, !10, !29} ; [ DW_TAG_arg_variable ] [p1] [line 7]
+!57 = !{!"0x101\00p2\0033554439\000", !41, !10, !32} ; [ DW_TAG_arg_variable ] [p2] [line 7]
+!58 = !{!"0x101\00p3\0050331656\000", !41, !10, !44} ; [ DW_TAG_arg_variable ] [p3] [line 8]
+!59 = !MDLocation(line: 13, scope: !24, inlinedAt: !38)
+!63 = !{i32 undef}
+!64 = !{!"0x101\00p1\0033554433\000", !65, !10, !50, !40} ; [ DW_TAG_arg_variable ] [p1] [line 1]
+!65 = !{!"0x2e\00operator()\00operator()\00_ZN3__11AclERKiS2_\001\000\001\000\006\00256\001\002", !1, !25, !47, null, null, null, !46, !66} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [operator()]
+!66 = !{!67, !69, !70}
+!67 = !{!"0x101\00this\0016777216\001088", !65, null, !68} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!68 = !{!"0xf\00\000\0064\0064\000\000", null, null, !44} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!69 = !{!"0x101\00p1\0033554433\000", !65, !10, !50} ; [ DW_TAG_arg_variable ] [p1] [line 1]
+!70 = !{!"0x101\00\0050331650\000", !65, !10, !50} ; [ DW_TAG_arg_variable ] [line 2]
+!71 = !MDLocation(line: 1, scope: !65, inlinedAt: !40)
+!72 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/stack-protector-weight.ll b/test/CodeGen/X86/stack-protector-weight.ll
new file mode 100644
index 000000000000..c5bf49134e4b
--- /dev/null
+++ b/test/CodeGen/X86/stack-protector-weight.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -print-machineinstrs=expand-isel-pseudos -enable-selectiondag-sp=true %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=SELDAG
+; RUN: llc -mtriple=x86_64-apple-darwin -print-machineinstrs=expand-isel-pseudos -enable-selectiondag-sp=false %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=IR
+
+; SELDAG: # Machine code for function test_branch_weights:
+; SELDAG: Successors according to CFG: BB#[[SUCCESS:[0-9]+]](1048575) BB#[[FAILURE:[0-9]+]](1)
+; SELDAG: BB#[[FAILURE]]:
+; SELDAG: CALL64pcrel32 <es:__stack_chk_fail>
+; SELDAG: BB#[[SUCCESS]]:
+
+; IR: # Machine code for function test_branch_weights:
+; IR: Successors according to CFG: BB#[[SUCCESS:[0-9]+]](1048575) BB#[[FAILURE:[0-9]+]](1)
+; IR: BB#[[SUCCESS]]:
+; IR: BB#[[FAILURE]]:
+; IR: CALL64pcrel32 <ga:@__stack_chk_fail>
+
+define i32 @test_branch_weights(i32 %n) #0 {
+entry:
+  %a = alloca [128 x i32], align 16
+  %0 = bitcast [128 x i32]* %a to i8*
+  call void @llvm.lifetime.start(i64 512, i8* %0)
+  %arraydecay = getelementptr inbounds [128 x i32]* %a, i64 0, i64 0
+  call void @foo2(i32* %arraydecay)
+  %idxprom = sext i32 %n to i64
+  %arrayidx = getelementptr inbounds [128 x i32]* %a, i64 0, i64 %idxprom
+  %1 = load i32* %arrayidx, align 4
+  call void @llvm.lifetime.end(i64 512, i8* %0)
+  ret i32 %1
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+
+declare void @foo2(i32*)
+
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+
+attributes #0 = { ssp "stack-protector-buffer-size"="8" }
diff --git a/test/CodeGen/X86/stack_guard_remat.ll b/test/CodeGen/X86/stack_guard_remat.ll
new file mode 100644
index 000000000000..dd639a7c7b4c
--- /dev/null
+++ b/test/CodeGen/X86/stack_guard_remat.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -no-integrated-as | FileCheck %s -check-prefix=CHECK
+
+;CHECK:  foo2
+;CHECK:  movq ___stack_chk_guard@GOTPCREL(%rip), [[R0:%[a-z0-9]+]]
+;CHECK:  movq ([[R0]]), {{%[a-z0-9]+}}
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @test_stack_guard_remat() #0 {
+entry:
+  %a1 = alloca [256 x i32], align 16
+  %0 = bitcast [256 x i32]* %a1 to i8*
+  call void @llvm.lifetime.start(i64 1024, i8* %0)
+  %arraydecay = getelementptr inbounds [256 x i32]* %a1, i64 0, i64 0
+  call void @foo3(i32* %arraydecay)
+  call void asm sideeffect "foo2", "~{r12},~{r13},~{r14},~{r15},~{ebx},~{esi},~{edi},~{dirflag},~{fpsr},~{flags}"()
+  call void @llvm.lifetime.end(i64 1024, i8* %0)
+  ret i32 0
+}
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+
+declare void @foo3(i32*)
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/X86/stackmap-fast-isel.ll b/test/CodeGen/X86/stackmap-fast-isel.ll
index 0b7e6dbdc7a2..dfb16adaa339 100644
--- a/test/CodeGen/X86/stackmap-fast-isel.ll
+++ b/test/CodeGen/X86/stackmap-fast-isel.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -disable-fp-elim                             | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -disable-fp-elim -fast-isel -fast-isel-abort | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7                             | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -fast-isel -fast-isel-abort | FileCheck %s
 
 ; CHECK-LABEL:  .section  __LLVM_STACKMAPS,__llvm_stackmaps
 ; CHECK-NEXT:  __LLVM_StackMaps:
diff --git a/test/CodeGen/X86/stackmap-large-constants.ll b/test/CodeGen/X86/stackmap-large-constants.ll
new file mode 100644
index 000000000000..73ee4f3d1569
--- /dev/null
+++ b/test/CodeGen/X86/stackmap-large-constants.ll
@@ -0,0 +1,83 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; CHECK-LABEL:	.section	__LLVM_STACKMAPS,__llvm_stackmaps
+; CHECK-NEXT: __LLVM_StackMaps:
+; version
+; CHECK-NEXT: 	.byte	1
+; reserved
+; CHECK-NEXT: 	.byte	0
+; reserved
+; CHECK-NEXT: 	.short	0
+; # functions
+; CHECK-NEXT: 	.long	2
+; # constants
+; CHECK-NEXT: 	.long	2
+; # records
+; CHECK-NEXT: 	.long	2
+; function address & stack size
+; CHECK-NEXT: 	.quad	_foo
+; CHECK-NEXT: 	.quad	8
+; function address & stack size
+; CHECK-NEXT: 	.quad	_bar
+; CHECK-NEXT: 	.quad	8
+
+; Constants Array:
+; CHECK-NEXT: 	.quad	9223372036854775807
+; CHECK-NEXT: 	.quad	-9223372036854775808
+
+; Patchpoint ID
+; CHECK-NEXT: 	.quad	0
+; Instruction offset
+; CHECK-NEXT: 	.long	L{{.*}}-_foo
+; reserved
+; CHECK-NEXT: 	.short	0
+; # locations
+; CHECK-NEXT: 	.short	1
+; ConstantIndex
+; CHECK-NEXT: 	.byte	5
+; reserved
+; CHECK-NEXT: 	.byte	8
+; Dwarf RegNum
+; CHECK-NEXT: 	.short	0
+; Offset
+; CHECK-NEXT: 	.long	0
+; padding
+; CHECK-NEXT: 	.short	0
+; NumLiveOuts
+; CHECK-NEXT: 	.short	0
+
+; CHECK-NEXT: 	.align	3
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+
+define void @foo() {
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 0, i64 9223372036854775807)
+  ret void
+}
+
+; Patchpoint ID
+; CHECK-NEXT: 	.quad	0
+; Instruction Offset
+; CHECK-NEXT: 	.long	L{{.*}}-_bar
+; reserved
+; CHECK-NEXT: 	.short	0
+; # locations
+; CHECK-NEXT: 	.short	1
+; ConstantIndex
+; CHECK-NEXT: 	.byte	5
+; reserved
+; CHECK-NEXT: 	.byte	8
+; Dwarf RegNum
+; CHECK-NEXT: 	.short	0
+; Offset
+; CHECK-NEXT: 	.long	1
+; padding
+; CHECK-NEXT: 	.short	0
+; NumLiveOuts
+; CHECK-NEXT: 	.short	0
+
+
+define void @bar() {
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 0, i64 -9223372036854775808)
+  ret void
+}
diff --git a/test/CodeGen/X86/stackmap-liveness.ll b/test/CodeGen/X86/stackmap-liveness.ll
index 897595db2438..31553c0b6842 100644
--- a/test/CodeGen/X86/stackmap-liveness.ll
+++ b/test/CodeGen/X86/stackmap-liveness.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -disable-fp-elim -enable-patchpoint-liveness=false | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -disable-fp-elim                                   | FileCheck -check-prefix=PATCH %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -enable-patchpoint-liveness=false | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx                                   | FileCheck -check-prefix=PATCH %s
 ;
 ; Note: Print verbose stackmaps using -debug-only=stackmaps.
 
diff --git a/test/CodeGen/X86/stackmap-nops.ll b/test/CodeGen/X86/stackmap-nops.ll
index 5a78f24d7b5e..7932c0dfb99d 100644
--- a/test/CodeGen/X86/stackmap-nops.ll
+++ b/test/CodeGen/X86/stackmap-nops.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -disable-fp-elim | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
 
 define void @nop_test() {
 entry:
@@ -224,6 +224,10 @@ entry:
   tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 28, i32 28)
   tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 29, i32 29)
   tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 30, i32 30)
+; Add an extra stackmap with a zero-length shadow to thwart the shadow
+; optimization. This will force all 15 bytes of the previous shadow to be
+; padded with nops.
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 31, i32 0)
   ret void
 }
 
diff --git a/test/CodeGen/X86/stackmap-shadow-optimization.ll b/test/CodeGen/X86/stackmap-shadow-optimization.ll
new file mode 100644
index 000000000000..a3725f2c5b72
--- /dev/null
+++ b/test/CodeGen/X86/stackmap-shadow-optimization.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
+
+; Check that the X86 stackmap shadow optimization is only outputting a 3-byte
+; nop here. 8-bytes are requested, but 5 are covered by the code for the call to
+; bar.  However, the frame teardown and the return do not count towards the
+; stackmap shadow as the call return counts as a branch target so must flush
+; the shadow.
+; Note that in order for a thread to not return in to the patched space
+; the call must be at the end of the shadow, so the required nop must be
+; before the call, not after.
+define void @shadow_optimization_test() {
+entry:
+; CHECK-LABEL:  shadow_optimization_test:
+; CHECK:        callq   _bar
+; CHECK:        nop
+; CHECK:        callq   _bar
+; CHECK-NOT:    nop
+; CHECK:        callq   _bar
+; CHECK-NOT:    nop
+  call void @bar()
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 8)
+  call void @bar()
+  call void @bar()
+  ret void
+}
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+declare void @bar()
diff --git a/test/CodeGen/X86/stackmap.ll b/test/CodeGen/X86/stackmap.ll
index 85670370d870..5e356f3e03d1 100644
--- a/test/CodeGen/X86/stackmap.ll
+++ b/test/CodeGen/X86/stackmap.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -disable-fp-elim | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
 ;
 ; Note: Print verbose stackmaps using -debug-only=stackmaps.
 
@@ -9,11 +9,11 @@
 ; CHECK-NEXT:   .byte 0
 ; CHECK-NEXT:   .short 0
 ; Num Functions
-; CHECK-NEXT:   .long 15
+; CHECK-NEXT:   .long 16
 ; Num LargeConstants
 ; CHECK-NEXT:   .long 3
 ; Num Callsites
-; CHECK-NEXT:   .long 19
+; CHECK-NEXT:   .long 20
 
 ; Functions and stack size
 ; CHECK-NEXT:   .quad _constantargs
@@ -46,6 +46,8 @@
 ; CHECK-NEXT:   .quad 8
 ; CHECK-NEXT:   .quad _clobberScratch
 ; CHECK-NEXT:   .quad 56
+; CHECK-NEXT:   .quad _needsStackRealignment
+; CHECK-NEXT:   .quad -1
 
 ; Large Constants
 ; CHECK-NEXT:   .quad   2147483648
@@ -464,6 +466,23 @@ define void @clobberScratch(i32 %a) {
   ret void
 }
 
+; A stack frame which needs to be realigned at runtime (to meet alignment 
+; criteria for values on the stack) does not have a fixed frame size. 
+; CHECK-LABEL:  .long L{{.*}}-_needsStackRealignment
+; CHECK-NEXT:   .short 0
+; 0 locations
+; CHECK-NEXT:   .short 0
+define void @needsStackRealignment() {
+  %val = alloca i64, i32 3, align 128
+  tail call void (...)* @escape_values(i64* %val)
+; Note: Adding any non-constant to the stackmap would fail because we
+; expected to be able to address off the frame pointer.  In a realigned
+; frame, we must use the stack pointer instead.  This is a separate bug.
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 0)
+  ret void
+}
+declare void @escape_values(...)
+
 declare void @llvm.experimental.stackmap(i64, i32, ...)
 declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
 declare i64 @llvm.experimental.patchpoint.i64(i64, i32, i8*, i32, ...)
diff --git a/test/CodeGen/X86/stackpointer.ll b/test/CodeGen/X86/stackpointer.ll
index 80bcfbf16743..094856b3c57d 100644
--- a/test/CodeGen/X86/stackpointer.ll
+++ b/test/CodeGen/X86/stackpointer.ll
@@ -25,4 +25,4 @@ declare void @llvm.write_register.i64(metadata, i64) nounwind
 
 ; register unsigned long current_stack_pointer asm("rsp");
 ; CHECK-NOT: .asciz  "rsp"
-!0 = metadata !{metadata !"rsp\00"}
+!0 = !{!"rsp\00"}
diff --git a/test/CodeGen/X86/statepoint-call-lowering.ll b/test/CodeGen/X86/statepoint-call-lowering.ll
new file mode 100644
index 000000000000..e7a0dcab9ab7
--- /dev/null
+++ b/test/CodeGen/X86/statepoint-call-lowering.ll
@@ -0,0 +1,90 @@
+; RUN: llc < %s | FileCheck %s
+; This file contains a collection of basic tests to ensure we didn't
+; screw up normal call lowering when there are no deopt or gc arguments.
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare zeroext i1 @return_i1()
+declare zeroext i32 @return_i32()
+declare i32* @return_i32ptr()
+declare float @return_float()
+
+define i1 @test_i1_return() {
+; CHECK-LABEL: test_i1_return
+; This is just checking that a i1 gets lowered normally when there's no extra
+; state arguments to the statepoint
+; CHECK: pushq %rax
+; CHECK: callq return_i1
+; CHECK: popq %rdx
+; CHECK: retq
+entry:
+  %safepoint_token = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0)
+  %call1 = call zeroext i1 @llvm.experimental.gc.result.int.i1(i32 %safepoint_token)
+  ret i1 %call1
+}
+
+define i32 @test_i32_return() {
+; CHECK-LABEL: test_i32_return
+; CHECK: pushq %rax
+; CHECK: callq return_i32
+; CHECK: popq %rdx
+; CHECK: retq
+entry:
+  %safepoint_token = tail call i32 (i32 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i32f(i32 ()* @return_i32, i32 0, i32 0, i32 0)
+  %call1 = call zeroext i32 @llvm.experimental.gc.result.int.i32(i32 %safepoint_token)
+  ret i32 %call1
+}
+
+define i32* @test_i32ptr_return() {
+; CHECK-LABEL: test_i32ptr_return
+; CHECK: pushq %rax
+; CHECK: callq return_i32ptr
+; CHECK: popq %rdx
+; CHECK: retq
+entry:
+  %safepoint_token = tail call i32 (i32* ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_p0i32f(i32* ()* @return_i32ptr, i32 0, i32 0, i32 0)
+  %call1 = call i32* @llvm.experimental.gc.result.ptr.p0i32(i32 %safepoint_token)
+  ret i32* %call1
+}
+
+define float @test_float_return() {
+; CHECK-LABEL: test_float_return
+; CHECK: pushq %rax
+; CHECK: callq return_float
+; CHECK: popq %rax
+; CHECK: retq
+entry:
+  %safepoint_token = tail call i32 (float ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_f32f(float ()* @return_float, i32 0, i32 0, i32 0)
+  %call1 = call float @llvm.experimental.gc.result.float.f32(i32 %safepoint_token)
+  ret float %call1
+}
+
+define i1 @test_relocate(i32* %a) {
+; CHECK-LABEL: test_relocate
+; Check that an ununsed relocate has no code-generation impact
+; CHECK: pushq %rax
+; CHECK: callq return_i1
+; CHECK-NEXT: .Ltmp13:
+; CHECK-NEXT: popq %rdx
+; CHECK-NEXT: retq
+entry:
+  %safepoint_token = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %a)
+  %call1 = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 4, i32 4)
+  %call2 = call zeroext i1 @llvm.experimental.gc.result.int.i1(i32 %safepoint_token)
+  ret i1 %call2
+}
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...)
+declare i1 @llvm.experimental.gc.result.int.i1(i32)
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_i32f(i32 ()*, i32, i32, ...)
+declare i32 @llvm.experimental.gc.result.int.i32(i32)
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_p0i32f(i32* ()*, i32, i32, ...)
+declare i32* @llvm.experimental.gc.result.ptr.p0i32(i32)
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_f32f(float ()*, i32, i32, ...)
+declare float @llvm.experimental.gc.result.float.f32(i32)
+
+declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32)
diff --git a/test/CodeGen/X86/statepoint-forward.ll b/test/CodeGen/X86/statepoint-forward.ll
new file mode 100644
index 000000000000..12a6ac2c72a9
--- /dev/null
+++ b/test/CodeGen/X86/statepoint-forward.ll
@@ -0,0 +1,107 @@
+; RUN: opt -O3 -S < %s | FileCheck --check-prefix=CHECK-OPT %s
+; RUN: llc < %s | FileCheck --check-prefix=CHECK-LLC %s
+; These tests are targetted at making sure we don't retain information
+; about memory which contains potential gc references across a statepoint.
+; They're carefully written to only outlaw forwarding of references. 
+; Depending on the collector, forwarding non-reference fields or
+; constant null references may be perfectly legal. (If unimplemented.)
+; The general structure of these tests is:
+; - learn a fact about memory (via an assume)
+; - cross a statepoint
+; - check the same fact about memory (which we no longer know)
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+; If not at a statepoint, we could forward known memory values
+; across this call.
+declare void @func() readonly
+
+;; Forwarding the value of a pointer load is invalid since it may have
+;; changed at the safepoint.  Forwarding a non-gc pointer value would 
+;; be valid, but is not currently implemented.
+define i1 @test_load_forward(i32 addrspace(1)* addrspace(1)* %p) gc "statepoint-example" {
+entry:
+  %before = load i32 addrspace(1)* addrspace(1)* %p
+  %cmp1 = call i1 @f(i32 addrspace(1)* %before)
+  call void @llvm.assume(i1 %cmp1)
+  %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
+  %pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(i32 %safepoint_token, i32 4, i32 4)
+  %after = load i32 addrspace(1)* addrspace(1)* %pnew
+  %cmp2 = call i1 @f(i32 addrspace(1)* %after)
+  ret i1 %cmp2
+
+; CHECK-OPT-LABEL: test_load_forward
+; CHECK-OPT: ret i1 %cmp2
+; CHECK-LLC-LABEL: test_load_forward
+; CHECK-LLC: callq f
+}
+
+;; Same as above, but forwarding from a store
+define i1 @test_store_forward(i32 addrspace(1)* addrspace(1)* %p,
+                              i32 addrspace(1)* %v) gc "statepoint-example" {
+entry:
+  %cmp1 = call i1 @f(i32 addrspace(1)* %v)
+  call void @llvm.assume(i1 %cmp1)
+  store i32 addrspace(1)* %v, i32 addrspace(1)* addrspace(1)* %p
+  %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
+  %pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(i32 %safepoint_token, i32 4, i32 4)
+  %after = load i32 addrspace(1)* addrspace(1)* %pnew
+  %cmp2 = call i1 @f(i32 addrspace(1)* %after)
+  ret i1 %cmp2
+
+; CHECK-OPT-LABEL: test_store_forward
+; CHECK-OPT: ret i1 %cmp2
+; CHECK-LLC-LABEL: test_store_forward
+; CHECK-LLC: callq f
+}
+
+; A predicate on the pointer which is not simply null, but whose value
+; would be known unchanged if the pointer value could be forwarded.
+; The implementation of such a function could inspect the integral value
+; of the pointer and is thus not safe to reuse after a statepoint.
+declare i1 @f(i32 addrspace(1)* %v) readnone
+
+; This is a variant of the test_load_forward test which is intended to 
+; highlight the fact that a gc pointer can be stored in part of the heap
+; that is not itself GC managed.  The GC may have an external mechanism
+; to know about and update that value at a safepoint.  Note that the 
+; statepoint does not provide the collector with this root.
+define i1 @test_load_forward_nongc_heap(i32 addrspace(1)** %p) gc "statepoint-example" {
+entry:
+  %before = load i32 addrspace(1)** %p
+  %cmp1 = call i1 @f(i32 addrspace(1)* %before)
+  call void @llvm.assume(i1 %cmp1)
+  call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0)
+  %after = load i32 addrspace(1)** %p
+  %cmp2 = call i1 @f(i32 addrspace(1)* %after)
+  ret i1 %cmp2
+
+; CHECK-OPT-LABEL: test_load_forward_nongc_heap
+; CHECK-OPT: ret i1 %cmp2
+; CHECK-LLC-LABEL: test_load_forward_nongc_heap
+; CHECK-LLC: callq f
+}
+
+;; Same as above, but forwarding from a store
+define i1 @test_store_forward_nongc_heap(i32 addrspace(1)** %p,
+                                         i32 addrspace(1)* %v) gc "statepoint-example" {
+entry:
+  %cmp1 = call i1 @f(i32 addrspace(1)* %v)
+  call void @llvm.assume(i1 %cmp1)
+  store i32 addrspace(1)* %v, i32 addrspace(1)** %p
+  call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0)
+  %after = load i32 addrspace(1)** %p
+  %cmp2 = call i1 @f(i32 addrspace(1)* %after)
+  ret i1 %cmp2
+
+; CHECK-OPT-LABEL: test_store_forward_nongc_heap
+; CHECK-OPT: ret i1 %cmp2
+; CHECK-LLC-LABEL: test_store_forward_nongc_heap
+; CHECK-LLC: callq f
+}
+
+declare void @llvm.assume(i1)
+declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()*, i32, i32, ...)
+declare i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(i32, i32, i32) #3
+
diff --git a/test/CodeGen/X86/statepoint-stack-usage.ll b/test/CodeGen/X86/statepoint-stack-usage.ll
new file mode 100644
index 000000000000..fd24bf841688
--- /dev/null
+++ b/test/CodeGen/X86/statepoint-stack-usage.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+; This test is checking to make sure that we reuse the same stack slots
+; for GC values spilled over two different call sites.  Since the order
+; of GC arguments differ, niave lowering code would insert loads and 
+; stores to rearrange items on the stack.  We need to make sure (for
+; performance) that this doesn't happen.
+define i32 @back_to_back_calls(i32* %a, i32* %b, i32* %c) #1 {
+; CHECK-LABEL: back_to_back_calls
+; The exact stores don't matter, but there need to be three stack slots created
+; CHECK: movq	%rdx, 16(%rsp)
+; CHECK: movq	%rdi, 8(%rsp)
+; CHECK: movq	%rsi, (%rsp)
+  %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32* %a, i32* %b, i32* %c)
+  %a1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 9)
+  %b1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 10)
+  %c1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 11)
+; CHECK: callq
+; This is the key check.  There should NOT be any memory moves here
+; CHECK-NOT: movq
+  %safepoint_token2 = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32* %c1, i32* %b1, i32* %a1)
+  %a2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 11)
+  %b2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 10)
+  %c2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 9)
+; CHECK: callq
+  ret i32 1
+}
+
+; This test simply checks that minor changes in vm state don't prevent slots
+; being reused for gc values.  
+define i32 @reserve_first(i32* %a, i32* %b, i32* %c) #1 {
+; CHECK-LABEL: reserve_first
+; The exact stores don't matter, but there need to be three stack slots created
+; CHECK: movq	%rdx, 16(%rsp)
+; CHECK: movq	%rdi, 8(%rsp)
+; CHECK: movq	%rsi, (%rsp)
+  %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i32* %a, i32* %b, i32* %c)
+  %a1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 9)
+  %b1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 10)
+  %c1 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token, i32 9, i32 11)
+; CHECK: callq
+; This is the key check.  There should NOT be any memory moves here
+; CHECK-NOT: movq
+  %safepoint_token2 = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32* %a1, i32 0, i32* %c1, i32 0, i32 0, i32* %c1, i32* %b1, i32* %a1)
+  %a2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 11)
+  %b2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 10)
+  %c2 = tail call coldcc i32* @llvm.experimental.gc.relocate.p0i32(i32 %safepoint_token2, i32 9, i32 9)
+; CHECK: callq
+  ret i32 1
+}
+
+; Function Attrs: nounwind
+declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32) #3
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()*, i32, i32, ...)
+
+attributes #1 = { uwtable }
diff --git a/test/CodeGen/X86/statepoint-stackmap-format.ll b/test/CodeGen/X86/statepoint-stackmap-format.ll
new file mode 100644
index 000000000000..416674839ea8
--- /dev/null
+++ b/test/CodeGen/X86/statepoint-stackmap-format.ll
@@ -0,0 +1,109 @@
+; RUN: llc < %s | FileCheck %s
+; This test is a sanity check to ensure statepoints are generating StackMap
+; sections correctly.  This is not intended to be a rigorous test of the 
+; StackMap format (see the stackmap tests for that).
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare zeroext i1 @return_i1()
+
+define i1 @test(i32 addrspace(1)* %ptr) {
+; CHECK-LABEL: test
+; Do we see one spill for the local value and the store to the
+; alloca?
+; CHECK: subq	$24, %rsp
+; CHECK: movq	$0, 8(%rsp)
+; CHECK: movq	%rdi, (%rsp)
+; CHECK: callq return_i1
+; CHECK: addq	$24, %rsp
+; CHECK: retq
+entry:
+  %metadata1 = alloca i32 addrspace(1)*, i32 2, align 8
+  store i32 addrspace(1)* null, i32 addrspace(1)** %metadata1
+  %safepoint_token = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 2, i32 addrspace(1)* %ptr, i32 addrspace(1)* null, i32 addrspace(1)* %ptr, i32 addrspace(1)* null)
+  %call1 = call zeroext i1 @llvm.experimental.gc.result.int.i1(i32 %safepoint_token)
+  %a = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 6, i32 6)
+  %b = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 7, i32 7)
+; 
+  ret i1 %call1
+}
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...)
+declare i1 @llvm.experimental.gc.result.int.i1(i32)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
+
+
+; CHECK-LABEL: .section .llvm_stackmaps
+; CHECK-NEXT:  __LLVM_StackMaps:
+; Header
+; CHECK-NEXT:   .byte 1
+; CHECK-NEXT:   .byte 0
+; CHECK-NEXT:   .short 0
+; Num Functions
+; CHECK-NEXT:   .long 1
+; Num LargeConstants
+; CHECK-NEXT:   .long 0
+; Num Callsites
+; CHECK-NEXT:   .long 1
+
+; Functions and stack size
+; CHECK-NEXT:   .quad test
+; CHECK-NEXT:   .quad 24
+
+; Large Constants
+; Statepoint ID only
+; CHECK: .quad	2882400000
+
+; Callsites
+; Constant arguments
+; CHECK: .long	.Ltmp1-test
+; CHECK: .short	0
+; CHECK: .short	8
+; SmallConstant (0)
+; CHECK: .byte	4
+; CHECK: .byte	8
+; CHECK: .short	0
+; CHECK: .long	0
+; SmallConstant (2)
+; CHECK: .byte	4
+; CHECK: .byte	8
+; CHECK: .short	0
+; CHECK: .long	2
+; Direct Spill Slot [RSP+0]
+; CHECK: .byte	2
+; CHECK: .byte	8
+; CHECK: .short	7
+; CHECK: .long	0
+; SmallConstant  (0)
+; CHECK: .byte	4
+; CHECK: .byte	8
+; CHECK: .short	0
+; CHECK: .long	0
+; SmallConstant  (0)
+; CHECK: .byte	4
+; CHECK: .byte	8
+; CHECK: .short	0
+; CHECK: .long	0
+; SmallConstant  (0)
+; CHECK: .byte	4
+; CHECK: .byte	8
+; CHECK: .short	0
+; CHECK: .long	0
+; Direct Spill Slot [RSP+0]
+; CHECK: .byte	2
+; CHECK: .byte	8
+; CHECK: .short	7
+; CHECK: .long	0
+; Direct Spill Slot [RSP+0]
+; CHECK: .byte	2
+; CHECK: .byte	8
+; CHECK: .short	7
+; CHECK: .long	0
+
+; No Padding or LiveOuts
+; CHECK: .short	0
+; CHECK: .short	0
+; CHECK: .align	8
+
+
diff --git a/test/CodeGen/X86/store-narrow.ll b/test/CodeGen/X86/store-narrow.ll
index 7557f255658d..e3cc2fa668ef 100644
--- a/test/CodeGen/X86/store-narrow.ll
+++ b/test/CodeGen/X86/store-narrow.ll
@@ -34,8 +34,8 @@ entry:
 ; X64: movb	%sil, 1(%rdi)
 
 ; X32-LABEL: test2:
-; X32: movb	8(%esp), %[[REG:[abcd]l]]
-; X32: movb	%[[REG]], 1(%{{.*}})
+; X32: movb	8(%esp), %[[REG:[abcd]]]l
+; X32: movb	%[[REG]]l, 1(%{{.*}})
 }
 
 define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@@ -67,8 +67,8 @@ entry:
 ; X64: movw	%si, 2(%rdi)
 
 ; X32-LABEL: test4:
-; X32: movl	8(%esp), %e[[REG:[abcd]x]]
-; X32: movw	%[[REG]], 2(%{{.*}})
+; X32: movw	8(%esp), %[[REG:[abcd]]]x
+; X32: movw	%[[REG]]x, 2(%{{.*}})
 }
 
 define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@@ -84,8 +84,8 @@ entry:
 ; X64: movw	%si, 2(%rdi)
 
 ; X32-LABEL: test5:
-; X32: movzwl	8(%esp), %e[[REG:[abcd]x]]
-; X32: movw	%[[REG]], 2(%{{.*}})
+; X32: movw	8(%esp), %[[REG:[abcd]]]x
+; X32: movw	%[[REG]]x, 2(%{{.*}})
 }
 
 define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
diff --git a/test/CodeGen/X86/switch-default-only.ll b/test/CodeGen/X86/switch-default-only.ll
new file mode 100644
index 000000000000..360ace5b787f
--- /dev/null
+++ b/test/CodeGen/X86/switch-default-only.ll
@@ -0,0 +1,14 @@
+; RUN: llc -O0 -fast-isel=false -march=x86 < %s | FileCheck %s
+
+; No need for branching when the default and only destination follows
+; immediately after the switch.
+; CHECK-LABEL: no_branch:
+; CHECK-NOT: jmp
+; CHECK: ret
+
+define void @no_branch(i32 %x) {
+entry:
+  switch i32 %x, label %exit [ ]
+exit:
+  ret void
+}
diff --git a/test/CodeGen/X86/switch-jump-table.ll b/test/CodeGen/X86/switch-jump-table.ll
new file mode 100644
index 000000000000..a84fb4aafd17
--- /dev/null
+++ b/test/CodeGen/X86/switch-jump-table.ll
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple=i686-pc-gnu-linux < %s | FileCheck %s
+
+
+; An unreachable default destination is replaced with the most popular case label.
+
+define void @sum2(i32 %x, i32* %to) {
+; CHECK-LABEL: sum2:
+; CHECK: movl 4(%esp), [[REG:%e[a-z]{2}]]
+; CHECK: cmpl $3, [[REG]]
+; CHECK: jbe .LBB0_1
+; CHECK: movl $4
+; CHECK: retl
+; CHECK-LABEL: .LBB0_1:
+; CHECK-NEXT: jmpl *.LJTI0_0(,[[REG]],4)
+
+entry:
+  switch i32 %x, label %default [
+    i32 0, label %bb0
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb4
+    i32 5, label %bb4
+  ]
+bb0:
+  store i32 0, i32* %to
+  br label %exit
+bb1:
+  store i32 1, i32* %to
+  br label %exit
+bb2:
+  store i32 2, i32* %to
+  br label %exit
+bb3:
+  store i32 3, i32* %to
+  br label %exit
+bb4:
+  store i32 4, i32* %to
+  br label %exit
+exit:
+  ret void
+default:
+  unreachable
+
+; The jump table has four entries.
+; CHECK-LABEL: .LJTI0_0:
+; CHECK-NEXT: .long  .LBB0_2
+; CHECK-NEXT: .long  .LBB0_3
+; CHECK-NEXT: .long  .LBB0_4
+; CHECK-NEXT: .long  .LBB0_5
+; CHECK-NOT: .long
+}
diff --git a/test/CodeGen/X86/swizzle-2.ll b/test/CodeGen/X86/swizzle-2.ll
index 4b1f903c444a..697af843abb1 100644
--- a/test/CodeGen/X86/swizzle-2.ll
+++ b/test/CodeGen/X86/swizzle-2.ll
@@ -8,508 +8,433 @@
 ; illegal shuffle that is expanded into a sub-optimal sequence of instructions
 ; during lowering stage.
 
-
 define <4 x i32> @swizzle_1(<4 x i32> %v) {
+; CHECK-LABEL: swizzle_1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
   ret <4 x i32> %2
 }
-; CHECK-LABEL: swizzle_1
-; Mask: [1,0,3,2]
-; CHECK: pshufd $-79
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x i32> @swizzle_2(<4 x i32> %v) {
+; CHECK-LABEL: swizzle_2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,3,0]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 0, i32 2>
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 0, i32 2>
   ret <4 x i32> %2
 }
-; CHECK-LABEL: swizzle_2
-; Mask: [2,1,3,0]
-; CHECK: pshufd $54
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x i32> @swizzle_3(<4 x i32> %v) {
+; CHECK-LABEL: swizzle_3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
   ret <4 x i32> %2
 }
-; CHECK-LABEL: swizzle_3
-; Mask: [1,0,3,2]
-; CHECK: pshufd $-79
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x i32> @swizzle_4(<4 x i32> %v) {
+; CHECK-LABEL: swizzle_4:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,0,2]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
   ret <4 x i32> %2
 }
-; CHECK-LABEL: swizzle_4
-; Mask: [3,1,0,2]
-; CHECK: pshufd $-121
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x i32> @swizzle_5(<4 x i32> %v) {
+; CHECK-LABEL: swizzle_5:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
   ret <4 x i32> %2
 }
-; CHECK-LABEL: swizzle_5
-; Mask: [2,3,0,1]
-; CHECK: pshufd $78
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x i32> @swizzle_6(<4 x i32> %v) {
+; CHECK-LABEL: swizzle_6:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,1,3]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
   ret <4 x i32> %2
 }
-; CHECK-LABEL: swizzle_6
-; Mask: [2,0,1,3]
-; CHECK: pshufd $-46
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x i32> @swizzle_7(<4 x i32> %v) {
+; CHECK-LABEL: swizzle_7:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,3,1]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
   ret <4 x i32> %2
 }
-; CHECK-LABEL: swizzle_7
-; Mask: [0,2,3,1]
-; CHECK: pshufd $120
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x i32> @swizzle_8(<4 x i32> %v) {
+; CHECK-LABEL: swizzle_8:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,0]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 2, i32 1>
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 2, i32 1>
   ret <4 x i32> %2
 }
-; CHECK-LABEL: swizzle_8
-; Mask: [1,3,2,0]
-; CHECK: pshufd $45
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x i32> @swizzle_9(<4 x i32> %v) {
+; CHECK-LABEL: swizzle_9:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
   ret <4 x i32> %2
 }
-; CHECK-LABEL: swizzle_9
-; Mask: [2,3,0,1]
-; CHECK: pshufd $78
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x i32> @swizzle_10(<4 x i32> %v) {
+; CHECK-LABEL: swizzle_10:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,2,0,3]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
   ret <4 x i32> %2
 }
-; CHECK-LABEL: swizzle_10
-; Mask: [1,2,0,3]
-; CHECK: pshufd $-55
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x i32> @swizzle_11(<4 x i32> %v) {
+; CHECK-LABEL: swizzle_11:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
   ret <4 x i32> %2
 }
-; CHECK-LABEL: swizzle_11
-; Mask: [3,2,1,0]
-; CHECK: pshufd $27
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x i32> @swizzle_12(<4 x i32> %v) {
+; CHECK-LABEL: swizzle_12:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 1>
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 1>
   ret <4 x i32> %2
 }
-; CHECK-LABEL: swizzle_12
-; Mask: [0,3,1,2]
-; CHECK: pshufd $-100
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x i32> @swizzle_13(<4 x i32> %v) {
+; CHECK-LABEL: swizzle_13:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
   ret <4 x i32> %2
 }
-; CHECK-LABEL: swizzle_13
-; Mask: [3,2,1,0]
-; CHECK: pshufd $27
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x i32> @swizzle_14(<4 x i32> %v) {
+; CHECK-LABEL: swizzle_14:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,0,2,1]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
   ret <4 x i32> %2
 }
-; CHECK-LABEL: swizzle_14
-; Mask: [3,0,2,1]
-; CHECK: pshufd $99
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x float> @swizzle_15(<4 x float> %v) {
+; CHECK-LABEL: swizzle_15:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
   ret <4 x float> %2
 }
-; CHECK-LABEL: swizzle_15
-; Mask: [1,0,3,2]
-; CHECK: pshufd $-79
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x float> @swizzle_16(<4 x float> %v) {
+; CHECK-LABEL: swizzle_16:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,1,3,0]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 1, i32 0, i32 2>
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 3, i32 1, i32 0, i32 2>
   ret <4 x float> %2
 }
-; CHECK-LABEL: swizzle_16
-; Mask: [2,1,3,0]
-; CHECK: pshufd $54
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x float> @swizzle_17(<4 x float> %v) {
+; CHECK-LABEL: swizzle_17:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
   ret <4 x float> %2
 }
-; CHECK-LABEL: swizzle_17
-; Mask: [1,0,3,2]
-; CHECK: pshufd $-79
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x float> @swizzle_18(<4 x float> %v) {
+; CHECK-LABEL: swizzle_18:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,1,0,2]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
   ret <4 x float> %2
 }
-; CHECK-LABEL: swizzle_18
-; Mask: [3,1,0,2]
-; CHECK: pshufd $-121
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x float> @swizzle_19(<4 x float> %v) {
+; CHECK-LABEL: swizzle_19:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
   ret <4 x float> %2
 }
-; CHECK-LABEL: swizzle_19
-; Mask: [2,3,0,1]
-; CHECK: pshufd $78
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x float> @swizzle_20(<4 x float> %v) {
+; CHECK-LABEL: swizzle_20:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
   ret <4 x float> %2
 }
-; CHECK-LABEL: swizzle_20
-; Mask: [2,0,1,3]
-; CHECK: pshufd $-46
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x float> @swizzle_21(<4 x float> %v) {
+; CHECK-LABEL: swizzle_21:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
   ret <4 x float> %2
 }
-; CHECK-LABEL: swizzle_21
-; Mask: [0,2,3,1]
-; CHECK: pshufd $120
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x float> @swizzle_22(<4 x float> %v) {
+; CHECK-LABEL: swizzle_22:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3,2,0]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 2, i32 1>
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 2, i32 1>
   ret <4 x float> %2
 }
-; CHECK-LABEL: swizzle_22
-; Mask: [1,3,2,0]
-; CHECK: pshufd $45
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x float> @swizzle_23(<4 x float> %v) {
+; CHECK-LABEL: swizzle_23:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
   ret <4 x float> %2
 }
-; CHECK-LABEL: swizzle_23
-; Mask: [2,3,0,1]
-; CHECK: pshufd $78
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x float> @swizzle_24(<4 x float> %v) {
+; CHECK-LABEL: swizzle_24:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2,0,3]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
   ret <4 x float> %2
 }
-; CHECK-LABEL: swizzle_24
-; Mask: [1,2,0,3]
-; CHECK: pshufd $-55
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x float> @swizzle_25(<4 x float> %v) {
+; CHECK-LABEL: swizzle_25:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
   ret <4 x float> %2
 }
-; CHECK-LABEL: swizzle_25
-; Mask: [3,2,1,0]
-; CHECK: pshufd $27
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x float> @swizzle_26(<4 x float> %v) {
+; CHECK-LABEL: swizzle_26:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3,1,2]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 1>
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 1>
   ret <4 x float> %2
 }
-; CHECK-LABEL: swizzle_26
-; Mask: [0,3,1,2]
-; CHECK: pshufd $-100
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x float> @swizzle_27(<4 x float> %v) {
+; CHECK-LABEL: swizzle_27:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 0, i32 2>
   ret <4 x float> %2
 }
-; CHECK-LABEL: swizzle_27
-; Mask: [3,2,1,0]
-; CHECK: pshufd $27
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x float> @swizzle_28(<4 x float> %v) {
+; CHECK-LABEL: swizzle_28:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0,2,1]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
   ret <4 x float> %2
 }
-; CHECK-LABEL: swizzle_28
-; Mask: [3,0,2,1]
-; CHECK: pshufd $99
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
-
 
 define <4 x float> @swizzle_29(<4 x float> %v) {
+; CHECK-LABEL: swizzle_29:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3,2,0]
+; CHECK-NEXT:    retq
   %1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
   ret <4 x float> %2
 }
-; CHECK-LABEL: swizzle_29
-; Mask: [1,3,2,0]
-; CHECK: pshufd $45
-; CHECK-NOT: pshufd
-; CHECK-NEXT: ret
 
 ; Make sure that we combine the shuffles from each function below into a single
 ; legal shuffle (either pshuflw or pshufb depending on the masks).
 
 define <8 x i16> @swizzle_30(<8 x i16> %v) {
+; CHECK-LABEL: swizzle_30:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,3,2,0,4,5,6,7]
+; CHECK-NEXT:    retq
   %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 7, i32 5, i32 6, i32 4>
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 7, i32 5, i32 6, i32 4>
   ret <8 x i16> %2
 }
-; CHECK-LABEL: swizzle_30
-; Mask: [1,3,2,0,5,7,6,4]
-; CHECK: pshuflw $45
-; CHECK-NOT: pshufb
-; CHECK-NEXT: ret
-
 
 define <8 x i16> @swizzle_31(<8 x i16> %v) {
+; CHECK-LABEL: swizzle_31:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,3,2,0,4,5,6,7]
+; CHECK-NEXT:    retq
   %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 3, i32 0, i32 2, i32 1, i32 7, i32 5, i32 6, i32 4>
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 0, i32 2, i32 1, i32 7, i32 5, i32 6, i32 4>
   ret <8 x i16> %2
 }
-; CHECK-LABEL: swizzle_31
-; Mask: [1,3,2,0,4,5,6,7]
-; CHECK: pshuflw $45
-; CHECK-NOT: pshufb
-; CHECK: ret
-
 
 define <8 x i16> @swizzle_32(<8 x i16> %v) {
+; CHECK-LABEL: swizzle_32:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
+; CHECK-NEXT:    retq
   %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 7, i32 5, i32 6, i32 4>
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 7, i32 5, i32 6, i32 4>
   ret <8 x i16> %2
 }
-; CHECK-LABEL: swizzle_32
-; Mask: [2,3,0,1,4,5,6,7] --> equivalent to pshufd mask [1,0,2,3]
-; CHECK: pshufd $-31
-; CHECK-NOT: pshufb
-; CHECK: ret
 
 define <8 x i16> @swizzle_33(<8 x i16> %v) {
+; CHECK-LABEL: swizzle_33:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,1,3,0,4,5,6,7]
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,7,6,4]
+; CHECK-NEXT:    retq
   %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 4, i32 6, i32 5, i32 7, i32 2, i32 3, i32 1, i32 0>
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 6, i32 5, i32 7, i32 2, i32 3, i32 1, i32 0>
   ret <8 x i16> %2
 }
-; CHECK-LABEL: swizzle_33
-; CHECK: pshufb
-; CHECK-NOT: pshufb
-; CHECK-NOT: shufpd
-; CHECK: ret
-
 
 define <8 x i16> @swizzle_34(<8 x i16> %v) {
+; CHECK-LABEL: swizzle_34:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,3,0,2,4,5,6,7]
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,4,5]
+; CHECK-NEXT:    retq
   %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 4, i32 7, i32 6, i32 5, i32 1, i32 2, i32 0, i32 3>
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 7, i32 6, i32 5, i32 1, i32 2, i32 0, i32 3>
   ret <8 x i16> %2
 }
-; CHECK-LABEL: swizzle_34
-; CHECK: pshufb
-; CHECK-NOT: pshufb
-; CHECK-NOT: shufpd
-; CHECK: ret
-
 
 define <8 x i16> @swizzle_35(<8 x i16> %v) {
+; CHECK-LABEL: swizzle_35:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,1,0,3,4,5,6,7]
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,7,6]
+; CHECK-NEXT:    retq
   %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 7, i32 4, i32 6, i32 5, i32 1, i32 3, i32 0, i32 2>
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 4, i32 6, i32 5, i32 1, i32 3, i32 0, i32 2>
   ret <8 x i16> %2
 }
-; CHECK-LABEL: swizzle_35
-; CHECK: pshufb
-; CHECK-NOT: pshufb
-; CHECK: ret
-
 
 define <8 x i16> @swizzle_36(<8 x i16> %v) {
+; CHECK-LABEL: swizzle_36:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
+; CHECK-NEXT:    retq
   %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 4, i32 6, i32 7, i32 5, i32 0, i32 1, i32 3, i32 2>
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 6, i32 7, i32 5, i32 0, i32 1, i32 3, i32 2>
   ret <8 x i16> %2
 }
-; CHECK-LABEL: swizzle_36
-; CHECK: pshufb
-; CHECK-NOT: pshufb
-; CHECK-NOT: shufpd
-; CHECK: ret
-
 
 define <8 x i16> @swizzle_37(<8 x i16> %v) {
+; CHECK-LABEL: swizzle_37:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,5]
+; CHECK-NEXT:    retq
   %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 7, i32 5, i32 6, i32 4>
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 7, i32 4, i32 6, i32 5>
   ret <8 x i16> %2
 }
-; CHECK-LABEL: swizzle_37
-; Mask: [0,1,2,3,4,7,6,5]
-; CHECK: pshufhw $108
-; CHECK-NOT: pshufb
-; CHECK: ret
-
 
 define <8 x i16> @swizzle_38(<8 x i16> %v) {
+; CHECK-LABEL: swizzle_38:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,1,0,3,4,5,6,7]
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,6,7]
+; CHECK-NEXT:    retq
   %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 5, i32 6, i32 4, i32 7, i32 0, i32 2, i32 1, i32 3>
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 5, i32 6, i32 4, i32 7, i32 0, i32 2, i32 1, i32 3>
   ret <8 x i16> %2
 }
-; CHECK-LABEL: swizzle_38
-; CHECK: pshufb
-; CHECK-NOT: pshufb
-; CHECK-NOT: shufpd
-; CHECK: ret
-
 
 define <8 x i16> @swizzle_39(<8 x i16> %v) {
+; CHECK-LABEL: swizzle_39:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,3,1,0,4,5,6,7]
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,4,5]
+; CHECK-NEXT:    retq
   %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 5, i32 4, i32 6, i32 7, i32 3, i32 2, i32 1, i32 0>
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 5, i32 4, i32 6, i32 7, i32 3, i32 2, i32 1, i32 0>
   ret <8 x i16> %2
 }
-; CHECK-LABEL: swizzle_39
-; CHECK: pshufb
-; CHECK-NOT: pshufb
-; CHECK-NOT: shufpd
-; CHECK: ret
-
 
 define <8 x i16> @swizzle_40(<8 x i16> %v) {
+; CHECK-LABEL: swizzle_40:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
+; CHECK-NEXT:    retq
   %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 6, i32 4, i32 7, i32 5, i32 1, i32 0, i32 3, i32 2>
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 6, i32 4, i32 7, i32 5, i32 1, i32 0, i32 3, i32 2>
   ret <8 x i16> %2
 }
-; CHECK-LABEL: swizzle_40
-; CHECK: pshufb
-; CHECK-NOT: pshufb
-; CHECK-NOT: shufpd
-; CHECK: ret
-
 
 define <8 x i16> @swizzle_41(<8 x i16> %v) {
+; CHECK-LABEL: swizzle_41:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; CHECK-NEXT:    retq
   %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 6, i32 7, i32 5, i32 4, i32 0, i32 1, i32 3, i32 2>
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 6, i32 7, i32 5, i32 4, i32 0, i32 1, i32 3, i32 2>
   ret <8 x i16> %2
 }
-; CHECK-LABEL: swizzle_41
-; CHECK: pshufb
-; CHECK-NOT: pshufb
-; CHECK-NOT: shufpd
-; CHECK: ret
-
 
 define <8 x i16> @swizzle_42(<8 x i16> %v) {
+; CHECK-LABEL: swizzle_42:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
+; CHECK-NEXT:    retq
   %1 = shufflevector <8 x i16> %v, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 2, i32 7, i32 6, i32 4, i32 5>
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 2, i32 7, i32 6, i32 4, i32 5>
   ret <8 x i16> %2
 }
-; CHECK-LABEL: swizzle_42
-; Mask: [0,1,2,3,5,4,7,6]
-; CHECK: pshufhw $-79
-; CHECK-NOT: pshufb
-; CHECK: ret
-
-
diff --git a/test/CodeGen/X86/swizzle.ll b/test/CodeGen/X86/swizzle.ll
deleted file mode 100644
index 23e0c2453d64..000000000000
--- a/test/CodeGen/X86/swizzle.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movlps
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movsd
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep movups
-; rdar://6523650
-
-	%struct.vector4_t = type { <4 x float> }
-
-define void @swizzle(i8* nocapture %a, %struct.vector4_t* nocapture %b, %struct.vector4_t* nocapture %c) nounwind {
-entry:
-	%0 = getelementptr %struct.vector4_t* %b, i32 0, i32 0		; <<4 x float>*> [#uses=2]
-	%1 = load <4 x float>* %0, align 4		; <<4 x float>> [#uses=1]
-	%tmp.i = bitcast i8* %a to double*		; <double*> [#uses=1]
-	%tmp1.i = load double* %tmp.i		; <double> [#uses=1]
-	%2 = insertelement <2 x double> undef, double %tmp1.i, i32 0		; <<2 x double>> [#uses=1]
-	%tmp2.i = bitcast <2 x double> %2 to <4 x float>		; <<4 x float>> [#uses=1]
-	%3 = shufflevector <4 x float> %1, <4 x float> %tmp2.i, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x float>> [#uses=1]
-	store <4 x float> %3, <4 x float>* %0, align 4
-	ret void
-}
diff --git a/test/CodeGen/X86/tailcall-multiret.ll b/test/CodeGen/X86/tailcall-multiret.ll
new file mode 100644
index 000000000000..a77a59cd70bc
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-multiret.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -mcpu=core2 | FileCheck %s
+; See PR19530
+declare double    @llvm.powi.f64(double %Val, i32 %power)
+define <3 x double> @julia_foo17589(i32 %arg) {
+  %tmp1 = call double @llvm.powi.f64(double 1.000000e+00, i32 %arg)
+; CHECK: callq   __powidf2
+  %tmp2 = insertelement <3 x double> undef, double %tmp1, i32 0
+  %tmp3 = call double @llvm.powi.f64(double 2.000000e+00, i32 %arg)
+; CHECK: callq   __powidf2
+  %tmp4 = insertelement <3 x double> %tmp2, double %tmp3, i32 1
+  %tmp5 = call double @llvm.powi.f64(double 3.000000e+00, i32 %arg)
+; CHECK: callq   __powidf2
+  %tmp6 = insertelement <3 x double> %tmp4, double %tmp5, i32 2
+; CHECK-NOT: TAILCALL
+  ret <3 x double> %tmp6
+}
diff --git a/test/CodeGen/X86/tailcall-returndup-void.ll b/test/CodeGen/X86/tailcall-returndup-void.ll
index c1d631225ec7..2c39cb4468df 100644
--- a/test/CodeGen/X86/tailcall-returndup-void.ll
+++ b/test/CodeGen/X86/tailcall-returndup-void.ll
@@ -3,9 +3,9 @@
 ; CHECK-NOT: ret
 
 @sES_closure = external global [0 x i64]
-declare cc10 void @sEH_info(i64* noalias nocapture, i64* noalias nocapture, i64* noalias nocapture, i64, i64, i64) align 8
+declare ghccc void @sEH_info(i64* noalias nocapture, i64* noalias nocapture, i64* noalias nocapture, i64, i64, i64) align 8
 
-define cc10 void @rBM_info(i64* noalias nocapture %Base_Arg, i64* noalias nocapture %Sp_Arg, i64* noalias nocapture %Hp_Arg, i64 %R1_Arg, i64 %R2_Arg, i64 %R3_Arg) nounwind align 8 {
+define ghccc void @rBM_info(i64* noalias nocapture %Base_Arg, i64* noalias nocapture %Sp_Arg, i64* noalias nocapture %Hp_Arg, i64 %R1_Arg, i64 %R2_Arg, i64 %R3_Arg) nounwind align 8 {
 c263:
   %ln265 = getelementptr inbounds i64* %Sp_Arg, i64 -2
   %ln266 = ptrtoint i64* %ln265 to i64
@@ -18,11 +18,11 @@ n26p:                                             ; preds = %c263
 n1ZQ.i:                                           ; preds = %n26p
   %ln1ZT.i = load i64* getelementptr inbounds ([0 x i64]* @sES_closure, i64 0, i64 0), align 8
   %ln1ZU.i = inttoptr i64 %ln1ZT.i to void (i64*, i64*, i64*, i64, i64, i64)*
-  tail call cc10 void %ln1ZU.i(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 %R3_Arg) nounwind
+  tail call ghccc void %ln1ZU.i(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 %R3_Arg) nounwind
   br label %rBL_info.exit
 
 c1ZP.i:                                           ; preds = %n26p
-  tail call cc10 void @sEH_info(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 %R3_Arg) nounwind
+  tail call ghccc void @sEH_info(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 %R3_Arg) nounwind
   br label %rBL_info.exit
 
 rBL_info.exit:                                    ; preds = %c1ZP.i, %n1ZQ.i
@@ -32,6 +32,6 @@ c26a:                                             ; preds = %c263
   %ln27h = getelementptr inbounds i64* %Base_Arg, i64 -2
   %ln27j = load i64* %ln27h, align 8
   %ln27k = inttoptr i64 %ln27j to void (i64*, i64*, i64*, i64, i64, i64)*
-  tail call cc10 void %ln27k(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 %R1_Arg, i64 %R2_Arg, i64 %R3_Arg) nounwind
+  tail call ghccc void %ln27k(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 %R1_Arg, i64 %R2_Arg, i64 %R3_Arg) nounwind
   ret void
 }
diff --git a/test/CodeGen/X86/tls-addr-non-leaf-function.ll b/test/CodeGen/X86/tls-addr-non-leaf-function.ll
new file mode 100644
index 000000000000..ec47232059f8
--- /dev/null
+++ b/test/CodeGen/X86/tls-addr-non-leaf-function.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -relocation-model=pic -O2 -disable-fp-elim -o - | FileCheck %s
+; RUN: llc < %s -relocation-model=pic -O2 -o - | FileCheck %s
+
+; This test runs twice with different options regarding the frame pointer:
+; first the elimination is disabled, then it is enabled. The disabled case is
+; the "control group".
+; The function 'foo' below is marked with the "no-frame-pointer-elim-non-leaf"
+; attribute which dictates that the frame pointer should not be eliminated
+; unless the function is a leaf (i.e. it doesn't call any other function).
+; Now, 'foo' is not a leaf function, because it performs a TLS access which on
+; X86 ELF in PIC mode is expanded as a library call.
+; This call is represented with a pseudo-instruction which doesn't appear to be
+; a call when inspected by the analysis passes (it doesn't have the "isCall"
+; flag), and the ISel lowering code creating the pseudo was not informing the 
+; MachineFrameInfo that the function contained calls. This affected the decision
+; whether to eliminate the frame pointer.
+; With the fix, the "hasCalls" flag is set in the MFI for the function whenever
+; a TLS access pseudo-instruction is created, so 'foo' appears to be a non-leaf
+; function, and the difference in the options does not affect codegen: both
+; versions will have a frame pointer.
+
+; Test that there's some frame pointer usage in 'foo'...
+; CHECK: foo:
+; CHECK: pushq %rbp
+; CHECK: movq %rsp, %rbp
+; ... and the TLS library call is also present.
+; CHECK: leaq x@TLSGD(%rip), %rdi
+; CHECK: callq __tls_get_addr@PLT
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = thread_local global i32 0
+define i32 @foo() "no-frame-pointer-elim-non-leaf" {
+  %a = load i32* @x, align 4
+  ret i32 %a
+}
diff --git a/test/CodeGen/X86/tls-models.ll b/test/CodeGen/X86/tls-models.ll
index 8e3e95886ad8..0fd785328211 100644
--- a/test/CodeGen/X86/tls-models.ll
+++ b/test/CodeGen/X86/tls-models.ll
@@ -128,6 +128,14 @@ entry:
   ; DARWIN:  _internal_ie@TLVP
 }
 
+define i32 @PR22083() {
+entry:
+  ret i32 ptrtoint (i32* @external_ie to i32)
+  ; X64-LABEL:     PR22083:
+  ; X64:     movq    external_ie@GOTTPOFF(%rip), %rax
+  ; X64_PIC-LABEL: PR22083:
+  ; X64_PIC: movq    external_ie@GOTTPOFF(%rip), %rax
+}
 
 ; ----- localexec specified -----
 
diff --git a/test/CodeGen/X86/trunc-ext-ld-st.ll b/test/CodeGen/X86/trunc-ext-ld-st.ll
index d230f1f7e2c6..8de6297906c7 100644
--- a/test/CodeGen/X86/trunc-ext-ld-st.ll
+++ b/test/CodeGen/X86/trunc-ext-ld-st.ll
@@ -20,7 +20,7 @@ define void @load_2_i8(<2 x i8>* %A)  {
 ; Read 32-bits
 ;CHECK: pmovzxwq
 ;CHECK: paddq
-;CHECK: pshufb
+;CHECK: pshufd
 ;CHECK: movd
 ;CHECK: ret
 define void @load_2_i16(<2 x i16>* %A)  {
@@ -32,7 +32,7 @@ define void @load_2_i16(<2 x i16>* %A)  {
 
 ;CHECK-LABEL: load_2_i32:
 ;CHECK: pmovzxdq
-;CHECK: paddq
+;CHECK: paddd
 ;CHECK: pshufd
 ;CHECK: ret
 define void @load_2_i32(<2 x i32>* %A)  {
@@ -56,7 +56,7 @@ define void @load_4_i8(<4 x i8>* %A)  {
 
 ;CHECK-LABEL: load_4_i16:
 ;CHECK: pmovzxwd
-;CHECK: paddd
+;CHECK: paddw
 ;CHECK: pshufb
 ;CHECK: ret
 define void @load_4_i16(<4 x i16>* %A)  {
@@ -68,7 +68,7 @@ define void @load_4_i16(<4 x i16>* %A)  {
 
 ;CHECK-LABEL: load_8_i8:
 ;CHECK: pmovzxbw
-;CHECK: paddw
+;CHECK: paddb
 ;CHECK: pshufb
 ;CHECK: ret
 define void @load_8_i8(<8 x i8>* %A)  {
diff --git a/test/CodeGen/X86/uint_to_fp-2.ll b/test/CodeGen/X86/uint_to_fp-2.ll
index c5a61c3779bf..e47f15453ed4 100644
--- a/test/CodeGen/X86/uint_to_fp-2.ll
+++ b/test/CodeGen/X86/uint_to_fp-2.ll
@@ -1,15 +1,20 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-unknown-unknown -march=x86 -mattr=+sse2 | FileCheck %s
 
 ; rdar://6504833
 define float @test1(i32 %x) nounwind readnone {
-; CHECK: test1
-; CHECK: movd
-; CHECK: orps
-; CHECK: subsd
-; CHECK: cvtsd2ss
-; CHECK: movss
-; CHECK: flds
-; CHECK: ret
+; CHECK-LABEL: test1:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    movsd .LCPI0_0, %xmm0
+; CHECK-NEXT:    movd {{[0-9]+}}(%esp), %xmm1
+; CHECK-NEXT:    orps %xmm0, %xmm1
+; CHECK-NEXT:    subsd %xmm0, %xmm1
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    cvtsd2ss %xmm1, %xmm0
+; CHECK-NEXT:    movss %xmm0, (%esp)
+; CHECK-NEXT:    flds (%esp)
+; CHECK-NEXT:    popl %eax
+; CHECK-NEXT:    retl
 entry:
 	%0 = uitofp i32 %x to float
 	ret float %0
@@ -17,15 +22,20 @@ entry:
 
 ; PR10802
 define float @test2(<4 x i32> %x) nounwind readnone ssp {
-; CHECK: test2
-; CHECK: xorps [[ZERO:%xmm[0-9]+]]
-; CHECK: movss {{.*}}, [[ZERO]]
-; CHECK: orps
-; CHECK: subsd
-; CHECK: cvtsd2ss
-; CHECK: movss
-; CHECK: flds
-; CHECK: ret
+; CHECK-LABEL: test2:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    movss %xmm0, %xmm1
+; CHECK-NEXT:    movsd .LCPI1_0, %xmm0
+; CHECK-NEXT:    orps %xmm0, %xmm1
+; CHECK-NEXT:    subsd %xmm0, %xmm1
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    cvtsd2ss %xmm1, %xmm0
+; CHECK-NEXT:    movss %xmm0, (%esp)
+; CHECK-NEXT:    flds (%esp)
+; CHECK-NEXT:    popl %eax
+; CHECK-NEXT:    retl
 entry:
   %vecext = extractelement <4 x i32> %x, i32 0
   %conv = uitofp i32 %vecext to float
diff --git a/test/CodeGen/X86/unaligned-32-byte-memops.ll b/test/CodeGen/X86/unaligned-32-byte-memops.ll
new file mode 100644
index 000000000000..347f330d67ae
--- /dev/null
+++ b/test/CodeGen/X86/unaligned-32-byte-memops.ll
@@ -0,0 +1,279 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s --check-prefix=SANDYB --check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx-i | FileCheck %s --check-prefix=SANDYB --check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2 --check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s --check-prefix=HASWELL --check-prefix=CHECK
+
+; On Sandy Bridge or Ivy Bridge, we should not generate an unaligned 32-byte load
+; because that is slower than two 16-byte loads. 
+; Other AVX-capable chips don't have that problem.
+
+define <8 x float> @load32bytes(<8 x float>* %Ap) {
+  ; CHECK-LABEL: load32bytes
+
+  ; SANDYB: vmovaps
+  ; SANDYB: vinsertf128
+  ; SANDYB: retq
+
+  ; BTVER2: vmovups
+  ; BTVER2: retq
+
+  ; HASWELL: vmovups
+  ; HASWELL: retq
+
+  %A = load <8 x float>* %Ap, align 16
+  ret <8 x float> %A
+}
+
+; On Sandy Bridge or Ivy Bridge, we should not generate an unaligned 32-byte store
+; because that is slowerthan two 16-byte stores. 
+; Other AVX-capable chips don't have that problem.
+
+define void @store32bytes(<8 x float> %A, <8 x float>* %P) {
+  ; CHECK-LABEL: store32bytes
+
+  ; SANDYB: vextractf128
+  ; SANDYB: vmovaps
+  ; SANDYB: retq
+
+  ; BTVER2: vmovups
+  ; BTVER2: retq
+
+  ; HASWELL: vmovups
+  ; HASWELL: retq
+
+  store <8 x float> %A, <8 x float>* %P, align 16
+  ret void
+}
+
+; Merge two consecutive 16-byte subvector loads into a single 32-byte load
+; if it's faster.
+
+declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8)
+
+; Use the vinsertf128 intrinsic to model source code 
+; that explicitly uses AVX intrinsics.
+define <8 x float> @combine_16_byte_loads(<4 x float>* %ptr) {
+  ; CHECK-LABEL: combine_16_byte_loads
+
+  ; SANDYB: vmovups
+  ; SANDYB-NEXT: vinsertf128
+  ; SANDYB-NEXT: retq
+
+  ; BTVER2: vmovups
+  ; BTVER2-NEXT: retq
+
+  ; HASWELL: vmovups
+  ; HASWELL-NEXT: retq
+
+  %ptr2 = getelementptr inbounds <4 x float>* %ptr, i64 1
+  %v1 = load <4 x float>* %ptr, align 1
+  %v2 = load <4 x float>* %ptr2, align 1
+  %shuffle = shufflevector <4 x float> %v1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %v3 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %shuffle, <4 x float> %v2, i8 1)
+  ret <8 x float> %v3
+}
+
+; Swap the operands of the shufflevector and vinsertf128 to ensure that the
+; pattern still matches.
+define <8 x float> @combine_16_byte_loads_swap(<4 x float>* %ptr) {
+  ; CHECK-LABEL: combine_16_byte_loads_swap
+
+  ; SANDYB: vmovups
+  ; SANDYB-NEXT: vinsertf128
+  ; SANDYB-NEXT: retq
+
+  ; BTVER2: vmovups
+  ; BTVER2-NEXT: retq
+
+  ; HASWELL: vmovups
+  ; HASWELL-NEXT: retq
+
+  %ptr2 = getelementptr inbounds <4 x float>* %ptr, i64 1
+  %v1 = load <4 x float>* %ptr, align 1
+  %v2 = load <4 x float>* %ptr2, align 1
+  %shuffle = shufflevector <4 x float> %v2, <4 x float> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
+  %v3 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %shuffle, <4 x float> %v1, i8 0)
+  ret <8 x float> %v3
+}
+
+; Replace the vinsertf128 intrinsic with a shufflevector as might be
+; expected from auto-vectorized code.
+define <8 x float> @combine_16_byte_loads_no_intrinsic(<4 x float>* %ptr) {
+  ; CHECK-LABEL: combine_16_byte_loads_no_intrinsic
+
+  ; SANDYB: vmovups
+  ; SANDYB-NEXT: vinsertf128
+  ; SANDYB-NEXT: retq
+
+  ; BTVER2: vmovups
+  ; BTVER2-NEXT: retq
+
+  ; HASWELL: vmovups
+  ; HASWELL-NEXT: retq
+
+  %ptr2 = getelementptr inbounds <4 x float>* %ptr, i64 1
+  %v1 = load <4 x float>* %ptr, align 1
+  %v2 = load <4 x float>* %ptr2, align 1
+  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %v3
+}
+
+; Swap the order of the shufflevector operands to ensure that the
+; pattern still matches.
+define <8 x float> @combine_16_byte_loads_no_intrinsic_swap(<4 x float>* %ptr) {
+  ; CHECK-LABEL: combine_16_byte_loads_no_intrinsic_swap
+
+  ; SANDYB: vmovups
+  ; SANDYB-NEXT: vinsertf128
+  ; SANDYB-NEXT: retq
+
+  ; BTVER2: vmovups
+  ; BTVER2-NEXT: retq
+
+  ; HASWELL: vmovups
+  ; HASWELL-NEXT: retq
+
+  %ptr2 = getelementptr inbounds <4 x float>* %ptr, i64 1
+  %v1 = load <4 x float>* %ptr, align 1
+  %v2 = load <4 x float>* %ptr2, align 1
+  %v3 = shufflevector <4 x float> %v2, <4 x float> %v1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+  ret <8 x float> %v3
+}
+
+; Check each element type other than float to make sure it is handled correctly.
+; Use the loaded values with an 'add' to make sure we're using the correct load type.
+; Even though BtVer2 has fast 32-byte loads, we should not generate those for
+; 256-bit integer vectors because BtVer2 doesn't have AVX2.
+
+define <4 x i64> @combine_16_byte_loads_i64(<2 x i64>* %ptr, <4 x i64> %x) {
+  ; CHECK-LABEL: combine_16_byte_loads_i64
+
+  ; SANDYB: vextractf128
+  ; SANDYB-NEXT: vpaddq
+  ; SANDYB-NEXT: vpaddq
+  ; SANDYB-NEXT: vinsertf128
+  ; SANDYB-NEXT: retq
+
+  ; BTVER2: vextractf128
+  ; BTVER2-NEXT: vpaddq
+  ; BTVER2-NEXT: vpaddq
+  ; BTVER2-NEXT: vinsertf128
+  ; BTVER2-NEXT: retq
+
+  ; HASWELL: vmovdqu
+  ; HASWELL-NEXT: vpaddq
+  ; HASWELL-NEXT: retq
+
+  %ptr2 = getelementptr inbounds <2 x i64>* %ptr, i64 1
+  %v1 = load <2 x i64>* %ptr, align 1
+  %v2 = load <2 x i64>* %ptr2, align 1
+  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v4 = add <4 x i64> %v3, %x
+  ret <4 x i64> %v4
+}
+
+define <8 x i32> @combine_16_byte_loads_i32(<4 x i32>* %ptr, <8 x i32> %x) {
+  ; CHECK-LABEL: combine_16_byte_loads_i32
+
+  ; SANDYB: vextractf128
+  ; SANDYB-NEXT: vpaddd
+  ; SANDYB-NEXT: vpaddd
+  ; SANDYB-NEXT: vinsertf128
+  ; SANDYB-NEXT: retq
+
+  ; BTVER2: vextractf128
+  ; BTVER2-NEXT: vpaddd
+  ; BTVER2-NEXT: vpaddd
+  ; BTVER2-NEXT: vinsertf128
+  ; BTVER2-NEXT: retq
+
+  ; HASWELL: vmovdqu
+  ; HASWELL-NEXT: vpaddd
+  ; HASWELL-NEXT: retq
+
+  %ptr2 = getelementptr inbounds <4 x i32>* %ptr, i64 1
+  %v1 = load <4 x i32>* %ptr, align 1
+  %v2 = load <4 x i32>* %ptr2, align 1
+  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v4 = add <8 x i32> %v3, %x
+  ret <8 x i32> %v4
+}
+
+define <16 x i16> @combine_16_byte_loads_i16(<8 x i16>* %ptr, <16 x i16> %x) {
+  ; CHECK-LABEL: combine_16_byte_loads_i16
+
+  ; SANDYB: vextractf128
+  ; SANDYB-NEXT: vpaddw
+  ; SANDYB-NEXT: vpaddw
+  ; SANDYB-NEXT: vinsertf128
+  ; SANDYB-NEXT: retq
+
+  ; BTVER2: vextractf128
+  ; BTVER2-NEXT: vpaddw
+  ; BTVER2-NEXT: vpaddw
+  ; BTVER2-NEXT: vinsertf128
+  ; BTVER2-NEXT: retq
+
+  ; HASWELL: vmovdqu
+  ; HASWELL-NEXT: vpaddw
+  ; HASWELL-NEXT: retq
+
+  %ptr2 = getelementptr inbounds <8 x i16>* %ptr, i64 1
+  %v1 = load <8 x i16>* %ptr, align 1
+  %v2 = load <8 x i16>* %ptr2, align 1
+  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %v4 = add <16 x i16> %v3, %x
+  ret <16 x i16> %v4
+}
+
+define <32 x i8> @combine_16_byte_loads_i8(<16 x i8>* %ptr, <32 x i8> %x) {
+  ; CHECK-LABEL: combine_16_byte_loads_i8
+
+  ; SANDYB: vextractf128
+  ; SANDYB-NEXT: vpaddb
+  ; SANDYB-NEXT: vpaddb
+  ; SANDYB-NEXT: vinsertf128
+  ; SANDYB-NEXT: retq
+
+  ; BTVER2: vextractf128
+  ; BTVER2-NEXT: vpaddb
+  ; BTVER2-NEXT: vpaddb
+  ; BTVER2-NEXT: vinsertf128
+  ; BTVER2-NEXT: retq
+
+  ; HASWELL: vmovdqu
+  ; HASWELL-NEXT: vpaddb
+  ; HASWELL-NEXT: retq
+
+  %ptr2 = getelementptr inbounds <16 x i8>* %ptr, i64 1
+  %v1 = load <16 x i8>* %ptr, align 1
+  %v2 = load <16 x i8>* %ptr2, align 1
+  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %v4 = add <32 x i8> %v3, %x
+  ret <32 x i8> %v4
+}
+
+define <4 x double> @combine_16_byte_loads_double(<2 x double>* %ptr, <4 x double> %x) {
+  ; CHECK-LABEL: combine_16_byte_loads_double
+
+  ; SANDYB: vmovupd
+  ; SANDYB-NEXT: vinsertf128
+  ; SANDYB-NEXT: vaddpd
+  ; SANDYB-NEXT: retq
+
+  ; BTVER2: vmovupd
+  ; BTVER2-NEXT: vaddpd
+  ; BTVER2-NEXT: retq
+
+  ; HASWELL: vmovupd
+  ; HASWELL: vaddpd
+  ; HASWELL-NEXT: retq
+
+  %ptr2 = getelementptr inbounds <2 x double>* %ptr, i64 1
+  %v1 = load <2 x double>* %ptr, align 1
+  %v2 = load <2 x double>* %ptr2, align 1
+  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v4 = fadd <4 x double> %v3, %x
+  ret <4 x double> %v4
+}
+
diff --git a/test/CodeGen/X86/unknown-location.ll b/test/CodeGen/X86/unknown-location.ll
index d7ae46939035..140121ba3035 100644
--- a/test/CodeGen/X86/unknown-location.ll
+++ b/test/CodeGen/X86/unknown-location.ll
@@ -21,16 +21,16 @@ entry:
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!12}
 
-!0 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 1, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !10, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, i32, i32, i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !10, i32 12, metadata !"producer", i1 false, metadata !"", i32 0, metadata !11, metadata !11, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !10, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{i32 786468, metadata !10, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786443, metadata !2, metadata !1, i32 1, i32 30, i32 0} ; [ DW_TAG_lexical_block ]
-!8 = metadata !{i32 4, i32 3, metadata !7, null}
-!9 = metadata !{metadata !1}
-!10 = metadata !{metadata !"test.c", metadata !"/dir"}
-!11 = metadata !{i32 0}
-!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x101\00x\001\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00foo\00foo\00foo\001\000\001\000\006\000\000\001", !10, !2, !4, null, i32 (i32, i32, i32, i32)* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !10} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\0012\00producer\000\00\000\00\000", !10, !11, !11, !9, null, null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !10, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{!6}
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", !10, !2} ; [ DW_TAG_base_type ]
+!7 = !{!"0xb\001\0030\000", !2, !1} ; [ DW_TAG_lexical_block ]
+!8 = !MDLocation(line: 4, column: 3, scope: !7)
+!9 = !{!1}
+!10 = !{!"test.c", !"/dir"}
+!11 = !{i32 0}
+!12 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/CodeGen/X86/utf16-cfstrings.ll b/test/CodeGen/X86/utf16-cfstrings.ll
index af76a333e8a6..c7ec3eb7abce 100644
--- a/test/CodeGen/X86/utf16-cfstrings.ll
+++ b/test/CodeGen/X86/utf16-cfstrings.ll
@@ -29,7 +29,7 @@ declare void @NSLog(%0*, ...)
 
 !llvm.module.flags = !{!0, !1, !2, !3}
 
-!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!0 = !{i32 1, !"Objective-C Version", i32 2}
+!1 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!2 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
diff --git a/test/CodeGen/X86/v-binop-widen.ll b/test/CodeGen/X86/v-binop-widen.ll
deleted file mode 100644
index fca4da66a85e..000000000000
--- a/test/CodeGen/X86/v-binop-widen.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s
-; CHECK: divps
-; CHECK: divps
-; CHECK: divss
-
-%vec = type <9 x float>
-define %vec @vecdiv( %vec %p1, %vec %p2)
-{
-  %result = fdiv %vec %p1, %p2
-  ret %vec %result
-}
diff --git a/test/CodeGen/X86/v-binop-widen2.ll b/test/CodeGen/X86/v-binop-widen2.ll
deleted file mode 100644
index 334211132f14..000000000000
--- a/test/CodeGen/X86/v-binop-widen2.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: llc -march=x86 -mcpu=generic -mattr=+sse < %s | FileCheck %s
-; RUN: llc -march=x86 -mcpu=atom -mattr=+sse < %s | FileCheck -check-prefix=ATOM %s
-
-%vec = type <6 x float>
-; CHECK: divps
-; CHECK: divss
-; CHECK: divss
-
-; Scheduler causes a different instruction order to be produced on Intel Atom
-; ATOM: divps
-; ATOM: divss
-; ATOM: divss
-
-define %vec @vecdiv( %vec %p1, %vec %p2)
-{
-  %result = fdiv %vec %p1, %p2
-  ret %vec %result
-}
-
-@a = constant %vec < float 2.0, float 4.0, float 8.0, float 16.0, float 32.0, float 64.0 >
-@b = constant %vec < float 2.0, float 2.0, float 2.0, float 2.0, float 2.0, float 2.0 >
-
-; Expected result: < 1.0, 2.0, 4.0, ..., 2.0^(n-1) >
-; main() returns 0 if the result is expected and 1 otherwise
-; to execute, use llvm-as < %s | lli
-define i32 @main() nounwind {
-entry:
-  %avec = load %vec* @a
-  %bvec = load %vec* @b
-
-  %res = call %vec @vecdiv(%vec %avec, %vec %bvec)
-  br label %loop
-loop:
-  %idx = phi i32 [0, %entry], [%nextInd, %looptail]
-  %expected = phi float [1.0, %entry], [%nextExpected, %looptail]
-  %elem = extractelement %vec %res, i32 %idx
-  %expcmp = fcmp oeq float %elem, %expected
-  br i1 %expcmp, label %looptail, label %return
-looptail:
-  %nextExpected = fmul float %expected, 2.0
-  %nextInd = add i32 %idx, 1
-  %cmp = icmp slt i32 %nextInd, 6
-  br i1 %cmp, label %loop, label %return
-return:
-  %retval = phi i32 [0, %looptail], [1, %loop]
-  ret i32 %retval
-}
diff --git a/test/CodeGen/X86/v2f32.ll b/test/CodeGen/X86/v2f32.ll
index dab5e7bc944c..b9bd80f949ec 100644
--- a/test/CodeGen/X86/v2f32.ll
+++ b/test/CodeGen/X86/v2f32.ll
@@ -1,115 +1,94 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=penryn -asm-verbose=0 -o - | FileCheck %s -check-prefix=W64
-; RUN: llc < %s -mcpu=yonah -march=x86 -mtriple=i386-linux-gnu -asm-verbose=0 -o - | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn -o - | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mcpu=yonah -march=x86 -mtriple=i386-linux-gnu -o - | FileCheck %s --check-prefix=X32
 
 ; PR7518
 define void @test1(<2 x float> %Q, float *%P2) nounwind {
+; X64-LABEL: test1:
+; X64:       # BB#0:
+; X64-NEXT:    movaps %xmm0, %xmm1
+; X64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X64-NEXT:    addss %xmm0, %xmm1
+; X64-NEXT:    movss %xmm1, (%rdi)
+; X64-NEXT:    retq
+;
+; X32-LABEL: test1:
+; X32:       # BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movaps %xmm0, %xmm1
+; X32-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; X32-NEXT:    addss %xmm0, %xmm1
+; X32-NEXT:    movss %xmm1, (%eax)
+; X32-NEXT:    retl
   %a = extractelement <2 x float> %Q, i32 0
   %b = extractelement <2 x float> %Q, i32 1
   %c = fadd float %a, %b
-
   store float %c, float* %P2
   ret void
-; X64-LABEL: test1:
-; X64-NEXT: pshufd	$1, %xmm0, %xmm1
-; X64-NEXT: addss	%xmm0, %xmm1
-; X64-NEXT: movss	%xmm1, (%rdi)
-; X64-NEXT: ret
-
-; W64-LABEL: test1:
-; W64-NEXT: movdqa  (%rcx), %xmm0
-; W64-NEXT: pshufd  $1, %xmm0, %xmm1
-; W64-NEXT: addss   %xmm0, %xmm1
-; W64-NEXT: movss   %xmm1, (%rdx)
-; W64-NEXT: ret
-
-; X32-LABEL: test1:
-; X32-NEXT: movl	4(%esp), %eax
-; X32-NEXT: pshufd	$1, %xmm0, %xmm1
-; X32-NEXT: addss	%xmm0, %xmm1
-; X32-NEXT: movss	%xmm1, (%eax)
-; X32-NEXT: ret
 }
 
-
 define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, <2 x float> *%P) nounwind {
-  %Z = fadd <2 x float> %Q, %R
-  ret <2 x float> %Z
-  
 ; X64-LABEL: test2:
-; X64-NEXT: addps	%xmm1, %xmm0
-; X64-NEXT: ret
-
-; W64-LABEL: test2:
-; W64-NEXT: movaps  (%rcx), %xmm0
-; W64-NEXT: addps   (%rdx), %xmm0
-; W64-NEXT: ret
-
+; X64:       # BB#0:
+; X64-NEXT:    addps %xmm1, %xmm0
+; X64-NEXT:    retq
+;
 ; X32-LABEL: test2:
-; X32:      addps	%xmm1, %xmm0
+; X32:       # BB#0:
+; X32-NEXT:    addps %xmm1, %xmm0
+; X32-NEXT:    retl
+  %Z = fadd <2 x float> %Q, %R
+  ret <2 x float> %Z
 }
 
-
 define <2 x float> @test3(<4 x float> %A) nounwind {
+; X64-LABEL: test3:
+; X64:       # BB#0:
+; X64-NEXT:    addps %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X32-LABEL: test3:
+; X32:       # BB#0:
+; X32-NEXT:    addps %xmm0, %xmm0
+; X32-NEXT:    retl
 	%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
 	%C = fadd <2 x float> %B, %B
 	ret <2 x float> %C
-; X64-LABEL: test3:
-; X64-NEXT: addps	%xmm0, %xmm0
-; X64-NEXT: ret
-
-; W64-LABEL: test3:
-; W64-NEXT: movaps  (%rcx), %xmm0
-; W64-NEXT: addps   %xmm0, %xmm0
-; W64-NEXT: ret
-
-; X32-LABEL: test3:
-; X32-NEXT: addps	%xmm0, %xmm0
-; X32-NEXT: ret
 }
 
 define <2 x float> @test4(<2 x float> %A) nounwind {
-	%C = fadd <2 x float> %A, %A
-	ret <2 x float> %C
 ; X64-LABEL: test4:
-; X64-NEXT: addps	%xmm0, %xmm0
-; X64-NEXT: ret
-
-; W64-LABEL: test4:
-; W64-NEXT: movaps  (%rcx), %xmm0
-; W64-NEXT: addps   %xmm0, %xmm0
-; W64-NEXT: ret
-
+; X64:       # BB#0:
+; X64-NEXT:    addps %xmm0, %xmm0
+; X64-NEXT:    retq
+;
 ; X32-LABEL: test4:
-; X32-NEXT: addps	%xmm0, %xmm0
-; X32-NEXT: ret
+; X32:       # BB#0:
+; X32-NEXT:    addps %xmm0, %xmm0
+; X32-NEXT:    retl
+	%C = fadd <2 x float> %A, %A
+	ret <2 x float> %C
 }
 
 define <4 x float> @test5(<4 x float> %A) nounwind {
+; X64-LABEL: test5:
+; X64:       # BB#0:
+; X64-NEXT:    addps %xmm0, %xmm0
+; X64-NEXT:    addps %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; X32-LABEL: test5:
+; X32:       # BB#0:
+; X32-NEXT:    addps %xmm0, %xmm0
+; X32-NEXT:    addps %xmm0, %xmm0
+; X32-NEXT:    retl
 	%B = shufflevector <4 x float> %A, <4 x float> undef, <2 x i32> <i32 0, i32 1>
 	%C = fadd <2 x float> %B, %B
-        br label %BB
-        
+  br label %BB
+
 BB:
-        %D = fadd <2 x float> %C, %C
+  %D = fadd <2 x float> %C, %C
 	%E = shufflevector <2 x float> %D, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 	ret <4 x float> %E
-        
-; X64-LABEL: test5:
-; X64-NEXT: addps	%xmm0, %xmm0
-; X64-NEXT: addps	%xmm0, %xmm0
-; X64-NEXT: ret
-
-; W64-LABEL: test5:
-; W64-NEXT: movaps  (%rcx), %xmm0
-; W64-NEXT: addps   %xmm0, %xmm0
-; W64-NEXT: addps   %xmm0, %xmm0
-; W64-NEXT: ret
-
-; X32-LABEL: test5:
-; X32-NEXT: addps	%xmm0, %xmm0
-; X32-NEXT: addps	%xmm0, %xmm0
-; X32-NEXT: ret
 }
 
 
diff --git a/test/CodeGen/X86/vaargs.ll b/test/CodeGen/X86/vaargs.ll
index ddeb7a336d4a..43c895eb39ef 100644
--- a/test/CodeGen/X86/vaargs.ll
+++ b/test/CodeGen/X86/vaargs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=corei7-avx %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=NO-FLAGS
+; RUN: llc -verify-machineinstrs -mcpu=corei7-avx %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=NO-FLAGS
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
 
diff --git a/test/CodeGen/X86/vararg-callee-cleanup.ll b/test/CodeGen/X86/vararg-callee-cleanup.ll
new file mode 100644
index 000000000000..2dcf319a2080
--- /dev/null
+++ b/test/CodeGen/X86/vararg-callee-cleanup.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple=i686-pc-windows < %s | FileCheck %s
+
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+
+declare x86_thiscallcc void @thiscall_thunk(i8* %this, ...)
+define i32 @call_varargs_thiscall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
+  call x86_thiscallcc void (i8*, ...)* @thiscall_thunk(i8* %a, i32 1, i32 2)
+  call x86_thiscallcc void (i8*, ...)* @thiscall_thunk(i8* %a, i32 1, i32 2)
+  %t1 = add i32 %b, %c
+  %r = add i32 %t1, %d
+  ret i32 %r
+}
+
+; CHECK: _call_varargs_thiscall_thunk:
+; CHECK: calll _thiscall_thunk
+; CHECK-NEXT: subl $8, %esp
+
+; We don't mangle the argument size into variadic callee cleanup functions.
+
+declare x86_stdcallcc void @stdcall_thunk(i8* %this, ...)
+define i32 @call_varargs_stdcall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
+  call x86_stdcallcc void (i8*, ...)* @stdcall_thunk(i8* %a, i32 1, i32 2)
+  call x86_stdcallcc void (i8*, ...)* @stdcall_thunk(i8* %a, i32 1, i32 2)
+  %t1 = add i32 %b, %c
+  %r = add i32 %t1, %d
+  ret i32 %r
+}
+
+; CHECK: _call_varargs_stdcall_thunk:
+; CHECK: calll _stdcall_thunk{{$}}
+; CHECK-NEXT: subl $12, %esp
+
+declare x86_fastcallcc void @fastcall_thunk(i8* %this, ...)
+define i32 @call_varargs_fastcall_thunk(i8* %a, i32 %b, i32 %c, i32 %d) {
+  call x86_fastcallcc void (i8*, ...)* @fastcall_thunk(i8* inreg %a, i32 inreg 1, i32 2)
+  call x86_fastcallcc void (i8*, ...)* @fastcall_thunk(i8* inreg %a, i32 inreg 1, i32 2)
+  %t1 = add i32 %b, %c
+  %r = add i32 %t1, %d
+  ret i32 %r
+}
+
+; CHECK: _call_varargs_fastcall_thunk:
+; CHECK: calll @fastcall_thunk{{$}}
+; CHECK-NEXT: subl $4, %esp
+
+; If you actually return from such a thunk, it will only pop the non-variadic
+; portion of the arguments, which is different from what the callee passes.
+
+define x86_stdcallcc void @varargs_stdcall_return(i32, i32, ...) {
+  ret void
+}
+
+; CHECK: _varargs_stdcall_return:
+; CHECK: retl $8
diff --git a/test/CodeGen/X86/vararg_no_start.ll b/test/CodeGen/X86/vararg_no_start.ll
new file mode 100644
index 000000000000..ab5c6fc58aa2
--- /dev/null
+++ b/test/CodeGen/X86/vararg_no_start.ll
@@ -0,0 +1,9 @@
+; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-windows-msvc < %s | FileCheck %s
+
+define void @foo(i8*, ...) {
+  ret void
+}
+; CHECK-LABEL: {{^_?}}foo:
+; CHECK-NOT: movq
+; CHECK: retq
diff --git a/test/CodeGen/X86/vastart-defs-eflags.ll b/test/CodeGen/X86/vastart-defs-eflags.ll
index 6017753fc8fd..d0c515089f48 100644
--- a/test/CodeGen/X86/vastart-defs-eflags.ll
+++ b/test/CodeGen/X86/vastart-defs-eflags.ll
@@ -14,6 +14,7 @@ entry:
   br i1 %tobool, label %if.end, label %if.then
 
 if.then:                                          ; preds = %entry
+  call void @llvm.va_start(i8* null)
   br label %if.end
 
 if.end:                                           ; preds = %entry, %if.then
@@ -21,3 +22,4 @@ if.end:                                           ; preds = %entry, %if.then
   ret i32 %hasflag
 }
 
+declare void @llvm.va_start(i8*) nounwind
diff --git a/test/CodeGen/X86/vec-loadsingles-alignment.ll b/test/CodeGen/X86/vec-loadsingles-alignment.ll
new file mode 100644
index 000000000000..6aa2adb228e1
--- /dev/null
+++ b/test/CodeGen/X86/vec-loadsingles-alignment.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s
+
+@e = global [8 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8], align 16
+@d = global [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16
+
+; The global 'e' has 16 byte alignment, so make sure we don't generate an
+; aligned 32-byte load instruction when we combine the load+insert sequence.
+
+define i32 @subb() nounwind ssp {
+; CHECK-LABEL: subb:
+; CHECK:  vmovups e(%rip), %ymm
+entry:
+  %0 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 7), align 4
+  %1 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 6), align 8
+  %2 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 5), align 4
+  %3 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 4), align 16
+  %4 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 3), align 4
+  %5 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 2), align 8
+  %6 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 1), align 4
+  %7 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 0), align 16
+  %vecinit.i = insertelement <8 x i32> undef, i32 %7, i32 0
+  %vecinit1.i = insertelement <8 x i32> %vecinit.i, i32 %6, i32 1
+  %vecinit2.i = insertelement <8 x i32> %vecinit1.i, i32 %5, i32 2
+  %vecinit3.i = insertelement <8 x i32> %vecinit2.i, i32 %4, i32 3
+  %vecinit4.i = insertelement <8 x i32> %vecinit3.i, i32 %3, i32 4
+  %vecinit5.i = insertelement <8 x i32> %vecinit4.i, i32 %2, i32 5
+  %vecinit6.i = insertelement <8 x i32> %vecinit5.i, i32 %1, i32 6
+  %vecinit7.i = insertelement <8 x i32> %vecinit6.i, i32 %0, i32 7
+  %8 = bitcast <8 x i32> %vecinit7.i to <32 x i8>
+  tail call void @llvm.x86.avx.storeu.dq.256(i8* bitcast ([8 x i32]* @d to i8*), <32 x i8> %8)
+  ret i32 0
+}
+
+declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
+
diff --git a/test/CodeGen/X86/vec_cast2.ll b/test/CodeGen/X86/vec_cast2.ll
index 1a6c05dd9f41..8600c48aaac1 100644
--- a/test/CodeGen/X86/vec_cast2.ll
+++ b/test/CodeGen/X86/vec_cast2.ll
@@ -1,75 +1,177 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
 ; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx -x86-experimental-vector-widening-legalization | FileCheck %s --check-prefix=CHECK-WIDE
 
-;CHECK-LABEL: foo1_8:
-;CHECK: vcvtdq2ps
-;CHECK: ret
-;
-;CHECK-WIDE-LABEL: foo1_8:
-;CHECK-WIDE:      vpmovzxbd %xmm0, %xmm1
-;CHECK-WIDE-NEXT: vpslld $24, %xmm1, %xmm1
-;CHECK-WIDE-NEXT: vpsrad $24, %xmm1, %xmm1
-;CHECK-WIDE-NEXT: vpshufb {{.*}}, %xmm0, %xmm0
-;CHECK-WIDE-NEXT: vpslld $24, %xmm0, %xmm0
-;CHECK-WIDE-NEXT: vpsrad $24, %xmm0, %xmm0
-;CHECK-WIDE-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
-;CHECK-WIDE-NEXT: vcvtdq2ps %ymm0, %ymm0
-;CHECK-WIDE-NEXT: ret
 define <8 x float> @foo1_8(<8 x i8> %src) {
+; CHECK-LABEL: foo1_8:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4,4,5,5,6,6,7,7]
+; CHECK-NEXT:    vpmovzxwd %xmm0, %xmm0
+; CHECK-NEXT:    vpslld $24, %xmm0, %xmm0
+; CHECK-NEXT:    vpsrad $24, %xmm0, %xmm0
+; CHECK-NEXT:    vpslld $24, %xmm1, %xmm1
+; CHECK-NEXT:    vpsrad $24, %xmm1, %xmm1
+; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0
+; CHECK-NEXT:    retl
+;
+; CHECK-WIDE-LABEL: foo1_8:
+; CHECK-WIDE:       ## BB#0:
+; CHECK-WIDE-NEXT:    vpmovzxbd %xmm0, %xmm1
+; CHECK-WIDE-NEXT:    vpslld $24, %xmm1, %xmm1
+; CHECK-WIDE-NEXT:    vpsrad $24, %xmm1, %xmm1
+; CHECK-WIDE-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; CHECK-WIDE-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; CHECK-WIDE-NEXT:    vpslld $24, %xmm0, %xmm0
+; CHECK-WIDE-NEXT:    vpsrad $24, %xmm0, %xmm0
+; CHECK-WIDE-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-WIDE-NEXT:    vcvtdq2ps %ymm0, %ymm0
+; CHECK-WIDE-NEXT:    retl
   %res = sitofp <8 x i8> %src to <8 x float>
   ret <8 x float> %res
 }
 
-;CHECK-LABEL: foo1_4:
-;CHECK: vcvtdq2ps
-;CHECK: ret
-;
-;CHECK-WIDE-LABEL: foo1_4:
-;CHECK-WIDE:      vpmovzxbd %xmm0, %xmm0
-;CHECK-WIDE-NEXT: vpslld $24, %xmm0, %xmm0
-;CHECK-WIDE-NEXT: vpsrad $24, %xmm0, %xmm0
-;CHECK-WIDE-NEXT: vcvtdq2ps %xmm0, %xmm0
-;CHECK-WIDE-NEXT: ret
 define <4 x float> @foo1_4(<4 x i8> %src) {
+; CHECK-LABEL: foo1_4:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpslld $24, %xmm0, %xmm0
+; CHECK-NEXT:    vpsrad $24, %xmm0, %xmm0
+; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
+; CHECK-NEXT:    retl
+;
+; CHECK-WIDE-LABEL: foo1_4:
+; CHECK-WIDE:       ## BB#0:
+; CHECK-WIDE-NEXT:    vpmovzxbd %xmm0, %xmm0
+; CHECK-WIDE-NEXT:    vpslld $24, %xmm0, %xmm0
+; CHECK-WIDE-NEXT:    vpsrad $24, %xmm0, %xmm0
+; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
+; CHECK-WIDE-NEXT:    retl
   %res = sitofp <4 x i8> %src to <4 x float>
   ret <4 x float> %res
 }
 
-;CHECK-LABEL: foo2_8:
-;CHECK: vcvtdq2ps
-;CHECK: ret
-;
-;CHECK-WIDE-LABEL: foo2_8:
-;CHECK-WIDE: vcvtdq2ps %ymm{{.*}}, %ymm{{.*}}
-;CHECK-WIDE: ret
 define <8 x float> @foo2_8(<8 x i8> %src) {
+; CHECK-LABEL: foo2_8:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vpmovzxwd %xmm0, %xmm1
+; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; CHECK-NEXT:    vandps LCPI2_0, %ymm0, %ymm0
+; CHECK-NEXT:    vcvtdq2ps %ymm0, %ymm0
+; CHECK-NEXT:    retl
+;
+; CHECK-WIDE-LABEL: foo2_8:
+; CHECK-WIDE:       ## BB#0:
+; CHECK-WIDE-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-WIDE-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; CHECK-WIDE-NEXT:    vmovdqa {{.*#+}} xmm3 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
+; CHECK-WIDE-NEXT:    vpshufb %xmm3, %xmm2, %xmm4
+; CHECK-WIDE-NEXT:    vmovdqa {{.*#+}} xmm5 = <2,6,10,14,u,u,u,u,u,u,u,u,u,u,u,u>
+; CHECK-WIDE-NEXT:    vpshufb %xmm5, %xmm2, %xmm2
+; CHECK-WIDE-NEXT:    vpshufd {{.*#+}} xmm6 = xmm0[1,1,2,3]
+; CHECK-WIDE-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm6[0],xmm2[0],xmm6[1],xmm2[1],xmm6[2],xmm2[2],xmm6[3],xmm2[3],xmm6[4],xmm2[4],xmm6[5],xmm2[5],xmm6[6],xmm2[6],xmm6[7],xmm2[7]
+; CHECK-WIDE-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
+; CHECK-WIDE-NEXT:    vpshufb %xmm3, %xmm1, %xmm3
+; CHECK-WIDE-NEXT:    vpshufb %xmm5, %xmm1, %xmm1
+; CHECK-WIDE-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; CHECK-WIDE-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; CHECK-WIDE-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; CHECK-WIDE-NEXT:    vcvtdq2ps %ymm0, %ymm0
+; CHECK-WIDE-NEXT:    retl
   %res = uitofp <8 x i8> %src to <8 x float>
   ret <8 x float> %res
 }
 
-;CHECK-LABEL: foo2_4:
-;CHECK: vcvtdq2ps
-;CHECK: ret
-;
-;CHECK-WIDE-LABEL: foo2_4:
-;CHECK-WIDE: vcvtdq2ps %xmm{{.*}}, %xmm{{.*}}
-;CHECK-WIDE: ret
 define <4 x float> @foo2_4(<4 x i8> %src) {
+; CHECK-LABEL: foo2_4:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vandps LCPI3_0, %xmm0, %xmm0
+; CHECK-NEXT:    vcvtdq2ps %xmm0, %xmm0
+; CHECK-NEXT:    retl
+;
+; CHECK-WIDE-LABEL: foo2_4:
+; CHECK-WIDE:       ## BB#0:
+; CHECK-WIDE-NEXT:    vpmovzxbd %xmm0, %xmm0
+; CHECK-WIDE-NEXT:    vcvtdq2ps %xmm0, %xmm0
+; CHECK-WIDE-NEXT:    retl
   %res = uitofp <4 x i8> %src to <4 x float>
   ret <4 x float> %res
 }
 
-;CHECK-LABEL: foo3_8:
-;CHECK: vcvttps2dq
-;CHECK: ret
 define <8 x i8> @foo3_8(<8 x float> %src) {
+; CHECK-LABEL: foo3_8:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcvttps2dq %ymm0, %ymm0
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; CHECK-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; CHECK-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; CHECK-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retl
+;
+; CHECK-WIDE-LABEL: foo3_8:
+; CHECK-WIDE:       ## BB#0:
+; CHECK-WIDE-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; CHECK-WIDE-NEXT:    vcvttss2si %xmm1, %eax
+; CHECK-WIDE-NEXT:    shll $8, %eax
+; CHECK-WIDE-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; CHECK-WIDE-NEXT:    vcvttss2si %xmm1, %ecx
+; CHECK-WIDE-NEXT:    movzbl %cl, %ecx
+; CHECK-WIDE-NEXT:    orl %eax, %ecx
+; CHECK-WIDE-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; CHECK-WIDE-NEXT:    vcvttss2si %xmm1, %eax
+; CHECK-WIDE-NEXT:    shll $8, %eax
+; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %edx
+; CHECK-WIDE-NEXT:    movzbl %dl, %edx
+; CHECK-WIDE-NEXT:    orl %eax, %edx
+; CHECK-WIDE-NEXT:    vpinsrw $0, %edx, %xmm0, %xmm1
+; CHECK-WIDE-NEXT:    vpinsrw $1, %ecx, %xmm1, %xmm1
+; CHECK-WIDE-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; CHECK-WIDE-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; CHECK-WIDE-NEXT:    vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT:    shll $8, %eax
+; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %ecx
+; CHECK-WIDE-NEXT:    movzbl %cl, %ecx
+; CHECK-WIDE-NEXT:    orl %eax, %ecx
+; CHECK-WIDE-NEXT:    vpinsrw $2, %ecx, %xmm1, %xmm1
+; CHECK-WIDE-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; CHECK-WIDE-NEXT:    vcvttss2si %xmm2, %eax
+; CHECK-WIDE-NEXT:    shll $8, %eax
+; CHECK-WIDE-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %ecx
+; CHECK-WIDE-NEXT:    movzbl %cl, %ecx
+; CHECK-WIDE-NEXT:    orl %eax, %ecx
+; CHECK-WIDE-NEXT:    vpinsrw $3, %ecx, %xmm1, %xmm0
+; CHECK-WIDE-NEXT:    vzeroupper
+; CHECK-WIDE-NEXT:    retl
   %res = fptosi <8 x float> %src to <8 x i8>
   ret <8 x i8> %res
 }
-;CHECK-LABEL: foo3_4:
-;CHECK: vcvttps2dq
-;CHECK: ret
+
 define <4 x i8> @foo3_4(<4 x float> %src) {
+; CHECK-LABEL: foo3_4:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    vcvttps2dq %xmm0, %xmm0
+; CHECK-NEXT:    retl
+;
+; CHECK-WIDE-LABEL: foo3_4:
+; CHECK-WIDE:       ## BB#0:
+; CHECK-WIDE-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; CHECK-WIDE-NEXT:    vcvttss2si %xmm1, %eax
+; CHECK-WIDE-NEXT:    shll $8, %eax
+; CHECK-WIDE-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; CHECK-WIDE-NEXT:    vcvttss2si %xmm1, %ecx
+; CHECK-WIDE-NEXT:    movzbl %cl, %ecx
+; CHECK-WIDE-NEXT:    orl %eax, %ecx
+; CHECK-WIDE-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; CHECK-WIDE-NEXT:    vcvttss2si %xmm1, %eax
+; CHECK-WIDE-NEXT:    shll $8, %eax
+; CHECK-WIDE-NEXT:    vcvttss2si %xmm0, %edx
+; CHECK-WIDE-NEXT:    movzbl %dl, %edx
+; CHECK-WIDE-NEXT:    orl %eax, %edx
+; CHECK-WIDE-NEXT:    vpinsrw $0, %edx, %xmm0, %xmm0
+; CHECK-WIDE-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0
+; CHECK-WIDE-NEXT:    retl
   %res = fptosi <4 x float> %src to <4 x i8>
   ret <4 x i8> %res
 }
diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll
deleted file mode 100644
index 4da79538dbf6..000000000000
--- a/test/CodeGen/X86/vec_compare-2.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc < %s -mtriple=i686-linux -mcpu=penryn | FileCheck %s
-
-declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-
-declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
-
-declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
-
-define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) {
-entry:
-; CHECK: cfi_def_cfa_offset
-; CHECK-NOT: set
-; CHECK: pmovzxwq
-; CHECK: pshufb
-  %shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
-  %cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1]
-  %sub322.i = sub <4 x i32> %shr.i, zeroinitializer ; <<4 x i32>> [#uses=1]
-  %cmp323.x = icmp slt <4 x i32> zeroinitializer, %sub322.i ; <<4 x i1>> [#uses=1]
-  %cmp323.i = sext <4 x i1> %cmp323.x to <4 x i32> ; <<4 x i32>> [#uses=1]
-  %or.i = or <4 x i32> %cmp318.i, %cmp323.i       ; <<4 x i32>> [#uses=1]
-  %tmp10.i83.i = bitcast <4 x i32> %or.i to <4 x float> ; <<4 x float>> [#uses=1]
-  %0 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> undef, <4 x float> undef, <4 x float> %tmp10.i83.i) nounwind ; <<4 x float>> [#uses=1]
-  %conv.i.i15.i = bitcast <4 x float> %0 to <4 x i32> ; <<4 x i32>> [#uses=1]
-  %swz.i.i28.i = shufflevector <4 x i32> %conv.i.i15.i, <4 x i32> undef, <2 x i32> <i32 0, i32 1> ; <<2 x i32>> [#uses=1]
-  %tmp6.i29.i = bitcast <2 x i32> %swz.i.i28.i to <4 x i16> ; <<4 x i16>> [#uses=1]
-  %swz.i30.i = shufflevector <4 x i16> %tmp6.i29.i, <4 x i16> undef, <2 x i32> <i32 0, i32 1> ; <<2 x i16>> [#uses=1]
-  store <2 x i16> %swz.i30.i, <2 x i16>* undef
-  unreachable
-  ret void
-}
diff --git a/test/CodeGen/X86/vec_ctbits.ll b/test/CodeGen/X86/vec_ctbits.ll
index bddd53514643..318aca1d54cb 100644
--- a/test/CodeGen/X86/vec_ctbits.ll
+++ b/test/CodeGen/X86/vec_ctbits.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s
 
 declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
 declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
@@ -7,12 +7,61 @@ declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
 define <2 x i64> @footz(<2 x i64> %a) nounwind {
   %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
   ret <2 x i64> %c
+
+; CHECK-LABEL: footz
+; CHECK: bsfq
+; CHECK: bsfq
 }
 define <2 x i64> @foolz(<2 x i64> %a) nounwind {
   %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
   ret <2 x i64> %c
+
+; CHECK-LABEL: foolz
+; CHECK: bsrq
+; CHECK: xorq $63
+; CHECK: bsrq
+; CHECK: xorq $63
 }
+
 define <2 x i64> @foopop(<2 x i64> %a) nounwind {
   %c = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
   ret <2 x i64> %c
 }
+
+declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1)
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
+
+define <2 x i32> @promtz(<2 x i32> %a) nounwind {
+  %c = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
+  ret <2 x i32> %c
+
+; CHECK: .quad 4294967296
+; CHECK: .quad 4294967296
+; CHECK-LABEL: promtz
+; CHECK: bsfq
+; CHECK: cmov
+; CHECK: bsfq
+; CHECK: cmov
+}
+define <2 x i32> @promlz(<2 x i32> %a) nounwind {
+  %c = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
+  ret <2 x i32> %c
+
+; CHECK: .quad 4294967295
+; CHECK: .quad 4294967295
+; CHECK: .quad 32
+; CHECK: .quad 32
+; CHECK-LABEL: promlz
+; CHECK: pand
+; CHECK: bsrq
+; CHECK: xorq $63
+; CHECK: bsrq
+; CHECK: xorq $63
+; CHECK: psub
+}
+
+define <2 x i32> @prompop(<2 x i32> %a) nounwind {
+  %c = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
+  ret <2 x i32> %c
+}
diff --git a/test/CodeGen/X86/vec_extract-avx.ll b/test/CodeGen/X86/vec_extract-avx.ll
new file mode 100644
index 000000000000..fbb84170dc83
--- /dev/null
+++ b/test/CodeGen/X86/vec_extract-avx.ll
@@ -0,0 +1,82 @@
+target triple = "x86_64-unknown-unknown"
+
+; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s
+
+; When extracting multiple consecutive elements from a larger
+; vector into a smaller one, do it efficiently. We should use
+; an EXTRACT_SUBVECTOR node internally rather than a bunch of
+; single element extractions. 
+
+; Extracting the low elements only requires using the right kind of store.
+define void @low_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
+  %ext0 = extractelement <8 x float> %v, i32 0
+  %ext1 = extractelement <8 x float> %v, i32 1
+  %ext2 = extractelement <8 x float> %v, i32 2
+  %ext3 = extractelement <8 x float> %v, i32 3
+  %ins0 = insertelement <4 x float> undef, float %ext0, i32 0
+  %ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
+  %ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
+  %ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
+  store <4 x float> %ins3, <4 x float>* %ptr, align 16
+  ret void
+
+; CHECK-LABEL: low_v8f32_to_v4f32
+; CHECK: vmovaps
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+}
+
+; Extracting the high elements requires just one AVX instruction. 
+define void @high_v8f32_to_v4f32(<8 x float> %v, <4 x float>* %ptr) {
+  %ext0 = extractelement <8 x float> %v, i32 4
+  %ext1 = extractelement <8 x float> %v, i32 5
+  %ext2 = extractelement <8 x float> %v, i32 6
+  %ext3 = extractelement <8 x float> %v, i32 7
+  %ins0 = insertelement <4 x float> undef, float %ext0, i32 0
+  %ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
+  %ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
+  %ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
+  store <4 x float> %ins3, <4 x float>* %ptr, align 16
+  ret void
+
+; CHECK-LABEL: high_v8f32_to_v4f32
+; CHECK: vextractf128
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+}
+
+; Make sure element type doesn't alter the codegen. Note that
+; if we were actually using the vector in this function and
+; have AVX2, we should generate vextracti128 (the int version).
+define void @high_v8i32_to_v4i32(<8 x i32> %v, <4 x i32>* %ptr) {
+  %ext0 = extractelement <8 x i32> %v, i32 4
+  %ext1 = extractelement <8 x i32> %v, i32 5
+  %ext2 = extractelement <8 x i32> %v, i32 6
+  %ext3 = extractelement <8 x i32> %v, i32 7
+  %ins0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
+  %ins1 = insertelement <4 x i32> %ins0, i32 %ext1, i32 1
+  %ins2 = insertelement <4 x i32> %ins1, i32 %ext2, i32 2
+  %ins3 = insertelement <4 x i32> %ins2, i32 %ext3, i32 3
+  store <4 x i32> %ins3, <4 x i32>* %ptr, align 16
+  ret void
+
+; CHECK-LABEL: high_v8i32_to_v4i32
+; CHECK: vextractf128
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+}
+
+; Make sure that element size doesn't alter the codegen.
+define void @high_v4f64_to_v2f64(<4 x double> %v, <2 x double>* %ptr) {
+  %ext0 = extractelement <4 x double> %v, i32 2
+  %ext1 = extractelement <4 x double> %v, i32 3
+  %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
+  %ins1 = insertelement <2 x double> %ins0, double %ext1, i32 1
+  store <2 x double> %ins1, <2 x double>* %ptr, align 16
+  ret void
+
+; CHECK-LABEL: high_v4f64_to_v2f64
+; CHECK: vextractf128
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+}
diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll
index 747c8a8e8d02..530911add121 100644
--- a/test/CodeGen/X86/vec_extract-sse4.ll
+++ b/test/CodeGen/X86/vec_extract-sse4.ll
@@ -1,11 +1,12 @@
 ; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse4.1 | FileCheck %s
 
 define void @t1(float* %R, <4 x float>* %P1) nounwind {
-; CHECK-LABEL: @t1
-; CHECK:         movl 4(%esp), %[[R0:e[abcd]x]]
-; CHECK-NEXT:    movl 8(%esp), %[[R1:e[abcd]x]]
-; CHECK-NEXT:    movl 12(%[[R1]]), %[[R2:e[abcd]x]]
-; CHECK-NEXT:    movl %[[R2]], (%[[R0]])
+; CHECK-LABEL: t1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movss 12(%ecx), %xmm0
+; CHECK-NEXT:    movss %xmm0, (%eax)
 ; CHECK-NEXT:    retl
 
 	%X = load <4 x float>* %P1
@@ -15,9 +16,15 @@ define void @t1(float* %R, <4 x float>* %P1) nounwind {
 }
 
 define float @t2(<4 x float>* %P1) nounwind {
-; CHECK-LABEL: @t2
-; CHECK:         movl 4(%esp), %[[R0:e[abcd]x]]
-; CHECK-NEXT:    flds 8(%[[R0]])
+; CHECK-LABEL: t2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movapd (%eax), %xmm0
+; CHECK-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; CHECK-NEXT:    movss %xmm0, (%esp)
+; CHECK-NEXT:    flds (%esp)
+; CHECK-NEXT:    popl %eax
 ; CHECK-NEXT:    retl
 
 	%X = load <4 x float>* %P1
@@ -26,11 +33,12 @@ define float @t2(<4 x float>* %P1) nounwind {
 }
 
 define void @t3(i32* %R, <4 x i32>* %P1) nounwind {
-; CHECK-LABEL: @t3
-; CHECK:         movl 4(%esp), %[[R0:e[abcd]x]]
-; CHECK-NEXT:    movl 8(%esp), %[[R1:e[abcd]x]]
-; CHECK-NEXT:    movl 12(%[[R1]]), %[[R2:e[abcd]x]]
-; CHECK-NEXT:    movl %[[R2]], (%[[R0]])
+; CHECK-LABEL: t3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl 12(%ecx), %ecx
+; CHECK-NEXT:    movl %ecx, (%eax)
 ; CHECK-NEXT:    retl
 
 	%X = load <4 x i32>* %P1
@@ -40,9 +48,10 @@ define void @t3(i32* %R, <4 x i32>* %P1) nounwind {
 }
 
 define i32 @t4(<4 x i32>* %P1) nounwind {
-; CHECK-LABEL: @t4
-; CHECK:         movl 4(%esp), %[[R0:e[abcd]x]]
-; CHECK-NEXT:    movl 12(%[[R0]]), %eax
+; CHECK-LABEL: t4:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl 12(%eax), %eax
 ; CHECK-NEXT:    retl
 
 	%X = load <4 x i32>* %P1
diff --git a/test/CodeGen/X86/vec_extract.ll b/test/CodeGen/X86/vec_extract.ll
index 88f5a585b9fd..6df7be7a087b 100644
--- a/test/CodeGen/X86/vec_extract.ll
+++ b/test/CodeGen/X86/vec_extract.ll
@@ -1,10 +1,17 @@
-; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2,-sse4.1 -o %t
-; RUN: grep movss    %t | count 4
-; RUN: grep movhlps  %t | count 1
-; RUN: not grep pshufd   %t 
-; RUN: grep unpckhpd %t | count 1
+; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2,-sse4.1 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
 
 define void @test1(<4 x float>* %F, float* %f) nounwind {
+; CHECK-LABEL: test1:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movaps (%ecx), %xmm0
+; CHECK-NEXT:    addps %xmm0, %xmm0
+; CHECK-NEXT:    movss %xmm0, (%eax)
+; CHECK-NEXT:    retl
+entry:
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
 	%tmp7 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	%tmp2 = extractelement <4 x float> %tmp7, i32 0		; <float> [#uses=1]
@@ -13,6 +20,18 @@ define void @test1(<4 x float>* %F, float* %f) nounwind {
 }
 
 define float @test2(<4 x float>* %F, float* %f) nounwind {
+; CHECK-LABEL: test2:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movaps (%eax), %xmm0
+; CHECK-NEXT:    addps %xmm0, %xmm0
+; CHECK-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; CHECK-NEXT:    movss %xmm0, (%esp)
+; CHECK-NEXT:    flds (%esp)
+; CHECK-NEXT:    popl %eax
+; CHECK-NEXT:    retl
+entry:
 	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
 	%tmp7 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	%tmp2 = extractelement <4 x float> %tmp7, i32 2		; <float> [#uses=1]
@@ -20,6 +39,14 @@ define float @test2(<4 x float>* %F, float* %f) nounwind {
 }
 
 define void @test3(float* %R, <4 x float>* %P1) nounwind {
+; CHECK-LABEL: test3:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movss 12(%ecx), %xmm0
+; CHECK-NEXT:    movss %xmm0, (%eax)
+; CHECK-NEXT:    retl
+entry:
 	%X = load <4 x float>* %P1		; <<4 x float>> [#uses=1]
 	%tmp = extractelement <4 x float> %X, i32 3		; <float> [#uses=1]
 	store float %tmp, float* %R
@@ -27,6 +54,17 @@ define void @test3(float* %R, <4 x float>* %P1) nounwind {
 }
 
 define double @test4(double %A) nounwind {
+; CHECK-LABEL: test4:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    subl $12, %esp
+; CHECK-NEXT:    calll foo
+; CHECK-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; CHECK-NEXT:    addsd {{[0-9]+}}(%esp), %xmm0
+; CHECK-NEXT:    movsd %xmm0, (%esp)
+; CHECK-NEXT:    fldl (%esp)
+; CHECK-NEXT:    addl $12, %esp
+; CHECK-NEXT:    retl
+entry:
 	%tmp1 = call <2 x double> @foo( )		; <<2 x double>> [#uses=1]
 	%tmp2 = extractelement <2 x double> %tmp1, i32 1		; <double> [#uses=1]
 	%tmp3 = fadd double %tmp2, %A		; <double> [#uses=1]
diff --git a/test/CodeGen/X86/vec_fabs.ll b/test/CodeGen/X86/vec_fabs.ll
index 4c14a9602d41..ac02acfed342 100644
--- a/test/CodeGen/X86/vec_fabs.ll
+++ b/test/CodeGen/X86/vec_fabs.ll
@@ -38,21 +38,38 @@ define <8 x float> @fabs_v8f32(<8 x float> %p)
 declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
 
 ; PR20354: when generating code for a vector fabs op,
-; make sure the correct mask is used for all vector elements.
-; CHECK-LABEL: .LCPI4_0:
-; CHECK-NEXT:    .long	2147483647
-; CHECK-NEXT:    .long	2147483647
-define i64 @fabs_v2f32(<2 x float> %v) {
-; CHECK-LABEL: fabs_v2f32:
-; CHECK:         movabsq $-9223372034707292160, %[[R:r[^ ]+]]
-; CHECK-NEXT:    vmovq %[[R]], %[[X:xmm[0-9]+]]
-; CHECK-NEXT:    vandps   {{.*}}.LCPI4_0{{.*}}, %[[X]], %[[X]]
-; CHECK-NEXT:    vmovq   %[[X]], %rax
-; CHECK-NEXT:    retq
-  %highbits = bitcast i64 9223372039002259456 to <2 x float> ; 0x8000_0000_8000_0000
-  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %highbits)
-  %ret = bitcast <2 x float> %fabs to i64
-  ret i64 %ret
+; make sure that we're only turning off the sign bit of each float value.
+; No constant pool loads or vector ops are needed for the fabs of a
+; bitcasted integer constant; we should just return an integer constant
+; that has the sign bits turned off.
+;
+; So instead of something like this:
+;    movabsq (constant pool load of mask for sign bits) 
+;    vmovq   (move from integer register to vector/fp register)
+;    vandps  (mask off sign bits)
+;    vmovq   (move vector/fp register back to integer return register)
+;
+; We should generate:
+;    mov     (put constant value in return register)
+
+define i64 @fabs_v2f32_1() {
+; CHECK-LABEL: fabs_v2f32_1:
+; CHECK: movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
+; CHECK-NEXT: retq
+ %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
+ %ret = bitcast <2 x float> %fabs to i64
+ ret i64 %ret
+}
+
+define i64 @fabs_v2f32_2() {
+; CHECK-LABEL: fabs_v2f32_2:
+; CHECK: movl $2147483647, %eax       # imm = 0x7FFFFFFF
+; CHECK-NEXT: retq
+ %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
+ %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
+ %ret = bitcast <2 x float> %fabs to i64
+ ret i64 %ret
 }
 
 declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)
diff --git a/test/CodeGen/X86/vec_fneg.ll b/test/CodeGen/X86/vec_fneg.ll
index d49c70e56391..9743f7148c69 100644
--- a/test/CodeGen/X86/vec_fneg.ll
+++ b/test/CodeGen/X86/vec_fneg.ll
@@ -1,11 +1,45 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s
 
+; FNEG is defined as subtraction from -0.0.
+
+; This test verifies that we use an xor with a constant to flip the sign bits; no subtraction needed.
 define <4 x float> @t1(<4 x float> %Q) {
-        %tmp15 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %Q
-	ret <4 x float> %tmp15
+; CHECK-LABEL: t1:
+; CHECK: xorps	{{.*}}LCPI0_0{{.*}}, %xmm0
+; CHECK-NEXT: retq
+        %tmp = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %Q
+	ret <4 x float> %tmp
 }
 
+; This test verifies that we generate an FP subtraction because "0.0 - x" is not an fneg.
 define <4 x float> @t2(<4 x float> %Q) {
-        %tmp15 = fsub <4 x float> zeroinitializer, %Q
-	ret <4 x float> %tmp15
+; CHECK-LABEL: t2:
+; CHECK: xorps	%[[X:xmm[0-9]+]], %[[X]]
+; CHECK-NEXT: subps	%xmm0, %[[X]]
+; CHECK-NEXT: movaps	%[[X]], %xmm0
+; CHECK-NEXT: retq
+        %tmp = fsub <4 x float> zeroinitializer, %Q
+	ret <4 x float> %tmp
+}
+
+; If we're bitcasting an integer to an FP vector, we should avoid the FPU/vector unit entirely.
+; Make sure that we're flipping the sign bit and only the sign bit of each float.
+; So instead of something like this:
+;    movd	%rdi, %xmm0
+;    xorps	.LCPI2_0(%rip), %xmm0
+;
+; We should generate:
+;    movabsq     (put sign bit mask in integer register))
+;    xorq        (flip sign bits)
+;    movd        (move to xmm return register) 
+
+define <2 x float> @fneg_bitcast(i64 %i) {
+; CHECK-LABEL: fneg_bitcast:
+; CHECK:	movabsq	$-9223372034707292160, %rax # imm = 0x8000000080000000
+; CHECK-NEXT:	xorq	%rdi, %rax
+; CHECK-NEXT:	movd	%rax, %xmm0
+; CHECK-NEXT:	retq
+  %bitcast = bitcast i64 %i to <2 x float>
+  %fneg = fsub <2 x float> <float -0.0, float -0.0>, %bitcast
+  ret <2 x float> %fneg
 }
diff --git a/test/CodeGen/X86/vec_insert-5.ll b/test/CodeGen/X86/vec_insert-5.ll
index 5cb9f694bd61..b72044aee30b 100644
--- a/test/CodeGen/X86/vec_insert-5.ll
+++ b/test/CodeGen/X86/vec_insert-5.ll
@@ -2,66 +2,87 @@
 ; There are no MMX operations in @t1
 
 define void  @t1(i32 %a, x86_mmx* %P) nounwind {
-       %tmp12 = shl i32 %a, 12
-       %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
-       %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
-       %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx
-       store x86_mmx %tmp23, x86_mmx* %P
-       ret void
-
 ; CHECK-LABEL: t1:
-; CHECK-NOT: %mm
-; CHECK: shll $12
-; CHECK-NOT: %mm
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    shll $12, %ecx
+; CHECK-NEXT:    movd %ecx, %xmm0
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,0,1]
+; CHECK-NEXT:    movlpd %xmm0, (%eax)
+; CHECK-NEXT:    retl
+ %tmp12 = shl i32 %a, 12
+ %tmp21 = insertelement <2 x i32> undef, i32 %tmp12, i32 1
+ %tmp22 = insertelement <2 x i32> %tmp21, i32 0, i32 0
+ %tmp23 = bitcast <2 x i32> %tmp22 to x86_mmx
+ store x86_mmx %tmp23, x86_mmx* %P
+ ret void
 }
 
 define <4 x float> @t2(<4 x float>* %P) nounwind {
-        %tmp1 = load <4 x float>* %P
-        %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
-        ret <4 x float> %tmp2
-
 ; CHECK-LABEL: t2:
-; CHECK: pslldq $12
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movaps (%eax), %xmm1
+; CHECK-NEXT:    xorps %xmm0, %xmm0
+; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
+; CHECK-NEXT:    retl
+  %tmp1 = load <4 x float>* %P
+  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
+  ret <4 x float> %tmp2
 }
 
 define <4 x float> @t3(<4 x float>* %P) nounwind {
-        %tmp1 = load <4 x float>* %P
-        %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
-        ret <4 x float> %tmp2
-
 ; CHECK-LABEL: t3:
-; CHECK: psrldq $8
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movaps (%eax), %xmm0
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[0,0]
+; CHECK-NEXT:    retl
+  %tmp1 = load <4 x float>* %P
+  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
+  ret <4 x float> %tmp2
 }
 
 define <4 x float> @t4(<4 x float>* %P) nounwind {
-        %tmp1 = load <4 x float>* %P
-        %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
-        ret <4 x float> %tmp2
-
 ; CHECK-LABEL: t4:
-; CHECK: psrldq $12
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movaps (%eax), %xmm0
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,0]
+; CHECK-NEXT:    retl
+  %tmp1 = load <4 x float>* %P
+  %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
+  ret <4 x float> %tmp2
 }
 
 define <16 x i8> @t5(<16 x i8> %x) nounwind {
-        %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
-        ret <16 x i8> %s
-
 ; CHECK-LABEL: t5:
-; CHECK: psrldq $1
+; CHECK:       # BB#0:
+; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; CHECK-NEXT:    retl
+  %s = shufflevector <16 x i8> %x, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 17>
+  ret <16 x i8> %s
 }
 
 define <16 x i8> @t6(<16 x i8> %x) nounwind {
-        %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-        ret <16 x i8> %s
-
 ; CHECK-LABEL: t6:
-; CHECK: palignr $1
+; CHECK:       # BB#0:
+; CHECK-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; CHECK-NEXT:    retl
+  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <16 x i8> %s
 }
 
 define <16 x i8> @t7(<16 x i8> %x) nounwind {
-        %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
-        ret <16 x i8> %s
-
 ; CHECK-LABEL: t7:
-; CHECK: pslldq $13
+; CHECK:       # BB#0:
+; CHECK-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
+; CHECK-NEXT:    retl
+  %s = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2>
+  ret <16 x i8> %s
 }
diff --git a/test/CodeGen/X86/vec_insert-6.ll b/test/CodeGen/X86/vec_insert-6.ll
deleted file mode 100644
index 4583e1925e59..000000000000
--- a/test/CodeGen/X86/vec_insert-6.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; REQUIRES: asserts
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn | grep pslldq
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6
-
-define <4 x float> @t3(<4 x float>* %P) nounwind  {
-	%tmp1 = load <4 x float>* %P
-	%tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
-	ret <4 x float> %tmp2
-}
diff --git a/test/CodeGen/X86/vec_insert.ll b/test/CodeGen/X86/vec_insert.ll
deleted file mode 100644
index 0ed8f1052366..000000000000
--- a/test/CodeGen/X86/vec_insert.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse4.1 | grep movss | count 1
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse4.1 | not grep pinsrw
-
-define void @test(<4 x float>* %F, i32 %I) nounwind {
-	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=1]
-	%f = sitofp i32 %I to float		; <float> [#uses=1]
-	%tmp1 = insertelement <4 x float> %tmp, float %f, i32 0		; <<4 x float>> [#uses=2]
-	%tmp18 = fadd <4 x float> %tmp1, %tmp1		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp18, <4 x float>* %F
-	ret void
-}
-
-define void @test2(<4 x float>* %F, i32 %I, float %g) nounwind {
-	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=1]
-	%f = sitofp i32 %I to float		; <float> [#uses=1]
-	%tmp1 = insertelement <4 x float> %tmp, float %f, i32 2		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp1, <4 x float>* %F
-	ret void
-}
diff --git a/test/CodeGen/X86/vec_loadsingles.ll b/test/CodeGen/X86/vec_loadsingles.ll
index 8812c4f820c6..fd132a52b8f1 100644
--- a/test/CodeGen/X86/vec_loadsingles.ll
+++ b/test/CodeGen/X86/vec_loadsingles.ll
@@ -1,12 +1,145 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
-
-define <4 x float> @a(<4 x float> %a, float* nocapture %p) nounwind readonly {
-entry:
-	%tmp1 = load float* %p
-	%vecins = insertelement <4 x float> undef, float %tmp1, i32 0
-	%add.ptr = getelementptr float* %p, i32 1
-	%tmp5 = load float* %add.ptr
-	%vecins7 = insertelement <4 x float> %vecins, float %tmp5, i32 1
-	ret <4 x float> %vecins7
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,-slow-unaligned-mem-32 | FileCheck %s --check-prefix=ALL --check-prefix=FAST32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+slow-unaligned-mem-32 | FileCheck %s --check-prefix=ALL --check-prefix=SLOW32
+
+define <4 x float> @merge_2_floats(float* nocapture %p) nounwind readonly {
+  %tmp1 = load float* %p
+  %vecins = insertelement <4 x float> undef, float %tmp1, i32 0
+  %add.ptr = getelementptr float* %p, i32 1
+  %tmp5 = load float* %add.ptr
+  %vecins7 = insertelement <4 x float> %vecins, float %tmp5, i32 1
+  ret <4 x float> %vecins7
+
+; ALL-LABEL: merge_2_floats
+; ALL: vmovq
+; ALL-NEXT: retq
+}
+
+; Test-case generated due to a crash when trying to treat loading the first
+; two i64s of a <4 x i64> as a load of two i32s.
+define <4 x i64> @merge_2_floats_into_4() {
+  %1 = load i64** undef, align 8
+  %2 = getelementptr inbounds i64* %1, i64 0
+  %3 = load i64* %2
+  %4 = insertelement <4 x i64> undef, i64 %3, i32 0
+  %5 = load i64** undef, align 8
+  %6 = getelementptr inbounds i64* %5, i64 1
+  %7 = load i64* %6
+  %8 = insertelement <4 x i64> %4, i64 %7, i32 1
+  %9 = shufflevector <4 x i64> %8, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ret <4 x i64> %9
+  
+; ALL-LABEL: merge_2_floats_into_4
+; ALL: vmovups
+; ALL-NEXT: retq
+}
+
+define <4 x float> @merge_4_floats(float* %ptr) {
+  %a = load float* %ptr, align 8
+  %vec = insertelement <4 x float> undef, float %a, i32 0
+  %idx1 = getelementptr inbounds float* %ptr, i64 1
+  %b = load float* %idx1, align 8
+  %vec2 = insertelement <4 x float> %vec, float %b, i32 1
+  %idx3 = getelementptr inbounds float* %ptr, i64 2
+  %c = load float* %idx3, align 8
+  %vec4 = insertelement <4 x float> %vec2, float %c, i32 2
+  %idx5 = getelementptr inbounds float* %ptr, i64 3
+  %d = load float* %idx5, align 8
+  %vec6 = insertelement <4 x float> %vec4, float %d, i32 3
+  ret <4 x float> %vec6
+
+; ALL-LABEL: merge_4_floats
+; ALL: vmovups
+; ALL-NEXT: retq
+}
+
+; PR21710 ( http://llvm.org/bugs/show_bug.cgi?id=21710 ) 
+; Make sure that 32-byte vectors are handled efficiently.
+; If the target has slow 32-byte accesses, we should still generate
+; 16-byte loads.
+
+define <8 x float> @merge_8_floats(float* %ptr) {
+  %a = load float* %ptr, align 4
+  %vec = insertelement <8 x float> undef, float %a, i32 0
+  %idx1 = getelementptr inbounds float* %ptr, i64 1
+  %b = load float* %idx1, align 4
+  %vec2 = insertelement <8 x float> %vec, float %b, i32 1
+  %idx3 = getelementptr inbounds float* %ptr, i64 2
+  %c = load float* %idx3, align 4
+  %vec4 = insertelement <8 x float> %vec2, float %c, i32 2
+  %idx5 = getelementptr inbounds float* %ptr, i64 3
+  %d = load float* %idx5, align 4
+  %vec6 = insertelement <8 x float> %vec4, float %d, i32 3
+  %idx7 = getelementptr inbounds float* %ptr, i64 4
+  %e = load float* %idx7, align 4
+  %vec8 = insertelement <8 x float> %vec6, float %e, i32 4
+  %idx9 = getelementptr inbounds float* %ptr, i64 5
+  %f = load float* %idx9, align 4
+  %vec10 = insertelement <8 x float> %vec8, float %f, i32 5
+  %idx11 = getelementptr inbounds float* %ptr, i64 6
+  %g = load float* %idx11, align 4
+  %vec12 = insertelement <8 x float> %vec10, float %g, i32 6
+  %idx13 = getelementptr inbounds float* %ptr, i64 7
+  %h = load float* %idx13, align 4
+  %vec14 = insertelement <8 x float> %vec12, float %h, i32 7
+  ret <8 x float> %vec14
+
+; ALL-LABEL: merge_8_floats
+
+; FAST32: vmovups
+; FAST32-NEXT: retq
+
+; SLOW32: vmovups
+; SLOW32-NEXT: vinsertf128
+; SLOW32-NEXT: retq
+}
+
+define <4 x double> @merge_4_doubles(double* %ptr) {
+  %a = load double* %ptr, align 8
+  %vec = insertelement <4 x double> undef, double %a, i32 0
+  %idx1 = getelementptr inbounds double* %ptr, i64 1
+  %b = load double* %idx1, align 8
+  %vec2 = insertelement <4 x double> %vec, double %b, i32 1
+  %idx3 = getelementptr inbounds double* %ptr, i64 2
+  %c = load double* %idx3, align 8
+  %vec4 = insertelement <4 x double> %vec2, double %c, i32 2
+  %idx5 = getelementptr inbounds double* %ptr, i64 3
+  %d = load double* %idx5, align 8
+  %vec6 = insertelement <4 x double> %vec4, double %d, i32 3
+  ret <4 x double> %vec6
+
+; ALL-LABEL: merge_4_doubles
+; FAST32: vmovups
+; FAST32-NEXT: retq
+
+; SLOW32: vmovups
+; SLOW32-NEXT: vinsertf128
+; SLOW32-NEXT: retq
+}
+
+; PR21771 ( http://llvm.org/bugs/show_bug.cgi?id=21771 ) 
+; Recognize and combine consecutive loads even when the
+; first of the combined loads is offset from the base address.
+define <4 x double> @merge_4_doubles_offset(double* %ptr) {
+  %arrayidx4 = getelementptr inbounds double* %ptr, i64 4
+  %arrayidx5 = getelementptr inbounds double* %ptr, i64 5
+  %arrayidx6 = getelementptr inbounds double* %ptr, i64 6
+  %arrayidx7 = getelementptr inbounds double* %ptr, i64 7
+  %e = load double* %arrayidx4, align 8
+  %f = load double* %arrayidx5, align 8
+  %g = load double* %arrayidx6, align 8
+  %h = load double* %arrayidx7, align 8
+  %vecinit4 = insertelement <4 x double> undef, double %e, i32 0
+  %vecinit5 = insertelement <4 x double> %vecinit4, double %f, i32 1
+  %vecinit6 = insertelement <4 x double> %vecinit5, double %g, i32 2
+  %vecinit7 = insertelement <4 x double> %vecinit6, double %h, i32 3
+  ret <4 x double> %vecinit7
+
+; ALL-LABEL: merge_4_doubles_offset
+; FAST32: vmovups
+; FAST32-NEXT: retq
+
+; SLOW32: vmovups
+; SLOW32-NEXT: vinsertf128
+; SLOW32-NEXT: retq
 }
 
diff --git a/test/CodeGen/X86/vec_set-3.ll b/test/CodeGen/X86/vec_set-3.ll
index d1d7608a0411..a13c813ea7b0 100644
--- a/test/CodeGen/X86/vec_set-3.ll
+++ b/test/CodeGen/X86/vec_set-3.ll
@@ -1,17 +1,37 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -o %t
-; RUN: grep pshufd %t | count 2
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn | FileCheck %s
 
-define <4 x float> @test(float %a) nounwind {
-        %tmp = insertelement <4 x float> zeroinitializer, float %a, i32 1               ; <<4 x float>> [#uses=1]
-        %tmp5 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 2               ; <<4 x float>> [#uses=1]
-        %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3              ; <<4 x float>> [#uses=1]
-        ret <4 x float> %tmp6
+define <4 x float> @test(float %a) {
+; CHECK-LABEL: test:
+; CHECK:         insertps $29, {{.*}}, %xmm0
+; CHECK-NEXT:    retl
+
+entry:
+  %tmp = insertelement <4 x float> zeroinitializer, float %a, i32 1
+  %tmp5 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 2
+  %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3
+  ret <4 x float> %tmp6
 }
 
-define <2 x i64> @test2(i32 %a) nounwind {
-        %tmp7 = insertelement <4 x i32> zeroinitializer, i32 %a, i32 2          ; <<4 x i32>> [#uses=1]
-        %tmp9 = insertelement <4 x i32> %tmp7, i32 0, i32 3             ; <<4 x i32>> [#uses=1]
-        %tmp10 = bitcast <4 x i32> %tmp9 to <2 x i64>           ; <<2 x i64>> [#uses=1]
-        ret <2 x i64> %tmp10
+define <2 x i64> @test2(i32 %a) {
+; CHECK-LABEL: test2:
+; CHECK:         movd {{.*}}, %xmm0
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
+; CHECK-NEXT:    retl
+
+entry:
+  %tmp7 = insertelement <4 x i32> zeroinitializer, i32 %a, i32 2
+  %tmp9 = insertelement <4 x i32> %tmp7, i32 0, i32 3
+  %tmp10 = bitcast <4 x i32> %tmp9 to <2 x i64>
+  ret <2 x i64> %tmp10
 }
 
+define <4 x float> @test3(<4 x float> %A) {
+; CHECK-LABEL: test3:
+; CHECK:         insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
+; CHECK-NEXT:    retl
+
+  %tmp0 = extractelement <4 x float> %A, i32 0
+  %tmp1 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef >, float %tmp0, i32 1
+  %tmp2 = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 2
+  ret <4 x float> %tmp2
+}
diff --git a/test/CodeGen/X86/vec_set-5.ll b/test/CodeGen/X86/vec_set-5.ll
deleted file mode 100644
index f811a7404a27..000000000000
--- a/test/CodeGen/X86/vec_set-5.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -o %t
-; RUN: grep movlhps   %t | count 1
-; RUN: grep movq      %t | count 2
-
-define <4 x float> @test1(float %a, float %b) nounwind {
-	%tmp = insertelement <4 x float> zeroinitializer, float %a, i32 0		; <<4 x float>> [#uses=1]
-	%tmp6 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
-	%tmp8 = insertelement <4 x float> %tmp6, float %b, i32 2		; <<4 x float>> [#uses=1]
-	%tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
-	ret <4 x float> %tmp9
-}
-
-define <4 x float> @test2(float %a, float %b) nounwind {
-	%tmp = insertelement <4 x float> zeroinitializer, float %a, i32 0		; <<4 x float>> [#uses=1]
-	%tmp7 = insertelement <4 x float> %tmp, float %b, i32 1		; <<4 x float>> [#uses=1]
-	%tmp8 = insertelement <4 x float> %tmp7, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
-	%tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
-	ret <4 x float> %tmp9
-}
-
-define <2 x i64> @test3(i32 %a, i32 %b) nounwind {
-	%tmp = insertelement <4 x i32> zeroinitializer, i32 %a, i32 0		; <<4 x i32>> [#uses=1]
-	%tmp6 = insertelement <4 x i32> %tmp, i32 %b, i32 1		; <<4 x i32>> [#uses=1]
-	%tmp8 = insertelement <4 x i32> %tmp6, i32 0, i32 2		; <<4 x i32>> [#uses=1]
-	%tmp10 = insertelement <4 x i32> %tmp8, i32 0, i32 3		; <<4 x i32>> [#uses=1]
-	%tmp11 = bitcast <4 x i32> %tmp10 to <2 x i64>		; <<2 x i64>> [#uses=1]
-	ret <2 x i64> %tmp11
-}
diff --git a/test/CodeGen/X86/vec_set-9.ll b/test/CodeGen/X86/vec_set-9.ll
deleted file mode 100644
index a73909097c11..000000000000
--- a/test/CodeGen/X86/vec_set-9.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mattr=-avx,-pad-short-functions | FileCheck %s
-
-; CHECK: test3
-; CHECK: movd
-; CHECK-NOT: movd
-; CHECK: {{movlhps.*%xmm0, %xmm0}}
-; CHECK-NEXT: ret
-
-define <2 x i64> @test3(i64 %A) nounwind {
-entry:
-	%B = insertelement <2 x i64> undef, i64 %A, i32 1
-	ret <2 x i64> %B
-}
-
diff --git a/test/CodeGen/X86/vec_set-E.ll b/test/CodeGen/X86/vec_set-E.ll
deleted file mode 100644
index d78be669fc7f..000000000000
--- a/test/CodeGen/X86/vec_set-E.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movq
-
-define <4 x float> @t(float %X) nounwind  {
-	%tmp11 = insertelement <4 x float> undef, float %X, i32 0
-	%tmp12 = insertelement <4 x float> %tmp11, float %X, i32 1
-	%tmp27 = insertelement <4 x float> %tmp12, float 0.000000e+00, i32 2
-	%tmp28 = insertelement <4 x float> %tmp27, float 0.000000e+00, i32 3
-	ret <4 x float> %tmp28
-}
diff --git a/test/CodeGen/X86/vec_set-G.ll b/test/CodeGen/X86/vec_set-G.ll
deleted file mode 100644
index 4a542feafaff..000000000000
--- a/test/CodeGen/X86/vec_set-G.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss
-
-define fastcc void @t(<4 x float> %A) nounwind  {
-	%tmp41896 = extractelement <4 x float> %A, i32 0		; <float> [#uses=1]
-	%tmp14082 = insertelement <4 x float> < float 0.000000e+00, float undef, float undef, float undef >, float %tmp41896, i32 1		; <<4 x float>> [#uses=1]
-	%tmp14083 = insertelement <4 x float> %tmp14082, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp14083, <4 x float>* null, align 16
-        ret void
-}
diff --git a/test/CodeGen/X86/vec_set-I.ll b/test/CodeGen/X86/vec_set-I.ll
deleted file mode 100644
index c5d6ab88a35d..000000000000
--- a/test/CodeGen/X86/vec_set-I.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
-
-; CHECK-NOT: xorp
-; CHECK: movd
-; CHECK-NOT: xorp
-
-define void @t1() nounwind  {
-	%tmp298.i.i = load <4 x float>* null, align 16
-	%tmp304.i.i = bitcast <4 x float> %tmp298.i.i to <4 x i32>
-	%tmp305.i.i = and <4 x i32> %tmp304.i.i, < i32 -1, i32 0, i32 0, i32 0 >
-	store <4 x i32> %tmp305.i.i, <4 x i32>* null, align 16
-	unreachable
-}
diff --git a/test/CodeGen/X86/vec_set-J.ll b/test/CodeGen/X86/vec_set-J.ll
deleted file mode 100644
index d90ab85b8cf7..000000000000
--- a/test/CodeGen/X86/vec_set-J.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep movss
-; PR2472
-
-define <4 x i32> @a(<4 x i32> %a) nounwind {
-entry:
-        %vecext = extractelement <4 x i32> %a, i32 0
-        insertelement <4 x i32> zeroinitializer, i32 %vecext, i32 0
-        %add = add <4 x i32> %a, %0
-        ret <4 x i32> %add
-}
diff --git a/test/CodeGen/X86/vec_setcc.ll b/test/CodeGen/X86/vec_setcc.ll
index 322dbae0c89f..b69f90cd6e2f 100644
--- a/test/CodeGen/X86/vec_setcc.ll
+++ b/test/CodeGen/X86/vec_setcc.ll
@@ -62,8 +62,7 @@ define <8 x i16> @v8i16_icmp_ule(<8 x i16> %a, <8 x i16> %b) nounwind readnone s
 ; SSE2-LABEL: v8i16_icmp_ule:
 ; SSE2: psubusw %xmm1, %xmm0
 ; SSE2: pxor    %xmm1, %xmm1
-; SSE2: pcmpeqw %xmm0, %xmm1
-; SSE2: movdqa  %xmm1, %xmm0
+; SSE2: pcmpeqw %xmm1, %xmm0
 
 ; SSE41-LABEL: v8i16_icmp_ule:
 ; SSE41: pminuw  %xmm0, %xmm1
@@ -106,8 +105,7 @@ define <4 x i32> @v4i32_icmp_ule(<4 x i32> %a, <4 x i32> %b) nounwind readnone s
 ; SSE2: pxor    %xmm2, %xmm0
 ; SSE2: pcmpgtd %xmm1, %xmm0
 ; SSE2: pcmpeqd %xmm1, %xmm1
-; SSE2: pxor    %xmm0, %xmm1
-; SSE2: movdqa  %xmm1, %xmm0
+; SSE2: pxor    %xmm1, %xmm0
 
 ; SSE41-LABEL: v4i32_icmp_ule:
 ; SSE41: pminud  %xmm0, %xmm1
diff --git a/test/CodeGen/X86/vec_sext.ll b/test/CodeGen/X86/vec_sext.ll
deleted file mode 100644
index 776ddec2e63b..000000000000
--- a/test/CodeGen/X86/vec_sext.ll
+++ /dev/null
@@ -1,69 +0,0 @@
-; RUN: llc < %s -march=x86-64
-; PR 9267
-
-define<4 x i32> @func_16_32() {
-  %F = load <4 x i16>* undef
-  %G = sext <4 x i16> %F to <4 x i32>
-  %H = load <4 x i16>* undef
-  %Y = sext <4 x i16> %H to <4 x i32>
-  %T = add <4 x i32> %Y, %G
-  store <4 x i32>%T , <4 x i32>* undef
-  ret <4 x i32> %T
-}
-
-define<4 x i64> @func_16_64() {
-  %F = load <4 x i16>* undef
-  %G = sext <4 x i16> %F to <4 x i64>
-  %H = load <4 x i16>* undef
-  %Y = sext <4 x i16> %H to <4 x i64>
-  %T = xor <4 x i64> %Y, %G
-  store <4 x i64>%T , <4 x i64>* undef
-  ret <4 x i64> %T
-}
-
-define<4 x i64> @func_32_64() {
-  %F = load <4 x i32>* undef
-  %G = sext <4 x i32> %F to <4 x i64>
-  %H = load <4 x i32>* undef
-  %Y = sext <4 x i32> %H to <4 x i64>
-  %T = or <4 x i64> %Y, %G
-  ret <4 x i64> %T
-}
-
-define<4 x i16> @func_8_16() {
-  %F = load <4 x i8>* undef
-  %G = sext <4 x i8> %F to <4 x i16>
-  %H = load <4 x i8>* undef
-  %Y = sext <4 x i8> %H to <4 x i16>
-  %T = add <4 x i16> %Y, %G
-  ret <4 x i16> %T
-}
-
-define<4 x i32> @func_8_32() {
-  %F = load <4 x i8>* undef
-  %G = sext <4 x i8> %F to <4 x i32>
-  %H = load <4 x i8>* undef
-  %Y = sext <4 x i8> %H to <4 x i32>
-  %T = sub <4 x i32> %Y, %G
-  ret <4 x i32> %T
-}
-
-define<4 x i64> @func_8_64() {
-  %F = load <4 x i8>* undef
-  %G = sext <4 x i8> %F to <4 x i64>
-  %H = load <4 x i8>* undef
-  %Y = sext <4 x i8> %H to <4 x i64>
-  %T = add <4 x i64> %Y, %G
-  ret <4 x i64> %T
-}
-
-define<4 x i32> @const_16_32() {
-  %G = sext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i32>
-  ret <4 x i32> %G
-}
-
-define<4 x i64> @const_16_64() {
-  %G = sext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i64>
-  ret <4 x i64> %G
-}
-
diff --git a/test/CodeGen/X86/vec_shuffle-11.ll b/test/CodeGen/X86/vec_shuffle-11.ll
deleted file mode 100644
index 640745ae2645..000000000000
--- a/test/CodeGen/X86/vec_shuffle-11.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin | not grep mov
-
-define <4 x i32> @test() nounwind {
-        %tmp131 = call <2 x i64> @llvm.x86.sse2.psrl.dq( <2 x i64> < i64 -1, i64 -1 >, i32 96 )         ; <<2 x i64>> [#uses=1]
-        %tmp137 = bitcast <2 x i64> %tmp131 to <4 x i32>                ; <<4 x i32>> [#uses=1]
-        %tmp138 = and <4 x i32> %tmp137, bitcast (<2 x i64> < i64 -1, i64 -1 > to <4 x i32>)            ; <<4 x i32>> [#uses=1]
-        ret <4 x i32> %tmp138
-}
-
-declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32)
diff --git a/test/CodeGen/X86/vec_shuffle-14.ll b/test/CodeGen/X86/vec_shuffle-14.ll
deleted file mode 100644
index 8f2519728b77..000000000000
--- a/test/CodeGen/X86/vec_shuffle-14.ll
+++ /dev/null
@@ -1,70 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s -check-prefix=X86-32
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,-avx | FileCheck %s -check-prefix=X86-64
-
-define <4 x i32> @t1(i32 %a) nounwind  {
-entry:
-        %tmp = insertelement <4 x i32> undef, i32 %a, i32 0
-	%tmp6 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp, <4 x i32> < i32 4, i32 1, i32 2, i32 3 >		; <<4 x i32>> [#uses=1]
-	ret <4 x i32> %tmp6
-
-; X86-32-LABEL: t1:
-; X86-32: movd	4(%esp), %xmm0
-
-; X86-64-LABEL: t1:
-; X86-64: movd	%e{{..}}, %xmm0
-}
-
-define <2 x i64> @t2(i64 %a) nounwind  {
-entry:
-        %tmp = insertelement <2 x i64> undef, i64 %a, i32 0
-	%tmp6 = shufflevector <2 x i64> zeroinitializer, <2 x i64> %tmp, <2 x i32> < i32 2, i32 1 >		; <<4 x i32>> [#uses=1]
-	ret <2 x i64> %tmp6
-
-; X86-32-LABEL: t2:
-; X86-32: movq	4(%esp), %xmm0
-
-; X86-64-LABEL: t2:
-; X86-64: movd	%r{{..}}, %xmm0
-}
-
-define <2 x i64> @t3(<2 x i64>* %a) nounwind  {
-entry:
-	%tmp4 = load <2 x i64>* %a, align 16		; <<2 x i64>> [#uses=1]
-	%tmp6 = bitcast <2 x i64> %tmp4 to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%tmp7 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp6, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x i32>> [#uses=1]
-	%tmp8 = bitcast <4 x i32> %tmp7 to <2 x i64>		; <<2 x i64>> [#uses=1]
-	ret <2 x i64> %tmp8
-
-; X86-32-LABEL: t3:
-; X86-32: movl	4(%esp)
-; X86-32: movq
-
-; X86-64-LABEL: t3:
-; X86-64: movq	({{.*}}), %xmm0
-}
-
-define <2 x i64> @t4(<2 x i64> %a) nounwind  {
-entry:
-	%tmp5 = bitcast <2 x i64> %a to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%tmp6 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %tmp5, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x i32>> [#uses=1]
-	%tmp7 = bitcast <4 x i32> %tmp6 to <2 x i64>		; <<2 x i64>> [#uses=1]
-	ret <2 x i64> %tmp7
-
-; X86-32-LABEL: t4:
-; X86-32: movq %xmm0, %xmm0
-
-; X86-64-LABEL: t4:
-; X86-64: movq {{.*}}, %xmm0
-}
-
-define <2 x i64> @t5(<2 x i64> %a) nounwind  {
-entry:
-	%tmp6 = shufflevector <2 x i64> zeroinitializer, <2 x i64> %a, <2 x i32> < i32 2, i32 1 >		; <<4 x i32>> [#uses=1]
-	ret <2 x i64> %tmp6
-
-; X86-32-LABEL: t5:
-; X86-32: movq %xmm0, %xmm0
-
-; X86-64-LABEL: t5:
-; X86-64: movq {{.*}}, %xmm0
-}
diff --git a/test/CodeGen/X86/vec_shuffle-15.ll b/test/CodeGen/X86/vec_shuffle-15.ll
deleted file mode 100644
index 5a9b8fd34579..000000000000
--- a/test/CodeGen/X86/vec_shuffle-15.ll
+++ /dev/null
@@ -1,81 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2
-
-define <2 x i64> @t00(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 0 >
-	ret <2 x i64> %tmp
-}
-
-define <2 x i64> @t01(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 1 >
-	ret <2 x i64> %tmp
-}
-
-define <2 x i64> @t02(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 2 >
-	ret <2 x i64> %tmp
-}
-
-define <2 x i64> @t03(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 0, i32 3 >
-	ret <2 x i64> %tmp
-}
-
-define <2 x i64> @t10(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 0 >
-	ret <2 x i64> %tmp
-}
-
-define <2 x i64> @t11(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 1 >
-	ret <2 x i64> %tmp
-}
-
-define <2 x i64> @t12(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 2 >
-	ret <2 x i64> %tmp
-}
-
-define <2 x i64> @t13(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 1, i32 3 >
-	ret <2 x i64> %tmp
-}
-
-define <2 x i64> @t20(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 0 >
-	ret <2 x i64> %tmp
-}
-
-define <2 x i64> @t21(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 1 >
-	ret <2 x i64> %tmp
-}
-
-define <2 x i64> @t22(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 2 >
-	ret <2 x i64> %tmp
-}
-
-define <2 x i64> @t23(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 2, i32 3 >
-	ret <2 x i64> %tmp
-}
-
-define <2 x i64> @t30(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 0 >
-	ret <2 x i64> %tmp
-}
-
-define <2 x i64> @t31(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 1 >
-	ret <2 x i64> %tmp
-}
-
-define <2 x i64> @t32(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 2 >
-	ret <2 x i64> %tmp
-}
-
-define <2 x i64> @t33(<2 x i64> %a, <2 x i64> %b) nounwind  {
-	%tmp = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> < i32 3, i32 3 >
-	ret <2 x i64> %tmp
-}
diff --git a/test/CodeGen/X86/vec_shuffle-16.ll b/test/CodeGen/X86/vec_shuffle-16.ll
deleted file mode 100644
index 9aeb94289c87..000000000000
--- a/test/CodeGen/X86/vec_shuffle-16.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse,-sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse
-; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=+sse2 -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=sse2
-
-; sse-LABEL:  t1:
-; sse2-LABEL: t1:
-define <4 x float> @t1(<4 x float> %a, <4 x float> %b) nounwind  {
-; sse: shufps
-; sse2: pshufd
-; sse2-NEXT: ret
-        %tmp1 = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer
-        ret <4 x float> %tmp1
-}
-
-; sse-LABEL:  t2:
-; sse2-LABEL: t2:
-define <4 x float> @t2(<4 x float> %A, <4 x float> %B) nounwind {
-; sse: shufps
-; sse2: pshufd
-; sse2-NEXT: ret
-	%tmp = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >
-	ret <4 x float> %tmp
-}
-
-; sse-LABEL:  t3:
-; sse2-LABEL: t3:
-define <4 x float> @t3(<4 x float> %A, <4 x float> %B) nounwind {
-; sse: shufps
-; sse2: pshufd
-; sse2-NEXT: ret
-	%tmp = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 4, i32 4, i32 4, i32 4 >
-	ret <4 x float> %tmp
-}
-
-; sse-LABEL:  t4:
-; sse2-LABEL: t4:
-define <4 x float> @t4(<4 x float> %A, <4 x float> %B) nounwind {
-
-; sse: shufps
-; sse2: pshufd
-; sse2-NEXT: ret
-	%tmp = shufflevector <4 x float> %A, <4 x float> %B, <4 x i32> < i32 1, i32 3, i32 2, i32 0 >
-	ret <4 x float> %tmp
-}
diff --git a/test/CodeGen/X86/vec_shuffle-17.ll b/test/CodeGen/X86/vec_shuffle-17.ll
deleted file mode 100644
index f2f96ba94af1..000000000000
--- a/test/CodeGen/X86/vec_shuffle-17.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mattr=-avx | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 -mattr=-avx | FileCheck %s
-; CHECK-NOT: xor
-; CHECK: movd {{%rdi|%rcx}}, %xmm0
-; CHECK-NOT: xor
-; PR2108
-
-define <2 x i64> @doload64(i64 %x) nounwind  {
-entry:
-	%tmp717 = bitcast i64 %x to double		; <double> [#uses=1]
-	%tmp8 = insertelement <2 x double> undef, double %tmp717, i32 0		; <<2 x double>> [#uses=1]
-	%tmp9 = insertelement <2 x double> %tmp8, double 0.000000e+00, i32 1		; <<2 x double>> [#uses=1]
-	%tmp11 = bitcast <2 x double> %tmp9 to <2 x i64>		; <<2 x i64>> [#uses=1]
-	ret <2 x i64> %tmp11
-}
-
diff --git a/test/CodeGen/X86/vec_shuffle-18.ll b/test/CodeGen/X86/vec_shuffle-18.ll
deleted file mode 100644
index 1104a4a8856b..000000000000
--- a/test/CodeGen/X86/vec_shuffle-18.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
-
-	%struct.vector4_t = type { <4 x float> }
-
-define void @swizzle(i8* %a, %struct.vector4_t* %b, %struct.vector4_t* %c) nounwind  {
-entry:
-	%tmp9 = getelementptr %struct.vector4_t* %b, i32 0, i32 0		; <<4 x float>*> [#uses=2]
-	%tmp10 = load <4 x float>* %tmp9, align 16		; <<4 x float>> [#uses=1]
-	%tmp14 = bitcast i8* %a to double*		; <double*> [#uses=1]
-	%tmp15 = load double* %tmp14		; <double> [#uses=1]
-	%tmp16 = insertelement <2 x double> undef, double %tmp15, i32 0		; <<2 x double>> [#uses=1]
-	%tmp18 = bitcast <2 x double> %tmp16 to <4 x float>		; <<4 x float>> [#uses=1]
-	%tmp19 = shufflevector <4 x float> %tmp10, <4 x float> %tmp18, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp19, <4 x float>* %tmp9, align 16
-	%tmp28 = getelementptr %struct.vector4_t* %c, i32 0, i32 0		; <<4 x float>*> [#uses=2]
-	%tmp29 = load <4 x float>* %tmp28, align 16		; <<4 x float>> [#uses=1]
-	%tmp26 = getelementptr i8* %a, i32 8		; <i8*> [#uses=1]
-	%tmp33 = bitcast i8* %tmp26 to double*		; <double*> [#uses=1]
-	%tmp34 = load double* %tmp33		; <double> [#uses=1]
-	%tmp35 = insertelement <2 x double> undef, double %tmp34, i32 0		; <<2 x double>> [#uses=1]
-	%tmp37 = bitcast <2 x double> %tmp35 to <4 x float>		; <<4 x float>> [#uses=1]
-	%tmp38 = shufflevector <4 x float> %tmp29, <4 x float> %tmp37, <4 x i32> < i32 4, i32 5, i32 2, i32 3 >		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp38, <4 x float>* %tmp28, align 16
-	ret void
-}
diff --git a/test/CodeGen/X86/vec_shuffle-19.ll b/test/CodeGen/X86/vec_shuffle-19.ll
deleted file mode 100644
index 48db8de0d936..000000000000
--- a/test/CodeGen/X86/vec_shuffle-19.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; REQUIRES: asserts
-; RUN: llc < %s -o /dev/null -march=x86 -mcpu=penryn -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
-; PR2485
-
-define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind  {
-entry:
-	%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> < i32 4, i32 0, i32 0, i32 0 >		; <<4 x i32>> [#uses=1]
-	ret <4 x i32> %shuffle
-}
diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll
deleted file mode 100644
index 5a2c4449456b..000000000000
--- a/test/CodeGen/X86/vec_shuffle-20.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; REQUIRES: asserts
-; RUN: llc < %s -o /dev/null -march=x86 -mcpu=corei7 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2
-
-define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind  {
-entry:
-	shufflevector <4 x float> %fp0, <4 x float> %fp1, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:0 [#uses=1]
-	ret <4 x float> %0
-}
diff --git a/test/CodeGen/X86/vec_shuffle-22.ll b/test/CodeGen/X86/vec_shuffle-22.ll
deleted file mode 100644
index 6807e4d63909..000000000000
--- a/test/CodeGen/X86/vec_shuffle-22.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=pentium-m  | FileCheck %s
-
-define <4 x float> @t1(<4 x float> %a) nounwind  {
-; CHECK: movlhps
-  %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 >       ; <<4 x float>> [#uses=1]
-  ret <4 x float> %tmp1
-}
-
-define <4 x i32> @t2(<4 x i32>* %a) nounwind {
-; CHECK: pshufd
-; CHECK: ret
-  %tmp1 = load <4 x i32>* %a
-	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> < i32 0, i32 1, i32 0, i32 1 >		; <<4 x i32>> [#uses=1]
-	ret <4 x i32> %tmp2
-}
diff --git a/test/CodeGen/X86/vec_shuffle-23.ll b/test/CodeGen/X86/vec_shuffle-23.ll
deleted file mode 100644
index 24687359cc5a..000000000000
--- a/test/CodeGen/X86/vec_shuffle-23.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2                | not grep punpck
-; RUN: llc < %s -march=x86 -mattr=+sse2                |     grep pshufd
-
-define i32 @t() nounwind {
-entry:
-	%a = alloca <4 x i32>		; <<4 x i32>*> [#uses=2]
-	%b = alloca <4 x i32>		; <<4 x i32>*> [#uses=5]
-	store volatile <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
-	%tmp = load <4 x i32>* %a		; <<4 x i32>> [#uses=1]
-	store <4 x i32> %tmp, <4 x i32>* %b
-	%tmp1 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
-	%tmp2 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
-	%punpckldq = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x i32>> [#uses=1]
-	store <4 x i32> %punpckldq, <4 x i32>* %b
-	%tmp3 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
-	%result = extractelement <4 x i32> %tmp3, i32 0		; <i32> [#uses=1]
-	ret i32 %result
-}
diff --git a/test/CodeGen/X86/vec_shuffle-24.ll b/test/CodeGen/X86/vec_shuffle-24.ll
deleted file mode 100644
index d038dafaf294..000000000000
--- a/test/CodeGen/X86/vec_shuffle-24.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
-
-define i32 @t() nounwind optsize {
-entry:
-; CHECK: punpckldq
-	%a = alloca <4 x i32>		; <<4 x i32>*> [#uses=2]
-	%b = alloca <4 x i32>		; <<4 x i32>*> [#uses=5]
-	store volatile <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
-	%tmp = load <4 x i32>* %a		; <<4 x i32>> [#uses=1]
-	store <4 x i32> %tmp, <4 x i32>* %b
-	%tmp1 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
-	%tmp2 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
-	%punpckldq = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x i32>> [#uses=1]
-	store <4 x i32> %punpckldq, <4 x i32>* %b
-	%tmp3 = load <4 x i32>* %b		; <<4 x i32>> [#uses=1]
-	%result = extractelement <4 x i32> %tmp3, i32 0		; <i32> [#uses=1]
-	ret i32 %result
-}
diff --git a/test/CodeGen/X86/vec_shuffle-25.ll b/test/CodeGen/X86/vec_shuffle-25.ll
deleted file mode 100644
index 3f42a132ef2b..000000000000
--- a/test/CodeGen/X86/vec_shuffle-25.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=sse4.1 -o %t
-; RUN: grep unpcklps %t | count 3
-; RUN: grep unpckhps %t | count 1
- 
-; Transpose example using the more generic vector shuffle.  We return
-; float8 instead of float16 since x86 can return that in register.
-; ModuleID = 'transpose2_opt.bc'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-apple-cl.1.0"
-@r0 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
-@r1 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
-@r2 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
-@r3 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
-
-define <8 x float> @__transpose2(<4 x float> %p0, <4 x float> %p1, <4 x float> %p2, <4 x float> %p3) nounwind {
-entry:
-	%unpcklps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=2]
-	%unpckhps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=2]
-	%unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=2]
-	%unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=2]
-	%unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=1]
-	%unpcklps14a = shufflevector <4 x float> %unpcklps14,  <4 x float> undef,  <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-	%unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=1]
-	%unpckhps17a = shufflevector <4 x float> %unpckhps17,  <4 x float> undef, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-	%r1 = shufflevector <16 x float> %unpcklps14a,  <16 x float> %unpckhps17a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-	%unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=1]
-	%unpcklps20a = shufflevector <4 x float> %unpcklps20,  <4 x float> undef,  <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-	%r2 = shufflevector <16 x float> %r1,  <16 x float> %unpcklps20a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 12, i32 13, i32 14, i32 15>
-	%unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=1]
-	%unpckhps23a = shufflevector <4 x float> %unpckhps23,  <4 x float> undef,  <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-	%r3 = shufflevector <16 x float> %r2,  <16 x float> %unpckhps23a, <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
-	%r4 = shufflevector <16 x float> %r3,  <16 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-	ret <8 x float> %r4
-}
diff --git a/test/CodeGen/X86/vec_shuffle-26.ll b/test/CodeGen/X86/vec_shuffle-26.ll
deleted file mode 100644
index 00e8e73e184e..000000000000
--- a/test/CodeGen/X86/vec_shuffle-26.ll
+++ /dev/null
@@ -1,68 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=generic -mattr=sse4.1 | FileCheck %s
-; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s
-
-; Transpose example using the more generic vector shuffle. Return float8
-; instead of float16
-; ModuleID = 'transpose2_opt.bc'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-apple-cl.1.0"
-@r0 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
-@r1 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
-@r2 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
-@r3 = common global <4 x float> zeroinitializer, align 16		; <<4 x float>*> [#uses=1]
-
-define <8 x float> @__transpose2(<4 x float> %p0, <4 x float> %p1, <4 x float> %p2, <4 x float> %p3) nounwind {
-entry:
-; CHECK: transpose2
-; CHECK: unpckhps
-; CHECK: unpckhps
-; CHECK: unpcklps
-; CHECK: unpckhps
-; Different instruction order for Atom.
-; ATOM: transpose2
-; ATOM: unpckhps
-; ATOM: unpckhps
-; ATOM: unpckhps
-; ATOM: unpcklps
-	%unpcklps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=2]
-	%unpckhps = shufflevector <4 x float> %p0, <4 x float> %p2, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=2]
-	%unpcklps8 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=2]
-	%unpckhps11 = shufflevector <4 x float> %p1, <4 x float> %p3, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=2]
-	%unpcklps14 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=1]
-	%unpckhps17 = shufflevector <4 x float> %unpcklps, <4 x float> %unpcklps8, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=1]
-        %r1 = shufflevector <4 x float> %unpcklps14,  <4 x float> %unpckhps17,  <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
-	%unpcklps20 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>> [#uses=1]
-	%unpckhps23 = shufflevector <4 x float> %unpckhps, <4 x float> %unpckhps11, <4 x i32> < i32 2, i32 6, i32 3, i32 7 >		; <<4 x float>> [#uses=1]
-        %r2 = shufflevector <4 x float> %unpcklps20,  <4 x float> %unpckhps23,  <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
-;       %r3 = shufflevector <8 x float> %r1,  <8 x float> %r2,  <16 x i32> < i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15 >; 
-	ret <8 x float> %r2
-}
-
-define <2 x i64> @lo_hi_shift(float* nocapture %x, float* nocapture %y) nounwind {
-entry:
-; movhps should happen before extractps to assure it gets the correct value.
-; CHECK: lo_hi_shift
-; CHECK: movhps ([[BASEREG:%[a-z]+]]),
-; CHECK: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]])
-; CHECK: extractps ${{[0-9]+}}, %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]])
-; ATOM: lo_hi_shift
-; ATOM: movhps ([[BASEREG:%[a-z]+]]),
-; ATOM: movd %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]])
-; ATOM: movd %xmm{{[0-9]+}}, {{[0-9]*}}([[BASEREG]])
-  %v.i = bitcast float* %y to <4 x float>*
-  %0 = load <4 x float>* %v.i, align 1
-  %1 = bitcast float* %x to <1 x i64>*
-  %.val = load <1 x i64>* %1, align 1
-  %2 = bitcast <1 x i64> %.val to <2 x float>
-  %shuffle.i = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-  %shuffle1.i = shufflevector <4 x float> %0, <4 x float> %shuffle.i, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-  %cast.i = bitcast <4 x float> %0 to <2 x i64>
-  %extract.i = extractelement <2 x i64> %cast.i, i32 1
-  %3 = bitcast float* %x to i64*
-  store i64 %extract.i, i64* %3, align 4
-  %4 = bitcast <4 x float> %0 to <16 x i8>
-  %5 = bitcast <4 x float> %shuffle1.i to <16 x i8>
-  %palignr = shufflevector <16 x i8> %5, <16 x i8> %4, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-  %6 = bitcast <16 x i8> %palignr to <2 x i64>
-  ret <2 x i64> %6
-}
diff --git a/test/CodeGen/X86/vec_shuffle-27.ll b/test/CodeGen/X86/vec_shuffle-27.ll
deleted file mode 100644
index c9b2fb51d78f..000000000000
--- a/test/CodeGen/X86/vec_shuffle-27.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn -mattr=sse4.1 | FileCheck %s
-
-; ModuleID = 'vec_shuffle-27.bc'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i686-apple-cl.1.0"
-
-define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone {
-entry:
-; CHECK: subps
-; CHECK: subps
-; CHECK: mulps
-; CHECK: mulps
-; CHECK: addps
-; CHECK: addps
-	%tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 >		; <<8 x float>> [#uses=1]
-	%sub = fsub <8 x float> %T1, %T0		; <<8 x float>> [#uses=1]
-	%mul = fmul <8 x float> %sub, %tmp7		; <<8 x float>> [#uses=1]
-	%add = fadd <8 x float> %mul, %T0		; <<8 x float>> [#uses=1]
-	ret <8 x float> %add
-}
-
-; Test case for r122206
-define void @test2(<4 x i64>* %ap, <4 x i64>* %bp) nounwind {
-entry:
-; CHECK: movdqa
-  %a = load <4 x i64> * %ap
-  %b = load <4 x i64> * %bp
-  %mulaa = mul <4 x i64> %a, %a
-  %mulbb = mul <4 x i64> %b, %b
-  %mulab = mul <4 x i64> %a, %b
-  %vect1271 = shufflevector <4 x i64> %mulaa, <4 x i64> %mulbb, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
-  %vect1272 = shufflevector <4 x i64> %mulaa, <4 x i64> %mulbb, <4 x i32> <i32 1, i32 5, i32 undef, i32 undef>
-  %vect1487 = shufflevector <4 x i64> %vect1271, <4 x i64> %mulab, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
-  %vect1488 = shufflevector <4 x i64> %vect1272, <4 x i64> %mulab, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
-  store <4 x i64> %vect1487, <4 x i64>* %ap
-  store <4 x i64> %vect1488, <4 x i64>* %bp
-  ret void;
-}
diff --git a/test/CodeGen/X86/vec_shuffle-28.ll b/test/CodeGen/X86/vec_shuffle-28.ll
deleted file mode 100644
index ebf557762cb9..000000000000
--- a/test/CodeGen/X86/vec_shuffle-28.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
-
-; CHECK:     pshufb
-; CHECK-NOT: pshufb
-
-; FIXME: this test has a superfluous punpcklqdq pre-pshufb currently.
-;        Don't XFAIL it because it's still better than the previous code.
-
-; Pack various elements via shuffles.
-define <8 x i16> @shuf1(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
-	%tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
-	ret <8 x i16> %tmp7
-}
diff --git a/test/CodeGen/X86/vec_shuffle-30.ll b/test/CodeGen/X86/vec_shuffle-30.ll
deleted file mode 100644
index f5f88426058c..000000000000
--- a/test/CodeGen/X86/vec_shuffle-30.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
-
-; CHECK: test
-; Test case when creating pshufhw, we incorrectly set the higher order bit
-; for an undef,
-define void @test(<8 x i16>* %dest, <8 x i16> %in) nounwind {
-entry:
-; CHECK-NOT: vmovaps
-; CHECK: vmovlpd
-; CHECK: vpshufhw        $-95
-  %0 = load <8 x i16>* %dest
-  %1 = shufflevector <8 x i16> %0, <8 x i16> %in, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 13, i32 undef, i32 14, i32 14>
-  store <8 x i16> %1, <8 x i16>* %dest
-  ret void
-}
-
-; CHECK: test2
-; A test case where we shouldn't generate a punpckldq but a pshufd and a pslldq
-define void @test2(<4 x i32>* %dest, <4 x i32> %in) nounwind {
-entry:
-; CHECK-NOT: pslldq
-; CHECK: shufps
-  %0 = shufflevector <4 x i32> %in, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> < i32 undef, i32 5, i32 undef, i32 2>
-  store <4 x i32> %0, <4 x i32>* %dest
-  ret void
-}
diff --git a/test/CodeGen/X86/vec_shuffle-31.ll b/test/CodeGen/X86/vec_shuffle-31.ll
deleted file mode 100644
index bb06e15425bb..000000000000
--- a/test/CodeGen/X86/vec_shuffle-31.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
-; RUN: grep pshufb %t | count 1
-
-define <8 x i16> @shuf3(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
-	%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >
-	ret <8 x i16> %tmp9
-}
diff --git a/test/CodeGen/X86/vec_shuffle-34.ll b/test/CodeGen/X86/vec_shuffle-34.ll
deleted file mode 100644
index d057b3fa7ea8..000000000000
--- a/test/CodeGen/X86/vec_shuffle-34.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=core2 | grep pshufb | count 2
-
-define <8 x i16> @shuf2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-entry:
-	%tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
-	ret <8 x i16> %tmp8
-}
diff --git a/test/CodeGen/X86/vec_shuffle-35.ll b/test/CodeGen/X86/vec_shuffle-35.ll
deleted file mode 100644
index f5083b4b8011..000000000000
--- a/test/CodeGen/X86/vec_shuffle-35.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -stack-alignment=16 -o %t
-; RUN: grep pextrw %t | count 12
-; RUN: grep pinsrw %t | count 13
-; RUN: grep rolw %t | count 13
-; RUN: not grep esp %t
-; RUN: not grep ebp %t
-; RUN: llc < %s -march=x86 -mcpu=core2 -stack-alignment=16 -o %t
-; RUN: grep pshufb %t | count 3
-
-define <16 x i8> @shuf1(<16 x i8> %T0) nounwind readnone {
-entry:
-	%tmp8 = shufflevector <16 x i8> %T0, <16 x i8> undef, <16 x i32> < i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 12, i32 13, i32 15 , i32 14 >
-	ret <16 x i8> %tmp8
-}
-
-define <16 x i8> @shuf2(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-entry:
-	%tmp8 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> < i32 undef, i32 undef, i32 3, i32 2, i32 17, i32 16, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 12, i32 13, i32 15 , i32 14 >
-	ret <16 x i8> %tmp8
-}
diff --git a/test/CodeGen/X86/vec_shuffle-36.ll b/test/CodeGen/X86/vec_shuffle-36.ll
deleted file mode 100644
index f1d0f939e60c..000000000000
--- a/test/CodeGen/X86/vec_shuffle-36.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mcpu=penryn -mattr=sse4.1 | FileCheck %s
-
-define <8 x i16> @shuf6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-; CHECK: pshufb
-; CHECK-NOT: pshufb
-; CHECK: ret
-entry:
-  %tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 3, i32 2, i32 0, i32 2, i32 1, i32 5, i32 6 , i32 undef >
-  ret <8 x i16> %tmp9
-}
-
-define <8 x i16> @shuf7(<8 x i16> %t0) {
-; CHECK: pshufd
-  %tmp10 = shufflevector <8 x i16> %t0, <8 x i16> undef, <8 x i32> < i32 undef, i32 2, i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef >
-  ret <8 x i16> %tmp10
-}
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
deleted file mode 100644
index ed285f93fe1b..000000000000
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core2 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=core2 | FileCheck %s
-; RUN: llc -O0 < %s -march=x86 -mcpu=core2 | FileCheck %s --check-prefix=CHECK_O0
-
-define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
-entry:
-; CHECK: movaps  ({{%rdi|%rcx}}), %[[XMM0:xmm[0-9]+]]
-; CHECK: movaps  %[[XMM0]], %[[XMM1:xmm[0-9]+]]
-; CHECK-NEXT: movss   %xmm{{[0-9]+}}, %[[XMM1]]
-; CHECK-NEXT: shufps  $36, %[[XMM1]], %[[XMM0]]
-  %0 = load <4 x i32>* undef, align 16
-  %1 = load <4 x i32>* %a0, align 16
-  %2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
-  ret <4 x i32> %2
-}
-
-define void @t01(double* %a0) nounwind ssp {
-entry:
-; CHECK_O0: movsd (%eax), %xmm0
-; CHECK_O0: unpcklpd  %xmm0, %xmm0
-  %tmp93 = load double* %a0, align 8
-  %vecinit94 = insertelement <2 x double> undef, double %tmp93, i32 1
-  store <2 x double> %vecinit94, <2 x double>* undef
-  ret void
-}
-
-define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {
-entry:
-; CHECK: t02
-; CHECK: movaps
-; CHECK: shufps
-; CHECK: pshufd
-; CHECK: movq
-; CHECK: ret
-  %0 = bitcast <8 x i32>* %source to <4 x i32>*
-  %arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3
-  %tmp2 = load <4 x i32>* %arrayidx, align 16
-  %tmp3 = extractelement <4 x i32> %tmp2, i32 0
-  %tmp5 = insertelement <2 x i32> <i32 undef, i32 0>, i32 %tmp3, i32 0
-  %arrayidx7 = getelementptr inbounds <8 x i32>* %source, i64 1
-  %1 = bitcast <8 x i32>* %arrayidx7 to <4 x i32>*
-  %tmp8 = load <4 x i32>* %1, align 16
-  %tmp9 = extractelement <4 x i32> %tmp8, i32 1
-  %tmp11 = insertelement <2 x i32> %tmp5, i32 %tmp9, i32 1
-  store <2 x i32> %tmp11, <2 x i32>* %dest, align 8
-  ret void
-}
diff --git a/test/CodeGen/X86/vec_shuffle-38.ll b/test/CodeGen/X86/vec_shuffle-38.ll
deleted file mode 100644
index ec196df7aeff..000000000000
--- a/test/CodeGen/X86/vec_shuffle-38.ll
+++ /dev/null
@@ -1,77 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
-
-define <2 x double> @ld(<2 x double> %p) nounwind optsize ssp {
-; CHECK: unpcklpd
-  %shuffle = shufflevector <2 x double> %p, <2 x double> undef, <2 x i32> zeroinitializer
-  ret <2 x double> %shuffle
-}
-
-define <2 x double> @hd(<2 x double> %p) nounwind optsize ssp {
-; CHECK: unpckhpd
-  %shuffle = shufflevector <2 x double> %p, <2 x double> undef, <2 x i32> <i32 1, i32 1>
-  ret <2 x double> %shuffle
-}
-
-define <2 x i64> @ldi(<2 x i64> %p) nounwind optsize ssp {
-; CHECK: punpcklqdq
-  %shuffle = shufflevector <2 x i64> %p, <2 x i64> undef, <2 x i32> zeroinitializer
-  ret <2 x i64> %shuffle
-}
-
-define <2 x i64> @hdi(<2 x i64> %p) nounwind optsize ssp {
-; CHECK: punpckhqdq
-  %shuffle = shufflevector <2 x i64> %p, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-  ret <2 x i64> %shuffle
-}
-
-; rdar://10050549
-%struct.Float2 = type { float, float }
-
-define <4 x float> @loadhpi(%struct.Float2* %vPtr, <4 x float> %vecin1) nounwind readonly ssp {
-entry:
-; CHECK: loadhpi
-; CHECK-NOT: movq
-; CHECK: movhps (
-  %tmp1 = bitcast %struct.Float2* %vPtr to <1 x i64>*
-  %addptr7 = getelementptr inbounds <1 x i64>* %tmp1, i64 0
-  %tmp2 = bitcast <1 x i64>* %addptr7 to float*
-  %tmp3 = load float* %tmp2, align 4
-  %vec = insertelement <4 x float> undef, float %tmp3, i32 0
-  %addptr.i12 = getelementptr inbounds float* %tmp2, i64 1
-  %tmp4 = load float* %addptr.i12, align 4
-  %vecin2 = insertelement <4 x float> %vec, float %tmp4, i32 1
-  %shuffle = shufflevector <4 x float> %vecin1, <4 x float> %vecin2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-  ret <4 x float> %shuffle
-}
-
-; rdar://10119696
-; CHECK: f
-define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind readonly ssp {
-entry:
-  ; CHECK: movlps  (%{{rdi|rdx}}), %xmm0
-  %u110.i = load double* %y, align 1
-  %tmp8.i = insertelement <2 x double> undef, double %u110.i, i32 0
-  %tmp9.i = bitcast <2 x double> %tmp8.i to <4 x float>
-  %shuffle.i = shufflevector <4 x float> %x, <4 x float> %tmp9.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-  ret <4 x float> %shuffle.i
-}
-
-define <4 x float> @loadhpi2(%struct.Float2* nocapture %vHiCoefPtr_0, %struct.Float2* nocapture %vLoCoefPtr_0, i32 %s) nounwind readonly ssp {
-entry:
-; CHECK: loadhpi2
-; CHECK: movhps (
-; CHECK-NOT: movlhps
-  %0 = bitcast %struct.Float2* %vHiCoefPtr_0 to <1 x i64>*
-  %idx.ext = sext i32 %s to i64
-  %add.ptr = getelementptr inbounds <1 x i64>* %0, i64 %idx.ext
-  %add.ptr.val = load <1 x i64>* %add.ptr, align 1
-  %1 = bitcast <1 x i64> %add.ptr.val to <2 x float>
-  %shuffle.i = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-  %2 = bitcast %struct.Float2* %vLoCoefPtr_0 to <1 x i64>*
-  %add.ptr2 = getelementptr inbounds <1 x i64>* %2, i64 %idx.ext
-  %add.ptr2.val = load <1 x i64>* %add.ptr2, align 1
-  %3 = bitcast <1 x i64> %add.ptr2.val to <2 x float>
-  %shuffle.i4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-  %shuffle1.i5 = shufflevector <4 x float> %shuffle.i, <4 x float> %shuffle.i4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-  ret <4 x float> %shuffle1.i5
-}
diff --git a/test/CodeGen/X86/vec_shuffle-39.ll b/test/CodeGen/X86/vec_shuffle-39.ll
deleted file mode 100644
index 8fd9a5cd023e..000000000000
--- a/test/CodeGen/X86/vec_shuffle-39.ll
+++ /dev/null
@@ -1,86 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-linux -mcpu=penryn | FileCheck %s
-; rdar://10050222, rdar://10134392
-
-define <4 x float> @t1(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
-entry:
-; CHECK-LABEL: t1:
-; CHECK: movlps (%rdi), %xmm0
-; CHECK: ret
-  %p.val = load <1 x i64>* %p, align 1
-  %0 = bitcast <1 x i64> %p.val to <2 x float>
-  %shuffle.i = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-  %shuffle1.i = shufflevector <4 x float> %a, <4 x float> %shuffle.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-  ret <4 x float> %shuffle1.i
-}
-
-define <4 x float> @t1a(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
-entry:
-; CHECK-LABEL: t1a:
-; CHECK: movlps (%rdi), %xmm0
-; CHECK: ret
-  %0 = bitcast <1 x i64>* %p to double*
-  %1 = load double* %0
-  %2 = insertelement <2 x double> undef, double %1, i32 0
-  %3 = bitcast <2 x double> %2 to <4 x float>
-  %4 = shufflevector <4 x float> %a, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-  ret <4 x float> %4
-}
-
-define void @t2(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
-entry:
-; CHECK-LABEL: t2:
-; CHECK: movlps %xmm0, (%rdi)
-; CHECK: ret
-  %cast.i = bitcast <4 x float> %a to <2 x i64>
-  %extract.i = extractelement <2 x i64> %cast.i, i32 0
-  %0 = getelementptr inbounds <1 x i64>* %p, i64 0, i64 0
-  store i64 %extract.i, i64* %0, align 8
-  ret void
-}
-
-define void @t2a(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
-entry:
-; CHECK-LABEL: t2a:
-; CHECK: movlps %xmm0, (%rdi)
-; CHECK: ret
-  %0 = bitcast <1 x i64>* %p to double*
-  %1 = bitcast <4 x float> %a to <2 x double>
-  %2 = extractelement <2 x double> %1, i32 0
-  store double %2, double* %0
-  ret void
-}
-
-; rdar://10436044
-define <2 x double> @t3() nounwind readonly {
-bb:
-; CHECK-LABEL: t3:
-; CHECK: movq (%rax), %xmm1
-; CHECK: punpcklqdq %xmm2, %xmm0
-; CHECK: movsd %xmm1, %xmm0
-  %tmp0 = load i128* null, align 1
-  %tmp1 = load <2 x i32>* undef, align 8
-  %tmp2 = bitcast i128 %tmp0 to <16 x i8>
-  %tmp3 = bitcast <2 x i32> %tmp1 to i64
-  %tmp4 = insertelement <2 x i64> undef, i64 %tmp3, i32 0
-  %tmp5 = bitcast <16 x i8> %tmp2 to <2 x double>
-  %tmp6 = bitcast <2 x i64> %tmp4 to <2 x double>
-  %tmp7 = shufflevector <2 x double> %tmp5, <2 x double> %tmp6, <2 x i32> <i32 2, i32 1>
-  ret <2 x double> %tmp7
-}
-
-; rdar://10450317
-define <2 x i64> @t4() nounwind readonly {
-bb:
-; CHECK-LABEL: t4:
-; CHECK: movq (%rax), %xmm0
-; CHECK: punpcklqdq %{{xmm.}}, %[[XMM:xmm[0-9]]]
-; CHECK: movsd %[[XMM]], %xmm0
-  %tmp0 = load i128* null, align 1
-  %tmp1 = load <2 x i32>* undef, align 8
-  %tmp2 = bitcast i128 %tmp0 to <16 x i8>
-  %tmp3 = bitcast <2 x i32> %tmp1 to i64
-  %tmp4 = insertelement <2 x i64> undef, i64 %tmp3, i32 0
-  %tmp5 = bitcast <16 x i8> %tmp2 to <2 x i64>
-  %tmp6 = shufflevector <2 x i64> %tmp4, <2 x i64> %tmp5, <2 x i32> <i32 2, i32 1>
-  ret <2 x i64> %tmp6
-}
diff --git a/test/CodeGen/X86/vec_shuffle-40.ll b/test/CodeGen/X86/vec_shuffle-40.ll
deleted file mode 100644
index 75b45e3df111..000000000000
--- a/test/CodeGen/X86/vec_shuffle-40.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s
-
-define void @shuffle_v16i16(<16 x i16>* %a) {
-; CHECK-LABEL: shuffle_v16i16:
-; CHECK: vpshufb {{.*}}%ymm
-; CHECK-NOT: vpshufb {{.*}}%xmm
-entry:
-  %0 = load <16 x i16>* %a, align 32
-  %shuffle = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
-  store <16 x i16> %shuffle, <16 x i16>* %a, align 32
-  ret void
-}
-
-define void @shuffle_v16i16_lanecrossing(<16 x i16>* %a) {
-; CHECK-LABEL: shuffle_v16i16_lanecrossing:
-; CHECK-NOT: vpshufb {{.*}}%ymm
-entry:
-  %0 = load <16 x i16>* %a, align 32
-  %shuffle = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 13, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
-  store <16 x i16> %shuffle, <16 x i16>* %a, align 32
-  ret void
-}
diff --git a/test/CodeGen/X86/vec_shuffle-41.ll b/test/CodeGen/X86/vec_shuffle-41.ll
deleted file mode 100644
index 28fdd2f5ce17..000000000000
--- a/test/CodeGen/X86/vec_shuffle-41.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx | FileCheck %s
-
-; Use buildFromShuffleMostly which allows this to be generated as two 128-bit
-; shuffles and an insert.
-
-; This is the (somewhat questionable) LLVM IR that is generated for:
-;    x8.s0123456 = x8.s1234567;  // x8 is a <8 x float> type
-;    x8.s7 = f;                  // f is float
-
-
-define <8 x float> @test1(<8 x float> %a, float %b) {
-; CHECK-LABEL: test1:
-; CHECK: vinsertps
-; CHECK-NOT: vinsertps
-entry:
-  %shift = shufflevector <8 x float> %a, <8 x float> undef, <7 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %extend = shufflevector <7 x float> %shift, <7 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef>
-  %insert = insertelement <8 x float> %extend, float %b, i32 7
-
-  ret <8 x float> %insert
-}
diff --git a/test/CodeGen/X86/vec_shuffle.ll b/test/CodeGen/X86/vec_shuffle.ll
deleted file mode 100644
index 65995984859b..000000000000
--- a/test/CodeGen/X86/vec_shuffle.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: llc < %s -mtriple=i686-linux -mcpu=core2 | FileCheck %s
-
-; CHECK: test_v4sf
-; CHECK: movq 8(%esp)
-; CHECK: pshufd $80
-define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
-	%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0		; <<4 x float>> [#uses=1]
-	%tmp2 = insertelement <4 x float> %tmp, float %X, i32 1		; <<4 x float>> [#uses=1]
-	%tmp4 = insertelement <4 x float> %tmp2, float %Y, i32 2		; <<4 x float>> [#uses=1]
-	%tmp6 = insertelement <4 x float> %tmp4, float %Y, i32 3		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp6, <4 x float>* %P
-	ret void
-}
-
-; CHECK: test_v2sd
-; CHECK: movups	8(%esp)
-; CHECK: movaps
-define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
-	%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0		; <<2 x double>> [#uses=1]
-	%tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1		; <<2 x double>> [#uses=1]
-	store <2 x double> %tmp2, <2 x double>* %P
-	ret void
-}
-
-; CHECK: test_v8i16
-; CHECK: pshufhw $-58
-; CHECK: movdqa
-define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind {
-	%tmp = load <2 x i64>* %A		; <<2 x i64>> [#uses=1]
-	%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>		; <<8 x i16>> [#uses=8]
-	%tmp.upgrd.2 = extractelement <8 x i16> %tmp.upgrd.1, i32 0		; <i16> [#uses=1]
-	%tmp1 = extractelement <8 x i16> %tmp.upgrd.1, i32 1		; <i16> [#uses=1]
-	%tmp2 = extractelement <8 x i16> %tmp.upgrd.1, i32 2		; <i16> [#uses=1]
-	%tmp3 = extractelement <8 x i16> %tmp.upgrd.1, i32 3		; <i16> [#uses=1]
-	%tmp4 = extractelement <8 x i16> %tmp.upgrd.1, i32 6		; <i16> [#uses=1]
-	%tmp5 = extractelement <8 x i16> %tmp.upgrd.1, i32 5		; <i16> [#uses=1]
-	%tmp6 = extractelement <8 x i16> %tmp.upgrd.1, i32 4		; <i16> [#uses=1]
-	%tmp7 = extractelement <8 x i16> %tmp.upgrd.1, i32 7		; <i16> [#uses=1]
-	%tmp8 = insertelement <8 x i16> undef, i16 %tmp.upgrd.2, i32 0		; <<8 x i16>> [#uses=1]
-	%tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 1		; <<8 x i16>> [#uses=1]
-	%tmp10 = insertelement <8 x i16> %tmp9, i16 %tmp2, i32 2		; <<8 x i16>> [#uses=1]
-	%tmp11 = insertelement <8 x i16> %tmp10, i16 %tmp3, i32 3		; <<8 x i16>> [#uses=1]
-	%tmp12 = insertelement <8 x i16> %tmp11, i16 %tmp4, i32 4		; <<8 x i16>> [#uses=1]
-	%tmp13 = insertelement <8 x i16> %tmp12, i16 %tmp5, i32 5		; <<8 x i16>> [#uses=1]
-	%tmp14 = insertelement <8 x i16> %tmp13, i16 %tmp6, i32 6		; <<8 x i16>> [#uses=1]
-	%tmp15 = insertelement <8 x i16> %tmp14, i16 %tmp7, i32 7		; <<8 x i16>> [#uses=1]
-	%tmp15.upgrd.3 = bitcast <8 x i16> %tmp15 to <2 x i64>		; <<2 x i64>> [#uses=1]
-	store <2 x i64> %tmp15.upgrd.3, <2 x i64>* %res
-	ret void
-}
diff --git a/test/CodeGen/X86/vec_splat-2.ll b/test/CodeGen/X86/vec_splat-2.ll
deleted file mode 100644
index 9d82f97dca1c..000000000000
--- a/test/CodeGen/X86/vec_splat-2.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s
-
-define void @test(<2 x i64>* %P, i8 %x) nounwind {
-	%tmp = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0		; <<16 x i8>> [#uses=1]
-	%tmp36 = insertelement <16 x i8> %tmp, i8 %x, i32 1		; <<16 x i8>> [#uses=1]
-	%tmp38 = insertelement <16 x i8> %tmp36, i8 %x, i32 2		; <<16 x i8>> [#uses=1]
-	%tmp40 = insertelement <16 x i8> %tmp38, i8 %x, i32 3		; <<16 x i8>> [#uses=1]
-	%tmp42 = insertelement <16 x i8> %tmp40, i8 %x, i32 4		; <<16 x i8>> [#uses=1]
-	%tmp44 = insertelement <16 x i8> %tmp42, i8 %x, i32 5		; <<16 x i8>> [#uses=1]
-	%tmp46 = insertelement <16 x i8> %tmp44, i8 %x, i32 6		; <<16 x i8>> [#uses=1]
-	%tmp48 = insertelement <16 x i8> %tmp46, i8 %x, i32 7		; <<16 x i8>> [#uses=1]
-	%tmp50 = insertelement <16 x i8> %tmp48, i8 %x, i32 8		; <<16 x i8>> [#uses=1]
-	%tmp52 = insertelement <16 x i8> %tmp50, i8 %x, i32 9		; <<16 x i8>> [#uses=1]
-	%tmp54 = insertelement <16 x i8> %tmp52, i8 %x, i32 10		; <<16 x i8>> [#uses=1]
-	%tmp56 = insertelement <16 x i8> %tmp54, i8 %x, i32 11		; <<16 x i8>> [#uses=1]
-	%tmp58 = insertelement <16 x i8> %tmp56, i8 %x, i32 12		; <<16 x i8>> [#uses=1]
-	%tmp60 = insertelement <16 x i8> %tmp58, i8 %x, i32 13		; <<16 x i8>> [#uses=1]
-	%tmp62 = insertelement <16 x i8> %tmp60, i8 %x, i32 14		; <<16 x i8>> [#uses=1]
-	%tmp64 = insertelement <16 x i8> %tmp62, i8 %x, i32 15		; <<16 x i8>> [#uses=1]
-	%tmp68 = load <2 x i64>* %P		; <<2 x i64>> [#uses=1]
-	%tmp71 = bitcast <2 x i64> %tmp68 to <16 x i8>		; <<16 x i8>> [#uses=1]
-	%tmp73 = add <16 x i8> %tmp71, %tmp64		; <<16 x i8>> [#uses=1]
-	%tmp73.upgrd.1 = bitcast <16 x i8> %tmp73 to <2 x i64>		; <<2 x i64>> [#uses=1]
-	store <2 x i64> %tmp73.upgrd.1, <2 x i64>* %P
-	ret void
-
-; CHECK-LABEL: test:
-; CHECK-NOT: pshufd
-; CHECK: punpcklbw
-; CHECK: punpcklbw
-; CHECK: pshufd $0
-; CHECK-NOT: pshufd
-}
diff --git a/test/CodeGen/X86/vec_splat-3.ll b/test/CodeGen/X86/vec_splat-3.ll
deleted file mode 100644
index 754cbf41867d..000000000000
--- a/test/CodeGen/X86/vec_splat-3.ll
+++ /dev/null
@@ -1,230 +0,0 @@
-; RUN: llc <%s -march=x86 -mcpu=penryn -mattr=sse4.1 | FileCheck %s
-
-; Splat test for v8i16
-define <8 x i16> @shuf_8i16_0(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef>
-	ret <8 x i16> %tmp6
-
-; CHECK-LABEL: shuf_8i16_0:
-; CHECK: pshuflw $0
-}
-
-define <8 x i16> @shuf_8i16_1(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-	ret <8 x i16> %tmp6
-
-; CHECK-LABEL: shuf_8i16_1:
-; CHECK: pshuflw $5
-}
-
-define <8 x i16> @shuf_8i16_2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 undef, i32 undef>
-	ret <8 x i16> %tmp6
-
-; CHECK-LABEL: shuf_8i16_2:
-; CHECK: punpcklwd
-; CHECK-NEXT: pshufd $-86
-}
-
-define <8 x i16> @shuf_8i16_3(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-	ret <8 x i16> %tmp6
-
-; CHECK-LABEL: shuf_8i16_3:
-; CHECK: pshuflw $15
-}
-
-define <8 x i16> @shuf_8i16_4(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef>
-	ret <8 x i16> %tmp6
-
-; CHECK-LABEL: shuf_8i16_4:
-; CHECK: movhlps
-}
-
-define <8 x i16> @shuf_8i16_5(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
-	ret <8 x i16> %tmp6
-
-; CHECK-LABEL: shuf_8i16_5:
-; CHECK: punpckhwd
-; CHECK-NEXT: pshufd $85
-}
-
-define <8 x i16> @shuf_8i16_6(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 6, i32 6, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-	ret <8 x i16> %tmp6
-
-; CHECK-LABEL: shuf_8i16_6:
-; CHECK: punpckhwd
-; CHECK-NEXT: pshufd $-86
-}
-
-define <8 x i16> @shuf_8i16_7(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
-	%tmp6 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> <i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
-	ret <8 x i16> %tmp6
-
-; CHECK-LABEL: shuf_8i16_7:
-; CHECK: punpckhwd
-; CHECK-NEXT: pshufd $-1
-}
-
-; Splat test for v16i8
-define <16 x i8> @shuf_16i8_8(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_8:
-; CHECK: punpcklbw
-; CHECK-NEXT: punpcklbw
-; CHECK-NEXT: pshufd $0
-}
-
-define <16 x i8> @shuf_16i8_9(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_9:
-; CHECK: punpcklbw
-; CHECK-NEXT: punpcklbw
-; CHECK-NEXT: pshufd $85
-}
-
-define <16 x i8> @shuf_16i8_10(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 2, i32 undef, i32 undef, i32 2, i32 undef, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_10:
-; CHECK: punpcklbw
-; CHECK-NEXT: punpcklbw
-; CHECK-NEXT: pshufd $-86
-}
-
-define <16 x i8> @shuf_16i8_11(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 3, i32 undef, i32 undef, i32 3, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_11:
-; CHECK: punpcklbw
-; CHECK-NEXT: punpcklbw
-; CHECK-NEXT: pshufd $-1
-}
-
-
-define <16 x i8> @shuf_16i8_12(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 4, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_12:
-; CHECK: pshufd $5
-}
-
-define <16 x i8> @shuf_16i8_13(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 5, i32 undef, i32 undef, i32 5, i32 undef, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_13:
-; CHECK: punpcklbw
-; CHECK-NEXT: punpckhbw
-; CHECK-NEXT: pshufd $85
-}
-
-define <16 x i8> @shuf_16i8_14(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 6, i32 undef, i32 undef, i32 6, i32 undef, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_14:
-; CHECK: punpcklbw
-; CHECK-NEXT: punpckhbw
-; CHECK-NEXT: pshufd $-86
-}
-
-define <16 x i8> @shuf_16i8_15(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 7, i32 undef, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef >
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_15:
-; CHECK: punpcklbw
-; CHECK-NEXT: punpckhbw
-; CHECK-NEXT: pshufd $-1
-}
-
-define <16 x i8> @shuf_16i8_16(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 8, i32 undef, i32 undef, i32 8, i32 undef, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_16:
-; CHECK: punpckhbw
-; CHECK-NEXT: punpcklbw
-; CHECK-NEXT: pshufd $0
-}
-
-define <16 x i8> @shuf_16i8_17(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 9, i32 undef, i32 undef, i32 9, i32 undef, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_17:
-; CHECK: punpckhbw
-; CHECK-NEXT: punpcklbw
-; CHECK-NEXT: pshufd $85
-}
-
-define <16 x i8> @shuf_16i8_18(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 10, i32 undef, i32 undef, i32 10, i32 undef, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_18:
-; CHECK: punpckhbw
-; CHECK-NEXT: punpcklbw
-; CHECK-NEXT: pshufd $-86
-}
-
-define <16 x i8> @shuf_16i8_19(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 11, i32 undef, i32 undef, i32 11, i32 undef, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11, i32 11>
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_19:
-; CHECK: punpckhbw
-; CHECK-NEXT: punpcklbw
-; CHECK-NEXT: pshufd $-1
-}
-
-define <16 x i8> @shuf_16i8_20(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 12, i32 undef, i32 undef, i32 12, i32 undef, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_20:
-; CHECK: punpckhbw
-; CHECK-NEXT: punpckhbw
-; CHECK-NEXT: pshufd $0
-}
-
-define <16 x i8> @shuf_16i8_21(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 13, i32 undef, i32 undef, i32 13, i32 undef, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13>
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_21:
-; CHECK: punpckhbw
-; CHECK-NEXT: punpckhbw
-; CHECK-NEXT: pshufd $85
-}
-
-define <16 x i8> @shuf_16i8_22(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 14, i32 undef, i32 undef, i32 14, i32 undef, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14, i32 14>
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_22:
-; CHECK: punpckhbw
-; CHECK-NEXT: punpckhbw
-; CHECK-NEXT: pshufd $-86
-}
-
-define <16 x i8> @shuf_16i8_23(<16 x i8> %T0, <16 x i8> %T1) nounwind readnone {
-	%tmp6 = shufflevector <16 x i8> %T0, <16 x i8> %T1, <16 x i32> <i32 15, i32 undef, i32 undef, i32 15, i32 undef, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-	ret <16 x i8> %tmp6
-
-; CHECK-LABEL: shuf_16i8_23:
-; CHECK: punpckhbw
-; CHECK-NEXT: punpckhbw
-; CHECK-NEXT: pshufd $-1
-}
diff --git a/test/CodeGen/X86/vec_splat.ll b/test/CodeGen/X86/vec_splat.ll
deleted file mode 100644
index a02e3836078c..000000000000
--- a/test/CodeGen/X86/vec_splat.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse2 | FileCheck %s -check-prefix=SSE2
-; RUN: llc < %s -march=x86 -mcpu=pentium4 -mattr=+sse3 | FileCheck %s -check-prefix=SSE3
-
-define void @test_v4sf(<4 x float>* %P, <4 x float>* %Q, float %X) nounwind {
-	%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0		; <<4 x float>> [#uses=1]
-	%tmp2 = insertelement <4 x float> %tmp, float %X, i32 1		; <<4 x float>> [#uses=1]
-	%tmp4 = insertelement <4 x float> %tmp2, float %X, i32 2		; <<4 x float>> [#uses=1]
-	%tmp6 = insertelement <4 x float> %tmp4, float %X, i32 3		; <<4 x float>> [#uses=1]
-	%tmp8 = load <4 x float>* %Q		; <<4 x float>> [#uses=1]
-	%tmp10 = fmul <4 x float> %tmp8, %tmp6		; <<4 x float>> [#uses=1]
-	store <4 x float> %tmp10, <4 x float>* %P
-	ret void
-
-; SSE2-LABEL: test_v4sf:
-; SSE2: pshufd $0
-
-; SSE3-LABEL: test_v4sf:
-; SSE3: pshufd $0
-}
-
-define void @test_v2sd(<2 x double>* %P, <2 x double>* %Q, double %X) nounwind {
-	%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0		; <<2 x double>> [#uses=1]
-	%tmp2 = insertelement <2 x double> %tmp, double %X, i32 1		; <<2 x double>> [#uses=1]
-	%tmp4 = load <2 x double>* %Q		; <<2 x double>> [#uses=1]
-	%tmp6 = fmul <2 x double> %tmp4, %tmp2		; <<2 x double>> [#uses=1]
-	store <2 x double> %tmp6, <2 x double>* %P
-	ret void
-
-; SSE2-LABEL: test_v2sd:
-; SSE2: shufpd $0
-
-; SSE3-LABEL: test_v2sd:
-; SSE3: movddup
-}
-
-; Fold extract of a load into the load's address computation. This avoids spilling to the stack.
-define <4 x float> @load_extract_splat(<4 x float>* nocapture readonly %ptr, i64 %i, i64 %j) nounwind {
-  %1 = getelementptr inbounds <4 x float>* %ptr, i64 %i
-  %2 = load <4 x float>* %1, align 16
-  %3 = extractelement <4 x float> %2, i64 %j
-  %4 = insertelement <4 x float> undef, float %3, i32 0
-  %5 = insertelement <4 x float> %4, float %3, i32 1
-  %6 = insertelement <4 x float> %5, float %3, i32 2
-  %7 = insertelement <4 x float> %6, float %3, i32 3
-  ret <4 x float> %7
-  
-; AVX-LABEL: load_extract_splat
-; AVX-NOT: movs
-; AVX: vbroadcastss
-}
diff --git a/test/CodeGen/X86/vec_trunc_sext.ll b/test/CodeGen/X86/vec_trunc_sext.ll
new file mode 100644
index 000000000000..3c446bba4ea8
--- /dev/null
+++ b/test/CodeGen/X86/vec_trunc_sext.ll
@@ -0,0 +1,30 @@
+; RUN: llc %s -mtriple=x86_64-unknown-unknown -mattr='-sse4.1' -o - | FileCheck %s -check-prefix=NO_SSE_41
+; RUN: llc %s -mtriple=x86_64-unknown-unknown -mattr='+sse4.1' -o - | FileCheck %s -check-prefix=SSE_41
+
+; PR20472 ( http://llvm.org/bugs/show_bug.cgi?id=20472 )
+; When sexting a trunc'd vector value, we can't eliminate the zext.
+; If we don't have SSE4.1, use punpck.
+; If we have SSE4.1, use pmovzx because it combines the load op.
+; There may be a better way to do this using pshufb + pmovsx,
+; but that is beyond our current codegen capabilities.
+
+define <4 x i32> @trunc_sext(<4 x i16>* %in) {
+  %load = load <4 x i16>* %in
+  %trunc = trunc <4 x i16> %load to <4 x i8>
+  %sext = sext <4 x i8> %trunc to <4 x i32>
+  ret <4 x i32> %sext
+
+; NO_SSE_41-LABEL: trunc_sext:
+; NO_SSE_41: movq (%rdi), %xmm0
+; NO_SSE_41-NEXT: punpcklwd %xmm0, %xmm0
+; NO_SSE_41-NEXT: pslld $24, %xmm0
+; NO_SSE_41-NEXT: psrad $24, %xmm0
+; NO_SSE_41-NEXT: retq
+
+; SSE_41-LABEL: trunc_sext:
+; SSE_41: pmovzxwd (%rdi), %xmm0
+; SSE_41-NEXT: pslld $24, %xmm0
+; SSE_41-NEXT: psrad $24, %xmm0
+; SSE_41-NEXT: retq
+}
+
diff --git a/test/CodeGen/X86/vec_uint_to_fp.ll b/test/CodeGen/X86/vec_uint_to_fp.ll
index ee20f1fcbd04..46cfcd9a9a12 100644
--- a/test/CodeGen/X86/vec_uint_to_fp.ll
+++ b/test/CodeGen/X86/vec_uint_to_fp.ll
@@ -1,11 +1,167 @@
-; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck --check-prefix=CHECK --check-prefix=SSE --check-prefix=CST %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse4.1 | FileCheck --check-prefix=CHECK --check-prefix=SSE41 --check-prefix=CST  %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck --check-prefix=CHECK --check-prefix=AVX --check-prefix=CST %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx2 | FileCheck --check-prefix=CHECK --check-prefix=AVX2 %s
+
+; Check that the constant used in the vectors are the right ones.
+; SSE: [[MASKCSTADDR:LCPI0_[0-9]+]]:
+; SSE-NEXT: .long	65535                   ## 0xffff
+; SSE-NEXT: .long	65535                   ## 0xffff
+; SSE-NEXT: .long	65535                   ## 0xffff
+; SSE-NEXT: .long	65535                   ## 0xffff
+
+; CST: [[LOWCSTADDR:LCPI0_[0-9]+]]:
+; CST-NEXT: .long	1258291200              ## 0x4b000000
+; CST-NEXT: .long	1258291200              ## 0x4b000000
+; CST-NEXT: .long	1258291200              ## 0x4b000000
+; CST-NEXT: .long	1258291200              ## 0x4b000000
+
+; CST: [[HIGHCSTADDR:LCPI0_[0-9]+]]:
+; CST-NEXT: .long	1392508928              ## 0x53000000
+; CST-NEXT: .long	1392508928              ## 0x53000000
+; CST-NEXT: .long	1392508928              ## 0x53000000
+; CST-NEXT: .long	1392508928              ## 0x53000000
+
+; CST: [[MAGICCSTADDR:LCPI0_[0-9]+]]:
+; CST-NEXT: .long	3539992704              ## float -5.497642e+11
+; CST-NEXT: .long	3539992704              ## float -5.497642e+11
+; CST-NEXT: .long	3539992704              ## float -5.497642e+11
+; CST-NEXT: .long	3539992704              ## float -5.497642e+11
+
+; AVX2: [[LOWCSTADDR:LCPI0_[0-9]+]]:
+; AVX2-NEXT: .long	1258291200              ## 0x4b000000
+
+; AVX2: [[HIGHCSTADDR:LCPI0_[0-9]+]]:
+; AVX2-NEXT: .long	1392508928              ## 0x53000000
+
+; AVX2: [[MAGICCSTADDR:LCPI0_[0-9]+]]:
+; AVX2-NEXT: .long	3539992704              ## float -5.49764202E+11
 
-; Test that we are not lowering uinttofp to scalars
 define <4 x float> @test1(<4 x i32> %A) nounwind {
 ; CHECK-LABEL: test1:
-; CHECK-NOT: cvtsd2ss
-; CHECK: ret
+;
+; SSE: movdqa [[MASKCSTADDR]](%rip), [[MASK:%xmm[0-9]+]]
+; SSE-NEXT: pand %xmm0, [[MASK]]
+; After this instruction, MASK will have the value of the low parts
+; of the vector.
+; SSE-NEXT: por [[LOWCSTADDR]](%rip), [[MASK]]
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: por [[HIGHCSTADDR]](%rip), %xmm0
+; SSE-NEXT: addps [[MAGICCSTADDR]](%rip), %xmm0
+; SSE-NEXT: addps [[MASK]], %xmm0
+; SSE-NEXT: retq
+;
+; Currently we commute the arguments of the first blend, but this could be
+; improved to match the lowering of the second blend.
+; SSE41: movdqa [[LOWCSTADDR]](%rip), [[LOWVEC:%xmm[0-9]+]]
+; SSE41-NEXT: pblendw $85, %xmm0, [[LOWVEC]]
+; SSE41-NEXT: psrld $16, %xmm0
+; SSE41-NEXT: pblendw $170, [[HIGHCSTADDR]](%rip), %xmm0
+; SSE41-NEXT: addps [[MAGICCSTADDR]](%rip), %xmm0
+; SSE41-NEXT: addps [[LOWVEC]], %xmm0
+; SSE41-NEXT: retq
+;
+; AVX: vpblendw $170, [[LOWCSTADDR]](%rip), %xmm0, [[LOWVEC:%xmm[0-9]+]]
+; AVX-NEXT: vpsrld $16, %xmm0, [[SHIFTVEC:%xmm[0-9]+]]
+; AVX-NEXT: vpblendw $170, [[HIGHCSTADDR]](%rip), [[SHIFTVEC]], [[HIGHVEC:%xmm[0-9]+]]
+; AVX-NEXT: vaddps [[MAGICCSTADDR]](%rip), [[HIGHVEC]], [[TMP:%xmm[0-9]+]]
+; AVX-NEXT: vaddps [[TMP]], [[LOWVEC]], %xmm0
+; AVX-NEXT: retq
+;
+; The lowering for AVX2 is a bit messy, because we select broadcast
+; instructions, instead of folding the constant loads.
+; AVX2: vpbroadcastd [[LOWCSTADDR]](%rip), [[LOWCST:%xmm[0-9]+]]
+; AVX2-NEXT: vpblendw $170, [[LOWCST]], %xmm0, [[LOWVEC:%xmm[0-9]+]]
+; AVX2-NEXT: vpsrld $16, %xmm0, [[SHIFTVEC:%xmm[0-9]+]]
+; AVX2-NEXT: vpbroadcastd [[HIGHCSTADDR]](%rip), [[HIGHCST:%xmm[0-9]+]]
+; AVX2-NEXT: vpblendw $170, [[HIGHCST]], [[SHIFTVEC]], [[HIGHVEC:%xmm[0-9]+]]
+; AVX2-NEXT: vbroadcastss [[MAGICCSTADDR]](%rip), [[MAGICCST:%xmm[0-9]+]]
+; AVX2-NEXT: vaddps [[MAGICCST]], [[HIGHVEC]], [[TMP:%xmm[0-9]+]]
+; AVX2-NEXT: vaddps [[TMP]], [[LOWVEC]], %xmm0
+; AVX2-NEXT: retq
   %C = uitofp <4 x i32> %A to <4 x float>
   ret <4 x float> %C
 }
 
+; Match the AVX2 constants used in the next function
+; AVX2: [[LOWCSTADDR:LCPI1_[0-9]+]]:
+; AVX2-NEXT: .long	1258291200              ## 0x4b000000
+
+; AVX2: [[HIGHCSTADDR:LCPI1_[0-9]+]]:
+; AVX2-NEXT: .long	1392508928              ## 0x53000000
+
+; AVX2: [[MAGICCSTADDR:LCPI1_[0-9]+]]:
+; AVX2-NEXT: .long	3539992704              ## float -5.49764202E+11
+
+define <8 x float> @test2(<8 x i32> %A) nounwind {
+; CHECK-LABEL: test2:
+; Legalization will break the thing is 2 x <4 x i32> on anthing prior AVX.
+; The constant used for in the vector instruction are shared between the
+; two sequences of instructions.
+;
+; SSE: movdqa {{.*#+}} [[MASK:xmm[0-9]+]] = [65535,65535,65535,65535]
+; SSE-NEXT: movdqa %xmm0, [[VECLOW:%xmm[0-9]+]]
+; SSE-NEXT: pand %[[MASK]], [[VECLOW]]
+; SSE-NEXT: movdqa {{.*#+}} [[LOWCST:xmm[0-9]+]] = [1258291200,1258291200,1258291200,1258291200]
+; SSE-NEXT: por %[[LOWCST]], [[VECLOW]]
+; SSE-NEXT: psrld $16, %xmm0
+; SSE-NEXT: movdqa {{.*#+}} [[HIGHCST:xmm[0-9]+]] = [1392508928,1392508928,1392508928,1392508928]
+; SSE-NEXT: por %[[HIGHCST]], %xmm0
+; SSE-NEXT: movaps {{.*#+}} [[MAGICCST:xmm[0-9]+]] = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
+; SSE-NEXT: addps %[[MAGICCST]], %xmm0
+; SSE-NEXT: addps [[VECLOW]], %xmm0
+; MASK is the low vector of the second part after this point.
+; SSE-NEXT: pand %xmm1, %[[MASK]]
+; SSE-NEXT: por %[[LOWCST]], %[[MASK]]
+; SSE-NEXT: psrld $16, %xmm1
+; SSE-NEXT: por %[[HIGHCST]], %xmm1
+; SSE-NEXT: addps %[[MAGICCST]], %xmm1
+; SSE-NEXT: addps %[[MASK]], %xmm1
+; SSE-NEXT: retq
+;
+; SSE41: movdqa {{.*#+}} [[LOWCST:xmm[0-9]+]] = [1258291200,1258291200,1258291200,1258291200]
+; SSE41-NEXT: movdqa %xmm0, [[VECLOW:%xmm[0-9]+]]
+; SSE41-NEXT: pblendw $170, %[[LOWCST]], [[VECLOW]]
+; SSE41-NEXT: psrld $16, %xmm0
+; SSE41-NEXT: movdqa {{.*#+}} [[HIGHCST:xmm[0-9]+]] = [1392508928,1392508928,1392508928,1392508928]
+; SSE41-NEXT: pblendw $170, %[[HIGHCST]], %xmm0
+; SSE41-NEXT: movaps {{.*#+}} [[MAGICCST:xmm[0-9]+]] = [-5.497642e+11,-5.497642e+11,-5.497642e+11,-5.497642e+11]
+; SSE41-NEXT: addps %[[MAGICCST]], %xmm0
+; SSE41-NEXT: addps [[VECLOW]], %xmm0
+; LOWCST is the low vector of the second part after this point.
+; The operands of the blend are inverted because we reuse xmm1
+; in the next shift.
+; SSE41-NEXT: pblendw $85, %xmm1, %[[LOWCST]]
+; SSE41-NEXT: psrld $16, %xmm1
+; SSE41-NEXT: pblendw $170, %[[HIGHCST]], %xmm1
+; SSE41-NEXT: addps %[[MAGICCST]], %xmm1
+; SSE41-NEXT: addps %[[LOWCST]], %xmm1
+; SSE41-NEXT: retq
+;
+; Test that we are not lowering uinttofp to scalars
+; AVX-NOT: cvtsd2ss
+; AVX: retq
+;
+; AVX2: vpbroadcastd [[LOWCSTADDR]](%rip), [[LOWCST:%ymm[0-9]+]]
+; AVX2-NEXT: vpblendw $170, [[LOWCST]], %ymm0, [[LOWVEC:%ymm[0-9]+]]
+; AVX2-NEXT: vpsrld $16, %ymm0, [[SHIFTVEC:%ymm[0-9]+]]
+; AVX2-NEXT: vpbroadcastd [[HIGHCSTADDR]](%rip), [[HIGHCST:%ymm[0-9]+]]
+; AVX2-NEXT: vpblendw $170, [[HIGHCST]], [[SHIFTVEC]], [[HIGHVEC:%ymm[0-9]+]]
+; AVX2-NEXT: vbroadcastss [[MAGICCSTADDR]](%rip), [[MAGICCST:%ymm[0-9]+]]
+; AVX2-NEXT: vaddps [[MAGICCST]], [[HIGHVEC]], [[TMP:%ymm[0-9]+]]
+; AVX2-NEXT: vaddps [[TMP]], [[LOWVEC]], %ymm0
+; AVX2-NEXT: retq
+  %C = uitofp <8 x i32> %A to <8 x float>
+  ret <8 x float> %C
+}
+
+define <4 x double> @test3(<4 x i32> %arg) {
+; CHECK-LABEL: test3:
+; This test used to crash because we were custom lowering it as if it was
+; a conversion between <4 x i32> and <4 x float>.
+; AVX: vcvtdq2pd
+; AVX2: vcvtdq2pd
+; CHECK: retq
+  %tmp = uitofp <4 x i32> %arg to <4 x double>
+  ret <4 x double> %tmp
+}
diff --git a/test/CodeGen/X86/vec_unsafe-fp-math.ll b/test/CodeGen/X86/vec_unsafe-fp-math.ll
new file mode 100644
index 000000000000..827d4184d111
--- /dev/null
+++ b/test/CodeGen/X86/vec_unsafe-fp-math.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -enable-unsafe-fp-math -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s
+
+; Make sure that vectors get the same benefits as scalars when using unsafe-fp-math.
+
+; Subtracting zero is free.
+define <4 x float> @vec_fsub_zero(<4 x float> %x) {
+; CHECK-LABEL: vec_fsub_zero:
+; CHECK-NOT: subps
+; CHECK-NOT: xorps
+; CHECK: retq
+  %sub = fsub <4 x float> %x, zeroinitializer
+  ret <4 x float> %sub
+}
+
+; Negating doesn't require subtraction.
+define <4 x float> @vec_fneg(<4 x float> %x) {
+; CHECK-LABEL: vec_fneg:
+; CHECK: xorps  {{.*}}LCP{{.*}}, %xmm0
+; CHECK-NOT: subps
+; CHECK-NEXT: retq
+  %sub = fsub <4 x float> zeroinitializer, %x
+  ret <4 x float> %sub
+}
diff --git a/test/CodeGen/X86/vec_zext.ll b/test/CodeGen/X86/vec_zext.ll
deleted file mode 100644
index 615a50b7afc3..000000000000
--- a/test/CodeGen/X86/vec_zext.ll
+++ /dev/null
@@ -1,69 +0,0 @@
-; RUN: llc < %s -march=x86-64
-; PR 9267
-
-define<4 x i32> @func_16_32() {
-  %F = load <4 x i16>* undef
-  %G = zext <4 x i16> %F to <4 x i32>
-  %H = load <4 x i16>* undef
-  %Y = zext <4 x i16> %H to <4 x i32>
-  %T = add <4 x i32> %Y, %G
-  store <4 x i32>%T , <4 x i32>* undef
-  ret <4 x i32> %T
-}
-
-define<4 x i64> @func_16_64() {
-  %F = load <4 x i16>* undef
-  %G = zext <4 x i16> %F to <4 x i64>
-  %H = load <4 x i16>* undef
-  %Y = zext <4 x i16> %H to <4 x i64>
-  %T = xor <4 x i64> %Y, %G
-  store <4 x i64>%T , <4 x i64>* undef
-  ret <4 x i64> %T
-}
-
-define<4 x i64> @func_32_64() {
-  %F = load <4 x i32>* undef
-  %G = zext <4 x i32> %F to <4 x i64>
-  %H = load <4 x i32>* undef
-  %Y = zext <4 x i32> %H to <4 x i64>
-  %T = or <4 x i64> %Y, %G
-  ret <4 x i64> %T
-}
-
-define<4 x i16> @func_8_16() {
-  %F = load <4 x i8>* undef
-  %G = zext <4 x i8> %F to <4 x i16>
-  %H = load <4 x i8>* undef
-  %Y = zext <4 x i8> %H to <4 x i16>
-  %T = add <4 x i16> %Y, %G
-  ret <4 x i16> %T
-}
-
-define<4 x i32> @func_8_32() {
-  %F = load <4 x i8>* undef
-  %G = zext <4 x i8> %F to <4 x i32>
-  %H = load <4 x i8>* undef
-  %Y = zext <4 x i8> %H to <4 x i32>
-  %T = sub <4 x i32> %Y, %G
-  ret <4 x i32> %T
-}
-
-define<4 x i64> @func_8_64() {
-  %F = load <4 x i8>* undef
-  %G = zext <4 x i8> %F to <4 x i64>
-  %H = load <4 x i8>* undef
-  %Y = zext <4 x i8> %H to <4 x i64>
-  %T = add <4 x i64> %Y, %G
-  ret <4 x i64> %T
-}
-
-define<4 x i32> @const_16_32() {
-  %G = zext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i32>
-  ret <4 x i32> %G
-}
-
-define<4 x i64> @const_16_64() {
-  %G = zext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i64>
-  ret <4 x i64> %G
-}
-
diff --git a/test/CodeGen/X86/vector-blend.ll b/test/CodeGen/X86/vector-blend.ll
new file mode 100644
index 000000000000..f23b82883858
--- /dev/null
+++ b/test/CodeGen/X86/vector-blend.ll
@@ -0,0 +1,801 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+
+; AVX128 tests:
+
+define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
+; SSE2-LABEL: vsel_float:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    andps {{.*}}(%rip), %xmm1
+; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    orps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: vsel_float:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm1
+; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSSE3-NEXT:    orps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: vsel_float:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: vsel_float:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX-NEXT:    retq
+entry:
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
+  ret <4 x float> %vsel
+}
+
+define <4 x float> @vsel_float2(<4 x float> %v1, <4 x float> %v2) {
+; SSE2-LABEL: vsel_float2:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movss %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: vsel_float2:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movss %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: vsel_float2:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: vsel_float2:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT:    retq
+entry:
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
+  ret <4 x float> %vsel
+}
+
+define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
+; SSE2-LABEL: vsel_4xi8:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    andps {{.*}}(%rip), %xmm1
+; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    orps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: vsel_4xi8:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm1
+; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSSE3-NEXT:    orps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: vsel_4xi8:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: vsel_4xi8:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: vsel_4xi8:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
+; AVX2-NEXT:    retq
+entry:
+  %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
+  ret <4 x i8> %vsel
+}
+
+define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
+; SSE2-LABEL: vsel_4xi16:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    andps {{.*}}(%rip), %xmm1
+; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    orps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: vsel_4xi16:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm1
+; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSSE3-NEXT:    orps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: vsel_4xi16:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: vsel_4xi16:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: vsel_4xi16:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; AVX2-NEXT:    retq
+entry:
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
+  ret <4 x i16> %vsel
+}
+
+define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
+; SSE2-LABEL: vsel_i32:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    andps {{.*}}(%rip), %xmm1
+; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    orps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: vsel_i32:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm1
+; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSSE3-NEXT:    orps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: vsel_i32:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: vsel_i32:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: vsel_i32:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX2-NEXT:    retq
+entry:
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
+  ret <4 x i32> %vsel
+}
+
+define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
+; SSE2-LABEL: vsel_double:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movsd %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: vsel_double:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movsd %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: vsel_double:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: vsel_double:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT:    retq
+entry:
+  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %v1, <2 x double> %v2
+  ret <2 x double> %vsel
+}
+
+define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
+; SSE2-LABEL: vsel_i64:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movsd %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: vsel_i64:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movsd %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: vsel_i64:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: vsel_i64:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: vsel_i64:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX2-NEXT:    retq
+entry:
+  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v1, <2 x i64> %v2
+  ret <2 x i64> %vsel
+}
+
+define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
+; SSE2-LABEL: vsel_8xi16:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    andps {{.*}}(%rip), %xmm1
+; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    orps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: vsel_8xi16:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm1
+; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSSE3-NEXT:    orps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: vsel_8xi16:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: vsel_8xi16:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
+; AVX-NEXT:    retq
+entry:
+  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
+  ret <8 x i16> %vsel
+}
+
+define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
+; SSE2-LABEL: vsel_i8:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    andps {{.*}}(%rip), %xmm1
+; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    orps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: vsel_i8:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm1
+; SSSE3-NEXT:    andps {{.*}}(%rip), %xmm0
+; SSSE3-NEXT:    orps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: vsel_i8:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    movdqa %xmm0, %xmm2
+; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE41-NEXT:    pblendvb %xmm2, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: vsel_i8:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
+  ret <16 x i8> %vsel
+}
+
+
+; AVX256 tests:
+
+define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
+; SSE2-LABEL: vsel_float8:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movss %xmm0, %xmm2
+; SSE2-NEXT:    movss %xmm1, %xmm3
+; SSE2-NEXT:    movaps %xmm2, %xmm0
+; SSE2-NEXT:    movaps %xmm3, %xmm1
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: vsel_float8:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movss %xmm0, %xmm2
+; SSSE3-NEXT:    movss %xmm1, %xmm3
+; SSSE3-NEXT:    movaps %xmm2, %xmm0
+; SSSE3-NEXT:    movaps %xmm3, %xmm1
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: vsel_float8:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: vsel_float8:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX-NEXT:    retq
+entry:
+  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
+  ret <8 x float> %vsel
+}
+
+define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
+; SSE2-LABEL: vsel_i328:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movss %xmm0, %xmm2
+; SSE2-NEXT:    movss %xmm1, %xmm3
+; SSE2-NEXT:    movaps %xmm2, %xmm0
+; SSE2-NEXT:    movaps %xmm3, %xmm1
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: vsel_i328:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movss %xmm0, %xmm2
+; SSSE3-NEXT:    movss %xmm1, %xmm3
+; SSSE3-NEXT:    movaps %xmm2, %xmm0
+; SSSE3-NEXT:    movaps %xmm3, %xmm1
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: vsel_i328:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
+; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: vsel_i328:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: vsel_i328:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX2-NEXT:    retq
+entry:
+  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
+  ret <8 x i32> %vsel
+}
+
+define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
+; SSE2-LABEL: vsel_double8:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movsd %xmm0, %xmm4
+; SSE2-NEXT:    movsd %xmm2, %xmm6
+; SSE2-NEXT:    movaps %xmm4, %xmm0
+; SSE2-NEXT:    movaps %xmm5, %xmm1
+; SSE2-NEXT:    movaps %xmm6, %xmm2
+; SSE2-NEXT:    movaps %xmm7, %xmm3
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: vsel_double8:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movsd %xmm0, %xmm4
+; SSSE3-NEXT:    movsd %xmm2, %xmm6
+; SSSE3-NEXT:    movaps %xmm4, %xmm0
+; SSSE3-NEXT:    movaps %xmm5, %xmm1
+; SSSE3-NEXT:    movaps %xmm6, %xmm2
+; SSSE3-NEXT:    movaps %xmm7, %xmm3
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: vsel_double8:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm4[1]
+; SSE41-NEXT:    blendpd {{.*#+}} xmm2 = xmm2[0],xmm6[1]
+; SSE41-NEXT:    movaps %xmm5, %xmm1
+; SSE41-NEXT:    movaps %xmm7, %xmm3
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: vsel_double8:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3]
+; AVX-NEXT:    vblendpd {{.*#+}} ymm1 = ymm1[0],ymm3[1,2,3]
+; AVX-NEXT:    retq
+entry:
+  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
+  ret <8 x double> %vsel
+}
+
+define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
+; SSE2-LABEL: vsel_i648:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movsd %xmm0, %xmm4
+; SSE2-NEXT:    movsd %xmm2, %xmm6
+; SSE2-NEXT:    movaps %xmm4, %xmm0
+; SSE2-NEXT:    movaps %xmm5, %xmm1
+; SSE2-NEXT:    movaps %xmm6, %xmm2
+; SSE2-NEXT:    movaps %xmm7, %xmm3
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: vsel_i648:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movsd %xmm0, %xmm4
+; SSSE3-NEXT:    movsd %xmm2, %xmm6
+; SSSE3-NEXT:    movaps %xmm4, %xmm0
+; SSSE3-NEXT:    movaps %xmm5, %xmm1
+; SSSE3-NEXT:    movaps %xmm6, %xmm2
+; SSSE3-NEXT:    movaps %xmm7, %xmm3
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: vsel_i648:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm4[4,5,6,7]
+; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
+; SSE41-NEXT:    movaps %xmm5, %xmm1
+; SSE41-NEXT:    movaps %xmm7, %xmm3
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: vsel_i648:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3]
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm1 = ymm1[0],ymm3[1,2,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: vsel_i648:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
+; AVX2-NEXT:    retq
+entry:
+  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
+  ret <8 x i64> %vsel
+}
+
+define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
+; SSE2-LABEL: vsel_double4:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movsd %xmm0, %xmm2
+; SSE2-NEXT:    movsd %xmm1, %xmm3
+; SSE2-NEXT:    movaps %xmm2, %xmm0
+; SSE2-NEXT:    movaps %xmm3, %xmm1
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: vsel_double4:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movsd %xmm0, %xmm2
+; SSSE3-NEXT:    movsd %xmm1, %xmm3
+; SSSE3-NEXT:    movaps %xmm2, %xmm0
+; SSSE3-NEXT:    movaps %xmm3, %xmm1
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: vsel_double4:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSE41-NEXT:    blendpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: vsel_double4:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; AVX-NEXT:    retq
+entry:
+  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
+  ret <4 x double> %vsel
+}
+
+define <2 x double> @testa(<2 x double> %x, <2 x double> %y) {
+; SSE2-LABEL: testa:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movapd %xmm1, %xmm2
+; SSE2-NEXT:    cmplepd %xmm0, %xmm2
+; SSE2-NEXT:    andpd %xmm2, %xmm0
+; SSE2-NEXT:    andnpd %xmm1, %xmm2
+; SSE2-NEXT:    orpd %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: testa:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movapd %xmm1, %xmm2
+; SSSE3-NEXT:    cmplepd %xmm0, %xmm2
+; SSSE3-NEXT:    andpd %xmm2, %xmm0
+; SSSE3-NEXT:    andnpd %xmm1, %xmm2
+; SSSE3-NEXT:    orpd %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: testa:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    movapd %xmm0, %xmm2
+; SSE41-NEXT:    movapd %xmm1, %xmm0
+; SSE41-NEXT:    cmplepd %xmm2, %xmm0
+; SSE41-NEXT:    blendvpd %xmm2, %xmm1
+; SSE41-NEXT:    movapd %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: testa:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm2
+; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %max_is_x = fcmp oge <2 x double> %x, %y
+  %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
+  ret <2 x double> %max
+}
+
+define <2 x double> @testb(<2 x double> %x, <2 x double> %y) {
+; SSE2-LABEL: testb:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movapd %xmm1, %xmm2
+; SSE2-NEXT:    cmpnlepd %xmm0, %xmm2
+; SSE2-NEXT:    andpd %xmm2, %xmm0
+; SSE2-NEXT:    andnpd %xmm1, %xmm2
+; SSE2-NEXT:    orpd %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: testb:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movapd %xmm1, %xmm2
+; SSSE3-NEXT:    cmpnlepd %xmm0, %xmm2
+; SSSE3-NEXT:    andpd %xmm2, %xmm0
+; SSSE3-NEXT:    andnpd %xmm1, %xmm2
+; SSSE3-NEXT:    orpd %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: testb:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    movapd %xmm0, %xmm2
+; SSE41-NEXT:    movapd %xmm1, %xmm0
+; SSE41-NEXT:    cmpnlepd %xmm2, %xmm0
+; SSE41-NEXT:    blendvpd %xmm2, %xmm1
+; SSE41-NEXT:    movapd %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: testb:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vcmpnlepd %xmm0, %xmm1, %xmm2
+; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %min_is_x = fcmp ult <2 x double> %x, %y
+  %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
+  ret <2 x double> %min
+}
+
+; If we can figure out a blend has a constant mask, we should emit the
+; blend instruction with an immediate mask
+define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
+; SSE2-LABEL: constant_blendvpd_avx:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movsd %xmm1, %xmm3
+; SSE2-NEXT:    movaps %xmm2, %xmm0
+; SSE2-NEXT:    movaps %xmm3, %xmm1
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: constant_blendvpd_avx:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movsd %xmm1, %xmm3
+; SSSE3-NEXT:    movaps %xmm2, %xmm0
+; SSSE3-NEXT:    movaps %xmm3, %xmm1
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: constant_blendvpd_avx:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    blendpd {{.*#+}} xmm1 = xmm1[0],xmm3[1]
+; SSE41-NEXT:    movaps %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: constant_blendvpd_avx:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
+; AVX-NEXT:    retq
+entry:
+  %select = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %xy, <4 x double> %ab
+  ret <4 x double> %select
+}
+
+define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
+; SSE2-LABEL: constant_blendvps_avx:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movaps {{.*#+}} xmm4 = [4294967295,4294967295,4294967295,0]
+; SSE2-NEXT:    andps %xmm4, %xmm2
+; SSE2-NEXT:    movaps {{.*#+}} xmm5 = [0,0,0,4294967295]
+; SSE2-NEXT:    andps %xmm5, %xmm0
+; SSE2-NEXT:    orps %xmm2, %xmm0
+; SSE2-NEXT:    andps %xmm4, %xmm3
+; SSE2-NEXT:    andps %xmm5, %xmm1
+; SSE2-NEXT:    orps %xmm3, %xmm1
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: constant_blendvps_avx:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movaps {{.*#+}} xmm4 = [4294967295,4294967295,4294967295,0]
+; SSSE3-NEXT:    andps %xmm4, %xmm2
+; SSSE3-NEXT:    movaps {{.*#+}} xmm5 = [0,0,0,4294967295]
+; SSSE3-NEXT:    andps %xmm5, %xmm0
+; SSSE3-NEXT:    orps %xmm2, %xmm0
+; SSSE3-NEXT:    andps %xmm4, %xmm3
+; SSSE3-NEXT:    andps %xmm5, %xmm1
+; SSSE3-NEXT:    orps %xmm3, %xmm1
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: constant_blendvps_avx:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3]
+; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: constant_blendvps_avx:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6],ymm0[7]
+; AVX-NEXT:    retq
+entry:
+  %select = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %xyzw, <8 x float> %abcd
+  ret <8 x float> %select
+}
+
+define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
+; SSE2-LABEL: constant_pblendvb_avx2:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movaps {{.*#+}} xmm4 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
+; SSE2-NEXT:    andps %xmm4, %xmm2
+; SSE2-NEXT:    movaps {{.*#+}} xmm5 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
+; SSE2-NEXT:    andps %xmm5, %xmm0
+; SSE2-NEXT:    orps %xmm2, %xmm0
+; SSE2-NEXT:    andps %xmm4, %xmm3
+; SSE2-NEXT:    andps %xmm5, %xmm1
+; SSE2-NEXT:    orps %xmm3, %xmm1
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: constant_pblendvb_avx2:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movaps {{.*#+}} xmm4 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
+; SSSE3-NEXT:    andps %xmm4, %xmm2
+; SSSE3-NEXT:    movaps {{.*#+}} xmm5 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
+; SSSE3-NEXT:    andps %xmm5, %xmm0
+; SSSE3-NEXT:    orps %xmm2, %xmm0
+; SSSE3-NEXT:    andps %xmm4, %xmm3
+; SSSE3-NEXT:    andps %xmm5, %xmm1
+; SSSE3-NEXT:    orps %xmm3, %xmm1
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: constant_pblendvb_avx2:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    movdqa %xmm0, %xmm4
+; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
+; SSE41-NEXT:    pblendvb %xmm4, %xmm2
+; SSE41-NEXT:    pblendvb %xmm1, %xmm3
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    movdqa %xmm3, %xmm1
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: constant_pblendvb_avx2:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm1, %ymm1
+; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: constant_pblendvb_avx2:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0,0,0,255,0,255,255,255,0]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+entry:
+  %select = select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %xyzw, <32 x i8> %abcd
+  ret <32 x i8> %select
+}
+
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
+declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+;; 4 tests for shufflevectors that optimize to blend + immediate
+define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: blend_shufflevector_4xfloat:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: blend_shufflevector_4xfloat:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: blend_shufflevector_4xfloat:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: blend_shufflevector_4xfloat:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX-NEXT:    retq
+entry:
+  %select = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %select
+}
+
+define <8 x float> @blend_shufflevector_8xfloat(<8 x float> %a, <8 x float> %b) {
+; SSE2-LABEL: blend_shufflevector_8xfloat:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movss %xmm0, %xmm2
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
+; SSE2-NEXT:    movaps %xmm2, %xmm0
+; SSE2-NEXT:    movaps %xmm3, %xmm1
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: blend_shufflevector_8xfloat:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movss %xmm0, %xmm2
+; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
+; SSSE3-NEXT:    movaps %xmm2, %xmm0
+; SSSE3-NEXT:    movaps %xmm3, %xmm1
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: blend_shufflevector_8xfloat:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
+; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm3[0,1],xmm1[2],xmm3[3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: blend_shufflevector_8xfloat:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5],ymm0[6],ymm1[7]
+; AVX-NEXT:    retq
+entry:
+  %select = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 6, i32 15>
+  ret <8 x float> %select
+}
+
+define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
+; SSE2-LABEL: blend_shufflevector_4xdouble:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movsd %xmm0, %xmm2
+; SSE2-NEXT:    movaps %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: blend_shufflevector_4xdouble:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movsd %xmm0, %xmm2
+; SSSE3-NEXT:    movaps %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: blend_shufflevector_4xdouble:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm2[1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: blend_shufflevector_4xdouble:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
+; AVX-NEXT:    retq
+entry:
+  %select = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  ret <4 x double> %select
+}
+
+define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) {
+; SSE2-LABEL: blend_shufflevector_4xi64:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movsd %xmm2, %xmm0
+; SSE2-NEXT:    movaps %xmm3, %xmm1
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: blend_shufflevector_4xi64:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movsd %xmm2, %xmm0
+; SSSE3-NEXT:    movaps %xmm3, %xmm1
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: blend_shufflevector_4xi64:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT:    movaps %xmm3, %xmm1
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: blend_shufflevector_4xi64:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: blend_shufflevector_4xi64:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
+; AVX2-NEXT:    retq
+entry:
+  %select = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
+  ret <4 x i64> %select
+}
diff --git a/test/CodeGen/X86/vector-ctpop.ll b/test/CodeGen/X86/vector-ctpop.ll
new file mode 100644
index 000000000000..7091927a9006
--- /dev/null
+++ b/test/CodeGen/X86/vector-ctpop.ll
@@ -0,0 +1,159 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck -check-prefix=AVX2 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=-popcnt | FileCheck -check-prefix=AVX1-NOPOPCNT %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=-popcnt | FileCheck -check-prefix=AVX2-NOPOPCNT %s
+
+; Vector version of:
+; v = v - ((v >> 1) & 0x55555555)
+; v = (v & 0x33333333) + ((v >> 2) & 0x33333333)
+; v = (v + (v >> 4) & 0xF0F0F0F)
+; v = v + (v >> 8)
+; v = v + (v >> 16)
+; v = v + (v >> 32) ; i64 only
+
+define <8 x i32> @test0(<8 x i32> %x) {
+; AVX2-LABEL: @test0
+entry:
+; AVX2:  vpsrld  $1, %ymm
+; AVX2-NEXT:  vpbroadcastd
+; AVX2-NEXT:  vpand
+; AVX2-NEXT:  vpsubd
+; AVX2-NEXT:  vpbroadcastd
+; AVX2-NEXT:  vpand
+; AVX2-NEXT:  vpsrld  $2
+; AVX2-NEXT:  vpand
+; AVX2-NEXT:  vpaddd
+; AVX2-NEXT:  vpsrld  $4
+; AVX2-NEXT:  vpaddd
+; AVX2-NEXT:  vpbroadcastd
+; AVX2-NEXT:	vpand
+; AVX2-NEXT:	vpsrld	$8
+; AVX2-NEXT:	vpaddd
+; AVX2-NEXT:	vpsrld	$16
+; AVX2-NEXT:	vpaddd
+; AVX2-NEXT:	vpbroadcastd
+; AVX2-NEXT:	vpand
+  %y = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %x)
+  ret <8 x i32> %y
+}
+
+define <4 x i64> @test1(<4 x i64> %x) {
+; AVX2-NOPOPCNT-LABEL: @test1
+entry:
+;	AVX2-NOPOPCNT: vpsrlq	$1, %ymm
+;	AVX2-NOPOPCNT-NEXT: vpbroadcastq
+;	AVX2-NOPOPCNT-NEXT: vpand
+;	AVX2-NOPOPCNT-NEXT: vpsubq
+;	AVX2-NOPOPCNT-NEXT: vpbroadcastq
+;	AVX2-NOPOPCNT-NEXT: vpand
+;	AVX2-NOPOPCNT-NEXT: vpsrlq	$2
+;	AVX2-NOPOPCNT-NEXT: vpand
+;	AVX2-NOPOPCNT-NEXT: vpaddq
+;	AVX2-NOPOPCNT-NEXT: vpsrlq	$4
+;	AVX2-NOPOPCNT-NEXT: vpaddq
+;	AVX2-NOPOPCNT-NEXT: vpbroadcastq
+;	AVX2-NOPOPCNT-NEXT: vpand
+;	AVX2-NOPOPCNT-NEXT: vpsrlq	$8
+;	AVX2-NOPOPCNT-NEXT: vpaddq
+;	AVX2-NOPOPCNT-NEXT: vpsrlq	$16
+;	AVX2-NOPOPCNT-NEXT: vpaddq
+;	AVX2-NOPOPCNT-NEXT: vpsrlq	$32
+;	AVX2-NOPOPCNT-NEXT: vpaddq
+;	AVX2-NOPOPCNT-NEXT: vpbroadcastq
+;	AVX2-NOPOPCNT-NEXT: vpand
+  %y = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %x)
+  ret <4 x i64> %y
+}
+
+define <4 x i32> @test2(<4 x i32> %x) {
+; AVX2-NOPOPCNT-LABEL: @test2
+; AVX1-NOPOPCNT-LABEL: @test2
+entry:
+; AVX2-NOPOPCNT:	vpsrld	$1, %xmm
+; AVX2-NOPOPCNT-NEXT:	vpbroadcastd
+; AVX2-NOPOPCNT-NEXT:	vpand
+; AVX2-NOPOPCNT-NEXT:	vpsubd
+; AVX2-NOPOPCNT-NEXT:	vpbroadcastd
+; AVX2-NOPOPCNT-NEXT:	vpand
+; AVX2-NOPOPCNT-NEXT:	vpsrld	$2
+; AVX2-NOPOPCNT-NEXT:	vpand
+; AVX2-NOPOPCNT-NEXT:	vpaddd
+; AVX2-NOPOPCNT-NEXT:	vpsrld	$4
+; AVX2-NOPOPCNT-NEXT:	vpaddd
+; AVX2-NOPOPCNT-NEXT:	vpbroadcastd
+; AVX2-NOPOPCNT-NEXT:	vpand
+; AVX2-NOPOPCNT-NEXT:	vpsrld	$8
+; AVX2-NOPOPCNT-NEXT:	vpaddd
+; AVX2-NOPOPCNT-NEXT:	vpsrld	$16
+; AVX2-NOPOPCNT-NEXT:	vpaddd
+; AVX2-NOPOPCNT-NEXT:	vpbroadcastd
+; AVX2-NOPOPCNT-NEXT:	vpand
+; AVX1-NOPOPCNT:	vpsrld	$1, %xmm
+; AVX1-NOPOPCNT-NEXT:	vpand
+; AVX1-NOPOPCNT-NEXT:	vpsubd
+; AVX1-NOPOPCNT-NEXT:	vmovdqa
+; AVX1-NOPOPCNT-NEXT:	vpand
+; AVX1-NOPOPCNT-NEXT:	vpsrld	$2
+; AVX1-NOPOPCNT-NEXT:	vpand
+; AVX1-NOPOPCNT-NEXT:	vpaddd
+; AVX1-NOPOPCNT-NEXT:	vpsrld	$4
+; AVX1-NOPOPCNT-NEXT:	vpaddd
+; AVX1-NOPOPCNT-NEXT:	vpand
+; AVX1-NOPOPCNT-NEXT:	vpsrld	$8
+; AVX1-NOPOPCNT-NEXT:	vpaddd
+; AVX1-NOPOPCNT-NEXT:	vpsrld	$16
+; AVX1-NOPOPCNT-NEXT:	vpaddd
+; AVX1-NOPOPCNT-NEXT:	vpand
+  %y = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %x)
+  ret <4 x i32> %y
+}
+
+define <2 x i64> @test3(<2 x i64> %x) {
+; AVX2-NOPOPCNT-LABEL: @test3
+; AVX1-NOPOPCNT-LABEL: @test3
+entry:
+; AVX2-NOPOPCNT:	vpsrlq	$1, %xmm
+; AVX2-NOPOPCNT-NEXT:	vpand
+; AVX2-NOPOPCNT-NEXT:	vpsubq
+; AVX2-NOPOPCNT-NEXT:	vmovdqa
+; AVX2-NOPOPCNT-NEXT:	vpand
+; AVX2-NOPOPCNT-NEXT:	vpsrlq	$2
+; AVX2-NOPOPCNT-NEXT:	vpand
+; AVX2-NOPOPCNT-NEXT:	vpaddq
+; AVX2-NOPOPCNT-NEXT:	vpsrlq	$4
+; AVX2-NOPOPCNT-NEXT:	vpaddq
+; AVX2-NOPOPCNT-NEXT:	vpand
+; AVX2-NOPOPCNT-NEXT:	vpsrlq	$8
+; AVX2-NOPOPCNT-NEXT:	vpaddq
+; AVX2-NOPOPCNT-NEXT:	vpsrlq	$16
+; AVX2-NOPOPCNT-NEXT:	vpaddq
+; AVX2-NOPOPCNT-NEXT:	vpsrlq	$32
+; AVX2-NOPOPCNT-NEXT:	vpaddq
+; AVX2-NOPOPCNT-NEXT:	vpand
+; AVX1-NOPOPCNT:	vpsrlq	$1, %xmm
+; AVX1-NOPOPCNT-NEXT:	vpand
+; AVX1-NOPOPCNT-NEXT:	vpsubq
+; AVX1-NOPOPCNT-NEXT:	vmovdqa
+; AVX1-NOPOPCNT-NEXT:	vpand
+; AVX1-NOPOPCNT-NEXT:	vpsrlq	$2
+; AVX1-NOPOPCNT-NEXT:	vpand
+; AVX1-NOPOPCNT-NEXT:	vpaddq
+; AVX1-NOPOPCNT-NEXT:	vpsrlq	$4
+; AVX1-NOPOPCNT-NEXT:	vpaddq
+; AVX1-NOPOPCNT-NEXT:	vpand
+; AVX1-NOPOPCNT-NEXT:	vpsrlq	$8
+; AVX1-NOPOPCNT-NEXT:	vpaddq
+; AVX1-NOPOPCNT-NEXT:	vpsrlq	$16
+; AVX1-NOPOPCNT-NEXT:	vpaddq
+; AVX1-NOPOPCNT-NEXT:	vpsrlq	$32
+; AVX1-NOPOPCNT-NEXT:	vpaddq
+; AVX1-NOPOPCNT-NEXT:	vpand
+  %y = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %x)
+  ret <2 x i64> %y
+}
+
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
+
+declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
+declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
+
diff --git a/test/CodeGen/X86/vector-idiv.ll b/test/CodeGen/X86/vector-idiv.ll
index a3229073751b..4b269dc923c4 100644
--- a/test/CodeGen/X86/vector-idiv.ll
+++ b/test/CodeGen/X86/vector-idiv.ll
@@ -1,218 +1,1255 @@
-; RUN: llc -march=x86-64 -mcpu=core2 -mattr=+sse4.1 < %s | FileCheck %s -check-prefix=SSE41
-; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE
-; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX
+; RUN: llc -march=x86-64 -mcpu=core2 -mattr=+sse4.1 < %s | FileCheck %s --check-prefix=SSE41
+; RUN: llc -march=x86-64 -mcpu=core2 < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -march=x86-64 -mcpu=core-avx2 < %s | FileCheck %s --check-prefix=AVX
 
-define <4 x i32> @test1(<4 x i32> %a) {
-  %div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
-  ret <4 x i32> %div
+target triple = "x86_64-unknown-unknown"
 
+define <4 x i32> @test1(<4 x i32> %a) {
 ; SSE41-LABEL: test1:
-; SSE41: pmuludq
-; SSE41: pshufd	$49
-; SSE41: pmuludq
-; SSE41: shufps	$-35
-; SSE41: psubd
-; SSE41: psrld $1
-; SSE41: padd
-; SSE41: psrld $2
-
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
+; SSE41-NEXT:    movdqa %xmm0, %xmm2
+; SSE41-NEXT:    pmuludq %xmm1, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE41-NEXT:    pmuludq %xmm1, %xmm3
+; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
+; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE41-NEXT:    psubd %xmm2, %xmm0
+; SSE41-NEXT:    psrld $1, %xmm0
+; SSE41-NEXT:    paddd %xmm2, %xmm0
+; SSE41-NEXT:    psrld $2, %xmm0
+; SSE41-NEXT:    retq
+;
+; SSE-LABEL: test1:
+; SSE:       # BB#0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [613566757,613566757,613566757,613566757]
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    pmuludq %xmm1, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm1, %xmm3
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE-NEXT:    psubd %xmm2, %xmm0
+; SSE-NEXT:    psrld $1, %xmm0
+; SSE-NEXT:    paddd %xmm2, %xmm0
+; SSE-NEXT:    psrld $2, %xmm0
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: test1:
-; AVX: vpmuludq
-; AVX: vpshufd	$49
-; AVX: vpmuludq
-; AVX: vshufps	$-35
-; AVX: vpsubd
-; AVX: vpsrld $1
-; AVX: vpadd
-; AVX: vpsrld $2
+; AVX:       # BB#0:
+; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; AVX-NEXT:    vpmuludq %xmm1, %xmm3, %xmm1
+; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm2[1,3],xmm1[1,3]
+; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
+; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrld $2, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+  ret <4 x i32> %div
 }
 
 define <8 x i32> @test2(<8 x i32> %a) {
+; SSE41-LABEL: test2:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; SSE41-NEXT:    movdqa %xmm0, %xmm3
+; SSE41-NEXT:    pmuludq %xmm2, %xmm3
+; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
+; SSE41-NEXT:    pmuludq %xmm4, %xmm5
+; SSE41-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,3],xmm5[1,3]
+; SSE41-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,2,1,3]
+; SSE41-NEXT:    psubd %xmm3, %xmm0
+; SSE41-NEXT:    psrld $1, %xmm0
+; SSE41-NEXT:    paddd %xmm3, %xmm0
+; SSE41-NEXT:    psrld $2, %xmm0
+; SSE41-NEXT:    pmuludq %xmm1, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; SSE41-NEXT:    pmuludq %xmm4, %xmm3
+; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
+; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE41-NEXT:    psubd %xmm2, %xmm1
+; SSE41-NEXT:    psrld $1, %xmm1
+; SSE41-NEXT:    paddd %xmm2, %xmm1
+; SSE41-NEXT:    psrld $2, %xmm1
+; SSE41-NEXT:    retq
+;
+; SSE-LABEL: test2:
+; SSE:       # BB#0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    pmuludq %xmm2, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm4, %xmm5
+; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,3],xmm5[1,3]
+; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,2,1,3]
+; SSE-NEXT:    psubd %xmm3, %xmm0
+; SSE-NEXT:    psrld $1, %xmm0
+; SSE-NEXT:    paddd %xmm3, %xmm0
+; SSE-NEXT:    psrld $2, %xmm0
+; SSE-NEXT:    pmuludq %xmm1, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm4, %xmm3
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE-NEXT:    psubd %xmm2, %xmm1
+; SSE-NEXT:    psrld $1, %xmm1
+; SSE-NEXT:    paddd %xmm2, %xmm1
+; SSE-NEXT:    psrld $2, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test2:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
+; AVX-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
+; AVX-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
+; AVX-NEXT:    vpmuludq %ymm2, %ymm3, %ymm2
+; AVX-NEXT:    vpmuludq %ymm1, %ymm0, %ymm1
+; AVX-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
+; AVX-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
+; AVX-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpsrld $1, %ymm0, %ymm0
+; AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpsrld $2, %ymm0, %ymm0
+; AVX-NEXT:    retq
   %div = udiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
   ret <8 x i32> %div
-
-; AVX-LABEL: test2:
-; AVX: vpbroadcastd
-; AVX: vpalignr $4
-; AVX: vpmuludq
-; AVX: vpmuludq
-; AVX: vpblendd $170
-; AVX: vpsubd
-; AVX: vpsrld $1
-; AVX: vpadd
-; AVX: vpsrld $2
 }
 
 define <8 x i16> @test3(<8 x i16> %a) {
-  %div = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-  ret <8 x i16> %div
-
 ; SSE41-LABEL: test3:
-; SSE41: pmulhuw
-; SSE41: psubw
-; SSE41: psrlw $1
-; SSE41: paddw
-; SSE41: psrlw $2
-
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [9363,9363,9363,9363,9363,9363,9363,9363]
+; SSE41-NEXT:    pmulhuw %xmm0, %xmm1
+; SSE41-NEXT:    psubw %xmm1, %xmm0
+; SSE41-NEXT:    psrlw $1, %xmm0
+; SSE41-NEXT:    paddw %xmm1, %xmm0
+; SSE41-NEXT:    psrlw $2, %xmm0
+; SSE41-NEXT:    retq
+;
+; SSE-LABEL: test3:
+; SSE:       # BB#0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [9363,9363,9363,9363,9363,9363,9363,9363]
+; SSE-NEXT:    pmulhuw %xmm0, %xmm1
+; SSE-NEXT:    psubw %xmm1, %xmm0
+; SSE-NEXT:    psrlw $1, %xmm0
+; SSE-NEXT:    paddw %xmm1, %xmm0
+; SSE-NEXT:    psrlw $2, %xmm0
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: test3:
-; AVX: vpmulhuw
-; AVX: vpsubw
-; AVX: vpsrlw $1
-; AVX: vpaddw
-; AVX: vpsrlw $2
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
+; AVX-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
+; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrlw $2, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %div = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ret <8 x i16> %div
 }
 
 define <16 x i16> @test4(<16 x i16> %a) {
+; SSE41-LABEL: test4:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [9363,9363,9363,9363,9363,9363,9363,9363]
+; SSE41-NEXT:    movdqa %xmm0, %xmm3
+; SSE41-NEXT:    pmulhuw %xmm2, %xmm3
+; SSE41-NEXT:    psubw %xmm3, %xmm0
+; SSE41-NEXT:    psrlw $1, %xmm0
+; SSE41-NEXT:    paddw %xmm3, %xmm0
+; SSE41-NEXT:    psrlw $2, %xmm0
+; SSE41-NEXT:    pmulhuw %xmm1, %xmm2
+; SSE41-NEXT:    psubw %xmm2, %xmm1
+; SSE41-NEXT:    psrlw $1, %xmm1
+; SSE41-NEXT:    paddw %xmm2, %xmm1
+; SSE41-NEXT:    psrlw $2, %xmm1
+; SSE41-NEXT:    retq
+;
+; SSE-LABEL: test4:
+; SSE:       # BB#0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [9363,9363,9363,9363,9363,9363,9363,9363]
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    pmulhuw %xmm2, %xmm3
+; SSE-NEXT:    psubw %xmm3, %xmm0
+; SSE-NEXT:    psrlw $1, %xmm0
+; SSE-NEXT:    paddw %xmm3, %xmm0
+; SSE-NEXT:    psrlw $2, %xmm0
+; SSE-NEXT:    pmulhuw %xmm1, %xmm2
+; SSE-NEXT:    psubw %xmm2, %xmm1
+; SSE-NEXT:    psrlw $1, %xmm1
+; SSE-NEXT:    paddw %xmm2, %xmm1
+; SSE-NEXT:    psrlw $2, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test4:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmulhuw {{.*}}(%rip), %ymm0, %ymm1
+; AVX-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpsrlw $1, %ymm0, %ymm0
+; AVX-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vpsrlw $2, %ymm0, %ymm0
+; AVX-NEXT:    retq
   %div = udiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
   ret <16 x i16> %div
-
-; AVX-LABEL: test4:
-; AVX: vpmulhuw
-; AVX: vpsubw
-; AVX: vpsrlw $1
-; AVX: vpaddw
-; AVX: vpsrlw $2
-; AVX-NOT: vpmulhuw
 }
 
 define <8 x i16> @test5(<8 x i16> %a) {
-  %div = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-  ret <8 x i16> %div
-
 ; SSE41-LABEL: test5:
-; SSE41: pmulhw
-; SSE41: psrlw $15
-; SSE41: psraw $1
-; SSE41: paddw
-
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmulhw {{.*}}(%rip), %xmm0
+; SSE41-NEXT:    movdqa %xmm0, %xmm1
+; SSE41-NEXT:    psrlw $15, %xmm1
+; SSE41-NEXT:    psraw $1, %xmm0
+; SSE41-NEXT:    paddw %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; SSE-LABEL: test5:
+; SSE:       # BB#0:
+; SSE-NEXT:    pmulhw {{.*}}(%rip), %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrlw $15, %xmm1
+; SSE-NEXT:    psraw $1, %xmm0
+; SSE-NEXT:    paddw %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: test5:
-; AVX: vpmulhw
-; AVX: vpsrlw $15
-; AVX: vpsraw $1
-; AVX: vpaddw
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmulhw {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpsrlw $15, %xmm0, %xmm1
+; AVX-NEXT:    vpsraw $1, %xmm0, %xmm0
+; AVX-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %div = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  ret <8 x i16> %div
 }
 
 define <16 x i16> @test6(<16 x i16> %a) {
+; SSE41-LABEL: test6:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
+; SSE41-NEXT:    pmulhw %xmm2, %xmm0
+; SSE41-NEXT:    movdqa %xmm0, %xmm3
+; SSE41-NEXT:    psrlw $15, %xmm3
+; SSE41-NEXT:    psraw $1, %xmm0
+; SSE41-NEXT:    paddw %xmm3, %xmm0
+; SSE41-NEXT:    pmulhw %xmm2, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm2
+; SSE41-NEXT:    psrlw $15, %xmm2
+; SSE41-NEXT:    psraw $1, %xmm1
+; SSE41-NEXT:    paddw %xmm2, %xmm1
+; SSE41-NEXT:    retq
+;
+; SSE-LABEL: test6:
+; SSE:       # BB#0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [18725,18725,18725,18725,18725,18725,18725,18725]
+; SSE-NEXT:    pmulhw %xmm2, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    psrlw $15, %xmm3
+; SSE-NEXT:    psraw $1, %xmm0
+; SSE-NEXT:    paddw %xmm3, %xmm0
+; SSE-NEXT:    pmulhw %xmm2, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    psrlw $15, %xmm2
+; SSE-NEXT:    psraw $1, %xmm1
+; SSE-NEXT:    paddw %xmm2, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test6:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmulhw {{.*}}(%rip), %ymm0, %ymm0
+; AVX-NEXT:    vpsrlw $15, %ymm0, %ymm1
+; AVX-NEXT:    vpsraw $1, %ymm0, %ymm0
+; AVX-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
   %div = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
   ret <16 x i16> %div
-
-; AVX-LABEL: test6:
-; AVX: vpmulhw
-; AVX: vpsrlw $15
-; AVX: vpsraw $1
-; AVX: vpaddw
-; AVX-NOT: vpmulhw
 }
 
 define <16 x i8> @test7(<16 x i8> %a) {
-  %div = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
-  ret <16 x i8> %div
-
-; FIXME: scalarized
 ; SSE41-LABEL: test7:
-; SSE41: pext
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pextrb $1, %xmm0, %eax
+; SSE41-NEXT:    movsbl %al, %eax
+; SSE41-NEXT:    imull $-109, %eax, %ecx
+; SSE41-NEXT:    shrl $8, %ecx
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movb %al, %cl
+; SSE41-NEXT:    shrb $7, %cl
+; SSE41-NEXT:    sarb $2, %al
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movzbl %al, %eax
+; SSE41-NEXT:    pextrb $0, %xmm0, %ecx
+; SSE41-NEXT:    movsbl %cl, %ecx
+; SSE41-NEXT:    imull $-109, %ecx, %edx
+; SSE41-NEXT:    shrl $8, %edx
+; SSE41-NEXT:    addb %dl, %cl
+; SSE41-NEXT:    movb %cl, %dl
+; SSE41-NEXT:    shrb $7, %dl
+; SSE41-NEXT:    sarb $2, %cl
+; SSE41-NEXT:    addb %dl, %cl
+; SSE41-NEXT:    movzbl %cl, %ecx
+; SSE41-NEXT:    movd %ecx, %xmm1
+; SSE41-NEXT:    pinsrb $1, %eax, %xmm1
+; SSE41-NEXT:    pextrb $2, %xmm0, %eax
+; SSE41-NEXT:    movsbl %al, %eax
+; SSE41-NEXT:    imull $-109, %eax, %ecx
+; SSE41-NEXT:    shrl $8, %ecx
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movb %al, %cl
+; SSE41-NEXT:    shrb $7, %cl
+; SSE41-NEXT:    sarb $2, %al
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movzbl %al, %eax
+; SSE41-NEXT:    pinsrb $2, %eax, %xmm1
+; SSE41-NEXT:    pextrb $3, %xmm0, %eax
+; SSE41-NEXT:    movsbl %al, %eax
+; SSE41-NEXT:    imull $-109, %eax, %ecx
+; SSE41-NEXT:    shrl $8, %ecx
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movb %al, %cl
+; SSE41-NEXT:    shrb $7, %cl
+; SSE41-NEXT:    sarb $2, %al
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movzbl %al, %eax
+; SSE41-NEXT:    pinsrb $3, %eax, %xmm1
+; SSE41-NEXT:    pextrb $4, %xmm0, %eax
+; SSE41-NEXT:    movsbl %al, %eax
+; SSE41-NEXT:    imull $-109, %eax, %ecx
+; SSE41-NEXT:    shrl $8, %ecx
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movb %al, %cl
+; SSE41-NEXT:    shrb $7, %cl
+; SSE41-NEXT:    sarb $2, %al
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movzbl %al, %eax
+; SSE41-NEXT:    pinsrb $4, %eax, %xmm1
+; SSE41-NEXT:    pextrb $5, %xmm0, %eax
+; SSE41-NEXT:    movsbl %al, %eax
+; SSE41-NEXT:    imull $-109, %eax, %ecx
+; SSE41-NEXT:    shrl $8, %ecx
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movb %al, %cl
+; SSE41-NEXT:    shrb $7, %cl
+; SSE41-NEXT:    sarb $2, %al
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movzbl %al, %eax
+; SSE41-NEXT:    pinsrb $5, %eax, %xmm1
+; SSE41-NEXT:    pextrb $6, %xmm0, %eax
+; SSE41-NEXT:    movsbl %al, %eax
+; SSE41-NEXT:    imull $-109, %eax, %ecx
+; SSE41-NEXT:    shrl $8, %ecx
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movb %al, %cl
+; SSE41-NEXT:    shrb $7, %cl
+; SSE41-NEXT:    sarb $2, %al
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movzbl %al, %eax
+; SSE41-NEXT:    pinsrb $6, %eax, %xmm1
+; SSE41-NEXT:    pextrb $7, %xmm0, %eax
+; SSE41-NEXT:    movsbl %al, %eax
+; SSE41-NEXT:    imull $-109, %eax, %ecx
+; SSE41-NEXT:    shrl $8, %ecx
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movb %al, %cl
+; SSE41-NEXT:    shrb $7, %cl
+; SSE41-NEXT:    sarb $2, %al
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movzbl %al, %eax
+; SSE41-NEXT:    pinsrb $7, %eax, %xmm1
+; SSE41-NEXT:    pextrb $8, %xmm0, %eax
+; SSE41-NEXT:    movsbl %al, %eax
+; SSE41-NEXT:    imull $-109, %eax, %ecx
+; SSE41-NEXT:    shrl $8, %ecx
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movb %al, %cl
+; SSE41-NEXT:    shrb $7, %cl
+; SSE41-NEXT:    sarb $2, %al
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movzbl %al, %eax
+; SSE41-NEXT:    pinsrb $8, %eax, %xmm1
+; SSE41-NEXT:    pextrb $9, %xmm0, %eax
+; SSE41-NEXT:    movsbl %al, %eax
+; SSE41-NEXT:    imull $-109, %eax, %ecx
+; SSE41-NEXT:    shrl $8, %ecx
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movb %al, %cl
+; SSE41-NEXT:    shrb $7, %cl
+; SSE41-NEXT:    sarb $2, %al
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movzbl %al, %eax
+; SSE41-NEXT:    pinsrb $9, %eax, %xmm1
+; SSE41-NEXT:    pextrb $10, %xmm0, %eax
+; SSE41-NEXT:    movsbl %al, %eax
+; SSE41-NEXT:    imull $-109, %eax, %ecx
+; SSE41-NEXT:    shrl $8, %ecx
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movb %al, %cl
+; SSE41-NEXT:    shrb $7, %cl
+; SSE41-NEXT:    sarb $2, %al
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movzbl %al, %eax
+; SSE41-NEXT:    pinsrb $10, %eax, %xmm1
+; SSE41-NEXT:    pextrb $11, %xmm0, %eax
+; SSE41-NEXT:    movsbl %al, %eax
+; SSE41-NEXT:    imull $-109, %eax, %ecx
+; SSE41-NEXT:    shrl $8, %ecx
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movb %al, %cl
+; SSE41-NEXT:    shrb $7, %cl
+; SSE41-NEXT:    sarb $2, %al
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movzbl %al, %eax
+; SSE41-NEXT:    pinsrb $11, %eax, %xmm1
+; SSE41-NEXT:    pextrb $12, %xmm0, %eax
+; SSE41-NEXT:    movsbl %al, %eax
+; SSE41-NEXT:    imull $-109, %eax, %ecx
+; SSE41-NEXT:    shrl $8, %ecx
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movb %al, %cl
+; SSE41-NEXT:    shrb $7, %cl
+; SSE41-NEXT:    sarb $2, %al
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movzbl %al, %eax
+; SSE41-NEXT:    pinsrb $12, %eax, %xmm1
+; SSE41-NEXT:    pextrb $13, %xmm0, %eax
+; SSE41-NEXT:    movsbl %al, %eax
+; SSE41-NEXT:    imull $-109, %eax, %ecx
+; SSE41-NEXT:    shrl $8, %ecx
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movb %al, %cl
+; SSE41-NEXT:    shrb $7, %cl
+; SSE41-NEXT:    sarb $2, %al
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movzbl %al, %eax
+; SSE41-NEXT:    pinsrb $13, %eax, %xmm1
+; SSE41-NEXT:    pextrb $14, %xmm0, %eax
+; SSE41-NEXT:    movsbl %al, %eax
+; SSE41-NEXT:    imull $-109, %eax, %ecx
+; SSE41-NEXT:    shrl $8, %ecx
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movb %al, %cl
+; SSE41-NEXT:    shrb $7, %cl
+; SSE41-NEXT:    sarb $2, %al
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movzbl %al, %eax
+; SSE41-NEXT:    pinsrb $14, %eax, %xmm1
+; SSE41-NEXT:    pextrb $15, %xmm0, %eax
+; SSE41-NEXT:    movsbl %al, %eax
+; SSE41-NEXT:    imull $-109, %eax, %ecx
+; SSE41-NEXT:    shrl $8, %ecx
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movb %al, %cl
+; SSE41-NEXT:    shrb $7, %cl
+; SSE41-NEXT:    sarb $2, %al
+; SSE41-NEXT:    addb %cl, %al
+; SSE41-NEXT:    movzbl %al, %eax
+; SSE41-NEXT:    pinsrb $15, %eax, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; SSE-LABEL: test7:
+; SSE:       # BB#0:
+; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm0
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm1
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm2
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm0
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm1
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm2
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm3
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm1
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7]
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm2
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm0
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm3
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm2
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm0
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm3
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm4
+; SSE-NEXT:    movsbl -{{[0-9]+}}(%rsp), %eax
+; SSE-NEXT:    imull $-109, %eax, %ecx
+; SSE-NEXT:    shrl $8, %ecx
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movb %cl, %al
+; SSE-NEXT:    shrb $7, %al
+; SSE-NEXT:    sarb $2, %cl
+; SSE-NEXT:    addb %al, %cl
+; SSE-NEXT:    movzbl %cl, %eax
+; SSE-NEXT:    movd %eax, %xmm0
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: test7:
-; AVX: pext
+; AVX:       # BB#0:
+; AVX-NEXT:    vpextrb $1, %xmm0, %eax
+; AVX-NEXT:    movsbl %al, %eax
+; AVX-NEXT:    imull $-109, %eax, %ecx
+; AVX-NEXT:    shrl $8, %ecx
+; AVX-NEXT:    addb %cl, %al
+; AVX-NEXT:    movb %al, %cl
+; AVX-NEXT:    shrb $7, %cl
+; AVX-NEXT:    sarb $2, %al
+; AVX-NEXT:    addb %cl, %al
+; AVX-NEXT:    movzbl %al, %eax
+; AVX-NEXT:    vpextrb $0, %xmm0, %ecx
+; AVX-NEXT:    movsbl %cl, %ecx
+; AVX-NEXT:    imull $-109, %ecx, %edx
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movb %cl, %dl
+; AVX-NEXT:    shrb $7, %dl
+; AVX-NEXT:    sarb $2, %cl
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movzbl %cl, %ecx
+; AVX-NEXT:    vmovd %ecx, %xmm1
+; AVX-NEXT:    vpextrb $2, %xmm0, %ecx
+; AVX-NEXT:    movsbl %cl, %ecx
+; AVX-NEXT:    imull $-109, %ecx, %edx
+; AVX-NEXT:    vpinsrb $1, %eax, %xmm1, %xmm1
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movb %cl, %al
+; AVX-NEXT:    shrb $7, %al
+; AVX-NEXT:    sarb $2, %cl
+; AVX-NEXT:    addb %al, %cl
+; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    vpextrb $3, %xmm0, %ecx
+; AVX-NEXT:    movsbl %cl, %ecx
+; AVX-NEXT:    imull $-109, %ecx, %edx
+; AVX-NEXT:    vpinsrb $2, %eax, %xmm1, %xmm1
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movb %cl, %al
+; AVX-NEXT:    shrb $7, %al
+; AVX-NEXT:    sarb $2, %cl
+; AVX-NEXT:    addb %al, %cl
+; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    vpextrb $4, %xmm0, %ecx
+; AVX-NEXT:    movsbl %cl, %ecx
+; AVX-NEXT:    imull $-109, %ecx, %edx
+; AVX-NEXT:    vpinsrb $3, %eax, %xmm1, %xmm1
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movb %cl, %al
+; AVX-NEXT:    shrb $7, %al
+; AVX-NEXT:    sarb $2, %cl
+; AVX-NEXT:    addb %al, %cl
+; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    vpextrb $5, %xmm0, %ecx
+; AVX-NEXT:    movsbl %cl, %ecx
+; AVX-NEXT:    imull $-109, %ecx, %edx
+; AVX-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movb %cl, %al
+; AVX-NEXT:    shrb $7, %al
+; AVX-NEXT:    sarb $2, %cl
+; AVX-NEXT:    addb %al, %cl
+; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    vpextrb $6, %xmm0, %ecx
+; AVX-NEXT:    movsbl %cl, %ecx
+; AVX-NEXT:    imull $-109, %ecx, %edx
+; AVX-NEXT:    vpinsrb $5, %eax, %xmm1, %xmm1
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movb %cl, %al
+; AVX-NEXT:    shrb $7, %al
+; AVX-NEXT:    sarb $2, %cl
+; AVX-NEXT:    addb %al, %cl
+; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    vpextrb $7, %xmm0, %ecx
+; AVX-NEXT:    movsbl %cl, %ecx
+; AVX-NEXT:    imull $-109, %ecx, %edx
+; AVX-NEXT:    vpinsrb $6, %eax, %xmm1, %xmm1
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movb %cl, %al
+; AVX-NEXT:    shrb $7, %al
+; AVX-NEXT:    sarb $2, %cl
+; AVX-NEXT:    addb %al, %cl
+; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    vpextrb $8, %xmm0, %ecx
+; AVX-NEXT:    movsbl %cl, %ecx
+; AVX-NEXT:    imull $-109, %ecx, %edx
+; AVX-NEXT:    vpinsrb $7, %eax, %xmm1, %xmm1
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movb %cl, %al
+; AVX-NEXT:    shrb $7, %al
+; AVX-NEXT:    sarb $2, %cl
+; AVX-NEXT:    addb %al, %cl
+; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    vpextrb $9, %xmm0, %ecx
+; AVX-NEXT:    movsbl %cl, %ecx
+; AVX-NEXT:    imull $-109, %ecx, %edx
+; AVX-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movb %cl, %al
+; AVX-NEXT:    shrb $7, %al
+; AVX-NEXT:    sarb $2, %cl
+; AVX-NEXT:    addb %al, %cl
+; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    vpextrb $10, %xmm0, %ecx
+; AVX-NEXT:    movsbl %cl, %ecx
+; AVX-NEXT:    imull $-109, %ecx, %edx
+; AVX-NEXT:    vpinsrb $9, %eax, %xmm1, %xmm1
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movb %cl, %al
+; AVX-NEXT:    shrb $7, %al
+; AVX-NEXT:    sarb $2, %cl
+; AVX-NEXT:    addb %al, %cl
+; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    vpextrb $11, %xmm0, %ecx
+; AVX-NEXT:    movsbl %cl, %ecx
+; AVX-NEXT:    imull $-109, %ecx, %edx
+; AVX-NEXT:    vpinsrb $10, %eax, %xmm1, %xmm1
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movb %cl, %al
+; AVX-NEXT:    shrb $7, %al
+; AVX-NEXT:    sarb $2, %cl
+; AVX-NEXT:    addb %al, %cl
+; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    vpextrb $12, %xmm0, %ecx
+; AVX-NEXT:    movsbl %cl, %ecx
+; AVX-NEXT:    imull $-109, %ecx, %edx
+; AVX-NEXT:    vpinsrb $11, %eax, %xmm1, %xmm1
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movb %cl, %al
+; AVX-NEXT:    shrb $7, %al
+; AVX-NEXT:    sarb $2, %cl
+; AVX-NEXT:    addb %al, %cl
+; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    vpextrb $13, %xmm0, %ecx
+; AVX-NEXT:    movsbl %cl, %ecx
+; AVX-NEXT:    imull $-109, %ecx, %edx
+; AVX-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movb %cl, %al
+; AVX-NEXT:    shrb $7, %al
+; AVX-NEXT:    sarb $2, %cl
+; AVX-NEXT:    addb %al, %cl
+; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    vpextrb $14, %xmm0, %ecx
+; AVX-NEXT:    movsbl %cl, %ecx
+; AVX-NEXT:    imull $-109, %ecx, %edx
+; AVX-NEXT:    vpinsrb $13, %eax, %xmm1, %xmm1
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movb %cl, %al
+; AVX-NEXT:    shrb $7, %al
+; AVX-NEXT:    sarb $2, %cl
+; AVX-NEXT:    addb %al, %cl
+; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    vpextrb $15, %xmm0, %ecx
+; AVX-NEXT:    movsbl %cl, %ecx
+; AVX-NEXT:    imull $-109, %ecx, %edx
+; AVX-NEXT:    vpinsrb $14, %eax, %xmm1, %xmm0
+; AVX-NEXT:    shrl $8, %edx
+; AVX-NEXT:    addb %dl, %cl
+; AVX-NEXT:    movb %cl, %al
+; AVX-NEXT:    shrb $7, %al
+; AVX-NEXT:    sarb $2, %cl
+; AVX-NEXT:    addb %al, %cl
+; AVX-NEXT:    movzbl %cl, %eax
+; AVX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %div = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
+  ret <16 x i8> %div
 }
 
 define <4 x i32> @test8(<4 x i32> %a) {
-  %div = sdiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
-  ret <4 x i32> %div
-
 ; SSE41-LABEL: test8:
-; SSE41: pmuldq
-; SSE41: pshufd	$49
-; SSE41-NOT: pshufd	$49
-; SSE41: pmuldq
-; SSE41: shufps	$-35
-; SSE41: pshufd	$-40
-; SSE41: padd
-; SSE41: psrld $31
-; SSE41: psrad $2
-; SSE41: padd
-
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
+; SSE41-NEXT:    movdqa %xmm0, %xmm1
+; SSE41-NEXT:    pmuldq %xmm2, %xmm1
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; SSE41-NEXT:    pmuldq %xmm2, %xmm3
+; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,3],xmm3[1,3]
+; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; SSE41-NEXT:    paddd %xmm0, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    psrld $31, %xmm0
+; SSE41-NEXT:    psrad $2, %xmm1
+; SSE41-NEXT:    paddd %xmm0, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
 ; SSE-LABEL: test8:
-; SSE: pmuludq
-; SSE: pshufd	$49
-; SSE-NOT: pshufd	$49
-; SSE: pmuludq
-; SSE: shufps	$-35
-; SSE: pshufd	$-40
-; SSE: psubd
-; SSE: padd
-; SSE: psrld $31
-; SSE: psrad $2
-; SSE: padd
-
+; SSE:       # BB#0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
+; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    psrad $31, %xmm1
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    psrad $31, %xmm3
+; SSE-NEXT:    pand %xmm2, %xmm3
+; SSE-NEXT:    paddd %xmm1, %xmm3
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    pmuludq %xmm2, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm2, %xmm4
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,3],xmm4[1,3]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; SSE-NEXT:    psubd %xmm3, %xmm1
+; SSE-NEXT:    paddd %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    psrld $31, %xmm0
+; SSE-NEXT:    psrad $2, %xmm1
+; SSE-NEXT:    paddd %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
 ; AVX-LABEL: test8:
-; AVX: vpmuldq
-; AVX: vpshufd	$49
-; AVX-NOT: vpshufd	$49
-; AVX: vpmuldq
-; AVX: vshufps	$-35
-; AVX: vpshufd	$-40
-; AVX: vpadd
-; AVX: vpsrld $31
-; AVX: vpsrad $2
-; AVX: vpadd
+; AVX:       # BB#0:
+; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX-NEXT:    vpmuldq %xmm1, %xmm0, %xmm2
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; AVX-NEXT:    vpmuldq %xmm1, %xmm3, %xmm1
+; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm2[1,3],xmm1[1,3]
+; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; AVX-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpsrld $31, %xmm0, %xmm1
+; AVX-NEXT:    vpsrad $2, %xmm0, %xmm0
+; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %div = sdiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+  ret <4 x i32> %div
 }
 
 define <8 x i32> @test9(<8 x i32> %a) {
+; SSE41-LABEL: test9:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa %xmm1, %xmm2
+; SSE41-NEXT:    movdqa %xmm0, %xmm3
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
+; SSE41-NEXT:   # kill: XMM0<def> XMM3<kill>
+; SSE41-NEXT:    pmuldq %xmm1, %xmm0
+; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm3[1,1,3,3]
+; SSE41-NEXT:    pmuldq %xmm4, %xmm5
+; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm5[1,3]
+; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE41-NEXT:    paddd %xmm3, %xmm0
+; SSE41-NEXT:    movdqa %xmm0, %xmm3
+; SSE41-NEXT:    psrld $31, %xmm3
+; SSE41-NEXT:    psrad $2, %xmm0
+; SSE41-NEXT:    paddd %xmm3, %xmm0
+; SSE41-NEXT:    pmuldq %xmm2, %xmm1
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; SSE41-NEXT:    pmuldq %xmm4, %xmm3
+; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,3],xmm3[1,3]
+; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; SSE41-NEXT:    paddd %xmm2, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm2
+; SSE41-NEXT:    psrld $31, %xmm2
+; SSE41-NEXT:    psrad $2, %xmm1
+; SSE41-NEXT:    paddd %xmm2, %xmm1
+; SSE41-NEXT:    retq
+;
+; SSE-LABEL: test9:
+; SSE:       # BB#0:
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [2454267027,2454267027,2454267027,2454267027]
+; SSE-NEXT:    movdqa %xmm1, %xmm4
+; SSE-NEXT:    psrad $31, %xmm4
+; SSE-NEXT:    movdqa %xmm4, %xmm0
+; SSE-NEXT:    pand %xmm3, %xmm0
+; SSE-NEXT:    movdqa %xmm3, %xmm5
+; SSE-NEXT:    psrad $31, %xmm5
+; SSE-NEXT:    pand %xmm1, %xmm5
+; SSE-NEXT:    paddd %xmm0, %xmm5
+; SSE-NEXT:    movdqa %xmm3, %xmm0
+; SSE-NEXT:    pmuludq %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm1[1,1,3,3]
+; SSE-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm6, %xmm7
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm7[1,3]
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT:    psubd %xmm5, %xmm0
+; SSE-NEXT:    paddd %xmm3, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    psrld $31, %xmm3
+; SSE-NEXT:    psrad $2, %xmm0
+; SSE-NEXT:    paddd %xmm3, %xmm0
+; SSE-NEXT:    pand %xmm2, %xmm4
+; SSE-NEXT:    movdqa %xmm2, %xmm3
+; SSE-NEXT:    psrad $31, %xmm3
+; SSE-NEXT:    pand %xmm1, %xmm3
+; SSE-NEXT:    paddd %xmm4, %xmm3
+; SSE-NEXT:    pmuludq %xmm2, %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm6, %xmm4
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,3],xmm4[1,3]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; SSE-NEXT:    psubd %xmm3, %xmm1
+; SSE-NEXT:    paddd %xmm2, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    psrld $31, %xmm2
+; SSE-NEXT:    psrad $2, %xmm1
+; SSE-NEXT:    paddd %xmm2, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test9:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
+; AVX-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
+; AVX-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
+; AVX-NEXT:    vpmuldq %ymm2, %ymm3, %ymm2
+; AVX-NEXT:    vpmuldq %ymm1, %ymm0, %ymm1
+; AVX-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
+; AVX-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
+; AVX-NEXT:    vpaddd %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    vpsrld $31, %ymm0, %ymm1
+; AVX-NEXT:    vpsrad $2, %ymm0, %ymm0
+; AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
   %div = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
   ret <8 x i32> %div
-
-; AVX-LABEL: test9:
-; AVX: vpalignr $4
-; AVX: vpbroadcastd
-; AVX: vpmuldq
-; AVX: vpmuldq
-; AVX: vpblendd $170
-; AVX: vpadd
-; AVX: vpsrld $31
-; AVX: vpsrad $2
-; AVX: vpadd
 }
 
 define <8 x i32> @test10(<8 x i32> %a) {
+; SSE41-LABEL: test10:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; SSE41-NEXT:    movdqa %xmm0, %xmm3
+; SSE41-NEXT:    pmuludq %xmm2, %xmm3
+; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
+; SSE41-NEXT:    pmuludq %xmm4, %xmm5
+; SSE41-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,3],xmm5[1,3]
+; SSE41-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,2,1,3]
+; SSE41-NEXT:    movdqa %xmm0, %xmm5
+; SSE41-NEXT:    psubd %xmm3, %xmm5
+; SSE41-NEXT:    psrld $1, %xmm5
+; SSE41-NEXT:    paddd %xmm3, %xmm5
+; SSE41-NEXT:    psrld $2, %xmm5
+; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [7,7,7,7]
+; SSE41-NEXT:    pmulld %xmm3, %xmm5
+; SSE41-NEXT:    psubd %xmm5, %xmm0
+; SSE41-NEXT:    pmuludq %xmm1, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
+; SSE41-NEXT:    pmuludq %xmm4, %xmm5
+; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,3],xmm5[1,3]
+; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE41-NEXT:    movdqa %xmm1, %xmm4
+; SSE41-NEXT:    psubd %xmm2, %xmm4
+; SSE41-NEXT:    psrld $1, %xmm4
+; SSE41-NEXT:    paddd %xmm2, %xmm4
+; SSE41-NEXT:    psrld $2, %xmm4
+; SSE41-NEXT:    pmulld %xmm3, %xmm4
+; SSE41-NEXT:    psubd %xmm4, %xmm1
+; SSE41-NEXT:    retq
+;
+; SSE-LABEL: test10:
+; SSE:       # BB#0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [613566757,613566757,613566757,613566757]
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    pmuludq %xmm2, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm4, %xmm5
+; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,3],xmm5[1,3]
+; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,2,1,3]
+; SSE-NEXT:    movdqa %xmm0, %xmm5
+; SSE-NEXT:    psubd %xmm3, %xmm5
+; SSE-NEXT:    psrld $1, %xmm5
+; SSE-NEXT:    paddd %xmm3, %xmm5
+; SSE-NEXT:    psrld $2, %xmm5
+; SSE-NEXT:    movdqa {{.*#+}} xmm3 = [7,7,7,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm3, %xmm5
+; SSE-NEXT:    pmuludq %xmm3, %xmm6
+; SSE-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,2],xmm6[0,2]
+; SSE-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,2,1,3]
+; SSE-NEXT:    psubd %xmm5, %xmm0
+; SSE-NEXT:    pmuludq %xmm1, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm1[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm4, %xmm5
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,3],xmm5[1,3]
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE-NEXT:    movdqa %xmm1, %xmm4
+; SSE-NEXT:    psubd %xmm2, %xmm4
+; SSE-NEXT:    psrld $1, %xmm4
+; SSE-NEXT:    paddd %xmm2, %xmm4
+; SSE-NEXT:    psrld $2, %xmm4
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm3, %xmm4
+; SSE-NEXT:    pmuludq %xmm3, %xmm2
+; SSE-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,2],xmm2[0,2]
+; SSE-NEXT:    shufps {{.*#+}} xmm4 = xmm4[0,2,1,3]
+; SSE-NEXT:    psubd %xmm4, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test10:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
+; AVX-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
+; AVX-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
+; AVX-NEXT:    vpmuludq %ymm2, %ymm3, %ymm2
+; AVX-NEXT:    vpmuludq %ymm1, %ymm0, %ymm1
+; AVX-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
+; AVX-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
+; AVX-NEXT:    vpsubd %ymm1, %ymm0, %ymm2
+; AVX-NEXT:    vpsrld $1, %ymm2, %ymm2
+; AVX-NEXT:    vpaddd %ymm1, %ymm2, %ymm1
+; AVX-NEXT:    vpsrld $2, %ymm1, %ymm1
+; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
+; AVX-NEXT:    vpmulld %ymm2, %ymm1, %ymm1
+; AVX-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
   %rem = urem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
   ret <8 x i32> %rem
-
-; AVX-LABEL: test10:
-; AVX: vpbroadcastd
-; AVX: vpalignr $4
-; AVX: vpmuludq
-; AVX: vpmuludq
-; AVX: vpblendd $170
-; AVX: vpsubd
-; AVX: vpsrld $1
-; AVX: vpadd
-; AVX: vpsrld $2
-; AVX: vpmulld
 }
 
 define <8 x i32> @test11(<8 x i32> %a) {
+; SSE41-LABEL: test11:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
+; SSE41-NEXT:    movdqa %xmm0, %xmm3
+; SSE41-NEXT:    pmuldq %xmm2, %xmm3
+; SSE41-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
+; SSE41-NEXT:    pmuldq %xmm4, %xmm5
+; SSE41-NEXT:    shufps {{.*#+}} xmm3 = xmm3[1,3],xmm5[1,3]
+; SSE41-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,2,1,3]
+; SSE41-NEXT:    paddd %xmm0, %xmm3
+; SSE41-NEXT:    movdqa %xmm3, %xmm5
+; SSE41-NEXT:    psrld $31, %xmm5
+; SSE41-NEXT:    psrad $2, %xmm3
+; SSE41-NEXT:    paddd %xmm5, %xmm3
+; SSE41-NEXT:    movdqa {{.*#+}} xmm5 = [7,7,7,7]
+; SSE41-NEXT:    pmulld %xmm5, %xmm3
+; SSE41-NEXT:    psubd %xmm3, %xmm0
+; SSE41-NEXT:    pmuldq %xmm1, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; SSE41-NEXT:    pmuldq %xmm4, %xmm3
+; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
+; SSE41-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE41-NEXT:    paddd %xmm1, %xmm2
+; SSE41-NEXT:    movdqa %xmm2, %xmm3
+; SSE41-NEXT:    psrld $31, %xmm3
+; SSE41-NEXT:    psrad $2, %xmm2
+; SSE41-NEXT:    paddd %xmm3, %xmm2
+; SSE41-NEXT:    pmulld %xmm5, %xmm2
+; SSE41-NEXT:    psubd %xmm2, %xmm1
+; SSE41-NEXT:    retq
+;
+; SSE-LABEL: test11:
+; SSE:       # BB#0:
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
+; SSE-NEXT:    movdqa %xmm2, %xmm3
+; SSE-NEXT:    psrad $31, %xmm3
+; SSE-NEXT:    movdqa %xmm3, %xmm4
+; SSE-NEXT:    pand %xmm0, %xmm4
+; SSE-NEXT:    movdqa %xmm0, %xmm6
+; SSE-NEXT:    psrad $31, %xmm6
+; SSE-NEXT:    pand %xmm2, %xmm6
+; SSE-NEXT:    paddd %xmm4, %xmm6
+; SSE-NEXT:    movdqa %xmm0, %xmm7
+; SSE-NEXT:    pmuludq %xmm2, %xmm7
+; SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm5, %xmm4
+; SSE-NEXT:    shufps {{.*#+}} xmm7 = xmm7[1,3],xmm4[1,3]
+; SSE-NEXT:    shufps {{.*#+}} xmm7 = xmm7[0,2,1,3]
+; SSE-NEXT:    psubd %xmm6, %xmm7
+; SSE-NEXT:    paddd %xmm0, %xmm7
+; SSE-NEXT:    movdqa %xmm7, %xmm4
+; SSE-NEXT:    psrld $31, %xmm4
+; SSE-NEXT:    psrad $2, %xmm7
+; SSE-NEXT:    paddd %xmm4, %xmm7
+; SSE-NEXT:    movdqa {{.*#+}} xmm4 = [7,7,7,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm6 = xmm7[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm4, %xmm7
+; SSE-NEXT:    pmuludq %xmm4, %xmm6
+; SSE-NEXT:    shufps {{.*#+}} xmm7 = xmm7[0,2],xmm6[0,2]
+; SSE-NEXT:    shufps {{.*#+}} xmm7 = xmm7[0,2,1,3]
+; SSE-NEXT:    psubd %xmm7, %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm3
+; SSE-NEXT:    movdqa %xmm1, %xmm6
+; SSE-NEXT:    psrad $31, %xmm6
+; SSE-NEXT:    pand %xmm2, %xmm6
+; SSE-NEXT:    paddd %xmm3, %xmm6
+; SSE-NEXT:    pmuludq %xmm1, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm5, %xmm3
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,3],xmm3[1,3]
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE-NEXT:    psubd %xmm6, %xmm2
+; SSE-NEXT:    paddd %xmm1, %xmm2
+; SSE-NEXT:    movdqa %xmm2, %xmm3
+; SSE-NEXT:    psrld $31, %xmm3
+; SSE-NEXT:    psrad $2, %xmm2
+; SSE-NEXT:    paddd %xmm3, %xmm2
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm4, %xmm2
+; SSE-NEXT:    pmuludq %xmm4, %xmm3
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE-NEXT:    psubd %xmm2, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test11:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
+; AVX-NEXT:    vpshufd {{.*#+}} ymm2 = ymm1[1,1,3,3,5,5,7,7]
+; AVX-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[1,1,3,3,5,5,7,7]
+; AVX-NEXT:    vpmuldq %ymm2, %ymm3, %ymm2
+; AVX-NEXT:    vpmuldq %ymm1, %ymm0, %ymm1
+; AVX-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,1,3,3,5,5,7,7]
+; AVX-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7]
+; AVX-NEXT:    vpaddd %ymm0, %ymm1, %ymm1
+; AVX-NEXT:    vpsrld $31, %ymm1, %ymm2
+; AVX-NEXT:    vpsrad $2, %ymm1, %ymm1
+; AVX-NEXT:    vpaddd %ymm2, %ymm1, %ymm1
+; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm2
+; AVX-NEXT:    vpmulld %ymm2, %ymm1, %ymm1
+; AVX-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
   %rem = srem <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
   ret <8 x i32> %rem
-
-; AVX-LABEL: test11:
-; AVX: vpalignr $4
-; AVX: vpbroadcastd
-; AVX: vpmuldq
-; AVX: vpmuldq
-; AVX: vpblendd $170
-; AVX: vpadd
-; AVX: vpsrld $31
-; AVX: vpsrad $2
-; AVX: vpadd
-; AVX: vpmulld
 }
 
 define <2 x i16> @test12() {
+; SSE41-LABEL: test12:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    xorps %xmm0, %xmm0
+; SSE41-NEXT:    retq
+;
+; SSE-LABEL: test12:
+; SSE:       # BB#0:
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: test12:
+; AVX:       # BB#0:
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %I8 = insertelement <2 x i16> zeroinitializer, i16 -1, i32 0
   %I9 = insertelement <2 x i16> %I8, i16 -1, i32 1
   %B9 = urem <2 x i16> %I9, %I9
   ret <2 x i16> %B9
+}
 
-; AVX-LABEL: test12:
-; AVX: xorps
+define <4 x i32> @PR20355(<4 x i32> %a) {
+; SSE41-LABEL: PR20355:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [1431655766,1431655766,1431655766,1431655766]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE41-NEXT:    pmuldq %xmm1, %xmm0
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE41-NEXT:    pmuldq %xmm2, %xmm1
+; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; SSE41-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE41-NEXT:    movaps %xmm0, %xmm1
+; SSE41-NEXT:    psrld $31, %xmm1
+; SSE41-NEXT:    paddd %xmm0, %xmm1
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; SSE-LABEL: PR20355:
+; SSE:       # BB#0: # %entry
+; SSE-NEXT:    movdqa {{.*#+}} xmm1 = [1431655766,1431655766,1431655766,1431655766]
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    psrad $31, %xmm2
+; SSE-NEXT:    pand %xmm0, %xmm2
+; SSE-NEXT:    movdqa %xmm0, %xmm3
+; SSE-NEXT:    psrad $31, %xmm3
+; SSE-NEXT:    pand %xmm1, %xmm3
+; SSE-NEXT:    paddd %xmm2, %xmm3
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE-NEXT:    pmuludq %xmm2, %xmm1
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT:    psubd %xmm3, %xmm0
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    psrld $31, %xmm1
+; SSE-NEXT:    paddd %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: PR20355:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX-NEXT:    vpmuldq %xmm1, %xmm0, %xmm2
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm2[1,3],xmm0[1,3]
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT:    vpsrld $31, %xmm0, %xmm1
+; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %sdiv = sdiv <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
+  ret <4 x i32> %sdiv
 }
diff --git a/test/CodeGen/X86/vector-sext.ll b/test/CodeGen/X86/vector-sext.ll
new file mode 100644
index 000000000000..758833155a96
--- /dev/null
+++ b/test/CodeGen/X86/vector-sext.ll
@@ -0,0 +1,942 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+;
+; Just one 32-bit run to make sure we do reasonable things there.
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mcpu=i686 -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE41
+
+define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_8i16_to_8i32:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:   # kill: XMM0<def> XMM1<kill>
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    pslld $16, %xmm0
+; SSE2-NEXT:    psrad $16, %xmm0
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pslld $16, %xmm1
+; SSE2-NEXT:    psrad $16, %xmm1
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: sext_8i16_to_8i32:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movdqa %xmm0, %xmm1
+; SSSE3-NEXT:   # kill: XMM0<def> XMM1<kill>
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT:    pslld $16, %xmm0
+; SSSE3-NEXT:    psrad $16, %xmm0
+; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; SSSE3-NEXT:    pslld $16, %xmm1
+; SSSE3-NEXT:    psrad $16, %xmm1
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: sext_8i16_to_8i32:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    movdqa %xmm0, %xmm1
+; SSE41-NEXT:    pmovzxwd %xmm1, %xmm0
+; SSE41-NEXT:    pslld $16, %xmm0
+; SSE41-NEXT:    psrad $16, %xmm0
+; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pslld $16, %xmm1
+; SSE41-NEXT:    psrad $16, %xmm1
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: sext_8i16_to_8i32:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm1
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovsxwd %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: sext_8i16_to_8i32:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; X32-SSE41-LABEL: sext_8i16_to_8i32:
+; X32-SSE41:       # BB#0: # %entry
+; X32-SSE41-NEXT:    movdqa %xmm0, %xmm1
+; X32-SSE41-NEXT:    pmovzxwd %xmm1, %xmm0
+; X32-SSE41-NEXT:    pslld $16, %xmm0
+; X32-SSE41-NEXT:    psrad $16, %xmm0
+; X32-SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; X32-SSE41-NEXT:    pslld $16, %xmm1
+; X32-SSE41-NEXT:    psrad $16, %xmm1
+; X32-SSE41-NEXT:    retl
+entry:
+  %B = sext <8 x i16> %A to <8 x i32>
+  ret <8 x i32>%B
+}
+
+define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: sext_4i32_to_4i64:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
+; SSE2-NEXT:    movd %xmm1, %rax
+; SSE2-NEXT:    cltq
+; SSE2-NEXT:    movd %rax, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT:    movd %xmm1, %rax
+; SSE2-NEXT:    cltq
+; SSE2-NEXT:    movd %rax, %xmm1
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSE2-NEXT:    movd %xmm0, %rax
+; SSE2-NEXT:    cltq
+; SSE2-NEXT:    movd %rax, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movd %xmm0, %rax
+; SSE2-NEXT:    cltq
+; SSE2-NEXT:    movd %rax, %xmm0
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: sext_4i32_to_4i64:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
+; SSSE3-NEXT:    movd %xmm1, %rax
+; SSSE3-NEXT:    cltq
+; SSSE3-NEXT:    movd %rax, %xmm2
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSSE3-NEXT:    movd %xmm1, %rax
+; SSSE3-NEXT:    cltq
+; SSSE3-NEXT:    movd %rax, %xmm1
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSSE3-NEXT:    movd %xmm0, %rax
+; SSSE3-NEXT:    cltq
+; SSSE3-NEXT:    movd %rax, %xmm1
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSSE3-NEXT:    movd %xmm0, %rax
+; SSSE3-NEXT:    cltq
+; SSSE3-NEXT:    movd %rax, %xmm0
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSSE3-NEXT:    movdqa %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: sext_4i32_to_4i64:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pmovzxdq %xmm0, %xmm1
+; SSE41-NEXT:    pextrq $1, %xmm1, %rax
+; SSE41-NEXT:    cltq
+; SSE41-NEXT:    movd %rax, %xmm3
+; SSE41-NEXT:    movd %xmm1, %rax
+; SSE41-NEXT:    cltq
+; SSE41-NEXT:    movd %rax, %xmm2
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSE41-NEXT:    pextrq $1, %xmm0, %rax
+; SSE41-NEXT:    cltq
+; SSE41-NEXT:    movd %rax, %xmm3
+; SSE41-NEXT:    movd %xmm0, %rax
+; SSE41-NEXT:    cltq
+; SSE41-NEXT:    movd %rax, %xmm1
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: sext_4i32_to_4i64:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: sext_4i32_to_4i64:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; X32-SSE41-LABEL: sext_4i32_to_4i64:
+; X32-SSE41:       # BB#0: # %entry
+; X32-SSE41-NEXT:    pmovzxdq %xmm0, %xmm2
+; X32-SSE41-NEXT:    movd %xmm2, %eax
+; X32-SSE41-NEXT:    sarl $31, %eax
+; X32-SSE41-NEXT:    pextrd $2, %xmm2, %ecx
+; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm2
+; X32-SSE41-NEXT:    sarl $31, %ecx
+; X32-SSE41-NEXT:    pinsrd $3, %ecx, %xmm2
+; X32-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; X32-SSE41-NEXT:    movd %xmm1, %eax
+; X32-SSE41-NEXT:    sarl $31, %eax
+; X32-SSE41-NEXT:    pextrd $2, %xmm1, %ecx
+; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm1
+; X32-SSE41-NEXT:    sarl $31, %ecx
+; X32-SSE41-NEXT:    pinsrd $3, %ecx, %xmm1
+; X32-SSE41-NEXT:    movdqa %xmm2, %xmm0
+; X32-SSE41-NEXT:    retl
+entry:
+  %B = sext <4 x i32> %A to <4 x i64>
+  ret <4 x i64>%B
+}
+
+define <4 x i32> @load_sext_test1(<4 x i16> *%ptr) {
+; SSE2-LABEL: load_sext_test1:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movq (%rdi), %xmm0
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    psrad $16, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: load_sext_test1:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movq (%rdi), %xmm0
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT:    psrad $16, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: load_sext_test1:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pmovsxwd (%rdi), %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: load_sext_test1:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vpmovsxwd (%rdi), %xmm0
+; AVX-NEXT:    retq
+;
+; X32-SSE41-LABEL: load_sext_test1:
+; X32-SSE41:       # BB#0: # %entry
+; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT:    pmovsxwd (%eax), %xmm0
+; X32-SSE41-NEXT:    retl
+entry:
+ %X = load <4 x i16>* %ptr
+ %Y = sext <4 x i16> %X to <4 x i32>
+ ret <4 x i32>%Y
+}
+
+define <4 x i32> @load_sext_test2(<4 x i8> *%ptr) {
+; SSE2-LABEL: load_sext_test2:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movd (%rdi), %xmm0
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    psrad $24, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: load_sext_test2:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movd (%rdi), %xmm0
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT:    psrad $24, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: load_sext_test2:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pmovsxbd (%rdi), %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: load_sext_test2:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vpmovsxbd (%rdi), %xmm0
+; AVX-NEXT:    retq
+;
+; X32-SSE41-LABEL: load_sext_test2:
+; X32-SSE41:       # BB#0: # %entry
+; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT:    pmovsxbd (%eax), %xmm0
+; X32-SSE41-NEXT:    retl
+entry:
+ %X = load <4 x i8>* %ptr
+ %Y = sext <4 x i8> %X to <4 x i32>
+ ret <4 x i32>%Y
+}
+
+define <2 x i64> @load_sext_test3(<2 x i8> *%ptr) {
+; SSE2-LABEL: load_sext_test3:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movsbq 1(%rdi), %rax
+; SSE2-NEXT:    movd %rax, %xmm1
+; SSE2-NEXT:    movsbq (%rdi), %rax
+; SSE2-NEXT:    movd %rax, %xmm0
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: load_sext_test3:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movsbq 1(%rdi), %rax
+; SSSE3-NEXT:    movd %rax, %xmm1
+; SSSE3-NEXT:    movsbq (%rdi), %rax
+; SSSE3-NEXT:    movd %rax, %xmm0
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: load_sext_test3:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pmovsxbq (%rdi), %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: load_sext_test3:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vpmovsxbq (%rdi), %xmm0
+; AVX-NEXT:    retq
+;
+; X32-SSE41-LABEL: load_sext_test3:
+; X32-SSE41:       # BB#0: # %entry
+; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT:    pmovsxbq (%eax), %xmm0
+; X32-SSE41-NEXT:    retl
+entry:
+ %X = load <2 x i8>* %ptr
+ %Y = sext <2 x i8> %X to <2 x i64>
+ ret <2 x i64>%Y
+}
+
+define <2 x i64> @load_sext_test4(<2 x i16> *%ptr) {
+; SSE2-LABEL: load_sext_test4:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movswq 2(%rdi), %rax
+; SSE2-NEXT:    movd %rax, %xmm1
+; SSE2-NEXT:    movswq (%rdi), %rax
+; SSE2-NEXT:    movd %rax, %xmm0
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: load_sext_test4:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movswq 2(%rdi), %rax
+; SSSE3-NEXT:    movd %rax, %xmm1
+; SSSE3-NEXT:    movswq (%rdi), %rax
+; SSSE3-NEXT:    movd %rax, %xmm0
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: load_sext_test4:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pmovsxwq (%rdi), %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: load_sext_test4:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vpmovsxwq (%rdi), %xmm0
+; AVX-NEXT:    retq
+;
+; X32-SSE41-LABEL: load_sext_test4:
+; X32-SSE41:       # BB#0: # %entry
+; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT:    pmovsxwq (%eax), %xmm0
+; X32-SSE41-NEXT:    retl
+entry:
+ %X = load <2 x i16>* %ptr
+ %Y = sext <2 x i16> %X to <2 x i64>
+ ret <2 x i64>%Y
+}
+
+define <2 x i64> @load_sext_test5(<2 x i32> *%ptr) {
+; SSE2-LABEL: load_sext_test5:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movslq 4(%rdi), %rax
+; SSE2-NEXT:    movd %rax, %xmm1
+; SSE2-NEXT:    movslq (%rdi), %rax
+; SSE2-NEXT:    movd %rax, %xmm0
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: load_sext_test5:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movslq 4(%rdi), %rax
+; SSSE3-NEXT:    movd %rax, %xmm1
+; SSSE3-NEXT:    movslq (%rdi), %rax
+; SSSE3-NEXT:    movd %rax, %xmm0
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: load_sext_test5:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pmovsxdq (%rdi), %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: load_sext_test5:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vpmovsxdq (%rdi), %xmm0
+; AVX-NEXT:    retq
+;
+; X32-SSE41-LABEL: load_sext_test5:
+; X32-SSE41:       # BB#0: # %entry
+; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT:    pmovsxdq (%eax), %xmm0
+; X32-SSE41-NEXT:    retl
+entry:
+ %X = load <2 x i32>* %ptr
+ %Y = sext <2 x i32> %X to <2 x i64>
+ ret <2 x i64>%Y
+}
+
+define <8 x i16> @load_sext_test6(<8 x i8> *%ptr) {
+; SSE2-LABEL: load_sext_test6:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movq (%rdi), %xmm0
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    psraw $8, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: load_sext_test6:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movq (%rdi), %xmm0
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT:    psraw $8, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: load_sext_test6:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pmovsxbw (%rdi), %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: load_sext_test6:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vpmovsxbw (%rdi), %xmm0
+; AVX-NEXT:    retq
+;
+; X32-SSE41-LABEL: load_sext_test6:
+; X32-SSE41:       # BB#0: # %entry
+; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT:    pmovsxbw (%eax), %xmm0
+; X32-SSE41-NEXT:    retl
+entry:
+ %X = load <8 x i8>* %ptr
+ %Y = sext <8 x i8> %X to <8 x i16>
+ ret <8 x i16>%Y
+}
+
+define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
+; SSE2-LABEL: sext_4i1_to_4i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pslld $31, %xmm0
+; SSE2-NEXT:    psrad $31, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
+; SSE2-NEXT:    movd %xmm1, %rax
+; SSE2-NEXT:    cltq
+; SSE2-NEXT:    movd %rax, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT:    movd %xmm1, %rax
+; SSE2-NEXT:    cltq
+; SSE2-NEXT:    movd %rax, %xmm1
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSE2-NEXT:    movd %xmm0, %rax
+; SSE2-NEXT:    cltq
+; SSE2-NEXT:    movd %rax, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movd %xmm0, %rax
+; SSE2-NEXT:    cltq
+; SSE2-NEXT:    movd %rax, %xmm0
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: sext_4i1_to_4i64:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pslld $31, %xmm0
+; SSSE3-NEXT:    psrad $31, %xmm0
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
+; SSSE3-NEXT:    movd %xmm1, %rax
+; SSSE3-NEXT:    cltq
+; SSSE3-NEXT:    movd %rax, %xmm2
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSSE3-NEXT:    movd %xmm1, %rax
+; SSSE3-NEXT:    cltq
+; SSSE3-NEXT:    movd %rax, %xmm1
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSSE3-NEXT:    movd %xmm0, %rax
+; SSSE3-NEXT:    cltq
+; SSSE3-NEXT:    movd %rax, %xmm1
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSSE3-NEXT:    movd %xmm0, %rax
+; SSSE3-NEXT:    cltq
+; SSSE3-NEXT:    movd %rax, %xmm0
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSSE3-NEXT:    movdqa %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: sext_4i1_to_4i64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pslld $31, %xmm0
+; SSE41-NEXT:    psrad $31, %xmm0
+; SSE41-NEXT:    pmovzxdq %xmm0, %xmm1
+; SSE41-NEXT:    pextrq $1, %xmm1, %rax
+; SSE41-NEXT:    cltq
+; SSE41-NEXT:    movd %rax, %xmm3
+; SSE41-NEXT:    movd %xmm1, %rax
+; SSE41-NEXT:    cltq
+; SSE41-NEXT:    movd %rax, %xmm2
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSE41-NEXT:    pextrq $1, %xmm0, %rax
+; SSE41-NEXT:    cltq
+; SSE41-NEXT:    movd %rax, %xmm3
+; SSE41-NEXT:    movd %xmm0, %rax
+; SSE41-NEXT:    cltq
+; SSE41-NEXT:    movd %rax, %xmm1
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: sext_4i1_to_4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: sext_4i1_to_4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpslld $31, %xmm0, %xmm0
+; AVX2-NEXT:    vpsrad $31, %xmm0, %xmm0
+; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; X32-SSE41-LABEL: sext_4i1_to_4i64:
+; X32-SSE41:       # BB#0:
+; X32-SSE41-NEXT:    pslld $31, %xmm0
+; X32-SSE41-NEXT:    psrad $31, %xmm0
+; X32-SSE41-NEXT:    pmovzxdq %xmm0, %xmm2
+; X32-SSE41-NEXT:    movd %xmm2, %eax
+; X32-SSE41-NEXT:    sarl $31, %eax
+; X32-SSE41-NEXT:    pextrd $2, %xmm2, %ecx
+; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm2
+; X32-SSE41-NEXT:    sarl $31, %ecx
+; X32-SSE41-NEXT:    pinsrd $3, %ecx, %xmm2
+; X32-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; X32-SSE41-NEXT:    movd %xmm1, %eax
+; X32-SSE41-NEXT:    sarl $31, %eax
+; X32-SSE41-NEXT:    pextrd $2, %xmm1, %ecx
+; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm1
+; X32-SSE41-NEXT:    sarl $31, %ecx
+; X32-SSE41-NEXT:    pinsrd $3, %ecx, %xmm1
+; X32-SSE41-NEXT:    movdqa %xmm2, %xmm0
+; X32-SSE41-NEXT:    retl
+  %extmask = sext <4 x i1> %mask to <4 x i64>
+  ret <4 x i64> %extmask
+}
+
+define <16 x i16> @sext_16i8_to_16i16(<16 x i8> *%ptr) {
+; SSE2-LABEL: sext_16i8_to_16i16:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movdqa (%rdi), %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    psllw $8, %xmm0
+; SSE2-NEXT:    psraw $8, %xmm0
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE2-NEXT:    psllw $8, %xmm1
+; SSE2-NEXT:    psraw $8, %xmm1
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: sext_16i8_to_16i16:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movdqa (%rdi), %xmm1
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT:    psllw $8, %xmm0
+; SSSE3-NEXT:    psraw $8, %xmm0
+; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSSE3-NEXT:    psllw $8, %xmm1
+; SSSE3-NEXT:    psraw $8, %xmm1
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: sext_16i8_to_16i16:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    movdqa (%rdi), %xmm1
+; SSE41-NEXT:    pmovzxbw %xmm1, %xmm0
+; SSE41-NEXT:    psllw $8, %xmm0
+; SSE41-NEXT:    psraw $8, %xmm0
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE41-NEXT:    psllw $8, %xmm1
+; SSE41-NEXT:    psraw $8, %xmm1
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: sext_16i8_to_16i16:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxbw %xmm0, %xmm1
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovsxbw %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: sext_16i8_to_16i16:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpmovsxbw (%rdi), %ymm0
+; AVX2-NEXT:    retq
+;
+; X32-SSE41-LABEL: sext_16i8_to_16i16:
+; X32-SSE41:       # BB#0: # %entry
+; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT:    movdqa (%eax), %xmm1
+; X32-SSE41-NEXT:    pmovzxbw %xmm1, %xmm0
+; X32-SSE41-NEXT:    psllw $8, %xmm0
+; X32-SSE41-NEXT:    psraw $8, %xmm0
+; X32-SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X32-SSE41-NEXT:    psllw $8, %xmm1
+; X32-SSE41-NEXT:    psraw $8, %xmm1
+; X32-SSE41-NEXT:    retl
+entry:
+ %X = load <16 x i8>* %ptr
+ %Y = sext <16 x i8> %X to <16 x i16>
+ ret <16 x i16> %Y
+}
+
+define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
+; SSE2-LABEL: sext_4i8_to_4i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pslld $24, %xmm0
+; SSE2-NEXT:    psrad $24, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
+; SSE2-NEXT:    movd %xmm1, %rax
+; SSE2-NEXT:    cltq
+; SSE2-NEXT:    movd %rax, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT:    movd %xmm1, %rax
+; SSE2-NEXT:    cltq
+; SSE2-NEXT:    movd %rax, %xmm1
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSE2-NEXT:    movd %xmm0, %rax
+; SSE2-NEXT:    cltq
+; SSE2-NEXT:    movd %rax, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    movd %xmm0, %rax
+; SSE2-NEXT:    cltq
+; SSE2-NEXT:    movd %rax, %xmm0
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: sext_4i8_to_4i64:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pslld $24, %xmm0
+; SSSE3-NEXT:    psrad $24, %xmm0
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,1,1,3]
+; SSSE3-NEXT:    movd %xmm1, %rax
+; SSSE3-NEXT:    cltq
+; SSSE3-NEXT:    movd %rax, %xmm2
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSSE3-NEXT:    movd %xmm1, %rax
+; SSSE3-NEXT:    cltq
+; SSSE3-NEXT:    movd %rax, %xmm1
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSSE3-NEXT:    movd %xmm0, %rax
+; SSSE3-NEXT:    cltq
+; SSSE3-NEXT:    movd %rax, %xmm1
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSSE3-NEXT:    movd %xmm0, %rax
+; SSSE3-NEXT:    cltq
+; SSSE3-NEXT:    movd %rax, %xmm0
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSSE3-NEXT:    movdqa %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: sext_4i8_to_4i64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pslld $24, %xmm0
+; SSE41-NEXT:    psrad $24, %xmm0
+; SSE41-NEXT:    pmovzxdq %xmm0, %xmm1
+; SSE41-NEXT:    pextrq $1, %xmm1, %rax
+; SSE41-NEXT:    cltq
+; SSE41-NEXT:    movd %rax, %xmm3
+; SSE41-NEXT:    movd %xmm1, %rax
+; SSE41-NEXT:    cltq
+; SSE41-NEXT:    movd %rax, %xmm2
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSE41-NEXT:    pextrq $1, %xmm0, %rax
+; SSE41-NEXT:    cltq
+; SSE41-NEXT:    movd %rax, %xmm3
+; SSE41-NEXT:    movd %xmm0, %rax
+; SSE41-NEXT:    cltq
+; SSE41-NEXT:    movd %rax, %xmm1
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: sext_4i8_to_4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpslld $24, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrad $24, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: sext_4i8_to_4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpslld $24, %xmm0, %xmm0
+; AVX2-NEXT:    vpsrad $24, %xmm0, %xmm0
+; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT:    retq
+;
+; X32-SSE41-LABEL: sext_4i8_to_4i64:
+; X32-SSE41:       # BB#0:
+; X32-SSE41-NEXT:    pslld $24, %xmm0
+; X32-SSE41-NEXT:    psrad $24, %xmm0
+; X32-SSE41-NEXT:    pmovzxdq %xmm0, %xmm2
+; X32-SSE41-NEXT:    movd %xmm2, %eax
+; X32-SSE41-NEXT:    sarl $31, %eax
+; X32-SSE41-NEXT:    pextrd $2, %xmm2, %ecx
+; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm2
+; X32-SSE41-NEXT:    sarl $31, %ecx
+; X32-SSE41-NEXT:    pinsrd $3, %ecx, %xmm2
+; X32-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; X32-SSE41-NEXT:    movd %xmm1, %eax
+; X32-SSE41-NEXT:    sarl $31, %eax
+; X32-SSE41-NEXT:    pextrd $2, %xmm1, %ecx
+; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm1
+; X32-SSE41-NEXT:    sarl $31, %ecx
+; X32-SSE41-NEXT:    pinsrd $3, %ecx, %xmm1
+; X32-SSE41-NEXT:    movdqa %xmm2, %xmm0
+; X32-SSE41-NEXT:    retl
+  %extmask = sext <4 x i8> %mask to <4 x i64>
+  ret <4 x i64> %extmask
+}
+
+define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) {
+; SSE2-LABEL: load_sext_4i8_to_4i64:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movd (%rdi), %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,1,1,3]
+; SSE2-NEXT:    movd %xmm2, %rax
+; SSE2-NEXT:    movsbq %al, %rax
+; SSE2-NEXT:    movd %rax, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSE2-NEXT:    movd %xmm2, %rax
+; SSE2-NEXT:    movsbq %al, %rax
+; SSE2-NEXT:    movd %rax, %xmm2
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
+; SSE2-NEXT:    movd %xmm2, %rax
+; SSE2-NEXT:    movsbq %al, %rax
+; SSE2-NEXT:    movd %rax, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSE2-NEXT:    movd %xmm2, %rax
+; SSE2-NEXT:    movsbq %al, %rax
+; SSE2-NEXT:    movd %rax, %xmm2
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: load_sext_4i8_to_4i64:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movd (%rdi), %xmm1
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,1,1,3]
+; SSSE3-NEXT:    movd %xmm2, %rax
+; SSSE3-NEXT:    movsbq %al, %rax
+; SSSE3-NEXT:    movd %rax, %xmm0
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSSE3-NEXT:    movd %xmm2, %rax
+; SSSE3-NEXT:    movsbq %al, %rax
+; SSSE3-NEXT:    movd %rax, %xmm2
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
+; SSSE3-NEXT:    movd %xmm2, %rax
+; SSSE3-NEXT:    movsbq %al, %rax
+; SSSE3-NEXT:    movd %rax, %xmm1
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSSE3-NEXT:    movd %xmm2, %rax
+; SSSE3-NEXT:    movsbq %al, %rax
+; SSSE3-NEXT:    movd %rax, %xmm2
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: load_sext_4i8_to_4i64:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pmovzxbd (%rdi), %xmm1
+; SSE41-NEXT:    pmovzxdq %xmm1, %xmm0
+; SSE41-NEXT:    pextrq $1, %xmm0, %rax
+; SSE41-NEXT:    movsbq %al, %rax
+; SSE41-NEXT:    movd %rax, %xmm2
+; SSE41-NEXT:    movd %xmm0, %rax
+; SSE41-NEXT:    movsbq %al, %rax
+; SSE41-NEXT:    movd %rax, %xmm0
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; SSE41-NEXT:    pextrq $1, %xmm1, %rax
+; SSE41-NEXT:    movsbq %al, %rax
+; SSE41-NEXT:    movd %rax, %xmm2
+; SSE41-NEXT:    movd %xmm1, %rax
+; SSE41-NEXT:    movsbq %al, %rax
+; SSE41-NEXT:    movd %rax, %xmm1
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: load_sext_4i8_to_4i64:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vpmovsxbd (%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: load_sext_4i8_to_4i64:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpmovsxbq (%rdi), %ymm0
+; AVX2-NEXT:    retq
+;
+; X32-SSE41-LABEL: load_sext_4i8_to_4i64:
+; X32-SSE41:       # BB#0: # %entry
+; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT:    movd (%eax), %xmm0
+; X32-SSE41-NEXT:    pmovzxbd %xmm0, %xmm1
+; X32-SSE41-NEXT:    pmovzxbq %xmm0, %xmm2
+; X32-SSE41-NEXT:    movd %xmm2, %eax
+; X32-SSE41-NEXT:    movsbl %al, %eax
+; X32-SSE41-NEXT:    movd %eax, %xmm0
+; X32-SSE41-NEXT:    sarl $31, %eax
+; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm0
+; X32-SSE41-NEXT:    pextrd $2, %xmm2, %eax
+; X32-SSE41-NEXT:    movsbl %al, %eax
+; X32-SSE41-NEXT:    pinsrd $2, %eax, %xmm0
+; X32-SSE41-NEXT:    sarl $31, %eax
+; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
+; X32-SSE41-NEXT:    movd %xmm2, %eax
+; X32-SSE41-NEXT:    movsbl %al, %eax
+; X32-SSE41-NEXT:    movd %eax, %xmm1
+; X32-SSE41-NEXT:    sarl $31, %eax
+; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm1
+; X32-SSE41-NEXT:    pextrd $2, %xmm2, %eax
+; X32-SSE41-NEXT:    movsbl %al, %eax
+; X32-SSE41-NEXT:    pinsrd $2, %eax, %xmm1
+; X32-SSE41-NEXT:    sarl $31, %eax
+; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm1
+; X32-SSE41-NEXT:    retl
+entry:
+ %X = load <4 x i8>* %ptr
+ %Y = sext <4 x i8> %X to <4 x i64>
+ ret <4 x i64>%Y
+}
+
+define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) {
+; SSE2-LABEL: load_sext_4i16_to_4i64:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movq (%rdi), %xmm1
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,1,1,3]
+; SSE2-NEXT:    movd %xmm2, %rax
+; SSE2-NEXT:    movswq %ax, %rax
+; SSE2-NEXT:    movd %rax, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSE2-NEXT:    movd %xmm2, %rax
+; SSE2-NEXT:    movswq %ax, %rax
+; SSE2-NEXT:    movd %rax, %xmm2
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
+; SSE2-NEXT:    movd %xmm2, %rax
+; SSE2-NEXT:    movswq %ax, %rax
+; SSE2-NEXT:    movd %rax, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSE2-NEXT:    movd %xmm2, %rax
+; SSE2-NEXT:    movswq %ax, %rax
+; SSE2-NEXT:    movd %rax, %xmm2
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: load_sext_4i16_to_4i64:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movq (%rdi), %xmm1
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[0,1,1,3]
+; SSSE3-NEXT:    movd %xmm2, %rax
+; SSSE3-NEXT:    movswq %ax, %rax
+; SSSE3-NEXT:    movd %rax, %xmm0
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSSE3-NEXT:    movd %xmm2, %rax
+; SSSE3-NEXT:    movswq %ax, %rax
+; SSSE3-NEXT:    movd %rax, %xmm2
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
+; SSSE3-NEXT:    movd %xmm2, %rax
+; SSSE3-NEXT:    movswq %ax, %rax
+; SSSE3-NEXT:    movd %rax, %xmm1
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; SSSE3-NEXT:    movd %xmm2, %rax
+; SSSE3-NEXT:    movswq %ax, %rax
+; SSSE3-NEXT:    movd %rax, %xmm2
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: load_sext_4i16_to_4i64:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    movq (%rdi), %xmm0
+; SSE41-NEXT:    pmovzxwd %xmm0, %xmm1
+; SSE41-NEXT:    pmovzxwq %xmm0, %xmm0
+; SSE41-NEXT:    pextrq $1, %xmm0, %rax
+; SSE41-NEXT:    movswq %ax, %rax
+; SSE41-NEXT:    movd %rax, %xmm2
+; SSE41-NEXT:    movd %xmm0, %rax
+; SSE41-NEXT:    movswq %ax, %rax
+; SSE41-NEXT:    movd %rax, %xmm0
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; SSE41-NEXT:    pextrq $1, %xmm1, %rax
+; SSE41-NEXT:    movswq %ax, %rax
+; SSE41-NEXT:    movd %rax, %xmm2
+; SSE41-NEXT:    movd %xmm1, %rax
+; SSE41-NEXT:    movswq %ax, %rax
+; SSE41-NEXT:    movd %rax, %xmm1
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: load_sext_4i16_to_4i64:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vpmovsxwd (%rdi), %xmm0
+; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm1
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: load_sext_4i16_to_4i64:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpmovsxwq (%rdi), %ymm0
+; AVX2-NEXT:    retq
+;
+; X32-SSE41-LABEL: load_sext_4i16_to_4i64:
+; X32-SSE41:       # BB#0: # %entry
+; X32-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-SSE41-NEXT:    movsd (%eax), %xmm0
+; X32-SSE41-NEXT:    pmovzxwd %xmm0, %xmm1
+; X32-SSE41-NEXT:    pmovzxwq %xmm0, %xmm2
+; X32-SSE41-NEXT:    movd %xmm2, %eax
+; X32-SSE41-NEXT:    cwtl
+; X32-SSE41-NEXT:    movd %eax, %xmm0
+; X32-SSE41-NEXT:    sarl $31, %eax
+; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm0
+; X32-SSE41-NEXT:    pextrd $2, %xmm2, %eax
+; X32-SSE41-NEXT:    cwtl
+; X32-SSE41-NEXT:    pinsrd $2, %eax, %xmm0
+; X32-SSE41-NEXT:    sarl $31, %eax
+; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm0
+; X32-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
+; X32-SSE41-NEXT:    movd %xmm2, %eax
+; X32-SSE41-NEXT:    cwtl
+; X32-SSE41-NEXT:    movd %eax, %xmm1
+; X32-SSE41-NEXT:    sarl $31, %eax
+; X32-SSE41-NEXT:    pinsrd $1, %eax, %xmm1
+; X32-SSE41-NEXT:    pextrd $2, %xmm2, %eax
+; X32-SSE41-NEXT:    cwtl
+; X32-SSE41-NEXT:    pinsrd $2, %eax, %xmm1
+; X32-SSE41-NEXT:    sarl $31, %eax
+; X32-SSE41-NEXT:    pinsrd $3, %eax, %xmm1
+; X32-SSE41-NEXT:    retl
+entry:
+ %X = load <4 x i16>* %ptr
+ %Y = sext <4 x i16> %X to <4 x i64>
+ ret <4 x i64>%Y
+}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v16.ll b/test/CodeGen/X86/vector-shuffle-128-v16.ll
index 4da7e42caabf..14058c912861 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -1,196 +1,1088 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"
 
 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm0
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT:    retq
+; FIXME: SSE2 should look like the following:
+; FIXME-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00
+; FIXME:       # BB#0:
+; FIXME-NEXT:    punpcklbw %xmm0, %xmm0
+; FIXME-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; FIXME-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; FIXME-NEXT:    retq
+;
+; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pxor %xmm1, %xmm1
+; SSSE3-NEXT:    pshufb %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    pshufb %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
+; AVX2-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   ret <16 x i8> %shuffle
 }
 
 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm0
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,5,5,5]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   ret <16 x i8> %shuffle
 }
 
 define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,2,4,5,6,7]
-; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm0
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,6,6,6]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,2,4,5,6,7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
   ret <16 x i8> %shuffle
 }
 
 define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm0
-; CHECK-SSE2-NEXT:    punpcklwd %xmm0, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
   ret <16 x i8> %shuffle
 }
 
 define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm0
-; CHECK-SSE2-NEXT:    punpckhwd %xmm0, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
   ret <16 x i8> %shuffle
 }
 
 define <16 x i8> @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm0
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,6,6]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
   ret <16 x i8> %shuffle
 }
 
 define <16 x i8> @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_00_00_01_01_02_02_03_03_04_04_05_05_06_06_07_07:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
   ret <16 x i8> %shuffle
 }
 
 define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v16i8_0101010101010101
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT:    retq
+; FIXME: SSE2 should be the following:
+; FIXME-LABEL: @shuffle_v16i8_0101010101010101
+; FIXME:       # BB#0:
+; FIXME-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; FIXME-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; FIXME-NEXT:    retq
+;
+; SSE2-LABEL: shuffle_v16i8_0101010101010101:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_0101010101010101:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_0101010101010101:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: shuffle_v16i8_0101010101010101:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i8_0101010101010101:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   ret <16 x i8> %shuffle
 }
 
 define <16 x i8> @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23
-; CHECK-SSE2:         punpcklbw %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
   ret <16 x i8> %shuffle
 }
 
+define <16 x i8> @shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31(<16 x i8> %a, <16 x i8> %b) {
+; SSE-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_08_24_09_25_10_26_11_27_12_28_13_29_14_30_15_31:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i8> %shuffle
+}
+
 define <16 x i8> @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    punpcklbw %xmm1, %xmm1
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    punpcklbw %xmm0, %xmm1
-; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i8_16_00_16_01_16_02_16_03_16_04_16_05_16_06_16_07:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX2-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7>
   ret <16 x i8> %shuffle
 }
 
 define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12
-; CHECK-SSE2:         pxor %xmm1, %xmm1
-; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
-; CHECK-SSE2-NEXT:    punpckhbw %xmm1, %xmm2
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm2 = xmm2[3,2,1,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm2 = xmm2[0,1,2,3,7,6,5,4]
-; CHECK-SSE2-NEXT:    punpcklbw %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
-; CHECK-SSE2-NEXT:    packuswb %xmm2, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; SSE2-NEXT:    packuswb %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
   ret <16 x i8> %shuffle
 }
 
 define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20
-; CHECK-SSE2:         pxor %xmm2, %xmm2
-; CHECK-SSE2-NEXT:    punpcklbw %xmm2, %xmm1
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm1 = xmm1[3,2,1,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4]
-; CHECK-SSE2-NEXT:    punpcklbw %xmm2, %xmm0
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
-; CHECK-SSE2-NEXT:    packuswb %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pxor %xmm2, %xmm2
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; SSE2-NEXT:    packuswb %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT:    por %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    por %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_03_02_01_00_07_06_05_04_19_18_17_16_23_22_21_20:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[3,2,1,0,7,6,5,4]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20>
   ret <16 x i8> %shuffle
 }
 
 define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20
-; CHECK-SSE2:         pxor %xmm2, %xmm2
-; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm3
-; CHECK-SSE2-NEXT:    punpcklbw %xmm2, %xmm3
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm3 = xmm3[0,1,2,3,7,6,5,4]
-; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm4
-; CHECK-SSE2-NEXT:    punpckhbw %xmm2, %xmm4
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm4 = xmm4[3,2,1,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    shufpd {{.*}} # xmm4 = xmm4[0],xmm3[1]
-; CHECK-SSE2-NEXT:    punpckhbw %xmm2, %xmm1
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,7,6,5,4]
-; CHECK-SSE2-NEXT:    punpcklbw %xmm2, %xmm0
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
-; CHECK-SSE2-NEXT:    packuswb %xmm4, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pxor %xmm2, %xmm2
+; SSE2-NEXT:    movdqa %xmm1, %xmm3
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,7,6,5,4]
+; SSE2-NEXT:    movdqa %xmm0, %xmm4
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm4[3,2,1,0,4,5,6,7]
+; SSE2-NEXT:    movsd %xmm4, %xmm3
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,6,5,4]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE2-NEXT:    movsd %xmm0, %xmm1
+; SSE2-NEXT:    packuswb %xmm3, %xmm1
+; SSE2-NEXT:    movdqa %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero
+; SSSE3-NEXT:    por %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero
+; SSE41-NEXT:    por %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,xmm1[15,14,13,12],zero,zero,zero,zero,xmm1[7,6,5,4]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0],zero,zero,zero,zero,xmm0[11,10,9,8],zero,zero,zero,zero
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20>
   ret <16 x i8> %shuffle
 }
 
-define <16 x i8> @zext_to_v8i16_shuffle(<16 x i8> %a) {
-; CHECK-SSE2-LABEL: @zext_to_v8i16_shuffle
-; CHECK-SSE2:         pxor %xmm1, %xmm1
-; CHECK-SSE2-NEXT:    punpcklbw %xmm1, %xmm0
-  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
+define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
+; SSE2-LABEL: trunc_v4i32_shuffle:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    packuswb %xmm0, %xmm0
+; SSE2-NEXT:    packuswb %xmm0, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: trunc_v4i32_shuffle:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: trunc_v4i32_shuffle:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: trunc_v4i32_shuffle:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @stress_test0(<16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i8> %s.0.7, <16 x i8> %s.0.8, <16 x i8> %s.0.9) {
+; We don't have anything useful to check here. This generates 100s of
+; instructions. Instead, just make sure we survived codegen.
+; ALL-LABEL: stress_test0:
+; ALL:         retq
+entry:
+  %s.1.4 = shufflevector <16 x i8> %s.0.4, <16 x i8> %s.0.5, <16 x i32> <i32 1, i32 22, i32 21, i32 28, i32 3, i32 16, i32 6, i32 1, i32 19, i32 29, i32 12, i32 31, i32 2, i32 3, i32 3, i32 6>
+  %s.1.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> %s.0.6, <16 x i32> <i32 31, i32 20, i32 12, i32 19, i32 2, i32 15, i32 12, i32 31, i32 2, i32 28, i32 2, i32 30, i32 7, i32 8, i32 17, i32 28>
+  %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> %s.0.9, <16 x i32> <i32 14, i32 10, i32 17, i32 5, i32 17, i32 9, i32 17, i32 21, i32 31, i32 24, i32 16, i32 6, i32 20, i32 28, i32 23, i32 8>
+  %s.2.2 = shufflevector <16 x i8> %s.0.3, <16 x i8> %s.0.4, <16 x i32> <i32 20, i32 9, i32 21, i32 11, i32 11, i32 4, i32 3, i32 18, i32 3, i32 30, i32 4, i32 31, i32 11, i32 24, i32 13, i32 29>
+  %s.3.2 = shufflevector <16 x i8> %s.2.2, <16 x i8> %s.1.4, <16 x i32> <i32 15, i32 13, i32 5, i32 11, i32 7, i32 17, i32 14, i32 22, i32 22, i32 16, i32 7, i32 24, i32 16, i32 22, i32 7, i32 29>
+  %s.5.4 = shufflevector <16 x i8> %s.1.5, <16 x i8> %s.1.8, <16 x i32> <i32 3, i32 13, i32 19, i32 7, i32 23, i32 11, i32 1, i32 9, i32 16, i32 25, i32 2, i32 7, i32 0, i32 21, i32 23, i32 17>
+  %s.6.1 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.3.2, <16 x i32> <i32 11, i32 2, i32 28, i32 31, i32 27, i32 3, i32 9, i32 27, i32 25, i32 25, i32 14, i32 7, i32 12, i32 28, i32 12, i32 23>
+  %s.7.1 = shufflevector <16 x i8> %s.6.1, <16 x i8> %s.3.2, <16 x i32> <i32 15, i32 29, i32 14, i32 0, i32 29, i32 15, i32 26, i32 30, i32 6, i32 7, i32 2, i32 8, i32 12, i32 10, i32 29, i32 17>
+  %s.7.2 = shufflevector <16 x i8> %s.3.2, <16 x i8> %s.5.4, <16 x i32> <i32 3, i32 29, i32 3, i32 19, i32 undef, i32 20, i32 undef, i32 3, i32 27, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
+  %s.16.0 = shufflevector <16 x i8> %s.7.1, <16 x i8> %s.7.2, <16 x i32> <i32 13, i32 1, i32 16, i32 16, i32 6, i32 7, i32 29, i32 18, i32 19, i32 28, i32 undef, i32 undef, i32 31, i32 1, i32 undef, i32 10>
+  ret <16 x i8> %s.16.0
+}
+
+define <16 x i8> @stress_test1(<16 x i8> %s.0.5, <16 x i8> %s.0.8, <16 x i8> %s.0.9) noinline nounwind {
+; There is nothing interesting to check about these instructions other than
+; that they survive codegen. However, we actually do better and delete all of
+; them because the result is 'undef'.
+;
+; ALL-LABEL: stress_test1:
+; ALL:       # BB#0: # %entry
+; ALL-NEXT:    retq
+entry:
+  %s.1.8 = shufflevector <16 x i8> %s.0.8, <16 x i8> undef, <16 x i32> <i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 6, i32 undef, i32 6, i32 undef, i32 14, i32 14, i32 undef, i32 undef, i32 0>
+  %s.2.4 = shufflevector <16 x i8> undef, <16 x i8> %s.0.5, <16 x i32> <i32 21, i32 undef, i32 undef, i32 19, i32 undef, i32 undef, i32 29, i32 24, i32 21, i32 23, i32 21, i32 17, i32 19, i32 undef, i32 20, i32 22>
+  %s.2.5 = shufflevector <16 x i8> %s.0.5, <16 x i8> undef, <16 x i32> <i32 3, i32 8, i32 undef, i32 7, i32 undef, i32 10, i32 8, i32 0, i32 15, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 9>
+  %s.2.9 = shufflevector <16 x i8> %s.0.9, <16 x i8> undef, <16 x i32> <i32 7, i32 undef, i32 14, i32 7, i32 8, i32 undef, i32 7, i32 8, i32 5, i32 15, i32 undef, i32 1, i32 11, i32 undef, i32 undef, i32 11>
+  %s.3.4 = shufflevector <16 x i8> %s.2.4, <16 x i8> %s.0.5, <16 x i32> <i32 5, i32 0, i32 21, i32 6, i32 15, i32 27, i32 22, i32 21, i32 4, i32 22, i32 19, i32 26, i32 9, i32 26, i32 8, i32 29>
+  %s.3.9 = shufflevector <16 x i8> %s.2.9, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 8, i32 1, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 undef>
+  %s.4.7 = shufflevector <16 x i8> %s.1.8, <16 x i8> %s.2.9, <16 x i32> <i32 9, i32 0, i32 22, i32 20, i32 24, i32 7, i32 21, i32 17, i32 20, i32 12, i32 19, i32 23, i32 2, i32 9, i32 17, i32 10>
+  %s.4.8 = shufflevector <16 x i8> %s.2.9, <16 x i8> %s.3.9, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 6, i32 10, i32 undef, i32 0, i32 5, i32 undef, i32 9, i32 undef>
+  %s.5.7 = shufflevector <16 x i8> %s.4.7, <16 x i8> %s.4.8, <16 x i32> <i32 16, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %s.8.4 = shufflevector <16 x i8> %s.3.4, <16 x i8> %s.5.7, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 28, i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %s.9.4 = shufflevector <16 x i8> %s.8.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 10, i32 5>
+  %s.10.4 = shufflevector <16 x i8> %s.9.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %s.12.4 = shufflevector <16 x i8> %s.10.4, <16 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef>
+
+  ret <16 x i8> %s.12.4
+}
+
+define <16 x i8> @PR20540(<8 x i8> %a) {
+; SSE2-LABEL: PR20540:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    packuswb %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
+; SSE2-NEXT:    packuswb %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: PR20540:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: PR20540:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: PR20540:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
+; SSE2-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movzbl %dil, %eax
+; SSE2-NEXT:    movd %eax, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movd %edi, %xmm0
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movd %edi, %xmm0
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovd %edi, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    retq
+  %a = insertelement <16 x i8> undef, i8 %i, i32 0
+  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
+; SSE2-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movzbl %dil, %eax
+; SSE2-NEXT:    movd %eax, %xmm0
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movd %edi, %xmm0
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movd %edi, %xmm0
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovd %edi, %xmm0
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    retq
+  %a = insertelement <16 x i8> undef, i8 %i, i32 0
+  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16(i8 %i) {
+; SSE-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
+; SSE:       # BB#0:
+; SSE-NEXT:    movd %edi, %xmm0
+; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_zz_uu_uu_zz_uu_uu_zz_zz_zz_zz_zz_zz_zz_zz_zz_16:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovd %edi, %xmm0
+; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; AVX-NEXT:    retq
+  %a = insertelement <16 x i8> undef, i8 %i, i32 0
+  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 16>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(i8 %i) {
+; SSE2-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movzbl %dil, %eax
+; SSE2-NEXT:    movd %eax, %xmm0
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movd %edi, %xmm0
+; SSSE3-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movd %edi, %xmm0
+; SSE41-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_zz_zz_19_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovd %edi, %xmm0
+; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    retq
+  %a = insertelement <16 x i8> undef, i8 %i, i32 3
+  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 1, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu(<16 x i8> %a) {
+; SSE-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu:
+; SSE:       # BB#0:
+; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_16_uu_18_uu:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 09, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 undef, i32 18, i32 undef>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
+; SSE-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_28_uu_30_31_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz_zz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> zeroinitializer, <16 x i8> %a, <16 x i32> <i32 28, i32 undef, i32 30, i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 09, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_31_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_15_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
   ret <16 x i8> %shuffle
 }
 
-define <16 x i8> @zext_to_v4i32_shuffle(<16 x i8> %a) {
-; CHECK-SSE2-LABEL: @zext_to_v4i32_shuffle
-; CHECK-SSE2:         pxor %xmm1, %xmm1
-; CHECK-SSE2-NEXT:    punpcklbw %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    punpcklbw %xmm1, %xmm0
+define <16 x i8> @shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_17_18_19_20_21_22_23_24_25_26_27_28_29_30_31_00:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_16:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15_00:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_15_16_17_18_19_20_21_22_23_24_25_26_27_28_29_30:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu(<16 x i8> %a) {
+; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxbq %xmm0, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_uu_uu_uu_uu_01_uu_uu_uu_uu_uu_uu_uu:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmovzxbq %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz(<16 x i8> %a) {
+; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxbq %xmm0, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_zz_zz_zz_zz_01_zz_zz_zz_zz_zz_zz_zz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmovzxbq %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 1, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu(<16 x i8> %a) {
+; SSE2-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxbd %xmm0, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_00_uu_uu_uu_01_uu_uu_uu_02_uu_uu_uu_03_uu_uu_uu:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmovzxbd %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz(<16 x i8> %a) {
+; SSE2-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pxor %xmm1, %xmm1
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxbd %xmm0, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_00_zz_zz_zz_01_zz_zz_zz_02_zz_zz_zz_03_zz_zz_zz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmovzxbd %xmm0, %xmm0
+; AVX-NEXT:    retq
   %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31>
   ret <16 x i8> %shuffle
 }
 
-define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) {
-; CHECK-SSE2-LABEL: @trunc_v4i32_shuffle
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pand
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT:    packuswb %xmm0, %xmm0
-; CHECK-SSE2-NEXT:    retq
-  %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+define <16 x i8> @shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu(<16 x i8> %a) {
+; SSE2-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxbw %xmm0, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_00_uu_01_uu_02_uu_03_uu_04_uu_05_uu_06_uu_07_uu:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmovzxbw %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz(<16 x i8> %a) {
+; SSE2-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pxor %xmm1, %xmm1
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxbw %xmm0, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_00_zz_01_zz_02_zz_03_zz_04_zz_05_zz_06_zz_07_zz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmovzxbw %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31>
   ret <16 x i8> %shuffle
 }
+
+define <16 x i8> @shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00(<16 x i8> %a, <16 x i8> %b) {
+; SSE2-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    pxor %xmm2, %xmm2
+; SSE2-NEXT:    movdqa %xmm0, %xmm3
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm3[2,3,0,1]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,3,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[1,0,3,3,4,5,6,7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[2,0,3,1,4,5,6,7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm3[2,1,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,3,2,3]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm4[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,7,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm4 = xmm4[0,2,3,1,4,5,6,7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3]
+; SSE2-NEXT:    packuswb %xmm0, %xmm4
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,1,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,3,4,5,6,7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm2 = xmm0[0,2,1,3,4,5,6,7]
+; SSE2-NEXT:    packuswb %xmm0, %xmm2
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,1,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movdqa %xmm0, %xmm2
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    movdqa %xmm0, %xmm2
+; SSE41-NEXT:    pshufb {{.*#+}} xmm2 = xmm2[2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u]
+; SSE41-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v16i8_uu_10_02_07_22_14_07_02_18_03_01_14_18_09_11_00:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vpshufb {{.*#+}} xmm2 = xmm0[2,7,1,11,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[6,6,2,2,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,7,14,2,3,14,9,0,u,u,u,u,u,u,u,u]
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX-NEXT:    retq
+entry:
+  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 undef, i32 10, i32 2, i32 7, i32 22, i32 14, i32 7, i32 2, i32 18, i32 3, i32 1, i32 14, i32 18, i32 9, i32 11, i32 0>
+
+  ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @stress_test2(<16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i8> %s.0.2) {
+; Nothing interesting to test here. Just make sure we didn't crashe.
+; ALL-LABEL: stress_test2:
+; ALL:         retq
+entry:
+  %s.1.0 = shufflevector <16 x i8> %s.0.0, <16 x i8> %s.0.1, <16 x i32> <i32 29, i32 30, i32 2, i32 16, i32 26, i32 21, i32 11, i32 26, i32 26, i32 3, i32 4, i32 5, i32 30, i32 28, i32 15, i32 5>
+  %s.1.1 = shufflevector <16 x i8> %s.0.1, <16 x i8> %s.0.2, <16 x i32> <i32 31, i32 1, i32 24, i32 12, i32 28, i32 5, i32 2, i32 9, i32 29, i32 1, i32 31, i32 5, i32 6, i32 17, i32 15, i32 22>
+  %s.2.0 = shufflevector <16 x i8> %s.1.0, <16 x i8> %s.1.1, <16 x i32> <i32 22, i32 1, i32 12, i32 3, i32 30, i32 4, i32 30, i32 undef, i32 1, i32 10, i32 14, i32 18, i32 27, i32 13, i32 16, i32 19>
+
+  ret <16 x i8> %s.2.0
+}
+
+define void @constant_gets_selected(<4 x i32>* %ptr1, <4 x i32>* %ptr2) {
+; SSE-LABEL: constant_gets_selected:
+; SSE:       # BB#0: # %entry
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    movaps %xmm0, (%rdi)
+; SSE-NEXT:    movaps %xmm0, (%rsi)
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: constant_gets_selected:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovaps %xmm0, (%rdi)
+; AVX-NEXT:    vmovaps %xmm0, (%rsi)
+; AVX-NEXT:    retq
+entry:
+  %weird_zero = bitcast <4 x i32> zeroinitializer to <16 x i8>
+  %shuffle.i = shufflevector <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %weird_zero, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
+  %weirder_zero = bitcast <16 x i8> %shuffle.i to <4 x i32>
+  store <4 x i32> %weirder_zero, <4 x i32>* %ptr1, align 16
+  store <4 x i32> zeroinitializer, <4 x i32>* %ptr2, align 16
+  ret void
+}
+
+define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) {
+; SSE2-LABEL: PR12412:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE2-NEXT:    pand %xmm2, %xmm1
+; SSE2-NEXT:    pand %xmm2, %xmm0
+; SSE2-NEXT:    packuswb %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: PR12412:
+; SSSE3:      # BB#0: # %entry
+; SSSE3-NEXT:   pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,2,4,6,8,10,12,14]
+; SSSE3-NEXT:   pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; SSSE3-NEXT:   por %xmm1, %xmm0
+; SSSE3-NEXT:   retq
+;
+; SSE41-LABEL: PR12412:
+; SSE41:      # BB#0: # %entry
+; SSE41-NEXT:   pshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,2,4,6,8,10,12,14]
+; SSE41-NEXT:   pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT:   por %xmm1, %xmm0
+; SSE41-NEXT:   retq
+;
+; AVX-LABEL: PR12412:
+; AVX:        # BB#0: # %entry
+; AVX-NEXT:     vpshufb {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,2,4,6,8,10,12,14]
+; AVX-NEXT:     vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:     vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:     retq
+entry:
+  %0 = shufflevector <16 x i8> %inval1, <16 x i8> %inval2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  ret <16 x i8> %0
+}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v2.ll b/test/CodeGen/X86/vector-shuffle-128-v2.ll
index 78b4ee7e5dd0..57fa0e859813 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -1,219 +1,1134 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"
 
 define <2 x i64> @shuffle_v2i64_00(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_00
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2i64_00:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: shuffle_v2i64_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v2i64_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 0>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_10(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_10
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[2,3,0,1]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2i64_10:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_10:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 0>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_11(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_11
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2i64_11:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_11:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_22(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_22
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm1[0,1,0,1]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2i64_22:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: shuffle_v2i64_22:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v2i64_22:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastq %xmm1, %xmm0
+; AVX2-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 2>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_32(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_32
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm1[2,3,0,1]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2i64_32:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 2>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_33
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm1[2,3,2,3]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2i64_33:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_33:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 3>
   ret <2 x i64> %shuffle
 }
 
 define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2f64_00
-; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[0,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v2f64_00:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v2f64_00:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v2f64_00:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v2f64_00:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2f64_00:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
   ret <2 x double> %shuffle
 }
 define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2f64_10
-; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[1,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2f64_10:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2f64_10:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 0>
   ret <2 x double> %shuffle
 }
 define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2f64_11
-; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[1,1]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2f64_11:
+; SSE:       # BB#0:
+; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2f64_11:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
   ret <2 x double> %shuffle
 }
 define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
-; FIXME: Should these use movapd + shufpd to remove a domain change at the cost
-;        of a mov?
+; SSE2-LABEL: shuffle_v2f64_22:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0,0]
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v2f64_22:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
+; SSE3-NEXT:    movapd %xmm1, %xmm0
+; SSE3-NEXT:    retq
 ;
-; CHECK-SSE2-LABEL: @shuffle_v2f64_22
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm1[0,1,0,1]
-; CHECK-SSE2-NEXT:    retq
+; SSSE3-LABEL: shuffle_v2f64_22:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
+; SSSE3-NEXT:    movapd %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v2f64_22:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
+; SSE41-NEXT:    movapd %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2f64_22:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
   ret <2 x double> %shuffle
 }
 define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2f64_32
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm1[2,3,0,1]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2f64_32:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1,0]
+; SSE-NEXT:    movapd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2f64_32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 2>
   ret <2 x double> %shuffle
 }
 define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2f64_33
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm1[2,3,2,3]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2f64_33:
+; SSE:       # BB#0:
+; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2f64_33:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm1[1,1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
   ret <2 x double> %shuffle
 }
+define <2 x double> @shuffle_v2f64_03(<2 x double> %a, <2 x double> %b) {
+; SSE2-LABEL: shuffle_v2f64_03:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movsd %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v2f64_03:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    movsd %xmm0, %xmm1
+; SSE3-NEXT:    movaps %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v2f64_03:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movsd %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v2f64_03:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2f64_03:
+; AVX:       # BB#0:
+; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %shuffle
+}
+define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) {
+; SSE2-LABEL: shuffle_v2f64_21:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movsd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v2f64_21:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    movsd %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v2f64_21:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movsd %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v2f64_21:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2f64_21:
+; AVX:       # BB#0:
+; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 1>
+  ret <2 x double> %shuffle
+}
 
 
 define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_02
-; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[0],xmm1[0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2i64_02:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_02:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_02_copy
-; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[0],xmm2[0]
-; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2i64_02_copy:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_02_copy:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm2[0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_03(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_03
-; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[0],xmm1[1]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v2i64_03:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movsd %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v2i64_03:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    movsd %xmm0, %xmm1
+; SSE3-NEXT:    movaps %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v2i64_03:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movsd %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v2i64_03:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: shuffle_v2i64_03:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v2i64_03:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX2-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_03_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_03_copy
-; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[0],xmm2[1]
-; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v2i64_03_copy:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movsd %xmm1, %xmm2
+; SSE2-NEXT:    movaps %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v2i64_03_copy:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    movsd %xmm1, %xmm2
+; SSE3-NEXT:    movaps %xmm2, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v2i64_03_copy:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movsd %xmm1, %xmm2
+; SSSE3-NEXT:    movaps %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v2i64_03_copy:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: shuffle_v2i64_03_copy:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm2[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v2i64_03_copy:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm2[2,3]
+; AVX2-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_12
-; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v2i64_12:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v2i64_12:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v2i64_12:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v2i64_12:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_12:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_12_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_12_copy
-; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[1],xmm2[0]
-; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v2i64_12_copy:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
+; SSE2-NEXT:    movapd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v2i64_12_copy:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm2[0]
+; SSE3-NEXT:    movapd %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v2i64_12_copy:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT:    movdqa %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v2i64_12_copy:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm2 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_12_copy:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm2[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_13(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_13
-; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[1],xmm1[1]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2i64_13:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_13:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_13_copy
-; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[1],xmm2[1]
-; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2i64_13_copy:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm2[1]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_13_copy:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm2[1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_20
-; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[0],xmm0[0]
-; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2i64_20:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_20:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_20_copy
-; CHECK-SSE2:         shufpd {{.*}} # xmm2 = xmm2[0],xmm1[0]
-; CHECK-SSE2-NEXT:    movapd %xmm2, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2i64_20_copy:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE-NEXT:    movdqa %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_20_copy:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm1[0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_21(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_21
-; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[0],xmm0[1]
-; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v2i64_21:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movsd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v2i64_21:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    movsd %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v2i64_21:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movsd %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v2i64_21:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: shuffle_v2i64_21:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v2i64_21:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_21_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_21_copy
-; CHECK-SSE2:         shufpd {{.*}} # xmm2 = xmm2[0],xmm1[1]
-; CHECK-SSE2-NEXT:    movapd %xmm2, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v2i64_21_copy:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movsd %xmm2, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v2i64_21_copy:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    movsd %xmm2, %xmm1
+; SSE3-NEXT:    movaps %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v2i64_21_copy:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movsd %xmm2, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v2i64_21_copy:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: shuffle_v2i64_21_copy:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm1[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v2i64_21_copy:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm1[2,3]
+; AVX2-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 1>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_30(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_30
-; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[1],xmm0[0]
-; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v2i64_30:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
+; SSE2-NEXT:    movapd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v2i64_30:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
+; SSE3-NEXT:    movapd %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v2i64_30:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v2i64_30:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_30:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_30_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_30_copy
-; CHECK-SSE2:         shufpd {{.*}} # xmm2 = xmm2[1],xmm1[0]
-; CHECK-SSE2-NEXT:    movapd %xmm2, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v2i64_30_copy:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
+; SSE2-NEXT:    movapd %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v2i64_30_copy:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    shufpd {{.*#+}} xmm2 = xmm2[1],xmm1[0]
+; SSE3-NEXT:    movapd %xmm2, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v2i64_30_copy:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v2i64_30_copy:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_30_copy:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm2[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_31(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_31
-; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[1],xmm0[1]
-; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2i64_31:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_31:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
   ret <2 x i64> %shuffle
 }
 define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v2i64_31_copy
-; CHECK-SSE2:         shufpd {{.*}} # xmm2 = xmm2[1],xmm1[1]
-; CHECK-SSE2-NEXT:    movapd %xmm2, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v2i64_31_copy:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
+; SSE-NEXT:    movdqa %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_31_copy:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm2[1],xmm1[1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
   ret <2 x i64> %shuffle
 }
+
+define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
+; SSE-LABEL: shuffle_v2i64_0z:
+; SSE:       # BB#0:
+; SSE-NEXT:    movq %xmm0, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_0z:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovq %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
+; SSE-LABEL: shuffle_v2i64_1z:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_1z:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 3>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
+; SSE-LABEL: shuffle_v2i64_z0:
+; SSE:       # BB#0:
+; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2i64_z0:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
+; SSE2-LABEL: shuffle_v2i64_z1:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    movsd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v2i64_z1:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    xorps %xmm1, %xmm1
+; SSE3-NEXT:    movsd %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v2i64_z1:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    movsd %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v2i64_z1:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: shuffle_v2i64_z1:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v2i64_z1:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 1>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
+; SSE-LABEL: shuffle_v2f64_0z:
+; SSE:       # BB#0:
+; SSE-NEXT:    movq %xmm0, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2f64_0z:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovq %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @shuffle_v2f64_1z(<2 x double> %a) {
+; SSE-LABEL: shuffle_v2f64_1z:
+; SSE:       # BB#0:
+; SSE-NEXT:    xorpd %xmm1, %xmm1
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2f64_1z:
+; AVX:       # BB#0:
+; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 1, i32 3>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) {
+; SSE-LABEL: shuffle_v2f64_z0:
+; SSE:       # BB#0:
+; SSE-NEXT:    xorpd %xmm1, %xmm1
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    movapd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2f64_z0:
+; AVX:       # BB#0:
+; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 0>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @shuffle_v2f64_z1(<2 x double> %a) {
+; SSE2-LABEL: shuffle_v2f64_z1:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    movsd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v2f64_z1:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    xorps %xmm1, %xmm1
+; SSE3-NEXT:    movsd %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v2f64_z1:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    movsd %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v2f64_z1:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    xorpd %xmm1, %xmm1
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v2f64_z1:
+; AVX:       # BB#0:
+; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
+  ret <2 x double> %shuffle
+}
+
+define <2 x i64> @insert_reg_and_zero_v2i64(i64 %a) {
+; SSE-LABEL: insert_reg_and_zero_v2i64:
+; SSE:       # BB#0:
+; SSE-NEXT:    movd %rdi, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_reg_and_zero_v2i64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovq %rdi, %xmm0
+; AVX-NEXT:    retq
+  %v = insertelement <2 x i64> undef, i64 %a, i32 0
+  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @insert_mem_and_zero_v2i64(i64* %ptr) {
+; SSE-LABEL: insert_mem_and_zero_v2i64:
+; SSE:       # BB#0:
+; SSE-NEXT:    movq (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_mem_and_zero_v2i64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovq (%rdi), %xmm0
+; AVX-NEXT:    retq
+  %a = load i64* %ptr
+  %v = insertelement <2 x i64> undef, i64 %a, i32 0
+  %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x double> @insert_reg_and_zero_v2f64(double %a) {
+; SSE-LABEL: insert_reg_and_zero_v2f64:
+; SSE:       # BB#0:
+; SSE-NEXT:    movq %xmm0, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_reg_and_zero_v2f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovq %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %v = insertelement <2 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @insert_mem_and_zero_v2f64(double* %ptr) {
+; SSE-LABEL: insert_mem_and_zero_v2f64:
+; SSE:       # BB#0:
+; SSE-NEXT:    movsd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_mem_and_zero_v2f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovsd (%rdi), %xmm0
+; AVX-NEXT:    retq
+  %a = load double* %ptr
+  %v = insertelement <2 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %shuffle
+}
+
+define <2 x i64> @insert_reg_lo_v2i64(i64 %a, <2 x i64> %b) {
+; SSE2-LABEL: insert_reg_lo_v2i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movd %rdi, %xmm1
+; SSE2-NEXT:    movsd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: insert_reg_lo_v2i64:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    movd %rdi, %xmm1
+; SSE3-NEXT:    movsd %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: insert_reg_lo_v2i64:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movd %rdi, %xmm1
+; SSSE3-NEXT:    movsd %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: insert_reg_lo_v2i64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movd %rdi, %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: insert_reg_lo_v2i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovq %rdi, %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: insert_reg_lo_v2i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovq %rdi, %xmm1
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT:    retq
+  %v = insertelement <2 x i64> undef, i64 %a, i32 0
+  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @insert_mem_lo_v2i64(i64* %ptr, <2 x i64> %b) {
+; SSE2-LABEL: insert_mem_lo_v2i64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movlpd (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: insert_mem_lo_v2i64:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    movlpd (%rdi), %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: insert_mem_lo_v2i64:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movlpd (%rdi), %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: insert_mem_lo_v2i64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movq (%rdi), %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: insert_mem_lo_v2i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovq (%rdi), %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: insert_mem_lo_v2i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovq (%rdi), %xmm1
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT:    retq
+  %a = load i64* %ptr
+  %v = insertelement <2 x i64> undef, i64 %a, i32 0
+  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @insert_reg_hi_v2i64(i64 %a, <2 x i64> %b) {
+; SSE-LABEL: insert_reg_hi_v2i64:
+; SSE:       # BB#0:
+; SSE-NEXT:    movd %rdi, %xmm1
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_reg_hi_v2i64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovq %rdi, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
+  %v = insertelement <2 x i64> undef, i64 %a, i32 0
+  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) {
+; SSE-LABEL: insert_mem_hi_v2i64:
+; SSE:       # BB#0:
+; SSE-NEXT:    movq (%rdi), %xmm1
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_mem_hi_v2i64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovq (%rdi), %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
+  %a = load i64* %ptr
+  %v = insertelement <2 x i64> undef, i64 %a, i32 0
+  %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
+  ret <2 x i64> %shuffle
+}
+
+define <2 x double> @insert_reg_lo_v2f64(double %a, <2 x double> %b) {
+; SSE-LABEL: insert_reg_lo_v2f64:
+; SSE:       # BB#0:
+; SSE-NEXT:    movsd %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_reg_lo_v2f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %v = insertelement <2 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
+; SSE-LABEL: insert_mem_lo_v2f64:
+; SSE:       # BB#0:
+; SSE-NEXT:    movlpd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_mem_lo_v2f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovlpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %a = load double* %ptr
+  %v = insertelement <2 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) {
+; SSE-LABEL: insert_reg_hi_v2f64:
+; SSE:       # BB#0:
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    movapd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_reg_hi_v2f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
+  %v = insertelement <2 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
+; SSE-LABEL: insert_mem_hi_v2f64:
+; SSE:       # BB#0:
+; SSE-NEXT:    movhpd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_mem_hi_v2f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovhpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %a = load double* %ptr
+  %v = insertelement <2 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @insert_dup_reg_v2f64(double %a) {
+; FIXME: We should match movddup for SSE3 and higher here.
+;
+; SSE2-LABEL: insert_dup_reg_v2f64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: insert_dup_reg_v2f64:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: insert_dup_reg_v2f64:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: insert_dup_reg_v2f64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: insert_dup_reg_v2f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT:    retq
+  %v = insertelement <2 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+  ret <2 x double> %shuffle
+}
+define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
+; SSE2-LABEL: insert_dup_mem_v2f64:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movsd (%rdi), %xmm0
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: insert_dup_mem_v2f64:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    movddup (%rdi), %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: insert_dup_mem_v2f64:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movddup (%rdi), %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: insert_dup_mem_v2f64:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movddup (%rdi), %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: insert_dup_mem_v2f64:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovddup (%rdi), %xmm0
+; AVX-NEXT:    retq
+  %a = load double* %ptr
+  %v = insertelement <2 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
+  ret <2 x double> %shuffle
+}
+
+define <2 x double> @shuffle_mem_v2f64_10(<2 x double>* %ptr) {
+; SSE-LABEL: shuffle_mem_v2f64_10:
+; SSE:       # BB#0:
+; SSE-NEXT:    movapd (%rdi), %xmm0
+; SSE-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_mem_v2f64_10:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = mem[1,0]
+; AVX-NEXT:    retq
+  %a = load <2 x double>* %ptr
+  %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+  ret <2 x double> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v4.ll b/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 7d496fa19f15..53fb09e32428 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -1,170 +1,1361 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"
 
 define <4 x i32> @shuffle_v4i32_0001(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4i32_0001
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[0,0,0,1]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4i32_0001:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_0001:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
   ret <4 x i32> %shuffle
 }
 define <4 x i32> @shuffle_v4i32_0020(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4i32_0020
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[0,0,2,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4i32_0020:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_0020:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,2,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
   ret <4 x i32> %shuffle
 }
+define <4 x i32> @shuffle_v4i32_0112(<4 x i32> %a, <4 x i32> %b) {
+; SSE-LABEL: shuffle_v4i32_0112:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_0112:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,2]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
+  ret <4 x i32> %shuffle
+}
 define <4 x i32> @shuffle_v4i32_0300(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4i32_0300
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[0,3,0,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4i32_0300:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_0300:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
   ret <4 x i32> %shuffle
 }
 define <4 x i32> @shuffle_v4i32_1000(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4i32_1000
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[1,0,0,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4i32_1000:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_1000:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,0,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
   ret <4 x i32> %shuffle
 }
 define <4 x i32> @shuffle_v4i32_2200(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4i32_2200
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[2,2,0,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4i32_2200:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_2200:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,0,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
   ret <4 x i32> %shuffle
 }
 define <4 x i32> @shuffle_v4i32_3330(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4i32_3330
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[3,3,3,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4i32_3330:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_3330:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
   ret <4 x i32> %shuffle
 }
 define <4 x i32> @shuffle_v4i32_3210(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4i32_3210
-; CHECK-SSE2:         pshufd {{.*}} # xmm0 = xmm0[3,2,1,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4i32_3210:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_3210:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x i32> %shuffle
 }
 
+define <4 x i32> @shuffle_v4i32_2121(<4 x i32> %a, <4 x i32> %b) {
+; SSE-LABEL: shuffle_v4i32_2121:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_2121:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,1]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 1, i32 2, i32 1>
+  ret <4 x i32> %shuffle
+}
+
 define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4f32_0001
-; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[0,0,0,1]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4f32_0001:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_0001:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
   ret <4 x float> %shuffle
 }
 define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4f32_0020
-; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[0,0,2,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4f32_0020:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,2,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_0020:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,2,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
   ret <4 x float> %shuffle
 }
 define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4f32_0300
-; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[0,3,0,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4f32_0300:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3,0,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_0300:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,3,0,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
   ret <4 x float> %shuffle
 }
 define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4f32_1000
-; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[1,0,0,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4f32_1000:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0,0,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_1000:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,0,0,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
   ret <4 x float> %shuffle
 }
 define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4f32_2200
-; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[2,2,0,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4f32_2200:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,2,0,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_2200:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,0,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
   ret <4 x float> %shuffle
 }
 define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4f32_3330
-; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[3,3,3,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4f32_3330:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_3330:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
   ret <4 x float> %shuffle
 }
 define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4f32_3210
-; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[3,2,1,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4f32_3210:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_3210:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x float> %shuffle
 }
+define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: shuffle_v4f32_0011:
+; SSE:       # BB#0:
+; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_0011:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: shuffle_v4f32_2233:
+; SSE:       # BB#0:
+; SSE-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_2233:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: shuffle_v4f32_0022:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4f32_0022:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4f32_0022:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4f32_0022:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_0022:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: shuffle_v4f32_1133:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4f32_1133:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4f32_1133:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4f32_1133:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_1133:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+  ret <4 x float> %shuffle
+}
 
 define <4 x i32> @shuffle_v4i32_0124(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4i32_0124
-; CHECK-SSE2:         shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0]
-; CHECK-SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[2,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v4i32_0124:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4i32_0124:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4i32_0124:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4i32_0124:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_0124:
+; AVX:       # BB#0:
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
   ret <4 x i32> %shuffle
 }
 define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4i32_0142
-; CHECK-SSE2:         shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[2,0]
-; CHECK-SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[0,2]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4i32_0142:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_0142:
+; AVX:       # BB#0:
+; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
   ret <4 x i32> %shuffle
 }
 define <4 x i32> @shuffle_v4i32_0412(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4i32_0412
-; CHECK-SSE2:         shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0]
-; CHECK-SSE2-NEXT:    shufps {{.*}} # xmm1 = xmm1[2,0],xmm0[1,2]
-; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4i32_0412:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[1,2]
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_0412:
+; AVX:       # BB#0:
+; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[2,0],xmm0[1,2]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
   ret <4 x i32> %shuffle
 }
 define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4i32_4012
-; CHECK-SSE2:         shufps {{.*}} # xmm1 = xmm1[0,0],xmm0[0,0]
-; CHECK-SSE2-NEXT:    shufps {{.*}} # xmm1 = xmm1[0,2],xmm0[1,2]
-; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4i32_4012:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_4012:
+; AVX:       # BB#0:
+; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[1,2]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
   ret <4 x i32> %shuffle
 }
 define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4i32_0145
-; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[0],xmm1[0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4i32_0145:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_0145:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   ret <4 x i32> %shuffle
 }
 define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4i32_0451
-; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[0,1]
-; CHECK-SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[0,2,3,1]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4i32_0451:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_0451:
+; AVX:       # BB#0:
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,3,1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
   ret <4 x i32> %shuffle
 }
 define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4i32_4501
-; CHECK-SSE2:         shufpd {{.*}} # xmm1 = xmm1[0],xmm0[0]
-; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4i32_4501:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_4501:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   ret <4 x i32> %shuffle
 }
 define <4 x i32> @shuffle_v4i32_4015(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v4i32_4015
-; CHECK-SSE2:         shufps {{.*}} # xmm0 = xmm0[0,1],xmm1[0,1]
-; CHECK-SSE2-NEXT:    shufps {{.*}} # xmm0 = xmm0[2,0,1,3]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v4i32_4015:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_4015:
+; AVX:       # BB#0:
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,0,1,3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
   ret <4 x i32> %shuffle
 }
+
+define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
+; SSE2-LABEL: shuffle_v4f32_4zzz:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    movss %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4f32_4zzz:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    xorps %xmm1, %xmm1
+; SSE3-NEXT:    movss %xmm0, %xmm1
+; SSE3-NEXT:    movaps %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4f32_4zzz:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    movss %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4f32_4zzz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    xorps %xmm1, %xmm1
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_4zzz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) {
+; SSE2-LABEL: shuffle_v4f32_z4zz:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4f32_z4zz:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    xorps %xmm1, %xmm1
+; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
+; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4f32_z4zz:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4f32_z4zz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_z4zz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) {
+; SSE2-LABEL: shuffle_v4f32_zz4z:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4f32_zz4z:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    xorps %xmm1, %xmm1
+; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
+; SSE3-NEXT:    movaps %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4f32_zz4z:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4f32_zz4z:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_zz4z:
+; AVX:       # BB#0:
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) {
+; SSE2-LABEL: shuffle_v4f32_zuu4:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4f32_zuu4:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    xorps %xmm1, %xmm1
+; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE3-NEXT:    movaps %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4f32_zuu4:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4f32_zuu4:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_zuu4:
+; AVX:       # BB#0:
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) {
+; SSE2-LABEL: shuffle_v4f32_zzz7:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4f32_zzz7:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    xorps %xmm1, %xmm1
+; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE3-NEXT:    movaps %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4f32_zzz7:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4f32_zzz7:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    xorps %xmm1, %xmm1
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_zzz7:
+; AVX:       # BB#0:
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
+; SSE2-LABEL: shuffle_v4f32_z6zz:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4f32_z6zz:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    xorps %xmm1, %xmm1
+; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
+; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4f32_z6zz:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4f32_z6zz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4f32_z6zz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+  ret <4 x float> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_4zzz(<4 x i32> %a) {
+; SSE2-LABEL: shuffle_v4i32_4zzz:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    movss %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4i32_4zzz:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    xorps %xmm1, %xmm1
+; SSE3-NEXT:    movss %xmm0, %xmm1
+; SSE3-NEXT:    movaps %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4i32_4zzz:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    movss %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4i32_4zzz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_4zzz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_z4zz(<4 x i32> %a) {
+; SSE2-LABEL: shuffle_v4i32_z4zz:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    movss %xmm0, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4i32_z4zz:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    xorps %xmm1, %xmm1
+; SSE3-NEXT:    movss %xmm0, %xmm1
+; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4i32_z4zz:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    movss %xmm0, %xmm1
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4i32_z4zz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_z4zz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_zz4z(<4 x i32> %a) {
+; SSE2-LABEL: shuffle_v4i32_zz4z:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    movss %xmm0, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4i32_zz4z:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    xorps %xmm1, %xmm1
+; SSE3-NEXT:    movss %xmm0, %xmm1
+; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4i32_zz4z:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    movss %xmm0, %xmm1
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4i32_zz4z:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_zz4z:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_zuu4(<4 x i32> %a) {
+; SSE-LABEL: shuffle_v4i32_zuu4:
+; SSE:       # BB#0:
+; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_zuu4:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_z6zz(<4 x i32> %a) {
+; SSE2-LABEL: shuffle_v4i32_z6zz:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4i32_z6zz:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    xorps %xmm1, %xmm1
+; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
+; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4i32_z6zz:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4i32_z6zz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_z6zz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_7012(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: shuffle_v4i32_7012:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4i32_7012:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm0[0,0]
+; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,2]
+; SSE3-NEXT:    movaps %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4i32_7012:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4i32_7012:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_7012:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_6701(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: shuffle_v4i32_6701:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
+; SSE2-NEXT:    movapd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4i32_6701:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    shufpd {{.*#+}} xmm1 = xmm1[1],xmm0[0]
+; SSE3-NEXT:    movapd %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4i32_6701:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4i32_6701:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_6701:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_5670(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: shuffle_v4i32_5670:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4i32_5670:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
+; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,2],xmm0[2,0]
+; SSE3-NEXT:    movaps %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4i32_5670:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4i32_5670:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_5670:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: shuffle_v4i32_1234:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4i32_1234:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
+; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,2],xmm1[2,0]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4i32_1234:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4i32_1234:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_1234:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: shuffle_v4i32_2345:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4i32_2345:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4i32_2345:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4i32_2345:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_2345:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: shuffle_v4i32_3456:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4i32_3456:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[0,0]
+; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4i32_3456:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4i32_3456:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_3456:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: shuffle_v4i32_0u1u:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4i32_0u1u:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4i32_0u1u:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4i32_0u1u:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxdq %xmm0, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_0u1u:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmovzxdq %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @shuffle_v4i32_0z1z(<4 x i32> %a) {
+; SSE2-LABEL: shuffle_v4i32_0z1z:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4i32_0z1z:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    pxor %xmm1, %xmm1
+; SSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4i32_0z1z:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pxor %xmm1, %xmm1
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4i32_0z1z:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxdq %xmm0, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v4i32_0z1z:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmovzxdq %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @insert_reg_and_zero_v4i32(i32 %a) {
+; SSE-LABEL: insert_reg_and_zero_v4i32:
+; SSE:       # BB#0:
+; SSE-NEXT:    movd %edi, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_reg_and_zero_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovd %edi, %xmm0
+; AVX-NEXT:    retq
+  %v = insertelement <4 x i32> undef, i32 %a, i32 0
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @insert_mem_and_zero_v4i32(i32* %ptr) {
+; SSE-LABEL: insert_mem_and_zero_v4i32:
+; SSE:       # BB#0:
+; SSE-NEXT:    movd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_mem_and_zero_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovd (%rdi), %xmm0
+; AVX-NEXT:    retq
+  %a = load i32* %ptr
+  %v = insertelement <4 x i32> undef, i32 %a, i32 0
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
+; SSE2-LABEL: insert_reg_and_zero_v4f32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    movss %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: insert_reg_and_zero_v4f32:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    xorps %xmm1, %xmm1
+; SSE3-NEXT:    movss %xmm0, %xmm1
+; SSE3-NEXT:    movaps %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: insert_reg_and_zero_v4f32:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    movss %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: insert_reg_and_zero_v4f32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    xorps %xmm1, %xmm1
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: insert_reg_and_zero_v4f32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vmovss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %v = insertelement <4 x float> undef, float %a, i32 0
+  %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) {
+; SSE-LABEL: insert_mem_and_zero_v4f32:
+; SSE:       # BB#0:
+; SSE-NEXT:    movss (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_mem_and_zero_v4f32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovss (%rdi), %xmm0
+; AVX-NEXT:    retq
+  %a = load float* %ptr
+  %v = insertelement <4 x float> undef, float %a, i32 0
+  %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x float> %shuffle
+}
+
+define <4 x i32> @insert_reg_lo_v4i32(i64 %a, <4 x i32> %b) {
+; SSE2-LABEL: insert_reg_lo_v4i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movd %rdi, %xmm1
+; SSE2-NEXT:    movsd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: insert_reg_lo_v4i32:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    movd %rdi, %xmm1
+; SSE3-NEXT:    movsd %xmm1, %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: insert_reg_lo_v4i32:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movd %rdi, %xmm1
+; SSSE3-NEXT:    movsd %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: insert_reg_lo_v4i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movd %rdi, %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: insert_reg_lo_v4i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovq %rdi, %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: insert_reg_lo_v4i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovq %rdi, %xmm1
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT:    retq
+  %a.cast = bitcast i64 %a to <2 x i32>
+  %v = shufflevector <2 x i32> %a.cast, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @insert_mem_lo_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
+; SSE2-LABEL: insert_mem_lo_v4i32:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movlpd (%rdi), %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: insert_mem_lo_v4i32:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    movlpd (%rdi), %xmm0
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: insert_mem_lo_v4i32:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movlpd (%rdi), %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: insert_mem_lo_v4i32:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    movq (%rdi), %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: insert_mem_lo_v4i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovq (%rdi), %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: insert_mem_lo_v4i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovq (%rdi), %xmm1
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2-NEXT:    retq
+  %a = load <2 x i32>* %ptr
+  %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @insert_reg_hi_v4i32(i64 %a, <4 x i32> %b) {
+; SSE-LABEL: insert_reg_hi_v4i32:
+; SSE:       # BB#0:
+; SSE-NEXT:    movd %rdi, %xmm1
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_reg_hi_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovq %rdi, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
+  %a.cast = bitcast i64 %a to <2 x i32>
+  %v = shufflevector <2 x i32> %a.cast, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) {
+; SSE-LABEL: insert_mem_hi_v4i32:
+; SSE:       # BB#0:
+; SSE-NEXT:    movq (%rdi), %xmm1
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_mem_hi_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovq (%rdi), %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
+  %a = load <2 x i32>* %ptr
+  %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+  ret <4 x i32> %shuffle
+}
+
+define <4 x float> @insert_reg_lo_v4f32(double %a, <4 x float> %b) {
+; SSE-LABEL: insert_reg_lo_v4f32:
+; SSE:       # BB#0:
+; SSE-NEXT:    movsd %xmm0, %xmm1
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_reg_lo_v4f32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovsd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %a.cast = bitcast double %a to <2 x float>
+  %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) {
+; SSE-LABEL: insert_mem_lo_v4f32:
+; SSE:       # BB#0:
+; SSE-NEXT:    movlpd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_mem_lo_v4f32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovlpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %a = load <2 x float>* %ptr
+  %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @insert_reg_hi_v4f32(double %a, <4 x float> %b) {
+; SSE-LABEL: insert_reg_hi_v4f32:
+; SSE:       # BB#0:
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    movapd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_reg_hi_v4f32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
+  %a.cast = bitcast double %a to <2 x float>
+  %v = shufflevector <2 x float> %a.cast, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) {
+; SSE-LABEL: insert_mem_hi_v4f32:
+; SSE:       # BB#0:
+; SSE-NEXT:    movhpd (%rdi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: insert_mem_hi_v4f32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovhpd (%rdi), %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %a = load <2 x float>* %ptr
+  %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
+; SSE-LABEL: shuffle_mem_v4f32_3210:
+; SSE:       # BB#0:
+; SSE-NEXT:    movaps (%rdi), %xmm0
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_mem_v4f32_3210:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
+; AVX-NEXT:    retq
+  %a = load <4 x float>* %ptr
+  %shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-shuffle-128-v8.ll b/test/CodeGen/X86/vector-shuffle-128-v8.ll
index 5d1922a34837..de25a16a2e00 100644
--- a/test/CodeGen/X86/vector-shuffle-128-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-128-v8.ll
@@ -1,493 +1,1919 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"
 
 define <8 x i16> @shuffle_v8i16_01012323(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_01012323
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,0,1,1]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v8i16_01012323:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_01012323:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_67452301(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_67452301
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[3,2,1,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v8i16_67452301:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_67452301:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_456789AB(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_456789AB
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2:         shufpd {{.*}} # xmm0 = xmm0[1],xmm1[0]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_456789AB:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_456789AB:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_456789AB:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_456789AB:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_00000000(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_00000000
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,0,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_00000000:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_00000000:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_00000000:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: shuffle_v8i16_00000000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i16_00000000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
+; AVX2-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_00004444(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_00004444
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v8i16_00004444:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_00004444:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   ret <8 x i16> %shuffle
 }
+define <8 x i16> @shuffle_v8i16_u0u1u2u3(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: shuffle_v8i16_u0u1u2u3:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_u0u1u2u3:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_u4u5u6u7(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: shuffle_v8i16_u4u5u6u7:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_u4u5u6u7:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7>
+  ret <8 x i16> %shuffle
+}
 define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_31206745
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,1,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,3,2]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v8i16_31206745:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_31206745:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_44440000(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_44440000
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,1,0,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_44440000:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_44440000:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_44440000:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_44440000:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0>
   ret <8 x i16> %shuffle
 }
+define <8 x i16> @shuffle_v8i16_23016745(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: shuffle_v8i16_23016745:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_23016745:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: shuffle_v8i16_23026745:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_23026745:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
+  ret <8 x i16> %shuffle
+}
+define <8 x i16> @shuffle_v8i16_23016747(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: shuffle_v8i16_23016747:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
+; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_23016747:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,0,2,3]
+; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,4,7]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 7>
+  ret <8 x i16> %shuffle
+}
 define <8 x i16> @shuffle_v8i16_75643120(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_75643120
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,3,0,1]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,1,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,6,4]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_75643120:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_75643120:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_75643120:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_75643120:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,10,11,12,13,8,9,6,7,2,3,4,5,0,1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 7, i32 5, i32 6, i32 4, i32 3, i32 1, i32 2, i32 0>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_10545410(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_10545410
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[1,0,3,2,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,4,7,6]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_10545410:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_10545410:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_10545410:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_10545410:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,0,1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 5, i32 4, i32 1, i32 0>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_54105410(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_54105410
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,4,7,6]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_54105410:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_54105410:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_54105410:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_54105410:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,0,1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 5, i32 4, i32 1, i32 0>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_54101054(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_54101054
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_54101054:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_54101054:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_54101054:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_54101054:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,8,9]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 4, i32 1, i32 0, i32 1, i32 0, i32 5, i32 4>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_04400440(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_04400440
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,4,4,6]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_04400440:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,4,4,6]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_04400440:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_04400440:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_04400440:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,0,1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 0>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_40044004(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_40044004
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,0]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[2,0,0,2,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,4]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_40044004:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,0]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,0,2,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,4]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_40044004:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_40044004:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_40044004:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,8,9]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 0, i32 0, i32 4, i32 4, i32 0, i32 0, i32 4>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_26405173(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_26405173
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,3,2,1]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[1,3,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,6,4,7]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_26405173:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_26405173:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_26405173:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_26405173:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,10,11,2,3,14,15,6,7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 5, i32 1, i32 7, i32 3>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_20645173(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_20645173
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,3,2,1]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[1,0,3,2,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,5,6,4,7]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_20645173:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,1]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,0,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,6,4,7]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_20645173:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_20645173:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_20645173:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,0,1,12,13,8,9,10,11,2,3,14,15,6,7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 0, i32 6, i32 4, i32 5, i32 1, i32 7, i32 3>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_26401375(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_26401375
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,5,4,6]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,3,1,2]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[1,3,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_26401375:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,3,0,4,5,6,7]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_26401375:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_26401375:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_26401375:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[4,5,12,13,8,9,0,1,2,3,6,7,14,15,10,11]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 6, i32 4, i32 0, i32 1, i32 3, i32 7, i32 5>
   ret <8 x i16> %shuffle
 }
 
+define <8 x i16> @shuffle_v8i16_66751643(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_66751643:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,0]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,1,3,2,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,4,6]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_66751643:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_66751643:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_66751643:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 6, i32 6, i32 7, i32 5, i32 1, i32 6, i32 4, i32 3>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_60514754(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_60514754:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,7,5,6]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_60514754:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_60514754:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_60514754:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,0,1,10,11,2,3,8,9,14,15,10,11,8,9]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 1, i32 4, i32 7, i32 5, i32 4>
+  ret <8 x i16> %shuffle
+}
+
 define <8 x i16> @shuffle_v8i16_00444444(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_00444444
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,0,2,2,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_00444444:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_00444444:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_00444444:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_00444444:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,8,9]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_44004444(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_44004444
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[2,2,0,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_44004444:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,0,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_44004444:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_44004444:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_44004444:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,8,9]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_04404444(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_04404444
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_04404444:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_04404444:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_04404444:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_04404444:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_04400000(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_04400000
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,0,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_04400000:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,0,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_04400000:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_04400000:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_04400000:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,0,1]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_04404567(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_04404567
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v8i16_04404567:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_04404567:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 4, i32 4, i32 0, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_0X444444(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_0X444444
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,1,2,2,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_0X444444:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_0X444444:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_0X444444:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_0X444444:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,8,9]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 undef, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_44X04444(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_44X04444
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[2,2,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_44X04444:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,2,2,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_44X04444:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_44X04444:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_44X04444:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 undef, i32 0, i32 4, i32 4, i32 4, i32 4>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_X4404444(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_X4404444
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,2,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,4,4,4]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_X4404444:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_X4404444:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_X4404444:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_X4404444:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,8,9]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 4, i32 0, i32 4, i32 4, i32 4, i32 4>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_0127XXXX
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_0127XXXX:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_0127XXXX:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_0127XXXX:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_0127XXXX:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_XXXX4563
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,2,0]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_XXXX4563:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_XXXX4563:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_XXXX4563:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_XXXX4563:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_4563XXXX
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,0,2,3]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_4563XXXX:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_4563XXXX:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_4563XXXX:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_4563XXXX:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_01274563
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,1,3]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,5,4,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,3,1,2]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_01274563:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_01274563:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_01274563:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_01274563:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_45630127
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,3,1,2,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,0,1,3]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,6,7,5,4]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_45630127:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_45630127:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_45630127:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_45630127:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
   ret <8 x i16> %shuffle
 }
 
+define <8 x i16> @shuffle_v8i16_37102735(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_37102735:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,5,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,4]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,4,5,6]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_37102735:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_37102735:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_37102735:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 7, i32 1, i32 0, i32 2, i32 7, i32 3, i32 5>
+  ret <8 x i16> %shuffle
+}
+
 define <8 x i16> @shuffle_v8i16_08192a3b(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_08192a3b
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v8i16_08192a3b:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_08192a3b:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_0c1d2e3f
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
-; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v8i16_0c1d2e3f:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_4c5d6e7f(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_4c5d6e7f
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
-; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v8i16_4c5d6e7f:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_4c5d6e7f:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_48596a7b
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
-; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v8i16_48596a7b:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_48596a7b:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_08196e7f(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_08196e7f
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[0,3,2,3]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,3,2,3]
-; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v8i16_08196e7f:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_08196e7f:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 6, i32 14, i32 7, i32 15>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_0c1d6879(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_0c1d6879
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[2,0,2,3]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,3,2,3]
-; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v8i16_0c1d6879:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_0c1d6879:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,0,2,3]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 6, i32 8, i32 7, i32 9>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_109832ba(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_109832ba
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm1 = xmm0[2,0,3,1,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,3,2,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[2,0,3,1,4,5,6,7]
-; CHECK-SSE2-NEXT:    punpcklqdq %xmm0, %xmm1
-; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v8i16_109832ba:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm0[2,0,3,1,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_109832ba:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[2,0,3,1,4,5,6,7]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[2,0,3,1,4,5,6,7]
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 0, i32 9, i32 8, i32 3, i32 2, i32 11, i32 10>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_8091a2b3(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_8091a2b3
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    punpcklwd %xmm0, %xmm1
-; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v8i16_8091a2b3:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_8091a2b3:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3>
   ret <8 x i16> %shuffle
 }
 define <8 x i16> @shuffle_v8i16_c4d5e6f7(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_c4d5e6f7
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm2 = xmm0[2,3,2,3]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm1[2,3,2,3]
-; CHECK-SSE2-NEXT:    punpcklwd %xmm2, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v8i16_c4d5e6f7:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_c4d5e6f7:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_0213cedf(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_0213cedf
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,2,1,3,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[2,3,2,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm1 = xmm1[0,2,1,3,4,5,6,7]
-; CHECK-SSE2-NEXT:    punpcklqdq %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: shuffle_v8i16_0213cedf:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; SSE-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_0213cedf:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,1,3,4,5,6,7]
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 1, i32 3, i32 12, i32 14, i32 13, i32 15>
   ret <8 x i16> %shuffle
 }
 
+define <8 x i16> @shuffle_v8i16_443aXXXX(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_443aXXXX:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_443aXXXX:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,12,13,10,11,12,13,10,11,12,13,14,15]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_443aXXXX:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,12,13,10,11,12,13,10,11,12,13,14,15]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_443aXXXX:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7]
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,12,13,10,11,12,13,10,11,12,13,14,15]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 4, i32 3, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
 define <8 x i16> @shuffle_v8i16_032dXXXX(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_032dXXXX
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[2,1,2,3]
-; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,3,2,3,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,3,2,1,4,5,6,7]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_032dXXXX:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_032dXXXX:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,8,9,6,7,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_032dXXXX:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,8,9,6,7,8,9,12,13,12,13,14,15]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_032dXXXX:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,8,9,6,7,8,9,12,13,12,13,14,15]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 3, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x i16> %shuffle
 }
-define <8 x i16> @shuffle_v8i16_XXXcXXXX(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_XXXcXXXX
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm1[2,1,2,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[0,1,2,1,4,5,6,7]
-; CHECK-SSE2-NEXT:    retq
+define <8 x i16> @shuffle_v8i16_XXXdXXXX(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: shuffle_v8i16_XXXdXXXX:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_XXXdXXXX:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[2,2,3,3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_012dXXXX
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[2,1,2,3]
-; CHECK-SSE2-NEXT:    punpcklwd %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,1,2,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[1,2,0,3,4,5,6,7]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_012dXXXX:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,0,3,4,5,6,7]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_012dXXXX:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_012dXXXX:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_012dXXXX:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
+; AVX-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_XXXXcde3
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
-; CHECK-SSE2-NEXT:    punpckhwd %xmm0, %xmm1
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,0,2]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_XXXXcde3:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_XXXXcde3:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_XXXXcde3:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: shuffle_v8i16_XXXXcde3:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i16_XXXXcde3:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,4,5,6,7,0,1,4,5,8,9,14,15]
+; AVX2-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 3>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_cde3XXXX
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,1,2,1]
-; CHECK-SSE2-NEXT:    punpckhwd %xmm0, %xmm1
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm1[0,2,2,3,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,7,6,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[0,2,2,3]
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_cde3XXXX:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_cde3XXXX:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_cde3XXXX:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: shuffle_v8i16_cde3XXXX:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i16_cde3XXXX:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
+; AVX2-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x i16> %shuffle
 }
 
 define <8 x i16> @shuffle_v8i16_012dcde3(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-SSE2-LABEL: @shuffle_v8i16_012dcde3
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm2 = xmm0[0,1,2,1]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm3 = xmm1[2,1,2,3]
-; CHECK-SSE2-NEXT:    punpckhwd %xmm2, %xmm1
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm1 = xmm1[0,1,2,3,4,7,6,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm1 = xmm1[0,2,2,3]
-; CHECK-SSE2-NEXT:    punpcklwd %xmm3, %xmm0
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[3,1,2,0]
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; CHECK-SSE2-NEXT:    pshufd {{.*}} # xmm0 = xmm0[2,1,2,3]
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[1,2,0,3,4,5,6,7]
-; CHECK-SSE2-NEXT:    punpcklqdq %xmm1, %xmm0
-; CHECK-SSE2-NEXT:    retq
+; SSE2-LABEL: shuffle_v8i16_012dcde3:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,7,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[1,2,0,3,4,5,6,7]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_012dcde3:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_012dcde3:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SSE41-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm1 = xmm1[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
+; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: shuffle_v8i16_012dcde3:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i16_012dcde3:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm2
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[0,1,4,5,8,9,14,15,8,9,14,15,12,13,14,15]
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,6,7,8,9,0,1,0,1,2,3]
+; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT:    retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 12, i32 13, i32 14, i32 3>
   ret <8 x i16> %shuffle
 }
+
+define <8 x i16> @shuffle_v8i16_XXX1X579(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_XXX1X579:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,3,2,4,5,6,7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,2,2,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,5,7]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_XXX1X579:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,10,11,14,15,14,15,10,11,12,13,14,15]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,4,5,8,9,8,9,12,13,6,7]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_XXX1X579:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,10,11,14,15,14,15,10,11,12,13,14,15]
+; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,4,5,8,9,8,9,12,13,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_XXX1X579:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,10,11,14,15,14,15,10,11,12,13,14,15]
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,4,5,8,9,8,9,12,13,6,7]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 5, i32 7, i32 9>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_XX4X8acX:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,1,2,0,4,5,6,7]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,2,1]
+; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,4,7]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,4,5,8,9,0,1]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_XX4X8acX:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE41-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
+; SSE41-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,4,5,8,9,0,1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_XX4X8acX:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; AVX-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,2,3,0,1,0,1,4,5,8,9,0,1]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_8zzzzzzz(i16 %i) {
+; SSE-LABEL: shuffle_v8i16_8zzzzzzz:
+; SSE:       # BB#0:
+; SSE-NEXT:    movzwl %di, %eax
+; SSE-NEXT:    movd %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_8zzzzzzz:
+; AVX:       # BB#0:
+; AVX-NEXT:    movzwl %di, %eax
+; AVX-NEXT:    vmovd %eax, %xmm0
+; AVX-NEXT:    retq
+  %a = insertelement <8 x i16> undef, i16 %i, i32 0
+  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_z8zzzzzz(i16 %i) {
+; SSE-LABEL: shuffle_v8i16_z8zzzzzz:
+; SSE:       # BB#0:
+; SSE-NEXT:    movzwl %di, %eax
+; SSE-NEXT:    movd %eax, %xmm0
+; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_z8zzzzzz:
+; AVX:       # BB#0:
+; AVX-NEXT:    movzwl %di, %eax
+; AVX-NEXT:    vmovd %eax, %xmm0
+; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; AVX-NEXT:    retq
+  %a = insertelement <8 x i16> undef, i16 %i, i32 0
+  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 2, i32 8, i32 3, i32 7, i32 6, i32 5, i32 4, i32 3>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_zzzzz8zz(i16 %i) {
+; SSE-LABEL: shuffle_v8i16_zzzzz8zz:
+; SSE:       # BB#0:
+; SSE-NEXT:    movzwl %di, %eax
+; SSE-NEXT:    movd %eax, %xmm0
+; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_zzzzz8zz:
+; AVX:       # BB#0:
+; AVX-NEXT:    movzwl %di, %eax
+; AVX-NEXT:    vmovd %eax, %xmm0
+; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; AVX-NEXT:    retq
+  %a = insertelement <8 x i16> undef, i16 %i, i32 0
+  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_zuuzuuz8(i16 %i) {
+; SSE-LABEL: shuffle_v8i16_zuuzuuz8:
+; SSE:       # BB#0:
+; SSE-NEXT:    movd %edi, %xmm0
+; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_zuuzuuz8:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovd %edi, %xmm0
+; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1]
+; AVX-NEXT:    retq
+  %a = insertelement <8 x i16> undef, i16 %i, i32 0
+  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 8>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_zzBzzzzz(i16 %i) {
+; SSE-LABEL: shuffle_v8i16_zzBzzzzz:
+; SSE:       # BB#0:
+; SSE-NEXT:    movzwl %di, %eax
+; SSE-NEXT:    movd %eax, %xmm0
+; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_zzBzzzzz:
+; AVX:       # BB#0:
+; AVX-NEXT:    movzwl %di, %eax
+; AVX-NEXT:    vmovd %eax, %xmm0
+; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11]
+; AVX-NEXT:    retq
+  %a = insertelement <8 x i16> undef, i16 %i, i32 3
+  %shuffle = shufflevector <8 x i16> zeroinitializer, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 11, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_def01234(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_def01234:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_def01234:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_def01234:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_def01234:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_ueuu123u(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_ueuu123u:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_ueuu123u:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_ueuu123u:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_ueuu123u:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7,8,9]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_56701234(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_56701234:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_56701234:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_56701234:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_56701234:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_u6uu123u(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_u6uu123u:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_u6uu123u:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_u6uu123u:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_u6uu123u:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_uuuu123u(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: shuffle_v8i16_uuuu123u:
+; SSE:       # BB#0:
+; SSE-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_uuuu123u:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_bcdef012(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_bcdef012:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_bcdef012:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_bcdef012:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_bcdef012:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_ucdeuu1u(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_ucdeuu1u:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_ucdeuu1u:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_ucdeuu1u:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_ucdeuu1u:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm1[6,7,8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 12, i32 13, i32 14, i32 undef, i32 undef, i32 1, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_34567012(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_34567012:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_34567012:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_34567012:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_34567012:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_u456uu1u(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_u456uu1u:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movdqa %xmm0, %xmm1
+; SSE2-NEXT:    psrldq {{.*#+}} xmm1 = xmm1[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_u456uu1u:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_u456uu1u:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_u456uu1u:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 1, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_u456uuuu(<8 x i16> %a, <8 x i16> %b) {
+; SSE-LABEL: shuffle_v8i16_u456uuuu:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_u456uuuu:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_3456789a(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_3456789a:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_3456789a:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_3456789a:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_3456789a:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_u456uu9u(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_u456uu9u:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_u456uu9u:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_u456uu9u:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_u456uu9u:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 4, i32 5, i32 6, i32 undef, i32 undef, i32 9, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_56789abc(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_56789abc:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_56789abc:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_56789abc:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_56789abc:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_u6uu9abu(<8 x i16> %a, <8 x i16> %b) {
+; SSE2-LABEL: shuffle_v8i16_u6uu9abu:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    psrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; SSE2-NEXT:    pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7,8,9]
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_u6uu9abu:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
+; SSSE3-NEXT:    movdqa %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_u6uu9abu:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    palignr {{.*#+}} xmm1 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
+; SSE41-NEXT:    movdqa %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_u6uu9abu:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpalignr {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7,8,9]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 6, i32 undef, i32 undef, i32 9, i32 10, i32 11, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_0uuu1uuu(<8 x i16> %a) {
+; SSE2-LABEL: shuffle_v8i16_0uuu1uuu:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_0uuu1uuu:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; SSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_0uuu1uuu:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxwq %xmm0, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_0uuu1uuu:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmovzxwq %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_0zzz1zzz(<8 x i16> %a) {
+; SSE2-LABEL: shuffle_v8i16_0zzz1zzz:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_0zzz1zzz:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pxor %xmm1, %xmm1
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_0zzz1zzz:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxwq %xmm0, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_0zzz1zzz:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmovzxwq %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_0u1u2u3u(<8 x i16> %a) {
+; SSE2-LABEL: shuffle_v8i16_0u1u2u3u:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_0u1u2u3u:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_0u1u2u3u:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxwd %xmm0, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_0u1u2u3u:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmovzxwd %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef>
+  ret <8 x i16> %shuffle
+}
+
+define <8 x i16> @shuffle_v8i16_0z1z2z3z(<8 x i16> %a) {
+; SSE2-LABEL: shuffle_v8i16_0z1z2z3z:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v8i16_0z1z2z3z:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pxor %xmm1, %xmm1
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v8i16_0z1z2z3z:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxwd %xmm0, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: shuffle_v8i16_0z1z2z3z:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmovzxwd %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
+  ret <8 x i16> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-shuffle-256-v16.ll b/test/CodeGen/X86/vector-shuffle-256-v16.ll
new file mode 100644
index 000000000000..7c38149a700c
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-256-v16.ll
@@ -0,0 +1,1364 @@
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+
+target triple = "x86_64-unknown-unknown"
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,4,4,4]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,5,4]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,1,4,5,6,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,u,u,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,3,0,4,5,6,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,2,3,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,3,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,4,5,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,6,7,u,u,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,1,4,5,6,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,3,0,4,5,6,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,10,11,u,u,u,u,u,u,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,3,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,12,13,u,u,u,u,u,u,u,u,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,3,2,3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[14,15,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[3,3,3,3,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,7,7,7,7]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[6,7,6,7,6,7,6,7,14,15,14,15,14,15,14,15,22,23,22,23,22,23,22,23,30,31,30,31,30,31,30,31]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,6,6]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,16,17,16,17,20,21,20,21,24,25,24,25,28,29,28,29]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[1,1,3,3,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,7,7]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[1,1,3,3,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,7,7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15,18,19,18,19,22,23,22,23,26,27,26,27,30,31,30,31]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_00_00_00_00_00_00_01_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_00_00_00_00_00_02_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_00_00_00_00_03_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_00_00_00_04_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_00_00_05_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_24_09_26_11_28_13_30_15:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_01_18_19_04_05_22_23_08_09_26_27_12_13_30_31:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_16_17_18_19_04_05_06_07_24_25_26_27_12_13_14_15:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5,6],xmm1[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_31:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5,6,7]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_15:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_15:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1],xmm3[2],xmm2[3],xmm3[4],xmm2[5],xmm3[6],xmm2[7]
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3],xmm1[4],xmm0[5],xmm1[6],xmm0[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_31:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255,255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_01_18_19_20_21_06_07_08_09_26_27_12_13_30_31:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_00_16_00_16_00_16_00_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastw %xmm1, %ymm1
+; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
+; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_16_00_16_00_16_00_16_08_24_08_24_08_24_08_24:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,0,1,u,u,0,1,u,u,0,1,u,u,16,17,u,u,16,17,u,u,16,17,u,u,16,17]
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 0, i32 16, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24, i32 8, i32 24>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_16_16_16_16_04_05_06_07_24_24_24_24_12_13_14_15:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,0,1,0,1,u,u,u,u,u,u,u,u,16,17,16,17,16,17,16,17,u,u,u,u,u,u,u,u]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 24, i32 24, i32 24, i32 24, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_19_18_17_16_07_06_05_04_27_26_25_24_15_14_13_12:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,14,15,12,13,10,11,8,9,u,u,u,u,u,u,u,u,30,31,28,29,26,27,24,25]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5,2,3,0,1,u,u,u,u,u,u,u,u,22,23,20,21,18,19,16,17,u,u,u,u,u,u,u,u]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 7, i32 6, i32 5, i32 4, i32 27, i32 26, i32 25, i32 24, i32 15, i32 14, i32 13, i32 12>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm3[3,2,1,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[3,2,1,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_19_18_17_16_03_02_01_00_27_26_25_24_11_10_09_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,6,7,4,5,2,3,0,1,u,u,u,u,u,u,u,u,22,23,20,21,18,19,16,17]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5,2,3,0,1,u,u,u,u,u,u,u,u,22,23,20,21,18,19,16,17,u,u,u,u,u,u,u,u]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 19, i32 18, i32 17, i32 16, i32 3, i32 2, i32 1, i32 0, i32 27, i32 26, i32 25, i32 24, i32 11, i32 10, i32 9, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_08_08_08_08_08_09_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,18,19,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 9, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_08_08_08_10_08_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,20,21,16,17,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 10, i32 8, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_08_11_08_08_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,22,23,16,17,16,17,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_12_08_08_08_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,24,25,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_13_08_08_08_08_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,26,27,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 13, i32 8, i32 8, i32 8, i32 8, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_14_08_08_08_08_08_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,28,29,16,17,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 14, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_15_08_08_08_08_08_08_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,30,31,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_08_24_09_25_10_26_11_27:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_12_28_13_29_14_30_15_31:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_12_28_13_29_14_30_15_31:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,4,5,u,u,6,7,u,u,24,25,u,u,26,27,u,u,28,29,u,u,30,31,u,u]
+; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_23_08_24_09_25_10_26_11_27:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,u,u,10,11,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23,u,u]
+; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_01_00_08_09_08_08_08_08_08_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,18,19,16,17,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 8, i32 9, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_02_00_00_08_08_10_08_08_08_08_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,20,21,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 8, i32 8, i32 10, i32 8, i32 8, i32 8, i32 8, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,6,7,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_03_00_00_00_08_08_08_11_08_08_08_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,22,23,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 11, i32 8, i32 8, i32 8, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,0,1,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_04_00_00_00_00_08_08_08_08_12_08_08_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,16,17,16,17,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 8, i32 8, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,10,11,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_05_00_00_00_00_00_08_08_08_08_08_13_08_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,26,27,16,17,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 13, i32 8, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,12,13,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_06_00_00_00_00_00_00_08_08_08_08_08_08_14_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,28,29,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 14, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,14,15]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_07_00_00_00_00_00_00_00_08_08_08_08_08_08_08_15:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,30,31]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 15>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,2,2,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,8,9,8,9,4,5,4,5,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_02_02_04_04_06_06_14_14_12_12_10_10_08_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,8,9,8,9,12,13,12,13,28,29,28,29,24,25,24,25,20,21,20,21,16,17,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 14, i32 14, i32 12, i32 12, i32 10, i32 10, i32 8, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_00_08_08_08_08_12_12_12_12:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,12,13,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_uu_uu_00_00_00_00_00_08_08_uu_uu_08_08_14_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,28,29,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 14, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[14,15,2,3,0,1,0,1,0,1,0,1,0,1,0,1]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,4,5,6,7,0,1,0,1,0,1,14,15]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_07_uu_00_00_00_00_00_00_08_08_uu_uu_08_08_08_15:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,u,u,u,u,16,17,16,17,16,17,30,31]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 15>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,1,2,2,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,6,6]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,13,12,13,12,13,8,9,4,5,4,5,0,1,0,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_uu_uu_02_04_04_uu_06_14_14_uu_12_10_10_08_08:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,u,u,4,5,8,9,8,9,u,u,12,13,28,29,28,29,u,u,24,25,20,21,20,21,16,17,16,17]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 undef, i32 undef, i32 2, i32 4, i32 4, i32 undef, i32 6, i32 14, i32 14, i32 undef, i32 12, i32 10, i32 10, i32 8, i32 8>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,1,2,3]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,3,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_04_04_04_04_uu_uu_uu_uu_08_08_08_uu_uu_12_12_12:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,9,8,9,8,9,8,9,u,u,u,u,u,u,u,u,16,17,16,17,16,17,u,u,u,u,24,25,24,25,24,25]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 12, i32 12>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_16_16_16_16_20_20_20_20:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_08_08_08_08_12_12_12_12_24_24_24_24_28_28_28_28:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,4,4,4]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_04_24_24_24_24_28_28_28_28:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,24,25,24,25,24,25,24,25]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
+  ret <16 x i16> %shuffle
+}
+
+define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23(<16 x i16> %a, <16 x i16> %b) {
+; AVX1-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_19_04_20_05_21_06_22_07_23:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
+  ret <16 x i16> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-shuffle-256-v32.ll b/test/CodeGen/X86/vector-shuffle-256-v32.ll
new file mode 100644
index 000000000000..c7f4c3512fba
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -0,0 +1,1656 @@
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+
+target triple = "x86_64-unknown-unknown"
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_03_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_04_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_05_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_06_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm1
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    movl $15, %eax
+; AVX1-NEXT:    vmovd %eax, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    movl $15, %eax
+; AVX2-NEXT:    vmovd %eax, %xmm1
+; AVX2-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],zero
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,u,u,1,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[2],zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,u,2,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[3],zero,zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,u,3,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,0,255,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[4],zero,zero,zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,u,4,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,0,255,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[5],zero,zero,zero,zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,u,5,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,0,255,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 21, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[6],zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,u,6,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,0,255,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 22, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[7],zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,u,7,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 23, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,xmm2[8],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,u,8,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,xmm2[9],zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,u,9,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,0,255,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,xmm2[10],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,u,10,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,0,255,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 26, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,xmm2[11],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,11,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,0,255,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 27, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,zero,xmm2[12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,12,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,0,255,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,xmm2[13],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,u,13,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,0,255,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 29, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,xmm2[14],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,14,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    movl $128, %eax
+; AVX1-NEXT:    vmovd %eax, %xmm2
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX1-NEXT:    vpor %xmm0, %xmm2, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX2-NEXT:    movl $15, %eax
+; AVX2-NEXT:    vmovd %eax, %xmm2
+; AVX2-NEXT:    vpxor %ymm3, %ymm3, %ymm3
+; AVX2-NEXT:    vinserti128 $0, %xmm2, %ymm3, %ymm2
+; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpshufb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31_31:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15_23_23_23_23_23_23_23_23_31_31_31_31_31_31_31_31:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[7,7,7,7,7,7,7,7,15,15,15,15,15,15,15,15,23,23,23,23,23,23,23,23,31,31,31,31,31,31,31,31]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_16_16_16_16_20_20_20_20_24_24_24_24_28_28_28_28:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,16,16,16,16,20,20,20,20,24,24,24,24,28,28,28,28]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 16, i32 16, i32 16, i32 16, i32 20, i32 20, i32 20, i32 20, i32 24, i32 24, i32 24, i32 24, i32 28, i32 28, i32 28, i32 28>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_03_03_03_03_07_07_07_07_11_11_11_11_15_15_15_15_19_19_19_19_23_23_23_23_27_27_27_27_31_31_31_31:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[3,3,3,3,7,7,7,7,11,11,11,11,15,15,15,15,19,19,19,19,23,23,23,23,27,27,27,27,31,31,31,31]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7, i32 11, i32 11, i32 11, i32 11, i32 15, i32 15, i32 15, i32 15, i32 19, i32 19, i32 19, i32 19, i32 23, i32 23, i32 23, i32 23, i32 27, i32 27, i32 27, i32 27, i32 31, i32 31, i32 31, i32 31>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_02_02_04_04_06_06_08_08_10_10_12_12_14_14_16_16_18_18_20_20_22_22_24_24_26_26_28_28_30_30:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14,16,16,18,18,20,20,22,22,24,24,26,26,28,28,30,30]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14, i32 16, i32 16, i32 18, i32 18, i32 20, i32 20, i32 22, i32 22, i32 24, i32 24, i32 26, i32 26, i32 28, i32 28, i32 30, i32 30>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_01_01_03_03_05_05_07_07_09_09_11_11_13_13_15_15_17_17_19_19_21_21_23_23_25_25_27_27_29_29_31_31:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15,17,17,19,19,21,21,23,23,25,25,27,27,29,29,31,31]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15, i32 17, i32 17, i32 19, i32 19, i32 21, i32 21, i32 23, i32 23, i32 25, i32 25, i32 27, i32 27, i32 29, i32 29, i32 31, i32 31>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    movl $15, %eax
+; AVX1-NEXT:    vmovd %eax, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    movl $15, %eax
+; AVX2-NEXT:    vmovd %eax, %xmm1
+; AVX2-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
+; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; AVX1-NEXT:    vpshufb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm5, %xmm0, %xmm0
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_16_49_18_51_20_53_22_55_24_57_26_59_28_61_30_63:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 33, i32 2, i32 35, i32 4, i32 37, i32 6, i32 39, i32 8, i32 41, i32 10, i32 43, i32 12, i32 45, i32 14, i32 47, i32 16, i32 49, i32 18, i32 51, i32 20, i32 53, i32 22, i32 55, i32 24, i32 57, i32 26, i32 59, i32 28, i32 61, i32 30, i32 63>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = <1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u>
+; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7]
+; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_48_17_50_19_52_21_54_23_56_25_58_27_60_29_62_31:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
+; AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,8,9,10,11,12,13,14,15]
+; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [0,0,0,0,0,0,0,0,128,128,128,128,128,128,128,128]
+; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpor %xmm2, %xmm4, %xmm2
+; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_32_32_32_32_32_32_32_32_08_09_10_11_12_13_14_15_48_48_48_48_48_48_48_48_24_25_26_27_28_29_30_31:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT:    vpshufb %ymm2, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,15,14,13,12,11,10,9,8]
+; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [7,6,5,4,3,2,1,0,128,128,128,128,128,128,128,128]
+; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpor %xmm2, %xmm4, %xmm2
+; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_15_14_13_12_11_10_09_08_55_54_53_52_51_50_49_48_31_30_29_28_27_26_25_24:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,15,14,13,12,11,10,9,8,u,u,u,u,u,u,u,u,31,30,29,28,27,26,25,24]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u,23,22,21,20,19,18,17,16,u,u,u,u,u,u,u,u]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [128,128,128,128,128,128,128,128,7,6,5,4,3,2,1,0]
+; AVX1-NEXT:    vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [7,6,5,4,3,2,1,0,128,128,128,128,128,128,128,128]
+; AVX1-NEXT:    vpshufb %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpor %xmm2, %xmm4, %xmm2
+; AVX1-NEXT:    vpshufb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufb %xmm5, %xmm1, %xmm1
+; AVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_39_38_37_36_35_34_33_32_07_06_05_04_03_02_01_00_55_54_53_52_51_50_49_48_23_22_21_20_19_18_17_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u,23,22,21,20,19,18,17,16]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[7,6,5,4,3,2,1,0,u,u,u,u,u,u,u,u,23,22,21,20,19,18,17,16,u,u,u,u,u,u,u,u]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_17_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,17,16]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 17, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_18_16_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,18,16,16]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 18, i32 16, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 23, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_30_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,30,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 30, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    movl $15, %eax
+; AVX1-NEXT:    vmovd %eax, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vpshufb %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_31_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 31, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_24_56_25_57_26_58_27_59_28_60_29_61_30_62_31_63:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31,u]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,0,u,1,u,2,u,3,u,4,u,5,u,6,u,7,u,24,u,25,u,26,u,27,u,28,u,29,u,30,u,31]
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47_16_48_17_49_18_50_19_51_20_52_21_53_22_54_23_55:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23,u]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[u,8,u,9,u,10,u,11,u,12,u,13,u,14,u,15,u,16,u,17,u,18,u,19,u,20,u,21,u,22,u,23]
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_01_00_16_17_16_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,17,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 16, i32 17, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_02_00_00_16_16_18_16_16_16_16_16_16_16_16_16_16_16_16_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,18,16,16,16,16,16,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 16, i32 16, i32 18, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_07_00_00_00_00_00_00_00_16_16_16_16_16_16_16_23_16_16_16_16_16_16_16_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,23,16,16,16,16,16,16,16,16]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 23, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_16_16_16_16_16_16_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,16,16,16,16,16,16,16]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_30_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,30,16]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    movl $15, %eax
+; AVX1-NEXT:    vmovd %eax, %xmm1
+; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_31:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,31]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 31>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_04_04_04_04_08_08_08_08_12_12_12_12_28_28_28_28_24_24_24_24_20_20_20_20_16_16_16_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12,28,28,28,28,24,24,24,24,20,20,20,20,16,16,16,16]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12, i32 28, i32 28, i32 28, i32 28, i32 24, i32 24, i32 24, i32 24, i32 20, i32 20, i32 20, i32 20, i32 16, i32 16, i32 16, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_00_00_00_00_00_00_00_00_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,8,8,8,8,8,8,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,0,0,0,u,u,u,u,u,0,0,0,0,0,14,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_16_uu_uu_uu_uu_uu_16_16_16_16_16_30_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,16,16,16,16,u,u,u,u,u,16,16,16,16,16,30,16]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[14,14,1,1,0,0,0,0,0,0,0,0,0,0,0,0]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,u,0,u,u,u,u,0,0,0,0,0,0,14,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_uu_14_uu_uu_00_00_00_00_00_00_00_00_00_00_00_00_16_16_uu_16_uu_uu_uu_uu_16_16_16_16_16_16_30_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,14,u,u,0,0,0,0,0,0,0,0,0,0,0,0,16,16,u,16,u,u,u,u,16,16,16,16,16,16,30,16]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 14, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 undef, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 30, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm0[0,0,0,0,4,4,4,4,8,8,8,8,12,12,12,12]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[12,12,12,12,8,8,8,8,4,4,4,4,0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_uu_uu_uu_04_uu_08_08_08_08_uu_uu_12_uu_28_28_28_28_uu_uu_uu_24_20_20_20_20_16_16_16_16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,u,u,u,4,u,8,8,8,8,u,u,12,u,28,28,28,28,u,u,u,24,20,20,20,20,16,16,16,16]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 8, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 12, i32 undef, i32 28, i32 28, i32 28, i32 28, i32 undef, i32 undef, i32 undef, i32 24, i32 20, i32 20, i32 20, i32 20, i32 16, i32 16, i32 16, i32 16>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; AVX1-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,8,8,9,9,8,8,8,8,8,8,8,8]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_08_08_08_08_08_08_08_08_uu_uu_uu_uu_uu_uu_uu_uu_16_16_16_uu_uu_uu_uu_uu_uu_uu_24_24_24_24_24_24:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[8,8,8,8,8,8,8,8,u,u,u,u,u,u,u,u,16,16,16,u,u,u,u,u,u,u,24,24,24,24,24,24]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 16, i32 16, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = zero,zero,xmm0[u],zero,xmm0[u,u,u,u,u,u,u,7,u,u,u,u]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm4 = xmm3[4,3,u,3,u,u,u,u,u,u,u],zero,xmm3[u,u,u,u]
+; AVX1-NEXT:    vpor %xmm2, %xmm4, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm2 = xmm2[0,1],zero,xmm2[3],zero,zero,zero,zero,zero,zero,zero,xmm2[11],zero,zero,zero,zero
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm5 = xmm4[u,u,4,u,1,6],zero,zero,xmm4[0],zero,xmm4[11,u],zero,zero,zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm6 = xmm1[u,u],zero,xmm1[u],zero,zero,xmm1[5,0],zero,xmm1[10],zero,xmm1[u,4,2,4,7]
+; AVX1-NEXT:    vpor %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm5 = zero,zero,xmm5[2],zero,xmm5[4,5,6,7,8,9,10],zero,xmm5[12,13,14,15]
+; AVX1-NEXT:    vpor %xmm2, %xmm5, %xmm2
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm3 = xmm3[u,u],zero,zero,xmm3[u,u,u,u,1,6,13,u,u],zero,xmm3[u,u]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],zero,zero,zero,xmm0[u,u,12,u,u]
+; AVX1-NEXT:    vpor %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,xmm0[2,3],zero,zero,zero,zero,xmm0[8,9,10],zero,zero,xmm0[13],zero,zero
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm3 = zero,zero,xmm4[u,u],zero,zero,xmm4[12],zero,xmm4[u,u,u],zero,zero,xmm4[u,0,3]
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],zero,xmm1[8,u,u,u,12,1,u],zero,zero
+; AVX1-NEXT:    vpor %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb {{.*#+}} xmm1 = xmm1[0,1],zero,zero,xmm1[4,5,6,7],zero,zero,zero,xmm1[11,12],zero,xmm1[14,15]
+; AVX1-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,12,u,u,u,u,u,u,u,0,3,u,u,u,u,u,u,21,16,u,26,u,u,20,18,20,23]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm1 = ymm1[10,13,u,u,3,3,u,8,u,u,u,12,1,u,u,u,u,u,20,u,17,22,u,u,16,u,27,u,u,u,u,u]
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = <255,255,u,u,255,255,0,255,u,u,u,255,255,u,0,0,u,u,255,u,255,255,0,0,255,0,255,u,0,0,0,0>
+; AVX2-NEXT:    vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm2 = ymm2[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2],ymm0[3,4,5],ymm2[6],ymm0[7]
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
+; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 42, i32 45, i32 12, i32 13, i32 35, i32 35, i32 60, i32 40, i32 17, i32 22, i32 29, i32 44, i32 33, i32 12, i32 48, i32 51, i32 20, i32 19, i32 52, i32 19, i32 49, i32 54, i32 37, i32 32, i32 48, i32 42, i32 59, i32 7, i32 36, i32 34, i32 36, i32 39>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpshufb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
+  ret <32 x i8> %shuffle
+}
+
+define <32 x i8> @shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47(<32 x i8> %a, <32 x i8> %b) {
+; AVX1-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v32i8_00_32_01_33_02_34_03_35_04_36_05_37_06_38_07_39_08_40_09_41_10_42_11_43_12_44_13_45_14_46_15_47:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX2-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47>
+  ret <32 x i8> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll
new file mode 100644
index 000000000000..bca7fb7a276d
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -0,0 +1,888 @@
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+
+target triple = "x86_64-unknown-unknown"
+
+define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_0000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4f64_0000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_0001:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4f64_0001:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_0020:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4f64_0020:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_0300:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4f64_0300:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_1000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4f64_1000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_2200:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4f64_2200:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_3330:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,1,2,2]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4f64_3330:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_3210:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4f64_3210:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_0023:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_0022:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_1032:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_1133:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_1023:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_1022:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_0423:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4f64_0423:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
+; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_0462:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_0426:
+; ALL:       # BB#0:
+; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_1537:
+; ALL:       # BB#0:
+; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_4062:
+; ALL:       # BB#0:
+; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_5173:
+; ALL:       # BB#0:
+; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_5163:
+; ALL:       # BB#0:
+; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_0527:
+; ALL:       # BB#0:
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_4163:
+; ALL:       # BB#0:
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_0145:
+; ALL:       # BB#0:
+; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_4501:
+; ALL:       # BB#0:
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_0167:
+; ALL:       # BB#0:
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_1054:
+; ALL:       # BB#0:
+; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_3254:
+; ALL:       # BB#0:
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_3276:
+; ALL:       # BB#0:
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
+; ALL-LABEL: shuffle_v4f64_1076:
+; ALL:       # BB#0:
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
+; AVX1-LABEL: shuffle_v4f64_0415:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4f64_0415:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
+; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x double> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_0000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_0000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_0001:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_0001:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_0020:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_0020:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_0112:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1],xmm1[0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_0112:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_0300:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_0300:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_1000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_1000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_2200:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_2200:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_3330:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,1,2,2]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_3330:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_3210:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_3210:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_0124:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_0124:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_0142:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_0142:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_0412:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_0412:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
+; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_4012:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_4012:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
+; ALL-LABEL: shuffle_v4i64_0145:
+; ALL:       # BB#0:
+; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_0451:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_0451:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
+; ALL-LABEL: shuffle_v4i64_4501:
+; ALL:       # BB#0:
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_4015:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_4015:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_2u35:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_2u35:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,3,3]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 undef, i32 3, i32 5>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_1251:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[2],ymm2[3]
+; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_1251:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_1054:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_1054:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_3254:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_3254:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_3276:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_3276:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_1076(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_1076:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_1076:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: shuffle_v4i64_0415:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v4i64_0415:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
+; AVX1-LABEL: stress_test1:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm0[1,0,3,2]
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2,3]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
+; AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: stress_test1:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm1[3,1,1,0]
+; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[3,3,1,3]
+; AVX2-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
+; AVX2-NEXT:    retq
+  %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
+  %d = shufflevector <4 x i64> %c, <4 x i64> undef, <4 x i32> <i32 3, i32 undef, i32 2, i32 undef>
+  %e = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 undef>
+  %f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0>
+
+  ret <4 x i64> %f
+}
+
+define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
+; AVX1-LABEL: insert_reg_and_zero_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovq %rdi, %xmm0
+; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: insert_reg_and_zero_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovq %rdi, %xmm0
+; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-NEXT:    retq
+  %v = insertelement <4 x i64> undef, i64 %a, i64 0
+  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
+; AVX1-LABEL: insert_mem_and_zero_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovq (%rdi), %xmm0
+; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: insert_mem_and_zero_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovq (%rdi), %xmm0
+; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; AVX2-NEXT:    retq
+  %a = load i64* %ptr
+  %v = insertelement <4 x i64> undef, i64 %a, i64 0
+  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
+; ALL-LABEL: insert_reg_and_zero_v4f64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT:    vmovsd %xmm0, %xmm1, %xmm0
+; ALL-NEXT:    retq
+  %v = insertelement <4 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
+; ALL-LABEL: insert_mem_and_zero_v4f64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vmovsd (%rdi), %xmm0
+; ALL-NEXT:    retq
+  %a = load double* %ptr
+  %v = insertelement <4 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x double> %shuffle
+}
+
+define <4 x double> @splat_mem_v4f64(double* %ptr) {
+; ALL-LABEL: splat_mem_v4f64:
+; ALL:       # BB#0:
+; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
+; ALL-NEXT:    retq
+  %a = load double* %ptr
+  %v = insertelement <4 x double> undef, double %a, i32 0
+  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x double> %shuffle
+}
+
+define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
+; AVX1-LABEL: splat_mem_v4i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovddup (%rdi), %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splat_mem_v4i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
+; AVX2-NEXT:    retq
+  %a = load i64* %ptr
+  %v = insertelement <4 x i64> undef, i64 %a, i64 0
+  %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+  ret <4 x i64> %shuffle
+}
+
+define <4 x double> @splat_mem_v4f64_2(double* %p) {
+; ALL-LABEL: splat_mem_v4f64_2:
+; ALL:       # BB#0:
+; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
+; ALL-NEXT:    retq
+  %1 = load double* %p
+  %2 = insertelement <2 x double> undef, double %1, i32 0
+  %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer
+  ret <4 x double> %3
+}
+
+define <4 x double> @splat_v4f64(<2 x double> %r) {
+; AVX1-LABEL: splat_v4f64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splat_v4f64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
+; AVX2-NEXT:    retq
+  %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
+  ret <4 x double> %1
+}
diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll
new file mode 100644
index 000000000000..77903da35583
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -0,0 +1,1851 @@
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
+
+target triple = "x86_64-unknown-unknown"
+
+define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_00000000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_00000000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_00000010:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_00000010:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_00000200:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_00000200:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_00003000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_00003000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_00040000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_00040000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_00500000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[u,u,1,u,4,4,4,4]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3],ymm1[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_00500000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_06000000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[u,2,u,u,4,4,4,4]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,0,4,5,4,4]
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_06000000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_70000000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,u,u,u,4,4,4,4]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_70000000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    movl $7, %eax
+; AVX2-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT:    vinserti128 $0, %xmm1, %ymm2, %ymm1
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_01014545:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_00112233:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]
+; AVX1-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_00112233:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_00001111:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_00001111:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_81a3c5e7:
+; ALL:       # BB#0:
+; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_08080808:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_08080808:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
+; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
+; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_08084c4c:
+; ALL:       # BB#0:
+; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
+; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_8823cc67:
+; ALL:       # BB#0:
+; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_9832dc76:
+; ALL:       # BB#0:
+; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_9810dc54:
+; ALL:       # BB#0:
+; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_08194c5d:
+; ALL:       # BB#0:
+; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
+; ALL:       # BB#0:
+; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_08192a3b:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_08192a3b:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
+; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
+; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_08991abb:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[1,0],xmm1[2,0]
+; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[3,3]
+; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_08991abb:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
+; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
+; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_091b2d3f:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_091b2d3f:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
+; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_09ab1def:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_09ab1def:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
+; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_00014445:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_00204464:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_03004744:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_10005444:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_22006644:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_33307774:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_32107654:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_00234467:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_00224466:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_10325476:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_11335577:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_10235467:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_10225466:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_00015444:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_00204644:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_03004474:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_10004444:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_22006446:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_33307474:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_32104567:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_00236744:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_00226644:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_10324567:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_11334567:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_01235467:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_01235466:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_002u6u44:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_00uu66uu:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_103245uu:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_1133uu67:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_0uu354uu:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_uuu3uu66:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_c348cda0:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_c348cda0:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0>
+; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
+; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_f511235a:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_f511235a:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2>
+; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
+; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_32103210:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_32103210:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_76547654:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_76547654:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
+; AVX1-LABEL: shuffle_v8f32_76543210:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8f32_76543210:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
+; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_3210ba98:
+; ALL:       # BB#0:
+; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_3210fedc:
+; ALL:       # BB#0:
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_7654fedc:
+; ALL:       # BB#0:
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_fedc7654:
+; ALL:       # BB#0:
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_ba987654:
+; ALL:       # BB#0:
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
+; ALL-LABEL: shuffle_v8f32_ba983210:
+; ALL:       # BB#0:
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x float> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00000000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00000000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00000010:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00000010:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00000200:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00000200:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00003000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00003000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00040000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00040000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00500000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[u,u,1,u,4,4,4,4]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3],ymm1[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00500000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_06000000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[u,2,u,u,4,4,4,4]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,0,4,5,4,4]
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_06000000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_70000000:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,u,u,u,4,4,4,4]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_70000000:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    movl $7, %eax
+; AVX2-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm1
+; AVX2-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; AVX2-NEXT:    vinserti128 $0, %xmm1, %ymm2, %ymm1
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_01014545:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_01014545:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00112233:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]
+; AVX1-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00112233:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00001111:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00001111:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_81a3c5e7:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_81a3c5e7:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_08080808:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_08080808:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
+; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_08084c4c:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
+; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_08084c4c:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_8823cc67:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_8823cc67:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_9832dc76:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_9832dc76:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,3,2,4,5,7,6]
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_9810dc54:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_9810dc54:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4]
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_08194c5d:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_08194c5d:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_2a3b6e7f:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_2a3b6e7f:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_08192a3b:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_08192a3b:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
+; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
+; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_08991abb:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[1,0],xmm1[2,0]
+; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[3,3]
+; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,1]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_08991abb:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
+; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
+; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_091b2d3f:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_091b2d3f:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
+; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_09ab1def:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[1,1,2,3]
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_09ab1def:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
+; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00014445:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00014445:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00204464:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00204464:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_03004744:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_03004744:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_10005444:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_10005444:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_22006644:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_22006644:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_33307774:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_33307774:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_32107654:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_32107654:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00234467:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00234467:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00224466:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00224466:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_10325476:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_10325476:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_11335577:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_11335577:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_10235467:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_10235467:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_10225466:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_10225466:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00015444:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00015444:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00204644:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00204644:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_03004474:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_03004474:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_10004444:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_10004444:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_22006446:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_22006446:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_33307474:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_33307474:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_32104567:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_32104567:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00236744:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00236744:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00226644:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00226644:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_10324567:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_10324567:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_11334567:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_11334567:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_01235467:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_01235467:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_01235466:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_01235466:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_002u6u44:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_002u6u44:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_00uu66uu:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_00uu66uu:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_103245uu:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_103245uu:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_1133uu67:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_1133uu67:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_0uu354uu:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_0uu354uu:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_uuu3uu66:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_6caa87e5:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
+; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_6caa87e5:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u>
+; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_32103210:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_32103210:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_76547654:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_76547654:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_76543210:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_76543210:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_3210ba98:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_3210ba98:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_3210fedc:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_3210fedc:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_7654fedc:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_7654fedc:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_fedc7654:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_fedc7654:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_ba987654:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_ba987654:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
+; AVX1-LABEL: shuffle_v8i32_ba983210:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: shuffle_v8i32_ba983210:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2-NEXT:    retq
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x i32> %shuffle
+}
+
+define <8 x float> @splat_mem_v8f32_2(float* %p) {
+; ALL-LABEL: splat_mem_v8f32_2:
+; ALL:       # BB#0:
+; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
+; ALL-NEXT:    retq
+  %1 = load float* %p
+  %2 = insertelement <4 x float> undef, float %1, i32 0
+  %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
+  ret <8 x float> %3
+}
+
+define <8 x float> @splat_v8f32(<4 x float> %r) {
+; AVX1-LABEL: splat_v8f32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: splat_v8f32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
+; AVX2-NEXT:    retq
+  %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
+  ret <8 x float> %1
+}
diff --git a/test/CodeGen/X86/vector-shuffle-512-v16.ll b/test/CodeGen/X86/vector-shuffle-512-v16.ll
new file mode 100644
index 000000000000..713ef7835cde
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
+
+target triple = "x86_64-unknown-unknown"
+
+define <16 x float> @shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x float> %a, <16 x float> %b) {
+; ALL-LABEL: shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d:
+; ALL:       # BB#0:
+; ALL-NEXT:    vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
+  ret <16 x float> %shuffle
+}
+
+define <16 x i32> @shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x i32> %a, <16 x i32> %b) {
+; ALL-LABEL: shuffle_v16i32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
+  ret <16 x i32> %shuffle
+}
+
+define <16 x float> @shuffle_v16f32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x float> %a, <16 x float> %b) {
+; ALL-LABEL: shuffle_v16f32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f:
+; ALL:       # BB#0:
+; ALL-NEXT:    vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x float> %shuffle
+}
+
+define <16 x i32> @shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x i32> %a, <16 x i32> %b) {
+; ALL-LABEL: shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpunpckhdq {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
+  ret <16 x i32> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll
new file mode 100644
index 000000000000..fd4c9cb72bad
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -0,0 +1,1456 @@
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
+
+target triple = "x86_64-unknown-unknown"
+
+define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00000000:
+; ALL:       # BB#0:
+; ALL-NEXT:    vbroadcastsd %xmm0, %zmm0 
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00000010:
+; ALL:       # BB#0:
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00000200:
+; ALL:       # BB#0:
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00003000:
+; ALL:       # BB#0:
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,0,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00040000:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
+; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm0[0,1,2],ymm1[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00500000:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,3]
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
+; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2],ymm0[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_06000000:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
+; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_70000000:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
+; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_01014545:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00112233:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,1,1]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,3,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00001111:
+; ALL:       # BB#0:
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,1,1,1]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_81a3c5e7(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_81a3c5e7:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2 
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3 
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_08080808(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_08080808:
+; ALL:       # BB#0:
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_08084c4c(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_08084c4c:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2 
+; ALL-NEXT:    vinsertf128 $1, %xmm2, %ymm2, %ymm2
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3 
+; ALL-NEXT:    vbroadcastsd %xmm3, %ymm3
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm2[0],ymm3[1],ymm2[2],ymm3[3]
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_8823cc67(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_8823cc67:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2 
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3 
+; ALL-NEXT:    vbroadcastsd %xmm3, %ymm3
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3]
+; ALL-NEXT:    vbroadcastsd %xmm1, %ymm1
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_9832dc76(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_9832dc76:
+; ALL:       # BB#0:
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[1,0,3,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm1
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_9810dc54(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_9810dc54:
+; ALL:       # BB#0:
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm2
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[1,0,3,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm1
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_08194c5d(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_08194c5d:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,0,2,1]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm3 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm3[0,1,1,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_2a3b6e7f(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_2a3b6e7f:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,2,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm3 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm3[2,1,3,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_08192a3b(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_08192a3b:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm2 = ymm1[0,2,2,3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm0[2,1,3,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_08991abb(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_08991abb:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm0[1,0,2,2]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm1[0,2,3,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,1]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_091b2d3f(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_091b2d3f:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm3 = ymm0[2,1,3,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1],ymm3[2],ymm2[3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_09ab1def(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_09ab1def:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm2 
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm3 = ymm0[1,0,2,2]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm3[0],ymm2[1,2,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00014445(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00014445:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,1]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00204464(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00204464:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,2,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_03004744(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_03004744:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,3,0,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_10005444(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_10005444:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_22006644(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_22006644:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[2,2,0,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_33307774(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_33307774:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,3,3,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_32107654(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_32107654:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,2,1,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00234467(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00234467:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00224466:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_10325476(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_10325476:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_11335577(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_11335577:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_10235467(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_10235467:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,2,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_10225466(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_10225466:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,2,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00015444(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00015444:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,0,1]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00204644(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00204644:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,0,2,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_03004474(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_03004474:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,3,0,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,3,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_10004444(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_10004444:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_22006446(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_22006446:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[2,2,0,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,0,0,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_33307474(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_33307474:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,3,3,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,0,3,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_32104567(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_32104567:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[3,2,1,0]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00236744:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00226644:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_10324567(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_10324567:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_11334567(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_11334567:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_01235467(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_01235467:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_01235466:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_002u6u44:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[0,0,2,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,1,0,0]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_00uu66uu(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_00uu66uu:
+; ALL:       # BB#0:
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm1
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_103245uu(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_103245uu:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,0,3,2]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_1133uu67(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_1133uu67:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_0uu354uu(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_0uu354uu:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[1,0,2,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_uuu3uu66:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_c348cda0(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_c348cda0:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2 
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[0,1],ymm2[0,1]
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm3 
+; ALL-NEXT:    vbroadcastsd %xmm1, %ymm4
+; ALL-NEXT:    vblendpd {{.*#+}} ymm4 = ymm3[0,1,2],ymm4[3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm4[0],ymm2[1,2],ymm4[3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm1 = ymm3[0,1],ymm1[2],ymm3[3]
+; ALL-NEXT:    vbroadcastsd %xmm0, %ymm0
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm0, %zmm2, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
+  ret <8 x double> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_f511235a:
+; ALL:       # BB#0:
+; ALL-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
+; ALL-NEXT:    vextractf64x4 $1, %zmm0, %ymm3 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm4 = ymm3[0,1,1,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm2[0,1],ymm4[2],ymm2[3]
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm4 = ymm1[0,0,2,2]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm2 = ymm2[0,1,2],ymm4[3]
+; ALL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,1]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2,3]
+; ALL-NEXT:    vextractf64x4 $1, %zmm1, %ymm1 
+; ALL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
+; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
+; ALL-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
+  ret <8 x double> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00000000:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpbroadcastq %xmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00000010(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00000010:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00000200(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00000200:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00003000(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00003000:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00040000(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00040000:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
+; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00500000(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00500000:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
+; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_06000000:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
+; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_70000000:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[3,1,2,3]
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
+; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1],ymm0[2,3,4,5,6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_01014545:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
+; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00112233(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00112233:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,1,1]
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,3,3]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00001111(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00001111:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,1,1,1]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_81a3c5e7:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2 
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3 
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_08080808:
+; ALL:       # BB#0:
+; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_08084c4c(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_08084c4c:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2 
+; ALL-NEXT:    vinserti128 $1, %xmm2, %ymm2, %ymm2
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3 
+; ALL-NEXT:    vpbroadcastq %xmm3, %ymm3
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2,3],ymm2[4,5],ymm3[6,7]
+; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_8823cc67(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_8823cc67:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2 
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3 
+; ALL-NEXT:    vpbroadcastq %xmm3, %ymm3
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1,2,3],ymm2[4,5,6,7]
+; ALL-NEXT:    vpbroadcastq %xmm1, %ymm1
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_9832dc76(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_9832dc76:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; ALL-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
+; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_9810dc54(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_9810dc54:
+; ALL:       # BB#0:
+; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm2
+; ALL-NEXT:    vpshufd {{.*#+}} ymm2 = ymm2[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm1
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
+; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm2, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_08194c5d(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_08194c5d:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2 
+; ALL-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,0,2,1]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3 
+; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm3[0,1,1,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_2a3b6e7f(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_2a3b6e7f:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2 
+; ALL-NEXT:    vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3 
+; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm3[2,1,3,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,3,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_08192a3b(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_08192a3b:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm2 = ymm1[0,2,2,3]
+; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm0[2,1,3,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_08991abb(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_08991abb:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpshufd {{.*#+}} ymm2 = ymm0[2,3,2,3,6,7,6,7]
+; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm1[0,2,3,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2,3,4,5,6,7]
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,1,1]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_091b2d3f(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_091b2d3f:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2 
+; ALL-NEXT:    vpermq {{.*#+}} ymm3 = ymm0[2,1,3,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3],ymm3[4,5],ymm2[6,7]
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_09ab1def(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_09ab1def:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2 
+; ALL-NEXT:    vpshufd {{.*#+}} ymm3 = ymm0[2,3,2,3,6,7,6,7]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3,4,5,6,7]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00014445(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00014445:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,0,1]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00204464(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00204464:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_03004744(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_03004744:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,3,0,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_10005444(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_10005444:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,0,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_22006644(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_22006644:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,2,0,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_33307774(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_33307774:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,3,3,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_32107654(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_32107654:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,2,1,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00234467(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00234467:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,3]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,3]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00224466(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00224466:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_10325476(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_10325476:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_11335577(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_11335577:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_10235467(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_10235467:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,2,3]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,2,3]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_10225466(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_10225466:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,2,2]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,2,2]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00015444(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00015444:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,0,1]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00204644(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00204644:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_03004474(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_03004474:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,3,0,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,3,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_10004444(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_10004444:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[1,0,0,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm0
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_22006446(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_22006446:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[2,2,0,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,0,0,2]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_33307474(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_33307474:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,3,3,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,0,3,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_32104567(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_32104567:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[3,2,1,0]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00236744(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00236744:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm0[0,0,2,3]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,3,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00226644(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00226644:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_10324567(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_10324567:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_11334567(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_11334567:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_01235467(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_01235467:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[1,0,2,3]
+; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_01235466(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_01235466:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[1,0,2,2]
+; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_002u6u44(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_002u6u44:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[0,1,0,1,4,5,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,0,0]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_00uu66uu(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_00uu66uu:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpbroadcastq %xmm0, %ymm1
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_103245uu(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_103245uu:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_1133uu67(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_1133uu67:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm0[2,3,2,3,6,7,6,7]
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_0uu354uu(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_0uu354uu:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[2,3,0,1,6,7,4,5]
+; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_uuu3uu66(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_uuu3uu66:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1 
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
+; ALL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_6caa87e5(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_6caa87e5:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 
+; ALL-NEXT:    vperm2i128 {{.*#+}} ymm2 = ymm0[0,1,0,1]
+; ALL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3 
+; ALL-NEXT:    vpblendd {{.*#+}} ymm4 = ymm1[0,1,2,3],ymm3[4,5],ymm1[6,7]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm2 = ymm4[0,1],ymm2[2,3],ymm4[4,5],ymm2[6,7]
+; ALL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
+; ALL-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,4,5]
+; ALL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
+; ALL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_082a4c6e(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_082a4c6e:
+; ALL:       # BB#0:
+; ALL-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32><i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x double> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_082a4c6e(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_082a4c6e:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x double> @shuffle_v8f64_193b5d7f(<8 x double> %a, <8 x double> %b) {
+; ALL-LABEL: shuffle_v8f64_193b5d7f:
+; ALL:       # BB#0:
+; ALL-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x double> %shuffle
+}
+
+define <8 x i64> @shuffle_v8i64_193b5d7f(<8 x i64> %a, <8 x i64> %b) {
+; ALL-LABEL: shuffle_v8i64_193b5d7f:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  ret <8 x i64> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-shuffle-combining.ll b/test/CodeGen/X86/vector-shuffle-combining.ll
index e60ecb70dec6..4e2bf87fdf64 100644
--- a/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -1,6 +1,14 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
+; RUN: llc < %s -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+;
+; Verify that the DAG combiner correctly folds bitwise operations across
+; shuffles, nested shuffles with undef, pairs of nested shuffles, and other
+; basic and always-safe patterns. Also test that the DAG combiner will combine
+; target-specific shuffle instructions where reasonable.
 
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-unknown"
 
 declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8)
@@ -8,57 +16,72 @@ declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8)
 declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8)
 
 define <4 x i32> @combine_pshufd1(<4 x i32> %a) {
-; CHECK-SSE2-LABEL: @combine_pshufd1
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    retq
-  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) 
-  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 27) 
+; ALL-LABEL: combine_pshufd1:
+; ALL:       # BB#0: # %entry
+; ALL-NEXT:    retq
+entry:
+  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
+  %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 27)
   ret <4 x i32> %c
 }
 
 define <4 x i32> @combine_pshufd2(<4 x i32> %a) {
-; CHECK-SSE2-LABEL: @combine_pshufd2
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    retq
-  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) 
+; ALL-LABEL: combine_pshufd2:
+; ALL:       # BB#0: # %entry
+; ALL-NEXT:    retq
+entry:
+  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
   %b.cast = bitcast <4 x i32> %b to <8 x i16>
   %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 -28)
   %c.cast = bitcast <8 x i16> %c to <4 x i32>
-  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27) 
+  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
   ret <4 x i32> %d
 }
 
 define <4 x i32> @combine_pshufd3(<4 x i32> %a) {
-; CHECK-SSE2-LABEL: @combine_pshufd3
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    retq
-  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) 
+; ALL-LABEL: combine_pshufd3:
+; ALL:       # BB#0: # %entry
+; ALL-NEXT:    retq
+entry:
+  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27)
   %b.cast = bitcast <4 x i32> %b to <8 x i16>
   %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 -28)
   %c.cast = bitcast <8 x i16> %c to <4 x i32>
-  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27) 
+  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 27)
   ret <4 x i32> %d
 }
 
 define <4 x i32> @combine_pshufd4(<4 x i32> %a) {
-; CHECK-SSE2-LABEL: @combine_pshufd4
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
-; CHECK-SSE2-NEXT:    retq
-  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -31) 
+; SSE-LABEL: combine_pshufd4:
+; SSE:       # BB#0: # %entry
+; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_pshufd4:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; AVX-NEXT:    retq
+entry:
+  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -31)
   %b.cast = bitcast <4 x i32> %b to <8 x i16>
   %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b.cast, i8 27)
   %c.cast = bitcast <8 x i16> %c to <4 x i32>
-  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -31) 
+  %d = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %c.cast, i8 -31)
   ret <4 x i32> %d
 }
 
 define <4 x i32> @combine_pshufd5(<4 x i32> %a) {
-; CHECK-SSE2-LABEL: @combine_pshufd5
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    retq
-  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -76) 
+; SSE-LABEL: combine_pshufd5:
+; SSE:       # BB#0: # %entry
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_pshufd5:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; AVX-NEXT:    retq
+entry:
+  %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 -76)
   %b.cast = bitcast <4 x i32> %b to <8 x i16>
   %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b.cast, i8 27)
   %c.cast = bitcast <8 x i16> %c to <4 x i32>
@@ -67,53 +90,2474 @@ define <4 x i32> @combine_pshufd5(<4 x i32> %a) {
 }
 
 define <4 x i32> @combine_pshufd6(<4 x i32> %a) {
-; CHECK-SSE2-LABEL: @combine_pshufd6
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufd $0
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: combine_pshufd6:
+; SSE:       # BB#0: # %entry
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_pshufd6:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; AVX-NEXT:    retq
+entry:
   %b = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 0)
   %c = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %b, i8 8)
   ret <4 x i32> %c
 }
 
 define <8 x i16> @combine_pshuflw1(<8 x i16> %a) {
-; CHECK-SSE2-LABEL: @combine_pshuflw1
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    retq
-  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) 
-  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27) 
+; ALL-LABEL: combine_pshuflw1:
+; ALL:       # BB#0: # %entry
+; ALL-NEXT:    retq
+entry:
+  %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
+  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
   ret <8 x i16> %c
 }
 
 define <8 x i16> @combine_pshuflw2(<8 x i16> %a) {
-; CHECK-SSE2-LABEL: @combine_pshuflw2
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    retq
+; ALL-LABEL: combine_pshuflw2:
+; ALL:       # BB#0: # %entry
+; ALL-NEXT:    retq
+entry:
   %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
-  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 -28) 
-  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27) 
+  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 -28)
+  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
   ret <8 x i16> %d
 }
 
 define <8 x i16> @combine_pshuflw3(<8 x i16> %a) {
-; CHECK-SSE2-LABEL: @combine_pshuflw3
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshufhw {{.*}} # xmm0 = xmm0[0,1,2,3,7,6,5,4]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: combine_pshuflw3:
+; SSE:       # BB#0: # %entry
+; SSE-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_pshuflw3:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
+; AVX-NEXT:    retq
+entry:
   %b = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27)
-  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 27) 
-  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27) 
+  %c = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %b, i8 27)
+  %d = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %c, i8 27)
   ret <8 x i16> %d
 }
 
 define <8 x i16> @combine_pshufhw1(<8 x i16> %a) {
-; CHECK-SSE2-LABEL: @combine_pshufhw1
-; CHECK-SSE2:       # BB#0:
-; CHECK-SSE2-NEXT:    pshuflw {{.*}} # xmm0 = xmm0[3,2,1,0,4,5,6,7]
-; CHECK-SSE2-NEXT:    retq
+; SSE-LABEL: combine_pshufhw1:
+; SSE:       # BB#0: # %entry
+; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_pshufhw1:
+; AVX:       # BB#0: # %entry
+; AVX-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
+; AVX-NEXT:    retq
+entry:
   %b = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27)
-  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27) 
-  %d = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %c, i8 27) 
+  %c = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %b, i8 27)
+  %d = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %c, i8 27)
   ret <8 x i16> %d
 }
 
+define <4 x i32> @combine_bitwise_ops_test1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test1:
+; SSE:       # BB#0:
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_bitwise_ops_test1:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
+  %and = and <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %and
+}
+
+define <4 x i32> @combine_bitwise_ops_test2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test2:
+; SSE:       # BB#0:
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_bitwise_ops_test2:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
+  %or = or <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @combine_bitwise_ops_test3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test3:
+; SSE:       # BB#0:
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_bitwise_ops_test3:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 1, i32 3>
+  %xor = xor <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %xor
+}
+
+define <4 x i32> @combine_bitwise_ops_test4(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test4:
+; SSE:       # BB#0:
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_bitwise_ops_test4:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
+  %and = and <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %and
+}
+
+define <4 x i32> @combine_bitwise_ops_test5(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test5:
+; SSE:       # BB#0:
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_bitwise_ops_test5:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
+  %or = or <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @combine_bitwise_ops_test6(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test6:
+; SSE:       # BB#0:
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_bitwise_ops_test6:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 4, i32 6, i32 5, i32 7>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 4, i32 6, i32 5, i32 7>
+  %xor = xor <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %xor
+}
+
+
+; Verify that DAGCombiner moves the shuffle after the xor/and/or even if shuffles
+; are not performing a swizzle operations.
+
+define <4 x i32> @combine_bitwise_ops_test1b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE2-LABEL: combine_bitwise_ops_test1b:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    andps %xmm1, %xmm0
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_bitwise_ops_test1b:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    andps %xmm1, %xmm0
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test1b:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pand %xmm1, %xmm0
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test1b:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test1b:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX2-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+  %and = and <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %and
+}
+
+define <4 x i32> @combine_bitwise_ops_test2b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE2-LABEL: combine_bitwise_ops_test2b:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    orps %xmm1, %xmm0
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_bitwise_ops_test2b:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    orps %xmm1, %xmm0
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test2b:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    por %xmm1, %xmm0
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test2b:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test2b:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
+; AVX2-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+  %or = or <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @combine_bitwise_ops_test3b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE2-LABEL: combine_bitwise_ops_test3b:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm0
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_bitwise_ops_test3b:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm0
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test3b:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pxor %xmm1, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test3b:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test3b:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX2-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+  %xor = xor <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %xor
+}
+
+define <4 x i32> @combine_bitwise_ops_test4b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE2-LABEL: combine_bitwise_ops_test4b:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    andps %xmm1, %xmm0
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE2-NEXT:    movaps %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_bitwise_ops_test4b:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    andps %xmm1, %xmm0
+; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSSE3-NEXT:    movaps %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test4b:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pand %xmm1, %xmm0
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test4b:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test4b:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; AVX2-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+  %and = and <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %and
+}
+
+define <4 x i32> @combine_bitwise_ops_test5b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE2-LABEL: combine_bitwise_ops_test5b:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    orps %xmm1, %xmm0
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE2-NEXT:    movaps %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_bitwise_ops_test5b:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    orps %xmm1, %xmm0
+; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSSE3-NEXT:    movaps %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test5b:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    por %xmm1, %xmm0
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test5b:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test5b:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2],xmm0[3]
+; AVX2-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+  %or = or <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @combine_bitwise_ops_test6b(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE2-LABEL: combine_bitwise_ops_test6b:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    xorps %xmm1, %xmm0
+; SSE2-NEXT:    xorps %xmm1, %xmm1
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_bitwise_ops_test6b:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    xorps %xmm1, %xmm0
+; SSSE3-NEXT:    xorps %xmm1, %xmm1
+; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test6b:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pxor %xmm1, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm1
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: combine_bitwise_ops_test6b:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_bitwise_ops_test6b:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
+; AVX2-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 5, i32 2, i32 7>
+  %xor = xor <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %xor
+}
+
+define <4 x i32> @combine_bitwise_ops_test1c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test1c:
+; SSE:       # BB#0:
+; SSE-NEXT:    andps %xmm1, %xmm0
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_bitwise_ops_test1c:
+; AVX:       # BB#0:
+; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; AVX-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+  %and = and <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %and
+}
+
+define <4 x i32> @combine_bitwise_ops_test2c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test2c:
+; SSE:       # BB#0:
+; SSE-NEXT:    orps %xmm1, %xmm0
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_bitwise_ops_test2c:
+; AVX:       # BB#0:
+; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[1,3]
+; AVX-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+  %or = or <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @combine_bitwise_ops_test3c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE2-LABEL: combine_bitwise_ops_test3c:
+; SSE2:      # BB#0:
+; SSE2-NEXT:   xorps %xmm1, %xmm0
+; SSE2-NEXT:   xorps %xmm1, %xmm1
+; SSE2-NEXT:   shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSE2-NEXT:   retq
+;
+; SSSE3-LABEL: combine_bitwise_ops_test3c:
+; SSSE3:      # BB#0:
+; SSSE3-NEXT:   xorps %xmm1, %xmm0
+; SSSE3-NEXT:   xorps %xmm1, %xmm1
+; SSSE3-NEXT:   shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSSE3-NEXT:   retq
+;
+; SSE41-LABEL: combine_bitwise_ops_test3c:
+; SSE41:      # BB#0:
+; SSE41-NEXT:   xorps %xmm1, %xmm0
+; SSE41-NEXT:   insertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; SSE41-NEXT:   retq
+;
+; AVX-LABEL: combine_bitwise_ops_test3c:
+; AVX:       # BB#0:
+; AVX-NEXT:    vxorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
+; AVX-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> %c, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+  %xor = xor <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %xor
+}
+
+define <4 x i32> @combine_bitwise_ops_test4c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test4c:
+; SSE:       # BB#0:
+; SSE-NEXT:    andps %xmm1, %xmm0
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSE-NEXT:    movaps %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_bitwise_ops_test4c:
+; AVX:       # BB#0:
+; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm2[0,2],xmm0[1,3]
+; AVX-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+  %and = and <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %and
+}
+
+define <4 x i32> @combine_bitwise_ops_test5c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test5c:
+; SSE:       # BB#0:
+; SSE-NEXT:    orps %xmm1, %xmm0
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSE-NEXT:    movaps %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_bitwise_ops_test5c:
+; AVX:       # BB#0:
+; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm2[0,2],xmm0[1,3]
+; AVX-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+  %or = or <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %or
+}
+
+define <4 x i32> @combine_bitwise_ops_test6c(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; SSE-LABEL: combine_bitwise_ops_test6c:
+; SSE:       # BB#0:
+; SSE-NEXT:    xorps %xmm1, %xmm0
+; SSE-NEXT:    xorps %xmm1, %xmm1
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3]
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_bitwise_ops_test6c:
+; AVX:       # BB#0:
+; AVX-NEXT:    vxorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[1,3]
+; AVX-NEXT:    retq
+  %shuf1 = shufflevector <4 x i32> %c, <4 x i32> %a, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+  %shuf2 = shufflevector <4 x i32> %c, <4 x i32> %b, <4 x i32><i32 0, i32 2, i32 5, i32 7>
+  %xor = xor <4 x i32> %shuf1, %shuf2
+  ret <4 x i32> %xor
+}
+
+define <4 x i32> @combine_nested_undef_test1(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test1:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test1:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,1,0,1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test2(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test2:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test2:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test3(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test3:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test3:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test4(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test4:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: combine_nested_undef_test4:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_nested_undef_test4:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 7, i32 1>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 4, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test5(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test5:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test5:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 4, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test6(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test6:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test6:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 4, i32 0, i32 4>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test7(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test7:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test7:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,2]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 0, i32 2>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test8(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test8:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test8:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test9(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test9:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,2]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test9:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,2]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 3, i32 2, i32 5>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test10(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test10:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test10:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 5>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 4>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test11(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test11:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test11:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 2, i32 5, i32 4>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 0>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test12(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test12:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: combine_nested_undef_test12:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_nested_undef_test12:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 0, i32 2, i32 4>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 4, i32 0, i32 4>
+  ret <4 x i32> %2
+}
+
+; The following pair of shuffles is folded into vector %A.
+define <4 x i32> @combine_nested_undef_test13(<4 x i32> %A, <4 x i32> %B) {
+; ALL-LABEL: combine_nested_undef_test13:
+; ALL:       # BB#0:
+; ALL-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 4, i32 2, i32 6>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 4, i32 0, i32 2, i32 4>
+  ret <4 x i32> %2
+}
+
+; The following pair of shuffles is folded into vector %B.
+define <4 x i32> @combine_nested_undef_test14(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test14:
+; SSE:       # BB#0:
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test14:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovaps %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 4, i32 1, i32 4>
+  ret <4 x i32> %2
+}
+
+
+; Verify that we don't optimize the following cases. We expect more than one shuffle.
+;
+; FIXME: Many of these already don't make sense, and the rest should stop
+; making sense with th enew vector shuffle lowering. Revisit at least testing for
+; it.
+
+define <4 x i32> @combine_nested_undef_test15(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test15:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,1]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,1,0,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test15:
+; AVX:       # BB#0:
+; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,0]
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[2,0],xmm0[3,1]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 3, i32 1>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test16(<4 x i32> %A, <4 x i32> %B) {
+; SSE2-LABEL: combine_nested_undef_test16:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_nested_undef_test16:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_nested_undef_test16:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: combine_nested_undef_test16:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_nested_undef_test16:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; AVX2-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test17(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test17:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[3,1]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,1,0,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test17:
+; AVX:       # BB#0:
+; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[3,1]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test18(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test18:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,0,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test18:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[1,1,0,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test19(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test19:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,0,0,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test19:
+; AVX:       # BB#0:
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,2]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,0,0,0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test20(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test20:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2],xmm1[0,0]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test20:
+; AVX:       # BB#0:
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,2],xmm1[0,0]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,0,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 3, i32 2, i32 4, i32 4>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test21(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test21:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[3,1]
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test21:
+; AVX:       # BB#0:
+; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[1,0]
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[3,1]
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 3, i32 1>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+
+
+; Test that we correctly combine shuffles according to rule
+;  shuffle(shuffle(x, y), undef) -> shuffle(y, undef)
+
+define <4 x i32> @combine_nested_undef_test22(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test22:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test22:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[1,1,1,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test23(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test23:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test23:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test24(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test24:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,3,2,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test24:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,3,2,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 4>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test25(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test25:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: combine_nested_undef_test25:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_nested_undef_test25:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 5, i32 2, i32 4>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 3, i32 1>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test26(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test26:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test26:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 6, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test27(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test27:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: combine_nested_undef_test27:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_nested_undef_test27:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
+; AVX2-NEXT:    retq
+  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 2, i32 1, i32 5, i32 4>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_nested_undef_test28(<4 x i32> %A, <4 x i32> %B) {
+; SSE-LABEL: combine_nested_undef_test28:
+; SSE:       # BB#0:
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_nested_undef_test28:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 2>
+  ret <4 x i32> %2
+}
+
+define <4 x float> @combine_test1(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: combine_test1:
+; SSE:       # BB#0:
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test1:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovaps %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_test2(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_test2:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movss %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_test2:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movss %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_test2:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_test2:
+; AVX:       # BB#0:
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_test3(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: combine_test3:
+; SSE:       # BB#0:
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test3:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_test4(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: combine_test4:
+; SSE:       # BB#0:
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE-NEXT:    movapd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test4:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_test5(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_test5:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movaps %xmm1, %xmm2
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
+; SSE2-NEXT:    movaps %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_test5:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movaps %xmm1, %xmm2
+; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
+; SSSE3-NEXT:    movaps %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_test5:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_test5:
+; AVX:       # BB#0:
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x float> %2
+}
+
+define <4 x i32> @combine_test6(<4 x i32> %a, <4 x i32> %b) {
+; SSE-LABEL: combine_test6:
+; SSE:       # BB#0:
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test6:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovaps %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_test7(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: combine_test7:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movss %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_test7:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movss %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_test7:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: combine_test7:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_test7:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX2-NEXT:    retq
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_test8(<4 x i32> %a, <4 x i32> %b) {
+; SSE-LABEL: combine_test8:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test8:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_test9(<4 x i32> %a, <4 x i32> %b) {
+; SSE-LABEL: combine_test9:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test9:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_test10(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: combine_test10:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movaps %xmm1, %xmm2
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
+; SSE2-NEXT:    movaps %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_test10:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movaps %xmm1, %xmm2
+; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[1,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm2[2,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,0]
+; SSSE3-NEXT:    movaps %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_test10:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: combine_test10:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_test10:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; AVX2-NEXT:    retq
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i32> %2
+}
+
+define <4 x float> @combine_test11(<4 x float> %a, <4 x float> %b) {
+; ALL-LABEL: combine_test11:
+; ALL:       # BB#0:
+; ALL-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_test12(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_test12:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movss %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_test12:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movss %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_test12:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_test12:
+; AVX:       # BB#0:
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_test13(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: combine_test13:
+; SSE:       # BB#0:
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test13:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_test14(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: combine_test14:
+; SSE:       # BB#0:
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test14:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
+  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_test15(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_test15:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movaps %xmm0, %xmm2
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_test15:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movaps %xmm0, %xmm2
+; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_test15:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_test15:
+; AVX:       # BB#0:
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
+  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  ret <4 x float> %2
+}
+
+define <4 x i32> @combine_test16(<4 x i32> %a, <4 x i32> %b) {
+; ALL-LABEL: combine_test16:
+; ALL:       # BB#0:
+; ALL-NEXT:    retq
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_test17(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: combine_test17:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movss %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_test17:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movss %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_test17:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: combine_test17:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_test17:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX2-NEXT:    retq
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_test18(<4 x i32> %a, <4 x i32> %b) {
+; SSE-LABEL: combine_test18:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test18:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_test19(<4 x i32> %a, <4 x i32> %b) {
+; SSE-LABEL: combine_test19:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test19:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 6, i32 7, i32 5, i32 5>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_test20(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: combine_test20:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movaps %xmm0, %xmm2
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_test20:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movaps %xmm0, %xmm2
+; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[1,0],xmm1[0,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[2,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[0,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[2,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_test20:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: combine_test20:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_test20:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; AVX2-NEXT:    retq
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_test21(<8 x i32> %a, <4 x i32>* %ptr) {
+; SSE-LABEL: combine_test21:
+; SSE:       # BB#0:
+; SSE-NEXT:    movdqa %xmm0, %xmm2
+; SSE-NEXT:    punpcklqdq  {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE-NEXT:    punpckhqdq  {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE-NEXT:    movdqa %xmm2,
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: combine_test21:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpunpcklqdq  {{.*#+}} xmm2 = xmm0[0],xmm1[0]
+; AVX1-NEXT:    vpunpckhqdq  {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX1-NEXT:    movdqa %xmm2,
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_test21:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpunpcklqdq  {{.*#+}} xmm2 = xmm0[0],xmm1[0]
+; AVX2-NEXT:    vpunpckhqdq  {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; AVX2-NEXT:    movdqa %xmm2,
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %1 = shufflevector <8 x i32> %a, <8 x i32> %a, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  %2 = shufflevector <8 x i32> %a, <8 x i32> %a, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+  store <4 x i32> %1, <4 x i32>* %ptr, align 16
+  ret <4 x i32> %2
+}
+
+define <8 x float> @combine_test22(<2 x float>* %a, <2 x float>* %b) {
+; SSE-LABEL: combine_test22:
+; SSE:       # BB#0:
+; SSE-NEXT:    movq    (%rdi), %xmm0
+; SSE-NEXT:    movhpd  (%rsi), %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: combine_test22:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovq    (%rdi), %xmm0
+; AVX1-NEXT:    vmovhpd  (%rsi), %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; Current AVX2 lowering of this is still awful, not adding a test case.
+  %1 = load <2 x float>* %a, align 8
+  %2 = load <2 x float>* %b, align 8
+  %3 = shufflevector <2 x float> %1, <2 x float> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <8 x float> %3
+}
+
+; Check some negative cases.
+; FIXME: Do any of these really make sense? Are they redundant with the above tests?
+
+define <4 x float> @combine_test1b(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: combine_test1b:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1,2,0]
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test1b:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[0,1,2,0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 0>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_test2b(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_test2b:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0,0]
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_test2b:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
+; SSSE3-NEXT:    movapd %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_test2b:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
+; SSE41-NEXT:    movapd %xmm1, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_test2b:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0,0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 0, i32 5>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_test3b(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: combine_test3b:
+; SSE:       # BB#0:
+; SSE-NEXT:    movaps %xmm1, %xmm2
+; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[3,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm2[0,2]
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[3,3]
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test3b:
+; AVX:       # BB#0:
+; AVX-NEXT:    vshufps {{.*#+}} xmm2 = xmm1[2,0],xmm0[3,0]
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm2[0,2]
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[3,3]
+; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 6, i32 3>
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 7>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_test4b(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: combine_test4b:
+; SSE:       # BB#0:
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test4b:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 5, i32 5, i32 2, i32 7>
+  ret <4 x float> %2
+}
+
+
+; Verify that we correctly fold shuffles even when we use illegal vector types.
+
+define <4 x i8> @combine_test1c(<4 x i8>* %a, <4 x i8>* %b) {
+; SSE2-LABEL: combine_test1c:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movd (%rdi), %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT:    movd (%rsi), %xmm0
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    movss %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_test1c:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movd (%rdi), %xmm1
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT:    movd (%rsi), %xmm0
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT:    movss %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_test1c:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxbd (%rdi), %xmm1
+; SSE41-NEXT:    pmovzxbd (%rsi), %xmm0
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: combine_test1c:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpmovzxbd (%rdi), %xmm0
+; AVX1-NEXT:    vpmovzxbd (%rsi), %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_test1c:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpmovzxbd (%rdi), %xmm0
+; AVX2-NEXT:    vpmovzxbd (%rsi), %xmm1
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX2-NEXT:    retq
+  %A = load <4 x i8>* %a
+  %B = load <4 x i8>* %b
+  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+  %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+  ret <4 x i8> %2
+}
+
+define <4 x i8> @combine_test2c(<4 x i8>* %a, <4 x i8>* %b) {
+; SSE2-LABEL: combine_test2c:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movd (%rdi), %xmm0
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    movd (%rsi), %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_test2c:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movd (%rdi), %xmm0
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT:    movd (%rsi), %xmm1
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_test2c:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxbd (%rdi), %xmm0
+; SSE41-NEXT:    pmovzxbd (%rsi), %xmm1
+; SSE41-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_test2c:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmovzxbd (%rdi), %xmm0
+; AVX-NEXT:    vpmovzxbd (%rsi), %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
+  %A = load <4 x i8>* %a
+  %B = load <4 x i8>* %b
+  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 1, i32 5>
+  %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
+  ret <4 x i8> %2
+}
+
+define <4 x i8> @combine_test3c(<4 x i8>* %a, <4 x i8>* %b) {
+; SSE2-LABEL: combine_test3c:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movd (%rdi), %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT:    movd (%rsi), %xmm0
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_test3c:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movd (%rdi), %xmm1
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT:    movd (%rsi), %xmm0
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_test3c:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxbd (%rdi), %xmm1
+; SSE41-NEXT:    pmovzxbd (%rsi), %xmm0
+; SSE41-NEXT:    punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_test3c:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpmovzxbd (%rdi), %xmm0
+; AVX-NEXT:    vpmovzxbd (%rsi), %xmm1
+; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; AVX-NEXT:    retq
+  %A = load <4 x i8>* %a
+  %B = load <4 x i8>* %b
+  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
+  %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  ret <4 x i8> %2
+}
+
+define <4 x i8> @combine_test4c(<4 x i8>* %a, <4 x i8>* %b) {
+; SSE2-LABEL: combine_test4c:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movd (%rdi), %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSE2-NEXT:    movd (%rsi), %xmm2
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,0],xmm0[2,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_test4c:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movd (%rdi), %xmm1
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
+; SSSE3-NEXT:    movd (%rsi), %xmm2
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSSE3-NEXT:    movdqa %xmm2, %xmm0
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,0],xmm0[2,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_test4c:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxbd (%rdi), %xmm1
+; SSE41-NEXT:    pmovzxbd (%rsi), %xmm0
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: combine_test4c:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vpmovzxbd (%rdi), %xmm0
+; AVX1-NEXT:    vpmovzxbd (%rsi), %xmm1
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_test4c:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpmovzxbd (%rdi), %xmm0
+; AVX2-NEXT:    vpmovzxbd (%rsi), %xmm1
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
+; AVX2-NEXT:    retq
+  %A = load <4 x i8>* %a
+  %B = load <4 x i8>* %b
+  %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
+  %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x i8> %2
+}
+
+
+; The following test cases are generated from this C++ code
+;
+;__m128 blend_01(__m128 a, __m128 b)
+;{
+;  __m128 s = a;
+;  s = _mm_blend_ps( s, b, 1<<0 );
+;  s = _mm_blend_ps( s, b, 1<<1 );
+;  return s;
+;}
+;
+;__m128 blend_02(__m128 a, __m128 b)
+;{
+;  __m128 s = a;
+;  s = _mm_blend_ps( s, b, 1<<0 );
+;  s = _mm_blend_ps( s, b, 1<<2 );
+;  return s;
+;}
+;
+;__m128 blend_123(__m128 a, __m128 b)
+;{
+;  __m128 s = a;
+;  s = _mm_blend_ps( s, b, 1<<1 );
+;  s = _mm_blend_ps( s, b, 1<<2 );
+;  s = _mm_blend_ps( s, b, 1<<3 );
+;  return s;
+;}
+
+; Ideally, we should collapse the following shuffles into a single one.
+
+define <4 x float> @combine_blend_01(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_blend_01:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movsd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_blend_01:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movsd %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_blend_01:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_blend_01:
+; AVX:       # BB#0:
+; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
+  %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  ret <4 x float> %shuffle6
+}
+
+define <4 x float> @combine_blend_02(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_blend_02:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movss %xmm1, %xmm0
+; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
+; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_blend_02:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movss %xmm1, %xmm0
+; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[3,0]
+; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_blend_02:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_blend_02:
+; AVX:       # BB#0:
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2],xmm0[3]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 undef, i32 3>
+  %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+  ret <4 x float> %shuffle6
+}
+
+define <4 x float> @combine_blend_123(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_blend_123:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movss %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_blend_123:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movss %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_blend_123:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_blend_123:
+; AVX:       # BB#0:
+; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
+; AVX-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
+  %shuffle6 = shufflevector <4 x float> %shuffle, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
+  %shuffle12 = shufflevector <4 x float> %shuffle6, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x float> %shuffle12
+}
+
+define <4 x i32> @combine_test_movhl_1(<4 x i32> %a, <4 x i32> %b) {
+; SSE-LABEL: combine_test_movhl_1:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test_movhl_1:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 7, i32 5, i32 3>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 1, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_test_movhl_2(<4 x i32> %a, <4 x i32> %b) {
+; SSE-LABEL: combine_test_movhl_2:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test_movhl_2:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 0, i32 3, i32 6>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 3, i32 7, i32 0, i32 2>
+  ret <4 x i32> %2
+}
+
+define <4 x i32> @combine_test_movhl_3(<4 x i32> %a, <4 x i32> %b) {
+; SSE-LABEL: combine_test_movhl_3:
+; SSE:       # BB#0:
+; SSE-NEXT:    punpckhqdq {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_test_movhl_3:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 6, i32 3, i32 2>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 6, i32 0, i32 3, i32 2>
+  ret <4 x i32> %2
+}
+
+
+; Verify that we fold shuffles according to rule:
+;  (shuffle(shuffle A, Undef, M0), B, M1) -> (shuffle A, B, M2)
+
+define <4 x float> @combine_undef_input_test1(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_undef_input_test1:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movsd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_undef_input_test1:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movsd %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_undef_input_test1:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test1:
+; AVX:       # BB#0:
+; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 1, i32 2>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test2(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: combine_undef_input_test2:
+; SSE:       # BB#0:
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test2:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test3(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: combine_undef_input_test3:
+; SSE:       # BB#0:
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test3:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test4(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: combine_undef_input_test4:
+; SSE:       # BB#0:
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE-NEXT:    movapd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test4:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test5(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_undef_input_test5:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movsd %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_undef_input_test5:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movsd %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_undef_input_test5:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test5:
+; AVX:       # BB#0:
+; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
+  %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 6, i32 7>
+  ret <4 x float> %2
+}
+
+
+; Verify that we fold shuffles according to rule:
+;  (shuffle(shuffle A, Undef, M0), A, M1) -> (shuffle A, Undef, M2)
+
+define <4 x float> @combine_undef_input_test6(<4 x float> %a) {
+; ALL-LABEL: combine_undef_input_test6:
+; ALL:       # BB#0:
+; ALL-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
+  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 1, i32 2>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test7(<4 x float> %a) {
+; SSE2-LABEL: combine_undef_input_test7:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_undef_input_test7:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_undef_input_test7:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test7:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
+  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test8(<4 x float> %a) {
+; SSE2-LABEL: combine_undef_input_test8:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_undef_input_test8:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_undef_input_test8:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test8:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test9(<4 x float> %a) {
+; SSE-LABEL: combine_undef_input_test9:
+; SSE:       # BB#0:
+; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test9:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
+  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test10(<4 x float> %a) {
+; ALL-LABEL: combine_undef_input_test10:
+; ALL:       # BB#0:
+; ALL-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
+  %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 0, i32 2, i32 6, i32 7>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test11(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_undef_input_test11:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movsd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_undef_input_test11:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movsd %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_undef_input_test11:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test11:
+; AVX:       # BB#0:
+; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
+  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 6>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test12(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: combine_undef_input_test12:
+; SSE:       # BB#0:
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test12:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
+  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test13(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: combine_undef_input_test13:
+; SSE:       # BB#0:
+; SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test13:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 5, i32 0, i32 5>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test14(<4 x float> %a, <4 x float> %b) {
+; SSE-LABEL: combine_undef_input_test14:
+; SSE:       # BB#0:
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE-NEXT:    movapd %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test14:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
+  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test15(<4 x float> %a, <4 x float> %b) {
+; SSE2-LABEL: combine_undef_input_test15:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movsd %xmm0, %xmm1
+; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_undef_input_test15:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    movsd %xmm0, %xmm1
+; SSSE3-NEXT:    movaps %xmm1, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_undef_input_test15:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test15:
+; AVX:       # BB#0:
+; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
+  %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 2, i32 3>
+  ret <4 x float> %2
+}
+
+
+; Verify that shuffles are canonicalized according to rules:
+;  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
+;
+; This allows to trigger the following combine rule:
+;  (shuffle(shuffle A, Undef, M0), A, M1) -> (shuffle A, Undef, M2)
+;
+; As a result, all the shuffle pairs in each function below should be
+; combined into a single legal shuffle operation.
+
+define <4 x float> @combine_undef_input_test16(<4 x float> %a) {
+; ALL-LABEL: combine_undef_input_test16:
+; ALL:       # BB#0:
+; ALL-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 3, i32 1>
+  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 0, i32 1, i32 5, i32 3>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test17(<4 x float> %a) {
+; SSE2-LABEL: combine_undef_input_test17:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_undef_input_test17:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_undef_input_test17:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test17:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
+  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test18(<4 x float> %a) {
+; SSE2-LABEL: combine_undef_input_test18:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: combine_undef_input_test18:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: combine_undef_input_test18:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test18:
+; AVX:       # BB#0:
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
+  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 0, i32 5>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test19(<4 x float> %a) {
+; SSE-LABEL: combine_undef_input_test19:
+; SSE:       # BB#0:
+; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_undef_input_test19:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; AVX-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
+  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  ret <4 x float> %2
+}
+
+define <4 x float> @combine_undef_input_test20(<4 x float> %a) {
+; ALL-LABEL: combine_undef_input_test20:
+; ALL:       # BB#0:
+; ALL-NEXT:    retq
+  %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 4, i32 1, i32 3>
+  %2 = shufflevector <4 x float> %a, <4 x float> %1, <4 x i32> <i32 4, i32 6, i32 2, i32 3>
+  ret <4 x float> %2
+}
+
+; These tests are designed to test the ability to combine away unnecessary
+; operations feeding into a shuffle. The AVX cases are the important ones as
+; they leverage operations which cannot be done naturally on the entire vector
+; and thus are decomposed into multiple smaller operations.
+
+define <8 x i32> @combine_unneeded_subvector1(<8 x i32> %a) {
+; SSE-LABEL: combine_unneeded_subvector1:
+; SSE:       # BB#0:
+; SSE-NEXT:    paddd {{.*}}(%rip), %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,2,1,0]
+; SSE-NEXT:    movdqa %xmm0, %xmm1
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: combine_unneeded_subvector1:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_unneeded_subvector1:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
+; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    retq
+  %b = add <8 x i32> %a, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+  %c = shufflevector <8 x i32> %b, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
+  ret <8 x i32> %c
+}
+
+define <8 x i32> @combine_unneeded_subvector2(<8 x i32> %a, <8 x i32> %b) {
+; SSE-LABEL: combine_unneeded_subvector2:
+; SSE:       # BB#0:
+; SSE-NEXT:    paddd {{.*}}(%rip), %xmm1
+; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[3,2,1,0]
+; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: combine_unneeded_subvector2:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpaddd {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
+; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: combine_unneeded_subvector2:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vpaddd {{.*}}(%rip), %ymm0, %ymm0
+; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
+; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
+; AVX2-NEXT:    retq
+  %c = add <8 x i32> %a, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
+  %d = shufflevector <8 x i32> %b, <8 x i32> %c, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
+  ret <8 x i32> %d
+}
+
+define <4 x float> @combine_insertps1(<4 x float> %a, <4 x float> %b) {
+; SSE41-LABEL: combine_insertps1:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm1[2],xmm0[1,2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_insertps1:
+; AVX:       # BB#0:
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[2],xmm0[1,2,3]
+; AVX-NEXT:    retq
+
+  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 6, i32 2, i32 4>
+  %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
+  ret <4 x float> %d
+}
+
+define <4 x float> @combine_insertps2(<4 x float> %a, <4 x float> %b) {
+; SSE41-LABEL: combine_insertps2:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[2],xmm0[2,3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_insertps2:
+; AVX:       # BB#0:
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[2],xmm0[2,3]
+; AVX-NEXT:    retq
+
+  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 1, i32 6, i32 7>
+  %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32> <i32 4, i32 6, i32 2, i32 3>
+  ret <4 x float> %d
+}
+
+define <4 x float> @combine_insertps3(<4 x float> %a, <4 x float> %b) {
+; SSE41-LABEL: combine_insertps3:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_insertps3:
+; AVX:       # BB#0:
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; AVX-NEXT:    retq
+
+  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 4, i32 2, i32 5>
+  %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32><i32 4, i32 1, i32 5, i32 3>
+  ret <4 x float> %d
+}
+
+define <4 x float> @combine_insertps4(<4 x float> %a, <4 x float> %b) {
+; SSE41-LABEL: combine_insertps4:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: combine_insertps4:
+; AVX:       # BB#0:
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; AVX-NEXT:    retq
+
+  %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32><i32 0, i32 4, i32 2, i32 5>
+  %d = shufflevector <4 x float> %a, <4 x float> %c, <4 x i32><i32 4, i32 1, i32 6, i32 5>
+  ret <4 x float> %d
+}
diff --git a/test/CodeGen/X86/vector-shuffle-sse1.ll b/test/CodeGen/X86/vector-shuffle-sse1.ll
new file mode 100644
index 000000000000..ef60272b6c35
--- /dev/null
+++ b/test/CodeGen/X86/vector-shuffle-sse1.ll
@@ -0,0 +1,235 @@
+; RUN: llc < %s -mcpu=x86-64 -mattr=-sse2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=SSE1
+
+target triple = "x86_64-unknown-unknown"
+
+define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
+; SSE1-LABEL: shuffle_v4f32_0001:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,1]
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) {
+; SSE1-LABEL: shuffle_v4f32_0020:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,2,0]
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) {
+; SSE1-LABEL: shuffle_v4f32_0300:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3,0,0]
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) {
+; SSE1-LABEL: shuffle_v4f32_1000:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0,0,0]
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) {
+; SSE1-LABEL: shuffle_v4f32_2200:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,2,0,0]
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) {
+; SSE1-LABEL: shuffle_v4f32_3330:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,0]
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
+; SSE1-LABEL: shuffle_v4f32_3210:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
+; SSE1-LABEL: shuffle_v4f32_0011:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
+; SSE1-LABEL: shuffle_v4f32_2233:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
+; SSE1-LABEL: shuffle_v4f32_0022:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
+; SSE1-LABEL: shuffle_v4f32_1133:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
+; SSE1-LABEL: shuffle_v4f32_4zzz:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    xorps %xmm1, %xmm1
+; SSE1-NEXT:    movss %xmm0, %xmm1
+; SSE1-NEXT:    movaps %xmm1, %xmm0
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) {
+; SSE1-LABEL: shuffle_v4f32_z4zz:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    xorps %xmm1, %xmm1
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) {
+; SSE1-LABEL: shuffle_v4f32_zz4z:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    xorps %xmm1, %xmm1
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
+; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
+; SSE1-NEXT:    movaps %xmm1, %xmm0
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) {
+; SSE1-LABEL: shuffle_v4f32_zuu4:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    xorps %xmm1, %xmm1
+; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE1-NEXT:    movaps %xmm1, %xmm0
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) {
+; SSE1-LABEL: shuffle_v4f32_zzz7:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    xorps %xmm1, %xmm1
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
+; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
+; SSE1-NEXT:    movaps %xmm1, %xmm0
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
+; SSE1-LABEL: shuffle_v4f32_z6zz:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    xorps %xmm1, %xmm1
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
+; SSE1-NEXT:    retq
+  %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
+; SSE1-LABEL: insert_reg_and_zero_v4f32:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    xorps %xmm1, %xmm1
+; SSE1-NEXT:    movss %xmm0, %xmm1
+; SSE1-NEXT:    movaps %xmm1, %xmm0
+; SSE1-NEXT:    retq
+  %v = insertelement <4 x float> undef, float %a, i32 0
+  %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) {
+; SSE1-LABEL: insert_mem_and_zero_v4f32:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    movss (%rdi), %xmm0
+; SSE1-NEXT:    retq
+  %a = load float* %ptr
+  %v = insertelement <4 x float> undef, float %a, i32 0
+  %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) {
+; SSE1-LABEL: insert_mem_lo_v4f32:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    movq (%rdi), %rax
+; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; SSE1-NEXT:    shrq $32, %rax
+; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; SSE1-NEXT:    movss -{{[0-9]+}}(%rsp), %xmm1
+; SSE1-NEXT:    movss -{{[0-9]+}}(%rsp), %xmm2
+; SSE1-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE1-NEXT:    xorps %xmm2, %xmm2
+; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,1]
+; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
+; SSE1-NEXT:    movaps %xmm1, %xmm0
+; SSE1-NEXT:    retq
+  %a = load <2 x float>* %ptr
+  %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) {
+; SSE1-LABEL: insert_mem_hi_v4f32:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    movq (%rdi), %rax
+; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; SSE1-NEXT:    shrq $32, %rax
+; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; SSE1-NEXT:    movss -{{[0-9]+}}(%rsp), %xmm1
+; SSE1-NEXT:    movss -{{[0-9]+}}(%rsp), %xmm2
+; SSE1-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE1-NEXT:    xorps %xmm2, %xmm2
+; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,1]
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
+; SSE1-NEXT:    retq
+  %a = load <2 x float>* %ptr
+  %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
+  ret <4 x float> %shuffle
+}
+
+define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
+; SSE1-LABEL: shuffle_mem_v4f32_3210:
+; SSE1:       # BB#0:
+; SSE1-NEXT:    movaps (%rdi), %xmm0
+; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
+; SSE1-NEXT:    retq
+  %a = load <4 x float>* %ptr
+  %shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+  ret <4 x float> %shuffle
+}
diff --git a/test/CodeGen/X86/vector-trunc.ll b/test/CodeGen/X86/vector-trunc.ll
new file mode 100644
index 000000000000..8a5b7488f664
--- /dev/null
+++ b/test/CodeGen/X86/vector-trunc.ll
@@ -0,0 +1,90 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+
+define i64 @trunc2i64(<2 x i64> %inval) {
+; SSE-LABEL:  trunc2i64:
+; SSE:        # BB#0: # %entry
+; SSE-NEXT:   pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE-NEXT:   movd %xmm0, %rax
+; SSE-NEXT:   retq
+
+; AVX-LABEL:  trunc2i64:
+; AVX:        # BB#0: # %entry
+; AVX-NEXT:   vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX-NEXT:   vmovq %xmm0, %rax
+; AVX-NEXT:   retq
+
+entry:
+  %0 = trunc <2 x i64> %inval to <2 x i32>
+  %1 = bitcast <2 x i32> %0 to i64
+  ret i64 %1
+}
+
+; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
+define i64 @trunc4i32(<4 x i32> %inval) {
+; SSE2-LABEL:  trunc4i32:
+; SSE2:        # BB#0: # %entry
+; SSE2-NEXT:   pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE2-NEXT:   pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; SSE2-NEXT:   pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SSE2-NEXT:   movd %xmm0, %rax
+; SSE2-NEXT:   retq
+
+; SSSE3-LABEL: trunc4i32:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:  pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT:  movd %xmm0, %rax
+; SSSE3-NEXT:  retq
+
+; SSE41-LABEL: trunc4i32:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:  pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSE41-NEXT:  movd %xmm0, %rax
+; SSE41-NEXT:  retq
+
+; AVX-LABEL:  trunc4i32:
+; AVX:        # BB#0: # %entry
+; AVX-NEXT:   vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX-NEXT:   vmovq %xmm0, %rax
+; AVX-NEXT:   retq
+
+entry:
+  %0 = trunc <4 x i32> %inval to <4 x i16>
+  %1 = bitcast <4 x i16> %0 to i64
+  ret i64 %1
+}
+
+; PR15524 http://llvm.org/bugs/show_bug.cgi?id=15524
+define i64 @trunc8i16(<8 x i16> %inval) {
+; SSE2-LABEL:  trunc8i16:
+; SSE2:        # BB#0: # %entry
+; SSE2-NEXT:   pand .LCP{{.*}}(%rip), %xmm0
+; SSE2-NEXT:   packuswb %xmm0, %xmm0
+; SSE2-NEXT:   movd %xmm0, %rax
+; SSE2-NEXT:   retq
+
+; SSSE3-LABEL: trunc8i16:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:  pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSSE3-NEXT:  movd %xmm0, %rax
+; SSSE3-NEXT:  retq
+
+; SSE41-LABEL: trunc8i16:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:  pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE41-NEXT:  movd %xmm0, %rax
+; SSE41-NEXT:  retq
+
+; AVX-LABEL:  trunc8i16:
+; AVX:        # BB#0: # %entry
+; AVX-NEXT:   vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX-NEXT:   vmovq %xmm0, %rax
+; AVX-NEXT:   retq
+
+entry:
+  %0 = trunc <8 x i16> %inval to <8 x i8>
+  %1 = bitcast <8 x i8> %0 to i64
+  ret i64 %1
+}
diff --git a/test/CodeGen/X86/vector-zext.ll b/test/CodeGen/X86/vector-zext.ll
new file mode 100644
index 000000000000..cd09deee4550
--- /dev/null
+++ b/test/CodeGen/X86/vector-zext.ll
@@ -0,0 +1,360 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+
+define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: zext_8i16_to_8i32:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: zext_8i16_to_8i32:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movdqa %xmm0, %xmm2
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; SSSE3-NEXT:    pand %xmm1, %xmm2
+; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; SSSE3-NEXT:    pand %xmm0, %xmm1
+; SSSE3-NEXT:    movdqa %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: zext_8i16_to_8i32:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pmovzxwd %xmm0, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
+; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pand %xmm0, %xmm1
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: zext_8i16_to_8i32:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vpmovzxwd %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: zext_8i16_to_8i32:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpmovzxwd %xmm0, %ymm0
+; AVX2-NEXT:    retq
+entry:
+  %B = zext <8 x i16> %A to <8 x i32>
+  ret <8 x i32>%B
+}
+
+define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
+; SSE2-LABEL: zext_4i32_to_4i64:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
+; SSE2-NEXT:    pand %xmm3, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; SSE2-NEXT:    pand %xmm3, %xmm1
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: zext_4i32_to_4i64:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
+; SSSE3-NEXT:    pand %xmm3, %xmm2
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; SSSE3-NEXT:    pand %xmm3, %xmm1
+; SSSE3-NEXT:    movdqa %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: zext_4i32_to_4i64:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pmovzxdq %xmm0, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [4294967295,4294967295]
+; SSE41-NEXT:    pand %xmm3, %xmm2
+; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; SSE41-NEXT:    pand %xmm3, %xmm1
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: zext_4i32_to_4i64:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vpmovzxdq %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: zext_4i32_to_4i64:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpmovzxdq %xmm0, %ymm0
+; AVX2-NEXT:    retq
+entry:
+  %B = zext <4 x i32> %A to <4 x i64>
+  ret <4 x i64>%B
+}
+
+define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
+; SSE2-LABEL: zext_8i8_to_8i32:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255]
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: zext_8i8_to_8i32:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movdqa %xmm0, %xmm2
+; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255]
+; SSSE3-NEXT:    pand %xmm1, %xmm2
+; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; SSSE3-NEXT:    pand %xmm0, %xmm1
+; SSSE3-NEXT:    movdqa %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: zext_8i8_to_8i32:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pmovzxwd %xmm0, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255]
+; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; SSE41-NEXT:    pand %xmm0, %xmm1
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: zext_8i8_to_8i32:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vpmovzxwd %xmm0, %xmm1
+; AVX1-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: zext_8i8_to_8i32:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpmovzxwd %xmm0, %ymm0
+; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    retq
+entry:
+  %t = zext <8 x i8> %z to <8 x i32>
+  ret <8 x i32> %t
+}
+
+; PR17654
+define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %z) {
+; SSE2-LABEL: zext_16i8_to_16i16:
+; SSE2:       # BB#0: # %entry
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSSE3-LABEL: zext_16i8_to_16i16:
+; SSSE3:       # BB#0: # %entry
+; SSSE3-NEXT:    movdqa %xmm0, %xmm2
+; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
+; SSSE3-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; SSSE3-NEXT:    pand %xmm1, %xmm2
+; SSSE3-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSSE3-NEXT:    pand %xmm0, %xmm1
+; SSSE3-NEXT:    movdqa %xmm2, %xmm0
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: zext_16i8_to_16i16:
+; SSE41:       # BB#0: # %entry
+; SSE41-NEXT:    pmovzxbw %xmm0, %xmm2
+; SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT:    pand %xmm1, %xmm2
+; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE41-NEXT:    pand %xmm0, %xmm1
+; SSE41-NEXT:    movdqa %xmm2, %xmm0
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: zext_16i8_to_16i16:
+; AVX1:       # BB#0: # %entry
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT:    vpmovzxbw %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: zext_16i8_to_16i16:
+; AVX2:       # BB#0: # %entry
+; AVX2-NEXT:    vpmovzxbw %xmm0, %ymm0
+; AVX2-NEXT:    retq
+entry:
+  %t = zext <16 x i8> %z to <16 x i16>
+  ret <16 x i16> %t
+}
+
+define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) {
+; SSE2-LABEL: load_zext_16i8_to_16i16:
+; SSE2:        # BB#0: # %entry
+; SSE2-NEXT:   movdqa        (%rdi), %xmm1
+; SSE2-NEXT:   movdqa        %xmm1, %xmm0
+; SSE2-NEXT:   punpcklbw     %xmm0, %xmm0    # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:   movdqa        {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE2-NEXT:   pand  %xmm2, %xmm0
+; SSE2-NEXT:   punpckhbw     %xmm1, %xmm1    # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE2-NEXT:   pand  %xmm2, %xmm1
+; SSE2-NEXT:   retq
+
+; SSSE3-LABEL: load_zext_16i8_to_16i16:
+; SSSE3:        # BB#0: # %entry
+; SSSE3-NEXT:   movdqa        (%rdi), %xmm1
+; SSSE3-NEXT:   movdqa        %xmm1, %xmm0
+; SSSE3-NEXT:   punpcklbw     %xmm0, %xmm0    # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSSE3-NEXT:   movdqa        {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSSE3-NEXT:   pand  %xmm2, %xmm0
+; SSSE3-NEXT:   punpckhbw     %xmm1, %xmm1    # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSSE3-NEXT:   pand  %xmm2, %xmm1
+; SSSE3-NEXT:   retq
+
+; SSE41-LABEL: load_zext_16i8_to_16i16:
+; SSE41:        # BB#0: # %entry
+; SSE41-NEXT: 	movdqa	(%rdi), %xmm1
+; SSE41-NEXT: 	pmovzxbw	%xmm1, %xmm0
+; SSE41-NEXT: 	movdqa	{{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
+; SSE41-NEXT: 	pand	%xmm2, %xmm0
+; SSE41-NEXT: 	punpckhbw	%xmm1, %xmm1    # xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; SSE41-NEXT: 	pand	%xmm2, %xmm1
+; SSE41-NEXT: 	retq
+
+; AVX1-LABEL: load_zext_16i8_to_16i16:
+; AVX1:        # BB#0: # %entry
+; AVX1-NEXT: 	vmovdqa	(%rdi), %xmm0
+; AVX1-NEXT: 	vpxor	%xmm1, %xmm1, %xmm1
+; AVX1-NEXT: 	vpunpckhbw	%xmm1, %xmm0, %xmm1 # xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT: 	vpmovzxbw	%xmm0, %xmm0
+; AVX1-NEXT: 	vinsertf128	$1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: 	retq
+
+; AVX2-LABEL: load_zext_16i8_to_16i16:
+; AVX2:        # BB#0: # %entry
+; AVX2-NEXT: 	vpmovzxbw	(%rdi), %ymm0
+; AVX2-NEXT: 	retq
+entry:
+ %X = load <16 x i8>* %ptr
+ %Y = zext <16 x i8> %X to <16 x i16>
+ ret <16 x i16> %Y
+}
+
+define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) {
+; SSE2-LABEL: load_zext_8i16_to_8i32:
+; SSE2:          # BB#0: # %entry
+; SSE2-NEXT:   movdqa        (%rdi), %xmm1
+; SSE2-NEXT:   movdqa        %xmm1, %xmm0
+; SSE2-NEXT:   punpcklwd     %xmm0, %xmm0    # xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:   movdqa        {{.*#+}} xmm2 = [65535,65535,65535,65535]
+; SSE2-NEXT:   pand  %xmm2, %xmm0
+; SSE2-NEXT:   punpckhwd     %xmm1, %xmm1    # xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:   pand  %xmm2, %xmm1
+; SSE2-NEXT:   retq
+
+; SSSE3-LABEL: load_zext_8i16_to_8i32:
+; SSSE3:        # BB#0: # %entry
+; SSSE3-NEXT:   movdqa        (%rdi), %xmm1
+; SSSE3-NEXT:   movdqa        %xmm1, %xmm0
+; SSSE3-NEXT:   punpcklwd     %xmm0, %xmm0    # xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSSE3-NEXT:   movdqa        {{.*#+}} xmm2 = [65535,65535,65535,65535]
+; SSSE3-NEXT:   pand  %xmm2, %xmm0
+; SSSE3-NEXT:   punpckhwd     %xmm1, %xmm1    # xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; SSSE3-NEXT:   pand  %xmm2, %xmm1
+; SSSE3-NEXT:   retq
+
+; SSE41-LABEL: load_zext_8i16_to_8i32:
+; SSE41:        # BB#0: # %entry
+; SSE41-NEXT: 	movdqa	(%rdi), %xmm1
+; SSE41-NEXT: 	pmovzxwd	%xmm1, %xmm0
+; SSE41-NEXT: 	movdqa	{{.*#+}} xmm2 = [65535,65535,65535,65535]
+; SSE41-NEXT: 	pand	%xmm2, %xmm0
+; SSE41-NEXT: 	punpckhwd	%xmm1, %xmm1    # xmm1 = xmm1[4,4,5,5,6,6,7,7]
+; SSE41-NEXT: 	pand	%xmm2, %xmm1
+; SSE41-NEXT: 	retq
+
+; AVX1-LABEL: load_zext_8i16_to_8i32:
+; AVX1:        # BB#0: # %entry
+; AVX1-NEXT:    vmovdqa       (%rdi), %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpckhwd    %xmm1, %xmm0, %xmm1 # xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT:    vpmovzxwd     %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128   $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+
+; AVX2-LABEL: load_zext_8i16_to_8i32:
+; AVX2:        # BB#0: # %entry
+; AVX2-NEXT: 	vpmovzxwd	(%rdi), %ymm0
+; AVX2-NEXT: 	retq
+entry:
+ %X = load <8 x i16>* %ptr
+ %Y = zext <8 x i16> %X to <8 x i32>
+ ret <8 x i32>%Y
+}
+
+define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) {
+; SSE2-LABEL: load_zext_4i32_to_4i64:
+; SSE2:          # BB#0: # %entry
+; SSE2-NEXT:   movdqa        (%rdi), %xmm1
+; SSE2-NEXT:   pshufd        $-44, %xmm1, %xmm0      # xmm0 = xmm1[0,1,1,3]
+; SSE2-NEXT:   movdqa        {{.*#+}} xmm2 = [4294967295,4294967295]
+; SSE2-NEXT:   pand  %xmm2, %xmm0
+; SSE2-NEXT:   pshufd        $-6, %xmm1, %xmm1       # xmm1 = xmm1[2,2,3,3]
+; SSE2-NEXT:   pand  %xmm2, %xmm1
+; SSE2-NEXT:   retq
+
+; SSSE3-LABEL: load_zext_4i32_to_4i64:
+; SSSE3:        # BB#0: # %entry
+; SSSE3-NEXT:   movdqa        (%rdi), %xmm1
+; SSSE3-NEXT:   pshufd        $-44, %xmm1, %xmm0      # xmm0 = xmm1[0,1,1,3]
+; SSSE3-NEXT:   movdqa        {{.*#+}} xmm2 = [4294967295,4294967295]
+; SSSE3-NEXT:   pand  %xmm2, %xmm0
+; SSSE3-NEXT:   pshufd        $-6, %xmm1, %xmm1       # xmm1 = xmm1[2,2,3,3]
+; SSSE3-NEXT:   pand  %xmm2, %xmm1
+; SSSE3-NEXT:   retq
+
+; SSE41-LABEL: load_zext_4i32_to_4i64:
+; SSE41:        # BB#0: # %entry
+; SSE41-NEXT: 	movdqa	(%rdi), %xmm1
+; SSE41-NEXT: 	pmovzxdq	%xmm1, %xmm0
+; SSE41-NEXT: 	movdqa	{{.*#+}} xmm2 = [4294967295,4294967295]
+; SSE41-NEXT: 	pand	%xmm2, %xmm0
+; SSE41-NEXT: 	pshufd	$-6, %xmm1, %xmm1       # xmm1 = xmm1[2,2,3,3]
+; SSE41-NEXT: 	pand	%xmm2, %xmm1
+; SSE41-NEXT: 	retq
+
+; AVX1-LABEL: load_zext_4i32_to_4i64:
+; AVX1:        # BB#0: # %entry
+; AVX1-NEXT: 	vmovdqa	(%rdi), %xmm0
+; AVX1-NEXT: 	vpxor	%xmm1, %xmm1, %xmm1
+; AVX1-NEXT: 	vpunpckhdq	%xmm1, %xmm0, %xmm1 # xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: 	vpmovzxdq	%xmm0, %xmm0
+; AVX1-NEXT: 	vinsertf128	$1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: 	retq
+
+; AVX2-LABEL: load_zext_4i32_to_4i64:
+; AVX2:        # BB#0: # %entry
+; AVX2-NEXT: 	vpmovzxdq	(%rdi), %ymm0
+; AVX2-NEXT: 	retq
+entry:
+ %X = load <4 x i32>* %ptr
+ %Y = zext <4 x i32> %X to <4 x i64>
+ ret <4 x i64>%Y
+}
diff --git a/test/CodeGen/X86/vector-zmov.ll b/test/CodeGen/X86/vector-zmov.ll
new file mode 100644
index 000000000000..4de2543a1d6b
--- /dev/null
+++ b/test/CodeGen/X86/vector-zmov.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+
+define <4 x i32> @load_zmov_4i32_to_0zzz(<4 x i32> *%ptr) {
+; SSE-LABEL:  load_zmov_4i32_to_0zzz:
+; SSE:        # BB#0: # %entry
+; SSE-NEXT:   movd (%rdi), %xmm0
+; SSE-NEXT:   retq
+
+; AVX-LABEL:  load_zmov_4i32_to_0zzz:
+; AVX:        # BB#0: # %entry
+; AVX-NEXT:   vmovd (%rdi), %xmm0
+; AVX-NEXT:   retq
+entry:
+  %X = load <4 x i32>* %ptr
+  %Y = shufflevector <4 x i32> %X, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+  ret <4 x i32>%Y
+}
+
+define <2 x i64> @load_zmov_2i64_to_0z(<2 x i64> *%ptr) {
+; SSE-LABEL:  load_zmov_2i64_to_0z:
+; SSE:        # BB#0: # %entry
+; SSE-NEXT:   movq (%rdi), %xmm0
+; SSE-NEXT:   retq
+
+; AVX-LABEL:  load_zmov_2i64_to_0z:
+; AVX:        # BB#0: # %entry
+; AVX-NEXT:   vmovq (%rdi), %xmm0
+; AVX-NEXT:   retq
+entry:
+  %X = load <2 x i64>* %ptr
+  %Y = shufflevector <2 x i64> %X, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
+  ret <2 x i64>%Y
+}
diff --git a/test/CodeGen/X86/vectorcall.ll b/test/CodeGen/X86/vectorcall.ll
new file mode 100644
index 000000000000..1e52654e99fe
--- /dev/null
+++ b/test/CodeGen/X86/vectorcall.ll
@@ -0,0 +1,93 @@
+; RUN: llc -mtriple=i686-pc-win32 -mattr=+sse2 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X86
+; RUN: llc -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=X64
+
+; Test integer arguments.
+
+define x86_vectorcallcc i32 @test_int_1() {
+  ret i32 0
+}
+
+; CHECK-LABEL: {{^}}test_int_1@@0:
+; CHECK: xorl %eax, %eax
+
+define x86_vectorcallcc i32 @test_int_2(i32 inreg %a) {
+  ret i32 %a
+}
+
+; X86-LABEL: {{^}}test_int_2@@4:
+; X64-LABEL: {{^}}test_int_2@@8:
+; CHECK: movl %ecx, %eax
+
+define x86_vectorcallcc i32 @test_int_3(i64 inreg %a) {
+  %at = trunc i64 %a to i32
+  ret i32 %at
+}
+
+; X86-LABEL: {{^}}test_int_3@@8:
+; X64-LABEL: {{^}}test_int_3@@8:
+; CHECK: movl %ecx, %eax
+
+define x86_vectorcallcc i32 @test_int_4(i32 inreg %a, i32 inreg %b) {
+  %s = add i32 %a, %b
+  ret i32 %s
+}
+
+; X86-LABEL: {{^}}test_int_4@@8:
+; X86: leal (%ecx,%edx), %eax
+
+; X64-LABEL: {{^}}test_int_4@@16:
+; X64: leal (%rcx,%rdx), %eax
+
+define x86_vectorcallcc i32 @"\01test_int_5"(i32, i32) {
+  ret i32 0
+}
+; CHECK-LABEL: {{^}}test_int_5:
+
+define x86_vectorcallcc double @test_fp_1(double %a, double %b) {
+  ret double %b
+}
+; CHECK-LABEL: {{^}}test_fp_1@@16:
+; CHECK: movaps %xmm1, %xmm0
+
+define x86_vectorcallcc double @test_fp_2(
+    double, double, double, double, double, double, double %r) {
+  ret double %r
+}
+; CHECK-LABEL: {{^}}test_fp_2@@56:
+; CHECK: movsd {{[0-9]+\(%[re]sp\)}}, %xmm0
+
+define x86_vectorcallcc {double, double, double, double} @test_fp_3() {
+  ret {double, double, double, double}
+        { double 0.0, double 0.0, double 0.0, double 0.0 }
+}
+; CHECK-LABEL: {{^}}test_fp_3@@0:
+; CHECK: xorps %xmm0
+; CHECK: xorps %xmm1
+; CHECK: xorps %xmm2
+; CHECK: xorps %xmm3
+
+; FIXME: Returning via x87 isn't compatible, but its hard to structure the
+; tablegen any other way.
+define x86_vectorcallcc {double, double, double, double, double} @test_fp_4() {
+  ret {double, double, double, double, double}
+        { double 0.0, double 0.0, double 0.0, double 0.0, double 0.0 }
+}
+; CHECK-LABEL: {{^}}test_fp_4@@0:
+; CHECK: fldz
+; CHECK: xorps %xmm0
+; CHECK: xorps %xmm1
+; CHECK: xorps %xmm2
+; CHECK: xorps %xmm3
+
+define x86_vectorcallcc <16 x i8> @test_vec_1(<16 x i8> %a, <16 x i8> %b) {
+  ret <16 x i8> %b
+}
+; CHECK-LABEL: {{^}}test_vec_1@@32:
+; CHECK: movaps %xmm1, %xmm0
+
+define x86_vectorcallcc <16 x i8> @test_vec_2(
+    double, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> %r) {
+  ret <16 x i8> %r
+}
+; CHECK-LABEL: {{^}}test_vec_2@@104:
+; CHECK: movaps (%{{[re]}}cx), %xmm0
diff --git a/test/CodeGen/X86/vselect-2.ll b/test/CodeGen/X86/vselect-2.ll
index 50da32c67a3b..0991bdacd9c5 100644
--- a/test/CodeGen/X86/vselect-2.ll
+++ b/test/CodeGen/X86/vselect-2.ll
@@ -1,33 +1,60 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE41
 
 define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) {
+; SSE2-LABEL: test1
+; SSE2:        # BB#0:
+; SSE2-NEXT:     movsd %xmm0, %xmm1
+; SSE2-NEXT:     movaps %xmm1, %xmm0
+; SSE2-NEXT:     retq
+;
+; SSE41-LABEL: test1
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; SSE41-NEXT:    retq
   %select = select <4 x i1><i1 true, i1 true, i1 false, i1 false>, <4 x i32> %A, <4 x i32> %B
   ret <4 x i32> %select
 }
-; CHECK-LABEL: test1
-; CHECK: movsd
-; CHECK: ret
 
 define <4 x i32> @test2(<4 x i32> %A, <4 x i32> %B) {
+; SSE2-LABEL: test2
+; SSE2:        # BB#0:
+; SSE2-NEXT:     movsd %xmm1, %xmm0
+; SSE2-NEXT:     retq
+;
+; SSE41-LABEL: test2
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
+; SSE41-NEXT:    retq
   %select = select <4 x i1><i1 false, i1 false, i1 true, i1 true>, <4 x i32> %A, <4 x i32> %B
   ret <4 x i32> %select
 }
-; CHECK-LABEL: test2
-; CHECK: movsd
-; CHECK-NEXT: ret
 
 define <4 x float> @test3(<4 x float> %A, <4 x float> %B) {
+; SSE2-LABEL: test3
+; SSE2:        # BB#0:
+; SSE2-NEXT:     movsd %xmm0, %xmm1
+; SSE2-NEXT:     movaps %xmm1, %xmm0
+; SSE2-NEXT:     retq
+;
+; SSE41-LABEL: test3
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
+; SSE41-NEXT:    retq
   %select = select <4 x i1><i1 true, i1 true, i1 false, i1 false>, <4 x float> %A, <4 x float> %B
   ret <4 x float> %select
 }
-; CHECK-LABEL: test3
-; CHECK: movsd
-; CHECK: ret
 
 define <4 x float> @test4(<4 x float> %A, <4 x float> %B) {
+; SSE2-LABEL: test4
+; SSE2:        # BB#0:
+; SSE2-NEXT:     movsd %xmm1, %xmm0
+; SSE2-NEXT:     retq
+;
+; SSE41-LABEL: test4
+; SSE41:       # BB#0:
+; SSE41-NEXT:    blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; SSE41-NEXT:    retq
   %select = select <4 x i1><i1 false, i1 false, i1 true, i1 true>, <4 x float> %A, <4 x float> %B
   ret <4 x float> %select
 }
-; CHECK-LABEL: test4
-; CHECK: movsd
-; CHECK-NEXT: ret
diff --git a/test/CodeGen/X86/vselect-avx.ll b/test/CodeGen/X86/vselect-avx.ll
new file mode 100644
index 000000000000..0c0f4bbf992a
--- /dev/null
+++ b/test/CodeGen/X86/vselect-avx.ll
@@ -0,0 +1,85 @@
+; RUN: llc %s -o - -mattr=+avx | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+; For this test we used to optimize the <i1 true, i1 false, i1 false, i1 true>
+; mask into <i32 2147483648, i32 0, i32 0, i32 2147483648> because we thought
+; we would lower that into a blend where only the high bit is relevant.
+; However, since the whole mask is constant, this is simplified incorrectly
+; by the generic code, because it was expecting -1 in place of 2147483648.
+; 
+; The problem does not occur without AVX, because vselect of v4i32 is not legal
+; nor custom.
+;
+; <rdar://problem/18675020>
+
+; CHECK-LABEL: test:
+; CHECK: vmovdqa {{.*#+}} xmm0 = [65535,0,0,65535]
+; CHECK: vmovdqa {{.*#+}} xmm2 = [65533,124,125,14807]
+; CHECK: ret
+define void @test(<4 x i16>* %a, <4 x i16>* %b) {
+body:
+  %predphi = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i16> <i16 -3, i16 545, i16 4385, i16 14807>, <4 x i16> <i16 123, i16 124, i16 125, i16 127>
+  %predphi42 = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> zeroinitializer
+  store <4 x i16> %predphi, <4 x i16>* %a, align 8
+  store <4 x i16> %predphi42, <4 x i16>* %b, align 8
+  ret void
+}
+
+; Improve code coverage.
+;
+; When shrinking the condition used into the select to match a blend, this
+; test case exercises the path where the modified node is not the root
+; of the condition.
+;
+; CHECK-LABEL: test2:
+; CHECK:	vpslld	$31, %xmm0, %xmm0
+; CHECK-NEXT:	vpmovsxdq	%xmm0, %xmm1
+; CHECK-NEXT:	vpshufd	$78, %xmm0, %xmm0       ## xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT:	vpmovsxdq	%xmm0, %xmm0
+; CHECK-NEXT:	vinsertf128	$1, %xmm0, %ymm1, [[MASK:%ymm[0-9]+]]
+; CHECK: vblendvpd	[[MASK]]
+; CHECK: retq
+define void @test2(double** %call1559, i64 %indvars.iv4198, <4 x i1> %tmp1895) {
+bb:
+  %arrayidx1928 = getelementptr inbounds double** %call1559, i64 %indvars.iv4198
+  %tmp1888 = load double** %arrayidx1928, align 8
+  %predphi.v.v = select <4 x i1> %tmp1895, <4 x double> <double -5.000000e-01, double -5.000000e-01, double -5.000000e-01, double -5.000000e-01>, <4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
+  %tmp1900 = bitcast double* %tmp1888 to <4 x double>*
+  store <4 x double> %predphi.v.v, <4 x double>* %tmp1900, align 8
+  ret void
+}
+
+; For this test, we used to optimized the conditional mask for the blend, i.e.,
+; we shrunk some of its bits.
+; However, this same mask was used in another select (%predphi31) that turned out
+; to be optimized into a and. In that case, the conditional mask was wrong.
+;
+; Make sure that the and is fed by the original mask.
+; 
+; <rdar://problem/18819506>
+
+; Note: For now, hard code ORIG_MASK and SHRUNK_MASK registers, because we
+; cannot express that ORIG_MASK must not be equal to ORIG_MASK. Otherwise,
+; even a faulty pattern would pass!
+;  
+; CHECK-LABEL: test3:
+; Compute the original mask.
+;	CHECK: vpcmpeqd {{%xmm[0-9]+}}, {{%xmm[0-9]+}}, [[ORIG_MASK:%xmm0]]
+; Shrink the bit of the mask.
+; CHECK-NEXT: vpslld	$31, [[ORIG_MASK]], [[SHRUNK_MASK:%xmm3]]
+; Use the shrunk mask in the blend.
+; CHECK-NEXT:	vblendvps	[[SHRUNK_MASK]], %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}}
+; Use the original mask in the and.
+; CHECK-NEXT: vpand LCPI2_2(%rip), [[ORIG_MASK]], {{%xmm[0-9]+}} 
+; CHECK: retq
+define void @test3(<4 x i32> %induction30, <4 x i16>* %tmp16, <4 x i16>* %tmp17,  <4 x i16> %tmp3, <4 x i16> %tmp12) {
+  %tmp6 = srem <4 x i32> %induction30, <i32 3, i32 3, i32 3, i32 3>
+  %tmp7 = icmp eq <4 x i32> %tmp6, zeroinitializer
+  %predphi = select <4 x i1> %tmp7, <4 x i16> %tmp3, <4 x i16> %tmp12
+  %predphi31 = select <4 x i1> %tmp7, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> zeroinitializer
+
+  store <4 x i16> %predphi31, <4 x i16>* %tmp16, align 8
+  store <4 x i16> %predphi, <4 x i16>* %tmp17, align 8
+ ret void
+}
diff --git a/test/CodeGen/X86/vselect-minmax.ll b/test/CodeGen/X86/vselect-minmax.ll
index 25189f23e43a..3efe5684c15b 100644
--- a/test/CodeGen/X86/vselect-minmax.ll
+++ b/test/CodeGen/X86/vselect-minmax.ll
@@ -2,6 +2,8 @@
 ; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
 ; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
 ; RUN: llc -march=x86-64 -mcpu=core-avx2 -mattr=+avx2 < %s | FileCheck %s -check-prefix=AVX2
+; RUN: llc -march=x86-64 -mcpu=knl < %s | FileCheck %s  -check-prefix=AVX2 -check-prefix=AVX512F
+; RUN: llc -march=x86-64 -mcpu=skx < %s | FileCheck %s  -check-prefix=AVX512BW -check-prefix=AVX512VL -check-prefix=AVX512F
 
 define void @test1(i8* nocapture %a, i8* nocapture %b) nounwind {
 vector.ph:
@@ -33,6 +35,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test1:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test1:
+; AVX512VL: vpminsb
 }
 
 define void @test2(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -65,6 +70,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test2:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test2:
+; AVX512VL: vpminsb
 }
 
 define void @test3(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -97,6 +105,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test3:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test3:
+; AVX512VL: vpmaxsb
 }
 
 define void @test4(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -129,6 +140,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test4:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test4:
+; AVX512VL: vpmaxsb
 }
 
 define void @test5(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -161,6 +175,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test5:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test5:
+; AVX512VL: vpminub 
 }
 
 define void @test6(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -193,6 +210,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test6:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test6:
+; AVX512VL: vpminub
 }
 
 define void @test7(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -225,6 +245,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test7:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test7:
+; AVX512VL: vpmaxub
 }
 
 define void @test8(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -257,6 +280,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test8:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test8:
+; AVX512VL: vpmaxub
 }
 
 define void @test9(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -289,6 +315,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test9:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test9:
+; AVX512VL: vpminsw 
 }
 
 define void @test10(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -321,6 +350,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test10:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test10:
+; AVX512VL: vpminsw
 }
 
 define void @test11(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -353,6 +385,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test11:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test11:
+; AVX512VL: vpmaxsw
 }
 
 define void @test12(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -385,6 +420,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test12:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test12:
+; AVX512VL: vpmaxsw
 }
 
 define void @test13(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -417,6 +455,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test13:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test13:
+; AVX512VL: vpminuw
 }
 
 define void @test14(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -449,6 +490,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test14:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test14:
+; AVX512VL: vpminuw 
 }
 
 define void @test15(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -481,6 +525,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test15:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test15:
+; AVX512VL: vpmaxuw
 }
 
 define void @test16(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -513,6 +560,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test16:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test16:
+; AVX512VL: vpmaxuw
 }
 
 define void @test17(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -545,6 +595,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test17:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test17:
+; AVX512VL: vpminsd
 }
 
 define void @test18(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -577,6 +630,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test18:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test18:
+; AVX512VL: vpminsd
 }
 
 define void @test19(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -609,6 +665,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test19:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test19:
+; AVX512VL: vpmaxsd
 }
 
 define void @test20(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -641,6 +700,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test20:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test20:
+; AVX512VL: vpmaxsd
 }
 
 define void @test21(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -673,6 +735,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test21:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test21:
+; AVX512VL: vpminud
 }
 
 define void @test22(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -705,6 +770,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test22:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test22:
+; AVX512VL: vpminud
 }
 
 define void @test23(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -737,6 +805,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test23:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test23:
+; AVX512VL: vpmaxud
 }
 
 define void @test24(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -769,6 +840,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test24:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test24:
+; AVX512VL: vpmaxud
 }
 
 define void @test25(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -795,6 +869,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test25:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test25:
+; AVX512VL: vpminsb
 }
 
 define void @test26(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -821,6 +898,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test26:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test26:
+; AVX512VL: vpminsb
 }
 
 define void @test27(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -847,6 +927,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test27:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test27:
+; AVX512VL: vpmaxsb
 }
 
 define void @test28(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -873,6 +956,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test28:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test28:
+; AVX512VL: vpmaxsb
 }
 
 define void @test29(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -899,6 +985,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test29:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test29:
+; AVX512VL: vpminub
 }
 
 define void @test30(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -925,6 +1014,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test30:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test30:
+; AVX512VL: vpminub
 }
 
 define void @test31(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -951,6 +1043,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test31:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test31:
+; AVX512VL: vpmaxub
 }
 
 define void @test32(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -977,6 +1072,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test32:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test32:
+; AVX512VL: vpmaxub
 }
 
 define void @test33(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1003,6 +1101,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test33:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test33:
+; AVX512VL: vpminsw 
 }
 
 define void @test34(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1029,6 +1130,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test34:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test34:
+; AVX512VL: vpminsw
 }
 
 define void @test35(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1055,6 +1159,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test35:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test35:
+; AVX512VL: vpmaxsw
 }
 
 define void @test36(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1081,6 +1188,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test36:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test36:
+; AVX512VL: vpmaxsw
 }
 
 define void @test37(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1107,6 +1217,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test37:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test37:
+; AVX512VL: vpminuw
 }
 
 define void @test38(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1133,6 +1246,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test38:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test38:
+; AVX512VL: vpminuw
 }
 
 define void @test39(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1159,6 +1275,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test39:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test39:
+; AVX512VL: vpmaxuw
 }
 
 define void @test40(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1185,6 +1304,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test40:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test40:
+; AVX512VL: vpmaxuw
 }
 
 define void @test41(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1211,6 +1333,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test41:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test41:
+; AVX512VL: vpminsd
 }
 
 define void @test42(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1237,6 +1362,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test42:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test42:
+; AVX512VL: vpminsd
 }
 
 define void @test43(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1263,6 +1391,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test43:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test43:
+; AVX512VL: vpmaxsd
 }
 
 define void @test44(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1289,6 +1420,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test44:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test44:
+; AVX512VL: vpmaxsd
 }
 
 define void @test45(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1315,6 +1449,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test45:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test45:
+; AVX512VL: vpminud
 }
 
 define void @test46(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1341,6 +1478,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test46:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test46:
+; AVX512VL: vpminud
 }
 
 define void @test47(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1367,6 +1507,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test47:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test47:
+; AVX512VL: vpmaxud
 }
 
 define void @test48(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1393,6 +1536,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test48:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test48:
+; AVX512VL: vpmaxud
 }
 
 define void @test49(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1425,6 +1571,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test49:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test49:
+; AVX512VL: vpmaxsb
 }
 
 define void @test50(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1457,6 +1606,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test50:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test50:
+; AVX512VL: vpmaxsb
 }
 
 define void @test51(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1489,6 +1641,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test51:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test51:
+; AVX512VL: vpminsb
 }
 
 define void @test52(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1521,6 +1676,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test52:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test52:
+; AVX512VL: vpminsb
 }
 
 define void @test53(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1553,6 +1711,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test53:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test53:
+; AVX512VL: vpmaxub
 }
 
 define void @test54(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1585,6 +1746,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test54:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test54:
+; AVX512VL: vpmaxub
 }
 
 define void @test55(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1617,6 +1781,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test55:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test55:
+; AVX512VL: vpminub
 }
 
 define void @test56(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1649,6 +1816,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test56:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test56:
+; AVX512VL: vpminub
 }
 
 define void @test57(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1681,6 +1851,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test57:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test57:
+; AVX512VL: vpmaxsw
 }
 
 define void @test58(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1713,6 +1886,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test58:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test58:
+; AVX512VL: vpmaxsw
 }
 
 define void @test59(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1745,6 +1921,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test59:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test59:
+; AVX512VL: vpminsw
 }
 
 define void @test60(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1777,6 +1956,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test60:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test60:
+; AVX512VL: vpminsw
 }
 
 define void @test61(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1809,6 +1991,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test61:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test61:
+; AVX512VL: vpmaxuw
 }
 
 define void @test62(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1841,6 +2026,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test62:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test62:
+; AVX512VL: vpmaxuw
 }
 
 define void @test63(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1873,6 +2061,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test63:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test63:
+; AVX512VL: vpminuw
 }
 
 define void @test64(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1905,6 +2096,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test64:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test64:
+; AVX512VL: vpminuw
 }
 
 define void @test65(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1937,6 +2131,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test65:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test65:
+; AVX512VL: vpmaxsd
 }
 
 define void @test66(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1969,6 +2166,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test66:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test66:
+; AVX512VL: vpmaxsd
 }
 
 define void @test67(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2001,6 +2201,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test67:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test67:
+; AVX512VL: vpminsd
 }
 
 define void @test68(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2033,6 +2236,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test68:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test68:
+; AVX512VL: vpminsd
 }
 
 define void @test69(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2065,6 +2271,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test69:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test69:
+; AVX512VL: vpmaxud
 }
 
 define void @test70(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2097,6 +2306,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test70:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test70:
+; AVX512VL: vpmaxud
 }
 
 define void @test71(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2129,6 +2341,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test71:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test71:
+; AVX512VL: vpminud
 }
 
 define void @test72(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2161,6 +2376,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test72:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test72:
+; AVX512VL: vpminud
 }
 
 define void @test73(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2187,6 +2405,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test73:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test73:
+; AVX512VL: vpmaxsb
 }
 
 define void @test74(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2213,6 +2434,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test74:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test74:
+; AVX512VL: vpmaxsb 
 }
 
 define void @test75(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2239,6 +2463,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test75:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test75:
+; AVX512VL: vpminsb
 }
 
 define void @test76(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2265,6 +2492,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test76:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test76:
+; AVX512VL: vpminsb
 }
 
 define void @test77(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2291,6 +2521,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test77:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test77:
+; AVX512VL: vpmaxub
 }
 
 define void @test78(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2317,6 +2550,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test78:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test78:
+; AVX512VL: vpmaxub
 }
 
 define void @test79(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2343,6 +2579,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test79:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test79:
+; AVX512VL: vpminub
 }
 
 define void @test80(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2369,6 +2608,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test80:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test80:
+; AVX512VL: vpminub
 }
 
 define void @test81(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2395,6 +2637,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test81:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test81:
+; AVX512VL: vpmaxsw
 }
 
 define void @test82(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2421,6 +2666,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test82:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test82:
+; AVX512VL: vpmaxsw
 }
 
 define void @test83(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2447,6 +2695,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test83:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test83:
+; AVX512VL: vpminsw
 }
 
 define void @test84(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2473,6 +2724,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test84:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test84:
+; AVX512VL: vpminsw
 }
 
 define void @test85(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2499,6 +2753,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test85:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test85:
+; AVX512VL: vpmaxuw
 }
 
 define void @test86(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2525,6 +2782,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test86:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test86:
+; AVX512VL: vpmaxuw
 }
 
 define void @test87(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2551,6 +2811,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test87:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test87:
+; AVX512VL: vpminuw
 }
 
 define void @test88(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2577,6 +2840,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test88:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test88:
+; AVX512VL: vpminuw
 }
 
 define void @test89(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2603,6 +2869,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test89:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test89:
+; AVX512VL: vpmaxsd
 }
 
 define void @test90(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2629,6 +2898,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test90:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test90:
+; AVX512VL: vpmaxsd
 }
 
 define void @test91(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2655,6 +2927,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test91:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test91:
+; AVX512VL: vpminsd
 }
 
 define void @test92(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2681,6 +2956,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test92:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test92:
+; AVX512VL: vpminsd
 }
 
 define void @test93(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2707,6 +2985,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test93:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test93:
+; AVX512VL: vpmaxud
 }
 
 define void @test94(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2733,6 +3014,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test94:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test94:
+; AVX512VL: vpmaxud
 }
 
 define void @test95(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2759,6 +3043,9 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test95:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test95:
+; AVX512VL: vpminud
 }
 
 define void @test96(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2785,4 +3072,2507 @@ for.end:                                          ; preds = %vector.body
 
 ; AVX2-LABEL: test96:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test96:
+; AVX512VL: vpminud
+}
+
+; ----------------------------
+
+define void @test97(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp slt <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test97:
+; AVX512BW: vpminsb {{.*}}
+}
+
+define void @test98(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp sle <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test98:
+; AVX512BW: vpminsb {{.*}}
+}
+
+define void @test99(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp sgt <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test99:
+; AVX512BW: vpmaxsb {{.*}}
+}
+
+define void @test100(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp sge <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test100:
+; AVX512BW: vpmaxsb {{.*}}
+}
+
+define void @test101(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp ult <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test101:
+; AVX512BW: vpminub {{.*}}
+}
+
+define void @test102(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp ule <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test102:
+; AVX512BW: vpminub {{.*}}
+}
+
+define void @test103(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp ugt <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test103:
+; AVX512BW: vpmaxub {{.*}}
+}
+
+define void @test104(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp uge <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test104:
+; AVX512BW: vpmaxub {{.*}}
+}
+
+define void @test105(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp slt <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test105:
+; AVX512BW: vpminsw {{.*}}
+}
+
+define void @test106(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp sle <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test106:
+; AVX512BW: vpminsw {{.*}}
+}
+
+define void @test107(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp sgt <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test107:
+; AVX512BW: vpmaxsw {{.*}}
+}
+
+define void @test108(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp sge <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test108:
+; AVX512BW: vpmaxsw {{.*}}
+}
+
+define void @test109(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp ult <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test109:
+; AVX512BW: vpminuw {{.*}}
+}
+
+define void @test110(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp ule <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test110:
+; AVX512BW: vpminuw {{.*}}
+}
+
+define void @test111(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp ugt <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test111:
+; AVX512BW: vpmaxuw {{.*}}
+}
+
+define void @test112(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp uge <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test112:
+; AVX512BW: vpmaxuw {{.*}}
+}
+
+define void @test113(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp slt <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test113:
+; AVX512F: vpminsd {{.*}}
+}
+
+define void @test114(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp sle <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test114:
+; AVX512F: vpminsd {{.*}}
+}
+
+define void @test115(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp sgt <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test115:
+; AVX512F: vpmaxsd {{.*}}
+}
+
+define void @test116(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp sge <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test116:
+; AVX512F: vpmaxsd {{.*}}
+}
+
+define void @test117(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp ult <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test117:
+; AVX512F: vpminud {{.*}}
+}
+
+define void @test118(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp ule <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test118:
+; AVX512F: vpminud {{.*}}
+}
+
+define void @test119(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp ugt <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test119:
+; AVX512F: vpmaxud {{.*}}
+}
+
+define void @test120(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp uge <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test120:
+; AVX512F: vpmaxud {{.*}}
+}
+
+define void @test121(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp slt <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test121:
+; AVX512F: vpminsq {{.*}}
+}
+
+define void @test122(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp sle <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test122:
+; AVX512F: vpminsq {{.*}}
+}
+
+define void @test123(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp sgt <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test123:
+; AVX512F: vpmaxsq {{.*}}
+}
+
+define void @test124(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp sge <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test124:
+; AVX512F: vpmaxsq {{.*}}
+}
+
+define void @test125(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp ult <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test125:
+; AVX512F: vpminuq {{.*}}
+}
+
+define void @test126(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp ule <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test126:
+; AVX512F: vpminuq {{.*}}
+}
+
+define void @test127(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp ugt <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test127:
+; AVX512F: vpmaxuq {{.*}}
+}
+
+define void @test128(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp uge <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test128:
+; AVX512F: vpmaxuq {{.*}}
+}
+
+define void @test129(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp slt <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test129:
+; AVX512BW: vpmaxsb
+}
+
+define void @test130(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp sle <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test130:
+; AVX512BW: vpmaxsb
+}
+
+define void @test131(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp sgt <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test131:
+; AVX512BW: vpminsb
+}
+
+define void @test132(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp sge <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test132:
+; AVX512BW: vpminsb
+}
+
+define void @test133(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp ult <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test133:
+; AVX512BW: vpmaxub
+}
+
+define void @test134(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp ule <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test134:
+; AVX512BW: vpmaxub
+}
+
+define void @test135(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp ugt <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test135:
+; AVX512BW: vpminub
+}
+
+define void @test136(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp uge <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test136:
+; AVX512BW: vpminub
+}
+
+define void @test137(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp slt <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test137:
+; AVX512BW: vpmaxsw
+}
+
+define void @test138(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp sle <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test138:
+; AVX512BW: vpmaxsw
+}
+
+define void @test139(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp sgt <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test139:
+; AVX512BW: vpminsw
+}
+
+define void @test140(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp sge <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test140:
+; AVX512BW: vpminsw
+}
+
+define void @test141(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp ult <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test141:
+; AVX512BW: vpmaxuw
+}
+
+define void @test142(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp ule <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test142:
+; AVX512BW: vpmaxuw
+}
+
+define void @test143(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp ugt <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test143:
+; AVX512BW: vpminuw
+}
+
+define void @test144(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp uge <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test144:
+; AVX512BW: vpminuw
+}
+
+define void @test145(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp slt <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test145:
+; AVX512F: vpmaxsd
+}
+
+define void @test146(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp sle <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test146:
+; AVX512F: vpmaxsd
+}
+
+define void @test147(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp sgt <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test147:
+; AVX512F: vpminsd
+}
+
+define void @test148(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp sge <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test148:
+; AVX512F: vpminsd
+}
+
+define void @test149(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp ult <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test149:
+; AVX512F: vpmaxud
+}
+
+define void @test150(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp ule <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test150:
+; AVX512F: vpmaxud
+}
+
+define void @test151(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp ugt <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test151:
+; AVX512F: vpminud
+}
+
+define void @test152(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp uge <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test152:
+; AVX512F: vpminud
+}
+
+; -----------------------
+
+define void @test153(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp slt <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test153:
+; AVX512F: vpmaxsq
+}
+
+define void @test154(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp sle <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test154:
+; AVX512F: vpmaxsq
+}
+
+define void @test155(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp sgt <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test155:
+; AVX512F: vpminsq
+}
+
+define void @test156(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp sge <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test156:
+; AVX512F: vpminsq
+}
+
+define void @test157(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp ult <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test157:
+; AVX512F: vpmaxuq
+}
+
+define void @test158(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp ule <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test158:
+; AVX512F: vpmaxuq
+}
+
+define void @test159(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp ugt <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test159:
+; AVX512F: vpminuq
+}
+
+define void @test160(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp uge <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test160:
+; AVX512F: vpminuq
+}
+
+define void @test161(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp slt <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test161:
+; AVX512VL: vpminsq
+}
+
+define void @test162(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp sle <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test162:
+; AVX512VL: vpminsq
+}
+
+define void @test163(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp sgt <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test163:
+; AVX512VL: vpmaxsq 
+}
+
+define void @test164(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp sge <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test164:
+; AVX512VL: vpmaxsq
+}
+
+define void @test165(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp ult <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test165:
+; AVX512VL: vpminuq 
+}
+
+define void @test166(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp ule <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test166:
+; AVX512VL: vpminuq
+}
+
+define void @test167(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp ugt <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test167:
+; AVX512VL: vpmaxuq
+}
+
+define void @test168(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp uge <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test168:
+; AVX512VL: vpmaxuq
+}
+
+define void @test169(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp slt <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test169:
+; AVX512VL: vpmaxsq
+}
+
+define void @test170(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp sle <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test170:
+; AVX512VL: vpmaxsq
+}
+
+define void @test171(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp sgt <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test171:
+; AVX512VL: vpminsq
+}
+
+define void @test172(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp sge <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test172:
+; AVX512VL: vpminsq
+}
+
+define void @test173(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp ult <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test173:
+; AVX512VL: vpmaxuq
+}
+
+define void @test174(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp ule <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test174:
+; AVX512VL: vpmaxuq
+}
+
+define void @test175(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp ugt <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test175:
+; AVX512VL: vpminuq
+}
+
+define void @test176(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp uge <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test176:
+; AVX512VL: vpminuq
+}
+
+define void @test177(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp slt <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test177:
+; AVX512VL: vpminsq
+}
+
+define void @test178(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp sle <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test178:
+; AVX512VL: vpminsq
+}
+
+define void @test179(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp sgt <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test179:
+; AVX512VL: vpmaxsq
+}
+
+define void @test180(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp sge <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test180:
+; AVX512VL: vpmaxsq
+}
+
+define void @test181(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp ult <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test181:
+; AVX512VL: vpminuq
+}
+
+define void @test182(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp ule <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test182:
+; AVX512VL: vpminuq
+}
+
+define void @test183(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp ugt <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test183:
+; AVX512VL: vpmaxuq
+}
+
+define void @test184(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp uge <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test184:
+; AVX512VL: vpmaxuq
+}
+
+define void @test185(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp slt <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test185:
+; AVX512VL: vpmaxsq
+}
+
+define void @test186(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp sle <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test186:
+; AVX512VL: vpmaxsq
+}
+
+define void @test187(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp sgt <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test187:
+; AVX512VL: vpminsq
+}
+
+define void @test188(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp sge <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test188:
+; AVX512VL: vpminsq
+}
+
+define void @test189(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp ult <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test189:
+; AVX512VL: vpmaxuq
+}
+
+define void @test190(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp ule <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test190:
+; AVX512VL: vpmaxuq
+}
+
+define void @test191(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp ugt <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test191:
+; AVX512VL: vpminuq
+}
+
+define void @test192(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp uge <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test192:
+; AVX512VL: vpminuq
 }
diff --git a/test/CodeGen/X86/vselect.ll b/test/CodeGen/X86/vselect.ll
index 42cf06a4a049..3bd1dc4cb972 100644
--- a/test/CodeGen/X86/vselect.ll
+++ b/test/CodeGen/X86/vselect.ll
@@ -3,270 +3,253 @@
 ; Verify that we don't emit packed vector shifts instructions if the
 ; condition used by the vector select is a vector of constants.
 
-
 define <4 x float> @test1(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andps {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    orps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %a, <4 x float> %b
   ret <4 x float> %1
 }
-; CHECK-LABEL: test1
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK: ret
-
 
 define <4 x float> @test2(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movsd %xmm0, %xmm1
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
   ret <4 x float> %1
 }
-; CHECK-LABEL: test2
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK: ret
-
 
 define <4 x float> @test3(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movsd %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
   ret <4 x float> %1
 }
-; CHECK-LABEL: test3
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK: ret
-
 
 define <4 x float> @test4(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test4:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
   ret <4 x float> %1
 }
-; CHECK-LABEL: test4
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK: movaps  %xmm1, %xmm0
-; CHECK: ret
-
 
 define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test5:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    retq
   %1 = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
   ret <4 x float> %1
 }
-; CHECK-LABEL: test5
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK: ret
-
 
 define <8 x i16> @test6(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test6:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movaps {{.*#+}} xmm1 = [0,65535,0,65535,0,65535,0,65535]
+; CHECK-NEXT:    andps %xmm0, %xmm1
+; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    orps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>, <8 x i16> %a, <8 x i16> %a
   ret <8 x i16> %1
 }
-; CHECK-LABEL: test6
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK: ret
-
 
 define <8 x i16> @test7(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test7:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andps {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    orps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
   ret <8 x i16> %1
 }
-; CHECK-LABEL: test7
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK: ret
-
 
 define <8 x i16> @test8(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test8:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andps {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    orps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
   ret <8 x i16> %1
 }
-; CHECK-LABEL: test8
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK: ret
 
 define <8 x i16> @test9(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test9:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %a, <8 x i16> %b
   ret <8 x i16> %1
 }
-; CHECK-LABEL: test9
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK: movaps  %xmm1, %xmm0
-; CHECK-NEXT: ret
 
 define <8 x i16> @test10(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test10:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    retq
   %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> %a, <8 x i16> %b
   ret <8 x i16> %1
 }
-; CHECK-LABEL: test10
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK: ret
 
 define <8 x i16> @test11(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test11:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movaps {{.*#+}} xmm2 = <0,65535,65535,0,u,65535,65535,u>
+; CHECK-NEXT:    andps %xmm2, %xmm0
+; CHECK-NEXT:    andnps %xmm1, %xmm2
+; CHECK-NEXT:    orps %xmm2, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <8 x i1> <i1 false, i1 true, i1 true, i1 false, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
   ret <8 x i16> %1
 }
-; CHECK-LABEL: test11
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK: ret
 
 define <8 x i16> @test12(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test12:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <8 x i1> <i1 false, i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
   ret <8 x i16> %1
 }
-; CHECK-LABEL: test12
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK: ret
 
 define <8 x i16> @test13(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test13:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <8 x i1> <i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef, i1 undef>, <8 x i16> %a, <8 x i16> %b
   ret <8 x i16> %1
 }
-; CHECK-LABEL: test13
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK: ret
 
 ; Fold (vselect (build_vector AllOnes), N1, N2) -> N1
-
 define <4 x float> @test14(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test14:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    retq
   %1 = select <4 x i1> <i1 true, i1 undef, i1 true, i1 undef>, <4 x float> %a, <4 x float> %b
   ret <4 x float> %1
 }
-; CHECK-LABEL: test14
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK-NOT: pcmpeq
-; CHECK: ret
 
 define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test15:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    retq
   %1 = select <8 x i1> <i1 true, i1 true, i1 true, i1 undef, i1 undef, i1 true, i1 true, i1 undef>, <8 x i16> %a, <8 x i16> %b
   ret <8 x i16> %1
 }
-; CHECK-LABEL: test15
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK-NOT: pcmpeq
-; CHECK: ret
 
 ; Fold (vselect (build_vector AllZeros), N1, N2) -> N2
-
 define <4 x float> @test16(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test16:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <4 x i1> <i1 false, i1 undef, i1 false, i1 undef>, <4 x float> %a, <4 x float> %b
   ret <4 x float> %1
-} 
-; CHECK-LABEL: test16
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK-NOT: xorps
-; CHECK: ret 
+}
 
 define <8 x i16> @test17(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test17:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 undef, i1 undef, i1 false, i1 false, i1 undef>, <8 x i16> %a, <8 x i16> %b
   ret <8 x i16> %1
 }
-; CHECK-LABEL: test17
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK-NOT: xorps
-; CHECK: ret
 
 define <4 x float> @test18(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test18:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movss %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %a, <4 x float> %b
   ret <4 x float> %1
 }
-; CHECK-LABEL: test18
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK-NOT: xorps
-; CHECK: movss
-; CHECK: ret
 
 define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test19:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movss %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> %a, <4 x i32> %b
   ret <4 x i32> %1
 }
-; CHECK-LABEL: test19
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK-NOT: xorps
-; CHECK: movss
-; CHECK: ret
 
 define <2 x double> @test20(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test20:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movsd %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <2 x i1> <i1 false, i1 true>, <2 x double> %a, <2 x double> %b
   ret <2 x double> %1
 }
-; CHECK-LABEL: test20
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK-NOT: xorps
-; CHECK: movsd
-; CHECK: ret
 
 define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test21:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movsd %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <2 x i1> <i1 false, i1 true>, <2 x i64> %a, <2 x i64> %b
   ret <2 x i64> %1
 }
-; CHECK-LABEL: test21
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK-NOT: xorps
-; CHECK: movsd
-; CHECK: ret
 
 define <4 x float> @test22(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test22:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movss %xmm0, %xmm1
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %a, <4 x float> %b
   ret <4 x float> %1
 }
-; CHECK-LABEL: test22
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK-NOT: xorps
-; CHECK: movss
-; CHECK: ret
 
 define <4 x i32> @test23(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test23:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movss %xmm0, %xmm1
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %a, <4 x i32> %b
   ret <4 x i32> %1
 }
-; CHECK-LABEL: test23
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK-NOT: xorps
-; CHECK: movss
-; CHECK: ret
 
 define <2 x double> @test24(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test24:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movsd %xmm0, %xmm1
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %a, <2 x double> %b
   ret <2 x double> %1
 }
-; CHECK-LABEL: test24
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK-NOT: xorps
-; CHECK: movsd
-; CHECK: ret
 
 define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test25:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movsd %xmm0, %xmm1
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = select <2 x i1> <i1 true, i1 false>, <2 x i64> %a, <2 x i64> %b
   ret <2 x i64> %1
 }
-; CHECK-LABEL: test25
-; CHECK-NOT: psllw
-; CHECK-NOT: psraw
-; CHECK-NOT: xorps
-; CHECK: movsd
-; CHECK: ret
 
 define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x float> %a1, <2 x float> %b1) {
-; CHECK-LABEL: select_of_shuffles_0
-; CHECK-DAG: movlhps %xmm2, [[REGA:%xmm[0-9]+]]
-; CHECK-DAG: movlhps %xmm3, [[REGB:%xmm[0-9]+]]
-; CHECK: subps [[REGB]], [[REGA]]
+; CHECK-LABEL: select_of_shuffles_0:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; CHECK-NEXT:    subps %xmm1, %xmm0
+; CHECK-NEXT:    retq
   %1 = shufflevector <2 x float> %a0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   %2 = shufflevector <2 x float> %a1, <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
   %3 = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x float> %2, <4 x float> %1
@@ -276,3 +259,24 @@ define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x
   %7 = fsub <4 x float> %3, %6
   ret <4 x float> %7
 }
+
+; PR20677
+define <16 x double> @select_illegal(<16 x double> %a, <16 x double> %b) {
+; CHECK-LABEL: select_illegal:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm4
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm5
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm6
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm7
+; CHECK-NEXT:    movaps %xmm7, 112(%rdi)
+; CHECK-NEXT:    movaps %xmm6, 96(%rdi)
+; CHECK-NEXT:    movaps %xmm5, 80(%rdi)
+; CHECK-NEXT:    movaps %xmm4, 64(%rdi)
+; CHECK-NEXT:    movaps %xmm3, 48(%rdi)
+; CHECK-NEXT:    movaps %xmm2, 32(%rdi)
+; CHECK-NEXT:    movaps %xmm1, 16(%rdi)
+; CHECK-NEXT:    movaps %xmm0, (%rdi)
+; CHECK-NEXT:    retq
+  %sel = select <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x double> %a, <16 x double> %b
+  ret <16 x double> %sel
+}
diff --git a/test/CodeGen/X86/vshift-4.ll b/test/CodeGen/X86/vshift-4.ll
index a060cf803727..cda9bc893a2d 100644
--- a/test/CodeGen/X86/vshift-4.ll
+++ b/test/CodeGen/X86/vshift-4.ll
@@ -57,7 +57,7 @@ entry:
 define void @shift3a(<8 x i16> %val, <8 x i16>* %dst, <8 x i16> %amt) nounwind {
 entry:
 ; CHECK-LABEL: shift3a:
-; CHECK: movzwl
+; CHECK: pextrw $6
 ; CHECK: psllw
   %shamt = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
   %shl = shl <8 x i16> %val, %shamt
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index d115929f5aab..e0b861f29de8 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -2,12 +2,12 @@
 ; RUN: llc -march=x86 -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
 
 ; CHECK: movl
-; CHECK: paddd
+; CHECK: paddw
 ; CHECK: movlpd
 
 ; Scheduler causes produce a different instruction order
 ; ATOM: movl
-; ATOM: paddd
+; ATOM: paddw
 ; ATOM: movlpd
 
 ; bitcast a v4i16 to v2i32
diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll
index 9f6778cff592..3f54ab694c07 100644
--- a/test/CodeGen/X86/widen_conv-1.ll
+++ b/test/CodeGen/X86/widen_conv-1.ll
@@ -1,5 +1,5 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
-; CHECK: paddq
+; CHECK: paddd
 
 ; truncate v2i64 to v2i32
 
diff --git a/test/CodeGen/X86/widen_conversions.ll b/test/CodeGen/X86/widen_conversions.ll
index 522ab475c2a0..8e5174fbe76e 100644
--- a/test/CodeGen/X86/widen_conversions.ll
+++ b/test/CodeGen/X86/widen_conversions.ll
@@ -9,7 +9,7 @@ define <4 x i32> @zext_v4i8_to_v4i32(<4 x i8>* %ptr) {
 ; CHECK:      movd (%{{.*}}), %[[X:xmm[0-9]+]]
 ; CHECK-NEXT: pxor %[[Z:xmm[0-9]+]], %[[Z]]
 ; CHECK-NEXT: punpcklbw %[[Z]], %[[X]]
-; CHECK-NEXT: punpcklbw %[[Z]], %[[X]]
+; CHECK-NEXT: punpcklwd %[[Z]], %[[X]]
 ; CHECK-NEXT: ret
 
   %val = load <4 x i8>* %ptr
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 41bea859f474..0ec3574d69eb 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -4,12 +4,12 @@
 ;
 
 %i32vec3 = type <3 x i32>
-; CHECK: add3i32
 define void @add3i32(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
-; CHECK: movdqa
-; CHECK: paddd
-; CHECK: pextrd
-; CHECK: movq
+; CHECK-LABEL: add3i32:
+; CHECK:         movdqa  (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT:    paddd   (%{{.*}}), %[[R0]]
+; CHECK-NEXT:    pextrd  $2, %[[R0]], 8(%{{.*}})
+; CHECK-NEXT:    movq    %[[R0]], (%{{.*}})
 	%a = load %i32vec3* %ap, align 16
 	%b = load %i32vec3* %bp, align 16
 	%x = add %i32vec3 %a, %b
@@ -17,15 +17,15 @@ define void @add3i32(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
 	ret void
 }
 
-; CHECK: add3i32_2
 define void @add3i32_2(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
-; CHECK: movq
-; CHECK: pinsrd
-; CHECK: movq
-; CHECK: pinsrd
-; CHECK: paddd
-; CHECK: pextrd
-; CHECK: movq
+; CHECK-LABEL: add3i32_2:
+; CHECK:         movq    (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT:    pinsrd  $2, 8(%{{.*}}), %[[R0]]
+; CHECK-NEXT:    movq    (%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT:    pinsrd  $2, 8(%{{.*}}), %[[R1]]
+; CHECK-NEXT:    paddd   %[[R0]], %[[R1]]
+; CHECK-NEXT:    pextrd  $2, %[[R1]], 8(%{{.*}})
+; CHECK-NEXT:    movq    %[[R1]], (%{{.*}})
 	%a = load %i32vec3* %ap, align 8
 	%b = load %i32vec3* %bp, align 8
 	%x = add %i32vec3 %a, %b
@@ -34,15 +34,15 @@ define void @add3i32_2(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
 }
 
 %i32vec7 = type <7 x i32>
-; CHECK: add7i32
 define void @add7i32(%i32vec7*  sret %ret, %i32vec7* %ap, %i32vec7* %bp)  {
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: paddd
-; CHECK: paddd
-; CHECK: pextrd
-; CHECK: movq
-; CHECK: movdqa
+; CHECK-LABEL: add7i32:
+; CHECK:         movdqa  (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT:    movdqa  16(%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT:    paddd   (%{{.*}}), %[[R0]]
+; CHECK-NEXT:    paddd   16(%{{.*}}), %[[R1]]
+; CHECK-NEXT:    pextrd  $2, %[[R1]], 24(%{{.*}})
+; CHECK-NEXT:    movq    %[[R1]], 16(%{{.*}})
+; CHECK-NEXT:    movdqa  %[[R0]], (%{{.*}})
 	%a = load %i32vec7* %ap, align 16
 	%b = load %i32vec7* %bp, align 16
 	%x = add %i32vec7 %a, %b
@@ -50,18 +50,18 @@ define void @add7i32(%i32vec7*  sret %ret, %i32vec7* %ap, %i32vec7* %bp)  {
 	ret void
 }
 
-; CHECK: add12i32
 %i32vec12 = type <12 x i32>
 define void @add12i32(%i32vec12*  sret %ret, %i32vec12* %ap, %i32vec12* %bp)  {
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: paddd
-; CHECK: paddd
-; CHECK: paddd
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: movdqa
+; CHECK-LABEL: add12i32:
+; CHECK:         movdqa  (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT:    movdqa  16(%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT:    movdqa  32(%{{.*}}), %[[R2:xmm[0-9]+]]
+; CHECK-NEXT:    paddd   (%{{.*}}), %[[R0]]
+; CHECK-NEXT:    paddd   16(%{{.*}}), %[[R1]]
+; CHECK-NEXT:    paddd   32(%{{.*}}), %[[R2]]
+; CHECK-NEXT:    movdqa  %[[R2]], 32(%{{.*}})
+; CHECK-NEXT:    movdqa  %[[R1]], 16(%{{.*}})
+; CHECK-NEXT:    movdqa  %[[R0]], (%{{.*}})
 	%a = load %i32vec12* %ap, align 16
 	%b = load %i32vec12* %bp, align 16
 	%x = add %i32vec12 %a, %b
@@ -70,11 +70,17 @@ define void @add12i32(%i32vec12*  sret %ret, %i32vec12* %ap, %i32vec12* %bp)  {
 }
 
 
-; CHECK: add3i16
 %i16vec3 = type <3 x i16>
 define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
-; CHECK: paddd
-; CHECK: ret
+; CHECK-LABEL: add3i16:
+; CHECK:         pmovzxwd (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT:    pmovzxwd (%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT:    paddd    %[[R0]], %[[R1]]
+; CHECK-NEXT:    movdqa   %[[R1]], %[[R0]]
+; CHECK-NEXT:    pshufb   {{.*}}, %[[R0]]
+; CHECK-NEXT:    pmovzxdq %[[R0]], %[[R0]]
+; CHECK-NEXT:    pextrw   $4, %[[R1]], 4(%{{.*}})
+; CHECK-NEXT:    movd     %[[R0]], (%{{.*}})
 	%a = load %i16vec3* %ap, align 16
 	%b = load %i16vec3* %bp, align 16
 	%x = add %i16vec3 %a, %b
@@ -82,11 +88,13 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
 	ret void
 }
 
-; CHECK: add4i16
 %i16vec4 = type <4 x i16>
 define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
-; CHECK: paddd
-; CHECK: movq
+; CHECK-LABEL: add4i16:
+; CHECK:         movq    (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT:    movq    (%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT:    paddw   %[[R0]], %[[R1]]
+; CHECK-NEXT:    movq    %[[R1]], (%{{.*}})
 	%a = load %i16vec4* %ap, align 16
 	%b = load %i16vec4* %bp, align 16
 	%x = add %i16vec4 %a, %b
@@ -94,15 +102,15 @@ define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp
 	ret void
 }
 
-; CHECK: add12i16
 %i16vec12 = type <12 x i16>
 define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: paddw
-; CHECK: paddw
-; CHECK: movq
-; CHECK: movdqa
+; CHECK-LABEL: add12i16:
+; CHECK:         movdqa  (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT:    movdqa  16(%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT:    paddw   (%{{.*}}), %[[R0]]
+; CHECK-NEXT:    paddw   16(%{{.*}}), %[[R1]]
+; CHECK-NEXT:    movq    %[[R1]], 16(%{{.*}})
+; CHECK-NEXT:    movdqa  %[[R0]], (%{{.*}})
 	%a = load %i16vec12* %ap, align 16
 	%b = load %i16vec12* %bp, align 16
 	%x = add %i16vec12 %a, %b
@@ -110,18 +118,18 @@ define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12*
 	ret void
 }
 
-; CHECK: add18i16
 %i16vec18 = type <18 x i16>
 define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: paddw
-; CHECK: paddw
-; CHECK: paddw
-; CHECK: movd
-; CHECK: movdqa
-; CHECK: movdqa
+; CHECK-LABEL: add18i16:
+; CHECK:         movdqa  (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT:    movdqa  16(%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT:    movdqa  32(%{{.*}}), %[[R2:xmm[0-9]+]]
+; CHECK-NEXT:    paddw   (%{{.*}}), %[[R0]]
+; CHECK-NEXT:    paddw   16(%{{.*}}), %[[R1]]
+; CHECK-NEXT:    paddw   32(%{{.*}}), %[[R2]]
+; CHECK-NEXT:    movd    %[[R2]], 32(%{{.*}})
+; CHECK-NEXT:    movdqa  %[[R1]], 16(%{{.*}})
+; CHECK-NEXT:    movdqa  %[[R0]], (%{{.*}})
 	%a = load %i16vec18* %ap, align 16
 	%b = load %i16vec18* %bp, align 16
 	%x = add %i16vec18 %a, %b
@@ -130,11 +138,18 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18*
 }
 
 
-; CHECK: add3i8
 %i8vec3 = type <3 x i8>
 define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
-; CHECK: paddd
-; CHECK: ret
+; CHECK-LABEL: add3i8:
+; CHECK:         pmovzxbd (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT:    pmovzxbd (%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT:    paddd    %[[R0]], %[[R1]]
+; CHECK-NEXT:    movdqa   %[[R1]], %[[R0]]
+; CHECK-NEXT:    pshufb   {{.*}}, %[[R0]]
+; CHECK-NEXT:    pmovzxwq %[[R0]], %[[R0]]
+; CHECK-NEXT:    pextrb   $8, %[[R1]], 2(%{{.*}})
+; CHECK-NEXT:    movd     %[[R0]], %e[[R2:[abcd]]]x
+; CHECK-NEXT:    movw     %[[R2]]x, (%{{.*}})
 	%a = load %i8vec3* %ap, align 16
 	%b = load %i8vec3* %bp, align 16
 	%x = add %i8vec3 %a, %b
@@ -142,17 +157,18 @@ define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) no
 	ret void
 }
 
-; CHECK-LABEL: add31i8:
 %i8vec31 = type <31 x i8>
 define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: movdqa
-; CHECK: paddb
-; CHECK: paddb
-; CHECK: pextrb
-; CHECK: pextrw
-; CHECK: movq
-; CHECK: ret
+; CHECK-LABEL: add31i8:
+; CHECK:         movdqa  (%{{.*}}), %[[R0:xmm[0-9]+]]
+; CHECK-NEXT:    movdqa  16(%{{.*}}), %[[R1:xmm[0-9]+]]
+; CHECK-NEXT:    paddb   (%{{.*}}), %[[R0]]
+; CHECK-NEXT:    paddb   16(%{{.*}}), %[[R1]]
+; CHECK-NEXT:    pextrb  $14, %[[R1]], 30(%{{.*}})
+; CHECK-NEXT:    pextrw  $6, %[[R1]], 28(%{{.*}})
+; CHECK-NEXT:    pextrd  $2, %[[R1]], 24(%{{.*}})
+; CHECK-NEXT:    movq    %[[R1]], 16(%{{.*}})
+; CHECK-NEXT:    movdqa  %[[R0]], (%{{.*}})
 	%a = load %i8vec31* %ap, align 16
 	%b = load %i8vec31* %bp, align 16
 	%x = add %i8vec31 %a, %b
@@ -161,14 +177,43 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
 }
 
 
-; CHECK: rot
 %i8vec3pack = type { <3 x i8>, i8 }
-define %i8vec3pack  @rot() nounwind {
-; CHECK: pmovzxbd {{-?[0-9]+}}(%rsp), {{%xmm[0-9]}}
+define void @rot(%i8vec3pack* nocapture sret %result, %i8vec3pack* %X, %i8vec3pack* %rot) nounwind {
+; CHECK-LABEL: rot:
+; CHECK:         movdqa  {{.*}}, %[[CONSTANT0:xmm[0-9]+]]
+; CHECK-NEXT:    movdqa  {{.*}}, %[[SHUFFLE_MASK:xmm[0-9]+]]
+; CHECK-NEXT:    pshufb  %[[SHUFFLE_MASK]], %[[CONSTANT0]]
+; CHECK-NEXT:    pmovzxwq %[[CONSTANT0]], %[[CONSTANT0]]
+; CHECK-NEXT:    movd    %[[CONSTANT0]], %e[[R0:[abcd]]]x
+; CHECK-NEXT:    movw    %[[R0]]x, (%[[PTR0:.*]])
+; CHECK-NEXT:    movb    $-98, 2(%[[PTR0]])
+; CHECK-NEXT:    movdqa  {{.*}}, %[[CONSTANT1:xmm[0-9]+]]
+; CHECK-NEXT:    pshufb  %[[SHUFFLE_MASK]], %[[CONSTANT1]]
+; CHECK-NEXT:    pmovzxwq %[[CONSTANT1]], %[[CONSTANT1]]
+; CHECK-NEXT:    movd    %[[CONSTANT1]], %e[[R1:[abcd]]]x
+; CHECK-NEXT:    movw    %[[R1]]x, (%[[PTR1:.*]])
+; CHECK-NEXT:    movb    $1, 2(%[[PTR1]])
+; CHECK-NEXT:    pmovzxbd (%[[PTR0]]), %[[X0:xmm[0-9]+]]
+; CHECK-NEXT:    pand    {{.*}}, %[[X0]]
+; CHECK-NEXT:    pextrd  $1, %[[X0]], %e[[R0:[abcd]]]x
+; CHECK-NEXT:    shrl    %e[[R0]]x
+; CHECK-NEXT:    movd    %[[X0]], %e[[R1:[abcd]]]x
+; CHECK-NEXT:    shrl    %e[[R1]]x
+; CHECK-NEXT:    movd    %e[[R1]]x, %[[X1:xmm[0-9]+]]
+; CHECK-NEXT:    pinsrd  $1, %e[[R0]]x, %[[X1]]
+; CHECK-NEXT:    pextrd  $2, %[[X0]], %e[[R0:[abcd]]]x
+; CHECK-NEXT:    shrl    %e[[R0]]x
+; CHECK-NEXT:    pinsrd  $2, %e[[R0]]x, %[[X1]]
+; CHECK-NEXT:    pextrd  $3, %[[X0]], %e[[R0:[abcd]]]x
+; CHECK-NEXT:    pinsrd  $3, %e[[R0]]x, %[[X1]]
+; CHECK-NEXT:    movdqa  %[[X1]], %[[X2:xmm[0-9]+]]
+; CHECK-NEXT:    pshufb  %[[SHUFFLE_MASK]], %[[X2]]
+; CHECK-NEXT:    pmovzxwq %[[X2]], %[[X3:xmm[0-9]+]]
+; CHECK-NEXT:    pextrb  $8, %[[X1]], 2(%{{.*}})
+; CHECK-NEXT:    movd    %[[X3]], %e[[R0:[abcd]]]x
+; CHECK-NEXT:    movw    %[[R0]]x, (%{{.*}})
+
 entry:
-  %X = alloca %i8vec3pack, align 4
-  %rot = alloca %i8vec3pack, align 4
-  %result = alloca %i8vec3pack, align 4
   %storetmp = bitcast %i8vec3pack* %X to <3 x i8>*
   store <3 x i8> <i8 -98, i8 -98, i8 -98>, <3 x i8>* %storetmp
   %storetmp1 = bitcast %i8vec3pack* %rot to <3 x i8>*
@@ -180,7 +225,6 @@ entry:
   %shr = lshr <3 x i8> %extractVec, %extractVec3
   %storetmp4 = bitcast %i8vec3pack* %result to <3 x i8>*
   store <3 x i8> %shr, <3 x i8>* %storetmp4
-  %tmp5 = load %i8vec3pack* %result
-  ret %i8vec3pack %tmp5
+  ret void
 }
 
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index a355b75fafcf..70fdbb7c9c82 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -1,43 +1,56 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s
 
+target triple = "x86_64-unknown-unknown"
+
 ; widening shuffle v3float and then a add
 define void @shuf(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
-entry:
 ; CHECK-LABEL: shuf:
-; CHECK: extractps
-; CHECK: extractps
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    addps %xmm1, %xmm0
+; CHECK-NEXT:    extractps $2, %xmm0, 8(%eax)
+; CHECK-NEXT:    extractps $1, %xmm0, 4(%eax)
+; CHECK-NEXT:    movss %xmm0, (%eax)
+; CHECK-NEXT:    retl
+entry:
 	%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 1, i32 2>
 	%val = fadd <3 x float> %x, %src2
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
-; CHECK: ret
 }
 
 
 ; widening shuffle v3float with a different mask and then a add
 define void @shuf2(<3 x float>* %dst.addr, <3 x float> %src1,<3 x float> %src2) nounwind {
-entry:
 ; CHECK-LABEL: shuf2:
-; CHECK: extractps
-; CHECK: extractps
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
+; CHECK-NEXT:    addps %xmm1, %xmm0
+; CHECK-NEXT:    extractps $2, %xmm0, 8(%eax)
+; CHECK-NEXT:    extractps $1, %xmm0, 4(%eax)
+; CHECK-NEXT:    movss %xmm0, (%eax)
+; CHECK-NEXT:    retl
+entry:
 	%x = shufflevector <3 x float> %src1, <3 x float> %src2, <3 x i32> < i32 0, i32 4, i32 2>
 	%val = fadd <3 x float> %x, %src2
 	store <3 x float> %val, <3 x float>* %dst.addr
 	ret void
-; CHECK: ret
 }
 
 ; Example of when widening a v3float operation causes the DAG to replace a node
 ; with the operation that we are currently widening, i.e. when replacing
 ; opA with opB, the DAG will produce new operations with opA.
 define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
-entry:
 ; CHECK-LABEL: shuf3:
-; CHECK-NOT: movlhps
-; CHECK-NOT: shufps
-; CHECK: pshufd
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
+; CHECK-NEXT:    movaps %xmm1, (%eax)
+; CHECK-NEXT:    retl
+entry:
   %shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-  %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 
+  %tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
   %tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %tmp3.i13 = shufflevector <4 x float> %tmp1.i.i, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> ; <<3 x float>>
   %tmp6.i14 = shufflevector <3 x float> %tmp3.i13, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -45,27 +58,35 @@ entry:
   %tmp2.i18 = shufflevector <3 x float> %tmp97.i, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
   %t5 = bitcast <4 x float> %tmp2.i18 to <4 x i32>
   %shr.i.i19 = lshr <4 x i32> %t5, <i32 19, i32 19, i32 19, i32 19>
-  %and.i.i20 = and <4 x i32> %shr.i.i19, <i32 4080, i32 4080, i32 4080, i32 4080> 
+  %and.i.i20 = and <4 x i32> %shr.i.i19, <i32 4080, i32 4080, i32 4080, i32 4080>
   %shuffle.i.i.i21 = shufflevector <4 x float> %tmp2.i18, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
   store <4 x float> %shuffle.i.i.i21, <4 x float>* %dst
   ret void
-; CHECK: ret
 }
 
 ; PR10421: make sure we correctly handle extreme widening with CONCAT_VECTORS
 define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone {
 ; CHECK-LABEL: shuf4:
-; CHECK-NOT: punpckldq
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; CHECK-NEXT:    pshufb %xmm2, %xmm1
+; CHECK-NEXT:    pshufb %xmm2, %xmm0
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retl
   %vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i8> %vshuf
-; CHECK: ret
 }
 
 ; PR11389: another CONCAT_VECTORS case
 define void @shuf5(<8 x i8>* %p) nounwind {
 ; CHECK-LABEL: shuf5:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movdqa {{.*#+}} xmm0 = <4,33,u,u,u,u,u,u>
+; CHECK-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[2,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; CHECK-NEXT:    movlpd %xmm0, (%eax)
+; CHECK-NEXT:    retl
   %v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   store <8 x i8> %v, <8 x i8>* %p, align 8
   ret void
-; CHECK: ret
 }
diff --git a/test/CodeGen/X86/win32-pic-jumptable.ll b/test/CodeGen/X86/win32-pic-jumptable.ll
new file mode 100644
index 000000000000..cabd36ae395d
--- /dev/null
+++ b/test/CodeGen/X86/win32-pic-jumptable.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -relocation-model=pic | FileCheck %s
+
+; CHECK:        calll L0$pb
+; CHECK-NEXT: L0$pb:
+; CHECK-NEXT:   popl %eax
+; CHECK-NEXT:   addl LJTI0_0(,%ecx,4), %eax
+; CHECK-NEXT:   jmpl *%eax
+
+; CHECK:      LJTI0_0:
+; CHECK-NEXT:   .long LBB0_4-L0$pb
+; CHECK-NEXT:   .long LBB0_5-L0$pb
+; CHECK-NEXT:   .long LBB0_6-L0$pb
+; CHECK-NEXT:   .long LBB0_7-L0$pb
+
+
+target triple = "i686--windows-itanium"
+define i32 @f(i64 %x) {
+bb0:
+  switch i64 %x, label %bb5 [
+    i64 1, label %bb1
+    i64 2, label %bb2
+    i64 3, label %bb3
+    i64 4, label %bb4
+  ]
+bb1:
+  br label %bb5
+bb2:
+  br label %bb5
+bb3:
+  br label %bb5
+bb4:
+  br label %bb5
+bb5:
+  %y = phi i32 [ 0, %bb0 ], [ 1, %bb1 ], [ 2, %bb2 ], [ 3, %bb3 ], [ 4, %bb4 ]
+  ret i32 %y
+}
diff --git a/test/CodeGen/X86/win64_call_epi.ll b/test/CodeGen/X86/win64_call_epi.ll
new file mode 100644
index 000000000000..71c44b085004
--- /dev/null
+++ b/test/CodeGen/X86/win64_call_epi.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=WIN64
+
+declare void @bar()
+declare void @baz()
+declare i32 @personality(...)
+
+; Check for 'nop' between the last call and the epilogue.
+define void @foo1() {
+
+    invoke void @bar()
+        to label %normal
+        unwind label %catch
+
+normal:
+    ret void
+
+catch:
+    %1 = landingpad { i8*, i32 } personality i32 (...)* @personality cleanup
+    resume { i8*, i32 } %1
+}
+; WIN64-LABEL: foo1:
+; WIN64: .seh_proc foo1
+; WIN64: callq bar
+; WIN64: nop
+; WIN64: addq ${{[0-9]+}}, %rsp
+; WIN64: retq
+; Check for 'ud2' after noreturn call
+; WIN64: callq _Unwind_Resume
+; WIN64-NEXT: ud2
+; WIN64: .seh_endproc
+
+
+; Check it still works when blocks are reordered.
+@something = global i32 0
+define void @foo2(i1 zeroext %cond ) {
+    br i1 %cond, label %a, label %b, !prof !0
+a:
+    call void @bar()
+    br label %done
+b:
+    call void @baz()
+    store i32 0, i32* @something
+    br label %done
+done:
+    ret void
+}
+!0 = !{!"branch_weights", i32 100, i32 0}
+; WIN64-LABEL: foo2:
+; WIN64: callq bar
+; WIN64: nop
+; WIN64: addq ${{[0-9]+}}, %rsp
+; WIN64: retq
+
+
+; Check nop is not emitted when call is not adjacent to epilogue.
+define i32 @foo3() {
+    call void @bar()
+    ret i32 0
+}
+; WIN64-LABEL: foo3:
+; WIN64: callq bar
+; WIN64: xorl
+; WIN64-NOT: nop
+; WIN64: addq ${{[0-9]+}}, %rsp
+; WIN64: retq
diff --git a/test/CodeGen/X86/win64_eh.ll b/test/CodeGen/X86/win64_eh.ll
index f1f874eb2f5a..4670087b9b4d 100644
--- a/test/CodeGen/X86/win64_eh.ll
+++ b/test/CodeGen/X86/win64_eh.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-windows-itanium | FileCheck %s -check-prefix=WIN64
 ; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=WIN64
 
 ; Check function without prolog
diff --git a/test/CodeGen/X86/win64_vararg.ll b/test/CodeGen/X86/win64_vararg.ll
index 1a51b2a64a76..8d7f2010a541 100644
--- a/test/CodeGen/X86/win64_vararg.ll
+++ b/test/CodeGen/X86/win64_vararg.ll
@@ -111,3 +111,22 @@ entry:
   %tmp = va_arg i8** %ap, i32
   ret i32 %tmp
 }
+
+define void @sret_arg(i32* sret %agg.result, i8* nocapture readnone %format, ...) {
+entry:
+  %ap = alloca i8*
+  %ap_i8 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap_i8)
+  %tmp = va_arg i8** %ap, i32
+  store i32 %tmp, i32* %agg.result
+  ret void
+}
+; CHECK-LABEL: sret_arg:
+; CHECK: pushq
+; CHECK-DAG: movq %r9, 40(%rsp)
+; CHECK-DAG: movq %r8, 32(%rsp)
+; CHECK: movl 32(%rsp), %[[tmp:[^ ]*]]
+; CHECK: movl %[[tmp]], (%[[sret:[^ ]*]])
+; CHECK: movq %[[sret]], %rax
+; CHECK: popq
+; CHECK: retq
diff --git a/test/CodeGen/X86/windows-itanium-alloca.ll b/test/CodeGen/X86/windows-itanium-alloca.ll
new file mode 100644
index 000000000000..0a06cdef8793
--- /dev/null
+++ b/test/CodeGen/X86/windows-itanium-alloca.ll
@@ -0,0 +1,16 @@
+; RUN: llc -mtriple i686-windows-itanium -filetype asm -o - %s | FileCheck %s
+
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i686--windows-itanium"
+
+declare void @external(i8*)
+
+define dllexport void @alloca(i32 %sz) {
+entry:
+  %vla = alloca i8, i32 %sz, align 1
+  call void @external(i8* %vla)
+  ret void
+}
+
+; CHECK: __chkstk
+
diff --git a/test/CodeGen/X86/x32-function_pointer-1.ll b/test/CodeGen/X86/x32-function_pointer-1.ll
new file mode 100644
index 000000000000..2baf92a99790
--- /dev/null
+++ b/test/CodeGen/X86/x32-function_pointer-1.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32  | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -fast-isel | FileCheck %s
+
+; Test for x32 function pointer tail call
+
+@foo1 = external global void (i8*)*
+@foo2 = external global void (i8*)*
+
+define void @bar(i8* %h) nounwind uwtable {
+entry:
+  %0 = load void (i8*)** @foo1, align 4
+; CHECK: movl	foo1(%rip), %e{{[^,]*}}
+  tail call void %0(i8* %h) nounwind
+; CHECK: callq	*%r{{[^,]*}}
+  %1 = load void (i8*)** @foo2, align 4
+; CHECK: movl	foo2(%rip), %e{{[^,]*}}
+  tail call void %1(i8* %h) nounwind
+; CHECK: jmpq	*%r{{[^,]*}}
+  ret void
+}
diff --git a/test/CodeGen/X86/x32-function_pointer-2.ll b/test/CodeGen/X86/x32-function_pointer-2.ll
new file mode 100644
index 000000000000..f727d41be3a3
--- /dev/null
+++ b/test/CodeGen/X86/x32-function_pointer-2.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -fast-isel | FileCheck %s
+
+; Test call function pointer with function argument
+;
+; void bar (void * h, void (*foo) (void *))
+;    {
+;      foo (h);
+;      foo (h);
+;    }
+
+
+define void @bar(i8* %h, void (i8*)* nocapture %foo) nounwind {
+entry:
+  tail call void %foo(i8* %h) nounwind
+; CHECK: mov{{l|q}}	%{{e|r}}si, %{{e|r}}[[REG:.*]]{{d?}}
+; CHECK: callq	*%r[[REG]]
+  tail call void %foo(i8* %h) nounwind
+; CHECK: jmpq	*%r{{[^,]*}}
+  ret void
+}
diff --git a/test/CodeGen/X86/x32-function_pointer-3.ll b/test/CodeGen/X86/x32-function_pointer-3.ll
new file mode 100644
index 000000000000..5eaf85d8f931
--- /dev/null
+++ b/test/CodeGen/X86/x32-function_pointer-3.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -fast-isel | FileCheck %s
+
+; Test calling function pointer passed in struct
+
+;    The fuction argument `h' in
+
+;    struct foo {
+;      void (*f) (void);
+;      int i;
+;    };
+;    void
+;    bar (struct foo h)
+;    {
+;      h.f ();
+;    }
+
+;    is passed in the 64-bit %rdi register.  The `f' field is in the lower 32
+;    bits of %rdi register and the `i' field is in the upper 32 bits of %rdi
+;    register.  We need to zero-extend %edi to %rdi before branching via %rdi.
+
+define void @bar(i64 %h.coerce) nounwind {
+entry:
+  %h.sroa.0.0.extract.trunc = trunc i64 %h.coerce to i32
+  %0 = inttoptr i32 %h.sroa.0.0.extract.trunc to void ()*
+; CHECK: movl	%edi, %e[[REG:.*]]
+  tail call void %0() nounwind
+; CHECK: jmpq	*%r[[REG]]
+  ret void
+}
diff --git a/test/CodeGen/X86/x86-64-call.ll b/test/CodeGen/X86/x86-64-call.ll
new file mode 100644
index 000000000000..300f8d1025e5
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-call.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-linux -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-linux-gnux32 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux -verify-machineinstrs | FileCheck %s -check-prefix=IA32
+
+; trivial test for correct call suffix
+
+define i32 @far() nounwind uwtable {
+entry:
+; CHECK: callq foo
+; IA32: calll foo
+  tail call void @foo() nounwind
+  ret i32 0
+}
+
+declare void @foo()
diff --git a/test/CodeGen/X86/x86-64-pic-10.ll b/test/CodeGen/X86/x86-64-pic-10.ll
index da8082b92518..8790fa6072b3 100644
--- a/test/CodeGen/X86/x86-64-pic-10.ll
+++ b/test/CodeGen/X86/x86-64-pic-10.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1
 ; RUN: grep "callq	g@PLT" %t1
 
-@g = alias weak i32 ()* @f
+@g = weak alias i32 ()* @f
 
 define void @h() {
 entry:
diff --git a/test/CodeGen/X86/x86-64-stack-and-frame-ptr.ll b/test/CodeGen/X86/x86-64-stack-and-frame-ptr.ll
new file mode 100644
index 000000000000..c476ffd84053
--- /dev/null
+++ b/test/CodeGen/X86/x86-64-stack-and-frame-ptr.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-pc-linux-gnux32 < %s | FileCheck -check-prefix=X32ABI %s
+; RUN: llc -mtriple=x86_64-pc-nacl < %s | FileCheck -check-prefix=NACL %s
+
+; x32 uses %esp, %ebp as stack and frame pointers
+
+; CHECK-LABEL: foo
+; CHECK: pushq %rbp
+; CHECK: movq %rsp, %rbp
+; CHECK: movq %rdi, -8(%rbp)
+; CHECK: popq %rbp
+; X32ABI-LABEL: foo
+; X32ABI: pushq %rbp
+; X32ABI: movl %esp, %ebp
+; X32ABI: movl %edi, -4(%ebp)
+; X32ABI: popq %rbp
+; NACL-LABEL: foo
+; NACL: pushq %rbp
+; NACL: movq %rsp, %rbp
+; NACL: movl %edi, -4(%rbp)
+; NACL: popq %rbp
+
+
+define void @foo(i32* %a) #0 {
+entry:
+  %a.addr = alloca i32*, align 4
+  %b = alloca i32*, align 4
+  store i32* %a, i32** %a.addr, align 4
+  ret void
+}
+
+attributes #0 = { nounwind uwtable "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"}
+
+
diff --git a/test/CodeGen/X86/x86-64-tls-1.ll b/test/CodeGen/X86/x86-64-tls-1.ll
index 641786f5a914..2879fb4e1e74 100644
--- a/test/CodeGen/X86/x86-64-tls-1.ll
+++ b/test/CodeGen/X86/x86-64-tls-1.ll
@@ -1,10 +1,9 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 @tm_nest_level = internal thread_local global i32 0
 define i64 @z() nounwind {
-; FIXME: The codegen here is primitive at best and could be much better.
-; The add and the moves can be folded together.
-; CHECK-DAG: movq    $tm_nest_level@TPOFF, %rcx
-; CHECK-DAG: movq    %fs:0, %rax
-; CHECK: addl    %ecx, %eax
+; CHECK:      movq    $tm_nest_level@TPOFF, %r[[R0:[abcd]]]x
+; CHECK-NEXT: addl    %fs:0, %e[[R0]]x
+; CHECK-NEXT: andq    $100, %r[[R0]]x
+
   ret i64 and (i64 ptrtoint (i32* @tm_nest_level to i64), i64 100)
 }
diff --git a/test/CodeGen/X86/x86-inline-asm-validation.ll b/test/CodeGen/X86/x86-inline-asm-validation.ll
new file mode 100644
index 000000000000..56bdc48b0e4c
--- /dev/null
+++ b/test/CodeGen/X86/x86-inline-asm-validation.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple i686-gnu -filetype asm -o - %s 2>&1 | FileCheck %s
+
+define void @test_L_ff() {
+entry:
+  call void asm "", "L,~{dirflag},~{fpsr},~{flags}"(i32 255)
+  ret void
+}
+
+; CHECK-NOT: error: invalid operand for inline asm constraint 'L'
+
+define void @test_L_ffff() {
+entry:
+  call void asm "", "L,~{dirflag},~{fpsr},~{flags}"(i32 65535)
+  ret void
+}
+
+; CHECK-NOT: error: invalid operand for inline asm constraint 'L'
+
+define void @test_M_1() {
+entry:
+  call void asm "", "M,~{dirflag},~{fpsr},~{flags}"(i32 1)
+  ret void
+}
+
+; CHECK-NOT: error: invalid operand for inline asm constraint 'M'
+
+define void @test_O_64() {
+entry:
+  call void asm "", "O,~{dirflag},~{fpsr},~{flags}"(i32 64)
+  ret void
+}
+
+; CHECK-NOT: error: invalid operand for inline asm constraint 'O'
+
diff --git a/test/CodeGen/X86/x86-mixed-alignment-dagcombine.ll b/test/CodeGen/X86/x86-mixed-alignment-dagcombine.ll
new file mode 100644
index 000000000000..fcf7eaec0544
--- /dev/null
+++ b/test/CodeGen/X86/x86-mixed-alignment-dagcombine.ll
@@ -0,0 +1,35 @@
+; RUN: llc  -mtriple=x86_64-apple-macosx10.9.0  -mcpu=core2 -mattr=+64bit,+sse2 < %s | FileCheck %s
+
+; DAGCombine may choose to rewrite 2 loads feeding a select as a select of
+; addresses feeding a load. This test ensures that when it does that it creates
+; a load with alignment equivalent to the most restrictive source load.
+
+declare void @sink(<2 x double>)
+
+define void @test1(i1 %cmp) align 2 {
+  %1 = alloca  <2 x double>, align 16
+  %2 = alloca  <2 x double>, align 8
+
+  %val = load <2 x double>* %1, align 16
+  %val2 = load <2 x double>* %2, align 8
+  %val3 = select i1 %cmp, <2 x double> %val, <2 x double> %val2
+  call void @sink(<2 x double> %val3)
+  ret void
+  ; CHECK: test1
+  ; CHECK: movups
+  ; CHECK: ret
+}
+
+define void @test2(i1 %cmp) align 2 {
+  %1 = alloca  <2 x double>, align 16
+  %2 = alloca  <2 x double>, align 8
+
+  %val = load <2 x double>* %1, align 16
+  %val2 = load <2 x double>* %2, align 16
+  %val3 = select i1 %cmp, <2 x double> %val, <2 x double> %val2
+  call void @sink(<2 x double> %val3)
+  ret void
+  ; CHECK: test2
+  ; CHECK: movaps
+  ; CHECK: ret
+}
diff --git a/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll b/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
index f737519bd153..4317d8ab6a26 100644
--- a/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
+++ b/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
@@ -2,10 +2,10 @@
 
 define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind {
 ; CHECK-LABEL: LCPI0_0:
-; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
-; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
-; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
-; CHECK-NEXT: .long 1065353216              ## float 1.000000e+00
+; CHECK-NEXT: .long 1065353216              ## 0x3f800000
+; CHECK-NEXT: .long 1065353216              ## 0x3f800000
+; CHECK-NEXT: .long 1065353216              ## 0x3f800000
+; CHECK-NEXT: .long 1065353216              ## 0x3f800000
 ; CHECK-LABEL: foo:
 ; CHECK: cmpeqps %xmm1, %xmm0
 ; CHECK-NEXT: andps LCPI0_0(%rip), %xmm0
@@ -54,3 +54,21 @@ define void @foo2(<4 x float>* noalias %result) nounwind {
   store <4 x float> %val, <4 x float>* %result
   ret void
 }
+
+; Fold explicit AND operations when the constant isn't a splat of a single
+; scalar value like what the zext creates.
+define <4 x float> @foo3(<4 x float> %val, <4 x float> %test) nounwind {
+; CHECK-LABEL: LCPI3_0:
+; CHECK-NEXT: .long 1065353216              ## 0x3f800000
+; CHECK-NEXT: .long 0                       ## 0x0
+; CHECK-NEXT: .long 1065353216              ## 0x3f800000
+; CHECK-NEXT: .long 0                       ## 0x0
+; CHECK-LABEL: foo3:
+; CHECK: cmpeqps %xmm1, %xmm0
+; CHECK-NEXT: andps LCPI3_0(%rip), %xmm0
+  %cmp = fcmp oeq <4 x float> %val, %test
+  %ext = zext <4 x i1> %cmp to <4 x i32>
+  %and = and <4 x i32> %ext, <i32 255, i32 256, i32 257, i32 258>
+  %result = sitofp <4 x i32> %and to <4 x float>
+  ret <4 x float> %result
+}
diff --git a/test/CodeGen/X86/xaluo.ll b/test/CodeGen/X86/xaluo.ll
index f078631c2b33..668628c69ede 100644
--- a/test/CodeGen/X86/xaluo.ll
+++ b/test/CodeGen/X86/xaluo.ll
@@ -1,7 +1,5 @@
-; RUN: llc -mtriple=x86_64-darwin-unknown < %s                             | FileCheck %s --check-prefix=DAG
-; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort < %s | FileCheck %s --check-prefix=FAST
-; RUN: llc -mtriple=x86_64-darwin-unknown < %s                             | FileCheck %s
-; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin-unknown                             < %s | FileCheck %s --check-prefix=CHECK --check-prefix=SDAG
+; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort < %s | FileCheck %s --check-prefix=CHECK --check-prefix=FAST
 
 ;
 ; Get the actual value of the overflow bit.
@@ -9,12 +7,9 @@
 ; SADDO reg, reg
 define zeroext i1 @saddo.i8(i8 signext %v1, i8 signext %v2, i8* %res) {
 entry:
-; DAG-LABEL:    saddo.i8
-; DAG:          addb %sil, %dil
-; DAG-NEXT:     seto %al
-; FAST-LABEL:   saddo.i8
-; FAST:         addb %sil, %dil
-; FAST-NEXT:    seto %al
+; CHECK-LABEL: saddo.i8
+; CHECK:       addb %sil, %dil
+; CHECK-NEXT:  seto %al
   %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %v1, i8 %v2)
   %val = extractvalue {i8, i1} %t, 0
   %obit = extractvalue {i8, i1} %t, 1
@@ -24,12 +19,9 @@ entry:
 
 define zeroext i1 @saddo.i16(i16 %v1, i16 %v2, i16* %res) {
 entry:
-; DAG-LABEL:    saddo.i16
-; DAG:          addw %si, %di
-; DAG-NEXT:     seto %al
-; FAST-LABEL:   saddo.i16
-; FAST:         addw %si, %di
-; FAST-NEXT:    seto %al
+; CHECK-LABEL: saddo.i16
+; CHECK:       addw %si, %di
+; CHECK-NEXT:  seto %al
   %t = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %v1, i16 %v2)
   %val = extractvalue {i16, i1} %t, 0
   %obit = extractvalue {i16, i1} %t, 1
@@ -39,12 +31,9 @@ entry:
 
 define zeroext i1 @saddo.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
-; DAG-LABEL:    saddo.i32
-; DAG:          addl %esi, %edi
-; DAG-NEXT:     seto %al
-; FAST-LABEL:   saddo.i32
-; FAST:         addl %esi, %edi
-; FAST-NEXT:    seto %al
+; CHECK-LABEL: saddo.i32
+; CHECK:       addl %esi, %edi
+; CHECK-NEXT:  seto %al
   %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -54,12 +43,9 @@ entry:
 
 define zeroext i1 @saddo.i64(i64 %v1, i64 %v2, i64* %res) {
 entry:
-; DAG-LABEL:    saddo.i64
-; DAG:          addq %rsi, %rdi
-; DAG-NEXT:     seto %al
-; FAST-LABEL:   saddo.i64
-; FAST:         addq %rsi, %rdi
-; FAST-NEXT:    seto %al
+; CHECK-LABEL: saddo.i64
+; CHECK:       addq %rsi, %rdi
+; CHECK-NEXT:  seto %al
   %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -67,16 +53,48 @@ entry:
   ret i1 %obit
 }
 
-; SADDO reg, imm | imm, reg
-; FIXME: INC isn't supported in FastISel yet
-define zeroext i1 @saddo.i64imm1(i64 %v1, i64* %res) {
+; SADDO reg, 1 | INC
+define zeroext i1 @saddo.inc.i8(i8 %v1, i8* %res) {
+entry:
+; CHECK-LABEL: saddo.inc.i8
+; CHECK:       incb %dil
+; CHECK-NEXT:  seto %al
+  %t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %v1, i8 1)
+  %val = extractvalue {i8, i1} %t, 0
+  %obit = extractvalue {i8, i1} %t, 1
+  store i8 %val, i8* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo.inc.i16(i16 %v1, i16* %res) {
+entry:
+; CHECK-LABEL: saddo.inc.i16
+; CHECK:       incw %di
+; CHECK-NEXT:  seto %al
+  %t = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %v1, i16 1)
+  %val = extractvalue {i16, i1} %t, 0
+  %obit = extractvalue {i16, i1} %t, 1
+  store i16 %val, i16* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo.inc.i32(i32 %v1, i32* %res) {
 entry:
-; DAG-LABEL:    saddo.i64imm1
-; DAG:          incq %rdi
-; DAG-NEXT:     seto %al
-; FAST-LABEL:   saddo.i64imm1
-; FAST:         addq $1, %rdi
-; FAST-NEXT:    seto %al
+; CHECK-LABEL: saddo.inc.i32
+; CHECK:       incl %edi
+; CHECK-NEXT:  seto %al
+  %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 1)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @saddo.inc.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL: saddo.inc.i64
+; CHECK:       incq %rdi
+; CHECK-NEXT:  seto %al
   %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 1)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -84,17 +102,18 @@ entry:
   ret i1 %obit
 }
 
+; SADDO reg, imm | imm, reg
 ; FIXME: DAG doesn't optimize immediates on the LHS.
-define zeroext i1 @saddo.i64imm2(i64 %v1, i64* %res) {
+define zeroext i1 @saddo.i64imm1(i64 %v1, i64* %res) {
 entry:
-; DAG-LABEL:    saddo.i64imm2
-; DAG:          mov
-; DAG-NEXT:     addq
-; DAG-NEXT:     seto
-; FAST-LABEL:   saddo.i64imm2
-; FAST:         addq $1, %rdi
-; FAST-NEXT:    seto %al
-  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 1, i64 %v1)
+; SDAG-LABEL: saddo.i64imm1
+; SDAG:       mov
+; SDAG-NEXT:  addq
+; SDAG-NEXT:  seto
+; FAST-LABEL: saddo.i64imm1
+; FAST:       addq $2, %rdi
+; FAST-NEXT:  seto %al
+  %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 2, i64 %v1)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
   store i64 %val, i64* %res
@@ -102,14 +121,11 @@ entry:
 }
 
 ; Check boundary conditions for large immediates.
-define zeroext i1 @saddo.i64imm3(i64 %v1, i64* %res) {
+define zeroext i1 @saddo.i64imm2(i64 %v1, i64* %res) {
 entry:
-; DAG-LABEL:    saddo.i64imm3
-; DAG:          addq $-2147483648, %rdi
-; DAG-NEXT:     seto %al
-; FAST-LABEL:   saddo.i64imm3
-; FAST:         addq $-2147483648, %rdi
-; FAST-NEXT:    seto %al
+; CHECK-LABEL: saddo.i64imm2
+; CHECK:       addq $-2147483648, %rdi
+; CHECK-NEXT:  seto %al
   %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -2147483648)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -117,16 +133,12 @@ entry:
   ret i1 %obit
 }
 
-define zeroext i1 @saddo.i64imm4(i64 %v1, i64* %res) {
+define zeroext i1 @saddo.i64imm3(i64 %v1, i64* %res) {
 entry:
-; DAG-LABEL:    saddo.i64imm4
-; DAG:          movabsq $-21474836489, %[[REG:[a-z]+]]
-; DAG-NEXT:     addq %rdi, %[[REG]]
-; DAG-NEXT:     seto
-; FAST-LABEL:   saddo.i64imm4
-; FAST:         movabsq $-21474836489, %[[REG:[a-z]+]]
-; FAST-NEXT:    addq %rdi, %[[REG]]
-; FAST-NEXT:    seto
+; CHECK-LABEL: saddo.i64imm3
+; CHECK:       movabsq $-21474836489, %[[REG:[a-z]+]]
+; CHECK-NEXT:  addq %rdi, %[[REG]]
+; CHECK-NEXT:  seto
   %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -21474836489)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -134,14 +146,11 @@ entry:
   ret i1 %obit
 }
 
-define zeroext i1 @saddo.i64imm5(i64 %v1, i64* %res) {
+define zeroext i1 @saddo.i64imm4(i64 %v1, i64* %res) {
 entry:
-; DAG-LABEL:    saddo.i64imm5
-; DAG:          addq $2147483647, %rdi
-; DAG-NEXT:     seto
-; FAST-LABEL:   saddo.i64imm5
-; FAST:         addq $2147483647, %rdi
-; FAST-NEXT:    seto
+; CHECK-LABEL: saddo.i64imm4
+; CHECK:       addq $2147483647, %rdi
+; CHECK-NEXT:  seto
   %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 2147483647)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -149,17 +158,12 @@ entry:
   ret i1 %obit
 }
 
-; TODO: FastISel shouldn't use movabsq.
-define zeroext i1 @saddo.i64imm6(i64 %v1, i64* %res) {
+define zeroext i1 @saddo.i64imm5(i64 %v1, i64* %res) {
 entry:
-; DAG-LABEL:    saddo.i64imm6
-; DAG:          movl $2147483648, %ecx
-; DAG:          addq %rdi, %rcx
-; DAG-NEXT:     seto
-; FAST-LABEL:   saddo.i64imm6
-; FAST:         movabsq $2147483648, %[[REG:[a-z]+]]
-; FAST:         addq %rdi, %[[REG]]
-; FAST-NEXT:     seto
+; CHECK-LABEL: saddo.i64imm5
+; CHECK:       movl $2147483648
+; CHECK:       addq %rdi
+; CHECK-NEXT:  seto
   %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 2147483648)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -170,12 +174,9 @@ entry:
 ; UADDO
 define zeroext i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
-; DAG-LABEL:    uaddo.i32
-; DAG:          addl %esi, %edi
-; DAG-NEXT:     setb %al
-; FAST-LABEL:   uaddo.i32
-; FAST:         addl %esi, %edi
-; FAST-NEXT:    setb %al
+; CHECK-LABEL: uaddo.i32
+; CHECK:       addl %esi, %edi
+; CHECK-NEXT:  setb %al
   %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -185,12 +186,9 @@ entry:
 
 define zeroext i1 @uaddo.i64(i64 %v1, i64 %v2, i64* %res) {
 entry:
-; DAG-LABEL:    uaddo.i64
-; DAG:          addq %rsi, %rdi
-; DAG-NEXT:     setb %al
-; FAST-LABEL:   uaddo.i64
-; FAST:         addq %rsi, %rdi
-; FAST-NEXT:    setb %al
+; CHECK-LABEL: uaddo.i64
+; CHECK:       addq %rsi, %rdi
+; CHECK-NEXT:  setb %al
   %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -198,15 +196,57 @@ entry:
   ret i1 %obit
 }
 
+; UADDO reg, 1 | NOT INC
+define zeroext i1 @uaddo.inc.i8(i8 %v1, i8* %res) {
+entry:
+; CHECK-LABEL: uaddo.inc.i8
+; CHECK-NOT:   incb %dil
+  %t = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %v1, i8 1)
+  %val = extractvalue {i8, i1} %t, 0
+  %obit = extractvalue {i8, i1} %t, 1
+  store i8 %val, i8* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.inc.i16(i16 %v1, i16* %res) {
+entry:
+; CHECK-LABEL: uaddo.inc.i16
+; CHECK-NOT:   incw %di
+  %t = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %v1, i16 1)
+  %val = extractvalue {i16, i1} %t, 0
+  %obit = extractvalue {i16, i1} %t, 1
+  store i16 %val, i16* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.inc.i32(i32 %v1, i32* %res) {
+entry:
+; CHECK-LABEL: uaddo.inc.i32
+; CHECK-NOT:   incl %edi
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 1)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32* %res
+  ret i1 %obit
+}
+
+define zeroext i1 @uaddo.inc.i64(i64 %v1, i64* %res) {
+entry:
+; CHECK-LABEL: uaddo.inc.i64
+; CHECK-NOT:   incq %rdi
+  %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 1)
+  %val = extractvalue {i64, i1} %t, 0
+  %obit = extractvalue {i64, i1} %t, 1
+  store i64 %val, i64* %res
+  ret i1 %obit
+}
+
 ; SSUBO
 define zeroext i1 @ssubo.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
-; DAG-LABEL:    ssubo.i32
-; DAG:          subl %esi, %edi
-; DAG-NEXT:     seto %al
-; FAST-LABEL:   ssubo.i32
-; FAST:         subl %esi, %edi
-; FAST-NEXT:    seto %al
+; CHECK-LABEL: ssubo.i32
+; CHECK:       subl %esi, %edi
+; CHECK-NEXT:  seto %al
   %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -216,12 +256,9 @@ entry:
 
 define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) {
 entry:
-; DAG-LABEL:    ssubo.i64
-; DAG:          subq %rsi, %rdi
-; DAG-NEXT:     seto %al
-; FAST-LABEL:   ssubo.i64
-; FAST:         subq %rsi, %rdi
-; FAST-NEXT:    seto %al
+; CHECK-LABEL: ssubo.i64
+; CHECK:       subq %rsi, %rdi
+; CHECK-NEXT:  seto %al
   %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -232,12 +269,9 @@ entry:
 ; USUBO
 define zeroext i1 @usubo.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
-; DAG-LABEL:    usubo.i32
-; DAG:          subl %esi, %edi
-; DAG-NEXT:     setb %al
-; FAST-LABEL:   usubo.i32
-; FAST:         subl %esi, %edi
-; FAST-NEXT:    setb %al
+; CHECK-LABEL: usubo.i32
+; CHECK:       subl %esi, %edi
+; CHECK-NEXT:  setb %al
   %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -247,12 +281,9 @@ entry:
 
 define zeroext i1 @usubo.i64(i64 %v1, i64 %v2, i64* %res) {
 entry:
-; DAG-LABEL:    usubo.i64
-; DAG:          subq %rsi, %rdi
-; DAG-NEXT:     setb %al
-; FAST-LABEL:   usubo.i64
-; FAST:         subq %rsi, %rdi
-; FAST-NEXT:    setb %al
+; CHECK-LABEL: usubo.i64
+; CHECK:       subq %rsi, %rdi
+; CHECK-NEXT:  setb %al
   %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -263,10 +294,10 @@ entry:
 ; SMULO
 define zeroext i1 @smulo.i8(i8 %v1, i8 %v2, i8* %res) {
 entry:
-; FAST-LABEL:   smulo.i8
-; FAST:         movb %dil, %al
-; FAST-NEXT:    imulb %sil
-; FAST-NEXT:    seto %cl
+; CHECK-LABEL:   smulo.i8
+; CHECK:         movb %dil, %al
+; CHECK-NEXT:    imulb %sil
+; CHECK-NEXT:    seto %cl
   %t = call {i8, i1} @llvm.smul.with.overflow.i8(i8 %v1, i8 %v2)
   %val = extractvalue {i8, i1} %t, 0
   %obit = extractvalue {i8, i1} %t, 1
@@ -276,12 +307,9 @@ entry:
 
 define zeroext i1 @smulo.i16(i16 %v1, i16 %v2, i16* %res) {
 entry:
-; DAG-LABEL:    smulo.i16
-; DAG:          imulw %si, %di
-; DAG-NEXT:     seto %al
-; FAST-LABEL:   smulo.i16
-; FAST:         imulw %si, %di
-; FAST-NEXT:    seto %al
+; CHECK-LABEL: smulo.i16
+; CHECK:       imulw %si, %di
+; CHECK-NEXT:  seto %al
   %t = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %v1, i16 %v2)
   %val = extractvalue {i16, i1} %t, 0
   %obit = extractvalue {i16, i1} %t, 1
@@ -291,12 +319,9 @@ entry:
 
 define zeroext i1 @smulo.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
-; DAG-LABEL:    smulo.i32
-; DAG:          imull %esi, %edi
-; DAG-NEXT:     seto %al
-; FAST-LABEL:   smulo.i32
-; FAST:         imull %esi, %edi
-; FAST-NEXT:    seto %al
+; CHECK-LABEL: smulo.i32
+; CHECK:       imull %esi, %edi
+; CHECK-NEXT:  seto %al
   %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -306,12 +331,9 @@ entry:
 
 define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, i64* %res) {
 entry:
-; DAG-LABEL:    smulo.i64
-; DAG:          imulq %rsi, %rdi
-; DAG-NEXT:     seto %al
-; FAST-LABEL:   smulo.i64
-; FAST:         imulq %rsi, %rdi
-; FAST-NEXT:    seto %al
+; CHECK-LABEL: smulo.i64
+; CHECK:       imulq %rsi, %rdi
+; CHECK-NEXT:  seto %al
   %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -322,10 +344,10 @@ entry:
 ; UMULO
 define zeroext i1 @umulo.i8(i8 %v1, i8 %v2, i8* %res) {
 entry:
-; FAST-LABEL:   umulo.i8
-; FAST:         movb %dil, %al
-; FAST-NEXT:    mulb %sil
-; FAST-NEXT:    seto %cl
+; CHECK-LABEL:   umulo.i8
+; CHECK:         movb %dil, %al
+; CHECK-NEXT:    mulb %sil
+; CHECK-NEXT:    seto %cl
   %t = call {i8, i1} @llvm.umul.with.overflow.i8(i8 %v1, i8 %v2)
   %val = extractvalue {i8, i1} %t, 0
   %obit = extractvalue {i8, i1} %t, 1
@@ -335,12 +357,9 @@ entry:
 
 define zeroext i1 @umulo.i16(i16 %v1, i16 %v2, i16* %res) {
 entry:
-; DAG-LABEL:    umulo.i16
-; DAG:          mulw %si
-; DAG-NEXT:     seto
-; FAST-LABEL:   umulo.i16
-; FAST:         mulw %si
-; FAST-NEXT:    seto
+; CHECK-LABEL: umulo.i16
+; CHECK:       mulw %si
+; CHECK-NEXT:  seto
   %t = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %v1, i16 %v2)
   %val = extractvalue {i16, i1} %t, 0
   %obit = extractvalue {i16, i1} %t, 1
@@ -350,12 +369,9 @@ entry:
 
 define zeroext i1 @umulo.i32(i32 %v1, i32 %v2, i32* %res) {
 entry:
-; DAG-LABEL:    umulo.i32
-; DAG:          mull %esi
-; DAG-NEXT:     seto
-; FAST-LABEL:   umulo.i32
-; FAST:         mull %esi
-; FAST-NEXT:    seto
+; CHECK-LABEL: umulo.i32
+; CHECK:       mull %esi
+; CHECK-NEXT:  seto
   %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -365,12 +381,9 @@ entry:
 
 define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, i64* %res) {
 entry:
-; DAG-LABEL:    umulo.i64
-; DAG:          mulq %rsi
-; DAG-NEXT:     seto
-; FAST-LABEL:   umulo.i64
-; FAST:         mulq %rsi
-; FAST-NEXT:    seto
+; CHECK-LABEL: umulo.i64
+; CHECK:       mulq %rsi
+; CHECK-NEXT:  seto
   %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -383,9 +396,9 @@ entry:
 ;
 define i32 @saddo.select.i32(i32 %v1, i32 %v2) {
 entry:
-; CHECK-LABEL:    saddo.select.i32
-; CHECK:          addl   %esi, %eax
-; CHECK-NEXT:     cmovol %edi, %esi
+; CHECK-LABEL: saddo.select.i32
+; CHECK:       addl   %esi, %eax
+; CHECK-NEXT:  cmovol %edi, %esi
   %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
   %obit = extractvalue {i32, i1} %t, 1
   %ret = select i1 %obit, i32 %v1, i32 %v2
@@ -394,9 +407,9 @@ entry:
 
 define i64 @saddo.select.i64(i64 %v1, i64 %v2) {
 entry:
-; CHECK-LABEL:    saddo.select.i64
-; CHECK:          addq   %rsi, %rax
-; CHECK-NEXT:     cmovoq %rdi, %rsi
+; CHECK-LABEL: saddo.select.i64
+; CHECK:       addq   %rsi, %rax
+; CHECK-NEXT:  cmovoq %rdi, %rsi
   %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
   %obit = extractvalue {i64, i1} %t, 1
   %ret = select i1 %obit, i64 %v1, i64 %v2
@@ -405,9 +418,9 @@ entry:
 
 define i32 @uaddo.select.i32(i32 %v1, i32 %v2) {
 entry:
-; CHECK-LABEL:    uaddo.select.i32
-; CHECK:          addl   %esi, %eax
-; CHECK-NEXT:     cmovbl %edi, %esi
+; CHECK-LABEL: uaddo.select.i32
+; CHECK:       addl   %esi, %eax
+; CHECK-NEXT:  cmovbl %edi, %esi
   %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
   %obit = extractvalue {i32, i1} %t, 1
   %ret = select i1 %obit, i32 %v1, i32 %v2
@@ -416,9 +429,9 @@ entry:
 
 define i64 @uaddo.select.i64(i64 %v1, i64 %v2) {
 entry:
-; CHECK-LABEL:    uaddo.select.i64
-; CHECK:          addq   %rsi, %rax
-; CHECK-NEXT:     cmovbq %rdi, %rsi
+; CHECK-LABEL: uaddo.select.i64
+; CHECK:       addq   %rsi, %rax
+; CHECK-NEXT:  cmovbq %rdi, %rsi
   %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
   %obit = extractvalue {i64, i1} %t, 1
   %ret = select i1 %obit, i64 %v1, i64 %v2
@@ -427,9 +440,9 @@ entry:
 
 define i32 @ssubo.select.i32(i32 %v1, i32 %v2) {
 entry:
-; CHECK-LABEL:    ssubo.select.i32
-; CHECK:          cmpl   %esi, %edi
-; CHECK-NEXT:     cmovol %edi, %esi
+; CHECK-LABEL: ssubo.select.i32
+; CHECK:       cmpl   %esi, %edi
+; CHECK-NEXT:  cmovol %edi, %esi
   %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
   %obit = extractvalue {i32, i1} %t, 1
   %ret = select i1 %obit, i32 %v1, i32 %v2
@@ -438,9 +451,9 @@ entry:
 
 define i64 @ssubo.select.i64(i64 %v1, i64 %v2) {
 entry:
-; CHECK-LABEL:    ssubo.select.i64
-; CHECK:          cmpq   %rsi, %rdi
-; CHECK-NEXT:     cmovoq %rdi, %rsi
+; CHECK-LABEL: ssubo.select.i64
+; CHECK:       cmpq   %rsi, %rdi
+; CHECK-NEXT:  cmovoq %rdi, %rsi
   %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
   %obit = extractvalue {i64, i1} %t, 1
   %ret = select i1 %obit, i64 %v1, i64 %v2
@@ -449,9 +462,9 @@ entry:
 
 define i32 @usubo.select.i32(i32 %v1, i32 %v2) {
 entry:
-; CHECK-LABEL:    usubo.select.i32
-; CHECK:          cmpl   %esi, %edi
-; CHECK-NEXT:     cmovbl %edi, %esi
+; CHECK-LABEL: usubo.select.i32
+; CHECK:       cmpl   %esi, %edi
+; CHECK-NEXT:  cmovbl %edi, %esi
   %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
   %obit = extractvalue {i32, i1} %t, 1
   %ret = select i1 %obit, i32 %v1, i32 %v2
@@ -460,9 +473,9 @@ entry:
 
 define i64 @usubo.select.i64(i64 %v1, i64 %v2) {
 entry:
-; CHECK-LABEL:    usubo.select.i64
-; CHECK:          cmpq   %rsi, %rdi
-; CHECK-NEXT:     cmovbq %rdi, %rsi
+; CHECK-LABEL: usubo.select.i64
+; CHECK:       cmpq   %rsi, %rdi
+; CHECK-NEXT:  cmovbq %rdi, %rsi
   %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
   %obit = extractvalue {i64, i1} %t, 1
   %ret = select i1 %obit, i64 %v1, i64 %v2
@@ -471,9 +484,9 @@ entry:
 
 define i32 @smulo.select.i32(i32 %v1, i32 %v2) {
 entry:
-; CHECK-LABEL:    smulo.select.i32
-; CHECK:          imull  %esi, %eax
-; CHECK-NEXT:     cmovol %edi, %esi
+; CHECK-LABEL: smulo.select.i32
+; CHECK:       imull  %esi, %eax
+; CHECK-NEXT:  cmovol %edi, %esi
   %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
   %obit = extractvalue {i32, i1} %t, 1
   %ret = select i1 %obit, i32 %v1, i32 %v2
@@ -482,9 +495,9 @@ entry:
 
 define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
 entry:
-; CHECK-LABEL:    smulo.select.i64
-; CHECK:          imulq  %rsi, %rax
-; CHECK-NEXT:     cmovoq %rdi, %rsi
+; CHECK-LABEL: smulo.select.i64
+; CHECK:       imulq  %rsi, %rax
+; CHECK-NEXT:  cmovoq %rdi, %rsi
   %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
   %obit = extractvalue {i64, i1} %t, 1
   %ret = select i1 %obit, i64 %v1, i64 %v2
@@ -493,9 +506,9 @@ entry:
 
 define i32 @umulo.select.i32(i32 %v1, i32 %v2) {
 entry:
-; CHECK-LABEL:    umulo.select.i32
-; CHECK:          mull   %esi
-; CHECK-NEXT:     cmovol %edi, %esi
+; CHECK-LABEL: umulo.select.i32
+; CHECK:       mull   %esi
+; CHECK-NEXT:  cmovol %edi, %esi
   %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
   %obit = extractvalue {i32, i1} %t, 1
   %ret = select i1 %obit, i32 %v1, i32 %v2
@@ -504,9 +517,9 @@ entry:
 
 define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
 entry:
-; CHECK-LABEL:    umulo.select.i64
-; CHECK:          mulq   %rsi
-; CHECK-NEXT:     cmovoq %rdi, %rsi
+; CHECK-LABEL: umulo.select.i64
+; CHECK:       mulq   %rsi
+; CHECK-NEXT:  cmovoq %rdi, %rsi
   %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
   %obit = extractvalue {i64, i1} %t, 1
   %ret = select i1 %obit, i64 %v1, i64 %v2
@@ -519,9 +532,9 @@ entry:
 ;
 define zeroext i1 @saddo.br.i32(i32 %v1, i32 %v2) {
 entry:
-; CHECK-LABEL:    saddo.br.i32
-; CHECK:          addl   %esi, %edi
-; CHECK-NEXT:     jo
+; CHECK-LABEL: saddo.br.i32
+; CHECK:       addl   %esi, %edi
+; CHECK-NEXT:  jo
   %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -536,9 +549,9 @@ continue:
 
 define zeroext i1 @saddo.br.i64(i64 %v1, i64 %v2) {
 entry:
-; CHECK-LABEL:    saddo.br.i64
-; CHECK:          addq   %rsi, %rdi
-; CHECK-NEXT:     jo
+; CHECK-LABEL: saddo.br.i64
+; CHECK:       addq   %rsi, %rdi
+; CHECK-NEXT:  jo
   %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -553,9 +566,9 @@ continue:
 
 define zeroext i1 @uaddo.br.i32(i32 %v1, i32 %v2) {
 entry:
-; CHECK-LABEL:    uaddo.br.i32
-; CHECK:          addl   %esi, %edi
-; CHECK-NEXT:     jb
+; CHECK-LABEL: uaddo.br.i32
+; CHECK:       addl   %esi, %edi
+; CHECK-NEXT:  jb
   %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -570,9 +583,9 @@ continue:
 
 define zeroext i1 @uaddo.br.i64(i64 %v1, i64 %v2) {
 entry:
-; CHECK-LABEL:    uaddo.br.i64
-; CHECK:          addq   %rsi, %rdi
-; CHECK-NEXT:     jb
+; CHECK-LABEL: uaddo.br.i64
+; CHECK:       addq   %rsi, %rdi
+; CHECK-NEXT:  jb
   %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -587,9 +600,9 @@ continue:
 
 define zeroext i1 @ssubo.br.i32(i32 %v1, i32 %v2) {
 entry:
-; CHECK-LABEL:    ssubo.br.i32
-; CHECK:          cmpl   %esi, %edi
-; CHECK-NEXT:     jo
+; CHECK-LABEL: ssubo.br.i32
+; CHECK:       cmpl   %esi, %edi
+; CHECK-NEXT:  jo
   %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -604,9 +617,9 @@ continue:
 
 define zeroext i1 @ssubo.br.i64(i64 %v1, i64 %v2) {
 entry:
-; CHECK-LABEL:    ssubo.br.i64
-; CHECK:          cmpq   %rsi, %rdi
-; CHECK-NEXT:     jo
+; CHECK-LABEL: ssubo.br.i64
+; CHECK:       cmpq   %rsi, %rdi
+; CHECK-NEXT:  jo
   %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -621,9 +634,9 @@ continue:
 
 define zeroext i1 @usubo.br.i32(i32 %v1, i32 %v2) {
 entry:
-; CHECK-LABEL:    usubo.br.i32
-; CHECK:          cmpl   %esi, %edi
-; CHECK-NEXT:     jb
+; CHECK-LABEL: usubo.br.i32
+; CHECK:       cmpl   %esi, %edi
+; CHECK-NEXT:  jb
   %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -638,9 +651,9 @@ continue:
 
 define zeroext i1 @usubo.br.i64(i64 %v1, i64 %v2) {
 entry:
-; CHECK-LABEL:    usubo.br.i64
-; CHECK:          cmpq   %rsi, %rdi
-; CHECK-NEXT:     jb
+; CHECK-LABEL: usubo.br.i64
+; CHECK:       cmpq   %rsi, %rdi
+; CHECK-NEXT:  jb
   %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -655,9 +668,9 @@ continue:
 
 define zeroext i1 @smulo.br.i32(i32 %v1, i32 %v2) {
 entry:
-; CHECK-LABEL:    smulo.br.i32
-; CHECK:          imull  %esi, %edi
-; CHECK-NEXT:     jo
+; CHECK-LABEL: smulo.br.i32
+; CHECK:       imull  %esi, %edi
+; CHECK-NEXT:  jo
   %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -672,9 +685,9 @@ continue:
 
 define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
 entry:
-; CHECK-LABEL:    smulo.br.i64
-; CHECK:          imulq  %rsi, %rdi
-; CHECK-NEXT:     jo
+; CHECK-LABEL: smulo.br.i64
+; CHECK:       imulq  %rsi, %rdi
+; CHECK-NEXT:  jo
   %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -689,9 +702,9 @@ continue:
 
 define zeroext i1 @umulo.br.i32(i32 %v1, i32 %v2) {
 entry:
-; CHECK-LABEL:    umulo.br.i32
-; CHECK:          mull  %esi
-; CHECK-NEXT:     jo
+; CHECK-LABEL: umulo.br.i32
+; CHECK:       mull  %esi
+; CHECK-NEXT:  jo
   %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
   %val = extractvalue {i32, i1} %t, 0
   %obit = extractvalue {i32, i1} %t, 1
@@ -706,9 +719,9 @@ continue:
 
 define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
 entry:
-; CHECK-LABEL:    umulo.br.i64
-; CHECK:          mulq  %rsi
-; CHECK-NEXT:     jo
+; CHECK-LABEL: umulo.br.i64
+; CHECK:       mulq  %rsi
+; CHECK-NEXT:  jo
   %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
   %val = extractvalue {i64, i1} %t, 0
   %obit = extractvalue {i64, i1} %t, 1
@@ -725,6 +738,8 @@ declare {i8,  i1} @llvm.sadd.with.overflow.i8 (i8,  i8 ) nounwind readnone
 declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16) nounwind readnone
 declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
 declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
+declare {i8,  i1} @llvm.uadd.with.overflow.i8 (i8,  i8 ) nounwind readnone
+declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16) nounwind readnone
 declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
 declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
 declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
@@ -740,4 +755,4 @@ declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16) nounwind readnone
 declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
 declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
 
-!0 = metadata !{metadata !"branch_weights", i32 0, i32 2147483647}
+!0 = !{!"branch_weights", i32 0, i32 2147483647}
diff --git a/test/CodeGen/XCore/atomic.ll b/test/CodeGen/XCore/atomic.ll
index 58ef38bd3f60..6ca80cf5d9e7 100644
--- a/test/CodeGen/XCore/atomic.ll
+++ b/test/CodeGen/XCore/atomic.ll
@@ -22,11 +22,10 @@ entry:
 ; CHECK-LABEL: atomicloadstore
 
 ; CHECK: ldw r[[R0:[0-9]+]], dp[pool]
-; CHECK-NEXT: #MEMBARRIER
-  %0 = load atomic i32* bitcast (i64* @pool to i32*) acquire, align 4
-
 ; CHECK-NEXT: ldaw r[[R1:[0-9]+]], dp[pool]
+; CHECK-NEXT: #MEMBARRIER
 ; CHECK-NEXT: ldc r[[R2:[0-9]+]], 0
+  %0 = load atomic i32* bitcast (i64* @pool to i32*) acquire, align 4
 
 ; CHECK-NEXT: ld16s r3, r[[R1]][r[[R2]]]
 ; CHECK-NEXT: #MEMBARRIER
diff --git a/test/CodeGen/XCore/dwarf_debug.ll b/test/CodeGen/XCore/dwarf_debug.ll
index 2f4b23111bb2..8c9c47de6497 100644
--- a/test/CodeGen/XCore/dwarf_debug.ll
+++ b/test/CodeGen/XCore/dwarf_debug.ll
@@ -13,27 +13,27 @@ define i32 @f(i32 %a) {
 entry:
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !11), !dbg !12
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !11, metadata !{!"0x102"}), !dbg !12
   %0 = load i32* %a.addr, align 4, !dbg !12
   %add = add nsw i32 %0, 1, !dbg !12
   ret i32 %add, !dbg !12
 }
 
-declare void @llvm.dbg.declare(metadata, metadata)
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !10}
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1}
-!1 = metadata !{metadata !"", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @f, null, null, metadata !2, i32 2}
-!5 = metadata !{i32 786473, metadata !1}
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null}
-!7 = metadata !{metadata !8, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!11 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !5, i32 16777218, metadata !8, i32 0, i32 0}
-!12 = metadata !{i32 2, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00\000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ]
+!1 = !{!"", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00f\00f\00\002\000\001\000\006\00256\000\002", !1, !5, !6, null, i32 (i32)* @f, null, null, !2} ; [ DW_TAG_subprogram ]
+!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ]
+!7 = !{!8, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 2}
+!11 = !{!"0x101\00a\0016777218\000", !4, !5, !8} ; [ DW_TAG_arg_variable ]
+!12 = !MDLocation(line: 2, scope: !4)
 
diff --git a/test/CodeGen/XCore/exception.ll b/test/CodeGen/XCore/exception.ll
index 3179fcdfcf5d..fec83eb15ea5 100644
--- a/test/CodeGen/XCore/exception.ll
+++ b/test/CodeGen/XCore/exception.ll
@@ -107,17 +107,12 @@ Exit:
 ; CHECK: .asciiz
 ; CHECK: .byte  3
 ; CHECK: .byte  26
-; CHECK: [[SET0:.L[a-zA-Z0-9_]+]] = [[PRE_G]]-[[START]]
-; CHECK: .long [[SET0]]
-; CHECK: [[SET1:.L[a-zA-Z0-9_]+]] = [[POST_G]]-[[PRE_G]]
-; CHECK: .long [[SET1]]
-; CHECK: [[SET2:.L[a-zA-Z0-9_]+]] = [[LANDING]]-[[START]]
-; CHECK: .long [[SET2]]
+; CHECK: .long [[PRE_G]]-[[START]]
+; CHECK: .long [[POST_G]]-[[PRE_G]]
+; CHECK: .long [[LANDING]]-[[START]]
 ; CHECK: .byte 3
-; CHECK: [[SET3:.L[a-zA-Z0-9_]+]] = [[POST_G]]-[[START]]
-; CHECK: .long [[SET3]]
-; CHECK: [[SET4:.L[a-zA-Z0-9_]+]] = [[END]]-[[POST_G]]
-; CHECK: .long [[SET4]]
+; CHECK: .long [[POST_G]]-[[START]]
+; CHECK: .long [[END]]-[[POST_G]]
 ; CHECK: .long 0
 ; CHECK: .byte 0
 ; CHECK: .byte 1
diff --git a/test/DebugInfo/2009-10-16-Phi.ll b/test/DebugInfo/2009-10-16-Phi.ll
index 0f799e3a7892..e14653f6fcf2 100644
--- a/test/DebugInfo/2009-10-16-Phi.ll
+++ b/test/DebugInfo/2009-10-16-Phi.ll
@@ -10,4 +10,4 @@ B2:
    ret i32 %0
 }
 
-!0 = metadata !{i32 42}
+!0 = !{i32 42}
diff --git a/test/DebugInfo/2009-11-03-InsertExtractValue.ll b/test/DebugInfo/2009-11-03-InsertExtractValue.ll
index 21a60b826655..0b9f1b5ad948 100644
--- a/test/DebugInfo/2009-11-03-InsertExtractValue.ll
+++ b/test/DebugInfo/2009-11-03-InsertExtractValue.ll
@@ -4,12 +4,12 @@
 !llvm.dbg.cu = !{!5}
 !llvm.module.flags = !{!6}
 
-!0 = metadata !{i32 786478, metadata !4, metadata !1, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 3, metadata !2, i1 false, i1 false, i32 0, i32 0, null, i32 258, i1 false, null, null, i32 0, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 41, metadata !4} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 21, metadata !4, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!3 = metadata !{null}
-!4 = metadata !{metadata !"/foo", metadata !"bar.cpp"}
-!5 = metadata !{i32 458769, metadata !4, i32 12, metadata !"", i1 true, metadata !"", i32 0, metadata !3, metadata !3, null, null, null, metadata !""}; [DW_TAG_compile_unit ]
+!0 = !{!"0x2e\00bar\00bar\00_ZN3foo3barEv\003\000\000\000\006\00258\000\003", !4, !1, !2, null, null, null, i32 0, !1} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !4} ; [ DW_TAG_file_type ]
+!2 = !{!"0x15\00\000\000\000\000\000\000", !4, !1, null, !3, null} ; [ DW_TAG_subroutine_type ]
+!3 = !{null}
+!4 = !{!"/foo", !"bar.cpp"}
+!5 = !{!"0x11\0012\00\001\00\000\00\000", !4, !3, !3, null, null, null}; [DW_TAG_compile_unit ]
 
 define <{i32, i32}> @f1() {
 ; CHECK: !dbgx ![[NUMBER:[0-9]+]]
@@ -20,4 +20,4 @@ define <{i32, i32}> @f1() {
 }
 
 ; CHECK: [protected]
-!6 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!6 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
index 65907d679780..c73b945a7dbd 100644
--- a/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
+++ b/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
@@ -10,17 +10,17 @@ entry:
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!18}
 
-!0 = metadata !{i32 720913, metadata !17, i32 12, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 0}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !17, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [foo]
-!6 = metadata !{i32 720937, metadata !17} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!12 = metadata !{metadata !14}
-!14 = metadata !{i32 720948, i32 0, metadata !5, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 2, metadata !9, i32 1, i32 1, null, null} ; [ DW_TAG_variable ]
-!15 = metadata !{i32 3, i32 3, metadata !16, null}
-!16 = metadata !{i32 720907, metadata !17, metadata !5, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
-!17 = metadata !{metadata !"fb.c", metadata !"/private/tmp"}
-!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.0 (trunk 139632)\001\00\000\00\000", !17, !1, !1, !3, !12, null} ; [ DW_TAG_compile_unit ]
+!1 = !{i32 0}
+!3 = !{!5}
+!5 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\000\001\000", !17, !6, !7, null, i32 ()* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [foo]
+!6 = !{!"0x29", !17} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!12 = !{!14}
+!14 = !{!"0x34\00bar\00bar\00\002\001\001", !5, !6, !9, null, null} ; [ DW_TAG_variable ]
+!15 = !MDLocation(line: 3, column: 3, scope: !16)
+!16 = !{!"0xb\001\0011\000", !17, !5} ; [ DW_TAG_lexical_block ]
+!17 = !{!"fb.c", !"/private/tmp"}
+!18 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll b/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll
index 9beab2008b88..1b7c80f1f091 100644
--- a/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll
+++ b/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll
@@ -4,11 +4,11 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9}
 
-!0 = metadata !{i32 720913, metadata !8, i32 12, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!2 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720948, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 2, metadata !7, i32 0, i32 1, i32* @0, null} ; [ DW_TAG_variable ]
-!6 = metadata !{i32 720937, metadata !8} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{metadata !"g.c", metadata !"/private/tmp"}
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.0 (trunk 139632)\001\00\000\00\000", !8, !2, !2, !2, !3, null} ; [ DW_TAG_compile_unit ]
+!2 = !{}
+!3 = !{!5}
+!5 = !{!"0x34\00a\00a\00\002\000\001", null, !6, !7, i32* @0, null} ; [ DW_TAG_variable ]
+!6 = !{!"0x29", !8} ; [ DW_TAG_file_type ]
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!8 = !{!"g.c", !"/private/tmp"}
+!9 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/2009-11-10-CurrentFn.ll b/test/DebugInfo/2009-11-10-CurrentFn.ll
index 151d631d6868..bf237eee23fb 100644
--- a/test/DebugInfo/2009-11-10-CurrentFn.ll
+++ b/test/DebugInfo/2009-11-10-CurrentFn.ll
@@ -8,24 +8,24 @@ entry:
 
 declare void @foo(...)
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!18}
 
-!0 = metadata !{i32 720913, metadata !17, i32 12, metadata !"clang version 3.0 (trunk 139632)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !17, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32)* @bar, null, null, metadata !9, metadata !""} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [bar]
-!6 = metadata !{i32 720937, metadata !17} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null}
-!9 = metadata !{metadata !11}
-!11 = metadata !{i32 721153, metadata !17, metadata !5, metadata !"i", i32 16777219, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!12 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!13 = metadata !{i32 3, i32 14, metadata !5, null}
-!14 = metadata !{i32 4, i32 3, metadata !15, null}
-!15 = metadata !{i32 720907, metadata !17, metadata !5, i32 3, i32 17, i32 0} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 5, i32 1, metadata !15, null}
-!17 = metadata !{metadata !"cf.c", metadata !"/private/tmp"}
-!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.0 (trunk 139632)\001\00\000\00\000", !17, !1, !1, !3, !1, null} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00bar\00bar\00\003\000\001\000\006\00256\001\000", !17, !6, !7, null, void (i32)* @bar, null, null, !9} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [bar]
+!6 = !{!"0x29", !17} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null}
+!9 = !{!11}
+!11 = !{!"0x101\00i\0016777219\000", !17, !5, !12} ; [ DW_TAG_arg_variable ]
+!12 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!13 = !MDLocation(line: 3, column: 14, scope: !5)
+!14 = !MDLocation(line: 4, column: 3, scope: !15)
+!15 = !{!"0xb\003\0017\000", !17, !5} ; [ DW_TAG_lexical_block ]
+!16 = !MDLocation(line: 5, column: 1, scope: !15)
+!17 = !{!"cf.c", !"/private/tmp"}
+!18 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/2010-01-05-DbgScope.ll b/test/DebugInfo/2010-01-05-DbgScope.ll
index 809cebf4eed4..d55972024dda 100644
--- a/test/DebugInfo/2010-01-05-DbgScope.ll
+++ b/test/DebugInfo/2010-01-05-DbgScope.ll
@@ -11,15 +11,15 @@ entry:
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!14}
 
-!0 = metadata !{i32 571, i32 3, metadata !1, null}
-!1 = metadata !{i32 458763, metadata !11, metadata !2, i32 1, i32 1, i32 0}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", i32 561, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0}; [DW_TAG_subprogram ]
-!3 = metadata !{i32 458769, metadata !11, i32 12, metadata !"clang 1.1", i1 true, metadata !"", i32 0, metadata !12, metadata !12, metadata !13, null, null, metadata !""}; [DW_TAG_compile_unit ]
-!4 = metadata !{i32 458773, null, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{i32 458788, null, metadata !3, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 588, i32 1, metadata !2, null}
-!11 = metadata !{metadata !"hashtab.c", metadata !"/usr/src/gnu/usr.bin/cc/cc_tools/../../../../contrib/gcclibs/libiberty"}
-!12 = metadata !{i32 0}
-!13 = metadata !{metadata !2}
-!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !MDLocation(line: 571, column: 3, scope: !1)
+!1 = !{!"0xb\001\001\000", !11, !2}; [DW_TAG_lexical_block ]
+!2 = !{!"0x2e\00foo\00foo\00foo\00561\000\001\000\006\000\000\000", i32 0, !3, !4, null, null, null, null, null}; [DW_TAG_subprogram ]
+!3 = !{!"0x11\0012\00clang 1.1\001\00\000\00\000", !11, !12, !12, !13, null, null}; [DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", null, !3, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{!6}
+!6 = !{!"0x24\00char\000\008\008\000\000\006", null, !3} ; [ DW_TAG_base_type ]
+!10 = !MDLocation(line: 588, column: 1, scope: !2)
+!11 = !{!"hashtab.c", !"/usr/src/gnu/usr.bin/cc/cc_tools/../../../../contrib/gcclibs/libiberty"}
+!12 = !{i32 0}
+!13 = !{!2}
+!14 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/2010-03-12-llc-crash.ll b/test/DebugInfo/2010-03-12-llc-crash.ll
index 241bb3734c98..1b0d794ab78b 100644
--- a/test/DebugInfo/2010-03-12-llc-crash.ll
+++ b/test/DebugInfo/2010-03-12-llc-crash.ll
@@ -1,22 +1,22 @@
 ; RUN: llc -O0 < %s -o /dev/null
 ; llc should not crash on this invalid input.
 ; PR6588
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define void @foo() {
 entry:
-  call void @llvm.dbg.declare(metadata !{i32* undef}, metadata !0)
+  call void @llvm.dbg.declare(metadata i32* undef, metadata !0, metadata !{!"0x102"})
   ret void
 }
 
-!0 = metadata !{i32 524545, metadata !1, metadata !"sy", metadata !2, i32 890, metadata !7} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, metadata !8, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", i32 892, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !8} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, metadata !9, i32 4, metadata !"clang 1.1", i1 true, metadata !"", i32 0, metadata !10, metadata !10, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !9, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{i32 524329, metadata !9} ; [ DW_TAG_file_type ]
-!6 = metadata !{null}
-!7 = metadata !{i32 524324, metadata !9, metadata !5, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{metadata !"qpainter.h", metadata !"QtGui"}
-!9 = metadata !{metadata !"splineeditor.cpp", metadata !"src"}
-!10 = metadata !{i32 0}
+!0 = !{!"0x101\00sy\00890\000", !1, !2, !7} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00foo\00foo\00foo\00892\000\001\000\006\000\000\000", !8, !3, !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !8} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\004\00clang 1.1\001\00\000\00\000", !9, !10, !10, null, null, null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !9, !5, null, !6, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{!"0x29", !9} ; [ DW_TAG_file_type ]
+!6 = !{null}
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", !9, !5} ; [ DW_TAG_base_type ]
+!8 = !{!"qpainter.h", !"QtGui"}
+!9 = !{!"splineeditor.cpp", !"src"}
+!10 = !{i32 0}
diff --git a/test/DebugInfo/2010-03-19-DbgDeclare.ll b/test/DebugInfo/2010-03-19-DbgDeclare.ll
index 94aa259d31b6..831680160987 100644
--- a/test/DebugInfo/2010-03-19-DbgDeclare.ll
+++ b/test/DebugInfo/2010-03-19-DbgDeclare.ll
@@ -1,19 +1,19 @@
 ; RUN: opt < %s -verify -S | FileCheck %s
 
-; CHECK: lang 0x8001
+; CHECK: [DW_LANG_Mips_Assembler]
 
 define void @Foo(i32 %a, i32 %b) {
 entry:
-  call void @llvm.dbg.declare(metadata !{i32* null}, metadata !1)
+  call void @llvm.dbg.declare(metadata i32* null, metadata !1, metadata !{!"0x102"})
   ret void
 }
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!5}
-!2 = metadata !{i32 786449, metadata !4, i32 32769, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !3, metadata !3,  metadata !3, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/scratch.cpp] [lang 0x8001]
-!3 = metadata !{}
-!0 = metadata !{i32 662302, i32 26, metadata !1, null}
-!1 = metadata !{i32 4, metadata !"foo"}
-!4 = metadata !{metadata !"scratch.cpp", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
+!2 = !{!"0x11\0032769\00clang version 3.3 \000\00\000\00\001", !4, !3, !3, !3, !3,  !3} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/scratch.cpp] [lang 0x8001]
+!3 = !{}
+!0 = !MDLocation(line: 662302, column: 26, scope: !1)
+!1 = !{i32 4, !"foo"}
+!4 = !{!"scratch.cpp", !"/usr/local/google/home/blaikie/dev/scratch"}
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-!5 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+!5 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/2010-03-24-MemberFn.ll b/test/DebugInfo/2010-03-24-MemberFn.ll
index 4ea9d2cf9861..7b091095048d 100644
--- a/test/DebugInfo/2010-03-24-MemberFn.ll
+++ b/test/DebugInfo/2010-03-24-MemberFn.ll
@@ -8,7 +8,7 @@ entry:
   %0 = alloca i32                                 ; <i32*> [#uses=2]
   %s1 = alloca %struct.S                          ; <%struct.S*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{%struct.S* %s1}, metadata !0), !dbg !16
+  call void @llvm.dbg.declare(metadata %struct.S* %s1, metadata !0, metadata !{!"0x102"}), !dbg !16
   %1 = call i32 @_ZN1S3fooEv(%struct.S* %s1) nounwind, !dbg !17 ; <i32> [#uses=1]
   store i32 %1, i32* %0, align 4, !dbg !17
   %2 = load i32* %0, align 4, !dbg !17            ; <i32> [#uses=1]
@@ -25,7 +25,7 @@ entry:
   %this_addr = alloca %struct.S*                  ; <%struct.S**> [#uses=1]
   %retval = alloca i32                            ; <i32*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{%struct.S** %this_addr}, metadata !18), !dbg !21
+  call void @llvm.dbg.declare(metadata %struct.S** %this_addr, metadata !18, metadata !{!"0x102"}), !dbg !21
   store %struct.S* %this, %struct.S** %this_addr
   br label %return, !dbg !21
 
@@ -34,37 +34,37 @@ return:                                           ; preds = %entry
   ret i32 %retval1, !dbg !22
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!5}
 !llvm.module.flags = !{!28}
 
-!0 = metadata !{i32 786688, metadata !1, metadata !"s1", metadata !4, i32 3, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 786443, metadata !25, metadata !2, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 786443, metadata !25, metadata !3, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!3 = metadata !{i32 786478, metadata !25, metadata !4, metadata !"bar", metadata !"bar", metadata !"_Z3barv", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @_Z3barv, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
-!4 = metadata !{i32 786473, metadata !25} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786449, metadata !25, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !27, metadata !27, metadata !24, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!6 = metadata !{i32 786453, metadata !25, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, metadata !25, metadata !4, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786451, metadata !26, metadata !4, metadata !"S", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [S] [line 2, size 8, align 8, offset 0] [def] [from ]
-!10 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
-!11 = metadata !{metadata !12}
-!12 = metadata !{i32 786478, metadata !26, metadata !9, metadata !"foo", metadata !"foo", metadata !"_ZN1S3fooEv", i32 3, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (%struct.S*)* @_ZN1S3fooEv, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 786453, metadata !25, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!14 = metadata !{metadata !8, metadata !15}
-!15 = metadata !{i32 786447, metadata !25, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ]
-!16 = metadata !{i32 3, i32 0, metadata !1, null}
-!17 = metadata !{i32 3, i32 0, metadata !3, null}
-!18 = metadata !{i32 786689, metadata !12, metadata !"this", metadata !10, i32 3, metadata !19, i32 0, null} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 786470, metadata !25, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_const_type ]
-!20 = metadata !{i32 786447, metadata !25, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
-!21 = metadata !{i32 3, i32 0, metadata !12, null}
-!22 = metadata !{i32 3, i32 0, metadata !23, null}
-!23 = metadata !{i32 786443, metadata !26, metadata !12, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!24 = metadata !{metadata !3, metadata !12}
-!25 = metadata !{metadata !"one.cc", metadata !"/tmp/"}
-!26 = metadata !{metadata !"one.h", metadata !"/tmp/"}
-!27 = metadata !{i32 0}
-!28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x100\00s1\003\000", !1, !4, !9} ; [ DW_TAG_auto_variable ]
+!1 = !{!"0xb\003\000\000", !25, !2} ; [ DW_TAG_lexical_block ]
+!2 = !{!"0xb\003\000\000", !25, !3} ; [ DW_TAG_lexical_block ]
+!3 = !{!"0x2e\00bar\00bar\00_Z3barv\003\000\001\000\006\000\000\003", !25, !4, !6, null, i32 ()* @_Z3barv, null, null, null} ; [ DW_TAG_subprogram ]
+!4 = !{!"0x29", !25} ; [ DW_TAG_file_type ]
+!5 = !{!"0x11\004\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\000\00\000\00\000", !25, !27, !27, !24, null,  null} ; [ DW_TAG_compile_unit ]
+!6 = !{!"0x15\00\000\000\000\000\000\000", !25, !4, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", !25, !4} ; [ DW_TAG_base_type ]
+!9 = !{!"0x13\00S\002\008\008\000\000\000", !26, !4, null, !11, null, null, null} ; [ DW_TAG_structure_type ] [S] [line 2, size 8, align 8, offset 0] [def] [from ]
+!10 = !{!"0x29", !26} ; [ DW_TAG_file_type ]
+!11 = !{!12}
+!12 = !{!"0x2e\00foo\00foo\00_ZN1S3fooEv\003\000\001\000\006\000\000\003", !26, !9, !13, null, i32 (%struct.S*)* @_ZN1S3fooEv, null, null, null} ; [ DW_TAG_subprogram ]
+!13 = !{!"0x15\00\000\000\000\000\000\000", !25, null, null, !14, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = !{!8, !15}
+!15 = !{!"0xf\00\000\0064\0064\000\0064", !25, !4, !9} ; [ DW_TAG_pointer_type ]
+!16 = !MDLocation(line: 3, scope: !1)
+!17 = !MDLocation(line: 3, scope: !3)
+!18 = !{!"0x101\00this\003\000", !12, !10, !19} ; [ DW_TAG_arg_variable ]
+!19 = !{!"0x26\00\000\0064\0064\000\0064", !25, !4, !20} ; [ DW_TAG_const_type ]
+!20 = !{!"0xf\00\000\0064\0064\000\000", !25, !4, !9} ; [ DW_TAG_pointer_type ]
+!21 = !MDLocation(line: 3, scope: !12)
+!22 = !MDLocation(line: 3, scope: !23)
+!23 = !{!"0xb\003\000\000", !26, !12} ; [ DW_TAG_lexical_block ]
+!24 = !{!3, !12}
+!25 = !{!"one.cc", !"/tmp/"}
+!26 = !{!"one.h", !"/tmp/"}
+!27 = !{i32 0}
+!28 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll b/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll
index 81285a9b2c90..c967f73550fb 100644
--- a/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll
+++ b/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll
@@ -2,35 +2,35 @@
 
 define void @baz(i32 %i) nounwind ssp {
 entry:
-  call void @llvm.dbg.declare(metadata !0, metadata !1), !dbg !0
+  call void @llvm.dbg.declare(metadata !0, metadata !1, metadata !{!"0x102"}), !dbg !0
   ret void, !dbg !0
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!5}
 !llvm.module.flags = !{!22}
 
-!0 = metadata !{{ [0 x i8] }** undef}
-!1 = metadata !{i32 524544, metadata !2, metadata !"x", metadata !4, i32 11, metadata !9} ; [ DW_TAG_auto_variable ]
-!2 = metadata !{i32 524299, metadata !20, metadata !3, i32 8, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!3 = metadata !{i32 524334, metadata !20, null, metadata !"baz", metadata !"baz", metadata !"baz", i32 8, metadata !6, i1 true, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!4 = metadata !{i32 524329, metadata !20} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 524305, metadata !20, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!6 = metadata !{i32 524309, metadata !20, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null, metadata !8}
-!8 = metadata !{i32 524324, metadata !20, metadata !4, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 524303, metadata !20, metadata !4, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 524307, metadata !20, metadata !3, metadata !"", i32 11, i64 8, i64 8, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [line 11, size 8, align 8, offset 0] [def] [from ]
-!11 = metadata !{metadata !12}
-!12 = metadata !{i32 524301, metadata !20, metadata !10, metadata !"b", i32 11, i64 8, i64 8, i64 0, i32 0, metadata !13} ; [ DW_TAG_member ]
-!13 = metadata !{i32 524310, metadata !20, metadata !3, metadata !"A", i32 11, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_typedef ]
-!14 = metadata !{i32 524289, metadata !20, metadata !4, metadata !"", i32 0, i64 8, i64 8, i64 0, i32 0, metadata !15, metadata !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 8, align 8, offset 0] [from char]
-!15 = metadata !{i32 524324, metadata !20, metadata !4, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!16 = metadata !{metadata !17}
-!17 = metadata !{i32 524321, i64 0, i64 1}        ; [ DW_TAG_subrange_type ]
-!18 = metadata !{metadata !"llvm.mdnode.fwdref.19"}
-!19 = metadata !{metadata !"llvm.mdnode.fwdref.23"}
-!20 = metadata !{metadata !"2007-12-VarArrayDebug.c", metadata !"/Users/sabre/llvm/test/FrontendC/"}
-!21 = metadata !{i32 0}
-!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{{ [0 x i8] }** undef}
+!1 = !{!"0x100\00x\0011\000", !2, !4, !9} ; [ DW_TAG_auto_variable ]
+!2 = !{!"0xb\008\000\000", !20, !3} ; [ DW_TAG_lexical_block ]
+!3 = !{!"0x2e\00baz\00baz\00baz\008\001\001\000\006\000\000\000", !20, null, !6, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!4 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
+!5 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", !20, !21, !21, null, null, null} ; [ DW_TAG_compile_unit ]
+!6 = !{!"0x15\00\000\000\000\000\000\000", !20, !4, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", !20, !4} ; [ DW_TAG_base_type ]
+!9 = !{!"0xf\00\000\0064\0064\000\000", !20, !4, !10} ; [ DW_TAG_pointer_type ]
+!10 = !{!"0x13\00\0011\008\008\000\000\000", !20, !3, null, !11, null, null, null} ; [ DW_TAG_structure_type ] [line 11, size 8, align 8, offset 0] [def] [from ]
+!11 = !{!12}
+!12 = !{!"0xd\00b\0011\008\008\000\000", !20, !10, !13} ; [ DW_TAG_member ]
+!13 = !{!"0x16\00A\0011\000\000\000\000", !20, !3, !14} ; [ DW_TAG_typedef ]
+!14 = !{!"0x1\00\000\008\008\000\000", !20, !4, !15, !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 8, align 8, offset 0] [from char]
+!15 = !{!"0x24\00char\000\008\008\000\000\006", !20, !4} ; [ DW_TAG_base_type ]
+!16 = !{!17}
+!17 = !{!"0x21\000\001"}        ; [ DW_TAG_subrange_type ]
+!18 = !{!"llvm.mdnode.fwdref.19"}
+!19 = !{!"llvm.mdnode.fwdref.23"}
+!20 = !{!"2007-12-VarArrayDebug.c", !"/Users/sabre/llvm/test/FrontendC/"}
+!21 = !{i32 0}
+!22 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
index 5f7cb696d738..ce52d24aa7f6 100644
--- a/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
+++ b/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
@@ -26,14 +26,14 @@ entry:
   %retval = alloca i32, align 4                   ; <i32*> [#uses=3]
   %b = alloca %class.A, align 1                   ; <%class.A*> [#uses=1]
   store i32 0, i32* %retval
-  call void @llvm.dbg.declare(metadata !{%class.A* %b}, metadata !0), !dbg !14
+  call void @llvm.dbg.declare(metadata %class.A* %b, metadata !0, metadata !{!"0x102"}), !dbg !14
   %call = call i32 @_ZN1B2fnEv(%class.A* %b), !dbg !15 ; <i32> [#uses=1]
   store i32 %call, i32* %retval, !dbg !15
   %0 = load i32* %retval, !dbg !16                ; <i32> [#uses=1]
   ret i32 %0, !dbg !16
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define linkonce_odr i32 @_ZN1B2fnEv(%class.A* %this) ssp align 2 {
 entry:
@@ -42,10 +42,10 @@ entry:
   %a = alloca %class.A, align 1                   ; <%class.A*> [#uses=1]
   %i = alloca i32, align 4                        ; <i32*> [#uses=2]
   store %class.A* %this, %class.A** %this.addr
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !17), !dbg !18
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !17, metadata !{!"0x102"}), !dbg !18
   %this1 = load %class.A** %this.addr             ; <%class.A*> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{%class.A* %a}, metadata !19), !dbg !27
-  call void @llvm.dbg.declare(metadata !{i32* %i}, metadata !28), !dbg !29
+  call void @llvm.dbg.declare(metadata %class.A* %a, metadata !19, metadata !{!"0x102"}), !dbg !27
+  call void @llvm.dbg.declare(metadata i32* %i, metadata !28, metadata !{!"0x102"}), !dbg !29
   %call = call i32 @_ZZN1B2fnEvEN1A3fooEv(%class.A* %a), !dbg !30 ; <i32> [#uses=1]
   store i32 %call, i32* %i, !dbg !30
   %tmp = load i32* %i, !dbg !31                   ; <i32> [#uses=1]
@@ -59,7 +59,7 @@ entry:
   %retval = alloca i32, align 4                   ; <i32*> [#uses=2]
   %this.addr = alloca %class.A*, align 8          ; <%class.A**> [#uses=2]
   store %class.A* %this, %class.A** %this.addr
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !33), !dbg !34
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !33, metadata !{!"0x102"}), !dbg !34
   %this1 = load %class.A** %this.addr             ; <%class.A*> [#uses=0]
   store i32 42, i32* %retval, !dbg !35
   %0 = load i32* %retval, !dbg !35                ; <i32> [#uses=1]
@@ -68,45 +68,45 @@ entry:
 
 !llvm.dbg.cu = !{!4}
 !llvm.module.flags = !{!40}
-!37 = metadata !{metadata !2, metadata !10, metadata !23}
+!37 = !{!2, !10, !23}
 
-!0 = metadata !{i32 786688, metadata !1, metadata !"b", metadata !3, i32 16, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 786443, metadata !38, metadata !2, i32 15, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 786478, metadata !38, metadata !3, metadata !"main", metadata !"main", metadata !"main", i32 15, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, i32 ()* @main, null, null, null, i32 15} ; [ DW_TAG_subprogram ]
-!3 = metadata !{i32 786473, metadata !38} ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 786449, metadata !38, i32 4, metadata !"clang 1.5", i1 false, metadata !"", i32 0, metadata !39, metadata !39, metadata !37, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 786453, metadata !38, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!6 = metadata !{metadata !7}
-!7 = metadata !{i32 786468, metadata !38, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 786434, metadata !38, metadata !3, metadata !"B", i32 2, i64 8, i64 8, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_class_type ] [B] [line 2, size 8, align 8, offset 0] [def] [from ]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786478, metadata !38, metadata !8, metadata !"fn", metadata !"fn", metadata !"_ZN1B2fnEv", i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, i32 (%class.A*)* @_ZN1B2fnEv, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, metadata !38, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !7, metadata !13}
-!13 = metadata !{i32 786447, metadata !38, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ]
-!14 = metadata !{i32 16, i32 5, metadata !1, null}
-!15 = metadata !{i32 17, i32 3, metadata !1, null}
-!16 = metadata !{i32 18, i32 1, metadata !2, null}
-!17 = metadata !{i32 786689, metadata !10, metadata !"this", metadata !3, i32 4, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
-!18 = metadata !{i32 4, i32 7, metadata !10, null}
-!19 = metadata !{i32 786688, metadata !20, metadata !"a", metadata !3, i32 9, metadata !21, i32 0, null} ; [ DW_TAG_auto_variable ]
-!20 = metadata !{i32 786443, metadata !38, metadata !10, i32 4, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
-!21 = metadata !{i32 786434, metadata !38, metadata !10, metadata !"A", i32 5, i64 8, i64 8, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 5, size 8, align 8, offset 0] [def] [from ]
-!22 = metadata !{metadata !23}
-!23 = metadata !{i32 786478, metadata !38, metadata !21, metadata !"foo", metadata !"foo", metadata !"_ZZN1B2fnEvEN1A3fooEv", i32 7, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, i32 (%class.A*)* @_ZZN1B2fnEvEN1A3fooEv, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
-!24 = metadata !{i32 786453, metadata !38, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!25 = metadata !{metadata !7, metadata !26}
-!26 = metadata !{i32 786447, metadata !38, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 64, metadata !21} ; [ DW_TAG_pointer_type ]
-!27 = metadata !{i32 9, i32 7, metadata !20, null}
-!28 = metadata !{i32 786688, metadata !20, metadata !"i", metadata !3, i32 10, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
-!29 = metadata !{i32 10, i32 9, metadata !20, null}
-!30 = metadata !{i32 10, i32 5, metadata !20, null}
-!31 = metadata !{i32 11, i32 5, metadata !20, null}
-!32 = metadata !{i32 12, i32 3, metadata !10, null}
-!33 = metadata !{i32 786689, metadata !23, metadata !"this", metadata !3, i32 7, metadata !26, i32 0, null} ; [ DW_TAG_arg_variable ]
-!34 = metadata !{i32 7, i32 11, metadata !23, null}
-!35 = metadata !{i32 7, i32 19, metadata !36, null}
-!36 = metadata !{i32 786443, metadata !38, metadata !23, i32 7, i32 17, i32 0} ; [ DW_TAG_lexical_block ]
-!38 = metadata !{metadata !"one.cc", metadata !"/tmp" }
-!39 = metadata !{i32 0}
-!40 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x100\00b\0016\000", !1, !3, !8} ; [ DW_TAG_auto_variable ]
+!1 = !{!"0xb\0015\0012\000", !38, !2} ; [ DW_TAG_lexical_block ]
+!2 = !{!"0x2e\00main\00main\00main\0015\000\001\000\006\000\000\0015", !38, !3, !5, null, i32 ()* @main, null, null, null} ; [ DW_TAG_subprogram ]
+!3 = !{!"0x29", !38} ; [ DW_TAG_file_type ]
+!4 = !{!"0x11\004\00clang 1.5\000\00\000\00\000", !38, !39, !39, !37, null,  null} ; [ DW_TAG_compile_unit ]
+!5 = !{!"0x15\00\000\000\000\000\000\000", !38, !3, null, !6, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = !{!7}
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", !38, !3} ; [ DW_TAG_base_type ]
+!8 = !{!"0x2\00B\002\008\008\000\000\000", !38, !3, null, !9, null, null, null} ; [ DW_TAG_class_type ] [B] [line 2, size 8, align 8, offset 0] [def] [from ]
+!9 = !{!10}
+!10 = !{!"0x2e\00fn\00fn\00_ZN1B2fnEv\004\000\001\000\006\000\000\004", !38, !8, !11, null, i32 (%class.A*)* @_ZN1B2fnEv, null, null, null} ; [ DW_TAG_subprogram ]
+!11 = !{!"0x15\00\000\000\000\000\000\000", !38, !3, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!7, !13}
+!13 = !{!"0xf\00\000\0064\0064\000\0064", !38, !3, !8} ; [ DW_TAG_pointer_type ]
+!14 = !MDLocation(line: 16, column: 5, scope: !1)
+!15 = !MDLocation(line: 17, column: 3, scope: !1)
+!16 = !MDLocation(line: 18, column: 1, scope: !2)
+!17 = !{!"0x101\00this\004\000", !10, !3, !13} ; [ DW_TAG_arg_variable ]
+!18 = !MDLocation(line: 4, column: 7, scope: !10)
+!19 = !{!"0x100\00a\009\000", !20, !3, !21} ; [ DW_TAG_auto_variable ]
+!20 = !{!"0xb\004\0012\000", !38, !10} ; [ DW_TAG_lexical_block ]
+!21 = !{!"0x2\00A\005\008\008\000\000\000", !38, !10, null, !22, null, null, null} ; [ DW_TAG_class_type ] [A] [line 5, size 8, align 8, offset 0] [def] [from ]
+!22 = !{!23}
+!23 = !{!"0x2e\00foo\00foo\00_ZZN1B2fnEvEN1A3fooEv\007\000\001\000\006\000\000\007", !38, !21, !24, null, i32 (%class.A*)* @_ZZN1B2fnEvEN1A3fooEv, null, null, null} ; [ DW_TAG_subprogram ]
+!24 = !{!"0x15\00\000\000\000\000\000\000", !38, !3, null, !25, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!25 = !{!7, !26}
+!26 = !{!"0xf\00\000\0064\0064\000\0064", !38, !3, !21} ; [ DW_TAG_pointer_type ]
+!27 = !MDLocation(line: 9, column: 7, scope: !20)
+!28 = !{!"0x100\00i\0010\000", !20, !3, !7} ; [ DW_TAG_auto_variable ]
+!29 = !MDLocation(line: 10, column: 9, scope: !20)
+!30 = !MDLocation(line: 10, column: 5, scope: !20)
+!31 = !MDLocation(line: 11, column: 5, scope: !20)
+!32 = !MDLocation(line: 12, column: 3, scope: !10)
+!33 = !{!"0x101\00this\007\000", !23, !3, !26} ; [ DW_TAG_arg_variable ]
+!34 = !MDLocation(line: 7, column: 11, scope: !23)
+!35 = !MDLocation(line: 7, column: 19, scope: !36)
+!36 = !{!"0xb\007\0017\000", !38, !23} ; [ DW_TAG_lexical_block ]
+!38 = !{!"one.cc", !"/tmp" }
+!39 = !{i32 0}
+!40 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/2010-04-19-FramePtr.ll b/test/DebugInfo/2010-04-19-FramePtr.ll
index 6c772230d93c..fe5a1f448551 100644
--- a/test/DebugInfo/2010-04-19-FramePtr.ll
+++ b/test/DebugInfo/2010-04-19-FramePtr.ll
@@ -21,17 +21,17 @@ return:                                           ; preds = %entry
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!12}
-!9 = metadata !{metadata !1}
+!9 = !{!1}
 
-!0 = metadata !{i32 2, i32 0, metadata !1, null}
-!1 = metadata !{i32 786478, metadata !10, null, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @foo, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !10, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !11, metadata !11, metadata !9, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !10, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{i32 786468, metadata !10, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 2, i32 0, metadata !8, null}
-!8 = metadata !{i32 786443, metadata !10, metadata !1, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{metadata !"a.c", metadata !"/tmp"}
-!11 = metadata !{i32 0}
-!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !MDLocation(line: 2, scope: !1)
+!1 = !{!"0x2e\00foo\00foo\00foo\002\000\001\000\006\000\000\002", !10, null, !4, null, i32 ()* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !10} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\000\00\000\00\000", !10, !11, !11, !9, null,  null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !10, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{!6}
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", !10, !2} ; [ DW_TAG_base_type ]
+!7 = !MDLocation(line: 2, scope: !8)
+!8 = !{!"0xb\002\000\000", !10, !1} ; [ DW_TAG_lexical_block ]
+!10 = !{!"a.c", !"/tmp"}
+!11 = !{i32 0}
+!12 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/2010-05-03-DisableFramePtr.ll b/test/DebugInfo/2010-05-03-DisableFramePtr.ll
index ba8d0e581cd1..d767871a2764 100644
--- a/test/DebugInfo/2010-05-03-DisableFramePtr.ll
+++ b/test/DebugInfo/2010-05-03-DisableFramePtr.ll
@@ -6,7 +6,7 @@ define void @DisposeDMNotificationUPP(void (%struct.AppleEvent*)* %userUPP) "no-
 entry:
   %userUPP_addr = alloca void (%struct.AppleEvent*)* ; <void (%struct.AppleEvent*)**> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{void (%struct.AppleEvent*)** %userUPP_addr}, metadata !0), !dbg !13
+  call void @llvm.dbg.declare(metadata void (%struct.AppleEvent*)** %userUPP_addr, metadata !0, metadata !{!"0x102"}), !dbg !13
   store void (%struct.AppleEvent*)* %userUPP, void (%struct.AppleEvent*)** %userUPP_addr
   br label %return, !dbg !14
 
@@ -14,27 +14,27 @@ return:                                           ; preds = %entry
   ret void, !dbg !14
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!19}
-!0 = metadata !{i32 524545, metadata !1, metadata !"userUPP", metadata !2, i32 7, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, metadata !16, null, metadata !"DisposeDMNotificationUPP", metadata !"DisposeDMNotificationUPP", metadata !"DisposeDMNotificationUPP", i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !16} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, metadata !16, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, metadata !"", i32 0, metadata !17, metadata !17, metadata !18, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !16, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{null, metadata !6}
-!6 = metadata !{i32 524310, metadata !16, metadata !2, metadata !"DMNotificationUPP", i32 6, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
-!7 = metadata !{i32 524303, metadata !16, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 524309, metadata !16, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!9 = metadata !{null, metadata !10}
-!10 = metadata !{i32 524303, metadata !16, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 524310, metadata !16, metadata !2, metadata !"AppleEvent", i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
-!12 = metadata !{i32 524307, metadata !16, metadata !2, metadata !"AEDesc", i32 1, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [AEDesc] [line 1, size 0, align 0, offset 0] [decl] [from ]
-!13 = metadata !{i32 7, i32 0, metadata !1, null}
-!14 = metadata !{i32 8, i32 0, metadata !15, null}
-!15 = metadata !{i32 524299, metadata !16, metadata !1, i32 7, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{metadata !"t.c", metadata !"/Users/echeng/LLVM/radars/r7937664/"}
-!17 = metadata !{i32 0}
-!18 = metadata !{metadata !1}
-!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x101\00userUPP\007\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00DisposeDMNotificationUPP\00DisposeDMNotificationUPP\00DisposeDMNotificationUPP\007\000\001\000\006\000\000\000", !16, null, !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !16} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)\001\00\000\00\000", !16, !17, !17, !18, null, null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !16, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{null, !6}
+!6 = !{!"0x16\00DMNotificationUPP\006\000\000\000\000", !16, !2, !7} ; [ DW_TAG_typedef ]
+!7 = !{!"0xf\00\000\0064\0064\000\000", !16, !2, !8} ; [ DW_TAG_pointer_type ]
+!8 = !{!"0x15\00\000\000\000\000\000\000", !16, !2, null, !9, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = !{null, !10}
+!10 = !{!"0xf\00\000\0064\0064\000\000", !16, !2, !11} ; [ DW_TAG_pointer_type ]
+!11 = !{!"0x16\00AppleEvent\004\000\000\000\000", !16, !2, !12} ; [ DW_TAG_typedef ]
+!12 = !{!"0x13\00AEDesc\001\000\000\000\004\000", !16, !2, null, null, null, null, null} ; [ DW_TAG_structure_type ] [AEDesc] [line 1, size 0, align 0, offset 0] [decl] [from ]
+!13 = !MDLocation(line: 7, scope: !1)
+!14 = !MDLocation(line: 8, scope: !15)
+!15 = !{!"0xb\007\000\000", !16, !1} ; [ DW_TAG_lexical_block ]
+!16 = !{!"t.c", !"/Users/echeng/LLVM/radars/r7937664/"}
+!17 = !{i32 0}
+!18 = !{!1}
+!19 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/2010-05-03-OriginDIE.ll b/test/DebugInfo/2010-05-03-OriginDIE.ll
index 0c5d876bf05b..1caae64967bd 100644
--- a/test/DebugInfo/2010-05-03-OriginDIE.ll
+++ b/test/DebugInfo/2010-05-03-OriginDIE.ll
@@ -23,12 +23,12 @@ entry:
   %a10 = call i64 @llvm.bswap.i64(i64 %a9) nounwind ; <i64> [#uses=1]
   %a11 = getelementptr inbounds %struct.gpt_t* %gpt, i32 0, i32 8, !dbg !7 ; <i64*> [#uses=1]
   %a12 = load i64* %a11, align 4, !dbg !7         ; <i64> [#uses=1]
-  call void @llvm.dbg.declare(metadata !{i64* %data_addr.i17}, metadata !8) nounwind, !dbg !14
+  call void @llvm.dbg.declare(metadata i64* %data_addr.i17, metadata !8, metadata !{!"0x102"}) nounwind, !dbg !14
   store i64 %a12, i64* %data_addr.i17, align 8
-  call void @llvm.dbg.value(metadata !6, i64 0, metadata !15) nounwind
-  call void @llvm.dbg.value(metadata !18, i64 0, metadata !19) nounwind
-  call void @llvm.dbg.declare(metadata !6, metadata !23) nounwind
-  call void @llvm.dbg.value(metadata !{i64* %data_addr.i17}, i64 0, metadata !34) nounwind
+  call void @llvm.dbg.value(metadata !6, i64 0, metadata !15, metadata !{!"0x102"}) nounwind
+  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !19, metadata !{!"0x102"}) nounwind
+  call void @llvm.dbg.declare(metadata !6, metadata !23, metadata !{!"0x102"}) nounwind
+  call void @llvm.dbg.value(metadata i64* %data_addr.i17, i64 0, metadata !34, metadata !{!"0x102"}) nounwind
   %a13 = load volatile i64* %data_addr.i17, align 8 ; <i64> [#uses=1]
   %a14 = call i64 @llvm.bswap.i64(i64 %a13) nounwind ; <i64> [#uses=2]
   %a15 = add i64 %a10, %a14, !dbg !7              ; <i64> [#uses=1]
@@ -38,9 +38,9 @@ entry:
   ret void, !dbg !7
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 declare i32 @llvm.bswap.i32(i32) nounwind readnone
 
@@ -50,45 +50,45 @@ declare void @uuid_LtoB(i8*, i8*)
 
 !llvm.dbg.cu = !{!4}
 !llvm.module.flags = !{!41}
-!0 = metadata !{i32 808, i32 0, metadata !1, null}
-!1 = metadata !{i32 524299, metadata !39, metadata !2, i32 807, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 524334, metadata !39, null, metadata !"gpt2gpm", metadata !"gpt2gpm", metadata !"gpt2gpm", i32 807, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!3 = metadata !{i32 524329, metadata !39} ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 524305, metadata !39, i32 1, metadata !"llvm-gcc", i1 true, metadata !"", i32 0, metadata !18, metadata !18, metadata !40, null, null, i32 0} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 524309, metadata !39, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!6 = metadata !{null}
-!7 = metadata !{i32 810, i32 0, metadata !1, null}
-!8 = metadata !{i32 524545, metadata !9, metadata !"data", metadata !10, i32 201, metadata !11} ; [ DW_TAG_arg_variable ]
-!9 = metadata !{i32 524334, metadata !10, null, metadata !"_OSSwapInt64", metadata !"_OSSwapInt64", metadata !"_OSSwapInt64", i32 202, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 524329, metadata !"OSByteOrder.h", metadata !"/usr/include/libkern/ppc", metadata !4} ; [ DW_TAG_file_type ]
-!11 = metadata !{i32 524310, metadata !36, metadata !3, metadata !"uint64_t", i32 59, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ]
-!12 = metadata !{i32 524329, metadata !"stdint.h", metadata !"/usr/4.2.1/include", metadata !4} ; [ DW_TAG_file_type ]
-!13 = metadata !{i32 524324, metadata !39, metadata !3, metadata !"long long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 202, i32 0, metadata !9, metadata !7}
-!15 = metadata !{i32 524545, metadata !16, metadata !"base", metadata !10, i32 92, metadata !17} ; [ DW_TAG_arg_variable ]
-!16 = metadata !{i32 524334, metadata !38, null, metadata !"OSReadSwapInt64", metadata !"OSReadSwapInt64", metadata !"OSReadSwapInt64", i32 95, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 524303, metadata !39, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!18 = metadata !{i32 0}
-!19 = metadata !{i32 524545, metadata !16, metadata !"byteOffset", metadata !10, i32 94, metadata !20} ; [ DW_TAG_arg_variable ]
-!20 = metadata !{i32 524310, metadata !37, metadata !3, metadata !"uintptr_t", i32 114, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_typedef ]
-!21 = metadata !{i32 524329, metadata !"types.h", metadata !"/usr/include/ppc", metadata !4} ; [ DW_TAG_file_type ]
-!22 = metadata !{i32 524324, metadata !39, metadata !3, metadata !"long unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!23 = metadata !{i32 524544, metadata !24, metadata !"u", metadata !10, i32 100, metadata !25} ; [ DW_TAG_auto_variable ]
-!24 = metadata !{i32 524299, metadata !38, metadata !16, i32 95, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!25 = metadata !{i32 524311, metadata !38, metadata !16, metadata !"", i32 97, i64 64, i64 64, i64 0, i32 0, null, metadata !26, i32 0, null, null, null} ; [ DW_TAG_union_type ] [line 97, size 64, align 64, offset 0] [def] [from ]
-!26 = metadata !{metadata !27, metadata !28}
-!27 = metadata !{i32 524301, metadata !38, metadata !25, metadata !"u64", i32 98, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_member ]
-!28 = metadata !{i32 524301, metadata !38, metadata !25, metadata !"u32", i32 99, i64 64, i64 32, i64 0, i32 0, metadata !29} ; [ DW_TAG_member ]
-!29 = metadata !{i32 524289, metadata !39, metadata !3, metadata !"", i32 0, i64 64, i64 32, i64 0, i32 0, metadata !30, metadata !32, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 64, align 32, offset 0] [from uint32_t]
-!30 = metadata !{i32 524310, metadata !36, metadata !3, metadata !"uint32_t", i32 55, i64 0, i64 0, i64 0, i32 0, metadata !31} ; [ DW_TAG_typedef ]
-!31 = metadata !{i32 524324, metadata !39, metadata !3, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!32 = metadata !{metadata !33}
-!33 = metadata !{i32 524321, i64 0, i64 2}        ; [ DW_TAG_subrange_type ]
-!34 = metadata !{i32 524544, metadata !24, metadata !"addr", metadata !10, i32 96, metadata !35} ; [ DW_TAG_auto_variable ]
-!35 = metadata !{i32 524303, metadata !39, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!36 = metadata !{metadata !"stdint.h", metadata !"/usr/4.2.1/include"}
-!37 = metadata !{metadata !"types.h", metadata !"/usr/include/ppc"}
-!38 = metadata !{metadata !"OSByteOrder.h", metadata !"/usr/include/libkern/ppc"}
-!39 = metadata !{metadata !"G.c", metadata !"/tmp"}
-!40 = metadata !{metadata !2, metadata !9, metadata !16}
-!41 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !MDLocation(line: 808, scope: !1)
+!1 = !{!"0xb\00807\000\000", !39, !2} ; [ DW_TAG_lexical_block ]
+!2 = !{!"0x2e\00gpt2gpm\00gpt2gpm\00gpt2gpm\00807\001\001\000\006\000\000\000", !39, null, !5, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!3 = !{!"0x29", !39} ; [ DW_TAG_file_type ]
+!4 = !{!"0x11\001\00llvm-gcc\001\00\000\00\000", !39, !18, !18, !40, null, null} ; [ DW_TAG_compile_unit ]
+!5 = !{!"0x15\00\000\000\000\000\000\000", !39, !3, null, !6, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = !{null}
+!7 = !MDLocation(line: 810, scope: !1)
+!8 = !{!"0x101\00data\00201\000", !9, !10, !11} ; [ DW_TAG_arg_variable ]
+!9 = !{!"0x2e\00_OSSwapInt64\00_OSSwapInt64\00_OSSwapInt64\00202\001\001\000\006\000\000\000", !10, null, !5, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!10 = !{!"0x29", !"OSByteOrder.h", !"/usr/include/libkern/ppc", !4} ; [ DW_TAG_file_type ]
+!11 = !{!"0x16\00uint64_t\0059\000\000\000\000", !36, !3, !13} ; [ DW_TAG_typedef ]
+!12 = !{!"0x29", !"stdint.h", !"/usr/4.2.1/include", !4} ; [ DW_TAG_file_type ]
+!13 = !{!"0x24\00long long unsigned int\000\0064\0064\000\000\007", !39, !3} ; [ DW_TAG_base_type ]
+!14 = !MDLocation(line: 202, scope: !9, inlinedAt: !7)
+!15 = !{!"0x101\00base\0092\000", !16, !10, !17} ; [ DW_TAG_arg_variable ]
+!16 = !{!"0x2e\00OSReadSwapInt64\00OSReadSwapInt64\00OSReadSwapInt64\0095\001\001\000\006\000\000\000", !38, null, !5, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!17 = !{!"0xf\00\000\0032\0032\000\000", !39, !3, null} ; [ DW_TAG_pointer_type ]
+!18 = !{i32 0}
+!19 = !{!"0x101\00byteOffset\0094\000", !16, !10, !20} ; [ DW_TAG_arg_variable ]
+!20 = !{!"0x16\00uintptr_t\00114\000\000\000\000", !37, !3, !22} ; [ DW_TAG_typedef ]
+!21 = !{!"0x29", !"types.h", !"/usr/include/ppc", !4} ; [ DW_TAG_file_type ]
+!22 = !{!"0x24\00long unsigned int\000\0032\0032\000\000\007", !39, !3} ; [ DW_TAG_base_type ]
+!23 = !{!"0x100\00u\00100\000", !24, !10, !25} ; [ DW_TAG_auto_variable ]
+!24 = !{!"0xb\0095\000\000", !38, !16} ; [ DW_TAG_lexical_block ]
+!25 = !{!"0x17\00\0097\0064\0064\000\000\000", !38, !16, null, !26, null, null, null} ; [ DW_TAG_union_type ] [line 97, size 64, align 64, offset 0] [def] [from ]
+!26 = !{!27, !28}
+!27 = !{!"0xd\00u64\0098\0064\0064\000\000", !38, !25, !11} ; [ DW_TAG_member ]
+!28 = !{!"0xd\00u32\0099\0064\0032\000\000", !38, !25, !29} ; [ DW_TAG_member ]
+!29 = !{!"0x1\00\000\0064\0032\000\000", !39, !3, !30, !32, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 64, align 32, offset 0] [from uint32_t]
+!30 = !{!"0x16\00uint32_t\0055\000\000\000\000", !36, !3, !31} ; [ DW_TAG_typedef ]
+!31 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", !39, !3} ; [ DW_TAG_base_type ]
+!32 = !{!33}
+!33 = !{!"0x21\000\002"}        ; [ DW_TAG_subrange_type ]
+!34 = !{!"0x100\00addr\0096\000", !24, !10, !35} ; [ DW_TAG_auto_variable ]
+!35 = !{!"0xf\00\000\0032\0032\000\000", !39, !3, !11} ; [ DW_TAG_pointer_type ]
+!36 = !{!"stdint.h", !"/usr/4.2.1/include"}
+!37 = !{!"types.h", !"/usr/include/ppc"}
+!38 = !{!"OSByteOrder.h", !"/usr/include/libkern/ppc"}
+!39 = !{!"G.c", !"/tmp"}
+!40 = !{!2, !9, !16}
+!41 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/2010-05-10-MultipleCU.ll b/test/DebugInfo/2010-05-10-MultipleCU.ll
index 75d2e7084d21..502007c80934 100644
--- a/test/DebugInfo/2010-05-10-MultipleCU.ll
+++ b/test/DebugInfo/2010-05-10-MultipleCU.ll
@@ -19,26 +19,26 @@ return:
 
 !llvm.dbg.cu = !{!4, !12}
 !llvm.module.flags = !{!21}
-!16 = metadata !{metadata !2}
-!17 = metadata !{metadata !10}
+!16 = !{!2}
+!17 = !{!10}
 
-!0 = metadata !{i32 3, i32 0, metadata !1, null}
-!1 = metadata !{i32 786443, metadata !18, metadata !2, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 786478, metadata !18, metadata !3, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!3 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 786449, metadata !18, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !19, metadata !19, metadata !16, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 786453, metadata !18, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!6 = metadata !{metadata !7}
-!7 = metadata !{i32 786468, metadata !18, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 3, i32 0, metadata !9, null}
-!9 = metadata !{i32 786443, metadata !20, metadata !10, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{i32 786478, metadata !20, metadata !11, metadata !"bar", metadata !"bar", metadata !"bar", i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!12 = metadata !{i32 786449, metadata !20, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !19, metadata !19, metadata !17, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!13 = metadata !{i32 786453, metadata !20, metadata !11, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!14 = metadata !{metadata !15}
-!15 = metadata !{i32 786468, metadata !20, metadata !11, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!18 = metadata !{metadata !"a.c", metadata !"/tmp/"}
-!19 = metadata !{i32 0}
-!20 = metadata !{metadata !"b.c", metadata !"/tmp/"}
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !MDLocation(line: 3, scope: !1)
+!1 = !{!"0xb\002\000\000", !18, !2} ; [ DW_TAG_lexical_block ]
+!2 = !{!"0x2e\00foo\00foo\00foo\002\000\001\000\006\000\000\000", !18, !3, !5, null, i32 ()* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!3 = !{!"0x29", !18} ; [ DW_TAG_file_type ]
+!4 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\000\00\000\00\000", !18, !19, !19, !16, null, null} ; [ DW_TAG_compile_unit ]
+!5 = !{!"0x15\00\000\000\000\000\000\000", !18, !3, null, !6, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = !{!7}
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", !18, !3} ; [ DW_TAG_base_type ]
+!8 = !MDLocation(line: 3, scope: !9)
+!9 = !{!"0xb\002\000\000", !20, !10} ; [ DW_TAG_lexical_block ]
+!10 = !{!"0x2e\00bar\00bar\00bar\002\000\001\000\006\000\000\000", !20, !11, !13, null, i32 ()* @bar, null, null, null} ; [ DW_TAG_subprogram ]
+!11 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
+!12 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\000\00\000\00\000", !20, !19, !19, !17, null, null} ; [ DW_TAG_compile_unit ]
+!13 = !{!"0x15\00\000\000\000\000\000\000", !20, !11, null, !14, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = !{!15}
+!15 = !{!"0x24\00int\000\0032\0032\000\000\005", !20, !11} ; [ DW_TAG_base_type ]
+!18 = !{!"a.c", !"/tmp/"}
+!19 = !{i32 0}
+!20 = !{!"b.c", !"/tmp/"}
+!21 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
index a461abdcdf5b..9f0f7c37fcc0 100644
--- a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
+++ b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
@@ -7,15 +7,15 @@
 
 @i = common global i32 0                          ; <i32*> [#uses=2]
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 define i32 @bar() nounwind ssp {
 entry:
   %0 = load i32* @i, align 4, !dbg !17            ; <i32> [#uses=2]
-  tail call void @llvm.dbg.value(metadata !{i32 %0}, i64 0, metadata !9), !dbg !19
-  tail call void @llvm.dbg.declare(metadata !29, metadata !10), !dbg !21
+  tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !9, metadata !{!"0x102"}), !dbg !19
+  tail call void @llvm.dbg.declare(metadata !29, metadata !10, metadata !{!"0x102"}), !dbg !21
   %1 = mul nsw i32 %0, %0, !dbg !22               ; <i32> [#uses=2]
   store i32 %1, i32* @i, align 4, !dbg !17
   ret i32 %1, !dbg !23
@@ -24,33 +24,33 @@ entry:
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!28}
 
-!0 = metadata !{i32 786478, metadata !27, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 9, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, metadata !24, i32 9} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !27, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !20, metadata !20, metadata !25, metadata !26,  metadata !20, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5, metadata !5}
-!5 = metadata !{i32 786468, metadata !27, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !27, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786453, metadata !27, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !5}
-!9 = metadata !{i32 786689, metadata !0, metadata !"j", metadata !1, i32 9, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 786688, metadata !11, metadata !"xyz", metadata !1, i32 10, metadata !12, i32 0, null} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 786443, metadata !1, metadata !0, i32 9, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 786451, metadata !27, metadata !0, metadata !"X", i32 10, i64 64, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [X] [line 10, size 64, align 32, offset 0] [def] [from ]
-!13 = metadata !{metadata !14, metadata !15}
-!14 = metadata !{i32 786445, metadata !27, metadata !12, metadata !"a", i32 10, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!15 = metadata !{i32 786445, metadata !27, metadata !12, metadata !"b", i32 10, i64 32, i64 32, i64 32, i32 0, metadata !5} ; [ DW_TAG_member ]
-!16 = metadata !{i32 786484, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"", metadata !1, i32 5, metadata !5, i1 false, i1 true, i32* @i, null} ; [ DW_TAG_variable ]
-!17 = metadata !{i32 15, i32 0, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !1, metadata !6, i32 14, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
-!19 = metadata !{i32 9, i32 0, metadata !0, metadata !17}
-!20 = metadata !{}
-!21 = metadata !{i32 9, i32 0, metadata !11, metadata !17}
-!22 = metadata !{i32 11, i32 0, metadata !11, metadata !17}
-!23 = metadata !{i32 16, i32 0, metadata !18, null}
-!24 = metadata !{metadata !9, metadata !10}
-!25 = metadata !{metadata !0, metadata !6}
-!26 = metadata !{metadata !16}
-!27 = metadata !{metadata !"bar.c", metadata !"/tmp/"}
-!28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!29 = metadata !{null}
+!0 = !{!"0x2e\00foo\00foo\00\009\001\001\000\006\000\001\009", !27, !1, !3, null, null, null, null, !24} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !27} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", !27, !20, !20, !25, !26,  !20} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !27, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5, !5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", !27, !1} ; [ DW_TAG_base_type ]
+!6 = !{!"0x2e\00bar\00bar\00bar\0014\000\001\000\006\000\001\000", !27, !1, !7, null, i32 ()* @bar, null, null, null} ; [ DW_TAG_subprogram ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", !27, !1, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!5}
+!9 = !{!"0x101\00j\009\000", !0, !1, !5} ; [ DW_TAG_arg_variable ]
+!10 = !{!"0x100\00xyz\0010\000", !11, !1, !12} ; [ DW_TAG_auto_variable ]
+!11 = !{!"0xb\009\000\000", !1, !0} ; [ DW_TAG_lexical_block ]
+!12 = !{!"0x13\00X\0010\0064\0032\000\000\000", !27, !0, null, !13, null, null, null} ; [ DW_TAG_structure_type ] [X] [line 10, size 64, align 32, offset 0] [def] [from ]
+!13 = !{!14, !15}
+!14 = !{!"0xd\00a\0010\0032\0032\000\000", !27, !12, !5} ; [ DW_TAG_member ]
+!15 = !{!"0xd\00b\0010\0032\0032\0032\000", !27, !12, !5} ; [ DW_TAG_member ]
+!16 = !{!"0x34\00i\00i\00\005\000\001", !1, !1, !5, i32* @i, null} ; [ DW_TAG_variable ]
+!17 = !MDLocation(line: 15, scope: !18)
+!18 = !{!"0xb\0014\000\001", !1, !6} ; [ DW_TAG_lexical_block ]
+!19 = !MDLocation(line: 9, scope: !0, inlinedAt: !17)
+!20 = !{}
+!21 = !MDLocation(line: 9, scope: !11, inlinedAt: !17)
+!22 = !MDLocation(line: 11, scope: !11, inlinedAt: !17)
+!23 = !MDLocation(line: 16, scope: !18)
+!24 = !{!9, !10}
+!25 = !{!0, !6}
+!26 = !{!16}
+!27 = !{!"bar.c", !"/tmp/"}
+!28 = !{i32 1, !"Debug Info Version", i32 2}
+!29 = !{null}
diff --git a/test/DebugInfo/2010-07-19-Crash.ll b/test/DebugInfo/2010-07-19-Crash.ll
index a10b10a7d145..8bbe48c33e90 100644
--- a/test/DebugInfo/2010-07-19-Crash.ll
+++ b/test/DebugInfo/2010-07-19-Crash.ll
@@ -12,19 +12,19 @@ entry:
 !llvm.dbg.sp = !{!0, !6, !11}
 !llvm.dbg.lv.foo = !{!7}
 
-!0 = metadata !{i32 524334, metadata !12, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !12} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, metadata !12, i32 12, metadata !"clang 2.8", i1 true, metadata !"", i32 0, metadata !14, metadata !14, metadata !13, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 524324, metadata !12, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 524334, metadata !12, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 524544, metadata !8, metadata !"one", metadata !1, i32 8, metadata !5} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 524299, metadata !12, metadata !6, i32 7, i32 18, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 4, i32 3, metadata !10, null}
-!10 = metadata !{i32 524299, metadata !12, metadata !0, i32 3, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
-!11 = metadata !{i32 524334, metadata !12, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !3, i1 true, i1 false, i32 0, i32 0, null, i1 false, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!12 = metadata !{metadata !"one.c", metadata !"/private/tmp"}
-!13 = metadata !{metadata !0}
-!14 = metadata !{i32 0}
-!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00bar\00bar\00bar\003\000\001\000\006\000\001\000", !12, !1, !3, null, i32 ()* @bar, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !12} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang 2.8\001\00\000\00\000", !12, !14, !14, !13, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !12, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", !12, !1} ; [ DW_TAG_base_type ]
+!6 = !{!"0x2e\00foo\00foo\00foo\007\001\001\000\006\000\001\000", !12, !1, !3, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!7 = !{!"0x100\00one\008\000", !8, !1, !5} ; [ DW_TAG_auto_variable ]
+!8 = !{!"0xb\007\0018\000", !12, !6} ; [ DW_TAG_lexical_block ]
+!9 = !MDLocation(line: 4, column: 3, scope: !10)
+!10 = !{!"0xb\003\0011\000", !12, !0} ; [ DW_TAG_lexical_block ]
+!11 = !{!"0x2e\00foo\00foo\00foo\007\001\000\000\006\000\001\000", !12, !1, !3, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!12 = !{!"one.c", !"/private/tmp"}
+!13 = !{!0}
+!14 = !{i32 0}
+!15 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/2010-10-01-crash.ll b/test/DebugInfo/2010-10-01-crash.ll
index f8dbb6eb3c97..5c822e94bece 100644
--- a/test/DebugInfo/2010-10-01-crash.ll
+++ b/test/DebugInfo/2010-10-01-crash.ll
@@ -4,23 +4,23 @@
 
 define void @CGRectStandardize(i32* sret %agg.result, i32* byval %rect) nounwind ssp {
 entry:
-  call void @llvm.dbg.declare(metadata !{i32* %rect}, metadata !23), !dbg !24
+  call void @llvm.dbg.declare(metadata i32* %rect, metadata !23, metadata !{!"0x102"}), !dbg !24
   ret void
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
 
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!27}
-!0 = metadata !{i32 589870, metadata !1, null, metadata !"CGRectStandardize", metadata !"CGRectStandardize", metadata !"CGRectStandardize", i32 54, null, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32*, i32*)* @CGRectStandardize, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 54] [def] [scope 0] [CGRectStandardize]
-!1 = metadata !{i32 589865, metadata !25}
-!2 = metadata !{i32 589841, metadata !25, i32 16, metadata !"clang version 2.9 (trunk 115292)", i1 true, metadata !"", i32 1, metadata !26, metadata !26, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 589846, metadata !25, null, metadata !"CGRect", i32 49, i64 0, i64 0, i64 0, i32 0, null}
-!23 = metadata !{i32 590081, metadata !0, metadata !"rect", metadata !1, i32 53, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!24 = metadata !{i32 53, i32 33, metadata !0, null}
-!25 = metadata !{metadata !"GSFusedSilica.m", metadata !"/Volumes/Data/Users/sabre/Desktop"}
-!26 = metadata !{i32 0}
-!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00CGRectStandardize\00CGRectStandardize\00CGRectStandardize\0054\000\001\000\006\000\000\000", !1, null, null, null, void (i32*, i32*)* @CGRectStandardize, null, null, null} ; [ DW_TAG_subprogram ] [line 54] [def] [scope 0] [CGRectStandardize]
+!1 = !{!"0x29", !25} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0016\00clang version 2.9 (trunk 115292)\001\00\001\00\000", !25, !26, !26, null, null, null} ; [ DW_TAG_compile_unit ]
+!5 = !{!"0x16\00CGRect\0049\000\000\000\000", !25, null, null} ; [ DW_TAG_typedef ]
+!23 = !{!"0x101\00rect\0053\000", !0, !1, !5} ; [ DW_TAG_arg_variable ]
+!24 = !MDLocation(line: 53, column: 33, scope: !0)
+!25 = !{!"GSFusedSilica.m", !"/Volumes/Data/Users/sabre/Desktop"}
+!26 = !{i32 0}
+!27 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/AArch64/big-endian-dump.ll b/test/DebugInfo/AArch64/big-endian-dump.ll
new file mode 100644
index 000000000000..4a1a9e78ae9f
--- /dev/null
+++ b/test/DebugInfo/AArch64/big-endian-dump.ll
@@ -0,0 +1,16 @@
+; RUN: llc -O0 -filetype=obj -mtriple=aarch64_be-none-linux < %s | llvm-dwarfdump - | FileCheck %s
+
+; CHECK: file format ELF64-aarch64-big
+
+target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128"
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = !{i32 786449, !1, i32 12, !"clang version 3.6.0 ", i1 false, !"", i32 0, !2, !2, !2, !2, !2, !"", i32 1} ; [ DW_TAG_compile_unit ] [/a/empty.c] [DW_LANG_C99]
+!1 = !{!"empty.c", !"/a"}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 1}
+!5 = !{!"clang version 3.6.0 "}
diff --git a/test/DebugInfo/AArch64/big-endian.ll b/test/DebugInfo/AArch64/big-endian.ll
new file mode 100644
index 000000000000..79e38c455996
--- /dev/null
+++ b/test/DebugInfo/AArch64/big-endian.ll
@@ -0,0 +1,22 @@
+; RUN: llc %s -filetype=asm -o -
+
+target datalayout = "E-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64_be--none-eabi"
+
+@a = common global i32 0, align 4
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = !{!"0x11\0012\00clang version 3.6.0 \001\00\000\00\001", !1, !2, !2, !2, !3, !2} ; [ DW_TAG_compile_unit ] [/work/validation/-] [DW_LANG_C99]
+!1 = !{!"-", !"/work/validation"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x34\00a\00a\00\001\000\001", null, !5, !7, i32* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
+!5 = !{!"0x29", !6}          ; [ DW_TAG_file_type ] [/work/validation/<stdin>]
+!6 = !{!"<stdin>", !"/work/validation"}
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 2}
+!10 = !{!"clang version 3.6.0 "}
diff --git a/test/DebugInfo/AArch64/cfi-eof-prologue.ll b/test/DebugInfo/AArch64/cfi-eof-prologue.ll
new file mode 100644
index 000000000000..2d68af629e32
--- /dev/null
+++ b/test/DebugInfo/AArch64/cfi-eof-prologue.ll
@@ -0,0 +1,112 @@
+; struct A {
+;   A();
+;   virtual ~A();
+; };
+; struct B : A {
+;   B();
+;   virtual ~B();
+; };
+; B::B() {}
+; CHECK: __ZN1BC1Ev:
+; CHECK:     .loc	1 [[@LINE-2]] 0 prologue_end
+; CHECK-NOT: .loc	1 0 0 prologue_end
+
+; The location of the prologue_end marker should not be affected by the presence
+; of CFI instructions.
+
+; RUN: llc -O0 -filetype=asm < %s | FileCheck %s
+
+; ModuleID = 'test1.cpp'
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-apple-ios"
+
+%struct.B = type { %struct.A }
+%struct.A = type { i32 (...)** }
+
+@_ZTV1B = external unnamed_addr constant [4 x i8*]
+
+; Function Attrs: nounwind
+define %struct.B* @_ZN1BC2Ev(%struct.B* %this) unnamed_addr #0 align 2 {
+entry:
+  tail call void @llvm.dbg.value(metadata %struct.B* %this, i64 0, metadata !30, metadata !38), !dbg !39
+  %0 = getelementptr inbounds %struct.B* %this, i64 0, i32 0, !dbg !40
+  %call = tail call %struct.A* @_ZN1AC2Ev(%struct.A* %0) #3, !dbg !40
+  %1 = getelementptr inbounds %struct.B* %this, i64 0, i32 0, i32 0, !dbg !40
+  store i32 (...)** bitcast (i8** getelementptr inbounds ([4 x i8*]* @_ZTV1B, i64 0, i64 2) to i32 (...)**), i32 (...)*** %1, align 8, !dbg !40, !tbaa !41
+  ret %struct.B* %this, !dbg !40
+}
+
+declare %struct.A* @_ZN1AC2Ev(%struct.A*)
+
+; Function Attrs: nounwind
+define %struct.B* @_ZN1BC1Ev(%struct.B* %this) unnamed_addr #0 align 2 {
+entry:
+  tail call void @llvm.dbg.value(metadata %struct.B* %this, i64 0, metadata !34, metadata !38), !dbg !44
+  tail call void @llvm.dbg.value(metadata %struct.B* %this, i64 0, metadata !45, metadata !38) #3, !dbg !47
+  %0 = getelementptr inbounds %struct.B* %this, i64 0, i32 0, !dbg !48
+  %call.i = tail call %struct.A* @_ZN1AC2Ev(%struct.A* %0) #3, !dbg !48
+  %1 = getelementptr inbounds %struct.B* %this, i64 0, i32 0, i32 0, !dbg !48
+  store i32 (...)** bitcast (i8** getelementptr inbounds ([4 x i8*]* @_ZTV1B, i64 0, i64 2) to i32 (...)**), i32 (...)*** %1, align 8, !dbg !48, !tbaa !41
+  ret %struct.B* %this, !dbg !46
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { nounwind }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!35, !36}
+!llvm.ident = !{!37}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 (trunk 224279) (llvm/trunk 224283)\001\00\000\00\001", !1, !2, !3, !27, !2, !2} ; [ DW_TAG_compile_unit ] [<stdin>] [DW_LANG_C_plus_plus]
+!1 = !{!"<stdin>", !""}
+!2 = !{}
+!3 = !{!4, !13}
+!4 = !{!"0x13\00B\005\0064\0064\000\000\000", !5, null, null, !6, !"_ZTS1A", null, !"_ZTS1B"} ; [ DW_TAG_structure_type ] [B] [line 5, size 64, align 64, offset 0] [def] [from ]
+!5 = !{!"test1.cpp", !""}
+!6 = !{!7, !8, !12}
+!7 = !{!"0x1c\00\000\000\000\000\000", null, !"_ZTS1B", !"_ZTS1A"} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from _ZTS1A]
+!8 = !{!"0x2e\00B\00B\00\006\000\000\000\000\00256\001\006", !5, !"_ZTS1B", !9, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 6] [B]
+!9 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !{null, !11}
+!11 = !{!"0xf\00\000\0064\0064\000\001088\00", null, null, !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1B]
+!12 = !{!"0x2e\00~B\00~B\00\007\000\000\001\000\00256\001\007", !5, !"_ZTS1B", !9, !"_ZTS1B", null, null, null, null} ; [ DW_TAG_subprogram ] [line 7] [~B]
+!13 = !{!"0x13\00A\001\0064\0064\000\000\000", !5, null, null, !14, !"_ZTS1A", null, !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 1, size 64, align 64, offset 0] [def] [from ]
+!14 = !{!15, !22, !26}
+!15 = !{!"0xd\00_vptr$A\000\0064\000\000\0064", !5, !16, !17} ; [ DW_TAG_member ] [_vptr$A] [line 0, size 64, align 0, offset 0] [artificial] [from ]
+!16 = !{!"0x29", !5}                              ; [ DW_TAG_file_type ] [test1.cpp]
+!17 = !{!"0xf\00\000\0064\000\000\000", null, null, !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __vtbl_ptr_type]
+!18 = !{!"0xf\00__vtbl_ptr_type\000\0064\000\000\000", null, null, !19} ; [ DW_TAG_pointer_type ] [__vtbl_ptr_type] [line 0, size 64, align 0, offset 0] [from ]
+!19 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !20, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!20 = !{!21}
+!21 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!22 = !{!"0x2e\00A\00A\00\002\000\000\000\000\00256\001\002", !5, !"_ZTS1A", !23, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 2] [A]
+!23 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !24, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!24 = !{null, !25}
+!25 = !{!"0xf\00\000\0064\0064\000\001088\00", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!26 = !{!"0x2e\00~A\00~A\00\003\000\000\001\000\00256\001\003", !5, !"_ZTS1A", !23, !"_ZTS1A", null, null, null, null} ; [ DW_TAG_subprogram ] [line 3] [~A]
+!27 = !{!28, !32}
+!28 = !{!"0x2e\00B\00B\00_ZN1BC2Ev\009\000\001\000\000\00256\001\009", !5, !"_ZTS1B", !9, null, %struct.B* (%struct.B*)* @_ZN1BC2Ev, null, !8, !29} ; [ DW_TAG_subprogram ] [line 9] [def] [B]
+!29 = !{!30}
+!30 = !{!"0x101\00this\0016777216\001088", !28, null, !31} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!31 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1B]
+!32 = !{!"0x2e\00B\00B\00_ZN1BC1Ev\009\000\001\000\000\00256\001\009", !5, !"_ZTS1B", !9, null, %struct.B* (%struct.B*)* @_ZN1BC1Ev, null, !8, !33} ; [ DW_TAG_subprogram ] [line 9] [def] [B]
+!33 = !{!34}
+!34 = !{!"0x101\00this\0016777216\001088", !32, null, !31} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!35 = !{i32 2, !"Dwarf Version", i32 4}
+!36 = !{i32 2, !"Debug Info Version", i32 2}
+!37 = !{!"clang version 3.6.0 (trunk 224279) (llvm/trunk 224283)"}
+!38 = !{!"0x102"}                                 ; [ DW_TAG_expression ]
+!39 = !MDLocation(line: 0, scope: !28)
+!40 = !MDLocation(line: 9, scope: !28)
+!41 = !{!42, !42, i64 0}
+!42 = !{!"vtable pointer", !43, i64 0}
+!43 = !{!"Simple C/C++ TBAA"}
+!44 = !MDLocation(line: 0, scope: !32)
+!45 = !{!"0x101\00this\0016777216\001088", !28, null, !31, !46} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!46 = !MDLocation(line: 9, scope: !32)
+!47 = !MDLocation(line: 0, scope: !28, inlinedAt: !46)
+!48 = !MDLocation(line: 9, scope: !28, inlinedAt: !46)
diff --git a/test/DebugInfo/AArch64/coalescing.ll b/test/DebugInfo/AArch64/coalescing.ll
new file mode 100644
index 000000000000..35bb04138c13
--- /dev/null
+++ b/test/DebugInfo/AArch64/coalescing.ll
@@ -0,0 +1,65 @@
+; RUN: llc -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s
+;
+; Generated at -Os from:
+; void *foo(void *dst);
+; void start() {
+;   unsigned size;
+;   foo(&size);
+;   if (size != 0) { // Work around a bug to preserve the dbg.value.
+;   }
+; }
+
+; ModuleID = 'test1.cpp'
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios"
+
+; Function Attrs: nounwind optsize
+define void @_Z5startv() #0 {
+entry:
+  %size = alloca i32, align 4
+  %0 = bitcast i32* %size to i8*, !dbg !15
+  %call = call i8* @_Z3fooPv(i8* %0) #3, !dbg !15
+  call void @llvm.dbg.value(metadata i32* %size, i64 0, metadata !10, metadata !16), !dbg !17
+  ; CHECK: .debug_info contents:
+  ; CHECK: DW_TAG_variable
+  ; CHECK-NEXT: DW_AT_location
+  ; CHECK-NEXT: DW_AT_name {{.*}}"size"
+  ; CHECK: .debug_loc contents:
+  ; CHECK: Location description: 70 00
+  ret void, !dbg !18
+}
+
+; Function Attrs: optsize
+declare i8* @_Z3fooPv(i8*) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { nounwind optsize }
+attributes #1 = { optsize }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind optsize }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12, !13}
+!llvm.ident = !{!14}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 (trunk 223149) (llvm/trunk 223115)\001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [<stdin>] [DW_LANG_C_plus_plus]
+!1 = !{!"<stdin>", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00start\00start\00_Z5startv\002\000\001\000\000\00256\001\003", !5, !6, !7, null, void ()* @_Z5startv, null, null, !9} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [start]
+!5 = !{!"test1.c", !""}
+!6 = !{!"0x29", !5}    ; [ DW_TAG_file_type ] [/test1.c]
+!7 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null}
+!9 = !{!10}
+!10 = !{!"0x100\00size\004\000", !4, !6, !11} ; [ DW_TAG_auto_variable ] [size] [line 4]
+!11 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, null} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!12 = !{i32 2, !"Dwarf Version", i32 2}
+!13 = !{i32 2, !"Debug Info Version", i32 2}
+!14 = !{!"clang version 3.6.0 (trunk 223149) (llvm/trunk 223115)"}
+!15 = !MDLocation(line: 5, column: 3, scope: !4)
+!16 = !{!"0x102"}               ; [ DW_TAG_expression ]
+!17 = !MDLocation(line: 4, column: 12, scope: !4)
+!18 = !MDLocation(line: 8, column: 1, scope: !4)
diff --git a/test/DebugInfo/AArch64/dwarfdump.ll b/test/DebugInfo/AArch64/dwarfdump.ll
index 98e863dbb4b5..cba18b2b23b1 100644
--- a/test/DebugInfo/AArch64/dwarfdump.ll
+++ b/test/DebugInfo/AArch64/dwarfdump.ll
@@ -27,14 +27,14 @@ attributes #0 = { nounwind }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!10}
 
-!0 = metadata !{i32 786449, metadata !9, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/llvm/build/tmp.c] [DW_LANG_C99]
-!1 = metadata !{}
-!2 = metadata !{metadata !3}
-!3 = metadata !{i32 786478, metadata !9, metadata !4, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
-!4 = metadata !{i32 786473, metadata !9} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!6 = metadata !{metadata !7}
-!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!8 = metadata !{i32 2, i32 0, metadata !3, null}
-!9 = metadata !{metadata !"tmp.c", metadata !"/home/tim/llvm/build"}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.3 \000\00\000\00\000", !9, !1, !1, !2, !1,  !1} ; [ DW_TAG_compile_unit ] [/home/timnor01/llvm/build/tmp.c] [DW_LANG_C99]
+!1 = !{}
+!2 = !{!3}
+!3 = !{!"0x2e\00main\00main\00\001\000\001\000\006\000\000\001", !9, !4, !5, null, i32 ()* @main, null, null, !1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
+!4 = !{!"0x29", !9} ; [ DW_TAG_file_type ]
+!5 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !6, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = !{!7}
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = !MDLocation(line: 2, scope: !3)
+!9 = !{!"tmp.c", !"/home/tim/llvm/build"}
+!10 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/AArch64/little-endian-dump.ll b/test/DebugInfo/AArch64/little-endian-dump.ll
new file mode 100644
index 000000000000..65330c9eb95a
--- /dev/null
+++ b/test/DebugInfo/AArch64/little-endian-dump.ll
@@ -0,0 +1,16 @@
+; RUN: llc -O0 -filetype=obj -mtriple=aarch64-none-linux < %s | llvm-dwarfdump - | FileCheck %s
+
+; CHECK: file format ELF64-aarch64-little
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = !{i32 786449, !1, i32 12, !"clang version 3.6.0 ", i1 false, !"", i32 0, !2, !2, !2, !2, !2, !"", i32 1} ; [ DW_TAG_compile_unit ] [/a/empty.c] [DW_LANG_C99]
+!1 = !{!"empty.c", !"/a"}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 1}
+!5 = !{!"clang version 3.6.0 "}
diff --git a/test/DebugInfo/AArch64/processes-relocations.ll b/test/DebugInfo/AArch64/processes-relocations.ll
new file mode 100644
index 000000000000..1a9dfd7ec366
--- /dev/null
+++ b/test/DebugInfo/AArch64/processes-relocations.ll
@@ -0,0 +1,15 @@
+; RUN: llc -filetype=obj -O0 < %s -mtriple aarch64-unknown-linux | \
+; RUN:      llvm-dwarfdump - 2>&1 | FileCheck %s
+
+; CHECK-NOT: failed to compute relocation
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = !{i32 786449, !1, i32 12, !"clang version 3.6.0 ", i1 false, !"", i32 0, !2, !2, !2, !2, !2, !"", i32 1} ; [ DW_TAG_compile_unit ] [/a/empty.c] [DW_LANG_C99]
+!1 = !{!"empty.c", !"/a"}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 1}
+!5 = !{!"clang version 3.6.0 "}
diff --git a/test/DebugInfo/AArch64/struct_by_value.ll b/test/DebugInfo/AArch64/struct_by_value.ll
index 0e336f799c54..f66f56cef5bc 100644
--- a/test/DebugInfo/AArch64/struct_by_value.ll
+++ b/test/DebugInfo/AArch64/struct_by_value.ll
@@ -32,14 +32,14 @@ target triple = "arm64-apple-ios3.0.0"
 ; Function Attrs: nounwind ssp
 define i32 @return_five_int(%struct.five* %f) #0 {
 entry:
-  call void @llvm.dbg.declare(metadata !{%struct.five* %f}, metadata !17), !dbg !18
+  call void @llvm.dbg.declare(metadata %struct.five* %f, metadata !17, metadata !{!"0x102"}), !dbg !18
   %a = getelementptr inbounds %struct.five* %f, i32 0, i32 0, !dbg !19
   %0 = load i32* %a, align 4, !dbg !19
   ret i32 %0, !dbg !19
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind ssp }
 attributes #1 = { nounwind readnone }
@@ -47,24 +47,24 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!16, !20}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"LLVM version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [struct_by_value.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"struct_by_value.c", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"return_five_int", metadata !"return_five_int", metadata !"", i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.five*)* @return_five_int, null, null, metadata !2, i32 14} ; [ DW_TAG_subprogram ] [line 13] [def] [scope 14] [return_five_int]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [struct_by_value.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !9}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786451, metadata !1, null, metadata !"five", i32 1, i64 160, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [five] [line 1, size 160, align 32, offset 0] [def] [from ]
-!10 = metadata !{metadata !11, metadata !12, metadata !13, metadata !14, metadata !15}
-!11 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"a", i32 3, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 3, size 32, align 32, offset 0] [from int]
-!12 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"b", i32 4, i64 32, i64 32, i64 32, i32 0, metadata !8} ; [ DW_TAG_member ] [b] [line 4, size 32, align 32, offset 32] [from int]
-!13 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"c", i32 5, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] [c] [line 5, size 32, align 32, offset 64] [from int]
-!14 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"d", i32 6, i64 32, i64 32, i64 96, i32 0, metadata !8} ; [ DW_TAG_member ] [d] [line 6, size 32, align 32, offset 96] [from int]
-!15 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"e", i32 7, i64 32, i64 32, i64 128, i32 0, metadata !8} ; [ DW_TAG_member ] [e] [line 7, size 32, align 32, offset 128] [from int]
-!16 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!17 = metadata !{i32 786689, metadata !4, metadata !"f", metadata !5, i32 16777229, metadata !9, i32 8192, i32 0} ; [ DW_TAG_arg_variable ] [f] [line 13]
-!18 = metadata !{i32 13, i32 0, metadata !4, null}
-!19 = metadata !{i32 16, i32 0, metadata !4, null}
-!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00LLVM version 3.4 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [struct_by_value.c] [DW_LANG_C99]
+!1 = !{!"struct_by_value.c", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00return_five_int\00return_five_int\00\0013\000\001\000\006\00256\000\0014", !1, !5, !6, null, i32 (%struct.five*)* @return_five_int, null, null, !2} ; [ DW_TAG_subprogram ] [line 13] [def] [scope 14] [return_five_int]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [struct_by_value.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !9}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x13\00five\001\00160\0032\000\000\000", !1, null, null, !10, null, null, null} ; [ DW_TAG_structure_type ] [five] [line 1, size 160, align 32, offset 0] [def] [from ]
+!10 = !{!11, !12, !13, !14, !15}
+!11 = !{!"0xd\00a\003\0032\0032\000\000", !1, !9, !8} ; [ DW_TAG_member ] [a] [line 3, size 32, align 32, offset 0] [from int]
+!12 = !{!"0xd\00b\004\0032\0032\0032\000", !1, !9, !8} ; [ DW_TAG_member ] [b] [line 4, size 32, align 32, offset 32] [from int]
+!13 = !{!"0xd\00c\005\0032\0032\0064\000", !1, !9, !8} ; [ DW_TAG_member ] [c] [line 5, size 32, align 32, offset 64] [from int]
+!14 = !{!"0xd\00d\006\0032\0032\0096\000", !1, !9, !8} ; [ DW_TAG_member ] [d] [line 6, size 32, align 32, offset 96] [from int]
+!15 = !{!"0xd\00e\007\0032\0032\00128\000", !1, !9, !8} ; [ DW_TAG_member ] [e] [line 7, size 32, align 32, offset 128] [from int]
+!16 = !{i32 2, !"Dwarf Version", i32 2}
+!17 = !{!"0x101\00f\0016777229\008192", !4, !5, !9} ; [ DW_TAG_arg_variable ] [f] [line 13]
+!18 = !MDLocation(line: 13, scope: !4)
+!19 = !MDLocation(line: 16, scope: !4)
+!20 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/ARM/PR16736.ll b/test/DebugInfo/ARM/PR16736.ll
index 8c025ad487b4..7c99ae20dc64 100644
--- a/test/DebugInfo/ARM/PR16736.ll
+++ b/test/DebugInfo/ARM/PR16736.ll
@@ -15,14 +15,14 @@ target triple = "thumbv7-apple-ios"
 ; Function Attrs: nounwind
 define arm_aapcscc void @_Z1hiiiif(i32, i32, i32, i32, float %x) #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %0}, i64 0, metadata !12), !dbg !18
-  tail call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !13), !dbg !18
-  tail call void @llvm.dbg.value(metadata !{i32 %2}, i64 0, metadata !14), !dbg !18
-  tail call void @llvm.dbg.value(metadata !{i32 %3}, i64 0, metadata !15), !dbg !18
-  tail call void @llvm.dbg.value(metadata !{float %x}, i64 0, metadata !16), !dbg !18
+  tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !12, metadata !{!"0x102"}), !dbg !18
+  tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !18
+  tail call void @llvm.dbg.value(metadata i32 %2, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !18
+  tail call void @llvm.dbg.value(metadata i32 %3, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !18
+  tail call void @llvm.dbg.value(metadata float %x, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !18
   %call = tail call arm_aapcscc i32 @_Z1fv() #3, !dbg !19
   %conv = sitofp i32 %call to float, !dbg !19
-  tail call void @llvm.dbg.value(metadata !{float %conv}, i64 0, metadata !16), !dbg !19
+  tail call void @llvm.dbg.value(metadata float %conv, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !19
   tail call arm_aapcscc void @_Z1gf(float %conv) #3, !dbg !19
   ret void, !dbg !20
 }
@@ -32,7 +32,7 @@ declare arm_aapcscc void @_Z1gf(float)
 declare arm_aapcscc i32 @_Z1fv()
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #2
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
 
 attributes #0 = { nounwind  }
 attributes #2 = { nounwind readnone }
@@ -41,25 +41,25 @@ attributes #3 = { nounwind }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!17, !21}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 190804) (llvm/trunk 190797)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [//<unknown>] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"/<unknown>", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"h", metadata !"h", metadata !"_Z1hiiiif", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32, i32, i32, i32, float)* @_Z1hiiiif, null, null, metadata !11, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [h]
-!5 = metadata !{metadata !"/arm.cpp", metadata !""}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [//arm.cpp]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9, metadata !9, metadata !9, metadata !9, metadata !10}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
-!11 = metadata !{metadata !12, metadata !13, metadata !14, metadata !15, metadata !16}
-!12 = metadata !{i32 786689, metadata !4, metadata !"", metadata !6, i32 16777219, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 3]
-!13 = metadata !{i32 786689, metadata !4, metadata !"", metadata !6, i32 33554435, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 3]
-!14 = metadata !{i32 786689, metadata !4, metadata !"", metadata !6, i32 50331651, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 3]
-!15 = metadata !{i32 786689, metadata !4, metadata !"", metadata !6, i32 67108867, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 3]
-!16 = metadata !{i32 786689, metadata !4, metadata !"x", metadata !6, i32 83886083, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 3]
-!17 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!18 = metadata !{i32 3, i32 0, metadata !4, null}
-!19 = metadata !{i32 4, i32 0, metadata !4, null}
-!20 = metadata !{i32 5, i32 0, metadata !4, null}
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 (trunk 190804) (llvm/trunk 190797)\001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [//<unknown>] [DW_LANG_C_plus_plus]
+!1 = !{!"/<unknown>", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00h\00h\00_Z1hiiiif\003\000\001\000\006\00256\001\003", !5, !6, !7, null, void (i32, i32, i32, i32, float)* @_Z1hiiiif, null, null, !11} ; [ DW_TAG_subprogram ] [line 3] [def] [h]
+!5 = !{!"/arm.cpp", !""}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [//arm.cpp]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9, !9, !9, !9, !10}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"0x24\00float\000\0032\0032\000\000\004", null, null} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!11 = !{!12, !13, !14, !15, !16}
+!12 = !{!"0x101\00\0016777219\000", !4, !6, !9} ; [ DW_TAG_arg_variable ] [line 3]
+!13 = !{!"0x101\00\0033554435\000", !4, !6, !9} ; [ DW_TAG_arg_variable ] [line 3]
+!14 = !{!"0x101\00\0050331651\000", !4, !6, !9} ; [ DW_TAG_arg_variable ] [line 3]
+!15 = !{!"0x101\00\0067108867\000", !4, !6, !9} ; [ DW_TAG_arg_variable ] [line 3]
+!16 = !{!"0x101\00x\0083886083\000", !4, !6, !10} ; [ DW_TAG_arg_variable ] [x] [line 3]
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !MDLocation(line: 3, scope: !4)
+!19 = !MDLocation(line: 4, scope: !4)
+!20 = !MDLocation(line: 5, scope: !4)
+!21 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/ARM/big-endian-dump.ll b/test/DebugInfo/ARM/big-endian-dump.ll
new file mode 100644
index 000000000000..7bfe24eb2300
--- /dev/null
+++ b/test/DebugInfo/ARM/big-endian-dump.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O0 -filetype=obj -mtriple=armeb-none-linux < %s | llvm-dwarfdump - | FileCheck %s
+
+; CHECK: file format ELF32-arm-big
+
+target datalayout = "E-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = !{i32 786449, !1, i32 12, !"clang version 3.6.0 ", i1 false, !"", i32 0, !2, !2, !2, !2, !2, !"", i32 1} ; [ DW_TAG_compile_unit ] [/a/empty.c] [DW_LANG_C99]
+!1 = !{!"empty.c", !"/a"}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 1}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{i32 1, !"min_enum_size", i32 4}
+!7 = !{!"clang version 3.6.0 "}
diff --git a/test/DebugInfo/ARM/cfi-eof-prologue.ll b/test/DebugInfo/ARM/cfi-eof-prologue.ll
new file mode 100644
index 000000000000..599806bf7073
--- /dev/null
+++ b/test/DebugInfo/ARM/cfi-eof-prologue.ll
@@ -0,0 +1,115 @@
+; struct A {
+;   A();
+;   virtual ~A();
+; };
+; struct B : A {
+;   B();
+;   virtual ~B();
+; };
+; B::B() {}
+; CHECK: __ZN1BC1Ev:
+; CHECK:     .loc	1 [[@LINE-2]] 0 prologue_end
+; CHECK-NOT: .loc	1 0 0 prologue_end
+
+; The location of the prologue_end marker should not be affected by the presence
+; of CFI instructions.
+
+; RUN: llc -O0 -filetype=asm -mtriple=thumbv7-apple-ios < %s | FileCheck %s
+; RUN: llc -O0 -filetype=asm -mtriple=thumbv6-apple-ios < %s | FileCheck %s
+
+; ModuleID = 'test1.cpp'
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+%struct.B = type { %struct.A }
+%struct.A = type { i32 (...)** }
+
+@_ZTV1B = external unnamed_addr constant [4 x i8*]
+
+; Function Attrs: nounwind
+define %struct.B* @_ZN1BC2Ev(%struct.B* %this) unnamed_addr #0 align 2 {
+entry:
+  tail call void @llvm.dbg.value(metadata %struct.B* %this, i64 0, metadata !30, metadata !40), !dbg !41
+  %0 = getelementptr inbounds %struct.B* %this, i32 0, i32 0, !dbg !42
+  %call = tail call %struct.A* @_ZN1AC2Ev(%struct.A* %0) #3, !dbg !42
+  %1 = getelementptr inbounds %struct.B* %this, i32 0, i32 0, i32 0, !dbg !42
+  store i32 (...)** bitcast (i8** getelementptr inbounds ([4 x i8*]* @_ZTV1B, i32 0, i32 2) to i32 (...)**), i32 (...)*** %1, align 4, !dbg !42, !tbaa !43
+  ret %struct.B* %this, !dbg !42
+}
+
+declare %struct.A* @_ZN1AC2Ev(%struct.A*)
+
+; Function Attrs: nounwind
+define %struct.B* @_ZN1BC1Ev(%struct.B* %this) unnamed_addr #0 align 2 {
+entry:
+  tail call void @llvm.dbg.value(metadata %struct.B* %this, i64 0, metadata !34, metadata !40), !dbg !46
+  tail call void @llvm.dbg.value(metadata %struct.B* %this, i64 0, metadata !47, metadata !40) #3, !dbg !49
+  %0 = getelementptr inbounds %struct.B* %this, i32 0, i32 0, !dbg !50
+  %call.i = tail call %struct.A* @_ZN1AC2Ev(%struct.A* %0) #3, !dbg !50
+  %1 = getelementptr inbounds %struct.B* %this, i32 0, i32 0, i32 0, !dbg !50
+  store i32 (...)** bitcast (i8** getelementptr inbounds ([4 x i8*]* @_ZTV1B, i32 0, i32 2) to i32 (...)**), i32 (...)*** %1, align 4, !dbg !50, !tbaa !43
+  ret %struct.B* %this, !dbg !48
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { nounwind }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!35, !36, !37, !38}
+!llvm.ident = !{!39}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 (trunk 224279) (llvm/trunk 224283)\001\00\000\00\001", !1, !2, !3, !27, !2, !2} ; [ DW_TAG_compile_unit ] [<stdin>] [DW_LANG_C_plus_plus]
+!1 = !{!"<stdin>", !""}
+!2 = !{}
+!3 = !{!4, !13}
+!4 = !{!"0x13\00B\005\0032\0032\000\000\000", !5, null, null, !6, !"_ZTS1A", null, !"_ZTS1B"} ; [ DW_TAG_structure_type ] [B] [line 5, size 32, align 32, offset 0] [def] [from ]
+!5 = !{!"test1.cpp", !""}
+!6 = !{!7, !8, !12}
+!7 = !{!"0x1c\00\000\000\000\000\000", null, !"_ZTS1B", !"_ZTS1A"} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from _ZTS1A]
+!8 = !{!"0x2e\00B\00B\00\006\000\000\000\000\00256\001\006", !5, !"_ZTS1B", !9, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 6] [B]
+!9 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !{null, !11}
+!11 = !{!"0xf\00\000\0032\0032\000\001088\00", null, null, !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [artificial] [from _ZTS1B]
+!12 = !{!"0x2e\00~B\00~B\00\007\000\000\001\000\00256\001\007", !5, !"_ZTS1B", !9, !"_ZTS1B", null, null, null, null} ; [ DW_TAG_subprogram ] [line 7] [~B]
+!13 = !{!"0x13\00A\001\0032\0032\000\000\000", !5, null, null, !14, !"_ZTS1A", null, !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
+!14 = !{!15, !22, !26}
+!15 = !{!"0xd\00_vptr$A\000\0032\000\000\0064", !5, !16, !17} ; [ DW_TAG_member ] [_vptr$A] [line 0, size 32, align 0, offset 0] [artificial] [from ]
+!16 = !{!"0x29", !5}                              ; [ DW_TAG_file_type ] [test1.cpp]
+!17 = !{!"0xf\00\000\0032\000\000\000", null, null, !18} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 0, offset 0] [from __vtbl_ptr_type]
+!18 = !{!"0xf\00__vtbl_ptr_type\000\0032\000\000\000", null, null, !19} ; [ DW_TAG_pointer_type ] [__vtbl_ptr_type] [line 0, size 32, align 0, offset 0] [from ]
+!19 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !20, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!20 = !{!21}
+!21 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!22 = !{!"0x2e\00A\00A\00\002\000\000\000\000\00256\001\002", !5, !"_ZTS1A", !23, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 2] [A]
+!23 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !24, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!24 = !{null, !25}
+!25 = !{!"0xf\00\000\0032\0032\000\001088\00", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [artificial] [from _ZTS1A]
+!26 = !{!"0x2e\00~A\00~A\00\003\000\000\001\000\00256\001\003", !5, !"_ZTS1A", !23, !"_ZTS1A", null, null, null, null} ; [ DW_TAG_subprogram ] [line 3] [~A]
+!27 = !{!28, !32}
+!28 = !{!"0x2e\00B\00B\00_ZN1BC2Ev\009\000\001\000\000\00256\001\009", !5, !"_ZTS1B", !9, null, %struct.B* (%struct.B*)* @_ZN1BC2Ev, null, !8, !29} ; [ DW_TAG_subprogram ] [line 9] [def] [B]
+!29 = !{!30}
+!30 = !{!"0x101\00this\0016777216\001088", !28, null, !31} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!31 = !{!"0xf\00\000\0032\0032\000\000", null, null, !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from _ZTS1B]
+!32 = !{!"0x2e\00B\00B\00_ZN1BC1Ev\009\000\001\000\000\00256\001\009", !5, !"_ZTS1B", !9, null, %struct.B* (%struct.B*)* @_ZN1BC1Ev, null, !8, !33} ; [ DW_TAG_subprogram ] [line 9] [def] [B]
+!33 = !{!34}
+!34 = !{!"0x101\00this\0016777216\001088", !32, null, !31} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!35 = !{i32 2, !"Dwarf Version", i32 4}
+!36 = !{i32 2, !"Debug Info Version", i32 2}
+!37 = !{i32 1, !"wchar_size", i32 4}
+!38 = !{i32 1, !"min_enum_size", i32 4}
+!39 = !{!"clang version 3.6.0 (trunk 224279) (llvm/trunk 224283)"}
+!40 = !{!"0x102"}                                 ; [ DW_TAG_expression ]
+!41 = !MDLocation(line: 0, scope: !28)
+!42 = !MDLocation(line: 9, scope: !28)
+!43 = !{!44, !44, i64 0}
+!44 = !{!"vtable pointer", !45, i64 0}
+!45 = !{!"Simple C/C++ TBAA"}
+!46 = !MDLocation(line: 0, scope: !32)
+!47 = !{!"0x101\00this\0016777216\001088", !28, null, !31, !48} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!48 = !MDLocation(line: 9, scope: !32)
+!49 = !MDLocation(line: 0, scope: !28, inlinedAt: !48)
+!50 = !MDLocation(line: 9, scope: !28, inlinedAt: !48)
diff --git a/test/DebugInfo/ARM/little-endian-dump.ll b/test/DebugInfo/ARM/little-endian-dump.ll
new file mode 100644
index 000000000000..b5f86e979be9
--- /dev/null
+++ b/test/DebugInfo/ARM/little-endian-dump.ll
@@ -0,0 +1,18 @@
+; RUN: llc -O0 -filetype=obj -mtriple=arm-none-linux < %s | llvm-dwarfdump - | FileCheck %s
+
+; CHECK: file format ELF32-arm-little
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = !{i32 786449, !1, i32 12, !"clang version 3.6.0 ", i1 false, !"", i32 0, !2, !2, !2, !2, !2, !"", i32 1} ; [ DW_TAG_compile_unit ] [/a/empty.c] [DW_LANG_C99]
+!1 = !{!"empty.c", !"/a"}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 1}
+!5 = !{i32 1, !"wchar_size", i32 4}
+!6 = !{i32 1, !"min_enum_size", i32 4}
+!7 = !{!"clang version 3.6.0 "}
diff --git a/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll b/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
index 0378c7514d84..2e137678e0e6 100644
--- a/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
+++ b/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
@@ -19,18 +19,18 @@ target triple = "thumbv7-apple-ios8.0.0"
 ; Function Attrs: nounwind optsize readnone
 define void @run(float %r) #0 {
 entry:
-  tail call void @llvm.dbg.declare(metadata !{float %r}, metadata !11), !dbg !22
+  tail call void @llvm.dbg.declare(metadata float %r, metadata !11, metadata !{!"0x102"}), !dbg !22
   %conv = fptosi float %r to i32, !dbg !23
-  tail call void @llvm.dbg.declare(metadata !{i32 %conv}, metadata !12), !dbg !23
+  tail call void @llvm.dbg.declare(metadata i32 %conv, metadata !12, metadata !{!"0x102"}), !dbg !23
   %vla = alloca float, i32 %conv, align 4, !dbg !24
-  tail call void @llvm.dbg.declare(metadata !{float* %vla}, metadata !14), !dbg !24
+  tail call void @llvm.dbg.declare(metadata float* %vla, metadata !14, metadata !{!"0x102"}), !dbg !24
 ; The VLA alloca should be described by a dbg.declare:
-; CHECK: call void @llvm.dbg.declare(metadata !{float* %vla}, metadata ![[VLA:.*]])
+; CHECK: call void @llvm.dbg.declare(metadata float* %vla, metadata ![[VLA:.*]], metadata {{.*}})
 ; The VLA alloca and following store into the array should not be lowered to like this:
-; CHECK-NOT:  call void @llvm.dbg.value(metadata !{float %r}, i64 0, metadata ![[VLA]])
+; CHECK-NOT:  call void @llvm.dbg.value(metadata float %r, i64 0, metadata ![[VLA]])
 ; the backend interprets this as "vla has the location of %r".
   store float %r, float* %vla, align 4, !dbg !25, !tbaa !26
-  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !18), !dbg !30
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !30
   %cmp8 = icmp sgt i32 %conv, 0, !dbg !30
   br i1 %cmp8, label %for.body, label %for.end, !dbg !30
 
@@ -41,7 +41,7 @@ for.body:                                         ; preds = %entry, %for.body.fo
   %div = fdiv float %0, %r, !dbg !31
   store float %div, float* %arrayidx2, align 4, !dbg !31, !tbaa !26
   %inc = add nsw i32 %i.09, 1, !dbg !30
-  tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !18), !dbg !30
+  tail call void @llvm.dbg.value(metadata i32 %inc, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !30
   %exitcond = icmp eq i32 %inc, %conv, !dbg !30
   br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge, !dbg !30
 
@@ -55,10 +55,10 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { nounwind optsize readnone "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -67,37 +67,37 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!20, !33}
 !llvm.ident = !{!21}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Data/radar/15464571/<unknown>] [DW_LANG_C99]
-!1 = metadata !{metadata !"<unknown>", metadata !"/Volumes/Data/radar/15464571"}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"run", metadata !"run", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (float)* @run, null, null, metadata !10, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [run]
-!5 = metadata !{metadata !"test.c", metadata !"/Volumes/Data/radar/15464571"}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/Volumes/Data/radar/15464571/test.c]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
-!10 = metadata !{metadata !11, metadata !12, metadata !14, metadata !18}
-!11 = metadata !{i32 786689, metadata !4, metadata !"r", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [r] [line 1]
-!12 = metadata !{i32 786688, metadata !4, metadata !"count", metadata !6, i32 3, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [count] [line 3]
-!13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!14 = metadata !{i32 786688, metadata !4, metadata !"vla", metadata !6, i32 4, metadata !15, i32 8192, i32 0} ; [ DW_TAG_auto_variable ] [vla] [line 4]
-!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from float]
-!16 = metadata !{metadata !17}
-!17 = metadata !{i32 786465, i64 0, i64 -1}       ; [ DW_TAG_subrange_type ] [unbounded]
-!18 = metadata !{i32 786688, metadata !19, metadata !"i", metadata !6, i32 6, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 6]
-!19 = metadata !{i32 786443, metadata !5, metadata !4, i32 6, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Data/radar/15464571/test.c]
-!20 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!21 = metadata !{metadata !"clang version 3.4 "}
-!22 = metadata !{i32 1, i32 0, metadata !4, null}
-!23 = metadata !{i32 3, i32 0, metadata !4, null}
-!24 = metadata !{i32 4, i32 0, metadata !4, null}
-!25 = metadata !{i32 5, i32 0, metadata !4, null}
-!26 = metadata !{metadata !27, metadata !27, i64 0}
-!27 = metadata !{metadata !"float", metadata !28, i64 0}
-!28 = metadata !{metadata !"omnipotent char", metadata !29, i64 0}
-!29 = metadata !{metadata !"Simple C/C++ TBAA"}
-!30 = metadata !{i32 6, i32 0, metadata !19, null}
-!31 = metadata !{i32 7, i32 0, metadata !19, null}
-!32 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
-!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 \001\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/Volumes/Data/radar/15464571/<unknown>] [DW_LANG_C99]
+!1 = !{!"<unknown>", !"/Volumes/Data/radar/15464571"}
+!2 = !{i32 0}
+!3 = !{!4}
+!4 = !{!"0x2e\00run\00run\00\001\000\001\000\006\00256\001\002", !5, !6, !7, null, void (float)* @run, null, null, !10} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [run]
+!5 = !{!"test.c", !"/Volumes/Data/radar/15464571"}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [/Volumes/Data/radar/15464571/test.c]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9}
+!9 = !{!"0x24\00float\000\0032\0032\000\000\004", null, null} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!10 = !{!11, !12, !14, !18}
+!11 = !{!"0x101\00r\0016777217\000", !4, !6, !9} ; [ DW_TAG_arg_variable ] [r] [line 1]
+!12 = !{!"0x100\00count\003\000", !4, !6, !13} ; [ DW_TAG_auto_variable ] [count] [line 3]
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!14 = !{!"0x100\00vla\004\008192", !4, !6, !15} ; [ DW_TAG_auto_variable ] [vla] [line 4]
+!15 = !{!"0x1\00\000\000\0032\000\000", null, null, !9, !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from float]
+!16 = !{!17}
+!17 = !{!"0x21\000\00-1"}       ; [ DW_TAG_subrange_type ] [unbounded]
+!18 = !{!"0x100\00i\006\000", !19, !6, !13} ; [ DW_TAG_auto_variable ] [i] [line 6]
+!19 = !{!"0xb\006\000\000", !5, !4} ; [ DW_TAG_lexical_block ] [/Volumes/Data/radar/15464571/test.c]
+!20 = !{i32 2, !"Dwarf Version", i32 2}
+!21 = !{!"clang version 3.4 "}
+!22 = !MDLocation(line: 1, scope: !4)
+!23 = !MDLocation(line: 3, scope: !4)
+!24 = !MDLocation(line: 4, scope: !4)
+!25 = !MDLocation(line: 5, scope: !4)
+!26 = !{!27, !27, i64 0}
+!27 = !{!"float", !28, i64 0}
+!28 = !{!"omnipotent char", !29, i64 0}
+!29 = !{!"Simple C/C++ TBAA"}
+!30 = !MDLocation(line: 6, scope: !19)
+!31 = !MDLocation(line: 7, scope: !19)
+!32 = !MDLocation(line: 8, scope: !4)
+!33 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/ARM/processes-relocations.ll b/test/DebugInfo/ARM/processes-relocations.ll
new file mode 100644
index 000000000000..b3db457ffe95
--- /dev/null
+++ b/test/DebugInfo/ARM/processes-relocations.ll
@@ -0,0 +1,15 @@
+; RUN: llc -filetype=obj -O0 < %s -mtriple arm-unknown-linux | \
+; RUN:      llvm-dwarfdump - 2>&1 | FileCheck %s
+
+; CHECK-NOT: failed to compute relocation
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = !{i32 786449, !1, i32 12, !"clang version 3.6.0 ", i1 false, !"", i32 0, !2, !2, !2, !2, !2, !"", i32 1} ; [ DW_TAG_compile_unit ] [/a/empty.c] [DW_LANG_C99]
+!1 = !{!"empty.c", !"/a"}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 1}
+!5 = !{!"clang version 3.6.0 "}
diff --git a/test/DebugInfo/ARM/s-super-register.ll b/test/DebugInfo/ARM/s-super-register.ll
new file mode 100644
index 000000000000..62a315e508a1
--- /dev/null
+++ b/test/DebugInfo/ARM/s-super-register.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s - -filetype=obj | llvm-dwarfdump -debug-dump=loc - | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-macosx10.6.7"
+
+; The S registers on ARM are expressed as pieces of their super-registers in DWARF.
+;
+; 0x90   DW_OP_regx of super-register
+; 0x93   DW_OP_piece
+; 0x9d   DW_OP_bit_piece
+; CHECK:            Location description: 90 {{.. .. ((93 ..)|(9d .. ..)) $}}
+
+define void @_Z3foov() optsize ssp {
+entry:
+  %call = tail call float @_Z3barv() optsize, !dbg !11
+  tail call void @llvm.dbg.value(metadata float %call, i64 0, metadata !5, metadata !{!"0x102"}), !dbg !11
+  %call16 = tail call float @_Z2f2v() optsize, !dbg !12
+  %cmp7 = fcmp olt float %call, %call16, !dbg !12
+  br i1 %cmp7, label %for.body, label %for.end, !dbg !12
+
+for.body:                                         ; preds = %entry, %for.body
+  %k.08 = phi float [ %inc, %for.body ], [ %call, %entry ]
+  %call4 = tail call float @_Z2f3f(float %k.08) optsize, !dbg !13
+  %inc = fadd float %k.08, 1.000000e+00, !dbg !14
+  %call1 = tail call float @_Z2f2v() optsize, !dbg !12
+  %cmp = fcmp olt float %inc, %call1, !dbg !12
+  br i1 %cmp, label %for.body, label %for.end, !dbg !12
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void, !dbg !15
+}
+
+declare float @_Z3barv() optsize
+
+declare float @_Z2f2v() optsize
+
+declare float @_Z2f3f(float) optsize
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!20}
+
+!0 = !{!"0x11\004\00clang version 3.0 (trunk 130845)\001\00\000\00\001", !18, !19, !19, !16, null,  null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"0x2e\00foo\00foo\00_Z3foov\005\000\001\000\006\00256\001\005", !18, !2, !3, null, void ()* @_Z3foov, null, null, !17} ; [ DW_TAG_subprogram ] [line 5] [def] [foo]
+!2 = !{!"0x29", !18} ; [ DW_TAG_file_type ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !18, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null}
+!5 = !{!"0x100\00k\006\000", !6, !2, !7} ; [ DW_TAG_auto_variable ]
+!6 = !{!"0xb\005\0012\000", !18, !1} ; [ DW_TAG_lexical_block ]
+!7 = !{!"0x24\00float\000\0032\0032\000\000\004", null, !0} ; [ DW_TAG_base_type ]
+!8 = !{!"0x100\00y\008\000", !9, !2, !7} ; [ DW_TAG_auto_variable ]
+!9 = !{!"0xb\007\0025\002", !18, !10} ; [ DW_TAG_lexical_block ]
+!10 = !{!"0xb\007\003\001", !18, !6} ; [ DW_TAG_lexical_block ]
+!11 = !MDLocation(line: 6, column: 18, scope: !6)
+!12 = !MDLocation(line: 7, column: 3, scope: !6)
+!13 = !MDLocation(line: 8, column: 20, scope: !9)
+!14 = !MDLocation(line: 7, column: 20, scope: !10)
+!15 = !MDLocation(line: 10, column: 1, scope: !6)
+!16 = !{!1}
+!17 = !{!5, !8}
+!18 = !{!"k.cc", !"/private/tmp"}
+!19 = !{i32 0}
+!20 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/ARM/sectionorder.ll b/test/DebugInfo/ARM/sectionorder.ll
index a7030cd1b670..41677c7dc05d 100644
--- a/test/DebugInfo/ARM/sectionorder.ll
+++ b/test/DebugInfo/ARM/sectionorder.ll
@@ -11,7 +11,8 @@ target triple = "thumbv7-apple-ios"
 !llvm.module.flags = !{!3, !4}
 !llvm.dbg.cu = !{!0}
 
-!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"test.c", metadata !"/Volumes/Data/radar/15623193", metadata !"LLVM", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !1} ; [ DW_TAG_compile_unit ] [/Volumes/Data/radar/15623193/test.c] [DW_LANG_C99]
-!1 = metadata !{}
-!3 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00LLVM\001\00\00\00\00", !5, !1, !1, !1, !1, null} ; [ DW_TAG_compile_unit ] [/Volumes/Data/radar/15623193/test.c] [DW_LANG_C99]
+!1 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 2}
+!4 = !{i32 1, !"Debug Info Version", i32 2}
+!5 = !{!"test.c", !"/Volumes/Data/radar/15623193"}
diff --git a/test/DebugInfo/ARM/selectiondag-deadcode.ll b/test/DebugInfo/ARM/selectiondag-deadcode.ll
index cc151e039faf..0626d2f1f666 100644
--- a/test/DebugInfo/ARM/selectiondag-deadcode.ll
+++ b/test/DebugInfo/ARM/selectiondag-deadcode.ll
@@ -13,15 +13,15 @@ _ZN7Vector39NormalizeEv.exit:                     ; preds = %1, %0
   ; and SelectionDAGISel crashes.  It should definitely not
   ; crash. Drop the dbg_value instead.
   ; CHECK-NOT: "matrix"
-  tail call void @llvm.dbg.declare(metadata !{%class.Matrix3.0.6.10* %agg.result}, metadata !45)
+  tail call void @llvm.dbg.declare(metadata %class.Matrix3.0.6.10* %agg.result, metadata !45, metadata !{!"0x102"})
   %2 = getelementptr inbounds %class.Matrix3.0.6.10* %agg.result, i32 0, i32 0, i32 8
   ret void
 }
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 declare arm_aapcscc void @_ZL4Sqrtd() #2
-!4 = metadata !{i32 786434, metadata !5, null, metadata !"Matrix3", i32 20, i64 288, i64 32, i32 0, i32 0, null, null, i32 0, null, null, metadata !"_ZTS7Matrix3"} ; [ DW_TAG_class_type ] [Matrix3] [line 20, size 288, align 32, offset 0] [def] [from ]
-!5 = metadata !{metadata !"test.ii", metadata !"/Volumes/Data/radar/15094721"}
-!39 = metadata !{i32 786478, metadata !5, metadata !40, metadata !"GetMatrix", metadata !"GetMatrix", metadata !"_Z9GetMatrixv", i32 32, metadata !41, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.Matrix3.0.6.10*)* @_Z9GetMatrixv, null, null, null, i32 32} ; [ DW_TAG_subprogram ] [line 32] [def] [GetMatrix]
-!40 = metadata !{i32 786473, metadata !5}         ; [ DW_TAG_file_type ] [/Volumes/Data/radar/15094721/test.ii]
-!41 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, null, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!45 = metadata !{i32 786688, metadata !39, metadata !"matrix", metadata !40, i32 35, metadata !4, i32 8192, i32 0} ; [ DW_TAG_auto_variable ] [matrix] [line 35]
+!4 = !{!"0x2\00Matrix3\0020\00288\0032\000\000\000", !5, null, null, null, null, null, !"_ZTS7Matrix3"} ; [ DW_TAG_class_type ] [Matrix3] [line 20, size 288, align 32, offset 0] [def] [from ]
+!5 = !{!"test.ii", !"/Volumes/Data/radar/15094721"}
+!39 = !{!"0x2e\00GetMatrix\00GetMatrix\00_Z9GetMatrixv\0032\000\001\000\006\00256\001\0032", !5, !40, !41, null, void (%class.Matrix3.0.6.10*)* @_Z9GetMatrixv, null, null, null} ; [ DW_TAG_subprogram ] [line 32] [def] [GetMatrix]
+!40 = !{!"0x29", !5}         ; [ DW_TAG_file_type ] [/Volumes/Data/radar/15094721/test.ii]
+!41 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, null, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!45 = !{!"0x100\00matrix\0035\008192", !39, !40, !4} ; [ DW_TAG_auto_variable ] [matrix] [line 35]
diff --git a/test/DebugInfo/ARM/tls.ll b/test/DebugInfo/ARM/tls.ll
index e54d16004762..39436fe63ca6 100644
--- a/test/DebugInfo/ARM/tls.ll
+++ b/test/DebugInfo/ARM/tls.ll
@@ -16,13 +16,13 @@
 ; The debug relocation of the address of the tls variable
 ; CHECK: .long x(tlsldo)
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/tls.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"tls.c", metadata !"/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !5, i32 1, metadata !6, i32 0, i32 1, i32* @x, null} ; [ DW_TAG_variable ] [x] [line 1] [def]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/tls.c]
-!6 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!9 = metadata !{metadata !"clang version 3.5 "}
+!0 = !{!"0x11\0012\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !2, !3, !2} ; [ DW_TAG_compile_unit ] [/tmp/tls.c] [DW_LANG_C99]
+!1 = !{!"tls.c", !"/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x34\00x\00x\00\001\000\001", null, !5, !6, i32* @x, null} ; [ DW_TAG_variable ] [x] [line 1] [def]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/tls.c]
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 1, !"Debug Info Version", i32 2}
+!9 = !{!"clang version 3.5 "}
diff --git a/test/DebugInfo/COFF/asan-module-ctor.ll b/test/DebugInfo/COFF/asan-module-ctor.ll
index c1d8e75292cd..e2c7aef750af 100644
--- a/test/DebugInfo/COFF/asan-module-ctor.ll
+++ b/test/DebugInfo/COFF/asan-module-ctor.ll
@@ -13,6 +13,9 @@
 ; X86-NEXT: calll   ___asan_init_v3
 ; X86-NEXT: retl
 
+; Make sure we don't put any DWARF debug info for ASan-instrumented modules.
+; X86-NOT: DWARF
+
 ; ModuleID = 'asan.c'
 target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
 target triple = "i686-pc-win32"
@@ -78,14 +81,14 @@ attributes #0 = { nounwind sanitize_address "less-precise-fpmad"="false" "no-fra
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [D:\/asan.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"asan.c", metadata !"D:\5C"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [D:\/asan.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!9 = metadata !{metadata !"clang version 3.5.0 "}
-!10 = metadata !{i32 2, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5.0 \000\00\000\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [D:\/asan.c] [DW_LANG_C99]
+!1 = !{!"asan.c", !"D:\5C"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, i32 ()* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [D:\/asan.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 1, !"Debug Info Version", i32 2}
+!9 = !{!"clang version 3.5.0 "}
+!10 = !MDLocation(line: 2, scope: !4)
diff --git a/test/DebugInfo/COFF/asan-module-without-functions.ll b/test/DebugInfo/COFF/asan-module-without-functions.ll
index 419faa0e0f35..5bd4c9164523 100644
--- a/test/DebugInfo/COFF/asan-module-without-functions.ll
+++ b/test/DebugInfo/COFF/asan-module-without-functions.ll
@@ -45,9 +45,9 @@ define internal void @asan.module_dtor() {
 !llvm.module.flags = !{!3, !4}
 !llvm.ident = !{!5}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [D:\/asan.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"asan.c", metadata !"D:\5C"}
-!2 = metadata !{}
-!3 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!5 = metadata !{metadata !"clang version 3.5.0 "}
+!0 = !{!"0x11\0012\00clang version 3.5.0 \000\00\000\00\002", !1, !2, !2, !2, !2, !2} ; [ DW_TAG_compile_unit ] [D:\/asan.c] [DW_LANG_C99]
+!1 = !{!"asan.c", !"D:\5C"}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 1, !"Debug Info Version", i32 2}
+!5 = !{!"clang version 3.5.0 "}
diff --git a/test/DebugInfo/COFF/asm.ll b/test/DebugInfo/COFF/asm.ll
index 8c9dff0cdf15..44ee4f9ce4f4 100644
--- a/test/DebugInfo/COFF/asm.ll
+++ b/test/DebugInfo/COFF/asm.ll
@@ -13,21 +13,45 @@
 ;  6 }
 
 ; X86-LABEL: _f:
-; X86-NEXT: # BB
+; X86:      # BB
 ; X86-NEXT: [[ASM_LINE:^L.*]]:{{$}}
 ; X86:      [[CALL_LINE:^L.*]]:{{$}}
-; X86-NEXT: calll   _g
+; X86:      calll   _g
 ; X86-NEXT: [[RETURN_STMT:.*]]:
-; X86-NEXT: ret
-; X86-NEXT: [[END_OF_F:.*]]:
+; X86:      ret
+; X86-NEXT: L{{.*}}:
+; X86-NEXT: [[END_OF_F:^L.*]]:
 ;
-; X86-LABEL: .section        .debug$S,"rnd"
+; X86-LABEL: .section        .debug$S,"rd"
 ; X86-NEXT: .long   4
+; Symbol subsection
+; X86-NEXT: .long   241
+; X86-NEXT: .long [[F1_END:.*]]-[[F1_START:.*]]
+; X86-NEXT: [[F1_START]]:
+; X86-NEXT: .short [[PROC_SEGMENT_END:.*]]-[[PROC_SEGMENT_START:.*]]
+; X86-NEXT: [[PROC_SEGMENT_START]]:
+; X86-NEXT: .short  4423
+; X86-NEXT: .zero   12
+; X86-NEXT: .long [[END_OF_F]]-_f
+; X86-NEXT: .zero   12
+; X86-NEXT: .secrel32 _f
+; X86-NEXT: .secidx _f
+; X86-NEXT: .byte   0
+; X86-NEXT: .byte   102
+; X86-NEXT: .byte   0
+; X86-NEXT: [[PROC_SEGMENT_END]]:
+; X86-NEXT: .short  2
+; X86-NEXT: .short  4431
+; X86-NEXT: [[F1_END]]:
+; Padding
+; X86-NEXT: .zero   3
+; Line table
 ; X86-NEXT: .long   242
 ; X86-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
 ; X86-NEXT: [[F2_START]]:
 ; X86-NEXT: .secrel32 _f
 ; X86-NEXT: .secidx _f
+; X86-NEXT: .short 0
 ; X86-NEXT: .long [[END_OF_F]]-_f
 ; X86-NEXT: [[FILE_SEGMENT_START:[^:]*]]:
 ; X86-NEXT: .long   0
@@ -60,8 +84,20 @@
 ; OBJ32:      Characteristics [ (0x42100040)
 ; OBJ32:      ]
 ; OBJ32:      Relocations [
-; OBJ32-NEXT:   0xC IMAGE_REL_I386_SECREL _f
-; OBJ32-NEXT:   0x10 IMAGE_REL_I386_SECTION _f
+; OBJ32-NEXT:   0x2C IMAGE_REL_I386_SECREL _f
+; OBJ32-NEXT:   0x30 IMAGE_REL_I386_SECTION _f
+; OBJ32-NEXT:   0x44 IMAGE_REL_I386_SECREL _f
+; OBJ32-NEXT:   0x48 IMAGE_REL_I386_SECTION _f
+; OBJ32-NEXT: ]
+; OBJ32:      Subsection [
+; OBJ32-NEXT:   Type: 0xF1
+; OBJ32-NOT:    ]
+; OBJ32:        ProcStart {
+; OBJ32-NEXT:     DisplayName: f
+; OBJ32-NEXT:     Section: _f
+; OBJ32-NEXT:     CodeSize: 0x6
+; OBJ32-NEXT:   }
+; OBJ32-NEXT:   ProcEnd
 ; OBJ32-NEXT: ]
 ; OBJ32:      FunctionLineTable [
 ; OBJ32-NEXT:   Name: _f
@@ -80,23 +116,47 @@
 
 ; X64-LABEL: f:
 ; X64-NEXT: [[START:.*]]:{{$}}
-; X64-NEXT: # BB
-; X64-NEXT: subq    $40, %rsp
+; X64:      # BB
+; X64:      subq    $40, %rsp
 ; X64-NEXT: [[ASM_LINE:.*]]:{{$}}
 ; X64:      [[CALL_LINE:.*]]:{{$}}
-; X64-NEXT: callq   g
+; X64:      callq   g
 ; X64-NEXT: [[EPILOG_AND_RET:.*]]:
-; X64-NEXT: addq    $40, %rsp
+; X64:      addq    $40, %rsp
 ; X64-NEXT: ret
+; X64-NEXT: .L{{.*}}:
 ; X64-NEXT: [[END_OF_F:.*]]:
 ;
-; X64-LABEL: .section        .debug$S,"rnd"
+; X64-LABEL: .section        .debug$S,"rd"
 ; X64-NEXT: .long   4
+; Symbol subsection
+; X64-NEXT: .long   241
+; X64-NEXT: .long [[F1_END:.*]]-[[F1_START:.*]]
+; X64-NEXT: [[F1_START]]:
+; X64-NEXT: .short [[PROC_SEGMENT_END:.*]]-[[PROC_SEGMENT_START:.*]]
+; X64-NEXT: [[PROC_SEGMENT_START]]:
+; X64-NEXT: .short  4423
+; X64-NEXT: .zero   12
+; X64-NEXT: .long [[END_OF_F]]-f
+; X64-NEXT: .zero   12
+; X64-NEXT: .secrel32 f
+; X64-NEXT: .secidx f
+; X64-NEXT: .byte   0
+; X64-NEXT: .byte   102
+; X64-NEXT: .byte   0
+; X64-NEXT: [[PROC_SEGMENT_END]]:
+; X64-NEXT: .short  2
+; X64-NEXT: .short  4431
+; X64-NEXT: [[F1_END]]:
+; Padding
+; X64-NEXT: .zero   3
+; Line table
 ; X64-NEXT: .long   242
 ; X64-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
 ; X64-NEXT: [[F2_START]]:
 ; X64-NEXT: .secrel32 f
 ; X64-NEXT: .secidx f
+; X64-NEXT: .short 0
 ; X64-NEXT: .long [[END_OF_F]]-f
 ; X64-NEXT: [[FILE_SEGMENT_START:[^:]*]]:
 ; X64-NEXT: .long   0
@@ -131,8 +191,20 @@
 ; OBJ64:      Characteristics [ (0x42100040)
 ; OBJ64:      ]
 ; OBJ64:      Relocations [
-; OBJ64-NEXT:   0xC IMAGE_REL_AMD64_SECREL f
-; OBJ64-NEXT:   0x10 IMAGE_REL_AMD64_SECTION f
+; OBJ64-NEXT:   0x2C IMAGE_REL_AMD64_SECREL f
+; OBJ64-NEXT:   0x30 IMAGE_REL_AMD64_SECTION f
+; OBJ64-NEXT:   0x44 IMAGE_REL_AMD64_SECREL f
+; OBJ64-NEXT:   0x48 IMAGE_REL_AMD64_SECTION f
+; OBJ64-NEXT: ]
+; OBJ64:      Subsection [
+; OBJ64-NEXT:   Type: 0xF1
+; OBJ64-NOT:    ]
+; OBJ64:        ProcStart {
+; OBJ64-NEXT:     DisplayName: f
+; OBJ64-NEXT:     Section: f
+; OBJ64-NEXT:     CodeSize: 0xE
+; OBJ64-NEXT:   }
+; OBJ64-NEXT:   ProcEnd
 ; OBJ64-NEXT: ]
 ; OBJ64:      FunctionLineTable [
 ; OBJ64-NEXT:   Name: f
@@ -167,18 +239,18 @@ attributes #2 = { nounwind }
 !llvm.module.flags = !{!9, !10}
 !llvm.ident = !{!11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [D:\/<unknown>] [DW_LANG_C99]
-!1 = metadata !{metadata !"<unknown>", metadata !"D:\5C"}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @f, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
-!5 = metadata !{metadata !"asm.c", metadata !"D:\5C"}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [D:\/asm.c]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null}
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!11 = metadata !{metadata !"clang version 3.5 "}
-!12 = metadata !{i32 4, i32 0, metadata !4, null}
-!13 = metadata !{i32 5, i32 0, metadata !4, null}
-!14 = metadata !{i32 6, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [D:\/<unknown>] [DW_LANG_C99]
+!1 = !{!"<unknown>", !"D:\5C"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00f\00f\00\003\000\001\000\006\00256\000\003", !5, !6, !7, null, void ()* @f, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
+!5 = !{!"asm.c", !"D:\5C"}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [D:\/asm.c]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null}
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 1, !"Debug Info Version", i32 2}
+!11 = !{!"clang version 3.5 "}
+!12 = !MDLocation(line: 4, scope: !4)
+!13 = !MDLocation(line: 5, scope: !4)
+!14 = !MDLocation(line: 6, scope: !4)
diff --git a/test/DebugInfo/COFF/cpp-mangling.ll b/test/DebugInfo/COFF/cpp-mangling.ll
new file mode 100644
index 000000000000..73c8e1594e60
--- /dev/null
+++ b/test/DebugInfo/COFF/cpp-mangling.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mcpu=core2 -mtriple=i686-pc-win32 -o - -O0 < %s | llvm-mc -triple=i686-pc-win32 -filetype=obj | llvm-readobj -s -sr -codeview-linetables | FileCheck %s
+
+; This LL file was generated by running clang on the following code:
+; D:\src.cpp:
+;  1 namespace foo {
+;  2 int bar(int x) {
+;  3   return x * 2;
+;  4 }
+;  5 }
+
+; CHECK:        ProcStart {
+; FIXME: The display name should in fact be "foo::bar", see PR21528
+; CHECK-NEXT:     DisplayName: ?bar@foo@@YAHH@Z
+; CHECK-NEXT:     Section: ?bar@foo@@YAHH@Z
+
+; Function Attrs: nounwind
+define i32 @"\01?bar@foo@@YAHH@Z"(i32 %x) #0 {
+entry:
+  %x.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  %0 = load i32* %x.addr, align 4, !dbg !11
+  %mul = mul nsw i32 %0, 2, !dbg !11
+  ret i32 %mul, !dbg !11
+}
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 \000\00\000\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [D:\/<stdin>] [DW_LANG_C_plus_plus]
+!1 = !{!"<stdin>", !"D:\5C"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00bar\00bar\00\002\000\001\000\000\00256\000\002", !5, !6, !7, null, i32 (i32)* @"\01?bar@foo@@YAHH@Z", null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [bar]
+!5 = !{!"src.cpp", !"D:\5C"}
+!6 = !{!"0x29", !5}    ; [ DW_TAG_file_type ] [D:\/src.cpp]
+!7 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 2}
+!10 = !{!"clang version 3.6.0 "}
+!11 = !MDLocation(line: 3, scope: !4)
diff --git a/test/DebugInfo/COFF/multifile.ll b/test/DebugInfo/COFF/multifile.ll
index c04bdb3b7c94..5cdd6dc2e51b 100644
--- a/test/DebugInfo/COFF/multifile.ll
+++ b/test/DebugInfo/COFF/multifile.ll
@@ -7,34 +7,58 @@
 ; D:\input.c:
 ;  1 void g(void);
 ;  2
-;  3 void f() {
+;  3 void f(void) {
 ;  4 #line 1 "one.c"
-;  5   g(void);
+;  5   g();
 ;  6 #line 2 "two.c"
-;  7   g(void);
+;  7   g();
 ;  8 #line 7 "one.c"
-;  9   g(void);
+;  9   g();
 ; 10 }
 
 ; X86-LABEL: _f:
-; X86-NEXT: # BB
+; X86:      # BB
 ; X86-NEXT: [[CALL_LINE_1:.*]]:{{$}}
-; X86-NEXT: calll   _g
+; X86:      calll   _g
 ; X86-NEXT: [[CALL_LINE_2:.*]]:{{$}}
-; X86-NEXT: calll   _g
+; X86:      calll   _g
 ; X86-NEXT: [[CALL_LINE_3:.*]]:{{$}}
-; X86-NEXT: calll   _g
+; X86:      calll   _g
 ; X86-NEXT: [[RETURN_STMT:.*]]:
-; X86-NEXT: ret
+; X86:      ret
+; X86-NEXT: L{{.*}}:
 ; X86-NEXT: [[END_OF_F:.*]]:
 ;
-; X86-LABEL: .section        .debug$S,"rnd"
+; X86-LABEL: .section        .debug$S,"rd"
 ; X86-NEXT: .long   4
+; Symbol subsection
+; X86-NEXT: .long   241
+; X86-NEXT: .long [[F1_END:.*]]-[[F1_START:.*]]
+; X86-NEXT: [[F1_START]]:
+; X86-NEXT: .short [[PROC_SEGMENT_END:.*]]-[[PROC_SEGMENT_START:.*]]
+; X86-NEXT: [[PROC_SEGMENT_START]]:
+; X86-NEXT: .short  4423
+; X86-NEXT: .zero   12
+; X86-NEXT: .long [[END_OF_F]]-_f
+; X86-NEXT: .zero   12
+; X86-NEXT: .secrel32 _f
+; X86-NEXT: .secidx _f
+; X86-NEXT: .byte   0
+; X86-NEXT: .byte   102
+; X86-NEXT: .byte   0
+; X86-NEXT: [[PROC_SEGMENT_END]]:
+; X86-NEXT: .short  2
+; X86-NEXT: .short  4431
+; X86-NEXT: [[F1_END]]:
+; Padding
+; X86-NEXT: .zero   3
+; Line table
 ; X86-NEXT: .long   242
 ; X86-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
 ; X86-NEXT: [[F2_START]]:
 ; X86-NEXT: .secrel32 _f
 ; X86-NEXT: .secidx _f
+; X86-NEXT: .short 0
 ; X86-NEXT: .long [[END_OF_F]]-_f
 ; Segment for file 'D:\\one.c' begins
 ; X86-NEXT: [[FILE_SEGMENT_START:[^:]*]]:
@@ -85,8 +109,20 @@
 ; OBJ32:      Characteristics [ (0x42100040)
 ; OBJ32:      ]
 ; OBJ32:      Relocations [
-; OBJ32-NEXT:   0xC IMAGE_REL_I386_SECREL _f
-; OBJ32-NEXT:   0x10 IMAGE_REL_I386_SECTION _f
+; OBJ32-NEXT:   0x2C IMAGE_REL_I386_SECREL _f
+; OBJ32-NEXT:   0x30 IMAGE_REL_I386_SECTION _f
+; OBJ32-NEXT:   0x44 IMAGE_REL_I386_SECREL _f
+; OBJ32-NEXT:   0x48 IMAGE_REL_I386_SECTION _f
+; OBJ32-NEXT: ]
+; OBJ32:      Subsection [
+; OBJ32-NEXT:   Type: 0xF1
+; OBJ32-NOT:    ]
+; OBJ32:        ProcStart {
+; OBJ32-NEXT:     DisplayName: f
+; OBJ32-NEXT:     Section: _f
+; OBJ32-NEXT:     CodeSize: 0x10
+; OBJ32-NEXT:   }
+; OBJ32-NEXT:   ProcEnd
 ; OBJ32-NEXT: ]
 ; OBJ32:      FunctionLineTable [
 ; OBJ32-NEXT:   Name: _f
@@ -109,26 +145,50 @@
 
 ; X64-LABEL: f:
 ; X64-NEXT: [[START:.*]]:{{$}}
-; X64-NEXT: # BB
-; X64-NEXT: subq    $40, %rsp
+; X64:      # BB
+; X64:      subq    $40, %rsp
 ; X64-NEXT: [[CALL_LINE_1:.*]]:{{$}}
-; X64-NEXT: callq   g
+; X64:      callq   g
 ; X64-NEXT: [[CALL_LINE_2:.*]]:{{$}}
-; X64-NEXT: callq   g
+; X64:      callq   g
 ; X64-NEXT: [[CALL_LINE_3:.*]]:{{$}}
-; X64-NEXT: callq   g
+; X64:      callq   g
 ; X64-NEXT: [[EPILOG_AND_RET:.*]]:
-; X64-NEXT: addq    $40, %rsp
+; X64:      addq    $40, %rsp
 ; X64-NEXT: ret
+; X64-NEXT: .L{{.*}}:
 ; X64-NEXT: [[END_OF_F:.*]]:
 ;
-; X64-LABEL: .section        .debug$S,"rnd"
+; X64-LABEL: .section        .debug$S,"rd"
 ; X64-NEXT: .long   4
+; Symbol subsection
+; X64-NEXT: .long   241
+; X64-NEXT: .long [[F1_END:.*]]-[[F1_START:.*]]
+; X64-NEXT: [[F1_START]]:
+; X64-NEXT: .short [[PROC_SEGMENT_END:.*]]-[[PROC_SEGMENT_START:.*]]
+; X64-NEXT: [[PROC_SEGMENT_START]]:
+; X64-NEXT: .short  4423
+; X64-NEXT: .zero   12
+; X64-NEXT: .long [[END_OF_F]]-f
+; X64-NEXT: .zero   12
+; X64-NEXT: .secrel32 f
+; X64-NEXT: .secidx f
+; X64-NEXT: .byte   0
+; X64-NEXT: .byte   102
+; X64-NEXT: .byte   0
+; X64-NEXT: [[PROC_SEGMENT_END]]:
+; X64-NEXT: .short  2
+; X64-NEXT: .short  4431
+; X64-NEXT: [[F1_END]]:
+; Padding
+; X64-NEXT: .zero   3
+; Line table
 ; X64-NEXT: .long   242
 ; X64-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
 ; X64-NEXT: [[F2_START]]:
 ; X64-NEXT: .secrel32 f
 ; X64-NEXT: .secidx f
+; X64-NEXT: .short 0
 ; X64-NEXT: .long [[END_OF_F]]-f
 ; Segment for file 'D:\\input.c' begins
 ; X64-NEXT: [[FILE_SEGMENT_START:[^:]*]]:
@@ -191,8 +251,20 @@
 ; OBJ64:      Characteristics [ (0x42100040)
 ; OBJ64:      ]
 ; OBJ64:      Relocations [
-; OBJ64-NEXT:   0xC IMAGE_REL_AMD64_SECREL f
-; OBJ64-NEXT:   0x10 IMAGE_REL_AMD64_SECTION f
+; OBJ64-NEXT:   0x2C IMAGE_REL_AMD64_SECREL f
+; OBJ64-NEXT:   0x30 IMAGE_REL_AMD64_SECTION f
+; OBJ64-NEXT:   0x44 IMAGE_REL_AMD64_SECREL f
+; OBJ64-NEXT:   0x48 IMAGE_REL_AMD64_SECTION f
+; OBJ64-NEXT: ]
+; OBJ64:      Subsection [
+; OBJ64-NEXT:   Type: 0xF1
+; OBJ64-NOT:    ]
+; OBJ64:        ProcStart {
+; OBJ64-NEXT:     DisplayName: f
+; OBJ64-NEXT:     Section: f
+; OBJ64-NEXT:     CodeSize: 0x18
+; OBJ64-NEXT:   }
+; OBJ64-NEXT:   ProcEnd
 ; OBJ64-NEXT: ]
 ; OBJ64:      FunctionLineTable [
 ; OBJ64-NEXT:   Name: f
@@ -235,23 +307,23 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "
 !llvm.module.flags = !{!9, !10}
 !llvm.ident = !{!11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [D:\/<unknown>] [DW_LANG_C99]
-!1 = metadata !{metadata !"<unknown>", metadata !"D:\5C"}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @f, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
-!5 = metadata !{metadata !"input.c", metadata !"D:\5C"}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [D:\/input.c]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null}
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!11 = metadata !{metadata !"clang version 3.5 "}
-!12 = metadata !{i32 1, i32 0, metadata !13, null}
-!13 = metadata !{i32 786443, metadata !14, metadata !4} ; [ DW_TAG_lexical_block ] [D:\/one.c]
-!14 = metadata !{metadata !"one.c", metadata !"D:\5C"}
-!15 = metadata !{i32 2, i32 0, metadata !16, null}
-!16 = metadata !{i32 786443, metadata !17, metadata !4} ; [ DW_TAG_lexical_block ] [D:\/two.c]
-!17 = metadata !{metadata !"two.c", metadata !"D:\5C"}
-!18 = metadata !{i32 7, i32 0, metadata !13, null}
-!19 = metadata !{i32 8, i32 0, metadata !13, null} ; [ DW_TAG_imported_declaration ]
+!0 = !{!"0x11\0012\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [D:\/<unknown>] [DW_LANG_C99]
+!1 = !{!"<unknown>", !"D:\5C"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00f\00f\00\003\000\001\000\006\00256\000\003", !5, !6, !7, null, void ()* @f, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
+!5 = !{!"input.c", !"D:\5C"}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [D:\/input.c]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null}
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 1, !"Debug Info Version", i32 2}
+!11 = !{!"clang version 3.5 "}
+!12 = !MDLocation(line: 1, scope: !13)
+!13 = !{!"0xb\000", !14, !4} ; [ DW_TAG_lexical_block ] [D:\/one.c]
+!14 = !{!"one.c", !"D:\5C"}
+!15 = !MDLocation(line: 2, scope: !16)
+!16 = !{!"0xb\000", !17, !4} ; [ DW_TAG_lexical_block ] [D:\/two.c]
+!17 = !{!"two.c", !"D:\5C"}
+!18 = !MDLocation(line: 7, scope: !13)
+!19 = !MDLocation(line: 8, scope: !13)
diff --git a/test/DebugInfo/COFF/multifunction.ll b/test/DebugInfo/COFF/multifunction.ll
index 5a6555805fd8..8f9a3f8b9b75 100644
--- a/test/DebugInfo/COFF/multifunction.ll
+++ b/test/DebugInfo/COFF/multifunction.ll
@@ -23,41 +23,66 @@
 
 
 ; X86-LABEL: _x:
-; X86-NEXT: # BB
+; X86:      # BB
 ; X86-NEXT: [[X_CALL:.*]]:{{$}}
-; X86-NEXT: calll   _z
+; X86:      calll   _z
 ; X86-NEXT: [[X_RETURN:.*]]:
-; X86-NEXT: ret
+; X86:      ret
+; X86-NEXT: L{{.*}}:
 ; X86-NEXT: [[END_OF_X:.*]]:
 ;
 ; X86-LABEL: _y:
-; X86-NEXT: # BB
+; X86:      # BB
 ; X86-NEXT: [[Y_CALL:.*]]:{{$}}
-; X86-NEXT: calll   _z
+; X86:      calll   _z
 ; X86-NEXT: [[Y_RETURN:.*]]:
-; X86-NEXT: ret
+; X86:      ret
+; X86-NEXT: L{{.*}}:
 ; X86-NEXT: [[END_OF_Y:.*]]:
 ;
 ; X86-LABEL: _f:
-; X86-NEXT: # BB
+; X86:      # BB
 ; X86-NEXT: [[F_CALLS_X:.*]]:{{$}}
-; X86-NEXT: calll   _x
+; X86:      calll   _x
 ; X86-NEXT: [[F_CALLS_Y:.*]]:
-; X86-NEXT: calll   _y
+; X86:      calll   _y
 ; X86-NEXT: [[F_CALLS_Z:.*]]:
-; X86-NEXT: calll   _z
+; X86:      calll   _z
 ; X86-NEXT: [[F_RETURN:.*]]:
-; X86-NEXT: ret
+; X86:      ret
+; X86-NEXT: L{{.*}}:
 ; X86-NEXT: [[END_OF_F:.*]]:
 ;
-; X86-LABEL: .section        .debug$S,"rnd"
+; X86-LABEL: .section        .debug$S,"rd"
 ; X86-NEXT: .long   4
+; Symbol subsection for x
+; X86-NEXT: .long   241
+; X86-NEXT: .long [[F1_END:.*]]-[[F1_START:.*]]
+; X86-NEXT: [[F1_START]]:
+; X86-NEXT: .short [[PROC_SEGMENT_END:.*]]-[[PROC_SEGMENT_START:.*]]
+; X86-NEXT: [[PROC_SEGMENT_START]]:
+; X86-NEXT: .short  4423
+; X86-NEXT: .zero   12
+; X86-NEXT: .long [[END_OF_X]]-_x
+; X86-NEXT: .zero   12
+; X86-NEXT: .secrel32 _x
+; X86-NEXT: .secidx _x
+; X86-NEXT: .byte   0
+; X86-NEXT: .byte   120
+; X86-NEXT: .byte   0
+; X86-NEXT: [[PROC_SEGMENT_END]]:
+; X86-NEXT: .short  2
+; X86-NEXT: .short  4431
+; X86-NEXT: [[F1_END]]:
+; Padding
+; X86-NEXT: .zero   3
 ; Line table subsection for x
 ; X86-NEXT: .long   242
 ; X86-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
 ; X86-NEXT: [[F2_START]]:
 ; X86-NEXT: .secrel32       _x
 ; X86-NEXT: .secidx _x
+; X86-NEXT: .short 0
 ; X86-NEXT: .long [[END_OF_X]]-_x
 ; X86-NEXT: [[FILE_SEGMENT_START:[^:]*]]:
 ; X86-NEXT: .long   0
@@ -69,12 +94,34 @@
 ; X86-NEXT: .long   5
 ; X86-NEXT: [[FILE_SEGMENT_END]]:
 ; X86-NEXT: [[F2_END]]:
+; Symbol subsection for y
+; X86-NEXT: .long   241
+; X86-NEXT: .long [[F1_END:.*]]-[[F1_START:.*]]
+; X86-NEXT: [[F1_START]]:
+; X86-NEXT: .short [[PROC_SEGMENT_END:.*]]-[[PROC_SEGMENT_START:.*]]
+; X86-NEXT: [[PROC_SEGMENT_START]]:
+; X86-NEXT: .short  4423
+; X86-NEXT: .zero   12
+; X86-NEXT: .long [[END_OF_Y]]-_y
+; X86-NEXT: .zero   12
+; X86-NEXT: .secrel32 _y
+; X86-NEXT: .secidx _y
+; X86-NEXT: .byte   0
+; X86-NEXT: .byte   121
+; X86-NEXT: .byte   0
+; X86-NEXT: [[PROC_SEGMENT_END]]:
+; X86-NEXT: .short  2
+; X86-NEXT: .short  4431
+; X86-NEXT: [[F1_END]]:
+; Padding
+; X86-NEXT: .zero   3
 ; Line table subsection for y
 ; X86-NEXT: .long   242
 ; X86-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
 ; X86-NEXT: [[F2_START]]:
 ; X86-NEXT: .secrel32       _y
 ; X86-NEXT: .secidx _y
+; X86-NEXT: .short 0
 ; X86-NEXT: .long [[END_OF_Y]]-_y
 ; X86-NEXT: [[FILE_SEGMENT_START:[^:]*]]:
 ; X86-NEXT: .long   0
@@ -86,12 +133,34 @@
 ; X86-NEXT: .long   9
 ; X86-NEXT: [[FILE_SEGMENT_END]]:
 ; X86-NEXT: [[F2_END]]:
+; Symbol subsection for f
+; X86-NEXT: .long   241
+; X86-NEXT: .long [[F1_END:.*]]-[[F1_START:.*]]
+; X86-NEXT: [[F1_START]]:
+; X86-NEXT: .short [[PROC_SEGMENT_END:.*]]-[[PROC_SEGMENT_START:.*]]
+; X86-NEXT: [[PROC_SEGMENT_START]]:
+; X86-NEXT: .short  4423
+; X86-NEXT: .zero   12
+; X86-NEXT: .long [[END_OF_F]]-_f
+; X86-NEXT: .zero   12
+; X86-NEXT: .secrel32 _f
+; X86-NEXT: .secidx _f
+; X86-NEXT: .byte   0
+; X86-NEXT: .byte   102
+; X86-NEXT: .byte   0
+; X86-NEXT: [[PROC_SEGMENT_END]]:
+; X86-NEXT: .short  2
+; X86-NEXT: .short  4431
+; X86-NEXT: [[F1_END]]:
+; Padding
+; X86-NEXT: .zero   3
 ; Line table subsection for f
 ; X86-NEXT: .long   242
 ; X86-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
 ; X86-NEXT: [[F2_START]]:
 ; X86-NEXT: .secrel32 _f
 ; X86-NEXT: .secidx _f
+; X86-NEXT: .short 0
 ; X86-NEXT: .long [[END_OF_F]]-_f
 ; X86-NEXT: [[FILE_SEGMENT_START:[^:]*]]:
 ; X86-NEXT: .long   0
@@ -125,13 +194,58 @@
 ; OBJ32:      Characteristics [ (0x42100040)
 ; OBJ32:      ]
 ; OBJ32:      Relocations [
-; OBJ32-NEXT:   0xC IMAGE_REL_I386_SECREL _x
-; OBJ32-NEXT:   0x10 IMAGE_REL_I386_SECTION _x
-; OBJ32-NEXT:   0x3C IMAGE_REL_I386_SECREL _y
-; OBJ32-NEXT:   0x40 IMAGE_REL_I386_SECTION _y
-; OBJ32-NEXT:   0x6C IMAGE_REL_I386_SECREL _f
-; OBJ32-NEXT:   0x70 IMAGE_REL_I386_SECTION _f
+; OBJ32-NEXT:   0x2C IMAGE_REL_I386_SECREL _x
+; OBJ32-NEXT:   0x30 IMAGE_REL_I386_SECTION _x
+; OBJ32-NEXT:   0x44 IMAGE_REL_I386_SECREL _x
+; OBJ32-NEXT:   0x48 IMAGE_REL_I386_SECTION _x
+; OBJ32-NEXT:   0x94 IMAGE_REL_I386_SECREL _y
+; OBJ32-NEXT:   0x98 IMAGE_REL_I386_SECTION _y
+; OBJ32-NEXT:   0xAC IMAGE_REL_I386_SECREL _y
+; OBJ32-NEXT:   0xB0 IMAGE_REL_I386_SECTION _y
+; OBJ32-NEXT:   0xFC IMAGE_REL_I386_SECREL _f
+; OBJ32-NEXT:   0x100 IMAGE_REL_I386_SECTION _f
+; OBJ32-NEXT:   0x114 IMAGE_REL_I386_SECREL _f
+; OBJ32-NEXT:   0x118 IMAGE_REL_I386_SECTION _f
+; OBJ32-NEXT: ]
+; OBJ32:      Subsection [
+; OBJ32-NEXT:   Type: 0xF1
+; OBJ32-NOT:    ]
+; OBJ32:        ProcStart {
+; OBJ32-NEXT:     DisplayName: x
+; OBJ32-NEXT:     Section: _x
+; OBJ32-NEXT:     CodeSize: 0x6
+; OBJ32-NEXT:   }
+; OBJ32-NEXT:   ProcEnd
+; OBJ32-NEXT: ]
+; OBJ32:      Subsection [
+; OBJ32-NEXT:   Type: 0xF2
+; OBJ32:      ]
+; OBJ32:      Subsection [
+; OBJ32-NEXT:   Type: 0xF1
+; OBJ32-NOT:    ]
+; OBJ32:        ProcStart {
+; OBJ32-NEXT:     DisplayName: y
+; OBJ32-NEXT:     Section: _y
+; OBJ32-NEXT:     CodeSize: 0x6
+; OBJ32-NEXT:   }
+; OBJ32-NEXT:   ProcEnd
+; OBJ32-NEXT: ]
+; OBJ32:      Subsection [
+; OBJ32-NEXT:   Type: 0xF2
+; OBJ32:      ]
+; OBJ32:      Subsection [
+; OBJ32-NEXT:   Type: 0xF1
+; OBJ32-NOT:    ]
+; OBJ32:        ProcStart {
+; OBJ32-NEXT:     DisplayName: f
+; OBJ32-NEXT:     Section: _f
+; OBJ32-NEXT:     CodeSize: 0x10
+; OBJ32-NEXT:   }
+; OBJ32-NEXT:   ProcEnd
 ; OBJ32-NEXT: ]
+; OBJ32:      Subsection [
+; OBJ32-NEXT:   Type: 0xF2
+; OBJ32:      ]
 ; OBJ32:      FunctionLineTable [
 ; OBJ32-NEXT:   Name: _x
 ; OBJ32-NEXT:   CodeSize: 0x6
@@ -165,49 +279,74 @@
 
 ; X64-LABEL: x:
 ; X64-NEXT: [[X_START:.*]]:{{$}}
-; X64-NEXT: # BB
-; X64-NEXT: subq    $40, %rsp
+; X64:      # BB
+; X64:      subq    $40, %rsp
 ; X64-NEXT: [[X_CALL_LINE:.*]]:{{$}}
 ; X64-NEXT: callq   z
 ; X64-NEXT: [[X_EPILOG_AND_RET:.*]]:
-; X64-NEXT: addq    $40, %rsp
+; X64:      addq    $40, %rsp
 ; X64-NEXT: ret
+; X64-NEXT: .L{{.*}}:
 ; X64-NEXT: [[END_OF_X:.*]]:
 ;
 ; X64-LABEL: y:
 ; X64-NEXT: [[Y_START:.*]]:{{$}}
-; X64-NEXT: # BB
-; X64-NEXT: subq    $40, %rsp
+; X64:      # BB
+; X64:      subq    $40, %rsp
 ; X64-NEXT: [[Y_CALL_LINE:.*]]:{{$}}
 ; X64-NEXT: callq   z
 ; X64-NEXT: [[Y_EPILOG_AND_RET:.*]]:
-; X64-NEXT: addq    $40, %rsp
+; X64:      addq    $40, %rsp
 ; X64-NEXT: ret
+; X64-NEXT: .L{{.*}}:
 ; X64-NEXT: [[END_OF_Y:.*]]:
 ;
 ; X64-LABEL: f:
 ; X64-NEXT: [[F_START:.*]]:{{$}}
-; X64-NEXT: # BB
-; X64-NEXT: subq    $40, %rsp
+; X64:      # BB
+; X64:      subq    $40, %rsp
 ; X64-NEXT: [[F_CALLS_X:.*]]:{{$}}
 ; X64-NEXT: callq   x
 ; X64-NEXT: [[F_CALLS_Y:.*]]:
-; X64-NEXT: callq   y
+; X64:      callq   y
 ; X64-NEXT: [[F_CALLS_Z:.*]]:
-; X64-NEXT: callq   z
+; X64:      callq   z
 ; X64-NEXT: [[F_EPILOG_AND_RET:.*]]:
-; X64-NEXT: addq    $40, %rsp
+; X64:      addq    $40, %rsp
 ; X64-NEXT: ret
+; X64-NEXT: .L{{.*}}:
 ; X64-NEXT: [[END_OF_F:.*]]:
 ;
-; X64-LABEL: .section        .debug$S,"rnd"
+; X64-LABEL: .section        .debug$S,"rd"
 ; X64-NEXT: .long   4
+; Symbol subsection for x
+; X64-NEXT: .long   241
+; X64-NEXT: .long [[F1_END:.*]]-[[F1_START:.*]]
+; X64-NEXT: [[F1_START]]:
+; X64-NEXT: .short [[PROC_SEGMENT_END:.*]]-[[PROC_SEGMENT_START:.*]]
+; X64-NEXT: [[PROC_SEGMENT_START]]:
+; X64-NEXT: .short  4423
+; X64-NEXT: .zero   12
+; X64-NEXT: .long [[END_OF_X]]-x
+; X64-NEXT: .zero   12
+; X64-NEXT: .secrel32 x
+; X64-NEXT: .secidx x
+; X64-NEXT: .byte   0
+; X64-NEXT: .byte   120
+; X64-NEXT: .byte   0
+; X64-NEXT: [[PROC_SEGMENT_END]]:
+; X64-NEXT: .short  2
+; X64-NEXT: .short  4431
+; X64-NEXT: [[F1_END]]:
+; Padding
+; X64-NEXT: .zero   3
 ; Line table subsection for x
 ; X64-NEXT: .long   242
 ; X64-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
 ; X64-NEXT: [[F2_START]]:
 ; X64-NEXT: .secrel32 x
 ; X64-NEXT: .secidx x
+; X64-NEXT: .short 0
 ; X64-NEXT: .long [[END_OF_X]]-x
 ; X64-NEXT: [[FILE_SEGMENT_START:[^:]*]]:
 ; X64-NEXT: .long   0
@@ -221,12 +360,34 @@
 ; X64-NEXT: .long   5
 ; X64-NEXT: [[FILE_SEGMENT_END]]:
 ; X64-NEXT: [[F2_END]]:
+; Symbol subsection for y
+; X64-NEXT: .long   241
+; X64-NEXT: .long [[F1_END:.*]]-[[F1_START:.*]]
+; X64-NEXT: [[F1_START]]:
+; X64-NEXT: .short [[PROC_SEGMENT_END:.*]]-[[PROC_SEGMENT_START:.*]]
+; X64-NEXT: [[PROC_SEGMENT_START]]:
+; X64-NEXT: .short  4423
+; X64-NEXT: .zero   12
+; X64-NEXT: .long [[END_OF_Y]]-y
+; X64-NEXT: .zero   12
+; X64-NEXT: .secrel32 y
+; X64-NEXT: .secidx y
+; X64-NEXT: .byte   0
+; X64-NEXT: .byte   121
+; X64-NEXT: .byte   0
+; X64-NEXT: [[PROC_SEGMENT_END]]:
+; X64-NEXT: .short  2
+; X64-NEXT: .short  4431
+; X64-NEXT: [[F1_END]]:
+; Padding
+; X64-NEXT: .zero   3
 ; Line table subsection for y
 ; X64-NEXT: .long   242
 ; X64-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
 ; X64-NEXT: [[F2_START]]:
 ; X64-NEXT: .secrel32 y
 ; X64-NEXT: .secidx y
+; X64-NEXT: .short 0
 ; X64-NEXT: .long [[END_OF_Y]]-y
 ; X64-NEXT: [[FILE_SEGMENT_START:[^:]*]]:
 ; X64-NEXT: .long   0
@@ -240,12 +401,34 @@
 ; X64-NEXT: .long   9
 ; X64-NEXT: [[FILE_SEGMENT_END]]:
 ; X64-NEXT: [[F2_END]]:
+; Symbol subsection for f
+; X64-NEXT: .long   241
+; X64-NEXT: .long [[F1_END:.*]]-[[F1_START:.*]]
+; X64-NEXT: [[F1_START]]:
+; X64-NEXT: .short [[PROC_SEGMENT_END:.*]]-[[PROC_SEGMENT_START:.*]]
+; X64-NEXT: [[PROC_SEGMENT_START]]:
+; X64-NEXT: .short  4423
+; X64-NEXT: .zero   12
+; X64-NEXT: .long [[END_OF_F]]-f
+; X64-NEXT: .zero   12
+; X64-NEXT: .secrel32 f
+; X64-NEXT: .secidx f
+; X64-NEXT: .byte   0
+; X64-NEXT: .byte   102
+; X64-NEXT: .byte   0
+; X64-NEXT: [[PROC_SEGMENT_END]]:
+; X64-NEXT: .short  2
+; X64-NEXT: .short  4431
+; X64-NEXT: [[F1_END]]:
+; Padding
+; X64-NEXT: .zero   3
 ; Line table subsection for f
 ; X64-NEXT: .long   242
 ; X64-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
 ; X64-NEXT: [[F2_START]]:
 ; X64-NEXT: .secrel32 f
 ; X64-NEXT: .secidx f
+; X64-NEXT: .short 0
 ; X64-NEXT: .long [[END_OF_F]]-f
 ; X64-NEXT: [[FILE_SEGMENT_START:[^:]*]]:
 ; X64-NEXT: .long   0
@@ -281,13 +464,58 @@
 ; OBJ64:      Characteristics [ (0x42100040)
 ; OBJ64:      ]
 ; OBJ64:      Relocations [
-; OBJ64-NEXT:   0xC IMAGE_REL_AMD64_SECREL x
-; OBJ64-NEXT:   0x10 IMAGE_REL_AMD64_SECTION x
-; OBJ64-NEXT:   0x44 IMAGE_REL_AMD64_SECREL y
-; OBJ64-NEXT:   0x48 IMAGE_REL_AMD64_SECTION y
-; OBJ64-NEXT:   0x7C IMAGE_REL_AMD64_SECREL f
-; OBJ64-NEXT:   0x80 IMAGE_REL_AMD64_SECTION f
+; OBJ64-NEXT:   0x2C IMAGE_REL_AMD64_SECREL x
+; OBJ64-NEXT:   0x30 IMAGE_REL_AMD64_SECTION x
+; OBJ64-NEXT:   0x44 IMAGE_REL_AMD64_SECREL x
+; OBJ64-NEXT:   0x48 IMAGE_REL_AMD64_SECTION x
+; OBJ64-NEXT:   0x9C IMAGE_REL_AMD64_SECREL y
+; OBJ64-NEXT:   0xA0 IMAGE_REL_AMD64_SECTION y
+; OBJ64-NEXT:   0xB4 IMAGE_REL_AMD64_SECREL y
+; OBJ64-NEXT:   0xB8 IMAGE_REL_AMD64_SECTION y
+; OBJ64-NEXT:   0x10C IMAGE_REL_AMD64_SECREL f
+; OBJ64-NEXT:   0x110 IMAGE_REL_AMD64_SECTION f
+; OBJ64-NEXT:   0x124 IMAGE_REL_AMD64_SECREL f
+; OBJ64-NEXT:   0x128 IMAGE_REL_AMD64_SECTION f
+; OBJ64-NEXT: ]
+; OBJ64:      Subsection [
+; OBJ64-NEXT:   Type: 0xF1
+; OBJ64-NOT:    ]
+; OBJ64:        ProcStart {
+; OBJ64-NEXT:     DisplayName: x
+; OBJ64-NEXT:     Section: x
+; OBJ64-NEXT:     CodeSize: 0xE
+; OBJ64-NEXT:   }
+; OBJ64-NEXT:   ProcEnd
 ; OBJ64-NEXT: ]
+; OBJ64:      Subsection [
+; OBJ64-NEXT:   Type: 0xF2
+; OBJ64:      ]
+; OBJ64:      Subsection [
+; OBJ64-NEXT:   Type: 0xF1
+; OBJ64-NOT:    ]
+; OBJ64:        ProcStart {
+; OBJ64-NEXT:     DisplayName: y
+; OBJ64-NEXT:     Section: y
+; OBJ64-NEXT:     CodeSize: 0xE
+; OBJ64-NEXT:   }
+; OBJ64-NEXT:   ProcEnd
+; OBJ64-NEXT: ]
+; OBJ64:      Subsection [
+; OBJ64-NEXT:   Type: 0xF2
+; OBJ64:      ]
+; OBJ64:      Subsection [
+; OBJ64-NEXT:   Type: 0xF1
+; OBJ64-NOT:    ]
+; OBJ64:        ProcStart {
+; OBJ64-NEXT:     DisplayName: f
+; OBJ64-NEXT:     Section: f
+; OBJ64-NEXT:     CodeSize: 0x18
+; OBJ64-NEXT:   }
+; OBJ64-NEXT:   ProcEnd
+; OBJ64-NEXT: ]
+; OBJ64:      Subsection [
+; OBJ64-NEXT:   Type: 0xF2
+; OBJ64:      ]
 ; OBJ64:      FunctionLineTable [
 ; OBJ64-NEXT:   Name: x
 ; OBJ64-NEXT:   CodeSize: 0xE
@@ -354,25 +582,25 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "
 !llvm.module.flags = !{!11, !12}
 !llvm.ident = !{!13}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [D:\/<unknown>] [DW_LANG_C99]
-!1 = metadata !{metadata !"<unknown>", metadata !"D:\5C"}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4, metadata !9, metadata !10}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"x", metadata !"x", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @x, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [x]
-!5 = metadata !{metadata !"source.c", metadata !"D:\5C"}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [D:\/source.c]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null}
-!9 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"y", metadata !"y", metadata !"", i32 7, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @y, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [y]
-!10 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 11, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @f, null, null, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [f]
-!11 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!13 = metadata !{metadata !"clang version 3.5 "}
-!14 = metadata !{i32 4, i32 0, metadata !4, null}
-!15 = metadata !{i32 5, i32 0, metadata !4, null}
-!16 = metadata !{i32 8, i32 0, metadata !9, null} ; [ DW_TAG_imported_declaration ]
-!17 = metadata !{i32 9, i32 0, metadata !9, null}
-!18 = metadata !{i32 12, i32 0, metadata !10, null}
-!19 = metadata !{i32 13, i32 0, metadata !10, null}
-!20 = metadata !{i32 14, i32 0, metadata !10, null}
-!21 = metadata !{i32 15, i32 0, metadata !10, null}
+!0 = !{!"0x11\0012\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [D:\/<unknown>] [DW_LANG_C99]
+!1 = !{!"<unknown>", !"D:\5C"}
+!2 = !{}
+!3 = !{!4, !9, !10}
+!4 = !{!"0x2e\00x\00x\00\003\000\001\000\006\00256\000\003", !5, !6, !7, null, void ()* @x, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [x]
+!5 = !{!"source.c", !"D:\5C"}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [D:\/source.c]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null}
+!9 = !{!"0x2e\00y\00y\00\007\000\001\000\006\00256\000\007", !5, !6, !7, null, void ()* @y, null, null, !2} ; [ DW_TAG_subprogram ] [line 7] [def] [y]
+!10 = !{!"0x2e\00f\00f\00\0011\000\001\000\006\00256\000\0011", !5, !6, !7, null, void ()* @f, null, null, !2} ; [ DW_TAG_subprogram ] [line 11] [def] [f]
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32 1, !"Debug Info Version", i32 2}
+!13 = !{!"clang version 3.5 "}
+!14 = !MDLocation(line: 4, scope: !4)
+!15 = !MDLocation(line: 5, scope: !4)
+!16 = !MDLocation(line: 8, scope: !9)
+!17 = !MDLocation(line: 9, scope: !9)
+!18 = !MDLocation(line: 12, scope: !10)
+!19 = !MDLocation(line: 13, scope: !10)
+!20 = !MDLocation(line: 14, scope: !10)
+!21 = !MDLocation(line: 15, scope: !10)
diff --git a/test/DebugInfo/COFF/simple.ll b/test/DebugInfo/COFF/simple.ll
index 2613a18298d3..bcb8a695c7f1 100644
--- a/test/DebugInfo/COFF/simple.ll
+++ b/test/DebugInfo/COFF/simple.ll
@@ -12,20 +12,44 @@
 ; 5 }
 
 ; X86-LABEL: _f:
-; X86-NEXT: # BB
+; X86:      # BB
 ; X86-NEXT: [[CALL_LINE:^L.*]]:{{$}}
-; X86-NEXT: calll   _g
+; X86:      calll   _g
 ; X86-NEXT: [[RETURN_STMT:.*]]:
-; X86-NEXT: ret
+; X86:      ret
+; X86-NEXT: L{{.*}}:
 ; X86-NEXT: [[END_OF_F:.*]]:
 ;
-; X86-LABEL: .section        .debug$S,"rnd"
+; X86-LABEL: .section        .debug$S,"rd"
 ; X86-NEXT: .long   4
+; Symbol subsection
+; X86-NEXT: .long   241
+; X86-NEXT: .long [[F1_END:.*]]-[[F1_START:.*]]
+; X86-NEXT: [[F1_START]]:
+; X86-NEXT: .short [[PROC_SEGMENT_END:.*]]-[[PROC_SEGMENT_START:.*]]
+; X86-NEXT: [[PROC_SEGMENT_START]]:
+; X86-NEXT: .short  4423
+; X86-NEXT: .zero   12
+; X86-NEXT: .long [[END_OF_F]]-_f
+; X86-NEXT: .zero   12
+; X86-NEXT: .secrel32 _f
+; X86-NEXT: .secidx _f
+; X86-NEXT: .byte   0
+; X86-NEXT: .byte   102
+; X86-NEXT: .byte   0
+; X86-NEXT: [[PROC_SEGMENT_END]]:
+; X86-NEXT: .short  2
+; X86-NEXT: .short  4431
+; X86-NEXT: [[F1_END]]:
+; Padding
+; X86-NEXT: .zero   3
+; Line table
 ; X86-NEXT: .long   242
 ; X86-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
 ; X86-NEXT: [[F2_START]]:
 ; X86-NEXT: .secrel32 _f
 ; X86-NEXT: .secidx _f
+; X86-NEXT: .short  0
 ; X86-NEXT: .long [[END_OF_F]]-_f
 ; X86-NEXT: [[FILE_SEGMENT_START:[^:]*]]:
 ; X86-NEXT: .long   0
@@ -56,8 +80,20 @@
 ; OBJ32:      Characteristics [ (0x42100040)
 ; OBJ32:      ]
 ; OBJ32:      Relocations [
-; OBJ32-NEXT:   0xC IMAGE_REL_I386_SECREL _f
-; OBJ32-NEXT:   0x10 IMAGE_REL_I386_SECTION _f
+; OBJ32-NEXT:   0x2C IMAGE_REL_I386_SECREL _f
+; OBJ32-NEXT:   0x30 IMAGE_REL_I386_SECTION _f
+; OBJ32-NEXT:   0x44 IMAGE_REL_I386_SECREL _f
+; OBJ32-NEXT:   0x48 IMAGE_REL_I386_SECTION _f
+; OBJ32-NEXT: ]
+; OBJ32:      Subsection [
+; OBJ32-NEXT:   Type: 0xF1
+; OBJ32-NOT:    ]
+; OBJ32:        ProcStart {
+; OBJ32-NEXT:     DisplayName: f
+; OBJ32-NEXT:     Section: _f
+; OBJ32-NEXT:     CodeSize: 0x6
+; OBJ32-NEXT:   }
+; OBJ32-NEXT:   ProcEnd
 ; OBJ32-NEXT: ]
 ; OBJ32:      FunctionLineTable [
 ; OBJ32-NEXT:   Name: _f
@@ -72,22 +108,46 @@
 
 ; X64-LABEL: f:
 ; X64-NEXT: [[START:.*]]:{{$}}
-; X64-NEXT: # BB
-; X64-NEXT: subq    $40, %rsp
+; X64:      # BB
+; X64:      subq    $40, %rsp
 ; X64-NEXT: [[CALL_LINE:.*]]:{{$}}
 ; X64-NEXT: callq   g
 ; X64-NEXT: [[EPILOG_AND_RET:.*]]:
-; X64-NEXT: addq    $40, %rsp
+; X64:      addq    $40, %rsp
 ; X64-NEXT: ret
+; X64-NEXT: .L{{.*}}:
 ; X64-NEXT: [[END_OF_F:.*]]:
 ;
-; X64-LABEL: .section        .debug$S,"rnd"
+; X64-LABEL: .section        .debug$S,"rd"
 ; X64-NEXT: .long   4
+; Symbol subsection
+; X64-NEXT: .long   241
+; X64-NEXT: .long [[F1_END:.*]]-[[F1_START:.*]]
+; X64-NEXT: [[F1_START]]:
+; X64-NEXT: .short [[PROC_SEGMENT_END:.*]]-[[PROC_SEGMENT_START:.*]]
+; X64-NEXT: [[PROC_SEGMENT_START]]:
+; X64-NEXT: .short  4423
+; X64-NEXT: .zero   12
+; X64-NEXT: .long [[END_OF_F]]-f
+; X64-NEXT: .zero   12
+; X64-NEXT: .secrel32 f
+; X64-NEXT: .secidx f
+; X64-NEXT: .byte   0
+; X64-NEXT: .byte   102
+; X64-NEXT: .byte   0
+; X64-NEXT: [[PROC_SEGMENT_END]]:
+; X64-NEXT: .short  2
+; X64-NEXT: .short  4431
+; X64-NEXT: [[F1_END]]:
+; Padding
+; X64-NEXT: .zero   3
+; Line table
 ; X64-NEXT: .long   242
 ; X64-NEXT: .long [[F2_END:.*]]-[[F2_START:.*]]
 ; X64-NEXT: [[F2_START]]:
 ; X64-NEXT: .secrel32 f
 ; X64-NEXT: .secidx f
+; X64-NEXT: .short  0
 ; X64-NEXT: .long [[END_OF_F]]-f
 ; X64-NEXT: [[FILE_SEGMENT_START:[^:]*]]:
 ; X64-NEXT: .long   0
@@ -120,8 +180,20 @@
 ; OBJ64:      Characteristics [ (0x42100040)
 ; OBJ64:      ]
 ; OBJ64:      Relocations [
-; OBJ64-NEXT:   0xC IMAGE_REL_AMD64_SECREL f
-; OBJ64-NEXT:   0x10 IMAGE_REL_AMD64_SECTION f
+; OBJ64-NEXT:   0x2C IMAGE_REL_AMD64_SECREL f
+; OBJ64-NEXT:   0x30 IMAGE_REL_AMD64_SECTION f
+; OBJ64-NEXT:   0x44 IMAGE_REL_AMD64_SECREL f
+; OBJ64-NEXT:   0x48 IMAGE_REL_AMD64_SECTION f
+; OBJ64-NEXT: ]
+; OBJ64:      Subsection [
+; OBJ64-NEXT:   Type: 0xF1
+; OBJ64-NOT:    ]
+; OBJ64:        ProcStart {
+; OBJ64-NEXT:     DisplayName: f
+; OBJ64-NEXT:     Section: f
+; OBJ64-NEXT:     CodeSize: 0xE
+; OBJ64-NEXT:   }
+; OBJ64-NEXT:   ProcEnd
 ; OBJ64-NEXT: ]
 ; OBJ64:      FunctionLineTable [
 ; OBJ64-NEXT:   Name: f
@@ -151,17 +223,17 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "
 !llvm.module.flags = !{!9, !10}
 !llvm.ident = !{!11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [D:\/<unknown>] [DW_LANG_C99]
-!1 = metadata !{metadata !"<unknown>", metadata !"D:\5C"}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @f, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
-!5 = metadata !{metadata !"test.c", metadata !"D:\5C"}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [D:\/test.c]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null}
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!11 = metadata !{metadata !"clang version 3.5 "}
-!12 = metadata !{i32 4, i32 0, metadata !4, null}
-!13 = metadata !{i32 5, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [D:\/<unknown>] [DW_LANG_C99]
+!1 = !{!"<unknown>", !"D:\5C"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00f\00f\00\003\000\001\000\006\00256\000\003", !5, !6, !7, null, void ()* @f, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
+!5 = !{!"test.c", !"D:\5C"}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [D:\/test.c]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null}
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 1, !"Debug Info Version", i32 2}
+!11 = !{!"clang version 3.5 "}
+!12 = !MDLocation(line: 4, scope: !4)
+!13 = !MDLocation(line: 5, scope: !4)
diff --git a/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll b/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
index 4d2e42734475..181359b905ab 100644
--- a/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
+++ b/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
@@ -18,11 +18,11 @@
 ; X86-LABEL: {{^}}"?bar@@YAXHZZ":
 ; X86-NEXT: # BB
 ; X86-NEXT: [[JMP_LINE:^L.*]]:{{$}}
-; X86-NEXT: jmp "?foo@@YAXXZ"
+; X86:      jmp "?foo@@YAXXZ"
 ; X86-NEXT: [[END_OF_BAR:^L.*]]:{{$}}
 ; X86-NOT:  ret
 
-; X86-LABEL: .section        .debug$S,"rnd"
+; X86-LABEL: .section        .debug$S,"rd"
 ; X86:       .secrel32 "?bar@@YAXHZZ"
 ; X86-NEXT:  .secidx   "?bar@@YAXHZZ"
 ; X86:       .long   0
@@ -61,18 +61,18 @@ attributes #2 = { nounwind }
 !llvm.module.flags = !{!8, !9}
 !llvm.ident = !{!10}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [D:\/test.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"test.cpp", metadata !"D:\5C"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !7}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"spam", metadata !"spam", metadata !"", i32 7, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @"\01?spam@@YAXXZ", null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [spam]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [D:\/test.cpp]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 3, metadata !6, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [local] [def] [bar]
-!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!10 = metadata !{metadata !"clang version 3.5.0 "}
-!11 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
-!12 = metadata !{i32 9, i32 0, metadata !4, null}
-!13 = metadata !{i32 4, i32 0, metadata !7, null}
-!14 = metadata !{i32 5, i32 0, metadata !7, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \001\00\000\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [D:\/test.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"test.cpp", !"D:\5C"}
+!2 = !{}
+!3 = !{!4, !7}
+!4 = !{!"0x2e\00spam\00spam\00\007\000\001\000\006\00256\001\007", !1, !5, !6, null, void ()* @"\01?spam@@YAXXZ", null, null, !2} ; [ DW_TAG_subprogram ] [line 7] [def] [spam]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [D:\/test.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!"0x2e\00bar\00bar\00\003\001\001\000\006\00256\001\003", !1, !5, !6, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [local] [def] [bar]
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 2}
+!10 = !{!"clang version 3.5.0 "}
+!11 = !MDLocation(line: 8, scope: !4)
+!12 = !MDLocation(line: 9, scope: !4)
+!13 = !MDLocation(line: 4, scope: !7)
+!14 = !MDLocation(line: 5, scope: !7)
diff --git a/test/DebugInfo/Inputs/cross-cu-inlining.c b/test/DebugInfo/Inputs/cross-cu-inlining.c
new file mode 100644
index 000000000000..05535811aa6a
--- /dev/null
+++ b/test/DebugInfo/Inputs/cross-cu-inlining.c
@@ -0,0 +1,18 @@
+// To generate the test file:
+// clang cross-cu-inlining.c -DA_C -g -emit-llvm -S -o a.ll
+// clang cross-cu-inlining.c -DB_C -g -emit-llvm -S -o b.ll
+// llvm-link a.ll b.ll -o ab.bc
+// opt -inline ab.bc -o cross-cu-inlining.bc
+// clang -c cross-cu-inlining.bc -o cross-cu-inlining.o
+#ifdef A_C
+int i;
+int func(int);
+int main() {
+  return func(i);
+}
+#endif
+#ifdef B_C
+int __attribute__((always_inline)) func(int x) {
+  return x * 2;
+}
+#endif
diff --git a/test/DebugInfo/Inputs/cross-cu-inlining.x86_64-macho.o b/test/DebugInfo/Inputs/cross-cu-inlining.x86_64-macho.o
new file mode 100644
index 000000000000..052d4c92f909
--- /dev/null
+++ b/test/DebugInfo/Inputs/cross-cu-inlining.x86_64-macho.o
diff --git a/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64 b/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64
index 6df03dad95a7..decc72b4ff22 100755
--- a/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64
+++ b/test/DebugInfo/Inputs/dwarfdump-inl-test.elf-x86-64
diff --git a/test/DebugInfo/Inputs/dwarfdump-objc.m b/test/DebugInfo/Inputs/dwarfdump-objc.m
new file mode 100644
index 000000000000..54fbee2ac39c
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-objc.m
@@ -0,0 +1,16 @@
+// Compile with clang -g dwarfdump-objc.m -c -Wno-objc-root-class
+
+@interface NSObject {} @end
+
+
+@interface TestInterface
+@property (readonly) int ReadOnly;
+@property (assign) int Assign;
+@property (readwrite) int ReadWrite;
+@property (retain) NSObject *Retain;
+@property (copy) NSObject *Copy;
+@property (nonatomic) int NonAtomic;
+@end
+
+@implementation TestInterface
+@end
diff --git a/test/DebugInfo/Inputs/dwarfdump-objc.x86_64.o b/test/DebugInfo/Inputs/dwarfdump-objc.x86_64.o
new file mode 100644
index 000000000000..6b55d38ce91d
--- /dev/null
+++ b/test/DebugInfo/Inputs/dwarfdump-objc.x86_64.o
diff --git a/test/DebugInfo/Inputs/gmlt.ll b/test/DebugInfo/Inputs/gmlt.ll
new file mode 100644
index 000000000000..e43264010bf9
--- /dev/null
+++ b/test/DebugInfo/Inputs/gmlt.ll
@@ -0,0 +1,153 @@
+; REQUIRES: object-emission
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s
+
+; Generated from the following source compiled with clang++ -gmlt:
+; void f1() {}
+; void __attribute__((section("__TEXT,__bar"))) f2() {}
+; void __attribute__((always_inline)) f3() { f1(); }
+; void f4() { f3(); }
+
+; Check that
+;  * -gmlt includes no DW_TAG_subprograms for subprograms without inlined
+;    subroutines.
+;  * yet still produces DW_AT_ranges and a range list in debug_ranges that
+;    describes those subprograms
+
+; CHECK: DW_TAG_compile_unit
+; CHECK:   DW_AT_ranges [DW_FORM_sec_offset] (0x00000000
+; CHECK-NOT: {{DW_TAG|NULL}}
+
+; Omitting the subprograms without inlined subroutines is not possible
+; currently on Darwin as dsymutil will drop the whole CU if it has no subprograms
+; (which happens with this optimization if there are no inlined subroutines).
+
+; DARWIN:  DW_TAG_subprogram
+; DARWIN-NOT: DW_TAG
+; DARWIN:    DW_AT_name {{.*}} "f1"
+; DARWIN-NOT: {{DW_TAG|NULL}}
+; DARWIN:  DW_TAG_subprogram
+; DARWIN-NOT: DW_TAG
+; DARWIN:    DW_AT_name {{.*}} "f2"
+; DARWIN-NOT: {{DW_TAG|NULL}}
+; DARWIN:  DW_TAG_subprogram
+; DARWIN-NOT: DW_TAG
+; Can't check the abstract_origin value across the DARWIN/CHECK checking and
+; ordering, so don't bother - just trust me, it refers to f3 down there.
+; DARWIN:    DW_AT_abstract_origin
+; DARWIN-NOT: {{DW_TAG|NULL}}
+
+
+; FIXME: Emitting separate abstract definitions is inefficient when we could
+; just attach the DW_AT_name to the inlined_subroutine directly. Except that
+; would produce many string relocations. Implement string indexing in the
+; skeleton CU to address the relocation problem, then remove abstract
+; definitions from -gmlt here.
+
+; CHECK: DW_TAG_subprogram
+; CHECK-NEXT:     DW_AT_name {{.*}} "f3"
+
+; FIXME: We don't really need DW_AT_inline, consumers can ignore this due to
+; the absence of high_pc/low_pc/ranges and know that they just need it for
+; retrieving the name of a concrete inlined instance
+
+; CHECK-NOT: {{DW_TAG|DW_AT|NULL}}
+
+; Check that we only provide the minimal attributes on a subprogram to save space.
+; CHECK:   DW_TAG_subprogram
+; CHECK-NEXT:     DW_AT_low_pc
+; CHECK-NEXT:     DW_AT_high_pc
+; CHECK-NEXT:     DW_AT_name
+; CHECK-NOT: {{DW_TAG|DW_AT}}
+; CHECK:     DW_TAG_inlined_subroutine
+
+; As mentioned above - replace DW_AT_abstract_origin with DW_AT_name to save
+; space once we have support for string indexing in non-dwo sections
+
+; CHECK-NEXT:       DW_AT_abstract_origin {{.*}} "f3"
+; CHECK-NEXT:       DW_AT_low_pc
+; CHECK-NEXT:       DW_AT_high_pc
+; CHECK-NEXT:       DW_AT_call_file
+; CHECK-NEXT:       DW_AT_call_line
+
+; Make sure we don't have any other subprograms here (subprograms with no
+; inlined subroutines are omitted by design to save space)
+
+; CHECK-NOT: {{DW_TAG|DW_AT}}
+; CHECK: NULL
+; CHECK-NOT: {{DW_TAG|DW_AT}}
+; CHECK: NULL
+
+
+; CHECK: .debug_ranges contents:
+
+; ... some addresses (depends on platform (such as platforms with function
+; reordering in the linker), and looks wonky on platforms with zero values
+; written in relocation places (dumper needs to	be fixed to read the
+; relocations rather than interpret that as the end of a range list))
+
+; CHECK: 00000000 <End of list>
+
+
+; Check that we don't emit any pubnames or pubtypes under -gmlt
+; CHECK: .debug_pubnames contents:
+; CHECK-NOT: Offset
+
+; CHECK: .debug_pubtypes contents:
+; CHECK-NOT: Offset
+
+; CHECK: .apple{{.*}} contents:
+
+; Function Attrs: nounwind uwtable
+define void @_Z2f1v() #0 {
+entry:
+  ret void, !dbg !13
+}
+
+; Function Attrs: nounwind uwtable
+define void @_Z2f2v() #0 section "__TEXT,__bar" {
+entry:
+  ret void, !dbg !14
+}
+
+; Function Attrs: alwaysinline nounwind uwtable
+define void @_Z2f3v() #1 {
+entry:
+  call void @_Z2f1v(), !dbg !15
+  ret void, !dbg !16
+}
+
+; Function Attrs: nounwind uwtable
+define void @_Z2f4v() #0 {
+entry:
+  call void @_Z2f1v() #2, !dbg !17
+  ret void, !dbg !19
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11}
+!llvm.ident = !{!12}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 \000\00\000\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/gmlt.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"gmlt.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4, !7, !8, !9}
+!4 = !{!"0x2e\00f1\00f1\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, void ()* @_Z2f1v, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [f1]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/gmlt.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!"0x2e\00f2\00f2\00\002\000\001\000\006\00256\000\002", !1, !5, !6, null, void ()* @_Z2f2v, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [f2]
+!8 = !{!"0x2e\00f3\00f3\00\003\000\001\000\006\00256\000\003", !1, !5, !6, null, void ()* @_Z2f3v, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [f3]
+!9 = !{!"0x2e\00f4\00f4\00\004\000\001\000\006\00256\000\004", !1, !5, !6, null, void ()* @_Z2f4v, null, null, !2} ; [ DW_TAG_subprogram ] [line 4] [def] [f4]
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 2, !"Debug Info Version", i32 2}
+!12 = !{!"clang version 3.6.0 "}
+!13 = !MDLocation(line: 1, column: 12, scope: !4)
+!14 = !MDLocation(line: 2, column: 53, scope: !7)
+!15 = !MDLocation(line: 3, column: 44, scope: !8)
+!16 = !MDLocation(line: 3, column: 50, scope: !8)
+!17 = !MDLocation(line: 3, column: 44, scope: !8, inlinedAt: !18)
+!18 = !MDLocation(line: 4, column: 13, scope: !9)
+!19 = !MDLocation(line: 4, column: 19, scope: !9)
diff --git a/test/DebugInfo/Inputs/split-dwarf-test b/test/DebugInfo/Inputs/split-dwarf-test
new file mode 100755
index 000000000000..a4411129a9b6
--- /dev/null
+++ b/test/DebugInfo/Inputs/split-dwarf-test
diff --git a/test/DebugInfo/Inputs/split-dwarf-test.cc b/test/DebugInfo/Inputs/split-dwarf-test.cc
new file mode 100644
index 000000000000..5ed56f94b362
--- /dev/null
+++ b/test/DebugInfo/Inputs/split-dwarf-test.cc
@@ -0,0 +1,17 @@
+int foo(int a) {
+  return a + 1;
+}
+
+int main(int argc, char *argv[]) {
+  return foo(argc);
+}
+
+// Build instructions:
+// 1) clang++ -### -O2 -gsplit-dwarf.cc split-dwarf-test.cc -o split-dwarf-test
+// 2) Replace the value "-fdebug-compilation-dir" flag to "Output"
+//      (this is the temp directory used by lit).
+// 3) Manually run clang-cc1, objcopy and ld invocations.
+// 4) Copy the binary and .dwo file to the Inputs directory. Make sure the
+//    .dwo file will be available for symbolizer (use test RUN-lines to copy
+//    the .dwo file to a directory
+//    <execution_directory>/<directory_provided_in_fdebug_compilation_dir>.
diff --git a/test/DebugInfo/Inputs/split-dwarf-test.dwo b/test/DebugInfo/Inputs/split-dwarf-test.dwo
new file mode 100644
index 000000000000..74183a4b622f
--- /dev/null
+++ b/test/DebugInfo/Inputs/split-dwarf-test.dwo
diff --git a/test/DebugInfo/Mips/delay-slot.ll b/test/DebugInfo/Mips/delay-slot.ll
index 9bce4ba6c9d8..d860cea32100 100644
--- a/test/DebugInfo/Mips/delay-slot.ll
+++ b/test/DebugInfo/Mips/delay-slot.ll
@@ -26,7 +26,7 @@ target triple = "mips--linux-gnu"
 ; Function Attrs: nounwind
 define i32 @foo(i32 %x) #0 {
 entry:
-  call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !12), !dbg !13
+  call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !12, metadata !{!"0x102"}), !dbg !13
   %tobool = icmp ne i32 %x, 0, !dbg !14
   br i1 %tobool, label %if.then, label %if.end, !dbg !14
 
@@ -42,10 +42,10 @@ return:                                           ; preds = %if.end, %if.then
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
@@ -54,22 +54,22 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!9, !10}
 !llvm.ident = !{!11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/test.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"test.c", metadata !"/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/test.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!11 = metadata !{metadata !"clang version 3.5.0"}
-!12 = metadata !{i32 786689, metadata !4, metadata !"x", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1]
-!13 = metadata !{i32 1, i32 0, metadata !4, null}
-!14 = metadata !{i32 2, i32 0, metadata !15, null}
-!15 = metadata !{i32 786443, metadata !1, metadata !4, i32 2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/test.c]
-!16 = metadata !{i32 3, i32 0, metadata !15, null}
-!17 = metadata !{i32 4, i32 0, metadata !4, null}
-!18 = metadata !{i32 5, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/test.c] [DW_LANG_C99]
+!1 = !{!"test.c", !"/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, i32 (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/test.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 2}
+!11 = !{!"clang version 3.5.0"}
+!12 = !{!"0x101\00x\0016777217\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [x] [line 1]
+!13 = !MDLocation(line: 1, scope: !4)
+!14 = !MDLocation(line: 2, scope: !15)
+!15 = !{!"0xb\002\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [/tmp/test.c]
+!16 = !MDLocation(line: 3, scope: !15)
+!17 = !MDLocation(line: 4, scope: !4)
+!18 = !MDLocation(line: 5, scope: !4)
diff --git a/test/DebugInfo/Mips/processes-relocations.ll b/test/DebugInfo/Mips/processes-relocations.ll
new file mode 100644
index 000000000000..5f52ea1ad16d
--- /dev/null
+++ b/test/DebugInfo/Mips/processes-relocations.ll
@@ -0,0 +1,17 @@
+; RUN: llc -filetype=obj -O0 < %s -mtriple mips-unknown-linux | \
+; RUN:      llvm-dwarfdump - 2>&1 | FileCheck %s
+; RUN: llc -filetype=obj -O0 < %s -mtriple mips64-unknown-linux | \
+; RUN:      llvm-dwarfdump - 2>&1 | FileCheck %s
+
+; CHECK-NOT: failed to compute relocation
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = !{i32 786449, !1, i32 12, !"clang version 3.6.0 ", i1 false, !"", i32 0, !2, !2, !2, !2, !2, !"", i32 1} ; [ DW_TAG_compile_unit ] [/a/empty.c] [DW_LANG_C99]
+!1 = !{!"empty.c", !"/a"}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 1}
+!5 = !{!"clang version 3.6.0 "}
diff --git a/test/DebugInfo/PR20038.ll b/test/DebugInfo/PR20038.ll
index 61145e5c8441..bfee8d2b4389 100644
--- a/test/DebugInfo/PR20038.ll
+++ b/test/DebugInfo/PR20038.ll
@@ -14,15 +14,15 @@
 ; CHECK-NOT: DW_TAG
 ; CHECK:   DW_AT_name {{.*}} "C"
 ; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: [[C_DTOR_DECL:.*]]:  DW_TAG_subprogram
+; CHECK:   DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
 ; CHECK:     DW_AT_name {{.*}} "~C"
 
-; CHECK: [[D1_ABS:.*]]: DW_TAG_subprogram
+; CHECK:  DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
 ; CHECK:   DW_AT_MIPS_linkage_name {{.*}} "_ZN1CD1Ev"
 ; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: [[D1_THIS_ABS:.*]]:   DW_TAG_formal_parameter
+; CHECK:  DW_TAG_formal_parameter
 ; CHECK-NOT: DW_TAG
 ; CHECK:     DW_AT_name {{.*}} "this"
 
@@ -30,23 +30,19 @@
 ; CHECK-NOT: DW_TAG
 ; CHECK:   DW_AT_name {{.*}} "fun4"
 ; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK:   DW_TAG_lexical_block
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK:     DW_TAG_inlined_subroutine
+; CHECK:   DW_TAG_inlined_subroutine
 ; CHECK-NOT: DW_TAG
-; CHECK:       DW_AT_abstract_origin {{.*}} {[[D1_ABS]]}
+; CHECK:     DW_AT_abstract_origin {{.*}} "_ZN1CD1Ev"
 ; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK:       DW_TAG_formal_parameter
+; CHECK:     DW_TAG_formal_parameter
 ; CHECK-NOT: DW_TAG
-; CHECK:         DW_AT_abstract_origin {{.*}} {[[D1_THIS_ABS]]}
+; CHECK:       DW_AT_abstract_origin {{.*}} "this"
 
 ; FIXME: D2 is actually inlined into D1 but doesn't show up here, possibly due
 ; to there being no work in D2 (calling another member function from the dtor
 ; causes D2 to show up, calling a free function doesn't).
 
 ; CHECK-NOT: DW_TAG
-; CHECK:       NULL
-; CHECK-NOT: DW_TAG
 ; CHECK:     NULL
 ; CHECK-NOT: DW_TAG
 ; CHECK:   NULL
@@ -78,10 +74,10 @@ land.end:                                         ; preds = %land.rhs, %entry
 
 cleanup.action:                                   ; preds = %land.end
   store %struct.C* %agg.tmp.ensured, %struct.C** %this.addr.i, align 8, !dbg !22
-  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr.i}, metadata !29), !dbg !31
+  call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i, metadata !29, metadata !{!"0x102"}), !dbg !31
   %this1.i = load %struct.C** %this.addr.i, !dbg !22
   store %struct.C* %this1.i, %struct.C** %this.addr.i.i, align 8, !dbg !21
-  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr.i.i}, metadata !32), !dbg !33
+  call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i.i, metadata !32, metadata !{!"0x102"}), !dbg !33
   %this1.i.i = load %struct.C** %this.addr.i.i, !dbg !21
   br label %cleanup.done, !dbg !22
 
@@ -95,10 +91,10 @@ entry:
   %this.addr.i = alloca %struct.C*, align 8, !dbg !37
   %this.addr = alloca %struct.C*, align 8
   store %struct.C* %this, %struct.C** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !29), !dbg !38
+  call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !29, metadata !{!"0x102"}), !dbg !38
   %this1 = load %struct.C** %this.addr
   store %struct.C* %this1, %struct.C** %this.addr.i, align 8, !dbg !37
-  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr.i}, metadata !32), !dbg !39
+  call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i, metadata !32, metadata !{!"0x102"}), !dbg !39
   %this1.i = load %struct.C** %this.addr.i, !dbg !37
   ret void, !dbg !37
 }
@@ -108,13 +104,13 @@ define void @_ZN1CD2Ev(%struct.C* %this) unnamed_addr #1 align 2 {
 entry:
   %this.addr = alloca %struct.C*, align 8
   store %struct.C* %this, %struct.C** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !32), !dbg !40
+  call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !32, metadata !{!"0x102"}), !dbg !40
   %this1 = load %struct.C** %this.addr
   ret void, !dbg !41
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #2
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { alwaysinline nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
@@ -124,45 +120,45 @@ attributes #2 = { nounwind readnone }
 !llvm.module.flags = !{!18, !19}
 !llvm.ident = !{!20}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !11, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/<stdin>] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"<stdin>", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !5, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{metadata !"PR20038.cpp", metadata !"/tmp/dbginfo"}
-!6 = metadata !{metadata !7}
-!7 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1C", metadata !"~C", metadata !"~C", metadata !"", i32 2, metadata !8, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 2} ; [ DW_TAG_subprogram ] [line 2] [~C]
-!8 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!9 = metadata !{null, metadata !10}
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
-!11 = metadata !{metadata !12, metadata !16, metadata !17}
-!12 = metadata !{i32 786478, metadata !5, metadata !13, metadata !"fun4", metadata !"fun4", metadata !"_Z4fun4v", i32 5, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z4fun4v, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [fun4]
-!13 = metadata !{i32 786473, metadata !5}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/PR20038.cpp]
-!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!15 = metadata !{null}
-!16 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1C", metadata !"~C", metadata !"~C", metadata !"_ZN1CD2Ev", i32 6, metadata !8, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1CD2Ev, null, metadata !7, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [~C]
-!17 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1C", metadata !"~C", metadata !"~C", metadata !"_ZN1CD1Ev", i32 6, metadata !8, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1CD1Ev, null, metadata !7, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [~C]
-!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!19 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!20 = metadata !{metadata !"clang version 3.5.0 "}
-!21 = metadata !{i32 6, i32 0, metadata !17, metadata !22}
-!22 = metadata !{i32 5, i32 0, metadata !23, null}
-!23 = metadata !{i32 786443, metadata !5, metadata !12, i32 5, i32 0, i32 3, i32 3} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/PR20038.cpp]
-!24 = metadata !{i32 5, i32 0, metadata !12, null}
-!25 = metadata !{i32 5, i32 0, metadata !26, null}
-!26 = metadata !{i32 786443, metadata !5, metadata !12, i32 5, i32 0, i32 1, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/PR20038.cpp]
-!27 = metadata !{i32 5, i32 0, metadata !28, null}
-!28 = metadata !{i32 786443, metadata !5, metadata !12, i32 5, i32 0, i32 2, i32 2} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/PR20038.cpp]
-!29 = metadata !{i32 786689, metadata !17, metadata !"this", null, i32 16777216, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C]
-!31 = metadata !{i32 0, i32 0, metadata !17, metadata !22}
-!32 = metadata !{i32 786689, metadata !16, metadata !"this", null, i32 16777216, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!33 = metadata !{i32 0, i32 0, metadata !16, metadata !21}
-!34 = metadata !{i32 5, i32 0, metadata !35, null}
-!35 = metadata !{i32 786443, metadata !5, metadata !36, i32 5, i32 0, i32 5, i32 5} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/PR20038.cpp]
-!36 = metadata !{i32 786443, metadata !5, metadata !12, i32 5, i32 0, i32 4, i32 4} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/PR20038.cpp]
-!37 = metadata !{i32 6, i32 0, metadata !17, null}
-!38 = metadata !{i32 0, i32 0, metadata !17, null}
-!39 = metadata !{i32 0, i32 0, metadata !16, metadata !37}
-!40 = metadata !{i32 0, i32 0, metadata !16, null}
-!41 = metadata !{i32 6, i32 0, metadata !16, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !3, !11, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/<stdin>] [DW_LANG_C_plus_plus]
+!1 = !{!"<stdin>", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00C\001\008\008\000\000\000", !5, null, null, !6, null, null, !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!"PR20038.cpp", !"/tmp/dbginfo"}
+!6 = !{!7}
+!7 = !{!"0x2e\00~C\00~C\00\002\000\000\000\006\00256\000\002", !5, !"_ZTS1C", !8, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 2] [~C]
+!8 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !9, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = !{null, !10}
+!10 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
+!11 = !{!12, !16, !17}
+!12 = !{!"0x2e\00fun4\00fun4\00_Z4fun4v\005\000\001\000\006\00256\000\005", !5, !13, !14, null, void ()* @_Z4fun4v, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [fun4]
+!13 = !{!"0x29", !5}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/PR20038.cpp]
+!14 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = !{null}
+!16 = !{!"0x2e\00~C\00~C\00_ZN1CD2Ev\006\000\001\000\006\00256\000\006", !5, !"_ZTS1C", !8, null, void (%struct.C*)* @_ZN1CD2Ev, null, !7, !2} ; [ DW_TAG_subprogram ] [line 6] [def] [~C]
+!17 = !{!"0x2e\00~C\00~C\00_ZN1CD1Ev\006\000\001\000\006\00256\000\006", !5, !"_ZTS1C", !8, null, void (%struct.C*)* @_ZN1CD1Ev, null, !7, !2} ; [ DW_TAG_subprogram ] [line 6] [def] [~C]
+!18 = !{i32 2, !"Dwarf Version", i32 4}
+!19 = !{i32 2, !"Debug Info Version", i32 2}
+!20 = !{!"clang version 3.5.0 "}
+!21 = !MDLocation(line: 6, scope: !17, inlinedAt: !22)
+!22 = !MDLocation(line: 5, scope: !23)
+!23 = !{!"0xb\005\000\003", !5, !12} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/PR20038.cpp]
+!24 = !MDLocation(line: 5, scope: !12)
+!25 = !MDLocation(line: 5, scope: !26)
+!26 = !{!"0xb\005\000\001", !5, !12} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/PR20038.cpp]
+!27 = !MDLocation(line: 5, scope: !28)
+!28 = !{!"0xb\005\000\002", !5, !12} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/PR20038.cpp]
+!29 = !{!"0x101\00this\0016777216\001088", !17, null, !30} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!30 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C]
+!31 = !MDLocation(line: 0, scope: !17, inlinedAt: !22)
+!32 = !{!"0x101\00this\0016777216\001088", !16, null, !30} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!33 = !MDLocation(line: 0, scope: !16, inlinedAt: !21)
+!34 = !MDLocation(line: 5, scope: !35)
+!35 = !{!"0xb\005\000\005", !5, !36} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/PR20038.cpp]
+!36 = !{!"0xb\005\000\004", !5, !12} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/PR20038.cpp]
+!37 = !MDLocation(line: 6, scope: !17)
+!38 = !MDLocation(line: 0, scope: !17)
+!39 = !MDLocation(line: 0, scope: !16, inlinedAt: !37)
+!40 = !MDLocation(line: 0, scope: !16)
+!41 = !MDLocation(line: 6, scope: !16)
diff --git a/test/DebugInfo/PowerPC/processes-relocations.ll b/test/DebugInfo/PowerPC/processes-relocations.ll
new file mode 100644
index 000000000000..459055e4911e
--- /dev/null
+++ b/test/DebugInfo/PowerPC/processes-relocations.ll
@@ -0,0 +1,17 @@
+; RUN: llc -filetype=obj -O0 < %s -mtriple powerpc64-unknown-linux | \
+; RUN:    llvm-dwarfdump - 2>&1 | FileCheck %s
+; RUN: llc -filetype=obj -O0 < %s -mtriple powerpc-unknown-linux | \
+; RUN:    llvm-dwarfdump - 2>&1 | FileCheck %s
+
+; CHECK-NOT: failed to compute relocation
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = !{i32 786449, !1, i32 12, !"clang version 3.6.0 ", i1 false, !"", i32 0, !2, !2, !2, !2, !2, !"", i32 1} ; [ DW_TAG_compile_unit ] [/a/empty.c] [DW_LANG_C99]
+!1 = !{!"empty.c", !"/a"}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 1}
+!5 = !{!"clang version 3.6.0 "}
diff --git a/test/DebugInfo/PowerPC/tls-fission.ll b/test/DebugInfo/PowerPC/tls-fission.ll
index 9cde2c79a00b..7bb1626aa1ec 100644
--- a/test/DebugInfo/PowerPC/tls-fission.ll
+++ b/test/DebugInfo/PowerPC/tls-fission.ll
@@ -21,12 +21,12 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!7, !8}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !"tls.dwo"} ; [ DW_TAG_compile_unit ] [/tmp/tls.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"tls.cpp", metadata !"/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786484, i32 0, null, metadata !"tls", metadata !"tls", metadata !"", metadata !5, i32 1, metadata !6, i32 0, i32 1, i32* @tls, null} ; [ DW_TAG_variable ] [tls] [line 1] [def]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/tls.cpp]
-!6 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 \000\00\000\00tls.dwo\000", !1, !2, !2, !2, !3, !2} ; [ DW_TAG_compile_unit ] [/tmp/tls.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"tls.cpp", !"/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x34\00tls\00tls\00\001\000\001", null, !5, !6, i32* @tls, null} ; [ DW_TAG_variable ] [tls] [line 1] [def]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/tls.cpp]
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!7 = !{i32 2, !"Dwarf Version", i32 3}
+!8 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/PowerPC/tls.ll b/test/DebugInfo/PowerPC/tls.ll
index f2586eda4243..be6313633916 100644
--- a/test/DebugInfo/PowerPC/tls.ll
+++ b/test/DebugInfo/PowerPC/tls.ll
@@ -17,13 +17,13 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!7, !8}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/tls.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"tls.cpp", metadata !"/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786484, i32 0, null, metadata !"tls", metadata !"tls", metadata !"", metadata !5, i32 1, metadata !6, i32 0, i32 1, i32* @tls, null} ; [ DW_TAG_variable ] [tls] [line 1] [def]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/tls.cpp]
-!6 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
+!0 = !{!"0x11\004\00clang version 3.4 \000\00\000\00\000", !1, !2, !2, !2, !3, !2} ; [ DW_TAG_compile_unit ] [/tmp/tls.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"tls.cpp", !"/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x34\00tls\00tls\00\001\000\001", null, !5, !6, i32* @tls, null} ; [ DW_TAG_variable ] [tls] [line 1] [def]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/tls.cpp]
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!7 = !{i32 2, !"Dwarf Version", i32 3}
 
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!8 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/Sparc/gnu-window-save.ll b/test/DebugInfo/Sparc/gnu-window-save.ll
index 303a28777584..5bf3f02538d5 100644
--- a/test/DebugInfo/Sparc/gnu-window-save.ll
+++ b/test/DebugInfo/Sparc/gnu-window-save.ll
@@ -55,17 +55,17 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.module.flags = !{!9, !10}
 !llvm.ident = !{!11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 (http://llvm.org/git/clang.git 6a0714fee07fb7c4e32d3972b4fe2ce2f5678cf4) (llvm/ 672e88e934757f76d5c5e5258be41e7615094844)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/venkatra/work/benchmarks/test/hello/hello.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"hello.c", metadata !"/home/venkatra/work/benchmarks/test/hello"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [main]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/venkatra/work/benchmarks/test/hello/hello.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!11 = metadata !{metadata !"clang version 3.5 (http://llvm.org/git/clang.git 6a0714fee07fb7c4e32d3972b4fe2ce2f5678cf4) (llvm/ 672e88e934757f76d5c5e5258be41e7615094844)"}
-!12 = metadata !{i32 5, i32 0, metadata !4, null}
-!13 = metadata !{i32 6, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5 (http://llvm.org/git/clang.git 6a0714fee07fb7c4e32d3972b4fe2ce2f5678cf4) (llvm/ 672e88e934757f76d5c5e5258be41e7615094844)\000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/home/venkatra/work/benchmarks/test/hello/hello.c] [DW_LANG_C99]
+!1 = !{!"hello.c", !"/home/venkatra/work/benchmarks/test/hello"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00main\00main\00\003\000\001\000\006\00256\000\004", !1, !5, !6, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [main]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/home/venkatra/work/benchmarks/test/hello/hello.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 1, !"Debug Info Version", i32 2}
+!11 = !{!"clang version 3.5 (http://llvm.org/git/clang.git 6a0714fee07fb7c4e32d3972b4fe2ce2f5678cf4) (llvm/ 672e88e934757f76d5c5e5258be41e7615094844)"}
+!12 = !MDLocation(line: 5, scope: !4)
+!13 = !MDLocation(line: 6, scope: !4)
diff --git a/test/DebugInfo/Sparc/processes-relocations.ll b/test/DebugInfo/Sparc/processes-relocations.ll
new file mode 100644
index 000000000000..de44cc933585
--- /dev/null
+++ b/test/DebugInfo/Sparc/processes-relocations.ll
@@ -0,0 +1,17 @@
+; RUN: llc -filetype=obj -O0 < %s -mtriple sparc-unknown-linux | \
+; RUN:    llvm-dwarfdump - 2>&1 | FileCheck %s
+; RUN: llc -filetype=obj -O0 < %s -mtriple sparcv9-unknown-linux | \
+; RUN:    llvm-dwarfdump - 2>&1 | FileCheck %s
+
+; CHECK-NOT: failed to compute relocation
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = !{i32 786449, !1, i32 12, !"clang version 3.6.0 ", i1 false, !"", i32 0, !2, !2, !2, !2, !2, !"", i32 1} ; [ DW_TAG_compile_unit ] [/a/empty.c] [DW_LANG_C99]
+!1 = !{!"empty.c", !"/a"}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 1}
+!5 = !{!"clang version 3.6.0 "}
diff --git a/test/DebugInfo/SystemZ/processes-relocations.ll b/test/DebugInfo/SystemZ/processes-relocations.ll
new file mode 100644
index 000000000000..fd1adf6b4a54
--- /dev/null
+++ b/test/DebugInfo/SystemZ/processes-relocations.ll
@@ -0,0 +1,15 @@
+; RUN: llc -filetype=obj -O0 < %s -mtriple s390x-unknown-linux | \
+; RUN:     llvm-dwarfdump - 2>&1 | FileCheck %s
+
+; CHECK-NOT: failed to compute relocation
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = !{i32 786449, !1, i32 12, !"clang version 3.6.0 ", i1 false, !"", i32 0, !2, !2, !2, !2, !2, !"", i32 1} ; [ DW_TAG_compile_unit ] [/a/empty.c] [DW_LANG_C99]
+!1 = !{!"empty.c", !"/a"}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 1}
+!5 = !{!"clang version 3.6.0 "}
diff --git a/test/DebugInfo/SystemZ/variable-loc.ll b/test/DebugInfo/SystemZ/variable-loc.ll
index 23df1cb555d3..9e5c6a9e7e67 100644
--- a/test/DebugInfo/SystemZ/variable-loc.ll
+++ b/test/DebugInfo/SystemZ/variable-loc.ll
@@ -25,7 +25,7 @@
 
 declare void @populate_array(i32*, i32) nounwind
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare i32 @sum_array(i32*, i32) nounwind
 
@@ -35,8 +35,8 @@ entry:
   %main_arr = alloca [100 x i32], align 4
   %val = alloca i32, align 4
   store volatile i32 0, i32* %retval
-  call void @llvm.dbg.declare(metadata !{[100 x i32]* %main_arr}, metadata !17), !dbg !22
-  call void @llvm.dbg.declare(metadata !{i32* %val}, metadata !23), !dbg !24
+  call void @llvm.dbg.declare(metadata [100 x i32]* %main_arr, metadata !17, metadata !{!"0x102"}), !dbg !22
+  call void @llvm.dbg.declare(metadata i32* %val, metadata !23, metadata !{!"0x102"}), !dbg !24
   %arraydecay = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !25
   call void @populate_array(i32* %arraydecay, i32 100), !dbg !25
   %arraydecay1 = getelementptr inbounds [100 x i32]* %main_arr, i32 0, i32 0, !dbg !26
@@ -52,31 +52,31 @@ declare i32 @printf(i8*, ...)
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!30}
 
-!0 = metadata !{i32 786449, metadata !29, i32 12, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99]
-!1 = metadata !{}
-!3 = metadata !{metadata !5, metadata !11, metadata !14}
-!5 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"populate_array", metadata !"populate_array", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*, i32)* @populate_array, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array]
-!6 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9, metadata !10}
-!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!11 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"sum_array", metadata !"sum_array", metadata !"", i32 9, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*, i32)* @sum_array, null, null, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array]
-!12 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!13 = metadata !{metadata !10, metadata !9, metadata !10}
-!14 = metadata !{i32 786478, metadata !29, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 18, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
-!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{metadata !10}
-!17 = metadata !{i32 786688, metadata !18, metadata !"main_arr", metadata !6, i32 19, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [main_arr] [line 19]
-!18 = metadata !{i32 786443, metadata !29, metadata !14, i32 18, i32 16, i32 4} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c]
-!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 3200, i64 32, i32 0, i32 0, metadata !10, metadata !20, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int]
-!20 = metadata !{i32 786465, i64 0, i64 99}       ; [ DW_TAG_subrange_type ] [0, 99]
-!22 = metadata !{i32 19, i32 7, metadata !18, null}
-!23 = metadata !{i32 786688, metadata !18, metadata !"val", metadata !6, i32 20, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [val] [line 20]
-!24 = metadata !{i32 20, i32 7, metadata !18, null}
-!25 = metadata !{i32 22, i32 3, metadata !18, null}
-!26 = metadata !{i32 23, i32 9, metadata !18, null}
-!27 = metadata !{i32 24, i32 3, metadata !18, null}
-!28 = metadata !{i32 26, i32 3, metadata !18, null}
-!29 = metadata !{metadata !"simple.c", metadata !"/home/timnor01/a64-trunk/build"}
-!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.2 \000\00\000\00\000", !29, !1, !1, !3, !1,  !1} ; [ DW_TAG_compile_unit ] [/home/timnor01/a64-trunk/build/simple.c] [DW_LANG_C99]
+!1 = !{}
+!3 = !{!5, !11, !14}
+!5 = !{!"0x2e\00populate_array\00populate_array\00\004\000\001\000\006\00256\000\004", !29, !6, !7, null, void (i32*, i32)* @populate_array, null, null, !1} ; [ DW_TAG_subprogram ] [line 4] [def] [populate_array]
+!6 = !{!"0x29", !29} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9, !10}
+!9 = !{!"0xf\00\000\0064\0064\000\000", null, null, !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!10 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!11 = !{!"0x2e\00sum_array\00sum_array\00\009\000\001\000\006\00256\000\009", !29, !6, !12, null, i32 (i32*, i32)* @sum_array, null, null, !1} ; [ DW_TAG_subprogram ] [line 9] [def] [sum_array]
+!12 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !13, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = !{!10, !9, !10}
+!14 = !{!"0x2e\00main\00main\00\0018\000\001\000\006\00256\000\0018", !29, !6, !15, null, i32 ()* @main, null, null, !1} ; [ DW_TAG_subprogram ] [line 18] [def] [main]
+!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{!10}
+!17 = !{!"0x100\00main_arr\0019\000", !18, !6, !19} ; [ DW_TAG_auto_variable ] [main_arr] [line 19]
+!18 = !{!"0xb\0018\0016\004", !29, !14} ; [ DW_TAG_lexical_block ] [/home/timnor01/a64-trunk/build/simple.c]
+!19 = !{!"0x1\00\000\003200\0032\000\000", null, null, !10, !20, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 3200, align 32, offset 0] [from int]
+!20 = !{!"0x21\000\0099"}       ; [ DW_TAG_subrange_type ] [0, 99]
+!22 = !MDLocation(line: 19, column: 7, scope: !18)
+!23 = !{!"0x100\00val\0020\000", !18, !6, !10} ; [ DW_TAG_auto_variable ] [val] [line 20]
+!24 = !MDLocation(line: 20, column: 7, scope: !18)
+!25 = !MDLocation(line: 22, column: 3, scope: !18)
+!26 = !MDLocation(line: 23, column: 9, scope: !18)
+!27 = !MDLocation(line: 24, column: 3, scope: !18)
+!28 = !MDLocation(line: 26, column: 3, scope: !18)
+!29 = !{!"simple.c", !"/home/timnor01/a64-trunk/build"}
+!30 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/2010-04-13-PubType.ll b/test/DebugInfo/X86/2010-04-13-PubType.ll
index 0440afce24c7..0aec0361b885 100644
--- a/test/DebugInfo/X86/2010-04-13-PubType.ll
+++ b/test/DebugInfo/X86/2010-04-13-PubType.ll
@@ -12,9 +12,9 @@ entry:
   %retval = alloca i32                            ; <i32*> [#uses=2]
   %0 = alloca i32                                 ; <i32*> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{%struct.X** %x_addr}, metadata !0), !dbg !13
+  call void @llvm.dbg.declare(metadata %struct.X** %x_addr, metadata !0, metadata !{!"0x102"}), !dbg !13
   store %struct.X* %x, %struct.X** %x_addr
-  call void @llvm.dbg.declare(metadata !{%struct.Y** %y_addr}, metadata !14), !dbg !13
+  call void @llvm.dbg.declare(metadata %struct.Y** %y_addr, metadata !14, metadata !{!"0x102"}), !dbg !13
   store %struct.Y* %y, %struct.Y** %y_addr
   store i32 0, i32* %0, align 4, !dbg !13
   %1 = load i32* %0, align 4, !dbg !13            ; <i32> [#uses=1]
@@ -26,29 +26,29 @@ return:                                           ; preds = %entry
   ret i32 %retval1, !dbg !15
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!20}
 
-!0 = metadata !{i32 786689, metadata !1, metadata !"x", metadata !2, i32 7, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (%struct.X*, %struct.Y*)* @foo, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !18, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !19, metadata !19, metadata !17, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{metadata !6, metadata !7, metadata !9}
-!6 = metadata !{i32 786468, metadata !18, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786447, metadata !18, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"X", i32 3, i64 0, i64 0, i64 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [X] [line 3, size 0, align 0, offset 0] [decl] [from ]
-!9 = metadata !{i32 786447, metadata !18, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 786451, metadata !18, metadata !2, metadata !"Y", i32 4, i64 32, i64 32, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Y] [line 4, size 32, align 32, offset 0] [def] [from ]
-!11 = metadata !{metadata !12}
-!12 = metadata !{i32 786445, metadata !18, metadata !10, metadata !"x", i32 5, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!13 = metadata !{i32 7, i32 0, metadata !1, null}
-!14 = metadata !{i32 786689, metadata !1, metadata !"y", metadata !2, i32 7, metadata !9, i32 0, null} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 7, i32 0, metadata !16, null}
-!16 = metadata !{i32 786443, metadata !18, metadata !1, i32 7, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!17 = metadata !{metadata !1}
-!18 = metadata !{metadata !"a.c", metadata !"/tmp/"}
-!19 = metadata !{i32 0}
-!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x101\00x\007\000", !1, !2, !7} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00foo\00foo\00foo\007\000\001\000\006\000\000\007", !18, !2, !4, null, i32 (%struct.X*, %struct.Y*)* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !18} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\000\00\000\00\000", !18, !19, !19, !17, null,  null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !18, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{!6, !7, !9}
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", !18, !2} ; [ DW_TAG_base_type ]
+!7 = !{!"0xf\00\000\0064\0064\000\000", !18, !2, !8} ; [ DW_TAG_pointer_type ]
+!8 = !{!"0x13\00X\003\000\000\000\004\000", !18, !2, null, null, null, null, null} ; [ DW_TAG_structure_type ] [X] [line 3, size 0, align 0, offset 0] [decl] [from ]
+!9 = !{!"0xf\00\000\0064\0064\000\000", !18, !2, !10} ; [ DW_TAG_pointer_type ]
+!10 = !{!"0x13\00Y\004\0032\0032\000\000\000", !18, !2, null, !11, null, null, null} ; [ DW_TAG_structure_type ] [Y] [line 4, size 32, align 32, offset 0] [def] [from ]
+!11 = !{!12}
+!12 = !{!"0xd\00x\005\0032\0032\000\000", !18, !10, !6} ; [ DW_TAG_member ]
+!13 = !MDLocation(line: 7, scope: !1)
+!14 = !{!"0x101\00y\007\000", !1, !2, !9} ; [ DW_TAG_arg_variable ]
+!15 = !MDLocation(line: 7, scope: !16)
+!16 = !{!"0xb\007\000\000", !18, !1} ; [ DW_TAG_lexical_block ]
+!17 = !{!1}
+!18 = !{!"a.c", !"/tmp/"}
+!19 = !{i32 0}
+!20 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/2010-08-10-DbgConstant.ll b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
index 7f42e7b70ec6..bcaac896f1af 100644
--- a/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
+++ b/test/DebugInfo/X86/2010-08-10-DbgConstant.ll
@@ -14,18 +14,18 @@ declare void @bar(i32)
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!13}
 
-!0 = metadata !{i32 786478, metadata !12, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void ()* @foo, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang 2.8", i1 false, metadata !"", i32 0, metadata !4, metadata !4, metadata !10, metadata !11,  metadata !14, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null}
-!5 = metadata !{i32 786471, i32 0, metadata !1, metadata !"ro", metadata !"ro", metadata !"ro", metadata !1, i32 1, metadata !6, i1 true, i1 true, i32 201, null} ; [ DW_TAG_constant ]
-!6 = metadata !{i32 786470, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_const_type ]
-!7 = metadata !{i32 786468, metadata !12, metadata !1, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 3, i32 14, metadata !9, null}
-!9 = metadata !{i32 786443, metadata !12, metadata !0, i32 3, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{metadata !0}
-!11 = metadata !{metadata !5}
-!12 = metadata !{metadata !"/tmp/l.c", metadata !"/Volumes/Lalgate/clean/D"}
-!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!14 = metadata !{}
+!0 = !{!"0x2e\00foo\00foo\00foo\003\000\001\000\006\000\000\003", !12, !1, !3, null, void ()* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !12} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang 2.8\000\00\000\00\000", !12, !4, !4, !10, !11,  !14} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !12, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null}
+!5 = !{!"0x27\00ro\00ro\00ro\001\001\001", !1, !1, !6, i32 201, null} ; [ DW_TAG_constant ]
+!6 = !{!"0x26\00\000\000\000\000\000", !12, !1, !7} ; [ DW_TAG_const_type ]
+!7 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", !12, !1} ; [ DW_TAG_base_type ]
+!8 = !MDLocation(line: 3, column: 14, scope: !9)
+!9 = !{!"0xb\003\0012\000", !12, !0} ; [ DW_TAG_lexical_block ]
+!10 = !{!0}
+!11 = !{!5}
+!12 = !{!"/tmp/l.c", !"/Volumes/Lalgate/clean/D"}
+!13 = !{i32 1, !"Debug Info Version", i32 2}
+!14 = !{}
diff --git a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
index 4dc747f566d2..d1beadcbb0a2 100644
--- a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
+++ b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
@@ -7,49 +7,49 @@
 
 define i32 @f() nounwind {
   %LOC = alloca i32, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %LOC}, metadata !15), !dbg !17
+  call void @llvm.dbg.declare(metadata i32* %LOC, metadata !15, metadata !{!"0x102"}), !dbg !17
   %1 = load i32* @GLB, align 4, !dbg !18
   store i32 %1, i32* %LOC, align 4, !dbg !18
   %2 = load i32* @GLB, align 4, !dbg !19
   ret i32 %2, !dbg !19
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!21}
 
-!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"clang version 3.0 (trunk)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @f, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [f]
-!6 = metadata !{i32 720937, metadata !20} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!12 = metadata !{metadata !14}
-!14 = metadata !{i32 720948, i32 0, null, metadata !"GLB", metadata !"GLB", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @GLB, null} ; [ DW_TAG_variable ]
-!15 = metadata !{i32 786688, metadata !16, metadata !"LOC", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!16 = metadata !{i32 786443, metadata !20, metadata !5, i32 3, i32 9, i32 0} ; [ DW_TAG_lexical_block ]
-!17 = metadata !{i32 4, i32 9, metadata !16, null}
-!18 = metadata !{i32 4, i32 23, metadata !16, null}
-!19 = metadata !{i32 5, i32 5, metadata !16, null}
-!20 = metadata !{metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo"}
+!0 = !{!"0x11\0012\00clang version 3.0 (trunk)\000\00\000\00\000", !20, !1, !1, !3, !12,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00f\00f\00\003\000\001\000\006\000\000\000", !6, !6, !7, null, i32 ()* @f, null, null, null} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [f]
+!6 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!12 = !{!14}
+!14 = !{!"0x34\00GLB\00GLB\00\001\000\001", null, !6, !9, i32* @GLB, null} ; [ DW_TAG_variable ]
+!15 = !{!"0x100\00LOC\004\000", !16, !6, !9} ; [ DW_TAG_auto_variable ]
+!16 = !{!"0xb\003\009\000", !20, !5} ; [ DW_TAG_lexical_block ]
+!17 = !MDLocation(line: 4, column: 9, scope: !16)
+!18 = !MDLocation(line: 4, column: 23, scope: !16)
+!19 = !MDLocation(line: 5, column: 5, scope: !16)
+!20 = !{!"test.c", !"/work/llvm/vanilla/test/DebugInfo"}
 
 ; CHECK: DW_TAG_variable
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_name [DW_FORM_strp]       ( .debug_str[0x{{[0-9a-f]*}}] = "GLB")
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_decl_file [DW_FORM_data1] (0x01)
+; CHECK: DW_AT_decl_file [DW_FORM_data1] ("/work/llvm/vanilla/test/DebugInfo{{[/\\]}}test.c")
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_decl_line [DW_FORM_data1] (0x01)
+; CHECK: DW_AT_decl_line [DW_FORM_data1] (1)
 
 ; CHECK: DW_TAG_variable
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_name [DW_FORM_strp]   ( .debug_str[0x{{[0-9a-f]*}}] = "LOC")
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_decl_file [DW_FORM_data1]     (0x01)
+; CHECK: DW_AT_decl_file [DW_FORM_data1]     ("/work/llvm/vanilla/test/DebugInfo{{[/\\]}}test.c")
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_decl_line [DW_FORM_data1]     (0x04)
+; CHECK: DW_AT_decl_line [DW_FORM_data1]     (4)
 
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!21 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
index 21dccd71c4e2..4880fa49fc5e 100644
--- a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
+++ b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
@@ -15,24 +15,24 @@ entry:
   %myBar = alloca %struct.bar, align 8
   store i32 0, i32* %retval
   store i32 %argc, i32* %argc.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !49), !dbg !50
+  call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !49, metadata !{!"0x102"}), !dbg !50
   store i8** %argv, i8*** %argv.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !51), !dbg !52
-  call void @llvm.dbg.declare(metadata !{%struct.bar* %myBar}, metadata !53), !dbg !55
+  call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !51, metadata !{!"0x102"}), !dbg !52
+  call void @llvm.dbg.declare(metadata %struct.bar* %myBar, metadata !53, metadata !{!"0x102"}), !dbg !55
   call void @_ZN3barC1Ei(%struct.bar* %myBar, i32 1), !dbg !56
   ret i32 0, !dbg !57
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define linkonce_odr void @_ZN3barC1Ei(%struct.bar* %this, i32 %x) unnamed_addr uwtable ssp align 2 {
 entry:
   %this.addr = alloca %struct.bar*, align 8
   %x.addr = alloca i32, align 4
   store %struct.bar* %this, %struct.bar** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.bar** %this.addr}, metadata !58), !dbg !59
+  call void @llvm.dbg.declare(metadata %struct.bar** %this.addr, metadata !58, metadata !{!"0x102"}), !dbg !59
   store i32 %x, i32* %x.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !60), !dbg !61
+  call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !60, metadata !{!"0x102"}), !dbg !61
   %this1 = load %struct.bar** %this.addr
   %0 = load i32* %x.addr, align 4, !dbg !62
   call void @_ZN3barC2Ei(%struct.bar* %this1, i32 %0), !dbg !62
@@ -44,9 +44,9 @@ entry:
   %this.addr = alloca %struct.bar*, align 8
   %x.addr = alloca i32, align 4
   store %struct.bar* %this, %struct.bar** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.bar** %this.addr}, metadata !63), !dbg !64
+  call void @llvm.dbg.declare(metadata %struct.bar** %this.addr, metadata !63, metadata !{!"0x102"}), !dbg !64
   store i32 %x, i32* %x.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !65), !dbg !66
+  call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !65, metadata !{!"0x102"}), !dbg !66
   %this1 = load %struct.bar** %this.addr
   %b = getelementptr inbounds %struct.bar* %this1, i32 0, i32 0, !dbg !67
   %0 = load i32* %x.addr, align 4, !dbg !67
@@ -62,9 +62,9 @@ entry:
   %this.addr = alloca %struct.baz*, align 8
   %a.addr = alloca i32, align 4
   store %struct.baz* %this, %struct.baz** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.baz** %this.addr}, metadata !70), !dbg !71
+  call void @llvm.dbg.declare(metadata %struct.baz** %this.addr, metadata !70, metadata !{!"0x102"}), !dbg !71
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !72), !dbg !73
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !72, metadata !{!"0x102"}), !dbg !73
   %this1 = load %struct.baz** %this.addr
   %0 = load i32* %a.addr, align 4, !dbg !74
   call void @_ZN3bazC2Ei(%struct.baz* %this1, i32 %0), !dbg !74
@@ -76,9 +76,9 @@ entry:
   %this.addr = alloca %struct.baz*, align 8
   %a.addr = alloca i32, align 4
   store %struct.baz* %this, %struct.baz** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.baz** %this.addr}, metadata !75), !dbg !76
+  call void @llvm.dbg.declare(metadata %struct.baz** %this.addr, metadata !75, metadata !{!"0x102"}), !dbg !76
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !77), !dbg !78
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !77, metadata !{!"0x102"}), !dbg !78
   %this1 = load %struct.baz** %this.addr
   %h = getelementptr inbounds %struct.baz* %this1, i32 0, i32 0, !dbg !79
   %0 = load i32* %a.addr, align 4, !dbg !79
@@ -89,78 +89,78 @@ entry:
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!83}
 
-!0 = metadata !{i32 720913, metadata !82, i32 4, metadata !"clang version 3.1 (trunk 146596)", i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5, metadata !9}
-!5 = metadata !{i32 720898, metadata !82, null, metadata !"bar", i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_class_type ] [bar] [line 9, size 128, align 64, offset 0] [def] [from ]
-!6 = metadata !{i32 720937, metadata !82} ; [ DW_TAG_file_type ]
-!7 = metadata !{metadata !8, metadata !19, metadata !21}
-!8 = metadata !{i32 720909, metadata !82, metadata !5, metadata !"b", i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
-!9 = metadata !{i32 720898, metadata !82, null, metadata !"baz", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_class_type ] [baz] [line 3, size 32, align 32, offset 0] [def] [from ]
-!10 = metadata !{metadata !11, metadata !13}
-!11 = metadata !{i32 720909, metadata !82, metadata !9, metadata !"h", i32 5, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]
-!12 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!13 = metadata !{i32 720942, metadata !82, metadata !9, metadata !"baz", metadata !"baz", metadata !"", i32 6, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 0} ; [ DW_TAG_subprogram ]
-!14 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!15 = metadata !{null, metadata !16, metadata !12}
-!16 = metadata !{i32 720911, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ]
-!19 = metadata !{i32 720909, metadata !82, metadata !5, metadata !"b_ref", i32 12, i64 64, i64 64, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ]
-!20 = metadata !{i32 720912, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ]
-!21 = metadata !{i32 720942, metadata !82, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 13, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 0} ; [ DW_TAG_subprogram ]
-!22 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!23 = metadata !{null, metadata !24, metadata !12}
-!24 = metadata !{i32 720911, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !5} ; [ DW_TAG_pointer_type ]
-!27 = metadata !{metadata !29, metadata !37, metadata !40, metadata !43, metadata !46}
-!29 = metadata !{i32 720942, metadata !82, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 17] [def] [scope 0] [main]
-!30 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !31, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!31 = metadata !{metadata !12, metadata !12, metadata !32}
-!32 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ]
-!33 = metadata !{i32 720911, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ]
-!34 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!35 = metadata !{metadata !36}
-!36 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!37 = metadata !{i32 720942, metadata !82, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, null, i32 0} ; [ DW_TAG_subprogram ] [line 13] [def] [scope 0] [bar]
-!38 = metadata !{metadata !39}
-!39 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!40 = metadata !{i32 720942, metadata !82, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, metadata !21, null, i32 0} ; [ DW_TAG_subprogram ] [line 13] [def] [scope 0] [bar]
-!41 = metadata !{metadata !42}
-!42 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!43 = metadata !{i32 720942, metadata !82, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, null, i32 0} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [baz]
-!44 = metadata !{metadata !45}
-!45 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!46 = metadata !{i32 720942, metadata !82, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, null, i32 0} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [baz]
-!49 = metadata !{i32 721153, metadata !29, metadata !"argc", metadata !6, i32 16777232, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!50 = metadata !{i32 16, i32 14, metadata !29, null}
-!51 = metadata !{i32 721153, metadata !29, metadata !"argv", metadata !6, i32 33554448, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!52 = metadata !{i32 16, i32 27, metadata !29, null}
-!53 = metadata !{i32 721152, metadata !54, metadata !"myBar", metadata !6, i32 18, metadata !5, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!54 = metadata !{i32 720907, metadata !82, metadata !29, i32 17, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!55 = metadata !{i32 18, i32 9, metadata !54, null}
-!56 = metadata !{i32 18, i32 17, metadata !54, null}
-!57 = metadata !{i32 19, i32 5, metadata !54, null}
-!58 = metadata !{i32 721153, metadata !37, metadata !"this", metadata !6, i32 16777229, metadata !24, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!59 = metadata !{i32 13, i32 5, metadata !37, null}
-!60 = metadata !{i32 721153, metadata !37, metadata !"x", metadata !6, i32 33554445, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!61 = metadata !{i32 13, i32 13, metadata !37, null}
-!62 = metadata !{i32 13, i32 34, metadata !37, null}
-!63 = metadata !{i32 721153, metadata !40, metadata !"this", metadata !6, i32 16777229, metadata !24, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!64 = metadata !{i32 13, i32 5, metadata !40, null}
-!65 = metadata !{i32 721153, metadata !40, metadata !"x", metadata !6, i32 33554445, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!66 = metadata !{i32 13, i32 13, metadata !40, null}
-!67 = metadata !{i32 13, i32 33, metadata !40, null}
-!68 = metadata !{i32 13, i32 34, metadata !69, null}
-!69 = metadata !{i32 720907, metadata !82, metadata !40, i32 13, i32 33, i32 1} ; [ DW_TAG_lexical_block ]
-!70 = metadata !{i32 721153, metadata !43, metadata !"this", metadata !6, i32 16777222, metadata !16, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!71 = metadata !{i32 6, i32 5, metadata !43, null}
-!72 = metadata !{i32 721153, metadata !43, metadata !"a", metadata !6, i32 33554438, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!73 = metadata !{i32 6, i32 13, metadata !43, null}
-!74 = metadata !{i32 6, i32 24, metadata !43, null}
-!75 = metadata !{i32 721153, metadata !46, metadata !"this", metadata !6, i32 16777222, metadata !16, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!76 = metadata !{i32 6, i32 5, metadata !46, null}
-!77 = metadata !{i32 721153, metadata !46, metadata !"a", metadata !6, i32 33554438, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!78 = metadata !{i32 6, i32 13, metadata !46, null}
-!79 = metadata !{i32 6, i32 23, metadata !46, null}
-!80 = metadata !{i32 6, i32 24, metadata !81, null}
-!81 = metadata !{i32 720907, metadata !82, metadata !46, i32 6, i32 23, i32 2} ; [ DW_TAG_lexical_block ]
-!82 = metadata !{metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref"}
-!83 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.1 (trunk 146596)\000\00\000\00\000", !82, !1, !3, !27, !1,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5, !9}
+!5 = !{!"0x2\00bar\009\00128\0064\000\000\000", !82, null, null, !7, null, null, null} ; [ DW_TAG_class_type ] [bar] [line 9, size 128, align 64, offset 0] [def] [from ]
+!6 = !{!"0x29", !82} ; [ DW_TAG_file_type ]
+!7 = !{!8, !19, !21}
+!8 = !{!"0xd\00b\0011\0032\0032\000\000", !82, !5, !9} ; [ DW_TAG_member ]
+!9 = !{!"0x2\00baz\003\0032\0032\000\000\000", !82, null, null, !10, null, null, null} ; [ DW_TAG_class_type ] [baz] [line 3, size 32, align 32, offset 0] [def] [from ]
+!10 = !{!11, !13}
+!11 = !{!"0xd\00h\005\0032\0032\000\000", !82, !9, !12} ; [ DW_TAG_member ]
+!12 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!13 = !{!"0x2e\00baz\00baz\00\006\000\000\000\006\00256\000\000", !82, !9, !14, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ]
+!14 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = !{null, !16, !12}
+!16 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !9} ; [ DW_TAG_pointer_type ]
+!19 = !{!"0xd\00b_ref\0012\0064\0064\0064\000", !82, !5, !20} ; [ DW_TAG_member ]
+!20 = !{!"0x10\00\000\000\000\000\000", null, null, !9} ; [ DW_TAG_reference_type ]
+!21 = !{!"0x2e\00bar\00bar\00\0013\000\000\000\006\00256\000\000", !82, !5, !22, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ]
+!22 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !23, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!23 = !{null, !24, !12}
+!24 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !5} ; [ DW_TAG_pointer_type ]
+!27 = !{!29, !37, !40, !43, !46}
+!29 = !{!"0x2e\00main\00main\00\0017\000\001\000\006\00256\000\000", !82, !6, !30, null, i32 (i32, i8**)* @main, null, null, null} ; [ DW_TAG_subprogram ] [line 17] [def] [scope 0] [main]
+!30 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !31, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!31 = !{!12, !12, !32}
+!32 = !{!"0xf\00\000\0064\0064\000\000", null, null, !33} ; [ DW_TAG_pointer_type ]
+!33 = !{!"0xf\00\000\0064\0064\000\000", null, null, !34} ; [ DW_TAG_pointer_type ]
+!34 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ]
+!35 = !{!36}
+!36 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!37 = !{!"0x2e\00bar\00bar\00_ZN3barC1Ei\0013\000\001\000\006\00256\000\000", !82, null, !22, null, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, !21, null} ; [ DW_TAG_subprogram ] [line 13] [def] [scope 0] [bar]
+!38 = !{!39}
+!39 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!40 = !{!"0x2e\00bar\00bar\00_ZN3barC2Ei\0013\000\001\000\006\00256\000\000", !82, null, !22, null, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, !21, null} ; [ DW_TAG_subprogram ] [line 13] [def] [scope 0] [bar]
+!41 = !{!42}
+!42 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!43 = !{!"0x2e\00baz\00baz\00_ZN3bazC1Ei\006\000\001\000\006\00256\000\000", !82, null, !14, null, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, !13, null} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [baz]
+!44 = !{!45}
+!45 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!46 = !{!"0x2e\00baz\00baz\00_ZN3bazC2Ei\006\000\001\000\006\00256\000\000", !82, null, !14, null, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, !13, null} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [baz]
+!49 = !{!"0x101\00argc\0016777232\000", !29, !6, !12} ; [ DW_TAG_arg_variable ]
+!50 = !MDLocation(line: 16, column: 14, scope: !29)
+!51 = !{!"0x101\00argv\0033554448\000", !29, !6, !32} ; [ DW_TAG_arg_variable ]
+!52 = !MDLocation(line: 16, column: 27, scope: !29)
+!53 = !{!"0x100\00myBar\0018\000", !54, !6, !5} ; [ DW_TAG_auto_variable ]
+!54 = !{!"0xb\0017\001\000", !82, !29} ; [ DW_TAG_lexical_block ]
+!55 = !MDLocation(line: 18, column: 9, scope: !54)
+!56 = !MDLocation(line: 18, column: 17, scope: !54)
+!57 = !MDLocation(line: 19, column: 5, scope: !54)
+!58 = !{!"0x101\00this\0016777229\0064", !37, !6, !24} ; [ DW_TAG_arg_variable ]
+!59 = !MDLocation(line: 13, column: 5, scope: !37)
+!60 = !{!"0x101\00x\0033554445\000", !37, !6, !12} ; [ DW_TAG_arg_variable ]
+!61 = !MDLocation(line: 13, column: 13, scope: !37)
+!62 = !MDLocation(line: 13, column: 34, scope: !37)
+!63 = !{!"0x101\00this\0016777229\0064", !40, !6, !24} ; [ DW_TAG_arg_variable ]
+!64 = !MDLocation(line: 13, column: 5, scope: !40)
+!65 = !{!"0x101\00x\0033554445\000", !40, !6, !12} ; [ DW_TAG_arg_variable ]
+!66 = !MDLocation(line: 13, column: 13, scope: !40)
+!67 = !MDLocation(line: 13, column: 33, scope: !40)
+!68 = !MDLocation(line: 13, column: 34, scope: !69)
+!69 = !{!"0xb\0013\0033\001", !82, !40} ; [ DW_TAG_lexical_block ]
+!70 = !{!"0x101\00this\0016777222\0064", !43, !6, !16} ; [ DW_TAG_arg_variable ]
+!71 = !MDLocation(line: 6, column: 5, scope: !43)
+!72 = !{!"0x101\00a\0033554438\000", !43, !6, !12} ; [ DW_TAG_arg_variable ]
+!73 = !MDLocation(line: 6, column: 13, scope: !43)
+!74 = !MDLocation(line: 6, column: 24, scope: !43)
+!75 = !{!"0x101\00this\0016777222\0064", !46, !6, !16} ; [ DW_TAG_arg_variable ]
+!76 = !MDLocation(line: 6, column: 5, scope: !46)
+!77 = !{!"0x101\00a\0033554438\000", !46, !6, !12} ; [ DW_TAG_arg_variable ]
+!78 = !MDLocation(line: 6, column: 13, scope: !46)
+!79 = !MDLocation(line: 6, column: 23, scope: !46)
+!80 = !MDLocation(line: 6, column: 24, scope: !81)
+!81 = !{!"0xb\006\0023\002", !82, !46} ; [ DW_TAG_lexical_block ]
+!82 = !{!"main.cpp", !"/Users/echristo/tmp/bad-struct-ref"}
+!83 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/DW_AT_byte_size.ll b/test/DebugInfo/X86/DW_AT_byte_size.ll
index 59921bd245c7..8b4f5611c170 100644
--- a/test/DebugInfo/X86/DW_AT_byte_size.ll
+++ b/test/DebugInfo/X86/DW_AT_byte_size.ll
@@ -14,33 +14,33 @@ define i32 @_Z3fooP1A(%struct.A* %a) nounwind uwtable ssp {
 entry:
   %a.addr = alloca %struct.A*, align 8
   store %struct.A* %a, %struct.A** %a.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.A** %a.addr}, metadata !16), !dbg !17
+  call void @llvm.dbg.declare(metadata %struct.A** %a.addr, metadata !16, metadata !{!"0x102"}), !dbg !17
   %0 = load %struct.A** %a.addr, align 8, !dbg !18
   %b = getelementptr inbounds %struct.A* %0, i32 0, i32 0, !dbg !18
   %1 = load i32* %b, align 4, !dbg !18
   ret i32 %1, !dbg !18
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!21}
 
-!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.1 (trunk 150996)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, null, i32 3} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !10}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786445, metadata !20, metadata !11, metadata !"b", i32 1, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
-!16 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 3, i32 13, metadata !5, null}
-!18 = metadata !{i32 4, i32 3, metadata !19, null}
-!19 = metadata !{i32 786443, metadata !20, metadata !5, i32 3, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
-!20 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo"}
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.1 (trunk 150996)\000\00\000\00\000", !20, !1, !1, !3, !1,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00foo\00foo\00_Z3fooP1A\003\000\001\000\006\00256\000\003", !20, !6, !7, null, i32 (%struct.A*)* @_Z3fooP1A, null, null, null} ; [ DW_TAG_subprogram ]
+!6 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !10}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!10 = !{!"0xf\00\000\0064\0064\000\000", null, null, !11} ; [ DW_TAG_pointer_type ]
+!11 = !{!"0x2\00A\001\0032\0032\000\000\000", !20, null, null, !12, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
+!12 = !{!13}
+!13 = !{!"0xd\00b\001\0032\0032\000\000", !20, !11, !9} ; [ DW_TAG_member ]
+!16 = !{!"0x101\00a\0016777219\000", !5, !6, !10} ; [ DW_TAG_arg_variable ]
+!17 = !MDLocation(line: 3, column: 13, scope: !5)
+!18 = !MDLocation(line: 4, column: 3, scope: !19)
+!19 = !{!"0xb\003\0016\000", !20, !5} ; [ DW_TAG_lexical_block ]
+!20 = !{!"foo.cpp", !"/Users/echristo"}
+!21 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/DW_AT_linkage_name.ll b/test/DebugInfo/X86/DW_AT_linkage_name.ll
index dce234aa9002..e395e0619736 100644
--- a/test/DebugInfo/X86/DW_AT_linkage_name.ll
+++ b/test/DebugInfo/X86/DW_AT_linkage_name.ll
@@ -18,14 +18,14 @@
 ; Test that we do emit a linkage name for a specific instance of it.
 
 ; CHECK: DW_TAG_subprogram
-; CHECK: [[A_DTOR:.*]]:     DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
 ; CHECK: DW_AT_name {{.*}} "~A"
 ; CHECK-NOT: DW_AT_MIPS_linkage_name
 ; CHECK: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN1AD2Ev"
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_specification {{.*}}[[A_DTOR]]
+; CHECK: DW_AT_specification {{.*}} "~A"
 
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@@ -38,20 +38,20 @@ define void @_ZN1AD2Ev(%struct.A* %this) unnamed_addr #0 align 2 {
 entry:
   %this.addr = alloca %struct.A*, align 8
   store %struct.A* %this, %struct.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.A** %this.addr}, metadata !26), !dbg !28
+  call void @llvm.dbg.declare(metadata %struct.A** %this.addr, metadata !26, metadata !{!"0x102"}), !dbg !28
   %this1 = load %struct.A** %this.addr
   ret void, !dbg !29
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind ssp uwtable
 define void @_ZN1AD1Ev(%struct.A* %this) unnamed_addr #0 align 2 {
 entry:
   %this.addr = alloca %struct.A*, align 8
   store %struct.A* %this, %struct.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.A** %this.addr}, metadata !30), !dbg !31
+  call void @llvm.dbg.declare(metadata %struct.A** %this.addr, metadata !30, metadata !{!"0x102"}), !dbg !31
   %this1 = load %struct.A** %this.addr
   call void @_ZN1AD2Ev(%struct.A* %this1), !dbg !32
   ret void, !dbg !33
@@ -61,7 +61,7 @@ entry:
 define void @_Z3foov() #2 {
 entry:
   %a = alloca %struct.A, align 1
-  call void @llvm.dbg.declare(metadata !{%struct.A* %a}, metadata !34), !dbg !35
+  call void @llvm.dbg.declare(metadata %struct.A* %a, metadata !34, metadata !{!"0x102"}), !dbg !35
   call void @_ZN1AC1Ei(%struct.A* %a, i32 1), !dbg !35
   call void @_ZN1AD1Ev(%struct.A* %a), !dbg !36
   ret void, !dbg !36
@@ -77,40 +77,40 @@ attributes #2 = { ssp uwtable }
 !llvm.module.flags = !{!23, !24}
 !llvm.ident = !{!25}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !16, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [linkage-name.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"linkage-name.cpp", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !1, null, metadata !"A", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 1, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{metadata !6, metadata !12}
-!6 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !11, i32 2} ; [ DW_TAG_subprogram ] [line 2] [A]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9, metadata !10}
-!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
-!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!11 = metadata !{i32 786468}
-!12 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"~A", metadata !"~A", metadata !"", i32 3, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !15, i32 3} ; [ DW_TAG_subprogram ] [line 3] [~A]
-!13 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!14 = metadata !{null, metadata !9}
-!15 = metadata !{i32 786468}
-!16 = metadata !{metadata !17, metadata !18, metadata !19}
-!17 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"~A", metadata !"~A", metadata !"_ZN1AD2Ev", i32 6, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.A*)* @_ZN1AD2Ev, null, metadata !12, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [~A]
-!18 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"~A", metadata !"~A", metadata !"_ZN1AD1Ev", i32 6, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.A*)* @_ZN1AD1Ev, null, metadata !12, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [~A]
-!19 = metadata !{i32 786478, metadata !1, metadata !20, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 10, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3foov, null, null, metadata !2, i32 10} ; [ DW_TAG_subprogram ] [line 10] [def] [foo]
-!20 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [linkage-name.cpp]
-!21 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!22 = metadata !{null}
-!23 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!24 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!25 = metadata !{metadata !"clang version 3.5.0 "}
-!26 = metadata !{i32 786689, metadata !17, metadata !"this", null, i32 16777216, metadata !27, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
-!28 = metadata !{i32 0, i32 0, metadata !17, null}
-!29 = metadata !{i32 8, i32 0, metadata !17, null} ; [ DW_TAG_imported_declaration ]
-!30 = metadata !{i32 786689, metadata !18, metadata !"this", null, i32 16777216, metadata !27, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!31 = metadata !{i32 0, i32 0, metadata !18, null}
-!32 = metadata !{i32 6, i32 0, metadata !18, null}
-!33 = metadata !{i32 8, i32 0, metadata !18, null} ; [ DW_TAG_imported_declaration ]
-!34 = metadata !{i32 786688, metadata !19, metadata !"a", metadata !20, i32 11, metadata !"_ZTS1A", i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 11]
-!35 = metadata !{i32 11, i32 0, metadata !19, null}
-!36 = metadata !{i32 12, i32 0, metadata !19, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !3, !16, !2, !2} ; [ DW_TAG_compile_unit ] [linkage-name.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"linkage-name.cpp", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00A\001\008\008\000\000\000", !1, null, null, !5, null, null, !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!6, !12}
+!6 = !{!"0x2e\00A\00A\00\002\000\000\000\006\00256\000\002", !1, !"_ZTS1A", !7, null, null, null, i32 0, !11} ; [ DW_TAG_subprogram ] [line 2] [A]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9, !10}
+!9 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!10 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!11 = !{i32 786468}
+!12 = !{!"0x2e\00~A\00~A\00\003\000\000\000\006\00256\000\003", !1, !"_ZTS1A", !13, null, null, null, i32 0, !15} ; [ DW_TAG_subprogram ] [line 3] [~A]
+!13 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !14, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = !{null, !9}
+!15 = !{i32 786468}
+!16 = !{!17, !18, !19}
+!17 = !{!"0x2e\00~A\00~A\00_ZN1AD2Ev\006\000\001\000\006\00256\000\006", !1, !"_ZTS1A", !13, null, void (%struct.A*)* @_ZN1AD2Ev, null, !12, !2} ; [ DW_TAG_subprogram ] [line 6] [def] [~A]
+!18 = !{!"0x2e\00~A\00~A\00_ZN1AD1Ev\006\000\001\000\006\00256\000\006", !1, !"_ZTS1A", !13, null, void (%struct.A*)* @_ZN1AD1Ev, null, !12, !2} ; [ DW_TAG_subprogram ] [line 6] [def] [~A]
+!19 = !{!"0x2e\00foo\00foo\00_Z3foov\0010\000\001\000\006\00256\000\0010", !1, !20, !21, null, void ()* @_Z3foov, null, null, !2} ; [ DW_TAG_subprogram ] [line 10] [def] [foo]
+!20 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [linkage-name.cpp]
+!21 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !22, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!22 = !{null}
+!23 = !{i32 2, !"Dwarf Version", i32 2}
+!24 = !{i32 1, !"Debug Info Version", i32 2}
+!25 = !{!"clang version 3.5.0 "}
+!26 = !{!"0x101\00this\0016777216\001088", !17, null, !27} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!27 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
+!28 = !MDLocation(line: 0, scope: !17)
+!29 = !MDLocation(line: 8, scope: !17)
+!30 = !{!"0x101\00this\0016777216\001088", !18, null, !27} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!31 = !MDLocation(line: 0, scope: !18)
+!32 = !MDLocation(line: 6, scope: !18)
+!33 = !MDLocation(line: 8, scope: !18)
+!34 = !{!"0x100\00a\0011\000", !19, !20, !"_ZTS1A"} ; [ DW_TAG_auto_variable ] [a] [line 11]
+!35 = !MDLocation(line: 11, scope: !19)
+!36 = !MDLocation(line: 12, scope: !19)
diff --git a/test/DebugInfo/X86/DW_AT_location-reference.ll b/test/DebugInfo/X86/DW_AT_location-reference.ll
index f31b0ad3259a..a5b5700ca700 100644
--- a/test/DebugInfo/X86/DW_AT_location-reference.ll
+++ b/test/DebugInfo/X86/DW_AT_location-reference.ll
@@ -64,7 +64,7 @@ define void @f() nounwind {
 entry:
   %call = tail call i32 @g(i32 0, i32 0) nounwind, !dbg !8
   store i32 %call, i32* @a, align 4, !dbg !8
-  tail call void @llvm.dbg.value(metadata !12, i64 0, metadata !5), !dbg !13
+  tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !5, metadata !{!"0x102"}), !dbg !13
   br label %while.body
 
 while.body:                                       ; preds = %entry, %while.body
@@ -75,10 +75,10 @@ while.body:                                       ; preds = %entry, %while.body
   br i1 %tobool, label %while.end, label %while.body, !dbg !14
 
 while.end:                                        ; preds = %while.body
-  tail call void @llvm.dbg.value(metadata !{i32 %mul}, i64 0, metadata !5), !dbg !14
+  tail call void @llvm.dbg.value(metadata i32 %mul, i64 0, metadata !5, metadata !{!"0x102"}), !dbg !14
   %call4 = tail call i32 @g(i32 %mul, i32 0) nounwind, !dbg !15
   store i32 %call4, i32* @a, align 4, !dbg !15
-  tail call void @llvm.dbg.value(metadata !16, i64 0, metadata !5), !dbg !17
+  tail call void @llvm.dbg.value(metadata i32 2, i64 0, metadata !5, metadata !{!"0x102"}), !dbg !17
   br label %while.body9
 
 while.body9:                                      ; preds = %while.end, %while.body9
@@ -89,7 +89,7 @@ while.body9:                                      ; preds = %while.end, %while.b
   br i1 %tobool8, label %while.end13, label %while.body9, !dbg !18
 
 while.end13:                                      ; preds = %while.body9
-  tail call void @llvm.dbg.value(metadata !{i32 %mul12}, i64 0, metadata !5), !dbg !18
+  tail call void @llvm.dbg.value(metadata i32 %mul12, i64 0, metadata !5, metadata !{!"0x102"}), !dbg !18
   %call15 = tail call i32 @g(i32 0, i32 %mul12) nounwind, !dbg !19
   store i32 %call15, i32* @a, align 4, !dbg !19
   ret void, !dbg !20
@@ -97,30 +97,30 @@ while.end13:                                      ; preds = %while.body9
 
 declare i32 @g(i32, i32)
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!24}
 
-!0 = metadata !{i32 786478, metadata !23, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @f, null, null, metadata !22, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [f]
-!1 = metadata !{i32 786473, metadata !23} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !23, i32 12, metadata !"clang version 3.0 (trunk)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !21, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !23, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null}
-!5 = metadata !{i32 786688, metadata !6, metadata !"x", metadata !1, i32 5, metadata !7, i32 0, null} ; [ DW_TAG_auto_variable ]
-!6 = metadata !{i32 786443, metadata !23, metadata !0, i32 4, i32 14, i32 0} ; [ DW_TAG_lexical_block ]
-!7 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 6, i32 3, metadata !6, null}
-!12 = metadata !{i32 1}
-!13 = metadata !{i32 7, i32 3, metadata !6, null}
-!14 = metadata !{i32 8, i32 3, metadata !6, null}
-!15 = metadata !{i32 9, i32 3, metadata !6, null}
-!16 = metadata !{i32 2}
-!17 = metadata !{i32 10, i32 3, metadata !6, null}
-!18 = metadata !{i32 11, i32 3, metadata !6, null}
-!19 = metadata !{i32 12, i32 3, metadata !6, null}
-!20 = metadata !{i32 13, i32 1, metadata !6, null}
-!21 = metadata !{metadata !0}
-!22 = metadata !{metadata !5}
-!23 = metadata !{metadata !"simple.c", metadata !"/home/rengol01/temp/tests/dwarf/relocation"}
-!24 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00f\00f\00\004\000\001\000\006\00256\001\004", !23, !1, !3, null, void ()* @f, null, null, !22} ; [ DW_TAG_subprogram ] [line 4] [def] [f]
+!1 = !{!"0x29", !23} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 3.0 (trunk)\001\00\000\00\001", !23, !4, !4, !21, null,  null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !23, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null}
+!5 = !{!"0x100\00x\005\000", !6, !1, !7} ; [ DW_TAG_auto_variable ]
+!6 = !{!"0xb\004\0014\000", !23, !0} ; [ DW_TAG_lexical_block ]
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
+!8 = !MDLocation(line: 6, column: 3, scope: !6)
+!12 = !{i32 1}
+!13 = !MDLocation(line: 7, column: 3, scope: !6)
+!14 = !MDLocation(line: 8, column: 3, scope: !6)
+!15 = !MDLocation(line: 9, column: 3, scope: !6)
+!16 = !{i32 2}
+!17 = !MDLocation(line: 10, column: 3, scope: !6)
+!18 = !MDLocation(line: 11, column: 3, scope: !6)
+!19 = !MDLocation(line: 12, column: 3, scope: !6)
+!20 = !MDLocation(line: 13, column: 1, scope: !6)
+!21 = !{!0}
+!22 = !{!5}
+!23 = !{!"simple.c", !"/home/rengol01/temp/tests/dwarf/relocation"}
+!24 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/DW_AT_object_pointer.ll b/test/DebugInfo/X86/DW_AT_object_pointer.ll
index 4b9fae8e5af8..8cff0b87dceb 100644
--- a/test/DebugInfo/X86/DW_AT_object_pointer.ll
+++ b/test/DebugInfo/X86/DW_AT_object_pointer.ll
@@ -17,21 +17,21 @@ entry:
   %.addr = alloca i32, align 4
   %a = alloca %class.A, align 4
   store i32 %0, i32* %.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %.addr}, metadata !36), !dbg !35
-  call void @llvm.dbg.declare(metadata !{%class.A* %a}, metadata !21), !dbg !23
+  call void @llvm.dbg.declare(metadata i32* %.addr, metadata !36, metadata !{!"0x102"}), !dbg !35
+  call void @llvm.dbg.declare(metadata %class.A* %a, metadata !21, metadata !{!"0x102"}), !dbg !23
   call void @_ZN1AC1Ev(%class.A* %a), !dbg !24
   %m_a = getelementptr inbounds %class.A* %a, i32 0, i32 0, !dbg !25
   %1 = load i32* %m_a, align 4, !dbg !25
   ret i32 %1, !dbg !25
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define linkonce_odr void @_ZN1AC1Ev(%class.A* %this) unnamed_addr nounwind uwtable ssp align 2 {
 entry:
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !26), !dbg !28
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !26, metadata !{!"0x102"}), !dbg !28
   %this1 = load %class.A** %this.addr
   call void @_ZN1AC2Ev(%class.A* %this1), !dbg !29
   ret void, !dbg !29
@@ -41,7 +41,7 @@ define linkonce_odr void @_ZN1AC2Ev(%class.A* %this) unnamed_addr nounwind uwtab
 entry:
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !30), !dbg !31
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !30, metadata !{!"0x102"}), !dbg !31
   %this1 = load %class.A** %this.addr
   %m_a = getelementptr inbounds %class.A* %this1, i32 0, i32 0, !dbg !32
   store i32 0, i32* %m_a, align 4, !dbg !32
@@ -51,40 +51,40 @@ entry:
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!38}
 
-!0 = metadata !{i32 786449, metadata !37, i32 4, metadata !"clang version 3.2 (trunk 163586) (llvm/trunk 163570)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/debug-tests/bar.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{}
-!3 = metadata !{metadata !5, metadata !10, metadata !20}
-!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", i32 7, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3fooi, null, null, metadata !1, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [foo]
-!6 = metadata !{i32 786473, metadata !37} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786478, metadata !6, null, metadata !"A", metadata !"A", metadata !"_ZN1AC1Ev", i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC1Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A]
-!11 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{null, metadata !13}
-!13 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
-!14 = metadata !{i32 786434, metadata !37, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
-!15 = metadata !{metadata !16, metadata !17}
-!16 = metadata !{i32 786445, metadata !37, metadata !14, metadata !"m_a", i32 4, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [m_a] [line 4, size 32, align 32, offset 0] [from int]
-!17 = metadata !{i32 786478, metadata !6, metadata !14, metadata !"A", metadata !"A", metadata !"", i32 3, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [A]
-!18 = metadata !{metadata !19}
-!19 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!20 = metadata !{i32 786478, metadata !6, null, metadata !"A", metadata !"A", metadata !"_ZN1AC2Ev", i32 3, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AC2Ev, null, metadata !17, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A]
-!21 = metadata !{i32 786688, metadata !22, metadata !"a", metadata !6, i32 8, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 8]
-!22 = metadata !{i32 786443, metadata !6, metadata !5, i32 7, i32 11, i32 0} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp]
-!23 = metadata !{i32 8, i32 5, metadata !22, null}
-!24 = metadata !{i32 8, i32 6, metadata !22, null}
-!25 = metadata !{i32 9, i32 3, metadata !22, null}
-!26 = metadata !{i32 786689, metadata !10, metadata !"this", metadata !6, i32 16777219, metadata !27, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 3]
-!27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
-!28 = metadata !{i32 3, i32 3, metadata !10, null}
-!29 = metadata !{i32 3, i32 18, metadata !10, null}
-!30 = metadata !{i32 786689, metadata !20, metadata !"this", metadata !6, i32 16777219, metadata !27, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 3]
-!31 = metadata !{i32 3, i32 3, metadata !20, null}
-!32 = metadata !{i32 3, i32 9, metadata !33, null}
-!33 = metadata !{i32 786443, metadata !6, metadata !20, i32 3, i32 7, i32 1} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp]
-!34 = metadata !{i32 3, i32 18, metadata !33, null}
-!35 = metadata !{i32 7, i32 0, metadata !5, null}
-!36 = metadata !{i32 786689, metadata !5, metadata !"", metadata !6, i32 16777223, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 7]
-!37 = metadata !{metadata !"bar.cpp", metadata !"/Users/echristo/debug-tests"}
-!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.2 (trunk 163586) (llvm/trunk 163570)\000\00\000\00\000", !37, !1, !1, !3, !1,  !1} ; [ DW_TAG_compile_unit ] [/Users/echristo/debug-tests/bar.cpp] [DW_LANG_C_plus_plus]
+!1 = !{}
+!3 = !{!5, !10, !20}
+!5 = !{!"0x2e\00foo\00foo\00_Z3fooi\007\000\001\000\006\00256\000\007", !6, !6, !7, null, i32 (i32)* @_Z3fooi, null, null, !1} ; [ DW_TAG_subprogram ] [line 7] [def] [foo]
+!6 = !{!"0x29", !37} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"0x2e\00A\00A\00_ZN1AC1Ev\003\000\001\000\006\00256\000\003", !6, null, !11, null, void (%class.A*)* @_ZN1AC1Ev, null, !17, !1} ; [ DW_TAG_subprogram ] [line 3] [def] [A]
+!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{null, !13}
+!13 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!14 = !{!"0x2\00A\001\0032\0032\000\000\000", !37, null, null, !15, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
+!15 = !{!16, !17}
+!16 = !{!"0xd\00m_a\004\0032\0032\000\000", !37, !14, !9} ; [ DW_TAG_member ] [m_a] [line 4, size 32, align 32, offset 0] [from int]
+!17 = !{!"0x2e\00A\00A\00\003\000\000\000\006\00256\000\003", !6, !14, !11, null, null, null, i32 0, !18} ; [ DW_TAG_subprogram ] [line 3] [A]
+!18 = !{!19}
+!19 = !{!"0x24"}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!20 = !{!"0x2e\00A\00A\00_ZN1AC2Ev\003\000\001\000\006\00256\000\003", !6, null, !11, null, void (%class.A*)* @_ZN1AC2Ev, null, !17, !1} ; [ DW_TAG_subprogram ] [line 3] [def] [A]
+!21 = !{!"0x100\00a\008\000", !22, !6, !14} ; [ DW_TAG_auto_variable ] [a] [line 8]
+!22 = !{!"0xb\007\0011\000", !6, !5} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp]
+!23 = !MDLocation(line: 8, column: 5, scope: !22)
+!24 = !MDLocation(line: 8, column: 6, scope: !22)
+!25 = !MDLocation(line: 9, column: 3, scope: !22)
+!26 = !{!"0x101\00this\0016777219\001088", !10, !6, !27} ; [ DW_TAG_arg_variable ] [this] [line 3]
+!27 = !{!"0xf\00\000\0064\0064\000\000", null, null, !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!28 = !MDLocation(line: 3, column: 3, scope: !10)
+!29 = !MDLocation(line: 3, column: 18, scope: !10)
+!30 = !{!"0x101\00this\0016777219\001088", !20, !6, !27} ; [ DW_TAG_arg_variable ] [this] [line 3]
+!31 = !MDLocation(line: 3, column: 3, scope: !20)
+!32 = !MDLocation(line: 3, column: 9, scope: !33)
+!33 = !{!"0xb\003\007\001", !6, !20} ; [ DW_TAG_lexical_block ] [/Users/echristo/debug-tests/bar.cpp]
+!34 = !MDLocation(line: 3, column: 18, scope: !33)
+!35 = !MDLocation(line: 7, scope: !5)
+!36 = !{!"0x101\00\0016777223\000", !5, !6, !9} ; [ DW_TAG_arg_variable ] [line 7]
+!37 = !{!"bar.cpp", !"/Users/echristo/debug-tests"}
+!38 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll
index 4f45f367448f..ab2075a879d3 100644
--- a/test/DebugInfo/X86/DW_AT_specification.ll
+++ b/test/DebugInfo/X86/DW_AT_specification.ll
@@ -3,11 +3,11 @@
 
 ; test that the DW_AT_specification is a back edge in the file.
 
-; CHECK: [[BAR_DECL:0x[0-9a-f]*]]: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
 ; CHECK-NEXT: DW_AT_MIPS_linkage_name {{.*}} "_ZN3foo3barEv"
 ; CHECK: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_specification {{.*}} {[[BAR_DECL]]}
+; CHECK: DW_AT_specification {{.*}} "_ZN3foo3barEv"
 
 
 @_ZZN3foo3barEvE1x = constant i32 0, align 4
@@ -20,23 +20,23 @@ entry:
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!28}
 
-!0 = metadata !{i32 786449, metadata !27, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, null, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [bar]
-!6 = metadata !{i32 720937, metadata !27} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 786451, metadata !27, null, metadata !"foo", i32 1, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 0, align 0, offset 0] [decl] [from ]
-!11 = metadata !{i32 720942, metadata !6, metadata !12, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 2} ; [ DW_TAG_subprogram ]
-!12 = metadata !{i32 720898, metadata !27, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !13, i32 0, null, null} ; [ DW_TAG_class_type ]
-!13 = metadata !{metadata !11}
-!18 = metadata !{metadata !20}
-!20 = metadata !{i32 720948, i32 0, metadata !5, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 5, metadata !21, i32 1, i32 1, i32* @_ZZN3foo3barEvE1x, null} ; [ DW_TAG_variable ]
-!21 = metadata !{i32 720934, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ]
-!22 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!25 = metadata !{i32 6, i32 1, metadata !26, null}
-!26 = metadata !{i32 786443, metadata !6, metadata !5, i32 4, i32 17, i32 0} ; [ DW_TAG_lexical_block ]
-!27 = metadata !{metadata !"nsNativeAppSupportBase.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library"}
-!28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.0 ()\000\00\000\00\000", !27, !1, !1, !3, !18,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00bar\00bar\00_ZN3foo3barEv\004\000\001\000\006\00256\000\004", !6, null, !7, null, void ()* @_ZN3foo3barEv, null, !11, null} ; [ DW_TAG_subprogram ] [line 4] [def] [bar]
+!6 = !{!"0x29", !27} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9}
+!9 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !10} ; [ DW_TAG_pointer_type ]
+!10 = !{!"0x13\00foo\001\000\000\000\004\000", !27, null, null, null, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 0, align 0, offset 0] [decl] [from ]
+!11 = !{!"0x2e\00bar\00bar\00_ZN3foo3barEv\002\000\000\000\006\00256\000\002", !6, !12, !7, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ]
+!12 = !{!"0x2\00foo\001\008\008\000\000\000", !27, null, null, !13, null, null} ; [ DW_TAG_class_type ]
+!13 = !{!11}
+!18 = !{!20}
+!20 = !{!"0x34\00x\00x\00\005\001\001", !5, !6, !21, i32* @_ZZN3foo3barEvE1x, null} ; [ DW_TAG_variable ]
+!21 = !{!"0x26\00\000\000\000\000\000", null, null, !22} ; [ DW_TAG_const_type ]
+!22 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!25 = !MDLocation(line: 6, column: 1, scope: !26)
+!26 = !{!"0xb\004\0017\000", !6, !5} ; [ DW_TAG_lexical_block ]
+!27 = !{!"nsNativeAppSupportBase.ii", !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library"}
+!28 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll b/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll
index f16cbb061bf3..39c334054762 100644
--- a/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll
+++ b/test/DebugInfo/X86/DW_AT_stmt_list_sec_offset.ll
@@ -30,15 +30,15 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [C:\Projects/test.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"test.c", metadata !"C:\5CProjects"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [main]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [C:\Projects/test.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!10 = metadata !{i32 3, i32 0, metadata !4, null}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [C:\Projects/test.c] [DW_LANG_C99]
+!1 = !{!"test.c", !"C:\5CProjects"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00main\00main\00\001\000\001\000\006\000\000\002", !1, !5, !6, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [main]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [C:\Projects/test.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !MDLocation(line: 3, scope: !4)
+!11 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/DW_TAG_friend.ll b/test/DebugInfo/X86/DW_TAG_friend.ll
index 2facc409e8d1..ffa032f7cbde 100644
--- a/test/DebugInfo/X86/DW_TAG_friend.ll
+++ b/test/DebugInfo/X86/DW_TAG_friend.ll
@@ -18,31 +18,31 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!29}
 
-!0 = metadata !{i32 786449, metadata !28, i32 4, metadata !"clang version 3.1 (trunk 153413) (llvm/trunk 153428)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5, metadata !17}
-!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 10, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ]
-!6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
-!8 = metadata !{metadata !9, metadata !11}
-!9 = metadata !{i32 786445, metadata !28, metadata !7, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!11 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !12, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !15, i32 1} ; [ DW_TAG_subprogram ]
-!12 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!13 = metadata !{null, metadata !14}
-!14 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !7} ; [ DW_TAG_pointer_type ]
-!15 = metadata !{metadata !16}
-!16 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!17 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !6, i32 11, metadata !18, i32 0, i32 1, %class.B* @b, null} ; [ DW_TAG_variable ]
-!18 = metadata !{i32 786434, metadata !28, null, metadata !"B", i32 5, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_class_type ] [B] [line 5, size 32, align 32, offset 0] [def] [from ]
-!19 = metadata !{metadata !20, metadata !21, metadata !27}
-!20 = metadata !{i32 786445, metadata !28, metadata !18, metadata !"b", i32 7, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
-!21 = metadata !{i32 786478, metadata !6, metadata !18, metadata !"B", metadata !"B", metadata !"", i32 5, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !25, i32 5} ; [ DW_TAG_subprogram ]
-!22 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!23 = metadata !{null, metadata !24}
-!24 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !18} ; [ DW_TAG_pointer_type ]
-!25 = metadata !{metadata !26}
-!26 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!27 = metadata !{i32 786474, metadata !18, null, metadata !6, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_friend ]
-!28 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"}
-!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.1 (trunk 153413) (llvm/trunk 153428)\000\00\000\00\000", !28, !1, !1, !1, !3,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5, !17}
+!5 = !{!"0x34\00a\00a\00\0010\000\001", null, !6, !7, %class.A* @a, null} ; [ DW_TAG_variable ]
+!6 = !{!"0x29", !28} ; [ DW_TAG_file_type ]
+!7 = !{!"0x2\00A\001\0032\0032\000\000\000", !28, null, null, !8, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
+!8 = !{!9, !11}
+!9 = !{!"0xd\00a\002\0032\0032\000\001", !28, !7, !10} ; [ DW_TAG_member ]
+!10 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!11 = !{!"0x2e\00A\00A\00\001\000\000\000\006\00320\000\001", !6, !7, !12, null, null, null, i32 0, !15} ; [ DW_TAG_subprogram ]
+!12 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !13, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = !{null, !14}
+!14 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !7} ; [ DW_TAG_pointer_type ]
+!15 = !{!16}
+!16 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!17 = !{!"0x34\00b\00b\00\0011\000\001", null, !6, !18, %class.B* @b, null} ; [ DW_TAG_variable ]
+!18 = !{!"0x2\00B\005\0032\0032\000\000\000", !28, null, null, !19, null, null, null} ; [ DW_TAG_class_type ] [B] [line 5, size 32, align 32, offset 0] [def] [from ]
+!19 = !{!20, !21, !27}
+!20 = !{!"0xd\00b\007\0032\0032\000\001", !28, !18, !10} ; [ DW_TAG_member ]
+!21 = !{!"0x2e\00B\00B\00\005\000\000\000\006\00320\000\005", !6, !18, !22, null, null, null, i32 0, !25} ; [ DW_TAG_subprogram ]
+!22 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !23, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!23 = !{null, !24}
+!24 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !18} ; [ DW_TAG_pointer_type ]
+!25 = !{!26}
+!26 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!27 = !{!"0x2a\00\000\000\000\000\000", !18, null, !7} ; [ DW_TAG_friend ]
+!28 = !{!"foo.cpp", !"/Users/echristo/tmp"}
+!29 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/aligned_stack_var.ll b/test/DebugInfo/X86/aligned_stack_var.ll
index 54484acc785c..32c6f2449aa0 100644
--- a/test/DebugInfo/X86/aligned_stack_var.ll
+++ b/test/DebugInfo/X86/aligned_stack_var.ll
@@ -18,26 +18,26 @@
 define void @_Z3runv() nounwind uwtable {
 entry:
   %x = alloca i32, align 32
-  call void @llvm.dbg.declare(metadata !{i32* %x}, metadata !9), !dbg !12
+  call void @llvm.dbg.declare(metadata i32* %x, metadata !9, metadata !{!"0x102"}), !dbg !12
   ret void, !dbg !13
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!15}
 
-!0 = metadata !{i32 786449, metadata !14, i32 4, metadata !"clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !14, metadata !6, metadata !"run", metadata !"run", metadata !"_Z3runv", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null}
-!9 = metadata !{i32 786688, metadata !10, metadata !"x", metadata !6, i32 2, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 786443, metadata !14, metadata !5, i32 1, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
-!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!12 = metadata !{i32 2, i32 7, metadata !10, null}
-!13 = metadata !{i32 3, i32 1, metadata !10, null}
-!14 = metadata !{metadata !"test.cc", metadata !"/home/samsonov/debuginfo"}
-!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.2 (trunk 155696:155697) (llvm/trunk 155696)\000\00\000\00\000", !14, !1, !1, !3, !1,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00run\00run\00_Z3runv\001\000\001\000\006\00256\000\001", !14, !6, !7, null, void ()* @_Z3runv, null, null, !1} ; [ DW_TAG_subprogram ]
+!6 = !{!"0x29", !14} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null}
+!9 = !{!"0x100\00x\002\000", !10, !6, !11} ; [ DW_TAG_auto_variable ]
+!10 = !{!"0xb\001\0012\000", !14, !5} ; [ DW_TAG_lexical_block ]
+!11 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!12 = !MDLocation(line: 2, column: 7, scope: !10)
+!13 = !MDLocation(line: 3, column: 1, scope: !10)
+!14 = !{!"test.cc", !"/home/samsonov/debuginfo"}
+!15 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/arange.ll b/test/DebugInfo/X86/arange.ll
index 4eea646968b4..97ab3c59e559 100644
--- a/test/DebugInfo/X86/arange.ll
+++ b/test/DebugInfo/X86/arange.ll
@@ -29,18 +29,18 @@
 !llvm.module.flags = !{!12, !13}
 !llvm.ident = !{!14}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !2, metadata !9, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/simple.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"simple.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !1, null, metadata !"foo<&i>", i32 3, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, metadata !5, metadata !"_ZTS3fooIXadL_Z1iEEE"} ; [ DW_TAG_structure_type ] [foo<&i>] [line 3, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{i32 786480, null, metadata !"x", metadata !7, i32* @i, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
-!7 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !11, i32 6, metadata !4, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 6] [def]
-!11 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/simple.cpp]
-!12 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!14 = metadata !{metadata !"clang version 3.5 "}
+!0 = !{!"0x11\004\00clang version 3.5 \000\00\000\00\000", !1, !2, !3, !2, !9, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/simple.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"simple.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00foo<&i>\003\008\008\000\000\000", !1, null, null, !2, null, !5, !"_ZTS3fooIXadL_Z1iEEE"} ; [ DW_TAG_structure_type ] [foo<&i>] [line 3, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!6}
+!6 = !{!"0x30\00x\000\000", null, !7, i32* @i, null} ; [ DW_TAG_template_value_parameter ]
+!7 = !{!"0xf\00\000\0064\0064\000\000", null, null, !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!10}
+!10 = !{!"0x34\00f\00f\00\006\000\001", null, !11, !4, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 6] [def]
+!11 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/simple.cpp]
+!12 = !{i32 2, !"Dwarf Version", i32 4}
+!13 = !{i32 1, !"Debug Info Version", i32 2}
+!14 = !{!"clang version 3.5 "}
diff --git a/test/DebugInfo/X86/arguments.ll b/test/DebugInfo/X86/arguments.ll
index 989e4fff484a..2bc56b4e4dae 100644
--- a/test/DebugInfo/X86/arguments.ll
+++ b/test/DebugInfo/X86/arguments.ll
@@ -31,8 +31,8 @@
 ; Function Attrs: nounwind uwtable
 define void @_Z4func3fooS_(%struct.foo* %f, %struct.foo* %g) #0 {
 entry:
-  call void @llvm.dbg.declare(metadata !{%struct.foo* %f}, metadata !19), !dbg !20
-  call void @llvm.dbg.declare(metadata !{%struct.foo* %g}, metadata !21), !dbg !20
+  call void @llvm.dbg.declare(metadata %struct.foo* %f, metadata !19, metadata !{!"0x102"}), !dbg !20
+  call void @llvm.dbg.declare(metadata %struct.foo* %g, metadata !21, metadata !{!"0x102"}), !dbg !20
   %i = getelementptr inbounds %struct.foo* %f, i32 0, i32 0, !dbg !22
   %0 = load i32* %i, align 4, !dbg !22
   %inc = add nsw i32 %0, 1, !dbg !22
@@ -41,7 +41,7 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -49,28 +49,28 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!24}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/scratch.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"scratch.cpp", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"_Z4func3fooS_", i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*, %struct.foo*)* @_Z4func3fooS_, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [func]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/scratch.cpp]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null, metadata !8, metadata !8}
-!8 = metadata !{i32 786451, metadata !1, null, metadata !"foo", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 32, align 32, offset 0] [def] [from ]
-!9 = metadata !{metadata !10, metadata !12}
-!10 = metadata !{i32 786445, metadata !1, metadata !8, metadata !"i", i32 3, i64 32, i64 32, i64 0, i32 0, metadata !11} ; [ DW_TAG_member ] [i] [line 3, size 32, align 32, offset 0] [from int]
-!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!12 = metadata !{i32 786478, metadata !1, metadata !8, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !18, i32 2} ; [ DW_TAG_subprogram ] [line 2] [foo]
-!13 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!14 = metadata !{null, metadata !15, metadata !16}
-!15 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from foo]
-!16 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
-!17 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo]
-!18 = metadata !{i32 786468}
-!19 = metadata !{i32 786689, metadata !4, metadata !"f", metadata !5, i32 16777222, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [f] [line 6]
-!20 = metadata !{i32 6, i32 0, metadata !4, null}
-!21 = metadata !{i32 786689, metadata !4, metadata !"g", metadata !5, i32 33554438, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [g] [line 6]
-!22 = metadata !{i32 7, i32 0, metadata !4, null}
-!23 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
-!24 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/scratch.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"scratch.cpp", !"/usr/local/google/home/blaikie/dev/scratch"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00func\00func\00_Z4func3fooS_\006\000\001\000\006\00256\000\006", !1, !5, !6, null, void (%struct.foo*, %struct.foo*)* @_Z4func3fooS_, null, null, !2} ; [ DW_TAG_subprogram ] [line 6] [def] [func]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/scratch.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !8, !8}
+!8 = !{!"0x13\00foo\001\0032\0032\000\000\000", !1, null, null, !9, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 32, align 32, offset 0] [def] [from ]
+!9 = !{!10, !12}
+!10 = !{!"0xd\00i\003\0032\0032\000\000", !1, !8, !11} ; [ DW_TAG_member ] [i] [line 3, size 32, align 32, offset 0] [from int]
+!11 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = !{!"0x2e\00foo\00foo\00\002\000\000\000\006\00256\000\002", !1, !8, !13, null, null, null, i32 0, !18} ; [ DW_TAG_subprogram ] [line 2] [foo]
+!13 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !14, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = !{null, !15, !16}
+!15 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from foo]
+!16 = !{!"0x10\00\000\000\000\000\000", null, null, !17} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
+!17 = !{!"0x26\00\000\000\000\000\000", null, null, !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo]
+!18 = !{i32 786468}
+!19 = !{!"0x101\00f\0016777222\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [f] [line 6]
+!20 = !MDLocation(line: 6, scope: !4)
+!21 = !{!"0x101\00g\0033554438\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [g] [line 6]
+!22 = !MDLocation(line: 7, scope: !4)
+!23 = !MDLocation(line: 8, scope: !4)
+!24 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/array.ll b/test/DebugInfo/X86/array.ll
index dc6c7a406507..e39be5ae0ff0 100644
--- a/test/DebugInfo/X86/array.ll
+++ b/test/DebugInfo/X86/array.ll
@@ -25,7 +25,7 @@ target triple = "x86_64-apple-macosx10.9.0"
 
 ; Function Attrs: nounwind ssp uwtable
 define void @f(i32* nocapture %p) #0 {
-  tail call void @llvm.dbg.value(metadata !{i32* %p}, i64 0, metadata !11), !dbg !28
+  tail call void @llvm.dbg.value(metadata i32* %p, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !28
   store i32 42, i32* %p, align 4, !dbg !29, !tbaa !30
   ret void, !dbg !34
 }
@@ -33,15 +33,15 @@ define void @f(i32* nocapture %p) #0 {
 ; Function Attrs: nounwind ssp uwtable
 define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
   %array = alloca [4 x i32], align 16
-  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !19), !dbg !35
-  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !20), !dbg !35
-  tail call void @llvm.dbg.value(metadata !{[4 x i32]* %array}, i64 0, metadata !21), !dbg !36
+  tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !19, metadata !{!"0x102"}), !dbg !35
+  tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !20, metadata !{!"0x102"}), !dbg !35
+  tail call void @llvm.dbg.value(metadata [4 x i32]* %array, i64 0, metadata !21, metadata !{!"0x102"}), !dbg !36
   %1 = bitcast [4 x i32]* %array to i8*, !dbg !36
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* bitcast ([4 x i32]* @main.array to i8*), i64 16, i32 16, i1 false), !dbg !36
-  tail call void @llvm.dbg.value(metadata !{[4 x i32]* %array}, i64 0, metadata !21), !dbg !36
+  tail call void @llvm.dbg.value(metadata [4 x i32]* %array, i64 0, metadata !21, metadata !{!"0x102"}), !dbg !36
   %2 = getelementptr inbounds [4 x i32]* %array, i64 0, i64 0, !dbg !37
   call void @f(i32* %2), !dbg !37
-  tail call void @llvm.dbg.value(metadata !{[4 x i32]* %array}, i64 0, metadata !21), !dbg !36
+  tail call void @llvm.dbg.value(metadata [4 x i32]* %array, i64 0, metadata !21, metadata !{!"0x102"}), !dbg !36
   %3 = load i32* %2, align 16, !dbg !38, !tbaa !30
   ret i32 %3, !dbg !38
 }
@@ -50,7 +50,7 @@ define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #2
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
 
 attributes #0 = { nounwind ssp uwtable }
 attributes #1 = { nounwind }
@@ -60,42 +60,42 @@ attributes #2 = { nounwind readnone }
 !llvm.module.flags = !{!25, !26}
 !llvm.ident = !{!27}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/array.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"array.c", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !12}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*)* @f, null, null, metadata !10, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/array.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null, metadata !8}
-!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 786689, metadata !4, metadata !"p", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p] [line 1]
-!12 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 5, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !18, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [main]
-!13 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!14 = metadata !{metadata !9, metadata !9, metadata !15}
-!15 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!16 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !17} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
-!17 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!18 = metadata !{metadata !19, metadata !20, metadata !21}
-!19 = metadata !{i32 786689, metadata !12, metadata !"argc", metadata !5, i32 16777221, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 5]
-!20 = metadata !{i32 786689, metadata !12, metadata !"argv", metadata !5, i32 33554437, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 5]
-!21 = metadata !{i32 786688, metadata !12, metadata !"array", metadata !5, i32 6, metadata !22, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [array] [line 6]
-!22 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 32, i32 0, i32 0, metadata !9, metadata !23, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 32, offset 0] [from int]
-!23 = metadata !{metadata !24}
-!24 = metadata !{i32 786465, i64 0, i64 4}        ; [ DW_TAG_subrange_type ] [0, 3]
-!25 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!27 = metadata !{metadata !"clang version 3.5.0 "}
-!28 = metadata !{i32 1, i32 0, metadata !4, null}
-!29 = metadata !{i32 2, i32 0, metadata !4, null}
-!30 = metadata !{metadata !31, metadata !31, i64 0}
-!31 = metadata !{metadata !"int", metadata !32, i64 0}
-!32 = metadata !{metadata !"omnipotent char", metadata !33, i64 0}
-!33 = metadata !{metadata !"Simple C/C++ TBAA"}
-!34 = metadata !{i32 3, i32 0, metadata !4, null}
-!35 = metadata !{i32 5, i32 0, metadata !12, null}
-!36 = metadata !{i32 6, i32 0, metadata !12, null}
-!37 = metadata !{i32 7, i32 0, metadata !12, null}
-!38 = metadata !{i32 8, i32 0, metadata !12, null} ; [ DW_TAG_imported_declaration ]
+!0 = !{!"0x11\0012\00clang version 3.5.0 \001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/array.c] [DW_LANG_C99]
+!1 = !{!"array.c", !""}
+!2 = !{}
+!3 = !{!4, !12}
+!4 = !{!"0x2e\00f\00f\00\001\000\001\000\006\00256\001\001", !1, !5, !6, null, void (i32*)* @f, null, null, !10} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/array.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !8}
+!8 = !{!"0xf\00\000\0064\0064\000\000", null, null, !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!11}
+!11 = !{!"0x101\00p\0016777217\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [p] [line 1]
+!12 = !{!"0x2e\00main\00main\00\005\000\001\000\006\00256\001\005", !1, !5, !13, null, i32 (i32, i8**)* @main, null, null, !18} ; [ DW_TAG_subprogram ] [line 5] [def] [main]
+!13 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !14, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = !{!9, !9, !15}
+!15 = !{!"0xf\00\000\0064\0064\000\000", null, null, !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!16 = !{!"0xf\00\000\0064\0064\000\000", null, null, !17} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!17 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!18 = !{!19, !20, !21}
+!19 = !{!"0x101\00argc\0016777221\000", !12, !5, !9} ; [ DW_TAG_arg_variable ] [argc] [line 5]
+!20 = !{!"0x101\00argv\0033554437\000", !12, !5, !15} ; [ DW_TAG_arg_variable ] [argv] [line 5]
+!21 = !{!"0x100\00array\006\000", !12, !5, !22} ; [ DW_TAG_auto_variable ] [array] [line 6]
+!22 = !{!"0x1\00\000\00128\0032\000\000", null, null, !9, !23, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 32, offset 0] [from int]
+!23 = !{!24}
+!24 = !{!"0x21\000\004"}        ; [ DW_TAG_subrange_type ] [0, 3]
+!25 = !{i32 2, !"Dwarf Version", i32 2}
+!26 = !{i32 1, !"Debug Info Version", i32 2}
+!27 = !{!"clang version 3.5.0 "}
+!28 = !MDLocation(line: 1, scope: !4)
+!29 = !MDLocation(line: 2, scope: !4)
+!30 = !{!31, !31, i64 0}
+!31 = !{!"int", !32, i64 0}
+!32 = !{!"omnipotent char", !33, i64 0}
+!33 = !{!"Simple C/C++ TBAA"}
+!34 = !MDLocation(line: 3, scope: !4)
+!35 = !MDLocation(line: 5, scope: !12)
+!36 = !MDLocation(line: 6, scope: !12)
+!37 = !MDLocation(line: 7, scope: !12)
+!38 = !MDLocation(line: 8, scope: !12)
diff --git a/test/DebugInfo/X86/array2.ll b/test/DebugInfo/X86/array2.ll
index 2dc2af325b51..5b21188c1cfe 100644
--- a/test/DebugInfo/X86/array2.ll
+++ b/test/DebugInfo/X86/array2.ll
@@ -29,7 +29,7 @@ define void @f(i32* %p) #0 {
 entry:
   %p.addr = alloca i32*, align 8
   store i32* %p, i32** %p.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i32** %p.addr}, metadata !19), !dbg !20
+  call void @llvm.dbg.declare(metadata i32** %p.addr, metadata !19, metadata !{!"0x102"}), !dbg !20
   %0 = load i32** %p.addr, align 8, !dbg !21
   %arrayidx = getelementptr inbounds i32* %0, i64 0, !dbg !21
   store i32 42, i32* %arrayidx, align 4, !dbg !21
@@ -37,7 +37,7 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind ssp uwtable
 define i32 @main(i32 %argc, i8** %argv) #0 {
@@ -48,10 +48,10 @@ entry:
   %array = alloca [4 x i32], align 16
   store i32 0, i32* %retval
   store i32 %argc, i32* %argc.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !23), !dbg !24
+  call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !23, metadata !{!"0x102"}), !dbg !24
   store i8** %argv, i8*** %argv.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !25), !dbg !24
-  call void @llvm.dbg.declare(metadata !{[4 x i32]* %array}, metadata !26), !dbg !30
+  call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !25, metadata !{!"0x102"}), !dbg !24
+  call void @llvm.dbg.declare(metadata [4 x i32]* %array, metadata !26, metadata !{!"0x102"}), !dbg !30
   %0 = bitcast [4 x i32]* %array to i8*, !dbg !30
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([4 x i32]* @main.array to i8*), i64 16, i32 16, i1 false), !dbg !30
   %arraydecay = getelementptr inbounds [4 x i32]* %array, i32 0, i32 0, !dbg !31
@@ -72,36 +72,36 @@ attributes #2 = { nounwind }
 !llvm.module.flags = !{!16, !17}
 !llvm.ident = !{!18}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [array.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"array.c", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !10}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @f, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [array.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null, metadata !8}
-!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 5, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [main]
-!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !9, metadata !9, metadata !13}
-!13 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!14 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
-!15 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!16 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!18 = metadata !{metadata !"clang version 3.5.0 "}
-!19 = metadata !{i32 786689, metadata !4, metadata !"p", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p] [line 1]
-!20 = metadata !{i32 1, i32 0, metadata !4, null}
-!21 = metadata !{i32 2, i32 0, metadata !4, null}
-!22 = metadata !{i32 3, i32 0, metadata !4, null}
-!23 = metadata !{i32 786689, metadata !10, metadata !"argc", metadata !5, i32 16777221, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 5]
-!24 = metadata !{i32 5, i32 0, metadata !10, null}
-!25 = metadata !{i32 786689, metadata !10, metadata !"argv", metadata !5, i32 33554437, metadata !13, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 5]
-!26 = metadata !{i32 786688, metadata !10, metadata !"array", metadata !5, i32 6, metadata !27, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [array] [line 6]
-!27 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 32, i32 0, i32 0, metadata !9, metadata !28, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 32, offset 0] [from int]
-!28 = metadata !{metadata !29}
-!29 = metadata !{i32 786465, i64 0, i64 4}        ; [ DW_TAG_subrange_type ] [0, 3]
-!30 = metadata !{i32 6, i32 0, metadata !10, null}
-!31 = metadata !{i32 7, i32 0, metadata !10, null}
-!32 = metadata !{i32 8, i32 0, metadata !10, null} ; [ DW_TAG_imported_declaration ]
+!0 = !{!"0x11\0012\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [array.c] [DW_LANG_C99]
+!1 = !{!"array.c", !""}
+!2 = !{}
+!3 = !{!4, !10}
+!4 = !{!"0x2e\00f\00f\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, void (i32*)* @f, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [array.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !8}
+!8 = !{!"0xf\00\000\0064\0064\000\000", null, null, !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"0x2e\00main\00main\00\005\000\001\000\006\00256\000\005", !1, !5, !11, null, i32 (i32, i8**)* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [main]
+!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!9, !9, !13}
+!13 = !{!"0xf\00\000\0064\0064\000\000", null, null, !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!14 = !{!"0xf\00\000\0064\0064\000\000", null, null, !15} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!15 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!16 = !{i32 2, !"Dwarf Version", i32 2}
+!17 = !{i32 1, !"Debug Info Version", i32 2}
+!18 = !{!"clang version 3.5.0 "}
+!19 = !{!"0x101\00p\0016777217\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [p] [line 1]
+!20 = !MDLocation(line: 1, scope: !4)
+!21 = !MDLocation(line: 2, scope: !4)
+!22 = !MDLocation(line: 3, scope: !4)
+!23 = !{!"0x101\00argc\0016777221\000", !10, !5, !9} ; [ DW_TAG_arg_variable ] [argc] [line 5]
+!24 = !MDLocation(line: 5, scope: !10)
+!25 = !{!"0x101\00argv\0033554437\000", !10, !5, !13} ; [ DW_TAG_arg_variable ] [argv] [line 5]
+!26 = !{!"0x100\00array\006\000", !10, !5, !27} ; [ DW_TAG_auto_variable ] [array] [line 6]
+!27 = !{!"0x1\00\000\00128\0032\000\000", null, null, !9, !28, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 32, offset 0] [from int]
+!28 = !{!29}
+!29 = !{!"0x21\000\004"}        ; [ DW_TAG_subrange_type ] [0, 3]
+!30 = !MDLocation(line: 6, scope: !10)
+!31 = !MDLocation(line: 7, scope: !10)
+!32 = !MDLocation(line: 8, scope: !10)
diff --git a/test/DebugInfo/X86/asm-macro-line-number.s b/test/DebugInfo/X86/asm-macro-line-number.s
new file mode 100644
index 000000000000..0f51dbb6440a
--- /dev/null
+++ b/test/DebugInfo/X86/asm-macro-line-number.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc -g -triple i686-linux-gnu -filetype asm -o - %s | FileCheck %s
+
+# 1 "reduced.S"
+# 1 "<built-in>" 1
+# 1 "reduced.S" 2
+
+ .macro return arg
+  movl %eax, \arg
+  retl
+ .endm
+
+function:
+ return 0
+
+# CHECK: .file 2 "reduced.S"
+# CHECK: .loc 2 8 0
+# CHECK: movl %eax, 0
+# CHECK: .loc 2 8 0
+# CHECK: retl
+
diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll
index e842afe9446a..51d575fdfe42 100644
--- a/test/DebugInfo/X86/block-capture.ll
+++ b/test/DebugInfo/X86/block-capture.ll
@@ -1,133 +1,131 @@
-; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llc %s -o %t -filetype=obj
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
-; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj -dwarf-version=3
-; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=DWARF3
 
 ; Checks that we emit debug info for the block variable declare.
 ; CHECK: DW_TAG_subprogram
 ; CHECK: DW_TAG_variable
-; CHECK: DW_AT_location [DW_FORM_sec_offset]
-; CHECK: DW_AT_name {{.*}} "block"
+;                                              fbreg +8, deref, +32
+; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (<0x05> 91 08 06 23 20 )
+; CHECK-NEXT: DW_AT_name {{.*}} "block"
 
-; DWARF3: DW_TAG_subprogram
-; DWARF3: DW_TAG_variable
-; DWARF3: DW_AT_location [DW_FORM_data4]
-; DWARF3: DW_AT_name {{.*}} "block"
+; Extracted from the clang output for:
+; void foo() {
+;  void (^block)() = ^{ block(); };
+; }
+
+; ModuleID = 'foo.m'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
 
 %struct.__block_descriptor = type { i64, i64 }
 %struct.__block_literal_generic = type { i8*, i32, i32, i8*, %struct.__block_descriptor* }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-define hidden void @__foo_block_invoke_0(i8* %.block_descriptor) uwtable ssp {
-entry:
-  %exn.slot = alloca i8*
-  %ehselector.slot = alloca i32
-  call void @llvm.dbg.value(metadata !{i8* %.block_descriptor}, i64 0, metadata !39), !dbg !51
-  %block = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void ()* }>*, !dbg !52
-  call void @llvm.dbg.declare(metadata !{<{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void ()* }>* %block}, metadata !53), !dbg !54
-  %block.capture.addr = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void ()* }>* %block, i32 0, i32 5, !dbg !55
-  %0 = load void ()** %block.capture.addr, align 8, !dbg !55
-  %block.literal = bitcast void ()* %0 to %struct.__block_literal_generic*, !dbg !55
-  %1 = getelementptr inbounds %struct.__block_literal_generic* %block.literal, i32 0, i32 3, !dbg !55
-  %2 = bitcast %struct.__block_literal_generic* %block.literal to i8*, !dbg !55
-  %3 = load i8** %1, !dbg !55
-  %4 = bitcast i8* %3 to void (i8*)*, !dbg !55
-  invoke void %4(i8* %2)
-          to label %invoke.cont unwind label %lpad, !dbg !55
-
-invoke.cont:                                      ; preds = %entry
-  br label %eh.cont, !dbg !58
+@_NSConcreteStackBlock = external global i8*
+@.str = private unnamed_addr constant [6 x i8] c"v8@?0\00", align 1
 
-eh.cont:                                          ; preds = %catch, %invoke.cont
-  ret void, !dbg !61
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-lpad:                                             ; preds = %entry
-  %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
-          catch i8* null, !dbg !55
-  %6 = extractvalue { i8*, i32 } %5, 0, !dbg !55
-  store i8* %6, i8** %exn.slot, !dbg !55
-  %7 = extractvalue { i8*, i32 } %5, 1, !dbg !55
-  store i32 %7, i32* %ehselector.slot, !dbg !55
-  br label %catch, !dbg !55
-
-catch:                                            ; preds = %lpad
-  %exn = load i8** %exn.slot, !dbg !62
-  %exn.adjusted = call i8* @objc_begin_catch(i8* %exn) nounwind, !dbg !62
-  call void @objc_end_catch(), !dbg !58
-  br label %eh.cont, !dbg !58
+; Function Attrs: ssp uwtable
+define internal void @__foo_block_invoke(i8* %.block_descriptor) #2 {
+entry:
+  %.block_descriptor.addr = alloca i8*, align 8
+  %block.addr = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void (...)* }>*, align 8
+  store i8* %.block_descriptor, i8** %.block_descriptor.addr, align 8
+  %0 = load i8** %.block_descriptor.addr
+  call void @llvm.dbg.value(metadata i8* %0, i64 0, metadata !47, metadata !43), !dbg !66
+  call void @llvm.dbg.declare(metadata i8* %.block_descriptor, metadata !47, metadata !43), !dbg !66
+  %block = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void (...)* }>*, !dbg !67
+  store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void (...)* }>* %block, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void (...)* }>** %block.addr, align 8
+  call void @llvm.dbg.declare(metadata <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void (...)* }>** %block.addr, metadata !68, metadata !69), !dbg !70
+  %block.capture.addr = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void (...)* }>* %block, i32 0, i32 5, !dbg !71
+  %1 = load void (...)** %block.capture.addr, align 8, !dbg !71
+  %block.literal = bitcast void (...)* %1 to %struct.__block_literal_generic*, !dbg !71
+  %2 = getelementptr inbounds %struct.__block_literal_generic* %block.literal, i32 0, i32 3, !dbg !71
+  %3 = bitcast %struct.__block_literal_generic* %block.literal to i8*, !dbg !71
+  %4 = load i8** %2, !dbg !71
+  %5 = bitcast i8* %4 to void (i8*, ...)*, !dbg !71
+  call void (i8*, ...)* %5(i8* %3), !dbg !71
+  ret void, !dbg !73
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-declare i8* @objc_begin_catch(i8*)
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
-declare void @objc_end_catch()
 
-declare i32 @__objc_personality_v0(...)
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind readnone }
+attributes #2 = { ssp uwtable }
+attributes #3 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!35, !36, !37, !38, !64}
+!llvm.module.flags = !{!16, !17, !18, !19, !20, !21, !22}
+!llvm.ident = !{!23}
 
-!0 = metadata !{i32 786449, metadata !63, i32 16, metadata !"clang version 3.1 (trunk 151227)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5, metadata !28, metadata !31, metadata !34}
-!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 5} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !63} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786454, metadata !63, null, metadata !"dispatch_block_t", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_generic", i32 5, i64 256, i64 0, i32 0, i32 8, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_generic] [line 5, size 256, align 0, offset 0] [def] [from ]
-!12 = metadata !{metadata !13, metadata !15, metadata !17, metadata !18, metadata !19}
-!13 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__isa", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
-!14 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!15 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__flags", i32 0, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
-!16 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!17 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__reserved", i32 0, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
-!18 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__FuncPtr", i32 0, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
-!19 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__descriptor", i32 5, i64 64, i64 64, i64 192, i32 0, metadata !20} ; [ DW_TAG_member ]
-!20 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
-!21 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_descriptor", i32 5, i64 128, i64 0, i32 0, i32 8, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor] [line 5, size 128, align 0, offset 0] [def] [from ]
-!22 = metadata !{metadata !23, metadata !25}
-!23 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"reserved", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ]
-!24 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!25 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"Size", i32 0, i64 64, i64 64, i64 64, i32 0, metadata !24} ; [ DW_TAG_member ]
-!28 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__foo_block_invoke_0", metadata !"__foo_block_invoke_0", metadata !"", i32 7, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @__foo_block_invoke_0, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
-!29 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!30 = metadata !{null, metadata !14}
-!31 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", i32 10, metadata !32, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 10} ; [ DW_TAG_subprogram ]
-!32 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!33 = metadata !{null, metadata !14, metadata !14}
-!34 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", i32 10, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 10} ; [ DW_TAG_subprogram ]
-!35 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!36 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!37 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!38 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
-!39 = metadata !{i32 786689, metadata !28, metadata !".block_descriptor", metadata !6, i32 16777223, metadata !40, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!40 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !41} ; [ DW_TAG_pointer_type ]
-!41 = metadata !{i32 786451, metadata !63, metadata !6, metadata !"__block_literal_1", i32 7, i64 320, i64 64, i32 0, i32 0, null, metadata !42, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_1] [line 7, size 320, align 64, offset 0] [def] [from ]
-!42 = metadata !{metadata !43, metadata !44, metadata !45, metadata !46, metadata !47, metadata !50}
-!43 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__isa", i32 7, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
-!44 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__flags", i32 7, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
-!45 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__reserved", i32 7, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
-!46 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__FuncPtr", i32 7, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
-!47 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"__descriptor", i32 7, i64 64, i64 64, i64 192, i32 0, metadata !48} ; [ DW_TAG_member ]
-!48 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ]
-!49 = metadata !{i32 786451, metadata !63, null, metadata !"__block_descriptor_withcopydispose", i32 7, i32 0, i32 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 7, size 0, align 0, offset 0] [decl] [from ]
-!50 = metadata !{i32 786445, metadata !63, metadata !6, metadata !"block", i32 7, i64 64, i64 64, i64 256, i32 0, metadata !9} ; [ DW_TAG_member ]
-!51 = metadata !{i32 7, i32 18, metadata !28, null}
-!52 = metadata !{i32 7, i32 19, metadata !28, null}
-!53 = metadata !{i32 786688, metadata !28, metadata !"block", metadata !6, i32 5, metadata !9, i32 0, i32 0, metadata !65} ; [ DW_TAG_auto_variable ]
-!54 = metadata !{i32 5, i32 27, metadata !28, null}
-!55 = metadata !{i32 8, i32 22, metadata !56, null}
-!56 = metadata !{i32 786443, metadata !6, metadata !57, i32 7, i32 26, i32 2} ; [ DW_TAG_lexical_block ]
-!57 = metadata !{i32 786443, metadata !6, metadata !28, i32 7, i32 19, i32 1} ; [ DW_TAG_lexical_block ]
-!58 = metadata !{i32 10, i32 20, metadata !59, null}
-!59 = metadata !{i32 786443, metadata !6, metadata !60, i32 9, i32 35, i32 4} ; [ DW_TAG_lexical_block ]
-!60 = metadata !{i32 786443, metadata !6, metadata !57, i32 9, i32 35, i32 3} ; [ DW_TAG_lexical_block ]
-!61 = metadata !{i32 10, i32 21, metadata !28, null}
-!62 = metadata !{i32 9, i32 20, metadata !56, null}
-!63 = metadata !{metadata !"foo.m", metadata !"/Users/echristo"}
-!64 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!65 = metadata !{i64 1, i64 32}
+!0 = !{!"0x11\0016\00clang version 3.6.0 (trunk 223471)\000\00\002\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/foo.m] [DW_LANG_ObjC]
+!1 = !{!"foo.m", !""}
+!2 = !{}
+!3 = !{!8}
+!5 = !{!"0x29", !1}    ; [ DW_TAG_file_type ] [/foo.m]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{!"0x2e\00__foo_block_invoke\00__foo_block_invoke\00\002\001\001\000\000\00256\000\002", !1, !5, !9, null, void (i8*)* @__foo_block_invoke, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [local] [def] [__foo_block_invoke]
+!9 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !{null, !11}
+!11 = !{!"0xf\00\000\0064\0064\000\000", null, null, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!13 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !14, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = !{null, !11, !11}
+!16 = !{i32 1, !"Objective-C Version", i32 2}
+!17 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!18 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!19 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
+!20 = !{i32 2, !"Dwarf Version", i32 2}
+!21 = !{i32 2, !"Debug Info Version", i32 2}
+!22 = !{i32 1, !"PIC Level", i32 2}
+!23 = !{!"clang version 3.6.0 (trunk 223471)"}
+!25 = !{!"0xf\00\000\0064\000\000\000", null, null, !26} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_generic]
+!26 = !{!"0x13\00__block_literal_generic\002\00256\000\000\008\000", !1, !5, null, !27, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_generic] [line 2, size 256, align 0, offset 0] [def] [from ]
+!27 = !{!28, !29, !31, !32, !36}
+!28 = !{!"0xd\00__isa\000\0064\0064\000\000", !1, !5, !11} ; [ DW_TAG_member ] [__isa] [line 0, size 64, align 64, offset 0] [from ]
+!29 = !{!"0xd\00__flags\000\0032\0032\0064\000", !1, !5, !30} ; [ DW_TAG_member ] [__flags] [line 0, size 32, align 32, offset 64] [from int]
+!30 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!31 = !{!"0xd\00__reserved\000\0032\0032\0096\000", !1, !5, !30} ; [ DW_TAG_member ] [__reserved] [line 0, size 32, align 32, offset 96] [from int]
+!32 = !{!"0xd\00__FuncPtr\000\0064\0064\00128\000", !1, !5, !33} ; [ DW_TAG_member ] [__FuncPtr] [line 0, size 64, align 64, offset 128] [from ]
+!33 = !{!"0xf\00\000\0064\0064\000\000", null, null, !34} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!34 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !35, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!35 = !{null, null}
+!36 = !{!"0xd\00__descriptor\002\0064\0064\00192\000", !1, !5, !37} ; [ DW_TAG_member ] [__descriptor] [line 2, size 64, align 64, offset 192] [from ]
+!37 = !{!"0xf\00\000\0064\000\000\000", null, null, !38} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_descriptor]
+!38 = !{!"0x13\00__block_descriptor\002\00128\000\000\008\000", !1, !5, null, !39, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor] [line 2, size 128, align 0, offset 0] [def] [from ]
+!39 = !{!40, !42}
+!40 = !{!"0xd\00reserved\000\0064\0064\000\000", !1, !5, !41} ; [ DW_TAG_member ] [reserved] [line 0, size 64, align 64, offset 0] [from long unsigned int]
+!41 = !{!"0x24\00long unsigned int\000\0064\0064\000\000\007", null, null} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!42 = !{!"0xd\00Size\000\0064\0064\0064\000", !1, !5, !41} ; [ DW_TAG_member ] [Size] [line 0, size 64, align 64, offset 64] [from long unsigned int]
+!43 = !{!"0x102"}               ; [ DW_TAG_expression ]
+!47 = !{!"0x101\00.block_descriptor\0016777218\0064", !8, !5, !48} ; [ DW_TAG_arg_variable ] [.block_descriptor] [line 2]
+!48 = !{!"0xf\00\000\0064\000\000\000", null, null, !49} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_1]
+!49 = !{!"0x13\00__block_literal_1\002\00320\0064\000\000\000", !1, !5, null, !50, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_1] [line 2, size 320, align 64, offset 0] [def] [from ]
+!50 = !{!51, !52, !53, !54, !56, !65}
+!51 = !{!"0xd\00__isa\002\0064\0064\000\003", !1, !5, !11} ; [ DW_TAG_member ] [__isa] [line 2, size 64, align 64, offset 0] [public] [from ]
+!52 = !{!"0xd\00__flags\002\0032\0032\0064\003", !1, !5, !30} ; [ DW_TAG_member ] [__flags] [line 2, size 32, align 32, offset 64] [public] [from int]
+!53 = !{!"0xd\00__reserved\002\0032\0032\0096\003", !1, !5, !30} ; [ DW_TAG_member ] [__reserved] [line 2, size 32, align 32, offset 96] [public] [from int]
+!54 = !{!"0xd\00__FuncPtr\002\0064\0064\00128\003", !1, !5, !55} ; [ DW_TAG_member ] [__FuncPtr] [line 2, size 64, align 64, offset 128] [public] [from ]
+!55 = !{!"0xf\00\000\0064\0064\000\000", null, null, !6} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!56 = !{!"0xd\00__descriptor\002\0064\0064\00192\003", !1, !5, !57} ; [ DW_TAG_member ] [__descriptor] [line 2, size 64, align 64, offset 192] [public] [from ]
+!57 = !{!"0xf\00\000\0064\0064\000\000", null, null, !58} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from __block_descriptor_withcopydispose]
+!58 = !{!"0x13\00__block_descriptor_withcopydispose\002\00256\0064\000\000\000", !1, null, null, !59, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 2, size 256, align 64, offset 0] [def] [from ]
+!59 = !{!60, !61, !62, !64}
+!60 = !{!"0xd\00reserved\002\0064\0064\000\000", !1, !58, !41} ; [ DW_TAG_member ] [reserved] [line 2, size 64, align 64, offset 0] [from long unsigned int]
+!61 = !{!"0xd\00Size\002\0064\0064\0064\000", !1, !58, !41} ; [ DW_TAG_member ] [Size] [line 2, size 64, align 64, offset 64] [from long unsigned int]
+!62 = !{!"0xd\00CopyFuncPtr\002\0064\0064\00128\000", !1, !58, !63} ; [ DW_TAG_member ] [CopyFuncPtr] [line 2, size 64, align 64, offset 128] [from ]
+!63 = !{!"0xf\00\000\0064\0064\000\000", null, null, !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!64 = !{!"0xd\00DestroyFuncPtr\002\0064\0064\00192\000", !1, !58, !63} ; [ DW_TAG_member ] [DestroyFuncPtr] [line 2, size 64, align 64, offset 192] [from ]
+!65 = !{!"0xd\00block\002\0064\0064\00256\003", !1, !5, !25} ; [ DW_TAG_member ] [block] [line 2, size 64, align 64, offset 256] [public] [from ]
+!66 = !MDLocation(line: 2, column: 20, scope: !8)
+!67 = !MDLocation(line: 2, column: 21, scope: !8)
+!68 = !{!"0x100\00block\002\000", !8, !5, !25} ; [ DW_TAG_auto_variable ] [block] [line 2]
+!69 = !{!"0x102\006\0034\0032"} ; [ DW_TAG_expression ] [DW_OP_deref]
+!70 = !MDLocation(line: 2, column: 9, scope: !8)
+!71 = !MDLocation(line: 2, column: 23, scope: !72)
+!72 = !{!"0xb\002\0021\000", !1, !8} ; [ DW_TAG_lexical_block ] [/foo.m]
+!73 = !MDLocation(line: 2, column: 32, scope: !8)
diff --git a/test/DebugInfo/X86/byvalstruct.ll b/test/DebugInfo/X86/byvalstruct.ll
index d787ef39c36c..d89ba3596c53 100644
--- a/test/DebugInfo/X86/byvalstruct.ll
+++ b/test/DebugInfo/X86/byvalstruct.ll
@@ -66,20 +66,20 @@ entry:
   %otherBitmap.addr = alloca %0*, align 8
   %length.addr = alloca i64, align 8
   store %0* %self, %0** %self.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%0** %self.addr}, metadata !28), !dbg !29
+  call void @llvm.dbg.declare(metadata %0** %self.addr, metadata !28, metadata !{!"0x102"}), !dbg !29
   store i8* %_cmd, i8** %_cmd.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i8** %_cmd.addr}, metadata !30), !dbg !29
+  call void @llvm.dbg.declare(metadata i8** %_cmd.addr, metadata !30, metadata !{!"0x102"}), !dbg !29
   store %0* %otherBitmap, %0** %otherBitmap.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%0** %otherBitmap.addr}, metadata !32), !dbg !29
-  call void @llvm.dbg.declare(metadata !{%struct.ImageInfo* %info}, metadata !33), !dbg !34
+  call void @llvm.dbg.declare(metadata %0** %otherBitmap.addr, metadata !32, metadata !{!"0x102"}), !dbg !29
+  call void @llvm.dbg.declare(metadata %struct.ImageInfo* %info, metadata !33, metadata !{!"0x102"}), !dbg !34
   store i64 %length, i64* %length.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i64* %length.addr}, metadata !35), !dbg !36
+  call void @llvm.dbg.declare(metadata i64* %length.addr, metadata !35, metadata !{!"0x102"}), !dbg !36
   %0 = load i8** %retval, !dbg !37
   ret i8* %0, !dbg !37
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { ssp uwtable }
 attributes #1 = { nounwind readnone }
@@ -87,42 +87,42 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!24, !25, !26, !27, !38}
 
-!0 = metadata !{i32 786449, metadata !1, i32 17, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 2, metadata !2, metadata !3, metadata !6, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/t.mm] [DW_LANG_ObjC_plus_plus]
-!1 = metadata !{metadata !"t.mm", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !1, metadata !5, metadata !"Bitmap", i32 8, i64 8, i64 8, i32 0, i32 512, null, metadata !2, i32 17, null, null, null} ; [ DW_TAG_structure_type ] [Bitmap] [line 8, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/t.mm]
-!6 = metadata !{metadata !7}
-!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"-[Bitmap initWithCopy:andInfo:andLength:]", metadata !"-[Bitmap initWithCopy:andInfo:andLength:]", metadata !"", i32 9, metadata !8, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, i8* (%0*, i8*, %0*, %struct.ImageInfo*, i64)* @"\01-[Bitmap initWithCopy:andInfo:andLength:]", null, null, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [local] [def] [-[Bitmap initWithCopy:andInfo:andLength:]]
-!8 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!9 = metadata !{metadata !4, metadata !10, metadata !11, metadata !14, metadata !15, metadata !19}
-!10 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from Bitmap]
-!11 = metadata !{i32 786454, metadata !1, null, metadata !"SEL", i32 9, i64 0, i64 0, i64 0, i32 64, metadata !12} ; [ DW_TAG_typedef ] [SEL] [line 9, size 0, align 0, offset 0] [artificial] [from ]
-!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_selector]
-!13 = metadata !{i32 786451, metadata !1, null, metadata !"objc_selector", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [decl] [from ]
-!14 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Bitmap]
-!15 = metadata !{i32 786454, metadata !1, null, metadata !"ImageInfo", i32 7, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [ImageInfo] [line 7, size 0, align 0, offset 0] [from ]
-!16 = metadata !{i32 786451, metadata !1, null, metadata !"", i32 2, i64 192, i64 64, i32 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [line 2, size 192, align 64, offset 0] [def] [from ]
-!17 = metadata !{metadata !18, metadata !21, metadata !22}
-!18 = metadata !{i32 786445, metadata !1, metadata !16, metadata !"width", i32 4, i64 64, i64 64, i64 0, i32 0, metadata !19} ; [ DW_TAG_member ] [width] [line 4, size 64, align 64, offset 0] [from NSUInteger]
-!19 = metadata !{i32 786454, metadata !1, null, metadata !"NSUInteger", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ] [NSUInteger] [line 1, size 0, align 0, offset 0] [from long unsigned int]
-!20 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
-!21 = metadata !{i32 786445, metadata !1, metadata !16, metadata !"height", i32 5, i64 64, i64 64, i64 64, i32 0, metadata !19} ; [ DW_TAG_member ] [height] [line 5, size 64, align 64, offset 64] [from NSUInteger]
-!22 = metadata !{i32 786445, metadata !1, metadata !16, metadata !"pixelAspect", i32 6, i64 64, i64 64, i64 128, i32 0, metadata !23} ; [ DW_TAG_member ] [pixelAspect] [line 6, size 64, align 64, offset 128] [from double]
-!23 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
-!24 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!25 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!26 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!27 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
-!28 = metadata !{i32 786689, metadata !7, metadata !"self", metadata !5, i32 16777225, metadata !14, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [self] [line 9]
-!29 = metadata !{i32 9, i32 0, metadata !7, null}
-!30 = metadata !{i32 786689, metadata !7, metadata !"_cmd", metadata !5, i32 33554441, metadata !31, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [_cmd] [line 9]
-!31 = metadata !{i32 786454, metadata !1, null, metadata !"SEL", i32 9, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ] [SEL] [line 9, size 0, align 0, offset 0] [from ]
-!32 = metadata !{i32 786689, metadata !7, metadata !"otherBitmap", metadata !5, i32 50331657, metadata !14, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [otherBitmap] [line 9]
-!33 = metadata !{i32 786689, metadata !7, metadata !"info", metadata !5, i32 67108874, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [info] [line 10]
-!34 = metadata !{i32 10, i32 0, metadata !7, null}
-!35 = metadata !{i32 786689, metadata !7, metadata !"length", metadata !5, i32 83886091, metadata !19, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [length] [line 11]
-!36 = metadata !{i32 11, i32 0, metadata !7, null}
-!37 = metadata !{i32 13, i32 0, metadata !7, null}
-!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0017\00clang version 3.4 \000\00\002\00\000", !1, !2, !3, !6, !2, !2} ; [ DW_TAG_compile_unit ] [/t.mm] [DW_LANG_ObjC_plus_plus]
+!1 = !{!"t.mm", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00Bitmap\008\008\008\000\00512\0017", !1, !5, null, !2, null, null, null} ; [ DW_TAG_structure_type ] [Bitmap] [line 8, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/t.mm]
+!6 = !{!7}
+!7 = !{!"0x2e\00-[Bitmap initWithCopy:andInfo:andLength:]\00-[Bitmap initWithCopy:andInfo:andLength:]\00\009\001\001\000\006\00256\000\009", !1, !5, !8, null, i8* (%0*, i8*, %0*, %struct.ImageInfo*, i64)* @"\01-[Bitmap initWithCopy:andInfo:andLength:]", null, null, !2} ; [ DW_TAG_subprogram ] [line 9] [local] [def] [-[Bitmap initWithCopy:andInfo:andLength:]]
+!8 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !9, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = !{!4, !10, !11, !14, !15, !19}
+!10 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from Bitmap]
+!11 = !{!"0x16\00SEL\009\000\000\000\0064", !1, null, !12} ; [ DW_TAG_typedef ] [SEL] [line 9, size 0, align 0, offset 0] [artificial] [from ]
+!12 = !{!"0xf\00\000\0064\0064\000\000", null, null, !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_selector]
+!13 = !{!"0x13\00objc_selector\000\000\000\000\004\000", !1, null, null, null, null, null, null} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [decl] [from ]
+!14 = !{!"0xf\00\000\0064\0064\000\000", null, null, !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Bitmap]
+!15 = !{!"0x16\00ImageInfo\007\000\000\000\000", !1, null, !16} ; [ DW_TAG_typedef ] [ImageInfo] [line 7, size 0, align 0, offset 0] [from ]
+!16 = !{!"0x13\00\002\00192\0064\000\000\000", !1, null, null, !17, null, null, null} ; [ DW_TAG_structure_type ] [line 2, size 192, align 64, offset 0] [def] [from ]
+!17 = !{!18, !21, !22}
+!18 = !{!"0xd\00width\004\0064\0064\000\000", !1, !16, !19} ; [ DW_TAG_member ] [width] [line 4, size 64, align 64, offset 0] [from NSUInteger]
+!19 = !{!"0x16\00NSUInteger\001\000\000\000\000", !1, null, !20} ; [ DW_TAG_typedef ] [NSUInteger] [line 1, size 0, align 0, offset 0] [from long unsigned int]
+!20 = !{!"0x24\00long unsigned int\000\0064\0064\000\000\007", null, null} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!21 = !{!"0xd\00height\005\0064\0064\0064\000", !1, !16, !19} ; [ DW_TAG_member ] [height] [line 5, size 64, align 64, offset 64] [from NSUInteger]
+!22 = !{!"0xd\00pixelAspect\006\0064\0064\00128\000", !1, !16, !23} ; [ DW_TAG_member ] [pixelAspect] [line 6, size 64, align 64, offset 128] [from double]
+!23 = !{!"0x24\00double\000\0064\0064\000\000\004", null, null} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!24 = !{i32 1, !"Objective-C Version", i32 2}
+!25 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!26 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!27 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
+!28 = !{!"0x101\00self\0016777225\001088", !7, !5, !14} ; [ DW_TAG_arg_variable ] [self] [line 9]
+!29 = !MDLocation(line: 9, scope: !7)
+!30 = !{!"0x101\00_cmd\0033554441\0064", !7, !5, !31} ; [ DW_TAG_arg_variable ] [_cmd] [line 9]
+!31 = !{!"0x16\00SEL\009\000\000\000\000", !1, null, !12} ; [ DW_TAG_typedef ] [SEL] [line 9, size 0, align 0, offset 0] [from ]
+!32 = !{!"0x101\00otherBitmap\0050331657\000", !7, !5, !14} ; [ DW_TAG_arg_variable ] [otherBitmap] [line 9]
+!33 = !{!"0x101\00info\0067108874\000", !7, !5, !15} ; [ DW_TAG_arg_variable ] [info] [line 10]
+!34 = !MDLocation(line: 10, scope: !7)
+!35 = !{!"0x101\00length\0083886091\000", !7, !5, !19} ; [ DW_TAG_arg_variable ] [length] [line 11]
+!36 = !MDLocation(line: 11, scope: !7)
+!37 = !MDLocation(line: 13, scope: !7)
+!38 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/c-type-units.ll b/test/DebugInfo/X86/c-type-units.ll
index 431b0295148e..b9bc36eac8d1 100644
--- a/test/DebugInfo/X86/c-type-units.ll
+++ b/test/DebugInfo/X86/c-type-units.ll
@@ -17,13 +17,13 @@
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/simple.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"simple.c", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !5, i32 2, metadata !6, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 2] [def]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/simple.c]
-!6 = metadata !{i32 786451, metadata !1, null, metadata !"foo", i32 1, i64 0, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 0, align 8, offset 0] [def] [from ]
-!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!9 = metadata !{metadata !"clang version 3.5 "}
+!0 = !{!"0x11\0012\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !2, !3, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/simple.c] [DW_LANG_C99]
+!1 = !{!"simple.c", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x34\00f\00f\00\002\000\001", null, !5, !6, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 2] [def]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/simple.c]
+!6 = !{!"0x13\00foo\001\000\008\000\000\000", !1, null, null, !2, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 0, align 8, offset 0] [def] [from ]
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 1, !"Debug Info Version", i32 2}
+!9 = !{!"clang version 3.5 "}
diff --git a/test/DebugInfo/X86/coff_debug_info_type.ll b/test/DebugInfo/X86/coff_debug_info_type.ll
index a1051c39bffd..d34f50b7244b 100644
--- a/test/DebugInfo/X86/coff_debug_info_type.ll
+++ b/test/DebugInfo/X86/coff_debug_info_type.ll
@@ -1,10 +1,12 @@
-; RUN: llc -mtriple=i686-pc-mingw32 -filetype=asm -O0 < %s | FileCheck %s
-; RUN: llc -mtriple=i686-pc-cygwin -filetype=asm -O0 < %s | FileCheck %s
-; RUN: llc -mtriple=i686-w64-mingw32 -filetype=asm -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=i686-pc-mingw32 -dwarf-accel-tables=Enable -filetype=asm -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=i686-pc-cygwin -dwarf-accel-tables=Enable -filetype=asm -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=i686-w64-mingw32 -dwarf-accel-tables=Enable -filetype=asm -O0 < %s | FileCheck %s
 ; CHECK:    .section  .debug_info
+; CHECK:    .section  .apple_names
+; CHECK:    .section  .apple_types
 
 ; RUN: llc -mtriple=i686-pc-win32 -filetype=asm -O0 < %s | FileCheck -check-prefix=WIN32 %s
-; WIN32:    .section .debug$S,"rnd"
+; WIN32:    .section .debug$S,"rd"
 
 ; RUN: llc -mtriple=i686-pc-win32 -filetype=null -O0 < %s
 
@@ -27,15 +29,15 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [C:\Projects/test.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"test.c", metadata !"C:\5CProjects"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [main]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [C:\Projects/test.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!10 = metadata !{i32 3, i32 0, metadata !4, null}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [C:\Projects/test.c] [DW_LANG_C99]
+!1 = !{!"test.c", !"C:\5CProjects"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00main\00main\00\001\000\001\000\006\000\000\002", !1, !5, !6, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [main]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [C:\Projects/test.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 3}
+!10 = !MDLocation(line: 3, scope: !4)
+!11 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/coff_relative_names.ll b/test/DebugInfo/X86/coff_relative_names.ll
index 3b4854e733df..96e70b1e3881 100644
--- a/test/DebugInfo/X86/coff_relative_names.ll
+++ b/test/DebugInfo/X86/coff_relative_names.ll
@@ -23,15 +23,15 @@ attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"=
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [C:\Projects/test.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"test.c", metadata !"C:\5CProjects"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [main]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [C:\Projects/test.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!10 = metadata !{i32 3, i32 0, metadata !4, null}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [C:\Projects/test.c] [DW_LANG_C99]
+!1 = !{!"test.c", !"C:\5CProjects"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00main\00main\00\001\000\001\000\006\000\000\002", !1, !5, !6, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [main]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [C:\Projects/test.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 3}
+!10 = !MDLocation(line: 3, scope: !4)
+!11 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll
index 40300de793d5..b5da28ae3289 100644
--- a/test/DebugInfo/X86/concrete_out_of_line.ll
+++ b/test/DebugInfo/X86/concrete_out_of_line.ll
@@ -8,54 +8,54 @@
 
 ; CHECK: DW_TAG_class_type
 ; CHECK:   DW_TAG_subprogram
-; CHECK: [[ASSIGN_DECL:0x........]]:  DW_TAG_subprogram
+; CHECK:   DW_TAG_subprogram
+; CHECK:     DW_AT_MIPS_linkage_name {{.*}} "_ZN12nsAutoRefCntaSEi"
 
 ; CHECK: DW_TAG_class_type
-; CHECK: [[RELEASE_DECL:0x........]]:  DW_TAG_subprogram
-; CHECK: [[DTOR_DECL:0x........]]:  DW_TAG_subprogram
+; CHECK:   DW_TAG_subprogram
+; CHECK:     DW_AT_MIPS_linkage_name {{.*}} "_ZN17nsAutoRefCnt7ReleaseEv"
+; CHECK:   DW_TAG_subprogram
+; CHECK:     DW_AT_name {{.*}} "~nsAutoRefCnt"
 
-; CHECK: [[D2_ABS:.*]]: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
 ; CHECK-NEXT:     DW_AT_{{.*}}linkage_name {{.*}}D2
-; CHECK-NEXT:     DW_AT_specification {{.*}} {[[DTOR_DECL]]}
+; CHECK-NEXT:     DW_AT_specification {{.*}} "~nsAutoRefCnt"
 ; CHECK-NEXT:     DW_AT_inline
 ; CHECK-NOT:      DW_AT
 ; CHECK: DW_TAG
-; CHECK: [[D1_ABS:.*]]: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
 ; CHECK-NEXT:     DW_AT_{{.*}}linkage_name {{.*}}D1
-; CHECK-NEXT:     DW_AT_specification {{.*}} {[[DTOR_DECL]]}
+; CHECK-NEXT:     DW_AT_specification {{.*}} "~nsAutoRefCnt"
 ; CHECK-NEXT:     DW_AT_inline
 ; CHECK-NOT:     DW_AT
 ; CHECK: [[D1_THIS_ABS:.*]]: DW_TAG_formal_parameter
 
-; CHECK: [[RELEASE:0x........]]: DW_TAG_subprogram
-; CHECK:     DW_AT_specification {{.*}} {[[RELEASE_DECL]]}
+; CHECK: DW_TAG_subprogram
+; CHECK:     DW_AT_specification {{.*}} "_ZN17nsAutoRefCnt7ReleaseEv"
 ; CHECK: DW_TAG_formal_parameter
 ; CHECK-NOT: NULL
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_TAG_lexical_block
-; CHECK-NOT: NULL
-; CHECK-NOT: DW_TAG
 ; CHECK: DW_TAG_inlined_subroutine
-; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[ASSIGN:0x........]]}
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} "_ZN12nsAutoRefCntaSEi"
 ; CHECK-NOT: NULL
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_TAG_inlined_subroutine
-; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D1_ABS]]}
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} "_ZN17nsAutoRefCntD1Ev"
 ; CHECK-NOT: NULL
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_TAG_inlined_subroutine
-; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D2_ABS]]}
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} "_ZN17nsAutoRefCntD2Ev"
 
 ; and then that a TAG_subprogram refers to it with AT_abstract_origin.
 
 ; CHECK: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_abstract_origin {{.*}} {[[D1_ABS]]}
+; CHECK: DW_AT_abstract_origin {{.*}} "_ZN17nsAutoRefCntD1Ev"
 ; CHECK: DW_TAG_formal_parameter
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_abstract_origin {{.*}} {[[D1_THIS_ABS]]}
+; CHECK: DW_AT_abstract_origin {{.*}} {[[D1_THIS_ABS]]} "this"
 ; CHECK: DW_TAG_inlined_subroutine
-; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[D2_ABS]]}
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} "_ZN17nsAutoRefCntD2Ev"
 
 
 define i32 @_ZN17nsAutoRefCnt7ReleaseEv() {
@@ -76,56 +76,56 @@ declare void @_Z8moz_freePv(i8*)
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!60}
 
-!0 = metadata !{i32 786449, metadata !59, i32 4, metadata !"clang version 3.1 ()", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5, metadata !23, metadata !27, metadata !31}
-!5 = metadata !{i32 720942, metadata !6, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !12, metadata !20, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [Release]
-!6 = metadata !{i32 720937, metadata !59} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !10}
-!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [nsAutoRefCnt] [line 10, size 0, align 0, offset 0] [decl] [from ]
-!12 = metadata !{i32 720942, metadata !6, metadata !13, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", i32 11, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 11} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 10, i64 8, i64 8, i32 0, i32 0, null, metadata !14, null, null, null} ; [ DW_TAG_class_type ]
-!14 = metadata !{metadata !12, metadata !15}
-!15 = metadata !{i32 720942, metadata !6, metadata !13, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"", i32 12, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 12} ; [ DW_TAG_subprogram ]
-!16 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!17 = metadata !{null, metadata !10}
-!18 = metadata !{}
-!20 = metadata !{metadata !22}
-!22 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777230, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!23 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !15, metadata !24, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [~nsAutoRefCnt]
-!24 = metadata !{metadata !26}
-!26 = metadata !{i32 786689, metadata !23, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!27 = metadata !{i32 720942, metadata !6, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32* null, null, metadata !15, metadata !28, i32 18} ; [ DW_TAG_subprogram ] [line 18] [def] [~nsAutoRefCnt]
-!28 = metadata !{metadata !30}
-!30 = metadata !{i32 786689, metadata !27, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!31 = metadata !{i32 720942, metadata !6, null, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", i32 4, metadata !32, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, metadata !36, metadata !43, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [operator=]
-!32 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !33, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!33 = metadata !{metadata !9, metadata !34, metadata !9}
-!34 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !35} ; [ DW_TAG_pointer_type ]
-!35 = metadata !{i32 786451, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [nsAutoRefCnt] [line 2, size 0, align 0, offset 0] [decl] [from ]
-!36 = metadata !{i32 720942, metadata !6, metadata !37, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", i32 4, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 4} ; [ DW_TAG_subprogram ]
-!37 = metadata !{i32 720898, metadata !59, null, metadata !"nsAutoRefCnt", i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !38, i32 0, null, null, null} ; [ DW_TAG_class_type ] [nsAutoRefCnt] [line 2, size 32, align 32, offset 0] [def] [from ]
-!38 = metadata !{metadata !39, metadata !40, metadata !36}
-!39 = metadata !{i32 786445, metadata !59, metadata !37, metadata !"mValue", i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
-!40 = metadata !{i32 720942, metadata !6, metadata !37, metadata !"nsAutoRefCnt", metadata !"nsAutoRefCnt", metadata !"", i32 3, metadata !41, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ]
-!41 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !42, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!42 = metadata !{null, metadata !34}
-!43 = metadata !{metadata !45, metadata !46}
-!45 = metadata !{i32 786689, metadata !31, metadata !"this", metadata !6, i32 16777220, metadata !34, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!46 = metadata !{i32 786689, metadata !31, metadata !"aValue", metadata !6, i32 33554436, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!47 = metadata !{metadata !49}
-!49 = metadata !{i32 720948, i32 0, null, metadata !"mRefCnt", metadata !"mRefCnt", metadata !"", metadata !6, i32 9, metadata !37, i32 0, i32 1, i32* null, null} ; [ DW_TAG_variable ]
-!50 = metadata !{i32 5, i32 5, metadata !51, metadata !52}
-!51 = metadata !{i32 786443, metadata !6, metadata !31, i32 4, i32 29, i32 2} ; [ DW_TAG_lexical_block ]
-!52 = metadata !{i32 15, i32 0, metadata !53, null}
-!53 = metadata !{i32 786443, metadata !6, metadata !5, i32 14, i32 34, i32 0} ; [ DW_TAG_lexical_block ]
-!54 = metadata !{i32 19, i32 3, metadata !55, metadata !56}
-!55 = metadata !{i32 786443, metadata !6, metadata !27, i32 18, i32 41, i32 1} ; [ DW_TAG_lexical_block ]
-!56 = metadata !{i32 18, i32 41, metadata !23, metadata !52}
-!57 = metadata !{i32 19, i32 3, metadata !55, metadata !58}
-!58 = metadata !{i32 18, i32 41, metadata !23, null}
-!59 = metadata !{metadata !"nsAutoRefCnt.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src"}
-!60 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.1 ()\001\00\000\00\000", !59, !1, !1, !3, !47,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5, !23, !27, !31}
+!5 = !{!"0x2e\00Release\00Release\00_ZN17nsAutoRefCnt7ReleaseEv\0014\000\001\000\006\00256\001\0014", !6, null, !7, null, i32 ()* @_ZN17nsAutoRefCnt7ReleaseEv , null, !12, !20} ; [ DW_TAG_subprogram ] [line 14] [def] [Release]
+!6 = !{!"0x29", !59} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !10}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!10 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !11} ; [ DW_TAG_pointer_type ]
+!11 = !{!"0x13\00nsAutoRefCnt\0010\000\000\000\004\000", !59, null, null, null, null, null, null} ; [ DW_TAG_structure_type ] [nsAutoRefCnt] [line 10, size 0, align 0, offset 0] [decl] [from ]
+!12 = !{!"0x2e\00Release\00Release\00_ZN17nsAutoRefCnt7ReleaseEv\0011\000\000\000\006\00256\001\0011", !6, !13, !7, null, null, null, i32 0, !18} ; [ DW_TAG_subprogram ]
+!13 = !{!"0x2\00nsAutoRefCnt\0010\008\008\000\000\000", !59, null, null, !14, null, null} ; [ DW_TAG_class_type ]
+!14 = !{!12, !15}
+!15 = !{!"0x2e\00~nsAutoRefCnt\00~nsAutoRefCnt\00\0012\000\000\000\006\00256\001\0012", !6, !13, !16, null, null, null, i32 0, !18} ; [ DW_TAG_subprogram ]
+!16 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !17, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!17 = !{null, !10}
+!18 = !{}
+!20 = !{!22}
+!22 = !{!"0x101\00this\0016777230\0064", !5, !6, !10} ; [ DW_TAG_arg_variable ]
+!23 = !{!"0x2e\00~nsAutoRefCnt\00~nsAutoRefCnt\00_ZN17nsAutoRefCntD1Ev\0018\000\001\000\006\00256\001\0018", !6, null, !16, null, void ()* @_ZN17nsAutoRefCntD1Ev, null, !15, !24} ; [ DW_TAG_subprogram ] [line 18] [def] [~nsAutoRefCnt]
+!24 = !{!26}
+!26 = !{!"0x101\00this\0016777234\0064", !23, !6, !10} ; [ DW_TAG_arg_variable ]
+!27 = !{!"0x2e\00~nsAutoRefCnt\00~nsAutoRefCnt\00_ZN17nsAutoRefCntD2Ev\0018\000\001\000\006\00256\001\0018", !6, null, !16, null, i32* null, null, !15, !28} ; [ DW_TAG_subprogram ] [line 18] [def] [~nsAutoRefCnt]
+!28 = !{!30}
+!30 = !{!"0x101\00this\0016777234\0064", !27, !6, !10} ; [ DW_TAG_arg_variable ]
+!31 = !{!"0x2e\00operator=\00operator=\00_ZN12nsAutoRefCntaSEi\004\000\001\000\006\00256\001\004", !6, null, !32, null, null, null, !36, !43} ; [ DW_TAG_subprogram ] [line 4] [def] [operator=]
+!32 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !33, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!33 = !{!9, !34, !9}
+!34 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !35} ; [ DW_TAG_pointer_type ]
+!35 = !{!"0x13\00nsAutoRefCnt\002\000\000\000\004\000", !59, null, null, null, null, null, null} ; [ DW_TAG_structure_type ] [nsAutoRefCnt] [line 2, size 0, align 0, offset 0] [decl] [from ]
+!36 = !{!"0x2e\00operator=\00operator=\00_ZN12nsAutoRefCntaSEi\004\000\000\000\006\00256\001\004", !6, !37, !32, null, null, null, i32 0, !18} ; [ DW_TAG_subprogram ]
+!37 = !{!"0x2\00nsAutoRefCnt\002\0032\0032\000\000\000", !59, null, null, !38, null, null, null} ; [ DW_TAG_class_type ] [nsAutoRefCnt] [line 2, size 32, align 32, offset 0] [def] [from ]
+!38 = !{!39, !40, !36}
+!39 = !{!"0xd\00mValue\007\0032\0032\000\000", !59, !37, !9} ; [ DW_TAG_member ]
+!40 = !{!"0x2e\00nsAutoRefCnt\00nsAutoRefCnt\00\003\000\000\000\006\00256\001\003", !6, !37, !41, null, null, null, i32 0, !18} ; [ DW_TAG_subprogram ]
+!41 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !42, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!42 = !{null, !34}
+!43 = !{!45, !46}
+!45 = !{!"0x101\00this\0016777220\0064", !31, !6, !34} ; [ DW_TAG_arg_variable ]
+!46 = !{!"0x101\00aValue\0033554436\000", !31, !6, !9} ; [ DW_TAG_arg_variable ]
+!47 = !{!49}
+!49 = !{!"0x34\00mRefCnt\00mRefCnt\00\009\000\001", null, !6, !37, i32* null, null} ; [ DW_TAG_variable ]
+!50 = !MDLocation(line: 5, column: 5, scope: !51, inlinedAt: !52)
+!51 = !{!"0xb\004\0029\002", !6, !31} ; [ DW_TAG_lexical_block ]
+!52 = !MDLocation(line: 15, scope: !53)
+!53 = !{!"0xb\0014\0034\000", !6, !5} ; [ DW_TAG_lexical_block ]
+!54 = !MDLocation(line: 19, column: 3, scope: !55, inlinedAt: !56)
+!55 = !{!"0xb\0018\0041\001", !6, !27} ; [ DW_TAG_lexical_block ]
+!56 = !MDLocation(line: 18, column: 41, scope: !23, inlinedAt: !52)
+!57 = !MDLocation(line: 19, column: 3, scope: !55, inlinedAt: !58)
+!58 = !MDLocation(line: 18, column: 41, scope: !23)
+!59 = !{!"nsAutoRefCnt.ii", !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src"}
+!60 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/constant-aggregate.ll b/test/DebugInfo/X86/constant-aggregate.ll
new file mode 100644
index 000000000000..324c83177746
--- /dev/null
+++ b/test/DebugInfo/X86/constant-aggregate.ll
@@ -0,0 +1,118 @@
+; RUN: llc %s -filetype=obj -o %t.o
+; RUN: llvm-dwarfdump -debug-dump=info %t.o | FileCheck %s
+; Test emitting a constant for an aggregate type.
+;
+; clang -S -O1 -emit-llvm
+;
+; typedef struct { unsigned i; } S;
+;
+; unsigned foo(S s) {
+;   s.i = 1;
+;   return s.i;
+; }
+;
+; class C { public: unsigned i; };
+;
+; unsigned foo(C c) {
+;   c.i = 2;
+;   return c.i;
+; }
+;
+; unsigned bar() {
+;  int a[1] = { 3 };
+;   return a[0];
+; }
+;
+; CHECK:  DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_const_value [DW_FORM_udata]	(1)
+; CHECK-NEXT: DW_AT_name {{.*}} "s"
+;
+; CHECK:  DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_const_value [DW_FORM_udata]	(2)
+; CHECK-NEXT: DW_AT_name {{.*}} "c"
+;
+; CHECK:  DW_TAG_variable
+; CHECK-NEXT: DW_AT_const_value [DW_FORM_udata]	(3)
+; CHECK-NEXT: DW_AT_name {{.*}} "a"
+
+; ModuleID = 'sroasplit-4.cpp'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; Function Attrs: nounwind readnone ssp uwtable
+define i32 @_Z3foo1S(i32 %s.coerce) #0 {
+entry:
+  tail call void @llvm.dbg.value(metadata i32 %s.coerce, i64 0, metadata !18, metadata !37), !dbg !38
+  tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !18, metadata !37), !dbg !38
+  ret i32 1, !dbg !39
+}
+
+; Function Attrs: nounwind readnone ssp uwtable
+define i32 @_Z3foo1C(i32 %c.coerce) #0 {
+entry:
+  tail call void @llvm.dbg.value(metadata i32 %c.coerce, i64 0, metadata !23, metadata !37), !dbg !40
+  tail call void @llvm.dbg.value(metadata i32 2, i64 0, metadata !23, metadata !37), !dbg !40
+  ret i32 2, !dbg !41
+}
+
+; Function Attrs: nounwind readnone ssp uwtable
+define i32 @_Z3barv() #0 {
+entry:
+  tail call void @llvm.dbg.value(metadata i32 3, i64 0, metadata !28, metadata !37), !dbg !42
+  ret i32 3, !dbg !43
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind readnone ssp uwtable }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!33, !34, !35}
+!llvm.ident = !{!36}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 (trunk 225364) (llvm/trunk 225366)\001\00\000\00\001", !1, !2, !3, !11, !2, !2} ; [ DW_TAG_compile_unit ] [/sroasplit-4.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"sroasplit-4.cpp", !""}
+!2 = !{}
+!3 = !{!4, !8}
+!4 = !{!"0x13\00\001\0032\0032\000\000\000", !1, null, null, !5, null, null, !"_ZTS1S"} ; [ DW_TAG_structure_type ] [line 1, size 32, align 32, offset 0] [def] [from ]
+!5 = !{!6}
+!6 = !{!"0xd\00i\001\0032\0032\000\000", !1, !"_ZTS1S", !7} ; [ DW_TAG_member ] [i] [line 1, size 32, align 32, offset 0] [from unsigned int]
+!7 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, null} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!8 = !{!"0x2\00C\008\0032\0032\000\000\000", !1, null, null, !9, null, null, !"_ZTS1C"} ; [ DW_TAG_class_type ] [C] [line 8, size 32, align 32, offset 0] [def] [from ]
+!9 = !{!10}
+!10 = !{!"0xd\00i\008\0032\0032\000\003", !1, !"_ZTS1C", !7} ; [ DW_TAG_member ] [i] [line 8, size 32, align 32, offset 0] [public] [from unsigned int]
+!11 = !{!12, !19, !24}
+!12 = !{!"0x2e\00foo\00foo\00_Z3foo1S\003\000\001\000\000\00256\001\003", !1, !13, !14, null, i32 (i32)* @_Z3foo1S, null, null, !17} ; [ DW_TAG_subprogram ] [line 3] [def] [foo]
+!13 = !{!"0x29", !1}                              ; [ DW_TAG_file_type ] [/sroasplit-4.cpp]
+!14 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = !{!7, !16}
+!16 = !{!"0x16\00S\001\000\000\000\000", !1, null, !"_ZTS1S"} ; [ DW_TAG_typedef ] [S] [line 1, size 0, align 0, offset 0] [from _ZTS1S]
+!17 = !{!18}
+!18 = !{!"0x101\00s\0016777219\000", !12, !13, !16} ; [ DW_TAG_arg_variable ] [s] [line 3]
+!19 = !{!"0x2e\00foo\00foo\00_Z3foo1C\0010\000\001\000\000\00256\001\0010", !1, !13, !20, null, i32 (i32)* @_Z3foo1C, null, null, !22} ; [ DW_TAG_subprogram ] [line 10] [def] [foo]
+!20 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !21, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!21 = !{!7, !"_ZTS1C"}
+!22 = !{!23}
+!23 = !{!"0x101\00c\0016777226\000", !19, !13, !"_ZTS1C"} ; [ DW_TAG_arg_variable ] [c] [line 10]
+!24 = !{!"0x2e\00bar\00bar\00_Z3barv\0015\000\001\000\000\00256\001\0015", !1, !13, !25, null, i32 ()* @_Z3barv, null, null, !27} ; [ DW_TAG_subprogram ] [line 15] [def] [bar]
+!25 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !26, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!26 = !{!7}
+!27 = !{!28}
+!28 = !{!"0x100\00a\0016\000", !24, !13, !29}     ; [ DW_TAG_auto_variable ] [a] [line 16]
+!29 = !{!"0x1\00\000\0032\0032\000\000\000", null, null, !30, !31, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32, align 32, offset 0] [from int]
+!30 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!31 = !{!32}
+!32 = !{!"0x21\000\001"}                          ; [ DW_TAG_subrange_type ] [0, 0]
+!33 = !{i32 2, !"Dwarf Version", i32 2}
+!34 = !{i32 2, !"Debug Info Version", i32 2}
+!35 = !{i32 1, !"PIC Level", i32 2}
+!36 = !{!"clang version 3.6.0 (trunk 225364) (llvm/trunk 225366)"}
+!37 = !{!"0x102"}                                 ; [ DW_TAG_expression ]
+!38 = !MDLocation(line: 3, column: 16, scope: !12)
+!39 = !MDLocation(line: 5, column: 3, scope: !12)
+!40 = !MDLocation(line: 10, column: 16, scope: !19)
+!41 = !MDLocation(line: 12, column: 3, scope: !19)
+!42 = !MDLocation(line: 16, column: 6, scope: !24)
+!43 = !MDLocation(line: 17, column: 3, scope: !24)
diff --git a/test/DebugInfo/X86/cu-ranges-odr.ll b/test/DebugInfo/X86/cu-ranges-odr.ll
index c42a9085da56..c1f58d7eed73 100644
--- a/test/DebugInfo/X86/cu-ranges-odr.ll
+++ b/test/DebugInfo/X86/cu-ranges-odr.ll
@@ -35,9 +35,9 @@ entry:
   %this.addr = alloca %class.A*, align 8
   %i.addr = alloca i32, align 4
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !27), !dbg !29
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !27, metadata !{!"0x102"}), !dbg !29
   store i32 %i, i32* %i.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !30), !dbg !31
+  call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !30, metadata !{!"0x102"}), !dbg !31
   %this1 = load %class.A** %this.addr
   %a = getelementptr inbounds %class.A* %this1, i32 0, i32 0, !dbg !31
   %0 = load i32* %i.addr, align 4, !dbg !31
@@ -46,7 +46,7 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 define internal void @_GLOBAL__I_a() section ".text.startup" {
 entry:
@@ -61,36 +61,36 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!23, !24}
 !llvm.ident = !{!25}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 (trunk 199923) (llvm/trunk 199940)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !13, metadata !21, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"baz.cpp", metadata !"/usr/local/google/home/echristo/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786434, metadata !1, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
-!5 = metadata !{metadata !6, metadata !8}
-!6 = metadata !{i32 786445, metadata !1, metadata !"_ZTS1A", metadata !"a", i32 5, i64 32, i64 32, i64 0, i32 1, metadata !7} ; [ DW_TAG_member ] [a] [line 5, size 32, align 32, offset 0] [private] [from int]
-!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!8 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"", i32 3, metadata !9, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [A]
-!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!10 = metadata !{null, metadata !11, metadata !7}
-!11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
-!12 = metadata !{i32 786468}
-!13 = metadata !{metadata !14, metadata !18, metadata !19}
-!14 = metadata !{i32 786478, metadata !1, metadata !15, metadata !"__cxx_global_var_init", metadata !"__cxx_global_var_init", metadata !"", i32 8, metadata !16, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @__cxx_global_var_init, null, null, metadata !2, i32 8} ; [ DW_TAG_subprogram ] [line 8] [local] [def] [__cxx_global_var_init]
-!15 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/baz.cpp]
-!16 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!17 = metadata !{null}
-!18 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"_ZN1AC2Ei", i32 3, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*, i32)* @_ZN1AC2Ei, null, metadata !8, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [A]
-!19 = metadata !{i32 786478, metadata !1, metadata !15, metadata !"", metadata !"", metadata !"_GLOBAL__I_a", i32 3, metadata !20, i1 true, i1 true, i32 0, i32 0, null, i32 64, i1 false, void ()* @_GLOBAL__I_a, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [local] [def]
-!20 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!21 = metadata !{metadata !22}
-!22 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !15, i32 8, metadata !4, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 8] [def]
-!23 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!24 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!25 = metadata !{metadata !"clang version 3.5 (trunk 199923) (llvm/trunk 199940)"}
-!26 = metadata !{i32 8, i32 0, metadata !14, null} ; [ DW_TAG_imported_declaration ]
-!27 = metadata !{i32 786689, metadata !18, metadata !"this", null, i32 16777216, metadata !28, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!28 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
-!29 = metadata !{i32 0, i32 0, metadata !18, null}
-!30 = metadata !{i32 786689, metadata !18, metadata !"i", metadata !15, i32 33554435, metadata !7, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 3]
-!31 = metadata !{i32 3, i32 0, metadata !18, null}
-!32 = metadata !{i32 3, i32 0, metadata !19, null}
+!0 = !{!"0x11\004\00clang version 3.5 (trunk 199923) (llvm/trunk 199940)\000\00\000\00\001", !1, !2, !3, !13, !21, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"baz.cpp", !"/usr/local/google/home/echristo/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2\00A\001\0032\0032\000\000\000", !1, null, null, !5, null, null, !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
+!5 = !{!6, !8}
+!6 = !{!"0xd\00a\005\0032\0032\000\001", !1, !"_ZTS1A", !7} ; [ DW_TAG_member ] [a] [line 5, size 32, align 32, offset 0] [private] [from int]
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = !{!"0x2e\00A\00A\00\003\000\000\000\006\00256\000\003", !1, !"_ZTS1A", !9, null, null, null, i32 0, !12} ; [ DW_TAG_subprogram ] [line 3] [A]
+!9 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !{null, !11, !7}
+!11 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!12 = !{i32 786468}
+!13 = !{!14, !18, !19}
+!14 = !{!"0x2e\00__cxx_global_var_init\00__cxx_global_var_init\00\008\001\001\000\006\00256\000\008", !1, !15, !16, null, void ()* @__cxx_global_var_init, null, null, !2} ; [ DW_TAG_subprogram ] [line 8] [local] [def] [__cxx_global_var_init]
+!15 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/baz.cpp]
+!16 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !17, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!17 = !{null}
+!18 = !{!"0x2e\00A\00A\00_ZN1AC2Ei\003\000\001\000\006\00256\000\003", !1, !"_ZTS1A", !9, null, void (%class.A*, i32)* @_ZN1AC2Ei, null, !8, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [A]
+!19 = !{!"0x2e\00\00\00_GLOBAL__I_a\003\001\001\000\006\0064\000\003", !1, !15, !20, null, void ()* @_GLOBAL__I_a, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [local] [def]
+!20 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!21 = !{!22}
+!22 = !{!"0x34\00a\00a\00\008\000\001", null, !15, !4, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 8] [def]
+!23 = !{i32 2, !"Dwarf Version", i32 4}
+!24 = !{i32 1, !"Debug Info Version", i32 2}
+!25 = !{!"clang version 3.5 (trunk 199923) (llvm/trunk 199940)"}
+!26 = !MDLocation(line: 8, scope: !14)
+!27 = !{!"0x101\00this\0016777216\001088", !18, null, !28} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!28 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
+!29 = !MDLocation(line: 0, scope: !18)
+!30 = !{!"0x101\00i\0033554435\000", !18, !15, !7} ; [ DW_TAG_arg_variable ] [i] [line 3]
+!31 = !MDLocation(line: 3, scope: !18)
+!32 = !MDLocation(line: 3, scope: !19)
diff --git a/test/DebugInfo/X86/cu-ranges.ll b/test/DebugInfo/X86/cu-ranges.ll
index 405a498155f5..0d872d872f9b 100644
--- a/test/DebugInfo/X86/cu-ranges.ll
+++ b/test/DebugInfo/X86/cu-ranges.ll
@@ -1,9 +1,9 @@
 ; RUN: llc -split-dwarf=Enable -O0 %s -function-sections -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t
-; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck --check-prefix=FUNCTION-SECTIONS %s
+; RUN: llvm-dwarfdump -debug-dump=abbrev %t | FileCheck --check-prefix=FUNCTION-SECTIONS %s
 ; RUN: llvm-readobj --relocations %t | FileCheck --check-prefix=FUNCTION-SECTIONS-RELOCS %s
 
 ; RUN: llc -split-dwarf=Enable -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t
-; RUN: llvm-dwarfdump -debug-dump=all %t | FileCheck --check-prefix=NO-FUNCTION-SECTIONS %s
+; RUN: llvm-dwarfdump -debug-dump=abbrev %t | FileCheck --check-prefix=NO-FUNCTION-SECTIONS %s
 
 ; From:
 ; int foo (int a) {
@@ -21,27 +21,29 @@
 
 ; Without function sections enabled make sure that we have no DW_AT_ranges attribute.
 ; NO-FUNCTION-SECTIONS-NOT: DW_AT_ranges
+; NO-FUNCTION-SECTIONS: DW_AT_low_pc DW_FORM_addr
+; NO-FUNCTION-SECTIONS-NOT: DW_AT_ranges
 
 ; Function Attrs: nounwind uwtable
 define i32 @foo(i32 %a) #0 {
 entry:
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !13), !dbg !14
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !13, metadata !{!"0x102"}), !dbg !14
   %0 = load i32* %a.addr, align 4, !dbg !14
   %add = add nsw i32 %0, 1, !dbg !14
   ret i32 %add, !dbg !14
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind uwtable
 define i32 @bar(i32 %b) #0 {
 entry:
   %b.addr = alloca i32, align 4
   store i32 %b, i32* %b.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %b.addr}, metadata !15), !dbg !16
+  call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !15, metadata !{!"0x102"}), !dbg !16
   %0 = load i32* %b.addr, align 4, !dbg !16
   %add = add nsw i32 %0, 2, !dbg !16
   ret i32 %add, !dbg !16
@@ -54,20 +56,20 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!10, !11}
 !llvm.ident = !{!12}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/z.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"z.c", metadata !"/usr/local/google/home/echristo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !9}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/z.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @bar, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [bar]
-!10 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!12 = metadata !{metadata !"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)"}
-!13 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 1]
-!14 = metadata !{i32 1, i32 0, metadata !4, null}
-!15 = metadata !{i32 786689, metadata !9, metadata !"b", metadata !5, i32 16777218, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 2]
-!16 = metadata !{i32 2, i32 0, metadata !9, null}
+!0 = !{!"0x11\0012\00clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)\000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/z.c] [DW_LANG_C99]
+!1 = !{!"z.c", !"/usr/local/google/home/echristo"}
+!2 = !{}
+!3 = !{!4, !9}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, i32 (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/z.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x2e\00bar\00bar\00\002\000\001\000\006\00256\000\002", !1, !5, !6, null, i32 (i32)* @bar, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [bar]
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 1, !"Debug Info Version", i32 2}
+!12 = !{!"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)"}
+!13 = !{!"0x101\00a\0016777217\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [a] [line 1]
+!14 = !MDLocation(line: 1, scope: !4)
+!15 = !{!"0x101\00b\0016777218\000", !9, !5, !8} ; [ DW_TAG_arg_variable ] [b] [line 2]
+!16 = !MDLocation(line: 2, scope: !9)
diff --git a/test/DebugInfo/X86/data_member_location.ll b/test/DebugInfo/X86/data_member_location.ll
index 1c76258ec205..4fdcc5ddcb92 100644
--- a/test/DebugInfo/X86/data_member_location.ll
+++ b/test/DebugInfo/X86/data_member_location.ll
@@ -34,20 +34,20 @@
 !llvm.module.flags = !{!13, !15}
 !llvm.ident = !{!14}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !2, metadata !10, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/data_member_location.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"data_member_location.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !1, null, metadata !"foo", i32 1, i64 64, i64 32, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [def] [from ]
-!5 = metadata !{metadata !6, metadata !8}
-!6 = metadata !{i32 786445, metadata !1, metadata !"_ZTS3foo", metadata !"c", i32 2, i64 8, i64 8, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] [c] [line 2, size 8, align 8, offset 0] [from char]
-!7 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!8 = metadata !{i32 786445, metadata !1, metadata !"_ZTS3foo", metadata !"i", i32 3, i64 32, i64 32, i64 32, i32 0, metadata !9} ; [ DW_TAG_member ] [i] [line 3, size 32, align 32, offset 32] [from int]
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !12, i32 6, metadata !4, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 6] [def]
-!12 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/data_member_location.cpp]
-!13 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!14 = metadata !{metadata !"clang version 3.4 "}
-
-!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 \000\00\000\00\000", !1, !2, !3, !2, !10, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/data_member_location.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"data_member_location.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00foo\001\0064\0032\000\000\000", !1, null, null, !5, null, null, !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [def] [from ]
+!5 = !{!6, !8}
+!6 = !{!"0xd\00c\002\008\008\000\000", !1, !"_ZTS3foo", !7} ; [ DW_TAG_member ] [c] [line 2, size 8, align 8, offset 0] [from char]
+!7 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!8 = !{!"0xd\00i\003\0032\0032\0032\000", !1, !"_ZTS3foo", !9} ; [ DW_TAG_member ] [i] [line 3, size 32, align 32, offset 32] [from int]
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!11}
+!11 = !{!"0x34\00f\00f\00\006\000\001", null, !12, !4, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 6] [def]
+!12 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/data_member_location.cpp]
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{!"clang version 3.4 "}
+
+!15 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-asm.s b/test/DebugInfo/X86/dbg-asm.s
index 66a0292461dd..f6e5233ba45d 100644
--- a/test/DebugInfo/X86/dbg-asm.s
+++ b/test/DebugInfo/X86/dbg-asm.s
@@ -12,11 +12,19 @@ _a:
 # CHECK-COFF: Relocations [
 # CHECK-COFF:   Section {{.*}} .debug_info {
 # CHECK-COFF:     0x6 IMAGE_REL_I386_SECREL .debug_abbrev
+# CHECK-COFF:     0xC IMAGE_REL_I386_SECREL .debug_line
+# CHECK-COFF:   }
+# CHECK-COFF:   Section {{.*}} .debug_aranges {
+# CHECK-COFF:     0x6 IMAGE_REL_I386_SECREL .debug_info
 # CHECK-COFF:   }
 # CHECK-COFF: ]
 
 # CHECK-ELF: Relocations [
 # CHECK-ELF:   Section {{.*}} .rel.debug_info {
 # CHECK-ELF:     0x6 R_386_32 .debug_abbrev
+# CHECK-ELF:     0xC R_386_32 .debug_line
+# CHECK-ELF:   }
+# CHECK-ELF:   Section {{.*}} .rel.debug_aranges {
+# CHECK-ELF:     0x6 R_386_32 .debug_info
 # CHECK-ELF:   }
 # CHECK-ELF: ]
diff --git a/test/DebugInfo/X86/dbg-at-specficiation.ll b/test/DebugInfo/X86/dbg-at-specficiation.ll
index c76536719f4f..82696992729b 100644
--- a/test/DebugInfo/X86/dbg-at-specficiation.ll
+++ b/test/DebugInfo/X86/dbg-at-specficiation.ll
@@ -8,14 +8,14 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!12}
 
-!0 = metadata !{i32 720913, metadata !11, i32 12, metadata !"clang version 3.0 (trunk 140253)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, null, i32 0} ; [ DW_TAG_compile_unit ]
-!2 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720948, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, [10 x i32]* @a, null} ; [ DW_TAG_variable ]
-!6 = metadata !{i32 720937, metadata !11} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720897, null, null, null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !8, metadata !9, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 320, align 32, offset 0] [from int]
-!8 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 720929, i64 0, i64 10}        ; [ DW_TAG_subrange_type ]
-!11 = metadata !{metadata !"x.c", metadata !"/private/tmp"}
-!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.0 (trunk 140253)\001\00\000\00\000", !11, !2, !2, !2, !3, null} ; [ DW_TAG_compile_unit ]
+!2 = !{}
+!3 = !{!5}
+!5 = !{!"0x34\00a\00a\00\001\000\001", null, !6, !7, [10 x i32]* @a, null} ; [ DW_TAG_variable ]
+!6 = !{!"0x29", !11} ; [ DW_TAG_file_type ]
+!7 = !{!"0x1\00\000\00320\0032\000\000", null, null, !8, !9, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 320, align 32, offset 0] [from int]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!9 = !{!10}
+!10 = !{!"0x21\000\0010"}        ; [ DW_TAG_subrange_type ]
+!11 = !{!"x.c", !"/private/tmp"}
+!12 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-byval-parameter.ll b/test/DebugInfo/X86/dbg-byval-parameter.ll
index c658b5050269..713781f38fb7 100644
--- a/test/DebugInfo/X86/dbg-byval-parameter.ll
+++ b/test/DebugInfo/X86/dbg-byval-parameter.ll
@@ -9,7 +9,7 @@ entry:
   %retval = alloca double                         ; <double*> [#uses=2]
   %0 = alloca double                              ; <double*> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{%struct.Rect* %my_r0}, metadata !0), !dbg !15
+  call void @llvm.dbg.declare(metadata %struct.Rect* %my_r0, metadata !0, metadata !{!"0x102"}), !dbg !15
   %1 = getelementptr inbounds %struct.Rect* %my_r0, i32 0, i32 0, !dbg !16 ; <%struct.Pt*> [#uses=1]
   %2 = getelementptr inbounds %struct.Pt* %1, i32 0, i32 0, !dbg !16 ; <double*> [#uses=1]
   %3 = load double* %2, align 8, !dbg !16         ; <double> [#uses=1]
@@ -23,30 +23,30 @@ return:                                           ; preds = %entry
   ret double %retval1, !dbg !16
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!21}
 
-!0 = metadata !{i32 786689, metadata !1, metadata !"my_r0", metadata !2, i32 11, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !19, metadata !2, metadata !"foo", metadata !"foo", metadata !"foo", i32 11, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (%struct.Rect*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !19, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !20, metadata !20, metadata !18, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{metadata !6, metadata !7}
-!6 = metadata !{i32 786468, metadata !19, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Rect", i32 6, i64 256, i64 64, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Rect] [line 6, size 256, align 64, offset 0] [def] [from ]
-!8 = metadata !{metadata !9, metadata !14}
-!9 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P1", i32 7, i64 128, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786451, metadata !19, metadata !2, metadata !"Pt", i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Pt] [line 1, size 128, align 64, offset 0] [def] [from ]
-!11 = metadata !{metadata !12, metadata !13}
-!12 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"x", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!13 = metadata !{i32 786445, metadata !19, metadata !10, metadata !"y", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
-!14 = metadata !{i32 786445, metadata !19, metadata !7, metadata !"P2", i32 8, i64 128, i64 64, i64 128, i32 0, metadata !10} ; [ DW_TAG_member ]
-!15 = metadata !{i32 11, i32 0, metadata !1, null}
-!16 = metadata !{i32 12, i32 0, metadata !17, null}
-!17 = metadata !{i32 786443, metadata !19, metadata !1, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!18 = metadata !{metadata !1}
-!19 = metadata !{metadata !"b2.c", metadata !"/tmp/"}
-!20 = metadata !{i32 0}
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x101\00my_r0\0011\000", !1, !2, !7} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00foo\00foo\00foo\0011\000\001\000\006\000\000\000", !19, !2, !4, null, double (%struct.Rect*)* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !19} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\000\00\000\00\000", !19, !20, !20, !18, null,  null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !19, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{!6, !7}
+!6 = !{!"0x24\00double\000\0064\0064\000\000\004", !19, !2} ; [ DW_TAG_base_type ]
+!7 = !{!"0x13\00Rect\006\00256\0064\000\000\000", !19, !2, null, !8, null, null, null} ; [ DW_TAG_structure_type ] [Rect] [line 6, size 256, align 64, offset 0] [def] [from ]
+!8 = !{!9, !14}
+!9 = !{!"0xd\00P1\007\00128\0064\000\000", !19, !7, !10} ; [ DW_TAG_member ]
+!10 = !{!"0x13\00Pt\001\00128\0064\000\000\000", !19, !2, null, !11, null, null, null} ; [ DW_TAG_structure_type ] [Pt] [line 1, size 128, align 64, offset 0] [def] [from ]
+!11 = !{!12, !13}
+!12 = !{!"0xd\00x\002\0064\0064\000\000", !19, !10, !6} ; [ DW_TAG_member ]
+!13 = !{!"0xd\00y\003\0064\0064\0064\000", !19, !10, !6} ; [ DW_TAG_member ]
+!14 = !{!"0xd\00P2\008\00128\0064\00128\000", !19, !7, !10} ; [ DW_TAG_member ]
+!15 = !MDLocation(line: 11, scope: !1)
+!16 = !MDLocation(line: 12, scope: !17)
+!17 = !{!"0xb\0011\000\000", !19, !1} ; [ DW_TAG_lexical_block ]
+!18 = !{!1}
+!19 = !{!"b2.c", !"/tmp/"}
+!20 = !{i32 0}
+!21 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-const-int.ll b/test/DebugInfo/X86/dbg-const-int.ll
index bf7ee08c665f..18abbdd5c21d 100644
--- a/test/DebugInfo/X86/dbg-const-int.ll
+++ b/test/DebugInfo/X86/dbg-const-int.ll
@@ -12,28 +12,28 @@ target triple = "x86_64-apple-macosx10.6.7"
 
 define i32 @foo() nounwind uwtable readnone optsize ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !8, i64 0, metadata !6), !dbg !9
+  tail call void @llvm.dbg.value(metadata i32 42, i64 0, metadata !6, metadata !{!"0x102"}), !dbg !9
   ret i32 42, !dbg !10
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!15}
 
-!0 = metadata !{i32 786449, metadata !13, i32 12, metadata !"clang version 3.0 (trunk 132191)", i1 true, metadata !"", i32 0, metadata !14, metadata !14, metadata !11, null,  null, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !13, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @foo, null, null, metadata !12, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [foo]
-!2 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !13, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786688, metadata !7, metadata !"i", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!7 = metadata !{i32 786443, metadata !13, metadata !1, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
-!8 = metadata !{i32 42}
-!9 = metadata !{i32 2, i32 12, metadata !7, null}
-!10 = metadata !{i32 3, i32 2, metadata !7, null}
-!11 = metadata !{metadata !1}
-!12 = metadata !{metadata !6}
-!13 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
-!14 = metadata !{i32 0}
-!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.0 (trunk 132191)\001\00\000\00\000", !13, !14, !14, !11, null,  null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\000\001\000", !13, !2, !3, null, i32 ()* @foo, null, null, !12} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [foo]
+!2 = !{!"0x29", !13} ; [ DW_TAG_file_type ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !13, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
+!6 = !{!"0x100\00i\002\000", !7, !2, !5} ; [ DW_TAG_auto_variable ]
+!7 = !{!"0xb\001\0011\000", !13, !1} ; [ DW_TAG_lexical_block ]
+!8 = !{i32 42}
+!9 = !MDLocation(line: 2, column: 12, scope: !7)
+!10 = !MDLocation(line: 3, column: 2, scope: !7)
+!11 = !{!1}
+!12 = !{!6}
+!13 = !{!"a.c", !"/private/tmp"}
+!14 = !{i32 0}
+!15 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-const.ll b/test/DebugInfo/X86/dbg-const.ll
index 300c1eeb977b..755565dbf954 100644
--- a/test/DebugInfo/X86/dbg-const.ll
+++ b/test/DebugInfo/X86/dbg-const.ll
@@ -17,35 +17,35 @@ target triple = "x86_64-apple-darwin10.0.0"
 ;CHECK-NEXT:  .byte	42
 define i32 @foobar() nounwind readonly noinline ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !8, i64 0, metadata !6), !dbg !9
+  tail call void @llvm.dbg.value(metadata i32 42, i64 0, metadata !6, metadata !{!"0x102"}), !dbg !9
   %call = tail call i32 @bar(), !dbg !11
-  tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !6), !dbg !11
+  tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !6, metadata !{!"0x102"}), !dbg !11
   %call2 = tail call i32 @bar(), !dbg !11
   %add = add nsw i32 %call2, %call, !dbg !12
   ret i32 %add, !dbg !10
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 declare i32 @bar() nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!17}
 
-!0 = metadata !{i32 786478, metadata !15, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"foobar", i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @foobar, null, null, metadata !14, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !15, i32 12, metadata !"clang version 2.9 (trunk 114183)", i1 true, metadata !"", i32 0, metadata !16, metadata !16, metadata !13, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !15, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, metadata !15, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
-!6 = metadata !{i32 786688, metadata !7, metadata !"j", metadata !1, i32 15, metadata !5, i32 0, null}
-!7 = metadata !{i32 786443, metadata !15, metadata !0, i32 12, i32 52, i32 0} ; [ DW_TAG_lexical_block ]
-!8 = metadata !{i32 42}
-!9 = metadata !{i32 15, i32 12, metadata !7, null}
-!10 = metadata !{i32 23, i32 3, metadata !7, null}
-!11 = metadata !{i32 17, i32 3, metadata !7, null}
-!12 = metadata !{i32 18, i32 3, metadata !7, null}
-!13 = metadata !{metadata !0}
-!14 = metadata !{metadata !6}
-!15 = metadata !{metadata !"mu.c", metadata !"/private/tmp"}
-!16 = metadata !{i32 0}
-!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00foobar\00foobar\00foobar\0012\000\001\000\006\000\001\000", !15, !1, !3, null, i32 ()* @foobar, null, null, !14} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !15} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 2.9 (trunk 114183)\001\00\000\00\001", !15, !16, !16, !13, null,  null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !15, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", !15, !1} ; [ DW_TAG_base_type ]
+!6 = !{!"0x100\00j\0015\000", !7, !1, !5} ; [ DW_TAG_auto_variable ]
+!7 = !{!"0xb\0012\0052\000", !15, !0} ; [ DW_TAG_lexical_block ]
+!8 = !{i32 42}
+!9 = !MDLocation(line: 15, column: 12, scope: !7)
+!10 = !MDLocation(line: 23, column: 3, scope: !7)
+!11 = !MDLocation(line: 17, column: 3, scope: !7)
+!12 = !MDLocation(line: 18, column: 3, scope: !7)
+!13 = !{!0}
+!14 = !{!6}
+!15 = !{!"mu.c", !"/private/tmp"}
+!16 = !{i32 0}
+!17 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-declare-arg.ll b/test/DebugInfo/X86/dbg-declare-arg.ll
index b5372658cf37..ef975dd9db8d 100644
--- a/test/DebugInfo/X86/dbg-declare-arg.ll
+++ b/test/DebugInfo/X86/dbg-declare-arg.ll
@@ -14,8 +14,8 @@ entry:
   %nrvo = alloca i1
   %cleanup.dest.slot = alloca i32
   store i32 %i, i32* %i.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !26), !dbg !27
-  call void @llvm.dbg.declare(metadata !{i32* %j}, metadata !28), !dbg !30
+  call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !26, metadata !{!"0x102"}), !dbg !27
+  call void @llvm.dbg.declare(metadata i32* %j, metadata !28, metadata !{!"0x102"}), !dbg !30
   store i32 0, i32* %j, align 4, !dbg !31
   %tmp = load i32* %i.addr, align 4, !dbg !32
   %cmp = icmp eq i32 %tmp, 42, !dbg !32
@@ -29,7 +29,7 @@ if.then:                                          ; preds = %entry
 
 if.end:                                           ; preds = %if.then, %entry
   store i1 false, i1* %nrvo, !dbg !36
-  call void @llvm.dbg.declare(metadata !{%class.A* %agg.result}, metadata !37), !dbg !39
+  call void @llvm.dbg.declare(metadata %class.A* %agg.result, metadata !37, metadata !{!"0x102"}), !dbg !39
   %tmp2 = load i32* %j, align 4, !dbg !40
   %x = getelementptr inbounds %class.A* %agg.result, i32 0, i32 0, !dbg !40
   store i32 %tmp2, i32* %x, align 4, !dbg !40
@@ -46,13 +46,13 @@ nrvo.skipdtor:                                    ; preds = %nrvo.unused, %if.en
   ret void, !dbg !42
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define linkonce_odr void @_ZN1AD1Ev(%class.A* %this) unnamed_addr ssp align 2 {
 entry:
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !43), !dbg !44
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !43, metadata !{!"0x102"}), !dbg !44
   %this1 = load %class.A** %this.addr
   call void @_ZN1AD2Ev(%class.A* %this1)
   ret void, !dbg !45
@@ -62,7 +62,7 @@ define linkonce_odr void @_ZN1AD2Ev(%class.A* %this) unnamed_addr nounwind ssp a
 entry:
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !46), !dbg !47
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !46, metadata !{!"0x102"}), !dbg !47
   %this1 = load %class.A** %this.addr
   %x = getelementptr inbounds %class.A* %this1, i32 0, i32 0, !dbg !48
   store i32 1, i32* %x, align 4, !dbg !48
@@ -72,56 +72,56 @@ entry:
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!52}
 
-!0 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"~A", metadata !"~A", metadata !"", i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589826, metadata !51, metadata !2, metadata !"A", i32 2, i64 128, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 2, size 128, align 32, offset 0] [def] [from ]
-!2 = metadata !{i32 786449, metadata !51, i32 4, metadata !"clang version 3.0 (trunk 130127)", i1 false, metadata !"", i32 0, metadata !24, metadata !24, metadata !50, null, null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786473, metadata !51} ; [ DW_TAG_file_type ]
-!4 = metadata !{metadata !5, metadata !7, metadata !8, metadata !9, metadata !0, metadata !10, metadata !14}
-!5 = metadata !{i32 786445, metadata !51, metadata !3, metadata !"x", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!6 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786445, metadata !51, metadata !3, metadata !"y", i32 2, i64 32, i64 32, i64 32, i32 0, metadata !6} ; [ DW_TAG_member ]
-!8 = metadata !{i32 786445, metadata !51, metadata !3, metadata !"z", i32 2, i64 32, i64 32, i64 64, i32 0, metadata !6} ; [ DW_TAG_member ]
-!9 = metadata !{i32 786445, metadata !51, metadata !3, metadata !"o", i32 2, i64 32, i64 32, i64 96, i32 0, metadata !6} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"A", metadata !"A", metadata !"", i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{null, metadata !13}
-!13 = metadata !{i32 786447, metadata !2, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ]
-!14 = metadata !{i32 786478, metadata !51, metadata !1, metadata !"A", metadata !"A", metadata !"", i32 2, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!15 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{null, metadata !13, metadata !17}
-!17 = metadata !{i32 589840, null, metadata !2, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_reference_type ]
-!18 = metadata !{i32 786470, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_const_type ]
-!19 = metadata !{i32 786478, metadata !51, metadata !3, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*, i32)* @_Z3fooi, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [foo]
-!20 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!21 = metadata !{metadata !1}
-!22 = metadata !{i32 786478, metadata !51, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD1Ev", i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AD1Ev, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [~A]
-!23 = metadata !{i32 786453, metadata !51, metadata !3, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !24, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!24 = metadata !{null}
-!25 = metadata !{i32 786478, metadata !51, metadata !3, metadata !"~A", metadata !"~A", metadata !"_ZN1AD2Ev", i32 2, metadata !23, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [~A]
-!26 = metadata !{i32 786689, metadata !19, metadata !"i", metadata !3, i32 16777220, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!27 = metadata !{i32 4, i32 11, metadata !19, null}
-!28 = metadata !{i32 786688, metadata !29, metadata !"j", metadata !3, i32 5, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
-!29 = metadata !{i32 786443, metadata !51, metadata !19, i32 4, i32 14, i32 0} ; [ DW_TAG_lexical_block ]
-!30 = metadata !{i32 5, i32 7, metadata !29, null}
-!31 = metadata !{i32 5, i32 12, metadata !29, null}
-!32 = metadata !{i32 6, i32 3, metadata !29, null}
-!33 = metadata !{i32 7, i32 5, metadata !34, null}
-!34 = metadata !{i32 786443, metadata !51, metadata !29, i32 6, i32 16, i32 1} ; [ DW_TAG_lexical_block ]
-!35 = metadata !{i32 8, i32 3, metadata !34, null}
-!36 = metadata !{i32 9, i32 9, metadata !29, null}
-!37 = metadata !{i32 786688, metadata !29, metadata !"my_a", metadata !3, i32 9, metadata !38, i32 0, null} ; [ DW_TAG_auto_variable ]
-!38 = metadata !{i32 589840, metadata !2, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ]
-!39 = metadata !{i32 9, i32 5, metadata !29, null}
-!40 = metadata !{i32 10, i32 3, metadata !29, null}
-!41 = metadata !{i32 11, i32 3, metadata !29, null}
-!42 = metadata !{i32 12, i32 1, metadata !29, null}
-!43 = metadata !{i32 786689, metadata !22, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64, null} ; [ DW_TAG_arg_variable ]
-!44 = metadata !{i32 2, i32 47, metadata !22, null}
-!45 = metadata !{i32 2, i32 61, metadata !22, null}
-!46 = metadata !{i32 786689, metadata !25, metadata !"this", metadata !3, i32 16777218, metadata !13, i32 64, null} ; [ DW_TAG_arg_variable ]
-!47 = metadata !{i32 2, i32 47, metadata !25, null}
-!48 = metadata !{i32 2, i32 54, metadata !49, null}
-!49 = metadata !{i32 786443, metadata !51, metadata !25, i32 2, i32 52, i32 2} ; [ DW_TAG_lexical_block ]
-!50 = metadata !{metadata !19, metadata !22, metadata !25}
-!51 = metadata !{metadata !"a.cc", metadata !"/private/tmp"}
-!52 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00~A\00~A\00\002\000\000\000\006\00256\000\000", !51, !1, !11, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x2\00A\002\00128\0032\000\000\000", !51, !2, null, !4, null, null, null} ; [ DW_TAG_class_type ] [A] [line 2, size 128, align 32, offset 0] [def] [from ]
+!2 = !{!"0x11\004\00clang version 3.0 (trunk 130127)\000\00\000\00\001", !51, !24, !24, !50, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x29", !51} ; [ DW_TAG_file_type ]
+!4 = !{!5, !7, !8, !9, !0, !10, !14}
+!5 = !{!"0xd\00x\002\0032\0032\000\000", !51, !3, !6} ; [ DW_TAG_member ]
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
+!7 = !{!"0xd\00y\002\0032\0032\0032\000", !51, !3, !6} ; [ DW_TAG_member ]
+!8 = !{!"0xd\00z\002\0032\0032\0064\000", !51, !3, !6} ; [ DW_TAG_member ]
+!9 = !{!"0xd\00o\002\0032\0032\0096\000", !51, !3, !6} ; [ DW_TAG_member ]
+!10 = !{!"0x2e\00A\00A\00\002\000\000\000\006\00320\000\000", !51, !1, !11, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!11 = !{!"0x15\00\000\000\000\000\000\000", !51, !3, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{null, !13}
+!13 = !{!"0xf\00\000\0064\0064\000\0064", !2, null, !1} ; [ DW_TAG_pointer_type ]
+!14 = !{!"0x2e\00A\00A\00\002\000\000\000\006\00320\000\000", !51, !1, !15, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!15 = !{!"0x15\00\000\000\000\000\000\000", !51, !3, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{null, !13, !17}
+!17 = !{!"0x10\00\000\000\000\000\000", null, !2, !18} ; [ DW_TAG_reference_type ]
+!18 = !{!"0x26\00\000\000\000\000\000", !2, null, !1} ; [ DW_TAG_const_type ]
+!19 = !{!"0x2e\00foo\00foo\00_Z3fooi\004\000\001\000\006\00256\000\000", !51, !3, !20, null, void (%class.A*, i32)* @_Z3fooi, null, null, null} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [foo]
+!20 = !{!"0x15\00\000\000\000\000\000\000", !51, !3, null, !21, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!21 = !{!1}
+!22 = !{!"0x2e\00~A\00~A\00_ZN1AD1Ev\002\000\001\000\006\00256\000\000", !51, !3, !23, null, void (%class.A*)* @_ZN1AD1Ev, null, null, null} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [~A]
+!23 = !{!"0x15\00\000\000\000\000\000\000", !51, !3, null, !24, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!24 = !{null}
+!25 = !{!"0x2e\00~A\00~A\00_ZN1AD2Ev\002\000\001\000\006\00256\000\000", !51, !3, !23, null, void (%class.A*)* @_ZN1AD2Ev, null, null, null} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [~A]
+!26 = !{!"0x101\00i\0016777220\000", !19, !3, !6} ; [ DW_TAG_arg_variable ]
+!27 = !MDLocation(line: 4, column: 11, scope: !19)
+!28 = !{!"0x100\00j\005\000", !29, !3, !6} ; [ DW_TAG_auto_variable ]
+!29 = !{!"0xb\004\0014\000", !51, !19} ; [ DW_TAG_lexical_block ]
+!30 = !MDLocation(line: 5, column: 7, scope: !29)
+!31 = !MDLocation(line: 5, column: 12, scope: !29)
+!32 = !MDLocation(line: 6, column: 3, scope: !29)
+!33 = !MDLocation(line: 7, column: 5, scope: !34)
+!34 = !{!"0xb\006\0016\001", !51, !29} ; [ DW_TAG_lexical_block ]
+!35 = !MDLocation(line: 8, column: 3, scope: !34)
+!36 = !MDLocation(line: 9, column: 9, scope: !29)
+!37 = !{!"0x100\00my_a\009\000", !29, !3, !38} ; [ DW_TAG_auto_variable ]
+!38 = !{!"0x10\00\000\000\000\000\000", !2, null, !1} ; [ DW_TAG_reference_type ]
+!39 = !MDLocation(line: 9, column: 5, scope: !29)
+!40 = !MDLocation(line: 10, column: 3, scope: !29)
+!41 = !MDLocation(line: 11, column: 3, scope: !29)
+!42 = !MDLocation(line: 12, column: 1, scope: !29)
+!43 = !{!"0x101\00this\0016777218\0064", !22, !3, !13} ; [ DW_TAG_arg_variable ]
+!44 = !MDLocation(line: 2, column: 47, scope: !22)
+!45 = !MDLocation(line: 2, column: 61, scope: !22)
+!46 = !{!"0x101\00this\0016777218\0064", !25, !3, !13} ; [ DW_TAG_arg_variable ]
+!47 = !MDLocation(line: 2, column: 47, scope: !25)
+!48 = !MDLocation(line: 2, column: 54, scope: !49)
+!49 = !{!"0xb\002\0052\002", !51, !25} ; [ DW_TAG_lexical_block ]
+!50 = !{!19, !22, !25}
+!51 = !{!"a.cc", !"/private/tmp"}
+!52 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-declare.ll b/test/DebugInfo/X86/dbg-declare.ll
index 241a5a1b5f11..2ede97b22023 100644
--- a/test/DebugInfo/X86/dbg-declare.ll
+++ b/test/DebugInfo/X86/dbg-declare.ll
@@ -7,21 +7,21 @@ entry:
   %saved_stack = alloca i8*
   %cleanup.dest.slot = alloca i32
   store i32* %x, i32** %x.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i32** %x.addr}, metadata !14), !dbg !15
+  call void @llvm.dbg.declare(metadata i32** %x.addr, metadata !14, metadata !{!"0x102"}), !dbg !15
   %0 = load i32** %x.addr, align 8, !dbg !16
   %1 = load i32* %0, align 4, !dbg !16
   %2 = zext i32 %1 to i64, !dbg !16
   %3 = call i8* @llvm.stacksave(), !dbg !16
   store i8* %3, i8** %saved_stack, !dbg !16
   %vla = alloca i8, i64 %2, align 16, !dbg !16
-  call void @llvm.dbg.declare(metadata !{i8* %vla}, metadata !18), !dbg !23
+  call void @llvm.dbg.declare(metadata i8* %vla, metadata !18, metadata !{!"0x102"}), !dbg !23
   store i32 1, i32* %cleanup.dest.slot
   %4 = load i8** %saved_stack, !dbg !24
   call void @llvm.stackrestore(i8* %4), !dbg !24
   ret i32 0, !dbg !25
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare i8* @llvm.stacksave() nounwind
 
@@ -30,27 +30,27 @@ declare void @llvm.stackrestore(i8*) nounwind
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!27}
 
-!0 = metadata !{i32 786449, metadata !26, i32 12, metadata !"clang version 3.1 (trunk 153698)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !26, metadata !0, metadata !"foo", metadata !"foo", metadata !"", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !10}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ]
-!14 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 5, i32 21, metadata !5, null}
-!16 = metadata !{i32 7, i32 13, metadata !17, null}
-!17 = metadata !{i32 786443, metadata !26, metadata !5, i32 6, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!18 = metadata !{i32 786688, metadata !17, metadata !"a", metadata !6, i32 7, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!19 = metadata !{i32 786433, null, null, null, i32 0, i64 0, i64 8, i32 0, i32 0, metadata !20, metadata !21, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 8, offset 0] [from char]
-!20 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!21 = metadata !{metadata !22}
-!22 = metadata !{i32 786465, i64 0, i64 -1}        ; [ DW_TAG_subrange_type ]
-!23 = metadata !{i32 7, i32 8, metadata !17, null}
-!24 = metadata !{i32 9, i32 1, metadata !17, null}
-!25 = metadata !{i32 8, i32 3, metadata !17, null}
-!26 = metadata !{metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm"}
-!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.1 (trunk 153698)\000\00\000\00\000", !26, !1, !1, !3, !1, null} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00foo\00foo\00\006\000\001\000\006\00256\000\000", !26, !0, !7, null, i32 (i32*)* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!6 = !{!"0x29", !26} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !10}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!10 = !{!"0xf\00\000\0064\0064\000\000", null, null, !11} ; [ DW_TAG_pointer_type ]
+!11 = !{!"0x26\00\000\000\000\000\000", null, null, !9} ; [ DW_TAG_const_type ]
+!14 = !{!"0x101\00x\0016777221\000", !5, !6, !10} ; [ DW_TAG_arg_variable ]
+!15 = !MDLocation(line: 5, column: 21, scope: !5)
+!16 = !MDLocation(line: 7, column: 13, scope: !17)
+!17 = !{!"0xb\006\001\000", !26, !5} ; [ DW_TAG_lexical_block ]
+!18 = !{!"0x100\00a\007\000", !17, !6, !19} ; [ DW_TAG_auto_variable ]
+!19 = !{!"0x1\00\000\000\008\000\000", null, null, !20, !21, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 8, offset 0] [from char]
+!20 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ]
+!21 = !{!22}
+!22 = !{!"0x21\000\00-1"}        ; [ DW_TAG_subrange_type ]
+!23 = !MDLocation(line: 7, column: 8, scope: !17)
+!24 = !MDLocation(line: 9, column: 1, scope: !17)
+!25 = !MDLocation(line: 8, column: 3, scope: !17)
+!26 = !{!"20020104-2.c", !"/Volumes/Sandbox/llvm"}
+!27 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-file-name.ll b/test/DebugInfo/X86/dbg-file-name.ll
index e9c61c13168f..94fdb3730ded 100644
--- a/test/DebugInfo/X86/dbg-file-name.ll
+++ b/test/DebugInfo/X86/dbg-file-name.ll
@@ -12,13 +12,13 @@ define i32 @main() nounwind {
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!12}
 
-!1 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !10, i32 1, metadata !"LLVM build 00", i1 true, metadata !"", i32 0, metadata !11, metadata !11, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 786468, metadata !10, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !10, metadata !1, metadata !"main", metadata !"main", metadata !"main", i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 786453, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !5}
-!9 = metadata !{metadata !6}
-!10 = metadata !{metadata !"simple.c", metadata !"/Users/manav/one/two"}
-!11 = metadata !{i32 0}
-!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!1 = !{!"0x29", !10} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\001\00LLVM build 00\001\00\000\00\000", !10, !11, !11, !9, null, null} ; [ DW_TAG_compile_unit ]
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", !10, !1} ; [ DW_TAG_base_type ]
+!6 = !{!"0x2e\00main\00main\00main\009\000\001\000\006\00256\000\000", !10, !1, !7, null, i32 ()* @main, null, null, null} ; [ DW_TAG_subprogram ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", !10, !1, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!5}
+!9 = !{!6}
+!10 = !{!"simple.c", !"/Users/manav/one/two"}
+!11 = !{i32 0}
+!12 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-i128-const.ll b/test/DebugInfo/X86/dbg-i128-const.ll
index 01b105fb100b..71654cb56ce9 100644
--- a/test/DebugInfo/X86/dbg-i128-const.ll
+++ b/test/DebugInfo/X86/dbg-i128-const.ll
@@ -5,30 +5,30 @@
 
 define i128 @__foo(i128 %a, i128 %b) nounwind {
 entry:
-  tail call void @llvm.dbg.value(metadata !0, i64 0, metadata !1), !dbg !11
+  tail call void @llvm.dbg.value(metadata i128 42 , i64 0, metadata !1, metadata !{!"0x102"}), !dbg !11
   %add = add i128 %a, %b, !dbg !11
   ret i128 %add, !dbg !11
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!5}
 !llvm.module.flags = !{!16}
 
-!0 = metadata !{i128 42 }
-!1 = metadata !{i32 786688, metadata !2, metadata !"MAX", metadata !4, i32 29, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ]
-!2 = metadata !{i32 786443, metadata !13, metadata !3, i32 26, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!3 = metadata !{i32 786478, metadata !13, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, i128 (i128, i128)* @__foo, null, null, null, i32 26} ; [ DW_TAG_subprogram ]
-!4 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786449, metadata !13, i32 1, metadata !"clang", i1 true, metadata !"", i32 0, metadata !15, metadata !15, metadata !12, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!6 = metadata !{i32 786453, metadata !13, metadata !4, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !8, metadata !8}
-!8 = metadata !{i32 786454, metadata !14, metadata !4, metadata !"ti_int", i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
-!9 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
-!10 = metadata !{i32 786468, metadata !13, metadata !4, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!11 = metadata !{i32 29, i32 0, metadata !2, null}
-!12 = metadata !{metadata !3}
-!13 = metadata !{metadata !"foo.c", metadata !"/tmp"}
-!14 = metadata !{metadata !"myint.h", metadata !"/tmp"}
-!15 = metadata !{i32 0}
-!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{i128 42 }
+!1 = !{!"0x100\00MAX\0029\000", !2, !4, !8} ; [ DW_TAG_auto_variable ]
+!2 = !{!"0xb\0026\000\000", !13, !3} ; [ DW_TAG_lexical_block ]
+!3 = !{!"0x2e\00__foo\00__foo\00__foo\0026\000\001\000\006\000\000\0026", !13, !4, !6, null, i128 (i128, i128)* @__foo, null, null, null} ; [ DW_TAG_subprogram ]
+!4 = !{!"0x29", !13} ; [ DW_TAG_file_type ]
+!5 = !{!"0x11\001\00clang\001\00\000\00\000", !13, !15, !15, !12, null,  null} ; [ DW_TAG_compile_unit ]
+!6 = !{!"0x15\00\000\000\000\000\000\000", !13, !4, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8, !8}
+!8 = !{!"0x16\00ti_int\0078\000\000\000\000", !14, !4, !10} ; [ DW_TAG_typedef ]
+!9 = !{!"0x29", !14} ; [ DW_TAG_file_type ]
+!10 = !{!"0x24\00\000\00128\00128\000\000\005", !13, !4} ; [ DW_TAG_base_type ]
+!11 = !MDLocation(line: 29, scope: !2)
+!12 = !{!3}
+!13 = !{!"foo.c", !"/tmp"}
+!14 = !{!"myint.h", !"/tmp"}
+!15 = !{i32 0}
+!16 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-merge-loc-entry.ll b/test/DebugInfo/X86/dbg-merge-loc-entry.ll
index 016d0a1e9f74..0d562223b8c3 100644
--- a/test/DebugInfo/X86/dbg-merge-loc-entry.ll
+++ b/test/DebugInfo/X86/dbg-merge-loc-entry.ll
@@ -14,8 +14,8 @@ target triple = "x86_64-apple-darwin8"
 
 define hidden i128 @__divti3(i128 %u, i128 %v) nounwind readnone {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i128 %u}, i64 0, metadata !14), !dbg !15
-  tail call void @llvm.dbg.value(metadata !16, i64 0, metadata !17), !dbg !21
+  tail call void @llvm.dbg.value(metadata i128 %u, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !15
+  tail call void @llvm.dbg.value(metadata i64 0, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !21
   br i1 undef, label %bb2, label %bb4, !dbg !22
 
 bb2:                                              ; preds = %entry
@@ -31,45 +31,45 @@ __udivmodti4.exit:                                ; preds = %bb4
   ret i128 undef, !dbg !27
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!32}
 
-!0 = metadata !{i32 786478, metadata !29, metadata !1, metadata !"__udivmodti4", metadata !"__udivmodti4", metadata !"", i32 879, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, null, i32 879} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 786473, metadata !29} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !29, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !31, metadata !31, metadata !28, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5, metadata !5, metadata !5, metadata !8}
-!5 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"UTItype", i32 166, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
-!6 = metadata !{i32 786473, metadata !30} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 786447, metadata !29, metadata !1, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
-!9 = metadata !{i32 786478, metadata !29, metadata !1, metadata !"__divti3", metadata !"__divti3", metadata !"__divti3", i32 1094, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i128 (i128, i128)* @__divti3, null, null, null, i32 1094} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 786453, metadata !29, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!11 = metadata !{metadata !12, metadata !12, metadata !12}
-!12 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"TItype", i32 160, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ]
-!13 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"", i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786689, metadata !9, metadata !"u", metadata !1, i32 1093, metadata !12, i32 0, null} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 1093, i32 0, metadata !9, null}
-!16 = metadata !{i64 0}
-!17 = metadata !{i32 786688, metadata !18, metadata !"c", metadata !1, i32 1095, metadata !19, i32 0, null} ; [ DW_TAG_auto_variable ]
-!18 = metadata !{i32 786443, metadata !29, metadata !9, i32 1094, i32 0, i32 13} ; [ DW_TAG_lexical_block ]
-!19 = metadata !{i32 786454, metadata !30, metadata !6, metadata !"word_type", i32 424, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_typedef ]
-!20 = metadata !{i32 786468, metadata !29, metadata !1, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!21 = metadata !{i32 1095, i32 0, metadata !18, null}
-!22 = metadata !{i32 1103, i32 0, metadata !18, null}
-!23 = metadata !{i32 1104, i32 0, metadata !18, null}
-!24 = metadata !{i32 1003, i32 0, metadata !25, metadata !26}
-!25 = metadata !{i32 786443, metadata !29, metadata !0, i32 879, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!26 = metadata !{i32 1107, i32 0, metadata !18, null}
-!27 = metadata !{i32 1111, i32 0, metadata !18, null}
-!28 = metadata !{metadata !0, metadata !9}
-!29 = metadata !{metadata !"foobar.c", metadata !"/tmp"}
-!30 = metadata !{metadata !"foobar.h", metadata !"/tmp"}
-!31 = metadata !{i32 0}
-!32 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00__udivmodti4\00__udivmodti4\00\00879\001\001\000\006\00256\001\00879", !29, !1, !3, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !29} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", !29, !31, !31, !28, null,  null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !29, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5, !5, !5, !8}
+!5 = !{!"0x16\00UTItype\00166\000\000\000\000", !30, !6, !7} ; [ DW_TAG_typedef ]
+!6 = !{!"0x29", !30} ; [ DW_TAG_file_type ]
+!7 = !{!"0x24\00\000\00128\00128\000\000\007", !29, !1} ; [ DW_TAG_base_type ]
+!8 = !{!"0xf\00\000\0064\0064\000\000", !29, !1, !5} ; [ DW_TAG_pointer_type ]
+!9 = !{!"0x2e\00__divti3\00__divti3\00__divti3\001094\000\001\000\006\00256\001\001094", !29, !1, !10, null, i128 (i128, i128)* @__divti3, null, null, null} ; [ DW_TAG_subprogram ]
+!10 = !{!"0x15\00\000\000\000\000\000\000", !29, !1, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !{!12, !12, !12}
+!12 = !{!"0x16\00TItype\00160\000\000\000\000", !30, !6, !13} ; [ DW_TAG_typedef ]
+!13 = !{!"0x24\00\000\00128\00128\000\000\005", !29, !1} ; [ DW_TAG_base_type ]
+!14 = !{!"0x101\00u\001093\000", !9, !1, !12} ; [ DW_TAG_arg_variable ]
+!15 = !MDLocation(line: 1093, scope: !9)
+!16 = !{i64 0}
+!17 = !{!"0x100\00c\001095\000", !18, !1, !19} ; [ DW_TAG_auto_variable ]
+!18 = !{!"0xb\001094\000\0013", !29, !9} ; [ DW_TAG_lexical_block ]
+!19 = !{!"0x16\00word_type\00424\000\000\000\000", !30, !6, !20} ; [ DW_TAG_typedef ]
+!20 = !{!"0x24\00long int\000\0064\0064\000\000\005", !29, !1} ; [ DW_TAG_base_type ]
+!21 = !MDLocation(line: 1095, scope: !18)
+!22 = !MDLocation(line: 1103, scope: !18)
+!23 = !MDLocation(line: 1104, scope: !18)
+!24 = !MDLocation(line: 1003, scope: !25, inlinedAt: !26)
+!25 = !{!"0xb\00879\000\000", !29, !0} ; [ DW_TAG_lexical_block ]
+!26 = !MDLocation(line: 1107, scope: !18)
+!27 = !MDLocation(line: 1111, scope: !18)
+!28 = !{!0, !9}
+!29 = !{!"foobar.c", !"/tmp"}
+!30 = !{!"foobar.h", !"/tmp"}
+!31 = !{i32 0}
+!32 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-prolog-end.ll b/test/DebugInfo/X86/dbg-prolog-end.ll
index a7c6cb5438ec..4aaaf4a9b2a2 100644
--- a/test/DebugInfo/X86/dbg-prolog-end.ll
+++ b/test/DebugInfo/X86/dbg-prolog-end.ll
@@ -8,8 +8,8 @@ entry:
   %i.addr = alloca i32, align 4
   %j = alloca i32, align 4
   store i32 %i, i32* %i.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !7), !dbg !8
-  call void @llvm.dbg.declare(metadata !{i32* %j}, metadata !9), !dbg !11
+  call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !7, metadata !{!"0x102"}), !dbg !8
+  call void @llvm.dbg.declare(metadata i32* %j, metadata !9, metadata !{!"0x102"}), !dbg !11
   store i32 2, i32* %j, align 4, !dbg !12
   %tmp = load i32* %j, align 4, !dbg !13
   %inc = add nsw i32 %tmp, 1, !dbg !13
@@ -22,7 +22,7 @@ entry:
   ret i32 %tmp3, !dbg !15
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define i32 @main() nounwind ssp {
 entry:
@@ -34,26 +34,26 @@ entry:
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!21}
-!18 = metadata !{metadata !1, metadata !6}
+!18 = !{!1, !6}
 
-!0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.0 (trunk 131100)", i1 false, metadata !"", i32 0, metadata !20, metadata !20, metadata !18, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !19, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!2 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !19, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !19, metadata !2, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, null, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
-!7 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 1, i32 13, metadata !1, null}
-!9 = metadata !{i32 786688, metadata !10, metadata !"j", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 786443, metadata !19, metadata !1, i32 1, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
-!11 = metadata !{i32 2, i32 6, metadata !10, null}
-!12 = metadata !{i32 2, i32 11, metadata !10, null}
-!13 = metadata !{i32 3, i32 2, metadata !10, null}
-!14 = metadata !{i32 4, i32 2, metadata !10, null}
-!15 = metadata !{i32 5, i32 2, metadata !10, null}
-!16 = metadata !{i32 8, i32 2, metadata !17, null}
-!17 = metadata !{i32 786443, metadata !19, metadata !6, i32 7, i32 12, i32 1} ; [ DW_TAG_lexical_block ]
-!19 = metadata !{metadata !"/tmp/a.c", metadata !"/private/tmp"}
-!20 = metadata !{i32 0}
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.0 (trunk 131100)\000\00\000\00\000", !19, !20, !20, !18, null,  null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", !19, !2, !3, null, i32 (i32)* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!2 = !{!"0x29", !19} ; [ DW_TAG_file_type ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !19, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
+!6 = !{!"0x2e\00main\00main\00\007\000\001\000\006\000\000\007", !19, !2, !3, null, i32 ()* @main, null, null, null} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
+!7 = !{!"0x101\00i\0016777217\000", !1, !2, !5} ; [ DW_TAG_arg_variable ]
+!8 = !MDLocation(line: 1, column: 13, scope: !1)
+!9 = !{!"0x100\00j\002\000", !10, !2, !5} ; [ DW_TAG_auto_variable ]
+!10 = !{!"0xb\001\0016\000", !19, !1} ; [ DW_TAG_lexical_block ]
+!11 = !MDLocation(line: 2, column: 6, scope: !10)
+!12 = !MDLocation(line: 2, column: 11, scope: !10)
+!13 = !MDLocation(line: 3, column: 2, scope: !10)
+!14 = !MDLocation(line: 4, column: 2, scope: !10)
+!15 = !MDLocation(line: 5, column: 2, scope: !10)
+!16 = !MDLocation(line: 8, column: 2, scope: !17)
+!17 = !{!"0xb\007\0012\001", !19, !6} ; [ DW_TAG_lexical_block ]
+!19 = !{!"/tmp/a.c", !"/private/tmp"}
+!20 = !{i32 0}
+!21 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-subrange.ll b/test/DebugInfo/X86/dbg-subrange.ll
index f8761d012e86..89754b9b3280 100644
--- a/test/DebugInfo/X86/dbg-subrange.ll
+++ b/test/DebugInfo/X86/dbg-subrange.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.7.2"
 
 @s = common global [4294967296 x i8] zeroinitializer, align 16
-;CHECK: .long	4294967295
+; CHECK: .quad 4294967296 ## DW_AT_count
 
 define void @bar() nounwind uwtable ssp {
 entry:
@@ -15,21 +15,21 @@ entry:
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!22}
 
-!0 = metadata !{i32 786449, metadata !21, i32 12, metadata !"clang version 3.1 (trunk 144833)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !21, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [bar]
-!6 = metadata !{i32 720937, metadata !21} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null}
-!11 = metadata !{metadata !13}
-!13 = metadata !{i32 720948, i32 0, null, metadata !"s", metadata !"s", metadata !"", metadata !6, i32 2, metadata !14, i32 0, i32 1, [4294967296 x i8]* @s, null} ; [ DW_TAG_variable ]
-!14 = metadata !{i32 720897, null, null, null, i32 0, i64 34359738368, i64 8, i32 0, i32 0, metadata !15, metadata !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 34359738368, align 8, offset 0] [from char]
-!15 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!16 = metadata !{metadata !17}
-!17 = metadata !{i32 720929, i64 0, i64 4294967296} ; [ DW_TAG_subrange_type ]
-!18 = metadata !{i32 5, i32 3, metadata !19, null}
-!19 = metadata !{i32 786443, metadata !21, metadata !5, i32 4, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!20 = metadata !{i32 6, i32 1, metadata !19, null}
-!21 = metadata !{metadata !"small.c", metadata !"/private/tmp"}
-!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.1 (trunk 144833)\000\00\000\00\000", !21, !1, !1, !3, !11,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00bar\00bar\00\004\000\001\000\006\00256\000\000", !21, !6, !7, null, void ()* @bar, null, null, null} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [bar]
+!6 = !{!"0x29", !21} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null}
+!11 = !{!13}
+!13 = !{!"0x34\00s\00s\00\002\000\001", null, !6, !14, [4294967296 x i8]* @s, null} ; [ DW_TAG_variable ]
+!14 = !{!"0x1\00\000\0034359738368\008\000\000", null, null, !15, !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 34359738368, align 8, offset 0] [from char]
+!15 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ]
+!16 = !{!17}
+!17 = !{!"0x21\000\004294967296"} ; [ DW_TAG_subrange_type ]
+!18 = !MDLocation(line: 5, column: 3, scope: !19)
+!19 = !{!"0xb\004\001\000", !21, !5} ; [ DW_TAG_lexical_block ]
+!20 = !MDLocation(line: 6, column: 1, scope: !19)
+!21 = !{!"small.c", !"/private/tmp"}
+!22 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-value-const-byref.ll b/test/DebugInfo/X86/dbg-value-const-byref.ll
index 23fa3520a7d1..c8ffba871356 100644
--- a/test/DebugInfo/X86/dbg-value-const-byref.ll
+++ b/test/DebugInfo/X86/dbg-value-const-byref.ll
@@ -50,13 +50,13 @@ target triple = "x86_64-apple-macosx10.9.0"
 define i32 @foo() #0 {
 entry:
   %i = alloca i32, align 4
-  call void @llvm.dbg.value(metadata !14, i64 0, metadata !10), !dbg !15
+  call void @llvm.dbg.value(metadata i32 3, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !15
   %call = call i32 @f3(i32 3) #3, !dbg !16
-  call void @llvm.dbg.value(metadata !17, i64 0, metadata !10), !dbg !18
+  call void @llvm.dbg.value(metadata i32 7, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !18
   %call1 = call i32 (...)* @f1() #3, !dbg !19
-  call void @llvm.dbg.value(metadata !{i32 %call1}, i64 0, metadata !10), !dbg !19
+  call void @llvm.dbg.value(metadata i32 %call1, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !19
   store i32 %call1, i32* %i, align 4, !dbg !19, !tbaa !20
-  call void @llvm.dbg.value(metadata !{i32* %i}, i64 0, metadata !10), !dbg !24
+  call void @llvm.dbg.value(metadata i32* %i, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !24
   call void @f2(i32* %i) #3, !dbg !24
   ret i32 0, !dbg !25
 }
@@ -68,7 +68,7 @@ declare i32 @f1(...)
 declare void @f2(i32*)
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #2
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
 
 attributes #0 = { nounwind ssp uwtable }
 attributes #2 = { nounwind readnone }
@@ -78,29 +78,29 @@ attributes #3 = { nounwind }
 !llvm.module.flags = !{!11, !12}
 !llvm.ident = !{!13}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [dbg-value-const-byref.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"dbg-value-const-byref.c", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @foo, null, null, metadata !9, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [dbg-value-const-byref.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786688, metadata !4, metadata !"i", metadata !5, i32 6, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 6]
-!11 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!13 = metadata !{metadata !"clang version 3.5.0 "}
-!14 = metadata !{i32 3}
-!15 = metadata !{i32 6, i32 0, metadata !4, null}
-!16 = metadata !{i32 7, i32 0, metadata !4, null}
-!17 = metadata !{i32 7}
-!18 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
-!19 = metadata !{i32 9, i32 0, metadata !4, null}
-!20 = metadata !{metadata !21, metadata !21, i64 0}
-!21 = metadata !{metadata !"int", metadata !22, i64 0}
-!22 = metadata !{metadata !"omnipotent char", metadata !23, i64 0}
-!23 = metadata !{metadata !"Simple C/C++ TBAA"}
-!24 = metadata !{i32 10, i32 0, metadata !4, null}
-!25 = metadata !{i32 11, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5.0 \001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [dbg-value-const-byref.c] [DW_LANG_C99]
+!1 = !{!"dbg-value-const-byref.c", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\005\000\001\000\006\000\001\005", !1, !5, !6, null, i32 ()* @foo, null, null, !9} ; [ DW_TAG_subprogram ] [line 5] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [dbg-value-const-byref.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!10}
+!10 = !{!"0x100\00i\006\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 6]
+!11 = !{i32 2, !"Dwarf Version", i32 2}
+!12 = !{i32 1, !"Debug Info Version", i32 2}
+!13 = !{!"clang version 3.5.0 "}
+!14 = !{i32 3}
+!15 = !MDLocation(line: 6, scope: !4)
+!16 = !MDLocation(line: 7, scope: !4)
+!17 = !{i32 7}
+!18 = !MDLocation(line: 8, scope: !4)
+!19 = !MDLocation(line: 9, scope: !4)
+!20 = !{!21, !21, i64 0}
+!21 = !{!"int", !22, i64 0}
+!22 = !{!"omnipotent char", !23, i64 0}
+!23 = !{!"Simple C/C++ TBAA"}
+!24 = !MDLocation(line: 10, scope: !4)
+!25 = !MDLocation(line: 11, scope: !4)
diff --git a/test/DebugInfo/X86/dbg-value-dag-combine.ll b/test/DebugInfo/X86/dbg-value-dag-combine.ll
index 12aa61ba125e..9392da952f98 100644
--- a/test/DebugInfo/X86/dbg-value-dag-combine.ll
+++ b/test/DebugInfo/X86/dbg-value-dag-combine.ll
@@ -4,21 +4,19 @@ target triple = "x86_64-apple-darwin10.0.0"
 ; PR 9817
 
 
-declare  <4 x i32> @__amdil_get_global_id_int()
-declare  void @llvm.dbg.value(metadata , i64 , metadata )
+declare <4 x i32> @__amdil_get_global_id_int()
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
 define void @__OpenCL_test_kernel(i32 addrspace(1)* %ip) nounwind {
 entry:
-  call void @llvm.dbg.value(metadata !{i32 addrspace(1)* %ip}, i64 0, metadata
-!7), !dbg !8
+  call void @llvm.dbg.value(metadata i32 addrspace(1)* %ip, i64 0, metadata !7, metadata !{!"0x102"}), !dbg !8
   %0 = call <4 x i32> @__amdil_get_global_id_int() nounwind
   %1 = extractelement <4 x i32> %0, i32 0
-  call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !9), !dbg !11
-  call void @llvm.dbg.value(metadata !12, i64 0, metadata !13), !dbg !14
+  call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !9, metadata !{!"0x102"}), !dbg !11
+  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !14
   %tmp2 = load i32 addrspace(1)* %ip, align 4, !dbg !15
   %tmp3 = add i32 0, %tmp2, !dbg !15
 ; CHECK:  ##DEBUG_VALUE: idx <- E{{..$}}
-  call void @llvm.dbg.value(metadata !{i32 %tmp3}, i64 0, metadata !13), !dbg
-!15
+  call void @llvm.dbg.value(metadata i32 %tmp3, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !15
   %arrayidx = getelementptr i32 addrspace(1)* %ip, i32 %1, !dbg !16
   store i32 %tmp3, i32 addrspace(1)* %arrayidx, align 4, !dbg !16
   ret void, !dbg !17
@@ -26,24 +24,24 @@ entry:
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!20}
 
-!0 = metadata !{i32 786478, metadata !19, metadata !1, metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", metadata !"__OpenCL_test_kernel", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [__OpenCL_test_kernel]
-!1 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !19, i32 1, metadata !"clc", i1 false, metadata !"", i32 0, metadata !12, metadata !12, metadata !18, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !19, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
-!6 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786689, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 1, i32 42, metadata !0, null}
-!9 = metadata !{i32 786688, metadata !10, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 786443, metadata !19, metadata !0, i32 2, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!11 = metadata !{i32 3, i32 41, metadata !10, null}
-!12 = metadata !{i32 0}
-!13 = metadata !{i32 786688, metadata !10, metadata !"idx", metadata !1, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
-!14 = metadata !{i32 4, i32 20, metadata !10, null}
-!15 = metadata !{i32 5, i32 15, metadata !10, null}
-!16 = metadata !{i32 6, i32 18, metadata !10, null}
-!17 = metadata !{i32 7, i32 1, metadata !0, null}
-!18 = metadata !{metadata !0}
-!19 = metadata !{metadata !"OCL6368.tmp.cl", metadata !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp"}
-!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00__OpenCL_test_kernel\00__OpenCL_test_kernel\00__OpenCL_test_kernel\002\000\001\000\006\000\000\000", !19, !1, !3, null, void (i32 addrspace(1)*)* @__OpenCL_test_kernel, null, null, null} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [__OpenCL_test_kernel]
+!1 = !{!"0x29", !19} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\001\00clc\000\00\000\00\001", !19, !12, !12, !18, null,  null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !19, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null, !5}
+!5 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !6} ; [ DW_TAG_pointer_type ]
+!6 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, !2} ; [ DW_TAG_base_type ]
+!7 = !{!"0x101\00ip\001\000", !0, !1, !5} ; [ DW_TAG_arg_variable ]
+!8 = !MDLocation(line: 1, column: 42, scope: !0)
+!9 = !{!"0x100\00gid\003\000", !10, !1, !6} ; [ DW_TAG_auto_variable ]
+!10 = !{!"0xb\002\001\000", !19, !0} ; [ DW_TAG_lexical_block ]
+!11 = !MDLocation(line: 3, column: 41, scope: !10)
+!12 = !{i32 0}
+!13 = !{!"0x100\00idx\004\000", !10, !1, !6} ; [ DW_TAG_auto_variable ]
+!14 = !MDLocation(line: 4, column: 20, scope: !10)
+!15 = !MDLocation(line: 5, column: 15, scope: !10)
+!16 = !MDLocation(line: 6, column: 18, scope: !10)
+!17 = !MDLocation(line: 7, column: 1, scope: !0)
+!18 = !{!0}
+!19 = !{!"OCL6368.tmp.cl", !"E:\5CUsers\5Cmvillmow.AMD\5CAppData\5CLocal\5CTemp"}
+!20 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
index 4d18f7dc3063..31833a8edf57 100644
--- a/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
+++ b/test/DebugInfo/X86/dbg-value-inlined-parameter.ll
@@ -6,26 +6,26 @@
 ; RUN:     | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=CHECK --check-prefix=DARWIN %s
 
 ; CHECK: DW_TAG_subprogram
-; CHECK:   DW_AT_abstract_origin {{.*}}{[[ABS:.*]]}
+; CHECK:   DW_AT_abstract_origin {{.*}} "foo"
 ; CHECK:   DW_TAG_formal_parameter
 ; CHECK-NOT: DW_TAG
-; CHECK:     DW_AT_abstract_origin {{.*}}{[[ABS_SP:.*]]}
+; CHECK:     DW_AT_abstract_origin {{.*}} "sp"
 ; CHECK:   DW_TAG_formal_parameter
 ; CHECK-NOT: DW_TAG
-; CHECK:     DW_AT_abstract_origin {{.*}}{[[ABS_NUMS:.*]]}
+; CHECK:     DW_AT_abstract_origin {{.*}} "nums"
 
-; CHECK: [[ABS]]: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
 ; CHECK:   DW_AT_name {{.*}} "foo"
-; CHECK: [[ABS_SP]]:   DW_TAG_formal_parameter
+; CHECK:   DW_TAG_formal_parameter
 ; CHECK-NOT: DW_TAG
 ; CHECK:     DW_AT_name {{.*}} "sp"
-; CHECK: [[ABS_NUMS]]:  DW_TAG_formal_parameter
+; CHECK:   DW_TAG_formal_parameter
 ; CHECK-NOT: DW_TAG
 ; CHECK:     DW_AT_name {{.*}} "nums"
 
 ;CHECK: DW_TAG_inlined_subroutine
-;CHECK-NEXT: DW_AT_abstract_origin {{.*}}{[[ABS]]}
+;CHECK-NEXT: DW_AT_abstract_origin {{.*}} "foo"
 ;CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr]
 ;CHECK-NEXT: DW_AT_high_pc [DW_FORM_data4]
 ;CHECK-NEXT: DW_AT_call_file
@@ -34,9 +34,9 @@
 ;CHECK: DW_TAG_formal_parameter
 ;FIXME: Linux shouldn't drop this parameter either...
 ;CHECK-NOT: DW_TAG
-;DARWIN:   DW_AT_abstract_origin {{.*}}{[[ABS_SP]]}
+;DARWIN:   DW_AT_abstract_origin {{.*}} "sp"
 ;DARWIN: DW_TAG_formal_parameter
-;CHECK: DW_AT_abstract_origin {{.*}}{[[ABS_NUMS]]}
+;CHECK: DW_AT_abstract_origin {{.*}} "nums"
 ;CHECK-NOT: DW_TAG_formal_parameter
 
 %struct.S1 = type { float*, i32 }
@@ -45,8 +45,8 @@
 
 define i32 @foo(%struct.S1* nocapture %sp, i32 %nums) nounwind optsize ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !{%struct.S1* %sp}, i64 0, metadata !9), !dbg !20
-  tail call void @llvm.dbg.value(metadata !{i32 %nums}, i64 0, metadata !18), !dbg !21
+  tail call void @llvm.dbg.value(metadata %struct.S1* %sp, i64 0, metadata !9, metadata !{!"0x102"}), !dbg !20
+  tail call void @llvm.dbg.value(metadata i32 %nums, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !21
   %tmp2 = getelementptr inbounds %struct.S1* %sp, i64 0, i32 1, !dbg !22
   store i32 %nums, i32* %tmp2, align 4, !dbg !22
   %call = tail call float* @bar(i32 %nums) nounwind optsize, !dbg !27
@@ -61,57 +61,57 @@ declare float* @bar(i32) optsize
 
 define void @foobar() nounwind optsize ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !30, i64 0, metadata !9) nounwind, !dbg !31
-  tail call void @llvm.dbg.value(metadata !34, i64 0, metadata !18) nounwind, !dbg !35
+  tail call void @llvm.dbg.value(metadata %struct.S1* @p, i64 0, metadata !9, metadata !{!"0x102"}) nounwind, !dbg !31
+  tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !18, metadata !{!"0x102"}) nounwind, !dbg !35
   store i32 1, i32* getelementptr inbounds (%struct.S1* @p, i64 0, i32 1), align 8, !dbg !36
   %call.i = tail call float* @bar(i32 1) nounwind optsize, !dbg !37
   store float* %call.i, float** getelementptr inbounds (%struct.S1* @p, i64 0, i32 0), align 8, !dbg !37
   ret void, !dbg !38
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!43}
 
-!0 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 8, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.S1*, i32)* @foo, null, null, metadata !41, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [foo]
-!1 = metadata !{i32 786473, metadata !42} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !42, i32 12, metadata !"clang version 2.9 (trunk 125693)", i1 true, metadata !"", i32 0, metadata !8, metadata !8, metadata !39, metadata !40,  metadata !44, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foobar", metadata !"foobar", metadata !"", i32 15, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void ()* @foobar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 15] [def] [scope 0] [foobar]
-!7 = metadata !{i32 786453, metadata !42, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null}
-!9 = metadata !{i32 786689, metadata !0, metadata !"sp", metadata !1, i32 16777223, metadata !10, i32 0, metadata !32} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786454, metadata !42, metadata !2, metadata !"S1", i32 4, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
-!12 = metadata !{i32 786451, metadata !42, metadata !2, metadata !"S1", i32 1, i64 128, i64 64, i32 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [S1] [line 1, size 128, align 64, offset 0] [def] [from ]
-!13 = metadata !{metadata !14, metadata !17}
-!14 = metadata !{i32 786445, metadata !42, metadata !1, metadata !"m", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ]
-!15 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ]
-!16 = metadata !{i32 786468, null, metadata !2, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!17 = metadata !{i32 786445, metadata !42, metadata !1, metadata !"nums", i32 3, i64 32, i64 32, i64 64, i32 0, metadata !5} ; [ DW_TAG_member ]
-!18 = metadata !{i32 786689, metadata !0, metadata !"nums", metadata !1, i32 33554439, metadata !5, i32 0, metadata !32} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 786484, i32 0, metadata !2, metadata !"p", metadata !"p", metadata !"", metadata !1, i32 14, metadata !11, i32 0, i32 1, %struct.S1* @p, null} ; [ DW_TAG_variable ]
-!20 = metadata !{i32 7, i32 13, metadata !0, null}
-!21 = metadata !{i32 7, i32 21, metadata !0, null}
-!22 = metadata !{i32 9, i32 3, metadata !23, null}
-!23 = metadata !{i32 786443, metadata !1, metadata !0, i32 8, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!27 = metadata !{i32 10, i32 3, metadata !23, null}
-!29 = metadata !{i32 11, i32 3, metadata !23, null}
-!30 = metadata !{%struct.S1* @p}
-!31 = metadata !{i32 7, i32 13, metadata !0, metadata !32}
-!32 = metadata !{i32 16, i32 3, metadata !33, null}
-!33 = metadata !{i32 786443, metadata !1, metadata !6, i32 15, i32 15, i32 1} ; [ DW_TAG_lexical_block ]
-!34 = metadata !{i32 1}
-!35 = metadata !{i32 7, i32 21, metadata !0, metadata !32}
-!36 = metadata !{i32 9, i32 3, metadata !23, metadata !32}
-!37 = metadata !{i32 10, i32 3, metadata !23, metadata !32}
-!38 = metadata !{i32 17, i32 1, metadata !33, null}
-!39 = metadata !{metadata !0, metadata !6}
-!40 = metadata !{metadata !19}
-!41 = metadata !{metadata !9, metadata !18}
-!42 = metadata !{metadata !"nm2.c", metadata !"/private/tmp"}
-!43 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!44 = metadata !{}
+!0 = !{!"0x2e\00foo\00foo\00\008\000\001\000\006\00256\001\008", !1, !1, !3, null, i32 (%struct.S1*, i32)* @foo, null, null, !41} ; [ DW_TAG_subprogram ] [line 8] [def] [foo]
+!1 = !{!"0x29", !42} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 2.9 (trunk 125693)\001\00\000\00\001", !42, !8, !8, !39, !40,  !44} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !42, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
+!6 = !{!"0x2e\00foobar\00foobar\00\0015\000\001\000\006\000\001\000", !1, !1, !7, null, void ()* @foobar, null, null, null} ; [ DW_TAG_subprogram ] [line 15] [def] [scope 0] [foobar]
+!7 = !{!"0x15\00\000\000\000\000\000\000", !42, !1, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null}
+!9 = !{!"0x101\00sp\0016777223\000", !0, !1, !10, !32} ; [ DW_TAG_arg_variable ]
+!10 = !{!"0xf\00\000\0064\0064\000\000", null, !2, !11} ; [ DW_TAG_pointer_type ]
+!11 = !{!"0x16\00S1\004\000\000\000\000", !42, !2, !12} ; [ DW_TAG_typedef ]
+!12 = !{!"0x13\00S1\001\00128\0064\000\000\000", !42, !2, null, !13, null, null, null} ; [ DW_TAG_structure_type ] [S1] [line 1, size 128, align 64, offset 0] [def] [from ]
+!13 = !{!14, !17}
+!14 = !{!"0xd\00m\002\0064\0064\000\000", !42, !1, !15} ; [ DW_TAG_member ]
+!15 = !{!"0xf\00\000\0064\0064\000\000", null, !2, !16} ; [ DW_TAG_pointer_type ]
+!16 = !{!"0x24\00float\000\0032\0032\000\000\004", null, !2} ; [ DW_TAG_base_type ]
+!17 = !{!"0xd\00nums\003\0032\0032\0064\000", !42, !1, !5} ; [ DW_TAG_member ]
+!18 = !{!"0x101\00nums\0033554439\000", !0, !1, !5, !32} ; [ DW_TAG_arg_variable ]
+!19 = !{!"0x34\00p\00p\00\0014\000\001", !2, !1, !11, %struct.S1* @p, null} ; [ DW_TAG_variable ]
+!20 = !MDLocation(line: 7, column: 13, scope: !0)
+!21 = !MDLocation(line: 7, column: 21, scope: !0)
+!22 = !MDLocation(line: 9, column: 3, scope: !23)
+!23 = !{!"0xb\008\001\000", !1, !0} ; [ DW_TAG_lexical_block ]
+!27 = !MDLocation(line: 10, column: 3, scope: !23)
+!29 = !MDLocation(line: 11, column: 3, scope: !23)
+!30 = !{%struct.S1* @p}
+!31 = !MDLocation(line: 7, column: 13, scope: !0, inlinedAt: !32)
+!32 = !MDLocation(line: 16, column: 3, scope: !33)
+!33 = !{!"0xb\0015\0015\001", !1, !6} ; [ DW_TAG_lexical_block ]
+!34 = !{i32 1}
+!35 = !MDLocation(line: 7, column: 21, scope: !0, inlinedAt: !32)
+!36 = !MDLocation(line: 9, column: 3, scope: !23, inlinedAt: !32)
+!37 = !MDLocation(line: 10, column: 3, scope: !23, inlinedAt: !32)
+!38 = !MDLocation(line: 17, column: 1, scope: !33)
+!39 = !{!0, !6}
+!40 = !{!19}
+!41 = !{!9, !18}
+!42 = !{!"nm2.c", !"/private/tmp"}
+!43 = !{i32 1, !"Debug Info Version", i32 2}
+!44 = !{}
diff --git a/test/DebugInfo/X86/dbg-value-isel.ll b/test/DebugInfo/X86/dbg-value-isel.ll
index 155f76f7ab5f..a908b322d76e 100644
--- a/test/DebugInfo/X86/dbg-value-isel.ll
+++ b/test/DebugInfo/X86/dbg-value-isel.ll
@@ -13,7 +13,7 @@ target triple = "x86_64-apple-darwin10.0.0"
 
 define void @__OpenCL_nbt02_kernel(i32 addrspace(1)* %ip) nounwind {
 entry:
-  call void @llvm.dbg.value(metadata !{i32 addrspace(1)* %ip}, i64 0, metadata !8), !dbg !9
+  call void @llvm.dbg.value(metadata i32 addrspace(1)* %ip, i64 0, metadata !8, metadata !{!"0x102"}), !dbg !9
   %0 = call <4 x i32> @__amdil_get_local_id_int() nounwind
   %1 = extractelement <4 x i32> %0, i32 0
   br label %2
@@ -28,7 +28,7 @@ entry:
 
 get_local_id.exit:                                ; preds = %4
   %6 = phi i32 [ %5, %4 ]
-  call void @llvm.dbg.value(metadata !{i32 %6}, i64 0, metadata !10), !dbg !12
+  call void @llvm.dbg.value(metadata i32 %6, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !12
   %7 = call <4 x i32> @__amdil_get_global_id_int() nounwind, !dbg !12
   %8 = extractelement <4 x i32> %7, i32 0, !dbg !12
   br label %9
@@ -43,7 +43,7 @@ get_local_id.exit:                                ; preds = %4
 
 get_global_id.exit:                               ; preds = %11
   %13 = phi i32 [ %12, %11 ]
-  call void @llvm.dbg.value(metadata !{i32 %13}, i64 0, metadata !13), !dbg !14
+  call void @llvm.dbg.value(metadata i32 %13, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !14
   %14 = call <4 x i32> @__amdil_get_local_size_int() nounwind
   %15 = extractelement <4 x i32> %14, i32 0
   br label %16
@@ -58,7 +58,7 @@ get_global_id.exit:                               ; preds = %11
 
 get_local_size.exit:                              ; preds = %18
   %20 = phi i32 [ %19, %18 ]
-  call void @llvm.dbg.value(metadata !{i32 %20}, i64 0, metadata !15), !dbg !16
+  call void @llvm.dbg.value(metadata i32 %20, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !16
   %tmp5 = add i32 %6, %13, !dbg !17
   %tmp7 = add i32 %tmp5, %20, !dbg !17
   store i32 %tmp7, i32 addrspace(1)* %ip, align 4, !dbg !17
@@ -68,7 +68,7 @@ return:                                           ; preds = %get_local_size.exit
   ret void, !dbg !18
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare <4 x i32> @__amdil_get_local_size_int() nounwind
 
@@ -76,31 +76,31 @@ declare <4 x i32> @__amdil_get_local_id_int() nounwind
 
 declare <4 x i32> @__amdil_get_global_id_int() nounwind
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!22}
 
-!0 = metadata !{i32 786478, metadata !20, metadata !1, metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", metadata !"__OpenCL_nbt02_kernel", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [__OpenCL_nbt02_kernel]
-!1 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !20, i32 1, metadata !"clc", i1 false, metadata !"", i32 0, metadata !21, metadata !21, metadata !19, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ]
-!6 = metadata !{i32 589846, metadata !20, metadata !2, metadata !"uint", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ]
-!7 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 786689, metadata !0, metadata !"ip", metadata !1, i32 1, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!9 = metadata !{i32 1, i32 32, metadata !0, null}
-!10 = metadata !{i32 786688, metadata !11, metadata !"tid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 786443, metadata !1, metadata !0, i32 2, i32 1, i32 1} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 5, i32 24, metadata !11, null}
-!13 = metadata !{i32 786688, metadata !11, metadata !"gid", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
-!14 = metadata !{i32 6, i32 25, metadata !11, null}
-!15 = metadata !{i32 786688, metadata !11, metadata !"lsz", metadata !1, i32 3, metadata !6, i32 0, null} ; [ DW_TAG_auto_variable ]
-!16 = metadata !{i32 7, i32 26, metadata !11, null}
-!17 = metadata !{i32 9, i32 24, metadata !11, null}
-!18 = metadata !{i32 10, i32 1, metadata !0, null}
-!19 = metadata !{metadata !0}
-!20 = metadata !{metadata !"OCLlLwTXZ.cl", metadata !"/tmp"}
-!21 = metadata !{i32 0}
-!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00__OpenCL_nbt02_kernel\00__OpenCL_nbt02_kernel\00__OpenCL_nbt02_kernel\002\000\001\000\006\000\000\000", !20, !1, !3, null, void (i32 addrspace(1)*)* @__OpenCL_nbt02_kernel, null, null, null} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [__OpenCL_nbt02_kernel]
+!1 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\001\00clc\000\00\000\00\001", !20, !21, !21, !19, null,  null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !20, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null, !5}
+!5 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !6} ; [ DW_TAG_pointer_type ]
+!6 = !{!"0x16\00uint\000\000\000\000\000", !20, !2, !7} ; [ DW_TAG_typedef ]
+!7 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, !2} ; [ DW_TAG_base_type ]
+!8 = !{!"0x101\00ip\001\000", !0, !1, !5} ; [ DW_TAG_arg_variable ]
+!9 = !MDLocation(line: 1, column: 32, scope: !0)
+!10 = !{!"0x100\00tid\003\000", !11, !1, !6} ; [ DW_TAG_auto_variable ]
+!11 = !{!"0xb\002\001\001", !1, !0} ; [ DW_TAG_lexical_block ]
+!12 = !MDLocation(line: 5, column: 24, scope: !11)
+!13 = !{!"0x100\00gid\003\000", !11, !1, !6} ; [ DW_TAG_auto_variable ]
+!14 = !MDLocation(line: 6, column: 25, scope: !11)
+!15 = !{!"0x100\00lsz\003\000", !11, !1, !6} ; [ DW_TAG_auto_variable ]
+!16 = !MDLocation(line: 7, column: 26, scope: !11)
+!17 = !MDLocation(line: 9, column: 24, scope: !11)
+!18 = !MDLocation(line: 10, column: 1, scope: !0)
+!19 = !{!0}
+!20 = !{!"OCLlLwTXZ.cl", !"/tmp"}
+!21 = !{i32 0}
+!22 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-value-location.ll b/test/DebugInfo/X86/dbg-value-location.ll
index 55d1ae6a9f69..015ec89ec4ca 100644
--- a/test/DebugInfo/X86/dbg-value-location.ll
+++ b/test/DebugInfo/X86/dbg-value-location.ll
@@ -14,11 +14,11 @@ target triple = "x86_64-apple-darwin10.0.0"
 
 @dfm = external global i32, align 4
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define i32 @foo(i32 %dev, i64 %cmd, i8* %data, i32 %data2) nounwind optsize ssp {
 entry:
-  call void @llvm.dbg.value(metadata !{i32 %dev}, i64 0, metadata !12), !dbg !13
+  call void @llvm.dbg.value(metadata i32 %dev, i64 0, metadata !12, metadata !{!"0x102"}), !dbg !13
   %tmp.i = load i32* @dfm, align 4, !dbg !14
   %cmp.i = icmp eq i32 %tmp.i, 0, !dbg !14
   br i1 %cmp.i, label %if.else, label %if.end.i, !dbg !14
@@ -45,35 +45,35 @@ if.else:                                          ; preds = %entry
 declare hidden fastcc i32 @bar(i32, i32* nocapture) nounwind optsize ssp
 declare hidden fastcc i32 @bar2(i32) nounwind optsize ssp
 declare hidden fastcc i32 @bar3(i32) nounwind optsize ssp
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!29}
 
-!0 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 19510, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i64, i8*, i32)* @foo, null, null, null, i32 19510} ; [ DW_TAG_subprogram ] [line 19510] [def] [foo]
-!1 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !27, i32 12, metadata !"clang version 2.9 (trunk 124753)", i1 true, metadata !"", i32 0, metadata !28, metadata !28, metadata !24, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !26, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"bar3", metadata !"bar3", metadata !"", i32 14827, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @bar3, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 14827] [local] [def] [scope 0] [bar3]
-!7 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"bar2", metadata !"bar2", metadata !"", i32 15397, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @bar2, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 15397] [local] [def] [scope 0] [bar2]
-!8 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"bar", metadata !"bar", metadata !"", i32 12382, metadata !9, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i32*)* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 12382] [local] [def] [scope 0] [bar]
-!9 = metadata !{i32 786453, metadata !26, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 786468, null, metadata !2, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!12 = metadata !{i32 786689, metadata !0, metadata !"var", metadata !1, i32 19509, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!13 = metadata !{i32 19509, i32 20, metadata !0, null}
-!14 = metadata !{i32 18091, i32 2, metadata !15, metadata !17}
-!15 = metadata !{i32 786443, metadata !26, metadata !16, i32 18086, i32 1, i32 748} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 786478, metadata !26, metadata !1, metadata !"foo_bar", metadata !"foo_bar", metadata !"", i32 18086, metadata !3, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 18086] [local] [def] [scope 0] [foo_bar]
-!17 = metadata !{i32 19514, i32 2, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !26, metadata !0, i32 19510, i32 1, i32 99} ; [ DW_TAG_lexical_block ]
-!22 = metadata !{i32 18094, i32 2, metadata !15, metadata !17}
-!23 = metadata !{i32 19524, i32 1, metadata !18, null}
-!24 = metadata !{metadata !0, metadata !6, metadata !7, metadata !8, metadata !16}
-!25 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
-!26 = metadata !{metadata !"/tmp/f.c", metadata !"/tmp"}
-!27 = metadata !{metadata !"f.i", metadata !"/tmp"}
-!28 = metadata !{i32 0}
-!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00foo\00foo\00\0019510\000\001\000\006\00256\001\0019510", !26, !1, !3, null, i32 (i32, i64, i8*, i32)* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 19510] [def] [foo]
+!1 = !{!"0x29", !26} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 2.9 (trunk 124753)\001\00\000\00\000", !27, !28, !28, !24, null,  null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !26, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
+!6 = !{!"0x2e\00bar3\00bar3\00\0014827\001\001\000\006\00256\001\000", !26, !1, !3, null, i32 (i32)* @bar3, null, null, null} ; [ DW_TAG_subprogram ] [line 14827] [local] [def] [scope 0] [bar3]
+!7 = !{!"0x2e\00bar2\00bar2\00\0015397\001\001\000\006\00256\001\000", !26, !1, !3, null, i32 (i32)* @bar2, null, null, null} ; [ DW_TAG_subprogram ] [line 15397] [local] [def] [scope 0] [bar2]
+!8 = !{!"0x2e\00bar\00bar\00\0012382\001\001\000\006\00256\001\000", !26, !1, !9, null, i32 (i32, i32*)* @bar, null, null, null} ; [ DW_TAG_subprogram ] [line 12382] [local] [def] [scope 0] [bar]
+!9 = !{!"0x15\00\000\000\000\000\000\000", !26, !1, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !{!11}
+!11 = !{!"0x24\00unsigned char\000\008\008\000\000\008", null, !2} ; [ DW_TAG_base_type ]
+!12 = !{!"0x101\00var\0019509\000", !0, !1, !5} ; [ DW_TAG_arg_variable ]
+!13 = !MDLocation(line: 19509, column: 20, scope: !0)
+!14 = !MDLocation(line: 18091, column: 2, scope: !15, inlinedAt: !17)
+!15 = !{!"0xb\0018086\001\00748", !26, !16} ; [ DW_TAG_lexical_block ]
+!16 = !{!"0x2e\00foo_bar\00foo_bar\00\0018086\001\001\000\006\00256\001\000", !26, !1, !3, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 18086] [local] [def] [scope 0] [foo_bar]
+!17 = !MDLocation(line: 19514, column: 2, scope: !18)
+!18 = !{!"0xb\0019510\001\0099", !26, !0} ; [ DW_TAG_lexical_block ]
+!22 = !MDLocation(line: 18094, column: 2, scope: !15, inlinedAt: !17)
+!23 = !MDLocation(line: 19524, column: 1, scope: !18)
+!24 = !{!0, !6, !7, !8, !16}
+!25 = !{!"0x29", !27} ; [ DW_TAG_file_type ]
+!26 = !{!"/tmp/f.c", !"/tmp"}
+!27 = !{!"f.i", !"/tmp"}
+!28 = !{i32 0}
+!29 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-value-range.ll b/test/DebugInfo/X86/dbg-value-range.ll
index d9e7a6382627..727f906e1a27 100644
--- a/test/DebugInfo/X86/dbg-value-range.ll
+++ b/test/DebugInfo/X86/dbg-value-range.ll
@@ -4,10 +4,10 @@
 
 define i32 @bar(%struct.a* nocapture %b) nounwind ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !{%struct.a* %b}, i64 0, metadata !6), !dbg !13
+  tail call void @llvm.dbg.value(metadata %struct.a* %b, i64 0, metadata !6, metadata !{!"0x102"}), !dbg !13
   %tmp1 = getelementptr inbounds %struct.a* %b, i64 0, i32 0, !dbg !14
   %tmp2 = load i32* %tmp1, align 4, !dbg !14
-  tail call void @llvm.dbg.value(metadata !{i32 %tmp2}, i64 0, metadata !11), !dbg !14
+  tail call void @llvm.dbg.value(metadata i32 %tmp2, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !14
   %call = tail call i32 (...)* @foo(i32 %tmp2) nounwind , !dbg !18
   %add = add nsw i32 %tmp2, 1, !dbg !19
   ret i32 %add, !dbg !19
@@ -15,32 +15,32 @@ entry:
 
 declare i32 @foo(...) 
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!24}
 
-!0 = metadata !{i32 786478, metadata !22, metadata !1, metadata !"bar", metadata !"bar", metadata !"", i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.a*)* @bar, null, null, metadata !21, i32 0} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 0] [bar]
-!1 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !22, i32 12, metadata !"clang version 2.9 (trunk 122997)", i1 true, metadata !"", i32 0, metadata !23, metadata !23, metadata !20, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !22, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786689, metadata !0, metadata !"b", metadata !1, i32 5, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!7 = metadata !{i32 786447, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 786451, metadata !22, metadata !2, metadata !"a", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [a] [line 1, size 32, align 32, offset 0] [def] [from ]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786445, metadata !22, metadata !1, metadata !"c", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!11 = metadata !{i32 786688, metadata !12, metadata !"x", metadata !1, i32 6, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!12 = metadata !{i32 786443, metadata !22, metadata !0, i32 5, i32 22, i32 0} ; [ DW_TAG_lexical_block ]
-!13 = metadata !{i32 5, i32 19, metadata !0, null}
-!14 = metadata !{i32 6, i32 14, metadata !12, null}
-!18 = metadata !{i32 7, i32 2, metadata !12, null}
-!19 = metadata !{i32 8, i32 2, metadata !12, null}
-!20 = metadata !{metadata !0}
-!21 = metadata !{metadata !6, metadata !11}
-!22 = metadata !{metadata !"bar.c", metadata !"/private/tmp"}
-!23 = metadata !{i32 0}
+!0 = !{!"0x2e\00bar\00bar\00\005\000\001\000\006\00256\001\000", !22, !1, !3, null, i32 (%struct.a*)* @bar, null, null, !21} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 0] [bar]
+!1 = !{!"0x29", !22} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 2.9 (trunk 122997)\001\00\000\00\001", !22, !23, !23, !20, null,  null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !22, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
+!6 = !{!"0x101\00b\005\000", !0, !1, !7} ; [ DW_TAG_arg_variable ]
+!7 = !{!"0xf\00\000\0064\0064\000\000", null, !2, !8} ; [ DW_TAG_pointer_type ]
+!8 = !{!"0x13\00a\001\0032\0032\000\000\000", !22, !2, null, !9, null, null, null} ; [ DW_TAG_structure_type ] [a] [line 1, size 32, align 32, offset 0] [def] [from ]
+!9 = !{!10}
+!10 = !{!"0xd\00c\002\0032\0032\000\000", !22, !1, !5} ; [ DW_TAG_member ]
+!11 = !{!"0x100\00x\006\000", !12, !1, !5} ; [ DW_TAG_auto_variable ]
+!12 = !{!"0xb\005\0022\000", !22, !0} ; [ DW_TAG_lexical_block ]
+!13 = !MDLocation(line: 5, column: 19, scope: !0)
+!14 = !MDLocation(line: 6, column: 14, scope: !12)
+!18 = !MDLocation(line: 7, column: 2, scope: !12)
+!19 = !MDLocation(line: 8, column: 2, scope: !12)
+!20 = !{!0}
+!21 = !{!6, !11}
+!22 = !{!"bar.c", !"/private/tmp"}
+!23 = !{i32 0}
 
 ; Check that variable bar:b value range is appropriately truncated in debug info.
 ; The variable is in %rdi which is clobbered by 'movl %ebx, %edi'
@@ -51,8 +51,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 ;CHECK-NEXT: [[CLOBBER:Ltmp[0-9]*]]
 
 ;CHECK:Ldebug_loc0:
+;CHECK-NEXT: Lset{{.*}} =
 ;CHECK-NEXT:	.quad
-;CHECK-NEXT:	.quad	[[CLOBBER]]
+;CHECK-NEXT: [[CLOBBER_OFF:Lset.*]] = [[CLOBBER]]-{{.*}}
+;CHECK-NEXT:	.quad	[[CLOBBER_OFF]]
 ;CHECK-NEXT: Lset{{.*}} = Ltmp{{.*}}-Ltmp{{.*}}
 ;CHECK-NEXT:    .short  Lset
 ;CHECK-NEXT: Ltmp
@@ -60,4 +62,4 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 ;CHECK-NEXT: Ltmp
 ;CHECK-NEXT:	.quad	0
 ;CHECK-NEXT:	.quad	0
-!24 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!24 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg-value-terminator.ll b/test/DebugInfo/X86/dbg-value-terminator.ll
index 974e0ad6d837..0653bde62e90 100644
--- a/test/DebugInfo/X86/dbg-value-terminator.ll
+++ b/test/DebugInfo/X86/dbg-value-terminator.ll
@@ -87,7 +87,7 @@ VEC_edge_base_index.exit7.i:                      ; preds = %"3.i5.i"
 "44.i":                                           ; preds = %"42.i"
   %2 = load %a** undef, align 8, !dbg !12
   %3 = bitcast %a* %2 to %a*, !dbg !12
-  call void @llvm.dbg.value(metadata !{%a* %3}, i64 0, metadata !6), !dbg !12
+  call void @llvm.dbg.value(metadata %a* %3, i64 0, metadata !6, metadata !{!"0x102"}), !dbg !12
   br label %may_unswitch_on.exit, !dbg !12
 
 "45.i":                                           ; preds = %"38.i"
@@ -108,26 +108,26 @@ may_unswitch_on.exit:                             ; preds = %"44.i", %"42.i", %"
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind uwtable }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!22}
 
-!0 = metadata !{i32 786449, metadata !20, i32 12, metadata !"Apple clang version", i1 true, metadata !"", i32 0, metadata !21, metadata !21, metadata !18, null,  null, null} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"foo", metadata !"", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, %a* ()* @test, null, null, metadata !19, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
-!2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 16777218, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!7 = metadata !{i32 786689, metadata !1, metadata !"c", metadata !2, i32 33554434, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 786447, null, metadata !0, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
-!9 = metadata !{i32 786468, null, metadata !0, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786688, metadata !11, metadata !"a", metadata !2, i32 3, metadata !9, i32 0, null} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 786443, metadata !20, metadata !1, i32 2, i32 25, i32 0} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 2, i32 13, metadata !1, null}
-!18 = metadata !{metadata !1}
-!19 = metadata !{metadata !6, metadata !7, metadata !10}
-!20 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
-!21 = metadata !{i32 0}
-!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00Apple clang version\001\00\000\00\001", !20, !21, !21, !18, null,  null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"0x2e\00foo\00\00\002\000\001\000\006\00256\001\000", !20, !2, !3, null, %a* ()* @test, null, null, !19} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
+!2 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !20, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
+!6 = !{!"0x101\00i\0016777218\000", !1, !2, !5} ; [ DW_TAG_arg_variable ]
+!7 = !{!"0x101\00c\0033554434\000", !1, !2, !8} ; [ DW_TAG_arg_variable ]
+!8 = !{!"0xf\00\000\0064\0064\000\000", null, !0, !9} ; [ DW_TAG_pointer_type ]
+!9 = !{!"0x24\00char\000\008\008\000\000\006", null, !0} ; [ DW_TAG_base_type ]
+!10 = !{!"0x100\00a\003\000", !11, !2, !9} ; [ DW_TAG_auto_variable ]
+!11 = !{!"0xb\002\0025\000", !20, !1} ; [ DW_TAG_lexical_block ]
+!12 = !MDLocation(line: 2, column: 13, scope: !1)
+!18 = !{!1}
+!19 = !{!6, !7, !10}
+!20 = !{!"a.c", !"/private/tmp"}
+!21 = !{i32 0}
+!22 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dbg_value_direct.ll b/test/DebugInfo/X86/dbg_value_direct.ll
index db947ac94799..69c9e99519bb 100644
--- a/test/DebugInfo/X86/dbg_value_direct.ll
+++ b/test/DebugInfo/X86/dbg_value_direct.ll
@@ -53,7 +53,7 @@ entry:
   %19 = inttoptr i64 %18 to i8*
   %20 = load i8* %19
   %21 = icmp ne i8 %20, 0
-  call void @llvm.dbg.declare(metadata !{i32* %3}, metadata !23)
+  call void @llvm.dbg.declare(metadata i32* %3, metadata !23, metadata !28)
   br i1 %21, label %22, label %28
 
 ; <label>:22                                      ; preds = %entry
@@ -70,7 +70,7 @@ entry:
 
 ; <label>:28                                      ; preds = %22, %entry
   store i32 %0, i32* %3, align 4
-  call void @llvm.dbg.declare(metadata !{%struct.A* %agg.result}, metadata !24), !dbg !25
+  call void @llvm.dbg.declare(metadata %struct.A* %agg.result, metadata !24, metadata !{!"0x102"}), !dbg !25
   call void @_ZN1AC1Ev(%struct.A* %agg.result), !dbg !25
   store i64 1172321806, i64* %4, !dbg !26
   %29 = inttoptr i64 %10 to i32*, !dbg !26
@@ -85,7 +85,7 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare void @_ZN1AC1Ev(%struct.A*) #2
 
@@ -147,32 +147,32 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!22, !27}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/crash.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"crash.cpp", metadata !"/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"_Z4funci", i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.A*, i32)* @_Z4funci, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [func]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/crash.cpp]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !21}
-!8 = metadata !{i32 786451, metadata !1, null, metadata !"A", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [A] [line 1, size 8, align 8, offset 0] [def] [from ]
-!9 = metadata !{metadata !10, metadata !15}
-!10 = metadata !{i32 786478, metadata !1, metadata !8, metadata !"A", metadata !"A", metadata !"", i32 2, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !14, i32 2} ; [ DW_TAG_subprogram ] [line 2] [A]
-!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{null, metadata !13}
-!13 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
-!14 = metadata !{i32 786468}
-!15 = metadata !{i32 786478, metadata !1, metadata !8, metadata !"A", metadata !"A", metadata !"", i32 3, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !20, i32 3} ; [ DW_TAG_subprogram ] [line 3] [A]
-!16 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!17 = metadata !{null, metadata !13, metadata !18}
-!18 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !19} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
-!19 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from A]
-!20 = metadata !{i32 786468}
-!21 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!22 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!23 = metadata !{i32 786689, metadata !4, metadata !"", metadata !5, i32 16777222, metadata !21, i32 0, i32 0, metadata !28} ; [ DW_TAG_arg_variable ] [line 6]
-!24 = metadata !{i32 786688, metadata !4, metadata !"a", metadata !5, i32 7, metadata !8, i32 8192, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 7]
-!25 = metadata !{i32 7, i32 0, metadata !4, null}
-!26 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
-!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!28 = metadata !{i64 2}
+!0 = !{!"0x11\004\00clang version 3.4 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/crash.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"crash.cpp", !"/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00func\00func\00_Z4funci\006\000\001\000\006\00256\000\006", !1, !5, !6, null, void (%struct.A*, i32)* @_Z4funci, null, null, !2} ; [ DW_TAG_subprogram ] [line 6] [def] [func]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/crash.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !21}
+!8 = !{!"0x13\00A\001\008\008\000\000\000", !1, null, null, !9, null, null, null} ; [ DW_TAG_structure_type ] [A] [line 1, size 8, align 8, offset 0] [def] [from ]
+!9 = !{!10, !15}
+!10 = !{!"0x2e\00A\00A\00\002\000\000\000\006\00256\000\002", !1, !8, !11, null, null, null, i32 0, !14} ; [ DW_TAG_subprogram ] [line 2] [A]
+!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{null, !13}
+!13 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
+!14 = !{i32 786468}
+!15 = !{!"0x2e\00A\00A\00\003\000\000\000\006\00256\000\003", !1, !8, !16, null, null, null, i32 0, !20} ; [ DW_TAG_subprogram ] [line 3] [A]
+!16 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !17, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!17 = !{null, !13, !18}
+!18 = !{!"0x10\00\000\000\000\000\000", null, null, !19} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
+!19 = !{!"0x26\00\000\000\000\000\000", null, null, !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from A]
+!20 = !{i32 786468}
+!21 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!22 = !{i32 2, !"Dwarf Version", i32 3}
+!23 = !{!"0x101\00\0016777222\000", !4, !5, !21} ; [ DW_TAG_arg_variable ] [line 6]
+!24 = !{!"0x100\00a\007\008192", !4, !5, !8} ; [ DW_TAG_auto_variable ] [a] [line 7]
+!25 = !MDLocation(line: 7, scope: !4)
+!26 = !MDLocation(line: 8, scope: !4)
+!27 = !{i32 1, !"Debug Info Version", i32 2}
+!28 = !{!"0x102\006"} ; [ DW_TAG_expression ] [DW_OP_deref]
diff --git a/test/DebugInfo/X86/debug-dead-local-var.ll b/test/DebugInfo/X86/debug-dead-local-var.ll
index 64f0b2a9e40f..6733dd866690 100644
--- a/test/DebugInfo/X86/debug-dead-local-var.ll
+++ b/test/DebugInfo/X86/debug-dead-local-var.ll
@@ -27,25 +27,25 @@ attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-fra
 !llvm.module.flags = !{!18, !19}
 !llvm.ident = !{!20}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 (trunk 209255) (llvm/trunk 209253)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/debug-dead-local-var.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"debug-dead-local-var.c", metadata !"/usr/local/google/home/echristo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !9}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 11, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @bar, null, null, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [bar]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/debug-dead-local-var.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 6, metadata !10, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 true, null, null, null, metadata !12, i32 6} ; [ DW_TAG_subprogram ] [line 6] [local] [def] [foo]
-!10 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!11 = metadata !{null}
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786688, metadata !9, metadata !"xyz", metadata !5, i32 8, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [xyz] [line 8]
-!14 = metadata !{i32 786451, metadata !1, metadata !9, metadata !"X", i32 8, i64 64, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [X] [line 8, size 64, align 32, offset 0] [def] [from ]
-!15 = metadata !{metadata !16, metadata !17}
-!16 = metadata !{i32 786445, metadata !1, metadata !14, metadata !"a", i32 8, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 8, size 32, align 32, offset 0] [from int]
-!17 = metadata !{i32 786445, metadata !1, metadata !14, metadata !"b", i32 8, i64 32, i64 32, i64 32, i32 0, metadata !8} ; [ DW_TAG_member ] [b] [line 8, size 32, align 32, offset 32] [from int]
-!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!19 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!20 = metadata !{metadata !"clang version 3.5.0 (trunk 209255) (llvm/trunk 209253)"}
-!21 = metadata !{i32 13, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5.0 (trunk 209255) (llvm/trunk 209253)\001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/debug-dead-local-var.c] [DW_LANG_C99]
+!1 = !{!"debug-dead-local-var.c", !"/usr/local/google/home/echristo"}
+!2 = !{}
+!3 = !{!4, !9}
+!4 = !{!"0x2e\00bar\00bar\00\0011\000\001\000\006\000\001\0011", !1, !5, !6, null, i32 ()* @bar, null, null, !2} ; [ DW_TAG_subprogram ] [line 11] [def] [bar]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/debug-dead-local-var.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x2e\00foo\00foo\00\006\001\001\000\006\000\001\006", !1, !5, !10, null, null, null, null, !12} ; [ DW_TAG_subprogram ] [line 6] [local] [def] [foo]
+!10 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !{null}
+!12 = !{!13}
+!13 = !{!"0x100\00xyz\008\000", !9, !5, !14} ; [ DW_TAG_auto_variable ] [xyz] [line 8]
+!14 = !{!"0x13\00X\008\0064\0032\000\000\000", !1, !9, null, !15, null, null, null} ; [ DW_TAG_structure_type ] [X] [line 8, size 64, align 32, offset 0] [def] [from ]
+!15 = !{!16, !17}
+!16 = !{!"0xd\00a\008\0032\0032\000\000", !1, !14, !8} ; [ DW_TAG_member ] [a] [line 8, size 32, align 32, offset 0] [from int]
+!17 = !{!"0xd\00b\008\0032\0032\0032\000", !1, !14, !8} ; [ DW_TAG_member ] [b] [line 8, size 32, align 32, offset 32] [from int]
+!18 = !{i32 2, !"Dwarf Version", i32 4}
+!19 = !{i32 2, !"Debug Info Version", i32 2}
+!20 = !{!"clang version 3.5.0 (trunk 209255) (llvm/trunk 209253)"}
+!21 = !MDLocation(line: 13, scope: !4)
diff --git a/test/DebugInfo/X86/debug-info-access.ll b/test/DebugInfo/X86/debug-info-access.ll
new file mode 100644
index 000000000000..7727384d6f70
--- /dev/null
+++ b/test/DebugInfo/X86/debug-info-access.ll
@@ -0,0 +1,150 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+;
+; Test the DW_AT_accessibility DWARF attribute.
+;
+;
+; Regenerate me:
+; clang++ -g tools/clang/test/CodeGenCXX/debug-info-access.cpp -S -emit-llvm -o -
+;
+;   struct A {
+;     void pub_default();
+;     static int pub_default_static;
+;   };
+;
+;   class B : public A {
+;   public:
+;     void pub();
+;     static int public_static;
+;   protected:
+;     void prot();
+;   private:
+;     void priv_default();
+;   };
+;
+;   union U {
+;     void union_pub_default();
+;   private:
+;     int union_priv;
+;   };
+;
+;   void free() {}
+;
+;   A a;
+;   B b;
+;   U u;
+
+; CHECK: DW_TAG_member
+; CHECK:     DW_AT_name {{.*}}"pub_default_static")
+; CHECK-NOT: DW_AT_accessibility
+; CHECK-NOT: DW_TAG
+;
+; CHECK: DW_TAG_subprogram
+; CHECK:     DW_AT_name {{.*}}"pub_default")
+; CHECK-NOT: DW_AT_accessibility
+; CHECK: DW_TAG
+;
+; CHECK: DW_TAG_inheritance
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_accessibility {{.*}}(DW_ACCESS_public)
+;
+; CHECK: DW_TAG_member
+; CHECK:     DW_AT_name {{.*}}"public_static")
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_accessibility {{.*}}(DW_ACCESS_public)
+;
+; CHECK: DW_TAG_subprogram
+; CHECK:     DW_AT_name {{.*}}"pub")
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_accessibility {{.*}}(DW_ACCESS_public)
+;
+; CHECK: DW_TAG_subprogram
+; CHECK:     DW_AT_name {{.*}}"prot")
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_accessibility {{.*}}(DW_ACCESS_protected)
+;
+; CHECK: DW_TAG_subprogram
+; CHECK:     DW_AT_name {{.*}}"priv_default")
+; CHECK-NOT: DW_AT_accessibility
+; CHECK: DW_TAG
+;
+; CHECK: DW_TAG_member
+; CHECK:     DW_AT_name {{.*}}"union_priv")
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_accessibility {{.*}}(DW_ACCESS_private)
+;
+; CHECK: DW_TAG_subprogram
+; CHECK:     DW_AT_name {{.*}}"union_pub_default")
+; CHECK-NOT: DW_AT_accessibility
+; CHECK: DW_TAG
+;
+; CHECK: DW_TAG_subprogram
+; CHECK:     DW_AT_name {{.*}}"free")
+; CHECK-NOT: DW_AT_accessibility
+; CHECK-NOT: DW_TAG
+;
+; ModuleID = '/llvm/tools/clang/test/CodeGenCXX/debug-info-access.cpp'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+%struct.A = type { i8 }
+%class.B = type { i8 }
+%union.U = type { i32 }
+
+@a = global %struct.A zeroinitializer, align 1
+@b = global %class.B zeroinitializer, align 1
+@u = global %union.U zeroinitializer, align 4
+
+; Function Attrs: nounwind ssp uwtable
+define void @_Z4freev() #0 {
+  ret void, !dbg !41
+}
+
+attributes #0 = { nounwind ssp uwtable }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!38, !39}
+!llvm.ident = !{!40}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 \000\00\000\00\001", !1, !2, !3, !29, !34, !2} ; [ DW_TAG_compile_unit ] [/llvm/tools/clang/test/CodeGenCXX/debug-info-access.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"/llvm/tools/clang/test/CodeGenCXX/debug-info-access.cpp", !""}
+!2 = !{}
+!3 = !{!4, !12, !22}
+!4 = !{!"0x13\00A\003\008\008\000\000\000", !1, null, null, !5, null, null, !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 3, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!6, !8}
+!6 = !{!"0xd\00pub_default_static\007\000\000\000\004096", !1, !"_ZTS1A", !7, null} ; [ DW_TAG_member ] [pub_default_static] [line 7, size 0, align 0, offset 0] [static] [from int]
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = !{!"0x2e\00pub_default\00pub_default\00_ZN1A11pub_defaultEv\005\000\000\000\006\00256\000\005", !1, !"_ZTS1A", !9, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 5] [pub_default]
+!9 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !{null, !11}
+!11 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!12 = !{!"0x2\00B\0011\008\008\000\000\000", !1, null, null, !13, null, null, !"_ZTS1B"} ; [ DW_TAG_class_type ] [B] [line 11, size 8, align 8, offset 0] [def] [from ]
+!13 = !{!14, !15, !16, !20, !21}
+!14 = !{!"0x1c\00\000\000\000\000\003", null, !"_ZTS1B", !"_ZTS1A"} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [public] [from _ZTS1A]
+!15 = !{!"0xd\00public_static\0016\000\000\000\004099", !1, !"_ZTS1B", !7, null} ; [ DW_TAG_member ] [public_static] [line 16, size 0, align 0, offset 0] [public] [static] [from int]
+!16 = !{!"0x2e\00pub\00pub\00_ZN1B3pubEv\0014\000\000\000\006\00259\000\0014", !1, !"_ZTS1B", !17, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 14] [public] [pub]
+!17 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !18, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = !{null, !19}
+!19 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1B]
+!20 = !{!"0x2e\00prot\00prot\00_ZN1B4protEv\0019\000\000\000\006\00258\000\0019", !1, !"_ZTS1B", !17, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 19] [protected] [prot]
+!21 = !{!"0x2e\00priv_default\00priv_default\00_ZN1B12priv_defaultEv\0022\000\000\000\006\00256\000\0022", !1, !"_ZTS1B", !17, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 22] [priv_default]
+!22 = !{!"0x17\00U\0025\0032\0032\000\000\000", !1, null, null, !23, null, null, !"_ZTS1U"} ; [ DW_TAG_union_type ] [U] [line 25, size 32, align 32, offset 0] [def] [from ]
+!23 = !{!24, !25}
+!24 = !{!"0xd\00union_priv\0030\0032\0032\000\001", !1, !"_ZTS1U", !7} ; [ DW_TAG_member ] [union_priv] [line 30, size 32, align 32, offset 0] [private] [from int]
+!25 = !{!"0x2e\00union_pub_default\00union_pub_default\00_ZN1U17union_pub_defaultEv\0027\000\000\000\006\00256\000\0027", !1, !"_ZTS1U", !26, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 27] [union_pub_default]
+!26 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !27, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!27 = !{null, !28}
+!28 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1U"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1U]
+!29 = !{!30}
+!30 = !{!"0x2e\00free\00free\00_Z4freev\0035\000\001\000\006\00256\000\0035", !1, !31, !32, null, void ()* @_Z4freev, null, null, !2} ; [ DW_TAG_subprogram ] [line 35] [def] [free]
+!31 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/llvm/tools/clang/test/CodeGenCXX/debug-info-access.cpp]
+!32 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !33, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!33 = !{null}
+!34 = !{!35, !36, !37}
+!35 = !{!"0x34\00a\00a\00\0037\000\001", null, !31, !"_ZTS1A", %struct.A* @a, null} ; [ DW_TAG_variable ] [a] [line 37] [def]
+!36 = !{!"0x34\00b\00b\00\0038\000\001", null, !31, !"_ZTS1B", %class.B* @b, null} ; [ DW_TAG_variable ] [b] [line 38] [def]
+!37 = !{!"0x34\00u\00u\00\0039\000\001", null, !31, !"_ZTS1U", %union.U* @u, null} ; [ DW_TAG_variable ] [u] [line 39] [def]
+!38 = !{i32 2, !"Dwarf Version", i32 2}
+!39 = !{i32 2, !"Debug Info Version", i32 2}
+!40 = !{!"clang version 3.6.0 "}
+!41 = !MDLocation(line: 35, column: 14, scope: !30)
diff --git a/test/DebugInfo/X86/debug-info-block-captured-self.ll b/test/DebugInfo/X86/debug-info-block-captured-self.ll
index 95eda60c5cca..b7d6dd486bce 100644
--- a/test/DebugInfo/X86/debug-info-block-captured-self.ll
+++ b/test/DebugInfo/X86/debug-info-block-captured-self.ll
@@ -63,50 +63,50 @@
 ; ModuleID = 'llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m'
 %0 = type opaque
 %struct.__block_descriptor = type { i64, i64 }
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 define internal void @"__24-[Main initWithContext:]_block_invoke"(i8* %.block_descriptor, i8* %obj) #0 {
   %block = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !84
   %block.captured-self = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i32 0, i32 5, !dbg !84
-  call void @llvm.dbg.declare(metadata !{<{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block}, metadata !86), !dbg !87
+  call void @llvm.dbg.declare(metadata <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, metadata !86, metadata !110), !dbg !87
   ret void, !dbg !87
 }
 
 define internal void @"__24-[Main initWithContext:]_block_invoke_2"(i8* %.block_descriptor, i8* %object) #0 {
   %block = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !103
   %block.captured-self = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i32 0, i32 5, !dbg !103
-  call void @llvm.dbg.declare(metadata !{<{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block}, metadata !105), !dbg !106
+  call void @llvm.dbg.declare(metadata <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, metadata !105, metadata !109), !dbg !106
   ret void, !dbg !106
 }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!108}
-!0 = metadata !{i32 786449, metadata !107, i32 16, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !4, metadata !23, metadata !15,  metadata !15, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m] [DW_LANG_ObjC]
-!1 = metadata !{i32 786473, metadata !107} ; [ DW_TAG_file_type ]
-!2 = metadata !{metadata !3}
-!3 = metadata !{i32 786436, metadata !107, null, metadata !"", i32 20, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [def] [from ]
-!4 = metadata !{}
-!15 = metadata !{}
-!23 = metadata !{metadata !38, metadata !42}
-!27 = metadata !{i32 786454, metadata !107, null, metadata !"id", i32 31, i64 0, i64 0, i64 0, i32 0, metadata !28} ; [ DW_TAG_typedef ] [id] [line 31, size 0, align 0, offset 0] [from ]
-!28 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
-!29 = metadata !{i32 786451, metadata !107, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !30, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [def] [from ]
-!30 = metadata !{metadata !31}
-!31 = metadata !{i32 786445, metadata !107, metadata !29, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
-!32 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
-!33 = metadata !{i32 786451, metadata !107, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
-!34 = metadata !{i32 786451, metadata !107, null, metadata !"Main", i32 23, i64 0, i64 0, i32 0, i32 1092, null, i32 0, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [Main] [line 23, size 0, align 0, offset 0] [artificial] [decl] [from ]
-!38 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"__24-[Main initWithContext:]_block_invoke", metadata !"__24-[Main initWithContext:]_block_invoke", metadata !"", i32 33, metadata !39, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke", null, null, metadata !15, i32 33} ; [ DW_TAG_subprogram ] [line 33] [local] [def] [__24-[Main initWithContext:]_block_invoke]
-!39 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!40 = metadata !{null, metadata !41, metadata !27}
-!41 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!42 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"__24-[Main initWithContext:]_block_invoke_2", metadata !"", i32 35, metadata !39, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke_2", null, null, metadata !15, i32 35} ; [ DW_TAG_subprogram ] [line 35] [local] [def] [__24-[Main initWithContext:]_block_invoke_2]
-!84 = metadata !{i32 33, i32 0, metadata !38, null}
-!86 = metadata !{i32 786688, metadata !38, metadata !"self", metadata !1, i32 41, metadata !34, i32 0, i32 0, metadata !110} ; [ DW_TAG_auto_variable ] [self] [line 41]
-!87 = metadata !{i32 41, i32 0, metadata !38, null}
-!103 = metadata !{i32 35, i32 0, metadata !42, null}
-!105 = metadata !{i32 786688, metadata !42, metadata !"self", metadata !1, i32 40, metadata !34, i32 0, i32 0, metadata !109} ; [ DW_TAG_auto_variable ] [self] [line 40]
-!106 = metadata !{i32 40, i32 0, metadata !42, null}
-!107 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m", metadata !""}
-!108 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!109 = metadata !{i64 1, i64 32}
-!110 = metadata !{i64 1, i64 32}
+!0 = !{!"0x11\0016\00clang version 3.3 \000\00\002\00\000", !107, !2, !4, !23, !15,  !15} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m] [DW_LANG_ObjC]
+!1 = !{!"0x29", !107} ; [ DW_TAG_file_type ]
+!2 = !{!3}
+!3 = !{!"0x4\00\0020\0032\0032\000\000\000", !107, null, null, !4, null, null, null} ; [ DW_TAG_enumeration_type ] [line 20, size 32, align 32, offset 0] [def] [from ]
+!4 = !{}
+!15 = !{}
+!23 = !{!38, !42}
+!27 = !{!"0x16\00id\0031\000\000\000\000", !107, null, !28} ; [ DW_TAG_typedef ] [id] [line 31, size 0, align 0, offset 0] [from ]
+!28 = !{!"0xf\00\000\0064\0064\000\000", null, null, !29} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
+!29 = !{!"0x13\00objc_object\000\000\000\000\000\000", !107, null, null, !30, null, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [def] [from ]
+!30 = !{!31}
+!31 = !{!"0xd\00isa\000\0064\000\000\000", !107, !29, !32} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
+!32 = !{!"0xf\00\000\0064\000\000\000", null, null, !33} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
+!33 = !{!"0x13\00objc_class\000\000\000\000\004\000", !107, null, null, null, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
+!34 = !{!"0x13\00Main\0023\000\000\000\001092\0016", !107, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [Main] [line 23, size 0, align 0, offset 0] [artificial] [decl] [from ]
+!38 = !{!"0x2e\00__24-[Main initWithContext:]_block_invoke\00__24-[Main initWithContext:]_block_invoke\00\0033\001\001\000\006\00256\000\0033", !1, !1, !39, null, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke", null, null, !15} ; [ DW_TAG_subprogram ] [line 33] [local] [def] [__24-[Main initWithContext:]_block_invoke]
+!39 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !40, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!40 = !{null, !41, !27}
+!41 = !{!"0xf\00\000\0064\0064\000\000", null, null, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!42 = !{!"0x2e\00__24-[Main initWithContext:]_block_invoke_2\00__24-[Main initWithContext:]_block_invoke_2\00\0035\001\001\000\006\00256\000\0035", !1, !1, !39, null, void (i8*, i8*)* @"__24-[Main initWithContext:]_block_invoke_2", null, null, !15} ; [ DW_TAG_subprogram ] [line 35] [local] [def] [__24-[Main initWithContext:]_block_invoke_2]
+!84 = !MDLocation(line: 33, scope: !38)
+!86 = !{!"0x100\00self\0041\000", !38, !1, !34} ; [ DW_TAG_auto_variable ] [self] [line 41]
+!87 = !MDLocation(line: 41, scope: !38)
+!103 = !MDLocation(line: 35, scope: !42)
+!105 = !{!"0x100\00self\0040\000", !42, !1, !34} ; [ DW_TAG_auto_variable ] [self] [line 40]
+!106 = !MDLocation(line: 40, scope: !42)
+!107 = !{!"llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m", !""}
+!108 = !{i32 1, !"Debug Info Version", i32 2}
+!109 = !{!"0x102\0034\0032"} ; [ DW_TAG_expression ] [DW_OP_plus 32]
+!110 = !{!"0x102\0034\0032"} ; [ DW_TAG_expression ] [DW_OP_plus 32]
diff --git a/test/DebugInfo/X86/debug-info-blocks.ll b/test/DebugInfo/X86/debug-info-blocks.ll
index 8a1a125ff804..7cba57f69911 100644
--- a/test/DebugInfo/X86/debug-info-blocks.ll
+++ b/test/DebugInfo/X86/debug-info-blocks.ll
@@ -101,9 +101,9 @@ define internal i8* @"\01-[A init]"(%0* %self, i8* %_cmd) #0 {
   %3 = alloca %struct._objc_super
   %4 = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, align 8
   store %0* %self, %0** %1, align 8
-  call void @llvm.dbg.declare(metadata !{%0** %1}, metadata !60), !dbg !62
+  call void @llvm.dbg.declare(metadata %0** %1, metadata !60, metadata !{!"0x102"}), !dbg !62
   store i8* %_cmd, i8** %2, align 8
-  call void @llvm.dbg.declare(metadata !{i8** %2}, metadata !63), !dbg !62
+  call void @llvm.dbg.declare(metadata i8** %2, metadata !63, metadata !{!"0x102"}), !dbg !62
   %5 = load %0** %1, !dbg !65
   %6 = bitcast %0* %5 to i8*, !dbg !65
   %7 = getelementptr inbounds %struct._objc_super* %3, i32 0, i32 0, !dbg !65
@@ -143,14 +143,14 @@ define internal i8* @"\01-[A init]"(%0* %self, i8* %_cmd) #0 {
   ret i8* %26, !dbg !71
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare i8* @objc_msgSendSuper2(%struct._objc_super*, i8*, ...)
 
 define internal void @run(void ()* %block) #0 {
   %1 = alloca void ()*, align 8
   store void ()* %block, void ()** %1, align 8
-  call void @llvm.dbg.declare(metadata !{void ()** %1}, metadata !72), !dbg !73
+  call void @llvm.dbg.declare(metadata void ()** %1, metadata !72, metadata !{!"0x102"}), !dbg !73
   %2 = load void ()** %1, align 8, !dbg !74
   %3 = bitcast void ()* %2 to %struct.__block_literal_generic*, !dbg !74
   %4 = getelementptr inbounds %struct.__block_literal_generic* %3, i32 0, i32 3, !dbg !74
@@ -167,13 +167,13 @@ define internal void @"__9-[A init]_block_invoke"(i8* %.block_descriptor) #0 {
   %d = alloca %1*, align 8
   store i8* %.block_descriptor, i8** %1, align 8
   %3 = load i8** %1
-  call void @llvm.dbg.value(metadata !{i8* %3}, i64 0, metadata !76), !dbg !88
-  call void @llvm.dbg.declare(metadata !{i8* %.block_descriptor}, metadata !76), !dbg !88
+  call void @llvm.dbg.value(metadata i8* %3, i64 0, metadata !76, metadata !{!"0x102"}), !dbg !88
+  call void @llvm.dbg.declare(metadata i8* %.block_descriptor, metadata !76, metadata !{!"0x102"}), !dbg !88
   %4 = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !88
   store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>** %2, align 8, !dbg !88
   %5 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 5, !dbg !88
-  call void @llvm.dbg.declare(metadata !{<{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>** %2}, metadata !89), !dbg !90
-  call void @llvm.dbg.declare(metadata !{%1** %d}, metadata !91), !dbg !100
+  call void @llvm.dbg.declare(metadata <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>** %2, metadata !89, metadata !111), !dbg !90
+  call void @llvm.dbg.declare(metadata %1** %d, metadata !91, metadata !{!"0x102"}), !dbg !100
   %6 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_", !dbg !100
   %7 = bitcast %struct._class_t* %6 to i8*, !dbg !100
   %8 = load i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to %struct._message_ref_t*), i32 0, i32 0), !dbg !100
@@ -200,7 +200,7 @@ define internal void @"__9-[A init]_block_invoke"(i8* %.block_descriptor) #0 {
   ret void, !dbg !90
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 declare i8* @objc_msgSend_fixup(i8*, %struct._message_ref_t*, ...)
 
@@ -210,9 +210,9 @@ define internal void @__copy_helper_block_(i8*, i8*) {
   %3 = alloca i8*, align 8
   %4 = alloca i8*, align 8
   store i8* %0, i8** %3, align 8
-  call void @llvm.dbg.declare(metadata !{i8** %3}, metadata !102), !dbg !103
+  call void @llvm.dbg.declare(metadata i8** %3, metadata !102, metadata !{!"0x102"}), !dbg !103
   store i8* %1, i8** %4, align 8
-  call void @llvm.dbg.declare(metadata !{i8** %4}, metadata !104), !dbg !103
+  call void @llvm.dbg.declare(metadata i8** %4, metadata !104, metadata !{!"0x102"}), !dbg !103
   %5 = load i8** %4, !dbg !103
   %6 = bitcast i8* %5 to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !103
   %7 = load i8** %3, !dbg !103
@@ -231,7 +231,7 @@ declare void @_Block_object_assign(i8*, i8*, i32)
 define internal void @__destroy_helper_block_(i8*) {
   %2 = alloca i8*, align 8
   store i8* %0, i8** %2, align 8
-  call void @llvm.dbg.declare(metadata !{i8** %2}, metadata !105), !dbg !106
+  call void @llvm.dbg.declare(metadata i8** %2, metadata !105, metadata !{!"0x102"}), !dbg !106
   %3 = load i8** %2, !dbg !106
   %4 = bitcast i8* %3 to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !106
   %5 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 5, !dbg !106
@@ -247,7 +247,7 @@ define i32 @main() #0 {
   %1 = alloca i32, align 4
   %a = alloca %0*, align 8
   store i32 0, i32* %1
-  call void @llvm.dbg.declare(metadata !{%0** %a}, metadata !107), !dbg !108
+  call void @llvm.dbg.declare(metadata %0** %a, metadata !107, metadata !{!"0x102"}), !dbg !108
   %2 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_5", !dbg !108
   %3 = bitcast %struct._class_t* %2 to i8*, !dbg !108
   %4 = load i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to %struct._message_ref_t*), i32 0, i32 0), !dbg !108
@@ -270,115 +270,115 @@ attributes #3 = { nounwind }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!56, !57, !58, !59, !110}
 
-!0 = metadata !{i32 786449, metadata !1, i32 16, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 2, metadata !2, metadata !3, metadata !12, metadata !2,  metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/<unknown>] [DW_LANG_ObjC]
-!1 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/<unknown>", metadata !"llvm/_build.ninja.Debug"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"A", i32 33, i64 32, i64 32, i32 0, i32 512, null, metadata !7, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [A] [line 33, size 32, align 32, offset 0] [def] [from ]
-!5 = metadata !{metadata !"llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m", metadata !"llvm/_build.ninja.Debug"}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
-!7 = metadata !{metadata !8, metadata !10}
-!8 = metadata !{i32 786460, null, metadata !4, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSObject]
-!9 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSObject", i32 21, i64 0, i64 8, i32 0, i32 0, null, metadata !2, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [NSObject] [line 21, size 0, align 8, offset 0] [def] [from ]
-!10 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"ivar", i32 35, i64 32, i64 32, i64 0, i32 0, metadata !11, null} ; [ DW_TAG_member ] [ivar] [line 35, size 32, align 32, offset 0] [from int]
-!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!12 = metadata !{metadata !13, metadata !27, metadata !31, metadata !35, metadata !36, metadata !39}
-!13 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"-[A init]", metadata !"-[A init]", metadata !"", i32 46, metadata !14, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, i8* (%0*, i8*)* @"\01-[A init]", null, null, metadata !2, i32 46} ; [ DW_TAG_subprogram ] [line 46] [local] [def] [-[A init]]
-!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!15 = metadata !{metadata !16, metadata !23, metadata !24}
-!16 = metadata !{i32 786454, metadata !5, null, metadata !"id", i32 46, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ] [id] [line 46, size 0, align 0, offset 0] [from ]
-!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
-!18 = metadata !{i32 786451, metadata !1, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [def] [from ]
-!19 = metadata !{metadata !20}
-!20 = metadata !{i32 786445, metadata !1, metadata !18, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
-!21 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
-!22 = metadata !{i32 786451, metadata !1, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
-!23 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
-!24 = metadata !{i32 786454, metadata !5, null, metadata !"SEL", i32 46, i64 0, i64 0, i64 0, i32 64, metadata !25} ; [ DW_TAG_typedef ] [SEL] [line 46, size 0, align 0, offset 0] [artificial] [from ]
-!25 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !26} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_selector]
-!26 = metadata !{i32 786451, metadata !1, null, metadata !"objc_selector", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [decl] [from ]
-!27 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"__9-[A init]_block_invoke", metadata !"__9-[A init]_block_invoke", metadata !"", i32 49, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @"__9-[A init]_block_invoke", null, null, metadata !2, i32 49} ; [ DW_TAG_subprogram ] [line 49] [local] [def] [__9-[A init]_block_invoke]
-!28 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!29 = metadata !{null, metadata !30}
-!30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!31 = metadata !{i32 786478, metadata !1, metadata !32, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", i32 52, metadata !33, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i8*, i8*)* @__copy_helper_block_, null, null, metadata !2, i32 52} ; [ DW_TAG_subprogram ] [line 52] [local] [def] [__copy_helper_block_]
-!32 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenObjC/<unknown>]
-!33 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!34 = metadata !{null, metadata !30, metadata !30}
-!35 = metadata !{i32 786478, metadata !1, metadata !32, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", i32 52, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i8*)* @__destroy_helper_block_, null, null, metadata !2, i32 52} ; [ DW_TAG_subprogram ] [line 52] [local] [def] [__destroy_helper_block_]
-!36 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 59, metadata !37, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 60} ; [ DW_TAG_subprogram ] [line 59] [def] [scope 60] [main]
-!37 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !38, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!38 = metadata !{metadata !11}
-!39 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"run", metadata !"run", metadata !"", i32 39, metadata !40, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (void ()*)* @run, null, null, metadata !2, i32 40} ; [ DW_TAG_subprogram ] [line 39] [local] [def] [scope 40] [run]
-!40 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !41, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!41 = metadata !{null, metadata !42}
-!42 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !43} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_generic]
-!43 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_generic", i32 40, i64 256, i64 0, i32 0, i32 8, null, metadata !44, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_generic] [line 40, size 256, align 0, offset 0] [def] [from ]
-!44 = metadata !{metadata !45, metadata !46, metadata !47, metadata !48, metadata !49}
-!45 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__isa", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_member ] [__isa] [line 0, size 64, align 64, offset 0] [from ]
-!46 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__flags", i32 0, i64 32, i64 32, i64 64, i32 0, metadata !11} ; [ DW_TAG_member ] [__flags] [line 0, size 32, align 32, offset 64] [from int]
-!47 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__reserved", i32 0, i64 32, i64 32, i64 96, i32 0, metadata !11} ; [ DW_TAG_member ] [__reserved] [line 0, size 32, align 32, offset 96] [from int]
-!48 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__FuncPtr", i32 0, i64 64, i64 64, i64 128, i32 0, metadata !30} ; [ DW_TAG_member ] [__FuncPtr] [line 0, size 64, align 64, offset 128] [from ]
-!49 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__descriptor", i32 40, i64 64, i64 64, i64 192, i32 0, metadata !50} ; [ DW_TAG_member ] [__descriptor] [line 40, size 64, align 64, offset 192] [from ]
-!50 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !51} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_descriptor]
-!51 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_descriptor", i32 40, i64 128, i64 0, i32 0, i32 8, null, metadata !52, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor] [line 40, size 128, align 0, offset 0] [def] [from ]
-!52 = metadata !{metadata !53, metadata !55}
-!53 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"reserved", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !54} ; [ DW_TAG_member ] [reserved] [line 0, size 64, align 64, offset 0] [from long unsigned int]
-!54 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
-!55 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"Size", i32 0, i64 64, i64 64, i64 64, i32 0, metadata !54} ; [ DW_TAG_member ] [Size] [line 0, size 64, align 64, offset 64] [from long unsigned int]
-!56 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!57 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!58 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!59 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
-!60 = metadata !{i32 786689, metadata !13, metadata !"self", metadata !32, i32 16777262, metadata !61, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [self] [line 46]
-!61 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
-!62 = metadata !{i32 46, i32 0, metadata !13, null}
-!63 = metadata !{i32 786689, metadata !13, metadata !"_cmd", metadata !32, i32 33554478, metadata !64, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [_cmd] [line 46]
-!64 = metadata !{i32 786454, metadata !5, null, metadata !"SEL", i32 46, i64 0, i64 0, i64 0, i32 0, metadata !25} ; [ DW_TAG_typedef ] [SEL] [line 46, size 0, align 0, offset 0] [from ]
-!65 = metadata !{i32 48, i32 0, metadata !66, null}
-!66 = metadata !{i32 786443, metadata !5, metadata !13, i32 47, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
-!67 = metadata !{}
-!68 = metadata !{i32 49, i32 0, metadata !69, null}
-!69 = metadata !{i32 786443, metadata !5, metadata !66, i32 48, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
-!70 = metadata !{i32 53, i32 0, metadata !69, null}
-!71 = metadata !{i32 54, i32 0, metadata !66, null}
-!72 = metadata !{i32 786689, metadata !39, metadata !"block", metadata !6, i32 16777255, metadata !42, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [block] [line 39]
-!73 = metadata !{i32 39, i32 0, metadata !39, null}
-!74 = metadata !{i32 41, i32 0, metadata !39, null}
-!75 = metadata !{i32 42, i32 0, metadata !39, null}
-!76 = metadata !{i32 786689, metadata !27, metadata !".block_descriptor", metadata !6, i32 16777265, metadata !77, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [.block_descriptor] [line 49]
-!77 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_1]
-!78 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"__block_literal_1", i32 49, i64 320, i64 64, i32 0, i32 0, null, metadata !79, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_1] [line 49, size 320, align 64, offset 0] [def] [from ]
-!79 = metadata !{metadata !80, metadata !81, metadata !82, metadata !83, metadata !84, metadata !87}
-!80 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__isa", i32 49, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_member ] [__isa] [line 49, size 64, align 64, offset 0] [from ]
-!81 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__flags", i32 49, i64 32, i64 32, i64 64, i32 0, metadata !11} ; [ DW_TAG_member ] [__flags] [line 49, size 32, align 32, offset 64] [from int]
-!82 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__reserved", i32 49, i64 32, i64 32, i64 96, i32 0, metadata !11} ; [ DW_TAG_member ] [__reserved] [line 49, size 32, align 32, offset 96] [from int]
-!83 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__FuncPtr", i32 49, i64 64, i64 64, i64 128, i32 0, metadata !30} ; [ DW_TAG_member ] [__FuncPtr] [line 49, size 64, align 64, offset 128] [from ]
-!84 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"__descriptor", i32 49, i64 64, i64 64, i64 192, i32 0, metadata !85} ; [ DW_TAG_member ] [__descriptor] [line 49, size 64, align 64, offset 192] [from ]
-!85 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !86} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from __block_descriptor_withcopydispose]
-!86 = metadata !{i32 786451, metadata !1, null, metadata !"__block_descriptor_withcopydispose", i32 49, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 49, size 0, align 0, offset 0] [decl] [from ]
-!87 = metadata !{i32 786445, metadata !5, metadata !6, metadata !"self", i32 49, i64 64, i64 64, i64 256, i32 0, metadata !61} ; [ DW_TAG_member ] [self] [line 49, size 64, align 64, offset 256] [from ]
-!88 = metadata !{i32 49, i32 0, metadata !27, null}
-!89 = metadata !{i32 786688, metadata !27, metadata !"self", metadata !32, i32 52, metadata !23, i32 0, i32 0, metadata !111} ; [ DW_TAG_auto_variable ] [self] [line 52]
-!90 = metadata !{i32 52, i32 0, metadata !27, null}
-!91 = metadata !{i32 786688, metadata !92, metadata !"d", metadata !6, i32 50, metadata !93, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 50]
-!92 = metadata !{i32 786443, metadata !5, metadata !27, i32 49, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
-!93 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !94} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from NSMutableDictionary]
-!94 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSMutableDictionary", i32 30, i64 0, i64 8, i32 0, i32 0, null, metadata !95, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [NSMutableDictionary] [line 30, size 0, align 8, offset 0] [def] [from ]
-!95 = metadata !{metadata !96}
-!96 = metadata !{i32 786460, null, metadata !94, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !97} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSDictionary]
-!97 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"NSDictionary", i32 26, i64 0, i64 8, i32 0, i32 0, null, metadata !98, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [NSDictionary] [line 26, size 0, align 8, offset 0] [def] [from ]
-!98 = metadata !{metadata !99}
-!99 = metadata !{i32 786460, null, metadata !97, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSObject]
-!100 = metadata !{i32 50, i32 0, metadata !92, null}
-!101 = metadata !{i32 51, i32 0, metadata !92, null}
-!102 = metadata !{i32 786689, metadata !31, metadata !"", metadata !32, i32 16777268, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [line 52]
-!103 = metadata !{i32 52, i32 0, metadata !31, null}
-!104 = metadata !{i32 786689, metadata !31, metadata !"", metadata !32, i32 33554484, metadata !30, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [line 52]
-!105 = metadata !{i32 786689, metadata !35, metadata !"", metadata !32, i32 16777268, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [line 52]
-!106 = metadata !{i32 52, i32 0, metadata !35, null}
-!107 = metadata !{i32 786688, metadata !36, metadata !"a", metadata !6, i32 61, metadata !61, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 61]
-!108 = metadata !{i32 61, i32 0, metadata !36, null}
-!109 = metadata !{i32 62, i32 0, metadata !36, null}
-!110 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!111 = metadata !{i64 2, i64 1, i64 32}
+!0 = !{!"0x11\0016\00clang version 3.3 \000\00\002\00\001", !1, !2, !3, !12, !2,  !2} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenObjC/<unknown>] [DW_LANG_ObjC]
+!1 = !{!"llvm/tools/clang/test/CodeGenObjC/<unknown>", !"llvm/_build.ninja.Debug"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00A\0033\0032\0032\000\00512\0016", !5, !6, null, !7, null, null, null} ; [ DW_TAG_structure_type ] [A] [line 33, size 32, align 32, offset 0] [def] [from ]
+!5 = !{!"llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m", !"llvm/_build.ninja.Debug"}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
+!7 = !{!8, !10}
+!8 = !{!"0x1c\00\000\000\000\000\000", null, !4, !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSObject]
+!9 = !{!"0x13\00NSObject\0021\000\008\000\000\0016", !5, !6, null, !2, null, null, null} ; [ DW_TAG_structure_type ] [NSObject] [line 21, size 0, align 8, offset 0] [def] [from ]
+!10 = !{!"0xd\00ivar\0035\0032\0032\000\000", !5, !6, !11, null} ; [ DW_TAG_member ] [ivar] [line 35, size 32, align 32, offset 0] [from int]
+!11 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = !{!13, !27, !31, !35, !36, !39}
+!13 = !{!"0x2e\00-[A init]\00-[A init]\00\0046\001\001\000\006\00256\000\0046", !5, !6, !14, null, i8* (%0*, i8*)* @"\01-[A init]", null, null, !2} ; [ DW_TAG_subprogram ] [line 46] [local] [def] [-[A init]]
+!14 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = !{!16, !23, !24}
+!16 = !{!"0x16\00id\0046\000\000\000\000", !5, null, !17} ; [ DW_TAG_typedef ] [id] [line 46, size 0, align 0, offset 0] [from ]
+!17 = !{!"0xf\00\000\0064\0064\000\000", null, null, !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
+!18 = !{!"0x13\00objc_object\000\000\000\000\000\000", !1, null, null, !19, null, null, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [def] [from ]
+!19 = !{!20}
+!20 = !{!"0xd\00isa\000\0064\000\000\000", !1, !18, !21} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
+!21 = !{!"0xf\00\000\0064\000\000\000", null, null, !22} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
+!22 = !{!"0x13\00objc_class\000\000\000\000\004\000", !1, null, null, null, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
+!23 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
+!24 = !{!"0x16\00SEL\0046\000\000\000\0064", !5, null, !25} ; [ DW_TAG_typedef ] [SEL] [line 46, size 0, align 0, offset 0] [artificial] [from ]
+!25 = !{!"0xf\00\000\0064\0064\000\000", null, null, !26} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_selector]
+!26 = !{!"0x13\00objc_selector\000\000\000\000\004\000", !1, null, null, null, null, null, null} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [decl] [from ]
+!27 = !{!"0x2e\00__9-[A init]_block_invoke\00__9-[A init]_block_invoke\00\0049\001\001\000\006\00256\000\0049", !5, !6, !28, null, void (i8*)* @"__9-[A init]_block_invoke", null, null, !2} ; [ DW_TAG_subprogram ] [line 49] [local] [def] [__9-[A init]_block_invoke]
+!28 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !29, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!29 = !{null, !30}
+!30 = !{!"0xf\00\000\0064\0064\000\000", null, null, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!31 = !{!"0x2e\00__copy_helper_block_\00__copy_helper_block_\00\0052\001\001\000\006\000\000\0052", !1, !32, !33, null, void (i8*, i8*)* @__copy_helper_block_, null, null, !2} ; [ DW_TAG_subprogram ] [line 52] [local] [def] [__copy_helper_block_]
+!32 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenObjC/<unknown>]
+!33 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !34, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!34 = !{null, !30, !30}
+!35 = !{!"0x2e\00__destroy_helper_block_\00__destroy_helper_block_\00\0052\001\001\000\006\000\000\0052", !1, !32, !28, null, void (i8*)* @__destroy_helper_block_, null, null, !2} ; [ DW_TAG_subprogram ] [line 52] [local] [def] [__destroy_helper_block_]
+!36 = !{!"0x2e\00main\00main\00\0059\000\001\000\006\000\000\0060", !5, !6, !37, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 59] [def] [scope 60] [main]
+!37 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !38, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!38 = !{!11}
+!39 = !{!"0x2e\00run\00run\00\0039\001\001\000\006\00256\000\0040", !5, !6, !40, null, void (void ()*)* @run, null, null, !2} ; [ DW_TAG_subprogram ] [line 39] [local] [def] [scope 40] [run]
+!40 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !41, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!41 = !{null, !42}
+!42 = !{!"0xf\00\000\0064\000\000\000", null, null, !43} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_generic]
+!43 = !{!"0x13\00__block_literal_generic\0040\00256\000\000\008\000", !5, !6, null, !44, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_generic] [line 40, size 256, align 0, offset 0] [def] [from ]
+!44 = !{!45, !46, !47, !48, !49}
+!45 = !{!"0xd\00__isa\000\0064\0064\000\000", !5, !6, !30} ; [ DW_TAG_member ] [__isa] [line 0, size 64, align 64, offset 0] [from ]
+!46 = !{!"0xd\00__flags\000\0032\0032\0064\000", !5, !6, !11} ; [ DW_TAG_member ] [__flags] [line 0, size 32, align 32, offset 64] [from int]
+!47 = !{!"0xd\00__reserved\000\0032\0032\0096\000", !5, !6, !11} ; [ DW_TAG_member ] [__reserved] [line 0, size 32, align 32, offset 96] [from int]
+!48 = !{!"0xd\00__FuncPtr\000\0064\0064\00128\000", !5, !6, !30} ; [ DW_TAG_member ] [__FuncPtr] [line 0, size 64, align 64, offset 128] [from ]
+!49 = !{!"0xd\00__descriptor\0040\0064\0064\00192\000", !5, !6, !50} ; [ DW_TAG_member ] [__descriptor] [line 40, size 64, align 64, offset 192] [from ]
+!50 = !{!"0xf\00\000\0064\000\000\000", null, null, !51} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_descriptor]
+!51 = !{!"0x13\00__block_descriptor\0040\00128\000\000\008\000", !5, !6, null, !52, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor] [line 40, size 128, align 0, offset 0] [def] [from ]
+!52 = !{!53, !55}
+!53 = !{!"0xd\00reserved\000\0064\0064\000\000", !5, !6, !54} ; [ DW_TAG_member ] [reserved] [line 0, size 64, align 64, offset 0] [from long unsigned int]
+!54 = !{!"0x24\00long unsigned int\000\0064\0064\000\000\007", null, null} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!55 = !{!"0xd\00Size\000\0064\0064\0064\000", !5, !6, !54} ; [ DW_TAG_member ] [Size] [line 0, size 64, align 64, offset 64] [from long unsigned int]
+!56 = !{i32 1, !"Objective-C Version", i32 2}
+!57 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!58 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!59 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
+!60 = !{!"0x101\00self\0016777262\001088", !13, !32, !61} ; [ DW_TAG_arg_variable ] [self] [line 46]
+!61 = !{!"0xf\00\000\0064\0064\000\000", null, null, !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!62 = !MDLocation(line: 46, scope: !13)
+!63 = !{!"0x101\00_cmd\0033554478\0064", !13, !32, !64} ; [ DW_TAG_arg_variable ] [_cmd] [line 46]
+!64 = !{!"0x16\00SEL\0046\000\000\000\000", !5, null, !25} ; [ DW_TAG_typedef ] [SEL] [line 46, size 0, align 0, offset 0] [from ]
+!65 = !MDLocation(line: 48, scope: !66)
+!66 = !{!"0xb\0047\000\000", !5, !13} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
+!67 = !{}
+!68 = !MDLocation(line: 49, scope: !69)
+!69 = !{!"0xb\0048\000\001", !5, !66} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
+!70 = !MDLocation(line: 53, scope: !69)
+!71 = !MDLocation(line: 54, scope: !66)
+!72 = !{!"0x101\00block\0016777255\000", !39, !6, !42} ; [ DW_TAG_arg_variable ] [block] [line 39]
+!73 = !MDLocation(line: 39, scope: !39)
+!74 = !MDLocation(line: 41, scope: !39)
+!75 = !MDLocation(line: 42, scope: !39)
+!76 = !{!"0x101\00.block_descriptor\0016777265\0064", !27, !6, !77} ; [ DW_TAG_arg_variable ] [.block_descriptor] [line 49]
+!77 = !{!"0xf\00\000\0064\000\000\000", null, null, !78} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __block_literal_1]
+!78 = !{!"0x13\00__block_literal_1\0049\00320\0064\000\000\000", !5, !6, null, !79, null, null, null} ; [ DW_TAG_structure_type ] [__block_literal_1] [line 49, size 320, align 64, offset 0] [def] [from ]
+!79 = !{!80, !81, !82, !83, !84, !87}
+!80 = !{!"0xd\00__isa\0049\0064\0064\000\000", !5, !6, !30} ; [ DW_TAG_member ] [__isa] [line 49, size 64, align 64, offset 0] [from ]
+!81 = !{!"0xd\00__flags\0049\0032\0032\0064\000", !5, !6, !11} ; [ DW_TAG_member ] [__flags] [line 49, size 32, align 32, offset 64] [from int]
+!82 = !{!"0xd\00__reserved\0049\0032\0032\0096\000", !5, !6, !11} ; [ DW_TAG_member ] [__reserved] [line 49, size 32, align 32, offset 96] [from int]
+!83 = !{!"0xd\00__FuncPtr\0049\0064\0064\00128\000", !5, !6, !30} ; [ DW_TAG_member ] [__FuncPtr] [line 49, size 64, align 64, offset 128] [from ]
+!84 = !{!"0xd\00__descriptor\0049\0064\0064\00192\000", !5, !6, !85} ; [ DW_TAG_member ] [__descriptor] [line 49, size 64, align 64, offset 192] [from ]
+!85 = !{!"0xf\00\000\0064\0064\000\000", null, null, !86} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from __block_descriptor_withcopydispose]
+!86 = !{!"0x13\00__block_descriptor_withcopydispose\0049\000\000\000\004\000", !1, null, null, null, null, null, null} ; [ DW_TAG_structure_type ] [__block_descriptor_withcopydispose] [line 49, size 0, align 0, offset 0] [decl] [from ]
+!87 = !{!"0xd\00self\0049\0064\0064\00256\000", !5, !6, !61} ; [ DW_TAG_member ] [self] [line 49, size 64, align 64, offset 256] [from ]
+!88 = !MDLocation(line: 49, scope: !27)
+!89 = !{!"0x100\00self\0052\000", !27, !32, !23} ; [ DW_TAG_auto_variable ] [self] [line 52]
+!90 = !MDLocation(line: 52, scope: !27)
+!91 = !{!"0x100\00d\0050\000", !92, !6, !93} ; [ DW_TAG_auto_variable ] [d] [line 50]
+!92 = !{!"0xb\0049\000\002", !5, !27} ; [ DW_TAG_lexical_block ] [llvm/tools/clang/test/CodeGenObjC/debug-info-blocks.m]
+!93 = !{!"0xf\00\000\0064\0064\000\000", null, null, !94} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from NSMutableDictionary]
+!94 = !{!"0x13\00NSMutableDictionary\0030\000\008\000\000\0016", !5, !6, null, !95, null, null, null} ; [ DW_TAG_structure_type ] [NSMutableDictionary] [line 30, size 0, align 8, offset 0] [def] [from ]
+!95 = !{!96}
+!96 = !{!"0x1c\00\000\000\000\000\000", null, !94, !97} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSDictionary]
+!97 = !{!"0x13\00NSDictionary\0026\000\008\000\000\0016", !5, !6, null, !98, null, null, null} ; [ DW_TAG_structure_type ] [NSDictionary] [line 26, size 0, align 8, offset 0] [def] [from ]
+!98 = !{!99}
+!99 = !{!"0x1c\00\000\000\000\000\000", null, !97, !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from NSObject]
+!100 = !MDLocation(line: 50, scope: !92)
+!101 = !MDLocation(line: 51, scope: !92)
+!102 = !{!"0x101\00\0016777268\001088", !31, !32, !30} ; [ DW_TAG_arg_variable ] [line 52]
+!103 = !MDLocation(line: 52, scope: !31)
+!104 = !{!"0x101\00\0033554484\0064", !31, !32, !30} ; [ DW_TAG_arg_variable ] [line 52]
+!105 = !{!"0x101\00\0016777268\001088", !35, !32, !30} ; [ DW_TAG_arg_variable ] [line 52]
+!106 = !MDLocation(line: 52, scope: !35)
+!107 = !{!"0x100\00a\0061\000", !36, !6, !61} ; [ DW_TAG_auto_variable ] [a] [line 61]
+!108 = !MDLocation(line: 61, scope: !36)
+!109 = !MDLocation(line: 62, scope: !36)
+!110 = !{i32 1, !"Debug Info Version", i32 2}
+!111 = !{!"0x102\006\0034\0032"} ; [ DW_TAG_expression ] [DW_OP_deref DW_OP_plus 32]
diff --git a/test/DebugInfo/X86/debug-info-static-member.ll b/test/DebugInfo/X86/debug-info-static-member.ll
index 7d258f9d8377..8a14b6ad19c1 100644
--- a/test/DebugInfo/X86/debug-info-static-member.ll
+++ b/test/DebugInfo/X86/debug-info-static-member.ll
@@ -1,5 +1,5 @@
 ; RUN: llc %s -o %t -filetype=obj -O0 -mtriple=x86_64-unknown-linux-gnu -dwarf-version=4
-; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=PRESENT
+; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=PRESENT 
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=ABSENT
 ; RUN: llc %s -o %t -filetype=obj -O0 -mtriple=x86_64-apple-darwin -dwarf-version=4
 ; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s -check-prefix=DARWINP
@@ -47,149 +47,149 @@ entry:
   %retval = alloca i32, align 4
   %instance_C = alloca %class.C, align 4
   store i32 0, i32* %retval
-  call void @llvm.dbg.declare(metadata !{%class.C* %instance_C}, metadata !29), !dbg !30
+  call void @llvm.dbg.declare(metadata %class.C* %instance_C, metadata !29, metadata !{!"0x102"}), !dbg !30
   %d = getelementptr inbounds %class.C* %instance_C, i32 0, i32 0, !dbg !31
   store i32 8, i32* %d, align 4, !dbg !31
   %0 = load i32* @_ZN1C1cE, align 4, !dbg !32
   ret i32 %0, !dbg !32
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!34}
 
-!0 = metadata !{i32 786449, metadata !33, i32 4, metadata !"clang version 3.3 (trunk 171914)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !10,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/projects/upstream/static-member/test/debug-info-static-member.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !33, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 18, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 23} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 23] [main]
-!6 = metadata !{i32 786473, metadata !33} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{metadata !12, metadata !27, metadata !28}
-!12 = metadata !{i32 786484, i32 0, metadata !13, metadata !"a", metadata !"a", metadata !"_ZN1C1aE", metadata !6, i32 14, metadata !9, i32 0, i32 1, i32* @_ZN1C1aE, metadata !15} ; [ DW_TAG_variable ] [a] [line 14] [def]
-!13 = metadata !{i32 786434, metadata !33, null, metadata !"C", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_class_type ] [C] [line 1, size 32, align 32, offset 0] [def] [from ]
-!14 = metadata !{metadata !15, metadata !16, metadata !19, metadata !20, metadata !23, metadata !24, metadata !26}
-!15 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"a", i32 3, i64 0, i64 0, i64 0, i32 4097, metadata !9, null} ; [ DW_TAG_member ] [a] [line 3, size 0, align 0, offset 0] [private] [static] [from int]
-!16 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"const_a", i32 4, i64 0, i64 0, i64 0, i32 4097, metadata !17, i1 true} ; [ DW_TAG_member ] [const_a] [line 4, size 0, align 0, offset 0] [private] [static] [from ]
-!17 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from bool]
-!18 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
-!19 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"b", i32 6, i64 0, i64 0, i64 0, i32 4098, metadata !9, null} ; [ DW_TAG_member ] [b] [line 6, size 0, align 0, offset 0] [protected] [static] [from int]
-!20 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"const_b", i32 7, i64 0, i64 0, i64 0, i32 4098, metadata !21, float 0x40091EB860000000} ; [ DW_TAG_member ] [const_b] [line 7, size 0, align 0, offset 0] [protected] [static] [from ]
-!21 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from float]
-!22 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
-!23 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"c", i32 9, i64 0, i64 0, i64 0, i32 4096, metadata !9, null} ; [ DW_TAG_member ] [c] [line 9, size 0, align 0, offset 0] [static] [from int]
-!24 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"const_c", i32 10, i64 0, i64 0, i64 0, i32 4096, metadata !25, i32 18} ; [ DW_TAG_member ] [const_c] [line 10, size 0, align 0, offset 0] [static] [from ]
-!25 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int]
-!26 = metadata !{i32 786445, metadata !33, metadata !13, metadata !"d", i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [d] [line 11, size 32, align 32, offset 0] [from int]
-!27 = metadata !{i32 786484, i32 0, metadata !13, metadata !"b", metadata !"b", metadata !"_ZN1C1bE", metadata !6, i32 15, metadata !9, i32 0, i32 1, i32* @_ZN1C1bE, metadata !19} ; [ DW_TAG_variable ] [b] [line 15] [def]
-!28 = metadata !{i32 786484, i32 0, metadata !13, metadata !"c", metadata !"c", metadata !"_ZN1C1cE", metadata !6, i32 16, metadata !9, i32 0, i32 1, i32* @_ZN1C1cE, metadata !23} ; [ DW_TAG_variable ] [c] [line 16] [def]
-!29 = metadata !{i32 786688, metadata !5, metadata !"instance_C", metadata !6, i32 20, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [instance_C] [line 20]
-!30 = metadata !{i32 20, i32 0, metadata !5, null}
-!31 = metadata !{i32 21, i32 0, metadata !5, null}
-!32 = metadata !{i32 22, i32 0, metadata !5, null}
-!33 = metadata !{metadata !"/usr/local/google/home/blaikie/Development/llvm/src/tools/clang/test/CodeGenCXX/debug-info-static-member.cpp", metadata !"/home/blaikie/local/Development/llvm/build/clang/x86-64/Debug/llvm"}
+!0 = !{!"0x11\004\00clang version 3.3 (trunk 171914)\000\00\000\00\000", !33, !1, !1, !3, !10,  !1} ; [ DW_TAG_compile_unit ] [/home/probinson/projects/upstream/static-member/test/debug-info-static-member.cpp] [DW_LANG_C_plus_plus]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00main\00main\00\0018\000\001\000\006\00256\000\0023", !33, !6, !7, null, i32 ()* @main, null, null, !1} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 23] [main]
+!6 = !{!"0x29", !33} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!12, !27, !28}
+!12 = !{!"0x34\00a\00a\00_ZN1C1aE\0014\000\001", null, !6, !9, i32* @_ZN1C1aE, !15} ; [ DW_TAG_variable ] [a] [line 14] [def]
+!13 = !{!"0x2\00C\001\0032\0032\000\000\000", !33, null, null, !14, null, null, null} ; [ DW_TAG_class_type ] [C] [line 1, size 32, align 32, offset 0] [def] [from ]
+!14 = !{!15, !16, !19, !20, !23, !24, !26}
+!15 = !{!"0xd\00a\003\000\000\000\004097", !33, !13, !9, null} ; [ DW_TAG_member ] [a] [line 3, size 0, align 0, offset 0] [private] [static] [from int]
+!16 = !{!"0xd\00const_a\004\000\000\000\004097", !33, !13, !17, i1 true} ; [ DW_TAG_member ] [const_a] [line 4, size 0, align 0, offset 0] [private] [static] [from ]
+!17 = !{!"0x26\00\000\000\000\000\000", null, null, !18} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from bool]
+!18 = !{!"0x24\00bool\000\008\008\000\000\002", null, null} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
+!19 = !{!"0xd\00b\006\000\000\000\004098", !33, !13, !9, null} ; [ DW_TAG_member ] [b] [line 6, size 0, align 0, offset 0] [protected] [static] [from int]
+!20 = !{!"0xd\00const_b\007\000\000\000\004098", !33, !13, !21, float 0x40091EB860000000} ; [ DW_TAG_member ] [const_b] [line 7, size 0, align 0, offset 0] [protected] [static] [from ]
+!21 = !{!"0x26\00\000\000\000\000\000", null, null, !22} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from float]
+!22 = !{!"0x24\00float\000\0032\0032\000\000\004", null, null} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!23 = !{!"0xd\00c\009\000\000\000\004099", !33, !13, !9, null} ; [ DW_TAG_member ] [c] [line 9, size 0, align 0, offset 0] [static] [from int]
+!24 = !{!"0xd\00const_c\0010\000\000\000\004099", !33, !13, !25, i32 18} ; [ DW_TAG_member ] [const_c] [line 10, size 0, align 0, offset 0] [static] [from ]
+!25 = !{!"0x26\00\000\000\000\000\000", null, null, !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int]
+!26 = !{!"0xd\00d\0011\0032\0032\000\003", !33, !13, !9} ; [ DW_TAG_member ] [d] [line 11, size 32, align 32, offset 0] [from int]
+!27 = !{!"0x34\00b\00b\00_ZN1C1bE\0015\000\001", null, !6, !9, i32* @_ZN1C1bE, !19} ; [ DW_TAG_variable ] [b] [line 15] [def]
+!28 = !{!"0x34\00c\00c\00_ZN1C1cE\0016\000\001", null, !6, !9, i32* @_ZN1C1cE, !23} ; [ DW_TAG_variable ] [c] [line 16] [def]
+!29 = !{!"0x100\00instance_C\0020\000", !5, !6, !13} ; [ DW_TAG_auto_variable ] [instance_C] [line 20]
+!30 = !MDLocation(line: 20, scope: !5)
+!31 = !MDLocation(line: 21, scope: !5)
+!32 = !MDLocation(line: 22, scope: !5)
+!33 = !{!"/usr/local/google/home/blaikie/Development/llvm/src/tools/clang/test/CodeGenCXX/debug-info-static-member.cpp", !"/home/blaikie/local/Development/llvm/build/clang/x86-64/Debug/llvm"}
 ; PRESENT verifies that static member declarations have these attributes:
 ; external, declaration, accessibility, and either DW_AT_MIPS_linkage_name
 ; (for variables) or DW_AT_const_value (for constants).
 ;
 ; PRESENT:      .debug_info contents:
+; PRESENT:      DW_TAG_variable
+; PRESENT-NEXT: DW_AT_specification {{.*}} "a"
+; PRESENT-NEXT: DW_AT_location
+; PRESENT-NEXT: DW_AT_linkage_name {{.*}} "_ZN1C1aE"
 ; PRESENT:      DW_TAG_class_type
 ; PRESENT-NEXT: DW_AT_name {{.*}} "C"
-; PRESENT:      0x[[DECL_A:[0-9a-f]+]]: DW_TAG_member
+; PRESENT:      DW_TAG_member
 ; PRESENT-NEXT: DW_AT_name {{.*}} "a"
 ; PRESENT:      DW_AT_external
 ; PRESENT:      DW_AT_declaration
-; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (0x03)
+; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (DW_ACCESS_private)
 ; PRESENT:      DW_TAG_member
 ; PRESENT-NEXT: DW_AT_name {{.*}} "const_a"
 ; PRESENT:      DW_AT_external
 ; PRESENT:      DW_AT_declaration
-; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (0x03)
+; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (DW_ACCESS_private)
 ; PRESENT:      DW_AT_const_value {{.*}} (1)
-; PRESENT:      0x[[DECL_B:[0-9a-f]+]]: DW_TAG_member
+; PRESENT:      DW_TAG_member
 ; PRESENT-NEXT: DW_AT_name {{.*}} "b"
-; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (0x02)
+; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (DW_ACCESS_protected)
 ; PRESENT:      DW_TAG_member
 ; PRESENT-NEXT: DW_AT_name {{.*}} "const_b"
-; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (0x02)
+; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (DW_ACCESS_protected)
 ; PRESENT:      DW_AT_const_value [DW_FORM_udata] (1078523331)
-; PRESENT:      0x[[DECL_C:[0-9a-f]+]]: DW_TAG_member
+; PRESENT:      DW_TAG_member
 ; PRESENT-NEXT: DW_AT_name {{.*}} "c"
-; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (0x01)
+; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (DW_ACCESS_public)
 ; PRESENT:      DW_TAG_member
 ; PRESENT-NEXT: DW_AT_name {{.*}} "const_c"
-; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (0x01)
+; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (DW_ACCESS_public)
 ; PRESENT:      DW_AT_const_value {{.*}} (18)
 ; While we're here, a normal member has data_member_location and
 ; accessibility attributes.
 ; PRESENT:      DW_TAG_member
 ; PRESENT-NEXT: DW_AT_name {{.*}} "d"
 ; PRESENT:      DW_AT_data_member_location
-; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (0x01)
+; PRESENT:      DW_AT_accessibility [DW_FORM_data1]   (DW_ACCESS_public)
 ; PRESENT:      NULL
 ; Definitions point back to their declarations, and have a location.
 ; PRESENT:      DW_TAG_variable
-; PRESENT-NEXT: DW_AT_specification {{.*}} {0x[[DECL_A]]}
-; PRESENT-NEXT: DW_AT_location
-; PRESENT-NEXT: DW_AT_linkage_name {{.*}} "_ZN1C1aE"
-; PRESENT:      DW_TAG_variable
-; PRESENT-NEXT: DW_AT_specification {{.*}} {0x[[DECL_B]]}
+; PRESENT-NEXT: DW_AT_specification {{.*}} "b"
 ; PRESENT-NEXT: DW_AT_location
 ; PRESENT-NEXT: DW_AT_linkage_name {{.*}} "_ZN1C1bE"
 ; PRESENT:      DW_TAG_variable
-; PRESENT-NEXT: DW_AT_specification {{.*}} {0x[[DECL_C]]}
+; PRESENT-NEXT: DW_AT_specification {{.*}} "c"
 ; PRESENT-NEXT: DW_AT_location
 ; PRESENT-NEXT: DW_AT_linkage_name {{.*}} "_ZN1C1cE"
 
 ; For Darwin gdb:
 ; DARWINP:      .debug_info contents:
+; DARWINP:      DW_TAG_variable
+; DARWINP-NEXT: DW_AT_specification {{.*}} "a"
+; DARWINP-NEXT: DW_AT_location
+; DARWINP-NEXT: DW_AT_linkage_name {{.*}} "_ZN1C1aE"
 ; DARWINP:      DW_TAG_class_type
 ; DARWINP-NEXT: DW_AT_name {{.*}} "C"
-; DARWINP:      0x[[DECL_A:[0-9a-f]+]]: DW_TAG_member
+; DARWINP:      DW_TAG_member
 ; DARWINP-NEXT: DW_AT_name {{.*}} "a"
 ; DARWINP:      DW_AT_external
 ; DARWINP:      DW_AT_declaration
-; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (0x03)
+; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (DW_ACCESS_private)
 ; DARWINP:      DW_TAG_member
 ; DARWINP-NEXT: DW_AT_name {{.*}} "const_a"
 ; DARWINP:      DW_AT_external
 ; DARWINP:      DW_AT_declaration
-; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (0x03)
+; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (DW_ACCESS_private)
 ; DARWINP:      DW_AT_const_value {{.*}} (1)
-; DARWINP:      0x[[DECL_B:[0-9a-f]+]]: DW_TAG_member
+; DARWINP:      DW_TAG_member
 ; DARWINP-NEXT: DW_AT_name {{.*}} "b"
-; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (0x02)
+; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (DW_ACCESS_protected)
 ; DARWINP:      DW_TAG_member
 ; DARWINP-NEXT: DW_AT_name {{.*}} "const_b"
-; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (0x02)
+; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (DW_ACCESS_protected)
 ; DARWINP:      DW_AT_const_value [DW_FORM_udata] (1078523331)
-; DARWINP:      0x[[DECL_C:[0-9a-f]+]]: DW_TAG_member
+; DARWINP:      DW_TAG_member
 ; DARWINP-NEXT: DW_AT_name {{.*}} "c"
-; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (0x01)
+; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (DW_ACCESS_public)
 ; DARWINP:      DW_TAG_member
 ; DARWINP-NEXT: DW_AT_name {{.*}} "const_c"
-; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (0x01)
+; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (DW_ACCESS_public)
 ; DARWINP:      DW_AT_const_value {{.*}} (18)
 ; While we're here, a normal member has data_member_location and
 ; accessibility attributes.
 ; DARWINP:      DW_TAG_member
 ; DARWINP-NEXT: DW_AT_name {{.*}} "d"
 ; DARWINP:      DW_AT_data_member_location
-; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (0x01)
+; DARWINP:      DW_AT_accessibility [DW_FORM_data1]   (DW_ACCESS_public)
 ; DARWINP:      NULL
 ; Definitions point back to their declarations, and have a location.
 ; DARWINP:      DW_TAG_variable
-; DARWINP-NEXT: DW_AT_specification {{.*}} {0x[[DECL_A]]}
-; DARWINP-NEXT: DW_AT_location
-; DARWINP-NEXT: DW_AT_linkage_name {{.*}} "_ZN1C1aE"
-; DARWINP:      DW_TAG_variable
-; DARWINP-NEXT: DW_AT_specification {{.*}} {0x[[DECL_B]]}
+; DARWINP-NEXT: DW_AT_specification {{.*}} "b"
 ; DARWINP-NEXT: DW_AT_location
 ; DARWINP-NEXT: DW_AT_linkage_name {{.*}} "_ZN1C1bE"
 ; DARWINP:      DW_TAG_variable
-; DARWINP-NEXT: DW_AT_specification {{.*}} {0x[[DECL_C]]}
+; DARWINP-NEXT: DW_AT_specification {{.*}} "c"
 ; DARWINP-NEXT: DW_AT_location
 ; DARWINP-NEXT: DW_AT_linkage_name {{.*}} "_ZN1C1cE"
 
@@ -253,4 +253,4 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 ; DARWINA-NOT:  DW_AT_const_value
 ; DARWINA-NOT:  DW_AT_location
 ; DARWINA:      NULL
-!34 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!34 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/debug-loc-asan.ll b/test/DebugInfo/X86/debug-loc-asan.ll
index b1980ecda2d8..13e193bdf116 100644
--- a/test/DebugInfo/X86/debug-loc-asan.ll
+++ b/test/DebugInfo/X86/debug-loc-asan.ll
@@ -21,18 +21,14 @@
 ; CHECK: .Ldebug_loc{{[0-9]+}}:
 ; We expect two location ranges for the variable.
 
-; First, it is stored in %rdx:
-; CHECK:      .Lset{{[0-9]+}} = .Lfunc_begin0-.Lfunc_begin0
-; CHECK-NEXT: .quad .Lset{{[0-9]+}}
-; CHECK-NEXT: .Lset{{[0-9]+}} = [[START_LABEL]]-.Lfunc_begin0
-; CHECK-NEXT: .quad .Lset{{[0-9]+}}
-; CHECK: DW_OP_reg5
+; First, its address is stored in %rdi:
+; CHECK:      .quad .Lfunc_begin0-.Lfunc_begin0
+; CHECK-NEXT: .quad [[START_LABEL]]-.Lfunc_begin0
+; CHECK: DW_OP_breg5
 
 ; Then it's addressed via %rsp:
-; CHECK:      .Lset{{[0-9]+}} = [[START_LABEL]]-.Lfunc_begin0
-; CHECK-NEXT: .quad .Lset{{[0-9]+}}
-; CHECK-NEXT: .Lset{{[0-9]+}} = .Lfunc_end0-.Lfunc_begin0
-; CHECK-NEXT: .quad .Lset{{[0-9]+}}
+; CHECK:      .quad [[START_LABEL]]-.Lfunc_begin0
+; CHECK-NEXT: .Lfunc_end0-.Lfunc_begin0
 ; CHECK: DW_OP_breg7
 ; CHECK-NEXT: [[OFFSET]]
 ; CHECK: DW_OP_deref
@@ -81,7 +77,7 @@ entry:
   %21 = inttoptr i64 %20 to i8*
   %22 = load i8* %21
   %23 = icmp ne i8 %22, 0
-  call void @llvm.dbg.declare(metadata !{i32* %8}, metadata !12)
+  call void @llvm.dbg.declare(metadata i32* %8, metadata !12, metadata !14)
   br i1 %23, label %24, label %30
 
 ; <label>:24                                      ; preds = %5
@@ -147,7 +143,7 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 define internal void @asan.module_ctor() {
   call void @__asan_init_v3()
@@ -169,18 +165,18 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!9, !10}
 !llvm.ident = !{!11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (209308)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/llvm_cmake_gcc/test.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"test.cc", metadata !"/llvm_cmake_gcc"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"_Z3bari", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3bari, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [bar]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/llvm_cmake_gcc/test.cc]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!11 = metadata !{metadata !"clang version 3.5.0 (209308)"}
-!12 = metadata !{i32 786689, metadata !4, metadata !"y", metadata !5, i32 16777217, metadata !8, i32 0, i32 0, metadata !14} ; [ DW_TAG_arg_variable ] [y] [line 1]
-!13 = metadata !{i32 2, i32 0, metadata !4, null}
-!14 = metadata !{i64 2}
+!0 = !{!"0x11\004\00clang version 3.5.0 (209308)\000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/llvm_cmake_gcc/test.cc] [DW_LANG_C_plus_plus]
+!1 = !{!"test.cc", !"/llvm_cmake_gcc"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00bar\00bar\00_Z3bari\001\000\001\000\006\00256\000\001", !1, !5, !6, null, i32 (i32)* @_Z3bari, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [bar]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/llvm_cmake_gcc/test.cc]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 2}
+!11 = !{!"clang version 3.5.0 (209308)"}
+!12 = !{!"0x101\00y\0016777217\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [y] [line 1]
+!13 = !MDLocation(line: 2, scope: !4)
+!14 = !{!"0x102\006"} ; [ DW_TAG_expression ] [DW_OP_deref]
diff --git a/test/DebugInfo/X86/debug-loc-offset.ll b/test/DebugInfo/X86/debug-loc-offset.ll
index 7866d0eac5c9..9022891e06f3 100644
--- a/test/DebugInfo/X86/debug-loc-offset.ll
+++ b/test/DebugInfo/X86/debug-loc-offset.ll
@@ -64,20 +64,20 @@ define i32 @_Z3bari(i32 %b) #0 {
 entry:
   %b.addr = alloca i32, align 4
   store i32 %b, i32* %b.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %b.addr}, metadata !21), !dbg !22
+  call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !21, metadata !{!"0x102"}), !dbg !22
   %0 = load i32* %b.addr, align 4, !dbg !23
   %add = add nsw i32 %0, 4, !dbg !23
   ret i32 %add, !dbg !23
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 define void @_Z3baz1A(%struct.A* %a) #2 {
 entry:
   %z = alloca i32, align 4
-  call void @llvm.dbg.declare(metadata !{%struct.A* %a}, metadata !24), !dbg !25
-  call void @llvm.dbg.declare(metadata !{i32* %z}, metadata !26), !dbg !27
+  call void @llvm.dbg.declare(metadata %struct.A* %a, metadata !24, metadata !{!"0x102"}), !dbg !25
+  call void @llvm.dbg.declare(metadata i32* %z, metadata !26, metadata !{!"0x102"}), !dbg !27
   store i32 2, i32* %z, align 4, !dbg !27
   %var = getelementptr inbounds %struct.A* %a, i32 0, i32 1, !dbg !28
   %0 = load i32* %var, align 4, !dbg !28
@@ -116,38 +116,38 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.module.flags = !{!18, !19}
 !llvm.ident = !{!20, !20}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (210479)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/llvm_cmake_gcc/debug-loc-offset1.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"debug-loc-offset1.cc", metadata !"/llvm_cmake_gcc"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"_Z3bari", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3bari, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [bar]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/llvm_cmake_gcc/debug-loc-offset1.cc]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786449, metadata !10, i32 4, metadata !"clang version 3.5.0 (210479)", i1 false, metadata !"", i32 0, metadata !2, metadata !11, metadata !13, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/llvm_cmake_gcc/debug-loc-offset2.cc] [DW_LANG_C_plus_plus]
-!10 = metadata !{metadata !"debug-loc-offset2.cc", metadata !"/llvm_cmake_gcc"}
-!11 = metadata !{metadata !12}
-!12 = metadata !{i32 786451, metadata !10, null, metadata !"A", i32 1, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 1, size 0, align 0, offset 0] [decl] [from ]
-!13 = metadata !{metadata !14}
-!14 = metadata !{i32 786478, metadata !10, metadata !15, metadata !"baz", metadata !"baz", metadata !"_Z3baz1A", i32 6, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.A*)* @_Z3baz1A, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [baz]
-!15 = metadata !{i32 786473, metadata !10}        ; [ DW_TAG_file_type ] [/llvm_cmake_gcc/debug-loc-offset2.cc]
-!16 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!17 = metadata !{null, metadata !12}
-!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!19 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!20 = metadata !{metadata !"clang version 3.5.0 (210479)"}
-!21 = metadata !{i32 786689, metadata !4, metadata !"b", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 1]
-!22 = metadata !{i32 1, i32 0, metadata !4, null}
-!23 = metadata !{i32 2, i32 0, metadata !4, null}
-!24 = metadata !{i32 786689, metadata !14, metadata !"a", metadata !15, i32 16777222, metadata !"_ZTS1A", i32 8192, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 6]
-!25 = metadata !{i32 6, i32 0, metadata !14, null}
-!26 = metadata !{i32 786688, metadata !14, metadata !"z", metadata !15, i32 7, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [z] [line 7]
-!27 = metadata !{i32 7, i32 0, metadata !14, null}
-!28 = metadata !{i32 8, i32 0, metadata !29, null} ; [ DW_TAG_imported_declaration ]
-!29 = metadata !{i32 786443, metadata !10, metadata !14, i32 8, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/llvm_cmake_gcc/debug-loc-offset2.cc]
-!30 = metadata !{i32 9, i32 0, metadata !29, null}
-!31 = metadata !{i32 10, i32 0, metadata !32, null}
-!32 = metadata !{i32 786443, metadata !10, metadata !14, i32 10, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/llvm_cmake_gcc/debug-loc-offset2.cc]
-!33 = metadata !{i32 11, i32 0, metadata !32, null}
-!34 = metadata !{i32 12, i32 0, metadata !14, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 (210479)\000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/llvm_cmake_gcc/debug-loc-offset1.cc] [DW_LANG_C_plus_plus]
+!1 = !{!"debug-loc-offset1.cc", !"/llvm_cmake_gcc"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00bar\00bar\00_Z3bari\001\000\001\000\006\00256\000\001", !1, !5, !6, null, i32 (i32)* @_Z3bari, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [bar]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/llvm_cmake_gcc/debug-loc-offset1.cc]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x11\004\00clang version 3.5.0 (210479)\000\00\000\00\001", !10, !2, !11, !13, !2, !2} ; [ DW_TAG_compile_unit ] [/llvm_cmake_gcc/debug-loc-offset2.cc] [DW_LANG_C_plus_plus]
+!10 = !{!"debug-loc-offset2.cc", !"/llvm_cmake_gcc"}
+!11 = !{!12}
+!12 = !{!"0x13\00A\001\000\000\000\004\000", !10, null, null, null, null, null, !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 1, size 0, align 0, offset 0] [decl] [from ]
+!13 = !{!14}
+!14 = !{!"0x2e\00baz\00baz\00_Z3baz1A\006\000\001\000\006\00256\000\006", !10, !15, !16, null, void (%struct.A*)* @_Z3baz1A, null, null, !2} ; [ DW_TAG_subprogram ] [line 6] [def] [baz]
+!15 = !{!"0x29", !10}        ; [ DW_TAG_file_type ] [/llvm_cmake_gcc/debug-loc-offset2.cc]
+!16 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !17, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!17 = !{null, !12}
+!18 = !{i32 2, !"Dwarf Version", i32 4}
+!19 = !{i32 2, !"Debug Info Version", i32 2}
+!20 = !{!"clang version 3.5.0 (210479)"}
+!21 = !{!"0x101\00b\0016777217\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [b] [line 1]
+!22 = !MDLocation(line: 1, scope: !4)
+!23 = !MDLocation(line: 2, scope: !4)
+!24 = !{!"0x101\00a\0016777222\008192", !14, !15, !"_ZTS1A"} ; [ DW_TAG_arg_variable ] [a] [line 6]
+!25 = !MDLocation(line: 6, scope: !14)
+!26 = !{!"0x100\00z\007\000", !14, !15, !8} ; [ DW_TAG_auto_variable ] [z] [line 7]
+!27 = !MDLocation(line: 7, scope: !14)
+!28 = !MDLocation(line: 8, scope: !29)
+!29 = !{!"0xb\008\000\000", !10, !14} ; [ DW_TAG_lexical_block ] [/llvm_cmake_gcc/debug-loc-offset2.cc]
+!30 = !MDLocation(line: 9, scope: !29)
+!31 = !MDLocation(line: 10, scope: !32)
+!32 = !{!"0xb\0010\000\000", !10, !14} ; [ DW_TAG_lexical_block ] [/llvm_cmake_gcc/debug-loc-offset2.cc]
+!33 = !MDLocation(line: 11, scope: !32)
+!34 = !MDLocation(line: 12, scope: !14)
diff --git a/test/DebugInfo/X86/debug-ranges-offset.ll b/test/DebugInfo/X86/debug-ranges-offset.ll
index 365ba171a0d9..fd8fe0e11f35 100644
--- a/test/DebugInfo/X86/debug-ranges-offset.ll
+++ b/test/DebugInfo/X86/debug-ranges-offset.ll
@@ -31,11 +31,11 @@ entry:
   %call = call i8* @_Znwm(i64 4) #4, !dbg !19
   %_msret = load i64* getelementptr inbounds ([8 x i64]* @__msan_retval_tls, i64 0, i64 0), align 8, !dbg !19
   %3 = bitcast i8* %call to i32*, !dbg !19
-  tail call void @llvm.dbg.value(metadata !{i32* %3}, i64 0, metadata !9), !dbg !19
+  tail call void @llvm.dbg.value(metadata i32* %3, i64 0, metadata !9, metadata !{!"0x102"}), !dbg !19
   %4 = inttoptr i64 %1 to i64*, !dbg !19
   store i64 %_msret, i64* %4, align 8, !dbg !19
   store volatile i32* %3, i32** %p, align 8, !dbg !19
-  tail call void @llvm.dbg.value(metadata !{i32** %p}, i64 0, metadata !9), !dbg !19
+  tail call void @llvm.dbg.value(metadata i32** %p, i64 0, metadata !9, metadata !{!"0x102"}), !dbg !19
   %p.0.p.0. = load volatile i32** %p, align 8, !dbg !20
   %_msld = load i64* %4, align 8, !dbg !20
   %_mscmp = icmp eq i64 %_msld, 0, !dbg !20
@@ -96,11 +96,11 @@ entry:
   %call.i = call i8* @_Znwm(i64 4) #4, !dbg !30
   %_msret = load i64* getelementptr inbounds ([8 x i64]* @__msan_retval_tls, i64 0, i64 0), align 8, !dbg !30
   %3 = bitcast i8* %call.i to i32*, !dbg !30
-  tail call void @llvm.dbg.value(metadata !{i32* %3}, i64 0, metadata !32), !dbg !30
+  tail call void @llvm.dbg.value(metadata i32* %3, i64 0, metadata !32, metadata !{!"0x102"}), !dbg !30
   %4 = inttoptr i64 %1 to i64*, !dbg !30
   store i64 %_msret, i64* %4, align 8, !dbg !30
   store volatile i32* %3, i32** %p.i, align 8, !dbg !30
-  tail call void @llvm.dbg.value(metadata !{i32** %p.i}, i64 0, metadata !32), !dbg !30
+  tail call void @llvm.dbg.value(metadata i32** %p.i, i64 0, metadata !32, metadata !{!"0x102"}), !dbg !30
   %p.i.0.p.0.p.0..i = load volatile i32** %p.i, align 8, !dbg !33
   %_msld = load i64* %4, align 8, !dbg !33
   %_mscmp = icmp eq i64 %_msld, 0, !dbg !33
@@ -148,7 +148,7 @@ _Z1fv.exit:                                       ; preds = %16, %if.then.i
 declare void @__msan_init()
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #2
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
 
 ; Function Attrs: nounwind
 declare i32 @puts(i8* nocapture readonly) #3
@@ -202,40 +202,40 @@ attributes #4 = { builtin }
 !llvm.module.flags = !{!16, !17}
 !llvm.ident = !{!18}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (trunk 207243) (llvm/trunk 207259)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"foo.cpp", metadata !"/usr/local/google/home/echristo/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !13}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"_Z1fv", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_Z1fv, null, null, metadata !8, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.cpp]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786688, metadata !4, metadata !"p", metadata !5, i32 4, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [p] [line 4]
-!10 = metadata !{i32 786485, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_volatile_type ] [line 0, size 0, align 0, offset 0] [from ]
-!11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!12 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!13 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 9, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [main]
-!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!15 = metadata !{metadata !12}
-!16 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!18 = metadata !{metadata !"clang version 3.5.0 (trunk 207243) (llvm/trunk 207259)"}
-!19 = metadata !{i32 4, i32 0, metadata !4, null}
-!20 = metadata !{i32 5, i32 0, metadata !21, null}
-!21 = metadata !{i32 786443, metadata !1, metadata !4, i32 5, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/foo.cpp]
-!22 = metadata !{metadata !"branch_weights", i32 1000, i32 1}
-!23 = metadata !{metadata !24, metadata !24, i64 0}
-!24 = metadata !{metadata !"int", metadata !25, i64 0}
-!25 = metadata !{metadata !"omnipotent char", metadata !26, i64 0}
-!26 = metadata !{metadata !"Simple C/C++ TBAA"}
-!27 = metadata !{metadata !"branch_weights", i32 1, i32 1000}
-!28 = metadata !{i32 6, i32 0, metadata !21, null}
-!29 = metadata !{i32 7, i32 0, metadata !4, null}
-!30 = metadata !{i32 4, i32 0, metadata !4, metadata !31}
-!31 = metadata !{i32 10, i32 0, metadata !13, null}
-!32 = metadata !{i32 786688, metadata !4, metadata !"p", metadata !5, i32 4, metadata !10, i32 0, metadata !31} ; [ DW_TAG_auto_variable ] [p] [line 4]
-!33 = metadata !{i32 5, i32 0, metadata !21, metadata !31}
-!34 = metadata !{i32 6, i32 0, metadata !21, metadata !31}
-!35 = metadata !{i32 7, i32 0, metadata !4, metadata !31}
-!36 = metadata !{i32 11, i32 0, metadata !13, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 (trunk 207243) (llvm/trunk 207259)\001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"foo.cpp", !"/usr/local/google/home/echristo/tmp"}
+!2 = !{}
+!3 = !{!4, !13}
+!4 = !{!"0x2e\00f\00f\00_Z1fv\003\000\001\000\006\00256\001\003", !1, !5, !6, null, void ()* @_Z1fv, null, null, !8} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{!9}
+!9 = !{!"0x100\00p\004\000", !4, !5, !10} ; [ DW_TAG_auto_variable ] [p] [line 4]
+!10 = !{!"0x35\00\000\000\000\000\000", null, null, !11} ; [ DW_TAG_volatile_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !{!"0xf\00\000\0064\0064\000\000", null, null, !12} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!12 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!13 = !{!"0x2e\00main\00main\00\009\000\001\000\006\00256\001\009", !1, !5, !14, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 9] [def] [main]
+!14 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = !{!12}
+!16 = !{i32 2, !"Dwarf Version", i32 4}
+!17 = !{i32 1, !"Debug Info Version", i32 2}
+!18 = !{!"clang version 3.5.0 (trunk 207243) (llvm/trunk 207259)"}
+!19 = !MDLocation(line: 4, scope: !4)
+!20 = !MDLocation(line: 5, scope: !21)
+!21 = !{!"0xb\005\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/foo.cpp]
+!22 = !{!"branch_weights", i32 1000, i32 1}
+!23 = !{!24, !24, i64 0}
+!24 = !{!"int", !25, i64 0}
+!25 = !{!"omnipotent char", !26, i64 0}
+!26 = !{!"Simple C/C++ TBAA"}
+!27 = !{!"branch_weights", i32 1, i32 1000}
+!28 = !MDLocation(line: 6, scope: !21)
+!29 = !MDLocation(line: 7, scope: !4)
+!30 = !MDLocation(line: 4, scope: !4, inlinedAt: !31)
+!31 = !MDLocation(line: 10, scope: !13)
+!32 = !{!"0x100\00p\004\000", !4, !5, !10, !31} ; [ DW_TAG_auto_variable ] [p] [line 4]
+!33 = !MDLocation(line: 5, scope: !21, inlinedAt: !31)
+!34 = !MDLocation(line: 6, scope: !21, inlinedAt: !31)
+!35 = !MDLocation(line: 7, scope: !4, inlinedAt: !31)
+!36 = !MDLocation(line: 11, scope: !13)
diff --git a/test/DebugInfo/X86/debug_frame.ll b/test/DebugInfo/X86/debug_frame.ll
index 67f2e5dffab0..56122e3c5306 100644
--- a/test/DebugInfo/X86/debug_frame.ll
+++ b/test/DebugInfo/X86/debug_frame.ll
@@ -11,12 +11,12 @@ entry:
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!7}
-!5 = metadata !{metadata !0}
+!5 = !{!0}
 
-!0 = metadata !{i32 786478, metadata !6, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
-!1 = metadata !{i32 786473, metadata !6} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !6, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !5, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !6, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null}
-!6 = metadata !{metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/llvm/build"}
-!7 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00f\00f\00\001\000\001\000\006\00256\001\001", !6, !1, !3, null, void ()* @f, null, null, null} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!1 = !{!"0x29", !6} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 3.0 ()\001\00\000\00\000", !6, !4, !4, !5, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !6, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null}
+!6 = !{!"/home/espindola/llvm/test.c", !"/home/espindola/llvm/build"}
+!7 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/decl-derived-member.ll b/test/DebugInfo/X86/decl-derived-member.ll
index 4035602fb25f..8c15c53e8141 100644
--- a/test/DebugInfo/X86/decl-derived-member.ll
+++ b/test/DebugInfo/X86/decl-derived-member.ll
@@ -37,7 +37,7 @@ define linkonce_odr void @_ZN3fooC2Ev(%struct.foo* %this) unnamed_addr #0 align
 entry:
   %this.addr = alloca %struct.foo*, align 8
   store %struct.foo* %this, %struct.foo** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.foo** %this.addr}, metadata !36), !dbg !38
+  call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !36, metadata !{!"0x102"}), !dbg !38
   %this1 = load %struct.foo** %this.addr
   %b = getelementptr inbounds %struct.foo* %this1, i32 0, i32 0, !dbg !39
   call void @_ZN4baseC2Ev(%struct.base* %b) #2, !dbg !39
@@ -49,7 +49,7 @@ define linkonce_odr void @_ZN3fooD2Ev(%struct.foo* %this) unnamed_addr #1 align
 entry:
   %this.addr = alloca %struct.foo*, align 8
   store %struct.foo* %this, %struct.foo** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.foo** %this.addr}, metadata !40), !dbg !41
+  call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !40, metadata !{!"0x102"}), !dbg !41
   %this1 = load %struct.foo** %this.addr
   %b = getelementptr inbounds %struct.foo* %this1, i32 0, i32 0, !dbg !42
   call void @_ZN4baseD1Ev(%struct.base* %b), !dbg !42
@@ -60,7 +60,7 @@ entry:
 declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*) #2
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #3
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #3
 
 declare void @_ZN4baseD1Ev(%struct.base*) #4
 
@@ -69,7 +69,7 @@ define linkonce_odr void @_ZN4baseC2Ev(%struct.base* %this) unnamed_addr #0 alig
 entry:
   %this.addr = alloca %struct.base*, align 8
   store %struct.base* %this, %struct.base** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.base** %this.addr}, metadata !45), !dbg !47
+  call void @llvm.dbg.declare(metadata %struct.base** %this.addr, metadata !45, metadata !{!"0x102"}), !dbg !47
   %this1 = load %struct.base** %this.addr
   %0 = bitcast %struct.base* %this1 to i8***, !dbg !48
   store i8** getelementptr inbounds ([4 x i8*]* @_ZTV4base, i64 0, i64 2), i8*** %0, !dbg !48
@@ -92,53 +92,53 @@ attributes #4 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.module.flags = !{!32, !33}
 !llvm.ident = !{!34}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (trunk 203673) (llvm/trunk 203681)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !8, metadata !30, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"foo.cc", metadata !"/usr/local/google/home/echristo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !7}
-!4 = metadata !{i32 786451, metadata !1, null, metadata !"foo", i32 5, i64 64, i64 64, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 5, size 64, align 64, offset 0] [def] [from ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{i32 786445, metadata !1, metadata !"_ZTS3foo", metadata !"b", i32 6, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS4base"} ; [ DW_TAG_member ] [b] [line 6, size 64, align 64, offset 0] [from _ZTS4base]
-!7 = metadata !{i32 786451, metadata !1, null, metadata !"base", i32 1, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, metadata !"_ZTS4base"} ; [ DW_TAG_structure_type ] [base] [line 1, size 0, align 0, offset 0] [decl] [from ]
-!8 = metadata !{metadata !9, metadata !13, metadata !19, metadata !22, metadata !28}
-!9 = metadata !{i32 786478, metadata !1, metadata !10, metadata !"__cxx_global_var_init", metadata !"__cxx_global_var_init", metadata !"", i32 9, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @__cxx_global_var_init, null, null, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [local] [def] [__cxx_global_var_init]
-!10 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/foo.cc]
-!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{null}
-!13 = metadata !{i32 786478, metadata !1, metadata !"_ZTS3foo", metadata !"~foo", metadata !"~foo", metadata !"_ZN3fooD2Ev", i32 5, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 320, i1 false, void (%struct.foo*)* @_ZN3fooD2Ev, null, metadata !17, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [~foo]
-!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!15 = metadata !{null, metadata !16}
-!16 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS3foo]
-!17 = metadata !{i32 786478, null, metadata !"_ZTS3foo", metadata !"~foo", metadata !"~foo", metadata !"", i32 0, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 0} ; [ DW_TAG_subprogram ] [line 0] [~foo]
-!18 = metadata !{i32 786468}
-!19 = metadata !{i32 786478, metadata !1, metadata !"_ZTS3foo", metadata !"foo", metadata !"foo", metadata !"_ZN3fooC2Ev", i32 5, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 320, i1 false, void (%struct.foo*)* @_ZN3fooC2Ev, null, metadata !20, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [foo]
-!20 = metadata !{i32 786478, null, metadata !"_ZTS3foo", metadata !"foo", metadata !"foo", metadata !"", i32 0, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !21, i32 0} ; [ DW_TAG_subprogram ] [line 0] [foo]
-!21 = metadata !{i32 786468}
-!22 = metadata !{i32 786478, metadata !1, metadata !"_ZTS4base", metadata !"base", metadata !"base", metadata !"_ZN4baseC2Ev", i32 1, metadata !23, i1 false, i1 true, i32 0, i32 0, null, i32 320, i1 false, void (%struct.base*)* @_ZN4baseC2Ev, null, metadata !26, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [base]
-!23 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !24, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!24 = metadata !{null, metadata !25}
-!25 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS4base"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS4base]
-!26 = metadata !{i32 786478, null, metadata !"_ZTS4base", metadata !"base", metadata !"base", metadata !"", i32 0, metadata !23, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !27, i32 0} ; [ DW_TAG_subprogram ] [line 0] [base]
-!27 = metadata !{i32 786468}
-!28 = metadata !{i32 786478, metadata !1, metadata !10, metadata !"", metadata !"", metadata !"_GLOBAL__I_a", i32 1, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 64, i1 false, void ()* @_GLOBAL__I_a, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [local] [def]
-!29 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!30 = metadata !{metadata !31}
-!31 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !10, i32 9, metadata !4, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 9] [def]
-!32 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!34 = metadata !{metadata !"clang version 3.5.0 (trunk 203673) (llvm/trunk 203681)"}
-!35 = metadata !{i32 9, i32 0, metadata !9, null}
-!36 = metadata !{i32 786689, metadata !19, metadata !"this", null, i32 16777216, metadata !37, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!37 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3foo]
-!38 = metadata !{i32 0, i32 0, metadata !19, null}
-!39 = metadata !{i32 5, i32 0, metadata !19, null}
-!40 = metadata !{i32 786689, metadata !13, metadata !"this", null, i32 16777216, metadata !37, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!41 = metadata !{i32 0, i32 0, metadata !13, null}
-!42 = metadata !{i32 5, i32 0, metadata !43, null}
-!43 = metadata !{i32 786443, metadata !1, metadata !13, i32 5, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/foo.cc]
-!44 = metadata !{i32 5, i32 0, metadata !13, null}
-!45 = metadata !{i32 786689, metadata !22, metadata !"this", null, i32 16777216, metadata !46, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!46 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS4base"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS4base]
-!47 = metadata !{i32 0, i32 0, metadata !22, null}
-!48 = metadata !{i32 1, i32 0, metadata !22, null}
-!49 = metadata !{i32 1, i32 0, metadata !28, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 (trunk 203673) (llvm/trunk 203681)\000\00\000\00\001", !1, !2, !3, !8, !30, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cc] [DW_LANG_C_plus_plus]
+!1 = !{!"foo.cc", !"/usr/local/google/home/echristo"}
+!2 = !{}
+!3 = !{!4, !7}
+!4 = !{!"0x13\00foo\005\0064\0064\000\000\000", !1, null, null, !5, null, null, !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 5, size 64, align 64, offset 0] [def] [from ]
+!5 = !{!6}
+!6 = !{!"0xd\00b\006\0064\0064\000\000", !1, !"_ZTS3foo", !"_ZTS4base"} ; [ DW_TAG_member ] [b] [line 6, size 64, align 64, offset 0] [from _ZTS4base]
+!7 = !{!"0x13\00base\001\000\000\000\004\000", !1, null, null, null, null, null, !"_ZTS4base"} ; [ DW_TAG_structure_type ] [base] [line 1, size 0, align 0, offset 0] [decl] [from ]
+!8 = !{!9, !13, !19, !22, !28}
+!9 = !{!"0x2e\00__cxx_global_var_init\00__cxx_global_var_init\00\009\001\001\000\006\00256\000\009", !1, !10, !11, null, void ()* @__cxx_global_var_init, null, null, !2} ; [ DW_TAG_subprogram ] [line 9] [local] [def] [__cxx_global_var_init]
+!10 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/foo.cc]
+!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{null}
+!13 = !{!"0x2e\00~foo\00~foo\00_ZN3fooD2Ev\005\000\001\000\006\00320\000\005", !1, !"_ZTS3foo", !14, null, void (%struct.foo*)* @_ZN3fooD2Ev, null, !17, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [~foo]
+!14 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = !{null, !16}
+!16 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS3foo]
+!17 = !{!"0x2e\00~foo\00~foo\00\000\000\000\000\006\00320\000\000", null, !"_ZTS3foo", !14, null, null, null, i32 0, !18} ; [ DW_TAG_subprogram ] [line 0] [~foo]
+!18 = !{i32 786468}
+!19 = !{!"0x2e\00foo\00foo\00_ZN3fooC2Ev\005\000\001\000\006\00320\000\005", !1, !"_ZTS3foo", !14, null, void (%struct.foo*)* @_ZN3fooC2Ev, null, !20, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [foo]
+!20 = !{!"0x2e\00foo\00foo\00\000\000\000\000\006\00320\000\000", null, !"_ZTS3foo", !14, null, null, null, i32 0, !21} ; [ DW_TAG_subprogram ] [line 0] [foo]
+!21 = !{i32 786468}
+!22 = !{!"0x2e\00base\00base\00_ZN4baseC2Ev\001\000\001\000\006\00320\000\001", !1, !"_ZTS4base", !23, null, void (%struct.base*)* @_ZN4baseC2Ev, null, !26, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [base]
+!23 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !24, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!24 = !{null, !25}
+!25 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS4base"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS4base]
+!26 = !{!"0x2e\00base\00base\00\000\000\000\000\006\00320\000\000", null, !"_ZTS4base", !23, null, null, null, i32 0, !27} ; [ DW_TAG_subprogram ] [line 0] [base]
+!27 = !{i32 786468}
+!28 = !{!"0x2e\00\00\00_GLOBAL__I_a\001\001\001\000\006\0064\000\001", !1, !10, !29, null, void ()* @_GLOBAL__I_a, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [local] [def]
+!29 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!30 = !{!31}
+!31 = !{!"0x34\00f\00f\00\009\000\001", null, !10, !4, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 9] [def]
+!32 = !{i32 2, !"Dwarf Version", i32 4}
+!33 = !{i32 1, !"Debug Info Version", i32 2}
+!34 = !{!"clang version 3.5.0 (trunk 203673) (llvm/trunk 203681)"}
+!35 = !MDLocation(line: 9, scope: !9)
+!36 = !{!"0x101\00this\0016777216\001088", !19, null, !37} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!37 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3foo]
+!38 = !MDLocation(line: 0, scope: !19)
+!39 = !MDLocation(line: 5, scope: !19)
+!40 = !{!"0x101\00this\0016777216\001088", !13, null, !37} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!41 = !MDLocation(line: 0, scope: !13)
+!42 = !MDLocation(line: 5, scope: !43)
+!43 = !{!"0xb\005\000\000", !1, !13} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/foo.cc]
+!44 = !MDLocation(line: 5, scope: !13)
+!45 = !{!"0x101\00this\0016777216\001088", !22, null, !46} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!46 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS4base"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS4base]
+!47 = !MDLocation(line: 0, scope: !22)
+!48 = !MDLocation(line: 1, scope: !22)
+!49 = !MDLocation(line: 1, scope: !28)
diff --git a/test/DebugInfo/X86/discriminator.ll b/test/DebugInfo/X86/discriminator.ll
index aafdae1626d5..185f7cd24164 100644
--- a/test/DebugInfo/X86/discriminator.ll
+++ b/test/DebugInfo/X86/discriminator.ll
@@ -41,23 +41,23 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [./discriminator.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"discriminator.c", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [./discriminator.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!9 = metadata !{metadata !"clang version 3.5 "}
-!10 = metadata !{i32 2, i32 0, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./discriminator.c]
-!12 = metadata !{i32 3, i32 0, metadata !4, null}
-!13 = metadata !{i32 4, i32 0, metadata !4, null}
-!14 = metadata !{i32 2, i32 0, metadata !15, null}
-!15 = metadata !{i32 786443, metadata !1, metadata !4, i32 2, i32 0, i32 42, i32 1} ; [ DW_TAG_lexical_block ] [./discriminator.c]
+!0 = !{!"0x11\0012\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [./discriminator.c] [DW_LANG_C99]
+!1 = !{!"discriminator.c", !"."}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, i32 (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [./discriminator.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 1, !"Debug Info Version", i32 2}
+!9 = !{!"clang version 3.5 "}
+!10 = !MDLocation(line: 2, scope: !11)
+!11 = !{!"0xb\002\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [./discriminator.c]
+!12 = !MDLocation(line: 3, scope: !4)
+!13 = !MDLocation(line: 4, scope: !4)
+!14 = !MDLocation(line: 2, scope: !15)
+!15 = !{!"0xb\0042", !1, !4} ; [ DW_TAG_lexical_block ] [./discriminator.c]
 
 ; CHECK: Address            Line   Column File   ISA Discriminator Flags
 ; CHECK: ------------------ ------ ------ ------ --- ------------- -------------
-; CHECK: 0x0000000000000011      2      0      1   0            42  is_stmt
+; CHECK: 0x0000000000000011      2      0      1   0            42 {{$}}
diff --git a/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll b/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
index 021b89e1e1ca..1bda8ec79844 100644
--- a/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
+++ b/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
@@ -28,14 +28,14 @@ target triple = "x86_64-unknown-linux-gnu"
 ; Function Attrs: nounwind readnone uwtable
 define i32 @_Z3fooi(i32 %bar) #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %bar}, i64 0, metadata !10), !dbg !20
+  tail call void @llvm.dbg.value(metadata i32 %bar, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !20
   ret i32 %bar, !dbg !20
 }
 
 ; Function Attrs: nounwind readnone uwtable
 define i32 @_Z4foo2i(i32 %bar2) #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %bar2}, i64 0, metadata !13), !dbg !21
+  tail call void @llvm.dbg.value(metadata i32 %bar2, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !21
   ret i32 %bar2, !dbg !21
 }
 
@@ -51,7 +51,7 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #2
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
 
 attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readonly uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
@@ -60,30 +60,30 @@ attributes #2 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!19, !26}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (191881)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !17, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/debug_ranges/a.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"tmp/debug_ranges/a.cc", metadata !"/"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !11, metadata !14}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"_Z3fooi", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @_Z3fooi, null, null, metadata !9, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/debug_ranges/a.cc]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786689, metadata !4, metadata !"bar", metadata !5, i32 16777218, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [bar] [line 2]
-!11 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo2", metadata !"foo2", metadata !"_Z4foo2i", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @_Z4foo2i, null, null, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [foo2]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786689, metadata !11, metadata !"bar2", metadata !5, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [bar2] [line 3]
-!14 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 5, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [main]
-!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{metadata !8}
-!17 = metadata !{metadata !18}
-!18 = metadata !{i32 786484, i32 0, null, metadata !"global", metadata !"global", metadata !"", metadata !5, i32 1, metadata !8, i32 0, i32 1, i32* @global, null} ; [ DW_TAG_variable ] [global] [line 1] [def]
-!19 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!20 = metadata !{i32 2, i32 0, metadata !4, null}
-!21 = metadata !{i32 3, i32 0, metadata !11, null}
-!22 = metadata !{i32 6, i32 0, metadata !14, null}
-!23 = metadata !{metadata !"int", metadata !24}
-!24 = metadata !{metadata !"omnipotent char", metadata !25}
-!25 = metadata !{metadata !"Simple C/C++ TBAA"}
-!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 (191881)\001\00\000\00\001", !1, !2, !2, !3, !17, !2} ; [ DW_TAG_compile_unit ] [/tmp/debug_ranges/a.cc] [DW_LANG_C_plus_plus]
+!1 = !{!"tmp/debug_ranges/a.cc", !"/"}
+!2 = !{}
+!3 = !{!4, !11, !14}
+!4 = !{!"0x2e\00foo\00foo\00_Z3fooi\002\000\001\000\006\00256\001\002", !1, !5, !6, null, i32 (i32)* @_Z3fooi, null, null, !9} ; [ DW_TAG_subprogram ] [line 2] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/debug_ranges/a.cc]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!10}
+!10 = !{!"0x101\00bar\0016777218\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [bar] [line 2]
+!11 = !{!"0x2e\00foo2\00foo2\00_Z4foo2i\003\000\001\000\006\00256\001\003", !1, !5, !6, null, i32 (i32)* @_Z4foo2i, null, null, !12} ; [ DW_TAG_subprogram ] [line 3] [def] [foo2]
+!12 = !{!13}
+!13 = !{!"0x101\00bar2\0016777219\000", !11, !5, !8} ; [ DW_TAG_arg_variable ] [bar2] [line 3]
+!14 = !{!"0x2e\00main\00main\00\005\000\001\000\006\00256\001\005", !1, !5, !15, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [main]
+!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{!8}
+!17 = !{!18}
+!18 = !{!"0x34\00global\00global\00\001\000\001", null, !5, !8, i32* @global, null} ; [ DW_TAG_variable ] [global] [line 1] [def]
+!19 = !{i32 2, !"Dwarf Version", i32 4}
+!20 = !MDLocation(line: 2, scope: !4)
+!21 = !MDLocation(line: 3, scope: !11)
+!22 = !MDLocation(line: 6, scope: !14)
+!23 = !{!"int", !24}
+!24 = !{!"omnipotent char", !25}
+!25 = !{!"Simple C/C++ TBAA"}
+!26 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dwarf-aranges.ll b/test/DebugInfo/X86/dwarf-aranges.ll
index 9ad618507a25..6873e58eab5b 100644
--- a/test/DebugInfo/X86/dwarf-aranges.ll
+++ b/test/DebugInfo/X86/dwarf-aranges.ll
@@ -15,18 +15,15 @@
 
 ; <data section> - it should have made one span covering all vars in this CU.
 ; CHECK-NEXT: .quad some_data
-; CHECK-NEXT: [[R1:\.[A-Za-z0-9]*]] = .Ldebug_end1-some_data
-; CHECK-NEXT: .quad [[R1]]
+; CHECK-NEXT: .quad .Ldebug_end1-some_data
 
 ; <text section> - it should have made one span covering all functions in this CU.
 ; CHECK-NEXT: .quad .Lfunc_begin0
-; CHECK-NEXT: [[R2:\.[A-Za-z0-9]*]] = .Ldebug_end2-.Lfunc_begin0
-; CHECK-NEXT: .quad [[R2]]
+; CHECK-NEXT: .quad .Ldebug_end2-.Lfunc_begin0
 
 ; <other sections> - it should have made one span covering all vars in this CU.
 ; CHECK-NEXT: .quad some_other
-; CHECK-NEXT: [[R3:\.[A-Za-z0-9]*]] = .Ldebug_end3-some_other
-; CHECK-NEXT: .quad [[R3]]
+; CHECK-NEXT: .quad .Ldebug_end3-some_other
 
 ; -- finish --
 ; CHECK-NEXT: # ARange terminator
@@ -65,20 +62,20 @@ entry:
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!13, !16}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !8, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/kayamon/test.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"test.c", metadata !"/home/kayamon"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"some_code", metadata !"some_code", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @some_code, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [some_code]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/kayamon/test.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{metadata !9, metadata !11, metadata !12}
-!9 = metadata !{i32 786484, i32 0, null, metadata !"some_data", metadata !"some_data", metadata !"", metadata !5, i32 1, metadata !10, i32 0, i32 1, i32* @some_data, null} ; [ DW_TAG_variable ] [some_data] [line 1] [def]
-!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!11 = metadata !{i32 786484, i32 0, null, metadata !"some_other", metadata !"some_other", metadata !"", metadata !5, i32 3, metadata !10, i32 0, i32 1, i32* @some_other, null} ; [ DW_TAG_variable ] [some_other] [line 3] [def]
-!12 = metadata !{i32 786484, i32 0, null, metadata !"some_bss", metadata !"some_bss", metadata !"", metadata !5, i32 2, metadata !10, i32 0, i32 1, i32* @some_bss, null} ; [ DW_TAG_variable ] [some_bss] [line 2] [def]
-!13 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!14 = metadata !{i32 7, i32 0, metadata !4, null}
-!15 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
-!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 \000\00\000\00\000", !1, !2, !2, !3, !8, !2} ; [ DW_TAG_compile_unit ] [/home/kayamon/test.c] [DW_LANG_C99]
+!1 = !{!"test.c", !"/home/kayamon"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00some_code\00some_code\00\005\000\001\000\006\000\000\006", !1, !5, !6, null, void ()* @some_code, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [some_code]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/home/kayamon/test.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{!9, !11, !12}
+!9 = !{!"0x34\00some_data\00some_data\00\001\000\001", null, !5, !10, i32* @some_data, null} ; [ DW_TAG_variable ] [some_data] [line 1] [def]
+!10 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!11 = !{!"0x34\00some_other\00some_other\00\003\000\001", null, !5, !10, i32* @some_other, null} ; [ DW_TAG_variable ] [some_other] [line 3] [def]
+!12 = !{!"0x34\00some_bss\00some_bss\00\002\000\001", null, !5, !10, i32* @some_bss, null} ; [ DW_TAG_variable ] [some_bss] [line 2] [def]
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !MDLocation(line: 7, scope: !4)
+!15 = !MDLocation(line: 8, scope: !4)
+!16 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dwarf-public-names.ll b/test/DebugInfo/X86/dwarf-public-names.ll
index 793971a5f89f..778738ccb5f0 100644
--- a/test/DebugInfo/X86/dwarf-public-names.ll
+++ b/test/DebugInfo/X86/dwarf-public-names.ll
@@ -41,6 +41,7 @@
 
 ; Skip the output to the header of the pubnames section.
 ; LINUX: debug_pubnames
+; LINUX: unit_size = 0x00000128
 
 ; Check for each name in the output.
 ; LINUX-DAG: "ns"
@@ -62,13 +63,13 @@ define void @_ZN1C15member_functionEv(%struct.C* %this) nounwind uwtable align 2
 entry:
   %this.addr = alloca %struct.C*, align 8
   store %struct.C* %this, %struct.C** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !28), !dbg !30
+  call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !28, metadata !{!"0x102"}), !dbg !30
   %this1 = load %struct.C** %this.addr
   store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !31
   ret void, !dbg !32
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define i32 @_ZN1C22static_member_functionEv() nounwind uwtable align 2 {
 entry:
@@ -93,42 +94,42 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!38}
 
-!0 = metadata !{i32 786449, metadata !37, i32 4, metadata !"clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !24,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{}
-!2 = metadata !{metadata !3, metadata !18, metadata !19, metadata !20}
-!3 = metadata !{i32 786478, metadata !4, null, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 9, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !12, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
-!4 = metadata !{i32 786473, metadata !37} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!6 = metadata !{null, metadata !7}
-!7 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from C]
-!8 = metadata !{i32 786451, metadata !37, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ]
-!9 = metadata !{metadata !10, metadata !12, metadata !14}
-!10 = metadata !{i32 786445, metadata !37, metadata !8, metadata !"static_member_variable", i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !11, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int]
-!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!12 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 2, metadata !5, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !13, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function]
-!13 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!14 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 3, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function]
-!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{metadata !11}
-!17 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!18 = metadata !{i32 786478, metadata !4, null, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 13, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !14, metadata !1, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function]
-!19 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 19, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !1, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function]
-!20 = metadata !{i32 786478, metadata !4, metadata !21, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 24, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !1, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function]
-!21 = metadata !{i32 786489, metadata !4, null, metadata !"ns", i32 23} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp]
-!22 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!23 = metadata !{null}
-!24 = metadata !{metadata !25, metadata !26, metadata !27}
-!25 = metadata !{i32 786484, i32 0, metadata !8, metadata !"static_member_variable", metadata !"static_member_variable", metadata !"_ZN1C22static_member_variableE", metadata !4, i32 7, metadata !11, i32 0, i32 1, i32* @_ZN1C22static_member_variableE, metadata !10} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def]
-!26 = metadata !{i32 786484, i32 0, null, metadata !"global_variable", metadata !"global_variable", metadata !"", metadata !4, i32 17, metadata !8, i32 0, i32 1, %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 17] [def]
-!27 = metadata !{i32 786484, i32 0, metadata !21, metadata !"global_namespace_variable", metadata !"global_namespace_variable", metadata !"_ZN2ns25global_namespace_variableE", metadata !4, i32 27, metadata !11, i32 0, i32 1, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 27] [def]
-!28 = metadata !{i32 786689, metadata !3, metadata !"this", metadata !4, i32 16777225, metadata !29, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 9]
-!29 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from C]
-!30 = metadata !{i32 9, i32 0, metadata !3, null}
-!31 = metadata !{i32 10, i32 0, metadata !3, null}
-!32 = metadata !{i32 11, i32 0, metadata !3, null}
-!33 = metadata !{i32 14, i32 0, metadata !18, null}
-!34 = metadata !{i32 20, i32 0, metadata !19, null}
-!35 = metadata !{i32 25, i32 0, metadata !20, null}
-!36 = metadata !{i32 26, i32 0, metadata !20, null}
-!37 = metadata !{metadata !"dwarf-public-names.cpp", metadata !"/usr2/kparzysz/s.hex/t"}
-!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)\000\00\000\00\000", !37, !1, !1, !2, !24,  !1} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus]
+!1 = !{}
+!2 = !{!3, !18, !19, !20}
+!3 = !{!"0x2e\00member_function\00member_function\00_ZN1C15member_functionEv\009\000\001\000\006\00256\000\009", !4, null, !5, null, void (%struct.C*)* @_ZN1C15member_functionEv, null, !12, !1} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
+!4 = !{!"0x29", !37} ; [ DW_TAG_file_type ]
+!5 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !6, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = !{null, !7}
+!7 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from C]
+!8 = !{!"0x13\00C\001\008\008\000\000\000", !37, null, null, !9, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ]
+!9 = !{!10, !12, !14}
+!10 = !{!"0xd\00static_member_variable\004\000\000\000\004096", !37, !8, !11, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int]
+!11 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = !{!"0x2e\00member_function\00member_function\00_ZN1C15member_functionEv\002\000\000\000\006\00256\000\002", !4, !8, !5, null, null, null, i32 0, !13} ; [ DW_TAG_subprogram ] [line 2] [member_function]
+!13 = !{!"0x24"}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!14 = !{!"0x2e\00static_member_function\00static_member_function\00_ZN1C22static_member_functionEv\003\000\000\000\006\00256\000\003", !4, !8, !15, null, null, null, i32 0, !17} ; [ DW_TAG_subprogram ] [line 3] [static_member_function]
+!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{!11}
+!17 = !{!"0x24"}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!18 = !{!"0x2e\00static_member_function\00static_member_function\00_ZN1C22static_member_functionEv\0013\000\001\000\006\00256\000\0013", !4, null, !15, null, i32 ()* @_ZN1C22static_member_functionEv, null, !14, !1} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function]
+!19 = !{!"0x2e\00global_function\00global_function\00_Z15global_functionv\0019\000\001\000\006\00256\000\0019", !4, !4, !15, null, i32 ()* @_Z15global_functionv, null, null, !1} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function]
+!20 = !{!"0x2e\00global_namespace_function\00global_namespace_function\00_ZN2ns25global_namespace_functionEv\0024\000\001\000\006\00256\000\0024", !4, !21, !22, null, void ()* @_ZN2ns25global_namespace_functionEv, null, null, !1} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function]
+!21 = !{!"0x39\00ns\0023", !4, null} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp]
+!22 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !23, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!23 = !{null}
+!24 = !{!25, !26, !27}
+!25 = !{!"0x34\00static_member_variable\00static_member_variable\00_ZN1C22static_member_variableE\007\000\001", !8, !4, !11, i32* @_ZN1C22static_member_variableE, !10} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def]
+!26 = !{!"0x34\00global_variable\00global_variable\00\0017\000\001", null, !4, !8, %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 17] [def]
+!27 = !{!"0x34\00global_namespace_variable\00global_namespace_variable\00_ZN2ns25global_namespace_variableE\0027\000\001", !21, !4, !11, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 27] [def]
+!28 = !{!"0x101\00this\0016777225\001088", !3, !4, !29} ; [ DW_TAG_arg_variable ] [this] [line 9]
+!29 = !{!"0xf\00\000\0064\0064\000\000", null, null, !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from C]
+!30 = !MDLocation(line: 9, scope: !3)
+!31 = !MDLocation(line: 10, scope: !3)
+!32 = !MDLocation(line: 11, scope: !3)
+!33 = !MDLocation(line: 14, scope: !18)
+!34 = !MDLocation(line: 20, scope: !19)
+!35 = !MDLocation(line: 25, scope: !20)
+!36 = !MDLocation(line: 26, scope: !20)
+!37 = !{!"dwarf-public-names.cpp", !"/usr2/kparzysz/s.hex/t"}
+!38 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/dwarf-pubnames-split.ll b/test/DebugInfo/X86/dwarf-pubnames-split.ll
index 65c46d368d1b..fc99474badd9 100644
--- a/test/DebugInfo/X86/dwarf-pubnames-split.ll
+++ b/test/DebugInfo/X86/dwarf-pubnames-split.ll
@@ -24,15 +24,15 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 189287) (llvm/trunk 189296)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!10 = metadata !{i32 2, i32 0, metadata !4, null}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 (trunk 189287) (llvm/trunk 189296)\000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
+!1 = !{!"foo.c", !"/usr/local/google/home/echristo/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00main\00main\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 3}
+!10 = !MDLocation(line: 2, scope: !4)
+!11 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/earlydup-crash.ll b/test/DebugInfo/X86/earlydup-crash.ll
index b5dc01e68a93..fb85ada9d04f 100644
--- a/test/DebugInfo/X86/earlydup-crash.ll
+++ b/test/DebugInfo/X86/earlydup-crash.ll
@@ -4,7 +4,7 @@
 
 %struct.cpp_dir = type { %struct.cpp_dir*, i8*, i32, i8, i8**, i8*, i8* (i8*, %struct.cpp_dir*)*, i64, i32, i8 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 define internal i8* @framework_construct_pathname(i8* %fname, %struct.cpp_dir* %dir) nounwind ssp {
 entry:
@@ -13,7 +13,7 @@ entry:
 bb:                                               ; preds = %entry
   %tmp = icmp eq i32 undef, 0
   %tmp1 = add i32 0, 11
-  call void @llvm.dbg.value(metadata !{i32 %tmp1}, i64 0, metadata !0)
+  call void @llvm.dbg.value(metadata i32 %tmp1, i64 0, metadata !0, metadata !{!"0x102"})
   br i1 undef, label %bb18, label %bb31.preheader
 
 bb31.preheader:                                   ; preds = %bb19, %bb
@@ -44,51 +44,51 @@ declare void @foobar(i32)
 
 !llvm.dbg.cu = !{!4}
 !llvm.module.flags = !{!47}
-!0 = metadata !{i32 590080, metadata !1, metadata !"frname_len", metadata !3, i32 517, metadata !38, i32 0} ; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 589835, metadata !44, metadata !2, i32 515, i32 0, i32 19} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 589870, metadata !44, null, metadata !"framework_construct_pathname", metadata !"framework_construct_pathname", metadata !"", i32 515, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8* (i8*, %struct.cpp_dir*)* @framework_construct_pathname, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!3 = metadata !{i32 589865, metadata !44}  ; [ DW_TAG_file_type ]
-!4 = metadata !{i32 589841, metadata !44, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !46, metadata !46, metadata !45, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 589845, metadata !44, metadata !3, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!6 = metadata !{metadata !7, metadata !9, metadata !11}
-!7 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 589860, metadata !44, metadata !3, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 589862, metadata !44, metadata !3, metadata !"", i32 0, i64 8, i64 8, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ]
-!11 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 589846, metadata !41, metadata !13, metadata !"cpp_dir", i32 45, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_typedef ]
-!13 = metadata !{i32 589865, metadata !41} ; [ DW_TAG_file_type ]
-!14 = metadata !{i32 589843, metadata !41, metadata !3, metadata !"cpp_dir", i32 43, i64 352, i64 32, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [cpp_dir] [line 43, size 352, align 32, offset 0] [def] [from ]
-!15 = metadata !{metadata !16, metadata !18, metadata !19, metadata !21, metadata !23, metadata !25, metadata !27, metadata !29, metadata !33, metadata !36}
-!16 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"next", i32 572, i64 32, i64 32, i64 0, i32 0, metadata !17} ; [ DW_TAG_member ]
-!17 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
-!18 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"name", i32 575, i64 32, i64 32, i64 32, i32 0, metadata !7} ; [ DW_TAG_member ]
-!19 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"len", i32 576, i64 32, i64 32, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ]
-!20 = metadata !{i32 589860, metadata !44, metadata !3, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!21 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"sysp", i32 580, i64 8, i64 8, i64 96, i32 0, metadata !22} ; [ DW_TAG_member ]
-!22 = metadata !{i32 589860, metadata !44, metadata !3, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!23 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"name_map", i32 584, i64 32, i64 32, i64 128, i32 0, metadata !24} ; [ DW_TAG_member ]
-!24 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
-!25 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"header_map", i32 590, i64 32, i64 32, i64 160, i32 0, metadata !26} ; [ DW_TAG_member ]
-!26 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!27 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"construct", i32 597, i64 32, i64 32, i64 192, i32 0, metadata !28} ; [ DW_TAG_member ]
-!28 = metadata !{i32 589839, metadata !44, metadata !3, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
-!29 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"ino", i32 601, i64 64, i64 64, i64 224, i32 0, metadata !30} ; [ DW_TAG_member ]
-!30 = metadata !{i32 589846, metadata !42, metadata !31, metadata !"ino_t", i32 141, i64 0, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_typedef ]
-!31 = metadata !{i32 589865, metadata !42} ; [ DW_TAG_file_type ]
-!32 = metadata !{i32 589860, metadata !44, metadata !3, metadata !"long long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!33 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"dev", i32 602, i64 32, i64 32, i64 288, i32 0, metadata !34} ; [ DW_TAG_member ]
-!34 = metadata !{i32 589846, metadata !42, metadata !31, metadata !"dev_t", i32 107, i64 0, i64 0, i64 0, i32 0, metadata !35} ; [ DW_TAG_typedef ]
-!35 = metadata !{i32 589860, metadata !44, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!36 = metadata !{i32 589837, metadata !41, metadata !14, metadata !"user_supplied_p", i32 605, i64 8, i64 8, i64 320, i32 0, metadata !37} ; [ DW_TAG_member ]
-!37 = metadata !{i32 589860, metadata !44, metadata !3, metadata !"_Bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
-!38 = metadata !{i32 589846, metadata !43, metadata !39, metadata !"size_t", i32 326, i64 0, i64 0, i64 0, i32 0, metadata !40} ; [ DW_TAG_typedef ]
-!39 = metadata !{i32 589865, metadata !43} ; [ DW_TAG_file_type ]
-!40 = metadata !{i32 589860, metadata !44, metadata !3, metadata !"long unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!41 = metadata !{metadata !"cpplib.h", metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/../libcpp/include"}
-!42 = metadata !{metadata !"types.h", metadata !"/usr/include/sys"}
-!43 = metadata !{metadata !"stddef.h", metadata !"/Users/espindola/llvm/build-llvm-gcc/./prev-gcc/include"}
-!44 = metadata !{metadata !"darwin-c.c", metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/config"}
-!45 = metadata !{metadata !2}
-!46 = metadata !{i32 0}
-!47 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x100\00frname_len\00517\000", !1, !3, !38} ; [ DW_TAG_auto_variable ]
+!1 = !{!"0xb\00515\000\0019", !44, !2} ; [ DW_TAG_lexical_block ]
+!2 = !{!"0x2e\00framework_construct_pathname\00framework_construct_pathname\00\00515\001\001\000\006\00256\001\000", !44, null, !5, null, i8* (i8*, %struct.cpp_dir*)* @framework_construct_pathname, null, null, null} ; [ DW_TAG_subprogram ]
+!3 = !{!"0x29", !44}  ; [ DW_TAG_file_type ]
+!4 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", !44, !46, !46, !45, null, null} ; [ DW_TAG_compile_unit ]
+!5 = !{!"0x15\00\000\000\000\000\000\000", !44, !3, null, !6, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = !{!7, !9, !11}
+!7 = !{!"0xf\00\000\0032\0032\000\000", !44, !3, !8} ; [ DW_TAG_pointer_type ]
+!8 = !{!"0x24\00char\000\008\008\000\000\006", !44, !3} ; [ DW_TAG_base_type ]
+!9 = !{!"0xf\00\000\0032\0032\000\000", !44, !3, !10} ; [ DW_TAG_pointer_type ]
+!10 = !{!"0x26\00\000\008\008\000\000", !44, !3, !8} ; [ DW_TAG_const_type ]
+!11 = !{!"0xf\00\000\0032\0032\000\000", !44, !3, !12} ; [ DW_TAG_pointer_type ]
+!12 = !{!"0x16\00cpp_dir\0045\000\000\000\000", !41, !13, !14} ; [ DW_TAG_typedef ]
+!13 = !{!"0x29", !41} ; [ DW_TAG_file_type ]
+!14 = !{!"0x13\00cpp_dir\0043\00352\0032\000\000\000", !41, !3, null, !15, null, null, null} ; [ DW_TAG_structure_type ] [cpp_dir] [line 43, size 352, align 32, offset 0] [def] [from ]
+!15 = !{!16, !18, !19, !21, !23, !25, !27, !29, !33, !36}
+!16 = !{!"0xd\00next\00572\0032\0032\000\000", !41, !14, !17} ; [ DW_TAG_member ]
+!17 = !{!"0xf\00\000\0032\0032\000\000", !44, !3, !14} ; [ DW_TAG_pointer_type ]
+!18 = !{!"0xd\00name\00575\0032\0032\0032\000", !41, !14, !7} ; [ DW_TAG_member ]
+!19 = !{!"0xd\00len\00576\0032\0032\0064\000", !41, !14, !20} ; [ DW_TAG_member ]
+!20 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", !44, !3} ; [ DW_TAG_base_type ]
+!21 = !{!"0xd\00sysp\00580\008\008\0096\000", !41, !14, !22} ; [ DW_TAG_member ]
+!22 = !{!"0x24\00unsigned char\000\008\008\000\000\008", !44, !3} ; [ DW_TAG_base_type ]
+!23 = !{!"0xd\00name_map\00584\0032\0032\00128\000", !41, !14, !24} ; [ DW_TAG_member ]
+!24 = !{!"0xf\00\000\0032\0032\000\000", !44, !3, !9} ; [ DW_TAG_pointer_type ]
+!25 = !{!"0xd\00header_map\00590\0032\0032\00160\000", !41, !14, !26} ; [ DW_TAG_member ]
+!26 = !{!"0xf\00\000\0032\0032\000\000", !44, !3, null} ; [ DW_TAG_pointer_type ]
+!27 = !{!"0xd\00construct\00597\0032\0032\00192\000", !41, !14, !28} ; [ DW_TAG_member ]
+!28 = !{!"0xf\00\000\0032\0032\000\000", !44, !3, !5} ; [ DW_TAG_pointer_type ]
+!29 = !{!"0xd\00ino\00601\0064\0064\00224\000", !41, !14, !30} ; [ DW_TAG_member ]
+!30 = !{!"0x16\00ino_t\00141\000\000\000\000", !42, !31, !32} ; [ DW_TAG_typedef ]
+!31 = !{!"0x29", !42} ; [ DW_TAG_file_type ]
+!32 = !{!"0x24\00long long unsigned int\000\0064\0064\000\000\007", !44, !3} ; [ DW_TAG_base_type ]
+!33 = !{!"0xd\00dev\00602\0032\0032\00288\000", !41, !14, !34} ; [ DW_TAG_member ]
+!34 = !{!"0x16\00dev_t\00107\000\000\000\000", !42, !31, !35} ; [ DW_TAG_typedef ]
+!35 = !{!"0x24\00int\000\0032\0032\000\000\005", !44, !3} ; [ DW_TAG_base_type ]
+!36 = !{!"0xd\00user_supplied_p\00605\008\008\00320\000", !41, !14, !37} ; [ DW_TAG_member ]
+!37 = !{!"0x24\00_Bool\000\008\008\000\000\002", !44, !3} ; [ DW_TAG_base_type ]
+!38 = !{!"0x16\00size_t\00326\000\000\000\000", !43, !39, !40} ; [ DW_TAG_typedef ]
+!39 = !{!"0x29", !43} ; [ DW_TAG_file_type ]
+!40 = !{!"0x24\00long unsigned int\000\0032\0032\000\000\007", !44, !3} ; [ DW_TAG_base_type ]
+!41 = !{!"cpplib.h", !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/../libcpp/include"}
+!42 = !{!"types.h", !"/usr/include/sys"}
+!43 = !{!"stddef.h", !"/Users/espindola/llvm/build-llvm-gcc/./prev-gcc/include"}
+!44 = !{!"darwin-c.c", !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/config"}
+!45 = !{!2}
+!46 = !{i32 0}
+!47 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/elf-names.ll b/test/DebugInfo/X86/elf-names.ll
index 36fd232a9045..910a67447770 100644
--- a/test/DebugInfo/X86/elf-names.ll
+++ b/test/DebugInfo/X86/elf-names.ll
@@ -22,7 +22,7 @@
 
 define void @_ZN1DC2Ev(%class.D* nocapture %this) unnamed_addr nounwind uwtable align 2 {
 entry:
-  tail call void @llvm.dbg.value(metadata !{%class.D* %this}, i64 0, metadata !29), !dbg !36
+  tail call void @llvm.dbg.value(metadata %class.D* %this, i64 0, metadata !29, metadata !{!"0x102"}), !dbg !36
   %c1 = getelementptr inbounds %class.D* %this, i64 0, i32 0, !dbg !37
   store i32 1, i32* %c1, align 4, !dbg !37
   %c2 = getelementptr inbounds %class.D* %this, i64 0, i32 1, !dbg !42
@@ -36,8 +36,8 @@ entry:
 
 define void @_ZN1DC2ERKS_(%class.D* nocapture %this, %class.D* nocapture %d) unnamed_addr nounwind uwtable align 2 {
 entry:
-  tail call void @llvm.dbg.value(metadata !{%class.D* %this}, i64 0, metadata !34), !dbg !46
-  tail call void @llvm.dbg.value(metadata !{%class.D* %d}, i64 0, metadata !35), !dbg !46
+  tail call void @llvm.dbg.value(metadata %class.D* %this, i64 0, metadata !34, metadata !{!"0x102"}), !dbg !46
+  tail call void @llvm.dbg.value(metadata %class.D* %d, i64 0, metadata !35, metadata !{!"0x102"}), !dbg !46
   %c1 = getelementptr inbounds %class.D* %d, i64 0, i32 0, !dbg !47
   %0 = load i32* %c1, align 4, !dbg !47
   %c12 = getelementptr inbounds %class.D* %this, i64 0, i32 0, !dbg !47
@@ -57,56 +57,56 @@ entry:
   ret void, !dbg !52
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!54}
 
-!0 = metadata !{i32 786449, metadata !53, i32 4, metadata !"clang version 3.2 (trunk 167506) (llvm/trunk 167505)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{}
-!3 = metadata !{metadata !5, metadata !31}
-!5 = metadata !{i32 786478, metadata !6, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2Ev", i32 12, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*)* @_ZN1DC2Ev, null, metadata !17, metadata !27, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [D]
-!6 = metadata !{i32 786473, metadata !53} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D]
-!10 = metadata !{i32 786434, metadata !53, null, metadata !"D", i32 1, i64 128, i64 32, i32 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_class_type ] [D] [line 1, size 128, align 32, offset 0] [def] [from ]
-!11 = metadata !{metadata !12, metadata !14, metadata !15, metadata !16, metadata !17, metadata !20}
-!12 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c1", i32 6, i64 32, i64 32, i64 0, i32 1, metadata !13} ; [ DW_TAG_member ] [c1] [line 6, size 32, align 32, offset 0] [private] [from int]
-!13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!14 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c2", i32 7, i64 32, i64 32, i64 32, i32 1, metadata !13} ; [ DW_TAG_member ] [c2] [line 7, size 32, align 32, offset 32] [private] [from int]
-!15 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c3", i32 8, i64 32, i64 32, i64 64, i32 1, metadata !13} ; [ DW_TAG_member ] [c3] [line 8, size 32, align 32, offset 64] [private] [from int]
-!16 = metadata !{i32 786445, metadata !53, metadata !10, metadata !"c4", i32 9, i64 32, i64 32, i64 96, i32 1, metadata !13} ; [ DW_TAG_member ] [c4] [line 9, size 32, align 32, offset 96] [private] [from int]
-!17 = metadata !{i32 786478, metadata !6, metadata !10, metadata !"D", metadata !"D", metadata !"", i32 3, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [D]
-!18 = metadata !{metadata !19}
-!19 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!20 = metadata !{i32 786478, metadata !6, metadata !10, metadata !"D", metadata !"D", metadata !"", i32 4, metadata !21, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !25, i32 4} ; [ DW_TAG_subprogram ] [line 4] [D]
-!21 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!22 = metadata !{null, metadata !9, metadata !23}
-!23 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
-!24 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from D]
-!25 = metadata !{metadata !26}
-!26 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!27 = metadata !{metadata !29}
-!29 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777228, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 12]
-!30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D]
-!31 = metadata !{i32 786478, metadata !6, null, metadata !"D", metadata !"D", metadata !"_ZN1DC2ERKS_", i32 19, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%class.D*, %class.D*)* @_ZN1DC2ERKS_, null, metadata !20, metadata !32, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [D]
-!32 = metadata !{metadata !34, metadata !35}
-!34 = metadata !{i32 786689, metadata !31, metadata !"this", metadata !6, i32 16777235, metadata !30, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 19]
-!35 = metadata !{i32 786689, metadata !31, metadata !"d", metadata !6, i32 33554451, metadata !23, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [d] [line 19]
-!36 = metadata !{i32 12, i32 0, metadata !5, null}
-!37 = metadata !{i32 13, i32 0, metadata !38, null}
-!38 = metadata !{i32 786443, metadata !6, metadata !5, i32 12, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/foo.cpp]
-!42 = metadata !{i32 14, i32 0, metadata !38, null}
-!43 = metadata !{i32 15, i32 0, metadata !38, null}
-!44 = metadata !{i32 16, i32 0, metadata !38, null}
-!45 = metadata !{i32 17, i32 0, metadata !38, null}
-!46 = metadata !{i32 19, i32 0, metadata !31, null}
-!47 = metadata !{i32 20, i32 0, metadata !48, null}
-!48 = metadata !{i32 786443, metadata !6, metadata !31, i32 19, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/foo.cpp]
-!49 = metadata !{i32 21, i32 0, metadata !48, null}
-!50 = metadata !{i32 22, i32 0, metadata !48, null}
-!51 = metadata !{i32 23, i32 0, metadata !48, null}
-!52 = metadata !{i32 24, i32 0, metadata !48, null}
-!53 = metadata !{metadata !"foo.cpp", metadata !"/usr/local/google/home/echristo"}
-!54 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.2 (trunk 167506) (llvm/trunk 167505)\001\00\000\00\000", !53, !1, !1, !3, !1,  !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.cpp] [DW_LANG_C_plus_plus]
+!1 = !{}
+!3 = !{!5, !31}
+!5 = !{!"0x2e\00D\00D\00_ZN1DC2Ev\0012\000\001\000\006\00256\001\0012", !6, null, !7, null, void (%class.D*)* @_ZN1DC2Ev, null, !17, !27} ; [ DW_TAG_subprogram ] [line 12] [def] [D]
+!6 = !{!"0x29", !53} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9}
+!9 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D]
+!10 = !{!"0x2\00D\001\00128\0032\000\000\000", !53, null, null, !11, null, null, null} ; [ DW_TAG_class_type ] [D] [line 1, size 128, align 32, offset 0] [def] [from ]
+!11 = !{!12, !14, !15, !16, !17, !20}
+!12 = !{!"0xd\00c1\006\0032\0032\000\001", !53, !10, !13} ; [ DW_TAG_member ] [c1] [line 6, size 32, align 32, offset 0] [private] [from int]
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!14 = !{!"0xd\00c2\007\0032\0032\0032\001", !53, !10, !13} ; [ DW_TAG_member ] [c2] [line 7, size 32, align 32, offset 32] [private] [from int]
+!15 = !{!"0xd\00c3\008\0032\0032\0064\001", !53, !10, !13} ; [ DW_TAG_member ] [c3] [line 8, size 32, align 32, offset 64] [private] [from int]
+!16 = !{!"0xd\00c4\009\0032\0032\0096\001", !53, !10, !13} ; [ DW_TAG_member ] [c4] [line 9, size 32, align 32, offset 96] [private] [from int]
+!17 = !{!"0x2e\00D\00D\00\003\000\000\000\006\00256\001\003", !6, !10, !7, null, null, null, i32 0, !18} ; [ DW_TAG_subprogram ] [line 3] [D]
+!18 = !{!19}
+!19 = !{!"0x24"}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!20 = !{!"0x2e\00D\00D\00\004\000\000\000\006\00256\001\004", !6, !10, !21, null, null, null, i32 0, !25} ; [ DW_TAG_subprogram ] [line 4] [D]
+!21 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !22, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!22 = !{null, !9, !23}
+!23 = !{!"0x10\00\000\000\000\000\000", null, null, !24} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
+!24 = !{!"0x26\00\000\000\000\000\000", null, null, !10} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from D]
+!25 = !{!26}
+!26 = !{!"0x24"}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!27 = !{!29}
+!29 = !{!"0x101\00this\0016777228\001088", !5, !6, !30} ; [ DW_TAG_arg_variable ] [this] [line 12]
+!30 = !{!"0xf\00\000\0064\0064\000\000", null, null, !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from D]
+!31 = !{!"0x2e\00D\00D\00_ZN1DC2ERKS_\0019\000\001\000\006\00256\001\0019", !6, null, !21, null, void (%class.D*, %class.D*)* @_ZN1DC2ERKS_, null, !20, !32} ; [ DW_TAG_subprogram ] [line 19] [def] [D]
+!32 = !{!34, !35}
+!34 = !{!"0x101\00this\0016777235\001088", !31, !6, !30} ; [ DW_TAG_arg_variable ] [this] [line 19]
+!35 = !{!"0x101\00d\0033554451\000", !31, !6, !23} ; [ DW_TAG_arg_variable ] [d] [line 19]
+!36 = !MDLocation(line: 12, scope: !5)
+!37 = !MDLocation(line: 13, scope: !38)
+!38 = !{!"0xb\0012\000\000", !6, !5} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/foo.cpp]
+!42 = !MDLocation(line: 14, scope: !38)
+!43 = !MDLocation(line: 15, scope: !38)
+!44 = !MDLocation(line: 16, scope: !38)
+!45 = !MDLocation(line: 17, scope: !38)
+!46 = !MDLocation(line: 19, scope: !31)
+!47 = !MDLocation(line: 20, scope: !48)
+!48 = !{!"0xb\0019\000\001", !6, !31} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/foo.cpp]
+!49 = !MDLocation(line: 21, scope: !48)
+!50 = !MDLocation(line: 22, scope: !48)
+!51 = !MDLocation(line: 23, scope: !48)
+!52 = !MDLocation(line: 24, scope: !48)
+!53 = !{!"foo.cpp", !"/usr/local/google/home/echristo"}
+!54 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/empty-and-one-elem-array.ll b/test/DebugInfo/X86/empty-and-one-elem-array.ll
index f5c37df1e5e8..94e75e4d8bfa 100644
--- a/test/DebugInfo/X86/empty-and-one-elem-array.ll
+++ b/test/DebugInfo/X86/empty-and-one-elem-array.ll
@@ -9,8 +9,8 @@ define i32 @func() nounwind uwtable ssp {
 entry:
   %my_foo = alloca %struct.foo, align 4
   %my_bar = alloca %struct.bar, align 4
-  call void @llvm.dbg.declare(metadata !{%struct.foo* %my_foo}, metadata !10), !dbg !19
-  call void @llvm.dbg.declare(metadata !{%struct.bar* %my_bar}, metadata !20), !dbg !28
+  call void @llvm.dbg.declare(metadata %struct.foo* %my_foo, metadata !10, metadata !{!"0x102"}), !dbg !19
+  call void @llvm.dbg.declare(metadata %struct.bar* %my_bar, metadata !20, metadata !{!"0x102"}), !dbg !28
   %a = getelementptr inbounds %struct.foo* %my_foo, i32 0, i32 0, !dbg !29
   store i32 3, i32* %a, align 4, !dbg !29
   %a1 = getelementptr inbounds %struct.bar* %my_bar, i32 0, i32 0, !dbg !30
@@ -23,14 +23,11 @@ entry:
   ret i32 %add, !dbg !31
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-; An empty array should not have an AT_upper_bound attribute. But an array of 1
-; should.
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 ; CHECK:      DW_TAG_base_type
 ; CHECK-NEXT: DW_AT_name [DW_FORM_strp]  ( .debug_str[{{.*}}] = "int")
-; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1]   (0x05)
+; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1]   (DW_ATE_signed)
 ; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1]  (0x04)
 
 ; int foo::b[1]:
@@ -46,7 +43,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 ; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
 ; CHECK:      DW_TAG_subrange_type [{{.*}}]
 ; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
-; CHECK-NEXT: DW_AT_upper_bound [DW_FORM_data1]  (0x00)
+; CHECK-NEXT: DW_AT_count [DW_FORM_data1]  (0x01)
 
 ; int bar::b[0]:
 ; CHECK: DW_TAG_structure_type
@@ -59,42 +56,42 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 ; int[0]:
 ; CHECK:      DW_TAG_array_type [{{.*}}] *
 ; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
-; CHECK:      DW_TAG_subrange_type [11]
+; CHECK:      DW_TAG_subrange_type
 ; CHECK-NEXT: DW_AT_type [DW_FORM_ref4]
-; CHECK-NOT:  DW_AT_upper_bound
+; CHECK:      DW_AT_count [DW_FORM_data1]  (0x00)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!33}
 
-!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/test.c] [DW_LANG_C99]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"func", metadata !"func", metadata !"", i32 11, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @func, null, null, metadata !1, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [func]
-!6 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786688, metadata !11, metadata !"my_foo", metadata !6, i32 12, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [my_foo] [line 12]
-!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 11, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Sandbox/llvm/test.c]
-!12 = metadata !{i32 786451, metadata !32, null, metadata !"foo", i32 1, i64 64, i64 32, i32 0, i32 0, null, metadata !13, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [def] [from ]
-!13 = metadata !{metadata !14, metadata !15}
-!14 = metadata !{i32 786445, metadata !32, metadata !12, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
-!15 = metadata !{i32 786445, metadata !32, metadata !12, metadata !"b", i32 3, i64 32, i64 32, i64 32, i32 0, metadata !16} ; [ DW_TAG_member ] [b] [line 3, size 32, align 32, offset 32] [from ]
-!16 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 32, i32 0, i32 0, metadata !9, metadata !17, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32, align 32, offset 0] [from int]
-!17 = metadata !{metadata !18}
-!18 = metadata !{i32 786465, i64 0, i64 1} ; [ DW_TAG_subrange_type ] [0, 1]
-!19 = metadata !{i32 12, i32 0, metadata !11, null}
-!20 = metadata !{i32 786688, metadata !11, metadata !"my_bar", metadata !6, i32 13, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [my_bar] [line 13]
-!21 = metadata !{i32 786451, metadata !32, null, metadata !"bar", i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !22, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [bar] [line 6, size 32, align 32, offset 0] [def] [from ]
-!22 = metadata !{metadata !23, metadata !24}
-!23 = metadata !{i32 786445, metadata !32, metadata !21, metadata !"a", i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 7, size 32, align 32, offset 0] [from int]
-!24 = metadata !{i32 786445, metadata !32, metadata !21, metadata !"b", i32 8, i64 0, i64 32, i64 32, i32 0, metadata !25} ; [ DW_TAG_member ] [b] [line 8, size 0, align 32, offset 32] [from ]
-!25 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !26, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
-!26 = metadata !{metadata !27}
-!27 = metadata !{i32 786465, i64 0, i64 0} ; [ DW_TAG_subrange_type ] [0, 0]
-!28 = metadata !{i32 13, i32 0, metadata !11, null}
-!29 = metadata !{i32 15, i32 0, metadata !11, null}
-!30 = metadata !{i32 16, i32 0, metadata !11, null}
-!31 = metadata !{i32 17, i32 0, metadata !11, null}
-!32 = metadata !{metadata !"test.c", metadata !"/Volumes/Sandbox/llvm"}
-!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.3 (trunk 169136)\000\00\000\00\000", !32, !1, !1, !3, !1,  !1} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/test.c] [DW_LANG_C99]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00func\00func\00\0011\000\001\000\006\000\000\0011", !6, !6, !7, null, i32 ()* @func, null, null, !1} ; [ DW_TAG_subprogram ] [line 11] [def] [func]
+!6 = !{!"0x29", !32} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"0x100\00my_foo\0012\000", !11, !6, !12} ; [ DW_TAG_auto_variable ] [my_foo] [line 12]
+!11 = !{!"0xb\0011\000\000", !6, !5} ; [ DW_TAG_lexical_block ] [/Volumes/Sandbox/llvm/test.c]
+!12 = !{!"0x13\00foo\001\0064\0032\000\000\000", !32, null, null, !13, null, i32 0, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [def] [from ]
+!13 = !{!14, !15}
+!14 = !{!"0xd\00a\002\0032\0032\000\000", !32, !12, !9} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!15 = !{!"0xd\00b\003\0032\0032\0032\000", !32, !12, !16} ; [ DW_TAG_member ] [b] [line 3, size 32, align 32, offset 32] [from ]
+!16 = !{!"0x1\00\000\0032\0032\000\000", null, null, !9, !17, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32, align 32, offset 0] [from int]
+!17 = !{!18}
+!18 = !{!"0x21\000\001"} ; [ DW_TAG_subrange_type ] [0, 1]
+!19 = !MDLocation(line: 12, scope: !11)
+!20 = !{!"0x100\00my_bar\0013\000", !11, !6, !21} ; [ DW_TAG_auto_variable ] [my_bar] [line 13]
+!21 = !{!"0x13\00bar\006\0032\0032\000\000\000", !32, null, null, !22, null, i32 0, null} ; [ DW_TAG_structure_type ] [bar] [line 6, size 32, align 32, offset 0] [def] [from ]
+!22 = !{!23, !24}
+!23 = !{!"0xd\00a\007\0032\0032\000\000", !32, !21, !9} ; [ DW_TAG_member ] [a] [line 7, size 32, align 32, offset 0] [from int]
+!24 = !{!"0xd\00b\008\000\0032\0032\000", !32, !21, !25} ; [ DW_TAG_member ] [b] [line 8, size 0, align 32, offset 32] [from ]
+!25 = !{!"0x1\00\000\000\0032\000\000", null, null, !9, !26, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!26 = !{!27}
+!27 = !{!"0x21\000\000"} ; [ DW_TAG_subrange_type ] [0, 0]
+!28 = !MDLocation(line: 13, scope: !11)
+!29 = !MDLocation(line: 15, scope: !11)
+!30 = !MDLocation(line: 16, scope: !11)
+!31 = !MDLocation(line: 17, scope: !11)
+!32 = !{!"test.c", !"/Volumes/Sandbox/llvm"}
+!33 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/empty-array.ll b/test/DebugInfo/X86/empty-array.ll
index 3fab313fe0ef..d5a521acd3bb 100644
--- a/test/DebugInfo/X86/empty-array.ll
+++ b/test/DebugInfo/X86/empty-array.ll
@@ -22,28 +22,28 @@
 ; CHECK: [[BASE2]]: DW_TAG_base_type
 ; CHECK-NEXT: DW_AT_name
 ; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1]  (0x08)
-; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1]   (0x07)
+; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1]   (DW_ATE_unsigned)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!21}
 
-!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
-!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [def] [from ]
-!8 = metadata !{metadata !9, metadata !14}
-!9 = metadata !{i32 786445, metadata !20, metadata !7, metadata !"x", i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ]
-!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
-!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ] [unbound]
-!14 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A]
-!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{null, metadata !17}
-!17 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
-!18 = metadata !{metadata !19}
-!19 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!20 = metadata !{metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm"}
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.3 (trunk 169136)\000\00\000\00\000", !20, !1, !1, !1, !3,  !1} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x34\00a\00a\00\001\000\001", null, !6, !7, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
+!6 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
+!7 = !{!"0x2\00A\001\000\0032\000\000\000", !20, null, null, !8, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [def] [from ]
+!8 = !{!9, !14}
+!9 = !{!"0xd\00x\001\000\000\000\001", !20, !7, !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ]
+!10 = !{!"0x1\00\000\000\0032\000\000", null, null, !11, !12, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!11 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = !{!13}
+!13 = !{!"0x21\000\00-1"} ; [ DW_TAG_subrange_type ] [unbound]
+!14 = !{!"0x2e\00A\00A\00\001\000\000\000\006\00320\000\001", !6, !7, !15, null, null, null, i32 0, !18} ; [ DW_TAG_subprogram ] [line 1] [A]
+!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{null, !17}
+!17 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!18 = !{!19}
+!19 = !{!"0x24"}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!20 = !{!"t.cpp", !"/Volumes/Sandbox/llvm"}
+!21 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/ending-run.ll b/test/DebugInfo/X86/ending-run.ll
index 165074e3002c..0b5c77fea6ec 100644
--- a/test/DebugInfo/X86/ending-run.ll
+++ b/test/DebugInfo/X86/ending-run.ll
@@ -13,8 +13,8 @@ entry:
   %x.addr = alloca i32, align 4
   %y = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !12), !dbg !13
-  call void @llvm.dbg.declare(metadata !{i32* %y}, metadata !14), !dbg !16
+  call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !12, metadata !{!"0x102"}), !dbg !13
+  call void @llvm.dbg.declare(metadata i32* %y, metadata !14, metadata !{!"0x102"}), !dbg !16
   %0 = load i32* %x.addr, align 4, !dbg !17
   %1 = load i32* %x.addr, align 4, !dbg !17
   %mul = mul nsw i32 %0, %1, !dbg !17
@@ -24,25 +24,25 @@ entry:
   ret i32 %sub, !dbg !18
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!20}
 
-!0 = metadata !{i32 786449, metadata !19, i32 12, metadata !"clang version 3.1 (trunk 153921) (llvm/trunk 153916)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !19, metadata !6, metadata !"callee", metadata !"callee", metadata !"", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, null, i32 7} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !19} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!12 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!13 = metadata !{i32 5, i32 5, metadata !5, null}
-!14 = metadata !{i32 786688, metadata !15, metadata !"y", metadata !6, i32 8, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 786443, metadata !19, metadata !5, i32 7, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 8, i32 9, metadata !15, null}
-!17 = metadata !{i32 8, i32 18, metadata !15, null}
-!18 = metadata !{i32 9, i32 5, metadata !15, null}
-!19 = metadata !{metadata !"ending-run.c", metadata !"/Users/echristo/tmp"}
-!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.1 (trunk 153921) (llvm/trunk 153916)\000\00\000\00\000", !19, !1, !1, !3, !1,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00callee\00callee\00\004\000\001\000\006\000\000\007", !19, !6, !7, null, i32 (i32)* @callee, null, null, null} ; [ DW_TAG_subprogram ]
+!6 = !{!"0x29", !19} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!12 = !{!"0x101\00x\0016777221\000", !5, !6, !9} ; [ DW_TAG_arg_variable ]
+!13 = !MDLocation(line: 5, column: 5, scope: !5)
+!14 = !{!"0x100\00y\008\000", !15, !6, !9} ; [ DW_TAG_auto_variable ]
+!15 = !{!"0xb\007\001\000", !19, !5} ; [ DW_TAG_lexical_block ]
+!16 = !MDLocation(line: 8, column: 9, scope: !15)
+!17 = !MDLocation(line: 8, column: 18, scope: !15)
+!18 = !MDLocation(line: 9, column: 5, scope: !15)
+!19 = !{!"ending-run.c", !"/Users/echristo/tmp"}
+!20 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/enum-class.ll b/test/DebugInfo/X86/enum-class.ll
index 23ffbcc8f782..c2975a9d521e 100644
--- a/test/DebugInfo/X86/enum-class.ll
+++ b/test/DebugInfo/X86/enum-class.ll
@@ -8,26 +8,26 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!23}
 
-!0 = metadata !{i32 786449, metadata !22, i32 4, metadata !"clang version 3.2 (trunk 157269) (llvm/trunk 157264)", i1 false, metadata !"", i32 0, metadata !1, metadata !15, metadata !15, metadata !17,  metadata !15, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !3, metadata !8, metadata !12}
-!3 = metadata !{i32 786436, metadata !4, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, metadata !5, metadata !6, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from int]
-!4 = metadata !{i32 786473, metadata !22} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{metadata !7}
-!7 = metadata !{i32 786472, metadata !"A1", i64 1} ; [ DW_TAG_enumerator ]
-!8 = metadata !{i32 786436, metadata !4, null, metadata !"B", i32 2, i64 64, i64 64, i32 0, i32 0, metadata !9, metadata !10, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [B] [line 2, size 64, align 64, offset 0] [def] [from long unsigned int]
-!9 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 786472, metadata !"B1", i64 1} ; [ DW_TAG_enumerator ]
-!12 = metadata !{i32 786436, metadata !4, null, metadata !"C", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [C] [line 3, size 32, align 32, offset 0] [def] [from ]
-!13 = metadata !{metadata !14}
-!14 = metadata !{i32 786472, metadata !"C1", i64 1} ; [ DW_TAG_enumerator ]
-!15 = metadata !{}
-!17 = metadata !{metadata !19, metadata !20, metadata !21}
-!19 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !4, i32 4, metadata !3, i32 0, i32 1, i32* @a, null} ; [ DW_TAG_variable ]
-!20 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !4, i32 5, metadata !8, i32 0, i32 1, i64* @b, null} ; [ DW_TAG_variable ]
-!21 = metadata !{i32 786484, i32 0, null, metadata !"c", metadata !"c", metadata !"", metadata !4, i32 6, metadata !12, i32 0, i32 1, i32* @c, null} ; [ DW_TAG_variable ]
-!22 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"}
+!0 = !{!"0x11\004\00clang version 3.2 (trunk 157269) (llvm/trunk 157264)\000\00\000\00\000", !22, !1, !15, !15, !17,  !15} ; [ DW_TAG_compile_unit ]
+!1 = !{!3, !8, !12}
+!3 = !{!"0x4\00A\001\0032\0032\000\000\000", !4, null, !5, !6, null, null, null} ; [ DW_TAG_enumeration_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from int]
+!4 = !{!"0x29", !22} ; [ DW_TAG_file_type ]
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!6 = !{!7}
+!7 = !{!"0x28\00A1\001"} ; [ DW_TAG_enumerator ]
+!8 = !{!"0x4\00B\002\0064\0064\000\000\000", !4, null, !9, !10, null, null, null} ; [ DW_TAG_enumeration_type ] [B] [line 2, size 64, align 64, offset 0] [def] [from long unsigned int]
+!9 = !{!"0x24\00long unsigned int\000\0064\0064\000\000\007", null, null} ; [ DW_TAG_base_type ]
+!10 = !{!11}
+!11 = !{!"0x28\00B1\001"} ; [ DW_TAG_enumerator ]
+!12 = !{!"0x4\00C\003\0032\0032\000\000\000", !4, null, null, !13, null, null, null} ; [ DW_TAG_enumeration_type ] [C] [line 3, size 32, align 32, offset 0] [def] [from ]
+!13 = !{!14}
+!14 = !{!"0x28\00C1\001"} ; [ DW_TAG_enumerator ]
+!15 = !{}
+!17 = !{!19, !20, !21}
+!19 = !{!"0x34\00a\00a\00\004\000\001", null, !4, !3, i32* @a, null} ; [ DW_TAG_variable ]
+!20 = !{!"0x34\00b\00b\00\005\000\001", null, !4, !8, i64* @b, null} ; [ DW_TAG_variable ]
+!21 = !{!"0x34\00c\00c\00\006\000\001", null, !4, !12, i32* @c, null} ; [ DW_TAG_variable ]
+!22 = !{!"foo.cpp", !"/Users/echristo/tmp"}
 
 ; CHECK: DW_TAG_enumeration_type [{{.*}}]
 ; CHECK: DW_AT_type [DW_FORM_ref4]
@@ -42,4 +42,4 @@
 ; CHECK: DW_TAG_enumeration_type [6]
 ; CHECK-NOT: DW_AT_enum_class
 ; CHECK: DW_AT_name [DW_FORM_strp]      ( .debug_str[{{.*}}] = "C")
-!23 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!23 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/enum-fwd-decl.ll b/test/DebugInfo/X86/enum-fwd-decl.ll
index adb962ea527e..832fdb0673fd 100644
--- a/test/DebugInfo/X86/enum-fwd-decl.ll
+++ b/test/DebugInfo/X86/enum-fwd-decl.ll
@@ -6,16 +6,16 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9}
 
-!0 = metadata !{i32 786449, metadata !8, i32 4, metadata !"clang version 3.2 (trunk 165274) (llvm/trunk 165272)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/foo.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786484, i32 0, null, metadata !"e", metadata !"e", metadata !"", metadata !6, i32 2, metadata !7, i32 0, i32 1, i16* @e, null} ; [ DW_TAG_variable ] [e] [line 2] [def]
-!6 = metadata !{i32 786473, metadata !8} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786436, metadata !8, null, metadata !"E", i32 1, i64 16, i64 16, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [E] [line 1, size 16, align 16, offset 0] [decl] [from ]
-!8 = metadata !{metadata !"foo.cpp", metadata !"/tmp"}
+!0 = !{!"0x11\004\00clang version 3.2 (trunk 165274) (llvm/trunk 165272)\000\00\000\00\000", !8, !1, !1, !1, !3,  !1} ; [ DW_TAG_compile_unit ] [/tmp/foo.cpp] [DW_LANG_C_plus_plus]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x34\00e\00e\00\002\000\001", null, !6, !7, i16* @e, null} ; [ DW_TAG_variable ] [e] [line 2] [def]
+!6 = !{!"0x29", !8} ; [ DW_TAG_file_type ]
+!7 = !{!"0x4\00E\001\0016\0016\000\004\000", !8, null, null, null, null, null, null} ; [ DW_TAG_enumeration_type ] [E] [line 1, size 16, align 16, offset 0] [decl] [from ]
+!8 = !{!"foo.cpp", !"/tmp"}
 
 ; CHECK: DW_TAG_enumeration_type
 ; CHECK-NEXT: DW_AT_name
 ; CHECK-NEXT: DW_AT_byte_size
 ; CHECK-NEXT: DW_AT_declaration
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!9 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/fission-cu.ll b/test/DebugInfo/X86/fission-cu.ll
index 7f176986394c..f1ee0fe33afa 100644
--- a/test/DebugInfo/X86/fission-cu.ll
+++ b/test/DebugInfo/X86/fission-cu.ll
@@ -8,13 +8,13 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9}
 
-!0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.3 (trunk 169021) (llvm/trunk 169020)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !1, metadata !"baz.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.c] [DW_LANG_C99]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
-!6 = metadata !{i32 786473, metadata !8} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!8 = metadata !{metadata !"baz.c", metadata !"/usr/local/google/home/echristo/tmp"}
+!0 = !{!"0x11\0012\00clang version 3.3 (trunk 169021) (llvm/trunk 169020)\000\00\000\00baz.dwo\000", !8, !1, !1, !1, !3,  !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.c] [DW_LANG_C99]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x34\00a\00a\00\001\000\001", null, !6, !7, i32* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
+!6 = !{!"0x29", !8} ; [ DW_TAG_file_type ]
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = !{!"baz.c", !"/usr/local/google/home/echristo/tmp"}
 
 ; Check that the skeleton compile unit contains the proper attributes:
 ; This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
@@ -65,7 +65,7 @@
 ; CHECK: .debug_info.dwo contents:
 ; CHECK: DW_TAG_compile_unit
 ; CHECK: DW_AT_producer [DW_FORM_GNU_str_index] ( indexed (00000000) string = "clang version 3.3 (trunk 169021) (llvm/trunk 169020)")
-; CHECK: DW_AT_language [DW_FORM_data2]        (0x000c)
+; CHECK: DW_AT_language [DW_FORM_data2]        (DW_LANG_C99)
 ; CHECK: DW_AT_name [DW_FORM_GNU_str_index]    ( indexed (00000001) string = "baz.c")
 ; CHECK-NOT: DW_AT_low_pc
 ; CHECK-NOT: DW_AT_stmt_list
@@ -76,7 +76,7 @@
 ; CHECK: DW_AT_type [DW_FORM_ref4]       (cu + 0x{{[0-9a-f]*}} => {[[TYPE:0x[0-9a-f]*]]})
 ; CHECK: DW_AT_external [DW_FORM_flag_present]   (true)
 ; CHECK: DW_AT_decl_file [DW_FORM_data1] (0x01)
-; CHECK: DW_AT_decl_line [DW_FORM_data1] (0x01)
+; CHECK: DW_AT_decl_line [DW_FORM_data1] (1)
 ; CHECK: DW_AT_location [DW_FORM_exprloc] (<0x2> fb 00 )
 ; CHECK: [[TYPE]]: DW_TAG_base_type
 ; CHECK: DW_AT_name [DW_FORM_GNU_str_index]     ( indexed (00000003) string = "int")
@@ -111,4 +111,4 @@
 ; HDR-NOT: .debug_aranges
 ; HDR-NOT: .rela.{{.*}}.dwo
 
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!9 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/fission-hash.ll b/test/DebugInfo/X86/fission-hash.ll
index 3987faaf9716..5002ac63485e 100644
--- a/test/DebugInfo/X86/fission-hash.ll
+++ b/test/DebugInfo/X86/fission-hash.ll
@@ -9,8 +9,8 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 188230) (llvm/trunk 188234)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !2, metadata !2, metadata !"foo.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp"}
-!2 = metadata !{}
-!3 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 (trunk 188230) (llvm/trunk 188234)\000\00\000\00foo.dwo\000", !1, !2, !2, !2, !2, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
+!1 = !{!"foo.c", !"/usr/local/google/home/echristo/tmp"}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 3}
+!4 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/fission-inline.ll b/test/DebugInfo/X86/fission-inline.ll
new file mode 100644
index 000000000000..64999669c5be
--- /dev/null
+++ b/test/DebugInfo/X86/fission-inline.ll
@@ -0,0 +1,119 @@
+; RUN: llc -split-dwarf=Enable -O0 < %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Test the emission of gmlt-like inlining information into the skeleton unit.
+; This allows inline-aware symbolication/backtracing given only the linked
+; executable, without needing access to the .dwos.
+
+; A simple example of inlining generated with clang -gsplit-dwarf
+
+; A member function is used to force emission of the declaration of the
+; function into the .dwo file, which may be shared with other CUs in the dwo ;
+; under fission, but should not be shared with the skeleton's CU. This also
+; tests the general case of context emission, which is suppressed in gmlt-like
+; data.
+
+; Include a template just to test template parameters are not emitted in
+; gmlt-like data.
+
+; And some varargs to make sure DW_TAG_unspecified_parameters is not emitted.
+
+; And a using declaration in a nested lexical_block... because that shouldn't
+; be emitted either.
+
+; Minor complication: after generating the LLVM IR, it was manually edited so
+; that the 'f1()' call from f3 was reordered to appear between the two inlined
+; f1 calls from f2. This causes f2's inlined_subroutine to use DW_AT_ranges,
+; thus exercising range list generation/referencing which was buggy.
+
+; struct foo {
+;   template<typename T>
+;   static void f2();
+;   static void f3(...);
+; };
+;
+; void f1();
+;
+; template<typename T>
+; inline __attribute__((always_inline)) void foo::f2() {
+;   f1();
+;   f1();
+; }
+;
+; void foo::f3(...) {
+;   if (true) {
+;     f1();
+;     f2<int>();
+;     using ::foo;
+;   }
+; }
+
+; Check that we emit the usual gmlt-like data for this file, including brief
+; descriptions of subprograms with inlined scopes.
+
+; FIXME: Once tools support indexed addresses in the skeleton CU, we should use
+; those (DW_FORM_addr would become DW_FORM_GNU_addr_index below) since those
+; addresses will already be in the address pool anyway.
+
+; CHECK:      DW_TAG_subprogram
+; CHECK-NEXT:   DW_AT_name {{.*}} "f2<int>"
+; CHECK-NOT: DW_
+; CHECK:      DW_TAG_subprogram
+; CHECK-NEXT:   DW_AT_low_pc [DW_FORM_addr]
+; CHECK-NEXT:   DW_AT_high_pc
+; CHECK-NEXT:   DW_AT_name {{.*}} "f3"
+; CHECK-NOT: {{DW_|NULL}}
+; CHECK:        DW_TAG_inlined_subroutine
+; CHECK-NEXT:     DW_AT_abstract_origin {{.*}} "f2<int>"
+; CHECK-NEXT:     DW_AT_ranges
+; CHECK-NEXT:     DW_AT_call_file
+; CHECK-NEXT:     DW_AT_call_line {{.*}} (18)
+; CHECK-NOT: DW_
+
+; Function Attrs: uwtable
+define void @_ZN3foo2f3Ez(...) #0 align 2 {
+entry:
+  call void @_Z2f1v(), !dbg !26
+  call void @_Z2f1v(), !dbg !25
+  call void @_Z2f1v(), !dbg !28
+  ret void, !dbg !29
+}
+
+declare void @_Z2f1v() #1
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22, !23}
+!llvm.ident = !{!24}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 \000\00\000\00fission-inline.dwo\001", !1, !2, !3, !9, !2, !18} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/fission-inline.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"fission-inline.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00foo\001\008\008\000\000\000", !1, null, null, !5, null, null, !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!6}
+!6 = !{!"0x2e\00f3\00f3\00_ZN3foo2f3Ez\004\000\000\000\000\00256\000\004", !1, !"_ZTS3foo", !7, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 4] [f3]
+!7 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, null}
+!9 = !{!10, !11}
+!10 = !{!"0x2e\00f3\00f3\00_ZN3foo2f3Ez\0015\000\001\000\000\00256\000\0015", !1, !"_ZTS3foo", !7, null, void (...)* @_ZN3foo2f3Ez, null, !6, !2} ; [ DW_TAG_subprogram ] [line 15] [def] [f3]
+!11 = !{!"0x2e\00f2<int>\00f2<int>\00_ZN3foo2f2IiEEvv\0010\000\001\000\000\00256\000\0010", !1, !"_ZTS3foo", !12, null, null, !14, !17, !2} ; [ DW_TAG_subprogram ] [line 10] [def] [f2<int>]
+!12 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !13, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = !{null}
+!14 = !{!15}
+!15 = !{!"0x2f\00T\000\000", null, !16, null} ; [ DW_TAG_template_type_parameter ]
+!16 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!17 = !{!"0x2e\00f2<int>\00f2<int>\00_ZN3foo2f2IiEEvv\0010\000\000\000\000\00256\000\0010", !1, !"_ZTS3foo", !12, null, null, !14, null, null} ; [ DW_TAG_subprogram ] [line 10] [f2<int>]
+!18 = !{!19}
+!19 = !{!"0x8\0019\00", !20, !"_ZTS3foo"} ; [ DW_TAG_imported_declaration ]
+!20 = !{!"0xb\0016\0013\001", !1, !21} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/fission-inline.cpp]
+!21 = !{!"0xb\0016\007\000", !1, !10} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/fission-inline.cpp]
+!22 = !{i32 2, !"Dwarf Version", i32 4}
+!23 = !{i32 2, !"Debug Info Version", i32 2}
+!24 = !{!"clang version 3.6.0 "}
+!25 = !MDLocation(line: 17, column: 5, scope: !20)
+!26 = !MDLocation(line: 11, column: 3, scope: !11, inlinedAt: !27)
+!27 = !MDLocation(line: 18, column: 5, scope: !20)
+!28 = !MDLocation(line: 12, column: 3, scope: !11, inlinedAt: !27)
+!29 = !MDLocation(line: 21, column: 1, scope: !10)
diff --git a/test/DebugInfo/X86/fission-ranges.ll b/test/DebugInfo/X86/fission-ranges.ll
index 135837582fcc..400998e9126c 100644
--- a/test/DebugInfo/X86/fission-ranges.ll
+++ b/test/DebugInfo/X86/fission-ranges.ll
@@ -16,7 +16,7 @@
 ; CHECK: DW_AT_location [DW_FORM_sec_offset]   ([[E:0x[0-9a-z]*]])
 ; CHECK: DW_AT_location [DW_FORM_sec_offset]   ([[B:0x[0-9a-z]*]])
 ; CHECK: DW_AT_location [DW_FORM_sec_offset]   ([[D:0x[0-9a-z]*]])
-; CHECK: DW_AT_ranges [DW_FORM_sec_offset]   (0x000000a0)
+; CHECK: DW_AT_ranges [DW_FORM_sec_offset]   (0x00000000
 ; CHECK: .debug_loc contents:
 ; CHECK-NOT: Beginning address offset
 ; CHECK: .debug_loc.dwo contents:
@@ -25,7 +25,7 @@
 ; if they've changed due to a bugfix, change in register allocation, etc.
 
 ; CHECK: [[A]]: Beginning address index: 2
-; CHECK-NEXT:                    Length: 199
+; CHECK-NEXT:                    Length: 190
 ; CHECK-NEXT:      Location description: 11 00
 ; CHECK-NEXT: {{^$}}
 ; CHECK-NEXT:   Beginning address index: 3
@@ -91,8 +91,8 @@ entry:
 ; Function Attrs: nounwind uwtable
 define internal fastcc void @foo() #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !29, i64 0, metadata !13), !dbg !30
-  tail call void @llvm.dbg.value(metadata !44, i64 0, metadata !14), !dbg !31
+  tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !30
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !31
   %c.promoted9 = load i32* @c, align 4, !dbg !32, !tbaa !33
   br label %for.cond1.preheader, !dbg !31
 
@@ -114,28 +114,28 @@ for.cond7.preheader:                              ; preds = %for.inc10, %for.con
 for.body9:                                        ; preds = %for.body9, %for.cond7.preheader
   %and2 = phi i32 [ %and.lcssa5, %for.cond7.preheader ], [ %and, %for.body9 ], !dbg !40
   %e.01 = phi i32 [ 0, %for.cond7.preheader ], [ %inc, %for.body9 ]
-  tail call void @llvm.dbg.value(metadata !41, i64 0, metadata !19), !dbg !40
+  tail call void @llvm.dbg.value(metadata i32* @c, i64 0, metadata !19, metadata !{!"0x102"}), !dbg !40
   %and = and i32 %and2, 1, !dbg !32
   %inc = add i32 %e.01, 1, !dbg !39
-  tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !18), !dbg !39
+  tail call void @llvm.dbg.value(metadata i32 %inc, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !39
   %exitcond = icmp eq i32 %inc, 30, !dbg !39
   br i1 %exitcond, label %for.inc10, label %for.body9, !dbg !39
 
 for.inc10:                                        ; preds = %for.body9
   %inc11 = add nsw i32 %b.03, 1, !dbg !38
-  tail call void @llvm.dbg.value(metadata !{i32 %inc11}, i64 0, metadata !15), !dbg !38
+  tail call void @llvm.dbg.value(metadata i32 %inc11, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !38
   %exitcond11 = icmp eq i32 %inc11, 30, !dbg !38
   br i1 %exitcond11, label %for.inc13, label %for.cond7.preheader, !dbg !38
 
 for.inc13:                                        ; preds = %for.inc10
   %inc14 = add i32 %d.06, 1, !dbg !37
-  tail call void @llvm.dbg.value(metadata !{i32 %inc14}, i64 0, metadata !16), !dbg !37
+  tail call void @llvm.dbg.value(metadata i32 %inc14, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !37
   %exitcond12 = icmp eq i32 %inc14, 30, !dbg !37
   br i1 %exitcond12, label %for.inc16, label %for.cond4.preheader, !dbg !37
 
 for.inc16:                                        ; preds = %for.inc13
   %inc17 = add nsw i32 %a.08, 1, !dbg !31
-  tail call void @llvm.dbg.value(metadata !{i32 %inc17}, i64 0, metadata !14), !dbg !31
+  tail call void @llvm.dbg.value(metadata i32 %inc17, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !31
   %exitcond13 = icmp eq i32 %inc17, 30, !dbg !31
   br i1 %exitcond13, label %for.end18, label %for.cond1.preheader, !dbg !31
 
@@ -145,7 +145,7 @@ for.end18:                                        ; preds = %for.inc16
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -153,48 +153,48 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!26, !43}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 191700) (llvm/trunk 191710)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"small.dwo"} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/small.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"small.c", metadata !"/usr/local/google/home/echristo/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !8}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 18, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void ()* @bar, null, null, metadata !2, i32 19} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 19] [bar]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/small.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !9, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @foo, null, null, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 2] [local] [def] [scope 3] [foo]
-!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!10 = metadata !{null, metadata !11}
-!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16, metadata !18, metadata !19}
-!13 = metadata !{i32 786689, metadata !8, metadata !"p", metadata !5, i32 16777218, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p] [line 2]
-!14 = metadata !{i32 786688, metadata !8, metadata !"a", metadata !5, i32 4, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 4]
-!15 = metadata !{i32 786688, metadata !8, metadata !"b", metadata !5, i32 4, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 4]
-!16 = metadata !{i32 786688, metadata !8, metadata !"d", metadata !5, i32 5, metadata !17, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 5]
-!17 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
-!18 = metadata !{i32 786688, metadata !8, metadata !"e", metadata !5, i32 5, metadata !17, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [e] [line 5]
-!19 = metadata !{i32 786688, metadata !20, metadata !"w", metadata !5, i32 12, metadata !25, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [w] [line 12]
-!20 = metadata !{i32 786443, metadata !1, metadata !21, i32 11, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c]
-!21 = metadata !{i32 786443, metadata !1, metadata !22, i32 10, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c]
-!22 = metadata !{i32 786443, metadata !1, metadata !23, i32 9, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c]
-!23 = metadata !{i32 786443, metadata !1, metadata !24, i32 8, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c]
-!24 = metadata !{i32 786443, metadata !1, metadata !8, i32 7, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c]
-!25 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!26 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!27 = metadata !{i32 20, i32 0, metadata !4, null}
-!28 = metadata !{i32 21, i32 0, metadata !4, null}
-!29 = metadata !{i32 1}
-!30 = metadata !{i32 2, i32 0, metadata !8, null}
-!31 = metadata !{i32 7, i32 0, metadata !24, null}
-!32 = metadata !{i32 13, i32 0, metadata !20, null}
-!33 = metadata !{metadata !34, metadata !34, i64 0}
-!34 = metadata !{metadata !"int", metadata !35, i64 0}
-!35 = metadata !{metadata !"omnipotent char", metadata !36, i64 0}
-!36 = metadata !{metadata !"Simple C/C++ TBAA"}
-!37 = metadata !{i32 8, i32 0, metadata !23, null} ; [ DW_TAG_imported_declaration ]
-!38 = metadata !{i32 9, i32 0, metadata !22, null}
-!39 = metadata !{i32 10, i32 0, metadata !21, null}
-!40 = metadata !{i32 12, i32 0, metadata !20, null}
-!41 = metadata !{i32* @c}
-!42 = metadata !{i32 15, i32 0, metadata !8, null}
-!43 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!44 = metadata !{i32 0}
+!0 = !{!"0x11\0012\00clang version 3.4 (trunk 191700) (llvm/trunk 191710)\001\00\000\00small.dwo\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/small.c] [DW_LANG_C99]
+!1 = !{!"small.c", !"/usr/local/google/home/echristo/tmp"}
+!2 = !{}
+!3 = !{!4, !8}
+!4 = !{!"0x2e\00bar\00bar\00\0018\000\001\000\006\000\001\0019", !1, !5, !6, null, void ()* @bar, null, null, !2} ; [ DW_TAG_subprogram ] [line 18] [def] [scope 19] [bar]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/small.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{!"0x2e\00foo\00foo\00\002\001\001\000\006\00256\001\003", !1, !5, !9, null, void ()* @foo, null, null, !12} ; [ DW_TAG_subprogram ] [line 2] [local] [def] [scope 3] [foo]
+!9 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !{null, !11}
+!11 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = !{!13, !14, !15, !16, !18, !19}
+!13 = !{!"0x101\00p\0016777218\000", !8, !5, !11} ; [ DW_TAG_arg_variable ] [p] [line 2]
+!14 = !{!"0x100\00a\004\000", !8, !5, !11} ; [ DW_TAG_auto_variable ] [a] [line 4]
+!15 = !{!"0x100\00b\004\000", !8, !5, !11} ; [ DW_TAG_auto_variable ] [b] [line 4]
+!16 = !{!"0x100\00d\005\000", !8, !5, !17} ; [ DW_TAG_auto_variable ] [d] [line 5]
+!17 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, null} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!18 = !{!"0x100\00e\005\000", !8, !5, !17} ; [ DW_TAG_auto_variable ] [e] [line 5]
+!19 = !{!"0x100\00w\0012\000", !20, !5, !25} ; [ DW_TAG_auto_variable ] [w] [line 12]
+!20 = !{!"0xb\0011\000\004", !1, !21} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c]
+!21 = !{!"0xb\0010\000\003", !1, !22} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c]
+!22 = !{!"0xb\009\000\002", !1, !23} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c]
+!23 = !{!"0xb\008\000\001", !1, !24} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c]
+!24 = !{!"0xb\007\000\000", !1, !8} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/small.c]
+!25 = !{!"0xf\00\000\0064\0064\000\000", null, null, !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!26 = !{i32 2, !"Dwarf Version", i32 4}
+!27 = !MDLocation(line: 20, scope: !4)
+!28 = !MDLocation(line: 21, scope: !4)
+!29 = !{i32 1}
+!30 = !MDLocation(line: 2, scope: !8)
+!31 = !MDLocation(line: 7, scope: !24)
+!32 = !MDLocation(line: 13, scope: !20)
+!33 = !{!34, !34, i64 0}
+!34 = !{!"int", !35, i64 0}
+!35 = !{!"omnipotent char", !36, i64 0}
+!36 = !{!"Simple C/C++ TBAA"}
+!37 = !MDLocation(line: 8, scope: !23)
+!38 = !MDLocation(line: 9, scope: !22)
+!39 = !MDLocation(line: 10, scope: !21)
+!40 = !MDLocation(line: 12, scope: !20)
+!41 = !{i32* @c}
+!42 = !MDLocation(line: 15, scope: !8)
+!43 = !{i32 1, !"Debug Info Version", i32 2}
+!44 = !{i32 0}
diff --git a/test/DebugInfo/X86/formal_parameter.ll b/test/DebugInfo/X86/formal_parameter.ll
index 2fdab7a07f54..9077c74b29fd 100644
--- a/test/DebugInfo/X86/formal_parameter.ll
+++ b/test/DebugInfo/X86/formal_parameter.ll
@@ -28,7 +28,7 @@ define void @foo(i32 %map) #0 {
 entry:
   %map.addr = alloca i32, align 4
   store i32 %map, i32* %map.addr, align 4, !tbaa !15
-  call void @llvm.dbg.declare(metadata !{i32* %map.addr}, metadata !10), !dbg !14
+  call void @llvm.dbg.declare(metadata i32* %map.addr, metadata !10, metadata !{!"0x102"}), !dbg !14
   %call = call i32 (i32*, ...)* bitcast (i32 (...)* @lookup to i32 (i32*, ...)*)(i32* %map.addr) #3, !dbg !19
   ; Ensure that all dbg intrinsics have the same scope after
   ; LowerDbgDeclare is finished with them.
@@ -42,14 +42,14 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare i32 @lookup(...)
 
 declare i32 @verify(...)
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { nounwind ssp uwtable }
 attributes #1 = { nounwind readnone }
@@ -59,26 +59,26 @@ attributes #3 = { nounwind }
 !llvm.module.flags = !{!11, !12}
 !llvm.ident = !{!13}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [formal_parameter.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"formal_parameter.c", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32)* @foo, null, null, metadata !9, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [formal_parameter.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786689, metadata !4, metadata !"map", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [map] [line 1]
-!11 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!13 = metadata !{metadata !"clang version 3.5.0 "}
-!14 = metadata !{i32 1, i32 0, metadata !4, null}
-!15 = metadata !{metadata !16, metadata !16, i64 0}
-!16 = metadata !{metadata !"int", metadata !17, i64 0}
-!17 = metadata !{metadata !"omnipotent char", metadata !18, i64 0}
-!18 = metadata !{metadata !"Simple C/C++ TBAA"}
-!19 = metadata !{i32 3, i32 0, metadata !4, null}
-!20 = metadata !{i32 4, i32 0, metadata !21, null}
-!21 = metadata !{i32 786443, metadata !1, metadata !4, i32 4, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [formal_parameter.c]
-!22 = metadata !{i32 5, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5.0 \001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [formal_parameter.c] [DW_LANG_C99]
+!1 = !{!"formal_parameter.c", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\001\002", !1, !5, !6, null, void (i32)* @foo, null, null, !9} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [formal_parameter.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!10}
+!10 = !{!"0x101\00map\0016777217\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [map] [line 1]
+!11 = !{i32 2, !"Dwarf Version", i32 2}
+!12 = !{i32 1, !"Debug Info Version", i32 2}
+!13 = !{!"clang version 3.5.0 "}
+!14 = !MDLocation(line: 1, scope: !4)
+!15 = !{!16, !16, i64 0}
+!16 = !{!"int", !17, i64 0}
+!17 = !{!"omnipotent char", !18, i64 0}
+!18 = !{!"Simple C/C++ TBAA"}
+!19 = !MDLocation(line: 3, scope: !4)
+!20 = !MDLocation(line: 4, scope: !21)
+!21 = !{!"0xb\004\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [formal_parameter.c]
+!22 = !MDLocation(line: 5, scope: !4)
diff --git a/test/DebugInfo/X86/generate-odr-hash.ll b/test/DebugInfo/X86/generate-odr-hash.ll
index 2256b3e212a7..6d4fa86697c7 100644
--- a/test/DebugInfo/X86/generate-odr-hash.ll
+++ b/test/DebugInfo/X86/generate-odr-hash.ll
@@ -1,10 +1,12 @@
 ; REQUIRES: object-emission
 
-; RUN: llc %s -o %t -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -o %t -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu
 ; RUN: llvm-dwarfdump %t | FileCheck --check-prefix=CHECK --check-prefix=SINGLE %s
+; RUN: llvm-readobj -s -t %t | FileCheck --check-prefix=OBJ_SINGLE %s
 
-; RUN: llc %s -split-dwarf=Enable -o %t -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu
+; RUN: llc < %s -split-dwarf=Enable -o %t -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu
 ; RUN: llvm-dwarfdump %t | FileCheck --check-prefix=CHECK --check-prefix=FISSION %s
+; RUN: llvm-readobj -s -t %t | FileCheck --check-prefix=OBJ_FISSION %s
 
 ; Generated from bar.cpp:
 
@@ -72,27 +74,16 @@
 ; CHECK-NEXT: DW_AT_declaration
 ; CHECK-NEXT: DW_AT_signature {{.*}} (0xfd756cee88f8a118)
 
-; FISSION-LABEL: .debug_types contents:
-; FISSION-NOT: type_signature
-; FISSION-LABEL: type_signature = 0x1d02f3be30cc5688
-; FISSION: DW_TAG_type_unit
-; FISSION-NEXT: DW_AT_GNU_dwo_name{{.*}}"bar.dwo"
-; FISSION-NEXT: DW_AT_comp_dir{{.*}}"/tmp/dbginfo"
-; FISSION-NOT: type_signature
-; FISSION-LABEL: type_signature = 0xb04af47397402e77
-; FISSION-NOT: type_signature
-; FISSION-LABEL: type_signature = 0xfd756cee88f8a118
-; FISSION-NOT: type_signature
-; FISSION-LABEL: type_signature = 0xe94f6d3843e62d6b
-
 ; SINGLE-LABEL: .debug_types contents:
+; FISSION-NOT: .debug_types contents:
 ; FISSION-LABEL: .debug_types.dwo contents:
 
 ; Check that we generate a hash for bar and the value.
 ; CHECK-NOT: type_signature
 ; CHECK-LABEL: type_signature = 0x1d02f3be30cc5688
 ; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_name{{.*}}"bar"
+; FISSION-NEXT: DW_AT_name {{.*}} ( indexed {{.*}} "bar"
+; SINGLE-NEXT: DW_AT_name {{.*}} "bar"
 
 
 ; Check that we generate a hash for fluffy and the value.
@@ -161,6 +152,20 @@
 ; CHECK-DAG: [[WOMBAT]] "wombat"
 ; CHECK-DAG: [[FLUFFY]] "echidna::capybara::mongoose::fluffy"
 
+; Make sure debug_types are in comdat groups. This could be more rigid to check
+; that they're the right comdat groups (each type in a separate comdat group,
+; etc)
+; OBJ_SINGLE: Name: .debug_types (
+; OBJ_SINGLE-NOT: }
+; OBJ_SINGLE: SHF_GROUP
+
+; Fission type units don't go in comdat groups, since their linker is debug
+; aware it's handled using the debug info semantics rather than raw ELF object
+; semantics.
+; OBJ_FISSION: Name: .debug_types.dwo (
+; OBJ_FISSION-NOT: SHF_GROUP
+; OBJ_FISSION: }
+
 %struct.bar = type { i8 }
 %"class.echidna::capybara::mongoose::fluffy" = type { i32, i32 }
 %"struct.<anonymous namespace>::walrus" = type { i8 }
@@ -178,12 +183,12 @@
 define void @_Z3foov() #0 {
 entry:
   %b = alloca %struct.baz, align 1
-  call void @llvm.dbg.declare(metadata !{%struct.baz* %b}, metadata !46), !dbg !48
+  call void @llvm.dbg.declare(metadata %struct.baz* %b, metadata !46, metadata !{!"0x102"}), !dbg !48
   ret void, !dbg !49
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 define internal void @__cxx_global_var_init() section ".text.startup" {
 entry:
@@ -196,7 +201,7 @@ define internal void @_ZN12_GLOBAL__N_16walrusC2Ev(%"struct.<anonymous namespace
 entry:
   %this.addr = alloca %"struct.<anonymous namespace>::walrus"*, align 8
   store %"struct.<anonymous namespace>::walrus"* %this, %"struct.<anonymous namespace>::walrus"** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%"struct.<anonymous namespace>::walrus"** %this.addr}, metadata !51), !dbg !53
+  call void @llvm.dbg.declare(metadata %"struct.<anonymous namespace>::walrus"** %this.addr, metadata !51, metadata !{!"0x102"}), !dbg !53
   %this1 = load %"struct.<anonymous namespace>::walrus"** %this.addr
   ret void, !dbg !54
 }
@@ -214,59 +219,59 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!43, !44}
 !llvm.ident = !{!45}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !21, metadata !38, metadata !2, metadata !"bar.dwo"} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/bar.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"bar.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !6, metadata !14, metadata !17}
-!4 = metadata !{i32 786451, metadata !5, null, metadata !"bar", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTS3bar"} ; [ DW_TAG_structure_type ] [bar] [line 1, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{metadata !"bar.h", metadata !"/tmp/dbginfo"}
-!6 = metadata !{i32 786434, metadata !1, metadata !7, metadata !"fluffy", i32 13, i64 64, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null, metadata !"_ZTSN7echidna8capybara8mongoose6fluffyE"} ; [ DW_TAG_class_type ] [fluffy] [line 13, size 64, align 32, offset 0] [def] [from ]
-!7 = metadata !{i32 786489, metadata !1, metadata !8, metadata !"mongoose", i32 12} ; [ DW_TAG_namespace ] [mongoose] [line 12]
-!8 = metadata !{i32 786489, metadata !1, metadata !9, metadata !"capybara", i32 11} ; [ DW_TAG_namespace ] [capybara] [line 11]
-!9 = metadata !{i32 786489, metadata !1, null, metadata !"echidna", i32 10} ; [ DW_TAG_namespace ] [echidna] [line 10]
-!10 = metadata !{metadata !11, metadata !13}
-!11 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN7echidna8capybara8mongoose6fluffyE", metadata !"a", i32 14, i64 32, i64 32, i64 0, i32 1, metadata !12} ; [ DW_TAG_member ] [a] [line 14, size 32, align 32, offset 0] [private] [from int]
-!12 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!13 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN7echidna8capybara8mongoose6fluffyE", metadata !"b", i32 15, i64 32, i64 32, i64 32, i32 1, metadata !12} ; [ DW_TAG_member ] [b] [line 15, size 32, align 32, offset 32] [private] [from int]
-!14 = metadata !{i32 786451, metadata !1, null, metadata !"wombat", i32 31, i64 64, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null, metadata !"_ZTS6wombat"} ; [ DW_TAG_structure_type ] [wombat] [line 31, size 64, align 32, offset 0] [def] [from ]
-!15 = metadata !{metadata !16}
-!16 = metadata !{i32 786445, metadata !1, metadata !"_ZTS6wombat", metadata !"a_b", i32 35, i64 64, i64 32, i64 0, i32 0, metadata !"_ZTSN6wombatUt_E"} ; [ DW_TAG_member ] [a_b] [line 35, size 64, align 32, offset 0] [from _ZTSN6wombatUt_E]
-!17 = metadata !{i32 786451, metadata !1, metadata !"_ZTS6wombat", metadata !"", i32 32, i64 64, i64 32, i32 0, i32 0, null, metadata !18, i32 0, null, null, metadata !"_ZTSN6wombatUt_E"} ; [ DW_TAG_structure_type ] [line 32, size 64, align 32, offset 0] [def] [from ]
-!18 = metadata !{metadata !19, metadata !20}
-!19 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN6wombatUt_E", metadata !"a", i32 33, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ] [a] [line 33, size 32, align 32, offset 0] [from int]
-!20 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN6wombatUt_E", metadata !"b", i32 34, i64 32, i64 32, i64 32, i32 0, metadata !12} ; [ DW_TAG_member ] [b] [line 34, size 32, align 32, offset 32] [from int]
-!21 = metadata !{metadata !22, metadata !26, metadata !27, metadata !36}
-!22 = metadata !{i32 786478, metadata !1, metadata !23, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 5, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3foov, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [foo]
-!23 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/bar.cpp]
-!24 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!25 = metadata !{null}
-!26 = metadata !{i32 786478, metadata !1, metadata !23, metadata !"__cxx_global_var_init", metadata !"__cxx_global_var_init", metadata !"", i32 29, metadata !24, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @__cxx_global_var_init, null, null, metadata !2, i32 29} ; [ DW_TAG_subprogram ] [line 29] [local] [def] [__cxx_global_var_init]
-!27 = metadata !{i32 786478, metadata !1, metadata !28, metadata !"walrus", metadata !"walrus", metadata !"_ZN12_GLOBAL__N_16walrusC2Ev", i32 25, metadata !32, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%"struct.<anonymous namespace>::walrus"*)* @_ZN12_GLOBAL__N_16walrusC2Ev, null, metadata !31, metadata !2, i32 25} ; [ DW_TAG_subprogram ] [line 25] [local] [def] [walrus]
-!28 = metadata !{i32 786451, metadata !1, metadata !29, metadata !"walrus", i32 24, i64 8, i64 8, i32 0, i32 0, null, metadata !30, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [walrus] [line 24, size 8, align 8, offset 0] [def] [from ]
-!29 = metadata !{i32 786489, metadata !1, null, metadata !"", i32 23} ; [ DW_TAG_namespace ] [line 23]
-!30 = metadata !{metadata !31}
-!31 = metadata !{i32 786478, metadata !1, metadata !28, metadata !"walrus", metadata !"walrus", metadata !"", i32 25, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !35, i32 25} ; [ DW_TAG_subprogram ] [line 25] [walrus]
-!32 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!33 = metadata !{null, metadata !34}
-!34 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !28} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from walrus]
-!35 = metadata !{i32 786468}
-!36 = metadata !{i32 786478, metadata !1, metadata !23, metadata !"", metadata !"", metadata !"_GLOBAL__I_a", i32 25, metadata !37, i1 true, i1 true, i32 0, i32 0, null, i32 64, i1 false, void ()* @_GLOBAL__I_a, null, null, metadata !2, i32 25} ; [ DW_TAG_subprogram ] [line 25] [local] [def]
-!37 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!38 = metadata !{metadata !39, metadata !40, metadata !41, metadata !42}
-!39 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !23, i32 3, metadata !4, i32 0, i32 1, %struct.bar* @b, null} ; [ DW_TAG_variable ] [b] [line 3] [def]
-!40 = metadata !{i32 786484, i32 0, metadata !7, metadata !"animal", metadata !"animal", metadata !"_ZN7echidna8capybara8mongoose6animalE", metadata !23, i32 18, metadata !6, i32 0, i32 1, %"class.echidna::capybara::mongoose::fluffy"* @_ZN7echidna8capybara8mongoose6animalE, null} ; [ DW_TAG_variable ] [animal] [line 18] [def]
-!41 = metadata !{i32 786484, i32 0, null, metadata !"w", metadata !"w", metadata !"", metadata !23, i32 29, metadata !28, i32 1, i32 1, %"struct.<anonymous namespace>::walrus"* @w, null} ; [ DW_TAG_variable ] [w] [line 29] [local] [def]
-!42 = metadata !{i32 786484, i32 0, null, metadata !"wom", metadata !"wom", metadata !"", metadata !23, i32 38, metadata !14, i32 0, i32 1, %struct.wombat* @wom, null} ; [ DW_TAG_variable ] [wom] [line 38] [def]
-!43 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!44 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!45 = metadata !{metadata !"clang version 3.5 "}
-!46 = metadata !{i32 786688, metadata !22, metadata !"b", metadata !23, i32 7, metadata !47, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 7]
-!47 = metadata !{i32 786451, metadata !1, metadata !22, metadata !"baz", i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [baz] [line 6, size 8, align 8, offset 0] [def] [from ]
-!48 = metadata !{i32 7, i32 0, metadata !22, null}
-!49 = metadata !{i32 8, i32 0, metadata !22, null} ; [ DW_TAG_imported_declaration ]
-!50 = metadata !{i32 29, i32 0, metadata !26, null}
-!51 = metadata !{i32 786689, metadata !27, metadata !"this", null, i32 16777216, metadata !52, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!52 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !28} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from walrus]
-!53 = metadata !{i32 0, i32 0, metadata !27, null}
-!54 = metadata !{i32 25, i32 0, metadata !27, null}
-!55 = metadata !{i32 25, i32 0, metadata !36, null}
+!0 = !{!"0x11\004\00clang version 3.5 \000\00\000\00bar.dwo\000", !1, !2, !3, !21, !38, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/bar.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"bar.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4, !6, !14, !17}
+!4 = !{!"0x13\00bar\001\008\008\000\000\000", !5, null, null, !2, null, null, !"_ZTS3bar"} ; [ DW_TAG_structure_type ] [bar] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!"bar.h", !"/tmp/dbginfo"}
+!6 = !{!"0x2\00fluffy\0013\0064\0032\000\000\000", !1, !7, null, !10, null, null, !"_ZTSN7echidna8capybara8mongoose6fluffyE"} ; [ DW_TAG_class_type ] [fluffy] [line 13, size 64, align 32, offset 0] [def] [from ]
+!7 = !{!"0x39\00mongoose\0012", !1, !8} ; [ DW_TAG_namespace ] [mongoose] [line 12]
+!8 = !{!"0x39\00capybara\0011", !1, !9} ; [ DW_TAG_namespace ] [capybara] [line 11]
+!9 = !{!"0x39\00echidna\0010", !1, null} ; [ DW_TAG_namespace ] [echidna] [line 10]
+!10 = !{!11, !13}
+!11 = !{!"0xd\00a\0014\0032\0032\000\001", !1, !"_ZTSN7echidna8capybara8mongoose6fluffyE", !12} ; [ DW_TAG_member ] [a] [line 14, size 32, align 32, offset 0] [private] [from int]
+!12 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!13 = !{!"0xd\00b\0015\0032\0032\0032\001", !1, !"_ZTSN7echidna8capybara8mongoose6fluffyE", !12} ; [ DW_TAG_member ] [b] [line 15, size 32, align 32, offset 32] [private] [from int]
+!14 = !{!"0x13\00wombat\0031\0064\0032\000\000\000", !1, null, null, !15, null, null, !"_ZTS6wombat"} ; [ DW_TAG_structure_type ] [wombat] [line 31, size 64, align 32, offset 0] [def] [from ]
+!15 = !{!16}
+!16 = !{!"0xd\00a_b\0035\0064\0032\000\000", !1, !"_ZTS6wombat", !"_ZTSN6wombatUt_E"} ; [ DW_TAG_member ] [a_b] [line 35, size 64, align 32, offset 0] [from _ZTSN6wombatUt_E]
+!17 = !{!"0x13\00\0032\0064\0032\000\000\000", !1, !"_ZTS6wombat", null, !18, null, null, !"_ZTSN6wombatUt_E"} ; [ DW_TAG_structure_type ] [line 32, size 64, align 32, offset 0] [def] [from ]
+!18 = !{!19, !20}
+!19 = !{!"0xd\00a\0033\0032\0032\000\000", !1, !"_ZTSN6wombatUt_E", !12} ; [ DW_TAG_member ] [a] [line 33, size 32, align 32, offset 0] [from int]
+!20 = !{!"0xd\00b\0034\0032\0032\0032\000", !1, !"_ZTSN6wombatUt_E", !12} ; [ DW_TAG_member ] [b] [line 34, size 32, align 32, offset 32] [from int]
+!21 = !{!22, !26, !27, !36}
+!22 = !{!"0x2e\00foo\00foo\00_Z3foov\005\000\001\000\006\00256\000\005", !1, !23, !24, null, void ()* @_Z3foov, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [foo]
+!23 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/bar.cpp]
+!24 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !25, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!25 = !{null}
+!26 = !{!"0x2e\00__cxx_global_var_init\00__cxx_global_var_init\00\0029\001\001\000\006\00256\000\0029", !1, !23, !24, null, void ()* @__cxx_global_var_init, null, null, !2} ; [ DW_TAG_subprogram ] [line 29] [local] [def] [__cxx_global_var_init]
+!27 = !{!"0x2e\00walrus\00walrus\00_ZN12_GLOBAL__N_16walrusC2Ev\0025\001\001\000\006\00256\000\0025", !1, !28, !32, null, void (%"struct.<anonymous namespace>::walrus"*)* @_ZN12_GLOBAL__N_16walrusC2Ev, null, !31, !2} ; [ DW_TAG_subprogram ] [line 25] [local] [def] [walrus]
+!28 = !{!"0x13\00walrus\0024\008\008\000\000\000", !1, !29, null, !30, null, null, null} ; [ DW_TAG_structure_type ] [walrus] [line 24, size 8, align 8, offset 0] [def] [from ]
+!29 = !{!"0x39\00\0023", !1, null} ; [ DW_TAG_namespace ] [line 23]
+!30 = !{!31}
+!31 = !{!"0x2e\00walrus\00walrus\00\0025\000\000\000\006\00256\000\0025", !1, !28, !32, null, null, null, i32 0, !35} ; [ DW_TAG_subprogram ] [line 25] [walrus]
+!32 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !33, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!33 = !{null, !34}
+!34 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !28} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from walrus]
+!35 = !{i32 786468}
+!36 = !{!"0x2e\00\00\00_GLOBAL__I_a\0025\001\001\000\006\0064\000\0025", !1, !23, !37, null, void ()* @_GLOBAL__I_a, null, null, !2} ; [ DW_TAG_subprogram ] [line 25] [local] [def]
+!37 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!38 = !{!39, !40, !41, !42}
+!39 = !{!"0x34\00b\00b\00\003\000\001", null, !23, !4, %struct.bar* @b, null} ; [ DW_TAG_variable ] [b] [line 3] [def]
+!40 = !{!"0x34\00animal\00animal\00_ZN7echidna8capybara8mongoose6animalE\0018\000\001", !7, !23, !6, %"class.echidna::capybara::mongoose::fluffy"* @_ZN7echidna8capybara8mongoose6animalE, null} ; [ DW_TAG_variable ] [animal] [line 18] [def]
+!41 = !{!"0x34\00w\00w\00\0029\001\001", null, !23, !28, %"struct.<anonymous namespace>::walrus"* @w, null} ; [ DW_TAG_variable ] [w] [line 29] [local] [def]
+!42 = !{!"0x34\00wom\00wom\00\0038\000\001", null, !23, !14, %struct.wombat* @wom, null} ; [ DW_TAG_variable ] [wom] [line 38] [def]
+!43 = !{i32 2, !"Dwarf Version", i32 4}
+!44 = !{i32 1, !"Debug Info Version", i32 2}
+!45 = !{!"clang version 3.5 "}
+!46 = !{!"0x100\00b\007\000", !22, !23, !47} ; [ DW_TAG_auto_variable ] [b] [line 7]
+!47 = !{!"0x13\00baz\006\008\008\000\000\000", !1, !22, null, !2, null, null, null} ; [ DW_TAG_structure_type ] [baz] [line 6, size 8, align 8, offset 0] [def] [from ]
+!48 = !MDLocation(line: 7, scope: !22)
+!49 = !MDLocation(line: 8, scope: !22)
+!50 = !MDLocation(line: 29, scope: !26)
+!51 = !{!"0x101\00this\0016777216\001088", !27, null, !52} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!52 = !{!"0xf\00\000\0064\0064\000\000", null, null, !28} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from walrus]
+!53 = !MDLocation(line: 0, scope: !27)
+!54 = !MDLocation(line: 25, scope: !27)
+!55 = !MDLocation(line: 25, scope: !36)
diff --git a/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll b/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll
new file mode 100644
index 000000000000..9391da6da4f1
--- /dev/null
+++ b/test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll
@@ -0,0 +1,105 @@
+; REQUIRES: object-emission
+
+; RUN: llc -mtriple=x86_64-apple-macosx10.10.0 -o %t %s
+
+; Testcase generated from:
+; #include <stdint.h>
+; int foo(int a) {
+;     int b = (int16_t)a + 8;
+;     int c = (int16_t)b + 8;
+;     int d = (int16_t)c + 8;
+;     int e = (int16_t)d + 8;
+;     int f = (int16_t)e + 8;
+;     return f;
+; }
+; by emitting the IR and then manually applying mem2reg to it.
+
+; This testcase would trigger the assert commited along with it if the
+; fix of r221709 isn't applied. There is no other check except the successful
+; run of llc.
+; What happened before r221709, is that SDDbgInfo (the data structure helping
+; SelectionDAG to keep track of dbg.values) kept a map keyed by SDNode pointers.
+; This map was never purged when the SDNodes were deallocated and thus if a new
+; SDNode was allocated in the same memory, it would have an entry in the SDDbgInfo
+; map upon creation (Reallocation in the same memory can happen easily as
+; SelectionDAG uses a Recycling allocator). This behavior could turn into a
+; pathological memory consumption explosion if the DAG combiner hit the 'right'
+; allocation patterns as could be seen in PR20893.
+; By nature, this test could bitrot quite easily. If it doesn't trigger an assert
+; when run with r221709 reverted, then it really doesn't test anything anymore.
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @foo(i32 %a) #0 {
+entry:
+  call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !16, metadata !17), !dbg !18
+  %conv = trunc i32 %a to i16, !dbg !19
+  %conv1 = sext i16 %conv to i32, !dbg !19
+  %add = add nsw i32 %conv1, 8, !dbg !19
+  call void @llvm.dbg.value(metadata i32 %add, i64 0, metadata !20, metadata !17), !dbg !21
+  %conv2 = trunc i32 %add to i16, !dbg !22
+  %conv3 = sext i16 %conv2 to i32, !dbg !22
+  %add4 = add nsw i32 %conv3, 8, !dbg !22
+  call void @llvm.dbg.value(metadata i32 %add4, i64 0, metadata !23, metadata !17), !dbg !24
+  %conv5 = trunc i32 %add4 to i16, !dbg !25
+  %conv6 = sext i16 %conv5 to i32, !dbg !25
+  %add7 = add nsw i32 %conv6, 8, !dbg !25
+  call void @llvm.dbg.value(metadata i32 %add7, i64 0, metadata !26, metadata !17), !dbg !27
+  %conv8 = trunc i32 %add7 to i16, !dbg !28
+  %conv9 = sext i16 %conv8 to i32, !dbg !28
+  %add10 = add nsw i32 %conv9, 8, !dbg !28
+  call void @llvm.dbg.value(metadata i32 %add10, i64 0, metadata !29, metadata !17), !dbg !30
+  %conv11 = trunc i32 %add10 to i16, !dbg !31
+  %conv12 = sext i16 %conv11 to i32, !dbg !31
+  %add13 = add nsw i32 %conv12, 8, !dbg !31
+  call void @llvm.dbg.value(metadata i32 %add13, i64 0, metadata !32, metadata !17), !dbg !33
+  ret i32 %add13, !dbg !34
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
+
+!0 = !{!"0x11\0012\00clang version 3.6.0 \000\00\000\00\001", !1, !2, !3, !7, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/ghost-sdnode-dbgvalues.c] [DW_LANG_C99]
+!1 = !{!"ghost-sdnode-dbgvalues.c", !"/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x16\00int16_t\0030\000\000\000\000", !5, null, !6} ; [ DW_TAG_typedef ] [int16_t] [line 30, size 0, align 0, offset 0] [from short]
+!5 = !{!"/usr/include/sys/_types/_int16_t.h", !"/tmp"}
+!6 = !{!"0x24\00short\000\0016\0016\000\000\005", null, null} ; [ DW_TAG_base_type ] [short] [line 0, size 16, align 16, offset 0, enc DW_ATE_signed]
+!7 = !{!8}
+!8 = !{!"0x2e\00foo\00foo\00\003\000\001\000\000\00256\000\003", !1, !9, !10, null, i32 (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [foo]
+!9 = !{!"0x29", !1}    ; [ DW_TAG_file_type ] [/tmp/ghost-sdnode-dbgvalues.c]
+!10 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !{!12, !12}
+!12 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!13 = !{i32 2, !"Dwarf Version", i32 2}
+!14 = !{i32 2, !"Debug Info Version", i32 2}
+!15 = !{!"clang version 3.6.0 "}
+!16 = !{!"0x101\00a\0016777219\000", !8, !9, !12} ; [ DW_TAG_arg_variable ] [a] [line 3]
+!17 = !{!"0x102"}               ; [ DW_TAG_expression ]
+!18 = !MDLocation(line: 3, column: 13, scope: !8)
+!19 = !MDLocation(line: 4, column: 5, scope: !8)
+!20 = !{!"0x100\00b\004\000", !8, !9, !12} ; [ DW_TAG_auto_variable ] [b] [line 4]
+!21 = !MDLocation(line: 4, column: 9, scope: !8)
+!22 = !MDLocation(line: 5, column: 5, scope: !8)
+!23 = !{!"0x100\00c\005\000", !8, !9, !12} ; [ DW_TAG_auto_variable ] [c] [line 5]
+!24 = !MDLocation(line: 5, column: 9, scope: !8)
+!25 = !MDLocation(line: 6, column: 5, scope: !8)
+!26 = !{!"0x100\00d\006\000", !8, !9, !12} ; [ DW_TAG_auto_variable ] [d] [line 6]
+!27 = !MDLocation(line: 6, column: 9, scope: !8)
+!28 = !MDLocation(line: 7, column: 5, scope: !8)
+!29 = !{!"0x100\00e\007\000", !8, !9, !12} ; [ DW_TAG_auto_variable ] [e] [line 7]
+!30 = !MDLocation(line: 7, column: 9, scope: !8)
+!31 = !MDLocation(line: 8, column: 5, scope: !8)
+!32 = !{!"0x100\00f\008\000", !8, !9, !12} ; [ DW_TAG_auto_variable ] [f] [line 8]
+!33 = !MDLocation(line: 8, column: 9, scope: !8)
+!34 = !MDLocation(line: 9, column: 5, scope: !8)
diff --git a/test/DebugInfo/X86/gmlt.test b/test/DebugInfo/X86/gmlt.test
new file mode 100644
index 000000000000..6cdd71d68bd9
--- /dev/null
+++ b/test/DebugInfo/X86/gmlt.test
@@ -0,0 +1,2 @@
+; RUN: llc -O0 -filetype=obj < %S/../Inputs/gmlt.ll -mtriple x86_64-apple-darwin | llvm-dwarfdump - \
+; RUN:     | FileCheck --check-prefix=CHECK --check-prefix=DARWIN %S/../Inputs/gmlt.ll
diff --git a/test/DebugInfo/X86/gnu-public-names-empty.ll b/test/DebugInfo/X86/gnu-public-names-empty.ll
index 46ae65d00384..f9a06175d2cd 100644
--- a/test/DebugInfo/X86/gnu-public-names-empty.ll
+++ b/test/DebugInfo/X86/gnu-public-names-empty.ll
@@ -12,8 +12,8 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 191846) (llvm/trunk 191866)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp"}
-!2 = metadata !{}
-!3 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 (trunk 191846) (llvm/trunk 191866)\000\00\000\00\000", !1, !2, !2, !2, !2, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
+!1 = !{!"foo.c", !"/usr/local/google/home/echristo/tmp"}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/gnu-public-names.ll b/test/DebugInfo/X86/gnu-public-names.ll
index 96fa52b92caf..7e92b537c894 100644
--- a/test/DebugInfo/X86/gnu-public-names.ll
+++ b/test/DebugInfo/X86/gnu-public-names.ll
@@ -49,37 +49,43 @@
 ; CHECK: DW_AT_GNU_pubnames [DW_FORM_flag_present]   (true)
 ; CHECK-NOT: DW_AT_GNU_pubtypes [
 
+; CHECK: [[STATIC_MEM_VAR:0x[0-9a-f]+]]: DW_TAG_variable
+; CHECK-NEXT: DW_AT_specification {{.*}} "static_member_variable"
+
 ; CHECK: [[C:0x[0-9a-f]+]]: DW_TAG_structure_type
 ; CHECK-NEXT: DW_AT_name {{.*}} "C"
 
-; CHECK: [[STATIC_MEM_DECL:0x[0-9a-f]+]]: DW_TAG_member
+; CHECK: DW_TAG_member
 ; CHECK-NEXT: DW_AT_name {{.*}} "static_member_variable"
 
-; CHECK: [[MEM_FUNC_DECL:0x[0-9a-f]+]]: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
 ; CHECK-NEXT: DW_AT_MIPS_linkage_name
 ; CHECK-NEXT: DW_AT_name {{.*}} "member_function"
 
-; CHECK: [[STATIC_MEM_FUNC_DECL:0x[0-9a-f]+]]: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
 ; CHECK-NEXT: DW_AT_MIPS_linkage_name
 ; CHECK-NEXT: DW_AT_name {{.*}} "static_member_function"
 
 ; CHECK: [[INT:0x[0-9a-f]+]]: DW_TAG_base_type
 ; CHECK-NEXT: DW_AT_name {{.*}} "int"
 
-; CHECK: [[STATIC_MEM_VAR:0x[0-9a-f]+]]: DW_TAG_variable
-; CHECK-NEXT: DW_AT_specification {{.*}} {[[STATIC_MEM_DECL]]}
-
 ; CHECK: [[GLOB_VAR:0x[0-9a-f]+]]: DW_TAG_variable
 ; CHECK-NEXT: DW_AT_name {{.*}} "global_variable"
 
 ; CHECK: [[NS:0x[0-9a-f]+]]: DW_TAG_namespace
 ; CHECK-NEXT: DW_AT_name {{.*}} "ns"
 
-; CHECK: [[GLOB_NS_VAR_DECL:0x[0-9a-f]+]]: DW_TAG_variable
+; CHECK: [[GLOB_NS_VAR:0x[0-9a-f]+]]: DW_TAG_variable
 ; CHECK-NEXT: DW_AT_name {{.*}} "global_namespace_variable"
+; CHECK-NOT: DW_AT_specification
+; CHECK: DW_AT_location
+; CHECK-NOT: DW_AT_specification
 
-; CHECK: [[D_VAR_DECL:0x[0-9a-f]+]]: DW_TAG_variable
+; CHECK: [[D_VAR:0x[0-9a-f]+]]: DW_TAG_variable
 ; CHECK-NEXT: DW_AT_name {{.*}} "d"
+; CHECK-NOT: DW_AT_specification
+; CHECK: DW_AT_location
+; CHECK-NOT: DW_AT_specification
 
 ; CHECK: [[D:0x[0-9a-f]+]]: DW_TAG_structure_type
 ; CHECK-NEXT: DW_AT_name {{.*}} "D"
@@ -90,12 +96,6 @@
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_name {{.*}} "global_namespace_function"
 
-; CHECK: [[GLOB_NS_VAR:0x[0-9a-f]+]]: DW_TAG_variable
-; CHECK-NEXT: DW_AT_specification {{.*}} {[[GLOB_NS_VAR_DECL]]}
-
-; CHECK: [[D_VAR:0x[0-9a-f]+]]: DW_TAG_variable
-; CHECK-NEXT: DW_AT_specification {{.*}} {[[D_VAR_DECL]]}
-
 ; CHECK: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
 ; CHECK:   DW_AT_name {{.*}} "f3"
@@ -116,7 +116,7 @@
 ; CHECK: [[OUTER_ANON:.*]]:  DW_TAG_namespace
 ; CHECK-NOT: {{DW_TAG|NULL}}
 ; CHECK-NOT:     DW_AT_name
-; CHECK: [[OUTER_ANON_C_DECL:.*]]:     DW_TAG_variable
+; CHECK: [[OUTER_ANON_C:.*]]: DW_TAG_variable
 ; CHECK-NOT: DW_TAG
 ; CHECK:       DW_AT_name {{.*}} "c"
 ; CHECK-NOT: {{DW_TAG|NULL}}
@@ -129,9 +129,6 @@
 ; CHECK-NOT: {{DW_TAG|NULL}}
 ; CHECK:   NULL
 ; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: [[OUTER_ANON_C:.*]]: DW_TAG_variable
-; CHECK-NOT: DW_TAG
-; CHECK-NEXT:   DW_AT_specification {{.*}} {[[OUTER_ANON_C_DECL]]}
 
 ; CHECK: [[ANON:.*]]: DW_TAG_namespace
 ; CHECK-NOT:   DW_AT_name
@@ -139,32 +136,26 @@
 ; CHECK-NOT: DW_TAG
 ; CHECK:     DW_AT_name {{.*}} "inner"
 ; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: [[ANON_INNER_B_DECL:.*]]:     DW_TAG_variable
+; CHECK: [[ANON_INNER_B:.*]]: DW_TAG_variable
 ; CHECK-NOT: DW_TAG
 ; CHECK:       DW_AT_name {{.*}} "b"
 ; CHECK-NOT: {{DW_TAG|NULL}}
 ; CHECK:     NULL
 ; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: [[ANON_I_DECL:.*]]:   DW_TAG_variable
+; CHECK: [[ANON_I:.*]]: DW_TAG_variable
 ; CHECK-NOT: DW_TAG
 ; CHECK:     DW_AT_name {{.*}} "i"
 ; CHECK-NOT: {{DW_TAG|NULL}}
 ; CHECK:   NULL
 ; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: [[ANON_INNER_B:.*]]: DW_TAG_variable
-; CHECK-NOT: DW_TAG
-; CHECK-NEXT:   DW_AT_specification {{.*}} {[[ANON_INNER_B_DECL]]}
-; CHECK: [[ANON_I:.*]]: DW_TAG_variable
-; CHECK-NOT: DW_TAG
-; CHECK-NEXT:   DW_AT_specification {{.*}} {[[ANON_I_DECL]]}
 
 ; CHECK: [[MEM_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_specification {{.*}} {[[MEM_FUNC_DECL]]}
+; CHECK: DW_AT_specification {{.*}} "_ZN1C15member_functionEv"
 
 ; CHECK: [[STATIC_MEM_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_specification {{.*}} {[[STATIC_MEM_FUNC_DECL]]}
+; CHECK: DW_AT_specification {{.*}} "_ZN1C22static_member_functionEv"
 
 ; CHECK: [[GLOBAL_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
@@ -223,14 +214,14 @@ define void @_ZN1C15member_functionEv(%struct.C* %this) #0 align 2 {
 entry:
   %this.addr = alloca %struct.C*, align 8
   store %struct.C* %this, %struct.C** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !50), !dbg !52
+  call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !50, metadata !{!"0x102"}), !dbg !52
   %this1 = load %struct.C** %this.addr
   store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !53
   ret void, !dbg !54
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind uwtable
 define i32 @_ZN1C22static_member_functionEv() #0 align 2 {
@@ -279,64 +270,64 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!47, !48}
 !llvm.ident = !{!49}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !19, metadata !32, metadata !45, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/pubnames.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"pubnames.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !15}
-!4 = metadata !{i32 786451, metadata !1, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{metadata !6, metadata !8, metadata !12}
-!6 = metadata !{i32 786445, metadata !1, metadata !"_ZTS1C", metadata !"static_member_variable", i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !7, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int]
-!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!8 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 2, metadata !9, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function]
-!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!10 = metadata !{null, metadata !11}
-!11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
-!12 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 3, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function]
-!13 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!14 = metadata !{metadata !7}
-!15 = metadata !{i32 786451, metadata !1, metadata !16, metadata !"D", i32 28, i64 32, i64 32, i32 0, i32 0, null, metadata !17, i32 0, null, null, metadata !"_ZTSN2ns1DE"} ; [ DW_TAG_structure_type ] [D] [line 28, size 32, align 32, offset 0] [def] [from ]
-!16 = metadata !{i32 786489, metadata !1, null, metadata !"ns", i32 23} ; [ DW_TAG_namespace ] [ns] [line 23]
-!17 = metadata !{metadata !18}
-!18 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN2ns1DE", metadata !"A", i32 29, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] [A] [line 29, size 32, align 32, offset 0] [from int]
-!19 = metadata !{metadata !20, metadata !21, metadata !22, metadata !24, metadata !27, metadata !31}
-!20 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 9, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !8, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
-!21 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 13, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !12, metadata !2, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function]
-!22 = metadata !{i32 786478, metadata !1, metadata !23, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 19, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !2, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function]
-!23 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/pubnames.cpp]
-!24 = metadata !{i32 786478, metadata !1, metadata !16, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 24, metadata !25, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !2, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function]
-!25 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !26, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!26 = metadata !{null}
-!27 = metadata !{i32 786478, metadata !1, metadata !23, metadata !"f3", metadata !"f3", metadata !"_Z2f3v", i32 37, metadata !28, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32* ()* @_Z2f3v, null, null, metadata !2, i32 37} ; [ DW_TAG_subprogram ] [line 37] [def] [f3]
-!28 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!29 = metadata !{metadata !30}
-!30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!31 = metadata !{i32 786478, metadata !1, metadata !23, metadata !"f7", metadata !"f7", metadata !"_Z2f7v", i32 54, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z2f7v, null, null, metadata !2, i32 54} ; [ DW_TAG_subprogram ] [line 54] [def] [f7]
-!32 = metadata !{metadata !33, metadata !34, metadata !35, metadata !36, metadata !37, metadata !38, metadata !41, metadata !44}
-!33 = metadata !{i32 786484, i32 0, metadata !4, metadata !"static_member_variable", metadata !"static_member_variable", metadata !"_ZN1C22static_member_variableE", metadata !23, i32 7, metadata !7, i32 0, i32 1, i32* @_ZN1C22static_member_variableE, metadata !6} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def]
-!34 = metadata !{i32 786484, i32 0, null, metadata !"global_variable", metadata !"global_variable", metadata !"", metadata !23, i32 17, metadata !"_ZTS1C", i32 0, i32 1, %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 17] [def]
-!35 = metadata !{i32 786484, i32 0, metadata !16, metadata !"global_namespace_variable", metadata !"global_namespace_variable", metadata !"_ZN2ns25global_namespace_variableE", metadata !23, i32 27, metadata !7, i32 0, i32 1, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 27] [def]
-!36 = metadata !{i32 786484, i32 0, metadata !16, metadata !"d", metadata !"d", metadata !"_ZN2ns1dE", metadata !23, i32 30, metadata !"_ZTSN2ns1DE", i32 0, i32 1, %"struct.ns::D"* @_ZN2ns1dE, null} ; [ DW_TAG_variable ] [d] [line 30] [def]
-!37 = metadata !{i32 786484, i32 0, metadata !27, metadata !"z", metadata !"z", metadata !"", metadata !23, i32 38, metadata !7, i32 1, i32 1, i32* @_ZZ2f3vE1z, null} ; [ DW_TAG_variable ] [z] [line 38] [local] [def]
-!38 = metadata !{i32 786484, i32 0, metadata !39, metadata !"c", metadata !"c", metadata !"_ZN5outer12_GLOBAL__N_11cE", metadata !23, i32 50, metadata !7, i32 1, i32 1, i32* @_ZN5outer12_GLOBAL__N_11cE, null} ; [ DW_TAG_variable ] [c] [line 50] [local] [def]
-!39 = metadata !{i32 786489, metadata !1, metadata !40, metadata !"", i32 49} ; [ DW_TAG_namespace ] [line 49]
-!40 = metadata !{i32 786489, metadata !1, null, metadata !"outer", i32 48} ; [ DW_TAG_namespace ] [outer] [line 48]
-!41 = metadata !{i32 786484, i32 0, metadata !42, metadata !"b", metadata !"b", metadata !"_ZN12_GLOBAL__N_15inner1bE", metadata !23, i32 44, metadata !7, i32 1, i32 1, i32* @_ZN12_GLOBAL__N_15inner1bE, null} ; [ DW_TAG_variable ] [b] [line 44] [local] [def]
-!42 = metadata !{i32 786489, metadata !1, metadata !43, metadata !"inner", i32 43} ; [ DW_TAG_namespace ] [inner] [line 43]
-!43 = metadata !{i32 786489, metadata !1, null, metadata !"", i32 33} ; [ DW_TAG_namespace ] [line 33]
-!44 = metadata !{i32 786484, i32 0, metadata !43, metadata !"i", metadata !"i", metadata !"_ZN12_GLOBAL__N_11iE", metadata !23, i32 34, metadata !7, i32 1, i32 1, i32* @_ZN12_GLOBAL__N_11iE, null} ; [ DW_TAG_variable ] [i] [line 34] [local] [def]
-!45 = metadata !{metadata !46}
-!46 = metadata !{i32 786490, metadata !40, metadata !39, i32 40} ; [ DW_TAG_imported_module ]
-!47 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!48 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!49 = metadata !{metadata !"clang version 3.5.0 "}
-!50 = metadata !{i32 786689, metadata !20, metadata !"this", null, i32 16777216, metadata !51, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!51 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C]
-!52 = metadata !{i32 0, i32 0, metadata !20, null}
-!53 = metadata !{i32 10, i32 0, metadata !20, null}
-!54 = metadata !{i32 11, i32 0, metadata !20, null}
-!55 = metadata !{i32 14, i32 0, metadata !21, null}
-!56 = metadata !{i32 20, i32 0, metadata !22, null}
-!57 = metadata !{i32 25, i32 0, metadata !24, null}
-!58 = metadata !{i32 26, i32 0, metadata !24, null}
-!59 = metadata !{i32 39, i32 0, metadata !27, null}
-!60 = metadata !{i32 55, i32 0, metadata !31, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !3, !19, !32, !45} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/pubnames.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"pubnames.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4, !15}
+!4 = !{!"0x13\00C\001\008\008\000\000\000", !1, null, null, !5, null, null, !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!6, !8, !12}
+!6 = !{!"0xd\00static_member_variable\004\000\000\000\004096", !1, !"_ZTS1C", !7, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int]
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = !{!"0x2e\00member_function\00member_function\00_ZN1C15member_functionEv\002\000\000\000\006\00256\000\002", !1, !"_ZTS1C", !9, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 2] [member_function]
+!9 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !{null, !11}
+!11 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
+!12 = !{!"0x2e\00static_member_function\00static_member_function\00_ZN1C22static_member_functionEv\003\000\000\000\006\00256\000\003", !1, !"_ZTS1C", !13, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 3] [static_member_function]
+!13 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !14, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = !{!7}
+!15 = !{!"0x13\00D\0028\0032\0032\000\000\000", !1, !16, null, !17, null, null, !"_ZTSN2ns1DE"} ; [ DW_TAG_structure_type ] [D] [line 28, size 32, align 32, offset 0] [def] [from ]
+!16 = !{!"0x39\00ns\0023", !1, null} ; [ DW_TAG_namespace ] [ns] [line 23]
+!17 = !{!18}
+!18 = !{!"0xd\00A\0029\0032\0032\000\000", !1, !"_ZTSN2ns1DE", !7} ; [ DW_TAG_member ] [A] [line 29, size 32, align 32, offset 0] [from int]
+!19 = !{!20, !21, !22, !24, !27, !31}
+!20 = !{!"0x2e\00member_function\00member_function\00_ZN1C15member_functionEv\009\000\001\000\006\00256\000\009", !1, !"_ZTS1C", !9, null, void (%struct.C*)* @_ZN1C15member_functionEv, null, !8, !2} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
+!21 = !{!"0x2e\00static_member_function\00static_member_function\00_ZN1C22static_member_functionEv\0013\000\001\000\006\00256\000\0013", !1, !"_ZTS1C", !13, null, i32 ()* @_ZN1C22static_member_functionEv, null, !12, !2} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function]
+!22 = !{!"0x2e\00global_function\00global_function\00_Z15global_functionv\0019\000\001\000\006\00256\000\0019", !1, !23, !13, null, i32 ()* @_Z15global_functionv, null, null, !2} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function]
+!23 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/pubnames.cpp]
+!24 = !{!"0x2e\00global_namespace_function\00global_namespace_function\00_ZN2ns25global_namespace_functionEv\0024\000\001\000\006\00256\000\0024", !1, !16, !25, null, void ()* @_ZN2ns25global_namespace_functionEv, null, null, !2} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function]
+!25 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !26, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!26 = !{null}
+!27 = !{!"0x2e\00f3\00f3\00_Z2f3v\0037\000\001\000\006\00256\000\0037", !1, !23, !28, null, i32* ()* @_Z2f3v, null, null, !2} ; [ DW_TAG_subprogram ] [line 37] [def] [f3]
+!28 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !29, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!29 = !{!30}
+!30 = !{!"0xf\00\000\0064\0064\000\000", null, null, !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!31 = !{!"0x2e\00f7\00f7\00_Z2f7v\0054\000\001\000\006\00256\000\0054", !1, !23, !13, null, i32 ()* @_Z2f7v, null, null, !2} ; [ DW_TAG_subprogram ] [line 54] [def] [f7]
+!32 = !{!33, !34, !35, !36, !37, !38, !41, !44}
+!33 = !{!"0x34\00static_member_variable\00static_member_variable\00_ZN1C22static_member_variableE\007\000\001", null, !23, !7, i32* @_ZN1C22static_member_variableE, !6} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def]
+!34 = !{!"0x34\00global_variable\00global_variable\00\0017\000\001", null, !23, !"_ZTS1C", %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 17] [def]
+!35 = !{!"0x34\00global_namespace_variable\00global_namespace_variable\00_ZN2ns25global_namespace_variableE\0027\000\001", !16, !23, !7, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 27] [def]
+!36 = !{!"0x34\00d\00d\00_ZN2ns1dE\0030\000\001", !16, !23, !"_ZTSN2ns1DE", %"struct.ns::D"* @_ZN2ns1dE, null} ; [ DW_TAG_variable ] [d] [line 30] [def]
+!37 = !{!"0x34\00z\00z\00\0038\001\001", !27, !23, !7, i32* @_ZZ2f3vE1z, null} ; [ DW_TAG_variable ] [z] [line 38] [local] [def]
+!38 = !{!"0x34\00c\00c\00_ZN5outer12_GLOBAL__N_11cE\0050\001\001", !39, !23, !7, i32* @_ZN5outer12_GLOBAL__N_11cE, null} ; [ DW_TAG_variable ] [c] [line 50] [local] [def]
+!39 = !{!"0x39\00\0049", !1, !40} ; [ DW_TAG_namespace ] [line 49]
+!40 = !{!"0x39\00outer\0048", !1, null} ; [ DW_TAG_namespace ] [outer] [line 48]
+!41 = !{!"0x34\00b\00b\00_ZN12_GLOBAL__N_15inner1bE\0044\001\001", !42, !23, !7, i32* @_ZN12_GLOBAL__N_15inner1bE, null} ; [ DW_TAG_variable ] [b] [line 44] [local] [def]
+!42 = !{!"0x39\00inner\0043", !1, !43} ; [ DW_TAG_namespace ] [inner] [line 43]
+!43 = !{!"0x39\00\0033", !1, null} ; [ DW_TAG_namespace ] [line 33]
+!44 = !{!"0x34\00i\00i\00_ZN12_GLOBAL__N_11iE\0034\001\001", !43, !23, !7, i32* @_ZN12_GLOBAL__N_11iE, null} ; [ DW_TAG_variable ] [i] [line 34] [local] [def]
+!45 = !{!46}
+!46 = !{!"0x3a\0040\00", !40, !39} ; [ DW_TAG_imported_module ]
+!47 = !{i32 2, !"Dwarf Version", i32 4}
+!48 = !{i32 2, !"Debug Info Version", i32 2}
+!49 = !{!"clang version 3.5.0 "}
+!50 = !{!"0x101\00this\0016777216\001088", !20, null, !51} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!51 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C]
+!52 = !MDLocation(line: 0, scope: !20)
+!53 = !MDLocation(line: 10, scope: !20)
+!54 = !MDLocation(line: 11, scope: !20)
+!55 = !MDLocation(line: 14, scope: !21)
+!56 = !MDLocation(line: 20, scope: !22)
+!57 = !MDLocation(line: 25, scope: !24)
+!58 = !MDLocation(line: 26, scope: !24)
+!59 = !MDLocation(line: 39, scope: !27)
+!60 = !MDLocation(line: 55, scope: !31)
diff --git a/test/DebugInfo/X86/inline-member-function.ll b/test/DebugInfo/X86/inline-member-function.ll
index 3dc6043bf36c..68a211fcecc3 100644
--- a/test/DebugInfo/X86/inline-member-function.ll
+++ b/test/DebugInfo/X86/inline-member-function.ll
@@ -18,13 +18,14 @@
 
 ; But make sure we emit DW_AT_object_pointer on the abstract definition.
 ; CHECK: [[ABSTRACT_ORIGIN:.*]]: DW_TAG_subprogram
-; CHECK-NOT: NULL
-; CHECK-NOT: TAG
+; CHECK-NOT: {{NULL|TAG}}
+; CHECK: DW_AT_specification {{.*}} "_ZN3foo4funcEi"
+; CHECK-NOT: {{NULL|TAG}}
 ; CHECK: DW_AT_object_pointer
 
 ; Ensure we omit DW_AT_object_pointer on inlined subroutines.
 ; CHECK: DW_TAG_inlined_subroutine
-; CHECK-NEXT: DW_AT_abstract_origin {{.*}}{[[ABSTRACT_ORIGIN]]}
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[ABSTRACT_ORIGIN]]} "_ZN3foo4funcEi"
 ; CHECK-NOT: NULL
 ; CHECK-NOT: DW_AT_object_pointer
 ; CHECK: DW_TAG_formal_parameter
@@ -45,9 +46,9 @@ entry:
   store i32 0, i32* %retval
   %0 = load i32* @i, align 4, !dbg !23
   store %struct.foo* %tmp, %struct.foo** %this.addr.i, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.foo** %this.addr.i}, metadata !24), !dbg !26
+  call void @llvm.dbg.declare(metadata %struct.foo** %this.addr.i, metadata !24, metadata !{!"0x102"}), !dbg !26
   store i32 %0, i32* %x.addr.i, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %x.addr.i}, metadata !27), !dbg !28
+  call void @llvm.dbg.declare(metadata i32* %x.addr.i, metadata !27, metadata !{!"0x102"}), !dbg !28
   %this1.i = load %struct.foo** %this.addr.i
   %1 = load i32* %x.addr.i, align 4, !dbg !28
   %add.i = add nsw i32 %1, 2, !dbg !28
@@ -55,7 +56,7 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -64,32 +65,32 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!20, !21}
 !llvm.ident = !{!22}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !12, metadata !18, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/inline.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"inline.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !1, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{i32 786478, metadata !1, metadata !"_ZTS3foo", metadata !"func", metadata !"func", metadata !"_ZN3foo4funcEi", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !11, i32 2} ; [ DW_TAG_subprogram ] [line 2] [func]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !10, metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS3foo]
-!11 = metadata !{i32 786468}
-!12 = metadata !{metadata !13, metadata !17}
-!13 = metadata !{i32 786478, metadata !1, metadata !14, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
-!14 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/inline.cpp]
-!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{metadata !9}
-!17 = metadata !{i32 786478, metadata !1, metadata !"_ZTS3foo", metadata !"func", metadata !"func", metadata !"_ZN3foo4funcEi", i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, metadata !6, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [func]
-!18 = metadata !{metadata !19}
-!19 = metadata !{i32 786484, i32 0, null, metadata !"i", metadata !"i", metadata !"", metadata !14, i32 5, metadata !9, i32 0, i32 1, i32* @i, null} ; [ DW_TAG_variable ] [i] [line 5] [def]
-!20 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!22 = metadata !{metadata !"clang version 3.5.0 "}
-!23 = metadata !{i32 8, i32 0, metadata !13, null} ; [ DW_TAG_imported_declaration ]
-!24 = metadata !{i32 786689, metadata !17, metadata !"this", null, i32 16777216, metadata !25, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!25 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3foo]
-!26 = metadata !{i32 0, i32 0, metadata !17, metadata !23}
-!27 = metadata !{i32 786689, metadata !17, metadata !"x", metadata !14, i32 33554434, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 2]
-!28 = metadata !{i32 2, i32 0, metadata !17, metadata !23}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !3, !12, !18, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/inline.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"inline.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00foo\001\008\008\000\000\000", !1, null, null, !5, null, null, !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!6}
+!6 = !{!"0x2e\00func\00func\00_ZN3foo4funcEi\002\000\000\000\006\00256\000\002", !1, !"_ZTS3foo", !7, null, null, null, i32 0, !11} ; [ DW_TAG_subprogram ] [line 2] [func]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !10, !9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS3foo]
+!11 = !{i32 786468}
+!12 = !{!13, !17}
+!13 = !{!"0x2e\00main\00main\00\007\000\001\000\006\00256\000\007", !1, !14, !15, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
+!14 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/inline.cpp]
+!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{!9}
+!17 = !{!"0x2e\00func\00func\00_ZN3foo4funcEi\002\000\001\000\006\00256\000\002", !1, !"_ZTS3foo", !7, null, null, null, !6, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [func]
+!18 = !{!19}
+!19 = !{!"0x34\00i\00i\00\005\000\001", null, !14, !9, i32* @i, null} ; [ DW_TAG_variable ] [i] [line 5] [def]
+!20 = !{i32 2, !"Dwarf Version", i32 4}
+!21 = !{i32 1, !"Debug Info Version", i32 2}
+!22 = !{!"clang version 3.5.0 "}
+!23 = !MDLocation(line: 8, scope: !13)
+!24 = !{!"0x101\00this\0016777216\001088", !17, null, !25} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!25 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3foo]
+!26 = !MDLocation(line: 0, scope: !17, inlinedAt: !23)
+!27 = !{!"0x101\00x\0033554434\000", !17, !14, !9} ; [ DW_TAG_arg_variable ] [x] [line 2]
+!28 = !MDLocation(line: 2, scope: !17, inlinedAt: !23)
diff --git a/test/DebugInfo/X86/inline-seldag-test.ll b/test/DebugInfo/X86/inline-seldag-test.ll
index 615f03a2ad28..8c10e3a859d9 100644
--- a/test/DebugInfo/X86/inline-seldag-test.ll
+++ b/test/DebugInfo/X86/inline-seldag-test.ll
@@ -11,12 +11,8 @@
 ;   x = f(x);
 ; }
 
-; CHECK: [[F:.*]]: DW_TAG_subprogram
-; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_name {{.*}} "f"
-
 ; CHECK: DW_TAG_inlined_subroutine
-; CHECK-NEXT: DW_AT_abstract_origin {{.*}} {[[F]]}
+; CHECK-NEXT: DW_AT_abstract_origin {{.*}} "f"
 
 
 ; Make sure the condition test is attributed to the inline function, not the
@@ -31,10 +27,10 @@ define void @func() #0 {
 entry:
   %y.addr.i = alloca i32, align 4
   %x = alloca i32, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %x}, metadata !15), !dbg !17
+  call void @llvm.dbg.declare(metadata i32* %x, metadata !15, metadata !{!"0x102"}), !dbg !17
   %0 = load volatile i32* %x, align 4, !dbg !18
   store i32 %0, i32* %y.addr.i, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %y.addr.i}, metadata !19), !dbg !20
+  call void @llvm.dbg.declare(metadata i32* %y.addr.i, metadata !19, metadata !{!"0x102"}), !dbg !20
   %1 = load i32* %y.addr.i, align 4, !dbg !21
   %tobool.i = icmp ne i32 %1, 0, !dbg !21
   %cond.i = select i1 %tobool.i, i32 4, i32 7, !dbg !21
@@ -43,7 +39,7 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -52,26 +48,26 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!12, !13}
 !llvm.ident = !{!14}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/inline-seldag-test.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"inline-seldag-test.c", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !8}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"", i32 4, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @func, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [func]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/inline-seldag-test.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
-!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!10 = metadata !{metadata !11, metadata !11}
-!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!12 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!14 = metadata !{metadata !"clang version 3.5.0 "}
-!15 = metadata !{i32 786688, metadata !4, metadata !"x", metadata !5, i32 5, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [x] [line 5]
-!16 = metadata !{i32 786485, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_volatile_type ] [line 0, size 0, align 0, offset 0] [from int]
-!17 = metadata !{i32 5, i32 0, metadata !4, null}
-!18 = metadata !{i32 6, i32 7, metadata !4, null}
-!19 = metadata !{i32 786689, metadata !8, metadata !"y", metadata !5, i32 16777217, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [y] [line 1]
-!20 = metadata !{i32 1, i32 0, metadata !8, metadata !18}
-!21 = metadata !{i32 2, i32 0, metadata !8, metadata !18}
-!22 = metadata !{i32 7, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/inline-seldag-test.c] [DW_LANG_C99]
+!1 = !{!"inline-seldag-test.c", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4, !8}
+!4 = !{!"0x2e\00func\00func\00\004\000\001\000\006\000\000\004", !1, !5, !6, null, void ()* @func, null, null, !2} ; [ DW_TAG_subprogram ] [line 4] [def] [func]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/inline-seldag-test.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{!"0x2e\00f\00f\00\001\000\001\000\006\00256\000\001", !1, !5, !9, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!9 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !{!11, !11}
+!11 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = !{i32 2, !"Dwarf Version", i32 4}
+!13 = !{i32 1, !"Debug Info Version", i32 2}
+!14 = !{!"clang version 3.5.0 "}
+!15 = !{!"0x100\00x\005\000", !4, !5, !16} ; [ DW_TAG_auto_variable ] [x] [line 5]
+!16 = !{!"0x35\00\000\000\000\000\000", null, null, !11} ; [ DW_TAG_volatile_type ] [line 0, size 0, align 0, offset 0] [from int]
+!17 = !MDLocation(line: 5, scope: !4)
+!18 = !MDLocation(line: 6, column: 7, scope: !4)
+!19 = !{!"0x101\00y\0016777217\000", !8, !5, !11} ; [ DW_TAG_arg_variable ] [y] [line 1]
+!20 = !MDLocation(line: 1, scope: !8, inlinedAt: !18)
+!21 = !MDLocation(line: 2, scope: !8, inlinedAt: !18)
+!22 = !MDLocation(line: 7, scope: !4)
diff --git a/test/DebugInfo/X86/instcombine-instrinsics.ll b/test/DebugInfo/X86/instcombine-instrinsics.ll
index 2fd7ee319c2e..12b99a359496 100644
--- a/test/DebugInfo/X86/instcombine-instrinsics.ll
+++ b/test/DebugInfo/X86/instcombine-instrinsics.ll
@@ -2,8 +2,8 @@
 ; Verify that we emit the same intrinsic at most once.
 ; rdar://problem/13056109
 ;
-; CHECK: call void @llvm.dbg.value(metadata !{%struct.i14** %p}
-; CHECK-NOT: call void @llvm.dbg.value(metadata !{%struct.i14** %p}
+; CHECK: call void @llvm.dbg.value(metadata %struct.i14** %p
+; CHECK-NOT: call void @llvm.dbg.value(metadata %struct.i14** %p
 ; CHECK-NEXT: call i32 @foo
 ; CHECK: ret
 ;
@@ -30,7 +30,7 @@ target triple = "x86_64-apple-macosx10.9.0"
 ; Function Attrs: nounwind ssp uwtable
 define void @init() #0 {
   %p = alloca %struct.i14*, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.i14** %p}, metadata !11), !dbg !18
+  call void @llvm.dbg.declare(metadata %struct.i14** %p, metadata !11, metadata !{!"0x102"}), !dbg !18
   store %struct.i14* null, %struct.i14** %p, align 8, !dbg !18
   %1 = call i32 @foo(%struct.i14** %p), !dbg !19
   %2 = load %struct.i14** %p, align 8, !dbg !20
@@ -43,7 +43,7 @@ define void @init() #0 {
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare i32 @foo(%struct.i14**)
 
@@ -54,26 +54,26 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!8, !9}
 !llvm.ident = !{!10}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [instcombine_intrinsics.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"instcombine_intrinsics.c", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"init", metadata !"init", metadata !"", i32 7, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @init, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [init]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [instcombine_intrinsics.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!10 = metadata !{metadata !"clang version 3.5.0 "}
-!11 = metadata !{i32 786688, metadata !4, metadata !"p", metadata !5, i32 8, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [p] [line 8]
-!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from i14]
-!13 = metadata !{i32 786454, metadata !1, null, metadata !"i14", i32 3, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_typedef ] [i14] [line 3, size 0, align 0, offset 0] [from ]
-!14 = metadata !{i32 786451, metadata !1, null, metadata !"", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [line 1, size 64, align 64, offset 0] [def] [from ]
-!15 = metadata !{metadata !16}
-!16 = metadata !{i32 786445, metadata !1, metadata !14, metadata !"i", i32 2, i64 64, i64 64, i64 0, i32 0, metadata !17} ; [ DW_TAG_member ] [i] [line 2, size 64, align 64, offset 0] [from long int]
-!17 = metadata !{i32 786468, null, null, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
-!18 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
-!19 = metadata !{i32 9, i32 0, metadata !4, null}
-!20 = metadata !{i32 10, i32 0, metadata !4, null}
-!21 = metadata !{i32 11, i32 0, metadata !4, null}
-!22 = metadata !{i32 12, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [instcombine_intrinsics.c] [DW_LANG_C99]
+!1 = !{!"instcombine_intrinsics.c", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00init\00init\00\007\000\001\000\006\000\000\007", !1, !5, !6, null, void ()* @init, null, null, !2} ; [ DW_TAG_subprogram ] [line 7] [def] [init]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [instcombine_intrinsics.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{i32 2, !"Dwarf Version", i32 2}
+!9 = !{i32 1, !"Debug Info Version", i32 2}
+!10 = !{!"clang version 3.5.0 "}
+!11 = !{!"0x100\00p\008\000", !4, !5, !12} ; [ DW_TAG_auto_variable ] [p] [line 8]
+!12 = !{!"0xf\00\000\0064\0064\000\000", null, null, !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from i14]
+!13 = !{!"0x16\00i14\003\000\000\000\000", !1, null, !14} ; [ DW_TAG_typedef ] [i14] [line 3, size 0, align 0, offset 0] [from ]
+!14 = !{!"0x13\00\001\0064\0064\000\000\000", !1, null, null, !15, null, null, null} ; [ DW_TAG_structure_type ] [line 1, size 64, align 64, offset 0] [def] [from ]
+!15 = !{!16}
+!16 = !{!"0xd\00i\002\0064\0064\000\000", !1, !14, !17} ; [ DW_TAG_member ] [i] [line 2, size 64, align 64, offset 0] [from long int]
+!17 = !{!"0x24\00long int\000\0064\0064\000\000\005", null, null} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
+!18 = !MDLocation(line: 8, scope: !4)
+!19 = !MDLocation(line: 9, scope: !4)
+!20 = !MDLocation(line: 10, scope: !4)
+!21 = !MDLocation(line: 11, scope: !4)
+!22 = !MDLocation(line: 12, scope: !4)
diff --git a/test/DebugInfo/X86/lexical_block.ll b/test/DebugInfo/X86/lexical_block.ll
index 95b3921ab364..a9e377a01810 100644
--- a/test/DebugInfo/X86/lexical_block.ll
+++ b/test/DebugInfo/X86/lexical_block.ll
@@ -1,11 +1,19 @@
 ; REQUIRES: object-emission
 
 ; RUN: llc -mtriple=x86_64-linux -O0 -filetype=obj < %s \
-; RUN:     | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+; RUN:     | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-V4 %s
+; RUN: llc -mtriple=x86_64-linux -dwarf-version=3 -O0 -filetype=obj < %s \
+; RUN:     | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=CHECK --check-prefix=CHECK-V3 %s
+
+; Check that we emit DW_TAG_lexical_block and that it has the right encoding
+; depending on the dwarf version.
 
 ; CHECK: DW_TAG_lexical_block
-; CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr]
-; CHECK-NEXT: DW_AT_high_pc [DW_FORM_data4]
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_low_pc [DW_FORM_addr]
+; CHECK-NOT: DW_TAG
+; CHECK-V4: DW_AT_high_pc [DW_FORM_data4]
+; CHECK-V3: DW_AT_high_pc [DW_FORM_addr]
 
 ; Test case produced from:
 ; void b() {
@@ -17,7 +25,7 @@
 define void @_Z1bv() #0 {
 entry:
   %i = alloca i32, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %i}, metadata !11), !dbg !14
+  call void @llvm.dbg.declare(metadata i32* %i, metadata !11, metadata !{!"0x102"}), !dbg !14
   store i32 3, i32* %i, align 4, !dbg !14
   %0 = load i32* %i, align 4, !dbg !14
   %tobool = icmp ne i32 %0, 0, !dbg !14
@@ -31,7 +39,7 @@ if.end:                                           ; preds = %if.then, %entry
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -40,20 +48,20 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!8, !9}
 !llvm.ident = !{!10}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/lexical_block.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"lexical_block.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"b", metadata !"b", metadata !"_Z1bv", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z1bv, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [b]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/lexical_block.cpp]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!10 = metadata !{metadata !"clang version 3.5.0 "}
-!11 = metadata !{i32 786688, metadata !12, metadata !"i", metadata !5, i32 2, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 2]
-!12 = metadata !{i32 786443, metadata !1, metadata !4, i32 2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/lexical_block.cpp]
-!13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!14 = metadata !{i32 2, i32 0, metadata !12, null}
-!15 = metadata !{i32 3, i32 0, metadata !12, null}
-!16 = metadata !{i32 4, i32 0, metadata !4, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/lexical_block.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"lexical_block.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00b\00b\00_Z1bv\001\000\001\000\006\00256\000\001", !1, !5, !6, null, void ()* @_Z1bv, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [b]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/lexical_block.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 2}
+!10 = !{!"clang version 3.5.0 "}
+!11 = !{!"0x100\00i\002\000", !12, !5, !13} ; [ DW_TAG_auto_variable ] [i] [line 2]
+!12 = !{!"0xb\002\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/lexical_block.cpp]
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!14 = !MDLocation(line: 2, scope: !12)
+!15 = !MDLocation(line: 3, scope: !12)
+!16 = !MDLocation(line: 4, scope: !4)
diff --git a/test/DebugInfo/X86/line-info.ll b/test/DebugInfo/X86/line-info.ll
index f6deee9ff165..e4364268021b 100644
--- a/test/DebugInfo/X86/line-info.ll
+++ b/test/DebugInfo/X86/line-info.ll
@@ -18,14 +18,14 @@ define i32 @foo(i32 %x) #0 {
 entry:
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !14), !dbg !15
+  call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !14, metadata !{!"0x102"}), !dbg !15
   %0 = load i32* %x.addr, align 4, !dbg !16
   %inc = add nsw i32 %0, 1, !dbg !16
   store i32 %inc, i32* %x.addr, align 4, !dbg !16
   ret i32 %inc, !dbg !16
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 define i32 @main() #0 {
 entry:
@@ -38,23 +38,23 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!19}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2,  metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/list0.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"list0.c", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !10}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{metadata !"./list0.h", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/./list0.h]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [main]
-!11 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/list0.c]
-!12 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!13 = metadata !{metadata !9}
-!14 = metadata !{i32 786689, metadata !4, metadata !"x", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1]
-!15 = metadata !{i32 1, i32 0, metadata !4, null}
-!16 = metadata !{i32 2, i32 0, metadata !4, null}
-!17 = metadata !{i32 3, i32 0, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !11, metadata !10} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/scratch/list0.c]
-!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.3 \000\00\000\00\000", !1, !2, !2, !3, !2,  !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/list0.c] [DW_LANG_C99]
+!1 = !{!"list0.c", !"/usr/local/google/home/blaikie/dev/scratch"}
+!2 = !{}
+!3 = !{!4, !10}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", !5, !6, !7, null, i32 (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"./list0.h", !"/usr/local/google/home/blaikie/dev/scratch"}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/./list0.h]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"0x2e\00main\00main\00\002\000\001\000\006\000\000\002", !1, !11, !12, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [main]
+!11 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/list0.c]
+!12 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !13, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = !{!9}
+!14 = !{!"0x101\00x\0016777217\000", !4, !6, !9} ; [ DW_TAG_arg_variable ] [x] [line 1]
+!15 = !MDLocation(line: 1, scope: !4)
+!16 = !MDLocation(line: 2, scope: !4)
+!17 = !MDLocation(line: 3, scope: !18)
+!18 = !{!"0xb\000", !11, !10} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/scratch/list0.c]
+!19 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/linkage-name.ll b/test/DebugInfo/X86/linkage-name.ll
index 2b1647b3d3d9..187ff8bce3af 100644
--- a/test/DebugInfo/X86/linkage-name.ll
+++ b/test/DebugInfo/X86/linkage-name.ll
@@ -14,39 +14,39 @@ entry:
   %this.addr = alloca %class.A*, align 8
   %b.addr = alloca i32, align 4
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !21), !dbg !23
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !21, metadata !{!"0x102"}), !dbg !23
   store i32 %b, i32* %b.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %b.addr}, metadata !24), !dbg !25
+  call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !24, metadata !{!"0x102"}), !dbg !25
   %this1 = load %class.A** %this.addr
   %0 = load i32* %b.addr, align 4, !dbg !26
   ret i32 %0, !dbg !26
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!29}
 
-!0 = metadata !{i32 786449, metadata !28, i32 4, metadata !"clang version 3.1 (trunk 152691) (llvm/trunk 152692)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !6, null, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*, i32)* @_ZN1A1aEi, null, metadata !13, null, i32 5} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !10, metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786434, metadata !28, null, metadata !"A", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 8, align 8, offset 0] [def] [from ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786478, metadata !6, metadata !11, metadata !"a", metadata !"a", metadata !"_ZN1A1aEi", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, null, i32 0} ; [ DW_TAG_subprogram ]
-!18 = metadata !{metadata !20}
-!20 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 9, metadata !11, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ]
-!21 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777221, metadata !22, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!23 = metadata !{i32 5, i32 8, metadata !5, null}
-!24 = metadata !{i32 786689, metadata !5, metadata !"b", metadata !6, i32 33554437, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!25 = metadata !{i32 5, i32 14, metadata !5, null}
-!26 = metadata !{i32 6, i32 4, metadata !27, null}
-!27 = metadata !{i32 786443, metadata !6, metadata !5, i32 5, i32 17, i32 0} ; [ DW_TAG_lexical_block ]
-!28 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo"}
-!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.1 (trunk 152691) (llvm/trunk 152692)\000\00\000\00\000", !28, !1, !1, !3, !18,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00a\00a\00_ZN1A1aEi\005\000\001\000\006\00256\000\005", !6, null, !7, null, i32 (%class.A*, i32)* @_ZN1A1aEi, null, !13, null} ; [ DW_TAG_subprogram ]
+!6 = !{!"0x29", !28} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !10, !9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!10 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !11} ; [ DW_TAG_pointer_type ]
+!11 = !{!"0x2\00A\001\008\008\000\000\000", !28, null, null, !12, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 8, align 8, offset 0] [def] [from ]
+!12 = !{!13}
+!13 = !{!"0x2e\00a\00a\00_ZN1A1aEi\002\000\000\000\006\00257\000\000", !6, !11, !7, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ]
+!18 = !{!20}
+!20 = !{!"0x34\00a\00a\00\009\000\001", null, !6, !11, %class.A* @a, null} ; [ DW_TAG_variable ]
+!21 = !{!"0x101\00this\0016777221\0064", !5, !6, !22} ; [ DW_TAG_arg_variable ]
+!22 = !{!"0xf\00\000\0064\0064\000\000", null, null, !11} ; [ DW_TAG_pointer_type ]
+!23 = !MDLocation(line: 5, column: 8, scope: !5)
+!24 = !{!"0x101\00b\0033554437\000", !5, !6, !9} ; [ DW_TAG_arg_variable ]
+!25 = !MDLocation(line: 5, column: 14, scope: !5)
+!26 = !MDLocation(line: 6, column: 4, scope: !27)
+!27 = !{!"0xb\005\0017\000", !6, !5} ; [ DW_TAG_lexical_block ]
+!28 = !{!"foo.cpp", !"/Users/echristo"}
+!29 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/low-pc-cu.ll b/test/DebugInfo/X86/low-pc-cu.ll
index 979d4006b920..cac572e1b2c8 100644
--- a/test/DebugInfo/X86/low-pc-cu.ll
+++ b/test/DebugInfo/X86/low-pc-cu.ll
@@ -1,14 +1,24 @@
-; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
-; RUN: llvm-dwarfdump -debug-dump=info %t | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin -filetype=obj < %s \
+; RUN:     | llvm-dwarfdump -debug-dump=info - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V4
+; RUN: llc -mtriple=x86_64-apple-darwin -filetype=obj -dwarf-version=3 < %s \
+; RUN:     | llvm-dwarfdump -debug-dump=info - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-V3
 
-; Check that we use DW_AT_low_pc
+
+; Check that we use DW_AT_low_pc and that it has the right encoding depending
+; on dwarf version.
 
 ; CHECK: DW_TAG_compile_unit [1]
+; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_low_pc [DW_FORM_addr]       (0x0000000000000000)
-; CHECK: DW_AT_high_pc [DW_FORM_data4]
+; CHECK-NOT: DW_TAG
+; CHECK-V3: DW_AT_high_pc [DW_FORM_addr]
+; CHECK-V4: DW_AT_high_pc [DW_FORM_data4]
 ; CHECK: DW_TAG_subprogram [2]
+; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_low_pc [DW_FORM_addr]
-; CHECK: DW_AT_high_pc [DW_FORM_data4]
+; CHECK-NOT: DW_TAG
+; CHECK-V3: DW_AT_high_pc [DW_FORM_addr]
+; CHECK-V4: DW_AT_high_pc [DW_FORM_data4]
 
 ; Function Attrs: nounwind uwtable
 define void @z() #0 {
@@ -22,15 +32,15 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 !llvm.module.flags = !{!8, !9}
 !llvm.ident = !{!10}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/z.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"z.c", metadata !"/usr/local/google/home/echristo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"z", metadata !"z", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @z, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [z]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/z.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!10 = metadata !{metadata !"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)"}
-!11 = metadata !{i32 1, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)\000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/z.c] [DW_LANG_C99]
+!1 = !{!"z.c", !"/usr/local/google/home/echristo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00z\00z\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, void ()* @z, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [z]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/z.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 2}
+!10 = !{!"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)"}
+!11 = !MDLocation(line: 1, scope: !4)
diff --git a/test/DebugInfo/X86/memberfnptr.ll b/test/DebugInfo/X86/memberfnptr.ll
new file mode 100644
index 000000000000..6d346011a68e
--- /dev/null
+++ b/test/DebugInfo/X86/memberfnptr.ll
@@ -0,0 +1,44 @@
+; struct A {
+;   void foo();
+; };
+;  
+; void (A::*p)() = &A::foo;
+;
+; RUN: llc -filetype=obj -o - %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+; Check that the member function pointer is emitted without a DW_AT_size attribute.
+; CHECK: DW_TAG_ptr_to_member_type
+; CHECK-NOT: DW_AT_{{.*}}size
+; CHECK: DW_TAG
+;
+; ModuleID = 'memberfnptr.cpp'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx"
+
+%struct.A = type { i8 }
+
+@p = global { i64, i64 } { i64 ptrtoint (void (%struct.A*)* @_ZN1A3fooEv to i64), i64 0 }, align 8
+
+declare void @_ZN1A3fooEv(%struct.A*)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!14, !15, !16}
+!llvm.ident = !{!17}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 \000\00\000\00\001", !1, !2, !3, !2, !10, !2} ; [ DW_TAG_compile_unit ] [/memberfnptr.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"memberfnptr.cpp", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00A\001\008\008\000\000\000", !1, null, null, !5, null, null, !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!6}
+!6 = !{!"0x2e\00foo\00foo\00_ZN1A3fooEv\002\000\000\000\000\00256\000\002", !1, !"_ZTS1A", !7, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 2] [foo]
+!7 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9}
+!9 = !{!"0xf\00\000\0064\0064\000\001088\00", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!10 = !{!11}
+!11 = !{!"0x34\00p\00p\00\005\000\001", null, !12, !13, { i64, i64 }* @p, null} ; [ DW_TAG_variable ] [p] [line 5] [def]
+!12 = !{!"0x29", !1}                              ; [ DW_TAG_file_type ] [/memberfnptr.cpp]
+!13 = !{!"0x1f\00\000\0064\000\000\000", null, null, !7, !"_ZTS1A"} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 64, align 0, offset 0] [from ]
+!14 = !{i32 2, !"Dwarf Version", i32 2}
+!15 = !{i32 2, !"Debug Info Version", i32 2}
+!16 = !{i32 1, !"PIC Level", i32 2}
+!17 = !{!"clang version 3.6.0 "}
diff --git a/test/DebugInfo/X86/misched-dbg-value.ll b/test/DebugInfo/X86/misched-dbg-value.ll
index c713e65110e7..709b6b2194e3 100644
--- a/test/DebugInfo/X86/misched-dbg-value.ll
+++ b/test/DebugInfo/X86/misched-dbg-value.ll
@@ -48,12 +48,12 @@
 
 define void @Proc8(i32* nocapture %Array1Par, [51 x i32]* nocapture %Array2Par, i32 %IntParI1, i32 %IntParI2) nounwind optsize {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32* %Array1Par}, i64 0, metadata !23), !dbg !64
-  tail call void @llvm.dbg.value(metadata !{[51 x i32]* %Array2Par}, i64 0, metadata !24), !dbg !65
-  tail call void @llvm.dbg.value(metadata !{i32 %IntParI1}, i64 0, metadata !25), !dbg !66
-  tail call void @llvm.dbg.value(metadata !{i32 %IntParI2}, i64 0, metadata !26), !dbg !67
+  tail call void @llvm.dbg.value(metadata i32* %Array1Par, i64 0, metadata !23, metadata !{!"0x102"}), !dbg !64
+  tail call void @llvm.dbg.value(metadata [51 x i32]* %Array2Par, i64 0, metadata !24, metadata !{!"0x102"}), !dbg !65
+  tail call void @llvm.dbg.value(metadata i32 %IntParI1, i64 0, metadata !25, metadata !{!"0x102"}), !dbg !66
+  tail call void @llvm.dbg.value(metadata i32 %IntParI2, i64 0, metadata !26, metadata !{!"0x102"}), !dbg !67
   %add = add i32 %IntParI1, 5, !dbg !68
-  tail call void @llvm.dbg.value(metadata !{i32 %add}, i64 0, metadata !27), !dbg !68
+  tail call void @llvm.dbg.value(metadata i32 %add, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !68
   %idxprom = sext i32 %add to i64, !dbg !69
   %arrayidx = getelementptr inbounds i32* %Array1Par, i64 %idxprom, !dbg !69
   store i32 %IntParI2, i32* %arrayidx, align 4, !dbg !69
@@ -65,7 +65,7 @@ entry:
   %idxprom7 = sext i32 %add6 to i64, !dbg !74
   %arrayidx8 = getelementptr inbounds i32* %Array1Par, i64 %idxprom7, !dbg !74
   store i32 %add, i32* %arrayidx8, align 4, !dbg !74
-  tail call void @llvm.dbg.value(metadata !{i32 %add}, i64 0, metadata !28), !dbg !75
+  tail call void @llvm.dbg.value(metadata i32 %add, i64 0, metadata !28, metadata !{!"0x102"}), !dbg !75
   br label %for.body, !dbg !75
 
 for.body:                                         ; preds = %entry, %for.body
@@ -74,7 +74,7 @@ for.body:                                         ; preds = %entry, %for.body
   %arrayidx13 = getelementptr inbounds [51 x i32]* %Array2Par, i64 %idxprom, i64 %indvars.iv, !dbg !77
   store i32 %add, i32* %arrayidx13, align 4, !dbg !77
   %inc = add nsw i32 %IntIndex.046, 1, !dbg !75
-  tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !28), !dbg !75
+  tail call void @llvm.dbg.value(metadata i32 %inc, i64 0, metadata !28, metadata !{!"0x102"}), !dbg !75
   %cmp = icmp sgt i32 %inc, %add3, !dbg !75
   %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !75
   br i1 %cmp, label %for.end, label %for.body, !dbg !75
@@ -95,7 +95,7 @@ for.end:                                          ; preds = %for.body
   ret void, !dbg !81
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 attributes #0 = { nounwind optsize ssp uwtable }
 attributes #1 = { nounwind readnone }
@@ -103,84 +103,84 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!83}
 
-!0 = metadata !{i32 786449, metadata !82, i32 12, metadata !"clang version 3.3 (trunk 175015)", i1 true, metadata !"", i32 0, metadata !1, metadata !10, metadata !11, metadata !29,  metadata !10, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c] [DW_LANG_C99]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 786436, metadata !82, null, metadata !"", i32 128, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [line 128, size 32, align 32, offset 0] [def] [from ]
-!3 = metadata !{i32 786473, metadata !82} ; [ DW_TAG_file_type ]
-!4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8, metadata !9}
-!5 = metadata !{i32 786472, metadata !"Ident1", i64 0} ; [ DW_TAG_enumerator ] [Ident1 :: 0]
-!6 = metadata !{i32 786472, metadata !"Ident2", i64 10000} ; [ DW_TAG_enumerator ] [Ident2 :: 10000]
-!7 = metadata !{i32 786472, metadata !"Ident3", i64 10001} ; [ DW_TAG_enumerator ] [Ident3 :: 10001]
-!8 = metadata !{i32 786472, metadata !"Ident4", i64 10002} ; [ DW_TAG_enumerator ] [Ident4 :: 10002]
-!9 = metadata !{i32 786472, metadata !"Ident5", i64 10003} ; [ DW_TAG_enumerator ] [Ident5 :: 10003]
-!10 = metadata !{}
-!11 = metadata !{metadata !12}
-!12 = metadata !{i32 786478, metadata !82, metadata !3, metadata !"Proc8", metadata !"Proc8", metadata !"", i32 180, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void (i32*, [51 x i32]*, i32, i32)* @Proc8, null, null, metadata !22, i32 185} ; [ DW_TAG_subprogram ] [line 180] [def] [scope 185] [Proc8]
-!13 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!14 = metadata !{null, metadata !15, metadata !17, metadata !21, metadata !21}
-!15 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!16 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!18 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 1632, i64 32, i32 0, i32 0, metadata !16, metadata !19, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1632, align 32, offset 0] [from int]
-!19 = metadata !{metadata !20}
-!20 = metadata !{i32 786465, i64 0, i64 51}       ; [ DW_TAG_subrange_type ] [0, 50]
-!21 = metadata !{i32 786454, metadata !82, null, metadata !"OneToFifty", i32 132, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [OneToFifty] [line 132, size 0, align 0, offset 0] [from int]
-!22 = metadata !{metadata !23, metadata !24, metadata !25, metadata !26, metadata !27, metadata !28}
-!23 = metadata !{i32 786689, metadata !12, metadata !"Array1Par", metadata !3, i32 16777397, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [Array1Par] [line 181]
-!24 = metadata !{i32 786689, metadata !12, metadata !"Array2Par", metadata !3, i32 33554614, metadata !17, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [Array2Par] [line 182]
-!25 = metadata !{i32 786689, metadata !12, metadata !"IntParI1", metadata !3, i32 50331831, metadata !21, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [IntParI1] [line 183]
-!26 = metadata !{i32 786689, metadata !12, metadata !"IntParI2", metadata !3, i32 67109048, metadata !21, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [IntParI2] [line 184]
-!27 = metadata !{i32 786688, metadata !12, metadata !"IntLoc", metadata !3, i32 186, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [IntLoc] [line 186]
-!28 = metadata !{i32 786688, metadata !12, metadata !"IntIndex", metadata !3, i32 187, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [IntIndex] [line 187]
-!29 = metadata !{metadata !30, metadata !35, metadata !36, metadata !38, metadata !39, metadata !40, metadata !42, metadata !46, metadata !63}
-!30 = metadata !{i32 786484, i32 0, null, metadata !"Version", metadata !"Version", metadata !"", metadata !3, i32 111, metadata !31, i32 0, i32 1, [4 x i8]* @Version, null} ; [ DW_TAG_variable ] [Version] [line 111] [def]
-!31 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 32, i64 8, i32 0, i32 0, metadata !32, metadata !33, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char]
-!32 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!33 = metadata !{metadata !34}
-!34 = metadata !{i32 786465, i64 0, i64 4}        ; [ DW_TAG_subrange_type ] [0, 3]
-!35 = metadata !{i32 786484, i32 0, null, metadata !"IntGlob", metadata !"IntGlob", metadata !"", metadata !3, i32 171, metadata !16, i32 0, i32 1, i32* @IntGlob, null} ; [ DW_TAG_variable ] [IntGlob] [line 171] [def]
-!36 = metadata !{i32 786484, i32 0, null, metadata !"BoolGlob", metadata !"BoolGlob", metadata !"", metadata !3, i32 172, metadata !37, i32 0, i32 1, i32* @BoolGlob, null} ; [ DW_TAG_variable ] [BoolGlob] [line 172] [def]
-!37 = metadata !{i32 786454, metadata !82, null, metadata !"boolean", i32 149, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_typedef ] [boolean] [line 149, size 0, align 0, offset 0] [from int]
-!38 = metadata !{i32 786484, i32 0, null, metadata !"Char1Glob", metadata !"Char1Glob", metadata !"", metadata !3, i32 173, metadata !32, i32 0, i32 1, i8* @Char1Glob, null} ; [ DW_TAG_variable ] [Char1Glob] [line 173] [def]
-!39 = metadata !{i32 786484, i32 0, null, metadata !"Char2Glob", metadata !"Char2Glob", metadata !"", metadata !3, i32 174, metadata !32, i32 0, i32 1, i8* @Char2Glob, null} ; [ DW_TAG_variable ] [Char2Glob] [line 174] [def]
-!40 = metadata !{i32 786484, i32 0, null, metadata !"Array1Glob", metadata !"Array1Glob", metadata !"", metadata !3, i32 175, metadata !41, i32 0, i32 1, [51 x i32]* @Array1Glob, null} ; [ DW_TAG_variable ] [Array1Glob] [line 175] [def]
-!41 = metadata !{i32 786454, metadata !82, null, metadata !"Array1Dim", i32 135, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ] [Array1Dim] [line 135, size 0, align 0, offset 0] [from ]
-!42 = metadata !{i32 786484, i32 0, null, metadata !"Array2Glob", metadata !"Array2Glob", metadata !"", metadata !3, i32 176, metadata !43, i32 0, i32 1, [51 x [51 x i32]]* @Array2Glob, null} ; [ DW_TAG_variable ] [Array2Glob] [line 176] [def]
-!43 = metadata !{i32 786454, metadata !82, null, metadata !"Array2Dim", i32 136, i64 0, i64 0, i64 0, i32 0, metadata !44} ; [ DW_TAG_typedef ] [Array2Dim] [line 136, size 0, align 0, offset 0] [from ]
-!44 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 83232, i64 32, i32 0, i32 0, metadata !16, metadata !45, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 83232, align 32, offset 0] [from int]
-!45 = metadata !{metadata !20, metadata !20}
-!46 = metadata !{i32 786484, i32 0, null, metadata !"PtrGlb", metadata !"PtrGlb", metadata !"", metadata !3, i32 177, metadata !47, i32 0, i32 1, %struct.Record** @PtrGlb, null} ; [ DW_TAG_variable ] [PtrGlb] [line 177] [def]
-!47 = metadata !{i32 786454, metadata !82, null, metadata !"RecordPtr", i32 148, i64 0, i64 0, i64 0, i32 0, metadata !48} ; [ DW_TAG_typedef ] [RecordPtr] [line 148, size 0, align 0, offset 0] [from ]
-!48 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from RecordType]
-!49 = metadata !{i32 786454, metadata !82, null, metadata !"RecordType", i32 147, i64 0, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_typedef ] [RecordType] [line 147, size 0, align 0, offset 0] [from Record]
-!50 = metadata !{i32 786451, metadata !82, null, metadata !"Record", i32 138, i64 448, i64 64, i32 0, i32 0, null, metadata !51, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [Record] [line 138, size 448, align 64, offset 0] [def] [from ]
-!51 = metadata !{metadata !52, metadata !54, metadata !56, metadata !57, metadata !58}
-!52 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"PtrComp", i32 140, i64 64, i64 64, i64 0, i32 0, metadata !53} ; [ DW_TAG_member ] [PtrComp] [line 140, size 64, align 64, offset 0] [from ]
-!53 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Record]
-!54 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"Discr", i32 141, i64 32, i64 32, i64 64, i32 0, metadata !55} ; [ DW_TAG_member ] [Discr] [line 141, size 32, align 32, offset 64] [from Enumeration]
-!55 = metadata !{i32 786454, metadata !82, null, metadata !"Enumeration", i32 128, i64 0, i64 0, i64 0, i32 0, metadata !2} ; [ DW_TAG_typedef ] [Enumeration] [line 128, size 0, align 0, offset 0] [from ]
-!56 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"EnumComp", i32 142, i64 32, i64 32, i64 96, i32 0, metadata !55} ; [ DW_TAG_member ] [EnumComp] [line 142, size 32, align 32, offset 96] [from Enumeration]
-!57 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"IntComp", i32 143, i64 32, i64 32, i64 128, i32 0, metadata !21} ; [ DW_TAG_member ] [IntComp] [line 143, size 32, align 32, offset 128] [from OneToFifty]
-!58 = metadata !{i32 786445, metadata !82, metadata !50, metadata !"StringComp", i32 144, i64 248, i64 8, i64 160, i32 0, metadata !59} ; [ DW_TAG_member ] [StringComp] [line 144, size 248, align 8, offset 160] [from String30]
-!59 = metadata !{i32 786454, metadata !82, null, metadata !"String30", i32 134, i64 0, i64 0, i64 0, i32 0, metadata !60} ; [ DW_TAG_typedef ] [String30] [line 134, size 0, align 0, offset 0] [from ]
-!60 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 248, i64 8, i32 0, i32 0, metadata !32, metadata !61, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 248, align 8, offset 0] [from char]
-!61 = metadata !{metadata !62}
-!62 = metadata !{i32 786465, i64 0, i64 31}       ; [ DW_TAG_subrange_type ] [0, 30]
-!63 = metadata !{i32 786484, i32 0, null, metadata !"PtrGlbNext", metadata !"PtrGlbNext", metadata !"", metadata !3, i32 178, metadata !47, i32 0, i32 1, %struct.Record** @PtrGlbNext, null} ; [ DW_TAG_variable ] [PtrGlbNext] [line 178] [def]
-!64 = metadata !{i32 181, i32 0, metadata !12, null}
-!65 = metadata !{i32 182, i32 0, metadata !12, null}
-!66 = metadata !{i32 183, i32 0, metadata !12, null}
-!67 = metadata !{i32 184, i32 0, metadata !12, null}
-!68 = metadata !{i32 189, i32 0, metadata !12, null}
-!69 = metadata !{i32 190, i32 0, metadata !12, null}
-!73 = metadata !{i32 191, i32 0, metadata !12, null}
-!74 = metadata !{i32 192, i32 0, metadata !12, null}
-!75 = metadata !{i32 193, i32 0, metadata !76, null}
-!76 = metadata !{i32 786443, metadata !82, metadata !12, i32 193, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c]
-!77 = metadata !{i32 194, i32 0, metadata !76, null}
-!78 = metadata !{i32 195, i32 0, metadata !12, null}
-!79 = metadata !{i32 196, i32 0, metadata !12, null}
-!80 = metadata !{i32 197, i32 0, metadata !12, null}
-!81 = metadata !{i32 198, i32 0, metadata !12, null}
-!82 = metadata !{metadata !"dry.c", metadata !"/Users/manmanren/test-Nov/rdar_13183203/test2"}
-!83 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.3 (trunk 175015)\001\00\000\00\001", !82, !1, !10, !11, !29,  !10} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c] [DW_LANG_C99]
+!1 = !{!2}
+!2 = !{!"0x4\00\00128\0032\0032\000\000\000", !82, null, null, !4, null, null, null} ; [ DW_TAG_enumeration_type ] [line 128, size 32, align 32, offset 0] [def] [from ]
+!3 = !{!"0x29", !82} ; [ DW_TAG_file_type ]
+!4 = !{!5, !6, !7, !8, !9}
+!5 = !{!"0x28\00Ident1\000"} ; [ DW_TAG_enumerator ] [Ident1 :: 0]
+!6 = !{!"0x28\00Ident2\0010000"} ; [ DW_TAG_enumerator ] [Ident2 :: 10000]
+!7 = !{!"0x28\00Ident3\0010001"} ; [ DW_TAG_enumerator ] [Ident3 :: 10001]
+!8 = !{!"0x28\00Ident4\0010002"} ; [ DW_TAG_enumerator ] [Ident4 :: 10002]
+!9 = !{!"0x28\00Ident5\0010003"} ; [ DW_TAG_enumerator ] [Ident5 :: 10003]
+!10 = !{}
+!11 = !{!12}
+!12 = !{!"0x2e\00Proc8\00Proc8\00\00180\000\001\000\006\000\001\00185", !82, !3, !13, null, void (i32*, [51 x i32]*, i32, i32)* @Proc8, null, null, !22} ; [ DW_TAG_subprogram ] [line 180] [def] [scope 185] [Proc8]
+!13 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !14, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = !{null, !15, !17, !21, !21}
+!15 = !{!"0xf\00\000\0064\0064\000\000", null, null, !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!16 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!17 = !{!"0xf\00\000\0064\0064\000\000", null, null, !18} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!18 = !{!"0x1\00\000\001632\0032\000\000", null, null, !16, !19, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 1632, align 32, offset 0] [from int]
+!19 = !{!20}
+!20 = !{!"0x21\000\0051"}       ; [ DW_TAG_subrange_type ] [0, 50]
+!21 = !{!"0x16\00OneToFifty\00132\000\000\000\000", !82, null, !16} ; [ DW_TAG_typedef ] [OneToFifty] [line 132, size 0, align 0, offset 0] [from int]
+!22 = !{!23, !24, !25, !26, !27, !28}
+!23 = !{!"0x101\00Array1Par\0016777397\000", !12, !3, !15} ; [ DW_TAG_arg_variable ] [Array1Par] [line 181]
+!24 = !{!"0x101\00Array2Par\0033554614\000", !12, !3, !17} ; [ DW_TAG_arg_variable ] [Array2Par] [line 182]
+!25 = !{!"0x101\00IntParI1\0050331831\000", !12, !3, !21} ; [ DW_TAG_arg_variable ] [IntParI1] [line 183]
+!26 = !{!"0x101\00IntParI2\0067109048\000", !12, !3, !21} ; [ DW_TAG_arg_variable ] [IntParI2] [line 184]
+!27 = !{!"0x100\00IntLoc\00186\000", !12, !3, !21} ; [ DW_TAG_auto_variable ] [IntLoc] [line 186]
+!28 = !{!"0x100\00IntIndex\00187\000", !12, !3, !21} ; [ DW_TAG_auto_variable ] [IntIndex] [line 187]
+!29 = !{!30, !35, !36, !38, !39, !40, !42, !46, !63}
+!30 = !{!"0x34\00Version\00Version\00\00111\000\001", null, !3, !31, [4 x i8]* @Version, null} ; [ DW_TAG_variable ] [Version] [line 111] [def]
+!31 = !{!"0x1\00\000\0032\008\000\000", null, null, !32, !33, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32, align 8, offset 0] [from char]
+!32 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!33 = !{!34}
+!34 = !{!"0x21\000\004"}        ; [ DW_TAG_subrange_type ] [0, 3]
+!35 = !{!"0x34\00IntGlob\00IntGlob\00\00171\000\001", null, !3, !16, i32* @IntGlob, null} ; [ DW_TAG_variable ] [IntGlob] [line 171] [def]
+!36 = !{!"0x34\00BoolGlob\00BoolGlob\00\00172\000\001", null, !3, !37, i32* @BoolGlob, null} ; [ DW_TAG_variable ] [BoolGlob] [line 172] [def]
+!37 = !{!"0x16\00boolean\00149\000\000\000\000", !82, null, !16} ; [ DW_TAG_typedef ] [boolean] [line 149, size 0, align 0, offset 0] [from int]
+!38 = !{!"0x34\00Char1Glob\00Char1Glob\00\00173\000\001", null, !3, !32, i8* @Char1Glob, null} ; [ DW_TAG_variable ] [Char1Glob] [line 173] [def]
+!39 = !{!"0x34\00Char2Glob\00Char2Glob\00\00174\000\001", null, !3, !32, i8* @Char2Glob, null} ; [ DW_TAG_variable ] [Char2Glob] [line 174] [def]
+!40 = !{!"0x34\00Array1Glob\00Array1Glob\00\00175\000\001", null, !3, !41, [51 x i32]* @Array1Glob, null} ; [ DW_TAG_variable ] [Array1Glob] [line 175] [def]
+!41 = !{!"0x16\00Array1Dim\00135\000\000\000\000", !82, null, !18} ; [ DW_TAG_typedef ] [Array1Dim] [line 135, size 0, align 0, offset 0] [from ]
+!42 = !{!"0x34\00Array2Glob\00Array2Glob\00\00176\000\001", null, !3, !43, [51 x [51 x i32]]* @Array2Glob, null} ; [ DW_TAG_variable ] [Array2Glob] [line 176] [def]
+!43 = !{!"0x16\00Array2Dim\00136\000\000\000\000", !82, null, !44} ; [ DW_TAG_typedef ] [Array2Dim] [line 136, size 0, align 0, offset 0] [from ]
+!44 = !{!"0x1\00\000\0083232\0032\000\000", null, null, !16, !45, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 83232, align 32, offset 0] [from int]
+!45 = !{!20, !20}
+!46 = !{!"0x34\00PtrGlb\00PtrGlb\00\00177\000\001", null, !3, !47, %struct.Record** @PtrGlb, null} ; [ DW_TAG_variable ] [PtrGlb] [line 177] [def]
+!47 = !{!"0x16\00RecordPtr\00148\000\000\000\000", !82, null, !48} ; [ DW_TAG_typedef ] [RecordPtr] [line 148, size 0, align 0, offset 0] [from ]
+!48 = !{!"0xf\00\000\0064\0064\000\000", null, null, !49} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from RecordType]
+!49 = !{!"0x16\00RecordType\00147\000\000\000\000", !82, null, !50} ; [ DW_TAG_typedef ] [RecordType] [line 147, size 0, align 0, offset 0] [from Record]
+!50 = !{!"0x13\00Record\00138\00448\0064\000\000\000", !82, null, null, !51, null, i32 0, null} ; [ DW_TAG_structure_type ] [Record] [line 138, size 448, align 64, offset 0] [def] [from ]
+!51 = !{!52, !54, !56, !57, !58}
+!52 = !{!"0xd\00PtrComp\00140\0064\0064\000\000", !82, !50, !53} ; [ DW_TAG_member ] [PtrComp] [line 140, size 64, align 64, offset 0] [from ]
+!53 = !{!"0xf\00\000\0064\0064\000\000", null, null, !50} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Record]
+!54 = !{!"0xd\00Discr\00141\0032\0032\0064\000", !82, !50, !55} ; [ DW_TAG_member ] [Discr] [line 141, size 32, align 32, offset 64] [from Enumeration]
+!55 = !{!"0x16\00Enumeration\00128\000\000\000\000", !82, null, !2} ; [ DW_TAG_typedef ] [Enumeration] [line 128, size 0, align 0, offset 0] [from ]
+!56 = !{!"0xd\00EnumComp\00142\0032\0032\0096\000", !82, !50, !55} ; [ DW_TAG_member ] [EnumComp] [line 142, size 32, align 32, offset 96] [from Enumeration]
+!57 = !{!"0xd\00IntComp\00143\0032\0032\00128\000", !82, !50, !21} ; [ DW_TAG_member ] [IntComp] [line 143, size 32, align 32, offset 128] [from OneToFifty]
+!58 = !{!"0xd\00StringComp\00144\00248\008\00160\000", !82, !50, !59} ; [ DW_TAG_member ] [StringComp] [line 144, size 248, align 8, offset 160] [from String30]
+!59 = !{!"0x16\00String30\00134\000\000\000\000", !82, null, !60} ; [ DW_TAG_typedef ] [String30] [line 134, size 0, align 0, offset 0] [from ]
+!60 = !{!"0x1\00\000\00248\008\000\000", null, null, !32, !61, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 248, align 8, offset 0] [from char]
+!61 = !{!62}
+!62 = !{!"0x21\000\0031"}       ; [ DW_TAG_subrange_type ] [0, 30]
+!63 = !{!"0x34\00PtrGlbNext\00PtrGlbNext\00\00178\000\001", null, !3, !47, %struct.Record** @PtrGlbNext, null} ; [ DW_TAG_variable ] [PtrGlbNext] [line 178] [def]
+!64 = !MDLocation(line: 181, scope: !12)
+!65 = !MDLocation(line: 182, scope: !12)
+!66 = !MDLocation(line: 183, scope: !12)
+!67 = !MDLocation(line: 184, scope: !12)
+!68 = !MDLocation(line: 189, scope: !12)
+!69 = !MDLocation(line: 190, scope: !12)
+!73 = !MDLocation(line: 191, scope: !12)
+!74 = !MDLocation(line: 192, scope: !12)
+!75 = !MDLocation(line: 193, scope: !76)
+!76 = !{!"0xb\00193\000\000", !82, !12} ; [ DW_TAG_lexical_block ] [/Users/manmanren/test-Nov/rdar_13183203/test2/dry.c]
+!77 = !MDLocation(line: 194, scope: !76)
+!78 = !MDLocation(line: 195, scope: !12)
+!79 = !MDLocation(line: 196, scope: !12)
+!80 = !MDLocation(line: 197, scope: !12)
+!81 = !MDLocation(line: 198, scope: !12)
+!82 = !{!"dry.c", !"/Users/manmanren/test-Nov/rdar_13183203/test2"}
+!83 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/multiple-aranges.ll b/test/DebugInfo/X86/multiple-aranges.ll
index 2da293874245..88b8bc97ecfa 100644
--- a/test/DebugInfo/X86/multiple-aranges.ll
+++ b/test/DebugInfo/X86/multiple-aranges.ll
@@ -8,8 +8,7 @@
 ; CHECK-NEXT: .byte   0                       # Segment Size (in bytes)
 ; CHECK-NEXT: .zero   4,255
 ; CHECK-NEXT: .quad   kittens
-; CHECK-NEXT: .Lset0 = rainbows-kittens
-; CHECK-NEXT: .quad   .Lset0
+; CHECK-NEXT: .quad   rainbows-kittens
 ; CHECK-NEXT: .quad   0                       # ARange terminator
 ; CHECK-NEXT: .quad   0
 
@@ -21,8 +20,7 @@
 ; CHECK-NEXT: .byte   0                       # Segment Size (in bytes)
 ; CHECK-NEXT: .zero   4,255
 ; CHECK-NEXT: .quad   rainbows
-; CHECK-NEXT: .Lset1 = .Ldebug_end0-rainbows
-; CHECK-NEXT: .quad   .Lset1
+; CHECK-NEXT: .quad   .Ldebug_end0-rainbows
 ; CHECK-NEXT: .quad   0                       # ARange terminator
 ; CHECK-NEXT: .quad   0
 
@@ -44,17 +42,17 @@ target triple = "x86_64-unknown-linux-gnu"
 !llvm.dbg.cu = !{!0, !7}
 !llvm.module.flags = !{!12, !13}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/kayamon/test1.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"test1.c", metadata !"/home/kayamon"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786484, i32 0, null, metadata !"kittens", metadata !"kittens", metadata !"", metadata !5, i32 1, metadata !6, i32 0, i32 1, i32* @kittens, null} ; [ DW_TAG_variable ] [kittens] [line 1] [def]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/kayamon/test1.c]
-!6 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!7 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !9, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/kayamon/test2.c] [DW_LANG_C99]
-!8 = metadata !{metadata !"test2.c", metadata !"/home/kayamon"}
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786484, i32 0, null, metadata !"rainbows", metadata !"rainbows", metadata !"", metadata !11, i32 1, metadata !6, i32 0, i32 1, i32* @rainbows, null} ; [ DW_TAG_variable ] [rainbows] [line 1] [def]
-!11 = metadata !{i32 786473, metadata !8}         ; [ DW_TAG_file_type ] [/home/kayamon/test2.c]
-!12 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 \000\00\000\00\000", !1, !2, !2, !2, !3, !2} ; [ DW_TAG_compile_unit ] [/home/kayamon/test1.c] [DW_LANG_C99]
+!1 = !{!"test1.c", !"/home/kayamon"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x34\00kittens\00kittens\00\001\000\001", null, !5, !6, i32* @kittens, null} ; [ DW_TAG_variable ] [kittens] [line 1] [def]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/home/kayamon/test1.c]
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!7 = !{!"0x11\0012\00clang version 3.4 \000\00\000\00\000", !8, !2, !2, !2, !9, !2} ; [ DW_TAG_compile_unit ] [/home/kayamon/test2.c] [DW_LANG_C99]
+!8 = !{!"test2.c", !"/home/kayamon"}
+!9 = !{!10}
+!10 = !{!"0x34\00rainbows\00rainbows\00\001\000\001", null, !11, !6, i32* @rainbows, null} ; [ DW_TAG_variable ] [rainbows] [line 1] [def]
+!11 = !{!"0x29", !8}         ; [ DW_TAG_file_type ] [/home/kayamon/test2.c]
+!12 = !{i32 2, !"Dwarf Version", i32 4}
+!13 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/multiple-at-const-val.ll b/test/DebugInfo/X86/multiple-at-const-val.ll
index 27a5510f1190..17dc2c49619b 100644
--- a/test/DebugInfo/X86/multiple-at-const-val.ll
+++ b/test/DebugInfo/X86/multiple-at-const-val.ll
@@ -27,37 +27,37 @@ entry:
 
 declare %"class.std::basic_ostream"* @test(%"class.std::basic_ostream"*, i8*, i64)
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!1803}
 
-!0 = metadata !{i32 786449, metadata !1802, i32 4, metadata !"clang version 3.3 (trunk 174207)", i1 true, metadata !"", i32 0, metadata !1, metadata !955, metadata !956, metadata !1786,  metadata !955, metadata !""} ; [ DW_TAG_compile_unit ] [/privite/tmp/student2.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !26}
-!4 = metadata !{i32 786489, null, metadata !"std", metadata !5, i32 48} ; [ DW_TAG_namespace ]
-!5 = metadata !{i32 786473, metadata !1801} ; [ DW_TAG_file_type ]
-!25 = metadata !{i32 786472, metadata !"_S_os_fmtflags_end", i64 65536} ; [ DW_TAG_enumerator ]
-!26 = metadata !{i32 786436, metadata !1801, metadata !4, metadata !"_Ios_Iostate", i32 146, i64 32, i64 32, i32 0, i32 0, null, metadata !27, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [_Ios_Iostate] [line 146, size 32, align 32, offset 0] [def] [from ]
-!27 = metadata !{metadata !28, metadata !29, metadata !30, metadata !31, metadata !32}
-!28 = metadata !{i32 786472, metadata !"_S_goodbit", i64 0} ; [ DW_TAG_enumerator ] [_S_goodbit :: 0]
-!29 = metadata !{i32 786472, metadata !"_S_badbit", i64 1} ; [ DW_TAG_enumerator ] [_S_badbit :: 1]
-!30 = metadata !{i32 786472, metadata !"_S_eofbit", i64 2} ; [ DW_TAG_enumerator ] [_S_eofbit :: 2]
-!31 = metadata !{i32 786472, metadata !"_S_failbit", i64 4} ; [ DW_TAG_enumerator ] [_S_failbit :: 4]
-!32 = metadata !{i32 786472, metadata !"_S_os_ostate_end", i64 65536} ; [ DW_TAG_enumerator ] [_S_os_ostate_end :: 65536]
-!49 = metadata !{i32 786434, metadata !1801, metadata !4, metadata !"os_base", i32 200, i64 1728, i64 64, i32 0, i32 0, null, metadata !50, i32 0, metadata !49, null, null} ; [ DW_TAG_class_type ] [os_base] [line 200, size 1728, align 64, offset 0] [def] [from ]
-!50 = metadata !{metadata !77}
-!54 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !55, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!55 = metadata !{metadata !56}
-!56 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!77 = metadata !{i32 786445, metadata !1801, metadata !49, metadata !"badbit", i32 331, i64 0, i64 0, i64 0, i32 4096, metadata !78, i32 1} ; [ DW_TAG_member ]
-!78 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !79} ; [ DW_TAG_const_type ]
-!79 = metadata !{i32 786454, metadata !1801, metadata !49, metadata !"ostate", i32 327, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_typedef ]
-!955 = metadata !{}
-!956 = metadata !{metadata !960}
-!960 = metadata !{i32 786478, metadata !1802, null, metadata !"main", metadata !"main", metadata !"", i32 73, metadata !54, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !955, i32 73} ; [ DW_TAG_subprogram ]
-!961 = metadata !{i32 786473, metadata !1802} ; [ DW_TAG_file_type ]
-!1786 = metadata !{metadata !1800}
-!1800 = metadata !{i32 786484, i32 0, metadata !5, metadata !"badbit", metadata !"badbit", metadata !"badbit", metadata !5, i32 331, metadata !78, i32 1, i32 1, i32 1, metadata !77} ; [ DW_TAG_variable ]
-!1801 = metadata !{metadata !"os_base.h", metadata !"/privite/tmp"}
-!1802 = metadata !{metadata !"student2.cpp", metadata !"/privite/tmp"}
-!1803 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.3 (trunk 174207)\001\00\000\00\000", !1802, !1, !955, !956, !1786,  !955} ; [ DW_TAG_compile_unit ] [/privite/tmp/student2.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!26}
+!4 = !{!"0x39\00std\0048", null, !5} ; [ DW_TAG_namespace ]
+!5 = !{!"0x29", !1801} ; [ DW_TAG_file_type ]
+!25 = !{!"0x28\00_S_os_fmtflags_end\0065536"} ; [ DW_TAG_enumerator ]
+!26 = !{!"0x4\00_Ios_Iostate\00146\0032\0032\000\000\000", !1801, !4, null, !27, null, null, null} ; [ DW_TAG_enumeration_type ] [_Ios_Iostate] [line 146, size 32, align 32, offset 0] [def] [from ]
+!27 = !{!28, !29, !30, !31, !32}
+!28 = !{!"0x28\00_S_goodbit\000"} ; [ DW_TAG_enumerator ] [_S_goodbit :: 0]
+!29 = !{!"0x28\00_S_badbit\001"} ; [ DW_TAG_enumerator ] [_S_badbit :: 1]
+!30 = !{!"0x28\00_S_eofbit\002"} ; [ DW_TAG_enumerator ] [_S_eofbit :: 2]
+!31 = !{!"0x28\00_S_failbit\004"} ; [ DW_TAG_enumerator ] [_S_failbit :: 4]
+!32 = !{!"0x28\00_S_os_ostate_end\0065536"} ; [ DW_TAG_enumerator ] [_S_os_ostate_end :: 65536]
+!49 = !{!"0x2\00os_base\00200\001728\0064\000\000\000", !1801, !4, null, !50, !49, null, null} ; [ DW_TAG_class_type ] [os_base] [line 200, size 1728, align 64, offset 0] [def] [from ]
+!50 = !{!77}
+!54 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !55, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!55 = !{!56}
+!56 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!77 = !{!"0xd\00badbit\00331\000\000\000\004096", !1801, !49, !78, i32 1} ; [ DW_TAG_member ]
+!78 = !{!"0x26\00\000\000\000\000\000", null, null, !79} ; [ DW_TAG_const_type ]
+!79 = !{!"0x16\00ostate\00327\000\000\000\000", !1801, !49, !26} ; [ DW_TAG_typedef ]
+!955 = !{}
+!956 = !{!960}
+!960 = !{!"0x2e\00main\00main\00\0073\000\001\000\006\00256\001\0073", !1802, null, !54, null, i32 ()* @main, null, null, !955} ; [ DW_TAG_subprogram ]
+!961 = !{!"0x29", !1802} ; [ DW_TAG_file_type ]
+!1786 = !{!1800}
+!1800 = !{!"0x34\00badbit\00badbit\00badbit\00331\001\001", !5, !5, !78, i32 1, !77} ; [ DW_TAG_variable ]
+!1801 = !{!"os_base.h", !"/privite/tmp"}
+!1802 = !{!"student2.cpp", !"/privite/tmp"}
+!1803 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/nodebug_with_debug_loc.ll b/test/DebugInfo/X86/nodebug_with_debug_loc.ll
new file mode 100644
index 000000000000..cc9a87472808
--- /dev/null
+++ b/test/DebugInfo/X86/nodebug_with_debug_loc.ll
@@ -0,0 +1,139 @@
+; REQUIRES: object-emission
+
+; RUN: llc -mtriple=i386-linux-gnu -filetype=obj -relocation-model=pic %s -o /dev/null
+
+; Derived from the test case in PR20367, there's nothing really positive to
+; test here (hence no FileCheck, etc). All that was wrong is that the debug info
+; intrinsics (introduced by inlining) in 'f1' were causing codegen to crash, but
+; since 'f1' is a nodebug function, there's no positive outcome to confirm, just
+; that debug info doesn't get in the way/cause a crash.
+
+; The test case isn't particularly well reduced/tidy, but as simple as I could
+; get the C++ source. I assume the complexity is mostly just about producing a
+; certain amount of register pressure, so it might be able to be simplified/made
+; more uniform.
+
+; Generated from:
+; $ clang-tot -cc1 -triple i386 -emit-obj -g -O3 repro.cpp
+; void sink(const void *);
+; int source();
+; void f3(int);
+; 
+; extern bool b;
+; 
+; struct string {
+;   unsigned *mem;
+; };
+; 
+; extern string &str;
+; 
+; inline __attribute__((always_inline)) void s2(string *lhs) { sink(lhs->mem); }
+; inline __attribute__((always_inline)) void f() {
+;   string str2;
+;   s2(&str2);
+;   sink(&str2);
+; }
+; void __attribute__((nodebug)) f1() {
+;   for (int iter = 0; iter != 2; ++iter) {
+;     f();
+;     sink(str.mem);
+;     if (b) return;
+;   }
+; }
+
+%struct.string = type { i32* }
+
+@str = external constant %struct.string*
+@b = external global i8
+
+; Function Attrs: nounwind
+define void @_Z2f1v() #0 {
+entry:
+  %str2.i = alloca %struct.string, align 4
+  %0 = bitcast %struct.string* %str2.i to i8*, !dbg !26
+  %1 = load %struct.string** @str, align 4
+  %mem = getelementptr inbounds %struct.string* %1, i32 0, i32 0
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iter.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  call void @llvm.lifetime.start(i64 4, i8* %0), !dbg !26
+  call void @llvm.dbg.value(metadata %struct.string* %str2.i, i64 0, metadata !16, metadata !{!"0x102"}) #3, !dbg !26
+  call void @llvm.dbg.value(metadata %struct.string* %str2.i, i64 0, metadata !27, metadata !{!"0x102"}) #3, !dbg !29
+  call void @_Z4sinkPKv(i8* undef) #3, !dbg !29
+  call void @_Z4sinkPKv(i8* %0) #3, !dbg !30
+  call void @llvm.lifetime.end(i64 4, i8* %0), !dbg !31
+  %2 = load i32** %mem, align 4, !tbaa !32
+  %3 = bitcast i32* %2 to i8*
+  call void @_Z4sinkPKv(i8* %3) #3
+  %4 = load i8* @b, align 1, !tbaa !37, !range !39
+  %tobool = icmp ne i8 %4, 0
+  %inc = add nsw i32 %iter.02, 1
+  %cmp = icmp eq i32 %inc, 2
+  %or.cond = or i1 %tobool, %cmp
+  br i1 %or.cond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+declare void @_Z4sinkPKv(i8*) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #3
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #3
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!23, !24}
+!llvm.ident = !{!25}
+
+!0 = !{!"0x11\004\00clang version 3.5.0 \001\00\000\00\001", !1, !2, !3, !10, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/<stdin>] [DW_LANG_C_plus_plus]
+!1 = !{!"<stdin>", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00string\007\0032\0032\000\000\000", !5, null, null, !6, null, null, !"_ZTS6string"} ; [ DW_TAG_structure_type ] [string] [line 7, size 32, align 32, offset 0] [def] [from ]
+!5 = !{!"repro.cpp", !"/tmp/dbginfo"}
+!6 = !{!7}
+!7 = !{!"0xd\00mem\008\0032\0032\000\000", !5, !"_ZTS6string", !8} ; [ DW_TAG_member ] [mem] [line 8, size 32, align 32, offset 0] [from ]
+!8 = !{!"0xf\00\000\0032\0032\000\000", null, null, !9} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from unsigned int]
+!9 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, null} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!10 = !{!11, !17}
+!11 = !{!"0x2e\00f\00f\00_Z1fv\0014\000\001\000\006\00256\001\0014", !5, !12, !13, null, null, null, null, !15} ; [ DW_TAG_subprogram ] [line 14] [def] [f]
+!12 = !{!"0x29", !5}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/repro.cpp]
+!13 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !14, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = !{null}
+!15 = !{!16}
+!16 = !{!"0x100\00str2\0015\000", !11, !12, !"_ZTS6string"} ; [ DW_TAG_auto_variable ] [str2] [line 15]
+!17 = !{!"0x2e\00s2\00s2\00_Z2s2P6string\0013\000\001\000\006\00256\001\0013", !5, !12, !18, null, null, null, null, !21} ; [ DW_TAG_subprogram ] [line 13] [def] [s2]
+!18 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !19, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!19 = !{null, !20}
+!20 = !{!"0xf\00\000\0032\0032\000\000", null, null, !"_ZTS6string"} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from _ZTS6string]
+!21 = !{!22}
+!22 = !{!"0x101\00lhs\0016777229\000", !17, !12, !20} ; [ DW_TAG_arg_variable ] [lhs] [line 13]
+!23 = !{i32 2, !"Dwarf Version", i32 4}
+!24 = !{i32 2, !"Debug Info Version", i32 2}
+!25 = !{!"clang version 3.5.0 "}
+!26 = !MDLocation(line: 15, scope: !11)
+!27 = !{!"0x101\00lhs\0016777229\000", !17, !12, !20, !28} ; [ DW_TAG_arg_variable ] [lhs] [line 13]
+!28 = !MDLocation(line: 16, scope: !11)
+!29 = !MDLocation(line: 13, scope: !17, inlinedAt: !28)
+!30 = !MDLocation(line: 17, scope: !11)
+!31 = !MDLocation(line: 18, scope: !11)
+!32 = !{!33, !34, i64 0}
+!33 = !{!"_ZTS6string", !34, i64 0}
+!34 = !{!"any pointer", !35, i64 0}
+!35 = !{!"omnipotent char", !36, i64 0}
+!36 = !{!"Simple C/C++ TBAA"}
+!37 = !{!38, !38, i64 0}
+!38 = !{!"bool", !35, i64 0}
+!39 = !{i8 0, i8 2}
diff --git a/test/DebugInfo/X86/nondefault-subrange-array.ll b/test/DebugInfo/X86/nondefault-subrange-array.ll
index 4df1bd4847d0..3167d9f1e6ba 100644
--- a/test/DebugInfo/X86/nondefault-subrange-array.ll
+++ b/test/DebugInfo/X86/nondefault-subrange-array.ll
@@ -19,34 +19,34 @@
 ; CHECK: DW_TAG_subrange_type
 ; CHECK-NEXT:                   DW_AT_type [DW_FORM_ref4]  (cu + 0x{{[0-9a-f]*}} => {[[BASE2:0x[0-9a-f]*]]})
 ; CHECK-NEXT:                   DW_AT_lower_bound [DW_FORM_data8]       (0xfffffffffffffffd)
-; CHECK-NEXT:                   DW_AT_upper_bound [DW_FORM_data1]       (0x26)
+; CHECK-NEXT:                   DW_AT_count [DW_FORM_data1]       (0x2a)
 
 ; CHECK: [[BASE]]: DW_TAG_base_type
 ; CHECK: [[BASE2]]: DW_TAG_base_type
 ; CHECK-NEXT:                 DW_AT_name [DW_FORM_strp]       ( .debug_str[0x{{[0-9a-f]*}}] = "sizetype")
 ; CHECK-NEXT:                 DW_AT_byte_size [DW_FORM_data1] (0x08)
-; CHECK-NEXT:                 DW_AT_encoding [DW_FORM_data1]  (0x07)
+; CHECK-NEXT:                 DW_AT_encoding [DW_FORM_data1]  (DW_ATE_unsigned)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!21}
 
-!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 169136)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
-!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786434, metadata !20, null, metadata !"A", i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [def] [from ]
-!8 = metadata !{metadata !9, metadata !14}
-!9 = metadata !{i32 786445, metadata !20, metadata !7, metadata !"x", i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ]
-!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
-!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786465, i64 -3, i64 42} ; [ DW_TAG_subrange_type ] [-3, 39]
-!14 = metadata !{i32 786478, metadata !6, metadata !7, metadata !"A", metadata !"A", metadata !"", i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A]
-!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{null, metadata !17}
-!17 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
-!18 = metadata !{metadata !19}
-!19 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!20 = metadata !{metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm"}
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.3 (trunk 169136)\000\00\000\00\000", !20, !1, !1, !1, !3,  !1} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x34\00a\00a\00\001\000\001", null, !6, !7, %class.A* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
+!6 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
+!7 = !{!"0x2\00A\001\000\0032\000\000\000", !20, null, null, !8, null, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [def] [from ]
+!8 = !{!9, !14}
+!9 = !{!"0xd\00x\001\000\000\000\001", !20, !7, !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ]
+!10 = !{!"0x1\00\000\000\0032\000\000", null, null, !11, !12, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!11 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = !{!13}
+!13 = !{!"0x21\00-3\0042"} ; [ DW_TAG_subrange_type ] [-3, 39]
+!14 = !{!"0x2e\00A\00A\00\001\000\000\000\006\00320\000\001", !6, !7, !15, null, null, null, i32 0, !18} ; [ DW_TAG_subprogram ] [line 1] [A]
+!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{null, !17}
+!17 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!18 = !{!19}
+!19 = !{!"0x24"}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!20 = !{!"t.cpp", !"/Volumes/Sandbox/llvm"}
+!21 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/objc-fwd-decl.ll b/test/DebugInfo/X86/objc-fwd-decl.ll
index 1ec56be81a46..cd71396661c5 100644
--- a/test/DebugInfo/X86/objc-fwd-decl.ll
+++ b/test/DebugInfo/X86/objc-fwd-decl.ll
@@ -12,16 +12,16 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !10, !11, !12, !14}
 
-!0 = metadata !{i32 786449, metadata !13, i32 16, metadata !"clang version 3.1 (trunk 152054 trunk 152094)", i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, %0** @a, null} ; [ DW_TAG_variable ]
-!6 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!8 = metadata !{i32 786451, metadata !13, null, metadata !"FooBarBaz", i32 1, i32 0, i32 0, i32 0, i32 4, null, null, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [FooBarBaz] [line 1, size 0, align 0, offset 0] [decl] [from ]
-!9 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!10 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!11 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!12 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
-!13 = metadata !{metadata !"foo.m", metadata !"/Users/echristo"}
-!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0016\00clang version 3.1 (trunk 152054 trunk 152094)\000\00\002\00\000", !13, !1, !1, !1, !3,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x34\00a\00a\00\003\000\001", null, !6, !7, %0** @a, null} ; [ DW_TAG_variable ]
+!6 = !{!"0x29", !13} ; [ DW_TAG_file_type ]
+!7 = !{!"0xf\00\000\0064\0064\000\000", null, null, !8} ; [ DW_TAG_pointer_type ]
+!8 = !{!"0x13\00FooBarBaz\001\000\000\000\004\0016", !13, null, null, null, null, null, null} ; [ DW_TAG_structure_type ] [FooBarBaz] [line 1, size 0, align 0, offset 0] [decl] [from ]
+!9 = !{i32 1, !"Objective-C Version", i32 2}
+!10 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!11 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!12 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
+!13 = !{!"foo.m", !"/Users/echristo"}
+!14 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/objc-property-void.ll b/test/DebugInfo/X86/objc-property-void.ll
index d366a7acf4e5..0ae9e1a337ed 100644
--- a/test/DebugInfo/X86/objc-property-void.ll
+++ b/test/DebugInfo/X86/objc-property-void.ll
@@ -56,14 +56,14 @@ entry:
   %self.addr = alloca %0*, align 8
   %_cmd.addr = alloca i8*, align 8
   store %0* %self, %0** %self.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%0** %self.addr}, metadata !24), !dbg !26
+  call void @llvm.dbg.declare(metadata %0** %self.addr, metadata !24, metadata !{!"0x102"}), !dbg !26
   store i8* %_cmd, i8** %_cmd.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i8** %_cmd.addr}, metadata !27), !dbg !26
+  call void @llvm.dbg.declare(metadata i8** %_cmd.addr, metadata !27, metadata !{!"0x102"}), !dbg !26
   ret void, !dbg !29
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -72,33 +72,33 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!17, !18, !19, !20, !21, !22}
 !llvm.ident = !{!23}
 
-!0 = metadata !{i32 786449, metadata !1, i32 16, metadata !"", i1 false, metadata !"", i32 2, metadata !2, metadata !3, metadata !9, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [] [DW_LANG_ObjC]
-!1 = metadata !{metadata !"-", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"Foo", i32 1, i64 0, i64 8, i32 0, i32 512, null, metadata !7, i32 16, null, null, null} ; [ DW_TAG_structure_type ] [Foo] [line 1, size 0, align 8, offset 0] [def] [from ]
-!5 = metadata !{metadata !"<stdin>", metadata !""}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] []
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 803328, metadata !"foo", metadata !6, i32 2, metadata !"", metadata !"", i32 2117, null} ; [ DW_TAG_APPLE_property ] [foo] [line 2, properties 2117]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"-[Foo foo]", metadata !"-[Foo foo]", metadata !"", i32 5, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%0*, i8*)* @"\01-[Foo foo]", null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [-[Foo foo]]
-!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{null, metadata !13, metadata !14}
-!13 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from Foo]
-!14 = metadata !{i32 786454, metadata !5, null, metadata !"SEL", i32 5, i64 0, i64 0, i64 0, i32 64, metadata !15} ; [ DW_TAG_typedef ] [SEL] [line 5, size 0, align 0, offset 0] [artificial] [from ]
-!15 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_selector]
-!16 = metadata !{i32 786451, metadata !1, null, metadata !"objc_selector", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [decl] [from ]
-!17 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!18 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!19 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!20 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
-!21 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!23 = metadata !{metadata !""}
-!24 = metadata !{i32 786689, metadata !10, metadata !"self", null, i32 16777216, metadata !25, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [self] [line 0]
-!25 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Foo]
-!26 = metadata !{i32 0, i32 0, metadata !10, null}
-!27 = metadata !{i32 786689, metadata !10, metadata !"_cmd", null, i32 33554432, metadata !28, i32 64, i32 0} ; [ DW_TAG_arg_variable ] [_cmd] [line 0]
-!28 = metadata !{i32 786454, metadata !5, null, metadata !"SEL", i32 5, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_typedef ] [SEL] [line 5, size 0, align 0, offset 0] [from ]
-!29 = metadata !{i32 5, i32 0, metadata !10, null}
+!0 = !{!"0x11\0016\00\000\00\002\00\000", !1, !2, !3, !9, !2, !2} ; [ DW_TAG_compile_unit ] [] [DW_LANG_ObjC]
+!1 = !{!"-", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00Foo\001\000\008\000\00512\0016", !5, !6, null, !7, null, null, null} ; [ DW_TAG_structure_type ] [Foo] [line 1, size 0, align 8, offset 0] [def] [from ]
+!5 = !{!"<stdin>", !""}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] []
+!7 = !{!8}
+!8 = !{!"0x4200\00foo\002\00\00\002117", !6, null} ; [ DW_TAG_APPLE_property ] [foo] [line 2, properties 2117]
+!9 = !{!10}
+!10 = !{!"0x2e\00-[Foo foo]\00-[Foo foo]\00\005\001\001\000\006\00256\000\005", !5, !6, !11, null, void (%0*, i8*)* @"\01-[Foo foo]", null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [-[Foo foo]]
+!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{null, !13, !14}
+!13 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from Foo]
+!14 = !{!"0x16\00SEL\005\000\000\000\0064", !5, null, !15} ; [ DW_TAG_typedef ] [SEL] [line 5, size 0, align 0, offset 0] [artificial] [from ]
+!15 = !{!"0xf\00\000\0064\0064\000\000", null, null, !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_selector]
+!16 = !{!"0x13\00objc_selector\000\000\000\000\004\000", !1, null, null, null, null, null, null} ; [ DW_TAG_structure_type ] [objc_selector] [line 0, size 0, align 0, offset 0] [decl] [from ]
+!17 = !{i32 1, !"Objective-C Version", i32 2}
+!18 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!19 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!20 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
+!21 = !{i32 2, !"Dwarf Version", i32 2}
+!22 = !{i32 1, !"Debug Info Version", i32 2}
+!23 = !{!""}
+!24 = !{!"0x101\00self\0016777216\001088", !10, null, !25} ; [ DW_TAG_arg_variable ] [self] [line 0]
+!25 = !{!"0xf\00\000\0064\0064\000\000", null, null, !4} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from Foo]
+!26 = !MDLocation(line: 0, scope: !10)
+!27 = !{!"0x101\00_cmd\0033554432\0064", !10, null, !28} ; [ DW_TAG_arg_variable ] [_cmd] [line 0]
+!28 = !{!"0x16\00SEL\005\000\000\000\000", !5, null, !15} ; [ DW_TAG_typedef ] [SEL] [line 5, size 0, align 0, offset 0] [from ]
+!29 = !MDLocation(line: 5, scope: !10)
diff --git a/test/DebugInfo/X86/op_deref.ll b/test/DebugInfo/X86/op_deref.ll
index 31003eee2a8c..2aff5c2e8e0f 100644
--- a/test/DebugInfo/X86/op_deref.ll
+++ b/test/DebugInfo/X86/op_deref.ll
@@ -23,20 +23,23 @@
 ; ASM-CHECK: DEBUG_VALUE: vla <- RCX
 ; ASM-CHECK: DW_OP_breg2
 
+; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s --check-prefix=PRETTY-PRINT
+; PRETTY-PRINT: [ DW_TAG_expression ] [DW_OP_deref]
+
 define void @testVLAwithSize(i32 %s) nounwind uwtable ssp {
 entry:
   %s.addr = alloca i32, align 4
   %saved_stack = alloca i8*
   %i = alloca i32, align 4
   store i32 %s, i32* %s.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %s.addr}, metadata !10), !dbg !11
+  call void @llvm.dbg.declare(metadata i32* %s.addr, metadata !10, metadata !{!"0x102"}), !dbg !11
   %0 = load i32* %s.addr, align 4, !dbg !12
   %1 = zext i32 %0 to i64, !dbg !12
   %2 = call i8* @llvm.stacksave(), !dbg !12
   store i8* %2, i8** %saved_stack, !dbg !12
   %vla = alloca i32, i64 %1, align 16, !dbg !12
-  call void @llvm.dbg.declare(metadata !{i32* %vla}, metadata !14), !dbg !18
-  call void @llvm.dbg.declare(metadata !{i32* %i}, metadata !19), !dbg !20
+  call void @llvm.dbg.declare(metadata i32* %vla, metadata !14, metadata !30), !dbg !18
+  call void @llvm.dbg.declare(metadata i32* %i, metadata !19, metadata !{!"0x102"}), !dbg !20
   store i32 0, i32* %i, align 4, !dbg !21
   br label %for.cond, !dbg !21
 
@@ -68,7 +71,7 @@ for.end:                                          ; preds = %for.cond
   ret void, !dbg !27
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare i8* @llvm.stacksave() nounwind
 
@@ -77,32 +80,32 @@ declare void @llvm.stackrestore(i8*) nounwind
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!29}
 
-!0 = metadata !{i32 786449, metadata !28, i32 12, metadata !"clang version 3.2 (trunk 156005) (llvm/trunk 156000)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !28, metadata !6, metadata !"testVLAwithSize", metadata !"testVLAwithSize", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @testVLAwithSize, null, null, metadata !1, i32 2} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786689, metadata !5, metadata !"s", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!11 = metadata !{i32 1, i32 26, metadata !5, null}
-!12 = metadata !{i32 3, i32 13, metadata !13, null}
-!13 = metadata !{i32 786443, metadata !28, metadata !5, i32 2, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!14 = metadata !{i32 786688, metadata !13, metadata !"vla", metadata !6, i32 3, metadata !15, i32 8192, i32 0, metadata !30} ; [ DW_TAG_auto_variable ]
-!15 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
-!16 = metadata !{metadata !17}
-!17 = metadata !{i32 786465, i64 0, i64 -1}        ; [ DW_TAG_subrange_type ]
-!18 = metadata !{i32 3, i32 7, metadata !13, null}
-!19 = metadata !{i32 786688, metadata !13, metadata !"i", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!20 = metadata !{i32 4, i32 7, metadata !13, null}
-!21 = metadata !{i32 5, i32 8, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !28, metadata !13, i32 5, i32 3, i32 1} ; [ DW_TAG_lexical_block ]
-!23 = metadata !{i32 6, i32 5, metadata !24, null}
-!24 = metadata !{i32 786443, metadata !28, metadata !22, i32 5, i32 27, i32 2} ; [ DW_TAG_lexical_block ]
-!25 = metadata !{i32 7, i32 3, metadata !24, null}
-!26 = metadata !{i32 5, i32 22, metadata !22, null}
-!27 = metadata !{i32 8, i32 1, metadata !13, null}
-!28 = metadata !{metadata !"bar.c", metadata !"/Users/echristo/tmp"}
-!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!30 = metadata !{i64 2}
+!0 = !{!"0x11\0012\00clang version 3.2 (trunk 156005) (llvm/trunk 156000)\000\00\000\00\001", !28, !1, !1, !3, !1,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00testVLAwithSize\00testVLAwithSize\00\001\000\001\000\006\00256\000\002", !28, !6, !7, null, void (i32)* @testVLAwithSize, null, null, !1} ; [ DW_TAG_subprogram ]
+!6 = !{!"0x29", !28} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!10 = !{!"0x101\00s\0016777217\000", !5, !6, !9} ; [ DW_TAG_arg_variable ]
+!11 = !MDLocation(line: 1, column: 26, scope: !5)
+!12 = !MDLocation(line: 3, column: 13, scope: !13)
+!13 = !{!"0xb\002\001\000", !28, !5} ; [ DW_TAG_lexical_block ]
+!14 = !{!"0x100\00vla\003\008192", !13, !6, !15} ; [ DW_TAG_auto_variable ]
+!15 = !{!"0x1\00\000\000\0032\000\000", null, null, !9, !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!16 = !{!17}
+!17 = !{!"0x21\000\00-1"}        ; [ DW_TAG_subrange_type ]
+!18 = !MDLocation(line: 3, column: 7, scope: !13)
+!19 = !{!"0x100\00i\004\000", !13, !6, !9} ; [ DW_TAG_auto_variable ]
+!20 = !MDLocation(line: 4, column: 7, scope: !13)
+!21 = !MDLocation(line: 5, column: 8, scope: !22)
+!22 = !{!"0xb\005\003\001", !28, !13} ; [ DW_TAG_lexical_block ]
+!23 = !MDLocation(line: 6, column: 5, scope: !24)
+!24 = !{!"0xb\005\0027\002", !28, !22} ; [ DW_TAG_lexical_block ]
+!25 = !MDLocation(line: 7, column: 3, scope: !24)
+!26 = !MDLocation(line: 5, column: 22, scope: !22)
+!27 = !MDLocation(line: 8, column: 1, scope: !13)
+!28 = !{!"bar.c", !"/Users/echristo/tmp"}
+!29 = !{i32 1, !"Debug Info Version", i32 2}
+!30 = !{!"0x102\006"} ; [ DW_TAG_expression ] [DW_OP_deref]
diff --git a/test/DebugInfo/X86/parameters.ll b/test/DebugInfo/X86/parameters.ll
index 4215c21721ef..1655f09790eb 100644
--- a/test/DebugInfo/X86/parameters.ll
+++ b/test/DebugInfo/X86/parameters.ll
@@ -42,13 +42,13 @@
 ; Function Attrs: uwtable
 define void @_ZN7pr147634funcENS_3fooE(%"struct.pr14763::foo"* noalias sret %agg.result, %"struct.pr14763::foo"* %f) #0 {
 entry:
-  call void @llvm.dbg.declare(metadata !{%"struct.pr14763::foo"* %f}, metadata !22), !dbg !24
+  call void @llvm.dbg.declare(metadata %"struct.pr14763::foo"* %f, metadata !22, metadata !{!"0x102"}), !dbg !24
   call void @_ZN7pr147633fooC1ERKS0_(%"struct.pr14763::foo"* %agg.result, %"struct.pr14763::foo"* %f), !dbg !25
   ret void, !dbg !25
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare void @_ZN7pr147633fooC1ERKS0_(%"struct.pr14763::foo"*, %"struct.pr14763::foo"*) #2
 
@@ -58,8 +58,8 @@ entry:
   %b.addr = alloca i8, align 1
   %frombool = zext i1 %b to i8
   store i8 %frombool, i8* %b.addr, align 1
-  call void @llvm.dbg.declare(metadata !{i8* %b.addr}, metadata !26), !dbg !27
-  call void @llvm.dbg.declare(metadata !{%"struct.pr14763::foo"* %g}, metadata !28), !dbg !27
+  call void @llvm.dbg.declare(metadata i8* %b.addr, metadata !26, metadata !{!"0x102"}), !dbg !27
+  call void @llvm.dbg.declare(metadata %"struct.pr14763::foo"* %g, metadata !28, metadata !{!"0x102"}), !dbg !27
   %0 = load i8* %b.addr, align 1, !dbg !29
   %tobool = trunc i8 %0 to i1, !dbg !29
   br i1 %tobool, label %if.then, label %if.end, !dbg !29
@@ -82,37 +82,37 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!21, !33}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/pass.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"pass.cpp", metadata !"/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !17}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"_ZN7pr147634funcENS_3fooE", i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%"struct.pr14763::foo"*, %"struct.pr14763::foo"*)* @_ZN7pr147634funcENS_3fooE, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [func]
-!5 = metadata !{i32 786489, metadata !1, null, metadata !"pr14763", i32 1} ; [ DW_TAG_namespace ] [pr14763] [line 1]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !8}
-!8 = metadata !{i32 786451, metadata !1, metadata !5, metadata !"foo", i32 2, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 2, size 8, align 8, offset 0] [def] [from ]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786478, metadata !1, metadata !8, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16, i32 3} ; [ DW_TAG_subprogram ] [line 3] [foo]
-!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{null, metadata !13, metadata !14}
-!13 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from foo]
-!14 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
-!15 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo]
-!16 = metadata !{i32 786468}
-!17 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func2", metadata !"func2", metadata !"_ZN7pr147635func2EbNS_3fooE", i32 12, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i1, %"struct.pr14763::foo"*)* @_ZN7pr147635func2EbNS_3fooE, null, null, metadata !2, i32 12} ; [ DW_TAG_subprogram ] [line 12] [def] [func2]
-!18 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!19 = metadata !{null, metadata !20, metadata !8}
-!20 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
-!21 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!22 = metadata !{i32 786689, metadata !4, metadata !"f", metadata !23, i32 16777222, metadata !8, i32 8192, i32 0} ; [ DW_TAG_arg_variable ] [f] [line 6]
-!23 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/pass.cpp]
-!24 = metadata !{i32 6, i32 0, metadata !4, null}
-!25 = metadata !{i32 7, i32 0, metadata !4, null}
-!26 = metadata !{i32 786689, metadata !17, metadata !"b", metadata !23, i32 16777228, metadata !20, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 12]
-!27 = metadata !{i32 12, i32 0, metadata !17, null}
-!28 = metadata !{i32 786689, metadata !17, metadata !"g", metadata !23, i32 33554444, metadata !8, i32 8192, i32 0} ; [ DW_TAG_arg_variable ] [g] [line 12]
-!29 = metadata !{i32 13, i32 0, metadata !30, null}
-!30 = metadata !{i32 786443, metadata !1, metadata !17, i32 13, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/pass.cpp]
-!31 = metadata !{i32 14, i32 0, metadata !30, null}
-!32 = metadata !{i32 15, i32 0, metadata !17, null}
-!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/pass.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"pass.cpp", !"/tmp"}
+!2 = !{}
+!3 = !{!4, !17}
+!4 = !{!"0x2e\00func\00func\00_ZN7pr147634funcENS_3fooE\006\000\001\000\006\00256\000\006", !1, !5, !6, null, void (%"struct.pr14763::foo"*, %"struct.pr14763::foo"*)* @_ZN7pr147634funcENS_3fooE, null, null, !2} ; [ DW_TAG_subprogram ] [line 6] [def] [func]
+!5 = !{!"0x39\00pr14763\001", !1, null} ; [ DW_TAG_namespace ] [pr14763] [line 1]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8}
+!8 = !{!"0x13\00foo\002\008\008\000\000\000", !1, !5, null, !9, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 2, size 8, align 8, offset 0] [def] [from ]
+!9 = !{!10}
+!10 = !{!"0x2e\00foo\00foo\00\003\000\000\000\006\00256\000\003", !1, !8, !11, null, null, null, i32 0, !16} ; [ DW_TAG_subprogram ] [line 3] [foo]
+!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{null, !13, !14}
+!13 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from foo]
+!14 = !{!"0x10\00\000\000\000\000\000", null, null, !15} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = !{!"0x26\00\000\000\000\000\000", null, null, !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo]
+!16 = !{i32 786468}
+!17 = !{!"0x2e\00func2\00func2\00_ZN7pr147635func2EbNS_3fooE\0012\000\001\000\006\00256\000\0012", !1, !5, !18, null, void (i1, %"struct.pr14763::foo"*)* @_ZN7pr147635func2EbNS_3fooE, null, null, !2} ; [ DW_TAG_subprogram ] [line 12] [def] [func2]
+!18 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !19, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!19 = !{null, !20, !8}
+!20 = !{!"0x24\00bool\000\008\008\000\000\002", null, null} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
+!21 = !{i32 2, !"Dwarf Version", i32 3}
+!22 = !{!"0x101\00f\0016777222\008192", !4, !23, !8} ; [ DW_TAG_arg_variable ] [f] [line 6]
+!23 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/tmp/pass.cpp]
+!24 = !MDLocation(line: 6, scope: !4)
+!25 = !MDLocation(line: 7, scope: !4)
+!26 = !{!"0x101\00b\0016777228\000", !17, !23, !20} ; [ DW_TAG_arg_variable ] [b] [line 12]
+!27 = !MDLocation(line: 12, scope: !17)
+!28 = !{!"0x101\00g\0033554444\008192", !17, !23, !8} ; [ DW_TAG_arg_variable ] [g] [line 12]
+!29 = !MDLocation(line: 13, scope: !30)
+!30 = !{!"0xb\0013\000\000", !1, !17} ; [ DW_TAG_lexical_block ] [/tmp/pass.cpp]
+!31 = !MDLocation(line: 14, scope: !30)
+!32 = !MDLocation(line: 15, scope: !17)
+!33 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/pieces-1.ll b/test/DebugInfo/X86/pieces-1.ll
new file mode 100644
index 000000000000..4c11a8b969cd
--- /dev/null
+++ b/test/DebugInfo/X86/pieces-1.ll
@@ -0,0 +1,79 @@
+; RUN: llc -O0 %s -filetype=obj -o %t.o
+; RUN: llvm-dwarfdump -debug-dump=loc %t.o | FileCheck %s
+;
+; rdar://problem/15928306
+;
+; Test that we can emit debug info for aggregate values that are split
+; up across multiple registers by SROA.
+;
+;    // Compile with -O1.
+;    typedef struct { long int a; int b;} S;
+;
+;    int foo(S s) {
+;            return s.b;
+;    }
+;
+;
+; CHECK: .debug_loc contents:
+;
+
+; 0x0000000000000000 - 0x0000000000000006: rdi, piece 0x00000008, rsi, piece 0x00000004
+; CHECK:            Beginning address offset: 0x0000000000000000
+; CHECK:               Ending address offset: [[LTMP3:.*]]
+; CHECK:                Location description: 55 93 08 54 93 04
+; 0x0000000000000006 - 0x0000000000000008: rbp-8, piece 0x00000008, rax, piece 0x00000004 )
+; CHECK:            Beginning address offset: [[LTMP3]]
+; CHECK:               Ending address offset: [[END:.*]]
+; CHECK:                Location description: 76 78 93 08 54 93 04
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @foo(i64 %s.coerce0, i32 %s.coerce1) #0 {
+entry:
+  call void @llvm.dbg.value(metadata i64 %s.coerce0, i64 0, metadata !20, metadata !24), !dbg !21
+  call void @llvm.dbg.value(metadata i32 %s.coerce1, i64 0, metadata !22, metadata !27), !dbg !21
+  ret i32 %s.coerce1, !dbg !23
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17, !18}
+!llvm.ident = !{!19}
+
+!0 = !{!"0x11\0012\00clang version 3.5 \001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ]
+!1 = !{!"pieces.c", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\003\000\001\000\006\00256\001\003", !1, !5, !6, null, i32 (i64, i32)* @foo, null, null, !15} ; [ DW_TAG_subprogram ] [line 3] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/pieces.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !9}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x16\00S\001\000\000\000\000", !1, null, !10} ; [ DW_TAG_typedef ] [S] [line 1, size 0, align 0, offset 0] [from ]
+!10 = !{!"0x13\00\001\00128\0064\000\000\000", !1, null, null, !11, null, null, null} ; [ DW_TAG_structure_type ] [line 1, size 128, align 64, offset 0] [def] [from ]
+!11 = !{!12, !14}
+!12 = !{!"0xd\00a\001\0064\0064\000\000", !1, !10, !13} ; [ DW_TAG_member ] [a] [line 1, size 64, align 64, offset 0] [from long int]
+!13 = !{!"0x24\00long int\000\0064\0064\000\000\005", null, null} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
+!14 = !{!"0xd\00b\001\0032\0032\0064\000", !1, !10, !8} ; [ DW_TAG_member ] [b] [line 1, size 32, align 32, offset 64] [from int]
+!15 = !{!16}
+!16 = !{!"0x101\00s\0016777219\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [s] [line 3]
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !{i32 1, !"Debug Info Version", i32 2}
+!19 = !{!"clang version 3.5 "}
+!20 = !{!"0x101\00s\0016777219\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [s] [line 3]
+!21 = !MDLocation(line: 3, scope: !4)
+!22 = !{!"0x101\00s\0016777219\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [s] [line 3]
+!23 = !MDLocation(line: 4, scope: !4)
+!24 = !{!"0x102\00147\000\008"} ; [ DW_TAG_expression ] [DW_OP_piece 0 8] [piece, size 8, offset 0]
+!25 = !{}
+!27 = !{!"0x102\00147\008\004"} ; [ DW_TAG_expression ] [DW_OP_piece 8 4] [piece, size 4, offset 8]
diff --git a/test/DebugInfo/X86/pieces-2.ll b/test/DebugInfo/X86/pieces-2.ll
new file mode 100644
index 000000000000..184840956fd3
--- /dev/null
+++ b/test/DebugInfo/X86/pieces-2.ll
@@ -0,0 +1,91 @@
+; RUN: llc %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s
+;
+;    // Compile with -O1
+;    typedef struct {
+;      int a;
+;      long int b;
+;    } Inner;
+;
+;    typedef struct {
+;      Inner inner[2];
+;    } Outer;
+;
+;    int foo(Outer outer) {
+;      Inner i1 = outer.inner[1];
+;      return i1.a;
+;    }
+;
+;
+; CHECK: DW_TAG_variable [4]
+;                                                  rax, piece 0x00000004
+; CHECK-NEXT: DW_AT_location [DW_FORM_block1]{{.*}}50 93 04
+; CHECK-NEXT:  DW_AT_name {{.*}}"i1"
+;
+; ModuleID = '/Volumes/Data/llvm/test/DebugInfo/X86/sroasplit-1.ll'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+%struct.Outer = type { [2 x %struct.Inner] }
+%struct.Inner = type { i32, i64 }
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @foo(%struct.Outer* byval align 8 %outer) #0 {
+entry:
+  call void @llvm.dbg.declare(metadata %struct.Outer* %outer, metadata !25, metadata !{!"0x102"}), !dbg !26
+  %i1.sroa.0.0..sroa_idx = getelementptr inbounds %struct.Outer* %outer, i64 0, i32 0, i64 1, i32 0, !dbg !27
+  %i1.sroa.0.0.copyload = load i32* %i1.sroa.0.0..sroa_idx, align 8, !dbg !27
+  call void @llvm.dbg.value(metadata i32 %i1.sroa.0.0.copyload, i64 0, metadata !28, metadata !29), !dbg !27
+  %i1.sroa.2.0..sroa_raw_cast = bitcast %struct.Outer* %outer to i8*, !dbg !27
+  %i1.sroa.2.0..sroa_raw_idx = getelementptr inbounds i8* %i1.sroa.2.0..sroa_raw_cast, i64 20, !dbg !27
+  ret i32 %i1.sroa.0.0.copyload, !dbg !32
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #2
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!22, !23}
+!llvm.ident = !{!24}
+
+!0 = !{!"0x11\0012\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/sroasplit-1.c] [DW_LANG_C99]
+!1 = !{!"sroasplit-1.c", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\0010\000\001\000\006\00256\000\0010", !1, !5, !6, null, i32 (%struct.Outer*)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 10] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/sroasplit-1.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !9}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x16\00Outer\008\000\000\000\000", !1, null, !10} ; [ DW_TAG_typedef ] [Outer] [line 8, size 0, align 0, offset 0] [from ]
+!10 = !{!"0x13\00\006\00256\0064\000\000\000", !1, null, null, !11, null, null, null} ; [ DW_TAG_structure_type ] [line 6, size 256, align 64, offset 0] [def] [from ]
+!11 = !{!12}
+!12 = !{!"0xd\00inner\007\00256\0064\000\000", !1, !10, !13} ; [ DW_TAG_member ] [inner] [line 7, size 256, align 64, offset 0] [from ]
+!13 = !{!"0x1\00\000\00256\0064\000\000", null, null, !14, !20, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 256, align 64, offset 0] [from Inner]
+!14 = !{!"0x16\00Inner\004\000\000\000\000", !1, null, !15} ; [ DW_TAG_typedef ] [Inner] [line 4, size 0, align 0, offset 0] [from ]
+!15 = !{!"0x13\00\001\00128\0064\000\000\000", !1, null, null, !16, null, null, null} ; [ DW_TAG_structure_type ] [line 1, size 128, align 64, offset 0] [def] [from ]
+!16 = !{!17, !18}
+!17 = !{!"0xd\00a\002\0032\0032\000\000", !1, !15, !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!18 = !{!"0xd\00b\003\0064\0064\0064\000", !1, !15, !19} ; [ DW_TAG_member ] [b] [line 3, size 64, align 64, offset 64] [from long int]
+!19 = !{!"0x24\00long int\000\0064\0064\000\000\005", null, null} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
+!20 = !{!21}
+!21 = !{!"0x21\000\002"}        ; [ DW_TAG_subrange_type ] [0, 1]
+!22 = !{i32 2, !"Dwarf Version", i32 2}
+!23 = !{i32 1, !"Debug Info Version", i32 2}
+!24 = !{!"clang version 3.5.0 "}
+!25 = !{!"0x101\00outer\0016777226\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [outer] [line 10]
+!26 = !MDLocation(line: 10, scope: !4)
+!27 = !MDLocation(line: 11, scope: !4)
+!28 = !{!"0x100\00i1\0011\000", !4, !5, !14} ; [ DW_TAG_auto_variable ] [i1] [line 11]
+!29 = !{!"0x102\00147\000\004"} ; [ DW_TAG_expression ] [DW_OP_piece 0 4] [piece, size 4, offset 0]
+!31 = !{i32 3, i32 0, i32 12}
+!32 = !MDLocation(line: 12, scope: !4)
diff --git a/test/DebugInfo/X86/pieces-3.ll b/test/DebugInfo/X86/pieces-3.ll
new file mode 100644
index 000000000000..3e4965fcf81a
--- /dev/null
+++ b/test/DebugInfo/X86/pieces-3.ll
@@ -0,0 +1,106 @@
+; RUN: llc %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s
+;
+;    // Compile with -O1
+;    typedef struct {
+;      int a;
+;      int b;
+;    } Inner;
+;
+;    typedef struct {
+;      Inner inner[2];
+;    } Outer;
+;
+;    int foo(Outer outer) {
+;      Inner i1 = outer.inner[1];
+;      return i1.a;
+;    }
+;
+; CHECK: DW_TAG_formal_parameter [3]
+; CHECK-NEXT:   DW_AT_location [DW_FORM_data4]        ([[LOC:.*]])
+; CHECK-NEXT:   DW_AT_name {{.*}}"outer"
+; CHECK: DW_TAG_variable
+;                                                 rsi, piece 0x00000004
+; CHECK-NEXT:   DW_AT_location [DW_FORM_block1]       {{.*}} 54 93 04
+; CHECK-NEXT:   "i1"
+;
+; CHECK: .debug_loc
+; CHECK: [[LOC]]:
+; CHECK: Beginning address offset: 0x0000000000000000
+; CHECK:    Ending address offset: 0x0000000000000004
+; rdi, piece 0x00000008, piece 0x00000004, rsi, piece 0x00000004
+; CHECK: Location description: 55 93 08 93 04 54 93 04 
+;
+; ModuleID = '/Volumes/Data/llvm/test/DebugInfo/X86/sroasplit-2.ll'
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @foo(i64 %outer.coerce0, i64 %outer.coerce1) #0 {
+  call void @llvm.dbg.value(metadata i64 %outer.coerce0, i64 0, metadata !24, metadata !25), !dbg !26
+  call void @llvm.dbg.declare(metadata !{null}, metadata !27, metadata !28), !dbg !26
+  call void @llvm.dbg.value(metadata i64 %outer.coerce1, i64 0, metadata !29, metadata !30), !dbg !26
+  call void @llvm.dbg.declare(metadata !{null}, metadata !31, metadata !32), !dbg !26
+  %outer.sroa.1.8.extract.trunc = trunc i64 %outer.coerce1 to i32, !dbg !33
+  call void @llvm.dbg.value(metadata i32 %outer.sroa.1.8.extract.trunc, i64 0, metadata !34, metadata !35), !dbg !33
+  %outer.sroa.1.12.extract.shift = lshr i64 %outer.coerce1, 32, !dbg !33
+  %outer.sroa.1.12.extract.trunc = trunc i64 %outer.sroa.1.12.extract.shift to i32, !dbg !33
+  call void @llvm.dbg.value(metadata i64 %outer.sroa.1.12.extract.shift, i64 0, metadata !34, metadata !35), !dbg !33
+  call void @llvm.dbg.value(metadata i32 %outer.sroa.1.12.extract.trunc, i64 0, metadata !34, metadata !35), !dbg !33
+  call void @llvm.dbg.declare(metadata !{null}, metadata !34, metadata !35), !dbg !33
+  ret i32 %outer.sroa.1.8.extract.trunc, !dbg !36
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #2
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind ssp uwtable "no-frame-pointer-elim"="true" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21, !22}
+!llvm.ident = !{!23}
+
+!0 = !{!"0x11\0012\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/sroasplit-2.c] [DW_LANG_C99]
+!1 = !{!"sroasplit-2.c", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\0010\000\001\000\006\00256\000\0010", !1, !5, !6, null, i32 (i64, i64)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 10] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/sroasplit-2.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !9}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x16\00Outer\008\000\000\000\000", !1, null, !10} ; [ DW_TAG_typedef ] [Outer] [line 8, size 0, align 0, offset 0] [from ]
+!10 = !{!"0x13\00\006\00128\0032\000\000\000", !1, null, null, !11, null, null, null} ; [ DW_TAG_structure_type ] [line 6, size 128, align 32, offset 0] [def] [from ]
+!11 = !{!12}
+!12 = !{!"0xd\00inner\007\00128\0032\000\000", !1, !10, !13} ; [ DW_TAG_member ] [inner] [line 7, size 128, align 32, offset 0] [from ]
+!13 = !{!"0x1\00\000\00128\0032\000\000", null, null, !14, !19, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 32, offset 0] [from Inner]
+!14 = !{!"0x16\00Inner\004\000\000\000\000", !1, null, !15} ; [ DW_TAG_typedef ] [Inner] [line 4, size 0, align 0, offset 0] [from ]
+!15 = !{!"0x13\00\001\0064\0032\000\000\000", !1, null, null, !16, null, null, null} ; [ DW_TAG_structure_type ] [line 1, size 64, align 32, offset 0] [def] [from ]
+!16 = !{!17, !18}
+!17 = !{!"0xd\00a\002\0032\0032\000\000", !1, !15, !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!18 = !{!"0xd\00b\003\0032\0032\0032\000", !1, !15, !8} ; [ DW_TAG_member ] [b] [line 3, size 32, align 32, offset 32] [from int]
+!19 = !{!20}
+!20 = !{!"0x21\000\002"}        ; [ DW_TAG_subrange_type ] [0, 1]
+!21 = !{i32 2, !"Dwarf Version", i32 2}
+!22 = !{i32 1, !"Debug Info Version", i32 2}
+!23 = !{!"clang version 3.5.0 "}
+!24 = !{!"0x101\00outer\0016777226\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [outer] [line 10]
+!25 = !{!"0x102\00147\000\008"} ; [ DW_TAG_expression ] [DW_OP_piece 0 8] [piece, size 8, offset 0]
+!26 = !MDLocation(line: 10, scope: !4)
+!27 = !{!"0x101\00outer\0016777226\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [outer] [line 10]
+!28 = !{!"0x102\00147\008\008"} ; [ DW_TAG_expression ] [DW_OP_piece 8 8] [piece, size 8, offset 8]
+!29 = !{!"0x101\00outer\0016777226\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [outer] [line 10]
+!30 = !{!"0x102\00147\0012\004"} ; [ DW_TAG_expression ] [DW_OP_piece 12 4] [piece, size 4, offset 12]
+!31 = !{!"0x101\00outer\0016777226\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [outer] [line 10]
+!32 = !{!"0x102\00147\008\004"} ; [ DW_TAG_expression ] [DW_OP_piece 8 4] [piece, size 4, offset 8]
+!33 = !MDLocation(line: 11, scope: !4)
+!34 = !{!"0x100\00i1\0011\000", !4, !5, !14} ; [ DW_TAG_auto_variable ] [i1] [line 11]
+!35 = !{!"0x102\00147\000\004"} ; [ DW_TAG_expression ] [DW_OP_piece 0 4] [piece, size 4, offset 0]
+!36 = !MDLocation(line: 12, scope: !4)
diff --git a/test/DebugInfo/X86/pointer-type-size.ll b/test/DebugInfo/X86/pointer-type-size.ll
index 40dc955ab52c..a7f569df827e 100644
--- a/test/DebugInfo/X86/pointer-type-size.ll
+++ b/test/DebugInfo/X86/pointer-type-size.ll
@@ -11,16 +11,16 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!14}
 
-!0 = metadata !{i32 786449, metadata !13, i32 12, metadata !"clang version 3.1 (trunk 147882)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720948, i32 0, null, metadata !"crass", metadata !"crass", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %struct.crass* @crass, null} ; [ DW_TAG_variable ]
-!6 = metadata !{i32 720937, metadata !13} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786451, metadata !13, null, metadata !"crass", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [crass] [line 1, size 64, align 64, offset 0] [def] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786445, metadata !13, metadata !7, metadata !"ptr", i32 1, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 720934, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ]
-!11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 720932, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!13 = metadata !{metadata !"foo.c", metadata !"/Users/echristo/tmp"}
-!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.1 (trunk 147882)\000\00\000\00\000", !13, !1, !1, !1, !3,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x34\00crass\00crass\00\001\000\001", null, !6, !7, %struct.crass* @crass, null} ; [ DW_TAG_variable ]
+!6 = !{!"0x29", !13} ; [ DW_TAG_file_type ]
+!7 = !{!"0x13\00crass\001\0064\0064\000\000\000", !13, null, null, !8, null, null, null} ; [ DW_TAG_structure_type ] [crass] [line 1, size 64, align 64, offset 0] [def] [from ]
+!8 = !{!9}
+!9 = !{!"0xd\00ptr\001\0064\0064\000\000", !13, !7, !10} ; [ DW_TAG_member ]
+!10 = !{!"0x26\00\000\000\000\000\000", null, null, !11} ; [ DW_TAG_const_type ]
+!11 = !{!"0xf\00\000\0064\0064\000\000", null, null, !12} ; [ DW_TAG_pointer_type ]
+!12 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ]
+!13 = !{!"foo.c", !"/Users/echristo/tmp"}
+!14 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll
index 11c409c16042..53e85acf82cc 100644
--- a/test/DebugInfo/X86/pr11300.ll
+++ b/test/DebugInfo/X86/pr11300.ll
@@ -6,11 +6,11 @@
 ; Skip the definition of zed(foo*)
 ; CHECK: DW_TAG_subprogram
 ; CHECK: DW_TAG_class_type
-; CHECK: [[BAR_DECL:0x[0-9a-f]*]]:     DW_TAG_subprogram
+; CHECK:   DW_TAG_subprogram
 ; CHECK:     DW_AT_MIPS_linkage_name {{.*}} "_ZN3foo3barEv"
 ; CHECK: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
-; CHECK: DW_AT_specification {{.*}} {[[BAR_DECL]]}
+; CHECK:   DW_AT_specification {{.*}} "_ZN3foo3barEv"
 
 %struct.foo = type { i8 }
 
@@ -18,19 +18,19 @@ define void @_Z3zedP3foo(%struct.foo* %x) uwtable {
 entry:
   %x.addr = alloca %struct.foo*, align 8
   store %struct.foo* %x, %struct.foo** %x.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.foo** %x.addr}, metadata !23), !dbg !24
+  call void @llvm.dbg.declare(metadata %struct.foo** %x.addr, metadata !23, metadata !{!"0x102"}), !dbg !24
   %0 = load %struct.foo** %x.addr, align 8, !dbg !25
   call void @_ZN3foo3barEv(%struct.foo* %0), !dbg !25
   ret void, !dbg !27
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define linkonce_odr void @_ZN3foo3barEv(%struct.foo* %this) nounwind uwtable align 2 {
 entry:
   %this.addr = alloca %struct.foo*, align 8
   store %struct.foo* %this, %struct.foo** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.foo** %this.addr}, metadata !28), !dbg !29
+  call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !28, metadata !{!"0x102"}), !dbg !29
   %this1 = load %struct.foo** %this.addr
   ret void, !dbg !30
 }
@@ -38,33 +38,33 @@ entry:
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!33}
 
-!0 = metadata !{i32 786449, metadata !32, i32 4, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5, metadata !20}
-!5 = metadata !{i32 720942, metadata !6, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, null, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [zed]
-!6 = metadata !{i32 720937, metadata !32} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 720898, metadata !32, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_class_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
-!11 = metadata !{metadata !12}
-!12 = metadata !{i32 720942, metadata !6, metadata !10, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16, i32 2} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 720917, i32 0, null, i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!14 = metadata !{null, metadata !15}
-!15 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
-!16 = metadata !{metadata !17}
-!17 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!18 = metadata !{metadata !19}
-!19 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!20 = metadata !{i32 720942, metadata !6, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo3barEv, null, metadata !12, null, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [bar]
-!23 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!24 = metadata !{i32 4, i32 15, metadata !5, null}
-!25 = metadata !{i32 4, i32 20, metadata !26, null}
-!26 = metadata !{i32 786443, metadata !6, metadata !5, i32 4, i32 18, i32 0} ; [ DW_TAG_lexical_block ]
-!27 = metadata !{i32 4, i32 30, metadata !26, null}
-!28 = metadata !{i32 786689, metadata !20, metadata !"this", metadata !6, i32 16777218, metadata !15, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!29 = metadata !{i32 2, i32 8, metadata !20, null}
-!30 = metadata !{i32 2, i32 15, metadata !31, null}
-!31 = metadata !{i32 786443, metadata !6, metadata !20, i32 2, i32 14, i32 1} ; [ DW_TAG_lexical_block ]
-!32 = metadata !{metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build"}
-!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.0 ()\000\00\000\00\000", !32, !1, !1, !3, !1,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5, !20}
+!5 = !{!"0x2e\00zed\00zed\00_Z3zedP3foo\004\000\001\000\006\00256\000\004", !6, !6, !7, null, void (%struct.foo*)* @_Z3zedP3foo, null, null, null} ; [ DW_TAG_subprogram ] [line 4] [def] [zed]
+!6 = !{!"0x29", !32} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9}
+!9 = !{!"0xf\00\000\0064\0064\000\000", null, null, !10} ; [ DW_TAG_pointer_type ]
+!10 = !{!"0x2\00foo\001\008\008\000\000\000", !32, null, null, !11, null, null, null} ; [ DW_TAG_class_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
+!11 = !{!12}
+!12 = !{!"0x2e\00bar\00bar\00_ZN3foo3barEv\002\000\000\000\006\00256\000\002", !6, !10, !13, null, null, null, i32 0, !16} ; [ DW_TAG_subprogram ]
+!13 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !14, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = !{null, !15}
+!15 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !10} ; [ DW_TAG_pointer_type ]
+!16 = !{!17}
+!17 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!18 = !{!19}
+!19 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!20 = !{!"0x2e\00bar\00bar\00_ZN3foo3barEv\002\000\001\000\006\00256\000\002", !6, null, !13, null, void (%struct.foo*)* @_ZN3foo3barEv, null, !12, null} ; [ DW_TAG_subprogram ] [line 2] [def] [bar]
+!23 = !{!"0x101\00x\0016777220\000", !5, !6, !9} ; [ DW_TAG_arg_variable ]
+!24 = !MDLocation(line: 4, column: 15, scope: !5)
+!25 = !MDLocation(line: 4, column: 20, scope: !26)
+!26 = !{!"0xb\004\0018\000", !6, !5} ; [ DW_TAG_lexical_block ]
+!27 = !MDLocation(line: 4, column: 30, scope: !26)
+!28 = !{!"0x101\00this\0016777218\0064", !20, !6, !15} ; [ DW_TAG_arg_variable ]
+!29 = !MDLocation(line: 2, column: 8, scope: !20)
+!30 = !MDLocation(line: 2, column: 15, scope: !31)
+!31 = !{!"0xb\002\0014\001", !6, !20} ; [ DW_TAG_lexical_block ]
+!32 = !{!"/home/espindola/llvm/test.cc", !"/home/espindola/tmpfs/build"}
+!33 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/pr12831.ll b/test/DebugInfo/X86/pr12831.ll
index 79d00eddba28..5adaeaf7d31a 100644
--- a/test/DebugInfo/X86/pr12831.ll
+++ b/test/DebugInfo/X86/pr12831.ll
@@ -9,8 +9,8 @@ target triple = "x86_64-unknown-linux-gnu"
 %class.anon = type { i8 }
 %class.anon.0 = type { i8 }
 
-@"_ZN8functionIFvvEEC1IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_" = alias internal void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_"
-@"_ZN8functionIFvvEEC1IZN17BPLFunctionWriter9writeExprEvE3$_0EET_" = alias internal void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_"
+@"_ZN8functionIFvvEEC1IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_" = internal alias void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_"
+@"_ZN8functionIFvvEEC1IZN17BPLFunctionWriter9writeExprEvE3$_0EET_" = internal alias void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_"
 
 define void @_ZN17BPLFunctionWriter9writeExprEv(%class.BPLFunctionWriter* %this) nounwind uwtable align 2 {
 entry:
@@ -20,7 +20,7 @@ entry:
   %agg.tmp4 = alloca %class.function, align 1
   %agg.tmp5 = alloca %class.anon.0, align 1
   store %class.BPLFunctionWriter* %this, %class.BPLFunctionWriter** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.BPLFunctionWriter** %this.addr}, metadata !133), !dbg !135
+  call void @llvm.dbg.declare(metadata %class.BPLFunctionWriter** %this.addr, metadata !133, metadata !{!"0x102"}), !dbg !135
   %this1 = load %class.BPLFunctionWriter** %this.addr
   %MW = getelementptr inbounds %class.BPLFunctionWriter* %this1, i32 0, i32 0, !dbg !136
   %0 = load %struct.BPLModuleWriter** %MW, align 8, !dbg !136
@@ -33,7 +33,7 @@ entry:
   ret void, !dbg !139
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare void @_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE(%struct.BPLModuleWriter*)
 
@@ -42,8 +42,8 @@ entry:
   %this.addr = alloca %class.function*, align 8
   %__f = alloca %class.anon.0, align 1
   store %class.function* %this, %class.function** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.function** %this.addr}, metadata !140), !dbg !142
-  call void @llvm.dbg.declare(metadata !{%class.anon.0* %__f}, metadata !143), !dbg !144
+  call void @llvm.dbg.declare(metadata %class.function** %this.addr, metadata !140, metadata !{!"0x102"}), !dbg !142
+  call void @llvm.dbg.declare(metadata %class.anon.0* %__f, metadata !143, metadata !{!"0x102"}), !dbg !144
   %this1 = load %class.function** %this.addr
   call void @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_"(%class.anon.0* %__f), !dbg !145
   ret void, !dbg !147
@@ -61,8 +61,8 @@ entry:
   %this.addr = alloca %class.function*, align 8
   %__f = alloca %class.anon, align 1
   store %class.function* %this, %class.function** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.function** %this.addr}, metadata !150), !dbg !151
-  call void @llvm.dbg.declare(metadata !{%class.anon* %__f}, metadata !152), !dbg !153
+  call void @llvm.dbg.declare(metadata %class.function** %this.addr, metadata !150, metadata !{!"0x102"}), !dbg !151
+  call void @llvm.dbg.declare(metadata %class.anon* %__f, metadata !152, metadata !{!"0x102"}), !dbg !153
   %this1 = load %class.function** %this.addr
   call void @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_"(%class.anon* %__f), !dbg !154
   ret void, !dbg !156
@@ -78,163 +78,163 @@ entry:
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!162}
 
-!0 = metadata !{i32 786449, metadata !161, i32 4, metadata !"clang version 3.2 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !128, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5, metadata !106, metadata !107, metadata !126, metadata !127}
-!5 = metadata !{i32 786478, metadata !6, null, metadata !"writeExpr", metadata !"writeExpr", metadata !"_ZN17BPLFunctionWriter9writeExprEv", i32 19, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.BPLFunctionWriter*)* @_ZN17BPLFunctionWriter9writeExprEv, null, metadata !103, metadata !1, i32 19} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !160} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 786434, metadata !160, null, metadata !"BPLFunctionWriter", i32 15, i64 64, i64 64, i32 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_class_type ] [BPLFunctionWriter] [line 15, size 64, align 64, offset 0] [def] [from ]
-!11 = metadata !{metadata !12, metadata !103}
-!12 = metadata !{i32 786445, metadata !160, metadata !10, metadata !"MW", i32 16, i64 64, i64 64, i64 0, i32 1, metadata !13} ; [ DW_TAG_member ]
-!13 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
-!14 = metadata !{i32 786434, metadata !160, null, metadata !"BPLModuleWriter", i32 12, i64 8, i64 8, i32 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_class_type ] [BPLModuleWriter] [line 12, size 8, align 8, offset 0] [def] [from ]
-!15 = metadata !{metadata !16}
-!16 = metadata !{i32 786478, metadata !6, metadata !14, metadata !"writeIntrinsic", metadata !"writeIntrinsic", metadata !"_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE", i32 13, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !101, i32 13} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!18 = metadata !{null, metadata !19, metadata !20}
-!19 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !14} ; [ DW_TAG_pointer_type ]
-!20 = metadata !{i32 786434, metadata !160, null, metadata !"function<void ()>", i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !21, i32 0, null, metadata !97, null} ; [ DW_TAG_class_type ] [function<void ()>] [line 6, size 8, align 8, offset 0] [def] [from ]
-!21 = metadata !{metadata !22, metadata !51, metadata !58, metadata !86, metadata !92}
-!22 = metadata !{i32 786478, metadata !6, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"", i32 8, metadata !23, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !47, i32 0, metadata !49, i32 8} ; [ DW_TAG_subprogram ]
-!23 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !24, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!24 = metadata !{null, metadata !25, metadata !26}
-!25 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !20} ; [ DW_TAG_pointer_type ]
-!26 = metadata !{i32 786434, metadata !160, metadata !5, metadata !"", i32 20, i64 8, i64 8, i32 0, i32 0, null, metadata !27, i32 0, null, null, null} ; [ DW_TAG_class_type ] [line 20, size 8, align 8, offset 0] [def] [from ]
-!27 = metadata !{metadata !28, metadata !35, metadata !41}
-!28 = metadata !{i32 786478, metadata !6, metadata !26, metadata !"operator()", metadata !"operator()", metadata !"", i32 20, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !33, i32 20} ; [ DW_TAG_subprogram ]
-!29 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!30 = metadata !{null, metadata !31}
-!31 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !32} ; [ DW_TAG_pointer_type ]
-!32 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_const_type ]
-!33 = metadata !{metadata !34}
-!34 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!35 = metadata !{i32 786478, metadata !6, metadata !26, metadata !"~", metadata !"~", metadata !"", i32 20, metadata !36, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !39, i32 20} ; [ DW_TAG_subprogram ]
-!36 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!37 = metadata !{null, metadata !38}
-!38 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !26} ; [ DW_TAG_pointer_type ]
-!39 = metadata !{metadata !40}
-!40 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!41 = metadata !{i32 786478, metadata !6, metadata !26, metadata !"", metadata !"", metadata !"", i32 20, metadata !42, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !45, i32 20} ; [ DW_TAG_subprogram ]
-!42 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !43, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!43 = metadata !{null, metadata !38, metadata !44}
-!44 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_rvalue_reference_type ]
-!45 = metadata !{metadata !46}
-!46 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!47 = metadata !{metadata !48}
-!48 = metadata !{i32 786479, null, metadata !"_Functor", metadata !26, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!49 = metadata !{metadata !50}
-!50 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!51 = metadata !{i32 786478, metadata !6, metadata !20, metadata !"function<function<void ()> >", metadata !"function<function<void ()> >", metadata !"", i32 8, metadata !52, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !54, i32 0, metadata !56, i32 8} ; [ DW_TAG_subprogram ]
-!52 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !53, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!53 = metadata !{null, metadata !25, metadata !20}
-!54 = metadata !{metadata !55}
-!55 = metadata !{i32 786479, null, metadata !"_Functor", metadata !20, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!56 = metadata !{metadata !57}
-!57 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!58 = metadata !{i32 786478, metadata !6, metadata !20, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"", i32 8, metadata !59, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !82, i32 0, metadata !84, i32 8} ; [ DW_TAG_subprogram ]
-!59 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !60, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!60 = metadata !{null, metadata !25, metadata !61}
-!61 = metadata !{i32 786434, metadata !160, metadata !5, metadata !"", i32 23, i64 8, i64 8, i32 0, i32 0, null, metadata !62, i32 0, null, null, null} ; [ DW_TAG_class_type ] [line 23, size 8, align 8, offset 0] [def] [from ]
-!62 = metadata !{metadata !63, metadata !70, metadata !76}
-!63 = metadata !{i32 786478, metadata !6, metadata !61, metadata !"operator()", metadata !"operator()", metadata !"", i32 23, metadata !64, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !68, i32 23} ; [ DW_TAG_subprogram ]
-!64 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !65, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!65 = metadata !{null, metadata !66}
-!66 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !67} ; [ DW_TAG_pointer_type ]
-!67 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_const_type ]
-!68 = metadata !{metadata !69}
-!69 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!70 = metadata !{i32 786478, metadata !6, metadata !61, metadata !"~", metadata !"~", metadata !"", i32 23, metadata !71, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !74, i32 23} ; [ DW_TAG_subprogram ]
-!71 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !72, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!72 = metadata !{null, metadata !73}
-!73 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !61} ; [ DW_TAG_pointer_type ]
-!74 = metadata !{metadata !75}
-!75 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!76 = metadata !{i32 786478, metadata !6, metadata !61, metadata !"", metadata !"", metadata !"", i32 23, metadata !77, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !80, i32 23} ; [ DW_TAG_subprogram ]
-!77 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !78, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!78 = metadata !{null, metadata !73, metadata !79}
-!79 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_rvalue_reference_type ]
-!80 = metadata !{metadata !81}
-!81 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!82 = metadata !{metadata !83}
-!83 = metadata !{i32 786479, null, metadata !"_Functor", metadata !61, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!84 = metadata !{metadata !85}
-!85 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!86 = metadata !{i32 786478, metadata !6, metadata !20, metadata !"function", metadata !"function", metadata !"", i32 6, metadata !87, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !90, i32 6} ; [ DW_TAG_subprogram ]
-!87 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !88, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!88 = metadata !{null, metadata !25, metadata !89}
-!89 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_rvalue_reference_type ]
-!90 = metadata !{metadata !91}
-!91 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!92 = metadata !{i32 786478, metadata !6, metadata !20, metadata !"~function", metadata !"~function", metadata !"", i32 6, metadata !93, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !95, i32 6} ; [ DW_TAG_subprogram ]
-!93 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !94, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!94 = metadata !{null, metadata !25}
-!95 = metadata !{metadata !96}
-!96 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ]
-!97 = metadata !{metadata !98}
-!98 = metadata !{i32 786479, null, metadata !"T", metadata !99, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!99 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !100, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!100 = metadata !{null}
-!101 = metadata !{metadata !102}
-!102 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
-!103 = metadata !{i32 786478, metadata !6, metadata !10, metadata !"writeExpr", metadata !"writeExpr", metadata !"_ZN17BPLFunctionWriter9writeExprEv", i32 17, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !104, i32 17} ; [ DW_TAG_subprogram ]
-!104 = metadata !{metadata !105}
-!105 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
-!106 = metadata !{i32 786478, metadata !6, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", i32 8, metadata !59, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", metadata !82, metadata !58, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
-!107 = metadata !{i32 786478, metadata !6, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", i32 3, metadata !108, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon.0*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", metadata !111, metadata !113, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
-!108 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !109, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!109 = metadata !{null, metadata !110}
-!110 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !61} ; [ DW_TAG_reference_type ]
-!111 = metadata !{metadata !112}
-!112 = metadata !{i32 786479, null, metadata !"_Tp", metadata !61, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!113 = metadata !{i32 786478, metadata !6, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", i32 3, metadata !108, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !111, i32 0, metadata !124, i32 3} ; [ DW_TAG_subprogram ]
-!114 = metadata !{i32 786434, metadata !160, null, metadata !"_Base_manager", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !115, i32 0, null, null, null} ; [ DW_TAG_class_type ] [_Base_manager] [line 1, size 8, align 8, offset 0] [def] [from ]
-!115 = metadata !{metadata !116, metadata !113}
-!116 = metadata !{i32 786478, metadata !6, metadata !114, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", i32 3, metadata !117, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, metadata !120, i32 0, metadata !122, i32 3} ; [ DW_TAG_subprogram ]
-!117 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !118, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!118 = metadata !{null, metadata !119}
-!119 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !26} ; [ DW_TAG_reference_type ]
-!120 = metadata !{metadata !121}
-!121 = metadata !{i32 786479, null, metadata !"_Tp", metadata !26, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!122 = metadata !{metadata !123}
-!123 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
-!124 = metadata !{metadata !125}
-!125 = metadata !{i32 786468}                     ; [ DW_TAG_base_type ]
-!126 = metadata !{i32 786478, metadata !6, null, metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", i32 8, metadata !23, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", metadata !47, metadata !22, metadata !1, i32 8} ; [ DW_TAG_subprogram ]
-!127 = metadata !{i32 786478, metadata !6, null, metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >", metadata !"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", i32 3, metadata !117, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.anon*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", metadata !120, metadata !116, metadata !1, i32 3} ; [ DW_TAG_subprogram ]
-!128 = metadata !{metadata !130}
-!130 = metadata !{i32 786484, i32 0, metadata !114, metadata !"__stored_locally", metadata !"__stored_locally", metadata !"__stored_locally", metadata !6, i32 2, metadata !131, i32 1, i32 1, i1 true, null} ; [ DW_TAG_variable ]
-!131 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !132} ; [ DW_TAG_const_type ]
-!132 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
-!133 = metadata !{i32 786689, metadata !5, metadata !"this", metadata !6, i32 16777235, metadata !134, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!134 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!135 = metadata !{i32 19, i32 39, metadata !5, null}
-!136 = metadata !{i32 20, i32 17, metadata !137, null}
-!137 = metadata !{i32 786443, metadata !6, metadata !5, i32 19, i32 51, i32 0} ; [ DW_TAG_lexical_block ]
-!138 = metadata !{i32 23, i32 17, metadata !137, null}
-!139 = metadata !{i32 26, i32 15, metadata !137, null}
-!140 = metadata !{i32 786689, metadata !106, metadata !"this", metadata !6, i32 16777224, metadata !141, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!141 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ]
-!142 = metadata !{i32 8, i32 45, metadata !106, null}
-!143 = metadata !{i32 786689, metadata !106, metadata !"__f", metadata !6, i32 33554440, metadata !61, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!144 = metadata !{i32 8, i32 63, metadata !106, null}
-!145 = metadata !{i32 9, i32 9, metadata !146, null}
-!146 = metadata !{i32 786443, metadata !6, metadata !106, i32 8, i32 81, i32 1} ; [ DW_TAG_lexical_block ]
-!147 = metadata !{i32 10, i32 13, metadata !146, null}
-!148 = metadata !{i32 4, i32 5, metadata !149, null}
-!149 = metadata !{i32 786443, metadata !6, metadata !107, i32 3, i32 105, i32 2} ; [ DW_TAG_lexical_block ]
-!150 = metadata !{i32 786689, metadata !126, metadata !"this", metadata !6, i32 16777224, metadata !141, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!151 = metadata !{i32 8, i32 45, metadata !126, null}
-!152 = metadata !{i32 786689, metadata !126, metadata !"__f", metadata !6, i32 33554440, metadata !26, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!153 = metadata !{i32 8, i32 63, metadata !126, null}
-!154 = metadata !{i32 9, i32 9, metadata !155, null}
-!155 = metadata !{i32 786443, metadata !6, metadata !126, i32 8, i32 81, i32 3} ; [ DW_TAG_lexical_block ]
-!156 = metadata !{i32 10, i32 13, metadata !155, null}
-!157 = metadata !{i32 4, i32 5, metadata !158, null}
-!158 = metadata !{i32 786443, metadata !6, metadata !127, i32 3, i32 105, i32 4} ; [ DW_TAG_lexical_block ]
-!159 = metadata !{i32 786473, metadata !161} ; [ DW_TAG_file_type ]
-!160 = metadata !{metadata !"BPLFunctionWriter2.ii", metadata !"/home/peter/crashdelta"}
-!161 = metadata !{metadata !"BPLFunctionWriter.cpp", metadata !"/home/peter/crashdelta"}
-!162 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.2 \000\00\000\00\000", !161, !1, !1, !3, !128, null} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5, !106, !107, !126, !127}
+!5 = !{!"0x2e\00writeExpr\00writeExpr\00_ZN17BPLFunctionWriter9writeExprEv\0019\000\001\000\006\00256\000\0019", !6, null, !7, null, void (%class.BPLFunctionWriter*)* @_ZN17BPLFunctionWriter9writeExprEv, null, !103, !1} ; [ DW_TAG_subprogram ]
+!6 = !{!"0x29", !160} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9}
+!9 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !10} ; [ DW_TAG_pointer_type ]
+!10 = !{!"0x2\00BPLFunctionWriter\0015\0064\0064\000\000\000", !160, null, null, !11, null, null, null} ; [ DW_TAG_class_type ] [BPLFunctionWriter] [line 15, size 64, align 64, offset 0] [def] [from ]
+!11 = !{!12, !103}
+!12 = !{!"0xd\00MW\0016\0064\0064\000\001", !160, !10, !13} ; [ DW_TAG_member ]
+!13 = !{!"0xf\00\000\0064\0064\000\000", null, null, !14} ; [ DW_TAG_pointer_type ]
+!14 = !{!"0x2\00BPLModuleWriter\0012\008\008\000\000\000", !160, null, null, !15, null, null, null} ; [ DW_TAG_class_type ] [BPLModuleWriter] [line 12, size 8, align 8, offset 0] [def] [from ]
+!15 = !{!16}
+!16 = !{!"0x2e\00writeIntrinsic\00writeIntrinsic\00_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE\0013\000\000\000\006\00256\000\0013", !6, !14, !17, null, null, null, i32 0, !101} ; [ DW_TAG_subprogram ]
+!17 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !18, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = !{null, !19, !20}
+!19 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !14} ; [ DW_TAG_pointer_type ]
+!20 = !{!"0x2\00function<void ()>\006\008\008\000\000\000", !160, null, null, !21, null, !97, null} ; [ DW_TAG_class_type ] [function<void ()>] [line 6, size 8, align 8, offset 0] [def] [from ]
+!21 = !{!22, !51, !58, !86, !92}
+!22 = !{!"0x2e\00function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >\00function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >\00\008\000\000\000\006\00256\000\008", !6, !20, !23, null, null, !47, i32 0, !49} ; [ DW_TAG_subprogram ]
+!23 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !24, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!24 = !{null, !25, !26}
+!25 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !20} ; [ DW_TAG_pointer_type ]
+!26 = !{!"0x2\00\0020\008\008\000\000\000", !160, !5, null, !27, null, null, null} ; [ DW_TAG_class_type ] [line 20, size 8, align 8, offset 0] [def] [from ]
+!27 = !{!28, !35, !41}
+!28 = !{!"0x2e\00operator()\00operator()\00\0020\000\000\000\006\00256\000\0020", !6, !26, !29, null, null, null, i32 0, !33} ; [ DW_TAG_subprogram ]
+!29 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !30, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!30 = !{null, !31}
+!31 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !32} ; [ DW_TAG_pointer_type ]
+!32 = !{!"0x26\00\000\000\000\000\000", null, null, !26} ; [ DW_TAG_const_type ]
+!33 = !{!34}
+!34 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!35 = !{!"0x2e\00~\00~\00\0020\000\000\000\006\00320\000\0020", !6, !26, !36, null, null, null, i32 0, !39} ; [ DW_TAG_subprogram ]
+!36 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !37, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!37 = !{null, !38}
+!38 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !26} ; [ DW_TAG_pointer_type ]
+!39 = !{!40}
+!40 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!41 = !{!"0x2e\00\00\00\0020\000\000\000\006\00320\000\0020", !6, !26, !42, null, null, null, i32 0, !45} ; [ DW_TAG_subprogram ]
+!42 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !43, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!43 = !{null, !38, !44}
+!44 = !{!"0x42\00\000\000\000\000\000", null, null, !26} ; [ DW_TAG_rvalue_reference_type ]
+!45 = !{!46}
+!46 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!47 = !{!48}
+!48 = !{!"0x2f\00_Functor\000\000", null, !26, null} ; [ DW_TAG_template_type_parameter ]
+!49 = !{!50}
+!50 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!51 = !{!"0x2e\00function<function<void ()> >\00function<function<void ()> >\00\008\000\000\000\006\00256\000\008", !6, !20, !52, null, null, !54, i32 0, !56} ; [ DW_TAG_subprogram ]
+!52 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !53, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!53 = !{null, !25, !20}
+!54 = !{!55}
+!55 = !{!"0x2f\00_Functor\000\000", null, !20, null} ; [ DW_TAG_template_type_parameter ]
+!56 = !{!57}
+!57 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!58 = !{!"0x2e\00function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >\00function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >\00\008\000\000\000\006\00256\000\008", !6, !20, !59, null, null, !82, i32 0, !84} ; [ DW_TAG_subprogram ]
+!59 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !60, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!60 = !{null, !25, !61}
+!61 = !{!"0x2\00\0023\008\008\000\000\000", !160, !5, null, !62, null, null, null} ; [ DW_TAG_class_type ] [line 23, size 8, align 8, offset 0] [def] [from ]
+!62 = !{!63, !70, !76}
+!63 = !{!"0x2e\00operator()\00operator()\00\0023\000\000\000\006\00256\000\0023", !6, !61, !64, null, null, null, i32 0, !68} ; [ DW_TAG_subprogram ]
+!64 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !65, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!65 = !{null, !66}
+!66 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !67} ; [ DW_TAG_pointer_type ]
+!67 = !{!"0x26\00\000\000\000\000\000", null, null, !61} ; [ DW_TAG_const_type ]
+!68 = !{!69}
+!69 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!70 = !{!"0x2e\00~\00~\00\0023\000\000\000\006\00320\000\0023", !6, !61, !71, null, null, null, i32 0, !74} ; [ DW_TAG_subprogram ]
+!71 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !72, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!72 = !{null, !73}
+!73 = !{!"0xf\00\000\0064\0064\000\0064", i32 0, null, !61} ; [ DW_TAG_pointer_type ]
+!74 = !{!75}
+!75 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!76 = !{!"0x2e\00\00\00\0023\000\000\000\006\00320\000\0023", !6, !61, !77, null, null, null, i32 0, !80} ; [ DW_TAG_subprogram ]
+!77 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !78, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!78 = !{null, !73, !79}
+!79 = !{!"0x42\00\000\000\000\000\000", null, null, !61} ; [ DW_TAG_rvalue_reference_type ]
+!80 = !{!81}
+!81 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!82 = !{!83}
+!83 = !{!"0x2f\00_Functor\000\000", null, !61, null} ; [ DW_TAG_template_type_parameter ]
+!84 = !{!85}
+!85 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!86 = !{!"0x2e\00function\00function\00\006\000\000\000\006\00320\000\006", !6, !20, !87, null, null, null, i32 0, !90} ; [ DW_TAG_subprogram ]
+!87 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !88, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!88 = !{null, !25, !89}
+!89 = !{!"0x42\00\000\000\000\000\000", null, null, !20} ; [ DW_TAG_rvalue_reference_type ]
+!90 = !{!91}
+!91 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!92 = !{!"0x2e\00~function\00~function\00\006\000\000\000\006\00320\000\006", !6, !20, !93, null, null, null, i32 0, !95} ; [ DW_TAG_subprogram ]
+!93 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !94, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!94 = !{null, !25}
+!95 = !{!96}
+!96 = !{!"0x24"}                      ; [ DW_TAG_base_type ]
+!97 = !{!98}
+!98 = !{!"0x2f\00T\000\000", null, !99, null} ; [ DW_TAG_template_type_parameter ]
+!99 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !100, i32 0} ; [ DW_TAG_subroutine_type ]
+!100 = !{null}
+!101 = !{!102}
+!102 = !{!"0x24"}                     ; [ DW_TAG_base_type ]
+!103 = !{!"0x2e\00writeExpr\00writeExpr\00_ZN17BPLFunctionWriter9writeExprEv\0017\000\000\000\006\00257\000\0017", !6, !10, !7, null, null, null, i32 0, !104} ; [ DW_TAG_subprogram ]
+!104 = !{!105}
+!105 = !{!"0x24"}                     ; [ DW_TAG_base_type ]
+!106 = !{!"0x2e\00function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >\00function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >\00_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_\008\001\001\000\006\00256\000\008", !6, null, !59, null, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_", !82, !58, !1} ; [ DW_TAG_subprogram ]
+!107 = !{!"0x2e\00_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >\00_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >\00_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_\003\001\001\000\006\00256\000\003", !6, null, !108, null, void (%class.anon.0*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_", !111, !113, !1} ; [ DW_TAG_subprogram ]
+!108 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !109, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!109 = !{null, !110}
+!110 = !{!"0x10\00\000\000\000\000\000", null, null, !61} ; [ DW_TAG_reference_type ]
+!111 = !{!112}
+!112 = !{!"0x2f\00_Tp\000\000", null, !61, null} ; [ DW_TAG_template_type_parameter ]
+!113 = !{!"0x2e\00_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >\00_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:23:36> >\00_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_\003\000\000\000\006\00256\000\003", !6, !114, !108, null, null, !111, i32 0, !124} ; [ DW_TAG_subprogram ]
+!114 = !{!"0x2\00_Base_manager\001\008\008\000\000\000", !160, null, null, !115, null, null, null} ; [ DW_TAG_class_type ] [_Base_manager] [line 1, size 8, align 8, offset 0] [def] [from ]
+!115 = !{!116, !113}
+!116 = !{!"0x2e\00_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >\00_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >\00_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_\003\000\000\000\006\00256\000\003", !6, !114, !117, null, null, !120, i32 0, !122} ; [ DW_TAG_subprogram ]
+!117 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !118, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!118 = !{null, !119}
+!119 = !{!"0x10\00\000\000\000\000\000", null, null, !26} ; [ DW_TAG_reference_type ]
+!120 = !{!121}
+!121 = !{!"0x2f\00_Tp\000\000", null, !26, null} ; [ DW_TAG_template_type_parameter ]
+!122 = !{!123}
+!123 = !{!"0x24"}                     ; [ DW_TAG_base_type ]
+!124 = !{!125}
+!125 = !{!"0x24"}                     ; [ DW_TAG_base_type ]
+!126 = !{!"0x2e\00function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >\00function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >\00_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_\008\001\001\000\006\00256\000\008", !6, null, !23, null, void (%class.function*)* @"_ZN8functionIFvvEEC2IZN17BPLFunctionWriter9writeExprEvE3$_0EET_", !47, !22, !1} ; [ DW_TAG_subprogram ]
+!127 = !{!"0x2e\00_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >\00_M_not_empty_function<BPLFunctionWriter::<lambda at BPLFunctionWriter2.ii:20:36> >\00_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_\003\001\001\000\006\00256\000\003", !6, null, !117, null, void (%class.anon*)* @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_", !120, !116, !1} ; [ DW_TAG_subprogram ]
+!128 = !{!130}
+!130 = !{!"0x34\00__stored_locally\00__stored_locally\00__stored_locally\002\001\001", !114, !6, !131, i1 1, null} ; [ DW_TAG_variable ]
+!131 = !{!"0x26\00\000\000\000\000\000", null, null, !132} ; [ DW_TAG_const_type ]
+!132 = !{!"0x24\00bool\000\008\008\000\000\002", null, null} ; [ DW_TAG_base_type ]
+!133 = !{!"0x101\00this\0016777235\0064", !5, !6, !134} ; [ DW_TAG_arg_variable ]
+!134 = !{!"0xf\00\000\0064\0064\000\000", null, null, !10} ; [ DW_TAG_pointer_type ]
+!135 = !MDLocation(line: 19, column: 39, scope: !5)
+!136 = !MDLocation(line: 20, column: 17, scope: !137)
+!137 = !{!"0xb\0019\0051\000", !6, !5} ; [ DW_TAG_lexical_block ]
+!138 = !MDLocation(line: 23, column: 17, scope: !137)
+!139 = !MDLocation(line: 26, column: 15, scope: !137)
+!140 = !{!"0x101\00this\0016777224\0064", !106, !6, !141} ; [ DW_TAG_arg_variable ]
+!141 = !{!"0xf\00\000\0064\0064\000\000", null, null, !20} ; [ DW_TAG_pointer_type ]
+!142 = !MDLocation(line: 8, column: 45, scope: !106)
+!143 = !{!"0x101\00__f\0033554440\000", !106, !6, !61} ; [ DW_TAG_arg_variable ]
+!144 = !MDLocation(line: 8, column: 63, scope: !106)
+!145 = !MDLocation(line: 9, column: 9, scope: !146)
+!146 = !{!"0xb\008\0081\001", !6, !106} ; [ DW_TAG_lexical_block ]
+!147 = !MDLocation(line: 10, column: 13, scope: !146)
+!148 = !MDLocation(line: 4, column: 5, scope: !149)
+!149 = !{!"0xb\003\00105\002", !6, !107} ; [ DW_TAG_lexical_block ]
+!150 = !{!"0x101\00this\0016777224\0064", !126, !6, !141} ; [ DW_TAG_arg_variable ]
+!151 = !MDLocation(line: 8, column: 45, scope: !126)
+!152 = !{!"0x101\00__f\0033554440\000", !126, !6, !26} ; [ DW_TAG_arg_variable ]
+!153 = !MDLocation(line: 8, column: 63, scope: !126)
+!154 = !MDLocation(line: 9, column: 9, scope: !155)
+!155 = !{!"0xb\008\0081\003", !6, !126} ; [ DW_TAG_lexical_block ]
+!156 = !MDLocation(line: 10, column: 13, scope: !155)
+!157 = !MDLocation(line: 4, column: 5, scope: !158)
+!158 = !{!"0xb\003\00105\004", !6, !127} ; [ DW_TAG_lexical_block ]
+!159 = !{!"0x29", !161} ; [ DW_TAG_file_type ]
+!160 = !{!"BPLFunctionWriter2.ii", !"/home/peter/crashdelta"}
+!161 = !{!"BPLFunctionWriter.cpp", !"/home/peter/crashdelta"}
+!162 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/pr13303.ll b/test/DebugInfo/X86/pr13303.ll
index 16e5966f84db..2ca697a3e45d 100644
--- a/test/DebugInfo/X86/pr13303.ll
+++ b/test/DebugInfo/X86/pr13303.ll
@@ -15,15 +15,15 @@ entry:
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!13}
 
-!0 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang version 3.2 (trunk 160143)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/probinson/PR13303.c] [DW_LANG_C99]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !12, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
-!6 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 1, i32 14, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !12, metadata !5, i32 1, i32 12, i32 0} ; [ DW_TAG_lexical_block ] [/home/probinson/PR13303.c]
-!12 = metadata !{metadata !"PR13303.c", metadata !"/home/probinson"}
-!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.2 (trunk 160143)\000\00\000\00\000", !12, !1, !1, !3, !1,  !1} ; [ DW_TAG_compile_unit ] [/home/probinson/PR13303.c] [DW_LANG_C99]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00main\00main\00\001\000\001\000\006\000\000\001", !12, !6, !7, null, i32 ()* @main, null, null, !1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
+!6 = !{!"0x29", !12} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !MDLocation(line: 1, column: 14, scope: !11)
+!11 = !{!"0xb\001\0012\000", !12, !5} ; [ DW_TAG_lexical_block ] [/home/probinson/PR13303.c]
+!12 = !{!"PR13303.c", !"/home/probinson"}
+!13 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/pr19307.ll b/test/DebugInfo/X86/pr19307.ll
index 07e3a4255b08..c05dc475d63b 100644
--- a/test/DebugInfo/X86/pr19307.ll
+++ b/test/DebugInfo/X86/pr19307.ll
@@ -20,10 +20,8 @@
 ; Verify that we have proper range in debug_loc section:
 ; CHECK: .Ldebug_loc{{[0-9]+}}:
 ; CHECK: DW_OP_breg1
-; CHECK:      .Lset{{[0-9]+}} = [[START_LABEL]]-.Lfunc_begin0
-; CHECK-NEXT: .quad .Lset{{[0-9]+}}
-; CHECK-NEXT: .Lset{{[0-9]+}} = .Lfunc_end0-.Lfunc_begin0
-; CHECK-NEXT: .quad .Lset{{[0-9]+}}
+; CHECK:      .quad [[START_LABEL]]-.Lfunc_begin0
+; CHECK-NEXT: .quad .Lfunc_end0-.Lfunc_begin0
 ; CHECK: DW_OP_breg6
 ; CHECK: DW_OP_deref
 
@@ -42,10 +40,10 @@ entry:
   %offset.addr = alloca i64*, align 8
   %limit.addr = alloca i64*, align 8
   store i64* %offset, i64** %offset.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i64** %offset.addr}, metadata !45), !dbg !46
+  call void @llvm.dbg.declare(metadata i64** %offset.addr, metadata !45, metadata !{!"0x102"}), !dbg !46
   store i64* %limit, i64** %limit.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i64** %limit.addr}, metadata !47), !dbg !46
-  call void @llvm.dbg.declare(metadata !{%"class.std::basic_string"* %range}, metadata !48), !dbg !49
+  call void @llvm.dbg.declare(metadata i64** %limit.addr, metadata !47, metadata !{!"0x102"}), !dbg !46
+  call void @llvm.dbg.declare(metadata %"class.std::basic_string"* %range, metadata !48, metadata !{!"0x102"}), !dbg !49
   %call = call i32 @_ZNKSs7compareEmmPKc(%"class.std::basic_string"* %range, i64 0, i64 6, i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0)), !dbg !50
   %cmp = icmp ne i32 %call, 0, !dbg !50
   br i1 %cmp, label %if.then, label %lor.lhs.false, !dbg !50
@@ -70,7 +68,7 @@ if.end:                                           ; preds = %if.then, %lor.lhs.f
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare i32 @_ZNKSs7compareEmmPKc(%"class.std::basic_string"*, i64, i64, i8*) #2
 
@@ -86,62 +84,62 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.module.flags = !{!42, !43}
 !llvm.ident = !{!44}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (209308)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !12, metadata !2, metadata !21, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/llvm_cmake_gcc/pr19307.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"pr19307.cc", metadata !"/llvm_cmake_gcc"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !6, metadata !8}
-!4 = metadata !{i32 786451, metadata !5, null, metadata !"", i32 83, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, metadata !"_ZTS11__mbstate_t"} ; [ DW_TAG_structure_type ] [line 83, size 0, align 0, offset 0] [decl] [from ]
-!5 = metadata !{metadata !"/usr/include/wchar.h", metadata !"/llvm_cmake_gcc"}
-!6 = metadata !{i32 786451, metadata !7, null, metadata !"lconv", i32 54, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, metadata !"_ZTS5lconv"} ; [ DW_TAG_structure_type ] [lconv] [line 54, size 0, align 0, offset 0] [decl] [from ]
-!7 = metadata !{metadata !"/usr/include/locale.h", metadata !"/llvm_cmake_gcc"}
-!8 = metadata !{i32 786434, metadata !9, metadata !10, metadata !"basic_string<char, std::char_traits<char>, std::allocator<char> >", i32 1134, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, metadata !"_ZTSSs"} ; [ DW_TAG_class_type ] [basic_string<char, std::char_traits<char>, std::allocator<char> >] [line 1134, size 0, align 0, offset 0] [decl] [from ]
-!9 = metadata !{metadata !"/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/bits/basic_string.tcc", metadata !"/llvm_cmake_gcc"}
-!10 = metadata !{i32 786489, metadata !11, null, metadata !"std", i32 153} ; [ DW_TAG_namespace ] [std] [line 153]
-!11 = metadata !{metadata !"/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/x86_64-linux-gnu/bits/c++config.h", metadata !"/llvm_cmake_gcc"}
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786478, metadata !1, metadata !14, metadata !"parse_range", metadata !"parse_range", metadata !"_Z11parse_rangeRyS_Ss", i32 3, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i64*, i64*, %"class.std::basic_string"*)* @_Z11parse_rangeRyS_Ss, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [parse_range]
-!14 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/llvm_cmake_gcc/pr19307.cc]
-!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{null, metadata !17, metadata !17, metadata !19}
-!17 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from long long unsigned int]
-!18 = metadata !{i32 786468, null, null, metadata !"long long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
-!19 = metadata !{i32 786454, metadata !20, metadata !10, metadata !"string", i32 65, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTSSs"} ; [ DW_TAG_typedef ] [string] [line 65, size 0, align 0, offset 0] [from _ZTSSs]
-!20 = metadata !{metadata !"/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/bits/stringfwd.h", metadata !"/llvm_cmake_gcc"}
-!21 = metadata !{metadata !22, metadata !26, metadata !29, metadata !33, metadata !38, metadata !41}
-!22 = metadata !{i32 786490, metadata !23, metadata !25, i32 57} ; [ DW_TAG_imported_module ]
-!23 = metadata !{i32 786489, metadata !24, null, metadata !"__gnu_debug", i32 55} ; [ DW_TAG_namespace ] [__gnu_debug] [line 55]
-!24 = metadata !{metadata !"/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/debug/debug.h", metadata !"/llvm_cmake_gcc"}
-!25 = metadata !{i32 786489, metadata !24, metadata !10, metadata !"__debug", i32 49} ; [ DW_TAG_namespace ] [__debug] [line 49]
-!26 = metadata !{i32 786440, metadata !10, metadata !27, i32 66} ; [ DW_TAG_imported_declaration ]
-!27 = metadata !{i32 786454, metadata !5, null, metadata !"mbstate_t", i32 106, i64 0, i64 0, i64 0, i32 0, metadata !28} ; [ DW_TAG_typedef ] [mbstate_t] [line 106, size 0, align 0, offset 0] [from __mbstate_t]
-!28 = metadata !{i32 786454, metadata !5, null, metadata !"__mbstate_t", i32 95, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS11__mbstate_t"} ; [ DW_TAG_typedef ] [__mbstate_t] [line 95, size 0, align 0, offset 0] [from _ZTS11__mbstate_t]
-!29 = metadata !{i32 786440, metadata !10, metadata !30, i32 141} ; [ DW_TAG_imported_declaration ]
-!30 = metadata !{i32 786454, metadata !31, null, metadata !"wint_t", i32 141, i64 0, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_typedef ] [wint_t] [line 141, size 0, align 0, offset 0] [from unsigned int]
-!31 = metadata !{metadata !"/llvm_cmake_gcc/bin/../lib/clang/3.5.0/include/stddef.h", metadata !"/llvm_cmake_gcc"}
-!32 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
-!33 = metadata !{i32 786440, metadata !34, metadata !36, i32 42} ; [ DW_TAG_imported_declaration ]
-!34 = metadata !{i32 786489, metadata !35, null, metadata !"__gnu_cxx", i32 69} ; [ DW_TAG_namespace ] [__gnu_cxx] [line 69]
-!35 = metadata !{metadata !"/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/bits/cpp_type_traits.h", metadata !"/llvm_cmake_gcc"}
-!36 = metadata !{i32 786454, metadata !11, metadata !10, metadata !"size_t", i32 155, i64 0, i64 0, i64 0, i32 0, metadata !37} ; [ DW_TAG_typedef ] [size_t] [line 155, size 0, align 0, offset 0] [from long unsigned int]
-!37 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
-!38 = metadata !{i32 786440, metadata !34, metadata !39, i32 43} ; [ DW_TAG_imported_declaration ]
-!39 = metadata !{i32 786454, metadata !11, metadata !10, metadata !"ptrdiff_t", i32 156, i64 0, i64 0, i64 0, i32 0, metadata !40} ; [ DW_TAG_typedef ] [ptrdiff_t] [line 156, size 0, align 0, offset 0] [from long int]
-!40 = metadata !{i32 786468, null, null, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
-!41 = metadata !{i32 786440, metadata !10, metadata !"_ZTS5lconv", i32 55} ; [ DW_TAG_imported_declaration ]
-!42 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!43 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!44 = metadata !{metadata !"clang version 3.5.0 (209308)"}
-!45 = metadata !{i32 786689, metadata !13, metadata !"offset", metadata !14, i32 16777219, metadata !17, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [offset] [line 3]
-!46 = metadata !{i32 3, i32 0, metadata !13, null}
-!47 = metadata !{i32 786689, metadata !13, metadata !"limit", metadata !14, i32 33554435, metadata !17, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [limit] [line 3]
-!48 = metadata !{i32 786689, metadata !13, metadata !"range", metadata !14, i32 50331652, metadata !19, i32 8192, i32 0} ; [ DW_TAG_arg_variable ] [range] [line 4]
-!49 = metadata !{i32 4, i32 0, metadata !13, null}
-!50 = metadata !{i32 5, i32 0, metadata !51, null}
-!51 = metadata !{i32 786443, metadata !1, metadata !13, i32 5, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/llvm_cmake_gcc/pr19307.cc]
-!52 = metadata !{i32 5, i32 0, metadata !53, null}
-!53 = metadata !{i32 786443, metadata !1, metadata !51, i32 5, i32 0, i32 1, i32 1} ; [ DW_TAG_lexical_block ] [/llvm_cmake_gcc/pr19307.cc]
-!54 = metadata !{i32 6, i32 0, metadata !51, null}
-!55 = metadata !{i32 7, i32 0, metadata !13, null}
-!56 = metadata !{i32 8, i32 0, metadata !13, null} ; [ DW_TAG_imported_declaration ]
-!57 = metadata !{i32 9, i32 0, metadata !13, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 (209308)\000\00\000\00\001", !1, !2, !3, !12, !2, !21} ; [ DW_TAG_compile_unit ] [/llvm_cmake_gcc/pr19307.cc] [DW_LANG_C_plus_plus]
+!1 = !{!"pr19307.cc", !"/llvm_cmake_gcc"}
+!2 = !{}
+!3 = !{!4, !6, !8}
+!4 = !{!"0x13\00\0083\000\000\000\004\000", !5, null, null, null, null, null, !"_ZTS11__mbstate_t"} ; [ DW_TAG_structure_type ] [line 83, size 0, align 0, offset 0] [decl] [from ]
+!5 = !{!"/usr/include/wchar.h", !"/llvm_cmake_gcc"}
+!6 = !{!"0x13\00lconv\0054\000\000\000\004\000", !7, null, null, null, null, null, !"_ZTS5lconv"} ; [ DW_TAG_structure_type ] [lconv] [line 54, size 0, align 0, offset 0] [decl] [from ]
+!7 = !{!"/usr/include/locale.h", !"/llvm_cmake_gcc"}
+!8 = !{!"0x2\00basic_string<char, std::char_traits<char>, std::allocator<char> >\001134\000\000\000\004\000", !9, !10, null, null, null, null, !"_ZTSSs"} ; [ DW_TAG_class_type ] [basic_string<char, std::char_traits<char>, std::allocator<char> >] [line 1134, size 0, align 0, offset 0] [decl] [from ]
+!9 = !{!"/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/bits/basic_string.tcc", !"/llvm_cmake_gcc"}
+!10 = !{!"0x39\00std\00153", !11, null} ; [ DW_TAG_namespace ] [std] [line 153]
+!11 = !{!"/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/x86_64-linux-gnu/bits/c++config.h", !"/llvm_cmake_gcc"}
+!12 = !{!13}
+!13 = !{!"0x2e\00parse_range\00parse_range\00_Z11parse_rangeRyS_Ss\003\000\001\000\006\00256\000\004", !1, !14, !15, null, void (i64*, i64*, %"class.std::basic_string"*)* @_Z11parse_rangeRyS_Ss, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [parse_range]
+!14 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/llvm_cmake_gcc/pr19307.cc]
+!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{null, !17, !17, !19}
+!17 = !{!"0x10\00\000\000\000\000\000", null, null, !18} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from long long unsigned int]
+!18 = !{!"0x24\00long long unsigned int\000\0064\0064\000\000\007", null, null} ; [ DW_TAG_base_type ] [long long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!19 = !{!"0x16\00string\0065\000\000\000\000", !20, !10, !"_ZTSSs"} ; [ DW_TAG_typedef ] [string] [line 65, size 0, align 0, offset 0] [from _ZTSSs]
+!20 = !{!"/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/bits/stringfwd.h", !"/llvm_cmake_gcc"}
+!21 = !{!22, !26, !29, !33, !38, !41}
+!22 = !{!"0x3a\0057\00", !23, !25} ; [ DW_TAG_imported_module ]
+!23 = !{!"0x39\00__gnu_debug\0055", !24, null} ; [ DW_TAG_namespace ] [__gnu_debug] [line 55]
+!24 = !{!"/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/debug/debug.h", !"/llvm_cmake_gcc"}
+!25 = !{!"0x39\00__debug\0049", !24, !10} ; [ DW_TAG_namespace ] [__debug] [line 49]
+!26 = !{!"0x8\0066\00", !10, !27} ; [ DW_TAG_imported_declaration ]
+!27 = !{!"0x16\00mbstate_t\00106\000\000\000\000", !5, null, !28} ; [ DW_TAG_typedef ] [mbstate_t] [line 106, size 0, align 0, offset 0] [from __mbstate_t]
+!28 = !{!"0x16\00__mbstate_t\0095\000\000\000\000", !5, null, !"_ZTS11__mbstate_t"} ; [ DW_TAG_typedef ] [__mbstate_t] [line 95, size 0, align 0, offset 0] [from _ZTS11__mbstate_t]
+!29 = !{!"0x8\00141\00", !10, !30} ; [ DW_TAG_imported_declaration ]
+!30 = !{!"0x16\00wint_t\00141\000\000\000\000", !31, null, !32} ; [ DW_TAG_typedef ] [wint_t] [line 141, size 0, align 0, offset 0] [from unsigned int]
+!31 = !{!"/llvm_cmake_gcc/bin/../lib/clang/3.5.0/include/stddef.h", !"/llvm_cmake_gcc"}
+!32 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, null} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!33 = !{!"0x8\0042\00", !34, !36} ; [ DW_TAG_imported_declaration ]
+!34 = !{!"0x39\00__gnu_cxx\0069", !35, null} ; [ DW_TAG_namespace ] [__gnu_cxx] [line 69]
+!35 = !{!"/usr/lib/gcc/x86_64-linux-gnu/4.6/../../../../include/c++/4.6/bits/cpp_type_traits.h", !"/llvm_cmake_gcc"}
+!36 = !{!"0x16\00size_t\00155\000\000\000\000", !11, !10, !37} ; [ DW_TAG_typedef ] [size_t] [line 155, size 0, align 0, offset 0] [from long unsigned int]
+!37 = !{!"0x24\00long unsigned int\000\0064\0064\000\000\007", null, null} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
+!38 = !{!"0x8\0043\00", !34, !39} ; [ DW_TAG_imported_declaration ]
+!39 = !{!"0x16\00ptrdiff_t\00156\000\000\000\000", !11, !10, !40} ; [ DW_TAG_typedef ] [ptrdiff_t] [line 156, size 0, align 0, offset 0] [from long int]
+!40 = !{!"0x24\00long int\000\0064\0064\000\000\005", null, null} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
+!41 = !{!"0x8\0055\00", !10, !"_ZTS5lconv"} ; [ DW_TAG_imported_declaration ]
+!42 = !{i32 2, !"Dwarf Version", i32 4}
+!43 = !{i32 2, !"Debug Info Version", i32 2}
+!44 = !{!"clang version 3.5.0 (209308)"}
+!45 = !{!"0x101\00offset\0016777219\000", !13, !14, !17} ; [ DW_TAG_arg_variable ] [offset] [line 3]
+!46 = !MDLocation(line: 3, scope: !13)
+!47 = !{!"0x101\00limit\0033554435\000", !13, !14, !17} ; [ DW_TAG_arg_variable ] [limit] [line 3]
+!48 = !{!"0x101\00range\0050331652\008192", !13, !14, !19} ; [ DW_TAG_arg_variable ] [range] [line 4]
+!49 = !MDLocation(line: 4, scope: !13)
+!50 = !MDLocation(line: 5, scope: !51)
+!51 = !{!"0xb\005\000\000", !1, !13} ; [ DW_TAG_lexical_block ] [/llvm_cmake_gcc/pr19307.cc]
+!52 = !MDLocation(line: 5, scope: !53)
+!53 = !{!"0xb\005\000\001", !1, !51} ; [ DW_TAG_lexical_block ] [/llvm_cmake_gcc/pr19307.cc]
+!54 = !MDLocation(line: 6, scope: !51)
+!55 = !MDLocation(line: 7, scope: !13)
+!56 = !MDLocation(line: 8, scope: !13)
+!57 = !MDLocation(line: 9, scope: !13)
 
diff --git a/test/DebugInfo/X86/processes-relocations.ll b/test/DebugInfo/X86/processes-relocations.ll
new file mode 100644
index 000000000000..b60ffdfbe213
--- /dev/null
+++ b/test/DebugInfo/X86/processes-relocations.ll
@@ -0,0 +1,21 @@
+; RUN: llc -filetype=obj -O0 < %s -mtriple x86_64-none-linux | \
+; RUN:     llvm-dwarfdump - 2>&1 | FileCheck %s
+; RUN: llc -filetype=obj -O0 < %s -mtriple i386-none-linux | \
+; RUN:     llvm-dwarfdump - 2>&1 | FileCheck %s
+; RUN: llc -filetype=obj -O0 < %s -mtriple x86_64-none-mingw32 | \
+; RUN:     llvm-dwarfdump - 2>&1 | FileCheck %s
+; RUN: llc -filetype=obj -O0 < %s -mtriple i386-none-mingw32 | \
+; RUN:     llvm-dwarfdump - 2>&1 | FileCheck %s
+
+; CHECK-NOT: failed to compute relocation
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = !{i32 786449, !1, i32 12, !"clang version 3.6.0 ", i1 false, !"", i32 0, !2, !2, !2, !2, !2, !"", i32 1} ; [ DW_TAG_compile_unit ] [/a/empty.c] [DW_LANG_C99]
+!1 = !{!"empty.c", !"/a"}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 2}
+!5 = !{!"clang version 3.6.0 "}
diff --git a/test/DebugInfo/X86/prologue-stack.ll b/test/DebugInfo/X86/prologue-stack.ll
index a5bae841fa3c..1cc872ab4184 100644
--- a/test/DebugInfo/X86/prologue-stack.ll
+++ b/test/DebugInfo/X86/prologue-stack.ll
@@ -21,16 +21,16 @@ declare i32 @callme(i32)
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!14}
 
-!0 = metadata !{i32 786449, metadata !13, i32 12, metadata !"clang version 3.2 (trunk 164980) (llvm/trunk 164979)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.c] [DW_LANG_C99]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !13, metadata !6, metadata !"isel_line_test2", metadata !"isel_line_test2", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @isel_line_test2, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [isel_line_test2]
-!6 = metadata !{i32 786473, metadata !13} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 5, i32 3, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !13, metadata !5, i32 4, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/bar.c]
-!12 = metadata !{i32 6, i32 3, metadata !11, null}
-!13 = metadata !{metadata !"bar.c", metadata !"/usr/local/google/home/echristo/tmp"}
-!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.2 (trunk 164980) (llvm/trunk 164979)\000\00\000\00\000", !13, !1, !1, !3, !1,  !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.c] [DW_LANG_C99]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00isel_line_test2\00isel_line_test2\00\003\000\001\000\006\000\000\004", !13, !6, !7, null, i32 ()* @isel_line_test2, null, null, !1} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [isel_line_test2]
+!6 = !{!"0x29", !13} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !MDLocation(line: 5, column: 3, scope: !11)
+!11 = !{!"0xb\004\001\000", !13, !5} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/bar.c]
+!12 = !MDLocation(line: 6, column: 3, scope: !11)
+!13 = !{!"bar.c", !"/usr/local/google/home/echristo/tmp"}
+!14 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/recursive_inlining.ll b/test/DebugInfo/X86/recursive_inlining.ll
new file mode 100644
index 000000000000..6cd935ce061a
--- /dev/null
+++ b/test/DebugInfo/X86/recursive_inlining.ll
@@ -0,0 +1,275 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; This isn't a very pretty test case - I imagine there might be other ways to
+; tickle the optimizers into producing the desired code, but I haven't found
+; them.
+
+; The issue is when a function is inlined into itself, the inlined argument
+; accidentally overwrote the concrete argument and was lost.
+
+; IR generated from the following source compiled with clang -g:
+; void fn1(void *);
+; void fn2(int, int, int, int);
+; void fn3();
+; void fn8();
+; struct C {
+;   int b;
+;   void m_fn2() {
+;     fn8();
+;     if (b) fn2(0, 0, 0, 0);
+;     fn3();
+;   }
+; };
+; C *x;
+; inline void fn7() {}
+; void fn6() {
+;   fn8();
+;   x->m_fn2();
+;   fn7();
+; }
+; void fn3() { fn6(); }
+; void fn4() { x->m_fn2(); }
+; void fn5() { x->m_fn2(); }
+
+; The definition of C and declaration of C::m_fn2
+; CHECK: DW_TAG_structure_type
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_member
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "m_fn2"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[M_FN2_THIS_DECL:.*]]:     DW_TAG_formal_parameter
+
+; The abstract definition of C::m_fn2
+; CHECK: [[M_FN2_ABS_DEF:.*]]: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_specification {{.*}} "_ZN1C5m_fn2Ev"
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_inline
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK: [[M_FN2_THIS_ABS_DEF:.*]]:   DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "this"
+
+; Skip some other functions
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
+
+; The concrete definition of C::m_fn2
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_abstract_origin {{.*}} {[[M_FN2_ABS_DEF]]} "_ZN1C5m_fn2Ev"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:   DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_abstract_origin {{.*}} {[[M_FN2_THIS_ABS_DEF]]}
+; CHECK-NOT: {{DW_TAG|NULL}}
+; Inlined fn3:
+; CHECK:     DW_TAG_inlined_subroutine
+; CHECK-NOT: {{DW_TAG|NULL}}
+; Inlined fn6:
+; CHECK:       DW_TAG_inlined_subroutine
+; CHECK-NOT: {{DW_TAG|NULL}}
+; Inlined C::m_fn2:
+; CHECK:         DW_TAG_inlined_subroutine
+; CHECK-NOT: DW_TAG
+; CHECK:           DW_AT_abstract_origin {{.*}} {[[M_FN2_ABS_DEF]]} "_ZN1C5m_fn2Ev"
+; CHECK-NOT: {{DW_TAG|NULL}}
+; CHECK:           DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:              DW_AT_abstract_origin {{.*}} {[[M_FN2_THIS_ABS_DEF]]}
+
+
+
+%struct.C = type { i32 }
+
+@x = global %struct.C* null, align 8
+
+; Function Attrs: nounwind
+define void @_Z3fn6v() #0 {
+entry:
+  tail call void @_Z3fn8v() #3, !dbg !31
+  %0 = load %struct.C** @x, align 8, !dbg !32, !tbaa !33
+  tail call void @llvm.dbg.value(metadata %struct.C* %0, i64 0, metadata !37, metadata !{!"0x102"}) #3, !dbg !38
+  tail call void @_Z3fn8v() #3, !dbg !39
+  %b.i = getelementptr inbounds %struct.C* %0, i64 0, i32 0, !dbg !40
+  %1 = load i32* %b.i, align 4, !dbg !40, !tbaa !42
+  %tobool.i = icmp eq i32 %1, 0, !dbg !40
+  br i1 %tobool.i, label %_ZN1C5m_fn2Ev.exit, label %if.then.i, !dbg !40
+
+if.then.i:                                        ; preds = %entry
+  tail call void @_Z3fn2iiii(i32 0, i32 0, i32 0, i32 0) #3, !dbg !45
+  br label %_ZN1C5m_fn2Ev.exit, !dbg !45
+
+_ZN1C5m_fn2Ev.exit:                               ; preds = %entry, %if.then.i
+  tail call void @_Z3fn3v() #3, !dbg !47
+  ret void, !dbg !48
+}
+
+declare void @_Z3fn8v() #1
+
+; Function Attrs: nounwind
+define linkonce_odr void @_ZN1C5m_fn2Ev(%struct.C* nocapture readonly %this) #0 align 2 {
+entry:
+  tail call void @llvm.dbg.value(metadata %struct.C* %this, i64 0, metadata !24, metadata !{!"0x102"}), !dbg !49
+  tail call void @_Z3fn8v() #3, !dbg !50
+  %b = getelementptr inbounds %struct.C* %this, i64 0, i32 0, !dbg !51
+  %0 = load i32* %b, align 4, !dbg !51, !tbaa !42
+  %tobool = icmp eq i32 %0, 0, !dbg !51
+  br i1 %tobool, label %if.end, label %if.then, !dbg !51
+
+if.then:                                          ; preds = %entry
+  tail call void @_Z3fn2iiii(i32 0, i32 0, i32 0, i32 0) #3, !dbg !52
+  br label %if.end, !dbg !52
+
+if.end:                                           ; preds = %entry, %if.then
+  tail call void @_Z3fn8v() #3, !dbg !53
+  %1 = load %struct.C** @x, align 8, !dbg !56, !tbaa !33
+  tail call void @llvm.dbg.value(metadata %struct.C* %1, i64 0, metadata !57, metadata !{!"0x102"}) #3, !dbg !58
+  tail call void @_Z3fn8v() #3, !dbg !59
+  %b.i.i = getelementptr inbounds %struct.C* %1, i64 0, i32 0, !dbg !60
+  %2 = load i32* %b.i.i, align 4, !dbg !60, !tbaa !42
+  %tobool.i.i = icmp eq i32 %2, 0, !dbg !60
+  br i1 %tobool.i.i, label %_Z3fn6v.exit, label %if.then.i.i, !dbg !60
+
+if.then.i.i:                                      ; preds = %if.end
+  tail call void @_Z3fn2iiii(i32 0, i32 0, i32 0, i32 0) #3, !dbg !61
+  br label %_Z3fn6v.exit, !dbg !61
+
+_Z3fn6v.exit:                                     ; preds = %if.end, %if.then.i.i
+  tail call void @_Z3fn3v() #3, !dbg !62
+  ret void, !dbg !63
+}
+
+; Function Attrs: nounwind
+define void @_Z3fn3v() #0 {
+entry:
+  br label %tailrecurse
+
+tailrecurse:                                      ; preds = %tailrecurse.backedge, %entry
+  tail call void @_Z3fn8v() #3, !dbg !64
+  %0 = load %struct.C** @x, align 8, !dbg !66, !tbaa !33
+  tail call void @llvm.dbg.value(metadata %struct.C* %0, i64 0, metadata !67, metadata !{!"0x102"}) #3, !dbg !68
+  tail call void @_Z3fn8v() #3, !dbg !69
+  %b.i.i = getelementptr inbounds %struct.C* %0, i64 0, i32 0, !dbg !70
+  %1 = load i32* %b.i.i, align 4, !dbg !70, !tbaa !42
+  %tobool.i.i = icmp eq i32 %1, 0, !dbg !70
+  br i1 %tobool.i.i, label %tailrecurse.backedge, label %if.then.i.i, !dbg !70
+
+tailrecurse.backedge:                             ; preds = %tailrecurse, %if.then.i.i
+  br label %tailrecurse
+
+if.then.i.i:                                      ; preds = %tailrecurse
+  tail call void @_Z3fn2iiii(i32 0, i32 0, i32 0, i32 0) #3, !dbg !71
+  br label %tailrecurse.backedge, !dbg !71
+}
+
+; Function Attrs: nounwind
+define void @_Z3fn4v() #0 {
+entry:
+  %0 = load %struct.C** @x, align 8, !dbg !72, !tbaa !33
+  tail call void @_ZN1C5m_fn2Ev(%struct.C* %0), !dbg !72
+  ret void, !dbg !72
+}
+
+; Function Attrs: nounwind
+define void @_Z3fn5v() #0 {
+entry:
+  %0 = load %struct.C** @x, align 8, !dbg !73, !tbaa !33
+  tail call void @_ZN1C5m_fn2Ev(%struct.C* %0), !dbg !73
+  ret void, !dbg !73
+}
+
+declare void @_Z3fn2iiii(i32, i32, i32, i32) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!28, !29}
+!llvm.ident = !{!30}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 \001\00\000\00\001", !1, !2, !3, !13, !26, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/missing_concrete_variable_on_darwin/reduce/<stdin>] [DW_LANG_C_plus_plus]
+!1 = !{!"<stdin>", !"/usr/local/google/home/blaikie/dev/scratch/missing_concrete_variable_on_darwin/reduce"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00C\005\0032\0032\000\000\000", !5, null, null, !6, null, null, !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 5, size 32, align 32, offset 0] [def] [from ]
+!5 = !{!"recursive_inlining.cpp", !"/usr/local/google/home/blaikie/dev/scratch/missing_concrete_variable_on_darwin/reduce"}
+!6 = !{!7, !9}
+!7 = !{!"0xd\00b\006\0032\0032\000\000", !5, !"_ZTS1C", !8} ; [ DW_TAG_member ] [b] [line 6, size 32, align 32, offset 0] [from int]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x2e\00m_fn2\00m_fn2\00_ZN1C5m_fn2Ev\007\000\000\000\006\00256\001\007", !5, !"_ZTS1C", !10, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 7] [m_fn2]
+!10 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !{null, !12}
+!12 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
+!13 = !{!14, !18, !19, !20, !21, !22}
+!14 = !{!"0x2e\00fn6\00fn6\00_Z3fn6v\0015\000\001\000\006\00256\001\0015", !5, !15, !16, null, void ()* @_Z3fn6v, null, null, !2} ; [ DW_TAG_subprogram ] [line 15] [def] [fn6]
+!15 = !{!"0x29", !5}         ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/missing_concrete_variable_on_darwin/reduce/recursive_inlining.cpp]
+!16 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !17, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!17 = !{null}
+!18 = !{!"0x2e\00fn3\00fn3\00_Z3fn3v\0020\000\001\000\006\00256\001\0020", !5, !15, !16, null, void ()* @_Z3fn3v, null, null, !2} ; [ DW_TAG_subprogram ] [line 20] [def] [fn3]
+!19 = !{!"0x2e\00fn4\00fn4\00_Z3fn4v\0021\000\001\000\006\00256\001\0021", !5, !15, !16, null, void ()* @_Z3fn4v, null, null, !2} ; [ DW_TAG_subprogram ] [line 21] [def] [fn4]
+!20 = !{!"0x2e\00fn5\00fn5\00_Z3fn5v\0022\000\001\000\006\00256\001\0022", !5, !15, !16, null, void ()* @_Z3fn5v, null, null, !2} ; [ DW_TAG_subprogram ] [line 22] [def] [fn5]
+!21 = !{!"0x2e\00fn7\00fn7\00_Z3fn7v\0014\000\001\000\006\00256\001\0014", !5, !15, !16, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 14] [def] [fn7]
+!22 = !{!"0x2e\00m_fn2\00m_fn2\00_ZN1C5m_fn2Ev\007\000\001\000\006\00256\001\007", !5, !"_ZTS1C", !10, null, void (%struct.C*)* @_ZN1C5m_fn2Ev, null, !9, !23} ; [ DW_TAG_subprogram ] [line 7] [def] [m_fn2]
+!23 = !{!24}
+!24 = !{!"0x101\00this\0016777216\001088", !22, null, !25} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!25 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C]
+!26 = !{!27}
+!27 = !{!"0x34\00x\00x\00\0013\000\001", null, !15, !25, %struct.C** @x, null} ; [ DW_TAG_variable ] [x] [line 13] [def]
+!28 = !{i32 2, !"Dwarf Version", i32 4}
+!29 = !{i32 2, !"Debug Info Version", i32 2}
+!30 = !{!"clang version 3.6.0 "}
+!31 = !MDLocation(line: 16, scope: !14)
+!32 = !MDLocation(line: 17, scope: !14)
+!33 = !{!34, !34, i64 0}
+!34 = !{!"any pointer", !35, i64 0}
+!35 = !{!"omnipotent char", !36, i64 0}
+!36 = !{!"Simple C/C++ TBAA"}
+!37 = !{!"0x101\00this\0016777216\001088", !22, null, !25, !32} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!38 = !MDLocation(line: 0, scope: !22, inlinedAt: !32)
+!39 = !MDLocation(line: 8, scope: !22, inlinedAt: !32)
+!40 = !MDLocation(line: 9, scope: !41, inlinedAt: !32)
+!41 = !{!"0xb\009\000\000", !5, !22} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/scratch/missing_concrete_variable_on_darwin/reduce/recursive_inlining.cpp]
+!42 = !{!43, !44, i64 0}
+!43 = !{!"_ZTS1C", !44, i64 0}
+!44 = !{!"int", !35, i64 0}
+!45 = !MDLocation(line: 9, scope: !46, inlinedAt: !32)
+!46 = !{!"0xb\009\000\001", !5, !41} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/scratch/missing_concrete_variable_on_darwin/reduce/recursive_inlining.cpp]
+!47 = !MDLocation(line: 10, scope: !22, inlinedAt: !32)
+!48 = !MDLocation(line: 19, scope: !14)
+!49 = !MDLocation(line: 0, scope: !22)
+!50 = !MDLocation(line: 8, scope: !22)
+!51 = !MDLocation(line: 9, scope: !41)
+!52 = !MDLocation(line: 9, scope: !46)
+!53 = !MDLocation(line: 16, scope: !14, inlinedAt: !54)
+!54 = !MDLocation(line: 20, scope: !18, inlinedAt: !55)
+!55 = !MDLocation(line: 10, scope: !22)
+!56 = !MDLocation(line: 17, scope: !14, inlinedAt: !54)
+!57 = !{!"0x101\00this\0016777216\001088", !22, null, !25, !56} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!58 = !MDLocation(line: 0, scope: !22, inlinedAt: !56)
+!59 = !MDLocation(line: 8, scope: !22, inlinedAt: !56)
+!60 = !MDLocation(line: 9, scope: !41, inlinedAt: !56)
+!61 = !MDLocation(line: 9, scope: !46, inlinedAt: !56)
+!62 = !MDLocation(line: 10, scope: !22, inlinedAt: !56)
+!63 = !MDLocation(line: 11, scope: !22)
+!64 = !MDLocation(line: 16, scope: !14, inlinedAt: !65)
+!65 = !MDLocation(line: 20, scope: !18)
+!66 = !MDLocation(line: 17, scope: !14, inlinedAt: !65)
+!67 = !{!"0x101\00this\0016777216\001088", !22, null, !25, !66} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!68 = !MDLocation(line: 0, scope: !22, inlinedAt: !66)
+!69 = !MDLocation(line: 8, scope: !22, inlinedAt: !66)
+!70 = !MDLocation(line: 9, scope: !41, inlinedAt: !66)
+!71 = !MDLocation(line: 9, scope: !46, inlinedAt: !66)
+!72 = !MDLocation(line: 21, scope: !19)
+!73 = !MDLocation(line: 22, scope: !20)
diff --git a/test/DebugInfo/X86/ref_addr_relocation.ll b/test/DebugInfo/X86/ref_addr_relocation.ll
index 76e6aa6777ec..7e39b8ee2a92 100644
--- a/test/DebugInfo/X86/ref_addr_relocation.ll
+++ b/test/DebugInfo/X86/ref_addr_relocation.ll
@@ -23,7 +23,7 @@
 ; CHECK: DW_TAG_variable
 ; CHECK: .long [[TYPE:.*]] # DW_AT_type
 ; CHECK: DW_TAG_structure_type
-; CHECK: debug_info_end0
+; CHECK: debug_info_begin1
 ; CHECK: DW_TAG_compile_unit
 ; CHECK-NOT: DW_TAG_structure_type
 ; This variable's type is in the 1st CU.
@@ -31,7 +31,7 @@
 ; Make sure this is relocatable.
 ; CHECK: .quad .Lsection_info+[[TYPE]] # DW_AT_type
 ; CHECK-NOT: DW_TAG_structure_type
-; CHECK: debug_info_end1
+; CHECK: .section
 
 ; CHECK-DWARF: DW_TAG_compile_unit
 ; CHECK-DWARF: 0x[[ADDR:.*]]: DW_TAG_structure_type
@@ -53,19 +53,19 @@
 !llvm.dbg.cu = !{!0, !9}
 !llvm.module.flags = !{!14, !15}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 191799)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !2, metadata !6, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu1.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"tu1.cpp", metadata !"/Users/manmanren/test-Nov/type_unique_air/ref_addr"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !5, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{metadata !"./hdr.h", metadata !"/Users/manmanren/test-Nov/type_unique_air/ref_addr"}
-!6 = metadata !{metadata !7}
-!7 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !8, i32 2, metadata !4, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 2] [def]
-!8 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu1.cpp]
-!9 = metadata !{i32 786449, metadata !10, i32 4, metadata !"clang version 3.4 (trunk 191799)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !2, metadata !11, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu2.cpp] [DW_LANG_C_plus_plus]
-!10 = metadata !{metadata !"tu2.cpp", metadata !"/Users/manmanren/test-Nov/type_unique_air/ref_addr"}
-!11 = metadata !{metadata !12}
-!12 = metadata !{i32 786484, i32 0, null, metadata !"g", metadata !"g", metadata !"", metadata !13, i32 2, metadata !4, i32 0, i32 1, %struct.foo* @g, null} ; [ DW_TAG_variable ] [g] [line 2] [def]
-!13 = metadata !{i32 786473, metadata !10}        ; [ DW_TAG_file_type ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu2.cpp]
-!14 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!15 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 (trunk 191799)\000\00\000\00\000", !1, !2, !3, !2, !6, !2} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu1.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"tu1.cpp", !"/Users/manmanren/test-Nov/type_unique_air/ref_addr"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00foo\001\008\008\000\000\000", !5, null, null, !2, null, null, !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!"./hdr.h", !"/Users/manmanren/test-Nov/type_unique_air/ref_addr"}
+!6 = !{!7}
+!7 = !{!"0x34\00f\00f\00\002\000\001", null, !8, !4, %struct.foo* @f, null} ; [ DW_TAG_variable ] [f] [line 2] [def]
+!8 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu1.cpp]
+!9 = !{!"0x11\004\00clang version 3.4 (trunk 191799)\000\00\000\00\000", !10, !2, !3, !2, !11, !2} ; [ DW_TAG_compile_unit ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu2.cpp] [DW_LANG_C_plus_plus]
+!10 = !{!"tu2.cpp", !"/Users/manmanren/test-Nov/type_unique_air/ref_addr"}
+!11 = !{!12}
+!12 = !{!"0x34\00g\00g\00\002\000\001", null, !13, !4, %struct.foo* @g, null} ; [ DW_TAG_variable ] [g] [line 2] [def]
+!13 = !{!"0x29", !10}        ; [ DW_TAG_file_type ] [/Users/manmanren/test-Nov/type_unique_air/ref_addr/tu2.cpp]
+!14 = !{i32 2, !"Dwarf Version", i32 2}
+!15 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/reference-argument.ll b/test/DebugInfo/X86/reference-argument.ll
index 4a6bdca550fd..68ff200be400 100644
--- a/test/DebugInfo/X86/reference-argument.ll
+++ b/test/DebugInfo/X86/reference-argument.ll
@@ -13,15 +13,15 @@ target triple = "x86_64-apple-macosx10.9.0"
 %class.A = type { i8 }
 
 declare void @_Z3barR4SVal(%class.SVal* %v)
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 declare i32 @main()
 ; Function Attrs: nounwind ssp uwtable
 define linkonce_odr void @_ZN1A3fooE4SVal(%class.A* %this, %class.SVal* %v) nounwind ssp uwtable align 2 {
 entry:
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !59), !dbg !61
-  call void @llvm.dbg.declare(metadata !{%class.SVal* %v}, metadata !62), !dbg !61
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !59, metadata !{!"0x102"}), !dbg !61
+  call void @llvm.dbg.declare(metadata %class.SVal* %v, metadata !62, metadata !{!"0x102"}), !dbg !61
   %this1 = load %class.A** %this.addr
   call void @_Z3barR4SVal(%class.SVal* %v), !dbg !61
   ret void, !dbg !61
@@ -32,72 +32,72 @@ declare void @_ZN4SValD2Ev(%class.SVal* %this)
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!47, !68}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [aggregate-indirect-arg.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"aggregate-indirect-arg.cpp", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !29, metadata !33, metadata !34, metadata !35}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"_Z3barR4SVal", i32 19, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.SVal*)* @_Z3barR4SVal, null, null, metadata !2, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [bar]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [aggregate-indirect-arg.cpp]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null, metadata !8}
-!8 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from SVal]
-!9 = metadata !{i32 786434, metadata !1, null, metadata !"SVal", i32 12, i64 128, i64 64, i32 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_class_type ] [SVal] [line 12, size 128, align 64, offset 0] [def] [from ]
-!10 = metadata !{metadata !11, metadata !14, metadata !16, metadata !21, metadata !23}
-!11 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"Data", i32 15, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ] [Data] [line 15, size 64, align 64, offset 0] [from ]
-!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!13 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from ]
-!14 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"Kind", i32 16, i64 32, i64 32, i64 64, i32 0, metadata !15} ; [ DW_TAG_member ] [Kind] [line 16, size 32, align 32, offset 64] [from unsigned int]
-!15 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
-!16 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"~SVal", metadata !"~SVal", metadata !"", i32 14, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !20, i32 14} ; [ DW_TAG_subprogram ] [line 14] [~SVal]
-!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!18 = metadata !{null, metadata !19}
-!19 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from SVal]
-!20 = metadata !{i32 786468}
-!21 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"SVal", metadata !"SVal", metadata !"", i32 12, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !22, i32 12} ; [ DW_TAG_subprogram ] [line 12] [SVal]
-!22 = metadata !{i32 786468}
-!23 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"SVal", metadata !"SVal", metadata !"", i32 12, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !28, i32 12} ; [ DW_TAG_subprogram ] [line 12] [SVal]
-!24 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!25 = metadata !{null, metadata !19, metadata !26}
-!26 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !27} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
-!27 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from SVal]
-!28 = metadata !{i32 786468}
-!29 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 25, metadata !30, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 25} ; [ DW_TAG_subprogram ] [line 25] [def] [main]
-!30 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !31, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!31 = metadata !{metadata !32}
-!32 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!33 = metadata !{i32 786478, metadata !1, null, metadata !"~SVal", metadata !"~SVal", metadata !"_ZN4SValD1Ev", i32 14, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.SVal*)* @_ZN4SValD1Ev, null, metadata !16, metadata !2, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [~SVal]
-!34 = metadata !{i32 786478, metadata !1, null, metadata !"~SVal", metadata !"~SVal", metadata !"_ZN4SValD2Ev", i32 14, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.SVal*)* @_ZN4SValD2Ev, null, metadata !16, metadata !2, i32 14} ; [ DW_TAG_subprogram ] [line 14] [def] [~SVal]
-!35 = metadata !{i32 786478, metadata !1, null, metadata !"foo", metadata !"foo", metadata !"_ZN1A3fooE4SVal", i32 22, metadata !36, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*, %class.SVal*)* @_ZN1A3fooE4SVal, null, metadata !41, metadata !2, i32 22} ; [ DW_TAG_subprogram ] [line 22] [def] [foo]
-!36 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!37 = metadata !{null, metadata !38, metadata !9}
-!38 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !39} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
-!39 = metadata !{i32 786434, metadata !1, null, metadata !"A", i32 20, i64 8, i64 8, i32 0, i32 0, null, metadata !40, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 20, size 8, align 8, offset 0] [def] [from ]
-!40 = metadata !{metadata !41, metadata !43}
-!41 = metadata !{i32 786478, metadata !1, metadata !39, metadata !"foo", metadata !"foo", metadata !"_ZN1A3fooE4SVal", i32 22, metadata !36, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !42, i32 22} ; [ DW_TAG_subprogram ] [line 22] [foo]
-!42 = metadata !{i32 786468}
-!43 = metadata !{i32 786478, metadata !1, metadata !39, metadata !"A", metadata !"A", metadata !"", i32 20, metadata !44, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !46, i32 20} ; [ DW_TAG_subprogram ] [line 20] [A]
-!44 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !45, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!45 = metadata !{null, metadata !38}
-!46 = metadata !{i32 786468}
-!47 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!48 = metadata !{i32 786689, metadata !4, metadata !"v", metadata !5, i32 16777235, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [v] [line 19]
-!49 = metadata !{i32 19, i32 0, metadata !4, null}
-!50 = metadata !{i32 786688, metadata !29, metadata !"v", metadata !5, i32 26, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [v] [line 26]
-!51 = metadata !{i32 26, i32 0, metadata !29, null}
-!52 = metadata !{i32 27, i32 0, metadata !29, null}
-!53 = metadata !{i32 28, i32 0, metadata !29, null}
-!54 = metadata !{i32 786688, metadata !29, metadata !"a", metadata !5, i32 29, metadata !39, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 29]
-!55 = metadata !{i32 29, i32 0, metadata !29, null}
-!56 = metadata !{i32 30, i32 0, metadata !29, null}
-!57 = metadata !{i32 31, i32 0, metadata !29, null}
-!58 = metadata !{i32 32, i32 0, metadata !29, null}
-!59 = metadata !{i32 786689, metadata !35, metadata !"this", metadata !5, i32 16777238, metadata !60, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 22]
-!60 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
-!61 = metadata !{i32 22, i32 0, metadata !35, null}
-!62 = metadata !{i32 786689, metadata !35, metadata !"v", metadata !5, i32 33554454, metadata !9, i32 8192, i32 0} ; [ DW_TAG_arg_variable ] [v] [line 22]
-!63 = metadata !{i32 786689, metadata !33, metadata !"this", metadata !5, i32 16777230, metadata !64, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 14]
-!64 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from SVal]
-!65 = metadata !{i32 14, i32 0, metadata !33, null}
-!66 = metadata !{i32 786689, metadata !34, metadata !"this", metadata !5, i32 16777230, metadata !64, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 14]
-!67 = metadata !{i32 14, i32 0, metadata !34, null}
-!68 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [aggregate-indirect-arg.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"aggregate-indirect-arg.cpp", !""}
+!2 = !{}
+!3 = !{!4, !29, !33, !34, !35}
+!4 = !{!"0x2e\00bar\00bar\00_Z3barR4SVal\0019\000\001\000\006\00256\000\0019", !1, !5, !6, null, void (%class.SVal*)* @_Z3barR4SVal, null, null, !2} ; [ DW_TAG_subprogram ] [line 19] [def] [bar]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [aggregate-indirect-arg.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !8}
+!8 = !{!"0x10\00\000\000\000\000\000", null, null, !9} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from SVal]
+!9 = !{!"0x2\00SVal\0012\00128\0064\000\000\000", !1, null, null, !10, null, null, null} ; [ DW_TAG_class_type ] [SVal] [line 12, size 128, align 64, offset 0] [def] [from ]
+!10 = !{!11, !14, !16, !21, !23}
+!11 = !{!"0xd\00Data\0015\0064\0064\000\000", !1, !9, !12} ; [ DW_TAG_member ] [Data] [line 15, size 64, align 64, offset 0] [from ]
+!12 = !{!"0xf\00\000\0064\0064\000\000", null, null, !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!13 = !{!"0x26\00\000\000\000\000\000", null, null, null} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = !{!"0xd\00Kind\0016\0032\0032\0064\000", !1, !9, !15} ; [ DW_TAG_member ] [Kind] [line 16, size 32, align 32, offset 64] [from unsigned int]
+!15 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, null} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!16 = !{!"0x2e\00~SVal\00~SVal\00\0014\000\000\000\006\00256\000\0014", !1, !9, !17, null, null, null, i32 0, !20} ; [ DW_TAG_subprogram ] [line 14] [~SVal]
+!17 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !18, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = !{null, !19}
+!19 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from SVal]
+!20 = !{i32 786468}
+!21 = !{!"0x2e\00SVal\00SVal\00\0012\000\000\000\006\00320\000\0012", !1, !9, !17, null, null, null, i32 0, !22} ; [ DW_TAG_subprogram ] [line 12] [SVal]
+!22 = !{i32 786468}
+!23 = !{!"0x2e\00SVal\00SVal\00\0012\000\000\000\006\00320\000\0012", !1, !9, !24, null, null, null, i32 0, !28} ; [ DW_TAG_subprogram ] [line 12] [SVal]
+!24 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !25, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!25 = !{null, !19, !26}
+!26 = !{!"0x10\00\000\000\000\000\000", null, null, !27} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
+!27 = !{!"0x26\00\000\000\000\000\000", null, null, !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from SVal]
+!28 = !{i32 786468}
+!29 = !{!"0x2e\00main\00main\00\0025\000\001\000\006\00256\000\0025", !1, !5, !30, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 25] [def] [main]
+!30 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !31, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!31 = !{!32}
+!32 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!33 = !{!"0x2e\00~SVal\00~SVal\00_ZN4SValD1Ev\0014\000\001\000\006\00256\000\0014", !1, null, !17, null, void (%class.SVal*)* @_ZN4SValD1Ev, null, !16, !2} ; [ DW_TAG_subprogram ] [line 14] [def] [~SVal]
+!34 = !{!"0x2e\00~SVal\00~SVal\00_ZN4SValD2Ev\0014\000\001\000\006\00256\000\0014", !1, null, !17, null, void (%class.SVal*)* @_ZN4SValD2Ev, null, !16, !2} ; [ DW_TAG_subprogram ] [line 14] [def] [~SVal]
+!35 = !{!"0x2e\00foo\00foo\00_ZN1A3fooE4SVal\0022\000\001\000\006\00256\000\0022", !1, null, !36, null, void (%class.A*, %class.SVal*)* @_ZN1A3fooE4SVal, null, !41, !2} ; [ DW_TAG_subprogram ] [line 22] [def] [foo]
+!36 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !37, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!37 = !{null, !38, !9}
+!38 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !39} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from A]
+!39 = !{!"0x2\00A\0020\008\008\000\000\000", !1, null, null, !40, null, null, null} ; [ DW_TAG_class_type ] [A] [line 20, size 8, align 8, offset 0] [def] [from ]
+!40 = !{!41, !43}
+!41 = !{!"0x2e\00foo\00foo\00_ZN1A3fooE4SVal\0022\000\000\000\006\00256\000\0022", !1, !39, !36, null, null, null, i32 0, !42} ; [ DW_TAG_subprogram ] [line 22] [foo]
+!42 = !{i32 786468}
+!43 = !{!"0x2e\00A\00A\00\0020\000\000\000\006\00320\000\0020", !1, !39, !44, null, null, null, i32 0, !46} ; [ DW_TAG_subprogram ] [line 20] [A]
+!44 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !45, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!45 = !{null, !38}
+!46 = !{i32 786468}
+!47 = !{i32 2, !"Dwarf Version", i32 3}
+!48 = !{!"0x101\00v\0016777235\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [v] [line 19]
+!49 = !MDLocation(line: 19, scope: !4)
+!50 = !{!"0x100\00v\0026\000", !29, !5, !9} ; [ DW_TAG_auto_variable ] [v] [line 26]
+!51 = !MDLocation(line: 26, scope: !29)
+!52 = !MDLocation(line: 27, scope: !29)
+!53 = !MDLocation(line: 28, scope: !29)
+!54 = !{!"0x100\00a\0029\000", !29, !5, !39} ; [ DW_TAG_auto_variable ] [a] [line 29]
+!55 = !MDLocation(line: 29, scope: !29)
+!56 = !MDLocation(line: 30, scope: !29)
+!57 = !MDLocation(line: 31, scope: !29)
+!58 = !MDLocation(line: 32, scope: !29)
+!59 = !{!"0x101\00this\0016777238\001088", !35, !5, !60} ; [ DW_TAG_arg_variable ] [this] [line 22]
+!60 = !{!"0xf\00\000\0064\0064\000\000", null, null, !39} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!61 = !MDLocation(line: 22, scope: !35)
+!62 = !{!"0x101\00v\0033554454\008192", !35, !5, !9} ; [ DW_TAG_arg_variable ] [v] [line 22]
+!63 = !{!"0x101\00this\0016777230\001088", !33, !5, !64} ; [ DW_TAG_arg_variable ] [this] [line 14]
+!64 = !{!"0xf\00\000\0064\0064\000\000", null, null, !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from SVal]
+!65 = !MDLocation(line: 14, scope: !33)
+!66 = !{!"0x101\00this\0016777230\001088", !34, !5, !64} ; [ DW_TAG_arg_variable ] [this] [line 14]
+!67 = !MDLocation(line: 14, scope: !34)
+!68 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/rvalue-ref.ll b/test/DebugInfo/X86/rvalue-ref.ll
index b8ed0218568f..3829966482fe 100644
--- a/test/DebugInfo/X86/rvalue-ref.ll
+++ b/test/DebugInfo/X86/rvalue-ref.ll
@@ -9,33 +9,33 @@ define void @_Z3fooOi(i32* %i) uwtable ssp {
 entry:
   %i.addr = alloca i32*, align 8
   store i32* %i, i32** %i.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i32** %i.addr}, metadata !11), !dbg !12
+  call void @llvm.dbg.declare(metadata i32** %i.addr, metadata !11, metadata !{!"0x102"}), !dbg !12
   %0 = load i32** %i.addr, align 8, !dbg !13
   %1 = load i32* %0, align 4, !dbg !13
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %1), !dbg !13
   ret void, !dbg !15
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare i32 @printf(i8*, ...)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!17}
 
-!0 = metadata !{i32 786449, metadata !16, i32 4, metadata !"clang version 3.2 (trunk 157054) (llvm/trunk 157060)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !16, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooOi", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @_Z3fooOi, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !16} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_rvalue_reference_type ]
-!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!11 = metadata !{i32 786689, metadata !5, metadata !"i", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!12 = metadata !{i32 4, i32 17, metadata !5, null}
-!13 = metadata !{i32 6, i32 3, metadata !14, null}
-!14 = metadata !{i32 786443, metadata !16, metadata !5, i32 5, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!15 = metadata !{i32 7, i32 1, metadata !14, null}
-!16 = metadata !{metadata !"foo.cpp", metadata !"/Users/echristo/tmp"}
-!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.2 (trunk 157054) (llvm/trunk 157060)\000\00\000\00\000", !16, !1, !1, !3, !1,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00foo\00foo\00_Z3fooOi\004\000\001\000\006\00256\000\005", !16, !6, !7, null, void (i32*)* @_Z3fooOi, null, null, !1} ; [ DW_TAG_subprogram ]
+!6 = !{!"0x29", !16} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9}
+!9 = !{!"0x42\00\000\000\000\000\000", null, null, !10} ; [ DW_TAG_rvalue_reference_type ]
+!10 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!11 = !{!"0x101\00i\0016777220\000", !5, !6, !9} ; [ DW_TAG_arg_variable ]
+!12 = !MDLocation(line: 4, column: 17, scope: !5)
+!13 = !MDLocation(line: 6, column: 3, scope: !14)
+!14 = !{!"0xb\005\001\000", !16, !5} ; [ DW_TAG_lexical_block ]
+!15 = !MDLocation(line: 7, column: 1, scope: !14)
+!16 = !{!"foo.cpp", !"/Users/echristo/tmp"}
+!17 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/sret.ll b/test/DebugInfo/X86/sret.ll
index faf51583848f..42d65695f63c 100644
--- a/test/DebugInfo/X86/sret.ll
+++ b/test/DebugInfo/X86/sret.ll
@@ -3,8 +3,8 @@
 
 ; Based on the debuginfo-tests/sret.cpp code.
 
-; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x5b59949640ec1580)
-; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x5b59949640ec1580)
+; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x51ac5644b1937aa1)
+; CHECK: DW_AT_GNU_dwo_id [DW_FORM_data8] (0x51ac5644b1937aa1)
 
 %class.A = type { i32 (...)**, i32 }
 %class.B = type { i8 }
@@ -23,9 +23,9 @@ entry:
   %this.addr = alloca %class.A*, align 8
   %i.addr = alloca i32, align 4
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !67), !dbg !69
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !67, metadata !{!"0x102"}), !dbg !69
   store i32 %i, i32* %i.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !70), !dbg !71
+  call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !70, metadata !{!"0x102"}), !dbg !71
   %this1 = load %class.A** %this.addr
   %0 = bitcast %class.A* %this1 to i8***, !dbg !72
   store i8** getelementptr inbounds ([4 x i8*]* @_ZTV1A, i64 0, i64 2), i8*** %0, !dbg !72
@@ -36,7 +36,7 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind uwtable
 define void @_ZN1AC2ERKS_(%class.A* %this, %class.A* %rhs) unnamed_addr #0 align 2 {
@@ -44,9 +44,9 @@ entry:
   %this.addr = alloca %class.A*, align 8
   %rhs.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !74), !dbg !75
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !74, metadata !{!"0x102"}), !dbg !75
   store %class.A* %rhs, %class.A** %rhs.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %rhs.addr}, metadata !76), !dbg !77
+  call void @llvm.dbg.declare(metadata %class.A** %rhs.addr, metadata !76, metadata !{!"0x102"}), !dbg !77
   %this1 = load %class.A** %this.addr
   %0 = bitcast %class.A* %this1 to i8***, !dbg !78
   store i8** getelementptr inbounds ([4 x i8*]* @_ZTV1A, i64 0, i64 2), i8*** %0, !dbg !78
@@ -64,9 +64,9 @@ entry:
   %this.addr = alloca %class.A*, align 8
   %rhs.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !80), !dbg !81
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !80, metadata !{!"0x102"}), !dbg !81
   store %class.A* %rhs, %class.A** %rhs.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %rhs.addr}, metadata !82), !dbg !83
+  call void @llvm.dbg.declare(metadata %class.A** %rhs.addr, metadata !82, metadata !{!"0x102"}), !dbg !83
   %this1 = load %class.A** %this.addr
   %0 = load %class.A** %rhs.addr, align 8, !dbg !84
   %m_int = getelementptr inbounds %class.A* %0, i32 0, i32 1, !dbg !84
@@ -81,7 +81,7 @@ define i32 @_ZN1A7get_intEv(%class.A* %this) #0 align 2 {
 entry:
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !86), !dbg !87
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !86, metadata !{!"0x102"}), !dbg !87
   %this1 = load %class.A** %this.addr
   %m_int = getelementptr inbounds %class.A* %this1, i32 0, i32 1, !dbg !88
   %0 = load i32* %m_int, align 4, !dbg !88
@@ -95,10 +95,10 @@ entry:
   %nrvo = alloca i1
   %cleanup.dest.slot = alloca i32
   store %class.B* %this, %class.B** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.B** %this.addr}, metadata !89), !dbg !91
+  call void @llvm.dbg.declare(metadata %class.B** %this.addr, metadata !89, metadata !{!"0x102"}), !dbg !91
   %this1 = load %class.B** %this.addr
   store i1 false, i1* %nrvo, !dbg !92
-  call void @llvm.dbg.declare(metadata !{%class.A* %agg.result}, metadata !93), !dbg !92
+  call void @llvm.dbg.declare(metadata %class.A* %agg.result, metadata !93, metadata !{!"0x102"}), !dbg !92
   call void @_ZN1AC1Ei(%class.A* %agg.result, i32 12), !dbg !92
   store i1 true, i1* %nrvo, !dbg !94
   store i32 1, i32* %cleanup.dest.slot
@@ -118,7 +118,7 @@ define linkonce_odr void @_ZN1AD2Ev(%class.A* %this) unnamed_addr #0 align 2 {
 entry:
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !101), !dbg !102
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !101, metadata !{!"0x102"}), !dbg !102
   %this1 = load %class.A** %this.addr
   ret void, !dbg !103
 }
@@ -138,12 +138,12 @@ entry:
   %cleanup.dest.slot = alloca i32
   store i32 0, i32* %retval
   store i32 %argc, i32* %argc.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !104), !dbg !105
+  call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !104, metadata !{!"0x102"}), !dbg !105
   store i8** %argv, i8*** %argv.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !106), !dbg !105
-  call void @llvm.dbg.declare(metadata !{%class.B* %b}, metadata !107), !dbg !108
+  call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !106, metadata !{!"0x102"}), !dbg !105
+  call void @llvm.dbg.declare(metadata %class.B* %b, metadata !107, metadata !{!"0x102"}), !dbg !108
   call void @_ZN1BC2Ev(%class.B* %b), !dbg !108
-  call void @llvm.dbg.declare(metadata !{i32* %return_val}, metadata !109), !dbg !110
+  call void @llvm.dbg.declare(metadata i32* %return_val, metadata !109, metadata !{!"0x102"}), !dbg !110
   call void @_ZN1B9AInstanceEv(%class.A* sret %temp.lvalue, %class.B* %b), !dbg !110
   %call = invoke i32 @_ZN1A7get_intEv(%class.A* %temp.lvalue)
           to label %invoke.cont unwind label %lpad, !dbg !110
@@ -151,7 +151,7 @@ entry:
 invoke.cont:                                      ; preds = %entry
   call void @_ZN1AD2Ev(%class.A* %temp.lvalue), !dbg !111
   store i32 %call, i32* %return_val, align 4, !dbg !111
-  call void @llvm.dbg.declare(metadata !{%class.A* %a}, metadata !113), !dbg !114
+  call void @llvm.dbg.declare(metadata %class.A* %a, metadata !113, metadata !{!"0x102"}), !dbg !114
   call void @_ZN1B9AInstanceEv(%class.A* sret %a, %class.B* %b), !dbg !114
   %0 = load i32* %return_val, align 4, !dbg !115
   store i32 %0, i32* %retval, !dbg !115
@@ -193,7 +193,7 @@ define linkonce_odr void @_ZN1BC2Ev(%class.B* %this) unnamed_addr #0 align 2 {
 entry:
   %this.addr = alloca %class.B*, align 8
   store %class.B* %this, %class.B** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.B** %this.addr}, metadata !123), !dbg !124
+  call void @llvm.dbg.declare(metadata %class.B** %this.addr, metadata !123, metadata !{!"0x102"}), !dbg !124
   %this1 = load %class.B** %this.addr
   ret void, !dbg !125
 }
@@ -218,7 +218,7 @@ entry:
   %exn.slot = alloca i8*
   %ehselector.slot = alloca i32
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !126), !dbg !127
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !126, metadata !{!"0x102"}), !dbg !127
   %this1 = load %class.A** %this.addr
   invoke void @_ZN1AD2Ev(%class.A* %this1)
           to label %invoke.cont unwind label %lpad, !dbg !128
@@ -263,131 +263,131 @@ attributes #7 = { builtin nounwind }
 !llvm.module.flags = !{!64, !65}
 !llvm.ident = !{!66}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (trunk 203283) (llvm/trunk 203307)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !48, metadata !2, metadata !2, metadata !"sret.dwo", i32 1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/sret.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"sret.cpp", metadata !"/usr/local/google/home/echristo/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !37}
-!4 = metadata !{i32 786434, metadata !1, null, metadata !"A", i32 1, i64 128, i64 64, i32 0, i32 0, null, metadata !5, i32 0, metadata !"_ZTS1A", null, metadata !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 1, size 128, align 64, offset 0] [def] [from ]
-!5 = metadata !{metadata !6, metadata !13, metadata !14, metadata !19, metadata !25, metadata !29, metadata !33}
-!6 = metadata !{i32 786445, metadata !1, metadata !7, metadata !"_vptr$A", i32 0, i64 64, i64 0, i64 0, i32 64, metadata !8} ; [ DW_TAG_member ] [_vptr$A] [line 0, size 64, align 0, offset 0] [artificial] [from ]
-!7 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/sret.cpp]
-!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __vtbl_ptr_type]
-!9 = metadata !{i32 786447, null, null, metadata !"__vtbl_ptr_type", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [__vtbl_ptr_type] [line 0, size 64, align 0, offset 0] [from ]
-!10 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!11 = metadata !{metadata !12}
-!12 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!13 = metadata !{i32 786445, metadata !1, metadata !"_ZTS1A", metadata !"m_int", i32 13, i64 32, i64 32, i64 64, i32 2, metadata !12} ; [ DW_TAG_member ] [m_int] [line 13, size 32, align 32, offset 64] [protected] [from int]
-!14 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"", i32 4, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 4} ; [ DW_TAG_subprogram ] [line 4] [A]
-!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{null, metadata !17, metadata !12}
-!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
-!19 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"", i32 5, metadata !20, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 5} ; [ DW_TAG_subprogram ] [line 5] [A]
-!20 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!21 = metadata !{null, metadata !17, metadata !22}
-!22 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !23} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
-!23 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS1A"} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from _ZTS1A]
-!25 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"operator=", metadata !"operator=", metadata !"_ZN1AaSERKS_", i32 7, metadata !26, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 7} ; [ DW_TAG_subprogram ] [line 7] [operator=]
-!26 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !27, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!27 = metadata !{metadata !22, metadata !17, metadata !22}
-!29 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"~A", metadata !"~A", metadata !"", i32 8, metadata !30, i1 false, i1 false, i32 1, i32 0, metadata !"_ZTS1A", i32 256, i1 false, null, null, i32 0, null, i32 8} ; [ DW_TAG_subprogram ] [line 8] [~A]
-!30 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !31, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!31 = metadata !{null, metadata !17}
-!33 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"get_int", metadata !"get_int", metadata !"_ZN1A7get_intEv", i32 10, metadata !34, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 10} ; [ DW_TAG_subprogram ] [line 10] [get_int]
-!34 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !35, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!35 = metadata !{metadata !12, metadata !17}
-!37 = metadata !{i32 786434, metadata !1, null, metadata !"B", i32 38, i64 8, i64 8, i32 0, i32 0, null, metadata !38, i32 0, null, null, metadata !"_ZTS1B"} ; [ DW_TAG_class_type ] [B] [line 38, size 8, align 8, offset 0] [def] [from ]
-!38 = metadata !{metadata !39, metadata !44}
-!39 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1B", metadata !"B", metadata !"B", metadata !"", i32 41, metadata !40, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 41} ; [ DW_TAG_subprogram ] [line 41] [B]
-!40 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !41, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!41 = metadata !{null, metadata !42}
-!42 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1B]
-!44 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1B", metadata !"AInstance", metadata !"AInstance", metadata !"_ZN1B9AInstanceEv", i32 43, metadata !45, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 43} ; [ DW_TAG_subprogram ] [line 43] [AInstance]
-!45 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !46, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!46 = metadata !{metadata !4, metadata !42}
-!48 = metadata !{metadata !49, metadata !50, metadata !51, metadata !52, metadata !53, metadata !54, metadata !61, metadata !62, metadata !63}
-!49 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"_ZN1AC2Ei", i32 16, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*, i32)* @_ZN1AC2Ei, null, metadata !14, metadata !2, i32 18} ; [ DW_TAG_subprogram ] [line 16] [def] [scope 18] [A]
-!50 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"_ZN1AC2ERKS_", i32 21, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*, %class.A*)* @_ZN1AC2ERKS_, null, metadata !19, metadata !2, i32 23} ; [ DW_TAG_subprogram ] [line 21] [def] [scope 23] [A]
-!51 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"operator=", metadata !"operator=", metadata !"_ZN1AaSERKS_", i32 27, metadata !26, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, %class.A* (%class.A*, %class.A*)* @_ZN1AaSERKS_, null, metadata !25, metadata !2, i32 28} ; [ DW_TAG_subprogram ] [line 27] [def] [scope 28] [operator=]
-!52 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"get_int", metadata !"get_int", metadata !"_ZN1A7get_intEv", i32 33, metadata !34, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*)* @_ZN1A7get_intEv, null, metadata !33, metadata !2, i32 34} ; [ DW_TAG_subprogram ] [line 33] [def] [scope 34] [get_int]
-!53 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1B", metadata !"AInstance", metadata !"AInstance", metadata !"_ZN1B9AInstanceEv", i32 47, metadata !45, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*, %class.B*)* @_ZN1B9AInstanceEv, null, metadata !44, metadata !2, i32 48} ; [ DW_TAG_subprogram ] [line 47] [def] [scope 48] [AInstance]
-!54 = metadata !{i32 786478, metadata !1, metadata !7, metadata !"main", metadata !"main", metadata !"", i32 53, metadata !55, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !2, i32 54} ; [ DW_TAG_subprogram ] [line 53] [def] [scope 54] [main]
-!55 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !56, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!56 = metadata !{metadata !12, metadata !12, metadata !57}
-!57 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !58} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!58 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !59} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!59 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !60} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from char]
-!60 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!61 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"~A", metadata !"~A", metadata !"_ZN1AD0Ev", i32 8, metadata !30, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AD0Ev, null, metadata !29, metadata !2, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [~A]
-!62 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1B", metadata !"B", metadata !"B", metadata !"_ZN1BC2Ev", i32 41, metadata !40, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.B*)* @_ZN1BC2Ev, null, metadata !39, metadata !2, i32 41} ; [ DW_TAG_subprogram ] [line 41] [def] [B]
-!63 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"~A", metadata !"~A", metadata !"_ZN1AD2Ev", i32 8, metadata !30, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1AD2Ev, null, metadata !29, metadata !2, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [~A]
-!64 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!65 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!66 = metadata !{metadata !"clang version 3.5.0 (trunk 203283) (llvm/trunk 203307)"}
-!67 = metadata !{i32 786689, metadata !49, metadata !"this", null, i32 16777216, metadata !68, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!68 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
-!69 = metadata !{i32 0, i32 0, metadata !49, null}
-!70 = metadata !{i32 786689, metadata !49, metadata !"i", metadata !7, i32 33554448, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 16]
-!71 = metadata !{i32 16, i32 0, metadata !49, null}
-!72 = metadata !{i32 18, i32 0, metadata !49, null}
-!73 = metadata !{i32 19, i32 0, metadata !49, null}
-!74 = metadata !{i32 786689, metadata !50, metadata !"this", null, i32 16777216, metadata !68, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!75 = metadata !{i32 0, i32 0, metadata !50, null}
-!76 = metadata !{i32 786689, metadata !50, metadata !"rhs", metadata !7, i32 33554453, metadata !22, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [rhs] [line 21]
-!77 = metadata !{i32 21, i32 0, metadata !50, null}
-!78 = metadata !{i32 23, i32 0, metadata !50, null}
-!79 = metadata !{i32 24, i32 0, metadata !50, null}
-!80 = metadata !{i32 786689, metadata !51, metadata !"this", null, i32 16777216, metadata !68, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!81 = metadata !{i32 0, i32 0, metadata !51, null}
-!82 = metadata !{i32 786689, metadata !51, metadata !"rhs", metadata !7, i32 33554459, metadata !22, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [rhs] [line 27]
-!83 = metadata !{i32 27, i32 0, metadata !51, null}
-!84 = metadata !{i32 29, i32 0, metadata !51, null}
-!85 = metadata !{i32 30, i32 0, metadata !51, null}
-!86 = metadata !{i32 786689, metadata !52, metadata !"this", null, i32 16777216, metadata !68, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!87 = metadata !{i32 0, i32 0, metadata !52, null}
-!88 = metadata !{i32 35, i32 0, metadata !52, null}
-!89 = metadata !{i32 786689, metadata !53, metadata !"this", null, i32 16777216, metadata !90, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!90 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1B]
-!91 = metadata !{i32 0, i32 0, metadata !53, null}
-!92 = metadata !{i32 49, i32 0, metadata !53, null}
-!93 = metadata !{i32 786688, metadata !53, metadata !"a", metadata !7, i32 49, metadata !4, i32 8192, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 49]
-!94 = metadata !{i32 50, i32 0, metadata !53, null}
-!95 = metadata !{i32 51, i32 0, metadata !53, null}
-!96 = metadata !{i32 51, i32 0, metadata !97, null}
-!97 = metadata !{i32 786443, metadata !1, metadata !53, i32 51, i32 0, i32 2, i32 5} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
-!98 = metadata !{i32 51, i32 0, metadata !99, null}
-!99 = metadata !{i32 786443, metadata !1, metadata !100, i32 51, i32 0, i32 3, i32 6} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
-!100 = metadata !{i32 786443, metadata !1, metadata !53, i32 51, i32 0, i32 1, i32 4} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
-!101 = metadata !{i32 786689, metadata !63, metadata !"this", null, i32 16777216, metadata !68, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!102 = metadata !{i32 0, i32 0, metadata !63, null}
-!103 = metadata !{i32 8, i32 0, metadata !63, null} ; [ DW_TAG_imported_declaration ]
-!104 = metadata !{i32 786689, metadata !54, metadata !"argc", metadata !7, i32 16777269, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 53]
-!105 = metadata !{i32 53, i32 0, metadata !54, null}
-!106 = metadata !{i32 786689, metadata !54, metadata !"argv", metadata !7, i32 33554485, metadata !57, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 53]
-!107 = metadata !{i32 786688, metadata !54, metadata !"b", metadata !7, i32 55, metadata !37, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 55]
-!108 = metadata !{i32 55, i32 0, metadata !54, null}
-!109 = metadata !{i32 786688, metadata !54, metadata !"return_val", metadata !7, i32 56, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [return_val] [line 56]
-!110 = metadata !{i32 56, i32 0, metadata !54, null}
-!111 = metadata !{i32 56, i32 0, metadata !112, null}
-!112 = metadata !{i32 786443, metadata !1, metadata !54, i32 56, i32 0, i32 1, i32 7} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
-!113 = metadata !{i32 786688, metadata !54, metadata !"a", metadata !7, i32 58, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 58]
-!114 = metadata !{i32 58, i32 0, metadata !54, null} ; [ DW_TAG_imported_module ]
-!115 = metadata !{i32 59, i32 0, metadata !54, null}
-!116 = metadata !{i32 60, i32 0, metadata !54, null}
-!117 = metadata !{i32 60, i32 0, metadata !118, null}
-!118 = metadata !{i32 786443, metadata !1, metadata !54, i32 60, i32 0, i32 1, i32 8} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
-!119 = metadata !{i32 60, i32 0, metadata !120, null}
-!120 = metadata !{i32 786443, metadata !1, metadata !54, i32 60, i32 0, i32 3, i32 10} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
-!121 = metadata !{i32 60, i32 0, metadata !122, null}
-!122 = metadata !{i32 786443, metadata !1, metadata !54, i32 60, i32 0, i32 2, i32 9} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
-!123 = metadata !{i32 786689, metadata !62, metadata !"this", null, i32 16777216, metadata !90, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!124 = metadata !{i32 0, i32 0, metadata !62, null}
-!125 = metadata !{i32 41, i32 0, metadata !62, null}
-!126 = metadata !{i32 786689, metadata !61, metadata !"this", null, i32 16777216, metadata !68, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!127 = metadata !{i32 0, i32 0, metadata !61, null}
-!128 = metadata !{i32 8, i32 0, metadata !61, null} ; [ DW_TAG_imported_declaration ]
-!129 = metadata !{i32 8, i32 0, metadata !130, null} ; [ DW_TAG_imported_declaration ]
-!130 = metadata !{i32 786443, metadata !1, metadata !61, i32 8, i32 0, i32 1, i32 11} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
-!131 = metadata !{i32 8, i32 0, metadata !132, null} ; [ DW_TAG_imported_declaration ]
-!132 = metadata !{i32 786443, metadata !1, metadata !61, i32 8, i32 0, i32 2, i32 12} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
-!133 = metadata !{i32 8, i32 0, metadata !134, null} ; [ DW_TAG_imported_declaration ]
-!134 = metadata !{i32 786443, metadata !1, metadata !61, i32 8, i32 0, i32 3, i32 13} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
+!0 = !{!"0x11\004\00clang version 3.5.0 (trunk 203283) (llvm/trunk 203307)\000\00\000\00sret.dwo\001", !1, !2, !3, !48, !2, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/sret.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"sret.cpp", !"/usr/local/google/home/echristo/tmp"}
+!2 = !{}
+!3 = !{!4, !37}
+!4 = !{!"0x2\00A\001\00128\0064\000\000\000", !1, null, null, !5, !"_ZTS1A", null, !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 1, size 128, align 64, offset 0] [def] [from ]
+!5 = !{!6, !13, !14, !19, !25, !29, !33}
+!6 = !{!"0xd\00_vptr$A\000\0064\000\000\0064", !1, !7, !8} ; [ DW_TAG_member ] [_vptr$A] [line 0, size 64, align 0, offset 0] [artificial] [from ]
+!7 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/sret.cpp]
+!8 = !{!"0xf\00\000\0064\000\000\000", null, null, !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __vtbl_ptr_type]
+!9 = !{!"0xf\00__vtbl_ptr_type\000\0064\000\000\000", null, null, !10} ; [ DW_TAG_pointer_type ] [__vtbl_ptr_type] [line 0, size 64, align 0, offset 0] [from ]
+!10 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !{!12}
+!12 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!13 = !{!"0xd\00m_int\0013\0032\0032\0064\002", !1, !"_ZTS1A", !12} ; [ DW_TAG_member ] [m_int] [line 13, size 32, align 32, offset 64] [protected] [from int]
+!14 = !{!"0x2e\00A\00A\00\004\000\000\000\006\00256\000\004", !1, !"_ZTS1A", !15, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 4] [A]
+!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{null, !17, !12}
+!17 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!19 = !{!"0x2e\00A\00A\00\005\000\000\000\006\00256\000\005", !1, !"_ZTS1A", !20, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 5] [A]
+!20 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !21, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!21 = !{null, !17, !22}
+!22 = !{!"0x10\00\000\000\000\000\000", null, null, !23} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from ]
+!23 = !{!"0x26\00\000\000\000\000\000", null, null, !"_ZTS1A"} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from _ZTS1A]
+!25 = !{!"0x2e\00operator=\00operator=\00_ZN1AaSERKS_\007\000\000\000\006\00256\000\007", !1, !"_ZTS1A", !26, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 7] [operator=]
+!26 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !27, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!27 = !{!22, !17, !22}
+!29 = !{!"0x2e\00~A\00~A\00\008\000\000\001\006\00256\000\008", !1, !"_ZTS1A", !30, !"_ZTS1A", null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 8] [~A]
+!30 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !31, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!31 = !{null, !17}
+!33 = !{!"0x2e\00get_int\00get_int\00_ZN1A7get_intEv\0010\000\000\000\006\00256\000\0010", !1, !"_ZTS1A", !34, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 10] [get_int]
+!34 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !35, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!35 = !{!12, !17}
+!37 = !{!"0x2\00B\0038\008\008\000\000\000", !1, null, null, !38, null, null, !"_ZTS1B"} ; [ DW_TAG_class_type ] [B] [line 38, size 8, align 8, offset 0] [def] [from ]
+!38 = !{!39, !44}
+!39 = !{!"0x2e\00B\00B\00\0041\000\000\000\006\00256\000\0041", !1, !"_ZTS1B", !40, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 41] [B]
+!40 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !41, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!41 = !{null, !42}
+!42 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1B]
+!44 = !{!"0x2e\00AInstance\00AInstance\00_ZN1B9AInstanceEv\0043\000\000\000\006\00256\000\0043", !1, !"_ZTS1B", !45, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 43] [AInstance]
+!45 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !46, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!46 = !{!4, !42}
+!48 = !{!49, !50, !51, !52, !53, !54, !61, !62, !63}
+!49 = !{!"0x2e\00A\00A\00_ZN1AC2Ei\0016\000\001\000\006\00256\000\0018", !1, !"_ZTS1A", !15, null, void (%class.A*, i32)* @_ZN1AC2Ei, null, !14, !2} ; [ DW_TAG_subprogram ] [line 16] [def] [scope 18] [A]
+!50 = !{!"0x2e\00A\00A\00_ZN1AC2ERKS_\0021\000\001\000\006\00256\000\0023", !1, !"_ZTS1A", !20, null, void (%class.A*, %class.A*)* @_ZN1AC2ERKS_, null, !19, !2} ; [ DW_TAG_subprogram ] [line 21] [def] [scope 23] [A]
+!51 = !{!"0x2e\00operator=\00operator=\00_ZN1AaSERKS_\0027\000\001\000\006\00256\000\0028", !1, !"_ZTS1A", !26, null, %class.A* (%class.A*, %class.A*)* @_ZN1AaSERKS_, null, !25, !2} ; [ DW_TAG_subprogram ] [line 27] [def] [scope 28] [operator=]
+!52 = !{!"0x2e\00get_int\00get_int\00_ZN1A7get_intEv\0033\000\001\000\006\00256\000\0034", !1, !"_ZTS1A", !34, null, i32 (%class.A*)* @_ZN1A7get_intEv, null, !33, !2} ; [ DW_TAG_subprogram ] [line 33] [def] [scope 34] [get_int]
+!53 = !{!"0x2e\00AInstance\00AInstance\00_ZN1B9AInstanceEv\0047\000\001\000\006\00256\000\0048", !1, !"_ZTS1B", !45, null, void (%class.A*, %class.B*)* @_ZN1B9AInstanceEv, null, !44, !2} ; [ DW_TAG_subprogram ] [line 47] [def] [scope 48] [AInstance]
+!54 = !{!"0x2e\00main\00main\00\0053\000\001\000\006\00256\000\0054", !1, !7, !55, null, i32 (i32, i8**)* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 53] [def] [scope 54] [main]
+!55 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !56, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!56 = !{!12, !12, !57}
+!57 = !{!"0xf\00\000\0064\0064\000\000", null, null, !58} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!58 = !{!"0xf\00\000\0064\0064\000\000", null, null, !59} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!59 = !{!"0x26\00\000\000\000\000\000", null, null, !60} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from char]
+!60 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!61 = !{!"0x2e\00~A\00~A\00_ZN1AD0Ev\008\000\001\000\006\00256\000\008", !1, !"_ZTS1A", !30, null, void (%class.A*)* @_ZN1AD0Ev, null, !29, !2} ; [ DW_TAG_subprogram ] [line 8] [def] [~A]
+!62 = !{!"0x2e\00B\00B\00_ZN1BC2Ev\0041\000\001\000\006\00256\000\0041", !1, !"_ZTS1B", !40, null, void (%class.B*)* @_ZN1BC2Ev, null, !39, !2} ; [ DW_TAG_subprogram ] [line 41] [def] [B]
+!63 = !{!"0x2e\00~A\00~A\00_ZN1AD2Ev\008\000\001\000\006\00256\000\008", !1, !"_ZTS1A", !30, null, void (%class.A*)* @_ZN1AD2Ev, null, !29, !2} ; [ DW_TAG_subprogram ] [line 8] [def] [~A]
+!64 = !{i32 2, !"Dwarf Version", i32 4}
+!65 = !{i32 1, !"Debug Info Version", i32 2}
+!66 = !{!"clang version 3.5.0 (trunk 203283) (llvm/trunk 203307)"}
+!67 = !{!"0x101\00this\0016777216\001088", !49, null, !68} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!68 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
+!69 = !MDLocation(line: 0, scope: !49)
+!70 = !{!"0x101\00i\0033554448\000", !49, !7, !12} ; [ DW_TAG_arg_variable ] [i] [line 16]
+!71 = !MDLocation(line: 16, scope: !49)
+!72 = !MDLocation(line: 18, scope: !49)
+!73 = !MDLocation(line: 19, scope: !49)
+!74 = !{!"0x101\00this\0016777216\001088", !50, null, !68} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!75 = !MDLocation(line: 0, scope: !50)
+!76 = !{!"0x101\00rhs\0033554453\000", !50, !7, !22} ; [ DW_TAG_arg_variable ] [rhs] [line 21]
+!77 = !MDLocation(line: 21, scope: !50)
+!78 = !MDLocation(line: 23, scope: !50)
+!79 = !MDLocation(line: 24, scope: !50)
+!80 = !{!"0x101\00this\0016777216\001088", !51, null, !68} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!81 = !MDLocation(line: 0, scope: !51)
+!82 = !{!"0x101\00rhs\0033554459\000", !51, !7, !22} ; [ DW_TAG_arg_variable ] [rhs] [line 27]
+!83 = !MDLocation(line: 27, scope: !51)
+!84 = !MDLocation(line: 29, scope: !51)
+!85 = !MDLocation(line: 30, scope: !51)
+!86 = !{!"0x101\00this\0016777216\001088", !52, null, !68} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!87 = !MDLocation(line: 0, scope: !52)
+!88 = !MDLocation(line: 35, scope: !52)
+!89 = !{!"0x101\00this\0016777216\001088", !53, null, !90} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!90 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1B]
+!91 = !MDLocation(line: 0, scope: !53)
+!92 = !MDLocation(line: 49, scope: !53)
+!93 = !{!"0x100\00a\0049\008192", !53, !7, !4} ; [ DW_TAG_auto_variable ] [a] [line 49]
+!94 = !MDLocation(line: 50, scope: !53)
+!95 = !MDLocation(line: 51, scope: !53)
+!96 = !MDLocation(line: 51, scope: !97)
+!97 = !{!"0xb\0051\000\002", !1, !53} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
+!98 = !MDLocation(line: 51, scope: !99)
+!99 = !{!"0xb\0051\000\003", !1, !100} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
+!100 = !{!"0xb\0051\000\001", !1, !53} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
+!101 = !{!"0x101\00this\0016777216\001088", !63, null, !68} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!102 = !MDLocation(line: 0, scope: !63)
+!103 = !MDLocation(line: 8, scope: !63)
+!104 = !{!"0x101\00argc\0016777269\000", !54, !7, !12} ; [ DW_TAG_arg_variable ] [argc] [line 53]
+!105 = !MDLocation(line: 53, scope: !54)
+!106 = !{!"0x101\00argv\0033554485\000", !54, !7, !57} ; [ DW_TAG_arg_variable ] [argv] [line 53]
+!107 = !{!"0x100\00b\0055\000", !54, !7, !37} ; [ DW_TAG_auto_variable ] [b] [line 55]
+!108 = !MDLocation(line: 55, scope: !54)
+!109 = !{!"0x100\00return_val\0056\000", !54, !7, !12} ; [ DW_TAG_auto_variable ] [return_val] [line 56]
+!110 = !MDLocation(line: 56, scope: !54)
+!111 = !MDLocation(line: 56, scope: !112)
+!112 = !{!"0xb\0056\000\001", !1, !54} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
+!113 = !{!"0x100\00a\0058\000", !54, !7, !4} ; [ DW_TAG_auto_variable ] [a] [line 58]
+!114 = !MDLocation(line: 58, scope: !54)
+!115 = !MDLocation(line: 59, scope: !54)
+!116 = !MDLocation(line: 60, scope: !54)
+!117 = !MDLocation(line: 60, scope: !118)
+!118 = !{!"0xb\0060\000\001", !1, !54} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
+!119 = !MDLocation(line: 60, scope: !120)
+!120 = !{!"0xb\0060\000\003", !1, !54} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
+!121 = !MDLocation(line: 60, scope: !122)
+!122 = !{!"0xb\0060\000\002", !1, !54} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
+!123 = !{!"0x101\00this\0016777216\001088", !62, null, !90} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!124 = !MDLocation(line: 0, scope: !62)
+!125 = !MDLocation(line: 41, scope: !62)
+!126 = !{!"0x101\00this\0016777216\001088", !61, null, !68} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!127 = !MDLocation(line: 0, scope: !61)
+!128 = !MDLocation(line: 8, scope: !61)
+!129 = !MDLocation(line: 8, scope: !130)
+!130 = !{!"0xb\008\000\001", !1, !61} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
+!131 = !MDLocation(line: 8, scope: !132)
+!132 = !{!"0xb\008\000\002", !1, !61} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
+!133 = !MDLocation(line: 8, scope: !134)
+!134 = !{!"0xb\008\000\003", !1, !61} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/sret.cpp]
diff --git a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
index 8816fe77cf01..0b2c50ec766d 100644
--- a/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
+++ b/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
@@ -60,45 +60,45 @@ define i32 @test(i32 %a) nounwind uwtable ssp {
 entry:
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !15), !dbg !16
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !15, metadata !{!"0x102"}), !dbg !16
   %0 = load i32* %a.addr, align 4, !dbg !17
   %call = call i32 @fn(i32 %0), !dbg !17
   ret i32 %call, !dbg !17
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define i32 @fn(i32 %a) nounwind uwtable ssp {
 entry:
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !19), !dbg !20
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !19, metadata !{!"0x102"}), !dbg !20
   %0 = load i32* %a.addr, align 4, !dbg !21
   ret i32 %0, !dbg !21
 }
 
 !llvm.dbg.cu = !{!0, !10}
 !llvm.module.flags = !{!25}
-!0 = metadata !{i32 786449, metadata !23, i32 12, metadata !"clang version 3.3", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !"", i32 1} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !23, metadata !6, metadata !"test", metadata !"test", metadata !"", i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @test, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [test]
-!6 = metadata !{i32 786473, metadata !23} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786449, metadata !24, i32 12, metadata !"clang version 3.3 (trunk 172862)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !11, metadata !1,  metadata !1, metadata !"", i32 1} ; [ DW_TAG_compile_unit ]
-!11 = metadata !{metadata !13}
-!13 = metadata !{i32 786478, metadata !24, metadata !14, metadata !"fn", metadata !"fn", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @fn, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [fn]
-!14 = metadata !{i32 786473, metadata !24} ; [ DW_TAG_file_type ]
-!15 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777218, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 2]
-!16 = metadata !{i32 2, i32 0, metadata !5, null}
-!17 = metadata !{i32 4, i32 0, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !23, metadata !5, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!19 = metadata !{i32 786689, metadata !13, metadata !"a", metadata !14, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 1]
-!20 = metadata !{i32 1, i32 0, metadata !13, null}
-!21 = metadata !{i32 2, i32 0, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !24, metadata !13, i32 1, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!23 = metadata !{metadata !"simple.c", metadata !"/private/tmp"}
-!24 = metadata !{metadata !"simple2.c", metadata !"/private/tmp"}
-!25 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.3\000\00\000\00\001", !23, !1, !1, !3, !1,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00test\00test\00\002\000\001\000\006\00256\000\003", !23, !6, !7, null, i32 (i32)* @test, null, null, !1} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [test]
+!6 = !{!"0x29", !23} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"0x11\0012\00clang version 3.3 (trunk 172862)\000\00\000\00\001", !24, !1, !1, !11, !1,  !1} ; [ DW_TAG_compile_unit ]
+!11 = !{!13}
+!13 = !{!"0x2e\00fn\00fn\00\001\000\001\000\006\00256\000\001", !24, !14, !7, null, i32 (i32)* @fn, null, null, !1} ; [ DW_TAG_subprogram ] [line 1] [def] [fn]
+!14 = !{!"0x29", !24} ; [ DW_TAG_file_type ]
+!15 = !{!"0x101\00a\0016777218\000", !5, !6, !9} ; [ DW_TAG_arg_variable ] [a] [line 2]
+!16 = !MDLocation(line: 2, scope: !5)
+!17 = !MDLocation(line: 4, scope: !18)
+!18 = !{!"0xb\003\000\000", !23, !5} ; [ DW_TAG_lexical_block ]
+!19 = !{!"0x101\00a\0016777217\000", !13, !14, !9} ; [ DW_TAG_arg_variable ] [a] [line 1]
+!20 = !MDLocation(line: 1, scope: !13)
+!21 = !MDLocation(line: 2, scope: !22)
+!22 = !{!"0xb\001\000\000", !24, !13} ; [ DW_TAG_lexical_block ]
+!23 = !{!"simple.c", !"/private/tmp"}
+!24 = !{!"simple2.c", !"/private/tmp"}
+!25 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/stmt-list.ll b/test/DebugInfo/X86/stmt-list.ll
index 99bd0fc1b580..abd9006ac6c6 100644
--- a/test/DebugInfo/X86/stmt-list.ll
+++ b/test/DebugInfo/X86/stmt-list.ll
@@ -12,12 +12,12 @@ entry:
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!7}
-!5 = metadata !{metadata !0}
+!5 = !{!0}
 
-!0 = metadata !{i32 786478, metadata !6, metadata !1, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
-!1 = metadata !{i32 786473, metadata !6} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !6, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !5, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !6, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null}
-!6 = metadata !{metadata !"test2.c", metadata !"/home/espindola/llvm"}
-!7 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00f\00f\00\001\000\001\000\006\00256\001\001", !6, !1, !3, null, void ()* @f, null, null, null} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!1 = !{!"0x29", !6} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 3.0 ()\001\00\000\00\000", !6, !4, !4, !5, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !6, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null}
+!6 = !{!"test2.c", !"/home/espindola/llvm"}
+!7 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/stringpool.ll b/test/DebugInfo/X86/stringpool.ll
index 846d2101e590..3d5da318ccc8 100644
--- a/test/DebugInfo/X86/stringpool.ll
+++ b/test/DebugInfo/X86/stringpool.ll
@@ -6,13 +6,13 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9}
 
-!0 = metadata !{i32 786449, metadata !8, i32 12, metadata !"clang version 3.1 (trunk 143009)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720948, i32 0, null, metadata !"yyyy", metadata !"yyyy", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @yyyy, null} ; [ DW_TAG_variable ]
-!6 = metadata !{i32 720937, metadata !8} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{metadata !"z.c", metadata !"/home/nicholas"}
+!0 = !{!"0x11\0012\00clang version 3.1 (trunk 143009)\001\00\000\00\000", !8, !1, !1, !1, !3,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x34\00yyyy\00yyyy\00\001\000\001", null, !6, !7, i32* @yyyy, null} ; [ DW_TAG_variable ]
+!6 = !{!"0x29", !8} ; [ DW_TAG_file_type ]
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!8 = !{!"z.c", !"/home/nicholas"}
 
 ; Verify that "yyyy" ended up in the stringpool.
 ; LINUX: .section .debug_str,"MS",@progbits,1
@@ -40,4 +40,4 @@
 ; DARWIN-NEXT:        .byte   9                       ## DW_AT_location
 ; DARWIN-NEXT:        .byte   3
 ; DARWIN-NEXT:        .quad   _yyyy
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!9 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/struct-loc.ll b/test/DebugInfo/X86/struct-loc.ll
index 390d8da5d0a5..b2b7e644eae3 100644
--- a/test/DebugInfo/X86/struct-loc.ll
+++ b/test/DebugInfo/X86/struct-loc.ll
@@ -14,14 +14,14 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!12}
 
-!0 = metadata !{i32 786449, metadata !11, i32 12, metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 5, metadata !7, i32 0, i32 1, %struct.foo* @f, null} ; [ DW_TAG_variable ]
-!6 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786451, metadata !11, null, metadata !"foo", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 32, align 32, offset 0] [def] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786445, metadata !11, metadata !7, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
-!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!11 = metadata !{metadata !"struct_bug.c", metadata !"/Users/echristo/tmp"}
-!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.1 (trunk 152837) (llvm/trunk 152845)\000\00\000\00\000", !11, !1, !1, !1, !3,  !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x34\00f\00f\00\005\000\001", null, !6, !7, %struct.foo* @f, null} ; [ DW_TAG_variable ]
+!6 = !{!"0x29", !11} ; [ DW_TAG_file_type ]
+!7 = !{!"0x13\00foo\001\0032\0032\000\000\000", !11, null, null, !8, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 1, size 32, align 32, offset 0] [def] [from ]
+!8 = !{!9}
+!9 = !{!"0xd\00a\002\0032\0032\000\000", !11, !7, !10} ; [ DW_TAG_member ]
+!10 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!11 = !{!"struct_bug.c", !"/Users/echristo/tmp"}
+!12 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/subrange-type.ll b/test/DebugInfo/X86/subrange-type.ll
index 14dca46c64f3..65d19f2bd442 100644
--- a/test/DebugInfo/X86/subrange-type.ll
+++ b/test/DebugInfo/X86/subrange-type.ll
@@ -12,29 +12,29 @@ entry:
   %retval = alloca i32, align 4
   %i = alloca [2 x i32], align 4
   store i32 0, i32* %retval
-  call void @llvm.dbg.declare(metadata !{[2 x i32]* %i}, metadata !10), !dbg !15
+  call void @llvm.dbg.declare(metadata [2 x i32]* %i, metadata !10, metadata !{!"0x102"}), !dbg !15
   ret i32 0, !dbg !16
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!18}
 
-!0 = metadata !{i32 786449, metadata !17, i32 12, metadata !"clang version 3.3 (trunk 171472) (llvm/trunk 171487)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !6, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !1, i32 3} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [main]
-!6 = metadata !{i32 786473, metadata !17} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786688, metadata !11, metadata !"i", metadata !6, i32 4, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 4]
-!11 = metadata !{i32 786443, metadata !6, metadata !5, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/foo.c]
-!12 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 64, i64 32, i32 0, i32 0, metadata !9, metadata !13, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 64, align 32, offset 0] [from int]
-!13 = metadata !{metadata !14}
-!14 = metadata !{i32 786465, i64 0, i64 2}        ; [ DW_TAG_subrange_type ] [0, 1]
-!15 = metadata !{i32 4, i32 0, metadata !11, null}
-!16 = metadata !{i32 6, i32 0, metadata !11, null}
-!17 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp"}
-!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.3 (trunk 171472) (llvm/trunk 171487)\000\00\000\00\000", !17, !1, !1, !3, !1,  !1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00main\00main\00\002\000\001\000\006\00256\000\003", !6, !6, !7, null, i32 ()* @main, null, null, !1} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 3] [main]
+!6 = !{!"0x29", !17} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"0x100\00i\004\000", !11, !6, !12} ; [ DW_TAG_auto_variable ] [i] [line 4]
+!11 = !{!"0xb\003\000\000", !6, !5} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/echristo/tmp/foo.c]
+!12 = !{!"0x1\00\000\0064\0032\000\000", null, null, !9, !13, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 64, align 32, offset 0] [from int]
+!13 = !{!14}
+!14 = !{!"0x21\000\002"}        ; [ DW_TAG_subrange_type ] [0, 1]
+!15 = !MDLocation(line: 4, scope: !11)
+!16 = !MDLocation(line: 6, scope: !11)
+!17 = !{!"foo.c", !"/usr/local/google/home/echristo/tmp"}
+!18 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/subreg.ll b/test/DebugInfo/X86/subreg.ll
index 22fd1a80e87b..2a1de49368ff 100644
--- a/test/DebugInfo/X86/subreg.ll
+++ b/test/DebugInfo/X86/subreg.ll
@@ -3,33 +3,32 @@
 ; We are testing that a value in a 16 bit register gets reported as
 ; being in its superregister.
 
-; CHECK: .byte   80                      # super-register
-; CHECK-NEXT:                            # DW_OP_reg0
+; CHECK: .byte   80                      # super-register DW_OP_reg0
 ; CHECK-NEXT: .byte   147                # DW_OP_piece
 ; CHECK-NEXT: .byte   2                  # 2
 
 define i16 @f(i16 signext %zzz) nounwind {
 entry:
-  call void @llvm.dbg.value(metadata !{i16 %zzz}, i64 0, metadata !0)
+  call void @llvm.dbg.value(metadata i16 %zzz, i64 0, metadata !0, metadata !{!"0x102"})
   %conv = sext i16 %zzz to i32, !dbg !7
   %conv1 = trunc i32 %conv to i16
   ret i16 %conv1
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!11}
-!9 = metadata !{metadata !1}
+!9 = !{!1}
 
-!0 = metadata !{i32 786689, metadata !1, metadata !"zzz", metadata !2, i32 16777219, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !10, metadata !2, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i16 (i16)* @f, null, null, null, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
-!2 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !10, i32 12, metadata !"clang version 3.0 ()", i1 false, metadata !"", i32 0, metadata !5, metadata !5, metadata !9, null,  null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !10, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{null}
-!6 = metadata !{i32 786468, null, metadata !3, metadata !"short", i32 0, i64 16, i64 16, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 4, i32 22, metadata !8, null}
-!8 = metadata !{i32 786443, metadata !10, metadata !1, i32 3, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{metadata !"/home/espindola/llvm/test.c", metadata !"/home/espindola/tmpfs/build"}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x101\00zzz\0016777219\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00f\00f\00\003\000\001\000\006\00256\000\003", !10, !2, !4, null, i16 (i16)* @f, null, null, null} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
+!2 = !{!"0x29", !10} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\0012\00clang version 3.0 ()\000\00\000\00\001", !10, !5, !5, !9, null,  null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !10, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{null}
+!6 = !{!"0x24\00short\000\0016\0016\000\000\005", null, !3} ; [ DW_TAG_base_type ]
+!7 = !MDLocation(line: 4, column: 22, scope: !8)
+!8 = !{!"0xb\003\0019\000", !10, !1} ; [ DW_TAG_lexical_block ]
+!10 = !{!"/home/espindola/llvm/test.c", !"/home/espindola/tmpfs/build"}
+!11 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/subregisters.ll b/test/DebugInfo/X86/subregisters.ll
index d46a95f2c994..8e912adbed1c 100644
--- a/test/DebugInfo/X86/subregisters.ll
+++ b/test/DebugInfo/X86/subregisters.ll
@@ -40,16 +40,16 @@ target triple = "x86_64-apple-macosx10.9.0"
 ; Function Attrs: noinline nounwind ssp uwtable
 define void @doSomething(%struct.bar* nocapture readonly %b) #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !{%struct.bar* %b}, i64 0, metadata !15), !dbg !25
+  tail call void @llvm.dbg.value(metadata %struct.bar* %b, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !25
   %a1 = getelementptr inbounds %struct.bar* %b, i64 0, i32 0, !dbg !26
   %0 = load i32* %a1, align 4, !dbg !26, !tbaa !27
-  tail call void @llvm.dbg.value(metadata !{i32 %0}, i64 0, metadata !16), !dbg !26
+  tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !26
   %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %0) #4, !dbg !32
   ret void, !dbg !33
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind
 declare i32 @printf(i8* nocapture readonly, ...) #2
@@ -59,14 +59,14 @@ define i32 @main() #3 {
 entry:
   %myBar = alloca i64, align 8, !dbg !34
   %tmpcast = bitcast i64* %myBar to %struct.bar*, !dbg !34
-  tail call void @llvm.dbg.declare(metadata !{%struct.bar* %tmpcast}, metadata !21), !dbg !34
+  tail call void @llvm.dbg.declare(metadata %struct.bar* %tmpcast, metadata !21, metadata !{!"0x102"}), !dbg !34
   store i64 17179869187, i64* %myBar, align 8, !dbg !34
   call void @doSomething(%struct.bar* %tmpcast), !dbg !35
   ret i32 0, !dbg !36
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { noinline nounwind ssp uwtable }
 attributes #1 = { nounwind readnone }
@@ -78,40 +78,40 @@ attributes #4 = { nounwind }
 !llvm.module.flags = !{!22, !23}
 !llvm.ident = !{!24}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [subregisters.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"subregisters.c", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !17}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"doSomething", metadata !"doSomething", metadata !"", i32 10, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%struct.bar*)* @doSomething, null, null, metadata !14, i32 11} ; [ DW_TAG_subprogram ] [line 10] [def] [scope 11] [doSomething]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [subregisters.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null, metadata !8}
-!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from bar]
-!9 = metadata !{i32 786451, metadata !1, null, metadata !"bar", i32 3, i64 64, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [bar] [line 3, size 64, align 32, offset 0] [def] [from ]
-!10 = metadata !{metadata !11, metadata !13}
-!11 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"a", i32 4, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ] [a] [line 4, size 32, align 32, offset 0] [from int]
-!12 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!13 = metadata !{i32 786445, metadata !1, metadata !9, metadata !"b", i32 5, i64 32, i64 32, i64 32, i32 0, metadata !12} ; [ DW_TAG_member ] [b] [line 5, size 32, align 32, offset 32] [from int]
-!14 = metadata !{metadata !15, metadata !16}
-!15 = metadata !{i32 786689, metadata !4, metadata !"b", metadata !5, i32 16777226, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 10]
-!16 = metadata !{i32 786688, metadata !4, metadata !"a", metadata !5, i32 12, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 12]
-!17 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @main, null, null, metadata !20, i32 17} ; [ DW_TAG_subprogram ] [line 16] [def] [scope 17] [main]
-!18 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!19 = metadata !{metadata !12}
-!20 = metadata !{metadata !21}
-!21 = metadata !{i32 786688, metadata !17, metadata !"myBar", metadata !5, i32 18, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [myBar] [line 18]
-!22 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!23 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!24 = metadata !{metadata !"clang version 3.5 "}
-!25 = metadata !{i32 10, i32 0, metadata !4, null}
-!26 = metadata !{i32 12, i32 0, metadata !4, null}
-!27 = metadata !{metadata !28, metadata !29, i64 0}
-!28 = metadata !{metadata !"bar", metadata !29, i64 0, metadata !29, i64 4}
-!29 = metadata !{metadata !"int", metadata !30, i64 0}
-!30 = metadata !{metadata !"omnipotent char", metadata !31, i64 0}
-!31 = metadata !{metadata !"Simple C/C++ TBAA"}
-!32 = metadata !{i32 13, i32 0, metadata !4, null}
-!33 = metadata !{i32 14, i32 0, metadata !4, null}
-!34 = metadata !{i32 18, i32 0, metadata !17, null}
-!35 = metadata !{i32 19, i32 0, metadata !17, null}
-!36 = metadata !{i32 20, i32 0, metadata !17, null}
+!0 = !{!"0x11\0012\00clang version 3.5 \001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [subregisters.c] [DW_LANG_C99]
+!1 = !{!"subregisters.c", !""}
+!2 = !{}
+!3 = !{!4, !17}
+!4 = !{!"0x2e\00doSomething\00doSomething\00\0010\000\001\000\006\00256\001\0011", !1, !5, !6, null, void (%struct.bar*)* @doSomething, null, null, !14} ; [ DW_TAG_subprogram ] [line 10] [def] [scope 11] [doSomething]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [subregisters.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !8}
+!8 = !{!"0xf\00\000\0064\0064\000\000", null, null, !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from bar]
+!9 = !{!"0x13\00bar\003\0064\0032\000\000\000", !1, null, null, !10, null, null, null} ; [ DW_TAG_structure_type ] [bar] [line 3, size 64, align 32, offset 0] [def] [from ]
+!10 = !{!11, !13}
+!11 = !{!"0xd\00a\004\0032\0032\000\000", !1, !9, !12} ; [ DW_TAG_member ] [a] [line 4, size 32, align 32, offset 0] [from int]
+!12 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!13 = !{!"0xd\00b\005\0032\0032\0032\000", !1, !9, !12} ; [ DW_TAG_member ] [b] [line 5, size 32, align 32, offset 32] [from int]
+!14 = !{!15, !16}
+!15 = !{!"0x101\00b\0016777226\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [b] [line 10]
+!16 = !{!"0x100\00a\0012\000", !4, !5, !12} ; [ DW_TAG_auto_variable ] [a] [line 12]
+!17 = !{!"0x2e\00main\00main\00\0016\000\001\000\006\000\001\0017", !1, !5, !18, null, i32 ()* @main, null, null, !20} ; [ DW_TAG_subprogram ] [line 16] [def] [scope 17] [main]
+!18 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !19, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!19 = !{!12}
+!20 = !{!21}
+!21 = !{!"0x100\00myBar\0018\000", !17, !5, !9} ; [ DW_TAG_auto_variable ] [myBar] [line 18]
+!22 = !{i32 2, !"Dwarf Version", i32 2}
+!23 = !{i32 1, !"Debug Info Version", i32 2}
+!24 = !{!"clang version 3.5 "}
+!25 = !MDLocation(line: 10, scope: !4)
+!26 = !MDLocation(line: 12, scope: !4)
+!27 = !{!28, !29, i64 0}
+!28 = !{!"bar", !29, i64 0, !29, i64 4}
+!29 = !{!"int", !30, i64 0}
+!30 = !{!"omnipotent char", !31, i64 0}
+!31 = !{!"Simple C/C++ TBAA"}
+!32 = !MDLocation(line: 13, scope: !4)
+!33 = !MDLocation(line: 14, scope: !4)
+!34 = !MDLocation(line: 18, scope: !17)
+!35 = !MDLocation(line: 19, scope: !17)
+!36 = !MDLocation(line: 20, scope: !17)
diff --git a/test/DebugInfo/X86/template.ll b/test/DebugInfo/X86/template.ll
index 54c351c7bfd3..5125b09f91e8 100644
--- a/test/DebugInfo/X86/template.ll
+++ b/test/DebugInfo/X86/template.ll
@@ -1,12 +1,11 @@
 ; REQUIRES: object-emission
 
-; RUN: llc -mtriple=x86_64-linux -O0 -filetype=obj < %s > %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
 
 ; IR generated with `clang++ -g -emit-llvm -S` from the following code:
-; template<int x, int*, template<typename> class y, int ...z>  int func() { return 3; }
+; template<int x, int*, template<typename> class y, decltype(nullptr) n, int ...z>  int func() { return 3; }
 ; template<typename> struct y_impl { struct nested { }; };
-; int glbl = func<3, &glbl, y_impl, 1, 2>();
+; int glbl = func<3, &glbl, y_impl, nullptr, 1, 2>();
 ; y_impl<int>::nested n;
 
 ; CHECK: [[INT:0x[0-9a-f]*]]:{{ *}}DW_TAG_base_type
@@ -17,16 +16,11 @@
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_template_type_parameter
 
-; CHECK: DW_AT_name{{.*}}"func<3, &glbl, y_impl, 1, 2>"
+; CHECK: DW_AT_name{{.*}}"func<3, &glbl, y_impl, nullptr, 1, 2>"
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_template_value_parameter
 ; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT]]}
 ; CHECK-NEXT: DW_AT_name{{.*}}= "x"
-
-; This could be made shorter by encoding it as _sdata rather than data4, or
-; even as data1. DWARF strongly urges implementations to prefer 
-; _sdata/_udata rather than dataN
-
 ; CHECK-NEXT: DW_AT_const_value [DW_FORM_sdata]{{.*}}(3)
 
 ; CHECK: DW_TAG_template_value_parameter
@@ -43,6 +37,11 @@
 ; CHECK-NEXT: DW_AT_GNU_template_name{{.*}}= "y_impl"
 ; CHECK-NOT: NULL
 
+; CHECK: DW_TAG_template_value_parameter
+; CHECK-NEXT: DW_AT_type{{.*}}=> {[[NULLPTR:0x[0-9a-f]*]]}
+; CHECK-NEXT: DW_AT_name{{.*}}= "n"
+; CHECK-NEXT: DW_AT_const_value [DW_FORM_udata]{{.*}}(0)
+
 ; CHECK: DW_TAG_GNU_template_parameter_pack
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_template_value_parameter
@@ -56,71 +55,76 @@
 ; CHECK: [[INTPTR]]:{{ *}}DW_TAG_pointer_type
 ; CHECK-NEXT: DW_AT_type{{.*}} => {[[INT]]}
 
+; CHECK: [[NULLPTR]]:{{ *}}DW_TAG_unspecified_type
+; CHECK-NEXT: DW_AT_name{{.*}}= "decltype(nullptr)"
+
 %"struct.y_impl<int>::nested" = type { i8 }
 
 @glbl = global i32 0, align 4
 @n = global %"struct.y_impl<int>::nested" zeroinitializer, align 1
-@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_template.cpp, i8* null }]
 
 define internal void @__cxx_global_var_init() section ".text.startup" {
 entry:
-  %call = call i32 @_Z4funcILi3EXadL_Z4glblEE6y_implJLi1ELi2EEEiv(), !dbg !33
-  store i32 %call, i32* @glbl, align 4, !dbg !33
-  ret void, !dbg !33
+  %call = call i32 @_Z4funcILi3EXadL_Z4glblEE6y_implLDn0EJLi1ELi2EEEiv(), !dbg !36
+  store i32 %call, i32* @glbl, align 4, !dbg !36
+  ret void, !dbg !36
 }
 
 ; Function Attrs: nounwind uwtable
-define linkonce_odr i32 @_Z4funcILi3EXadL_Z4glblEE6y_implJLi1ELi2EEEiv() #0 {
+define linkonce_odr i32 @_Z4funcILi3EXadL_Z4glblEE6y_implLDn0EJLi1ELi2EEEiv() #0 {
 entry:
-  ret i32 3, !dbg !34
+  ret i32 3, !dbg !37
 }
 
-define internal void @_GLOBAL__I_a() section ".text.startup" {
+define internal void @_GLOBAL__sub_I_template.cpp() section ".text.startup" {
 entry:
-  call void @__cxx_global_var_init(), !dbg !35
-  ret void, !dbg !35
+  call void @__cxx_global_var_init(), !dbg !38
+  ret void
 }
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!31, !36}
-!llvm.ident = !{!32}
-
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 192849) (llvm/trunk 192850)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !9, metadata !28, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/bar.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"bar.cpp", metadata !"/usr/local/google/home/echristo/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !8}
-!4 = metadata !{i32 786451, metadata !1, null, metadata !"y_impl<int>", i32 2, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, metadata !5, metadata !"_ZTS6y_implIiE"} ; [ DW_TAG_structure_type ] [y_impl<int>] [line 2, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{i32 786479, null, metadata !"", metadata !7, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!8 = metadata !{i32 786451, metadata !1, metadata !"_ZTS6y_implIiE", metadata !"nested", i32 2, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTSN6y_implIiE6nestedE"} ; [ DW_TAG_structure_type ] [nested] [line 2, size 8, align 8, offset 0] [def] [from ]
-!9 = metadata !{metadata !10, metadata !14, metadata !26}
-!10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"__cxx_global_var_init", metadata !"__cxx_global_var_init", metadata !"", i32 3, metadata !12, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @__cxx_global_var_init, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [local] [def] [__cxx_global_var_init]
-!11 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/bar.cpp]
-!12 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!13 = metadata !{null}
-!14 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"func<3, &glbl, y_impl, 1, 2>", metadata !"func<3, &glbl, y_impl, 1, 2>", metadata !"_Z4funcILi3EXadL_Z4glblEE6y_implJLi1ELi2EEEiv", i32 1, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z4funcILi3EXadL_Z4glblEE6y_implJLi1ELi2EEEiv, metadata !17, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [func<3, &glbl, y_impl, 1, 2>]
-!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{metadata !7}
-!17 = metadata !{metadata !18, metadata !19, metadata !21, metadata !22}
-!18 = metadata !{i32 786480, null, metadata !"x", metadata !7, i32 3, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
-!19 = metadata !{i32 786480, null, metadata !"", metadata !20, i32* @glbl, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
-!20 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!21 = metadata !{i32 803078, null, metadata !"y", null, metadata !"y_impl", null, i32 0, i32 0} ; [ DW_TAG_GNU_template_template_param ]
-!22 = metadata !{i32 803079, null, metadata !"z", null, metadata !23, null, i32 0, i32 0} ; [ DW_TAG_GNU_template_parameter_pack ]
-!23 = metadata !{metadata !24, metadata !25}
-!24 = metadata !{i32 786480, null, metadata !"", metadata !7, i32 1, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
-!25 = metadata !{i32 786480, null, metadata !"", metadata !7, i32 2, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
-!26 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"", metadata !"", metadata !"_GLOBAL__I_a", i32 1, metadata !27, i1 true, i1 true, i32 0, i32 0, null, i32 64, i1 false, void ()* @_GLOBAL__I_a, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [local] [def]
-!27 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!28 = metadata !{metadata !29, metadata !30}
-!29 = metadata !{i32 786484, i32 0, null, metadata !"glbl", metadata !"glbl", metadata !"", metadata !11, i32 3, metadata !7, i32 0, i32 1, i32* @glbl, null} ; [ DW_TAG_variable ] [glbl] [line 3] [def]
-!30 = metadata !{i32 786484, i32 0, null, metadata !"n", metadata !"n", metadata !"", metadata !11, i32 4, metadata !8, i32 0, i32 1, %"struct.y_impl<int>::nested"* @n, null} ; [ DW_TAG_variable ] [n] [line 4] [def]
-!31 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!32 = metadata !{metadata !"clang version 3.4 (trunk 192849) (llvm/trunk 192850)"}
-!33 = metadata !{i32 3, i32 0, metadata !10, null}
-!34 = metadata !{i32 1, i32 0, metadata !14, null}
-!35 = metadata !{i32 1, i32 0, metadata !26, null}
-!36 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!llvm.module.flags = !{!33, !34}
+!llvm.ident = !{!35}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 (trunk 224394) (llvm/trunk 224384)\000\00\000\00\001", !1, !2, !3, !9, !30, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/template.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"template.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4, !8}
+!4 = !{!"0x13\00y_impl<int>\002\008\008\000\000\000", !1, null, null, !2, null, !5, !"_ZTS6y_implIiE"} ; [ DW_TAG_structure_type ] [y_impl<int>] [line 2, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!6}
+!6 = !{!"0x2f\00\000\000", null, !7, null}        ; [ DW_TAG_template_type_parameter ]
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = !{!"0x13\00nested\002\008\008\000\000\000", !1, !"_ZTS6y_implIiE", null, !2, null, null, !"_ZTSN6y_implIiE6nestedE"} ; [ DW_TAG_structure_type ] [nested] [line 2, size 8, align 8, offset 0] [def] [from ]
+!9 = !{!10, !14, !28}
+!10 = !{!"0x2e\00__cxx_global_var_init\00__cxx_global_var_init\00\003\001\001\000\000\00256\000\003", !1, !11, !12, null, void ()* @__cxx_global_var_init, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [local] [def] [__cxx_global_var_init]
+!11 = !{!"0x29", !1}                              ; [ DW_TAG_file_type ] [/tmp/dbginfo/template.cpp]
+!12 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !13, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = !{null}
+!14 = !{!"0x2e\00func<3, &glbl, y_impl, nullptr, 1, 2>\00func<3, &glbl, y_impl, nullptr, 1, 2>\00_Z4funcILi3EXadL_Z4glblEE6y_implLDn0EJLi1ELi2EEEiv\001\000\001\000\000\00256\000\001", !1, !11, !15, null, i32 ()* @_Z4funcILi3EXadL_Z4glblEE6y_implLDn0EJLi1ELi2EEEiv, !17, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [func<3, &glbl, y_impl, nullptr, 1, 2>]
+!15 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{!7}
+!17 = !{!18, !19, !21, !22, !24}
+!18 = !{!"0x30\00x\000\000", null, !7, i32 3, null} ; [ DW_TAG_template_value_parameter ]
+!19 = !{!"0x30\00\000\000", null, !20, i32* @glbl, null} ; [ DW_TAG_template_value_parameter ]
+!20 = !{!"0xf\00\000\0064\0064\000\000", null, null, !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!21 = !{!"0x4106\00y\000\000", null, null, !"y_impl", null} ; [ DW_TAG_GNU_template_template_param ]
+!22 = !{!"0x30\00n\000\000", null, !23, i8 0, null} ; [ DW_TAG_template_value_parameter ]
+!23 = !{!"0x3b\00decltype(nullptr)\000\000\000\000\000\000", null, null} ; [ DW_TAG_unspecified_type ] [decltype(nullptr)] [line 0, size 0, align 0, offset 0]
+!24 = !{!"0x4107\00z\000\000", null, null, !25, null} ; [ DW_TAG_GNU_template_parameter_pack ]
+!25 = !{!26, !27}
+!26 = !{!"0x30\00\000\000", null, !7, i32 1, null} ; [ DW_TAG_template_value_parameter ]
+!27 = !{!"0x30\00\000\000", null, !7, i32 2, null} ; [ DW_TAG_template_value_parameter ]
+!28 = !{!"0x2e\00\00\00_GLOBAL__sub_I_template.cpp\000\001\001\000\000\0064\000\000", !1, !11, !29, null, void ()* @_GLOBAL__sub_I_template.cpp, null, null, !2} ; [ DW_TAG_subprogram ] [line 0] [local] [def]
+!29 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!30 = !{!31, !32}
+!31 = !{!"0x34\00glbl\00glbl\00\003\000\001", null, !11, !7, i32* @glbl, null} ; [ DW_TAG_variable ] [glbl] [line 3] [def]
+!32 = !{!"0x34\00n\00n\00\004\000\001", null, !11, !"_ZTSN6y_implIiE6nestedE", %"struct.y_impl<int>::nested"* @n, null} ; [ DW_TAG_variable ] [n] [line 4] [def]
+!33 = !{i32 2, !"Dwarf Version", i32 4}
+!34 = !{i32 2, !"Debug Info Version", i32 2}
+!35 = !{!"clang version 3.6.0 (trunk 224394) (llvm/trunk 224384)"}
+!36 = !MDLocation(line: 3, column: 12, scope: !10)
+!37 = !MDLocation(line: 1, column: 96, scope: !14)
+!38 = !MDLocation(line: 0, scope: !28)
diff --git a/test/DebugInfo/X86/tls.ll b/test/DebugInfo/X86/tls.ll
index e49b12fbf54d..6f673dd378bd 100644
--- a/test/DebugInfo/X86/tls.ll
+++ b/test/DebugInfo/X86/tls.ll
@@ -81,22 +81,22 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 !llvm.module.flags = !{!15, !16}
 !llvm.ident = !{!17}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !12, metadata !2, metadata !"-.dwo"} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/tls.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"tls.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func<&glbl>", metadata !"func<&glbl>", metadata !"_Z4funcIXadL_Z4glblEEEiv", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z4funcIXadL_Z4glblEEEiv, metadata !9, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [func<&glbl>]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/tls.cpp]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786480, null, metadata !"I", metadata !11, i32* @glbl, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
-!11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!12 = metadata !{metadata !13, metadata !14}
-!13 = metadata !{i32 786484, i32 0, null, metadata !"tls", metadata !"tls", metadata !"", metadata !5, i32 1, metadata !8, i32 0, i32 1, i32* @tls, null} ; [ DW_TAG_variable ] [tls] [line 1] [def]
-!14 = metadata !{i32 786484, i32 0, null, metadata !"glbl", metadata !"glbl", metadata !"", metadata !5, i32 2, metadata !8, i32 0, i32 1, i32* @glbl, null} ; [ DW_TAG_variable ] [glbl] [line 2] [def]
-!15 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!17 = metadata !{metadata !"clang version 3.5 "}
-!18 = metadata !{i32 6, i32 0, metadata !4, null}
+!0 = !{!"0x11\004\00clang version 3.5 \000\00\000\00-.dwo\000", !1, !2, !2, !3, !12, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/tls.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"tls.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00func<&glbl>\00func<&glbl>\00_Z4funcIXadL_Z4glblEEEiv\005\000\001\000\006\00256\000\005", !1, !5, !6, null, i32 ()* @_Z4funcIXadL_Z4glblEEEiv, !9, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [func<&glbl>]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/tls.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!10}
+!10 = !{!"0x30\00I\000\000", null, !11, i32* @glbl, null} ; [ DW_TAG_template_value_parameter ]
+!11 = !{!"0xf\00\000\0064\0064\000\000", null, null, !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!12 = !{!13, !14}
+!13 = !{!"0x34\00tls\00tls\00\001\000\001", null, !5, !8, i32* @tls, null} ; [ DW_TAG_variable ] [tls] [line 1] [def]
+!14 = !{!"0x34\00glbl\00glbl\00\002\000\001", null, !5, !8, i32* @glbl, null} ; [ DW_TAG_variable ] [glbl] [line 2] [def]
+!15 = !{i32 2, !"Dwarf Version", i32 4}
+!16 = !{i32 1, !"Debug Info Version", i32 2}
+!17 = !{!"clang version 3.5 "}
+!18 = !MDLocation(line: 6, scope: !4)
diff --git a/test/DebugInfo/X86/type_units_with_addresses.ll b/test/DebugInfo/X86/type_units_with_addresses.ll
index ff278f664465..0cd5439042cd 100644
--- a/test/DebugInfo/X86/type_units_with_addresses.ll
+++ b/test/DebugInfo/X86/type_units_with_addresses.ll
@@ -112,40 +112,40 @@
 !llvm.module.flags = !{!34, !35}
 !llvm.ident = !{!36}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !2, metadata !27, metadata !2, metadata !"tu.dwo", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/tu.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"tu.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !9, metadata !12, metadata !13, metadata !17, metadata !18, metadata !19, metadata !23, metadata !24}
-!4 = metadata !{i32 786451, metadata !1, null, metadata !"S1<&i>", i32 4, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, metadata !5, metadata !"_ZTS2S1IXadL_Z1iEEE"} ; [ DW_TAG_structure_type ] [S1<&i>] [line 4, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{i32 786480, null, metadata !"I", metadata !7, i32* @i, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
-!7 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786451, metadata !1, null, metadata !"S2", i32 11, i64 8, i64 8, i32 0, i32 0, null, metadata !10, i32 0, null, null, metadata !"_ZTS2S2"} ; [ DW_TAG_structure_type ] [S2] [line 11, size 8, align 8, offset 0] [def] [from ]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 786445, metadata !1, metadata !"_ZTS2S2", metadata !"s2_1", i32 12, i64 8, i64 8, i64 0, i32 0, metadata !"_ZTS4S2_1IXadL_Z1iEEE"} ; [ DW_TAG_member ] [s2_1] [line 12, size 8, align 8, offset 0] [from _ZTS4S2_1IXadL_Z1iEEE]
-!12 = metadata !{i32 786451, metadata !1, null, metadata !"S2_1<&i>", i32 9, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, metadata !5, metadata !"_ZTS4S2_1IXadL_Z1iEEE"} ; [ DW_TAG_structure_type ] [S2_1<&i>] [line 9, size 8, align 8, offset 0] [def] [from ]
-!13 = metadata !{i32 786451, metadata !1, null, metadata !"S3", i32 22, i64 16, i64 8, i32 0, i32 0, null, metadata !14, i32 0, null, null, metadata !"_ZTS2S3"} ; [ DW_TAG_structure_type ] [S3] [line 22, size 16, align 8, offset 0] [def] [from ]
-!14 = metadata !{metadata !15, metadata !16}
-!15 = metadata !{i32 786445, metadata !1, metadata !"_ZTS2S3", metadata !"s3_1", i32 23, i64 8, i64 8, i64 0, i32 0, metadata !"_ZTS4S3_1IXadL_Z1iEEE"} ; [ DW_TAG_member ] [s3_1] [line 23, size 8, align 8, offset 0] [from _ZTS4S3_1IXadL_Z1iEEE]
-!16 = metadata !{i32 786445, metadata !1, metadata !"_ZTS2S3", metadata !"s3_2", i32 24, i64 8, i64 8, i64 8, i32 0, metadata !"_ZTS4S3_2"} ; [ DW_TAG_member ] [s3_2] [line 24, size 8, align 8, offset 8] [from _ZTS4S3_2]
-!17 = metadata !{i32 786451, metadata !1, null, metadata !"S3_1<&i>", i32 18, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, metadata !5, metadata !"_ZTS4S3_1IXadL_Z1iEEE"} ; [ DW_TAG_structure_type ] [S3_1<&i>] [line 18, size 8, align 8, offset 0] [def] [from ]
-!18 = metadata !{i32 786451, metadata !1, null, metadata !"S3_2", i32 20, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTS4S3_2"} ; [ DW_TAG_structure_type ] [S3_2] [line 20, size 8, align 8, offset 0] [def] [from ]
-!19 = metadata !{i32 786451, metadata !1, null, metadata !"S4", i32 34, i64 16, i64 8, i32 0, i32 0, null, metadata !20, i32 0, null, null, metadata !"_ZTS2S4"} ; [ DW_TAG_structure_type ] [S4] [line 34, size 16, align 8, offset 0] [def] [from ]
-!20 = metadata !{metadata !21, metadata !22}
-!21 = metadata !{i32 786445, metadata !1, metadata !"_ZTS2S4", metadata !"s4_1", i32 35, i64 8, i64 8, i64 0, i32 0, metadata !"_ZTS4S4_1"} ; [ DW_TAG_member ] [s4_1] [line 35, size 8, align 8, offset 0] [from _ZTS4S4_1]
-!22 = metadata !{i32 786445, metadata !1, metadata !"_ZTS2S4", metadata !"s4_2", i32 36, i64 8, i64 8, i64 8, i32 0, metadata !"_ZTS4S4_2IXadL_Z1iEEE"} ; [ DW_TAG_member ] [s4_2] [line 36, size 8, align 8, offset 8] [from _ZTS4S4_2IXadL_Z1iEEE]
-!23 = metadata !{i32 786451, metadata !1, null, metadata !"S4_1", i32 29, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTS4S4_1"} ; [ DW_TAG_structure_type ] [S4_1] [line 29, size 8, align 8, offset 0] [def] [from ]
-!24 = metadata !{i32 786451, metadata !1, null, metadata !"S4_2<&i>", i32 32, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, metadata !25, metadata !"_ZTS4S4_2IXadL_Z1iEEE"} ; [ DW_TAG_structure_type ] [S4_2<&i>] [line 32, size 8, align 8, offset 0] [def] [from ]
-!25 = metadata !{metadata !26}
-!26 = metadata !{i32 786480, null, metadata !"T", metadata !7, i32* @i, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
-!27 = metadata !{metadata !28, metadata !30, metadata !31, metadata !32, metadata !33}
-!28 = metadata !{i32 786484, i32 0, null, metadata !"i", metadata !"i", metadata !"", metadata !29, i32 1, metadata !8, i32 0, i32 1, i32* @i, null} ; [ DW_TAG_variable ] [i] [line 1] [def]
-!29 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/tu.cpp]
-!30 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !29, i32 6, metadata !"_ZTS2S1IXadL_Z1iEEE", i32 0, i32 1, %struct.S1* @a, null} ; [ DW_TAG_variable ] [a] [line 6] [def]
-!31 = metadata !{i32 786484, i32 0, null, metadata !"s2", metadata !"s2", metadata !"", metadata !29, i32 15, metadata !"_ZTS2S2", i32 0, i32 1, %struct.S2* @s2, null} ; [ DW_TAG_variable ] [s2] [line 15] [def]
-!32 = metadata !{i32 786484, i32 0, null, metadata !"s3", metadata !"s3", metadata !"", metadata !29, i32 27, metadata !"_ZTS2S3", i32 0, i32 1, %struct.S3* @s3, null} ; [ DW_TAG_variable ] [s3] [line 27] [def]
-!33 = metadata !{i32 786484, i32 0, null, metadata !"s4", metadata !"s4", metadata !"", metadata !29, i32 39, metadata !"_ZTS2S4", i32 0, i32 1, %struct.S4* @s4, null} ; [ DW_TAG_variable ] [s4] [line 39] [def]
-!34 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!35 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!36 = metadata !{metadata !"clang version 3.5.0 "}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00tu.dwo\001", !1, !2, !3, !2, !27, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/tu.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"tu.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4, !9, !12, !13, !17, !18, !19, !23, !24}
+!4 = !{!"0x13\00S1<&i>\004\008\008\000\000\000", !1, null, null, !2, null, !5, !"_ZTS2S1IXadL_Z1iEEE"} ; [ DW_TAG_structure_type ] [S1<&i>] [line 4, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!6}
+!6 = !{!"0x30\00I\000\000", null, !7, i32* @i, null} ; [ DW_TAG_template_value_parameter ]
+!7 = !{!"0xf\00\000\0064\0064\000\000", null, null, !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x13\00S2\0011\008\008\000\000\000", !1, null, null, !10, null, null, !"_ZTS2S2"} ; [ DW_TAG_structure_type ] [S2] [line 11, size 8, align 8, offset 0] [def] [from ]
+!10 = !{!11}
+!11 = !{!"0xd\00s2_1\0012\008\008\000\000", !1, !"_ZTS2S2", !"_ZTS4S2_1IXadL_Z1iEEE"} ; [ DW_TAG_member ] [s2_1] [line 12, size 8, align 8, offset 0] [from _ZTS4S2_1IXadL_Z1iEEE]
+!12 = !{!"0x13\00S2_1<&i>\009\008\008\000\000\000", !1, null, null, !2, null, !5, !"_ZTS4S2_1IXadL_Z1iEEE"} ; [ DW_TAG_structure_type ] [S2_1<&i>] [line 9, size 8, align 8, offset 0] [def] [from ]
+!13 = !{!"0x13\00S3\0022\0016\008\000\000\000", !1, null, null, !14, null, null, !"_ZTS2S3"} ; [ DW_TAG_structure_type ] [S3] [line 22, size 16, align 8, offset 0] [def] [from ]
+!14 = !{!15, !16}
+!15 = !{!"0xd\00s3_1\0023\008\008\000\000", !1, !"_ZTS2S3", !"_ZTS4S3_1IXadL_Z1iEEE"} ; [ DW_TAG_member ] [s3_1] [line 23, size 8, align 8, offset 0] [from _ZTS4S3_1IXadL_Z1iEEE]
+!16 = !{!"0xd\00s3_2\0024\008\008\008\000", !1, !"_ZTS2S3", !"_ZTS4S3_2"} ; [ DW_TAG_member ] [s3_2] [line 24, size 8, align 8, offset 8] [from _ZTS4S3_2]
+!17 = !{!"0x13\00S3_1<&i>\0018\008\008\000\000\000", !1, null, null, !2, null, !5, !"_ZTS4S3_1IXadL_Z1iEEE"} ; [ DW_TAG_structure_type ] [S3_1<&i>] [line 18, size 8, align 8, offset 0] [def] [from ]
+!18 = !{!"0x13\00S3_2\0020\008\008\000\000\000", !1, null, null, !2, null, null, !"_ZTS4S3_2"} ; [ DW_TAG_structure_type ] [S3_2] [line 20, size 8, align 8, offset 0] [def] [from ]
+!19 = !{!"0x13\00S4\0034\0016\008\000\000\000", !1, null, null, !20, null, null, !"_ZTS2S4"} ; [ DW_TAG_structure_type ] [S4] [line 34, size 16, align 8, offset 0] [def] [from ]
+!20 = !{!21, !22}
+!21 = !{!"0xd\00s4_1\0035\008\008\000\000", !1, !"_ZTS2S4", !"_ZTS4S4_1"} ; [ DW_TAG_member ] [s4_1] [line 35, size 8, align 8, offset 0] [from _ZTS4S4_1]
+!22 = !{!"0xd\00s4_2\0036\008\008\008\000", !1, !"_ZTS2S4", !"_ZTS4S4_2IXadL_Z1iEEE"} ; [ DW_TAG_member ] [s4_2] [line 36, size 8, align 8, offset 8] [from _ZTS4S4_2IXadL_Z1iEEE]
+!23 = !{!"0x13\00S4_1\0029\008\008\000\000\000", !1, null, null, !2, null, null, !"_ZTS4S4_1"} ; [ DW_TAG_structure_type ] [S4_1] [line 29, size 8, align 8, offset 0] [def] [from ]
+!24 = !{!"0x13\00S4_2<&i>\0032\008\008\000\000\000", !1, null, null, !2, null, !25, !"_ZTS4S4_2IXadL_Z1iEEE"} ; [ DW_TAG_structure_type ] [S4_2<&i>] [line 32, size 8, align 8, offset 0] [def] [from ]
+!25 = !{!26}
+!26 = !{!"0x30\00T\000\000", null, !7, i32* @i, null} ; [ DW_TAG_template_value_parameter ]
+!27 = !{!28, !30, !31, !32, !33}
+!28 = !{!"0x34\00i\00i\00\001\000\001", null, !29, !8, i32* @i, null} ; [ DW_TAG_variable ] [i] [line 1] [def]
+!29 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/tu.cpp]
+!30 = !{!"0x34\00a\00a\00\006\000\001", null, !29, !"_ZTS2S1IXadL_Z1iEEE", %struct.S1* @a, null} ; [ DW_TAG_variable ] [a] [line 6] [def]
+!31 = !{!"0x34\00s2\00s2\00\0015\000\001", null, !29, !"_ZTS2S2", %struct.S2* @s2, null} ; [ DW_TAG_variable ] [s2] [line 15] [def]
+!32 = !{!"0x34\00s3\00s3\00\0027\000\001", null, !29, !"_ZTS2S3", %struct.S3* @s3, null} ; [ DW_TAG_variable ] [s3] [line 27] [def]
+!33 = !{!"0x34\00s4\00s4\00\0039\000\001", null, !29, !"_ZTS2S4", %struct.S4* @s4, null} ; [ DW_TAG_variable ] [s4] [line 39] [def]
+!34 = !{i32 2, !"Dwarf Version", i32 4}
+!35 = !{i32 1, !"Debug Info Version", i32 2}
+!36 = !{!"clang version 3.5.0 "}
diff --git a/test/DebugInfo/X86/union-template.ll b/test/DebugInfo/X86/union-template.ll
index 5fdb3494cf9d..df070542687b 100644
--- a/test/DebugInfo/X86/union-template.ll
+++ b/test/DebugInfo/X86/union-template.ll
@@ -16,12 +16,12 @@ entry:
   %value.addr = alloca float, align 4
   %tempValue = alloca %"union.PR15637::Value", align 4
   store float %value, float* %value.addr, align 4
-  call void @llvm.dbg.declare(metadata !{float* %value.addr}, metadata !23), !dbg !24
-  call void @llvm.dbg.declare(metadata !{%"union.PR15637::Value"* %tempValue}, metadata !25), !dbg !26
+  call void @llvm.dbg.declare(metadata float* %value.addr, metadata !23, metadata !{!"0x102"}), !dbg !24
+  call void @llvm.dbg.declare(metadata %"union.PR15637::Value"* %tempValue, metadata !25, metadata !{!"0x102"}), !dbg !26
   ret void, !dbg !27
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
@@ -29,32 +29,32 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!28}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 (trunk 178499) (llvm/trunk 178472)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9,  metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"foo.cc", metadata !"/usr/local/google/home/echristo/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"g", metadata !"g", metadata !"_ZN7PR156371gEf", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (float)* @_ZN7PR156371gEf, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [g]
-!5 = metadata !{i32 786489, metadata !1, null, metadata !"PR15637", i32 1} ; [ DW_TAG_namespace ] [PR15637] [line 1]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786484, i32 0, metadata !5, metadata !"f", metadata !"f", metadata !"_ZN7PR156371fE", metadata !11, i32 6, metadata !12, i32 0, i32 1, %"union.PR15637::Value"* @_ZN7PR156371fE, null} ; [ DW_TAG_variable ] [f] [line 6] [def]
-!11 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.cc]
-!12 = metadata !{i32 786455, metadata !1, metadata !5, metadata !"Value<float>", i32 2, i64 32, i64 32, i64 0, i32 0, null, metadata !13, i32 0, null, metadata !21, null} ; [ DW_TAG_union_type ] [Value<float>] [line 2, size 32, align 32, offset 0] [def] [from ]
-!13 = metadata !{metadata !14, metadata !16}
-!14 = metadata !{i32 786445, metadata !1, metadata !12, metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
-!15 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!16 = metadata !{i32 786478, metadata !1, metadata !12, metadata !"Value", metadata !"Value", metadata !"", i32 2, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !20, i32 2} ; [ DW_TAG_subprogram ] [line 2] [Value]
-!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!18 = metadata !{null, metadata !19}
-!19 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !12} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from Value<float>]
-!20 = metadata !{i32 786468}
-!21 = metadata !{metadata !22}
-!22 = metadata !{i32 786479, null, metadata !"T", metadata !8, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!23 = metadata !{i32 786689, metadata !4, metadata !"value", metadata !11, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [value] [line 3]
-!24 = metadata !{i32 3, i32 0, metadata !4, null}
-!25 = metadata !{i32 786688, metadata !4, metadata !"tempValue", metadata !11, i32 4, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [tempValue] [line 4]
-!26 = metadata !{i32 4, i32 0, metadata !4, null}
-!27 = metadata !{i32 5, i32 0, metadata !4, null}
-!28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.3 (trunk 178499) (llvm/trunk 178472)\000\00\000\00\000", !1, !2, !2, !3, !9,  !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.cc] [DW_LANG_C_plus_plus]
+!1 = !{!"foo.cc", !"/usr/local/google/home/echristo/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00g\00g\00_ZN7PR156371gEf\003\000\001\000\006\00256\000\003", !1, !5, !6, null, void (float)* @_ZN7PR156371gEf, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [g]
+!5 = !{!"0x39\00PR15637\001", !1, null} ; [ DW_TAG_namespace ] [PR15637] [line 1]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !8}
+!8 = !{!"0x24\00float\000\0032\0032\000\000\004", null, null} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!9 = !{!10}
+!10 = !{!"0x34\00f\00f\00_ZN7PR156371fE\006\000\001", !5, !11, !12, %"union.PR15637::Value"* @_ZN7PR156371fE, null} ; [ DW_TAG_variable ] [f] [line 6] [def]
+!11 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.cc]
+!12 = !{!"0x17\00Value<float>\002\0032\0032\000\000\000", !1, !5, null, !13, null, !21, null} ; [ DW_TAG_union_type ] [Value<float>] [line 2, size 32, align 32, offset 0] [def] [from ]
+!13 = !{!14, !16}
+!14 = !{!"0xd\00a\002\0032\0032\000\000", !1, !12, !15} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!15 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!16 = !{!"0x2e\00Value\00Value\00\002\000\000\000\006\00320\000\002", !1, !12, !17, null, null, null, i32 0, !20} ; [ DW_TAG_subprogram ] [line 2] [Value]
+!17 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !18, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = !{null, !19}
+!19 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !12} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from Value<float>]
+!20 = !{i32 786468}
+!21 = !{!22}
+!22 = !{!"0x2f\00T\000\000", null, !8, null} ; [ DW_TAG_template_type_parameter ]
+!23 = !{!"0x101\00value\0016777219\000", !4, !11, !8} ; [ DW_TAG_arg_variable ] [value] [line 3]
+!24 = !MDLocation(line: 3, scope: !4)
+!25 = !{!"0x100\00tempValue\004\000", !4, !11, !12} ; [ DW_TAG_auto_variable ] [tempValue] [line 4]
+!26 = !MDLocation(line: 4, scope: !4)
+!27 = !MDLocation(line: 5, scope: !4)
+!28 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/vector.ll b/test/DebugInfo/X86/vector.ll
index a7a158539844..fc17c068caf3 100644
--- a/test/DebugInfo/X86/vector.ll
+++ b/test/DebugInfo/X86/vector.ll
@@ -12,19 +12,19 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!13}
 
-!0 = metadata !{i32 786449, metadata !12, i32 12, metadata !"clang version 3.3 (trunk 171825) (llvm/trunk 171822)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/echristo/foo.c] [DW_LANG_C99]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, <4 x i32>* @a, null} ; [ DW_TAG_variable ] [a] [line 3] [def]
-!6 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786454, metadata !12, null, metadata !"v4si", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_typedef ] [v4si] [line 1, size 0, align 0, offset 0] [from ]
-!8 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 2048, metadata !9, metadata !10, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int]
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 786465, i64 0, i64 4}        ; [ DW_TAG_subrange_type ] [0, 3]
-!12 = metadata !{metadata !"foo.c", metadata !"/Users/echristo"}
+!0 = !{!"0x11\0012\00clang version 3.3 (trunk 171825) (llvm/trunk 171822)\000\00\000\00\000", !12, !1, !1, !1, !3,  !1} ; [ DW_TAG_compile_unit ] [/Users/echristo/foo.c] [DW_LANG_C99]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x34\00a\00a\00\003\000\001", null, !6, !7, <4 x i32>* @a, null} ; [ DW_TAG_variable ] [a] [line 3] [def]
+!6 = !{!"0x29", !12} ; [ DW_TAG_file_type ]
+!7 = !{!"0x16\00v4si\001\000\000\000\000", !12, null, !8} ; [ DW_TAG_typedef ] [v4si] [line 1, size 0, align 0, offset 0] [from ]
+!8 = !{!"0x1\00\000\00128\00128\000\002048", null, null, !9, !10, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int]
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!11}
+!11 = !{!"0x21\000\004"}        ; [ DW_TAG_subrange_type ] [0, 3]
+!12 = !{!"foo.c", !"/Users/echristo"}
 
 ; Check that we get an array type with a vector attribute.
 ; CHECK: DW_TAG_array_type
 ; CHECK-NEXT: DW_AT_GNU_vector
-!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!13 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/X86/vla.ll b/test/DebugInfo/X86/vla.ll
index a1a2e66685c6..b2f2f3d922d0 100644
--- a/test/DebugInfo/X86/vla.ll
+++ b/test/DebugInfo/X86/vla.ll
@@ -27,13 +27,13 @@ entry:
   %saved_stack = alloca i8*
   %cleanup.dest.slot = alloca i32
   store i32 %n, i32* %n.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %n.addr}, metadata !15), !dbg !16
+  call void @llvm.dbg.declare(metadata i32* %n.addr, metadata !15, metadata !{!"0x102"}), !dbg !16
   %0 = load i32* %n.addr, align 4, !dbg !17
   %1 = zext i32 %0 to i64, !dbg !17
   %2 = call i8* @llvm.stacksave(), !dbg !17
   store i8* %2, i8** %saved_stack, !dbg !17
   %vla = alloca i32, i64 %1, align 16, !dbg !17
-  call void @llvm.dbg.declare(metadata !{i32* %vla}, metadata !18), !dbg !17
+  call void @llvm.dbg.declare(metadata i32* %vla, metadata !18, metadata !{!"0x102"}), !dbg !17
   %arrayidx = getelementptr inbounds i32* %vla, i64 0, !dbg !22
   store i32 42, i32* %arrayidx, align 4, !dbg !22
   %3 = load i32* %n.addr, align 4, !dbg !23
@@ -48,7 +48,7 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 ; Function Attrs: nounwind
 declare i8* @llvm.stacksave() nounwind
@@ -64,9 +64,9 @@ entry:
   %argv.addr = alloca i8**, align 8
   store i32 0, i32* %retval
   store i32 %argc, i32* %argc.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !25), !dbg !26
+  call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !25, metadata !{!"0x102"}), !dbg !26
   store i8** %argv, i8*** %argv.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !27), !dbg !26
+  call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !27, metadata !{!"0x102"}), !dbg !26
   %0 = load i32* %argc.addr, align 4, !dbg !28
   %call = call i32 @vla(i32 %0), !dbg !28
   ret i32 %call, !dbg !28
@@ -75,33 +75,33 @@ entry:
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!29}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/vla.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"vla.c", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !9}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"vla", metadata !"vla", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @vla, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [vla]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/vla.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
-!10 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!11 = metadata !{metadata !8, metadata !8, metadata !12}
-!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!13 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
-!14 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!15 = metadata !{i32 786689, metadata !4, metadata !"n", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [n] [line 1]
-!16 = metadata !{i32 1, i32 0, metadata !4, null}
-!17 = metadata !{i32 2, i32 0, metadata !4, null}
-!18 = metadata !{i32 786688, metadata !4, metadata !"a", metadata !5, i32 2, metadata !19, i32 8192, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 2]
-!19 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !8, metadata !20, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
-!20 = metadata !{metadata !21}
-!21 = metadata !{i32 786465, i64 0, i64 -1}       ; [ DW_TAG_subrange_type ] [unbounded]
-!22 = metadata !{i32 3, i32 0, metadata !4, null}
-!23 = metadata !{i32 4, i32 0, metadata !4, null}
-!24 = metadata !{i32 5, i32 0, metadata !4, null}
-!25 = metadata !{i32 786689, metadata !9, metadata !"argc", metadata !5, i32 16777223, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 7]
-!26 = metadata !{i32 7, i32 0, metadata !9, null}
-!27 = metadata !{i32 786689, metadata !9, metadata !"argv", metadata !5, i32 33554439, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 7]
-!28 = metadata !{i32 8, i32 0, metadata !9, null}
-!29 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.3 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/vla.c] [DW_LANG_C99]
+!1 = !{!"vla.c", !""}
+!2 = !{}
+!3 = !{!4, !9}
+!4 = !{!"0x2e\00vla\00vla\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, i32 (i32)* @vla, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [vla]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/vla.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x2e\00main\00main\00\007\000\001\000\006\00256\000\007", !1, !5, !10, null, i32 (i32, i8**)* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
+!10 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !{!8, !8, !12}
+!12 = !{!"0xf\00\000\0064\0064\000\000", null, null, !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!13 = !{!"0xf\00\000\0064\0064\000\000", null, null, !14} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!14 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!15 = !{!"0x101\00n\0016777217\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [n] [line 1]
+!16 = !MDLocation(line: 1, scope: !4)
+!17 = !MDLocation(line: 2, scope: !4)
+!18 = !{!"0x100\00a\002\008192", !4, !5, !19} ; [ DW_TAG_auto_variable ] [a] [line 2]
+!19 = !{!"0x1\00\000\000\0032\000\000", null, null, !8, !20, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!20 = !{!21}
+!21 = !{!"0x21\000\00-1"}       ; [ DW_TAG_subrange_type ] [unbounded]
+!22 = !MDLocation(line: 3, scope: !4)
+!23 = !MDLocation(line: 4, scope: !4)
+!24 = !MDLocation(line: 5, scope: !4)
+!25 = !{!"0x101\00argc\0016777223\000", !9, !5, !8} ; [ DW_TAG_arg_variable ] [argc] [line 7]
+!26 = !MDLocation(line: 7, scope: !9)
+!27 = !{!"0x101\00argv\0033554439\000", !9, !5, !12} ; [ DW_TAG_arg_variable ] [argv] [line 7]
+!28 = !MDLocation(line: 8, scope: !9)
+!29 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/array.ll b/test/DebugInfo/array.ll
index 72b0b994c4de..4439571e7074 100644
--- a/test/DebugInfo/array.ll
+++ b/test/DebugInfo/array.ll
@@ -6,34 +6,34 @@ entry:
   %retval = alloca i32, align 4
   %a = alloca [0 x i32], align 4
   store i32 0, i32* %retval
-  call void @llvm.dbg.declare(metadata !{[0 x i32]* %a}, metadata !6), !dbg !11
+  call void @llvm.dbg.declare(metadata [0 x i32]* %a, metadata !6, metadata !{!"0x102"}), !dbg !11
   ret i32 0, !dbg !12
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!16}
 
-!0 = metadata !{i32 786478, metadata !14, metadata !1, metadata !"main", metadata !"main", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, null, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [main]
-!1 = metadata !{i32 786473, metadata !14} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 786449, metadata !14, i32 12, metadata !"clang version 3.0 (trunk 129138)", i1 false, metadata !"", i32 0, metadata !15, metadata !15, metadata !13, null,  null, null} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 786453, metadata !14, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786688, metadata !7, metadata !"a", metadata !1, i32 4, metadata !8, i32 0, null} ; [ DW_TAG_auto_variable ]
-!7 = metadata !{i32 786443, metadata !14, metadata !0, i32 3, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
-!8 = metadata !{i32 786433, metadata !14, metadata !2, metadata !"", i32 0, i64 0, i64 32, i32 0, i32 0, metadata !5, metadata !9, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
-!9 = metadata !{metadata !10}
+!0 = !{!"0x2e\00main\00main\00\003\000\001\000\006\000\000\003", !14, !1, !3, null, i32 ()* @main, null, null, null} ; [ DW_TAG_subprogram ] [line 3] [def] [main]
+!1 = !{!"0x29", !14} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 3.0 (trunk 129138)\000\00\000\00\000", !14, !15, !15, !13, null,  null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !14, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
+!6 = !{!"0x100\00a\004\000", !7, !1, !8} ; [ DW_TAG_auto_variable ]
+!7 = !{!"0xb\003\0012\000", !14, !0} ; [ DW_TAG_lexical_block ]
+!8 = !{!"0x1\00\000\000\0032\000\000", !14, !2, !5, !9, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!9 = !{!10}
 ;CHECK: DW_TAG_subrange_type
 ;CHECK-NEXT: DW_AT_type
 ;CHECK-NOT: DW_AT_lower_bound
 ;CHECK-NOT: DW_AT_upper_bound
 ;CHECK-NEXT: End Of Children Mark
-!10 = metadata !{i32 786465, i64 0, i64 -1}        ; [ DW_TAG_subrange_type ]
-!11 = metadata !{i32 4, i32 7, metadata !7, null}
-!12 = metadata !{i32 5, i32 3, metadata !7, null}
-!13 = metadata !{metadata !0}
-!14 = metadata !{metadata !"array.c", metadata !"/private/tmp"}
-!15 = metadata !{i32 0}
-!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!10 = !{!"0x21\000\00-1"}        ; [ DW_TAG_subrange_type ]
+!11 = !MDLocation(line: 4, column: 7, scope: !7)
+!12 = !MDLocation(line: 5, column: 3, scope: !7)
+!13 = !{!0}
+!14 = !{!"array.c", !"/private/tmp"}
+!15 = !{i32 0}
+!16 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/block-asan.ll b/test/DebugInfo/block-asan.ll
new file mode 100644
index 000000000000..b25aee1b5ce4
--- /dev/null
+++ b/test/DebugInfo/block-asan.ll
@@ -0,0 +1,87 @@
+; RUN: opt -S -asan %s | FileCheck %s
+
+; The IR of this testcase is generated from the following C code:
+; void bar (int);
+;
+; void foo() {
+;   __block int x;
+;   bar(x);
+; }
+; by compiling it with 'clang -emit-llvm -g -S' and then by manually
+; adding the sanitize_address attribute to the @foo() function (so
+; that ASAN accepts to instrument the function in the above opt run).
+
+; Check that the location of the ASAN instrumented __block variable is
+; correct.
+; CHECK: [ DW_TAG_expression ] [DW_OP_deref] [DW_OP_plus 8] [DW_OP_deref] [DW_OP_plus 24]
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.__block_byref_x = type { i8*, %struct.__block_byref_x*, i32, i32, i32 }
+
+; Function Attrs: nounwind ssp uwtable
+define void @foo() #0 {
+entry:
+  %x = alloca %struct.__block_byref_x, align 8
+  call void @llvm.dbg.declare(metadata %struct.__block_byref_x* %x, metadata !12, metadata !22), !dbg !23
+  %byref.isa = getelementptr inbounds %struct.__block_byref_x* %x, i32 0, i32 0, !dbg !24
+  store i8* null, i8** %byref.isa, !dbg !24
+  %byref.forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i32 0, i32 1, !dbg !24
+  store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, !dbg !24
+  %byref.flags = getelementptr inbounds %struct.__block_byref_x* %x, i32 0, i32 2, !dbg !24
+  store i32 0, i32* %byref.flags, !dbg !24
+  %byref.size = getelementptr inbounds %struct.__block_byref_x* %x, i32 0, i32 3, !dbg !24
+  store i32 32, i32* %byref.size, !dbg !24
+  %forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i32 0, i32 1, !dbg !25
+  %0 = load %struct.__block_byref_x** %forwarding, !dbg !25
+  %x1 = getelementptr inbounds %struct.__block_byref_x* %0, i32 0, i32 4, !dbg !25
+  %1 = load i32* %x1, align 4, !dbg !25
+  call void @bar(i32 %1), !dbg !25
+  %2 = bitcast %struct.__block_byref_x* %x to i8*, !dbg !26
+  call void @_Block_object_dispose(i8* %2, i32 8) #3, !dbg !26
+  ret void, !dbg !26
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare void @bar(i32) #2
+
+declare void @_Block_object_dispose(i8*, i32)
+
+attributes #0 = { nounwind ssp uwtable sanitize_address "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9, !10}
+!llvm.ident = !{!11}
+
+!0 = !{!"0x11\0012\00clang version 3.6.0 (trunk 223120) (llvm/trunk 223119)\000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/block.c] [DW_LANG_C99]
+!1 = !{!"block.c", !"/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\003\000\001\000\000\000\000\003", !1, !5, !6, null, void ()* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [foo]
+!5 = !{!"0x29", !1}    ; [ DW_TAG_file_type ] [/tmp/block.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{i32 2, !"Dwarf Version", i32 2}
+!9 = !{i32 2, !"Debug Info Version", i32 2}
+!10 = !{i32 1, !"PIC Level", i32 2}
+!11 = !{!"clang version 3.6.0 (trunk 223120) (llvm/trunk 223119)"}
+!12 = !{!"0x100\00x\004\000", !4, !5, !13} ; [ DW_TAG_auto_variable ] [x] [line 4]
+!13 = !{!"0x13\00\000\00224\000\000\0016\000", !1, !5, null, !14, null, null, null} ; [ DW_TAG_structure_type ] [line 0, size 224, align 0, offset 0] [def] [from ]
+!14 = !{!15, !17, !18, !20, !21}
+!15 = !{!"0xd\00__isa\000\0064\0064\000\000", !1, !5, !16} ; [ DW_TAG_member ] [__isa] [line 0, size 64, align 64, offset 0] [from ]
+!16 = !{!"0xf\00\000\0064\0064\000\000", null, null, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!17 = !{!"0xd\00__forwarding\000\0064\0064\0064\000", !1, !5, !16} ; [ DW_TAG_member ] [__forwarding] [line 0, size 64, align 64, offset 64] [from ]
+!18 = !{!"0xd\00__flags\000\0032\0032\00128\000", !1, !5, !19} ; [ DW_TAG_member ] [__flags] [line 0, size 32, align 32, offset 128] [from int]
+!19 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!20 = !{!"0xd\00__size\000\0032\0032\00160\000", !1, !5, !19} ; [ DW_TAG_member ] [__size] [line 0, size 32, align 32, offset 160] [from int]
+!21 = !{!"0xd\00x\000\0032\0032\00192\000", !1, !5, !19} ; [ DW_TAG_member ] [x] [line 0, size 32, align 32, offset 192] [from int]
+!22 = !{!"0x102\0034\008\006\0034\0024"} ; [ DW_TAG_expression ] [DW_OP_plus 8] [DW_OP_deref] [DW_OP_plus 24]
+!23 = !MDLocation(line: 4, column: 15, scope: !4)
+!24 = !MDLocation(line: 4, column: 3, scope: !4)
+!25 = !MDLocation(line: 5, column: 3, scope: !4)
+!26 = !MDLocation(line: 6, column: 1, scope: !4)
diff --git a/test/DebugInfo/bug_null_debuginfo.ll b/test/DebugInfo/bug_null_debuginfo.ll
index 458fb58f5bc0..784f17ef61f2 100644
--- a/test/DebugInfo/bug_null_debuginfo.ll
+++ b/test/DebugInfo/bug_null_debuginfo.ll
@@ -3,6 +3,6 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!2}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"", i1 false, metadata !"", i32 0, null, null, null,  null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !"t", metadata !""}
-!2 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00\000\00\000\00\000", !1, null, null, null,  null, null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"t", !""}
+!2 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/constant-pointers.ll b/test/DebugInfo/constant-pointers.ll
index fdde06d4a2b2..add97801b6f6 100644
--- a/test/DebugInfo/constant-pointers.ll
+++ b/test/DebugInfo/constant-pointers.ll
@@ -30,22 +30,22 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 !llvm.module.flags = !{!15, !16}
 !llvm.ident = !{!17}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/constant-pointers.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"constant-pointers.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func<nullptr, nullptr, 42>", metadata !"func<nullptr, nullptr, 42>", metadata !"_Z4funcILPv0ELPFvvE0ELi42EEvv", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z4funcILPv0ELPFvvE0ELi42EEvv, metadata !8, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [func<nullptr, nullptr, 42>]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/constant-pointers.cpp]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{metadata !9, metadata !11, metadata !13}
-!9 = metadata !{i32 786480, null, metadata !"V", metadata !10, i8 0, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!11 = metadata !{i32 786480, null, metadata !"F", metadata !12, i8 0, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
-!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!13 = metadata !{i32 786480, null, metadata !"i", metadata !14, i32 42, null, i32 0, i32 0} ; [ DW_TAG_template_value_parameter ]
-!14 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!15 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!16 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!17 = metadata !{metadata !"clang version 3.5.0 "}
-!18 = metadata !{i32 3, i32 0, metadata !4, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/constant-pointers.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"constant-pointers.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00func<nullptr, nullptr, 42>\00func<nullptr, nullptr, 42>\00_Z4funcILPv0ELPFvvE0ELi42EEvv\002\000\001\000\006\00256\000\002", !1, !5, !6, null, void ()* @_Z4funcILPv0ELPFvvE0ELi42EEvv, !8, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [func<nullptr, nullptr, 42>]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/constant-pointers.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{!9, !11, !13}
+!9 = !{!"0x30\00V\000\000", null, !10, i8 0, null} ; [ DW_TAG_template_value_parameter ]
+!10 = !{!"0xf\00\000\0064\0064\000\000", null, null, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!11 = !{!"0x30\00F\000\000", null, !12, i8 0, null} ; [ DW_TAG_template_value_parameter ]
+!12 = !{!"0xf\00\000\0064\0064\000\000", null, null, !6} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!13 = !{!"0x30\00i\000\000", null, !14, i32 42, null} ; [ DW_TAG_template_value_parameter ]
+!14 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!15 = !{i32 2, !"Dwarf Version", i32 4}
+!16 = !{i32 2, !"Debug Info Version", i32 2}
+!17 = !{!"clang version 3.5.0 "}
+!18 = !MDLocation(line: 3, scope: !4)
diff --git a/test/DebugInfo/cross-cu-inlining.ll b/test/DebugInfo/cross-cu-inlining.ll
index 8a0e3c568f1c..fafa3fa4e8f6 100644
--- a/test/DebugInfo/cross-cu-inlining.ll
+++ b/test/DebugInfo/cross-cu-inlining.ll
@@ -1,6 +1,7 @@
 ; REQUIRES: object-emission
 
 ; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck -implicit-check-not=DW_TAG %s
+; RUN: %llc_dwarf -dwarf-accel-tables=Enable -O0 -filetype=obj < %s | llvm-dwarfdump - | FileCheck --check-prefix=CHECK-ACCEL --check-prefix=CHECK %s
 
 ; Build from source:
 ; $ clang++ a.cpp b.cpp -g -c -emit-llvm
@@ -24,10 +25,10 @@
 ; CHECK:   DW_AT_name {{.*}} "a.cpp"
 ; CHECK:   DW_TAG_subprogram
 ; CHECK:     DW_AT_type [DW_FORM_ref_addr] (0x00000000[[INT:.*]])
-; CHECK:     DW_TAG_inlined_subroutine
-; CHECK:       DW_AT_abstract_origin {{.*}}[[ABS_FUNC:........]])
+; CHECK:     0x[[INLINED:[0-9a-f]*]]:{{.*}}DW_TAG_inlined_subroutine
+; CHECK:       DW_AT_abstract_origin {{.*}}[[ABS_FUNC:........]] "_Z4funci"
 ; CHECK:       DW_TAG_formal_parameter
-; CHECK:         DW_AT_abstract_origin {{.*}}[[ABS_VAR:........]])
+; CHECK:         DW_AT_abstract_origin {{.*}}[[ABS_VAR:........]] "x"
 
 ; Check the abstract definition is in the 'b.cpp' CU and doesn't contain any
 ; concrete information (address range or variable location)
@@ -45,13 +46,22 @@
 
 ; Check the concrete out of line definition references the abstract and
 ; provides the address range and variable location
-; CHECK: DW_TAG_subprogram
+; CHECK: 0x[[FUNC:[0-9a-f]*]]{{.*}}DW_TAG_subprogram
 ; CHECK:   DW_AT_low_pc
-; CHECK:   DW_AT_abstract_origin {{.*}} {0x[[ABS_FUNC]]}
+; CHECK:   DW_AT_abstract_origin {{.*}} {0x[[ABS_FUNC]]} "_Z4funci"
 ; CHECK:   DW_TAG_formal_parameter
 ; CHECK:     DW_AT_location
-; CHECK:     DW_AT_abstract_origin {{.*}} {0x[[ABS_VAR]]}
-
+; CHECK:     DW_AT_abstract_origin {{.*}} {0x[[ABS_VAR]]} "x"
+
+; Check that both the inline and the non out of line version of func are
+; correctly referenced in the accelerator table. Before r221837, the one
+; in the second compilation unit had a wrong offset
+; CHECK-ACCEL: .apple_names contents:
+; CHECK-ACCEL: Name{{.*}}"func"
+; CHECK-ACCEL-NOT: Name
+; CHECK-ACCEL: Atom[0]{{.*}}[[INLINED]]
+; CHECK-ACCEL-NOT: Name
+; CHECK-ACCEL: Atom[0]{{.*}}[[FUNC]]
 
 @i = external global i32
 
@@ -65,7 +75,7 @@ entry:
   %1 = bitcast i32* %x.addr.i to i8*
   call void @llvm.lifetime.start(i64 4, i8* %1)
   store i32 %0, i32* %x.addr.i, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %x.addr.i}, metadata !20), !dbg !21
+  call void @llvm.dbg.declare(metadata i32* %x.addr.i, metadata !20, metadata !{!"0x102"}), !dbg !21
   %2 = load i32* %x.addr.i, align 4, !dbg !22
   %mul.i = mul nsw i32 %2, 2, !dbg !22
   %3 = bitcast i32* %x.addr.i to i8*, !dbg !22
@@ -78,14 +88,14 @@ define i32 @_Z4funci(i32 %x) #1 {
 entry:
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !20), !dbg !23
+  call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !20, metadata !{!"0x102"}), !dbg !23
   %0 = load i32* %x.addr, align 4, !dbg !24
   %mul = mul nsw i32 %0, 2, !dbg !24
   ret i32 %mul, !dbg !24
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #2
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
 
 ; Function Attrs: nounwind
 declare void @llvm.lifetime.start(i64, i8* nocapture) #3
@@ -102,29 +112,29 @@ attributes #3 = { nounwind }
 !llvm.module.flags = !{!16, !17}
 !llvm.ident = !{!18, !18}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/a.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"a.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [main]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/a.cpp]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786449, metadata !10, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !11, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/b.cpp] [DW_LANG_C_plus_plus]
-!10 = metadata !{metadata !"b.cpp", metadata !"/tmp/dbginfo"}
-!11 = metadata !{metadata !12}
-!12 = metadata !{i32 786478, metadata !10, metadata !13, metadata !"func", metadata !"func", metadata !"_Z4funci", i32 1, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4funci, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [func]
-!13 = metadata !{i32 786473, metadata !10}        ; [ DW_TAG_file_type ] [/tmp/dbginfo/b.cpp]
-!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!15 = metadata !{metadata !8, metadata !8}
-!16 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!17 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!18 = metadata !{metadata !"clang version 3.5.0 "}
-!19 = metadata !{i32 4, i32 0, metadata !4, null}
-!20 = metadata !{i32 786689, metadata !12, metadata !"x", metadata !13, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1]
-!21 = metadata !{i32 1, i32 0, metadata !12, metadata !19}
-!22 = metadata !{i32 2, i32 0, metadata !12, metadata !19}
-!23 = metadata !{i32 1, i32 0, metadata !12, null}
-!24 = metadata !{i32 2, i32 0, metadata !12, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/a.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"a.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00main\00main\00\003\000\001\000\006\00256\000\003", !1, !5, !6, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [main]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/a.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !10, !2, !2, !11, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/b.cpp] [DW_LANG_C_plus_plus]
+!10 = !{!"b.cpp", !"/tmp/dbginfo"}
+!11 = !{!12}
+!12 = !{!"0x2e\00func\00func\00_Z4funci\001\000\001\000\006\00256\000\001", !10, !13, !14, null, i32 (i32)* @_Z4funci, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [func]
+!13 = !{!"0x29", !10}        ; [ DW_TAG_file_type ] [/tmp/dbginfo/b.cpp]
+!14 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = !{!8, !8}
+!16 = !{i32 2, !"Dwarf Version", i32 4}
+!17 = !{i32 2, !"Debug Info Version", i32 2}
+!18 = !{!"clang version 3.5.0 "}
+!19 = !MDLocation(line: 4, scope: !4)
+!20 = !{!"0x101\00x\0016777217\000", !12, !13, !8} ; [ DW_TAG_arg_variable ] [x] [line 1]
+!21 = !MDLocation(line: 1, scope: !12, inlinedAt: !19)
+!22 = !MDLocation(line: 2, scope: !12, inlinedAt: !19)
+!23 = !MDLocation(line: 1, scope: !12)
+!24 = !MDLocation(line: 2, scope: !12)
 
diff --git a/test/DebugInfo/cross-cu-linkonce-distinct.ll b/test/DebugInfo/cross-cu-linkonce-distinct.ll
new file mode 100644
index 000000000000..2bd7c4718351
--- /dev/null
+++ b/test/DebugInfo/cross-cu-linkonce-distinct.ll
@@ -0,0 +1,95 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Testing that two distinct (distinct by writing them in separate files, while
+; still fulfilling C++'s ODR by having identical token sequences) functions,
+; linked under LTO, get plausible debug info (and don't crash).
+
+; Built from source:
+; $ clang++ a.cpp b.cpp -g -c -emit-llvm
+; $ llvm-link a.bc b.bc -o ab.bc
+
+; This change is intended to tickle a case where the subprogram MDNode
+; associated with the llvm::Function will differ from the subprogram
+; referenced by the DbgLocs in the function.
+
+; $ sed -ie "s/!12, !0/!0, !12/" ab.ll
+; $ cat a.cpp
+; inline int func(int i) {
+;   return i * 2;
+; }
+; int (*x)(int) = &func;
+; $ cat b.cpp
+; inline int func(int i) {
+;   return i * 2;
+; }
+; int (*y)(int) = &func;
+
+; CHECK: DW_TAG_compile_unit
+; CHECK:   DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_AT_name {{.*}} "func"
+; CHECK: DW_TAG_compile_unit
+
+; FIXME: Maybe we should drop the subprogram here - since the function was
+; emitted in one CU, due to linkonce_odr uniquing. We certainly don't emit the
+; subprogram here if the source location for this definition is the same (see
+; test/DebugInfo/cross-cu-linkonce.ll), though it's very easy to tickle that
+; into failing even without duplicating the source as has been done in this
+; case (two cpp files in different directories, including the same header that
+; contains an inline function - clang will produce distinct subprogram metadata
+; that won't deduplicate owing to the file location information containing the
+; directory of the source file even though the file name is absolute, not
+; relative)
+
+; CHECK: DW_TAG_subprogram
+
+@x = global i32 (i32)* @_Z4funci, align 8
+@y = global i32 (i32)* @_Z4funci, align 8
+
+; Function Attrs: inlinehint nounwind uwtable
+define linkonce_odr i32 @_Z4funci(i32 %i) #0 {
+  %1 = alloca i32, align 4
+  store i32 %i, i32* %1, align 4
+  call void @llvm.dbg.declare(metadata i32* %1, metadata !22, metadata !{!"0x102"}), !dbg !23
+  %2 = load i32* %1, align 4, !dbg !24
+  %3 = mul nsw i32 %2, 2, !dbg !24
+  ret i32 %3, !dbg !24
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!12, !0}
+!llvm.module.flags = !{!19, !20}
+!llvm.ident = !{!21, !21}
+
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !9, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/a.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"a.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00func\00func\00_Z4funci\001\000\001\000\006\00256\000\001", !1, !5, !6, null, i32 (i32)* @_Z4funci, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [func]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/a.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!10}
+!10 = !{!"0x34\00x\00x\00\004\000\001", null, !5, !11, i32 (i32)** @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def]
+!11 = !{!"0xf\00\000\0064\0064\000\000", null, null, !6} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!12 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !13, !2, !2, !14, !17, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/b.cpp] [DW_LANG_C_plus_plus]
+!13 = !{!"b.cpp", !"/tmp/dbginfo"}
+!14 = !{!15}
+!15 = !{!"0x2e\00func\00func\00_Z4funci\001\000\001\000\006\00256\000\001", !13, !16, !6, null, i32 (i32)* @_Z4funci, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [func]
+!16 = !{!"0x29", !13}        ; [ DW_TAG_file_type ] [/tmp/dbginfo/b.cpp]
+!17 = !{!18}
+!18 = !{!"0x34\00y\00y\00\004\000\001", null, !16, !11, i32 (i32)** @y, null} ; [ DW_TAG_variable ] [y] [line 4] [def]
+!19 = !{i32 2, !"Dwarf Version", i32 4}
+!20 = !{i32 1, !"Debug Info Version", i32 2}
+!21 = !{!"clang version 3.5.0 "}
+!22 = !{!"0x101\00i\0016777217\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [i] [line 1]
+!23 = !MDLocation(line: 1, scope: !4)
+!24 = !MDLocation(line: 2, scope: !4)
diff --git a/test/DebugInfo/cross-cu-linkonce.ll b/test/DebugInfo/cross-cu-linkonce.ll
index 660d5709c7a8..aaae4c169d42 100644
--- a/test/DebugInfo/cross-cu-linkonce.ll
+++ b/test/DebugInfo/cross-cu-linkonce.ll
@@ -32,14 +32,14 @@
 define linkonce_odr i32 @_Z4funci(i32 %i) #0 {
   %1 = alloca i32, align 4
   store i32 %i, i32* %1, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %1}, metadata !20), !dbg !21
+  call void @llvm.dbg.declare(metadata i32* %1, metadata !20, metadata !{!"0x102"}), !dbg !21
   %2 = load i32* %1, align 4, !dbg !22
   %3 = mul nsw i32 %2, 2, !dbg !22
   ret i32 %3, !dbg !22
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { inlinehint nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -48,26 +48,26 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!17, !18}
 !llvm.ident = !{!19, !19}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !10, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/a.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"a.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"func", metadata !"func", metadata !"_Z4funci", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4funci, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [func]
-!5 = metadata !{metadata !"func.h", metadata !"/tmp/dbginfo"}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/func.h]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 4, metadata !12, i32 0, i32 1, i32 (i32)** @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def]
-!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!13 = metadata !{i32 786449, metadata !14, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !15, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/b.cpp] [DW_LANG_C_plus_plus]
-!14 = metadata !{metadata !"b.cpp", metadata !"/tmp/dbginfo"}
-!15 = metadata !{metadata !16}
-!16 = metadata !{i32 786484, i32 0, null, metadata !"y", metadata !"y", metadata !"", metadata !6, i32 4, metadata !12, i32 0, i32 1, i32 (i32)** @y, null} ; [ DW_TAG_variable ] [y] [line 4] [def]
-!17 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!19 = metadata !{metadata !"clang version 3.5.0 "}
-!20 = metadata !{i32 786689, metadata !4, metadata !"i", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 1]
-!21 = metadata !{i32 1, i32 0, metadata !4, null}
-!22 = metadata !{i32 2, i32 0, metadata !4, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !10, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/a.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"a.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00func\00func\00_Z4funci\001\000\001\000\006\00256\000\001", !5, !6, !7, null, i32 (i32)* @_Z4funci, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [func]
+!5 = !{!"func.h", !"/tmp/dbginfo"}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/func.h]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!11}
+!11 = !{!"0x34\00x\00x\00\004\000\001", null, !6, !12, i32 (i32)** @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def]
+!12 = !{!"0xf\00\000\0064\0064\000\000", null, null, !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!13 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !14, !2, !2, !3, !15, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/b.cpp] [DW_LANG_C_plus_plus]
+!14 = !{!"b.cpp", !"/tmp/dbginfo"}
+!15 = !{!16}
+!16 = !{!"0x34\00y\00y\00\004\000\001", null, !6, !12, i32 (i32)** @y, null} ; [ DW_TAG_variable ] [y] [line 4] [def]
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !{i32 1, !"Debug Info Version", i32 2}
+!19 = !{!"clang version 3.5.0 "}
+!20 = !{!"0x101\00i\0016777217\000", !4, !6, !9} ; [ DW_TAG_arg_variable ] [i] [line 1]
+!21 = !MDLocation(line: 1, scope: !4)
+!22 = !MDLocation(line: 2, scope: !4)
diff --git a/test/DebugInfo/cu-line-tables.ll b/test/DebugInfo/cu-line-tables.ll
deleted file mode 100644
index 2496f3f3e87b..000000000000
--- a/test/DebugInfo/cu-line-tables.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; REQUIRES: object-emission
-; RUN: %llc_dwarf -O0 -filetype=obj %s -o %t
-; RUN: llvm-dwarfdump %t | FileCheck %s
-
-; Check that we don't emit ranges if we're emitting line tables only.
-
-; CHECK: DW_TAG_compile_unit
-; CHECK-NOT: DW_AT_ranges
-; CHECK: DW_TAG_subprogram
-
-; FIXME: We probably want to avoid printing out anything if the section isn't there.
-; CHECK: .debug_ranges contents:
-; CHECK-NOT: 00000000 <End of list>
-
-; CHECK: .debug_pubnames contents:
-; CHECK-NOT: Offset
-
-; CHECK: .debug_pubtypes contents:
-; CHECK-NOT: Offset
-
-; Function Attrs: nounwind uwtable
-define i32 @f(i32 %a) #0 {
-entry:
-  %a.addr = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  %0 = load i32* %a.addr, align 4, !dbg !14
-  %add = add nsw i32 %0, 4, !dbg !14
-  ret i32 %add, !dbg !14
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!9, !10}
-!llvm.ident = !{!11}
-
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 (trunk 197756) (llvm/trunk 197768)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/foo.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @f, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/foo.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!11 = metadata !{metadata !"clang version 3.5 (trunk 197756) (llvm/trunk 197768)"}
-!14 = metadata !{i32 2, i32 0, metadata !4, null}
diff --git a/test/DebugInfo/cu-range-hole.ll b/test/DebugInfo/cu-range-hole.ll
index 65a4956a6fc2..aa489b60d8cc 100644
--- a/test/DebugInfo/cu-range-hole.ll
+++ b/test/DebugInfo/cu-range-hole.ll
@@ -18,7 +18,7 @@ define i32 @b(i32 %c) #0 {
 entry:
   %c.addr = alloca i32, align 4
   store i32 %c, i32* %c.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %c.addr}, metadata !13), !dbg !14
+  call void @llvm.dbg.declare(metadata i32* %c.addr, metadata !13, metadata !{!"0x102"}), !dbg !14
   %0 = load i32* %c.addr, align 4, !dbg !14
   %add = add nsw i32 %0, 1, !dbg !14
   ret i32 %add, !dbg !14
@@ -35,14 +35,14 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind uwtable
 define i32 @d(i32 %e) #0 {
 entry:
   %e.addr = alloca i32, align 4
   store i32 %e, i32* %e.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %e.addr}, metadata !15), !dbg !16
+  call void @llvm.dbg.declare(metadata i32* %e.addr, metadata !15, metadata !{!"0x102"}), !dbg !16
   %0 = load i32* %e.addr, align 4, !dbg !16
   %add = add nsw i32 %0, 1, !dbg !16
   ret i32 %add, !dbg !16
@@ -55,20 +55,20 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!1}
 !llvm.module.flags = !{!11, !12}
 
-!0 = metadata !{metadata !"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)"}
-!1 = metadata !{i32 786449, metadata !2, i32 12, metadata !"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, metadata !3, metadata !"", i32 1}
-!2 = metadata !{metadata !"b.c", metadata !"/usr/local/google/home/echristo"}
-!3 = metadata !{}
-!4 = metadata !{metadata !5, metadata !10}
-!5 = metadata !{i32 786478, metadata !2, metadata !6, metadata !"b", metadata !"b", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @b, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [b]
-!6 = metadata !{i32 786473, metadata !2}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/b.c]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786478, metadata !2, metadata !6, metadata !"d", metadata !"d", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @d, null, null, metadata !3, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [d]
-!11 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!13 = metadata !{i32 786689, metadata !5, metadata !"c", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [c] [line 1]
-!14 = metadata !{i32 1, i32 0, metadata !5, null}
-!15 = metadata !{i32 786689, metadata !10, metadata !"e", metadata !6, i32 16777219, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [e] [line 3]
-!16 = metadata !{i32 3, i32 0, metadata !10, null}
+!0 = !{!"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)"}
+!1 = !{!"0x11\0012\00clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)\000\00\000\00\001", !2, !3, !3, !4, !3, !3} ; [ DW_TAG_compile_unit ]
+!2 = !{!"b.c", !"/usr/local/google/home/echristo"}
+!3 = !{}
+!4 = !{!5, !10}
+!5 = !{!"0x2e\00b\00b\00\001\000\001\000\006\00256\000\001", !2, !6, !7, null, i32 (i32)* @b, null, null, !3} ; [ DW_TAG_subprogram ] [line 1] [def] [b]
+!6 = !{!"0x29", !2}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/b.c]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"0x2e\00d\00d\00\003\000\001\000\006\00256\000\003", !2, !6, !7, null, i32 (i32)* @d, null, null, !3} ; [ DW_TAG_subprogram ] [line 3] [def] [d]
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32 1, !"Debug Info Version", i32 2}
+!13 = !{!"0x101\00c\0016777217\000", !5, !6, !9} ; [ DW_TAG_arg_variable ] [c] [line 1]
+!14 = !MDLocation(line: 1, scope: !5)
+!15 = !{!"0x101\00e\0016777219\000", !10, !6, !9} ; [ DW_TAG_arg_variable ] [e] [line 3]
+!16 = !MDLocation(line: 3, scope: !10)
diff --git a/test/DebugInfo/cu-ranges.ll b/test/DebugInfo/cu-ranges.ll
index 9262a2239c7f..6296b93792c2 100644
--- a/test/DebugInfo/cu-ranges.ll
+++ b/test/DebugInfo/cu-ranges.ll
@@ -22,21 +22,21 @@ define i32 @foo(i32 %a) #0 section "__TEXT,__foo" {
 entry:
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !13), !dbg !14
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !13, metadata !{!"0x102"}), !dbg !14
   %0 = load i32* %a.addr, align 4, !dbg !15
   %add = add nsw i32 %0, 5, !dbg !15
   ret i32 %add, !dbg !15
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind uwtable
 define i32 @bar(i32 %a) #0 {
 entry:
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !16), !dbg !17
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !16, metadata !{!"0x102"}), !dbg !17
   %0 = load i32* %a.addr, align 4, !dbg !18
   %add = add nsw i32 %0, 5, !dbg !18
   ret i32 %add, !dbg !18
@@ -49,23 +49,23 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!10, !11}
 !llvm.ident = !{!12}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"foo.c", metadata !"/usr/local/google/home/echristo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !9}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/foo.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"bar", metadata !"bar", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @bar, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [bar]
-!10 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!12 = metadata !{metadata !"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)"}
-!13 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 1]
-!14 = metadata !{i32 1, i32 0, metadata !4, null}
-!15 = metadata !{i32 2, i32 0, metadata !4, null}
-!16 = metadata !{i32 786689, metadata !9, metadata !"a", metadata !5, i32 16777221, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 5]
-!17 = metadata !{i32 5, i32 0, metadata !9, null}
-!18 = metadata !{i32 6, i32 0, metadata !9, null}
+!0 = !{!"0x11\0012\00clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)\000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/foo.c] [DW_LANG_C99]
+!1 = !{!"foo.c", !"/usr/local/google/home/echristo"}
+!2 = !{}
+!3 = !{!4, !9}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, i32 (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/foo.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x2e\00bar\00bar\00\005\000\001\000\006\00256\000\005", !1, !5, !6, null, i32 (i32)* @bar, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [bar]
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 1, !"Debug Info Version", i32 2}
+!12 = !{!"clang version 3.5.0 (trunk 204164) (llvm/trunk 204183)"}
+!13 = !{!"0x101\00a\0016777217\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [a] [line 1]
+!14 = !MDLocation(line: 1, scope: !4)
+!15 = !MDLocation(line: 2, scope: !4)
+!16 = !{!"0x101\00a\0016777221\000", !9, !5, !8} ; [ DW_TAG_arg_variable ] [a] [line 5]
+!17 = !MDLocation(line: 5, scope: !9)
+!18 = !MDLocation(line: 6, scope: !9)
 
diff --git a/test/DebugInfo/dead-argument-order.ll b/test/DebugInfo/dead-argument-order.ll
index ea805a4872fa..d375412141a6 100644
--- a/test/DebugInfo/dead-argument-order.ll
+++ b/test/DebugInfo/dead-argument-order.ll
@@ -38,17 +38,17 @@
 ; Function Attrs: nounwind readnone uwtable
 define i32 @_Z8function1Si(i32 %s.coerce, i32 %i) #0 {
 entry:
-  tail call void @llvm.dbg.declare(metadata !19, metadata !14), !dbg !20
-  tail call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !15), !dbg !20
+  tail call void @llvm.dbg.declare(metadata %struct.S* undef, metadata !14, metadata !{!"0x102"}), !dbg !20
+  tail call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !20
   %add = add nsw i32 %i, %s.coerce, !dbg !20
   ret i32 %add, !dbg !20
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -57,25 +57,25 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!16, !17}
 !llvm.ident = !{!18}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !3, metadata !8, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/dead-argument-order.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"dead-argument-order.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !1, null, metadata !"S", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS1S"} ; [ DW_TAG_structure_type ] [S] [line 1, size 32, align 32, offset 0] [def] [from ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{i32 786445, metadata !1, metadata !"_ZTS1S", metadata !"i", i32 1, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] [i] [line 1, size 32, align 32, offset 0] [from int]
-!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786478, metadata !1, metadata !10, metadata !"function", metadata !"function", metadata !"_Z8function1Si", i32 2, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i32)* @_Z8function1Si, null, null, metadata !13, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [function]
-!10 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/dead-argument-order.cpp]
-!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !7, metadata !4, metadata !7}
-!13 = metadata !{metadata !14, metadata !15}
-!14 = metadata !{i32 786689, metadata !9, metadata !"s", metadata !10, i32 16777218, metadata !"_ZTS1S", i32 0, i32 0} ; [ DW_TAG_arg_variable ] [s] [line 2]
-!15 = metadata !{i32 786689, metadata !9, metadata !"i", metadata !10, i32 33554434, metadata !7, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 2]
-!16 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!17 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!18 = metadata !{metadata !"clang version 3.5.0 "}
-!19 = metadata !{%struct.S* undef}
-!20 = metadata !{i32 2, i32 0, metadata !9, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \001\00\000\00\001", !1, !2, !3, !8, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/dead-argument-order.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"dead-argument-order.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00S\001\0032\0032\000\000\000", !1, null, null, !5, null, null, !"_ZTS1S"} ; [ DW_TAG_structure_type ] [S] [line 1, size 32, align 32, offset 0] [def] [from ]
+!5 = !{!6}
+!6 = !{!"0xd\00i\001\0032\0032\000\000", !1, !"_ZTS1S", !7} ; [ DW_TAG_member ] [i] [line 1, size 32, align 32, offset 0] [from int]
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = !{!9}
+!9 = !{!"0x2e\00function\00function\00_Z8function1Si\002\000\001\000\006\00256\001\002", !1, !10, !11, null, i32 (i32, i32)* @_Z8function1Si, null, null, !13} ; [ DW_TAG_subprogram ] [line 2] [def] [function]
+!10 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/dead-argument-order.cpp]
+!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!7, !4, !7}
+!13 = !{!14, !15}
+!14 = !{!"0x101\00s\0016777218\000", !9, !10, !"_ZTS1S"} ; [ DW_TAG_arg_variable ] [s] [line 2]
+!15 = !{!"0x101\00i\0033554434\000", !9, !10, !7} ; [ DW_TAG_arg_variable ] [i] [line 2]
+!16 = !{i32 2, !"Dwarf Version", i32 4}
+!17 = !{i32 2, !"Debug Info Version", i32 2}
+!18 = !{!"clang version 3.5.0 "}
+!19 = !{%struct.S* undef}
+!20 = !MDLocation(line: 2, scope: !9)
 
diff --git a/test/DebugInfo/debug-info-always-inline.ll b/test/DebugInfo/debug-info-always-inline.ll
new file mode 100644
index 000000000000..88ac4cb39d5a
--- /dev/null
+++ b/test/DebugInfo/debug-info-always-inline.ll
@@ -0,0 +1,143 @@
+; RUN: opt < %s -always-inline -S | FileCheck %s
+;
+; Generated from the following C++ source with:
+; clang -cc1 -disable-llvm-optzns -emit-llvm -g -stack-protector 2 test.cpp
+;
+; /* BEGIN SOURCE */
+; int __attribute__((always_inline)) foo()
+; {
+;    int arr[10];
+;    arr[0] = 5;
+;    int sum = 4;
+;    return sum;
+; }
+; 
+; extern void bar();
+; 
+; int main()
+; {
+;   bar();
+;   int i = foo();
+;   return i;
+; }
+; /* END SOURCE */
+
+; The patch that includes this test case, is addressing the following issue:
+; 
+; When functions are inlined, instructions without debug information 
+; are attributed with the call site's DebugLoc. After inlining, inlined static
+; allocas are moved to the caller's entry block, adjacent to the caller's original
+; static alloca instructions. By retaining the call site's DebugLoc, these instructions
+; may cause instructions that are subsequently inserted at the entry block to pick
+; up the same DebugLoc.
+;
+; In the offending case stack protection inserts an instruction at the caller's
+; entry block, which inadvertently picks up the inlined call's DebugLoc, because
+; the entry block's first instruction is the recently moved inlined alloca instruction. 
+;
+; The stack protection instruction then becomes part of the function prologue, with the
+; result that the line number that is associated with the stack protection instruction
+; is deemed to be the end of the function prologue. Since this line number is the
+; call site's line number, setting a breakpoint at the function in the debugger
+; will make the user stop at the line of the inlined call.
+
+; Note that without the stack protection instruction this effect would not occur
+; because the allocas all get collapsed into a single instruction that reserves
+; stack space and have no further influence on the prologue's line number information.
+
+
+; The selected solution is to not attribute static allocas with the call site's
+; DebugLoc. 
+
+; At some point in the future, it may be desirable to describe the inlining 
+; in the alloca instructions, but then the code that handles prologues must
+; be able to handle this correctly, including the late insertion of instructions
+; into it.
+
+; In this context it is also important to distingush between functions
+; with the "nodebug" attribute and those without it. Alloca instructions from
+; nodebug functions should continue to have no DebugLoc, whereas those from
+; non-nodebug functions (i.e. functions with debug information) may want to
+; have their DebugLocs augmented with inlining information.
+
+
+; Make sure that after inlining the call to foo() the alloca instructions for
+; arr.i and sum.i do not retain debug information.
+
+; CHECK: %arr.i = alloca [10 x i32], align {{[0-9]*$}}
+; CHECK: %sum.i = alloca i32, align {{[0-9]*$}}
+
+
+; ModuleID = 'test.cpp'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: alwaysinline nounwind sspstrong
+define i32 @_Z3foov() #0 {
+entry:
+  %arr = alloca [10 x i32], align 16
+  %sum = alloca i32, align 4
+  call void @llvm.dbg.declare(metadata [10 x i32]* %arr, metadata !14), !dbg !18
+  %arrayidx = getelementptr inbounds [10 x i32]* %arr, i32 0, i64 0, !dbg !19
+  store i32 5, i32* %arrayidx, align 4, !dbg !19
+  call void @llvm.dbg.declare(metadata i32* %sum, metadata !20), !dbg !21
+  store i32 4, i32* %sum, align 4, !dbg !21
+  %0 = load i32* %sum, align 4, !dbg !22
+  ret i32 %0, !dbg !22
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata) #1
+
+; Function Attrs: nounwind sspstrong
+define i32 @main() #2 {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval
+  call void @_Z3barv(), !dbg !23
+  call void @llvm.dbg.declare(metadata i32* %i, metadata !24), !dbg !25
+  %call = call i32 @_Z3foov(), !dbg !25
+  store i32 %call, i32* %i, align 4, !dbg !25
+  %0 = load i32* %i, align 4, !dbg !26
+  ret i32 %0, !dbg !26
+}
+
+declare void @_Z3barv() #3
+
+attributes #0 = { alwaysinline nounwind sspstrong "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind sspstrong "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !12}
+!llvm.ident = !{!13}
+
+!0 = !{i32 786449, !1, i32 4, !"clang version 3.6.0 (217844)", i1 false, !"", i32 0, !2, !2, !3, !2, !2, !"", i32 1} ; [ DW_TAG_compile_unit ] [/home/user/test/<stdin>] [DW_LANG_C_plus_plus]
+!1 = !{!"<stdin>", !"/home/user/test"}
+!2 = !{}
+!3 = !{!4, !10}
+!4 = !{i32 786478, !5, !6, !"foo", !"foo", !"_Z3foov", i32 1, !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z3foov, null, null, !2, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [foo]
+!5 = !{!"test.cpp", !"/home/user/test"}
+!6 = !{i32 786473, !5}          ; [ DW_TAG_file_type ] [/home/user/test/test.cpp]
+!7 = !{i32 786453, i32 0, null, !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{i32 786468, null, null, !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{i32 786478, !5, !6, !"main", !"main", !"", i32 11, !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, !2, i32 12} ; [ DW_TAG_subprogram ] [line 11] [def] [scope 12] [main]
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32 2, !"Debug Info Version", i32 1}
+!13 = !{!"clang version 3.6.0 (217844)"}
+!14 = !{i32 786688, !4, !"arr", !6, i32 3, !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [arr] [line 3]
+!15 = !{i32 786433, null, null, !"", i32 0, i64 320, i64 32, i32 0, i32 0, !9, !16, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 320, align 32, offset 0] [from int]
+!16 = !{!17}
+!17 = !{i32 786465, i64 0, i64 10}       ; [ DW_TAG_subrange_type ] [0, 9]
+!18 = !MDLocation(line: 3, scope: !4)
+!19 = !MDLocation(line: 4, scope: !4)
+!20 = !{i32 786688, !4, !"sum", !6, i32 5, !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [sum] [line 5]
+!21 = !MDLocation(line: 5, scope: !4)
+!22 = !MDLocation(line: 6, scope: !4)
+!23 = !MDLocation(line: 13, scope: !10)
+!24 = !{i32 786688, !10, !"i", !6, i32 14, !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 14]
+!25 = !MDLocation(line: 14, scope: !10)
+!26 = !MDLocation(line: 15, scope: !10)
diff --git a/test/DebugInfo/debug-info-qualifiers.ll b/test/DebugInfo/debug-info-qualifiers.ll
index b624d3874cb3..b0f9cd4c83b8 100644
--- a/test/DebugInfo/debug-info-qualifiers.ll
+++ b/test/DebugInfo/debug-info-qualifiers.ll
@@ -39,16 +39,16 @@ define void @_Z1gv() #0 {
   %a = alloca %class.A, align 1
   %pl = alloca { i64, i64 }, align 8
   %pr = alloca { i64, i64 }, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A* %a}, metadata !24), !dbg !25
-  call void @llvm.dbg.declare(metadata !{{ i64, i64 }* %pl}, metadata !26), !dbg !31
+  call void @llvm.dbg.declare(metadata %class.A* %a, metadata !24, metadata !{!"0x102"}), !dbg !25
+  call void @llvm.dbg.declare(metadata { i64, i64 }* %pl, metadata !26, metadata !{!"0x102"}), !dbg !31
   store { i64, i64 } { i64 ptrtoint (void (%class.A*)* @_ZNKR1A1lEv to i64), i64 0 }, { i64, i64 }* %pl, align 8, !dbg !31
-  call void @llvm.dbg.declare(metadata !{{ i64, i64 }* %pr}, metadata !32), !dbg !35
+  call void @llvm.dbg.declare(metadata { i64, i64 }* %pr, metadata !32, metadata !{!"0x102"}), !dbg !35
   store { i64, i64 } { i64 ptrtoint (void (%class.A*)* @_ZNKO1A1rEv to i64), i64 0 }, { i64, i64 }* %pr, align 8, !dbg !35
   ret void, !dbg !36
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare void @_ZNKR1A1lEv(%class.A*)
 
@@ -61,40 +61,40 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!21, !22}
 !llvm.ident = !{!23}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !16, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786434, metadata !5, null, metadata !"A", i32 2, i64 8, i64 8, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 2, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{metadata !"debug-info-qualifiers.cpp", metadata !""}
-!6 = metadata !{metadata !7, metadata !13}
-!7 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1A", metadata !"l", metadata !"l", metadata !"_ZNKR1A1lEv", i32 5, metadata !8, i1 false, i1 false, i32 0, i32 0, null, i32 16640, i1 false, null, null, i32 0, metadata !12, i32 5} ; [ DW_TAG_subprogram ] [line 5] [reference] [l]
-!8 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 16384, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [reference] [from ]
-!9 = metadata !{null, metadata !10}
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from ]
-!11 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS1A"} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from _ZTS1A]
-!12 = metadata !{i32 786468}
-!13 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1A", metadata !"r", metadata !"r", metadata !"_ZNKO1A1rEv", i32 7, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 33024, i1 false, null, null, i32 0, metadata !15, i32 7} ; [ DW_TAG_subprogram ] [line 7] [rvalue reference] [r]
-!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 32768, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [rvalue reference] [from ]
-!15 = metadata !{i32 786468}
-!16 = metadata !{metadata !17}
-!17 = metadata !{i32 786478, metadata !5, metadata !18, metadata !"g", metadata !"g", metadata !"_Z1gv", i32 10, metadata !19, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z1gv, null, null, metadata !2, i32 10} ; [ DW_TAG_subprogram ] [line 10] [def] [g]
-!18 = metadata !{i32 786473, metadata !5}         ; [ DW_TAG_file_type ]
-!19 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!20 = metadata !{null}
-!21 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!23 = metadata !{metadata !"clang version 3.5 "}
-!24 = metadata !{i32 786688, metadata !17, metadata !"a", metadata !18, i32 11, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 11]
-!25 = metadata !{i32 11, i32 0, metadata !17, null}
-!26 = metadata !{i32 786688, metadata !17, metadata !"pl", metadata !18, i32 16, metadata !27, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [pl] [line 16]
-!27 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !28, metadata !"_ZTS1A"} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from ]
-!28 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 16384, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [reference] [from ]
-!29 = metadata !{null, metadata !30}
-!30 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
-!31 = metadata !{i32 16, i32 0, metadata !17, null}
-!32 = metadata !{i32 786688, metadata !17, metadata !"pr", metadata !18, i32 21, metadata !33, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [pr] [line 21]
-!33 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !34, metadata !"_ZTS1A"} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from ]
-!34 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 32768, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [rvalue reference] [from ]
-!35 = metadata !{i32 21, i32 0, metadata !17, null}
-!36 = metadata !{i32 22, i32 0, metadata !17, null}
+!0 = !{!"0x11\004\00clang version 3.5 \000\00\000\00\000", !1, !2, !3, !16, !2, !2} ; [ DW_TAG_compile_unit ] [] [DW_LANG_C_plus_plus]
+!1 = !{!"", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2\00A\002\008\008\000\000\000", !5, null, null, !6, null, null, !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 2, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!"debug-info-qualifiers.cpp", !""}
+!6 = !{!7, !13}
+!7 = !{!"0x2e\00l\00l\00_ZNKR1A1lEv\005\000\000\000\006\0016640\000\005", !5, !"_ZTS1A", !8, null, null, null, i32 0, !12} ; [ DW_TAG_subprogram ] [line 5] [reference] [l]
+!8 = !{!"0x15\00\000\000\000\000\0016384\000", i32 0, null, null, !9, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [reference] [from ]
+!9 = !{null, !10}
+!10 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from ]
+!11 = !{!"0x26\00\000\000\000\000\000", null, null, !"_ZTS1A"} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from _ZTS1A]
+!12 = !{i32 786468}
+!13 = !{!"0x2e\00r\00r\00_ZNKO1A1rEv\007\000\000\000\006\0033024\000\007", !5, !"_ZTS1A", !14, null, null, null, i32 0, !15} ; [ DW_TAG_subprogram ] [line 7] [rvalue reference] [r]
+!14 = !{!"0x15\00\000\000\000\000\0032768\000", i32 0, null, null, !9, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [rvalue reference] [from ]
+!15 = !{i32 786468}
+!16 = !{!17}
+!17 = !{!"0x2e\00g\00g\00_Z1gv\0010\000\001\000\006\00256\000\0010", !5, !18, !19, null, void ()* @_Z1gv, null, null, !2} ; [ DW_TAG_subprogram ] [line 10] [def] [g]
+!18 = !{!"0x29", !5}         ; [ DW_TAG_file_type ]
+!19 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !20, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!20 = !{null}
+!21 = !{i32 2, !"Dwarf Version", i32 4}
+!22 = !{i32 1, !"Debug Info Version", i32 2}
+!23 = !{!"clang version 3.5 "}
+!24 = !{!"0x100\00a\0011\000", !17, !18, !4} ; [ DW_TAG_auto_variable ] [a] [line 11]
+!25 = !MDLocation(line: 11, scope: !17)
+!26 = !{!"0x100\00pl\0016\000", !17, !18, !27} ; [ DW_TAG_auto_variable ] [pl] [line 16]
+!27 = !{!"0x1f\00\000\000\000\000\000", null, null, !28, !"_ZTS1A"} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from ]
+!28 = !{!"0x15\00\000\000\000\000\0016384\000", i32 0, null, null, !29, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [reference] [from ]
+!29 = !{null, !30}
+!30 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!31 = !MDLocation(line: 16, scope: !17)
+!32 = !{!"0x100\00pr\0021\000", !17, !18, !33} ; [ DW_TAG_auto_variable ] [pr] [line 21]
+!33 = !{!"0x1f\00\000\000\000\000\000", null, null, !34, !"_ZTS1A"} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from ]
+!34 = !{!"0x15\00\000\000\000\000\0032768\000", i32 0, null, null, !29, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [rvalue reference] [from ]
+!35 = !MDLocation(line: 21, scope: !17)
+!36 = !MDLocation(line: 22, scope: !17)
diff --git a/test/DebugInfo/debuginfofinder-multiple-cu.ll b/test/DebugInfo/debuginfofinder-multiple-cu.ll
index 74965df591a6..0eba64d2ea63 100644
--- a/test/DebugInfo/debuginfofinder-multiple-cu.ll
+++ b/test/DebugInfo/debuginfofinder-multiple-cu.ll
@@ -22,20 +22,20 @@ define void @g() {
 !llvm.dbg.cu = !{!0, !8}
 !llvm.module.flags = !{!13, !16}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (192092)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/test1.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"test1.c", metadata !"/tmp"}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @f, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/test1.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{i32 786449, metadata !9, i32 12, metadata !"clang version 3.4 (192092)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !10, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/test2.c] [DW_LANG_C99]
-!9 = metadata !{metadata !"test2.c", metadata !"/tmp"}
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 786478, metadata !9, metadata !12, metadata !"g", metadata !"g", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @g, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [g]
-!12 = metadata !{i32 786473, metadata !9}         ; [ DW_TAG_file_type ] [/tmp/test2.c]
-!13 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!14 = metadata !{i32 1, i32 0, metadata !4, null}
-!15 = metadata !{i32 1, i32 0, metadata !11, null}
-!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 (192092)\000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/test1.c] [DW_LANG_C99]
+!1 = !{!"test1.c", !"/tmp"}
+!2 = !{i32 0}
+!3 = !{!4}
+!4 = !{!"0x2e\00f\00f\00\001\000\001\000\006\000\000\001", !1, !5, !6, null, void ()* @f, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/test1.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{!"0x11\0012\00clang version 3.4 (192092)\000\00\000\00\000", !9, !2, !2, !10, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/test2.c] [DW_LANG_C99]
+!9 = !{!"test2.c", !"/tmp"}
+!10 = !{!11}
+!11 = !{!"0x2e\00g\00g\00\001\000\001\000\006\000\000\001", !9, !12, !6, null, void ()* @g, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [g]
+!12 = !{!"0x29", !9}         ; [ DW_TAG_file_type ] [/tmp/test2.c]
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !MDLocation(line: 1, scope: !4)
+!15 = !MDLocation(line: 1, scope: !11)
+!16 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/duplicate_inline.ll b/test/DebugInfo/duplicate_inline.ll
new file mode 100644
index 000000000000..2a9c485632b3
--- /dev/null
+++ b/test/DebugInfo/duplicate_inline.ll
@@ -0,0 +1,117 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf < %s -filetype=obj | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Built with clang from the following source:
+; void f1(int);
+; __attribute__((always_inline)) inline void f2(int i) { f1(i); }
+;
+; #define MULTICALL \
+;   f2(x);          \
+;   f2(y);
+;
+; void f3(int x, int y) { MULTICALL; }
+
+; FIXME: This produces only one inlined_subroutine, with two formal_parameters
+; (both named "this"), one for each of the actual inlined subroutines.  ;
+; Inlined scopes are differentiated by the combination of 'inlined at' (call)
+; location and the location within the function. If two calls to the same
+; function occur at the same location the scopes end up conflated and there
+; appears to be only one inlined function.
+; To fix this, we'd need to add some kind of unique metadata per call site, possibly something like:
+;
+; !42 = !MDLocation(line: 1, scope: !43, inlinedAt: !44)
+; !44 = !MDLocation(line: 2, scope: !45)
+;
+; ->
+;
+; !42 = !MDLocation(line: 1, scope: !43, inlinedAt: !44)
+; !44 = !{!45, !44}
+; !45 = !MDLocation(line: 2, scope: !45)
+;
+; since cycles in metadata are not uniqued, the !44 node would not be shared
+; between calls to the same function from the same location, ensuring separate
+; inlined subroutines would be generated.
+;
+; Once this is done, the (insufficient) hack in clang that adds column
+; information to call sites to differentiate inlined callers can be removed as it
+; will no longer be necessary.
+;
+; While it might be nice to omit the duplicate parameter in this case (while
+; we wait/work on the real fix), it's actually better to leave it in because it
+; allows us to hold the invariant that every DbgVariable has a DIE, every time.
+; This has proved valuable in finding other bugs, so I want to avoid removing the
+; invariant/assertion. Besides, we don't know which one's the right one anyway...
+
+; CHECK: DW_TAG_subprogram
+; CHECK:   DW_TAG_inlined_subroutine
+; CHECK:     DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:     DW_TAG_formal_parameter
+; CHECK-NOT: DW_TAG
+; CHECK:     NULL
+; CHECK-NOT: DW_TAG
+; CHECK:   NULL
+
+; Function Attrs: uwtable
+define void @_Z2f3ii(i32 %x, i32 %y) #0 {
+entry:
+  %i.addr.i1 = alloca i32, align 4
+  %i.addr.i = alloca i32, align 4
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !15, metadata !16), !dbg !17
+  store i32 %y, i32* %y.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %y.addr, metadata !18, metadata !16), !dbg !19
+  %0 = load i32* %x.addr, align 4, !dbg !20
+  store i32 %0, i32* %i.addr.i, align 4, !dbg !20
+  call void @llvm.dbg.declare(metadata i32* %i.addr.i, metadata !21, metadata !16), !dbg !22
+  %1 = load i32* %i.addr.i, align 4, !dbg !23
+  call void @_Z2f1i(i32 %1), !dbg !23
+  %2 = load i32* %y.addr, align 4, !dbg !20
+  store i32 %2, i32* %i.addr.i1, align 4, !dbg !20
+  call void @llvm.dbg.declare(metadata i32* %i.addr.i1, metadata !21, metadata !16), !dbg !22
+  %3 = load i32* %i.addr.i1, align 4, !dbg !23
+  call void @_Z2f1i(i32 %3), !dbg !23
+  ret void, !dbg !24
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare void @_Z2f1i(i32) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12, !13}
+!llvm.ident = !{!14}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/duplicate_inline.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"duplicate_inline.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4, !9}
+!4 = !{!"0x2e\00f3\00f3\00_Z2f3ii\008\000\001\000\000\00256\000\008", !1, !5, !6, null, void (i32, i32)* @_Z2f3ii, null, null, !2} ; [ DW_TAG_subprogram ] [line 8] [def] [f3]
+!5 = !{!"0x29", !1}    ; [ DW_TAG_file_type ] [/tmp/dbginfo/duplicate_inline.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !8, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x2e\00f2\00f2\00_Z2f2i\002\000\001\000\000\00256\000\002", !1, !5, !10, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [f2]
+!10 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !{null, !8}
+!12 = !{i32 2, !"Dwarf Version", i32 4}
+!13 = !{i32 2, !"Debug Info Version", i32 2}
+!14 = !{!"clang version 3.6.0 "}
+!15 = !{!"0x101\00x\0016777224\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [x] [line 8]
+!16 = !{!"0x102"}               ; [ DW_TAG_expression ]
+!17 = !MDLocation(line: 8, column: 13, scope: !4)
+!18 = !{!"0x101\00y\0033554440\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [y] [line 8]
+!19 = !MDLocation(line: 8, column: 20, scope: !4)
+!20 = !MDLocation(line: 8, column: 25, scope: !4)
+!21 = !{!"0x101\00i\0016777218\000", !9, !5, !8} ; [ DW_TAG_arg_variable ] [i] [line 2]
+!22 = !MDLocation(line: 2, column: 51, scope: !9, inlinedAt: !20)
+!23 = !MDLocation(line: 2, column: 56, scope: !9, inlinedAt: !20)
+!24 = !MDLocation(line: 8, column: 36, scope: !4)
diff --git a/test/DebugInfo/dwarf-public-names.ll b/test/DebugInfo/dwarf-public-names.ll
index 72189641e3aa..d2b866420436 100644
--- a/test/DebugInfo/dwarf-public-names.ll
+++ b/test/DebugInfo/dwarf-public-names.ll
@@ -59,13 +59,13 @@ define void @_ZN1C15member_functionEv(%struct.C* %this) nounwind uwtable align 2
 entry:
   %this.addr = alloca %struct.C*, align 8
   store %struct.C* %this, %struct.C** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !28), !dbg !30
+  call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !28, metadata !{!"0x102"}), !dbg !30
   %this1 = load %struct.C** %this.addr
   store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !31
   ret void, !dbg !32
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define i32 @_ZN1C22static_member_functionEv() nounwind uwtable align 2 {
 entry:
@@ -90,42 +90,42 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!38}
 
-!0 = metadata !{i32 786449, metadata !37, i32 4, metadata !"clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !24,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{}
-!2 = metadata !{metadata !3, metadata !18, metadata !19, metadata !20}
-!3 = metadata !{i32 786478, metadata !4, null, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 9, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C15member_functionEv, null, metadata !12, metadata !1, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
-!4 = metadata !{i32 786473, metadata !37} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!6 = metadata !{null, metadata !7}
-!7 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from C]
-!8 = metadata !{i32 786451, metadata !37, null, metadata !"C", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ]
-!9 = metadata !{metadata !10, metadata !12, metadata !14}
-!10 = metadata !{i32 786445, metadata !37, metadata !8, metadata !"static_member_variable", i32 4, i64 0, i64 0, i64 0, i32 4096, metadata !11, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int]
-!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!12 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"member_function", metadata !"member_function", metadata !"_ZN1C15member_functionEv", i32 2, metadata !5, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !13, i32 2} ; [ DW_TAG_subprogram ] [line 2] [member_function]
-!13 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!14 = metadata !{i32 786478, metadata !4, metadata !8, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 3, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17, i32 3} ; [ DW_TAG_subprogram ] [line 3] [static_member_function]
-!15 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{metadata !11}
-!17 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!18 = metadata !{i32 786478, metadata !4, null, metadata !"static_member_function", metadata !"static_member_function", metadata !"_ZN1C22static_member_functionEv", i32 13, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_ZN1C22static_member_functionEv, null, metadata !14, metadata !1, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function]
-!19 = metadata !{i32 786478, metadata !4, metadata !4, metadata !"global_function", metadata !"global_function", metadata !"_Z15global_functionv", i32 19, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z15global_functionv, null, null, metadata !1, i32 19} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function]
-!20 = metadata !{i32 786478, metadata !4, metadata !21, metadata !"global_namespace_function", metadata !"global_namespace_function", metadata !"_ZN2ns25global_namespace_functionEv", i32 24, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns25global_namespace_functionEv, null, null, metadata !1, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function]
-!21 = metadata !{i32 786489, metadata !4, null, metadata !"ns", i32 23} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp]
-!22 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!23 = metadata !{null}
-!24 = metadata !{metadata !25, metadata !26, metadata !27}
-!25 = metadata !{i32 786484, i32 0, metadata !8, metadata !"static_member_variable", metadata !"static_member_variable", metadata !"_ZN1C22static_member_variableE", metadata !4, i32 7, metadata !11, i32 0, i32 1, i32* @_ZN1C22static_member_variableE, metadata !10} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def]
-!26 = metadata !{i32 786484, i32 0, null, metadata !"global_variable", metadata !"global_variable", metadata !"", metadata !4, i32 17, metadata !8, i32 0, i32 1, %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 17] [def]
-!27 = metadata !{i32 786484, i32 0, metadata !21, metadata !"global_namespace_variable", metadata !"global_namespace_variable", metadata !"_ZN2ns25global_namespace_variableE", metadata !4, i32 27, metadata !11, i32 0, i32 1, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 27] [def]
-!28 = metadata !{i32 786689, metadata !3, metadata !"this", metadata !4, i32 16777225, metadata !29, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 9]
-!29 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from C]
-!30 = metadata !{i32 9, i32 0, metadata !3, null}
-!31 = metadata !{i32 10, i32 0, metadata !3, null}
-!32 = metadata !{i32 11, i32 0, metadata !3, null}
-!33 = metadata !{i32 14, i32 0, metadata !18, null}
-!34 = metadata !{i32 20, i32 0, metadata !19, null}
-!35 = metadata !{i32 25, i32 0, metadata !20, null}
-!36 = metadata !{i32 26, i32 0, metadata !20, null}
-!37 = metadata !{metadata !"dwarf-public-names.cpp", metadata !"/usr2/kparzysz/s.hex/t"}
-!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.3 (http://llvm.org/git/clang.git a09cd8103a6a719cb2628cdf0c91682250a17bd2) (http://llvm.org/git/llvm.git 47d03cec0afca0c01ae42b82916d1d731716cd20)\000\00\000\00\000", !37, !1, !1, !2, !24,  !1} ; [ DW_TAG_compile_unit ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp] [DW_LANG_C_plus_plus]
+!1 = !{}
+!2 = !{!3, !18, !19, !20}
+!3 = !{!"0x2e\00member_function\00member_function\00_ZN1C15member_functionEv\009\000\001\000\006\00256\000\009", !4, null, !5, null, void (%struct.C*)* @_ZN1C15member_functionEv, null, !12, !1} ; [ DW_TAG_subprogram ] [line 9] [def] [member_function]
+!4 = !{!"0x29", !37} ; [ DW_TAG_file_type ]
+!5 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !6, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = !{null, !7}
+!7 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from C]
+!8 = !{!"0x13\00C\001\008\008\000\000\000", !37, null, null, !9, null, null, null} ; [ DW_TAG_structure_type ] [C] [line 1, size 8, align 8, offset 0] [def] [from ]
+!9 = !{!10, !12, !14}
+!10 = !{!"0xd\00static_member_variable\004\000\000\000\004096", !37, !8, !11, null} ; [ DW_TAG_member ] [static_member_variable] [line 4, size 0, align 0, offset 0] [static] [from int]
+!11 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = !{!"0x2e\00member_function\00member_function\00_ZN1C15member_functionEv\002\000\000\000\006\00256\000\002", !4, !8, !5, null, null, null, i32 0, !13} ; [ DW_TAG_subprogram ] [line 2] [member_function]
+!13 = !{!"0x24"}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!14 = !{!"0x2e\00static_member_function\00static_member_function\00_ZN1C22static_member_functionEv\003\000\000\000\006\00256\000\003", !4, !8, !15, null, null, null, i32 0, !17} ; [ DW_TAG_subprogram ] [line 3] [static_member_function]
+!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{!11}
+!17 = !{!"0x24"}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
+!18 = !{!"0x2e\00static_member_function\00static_member_function\00_ZN1C22static_member_functionEv\0013\000\001\000\006\00256\000\0013", !4, null, !15, null, i32 ()* @_ZN1C22static_member_functionEv, null, !14, !1} ; [ DW_TAG_subprogram ] [line 13] [def] [static_member_function]
+!19 = !{!"0x2e\00global_function\00global_function\00_Z15global_functionv\0019\000\001\000\006\00256\000\0019", !4, !4, !15, null, i32 ()* @_Z15global_functionv, null, null, !1} ; [ DW_TAG_subprogram ] [line 19] [def] [global_function]
+!20 = !{!"0x2e\00global_namespace_function\00global_namespace_function\00_ZN2ns25global_namespace_functionEv\0024\000\001\000\006\00256\000\0024", !4, !21, !22, null, void ()* @_ZN2ns25global_namespace_functionEv, null, null, !1} ; [ DW_TAG_subprogram ] [line 24] [def] [global_namespace_function]
+!21 = !{!"0x39\00ns\0023", !4, null} ; [ DW_TAG_namespace ] [/usr2/kparzysz/s.hex/t/dwarf-public-names.cpp]
+!22 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !23, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!23 = !{null}
+!24 = !{!25, !26, !27}
+!25 = !{!"0x34\00static_member_variable\00static_member_variable\00_ZN1C22static_member_variableE\007\000\001", !8, !4, !11, i32* @_ZN1C22static_member_variableE, !10} ; [ DW_TAG_variable ] [static_member_variable] [line 7] [def]
+!26 = !{!"0x34\00global_variable\00global_variable\00\0017\000\001", null, !4, !8, %struct.C* @global_variable, null} ; [ DW_TAG_variable ] [global_variable] [line 17] [def]
+!27 = !{!"0x34\00global_namespace_variable\00global_namespace_variable\00_ZN2ns25global_namespace_variableE\0027\000\001", !21, !4, !11, i32* @_ZN2ns25global_namespace_variableE, null} ; [ DW_TAG_variable ] [global_namespace_variable] [line 27] [def]
+!28 = !{!"0x101\00this\0016777225\001088", !3, !4, !29} ; [ DW_TAG_arg_variable ] [this] [line 9]
+!29 = !{!"0xf\00\000\0064\0064\000\000", null, null, !8} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from C]
+!30 = !MDLocation(line: 9, scope: !3)
+!31 = !MDLocation(line: 10, scope: !3)
+!32 = !MDLocation(line: 11, scope: !3)
+!33 = !MDLocation(line: 14, scope: !18)
+!34 = !MDLocation(line: 20, scope: !19)
+!35 = !MDLocation(line: 25, scope: !20)
+!36 = !MDLocation(line: 26, scope: !20)
+!37 = !{!"dwarf-public-names.cpp", !"/usr2/kparzysz/s.hex/t"}
+!38 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/dwarfdump-accel.test b/test/DebugInfo/dwarfdump-accel.test
new file mode 100644
index 000000000000..c5c3b0154c11
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-accel.test
@@ -0,0 +1,63 @@
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-objc.x86_64.o | FileCheck %s
+
+Gather some DIE indexes to verify the accelerator table contents.
+CHECK: .debug_info contents
+CHECK: [[TESTINTERFACE:0x[0-9a-f]*]]:{{.*}}DW_TAG_structure_type
+CHECK-NOT: DW_TAG
+CHECK:     DW_AT_name{{.*}}"TestInterface"
+CHECK: [[READONLY:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram
+CHECK-NOT: DW_TAG
+CHECK:     DW_AT_name{{.*}}"-[TestInterface ReadOnly]"
+CHECK: [[ASSIGN:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram
+CHECK-NOT: DW_TAG
+CHECK:     DW_AT_name{{.*}}"-[TestInterface Assign]"
+CHECK: [[SETASSIGN:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram
+CHECK-NOT: DW_TAG
+CHECK:     DW_AT_name{{.*}}"-[TestInterface setAssign:]"
+
+
+Check that the section header is printed correclty.
+CHECK: .apple_names contents:
+CHECK: Magic = 0x48415348
+CHECK: Version = 0x0001
+CHECK: Hash function = 0x00000000
+CHECK: Bucket count = 11
+CHECK: Hashes count = 22
+CHECK: HeaderData length = 12
+CHECK: DIE offset base = 0
+CHECK: Number of atoms = 1
+CHECK: Atom[0]  Type: DW_ATOM_die_offset Form: DW_FORM_data4
+
+Check that empty buckets are handled correctly.
+CHECK: Bucket[2]
+CHECK:   EMPTY
+CHECK: Bucket[3]
+
+Check that the accelerators point to the right DIEs.
+CHECK:     Name:{{.*}}"-[TestInterface ReadOnly]"
+CHECK-NOT: Name
+CHECK:     {Atom[0]: [[READONLY]]}
+CHECK:     Name:{{.*}}"-[TestInterface setAssign:]"
+CHECK-NOT: Name
+CHECK:     {Atom[0]: [[SETASSIGN]]}
+CHECK:     Name:{{.*}}"-[TestInterface Assign]"
+CHECK-NOT: Name
+CHECK:     {Atom[0]: [[ASSIGN]]}
+
+Check that types are referenced correctly.
+CHECK: .apple_types contents:
+CHECK:     Name{{.*}}"TestInterface"
+CHECK-NOT: Name
+CHECK:     {Atom[0]: [[TESTINTERFACE]]}
+
+Check that an empty ecceleratorsection is handled correctly.
+CHECK: .apple_namespaces contents:
+CHECK-NOT: Magic
+
+Check ObjC specific accelerators.
+CHECK: .apple_objc contents:
+CHECK:     Name{{.*}}"TestInterface"
+CHECK-NOT Name
+CHECK:     {Atom[0]: [[READONLY]]}
+CHECK:     {Atom[0]: [[ASSIGN]]}
+CHECK:     {Atom[0]: [[SETASSIGN]]}
diff --git a/test/DebugInfo/dwarfdump-objc.test b/test/DebugInfo/dwarfdump-objc.test
new file mode 100644
index 000000000000..6890c3a0471f
--- /dev/null
+++ b/test/DebugInfo/dwarfdump-objc.test
@@ -0,0 +1,40 @@
+RUN: llvm-dwarfdump %p/Inputs/dwarfdump-objc.x86_64.o | FileCheck %s
+
+CHECK:      .debug_info contents:
+
+CHECK: DW_TAG_APPLE_property
+CHECK-NOT: TAG
+CHECK:    DW_AT_APPLE_property_name {{.*}} "ReadOnly"
+CHECK-NOT: TAG
+CHECK:    DW_AT_APPLE_property_attribute {{.*}} (0x01 (DW_APPLE_PROPERTY_readonly))
+
+CHECK: DW_TAG_APPLE_property
+CHECK-NOT: TAG
+CHECK:   DW_AT_APPLE_property_name {{.*}} "Assign"
+CHECK-NOT: TAG
+CHECK:   DW_AT_APPLE_property_attribute {{.*}} (0x0c (DW_APPLE_PROPERTY_assign, DW_APPLE_PROPERTY_readwrite))
+
+CHECK: DW_TAG_APPLE_property
+CHECK-NOT: TAG
+CHECK:   DW_AT_APPLE_property_name {{.*}} "ReadWrite"
+CHECK-NOT: TAG
+CHECK:   DW_AT_APPLE_property_attribute {{.*}} (0x0c (DW_APPLE_PROPERTY_assign, DW_APPLE_PROPERTY_readwrite))
+
+CHECK: DW_TAG_APPLE_property
+CHECK-NOT: TAG
+CHECK:   DW_AT_APPLE_property_name {{.*}} "Retain"
+CHECK-NOT: TAG
+CHECK:   DW_AT_APPLE_property_attribute {{.*}} (0x18 (DW_APPLE_PROPERTY_readwrite, DW_APPLE_PROPERTY_retain))
+
+CHECK: DW_TAG_APPLE_property
+CHECK-NOT: TAG
+CHECK:   DW_AT_APPLE_property_name {{.*}} "Copy"
+CHECK-NOT: TAG
+CHECK:   DW_AT_APPLE_property_attribute {{.*}} (0x28 (DW_APPLE_PROPERTY_readwrite, DW_APPLE_PROPERTY_copy))
+
+CHECK: DW_TAG_APPLE_property
+CHECK-NOT: TAG
+CHECK:   DW_AT_APPLE_property_name {{.*}} "NonAtomic"
+CHECK-NOT: TAG
+CHECK:   DW_AT_APPLE_property_attribute {{.*}} (0x4c (DW_APPLE_PROPERTY_assign, DW_APPLE_PROPERTY_readwrite, DW_APPLE_PROPERTY_nonatomic))
+
diff --git a/test/DebugInfo/dwarfdump-ranges.test b/test/DebugInfo/dwarfdump-ranges.test
index c9e33dcdc975..710aec6098a4 100644
--- a/test/DebugInfo/dwarfdump-ranges.test
+++ b/test/DebugInfo/dwarfdump-ranges.test
@@ -1,5 +1,19 @@
 RUN: llvm-dwarfdump %p/Inputs/dwarfdump-test4.elf-x86-64 | FileCheck %s
 
+CHECK: .debug_info contents:
+CHECK: DW_TAG_compile_unit
+CHECK-NOT: TAG
+CHECK:  DW_AT_ranges [DW_FORM_data4]      (0x00000000
+CHECK-NEXT:          [0x000000000000062c - 0x0000000000000637)
+CHECK-NEXT:          [0x0000000000000637 - 0x000000000000063d))
+
+CHECK: DW_TAG_compile_unit
+CHECK-NOT: TAG
+CHECK:  DW_AT_ranges [DW_FORM_data4]      (0x00000030
+CHECK-NEXT:          [0x0000000000000640 - 0x000000000000064b)
+CHECK-NEXT:          [0x0000000000000637 - 0x000000000000063d))
+
+
 CHECK:      .debug_ranges contents:
 CHECK-NEXT: 00000000 000000000000062c 0000000000000637
 CHECK-NEXT: 00000000 0000000000000637 000000000000063d
diff --git a/test/DebugInfo/empty.ll b/test/DebugInfo/empty.ll
index cf40523e7e4a..3f7f5463478c 100644
--- a/test/DebugInfo/empty.ll
+++ b/test/DebugInfo/empty.ll
@@ -24,8 +24,8 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!5}
 
-!0 = metadata !{i32 720913, metadata !4, i32 12, metadata !"clang version 3.1 (trunk 143523)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!2 = metadata !{}
-!3 = metadata !{i32 786473, metadata !4} ; [ DW_TAG_file_type ]
-!4 = metadata !{metadata !"empty.c", metadata !"/home/nlewycky"}
-!5 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.1 (trunk 143523)\001\00\000\00\000", !4, !2, !2, !2, !2, null} ; [ DW_TAG_compile_unit ]
+!2 = !{}
+!3 = !{!"0x29", !4} ; [ DW_TAG_file_type ]
+!4 = !{!"empty.c", !"/home/nlewycky"}
+!5 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/enum-types.ll b/test/DebugInfo/enum-types.ll
new file mode 100644
index 000000000000..3932535b4c46
--- /dev/null
+++ b/test/DebugInfo/enum-types.ll
@@ -0,0 +1,78 @@
+; REQUIRES: object-emission
+;
+; RUN: %llc_dwarf -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Make sure we can handle enums with the same identifier but in enum types of
+; different compile units.
+; rdar://17628609
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: 0x[[ENUM:.*]]: DW_TAG_enumeration_type
+; CHECK-NEXT:   DW_AT_name {{.*}} "EA"
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_Z4topA2EA"
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x{{.*}} => {0x[[ENUM]]})
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_subprogram
+; CHECK:   DW_AT_MIPS_linkage_name {{.*}} "_Z4topB2EA"
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[ENUM]]
+
+; Function Attrs: nounwind ssp uwtable
+define void @_Z4topA2EA(i32 %sa) #0 {
+entry:
+  %sa.addr = alloca i32, align 4
+  store i32 %sa, i32* %sa.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %sa.addr, metadata !22, metadata !{!"0x102"}), !dbg !23
+  ret void, !dbg !24
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind ssp uwtable
+define void @_Z4topB2EA(i32 %sa) #0 {
+entry:
+  %sa.addr = alloca i32, align 4
+  store i32 %sa, i32* %sa.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %sa.addr, metadata !25, metadata !{!"0x102"}), !dbg !26
+  ret void, !dbg !27
+}
+
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0, !12}
+!llvm.module.flags = !{!19, !20}
+!llvm.ident = !{!21, !21}
+
+!0 = !{!"0x11\004\00clang version 3.5.0 (trunk 214102:214133) (llvm/trunk 214102:214132)\000\00\000\00\001", !1, !2, !2, !6, !11, !11} ; [ DW_TAG_compile_unit ] [<unknown>] [DW_LANG_C_plus_plus]
+!1 = !{!"a.cpp", !""}
+!2 = !{!3}
+!3 = !{!"0x4\00EA\001\0032\0032\000\000\000", !1, null, null, !4, null, null, !"_ZTS2EA"} ; [ DW_TAG_enumeration_type ] [EA] [line 1, size 32, align 32, offset 0] [def] [from ]
+!4 = !{!5}
+!5 = !{!"0x28\00EA_0\000"} ; [ DW_TAG_enumerator ] [EA_0 :: 0]
+!6 = !{!7}
+!7 = !{!"0x2e\00topA\00topA\00_Z4topA2EA\005\000\001\000\006\00256\000\005", !1, !8, !9, null, void (i32)* @_Z4topA2EA, null, null, !11} ; [ DW_TAG_subprogram ] [line 5] [def] [topA]
+!8 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [a.cpp]
+!9 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !{null, !"_ZTS2EA"}
+!11 = !{}
+!12 = !{!"0x11\004\00clang version 3.5.0 (trunk 214102:214133) (llvm/trunk 214102:214132)\000\00\000\00\001", !13, !14, !14, !16, !11, !11} ; [ DW_TAG_compile_unit ] [b.cpp] [DW_LANG_C_plus_plus]
+!13 = !{!"b.cpp", !""}
+!14 = !{!15}
+!15 = !{!"0x4\00EA\001\0032\0032\000\000\000", !13, null, null, !4, null, null, !"_ZTS2EA"} ; [ DW_TAG_enumeration_type ] [EA] [line 1, size 32, align 32, offset 0] [def] [from ]
+!16 = !{!17}
+!17 = !{!"0x2e\00topB\00topB\00_Z4topB2EA\005\000\001\000\006\00256\000\005", !13, !18, !9, null, void (i32)* @_Z4topB2EA, null, null, !11} ; [ DW_TAG_subprogram ] [line 5] [def] [topB]
+!18 = !{!"0x29", !13}        ; [ DW_TAG_file_type ] [b.cpp]
+!19 = !{i32 2, !"Dwarf Version", i32 2}
+!20 = !{i32 2, !"Debug Info Version", i32 2}
+!21 = !{!"clang version 3.5.0 (trunk 214102:214133) (llvm/trunk 214102:214132)"}
+!22 = !{!"0x101\00sa\0016777221\000", !7, !8, !"_ZTS2EA"} ; [ DW_TAG_arg_variable ] [sa] [line 5]
+!23 = !MDLocation(line: 5, column: 14, scope: !7)
+!24 = !MDLocation(line: 6, column: 1, scope: !7)
+!25 = !{!"0x101\00sa\0016777221\000", !17, !18, !"_ZTS2EA"} ; [ DW_TAG_arg_variable ] [sa] [line 5]
+!26 = !MDLocation(line: 5, column: 14, scope: !17)
+!27 = !MDLocation(line: 6, column: 1, scope: !17)
diff --git a/test/DebugInfo/enum.ll b/test/DebugInfo/enum.ll
index df097a6ea92b..4dd4c688ad67 100644
--- a/test/DebugInfo/enum.ll
+++ b/test/DebugInfo/enum.ll
@@ -39,13 +39,13 @@
 define void @_Z4funcv() #0 {
 entry:
   %b = alloca i32, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %b}, metadata !20), !dbg !22
+  call void @llvm.dbg.declare(metadata i32* %b, metadata !20, metadata !{!"0x102"}), !dbg !22
   store i32 0, i32* %b, align 4, !dbg !22
   ret void, !dbg !23
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -53,28 +53,28 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!19, !24}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !11, metadata !12, metadata !17, metadata !11, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/enum.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"enum.cpp", metadata !"/tmp"}
-!2 = metadata !{metadata !3, metadata !8}
-!3 = metadata !{i32 786436, metadata !1, null, metadata !"e1", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [e1] [line 1, size 64, align 64, offset 0] [def] [from ]
-!4 = metadata !{metadata !5, metadata !6, metadata !7}
-!5 = metadata !{i32 786472, metadata !"I", i64 0} ; [ DW_TAG_enumerator ] [I :: 0]
-!6 = metadata !{i32 786472, metadata !"J", i64 4294967295} ; [ DW_TAG_enumerator ] [J :: 4294967295]
-!7 = metadata !{i32 786472, metadata !"K", i64 -1152921504606846976} ; [ DW_TAG_enumerator ] [K :: 17293822569102704640]
-!8 = metadata !{i32 786436, metadata !1, null, metadata !"e2", i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_enumeration_type ] [e2] [line 2, size 32, align 32, offset 0] [def] [from ]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786472, metadata !"X", i64 0} ; [ DW_TAG_enumerator ] [X :: 0]
-!11 = metadata !{}
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786478, metadata !1, metadata !14, metadata !"func", metadata !"func", metadata !"_Z4funcv", i32 3, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z4funcv, null, null, metadata !11, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [func]
-!14 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/enum.cpp]
-!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{null}
-!17 = metadata !{metadata !18}
-!18 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !14, i32 1, metadata !3, i32 0, i32 1, i64* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
-!19 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!20 = metadata !{i32 786688, metadata !13, metadata !"b", metadata !14, i32 4, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 4]
-!21 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!22 = metadata !{i32 4, i32 0, metadata !13, null}
-!23 = metadata !{i32 5, i32 0, metadata !13, null}
-!24 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 \000\00\000\00\000", !1, !2, !11, !12, !17, !11} ; [ DW_TAG_compile_unit ] [/tmp/enum.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"enum.cpp", !"/tmp"}
+!2 = !{!3, !8}
+!3 = !{!"0x4\00e1\001\0064\0064\000\000\000", !1, null, null, !4, null, null, null} ; [ DW_TAG_enumeration_type ] [e1] [line 1, size 64, align 64, offset 0] [def] [from ]
+!4 = !{!5, !6, !7}
+!5 = !{!"0x28\00I\000"} ; [ DW_TAG_enumerator ] [I :: 0]
+!6 = !{!"0x28\00J\004294967295"} ; [ DW_TAG_enumerator ] [J :: 4294967295]
+!7 = !{!"0x28\00K\00-1152921504606846976"} ; [ DW_TAG_enumerator ] [K :: 17293822569102704640]
+!8 = !{!"0x4\00e2\002\0032\0032\000\000\000", !1, null, null, !9, null, null, null} ; [ DW_TAG_enumeration_type ] [e2] [line 2, size 32, align 32, offset 0] [def] [from ]
+!9 = !{!10}
+!10 = !{!"0x28\00X\000"} ; [ DW_TAG_enumerator ] [X :: 0]
+!11 = !{}
+!12 = !{!13}
+!13 = !{!"0x2e\00func\00func\00_Z4funcv\003\000\001\000\006\00256\000\003", !1, !14, !15, null, void ()* @_Z4funcv, null, null, !11} ; [ DW_TAG_subprogram ] [line 3] [def] [func]
+!14 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/tmp/enum.cpp]
+!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{null}
+!17 = !{!18}
+!18 = !{!"0x34\00a\00a\00\001\000\001", null, !14, !3, i64* @a, null} ; [ DW_TAG_variable ] [a] [line 1] [def]
+!19 = !{i32 2, !"Dwarf Version", i32 3}
+!20 = !{!"0x100\00b\004\000", !13, !14, !21} ; [ DW_TAG_auto_variable ] [b] [line 4]
+!21 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!22 = !MDLocation(line: 4, scope: !13)
+!23 = !MDLocation(line: 5, scope: !13)
+!24 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/global.ll b/test/DebugInfo/global.ll
index 3c97f0cb2279..1715ca8388af 100644
--- a/test/DebugInfo/global.ll
+++ b/test/DebugInfo/global.ll
@@ -26,17 +26,17 @@ attributes #0 = { nounwind readnone uwtable "less-precise-fpmad"="false" "no-fra
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!11, !13}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/global.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"global.cpp", metadata !"/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [main]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/global.cpp]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786484, i32 0, null, metadata !"i", metadata !"i", metadata !"_ZL1i", metadata !5, i32 1, metadata !8, i32 1, i32 1, null, null}
-!11 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!12 = metadata !{i32 4, i32 0, metadata !4, null}
-!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 \001\00\000\00\000", !1, !2, !2, !3, !9, !2} ; [ DW_TAG_compile_unit ] [/tmp/global.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"global.cpp", !"/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00main\00main\00\002\000\001\000\006\00256\001\002", !1, !5, !6, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [main]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/global.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!10}
+!10 = !{!"0x34\00i\00i\00_ZL1i\001\001\001", null, !5, !8, null, null} ; [ DW_TAG_variable ]
+!11 = !{i32 2, !"Dwarf Version", i32 3}
+!12 = !MDLocation(line: 4, scope: !4)
+!13 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/gmlt.test b/test/DebugInfo/gmlt.test
new file mode 100644
index 000000000000..0514dbfc461a
--- /dev/null
+++ b/test/DebugInfo/gmlt.test
@@ -0,0 +1,5 @@
+; REQUIRES: object-emission
+; RUN: %llc_dwarf -O0 -filetype=obj < %S/Inputs/gmlt.ll | llvm-dwarfdump - | FileCheck %S/Inputs/gmlt.ll
+
+; There's a darwin specific test in X86/gmlt, so it's okay to XFAIL this here.
+; XFAIL: darwin
diff --git a/test/DebugInfo/incorrect-variable-debugloc.ll b/test/DebugInfo/incorrect-variable-debugloc.ll
index 284704c54a91..afccd672f90c 100644
--- a/test/DebugInfo/incorrect-variable-debugloc.ll
+++ b/test/DebugInfo/incorrect-variable-debugloc.ll
@@ -38,11 +38,11 @@
 
 ; CHECK: DW_TAG_structure_type
 ; CHECK-NEXT: DW_AT_name {{.*}} "C"
-; CHECK: [[FN3_DECL:.*]]: DW_TAG_subprogram
+; CHECK:   DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_name {{.*}} "m_fn3"
 
-; CHECK: DW_AT_specification {{.*}} {[[FN3_DECL]]}
+; CHECK: DW_AT_specification {{.*}} "_ZN1C5m_fn3Ev"
 ; CHECK-NOT: DW_TAG
 ; CHECK:   DW_TAG_formal_parameter
 ; CHECK-NOT: DW_TAG
@@ -110,7 +110,7 @@ entry:
 
 ; <label>:30                                      ; preds = %24, %5
   store i32 0, i32* %i.i, align 4, !dbg !39, !tbaa !41
-  tail call void @llvm.dbg.value(metadata !{%struct.C* %8}, i64 0, metadata !27), !dbg !46
+  tail call void @llvm.dbg.value(metadata %struct.C* %8, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !46
   call void @_ZN1C5m_fn3Ev(%struct.C* %8), !dbg !47
   unreachable, !dbg !47
 }
@@ -145,7 +145,7 @@ entry:
   %16 = add i64 %15, 0, !dbg !48
   %17 = inttoptr i64 %16 to i64*, !dbg !48
   store i64 -868083113472691727, i64* %17, !dbg !48
-  tail call void @llvm.dbg.value(metadata !{%struct.C* %this}, i64 0, metadata !30), !dbg !48
+  tail call void @llvm.dbg.value(metadata %struct.C* %this, i64 0, metadata !30, metadata !{!"0x102"}), !dbg !48
   %call = call i32 @_ZN1A5m_fn1Ev(%struct.A* %8), !dbg !49
   %i.i = getelementptr inbounds %struct.C* %this, i64 0, i32 1, i32 0, !dbg !50
   %18 = ptrtoint i32* %i.i to i64, !dbg !50
@@ -198,7 +198,7 @@ entry:
 declare i32 @_ZN1A5m_fn1Ev(%struct.A*) #2
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #3
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #3
 
 define internal void @asan.module_ctor() {
   tail call void @__asan_init_v3()
@@ -336,56 +336,56 @@ attributes #3 = { nounwind readnone }
 !llvm.module.flags = !{!36, !37}
 !llvm.ident = !{!38}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !3, metadata !21, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/<stdin>] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"<stdin>", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !14}
-!4 = metadata !{i32 786451, metadata !5, null, metadata !"C", i32 10, i64 64, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 10, size 64, align 32, offset 0] [def] [from ]
-!5 = metadata !{metadata !"incorrect-variable-debug-loc.cpp", metadata !"/tmp/dbginfo"}
-!6 = metadata !{metadata !7, metadata !9, metadata !10}
-!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1C", metadata !"j", i32 12, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [j] [line 12, size 32, align 32, offset 0] [from int]
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1C", metadata !"b", i32 13, i64 32, i64 32, i64 32, i32 0, metadata !"_ZTS1B"} ; [ DW_TAG_member ] [b] [line 13, size 32, align 32, offset 32] [from _ZTS1B]
-!10 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1C", metadata !"m_fn3", metadata !"m_fn3", metadata !"_ZN1C5m_fn3Ev", i32 11, metadata !11, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, null, i32 11} ; [ DW_TAG_subprogram ] [line 11] [m_fn3]
-!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{null, metadata !13}
-!13 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
-!14 = metadata !{i32 786451, metadata !5, null, metadata !"B", i32 5, i64 32, i64 32, i32 0, i32 0, null, metadata !15, i32 0, null, null, metadata !"_ZTS1B"} ; [ DW_TAG_structure_type ] [B] [line 5, size 32, align 32, offset 0] [def] [from ]
-!15 = metadata !{metadata !16, metadata !17}
-!16 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1B", metadata !"i", i32 7, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [i] [line 7, size 32, align 32, offset 0] [from int]
-!17 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1B", metadata !"m_fn2", metadata !"m_fn2", metadata !"_ZN1B5m_fn2Ev", i32 6, metadata !18, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, null, i32 6} ; [ DW_TAG_subprogram ] [line 6] [m_fn2]
-!18 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!19 = metadata !{null, metadata !20}
-!20 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1B]
-!21 = metadata !{metadata !22, metadata !28, metadata !32}
-!22 = metadata !{i32 786478, metadata !5, metadata !23, metadata !"fn1", metadata !"fn1", metadata !"_Z3fn1v", i32 16, metadata !24, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @_Z3fn1v, null, null, metadata !26, i32 16} ; [ DW_TAG_subprogram ] [line 16] [def] [fn1]
-!23 = metadata !{i32 786473, metadata !5}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/incorrect-variable-debug-loc.cpp]
-!24 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!25 = metadata !{metadata !8}
-!26 = metadata !{metadata !27}
-!27 = metadata !{i32 786688, metadata !22, metadata !"A", metadata !23, i32 17, metadata !"_ZTS1C", i32 0, i32 0} ; [ DW_TAG_auto_variable ] [A] [line 17]
-!28 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1C", metadata !"m_fn3", metadata !"m_fn3", metadata !"_ZN1C5m_fn3Ev", i32 21, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (%struct.C*)* @_ZN1C5m_fn3Ev, null, metadata !10, metadata !29, i32 21} ; [ DW_TAG_subprogram ] [line 21] [def] [m_fn3]
-!29 = metadata !{metadata !30}
-!30 = metadata !{i32 786689, metadata !28, metadata !"this", null, i32 16777216, metadata !31, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!31 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C]
-!32 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1B", metadata !"m_fn2", metadata !"m_fn2", metadata !"_ZN1B5m_fn2Ev", i32 6, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, metadata !17, metadata !33, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [m_fn2]
-!33 = metadata !{metadata !34}
-!34 = metadata !{i32 786689, metadata !32, metadata !"this", null, i32 16777216, metadata !35, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!35 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1B]
-!36 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!37 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!38 = metadata !{metadata !"clang version 3.5.0 "}
-!39 = metadata !{i32 6, i32 0, metadata !32, metadata !40}
-!40 = metadata !{i32 18, i32 0, metadata !22, null}
-!41 = metadata !{metadata !42, metadata !43, i64 0}
-!42 = metadata !{metadata !"_ZTS1B", metadata !43, i64 0}
-!43 = metadata !{metadata !"int", metadata !44, i64 0}
-!44 = metadata !{metadata !"omnipotent char", metadata !45, i64 0}
-!45 = metadata !{metadata !"Simple C/C++ TBAA"}
-!46 = metadata !{i32 17, i32 0, metadata !22, null}
-!47 = metadata !{i32 19, i32 0, metadata !22, null}
-!48 = metadata !{i32 0, i32 0, metadata !28, null}
-!49 = metadata !{i32 22, i32 0, metadata !28, null}
-!50 = metadata !{i32 6, i32 0, metadata !32, metadata !51}
-!51 = metadata !{i32 23, i32 0, metadata !28, null}
-!52 = metadata !{i32 24, i32 0, metadata !28, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \001\00\000\00\001", !1, !2, !3, !21, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/<stdin>] [DW_LANG_C_plus_plus]
+!1 = !{!"<stdin>", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4, !14}
+!4 = !{!"0x13\00C\0010\0064\0032\000\000\000", !5, null, null, !6, null, null, !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 10, size 64, align 32, offset 0] [def] [from ]
+!5 = !{!"incorrect-variable-debug-loc.cpp", !"/tmp/dbginfo"}
+!6 = !{!7, !9, !10}
+!7 = !{!"0xd\00j\0012\0032\0032\000\000", !5, !"_ZTS1C", !8} ; [ DW_TAG_member ] [j] [line 12, size 32, align 32, offset 0] [from int]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0xd\00b\0013\0032\0032\0032\000", !5, !"_ZTS1C", !"_ZTS1B"} ; [ DW_TAG_member ] [b] [line 13, size 32, align 32, offset 32] [from _ZTS1B]
+!10 = !{!"0x2e\00m_fn3\00m_fn3\00_ZN1C5m_fn3Ev\0011\000\000\000\006\00256\001\0011", !5, !"_ZTS1C", !11, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 11] [m_fn3]
+!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{null, !13}
+!13 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
+!14 = !{!"0x13\00B\005\0032\0032\000\000\000", !5, null, null, !15, null, null, !"_ZTS1B"} ; [ DW_TAG_structure_type ] [B] [line 5, size 32, align 32, offset 0] [def] [from ]
+!15 = !{!16, !17}
+!16 = !{!"0xd\00i\007\0032\0032\000\000", !5, !"_ZTS1B", !8} ; [ DW_TAG_member ] [i] [line 7, size 32, align 32, offset 0] [from int]
+!17 = !{!"0x2e\00m_fn2\00m_fn2\00_ZN1B5m_fn2Ev\006\000\000\000\006\00256\001\006", !5, !"_ZTS1B", !18, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 6] [m_fn2]
+!18 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !19, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!19 = !{null, !20}
+!20 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1B]
+!21 = !{!22, !28, !32}
+!22 = !{!"0x2e\00fn1\00fn1\00_Z3fn1v\0016\000\001\000\006\00256\001\0016", !5, !23, !24, null, i32 ()* @_Z3fn1v, null, null, !26} ; [ DW_TAG_subprogram ] [line 16] [def] [fn1]
+!23 = !{!"0x29", !5}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/incorrect-variable-debug-loc.cpp]
+!24 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !25, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!25 = !{!8}
+!26 = !{!27}
+!27 = !{!"0x100\00A\0017\000", !22, !23, !"_ZTS1C"} ; [ DW_TAG_auto_variable ] [A] [line 17]
+!28 = !{!"0x2e\00m_fn3\00m_fn3\00_ZN1C5m_fn3Ev\0021\000\001\000\006\00256\001\0021", !5, !"_ZTS1C", !11, null, void (%struct.C*)* @_ZN1C5m_fn3Ev, null, !10, !29} ; [ DW_TAG_subprogram ] [line 21] [def] [m_fn3]
+!29 = !{!30}
+!30 = !{!"0x101\00this\0016777216\001088", !28, null, !31} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!31 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C]
+!32 = !{!"0x2e\00m_fn2\00m_fn2\00_ZN1B5m_fn2Ev\006\000\001\000\006\00256\001\006", !5, !"_ZTS1B", !18, null, null, null, !17, !33} ; [ DW_TAG_subprogram ] [line 6] [def] [m_fn2]
+!33 = !{!34}
+!34 = !{!"0x101\00this\0016777216\001088", !32, null, !35} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!35 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1B]
+!36 = !{i32 2, !"Dwarf Version", i32 4}
+!37 = !{i32 2, !"Debug Info Version", i32 2}
+!38 = !{!"clang version 3.5.0 "}
+!39 = !MDLocation(line: 6, scope: !32, inlinedAt: !40)
+!40 = !MDLocation(line: 18, scope: !22)
+!41 = !{!42, !43, i64 0}
+!42 = !{!"_ZTS1B", !43, i64 0}
+!43 = !{!"int", !44, i64 0}
+!44 = !{!"omnipotent char", !45, i64 0}
+!45 = !{!"Simple C/C++ TBAA"}
+!46 = !MDLocation(line: 17, scope: !22)
+!47 = !MDLocation(line: 19, scope: !22)
+!48 = !MDLocation(line: 0, scope: !28)
+!49 = !MDLocation(line: 22, scope: !28)
+!50 = !MDLocation(line: 6, scope: !32, inlinedAt: !51)
+!51 = !MDLocation(line: 23, scope: !28)
+!52 = !MDLocation(line: 24, scope: !28)
diff --git a/test/DebugInfo/incorrect-variable-debugloc1.ll b/test/DebugInfo/incorrect-variable-debugloc1.ll
new file mode 100644
index 000000000000..18f2dc78e3e8
--- /dev/null
+++ b/test/DebugInfo/incorrect-variable-debugloc1.ll
@@ -0,0 +1,77 @@
+; REQUIRES: object-emission
+; This test is failing for powerpc64, because a location list for the
+; variable 'c' is not generated at all. Temporary marking this test as XFAIL 
+; for powerpc, until PR21881 is fixed.
+; XFAIL: powerpc64
+
+; RUN: %llc_dwarf -O2  -dwarf-version 2 -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s  --check-prefix=DWARF23
+; RUN: %llc_dwarf -O2  -dwarf-version 3 -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s  --check-prefix=DWARF23
+; RUN: %llc_dwarf -O2  -dwarf-version 4 -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s  --check-prefix=DWARF4
+
+; This is a test for PR21176.
+; DW_OP_const <const> doesn't describe a constant value, but a value at a constant address. 
+; The proper way to describe a constant value is DW_OP_constu <const>, DW_OP_stack_value.
+
+; Generated with clang -S -emit-llvm -g -O2 test.cpp
+
+; extern int func();
+; 
+; int main()
+; {
+;   volatile int c = 13;
+;   c = func();
+;   return c;
+; }
+
+; DWARF23: Location description: 10 0d {{$}}
+; DWARF4: Location description: 10 0d 9f
+
+; Function Attrs: uwtable
+define i32 @main() #0 {
+entry:
+  %c = alloca i32, align 4
+  tail call void @llvm.dbg.value(metadata i32 13, i64 0, metadata !10, metadata !16), !dbg !17
+  store volatile i32 13, i32* %c, align 4, !dbg !18
+  %call = tail call i32 @_Z4funcv(), !dbg !19
+  tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !10, metadata !16), !dbg !17
+  store volatile i32 %call, i32* %c, align 4, !dbg !19
+  tail call void @llvm.dbg.value(metadata i32* %c, i64 0, metadata !10, metadata !16), !dbg !17
+  %c.0.c.0. = load volatile i32* %c, align 4, !dbg !20
+  ret i32 %c.0.c.0., !dbg !20
+}
+
+declare i32 @_Z4funcv() #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12, !13}
+!llvm.ident = !{!14}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 (trunk 223522)\001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/home/kromanova/ngh/ToT_latest/llvm/test/DebugInfo/test.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"test.cpp", !"/home/kromanova/ngh/ToT_latest/llvm/test/DebugInfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00main\00main\00\003\000\001\000\000\00256\001\004", !1, !5, !6, null, i32 ()* @main, null, null, !9} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [main]
+!5 = !{!"0x29", !1}    ; [ DW_TAG_file_type ] [/home/kromanova/ngh/ToT_latest/llvm/test/DebugInfo/test.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!10}
+!10 = !{!"0x100\00c\005\000", !4, !5, !11} ; [ DW_TAG_auto_variable ] [c] [line 5]
+!11 = !{!"0x35\00\000\000\000\000\000", null, null, !8} ; [ DW_TAG_volatile_type ] [line 0, size 0, align 0, offset 0] [from int]
+!12 = !{i32 2, !"Dwarf Version", i32 2}
+!13 = !{i32 2, !"Debug Info Version", i32 2}
+!14 = !{!"clang version 3.6.0 (trunk 223522)"}
+!15 = !{i32 13}
+!16 = !{!"0x102"}               ; [ DW_TAG_expression ]
+!17 = !MDLocation(line: 5, column: 16, scope: !4)
+!18 = !MDLocation(line: 5, column: 3, scope: !4)
+!19 = !MDLocation(line: 6, column: 7, scope: !4)
+!20 = !MDLocation(line: 7, column: 3, scope: !4)
+
diff --git a/test/DebugInfo/inheritance.ll b/test/DebugInfo/inheritance.ll
index 6b3ae090e151..ab71d25d625a 100644
--- a/test/DebugInfo/inheritance.ll
+++ b/test/DebugInfo/inheritance.ll
@@ -16,7 +16,7 @@ entry:
   %0 = alloca i32                                 ; <i32*> [#uses=2]
   %tst = alloca %struct.test1                     ; <%struct.test1*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{%struct.test1* %tst}, metadata !0), !dbg !21
+  call void @llvm.dbg.declare(metadata %struct.test1* %tst, metadata !0, metadata !{!"0x102"}), !dbg !21
   call void @_ZN5test1C1Ev(%struct.test1* %tst) nounwind, !dbg !22
   store i32 0, i32* %0, align 4, !dbg !23
   %1 = load i32* %0, align 4, !dbg !23            ; <i32> [#uses=1]
@@ -32,7 +32,7 @@ define linkonce_odr void @_ZN5test1C1Ev(%struct.test1* %this) nounwind ssp align
 entry:
   %this_addr = alloca %struct.test1*              ; <%struct.test1**> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{%struct.test1** %this_addr}, metadata !24), !dbg !28
+  call void @llvm.dbg.declare(metadata %struct.test1** %this_addr, metadata !24, metadata !{!"0x102"}), !dbg !28
   store %struct.test1* %this, %struct.test1** %this_addr
   %0 = load %struct.test1** %this_addr, align 8, !dbg !28 ; <%struct.test1*> [#uses=1]
   %1 = getelementptr inbounds %struct.test1* %0, i32 0, i32 0, !dbg !28 ; <i32 (...)***> [#uses=1]
@@ -43,13 +43,13 @@ return:                                           ; preds = %entry
   ret void, !dbg !29
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define linkonce_odr void @_ZN5test1D1Ev(%struct.test1* %this) nounwind ssp align 2 {
 entry:
   %this_addr = alloca %struct.test1*              ; <%struct.test1**> [#uses=3]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{%struct.test1** %this_addr}, metadata !32), !dbg !34
+  call void @llvm.dbg.declare(metadata %struct.test1** %this_addr, metadata !32, metadata !{!"0x102"}), !dbg !34
   store %struct.test1* %this, %struct.test1** %this_addr
   %0 = load %struct.test1** %this_addr, align 8, !dbg !35 ; <%struct.test1*> [#uses=1]
   %1 = getelementptr inbounds %struct.test1* %0, i32 0, i32 0, !dbg !35 ; <i32 (...)***> [#uses=1]
@@ -78,7 +78,7 @@ define linkonce_odr void @_ZN5test1D0Ev(%struct.test1* %this) nounwind ssp align
 entry:
   %this_addr = alloca %struct.test1*              ; <%struct.test1**> [#uses=3]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{%struct.test1** %this_addr}, metadata !38), !dbg !40
+  call void @llvm.dbg.declare(metadata %struct.test1** %this_addr, metadata !38, metadata !{!"0x102"}), !dbg !40
   store %struct.test1* %this, %struct.test1** %this_addr
   %0 = load %struct.test1** %this_addr, align 8, !dbg !41 ; <%struct.test1*> [#uses=1]
   %1 = getelementptr inbounds %struct.test1* %0, i32 0, i32 0, !dbg !41 ; <i32 (...)***> [#uses=1]
@@ -105,50 +105,50 @@ return:                                           ; preds = %bb2
 
 declare void @_ZdlPv(i8*) nounwind
 
-!0 = metadata !{i32 459008, metadata !1, metadata !"tst", metadata !4, i32 13, metadata !8} ; [ DW_TAG_auto_variable ]
-!1 = metadata !{i32 458763, metadata !44, metadata !2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!2 = metadata !{i32 458763, metadata !44, metadata !3, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!3 = metadata !{i32 458798, i32 0, metadata !4, metadata !"main", metadata !"main", metadata !"main", i32 11, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!4 = metadata !{i32 458769, metadata !44, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !45, metadata !45, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!5 = metadata !{i32 458773, metadata !4, null, metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!6 = metadata !{metadata !7}
-!7 = metadata !{i32 458788, null, metadata !4, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 458771, metadata !44, metadata !4, metadata !"test1", i32 1, i64 64, i64 64, i64 0, i32 0, null, metadata !9, i32 0, metadata !8, null, null} ; [ DW_TAG_structure_type ] [test1] [line 1, size 64, align 64, offset 0] [def] [from ]
-!9 = metadata !{metadata !10, metadata !14, metadata !18}
-!10 = metadata !{i32 458765, metadata !44, metadata !8, metadata !"_vptr$test1", i32 1, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_member ]
-!11 = metadata !{i32 458767, metadata !4, null, metadata !4, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 458767, null, metadata !4, metadata !"__vtbl_ptr_type", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
-!13 = metadata !{i32 458769, metadata !46, i32 4, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 false, metadata !"", i32 0, metadata !45, metadata !45, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!14 = metadata !{i32 458798, i32 0, metadata !8, metadata !"test1", metadata !"test1", metadata !"", i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i1 true, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!15 = metadata !{i32 458773, metadata !4, null, metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{null, metadata !17}
-!17 = metadata !{i32 458767, metadata !4, null, metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !8} ; [ DW_TAG_pointer_type ]
-!18 = metadata !{i32 458798, i32 0, metadata !8, metadata !"~test1", metadata !"~test1", metadata !"", i32 4, metadata !19, i1 false, i1 false, i32 1, i32 0, metadata !8, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!19 = metadata !{i32 458773, metadata !4, null, metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !20, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!20 = metadata !{null, metadata !17, metadata !7}
-!21 = metadata !{i32 11, i32 0, metadata !1, null}
-!22 = metadata !{i32 13, i32 0, metadata !1, null}
-!23 = metadata !{i32 14, i32 0, metadata !1, null}
-!24 = metadata !{i32 459009, metadata !25, metadata !"this", metadata !4, i32 13, metadata !26} ; [ DW_TAG_arg_variable ]
-!25 = metadata !{i32 458798, i32 0, metadata !4, metadata !"test1", metadata !"test1", metadata !"_ZN5test1C1Ev", i32 1, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!26 = metadata !{i32 458790, metadata !4, null, metadata !4, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !27} ; [ DW_TAG_const_type ]
-!27 = metadata !{i32 458767, metadata !4, null, metadata !4, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
-!28 = metadata !{i32 1, i32 0, metadata !25, null}
-!29 = metadata !{i32 1, i32 0, metadata !30, null}
-!30 = metadata !{i32 458763, metadata !44, metadata !31, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!31 = metadata !{i32 458763, metadata !44, metadata !25, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!32 = metadata !{i32 459009, metadata !33, metadata !"this", metadata !4, i32 4, metadata !26} ; [ DW_TAG_arg_variable ]
-!33 = metadata !{i32 458798, i32 0, metadata !8, metadata !"~test1", metadata !"~test1", metadata !"_ZN5test1D1Ev", i32 4, metadata !15, i1 false, i1 true, i32 1, i32 0, metadata !8, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!34 = metadata !{i32 4, i32 0, metadata !33, null}
-!35 = metadata !{i32 5, i32 0, metadata !36, null}
-!36 = metadata !{i32 458763, metadata !44, metadata !33, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!37 = metadata !{i32 6, i32 0, metadata !36, null}
-!38 = metadata !{i32 459009, metadata !39, metadata !"this", metadata !4, i32 4, metadata !26} ; [ DW_TAG_arg_variable ]
-!39 = metadata !{i32 458798, i32 0, metadata !8, metadata !"~test1", metadata !"~test1", metadata !"_ZN5test1D0Ev", i32 4, metadata !15, i1 false, i1 true, i32 1, i32 1, metadata !8, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!40 = metadata !{i32 4, i32 0, metadata !39, null}
-!41 = metadata !{i32 5, i32 0, metadata !42, null}
-!42 = metadata !{i32 458763, metadata !44, metadata !39, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!43 = metadata !{i32 6, i32 0, metadata !42, null}
-!44 = metadata !{metadata !"inheritance.cpp", metadata !"/tmp/"}
-!45 = metadata !{i32 0}
-!46 = metadata !{metadata !"<built-in>", metadata !"/tmp/"}
+!0 = !{!"0x100\00tst\0013\000", !1, !4, !8} ; [ DW_TAG_auto_variable ]
+!1 = !{!"0xb\000\000\000", !44, !2} ; [ DW_TAG_lexical_block ]
+!2 = !{!"0xb\000\000\000", !44, !3} ; [ DW_TAG_lexical_block ]
+!3 = !{!"0x2e\00main\00main\00main\0011\000\001\000\006\000\000\000", i32 0, !4, !5, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!4 = !{!"0x11\004\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", !44, !45, !45, null, null, null} ; [ DW_TAG_compile_unit ]
+!5 = !{!"0x15\00\000\000\000\000\000\000", !4, null, null, !6, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = !{!7}
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !4} ; [ DW_TAG_base_type ]
+!8 = !{!"0x13\00test1\001\0064\0064\000\000\000", !44, !4, null, !9, !8, null, null} ; [ DW_TAG_structure_type ] [test1] [line 1, size 64, align 64, offset 0] [def] [from ]
+!9 = !{!10, !14, !18}
+!10 = !{!"0xd\00_vptr$test1\001\0064\0064\000\000", !44, !8, !11} ; [ DW_TAG_member ]
+!11 = !{!"0xf\00\000\0064\0064\000\000", !4, null, !12} ; [ DW_TAG_pointer_type ]
+!12 = !{!"0xf\00__vtbl_ptr_type\000\000\000\000\000", null, !4, !5} ; [ DW_TAG_pointer_type ]
+!13 = !{!"0x11\004\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\000\00\000\00\000", !46, !45, !45, null, null, null} ; [ DW_TAG_compile_unit ]
+!14 = !{!"0x2e\00test1\00test1\00\001\000\000\000\006\001\000\000", i32 0, !8, !15, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!15 = !{!"0x15\00\000\000\000\000\000\000", !4, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{null, !17}
+!17 = !{!"0xf\00\000\0064\0064\000\0064", !4, null, !8} ; [ DW_TAG_pointer_type ]
+!18 = !{!"0x2e\00~test1\00~test1\00\004\000\000\001\006\000\000\000", i32 0, !8, !19, !8, null, null, null, null} ; [ DW_TAG_subprogram ]
+!19 = !{!"0x15\00\000\000\000\000\000\000", !4, null, null, !20, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!20 = !{null, !17, !7}
+!21 = !MDLocation(line: 11, scope: !1)
+!22 = !MDLocation(line: 13, scope: !1)
+!23 = !MDLocation(line: 14, scope: !1)
+!24 = !{!"0x101\00this\0013\000", !25, !4, !26} ; [ DW_TAG_arg_variable ]
+!25 = !{!"0x2e\00test1\00test1\00_ZN5test1C1Ev\001\000\001\000\006\000\000\000", i32 0, !4, !15, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!26 = !{!"0x26\00\000\0064\0064\000\0064", !4, null, !27} ; [ DW_TAG_const_type ]
+!27 = !{!"0xf\00\000\0064\0064\000\000", !4, null, !8} ; [ DW_TAG_pointer_type ]
+!28 = !MDLocation(line: 1, scope: !25)
+!29 = !MDLocation(line: 1, scope: !30)
+!30 = !{!"0xb\000\000\000", !44, !31} ; [ DW_TAG_lexical_block ]
+!31 = !{!"0xb\000\000\000", !44, !25} ; [ DW_TAG_lexical_block ]
+!32 = !{!"0x101\00this\004\000", !33, !4, !26} ; [ DW_TAG_arg_variable ]
+!33 = !{!"0x2e\00~test1\00~test1\00_ZN5test1D1Ev\004\000\001\001\006\000\000\000", i32 0, !8, !15, !8, null, null, null, null} ; [ DW_TAG_subprogram ]
+!34 = !MDLocation(line: 4, scope: !33)
+!35 = !MDLocation(line: 5, scope: !36)
+!36 = !{!"0xb\000\000\000", !44, !33} ; [ DW_TAG_lexical_block ]
+!37 = !MDLocation(line: 6, scope: !36)
+!38 = !{!"0x101\00this\004\000", !39, !4, !26} ; [ DW_TAG_arg_variable ]
+!39 = !{!"0x2e\00~test1\00~test1\00_ZN5test1D0Ev\004\000\001\001\006\000\000\000", i32 0, !8, !15, !8, null, null, null, null} ; [ DW_TAG_subprogram ]
+!40 = !MDLocation(line: 4, scope: !39)
+!41 = !MDLocation(line: 5, scope: !42)
+!42 = !{!"0xb\000\000\000", !44, !39} ; [ DW_TAG_lexical_block ]
+!43 = !MDLocation(line: 6, scope: !42)
+!44 = !{!"inheritance.cpp", !"/tmp/"}
+!45 = !{i32 0}
+!46 = !{!"<built-in>", !"/tmp/"}
diff --git a/test/DebugInfo/inline-debug-info-multiret.ll b/test/DebugInfo/inline-debug-info-multiret.ll
index 594512f2d8e3..0115ecd3a8d0 100644
--- a/test/DebugInfo/inline-debug-info-multiret.ll
+++ b/test/DebugInfo/inline-debug-info-multiret.ll
@@ -10,8 +10,8 @@
 ; CHECK: br label %invoke.cont, !dbg ![[MD]]
 ; The branch instruction has the source location of line 9 and its inlined location
 ; has the source location of line 14.
-; CHECK: ![[INL:[0-9]+]] = metadata !{i32 14, i32 0, metadata {{.*}}, null}
-; CHECK: ![[MD]] = metadata !{i32 9, i32 0, metadata {{.*}}, metadata ![[INL]]}
+; CHECK: ![[INL:[0-9]+]] = !MDLocation(line: 14, scope: {{.*}})
+; CHECK: ![[MD]] = !MDLocation(line: 9, scope: {{.*}}, inlinedAt: ![[INL]])
 
 ; ModuleID = 'test.cpp'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -27,8 +27,8 @@ entry:
   %k.addr = alloca i32, align 4
   %k2 = alloca i32, align 4
   store i32 %k, i32* %k.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %k.addr}, metadata !13), !dbg !14
-  call void @llvm.dbg.declare(metadata !{i32* %k2}, metadata !15), !dbg !16
+  call void @llvm.dbg.declare(metadata i32* %k.addr, metadata !13, metadata !{!"0x102"}), !dbg !14
+  call void @llvm.dbg.declare(metadata i32* %k2, metadata !15, metadata !{!"0x102"}), !dbg !16
   %0 = load i32* %k.addr, align 4, !dbg !16
   %call = call i32 @_Z8test_exti(i32 %0), !dbg !16
   store i32 %call, i32* %k2, align 4, !dbg !16
@@ -53,7 +53,7 @@ return:                                           ; preds = %if.end, %if.then
 
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare i32 @_Z8test_exti(i32)
 
@@ -85,7 +85,7 @@ catch.dispatch:                                   ; preds = %lpad
   br i1 %matches, label %catch, label %eh.resume, !dbg !23
 
 catch:                                            ; preds = %catch.dispatch
-  call void @llvm.dbg.declare(metadata !{i32* %e}, metadata !24), !dbg !25
+  call void @llvm.dbg.declare(metadata i32* %e, metadata !24, metadata !{!"0x102"}), !dbg !25
   %exn = load i8** %exn.slot, !dbg !23
   %5 = call i8* @__cxa_begin_catch(i8* %exn) #2, !dbg !23
   %6 = bitcast i8* %5 to i32*, !dbg !23
@@ -122,35 +122,35 @@ attributes #2 = { nounwind }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!31}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [<unknown>] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"<unknown>", metadata !""}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4, metadata !10}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"test", metadata !"test", metadata !"_Z4testi", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4testi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [test]
-!5 = metadata !{metadata !"test.cpp", metadata !""}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [test.cpp]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"test2", metadata !"test2", metadata !"_Z5test2v", i32 11, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z5test2v, null, null, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [test2]
-!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !9}
-!13 = metadata !{i32 786689, metadata !4, metadata !"k", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [k] [line 4]
-!14 = metadata !{i32 4, i32 0, metadata !4, null}
-!15 = metadata !{i32 786688, metadata !4, metadata !"k2", metadata !6, i32 5, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k2] [line 5]
-!16 = metadata !{i32 5, i32 0, metadata !4, null}
-!17 = metadata !{i32 6, i32 0, metadata !4, null}
-!18 = metadata !{i32 7, i32 0, metadata !4, null}
-!19 = metadata !{i32 8, i32 0, metadata !4, null}
-!20 = metadata !{i32 9, i32 0, metadata !4, null}
-!21 = metadata !{i32 14, i32 0, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !5, metadata !10, i32 13, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [test.cpp]
-!23 = metadata !{i32 15, i32 0, metadata !22, null}
-!24 = metadata !{i32 786688, metadata !10, metadata !"e", metadata !6, i32 16, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [e] [line 16]
-!25 = metadata !{i32 16, i32 0, metadata !10, null}
-!26 = metadata !{i32 17, i32 0, metadata !27, null}
-!27 = metadata !{i32 786443, metadata !5, metadata !10, i32 16, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [test.cpp]
-!28 = metadata !{i32 18, i32 0, metadata !27, null}
-!29 = metadata !{i32 19, i32 0, metadata !10, null}
-!30 = metadata !{i32 20, i32 0, metadata !10, null}
-!31 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.3 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [<unknown>] [DW_LANG_C_plus_plus]
+!1 = !{!"<unknown>", !""}
+!2 = !{i32 0}
+!3 = !{!4, !10}
+!4 = !{!"0x2e\00test\00test\00_Z4testi\004\000\001\000\006\00256\000\004", !5, !6, !7, null, i32 (i32)* @_Z4testi, null, null, !2} ; [ DW_TAG_subprogram ] [line 4] [def] [test]
+!5 = !{!"test.cpp", !""}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [test.cpp]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"0x2e\00test2\00test2\00_Z5test2v\0011\000\001\000\006\00256\000\0011", !5, !6, !11, null, i32 ()* @_Z5test2v, null, null, !2} ; [ DW_TAG_subprogram ] [line 11] [def] [test2]
+!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!9}
+!13 = !{!"0x101\00k\0016777220\000", !4, !6, !9} ; [ DW_TAG_arg_variable ] [k] [line 4]
+!14 = !MDLocation(line: 4, scope: !4)
+!15 = !{!"0x100\00k2\005\000", !4, !6, !9} ; [ DW_TAG_auto_variable ] [k2] [line 5]
+!16 = !MDLocation(line: 5, scope: !4)
+!17 = !MDLocation(line: 6, scope: !4)
+!18 = !MDLocation(line: 7, scope: !4)
+!19 = !MDLocation(line: 8, scope: !4)
+!20 = !MDLocation(line: 9, scope: !4)
+!21 = !MDLocation(line: 14, scope: !22)
+!22 = !{!"0xb\0013\000\000", !5, !10} ; [ DW_TAG_lexical_block ] [test.cpp]
+!23 = !MDLocation(line: 15, scope: !22)
+!24 = !{!"0x100\00e\0016\000", !10, !6, !9} ; [ DW_TAG_auto_variable ] [e] [line 16]
+!25 = !MDLocation(line: 16, scope: !10)
+!26 = !MDLocation(line: 17, scope: !27)
+!27 = !{!"0xb\0016\000\001", !5, !10} ; [ DW_TAG_lexical_block ] [test.cpp]
+!28 = !MDLocation(line: 18, scope: !27)
+!29 = !MDLocation(line: 19, scope: !10)
+!30 = !MDLocation(line: 20, scope: !10)
+!31 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/inline-debug-info.ll b/test/DebugInfo/inline-debug-info.ll
index b56ca95b60f5..5d2f652034de 100644
--- a/test/DebugInfo/inline-debug-info.ll
+++ b/test/DebugInfo/inline-debug-info.ll
@@ -31,8 +31,8 @@
 ; CHECK: br label %invoke.cont, !dbg ![[MD:[0-9]+]]
 ; The branch instruction has the source location of line 9 and its inlined location
 ; has the source location of line 14.
-; CHECK: ![[INL:[0-9]+]] = metadata !{i32 14, i32 0, metadata {{.*}}, null}
-; CHECK: ![[MD]] = metadata !{i32 9, i32 0, metadata {{.*}}, metadata ![[INL]]}
+; CHECK: ![[INL:[0-9]+]] = !MDLocation(line: 14, scope: {{.*}})
+; CHECK: ![[MD]] = !MDLocation(line: 9, scope: {{.*}}, inlinedAt: ![[INL]])
 
 ; ModuleID = 'test.cpp'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -47,8 +47,8 @@ entry:
   %k.addr = alloca i32, align 4
   %k2 = alloca i32, align 4
   store i32 %k, i32* %k.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %k.addr}, metadata !13), !dbg !14
-  call void @llvm.dbg.declare(metadata !{i32* %k2}, metadata !15), !dbg !16
+  call void @llvm.dbg.declare(metadata i32* %k.addr, metadata !13, metadata !{!"0x102"}), !dbg !14
+  call void @llvm.dbg.declare(metadata i32* %k2, metadata !15, metadata !{!"0x102"}), !dbg !16
   %0 = load i32* %k.addr, align 4, !dbg !16
   %call = call i32 @_Z8test_exti(i32 %0), !dbg !16
   store i32 %call, i32* %k2, align 4, !dbg !16
@@ -71,7 +71,7 @@ return:                                           ; preds = %if.end, %if.then
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare i32 @_Z8test_exti(i32)
 
@@ -103,7 +103,7 @@ catch.dispatch:                                   ; preds = %lpad
   br i1 %matches, label %catch, label %eh.resume, !dbg !23
 
 catch:                                            ; preds = %catch.dispatch
-  call void @llvm.dbg.declare(metadata !{i32* %e}, metadata !24), !dbg !25
+  call void @llvm.dbg.declare(metadata i32* %e, metadata !24, metadata !{!"0x102"}), !dbg !25
   %exn = load i8** %exn.slot, !dbg !23
   %5 = call i8* @__cxa_begin_catch(i8* %exn) #2, !dbg !23
   %6 = bitcast i8* %5 to i32*, !dbg !23
@@ -140,35 +140,35 @@ attributes #2 = { nounwind }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!31}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [<unknown>] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"<unknown>", metadata !""}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4, metadata !10}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"test", metadata !"test", metadata !"_Z4testi", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z4testi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [test]
-!5 = metadata !{metadata !"test.cpp", metadata !""}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [test.cpp]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"test2", metadata !"test2", metadata !"_Z5test2v", i32 11, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z5test2v, null, null, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [test2]
-!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !9}
-!13 = metadata !{i32 786689, metadata !4, metadata !"k", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [k] [line 4]
-!14 = metadata !{i32 4, i32 0, metadata !4, null}
-!15 = metadata !{i32 786688, metadata !4, metadata !"k2", metadata !6, i32 5, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [k2] [line 5]
-!16 = metadata !{i32 5, i32 0, metadata !4, null}
-!17 = metadata !{i32 6, i32 0, metadata !4, null}
-!18 = metadata !{i32 7, i32 0, metadata !4, null}
-!19 = metadata !{i32 8, i32 0, metadata !4, null}
-!20 = metadata !{i32 9, i32 0, metadata !4, null}
-!21 = metadata !{i32 14, i32 0, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !5, metadata !10, i32 13, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [test.cpp]
-!23 = metadata !{i32 15, i32 0, metadata !22, null}
-!24 = metadata !{i32 786688, metadata !10, metadata !"e", metadata !6, i32 16, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [e] [line 16]
-!25 = metadata !{i32 16, i32 0, metadata !10, null}
-!26 = metadata !{i32 17, i32 0, metadata !27, null}
-!27 = metadata !{i32 786443, metadata !5, metadata !10, i32 16, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [test.cpp]
-!28 = metadata !{i32 18, i32 0, metadata !27, null}
-!29 = metadata !{i32 19, i32 0, metadata !10, null}
-!30 = metadata !{i32 20, i32 0, metadata !10, null}
-!31 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.3 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [<unknown>] [DW_LANG_C_plus_plus]
+!1 = !{!"<unknown>", !""}
+!2 = !{i32 0}
+!3 = !{!4, !10}
+!4 = !{!"0x2e\00test\00test\00_Z4testi\004\000\001\000\006\00256\000\004", !5, !6, !7, null, i32 (i32)* @_Z4testi, null, null, !2} ; [ DW_TAG_subprogram ] [line 4] [def] [test]
+!5 = !{!"test.cpp", !""}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [test.cpp]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"0x2e\00test2\00test2\00_Z5test2v\0011\000\001\000\006\00256\000\0011", !5, !6, !11, null, i32 ()* @_Z5test2v, null, null, !2} ; [ DW_TAG_subprogram ] [line 11] [def] [test2]
+!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!9}
+!13 = !{!"0x101\00k\0016777220\000", !4, !6, !9} ; [ DW_TAG_arg_variable ] [k] [line 4]
+!14 = !MDLocation(line: 4, scope: !4)
+!15 = !{!"0x100\00k2\005\000", !4, !6, !9} ; [ DW_TAG_auto_variable ] [k2] [line 5]
+!16 = !MDLocation(line: 5, scope: !4)
+!17 = !MDLocation(line: 6, scope: !4)
+!18 = !MDLocation(line: 7, scope: !4)
+!19 = !MDLocation(line: 8, scope: !4)
+!20 = !MDLocation(line: 9, scope: !4)
+!21 = !MDLocation(line: 14, scope: !22)
+!22 = !{!"0xb\0013\000\000", !5, !10} ; [ DW_TAG_lexical_block ] [test.cpp]
+!23 = !MDLocation(line: 15, scope: !22)
+!24 = !{!"0x100\00e\0016\000", !10, !6, !9} ; [ DW_TAG_auto_variable ] [e] [line 16]
+!25 = !MDLocation(line: 16, scope: !10)
+!26 = !MDLocation(line: 17, scope: !27)
+!27 = !{!"0xb\0016\000\001", !5, !10} ; [ DW_TAG_lexical_block ] [test.cpp]
+!28 = !MDLocation(line: 18, scope: !27)
+!29 = !MDLocation(line: 19, scope: !10)
+!30 = !MDLocation(line: 20, scope: !10)
+!31 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/inline-no-debug-info.ll b/test/DebugInfo/inline-no-debug-info.ll
index 2257b8961fb5..68fd8e7a01bc 100644
--- a/test/DebugInfo/inline-no-debug-info.ll
+++ b/test/DebugInfo/inline-no-debug-info.ll
@@ -21,10 +21,10 @@
 
 ; Debug location of the code in caller() and of the inlined code that did not
 ; have any debug location before.
-; CHECK-DAG: [[A]] = metadata !{i32 4, i32 0, metadata !{{[01-9]+}}, null}
+; CHECK-DAG: [[A]] = !MDLocation(line: 4, scope: !{{[01-9]+}})
 
 ; Debug location of the inlined code.
-; CHECK-DAG: [[B]] = metadata !{i32 2, i32 0, metadata !{{[01-9]+}}, metadata [[A]]}
+; CHECK-DAG: [[B]] = !MDLocation(line: 2, scope: !{{[01-9]+}}, inlinedAt: [[A]])
 
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -54,16 +54,16 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 !llvm.module.flags = !{!8, !9}
 !llvm.ident = !{!10}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 (210174)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [/code/llvm/build0/test.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"test.c", metadata !"/code/llvm/build0"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !7}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"caller", metadata !"caller", metadata !"", i32 4, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, void ()* @caller, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [caller]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/code/llvm/build0/test.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"callee2", metadata !"callee2", metadata !"", i32 2, metadata !6, i1 true, i1 true, i32 0, i32 0, null, i32 0, i1 true, null, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [local] [def] [callee2]
-!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!9 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!10 = metadata !{metadata !"clang version 3.5.0 (210174)"}
-!11 = metadata !{i32 2, i32 0, metadata !7, null}
-!12 = metadata !{i32 4, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5.0 (210174)\001\00\000\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/code/llvm/build0/test.c] [DW_LANG_C99]
+!1 = !{!"test.c", !"/code/llvm/build0"}
+!2 = !{}
+!3 = !{!4, !7}
+!4 = !{!"0x2e\00caller\00caller\00\004\000\001\000\006\000\001\004", !1, !5, !6, null, void ()* @caller, null, null, !2} ; [ DW_TAG_subprogram ] [line 4] [def] [caller]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/code/llvm/build0/test.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!"0x2e\00callee2\00callee2\00\002\001\001\000\006\000\001\002", !1, !5, !6, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [local] [def] [callee2]
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 2}
+!10 = !{!"clang version 3.5.0 (210174)"}
+!11 = !MDLocation(line: 2, scope: !7)
+!12 = !MDLocation(line: 4, scope: !4)
diff --git a/test/DebugInfo/inline-scopes.ll b/test/DebugInfo/inline-scopes.ll
index 36c073516c56..ec36a2fdec62 100644
--- a/test/DebugInfo/inline-scopes.ll
+++ b/test/DebugInfo/inline-scopes.ll
@@ -43,7 +43,7 @@ entry:
   %b.i3 = alloca i8, align 1
   %retval.i = alloca i32, align 4
   %b.i = alloca i8, align 1
-  call void @llvm.dbg.declare(metadata !{i8* %b.i}, metadata !16), !dbg !19
+  call void @llvm.dbg.declare(metadata i8* %b.i, metadata !16, metadata !{!"0x102"}), !dbg !19
   %call.i = call zeroext i1 @_Z1fv(), !dbg !19
   %frombool.i = zext i1 %call.i to i8, !dbg !19
   store i8 %frombool.i, i8* %b.i, align 1, !dbg !19
@@ -61,7 +61,7 @@ if.end.i:                                         ; preds = %entry
 
 _Z2f1v.exit:                                      ; preds = %if.then.i, %if.end.i
   %1 = load i32* %retval.i, !dbg !23
-  call void @llvm.dbg.declare(metadata !{i8* %b.i3}, metadata !24), !dbg !27
+  call void @llvm.dbg.declare(metadata i8* %b.i3, metadata !24, metadata !{!"0x102"}), !dbg !27
   %call.i4 = call zeroext i1 @_Z1fv(), !dbg !27
   %frombool.i5 = zext i1 %call.i4 to i8, !dbg !27
   store i8 %frombool.i5, i8* %b.i3, align 1, !dbg !27
@@ -83,7 +83,7 @@ _Z2f2v.exit:                                      ; preds = %if.then.i7, %if.end
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare zeroext i1 @_Z1fv() #2
 
@@ -95,36 +95,36 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.module.flags = !{!13, !14}
 !llvm.ident = !{!15}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/inline-scopes.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"inline-scopes.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !10, metadata !12}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
-!5 = metadata !{metadata !"y.cc", metadata !"/tmp/dbginfo"}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/y.cc]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"f2", metadata !"f2", metadata !"_Z2f2v", i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !2, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [f2]
-!11 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/inline-scopes.cpp]
-!12 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"f1", metadata !"f1", metadata !"_Z2f1v", i32 2, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [f1]
-!13 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!15 = metadata !{metadata !"clang version 3.5.0 "}
-!16 = metadata !{i32 786688, metadata !17, metadata !"b", metadata !11, i32 3, metadata !18, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 3]
-!17 = metadata !{i32 786443, metadata !1, metadata !12, i32 3, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/inline-scopes.cpp]
-!18 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
-!19 = metadata !{i32 3, i32 0, metadata !17, metadata !20}
-!20 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
-!21 = metadata !{i32 4, i32 0, metadata !17, metadata !20}
-!22 = metadata !{i32 5, i32 0, metadata !12, metadata !20}
-!23 = metadata !{i32 6, i32 0, metadata !12, metadata !20}
-!24 = metadata !{i32 786688, metadata !25, metadata !"b", metadata !6, i32 2, metadata !18, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [b] [line 2]
-!25 = metadata !{i32 786443, metadata !5, metadata !26, i32 2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/y.cc]
-!26 = metadata !{i32 786443, metadata !5, metadata !10} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/y.cc]
-!27 = metadata !{i32 2, i32 0, metadata !25, metadata !28}
-!28 = metadata !{i32 9, i32 0, metadata !4, null}
-!29 = metadata !{i32 3, i32 0, metadata !25, metadata !28}
-!30 = metadata !{i32 4, i32 0, metadata !26, metadata !28}
-!31 = metadata !{i32 5, i32 0, metadata !26, metadata !28}
-!32 = metadata !{i32 10, i32 0, metadata !4, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/inline-scopes.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"inline-scopes.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4, !10, !12}
+!4 = !{!"0x2e\00main\00main\00\007\000\001\000\006\00256\000\007", !5, !6, !7, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
+!5 = !{!"y.cc", !"/tmp/dbginfo"}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/y.cc]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"0x2e\00f2\00f2\00_Z2f2v\008\000\001\000\006\00256\000\008", !1, !11, !7, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 8] [def] [f2]
+!11 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/inline-scopes.cpp]
+!12 = !{!"0x2e\00f1\00f1\00_Z2f1v\002\000\001\000\006\00256\000\002", !1, !11, !7, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [f1]
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 1, !"Debug Info Version", i32 2}
+!15 = !{!"clang version 3.5.0 "}
+!16 = !{!"0x100\00b\003\000", !17, !11, !18} ; [ DW_TAG_auto_variable ] [b] [line 3]
+!17 = !{!"0xb\003\000\001", !1, !12} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/inline-scopes.cpp]
+!18 = !{!"0x24\00bool\000\008\008\000\000\002", null, null} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
+!19 = !MDLocation(line: 3, scope: !17, inlinedAt: !20)
+!20 = !MDLocation(line: 8, scope: !4)
+!21 = !MDLocation(line: 4, scope: !17, inlinedAt: !20)
+!22 = !MDLocation(line: 5, scope: !12, inlinedAt: !20)
+!23 = !MDLocation(line: 6, scope: !12, inlinedAt: !20)
+!24 = !{!"0x100\00b\002\000", !25, !6, !18} ; [ DW_TAG_auto_variable ] [b] [line 2]
+!25 = !{!"0xb\002\000\000", !5, !26} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/y.cc]
+!26 = !{!"0xb\000", !5, !10} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/y.cc]
+!27 = !MDLocation(line: 2, scope: !25, inlinedAt: !28)
+!28 = !MDLocation(line: 9, scope: !4)
+!29 = !MDLocation(line: 3, scope: !25, inlinedAt: !28)
+!30 = !MDLocation(line: 4, scope: !26, inlinedAt: !28)
+!31 = !MDLocation(line: 5, scope: !26, inlinedAt: !28)
+!32 = !MDLocation(line: 10, scope: !4)
diff --git a/test/DebugInfo/inlined-arguments.ll b/test/DebugInfo/inlined-arguments.ll
index 6979862a536f..c705cf8e97f3 100644
--- a/test/DebugInfo/inlined-arguments.ll
+++ b/test/DebugInfo/inlined-arguments.ll
@@ -24,16 +24,16 @@
 
 ; Function Attrs: uwtable
 define void @_Z2f2v() #0 {
-  tail call void @llvm.dbg.value(metadata !15, i64 0, metadata !16), !dbg !18
-  tail call void @llvm.dbg.value(metadata !19, i64 0, metadata !20), !dbg !18
+  tail call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !18
+  tail call void @llvm.dbg.value(metadata i32 2, i64 0, metadata !20, metadata !{!"0x102"}), !dbg !18
   tail call void @_Z2f3i(i32 2), !dbg !21
   ret void, !dbg !22
 }
 
 ; Function Attrs: uwtable
 define void @_Z2f1ii(i32 %x, i32 %y) #0 {
-  tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !13), !dbg !23
-  tail call void @llvm.dbg.value(metadata !{i32 %y}, i64 0, metadata !14), !dbg !23
+  tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !23
+  tail call void @llvm.dbg.value(metadata i32 %y, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !23
   tail call void @_Z2f3i(i32 %y), !dbg !24
   ret void, !dbg !25
 }
@@ -41,7 +41,7 @@ define void @_Z2f1ii(i32 %x, i32 %y) #0 {
 declare void @_Z2f3i(i32) #1
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #2
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
 
 attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
@@ -50,30 +50,30 @@ attributes #2 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!26}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/exp.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"exp.cpp", metadata !"/usr/local/google/home/blaikie/dev/scratch"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !8}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f2", metadata !"f2", metadata !"_Z2f2v", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_Z2f2v, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f2]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/exp.cpp]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f1", metadata !"f1", metadata !"_Z2f1ii", i32 6, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32, i32)* @_Z2f1ii, null, null, metadata !12, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [f1]
-!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!10 = metadata !{null, metadata !11, metadata !11}
-!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!12 = metadata !{metadata !13, metadata !14}
-!13 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !5, i32 16777222, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 6]
-!14 = metadata !{i32 786689, metadata !8, metadata !"y", metadata !5, i32 33554438, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [y] [line 6]
-!15 = metadata !{i32 undef}
-!16 = metadata !{i32 786689, metadata !8, metadata !"x", metadata !5, i32 16777222, metadata !11, i32 0, metadata !17} ; [ DW_TAG_arg_variable ] [x] [line 6]
-!17 = metadata !{i32 4, i32 0, metadata !4, null}
-!18 = metadata !{i32 6, i32 0, metadata !8, metadata !17}
-!19 = metadata !{i32 2}
-!20 = metadata !{i32 786689, metadata !8, metadata !"y", metadata !5, i32 33554438, metadata !11, i32 0, metadata !17} ; [ DW_TAG_arg_variable ] [y] [line 6]
-!21 = metadata !{i32 7, i32 0, metadata !8, metadata !17}
-!22 = metadata !{i32 5, i32 0, metadata !4, null}
-!23 = metadata !{i32 6, i32 0, metadata !8, null}
-!24 = metadata !{i32 7, i32 0, metadata !8, null}
-!25 = metadata !{i32 8, i32 0, metadata !8, null} ; [ DW_TAG_imported_declaration ]
-!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 \001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/exp.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"exp.cpp", !"/usr/local/google/home/blaikie/dev/scratch"}
+!2 = !{}
+!3 = !{!4, !8}
+!4 = !{!"0x2e\00f2\00f2\00_Z2f2v\003\000\001\000\006\00256\001\003", !1, !5, !6, null, void ()* @_Z2f2v, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [f2]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/scratch/exp.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{!"0x2e\00f1\00f1\00_Z2f1ii\006\000\001\000\006\00256\001\006", !1, !5, !9, null, void (i32, i32)* @_Z2f1ii, null, null, !12} ; [ DW_TAG_subprogram ] [line 6] [def] [f1]
+!9 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !{null, !11, !11}
+!11 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = !{!13, !14}
+!13 = !{!"0x101\00x\0016777222\000", !8, !5, !11} ; [ DW_TAG_arg_variable ] [x] [line 6]
+!14 = !{!"0x101\00y\0033554438\000", !8, !5, !11} ; [ DW_TAG_arg_variable ] [y] [line 6]
+!15 = !{i32 undef}
+!16 = !{!"0x101\00x\0016777222\000", !8, !5, !11, !17} ; [ DW_TAG_arg_variable ] [x] [line 6]
+!17 = !MDLocation(line: 4, scope: !4)
+!18 = !MDLocation(line: 6, scope: !8, inlinedAt: !17)
+!19 = !{i32 2}
+!20 = !{!"0x101\00y\0033554438\000", !8, !5, !11, !17} ; [ DW_TAG_arg_variable ] [y] [line 6]
+!21 = !MDLocation(line: 7, scope: !8, inlinedAt: !17)
+!22 = !MDLocation(line: 5, scope: !4)
+!23 = !MDLocation(line: 6, scope: !8)
+!24 = !MDLocation(line: 7, scope: !8)
+!25 = !MDLocation(line: 8, scope: !8)
+!26 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/inlined-vars.ll b/test/DebugInfo/inlined-vars.ll
index 9cfde1f26039..b84e12f4ce9f 100644
--- a/test/DebugInfo/inlined-vars.ll
+++ b/test/DebugInfo/inlined-vars.ll
@@ -4,8 +4,8 @@
 
 define i32 @main() uwtable {
 entry:
-  tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !18), !dbg !21
-  tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !22), !dbg !23
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !21
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !22, metadata !{!"0x102"}), !dbg !23
   tail call void @smth(i32 0), !dbg !24
   tail call void @smth(i32 0), !dbg !25
   ret i32 0, !dbg !19
@@ -13,44 +13,44 @@ entry:
 
 declare void @smth(i32)
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!27}
 
-!0 = metadata !{i32 786449, metadata !26, i32 4, metadata !"clang version 3.2 (trunk 159419)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2,  metadata !2, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 0}
-!2 = metadata !{}
-!3 = metadata !{metadata !5, metadata !10}
-!5 = metadata !{i32 786478, metadata !26, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 10, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !2, i32 10} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786478, metadata !26, metadata !6, metadata !"f", metadata !"f", metadata !"_ZL1fi", i32 3, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !13, i32 3} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !9, metadata !9}
-!13 = metadata !{metadata !15, metadata !16}
-!15 = metadata !{i32 786689, metadata !10, metadata !"argument", metadata !6, i32 16777219, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!0 = !{!"0x11\004\00clang version 3.2 (trunk 159419)\001\00\000\00\000", !26, !2, !2, !3, !2,  !2} ; [ DW_TAG_compile_unit ]
+!1 = !{i32 0}
+!2 = !{}
+!3 = !{!5, !10}
+!5 = !{!"0x2e\00main\00main\00\0010\000\001\000\006\00256\001\0010", !26, !6, !7, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ]
+!6 = !{!"0x29", !26} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!10 = !{!"0x2e\00f\00f\00_ZL1fi\003\001\001\000\006\00256\001\003", !26, !6, !11, null, null, null, null, !13} ; [ DW_TAG_subprogram ]
+!11 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!9, !9}
+!13 = !{!15, !16}
+!15 = !{!"0x101\00argument\0016777219\000", !10, !6, !9} ; [ DW_TAG_arg_variable ]
 
 ; Two DW_TAG_formal_parameter: one abstract and one inlined.
 ; ARGUMENT: {{.*Abbrev.*DW_TAG_formal_parameter}}
 ; ARGUMENT: {{.*Abbrev.*DW_TAG_formal_parameter}}
 ; ARGUMENT-NOT: {{.*Abbrev.*DW_TAG_formal_parameter}}
 
-!16 = metadata !{i32 786688, metadata !10, metadata !"local", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!16 = !{!"0x100\00local\004\000", !10, !6, !9} ; [ DW_TAG_auto_variable ]
 
 ; Two DW_TAG_variable: one abstract and one inlined.
 ; VARIABLE: {{.*Abbrev.*DW_TAG_variable}}
 ; VARIABLE: {{.*Abbrev.*DW_TAG_variable}}
 ; VARIABLE-NOT: {{.*Abbrev.*DW_TAG_variable}}
 
-!18 = metadata !{i32 786689, metadata !10, metadata !"argument", metadata !6, i32 16777219, metadata !9, i32 0, metadata !19} ; [ DW_TAG_arg_variable ]
-!19 = metadata !{i32 11, i32 10, metadata !5, null}
-!21 = metadata !{i32 3, i32 25, metadata !10, metadata !19}
-!22 = metadata !{i32 786688, metadata !10, metadata !"local", metadata !6, i32 4, metadata !9, i32 0, metadata !19} ; [ DW_TAG_auto_variable ]
-!23 = metadata !{i32 4, i32 16, metadata !10, metadata !19}
-!24 = metadata !{i32 5, i32 3, metadata !10, metadata !19}
-!25 = metadata !{i32 6, i32 3, metadata !10, metadata !19}
-!26 = metadata !{metadata !"inline-bug.cc", metadata !"/tmp/dbginfo/pr13202"}
-!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!18 = !{!"0x101\00argument\0016777219\000", !10, !6, !9, !19} ; [ DW_TAG_arg_variable ]
+!19 = !MDLocation(line: 11, column: 10, scope: !5)
+!21 = !MDLocation(line: 3, column: 25, scope: !10, inlinedAt: !19)
+!22 = !{!"0x100\00local\004\000", !10, !6, !9, !19} ; [ DW_TAG_auto_variable ]
+!23 = !MDLocation(line: 4, column: 16, scope: !10, inlinedAt: !19)
+!24 = !MDLocation(line: 5, column: 3, scope: !10, inlinedAt: !19)
+!25 = !MDLocation(line: 6, column: 3, scope: !10, inlinedAt: !19)
+!26 = !{!"inline-bug.cc", !"/tmp/dbginfo/pr13202"}
+!27 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/llvm-symbolizer.test b/test/DebugInfo/llvm-symbolizer.test
index 20d3dda21ab0..8a2aaaab3a3c 100644
--- a/test/DebugInfo/llvm-symbolizer.test
+++ b/test/DebugInfo/llvm-symbolizer.test
@@ -6,9 +6,9 @@ RUN: echo "%p/Inputs/dwarfdump-test.elf-x86-64 0x400586" >> %t.input
 RUN: echo "%p/Inputs/dwarfdump-test2.elf-x86-64 0x4004e8" >> %t.input
 RUN: echo "%p/Inputs/dwarfdump-test2.elf-x86-64 0x4004f4" >> %t.input
 RUN: echo "%p/Inputs/dwarfdump-test4.elf-x86-64 0x62c" >> %t.input
-RUN: echo "%p/Inputs/dwarfdump-inl-test.elf-x86-64 0x710" >> %t.input
-RUN: echo "%p/Inputs/dwarfdump-inl-test.elf-x86-64 0x7d1" >> %t.input
-RUN: echo "%p/Inputs/dwarfdump-inl-test.elf-x86-64 0x785" >> %t.input
+RUN: echo "%p/Inputs/dwarfdump-inl-test.elf-x86-64 0x8dc" >> %t.input
+RUN: echo "%p/Inputs/dwarfdump-inl-test.elf-x86-64 0xa05" >> %t.input
+RUN: echo "%p/Inputs/dwarfdump-inl-test.elf-x86-64 0x987" >> %t.input
 RUN: echo "%p/Inputs/dwarfdump-inl-test.high_pc.elf-x86-64 0x568" >> %t.input
 RUN: echo "\"%p/Inputs/dwarfdump-test3.elf-x86-64 space\" 0x640" >> %t.input
 RUN: echo "\"%p/Inputs/dwarfdump-test3.elf-x86-64 space\" 0x633" >> %t.input
@@ -19,6 +19,10 @@ RUN: echo "%p/Inputs/macho-universal:x86_64 0x100000f05" >> %t.input
 RUN: echo "%p/Inputs/llvm-symbolizer-dwo-test 0x400514" >> %t.input
 RUN: echo "%p/Inputs/fission-ranges.elf-x86_64 0x720" >> %t.input
 RUN: echo "%p/Inputs/arange-overlap.elf-x86_64 0x714" >> %t.input
+RUN: cp %p/Inputs/split-dwarf-test.dwo %T
+RUN: echo "%p/Inputs/split-dwarf-test 0x4004d0" >> %t.input
+RUN: echo "%p/Inputs/split-dwarf-test 0x4004c0" >> %t.input
+RUN: echo "%p/Inputs/cross-cu-inlining.x86_64-macho.o 0x17" >> %t.input
 
 RUN: llvm-symbolizer --functions=linkage --inlining --demangle=false \
 RUN:    --default-arch=i386 < %t.input | FileCheck %s
@@ -98,6 +102,21 @@ CHECK-NEXT: {{.*}}fission-ranges.cc:6
 CHECK: _ZN1S3bazEv
 CHECK-NEXT: {{.*}}arange-overlap.cc:6
 
+CHECK: _Z3fooi
+CHECK-NEXT: {{.*}}split-dwarf-test.cc
+CHECK-NEXT: main
+CHECK-NEXT: {{.*}}split-dwarf-test.cc
+
+CHECK: _Z3fooi
+CHECK-NEXT: {{.*}}split-dwarf-test.cc
+
+; func has been inlined into main by LTO. Check that the symbolizer is able
+; to resolve the cross-cu reference and retrieve func's name
+CHECK: func
+CHECK-NEXT: /tmp{{[/\\]}}cross-cu-inlining.c:16:3
+CHECK-NEXT: main
+CHECK-NEXT: /tmp{{[/\\]}}cross-cu-inlining.c:11:0
+
 RUN: echo "unexisting-file 0x1234" > %t.input2
 RUN: llvm-symbolizer < %t.input2
 
diff --git a/test/DebugInfo/lto-comp-dir.ll b/test/DebugInfo/lto-comp-dir.ll
index d272dff6ea82..a79cf3248cfc 100644
--- a/test/DebugInfo/lto-comp-dir.ll
+++ b/test/DebugInfo/lto-comp-dir.ll
@@ -59,26 +59,26 @@ attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="
 !llvm.module.flags = !{!16, !17}
 !llvm.ident = !{!18, !18}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1}
-!1 = metadata !{metadata !"a.cpp", metadata !"/tmp/dbginfo/a"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"_Z4funcv", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z4funcv, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [func]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/a/a.cpp]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{i32 786449, metadata !9, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !10, metadata !2, metadata !2, metadata !"", i32 1}
-!9 = metadata !{metadata !"b.cpp", metadata !"/tmp/dbginfo/b"}
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 786478, metadata !9, metadata !12, metadata !"main", metadata !"main", metadata !"", i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [main]
-!12 = metadata !{i32 786473, metadata !9}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/b/b.cpp]
-!13 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!14 = metadata !{metadata !15}
-!15 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!16 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!18 = metadata !{metadata !"clang version 3.5.0 "}
-!19 = metadata !{i32 2, i32 0, metadata !4, null}
-!20 = metadata !{i32 3, i32 0, metadata !11, null}
-!21 = metadata !{i32 4, i32 0, metadata !11, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ]
+!1 = !{!"a.cpp", !"/tmp/dbginfo/a"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00func\00func\00_Z4funcv\001\000\001\000\006\00256\000\001", !1, !5, !6, null, void ()* @_Z4funcv, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [func]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/a/a.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !9, !2, !2, !10, !2, !2} ; [ DW_TAG_compile_unit ]
+!9 = !{!"b.cpp", !"/tmp/dbginfo/b"}
+!10 = !{!11}
+!11 = !{!"0x2e\00main\00main\00\002\000\001\000\006\00256\000\002", !9, !12, !13, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [main]
+!12 = !{!"0x29", !9}         ; [ DW_TAG_file_type ] [/tmp/dbginfo/b/b.cpp]
+!13 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !14, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = !{!15}
+!15 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!16 = !{i32 2, !"Dwarf Version", i32 4}
+!17 = !{i32 1, !"Debug Info Version", i32 2}
+!18 = !{!"clang version 3.5.0 "}
+!19 = !MDLocation(line: 2, scope: !4)
+!20 = !MDLocation(line: 3, scope: !11)
+!21 = !MDLocation(line: 4, scope: !11)
 
diff --git a/test/DebugInfo/member-order.ll b/test/DebugInfo/member-order.ll
index 652a6cd6c314..ae845719cdc9 100644
--- a/test/DebugInfo/member-order.ll
+++ b/test/DebugInfo/member-order.ll
@@ -29,13 +29,13 @@ define void @_ZN3foo2f1Ev(%struct.foo* %this) #0 align 2 {
 entry:
   %this.addr = alloca %struct.foo*, align 8
   store %struct.foo* %this, %struct.foo** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.foo** %this.addr}, metadata !16), !dbg !18
+  call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !16, metadata !{!"0x102"}), !dbg !18
   %this1 = load %struct.foo** %this.addr
   ret void, !dbg !19
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -43,24 +43,24 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!15, !20}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !13, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/member-order.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"member-order.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !1, null, metadata !"foo", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{metadata !6, metadata !11}
-!6 = metadata !{i32 786478, metadata !1, metadata !4, metadata !"f1", metadata !"f1", metadata !"_ZN3foo2f1Ev", i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !10, i32 2} ; [ DW_TAG_subprogram ] [line 2] [f1]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9}
-!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS3foo]
-!10 = metadata !{i32 786468}
-!11 = metadata !{i32 786478, metadata !1, metadata !4, metadata !"f2", metadata !"f2", metadata !"_ZN3foo2f2Ev", i32 3, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [f2]
-!12 = metadata !{i32 786468}
-!13 = metadata !{metadata !14}
-!14 = metadata !{i32 786478, metadata !1, null, metadata !"f1", metadata !"f1", metadata !"_ZN3foo2f1Ev", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo2f1Ev, null, metadata !6, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [f1]
-!15 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!16 = metadata !{i32 786689, metadata !14, metadata !"this", null, i32 16777216, metadata !17, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3foo]
-!18 = metadata !{i32 0, i32 0, metadata !14, null}
-!19 = metadata !{i32 7, i32 0, metadata !14, null}
-!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 \000\00\000\00\000", !1, !2, !3, !13, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/member-order.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"member-order.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00foo\001\008\008\000\000\000", !1, null, null, !5, null, null, !"_ZTS3foo"} ; [ DW_TAG_structure_type ] [foo] [line 1, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!6, !11}
+!6 = !{!"0x2e\00f1\00f1\00_ZN3foo2f1Ev\002\000\000\000\006\00256\000\002", !1, !4, !7, null, null, null, i32 0, !10} ; [ DW_TAG_subprogram ] [line 2] [f1]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9}
+!9 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS3foo]
+!10 = !{i32 786468}
+!11 = !{!"0x2e\00f2\00f2\00_ZN3foo2f2Ev\003\000\000\000\006\00256\000\003", !1, !4, !7, null, null, null, i32 0, !12} ; [ DW_TAG_subprogram ] [line 3] [f2]
+!12 = !{i32 786468}
+!13 = !{!14}
+!14 = !{!"0x2e\00f1\00f1\00_ZN3foo2f1Ev\006\000\001\000\006\00256\000\006", !1, null, !7, null, void (%struct.foo*)* @_ZN3foo2f1Ev, null, !6, !2} ; [ DW_TAG_subprogram ] [line 6] [def] [f1]
+!15 = !{i32 2, !"Dwarf Version", i32 4}
+!16 = !{!"0x101\00this\0016777216\001088", !14, null, !17} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!17 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS3foo"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3foo]
+!18 = !MDLocation(line: 0, scope: !14)
+!19 = !MDLocation(line: 7, scope: !14)
+!20 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/member-pointers.ll b/test/DebugInfo/member-pointers.ll
index 4ca69426efbe..54bb0a548b76 100644
--- a/test/DebugInfo/member-pointers.ll
+++ b/test/DebugInfo/member-pointers.ll
@@ -23,18 +23,18 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!16}
 
-!0 = metadata !{i32 786449, metadata !15, i32 4, metadata !"clang version 3.3 ", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/blaikie/Development/scratch/simple.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{}
-!3 = metadata !{metadata !5, metadata !10}
-!5 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 4, metadata !7, i32 0, i32 1, i64* @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def]
-!6 = metadata !{i32 786473, metadata !15} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8, metadata !9} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from int]
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786451, metadata !15, null, metadata !"S", i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !1, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [S] [line 1, size 8, align 8, offset 0] [def] [from ]
-!10 = metadata !{i32 786484, i32 0, null, metadata !"y", metadata !"y", metadata !"", metadata !6, i32 5, metadata !11, i32 0, i32 1, { i64, i64 }* @y, null} ; [ DW_TAG_variable ] [y] [line 5] [def]
-!11 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !12, metadata !9} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!13 = metadata !{null, metadata !14, metadata !8}
-!14 = metadata !{i32 786447, i32 0, null, i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from S]
-!15 = metadata !{metadata !"simple.cpp", metadata !"/home/blaikie/Development/scratch"}
-!16 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.3 \000\00\000\00\000", !15, !1, !1, !1, !3,  !1} ; [ DW_TAG_compile_unit ] [/home/blaikie/Development/scratch/simple.cpp] [DW_LANG_C_plus_plus]
+!1 = !{}
+!3 = !{!5, !10}
+!5 = !{!"0x34\00x\00x\00\004\000\001", null, !6, !7, i64* @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def]
+!6 = !{!"0x29", !15} ; [ DW_TAG_file_type ]
+!7 = !{!"0x1f\00\000\000\000\000\000", null, null, !8, !9} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from int]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x13\00S\001\008\008\000\000\000", !15, null, null, !1, null, null, null} ; [ DW_TAG_structure_type ] [S] [line 1, size 8, align 8, offset 0] [def] [from ]
+!10 = !{!"0x34\00y\00y\00\005\000\001", null, !6, !11, { i64, i64 }* @y, null} ; [ DW_TAG_variable ] [y] [line 5] [def]
+!11 = !{!"0x1f\00\000\000\000\000\000", null, null, !12, !9} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !13, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = !{null, !14, !8}
+!14 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from S]
+!15 = !{!"simple.cpp", !"/home/blaikie/Development/scratch"}
+!16 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/tools/llvm-profdata/Inputs/empty.profdata b/test/DebugInfo/member-pointers.o
index e69de29bb2d1..e69de29bb2d1 100644
--- a/test/tools/llvm-profdata/Inputs/empty.profdata
+++ b/test/DebugInfo/member-pointers.o
diff --git a/test/DebugInfo/missing-abstract-variable.ll b/test/DebugInfo/missing-abstract-variable.ll
index 59a38cf39d5f..dcaa2db435d9 100644
--- a/test/DebugInfo/missing-abstract-variable.ll
+++ b/test/DebugInfo/missing-abstract-variable.ll
@@ -37,19 +37,17 @@
 ;   x(u);
 ; }
 
-; CHECK: [[ABS_X:.*]]: DW_TAG_subprogram
+; CHECK: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
 ; CHECK:   DW_AT_name {{.*}} "x"
 ; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: [[ABS_B:.*]]:   DW_TAG_formal_parameter
+; CHECK:   DW_TAG_formal_parameter
 ; CHECK-NOT: DW_TAG
 ; CHECK:     DW_AT_name {{.*}} "b"
 ; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK:     DW_TAG_lexical_block
-; CHECK-NOT: {{DW_TAG|NULL}}
 ; CHECK:       DW_TAG_lexical_block
 ; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK: [[ABS_S:.*]]:       DW_TAG_variable
+; CHECK:   DW_TAG_variable
 ; CHECK-NOT: DW_TAG
 ; CHECK:         DW_AT_name {{.*}} "s"
 
@@ -59,11 +57,11 @@
 ; CHECK-NOT: {{DW_TAG|NULL}}
 ; CHECK:   DW_TAG_inlined_subroutine
 ; CHECK-NOT: DW_TAG
-; CHECK:     DW_AT_abstract_origin {{.*}} {[[ABS_X]]}
+; CHECK:     DW_AT_abstract_origin {{.*}} "_Z1xb"
 ; CHECK-NOT: {{DW_TAG|NULL}}
 ; CHECK:     DW_TAG_formal_parameter
 ; CHECK-NOT: DW_TAG
-; CHECK:       DW_AT_abstract_origin {{.*}} {[[ABS_B]]}
+; CHECK:       DW_AT_abstract_origin {{.*}} "b"
 ; Notice 'x's local variable 's' is missing. Not necessarily a bug here,
 ; since it's been optimized entirely away and it should be described in
 ; abstract subprogram.
@@ -80,35 +78,28 @@
 ; CHECK-NOT: {{DW_TAG|NULL}}
 ; CHECK:   DW_TAG_inlined_subroutine
 ; CHECK-NOT: DW_TAG
-; CHECK:     DW_AT_abstract_origin {{.*}} {[[ABS_X]]}
+; CHECK:     DW_AT_abstract_origin {{.*}} "_Z1xb"
 ; CHECK-NOT: {{DW_TAG|NULL}}
 ; FIXME: This formal parameter goes missing at least at -O2 (& on
 ; mips/powerpc), maybe before that. Perhaps SelectionDAG is to blame (and
 ; fastisel succeeds).
 ; CHECK:     DW_TAG_formal_parameter
 ; CHECK-NOT: DW_TAG
-; CHECK:       DW_AT_abstract_origin {{.*}} {[[ABS_B]]}
-
-; The two lexical blocks here are caused by the scope of the if that includes
-; the condition variable, and the scope within the if's composite statement. I'm
-; not sure we really need both of them since there's no variable declared in the
-; outer of the two
+; CHECK:       DW_AT_abstract_origin {{.*}} "b"
 
 ; CHECK-NOT: {{DW_TAG|NULL}}
 ; CHECK:     DW_TAG_lexical_block
 ; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK:       DW_TAG_lexical_block
-; CHECK-NOT: {{DW_TAG|NULL}}
-; CHECK:         DW_TAG_variable
+; CHECK:       DW_TAG_variable
 ; CHECK-NOT: DW_TAG
-; CHECK:           DW_AT_abstract_origin {{.*}} {[[ABS_S]]}
+; CHECK:         DW_AT_abstract_origin {{.*}} "s"
 
 @t = external global i32
 
 ; Function Attrs: uwtable
 define void @_Z1bv() #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !24, i64 0, metadata !25), !dbg !27
+  tail call void @llvm.dbg.value(metadata i1 false, i64 0, metadata !25, metadata !{!"0x102"}), !dbg !27
   tail call void @_Z1fi(i32 0), !dbg !28
   ret void, !dbg !29
 }
@@ -116,13 +107,13 @@ entry:
 ; Function Attrs: uwtable
 define void @_Z1ab(i1 zeroext %u) #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i1 %u}, i64 0, metadata !13), !dbg !30
-  tail call void @llvm.dbg.value(metadata !{i1 %u}, i64 0, metadata !31), !dbg !33
+  tail call void @llvm.dbg.value(metadata i1 %u, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !30
+  tail call void @llvm.dbg.value(metadata i1 %u, i64 0, metadata !31, metadata !{!"0x102"}), !dbg !33
   br i1 %u, label %if.then.i, label %_Z1xb.exit, !dbg !34
 
 if.then.i:                                        ; preds = %entry
   %0 = load i32* @t, align 4, !dbg !35, !tbaa !36
-  tail call void @llvm.dbg.value(metadata !{i32 %0}, i64 0, metadata !40), !dbg !35
+  tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !40, metadata !{!"0x102"}), !dbg !35
   tail call void @_Z1fi(i32 %0), !dbg !41
   br label %_Z1xb.exit, !dbg !42
 
@@ -134,7 +125,7 @@ _Z1xb.exit:                                       ; preds = %entry, %if.then.i
 declare void @_Z1fi(i32) #1
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #2
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
 
 attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
@@ -144,48 +135,48 @@ attributes #2 = { nounwind readnone }
 !llvm.module.flags = !{!21, !22}
 !llvm.ident = !{!23}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/missing-abstract-variables.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"missing-abstract-variables.cc", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !8, metadata !14}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"b", metadata !"b", metadata !"_Z1bv", i32 13, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @_Z1bv, null, null, metadata !2, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [b]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/missing-abstract-variables.cc]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"a", metadata !"a", metadata !"_Z1ab", i32 17, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i1)* @_Z1ab, null, null, metadata !12, i32 17} ; [ DW_TAG_subprogram ] [line 17] [def] [a]
-!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!10 = metadata !{null, metadata !11}
-!11 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786689, metadata !8, metadata !"u", metadata !5, i32 16777233, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [u] [line 17]
-!14 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"x", metadata !"x", metadata !"_Z1xb", i32 5, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, null, null, null, metadata !15, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [x]
-!15 = metadata !{metadata !16, metadata !17}
-!16 = metadata !{i32 786689, metadata !14, metadata !"b", metadata !5, i32 16777221, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 5]
-!17 = metadata !{i32 786688, metadata !18, metadata !"s", metadata !5, i32 7, metadata !20, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 7]
-!18 = metadata !{i32 786443, metadata !1, metadata !19, i32 6, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/missing-abstract-variables.cc]
-!19 = metadata !{i32 786443, metadata !1, metadata !14, i32 6, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/missing-abstract-variables.cc]
-!20 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!21 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!22 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!23 = metadata !{metadata !"clang version 3.5.0 "}
-!24 = metadata !{i1 false}
-!25 = metadata !{i32 786689, metadata !14, metadata !"b", metadata !5, i32 16777221, metadata !11, i32 0, metadata !26} ; [ DW_TAG_arg_variable ] [b] [line 5]
-!26 = metadata !{i32 14, i32 0, metadata !4, null}
-!27 = metadata !{i32 5, i32 0, metadata !14, metadata !26}
-!28 = metadata !{i32 10, i32 0, metadata !14, metadata !26}
-!29 = metadata !{i32 15, i32 0, metadata !4, null}
-!30 = metadata !{i32 17, i32 0, metadata !8, null}
-!31 = metadata !{i32 786689, metadata !14, metadata !"b", metadata !5, i32 16777221, metadata !11, i32 0, metadata !32} ; [ DW_TAG_arg_variable ] [b] [line 5]
-!32 = metadata !{i32 18, i32 0, metadata !8, null}
-!33 = metadata !{i32 5, i32 0, metadata !14, metadata !32}
-!34 = metadata !{i32 6, i32 0, metadata !19, metadata !32}
-!35 = metadata !{i32 7, i32 0, metadata !18, metadata !32}
-!36 = metadata !{metadata !37, metadata !37, i64 0}
-!37 = metadata !{metadata !"int", metadata !38, i64 0}
-!38 = metadata !{metadata !"omnipotent char", metadata !39, i64 0}
-!39 = metadata !{metadata !"Simple C/C++ TBAA"}
-!40 = metadata !{i32 786688, metadata !18, metadata !"s", metadata !5, i32 7, metadata !20, i32 0, metadata !32} ; [ DW_TAG_auto_variable ] [s] [line 7]
-!41 = metadata !{i32 8, i32 0, metadata !18, metadata !32} ; [ DW_TAG_imported_declaration ]
-!42 = metadata !{i32 9, i32 0, metadata !18, metadata !32}
-!43 = metadata !{i32 10, i32 0, metadata !14, metadata !32}
-!44 = metadata !{i32 19, i32 0, metadata !8, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/missing-abstract-variables.cc] [DW_LANG_C_plus_plus]
+!1 = !{!"missing-abstract-variables.cc", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4, !8, !14}
+!4 = !{!"0x2e\00b\00b\00_Z1bv\0013\000\001\000\006\00256\001\0013", !1, !5, !6, null, void ()* @_Z1bv, null, null, !2} ; [ DW_TAG_subprogram ] [line 13] [def] [b]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/missing-abstract-variables.cc]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{!"0x2e\00a\00a\00_Z1ab\0017\000\001\000\006\00256\001\0017", !1, !5, !9, null, void (i1)* @_Z1ab, null, null, !12} ; [ DW_TAG_subprogram ] [line 17] [def] [a]
+!9 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !{null, !11}
+!11 = !{!"0x24\00bool\000\008\008\000\000\002", null, null} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
+!12 = !{!13}
+!13 = !{!"0x101\00u\0016777233\000", !8, !5, !11} ; [ DW_TAG_arg_variable ] [u] [line 17]
+!14 = !{!"0x2e\00x\00x\00_Z1xb\005\000\001\000\006\00256\001\005", !1, !5, !9, null, null, null, null, !15} ; [ DW_TAG_subprogram ] [line 5] [def] [x]
+!15 = !{!16, !17}
+!16 = !{!"0x101\00b\0016777221\000", !14, !5, !11} ; [ DW_TAG_arg_variable ] [b] [line 5]
+!17 = !{!"0x100\00s\007\000", !18, !5, !20} ; [ DW_TAG_auto_variable ] [s] [line 7]
+!18 = !{!"0xb\006\000\000", !1, !19} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/missing-abstract-variables.cc]
+!19 = !{!"0xb\006\000\000", !1, !14} ; [ DW_TAG_lexical_block ] [/tmp/dbginfo/missing-abstract-variables.cc]
+!20 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!21 = !{i32 2, !"Dwarf Version", i32 4}
+!22 = !{i32 2, !"Debug Info Version", i32 2}
+!23 = !{!"clang version 3.5.0 "}
+!24 = !{i1 false}
+!25 = !{!"0x101\00b\0016777221\000", !14, !5, !11, !26} ; [ DW_TAG_arg_variable ] [b] [line 5]
+!26 = !MDLocation(line: 14, scope: !4)
+!27 = !MDLocation(line: 5, scope: !14, inlinedAt: !26)
+!28 = !MDLocation(line: 10, scope: !14, inlinedAt: !26)
+!29 = !MDLocation(line: 15, scope: !4)
+!30 = !MDLocation(line: 17, scope: !8)
+!31 = !{!"0x101\00b\0016777221\000", !14, !5, !11, !32} ; [ DW_TAG_arg_variable ] [b] [line 5]
+!32 = !MDLocation(line: 18, scope: !8)
+!33 = !MDLocation(line: 5, scope: !14, inlinedAt: !32)
+!34 = !MDLocation(line: 6, scope: !19, inlinedAt: !32)
+!35 = !MDLocation(line: 7, scope: !18, inlinedAt: !32)
+!36 = !{!37, !37, i64 0}
+!37 = !{!"int", !38, i64 0}
+!38 = !{!"omnipotent char", !39, i64 0}
+!39 = !{!"Simple C/C++ TBAA"}
+!40 = !{!"0x100\00s\007\000", !18, !5, !20, !32} ; [ DW_TAG_auto_variable ] [s] [line 7]
+!41 = !MDLocation(line: 8, scope: !18, inlinedAt: !32)
+!42 = !MDLocation(line: 9, scope: !18, inlinedAt: !32)
+!43 = !MDLocation(line: 10, scope: !14, inlinedAt: !32)
+!44 = !MDLocation(line: 19, scope: !8)
diff --git a/test/DebugInfo/missing-abstract-variable.o b/test/DebugInfo/missing-abstract-variable.o
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/test/DebugInfo/missing-abstract-variable.o
diff --git a/test/DebugInfo/multiline.ll b/test/DebugInfo/multiline.ll
new file mode 100644
index 000000000000..e67af328416f
--- /dev/null
+++ b/test/DebugInfo/multiline.ll
@@ -0,0 +1,82 @@
+; RUN: llc -filetype=asm -asm-verbose=0 -O0 < %s | FileCheck %s
+; RUN: llc -filetype=obj -O0 < %s | llvm-dwarfdump -debug-dump=line - | FileCheck %s --check-prefix=INT
+; XFAIL: hexagon
+
+; Check that the assembly output properly handles is_stmt changes. And since
+; we're testing anyway, check the integrated assembler too.
+
+; Generated with clang from multiline.c:
+; void f1();
+; void f2() {
+;   f1(); f1(); f1();
+;   f1(); f1(); f1();
+; }
+
+
+; CHECK: .loc	1 2 0{{$}}
+; CHECK-NOT: .loc{{ }}
+; CHECK: .loc	1 3 3 prologue_end{{$}}
+; CHECK-NOT: .loc
+; CHECK: .loc	1 3 9 is_stmt 0{{$}}
+; CHECK-NOT: .loc
+; CHECK: .loc	1 3 15{{$}}
+; CHECK-NOT: .loc
+; CHECK: .loc	1 4 3 is_stmt 1{{$}}
+; CHECK-NOT: .loc
+; CHECK: .loc	1 4 9 is_stmt 0{{$}}
+; CHECK-NOT: .loc
+; CHECK: .loc	1 4 15{{$}}
+; CHECK-NOT: .loc
+; CHECK: .loc	1 5 1 is_stmt 1{{$}}
+
+; INT: {{^}}Address
+; INT: -----
+; INT-NEXT: 2 0 1 0 0 is_stmt{{$}}
+; INT-NEXT: 3 3 1 0 0 is_stmt prologue_end{{$}}
+; INT-NEXT: 3 9 1 0 0 {{$}}
+; INT-NEXT: 3 15 1 0 0 {{$}}
+; INT-NEXT: 4 3 1 0 0 is_stmt{{$}}
+; INT-NEXT: 4 9 1 0 0 {{$}}
+; INT-NEXT: 4 15 1 0 0 {{$}}
+; INT-NEXT: 5 1 1 0 0 is_stmt{{$}}
+
+
+; Function Attrs: nounwind uwtable
+define void @f2() #0 {
+entry:
+  call void (...)* @f1(), !dbg !11
+  call void (...)* @f1(), !dbg !12
+  call void (...)* @f1(), !dbg !13
+  call void (...)* @f1(), !dbg !14
+  call void (...)* @f1(), !dbg !15
+  call void (...)* @f1(), !dbg !16
+  ret void, !dbg !17
+}
+
+declare void @f1(...) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = !{!"0x11\0012\00clang version 3.6.0 (trunk 225000) (llvm/trunk 224999)\000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/multiline.c] [DW_LANG_C99]
+!1 = !{!"multiline.c", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00f2\00f2\00\002\000\001\000\000\000\000\002", !1, !5, !6, null, void ()* @f2, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [f2]
+!5 = !{!"0x29", !1}                               ; [ DW_TAG_file_type ] [/tmp/dbginfo/multiline.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 2}
+!10 = !{!"clang version 3.6.0 (trunk 225000) (llvm/trunk 224999)"}
+!11 = !MDLocation(line: 3, column: 3, scope: !4)
+!12 = !MDLocation(line: 3, column: 9, scope: !4)
+!13 = !MDLocation(line: 3, column: 15, scope: !4)
+!14 = !MDLocation(line: 4, column: 3, scope: !4)
+!15 = !MDLocation(line: 4, column: 9, scope: !4)
+!16 = !MDLocation(line: 4, column: 15, scope: !4)
+!17 = !MDLocation(line: 5, column: 1, scope: !4)
diff --git a/test/DebugInfo/namespace.ll b/test/DebugInfo/namespace.ll
index a9de62c39062..a4fdbd27a89d 100644
--- a/test/DebugInfo/namespace.ll
+++ b/test/DebugInfo/namespace.ll
@@ -5,16 +5,18 @@
 ; CHECK: debug_info contents
 ; CHECK: [[NS1:0x[0-9a-f]*]]:{{ *}}DW_TAG_namespace
 ; CHECK-NEXT: DW_AT_name{{.*}} = "A"
-; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F1:[0-9]]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x03)
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F1:".*debug-info-namespace.cpp"]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(5)
 ; CHECK-NOT: NULL
 ; CHECK: [[NS2:0x[0-9a-f]*]]:{{ *}}DW_TAG_namespace
 ; CHECK-NEXT: DW_AT_name{{.*}} = "B"
-; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2:[0-9]]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x01)
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2:".*foo.cpp"]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(1)
 ; CHECK-NOT: NULL
 ; CHECK: [[I:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable
 ; CHECK-NEXT: DW_AT_name{{.*}}= "i"
+; CHECK: [[VAR_FWD:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable
+; CHECK-NEXT: DW_AT_name{{.*}}= "var_fwd"
 ; CHECK-NOT: NULL
 ; CHECK: [[FOO:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type
 ; CHECK-NEXT: DW_AT_name{{.*}}= "foo"
@@ -22,12 +24,28 @@
 ; CHECK-NOT: NULL
 ; CHECK: [[BAR:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type
 ; CHECK-NEXT: DW_AT_name{{.*}}= "bar"
-; CHECK: NULL
 ; CHECK: [[FUNC1:.*]]: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_MIPS_linkage_name
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_name{{.*}}= "f1"
+; CHECK: [[BAZ:0x[0-9a-f]*]]:{{.*}}DW_TAG_typedef
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}= "baz"
+; CHECK: [[VAR_DECL:0x[0-9a-f]*]]:{{.*}}DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}= "var_decl"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_declaration
+; CHECK: [[FUNC_DECL:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}= "func_decl"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_declaration
+; CHECK: [[FUNC_FWD:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name{{.*}}= "func_fwd"
+; CHECK-NOT: DW_AT_declaration
 ; CHECK: DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_AT_MIPS_linkage_name
@@ -39,16 +57,16 @@
 ; CHECK: DW_TAG_imported_module
 ; This is a bug, it should be in F2 but it inherits the file from its
 ; enclosing scope
-; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F1]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x08)
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F1]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(15)
 ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS2]]})
 ; CHECK: NULL
 ; CHECK-NOT: NULL
 
 ; CHECK: DW_TAG_imported_module
 ; Same bug as above, this should be F2, not F1
-; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F1]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x0b)
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F1]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(18)
 ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
 ; CHECK-NOT: NULL
 
@@ -59,71 +77,102 @@
 ; CHECK: DW_AT_name{{.*}}= "func"
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_imported_module
-; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x12)
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(26)
 ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x13)
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(27)
 ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[FOO]]})
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x14)
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(28)
 ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[BAR]]})
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x15)
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(29)
 ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[FUNC1]]})
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x16)
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(30)
 ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[I]]})
 ; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(31)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[BAZ]]})
+; CHECK-NOT: NULL
 ; CHECK: [[X:0x[0-9a-f]*]]:{{ *}}DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x18)
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(32)
 ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS1]]})
 ; CHECK-NEXT: DW_AT_name{{.*}}"X"
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_imported_declaration
-; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x19)
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(33)
 ; CHECK-NEXT: DW_AT_import{{.*}}=> {[[X]]})
 ; CHECK-NEXT: DW_AT_name{{.*}}"Y"
 ; CHECK-NOT: NULL
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(34)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[VAR_DECL]]})
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(35)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[FUNC_DECL]]})
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(36)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[VAR_FWD]]})
+; CHECK: DW_TAG_imported_declaration
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(37)
+; CHECK-NEXT: DW_AT_import{{.*}}=> {[[FUNC_FWD]]})
+
 ; CHECK: DW_TAG_lexical_block
 ; CHECK-NOT: NULL
 ; CHECK: DW_TAG_imported_module
-; CHECK-NEXT: DW_AT_decl_file{{.*}}(0x0[[F2]])
-; CHECK-NEXT: DW_AT_decl_line{{.*}}(0x0f)
-; CHECK-NEXT: DW_AT_import{{.*}}=> {[[NS2]]})
+; CHECK-NEXT: DW_AT_decl_file{{.*}}([[F2]])
+; CHECK-NEXT: DW_AT_decl_line{{.*}}(23)
+; CHECK-NEXT: DW_AT_import{{.*}}=>
 ; CHECK: NULL
 ; CHECK: NULL
 ; CHECK: NULL
 
-; CHECK: file_names[  [[F1]]]{{.*}}debug-info-namespace.cpp
-; CHECK: file_names[  [[F2]]]{{.*}}foo.cpp
-
 ; IR generated from clang/test/CodeGenCXX/debug-info-namespace.cpp, file paths
 ; changed to protect the guilty. The C++ source code is:
+; // RUN...
+; // RUN...
+; // RUN...
+;
 ; namespace A {
 ; #line 1 "foo.cpp"
 ; namespace B {
-; int i;
-; void f1() { }
+; extern int i;
+; int f1() { return 0; }
 ; void f1(int) { }
 ; struct foo;
 ; struct bar { };
+; typedef bar baz;
+; extern int var_decl;
+; void func_decl(void);
+; extern int var_fwd;
+; void func_fwd(void);
 ; }
+; }
+; namespace A {
 ; using namespace B;
 ; }
 ;
 ; using namespace A;
-;
+; namespace E = A;
+; int B::i = f1();
 ; int func(bool b) {
 ;   if (b) {
 ;     using namespace A::B;
@@ -134,123 +183,184 @@
 ;   using B::bar;
 ;   using B::f1;
 ;   using B::i;
-;   bar x;
+;   using B::baz;
 ;   namespace X = A;
 ;   namespace Y = X;
+;   using B::var_decl;
+;   using B::func_decl;
+;   using B::var_fwd;
+;   using B::func_fwd;
 ;   return i + X::B::i + Y::B::i;
 ; }
-
-%"struct.A::B::bar" = type { i8 }
+;
+; namespace A {
+; using B::i;
+; namespace B {
+; int var_fwd = i;
+; }
+; }
+; void B::func_fwd() {}
 
 @_ZN1A1B1iE = global i32 0, align 4
+@_ZN1A1B7var_fwdE = global i32 0, align 4
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_debug_info_namespace.cpp, i8* null }]
 
-; Function Attrs: nounwind uwtable
-define void @_ZN1A1B2f1Ev() #0 {
+; Function Attrs: nounwind ssp uwtable
+define i32 @_ZN1A1B2f1Ev() #0 {
 entry:
-  ret void, !dbg !41
+  ret i32 0, !dbg !60
 }
 
-; Function Attrs: nounwind uwtable
+; Function Attrs: nounwind ssp uwtable
 define void @_ZN1A1B2f1Ei(i32) #0 {
 entry:
   %.addr = alloca i32, align 4
   store i32 %0, i32* %.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %.addr}, metadata !42), !dbg !43
-  ret void, !dbg !43
+  call void @llvm.dbg.declare(metadata i32* %.addr, metadata !61, metadata !62), !dbg !63
+  ret void, !dbg !64
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+define internal void @__cxx_global_var_init() section "__TEXT,__StaticInit,regular,pure_instructions" {
+entry:
+  %call = call i32 @_ZN1A1B2f1Ev(), !dbg !65
+  store i32 %call, i32* @_ZN1A1B1iE, align 4, !dbg !65
+  ret void, !dbg !65
+}
 
-; Function Attrs: nounwind uwtable
+; Function Attrs: nounwind ssp uwtable
 define i32 @_Z4funcb(i1 zeroext %b) #0 {
 entry:
   %retval = alloca i32, align 4
   %b.addr = alloca i8, align 1
-  %x = alloca %"struct.A::B::bar", align 1
   %frombool = zext i1 %b to i8
   store i8 %frombool, i8* %b.addr, align 1
-  call void @llvm.dbg.declare(metadata !{i8* %b.addr}, metadata !44), !dbg !45
-  %0 = load i8* %b.addr, align 1, !dbg !46
-  %tobool = trunc i8 %0 to i1, !dbg !46
-  br i1 %tobool, label %if.then, label %if.end, !dbg !46
+  call void @llvm.dbg.declare(metadata i8* %b.addr, metadata !66, metadata !62), !dbg !67
+  %0 = load i8* %b.addr, align 1, !dbg !68
+  %tobool = trunc i8 %0 to i1, !dbg !68
+  br i1 %tobool, label %if.then, label %if.end, !dbg !68
 
 if.then:                                          ; preds = %entry
-  %1 = load i32* @_ZN1A1B1iE, align 4, !dbg !47
-  store i32 %1, i32* %retval, !dbg !47
-  br label %return, !dbg !47
+  %1 = load i32* @_ZN1A1B1iE, align 4, !dbg !69
+  store i32 %1, i32* %retval, !dbg !69
+  br label %return, !dbg !69
 
 if.end:                                           ; preds = %entry
-  call void @llvm.dbg.declare(metadata !{%"struct.A::B::bar"* %x}, metadata !48), !dbg !49
-  %2 = load i32* @_ZN1A1B1iE, align 4, !dbg !50
-  %3 = load i32* @_ZN1A1B1iE, align 4, !dbg !50
-  %add = add nsw i32 %2, %3, !dbg !50
-  %4 = load i32* @_ZN1A1B1iE, align 4, !dbg !50
-  %add1 = add nsw i32 %add, %4, !dbg !50
-  store i32 %add1, i32* %retval, !dbg !50
-  br label %return, !dbg !50
+  %2 = load i32* @_ZN1A1B1iE, align 4, !dbg !70
+  %3 = load i32* @_ZN1A1B1iE, align 4, !dbg !70
+  %add = add nsw i32 %2, %3, !dbg !70
+  %4 = load i32* @_ZN1A1B1iE, align 4, !dbg !70
+  %add1 = add nsw i32 %add, %4, !dbg !70
+  store i32 %add1, i32* %retval, !dbg !70
+  br label %return, !dbg !70
 
 return:                                           ; preds = %if.end, %if.then
-  %5 = load i32* %retval, !dbg !51
-  ret i32 %5, !dbg !51
+  %5 = load i32* %retval, !dbg !71
+  ret i32 %5, !dbg !71
+}
+
+define internal void @__cxx_global_var_init1() section "__TEXT,__StaticInit,regular,pure_instructions" {
+entry:
+  %0 = load i32* @_ZN1A1B1iE, align 4, !dbg !72
+  store i32 %0, i32* @_ZN1A1B7var_fwdE, align 4, !dbg !72
+  ret void, !dbg !72
+}
+
+; Function Attrs: nounwind ssp uwtable
+define void @_ZN1A1B8func_fwdEv() #0 {
+entry:
+  ret void, !dbg !73
+}
+
+define internal void @_GLOBAL__sub_I_debug_info_namespace.cpp() section "__TEXT,__StaticInit,regular,pure_instructions" {
+entry:
+  call void @__cxx_global_var_init(), !dbg !74
+  call void @__cxx_global_var_init1(), !dbg !74
+  ret void, !dbg !74
 }
 
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
 
 !llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!52}
+!llvm.module.flags = !{!57, !58}
+!llvm.ident = !{!59}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !19, metadata !21, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/llvm/build/clang/debug//usr/local/google/home/blaikie/dev/llvm/src/tools/clang/test/CodeGenCXX/debug-info-namespace.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"/usr/local/google/home/blaikie/dev/llvm/src/tools/clang/test/CodeGenCXX/debug-info-namespace.cpp", metadata !"/usr/local/google/home/blaikie/dev/llvm/build/clang/debug"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !10, metadata !14}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f1", metadata !"f1", metadata !"_ZN1A1B2f1Ev", i32 3, metadata !8, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN1A1B2f1Ev, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f1]
-!5 = metadata !{metadata !"foo.cpp", metadata !"/usr/local/google/home/blaikie/dev/llvm/build/clang/debug"}
-!6 = metadata !{i32 786489, metadata !5, metadata !7, metadata !"B", i32 1} ; [ DW_TAG_namespace ] [B] [line 1]
-!7 = metadata !{i32 786489, metadata !1, null, metadata !"A", i32 3} ; [ DW_TAG_namespace ] [A] [line 3]
-!8 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!9 = metadata !{null}
-!10 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f1", metadata !"f1", metadata !"_ZN1A1B2f1Ei", i32 4, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_ZN1A1B2f1Ei, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [f1]
-!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{null, metadata !13}
-!13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!14 = metadata !{i32 786478, metadata !5, metadata !15, metadata !"func", metadata !"func", metadata !"_Z4funcb", i32 13, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i1)* @_Z4funcb, null, null, metadata !2, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [func]
-!15 = metadata !{i32 786473, metadata !5}         ; [ DW_TAG_file_type ] [/usr/local/google/home/blaikie/dev/llvm/build/clang/debug/foo.cpp]
-!16 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!17 = metadata !{metadata !13, metadata !18}
-!18 = metadata !{i32 786468, null, null, metadata !"bool", i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
-!19 = metadata !{metadata !20}
-!20 = metadata !{i32 786484, i32 0, metadata !6, metadata !"i", metadata !"i", metadata !"_ZN1A1B1iE", metadata !15, i32 2, metadata !13, i32 0, i32 1, i32* @_ZN1A1B1iE, null} ; [ DW_TAG_variable ] [i] [line 2] [def]
-!21 = metadata !{metadata !22, metadata !23, metadata !24, metadata !26, metadata !27, metadata !29, metadata !37, metadata !38, metadata !39, metadata !40}
-!22 = metadata !{i32 786490, metadata !7, metadata !6, i32 8} ; [ DW_TAG_imported_module ]
-!23 = metadata !{i32 786490, metadata !0, metadata !7, i32 11} ; [ DW_TAG_imported_module ]
-!24 = metadata !{i32 786490, metadata !25, metadata !6, i32 15} ; [ DW_TAG_imported_module ]
-!25 = metadata !{i32 786443, metadata !5, metadata !14, i32 14, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/home/blaikie/dev/llvm/build/clang/debug/foo.cpp]
-!26 = metadata !{i32 786490, metadata !14, metadata !7, i32 18} ; [ DW_TAG_imported_module ]
-!27 = metadata !{i32 786440, metadata !14, metadata !28, i32 19} ; [ DW_TAG_imported_declaration ]
-!28 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"foo", i32 5, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [foo] [line 5, size 0, align 0, offset 0] [decl] [from ]
-!29 = metadata !{i32 786440, metadata !14, metadata !30, i32 20} ; [ DW_TAG_imported_declaration ]
-!30 = metadata !{i32 786451, metadata !5, metadata !6, metadata !"bar", i32 6, i64 8, i64 8, i32 0, i32 0, null, metadata !31, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [bar] [line 6, size 8, align 8, offset 0] [def] [from ]
-!31 = metadata !{metadata !32}
-!32 = metadata !{i32 786478, metadata !5, metadata !30, metadata !"bar", metadata !"bar", metadata !"", i32 6, metadata !33, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !36, i32 6} ; [ DW_TAG_subprogram ] [line 6] [bar]
-!33 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!34 = metadata !{null, metadata !35}
-!35 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !30} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from bar]
-!36 = metadata !{i32 786468}
-!37 = metadata !{i32 786440, metadata !14, metadata !10, i32 21} ; [ DW_TAG_imported_declaration ]
-!38 = metadata !{i32 786440, metadata !14, metadata !20, i32 22} ; [ DW_TAG_imported_declaration ]
-!39 = metadata !{i32 786440, metadata !14, metadata !7, i32 24, metadata !"X"} ; [ DW_TAG_imported_declaration ]
-!40 = metadata !{i32 786440, metadata !14, metadata !39, i32 25, metadata !"Y"} ; [ DW_TAG_imported_declaration ]
-!41 = metadata !{i32 3, i32 0, metadata !4, null}
-!42 = metadata !{i32 786689, metadata !10, metadata !"", metadata !15, i32 16777220, metadata !13, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [line 4]
-!43 = metadata !{i32 4, i32 0, metadata !10, null}
-!44 = metadata !{i32 786689, metadata !14, metadata !"b", metadata !15, i32 16777229, metadata !18, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 13]
-!45 = metadata !{i32 13, i32 0, metadata !14, null}
-!46 = metadata !{i32 14, i32 0, metadata !14, null}
-!47 = metadata !{i32 16, i32 0, metadata !25, null}
-!48 = metadata !{i32 786688, metadata !14, metadata !"x", metadata !15, i32 23, metadata !30, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [x] [line 23]
-!49 = metadata !{i32 23, i32 0, metadata !14, null}
-!50 = metadata !{i32 26, i32 0, metadata !14, null}
-!51 = metadata !{i32 27, i32 0, metadata !14, null}
-!52 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.6.0 \000\00\000\00\001", !1, !2, !3, !9, !30, !33} ; [ DW_TAG_compile_unit ] [/tmp/debug-info-namespace.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"debug-info-namespace.cpp", !"/tmp"}
+!2 = !{}
+!3 = !{!4, !8}
+!4 = !{!"0x13\00foo\005\000\000\000\004\000", !5, !6, null, null, null, null, !"_ZTSN1A1B3fooE"} ; [ DW_TAG_structure_type ] [foo] [line 5, size 0, align 0, offset 0] [decl] [from ]
+!5 = !{!"foo.cpp", !"/tmp"}
+!6 = !{!"0x39\00B\001", !5, !7} ; [ DW_TAG_namespace ] [B] [line 1]
+!7 = !{!"0x39\00A\005", !1, null} ; [ DW_TAG_namespace ] [A] [line 5]
+!8 = !{!"0x13\00bar\006\008\008\000\000\000", !5, !6, null, !2, null, null, !"_ZTSN1A1B3barE"} ; [ DW_TAG_structure_type ] [bar] [line 6, size 8, align 8, offset 0] [def] [from ]
+!9 = !{!10, !14, !17, !21, !25, !26, !27}
+!10 = !{!"0x2e\00f1\00f1\00_ZN1A1B2f1Ev\003\000\001\000\000\00256\000\003", !5, !6, !11, null, i32 ()* @_ZN1A1B2f1Ev, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [f1]
+!11 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!13}
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!14 = !{!"0x2e\00f1\00f1\00_ZN1A1B2f1Ei\004\000\001\000\000\00256\000\004", !5, !6, !15, null, void (i32)* @_ZN1A1B2f1Ei, null, null, !2} ; [ DW_TAG_subprogram ] [line 4] [def] [f1]
+!15 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{null, !13}
+!17 = !{!"0x2e\00__cxx_global_var_init\00__cxx_global_var_init\00\0020\001\001\000\000\00256\000\0020", !5, !18, !19, null, void ()* @__cxx_global_var_init, null, null, !2} ; [ DW_TAG_subprogram ] [line 20] [local] [def] [__cxx_global_var_init]
+!18 = !{!"0x29", !5}   ; [ DW_TAG_file_type ] [/tmp/foo.cpp]
+!19 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !20, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!20 = !{null}
+!21 = !{!"0x2e\00func\00func\00_Z4funcb\0021\000\001\000\000\00256\000\0021", !5, !18, !22, null, i32 (i1)* @_Z4funcb, null, null, !2} ; [ DW_TAG_subprogram ] [line 21] [def] [func]
+!22 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !23, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!23 = !{!13, !24}
+!24 = !{!"0x24\00bool\000\008\008\000\000\002", null, null} ; [ DW_TAG_base_type ] [bool] [line 0, size 8, align 8, offset 0, enc DW_ATE_boolean]
+!25 = !{!"0x2e\00__cxx_global_var_init1\00__cxx_global_var_init1\00\0044\001\001\000\000\00256\000\0044", !5, !18, !19, null, void ()* @__cxx_global_var_init1, null, null, !2} ; [ DW_TAG_subprogram ] [line 44] [local] [def] [__cxx_global_var_init1]
+!26 = !{!"0x2e\00func_fwd\00func_fwd\00_ZN1A1B8func_fwdEv\0047\000\001\000\000\00256\000\0047", !5, !6, !19, null, void ()* @_ZN1A1B8func_fwdEv, null, null, !2} ; [ DW_TAG_subprogram ] [line 47] [def] [func_fwd]
+!27 = !{!"0x2e\00\00\00_GLOBAL__sub_I_debug_info_namespace.cpp\000\001\001\000\000\0064\000\000", !1, !28, !29, null, void ()* @_GLOBAL__sub_I_debug_info_namespace.cpp, null, null, !2} ; [ DW_TAG_subprogram ] [line 0] [local] [def]
+!28 = !{!"0x29", !1}   ; [ DW_TAG_file_type ] [/tmp/debug-info-namespace.cpp]
+!29 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!30 = !{!31, !32}
+!31 = !{!"0x34\00i\00i\00_ZN1A1B1iE\0020\000\001", !6, !18, !13, i32* @_ZN1A1B1iE, null} ; [ DW_TAG_variable ] [i] [line 20] [def]
+!32 = !{!"0x34\00var_fwd\00var_fwd\00_ZN1A1B7var_fwdE\0044\000\001", !6, !18, !13, i32* @_ZN1A1B7var_fwdE, null} ; [ DW_TAG_variable ] [var_fwd] [line 44] [def]
+!33 = !{!34, !35, !36, !37, !40, !41, !42, !43, !44, !45, !47, !48, !49, !51, !54, !55, !56}
+!34 = !{!"0x3a\0015\00", !7, !6} ; [ DW_TAG_imported_module ]
+!35 = !{!"0x3a\0018\00", !0, !7} ; [ DW_TAG_imported_module ]
+!36 = !{!"0x8\0019\00E", !0, !7} ; [ DW_TAG_imported_declaration ]
+!37 = !{!"0x3a\0023\00", !38, !6} ; [ DW_TAG_imported_module ]
+!38 = !{!"0xb\0022\0010\001", !5, !39} ; [ DW_TAG_lexical_block ] [/tmp/foo.cpp]
+!39 = !{!"0xb\0022\007\000", !5, !21} ; [ DW_TAG_lexical_block ] [/tmp/foo.cpp]
+!40 = !{!"0x3a\0026\00", !21, !7} ; [ DW_TAG_imported_module ]
+!41 = !{!"0x8\0027\00", !21, !"_ZTSN1A1B3fooE"} ; [ DW_TAG_imported_declaration ]
+!42 = !{!"0x8\0028\00", !21, !"_ZTSN1A1B3barE"} ; [ DW_TAG_imported_declaration ]
+!43 = !{!"0x8\0029\00", !21, !14} ; [ DW_TAG_imported_declaration ]
+!44 = !{!"0x8\0030\00", !21, !31} ; [ DW_TAG_imported_declaration ]
+!45 = !{!"0x8\0031\00", !21, !46} ; [ DW_TAG_imported_declaration ]
+!46 = !{!"0x16\00baz\007\000\000\000\000", !5, !6, !"_ZTSN1A1B3barE"} ; [ DW_TAG_typedef ] [baz] [line 7, size 0, align 0, offset 0] [from _ZTSN1A1B3barE]
+!47 = !{!"0x8\0032\00X", !21, !7} ; [ DW_TAG_imported_declaration ]
+!48 = !{!"0x8\0033\00Y", !21, !47} ; [ DW_TAG_imported_declaration ]
+!49 = !{!"0x8\0034\00", !21, !50} ; [ DW_TAG_imported_declaration ]
+!50 = !{!"0x34\00var_decl\00var_decl\00_ZN1A1B8var_declE\008\000\000", !6, !18, !13, null, null} ; [ DW_TAG_variable ] [var_decl] [line 8]
+!51 = !{!"0x8\0035\00", !21, !52} ; [ DW_TAG_imported_declaration ]
+!52 = !{!"0x2e\00func_decl\00func_decl\00_ZN1A1B9func_declEv\009\000\000\000\000\00256\000\000", !5, !6, !19, null, null, null, null, !53} ; [ DW_TAG_subprogram ] [line 9] [scope 0] [func_decl]
+!53 = !{!"0x24"}
+!54 = !{!"0x8\0036\00", !21, !32} ; [ DW_TAG_imported_declaration ]
+!55 = !{!"0x8\0037\00", !21, !26} ; [ DW_TAG_imported_declaration ]
+!56 = !{!"0x8\0042\00", !7, !31} ; [ DW_TAG_imported_declaration ]
+!57 = !{i32 2, !"Dwarf Version", i32 2}
+!58 = !{i32 2, !"Debug Info Version", i32 2}
+!59 = !{!"clang version 3.6.0 "}
+!60 = !MDLocation(line: 3, column: 12, scope: !10)
+!61 = !{!"0x101\00\0016777220\000", !14, !18, !13} ; [ DW_TAG_arg_variable ] [line 4]
+!62 = !{!"0x102"}               ; [ DW_TAG_expression ]
+!63 = !MDLocation(line: 4, column: 12, scope: !14)
+!64 = !MDLocation(line: 4, column: 16, scope: !14)
+!65 = !MDLocation(line: 20, column: 12, scope: !17)
+!66 = !{!"0x101\00b\0016777237\000", !21, !18, !24} ; [ DW_TAG_arg_variable ] [b] [line 21]
+!67 = !MDLocation(line: 21, column: 15, scope: !21)
+!68 = !MDLocation(line: 22, column: 7, scope: !21)
+!69 = !MDLocation(line: 24, column: 5, scope: !38)
+!70 = !MDLocation(line: 38, column: 3, scope: !21)
+!71 = !MDLocation(line: 39, column: 1, scope: !21)
+!72 = !MDLocation(line: 44, column: 15, scope: !25)
+!73 = !MDLocation(line: 47, column: 21, scope: !26)
+!74 = !MDLocation(line: 0, scope: !75)
+!75 = !{!"0xb\000", !5, !27} ; [ DW_TAG_lexical_block ] [/tmp/foo.cpp]
diff --git a/test/DebugInfo/namespace_function_definition.ll b/test/DebugInfo/namespace_function_definition.ll
index 590f2b301ffe..02c55bf094a2 100644
--- a/test/DebugInfo/namespace_function_definition.ll
+++ b/test/DebugInfo/namespace_function_definition.ll
@@ -30,15 +30,15 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 !llvm.module.flags = !{!8, !9}
 !llvm.ident = !{!10}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/namespace_function_definition.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"namespace_function_definition.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"func", metadata !"func", metadata !"_ZN2ns4funcEv", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZN2ns4funcEv, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [func]
-!5 = metadata !{i32 786489, metadata !1, null, metadata !"ns", i32 1} ; [ DW_TAG_namespace ] [ns] [line 1]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!10 = metadata !{metadata !"clang version 3.5.0 "}
-!11 = metadata !{i32 3, i32 0, metadata !4, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/namespace_function_definition.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"namespace_function_definition.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00func\00func\00_ZN2ns4funcEv\002\000\001\000\006\00256\000\002", !1, !5, !6, null, void ()* @_ZN2ns4funcEv, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [func]
+!5 = !{!"0x39\00ns\001", !1, null} ; [ DW_TAG_namespace ] [ns] [line 1]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 2}
+!10 = !{!"clang version 3.5.0 "}
+!11 = !MDLocation(line: 3, scope: !4)
diff --git a/test/DebugInfo/namespace_inline_function_definition.ll b/test/DebugInfo/namespace_inline_function_definition.ll
index 65fa4a442dc6..b6f1b5ff2c0b 100644
--- a/test/DebugInfo/namespace_inline_function_definition.ll
+++ b/test/DebugInfo/namespace_inline_function_definition.ll
@@ -15,19 +15,19 @@
 ; CHECK: DW_TAG_namespace
 ; CHECK-NEXT: DW_AT_name {{.*}} "ns"
 ; CHECK-NOT: DW_TAG
-; CHECK: [[ABS_DEF:0x.*]]: DW_TAG_subprogram
+; CHECK:   DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
 ; CHECK:   DW_AT_MIPS_linkage_name {{.*}} "_ZN2ns4funcEi"
 ; CHECK-NOT: DW_TAG
-; CHECK: [[ABS_PRM:0x.*]]:   DW_TAG_formal_parameter
+; CHECK:   DW_TAG_formal_parameter
 ; CHECK:   NULL
 ; CHECK-NOT: NULL
 ; CHECK:   DW_TAG_subprogram
 ; CHECK-NOT: DW_TAG
-; CHECK:     DW_AT_abstract_origin {{.*}} {[[ABS_DEF]]}
+; CHECK:     DW_AT_abstract_origin {{.*}} "_ZN2ns4funcEi"
 ; CHECK-NOT: DW_TAG
 ; CHECK:     DW_TAG_formal_parameter
-; CHECK:       DW_AT_abstract_origin {{.*}} {[[ABS_PRM]]}
+; CHECK:       DW_AT_abstract_origin {{.*}} "i"
 ; CHECK:     NULL
 ; CHECK:   NULL
 ; CHECK: NULL
@@ -42,7 +42,7 @@ entry:
   store i32 0, i32* %retval
   %0 = load i32* @x, align 4, !dbg !16
   store i32 %0, i32* %i.addr.i, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %i.addr.i}, metadata !17), !dbg !18
+  call void @llvm.dbg.declare(metadata i32* %i.addr.i, metadata !17, metadata !{!"0x102"}), !dbg !18
   %1 = load i32* %i.addr.i, align 4, !dbg !18
   %mul.i = mul nsw i32 %1, 2, !dbg !18
   ret i32 %mul.i, !dbg !16
@@ -53,14 +53,14 @@ define i32 @_ZN2ns4funcEi(i32 %i) #1 {
 entry:
   %i.addr = alloca i32, align 4
   store i32 %i, i32* %i.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !17), !dbg !19
+  call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !17, metadata !{!"0x102"}), !dbg !19
   %0 = load i32* %i.addr, align 4, !dbg !19
   %mul = mul nsw i32 %0, 2, !dbg !19
   ret i32 %mul, !dbg !19
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #2
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
 
 attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { alwaysinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
@@ -70,23 +70,23 @@ attributes #2 = { nounwind readnone }
 !llvm.module.flags = !{!13, !14}
 !llvm.ident = !{!15}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/namespace_inline_function_definition.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"namespace_inline_function_definition.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !9}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [main]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/namespace_inline_function_definition.cpp]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786478, metadata !1, metadata !10, metadata !"func", metadata !"func", metadata !"_ZN2ns4funcEi", i32 6, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_ZN2ns4funcEi, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [func]
-!10 = metadata !{i32 786489, metadata !1, null, metadata !"ns", i32 1} ; [ DW_TAG_namespace ] [ns] [line 1]
-!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !8, metadata !8}
-!13 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!14 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!15 = metadata !{metadata !"clang version 3.5.0 "}
-!16 = metadata !{i32 5, i32 0, metadata !4, null}
-!17 = metadata !{i32 786689, metadata !9, metadata !"i", metadata !5, i32 16777222, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 6]
-!18 = metadata !{i32 6, i32 0, metadata !9, metadata !16}
-!19 = metadata !{i32 6, i32 0, metadata !9, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/namespace_inline_function_definition.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"namespace_inline_function_definition.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4, !9}
+!4 = !{!"0x2e\00main\00main\00\005\000\001\000\006\00256\000\005", !1, !5, !6, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [main]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/namespace_inline_function_definition.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x2e\00func\00func\00_ZN2ns4funcEi\006\000\001\000\006\00256\000\006", !1, !10, !11, null, i32 (i32)* @_ZN2ns4funcEi, null, null, !2} ; [ DW_TAG_subprogram ] [line 6] [def] [func]
+!10 = !{!"0x39\00ns\001", !1, null} ; [ DW_TAG_namespace ] [ns] [line 1]
+!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!8, !8}
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 2}
+!15 = !{!"clang version 3.5.0 "}
+!16 = !MDLocation(line: 5, scope: !4)
+!17 = !{!"0x101\00i\0016777222\000", !9, !5, !8} ; [ DW_TAG_arg_variable ] [i] [line 6]
+!18 = !MDLocation(line: 6, scope: !9, inlinedAt: !16)
+!19 = !MDLocation(line: 6, scope: !9)
diff --git a/test/DebugInfo/nodebug.ll b/test/DebugInfo/nodebug.ll
new file mode 100644
index 000000000000..83ce2625603c
--- /dev/null
+++ b/test/DebugInfo/nodebug.ll
@@ -0,0 +1,51 @@
+; REQUIRES: object-emission
+
+; RUN: %llc_dwarf < %s -filetype=obj | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+
+; Test that a nodebug function (a function not appearing in the debug info IR
+; metadata subprogram list) with DebugLocs on its IR doesn't cause crashes/does
+; the right thing.
+
+; Build with clang from the following:
+; extern int i;
+; inline __attribute__((always_inline)) void f1() {
+;   i = 3;
+; }
+;
+; __attribute__((nodebug)) void f2() {
+;   f1();
+; }
+
+; Check that there's only one DW_TAG_subprogram, nothing for the 'f2' function.
+; CHECK: DW_TAG_subprogram
+; CHECK-NOT: DW_TAG
+; CHECK:   DW_AT_name {{.*}} "f1"
+; CHECK-NOT: DW_TAG_subprogram
+
+@i = external global i32
+
+; Function Attrs: uwtable
+define void @_Z2f2v() #0 {
+entry:
+  store i32 3, i32* @i, align 4, !dbg !11
+  ret void
+}
+
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/nodebug.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"nodebug.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00f1\00f1\00_Z2f1v\002\000\001\000\006\00256\000\002", !1, !5, !6, null, null, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [f1]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/nodebug.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 2}
+!10 = !{!"clang version 3.5.0 "}
+!11 = !MDLocation(line: 3, scope: !4)
diff --git a/test/DebugInfo/restrict.ll b/test/DebugInfo/restrict.ll
index ceb844f16efe..54bdec7affaf 100644
--- a/test/DebugInfo/restrict.ll
+++ b/test/DebugInfo/restrict.ll
@@ -21,12 +21,12 @@ define void @_Z3fooPv(i8* noalias %dst) #0 {
 entry:
   %dst.addr = alloca i8*, align 8
   store i8* %dst, i8** %dst.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i8** %dst.addr}, metadata !13), !dbg !14
+  call void @llvm.dbg.declare(metadata i8** %dst.addr, metadata !13, metadata !{!"0x102"}), !dbg !14
   ret void, !dbg !15
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -35,19 +35,19 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!10, !11}
 !llvm.ident = !{!12}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/restrict.c] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"restrict.c", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"_Z3fooPv", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_Z3fooPv, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/restrict.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null, metadata !8}
-!8 = metadata !{i32 786487, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_restrict_type ] [line 0, size 0, align 0, offset 0] [from ]
-!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!10 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!12 = metadata !{metadata !"clang version 3.5.0 "}
-!13 = metadata !{i32 786689, metadata !4, metadata !"dst", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [dst] [line 1]
-!14 = metadata !{i32 1, i32 0, metadata !4, null}
-!15 = metadata !{i32 2, i32 0, metadata !4, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/restrict.c] [DW_LANG_C_plus_plus]
+!1 = !{!"restrict.c", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00_Z3fooPv\001\000\001\000\006\00256\000\001", !1, !5, !6, null, void (i8*)* @_Z3fooPv, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/restrict.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !8}
+!8 = !{!"0x37\00\000\000\000\000\000", null, null, !9} ; [ DW_TAG_restrict_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = !{!"0xf\00\000\0064\0064\000\000", null, null, null} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 1, !"Debug Info Version", i32 2}
+!12 = !{!"clang version 3.5.0 "}
+!13 = !{!"0x101\00dst\0016777217\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [dst] [line 1]
+!14 = !MDLocation(line: 1, scope: !4)
+!15 = !MDLocation(line: 2, scope: !4)
diff --git a/test/DebugInfo/sugared-constants.ll b/test/DebugInfo/sugared-constants.ll
index 0d2ebe663c00..9e4f374e9fa3 100644
--- a/test/DebugInfo/sugared-constants.ll
+++ b/test/DebugInfo/sugared-constants.ll
@@ -24,11 +24,11 @@
 ; Function Attrs: uwtable
 define i32 @main() #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !20, i64 0, metadata !10), !dbg !21
+  tail call void @llvm.dbg.value(metadata i32 42, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !21
   tail call void @_Z4funci(i32 42), !dbg !22
-  tail call void @llvm.dbg.value(metadata !23, i64 0, metadata !12), !dbg !24
+  tail call void @llvm.dbg.value(metadata i32 117, i64 0, metadata !12, metadata !{!"0x102"}), !dbg !24
   tail call void @_Z4funcj(i32 117), !dbg !25
-  tail call void @llvm.dbg.value(metadata !26, i64 0, metadata !15), !dbg !27
+  tail call void @llvm.dbg.value(metadata i16 7, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !27
   tail call void @_Z4funcDs(i16 zeroext 7), !dbg !28
   ret i32 0, !dbg !29
 }
@@ -40,7 +40,7 @@ declare void @_Z4funcj(i32) #1
 declare void @_Z4funcDs(i16 zeroext) #1
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #2
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
 
 attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
@@ -50,33 +50,33 @@ attributes #2 = { nounwind readnone }
 !llvm.module.flags = !{!17, !18}
 !llvm.ident = !{!19}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/const.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"const.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 4, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @main, null, null, metadata !9, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [main]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/const.cpp]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{metadata !10, metadata !12, metadata !15}
-!10 = metadata !{i32 786688, metadata !4, metadata !"i", metadata !5, i32 5, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 5]
-!11 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int]
-!12 = metadata !{i32 786688, metadata !4, metadata !"j", metadata !5, i32 7, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [j] [line 7]
-!13 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from unsigned int]
-!14 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
-!15 = metadata !{i32 786688, metadata !4, metadata !"c", metadata !5, i32 9, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [c] [line 9]
-!16 = metadata !{i32 786468, null, null, metadata !"char16_t", i32 0, i64 16, i64 16, i64 0, i32 0, i32 16} ; [ DW_TAG_base_type ] [char16_t] [line 0, size 16, align 16, offset 0, enc DW_ATE_UTF]
-!17 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!19 = metadata !{metadata !"clang version 3.5.0 "}
-!20 = metadata !{i32 42}
-!21 = metadata !{i32 5, i32 0, metadata !4, null}
-!22 = metadata !{i32 6, i32 0, metadata !4, null}
-!23 = metadata !{i32 117}
-!24 = metadata !{i32 7, i32 0, metadata !4, null}
-!25 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
-!26 = metadata !{i16 7}
-!27 = metadata !{i32 9, i32 0, metadata !4, null}
-!28 = metadata !{i32 10, i32 0, metadata !4, null}
-!29 = metadata !{i32 11, i32 0, metadata !4, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/const.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"const.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00main\00main\00\004\000\001\000\006\00256\001\004", !1, !5, !6, null, i32 ()* @main, null, null, !9} ; [ DW_TAG_subprogram ] [line 4] [def] [main]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/const.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!10, !12, !15}
+!10 = !{!"0x100\00i\005\000", !4, !5, !11} ; [ DW_TAG_auto_variable ] [i] [line 5]
+!11 = !{!"0x26\00\000\000\000\000\000", null, null, !8} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from int]
+!12 = !{!"0x100\00j\007\000", !4, !5, !13} ; [ DW_TAG_auto_variable ] [j] [line 7]
+!13 = !{!"0x26\00\000\000\000\000\000", null, null, !14} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from unsigned int]
+!14 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, null} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!15 = !{!"0x100\00c\009\000", !4, !5, !16} ; [ DW_TAG_auto_variable ] [c] [line 9]
+!16 = !{!"0x24\00char16_t\000\0016\0016\000\000\0016", null, null} ; [ DW_TAG_base_type ] [char16_t] [line 0, size 16, align 16, offset 0, enc DW_ATE_UTF]
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !{i32 1, !"Debug Info Version", i32 2}
+!19 = !{!"clang version 3.5.0 "}
+!20 = !{i32 42}
+!21 = !MDLocation(line: 5, scope: !4)
+!22 = !MDLocation(line: 6, scope: !4)
+!23 = !{i32 117}
+!24 = !MDLocation(line: 7, scope: !4)
+!25 = !MDLocation(line: 8, scope: !4)
+!26 = !{i16 7}
+!27 = !MDLocation(line: 9, scope: !4)
+!28 = !MDLocation(line: 10, scope: !4)
+!29 = !MDLocation(line: 11, scope: !4)
diff --git a/test/DebugInfo/template-recursive-void.ll b/test/DebugInfo/template-recursive-void.ll
index ffbc30e7504c..b7e72445cd7f 100644
--- a/test/DebugInfo/template-recursive-void.ll
+++ b/test/DebugInfo/template-recursive-void.ll
@@ -25,41 +25,41 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!36, !37}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 187958) (llvm/trunk 187964)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/debug-info-template-recursive.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"debug-info-template-recursive.cpp", metadata !"/usr/local/google/home/echristo/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786484, i32 0, null, metadata !"filters", metadata !"filters", metadata !"", metadata !5, i32 10, metadata !6, i32 0, i32 1, %class.bar* @filters, null} ; [ DW_TAG_variable ] [filters] [line 10] [def]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/debug-info-template-recursive.cpp]
-!6 = metadata !{i32 786434, metadata !1, null, metadata !"bar", i32 9, i64 8, i64 8, i32 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_class_type ] [bar] [line 9, size 8, align 8, offset 0] [def] [from ]
-!7 = metadata !{metadata !8, metadata !31}
-!8 = metadata !{i32 786460, null, metadata !6, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from foo<void>]
-!9 = metadata !{i32 786434, metadata !1, null, metadata !"foo<void>", i32 5, i64 8, i64 8, i32 0, i32 0, null, metadata !10, i32 0, null, metadata !29, null} ; [ DW_TAG_class_type ] [foo<void>] [line 5, size 8, align 8, offset 0] [def] [from ]
-!10 = metadata !{metadata !11, metadata !19, metadata !25}
-!11 = metadata !{i32 786460, null, metadata !9, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from base]
-!12 = metadata !{i32 786434, metadata !1, null, metadata !"base", i32 3, i64 8, i64 8, i32 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_class_type ] [base] [line 3, size 8, align 8, offset 0] [def] [from ]
-!13 = metadata !{metadata !14}
-!14 = metadata !{i32 786478, metadata !1, metadata !12, metadata !"base", metadata !"base", metadata !"", i32 3, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 3} ; [ DW_TAG_subprogram ] [line 3] [base]
-!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{null, metadata !17}
-!17 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !12} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from base]
-!18 = metadata !{i32 786468}
-!19 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"operator=", metadata !"operator=", metadata !"_ZN3fooIvEaSES0_", i32 6, metadata !20, i1 false, i1 false, i32 0, i32 0, null, i32 257, i1 false, null, null, i32 0, metadata !24, i32 6} ; [ DW_TAG_subprogram ] [line 6] [private] [operator=]
-!20 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!21 = metadata !{null, metadata !22, metadata !23}
-!22 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from foo<void>]
-!23 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo<void>]
-!24 = metadata !{i32 786468}
-!25 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !26, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !28, i32 5} ; [ DW_TAG_subprogram ] [line 5] [foo]
-!26 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !27, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!27 = metadata !{null, metadata !22}
-!28 = metadata !{i32 786468}
-!29 = metadata !{metadata !30}
-!30 = metadata !{i32 786479, null, metadata !"T", null, null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!31 = metadata !{i32 786478, metadata !1, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 9, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !35, i32 9} ; [ DW_TAG_subprogram ] [line 9] [bar]
-!32 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!33 = metadata !{null, metadata !34}
-!34 = metadata !{i32 786447, i32 0, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !6} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from bar]
-!35 = metadata !{i32 786468}
-!36 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!37 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 (trunk 187958) (llvm/trunk 187964)\000\00\000\00\000", !1, !2, !2, !2, !3, !2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/debug-info-template-recursive.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"debug-info-template-recursive.cpp", !"/usr/local/google/home/echristo/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x34\00filters\00filters\00\0010\000\001", null, !5, !6, %class.bar* @filters, null} ; [ DW_TAG_variable ] [filters] [line 10] [def]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/usr/local/google/home/echristo/tmp/debug-info-template-recursive.cpp]
+!6 = !{!"0x2\00bar\009\008\008\000\000\000", !1, null, null, !7, null, null, null} ; [ DW_TAG_class_type ] [bar] [line 9, size 8, align 8, offset 0] [def] [from ]
+!7 = !{!8, !31}
+!8 = !{!"0x1c\00\000\000\000\000\000", null, !6, !9} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from foo<void>]
+!9 = !{!"0x2\00foo<void>\005\008\008\000\000\000", !1, null, null, !10, null, !29, null} ; [ DW_TAG_class_type ] [foo<void>] [line 5, size 8, align 8, offset 0] [def] [from ]
+!10 = !{!11, !19, !25}
+!11 = !{!"0x1c\00\000\000\000\000\000", null, !9, !12} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [from base]
+!12 = !{!"0x2\00base\003\008\008\000\000\000", !1, null, null, !13, null, null, null} ; [ DW_TAG_class_type ] [base] [line 3, size 8, align 8, offset 0] [def] [from ]
+!13 = !{!14}
+!14 = !{!"0x2e\00base\00base\00\003\000\000\000\006\00320\000\003", !1, !12, !15, null, null, null, i32 0, !18} ; [ DW_TAG_subprogram ] [line 3] [base]
+!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{null, !17}
+!17 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !12} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from base]
+!18 = !{i32 786468}
+!19 = !{!"0x2e\00operator=\00operator=\00_ZN3fooIvEaSES0_\006\000\000\000\006\00257\000\006", !1, !9, !20, null, null, null, i32 0, !24} ; [ DW_TAG_subprogram ] [line 6] [private] [operator=]
+!20 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !21, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!21 = !{null, !22, !23}
+!22 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from foo<void>]
+!23 = !{!"0x26\00\000\000\000\000\000", null, null, !9} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo<void>]
+!24 = !{i32 786468}
+!25 = !{!"0x2e\00foo\00foo\00\005\000\000\000\006\00320\000\005", !1, !9, !26, null, null, null, i32 0, !28} ; [ DW_TAG_subprogram ] [line 5] [foo]
+!26 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !27, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!27 = !{null, !22}
+!28 = !{i32 786468}
+!29 = !{!30}
+!30 = !{!"0x2f\00T\000\000", null, null, null} ; [ DW_TAG_template_type_parameter ]
+!31 = !{!"0x2e\00bar\00bar\00\009\000\000\000\006\00320\000\009", !1, !6, !32, null, null, null, i32 0, !35} ; [ DW_TAG_subprogram ] [line 9] [bar]
+!32 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !33, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!33 = !{null, !34}
+!34 = !{!"0xf\00\000\0064\0064\000\001088", i32 0, null, !6} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from bar]
+!35 = !{i32 786468}
+!36 = !{i32 2, !"Dwarf Version", i32 3}
+!37 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/tu-composite.ll b/test/DebugInfo/tu-composite.ll
index 7a8ff5712659..6f052ee2a82d 100644
--- a/test/DebugInfo/tu-composite.ll
+++ b/test/DebugInfo/tu-composite.ll
@@ -91,13 +91,13 @@ define void @_ZN1C3fooEv(%struct.C* %this) unnamed_addr #0 align 2 {
 entry:
   %this.addr = alloca %struct.C*, align 8
   store %struct.C* %this, %struct.C** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.C** %this.addr}, metadata !36), !dbg !38
+  call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !36, metadata !{!"0x102"}), !dbg !38
   %this1 = load %struct.C** %this.addr
   ret void, !dbg !39
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind ssp uwtable
 define void @_Z4testv() #0 {
@@ -108,12 +108,12 @@ entry:
   %e = alloca %"struct.D::Nested", align 1
   %p = alloca %"struct.D::Nested2"*, align 8
   %t = alloca %"struct.D::virt", align 8
-  call void @llvm.dbg.declare(metadata !{%struct.bar* %B}, metadata !40), !dbg !42
-  call void @llvm.dbg.declare(metadata !{[3 x %struct.bar]* %A}, metadata !43), !dbg !47
-  call void @llvm.dbg.declare(metadata !{%struct.bar* %B2}, metadata !48), !dbg !50
-  call void @llvm.dbg.declare(metadata !{%"struct.D::Nested"* %e}, metadata !51), !dbg !52
-  call void @llvm.dbg.declare(metadata !{%"struct.D::Nested2"** %p}, metadata !53), !dbg !55
-  call void @llvm.dbg.declare(metadata !{%"struct.D::virt"* %t}, metadata !56), !dbg !57
+  call void @llvm.dbg.declare(metadata %struct.bar* %B, metadata !40, metadata !{!"0x102"}), !dbg !42
+  call void @llvm.dbg.declare(metadata [3 x %struct.bar]* %A, metadata !43, metadata !{!"0x102"}), !dbg !47
+  call void @llvm.dbg.declare(metadata %struct.bar* %B2, metadata !48, metadata !{!"0x102"}), !dbg !50
+  call void @llvm.dbg.declare(metadata %"struct.D::Nested"* %e, metadata !51, metadata !{!"0x102"}), !dbg !52
+  call void @llvm.dbg.declare(metadata %"struct.D::Nested2"** %p, metadata !53, metadata !{!"0x102"}), !dbg !55
+  call void @llvm.dbg.declare(metadata %"struct.D::virt"* %t, metadata !56, metadata !{!"0x102"}), !dbg !57
   ret void, !dbg !58
 }
 
@@ -123,63 +123,63 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!35, !59}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !30, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [tmp.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"tmp.cpp", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !18, metadata !19, metadata !22, metadata !23, metadata !24}
-!4 = metadata !{i32 786451, metadata !1, null, metadata !"C", i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !5, i32 0, metadata !"_ZTS1C", null, metadata !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 1, size 64, align 64, offset 0] [def] [from ]
-!5 = metadata !{metadata !6, metadata !13}
-!6 = metadata !{i32 786445, metadata !1, metadata !7, metadata !"_vptr$C", i32 0, i64 64, i64 0, i64 0, i32 64, metadata !8} ; [ DW_TAG_member ] [_vptr$C] [line 0, size 64, align 0, offset 0] [artificial] [from ]
-!7 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [tmp.cpp]
-!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __vtbl_ptr_type]
-!9 = metadata !{i32 786447, null, null, metadata !"__vtbl_ptr_type", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [__vtbl_ptr_type] [line 0, size 64, align 0, offset 0] [from ]
-!10 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!11 = metadata !{metadata !12}
-!12 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!13 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1C", metadata !"foo", metadata !"foo", metadata !"_ZN1C3fooEv", i32 2, metadata !14, i1 false, i1 false, i32 1, i32 0, metadata !"_ZTS1C", i32 256, i1 false, null, null, i32 0, metadata !17, i32 2} ; [ DW_TAG_subprogram ] [line 2] [foo]
-!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!15 = metadata !{null, metadata !16}
-!16 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
-!17 = metadata !{i32 786468}
-!18 = metadata !{i32 786451, metadata !1, null, metadata !"bar", i32 7, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTS3bar"} ; [ DW_TAG_structure_type ] [bar] [line 7, size 8, align 8, offset 0] [def] [from ]
-!19 = metadata !{i32 786451, metadata !1, null, metadata !"D", i32 9, i64 8, i64 8, i32 0, i32 0, null, metadata !20, i32 0, null, null, metadata !"_ZTS1D"} ; [ DW_TAG_structure_type ] [D] [line 9, size 8, align 8, offset 0] [def] [from ]
-!20 = metadata !{metadata !21}
-!21 = metadata !{i32 786445, metadata !1, metadata !"_ZTS1D", metadata !"a", i32 11, i64 0, i64 0, i64 0, i32 4096, metadata !12, null} ; [ DW_TAG_member ] [a] [line 11, size 0, align 0, offset 0] [static] [from int]
-!22 = metadata !{i32 786451, metadata !1, metadata !"_ZTS1D", metadata !"Nested", i32 12, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, metadata !"_ZTSN1D6NestedE"} ; [ DW_TAG_structure_type ] [Nested] [line 12, size 8, align 8, offset 0] [def] [from ]
-!23 = metadata !{i32 786451, metadata !1, metadata !"_ZTS1D", metadata !"Nested2", i32 13, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, metadata !"_ZTSN1D7Nested2E"} ; [ DW_TAG_structure_type ] [Nested2] [line 13, size 0, align 0, offset 0] [decl] [from ]
-!24 = metadata !{i32 786451, metadata !1, metadata !"_ZTS1D", metadata !"virt<bar>", i32 15, i64 64, i64 64, i32 0, i32 0, null, metadata !25, i32 0, null, metadata !28, metadata !"_ZTSN1D4virtI3barEE"} ; [ DW_TAG_structure_type ] [virt<bar>] [line 15, size 64, align 64, offset 0] [def] [from ]
-!25 = metadata !{metadata !26}
-!26 = metadata !{i32 786445, metadata !1, metadata !"_ZTSN1D4virtI3barEE", metadata !"values", i32 16, i64 64, i64 64, i64 0, i32 0, metadata !27} ; [ DW_TAG_member ] [values] [line 16, size 64, align 64, offset 0] [from ]
-!27 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS3bar"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3bar]
-!28 = metadata !{metadata !29}
-!29 = metadata !{i32 786479, null, metadata !"T", metadata !"_ZTS3bar", null, i32 0, i32 0} ; [ DW_TAG_template_type_parameter ]
-!30 = metadata !{metadata !31, metadata !32}
-!31 = metadata !{i32 786478, metadata !1, null, metadata !"foo", metadata !"foo", metadata !"_ZN1C3fooEv", i32 4, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%struct.C*)* @_ZN1C3fooEv, null, metadata !13, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [foo]
-!32 = metadata !{i32 786478, metadata !1, metadata !7, metadata !"test", metadata !"test", metadata !"_Z4testv", i32 20, metadata !33, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z4testv, null, null, metadata !2, i32 20} ; [ DW_TAG_subprogram ] [line 20] [def] [test]
-!33 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !34, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!34 = metadata !{null}
-!35 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!36 = metadata !{i32 786689, metadata !31, metadata !"this", null, i32 16777216, metadata !37, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!37 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C]
-!38 = metadata !{i32 0, i32 0, metadata !31, null}
-!39 = metadata !{i32 5, i32 0, metadata !31, null}
-!40 = metadata !{i32 786688, metadata !32, metadata !"B", metadata !7, i32 21, metadata !41, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [B] [line 21]
-!41 = metadata !{i32 786454, metadata !1, null, metadata !"baz", i32 8, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS3bar"} ; [ DW_TAG_typedef ] [baz] [line 8, size 0, align 0, offset 0] [from _ZTS3bar]
-!42 = metadata !{i32 21, i32 0, metadata !32, null}
-!43 = metadata !{i32 786688, metadata !32, metadata !"A", metadata !7, i32 22, metadata !44, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [A] [line 22]
-!44 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 24, i64 8, i32 0, i32 0, metadata !"_ZTS3bar", metadata !45, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 24, align 8, offset 0] [from _ZTS3bar]
-!45 = metadata !{metadata !46}
-!46 = metadata !{i32 786465, i64 0, i64 3}        ; [ DW_TAG_subrange_type ] [0, 2]
-!47 = metadata !{i32 22, i32 0, metadata !32, null}
-!48 = metadata !{i32 786688, metadata !32, metadata !"B2", metadata !7, i32 23, metadata !49, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [B2] [line 23]
-!49 = metadata !{i32 786454, metadata !1, metadata !"_ZTS1D", metadata !"baz2", i32 10, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS3bar"} ; [ DW_TAG_typedef ] [baz2] [line 10, size 0, align 0, offset 0] [from _ZTS3bar]
-!50 = metadata !{i32 23, i32 0, metadata !32, null}
-!51 = metadata !{i32 786688, metadata !32, metadata !"e", metadata !7, i32 24, metadata !22, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [e] [line 24]
-!52 = metadata !{i32 24, i32 0, metadata !32, null}
-!53 = metadata !{i32 786688, metadata !32, metadata !"p", metadata !7, i32 25, metadata !54, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [p] [line 25]
-!54 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTSN1D7Nested2E"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTSN1D7Nested2E]
-!55 = metadata !{i32 25, i32 0, metadata !32, null}
-!56 = metadata !{i32 786688, metadata !32, metadata !"t", metadata !7, i32 26, metadata !24, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 26]
-!57 = metadata !{i32 26, i32 0, metadata !32, null}
-!58 = metadata !{i32 27, i32 0, metadata !32, null}
-!59 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4\000\00\000\00\000", !1, !2, !3, !30, !2, !2} ; [ DW_TAG_compile_unit ] [tmp.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"tmp.cpp", !"."}
+!2 = !{}
+!3 = !{!4, !18, !19, !22, !23, !24}
+!4 = !{!"0x13\00C\001\0064\0064\000\000\000", !1, null, null, !5, !"_ZTS1C", null, !"_ZTS1C"} ; [ DW_TAG_structure_type ] [C] [line 1, size 64, align 64, offset 0] [def] [from ]
+!5 = !{!6, !13}
+!6 = !{!"0xd\00_vptr$C\000\0064\000\000\0064", !1, !7, !8} ; [ DW_TAG_member ] [_vptr$C] [line 0, size 64, align 0, offset 0] [artificial] [from ]
+!7 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [tmp.cpp]
+!8 = !{!"0xf\00\000\0064\000\000\000", null, null, !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __vtbl_ptr_type]
+!9 = !{!"0xf\00__vtbl_ptr_type\000\0064\000\000\000", null, null, !10} ; [ DW_TAG_pointer_type ] [__vtbl_ptr_type] [line 0, size 64, align 0, offset 0] [from ]
+!10 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !{!12}
+!12 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!13 = !{!"0x2e\00foo\00foo\00_ZN1C3fooEv\002\000\000\001\006\00256\000\002", !1, !"_ZTS1C", !14, !"_ZTS1C", null, null, i32 0, !17} ; [ DW_TAG_subprogram ] [line 2] [foo]
+!14 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = !{null, !16}
+!16 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1C]
+!17 = !{i32 786468}
+!18 = !{!"0x13\00bar\007\008\008\000\000\000", !1, null, null, !2, null, null, !"_ZTS3bar"} ; [ DW_TAG_structure_type ] [bar] [line 7, size 8, align 8, offset 0] [def] [from ]
+!19 = !{!"0x13\00D\009\008\008\000\000\000", !1, null, null, !20, null, null, !"_ZTS1D"} ; [ DW_TAG_structure_type ] [D] [line 9, size 8, align 8, offset 0] [def] [from ]
+!20 = !{!21}
+!21 = !{!"0xd\00a\0011\000\000\000\004096", !1, !"_ZTS1D", !12, null} ; [ DW_TAG_member ] [a] [line 11, size 0, align 0, offset 0] [static] [from int]
+!22 = !{!"0x13\00Nested\0012\008\008\000\000\000", !1, !"_ZTS1D", null, !2, null, null, !"_ZTSN1D6NestedE"} ; [ DW_TAG_structure_type ] [Nested] [line 12, size 8, align 8, offset 0] [def] [from ]
+!23 = !{!"0x13\00Nested2\0013\000\000\000\004\000", !1, !"_ZTS1D", null, null, null, null, !"_ZTSN1D7Nested2E"} ; [ DW_TAG_structure_type ] [Nested2] [line 13, size 0, align 0, offset 0] [decl] [from ]
+!24 = !{!"0x13\00virt<bar>\0015\0064\0064\000\000\000", !1, !"_ZTS1D", null, !25, null, !28, !"_ZTSN1D4virtI3barEE"} ; [ DW_TAG_structure_type ] [virt<bar>] [line 15, size 64, align 64, offset 0] [def] [from ]
+!25 = !{!26}
+!26 = !{!"0xd\00values\0016\0064\0064\000\000", !1, !"_ZTSN1D4virtI3barEE", !27} ; [ DW_TAG_member ] [values] [line 16, size 64, align 64, offset 0] [from ]
+!27 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS3bar"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS3bar]
+!28 = !{!29}
+!29 = !{!"0x2f\00T\000\000", null, !"_ZTS3bar", null} ; [ DW_TAG_template_type_parameter ]
+!30 = !{!31, !32}
+!31 = !{!"0x2e\00foo\00foo\00_ZN1C3fooEv\004\000\001\000\006\00256\000\004", !1, null, !14, null, void (%struct.C*)* @_ZN1C3fooEv, null, !13, !2} ; [ DW_TAG_subprogram ] [line 4] [def] [foo]
+!32 = !{!"0x2e\00test\00test\00_Z4testv\0020\000\001\000\006\00256\000\0020", !1, !7, !33, null, void ()* @_Z4testv, null, null, !2} ; [ DW_TAG_subprogram ] [line 20] [def] [test]
+!33 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !34, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!34 = !{null}
+!35 = !{i32 2, !"Dwarf Version", i32 2}
+!36 = !{!"0x101\00this\0016777216\001088", !31, null, !37} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!37 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1C"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1C]
+!38 = !MDLocation(line: 0, scope: !31)
+!39 = !MDLocation(line: 5, scope: !31)
+!40 = !{!"0x100\00B\0021\000", !32, !7, !41} ; [ DW_TAG_auto_variable ] [B] [line 21]
+!41 = !{!"0x16\00baz\008\000\000\000\000", !1, null, !"_ZTS3bar"} ; [ DW_TAG_typedef ] [baz] [line 8, size 0, align 0, offset 0] [from _ZTS3bar]
+!42 = !MDLocation(line: 21, scope: !32)
+!43 = !{!"0x100\00A\0022\000", !32, !7, !44} ; [ DW_TAG_auto_variable ] [A] [line 22]
+!44 = !{!"0x1\00\000\0024\008\000\000", null, null, !"_ZTS3bar", !45, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 24, align 8, offset 0] [from _ZTS3bar]
+!45 = !{!46}
+!46 = !{!"0x21\000\003"}        ; [ DW_TAG_subrange_type ] [0, 2]
+!47 = !MDLocation(line: 22, scope: !32)
+!48 = !{!"0x100\00B2\0023\000", !32, !7, !49} ; [ DW_TAG_auto_variable ] [B2] [line 23]
+!49 = !{!"0x16\00baz2\0010\000\000\000\000", !1, !"_ZTS1D", !"_ZTS3bar"} ; [ DW_TAG_typedef ] [baz2] [line 10, size 0, align 0, offset 0] [from _ZTS3bar]
+!50 = !MDLocation(line: 23, scope: !32)
+!51 = !{!"0x100\00e\0024\000", !32, !7, !22} ; [ DW_TAG_auto_variable ] [e] [line 24]
+!52 = !MDLocation(line: 24, scope: !32)
+!53 = !{!"0x100\00p\0025\000", !32, !7, !54} ; [ DW_TAG_auto_variable ] [p] [line 25]
+!54 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTSN1D7Nested2E"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTSN1D7Nested2E]
+!55 = !MDLocation(line: 25, scope: !32)
+!56 = !{!"0x100\00t\0026\000", !32, !7, !24} ; [ DW_TAG_auto_variable ] [t] [line 26]
+!57 = !MDLocation(line: 26, scope: !32)
+!58 = !MDLocation(line: 27, scope: !32)
+!59 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/tu-member-pointer.ll b/test/DebugInfo/tu-member-pointer.ll
index cd37a9892f42..0e4b15a6ce16 100644
--- a/test/DebugInfo/tu-member-pointer.ll
+++ b/test/DebugInfo/tu-member-pointer.ll
@@ -16,15 +16,15 @@
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!10, !11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !2, metadata !5, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [foo.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"foo.cpp", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !1, null, metadata !"Foo", i32 1, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, metadata !"_ZTS3Foo"} ; [ DW_TAG_structure_type ] [Foo] [line 1, size 0, align 0, offset 0] [decl] [from ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !7, i32 4, metadata !8, i32 0, i32 1, i64* @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def]
-!7 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [foo.cpp]
-!8 = metadata !{i32 786463, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9, metadata !"_ZTS3Foo"} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from int]
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4\000\00\000\00\000", !1, !2, !3, !2, !5, !2} ; [ DW_TAG_compile_unit ] [foo.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"foo.cpp", !"."}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00Foo\001\000\000\000\004\000", !1, null, null, null, null, null, !"_ZTS3Foo"} ; [ DW_TAG_structure_type ] [Foo] [line 1, size 0, align 0, offset 0] [decl] [from ]
+!5 = !{!6}
+!6 = !{!"0x34\00x\00x\00\004\000\001", null, !7, !8, i64* @x, null} ; [ DW_TAG_variable ] [x] [line 4] [def]
+!7 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [foo.cpp]
+!8 = !{!"0x1f\00\000\000\000\000\000", null, null, !9, !"_ZTS3Foo"} ; [ DW_TAG_ptr_to_member_type ] [line 0, size 0, align 0, offset 0] [from int]
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{i32 2, !"Dwarf Version", i32 2}
+!11 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/two-cus-from-same-file.ll b/test/DebugInfo/two-cus-from-same-file.ll
index 2ab82a9a244a..b810a914960e 100644
--- a/test/DebugInfo/two-cus-from-same-file.ll
+++ b/test/DebugInfo/two-cus-from-same-file.ll
@@ -23,51 +23,51 @@ declare i32 @puts(i8* nocapture) nounwind
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !21), !dbg !26
-  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !22), !dbg !27
+  tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !21, metadata !{!"0x102"}), !dbg !26
+  tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !22, metadata !{!"0x102"}), !dbg !27
   %puts = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @str1, i32 0, i32 0)), !dbg !28
   tail call void @foo() nounwind, !dbg !30
   ret i32 0, !dbg !31
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0, !9}
 !llvm.module.flags = !{!33}
 
-!0 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !32, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @foo, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !32} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null}
-!9 = metadata !{i32 786449, metadata !32, i32 12, metadata !"clang version 3.2 (trunk 156513)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !10, metadata !1, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ]
-!10 = metadata !{metadata !12}
-!12 = metadata !{i32 786478, metadata !32, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 11, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !19, i32 11} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!14 = metadata !{metadata !15, metadata !15, metadata !16}
-!15 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!16 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !17} ; [ DW_TAG_pointer_type ]
-!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ]
-!18 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!19 = metadata !{metadata !21, metadata !22}
-!21 = metadata !{i32 786689, metadata !12, metadata !"argc", metadata !6, i32 16777227, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 786689, metadata !12, metadata !"argv", metadata !6, i32 33554443, metadata !16, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!23 = metadata !{i32 6, i32 3, metadata !24, null}
-!24 = metadata !{i32 786443, metadata !32, metadata !5, i32 5, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
-!25 = metadata !{i32 7, i32 1, metadata !24, null}
-!26 = metadata !{i32 11, i32 14, metadata !12, null}
-!27 = metadata !{i32 11, i32 26, metadata !12, null}
-!28 = metadata !{i32 12, i32 3, metadata !29, null}
-!29 = metadata !{i32 786443, metadata !32, metadata !12, i32 11, i32 34, i32 0} ; [ DW_TAG_lexical_block ]
-!30 = metadata !{i32 13, i32 3, metadata !29, null}
-!31 = metadata !{i32 14, i32 3, metadata !29, null}
-!32 = metadata !{metadata !"foo.c", metadata !"/tmp"}
+!0 = !{!"0x11\0012\00clang version 3.2 (trunk 156513)\001\00\000\00\001", !32, !1, !1, !3, !1, !1} ; [ DW_TAG_compile_unit ]
+!1 = !{}
+!3 = !{!5}
+!5 = !{!"0x2e\00foo\00foo\00\005\000\001\000\006\00256\001\005", !32, !6, !7, null, void ()* @foo, null, null, !1} ; [ DW_TAG_subprogram ]
+!6 = !{!"0x29", !32} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null}
+!9 = !{!"0x11\0012\00clang version 3.2 (trunk 156513)\001\00\000\00\001", !32, !1, !1, !10, !1, !1} ; [ DW_TAG_compile_unit ]
+!10 = !{!12}
+!12 = !{!"0x2e\00main\00main\00\0011\000\001\000\006\00256\001\0011", !32, !6, !13, null, i32 (i32, i8**)* @main, null, null, !19} ; [ DW_TAG_subprogram ]
+!13 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !14, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!14 = !{!15, !15, !16}
+!15 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!16 = !{!"0xf\00\000\0032\0032\000\000", null, null, !17} ; [ DW_TAG_pointer_type ]
+!17 = !{!"0xf\00\000\0032\0032\000\000", null, null, !18} ; [ DW_TAG_pointer_type ]
+!18 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ]
+!19 = !{!21, !22}
+!21 = !{!"0x101\00argc\0016777227\000", !12, !6, !15} ; [ DW_TAG_arg_variable ]
+!22 = !{!"0x101\00argv\0033554443\000", !12, !6, !16} ; [ DW_TAG_arg_variable ]
+!23 = !MDLocation(line: 6, column: 3, scope: !24)
+!24 = !{!"0xb\005\0016\000", !32, !5} ; [ DW_TAG_lexical_block ]
+!25 = !MDLocation(line: 7, column: 1, scope: !24)
+!26 = !MDLocation(line: 11, column: 14, scope: !12)
+!27 = !MDLocation(line: 11, column: 26, scope: !12)
+!28 = !MDLocation(line: 12, column: 3, scope: !29)
+!29 = !{!"0xb\0011\0034\000", !32, !12} ; [ DW_TAG_lexical_block ]
+!30 = !MDLocation(line: 13, column: 3, scope: !29)
+!31 = !MDLocation(line: 14, column: 3, scope: !29)
+!32 = !{!"foo.c", !"/tmp"}
 
 ; This test is simple to be cross platform (many targets don't yet have
 ; sufficiently good DWARF emission and/or dumping)
 ; CHECK: {{DW_TAG_compile_unit}}
 ; CHECK: {{foo\.c}}
 
-!33 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!33 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/DebugInfo/typedef.ll b/test/DebugInfo/typedef.ll
index 40cecdfc3930..b8528be07bd1 100644
--- a/test/DebugInfo/typedef.ll
+++ b/test/DebugInfo/typedef.ll
@@ -18,15 +18,15 @@
 !llvm.module.flags = !{!8, !9}
 !llvm.ident = !{!10}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !2, metadata !3, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/typedef.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"typedef.cpp", metadata !"/tmp/dbginfo"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786484, i32 0, null, metadata !"y", metadata !"y", metadata !"", metadata !5, i32 2, metadata !6, i32 0, i32 1, i8** @y, null} ; [ DW_TAG_variable ] [y] [line 2] [def]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/typedef.cpp]
-!6 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from x]
-!7 = metadata !{i32 786454, metadata !1, null, metadata !"x", i32 1, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_typedef ] [x] [line 1, size 0, align 0, offset 0] [from ]
-!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!10 = metadata !{metadata !"clang version 3.5.0 "}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !2, !3, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/typedef.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"typedef.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x34\00y\00y\00\002\000\001", null, !5, !6, i8** @y, null} ; [ DW_TAG_variable ] [y] [line 2] [def]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/dbginfo/typedef.cpp]
+!6 = !{!"0xf\00\000\0064\0064\000\000", null, null, !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from x]
+!7 = !{!"0x16\00x\001\000\000\000\000", !1, null, null} ; [ DW_TAG_typedef ] [x] [line 1, size 0, align 0, offset 0] [from ]
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 2}
+!10 = !{!"clang version 3.5.0 "}
 
diff --git a/test/DebugInfo/unconditional-branch.ll b/test/DebugInfo/unconditional-branch.ll
index 6c31375f4644..c82f1bab10b3 100644
--- a/test/DebugInfo/unconditional-branch.ll
+++ b/test/DebugInfo/unconditional-branch.ll
@@ -22,20 +22,21 @@ define void @foo(i32 %i) #0 {
 entry:
   %i.addr = alloca i32, align 4
   store i32 %i, i32* %i.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %i.addr}, metadata !12), !dbg !13
+  call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !12, metadata !{!"0x102"}), !dbg !13
   %0 = load i32* %i.addr, align 4, !dbg !14
   switch i32 %0, label %sw.default [
   ], !dbg !14
 
+sw.epilog:                                        ; preds = %sw.default
+  ret void, !dbg !17
+
 sw.default:                                       ; preds = %entry
   br label %sw.epilog, !dbg !15
 
-sw.epilog:                                        ; preds = %sw.default
-  ret void, !dbg !17
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -44,21 +45,21 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!9, !10}
 !llvm.ident = !{!11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 (204712)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [D:\work\EPRs\396363/test.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"test.c", metadata !"D:\5Cwork\5CEPRs\5C396363"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [D:\work\EPRs\396363/test.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!11 = metadata !{metadata !"clang version 3.5.0 (204712)"}
-!12 = metadata !{i32 786689, metadata !4, metadata !"i", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 1]
-!13 = metadata !{i32 1, i32 0, metadata !4, null}
-!14 = metadata !{i32 2, i32 0, metadata !4, null}
-!15 = metadata !{i32 4, i32 0, metadata !16, null}
-!16 = metadata !{i32 786443, metadata !1, metadata !4, i32 2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [D:\work\EPRs\396363/test.c]
-!17 = metadata !{i32 6, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5.0 (204712)\000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [D:\work\EPRs\396363/test.c] [DW_LANG_C99]
+!1 = !{!"test.c", !"D:\5Cwork\5CEPRs\5C396363"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, void (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [D:\work\EPRs\396363/test.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 1, !"Debug Info Version", i32 2}
+!11 = !{!"clang version 3.5.0 (204712)"}
+!12 = !{!"0x101\00i\0016777217\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [i] [line 1]
+!13 = !MDLocation(line: 1, scope: !4)
+!14 = !MDLocation(line: 2, scope: !4)
+!15 = !MDLocation(line: 4, scope: !16)
+!16 = !{!"0xb\002\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [D:\work\EPRs\396363/test.c]
+!17 = !MDLocation(line: 6, scope: !4)
diff --git a/test/DebugInfo/varargs.ll b/test/DebugInfo/varargs.ll
index ddfcd858f539..907d2e309a5e 100644
--- a/test/DebugInfo/varargs.ll
+++ b/test/DebugInfo/varargs.ll
@@ -27,6 +27,10 @@
 ; CHECK-NOT: DW_TAG
 ; CHECK: DW_TAG_formal_parameter
 ; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_variable
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_variable
+; CHECK-NOT: DW_TAG
 ; CHECK: DW_TAG_unspecified_parameters
 ;
 ; Variadic C++ member function.
@@ -51,15 +55,15 @@ define void @_Z1biz(i32 %c, ...) #0 {
   %a = alloca %struct.A, align 1
   %fptr = alloca void (i32, ...)*, align 8
   store i32 %c, i32* %1, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %1}, metadata !21), !dbg !22
-  call void @llvm.dbg.declare(metadata !{%struct.A* %a}, metadata !23), !dbg !24
-  call void @llvm.dbg.declare(metadata !{void (i32, ...)** %fptr}, metadata !25), !dbg !27
+  call void @llvm.dbg.declare(metadata i32* %1, metadata !21, metadata !{!"0x102"}), !dbg !22
+  call void @llvm.dbg.declare(metadata %struct.A* %a, metadata !23, metadata !{!"0x102"}), !dbg !24
+  call void @llvm.dbg.declare(metadata void (i32, ...)** %fptr, metadata !25, metadata !{!"0x102"}), !dbg !27
   store void (i32, ...)* @_Z1biz, void (i32, ...)** %fptr, align 8, !dbg !27
   ret void, !dbg !28
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind ssp uwtable }
 attributes #1 = { nounwind readnone }
@@ -68,32 +72,31 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!18, !19}
 !llvm.ident = !{!20}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !13, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenCXX/debug-info-varargs.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"llvm/tools/clang/test/CodeGenCXX/debug-info-varargs.cpp", metadata !"radar/13690847"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !1, null, metadata !"A", i32 3, i64 8, i64 8, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 3, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"a", metadata !"a", metadata !"_ZN1A1aEiz", i32 6, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !12, i32 6} ; [ DW_TAG_subprogram ] [line 6] [a]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9, metadata !10, metadata !11}
-!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
-!10 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!11 = metadata !{i32 786456}
-!12 = metadata !{i32 786468}
-!13 = metadata !{metadata !14}
-!14 = metadata !{i32 786478, metadata !1, metadata !15, metadata !"b", metadata !"b", metadata !"_Z1biz", i32 13, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32, ...)* @_Z1biz, null, null, metadata !2, i32 13} ; [ DW_TAG_subprogram ] [line 13] [def] [b]
-!15 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenCXX/debug-info-varargs.cpp]
-!16 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!17 = metadata !{null, metadata !10, metadata !11}
-!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!20 = metadata !{metadata !"clang version 3.5 "}
-!21 = metadata !{i32 786689, metadata !14, metadata !"c", metadata !15, i32 16777229, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [c] [line 13]
-!22 = metadata !{i32 13, i32 0, metadata !14, null}
-!23 = metadata !{i32 786688, metadata !14, metadata !"a", metadata !15, i32 16, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 16]
-!24 = metadata !{i32 16, i32 0, metadata !14, null}
-!25 = metadata !{i32 786688, metadata !14, metadata !"fptr", metadata !15, i32 18, metadata !26, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [fptr] [line 18]
-!26 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!27 = metadata !{i32 18, i32 0, metadata !14, null}
-!28 = metadata !{i32 22, i32 0, metadata !14, null}
+!0 = !{!"0x11\004\00clang version 3.5 \000\00\000\00\000", !1, !2, !3, !13, !2, !2} ; [ DW_TAG_compile_unit ] [llvm/tools/clang/test/CodeGenCXX/debug-info-varargs.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"llvm/tools/clang/test/CodeGenCXX/debug-info-varargs.cpp", !"radar/13690847"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00A\003\008\008\000\000\000", !1, null, null, !5, null, null, !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 3, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!6}
+!6 = !{!"0x2e\00a\00a\00_ZN1A1aEiz\006\000\000\000\006\00256\000\006", !1, !"_ZTS1A", !7, null, null, null, i32 0, !12} ; [ DW_TAG_subprogram ] [line 6] [a]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9, !10, null}
+!9 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!10 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = !{i32 786468}
+!13 = !{!14}
+!14 = !{!"0x2e\00b\00b\00_Z1biz\0013\000\001\000\006\00256\000\0013", !1, !15, !16, null, void (i32, ...)* @_Z1biz, null, null, !2} ; [ DW_TAG_subprogram ] [line 13] [def] [b]
+!15 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [llvm/tools/clang/test/CodeGenCXX/debug-info-varargs.cpp]
+!16 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !17, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!17 = !{null, !10, null}
+!18 = !{i32 2, !"Dwarf Version", i32 2}
+!19 = !{i32 1, !"Debug Info Version", i32 2}
+!20 = !{!"clang version 3.5 "}
+!21 = !{!"0x101\00c\0016777229\000", !14, !15, !10} ; [ DW_TAG_arg_variable ] [c] [line 13]
+!22 = !MDLocation(line: 13, scope: !14)
+!23 = !{!"0x100\00a\0016\000", !14, !15, !4} ; [ DW_TAG_auto_variable ] [a] [line 16]
+!24 = !MDLocation(line: 16, scope: !14)
+!25 = !{!"0x100\00fptr\0018\000", !14, !15, !26} ; [ DW_TAG_auto_variable ] [fptr] [line 18]
+!26 = !{!"0xf\00\000\0064\0064\000\000", null, null, !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!27 = !MDLocation(line: 18, scope: !14)
+!28 = !MDLocation(line: 22, scope: !14)
diff --git a/test/DebugInfo/version.ll b/test/DebugInfo/version.ll
index 9a201ebf3091..6ee33b6a940e 100644
--- a/test/DebugInfo/version.ll
+++ b/test/DebugInfo/version.ll
@@ -18,15 +18,15 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 185475)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !"CodeGen/dwarf-version.c", metadata !"test"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [main]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!10 = metadata !{i32 7, i32 0, metadata !4, null}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 (trunk 185475)\000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ]
+!1 = !{!"CodeGen/dwarf-version.c", !"test"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00main\00main\00\006\000\001\000\006\00256\000\006", !1, !5, !6, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 6] [def] [main]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 3}
+!10 = !MDLocation(line: 7, scope: !4)
+!11 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/ExecutionEngine/2002-12-16-ArgTest.ll b/test/ExecutionEngine/2002-12-16-ArgTest.ll
index 4c03519a85af..eb2fe8c04832 100644
--- a/test/ExecutionEngine/2002-12-16-ArgTest.ll
+++ b/test/ExecutionEngine/2002-12-16-ArgTest.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 @.LC0 = internal global [10 x i8] c"argc: %d\0A\00"		; <[10 x i8]*> [#uses=1]
 
diff --git a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
index 3182193453ae..68fdefefa542 100644
--- a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
+++ b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 define i32 @foo(i32 %X, i32 %Y, double %A) {
 	%cond212 = fcmp une double %A, 1.000000e+00		; <i1> [#uses=1]
diff --git a/test/ExecutionEngine/2003-01-04-LoopTest.ll b/test/ExecutionEngine/2003-01-04-LoopTest.ll
index 3e27e0607ba9..5a0311dd9393 100644
--- a/test/ExecutionEngine/2003-01-04-LoopTest.ll
+++ b/test/ExecutionEngine/2003-01-04-LoopTest.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 define i32 @main() {
 	call i32 @mylog( i32 4 )		; <i32>:1 [#uses=0]
diff --git a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
index 80e19ba19320..038d7500101f 100644
--- a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
+++ b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 define i32 @bar(i8* %X) {
         ; pointer should be 4 byte aligned!
diff --git a/test/ExecutionEngine/2003-05-06-LivenessClobber.ll b/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
index 6f61aa68b67e..576ef7cf6380 100644
--- a/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
+++ b/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
@@ -1,7 +1,6 @@
 ; This testcase should return with an exit code of 1.
 ;
 ; RUN: not %lli %s
-; XFAIL: arm
 
 @test = global i64 0		; <i64*> [#uses=1]
 
diff --git a/test/ExecutionEngine/2003-05-07-ArgumentTest.ll b/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
index 236be18d96ef..42db5fe93fcd 100644
--- a/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
+++ b/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s test
-; XFAIL: arm
 
 declare i32 @puts(i8*)
 
diff --git a/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll b/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
deleted file mode 100644
index 45279adbe57b..000000000000
--- a/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: %lli %s > /dev/null
-
-target datalayout = "e-p:32:32"
-
-define i32 @main() {
-entry:
-	br label %endif
-then:		; No predecessors!
-	br label %endif
-endif:		; preds = %then, %entry
-	%x = phi i32 [ 4, %entry ], [ 27, %then ]		; <i32> [#uses=0]
-	%result = phi i32 [ 32, %then ], [ 0, %entry ]		; <i32> [#uses=0]
-	ret i32 0
-}
-
diff --git a/test/ExecutionEngine/2003-06-04-bzip2-bug.ll b/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
deleted file mode 100644
index 4342aa440975..000000000000
--- a/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: %lli %s > /dev/null
-
-; Testcase distilled from 256.bzip2.
-
-target datalayout = "e-p:32:32"
-
-define i32 @main() {
-entry:
-	br label %loopentry.0
-loopentry.0:		; preds = %loopentry.0, %entry
-	%h.0 = phi i32 [ %tmp.2, %loopentry.0 ], [ -1, %entry ]		; <i32> [#uses=1]
-	%tmp.2 = add i32 %h.0, 1		; <i32> [#uses=3]
-	%tmp.4 = icmp ne i32 %tmp.2, 0		; <i1> [#uses=1]
-	br i1 %tmp.4, label %loopentry.0, label %loopentry.1
-loopentry.1:		; preds = %loopentry.0
-	%h.1 = phi i32 [ %tmp.2, %loopentry.0 ]		; <i32> [#uses=1]
-	ret i32 %h.1
-}
-
diff --git a/test/ExecutionEngine/2003-06-05-PHIBug.ll b/test/ExecutionEngine/2003-06-05-PHIBug.ll
deleted file mode 100644
index 03b66c43a1ce..000000000000
--- a/test/ExecutionEngine/2003-06-05-PHIBug.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: %lli %s > /dev/null
-
-; Testcase distilled from 256.bzip2.
-
-target datalayout = "e-p:32:32"
-
-define i32 @main() {
-entry:
-	%X = add i32 1, -1		; <i32> [#uses=3]
-	br label %Next
-Next:		; preds = %entry
-	%A = phi i32 [ %X, %entry ]		; <i32> [#uses=0]
-	%B = phi i32 [ %X, %entry ]		; <i32> [#uses=0]
-	%C = phi i32 [ %X, %entry ]		; <i32> [#uses=1]
-	ret i32 %C
-}
-
diff --git a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
index 22dd4ccb44cf..bee409c14411 100644
--- a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
+++ b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 ; This testcase failed to work because two variable sized allocas confused the
 ; local register allocator.
diff --git a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
index 60dc3d6b7d43..63303fcff7c6 100644
--- a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
+++ b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 ;
 ; Regression Test: EnvironmentTest.ll
diff --git a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
index 04a5e1741bb0..8fb1bbbe9d77 100644
--- a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
+++ b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 ; This testcase exposes a bug in the local register allocator where it runs out
 ; of registers (due to too many overlapping live ranges), but then attempts to
diff --git a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
index 6e48c60db262..6513540903e8 100644
--- a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
+++ b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 @A = global i32 0		; <i32*> [#uses=1]
 
diff --git a/test/ExecutionEngine/2005-12-02-TailCallBug.ll b/test/ExecutionEngine/2005-12-02-TailCallBug.ll
index 8523b5e3f5b8..2ac8ad1795d8 100644
--- a/test/ExecutionEngine/2005-12-02-TailCallBug.ll
+++ b/test/ExecutionEngine/2005-12-02-TailCallBug.ll
@@ -1,6 +1,5 @@
 ; PR672
 ; RUN: %lli %s
-; XFAIL: arm
 
 define i32 @main() {
 	%f = bitcast i32 (i32, i32*, i32)* @check_tail to i32*		; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/Interpreter/intrinsics.ll b/test/ExecutionEngine/Interpreter/intrinsics.ll
new file mode 100644
index 000000000000..49d0bbee3048
--- /dev/null
+++ b/test/ExecutionEngine/Interpreter/intrinsics.ll
@@ -0,0 +1,35 @@
+; RUN: lli -O0 -force-interpreter < %s
+
+; libffi does not support fp128 so we don’t test it
+declare float  @llvm.sin.f32(float)
+declare double @llvm.sin.f64(double)
+declare float  @llvm.cos.f32(float)
+declare double @llvm.cos.f64(double)
+declare float  @llvm.floor.f32(float)
+declare double @llvm.floor.f64(double)
+declare float  @llvm.ceil.f32(float)
+declare double @llvm.ceil.f64(double)
+declare float  @llvm.trunc.f32(float)
+declare double @llvm.trunc.f64(double)
+declare float  @llvm.round.f32(float)
+declare double @llvm.round.f64(double)
+declare float  @llvm.copysign.f32(float, float)
+declare double @llvm.copysign.f64(double, double)
+
+define i32 @main() {
+  %sin32 = call float @llvm.sin.f32(float 0.000000e+00)
+  %sin64 = call double @llvm.sin.f64(double 0.000000e+00)
+  %cos32 = call float @llvm.cos.f32(float 0.000000e+00)
+  %cos64 = call double @llvm.cos.f64(double 0.000000e+00)
+  %floor32 = call float @llvm.floor.f32(float 0.000000e+00)
+  %floor64 = call double @llvm.floor.f64(double 0.000000e+00)
+  %ceil32 = call float @llvm.ceil.f32(float 0.000000e+00)
+  %ceil64 = call double @llvm.ceil.f64(double 0.000000e+00)
+  %trunc32 = call float @llvm.trunc.f32(float 0.000000e+00)
+  %trunc64 = call double @llvm.trunc.f64(double 0.000000e+00)
+  %round32 = call float @llvm.round.f32(float 0.000000e+00)
+  %round64 = call double @llvm.round.f64(double 0.000000e+00)
+  %copysign32 = call float @llvm.copysign.f32(float 0.000000e+00, float 0.000000e+00)
+  %copysign64 = call double @llvm.copysign.f64(double 0.000000e+00, double 0.000000e+00)
+  ret i32 0
+}
diff --git a/test/ExecutionEngine/Interpreter/lit.local.cfg b/test/ExecutionEngine/Interpreter/lit.local.cfg
new file mode 100644
index 000000000000..8cbaf03217d5
--- /dev/null
+++ b/test/ExecutionEngine/Interpreter/lit.local.cfg
@@ -0,0 +1,3 @@
+# These tests require foreign function calls
+if config.enable_ffi != "ON":
+    config.unsupported = True
diff --git a/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
index babd8f6a7803..eb2fe8c04832 100644
--- a/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
+++ b/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 @.LC0 = internal global [10 x i8] c"argc: %d\0A\00"		; <[10 x i8]*> [#uses=1]
 
diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll b/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll
index bbb81b88b16a..68fdefefa542 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-04-ArgumentBug.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @foo(i32 %X, i32 %Y, double %A) {
 	%cond212 = fcmp une double %A, 1.000000e+00		; <i1> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll b/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll
index 7574267bdcdc..5a0311dd9393 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-04-LoopTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	call i32 @mylog( i32 4 )		; <i32>:1 [#uses=0]
diff --git a/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll b/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll
index 261939ad2028..48576e7c83e6 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-04-PhiTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 ; <label>:0
diff --git a/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll b/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll
index f76f99832825..ed58e1184377 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-09-SARTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; We were accidentally inverting the signedness of right shifts.  Whoops.
 
diff --git a/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll b/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll
index 2b83bb9e43e8..4960e5969005 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-10-FUCOM.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	%X = fadd double 0.000000e+00, 1.000000e+00		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll b/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll
index d1ca2bee3994..038d7500101f 100644
--- a/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-01-15-AlignmentTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @bar(i8* %X) {
         ; pointer should be 4 byte aligned!
diff --git a/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll b/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
index 20ef0ff95cdd..576ef7cf6380 100644
--- a/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
+++ b/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
@@ -1,6 +1,6 @@
 ; This testcase should return with an exit code of 1.
 ;
-; RUN: not %lli_mcjit %s
+; RUN: not %lli %s
 
 @test = global i64 0		; <i64*> [#uses=1]
 
diff --git a/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll b/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
index c7bcc5450b09..42db5fe93fcd 100644
--- a/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s test
+; RUN: %lli %s test
 
 declare i32 @puts(i8*)
 
diff --git a/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll b/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll
index 0512575d1b4e..2f9b1439a38a 100644
--- a/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll
+++ b/test/ExecutionEngine/MCJIT/2003-05-11-PHIRegAllocBug.ll
@@ -1,6 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
-
-target datalayout = "e-p:32:32"
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 entry:
diff --git a/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll b/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll
index c292a818a091..3a257895b7c4 100644
--- a/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll
+++ b/test/ExecutionEngine/MCJIT/2003-06-04-bzip2-bug.ll
@@ -1,9 +1,7 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; Testcase distilled from 256.bzip2.
 
-target datalayout = "e-p:32:32"
-
 define i32 @main() {
 entry:
 	br label %loopentry.0
diff --git a/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll b/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll
index c0a83f5ecbdb..8a62e0632484 100644
--- a/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll
+++ b/test/ExecutionEngine/MCJIT/2003-06-05-PHIBug.ll
@@ -1,9 +1,7 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; Testcase distilled from 256.bzip2.
 
-target datalayout = "e-p:32:32"
-
 define i32 @main() {
 entry:
 	%X = add i32 1, -1		; <i32> [#uses=3]
diff --git a/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll b/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll
index 55ce689b865b..bee409c14411 100644
--- a/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll
+++ b/test/ExecutionEngine/MCJIT/2003-08-15-AllocaAssertion.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; This testcase failed to work because two variable sized allocas confused the
 ; local register allocator.
diff --git a/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll b/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
index 2e999967a2c5..63303fcff7c6 100644
--- a/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
+++ b/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ;
 ; Regression Test: EnvironmentTest.ll
diff --git a/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll b/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll
index 659901b9b36f..8fb1bbbe9d77 100644
--- a/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll
+++ b/test/ExecutionEngine/MCJIT/2003-08-23-RegisterAllocatePhysReg.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; This testcase exposes a bug in the local register allocator where it runs out
 ; of registers (due to too many overlapping live ranges), but then attempts to
diff --git a/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll b/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
index 68e31a7074dd..6513540903e8 100644
--- a/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
+++ b/test/ExecutionEngine/MCJIT/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 @A = global i32 0		; <i32*> [#uses=1]
 
diff --git a/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll b/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll
index 0bc010584f1e..6a3c0f2a31cb 100644
--- a/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll
+++ b/test/ExecutionEngine/MCJIT/2005-12-02-TailCallBug.ll
@@ -1,5 +1,5 @@
 ; PR672
-; RUN: %lli_mcjit %s
+; RUN: %lli %s
 ; XFAIL: mcjit-ia32
 
 define i32 @main() {
diff --git a/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll b/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
index 43188f2be481..418361163fcf 100644
--- a/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
+++ b/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -force-interpreter %s
+; RUN: %lli -force-interpreter %s
 ; PR1836
 
 define i32 @main() {
diff --git a/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
index 9897602250a6..349db69e4c63 100644
--- a/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
+++ b/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -force-interpreter=true %s | FileCheck %s
+; RUN: %lli -force-interpreter=true %s | FileCheck %s
 ; CHECK: 1
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll b/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll
index 7ed0e3870a3b..8bf03de321ba 100644
--- a/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll
+++ b/test/ExecutionEngine/MCJIT/2010-01-15-UndefValue.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -force-interpreter=true %s > /dev/null
+; RUN: %lli -force-interpreter=true %s > /dev/null
 
 define i32 @main() {
        %a = add i32 0, undef
diff --git a/test/ExecutionEngine/MCJIT/2013-04-04-RelocAddend.ll b/test/ExecutionEngine/MCJIT/2013-04-04-RelocAddend.ll
index 3f402c593116..d9ff347b0786 100644
--- a/test/ExecutionEngine/MCJIT/2013-04-04-RelocAddend.ll
+++ b/test/ExecutionEngine/MCJIT/2013-04-04-RelocAddend.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s
+; RUN: %lli %s
 ;
 ; Verify relocations to global symbols with addend work correctly.
 ;
diff --git a/test/ExecutionEngine/MCJIT/cross-module-a.ll b/test/ExecutionEngine/MCJIT/cross-module-a.ll
index fe8d3864c9b6..5d4e16fa4a2e 100644
--- a/test/ExecutionEngine/MCJIT/cross-module-a.ll
+++ b/test/ExecutionEngine/MCJIT/cross-module-a.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -extra-module=%p/Inputs/cross-module-b.ll %s > /dev/null
+; RUN: %lli -extra-module=%p/Inputs/cross-module-b.ll %s > /dev/null
 
 declare i32 @FB()
 
diff --git a/test/ExecutionEngine/MCJIT/cross-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/cross-module-sm-pic-a.ll
index ee26702dfae6..eb414249bdc3 100644
--- a/test/ExecutionEngine/MCJIT/cross-module-sm-pic-a.ll
+++ b/test/ExecutionEngine/MCJIT/cross-module-sm-pic-a.ll
@@ -1,5 +1,5 @@
-; RUN: %lli_mcjit -extra-module=%p/Inputs/cross-module-b.ll -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, i686, i386, arm
+; RUN: %lli -extra-module=%p/Inputs/cross-module-b.ll -relocation-model=pic -code-model=small %s > /dev/null
+; XFAIL: mips, i686, i386
 
 declare i32 @FB()
 
diff --git a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll
index 539c890f9186..9277ec4a8cae 100644
--- a/test/ExecutionEngine/MCJIT/eh-lg-pic.ll
+++ b/test/ExecutionEngine/MCJIT/eh-lg-pic.ll
@@ -1,5 +1,5 @@
-; RUN: %lli_mcjit -relocation-model=pic -code-model=large %s
-; XFAIL: cygwin, win32, mingw, mips, i686, i386, aarch64, arm
+; RUN: %lli -relocation-model=pic -code-model=large %s
+; XFAIL: cygwin, win32, mingw, mips, i686, i386, aarch64, arm, asan, msan
 declare i8* @__cxa_allocate_exception(i64)
 declare void @__cxa_throw(i8*, i8*, i8*)
 declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/MCJIT/eh-sm-pic.ll b/test/ExecutionEngine/MCJIT/eh-sm-pic.ll
index 00c2bb056ff3..37fb628cf0a2 100644
--- a/test/ExecutionEngine/MCJIT/eh-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/eh-sm-pic.ll
@@ -1,5 +1,5 @@
-; RUN: %lli_mcjit -relocation-model=pic -code-model=small %s
-; XFAIL: cygwin, win32, mingw, mips, i686, i386, darwin, aarch64, arm
+; RUN: %lli -relocation-model=pic -code-model=small %s
+; XFAIL: cygwin, win32, mingw, mips, i686, i386, darwin, aarch64, arm, asan, msan
 declare i8* @__cxa_allocate_exception(i64)
 declare void @__cxa_throw(i8*, i8*, i8*)
 declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/MCJIT/eh.ll b/test/ExecutionEngine/MCJIT/eh.ll
index c2135736ad48..9f73e3a9937f 100644
--- a/test/ExecutionEngine/MCJIT/eh.ll
+++ b/test/ExecutionEngine/MCJIT/eh.ll
@@ -1,5 +1,5 @@
-; RUN: %lli_mcjit %s
-; XFAIL: arm, cygwin, win32, mingw
+; RUN: %lli %s
+; XFAIL: arm, cygwin, win32, mingw, asan, msan
 declare i8* @__cxa_allocate_exception(i64)
 declare void @__cxa_throw(i8*, i8*, i8*)
 declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/MCJIT/fpbitcast.ll b/test/ExecutionEngine/MCJIT/fpbitcast.ll
index ea39617547ab..e6d06f83db49 100644
--- a/test/ExecutionEngine/MCJIT/fpbitcast.ll
+++ b/test/ExecutionEngine/MCJIT/fpbitcast.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -force-interpreter=true %s | FileCheck %s
+; RUN: %lli -force-interpreter=true %s | FileCheck %s
 ; CHECK: 40091eb8
 
 define i32 @test(double %x) {
diff --git a/test/ExecutionEngine/MCJIT/hello-sm-pic.ll b/test/ExecutionEngine/MCJIT/hello-sm-pic.ll
index 115846c78642..7db60f7aeeb4 100644
--- a/test/ExecutionEngine/MCJIT/hello-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/hello-sm-pic.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -relocation-model=pic -code-model=small %s > /dev/null
+; RUN: %lli -relocation-model=pic -code-model=small %s > /dev/null
 ; XFAIL: mips, i686, i386, darwin, aarch64, arm
 
 @.LC0 = internal global [12 x i8] c"Hello World\00"		; <[12 x i8]*> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/hello.ll b/test/ExecutionEngine/MCJIT/hello.ll
index b74470724deb..47e36a589563 100644
--- a/test/ExecutionEngine/MCJIT/hello.ll
+++ b/test/ExecutionEngine/MCJIT/hello.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 @.LC0 = internal global [12 x i8] c"Hello World\00"		; <[12 x i8]*> [#uses=1]
 
diff --git a/test/ExecutionEngine/MCJIT/hello2.ll b/test/ExecutionEngine/MCJIT/hello2.ll
index cd033d50947a..13b25884fee7 100644
--- a/test/ExecutionEngine/MCJIT/hello2.ll
+++ b/test/ExecutionEngine/MCJIT/hello2.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 @X = global i32 7		; <i32*> [#uses=0]
 @msg = internal global [13 x i8] c"Hello World\0A\00"		; <[13 x i8]*> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/load-object-a.ll b/test/ExecutionEngine/MCJIT/load-object-a.ll
index 9d27e41c6a50..080bf6cf5842 100644
--- a/test/ExecutionEngine/MCJIT/load-object-a.ll
+++ b/test/ExecutionEngine/MCJIT/load-object-a.ll
@@ -1,20 +1,20 @@
 ; This first line will generate the .o files for the next run line
 ; RUN: rm -rf %t.cachedir %t.cachedir2 %t.cachedir3
 ; RUN: mkdir -p %t.cachedir %t.cachedir2 %t.cachedir3
-; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -enable-cache-manager -object-cache-dir=%t.cachedir %s
+; RUN: %lli -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -enable-cache-manager -object-cache-dir=%t.cachedir %s
 
 ; Collect generated objects.
 ; RUN: find %t.cachedir -type f -name 'multi-module-?.o' -exec mv -v '{}' %t.cachedir2 ';'
 
 ; This line tests MCJIT object loading
-; RUN: %lli_mcjit -extra-object=%t.cachedir2/multi-module-b.o -extra-object=%t.cachedir2/multi-module-c.o %s
+; RUN: %lli -extra-object=%t.cachedir2/multi-module-b.o -extra-object=%t.cachedir2/multi-module-c.o %s
 
 ; These lines put the object files into an archive
 ; RUN: llvm-ar r %t.cachedir3/load-object.a %t.cachedir2/multi-module-b.o
 ; RUN: llvm-ar r %t.cachedir3/load-object.a %t.cachedir2/multi-module-c.o
 
 ; This line test MCJIT archive loading
-; RUN: %lli_mcjit -extra-archive=%t.cachedir3/load-object.a %s
+; RUN: %lli -extra-archive=%t.cachedir3/load-object.a %s
 
 declare i32 @FB()
 
diff --git a/test/ExecutionEngine/MCJIT/multi-module-a.ll b/test/ExecutionEngine/MCJIT/multi-module-a.ll
index 8848ca6117f4..dc3154c7b303 100644
--- a/test/ExecutionEngine/MCJIT/multi-module-a.ll
+++ b/test/ExecutionEngine/MCJIT/multi-module-a.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll %s > /dev/null
+; RUN: %lli -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll %s > /dev/null
 
 declare i32 @FB()
 
diff --git a/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll b/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll
index 66fafc924acd..8626626e75f6 100644
--- a/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll
+++ b/test/ExecutionEngine/MCJIT/multi-module-eh-a.ll
@@ -1,5 +1,5 @@
-; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-eh-b.ll %s
-; XFAIL: arm, cygwin, win32, mingw
+; RUN: %lli -extra-module=%p/Inputs/multi-module-eh-b.ll %s
+; XFAIL: arm, cygwin, win32, mingw, asan, msan
 declare i8* @__cxa_allocate_exception(i64)
 declare void @__cxa_throw(i8*, i8*, i8*)
 declare i32 @__gxx_personality_v0(...)
diff --git a/test/ExecutionEngine/MCJIT/multi-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/multi-module-sm-pic-a.ll
index f2fa59f4821f..01faecc6495b 100644
--- a/test/ExecutionEngine/MCJIT/multi-module-sm-pic-a.ll
+++ b/test/ExecutionEngine/MCJIT/multi-module-sm-pic-a.ll
@@ -1,5 +1,5 @@
-; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips, i686, i386, arm
+; RUN: %lli -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -relocation-model=pic -code-model=small %s > /dev/null
+; XFAIL: mips, i686, i386
 
 declare i32 @FB()
 
diff --git a/test/ExecutionEngine/MCJIT/non-extern-addend-smallcodemodel.ll b/test/ExecutionEngine/MCJIT/non-extern-addend-smallcodemodel.ll
deleted file mode 100644
index 21db67dad669..000000000000
--- a/test/ExecutionEngine/MCJIT/non-extern-addend-smallcodemodel.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: %lli_mcjit -code-model=small %s > /dev/null
-; XFAIL: mips
-;
-; FIXME: Merge this file with non-extern-addend.ll once AArch64 supports PC-rel
-;        relocations in ELF. (The code is identical, only the run line differs).
-;
-define i32 @foo(i32 %x, i32 %y, double %d) {
-entry:
-  %d.int64 = bitcast double %d to i64
-  %d.top64 = lshr i64 %d.int64, 32
-  %d.top   = trunc i64 %d.top64 to i32
-  %d.bottom = trunc i64 %d.int64 to i32
-  %topCorrect = icmp eq i32 %d.top, 3735928559
-  %bottomCorrect = icmp eq i32 %d.bottom, 4277009102
-  %right = and i1 %topCorrect, %bottomCorrect
-  %nRight = xor i1 %right, true
-  %retVal = zext i1 %nRight to i32
-  ret i32 %retVal
-}
-
-define i32 @main() {
-entry:
-  %call = call i32 @foo(i32 0, i32 1, double 0xDEADBEEFFEEDFACE)
-  ret i32 %call
-}
diff --git a/test/ExecutionEngine/MCJIT/non-extern-addend.ll b/test/ExecutionEngine/MCJIT/non-extern-addend.ll
index e0827f6add93..72e67ae93fe3 100644
--- a/test/ExecutionEngine/MCJIT/non-extern-addend.ll
+++ b/test/ExecutionEngine/MCJIT/non-extern-addend.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @foo(i32 %x, i32 %y, double %d) {
 entry:
diff --git a/test/ExecutionEngine/MCJIT/pr13727.ll b/test/ExecutionEngine/MCJIT/pr13727.ll
index 1c719c5b7c27..6f5ae3999bcd 100644
--- a/test/ExecutionEngine/MCJIT/pr13727.ll
+++ b/test/ExecutionEngine/MCJIT/pr13727.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -O0 -disable-lazy-compilation=false %s
+; RUN: %lli -O0 -disable-lazy-compilation=false %s
 
 ; The intention of this test is to verify that symbols mapped to COMMON in ELF
 ; work as expected.
diff --git a/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll
index b540bfa3bd6e..c31572361d3d 100644
--- a/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll
+++ b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
+; RUN: %lli -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
 
 declare i32 @FB()
 
diff --git a/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll
index 589ba2f6d382..d47fc6cffbb2 100644
--- a/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll
+++ b/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext -relocation-model=pic -code-model=small %s > /dev/null
+; RUN: %lli -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext -relocation-model=pic -code-model=small %s > /dev/null
 ; XFAIL: mips, i686, i386, arm
 
 declare i32 @FB()
diff --git a/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll
index fbbb8bdf4bc2..0fd363b4447a 100644
--- a/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll
+++ b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
+; RUN: %lli -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
 
 declare i32 @FB()
 
diff --git a/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll
index 9c2316959910..d248c4b2a94a 100644
--- a/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll
+++ b/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext -relocation-model=pic -code-model=small %s > /dev/null
+; RUN: %lli -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext -relocation-model=pic -code-model=small %s > /dev/null
 ; XFAIL: mips, i686, i386, arm
 
 declare i32 @FB()
diff --git a/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll
index 6c8ab3d5369b..30b4dd8e7abc 100644
--- a/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll
+++ b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
+; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
 
 define i32 @bar() nounwind {
 	ret i32 0
diff --git a/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll
index 48b939bbe35b..da4ddc6943c2 100644
--- a/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll
+++ b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s
+; RUN: %lli -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s
 ; XFAIL: *
 ; This test should fail until remote symbol resolution is supported.
 
diff --git a/test/ExecutionEngine/MCJIT/remote/stubs-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/stubs-sm-pic.ll
index 88faf21adb1b..f6a16078dfae 100644
--- a/test/ExecutionEngine/MCJIT/remote/stubs-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/remote/stubs-sm-pic.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -remote-mcjit -disable-lazy-compilation=false -relocation-model=pic -code-model=small %s
+; RUN: %lli -remote-mcjit -disable-lazy-compilation=false -relocation-model=pic -code-model=small %s
 ; XFAIL: *
 ; This function should fail until remote symbol resolution is supported.
 
diff --git a/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll
index e07178ed283e..0f58710603d0 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s
+; RUN: %lli -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s
 
 ; The intention of this test is to verify that symbols mapped to COMMON in ELF
 ; work as expected.
diff --git a/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll
index 129350b63eb0..435c21a4a9fa 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll
@@ -1,4 +1,4 @@
-; RUN:  %lli_mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s
+; RUN:  %lli -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s
 
 ; Check that a variable is always aligned as specified.
 
diff --git a/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll
index 8eec0f229885..9d11415c906f 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
+; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
 
 define double @test(double* %DP, double %Arg) nounwind {
 	%D = load double* %DP		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll
index 9fbaeb794496..40b514fd8e1f 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
+; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
 
 @count = global i32 1, align 4
 
diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll
index 29ab24b39336..5119b72d4485 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -remote-mcjit -relocation-model=pic -code-model=small %s > /dev/null
+; RUN: %lli -remote-mcjit -relocation-model=pic -code-model=small %s > /dev/null
 ; XFAIL: mips, aarch64, arm, i686, i386
 
 @count = global i32 1, align 4
diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll
index d62631ff50c6..ba3ffff21425 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s
+; RUN: %lli -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s
 
 @.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
 @ptr = global i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), align 4
diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll
index bad026fe7d4d..bbc71afdf7e3 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -remote-mcjit -O0 -relocation-model=pic -code-model=small %s
+; RUN: %lli -remote-mcjit -O0 -relocation-model=pic -code-model=small %s
 ; XFAIL: mips, aarch64, arm, i686, i386
 
 @.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
diff --git a/test/ExecutionEngine/MCJIT/simplesttest.ll b/test/ExecutionEngine/MCJIT/simplesttest.ll
index 318baf4e8ffd..85c171532752 100644
--- a/test/ExecutionEngine/MCJIT/simplesttest.ll
+++ b/test/ExecutionEngine/MCJIT/simplesttest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	ret i32 0
diff --git a/test/ExecutionEngine/MCJIT/simpletest.ll b/test/ExecutionEngine/MCJIT/simpletest.ll
index 5b0f2dd3055e..167a0fdfd130 100644
--- a/test/ExecutionEngine/MCJIT/simpletest.ll
+++ b/test/ExecutionEngine/MCJIT/simpletest.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @bar() {
 	ret i32 0
diff --git a/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll b/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll
index 9e214f5d4753..9b83ed231e9e 100644
--- a/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -disable-lazy-compilation=false -relocation-model=pic -code-model=small %s
+; RUN: %lli -disable-lazy-compilation=false -relocation-model=pic -code-model=small %s
 ; XFAIL: mips, i686, i386, aarch64, arm
 
 define i32 @main() nounwind {
diff --git a/test/ExecutionEngine/MCJIT/stubs.ll b/test/ExecutionEngine/MCJIT/stubs.ll
index f4aac3339450..b7d922f2cb6d 100644
--- a/test/ExecutionEngine/MCJIT/stubs.ll
+++ b/test/ExecutionEngine/MCJIT/stubs.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -disable-lazy-compilation=false %s
+; RUN: %lli -disable-lazy-compilation=false %s
 
 define i32 @main() nounwind {
 entry:
diff --git a/test/ExecutionEngine/MCJIT/test-arith.ll b/test/ExecutionEngine/MCJIT/test-arith.ll
index e1cc23b9fcd3..79f989f7265b 100644
--- a/test/ExecutionEngine/MCJIT/test-arith.ll
+++ b/test/ExecutionEngine/MCJIT/test-arith.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	%A = add i8 0, 12		; <i8> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/test-branch.ll b/test/ExecutionEngine/MCJIT/test-branch.ll
index cdf10350ec11..3ae55d069b3d 100644
--- a/test/ExecutionEngine/MCJIT/test-branch.ll
+++ b/test/ExecutionEngine/MCJIT/test-branch.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; test unconditional branch
 define i32 @main() {
diff --git a/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll b/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll
index 8a36cf2953f7..c3cb93121f6b 100644
--- a/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll
+++ b/test/ExecutionEngine/MCJIT/test-call-no-external-funcs.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @_Z14func_exit_codev() nounwind uwtable {
 entry:
diff --git a/test/ExecutionEngine/MCJIT/test-call.ll b/test/ExecutionEngine/MCJIT/test-call.ll
index 1a0f00841685..313a6c52367d 100644
--- a/test/ExecutionEngine/MCJIT/test-call.ll
+++ b/test/ExecutionEngine/MCJIT/test-call.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 declare void @exit(i32)
 
diff --git a/test/ExecutionEngine/MCJIT/test-cast.ll b/test/ExecutionEngine/MCJIT/test-cast.ll
index 335ec508eff1..667fa80a4897 100644
--- a/test/ExecutionEngine/MCJIT/test-cast.ll
+++ b/test/ExecutionEngine/MCJIT/test-cast.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @foo() {
 	ret i32 0
diff --git a/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll b/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll
index 989a47342339..a425b5c1e9b3 100644
--- a/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll
+++ b/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -O0 %s
+; RUN: %lli -O0 %s
 
 ; This test checks that common symbols have been allocated addresses honouring
 ; the alignment requirement.
diff --git a/test/ExecutionEngine/MCJIT/test-common-symbols.ll b/test/ExecutionEngine/MCJIT/test-common-symbols.ll
index 13ee06a65071..19e2ce584689 100644
--- a/test/ExecutionEngine/MCJIT/test-common-symbols.ll
+++ b/test/ExecutionEngine/MCJIT/test-common-symbols.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -O0 -disable-lazy-compilation=false %s
+; RUN: %lli -O0 -disable-lazy-compilation=false %s
 
 ; The intention of this test is to verify that symbols mapped to COMMON in ELF
 ; work as expected.
diff --git a/test/ExecutionEngine/MCJIT/test-constantexpr.ll b/test/ExecutionEngine/MCJIT/test-constantexpr.ll
index 8f15cbd7f7ef..d01479a86cdc 100644
--- a/test/ExecutionEngine/MCJIT/test-constantexpr.ll
+++ b/test/ExecutionEngine/MCJIT/test-constantexpr.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; This tests to make sure that we can evaluate weird constant expressions
 
diff --git a/test/ExecutionEngine/MCJIT/test-data-align.ll b/test/ExecutionEngine/MCJIT/test-data-align.ll
index 2472d95e7778..f21ea2e2a452 100644
--- a/test/ExecutionEngine/MCJIT/test-data-align.ll
+++ b/test/ExecutionEngine/MCJIT/test-data-align.ll
@@ -1,4 +1,4 @@
-; RUN:  %lli_mcjit -O0 %s
+; RUN:  %lli -O0 %s
 
 ; Check that a variable is always aligned as specified.
 
diff --git a/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
index f094f3d91923..adb0550a72d7 100644
--- a/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
+++ b/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define double @test(double* %DP, double %Arg) {
 	%D = load double* %DP		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/test-fp.ll b/test/ExecutionEngine/MCJIT/test-fp.ll
index b10e9d6c169d..2bf0210d8b00 100644
--- a/test/ExecutionEngine/MCJIT/test-fp.ll
+++ b/test/ExecutionEngine/MCJIT/test-fp.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define double @test(double* %DP, double %Arg) {
 	%D = load double* %DP		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/test-global-ctors.ll b/test/ExecutionEngine/MCJIT/test-global-ctors.ll
index 947d8f5d423d..ec87d155d41b 100644
--- a/test/ExecutionEngine/MCJIT/test-global-ctors.ll
+++ b/test/ExecutionEngine/MCJIT/test-global-ctors.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 ; XFAIL: darwin
 @var = global i32 1, align 4
 @llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @ctor_func }]
diff --git a/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll
index eb031f2e4f45..26bd83808026 100644
--- a/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -relocation-model=pic -code-model=small %s > /dev/null
+; RUN: %lli -relocation-model=pic -code-model=small %s > /dev/null
 ; XFAIL: mips, aarch64, arm, i686, i386
 
 @count = global i32 1, align 4
diff --git a/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll b/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
index b9f74b8be403..3877e9a58861 100644
--- a/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
+++ b/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 @count = global i32 1, align 4
 
diff --git a/test/ExecutionEngine/MCJIT/test-global.ll b/test/ExecutionEngine/MCJIT/test-global.ll
index 6a8c042ef89e..69e545555649 100644
--- a/test/ExecutionEngine/MCJIT/test-global.ll
+++ b/test/ExecutionEngine/MCJIT/test-global.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 @count = global i32 0, align 4
 
diff --git a/test/ExecutionEngine/MCJIT/test-loadstore.ll b/test/ExecutionEngine/MCJIT/test-loadstore.ll
index 90381947e8fb..1797599a9d8d 100644
--- a/test/ExecutionEngine/MCJIT/test-loadstore.ll
+++ b/test/ExecutionEngine/MCJIT/test-loadstore.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define void @test(i8* %P, i16* %P.upgrd.1, i32* %P.upgrd.2, i64* %P.upgrd.3) {
 	%V = load i8* %P		; <i8> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/test-local.ll b/test/ExecutionEngine/MCJIT/test-local.ll
index d7c173427977..ec5ba165408f 100644
--- a/test/ExecutionEngine/MCJIT/test-local.ll
+++ b/test/ExecutionEngine/MCJIT/test-local.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() nounwind uwtable {
 entry:
diff --git a/test/ExecutionEngine/MCJIT/test-logical.ll b/test/ExecutionEngine/MCJIT/test-logical.ll
index a03833e5c516..05b381bb53c2 100644
--- a/test/ExecutionEngine/MCJIT/test-logical.ll
+++ b/test/ExecutionEngine/MCJIT/test-logical.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	%A = and i8 4, 8		; <i8> [#uses=2]
diff --git a/test/ExecutionEngine/MCJIT/test-loop.ll b/test/ExecutionEngine/MCJIT/test-loop.ll
index 5ed8c4020f76..e951a14ed2e9 100644
--- a/test/ExecutionEngine/MCJIT/test-loop.ll
+++ b/test/ExecutionEngine/MCJIT/test-loop.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 ; <label>:0
diff --git a/test/ExecutionEngine/MCJIT/test-phi.ll b/test/ExecutionEngine/MCJIT/test-phi.ll
index 4245ccabed78..c5bdfd513edc 100644
--- a/test/ExecutionEngine/MCJIT/test-phi.ll
+++ b/test/ExecutionEngine/MCJIT/test-phi.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; test phi node
 @Y = global i32 6		; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll
index 9e067422e312..21bcaef3aa80 100644
--- a/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -O0 -relocation-model=pic -code-model=small %s
+; RUN: %lli -O0 -relocation-model=pic -code-model=small %s
 ; XFAIL: mips, aarch64, arm, i686, i386
 
 @.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
diff --git a/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll b/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll
index 871d8bfa29e8..f139ddf12f81 100644
--- a/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll
+++ b/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit -O0 %s
+; RUN: %lli -O0 %s
 
 @.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
 @ptr = global i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), align 4
diff --git a/test/ExecutionEngine/MCJIT/test-ret.ll b/test/ExecutionEngine/MCJIT/test-ret.ll
index 6bfc48052d02..025f53e5cb9e 100644
--- a/test/ExecutionEngine/MCJIT/test-ret.ll
+++ b/test/ExecutionEngine/MCJIT/test-ret.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 ; test return instructions
 define void @test1() {
diff --git a/test/ExecutionEngine/MCJIT/test-return.ll b/test/ExecutionEngine/MCJIT/test-return.ll
index 4db1c3fe39f0..d464a4b72d80 100644
--- a/test/ExecutionEngine/MCJIT/test-return.ll
+++ b/test/ExecutionEngine/MCJIT/test-return.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() nounwind uwtable {
 entry:
diff --git a/test/ExecutionEngine/MCJIT/test-setcond-fp.ll b/test/ExecutionEngine/MCJIT/test-setcond-fp.ll
index b4367d0337a0..68276e617a4a 100644
--- a/test/ExecutionEngine/MCJIT/test-setcond-fp.ll
+++ b/test/ExecutionEngine/MCJIT/test-setcond-fp.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 
 define i32 @main() {
diff --git a/test/ExecutionEngine/MCJIT/test-setcond-int.ll b/test/ExecutionEngine/MCJIT/test-setcond-int.ll
index 8c7d815446cb..48dc02198ec0 100644
--- a/test/ExecutionEngine/MCJIT/test-setcond-int.ll
+++ b/test/ExecutionEngine/MCJIT/test-setcond-int.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	%int1 = add i32 0, 0		; <i32> [#uses=6]
diff --git a/test/ExecutionEngine/MCJIT/test-shift.ll b/test/ExecutionEngine/MCJIT/test-shift.ll
index 8d9a94ef12c8..590e2620689f 100644
--- a/test/ExecutionEngine/MCJIT/test-shift.ll
+++ b/test/ExecutionEngine/MCJIT/test-shift.ll
@@ -1,4 +1,4 @@
-; RUN: %lli_mcjit %s > /dev/null
+; RUN: %lli %s > /dev/null
 
 define i32 @main() {
 	%shamt = add i8 0, 1		; <i8> [#uses=8]
diff --git a/test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s b/test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s
new file mode 100644
index 000000000000..04d269e2aebf
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/AArch64/MachO_ARM64_relocations.s
@@ -0,0 +1,67 @@
+# RUN: llvm-mc -triple=arm64-apple-ios7.0.0 -code-model=small -relocation-model=pic -filetype=obj -o %T/foo.o %s
+# RUN: llvm-rtdyld -triple=arm64-apple-ios7.0.0 -map-section foo.o,__text=0x10bc0 -verify -check=%s %/T/foo.o
+
+    .section  __TEXT,__text,regular,pure_instructions
+    .ios_version_min 7, 0
+    .globl  _foo
+    .align  2
+_foo:
+    movz  w0, #0
+    ret
+
+    .globl  _test_branch_reloc
+    .align  2
+
+
+# Test ARM64_RELOC_BRANCH26 relocation. The branch instruction only encodes 26
+# bits of the 28-bit possible branch range. The lower two bits are always zero
+# and therefore ignored.
+# rtdyld-check:  decode_operand(br1, 0)[25:0] = (_foo - br1)[27:2]
+_test_branch_reloc:
+br1:
+    b _foo
+    ret
+
+
+# Test ARM64_RELOC_PAGE21 and ARM64_RELOC_PAGEOFF12 relocation. adrp encodes
+# the PC-relative page (4 KiB) difference between the adrp instruction and the
+# variable ptr. ldr encodes the offset of the variable within the page. The ldr
+# instruction perfroms an implicit shift on the encoded immediate (imm<<3).
+# rtdyld-check:  decode_operand(adrp1, 1) = (_ptr[32:12] - adrp1[32:12])
+# rtdyld-check:  decode_operand(ldr1, 2) = _ptr[11:3]
+    .globl  _test_adrp_ldr
+    .align  2
+_test_adrp_ldr:
+adrp1:
+    adrp x0, _ptr@PAGE
+ldr1:
+    ldr  x0, [x0, _ptr@PAGEOFF]
+    ret
+
+# Test ARM64_RELOC_GOT_LOAD_PAGE21 and ARM64_RELOC_GOT_LOAD_PAGEOFF12
+# relocation. adrp encodes the PC-relative page (4 KiB) difference between the
+# adrp instruction and the GOT entry for ptr. ldr encodes the offset of the GOT
+# entry within the page. The ldr instruction perfroms an implicit shift on the
+# encoded immediate (imm<<3).
+# rtdyld-check:  *{8}(stub_addr(foo.o, __text, _ptr)) = _ptr
+# rtdyld-check:  decode_operand(adrp2, 1) = (stub_addr(foo.o, __text, _ptr)[32:12] - adrp2[32:12])
+# rtdyld-check:  decode_operand(ldr2, 2) = stub_addr(foo.o, __text, _ptr)[11:3]
+    .globl  _test_adrp_ldr
+    .align  2
+_test_got_adrp_ldr:
+adrp2:
+    adrp x0, _ptr@GOTPAGE
+ldr2:
+    ldr  x0, [x0, _ptr@GOTPAGEOFF]
+    ret
+
+
+# Test ARM64_RELOC_UNSIGNED relocation. The absolute 64-bit address of the
+# function should be stored at the 8-byte memory location.
+# rtdyld-check: *{8}_ptr = _foo
+    .section  __DATA,__data
+    .globl  _ptr
+    .align  3
+    .fill 4096, 1, 0
+_ptr:
+    .quad _foo
diff --git a/test/ExecutionEngine/RuntimeDyld/AArch64/lit.local.cfg b/test/ExecutionEngine/RuntimeDyld/AArch64/lit.local.cfg
new file mode 100644
index 000000000000..cec29af5bbe4
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/AArch64/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True
+
diff --git a/test/ExecutionEngine/RuntimeDyld/ARM/MachO_ARM_PIC_relocations.s b/test/ExecutionEngine/RuntimeDyld/ARM/MachO_ARM_PIC_relocations.s
index 86041835591f..7ff3a8975769 100644
--- a/test/ExecutionEngine/RuntimeDyld/ARM/MachO_ARM_PIC_relocations.s
+++ b/test/ExecutionEngine/RuntimeDyld/ARM/MachO_ARM_PIC_relocations.s
@@ -1,27 +1,51 @@
-# RUN: llvm-mc -triple=armv7s-apple-ios7.0.0 -relocation-model=pic -filetype=obj -o %t.o %s
-# RUN: llvm-rtdyld -triple=armv7s-apple-ios7.0.0 -verify -check=%s %t.o
-# RUN: rm %t.o
+# RUN: llvm-mc -triple=armv7s-apple-ios7.0.0 -relocation-model=pic -filetype=obj -o %T/foo.o %s
+# RUN: llvm-rtdyld -triple=armv7s-apple-ios7.0.0 -verify -check=%s %/T/foo.o
 
-	.syntax unified
-	.section	__TEXT,__text,regular,pure_instructions
-	.globl	bar
-	.align	2
+        .syntax unified
+        .section        __TEXT,__text,regular,pure_instructions
+        .globl  bar
+        .align  2
 bar:
 # Check lower 16-bits of section difference relocation
-# rtdyld-check: decode_operand(insn1, 1) = (foo-(nextPC+8))[15:0]
+# rtdyld-check: decode_operand(insn1, 1) = (foo$non_lazy_ptr-(nextPC+8))[15:0]
 insn1:
-	movw	r0, :lower16:(foo-(nextPC+8))
+        movw    r0, :lower16:(foo$non_lazy_ptr-(nextPC+8))
 # Check upper 16-bits of section difference relocation
-# rtdyld-check: decode_operand(insn2, 2) = (foo-(nextPC+8))[31:16]
+# rtdyld-check: decode_operand(insn2, 2) = (foo$non_lazy_ptr-(nextPC+8))[31:16]
 insn2:
-	movt	r0, :upper16:(foo-(nextPC+8))
+        movt    r0, :upper16:(foo$non_lazy_ptr-(nextPC+8))
 nextPC:
-	add	r0, pc, r0
-	bx	lr
+        add     r1, r0, r0
 
-	.globl	foo
-	.align	2
-foo:
-	bx	lr
+# Check stub generation for external symbols by referencing a common symbol, 'baz'.
+# Check both the content of the stub, and the reference to the stub.
+# Stub should contain '0xe51ff004' (ldr pc, [pc, #-4]), followed by the target.
+#
+# rtdyld-check: *{4}(stub_addr(foo.o, __text, baz)) = 0xe51ff004
+# rtdyld-check: *{4}(stub_addr(foo.o, __text, baz) + 4) = baz
+#
+# rtdyld-check: decode_operand(insn3, 0) = stub_addr(foo.o, __text, baz) - (insn3 + 8)
+insn3:
+        bl      baz
+
+# Check stub generation for internal symbols by referencing 'bar'.
+# rtdyld-check: *{4}(stub_addr(foo.o, __text, bar) + 4) = bar
+insn4:
+        bl      bar
+        bx	lr
+
+# Add 'aaa' to the common symbols to make sure 'baz' isn't at the start of the
+# section. This ensures that we test VANILLA relocation addends correctly.
+        .comm   aaa, 4, 2
+        .comm   baz, 4, 2
+        .comm   foo, 4, 2
+
+# Check that the symbol pointer section entries are fixed up properly:
+# rtdyld-check: *{4}foo$non_lazy_ptr = foo
+        .section	__DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
+  	.align	2
+foo$non_lazy_ptr:
+	.indirect_symbol	foo
+	.long	0
 
 .subsections_via_symbols
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/MachO_i386_DynNoPIC_relocations.s b/test/ExecutionEngine/RuntimeDyld/X86/MachO_i386_DynNoPIC_relocations.s
new file mode 100644
index 000000000000..f427b985b58c
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/X86/MachO_i386_DynNoPIC_relocations.s
@@ -0,0 +1,45 @@
+# RUN: llvm-mc -triple=i386-apple-macosx10.4 -relocation-model=dynamic-no-pic -filetype=obj -o %T/test_i386.o %s
+# RUN: llvm-rtdyld -triple=i386-apple-macosx10.4 -verify -check=%s %/T/test_i386.o
+
+	.section	__TEXT,__text,regular,pure_instructions
+	.globl	bar
+	.align	4, 0x90
+bar:
+	calll	tmp0$pb
+tmp0$pb:
+	popl	%eax
+# Test section difference relocation to non-lazy ptr section.
+# rtdyld-check: decode_operand(inst1, 4) = x$non_lazy_ptr - tmp0$pb
+inst1:
+	movl	x$non_lazy_ptr-tmp0$pb(%eax), %eax
+        movl    (%eax), %ebx
+
+# Test VANILLA relocation to jump table.
+# rtdyld-check: decode_operand(inst2, 0) = bling$stub - next_pc(inst2)
+inst2:
+        calll	bling$stub
+        addl    %ebx, %eax
+
+# Test scattered VANILLA relocations.
+inst3:
+        movl    y+4, %ecx
+        addl    %ecx, %eax
+	retl
+
+	.section	__IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5
+bling$stub:
+	.indirect_symbol	bling
+	.ascii	"\364\364\364\364\364"
+
+	.section	__IMPORT,__pointers,non_lazy_symbol_pointers
+x$non_lazy_ptr:
+	.indirect_symbol	x
+	.long	0
+
+        .comm   x,4,2
+        .comm   bling,4,2
+
+        .globl	y
+.zerofill __DATA,__common,y,8,3
+
+.subsections_via_symbols
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/MachO_i386_eh_frame.s b/test/ExecutionEngine/RuntimeDyld/X86/MachO_i386_eh_frame.s
new file mode 100644
index 000000000000..8814ec3c6138
--- /dev/null
+++ b/test/ExecutionEngine/RuntimeDyld/X86/MachO_i386_eh_frame.s
@@ -0,0 +1,30 @@
+# RUN: llvm-mc -triple=i386-apple-macosx10.4 -relocation-model=dynamic-no-pic -filetype=obj -o %T/MachO_i386_eh_frame.o %s
+# RUN: llvm-rtdyld -triple=i386-apple-macosx10.4 -verify -map-section MachO_i386_eh_frame.o,__text=0x2000 -check=%s %/T/MachO_i386_eh_frame.o
+
+# rtdyld-check: *{4}(section_addr(MachO_i386_eh_frame.o, __eh_frame) + 0x20) = (main - (section_addr(MachO_i386_eh_frame.o, __eh_frame) + 0x20))[31:0]
+# rtdyld-check: *{4}(section_addr(MachO_i386_eh_frame.o, __eh_frame) + 0x24) = 0x9
+
+	.section	__TEXT,__text,regular,pure_instructions
+
+	.globl	bar
+	.align	4, 0x90
+bar:
+        retl
+
+        .globl	main
+	.align	4, 0x90
+main:
+	.cfi_startproc
+	pushl	%ebp
+Ltmp0:
+	.cfi_def_cfa_offset 8
+Ltmp1:
+	.cfi_offset %ebp, -8
+	movl	%esp, %ebp
+Ltmp2:
+	.cfi_def_cfa_register %ebp
+	popl	%ebp
+	jmp	bar
+	.cfi_endproc
+
+.subsections_via_symbols
diff --git a/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s b/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s
index e87b449bb237..502f276501f8 100644
--- a/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s
+++ b/test/ExecutionEngine/RuntimeDyld/X86/MachO_x86-64_PIC_relocations.s
@@ -1,6 +1,5 @@
-# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -relocation-model=pic -filetype=obj -o %t.o %s
-# RUN: llvm-rtdyld -triple=x86_64-apple-macosx10.9 -verify -check=%s %t.o
-# RUN: rm %t.o
+# RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -relocation-model=pic -filetype=obj -o %T/test_x86-64.o %s
+# RUN: llvm-rtdyld -triple=x86_64-apple-macosx10.9 -verify -check=%s %/T/test_x86-64.o
 
         .section	__TEXT,__text,regular,pure_instructions
 	.globl	foo
@@ -20,9 +19,20 @@ insn1:
 # rtdyld-check: decode_operand(insn2, 4) = x - next_pc(insn2)
 insn2:
 	movl	x(%rip), %eax
-	movl	$0, %eax
+
+# Test PC-rel GOT relocation.
+# Verify both the contents of the GOT entry for y, and that the movq instruction
+# references the correct GOT entry address:
+# rtdyld-check: *{8}(stub_addr(test_x86-64.o, __text, y)) = y
+# rtdyld-check: decode_operand(insn3, 4) = stub_addr(test_x86-64.o, __text, y) - next_pc(insn3)
+insn3:
+        movq	y@GOTPCREL(%rip), %rax
+
+        movl	$0, %eax
 	retq
 
+        .comm   y,4,2
+
         .section	__DATA,__data
 	.globl	x
 	.align	2
diff --git a/test/ExecutionEngine/frem.ll b/test/ExecutionEngine/frem.ll
new file mode 100644
index 000000000000..7e0b6060f6fe
--- /dev/null
+++ b/test/ExecutionEngine/frem.ll
@@ -0,0 +1,20 @@
+; LLI.exe used to crash on Windows\X86 when certain single precession
+; floating point intrinsics (defined as macros) are used.
+; This unit test guards against the failure.
+;
+; RUN: %lli %s | FileCheck %s
+
+@flt = internal global float 12.0e+0
+@str = internal constant [18 x i8] c"Double value: %f\0A\00"
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+define i32 @main() {
+  %flt = load float* @flt
+  %float2 = frem float %flt, 5.0
+  %double1 = fpext float %float2 to double
+  call i32 (i8*, ...)* @printf(i8* getelementptr ([18 x i8]* @str, i32 0, i64 0), double %double1)
+  ret i32 0
+}
+
+; CHECK: Double value: 2.0
diff --git a/test/ExecutionEngine/hello.ll b/test/ExecutionEngine/hello.ll
index f2c4a7f414d9..47e36a589563 100644
--- a/test/ExecutionEngine/hello.ll
+++ b/test/ExecutionEngine/hello.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 @.LC0 = internal global [12 x i8] c"Hello World\00"		; <[12 x i8]*> [#uses=1]
 
diff --git a/test/ExecutionEngine/hello2.ll b/test/ExecutionEngine/hello2.ll
index 155ed41d7ca6..13b25884fee7 100644
--- a/test/ExecutionEngine/hello2.ll
+++ b/test/ExecutionEngine/hello2.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 @X = global i32 7		; <i32*> [#uses=0]
 @msg = internal global [13 x i8] c"Hello World\0A\00"		; <[13 x i8]*> [#uses=1]
diff --git a/test/ExecutionEngine/mov64zext32.ll b/test/ExecutionEngine/mov64zext32.ll
index f38c21a5698f..a5b246170cf0 100644
--- a/test/ExecutionEngine/mov64zext32.ll
+++ b/test/ExecutionEngine/mov64zext32.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 define i64 @foo() {
   ret i64 42
diff --git a/test/ExecutionEngine/simpletest.ll b/test/ExecutionEngine/simpletest.ll
index 83f9b8405902..167a0fdfd130 100644
--- a/test/ExecutionEngine/simpletest.ll
+++ b/test/ExecutionEngine/simpletest.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 define i32 @bar() {
 	ret i32 0
diff --git a/test/ExecutionEngine/stubs.ll b/test/ExecutionEngine/stubs.ll
index b40e4be63d41..b7d922f2cb6d 100644
--- a/test/ExecutionEngine/stubs.ll
+++ b/test/ExecutionEngine/stubs.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli -disable-lazy-compilation=false %s
-; XFAIL: arm
 
 define i32 @main() nounwind {
 entry:
diff --git a/test/ExecutionEngine/test-call-no-external-funcs.ll b/test/ExecutionEngine/test-call-no-external-funcs.ll
index b2dd5325f114..c3cb93121f6b 100644
--- a/test/ExecutionEngine/test-call-no-external-funcs.ll
+++ b/test/ExecutionEngine/test-call-no-external-funcs.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 define i32 @_Z14func_exit_codev() nounwind uwtable {
 entry:
diff --git a/test/ExecutionEngine/test-call.ll b/test/ExecutionEngine/test-call.ll
index 3fd39fe094f3..313a6c52367d 100644
--- a/test/ExecutionEngine/test-call.ll
+++ b/test/ExecutionEngine/test-call.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 declare void @exit(i32)
 
diff --git a/test/ExecutionEngine/test-common-symbols.ll b/test/ExecutionEngine/test-common-symbols.ll
index 4dd926512ae6..19e2ce584689 100644
--- a/test/ExecutionEngine/test-common-symbols.ll
+++ b/test/ExecutionEngine/test-common-symbols.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli -O0 -disable-lazy-compilation=false %s
-; XFAIL: arm
 
 ; The intention of this test is to verify that symbols mapped to COMMON in ELF
 ; work as expected.
diff --git a/test/ExecutionEngine/test-fp-no-external-funcs.ll b/test/ExecutionEngine/test-fp-no-external-funcs.ll
index 139b2efea57f..61b12c2abeb7 100644
--- a/test/ExecutionEngine/test-fp-no-external-funcs.ll
+++ b/test/ExecutionEngine/test-fp-no-external-funcs.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli  %s > /dev/null
-; XFAIL: arm
 
 define double @test(double* %DP, double %Arg) {
 	%D = load double* %DP		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/test-fp.ll b/test/ExecutionEngine/test-fp.ll
index c9064500d475..2bf0210d8b00 100644
--- a/test/ExecutionEngine/test-fp.ll
+++ b/test/ExecutionEngine/test-fp.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 define double @test(double* %DP, double %Arg) {
 	%D = load double* %DP		; <double> [#uses=1]
diff --git a/test/ExecutionEngine/test-global-init-nonzero.ll b/test/ExecutionEngine/test-global-init-nonzero.ll
index ef2d37b89199..749a4851c5d6 100644
--- a/test/ExecutionEngine/test-global-init-nonzero.ll
+++ b/test/ExecutionEngine/test-global-init-nonzero.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli  %s > /dev/null
-; XFAIL: arm
 
 @count = global i32 1, align 4
 
diff --git a/test/ExecutionEngine/test-global.ll b/test/ExecutionEngine/test-global.ll
index 2ea50dea99ae..69e545555649 100644
--- a/test/ExecutionEngine/test-global.ll
+++ b/test/ExecutionEngine/test-global.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 @count = global i32 0, align 4
 
diff --git a/test/ExecutionEngine/test-loadstore.ll b/test/ExecutionEngine/test-loadstore.ll
index 75743146c6eb..1797599a9d8d 100644
--- a/test/ExecutionEngine/test-loadstore.ll
+++ b/test/ExecutionEngine/test-loadstore.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 define void @test(i8* %P, i16* %P.upgrd.1, i32* %P.upgrd.2, i64* %P.upgrd.3) {
 	%V = load i8* %P		; <i8> [#uses=1]
diff --git a/test/ExecutionEngine/test-local.ll b/test/ExecutionEngine/test-local.ll
index 240b174ac2fe..ec5ba165408f 100644
--- a/test/ExecutionEngine/test-local.ll
+++ b/test/ExecutionEngine/test-local.ll
@@ -1,5 +1,4 @@
 ; RUN: %lli %s > /dev/null
-; XFAIL: arm
 
 define i32 @main() nounwind uwtable {
 entry:
diff --git a/test/Feature/NamedMDNode.ll b/test/Feature/NamedMDNode.ll
index 0c6bcd9abfe5..c1e72a861e98 100644
--- a/test/Feature/NamedMDNode.ll
+++ b/test/Feature/NamedMDNode.ll
@@ -1,8 +1,8 @@
 ; RUN: llvm-as < %s | llvm-dis | grep "llvm.stuff = "
 
 ;; Simple NamedMDNode
-!0 = metadata !{i32 42}
-!1 = metadata !{metadata !"foo"}
+!0 = !{i32 42}
+!1 = !{!"foo"}
 !llvm.stuff = !{!0, !1}
 
 !samename = !{!0, !1}
diff --git a/test/Feature/NamedMDNode2.ll b/test/Feature/NamedMDNode2.ll
index 0524dd27a4a9..94e4458fb9d9 100644
--- a/test/Feature/NamedMDNode2.ll
+++ b/test/Feature/NamedMDNode2.ll
@@ -3,5 +3,5 @@
 
 
 @foo = constant i1 false
-!0 = metadata !{i1 false}
+!0 = !{i1 false}
 !a = !{!0}
diff --git a/test/Feature/aliases.ll b/test/Feature/aliases.ll
index ad1d1b08901c..c11fc4764254 100644
--- a/test/Feature/aliases.ll
+++ b/test/Feature/aliases.ll
@@ -21,10 +21,10 @@
 define i32 @foo_f() {
   ret i32 0
 }
-@bar_f = alias weak_odr %FunTy* @foo_f
+@bar_f = weak_odr alias %FunTy* @foo_f
 @bar_ff = alias i32()* @bar_f
 
-@bar_i = alias internal i32* @bar
+@bar_i = internal alias i32* @bar
 
 @A = alias bitcast (i32* @bar to i64*)
 
diff --git a/test/Feature/callingconventions.ll b/test/Feature/callingconventions.ll
index 192f07a413b5..8b339d43fcd0 100644
--- a/test/Feature/callingconventions.ll
+++ b/test/Feature/callingconventions.ll
@@ -52,4 +52,11 @@ U:
   resume { i8*, i32 } %exn
 }
 
+declare ghccc void @ghc_callee()
+
+define void @ghc_caller() {
+  call ghccc void @ghc_callee()
+  ret void
+}
+
 declare i32 @__gxx_personality_v0(...)
diff --git a/test/Feature/comdat.ll b/test/Feature/comdat.ll
index 1e878bb71cd1..c2a9d6396293 100644
--- a/test/Feature/comdat.ll
+++ b/test/Feature/comdat.ll
@@ -6,16 +6,16 @@ $f = comdat any
 $f2 = comdat any
 ; CHECK-NOT: f2
 
-@v = global i32 0, comdat $f
-; CHECK: @v = global i32 0, comdat $f
+@v = global i32 0, comdat($f)
+; CHECK: @v = global i32 0, comdat($f)
 
 @a = alias i32* @v
 ; CHECK: @a = alias i32* @v{{$}}
 
-define void @f() comdat $f {
+define void @f() comdat($f) {
   ret void
 }
-; CHECK: define void @f() comdat $f
+; CHECK: define void @f() comdat {
 
 $i = comdat largest
-@i = internal global i32 0, comdat $i
+@i = internal global i32 0, comdat($i)
diff --git a/test/Feature/md_on_instruction.ll b/test/Feature/md_on_instruction.ll
index 955ace3da840..511cc8528476 100644
--- a/test/Feature/md_on_instruction.ll
+++ b/test/Feature/md_on_instruction.ll
@@ -18,10 +18,10 @@ declare void @llvm.dbg.region.end(metadata) nounwind readnone
 
 !llvm.module.flags = !{!6}
 
-!0 = metadata !{i32 458798, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 1, metadata !2, i1 false, i1 true}
-!1 = metadata !{i32 458769, metadata !4, i32 12, metadata !"clang 1.0", i1 true, metadata !"", i32 0, metadata !5, metadata !5, metadata !4, null, null, metadata !""}
-!2 = metadata !{i32 458788, null, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
-!3 = metadata !{i32 1, i32 13, metadata !1, metadata !1}
-!4 = metadata !{metadata !"foo.c", metadata !"/tmp"}
-!5 = metadata !{i32 0}
-!6 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00foo\00foo\00foo\001\000\001\000\006\000\000\000", i32 0, !1, !2, null, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x11\0012\00clang 1.0\001\00\000\00\000", !4, !5, !5, !4, null, null} ; [ DW_TAG_compile_unit ]
+!2 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !1} ; [ DW_TAG_base_type ]
+!3 = !MDLocation(line: 1, column: 13, scope: !1, inlinedAt: !1)
+!4 = !{!"foo.c", !"/tmp"}
+!5 = !{i32 0}
+!6 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Feature/metadata.ll b/test/Feature/metadata.ll
index 9856b375495c..612a79e51199 100644
--- a/test/Feature/metadata.ll
+++ b/test/Feature/metadata.ll
@@ -2,16 +2,16 @@
 ; PR7105
 
 define void @foo(i32 %x) {
-  call void @llvm.zonk(metadata !1, i64 0, metadata !1)
-  store i32 0, i32* null, !whatever !0, !whatever_else !{}, !more !{metadata !"hello"}
-  store i32 0, i32* null, !whatever !{i32 %x, metadata !"hello", metadata !1, metadata !{}, metadata !2}
-  ret void, !whatever !{i32 %x}
+  call void @llvm.zonk(metadata i32 %x, i64 0, metadata !1)
+  store i32 0, i32* null, !whatever !0, !whatever_else !{}, !more !{!"hello"}
+  store i32 0, i32* null, !whatever !{!"hello", !1, !{}, !2}
+  ret void, !_1 !0
 }
 
 declare void @llvm.zonk(metadata, i64, metadata) nounwind readnone
 
 !named = !{!0}
 !another_named = !{}
-!0 = metadata !{i8** null}
-!1 = metadata !{i8* null, metadata !2}
-!2 = metadata !{}
+!0 = !{i8** null}
+!1 = !{i8* null, !2}
+!2 = !{}
diff --git a/test/Feature/optnone-llc.ll b/test/Feature/optnone-llc.ll
index 6cb27d0b7d5c..f19fa88f9b26 100644
--- a/test/Feature/optnone-llc.ll
+++ b/test/Feature/optnone-llc.ll
@@ -46,7 +46,7 @@ attributes #0 = { optnone noinline }
 ; LLC-Ox-DAG: Skipping pass 'Merge disjoint stack slots'
 ; LLC-Ox-DAG: Skipping pass 'Optimize machine instruction PHIs'
 ; LLC-Ox-DAG: Skipping pass 'Peephole Optimizations'
-; LLC-Ox-DAG: Skipping pass 'Post RA top-down list latency scheduler'
+; LLC-Ox-DAG: Skipping pass 'Post{{.*}}RA{{.*}}{{[Ss]}}cheduler'
 ; LLC-Ox-DAG: Skipping pass 'Remove dead machine instructions'
 ; LLC-Ox-DAG: Skipping pass 'Tail Duplication'
 
diff --git a/test/Feature/prologuedata.ll b/test/Feature/prologuedata.ll
new file mode 100644
index 000000000000..63f424cc40e5
--- /dev/null
+++ b/test/Feature/prologuedata.ll
@@ -0,0 +1,18 @@
+; RUN: llvm-as < %s | llvm-dis > %t1.ll
+; RUN: FileCheck %s < %t1.ll
+; RUN: llvm-as < %t1.ll | llvm-dis > %t2.ll
+; RUN: diff %t1.ll %t2.ll
+; RUN: opt -O3 -S < %t1.ll | FileCheck %s
+
+; CHECK: @i
+@i = linkonce_odr global i32 1
+
+; CHECK: f(){{.*}}prologue i32 1
+define void @f() prologue i32 1 {
+  ret void
+}
+
+; CHECK: g(){{.*}}prologue i32* @i
+define void @g() prologue i32* @i {
+  ret void
+}
diff --git a/test/Feature/weak_constant.ll b/test/Feature/weak_constant.ll
index fba7f12e1dee..d331bf59e833 100644
--- a/test/Feature/weak_constant.ll
+++ b/test/Feature/weak_constant.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -std-compile-opts -S > %t
+; RUN: opt < %s -O3 -S > %t
 ; RUN:   grep undef %t | count 1
 ; RUN:   grep 5 %t | count 1
 ; RUN:   grep 7 %t | count 1
diff --git a/test/FileCheck/check-empty.txt b/test/FileCheck/check-empty.txt
new file mode 100644
index 000000000000..9caea65ee61a
--- /dev/null
+++ b/test/FileCheck/check-empty.txt
@@ -0,0 +1,11 @@
+// RUN: not FileCheck -check-prefix=FOO %s </dev/null 2>&1 | FileCheck -check-prefix=EMPTY-ERR %s
+// RUN: not FileCheck -check-prefix=NOFOO %s </dev/null 2>&1 | FileCheck -check-prefix=EMPTY-ERR %s
+// RUN: not FileCheck -check-prefix=FOO -allow-empty %s </dev/null 2>&1 | FileCheck -check-prefix=NO-EMPTY-ERR -check-prefix=NOT-FOUND %s
+// RUN: FileCheck -check-prefix=NOFOO -allow-empty %s </dev/null 2>&1 | FileCheck -allow-empty -check-prefix=NO-EMPTY-ERR %s
+
+; FOO: foo
+; NOFOO-NOT: foo
+
+; EMPTY-ERR: FileCheck error: '-' is empty.
+; NO-EMPTY-ERR-NOT: FileCheck error: '-' is empty.
+; NOT-FOUND: error: expected string not found in input
diff --git a/test/FileCheck/validate-check-prefix.txt b/test/FileCheck/validate-check-prefix.txt
index db3392d58190..6efec440d0bc 100644
--- a/test/FileCheck/validate-check-prefix.txt
+++ b/test/FileCheck/validate-check-prefix.txt
@@ -2,6 +2,7 @@
 // RUN: FileCheck -check-prefix=A1a-B_c -input-file %s %s
 // RUN: not FileCheck -check-prefix=REPEAT -check-prefix=REPEAT -input-file %s %s 2>&1 | FileCheck -check-prefix=BAD_PREFIX %s
 // RUN: not FileCheck -check-prefix=VALID -check-prefix=A! -input-file %s %s 2>&1 | FileCheck -check-prefix=BAD_PREFIX %s
+// RUN: not FileCheck -check-prefix= -input-file %s %s 2>&1 | FileCheck -check-prefix=BAD_PREFIX %s
 foobar
 ; A1a-B_c: foobar
 
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_cfi.ll b/test/Instrumentation/AddressSanitizer/X86/asm_cfi.ll
new file mode 100644
index 000000000000..6bfb153ea937
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_cfi.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2 -asm-instrumentation=address -asan-instrument-assembly | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: mov8b_rbp
+; CHECK: pushq %rbp
+; CHECK-NOT: .cfi_adjust_cfa_offset 8
+; CHECK: movq %rbp, %rbp
+; CHECK: .cfi_remember_state
+; CHECK: .cfi_def_cfa_register %rbp
+; CHECK: leaq -128(%rsp)
+; CHECK: callq __asan_report_load8@PLT
+; CHECK: leaq 128(%rsp)
+; CHECK: popq %rbp
+; CHECK: .cfi_restore_state
+; CHECK-NOT: .cfi_adjust_cfa_offset -8
+; CHECK: retq
+define void @mov8b_rbp(i64* %dst, i64* %src) #0 {
+entry:
+  tail call void asm sideeffect "movq ($0), %rax \0A\09movq %rax, ($1) \0A\09", "r,r,~{rax},~{memory},~{dirflag},~{fpsr},~{flags}"(i64* %src, i64* %dst)
+  ret void
+}
+
+; CHECK-LABEL: mov8b_rsp
+; CHECK: pushq %rbp
+; CHECK: .cfi_adjust_cfa_offset 8
+; CHECK: movq %rsp, %rbp
+; CHECK: .cfi_remember_state
+; CHECK: .cfi_def_cfa_register %rbp
+; CHECK: leaq -128(%rsp)
+; CHECK: callq __asan_report_load8@PLT
+; CHECK: leaq 128(%rsp)
+; CHECK: popq %rbp
+; CHECK: .cfi_restore_state
+; CHECK: .cfi_adjust_cfa_offset -8
+; CHECK: retq
+define void @mov8b_rsp(i64* %dst, i64* %src) #1 {
+entry:
+  tail call void asm sideeffect "movq ($0), %rax \0A\09movq %rax, ($1) \0A\09", "r,r,~{rax},~{memory},~{dirflag},~{fpsr},~{flags}"(i64* %src, i64* %dst)
+  ret void
+}
+
+; CHECK-LABEL: mov8b_rsp_no_cfi
+; CHECK-NOT: .cfi{{[a-z_]+}}
+define void @mov8b_rsp_no_cfi(i64* %dst, i64* %src) #2 {
+entry:
+  tail call void asm sideeffect "movq ($0), %rax \0A\09movq %rax, ($1) \0A\09", "r,r,~{rax},~{memory},~{dirflag},~{fpsr},~{flags}"(i64* %src, i64* %dst)
+  ret void
+}
+
+attributes #0 = { nounwind sanitize_address uwtable "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
+attributes #1 = { nounwind sanitize_address uwtable "no-frame-pointer-elim"="false" }
+attributes #2 = { nounwind sanitize_address "no-frame-pointer-elim"="false" }
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_cfi.s b/test/Instrumentation/AddressSanitizer/X86/asm_cfi.s
new file mode 100644
index 000000000000..417d7f3fd3e8
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_cfi.s
@@ -0,0 +1,52 @@
+# The test verifies that correct DWARF directives are emitted when
+# assembly files are instrumented.
+
+# RUN: llvm-mc %s -triple=i386-unknown-linux-gnu -asm-instrumentation=address -asan-instrument-assembly | FileCheck %s
+
+# CHECK-LABEL: load4b_cfa_rbp
+# CHECK: pushl %ebx
+# CHECK-NOT: .cfi_adjust_cfa_offset 8
+# CHECK: movl %ebp, %ebx
+# CHECK: .cfi_remember_state
+# CHECK: .cfi_def_cfa_register %ebx
+# CHECK: popl %ebx
+# CHECK: .cfi_restore_state
+# CHECK-NOT: .cfi_adjust_cfa_offset -8
+# CHECK: retl
+
+	.text
+	.globl	load4b_cfa_rbp
+	.type	load4b_cfa_rbp,@function
+swap_cfa_rbp:                                   # @swap_cfa_rbp
+	.cfi_startproc
+	pushl	%ebp
+	.cfi_def_cfa_offset 8
+	.cfi_offset %ebp, -8
+	movl	%esp, %ebp
+	.cfi_def_cfa_register %ebp
+	movl	8(%ebp), %eax
+	popl	%ebp
+	retl
+	.cfi_endproc
+
+# CHECK-LABEL: load4b_cfa_rsp
+# CHECK: pushl %ebx
+# CHECK: .cfi_adjust_cfa_offset 4
+# CHECK: movl %esp, %ebx
+# CHECK: .cfi_remember_state
+# CHECK: .cfi_def_cfa_register %ebx
+# CHECK: popl %ebx
+# CHECK: .cfi_restore_state
+# CHECK: retl
+
+	.globl	load4b_cfa_rsp
+	.type	load4b_cfa_rsp,@function
+swap_cfa_rsp:                                   # @swap_cfa_rsp
+	.cfi_startproc
+	pushl	%ebp
+	.cfi_offset %ebp, 0
+	movl	%esp, %ebp
+	movl	8(%ebp), %eax
+	popl	%ebp
+	retl
+	.cfi_endproc
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_mov.ll b/test/Instrumentation/AddressSanitizer/X86/asm_mov.ll
index ad5e02e0d065..4162d9d208af 100644
--- a/test/Instrumentation/AddressSanitizer/X86/asm_mov.ll
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_mov.ll
@@ -6,8 +6,8 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-LABEL: mov1b
 ; CHECK: leaq -128(%rsp), %rsp
 ; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: pushq %rcx
 ; CHECK-NEXT: pushq %rdi
+; CHECK-NEXT: pushq %rcx
 ; CHECK-NEXT: pushfq
 ; CHECK-NEXT: leaq {{.*}}, %rdi
 ; CHECK-NEXT: movq %rdi, %rax
@@ -26,8 +26,8 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-NEXT: callq __asan_report_load1@PLT
 ; CHECK-NEXT: [[A]]:
 ; CHECK-NEXT: popfq
-; CHECK-NEXT: popq %rdi
 ; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: popq %rdi
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: leaq 128(%rsp), %rsp
 
@@ -81,8 +81,10 @@ entry:
 ; CHECK-LABEL: mov8b
 ; CHECK: leaq -128(%rsp), %rsp
 ; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: pushq %rdi
 ; CHECK-NEXT: pushfq
-; CHECK-NEXT: leaq {{.*}}, %rax
+; CHECK-NEXT: leaq {{.*}}, %rdi
+; CHECK-NEXT: movq %rdi, %rax
 ; CHECK-NEXT: shrq $3, %rax
 ; CHECK-NEXT: cmpb $0, 2147450880(%rax)
 ; CHECK-NEXT: je [[A:.*]]
@@ -92,13 +94,16 @@ entry:
 ; CHECK-NEXT: callq __asan_report_load8@PLT
 ; CHECK-NEXT: [[A]]:
 ; CHECK-NEXT: popfq
+; CHECK-NEXT: popq %rdi
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: leaq 128(%rsp), %rsp
 
 ; CHECK: leaq -128(%rsp), %rsp
 ; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: pushq %rdi
 ; CHECK-NEXT: pushfq
-; CHECK-NEXT: leaq {{.*}}, %rax
+; CHECK-NEXT: leaq {{.*}}, %rdi
+; CHECK-NEXT: movq %rdi, %rax
 ; CHECK-NEXT: shrq $3, %rax
 ; CHECK-NEXT: cmpb $0, 2147450880(%rax)
 ; CHECK-NEXT: je [[A:.*]]
@@ -108,6 +113,7 @@ entry:
 ; CHECK-NEXT: callq __asan_report_store8@PLT
 ; CHECK-NEXT: [[A]]:
 ; CHECK-NEXT: popfq
+; CHECK-NEXT: popq %rdi
 ; CHECK-NEXT: popq %rax
 ; CHECK-NEXT: leaq 128(%rsp), %rsp
 
@@ -139,8 +145,8 @@ entry:
 attributes #0 = { nounwind uwtable sanitize_address "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind }
 
-!0 = metadata !{i32 98, i32 122, i32 160}
-!1 = metadata !{i32 305, i32 329, i32 367}
-!2 = metadata !{i32 512, i32 537, i32 576}
-!3 = metadata !{i32 721, i32 746, i32 785}
-!4 = metadata !{i32 929, i32 957, i32 999}
+!0 = !{i32 98, i32 122, i32 160}
+!1 = !{i32 305, i32 329, i32 367}
+!2 = !{i32 512, i32 537, i32 576}
+!3 = !{i32 721, i32 746, i32 785}
+!4 = !{i32 929, i32 957, i32 999}
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_mov.s b/test/Instrumentation/AddressSanitizer/X86/asm_mov.s
index 74a788cf4d51..14fc056d72f2 100644
--- a/test/Instrumentation/AddressSanitizer/X86/asm_mov.s
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_mov.s
@@ -10,13 +10,13 @@
 # CHECK: callq __asan_report_load1@PLT
 # CHECK: leaq 128(%rsp), %rsp
 #
-# CHECK-NEXT: movb (%rsi), %al
+# CHECK: movb (%rsi), %al
 #
-# CHECK-NEXT: leaq -128(%rsp), %rsp
+# CHECK: leaq -128(%rsp), %rsp
 # CHECK: callq __asan_report_store1@PLT
 # CHECK: leaq 128(%rsp), %rsp
 #
-# CHECK-NEXT: movb %al, (%rdi)
+# CHECK: movb %al, (%rdi)
 mov1b:                                  # @mov1b
 	.cfi_startproc
 # BB#0:
@@ -39,13 +39,13 @@ mov1b:                                  # @mov1b
 # CHECK: callq __asan_report_load16@PLT
 # CHECK: leaq 128(%rsp), %rsp
 #
-# CHECK-NEXT: movaps (%rsi), %xmm0
+# CHECK: movaps (%rsi), %xmm0
 #
-# CHECK-NEXT: leaq -128(%rsp), %rsp
+# CHECK: leaq -128(%rsp), %rsp
 # CHECK: callq __asan_report_store16@PLT
 # CHECK: leaq 128(%rsp), %rsp
 #
-# CHECK-NEXT: movaps %xmm0, (%rdi)
+# CHECK: movaps %xmm0, (%rdi)
 mov16b:                                 # @mov16b
 	.cfi_startproc
 # BB#0:
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_mov_no_instrumentation.s b/test/Instrumentation/AddressSanitizer/X86/asm_mov_no_instrumentation.s
index e3a1541e1951..5d5de5d2a13e 100644
--- a/test/Instrumentation/AddressSanitizer/X86/asm_mov_no_instrumentation.s
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_mov_no_instrumentation.s
@@ -5,6 +5,8 @@
 	.align	16, 0x90
 	.type	mov1b,@function
 # CHECK-LABEL: mov1b
+# CHECK: movb (%rsi), %al
+# CHECK: movb %al, (%rdi)
 # CHECK-NOT: callq __asan_report_load1@PLT
 # CHECK-NOT: callq __asan_report_store1@PLT
 mov1b:                                  # @mov1b
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_rep_movs.ll b/test/Instrumentation/AddressSanitizer/X86/asm_rep_movs.ll
new file mode 100644
index 000000000000..c3c2435fc87b
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_rep_movs.ll
@@ -0,0 +1,87 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+sse2 -asm-instrumentation=address -asan-instrument-assembly | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: rep_movs_1b
+; CHECK: pushfq
+; CHECK-NEXT: testq %rcx, %rcx
+; CHECK-NEXT: je [[B:.*]]
+
+; CHECK: leaq -128(%rsp), %rsp
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: pushq %rdx
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: pushfq
+
+; CHECK: leaq (%rsi), %rdx
+; CHECK: movq %rdx, %rdi
+; CHECK-NEXT: callq __asan_report_load1@PLT
+
+; CHECK: leaq -1(%rsi,%rcx), %rdx
+; CHECK: movq %rdx, %rdi
+; CHECK-NEXT: callq __asan_report_load1@PLT
+
+; CHECK: leaq (%rdi), %rdx
+; CHECK: movq %rdx, %rdi
+; CHECK-NEXT: callq __asan_report_store1@PLT
+
+; CHECK: leaq -1(%rdi,%rcx), %rdx
+; CHECK: movq %rdx, %rdi
+; CHECK-NEXT: callq __asan_report_store1@PLT
+
+; CHECK: popfq
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %rdx
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: leaq 128(%rsp), %rsp
+
+; CHECK: [[B]]:
+; CHECK-NEXT: popfq
+
+; CHECK: rep
+; CHECK-NEXT: movsb (%rsi), %es:(%rdi)
+
+; Function Attrs: nounwind sanitize_address uwtable
+define void @rep_movs_1b(i8* %dst, i8* %src, i64 %n) #0 {
+entry:
+  tail call void asm sideeffect "rep movsb \0A\09", "{si},{di},{cx},~{memory},~{dirflag},~{fpsr},~{flags}"(i8* %src, i8* %dst, i64 %n) #1
+  ret void
+}
+
+; CHECK-LABEL: rep_movs_8b
+; CHECK: pushfq
+; CHECK-NEXT: testq %rcx, %rcx
+; CHECK-NEXT: je [[Q:.*]]
+
+; CHECK: leaq (%rsi), %rdx
+; CHECK: movq %rdx, %rdi
+; CHECK-NEXT: callq __asan_report_load8@PLT
+
+; CHECK: leaq -1(%rsi,%rcx,8), %rdx
+; CHECK: movq %rdx, %rdi
+; CHECK-NEXT: callq __asan_report_load8@PLT
+
+; CHECK: leaq (%rdi), %rdx
+; CHECK: movq %rdx, %rdi
+; CHECK-NEXT: callq __asan_report_store8@PLT
+
+; CHECK: leaq -1(%rdi,%rcx,8), %rdx
+; CHECK: movq %rdx, %rdi
+; CHECK-NEXT: callq __asan_report_store8@PLT
+
+; CHECK: [[Q]]:
+; CHECK-NEXT: popfq
+
+; CHECK: rep
+; CHECK-NEXT: movsq (%rsi), %es:(%rdi)
+
+; Function Attrs: nounwind sanitize_address uwtable
+define void @rep_movs_8b(i64* %dst, i64* %src, i64 %n) #0 {
+entry:
+  tail call void asm sideeffect "rep movsq \0A\09", "{si},{di},{cx},~{memory},~{dirflag},~{fpsr},~{flags}"(i64* %src, i64* %dst, i64 %n) #1
+  ret void
+}
+
+attributes #0 = { nounwind sanitize_address uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind }
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_rsp_mem_op.s b/test/Instrumentation/AddressSanitizer/X86/asm_rsp_mem_op.s
new file mode 100644
index 000000000000..e40ecde0daaf
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_rsp_mem_op.s
@@ -0,0 +1,45 @@
+# The test verifies that memory references through %rsp are correctly
+# adjusted after instrumentation.
+
+# RUN: llvm-mc %s -triple=x86_64-unknown-linux-gnu -asm-instrumentation=address -asan-instrument-assembly | FileCheck %s
+
+# CHECK-LABEL: rsp_access
+# CHECK: leaq -128(%rsp), %rsp
+# CHECK: pushq %rax
+# CHECK: pushq %rdi
+# CHECK: pushfq
+# CHECK: leaq 160(%rsp), %rdi
+# CHECK: callq __asan_report_load8@PLT
+# CHECK: popfq
+# CHECK: popq %rdi
+# CHECK: popq %rax
+# CHECK: leaq 128(%rsp), %rsp
+# CHECK: movq 8(%rsp), %rax
+# CHECK: retq
+
+	.text
+	.globl rsp_access
+	.type rsp_access,@function
+rsp_access:
+	movq 8(%rsp), %rax
+	retq
+
+# CHECK-LABEL: rsp_32bit_access
+# CHECK: leaq -128(%rsp), %rsp
+# CHECK: pushq %rax
+# CHECK: pushq %rdi
+# CHECK: pushfq
+# CHECK: leaq 2147483647(%rsp), %rdi
+# CHECK: leaq 145(%rdi), %rdi
+# CHECK: callq __asan_report_load8@PLT
+# CHECK: popfq
+# CHECK: popq %rdi
+# CHECK: popq %rax
+# CHECK: leaq 128(%rsp), %rsp
+# CHECK: movq 2147483640(%rsp), %rax
+# CHECK: retq
+	.globl rsp_32bit_access
+	.type rsp_32bit_access,@function
+rsp_32bit_access:
+	movq 2147483640(%rsp), %rax
+	retq
diff --git a/test/Instrumentation/AddressSanitizer/X86/asm_swap_intel.s b/test/Instrumentation/AddressSanitizer/X86/asm_swap_intel.s
index ca3c54c1455a..093c96b0efa0 100644
--- a/test/Instrumentation/AddressSanitizer/X86/asm_swap_intel.s
+++ b/test/Instrumentation/AddressSanitizer/X86/asm_swap_intel.s
@@ -10,25 +10,25 @@
 # CHECK: callq __asan_report_load8@PLT
 # CHECK: leaq 128(%rsp), %rsp
 #
-# CHECK-NEXT: movq (%rcx), %rax
+# CHECK: movq (%rcx), %rax
 #
-# CHECK-NEXT: leaq -128(%rsp), %rsp
+# CHECK: leaq -128(%rsp), %rsp
 # CHECK: callq __asan_report_load8@PLT
 # CHECK: leaq 128(%rsp), %rsp
 #
-# CHECK-NEXT: movq (%rdx), %rbx
+# CHECK: movq (%rdx), %rbx
 #
-# CHECK-NEXT: leaq -128(%rsp), %rsp
+# CHECK: leaq -128(%rsp), %rsp
 # CHECK: callq __asan_report_store8@PLT
 # CHECK: leaq 128(%rsp), %rsp
 #
-# CHECK-NEXT: movq %rbx, (%rcx)
+# CHECK: movq %rbx, (%rcx)
 #
-# CHECK-NEXT: leaq -128(%rsp), %rsp
+# CHECK: leaq -128(%rsp), %rsp
 # CHECK: callq __asan_report_store8@PLT
 # CHECK: leaq 128(%rsp), %rsp
 #
-# CHECK-NEXT: movq %rax, (%rdx)
+# CHECK: movq %rax, (%rdx)
 swap:                                   # @swap
 	.cfi_startproc
 # BB#0:
diff --git a/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
index 63477aacd8f8..3f944c340e4b 100644
--- a/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
+++ b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
@@ -64,10 +64,10 @@ entry:
   ret void
 }
 
-!0 = metadata !{metadata !5, metadata !5, i64 0}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !6, metadata !6, i64 0}
-!4 = metadata !{i32 156132, i32 156164, i32 156205, i32 156238, i32 156282, i32 156332, i32 156370, i32 156408, i32 156447, i32 156486, i32 156536, i32 156574, i32 156612, i32 156651, i32 156690, i32 156740, i32 156778, i32 156816, i32 156855, i32 156894, i32 156944, i32 156982, i32 157020, i32 157059, i32 157098, i32 157148, i32 157186, i32 157224, i32 157263, i32 157302, i32 157352, i32 157390, i32 157428, i32 157467, i32 157506, i32 157556, i32 157594, i32 157632, i32 157671, i32 157710, i32 157760, i32 157798, i32 157836, i32 157875, i32 157914, i32 157952, i32 157996, i32 158046, i32 158099, i32 158140, i32 158179, i32 158218, i32 158268, i32 158321, i32 158362, i32 158401, i32 158440, i32 158490, i32 158543, i32 158584, i32 158623, i32 158662, i32 158712, i32 158765, i32 158806, i32 158845, i32 158884, i32 158922, i32 158963, i32 158996, i32 159029, i32 159062, i32 159109, i32 159154, i32 159199, i32 159243, i32 159286, i32 159329, i32 159375, i32 159422, i32 159478, i32 159522, i32 159566}
-!5 = metadata !{metadata !"any pointer", metadata !1}
-!6 = metadata !{metadata !"int", metadata !1}
+!0 = !{!5, !5, i64 0}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA", null}
+!3 = !{!6, !6, i64 0}
+!4 = !{i32 156132, i32 156164, i32 156205, i32 156238, i32 156282, i32 156332, i32 156370, i32 156408, i32 156447, i32 156486, i32 156536, i32 156574, i32 156612, i32 156651, i32 156690, i32 156740, i32 156778, i32 156816, i32 156855, i32 156894, i32 156944, i32 156982, i32 157020, i32 157059, i32 157098, i32 157148, i32 157186, i32 157224, i32 157263, i32 157302, i32 157352, i32 157390, i32 157428, i32 157467, i32 157506, i32 157556, i32 157594, i32 157632, i32 157671, i32 157710, i32 157760, i32 157798, i32 157836, i32 157875, i32 157914, i32 157952, i32 157996, i32 158046, i32 158099, i32 158140, i32 158179, i32 158218, i32 158268, i32 158321, i32 158362, i32 158401, i32 158440, i32 158490, i32 158543, i32 158584, i32 158623, i32 158662, i32 158712, i32 158765, i32 158806, i32 158845, i32 158884, i32 158922, i32 158963, i32 158996, i32 159029, i32 159062, i32 159109, i32 159154, i32 159199, i32 159243, i32 159286, i32 159329, i32 159375, i32 159422, i32 159478, i32 159522, i32 159566}
+!5 = !{!"any pointer", !1}
+!6 = !{!"int", !1}
diff --git a/test/Instrumentation/AddressSanitizer/basic.ll b/test/Instrumentation/AddressSanitizer/basic.ll
index 5436c60cb6d0..8020660471b0 100644
--- a/test/Instrumentation/AddressSanitizer/basic.ll
+++ b/test/Instrumentation/AddressSanitizer/basic.ll
@@ -14,7 +14,7 @@ define i32 @test_load(i32* %a) sanitize_address {
 ; CHECK:   %[[LOAD_SHADOW_PTR:[^ ]*]] = inttoptr
 ; CHECK:   %[[LOAD_SHADOW:[^ ]*]] = load i8* %[[LOAD_SHADOW_PTR]]
 ; CHECK:   icmp ne i8
-; CHECK:   br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
+; CHECK:   br i1 %{{.*}}, label %{{.*}}, label %{{.*}}!prof ![[PROF:[0-9]+]]
 ;
 ; First instrumentation block refines the shadow test.
 ; CHECK:   and i64 %[[LOAD_ADDR]], 7
@@ -169,3 +169,5 @@ define void @memintr_test(i8* %a, i8* %b) nounwind uwtable sanitize_address {
 ; CHECK: __asan_memcpy
 ; CHECK: ret void
 
+; PROF
+; CHECK: ![[PROF]] = !{!"branch_weights", i32 1, i32 100000}
diff --git a/test/Instrumentation/AddressSanitizer/coverage-dbg.ll b/test/Instrumentation/AddressSanitizer/coverage-dbg.ll
deleted file mode 100644
index 3f7998d1a738..000000000000
--- a/test/Instrumentation/AddressSanitizer/coverage-dbg.ll
+++ /dev/null
@@ -1,67 +0,0 @@
-; Test that coverage instrumentation does not lose debug location.
-
-; RUN: opt < %s -asan -asan-module -asan-coverage=1 -S | FileCheck %s
-
-; C++ source:
-; 1: struct A {
-; 2:  int f();
-; 3:  int x;
-; 4: };
-; 5:
-; 6: int A::f() {
-; 7:    return x;
-; 8: }
-; clang++ ../1.cc -O3 -g -S -emit-llvm  -fno-strict-aliasing
-; and add sanitize_address to @_ZN1A1fEv
-
-; Test that __sanitizer_cov call has !dbg pointing to the opening { of A::f().
-; CHECK: call void @__sanitizer_cov(), !dbg [[A:!.*]]
-; CHECK: [[A]] = metadata !{i32 6, i32 0, metadata !{{.*}}, null}
-
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-%struct.A = type { i32 }
-
-; Function Attrs: nounwind readonly uwtable
-define i32 @_ZN1A1fEv(%struct.A* nocapture readonly %this) #0 align 2 {
-entry:
-  tail call void @llvm.dbg.value(metadata !{%struct.A* %this}, i64 0, metadata !15), !dbg !20
-  %x = getelementptr inbounds %struct.A* %this, i64 0, i32 0, !dbg !21
-  %0 = load i32* %x, align 4, !dbg !21
-  ret i32 %0, !dbg !21
-}
-
-; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
-
-attributes #0 = { sanitize_address nounwind readonly uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!17, !18}
-!llvm.ident = !{!19}
-
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (210251)", i1 true, metadata !"", i32 0, metadata !2, metadata !3, metadata !12, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/code/llvm/build0/../1.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"../1.cc", metadata !"/code/llvm/build0"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !1, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !5, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
-!5 = metadata !{metadata !6, metadata !8}
-!6 = metadata !{i32 786445, metadata !1, metadata !"_ZTS1A", metadata !"x", i32 3, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_member ] [x] [line 3, size 32, align 32, offset 0] [from int]
-!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!8 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"f", metadata !"f", metadata !"_ZN1A1fEv", i32 2, metadata !9, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, null, i32 2} ; [ DW_TAG_subprogram ] [line 2] [f]
-!9 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!10 = metadata !{metadata !7, metadata !11}
-!11 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786478, metadata !1, metadata !"_ZTS1A", metadata !"f", metadata !"f", metadata !"_ZN1A1fEv", i32 6, metadata !9, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (%struct.A*)* @_ZN1A1fEv, null, metadata !8, metadata !14, i32 6} ; [ DW_TAG_subprogram ] [line 6] [def] [f]
-!14 = metadata !{metadata !15}
-!15 = metadata !{i32 786689, metadata !13, metadata !"this", null, i32 16777216, metadata !16, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!16 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
-!17 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!18 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!19 = metadata !{metadata !"clang version 3.5.0 (210251)"}
-!20 = metadata !{i32 0, i32 0, metadata !13, null}
-!21 = metadata !{i32 7, i32 0, metadata !13, null}
diff --git a/test/Instrumentation/AddressSanitizer/coverage.ll b/test/Instrumentation/AddressSanitizer/coverage.ll
deleted file mode 100644
index 79bb5c135330..000000000000
--- a/test/Instrumentation/AddressSanitizer/coverage.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; RUN: opt < %s -asan -asan-module -asan-coverage=0 -S | FileCheck %s --check-prefix=CHECK0
-; RUN: opt < %s -asan -asan-module -asan-coverage=1 -S | FileCheck %s --check-prefix=CHECK1
-; RUN: opt < %s -asan -asan-module -asan-coverage=2 -S | FileCheck %s --check-prefix=CHECK2
-; RUN: opt < %s -asan -asan-module -asan-coverage=2 -asan-coverage-block-threshold=10 -S | FileCheck %s --check-prefix=CHECK2
-; RUN: opt < %s -asan -asan-module -asan-coverage=2 -asan-coverage-block-threshold=1  -S | FileCheck %s --check-prefix=CHECK1
-
-; RUN: opt < %s -asan -asan-module -asan-coverage=0 -asan-globals=0 -S | \
-; RUN:     FileCheck %s --check-prefix=CHECK0
-; RUN: opt < %s -asan -asan-module -asan-coverage=1 -asan-globals=0 -S | \
-; RUN:     FileCheck %s --check-prefix=CHECK1
-; RUN: opt < %s -asan -asan-module -asan-coverage=2 -asan-globals=0 -S | \
-; RUN:     FileCheck %s --check-prefix=CHECK2
-; RUN: opt < %s -asan -asan-module -asan-coverage=2 -asan-coverage-block-threshold=10 \
-; RUN:     -asan-globals=0 -S | FileCheck %s --check-prefix=CHECK2
-; RUN: opt < %s -asan -asan-module -asan-coverage=2 -asan-coverage-block-threshold=1 \
-; RUN:     -asan-globals=0 -S | FileCheck %s --check-prefix=CHECK1
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-define void @foo(i32* %a) sanitize_address {
-entry:
-  %tobool = icmp eq i32* %a, null
-  br i1 %tobool, label %if.end, label %if.then
-
-  if.then:                                          ; preds = %entry
-  store i32 0, i32* %a, align 4
-  br label %if.end
-
-  if.end:                                           ; preds = %entry, %if.then
-  ret void
-}
-
-; CHECK0-NOT: call void @__sanitizer_cov(
-; CHECK0-NOT: call void @__sanitizer_cov_module_init(
-
-; CHECK1-LABEL: define void @foo
-; CHECK1: %0 = load atomic i8* @__asan_gen_cov_foo monotonic, align 1
-; CHECK1: %1 = icmp eq i8 0, %0
-; CHECK1: br i1 %1, label %2, label %3
-; CHECK1: call void @__sanitizer_cov
-; CHECK1-NOT: call void @__sanitizer_cov
-; CHECK1: store atomic i8 1, i8* @__asan_gen_cov_foo monotonic, align 1
-
-; CHECK1-LABEL: define internal void @asan.module_ctor
-; CHECK1-NOT: ret
-; CHECK1: call void @__sanitizer_cov_module_init(i64 1)
-; CHECK1: ret
-
-
-; CHECK2-LABEL: define void @foo
-; CHECK2: call void @__sanitizer_cov
-; CHECK2: call void @__sanitizer_cov
-; CHECK2: call void @__sanitizer_cov
-; CHECK2-NOT: call void @__sanitizer_cov
-; CHECK2: ret void
-
-; CHECK2-LABEL: define internal void @asan.module_ctor
-; CHECK2-NOT: ret
-; CHECK2: call void @__sanitizer_cov_module_init(i64 3)
-; CHECK2: ret
diff --git a/test/Instrumentation/AddressSanitizer/debug_info.ll b/test/Instrumentation/AddressSanitizer/debug_info.ll
index 336b98b289cd..c0939c5815d4 100644
--- a/test/Instrumentation/AddressSanitizer/debug_info.ll
+++ b/test/Instrumentation/AddressSanitizer/debug_info.ll
@@ -11,8 +11,8 @@ entry:
   %p.addr = alloca i32, align 4
   %r = alloca i32, align 4
   store i32 %p, i32* %p.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %p.addr}, metadata !10), !dbg !11
-  call void @llvm.dbg.declare(metadata !{i32* %r}, metadata !12), !dbg !14
+  call void @llvm.dbg.declare(metadata i32* %p.addr, metadata !10, metadata !{!"0x102"}), !dbg !11
+  call void @llvm.dbg.declare(metadata i32* %r, metadata !12, metadata !{!"0x102"}), !dbg !14
   %0 = load i32* %p.addr, align 4, !dbg !14
   %add = add nsw i32 %0, 1, !dbg !14
   store i32 %add, i32* %r, align 4, !dbg !14
@@ -24,39 +24,39 @@ entry:
 ;   CHECK: entry:
 ; Verify that llvm.dbg.declare calls are in the entry basic block.
 ;   CHECK-NOT: %entry
-;   CHECK: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[ARG_ID:[0-9]+]])
+;   CHECK: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[ARG_ID:[0-9]+]], metadata ![[OPDEREF:[0-9]+]])
 ;   CHECK-NOT: %entry
-;   CHECK: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[VAR_ID:[0-9]+]])
+;   CHECK: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[VAR_ID:[0-9]+]], metadata ![[OPDEREF:[0-9]+]])
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!17}
 
-!0 = metadata !{i32 786449, metadata !16, i32 4, metadata !"clang version 3.3 (trunk 169314)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1, null, metadata !""} ; [ DW_TAG_compile_unit ] [/usr/local/google/llvm_cmake_clang/tmp/debuginfo/a.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{i32 0}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !16, metadata !6, metadata !"zzz", metadata !"zzz", metadata !"_Z3zzzi", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @_Z3zzzi, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [zzz]
-!6 = metadata !{i32 786473, metadata !16} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786689, metadata !5, metadata !"p", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p] [line 1]
-!11 = metadata !{i32 1, i32 0, metadata !5, null}
-!12 = metadata !{i32 786688, metadata !13, metadata !"r", metadata !6, i32 2, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [r] [line 2]
+!0 = !{!"0x11\004\00clang version 3.3 (trunk 169314)\001\00\000\00\000", !16, !1, !1, !3, !1, null} ; [ DW_TAG_compile_unit ] [/usr/local/google/llvm_cmake_clang/tmp/debuginfo/a.cc] [DW_LANG_C_plus_plus]
+!1 = !{i32 0}
+!3 = !{!5}
+!5 = !{!"0x2e\00zzz\00zzz\00_Z3zzzi\001\000\001\000\006\00256\000\001", !16, !6, !7, null, i32 (i32)* @_Z3zzzi, null, null, !1} ; [ DW_TAG_subprogram ] [line 1] [def] [zzz]
+!6 = !{!"0x29", !16} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"0x101\00p\0016777217\000", !5, !6, !9} ; [ DW_TAG_arg_variable ] [p] [line 1]
+!11 = !MDLocation(line: 1, scope: !5)
+!12 = !{!"0x100\00r\002\000", !13, !6, !9} ; [ DW_TAG_auto_variable ] [r] [line 2]
 
 ; Verify that debug descriptors for argument and local variable will be replaced
 ; with descriptors that end with OpDeref (encoded as 2).
-;   CHECK: ![[ARG_ID]] = {{.*}}metadata ![[OPDEREF:[0-9]+]]} ; [ DW_TAG_arg_variable ] [p] [line 1]
-;   CHECK: ![[OPDEREF]] = metadata !{i64 2}
-;   CHECK: ![[VAR_ID]] = {{.*}}metadata ![[OPDEREF]]} ; [ DW_TAG_auto_variable ] [r] [line 2]
+;   CHECK: ![[ARG_ID]] = {{.*}} ; [ DW_TAG_arg_variable ] [p] [line 1]
+;   CHECK: ![[OPDEREF]] = !{!"0x102\006"}
+;   CHECK: ![[VAR_ID]] = {{.*}} ; [ DW_TAG_auto_variable ] [r] [line 2]
 ; Verify that there are no more variable descriptors.
 ;   CHECK-NOT: DW_TAG_arg_variable
 ;   CHECK-NOT: DW_TAG_auto_variable
 
 
-!13 = metadata !{i32 786443, metadata !16, metadata !5, i32 1, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/usr/local/google/llvm_cmake_clang/tmp/debuginfo/a.cc]
-!14 = metadata !{i32 2, i32 0, metadata !13, null}
-!15 = metadata !{i32 3, i32 0, metadata !13, null}
-!16 = metadata !{metadata !"a.cc", metadata !"/usr/local/google/llvm_cmake_clang/tmp/debuginfo"}
-!17 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!13 = !{!"0xb\001\000\000", !16, !5} ; [ DW_TAG_lexical_block ] [/usr/local/google/llvm_cmake_clang/tmp/debuginfo/a.cc]
+!14 = !MDLocation(line: 2, scope: !13)
+!15 = !MDLocation(line: 3, scope: !13)
+!16 = !{!"a.cc", !"/usr/local/google/llvm_cmake_clang/tmp/debuginfo"}
+!17 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Instrumentation/AddressSanitizer/do-not-instrument-cstring.ll b/test/Instrumentation/AddressSanitizer/do-not-instrument-cstring.ll
new file mode 100644
index 000000000000..f096ac1828f2
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/do-not-instrument-cstring.ll
@@ -0,0 +1,8 @@
+; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+
+target datalayout = "e"
+target triple = "x86_64-apple-darwin10.0.0"
+
+@foo = private global [19 x i8] c"scannerWithString:\00", section "__TEXT,__objc_methname,cstring_literals"
+
+; CHECK: @foo = private global [19 x i8] c"scannerWithString:\00", section "__TEXT,__objc_methname,cstring_literals"
+\ No newline at end of file
diff --git a/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll b/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll
index 8d14e839962e..fcc166e966e1 100644
--- a/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll
+++ b/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll
@@ -5,7 +5,7 @@ target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
 target triple = "i686-pc-windows-msvc"
 ; no action should be taken for these globals
 $global_noinst = comdat largest
-@aliasee = private unnamed_addr constant [2 x i8] [i8 1, i8 2], comdat $global_noinst
+@aliasee = private unnamed_addr constant [2 x i8] [i8 1, i8 2], comdat($global_noinst)
 @global_noinst = unnamed_addr alias [2 x i8]* @aliasee
 ; CHECK-NOT: {{asan_gen.*global_noinst}}
 ; CHECK-DAG: @global_noinst = unnamed_addr alias [2 x i8]* @aliasee
diff --git a/test/Instrumentation/AddressSanitizer/global_metadata.ll b/test/Instrumentation/AddressSanitizer/global_metadata.ll
index 4dcd53b3ad83..39017454f2ef 100644
--- a/test/Instrumentation/AddressSanitizer/global_metadata.ll
+++ b/test/Instrumentation/AddressSanitizer/global_metadata.ll
@@ -11,28 +11,18 @@ target triple = "x86_64-unknown-linux-gnu"
 @.str = private unnamed_addr constant [14 x i8] c"Hello, world!\00", align 1
 @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_asan_globals.cpp, i8* null }]
 
-; Sanitizer location descriptors:
-@.str1 = private unnamed_addr constant [22 x i8] c"/tmp/asan-globals.cpp\00", align 1
-@.asan_loc_descr = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 5, i32 5 }
-@.asan_loc_descr1 = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 7, i32 5 }
-@.asan_loc_descr2 = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 12, i32 14 }
-@.asan_loc_descr4 = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 14, i32 25 }
-
-; Global names:
-@.str2 = private unnamed_addr constant [7 x i8] c"global\00", align 1
-@.str3 = private unnamed_addr constant [16 x i8] c"dyn_init_global\00", align 1
-@.str4 = private unnamed_addr constant [11 x i8] c"static_var\00", align 1
-@.str5 = private unnamed_addr constant [17 x i8] c"<string literal>\00", align 1
-
-; Check that globals were instrumented, but sanitizer location descriptors weren't:
+; Check that globals were instrumented:
 ; CHECK: @global = global { i32, [60 x i8] } zeroinitializer, align 32
 ; CHECK: @.str = internal unnamed_addr constant { [14 x i8], [50 x i8] } { [14 x i8] c"Hello, world!\00", [50 x i8] zeroinitializer }, align 32
-; CHECK: @.asan_loc_descr = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* @.str1, i32 5, i32 5 }
-; CHECK: @.str2 = private unnamed_addr constant [7 x i8] c"global\00", align 1
+
+; Check emitted location descriptions:
+; CHECK: [[VARNAME:@__asan_gen_[0-9]+]] = private unnamed_addr constant [7 x i8] c"global\00", align 1
+; CHECK: [[FILENAME:@__asan_gen_[0-9]+]] = private unnamed_addr constant [22 x i8] c"/tmp/asan-globals.cpp\00", align 1
+; CHECK: [[LOCDESCR:@__asan_gen_[0-9]+]] = private unnamed_addr constant { [22 x i8]*, i32, i32 } { [22 x i8]* [[FILENAME]], i32 5, i32 5 }
 
 ; Check that location decriptors and global names were passed into __asan_register_globals:
-; CHECK: i64 ptrtoint ([7 x i8]* @.str2 to i64)
-; CHECK: i64 ptrtoint ({ [22 x i8]*, i32, i32 }* @.asan_loc_descr to i64)
+; CHECK: i64 ptrtoint ([7 x i8]* [[VARNAME]] to i64)
+; CHECK: i64 ptrtoint ({ [22 x i8]*, i32, i32 }* [[LOCDESCR]] to i64)
 
 ; Function Attrs: nounwind sanitize_address
 define internal void @__cxx_global_var_init() #0 section ".text.startup" {
@@ -63,9 +53,15 @@ attributes #1 = { nounwind sanitize_address "less-precise-fpmad"="false" "no-fra
 !llvm.asan.globals = !{!0, !1, !2, !3, !4}
 !llvm.ident = !{!5}
 
-!0 = metadata !{i32* @global, { [22 x i8]*, i32, i32 }* @.asan_loc_descr, [7 x i8]* @.str2, i1 false, i1 false}
-!1 = metadata !{i32* @dyn_init_global, { [22 x i8]*, i32, i32 }* @.asan_loc_descr1, [16 x i8]* @.str3, i1 true, i1 false}
-!2 = metadata !{i32* @blacklisted_global, null, null, i1 false, i1 true}
-!3 = metadata !{i32* @_ZZ4funcvE10static_var, { [22 x i8]*, i32, i32 }* @.asan_loc_descr2, [11 x i8]* @.str4, i1 false, i1 false}
-!4 = metadata !{[14 x i8]* @.str, { [22 x i8]*, i32, i32 }* @.asan_loc_descr4, [17 x i8]* @.str5, i1 false, i1 false}
-!5 = metadata !{metadata !"clang version 3.5.0 (211282)"}
+!0 = !{i32* @global, !6, !"global", i1 false, i1 false}
+!1 = !{i32* @dyn_init_global, !7, !"dyn_init_global", i1 true, i1 false}
+!2 = !{i32* @blacklisted_global, null, null, i1 false, i1 true}
+!3 = !{i32* @_ZZ4funcvE10static_var, !8, !"static_var", i1 false, i1 false}
+!4 = !{[14 x i8]* @.str, !9, !"<string literal>", i1 false, i1 false}
+
+!5 = !{!"clang version 3.5.0 (211282)"}
+
+!6 = !{!"/tmp/asan-globals.cpp", i32 5, i32 5}
+!7 = !{!"/tmp/asan-globals.cpp", i32 7, i32 5}
+!8 = !{!"/tmp/asan-globals.cpp", i32 12, i32 14}
+!9 = !{!"/tmp/asan-globals.cpp", i32 14, i32 25}
diff --git a/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll b/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll
new file mode 100644
index 000000000000..25807bb05a7a
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll
@@ -0,0 +1,24 @@
+; Test asan internal compiler flags:
+;   -asan-instrument-allocas=1
+
+; RUN: opt < %s -asan -asan-module -asan-instrument-allocas=1 -S | FileCheck %s --check-prefix=CHECK-ALLOCA
+; RUN: opt < %s -asan -asan-module -asan-instrument-allocas=0 -S | FileCheck %s --check-prefix=CHECK-NOALLOCA
+; RUN: opt < %s -asan -asan-module -S | FileCheck %s --check-prefix=CHECK-NOALLOCA
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo(i32 %len) sanitize_address {
+entry:
+; CHECK-ALLOCA: store i32 -892679478
+; CHECK-ALLOCA: store i32 -875836469
+; CHECK-NOALLOCA-NOT: store i32 -892679478
+; CHECK-NOALLOCA-NOT: store i32 -875836469
+  %0 = alloca i32, align 4
+  %1 = alloca i8*
+  store i32 %len, i32* %0, align 4
+  %2 = load i32* %0, align 4
+  %3 = zext i32 %2 to i64
+  %4 = alloca i8, i64 %3, align 32
+  ret void
+}
+
diff --git a/test/Instrumentation/AddressSanitizer/instrument_global.ll b/test/Instrumentation/AddressSanitizer/instrument_global.ll
index 80791d9a905f..259c8153227d 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_global.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_global.ll
@@ -69,7 +69,7 @@ entry:
 
 
 !llvm.asan.globals = !{!0}
-!0 = metadata !{[10 x i32]* @GlobDy, null, null, i1 true, i1 false}
+!0 = !{[10 x i32]* @GlobDy, null, null, i1 true, i1 false}
 
 ; CHECK-LABEL: define internal void @asan.module_ctor
 ; CHECK-NOT: ret
diff --git a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
index c119879351a9..b89ca44a076e 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
@@ -7,10 +7,10 @@ target triple = "x86_64-unknown-linux-gnu"
 @YYY = global i32 0, align 4           ; W/o dynamic initializer.
 ; Clang will emit the following metadata identifying @xxx as dynamically
 ; initialized.
-!0 = metadata !{i32* @xxx, null, null, i1 true, i1 false}
-!1 = metadata !{i32* @XXX, null, null, i1 true, i1 false}
-!2 = metadata !{i32* @yyy, null, null, i1 false, i1 false}
-!3 = metadata !{i32* @YYY, null, null, i1 false, i1 false}
+!0 = !{i32* @xxx, null, null, i1 true, i1 false}
+!1 = !{i32* @XXX, null, null, i1 true, i1 false}
+!2 = !{i32* @yyy, null, null, i1 false, i1 false}
+!3 = !{i32* @YYY, null, null, i1 false, i1 false}
 !llvm.asan.globals = !{!0, !1, !2, !3}
 
 define i32 @initializer() uwtable {
@@ -25,29 +25,39 @@ entry:
   ret void
 }
 
-@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+@llvm.global_ctors = appending global [2 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @__late_ctor }, { i32, void ()* } { i32 0, void ()* @__early_ctor }]
 
-define internal void @_GLOBAL__I_a() sanitize_address section ".text.startup" {
+define internal void @__late_ctor() sanitize_address section ".text.startup" {
 entry:
   call void @__cxx_global_var_init()
   ret void
 }
 
 ; Clang indicated that @xxx was dynamically initailized.
-; __asan_{before,after}_dynamic_init should be called from _GLOBAL__I_a
+; __asan_{before,after}_dynamic_init should be called from __late_ctor
 
-; CHECK: define internal void @_GLOBAL__I_a
+; CHECK-LABEL: define internal void @__late_ctor
 ; CHECK-NOT: ret
 ; CHECK: call void @__asan_before_dynamic_init
 ; CHECK: call void @__cxx_global_var_init
 ; CHECK: call void @__asan_after_dynamic_init
 ; CHECK: ret
 
+; CTOR with priority 0 should not be instrumented.
+define internal void @__early_ctor() sanitize_address section ".text.startup" {
+entry:
+  call void @__cxx_global_var_init()
+  ret void
+}
+; CHECK-LABEL: define internal void @__early_ctor
+; CHECK-NOT: __asan
+; CHECK: ret
+
 ; Check that xxx is instrumented.
 define void @touch_xxx() sanitize_address {
   store i32 0, i32 *@xxx, align 4
   ret void
-; CHECK: define void @touch_xxx
+; CHECK-LABEL: touch_xxx
 ; CHECK: call void @__asan_report_store4
 ; CHECK: ret void
 }
diff --git a/test/Instrumentation/AddressSanitizer/keep-instrumented_functions.ll b/test/Instrumentation/AddressSanitizer/keep-instrumented_functions.ll
deleted file mode 100644
index 8726b8e5f9c9..000000000000
--- a/test/Instrumentation/AddressSanitizer/keep-instrumented_functions.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; Test the -asan-keep-uninstrumented-functions flag: FOO should get cloned
-; RUN: opt < %s -asan -asan-module -asan-keep-uninstrumented-functions -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-@a = global i32 0, align 4
-
-define i32 @main() sanitize_address {
-entry:
-  tail call void @FOO(i32* @a)
-  ret i32 0
-}
-
-define void @FOO(i32* nocapture %x) sanitize_address {
-entry:
-  store i32 1, i32* %x, align 4
-  ret void
-}
-
-; main should not be cloned since it is not being instrumented by asan.
-; CHECK-NOT: NOASAN_main
-; CHECK: define void @FOO{{.*}} section "ASAN"
-; CHECK: define void @NOASAN_FOO{{.*}} section "NOASAN"
diff --git a/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll b/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll
new file mode 100644
index 000000000000..43711b7a1f27
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -asan -asan-module -asan-stack-dynamic-alloca \
+; RUN:       -asan-use-after-return -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @Func1() sanitize_address {
+entry:
+; CHECK-LABEL: Func1
+
+; CHECK: entry:
+; CHECK: load i32* @__asan_option_detect_stack_use_after_return
+
+; CHECK: <label>:[[UAR_ENABLED_BB:[0-9]+]]
+; CHECK: [[FAKE_STACK_RT:%[0-9]+]] = call i64 @__asan_stack_malloc_
+
+; CHECK: <label>:[[FAKE_STACK_BB:[0-9]+]]
+; CHECK: [[FAKE_STACK:%[0-9]+]] = phi i64 [ 0, %entry ], [ [[FAKE_STACK_RT]], %[[UAR_ENABLED_BB]] ]
+; CHECK: icmp eq i64 [[FAKE_STACK]], 0
+
+; CHECK: <label>:[[NO_FAKE_STACK_BB:[0-9]+]]
+; CHECK: %MyAlloca = alloca i8, i64
+; CHECK: [[ALLOCA:%[0-9]+]] = ptrtoint i8* %MyAlloca
+
+; CHECK: phi i64 [ [[FAKE_STACK]], %[[FAKE_STACK_BB]] ], [ [[ALLOCA]], %[[NO_FAKE_STACK_BB]] ]
+
+; CHECK: ret void
+
+  %XXX = alloca [20 x i8], align 1
+  ret void
+}
+
+; Test that dynamic alloca is not used for functions with inline assembly.
+define void @Func2() sanitize_address {
+entry:
+; CHECK-LABEL: Func2
+; CHECK: alloca [96 x i8]
+; CHECK: ret void
+
+  %XXX = alloca [20 x i8], align 1
+  call void asm sideeffect "mov %%rbx, %%rcx", "~{dirflag},~{fpsr},~{flags}"() nounwind
+  ret void
+}
diff --git a/test/Instrumentation/AddressSanitizer/stack_layout.ll b/test/Instrumentation/AddressSanitizer/stack_layout.ll
index c027acf3e4fd..97e3bbb58723 100644
--- a/test/Instrumentation/AddressSanitizer/stack_layout.ll
+++ b/test/Instrumentation/AddressSanitizer/stack_layout.ll
@@ -1,6 +1,9 @@
 ; Test the ASan's stack layout.
 ; More tests in tests/Transforms/Utils/ASanStackFrameLayoutTest.cpp
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s
+; RUN: opt < %s -asan -asan-module -asan-stack-dynamic-alloca=0 -S \
+; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-STATIC
+; RUN: opt < %s -asan -asan-module -asan-stack-dynamic-alloca=1 -S \
+; RUN:     | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DYNAMIC
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -14,7 +17,10 @@ declare void @Use(i8*)
 define void @Func1() sanitize_address {
 entry:
 ; CHECK-LABEL: Func1
-; CHECK: alloca [192 x i8]
+
+; CHECK-STATIC: alloca [192 x i8]
+; CHECK-DYNAMIC: alloca i8, i64 192
+
 ; CHECK-NOT: alloca
 ; CHECK: ret void
   %XXX = alloca [10 x i8], align 1
@@ -26,7 +32,10 @@ entry:
 define void @Func2() sanitize_address {
 entry:
 ; CHECK-LABEL: Func2
-; CHECK: alloca [864 x i8]
+
+; CHECK-STATIC: alloca [864 x i8]
+; CHECK-DYNAMIC: alloca i8, i64 864
+
 ; CHECK-NOT: alloca
 ; CHECK: ret void
   %AAA = alloca [5 x i8], align 1
@@ -39,7 +48,10 @@ entry:
 define void @Func3() sanitize_address {
 entry:
 ; CHECK-LABEL: Func3
-; CHECK: alloca [768 x i8]
+
+; CHECK-STATIC: alloca [768 x i8]
+; CHECK-DYNAMIC: alloca i8, i64 768
+
 ; CHECK-NOT: alloca
 ; CHECK: ret void
   %AAA = alloca [128 x i8], align 16
diff --git a/test/Instrumentation/AddressSanitizer/ubsan.ll b/test/Instrumentation/AddressSanitizer/ubsan.ll
index 22e4172069cf..5535efe4b4dd 100644
--- a/test/Instrumentation/AddressSanitizer/ubsan.ll
+++ b/test/Instrumentation/AddressSanitizer/ubsan.ll
@@ -49,4 +49,4 @@ cont:                                             ; preds = %handler.dynamic_typ
   ret void
 }
 
-!0 = metadata !{}
+!0 = !{}
diff --git a/test/Instrumentation/AddressSanitizer/undecidable-dynamic-alloca-1.ll b/test/Instrumentation/AddressSanitizer/undecidable-dynamic-alloca-1.ll
new file mode 100644
index 000000000000..c67fb5014630
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/undecidable-dynamic-alloca-1.ll
@@ -0,0 +1,23 @@
+; Test that undecidable dynamic allocas are skipped by ASan.
+
+; RUN: opt < %s -asan -asan-module -asan-instrument-allocas=1 -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @g(i64 %n) sanitize_address {
+entry:
+  %cmp = icmp sgt i64 %n, 100
+  br i1 %cmp, label %do_alloca, label %done
+
+do_alloca:
+; CHECK-NOT: store i32 -892679478
+  %0 = alloca i8, i64 %n, align 1
+  call void @f(i8* %0)
+  br label %done
+
+done:
+  ret void
+}
+
+declare void @f(i8*)
+
diff --git a/test/Instrumentation/DataFlowSanitizer/Inputs/debuglist.txt b/test/Instrumentation/DataFlowSanitizer/Inputs/debuglist.txt
new file mode 100644
index 000000000000..daf7b5f4377a
--- /dev/null
+++ b/test/Instrumentation/DataFlowSanitizer/Inputs/debuglist.txt
@@ -0,0 +1,2 @@
+fun:main=uninstrumented
+fun:main=discard
diff --git a/test/Instrumentation/DataFlowSanitizer/abilist.ll b/test/Instrumentation/DataFlowSanitizer/abilist.ll
index 66ddc140a082..9a45dbcbab0f 100644
--- a/test/Instrumentation/DataFlowSanitizer/abilist.ll
+++ b/test/Instrumentation/DataFlowSanitizer/abilist.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -dfsan -dfsan-args-abi -dfsan-abilist=%S/Inputs/abilist.txt -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 ; CHECK: i32 @discard(i32 %a, i32 %b)
 define i32 @discard(i32 %a, i32 %b) {
@@ -12,16 +13,38 @@ define i32 @functional(i32 %a, i32 %b) {
   ret i32 %c
 }
 
+; CHECK: define i32 (i32, i32)* @discardg(i32)
+; CHECK: %[[CALL:.*]] = call { i32 (i32, i32)*, i16 } @"dfs$g"(i32 %0, i16 0)
+; CHECK: %[[XVAL:.*]] = extractvalue { i32 (i32, i32)*, i16 } %[[CALL]], 0
+; CHECK: ret {{.*}} %[[XVAL]]
+@discardg = alias i32 (i32, i32)* (i32)* @g
+
 declare void @custom1(i32 %a, i32 %b)
 
+; CHECK: define linkonce_odr { i32, i16 } @"dfsw$custom2"(i32, i32, i16, i16)
+; CHECK: %[[LABELRETURN2:.*]] = alloca i16
+; CHECK: %[[RV:.*]] = call i32 @__dfsw_custom2
+; CHECK: %[[RVSHADOW:.*]] = load i16* %[[LABELRETURN2]]
+; CHECK: insertvalue {{.*}}[[RV]], 0
+; CHECK: insertvalue {{.*}}[[RVSHADOW]], 1
+; CHECK: ret { i32, i16 }
 declare i32 @custom2(i32 %a, i32 %b)
 
+; CHECK: define linkonce_odr void @"dfsw$custom3"(i32, i16, i16*, ...)
+; CHECK: call void @__dfsan_vararg_wrapper(i8*
+; CHECK: unreachable
+declare void @custom3(i32 %a, ...)
+
+declare i32 @custom4(i32 %a, ...)
+
 declare void @customcb(i32 (i32)* %cb)
 
 declare i32 @cb(i32)
 
 ; CHECK: @"dfs$f"
-define void @f() {
+define void @f(i32 %x) {
+  ; CHECK: %[[LABELVA2:.*]] = alloca [2 x i16]
+  ; CHECK: %[[LABELVA1:.*]] = alloca [2 x i16]
   ; CHECK: %[[LABELRETURN:.*]] = alloca i16
 
   ; CHECK: call void @__dfsw_custom1(i32 1, i32 2, i16 0, i16 0)
@@ -33,22 +56,21 @@ define void @f() {
   ; CHECK: call void @__dfsw_customcb({{.*}} @"dfst0$customcb", i8* bitcast ({{.*}} @"dfs$cb" to i8*), i16 0)
   call void @customcb(i32 (i32)* @cb)
 
-  ret void
-}
+  ; CHECK: %[[LABELVA1_0:.*]] = getelementptr inbounds [2 x i16]* %[[LABELVA1]], i32 0, i32 0
+  ; CHECK: store i16 0, i16* %[[LABELVA1_0]]
+  ; CHECK: %[[LABELVA1_1:.*]] = getelementptr inbounds [2 x i16]* %[[LABELVA1]], i32 0, i32 1
+  ; CHECK: store i16 %{{.*}}, i16* %[[LABELVA1_1]]
+  ; CHECK: %[[LABELVA1_0A:.*]] = getelementptr inbounds [2 x i16]* %[[LABELVA1]], i32 0, i32 0
+  ; CHECK: call void (i32, i16, i16*, ...)* @__dfsw_custom3(i32 1, i16 0, i16* %[[LABELVA1_0A]], i32 2, i32 %{{.*}})
+  call void (i32, ...)* @custom3(i32 1, i32 2, i32 %x)
 
-; CHECK: define i32 (i32, i32)* @discardg(i32)
-; CHECK: %[[CALL:.*]] = call { i32 (i32, i32)*, i16 } @"dfs$g"(i32 %0, i16 0)
-; CHECK: %[[XVAL:.*]] = extractvalue { i32 (i32, i32)*, i16 } %[[CALL]], 0
-; CHECK: ret {{.*}} %[[XVAL]]
-@discardg = alias i32 (i32, i32)* (i32)* @g
+  ; CHECK: %[[LABELVA2_0:.*]] = getelementptr inbounds [2 x i16]* %[[LABELVA2]], i32 0, i32 0
+  ; CHECK: %[[LABELVA2_0A:.*]] = getelementptr inbounds [2 x i16]* %[[LABELVA2]], i32 0, i32 0
+  ; CHECK: call i32 (i32, i16, i16*, i16*, ...)* @__dfsw_custom4(i32 1, i16 0, i16* %[[LABELVA2_0A]], i16* %[[LABELRETURN]], i32 2, i32 3)
+  call i32 (i32, ...)* @custom4(i32 1, i32 2, i32 3)
 
-; CHECK: define linkonce_odr { i32, i16 } @"dfsw$custom2"(i32, i32, i16, i16)
-; CHECK: %[[LABELRETURN2:.*]] = alloca i16
-; CHECK: %[[RV:.*]] = call i32 @__dfsw_custom2
-; CHECK: %[[RVSHADOW:.*]] = load i16* %[[LABELRETURN2]]
-; CHECK: insertvalue {{.*}}[[RV]], 0
-; CHECK: insertvalue {{.*}}[[RVSHADOW]], 1
-; CHECK: ret { i32, i16 }
+  ret void
+}
 
 ; CHECK: @"dfs$g"
 define i32 (i32, i32)* @g(i32) {
@@ -73,3 +95,6 @@ define i32 (i32, i32)* @g(i32) {
 ; CHECK: %[[XVAL1:.*]] = extractvalue { i32, i16 } %[[CALL]], 1
 ; CHECK: store i16 %[[XVAL1]], i16* %3
 ; CHECK: ret i32 %[[XVAL0]]
+
+; CHECK: declare void @__dfsw_custom3(i32, i16, i16*, ...)
+; CHECK: declare i32 @__dfsw_custom4(i32, i16, i16*, i16*, ...)
diff --git a/test/Instrumentation/DataFlowSanitizer/args-unreachable-bb.ll b/test/Instrumentation/DataFlowSanitizer/args-unreachable-bb.ll
index a699f7523c1a..1133462688b6 100644
--- a/test/Instrumentation/DataFlowSanitizer/args-unreachable-bb.ll
+++ b/test/Instrumentation/DataFlowSanitizer/args-unreachable-bb.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -dfsan -verify -dfsan-args-abi -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 ; CHECK-LABEL: @"dfs$unreachable_bb1"
 define i8 @unreachable_bb1() {
diff --git a/test/Instrumentation/DataFlowSanitizer/arith.ll b/test/Instrumentation/DataFlowSanitizer/arith.ll
index dc618963e8bb..db33e452083c 100644
--- a/test/Instrumentation/DataFlowSanitizer/arith.ll
+++ b/test/Instrumentation/DataFlowSanitizer/arith.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -dfsan -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 define i8 @add(i8 %a, i8 %b) {
   ; CHECK: @"dfs$add"
diff --git a/test/Instrumentation/DataFlowSanitizer/call.ll b/test/Instrumentation/DataFlowSanitizer/call.ll
index 813f4c1e76fa..dadb40fdb334 100644
--- a/test/Instrumentation/DataFlowSanitizer/call.ll
+++ b/test/Instrumentation/DataFlowSanitizer/call.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -dfsan -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 ; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [64 x i16]
 ; CHECK: @__dfsan_retval_tls = external thread_local(initialexec) global i16
diff --git a/test/Instrumentation/DataFlowSanitizer/debug-nonzero-labels.ll b/test/Instrumentation/DataFlowSanitizer/debug-nonzero-labels.ll
index 6bcd5c5f0c18..16de9ccf0d53 100644
--- a/test/Instrumentation/DataFlowSanitizer/debug-nonzero-labels.ll
+++ b/test/Instrumentation/DataFlowSanitizer/debug-nonzero-labels.ll
@@ -1,15 +1,19 @@
 ; RUN: opt < %s -dfsan -dfsan-args-abi -dfsan-debug-nonzero-labels -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 declare i32 @g()
 
-; CHECK: define { i32, i16 } @"dfs$f"(i32, i16)
-define i32 @f(i32) {
+; CHECK: define { i32, i16 } @"dfs$f"(i32, i32, i16, i16)
+define i32 @f(i32, i32) {
   ; CHECK: [[LOCALLABELALLOCA:%.*]] = alloca i16
-  ; CHECK: [[ARGCMP:%.*]] = icmp ne i16 %1, 0
-  ; CHECK: br i1 [[ARGCMP]]
   %i = alloca i32
-  store i32 %0, i32* %i
+  ; CHECK: [[ARGCMP1:%.*]] = icmp ne i16 %3, 0
+  ; CHECK: br i1 [[ARGCMP1]]
+  ; CHECK: [[ARGCMP2:%.*]] = icmp ne i16 %2, 0
+  ; CHECK: br i1 [[ARGCMP2]]
+  %x = add i32 %0, %1
+  store i32 %x, i32* %i
   ; CHECK: [[CALL:%.*]] = call { i32, i16 } @"dfs$g"()
   ; CHECK: [[CALLLABEL:%.*]] = extractvalue { i32, i16 } [[CALL]], 1
   ; CHECK: [[CALLCMP:%.*]] = icmp ne i16 [[CALLLABEL]], 0
diff --git a/test/Instrumentation/DataFlowSanitizer/debug.ll b/test/Instrumentation/DataFlowSanitizer/debug.ll
new file mode 100644
index 000000000000..837e95341f29
--- /dev/null
+++ b/test/Instrumentation/DataFlowSanitizer/debug.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -dfsan -dfsan-abilist=%S/Inputs/debuglist.txt -S | FileCheck %s
+
+; CHECK: i32 ()* @main, {{.*}} ; [ DW_TAG_subprogram ] {{.*}} [main]
+
+; Generated from a simple source file compiled with clang -g:
+; int main() {
+; }
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 {
+entry:
+  ret i32 0, !dbg !12
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/debug.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"debug.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00main\00main\00\001\000\001\000\000\00256\000\001", !1, !5, !6, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
+!5 = !{!"0x29", !1}    ; [ DW_TAG_file_type ] [/tmp/dbginfo/debug.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 2}
+!11 = !{!"clang version 3.6.0 "}
+!12 = !MDLocation(line: 2, column: 1, scope: !4)
diff --git a/test/Instrumentation/DataFlowSanitizer/load.ll b/test/Instrumentation/DataFlowSanitizer/load.ll
index 6cd5151b1260..4d36c0940edf 100644
--- a/test/Instrumentation/DataFlowSanitizer/load.ll
+++ b/test/Instrumentation/DataFlowSanitizer/load.ll
@@ -1,6 +1,19 @@
 ; RUN: opt < %s -dfsan -dfsan-combine-pointer-labels-on-load=1 -S | FileCheck %s --check-prefix=COMBINE_PTR_LABEL
 ; RUN: opt < %s -dfsan -dfsan-combine-pointer-labels-on-load=0 -S | FileCheck %s --check-prefix=NO_COMBINE_PTR_LABEL
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define {} @load0({}* %p) {
+  ; COMBINE_PTR_LABEL: @"dfs$load0"
+  ; COMBINE_PTR_LABEL: load
+  ; COMBINE_PTR_LABEL-NOT: load
+
+  ; NO_COMBINE_PTR_LABEL: @"dfs$load0"
+  ; NO_COMBINE_PTR_LABEL: load
+  ; NO_COMBINE_PTR_LABEL-NOT: load
+  %a = load {}* %p
+  ret {} %a
+}
 
 define i8 @load8(i8* %p) {
   ; COMBINE_PTR_LABEL: @"dfs$load8"
@@ -152,4 +165,4 @@ define i64 @load64(i64* %p) {
 
   %a = load i64* %p
   ret i64 %a
-}
-\ No newline at end of file
+}
diff --git a/test/Instrumentation/DataFlowSanitizer/memset.ll b/test/Instrumentation/DataFlowSanitizer/memset.ll
index 062ef1ac9f49..7b3cb68e01c5 100644
--- a/test/Instrumentation/DataFlowSanitizer/memset.ll
+++ b/test/Instrumentation/DataFlowSanitizer/memset.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -dfsan -dfsan-args-abi -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
 
diff --git a/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll b/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll
index f3c36b17b388..b14de5f9e513 100644
--- a/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll
+++ b/test/Instrumentation/DataFlowSanitizer/prefix-rename.ll
@@ -1,6 +1,7 @@
 ; RUN: opt < %s -dfsan -S | FileCheck %s
 ; RUN: opt < %s -dfsan -dfsan-args-abi -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 ; CHECK: module asm ".symver dfs$f1,dfs$f@@version1"
 module asm ".symver f1,f@@version1"
diff --git a/test/Instrumentation/DataFlowSanitizer/store.ll b/test/Instrumentation/DataFlowSanitizer/store.ll
index 8060537f3152..365b62d9e107 100644
--- a/test/Instrumentation/DataFlowSanitizer/store.ll
+++ b/test/Instrumentation/DataFlowSanitizer/store.ll
@@ -1,6 +1,20 @@
 ; RUN: opt < %s -dfsan -dfsan-combine-pointer-labels-on-store=1 -S | FileCheck %s --check-prefix=COMBINE_PTR_LABEL
 ; RUN: opt < %s -dfsan -dfsan-combine-pointer-labels-on-store=0 -S | FileCheck %s --check-prefix=NO_COMBINE_PTR_LABEL
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @store0({} %v, {}* %p) {
+  ; COMBINE_PTR_LABEL: @"dfs$store0"
+  ; COMBINE_PTR_LABEL: store
+  ; COMBINE_PTR_LABEL-NOT: store
+
+  ; NO_COMBINE_PTR_LABEL: @"dfs$store0"
+  ; NO_COMBINE_PTR_LABEL: store
+  ; NO_COMBINE_PTR_LABEL-NOT: store
+
+  store {} %v, {}* %p
+  ret void
+}
 
 define void @store8(i8 %v, i8* %p) {
   ; NO_COMBINE_PTR_LABEL: @"dfs$store8"
diff --git a/test/Instrumentation/DataFlowSanitizer/union-large.ll b/test/Instrumentation/DataFlowSanitizer/union-large.ll
new file mode 100644
index 000000000000..04082391f7de
--- /dev/null
+++ b/test/Instrumentation/DataFlowSanitizer/union-large.ll
@@ -0,0 +1,3014 @@
+; RUN: opt < %s -dfsan -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that we use dfsan_union in large functions instead of __dfsan_union.
+
+; CHECK-LABEL: @"dfs$foo"
+define i32 @foo(i32 %a, i32 %b) {
+bb0:
+  br label %bb1
+
+bb1:
+  br label %bb2
+
+bb2:
+  br label %bb3
+
+bb3:
+  br label %bb4
+
+bb4:
+  br label %bb5
+
+bb5:
+  br label %bb6
+
+bb6:
+  br label %bb7
+
+bb7:
+  br label %bb8
+
+bb8:
+  br label %bb9
+
+bb9:
+  br label %bb10
+
+bb10:
+  br label %bb11
+
+bb11:
+  br label %bb12
+
+bb12:
+  br label %bb13
+
+bb13:
+  br label %bb14
+
+bb14:
+  br label %bb15
+
+bb15:
+  br label %bb16
+
+bb16:
+  br label %bb17
+
+bb17:
+  br label %bb18
+
+bb18:
+  br label %bb19
+
+bb19:
+  br label %bb20
+
+bb20:
+  br label %bb21
+
+bb21:
+  br label %bb22
+
+bb22:
+  br label %bb23
+
+bb23:
+  br label %bb24
+
+bb24:
+  br label %bb25
+
+bb25:
+  br label %bb26
+
+bb26:
+  br label %bb27
+
+bb27:
+  br label %bb28
+
+bb28:
+  br label %bb29
+
+bb29:
+  br label %bb30
+
+bb30:
+  br label %bb31
+
+bb31:
+  br label %bb32
+
+bb32:
+  br label %bb33
+
+bb33:
+  br label %bb34
+
+bb34:
+  br label %bb35
+
+bb35:
+  br label %bb36
+
+bb36:
+  br label %bb37
+
+bb37:
+  br label %bb38
+
+bb38:
+  br label %bb39
+
+bb39:
+  br label %bb40
+
+bb40:
+  br label %bb41
+
+bb41:
+  br label %bb42
+
+bb42:
+  br label %bb43
+
+bb43:
+  br label %bb44
+
+bb44:
+  br label %bb45
+
+bb45:
+  br label %bb46
+
+bb46:
+  br label %bb47
+
+bb47:
+  br label %bb48
+
+bb48:
+  br label %bb49
+
+bb49:
+  br label %bb50
+
+bb50:
+  br label %bb51
+
+bb51:
+  br label %bb52
+
+bb52:
+  br label %bb53
+
+bb53:
+  br label %bb54
+
+bb54:
+  br label %bb55
+
+bb55:
+  br label %bb56
+
+bb56:
+  br label %bb57
+
+bb57:
+  br label %bb58
+
+bb58:
+  br label %bb59
+
+bb59:
+  br label %bb60
+
+bb60:
+  br label %bb61
+
+bb61:
+  br label %bb62
+
+bb62:
+  br label %bb63
+
+bb63:
+  br label %bb64
+
+bb64:
+  br label %bb65
+
+bb65:
+  br label %bb66
+
+bb66:
+  br label %bb67
+
+bb67:
+  br label %bb68
+
+bb68:
+  br label %bb69
+
+bb69:
+  br label %bb70
+
+bb70:
+  br label %bb71
+
+bb71:
+  br label %bb72
+
+bb72:
+  br label %bb73
+
+bb73:
+  br label %bb74
+
+bb74:
+  br label %bb75
+
+bb75:
+  br label %bb76
+
+bb76:
+  br label %bb77
+
+bb77:
+  br label %bb78
+
+bb78:
+  br label %bb79
+
+bb79:
+  br label %bb80
+
+bb80:
+  br label %bb81
+
+bb81:
+  br label %bb82
+
+bb82:
+  br label %bb83
+
+bb83:
+  br label %bb84
+
+bb84:
+  br label %bb85
+
+bb85:
+  br label %bb86
+
+bb86:
+  br label %bb87
+
+bb87:
+  br label %bb88
+
+bb88:
+  br label %bb89
+
+bb89:
+  br label %bb90
+
+bb90:
+  br label %bb91
+
+bb91:
+  br label %bb92
+
+bb92:
+  br label %bb93
+
+bb93:
+  br label %bb94
+
+bb94:
+  br label %bb95
+
+bb95:
+  br label %bb96
+
+bb96:
+  br label %bb97
+
+bb97:
+  br label %bb98
+
+bb98:
+  br label %bb99
+
+bb99:
+  br label %bb100
+
+bb100:
+  br label %bb101
+
+bb101:
+  br label %bb102
+
+bb102:
+  br label %bb103
+
+bb103:
+  br label %bb104
+
+bb104:
+  br label %bb105
+
+bb105:
+  br label %bb106
+
+bb106:
+  br label %bb107
+
+bb107:
+  br label %bb108
+
+bb108:
+  br label %bb109
+
+bb109:
+  br label %bb110
+
+bb110:
+  br label %bb111
+
+bb111:
+  br label %bb112
+
+bb112:
+  br label %bb113
+
+bb113:
+  br label %bb114
+
+bb114:
+  br label %bb115
+
+bb115:
+  br label %bb116
+
+bb116:
+  br label %bb117
+
+bb117:
+  br label %bb118
+
+bb118:
+  br label %bb119
+
+bb119:
+  br label %bb120
+
+bb120:
+  br label %bb121
+
+bb121:
+  br label %bb122
+
+bb122:
+  br label %bb123
+
+bb123:
+  br label %bb124
+
+bb124:
+  br label %bb125
+
+bb125:
+  br label %bb126
+
+bb126:
+  br label %bb127
+
+bb127:
+  br label %bb128
+
+bb128:
+  br label %bb129
+
+bb129:
+  br label %bb130
+
+bb130:
+  br label %bb131
+
+bb131:
+  br label %bb132
+
+bb132:
+  br label %bb133
+
+bb133:
+  br label %bb134
+
+bb134:
+  br label %bb135
+
+bb135:
+  br label %bb136
+
+bb136:
+  br label %bb137
+
+bb137:
+  br label %bb138
+
+bb138:
+  br label %bb139
+
+bb139:
+  br label %bb140
+
+bb140:
+  br label %bb141
+
+bb141:
+  br label %bb142
+
+bb142:
+  br label %bb143
+
+bb143:
+  br label %bb144
+
+bb144:
+  br label %bb145
+
+bb145:
+  br label %bb146
+
+bb146:
+  br label %bb147
+
+bb147:
+  br label %bb148
+
+bb148:
+  br label %bb149
+
+bb149:
+  br label %bb150
+
+bb150:
+  br label %bb151
+
+bb151:
+  br label %bb152
+
+bb152:
+  br label %bb153
+
+bb153:
+  br label %bb154
+
+bb154:
+  br label %bb155
+
+bb155:
+  br label %bb156
+
+bb156:
+  br label %bb157
+
+bb157:
+  br label %bb158
+
+bb158:
+  br label %bb159
+
+bb159:
+  br label %bb160
+
+bb160:
+  br label %bb161
+
+bb161:
+  br label %bb162
+
+bb162:
+  br label %bb163
+
+bb163:
+  br label %bb164
+
+bb164:
+  br label %bb165
+
+bb165:
+  br label %bb166
+
+bb166:
+  br label %bb167
+
+bb167:
+  br label %bb168
+
+bb168:
+  br label %bb169
+
+bb169:
+  br label %bb170
+
+bb170:
+  br label %bb171
+
+bb171:
+  br label %bb172
+
+bb172:
+  br label %bb173
+
+bb173:
+  br label %bb174
+
+bb174:
+  br label %bb175
+
+bb175:
+  br label %bb176
+
+bb176:
+  br label %bb177
+
+bb177:
+  br label %bb178
+
+bb178:
+  br label %bb179
+
+bb179:
+  br label %bb180
+
+bb180:
+  br label %bb181
+
+bb181:
+  br label %bb182
+
+bb182:
+  br label %bb183
+
+bb183:
+  br label %bb184
+
+bb184:
+  br label %bb185
+
+bb185:
+  br label %bb186
+
+bb186:
+  br label %bb187
+
+bb187:
+  br label %bb188
+
+bb188:
+  br label %bb189
+
+bb189:
+  br label %bb190
+
+bb190:
+  br label %bb191
+
+bb191:
+  br label %bb192
+
+bb192:
+  br label %bb193
+
+bb193:
+  br label %bb194
+
+bb194:
+  br label %bb195
+
+bb195:
+  br label %bb196
+
+bb196:
+  br label %bb197
+
+bb197:
+  br label %bb198
+
+bb198:
+  br label %bb199
+
+bb199:
+  br label %bb200
+
+bb200:
+  br label %bb201
+
+bb201:
+  br label %bb202
+
+bb202:
+  br label %bb203
+
+bb203:
+  br label %bb204
+
+bb204:
+  br label %bb205
+
+bb205:
+  br label %bb206
+
+bb206:
+  br label %bb207
+
+bb207:
+  br label %bb208
+
+bb208:
+  br label %bb209
+
+bb209:
+  br label %bb210
+
+bb210:
+  br label %bb211
+
+bb211:
+  br label %bb212
+
+bb212:
+  br label %bb213
+
+bb213:
+  br label %bb214
+
+bb214:
+  br label %bb215
+
+bb215:
+  br label %bb216
+
+bb216:
+  br label %bb217
+
+bb217:
+  br label %bb218
+
+bb218:
+  br label %bb219
+
+bb219:
+  br label %bb220
+
+bb220:
+  br label %bb221
+
+bb221:
+  br label %bb222
+
+bb222:
+  br label %bb223
+
+bb223:
+  br label %bb224
+
+bb224:
+  br label %bb225
+
+bb225:
+  br label %bb226
+
+bb226:
+  br label %bb227
+
+bb227:
+  br label %bb228
+
+bb228:
+  br label %bb229
+
+bb229:
+  br label %bb230
+
+bb230:
+  br label %bb231
+
+bb231:
+  br label %bb232
+
+bb232:
+  br label %bb233
+
+bb233:
+  br label %bb234
+
+bb234:
+  br label %bb235
+
+bb235:
+  br label %bb236
+
+bb236:
+  br label %bb237
+
+bb237:
+  br label %bb238
+
+bb238:
+  br label %bb239
+
+bb239:
+  br label %bb240
+
+bb240:
+  br label %bb241
+
+bb241:
+  br label %bb242
+
+bb242:
+  br label %bb243
+
+bb243:
+  br label %bb244
+
+bb244:
+  br label %bb245
+
+bb245:
+  br label %bb246
+
+bb246:
+  br label %bb247
+
+bb247:
+  br label %bb248
+
+bb248:
+  br label %bb249
+
+bb249:
+  br label %bb250
+
+bb250:
+  br label %bb251
+
+bb251:
+  br label %bb252
+
+bb252:
+  br label %bb253
+
+bb253:
+  br label %bb254
+
+bb254:
+  br label %bb255
+
+bb255:
+  br label %bb256
+
+bb256:
+  br label %bb257
+
+bb257:
+  br label %bb258
+
+bb258:
+  br label %bb259
+
+bb259:
+  br label %bb260
+
+bb260:
+  br label %bb261
+
+bb261:
+  br label %bb262
+
+bb262:
+  br label %bb263
+
+bb263:
+  br label %bb264
+
+bb264:
+  br label %bb265
+
+bb265:
+  br label %bb266
+
+bb266:
+  br label %bb267
+
+bb267:
+  br label %bb268
+
+bb268:
+  br label %bb269
+
+bb269:
+  br label %bb270
+
+bb270:
+  br label %bb271
+
+bb271:
+  br label %bb272
+
+bb272:
+  br label %bb273
+
+bb273:
+  br label %bb274
+
+bb274:
+  br label %bb275
+
+bb275:
+  br label %bb276
+
+bb276:
+  br label %bb277
+
+bb277:
+  br label %bb278
+
+bb278:
+  br label %bb279
+
+bb279:
+  br label %bb280
+
+bb280:
+  br label %bb281
+
+bb281:
+  br label %bb282
+
+bb282:
+  br label %bb283
+
+bb283:
+  br label %bb284
+
+bb284:
+  br label %bb285
+
+bb285:
+  br label %bb286
+
+bb286:
+  br label %bb287
+
+bb287:
+  br label %bb288
+
+bb288:
+  br label %bb289
+
+bb289:
+  br label %bb290
+
+bb290:
+  br label %bb291
+
+bb291:
+  br label %bb292
+
+bb292:
+  br label %bb293
+
+bb293:
+  br label %bb294
+
+bb294:
+  br label %bb295
+
+bb295:
+  br label %bb296
+
+bb296:
+  br label %bb297
+
+bb297:
+  br label %bb298
+
+bb298:
+  br label %bb299
+
+bb299:
+  br label %bb300
+
+bb300:
+  br label %bb301
+
+bb301:
+  br label %bb302
+
+bb302:
+  br label %bb303
+
+bb303:
+  br label %bb304
+
+bb304:
+  br label %bb305
+
+bb305:
+  br label %bb306
+
+bb306:
+  br label %bb307
+
+bb307:
+  br label %bb308
+
+bb308:
+  br label %bb309
+
+bb309:
+  br label %bb310
+
+bb310:
+  br label %bb311
+
+bb311:
+  br label %bb312
+
+bb312:
+  br label %bb313
+
+bb313:
+  br label %bb314
+
+bb314:
+  br label %bb315
+
+bb315:
+  br label %bb316
+
+bb316:
+  br label %bb317
+
+bb317:
+  br label %bb318
+
+bb318:
+  br label %bb319
+
+bb319:
+  br label %bb320
+
+bb320:
+  br label %bb321
+
+bb321:
+  br label %bb322
+
+bb322:
+  br label %bb323
+
+bb323:
+  br label %bb324
+
+bb324:
+  br label %bb325
+
+bb325:
+  br label %bb326
+
+bb326:
+  br label %bb327
+
+bb327:
+  br label %bb328
+
+bb328:
+  br label %bb329
+
+bb329:
+  br label %bb330
+
+bb330:
+  br label %bb331
+
+bb331:
+  br label %bb332
+
+bb332:
+  br label %bb333
+
+bb333:
+  br label %bb334
+
+bb334:
+  br label %bb335
+
+bb335:
+  br label %bb336
+
+bb336:
+  br label %bb337
+
+bb337:
+  br label %bb338
+
+bb338:
+  br label %bb339
+
+bb339:
+  br label %bb340
+
+bb340:
+  br label %bb341
+
+bb341:
+  br label %bb342
+
+bb342:
+  br label %bb343
+
+bb343:
+  br label %bb344
+
+bb344:
+  br label %bb345
+
+bb345:
+  br label %bb346
+
+bb346:
+  br label %bb347
+
+bb347:
+  br label %bb348
+
+bb348:
+  br label %bb349
+
+bb349:
+  br label %bb350
+
+bb350:
+  br label %bb351
+
+bb351:
+  br label %bb352
+
+bb352:
+  br label %bb353
+
+bb353:
+  br label %bb354
+
+bb354:
+  br label %bb355
+
+bb355:
+  br label %bb356
+
+bb356:
+  br label %bb357
+
+bb357:
+  br label %bb358
+
+bb358:
+  br label %bb359
+
+bb359:
+  br label %bb360
+
+bb360:
+  br label %bb361
+
+bb361:
+  br label %bb362
+
+bb362:
+  br label %bb363
+
+bb363:
+  br label %bb364
+
+bb364:
+  br label %bb365
+
+bb365:
+  br label %bb366
+
+bb366:
+  br label %bb367
+
+bb367:
+  br label %bb368
+
+bb368:
+  br label %bb369
+
+bb369:
+  br label %bb370
+
+bb370:
+  br label %bb371
+
+bb371:
+  br label %bb372
+
+bb372:
+  br label %bb373
+
+bb373:
+  br label %bb374
+
+bb374:
+  br label %bb375
+
+bb375:
+  br label %bb376
+
+bb376:
+  br label %bb377
+
+bb377:
+  br label %bb378
+
+bb378:
+  br label %bb379
+
+bb379:
+  br label %bb380
+
+bb380:
+  br label %bb381
+
+bb381:
+  br label %bb382
+
+bb382:
+  br label %bb383
+
+bb383:
+  br label %bb384
+
+bb384:
+  br label %bb385
+
+bb385:
+  br label %bb386
+
+bb386:
+  br label %bb387
+
+bb387:
+  br label %bb388
+
+bb388:
+  br label %bb389
+
+bb389:
+  br label %bb390
+
+bb390:
+  br label %bb391
+
+bb391:
+  br label %bb392
+
+bb392:
+  br label %bb393
+
+bb393:
+  br label %bb394
+
+bb394:
+  br label %bb395
+
+bb395:
+  br label %bb396
+
+bb396:
+  br label %bb397
+
+bb397:
+  br label %bb398
+
+bb398:
+  br label %bb399
+
+bb399:
+  br label %bb400
+
+bb400:
+  br label %bb401
+
+bb401:
+  br label %bb402
+
+bb402:
+  br label %bb403
+
+bb403:
+  br label %bb404
+
+bb404:
+  br label %bb405
+
+bb405:
+  br label %bb406
+
+bb406:
+  br label %bb407
+
+bb407:
+  br label %bb408
+
+bb408:
+  br label %bb409
+
+bb409:
+  br label %bb410
+
+bb410:
+  br label %bb411
+
+bb411:
+  br label %bb412
+
+bb412:
+  br label %bb413
+
+bb413:
+  br label %bb414
+
+bb414:
+  br label %bb415
+
+bb415:
+  br label %bb416
+
+bb416:
+  br label %bb417
+
+bb417:
+  br label %bb418
+
+bb418:
+  br label %bb419
+
+bb419:
+  br label %bb420
+
+bb420:
+  br label %bb421
+
+bb421:
+  br label %bb422
+
+bb422:
+  br label %bb423
+
+bb423:
+  br label %bb424
+
+bb424:
+  br label %bb425
+
+bb425:
+  br label %bb426
+
+bb426:
+  br label %bb427
+
+bb427:
+  br label %bb428
+
+bb428:
+  br label %bb429
+
+bb429:
+  br label %bb430
+
+bb430:
+  br label %bb431
+
+bb431:
+  br label %bb432
+
+bb432:
+  br label %bb433
+
+bb433:
+  br label %bb434
+
+bb434:
+  br label %bb435
+
+bb435:
+  br label %bb436
+
+bb436:
+  br label %bb437
+
+bb437:
+  br label %bb438
+
+bb438:
+  br label %bb439
+
+bb439:
+  br label %bb440
+
+bb440:
+  br label %bb441
+
+bb441:
+  br label %bb442
+
+bb442:
+  br label %bb443
+
+bb443:
+  br label %bb444
+
+bb444:
+  br label %bb445
+
+bb445:
+  br label %bb446
+
+bb446:
+  br label %bb447
+
+bb447:
+  br label %bb448
+
+bb448:
+  br label %bb449
+
+bb449:
+  br label %bb450
+
+bb450:
+  br label %bb451
+
+bb451:
+  br label %bb452
+
+bb452:
+  br label %bb453
+
+bb453:
+  br label %bb454
+
+bb454:
+  br label %bb455
+
+bb455:
+  br label %bb456
+
+bb456:
+  br label %bb457
+
+bb457:
+  br label %bb458
+
+bb458:
+  br label %bb459
+
+bb459:
+  br label %bb460
+
+bb460:
+  br label %bb461
+
+bb461:
+  br label %bb462
+
+bb462:
+  br label %bb463
+
+bb463:
+  br label %bb464
+
+bb464:
+  br label %bb465
+
+bb465:
+  br label %bb466
+
+bb466:
+  br label %bb467
+
+bb467:
+  br label %bb468
+
+bb468:
+  br label %bb469
+
+bb469:
+  br label %bb470
+
+bb470:
+  br label %bb471
+
+bb471:
+  br label %bb472
+
+bb472:
+  br label %bb473
+
+bb473:
+  br label %bb474
+
+bb474:
+  br label %bb475
+
+bb475:
+  br label %bb476
+
+bb476:
+  br label %bb477
+
+bb477:
+  br label %bb478
+
+bb478:
+  br label %bb479
+
+bb479:
+  br label %bb480
+
+bb480:
+  br label %bb481
+
+bb481:
+  br label %bb482
+
+bb482:
+  br label %bb483
+
+bb483:
+  br label %bb484
+
+bb484:
+  br label %bb485
+
+bb485:
+  br label %bb486
+
+bb486:
+  br label %bb487
+
+bb487:
+  br label %bb488
+
+bb488:
+  br label %bb489
+
+bb489:
+  br label %bb490
+
+bb490:
+  br label %bb491
+
+bb491:
+  br label %bb492
+
+bb492:
+  br label %bb493
+
+bb493:
+  br label %bb494
+
+bb494:
+  br label %bb495
+
+bb495:
+  br label %bb496
+
+bb496:
+  br label %bb497
+
+bb497:
+  br label %bb498
+
+bb498:
+  br label %bb499
+
+bb499:
+  br label %bb500
+
+bb500:
+  br label %bb501
+
+bb501:
+  br label %bb502
+
+bb502:
+  br label %bb503
+
+bb503:
+  br label %bb504
+
+bb504:
+  br label %bb505
+
+bb505:
+  br label %bb506
+
+bb506:
+  br label %bb507
+
+bb507:
+  br label %bb508
+
+bb508:
+  br label %bb509
+
+bb509:
+  br label %bb510
+
+bb510:
+  br label %bb511
+
+bb511:
+  br label %bb512
+
+bb512:
+  br label %bb513
+
+bb513:
+  br label %bb514
+
+bb514:
+  br label %bb515
+
+bb515:
+  br label %bb516
+
+bb516:
+  br label %bb517
+
+bb517:
+  br label %bb518
+
+bb518:
+  br label %bb519
+
+bb519:
+  br label %bb520
+
+bb520:
+  br label %bb521
+
+bb521:
+  br label %bb522
+
+bb522:
+  br label %bb523
+
+bb523:
+  br label %bb524
+
+bb524:
+  br label %bb525
+
+bb525:
+  br label %bb526
+
+bb526:
+  br label %bb527
+
+bb527:
+  br label %bb528
+
+bb528:
+  br label %bb529
+
+bb529:
+  br label %bb530
+
+bb530:
+  br label %bb531
+
+bb531:
+  br label %bb532
+
+bb532:
+  br label %bb533
+
+bb533:
+  br label %bb534
+
+bb534:
+  br label %bb535
+
+bb535:
+  br label %bb536
+
+bb536:
+  br label %bb537
+
+bb537:
+  br label %bb538
+
+bb538:
+  br label %bb539
+
+bb539:
+  br label %bb540
+
+bb540:
+  br label %bb541
+
+bb541:
+  br label %bb542
+
+bb542:
+  br label %bb543
+
+bb543:
+  br label %bb544
+
+bb544:
+  br label %bb545
+
+bb545:
+  br label %bb546
+
+bb546:
+  br label %bb547
+
+bb547:
+  br label %bb548
+
+bb548:
+  br label %bb549
+
+bb549:
+  br label %bb550
+
+bb550:
+  br label %bb551
+
+bb551:
+  br label %bb552
+
+bb552:
+  br label %bb553
+
+bb553:
+  br label %bb554
+
+bb554:
+  br label %bb555
+
+bb555:
+  br label %bb556
+
+bb556:
+  br label %bb557
+
+bb557:
+  br label %bb558
+
+bb558:
+  br label %bb559
+
+bb559:
+  br label %bb560
+
+bb560:
+  br label %bb561
+
+bb561:
+  br label %bb562
+
+bb562:
+  br label %bb563
+
+bb563:
+  br label %bb564
+
+bb564:
+  br label %bb565
+
+bb565:
+  br label %bb566
+
+bb566:
+  br label %bb567
+
+bb567:
+  br label %bb568
+
+bb568:
+  br label %bb569
+
+bb569:
+  br label %bb570
+
+bb570:
+  br label %bb571
+
+bb571:
+  br label %bb572
+
+bb572:
+  br label %bb573
+
+bb573:
+  br label %bb574
+
+bb574:
+  br label %bb575
+
+bb575:
+  br label %bb576
+
+bb576:
+  br label %bb577
+
+bb577:
+  br label %bb578
+
+bb578:
+  br label %bb579
+
+bb579:
+  br label %bb580
+
+bb580:
+  br label %bb581
+
+bb581:
+  br label %bb582
+
+bb582:
+  br label %bb583
+
+bb583:
+  br label %bb584
+
+bb584:
+  br label %bb585
+
+bb585:
+  br label %bb586
+
+bb586:
+  br label %bb587
+
+bb587:
+  br label %bb588
+
+bb588:
+  br label %bb589
+
+bb589:
+  br label %bb590
+
+bb590:
+  br label %bb591
+
+bb591:
+  br label %bb592
+
+bb592:
+  br label %bb593
+
+bb593:
+  br label %bb594
+
+bb594:
+  br label %bb595
+
+bb595:
+  br label %bb596
+
+bb596:
+  br label %bb597
+
+bb597:
+  br label %bb598
+
+bb598:
+  br label %bb599
+
+bb599:
+  br label %bb600
+
+bb600:
+  br label %bb601
+
+bb601:
+  br label %bb602
+
+bb602:
+  br label %bb603
+
+bb603:
+  br label %bb604
+
+bb604:
+  br label %bb605
+
+bb605:
+  br label %bb606
+
+bb606:
+  br label %bb607
+
+bb607:
+  br label %bb608
+
+bb608:
+  br label %bb609
+
+bb609:
+  br label %bb610
+
+bb610:
+  br label %bb611
+
+bb611:
+  br label %bb612
+
+bb612:
+  br label %bb613
+
+bb613:
+  br label %bb614
+
+bb614:
+  br label %bb615
+
+bb615:
+  br label %bb616
+
+bb616:
+  br label %bb617
+
+bb617:
+  br label %bb618
+
+bb618:
+  br label %bb619
+
+bb619:
+  br label %bb620
+
+bb620:
+  br label %bb621
+
+bb621:
+  br label %bb622
+
+bb622:
+  br label %bb623
+
+bb623:
+  br label %bb624
+
+bb624:
+  br label %bb625
+
+bb625:
+  br label %bb626
+
+bb626:
+  br label %bb627
+
+bb627:
+  br label %bb628
+
+bb628:
+  br label %bb629
+
+bb629:
+  br label %bb630
+
+bb630:
+  br label %bb631
+
+bb631:
+  br label %bb632
+
+bb632:
+  br label %bb633
+
+bb633:
+  br label %bb634
+
+bb634:
+  br label %bb635
+
+bb635:
+  br label %bb636
+
+bb636:
+  br label %bb637
+
+bb637:
+  br label %bb638
+
+bb638:
+  br label %bb639
+
+bb639:
+  br label %bb640
+
+bb640:
+  br label %bb641
+
+bb641:
+  br label %bb642
+
+bb642:
+  br label %bb643
+
+bb643:
+  br label %bb644
+
+bb644:
+  br label %bb645
+
+bb645:
+  br label %bb646
+
+bb646:
+  br label %bb647
+
+bb647:
+  br label %bb648
+
+bb648:
+  br label %bb649
+
+bb649:
+  br label %bb650
+
+bb650:
+  br label %bb651
+
+bb651:
+  br label %bb652
+
+bb652:
+  br label %bb653
+
+bb653:
+  br label %bb654
+
+bb654:
+  br label %bb655
+
+bb655:
+  br label %bb656
+
+bb656:
+  br label %bb657
+
+bb657:
+  br label %bb658
+
+bb658:
+  br label %bb659
+
+bb659:
+  br label %bb660
+
+bb660:
+  br label %bb661
+
+bb661:
+  br label %bb662
+
+bb662:
+  br label %bb663
+
+bb663:
+  br label %bb664
+
+bb664:
+  br label %bb665
+
+bb665:
+  br label %bb666
+
+bb666:
+  br label %bb667
+
+bb667:
+  br label %bb668
+
+bb668:
+  br label %bb669
+
+bb669:
+  br label %bb670
+
+bb670:
+  br label %bb671
+
+bb671:
+  br label %bb672
+
+bb672:
+  br label %bb673
+
+bb673:
+  br label %bb674
+
+bb674:
+  br label %bb675
+
+bb675:
+  br label %bb676
+
+bb676:
+  br label %bb677
+
+bb677:
+  br label %bb678
+
+bb678:
+  br label %bb679
+
+bb679:
+  br label %bb680
+
+bb680:
+  br label %bb681
+
+bb681:
+  br label %bb682
+
+bb682:
+  br label %bb683
+
+bb683:
+  br label %bb684
+
+bb684:
+  br label %bb685
+
+bb685:
+  br label %bb686
+
+bb686:
+  br label %bb687
+
+bb687:
+  br label %bb688
+
+bb688:
+  br label %bb689
+
+bb689:
+  br label %bb690
+
+bb690:
+  br label %bb691
+
+bb691:
+  br label %bb692
+
+bb692:
+  br label %bb693
+
+bb693:
+  br label %bb694
+
+bb694:
+  br label %bb695
+
+bb695:
+  br label %bb696
+
+bb696:
+  br label %bb697
+
+bb697:
+  br label %bb698
+
+bb698:
+  br label %bb699
+
+bb699:
+  br label %bb700
+
+bb700:
+  br label %bb701
+
+bb701:
+  br label %bb702
+
+bb702:
+  br label %bb703
+
+bb703:
+  br label %bb704
+
+bb704:
+  br label %bb705
+
+bb705:
+  br label %bb706
+
+bb706:
+  br label %bb707
+
+bb707:
+  br label %bb708
+
+bb708:
+  br label %bb709
+
+bb709:
+  br label %bb710
+
+bb710:
+  br label %bb711
+
+bb711:
+  br label %bb712
+
+bb712:
+  br label %bb713
+
+bb713:
+  br label %bb714
+
+bb714:
+  br label %bb715
+
+bb715:
+  br label %bb716
+
+bb716:
+  br label %bb717
+
+bb717:
+  br label %bb718
+
+bb718:
+  br label %bb719
+
+bb719:
+  br label %bb720
+
+bb720:
+  br label %bb721
+
+bb721:
+  br label %bb722
+
+bb722:
+  br label %bb723
+
+bb723:
+  br label %bb724
+
+bb724:
+  br label %bb725
+
+bb725:
+  br label %bb726
+
+bb726:
+  br label %bb727
+
+bb727:
+  br label %bb728
+
+bb728:
+  br label %bb729
+
+bb729:
+  br label %bb730
+
+bb730:
+  br label %bb731
+
+bb731:
+  br label %bb732
+
+bb732:
+  br label %bb733
+
+bb733:
+  br label %bb734
+
+bb734:
+  br label %bb735
+
+bb735:
+  br label %bb736
+
+bb736:
+  br label %bb737
+
+bb737:
+  br label %bb738
+
+bb738:
+  br label %bb739
+
+bb739:
+  br label %bb740
+
+bb740:
+  br label %bb741
+
+bb741:
+  br label %bb742
+
+bb742:
+  br label %bb743
+
+bb743:
+  br label %bb744
+
+bb744:
+  br label %bb745
+
+bb745:
+  br label %bb746
+
+bb746:
+  br label %bb747
+
+bb747:
+  br label %bb748
+
+bb748:
+  br label %bb749
+
+bb749:
+  br label %bb750
+
+bb750:
+  br label %bb751
+
+bb751:
+  br label %bb752
+
+bb752:
+  br label %bb753
+
+bb753:
+  br label %bb754
+
+bb754:
+  br label %bb755
+
+bb755:
+  br label %bb756
+
+bb756:
+  br label %bb757
+
+bb757:
+  br label %bb758
+
+bb758:
+  br label %bb759
+
+bb759:
+  br label %bb760
+
+bb760:
+  br label %bb761
+
+bb761:
+  br label %bb762
+
+bb762:
+  br label %bb763
+
+bb763:
+  br label %bb764
+
+bb764:
+  br label %bb765
+
+bb765:
+  br label %bb766
+
+bb766:
+  br label %bb767
+
+bb767:
+  br label %bb768
+
+bb768:
+  br label %bb769
+
+bb769:
+  br label %bb770
+
+bb770:
+  br label %bb771
+
+bb771:
+  br label %bb772
+
+bb772:
+  br label %bb773
+
+bb773:
+  br label %bb774
+
+bb774:
+  br label %bb775
+
+bb775:
+  br label %bb776
+
+bb776:
+  br label %bb777
+
+bb777:
+  br label %bb778
+
+bb778:
+  br label %bb779
+
+bb779:
+  br label %bb780
+
+bb780:
+  br label %bb781
+
+bb781:
+  br label %bb782
+
+bb782:
+  br label %bb783
+
+bb783:
+  br label %bb784
+
+bb784:
+  br label %bb785
+
+bb785:
+  br label %bb786
+
+bb786:
+  br label %bb787
+
+bb787:
+  br label %bb788
+
+bb788:
+  br label %bb789
+
+bb789:
+  br label %bb790
+
+bb790:
+  br label %bb791
+
+bb791:
+  br label %bb792
+
+bb792:
+  br label %bb793
+
+bb793:
+  br label %bb794
+
+bb794:
+  br label %bb795
+
+bb795:
+  br label %bb796
+
+bb796:
+  br label %bb797
+
+bb797:
+  br label %bb798
+
+bb798:
+  br label %bb799
+
+bb799:
+  br label %bb800
+
+bb800:
+  br label %bb801
+
+bb801:
+  br label %bb802
+
+bb802:
+  br label %bb803
+
+bb803:
+  br label %bb804
+
+bb804:
+  br label %bb805
+
+bb805:
+  br label %bb806
+
+bb806:
+  br label %bb807
+
+bb807:
+  br label %bb808
+
+bb808:
+  br label %bb809
+
+bb809:
+  br label %bb810
+
+bb810:
+  br label %bb811
+
+bb811:
+  br label %bb812
+
+bb812:
+  br label %bb813
+
+bb813:
+  br label %bb814
+
+bb814:
+  br label %bb815
+
+bb815:
+  br label %bb816
+
+bb816:
+  br label %bb817
+
+bb817:
+  br label %bb818
+
+bb818:
+  br label %bb819
+
+bb819:
+  br label %bb820
+
+bb820:
+  br label %bb821
+
+bb821:
+  br label %bb822
+
+bb822:
+  br label %bb823
+
+bb823:
+  br label %bb824
+
+bb824:
+  br label %bb825
+
+bb825:
+  br label %bb826
+
+bb826:
+  br label %bb827
+
+bb827:
+  br label %bb828
+
+bb828:
+  br label %bb829
+
+bb829:
+  br label %bb830
+
+bb830:
+  br label %bb831
+
+bb831:
+  br label %bb832
+
+bb832:
+  br label %bb833
+
+bb833:
+  br label %bb834
+
+bb834:
+  br label %bb835
+
+bb835:
+  br label %bb836
+
+bb836:
+  br label %bb837
+
+bb837:
+  br label %bb838
+
+bb838:
+  br label %bb839
+
+bb839:
+  br label %bb840
+
+bb840:
+  br label %bb841
+
+bb841:
+  br label %bb842
+
+bb842:
+  br label %bb843
+
+bb843:
+  br label %bb844
+
+bb844:
+  br label %bb845
+
+bb845:
+  br label %bb846
+
+bb846:
+  br label %bb847
+
+bb847:
+  br label %bb848
+
+bb848:
+  br label %bb849
+
+bb849:
+  br label %bb850
+
+bb850:
+  br label %bb851
+
+bb851:
+  br label %bb852
+
+bb852:
+  br label %bb853
+
+bb853:
+  br label %bb854
+
+bb854:
+  br label %bb855
+
+bb855:
+  br label %bb856
+
+bb856:
+  br label %bb857
+
+bb857:
+  br label %bb858
+
+bb858:
+  br label %bb859
+
+bb859:
+  br label %bb860
+
+bb860:
+  br label %bb861
+
+bb861:
+  br label %bb862
+
+bb862:
+  br label %bb863
+
+bb863:
+  br label %bb864
+
+bb864:
+  br label %bb865
+
+bb865:
+  br label %bb866
+
+bb866:
+  br label %bb867
+
+bb867:
+  br label %bb868
+
+bb868:
+  br label %bb869
+
+bb869:
+  br label %bb870
+
+bb870:
+  br label %bb871
+
+bb871:
+  br label %bb872
+
+bb872:
+  br label %bb873
+
+bb873:
+  br label %bb874
+
+bb874:
+  br label %bb875
+
+bb875:
+  br label %bb876
+
+bb876:
+  br label %bb877
+
+bb877:
+  br label %bb878
+
+bb878:
+  br label %bb879
+
+bb879:
+  br label %bb880
+
+bb880:
+  br label %bb881
+
+bb881:
+  br label %bb882
+
+bb882:
+  br label %bb883
+
+bb883:
+  br label %bb884
+
+bb884:
+  br label %bb885
+
+bb885:
+  br label %bb886
+
+bb886:
+  br label %bb887
+
+bb887:
+  br label %bb888
+
+bb888:
+  br label %bb889
+
+bb889:
+  br label %bb890
+
+bb890:
+  br label %bb891
+
+bb891:
+  br label %bb892
+
+bb892:
+  br label %bb893
+
+bb893:
+  br label %bb894
+
+bb894:
+  br label %bb895
+
+bb895:
+  br label %bb896
+
+bb896:
+  br label %bb897
+
+bb897:
+  br label %bb898
+
+bb898:
+  br label %bb899
+
+bb899:
+  br label %bb900
+
+bb900:
+  br label %bb901
+
+bb901:
+  br label %bb902
+
+bb902:
+  br label %bb903
+
+bb903:
+  br label %bb904
+
+bb904:
+  br label %bb905
+
+bb905:
+  br label %bb906
+
+bb906:
+  br label %bb907
+
+bb907:
+  br label %bb908
+
+bb908:
+  br label %bb909
+
+bb909:
+  br label %bb910
+
+bb910:
+  br label %bb911
+
+bb911:
+  br label %bb912
+
+bb912:
+  br label %bb913
+
+bb913:
+  br label %bb914
+
+bb914:
+  br label %bb915
+
+bb915:
+  br label %bb916
+
+bb916:
+  br label %bb917
+
+bb917:
+  br label %bb918
+
+bb918:
+  br label %bb919
+
+bb919:
+  br label %bb920
+
+bb920:
+  br label %bb921
+
+bb921:
+  br label %bb922
+
+bb922:
+  br label %bb923
+
+bb923:
+  br label %bb924
+
+bb924:
+  br label %bb925
+
+bb925:
+  br label %bb926
+
+bb926:
+  br label %bb927
+
+bb927:
+  br label %bb928
+
+bb928:
+  br label %bb929
+
+bb929:
+  br label %bb930
+
+bb930:
+  br label %bb931
+
+bb931:
+  br label %bb932
+
+bb932:
+  br label %bb933
+
+bb933:
+  br label %bb934
+
+bb934:
+  br label %bb935
+
+bb935:
+  br label %bb936
+
+bb936:
+  br label %bb937
+
+bb937:
+  br label %bb938
+
+bb938:
+  br label %bb939
+
+bb939:
+  br label %bb940
+
+bb940:
+  br label %bb941
+
+bb941:
+  br label %bb942
+
+bb942:
+  br label %bb943
+
+bb943:
+  br label %bb944
+
+bb944:
+  br label %bb945
+
+bb945:
+  br label %bb946
+
+bb946:
+  br label %bb947
+
+bb947:
+  br label %bb948
+
+bb948:
+  br label %bb949
+
+bb949:
+  br label %bb950
+
+bb950:
+  br label %bb951
+
+bb951:
+  br label %bb952
+
+bb952:
+  br label %bb953
+
+bb953:
+  br label %bb954
+
+bb954:
+  br label %bb955
+
+bb955:
+  br label %bb956
+
+bb956:
+  br label %bb957
+
+bb957:
+  br label %bb958
+
+bb958:
+  br label %bb959
+
+bb959:
+  br label %bb960
+
+bb960:
+  br label %bb961
+
+bb961:
+  br label %bb962
+
+bb962:
+  br label %bb963
+
+bb963:
+  br label %bb964
+
+bb964:
+  br label %bb965
+
+bb965:
+  br label %bb966
+
+bb966:
+  br label %bb967
+
+bb967:
+  br label %bb968
+
+bb968:
+  br label %bb969
+
+bb969:
+  br label %bb970
+
+bb970:
+  br label %bb971
+
+bb971:
+  br label %bb972
+
+bb972:
+  br label %bb973
+
+bb973:
+  br label %bb974
+
+bb974:
+  br label %bb975
+
+bb975:
+  br label %bb976
+
+bb976:
+  br label %bb977
+
+bb977:
+  br label %bb978
+
+bb978:
+  br label %bb979
+
+bb979:
+  br label %bb980
+
+bb980:
+  br label %bb981
+
+bb981:
+  br label %bb982
+
+bb982:
+  br label %bb983
+
+bb983:
+  br label %bb984
+
+bb984:
+  br label %bb985
+
+bb985:
+  br label %bb986
+
+bb986:
+  br label %bb987
+
+bb987:
+  br label %bb988
+
+bb988:
+  br label %bb989
+
+bb989:
+  br label %bb990
+
+bb990:
+  br label %bb991
+
+bb991:
+  br label %bb992
+
+bb992:
+  br label %bb993
+
+bb993:
+  br label %bb994
+
+bb994:
+  br label %bb995
+
+bb995:
+  br label %bb996
+
+bb996:
+  br label %bb997
+
+bb997:
+  br label %bb998
+
+bb998:
+  br label %bb999
+
+bb999:
+  br label %bb1000
+
+bb1000:
+  ; CHECK: call{{.*}}@dfsan_union
+  ; CHECK-NOT: phi
+  %ab = mul i32 %a, %b
+  ret i32 %ab
+}
diff --git a/test/Instrumentation/DataFlowSanitizer/union.ll b/test/Instrumentation/DataFlowSanitizer/union.ll
index 2b31081776b7..7fcba2cf3358 100644
--- a/test/Instrumentation/DataFlowSanitizer/union.ll
+++ b/test/Instrumentation/DataFlowSanitizer/union.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -dfsan -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
 
 @a = common global i32 0
 @b = common global i32 0
diff --git a/test/Instrumentation/InstrProfiling/no-counters.ll b/test/Instrumentation/InstrProfiling/no-counters.ll
new file mode 100644
index 000000000000..0716b0d8c6f2
--- /dev/null
+++ b/test/Instrumentation/InstrProfiling/no-counters.ll
@@ -0,0 +1,10 @@
+;; No instrumentation should be emitted if there are no counter increments.
+
+; RUN: opt < %s -instrprof -S | FileCheck %s
+; CHECK-NOT: @__llvm_profile_counters
+; CHECK-NOT: @__llvm_profile_data
+; CHECK-NOT: @__llvm_profile_runtime
+
+define void @foo() {
+  ret void
+}
diff --git a/test/Instrumentation/InstrProfiling/noruntime.ll b/test/Instrumentation/InstrProfiling/noruntime.ll
new file mode 100644
index 000000000000..e69445dc3503
--- /dev/null
+++ b/test/Instrumentation/InstrProfiling/noruntime.ll
@@ -0,0 +1,16 @@
+;; Check that we don't emit the runtime hooks if the user provided them.
+
+; RUN: opt < %s -instrprof -S | FileCheck %s
+; CHECK-NOT: define {{.*}} @__llvm_profile_runtime_user()
+; CHECK-NOT: load i32* @__llvm_profile_runtime
+
+@__llvm_profile_runtime = global i32 0, align 4
+
+@__llvm_profile_name_foo = hidden constant [3 x i8] c"foo"
+
+define void @foo() {
+  call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
+  ret void
+}
+
+declare void @llvm.instrprof.increment(i8*, i64, i32, i32)
diff --git a/test/Instrumentation/InstrProfiling/platform.ll b/test/Instrumentation/InstrProfiling/platform.ll
new file mode 100644
index 000000000000..e0327683b7f4
--- /dev/null
+++ b/test/Instrumentation/InstrProfiling/platform.ll
@@ -0,0 +1,29 @@
+;; Checks for platform specific section names and initialization code.
+
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.10.0 -instrprof -S | FileCheck %s -check-prefix=MACHO
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -instrprof -S | FileCheck %s -check-prefix=ELF
+
+@__llvm_profile_name_foo = hidden constant [3 x i8] c"foo"
+; MACHO: @__llvm_profile_name_foo = hidden constant [3 x i8] c"foo", section "__DATA,__llvm_prf_names", align 1
+; ELF: @__llvm_profile_name_foo = hidden constant [3 x i8] c"foo", section "__llvm_prf_names", align 1
+
+; MACHO: @__llvm_profile_counters_foo = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
+; ELF: @__llvm_profile_counters_foo = hidden global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8
+
+; MACHO: @__llvm_profile_data_foo = hidden constant {{.*}}, section "__DATA,__llvm_prf_data", align 8
+; ELF: @__llvm_profile_data_foo = hidden constant {{.*}}, section "__llvm_prf_data", align 8
+define void @foo() {
+  call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
+  ret void
+}
+
+declare void @llvm.instrprof.increment(i8*, i64, i32, i32)
+
+;; Emit registration functions for platforms that don't find the
+;; symbols by their sections.
+
+; MACHO-NOT: define internal void @__llvm_profile_register_functions
+; ELF: define internal void @__llvm_profile_register_functions
+
+; MACHO-NOT: define internal void @__llvm_profile_init
+; ELF: define internal void @__llvm_profile_init
diff --git a/test/Instrumentation/InstrProfiling/profiling.ll b/test/Instrumentation/InstrProfiling/profiling.ll
new file mode 100644
index 000000000000..246bf6b43f93
--- /dev/null
+++ b/test/Instrumentation/InstrProfiling/profiling.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -instrprof -S | FileCheck %s
+
+target triple = "x86_64-apple-macosx10.10.0"
+
+@__llvm_profile_name_foo = hidden constant [3 x i8] c"foo"
+; CHECK: @__llvm_profile_name_foo = hidden constant [3 x i8] c"foo", section "__DATA,__llvm_prf_names", align 1
+@__llvm_profile_name_bar = hidden constant [4 x i8] c"bar\00"
+; CHECK: @__llvm_profile_name_bar = hidden constant [4 x i8] c"bar\00", section "__DATA,__llvm_prf_names", align 1
+@baz_prof_name = hidden constant [3 x i8] c"baz"
+; CHECK: @baz_prof_name = hidden constant [3 x i8] c"baz", section "__DATA,__llvm_prf_names", align 1
+
+; CHECK: @__llvm_profile_counters_foo = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
+; CHECK: @__llvm_profile_data_foo = hidden constant {{.*}}, section "__DATA,__llvm_prf_data", align 8
+define void @foo() {
+  call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8]* @__llvm_profile_name_foo, i32 0, i32 0), i64 0, i32 1, i32 0)
+  ret void
+}
+
+; CHECK: @__llvm_profile_counters_bar = hidden global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
+; CHECK: @__llvm_profile_data_bar = hidden constant {{.*}}, section "__DATA,__llvm_prf_data", align 8
+define void @bar() {
+  call void @llvm.instrprof.increment(i8* getelementptr inbounds ([4 x i8]* @__llvm_profile_name_bar, i32 0, i32 0), i64 0, i32 1, i32 0)
+  ret void
+}
+
+; CHECK: @__llvm_profile_counters_baz = hidden global [3 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8
+; CHECK: @__llvm_profile_data_baz = hidden constant {{.*}}, section "__DATA,__llvm_prf_data", align 8
+define void @baz() {
+  call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8]* @baz_prof_name, i32 0, i32 0), i64 0, i32 3, i32 0)
+  call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8]* @baz_prof_name, i32 0, i32 0), i64 0, i32 3, i32 1)
+  call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8]* @baz_prof_name, i32 0, i32 0), i64 0, i32 3, i32 2)
+  ret void
+}
+
+declare void @llvm.instrprof.increment(i8*, i64, i32, i32)
+
+; CHECK: @__llvm_profile_runtime = external global i32
+; CHECK: @llvm.used = appending global {{.*}} @__llvm_profile_data_foo {{.*}} @__llvm_profile_data_bar {{.*}} @__llvm_profile_data_baz {{.*}} section "llvm.metadata"
diff --git a/test/Instrumentation/MemorySanitizer/array_types.ll b/test/Instrumentation/MemorySanitizer/array_types.ll
new file mode 100644
index 000000000000..fa3835fac77a
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/array_types.ll
@@ -0,0 +1,89 @@
+; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=1 -S | FileCheck -check-prefix=CHECK -check-prefix=CHECK-ORIGINS %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define [2 x i32] @InsertValue(i32 %x, i32 %y) sanitize_memory {
+entry:
+  %a = insertvalue [2 x i32] undef, i32 %x, 0
+  %b = insertvalue [2 x i32] %a, i32 %y, 1
+  ret [2 x i32] %b
+}
+
+; CHECK-LABEL: @InsertValue(
+; CHECK-DAG: [[Sy:%.*]] = load i32* {{.*}}@__msan_param_tls to i64), i64 8) to i32*)
+; CHECK-DAG: [[Sx:%.*]] = load i32* {{.*}}@__msan_param_tls to i32*)
+; CHECK: [[A:%.*]] = insertvalue [2 x i32] [i32 -1, i32 -1], i32 [[Sx]], 0
+; CHECK: [[B:%.*]] = insertvalue [2 x i32] [[A]], i32 [[Sy]], 1
+; CHECK: store [2 x i32] [[B]], [2 x i32]* {{.*}}@__msan_retval_tls
+; CHECK: ret [2 x i32]
+
+
+define [2 x double] @InsertValueDouble(double %x, double %y) sanitize_memory {
+entry:
+  %a = insertvalue [2 x double] undef, double %x, 0
+  %b = insertvalue [2 x double] %a, double %y, 1
+  ret [2 x double] %b
+}
+
+; CHECK-LABEL: @InsertValueDouble(
+; CHECK-DAG: [[Sy:%.*]] = load i64* {{.*}}@__msan_param_tls to i64), i64 8) to i64*)
+; CHECK-DAG: [[Sx:%.*]] = load i64* getelementptr {{.*}}@__msan_param_tls, i32 0, i32 0
+; CHECK: [[A:%.*]] = insertvalue [2 x i64] [i64 -1, i64 -1], i64 [[Sx]], 0
+; CHECK: [[B:%.*]] = insertvalue [2 x i64] [[A]], i64 [[Sy]], 1
+; CHECK: store [2 x i64] [[B]], [2 x i64]* {{.*}}@__msan_retval_tls
+; CHECK: ret [2 x double]
+
+
+define i32 @ExtractValue([2 x i32] %a) sanitize_memory {
+entry:
+  %x = extractvalue [2 x i32] %a, 1
+  ret i32 %x
+}
+
+; CHECK-LABEL: @ExtractValue(
+; CHECK: [[Sa:%.*]] = load [2 x i32]* {{.*}}@__msan_param_tls to [2 x i32]*)
+; CHECK: [[Sx:%.*]] = extractvalue [2 x i32] [[Sa]], 1
+; CHECK: store i32 [[Sx]], i32* {{.*}}@__msan_retval_tls
+; CHECK: ret i32
+
+
+; Regression test for PR20493.
+
+%MyStruct = type { i32, i32, [3 x i32] }
+
+define i32 @ArrayInStruct(%MyStruct %s) sanitize_memory {
+  %x = extractvalue %MyStruct %s, 2, 1
+  ret i32 %x
+}
+
+; CHECK-LABEL: @ArrayInStruct(
+; CHECK: [[Ss:%.*]] = load { i32, i32, [3 x i32] }* {{.*}}@__msan_param_tls to { i32, i32, [3 x i32] }*)
+; CHECK: [[Sx:%.*]] = extractvalue { i32, i32, [3 x i32] } [[Ss]], 2, 1
+; CHECK: store i32 [[Sx]], i32* {{.*}}@__msan_retval_tls
+; CHECK: ret i32
+
+
+define i32 @ArrayOfStructs([3 x { i32, i32 }] %a) sanitize_memory {
+  %x = extractvalue [3 x { i32, i32 }] %a, 2, 1
+  ret i32 %x
+}
+
+; CHECK-LABEL: @ArrayOfStructs(
+; CHECK: [[Ss:%.*]] = load [3 x { i32, i32 }]* {{.*}}@__msan_param_tls to [3 x { i32, i32 }]*)
+; CHECK: [[Sx:%.*]] = extractvalue [3 x { i32, i32 }] [[Ss]], 2, 1
+; CHECK: store i32 [[Sx]], i32* {{.*}}@__msan_retval_tls
+; CHECK: ret i32
+
+
+define <8 x i16> @ArrayOfVectors([3 x <8 x i16>] %a) sanitize_memory {
+  %x = extractvalue [3 x <8 x i16>] %a, 1
+  ret <8 x i16> %x
+}
+
+; CHECK-LABEL: @ArrayOfVectors(
+; CHECK: [[Ss:%.*]] = load [3 x <8 x i16>]* {{.*}}@__msan_param_tls to [3 x <8 x i16>]*)
+; CHECK: [[Sx:%.*]] = extractvalue [3 x <8 x i16>] [[Ss]], 1
+; CHECK: store <8 x i16> [[Sx]], <8 x i16>* {{.*}}@__msan_retval_tls
+; CHECK: ret <8 x i16>
diff --git a/test/Instrumentation/MemorySanitizer/byval-alignment.ll b/test/Instrumentation/MemorySanitizer/byval-alignment.ll
new file mode 100644
index 000000000000..43e204a6a961
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/byval-alignment.ll
@@ -0,0 +1,20 @@
+; Test that copy alignment for byval arguments is limited by param-tls slot alignment.
+
+; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.S = type { i64, i64, i64, [8 x i8] }
+
+; CHECK: [[A:%.*]] = bitcast i64* {{.*}} add {{.*}} ptrtoint {{.*}} @__msan_param_tls {{.*}} i64 8)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[A]], i8* {{.*}}, i64 32, i32 8, i1 false)
+
+define void @Caller() sanitize_memory {
+entry:
+  %agg.tmp = alloca %struct.S, align 16
+  call void @Callee(i32 1, %struct.S* byval align 16 %agg.tmp)
+  ret void
+}
+
+declare void @Callee(i32, %struct.S* byval align 16)
diff --git a/test/Instrumentation/MemorySanitizer/check-constant-shadow.ll b/test/Instrumentation/MemorySanitizer/check-constant-shadow.ll
new file mode 100644
index 000000000000..11e44106d8fb
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/check-constant-shadow.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -msan -msan-check-constant-shadow=1 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Test that returning a literal undef from main() triggers an MSan warning.
+
+define i32 @main() nounwind uwtable sanitize_memory {
+entry:
+  ret i32 undef
+}
+
+; CHECK-LABEL: @main
+; CHECK: call void @__msan_warning_noreturn
+; CHECK: ret i32 undef
diff --git a/test/Instrumentation/MemorySanitizer/do-not-emit-module-limits.ll b/test/Instrumentation/MemorySanitizer/do-not-emit-module-limits.ll
deleted file mode 100644
index 7d0a62a256c4..000000000000
--- a/test/Instrumentation/MemorySanitizer/do-not-emit-module-limits.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; Test that MSan does not emit undefined symbol __executable_start when it is
-; not needed (i.e. without -msan-wrap-indirect-calls).
-
-; RUN: opt < %s -msan -S | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; Function Attrs: nounwind uwtable
-define void @_Z1fv() #0 {
-entry:
-  ret void
-}
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-!llvm.ident = !{!0}
-
-!0 = metadata !{metadata !"clang version 3.5.0 (208165)"}
-
-; CHECK-NOT: __executable_start
diff --git a/test/Instrumentation/MemorySanitizer/missing_origin.ll b/test/Instrumentation/MemorySanitizer/missing_origin.ll
index 673e85369a3a..f7385b9dd4ce 100644
--- a/test/Instrumentation/MemorySanitizer/missing_origin.ll
+++ b/test/Instrumentation/MemorySanitizer/missing_origin.ll
@@ -17,3 +17,17 @@ entry:
 ; CHECK: [[A:%.*]] = load i32* {{.*}}@__msan_param_origin_tls,
 ; CHECK: store i32 [[A]], i32* @__msan_retval_origin_tls
 ; CHECK: ret <4 x i32>
+
+
+; Regression test for origin propagation in "select i1, float, float".
+; https://code.google.com/p/memory-sanitizer/issues/detail?id=78
+
+define float @SelectFloat(i1 %b, float %x, float %y) nounwind uwtable sanitize_memory {
+entry:
+  %z = select i1 %b, float %x, float %y
+  ret float %z
+}
+
+; CHECK-LABEL: @SelectFloat(
+; CHECK-NOT: select {{.*}} i32 0, i32 0
+; CHECK: ret float
diff --git a/test/Instrumentation/MemorySanitizer/origin-alignment.ll b/test/Instrumentation/MemorySanitizer/origin-alignment.ll
new file mode 100644
index 000000000000..ce0dbfc71909
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/origin-alignment.ll
@@ -0,0 +1,73 @@
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=1 -S | FileCheck -check-prefix=CHECK -check-prefix=CHECK-ORIGINS1 %s
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=2 -S | FileCheck -check-prefix=CHECK -check-prefix=CHECK-ORIGINS2 %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+; Check origin instrumentation of stores.
+; Check that debug info for origin propagation code is set correctly.
+
+@a8 = global i8 0, align 8
+@a4 = global i8 0, align 4
+@a2 = global i8 0, align 2
+@a1 = global i8 0, align 1
+
+; 8-aligned store => 8-aligned origin store, origin address is not realigned
+define void @Store8(i8 %x) sanitize_memory {
+entry:
+  store i8 %x, i8* @a8, align 8
+  ret void
+}
+
+; CHECK-LABEL: @Store8
+; CHECK-ORIGINS1: [[ORIGIN:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
+; CHECK-ORIGINS2: [[ORIGIN0:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
+; CHECK-ORIGINS2: [[ORIGIN:%[01-9a-z]+]] = call i32 @__msan_chain_origin(i32 [[ORIGIN0]])
+; CHECK: store i32 [[ORIGIN]],  i32* inttoptr (i64 add (i64 and (i64 ptrtoint {{.*}} to i32*), align 8
+; CHECK: ret void
+
+
+; 4-aligned store => 4-aligned origin store, origin address is not realigned
+define void @Store4(i8 %x) sanitize_memory {
+entry:
+  store i8 %x, i8* @a4, align 4
+  ret void
+}
+
+; CHECK-LABEL: @Store4
+; CHECK-ORIGINS1: [[ORIGIN:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
+; CHECK-ORIGINS2: [[ORIGIN0:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
+; CHECK-ORIGINS2: [[ORIGIN:%[01-9a-z]+]] = call i32 @__msan_chain_origin(i32 [[ORIGIN0]])
+; CHECK: store i32 [[ORIGIN]],  i32* inttoptr (i64 add (i64 and (i64 ptrtoint {{.*}} to i32*), align 4
+; CHECK: ret void
+
+
+; 2-aligned store => 4-aligned origin store, origin address is realigned
+define void @Store2(i8 %x) sanitize_memory {
+entry:
+  store i8 %x, i8* @a2, align 2
+  ret void
+}
+
+; CHECK-LABEL: @Store2
+; CHECK-ORIGINS1: [[ORIGIN:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
+; CHECK-ORIGINS2: [[ORIGIN0:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
+; CHECK-ORIGINS2: [[ORIGIN:%[01-9a-z]+]] = call i32 @__msan_chain_origin(i32 [[ORIGIN0]])
+; CHECK: store i32 [[ORIGIN]],  i32* inttoptr (i64 and (i64 add (i64 and (i64 ptrtoint {{.*}} i64 -4) to i32*), align 4
+; CHECK: ret void
+
+
+; 1-aligned store => 4-aligned origin store, origin address is realigned
+define void @Store1(i8 %x) sanitize_memory {
+entry:
+  store i8 %x, i8* @a1, align 1
+  ret void
+}
+
+; CHECK-LABEL: @Store1
+; CHECK-ORIGINS1: [[ORIGIN:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
+; CHECK-ORIGINS2: [[ORIGIN0:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
+; CHECK-ORIGINS2: [[ORIGIN:%[01-9a-z]+]] = call i32 @__msan_chain_origin(i32 [[ORIGIN0]])
+; CHECK: store i32 [[ORIGIN]],  i32* inttoptr (i64 and (i64 add (i64 and (i64 ptrtoint {{.*}} i64 -4) to i32*), align 4
+; CHECK: ret void
diff --git a/test/Instrumentation/MemorySanitizer/store-origin.ll b/test/Instrumentation/MemorySanitizer/store-origin.ll
index 0bd977700e83..c2948b1dbc50 100644
--- a/test/Instrumentation/MemorySanitizer/store-origin.ll
+++ b/test/Instrumentation/MemorySanitizer/store-origin.ll
@@ -11,14 +11,14 @@ target triple = "x86_64-unknown-linux-gnu"
 ; Function Attrs: nounwind
 define void @Store(i32* nocapture %p, i32 %x) #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32* %p}, i64 0, metadata !11), !dbg !16
-  tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !12), !dbg !16
+  tail call void @llvm.dbg.value(metadata i32* %p, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !16
+  tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !12, metadata !{!"0x102"}), !dbg !16
   store i32 %x, i32* %p, align 4, !dbg !17, !tbaa !18
   ret void, !dbg !22
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { nounwind sanitize_memory "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -27,29 +27,29 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!13, !14}
 !llvm.ident = !{!15}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 (204220)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/build0/../2.cc] [DW_LANG_C99]
-!1 = metadata !{metadata !"../2.cc", metadata !"/tmp/build0"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"Store", metadata !"Store", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @Store, null, null, metadata !10, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [Store]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/build0/../2.cc]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null, metadata !8, metadata !9}
-!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{metadata !11, metadata !12}
-!11 = metadata !{i32 786689, metadata !4, metadata !"p", metadata !5, i32 16777217, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [p] [line 1]
-!12 = metadata !{i32 786689, metadata !4, metadata !"x", metadata !5, i32 33554433, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [x] [line 1]
-!13 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!15 = metadata !{metadata !"clang version 3.5.0 (204220)"}
-!16 = metadata !{i32 1, i32 0, metadata !4, null}
-!17 = metadata !{i32 2, i32 0, metadata !4, null}
-!18 = metadata !{metadata !19, metadata !19, i64 0}
-!19 = metadata !{metadata !"int", metadata !20, i64 0}
-!20 = metadata !{metadata !"omnipotent char", metadata !21, i64 0}
-!21 = metadata !{metadata !"Simple C/C++ TBAA"}
-!22 = metadata !{i32 3, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5.0 (204220)\001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/build0/../2.cc] [DW_LANG_C99]
+!1 = !{!"../2.cc", !"/tmp/build0"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00Store\00Store\00\001\000\001\000\006\00256\001\001", !1, !5, !6, null, void (i32*, i32)* @Store, null, null, !10} ; [ DW_TAG_subprogram ] [line 1] [def] [Store]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/build0/../2.cc]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !8, !9}
+!8 = !{!"0xf\00\000\0064\0064\000\000", null, null, !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!11, !12}
+!11 = !{!"0x101\00p\0016777217\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [p] [line 1]
+!12 = !{!"0x101\00x\0033554433\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [x] [line 1]
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 1, !"Debug Info Version", i32 2}
+!15 = !{!"clang version 3.5.0 (204220)"}
+!16 = !MDLocation(line: 1, scope: !4)
+!17 = !MDLocation(line: 2, scope: !4)
+!18 = !{!19, !19, i64 0}
+!19 = !{!"int", !20, i64 0}
+!20 = !{!"omnipotent char", !21, i64 0}
+!21 = !{!"Simple C/C++ TBAA"}
+!22 = !MDLocation(line: 3, scope: !4)
 
 
 ; CHECK: @Store
diff --git a/test/Instrumentation/MemorySanitizer/wrap_indirect_calls.ll b/test/Instrumentation/MemorySanitizer/wrap_indirect_calls.ll
deleted file mode 100644
index 65037cb4790b..000000000000
--- a/test/Instrumentation/MemorySanitizer/wrap_indirect_calls.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; RUN: opt < %s -msan -msan-check-access-address=0 -msan-wrap-indirect-calls=zzz -msan-wrap-indirect-calls-fast=0 -S | FileCheck %s
-; RUN: opt < %s -msan -msan-check-access-address=0 -msan-wrap-indirect-calls=zzz -msan-wrap-indirect-calls-fast=1 -S | FileCheck -check-prefix=CHECK-FAST %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; Test for -msan-wrap-indirect-calls functionality.
-; Replaces indirect call to %f with a call to whatever is returned from the
-; wrapper function.
-
-; This does not depend on the sanitize_memory attribute.
-define i32 @func1(i32 (i32, i32)* nocapture %f, i32 %x, i32 %y) {
-entry:
-  %call = tail call i32 %f(i32 %x, i32 %y)
-  ret i32 %call
-}
-
-; CHECK: @func1
-; CHECK: bitcast i32 (i32, i32)* %f to void ()*
-; CHECK: call void ()* (void ()*)* @zzz(void ()*
-; CHECK: [[A:%[01-9a-z_.]+]] = bitcast void ()* {{.*}} to i32 (i32, i32)*
-; CHECK: call i32 {{.*}}[[A]](i32 {{.*}}, i32 {{.*}})
-; CHECK: ret i32
-
-; CHECK-FAST: @func1
-; CHECK-FAST: bitcast i32 (i32, i32)* %f to void ()*
-; CHECK-FAST-DAG: icmp ult void ()* {{.*}}, bitcast (i32* @__executable_start to void ()*)
-; CHECK-FAST-DAG: icmp uge void ()* {{.*}}, bitcast (i32* @_end to void ()*)
-; CHECK-FAST: or i1
-; CHECK-FAST: br i1
-; CHECK-FAST: call void ()* (void ()*)* @zzz(void ()*
-; CHECK-FAST: br label
-; CHECK-FAST: [[A:%[01-9a-z_.]+]] = phi i32 (i32, i32)* [ %f, %entry ], [ {{.*}} ]
-; CHECK-FAST: call i32 {{.*}}[[A]](i32 {{.*}}, i32 {{.*}})
-; CHECK-FAST: ret i32
-
-
-; The same test, but with a complex expression as the call target.
-
-declare i8* @callee(i32)
-
-define i8* @func2(i64 %x) #1 {
-entry:
-  %call = tail call i8* bitcast (i8* (i32)* @callee to i8* (i64)*)(i64 %x)
-  ret i8* %call
-}
-
-; CHECK: @func2
-; CHECK: call {{.*}} @zzz
-; CHECK: [[A:%[01-9a-z_.]+]] = bitcast void ()* {{.*}} to i8* (i64)*
-; CHECK: call i8* {{.*}}[[A]](i64 {{.*}})
-; CHECK: ret i8*
-
-; CHECK-FAST: @func2
-; CHECK-FAST: {{br i1 or .* icmp ult .* bitcast .* @callee .* @__executable_start.* icmp uge .* bitcast .* @callee .* @_end}}
-; CHECK-FAST: {{call .* @zzz.* bitcast .*@callee}}
-; CHECK-FAST: bitcast void ()* {{.*}} to i8* (i64)*
-; CHECK-FAST: br label
-; CHECK-FAST: [[A:%[01-9a-z_.]+]] = phi i8* (i64)* [{{.*bitcast .* @callee.*, %entry.*}}], [ {{.*}} ]
-; CHECK-FAST: call i8* {{.*}}[[A]](i64 {{.*}})
-; CHECK-FAST: ret i8*
diff --git a/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll b/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll
new file mode 100644
index 000000000000..a1b23f1b13f2
--- /dev/null
+++ b/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll
@@ -0,0 +1,67 @@
+; Test that coverage instrumentation does not lose debug location.
+
+; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -S | FileCheck %s
+
+; C++ source:
+; 1: struct A {
+; 2:  int f();
+; 3:  int x;
+; 4: };
+; 5:
+; 6: int A::f() {
+; 7:    return x;
+; 8: }
+; clang++ ../1.cc -O3 -g -S -emit-llvm  -fno-strict-aliasing
+; and add sanitize_address to @_ZN1A1fEv
+
+; Test that __sanitizer_cov call has !dbg pointing to the opening { of A::f().
+; CHECK: call void @__sanitizer_cov(i32*{{.*}}), !dbg [[A:!.*]]
+; CHECK: [[A]] = !MDLocation(line: 6, scope: !{{.*}})
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.A = type { i32 }
+
+; Function Attrs: nounwind readonly uwtable
+define i32 @_ZN1A1fEv(%struct.A* nocapture readonly %this) #0 align 2 {
+entry:
+  tail call void @llvm.dbg.value(metadata %struct.A* %this, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !20
+  %x = getelementptr inbounds %struct.A* %this, i64 0, i32 0, !dbg !21
+  %0 = load i32* %x, align 4, !dbg !21
+  ret i32 %0, !dbg !21
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { sanitize_address nounwind readonly uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!17, !18}
+!llvm.ident = !{!19}
+
+!0 = !{!"0x11\004\00clang version 3.5.0 (210251)\001\00\000\00\001", !1, !2, !3, !12, !2, !2} ; [ DW_TAG_compile_unit ] [/code/llvm/build0/../1.cc] [DW_LANG_C_plus_plus]
+!1 = !{!"../1.cc", !"/code/llvm/build0"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00A\001\0032\0032\000\000\000", !1, null, null, !5, null, null, !"_ZTS1A"} ; [ DW_TAG_structure_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
+!5 = !{!6, !8}
+!6 = !{!"0xd\00x\003\0032\0032\000\000", !1, !"_ZTS1A", !7} ; [ DW_TAG_member ] [x] [line 3, size 32, align 32, offset 0] [from int]
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!8 = !{!"0x2e\00f\00f\00_ZN1A1fEv\002\000\000\000\006\00256\001\002", !1, !"_ZTS1A", !9, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 2] [f]
+!9 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !{!7, !11}
+!11 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!12 = !{!13}
+!13 = !{!"0x2e\00f\00f\00_ZN1A1fEv\006\000\001\000\006\00256\001\006", !1, !"_ZTS1A", !9, null, i32 (%struct.A*)* @_ZN1A1fEv, null, !8, !14} ; [ DW_TAG_subprogram ] [line 6] [def] [f]
+!14 = !{!15}
+!15 = !{!"0x101\00this\0016777216\001088", !13, null, !16} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!16 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
+!17 = !{i32 2, !"Dwarf Version", i32 4}
+!18 = !{i32 2, !"Debug Info Version", i32 2}
+!19 = !{!"clang version 3.5.0 (210251)"}
+!20 = !MDLocation(line: 0, scope: !13)
+!21 = !MDLocation(line: 7, scope: !13)
diff --git a/test/Instrumentation/SanitizerCoverage/coverage.ll b/test/Instrumentation/SanitizerCoverage/coverage.ll
new file mode 100644
index 000000000000..77e781e5474a
--- /dev/null
+++ b/test/Instrumentation/SanitizerCoverage/coverage.ll
@@ -0,0 +1,93 @@
+; RUN: opt < %s -sancov -sanitizer-coverage-level=0 -S | FileCheck %s --check-prefix=CHECK0
+; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -S | FileCheck %s --check-prefix=CHECK1
+; RUN: opt < %s -sancov -sanitizer-coverage-level=2 -S | FileCheck %s --check-prefix=CHECK2
+; RUN: opt < %s -sancov -sanitizer-coverage-level=2 -sanitizer-coverage-block-threshold=10 -S | FileCheck %s --check-prefix=CHECK2
+; RUN: opt < %s -sancov -sanitizer-coverage-level=2 -sanitizer-coverage-block-threshold=1  -S | FileCheck %s --check-prefix=CHECK1
+; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-block-threshold=10 -S | FileCheck %s --check-prefix=CHECK3
+; RUN: opt < %s -sancov -sanitizer-coverage-level=4 -S | FileCheck %s --check-prefix=CHECK4
+
+; RUN: opt < %s -sancov -sanitizer-coverage-level=0  -S | FileCheck %s --check-prefix=CHECK0
+; RUN: opt < %s -sancov -sanitizer-coverage-level=1  -S | FileCheck %s --check-prefix=CHECK1
+; RUN: opt < %s -sancov -sanitizer-coverage-level=2  -S | FileCheck %s --check-prefix=CHECK2
+; RUN: opt < %s -sancov -sanitizer-coverage-level=2 -sanitizer-coverage-block-threshold=10 \
+; RUN:      -S | FileCheck %s --check-prefix=CHECK2
+; RUN: opt < %s -sancov -sanitizer-coverage-level=2 -sanitizer-coverage-block-threshold=1 \
+; RUN:      -S | FileCheck %s --check-prefix=CHECK1
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+define void @foo(i32* %a) sanitize_address {
+entry:
+  %tobool = icmp eq i32* %a, null
+  br i1 %tobool, label %if.end, label %if.then
+
+  if.then:                                          ; preds = %entry
+  store i32 0, i32* %a, align 4
+  br label %if.end
+
+  if.end:                                           ; preds = %entry, %if.then
+  ret void
+}
+
+; CHECK0-NOT: call void @__sanitizer_cov(
+; CHECK0-NOT: call void @__sanitizer_cov_module_init(
+
+; CHECK1-LABEL: define void @foo
+; CHECK1: %0 = load atomic i32* {{.*}} monotonic, align 4, !nosanitize
+; CHECK1: %1 = icmp sge i32 0, %0
+; CHECK1: br i1 %1, label %2, label %3
+; CHECK1: call void @__sanitizer_cov(i32*{{.*}})
+; CHECK1: call void asm sideeffect "", ""()
+; CHECK1-NOT: call void @__sanitizer_cov
+; CHECK1: ret void
+
+; CHECK1-LABEL: define internal void @sancov.module_ctor
+; CHECK1-NOT: ret
+; CHECK1: call void @__sanitizer_cov_module_init({{.*}}, i64 2)
+; CHECK1: ret
+
+
+; CHECK2-LABEL: define void @foo
+; CHECK2: call void @__sanitizer_cov
+; CHECK2: call void asm sideeffect "", ""()
+; CHECK2: call void @__sanitizer_cov
+; CHECK2: call void asm sideeffect "", ""()
+; CHECK2: call void @__sanitizer_cov
+; CHECK2: call void asm sideeffect "", ""()
+; CHECK2-NOT: call void @__sanitizer_cov
+; CHECK2: ret void
+
+; CHECK2-LABEL: define internal void @sancov.module_ctor
+; CHECK2-NOT: ret
+; CHECK2: call void @__sanitizer_cov_module_init({{.*}}, i64 4)
+; CHECK2: ret
+
+; CHECK3-LABEL: define void @foo
+; CHECK3: call void @__sanitizer_cov
+; CHECK3: call void @__sanitizer_cov
+; CHECK3: call void @__sanitizer_cov
+; CHECK3-NOT: ret void
+; CHECK3: call void @__sanitizer_cov
+; CHECK3-NOT: call void @__sanitizer_cov
+; CHECK3: ret void
+
+
+%struct.StructWithVptr = type { i32 (...)** }
+
+define void @CallViaVptr(%struct.StructWithVptr* %foo) uwtable sanitize_address {
+entry:
+  %0 = bitcast %struct.StructWithVptr* %foo to void (%struct.StructWithVptr*)***
+  %vtable = load void (%struct.StructWithVptr*)*** %0, align 8
+  %1 = load void (%struct.StructWithVptr*)** %vtable, align 8
+  tail call void %1(%struct.StructWithVptr* %foo)
+  tail call void %1(%struct.StructWithVptr* %foo)
+  tail call void asm sideeffect "", ""()
+  ret void
+}
+
+; We expect to see two calls to __sanitizer_cov_indir_call16
+; with different values of second argument.
+; CHECK4-LABEL: define void @CallViaVptr
+; CHECK4: call void @__sanitizer_cov_indir_call16({{.*}},[[CACHE:.*]])
+; CHECK4-NOT: call void @__sanitizer_cov_indir_call16({{.*}},[[CACHE]])
+; CHECK4: ret void
diff --git a/test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll b/test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll
new file mode 100644
index 000000000000..cb436a8c948c
--- /dev/null
+++ b/test/Instrumentation/SanitizerCoverage/coverage2-dbg.ll
@@ -0,0 +1,75 @@
+; Test that coverage instrumentation does not lose debug location.
+
+; RUN: opt < %s -sancov  -sanitizer-coverage-level=2 -S | FileCheck %s
+
+; C++ source:
+; 1: void foo(int *a) {
+; 2:     if (a)
+; 3:         *a = 0;
+; 4: }
+; clang++ if.cc -O3 -g -S -emit-llvm
+; and add sanitize_address to @_Z3fooPi
+
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that __sanitizer_cov call has !dgb pointing to the beginning
+; of appropriate basic blocks.
+; CHECK-LABEL:_Z3fooPi
+; CHECK: call void @__sanitizer_cov(i32*{{.*}}), !dbg [[A:!.*]]
+; CHECK: call void @__sanitizer_cov(i32*{{.*}}), !dbg [[B:!.*]]
+; CHECK: call void @__sanitizer_cov(i32*{{.*}}), !dbg [[C:!.*]]
+; CHECK: ret void
+; CHECK: [[A]] = !MDLocation(line: 1, scope: !{{.*}})
+; CHECK: [[B]] = !MDLocation(line: 3, column: 5, scope: !{{.*}})
+; CHECK: [[C]] = !MDLocation(line: 4, column: 1, scope: !{{.*}})
+
+define void @_Z3fooPi(i32* %a) #0 {
+entry:
+  tail call void @llvm.dbg.value(metadata i32* %a, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !15
+  %tobool = icmp eq i32* %a, null, !dbg !16
+  br i1 %tobool, label %if.end, label %if.then, !dbg !16
+
+if.then:                                          ; preds = %entry
+  store i32 0, i32* %a, align 4, !dbg !18, !tbaa !19
+  br label %if.end, !dbg !18
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void, !dbg !23
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" sanitize_address}
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12, !13}
+!llvm.ident = !{!14}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 (217079)\001\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [FOO/if.cc] [DW_LANG_C_plus_plus]
+!1 = !{!"if.cc", !"FOO"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00_Z3fooPi\001\000\001\000\006\00256\001\001", !1, !5, !6, null, void (i32*)* @_Z3fooPi, null, null, !10} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [FOO/if.cc]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !8}
+!8 = !{!"0xf\00\000\0064\0064\000\000", null, null, !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!11}
+!11 = !{!"0x101\00a\0016777217\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [a] [line 1]
+!12 = !{i32 2, !"Dwarf Version", i32 4}
+!13 = !{i32 2, !"Debug Info Version", i32 2}
+!14 = !{!"clang version 3.6.0 (217079)"}
+!15 = !MDLocation(line: 1, column: 15, scope: !4)
+!16 = !MDLocation(line: 2, column: 7, scope: !17)
+!17 = !{!"0xb\002\007\000", !1, !4} ; [ DW_TAG_lexical_block ] [FOO/if.cc]
+!18 = !MDLocation(line: 3, column: 5, scope: !17)
+!19 = !{!20, !20, i64 0}
+!20 = !{!"int", !21, i64 0}
+!21 = !{!"omnipotent char", !22, i64 0}
+!22 = !{!"Simple C/C++ TBAA"}
+!23 = !MDLocation(line: 4, column: 1, scope: !4)
diff --git a/test/Instrumentation/SanitizerCoverage/tracing.ll b/test/Instrumentation/SanitizerCoverage/tracing.ll
new file mode 100644
index 000000000000..8c3a6df93a07
--- /dev/null
+++ b/test/Instrumentation/SanitizerCoverage/tracing.ll
@@ -0,0 +1,33 @@
+; Test -sanitizer-coverage-experimental-tracing
+; RUN: opt < %s -sancov -sanitizer-coverage-level=2 -sanitizer-coverage-experimental-tracing  -S | FileCheck %s --check-prefix=CHECK1
+; RUN: opt < %s -sancov -sanitizer-coverage-level=3 -sanitizer-coverage-experimental-tracing  -S | FileCheck %s --check-prefix=CHECK3
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+define void @foo(i32* %a) sanitize_address {
+entry:
+  %tobool = icmp eq i32* %a, null
+  br i1 %tobool, label %if.end, label %if.then
+
+  if.then:                                          ; preds = %entry
+  store i32 0, i32* %a, align 4
+  br label %if.end
+
+  if.end:                                           ; preds = %entry, %if.then
+  ret void
+}
+
+; CHECK1-LABEL: define void @foo
+; CHECK1: call void @__sanitizer_cov_trace_func_enter
+; CHECK1: call void @__sanitizer_cov_trace_basic_block
+; CHECK1: call void @__sanitizer_cov_trace_basic_block
+; CHECK1-NOT: call void @__sanitizer_cov_trace_basic_block
+; CHECK1: ret void
+
+; CHECK3-LABEL: define void @foo
+; CHECK3: call void @__sanitizer_cov_trace_func_enter
+; CHECK3: call void @__sanitizer_cov_trace_basic_block
+; CHECK3: call void @__sanitizer_cov_trace_basic_block
+; CHECK3: call void @__sanitizer_cov_trace_basic_block
+; CHECK3-NOT: call void @__sanitizer_cov_trace_basic_block
+; CHECK3: ret void
diff --git a/test/Instrumentation/ThreadSanitizer/read_from_global.ll b/test/Instrumentation/ThreadSanitizer/read_from_global.ll
index 33614a32f9ca..037dd56ffb73 100644
--- a/test/Instrumentation/ThreadSanitizer/read_from_global.ll
+++ b/test/Instrumentation/ThreadSanitizer/read_from_global.ll
@@ -54,6 +54,6 @@ entry:
 ; CHECK: = load
 ; CHECK: ret void
 
-!0 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!1 = metadata !{metadata !"vtable pointer", metadata !0}
-!2 = metadata !{metadata !1, metadata !1, i64 0}
+!0 = !{!"Simple C/C++ TBAA", null}
+!1 = !{!"vtable pointer", !0}
+!2 = !{!1, !1, i64 0}
diff --git a/test/Instrumentation/ThreadSanitizer/vptr_read.ll b/test/Instrumentation/ThreadSanitizer/vptr_read.ll
index 811ad8d1cf57..cccdeb8245d9 100644
--- a/test/Instrumentation/ThreadSanitizer/vptr_read.ll
+++ b/test/Instrumentation/ThreadSanitizer/vptr_read.ll
@@ -8,6 +8,6 @@ entry:
   %0 = load i8* %a, align 8, !tbaa !0
   ret i8 %0
 }
-!0 = metadata !{metadata !2, metadata !2, i64 0}
-!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!2 = metadata !{metadata !"vtable pointer", metadata !1}
+!0 = !{!2, !2, i64 0}
+!1 = !{!"Simple C/C++ TBAA", null}
+!2 = !{!"vtable pointer", !1}
diff --git a/test/Instrumentation/ThreadSanitizer/vptr_update.ll b/test/Instrumentation/ThreadSanitizer/vptr_update.ll
index 83d28b6ee217..78f7f31e1346 100644
--- a/test/Instrumentation/ThreadSanitizer/vptr_update.ll
+++ b/test/Instrumentation/ThreadSanitizer/vptr_update.ll
@@ -35,6 +35,6 @@ entry:
   ret void
 }
 
-!0 = metadata !{metadata !2, metadata !2, i64 0}
-!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!2 = metadata !{metadata !"vtable pointer", metadata !1}
+!0 = !{!2, !2, i64 0}
+!1 = !{!"Simple C/C++ TBAA", null}
+!2 = !{!"vtable pointer", !1}
diff --git a/test/JitListener/lit.local.cfg b/test/JitListener/lit.local.cfg
index d995820bc3b6..05f34a744ad6 100644
--- a/test/JitListener/lit.local.cfg
+++ b/test/JitListener/lit.local.cfg
@@ -1,3 +1,3 @@
-if not config.root.llvm_use_intel_jitevents == "ON":
+if not config.root.llvm_use_intel_jitevents == "true":
     config.unsupported = True
 
diff --git a/test/JitListener/multiple.ll b/test/JitListener/multiple.ll
new file mode 100644
index 000000000000..ae5460802f30
--- /dev/null
+++ b/test/JitListener/multiple.ll
@@ -0,0 +1,167 @@
+; Verify the behavior of the IntelJITEventListener.
+; RUN: llvm-jitlistener %s | FileCheck %s
+
+; This test was created using the following file:
+;
+;  1: int foo(int a) {
+;  2:   return a;
+;  3: }
+;  4:
+;  5: int bar(int a) {
+;  6:   if (a == 0) {
+;  7:     return 0;
+;  8:   }
+;  9:   return 100/a;
+; 10: }
+; 11: 
+; 12: int fubar(int a) {
+; 13:   switch (a) {
+; 14:     case 0:
+; 15:       return 10;
+; 16:     case 1:
+; 17:       return 20;
+; 18:     default:
+; 19:       return 30;
+; 20:   }
+; 21: }
+;
+
+; CHECK: Method load [1]: bar, Size = {{[0-9]+}}
+; CHECK:   Line info @ {{[0-9]+}}: multiple.c, line {{[5,6,7,9]}}
+; CHECK:   Line info @ {{[0-9]+}}: multiple.c, line {{[5,6,7,9]}}
+; CHECK:   Line info @ {{[0-9]+}}: multiple.c, line {{[5,6,7,9]}}
+; CHECK:   Line info @ {{[0-9]+}}: multiple.c, line {{[5,6,7,9]}}
+
+; CHECK: Method load [2]: foo, Size = {{[0-9]+}}
+; CHECK:   Line info @ {{[0-9]+}}: multiple.c, line {{[1,2]}}
+; CHECK:   Line info @ {{[0-9]+}}: multiple.c, line {{[1,2]}}
+
+; CHECK: Method load [3]: fubar, Size = {{[0-9]+}}
+; CHECK:   Line info @ {{[0-9]+}}: multiple.c, line {{[12,13,15,17,19]}}
+; CHECK:   Line info @ {{[0-9]+}}: multiple.c, line {{[12,13,15,17,19]}}
+; CHECK:   Line info @ {{[0-9]+}}: multiple.c, line {{[12,13,15,17,19]}}
+; CHECK:   Line info @ {{[0-9]+}}: multiple.c, line {{[12,13,15,17,19]}}
+; CHECK:   Line info @ {{[0-9]+}}: multiple.c, line {{[12,13,15,17,19]}}
+
+; CHECK: Method unload [1]
+; CHECK: Method unload [2]
+; CHECK: Method unload [3]
+
+; ModuleID = 'multiple.c'
+
+; Function Attrs: nounwind uwtable
+define i32 @foo(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !15, metadata !16), !dbg !17
+  %0 = load i32* %a.addr, align 4, !dbg !18
+  ret i32 %0, !dbg !19
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @bar(i32 %a) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %a.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !20, metadata !16), !dbg !21
+  %0 = load i32* %a.addr, align 4, !dbg !22
+  %cmp = icmp eq i32 %0, 0, !dbg !22
+  br i1 %cmp, label %if.then, label %if.end, !dbg !24
+
+if.then:                                          ; preds = %entry
+  store i32 0, i32* %retval, !dbg !25
+  br label %return, !dbg !25
+
+if.end:                                           ; preds = %entry
+  %1 = load i32* %a.addr, align 4, !dbg !27
+  %div = sdiv i32 100, %1, !dbg !28
+  store i32 %div, i32* %retval, !dbg !29
+  br label %return, !dbg !29
+
+return:                                           ; preds = %if.end, %if.then
+  %2 = load i32* %retval, !dbg !30
+  ret i32 %2, !dbg !30
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @fubar(i32 %a) #0 {
+entry:
+  %retval = alloca i32, align 4
+  %a.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !31, metadata !16), !dbg !32
+  %0 = load i32* %a.addr, align 4, !dbg !33
+  switch i32 %0, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+  ], !dbg !34
+
+sw.bb:                                            ; preds = %entry
+  store i32 10, i32* %retval, !dbg !35
+  br label %return, !dbg !35
+
+sw.bb1:                                           ; preds = %entry
+  store i32 20, i32* %retval, !dbg !37
+  br label %return, !dbg !37
+
+sw.default:                                       ; preds = %entry
+  store i32 30, i32* %retval, !dbg !38
+  br label %return, !dbg !38
+
+return:                                           ; preds = %sw.default, %sw.bb1, %sw.bb
+  %1 = load i32* %retval, !dbg !39
+  ret i32 %1, !dbg !39
+}
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !12, !13}
+!llvm.ident = !{!14}
+
+!0 = !{!"0x11\0012\00clang version 3.6.0 (trunk)\000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [F:\users\akaylor\llvm-s\llvm\test\JitListener/multiple.c] [DW_LANG_C99]
+!1 = !{!"multiple.c", !"F:\5Cusers\5Cakaylor\5Cllvm-s\5Cllvm\5Ctest\5CJitListener"}
+!2 = !{}
+!3 = !{!4, !9, !10}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\000\00256\000\001", !1, !5, !6, null, i32 (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}                               ; [ DW_TAG_file_type ] [F:\users\akaylor\llvm-s\llvm\test\JitListener/multiple.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x2e\00bar\00bar\00\005\000\001\000\000\00256\000\005", !1, !5, !6, null, i32 (i32)* @bar, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [bar]
+!10 = !{!"0x2e\00fubar\00fubar\00\0012\000\001\000\000\00256\000\0012", !1, !5, !6, null, i32 (i32)* @fubar, null, null, !2} ; [ DW_TAG_subprogram ] [line 12] [def] [fubar]
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32 2, !"Debug Info Version", i32 2}
+!13 = !{i32 1, !"PIC Level", i32 2}
+!14 = !{!"clang version 3.6.0 (trunk)"}
+!15 = !{!"0x101\00a\0016777217\000", !4, !5, !8}  ; [ DW_TAG_arg_variable ] [a] [line 1]
+!16 = !{!"0x102"}                                 ; [ DW_TAG_expression ]
+!17 = !MDLocation(line: 1, column: 13, scope: !4)
+!18 = !MDLocation(line: 2, column: 10, scope: !4)
+!19 = !MDLocation(line: 2, column: 3, scope: !4)
+!20 = !{!"0x101\00a\0016777221\000", !9, !5, !8}  ; [ DW_TAG_arg_variable ] [a] [line 5]
+!21 = !MDLocation(line: 5, column: 13, scope: !9)
+!22 = !MDLocation(line: 6, column: 7, scope: !23)
+!23 = !{!"0xb\006\007\000", !1, !9}               ; [ DW_TAG_lexical_block ] [F:\users\akaylor\llvm-s\llvm\test\JitListener/multiple.c]
+!24 = !MDLocation(line: 6, column: 7, scope: !9)
+!25 = !MDLocation(line: 7, column: 5, scope: !26)
+!26 = !{!"0xb\006\0015\001", !1, !23}             ; [ DW_TAG_lexical_block ] [F:\users\akaylor\llvm-s\llvm\test\JitListener/multiple.c]
+!27 = !MDLocation(line: 9, column: 14, scope: !9)
+!28 = !MDLocation(line: 9, column: 10, scope: !9)
+!29 = !MDLocation(line: 9, column: 3, scope: !9)
+!30 = !MDLocation(line: 10, column: 1, scope: !9)
+!31 = !{!"0x101\00a\0016777228\000", !10, !5, !8} ; [ DW_TAG_arg_variable ] [a] [line 12]
+!32 = !MDLocation(line: 12, column: 15, scope: !10)
+!33 = !MDLocation(line: 13, column: 11, scope: !10)
+!34 = !MDLocation(line: 13, column: 3, scope: !10)
+!35 = !MDLocation(line: 15, column: 7, scope: !36)
+!36 = !{!"0xb\0013\0014\002", !1, !10}            ; [ DW_TAG_lexical_block ] [F:\users\akaylor\llvm-s\llvm\test\JitListener/multiple.c]
+!37 = !MDLocation(line: 17, column: 7, scope: !36)
+!38 = !MDLocation(line: 19, column: 7, scope: !36)
+!39 = !MDLocation(line: 21, column: 1, scope: !10)
diff --git a/test/JitListener/simple.ll b/test/JitListener/simple.ll
new file mode 100644
index 000000000000..1732170a9654
--- /dev/null
+++ b/test/JitListener/simple.ll
@@ -0,0 +1,54 @@
+; Verify the behavior of the IntelJITEventListener.
+; RUN: llvm-jitlistener %s | FileCheck %s
+
+; This test was created using the following file:
+;
+; 1: int foo(int a) {
+; 2:   return a;
+; 3: }
+;
+
+; CHECK: Method load [1]: foo, Size = {{[0-9]+}}
+; CHECK:   Line info @ {{[0-9]+}}: simple.c, line 1
+; CHECK:   Line info @ {{[0-9]+}}: simple.c, line 2
+; CHECK: Method unload [1]
+
+; ModuleID = 'simple.c'
+
+; Function Attrs: nounwind uwtable
+define i32 @foo(i32 %a) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !12, metadata !13), !dbg !14
+  %0 = load i32* %a.addr, align 4, !dbg !15
+  ret i32 %0, !dbg !16
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = !{!"0x11\0012\00clang version 3.6.0 (trunk)\000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [F:\users\akaylor\llvm-s\llvm\test\JitListener/simple.c] [DW_LANG_C99]
+!1 = !{!"simple.c", !"F:\5Cusers\5Cakaylor\5Cllvm-s\5Cllvm\5Ctest\5CJitListener"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\000\00256\000\001", !1, !5, !6, null, i32 (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}                               ; [ DW_TAG_file_type ] [F:\users\akaylor\llvm-s\llvm\test\JitListener/simple.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 2}
+!11 = !{!"clang version 3.6.0 (trunk)"}
+!12 = !{!"0x101\00a\0016777217\000", !4, !5, !8}  ; [ DW_TAG_arg_variable ] [a] [line 1]
+!13 = !{!"0x102"}                                 ; [ DW_TAG_expression ]
+!14 = !MDLocation(line: 1, column: 13, scope: !4)
+!15 = !MDLocation(line: 2, column: 10, scope: !4)
+!16 = !MDLocation(line: 2, column: 3, scope: !4)
diff --git a/test/JitListener/test-common-symbols.ll b/test/JitListener/test-common-symbols.ll
deleted file mode 100644
index a389bf7a6bc2..000000000000
--- a/test/JitListener/test-common-symbols.ll
+++ /dev/null
@@ -1,113 +0,0 @@
-; RUN: llvm-jitlistener %s | FileCheck %s
-
-; CHECK: Method load [1]: main, Size = 164
-; CHECK: Method unload [1]
-
-; ModuleID = '<stdin>'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-@zero_int = common global i32 0, align 4
-@zero_arr = common global [10 x i32] zeroinitializer, align 16
-@zero_double = common global double 0.000000e+00, align 8
-
-define i32 @main() nounwind uwtable {
-entry:
-  %retval = alloca i32, align 4
-  %i = alloca i32, align 4
-  store i32 0, i32* %retval
-  %0 = load i32* @zero_int, align 4, !dbg !21
-  %add = add nsw i32 %0, 5, !dbg !21
-  %idxprom = sext i32 %add to i64, !dbg !21
-  %arrayidx = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom, !dbg !21
-  store i32 40, i32* %arrayidx, align 4, !dbg !21
-  %1 = load double* @zero_double, align 8, !dbg !23
-  %cmp = fcmp olt double %1, 1.000000e+00, !dbg !23
-  br i1 %cmp, label %if.then, label %if.end, !dbg !23
-
-if.then:                                          ; preds = %entry
-  %2 = load i32* @zero_int, align 4, !dbg !24
-  %add1 = add nsw i32 %2, 2, !dbg !24
-  %idxprom2 = sext i32 %add1 to i64, !dbg !24
-  %arrayidx3 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom2, !dbg !24
-  store i32 70, i32* %arrayidx3, align 4, !dbg !24
-  br label %if.end, !dbg !24
-
-if.end:                                           ; preds = %if.then, %entry
-  call void @llvm.dbg.declare(metadata !{i32* %i}, metadata !25), !dbg !27
-  store i32 1, i32* %i, align 4, !dbg !28
-  br label %for.cond, !dbg !28
-
-for.cond:                                         ; preds = %for.inc, %if.end
-  %3 = load i32* %i, align 4, !dbg !28
-  %cmp4 = icmp slt i32 %3, 10, !dbg !28
-  br i1 %cmp4, label %for.body, label %for.end, !dbg !28
-
-for.body:                                         ; preds = %for.cond
-  %4 = load i32* %i, align 4, !dbg !29
-  %sub = sub nsw i32 %4, 1, !dbg !29
-  %idxprom5 = sext i32 %sub to i64, !dbg !29
-  %arrayidx6 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom5, !dbg !29
-  %5 = load i32* %arrayidx6, align 4, !dbg !29
-  %6 = load i32* %i, align 4, !dbg !29
-  %idxprom7 = sext i32 %6 to i64, !dbg !29
-  %arrayidx8 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom7, !dbg !29
-  %7 = load i32* %arrayidx8, align 4, !dbg !29
-  %add9 = add nsw i32 %5, %7, !dbg !29
-  %8 = load i32* %i, align 4, !dbg !29
-  %idxprom10 = sext i32 %8 to i64, !dbg !29
-  %arrayidx11 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom10, !dbg !29
-  store i32 %add9, i32* %arrayidx11, align 4, !dbg !29
-  br label %for.inc, !dbg !31
-
-for.inc:                                          ; preds = %for.body
-  %9 = load i32* %i, align 4, !dbg !32
-  %inc = add nsw i32 %9, 1, !dbg !32
-  store i32 %inc, i32* %i, align 4, !dbg !32
-  br label %for.cond, !dbg !32
-
-for.end:                                          ; preds = %for.cond
-  %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4, !dbg !33
-  %cmp12 = icmp eq i32 %10, 110, !dbg !33
-  %cond = select i1 %cmp12, i32 0, i32 -1, !dbg !33
-  ret i32 %cond, !dbg !33
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!35}
-
-!0 = metadata !{i32 720913, metadata !34, i32 12, metadata !"clang version 3.1 ()", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 0}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !34, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !10, i32 0} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !34} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 720932}                      ; [ DW_TAG_base_type ]
-!12 = metadata !{metadata !14, metadata !15, metadata !17}
-!14 = metadata !{i32 720948, i32 0, null, metadata !"zero_int", metadata !"zero_int", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @zero_int, null} ; [ DW_TAG_variable ]
-!15 = metadata !{i32 720948, i32 0, null, metadata !"zero_double", metadata !"zero_double", metadata !"", metadata !6, i32 2, metadata !16, i32 0, i32 1, double* @zero_double, null} ; [ DW_TAG_variable ]
-!16 = metadata !{i32 720932, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!17 = metadata !{i32 720948, i32 0, null, metadata !"zero_arr", metadata !"zero_arr", metadata !"", metadata !6, i32 3, metadata !18, i32 0, i32 1, [10 x i32]* @zero_arr, null} ; [ DW_TAG_variable ]
-!18 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !9, metadata !19, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 320, align 32, offset 0] [from int]
-!19 = metadata !{metadata !20}
-!20 = metadata !{i32 720929, i64 0, i64 10}        ; [ DW_TAG_subrange_type ]
-!21 = metadata !{i32 7, i32 5, metadata !22, null}
-!22 = metadata !{i32 720907, metadata !34, metadata !5, i32 6, i32 1, i32 0} ; [ DW_TAG_lexical_block ]
-!23 = metadata !{i32 9, i32 5, metadata !22, null}
-!24 = metadata !{i32 10, i32 9, metadata !22, null}
-!25 = metadata !{i32 721152, metadata !26, metadata !"i", metadata !6, i32 12, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!26 = metadata !{i32 720907, metadata !34, metadata !22, i32 12, i32 5, i32 1} ; [ DW_TAG_lexical_block ]
-!27 = metadata !{i32 12, i32 14, metadata !26, null}
-!28 = metadata !{i32 12, i32 19, metadata !26, null}
-!29 = metadata !{i32 13, i32 9, metadata !30, null}
-!30 = metadata !{i32 720907, metadata !34, metadata !26, i32 12, i32 34, i32 2} ; [ DW_TAG_lexical_block ]
-!31 = metadata !{i32 14, i32 5, metadata !30, null}
-!32 = metadata !{i32 12, i32 29, metadata !26, null}
-!33 = metadata !{i32 15, i32 5, metadata !22, null}
-!34 = metadata !{metadata !"test-common-symbols.c", metadata !"/store/store/llvm/build"}
-!35 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/JitListener/test-inline.ll b/test/JitListener/test-inline.ll
deleted file mode 100644
index 0d365b1eaa98..000000000000
--- a/test/JitListener/test-inline.ll
+++ /dev/null
@@ -1,212 +0,0 @@
-; RUN: llvm-jitlistener %s | FileCheck %s
-
-; CHECK: Method load [1]: _Z15test_parametersPfPA2_dR11char_structPPitm, Size = 170
-; CHECK:   Line info @ 0: test-inline.cpp, line 33
-; CHECK:   Line info @ 35: test-inline.cpp, line 34
-; CHECK:   Line info @ 165: test-inline.cpp, line 35
-; CHECK: Method load [2]: _Z3foov, Size = 3
-; CHECK:   Line info @ 0: test-inline.cpp, line 28
-; CHECK:   Line info @ 2: test-inline.cpp, line 29
-; CHECK:   Line info @ 3: test-inline.cpp, line 29
-; CHECK: Method load [3]: main, Size = 146
-; CHECK:   Line info @ 0: test-inline.cpp, line 39
-; CHECK:   Line info @ 21: test-inline.cpp, line 41
-; CHECK:   Line info @ 39: test-inline.cpp, line 42
-; CHECK:   Line info @ 60: test-inline.cpp, line 44
-; CHECK:   Line info @ 80: test-inline.cpp, line 48
-; CHECK:   Line info @ 90: test-inline.cpp, line 45
-; CHECK:   Line info @ 95: test-inline.cpp, line 46
-; CHECK:   Line info @ 114: test-inline.cpp, line 48
-; CHECK:   Line info @ 141: test-inline.cpp, line 49
-; CHECK:   Line info @ 146: test-inline.cpp, line 49
-; CHECK: Method unload [1]
-; CHECK: Method unload [2]
-; CHECK: Method unload [3]
-
-; ModuleID = 'test-inline.cpp'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-%struct.char_struct = type { i8, [2 x i8] }
-
-@compound_char = global %struct.char_struct zeroinitializer, align 1
-@_ZZ4mainE1d = private unnamed_addr constant [2 x [2 x double]] [[2 x double] [double 0.000000e+00, double 1.000000e+00], [2 x double] [double 2.000000e+00, double 3.000000e+00]], align 16
-
-define double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %pf, [2 x double]* %ppd, %struct.char_struct* %s, i32** %ppn, i16 zeroext %us, i64 %l) uwtable {
-entry:
-  %pf.addr = alloca float*, align 8
-  %ppd.addr = alloca [2 x double]*, align 8
-  %s.addr = alloca %struct.char_struct*, align 8
-  %ppn.addr = alloca i32**, align 8
-  %us.addr = alloca i16, align 2
-  %l.addr = alloca i64, align 8
-  %result = alloca double, align 8
-  store float* %pf, float** %pf.addr, align 8
-  call void @llvm.dbg.declare(metadata !{float** %pf.addr}, metadata !46), !dbg !47
-  store [2 x double]* %ppd, [2 x double]** %ppd.addr, align 8
-  call void @llvm.dbg.declare(metadata !{[2 x double]** %ppd.addr}, metadata !48), !dbg !47
-  store %struct.char_struct* %s, %struct.char_struct** %s.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.char_struct** %s.addr}, metadata !49), !dbg !47
-  store i32** %ppn, i32*** %ppn.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i32*** %ppn.addr}, metadata !50), !dbg !47
-  store i16 %us, i16* %us.addr, align 2
-  call void @llvm.dbg.declare(metadata !{i16* %us.addr}, metadata !51), !dbg !47
-  store i64 %l, i64* %l.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i64* %l.addr}, metadata !52), !dbg !47
-  call void @llvm.dbg.declare(metadata !{double* %result}, metadata !53), !dbg !55
-  %0 = load float** %pf.addr, align 8, !dbg !55
-  %arrayidx = getelementptr inbounds float* %0, i64 0, !dbg !55
-  %1 = load float* %arrayidx, align 4, !dbg !55
-  %conv = fpext float %1 to double, !dbg !55
-  %2 = load [2 x double]** %ppd.addr, align 8, !dbg !55
-  %arrayidx1 = getelementptr inbounds [2 x double]* %2, i64 1, !dbg !55
-  %arrayidx2 = getelementptr inbounds [2 x double]* %arrayidx1, i32 0, i64 1, !dbg !55
-  %3 = load double* %arrayidx2, align 8, !dbg !55
-  %mul = fmul double %conv, %3, !dbg !55
-  %4 = load %struct.char_struct** %s.addr, align 8, !dbg !55
-  %c = getelementptr inbounds %struct.char_struct* %4, i32 0, i32 0, !dbg !55
-  %5 = load i8* %c, align 1, !dbg !55
-  %conv3 = sext i8 %5 to i32, !dbg !55
-  %conv4 = sitofp i32 %conv3 to double, !dbg !55
-  %mul5 = fmul double %mul, %conv4, !dbg !55
-  %6 = load i16* %us.addr, align 2, !dbg !55
-  %conv6 = zext i16 %6 to i32, !dbg !55
-  %conv7 = sitofp i32 %conv6 to double, !dbg !55
-  %mul8 = fmul double %mul5, %conv7, !dbg !55
-  %7 = load i64* %l.addr, align 8, !dbg !55
-  %conv9 = uitofp i64 %7 to double, !dbg !55
-  %mul10 = fmul double %mul8, %conv9, !dbg !55
-  %call = call i32 @_Z3foov(), !dbg !55
-  %conv11 = sitofp i32 %call to double, !dbg !55
-  %add = fadd double %mul10, %conv11, !dbg !55
-  store double %add, double* %result, align 8, !dbg !55
-  %8 = load double* %result, align 8, !dbg !56
-  ret double %8, !dbg !56
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-define linkonce_odr i32 @_Z3foov() nounwind uwtable inlinehint {
-entry:
-  ret i32 0, !dbg !57
-}
-
-define i32 @main(i32 %argc, i8** %argv) uwtable {
-entry:
-  %retval = alloca i32, align 4
-  %argc.addr = alloca i32, align 4
-  %argv.addr = alloca i8**, align 8
-  %s = alloca %struct.char_struct, align 1
-  %f = alloca float, align 4
-  %d = alloca [2 x [2 x double]], align 16
-  %result = alloca double, align 8
-  store i32 0, i32* %retval
-  store i32 %argc, i32* %argc.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !59), !dbg !60
-  store i8** %argv, i8*** %argv.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !61), !dbg !60
-  call void @llvm.dbg.declare(metadata !{%struct.char_struct* %s}, metadata !62), !dbg !64
-  call void @llvm.dbg.declare(metadata !{float* %f}, metadata !65), !dbg !66
-  store float 0.000000e+00, float* %f, align 4, !dbg !66
-  call void @llvm.dbg.declare(metadata !{[2 x [2 x double]]* %d}, metadata !67), !dbg !70
-  %0 = bitcast [2 x [2 x double]]* %d to i8*, !dbg !70
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([2 x [2 x double]]* @_ZZ4mainE1d to i8*), i64 32, i32 16, i1 false), !dbg !70
-  %c = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 0, !dbg !71
-  store i8 97, i8* %c, align 1, !dbg !71
-  %c2 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !72
-  %arrayidx = getelementptr inbounds [2 x i8]* %c2, i32 0, i64 0, !dbg !72
-  store i8 48, i8* %arrayidx, align 1, !dbg !72
-  %c21 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !73
-  %arrayidx2 = getelementptr inbounds [2 x i8]* %c21, i32 0, i64 1, !dbg !73
-  store i8 49, i8* %arrayidx2, align 1, !dbg !73
-  call void @llvm.dbg.declare(metadata !{double* %result}, metadata !74), !dbg !75
-  %arraydecay = getelementptr inbounds [2 x [2 x double]]* %d, i32 0, i32 0, !dbg !75
-  %call = call double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %f, [2 x double]* %arraydecay, %struct.char_struct* %s, i32** null, i16 zeroext 10, i64 42), !dbg !75
-  store double %call, double* %result, align 8, !dbg !75
-  %1 = load double* %result, align 8, !dbg !76
-  %cmp = fcmp oeq double %1, 0.000000e+00, !dbg !76
-  %cond = select i1 %cmp, i32 0, i32 -1, !dbg !76
-  ret i32 %cond, !dbg !76
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!78}
-
-!0 = metadata !{i32 786449, metadata !77, i32 4, metadata !"clang version 3.3 (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-clang2 gitosis@miro.kw.intel.com:clang.git 39450d0469e0d5589ad39fd0b20b5742750619a0) (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-llvm gitosis@miro.kw.intel.com:llvm.git 376642ed620ecae05b68c7bc81f79aeb2065abe0)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !43, null, metadata !""} ; [ DW_TAG_compile_unit ] [/home/akaylor/dev/test-inline.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{i32 0}
-!3 = metadata !{metadata !5, metadata !35, metadata !40}
-!5 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", i32 32, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !1, i32 33} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 33] [test_parameters]
-!6 = metadata !{i32 786473, metadata !77} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !10, metadata !12, metadata !16, metadata !29, metadata !32, metadata !33}
-!9 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
-!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from float]
-!11 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
-!12 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!13 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !9, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 64, offset 0] [from double]
-!14 = metadata !{metadata !15}
-!15 = metadata !{i32 786465, i64 0, i64 2}        ; [ DW_TAG_subrange_type ] [0, 1]
-!16 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from char_struct]
-!17 = metadata !{i32 786451, metadata !77, null, metadata !"char_struct", i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [char_struct] [line 22, size 24, align 8, offset 0] [def] [from ]
-!18 = metadata !{metadata !19, metadata !21, metadata !23}
-!19 = metadata !{i32 786445, metadata !77, metadata !17, metadata !"c", i32 23, i64 8, i64 8, i64 0, i32 0, metadata !20} ; [ DW_TAG_member ] [c] [line 23, size 8, align 8, offset 0] [from char]
-!20 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!21 = metadata !{i32 786445, metadata !77, metadata !17, metadata !"c2", i32 24, i64 16, i64 8, i64 8, i32 0, metadata !22} ; [ DW_TAG_member ] [c2] [line 24, size 16, align 8, offset 8] [from ]
-!22 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !20, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 16, align 8, offset 0] [from char]
-!23 = metadata !{i32 786478, metadata !77, metadata !17, metadata !"char_struct", metadata !"char_struct", metadata !"", i32 22, metadata !24, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !27, i32 22} ; [ DW_TAG_subprogram ] [line 22] [char_struct]
-!24 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !25, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!25 = metadata !{null, metadata !26}
-!26 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !17} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char_struct]
-!27 = metadata !{metadata !28}
-!28 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!29 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !30} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!30 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !31} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!31 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!32 = metadata !{i32 786468, null, null, metadata !"unsigned short", i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned short] [line 0, size 16, align 16, offset 0, enc DW_ATE_unsigned]
-!33 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !34} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from long unsigned int]
-!34 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
-!35 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 38, metadata !36, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 39} ; [ DW_TAG_subprogram ] [line 38] [def] [scope 39] [main]
-!36 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !37, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!37 = metadata !{metadata !31, metadata !31, metadata !38}
-!38 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!39 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
-!40 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 27, metadata !41, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z3foov, null, null, metadata !1, i32 28} ; [ DW_TAG_subprogram ] [line 27] [def] [scope 28] [foo]
-!41 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !42, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!42 = metadata !{metadata !31}
-!43 = metadata !{metadata !45}
-!45 = metadata !{i32 786484, i32 0, null, metadata !"compound_char", metadata !"compound_char", metadata !"", metadata !6, i32 25, metadata !17, i32 0, i32 1, %struct.char_struct* @compound_char, null} ; [ DW_TAG_variable ] [compound_char] [line 25] [def]
-!46 = metadata !{i32 786689, metadata !5, metadata !"pf", metadata !6, i32 16777248, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [pf] [line 32]
-!47 = metadata !{i32 32, i32 0, metadata !5, null}
-!48 = metadata !{i32 786689, metadata !5, metadata !"ppd", metadata !6, i32 33554464, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ppd] [line 32]
-!49 = metadata !{i32 786689, metadata !5, metadata !"s", metadata !6, i32 50331680, metadata !16, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [s] [line 32]
-!50 = metadata !{i32 786689, metadata !5, metadata !"ppn", metadata !6, i32 67108896, metadata !29, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ppn] [line 32]
-!51 = metadata !{i32 786689, metadata !5, metadata !"us", metadata !6, i32 83886112, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [us] [line 32]
-!52 = metadata !{i32 786689, metadata !5, metadata !"l", metadata !6, i32 100663328, metadata !33, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [l] [line 32]
-!53 = metadata !{i32 786688, metadata !54, metadata !"result", metadata !6, i32 34, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 34]
-!54 = metadata !{i32 786443, metadata !77, metadata !5, i32 33, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-inline.cpp]
-!55 = metadata !{i32 34, i32 0, metadata !54, null}
-!56 = metadata !{i32 35, i32 0, metadata !54, null}
-!57 = metadata !{i32 29, i32 0, metadata !58, null}
-!58 = metadata !{i32 786443, metadata !77, metadata !40, i32 28, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-inline.cpp]
-!59 = metadata !{i32 786689, metadata !35, metadata !"argc", metadata !6, i32 16777254, metadata !31, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 38]
-!60 = metadata !{i32 38, i32 0, metadata !35, null}
-!61 = metadata !{i32 786689, metadata !35, metadata !"argv", metadata !6, i32 33554470, metadata !38, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 38]
-!62 = metadata !{i32 786688, metadata !63, metadata !"s", metadata !6, i32 40, metadata !17, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 40]
-!63 = metadata !{i32 786443, metadata !77, metadata !35, i32 39, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-inline.cpp]
-!64 = metadata !{i32 40, i32 0, metadata !63, null}
-!65 = metadata !{i32 786688, metadata !63, metadata !"f", metadata !6, i32 41, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [f] [line 41]
-!66 = metadata !{i32 41, i32 0, metadata !63, null}
-!67 = metadata !{i32 786688, metadata !63, metadata !"d", metadata !6, i32 42, metadata !68, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 42]
-!68 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !9, metadata !69, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 256, align 64, offset 0] [from double]
-!69 = metadata !{metadata !15, metadata !15}
-!70 = metadata !{i32 42, i32 0, metadata !63, null}
-!71 = metadata !{i32 44, i32 0, metadata !63, null}
-!72 = metadata !{i32 45, i32 0, metadata !63, null}
-!73 = metadata !{i32 46, i32 0, metadata !63, null}
-!74 = metadata !{i32 786688, metadata !63, metadata !"result", metadata !6, i32 48, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 48]
-!75 = metadata !{i32 48, i32 0, metadata !63, null}
-!76 = metadata !{i32 49, i32 0, metadata !63, null}
-!77 = metadata !{metadata !"test-inline.cpp", metadata !"/home/akaylor/dev"}
-!78 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/JitListener/test-parameters.ll b/test/JitListener/test-parameters.ll
deleted file mode 100644
index 7feb6bb65a3f..000000000000
--- a/test/JitListener/test-parameters.ll
+++ /dev/null
@@ -1,211 +0,0 @@
-; RUN: llvm-jitlistener %s | FileCheck %s
-
-; CHECK: Method load [1]: _Z15test_parametersPfPA2_dR11char_structPPitm, Size = 170
-; CHECK:   Line info @ 0: test-parameters.cpp, line 33
-; CHECK:   Line info @ 35: test-parameters.cpp, line 34
-; CHECK:   Line info @ 165: test-parameters.cpp, line 35
-; CHECK: Method load [2]: _Z3foov, Size = 3
-; CHECK:   Line info @ 0: test-parameters.cpp, line 28
-; CHECK:   Line info @ 2: test-parameters.cpp, line 29
-; CHECK: Method load [3]: main, Size = 146
-; CHECK:   Line info @ 0: test-parameters.cpp, line 39
-; CHECK:   Line info @ 21: test-parameters.cpp, line 41
-; CHECK:   Line info @ 39: test-parameters.cpp, line 42
-; CHECK:   Line info @ 60: test-parameters.cpp, line 44
-; CHECK:   Line info @ 80: test-parameters.cpp, line 48
-; CHECK:   Line info @ 90: test-parameters.cpp, line 45
-; CHECK:   Line info @ 95: test-parameters.cpp, line 46
-; CHECK:   Line info @ 114: test-parameters.cpp, line 48
-; CHECK:   Line info @ 141: test-parameters.cpp, line 49
-; CHECK:   Line info @ 146: test-parameters.cpp, line 49
-; CHECK: Method unload [1]
-; CHECK: Method unload [2]
-; CHECK: Method unload [3]
-
-; ModuleID = 'test-parameters.cpp'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-%struct.char_struct = type { i8, [2 x i8] }
-
-@compound_char = global %struct.char_struct zeroinitializer, align 1
-@_ZZ4mainE1d = private unnamed_addr constant [2 x [2 x double]] [[2 x double] [double 0.000000e+00, double 1.000000e+00], [2 x double] [double 2.000000e+00, double 3.000000e+00]], align 16
-
-define i32 @_Z3foov() nounwind uwtable {
-entry:
-  ret i32 0, !dbg !46
-}
-
-define double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %pf, [2 x double]* %ppd, %struct.char_struct* %s, i32** %ppn, i16 zeroext %us, i64 %l) nounwind uwtable {
-entry:
-  %pf.addr = alloca float*, align 8
-  %ppd.addr = alloca [2 x double]*, align 8
-  %s.addr = alloca %struct.char_struct*, align 8
-  %ppn.addr = alloca i32**, align 8
-  %us.addr = alloca i16, align 2
-  %l.addr = alloca i64, align 8
-  %result = alloca double, align 8
-  store float* %pf, float** %pf.addr, align 8
-  call void @llvm.dbg.declare(metadata !{float** %pf.addr}, metadata !48), !dbg !49
-  store [2 x double]* %ppd, [2 x double]** %ppd.addr, align 8
-  call void @llvm.dbg.declare(metadata !{[2 x double]** %ppd.addr}, metadata !50), !dbg !49
-  store %struct.char_struct* %s, %struct.char_struct** %s.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%struct.char_struct** %s.addr}, metadata !51), !dbg !49
-  store i32** %ppn, i32*** %ppn.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i32*** %ppn.addr}, metadata !52), !dbg !49
-  store i16 %us, i16* %us.addr, align 2
-  call void @llvm.dbg.declare(metadata !{i16* %us.addr}, metadata !53), !dbg !49
-  store i64 %l, i64* %l.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i64* %l.addr}, metadata !54), !dbg !49
-  call void @llvm.dbg.declare(metadata !{double* %result}, metadata !55), !dbg !57
-  %0 = load float** %pf.addr, align 8, !dbg !57
-  %arrayidx = getelementptr inbounds float* %0, i64 0, !dbg !57
-  %1 = load float* %arrayidx, align 4, !dbg !57
-  %conv = fpext float %1 to double, !dbg !57
-  %2 = load [2 x double]** %ppd.addr, align 8, !dbg !57
-  %arrayidx1 = getelementptr inbounds [2 x double]* %2, i64 1, !dbg !57
-  %arrayidx2 = getelementptr inbounds [2 x double]* %arrayidx1, i32 0, i64 1, !dbg !57
-  %3 = load double* %arrayidx2, align 8, !dbg !57
-  %mul = fmul double %conv, %3, !dbg !57
-  %4 = load %struct.char_struct** %s.addr, align 8, !dbg !57
-  %c = getelementptr inbounds %struct.char_struct* %4, i32 0, i32 0, !dbg !57
-  %5 = load i8* %c, align 1, !dbg !57
-  %conv3 = sext i8 %5 to i32, !dbg !57
-  %conv4 = sitofp i32 %conv3 to double, !dbg !57
-  %mul5 = fmul double %mul, %conv4, !dbg !57
-  %6 = load i16* %us.addr, align 2, !dbg !57
-  %conv6 = zext i16 %6 to i32, !dbg !57
-  %conv7 = sitofp i32 %conv6 to double, !dbg !57
-  %mul8 = fmul double %mul5, %conv7, !dbg !57
-  %7 = load i64* %l.addr, align 8, !dbg !57
-  %conv9 = uitofp i64 %7 to double, !dbg !57
-  %mul10 = fmul double %mul8, %conv9, !dbg !57
-  %call = call i32 @_Z3foov(), !dbg !57
-  %conv11 = sitofp i32 %call to double, !dbg !57
-  %add = fadd double %mul10, %conv11, !dbg !57
-  store double %add, double* %result, align 8, !dbg !57
-  %8 = load double* %result, align 8, !dbg !58
-  ret double %8, !dbg !58
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-define i32 @main(i32 %argc, i8** %argv) nounwind uwtable {
-entry:
-  %retval = alloca i32, align 4
-  %argc.addr = alloca i32, align 4
-  %argv.addr = alloca i8**, align 8
-  %s = alloca %struct.char_struct, align 1
-  %f = alloca float, align 4
-  %d = alloca [2 x [2 x double]], align 16
-  %result = alloca double, align 8
-  store i32 0, i32* %retval
-  store i32 %argc, i32* %argc.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !59), !dbg !60
-  store i8** %argv, i8*** %argv.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !61), !dbg !60
-  call void @llvm.dbg.declare(metadata !{%struct.char_struct* %s}, metadata !62), !dbg !64
-  call void @llvm.dbg.declare(metadata !{float* %f}, metadata !65), !dbg !66
-  store float 0.000000e+00, float* %f, align 4, !dbg !66
-  call void @llvm.dbg.declare(metadata !{[2 x [2 x double]]* %d}, metadata !67), !dbg !70
-  %0 = bitcast [2 x [2 x double]]* %d to i8*, !dbg !70
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast ([2 x [2 x double]]* @_ZZ4mainE1d to i8*), i64 32, i32 16, i1 false), !dbg !70
-  %c = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 0, !dbg !71
-  store i8 97, i8* %c, align 1, !dbg !71
-  %c2 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !72
-  %arrayidx = getelementptr inbounds [2 x i8]* %c2, i32 0, i64 0, !dbg !72
-  store i8 48, i8* %arrayidx, align 1, !dbg !72
-  %c21 = getelementptr inbounds %struct.char_struct* %s, i32 0, i32 1, !dbg !73
-  %arrayidx2 = getelementptr inbounds [2 x i8]* %c21, i32 0, i64 1, !dbg !73
-  store i8 49, i8* %arrayidx2, align 1, !dbg !73
-  call void @llvm.dbg.declare(metadata !{double* %result}, metadata !74), !dbg !75
-  %arraydecay = getelementptr inbounds [2 x [2 x double]]* %d, i32 0, i32 0, !dbg !75
-  %call = call double @_Z15test_parametersPfPA2_dR11char_structPPitm(float* %f, [2 x double]* %arraydecay, %struct.char_struct* %s, i32** null, i16 zeroext 10, i64 42), !dbg !75
-  store double %call, double* %result, align 8, !dbg !75
-  %1 = load double* %result, align 8, !dbg !76
-  %cmp = fcmp oeq double %1, 0.000000e+00, !dbg !76
-  %cond = select i1 %cmp, i32 0, i32 -1, !dbg !76
-  ret i32 %cond, !dbg !76
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!78}
-
-!0 = metadata !{i32 786449, metadata !77, i32 4, metadata !"clang version 3.3 (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-clang2 gitosis@miro.kw.intel.com:clang.git 39450d0469e0d5589ad39fd0b20b5742750619a0) (ssh://akaylor@git-amr-1.devtools.intel.com:29418/ssg_llvm-llvm gitosis@miro.kw.intel.com:llvm.git 376642ed620ecae05b68c7bc81f79aeb2065abe0)", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !43, null, metadata !""} ; [ DW_TAG_compile_unit ] [/home/akaylor/dev/test-parameters.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{i32 0}
-!3 = metadata !{metadata !5, metadata !10, metadata !38}
-!5 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 27, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z3foov, null, null, metadata !1, i32 28} ; [ DW_TAG_subprogram ] [line 27] [def] [scope 28] [foo]
-!6 = metadata !{i32 786473, metadata !77} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"test_parameters", metadata !"test_parameters", metadata !"_Z15test_parametersPfPA2_dR11char_structPPitm", i32 32, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, double (float*, [2 x double]*, %struct.char_struct*, i32**, i16, i64)* @_Z15test_parametersPfPA2_dR11char_structPPitm, null, null, metadata !1, i32 33} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 33] [test_parameters]
-!11 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !13, metadata !14, metadata !16, metadata !20, metadata !33, metadata !35, metadata !36}
-!13 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
-!14 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from float]
-!15 = metadata !{i32 786468, null, null, metadata !"float", i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
-!16 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !17} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!17 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 128, i64 64, i32 0, i32 0, metadata !13, metadata !18, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 64, offset 0] [from double]
-!18 = metadata !{metadata !19}
-!19 = metadata !{i32 786465, i64 0, i64 2}        ; [ DW_TAG_subrange_type ] [0, 1]
-!20 = metadata !{i32 786448, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_reference_type ] [line 0, size 0, align 0, offset 0] [from char_struct]
-!21 = metadata !{i32 786451, metadata !77, null, metadata !"char_struct", i32 22, i64 24, i64 8, i32 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [char_struct] [line 22, size 24, align 8, offset 0] [def] [from ]
-!22 = metadata !{metadata !23, metadata !25, metadata !27}
-!23 = metadata !{i32 786445, metadata !77, metadata !21, metadata !"c", i32 23, i64 8, i64 8, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ] [c] [line 23, size 8, align 8, offset 0] [from char]
-!24 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!25 = metadata !{i32 786445, metadata !77, metadata !21, metadata !"c2", i32 24, i64 16, i64 8, i64 8, i32 0, metadata !26} ; [ DW_TAG_member ] [c2] [line 24, size 16, align 8, offset 8] [from ]
-!26 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !24, metadata !18, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 16, align 8, offset 0] [from char]
-!27 = metadata !{i32 786478, metadata !77, metadata !21, metadata !"char_struct", metadata !"char_struct", metadata !"", i32 22, metadata !28, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !31, i32 22} ; [ DW_TAG_subprogram ] [line 22] [char_struct]
-!28 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!29 = metadata !{null, metadata !30}
-!30 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !21} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char_struct]
-!31 = metadata !{metadata !32}
-!32 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
-!33 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!34 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!35 = metadata !{i32 786468, null, null, metadata !"unsigned short", i32 0, i64 16, i64 16, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned short] [line 0, size 16, align 16, offset 0, enc DW_ATE_unsigned]
-!36 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !37} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from long unsigned int]
-!37 = metadata !{i32 786468, null, null, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [long unsigned int] [line 0, size 64, align 64, offset 0, enc DW_ATE_unsigned]
-!38 = metadata !{i32 786478, metadata !77, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 38, metadata !39, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 39} ; [ DW_TAG_subprogram ] [line 38] [def] [scope 39] [main]
-!39 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !40, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!40 = metadata !{metadata !9, metadata !9, metadata !41}
-!41 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !42} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!42 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
-!43 = metadata !{metadata !45}
-!45 = metadata !{i32 786484, i32 0, null, metadata !"compound_char", metadata !"compound_char", metadata !"", metadata !6, i32 25, metadata !21, i32 0, i32 1, %struct.char_struct* @compound_char, null} ; [ DW_TAG_variable ] [compound_char] [line 25] [def]
-!46 = metadata !{i32 29, i32 0, metadata !47, null}
-!47 = metadata !{i32 786443, metadata !77, metadata !5, i32 28, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-parameters.cpp]
-!48 = metadata !{i32 786689, metadata !10, metadata !"pf", metadata !6, i32 16777248, metadata !14, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [pf] [line 32]
-!49 = metadata !{i32 32, i32 0, metadata !10, null}
-!50 = metadata !{i32 786689, metadata !10, metadata !"ppd", metadata !6, i32 33554464, metadata !16, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ppd] [line 32]
-!51 = metadata !{i32 786689, metadata !10, metadata !"s", metadata !6, i32 50331680, metadata !20, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [s] [line 32]
-!52 = metadata !{i32 786689, metadata !10, metadata !"ppn", metadata !6, i32 67108896, metadata !33, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [ppn] [line 32]
-!53 = metadata !{i32 786689, metadata !10, metadata !"us", metadata !6, i32 83886112, metadata !35, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [us] [line 32]
-!54 = metadata !{i32 786689, metadata !10, metadata !"l", metadata !6, i32 100663328, metadata !36, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [l] [line 32]
-!55 = metadata !{i32 786688, metadata !56, metadata !"result", metadata !6, i32 34, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 34]
-!56 = metadata !{i32 786443, metadata !77, metadata !10, i32 33, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-parameters.cpp]
-!57 = metadata !{i32 34, i32 0, metadata !56, null}
-!58 = metadata !{i32 35, i32 0, metadata !56, null}
-!59 = metadata !{i32 786689, metadata !38, metadata !"argc", metadata !6, i32 16777254, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 38]
-!60 = metadata !{i32 38, i32 0, metadata !38, null}
-!61 = metadata !{i32 786689, metadata !38, metadata !"argv", metadata !6, i32 33554470, metadata !41, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 38]
-!62 = metadata !{i32 786688, metadata !63, metadata !"s", metadata !6, i32 40, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 40]
-!63 = metadata !{i32 786443, metadata !77, metadata !38, i32 39, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [/home/akaylor/dev/test-parameters.cpp]
-!64 = metadata !{i32 40, i32 0, metadata !63, null}
-!65 = metadata !{i32 786688, metadata !63, metadata !"f", metadata !6, i32 41, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [f] [line 41]
-!66 = metadata !{i32 41, i32 0, metadata !63, null}
-!67 = metadata !{i32 786688, metadata !63, metadata !"d", metadata !6, i32 42, metadata !68, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [d] [line 42]
-!68 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 256, i64 64, i32 0, i32 0, metadata !13, metadata !69, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 256, align 64, offset 0] [from double]
-!69 = metadata !{metadata !19, metadata !19}
-!70 = metadata !{i32 42, i32 0, metadata !63, null}
-!71 = metadata !{i32 44, i32 0, metadata !63, null}
-!72 = metadata !{i32 45, i32 0, metadata !63, null}
-!73 = metadata !{i32 46, i32 0, metadata !63, null}
-!74 = metadata !{i32 786688, metadata !63, metadata !"result", metadata !6, i32 48, metadata !13, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 48]
-!75 = metadata !{i32 48, i32 0, metadata !63, null}
-!76 = metadata !{i32 49, i32 0, metadata !63, null}
-!77 = metadata !{metadata !"test-parameters.cpp", metadata !"/home/akaylor/dev"}
-!78 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
diff --git a/test/LTO/Inputs/bcsection.macho.s b/test/LTO/Inputs/bcsection.macho.s
new file mode 100644
index 000000000000..cb7fe03b3e73
--- /dev/null
+++ b/test/LTO/Inputs/bcsection.macho.s
@@ -0,0 +1,2 @@
+.section .llvmbc,.llvmbc
+.incbin "bcsection.bc"
diff --git a/test/LTO/Inputs/bcsection.s b/test/LTO/Inputs/bcsection.s
new file mode 100644
index 000000000000..ede1e5c532dd
--- /dev/null
+++ b/test/LTO/Inputs/bcsection.s
@@ -0,0 +1,2 @@
+.section .llvmbc
+.incbin "bcsection.bc"
diff --git a/test/LTO/Inputs/invalid.ll.bc b/test/LTO/Inputs/invalid.ll.bc
new file mode 100644
index 000000000000..a85c3644b3ab
--- /dev/null
+++ b/test/LTO/Inputs/invalid.ll.bc
diff --git a/test/LTO/Inputs/list-symbols.ll b/test/LTO/Inputs/list-symbols.ll
new file mode 100644
index 000000000000..9443d535503a
--- /dev/null
+++ b/test/LTO/Inputs/list-symbols.ll
@@ -0,0 +1,4 @@
+@glob = global i32 0
+define void @bar() {
+  ret void
+}
diff --git a/test/LTO/bcsection.ll b/test/LTO/bcsection.ll
new file mode 100644
index 000000000000..e65ade623536
--- /dev/null
+++ b/test/LTO/bcsection.ll
@@ -0,0 +1,21 @@
+; RUN: llvm-as -o %T/bcsection.bc %s
+
+; RUN: llvm-mc -I=%T -filetype=obj -triple=x86_64-pc-win32 -o %T/bcsection.coff.bco %p/Inputs/bcsection.s
+; RUN: llvm-nm %T/bcsection.coff.bco | FileCheck %s
+; RUN: llvm-lto -exported-symbol=main -exported-symbol=_main -o %T/bcsection.coff.o %T/bcsection.coff.bco
+; RUN: llvm-nm %T/bcsection.coff.o | FileCheck %s
+
+; RUN: llvm-mc -I=%T -filetype=obj -triple=x86_64-unknown-linux-gnu -o %T/bcsection.elf.bco %p/Inputs/bcsection.s
+; RUN: llvm-nm %T/bcsection.elf.bco | FileCheck %s
+; RUN: llvm-lto -exported-symbol=main -exported-symbol=_main -o %T/bcsection.elf.o %T/bcsection.elf.bco
+; RUN: llvm-nm %T/bcsection.elf.o | FileCheck %s
+
+; RUN: llvm-mc -I=%T -filetype=obj -triple=x86_64-apple-darwin11 -o %T/bcsection.macho.bco %p/Inputs/bcsection.macho.s
+; RUN: llvm-nm %T/bcsection.macho.bco | FileCheck %s
+; RUN: llvm-lto -exported-symbol=main -exported-symbol=_main -o %T/bcsection.macho.o %T/bcsection.macho.bco
+; RUN: llvm-nm %T/bcsection.macho.o | FileCheck %s
+
+; CHECK: main
+define i32 @main() {
+  ret i32 0
+}
diff --git a/test/LTO/cfi_endproc.ll b/test/LTO/cfi_endproc.ll
index a5cc649fc863..1a69bf60f54b 100644
--- a/test/LTO/cfi_endproc.ll
+++ b/test/LTO/cfi_endproc.ll
@@ -35,3 +35,8 @@ define i32* @get_zed1() {
 
 ; ZED1_AND_ZED2: d zed2
 @zed2 = linkonce_odr unnamed_addr global i32 42
+
+define i32 @useZed2() {
+  %x = load i32* @zed2
+  ret i32 %x
+}
diff --git a/test/LTO/diagnostic-handler-remarks.ll b/test/LTO/diagnostic-handler-remarks.ll
new file mode 100644
index 000000000000..4da9101117ec
--- /dev/null
+++ b/test/LTO/diagnostic-handler-remarks.ll
@@ -0,0 +1,40 @@
+; RUN: llvm-as < %s >%t.bc
+; PR21108: Diagnostic handlers get pass remarks, even if they're not enabled.
+
+; Confirm that there are -pass-remarks.
+; RUN: llvm-lto -pass-remarks=inline \
+; RUN:          -exported-symbol _main -o %t.o %t.bc 2>&1 | \
+; RUN:     FileCheck %s -allow-empty -check-prefix=REMARKS
+; RUN: llvm-nm %t.o | FileCheck %s -check-prefix NM
+
+; RUN: llvm-lto -pass-remarks=inline -use-diagnostic-handler \
+; RUN:         -exported-symbol _main -o %t.o %t.bc 2>&1 | \
+; RUN:     FileCheck %s -allow-empty -check-prefix=REMARKS
+; RUN: llvm-nm %t.o | FileCheck %s -check-prefix NM
+
+; Confirm that -pass-remarks are not printed by default.
+; RUN: llvm-lto \
+; RUN:         -exported-symbol _main -o %t.o %t.bc 2>&1 | \
+; RUN:     FileCheck %s -allow-empty
+; RUN: llvm-nm %t.o | FileCheck %s -check-prefix NM
+
+; RUN: llvm-lto -use-diagnostic-handler \
+; RUN:         -exported-symbol _main -o %t.o %t.bc 2>&1 | \
+; RUN:     FileCheck %s -allow-empty
+; RUN: llvm-nm %t.o | FileCheck %s -check-prefix NM
+
+; REMARKS: remark:
+; CHECK-NOT: remark:
+; NM-NOT: foo
+; NM: main
+
+target triple = "x86_64-apple-darwin"
+
+define i32 @foo() {
+  ret i32 7
+}
+
+define i32 @main() {
+  %i = call i32 @foo()
+  ret i32 %i
+}
diff --git a/test/LTO/invalid.ll b/test/LTO/invalid.ll
new file mode 100644
index 000000000000..5b6996d4ad35
--- /dev/null
+++ b/test/LTO/invalid.ll
@@ -0,0 +1,4 @@
+; RUN: not llvm-lto %S/Inputs/invalid.ll.bc 2>&1 | FileCheck %s
+
+
+; CHECK: llvm-lto{{.*}}: error loading file '{{.*}}/Inputs/invalid.ll.bc': Unknown attribute kind (48)
diff --git a/test/LTO/jump-table-type.ll b/test/LTO/jump-table-type.ll
index a39d3e959830..a806c303f8c5 100644
--- a/test/LTO/jump-table-type.ll
+++ b/test/LTO/jump-table-type.ll
@@ -2,8 +2,8 @@
 ; RUN: llvm-lto -o %t2 %t1 -jump-table-type=arity
 ; RUN: llvm-nm %t2 | FileCheck %s
 
-; CHECK: T __llvm_jump_instr_table_0_1
-; CHECK: T __llvm_jump_instr_table_1_1
+; CHECK: t __llvm_jump_instr_table_0_1
+; CHECK: t __llvm_jump_instr_table_1_1
 
 target triple = "x86_64-unknown-linux-gnu"
 
diff --git a/test/LTO/linkonce_odr_func.ll b/test/LTO/linkonce_odr_func.ll
index a67ffc0dd48e..48da7953885f 100644
--- a/test/LTO/linkonce_odr_func.ll
+++ b/test/LTO/linkonce_odr_func.ll
@@ -29,9 +29,19 @@ define linkonce_odr void @foo4() noinline {
 ; CHECK: r v1
 @v1 = linkonce_odr constant i32 32
 
+define i32 @useV1() {
+  %x = load i32* @v1
+  ret i32 %x
+}
+
 ; CHECK: V v2
 @v2 = linkonce_odr global i32 32
 
+define i32 @useV2() {
+  %x = load i32* @v2
+  ret i32 %x
+}
+
 declare void @f(void()*)
 
 declare void @p()
diff --git a/test/LTO/list-symbols.ll b/test/LTO/list-symbols.ll
new file mode 100644
index 000000000000..41b7d00deecc
--- /dev/null
+++ b/test/LTO/list-symbols.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as -o %T/1.bc %s
+; RUN: llvm-as -o %T/2.bc %S/Inputs/list-symbols.ll
+; RUN: llvm-lto -list-symbols-only %T/1.bc %T/2.bc | FileCheck %s
+
+; CHECK-LABEL: 1.bc:
+; CHECK-DAG: foo
+; CHECK-DAG: glob
+; CHECK-LABEL: 2.bc:
+; CHECK-DAG: glob
+; CHECK-DAG: bar
+
+@glob = global i32 0
+define void @foo() {
+  ret void
+}
diff --git a/test/Linker/2003-01-30-LinkerRename.ll b/test/Linker/2003-01-30-LinkerRename.ll
index cbf754133411..1e25d3eef0c6 100644
--- a/test/Linker/2003-01-30-LinkerRename.ll
+++ b/test/Linker/2003-01-30-LinkerRename.ll
@@ -1,10 +1,17 @@
-; This fails because the linker renames the external symbol not the internal 
-; one...
-
-; RUN: echo "define internal i32 @foo() { ret i32 7 } " | llvm-as > %t.1.bc
+; RUN: llvm-as %S/Inputs/2003-01-30-LinkerRename.ll -o %t.1.bc
 ; RUN: llvm-as %s -o %t.2.bc
 ; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
-; CHECK: internal{{.*}}@foo{{[0-9]}}()
 
-define i32 @foo() { ret i32 0 }
+; CHECK: @bar = global i32 ()* @foo2
+
+; CHECK:      define internal i32 @foo2() {
+; CHECK-NEXT:   ret i32 7
+; CHECK-NEXT: }
+
+; CHECK:      define i32 @foo() {
+; CHECK-NEXT:   ret i32 0
+; CHECK-NEXT: }
 
+define i32 @foo() {
+  ret i32 0
+}
diff --git a/test/Linker/2003-05-31-LinkerRename.ll b/test/Linker/2003-05-31-LinkerRename.ll
index 2e734beba6d9..0261fe3a9208 100644
--- a/test/Linker/2003-05-31-LinkerRename.ll
+++ b/test/Linker/2003-05-31-LinkerRename.ll
@@ -1,18 +1,23 @@
-; The funcresolve pass will (intentionally) llvm-link an _internal_ function 
-; body with an external declaration.  Because of this, if we LINK an internal 
-; function body into a program that already has an external declaration for 
-; the function name, we must rename the internal function to something that 
-; does not conflict.
-
-; RUN: echo " define internal i32 @foo() { ret i32 7 } " | llvm-as > %t.1.bc
-; RUN: llvm-as < %s > %t.2.bc
+; RUN: llvm-as %S/Inputs/2003-05-31-LinkerRename.ll -o %t.1.bc
+; RUN: llvm-as  %s -o %t.2.bc
 ; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
-; CHECK: internal {{.*}} @foo{{[0-9]}}(
 
-declare i32 @foo() 
+; CHECK: @bar = global i32 ()* @foo2
+
+; CHECK:      define internal i32 @foo2() {
+; CHECK-NEXT:   ret i32 7
+; CHECK-NEXT: }
+
+; CHECK: declare i32 @foo()
 
-define i32 @test() { 
+; CHECK:      define i32 @test() {
+; CHECK-NEXT:   %X = call i32 @foo()
+; CHECK-NEXT:   ret i32 %X
+; CHECK-NEXT: }
+
+declare i32 @foo()
+
+define i32 @test() {
   %X = call i32 @foo()
   ret i32 %X
 }
-
diff --git a/test/Linker/2006-06-15-GlobalVarAlignment.ll b/test/Linker/2006-06-15-GlobalVarAlignment.ll
deleted file mode 100644
index c9f9b0e2ab43..000000000000
--- a/test/Linker/2006-06-15-GlobalVarAlignment.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; The linker should choose the largest alignment when linking.
-
-; RUN: echo "@X = global i32 7, align 8" | llvm-as > %t.2.bc
-; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
-; CHECK: align 8
-
-@X = weak global i32 7, align 4
diff --git a/test/Linker/2008-03-05-AliasReference.ll b/test/Linker/2008-03-05-AliasReference.ll
index 7c19dfa15a05..8ce1ccb33857 100644
--- a/test/Linker/2008-03-05-AliasReference.ll
+++ b/test/Linker/2008-03-05-AliasReference.ll
@@ -8,7 +8,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-unknown-linux-gnu"
 @foo = weak global i32 0		; <i32*> [#uses=1]
 
-@bar = alias weak i32* @foo		; <i32*> [#uses=1]
+@bar = weak alias i32* @foo		; <i32*> [#uses=1]
 
 define i32 @baz() nounwind  {
 entry:
diff --git a/test/Linker/2009-09-03-mdnode.ll b/test/Linker/2009-09-03-mdnode.ll
index 11862f70b293..1f308e77e9dd 100644
--- a/test/Linker/2009-09-03-mdnode.ll
+++ b/test/Linker/2009-09-03-mdnode.ll
@@ -26,5 +26,6 @@ declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
 
 declare void @llvm.dbg.region.end(metadata) nounwind readnone
 
-!0 = metadata !{i32 458798, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 2, null, i1 false, i1 true}
-!1 = metadata !{i32 458769, i32 0, i32 12, metadata !"a.c", metadata !"/home/rich/ellcc/test/source", metadata !"ellcc 0.1.0", i1 true, i1 true, metadata !"", i32 0}
+!0 = !{!"0x2e\00main\00main\00main\002\000\001\000\006\000\000\000", i32 0, !1, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x11\0012\00ellcc 0.1.0\001\00\000\00\000", !2, null, null, null, null, null} ; [ DW_TAG_compile_unit ]
+!2 = !{!"a.c", !"/home/rich/ellcc/test/source"}
diff --git a/test/Linker/2009-09-03-mdnode2.ll b/test/Linker/2009-09-03-mdnode2.ll
index 21589a49b79e..68e329432a0a 100644
--- a/test/Linker/2009-09-03-mdnode2.ll
+++ b/test/Linker/2009-09-03-mdnode2.ll
@@ -21,5 +21,6 @@ declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
 
 declare void @llvm.dbg.region.end(metadata) nounwind readnone
 
-!0 = metadata !{i32 458798, i32 0, metadata !1, metadata !"f", metadata !"f", metadata !"f", metadata !1, i32 1, null, i1 false, i1 true}
-!1 = metadata !{i32 458769, i32 0, i32 12, metadata !"b.c", metadata !"/home/rich/ellcc/test/source", metadata !"ellcc 0.1.0", i1 true, i1 true, metadata !"", i32 0}
+!0 = !{!"0x2e\00f\00f\00f\001\000\001\000\006\000\000\000", i32 0, !1, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x11\0012\00ellcc 0.1.0\001\00\000\00\000", !2, null, null, null, null, null} ; [ DW_TAG_compile_unit ]
+!2 = !{!"b.c", !"/home/rich/ellcc/test/source"}
diff --git a/test/Linker/2011-08-04-DebugLoc.ll b/test/Linker/2011-08-04-DebugLoc.ll
index d26e8cd04c58..85b9e17c5666 100644
--- a/test/Linker/2011-08-04-DebugLoc.ll
+++ b/test/Linker/2011-08-04-DebugLoc.ll
@@ -17,15 +17,15 @@ define i32 @foo() nounwind ssp {
 !llvm.module.flags = !{!11}
 !llvm.dbg.sp = !{!1}
 
-!0 = metadata !{i32 589841, metadata !8, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-209.11) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !9, metadata !9, metadata !10, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 589870, metadata !8, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
-!2 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589845, metadata !8, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 2, i32 13, metadata !7, null}
-!7 = metadata !{i32 589835, metadata !8, metadata !1, i32 2, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
-!8 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
-!9 = metadata !{i32 0}
-!10 = metadata !{metadata !1}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00Apple clang version 3.0 (tags/Apple/clang-209.11) (based on LLVM 3.0svn)\001\00\000\00\000", !8, !9, !9, !10, null, null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"0x2e\00foo\00foo\00\002\000\001\000\006\000\000\000", !8, !2, !3, null, i32 ()* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
+!2 = !{!"0x29", !8} ; [ DW_TAG_file_type ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !8, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
+!6 = !MDLocation(line: 2, column: 13, scope: !7)
+!7 = !{!"0xb\002\0011\000", !8, !1} ; [ DW_TAG_lexical_block ]
+!8 = !{!"a.c", !"/private/tmp"}
+!9 = !{i32 0}
+!10 = !{!1}
+!11 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/2011-08-04-DebugLoc2.ll b/test/Linker/2011-08-04-DebugLoc2.ll
index c20941d36858..a3cd00100920 100644
--- a/test/Linker/2011-08-04-DebugLoc2.ll
+++ b/test/Linker/2011-08-04-DebugLoc2.ll
@@ -14,15 +14,15 @@ define i32 @bar() nounwind ssp {
 !llvm.module.flags = !{!11}
 !llvm.dbg.sp = !{!1}
 
-!0 = metadata !{i32 589841, metadata !8, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-209.11) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !9, metadata !9, metadata !10, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 589870, metadata !8, metadata !2, metadata !"bar", metadata !"bar", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [bar]
-!2 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 589845, metadata !8, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 1, i32 13, metadata !7, null}
-!7 = metadata !{i32 589835, metadata !8, metadata !1, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
-!8 = metadata !{metadata !"b.c", metadata !"/private/tmp"}
-!9 = metadata !{i32 0}
-!10 = metadata !{metadata !1}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00Apple clang version 3.0 (tags/Apple/clang-209.11) (based on LLVM 3.0svn)\001\00\000\00\000", !8, !9, !9, !10, null, null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"0x2e\00bar\00bar\00\001\000\001\000\006\000\000\000", !8, !2, !3, null, i32 ()* @bar, null, null, null} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [bar]
+!2 = !{!"0x29", !8} ; [ DW_TAG_file_type ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !8, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
+!6 = !MDLocation(line: 1, column: 13, scope: !7)
+!7 = !{!"0xb\001\0011\000", !8, !1} ; [ DW_TAG_lexical_block ]
+!8 = !{!"b.c", !"/private/tmp"}
+!9 = !{i32 0}
+!10 = !{!1}
+!11 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/2011-08-04-Metadata.ll b/test/Linker/2011-08-04-Metadata.ll
index cdf4f6f14367..da98f20e3f18 100644
--- a/test/Linker/2011-08-04-Metadata.ll
+++ b/test/Linker/2011-08-04-Metadata.ll
@@ -2,7 +2,8 @@
 ; RUN: llvm-dis < %t.bc | FileCheck %s
 ; Test if internal global variable's debug info is merged appropriately or not.
 
-;CHECK:  metadata !{i32 589876, i32 0, metadata !{{[0-9]+}}, metadata !"x", metadata !"x", metadata !"", metadata !{{[0-9]+}}, i32 1, metadata !{{[0-9]+}}, i32 1, i32 1, i32* @x1}
+;CHECK:   !"0x34\00x\00x\00\002\001\001", !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, i32* @x}
+;CHECK:   !"0x34\00x\00x\00\001\001\001", !{{[0-9]+}}, !{{[0-9]+}}, !{{[0-9]+}}, i32* @x1}
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.7.0"
 
@@ -19,15 +20,15 @@ entry:
 !llvm.dbg.sp = !{!1}
 !llvm.dbg.gv = !{!5}
 
-!0 = metadata !{i32 589841, metadata !9, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !10, null, null, metadata !""}
-!1 = metadata !{i32 589870, metadata !9, metadata !2, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [foo]
-!2 = metadata !{i32 589865, metadata !9}
-!3 = metadata !{i32 589845, metadata !9, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null}
-!5 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x", metadata !"x", metadata !"", metadata !2, i32 2, metadata !6, i32 1, i32 1, i32* @x}
-!6 = metadata !{i32 589860, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
-!7 = metadata !{i32 3, i32 14, metadata !8, null}
-!8 = metadata !{i32 589835, metadata !9, metadata !1, i32 3, i32 12, i32 0}
-!9 = metadata !{metadata !"/tmp/one.c", metadata !"/Volumes/Lalgate/Slate/D"}
-!10 = metadata !{metadata !1}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.0 ()\001\00\000\00\000", !9, !4, !4, !10, null, null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"0x2e\00foo\00foo\00\003\000\001\000\006\000\000\000", !9, !2, !3, null, void ()* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [foo]
+!2 = !{!"0x29", !9} ; [ DW_TAG_file_type ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !9, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null}
+!5 = !{!"0x34\00x\00x\00\002\001\001", !0, !2, !6, i32* @x} ; [ DW_TAG_variable ]
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
+!7 = !MDLocation(line: 3, column: 14, scope: !8)
+!8 = !{!"0xb\003\0012\000", !9, !1} ; [ DW_TAG_lexical_block ]
+!9 = !{!"/tmp/one.c", !"/Volumes/Lalgate/Slate/D"}
+!10 = !{!1}
+!11 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/2011-08-04-Metadata2.ll b/test/Linker/2011-08-04-Metadata2.ll
index 80884cc70dba..fb196d96df71 100644
--- a/test/Linker/2011-08-04-Metadata2.ll
+++ b/test/Linker/2011-08-04-Metadata2.ll
@@ -19,15 +19,15 @@ entry:
 !llvm.dbg.sp = !{!1}
 !llvm.dbg.gv = !{!5}
 
-!0 = metadata !{i32 589841, metadata !9, i32 12, metadata !"clang version 3.0 ()", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !10, null, null, metadata !""}
-!1 = metadata !{i32 589870, metadata !9, metadata !2, metadata !"bar", metadata !"bar", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [bar]
-!2 = metadata !{i32 589865, metadata !9}
-!3 = metadata !{i32 589845, metadata !9, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null}
-!5 = metadata !{i32 589876, i32 0, metadata !0, metadata !"x", metadata !"x", metadata !"", metadata !2, i32 1, metadata !6, i32 1, i32 1, i32* @x}
-!6 = metadata !{i32 589860, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
-!7 = metadata !{i32 2, i32 14, metadata !8, null}
-!8 = metadata !{i32 589835, metadata !9, metadata !1, i32 2, i32 12, i32 0}
-!9 = metadata !{metadata !"/tmp/two.c", metadata !"/Volumes/Lalgate/Slate/D"}
-!10 = metadata !{metadata !1}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.0 ()\001\00\000\00\000", !9, !4, !4, !10, null, null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"0x2e\00bar\00bar\00\002\000\001\000\006\000\000\000", !9, !2, !3, null, void ()* @bar, null, null, null} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [bar]
+!2 = !{!"0x29", !9} ; [ DW_TAG_file_type ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !9, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null}
+!5 = !{!"0x34\00x\00x\00\001\001\001", !0, !2, !6, i32* @x} ; [ DW_TAG_variable ]
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
+!7 = !MDLocation(line: 2, column: 14, scope: !8)
+!8 = !{!"0xb\002\0012\000", !9, !1} ; [ DW_TAG_lexical_block ]
+!9 = !{!"/tmp/two.c", !"/Volumes/Lalgate/Slate/D"}
+!10 = !{!1}
+!11 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/2011-08-18-unique-class-type.ll b/test/Linker/2011-08-18-unique-class-type.ll
index b077f2357892..a8f13500a754 100644
--- a/test/Linker/2011-08-18-unique-class-type.ll
+++ b/test/Linker/2011-08-18-unique-class-type.ll
@@ -11,30 +11,30 @@ target triple = "x86_64-apple-macosx10.7.0"
 define void @_Z3fooN2N11AE() nounwind uwtable ssp {
 entry:
   %mya = alloca %"class.N1::A", align 1
-  call void @llvm.dbg.declare(metadata !{%"class.N1::A"* %mya}, metadata !9), !dbg !13
+  call void @llvm.dbg.declare(metadata %"class.N1::A"* %mya, metadata !9, metadata !{!"0x102"}), !dbg !13
   ret void, !dbg !14
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!18}
 
-!0 = metadata !{i32 720913, metadata !16, i32 4, metadata !"clang version 3.0 (trunk 137954)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !16, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooN2N11AE", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3fooN2N11AE, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [foo]
-!6 = metadata !{i32 720937, metadata !16} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, metadata !16, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null}
-!9 = metadata !{i32 721153, metadata !5, metadata !"mya", metadata !6, i32 16777220, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 720898, metadata !17, metadata !11, metadata !"A", i32 3, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 3, size 8, align 8, offset 0] [def] [from ]
-!11 = metadata !{i32 720953, metadata !17, null, metadata !"N1", i32 2} ; [ DW_TAG_namespace ]
-!12 = metadata !{i32 720937, metadata !17} ; [ DW_TAG_file_type ]
-!13 = metadata !{i32 4, i32 12, metadata !5, null}
-!14 = metadata !{i32 4, i32 18, metadata !15, null}
-!15 = metadata !{i32 720907, metadata !16, metadata !5, i32 4, i32 17, i32 0} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{metadata !"n1.c", metadata !"/private/tmp"}
-!17 = metadata !{metadata !"./n.h", metadata !"/private/tmp"}
-!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.0 (trunk 137954)\001\00\000\00\000", !16, !2, !2, !3, !2, null} ; [ DW_TAG_compile_unit ]
+!1 = !{!2}
+!2 = !{i32 0}
+!3 = !{!5}
+!5 = !{!"0x2e\00foo\00foo\00_Z3fooN2N11AE\004\000\001\000\006\00256\000\000", !16, !6, !7, null, void ()* @_Z3fooN2N11AE, null, null, null} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [foo]
+!6 = !{!"0x29", !16} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", !16, !6, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null}
+!9 = !{!"0x101\00mya\0016777220\000", !5, !6, !10} ; [ DW_TAG_arg_variable ]
+!10 = !{!"0x2\00A\003\008\008\000\000\000", !17, !11, null, !2, null, null, null} ; [ DW_TAG_class_type ] [A] [line 3, size 8, align 8, offset 0] [def] [from ]
+!11 = !{!"0x39\00N1\002", !17, null} ; [ DW_TAG_namespace ]
+!12 = !{!"0x29", !17} ; [ DW_TAG_file_type ]
+!13 = !MDLocation(line: 4, column: 12, scope: !5)
+!14 = !MDLocation(line: 4, column: 18, scope: !15)
+!15 = !{!"0xb\004\0017\000", !16, !5} ; [ DW_TAG_lexical_block ]
+!16 = !{!"n1.c", !"/private/tmp"}
+!17 = !{!"./n.h", !"/private/tmp"}
+!18 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/2011-08-18-unique-class-type2.ll b/test/Linker/2011-08-18-unique-class-type2.ll
index 7bfcd919678a..dd0df5889ec6 100644
--- a/test/Linker/2011-08-18-unique-class-type2.ll
+++ b/test/Linker/2011-08-18-unique-class-type2.ll
@@ -9,30 +9,30 @@ target triple = "x86_64-apple-macosx10.7.0"
 define void @_Z3barN2N11AE() nounwind uwtable ssp {
 entry:
   %youra = alloca %"class.N1::A", align 1
-  call void @llvm.dbg.declare(metadata !{%"class.N1::A"* %youra}, metadata !9), !dbg !13
+  call void @llvm.dbg.declare(metadata %"class.N1::A"* %youra, metadata !9, metadata !{!"0x102"}), !dbg !13
   ret void, !dbg !14
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!18}
 
-!0 = metadata !{i32 720913, metadata !16, i32 4, metadata !"clang version 3.0 (trunk 137954)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"_Z3barN2N11AE", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3barN2N11AE, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [bar]
-!6 = metadata !{i32 720937, metadata !16} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, metadata !16, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null}
-!9 = metadata !{i32 721153, metadata !5, metadata !"youra", metadata !6, i32 16777220, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 720898, metadata !17, metadata !11, metadata !"A", i32 3, i64 8, i64 8, i32 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_class_type ] [A] [line 3, size 8, align 8, offset 0] [def] [from ]
-!11 = metadata !{i32 720953, metadata !17, null, metadata !"N1", i32 2} ; [ DW_TAG_namespace ]
-!12 = metadata !{i32 720937, metadata !17} ; [ DW_TAG_file_type ]
-!13 = metadata !{i32 4, i32 12, metadata !5, null}
-!14 = metadata !{i32 4, i32 20, metadata !15, null}
-!15 = metadata !{i32 720907, metadata !16, metadata !5, i32 4, i32 19, i32 0} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{metadata !"n2.c", metadata !"/private/tmp"}
-!17 = metadata !{metadata !"./n.h", metadata !"/private/tmp"}
-!18 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.0 (trunk 137954)\001\00\000\00\000", !16, !2, !2, !3, !2, null} ; [ DW_TAG_compile_unit ]
+!1 = !{!2}
+!2 = !{i32 0}
+!3 = !{!5}
+!5 = !{!"0x2e\00bar\00bar\00_Z3barN2N11AE\004\000\001\000\006\00256\000\000", i32 0, !6, !7, null, void ()* @_Z3barN2N11AE, null, null, null} ; [ DW_TAG_subprogram ] [line 4] [def] [scope 0] [bar]
+!6 = !{!"0x29", !16} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", !16, !6, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null}
+!9 = !{!"0x101\00youra\0016777220\000", !5, !6, !10} ; [ DW_TAG_arg_variable ]
+!10 = !{!"0x2\00A\003\008\008\000\000\000", !17, !11, null, !2, null, null, null} ; [ DW_TAG_class_type ] [A] [line 3, size 8, align 8, offset 0] [def] [from ]
+!11 = !{!"0x39\00N1\002", !17, null} ; [ DW_TAG_namespace ]
+!12 = !{!"0x29", !17} ; [ DW_TAG_file_type ]
+!13 = !MDLocation(line: 4, column: 12, scope: !5)
+!14 = !MDLocation(line: 4, column: 20, scope: !15)
+!15 = !{!"0xb\004\0019\000", !16, !5} ; [ DW_TAG_lexical_block ]
+!16 = !{!"n2.c", !"/private/tmp"}
+!17 = !{!"./n.h", !"/private/tmp"}
+!18 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/2011-08-18-unique-debug-type.ll b/test/Linker/2011-08-18-unique-debug-type.ll
index 0e14f464bbbe..c1b3a1d79395 100644
--- a/test/Linker/2011-08-18-unique-debug-type.ll
+++ b/test/Linker/2011-08-18-unique-debug-type.ll
@@ -1,6 +1,6 @@
 ; RUN: llvm-link %s %p/2011-08-18-unique-debug-type2.ll -S -o - | FileCheck %s
 ; Test to check only one MDNode for "int" after linking.
-; CHECK: !"int"
+; CHECK: !"0x24\00int\00{{.*}}"
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-macosx10.7.0"
 
@@ -12,16 +12,16 @@ entry:
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!13}
 
-!0 = metadata !{i32 720913, metadata !12, i32 12, metadata !"clang version 3.0 (trunk 137954)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !12, metadata !6, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [foo]
-!6 = metadata !{i32 720937, metadata !12} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, metadata !12, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 1, i32 13, metadata !11, null}
-!11 = metadata !{i32 720907, metadata !12, metadata !5, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{metadata !"one.c", metadata !"/private/tmp"}
-!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.0 (trunk 137954)\001\00\000\00\000", !12, !2, !2, !3, !2, null} ; [ DW_TAG_compile_unit ]
+!1 = !{!2}
+!2 = !{i32 0}
+!3 = !{!5}
+!5 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\000\000\000", !12, !6, !7, null, i32 ()* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [foo]
+!6 = !{!"0x29", !12} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", !12, !6, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!10 = !MDLocation(line: 1, column: 13, scope: !11)
+!11 = !{!"0xb\001\0011\000", !12, !5} ; [ DW_TAG_lexical_block ]
+!12 = !{!"one.c", !"/private/tmp"}
+!13 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/2011-08-18-unique-debug-type2.ll b/test/Linker/2011-08-18-unique-debug-type2.ll
index 1185100b9380..49c0a5cf6568 100644
--- a/test/Linker/2011-08-18-unique-debug-type2.ll
+++ b/test/Linker/2011-08-18-unique-debug-type2.ll
@@ -12,16 +12,16 @@ entry:
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!13}
 
-!0 = metadata !{i32 720913, metadata !12, i32 12, metadata !"clang version 3.0 (trunk 137954)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, metadata !12, metadata !6, metadata !"bar", metadata !"bar", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @bar, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [bar]
-!6 = metadata !{i32 720937, metadata !12} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, metadata !12, metadata !6, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 720932, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 1, i32 13, metadata !11, null}
-!11 = metadata !{i32 720907, metadata !12, metadata !5, i32 1, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{metadata !"two.c", metadata !"/private/tmp"}
-!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.0 (trunk 137954)\001\00\000\00\000", !12, !2, !2, !3, !2, null} ; [ DW_TAG_compile_unit ]
+!1 = !{!2}
+!2 = !{i32 0}
+!3 = !{!5}
+!5 = !{!"0x2e\00bar\00bar\00\001\000\001\000\006\000\000\000", !12, !6, !7, null, i32 ()* @bar, null, null, null} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 0] [bar]
+!6 = !{!"0x29", !12} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", !12, !6, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!10 = !MDLocation(line: 1, column: 13, scope: !11)
+!11 = !{!"0xb\001\0011\000", !12, !5} ; [ DW_TAG_lexical_block ]
+!12 = !{!"two.c", !"/private/tmp"}
+!13 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/2011-08-22-ResolveAlias.ll b/test/Linker/2011-08-22-ResolveAlias.ll
index 6b99233dd778..d6df82d45cfc 100644
--- a/test/Linker/2011-08-22-ResolveAlias.ll
+++ b/test/Linker/2011-08-22-ResolveAlias.ll
@@ -7,32 +7,32 @@
 %union.pthread_mutexattr_t = type { [4 x i8] }
 %union.pthread_cond_t = type { [48 x i8] }
 
-@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once
-@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific
-@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific
-@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create
-@_ZL20__gthrw_pthread_joinmPPv = alias weak i32 (i64, i8**)* @pthread_join
-@_ZL21__gthrw_pthread_equalmm = alias weak i32 (i64, i64)* @pthread_equal
-@_ZL20__gthrw_pthread_selfv = alias weak i64 ()* @pthread_self
-@_ZL22__gthrw_pthread_detachm = alias weak i32 (i64)* @pthread_detach
-@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i64)* @pthread_cancel
-@_ZL19__gthrw_sched_yieldv = alias weak i32 ()* @sched_yield
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%union.pthread_mutex_t*)* @pthread_mutex_lock
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%union.pthread_mutex_t*)* @pthread_mutex_trylock
-@_ZL31__gthrw_pthread_mutex_timedlockP15pthread_mutex_tPK8timespec = alias weak i32 (%union.pthread_mutex_t*, %struct.timespec*)* @pthread_mutex_timedlock
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%union.pthread_mutex_t*)* @pthread_mutex_unlock
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%union.pthread_mutex_t*, %union.pthread_mutexattr_t*)* @pthread_mutex_init
-@_ZL29__gthrw_pthread_mutex_destroyP15pthread_mutex_t = alias weak i32 (%union.pthread_mutex_t*)* @pthread_mutex_destroy
-@_ZL30__gthrw_pthread_cond_broadcastP14pthread_cond_t = alias weak i32 (%union.pthread_cond_t*)* @pthread_cond_broadcast
-@_ZL27__gthrw_pthread_cond_signalP14pthread_cond_t = alias weak i32 (%union.pthread_cond_t*)* @pthread_cond_signal
-@_ZL25__gthrw_pthread_cond_waitP14pthread_cond_tP15pthread_mutex_t = alias weak i32 (%union.pthread_cond_t*, %union.pthread_mutex_t*)* @pthread_cond_wait
-@_ZL30__gthrw_pthread_cond_timedwaitP14pthread_cond_tP15pthread_mutex_tPK8timespec = alias weak i32 (%union.pthread_cond_t*, %union.pthread_mutex_t*, %struct.timespec*)* @pthread_cond_timedwait
-@_ZL28__gthrw_pthread_cond_destroyP14pthread_cond_t = alias weak i32 (%union.pthread_cond_t*)* @pthread_cond_destroy
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create
-@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%union.pthread_mutexattr_t*)* @pthread_mutexattr_init
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%union.pthread_mutexattr_t*, i32)* @pthread_mutexattr_settype
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%union.pthread_mutexattr_t*)* @pthread_mutexattr_destroy
+@_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (i32*, void ()*)* @pthread_once
+@_ZL27__gthrw_pthread_getspecificj = weak alias i8* (i32)* @pthread_getspecific
+@_ZL27__gthrw_pthread_setspecificjPKv = weak alias i32 (i32, i8*)* @pthread_setspecific
+@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = weak alias i32 (i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create
+@_ZL20__gthrw_pthread_joinmPPv = weak alias i32 (i64, i8**)* @pthread_join
+@_ZL21__gthrw_pthread_equalmm = weak alias i32 (i64, i64)* @pthread_equal
+@_ZL20__gthrw_pthread_selfv = weak alias i64 ()* @pthread_self
+@_ZL22__gthrw_pthread_detachm = weak alias i32 (i64)* @pthread_detach
+@_ZL22__gthrw_pthread_cancelm = weak alias i32 (i64)* @pthread_cancel
+@_ZL19__gthrw_sched_yieldv = weak alias i32 ()* @sched_yield
+@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = weak alias i32 (%union.pthread_mutex_t*)* @pthread_mutex_lock
+@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = weak alias i32 (%union.pthread_mutex_t*)* @pthread_mutex_trylock
+@_ZL31__gthrw_pthread_mutex_timedlockP15pthread_mutex_tPK8timespec = weak alias i32 (%union.pthread_mutex_t*, %struct.timespec*)* @pthread_mutex_timedlock
+@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = weak alias i32 (%union.pthread_mutex_t*)* @pthread_mutex_unlock
+@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = weak alias i32 (%union.pthread_mutex_t*, %union.pthread_mutexattr_t*)* @pthread_mutex_init
+@_ZL29__gthrw_pthread_mutex_destroyP15pthread_mutex_t = weak alias i32 (%union.pthread_mutex_t*)* @pthread_mutex_destroy
+@_ZL30__gthrw_pthread_cond_broadcastP14pthread_cond_t = weak alias i32 (%union.pthread_cond_t*)* @pthread_cond_broadcast
+@_ZL27__gthrw_pthread_cond_signalP14pthread_cond_t = weak alias i32 (%union.pthread_cond_t*)* @pthread_cond_signal
+@_ZL25__gthrw_pthread_cond_waitP14pthread_cond_tP15pthread_mutex_t = weak alias i32 (%union.pthread_cond_t*, %union.pthread_mutex_t*)* @pthread_cond_wait
+@_ZL30__gthrw_pthread_cond_timedwaitP14pthread_cond_tP15pthread_mutex_tPK8timespec = weak alias i32 (%union.pthread_cond_t*, %union.pthread_mutex_t*, %struct.timespec*)* @pthread_cond_timedwait
+@_ZL28__gthrw_pthread_cond_destroyP14pthread_cond_t = weak alias i32 (%union.pthread_cond_t*)* @pthread_cond_destroy
+@_ZL26__gthrw_pthread_key_createPjPFvPvE = weak alias i32 (i32*, void (i8*)*)* @pthread_key_create
+@_ZL26__gthrw_pthread_key_deletej = weak alias i32 (i32)* @pthread_key_delete
+@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = weak alias i32 (%union.pthread_mutexattr_t*)* @pthread_mutexattr_init
+@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = weak alias i32 (%union.pthread_mutexattr_t*, i32)* @pthread_mutexattr_settype
+@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = weak alias i32 (%union.pthread_mutexattr_t*)* @pthread_mutexattr_destroy
 
 declare extern_weak i32 @pthread_once(i32*, void ()*)
 
diff --git a/test/Linker/2011-08-22-ResolveAlias2.ll b/test/Linker/2011-08-22-ResolveAlias2.ll
index eee60d49d026..c380c23abe41 100644
--- a/test/Linker/2011-08-22-ResolveAlias2.ll
+++ b/test/Linker/2011-08-22-ResolveAlias2.ll
@@ -10,32 +10,32 @@
 %union.pthread_cond_t = type { [48 x i8] }
 
 @_ZN13HexxagonBoardC1ERKS_ = alias void (%struct.HexxagonBoard*, %struct.HexxagonBoard*)* @_ZN13HexxagonBoardC2ERKS_
-@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once
-@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific
-@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific
-@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create
-@_ZL20__gthrw_pthread_joinmPPv = alias weak i32 (i64, i8**)* @pthread_join
-@_ZL21__gthrw_pthread_equalmm = alias weak i32 (i64, i64)* @pthread_equal
-@_ZL20__gthrw_pthread_selfv = alias weak i64 ()* @pthread_self
-@_ZL22__gthrw_pthread_detachm = alias weak i32 (i64)* @pthread_detach
-@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i64)* @pthread_cancel
-@_ZL19__gthrw_sched_yieldv = alias weak i32 ()* @sched_yield
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%union.pthread_mutex_t*)* @pthread_mutex_lock
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%union.pthread_mutex_t*)* @pthread_mutex_trylock
-@_ZL31__gthrw_pthread_mutex_timedlockP15pthread_mutex_tPK8timespec = alias weak i32 (%union.pthread_mutex_t*, %struct.timespec*)* @pthread_mutex_timedlock
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%union.pthread_mutex_t*)* @pthread_mutex_unlock
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%union.pthread_mutex_t*, %union.pthread_mutexattr_t*)* @pthread_mutex_init
-@_ZL29__gthrw_pthread_mutex_destroyP15pthread_mutex_t = alias weak i32 (%union.pthread_mutex_t*)* @pthread_mutex_destroy
-@_ZL30__gthrw_pthread_cond_broadcastP14pthread_cond_t = alias weak i32 (%union.pthread_cond_t*)* @pthread_cond_broadcast
-@_ZL27__gthrw_pthread_cond_signalP14pthread_cond_t = alias weak i32 (%union.pthread_cond_t*)* @pthread_cond_signal
-@_ZL25__gthrw_pthread_cond_waitP14pthread_cond_tP15pthread_mutex_t = alias weak i32 (%union.pthread_cond_t*, %union.pthread_mutex_t*)* @pthread_cond_wait
-@_ZL30__gthrw_pthread_cond_timedwaitP14pthread_cond_tP15pthread_mutex_tPK8timespec = alias weak i32 (%union.pthread_cond_t*, %union.pthread_mutex_t*, %struct.timespec*)* @pthread_cond_timedwait
-@_ZL28__gthrw_pthread_cond_destroyP14pthread_cond_t = alias weak i32 (%union.pthread_cond_t*)* @pthread_cond_destroy
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create
-@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%union.pthread_mutexattr_t*)* @pthread_mutexattr_init
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%union.pthread_mutexattr_t*, i32)* @pthread_mutexattr_settype
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%union.pthread_mutexattr_t*)* @pthread_mutexattr_destroy
+@_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (i32*, void ()*)* @pthread_once
+@_ZL27__gthrw_pthread_getspecificj = weak alias i8* (i32)* @pthread_getspecific
+@_ZL27__gthrw_pthread_setspecificjPKv = weak alias i32 (i32, i8*)* @pthread_setspecific
+@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = weak alias i32 (i64*, %union.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create
+@_ZL20__gthrw_pthread_joinmPPv = weak alias i32 (i64, i8**)* @pthread_join
+@_ZL21__gthrw_pthread_equalmm = weak alias i32 (i64, i64)* @pthread_equal
+@_ZL20__gthrw_pthread_selfv = weak alias i64 ()* @pthread_self
+@_ZL22__gthrw_pthread_detachm = weak alias i32 (i64)* @pthread_detach
+@_ZL22__gthrw_pthread_cancelm = weak alias i32 (i64)* @pthread_cancel
+@_ZL19__gthrw_sched_yieldv = weak alias i32 ()* @sched_yield
+@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = weak alias i32 (%union.pthread_mutex_t*)* @pthread_mutex_lock
+@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = weak alias i32 (%union.pthread_mutex_t*)* @pthread_mutex_trylock
+@_ZL31__gthrw_pthread_mutex_timedlockP15pthread_mutex_tPK8timespec = weak alias i32 (%union.pthread_mutex_t*, %struct.timespec*)* @pthread_mutex_timedlock
+@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = weak alias i32 (%union.pthread_mutex_t*)* @pthread_mutex_unlock
+@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = weak alias i32 (%union.pthread_mutex_t*, %union.pthread_mutexattr_t*)* @pthread_mutex_init
+@_ZL29__gthrw_pthread_mutex_destroyP15pthread_mutex_t = weak alias i32 (%union.pthread_mutex_t*)* @pthread_mutex_destroy
+@_ZL30__gthrw_pthread_cond_broadcastP14pthread_cond_t = weak alias i32 (%union.pthread_cond_t*)* @pthread_cond_broadcast
+@_ZL27__gthrw_pthread_cond_signalP14pthread_cond_t = weak alias i32 (%union.pthread_cond_t*)* @pthread_cond_signal
+@_ZL25__gthrw_pthread_cond_waitP14pthread_cond_tP15pthread_mutex_t = weak alias i32 (%union.pthread_cond_t*, %union.pthread_mutex_t*)* @pthread_cond_wait
+@_ZL30__gthrw_pthread_cond_timedwaitP14pthread_cond_tP15pthread_mutex_tPK8timespec = weak alias i32 (%union.pthread_cond_t*, %union.pthread_mutex_t*, %struct.timespec*)* @pthread_cond_timedwait
+@_ZL28__gthrw_pthread_cond_destroyP14pthread_cond_t = weak alias i32 (%union.pthread_cond_t*)* @pthread_cond_destroy
+@_ZL26__gthrw_pthread_key_createPjPFvPvE = weak alias i32 (i32*, void (i8*)*)* @pthread_key_create
+@_ZL26__gthrw_pthread_key_deletej = weak alias i32 (i32)* @pthread_key_delete
+@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = weak alias i32 (%union.pthread_mutexattr_t*)* @pthread_mutexattr_init
+@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = weak alias i32 (%union.pthread_mutexattr_t*, i32)* @pthread_mutexattr_settype
+@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = weak alias i32 (%union.pthread_mutexattr_t*)* @pthread_mutexattr_destroy
 
 define void @_ZN13HexxagonBoardC2ERKS_(%struct.HexxagonBoard*, %struct.HexxagonBoard*) uwtable align 2 {
   ret void
diff --git a/test/Linker/ConstantGlobals.ll b/test/Linker/ConstantGlobals.ll
new file mode 100644
index 000000000000..49f86a51bd7f
--- /dev/null
+++ b/test/Linker/ConstantGlobals.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-link %s %S/Inputs/ConstantGlobals.ll -S | FileCheck %s
+; RUN: llvm-link %S/Inputs/ConstantGlobals.ll %s -S | FileCheck %s
+
+; CHECK-DAG: @X = constant [1 x i32] [i32 8]
+@X = external global [1 x i32]
+
+; CHECK-DAG: @Y = external global [1 x i32]
+@Y = external global [1 x i32]
diff --git a/test/Linker/ConstantGlobals1.ll b/test/Linker/ConstantGlobals1.ll
deleted file mode 100644
index a2bb6fbfba8a..000000000000
--- a/test/Linker/ConstantGlobals1.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; Test that appending linkage works correctly when arrays are the same size.
-
-; RUN: echo "@X = constant [1 x i32] [i32 8] " | \
-; RUN:   llvm-as > %t.2.bc
-; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
-; CHECK: constant
-
-@X = external global [1 x i32]		; <[1 x i32]*> [#uses=0]
-
diff --git a/test/Linker/ConstantGlobals2.ll b/test/Linker/ConstantGlobals2.ll
deleted file mode 100644
index 471377998713..000000000000
--- a/test/Linker/ConstantGlobals2.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; Test that appending linkage works correctly when arrays are the same size.
-
-; RUN: echo "@X = external global [1 x i32] " | \
-; RUN:   llvm-as > %t.2.bc
-; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
-; CHECK: constant
-
-@X = constant [1 x i32] [ i32 12 ]		; <[1 x i32]*> [#uses=0]
-
diff --git a/test/Linker/ConstantGlobals3.ll b/test/Linker/ConstantGlobals3.ll
deleted file mode 100644
index 6b4ed24c31cf..000000000000
--- a/test/Linker/ConstantGlobals3.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Test that appending linkage works correctly when arrays are the same size.
-
-; RUN: echo "@X = external constant [1 x i32] " | \
-; RUN:   llvm-as > %t.2.bc
-; RUN: llvm-as < %s > %t.1.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | FileCheck %s
-; CHECK: constant
-
-@X = external global [1 x i32]		; <[1 x i32]*> [#uses=0]
diff --git a/test/Linker/DbgDeclare.ll b/test/Linker/DbgDeclare.ll
index 4cca9d576d65..de5ac9e97b25 100644
--- a/test/Linker/DbgDeclare.ll
+++ b/test/Linker/DbgDeclare.ll
@@ -4,12 +4,12 @@
 
 ; rdar://13089880
 ; CHECK: define i32 @main(i32 %argc, i8** %argv)
-; CHECK: call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !{{[0-9]+}})
-; CHECK: call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !{{[0-9]+}})
+; CHECK: call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !{{[0-9]+}}, metadata {{.*}})
+; CHECK: call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !{{[0-9]+}}, metadata {{.*}})
 ; CHECK: define void @test(i32 %argc, i8** %argv)
-; CHECK: call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !{{[0-9]+}})
-; CHECK: call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !{{[0-9]+}})
-; CHECK: call void @llvm.dbg.declare(metadata !{i32* %i}, metadata !{{[0-9]+}})
+; CHECK: call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !{{[0-9]+}}, metadata {{.*}})
+; CHECK: call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !{{[0-9]+}}, metadata {{.*}})
+; CHECK: call void @llvm.dbg.declare(metadata i32* %i, metadata !{{[0-9]+}}, metadata {{.*}})
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
@@ -21,40 +21,40 @@ entry:
   %argv.addr = alloca i8**, align 8
   store i32 0, i32* %retval
   store i32 %argc, i32* %argc.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !14), !dbg !15
+  call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !14, metadata !{!"0x102"}), !dbg !15
   store i8** %argv, i8*** %argv.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !16), !dbg !15
+  call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !16, metadata !{!"0x102"}), !dbg !15
   %0 = load i32* %argc.addr, align 4, !dbg !17
   %1 = load i8*** %argv.addr, align 8, !dbg !17
   call void @test(i32 %0, i8** %1), !dbg !17
   ret i32 0, !dbg !19
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare void @test(i32, i8**)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!21}
 
-!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.3 (trunk 173515)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !20, null, metadata !"main", metadata !"main", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !9, metadata !10}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ]
-!13 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786689, metadata !5, metadata !"argc", metadata !6, i32 16777219, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 3, i32 0, metadata !5, null}
-!16 = metadata !{i32 786689, metadata !5, metadata !"argv", metadata !6, i32 33554435, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 5, i32 0, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !20, metadata !5, i32 4, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!19 = metadata !{i32 6, i32 0, metadata !18, null}
-!20 = metadata !{metadata !"main.cpp", metadata !"/private/tmp"}
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.3 (trunk 173515)\001\00\000\00\000", !20, !2, !2, !3, !2, null} ; [ DW_TAG_compile_unit ]
+!1 = !{!2}
+!2 = !{i32 0}
+!3 = !{!5}
+!5 = !{!"0x2e\00main\00main\00\003\000\001\000\006\00256\000\004", !20, null, !7, null, i32 (i32, i8**)* @main, null, null, !1} ; [ DW_TAG_subprogram ]
+!6 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !9, !10}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!10 = !{!"0xf\00\000\0064\0064\000\000", null, null, !11} ; [ DW_TAG_pointer_type ]
+!11 = !{!"0xf\00\000\0064\0064\000\000", null, null, !12} ; [ DW_TAG_pointer_type ]
+!12 = !{!"0x26\00\000\000\000\000\000", null, null, !13} ; [ DW_TAG_const_type ]
+!13 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ]
+!14 = !{!"0x101\00argc\0016777219\000", !5, !6, !9} ; [ DW_TAG_arg_variable ]
+!15 = !MDLocation(line: 3, scope: !5)
+!16 = !{!"0x101\00argv\0033554435\000", !5, !6, !10} ; [ DW_TAG_arg_variable ]
+!17 = !MDLocation(line: 5, scope: !18)
+!18 = !{!"0xb\004\000\000", !20, !5} ; [ DW_TAG_lexical_block ]
+!19 = !MDLocation(line: 6, scope: !18)
+!20 = !{!"main.cpp", !"/private/tmp"}
+!21 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/DbgDeclare2.ll b/test/Linker/DbgDeclare2.ll
index 2649fccbcab8..2f01b0eaea2e 100644
--- a/test/Linker/DbgDeclare2.ll
+++ b/test/Linker/DbgDeclare2.ll
@@ -11,10 +11,10 @@ entry:
   %argv.addr = alloca i8**, align 8
   %i = alloca i32, align 4
   store i32 %argc, i32* %argc.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !14), !dbg !15
+  call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !14, metadata !{!"0x102"}), !dbg !15
   store i8** %argv, i8*** %argv.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !16), !dbg !15
-  call void @llvm.dbg.declare(metadata !{i32* %i}, metadata !17), !dbg !20
+  call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !16, metadata !{!"0x102"}), !dbg !15
+  call void @llvm.dbg.declare(metadata i32* %i, metadata !17, metadata !{!"0x102"}), !dbg !20
   store i32 0, i32* %i, align 4, !dbg !20
   br label %for.cond, !dbg !20
 
@@ -43,37 +43,37 @@ for.end:                                          ; preds = %for.cond
   ret void, !dbg !24
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare i32 @puts(i8*)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!27}
 
-!0 = metadata !{i32 786449, metadata !25, i32 4, metadata !"clang version 3.3 (trunk 173515)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !26, null, metadata !"print_args", metadata !"print_args", metadata !"test", i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32, i8**)* @test, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 786473, metadata !26} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null, metadata !9, metadata !10}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 786447, null, null, null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
-!12 = metadata !{i32 786470, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_const_type ]
-!13 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786689, metadata !5, metadata !"argc", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!15 = metadata !{i32 4, i32 0, metadata !5, null}
-!16 = metadata !{i32 786689, metadata !5, metadata !"argv", metadata !6, i32 33554436, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 786688, metadata !18, metadata !"i", metadata !6, i32 6, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!18 = metadata !{i32 786443, metadata !26, metadata !19, i32 6, i32 0, i32 1} ; [ DW_TAG_lexical_block ]
-!19 = metadata !{i32 786443, metadata !26, metadata !5, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!20 = metadata !{i32 6, i32 0, metadata !18, null}
-!21 = metadata !{i32 8, i32 0, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !26, metadata !18, i32 7, i32 0, i32 2} ; [ DW_TAG_lexical_block ]
-!23 = metadata !{i32 9, i32 0, metadata !22, null}
-!24 = metadata !{i32 10, i32 0, metadata !19, null}
-!25 = metadata !{metadata !"main.cpp", metadata !"/private/tmp"}
-!26 = metadata !{metadata !"test.cpp", metadata !"/private/tmp"}
-!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.3 (trunk 173515)\001\00\000\00\000", !25, !2, !2, !3, !2, null} ; [ DW_TAG_compile_unit ]
+!1 = !{!2}
+!2 = !{i32 0}
+!3 = !{!5}
+!5 = !{!"0x2e\00print_args\00print_args\00test\004\000\001\000\006\00256\000\005", !26, null, !7, null, void (i32, i8**)* @test, null, null, !1} ; [ DW_TAG_subprogram ]
+!6 = !{!"0x29", !26} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9, !10}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!10 = !{!"0xf\00\000\0064\0064\000\000", null, null, !11} ; [ DW_TAG_pointer_type ]
+!11 = !{!"0xf\00\000\0064\0064\000\000", null, null, !12} ; [ DW_TAG_pointer_type ]
+!12 = !{!"0x26\00\000\000\000\000\000", null, null, !13} ; [ DW_TAG_const_type ]
+!13 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ]
+!14 = !{!"0x101\00argc\0016777220\000", !5, !6, !9} ; [ DW_TAG_arg_variable ]
+!15 = !MDLocation(line: 4, scope: !5)
+!16 = !{!"0x101\00argv\0033554436\000", !5, !6, !10} ; [ DW_TAG_arg_variable ]
+!17 = !{!"0x100\00i\006\000", !18, !6, !9} ; [ DW_TAG_auto_variable ]
+!18 = !{!"0xb\006\000\001", !26, !19} ; [ DW_TAG_lexical_block ]
+!19 = !{!"0xb\005\000\000", !26, !5} ; [ DW_TAG_lexical_block ]
+!20 = !MDLocation(line: 6, scope: !18)
+!21 = !MDLocation(line: 8, scope: !22)
+!22 = !{!"0xb\007\000\002", !26, !18} ; [ DW_TAG_lexical_block ]
+!23 = !MDLocation(line: 9, scope: !22)
+!24 = !MDLocation(line: 10, scope: !19)
+!25 = !{!"main.cpp", !"/private/tmp"}
+!26 = !{!"test.cpp", !"/private/tmp"}
+!27 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/Inputs/2003-01-30-LinkerRename.ll b/test/Linker/Inputs/2003-01-30-LinkerRename.ll
new file mode 100644
index 000000000000..5c6b5f567ff8
--- /dev/null
+++ b/test/Linker/Inputs/2003-01-30-LinkerRename.ll
@@ -0,0 +1,4 @@
+@bar = global i32()* @foo
+define internal i32 @foo() {
+  ret i32 7
+}
diff --git a/test/Linker/Inputs/2003-05-31-LinkerRename.ll b/test/Linker/Inputs/2003-05-31-LinkerRename.ll
new file mode 100644
index 000000000000..dcd0bf5d2172
--- /dev/null
+++ b/test/Linker/Inputs/2003-05-31-LinkerRename.ll
@@ -0,0 +1,5 @@
+@bar = global i32()* @foo
+
+define internal i32 @foo() {
+  ret i32 7
+}
diff --git a/test/Linker/Inputs/ConstantGlobals.ll b/test/Linker/Inputs/ConstantGlobals.ll
new file mode 100644
index 000000000000..56c2ba555706
--- /dev/null
+++ b/test/Linker/Inputs/ConstantGlobals.ll
@@ -0,0 +1,2 @@
+@X = constant [1 x i32] [i32 8]
+@Y = external constant [1 x i32]
diff --git a/test/Linker/Inputs/alignment.ll b/test/Linker/Inputs/alignment.ll
new file mode 100644
index 000000000000..337cb8e5a16d
--- /dev/null
+++ b/test/Linker/Inputs/alignment.ll
@@ -0,0 +1,12 @@
+@A = global i32 7, align 8
+@B = global i32 7, align 4
+
+define void @C() align 8 {
+  ret void
+}
+
+define void @D() align 4 {
+  ret void
+}
+
+@E = common global i32 0, align 8
diff --git a/test/Linker/Inputs/comdat.ll b/test/Linker/Inputs/comdat.ll
index fdcca49c3c37..74a805cf8626 100644
--- a/test/Linker/Inputs/comdat.ll
+++ b/test/Linker/Inputs/comdat.ll
@@ -2,19 +2,19 @@ target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
 target triple = "i686-pc-windows-msvc"
 
 $foo = comdat largest
-@foo = global i64 43, comdat $foo
+@foo = global i64 43, comdat($foo)
 
-define i32 @bar() comdat $foo {
+define i32 @bar() comdat($foo) {
   ret i32 43
 }
 
 $qux = comdat largest
-@qux = global i32 13, comdat $qux
-@in_unselected_group = global i32 13, comdat $qux
+@qux = global i32 13, comdat($qux)
+@in_unselected_group = global i32 13, comdat($qux)
 
-define i32 @baz() comdat $qux {
+define i32 @baz() comdat($qux) {
   ret i32 13
 }
 
 $any = comdat any
-@any = global i64 7, comdat $any
+@any = global i64 7, comdat($any)
diff --git a/test/Linker/Inputs/comdat2.ll b/test/Linker/Inputs/comdat2.ll
index 9e18304744b5..ed2af6239cd8 100644
--- a/test/Linker/Inputs/comdat2.ll
+++ b/test/Linker/Inputs/comdat2.ll
@@ -1,2 +1,2 @@
 $foo = comdat largest
-@foo = global i64 43, comdat $foo
+@foo = global i64 43, comdat($foo)
diff --git a/test/Linker/Inputs/comdat3.ll b/test/Linker/Inputs/comdat3.ll
index 06f08b947af1..a1b730f216f7 100644
--- a/test/Linker/Inputs/comdat3.ll
+++ b/test/Linker/Inputs/comdat3.ll
@@ -1,2 +1,2 @@
 $foo = comdat noduplicates
-@foo = global i64 43, comdat $foo
+@foo = global i64 43, comdat($foo)
diff --git a/test/Linker/Inputs/comdat4.ll b/test/Linker/Inputs/comdat4.ll
index bbfe3f794abf..5b4b812c7747 100644
--- a/test/Linker/Inputs/comdat4.ll
+++ b/test/Linker/Inputs/comdat4.ll
@@ -2,4 +2,4 @@ target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
 target triple = "i686-pc-windows-msvc"
 
 $foo = comdat samesize
-@foo = global i64 42, comdat $foo
+@foo = global i64 42, comdat($foo)
diff --git a/test/Linker/Inputs/comdat5.ll b/test/Linker/Inputs/comdat5.ll
index 800af18534b1..98c42b7be0c6 100644
--- a/test/Linker/Inputs/comdat5.ll
+++ b/test/Linker/Inputs/comdat5.ll
@@ -1,15 +1,9 @@
 target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
-target triple = "i686-pc-windows-msvc"
 
-%MSRTTICompleteObjectLocator = type { i32, i32, i32, i8*, %MSRTTIClassHierarchyDescriptor* }
-%MSRTTIClassHierarchyDescriptor = type { i32, i32, i32, %MSRTTIBaseClassDescriptor** }
-%MSRTTIBaseClassDescriptor = type { i8*, i32, i32, i32, i32, i32, %MSRTTIClassHierarchyDescriptor* }
-%struct.S = type { i32 (...)** }
+$foo = comdat largest
 
-$"\01??_7S@@6B@" = comdat largest
+@zed = external constant i8
+@some_name = private unnamed_addr constant [2 x i8*] [i8* @zed, i8* bitcast (void ()* @bar to i8*)], comdat($foo)
+@foo = alias getelementptr([2 x i8*]* @some_name, i32 0, i32 1)
 
-@"\01??_R4S@@6B@" = external constant %MSRTTICompleteObjectLocator
-@some_name = private unnamed_addr constant [2 x i8*] [i8* bitcast (%MSRTTICompleteObjectLocator* @"\01??_R4S@@6B@" to i8*), i8* bitcast (void (%struct.S*, i32)* @"\01??_GS@@UAEPAXI@Z" to i8*)], comdat $"\01??_7S@@6B@"
-@"\01??_7S@@6B@" = alias getelementptr([2 x i8*]* @some_name, i32 0, i32 1)
-
-declare x86_thiscallcc void @"\01??_GS@@UAEPAXI@Z"(%struct.S*, i32) unnamed_addr
+declare void @bar() unnamed_addr
diff --git a/test/Linker/Inputs/comdat8.ll b/test/Linker/Inputs/comdat8.ll
new file mode 100644
index 000000000000..a2833b05cf13
--- /dev/null
+++ b/test/Linker/Inputs/comdat8.ll
@@ -0,0 +1,4 @@
+$c1 = comdat largest
+
+@some_name = private unnamed_addr constant i32 42, comdat($c1)
+@c1 = alias i32* @some_name
diff --git a/test/Linker/Inputs/constructor-comdat.ll b/test/Linker/Inputs/constructor-comdat.ll
new file mode 100644
index 000000000000..b5f23da90356
--- /dev/null
+++ b/test/Linker/Inputs/constructor-comdat.ll
@@ -0,0 +1,7 @@
+define weak_odr void @_ZN3fooIiEC2Ev() {
+  ret void
+}
+
+define weak_odr void @_ZN3fooIiEC1Ev() {
+  ret void
+}
diff --git a/test/Linker/Inputs/ctors.ll b/test/Linker/Inputs/ctors.ll
new file mode 100644
index 000000000000..f3307bc554e9
--- /dev/null
+++ b/test/Linker/Inputs/ctors.ll
@@ -0,0 +1,6 @@
+@v = weak global i8 1
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* @v}]
+
+define weak void @f() {
+  ret void
+}
diff --git a/test/Linker/Inputs/distinct.ll b/test/Linker/Inputs/distinct.ll
new file mode 100644
index 000000000000..07ae224d28ae
--- /dev/null
+++ b/test/Linker/Inputs/distinct.ll
@@ -0,0 +1,13 @@
+@global = linkonce global i32 0
+
+!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
+
+!0 = !{}
+!1 = !{!0}
+!2 = !{i32* @global}
+!3 = distinct !{}
+!4 = distinct !{!0}
+!5 = distinct !{i32* @global}
+!6 = !{!3}
+!7 = !{!4}
+!8 = !{!5}
diff --git a/test/Linker/Inputs/ident.a.ll b/test/Linker/Inputs/ident.a.ll
new file mode 100644
index 000000000000..3c3fb19f945b
--- /dev/null
+++ b/test/Linker/Inputs/ident.a.ll
@@ -0,0 +1,3 @@
+!llvm.ident = !{!0, !1}
+!0 = !{!"Compiler V1"}
+!1 = !{!"Compiler V2"}
diff --git a/test/Linker/Inputs/ident.b.ll b/test/Linker/Inputs/ident.b.ll
new file mode 100644
index 000000000000..d9daf8558a70
--- /dev/null
+++ b/test/Linker/Inputs/ident.b.ll
@@ -0,0 +1,2 @@
+!llvm.ident = !{!0}
+!0 = !{!"Compiler V3"}
diff --git a/test/Linker/Inputs/linkage2.ll b/test/Linker/Inputs/linkage2.ll
new file mode 100644
index 000000000000..ce01c9d90d4e
--- /dev/null
+++ b/test/Linker/Inputs/linkage2.ll
@@ -0,0 +1,7 @@
+@test1_a = weak global i8 1
+
+@test2_a = external dllimport global i8
+
+@test3_a = common global i16 0
+
+@test4_a = common global i16 0, align 4
diff --git a/test/Linker/Inputs/mdlocation.ll b/test/Linker/Inputs/mdlocation.ll
new file mode 100644
index 000000000000..f85c1dc7475d
--- /dev/null
+++ b/test/Linker/Inputs/mdlocation.ll
@@ -0,0 +1,13 @@
+!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
+
+!0 = !{} ; Use this as a scope.
+!1 = !MDLocation(line: 3, column: 7, scope: !0)
+!2 = !MDLocation(line: 3, column: 7, scope: !0, inlinedAt: !1)
+!3 = !MDLocation(line: 3, column: 7, scope: !0, inlinedAt: !2)
+!4 = distinct !{} ; Test actual remapping.
+!5 = !MDLocation(line: 3, column: 7, scope: !4)
+!6 = !MDLocation(line: 3, column: 7, scope: !4, inlinedAt: !5)
+!7 = !MDLocation(line: 3, column: 7, scope: !4, inlinedAt: !6)
+; Test distinct nodes.
+!8 = distinct !MDLocation(line: 3, column: 7, scope: !0)
+!9 = distinct !MDLocation(line: 3, column: 7, scope: !0, inlinedAt: !8)
diff --git a/test/Linker/Inputs/module-flags-dont-change-others.ll b/test/Linker/Inputs/module-flags-dont-change-others.ll
new file mode 100644
index 000000000000..61d57e525560
--- /dev/null
+++ b/test/Linker/Inputs/module-flags-dont-change-others.ll
@@ -0,0 +1,8 @@
+!llvm.module.flags = !{!3, !4, !5}
+
+!0 = !{}
+!1 = !{!0}
+!2 = !{!0, !1}
+!3 = !{i32 4, !"foo", i32 37} ; Override the "foo" value.
+!4 = !{i32 5, !"bar", !1}
+!5 = !{i32 6, !"baz", !2}
diff --git a/test/Linker/Inputs/module-flags-pic-1-b.ll b/test/Linker/Inputs/module-flags-pic-1-b.ll
new file mode 100644
index 000000000000..8b137891791f
--- /dev/null
+++ b/test/Linker/Inputs/module-flags-pic-1-b.ll
@@ -0,0 +1 @@
+
diff --git a/test/Linker/Inputs/module-flags-pic-2-b.ll b/test/Linker/Inputs/module-flags-pic-2-b.ll
new file mode 100644
index 000000000000..0d78cafc6a0f
--- /dev/null
+++ b/test/Linker/Inputs/module-flags-pic-2-b.ll
@@ -0,0 +1,3 @@
+!0 = !{ i32 1, !"PIC Level", i32 2 }
+
+!llvm.module.flags = !{!0}
diff --git a/test/Linker/Inputs/opaque.ll b/test/Linker/Inputs/opaque.ll
new file mode 100644
index 000000000000..2b0d7d3cea8b
--- /dev/null
+++ b/test/Linker/Inputs/opaque.ll
@@ -0,0 +1,13 @@
+%A = type { }
+%B = type { %D, %E, %B* }
+
+%D = type { %E }
+%E = type opaque
+
+@g2 = external global %A
+@g3 = external global %B
+
+define void @f1()  {
+  getelementptr %A* null, i32 0
+  ret void
+}
diff --git a/test/Linker/Inputs/pr21374.ll b/test/Linker/Inputs/pr21374.ll
new file mode 100644
index 000000000000..fcddeafc2e72
--- /dev/null
+++ b/test/Linker/Inputs/pr21374.ll
@@ -0,0 +1,4 @@
+%foo = type { i8* }
+define void @g(%foo* %x) {
+  ret void
+}
diff --git a/test/Linker/Inputs/redefinition.ll b/test/Linker/Inputs/redefinition.ll
new file mode 100644
index 000000000000..0f580e649d4a
--- /dev/null
+++ b/test/Linker/Inputs/redefinition.ll
@@ -0,0 +1 @@
+define void @foo(i32 %x) { ret void }
diff --git a/test/Linker/Inputs/replaced-function-matches-first-subprogram.ll b/test/Linker/Inputs/replaced-function-matches-first-subprogram.ll
new file mode 100644
index 000000000000..a5de89f5a605
--- /dev/null
+++ b/test/Linker/Inputs/replaced-function-matches-first-subprogram.ll
@@ -0,0 +1,27 @@
+%struct.Class = type { i8 }
+
+define weak_odr i32 @_ZN5ClassIiE3fooEv(%struct.Class* %this) align 2 {
+entry:
+  %this.addr = alloca %struct.Class*, align 8
+  store %struct.Class* %this, %struct.Class** %this.addr, align 8
+  %this1 = load %struct.Class** %this.addr
+  ret i32 0, !dbg !12
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9, !10}
+!llvm.ident = !{!11}
+
+!0 = !{!"0x11\004\00clang version 3.6.0 (trunk 224193) (llvm/trunk 224197)\000\00\000\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d2/t2.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"t2.cpp", !"/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d2"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\002\000\001\000\000\00256\000\002", !5, !6, !7, null, i32 (%struct.Class*)* @_ZN5ClassIiE3fooEv, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [foo]
+!5 = !{!"../t.h", !"/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d2"}
+!6 = !{!"0x29", !5}    ; [ DW_TAG_file_type ] [/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d2/../t.h]
+!7 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{i32 2, !"Dwarf Version", i32 2}
+!9 = !{i32 2, !"Debug Info Version", i32 2}
+!10 = !{i32 1, !"PIC Level", i32 2}
+!11 = !{!"clang version 3.6.0 (trunk 224193) (llvm/trunk 224197)"}
+!12 = !MDLocation(line: 2, column: 15, scope: !4)
diff --git a/test/Linker/Inputs/testlink.ll b/test/Linker/Inputs/testlink.ll
new file mode 100644
index 000000000000..89f02ba95904
--- /dev/null
+++ b/test/Linker/Inputs/testlink.ll
@@ -0,0 +1,56 @@
+%intlist = type { %intlist*, i32 }
+
+
+%Ty1 = type { %Ty2* }
+%Ty2 = type opaque
+
+%VecSize = type { <10 x i32> }
+
+@GVTy1 = global %Ty1* null
+@GVTy2 = external global %Ty2*
+
+
+@MyVar = global i32 4
+@MyIntList = external global %intlist
+@AConst = constant i32 1234
+
+;; Intern in both testlink[12].ll
+@Intern1 = internal constant i32 52
+
+@Use2Intern1 = global i32* @Intern1
+
+;; Intern in one but not in other
+@Intern2 = constant i32 12345
+
+@MyIntListPtr = constant { %intlist* } { %intlist* @MyIntList }
+@MyVarPtr = linkonce global { i32* } { i32* @MyVar }
+@0 = constant i32 412
+
+; Provides definition of Struct1 and of S1GV.
+%Struct1 = type { i32 }
+@S1GV = global %Struct1* null
+
+define i32 @foo(i32 %blah) {
+  store i32 %blah, i32* @MyVar
+  %idx = getelementptr %intlist* @MyIntList, i64 0, i32 1
+  store i32 12, i32* %idx
+  %ack = load i32* @0
+  %fzo = add i32 %ack, %blah
+  ret i32 %fzo
+}
+
+declare void @unimp(float, double)
+
+define internal void @testintern() {
+  ret void
+}
+
+define void @Testintern() {
+  ret void
+}
+
+define internal void @testIntern() {
+  ret void
+}
+
+declare void @VecSizeCrash1(%VecSize)
diff --git a/test/Linker/Inputs/type-unique-alias.ll b/test/Linker/Inputs/type-unique-alias.ll
new file mode 100644
index 000000000000..3ee162ccfcfe
--- /dev/null
+++ b/test/Linker/Inputs/type-unique-alias.ll
@@ -0,0 +1,4 @@
+%u = type { i8 }
+
+@g2 = global %u zeroinitializer
+@a = weak alias %u* @g2
diff --git a/test/Linker/Inputs/type-unique-dst-types2.ll b/test/Linker/Inputs/type-unique-dst-types2.ll
new file mode 100644
index 000000000000..b565c6d73649
--- /dev/null
+++ b/test/Linker/Inputs/type-unique-dst-types2.ll
@@ -0,0 +1,3 @@
+%A.11 = type { %B }
+%B = type { i8 }
+@g1 = external global %A.11
diff --git a/test/Linker/Inputs/type-unique-dst-types3.ll b/test/Linker/Inputs/type-unique-dst-types3.ll
new file mode 100644
index 000000000000..c5794ad839a2
--- /dev/null
+++ b/test/Linker/Inputs/type-unique-dst-types3.ll
@@ -0,0 +1,2 @@
+%A.11 = type opaque
+@g2 = external global %A.11
diff --git a/test/Linker/Inputs/type-unique-inheritance-a.ll b/test/Linker/Inputs/type-unique-inheritance-a.ll
index 381210cd3cf2..c503919349ff 100644
--- a/test/Linker/Inputs/type-unique-inheritance-a.ll
+++ b/test/Linker/Inputs/type-unique-inheritance-a.ll
@@ -52,13 +52,13 @@ entry:
   %a.addr = alloca i32, align 4
   %t = alloca %class.A, align 4
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !20), !dbg !21
-  call void @llvm.dbg.declare(metadata !{%class.A* %t}, metadata !22), !dbg !23
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !20, metadata !{!"0x102"}), !dbg !21
+  call void @llvm.dbg.declare(metadata %class.A* %t, metadata !22, metadata !{!"0x102"}), !dbg !23
   ret void, !dbg !24
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -66,29 +66,29 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!19, !25}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git f54e02f969d02d640103db73efc30c45439fceab) (http://llvm.org/git/llvm.git 284353b55896cb1babfaa7add7c0a363245342d2)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !14, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/inher/foo.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"foo.cpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4, metadata !8}
-!4 = metadata !{i32 786434, metadata !5, null, metadata !"A", i32 3, i64 64, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 3, size 64, align 32, offset 0] [def] [from ]
-!5 = metadata !{metadata !"./a.hpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"}
-!6 = metadata !{metadata !7, metadata !13}
-!7 = metadata !{i32 786460, null, metadata !"_ZTS1A", null, i32 0, i64 0, i64 0, i64 0, i32 1, metadata !8} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [private] [from Base]
-!8 = metadata !{i32 786434, metadata !9, null, metadata !"Base", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_class_type ] [Base] [line 3, size 32, align 32, offset 0] [def] [from ]
-!9 = metadata !{metadata !"./b.hpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"}
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 786445, metadata !9, metadata !"_ZTS4Base", metadata !"b", i32 4, i64 32, i64 32, i64 0, i32 1, metadata !12} ; [ DW_TAG_member ] [b] [line 4, size 32, align 32, offset 0] [private] [from int]
-!12 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!13 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1A", metadata !"x", i32 4, i64 32, i64 32, i64 32, i32 1, metadata !12} ; [ DW_TAG_member ] [x] [line 4, size 32, align 32, offset 32] [private] [from int]
-!14 = metadata !{metadata !15}
-!15 = metadata !{i32 786478, metadata !1, metadata !16, metadata !"f", metadata !"f", metadata !"_Z1fi", i32 5, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1fi, null, null, metadata !2, i32 5} ; [ DW_TAG_subprogram ] [line 5] [def] [f]
-!16 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/inher/foo.cpp]
-!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!18 = metadata !{null, metadata !12}
-!19 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!20 = metadata !{i32 786689, metadata !15, metadata !"a", metadata !16, i32 16777221, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 5]
-!21 = metadata !{i32 5, i32 0, metadata !15, null}
-!22 = metadata !{i32 786688, metadata !15, metadata !"t", metadata !16, i32 6, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 6]
-!23 = metadata !{i32 6, i32 0, metadata !15, null}
-!24 = metadata !{i32 7, i32 0, metadata !15, null}
-!25 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 (http://llvm.org/git/clang.git f54e02f969d02d640103db73efc30c45439fceab) (http://llvm.org/git/llvm.git 284353b55896cb1babfaa7add7c0a363245342d2)\000\00\000\00\000", !1, !2, !3, !14, !2, !2} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/inher/foo.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"foo.cpp", !"/Users/mren/c_testing/type_unique_air/inher"}
+!2 = !{i32 0}
+!3 = !{!4, !8}
+!4 = !{!"0x2\00A\003\0064\0032\000\000\000", !5, null, null, !6, null, null, !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 3, size 64, align 32, offset 0] [def] [from ]
+!5 = !{!"./a.hpp", !"/Users/mren/c_testing/type_unique_air/inher"}
+!6 = !{!7, !13}
+!7 = !{!"0x1c\00\000\000\000\000\001", null, !"_ZTS1A", !8} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [private] [from Base]
+!8 = !{!"0x2\00Base\003\0032\0032\000\000\000", !9, null, null, !10, null, null, !"_ZTS4Base"} ; [ DW_TAG_class_type ] [Base] [line 3, size 32, align 32, offset 0] [def] [from ]
+!9 = !{!"./b.hpp", !"/Users/mren/c_testing/type_unique_air/inher"}
+!10 = !{!11}
+!11 = !{!"0xd\00b\004\0032\0032\000\001", !9, !"_ZTS4Base", !12} ; [ DW_TAG_member ] [b] [line 4, size 32, align 32, offset 0] [private] [from int]
+!12 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!13 = !{!"0xd\00x\004\0032\0032\0032\001", !5, !"_ZTS1A", !12} ; [ DW_TAG_member ] [x] [line 4, size 32, align 32, offset 32] [private] [from int]
+!14 = !{!15}
+!15 = !{!"0x2e\00f\00f\00_Z1fi\005\000\001\000\006\00256\000\005", !1, !16, !17, null, void (i32)* @_Z1fi, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [f]
+!16 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/inher/foo.cpp]
+!17 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !18, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = !{null, !12}
+!19 = !{i32 2, !"Dwarf Version", i32 2}
+!20 = !{!"0x101\00a\0016777221\000", !15, !16, !12} ; [ DW_TAG_arg_variable ] [a] [line 5]
+!21 = !MDLocation(line: 5, scope: !15)
+!22 = !{!"0x100\00t\006\000", !15, !16, !4} ; [ DW_TAG_auto_variable ] [t] [line 6]
+!23 = !MDLocation(line: 6, scope: !15)
+!24 = !MDLocation(line: 7, scope: !15)
+!25 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/Inputs/type-unique-inheritance-b.ll b/test/Linker/Inputs/type-unique-inheritance-b.ll
index 0cd43f6a9d44..d3b9dead03cd 100644
--- a/test/Linker/Inputs/type-unique-inheritance-b.ll
+++ b/test/Linker/Inputs/type-unique-inheritance-b.ll
@@ -10,13 +10,13 @@ entry:
   %a.addr = alloca i32, align 4
   %t = alloca %class.B, align 8
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !28), !dbg !29
-  call void @llvm.dbg.declare(metadata !{%class.B* %t}, metadata !30), !dbg !31
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !28, metadata !{!"0x102"}), !dbg !29
+  call void @llvm.dbg.declare(metadata %class.B* %t, metadata !30, metadata !{!"0x102"}), !dbg !31
   ret void, !dbg !32
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: ssp uwtable
 define i32 @main() #2 {
@@ -24,7 +24,7 @@ entry:
   %retval = alloca i32, align 4
   %a = alloca %class.A, align 4
   store i32 0, i32* %retval
-  call void @llvm.dbg.declare(metadata !{%class.A* %a}, metadata !33), !dbg !34
+  call void @llvm.dbg.declare(metadata %class.A* %a, metadata !33, metadata !{!"0x102"}), !dbg !34
   call void @_Z1fi(i32 0), !dbg !35
   call void @_Z1gi(i32 1), !dbg !36
   ret i32 0, !dbg !37
@@ -40,42 +40,42 @@ attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!27, !38}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git f54e02f969d02d640103db73efc30c45439fceab) (http://llvm.org/git/llvm.git 284353b55896cb1babfaa7add7c0a363245342d2)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !19, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/inher/bar.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"bar.cpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4, metadata !11, metadata !15}
-!4 = metadata !{i32 786434, metadata !5, null, metadata !"B", i32 7, i64 128, i64 64, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS1B"} ; [ DW_TAG_class_type ] [B] [line 7, size 128, align 64, offset 0] [def] [from ]
-!5 = metadata !{metadata !"./b.hpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"}
-!6 = metadata !{metadata !7, metadata !9}
-!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1B", metadata !"bb", i32 8, i64 32, i64 32, i64 0, i32 1, metadata !8} ; [ DW_TAG_member ] [bb] [line 8, size 32, align 32, offset 0] [private] [from int]
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1B", metadata !"a", i32 9, i64 64, i64 64, i64 64, i32 1, metadata !10} ; [ DW_TAG_member ] [a] [line 9, size 64, align 64, offset 64] [private] [from ]
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
-!11 = metadata !{i32 786434, metadata !12, null, metadata !"A", i32 3, i64 64, i64 32, i32 0, i32 0, null, metadata !13, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 3, size 64, align 32, offset 0] [def] [from ]
-!12 = metadata !{metadata !"./a.hpp", metadata !"/Users/mren/c_testing/type_unique_air/inher"}
-!13 = metadata !{metadata !14, metadata !18}
-!14 = metadata !{i32 786460, null, metadata !"_ZTS1A", null, i32 0, i64 0, i64 0, i64 0, i32 1, metadata !15} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [private] [from Base]
-!15 = metadata !{i32 786434, metadata !5, null, metadata !"Base", i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !16, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_class_type ] [Base] [line 3, size 32, align 32, offset 0] [def] [from ]
-!16 = metadata !{metadata !17}
-!17 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"b", i32 4, i64 32, i64 32, i64 0, i32 1, metadata !8} ; [ DW_TAG_member ] [b] [line 4, size 32, align 32, offset 0] [private] [from int]
-!18 = metadata !{i32 786445, metadata !12, metadata !"_ZTS1A", metadata !"x", i32 4, i64 32, i64 32, i64 32, i32 1, metadata !8} ; [ DW_TAG_member ] [x] [line 4, size 32, align 32, offset 32] [private] [from int]
-!19 = metadata !{metadata !20, metadata !24}
-!20 = metadata !{i32 786478, metadata !1, metadata !21, metadata !"g", metadata !"g", metadata !"_Z1gi", i32 4, metadata !22, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1gi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [g]
-!21 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/inher/bar.cpp]
-!22 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!23 = metadata !{null, metadata !8}
-!24 = metadata !{i32 786478, metadata !1, metadata !21, metadata !"main", metadata !"main", metadata !"", i32 9, metadata !25, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 9} ; [ DW_TAG_subprogram ] [line 9] [def] [main]
-!25 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !26, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!26 = metadata !{metadata !8}
-!27 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!28 = metadata !{i32 786689, metadata !20, metadata !"a", metadata !21, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 4]
-!29 = metadata !{i32 4, i32 0, metadata !20, null}
-!30 = metadata !{i32 786688, metadata !20, metadata !"t", metadata !21, i32 5, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 5]
-!31 = metadata !{i32 5, i32 0, metadata !20, null}
-!32 = metadata !{i32 6, i32 0, metadata !20, null}
-!33 = metadata !{i32 786688, metadata !24, metadata !"a", metadata !21, i32 10, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 10]
-!34 = metadata !{i32 10, i32 0, metadata !24, null}
-!35 = metadata !{i32 11, i32 0, metadata !24, null}
-!36 = metadata !{i32 12, i32 0, metadata !24, null}
-!37 = metadata !{i32 13, i32 0, metadata !24, null}
-!38 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 (http://llvm.org/git/clang.git f54e02f969d02d640103db73efc30c45439fceab) (http://llvm.org/git/llvm.git 284353b55896cb1babfaa7add7c0a363245342d2)\000\00\000\00\000", !1, !2, !3, !19, !2, !2} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/inher/bar.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"bar.cpp", !"/Users/mren/c_testing/type_unique_air/inher"}
+!2 = !{i32 0}
+!3 = !{!4, !11, !15}
+!4 = !{!"0x2\00B\007\00128\0064\000\000\000", !5, null, null, !6, null, null, !"_ZTS1B"} ; [ DW_TAG_class_type ] [B] [line 7, size 128, align 64, offset 0] [def] [from ]
+!5 = !{!"./b.hpp", !"/Users/mren/c_testing/type_unique_air/inher"}
+!6 = !{!7, !9}
+!7 = !{!"0xd\00bb\008\0032\0032\000\001", !5, !"_ZTS1B", !8} ; [ DW_TAG_member ] [bb] [line 8, size 32, align 32, offset 0] [private] [from int]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0xd\00a\009\0064\0064\0064\001", !5, !"_ZTS1B", !10} ; [ DW_TAG_member ] [a] [line 9, size 64, align 64, offset 64] [private] [from ]
+!10 = !{!"0xf\00\000\0064\0064\000\000", null, null, !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!11 = !{!"0x2\00A\003\0064\0032\000\000\000", !12, null, null, !13, null, null, !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 3, size 64, align 32, offset 0] [def] [from ]
+!12 = !{!"./a.hpp", !"/Users/mren/c_testing/type_unique_air/inher"}
+!13 = !{!14, !18}
+!14 = !{!"0x1c\00\000\000\000\000\001", null, !"_ZTS1A", !15} ; [ DW_TAG_inheritance ] [line 0, size 0, align 0, offset 0] [private] [from Base]
+!15 = !{!"0x2\00Base\003\0032\0032\000\000\000", !5, null, null, !16, null, null, !"_ZTS4Base"} ; [ DW_TAG_class_type ] [Base] [line 3, size 32, align 32, offset 0] [def] [from ]
+!16 = !{!17}
+!17 = !{!"0xd\00b\004\0032\0032\000\001", !5, !"_ZTS4Base", !8} ; [ DW_TAG_member ] [b] [line 4, size 32, align 32, offset 0] [private] [from int]
+!18 = !{!"0xd\00x\004\0032\0032\0032\001", !12, !"_ZTS1A", !8} ; [ DW_TAG_member ] [x] [line 4, size 32, align 32, offset 32] [private] [from int]
+!19 = !{!20, !24}
+!20 = !{!"0x2e\00g\00g\00_Z1gi\004\000\001\000\006\00256\000\004", !1, !21, !22, null, void (i32)* @_Z1gi, null, null, !2} ; [ DW_TAG_subprogram ] [line 4] [def] [g]
+!21 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/inher/bar.cpp]
+!22 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !23, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!23 = !{null, !8}
+!24 = !{!"0x2e\00main\00main\00\009\000\001\000\006\00256\000\009", !1, !21, !25, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 9] [def] [main]
+!25 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !26, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!26 = !{!8}
+!27 = !{i32 2, !"Dwarf Version", i32 2}
+!28 = !{!"0x101\00a\0016777220\000", !20, !21, !8} ; [ DW_TAG_arg_variable ] [a] [line 4]
+!29 = !MDLocation(line: 4, scope: !20)
+!30 = !{!"0x100\00t\005\000", !20, !21, !4} ; [ DW_TAG_auto_variable ] [t] [line 5]
+!31 = !MDLocation(line: 5, scope: !20)
+!32 = !MDLocation(line: 6, scope: !20)
+!33 = !{!"0x100\00a\0010\000", !24, !21, !11} ; [ DW_TAG_auto_variable ] [a] [line 10]
+!34 = !MDLocation(line: 10, scope: !24)
+!35 = !MDLocation(line: 11, scope: !24)
+!36 = !MDLocation(line: 12, scope: !24)
+!37 = !MDLocation(line: 13, scope: !24)
+!38 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/Inputs/type-unique-name.ll b/test/Linker/Inputs/type-unique-name.ll
new file mode 100644
index 000000000000..2553246251ff
--- /dev/null
+++ b/test/Linker/Inputs/type-unique-name.ll
@@ -0,0 +1,5 @@
+%t = type { i8 }
+
+define %t* @f() {
+  ret %t* null
+}
diff --git a/test/Linker/Inputs/type-unique-opaque.ll b/test/Linker/Inputs/type-unique-opaque.ll
new file mode 100644
index 000000000000..872b601f6910
--- /dev/null
+++ b/test/Linker/Inputs/type-unique-opaque.ll
@@ -0,0 +1,6 @@
+%t = type { i8 }
+%t2 = type { %t*, i16 }
+
+define %t2* @f() {
+  ret %t2* null
+}
diff --git a/test/Linker/Inputs/type-unique-simple2-a.ll b/test/Linker/Inputs/type-unique-simple2-a.ll
index 676b4109c0d9..6d6e93c68f00 100644
--- a/test/Linker/Inputs/type-unique-simple2-a.ll
+++ b/test/Linker/Inputs/type-unique-simple2-a.ll
@@ -49,13 +49,13 @@ entry:
   %a.addr = alloca i32, align 4
   %t = alloca %struct.Base, align 8
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !17), !dbg !18
-  call void @llvm.dbg.declare(metadata !{%struct.Base* %t}, metadata !19), !dbg !20
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !17, metadata !{!"0x102"}), !dbg !18
+  call void @llvm.dbg.declare(metadata %struct.Base* %t, metadata !19, metadata !{!"0x102"}), !dbg !20
   ret void, !dbg !21
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -63,26 +63,26 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!16, !22}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git 8a3f9e46cb988d2c664395b21910091e3730ae82) (http://llvm.org/git/llvm.git 4699e9549358bc77824a59114548eecc3f7c523c)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !11, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [foo.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"foo.cpp", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !5, null, metadata !"Base", i32 1, i64 128, i64 64, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 128, align 64, offset 0] [def] [from ]
-!5 = metadata !{metadata !"./a.hpp", metadata !"."}
-!6 = metadata !{metadata !7, metadata !9}
-!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"b", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ] [b] [line 3, size 64, align 64, offset 64] [from ]
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS4Base"}
-!11 = metadata !{metadata !12}
-!12 = metadata !{i32 786478, metadata !1, metadata !13, metadata !"f", metadata !"f", metadata !"_Z1fi", i32 3, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1fi, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
-!13 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [foo.cpp]
-!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!15 = metadata !{null, metadata !8}
-!16 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!17 = metadata !{i32 786689, metadata !12, metadata !"a", metadata !13, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3]
-!18 = metadata !{i32 3, i32 0, metadata !12, null}
-!19 = metadata !{i32 786688, metadata !12, metadata !"t", metadata !13, i32 4, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 4]
-!20 = metadata !{i32 4, i32 0, metadata !12, null}
-!21 = metadata !{i32 5, i32 0, metadata !12, null}
-!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 (http://llvm.org/git/clang.git 8a3f9e46cb988d2c664395b21910091e3730ae82) (http://llvm.org/git/llvm.git 4699e9549358bc77824a59114548eecc3f7c523c)\000\00\000\00\000", !1, !2, !3, !11, !2, !2} ; [ DW_TAG_compile_unit ] [foo.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"foo.cpp", !"."}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00Base\001\00128\0064\000\000\000", !5, null, null, !6, null, null, !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 128, align 64, offset 0] [def] [from ]
+!5 = !{!"./a.hpp", !"."}
+!6 = !{!7, !9}
+!7 = !{!"0xd\00a\002\0032\0032\000\000", !5, !"_ZTS4Base", !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0xd\00b\003\0064\0064\0064\000", !5, !"_ZTS4Base", !10} ; [ DW_TAG_member ] [b] [line 3, size 64, align 64, offset 64] [from ]
+!10 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS4Base"} ; [ DW_TAG_pointer_type ]
+!11 = !{!12}
+!12 = !{!"0x2e\00f\00f\00_Z1fi\003\000\001\000\006\00256\000\003", !1, !13, !14, null, void (i32)* @_Z1fi, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
+!13 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [foo.cpp]
+!14 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = !{null, !8}
+!16 = !{i32 2, !"Dwarf Version", i32 2}
+!17 = !{!"0x101\00a\0016777219\000", !12, !13, !8} ; [ DW_TAG_arg_variable ] [a] [line 3]
+!18 = !MDLocation(line: 3, scope: !12)
+!19 = !{!"0x100\00t\004\000", !12, !13, !4} ; [ DW_TAG_auto_variable ] [t] [line 4]
+!20 = !MDLocation(line: 4, scope: !12)
+!21 = !MDLocation(line: 5, scope: !12)
+!22 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/Inputs/type-unique-simple2-b.ll b/test/Linker/Inputs/type-unique-simple2-b.ll
index 3ec79e5d9cb6..d3b2f7e514c6 100644
--- a/test/Linker/Inputs/type-unique-simple2-b.ll
+++ b/test/Linker/Inputs/type-unique-simple2-b.ll
@@ -8,13 +8,13 @@ entry:
   %a.addr = alloca i32, align 4
   %t = alloca %struct.Base, align 8
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !20), !dbg !21
-  call void @llvm.dbg.declare(metadata !{%struct.Base* %t}, metadata !22), !dbg !23
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !20, metadata !{!"0x102"}), !dbg !21
+  call void @llvm.dbg.declare(metadata %struct.Base* %t, metadata !22, metadata !{!"0x102"}), !dbg !23
   ret void, !dbg !24
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: ssp uwtable
 define i32 @main() #2 {
@@ -36,32 +36,32 @@ attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!19, !28}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git 8a3f9e46cb988d2c664395b21910091e3730ae82) (http://llvm.org/git/llvm.git 4699e9549358bc77824a59114548eecc3f7c523c)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !11, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [bar.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"bar.cpp", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !5, null, metadata !"Base", i32 1, i64 128, i64 64, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 128, align 64, offset 0] [def] [from ]
-!5 = metadata !{metadata !"./a.hpp", metadata !"."}
-!6 = metadata !{metadata !7, metadata !9}
-!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"b", i32 3, i64 64, i64 64, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ] [b] [line 3, size 64, align 64, offset 64] [from ]
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS4Base"}
-!11 = metadata !{metadata !12, metadata !16}
-!12 = metadata !{i32 786478, metadata !1, metadata !13, metadata !"g", metadata !"g", metadata !"_Z1gi", i32 4, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1gi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [g]
-!13 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [bar.cpp]
-!14 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!15 = metadata !{null, metadata !8}
-!16 = metadata !{i32 786478, metadata !1, metadata !13, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
-!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!18 = metadata !{metadata !8}
-!19 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!20 = metadata !{i32 786689, metadata !12, metadata !"a", metadata !13, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 4]
-!21 = metadata !{i32 4, i32 0, metadata !12, null}
-!22 = metadata !{i32 786688, metadata !12, metadata !"t", metadata !13, i32 5, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 5]
-!23 = metadata !{i32 5, i32 0, metadata !12, null}
-!24 = metadata !{i32 6, i32 0, metadata !12, null}
-!25 = metadata !{i32 8, i32 0, metadata !16, null} ; [ DW_TAG_imported_declaration ]
-!26 = metadata !{i32 9, i32 0, metadata !16, null}
-!27 = metadata !{i32 10, i32 0, metadata !16, null}
-!28 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 (http://llvm.org/git/clang.git 8a3f9e46cb988d2c664395b21910091e3730ae82) (http://llvm.org/git/llvm.git 4699e9549358bc77824a59114548eecc3f7c523c)\000\00\000\00\000", !1, !2, !3, !11, !2, !2} ; [ DW_TAG_compile_unit ] [bar.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"bar.cpp", !"."}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00Base\001\00128\0064\000\000\000", !5, null, null, !6, null, null, !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 128, align 64, offset 0] [def] [from ]
+!5 = !{!"./a.hpp", !"."}
+!6 = !{!7, !9}
+!7 = !{!"0xd\00a\002\0032\0032\000\000", !5, !"_ZTS4Base", !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0xd\00b\003\0064\0064\0064\000", !5, !"_ZTS4Base", !10} ; [ DW_TAG_member ] [b] [line 3, size 64, align 64, offset 64] [from ]
+!10 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS4Base"} ; [ DW_TAG_pointer_type ]
+!11 = !{!12, !16}
+!12 = !{!"0x2e\00g\00g\00_Z1gi\004\000\001\000\006\00256\000\004", !1, !13, !14, null, void (i32)* @_Z1gi, null, null, !2} ; [ DW_TAG_subprogram ] [line 4] [def] [g]
+!13 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [bar.cpp]
+!14 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !15, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!15 = !{null, !8}
+!16 = !{!"0x2e\00main\00main\00\007\000\001\000\006\00256\000\007", !1, !13, !17, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
+!17 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !18, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = !{!8}
+!19 = !{i32 2, !"Dwarf Version", i32 2}
+!20 = !{!"0x101\00a\0016777220\000", !12, !13, !8} ; [ DW_TAG_arg_variable ] [a] [line 4]
+!21 = !MDLocation(line: 4, scope: !12)
+!22 = !{!"0x100\00t\005\000", !12, !13, !4} ; [ DW_TAG_auto_variable ] [t] [line 5]
+!23 = !MDLocation(line: 5, scope: !12)
+!24 = !MDLocation(line: 6, scope: !12)
+!25 = !MDLocation(line: 8, scope: !16)
+!26 = !MDLocation(line: 9, scope: !16)
+!27 = !MDLocation(line: 10, scope: !16)
+!28 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/Inputs/type-unique-unrelated2.ll b/test/Linker/Inputs/type-unique-unrelated2.ll
new file mode 100644
index 000000000000..b7c2cec18a36
--- /dev/null
+++ b/test/Linker/Inputs/type-unique-unrelated2.ll
@@ -0,0 +1,7 @@
+%t = type { i8* }
+declare %t @g()
+
+define %t @g2() {
+ %x = call %t @g()
+ ret %t %x
+}
diff --git a/test/Linker/Inputs/type-unique-unrelated3.ll b/test/Linker/Inputs/type-unique-unrelated3.ll
new file mode 100644
index 000000000000..e3f2dd944dd0
--- /dev/null
+++ b/test/Linker/Inputs/type-unique-unrelated3.ll
@@ -0,0 +1,7 @@
+%t = type { i8* }
+declare %t @f()
+
+define %t @g() {
+ %x = call %t @f()
+ ret %t %x
+}
diff --git a/test/Linker/Inputs/unique-fwd-decl-b.ll b/test/Linker/Inputs/unique-fwd-decl-b.ll
new file mode 100644
index 000000000000..24099b8e2f9f
--- /dev/null
+++ b/test/Linker/Inputs/unique-fwd-decl-b.ll
@@ -0,0 +1,3 @@
+!b = !{!0}
+!0 = !{!1}
+!1 = !{}
diff --git a/test/Linker/Inputs/unique-fwd-decl-order.ll b/test/Linker/Inputs/unique-fwd-decl-order.ll
new file mode 100644
index 000000000000..e87ac84d59b1
--- /dev/null
+++ b/test/Linker/Inputs/unique-fwd-decl-order.ll
@@ -0,0 +1,6 @@
+!named = !{!0}
+
+; These nodes are intentionally in the opposite order from the test-driver.
+; However, they are numbered the same for the reader's convenience.
+!1 = !{}
+!0 = !{!1}
diff --git a/test/Linker/Inputs/visibility.ll b/test/Linker/Inputs/visibility.ll
new file mode 100644
index 000000000000..2cd112ed37f2
--- /dev/null
+++ b/test/Linker/Inputs/visibility.ll
@@ -0,0 +1,26 @@
+$c1 = comdat any
+
+; Variables
+@v1 = weak hidden global i32 0
+@v2 = weak protected global i32 0
+@v3 = weak hidden global i32 0
+@v4 = hidden global i32 1, comdat($c1)
+
+; Aliases
+@a1 = weak hidden alias i32* @v1
+@a2 = weak protected alias i32* @v2
+@a3 = weak hidden alias i32* @v3
+
+; Functions
+define weak hidden void @f1() {
+entry:
+  ret void
+}
+define weak protected void @f2() {
+entry:
+  ret void
+}
+define weak hidden void @f3() {
+entry:
+  ret void
+}
diff --git a/test/Linker/alignment.ll b/test/Linker/alignment.ll
new file mode 100644
index 000000000000..16cfe625fdc3
--- /dev/null
+++ b/test/Linker/alignment.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-link %p/alignment.ll %p/Inputs/alignment.ll -S | FileCheck %s
+; RUN: llvm-link %p/Inputs/alignment.ll %p/alignment.ll -S | FileCheck %s
+
+
+@A = weak global i32 7, align 4
+; CHECK-DAG: @A = global i32 7, align 8
+
+@B = weak global i32 7, align 8
+; CHECK-DAG: @B = global i32 7, align 4
+
+define weak void @C() align 4 {
+  ret void
+}
+; CHECK-DAG: define void @C() align 8 {
+
+define weak void @D() align 8 {
+  ret void
+}
+; CHECK-DAG: define void @D() align 4 {
+
+@E = common global i32 0, align 4
+; CHECK-DAG: @E = common global i32 0, align 8
diff --git a/test/Linker/comdat.ll b/test/Linker/comdat.ll
index 4d2aef7b8a09..2a2ec3bbb6b2 100644
--- a/test/Linker/comdat.ll
+++ b/test/Linker/comdat.ll
@@ -3,30 +3,30 @@ target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
 target triple = "i686-pc-windows-msvc"
 
 $foo = comdat largest
-@foo = global i32 42, comdat $foo
+@foo = global i32 42, comdat($foo)
 
-define i32 @bar() comdat $foo {
+define i32 @bar() comdat($foo) {
   ret i32 42
 }
 
 $qux = comdat largest
-@qux = global i64 12, comdat $qux
+@qux = global i64 12, comdat($qux)
 
-define i32 @baz() comdat $qux {
+define i32 @baz() comdat($qux) {
   ret i32 12
 }
 
 $any = comdat any
-@any = global i64 6, comdat $any
+@any = global i64 6, comdat($any)
 
 ; CHECK: $qux = comdat largest
 ; CHECK: $foo = comdat largest
 ; CHECK: $any = comdat any
 
-; CHECK: @qux = global i64 12, comdat $qux
-; CHECK: @any = global i64 6, comdat $any
-; CHECK: @foo = global i64 43, comdat $foo
+; CHECK: @qux = global i64 12, comdat{{$}}
+; CHECK: @any = global i64 6, comdat{{$}}
+; CHECK: @foo = global i64 43, comdat{{$}}
 ; CHECK-NOT: @in_unselected_group = global i32 13, comdat $qux
 
-; CHECK: define i32 @baz() comdat $qux
-; CHECK: define i32 @bar() comdat $foo
+; CHECK: define i32 @baz() comdat($qux)
+; CHECK: define i32 @bar() comdat($foo)
diff --git a/test/Linker/comdat2.ll b/test/Linker/comdat2.ll
index 60c3d7cf5502..ba8115dbae5b 100644
--- a/test/Linker/comdat2.ll
+++ b/test/Linker/comdat2.ll
@@ -3,5 +3,5 @@ target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
 target triple = "i686-pc-windows-msvc"
 
 $foo = comdat samesize
-@foo = global i32 42, comdat $foo
+@foo = global i32 42, comdat($foo)
 ; CHECK: Linking COMDATs named 'foo': invalid selection kinds!
diff --git a/test/Linker/comdat3.ll b/test/Linker/comdat3.ll
index f0d9a48bb9dd..3b5db0a39a0b 100644
--- a/test/Linker/comdat3.ll
+++ b/test/Linker/comdat3.ll
@@ -1,5 +1,5 @@
 ; RUN: not llvm-link %s %p/Inputs/comdat2.ll -S -o - 2>&1 | FileCheck %s
 
 $foo = comdat largest
-@foo = global i32 43, comdat $foo
+@foo = global i32 43, comdat($foo)
 ; CHECK: Linking COMDATs named 'foo': can't do size dependent selection without DataLayout!
diff --git a/test/Linker/comdat4.ll b/test/Linker/comdat4.ll
index 50c1778e894d..cf7ac5f5d0dd 100644
--- a/test/Linker/comdat4.ll
+++ b/test/Linker/comdat4.ll
@@ -1,5 +1,5 @@
 ; RUN: not llvm-link %s %p/Inputs/comdat3.ll -S -o - 2>&1 | FileCheck %s
 
 $foo = comdat noduplicates
-@foo = global i64 43, comdat $foo
+@foo = global i64 43, comdat($foo)
 ; CHECK: Linking COMDATs named 'foo': noduplicates has been violated!
diff --git a/test/Linker/comdat5.ll b/test/Linker/comdat5.ll
index 011fb8c0f462..759688ee47ea 100644
--- a/test/Linker/comdat5.ll
+++ b/test/Linker/comdat5.ll
@@ -3,5 +3,5 @@ target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
 target triple = "i686-pc-windows-msvc"
 
 $foo = comdat samesize
-@foo = global i32 42, comdat $foo
+@foo = global i32 42, comdat($foo)
 ; CHECK: Linking COMDATs named 'foo': SameSize violated!
diff --git a/test/Linker/comdat6.ll b/test/Linker/comdat6.ll
index efa5dfb4d677..b5360a7c7da3 100644
--- a/test/Linker/comdat6.ll
+++ b/test/Linker/comdat6.ll
@@ -1,13 +1,10 @@
-; RUN: llvm-link %s %p/Inputs/comdat5.ll -S -o - 2>&1 | FileCheck %s
-; RUN: llvm-link %p/Inputs/comdat5.ll %s -S -o - 2>&1 | FileCheck %s
+; RUN: llvm-link %s %p/Inputs/comdat5.ll -S -o - | FileCheck %s
+; RUN: llvm-link %p/Inputs/comdat5.ll %s -S -o - | FileCheck %s
 target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
-target triple = "i686-pc-windows-msvc"
 
-%struct.S = type { i32 (...)** }
+$foo = comdat largest
+@foo = linkonce_odr unnamed_addr constant [1 x i8*] [i8* bitcast (void ()* @bar to i8*)], comdat($foo)
 
-$"\01??_7S@@6B@" = comdat largest
-@"\01??_7S@@6B@" = linkonce_odr unnamed_addr constant [1 x i8*] [i8* bitcast (void (%struct.S*, i32)* @"\01??_GS@@UAEPAXI@Z" to i8*)], comdat $"\01??_7S@@6B@"
+; CHECK: @foo = alias getelementptr inbounds ([2 x i8*]* @some_name, i32 0, i32 1)
 
-; CHECK: @"\01??_7S@@6B@" = alias getelementptr inbounds ([2 x i8*]* @some_name, i32 0, i32 1)
-
-declare x86_thiscallcc void @"\01??_GS@@UAEPAXI@Z"(%struct.S*, i32) unnamed_addr
+declare void @bar() unnamed_addr
diff --git a/test/Linker/comdat7.ll b/test/Linker/comdat7.ll
index c3ff3f6cd7fb..ef938a706bf8 100644
--- a/test/Linker/comdat7.ll
+++ b/test/Linker/comdat7.ll
@@ -1,9 +1,8 @@
-; RUN: not llvm-link %s %p/Inputs/comdat5.ll -S -o - 2>&1 | FileCheck %s
-target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
-target triple = "i686-pc-windows-msvc"
+; RUN: not llvm-link %s %s -S -o - 2>&1 | FileCheck %s
 
-$"\01??_7S@@6B@" = comdat largest
-define void @"\01??_7S@@6B@"() {
+$c1 = comdat largest
+
+define void @c1() comdat($c1) {
   ret void
 }
 ; CHECK: GlobalVariable required for data dependent selection!
diff --git a/test/Linker/comdat8.ll b/test/Linker/comdat8.ll
index 21669f69bd50..5ca352a85099 100644
--- a/test/Linker/comdat8.ll
+++ b/test/Linker/comdat8.ll
@@ -1,10 +1,8 @@
-; RUN: not llvm-link %s %p/Inputs/comdat5.ll -S -o - 2>&1 | FileCheck %s
-target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
-target triple = "i686-pc-windows-msvc"
+; RUN: not llvm-link %s %p/Inputs/comdat8.ll -S -o - 2>&1 | FileCheck %s
+
+$c1 = comdat largest
+
+@some_name = private unnamed_addr constant i32 42, comdat($c1)
+@c1 = alias i8* inttoptr (i32 ptrtoint (i32* @some_name to i32) to i8*)
 
-$"\01??_7S@@6B@" = comdat largest
-define void @some_name() {
-  ret void
-}
-@"\01??_7S@@6B@" = alias i8* inttoptr (i32 ptrtoint (void ()* @some_name to i32) to i8*)
 ; CHECK: COMDAT key involves incomputable alias size.
diff --git a/test/Linker/comdat9.ll b/test/Linker/comdat9.ll
new file mode 100644
index 000000000000..f155a6e35626
--- /dev/null
+++ b/test/Linker/comdat9.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-link %s -S -o - | FileCheck %s
+
+$c = comdat any
+@a = alias void ()* @f
+define internal void @f() comdat($c) {
+  ret void
+}
+
+; CHECK-DAG: $c = comdat any
+; CHECK-DAG: @a = alias void ()* @f
+; CHECK-DAG: define internal void @f() comdat($c)
+
+$f2 = comdat largest
+define internal void @f2() comdat($f2) {
+  ret void
+}
+
+; CHECK-DAG: $f2 = comdat largest
+; CHECK-DAG: define internal void @f2() comdat {
diff --git a/test/Linker/constructor-comdat.ll b/test/Linker/constructor-comdat.ll
new file mode 100644
index 000000000000..dfc899208aa3
--- /dev/null
+++ b/test/Linker/constructor-comdat.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-link %s %p/Inputs/constructor-comdat.ll -S -o - 2>&1 | FileCheck %s
+; RUN: llvm-link %p/Inputs/constructor-comdat.ll %s -S -o - 2>&1 | FileCheck %s
+
+$_ZN3fooIiEC5Ev = comdat any
+; CHECK: $_ZN3fooIiEC5Ev = comdat any
+
+@_ZN3fooIiEC1Ev = weak_odr alias void ()* @_ZN3fooIiEC2Ev
+; CHECK: @_ZN3fooIiEC1Ev = weak_odr alias void ()* @_ZN3fooIiEC2Ev
+
+; CHECK: define weak_odr void @_ZN3fooIiEC2Ev() comdat($_ZN3fooIiEC5Ev) {
+define weak_odr void @_ZN3fooIiEC2Ev() comdat($_ZN3fooIiEC5Ev) {
+  ret void
+}
diff --git a/test/Linker/ctors.ll b/test/Linker/ctors.ll
new file mode 100644
index 000000000000..67bf45637180
--- /dev/null
+++ b/test/Linker/ctors.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-link %s %p/Inputs/ctors.ll -S -o - | \
+; RUN:   FileCheck --check-prefix=ALL --check-prefix=CHECK1 %s
+; RUN: llvm-link %p/Inputs/ctors.ll %s -S -o - | \
+; RUN:   FileCheck --check-prefix=ALL --check-prefix=CHECK2 %s
+
+@v = weak global i8 0
+; CHECK1: @v = weak global i8 0
+; CHECK2: @v = weak global i8 1
+
+@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* @v }]
+; ALL: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @f, i8* @v }]
+
+define weak void @f() {
+  ret void
+}
diff --git a/test/Linker/debug-info-version-a.ll b/test/Linker/debug-info-version-a.ll
index c3d9c87a2e90..352fcdcb77ca 100644
--- a/test/Linker/debug-info-version-a.ll
+++ b/test/Linker/debug-info-version-a.ll
@@ -4,13 +4,13 @@
 ; from the other file should be dropped.
 
 ; CHECK-NOT: metadata !{metadata !"b.c", metadata !""}
-; CHECK: metadata !{metadata !"a.c", metadata !""}
+; CHECK:  !"a.c", !""}
 ; CHECK-NOT: metadata !{metadata !"b.c", metadata !""}
 
 !llvm.module.flags = !{ !0 }
 !llvm.dbg.cu = !{!1}
 
-!0 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!1 = metadata !{i32 589841, metadata !2, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !3, metadata !3, metadata !3, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!2 = metadata !{metadata !"a.c", metadata !""}
-!3 = metadata !{}
+!0 = !{i32 2, !"Debug Info Version", i32 2}
+!1 = !{!"0x11\0012\00clang\001\00\000\00\000", !2, !3, !3, !3, null, null} ; [ DW_TAG_compile_unit ]
+!2 = !{!"a.c", !""}
+!3 = !{}
diff --git a/test/Linker/debug-info-version-b.ll b/test/Linker/debug-info-version-b.ll
index 2b4f1844d169..e4944202f421 100644
--- a/test/Linker/debug-info-version-b.ll
+++ b/test/Linker/debug-info-version-b.ll
@@ -4,7 +4,7 @@
 !llvm.module.flags = !{ !0 }
 !llvm.dbg.cu = !{!1}
 
-!0 = metadata !{i32 2, metadata !"Debug Info Version", i32 42}
-!1 = metadata !{i32 589841, metadata !2, i32 12, metadata !"clang", metadata !"I AM UNEXPECTED!"} ; [ DW_TAG_compile_unit ]
-!2 = metadata !{metadata !"b.c", metadata !""}
-!3 = metadata !{}
+!0 = !{i32 2, !"Debug Info Version", i32 42}
+!1 = !{!"0x11\0012\00clang\000\00", !"I AM UNEXPECTED!"} ; [ DW_TAG_compile_unit ]
+!2 = !{!"b.c", !""}
+!3 = !{}
diff --git a/test/Linker/distinct.ll b/test/Linker/distinct.ll
new file mode 100644
index 000000000000..c8e5c89eb095
--- /dev/null
+++ b/test/Linker/distinct.ll
@@ -0,0 +1,37 @@
+; RUN: llvm-link %s %S/Inputs/distinct.ll -o - -S | FileCheck %s
+
+; Test that distinct nodes from other modules remain distinct.  The @global
+; cases are the most interesting, since the operands actually need to be
+; remapped.
+
+; CHECK: @global = linkonce global i32 0
+@global = linkonce global i32 0
+
+; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !0, !1, !2, !9, !10, !11, !12, !13, !14}
+!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
+
+; CHECK:      !0 = !{}
+; CHECK-NEXT: !1 = !{!0}
+; CHECK-NEXT: !2 = !{i32* @global}
+; CHECK-NEXT: !3 = distinct !{}
+; CHECK-NEXT: !4 = distinct !{!0}
+; CHECK-NEXT: !5 = distinct !{i32* @global}
+; CHECK-NEXT: !6 = !{!3}
+; CHECK-NEXT: !7 = !{!4}
+; CHECK-NEXT: !8 = !{!5}
+; CHECK-NEXT: !9 = distinct !{}
+; CHECK-NEXT: !10 = distinct !{!0}
+; CHECK-NEXT: !11 = distinct !{i32* @global}
+; CHECK-NEXT: !12 = !{!9}
+; CHECK-NEXT: !13 = !{!10}
+; CHECK-NEXT: !14 = !{!11}
+; CHECK-NOT:  !
+!0 = !{}
+!1 = !{!0}
+!2 = !{i32* @global}
+!3 = distinct !{}
+!4 = distinct !{!0}
+!5 = distinct !{i32* @global}
+!6 = !{!3}
+!7 = !{!4}
+!8 = !{!5}
diff --git a/test/Linker/global_ctors.ll b/test/Linker/global_ctors.ll
index 541f0d4f91bb..49df81a00759 100644
--- a/test/Linker/global_ctors.ll
+++ b/test/Linker/global_ctors.ll
@@ -1,5 +1,6 @@
 ; RUN: llvm-as %s -o %t.new.bc
 ; RUN: llvm-link %t.new.bc %S/Inputs/old_global_ctors.3.4.bc | llvm-dis | FileCheck %s
+; RUN: llvm-link %S/Inputs/old_global_ctors.3.4.bc %t.new.bc | llvm-dis | FileCheck %s
 
 ; old_global_ctors.3.4.bc contains the following LLVM IL, assembled into
 ; bitcode by llvm-as from 3.4.  It uses a two element @llvm.global_ctors array.
diff --git a/test/Linker/ident.ll b/test/Linker/ident.ll
new file mode 100644
index 000000000000..93bf8c7b2ca7
--- /dev/null
+++ b/test/Linker/ident.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-link %S/Inputs/ident.a.ll %S/Inputs/ident.b.ll -S | FileCheck %s
+
+; Verify that multiple input llvm.ident metadata are linked together.
+
+; CHECK-DAG: !llvm.ident = !{!0, !1, !2}
+; CHECK-DAG: "Compiler V1"
+; CHECK-DAG: "Compiler V2"
+; CHECK-DAG: "Compiler V3"
+
diff --git a/test/Linker/link-messages.ll b/test/Linker/link-messages.ll
deleted file mode 100644
index 4e7ffbc97d64..000000000000
--- a/test/Linker/link-messages.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; Test that linking two files with the same definition causes an error and
-; that error is printed out.
-; RUN: llvm-as %s -o %t.one.bc
-; RUN: llvm-as %s -o %t.two.bc
-; RUN: not llvm-link %t.one.bc %t.two.bc -o %t.bc 2>&1 | FileCheck %s
-
-; CHECK: symbol multiply defined
-define i32 @bar() {
-  ret i32 0
-}
diff --git a/test/Linker/linkage2.ll b/test/Linker/linkage2.ll
new file mode 100644
index 000000000000..dbae7caee54b
--- /dev/null
+++ b/test/Linker/linkage2.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-link %s %p/Inputs/linkage2.ll -S | FileCheck %s
+; RUN: llvm-link %p/Inputs/linkage2.ll %s -S | FileCheck %s
+
+@test1_a = common global i8 0
+; CHECK-DAG: @test1_a = common global i8 0
+
+@test2_a = global i8 0
+; CHECK-DAG: @test2_a = global i8 0
+
+@test3_a = common global i8 0
+; CHECK-DAG: @test3_a = common global i16 0
+
+@test4_a = common global i8 0, align 8
+; CHECK-DAG: @test4_a = common global i16 0, align 8
diff --git a/test/Linker/linkmdnode.ll b/test/Linker/linkmdnode.ll
index 5f1158039fce..2469c29d43e4 100644
--- a/test/Linker/linkmdnode.ll
+++ b/test/Linker/linkmdnode.ll
@@ -3,7 +3,7 @@
 ; RUN: llvm-link %t.bc %t2.bc
 
 
-!21 = metadata !{i32 42, metadata !"foobar"}
+!21 = !{i32 42, !"foobar"}
 
 declare i8 @llvm.something(metadata %a)
 define void @foo() {
diff --git a/test/Linker/linkmdnode2.ll b/test/Linker/linkmdnode2.ll
index a7d991a8a4f8..5b2b58df9b79 100644
--- a/test/Linker/linkmdnode2.ll
+++ b/test/Linker/linkmdnode2.ll
@@ -2,7 +2,7 @@
 ;
 ; RUN: true
 
-!22 = metadata !{i32 42, metadata !"foobar"}
+!22 = !{i32 42, !"foobar"}
 
 declare i8 @llvm.something(metadata %a)
 define void @foo1() {
@@ -18,5 +18,5 @@ define void @test() {
   ret void, !abc !0
 }
 
-!0 = metadata !{metadata !0, i32 42 }
+!0 = !{!0, i32 42 }
 
diff --git a/test/Linker/linknamedmdnode.ll b/test/Linker/linknamedmdnode.ll
index 73e7554a9077..60f95a63512d 100644
--- a/test/Linker/linknamedmdnode.ll
+++ b/test/Linker/linknamedmdnode.ll
@@ -3,5 +3,5 @@
 ; RUN: llvm-link %t.bc %t2.bc -S | FileCheck %s
 ; CHECK: !llvm.stuff = !{!0, !1}
 
-!0 = metadata !{i32 42}
+!0 = !{i32 42}
 !llvm.stuff = !{!0}
diff --git a/test/Linker/linknamedmdnode2.ll b/test/Linker/linknamedmdnode2.ll
index d16f62abed33..ff4330d53d77 100644
--- a/test/Linker/linknamedmdnode2.ll
+++ b/test/Linker/linknamedmdnode2.ll
@@ -2,5 +2,5 @@
 ;
 ; RUN: true
 
-!0 = metadata !{i32 41}
+!0 = !{i32 41}
 !llvm.stuff = !{!0}
diff --git a/test/Linker/lto-attributes.ll b/test/Linker/lto-attributes.ll
new file mode 100644
index 000000000000..e55029a0e2be
--- /dev/null
+++ b/test/Linker/lto-attributes.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-link -S %s -o - | FileCheck %s
+
+; CHECK-DAG: @foo = private externally_initialized global i8* null
+@foo = private externally_initialized global i8* null
+
+@useFoo = global i8** @foo
+
+; CHECK-DAG: @array = appending global [7 x i8] c"abcdefg", align 1
+@array = appending global [7 x i8] c"abcdefg", align 1
+
diff --git a/test/Linker/mdlocation.ll b/test/Linker/mdlocation.ll
new file mode 100644
index 000000000000..5ee366cbb433
--- /dev/null
+++ b/test/Linker/mdlocation.ll
@@ -0,0 +1,34 @@
+; RUN: llvm-link %s %S/Inputs/mdlocation.ll -o - -S | FileCheck %s
+
+; Test that MDLocations are remapped properly.
+
+; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !0, !1, !2, !3, !10, !11, !12, !13, !14, !15}
+!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
+
+; CHECK:      !0 = !{}
+; CHECK-NEXT: !1 = !MDLocation(line: 3, column: 7, scope: !0)
+; CHECK-NEXT: !2 = !MDLocation(line: 3, column: 7, scope: !0, inlinedAt: !1)
+; CHECK-NEXT: !3 = !MDLocation(line: 3, column: 7, scope: !0, inlinedAt: !2)
+; CHECK-NEXT: !4 = distinct !{}
+; CHECK-NEXT: !5 = !MDLocation(line: 3, column: 7, scope: !4)
+; CHECK-NEXT: !6 = !MDLocation(line: 3, column: 7, scope: !4, inlinedAt: !5)
+; CHECK-NEXT: !7 = !MDLocation(line: 3, column: 7, scope: !4, inlinedAt: !6)
+; CHECK-NEXT: !8 = distinct !MDLocation(line: 3, column: 7, scope: !0)
+; CHECK-NEXT: !9 = distinct !MDLocation(line: 3, column: 7, scope: !0, inlinedAt: !8)
+; CHECK-NEXT: !10 = distinct !{}
+; CHECK-NEXT: !11 = !MDLocation(line: 3, column: 7, scope: !10)
+; CHECK-NEXT: !12 = !MDLocation(line: 3, column: 7, scope: !10, inlinedAt: !11)
+; CHECK-NEXT: !13 = !MDLocation(line: 3, column: 7, scope: !10, inlinedAt: !12)
+; CHECK-NEXT: !14 = distinct !MDLocation(line: 3, column: 7, scope: !0)
+; CHECK-NEXT: !15 = distinct !MDLocation(line: 3, column: 7, scope: !0, inlinedAt: !14)
+!0 = !{} ; Use this as a scope.
+!1 = !MDLocation(line: 3, column: 7, scope: !0)
+!2 = !MDLocation(line: 3, column: 7, scope: !0, inlinedAt: !1)
+!3 = !MDLocation(line: 3, column: 7, scope: !0, inlinedAt: !2)
+!4 = distinct !{} ; Test actual remapping.
+!5 = !MDLocation(line: 3, column: 7, scope: !4)
+!6 = !MDLocation(line: 3, column: 7, scope: !4, inlinedAt: !5)
+!7 = !MDLocation(line: 3, column: 7, scope: !4, inlinedAt: !6)
+; Test distinct nodes.
+!8 = distinct !MDLocation(line: 3, column: 7, scope: !0)
+!9 = distinct !MDLocation(line: 3, column: 7, scope: !0, inlinedAt: !8)
diff --git a/test/Linker/metadata-a.ll b/test/Linker/metadata-a.ll
index 5a9d2e40b948..78715c69bd5c 100644
--- a/test/Linker/metadata-a.ll
+++ b/test/Linker/metadata-a.ll
@@ -1,15 +1,15 @@
 ; RUN: llvm-link %s %p/metadata-b.ll -S -o - | FileCheck %s
 
 ; CHECK: define void @foo(i32 %a)
-; CHECK: ret void, !attach !0, !also !{i32 %a}
+; CHECK: ret void, !attach !0
 ; CHECK: define void @goo(i32 %b)
-; CHECK: ret void, !attach !1, !and !{i32 %b}
-; CHECK: !0 = metadata !{i32 524334, void (i32)* @foo}
-; CHECK: !1 = metadata !{i32 524334, void (i32)* @goo}
+; CHECK: ret void, !attach !1
+; CHECK: !0 = !{i32 524334, void (i32)* @foo}
+; CHECK: !1 = !{i32 524334, void (i32)* @goo}
 
 define void @foo(i32 %a) nounwind {
 entry:
-  ret void, !attach !0, !also !{ i32 %a }
+  ret void, !attach !0
 }
 
-!0 = metadata !{i32 524334, void (i32)* @foo}
+!0 = !{i32 524334, void (i32)* @foo}
diff --git a/test/Linker/metadata-b.ll b/test/Linker/metadata-b.ll
index ef0270af0756..fd2fd28e1d99 100644
--- a/test/Linker/metadata-b.ll
+++ b/test/Linker/metadata-b.ll
@@ -3,7 +3,7 @@
 
 define void @goo(i32 %b) nounwind {
 entry:
-  ret void, !attach !0, !and !{ i32 %b }
+  ret void, !attach !0
 }
 
-!0 = metadata !{i32 524334, void (i32)* @goo}
+!0 = !{i32 524334, void (i32)* @goo}
diff --git a/test/Linker/module-flags-1-a.ll b/test/Linker/module-flags-1-a.ll
index 32f189cf99f1..fb3fcc1201f3 100644
--- a/test/Linker/module-flags-1-a.ll
+++ b/test/Linker/module-flags-1-a.ll
@@ -2,15 +2,15 @@
 
 ; Test basic functionality of module flags.
 
-; CHECK: !0 = metadata !{i32 1, metadata !"foo", i32 37}
-; CHECK: !1 = metadata !{i32 2, metadata !"bar", i32 42}
-; CHECK: !2 = metadata !{i32 1, metadata !"mux", metadata !3}
-; CHECK: !3 = metadata !{metadata !"hello world", i32 927}
-; CHECK: !4 = metadata !{i32 1, metadata !"qux", i32 42}
+; CHECK: !0 = !{i32 1, !"foo", i32 37}
+; CHECK: !1 = !{i32 2, !"bar", i32 42}
+; CHECK: !2 = !{i32 1, !"mux", !3}
+; CHECK: !3 = !{!"hello world", i32 927}
+; CHECK: !4 = !{i32 1, !"qux", i32 42}
 ; CHECK: !llvm.module.flags = !{!0, !1, !2, !4}
 
-!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
-!1 = metadata !{ i32 2, metadata !"bar", i32 42 }
-!2 = metadata !{ i32 1, metadata !"mux", metadata !{ metadata !"hello world", i32 927 } }
+!0 = !{ i32 1, !"foo", i32 37 }
+!1 = !{ i32 2, !"bar", i32 42 }
+!2 = !{ i32 1, !"mux", !{ !"hello world", i32 927 } }
 
 !llvm.module.flags = !{ !0, !1, !2 }
diff --git a/test/Linker/module-flags-1-b.ll b/test/Linker/module-flags-1-b.ll
index bf3f5e555508..b3d1412c7d26 100644
--- a/test/Linker/module-flags-1-b.ll
+++ b/test/Linker/module-flags-1-b.ll
@@ -1,8 +1,8 @@
 ; This file is used with module-flags-1-a.ll
 ; RUN: true
 
-!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
-!1 = metadata !{ i32 1, metadata !"qux", i32 42 }
-!2 = metadata !{ i32 1, metadata !"mux", metadata !{ metadata !"hello world", i32 927 } }
+!0 = !{ i32 1, !"foo", i32 37 }
+!1 = !{ i32 1, !"qux", i32 42 }
+!2 = !{ i32 1, !"mux", !{ !"hello world", i32 927 } }
 
 !llvm.module.flags = !{ !0, !1, !2 }
diff --git a/test/Linker/module-flags-2-a.ll b/test/Linker/module-flags-2-a.ll
index 3ae02889d16b..5f034618c9f2 100644
--- a/test/Linker/module-flags-2-a.ll
+++ b/test/Linker/module-flags-2-a.ll
@@ -2,9 +2,9 @@
 
 ; Test the 'override' behavior.
 
-; CHECK: !0 = metadata !{i32 4, metadata !"foo", i32 37}
+; CHECK: !0 = !{i32 4, !"foo", i32 37}
 ; CHECK: !llvm.module.flags = !{!0}
 
-!0 = metadata !{ i32 1, metadata !"foo", i32 927 }
+!0 = !{ i32 1, !"foo", i32 927 }
 
 !llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-2-b.ll b/test/Linker/module-flags-2-b.ll
index ab55e4b997c0..93d7ac1ca0af 100644
--- a/test/Linker/module-flags-2-b.ll
+++ b/test/Linker/module-flags-2-b.ll
@@ -1,6 +1,6 @@
 ; This file is used with module-flags-2-a.ll
 ; RUN: true
 
-!0 = metadata !{ i32 4, metadata !"foo", i32 37 } ; Override the "foo" value.
+!0 = !{ i32 4, !"foo", i32 37 } ; Override the "foo" value.
 
 !llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-3-a.ll b/test/Linker/module-flags-3-a.ll
index e7a720e9c024..26a939158702 100644
--- a/test/Linker/module-flags-3-a.ll
+++ b/test/Linker/module-flags-3-a.ll
@@ -2,13 +2,13 @@
 
 ; Test 'require' behavior.
 
-; CHECK: !0 = metadata !{i32 1, metadata !"foo", i32 37}
-; CHECK: !1 = metadata !{i32 1, metadata !"bar", i32 42}
-; CHECK: !2 = metadata !{i32 3, metadata !"foo", metadata !3}
-; CHECK: !3 = metadata !{metadata !"bar", i32 42}
+; CHECK: !0 = !{i32 1, !"foo", i32 37}
+; CHECK: !1 = !{i32 1, !"bar", i32 42}
+; CHECK: !2 = !{i32 3, !"foo", !3}
+; CHECK: !3 = !{!"bar", i32 42}
 ; CHECK: !llvm.module.flags = !{!0, !1, !2}
 
-!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
-!1 = metadata !{ i32 1, metadata !"bar", i32 42 }
+!0 = !{ i32 1, !"foo", i32 37 }
+!1 = !{ i32 1, !"bar", i32 42 }
 
 !llvm.module.flags = !{ !0, !1 }
diff --git a/test/Linker/module-flags-3-b.ll b/test/Linker/module-flags-3-b.ll
index 76be80263376..2e6e5297eada 100644
--- a/test/Linker/module-flags-3-b.ll
+++ b/test/Linker/module-flags-3-b.ll
@@ -1,8 +1,6 @@
 ; This file is used with module-flags-3-a.ll
 ; RUN: true
 
-!0 = metadata !{ i32 3, metadata !"foo",
-  metadata !{ metadata !"bar", i32 42 }
-}
+!0 = !{i32 3, !"foo", !{!"bar", i32 42}}
 
 !llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-4-a.ll b/test/Linker/module-flags-4-a.ll
index a656c8b84b9f..14d850dd59c7 100644
--- a/test/Linker/module-flags-4-a.ll
+++ b/test/Linker/module-flags-4-a.ll
@@ -4,7 +4,7 @@
 
 ; CHECK: linking module flags 'bar': does not have the required value
 
-!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
-!1 = metadata !{ i32 1, metadata !"bar", i32 927 }
+!0 = !{ i32 1, !"foo", i32 37 }
+!1 = !{ i32 1, !"bar", i32 927 }
 
 !llvm.module.flags = !{ !0, !1 }
diff --git a/test/Linker/module-flags-4-b.ll b/test/Linker/module-flags-4-b.ll
index 3a460bbeb0ba..25aaf190dc81 100644
--- a/test/Linker/module-flags-4-b.ll
+++ b/test/Linker/module-flags-4-b.ll
@@ -1,8 +1,6 @@
 ; This file is used with module-flags-4-a.ll
 ; RUN: true
 
-!0 = metadata !{ i32 3, metadata !"foo",
-  metadata !{ metadata !"bar", i32 42 }
-}
+!0 = !{i32 3, !"foo", !{!"bar", i32 42}}
 
 !llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-5-a.ll b/test/Linker/module-flags-5-a.ll
index 8d625cd8c9de..00fb4d15017b 100644
--- a/test/Linker/module-flags-5-a.ll
+++ b/test/Linker/module-flags-5-a.ll
@@ -4,6 +4,6 @@
 
 ; CHECK: linking module flags 'foo': IDs have conflicting override values
 
-!0 = metadata !{ i32 4, metadata !"foo", i32 927 }
+!0 = !{ i32 4, !"foo", i32 927 }
 
 !llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-5-b.ll b/test/Linker/module-flags-5-b.ll
index 1e99b207544e..71823173426b 100644
--- a/test/Linker/module-flags-5-b.ll
+++ b/test/Linker/module-flags-5-b.ll
@@ -1,6 +1,6 @@
 ; This file is used with module-flags-5-a.ll
 ; RUN: true
 
-!0 = metadata !{ i32 4, metadata !"foo", i32 37 } ; Override the "foo" value.
+!0 = !{ i32 4, !"foo", i32 37 } ; Override the "foo" value.
 
 !llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-6-a.ll b/test/Linker/module-flags-6-a.ll
index 5329c436a473..e15fd349eb5c 100644
--- a/test/Linker/module-flags-6-a.ll
+++ b/test/Linker/module-flags-6-a.ll
@@ -4,6 +4,6 @@
 
 ; CHECK: linking module flags 'foo': IDs have conflicting values
 
-!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+!0 = !{ i32 1, !"foo", i32 37 }
 
 !llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-6-b.ll b/test/Linker/module-flags-6-b.ll
index 2bc5a96045bd..e27770031775 100644
--- a/test/Linker/module-flags-6-b.ll
+++ b/test/Linker/module-flags-6-b.ll
@@ -1,6 +1,6 @@
 ; This file is used with module-flags-6-a.ll
 ; RUN: true
 
-!0 = metadata !{ i32 1, metadata !"foo", i32 38 }
+!0 = !{ i32 1, !"foo", i32 38 }
 
 !llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-7-a.ll b/test/Linker/module-flags-7-a.ll
index 976c8fecf5fe..46c9d265ea30 100644
--- a/test/Linker/module-flags-7-a.ll
+++ b/test/Linker/module-flags-7-a.ll
@@ -4,6 +4,6 @@
 
 ; CHECK: linking module flags 'foo': IDs have conflicting behaviors
 
-!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+!0 = !{ i32 1, !"foo", i32 37 }
 
 !llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-7-b.ll b/test/Linker/module-flags-7-b.ll
index 2bc72508d468..47d44e90220c 100644
--- a/test/Linker/module-flags-7-b.ll
+++ b/test/Linker/module-flags-7-b.ll
@@ -1,6 +1,6 @@
 ; This file is used with module-flags-7-a.ll
 ; RUN: true
 
-!0 = metadata !{ i32 2, metadata !"foo", i32 37 }
+!0 = !{ i32 2, !"foo", i32 37 }
 
 !llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-8-a.ll b/test/Linker/module-flags-8-a.ll
index 146cae763d6d..b65a50fda8ed 100644
--- a/test/Linker/module-flags-8-a.ll
+++ b/test/Linker/module-flags-8-a.ll
@@ -2,13 +2,13 @@
 
 ; Test append-type module flags.
 
-; CHECK: !0 = metadata !{i32 5, metadata !"flag-0", metadata !1}
-; CHECK: !1 = metadata !{i32 0, i32 0, i32 1}
-; CHECK: !2 = metadata !{i32 6, metadata !"flag-1", metadata !3}
-; CHECK: !3 = metadata !{i32 0, i32 1, i32 2}
+; CHECK: !0 = !{i32 5, !"flag-0", !1}
+; CHECK: !1 = !{i32 0, i32 0, i32 1}
+; CHECK: !2 = !{i32 6, !"flag-1", !3}
+; CHECK: !3 = !{i32 0, i32 1, i32 2}
 ; CHECK: !llvm.module.flags = !{!0, !2}
 
-!0 = metadata !{ i32 5, metadata !"flag-0", metadata !{ i32 0 } }
-!1 = metadata !{ i32 6, metadata !"flag-1", metadata !{ i32 0, i32 1 } }
+!0 = !{ i32 5, !"flag-0", !{ i32 0 } }
+!1 = !{ i32 6, !"flag-1", !{ i32 0, i32 1 } }
 
 !llvm.module.flags = !{ !0, !1 }
diff --git a/test/Linker/module-flags-8-b.ll b/test/Linker/module-flags-8-b.ll
index 08f9bc49ee5e..afd737ac37cd 100644
--- a/test/Linker/module-flags-8-b.ll
+++ b/test/Linker/module-flags-8-b.ll
@@ -1,7 +1,7 @@
 ; This file is used with module-flags-6-a.ll
 ; RUN: true
 
-!0 = metadata !{ i32 5, metadata !"flag-0", metadata !{ i32 0, i32 1 } }
-!1 = metadata !{ i32 6, metadata !"flag-1", metadata !{ i32 1, i32 2 } }
+!0 = !{ i32 5, !"flag-0", !{ i32 0, i32 1 } }
+!1 = !{ i32 6, !"flag-1", !{ i32 1, i32 2 } }
 
 !llvm.module.flags = !{ !0, !1 }
diff --git a/test/Linker/module-flags-dont-change-others.ll b/test/Linker/module-flags-dont-change-others.ll
new file mode 100644
index 000000000000..4cc9f39027a3
--- /dev/null
+++ b/test/Linker/module-flags-dont-change-others.ll
@@ -0,0 +1,26 @@
+; RUN: llvm-link %s %S/Inputs/module-flags-dont-change-others.ll -S -o - | FileCheck %s
+
+; Test that module-flag linking doesn't change other metadata.  In particular,
+; !named should still point at the unmodified tuples (!3, !4, and !5) that
+; happen to also serve as module flags.
+
+; CHECK: !named = !{!0, !1, !2, !3, !4, !5}
+; CHECK: !llvm.module.flags = !{!6, !7, !8}
+!named = !{!0, !1, !2, !3, !4, !5}
+!llvm.module.flags = !{!3, !4, !5}
+
+; CHECK: !0 = !{}
+; CHECK: !1 = !{!0}
+; CHECK: !2 = !{!0, !1}
+; CHECK: !3 = !{i32 1, !"foo", i32 927}
+; CHECK: !4 = !{i32 5, !"bar", !0}
+; CHECK: !5 = !{i32 6, !"baz", !1}
+; CHECK: !6 = !{i32 4, !"foo", i32 37}
+; CHECK: !7 = !{i32 5, !"bar", !1}
+; CHECK: !8 = !{i32 6, !"baz", !2}
+!0 = !{}
+!1 = !{!0}
+!2 = !{!0, !1}
+!3 = !{i32 1, !"foo", i32 927}
+!4 = !{i32 5, !"bar", !0}
+!5 = !{i32 6, !"baz", !1}
diff --git a/test/Linker/module-flags-pic-1-a.ll b/test/Linker/module-flags-pic-1-a.ll
new file mode 100644
index 000000000000..ea933359ac66
--- /dev/null
+++ b/test/Linker/module-flags-pic-1-a.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-link %s %p/Inputs/module-flags-pic-1-b.ll -S -o - | FileCheck %s
+
+; test linking modules with specified and default PIC levels
+
+!0 = !{ i32 1, !"PIC Level", i32 1 }
+
+!llvm.module.flags = !{!0}
+; CHECK: !llvm.module.flags = !{!0}
+; CHECK: !0 = !{i32 1, !"PIC Level", i32 1}
diff --git a/test/Linker/module-flags-pic-2-a.ll b/test/Linker/module-flags-pic-2-a.ll
new file mode 100644
index 000000000000..e09af6bcd128
--- /dev/null
+++ b/test/Linker/module-flags-pic-2-a.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-link %s %p/Inputs/module-flags-pic-2-b.ll -S -o - 2> %t
+; RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+; test linking modules with two different PIC levels
+
+!0 = !{ i32 1, !"PIC Level", i32 1 }
+
+!llvm.module.flags = !{!0}
+
+; CHECK-ERRORS: ERROR: linking module flags 'PIC Level': IDs have conflicting values
diff --git a/test/Linker/opaque.ll b/test/Linker/opaque.ll
new file mode 100644
index 000000000000..1ba878c6a36f
--- /dev/null
+++ b/test/Linker/opaque.ll
@@ -0,0 +1,21 @@
+; RUN: llvm-link %p/opaque.ll %p/Inputs/opaque.ll -S -o - | FileCheck %s
+
+; CHECK-DAG: %A =   type {}
+; CHECK-DAG: %B =   type { %C, %C, %B* }
+; CHECK-DAG: %B.1 = type { %D, %E, %B.1* }
+; CHECK-DAG: %C =   type { %A }
+; CHECK-DAG: %D =   type { %E }
+; CHECK-DAG: %E =   type opaque
+
+; CHECK-DAG: @g1 = external global %B
+; CHECK-DAG: @g2 = external global %A
+; CHECK-DAG: @g3 = external global %B.1
+
+; CHECK-DAG: getelementptr %A* null, i32 0
+
+%A = type opaque
+%B = type { %C, %C, %B* }
+
+%C = type { %A }
+
+@g1 = external global %B
diff --git a/test/Linker/pr21374.ll b/test/Linker/pr21374.ll
new file mode 100644
index 000000000000..d777971adedb
--- /dev/null
+++ b/test/Linker/pr21374.ll
@@ -0,0 +1,20 @@
+; RUN: llvm-link -S -o - %p/pr21374.ll %p/Inputs/pr21374.ll | FileCheck %s
+; RUN: llvm-link -S -o - %p/Inputs/pr21374.ll %p/pr21374.ll | FileCheck %s
+
+; RUN: llvm-as -o %t1.bc %p/pr21374.ll
+; RUN: llvm-as -o %t2.bc %p/Inputs/pr21374.ll
+
+; RUN: llvm-link -S -o - %t1.bc %t2.bc | FileCheck %s
+; RUN: llvm-link -S -o - %t2.bc %t1.bc | FileCheck %s
+
+; Test that we get the same result with or without lazy loading.
+
+; CHECK: %foo = type { i8* }
+; CHECK-DAG: bitcast i32* null to %foo*
+; CHECK-DAG: define void @g(%foo* %x)
+
+%foo = type { i8* }
+define void @f() {
+  bitcast i32* null to %foo*
+  ret void
+}
diff --git a/test/Linker/pr21494.ll b/test/Linker/pr21494.ll
new file mode 100644
index 000000000000..8a17233b0eb6
--- /dev/null
+++ b/test/Linker/pr21494.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-link %s -S -o - | FileCheck %s
+
+@g1 = private global i8 0
+; CHECK-NOT: @g1
+
+@g2 = linkonce_odr global i8 0
+; CHECK-NOT: @g2
+
+@a1 = private alias i8* @g1
+; CHECK-NOT: @a1
+
+@a2 = linkonce_odr alias i8* @g2
+; CHECK-NOT: @a2
+
+define private void @f1() {
+  ret void
+}
+; CHECK-NOT: @f1
+
+define linkonce_odr void @f2() {
+  ret void
+}
+; CHECK-NOT: @f2
diff --git a/test/Linker/prefixdata.ll b/test/Linker/prefixdata.ll
deleted file mode 100644
index 1f11dc7083eb..000000000000
--- a/test/Linker/prefixdata.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: echo > %t.ll
-; RUN: llvm-link %t.ll %s -S -o - | FileCheck %s
-
-@i = linkonce_odr global i32 1
-
-; CHECK: define void @f() prefix i32* @i
-define void @f() prefix i32* @i {
-  ret void
-}
diff --git a/test/Linker/prologuedata.ll b/test/Linker/prologuedata.ll
new file mode 100644
index 000000000000..70204fdaacdd
--- /dev/null
+++ b/test/Linker/prologuedata.ll
@@ -0,0 +1,21 @@
+; RUN: llvm-link %s -S -o - | FileCheck %s
+
+@g1 = global void()* @f2
+; CHECK: @g1 = global void ()* @f2
+
+@p1 = global i8 42
+; CHECK: @p1 = global i8 42
+
+@p2 = internal global i8 43
+; CHECK: @p2 = internal global i8 43
+
+define void @f1() prologue i8* @p1 {
+  ret void
+}
+; CHECK: define void @f1() prologue i8* @p1 {
+
+define internal void @f2() prologue i8* @p2 {
+  ret void
+}
+
+; CHECK: define internal void @f2() prologue i8* @p2 {
diff --git a/test/Linker/redefinition.ll b/test/Linker/redefinition.ll
index 64a8c341fedb..1177a7094e0c 100644
--- a/test/Linker/redefinition.ll
+++ b/test/Linker/redefinition.ll
@@ -1,9 +1,6 @@
-; Test linking two functions with different prototypes and two globals 
+; Test linking two functions with different prototypes and two globals
 ; in different modules.
-; RUN: llvm-as %s -o %t.foo1.bc
-; RUN: llvm-as %s -o %t.foo2.bc
-; RUN: echo "define void @foo(i32 %x) { ret void }" | llvm-as -o %t.foo3.bc
-; RUN: not llvm-link %t.foo1.bc %t.foo2.bc -o %t.bc 2>&1 | FileCheck %s
-; RUN: not llvm-link %t.foo1.bc %t.foo3.bc -o %t.bc 2>&1 | FileCheck %s
-; CHECK: symbol multiply defined
+; RUN: not llvm-link %s %s -o %t.bc 2>&1 | FileCheck %s
+; RUN: not llvm-link %s %S/Inputs/redefinition.ll -o %t.bc 2>&1 | FileCheck %s
+; CHECK: ERROR: Linking globals named 'foo': symbol multiply defined!
 define void @foo() { ret void }
diff --git a/test/Linker/replaced-function-matches-first-subprogram.ll b/test/Linker/replaced-function-matches-first-subprogram.ll
new file mode 100644
index 000000000000..c0ec5f3cd228
--- /dev/null
+++ b/test/Linker/replaced-function-matches-first-subprogram.ll
@@ -0,0 +1,75 @@
+; RUN: llvm-link %s %S/Inputs/replaced-function-matches-first-subprogram.ll -S | FileCheck %s
+
+; Generated from C++ source:
+;
+; // repro/t.h
+; template <class T> struct Class {
+;   int foo() { return 0; }
+; };
+; // repro/d1/t1.cpp
+; #include "t.h"
+; int foo() { return Class<int>().foo(); }
+; // repro/d2/t2.cpp
+; #include "t.h"
+; template struct Class<int>;
+
+%struct.Class = type { i8 }
+
+define i32 @_Z3foov() {
+entry:
+  %tmp = alloca %struct.Class, align 1
+  %call = call i32 @_ZN5ClassIiE3fooEv(%struct.Class* %tmp), !dbg !14
+  ret i32 %call, !dbg !14
+}
+
+; CHECK: define weak_odr i32 @_ZN5ClassIiE3fooEv(%struct.Class* %this){{.*}}{
+; CHECK-NOT: }
+; CHECK: !dbg ![[LOC:[0-9]+]]
+define linkonce_odr i32 @_ZN5ClassIiE3fooEv(%struct.Class* %this) align 2 {
+entry:
+  %this.addr = alloca %struct.Class*, align 8
+  store %struct.Class* %this, %struct.Class** %this.addr, align 8
+  %this1 = load %struct.Class** %this.addr
+  ret i32 0, !dbg !15
+}
+
+; CHECK: !llvm.dbg.cu = !{![[CU1:[0-9]+]], ![[CU2:[0-9]+]]}
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!10, !11, !12}
+!llvm.ident = !{!13}
+
+; Extract out the list of subprograms from each compile unit.
+; CHECK-DAG: ![[CU1]] = !{!"0x11{{[^"]+}}", {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, ![[SPs1:[0-9]+]],
+; CHECK-DAG: ![[CU2]] = !{!"0x11{{[^"]+}}", {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, ![[SPs2:[0-9]+]],
+!0 = !{!"0x11\004\00clang version 3.6.0 (trunk 224193) (llvm/trunk 224197)\000\00\000\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d1/t1.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"t1.cpp", !"/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d1"}
+!2 = !{}
+
+; Extract out each compile unit's single subprogram.  The replaced subprogram
+; should be dropped by the first compile unit.
+; CHECK-DAG: ![[SPs1]] = !{![[SP1:[0-9]+]]}
+; CHECK-DAG: ![[SPs2]] = !{![[SP2:[0-9]+]]}
+!3 = !{!4, !7}
+!4 = !{!"0x2e\00foo\00foo\00\002\000\001\000\000\00256\000\002", !1, !5, !6, null, i32 ()* @_Z3foov, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [foo]
+!5 = !{!"0x29", !1}    ; [ DW_TAG_file_type ] [/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d1/t1.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+
+; Extract out the file from the replaced subprogram.  Confirm that each
+; subprogram is pointing at the correct function.
+; CHECK-DAG: ![[SP1]] = !{!"0x2e{{[^"]+}}", {{.*}}, i32 ()* @_Z3foov,
+; CHECK-DAG: ![[SP2]] = !{!"0x2e{{[^"]+}}", ![[FILE:[0-9]+]], {{.*}}, i32 (%struct.Class*)* @_ZN5ClassIiE3fooEv,
+!7 = !{!"0x2e\00foo\00foo\00\002\000\001\000\000\00256\000\002", !8, !9, !6, null, i32 (%struct.Class*)* @_ZN5ClassIiE3fooEv, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [foo]
+
+; The new subprogram should be pointing at the new directory.
+; CHECK-DAG: ![[FILE]] = !{!"../t.h", !"/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d2"}
+!8 = !{!"../t.h", !"/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d1"}
+!9 = !{!"0x29", !8}    ; [ DW_TAG_file_type ] [/Users/dexonsmith/data/llvm/staging/test/Linker/repro/d1/../t.h]
+!10 = !{i32 2, !"Dwarf Version", i32 2}
+!11 = !{i32 2, !"Debug Info Version", i32 2}
+!12 = !{i32 1, !"PIC Level", i32 2}
+!13 = !{!"clang version 3.6.0 (trunk 224193) (llvm/trunk 224197)"}
+!14 = !MDLocation(line: 2, column: 20, scope: !4)
+
+; The same subprogram should be pointed to by inside the !dbg reference.
+; CHECK: ![[LOC]] = !MDLocation(line: 2, column: 15, scope: ![[SP2]])
+!15 = !MDLocation(line: 2, column: 15, scope: !7)
diff --git a/test/Linker/targettriple.ll b/test/Linker/targettriple.ll
index 71830477bddc..c544a14a19d3 100644
--- a/test/Linker/targettriple.ll
+++ b/test/Linker/targettriple.ll
@@ -5,6 +5,9 @@
 ; RUN: llvm-link %s %S/Inputs/targettriple-b.ll -S -o - 2>%t.b.err | FileCheck %s
 ; RUN: cat %t.b.err | FileCheck --check-prefix=WARN-B %s
 
+; RUN: llvm-link -suppress-warnings %s %S/Inputs/targettriple-b.ll -S -o - 2>%t.no-warn.err | FileCheck %s
+; RUN: (echo foo ;cat %t.no-warn.err) | FileCheck --check-prefix=WARN-A %s
+
 target triple = "e"
 
 ; CHECK: target triple = "e"
diff --git a/test/Linker/testlink.ll b/test/Linker/testlink.ll
new file mode 100644
index 000000000000..aad4b9bfb07a
--- /dev/null
+++ b/test/Linker/testlink.ll
@@ -0,0 +1,104 @@
+; RUN: llvm-link %s %S/Inputs/testlink.ll -S | FileCheck %s
+
+; CHECK: %Ty2 = type { %Ty1* }
+; CHECK: %Ty1 = type { %Ty2* }
+%Ty1 = type opaque
+%Ty2 = type { %Ty1* }
+
+; CHECK: %intlist = type { %intlist*, i32 }
+%intlist = type { %intlist*, i32 }
+
+; The uses of intlist in the other file should be remapped.
+; CHECK-NOT: {{%intlist.[0-9]}}
+
+; CHECK: %VecSize = type { <5 x i32> }
+; CHECK: %VecSize.{{[0-9]}} = type { <10 x i32> }
+%VecSize = type { <5 x i32> }
+
+%Struct1 = type opaque
+@S1GV = external global %Struct1*
+
+
+@GVTy1 = external global %Ty1*
+@GVTy2 = global %Ty2* null
+
+
+; This should stay the same
+; CHECK-DAG: @MyIntList = global %intlist { %intlist* null, i32 17 }
+@MyIntList = global %intlist { %intlist* null, i32 17 }
+
+
+; Nothing to link here.
+
+; CHECK-DAG: @0 = external global i32
+@0 = external global i32
+; CHECK-DAG: @Inte = global i32 1
+@Inte = global i32 1
+
+; Intern1 is intern in both files, rename testlink2's.
+; CHECK-DAG: @Intern1 = internal constant i32 42
+@Intern1 = internal constant i32 42
+
+@UseIntern1 = global i32* @Intern1
+
+; This should get renamed since there is a definition that is non-internal in
+; the other module.
+; CHECK-DAG: @Intern2{{[0-9]+}} = internal constant i32 792
+@Intern2 = internal constant i32 792
+
+@UseIntern2 = global i32* @Intern2
+
+; CHECK-DAG: @MyVarPtr = linkonce global { i32* } { i32* @MyVar }
+@MyVarPtr = linkonce global { i32* } { i32* @MyVar }
+
+@UseMyVarPtr = global { i32* }* @MyVarPtr
+
+; CHECK-DAG: @MyVar = global i32 4
+@MyVar = external global i32
+
+; Take value from other module.
+; CHECK-DAG: AConst = constant i32 1234
+@AConst = linkonce constant i32 123
+
+; Renamed version of Intern1.
+; CHECK-DAG: @Intern1{{[0-9]+}} = internal constant i32 52
+
+
+; Globals linked from testlink2.
+; CHECK-DAG: @Intern2 = constant i32 12345
+
+; CHECK-DAG: @MyIntListPtr = constant
+; CHECK-DAG: @1 = constant i32 412
+
+
+declare i32 @foo(i32)
+
+declare void @print(i32)
+
+define void @main() {
+  %v1 = load i32* @MyVar
+  call void @print(i32 %v1)
+  %idx = getelementptr %intlist* @MyIntList, i64 0, i32 1
+  %v2 = load i32* %idx
+  call void @print(i32 %v2)
+  %1 = call i32 @foo(i32 5)
+  %v3 = load i32* @MyVar
+  call void @print(i32 %v3)
+  %v4 = load i32* %idx
+  call void @print(i32 %v4)
+  ret void
+}
+
+define internal void @testintern() {
+  ret void
+}
+
+define internal void @Testintern() {
+  ret void
+}
+
+define void @testIntern() {
+  ret void
+}
+
+declare void @VecSizeCrash(%VecSize)
diff --git a/test/Linker/testlink1.ll b/test/Linker/testlink1.ll
deleted file mode 100644
index 6ba6fd5fd7e9..000000000000
--- a/test/Linker/testlink1.ll
+++ /dev/null
@@ -1,101 +0,0 @@
-; RUN: llvm-as < %s > %t.bc
-; RUN: llvm-as < %p/testlink2.ll > %t2.bc
-; RUN: llvm-link %t.bc %t2.bc -S | FileCheck %s
-
-; CHECK: %Ty2 = type { %Ty1* }
-; CHECK: %Ty1 = type { %Ty2* }
-%Ty1 = type opaque
-%Ty2 = type { %Ty1* }
-
-; CHECK: %intlist = type { %intlist*, i32 }
-%intlist = type { %intlist*, i32 }
-
-; The uses of intlist in the other file should be remapped.
-; CHECK-NOT: {{%intlist.[0-9]}}
-
-; CHECK: %VecSize = type { <5 x i32> }
-; CHECK: %VecSize.{{[0-9]}} = type { <10 x i32> }
-%VecSize = type { <5 x i32> }
-
-%Struct1 = type opaque
-@S1GV = external global %Struct1*
-
-
-@GVTy1 = external global %Ty1*
-@GVTy2 = global %Ty2* null
-
-
-; This should stay the same
-; CHECK: @MyIntList = global %intlist { %intlist* null, i32 17 }
-@MyIntList = global %intlist { %intlist* null, i32 17 }
-
-
-; Nothing to link here.
-
-; CHECK: @0 = external global i32
-@0 = external global i32
-; CHECK: @Inte = global i32 1
-@Inte = global i32 1
-
-; Intern1 is intern in both files, rename testlink2's.
-; CHECK: @Intern1 = internal constant i32 42
-@Intern1 = internal constant i32 42
-
-; This should get renamed since there is a definition that is non-internal in
-; the other module.
-; CHECK: @Intern2{{[0-9]+}} = internal constant i32 792
-@Intern2 = internal constant i32 792
-
-
-; CHECK: @MyVarPtr = linkonce global { i32* } { i32* @MyVar }
-@MyVarPtr = linkonce global { i32* } { i32* @MyVar }
-
-; CHECK: @MyVar = global i32 4
-@MyVar = external global i32
-
-; Take value from other module.
-; CHECK: AConst = constant i32 1234
-@AConst = linkonce constant i32 123
-
-; Renamed version of Intern1.
-; CHECK: @Intern1{{[0-9]+}} = internal constant i32 52
-
-
-; Globals linked from testlink2.
-; CHECK: @Intern2 = constant i32 12345
-
-; CHECK: @MyIntListPtr = constant 
-; CHECK: @1 = constant i32 412
-
-
-declare i32 @foo(i32)
-
-declare void @print(i32)
-
-define void @main() {
-  %v1 = load i32* @MyVar
-  call void @print(i32 %v1)
-  %idx = getelementptr %intlist* @MyIntList, i64 0, i32 1
-  %v2 = load i32* %idx
-  call void @print(i32 %v2)
-  %1 = call i32 @foo(i32 5)
-  %v3 = load i32* @MyVar
-  call void @print(i32 %v3)
-  %v4 = load i32* %idx
-  call void @print(i32 %v4)
-  ret void
-}
-
-define internal void @testintern() {
-  ret void
-}
-
-define internal void @Testintern() {
-  ret void
-}
-
-define void @testIntern() {
-  ret void
-}
-
-declare void @VecSizeCrash(%VecSize)
diff --git a/test/Linker/testlink2.ll b/test/Linker/testlink2.ll
deleted file mode 100644
index ff8e5299869a..000000000000
--- a/test/Linker/testlink2.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; This file is used by testlink1.ll, so it doesn't actually do anything itself
-;
-; RUN: true
-
-%intlist = type { %intlist*, i32 }
-
-
-%Ty1 = type { %Ty2* }
-%Ty2 = type opaque
-
-%VecSize = type { <10 x i32> }
-
-@GVTy1 = global %Ty1* null
-@GVTy2 = external global %Ty2*
-
-
-@MyVar = global i32 4
-@MyIntList = external global %intlist
-@AConst = constant i32 1234
-
-;; Intern in both testlink[12].ll
-@Intern1 = internal constant i32 52
-
-;; Intern in one but not in other
-@Intern2 = constant i32 12345
-
-@MyIntListPtr = constant { %intlist* } { %intlist* @MyIntList }
-@MyVarPtr = linkonce global { i32* } { i32* @MyVar }
-@0 = constant i32 412
-
-; Provides definition of Struct1 and of S1GV.
-%Struct1 = type { i32 }
-@S1GV = global %Struct1* null
-
-define i32 @foo(i32 %blah) {
-  store i32 %blah, i32* @MyVar
-  %idx = getelementptr %intlist* @MyIntList, i64 0, i32 1
-  store i32 12, i32* %idx
-  %ack = load i32* @0
-  %fzo = add i32 %ack, %blah
-  ret i32 %fzo
-}
-
-declare void @unimp(float, double)
-
-define internal void @testintern() {
-  ret void
-}
-
-define void @Testintern() {
-  ret void
-}
-
-define internal void @testIntern() {
-  ret void
-}
-
-declare void @VecSizeCrash1(%VecSize)
diff --git a/test/Linker/type-unique-alias.ll b/test/Linker/type-unique-alias.ll
new file mode 100644
index 000000000000..e43450fbbeb3
--- /dev/null
+++ b/test/Linker/type-unique-alias.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-link -S %s %p/Inputs/type-unique-alias.ll | FileCheck %s
+
+%t = type { i8 }
+
+@g = global %t zeroinitializer
+@a = weak alias %t* @g
+
+; CHECK: @g = global %t zeroinitializer
+; CHECK: @g2 = global %t zeroinitializer
+; CHECK: @a = weak alias %t* @g
diff --git a/test/Linker/type-unique-dst-types.ll b/test/Linker/type-unique-dst-types.ll
new file mode 100644
index 000000000000..30aecbb970cb
--- /dev/null
+++ b/test/Linker/type-unique-dst-types.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-link %p/type-unique-dst-types.ll \
+; RUN:           %p/Inputs/type-unique-dst-types2.ll \
+; RUN:           %p/Inputs/type-unique-dst-types3.ll -S -o - | FileCheck %s
+
+; This tests the importance of keeping track of which types are part of the
+; destination module.
+; When the second input is merged in, the context gets an unused A.11. When
+; the third module is then merged, we should pretend it doesn't exist.
+
+; CHECK: %A = type { %B }
+; CHECK-NEXT: %B = type { i8 }
+
+; CHECK: @g3 = external global %A
+; CHECK: @g1 = external global %A
+; CHECK: @g2 = external global %A
+
+%A = type { %B }
+%B = type { i8 }
+@g3 = external global %A
diff --git a/test/Linker/type-unique-name.ll b/test/Linker/type-unique-name.ll
new file mode 100644
index 000000000000..191b6ddb2352
--- /dev/null
+++ b/test/Linker/type-unique-name.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-link -S %s %p/Inputs/type-unique-name.ll | FileCheck %s
+
+; Test that we keep the type name
+; CHECK: %abc = type { i8 }
+
+%abc = type opaque
+
+declare %abc* @f()
+
+define %abc* @g() {
+  %x = call %abc* @f()
+  ret %abc* %x
+}
diff --git a/test/Linker/type-unique-odr-a.ll b/test/Linker/type-unique-odr-a.ll
index 91c80339ec03..900b03592647 100644
--- a/test/Linker/type-unique-odr-a.ll
+++ b/test/Linker/type-unique-odr-a.ll
@@ -41,7 +41,7 @@
 ; CHECK:   DW_AT_MIPS_linkage_name {{.*}} "_ZL3barv"
 
 ; getFoo and A may only appear once.
-; CHECK-NOT:  {{(getFoo)|("A")}}
+; CHECK-NOT:  AT_name{{.*(getFoo)|("A")}}
 
 
 ; ModuleID = 'type-unique-odr-a.cpp'
@@ -59,12 +59,12 @@ entry:
 define internal void @_ZL3barv() #0 {
 entry:
   %a = alloca %class.A, align 4
-  call void @llvm.dbg.declare(metadata !{%class.A* %a}, metadata !24), !dbg !25
+  call void @llvm.dbg.declare(metadata %class.A* %a, metadata !24, metadata !{!"0x102"}), !dbg !25
   ret void, !dbg !26
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
@@ -73,30 +73,30 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!20, !21}
 !llvm.ident = !{!22}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !14, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [<unknown>] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"<unknown>", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786434, metadata !5, null, metadata !"A", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
-!5 = metadata !{metadata !"type-unique-odr-a.cpp", metadata !""}
-!6 = metadata !{metadata !7, metadata !9}
-!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1A", metadata !"data", i32 2, i64 32, i64 32, i64 0, i32 1, metadata !8} ; [ DW_TAG_member ] [data] [line 2, size 32, align 32, offset 0] [private] [from int]
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1A", metadata !"getFoo", metadata !"getFoo", metadata !"_ZN1A6getFooEv", i32 4, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i32 258, i1 false, null, null, i32 0, metadata !13, i32 4} ; [ DW_TAG_subprogram ] [line 4] [protected] [getFoo]
-!10 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!11 = metadata !{null, metadata !12}
-!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
-!13 = metadata !{i32 786468}
-!14 = metadata !{metadata !15, metadata !19}
-!15 = metadata !{i32 786478, metadata !5, metadata !16, metadata !"baz", metadata !"baz", metadata !"_Z3bazv", i32 11, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3bazv, null, null, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [baz]
-!16 = metadata !{i32 786473, metadata !5}         ; [ DW_TAG_file_type ] [type-unique-odr-a.cpp]
-!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!18 = metadata !{null}
-!19 = metadata !{i32 786478, metadata !5, metadata !16, metadata !"bar", metadata !"bar", metadata !"_ZL3barv", i32 7, metadata !17, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZL3barv, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [local] [def] [bar]
-!20 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!22 = metadata !{metadata !"clang version 3.5.0 "}
-!23 = metadata !{i32 11, i32 0, metadata !15, null}
-!24 = metadata !{i32 786688, metadata !19, metadata !"a", metadata !16, i32 8, metadata !"_ZTS1A", i32 0, i32 0} ; [ DW_TAG_auto_variable ] [a] [line 8]
-!25 = metadata !{i32 8, i32 0, metadata !19, null} ; [ DW_TAG_imported_declaration ]
-!26 = metadata !{i32 9, i32 0, metadata !19, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !3, !14, !2, !2} ; [ DW_TAG_compile_unit ] [<unknown>] [DW_LANG_C_plus_plus]
+!1 = !{!"<unknown>", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2\00A\001\0032\0032\000\000\000", !5, null, null, !6, null, null, !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 1, size 32, align 32, offset 0] [def] [from ]
+!5 = !{!"type-unique-odr-a.cpp", !""}
+!6 = !{!7, !9}
+!7 = !{!"0xd\00data\002\0032\0032\000\001", !5, !"_ZTS1A", !8} ; [ DW_TAG_member ] [data] [line 2, size 32, align 32, offset 0] [private] [from int]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x2e\00getFoo\00getFoo\00_ZN1A6getFooEv\004\000\000\000\006\00258\000\004", !5, !"_ZTS1A", !10, null, null, null, i32 0, !13} ; [ DW_TAG_subprogram ] [line 4] [protected] [getFoo]
+!10 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !{null, !12}
+!12 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!13 = !{i32 786468}
+!14 = !{!15, !19}
+!15 = !{!"0x2e\00baz\00baz\00_Z3bazv\0011\000\001\000\006\00256\000\0011", !5, !16, !17, null, void ()* @_Z3bazv, null, null, !2} ; [ DW_TAG_subprogram ] [line 11] [def] [baz]
+!16 = !{!"0x29", !5}         ; [ DW_TAG_file_type ] [type-unique-odr-a.cpp]
+!17 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !18, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = !{null}
+!19 = !{!"0x2e\00bar\00bar\00_ZL3barv\007\001\001\000\006\00256\000\007", !5, !16, !17, null, void ()* @_ZL3barv, null, null, !2} ; [ DW_TAG_subprogram ] [line 7] [local] [def] [bar]
+!20 = !{i32 2, !"Dwarf Version", i32 4}
+!21 = !{i32 1, !"Debug Info Version", i32 2}
+!22 = !{!"clang version 3.5.0 "}
+!23 = !MDLocation(line: 11, scope: !15)
+!24 = !{!"0x100\00a\008\000", !19, !16, !"_ZTS1A"} ; [ DW_TAG_auto_variable ] [a] [line 8]
+!25 = !MDLocation(line: 8, scope: !19)
+!26 = !MDLocation(line: 9, scope: !19)
diff --git a/test/Linker/type-unique-odr-b.ll b/test/Linker/type-unique-odr-b.ll
index 3c8b7a1e428f..b262191b91fc 100644
--- a/test/Linker/type-unique-odr-b.ll
+++ b/test/Linker/type-unique-odr-b.ll
@@ -26,13 +26,13 @@ define void @_ZN1A6getFooEv(%class.A* %this) #0 align 2 {
 entry:
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !24), !dbg !26
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !24, metadata !{!"0x102"}), !dbg !26
   %this1 = load %class.A** %this.addr
   ret void, !dbg !27
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind
 define void @_Z1fv() #0 {
@@ -54,33 +54,33 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!21, !22}
 !llvm.ident = !{!23}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !14, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [<unknown>] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"<unknown>", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786434, metadata !5, null, metadata !"A", i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 2, size 32, align 32, offset 0] [def] [from ]
-!5 = metadata !{metadata !"type-unique-odr-b.cpp", metadata !""}
-!6 = metadata !{metadata !7, metadata !9}
-!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS1A", metadata !"data", i32 3, i64 32, i64 32, i64 0, i32 1, metadata !8} ; [ DW_TAG_member ] [data] [line 3, size 32, align 32, offset 0] [private] [from int]
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1A", metadata !"getFoo", metadata !"getFoo", metadata !"_ZN1A6getFooEv", i32 5, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i32 258, i1 false, null, null, i32 0, metadata !13, i32 5} ; [ DW_TAG_subprogram ] [line 5] [protected] [getFoo]
-!10 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!11 = metadata !{null, metadata !12}
-!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
-!13 = metadata !{i32 786468}
-!14 = metadata !{metadata !15, metadata !16, metadata !20}
-!15 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1A", metadata !"getFoo", metadata !"getFoo", metadata !"_ZN1A6getFooEv", i32 8, metadata !10, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1A6getFooEv, null, metadata !9, metadata !2, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [getFoo]
-!16 = metadata !{i32 786478, metadata !5, metadata !17, metadata !"f", metadata !"f", metadata !"_Z1fv", i32 11, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z1fv, null, null, metadata !2, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [f]
-!17 = metadata !{i32 786473, metadata !5}         ; [ DW_TAG_file_type ] [type-unique-odr-b.cpp]
-!18 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!19 = metadata !{null}
-!20 = metadata !{i32 786478, metadata !5, metadata !17, metadata !"bar", metadata !"bar", metadata !"_ZL3barv", i32 10, metadata !18, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_ZL3barv, null, null, metadata !2, i32 10} ; [ DW_TAG_subprogram ] [line 10] [local] [def] [bar]
-!21 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!23 = metadata !{metadata !"clang version 3.5.0 "}
-!24 = metadata !{i32 786689, metadata !15, metadata !"this", null, i32 16777216, metadata !25, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!25 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
-!26 = metadata !{i32 0, i32 0, metadata !15, null}
-!27 = metadata !{i32 8, i32 0, metadata !15, null} ; [ DW_TAG_imported_declaration ]
-!28 = metadata !{i32 11, i32 0, metadata !16, null}
-!29 = metadata !{i32 10, i32 0, metadata !20, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !3, !14, !2, !2} ; [ DW_TAG_compile_unit ] [<unknown>] [DW_LANG_C_plus_plus]
+!1 = !{!"<unknown>", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2\00A\002\0032\0032\000\000\000", !5, null, null, !6, null, null, !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 2, size 32, align 32, offset 0] [def] [from ]
+!5 = !{!"type-unique-odr-b.cpp", !""}
+!6 = !{!7, !9}
+!7 = !{!"0xd\00data\003\0032\0032\000\001", !5, !"_ZTS1A", !8} ; [ DW_TAG_member ] [data] [line 3, size 32, align 32, offset 0] [private] [from int]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x2e\00getFoo\00getFoo\00_ZN1A6getFooEv\005\000\000\000\006\00258\000\005", !5, !"_ZTS1A", !10, null, null, null, i32 0, !13} ; [ DW_TAG_subprogram ] [line 5] [protected] [getFoo]
+!10 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !{null, !12}
+!12 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!13 = !{i32 786468}
+!14 = !{!15, !16, !20}
+!15 = !{!"0x2e\00getFoo\00getFoo\00_ZN1A6getFooEv\008\000\001\000\006\00256\000\008", !5, !"_ZTS1A", !10, null, void (%class.A*)* @_ZN1A6getFooEv, null, !9, !2} ; [ DW_TAG_subprogram ] [line 8] [def] [getFoo]
+!16 = !{!"0x2e\00f\00f\00_Z1fv\0011\000\001\000\006\00256\000\0011", !5, !17, !18, null, void ()* @_Z1fv, null, null, !2} ; [ DW_TAG_subprogram ] [line 11] [def] [f]
+!17 = !{!"0x29", !5}         ; [ DW_TAG_file_type ] [type-unique-odr-b.cpp]
+!18 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !19, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!19 = !{null}
+!20 = !{!"0x2e\00bar\00bar\00_ZL3barv\0010\001\001\000\006\00256\000\0010", !5, !17, !18, null, void ()* @_ZL3barv, null, null, !2} ; [ DW_TAG_subprogram ] [line 10] [local] [def] [bar]
+!21 = !{i32 2, !"Dwarf Version", i32 4}
+!22 = !{i32 1, !"Debug Info Version", i32 2}
+!23 = !{!"clang version 3.5.0 "}
+!24 = !{!"0x101\00this\0016777216\001088", !15, null, !25} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!25 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
+!26 = !MDLocation(line: 0, scope: !15)
+!27 = !MDLocation(line: 8, scope: !15)
+!28 = !MDLocation(line: 11, scope: !16)
+!29 = !MDLocation(line: 10, scope: !20)
diff --git a/test/Linker/type-unique-opaque.ll b/test/Linker/type-unique-opaque.ll
new file mode 100644
index 000000000000..b4f696699212
--- /dev/null
+++ b/test/Linker/type-unique-opaque.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-link -S %s %p/Inputs/type-unique-opaque.ll | FileCheck %s
+
+; Test that a failed attempt at merging %u2 and %t2 (for the other file) will
+; not cause %u and %t to get merged.
+
+; CHECK: %u = type opaque
+; CHECK: define %u* @g() {
+
+%u = type opaque
+%u2 = type { %u*, i8 }
+
+declare %u2* @f()
+
+define %u* @g() {
+  ret %u* null
+}
diff --git a/test/Linker/type-unique-simple-a.ll b/test/Linker/type-unique-simple-a.ll
index 350cd1fd4595..ef29cd9781d7 100644
--- a/test/Linker/type-unique-simple-a.ll
+++ b/test/Linker/type-unique-simple-a.ll
@@ -54,13 +54,13 @@ entry:
   %a.addr = alloca i32, align 4
   %t = alloca %struct.Base, align 4
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !15), !dbg !16
-  call void @llvm.dbg.declare(metadata !{%struct.Base* %t}, metadata !17), !dbg !18
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !15, metadata !{!"0x102"}), !dbg !16
+  call void @llvm.dbg.declare(metadata %struct.Base* %t, metadata !17, metadata !{!"0x102"}), !dbg !18
   ret void, !dbg !19
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -68,24 +68,24 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!14, !20}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git c23b1db6268c8e7ce64026d57d1510c1aac200a0) (http://llvm.org/git/llvm.git 09b98fe3978eddefc2145adc1056cf21580ce945)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !9, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/simple/foo.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"foo.cpp", metadata !"/Users/mren/c_testing/type_unique_air/simple"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !5, null, metadata !"Base", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 32, align 32, offset 0] [def] [from ]
-!5 = metadata !{metadata !"./a.hpp", metadata !"/Users/mren/c_testing/type_unique_air/simple"}
-!6 = metadata !{metadata !7}
-!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"f", metadata !"f", metadata !"_Z1fi", i32 3, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1fi, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
-!11 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/simple/foo.cpp]
-!12 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!13 = metadata !{null, metadata !8}
-!14 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!15 = metadata !{i32 786689, metadata !10, metadata !"a", metadata !11, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3]
-!16 = metadata !{i32 3, i32 0, metadata !10, null}
-!17 = metadata !{i32 786688, metadata !10, metadata !"t", metadata !11, i32 4, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 4]
-!18 = metadata !{i32 4, i32 0, metadata !10, null}
-!19 = metadata !{i32 5, i32 0, metadata !10, null}
-!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 (http://llvm.org/git/clang.git c23b1db6268c8e7ce64026d57d1510c1aac200a0) (http://llvm.org/git/llvm.git 09b98fe3978eddefc2145adc1056cf21580ce945)\000\00\000\00\000", !1, !2, !3, !9, !2, !2} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/simple/foo.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"foo.cpp", !"/Users/mren/c_testing/type_unique_air/simple"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00Base\001\0032\0032\000\000\000", !5, null, null, !6, null, null, !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 32, align 32, offset 0] [def] [from ]
+!5 = !{!"./a.hpp", !"/Users/mren/c_testing/type_unique_air/simple"}
+!6 = !{!7}
+!7 = !{!"0xd\00a\002\0032\0032\000\000", !5, !"_ZTS4Base", !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!10}
+!10 = !{!"0x2e\00f\00f\00_Z1fi\003\000\001\000\006\00256\000\003", !1, !11, !12, null, void (i32)* @_Z1fi, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
+!11 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/simple/foo.cpp]
+!12 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !13, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = !{null, !8}
+!14 = !{i32 2, !"Dwarf Version", i32 2}
+!15 = !{!"0x101\00a\0016777219\000", !10, !11, !8} ; [ DW_TAG_arg_variable ] [a] [line 3]
+!16 = !MDLocation(line: 3, scope: !10)
+!17 = !{!"0x100\00t\004\000", !10, !11, !4} ; [ DW_TAG_auto_variable ] [t] [line 4]
+!18 = !MDLocation(line: 4, scope: !10)
+!19 = !MDLocation(line: 5, scope: !10)
+!20 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/type-unique-simple-b.ll b/test/Linker/type-unique-simple-b.ll
index 854ec158794a..fb1a5292975f 100644
--- a/test/Linker/type-unique-simple-b.ll
+++ b/test/Linker/type-unique-simple-b.ll
@@ -10,13 +10,13 @@ entry:
   %a.addr = alloca i32, align 4
   %t = alloca %struct.Base, align 4
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !18), !dbg !19
-  call void @llvm.dbg.declare(metadata !{%struct.Base* %t}, metadata !20), !dbg !21
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !18, metadata !{!"0x102"}), !dbg !19
+  call void @llvm.dbg.declare(metadata %struct.Base* %t, metadata !20, metadata !{!"0x102"}), !dbg !21
   ret void, !dbg !22
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: ssp uwtable
 define i32 @main() #2 {
@@ -38,30 +38,30 @@ attributes #3 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!17, !26}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (http://llvm.org/git/clang.git c23b1db6268c8e7ce64026d57d1510c1aac200a0) (http://llvm.org/git/llvm.git 09b98fe3978eddefc2145adc1056cf21580ce945)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !9, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/simple/bar.cpp] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"bar.cpp", metadata !"/Users/mren/c_testing/type_unique_air/simple"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !5, null, metadata !"Base", i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 32, align 32, offset 0] [def] [from ]
-!5 = metadata !{metadata !"./a.hpp", metadata !"/Users/mren/c_testing/type_unique_air/simple"}
-!6 = metadata !{metadata !7}
-!7 = metadata !{i32 786445, metadata !5, metadata !"_ZTS4Base", metadata !"a", i32 2, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{metadata !10, metadata !14}
-!10 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"g", metadata !"g", metadata !"_Z1gi", i32 4, metadata !12, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @_Z1gi, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [g]
-!11 = metadata !{i32 786473, metadata !1}         ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/simple/bar.cpp]
-!12 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!13 = metadata !{null, metadata !8}
-!14 = metadata !{i32 786478, metadata !1, metadata !11, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
-!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{metadata !8}
-!17 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!18 = metadata !{i32 786689, metadata !10, metadata !"a", metadata !11, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 4]
-!19 = metadata !{i32 4, i32 0, metadata !10, null}
-!20 = metadata !{i32 786688, metadata !10, metadata !"t", metadata !11, i32 5, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [t] [line 5]
-!21 = metadata !{i32 5, i32 0, metadata !10, null}
-!22 = metadata !{i32 6, i32 0, metadata !10, null}
-!23 = metadata !{i32 8, i32 0, metadata !14, null} ; [ DW_TAG_imported_declaration ]
-!24 = metadata !{i32 9, i32 0, metadata !14, null}
-!25 = metadata !{i32 10, i32 0, metadata !14, null}
-!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 (http://llvm.org/git/clang.git c23b1db6268c8e7ce64026d57d1510c1aac200a0) (http://llvm.org/git/llvm.git 09b98fe3978eddefc2145adc1056cf21580ce945)\000\00\000\00\000", !1, !2, !3, !9, !2, !2} ; [ DW_TAG_compile_unit ] [/Users/mren/c_testing/type_unique_air/simple/bar.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"bar.cpp", !"/Users/mren/c_testing/type_unique_air/simple"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00Base\001\0032\0032\000\000\000", !5, null, null, !6, null, null, !"_ZTS4Base"} ; [ DW_TAG_structure_type ] [Base] [line 1, size 32, align 32, offset 0] [def] [from ]
+!5 = !{!"./a.hpp", !"/Users/mren/c_testing/type_unique_air/simple"}
+!6 = !{!7}
+!7 = !{!"0xd\00a\002\0032\0032\000\000", !5, !"_ZTS4Base", !8} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!10, !14}
+!10 = !{!"0x2e\00g\00g\00_Z1gi\004\000\001\000\006\00256\000\004", !1, !11, !12, null, void (i32)* @_Z1gi, null, null, !2} ; [ DW_TAG_subprogram ] [line 4] [def] [g]
+!11 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [/Users/mren/c_testing/type_unique_air/simple/bar.cpp]
+!12 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !13, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!13 = !{null, !8}
+!14 = !{!"0x2e\00main\00main\00\007\000\001\000\006\00256\000\007", !1, !11, !15, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
+!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{!8}
+!17 = !{i32 2, !"Dwarf Version", i32 2}
+!18 = !{!"0x101\00a\0016777220\000", !10, !11, !8} ; [ DW_TAG_arg_variable ] [a] [line 4]
+!19 = !MDLocation(line: 4, scope: !10)
+!20 = !{!"0x100\00t\005\000", !10, !11, !4} ; [ DW_TAG_auto_variable ] [t] [line 5]
+!21 = !MDLocation(line: 5, scope: !10)
+!22 = !MDLocation(line: 6, scope: !10)
+!23 = !MDLocation(line: 8, scope: !14)
+!24 = !MDLocation(line: 9, scope: !14)
+!25 = !MDLocation(line: 10, scope: !14)
+!26 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Linker/type-unique-simple2-a.ll b/test/Linker/type-unique-simple2-a.ll
index d0f1155fe654..72a776bba0c1 100644
--- a/test/Linker/type-unique-simple2-a.ll
+++ b/test/Linker/type-unique-simple2-a.ll
@@ -19,7 +19,7 @@
 ; }
 ;
 ; CHECK: _ZN1A6setFooEv
-; CHECK: DW_AT_accessibility [DW_FORM_data1]   (0x01)
+; CHECK: DW_AT_accessibility [DW_FORM_data1]   (DW_ACCESS_public)
 ; CHECK-NOT: DW_AT_accessibility
 ; CHECK: DW_TAG
 
@@ -48,7 +48,7 @@ define linkonce_odr void @_ZN1AC1Ev(%class.A* %this) unnamed_addr #2 align 2 {
 entry:
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !39), !dbg !41
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !39, metadata !{!"0x102"}), !dbg !41
   %this1 = load %class.A** %this.addr
   call void @_ZN1AC2Ev(%class.A* %this1) #1, !dbg !42
   ret void, !dbg !42
@@ -57,14 +57,14 @@ entry:
 declare i32 @_ZN1A6getFooEv(%class.A*)
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #4
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #4
 
 ; Function Attrs: inlinehint nounwind
 define linkonce_odr void @_ZN1AC2Ev(%class.A* %this) unnamed_addr #2 align 2 {
 entry:
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !44), !dbg !45
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !44, metadata !{!"0x102"}), !dbg !45
   %this1 = load %class.A** %this.addr
   %0 = bitcast %class.A* %this1 to i8***, !dbg !46
   store i8** getelementptr inbounds ([4 x i8*]* @_ZTV1A, i64 0, i64 2), i8*** %0, !dbg !46
@@ -80,50 +80,50 @@ attributes #4 = { nounwind readnone }
 !llvm.module.flags = !{!35, !36}
 !llvm.ident = !{!37}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !26, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/<unknown>] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"<unknown>", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786434, metadata !5, null, metadata !"A", i32 2, i64 64, i64 64, i32 0, i32 0, null, metadata !6, i32 0, metadata !"_ZTS1A", null, metadata !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 2, size 64, align 64, offset 0] [def] [from ]
-!5 = metadata !{metadata !"./ab.h", metadata !""}
-!6 = metadata !{metadata !7, metadata !14, metadata !19}
-!7 = metadata !{i32 786445, metadata !5, metadata !8, metadata !"_vptr$A", i32 0, i64 64, i64 0, i64 0, i32 64, metadata !9} ; [ DW_TAG_member ] [_vptr$A] [line 0, size 64, align 0, offset 0] [artificial] [from ]
-!8 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/./ab.h]
-!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __vtbl_ptr_type]
-!10 = metadata !{i32 786447, null, null, metadata !"__vtbl_ptr_type", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [__vtbl_ptr_type] [line 0, size 64, align 0, offset 0] [from ]
-!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!14 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1A", metadata !"setFoo", metadata !"setFoo", metadata !"_ZN1A6setFooEv", i32 4, metadata !15, i1 false, i1 false, i32 1, i32 0, metadata !"_ZTS1A", i32 256, i1 false, null, null, i32 0, metadata !18, i32 4} ; [ DW_TAG_subprogram ] [line 4] [setFoo]
-!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{null, metadata !17}
-!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
-!18 = metadata !{i32 786468}
-!19 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1A", metadata !"getFoo", metadata !"getFoo", metadata !"_ZN1A6getFooEv", i32 5, metadata !20, i1 false, i1 false, i32 1, i32 1, metadata !"_ZTS1A", i32 256, i1 false, null, null, i32 0, metadata !25, i32 5} ; [ DW_TAG_subprogram ] [line 5] [getFoo]
-!20 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!21 = metadata !{metadata !22, metadata !17}
-!22 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !23} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo_t]
-!23 = metadata !{i32 786454, metadata !24, null, metadata !"foo_t", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ] [foo_t] [line 1, size 0, align 0, offset 0] [from int]
-!24 = metadata !{metadata !"a.cpp", metadata !""}
-!25 = metadata !{i32 786468}
-!26 = metadata !{metadata !27, metadata !31, metadata !34}
-!27 = metadata !{i32 786478, metadata !24, metadata !28, metadata !"bar", metadata !"bar", metadata !"_Z3barv", i32 2, metadata !29, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z3barv, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [bar]
-!28 = metadata !{i32 786473, metadata !24}        ; [ DW_TAG_file_type ] [/a.cpp]
-!29 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!30 = metadata !{metadata !23}
-!31 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"_ZN1AC1Ev", i32 2, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 320, i1 false, void (%class.A*)* @_ZN1AC1Ev, null, metadata !32, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [A]
-!32 = metadata !{i32 786478, null, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"", i32 0, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !33, i32 0} ; [ DW_TAG_subprogram ] [line 0] [A]
-!33 = metadata !{i32 786468}
-!34 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1A", metadata !"A", metadata !"A", metadata !"_ZN1AC2Ev", i32 2, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 320, i1 false, void (%class.A*)* @_ZN1AC2Ev, null, metadata !32, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [A]
-!35 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!36 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!37 = metadata !{metadata !"clang version 3.5 "}
-!38 = metadata !{i32 3, i32 0, metadata !27, null}
-!39 = metadata !{i32 786689, metadata !31, metadata !"this", null, i32 16777216, metadata !40, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!40 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
-!41 = metadata !{i32 0, i32 0, metadata !31, null}
-!42 = metadata !{i32 2, i32 0, metadata !43, null}
-!43 = metadata !{i32 786443, metadata !5, metadata !31} ; [ DW_TAG_lexical_block ] [/./ab.h]
-!44 = metadata !{i32 786689, metadata !34, metadata !"this", null, i32 16777216, metadata !40, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!45 = metadata !{i32 0, i32 0, metadata !34, null}
-!46 = metadata !{i32 2, i32 0, metadata !34, null}
+!0 = !{!"0x11\004\00clang version 3.5 \000\00\000\00\000", !1, !2, !3, !26, !2, !2} ; [ DW_TAG_compile_unit ] [/<unknown>] [DW_LANG_C_plus_plus]
+!1 = !{!"<unknown>", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2\00A\002\0064\0064\000\000\000", !5, null, null, !6, !"_ZTS1A", null, !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 2, size 64, align 64, offset 0] [def] [from ]
+!5 = !{!"./ab.h", !""}
+!6 = !{!7, !14, !19}
+!7 = !{!"0xd\00_vptr$A\000\0064\000\000\0064", !5, !8, !9} ; [ DW_TAG_member ] [_vptr$A] [line 0, size 64, align 0, offset 0] [artificial] [from ]
+!8 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [/./ab.h]
+!9 = !{!"0xf\00\000\0064\000\000\000", null, null, !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __vtbl_ptr_type]
+!10 = !{!"0xf\00__vtbl_ptr_type\000\0064\000\000\000", null, null, !11} ; [ DW_TAG_pointer_type ] [__vtbl_ptr_type] [line 0, size 64, align 0, offset 0] [from ]
+!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!13}
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!14 = !{!"0x2e\00setFoo\00setFoo\00_ZN1A6setFooEv\004\000\000\001\006\00259\000\004", !5, !"_ZTS1A", !15, !"_ZTS1A", null, null, i32 0, !18} ; [ DW_TAG_subprogram ] [line 4] [setFoo]
+!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{null, !17}
+!17 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!18 = !{i32 786468}
+!19 = !{!"0x2e\00getFoo\00getFoo\00_ZN1A6getFooEv\005\000\000\001\006\00259\000\005", !5, !"_ZTS1A", !20, !"_ZTS1A", null, null, i32 0, !25} ; [ DW_TAG_subprogram ] [line 5] [getFoo]
+!20 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !21, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!21 = !{!22, !17}
+!22 = !{!"0x26\00\000\000\000\000\000", null, null, !23} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo_t]
+!23 = !{!"0x16\00foo_t\001\000\000\000\000", !24, null, !13} ; [ DW_TAG_typedef ] [foo_t] [line 1, size 0, align 0, offset 0] [from int]
+!24 = !{!"a.cpp", !""}
+!25 = !{i32 786468}
+!26 = !{!27, !31, !34}
+!27 = !{!"0x2e\00bar\00bar\00_Z3barv\002\000\001\000\006\00256\000\002", !24, !28, !29, null, i32 ()* @_Z3barv, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [bar]
+!28 = !{!"0x29", !24}        ; [ DW_TAG_file_type ] [/a.cpp]
+!29 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !30, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!30 = !{!23}
+!31 = !{!"0x2e\00A\00A\00_ZN1AC1Ev\002\000\001\000\006\00320\000\002", !5, !"_ZTS1A", !15, null, void (%class.A*)* @_ZN1AC1Ev, null, !32, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [A]
+!32 = !{!"0x2e\00A\00A\00\000\000\000\000\006\00320\000\000", null, !"_ZTS1A", !15, null, null, null, i32 0, !33} ; [ DW_TAG_subprogram ] [line 0] [A]
+!33 = !{i32 786468}
+!34 = !{!"0x2e\00A\00A\00_ZN1AC2Ev\002\000\001\000\006\00320\000\002", !5, !"_ZTS1A", !15, null, void (%class.A*)* @_ZN1AC2Ev, null, !32, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [A]
+!35 = !{i32 2, !"Dwarf Version", i32 2}
+!36 = !{i32 1, !"Debug Info Version", i32 2}
+!37 = !{!"clang version 3.5 "}
+!38 = !MDLocation(line: 3, scope: !27)
+!39 = !{!"0x101\00this\0016777216\001088", !31, null, !40} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!40 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
+!41 = !MDLocation(line: 0, scope: !31)
+!42 = !MDLocation(line: 2, scope: !43)
+!43 = !{!"0xb\000", !5, !31} ; [ DW_TAG_lexical_block ] [/./ab.h]
+!44 = !{!"0x101\00this\0016777216\001088", !34, null, !40} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!45 = !MDLocation(line: 0, scope: !34)
+!46 = !MDLocation(line: 2, scope: !34)
diff --git a/test/Linker/type-unique-simple2-b.ll b/test/Linker/type-unique-simple2-b.ll
index 9155f69fb916..25e67d424ea8 100644
--- a/test/Linker/type-unique-simple2-b.ll
+++ b/test/Linker/type-unique-simple2-b.ll
@@ -22,20 +22,20 @@ define void @_ZN1A6setFooEv(%class.A* %this) unnamed_addr #0 align 2 {
 entry:
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !32), !dbg !34
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !32, metadata !{!"0x102"}), !dbg !34
   %this1 = load %class.A** %this.addr
   ret void, !dbg !35
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind
 define i32 @_ZN1A6getFooEv(%class.A* %this) unnamed_addr #0 align 2 {
 entry:
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
-  call void @llvm.dbg.declare(metadata !{%class.A** %this.addr}, metadata !36), !dbg !37
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !36, metadata !{!"0x102"}), !dbg !37
   %this1 = load %class.A** %this.addr
   ret i32 1, !dbg !38
 }
@@ -47,42 +47,42 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!29, !30}
 !llvm.ident = !{!31}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !25, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/<unknown>] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"<unknown>", metadata !""}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786434, metadata !5, null, metadata !"A", i32 2, i64 64, i64 64, i32 0, i32 0, null, metadata !6, i32 0, metadata !"_ZTS1A", null, metadata !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 2, size 64, align 64, offset 0] [def] [from ]
-!5 = metadata !{metadata !"./ab.h", metadata !""}
-!6 = metadata !{metadata !7, metadata !14, metadata !19}
-!7 = metadata !{i32 786445, metadata !5, metadata !8, metadata !"_vptr$A", i32 0, i64 64, i64 0, i64 0, i32 64, metadata !9} ; [ DW_TAG_member ] [_vptr$A] [line 0, size 64, align 0, offset 0] [artificial] [from ]
-!8 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/./ab.h]
-!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __vtbl_ptr_type]
-!10 = metadata !{i32 786447, null, null, metadata !"__vtbl_ptr_type", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [__vtbl_ptr_type] [line 0, size 64, align 0, offset 0] [from ]
-!11 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!14 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1A", metadata !"setFoo", metadata !"setFoo", metadata !"_ZN1A6setFooEv", i32 4, metadata !15, i1 false, i1 false, i32 1, i32 0, metadata !"_ZTS1A", i32 256, i1 false, null, null, i32 0, metadata !18, i32 4} ; [ DW_TAG_subprogram ] [line 4] [setFoo]
-!15 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!16 = metadata !{null, metadata !17}
-!17 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
-!18 = metadata !{i32 786468}
-!19 = metadata !{i32 786478, metadata !5, metadata !"_ZTS1A", metadata !"getFoo", metadata !"getFoo", metadata !"_ZN1A6getFooEv", i32 5, metadata !20, i1 false, i1 false, i32 1, i32 1, metadata !"_ZTS1A", i32 256, i1 false, null, null, i32 0, metadata !24, i32 5} ; [ DW_TAG_subprogram ] [line 5] [getFoo]
-!20 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !21, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!21 = metadata !{metadata !22, metadata !17}
-!22 = metadata !{i32 786470, null, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !23} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo_t]
-!23 = metadata !{i32 786454, metadata !5, null, metadata !"foo_t", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !13} ; [ DW_TAG_typedef ] [foo_t] [line 1, size 0, align 0, offset 0] [from int]
-!24 = metadata !{i32 786468}
-!25 = metadata !{metadata !26, metadata !28}
-!26 = metadata !{i32 786478, metadata !27, metadata !"_ZTS1A", metadata !"setFoo", metadata !"setFoo", metadata !"_ZN1A6setFooEv", i32 2, metadata !15, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (%class.A*)* @_ZN1A6setFooEv, null, metadata !14, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [setFoo]
-!27 = metadata !{metadata !"b.cpp", metadata !""}
-!28 = metadata !{i32 786478, metadata !27, metadata !"_ZTS1A", metadata !"getFoo", metadata !"getFoo", metadata !"_ZN1A6getFooEv", i32 4, metadata !20, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%class.A*)* @_ZN1A6getFooEv, null, metadata !19, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [getFoo]
-!29 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!31 = metadata !{metadata !"clang version 3.5 "}
-!32 = metadata !{i32 786689, metadata !26, metadata !"this", null, i32 16777216, metadata !33, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!33 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
-!34 = metadata !{i32 0, i32 0, metadata !26, null}
-!35 = metadata !{i32 2, i32 0, metadata !26, null}
-!36 = metadata !{i32 786689, metadata !28, metadata !"this", null, i32 16777216, metadata !33, i32 1088, i32 0} ; [ DW_TAG_arg_variable ] [this] [line 0]
-!37 = metadata !{i32 0, i32 0, metadata !28, null}
-!38 = metadata !{i32 4, i32 0, metadata !28, null}
+!0 = !{!"0x11\004\00clang version 3.5 \000\00\000\00\000", !1, !2, !3, !25, !2, !2} ; [ DW_TAG_compile_unit ] [/<unknown>] [DW_LANG_C_plus_plus]
+!1 = !{!"<unknown>", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2\00A\002\0064\0064\000\000\000", !5, null, null, !6, !"_ZTS1A", null, !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 2, size 64, align 64, offset 0] [def] [from ]
+!5 = !{!"./ab.h", !""}
+!6 = !{!7, !14, !19}
+!7 = !{!"0xd\00_vptr$A\000\0064\000\000\0064", !5, !8, !9} ; [ DW_TAG_member ] [_vptr$A] [line 0, size 64, align 0, offset 0] [artificial] [from ]
+!8 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [/./ab.h]
+!9 = !{!"0xf\00\000\0064\000\000\000", null, null, !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from __vtbl_ptr_type]
+!10 = !{!"0xf\00__vtbl_ptr_type\000\0064\000\000\000", null, null, !11} ; [ DW_TAG_pointer_type ] [__vtbl_ptr_type] [line 0, size 64, align 0, offset 0] [from ]
+!11 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!13}
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!14 = !{!"0x2e\00setFoo\00setFoo\00_ZN1A6setFooEv\004\000\000\001\006\00259\000\004", !5, !"_ZTS1A", !15, !"_ZTS1A", null, null, i32 0, !18} ; [ DW_TAG_subprogram ] [line 4] [setFoo]
+!15 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !16, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = !{null, !17}
+!17 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!18 = !{i32 786468}
+!19 = !{!"0x2e\00getFoo\00getFoo\00_ZN1A6getFooEv\005\000\000\001\006\00259\000\005", !5, !"_ZTS1A", !20, !"_ZTS1A", null, null, i32 0, !24} ; [ DW_TAG_subprogram ] [line 5] [getFoo]
+!20 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !21, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!21 = !{!22, !17}
+!22 = !{!"0x26\00\000\000\000\000\000", null, null, !23} ; [ DW_TAG_const_type ] [line 0, size 0, align 0, offset 0] [from foo_t]
+!23 = !{!"0x16\00foo_t\001\000\000\000\000", !5, null, !13} ; [ DW_TAG_typedef ] [foo_t] [line 1, size 0, align 0, offset 0] [from int]
+!24 = !{i32 786468}
+!25 = !{!26, !28}
+!26 = !{!"0x2e\00setFoo\00setFoo\00_ZN1A6setFooEv\002\000\001\000\006\00259\000\002", !27, !"_ZTS1A", !15, null, void (%class.A*)* @_ZN1A6setFooEv, null, !14, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [setFoo]
+!27 = !{!"b.cpp", !""}
+!28 = !{!"0x2e\00getFoo\00getFoo\00_ZN1A6getFooEv\004\000\001\000\006\00259\000\004", !27, !"_ZTS1A", !20, null, i32 (%class.A*)* @_ZN1A6getFooEv, null, !19, !2} ; [ DW_TAG_subprogram ] [line 4] [def] [getFoo]
+!29 = !{i32 2, !"Dwarf Version", i32 2}
+!30 = !{i32 1, !"Debug Info Version", i32 2}
+!31 = !{!"clang version 3.5 "}
+!32 = !{!"0x101\00this\0016777216\001088", !26, null, !33} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!33 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
+!34 = !MDLocation(line: 0, scope: !26)
+!35 = !MDLocation(line: 2, scope: !26)
+!36 = !{!"0x101\00this\0016777216\001088", !28, null, !33} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!37 = !MDLocation(line: 0, scope: !28)
+!38 = !MDLocation(line: 4, scope: !28)
diff --git a/test/Linker/type-unique-src-type.ll b/test/Linker/type-unique-src-type.ll
new file mode 100644
index 000000000000..01b33a21bd74
--- /dev/null
+++ b/test/Linker/type-unique-src-type.ll
@@ -0,0 +1,24 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-link -S %t.bc -o - | FileCheck %s
+; RUN: llvm-link -S %s -o - | FileCheck %s
+
+; Test that we don't try to map %C.0 and C and then try to map %C to a new type.
+; This used to happen when lazy loading since we wouldn't then identify %C
+; as a destination type until it was too late.
+
+; CHECK: %C.0 = type { %B }
+; CHECK-NEXT: %B = type { %A }
+; CHECK-NEXT: %A = type { i8 }
+
+; CHECK: @g1 = external global %C.0
+; CHECK:  getelementptr %C.0* null, i64 0, i32 0, i32 0
+
+%A   = type { i8 }
+%B   = type { %A }
+%C   = type { %B }
+%C.0 = type { %B }
+define void @f1() {
+  getelementptr %C* null, i64 0, i32 0, i32 0
+  ret void
+}
+@g1 = external global %C.0
diff --git a/test/Linker/type-unique-type-array-a.ll b/test/Linker/type-unique-type-array-a.ll
new file mode 100644
index 000000000000..9e2de8821b4d
--- /dev/null
+++ b/test/Linker/type-unique-type-array-a.ll
@@ -0,0 +1,129 @@
+; REQUIRES: object-emission
+;
+; RUN: llvm-link %s %p/type-unique-type-array-b.ll -S -o - | %llc_dwarf -filetype=obj -O0 | llvm-dwarfdump -debug-dump=info - | FileCheck %s
+;
+; rdar://problem/17628609
+;
+; cat -n a.cpp
+;     1	struct SA {
+;     2	  int a;
+;     3	};
+;     4	
+;     5	class A {
+;     6	public:
+;     7	  void testA(SA a) {
+;     8	  }
+;     9	};
+;    10	
+;    11	void topA(A *a, SA sa) {
+;    12	  a->testA(sa);
+;    13	}
+;
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_class_type
+; CHECK-NEXT:   DW_AT_name {{.*}} "A"
+; CHECK: DW_TAG_subprogram
+; CHECK: DW_AT_MIPS_linkage_name {{.*}} "_ZN1A5testAE2SA"
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (cu + 0x{{.*}} => {0x[[STRUCT:.*]]})
+; CHECK: 0x[[STRUCT]]: DW_TAG_structure_type
+; CHECK-NEXT:   DW_AT_name {{.*}} "SA"
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_TAG_class_type
+; CHECK-NEXT:   DW_AT_name {{.*}} "B"
+; CHECK: DW_TAG_subprogram
+; CHECK:   DW_AT_MIPS_linkage_name {{.*}} "_ZN1B5testBE2SA"
+; CHECK: DW_TAG_formal_parameter
+; CHECK: DW_TAG_formal_parameter
+; CHECK-NEXT: DW_AT_type [DW_FORM_ref_addr] {{.*}}[[STRUCT]]
+
+%class.A = type { i8 }
+%struct.SA = type { i32 }
+
+; Function Attrs: ssp uwtable
+define void @_Z4topAP1A2SA(%class.A* %a, i32 %sa.coerce) #0 {
+entry:
+  %sa = alloca %struct.SA, align 4
+  %a.addr = alloca %class.A*, align 8
+  %agg.tmp = alloca %struct.SA, align 4
+  %coerce.dive = getelementptr %struct.SA* %sa, i32 0, i32 0
+  store i32 %sa.coerce, i32* %coerce.dive
+  store %class.A* %a, %class.A** %a.addr, align 8
+  call void @llvm.dbg.declare(metadata %class.A** %a.addr, metadata !24, metadata !{!"0x102"}), !dbg !25
+  call void @llvm.dbg.declare(metadata %struct.SA* %sa, metadata !26, metadata !{!"0x102"}), !dbg !27
+  %0 = load %class.A** %a.addr, align 8, !dbg !28
+  %1 = bitcast %struct.SA* %agg.tmp to i8*, !dbg !28
+  %2 = bitcast %struct.SA* %sa to i8*, !dbg !28
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 4, i32 4, i1 false), !dbg !28
+  %coerce.dive1 = getelementptr %struct.SA* %agg.tmp, i32 0, i32 0, !dbg !28
+  %3 = load i32* %coerce.dive1, !dbg !28
+  call void @_ZN1A5testAE2SA(%class.A* %0, i32 %3), !dbg !28
+  ret void, !dbg !29
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind ssp uwtable
+define linkonce_odr void @_ZN1A5testAE2SA(%class.A* %this, i32 %a.coerce) #2 align 2 {
+entry:
+  %a = alloca %struct.SA, align 4
+  %this.addr = alloca %class.A*, align 8
+  %coerce.dive = getelementptr %struct.SA* %a, i32 0, i32 0
+  store i32 %a.coerce, i32* %coerce.dive
+  store %class.A* %this, %class.A** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !30, metadata !{!"0x102"}), !dbg !31
+  call void @llvm.dbg.declare(metadata %struct.SA* %a, metadata !32, metadata !{!"0x102"}), !dbg !33
+  %this1 = load %class.A** %this.addr
+  ret void, !dbg !34
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #3
+
+attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21, !22}
+!llvm.ident = !{!23}
+
+!0 = !{!"0x11\004\00clang version 3.5.0 (trunk 214102:214113M) (llvm/trunk 214102:214115M)\000\00\000\00\001", !1, !2, !3, !14, !2, !2} ; [ DW_TAG_compile_unit ] [a.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"a.cpp", !"/Users/manmanren/test-Nov/type_unique/rdar_di_array"}
+!2 = !{}
+!3 = !{!4, !10}
+!4 = !{!"0x2\00A\005\008\008\000\000\000", !1, null, null, !5, null, null, !"_ZTS1A"} ; [ DW_TAG_class_type ] [A] [line 5, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!6}
+!6 = !{!"0x2e\00testA\00testA\00_ZN1A5testAE2SA\007\000\000\000\006\00256\000\007", !1, !"_ZTS1A", !7, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 7] [testA]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9, !"_ZTS2SA"}
+!9 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1A]
+!10 = !{!"0x13\00SA\001\0032\0032\000\000\000", !1, null, null, !11, null, null, !"_ZTS2SA"} ; [ DW_TAG_structure_type ] [SA] [line 1, size 32, align 32, offset 0] [def] [from ]
+!11 = !{!12}
+!12 = !{!"0xd\00a\002\0032\0032\000\000", !1, !"_ZTS2SA", !13} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!14 = !{!15, !20}
+!15 = !{!"0x2e\00topA\00topA\00_Z4topAP1A2SA\0011\000\001\000\006\00256\000\0011", !1, !16, !17, null, void (%class.A*, i32)* @_Z4topAP1A2SA, null, null, !2} ; [ DW_TAG_subprogram ] [line 11] [def] [topA]
+!16 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [a.cpp]
+!17 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !18, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = !{null, !19, !"_ZTS2SA"}
+!19 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1A"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1A]
+!20 = !{!"0x2e\00testA\00testA\00_ZN1A5testAE2SA\007\000\001\000\006\00256\000\007", !1, !"_ZTS1A", !7, null, void (%class.A*, i32)* @_ZN1A5testAE2SA, null, !6, !2} ; [ DW_TAG_subprogram ] [line 7] [def] [testA]
+!21 = !{i32 2, !"Dwarf Version", i32 2}
+!22 = !{i32 2, !"Debug Info Version", i32 2}
+!23 = !{!"clang version 3.5.0 (trunk 214102:214113M) (llvm/trunk 214102:214115M)"}
+!24 = !{!"0x101\00a\0016777227\000", !15, !16, !19} ; [ DW_TAG_arg_variable ] [a] [line 11]
+!25 = !MDLocation(line: 11, column: 14, scope: !15)
+!26 = !{!"0x101\00sa\0033554443\000", !15, !16, !"_ZTS2SA"} ; [ DW_TAG_arg_variable ] [sa] [line 11]
+!27 = !MDLocation(line: 11, column: 20, scope: !15)
+!28 = !MDLocation(line: 12, column: 3, scope: !15)
+!29 = !MDLocation(line: 13, column: 1, scope: !15)
+!30 = !{!"0x101\00this\0016777216\001088", !20, null, !19} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!31 = !MDLocation(line: 0, scope: !20)
+!32 = !{!"0x101\00a\0033554439\000", !20, !16, !"_ZTS2SA"} ; [ DW_TAG_arg_variable ] [a] [line 7]
+!33 = !MDLocation(line: 7, column: 17, scope: !20)
+!34 = !MDLocation(line: 8, column: 3, scope: !20)
diff --git a/test/Linker/type-unique-type-array-b.ll b/test/Linker/type-unique-type-array-b.ll
new file mode 100644
index 000000000000..0fdae2abfe92
--- /dev/null
+++ b/test/Linker/type-unique-type-array-b.ll
@@ -0,0 +1,108 @@
+; RUN: true
+; This file belongs to type-unique-type-array-a.ll.
+;
+; rdar://problem/17628609
+;
+; cat -n b.cpp
+;     1	struct SA {
+;     2	  int a;
+;     3	};
+;     4	
+;     5	class B {
+;     6	public:
+;     7	  void testB(SA sa) {
+;     8	  }
+;     9	};
+;    10	
+;    11	void topB(B* b, SA sa) {
+;    12	  b->testB(sa);
+;    13	}
+
+%class.B = type { i8 }
+%struct.SA = type { i32 }
+
+; Function Attrs: ssp uwtable
+define void @_Z4topBP1B2SA(%class.B* %b, i32 %sa.coerce) #0 {
+entry:
+  %sa = alloca %struct.SA, align 4
+  %b.addr = alloca %class.B*, align 8
+  %agg.tmp = alloca %struct.SA, align 4
+  %coerce.dive = getelementptr %struct.SA* %sa, i32 0, i32 0
+  store i32 %sa.coerce, i32* %coerce.dive
+  store %class.B* %b, %class.B** %b.addr, align 8
+  call void @llvm.dbg.declare(metadata %class.B** %b.addr, metadata !24, metadata !{!"0x102"}), !dbg !25
+  call void @llvm.dbg.declare(metadata %struct.SA* %sa, metadata !26, metadata !{!"0x102"}), !dbg !27
+  %0 = load %class.B** %b.addr, align 8, !dbg !28
+  %1 = bitcast %struct.SA* %agg.tmp to i8*, !dbg !28
+  %2 = bitcast %struct.SA* %sa to i8*, !dbg !28
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 4, i32 4, i1 false), !dbg !28
+  %coerce.dive1 = getelementptr %struct.SA* %agg.tmp, i32 0, i32 0, !dbg !28
+  %3 = load i32* %coerce.dive1, !dbg !28
+  call void @_ZN1B5testBE2SA(%class.B* %0, i32 %3), !dbg !28
+  ret void, !dbg !29
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: nounwind ssp uwtable
+define linkonce_odr void @_ZN1B5testBE2SA(%class.B* %this, i32 %sa.coerce) #2 align 2 {
+entry:
+  %sa = alloca %struct.SA, align 4
+  %this.addr = alloca %class.B*, align 8
+  %coerce.dive = getelementptr %struct.SA* %sa, i32 0, i32 0
+  store i32 %sa.coerce, i32* %coerce.dive
+  store %class.B* %this, %class.B** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata %class.B** %this.addr, metadata !30, metadata !{!"0x102"}), !dbg !31
+  call void @llvm.dbg.declare(metadata %struct.SA* %sa, metadata !32, metadata !{!"0x102"}), !dbg !33
+  %this1 = load %class.B** %this.addr
+  ret void, !dbg !34
+}
+
+; Function Attrs: nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #3
+
+attributes #0 = { ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!21, !22}
+!llvm.ident = !{!23}
+
+!0 = !{!"0x11\004\00clang version 3.5.0 (trunk 214102:214113M) (llvm/trunk 214102:214115M)\000\00\000\00\001", !1, !2, !3, !14, !2, !2} ; [ DW_TAG_compile_unit ] [b.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"b.cpp", !"/Users/manmanren/test-Nov/type_unique/rdar_di_array"}
+!2 = !{}
+!3 = !{!4, !10}
+!4 = !{!"0x2\00B\005\008\008\000\000\000", !1, null, null, !5, null, null, !"_ZTS1B"} ; [ DW_TAG_class_type ] [B] [line 5, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!6}
+!6 = !{!"0x2e\00testB\00testB\00_ZN1B5testBE2SA\007\000\000\000\006\00256\000\007", !1, !"_ZTS1B", !7, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 7] [testB]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null, !9, !"_ZTS2SA"}
+!9 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS1B]
+!10 = !{!"0x13\00SA\001\0032\0032\000\000\000", !1, null, null, !11, null, null, !"_ZTS2SA"} ; [ DW_TAG_structure_type ] [SA] [line 1, size 32, align 32, offset 0] [def] [from ]
+!11 = !{!12}
+!12 = !{!"0xd\00a\002\0032\0032\000\000", !1, !"_ZTS2SA", !13} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!14 = !{!15, !20}
+!15 = !{!"0x2e\00topB\00topB\00_Z4topBP1B2SA\0011\000\001\000\006\00256\000\0011", !1, !16, !17, null, void (%class.B*, i32)* @_Z4topBP1B2SA, null, null, !2} ; [ DW_TAG_subprogram ] [line 11] [def] [topB]
+!16 = !{!"0x29", !1}         ; [ DW_TAG_file_type ] [b.cpp]
+!17 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !18, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = !{null, !19, !"_ZTS2SA"}
+!19 = !{!"0xf\00\000\0064\0064\000\000", null, null, !"_ZTS1B"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from _ZTS1B]
+!20 = !{!"0x2e\00testB\00testB\00_ZN1B5testBE2SA\007\000\001\000\006\00256\000\007", !1, !"_ZTS1B", !7, null, void (%class.B*, i32)* @_ZN1B5testBE2SA, null, !6, !2} ; [ DW_TAG_subprogram ] [line 7] [def] [testB]
+!21 = !{i32 2, !"Dwarf Version", i32 2}
+!22 = !{i32 2, !"Debug Info Version", i32 2}
+!23 = !{!"clang version 3.5.0 (trunk 214102:214113M) (llvm/trunk 214102:214115M)"}
+!24 = !{!"0x101\00b\0016777227\000", !15, !16, !19} ; [ DW_TAG_arg_variable ] [b] [line 11]
+!25 = !MDLocation(line: 11, column: 14, scope: !15)
+!26 = !{!"0x101\00sa\0033554443\000", !15, !16, !"_ZTS2SA"} ; [ DW_TAG_arg_variable ] [sa] [line 11]
+!27 = !MDLocation(line: 11, column: 20, scope: !15)
+!28 = !MDLocation(line: 12, column: 3, scope: !15)
+!29 = !MDLocation(line: 13, column: 1, scope: !15)
+!30 = !{!"0x101\00this\0016777216\001088", !20, null, !19} ; [ DW_TAG_arg_variable ] [this] [line 0]
+!31 = !MDLocation(line: 0, scope: !20)
+!32 = !{!"0x101\00sa\0033554439\000", !20, !16, !"_ZTS2SA"} ; [ DW_TAG_arg_variable ] [sa] [line 7]
+!33 = !MDLocation(line: 7, column: 17, scope: !20)
+!34 = !MDLocation(line: 8, column: 3, scope: !20)
diff --git a/test/Linker/type-unique-unrelated.ll b/test/Linker/type-unique-unrelated.ll
new file mode 100644
index 000000000000..26d05bbab6c4
--- /dev/null
+++ b/test/Linker/type-unique-unrelated.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-link -S %s %p/Inputs/type-unique-unrelated2.ll %p/Inputs/type-unique-unrelated3.ll | FileCheck %s
+
+; CHECK: %t = type { i8* }
+
+; CHECK: define %t @f2() {
+; CHECK-NEXT:   %x = call %t @f2()
+; CHECK-NEXT:   ret %t %x
+; CHECK-NEXT: }
+
+; CHECK: define %t @g2() {
+; CHECK-NEXT:   %x = call %t @g()
+; CHECK-NEXT:   ret %t %x
+; CHECK-NEXT: }
+
+; CHECK: define %t @g() {
+; CHECK-NEXT:  %x = call %t @f()
+; CHECK-NEXT:  ret %t %x
+; CHECK-NEXT: }
+
+; The idea of this test is that the %t in this file and the one in
+; type-unique-unrelated2.ll look unrelated until type-unique-unrelated3.ll
+; is merged in.
+
+%t = type { i8* }
+declare %t @f()
+
+define %t @f2() {
+ %x = call %t @f2()
+ ret %t %x
+}
+
diff --git a/test/Linker/unique-fwd-decl-a.ll b/test/Linker/unique-fwd-decl-a.ll
new file mode 100644
index 000000000000..c1a4b1d5a577
--- /dev/null
+++ b/test/Linker/unique-fwd-decl-a.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-link %s %S/Inputs/unique-fwd-decl-b.ll -S -o - | FileCheck %s
+
+; Test that the arguments of !a and !b get uniqued.
+; CHECK: !a = !{!0}
+; CHECK: !b = !{!0}
+
+!a = !{!0}
+!0 = !{!1}
+!1 = !{}
diff --git a/test/Linker/unique-fwd-decl-order.ll b/test/Linker/unique-fwd-decl-order.ll
new file mode 100644
index 000000000000..e1d8c2e5cf92
--- /dev/null
+++ b/test/Linker/unique-fwd-decl-order.ll
@@ -0,0 +1,20 @@
+; RUN: llvm-link %s %S/Inputs/unique-fwd-decl-order.ll -S -o - | FileCheck %s
+; RUN: llvm-link %S/Inputs/unique-fwd-decl-order.ll %s -S -o - | FileCheck %s
+
+; This test exercises MDNode hashing.  For the nodes to be correctly uniqued,
+; the hash of a to-be-created MDNode has to match the hash of an
+; operand-just-changed MDNode (with the same operands).
+;
+; Note that these two assembly files number the nodes identically, even though
+; the nodes are in a different order.  This is for the reader's convenience.
+
+; CHECK: !named = !{!0, !0}
+!named = !{!0}
+
+; CHECK: !0 = !{!1}
+!0 = !{!1}
+
+; CHECK: !1 = !{}
+!1 = !{}
+
+; CHECK-NOT: !2
diff --git a/test/Linker/unnamed-addr1-b.ll b/test/Linker/unnamed-addr1-b.ll
index 39a0c8bd7e5c..d0f54f2259b4 100644
--- a/test/Linker/unnamed-addr1-b.ll
+++ b/test/Linker/unnamed-addr1-b.ll
@@ -1,7 +1,7 @@
 ; This file is for use with unnamed-addr1-a.ll
 ; RUN: true
 
-@global-c = common unnamed_addr global i32 42
+@global-c = common unnamed_addr global i32 0
 @global-d = unnamed_addr global i32 42
 @global-e = unnamed_addr global i32 42
 @global-f = unnamed_addr global i32 42
@@ -13,7 +13,7 @@ define weak void @func-c() unnamed_addr { ret void }
 define weak void @func-d() unnamed_addr { ret void }
 define weak void @func-e() unnamed_addr { ret void }
 
-@global-g = common global i32 42
+@global-g = common global i32 0
 @global-h = global i32 42
 @global-i = global i32 42
 @global-j = global i32 42
diff --git a/test/Linker/visibility.ll b/test/Linker/visibility.ll
new file mode 100644
index 000000000000..4938d7af56d6
--- /dev/null
+++ b/test/Linker/visibility.ll
@@ -0,0 +1,51 @@
+; RUN: llvm-link %s %p/Inputs/visibility.ll -S | FileCheck %s
+; RUN: llvm-link %p/Inputs/visibility.ll %s -S | FileCheck %s
+
+; The values in this file are strong, the ones in Inputs/visibility.ll are weak,
+; but we should still get the visibility from them.
+
+
+$c1 = comdat any
+
+; Variables
+; CHECK-DAG: @v1 = hidden global i32 0
+@v1 = global i32 0
+
+; CHECK-DAG: @v2 = protected  global i32 0
+@v2 = global i32 0
+
+; CHECK-DAG: @v3 = hidden global i32 0
+@v3 = protected global i32 0
+
+; CHECK-DAG: @v4 = hidden global i32 1, comdat($c1)
+@v4 = global i32 1, comdat($c1)
+
+; Aliases
+; CHECK: @a1 = hidden alias i32* @v1
+@a1 = alias i32* @v1
+
+; CHECK: @a2 = protected alias i32* @v2
+@a2 = alias i32* @v2
+
+; CHECK: @a3 = hidden alias i32* @v3
+@a3 = protected alias i32* @v3
+
+
+; Functions
+; CHECK: define hidden void @f1()
+define void @f1()  {
+entry:
+  ret void
+}
+
+; CHECK: define protected void @f2()
+define void @f2()  {
+entry:
+  ret void
+}
+
+; CHECK: define hidden void @f3()
+define protected void @f3()  {
+entry:
+  ret void
+}
diff --git a/test/Linker/visibility1.ll b/test/Linker/visibility1.ll
deleted file mode 100644
index 131f6d59b5e6..000000000000
--- a/test/Linker/visibility1.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-; RUN: llvm-link %s %p/visibility2.ll -S | FileCheck %s
-; RUN: llvm-link %p/visibility2.ll %s -S | FileCheck %s
-
-; The values in this file are strong, the ones in visibility2.ll are weak,
-; but we should still get the visibility from them.
-
-; Variables
-; CHECK: @v1 = hidden global i32 0
-@v1 = global i32 0
-
-; CHECK: @v2 = protected  global i32 0
-@v2 = global i32 0
-
-; CHECK: @v3 = hidden global i32 0
-@v3 = protected global i32 0
-
-
-; Aliases
-; CHECK: @a1 = hidden alias i32* @v1
-@a1 = alias i32* @v1
-
-; CHECK: @a2 = protected alias i32* @v2
-@a2 = alias i32* @v2
-
-; CHECK: @a3 = hidden alias i32* @v3
-@a3 = protected alias i32* @v3
-
-
-; Functions
-; CHECK: define hidden void @f1()
-define void @f1()  {
-entry:
-  ret void
-}
-
-; CHECK: define protected void @f2()
-define void @f2()  {
-entry:
-  ret void
-}
-
-; CHECK: define hidden void @f3()
-define protected void @f3()  {
-entry:
-  ret void
-}
diff --git a/test/Linker/visibility2.ll b/test/Linker/visibility2.ll
deleted file mode 100644
index e6363ca2f386..000000000000
--- a/test/Linker/visibility2.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; This file is used by visibility1.ll, so it doesn't actually do anything itself
-;
-; RUN: true
-
-; Variables
-@v1 = weak hidden global i32 0
-@v2 = weak protected global i32 0
-@v3 = weak hidden global i32 0
-
-; Aliases
-@a1 = hidden alias weak i32* @v1
-@a2 = protected alias weak i32* @v2
-@a3 = hidden alias weak i32* @v3
-
-; Functions
-define weak hidden void @f1() {
-entry:
-  ret void
-}
-define weak protected void @f2() {
-entry:
-  ret void
-}
-define weak hidden void @f3() {
-entry:
-  ret void
-}
diff --git a/test/Linker/weakextern.ll b/test/Linker/weakextern.ll
index b9f2584c7eef..8d479a0d39b3 100644
--- a/test/Linker/weakextern.ll
+++ b/test/Linker/weakextern.ll
@@ -1,5 +1,5 @@
 ; RUN: llvm-as < %s > %t.bc
-; RUN: llvm-as < %p/testlink1.ll > %t2.bc
+; RUN: llvm-as < %p/testlink.ll > %t2.bc
 ; RUN: llvm-link %t.bc %t.bc %t2.bc -o %t1.bc
 ; RUN: llvm-dis < %t1.bc | FileCheck %s
 ; CHECK: kallsyms_names = extern_weak
diff --git a/test/MC/AArch64/adrp-relocation.s b/test/MC/AArch64/adrp-relocation.s
index 3bcef34e4f5d..3bc6039d5f1b 100644
--- a/test/MC/AArch64/adrp-relocation.s
+++ b/test/MC/AArch64/adrp-relocation.s
@@ -15,4 +15,4 @@ sym:
 // CHECK: R_AARCH64_ADR_PREL_PG_HI21 sym
 // CHECK: R_AARCH64_ADR_GOT_PAGE sym
 // CHECK: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 sym
-// CHECK: R_AARCH64_TLSDESC_ADR_PAGE sym
+// CHECK: R_AARCH64_TLSDESC_ADR_PAGE21 sym
diff --git a/test/MC/AArch64/arm64-be-datalayout.s b/test/MC/AArch64/arm64-be-datalayout.s
index f448a4b86e15..a5b48f1c594e 100644
--- a/test/MC/AArch64/arm64-be-datalayout.s
+++ b/test/MC/AArch64/arm64-be-datalayout.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -filetype=obj -triple arm64_be %s | llvm-readobj -section-data -sections | FileCheck %s
+// RUN: llvm-mc -filetype=obj -triple aarch64_be %s | llvm-readobj -section-data -sections | FileCheck %s
 
 // CHECK: 0000: 00123456 789ABCDE
 foo:    .xword 0x123456789abcde
diff --git a/test/MC/AArch64/arm64-elf-relocs.s b/test/MC/AArch64/arm64-elf-relocs.s
index eb22cc2f2365..612819cc5539 100644
--- a/test/MC/AArch64/arm64-elf-relocs.s
+++ b/test/MC/AArch64/arm64-elf-relocs.s
@@ -77,7 +77,7 @@
 
    adrp x2, :tlsdesc:sym
 // CHECK: adrp x2, :tlsdesc:sym
-// CHECK-OBJ: 58 R_AARCH64_TLSDESC_ADR_PAGE sym
+// CHECK-OBJ: 58 R_AARCH64_TLSDESC_ADR_PAGE21 sym
 
    // LLVM is not competent enough to do this relocation because the
    // page boundary could occur anywhere after linking. A relocation
diff --git a/test/MC/AArch64/arm64-system-encoding.s b/test/MC/AArch64/arm64-system-encoding.s
index 87f8f8a4e98c..eb29117428fd 100644
--- a/test/MC/AArch64/arm64-system-encoding.s
+++ b/test/MC/AArch64/arm64-system-encoding.s
@@ -135,6 +135,8 @@ foo:
   msr VTTBR_EL2, x3
   msr SPSel, x3
   msr S3_2_C11_C6_4, x1
+  msr  S0_0_C0_C0_0, x0
+  msr  S1_2_C3_C4_5, x2
 ; CHECK: msr ACTLR_EL1, x3              ; encoding: [0x23,0x10,0x18,0xd5]
 ; CHECK: msr ACTLR_EL2, x3              ; encoding: [0x23,0x10,0x1c,0xd5]
 ; CHECK: msr ACTLR_EL3, x3              ; encoding: [0x23,0x10,0x1e,0xd5]
@@ -213,6 +215,8 @@ foo:
 ; CHECK: msr VTTBR_EL2, x3              ; encoding: [0x03,0x21,0x1c,0xd5]
 ; CHECK: msr  SPSEL, x3                 ; encoding: [0x03,0x42,0x18,0xd5]
 ; CHECK: msr  S3_2_C11_C6_4, x1         ; encoding: [0x81,0xb6,0x1a,0xd5]
+; CHECK: msr  S0_0_C0_C0_0, x0          ; encoding: [0x00,0x00,0x00,0xd5]
+; CHECK: msr  S1_2_C3_C4_5, x2          ; encoding: [0xa2,0x34,0x0a,0xd5]
 
   mrs x3, ACTLR_EL1
   mrs x3, ACTLR_EL2
diff --git a/test/MC/AArch64/arm64-tls-relocs.s b/test/MC/AArch64/arm64-tls-relocs.s
index 96c2b55c36d8..be7e24a6a3ff 100644
--- a/test/MC/AArch64/arm64-tls-relocs.s
+++ b/test/MC/AArch64/arm64-tls-relocs.s
@@ -304,7 +304,7 @@
 // CHECK: blr     x3                      // encoding: [0x60,0x00,0x3f,0xd6]
 
 
-// CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_ADR_PAGE [[VARSYM]]
+// CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_ADR_PAGE21 [[VARSYM]]
 // CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_LD64_LO12_NC [[VARSYM]]
 // CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_ADD_LO12_NC [[VARSYM]]
 // CHECK-ELF-NEXT:     {{0x[0-9A-F]+}} R_AARCH64_TLSDESC_CALL [[VARSYM]]
diff --git a/test/MC/AArch64/basic-a64-diagnostics.s b/test/MC/AArch64/basic-a64-diagnostics.s
index 5293131711b6..07e6d012e934 100644
--- a/test/MC/AArch64/basic-a64-diagnostics.s
+++ b/test/MC/AArch64/basic-a64-diagnostics.s
@@ -3679,26 +3679,26 @@
 // CHECK-ERROR-NEXT:                 ^
 
 // Now check some invalid generic names
-        mrs xzr, s2_5_c11_c13_2
         mrs x12, s3_8_c11_c13_2
-        mrs x13, s3_3_c12_c13_2
         mrs x19, s3_2_c15_c16_2
         mrs x30, s3_2_c15_c1_8
-// CHECK-ERROR-NEXT: error: expected readable system register
-// CHECK-ERROR-NEXT:         mrs xzr, s2_5_c11_c13_2
-// CHECK-ERROR-NEXT:                  ^
+        mrs x4, s4_7_c15_c15_7
+        mrs x14, s3_7_c16_c15_7
 // CHECK-ERROR-NEXT: error: expected readable system register
 // CHECK-ERROR-NEXT:         mrs x12, s3_8_c11_c13_2
 // CHECK-ERROR-NEXT:                  ^
 // CHECK-ERROR-NEXT: error: expected readable system register
-// CHECK-ERROR-NEXT:         mrs x13, s3_3_c12_c13_2
-// CHECK-ERROR-NEXT:                  ^
-// CHECK-ERROR-NEXT: error: expected readable system register
 // CHECK-ERROR-NEXT:         mrs x19, s3_2_c15_c16_2
 // CHECK-ERROR-NEXT:                  ^
 // CHECK-ERROR-NEXT: error: expected readable system register
 // CHECK-ERROR-NEXT:         mrs x30, s3_2_c15_c1_8
 // CHECK-ERROR-NEXT:                  ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT:         mrs x4, s4_7_c15_c15_7
+// CHECK-ERROR-NEXT:                 ^
+// CHECK-ERROR-NEXT: error: expected readable system register
+// CHECK-ERROR-NEXT:         mrs x14, s3_7_c16_c15_7
+// CHECK-ERROR-NEXT:                  ^
 
 //------------------------------------------------------------------------------
 // Test and branch (immediate)
diff --git a/test/MC/AArch64/basic-a64-instructions.s b/test/MC/AArch64/basic-a64-instructions.s
index 140ea336984c..dd8dfd4e36d1 100644
--- a/test/MC/AArch64/basic-a64-instructions.s
+++ b/test/MC/AArch64/basic-a64-instructions.s
@@ -370,23 +370,29 @@ _func:
         add w11, w13, w15, lsl #0
         add w9, w3, wzr, lsl #10
         add w17, w29, w20, lsl #31
+        add w17, w29, w20, lsl #(31-2)
 // CHECK: add      w11, w13, w15              // encoding: [0xab,0x01,0x0f,0x0b]
 // CHECK: add      w9, w3, wzr, lsl #10       // encoding: [0x69,0x28,0x1f,0x0b]
 // CHECK: add      w17, w29, w20, lsl #31     // encoding: [0xb1,0x7f,0x14,0x0b]
+// CHECK: add      w17, w29, w20, lsl #29     // encoding: [0xb1,0x77,0x14,0x0b]
 
         add w21, w22, w23, lsr #0
         add w24, w25, w26, lsr #18
         add w27, w28, w29, lsr #31
+        add w27, w28, w29, lsr #(31-2)
 // CHECK: add      w21, w22, w23, lsr #0      // encoding: [0xd5,0x02,0x57,0x0b]
 // CHECK: add      w24, w25, w26, lsr #18     // encoding: [0x38,0x4b,0x5a,0x0b]
 // CHECK: add      w27, w28, w29, lsr #31     // encoding: [0x9b,0x7f,0x5d,0x0b]
+// CHECK: add      w27, w28, w29, lsr #29     // encoding: [0x9b,0x77,0x5d,0x0b]
 
         add w2, w3, w4, asr #0
         add w5, w6, w7, asr #21
         add w8, w9, w10, asr #31
+        add w8, w9, w10, asr #(31-2)
 // CHECK: add      w2, w3, w4, asr #0         // encoding: [0x62,0x00,0x84,0x0b]
 // CHECK: add      w5, w6, w7, asr #21        // encoding: [0xc5,0x54,0x87,0x0b]
 // CHECK: add      w8, w9, w10, asr #31       // encoding: [0x28,0x7d,0x8a,0x0b]
+// CHECK: add      w8, w9, w10, asr #29       // encoding: [0x28,0x75,0x8a,0x0b]
 
         add x3, x5, x7
         add xzr, x3, x5
@@ -400,23 +406,29 @@ _func:
         add x11, x13, x15, lsl #0
         add x9, x3, xzr, lsl #10
         add x17, x29, x20, lsl #63
+        add x17, x29, x20, lsl #(63-5)
 // CHECK: add      x11, x13, x15              // encoding: [0xab,0x01,0x0f,0x8b]
 // CHECK: add      x9, x3, xzr, lsl #10       // encoding: [0x69,0x28,0x1f,0x8b]
 // CHECK: add      x17, x29, x20, lsl #63     // encoding: [0xb1,0xff,0x14,0x8b]
+// CHECK: add	   x17, x29, x20, lsl #58     // encoding: [0xb1,0xeb,0x14,0x8b]
 
         add x21, x22, x23, lsr #0
         add x24, x25, x26, lsr #18
         add x27, x28, x29, lsr #63
+        add x17, x29, x20, lsr #(63-5)
 // CHECK: add      x21, x22, x23, lsr #0      // encoding: [0xd5,0x02,0x57,0x8b]
 // CHECK: add      x24, x25, x26, lsr #18     // encoding: [0x38,0x4b,0x5a,0x8b]
 // CHECK: add      x27, x28, x29, lsr #63     // encoding: [0x9b,0xff,0x5d,0x8b]
+// CHECK: add	   x17, x29, x20, lsr #58     // encoding: [0xb1,0xeb,0x54,0x8b]
 
         add x2, x3, x4, asr #0
         add x5, x6, x7, asr #21
         add x8, x9, x10, asr #63
+        add x17, x29, x20, asr #(63-5)
 // CHECK: add      x2, x3, x4, asr #0         // encoding: [0x62,0x00,0x84,0x8b]
 // CHECK: add      x5, x6, x7, asr #21        // encoding: [0xc5,0x54,0x87,0x8b]
 // CHECK: add      x8, x9, x10, asr #63       // encoding: [0x28,0xfd,0x8a,0x8b]
+// CHECK: add	   x17, x29, x20, asr #58     // encoding: [0xb1,0xeb,0x94,0x8b]
 
         adds w3, w5, w7
         adds wzr, w3, w5
@@ -4786,12 +4798,16 @@ _func:
 
         mrs x12, s3_7_c15_c1_5
         mrs x13, s3_2_c11_c15_7
+        mrs x14, s1_3_c9_c2_1
         msr s3_0_c15_c0_0, x12
         msr s3_7_c11_c13_7, x5
+        msr s1_3_c9_c2_1, x4
 // CHECK: mrs     x12, {{s3_7_c15_c1_5|S3_7_C15_C1_5}}      // encoding: [0xac,0xf1,0x3f,0xd5]
-// CHECK: mrs     x13, {{s3_2_c11_c15_7|S3_2_C11_C15_7}}     // encoding: [0xed,0xbf,0x3a,0xd5]
+// CHECK: mrs     x13, {{s3_2_c11_c15_7|S3_2_C11_C15_7}}    // encoding: [0xed,0xbf,0x3a,0xd5]
+// CHECK: mrs     x14, {{s1_3_c9_c2_1|S1_3_C9_C2_1}}        // encoding: [0x2e,0x92,0x2b,0xd5]
 // CHECK: msr     {{s3_0_c15_c0_0|S3_0_C15_C0_0}}, x12      // encoding: [0x0c,0xf0,0x18,0xd5]
-// CHECK: msr     {{s3_7_c11_c13_7|S3_7_C11_C13_7}}, x5      // encoding: [0xe5,0xbd,0x1f,0xd5]
+// CHECK: msr     {{s3_7_c11_c13_7|S3_7_C11_C13_7}}, x5     // encoding: [0xe5,0xbd,0x1f,0xd5]
+// CHECK: msr     {{s1_3_c9_c2_1|S1_3_C9_C2_1}}, x4         // encoding: [0x24,0x92,0x0b,0xd5]
 
 //------------------------------------------------------------------------------
 // Unconditional branch (immediate)
diff --git a/test/MC/AArch64/elf_osabi_flags.s b/test/MC/AArch64/elf_osabi_flags.s
new file mode 100644
index 000000000000..68cb385fc991
--- /dev/null
+++ b/test/MC/AArch64/elf_osabi_flags.s
@@ -0,0 +1,5 @@
+# RUN: llvm-mc -filetype=obj -triple aarch64 %s -o -| llvm-readobj -h | FileCheck --check-prefix=AARCH64-OSABI %s
+# AARCH64-OSABI: OS/ABI: SystemV (0x0)
+
+# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu %s -o -| llvm-readobj -h | FileCheck --check-prefix=AARCH64-LINUX-OSABI %s
+# AARCH64-LINUX-OSABI: OS/ABI: GNU/Linux (0x3)
diff --git a/test/MC/AArch64/inline-asm-modifiers.s b/test/MC/AArch64/inline-asm-modifiers.s
index cf34a952e90c..c3ba1cf62876 100644
--- a/test/MC/AArch64/inline-asm-modifiers.s
+++ b/test/MC/AArch64/inline-asm-modifiers.s
@@ -73,7 +73,7 @@ test_inline_modifier_A:                 // @test_inline_modifier_A
 	.size	test_inline_modifier_A, .Ltmp2-test_inline_modifier_A
 // CHECK: R_AARCH64_ADR_PREL_PG_HI21 var_simple
 // CHECK: R_AARCH64_ADR_GOT_PAGE var_got
-// CHECK: R_AARCH64_TLSDESC_ADR_PAGE var_tlsgd
+// CHECK: R_AARCH64_TLSDESC_ADR_PAGE21 var_tlsgd
 // CHECK: R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21 var_tlsie
 
 	.globl	test_inline_modifier_wx
diff --git a/test/MC/AArch64/inst-directive-diagnostic.s b/test/MC/AArch64/inst-directive-diagnostic.s
new file mode 100644
index 000000000000..8abad5ecbc99
--- /dev/null
+++ b/test/MC/AArch64/inst-directive-diagnostic.s
@@ -0,0 +1,19 @@
+// RUN: not llvm-mc %s -triple=aarch64-none-linux-gnu -filetype asm -o - 2>&1 \
+// RUN:   | FileCheck -check-prefix CHECK-ERROR %s
+
+	.align 2
+	.global diagnostics
+	.type diagnostics,%function
+diagnostics:
+.Label:
+    .inst
+// CHECK-ERROR: expected expression following directive
+
+    .inst 0x5e104020,
+// CHECK-ERROR: expected expression
+
+    .inst .Label
+// CHECK-ERROR: expected constant expression
+
+    .inst 0x5e104020 0x5e104020
+// CHECK-ERROR: unexpected token in directive
diff --git a/test/MC/AArch64/inst-directive.s b/test/MC/AArch64/inst-directive.s
new file mode 100644
index 000000000000..6a4b64eeaaf0
--- /dev/null
+++ b/test/MC/AArch64/inst-directive.s
@@ -0,0 +1,24 @@
+// RUN: llvm-mc %s -triple=aarch64-none-linux-gnu -filetype=asm -o - \
+// RUN:   | FileCheck %s --check-prefix=CHECK-ASM
+// RUN: llvm-mc %s -triple=aarch64-none-linux-gnu -filetype=obj -o - \
+// RUN:   | llvm-readobj -s -sd | FileCheck %s  --check-prefix=CHECK-OBJ
+
+    .section    .inst.aarch64_inst
+
+    .align  2
+    .global aarch64_inst
+    .type   aarch64_inst,%function
+aarch64_inst:
+    .inst 0x5e104020
+
+// CHECK-ASM:        .align  2
+// CHECK-ASM:        .globl  aarch64_inst
+// CHECK-ASM:        .type   aarch64_inst,@function
+// CHECK-ASM: aarch64_inst:
+// CHECK-ASM:        .inst   0x5E104020
+
+// CHECK-OBJ: Section {
+// CHECK-OBJ:   Name: .inst.aarch64_inst
+// CHECK-OBJ:   SectionData (
+// CHECK-OBJ-NEXT: 0000: 2040105E
+// CHECK-OBJ-NEXT: )
diff --git a/test/MC/AArch64/single-slash.s b/test/MC/AArch64/single-slash.s
new file mode 100644
index 000000000000..c4c266c6b06a
--- /dev/null
+++ b/test/MC/AArch64/single-slash.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple aarch64-none-linux-gnu < %s | FileCheck %s
+
+// Test that a single slash is not mistaken as the start of comment.
+
+//CHECK: movz    x0, #0x10
+    movz x0, #(32 / 2)
diff --git a/test/MC/AArch64/tls-relocs.s b/test/MC/AArch64/tls-relocs.s
index ebf02167a8f3..9e94a52e5afe 100644
--- a/test/MC/AArch64/tls-relocs.s
+++ b/test/MC/AArch64/tls-relocs.s
@@ -391,7 +391,7 @@
 // CHECK:                                 //   fixup A - offset: 0, value: var, kind: fixup_aarch64_tlsdesc_call
 // CHECK: blr    x3                      // encoding: [0x60,0x00,0x3f,0xd6]
 
-// CHECK-ELF-NEXT:     0x104 R_AARCH64_TLSDESC_ADR_PAGE [[VARSYM]]
+// CHECK-ELF-NEXT:     0x104 R_AARCH64_TLSDESC_ADR_PAGE21 [[VARSYM]]
 // CHECK-ELF-NEXT:     0x108 R_AARCH64_TLSDESC_LD64_LO12_NC [[VARSYM]]
 // CHECK-ELF-NEXT:     0x10C R_AARCH64_TLSDESC_ADD_LO12_NC [[VARSYM]]
 // CHECK-ELF-NEXT:     0x110 R_AARCH64_TLSDESC_CALL [[VARSYM]]
diff --git a/test/MC/ARM/arm-elf-relocation-diagnostics.s b/test/MC/ARM/arm-elf-relocation-diagnostics.s
new file mode 100644
index 000000000000..5fe903f71619
--- /dev/null
+++ b/test/MC/ARM/arm-elf-relocation-diagnostics.s
@@ -0,0 +1,27 @@
+@ RUN: not llvm-mc -triple armv7-eabi -filetype obj -o - %s 2>&1 \
+@ RUN:     | FileCheck %s
+@ RUN: not llvm-mc -triple thumbv7-eabi -filetype obj -o - %s 2>&1 \
+@ RUN:     | FileCheck %s
+
+	.byte target(sbrel)
+@ CHECK: error: relocated expression must be 32-bit
+@ CHECK: .byte target(sbrel)
+@ CHECK:       ^
+
+@ TODO: enable these negative test cases
+@ 	.hword target(sbrel)
+@ @ CHECK-SBREL-HWORD: error: relocated expression must be 32-bit
+@ @ CHECK-SBREL-HWORD: .hword target(sbrel)
+@ @ CHECK-SBREL-HWORD:        ^
+@
+@ 	.short target(sbrel)
+@ @ CHECK-SBREL-SHORT: error: relocated expression must be 32-bit
+@ @ CHECK-SBREL-SHORT: .short target(sbrel)
+@ @ CHECK-SBREL-SHORT:        ^
+@
+@ 	.quad target(sbrel)
+@ @ CHECK-SBREL-SHORT: error: relocated expression must be 32-bit
+@ @ CHECK-SBREL-SHORT: .quad target(sbrel)
+@ @ CHECK-SBREL-SHORT:        ^
+
+
diff --git a/test/MC/ARM/arm-elf-relocations.s b/test/MC/ARM/arm-elf-relocations.s
new file mode 100644
index 000000000000..4059591d95b1
--- /dev/null
+++ b/test/MC/ARM/arm-elf-relocations.s
@@ -0,0 +1,37 @@
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s | llvm-readobj -r - \
+@ RUN:     | FileCheck %s
+@ RUN: llvm-mc -triple thumbv7-eabi -filetype obj -o - %s | llvm-readobj -r - \
+@ RUN:     | FileCheck %s
+
+	.syntax unified
+
+	.section .text.r_arm_abs8
+
+	.byte abs8_0 -128
+	.byte abs8_1 +255
+
+@ CHECK: Section {{.*}} .rel.text.r_arm_abs8 {
+@ CHECK:   0x0 R_ARM_ABS8 abs8_0 0x0
+@ CHECK:   0x1 R_ARM_ABS8 abs8_1 0x0
+@ CHECK: }
+
+	.section .text.r_arm_abs16
+
+	.short abs16_0 -32768
+	.short abs16_1 +65535
+
+@ CHECK: Section {{.*}} .rel.text.r_arm_abs16 {
+@ CHECK:   0x0 R_ARM_ABS16 abs16_0 0x0
+@ CHECK:   0x2 R_ARM_ABS16 abs16_1 0x0
+@ CHECK: }
+
+	.section .text.r_arm_sbrel32
+
+	.word target(sbrel)
+	.word target(SBREL)
+
+@ CHECK: Section {{.*}} .rel.text.r_arm_sbrel32 {
+@ CHECK:   0x0 R_ARM_SBREL32 target 0x0
+@ CHECK:   0x4 R_ARM_SBREL32 target 0x0
+@ CHECK: }
+
diff --git a/test/MC/ARM/arm-load-store-multiple-deprecated.s b/test/MC/ARM/arm-load-store-multiple-deprecated.s
new file mode 100644
index 000000000000..9354822bc417
--- /dev/null
+++ b/test/MC/ARM/arm-load-store-multiple-deprecated.s
@@ -0,0 +1,222 @@
+@ RUN: llvm-mc -triple armv6t2-linux-eabi -filetype asm -o - %s 2>&1 \
+@ RUN:   | FileCheck %s
+
+@ RUN: not llvm-mc -triple armv7-linux-eabi -filetype asm -o - %s 2>&1 \
+@ RUN:   | FileCheck %s -check-prefix CHECK -check-prefix CHECK-V7
+
+	.syntax unified
+	.arm
+
+	.global stm
+	.type stm,%function
+stm:
+	stm sp!, {r0, pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stm sp!, {r0, pc}
+@ CHECK: ^
+	stm r0!, {r0, sp}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stm r0!, {r0, sp}
+@ CHECK: ^
+	stm r1!, {r0, sp, pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stm r1!, {r0, sp, pc}
+@ CHECK: ^
+	stm r2!, {sp, pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stm r2!, {sp, pc}
+@ CHECK: ^
+	stm sp!, {pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stm sp!, {pc}
+@ CHECK: ^
+	stm r0!, {sp}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stm r0!, {sp}
+@ CHECK: ^
+
+	.global stmda
+	.type stmda,%function
+stmda:
+	stmda sp!, {r0, pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmda sp!, {r0, pc}
+@ CHECK: ^
+	stmda r0!, {r0, sp}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmda r0!, {r0, sp}
+@ CHECK: ^
+	stmda r1!, {r0, sp, pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmda r1!, {r0, sp, pc}
+@ CHECK: ^
+	stmda r2!, {sp, pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmda r2!, {sp, pc}
+@ CHECK: ^
+	stmda sp!, {pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmda sp!, {pc}
+@ CHECK: ^
+	stmda r0!, {sp}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmda r0!, {sp}
+@ CHECK: ^
+
+	.global stmdb
+	.type stmdb,%function
+stmdb:
+	stmdb sp!, {r0, pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmdb sp!, {r0, pc}
+@ CHECK: ^
+	stmdb r0!, {r0, sp}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmdb r0!, {r0, sp}
+@ CHECK: ^
+	stmdb r1!, {r0, sp, pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmdb r1!, {r0, sp, pc}
+@ CHECK: ^
+	stmdb r2!, {sp, pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmdb r2!, {sp, pc}
+@ CHECK: ^
+	stmdb sp!, {pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmdb sp!, {pc}
+@ CHECK: ^
+	stmdb r0!, {sp}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmdb r0!, {sp}
+@ CHECK: ^
+
+	.global stmib
+	.type stmib,%function
+stmib:
+	stmib sp!, {r0, pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmib sp!, {r0, pc}
+@ CHECK: ^
+	stmib r0!, {r0, sp}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmib r0!, {r0, sp}
+@ CHECK: ^
+	stmib r1!, {r0, sp, pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmib r1!, {r0, sp, pc}
+@ CHECK: ^
+	stmib r2!, {sp, pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmib r2!, {sp, pc}
+@ CHECK: ^
+	stmib sp!, {pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmib sp!, {pc}
+@ CHECK: ^
+	stmib r0!, {sp}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: stmib r0!, {sp}
+@ CHECK: ^
+
+
+	.global push
+	.type push,%function
+push:
+	push {r0, pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: push {r0, pc}
+@ CHECK: ^
+	push {r0, sp}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: push {r0, sp}
+@ CHECK: ^
+	push {r0, sp, pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: push {r0, sp, pc}
+@ CHECK: ^
+	push {sp, pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: push {sp, pc}
+@ CHECK: ^
+	push {pc}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: push {pc}
+@ CHECK: ^
+	push {sp}
+@ CHECK: warning: use of SP or PC in the list is deprecated
+@ CHECK: push {sp}
+@ CHECK: ^
+
+	.global ldm
+	.type ldm,%function
+ldm:
+	ldm r0!, {r1, sp}
+@ CHECK: warning: use of SP in the list is deprecated
+	ldm r0!, {sp}
+@ CHECK: warning: use of SP in the list is deprecated
+	ldm r0!, {r1, lr, pc}
+@ CHECK: warning: use of LR and PC simultaneously in the list is deprecated
+	ldm r0!, {lr, pc}
+@ CHECK: warning: use of LR and PC simultaneously in the list is deprecated
+
+	.global ldmda
+	.type ldmda,%function
+ldmda:
+	ldmda r0!, {r1, sp}
+@ CHECK: warning: use of SP in the list is deprecated
+	ldmda r0!, {sp}
+@ CHECK: warning: use of SP in the list is deprecated
+	ldmda r0!, {r1, lr, pc}
+@ CHECK: warning: use of LR and PC simultaneously in the list is deprecated
+	ldmda r0!, {lr, pc}
+@ CHECK: warning: use of LR and PC simultaneously in the list is deprecated
+
+	.global ldmdb
+	.type ldmdb,%function
+ldmdb:
+	ldmdb r0!, {r1, sp}
+@ CHECK: warning: use of SP in the list is deprecated
+	ldmdb r0!, {sp}
+@ CHECK: warning: use of SP in the list is deprecated
+	ldmdb r0!, {r1, lr, pc}
+@ CHECK: warning: use of LR and PC simultaneously in the list is deprecated
+	ldmdb r0!, {lr, pc}
+@ CHECK: warning: use of LR and PC simultaneously in the list is deprecated
+
+	.global ldmib
+	.type ldmib,%function
+ldmib:
+	ldmib r0!, {r1, sp}
+@ CHECK: warning: use of SP in the list is deprecated
+	ldmib r0!, {sp}
+@ CHECK: warning: use of SP in the list is deprecated
+	ldmib r0!, {r1, lr, pc}
+@ CHECK: warning: use of LR and PC simultaneously in the list is deprecated
+	ldmib r0!, {lr, pc}
+@ CHECK: warning: use of LR and PC simultaneously in the list is deprecated
+
+	.global pop
+	.type pop,%function
+pop:
+	pop {r0, sp}
+@ CHECK: warning: use of SP in the list is deprecated
+@ CHECK-V7: error: writeback register not allowed in register list
+	pop {sp}
+@ CHECK: warning: use of SP in the list is deprecated
+@ CHECK-V7: error: writeback register not allowed in register list
+	pop {r0, lr, pc}
+@ CHECK: warning: use of LR and PC simultaneously in the list is deprecated
+	pop {lr, pc}
+@ CHECK: warning: use of LR and PC simultaneously in the list is deprecated
+
+	.global valid
+	.type valid,%function
+valid:
+	stmdaeq r0, {r0}
+@ CHECK: stmdaeq r0, {r0}
+	ldmdaeq r0, {r0}
+@ CHECK: ldmdaeq r0, {r0}
+	pop {r0, pc}
+@ CHECK: pop {r0, pc}
+
diff --git a/test/MC/ARM/arm_instructions.s b/test/MC/ARM/arm_instructions.s
index a4b6bda880c5..a4c100ee68f9 100644
--- a/test/MC/ARM/arm_instructions.s
+++ b/test/MC/ARM/arm_instructions.s
@@ -1,6 +1,6 @@
 @ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding %s \
 @ RUN:  | FileCheck %s -check-prefix=ALL
-@ RUN: llvm-mc -mcpu=cortex-a9-mp -triple armv7-unknown-nacl -show-encoding %s \
+@ RUN: llvm-mc -mcpu=cortex-a9 -triple armv7-unknown-nacl -show-encoding %s \
 @ RUN:  | FileCheck %s -check-prefix=NACL
 @ RUN: llvm-mc -mcpu=cortex-a8 -mattr=+nacl-trap -triple armv7 -show-encoding %s \
 @ RUN:  | FileCheck %s -check-prefix=NACL
diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s
index e5e96170a775..616674e56350 100644
--- a/test/MC/ARM/basic-arm-instructions.s
+++ b/test/MC/ARM/basic-arm-instructions.s
@@ -16,6 +16,15 @@ _func:
 @ ADC (immediate)
 @------------------------------------------------------------------------------
         adc r1, r2, #0xf
+        adc r1, r2, $0xf
+        adc r1, r2, 0xf
+        adc r7, r8, #(0xff << 16)
+        adc r7, r8, #-2147483638
+        adc r7, r8, #42, #2
+        adc r7, r8, #40, #2
+        adc r7, r8, $40, $2
+        adc r7, r8, 40, 2
+        adc r7, r8, (2 * 20), (1 << 1)
         adc r1, r2, #0xf0
         adc r1, r2, #0xf00
         adc r1, r2, #0xf000
@@ -25,20 +34,30 @@ _func:
         adc r1, r2, #0xf0000000
         adc r1, r2, #0xf000000f
         adcs r1, r2, #0xf00
+        adcs r7, r8, #40, #2
         adcseq r1, r2, #0xf00
         adceq r1, r2, #0xf00
 
 @ CHECK: adc	r1, r2, #15             @ encoding: [0x0f,0x10,0xa2,0xe2]
+@ CHECK: adc	r1, r2, #15             @ encoding: [0x0f,0x10,0xa2,0xe2]
+@ CHECK: adc	r1, r2, #15             @ encoding: [0x0f,0x10,0xa2,0xe2]
+@ CHECK: adc	r7, r8, #16711680       @ encoding: [0xff,0x78,0xa8,0xe2]
+@ CHECK: adc    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0xa8,0xe2]
+@ CHECK: adc    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0xa8,0xe2]
+@ CHECK: adc    r7, r8, #40, #2         @ encoding: [0x28,0x71,0xa8,0xe2]
+@ CHECK: adc    r7, r8, #40, #2         @ encoding: [0x28,0x71,0xa8,0xe2]
+@ CHECK: adc    r7, r8, #40, #2         @ encoding: [0x28,0x71,0xa8,0xe2]
+@ CHECK: adc	r7, r8, #40, #2         @ encoding: [0x28,0x71,0xa8,0xe2]
 @ CHECK: adc	r1, r2, #240            @ encoding: [0xf0,0x10,0xa2,0xe2]
 @ CHECK: adc	r1, r2, #3840           @ encoding: [0x0f,0x1c,0xa2,0xe2]
 @ CHECK: adc	r1, r2, #61440          @ encoding: [0x0f,0x1a,0xa2,0xe2]
 @ CHECK: adc	r1, r2, #983040         @ encoding: [0x0f,0x18,0xa2,0xe2]
 @ CHECK: adc	r1, r2, #15728640       @ encoding: [0x0f,0x16,0xa2,0xe2]
 @ CHECK: adc	r1, r2, #251658240      @ encoding: [0x0f,0x14,0xa2,0xe2]
-@ CHECK: adc	r1, r2, #4026531840     @ encoding: [0x0f,0x12,0xa2,0xe2]
-@ CHECK: adc	r1, r2, #4026531855     @ encoding: [0xff,0x12,0xa2,0xe2]
-
+@ CHECK: adc	r1, r2, #-268435456     @ encoding: [0x0f,0x12,0xa2,0xe2]
+@ CHECK: adc	r1, r2, #-268435441     @ encoding: [0xff,0x12,0xa2,0xe2]
 @ CHECK: adcs	r1, r2, #3840           @ encoding: [0x0f,0x1c,0xb2,0xe2]
+@ CHECK: adcs   r7, r8, #40, #2         @ encoding: [0x28,0x71,0xb8,0xe2]
 @ CHECK: adcseq	r1, r2, #3840           @ encoding: [0x0f,0x1c,0xb2,0x02]
 @ CHECK: adceq	r1, r2, #3840           @ encoding: [0x0f,0x1c,0xa2,0x02]
 
@@ -162,6 +181,16 @@ Lforward:
 @ ADD
 @------------------------------------------------------------------------------
         add r4, r5, #0xf000
+        add r4, r5, $0xf000
+        add r4, r5, 0xf000
+        add r4, r5, -0xf000
+	add r7, r8, #(0xff << 16)
+        add r7, r8, #-2147483638
+        add r7, r8, #42, #2
+        add r7, r8, #40, #2
+        add r7, r8, $40, $2
+        add r7, r8, 40, 2
+        add r7, r8, (2 * 20), (1 << 1)
         add r4, r5, r6
         add r4, r5, r6, lsl #5
         add r4, r5, r6, lsr #5
@@ -177,6 +206,16 @@ Lforward:
 
         @ destination register is optional
         add r5, #0xf000
+        add r5, $0xf000
+        add r5, 0xf000
+        add r5, -0xf000
+	add r7, #(0xff << 16)
+        add r7, #-2147483638
+        add r7, #42, #2
+        add r7, #40, #2
+        add r7, $40, $2
+        add r7, 40, 2
+        add r7, (2 * 20), (1 << 1)
         add r4, r5
         add r4, r5, lsl #5
         add r4, r5, lsr #5
@@ -189,11 +228,25 @@ Lforward:
         add r6, r7, ror r9
         add r4, r5, rrx
 
-	add r0, #-4
-	add r4, r5, #-21
+        add r0, #-4
+        add r4, r5, #-21
         add r0, pc, #0xc0000000
+        addseq r0,pc,#0xc0000000
+
+
+        add r0, pc, #(Lback - .)
 
 @ CHECK: add	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x85,0xe2]
+@ CHECK: add	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x85,0xe2]
+@ CHECK: add	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x85,0xe2]
+@ CHECK: sub	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x45,0xe2]
+@ CHECK: add	r7, r8, #16711680       @ encoding: [0xff,0x78,0x88,0xe2]
+@ CHECK: add    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x88,0xe2]
+@ CHECK: add    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x88,0xe2]
+@ CHECK: add    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x88,0xe2]
+@ CHECK: add    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x88,0xe2]
+@ CHECK: add    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x88,0xe2]
+@ CHECK: add    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x88,0xe2]
 @ CHECK: add	r4, r5, r6              @ encoding: [0x06,0x40,0x85,0xe0]
 @ CHECK: add	r4, r5, r6, lsl #5      @ encoding: [0x86,0x42,0x85,0xe0]
 @ CHECK: add	r4, r5, r6, lsr #5      @ encoding: [0xa6,0x42,0x85,0xe0]
@@ -208,6 +261,16 @@ Lforward:
 @ CHECK: add	r4, r5, r6, rrx         @ encoding: [0x66,0x40,0x85,0xe0]
 
 @ CHECK: add	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x85,0xe2]
+@ CHECK: add	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x85,0xe2]
+@ CHECK: add	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x85,0xe2]
+@ CHECK: sub	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x45,0xe2]
+@ CHECK: add	r7, r7, #16711680       @ encoding: [0xff,0x78,0x87,0xe2]
+@ CHECK: add	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0x87,0xe2]
+@ CHECK: add	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0x87,0xe2]
+@ CHECK: add	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x87,0xe2]
+@ CHECK: add	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x87,0xe2]
+@ CHECK: add	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x87,0xe2]
+@ CHECK: add	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x87,0xe2]
 @ CHECK: add	r4, r4, r5              @ encoding: [0x05,0x40,0x84,0xe0]
 @ CHECK: add	r4, r4, r5, lsl #5      @ encoding: [0x85,0x42,0x84,0xe0]
 @ CHECK: add	r4, r4, r5, lsr #5      @ encoding: [0xa5,0x42,0x84,0xe0]
@@ -222,7 +285,12 @@ Lforward:
 
 @ CHECK: sub	r0, r0, #4              @ encoding: [0x04,0x00,0x40,0xe2]
 @ CHECK: sub	r4, r5, #21             @ encoding: [0x15,0x40,0x45,0xe2]
-@ CHECK: adr    r0, #-1073741824        @ encoding: [0x03,0x01,0x8f,0xe2]
+@ CHECK: adr	r0, #-1073741824        @ encoding: [0x03,0x01,0x8f,0xe2]
+@ CHECK: addseq r0, pc, #-1073741824    @ encoding: [0x03,0x01,0x9f,0x02]
+@ CHECK:        Ltmp0:
+@ CHECK-NEXT:   Ltmp1:
+@ CHECK-NEXT:   adr	r0, (Ltmp1+8)+(Lback-Ltmp0) @ encoding: [A,A,0x0f'A',0xe2'A']
+@ CHECK-NEXT:                           @   fixup A - offset: 0, value: (Ltmp1+8)+(Lback-Ltmp0), kind: fixup_arm_adr_pcrel_12
 
     @ Test right shift by 32, which is encoded as 0
     add r3, r1, r2, lsr #32
@@ -231,9 +299,44 @@ Lforward:
 @ CHECK: add	r3, r1, r2, asr #32     @ encoding: [0x42,0x30,0x81,0xe0]
 
 @------------------------------------------------------------------------------
+@ ADDS
+@------------------------------------------------------------------------------
+    adds r7, r8, #16711680
+    adds r7, r8, $16711680
+    adds r7, r8, 16711680
+    adds r7, r8, #(0xff << 16)
+    adds r7, r8, #-2147483638
+    adds r7, r8, #42, #2
+    adds r7, r8, #40, #2
+    adds r7, r8, $40, $2
+    adds r7, r8, 40, 2
+    adds r7, r8, (2 * 20), (1 << 1)
+
+@ CHECK: adds	r7, r8, #16711680         @ encoding: [0xff,0x78,0x98,0xe2]
+@ CHECK: adds	r7, r8, #16711680         @ encoding: [0xff,0x78,0x98,0xe2]
+@ CHECK: adds	r7, r8, #16711680         @ encoding: [0xff,0x78,0x98,0xe2]
+@ CHECK: adds	r7, r8, #16711680         @ encoding: [0xff,0x78,0x98,0xe2]
+@ CHECK: adds   r7, r8, #-2147483638      @ encoding: [0x2a,0x71,0x98,0xe2]
+@ CHECK: adds   r7, r8, #-2147483638      @ encoding: [0x2a,0x71,0x98,0xe2]
+@ CHECK: adds   r7, r8, #40, #2           @ encoding: [0x28,0x71,0x98,0xe2]
+@ CHECK: adds   r7, r8, #40, #2           @ encoding: [0x28,0x71,0x98,0xe2]
+@ CHECK: adds   r7, r8, #40, #2           @ encoding: [0x28,0x71,0x98,0xe2]
+@ CHECK: adds   r7, r8, #40, #2           @ encoding: [0x28,0x71,0x98,0xe2]
+
+@------------------------------------------------------------------------------
 @ AND
 @------------------------------------------------------------------------------
     and r10, r1, #0xf
+    and r10, r1, $0xf
+    and r10, r1, 0xf
+    and r10, r1, -0xf
+    and r7, r8, #(0xff << 16)
+    and r7, r8, #-2147483638
+    and r7, r8, #42, #2
+    and r7, r8, #40, #2
+    and r7, r8, $40, $2
+    and r7, r8, 40, 2
+    and r7, r8, (2 * 20), (1 << 1)
     and r10, r1, r6
     and r10, r1, r6, lsl #10
     and r10, r1, r6, lsr #10
@@ -249,6 +352,16 @@ Lforward:
 
     @ destination register is optional
     and r1, #0xf
+    and r1, $0xf
+    and r1, 0xf
+    and r1, -0xf
+    and r7, #(0xff << 16)
+    and r7, #-2147483638
+    and r7, #42, #2
+    and r7, #40, #2
+    and r7, $40, $2
+    and r7, 40, 2
+    and r7, (2 * 20), (1 << 1)
     and r10, r1
     and r10, r1, lsl #10
     and r10, r1, lsr #10
@@ -262,6 +375,16 @@ Lforward:
     and r10, r1, rrx
 
 @ CHECK: and	r10, r1, #15            @ encoding: [0x0f,0xa0,0x01,0xe2]
+@ CHECK: and	r10, r1, #15            @ encoding: [0x0f,0xa0,0x01,0xe2]
+@ CHECK: and	r10, r1, #15            @ encoding: [0x0f,0xa0,0x01,0xe2]
+@ CHECK: bic	r10, r1, #14            @ encoding: [0x0e,0xa0,0xc1,0xe3]
+@ CHECK: and	r7, r8, #16711680       @ encoding: [0xff,0x78,0x08,0xe2]
+@ CHECK: and    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x08,0xe2]
+@ CHECK: and    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x08,0xe2]
+@ CHECK: and    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x08,0xe2]
+@ CHECK: and    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x08,0xe2]
+@ CHECK: and    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x08,0xe2]
+@ CHECK: and    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x08,0xe2]
 @ CHECK: and	r10, r1, r6             @ encoding: [0x06,0xa0,0x01,0xe0]
 @ CHECK: and	r10, r1, r6, lsl #10    @ encoding: [0x06,0xa5,0x01,0xe0]
 @ CHECK: and	r10, r1, r6, lsr #10    @ encoding: [0x26,0xa5,0x01,0xe0]
@@ -276,6 +399,16 @@ Lforward:
 @ CHECK: bic	r2, r3, #-2147483648    @ encoding: [0x02,0x21,0xc3,0xe3]
 
 @ CHECK: and	r1, r1, #15             @ encoding: [0x0f,0x10,0x01,0xe2]
+@ CHECK: and	r1, r1, #15             @ encoding: [0x0f,0x10,0x01,0xe2]
+@ CHECK: and	r1, r1, #15             @ encoding: [0x0f,0x10,0x01,0xe2]
+@ CHECK: bic	r1, r1, #14             @ encoding: [0x0e,0x10,0xc1,0xe3]
+@ CHECK: and	r7, r7, #16711680       @ encoding: [0xff,0x78,0x07,0xe2]
+@ CHECK: and	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0x07,0xe2]
+@ CHECK: and	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0x07,0xe2]
+@ CHECK: and	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x07,0xe2]
+@ CHECK: and	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x07,0xe2]
+@ CHECK: and	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x07,0xe2]
+@ CHECK: and	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x07,0xe2]
 @ CHECK: and	r10, r10, r1            @ encoding: [0x01,0xa0,0x0a,0xe0]
 @ CHECK: and	r10, r10, r1, lsl #10   @ encoding: [0x01,0xa5,0x0a,0xe0]
 @ CHECK: and	r10, r10, r1, lsr #10   @ encoding: [0x21,0xa5,0x0a,0xe0]
@@ -348,6 +481,16 @@ Lforward:
 @ BIC
 @------------------------------------------------------------------------------
         bic r10, r1, #0xf
+        bic r10, r1, $0xf
+        bic r10, r1, 0xf
+        bic r10, r1, -0xf
+        bic r7, r8, #(0xff << 16)
+        bic r7, r8, #-2147483638
+        bic r7, r8, #42, #2
+        bic r7, r8, #40, #2
+        bic r7, r8, $40, $2
+        bic r7, r8, 40, 2
+        bic r7, r8, (2 * 20), (1 << 1)
         bic r10, r1, r6
         bic r10, r1, r6, lsl #10
         bic r10, r1, r6, lsr #10
@@ -362,6 +505,16 @@ Lforward:
 
         @ destination register is optional
         bic r1, #0xf
+        bic r1, $0xf
+        bic r1, 0xf
+        bic r1, -0xf
+        bic r7, #(0xff << 16)
+        bic r7, #-2147483638
+        bic r7, #42, #2
+        bic r7, #40, #2
+        bic r7, $40, $2
+        bic r7, 40, 2
+        bic r7, (2 * 20), (1 << 1)
         bic r10, r1
         bic r10, r1, lsl #10
         bic r10, r1, lsr #10
@@ -375,6 +528,15 @@ Lforward:
         bic r10, r1, rrx
 
 @ CHECK: bic	r10, r1, #15            @ encoding: [0x0f,0xa0,0xc1,0xe3]
+@ CHECK: bic	r10, r1, #15            @ encoding: [0x0f,0xa0,0xc1,0xe3]
+@ CHECK: bic	r10, r1, #15            @ encoding: [0x0f,0xa0,0xc1,0xe3]
+@ CHECK: and	r10, r1, #14            @ encoding: [0x0e,0xa0,0x01,0xe2]
+@ CHECK: bic	r7, r8, #16711680       @ encoding: [0xff,0x78,0xc8,0xe3]
+@ CHECK: bic    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0xc8,0xe3]
+@ CHECK: bic    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0xc8,0xe3]
+@ CHECK: bic    r7, r8, #40, #2         @ encoding: [0x28,0x71,0xc8,0xe3]
+@ CHECK: bic    r7, r8, #40, #2         @ encoding: [0x28,0x71,0xc8,0xe3]
+@ CHECK: bic    r7, r8, #40, #2         @ encoding: [0x28,0x71,0xc8,0xe3]
 @ CHECK: bic	r10, r1, r6             @ encoding: [0x06,0xa0,0xc1,0xe1]
 @ CHECK: bic	r10, r1, r6, lsl #10    @ encoding: [0x06,0xa5,0xc1,0xe1]
 @ CHECK: bic	r10, r1, r6, lsr #10    @ encoding: [0x26,0xa5,0xc1,0xe1]
@@ -389,6 +551,16 @@ Lforward:
 
 
 @ CHECK: bic	r1, r1, #15             @ encoding: [0x0f,0x10,0xc1,0xe3]
+@ CHECK: bic	r1, r1, #15             @ encoding: [0x0f,0x10,0xc1,0xe3]
+@ CHECK: bic	r1, r1, #15             @ encoding: [0x0f,0x10,0xc1,0xe3]
+@ CHECK: and	r1, r1, #14             @ encoding: [0x0e,0x10,0x01,0xe2]
+@ CHECK: bic	r7, r7, #16711680       @ encoding: [0xff,0x78,0xc7,0xe3]
+@ CHECK: bic    r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0xc7,0xe3]
+@ CHECK: bic    r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0xc7,0xe3]
+@ CHECK: bic    r7, r7, #40, #2         @ encoding: [0x28,0x71,0xc7,0xe3]
+@ CHECK: bic    r7, r7, #40, #2         @ encoding: [0x28,0x71,0xc7,0xe3]
+@ CHECK: bic    r7, r7, #40, #2         @ encoding: [0x28,0x71,0xc7,0xe3]
+@ CHECK: bic    r7, r7, #40, #2         @ encoding: [0x28,0x71,0xc7,0xe3]
 @ CHECK: bic	r10, r10, r1            @ encoding: [0x01,0xa0,0xca,0xe1]
 @ CHECK: bic	r10, r10, r1, lsl #10   @ encoding: [0x01,0xa5,0xca,0xe1]
 @ CHECK: bic	r10, r10, r1, lsr #10   @ encoding: [0x21,0xa5,0xca,0xe1]
@@ -505,6 +677,16 @@ Lforward:
 @ CMN
 @------------------------------------------------------------------------------
         cmn r1, #0xf
+        cmn r1, $0xf
+        cmn r1, 0xf
+        cmn r1, -0xf
+        cmn r7, #(0xff << 16)
+        cmn r7, #-2147483638
+        cmn r7, #42, #2
+        cmn r7, #40, #2
+        cmn r7, $40, $2
+        cmn r7, 40, 2
+        cmn r7, (20 * 2), (1 << 1)
         cmn r1, r6
         cmn r1, r6, lsl #10
         cmn r1, r6, lsr #10
@@ -518,6 +700,16 @@ Lforward:
         cmn r1, r6, rrx
 
 @ CHECK: cmn	r1, #15                 @ encoding: [0x0f,0x00,0x71,0xe3]
+@ CHECK: cmn	r1, #15                 @ encoding: [0x0f,0x00,0x71,0xe3]
+@ CHECK: cmn	r1, #15                 @ encoding: [0x0f,0x00,0x71,0xe3]
+@ CHECK: cmp	r1, #15                 @ encoding: [0x0f,0x00,0x51,0xe3]
+@ CHECK: cmn	r7, #16711680           @ encoding: [0xff,0x08,0x77,0xe3]
+@ CHECK: cmn	r7, #-2147483638        @ encoding: [0x2a,0x01,0x77,0xe3]
+@ CHECK: cmn	r7, #-2147483638        @ encoding: [0x2a,0x01,0x77,0xe3]
+@ CHECK: cmn	r7, #40, #2             @ encoding: [0x28,0x01,0x77,0xe3]
+@ CHECK: cmn	r7, #40, #2             @ encoding: [0x28,0x01,0x77,0xe3]
+@ CHECK: cmn	r7, #40, #2             @ encoding: [0x28,0x01,0x77,0xe3]
+@ CHECK: cmn	r7, #40, #2             @ encoding: [0x28,0x01,0x77,0xe3]
 @ CHECK: cmn	r1, r6                  @ encoding: [0x06,0x00,0x71,0xe1]
 @ CHECK: cmn	r1, r6, lsl #10         @ encoding: [0x06,0x05,0x71,0xe1]
 @ CHECK: cmn	r1, r6, lsr #10         @ encoding: [0x26,0x05,0x71,0xe1]
@@ -534,6 +726,16 @@ Lforward:
 @ CMP
 @------------------------------------------------------------------------------
         cmp r1, #0xf
+        cmp r1, $0xf
+        cmp r1, 0xf
+        cmp r1, -0xf
+        cmp r7, #(0xff << 16)
+        cmp r7, #-2147483638
+        cmp r7, #42, #2
+        cmp r7, #40, #2
+        cmp r7, $40, $2
+        cmp r7, 40, 2
+        cmp r7, (2 * 20), (1 << 1)
         cmp r1, r6
         cmp r1, r6, lsl #10
         cmp r1, r6, lsr #10
@@ -549,6 +751,16 @@ Lforward:
         cmp lr, #0
 
 @ CHECK: cmp	r1, #15                 @ encoding: [0x0f,0x00,0x51,0xe3]
+@ CHECK: cmp	r1, #15                 @ encoding: [0x0f,0x00,0x51,0xe3]
+@ CHECK: cmp	r1, #15                 @ encoding: [0x0f,0x00,0x51,0xe3]
+@ CHECK: cmn	r1, #15                 @ encoding: [0x0f,0x00,0x71,0xe3]
+@ CHECK: cmp	r7, #16711680           @ encoding: [0xff,0x08,0x57,0xe3]
+@ CHECK: cmp	r7, #-2147483638        @ encoding: [0x2a,0x01,0x57,0xe3]
+@ CHECK: cmp	r7, #-2147483638        @ encoding: [0x2a,0x01,0x57,0xe3]
+@ CHECK: cmp    r7, #40, #2             @ encoding: [0x28,0x01,0x57,0xe3]
+@ CHECK: cmp    r7, #40, #2             @ encoding: [0x28,0x01,0x57,0xe3]
+@ CHECK: cmp    r7, #40, #2             @ encoding: [0x28,0x01,0x57,0xe3]
+@ CHECK: cmp    r7, #40, #2             @ encoding: [0x28,0x01,0x57,0xe3]
 @ CHECK: cmp	r1, r6                  @ encoding: [0x06,0x00,0x51,0xe1]
 @ CHECK: cmp	r1, r6, lsl #10         @ encoding: [0x06,0x05,0x51,0xe1]
 @ CHECK: cmp	r1, r6, lsr #10         @ encoding: [0x26,0x05,0x51,0xe1]
@@ -734,6 +946,15 @@ Lforward:
 @ EOR
 @------------------------------------------------------------------------------
         eor r4, r5, #0xf000
+        eor r4, r5, $0xf000
+        eor r4, r5, 0xf000
+        eor r7, r8, #(0xff << 16)
+        eor r7, r8, #-2147483638
+        eor r7, r8, #42, #2
+        eor r7, r8, #40, #2
+        eor r7, r8, $40, $2
+        eor r7, r8, 40, 2
+        eor r7, r8, (20 * 2), (1 << 1)
         eor r4, r5, r6
         eor r4, r5, r6, lsl #5
         eor r4, r5, r6, lsr #5
@@ -748,6 +969,15 @@ Lforward:
 
         @ destination register is optional
         eor r5, #0xf000
+        eor r5, $0xf000
+        eor r5, 0xf000
+        eor r7, #(0xff << 16)
+        eor r7, #-2147483638
+        eor r7, #42, #2
+        eor r7, #40, #2
+        eor r7, $40, $2
+        eor r7, 40, 2
+        eor r7, (20 * 2), (1 << 1)
         eor r4, r5
         eor r4, r5, lsl #5
         eor r4, r5, lsr #5
@@ -761,6 +991,15 @@ Lforward:
         eor r4, r5, rrx
 
 @ CHECK: eor	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x25,0xe2]
+@ CHECK: eor	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x25,0xe2]
+@ CHECK: eor	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x25,0xe2]
+@ CHECK: eor	r7, r8, #16711680       @ encoding: [0xff,0x78,0x28,0xe2]
+@ CHECK: eor    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x28,0xe2]
+@ CHECK: eor    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x28,0xe2]
+@ CHECK: eor    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x28,0xe2]
+@ CHECK: eor    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x28,0xe2]
+@ CHECK: eor    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x28,0xe2]
+@ CHECK: eor    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x28,0xe2]
 @ CHECK: eor	r4, r5, r6              @ encoding: [0x06,0x40,0x25,0xe0]
 @ CHECK: eor	r4, r5, r6, lsl #5      @ encoding: [0x86,0x42,0x25,0xe0]
 @ CHECK: eor	r4, r5, r6, lsr #5      @ encoding: [0xa6,0x42,0x25,0xe0]
@@ -775,6 +1014,15 @@ Lforward:
 
 
 @ CHECK: eor	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x25,0xe2]
+@ CHECK: eor	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x25,0xe2]
+@ CHECK: eor	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x25,0xe2]
+@ CHECK: eor	r7, r7, #16711680       @ encoding: [0xff,0x78,0x27,0xe2]
+@ CHECK: eor	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0x27,0xe2]
+@ CHECK: eor	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0x27,0xe2]
+@ CHECK: eor	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x27,0xe2]
+@ CHECK: eor	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x27,0xe2]
+@ CHECK: eor	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x27,0xe2]
+@ CHECK: eor	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x27,0xe2]
 @ CHECK: eor	r4, r4, r5              @ encoding: [0x05,0x40,0x24,0xe0]
 @ CHECK: eor	r4, r4, r5, lsl #5      @ encoding: [0x85,0x42,0x24,0xe0]
 @ CHECK: eor	r4, r4, r5, lsr #5      @ encoding: [0xa5,0x42,0x24,0xe0]
@@ -1028,8 +1276,23 @@ Lforward:
 @ MOV (immediate)
 @------------------------------------------------------------------------------
         mov r3, #7
+        mov r3, $7
+        mov r3, 7
+        mov r3, -7
         mov r4, #0xff0
         mov r5, #0xff0000
+        mov r7, #42, #0
+        mov r7, #42, #10
+	mov r7, #(0xff << 16)
+        mov r7, #-2147483638
+        mov r7, #42, #2
+        mov pc, #42, #2
+        mov r7, #0, #2
+        mov r7, #40, #2
+        mov r7, $40, $2
+        mov r7, 40, 2
+        mov r7, (2 * 20), (1 << 1)
+        mov r7, #42, #30
         mov r6, #0xffff
         movw r9, #0xffff
         movs r3, #7
@@ -1037,8 +1300,23 @@ Lforward:
         movseq r5, #0xff0000
 
 @ CHECK: mov	r3, #7                  @ encoding: [0x07,0x30,0xa0,0xe3]
+@ CHECK: mov	r3, #7                  @ encoding: [0x07,0x30,0xa0,0xe3]
+@ CHECK: mov	r3, #7                  @ encoding: [0x07,0x30,0xa0,0xe3]
+@ CHECK: mvn	r3, #6                  @ encoding: [0x06,0x30,0xe0,0xe3]
 @ CHECK: mov	r4, #4080               @ encoding: [0xff,0x4e,0xa0,0xe3]
 @ CHECK: mov	r5, #16711680           @ encoding: [0xff,0x58,0xa0,0xe3]
+@ CHECK: mov    r7, #42                 @ encoding: [0x2a,0x70,0xa0,0xe3]
+@ CHECK: mov    r7, #176160768          @ encoding: [0x2a,0x75,0xa0,0xe3]
+@ CHECK: mov	r7, #16711680           @ encoding: [0xff,0x78,0xa0,0xe3]
+@ CHECK: mov    r7, #-2147483638        @ encoding: [0x2a,0x71,0xa0,0xe3]
+@ CHECK: mov    r7, #-2147483638        @ encoding: [0x2a,0x71,0xa0,0xe3]
+@ CHECK: mov    pc, #2147483658         @ encoding: [0x2a,0xf1,0xa0,0xe3]
+@ CHECK: mov    r7, #0, #2              @ encoding: [0x00,0x71,0xa0,0xe3]
+@ CHECK: mov    r7, #40, #2             @ encoding: [0x28,0x71,0xa0,0xe3]
+@ CHECK: mov    r7, #40, #2             @ encoding: [0x28,0x71,0xa0,0xe3]
+@ CHECK: mov    r7, #40, #2             @ encoding: [0x28,0x71,0xa0,0xe3]
+@ CHECK: mov    r7, #40, #2             @ encoding: [0x28,0x71,0xa0,0xe3]
+@ CHECK: mov    r7, #42, #30            @ encoding: [0x2a,0x7f,0xa0,0xe3]
 @ CHECK: movw	r6, #65535              @ encoding: [0xff,0x6f,0x0f,0xe3]
 @ CHECK: movw	r9, #65535              @ encoding: [0xff,0x9f,0x0f,0xe3]
 @ CHECK: movs	r3, #7                  @ encoding: [0x07,0x30,0xb0,0xe3]
@@ -1132,6 +1410,8 @@ Lforward:
 @------------------------------------------------------------------------------
 
         msr  apsr, #5
+        msr  apsr, $5
+        msr  apsr, 5
         msr  apsr_g, #5
         msr  apsr_nzcvq, #5
         msr  APSR_nzcvq, #5
@@ -1145,8 +1425,17 @@ Lforward:
         msr  spsr_fc, #5
         msr  SPSR_fsxc, #5
         msr  cpsr_fsxc, #5
+	msr  apsr_nzcvqg, #(0xff << 16)
+        msr  APSR_nzcvq, #42, #2
+        msr  apsr_nzcvqg, #2147483658
+        msr  SPSR_fsxc, #40, #2
+        msr  SPSR_fsxc, $40, $2
+        msr  SPSR_fsxc, 40, 2
+        msr  SPSR_fsxc, (2 * 20), (1 << 1)
 
 @ CHECK: msr	APSR_nzcvq, #5          @ encoding: [0x05,0xf0,0x28,0xe3]
+@ CHECK: msr	APSR_nzcvq, #5          @ encoding: [0x05,0xf0,0x28,0xe3]
+@ CHECK: msr	APSR_nzcvq, #5          @ encoding: [0x05,0xf0,0x28,0xe3]
 @ CHECK: msr	APSR_g, #5              @ encoding: [0x05,0xf0,0x24,0xe3]
 @ CHECK: msr	APSR_nzcvq, #5          @ encoding: [0x05,0xf0,0x28,0xe3]
 @ CHECK: msr	APSR_nzcvq, #5          @ encoding: [0x05,0xf0,0x28,0xe3]
@@ -1160,6 +1449,13 @@ Lforward:
 @ CHECK: msr	SPSR_fc, #5             @ encoding: [0x05,0xf0,0x69,0xe3]
 @ CHECK: msr	SPSR_fsxc, #5           @ encoding: [0x05,0xf0,0x6f,0xe3]
 @ CHECK: msr	CPSR_fsxc, #5           @ encoding: [0x05,0xf0,0x2f,0xe3]
+@ CHECK: msr	APSR_nzcvqg, #16711680  @ encoding: [0xff,0xf8,0x2c,0xe3]
+@ CHECK: msr    APSR_nzcvq, #2147483658 @ encoding: [0x2a,0xf1,0x28,0xe3]
+@ CHECK: msr    APSR_nzcvqg, #2147483658 @ encoding: [0x2a,0xf1,0x2c,0xe3]
+@ CHECK: msr    SPSR_fsxc, #40, #2      @ encoding: [0x28,0xf1,0x6f,0xe3]
+@ CHECK: msr    SPSR_fsxc, #40, #2      @ encoding: [0x28,0xf1,0x6f,0xe3]
+@ CHECK: msr    SPSR_fsxc, #40, #2      @ encoding: [0x28,0xf1,0x6f,0xe3]
+@ CHECK: msr    SPSR_fsxc, #40, #2      @ encoding: [0x28,0xf1,0x6f,0xe3]
 
         msr  apsr, r0
         msr  apsr_g, r0
@@ -1210,15 +1506,35 @@ Lforward:
 @ MVN (immediate)
 @------------------------------------------------------------------------------
         mvn r3, #7
+        mvn r3, $7
+        mvn r3, 7
+        mvn r3, -7
         mvn r4, #0xff0
         mvn r5, #0xff0000
+	mvn r7, #(0xff << 16)
+        mvn r7, #-2147483638
+        mvn r7, #42, #2
+        mvn r7, #40, #2
+        mvn r7, $40, $2
+        mvn r7, 40, 2
+        mvn r7, (2 * 20), (1 << 1)
         mvns r3, #7
         mvneq r4, #0xff0
         mvnseq r5, #0xff0000
 
 @ CHECK: mvn	r3, #7                  @ encoding: [0x07,0x30,0xe0,0xe3]
+@ CHECK: mvn	r3, #7                  @ encoding: [0x07,0x30,0xe0,0xe3]
+@ CHECK: mvn	r3, #7                  @ encoding: [0x07,0x30,0xe0,0xe3]
+@ CHECK: mov	r3, #6                  @ encoding: [0x06,0x30,0xa0,0xe3]
 @ CHECK: mvn	r4, #4080               @ encoding: [0xff,0x4e,0xe0,0xe3]
 @ CHECK: mvn	r5, #16711680           @ encoding: [0xff,0x58,0xe0,0xe3]
+@ CHECK: mvn	r7, #16711680           @ encoding: [0xff,0x78,0xe0,0xe3]
+@ CHECK: mvn    r7, #-2147483638        @ encoding: [0x2a,0x71,0xe0,0xe3]
+@ CHECK: mvn    r7, #-2147483638        @ encoding: [0x2a,0x71,0xe0,0xe3]
+@ CHECK: mvn    r7, #40, #2             @ encoding: [0x28,0x71,0xe0,0xe3]
+@ CHECK: mvn    r7, #40, #2             @ encoding: [0x28,0x71,0xe0,0xe3]
+@ CHECK: mvn    r7, #40, #2             @ encoding: [0x28,0x71,0xe0,0xe3]
+@ CHECK: mvn    r7, #40, #2             @ encoding: [0x28,0x71,0xe0,0xe3]
 @ CHECK: mvns	r3, #7                  @ encoding: [0x07,0x30,0xf0,0xe3]
 @ CHECK: mvneq	r4, #4080               @ encoding: [0xff,0x4e,0xe0,0x03]
 @ CHECK: mvnseq	r5, #16711680           @ encoding: [0xff,0x58,0xf0,0x03]
@@ -1285,6 +1601,15 @@ Lforward:
 @ ORR
 @------------------------------------------------------------------------------
         orr r4, r5, #0xf000
+        orr r4, r5, $0xf000
+        orr r4, r5, 0xf000
+	orr r7, r8, #(0xff << 16)
+        orr r7, r8, #-2147483638
+        orr r7, r8, #42, #2
+        orr r7, r8, #40, #2
+        orr r7, r8, $40, $2
+        orr r7, r8, 40, 2
+        orr r7, r8, (2 * 20), (1 << 1)
         orr r4, r5, r6
         orr r4, r5, r6, lsl #5
         orr r4, r5, r6, lsr #5
@@ -1299,6 +1624,17 @@ Lforward:
 
         @ destination register is optional
         orr r5, #0xf000
+        orr r5, $0xf000
+        orr r5, 0xf000
+
+        orr r7, #(0xff << 16)
+        orr r7, #-2147483638
+        orr r7, #42, #2
+        orr r7, #40, #2
+        orr r7, $40, $2
+        orr r7, 40, 2
+        orr r7, (2 * 20), (1 << 1)
+
         orr r4, r5
         orr r4, r5, lsl #5
         orr r4, r5, lsr #5
@@ -1312,6 +1648,15 @@ Lforward:
         orr r4, r5, rrx
 
 @ CHECK: orr	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x85,0xe3]
+@ CHECK: orr	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x85,0xe3]
+@ CHECK: orr	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x85,0xe3]
+@ CHECK: orr	r7, r8, #16711680       @ encoding: [0xff,0x78,0x88,0xe3]
+@ CHECK: orr    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x88,0xe3]
+@ CHECK: orr    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x88,0xe3]
+@ CHECK: orr    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x88,0xe3]
+@ CHECK: orr    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x88,0xe3]
+@ CHECK: orr    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x88,0xe3]
+@ CHECK: orr    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x88,0xe3]
 @ CHECK: orr	r4, r5, r6              @ encoding: [0x06,0x40,0x85,0xe1]
 @ CHECK: orr	r4, r5, r6, lsl #5      @ encoding: [0x86,0x42,0x85,0xe1]
 @ CHECK: orr	r4, r5, r6, lsr #5      @ encoding: [0xa6,0x42,0x85,0xe1]
@@ -1325,6 +1670,15 @@ Lforward:
 @ CHECK: orr	r4, r5, r6, rrx         @ encoding: [0x66,0x40,0x85,0xe1]
 
 @ CHECK: orr	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x85,0xe3]
+@ CHECK: orr	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x85,0xe3]
+@ CHECK: orr	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x85,0xe3]
+@ CHECK: orr	r7, r7, #16711680       @ encoding: [0xff,0x78,0x87,0xe3]
+@ CHECK: orr	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0x87,0xe3]
+@ CHECK: orr	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0x87,0xe3]
+@ CHECK: orr	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x87,0xe3]
+@ CHECK: orr 	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x87,0xe3]
+@ CHECK: orr 	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x87,0xe3]
+@ CHECK: orr 	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x87,0xe3]
 @ CHECK: orr	r4, r4, r5              @ encoding: [0x05,0x40,0x84,0xe1]
 @ CHECK: orr	r4, r4, r5, lsl #5      @ encoding: [0x85,0x42,0x84,0xe1]
 @ CHECK: orr	r4, r4, r5, lsr #5      @ encoding: [0xa5,0x42,0x84,0xe1]
@@ -1570,6 +1924,15 @@ Lforward:
 @ RSB
 @------------------------------------------------------------------------------
         rsb r4, r5, #0xf000
+        rsb r4, r5, $0xf000
+        rsb r4, r5, 0xf000
+        rsb r7, r8, #(0xff << 16)
+        rsb r7, r8, #-2147483638
+        rsb r7, r8, #42, #2
+        rsb r7, r8, #40, #2
+        rsb r7, r8, $40, $2
+        rsb r7, r8, 40, 2
+        rsb r7, r8, (2 * 20), (1 << 1)
         rsb r4, r5, r6
         rsb r4, r5, r6, lsl #5
         rsblo r4, r5, r6, lsr #5
@@ -1584,6 +1947,15 @@ Lforward:
 
         @ destination register is optional
         rsb r5, #0xf000
+        rsb r5, $0xf000
+        rsb r5, 0xf000
+        rsb r7, #(0xff << 16)
+        rsb r7, #-2147483638
+        rsb r7, #42, #2
+        rsb r7, #40, #2
+        rsb r7, $40, $2
+        rsb r7, 40, 2
+        rsb r7, (2 * 20), (1 << 1)
         rsb r4, r5
         rsb r4, r5, lsl #5
         rsb r4, r5, lsr #5
@@ -1597,6 +1969,15 @@ Lforward:
         rsb r4, r5, rrx
 
 @ CHECK: rsb	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x65,0xe2]
+@ CHECK: rsb	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x65,0xe2]
+@ CHECK: rsb	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x65,0xe2]
+@ CHECK: rsb	r7, r8, #16711680       @ encoding: [0xff,0x78,0x68,0xe2]
+@ CHECK: rsb    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x68,0xe2]
+@ CHECK: rsb    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x68,0xe2]
+@ CHECK: rsb    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x68,0xe2]
+@ CHECK: rsb    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x68,0xe2]
+@ CHECK: rsb    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x68,0xe2]
+@ CHECK: rsb    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x68,0xe2]
 @ CHECK: rsb	r4, r5, r6              @ encoding: [0x06,0x40,0x65,0xe0]
 @ CHECK: rsb	r4, r5, r6, lsl #5      @ encoding: [0x86,0x42,0x65,0xe0]
 @ CHECK: rsblo	r4, r5, r6, lsr #5      @ encoding: [0xa6,0x42,0x65,0x30]
@@ -1610,6 +1991,15 @@ Lforward:
 @ CHECK: rsb	r4, r5, r6, rrx         @ encoding: [0x66,0x40,0x65,0xe0]
 
 @ CHECK: rsb	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x65,0xe2]
+@ CHECK: rsb	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x65,0xe2]
+@ CHECK: rsb	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x65,0xe2]
+@ CHECK: rsb	r7, r7, #16711680       @ encoding: [0xff,0x78,0x67,0xe2]
+@ CHECK: rsb	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0x67,0xe2]
+@ CHECK: rsb	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0x67,0xe2]
+@ CHECK: rsb	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x67,0xe2]
+@ CHECK: rsb	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x67,0xe2]
+@ CHECK: rsb	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x67,0xe2]
+@ CHECK: rsb	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x67,0xe2]
 @ CHECK: rsb	r4, r4, r5              @ encoding: [0x05,0x40,0x64,0xe0]
 @ CHECK: rsb	r4, r4, r5, lsl #5      @ encoding: [0x85,0x42,0x64,0xe0]
 @ CHECK: rsb	r4, r4, r5, lsr #5      @ encoding: [0xa5,0x42,0x64,0xe0]
@@ -1623,9 +2013,43 @@ Lforward:
 @ CHECK: rsb	r4, r4, r5, rrx         @ encoding: [0x65,0x40,0x64,0xe0]
 
 @------------------------------------------------------------------------------
+@ RSBS
+@------------------------------------------------------------------------------
+    rsbs r7, #16711680
+    rsbs r7, $16711680
+    rsbs r7, 16711680
+    rsbs r7, #(0xff << 16)
+    rsbs r7, r8, #-2147483638
+    rsbs r7, r8, #42, #2
+    rsbs r7, r8, #40, #2
+    rsbs r7, r8, $40, $2
+    rsbs r7, r8, 40, 2
+    rsbs r7, r8, (2 * 20), (1 << 1)
+
+@ CHECK: rsbs	r7, r7, #16711680       @ encoding: [0xff,0x78,0x77,0xe2]
+@ CHECK: rsbs	r7, r7, #16711680       @ encoding: [0xff,0x78,0x77,0xe2]
+@ CHECK: rsbs	r7, r7, #16711680       @ encoding: [0xff,0x78,0x77,0xe2]
+@ CHECK: rsbs	r7, r7, #16711680       @ encoding: [0xff,0x78,0x77,0xe2]
+@ CHECK: rsbs   r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x78,0xe2]
+@ CHECK: rsbs   r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x78,0xe2]
+@ CHECK: rsbs   r7, r8, #40, #2         @ encoding: [0x28,0x71,0x78,0xe2]
+@ CHECK: rsbs   r7, r8, #40, #2         @ encoding: [0x28,0x71,0x78,0xe2]
+@ CHECK: rsbs   r7, r8, #40, #2         @ encoding: [0x28,0x71,0x78,0xe2]
+@ CHECK: rsbs   r7, r8, #40, #2         @ encoding: [0x28,0x71,0x78,0xe2]
+
+@------------------------------------------------------------------------------
 @ RSC
 @------------------------------------------------------------------------------
         rsc r4, r5, #0xf000
+        rsc r4, r5, $0xf000
+        rsc r4, r5, 0xf000
+        rsc r7, r8, #(0xff << 16)
+        rsc r7, r8, #-2147483638
+        rsc r7, r8, #42, #2
+        rsc r7, r8, #40, #2
+        rsc r7, r8, $40, $2
+        rsc r7, r8, 40, 2
+        rsc r7, r8, (2 * 20), (1 << 1)
         rsc r4, r5, r6
         rsc r4, r5, r6, lsl #5
         rsclo r4, r5, r6, lsr #5
@@ -1640,6 +2064,15 @@ Lforward:
 
         @ destination register is optional
         rsc r5, #0xf000
+        rsc r5, $0xf000
+        rsc r5, 0xf000
+        rsc r7, #(0xff << 16)
+        rsc r7, #-2147483638
+        rsc r7, #42, #2
+        rsc r7, #40, #2
+        rsc r7, $40, $2
+        rsc r7, 40, 2
+        rsc r7, (2 * 20), (1 << 1)
         rsc r4, r5
         rsc r4, r5, lsl #5
         rsc r4, r5, lsr #5
@@ -1652,6 +2085,15 @@ Lforward:
         rsc r6, r7, ror r9
 
 @ CHECK: rsc	r4, r5, #61440          @ encoding: [0x0f,0x4a,0xe5,0xe2]
+@ CHECK: rsc	r4, r5, #61440          @ encoding: [0x0f,0x4a,0xe5,0xe2]
+@ CHECK: rsc	r4, r5, #61440          @ encoding: [0x0f,0x4a,0xe5,0xe2]
+@ CHECK: rsc    r7, r8, #16711680       @ encoding: [0xff,0x78,0xe8,0xe2]
+@ CHECK: rsc    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0xe8,0xe2]
+@ CHECK: rsc    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0xe8,0xe2]
+@ CHECK: rsc    r7, r8, #40, #2         @ encoding: [0x28,0x71,0xe8,0xe2]
+@ CHECK: rsc    r7, r8, #40, #2         @ encoding: [0x28,0x71,0xe8,0xe2]
+@ CHECK: rsc    r7, r8, #40, #2         @ encoding: [0x28,0x71,0xe8,0xe2]
+@ CHECK: rsc    r7, r8, #40, #2         @ encoding: [0x28,0x71,0xe8,0xe2]
 @ CHECK: rsc	r4, r5, r6              @ encoding: [0x06,0x40,0xe5,0xe0]
 @ CHECK: rsc	r4, r5, r6, lsl #5      @ encoding: [0x86,0x42,0xe5,0xe0]
 @ CHECK: rsclo	r4, r5, r6, lsr #5      @ encoding: [0xa6,0x42,0xe5,0x30]
@@ -1665,6 +2107,15 @@ Lforward:
 @ CHECK: rscs	r1, r8, #4064           @ encoding: [0xfe,0x1e,0xf8,0xe2]
 
 @ CHECK: rsc	r5, r5, #61440          @ encoding: [0x0f,0x5a,0xe5,0xe2]
+@ CHECK: rsc	r5, r5, #61440          @ encoding: [0x0f,0x5a,0xe5,0xe2]
+@ CHECK: rsc	r5, r5, #61440          @ encoding: [0x0f,0x5a,0xe5,0xe2]
+@ CHECK: rsc	r7, r7, #16711680       @ encoding: [0xff,0x78,0xe7,0xe2]
+@ CHECK: rsc	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0xe7,0xe2]
+@ CHECK: rsc	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0xe7,0xe2]
+@ CHECK: rsc	r7, r7, #40, #2         @ encoding: [0x28,0x71,0xe7,0xe2]
+@ CHECK: rsc	r7, r7, #40, #2         @ encoding: [0x28,0x71,0xe7,0xe2]
+@ CHECK: rsc	r7, r7, #40, #2         @ encoding: [0x28,0x71,0xe7,0xe2]
+@ CHECK: rsc	r7, r7, #40, #2         @ encoding: [0x28,0x71,0xe7,0xe2]
 @ CHECK: rsc	r4, r4, r5              @ encoding: [0x05,0x40,0xe4,0xe0]
 @ CHECK: rsc	r4, r4, r5, lsl #5      @ encoding: [0x85,0x42,0xe4,0xe0]
 @ CHECK: rsc	r4, r4, r5, lsr #5      @ encoding: [0xa5,0x42,0xe4,0xe0]
@@ -1728,6 +2179,15 @@ Lforward:
 @ SBC
 @------------------------------------------------------------------------------
         sbc r4, r5, #0xf000
+        sbc r4, r5, $0xf000
+        sbc r4, r5, 0xf000
+        sbc r7, r8, #(0xff << 16)
+        sbc r7, r8, #-2147483638
+        sbc r7, r8, #42, #2
+        sbc r7, r8, #40, #2
+        sbc r7, r8, $40, $2
+        sbc r7, r8, 40, 2
+        sbc r7, r8, (20 * 2), (1 << 1)
         sbc r4, r5, r6
         sbc r4, r5, r6, lsl #5
         sbc r4, r5, r6, lsr #5
@@ -1741,6 +2201,15 @@ Lforward:
 
         @ destination register is optional
         sbc r5, #0xf000
+        sbc r5, $0xf000
+        sbc r5, 0xf000
+        sbc r7, #(0xff << 16)
+        sbc r7, #-2147483638
+        sbc r7, #42, #2
+        sbc r7, #40, #2
+        sbc r7, $40, $2
+        sbc r7, 40, 2
+        sbc r7, (20 * 2), (1 << 1)
         sbc r4, r5
         sbc r4, r5, lsl #5
         sbc r4, r5, lsr #5
@@ -1753,6 +2222,15 @@ Lforward:
         sbc r6, r7, ror r9
 
 @ CHECK: sbc	r4, r5, #61440          @ encoding: [0x0f,0x4a,0xc5,0xe2]
+@ CHECK: sbc	r4, r5, #61440          @ encoding: [0x0f,0x4a,0xc5,0xe2]
+@ CHECK: sbc	r4, r5, #61440          @ encoding: [0x0f,0x4a,0xc5,0xe2]
+@ CHECK: sbc	r7, r8, #16711680       @ encoding: [0xff,0x78,0xc8,0xe2]
+@ CHECK: sbc    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0xc8,0xe2]
+@ CHECK: sbc    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0xc8,0xe2]
+@ CHECK: sbc    r7, r8, #40, #2         @ encoding: [0x28,0x71,0xc8,0xe2]
+@ CHECK: sbc    r7, r8, #40, #2         @ encoding: [0x28,0x71,0xc8,0xe2]
+@ CHECK: sbc    r7, r8, #40, #2         @ encoding: [0x28,0x71,0xc8,0xe2]
+@ CHECK: sbc    r7, r8, #40, #2         @ encoding: [0x28,0x71,0xc8,0xe2]
 @ CHECK: sbc	r4, r5, r6              @ encoding: [0x06,0x40,0xc5,0xe0]
 @ CHECK: sbc	r4, r5, r6, lsl #5      @ encoding: [0x86,0x42,0xc5,0xe0]
 @ CHECK: sbc	r4, r5, r6, lsr #5      @ encoding: [0xa6,0x42,0xc5,0xe0]
@@ -1765,6 +2243,15 @@ Lforward:
 @ CHECK: sbc	r6, r7, r8, ror r9      @ encoding: [0x78,0x69,0xc7,0xe0]
 
 @ CHECK: sbc	r5, r5, #61440          @ encoding: [0x0f,0x5a,0xc5,0xe2]
+@ CHECK: sbc	r5, r5, #61440          @ encoding: [0x0f,0x5a,0xc5,0xe2]
+@ CHECK: sbc	r5, r5, #61440          @ encoding: [0x0f,0x5a,0xc5,0xe2]
+@ CHECK: sbc	r7, r7, #16711680       @ encoding: [0xff,0x78,0xc7,0xe2]
+@ CHECK: sbc	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0xc7,0xe2]
+@ CHECK: sbc	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0xc7,0xe2]
+@ CHECK: sbc	r7, r7, #40, #2         @ encoding: [0x28,0x71,0xc7,0xe2]
+@ CHECK: sbc	r7, r7, #40, #2         @ encoding: [0x28,0x71,0xc7,0xe2]
+@ CHECK: sbc	r7, r7, #40, #2         @ encoding: [0x28,0x71,0xc7,0xe2]
+@ CHECK: sbc	r7, r7, #40, #2         @ encoding: [0x28,0x71,0xc7,0xe2]
 @ CHECK: sbc	r4, r4, r5              @ encoding: [0x05,0x40,0xc4,0xe0]
 @ CHECK: sbc	r4, r4, r5, lsl #5      @ encoding: [0x85,0x42,0xc4,0xe0]
 @ CHECK: sbc	r4, r4, r5, lsr #5      @ encoding: [0xa5,0x42,0xc4,0xe0]
@@ -2383,6 +2870,15 @@ Lforward:
 @ SUB
 @------------------------------------------------------------------------------
         sub r4, r5, #0xf000
+        sub r4, r5, $0xf000
+        sub r4, r5, 0xf000
+        sub r7, r8, #(0xff << 16)
+        sub r7, r8, #-2147483638
+        sub r7, r8, #42, #2
+        sub r7, r8, #40, #2
+        sub r7, r8, $40, $2
+        sub r7, r8, 40, 2
+        sub r7, r8, (20 * 2), (1 << 1)
         sub r4, r5, r6
         sub r4, r5, r6, lsl #5
         sub r4, r5, r6, lsr #5
@@ -2396,6 +2892,15 @@ Lforward:
 
         @ destination register is optional
         sub r5, #0xf000
+        sub r5, $0xf000
+        sub r5, 0xf000
+        sub r7, #(0xff << 16)
+        sub r7, #-2147483638
+        sub r7, #42, #2
+        sub r7, #40, #2
+        sub r7, $40, $2
+        sub r7, 40, 2
+        sub r7, (20 * 2), (1 << 1)
         sub r4, r5
         sub r4, r5, lsl #5
         sub r4, r5, lsr #5
@@ -2408,6 +2913,15 @@ Lforward:
         sub r6, r7, ror r9
 
 @ CHECK: sub	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x45,0xe2]
+@ CHECK: sub	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x45,0xe2]
+@ CHECK: sub	r4, r5, #61440          @ encoding: [0x0f,0x4a,0x45,0xe2]
+@ CHECK: sub	r7, r8, #16711680       @ encoding: [0xff,0x78,0x48,0xe2]
+@ CHECK: sub    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x48,0xe2]
+@ CHECK: sub    r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x48,0xe2]
+@ CHECK: sub    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x48,0xe2]
+@ CHECK: sub    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x48,0xe2]
+@ CHECK: sub    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x48,0xe2]
+@ CHECK: sub    r7, r8, #40, #2         @ encoding: [0x28,0x71,0x48,0xe2]
 @ CHECK: sub	r4, r5, r6              @ encoding: [0x06,0x40,0x45,0xe0]
 @ CHECK: sub	r4, r5, r6, lsl #5      @ encoding: [0x86,0x42,0x45,0xe0]
 @ CHECK: sub	r4, r5, r6, lsr #5      @ encoding: [0xa6,0x42,0x45,0xe0]
@@ -2421,6 +2935,15 @@ Lforward:
 
 
 @ CHECK: sub	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x45,0xe2]
+@ CHECK: sub	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x45,0xe2]
+@ CHECK: sub	r5, r5, #61440          @ encoding: [0x0f,0x5a,0x45,0xe2]
+@ CHECK: sub	r7, r7, #16711680       @ encoding: [0xff,0x78,0x47,0xe2]
+@ CHECK: sub	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0x47,0xe2]
+@ CHECK: sub	r7, r7, #-2147483638    @ encoding: [0x2a,0x71,0x47,0xe2]
+@ CHECK: sub	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x47,0xe2]
+@ CHECK: sub	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x47,0xe2
+@ CHECK: sub	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x47,0xe2]
+@ CHECK: sub	r7, r7, #40, #2         @ encoding: [0x28,0x71,0x47,0xe2]
 @ CHECK: sub	r4, r4, r5              @ encoding: [0x05,0x40,0x44,0xe0]
 @ CHECK: sub	r4, r4, r5, lsl #5      @ encoding: [0x85,0x42,0x44,0xe0]
 @ CHECK: sub	r4, r4, r5, lsr #5      @ encoding: [0xa5,0x42,0x44,0xe0]
@@ -2439,6 +2962,31 @@ Lforward:
 @ CHECK: sub	r3, r1, r2, asr #32     @ encoding: [0x42,0x30,0x41,0xe0]
 
 @------------------------------------------------------------------------------
+@ SUBS
+@------------------------------------------------------------------------------
+    subs r7, r8, #16711680
+    subs r7, r8, $16711680
+    subs r7, r8, 16711680
+    subs r7, r8, #(0xff << 16)
+    subs r7, r8, #-2147483638
+    subs r7, r8, #42, #2
+    subs r7, r8, #40, #2
+    subs r7, r8, $40, $2
+    subs r7, r8, 40, 2
+    subs r7, r8, (20 * 2), (1 << 1)
+
+@ CHECK: subs	r7, r8, #16711680       @ encoding: [0xff,0x78,0x58,0xe2]
+@ CHECK: subs	r7, r8, #16711680       @ encoding: [0xff,0x78,0x58,0xe2]
+@ CHECK: subs	r7, r8, #16711680       @ encoding: [0xff,0x78,0x58,0xe2]
+@ CHECK: subs	r7, r8, #16711680       @ encoding: [0xff,0x78,0x58,0xe2]
+@ CHECK: subs   r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x58,0xe2]
+@ CHECK: subs   r7, r8, #-2147483638    @ encoding: [0x2a,0x71,0x58,0xe2]
+@ CHECK: subs   r7, r8, #40, #2         @ encoding: [0x28,0x71,0x58,0xe2]
+@ CHECK: subs   r7, r8, #40, #2         @ encoding: [0x28,0x71,0x58,0xe2]
+@ CHECK: subs   r7, r8, #40, #2         @ encoding: [0x28,0x71,0x58,0xe2]
+@ CHECK: subs   r7, r8, #40, #2         @ encoding: [0x28,0x71,0x58,0xe2]
+
+@------------------------------------------------------------------------------
 @ SVC
 @------------------------------------------------------------------------------
         svc #16
@@ -2560,6 +3108,15 @@ Lforward:
 @ TEQ
 @------------------------------------------------------------------------------
         teq r5, #0xf000
+        teq r5, $0xf000
+        teq r5, 0xf000
+        teq r7, #(0xff << 16)
+        teq r7, #-2147483638
+        teq r7, #42, #2
+        teq r7, #40, #2
+        teq r7, $40, $2
+        teq r7, 40, 2
+        teq r7, (20 * 2), (1 << 1)
         teq r4, r5
         teq r4, r5, lsl #5
         teq r4, r5, lsr #5
@@ -2572,6 +3129,15 @@ Lforward:
         teq r6, r7, ror r9
 
 @ CHECK: teq	r5, #61440              @ encoding: [0x0f,0x0a,0x35,0xe3]
+@ CHECK: teq	r5, #61440              @ encoding: [0x0f,0x0a,0x35,0xe3]
+@ CHECK: teq	r5, #61440              @ encoding: [0x0f,0x0a,0x35,0xe3]
+@ CHECK: teq	r7, #16711680           @ encoding: [0xff,0x08,0x37,0xe3]
+@ CHECK: teq    r7, #-2147483638        @ encoding: [0x2a,0x01,0x37,0xe3]
+@ CHECK: teq    r7, #-2147483638        @ encoding: [0x2a,0x01,0x37,0xe3]
+@ CHECK: teq    r7, #40, #2             @ encoding: [0x28,0x01,0x37,0xe3]
+@ CHECK: teq    r7, #40, #2             @ encoding: [0x28,0x01,0x37,0xe3]
+@ CHECK: teq    r7, #40, #2             @ encoding: [0x28,0x01,0x37,0xe3]
+@ CHECK: teq    r7, #40, #2             @ encoding: [0x28,0x01,0x37,0xe3]
 @ CHECK: teq	r4, r5                  @ encoding: [0x05,0x00,0x34,0xe1]
 @ CHECK: teq	r4, r5, lsl #5          @ encoding: [0x85,0x02,0x34,0xe1]
 @ CHECK: teq	r4, r5, lsr #5          @ encoding: [0xa5,0x02,0x34,0xe1]
@@ -2588,6 +3154,15 @@ Lforward:
 @ TST
 @------------------------------------------------------------------------------
         tst r5, #0xf000
+        tst r5, $0xf000
+        tst r5, 0xf000
+        tst r7, #(0xff << 16)
+        tst r7, #-2147483638
+        tst r7, #42, #2
+        tst r7, #40, #2
+        tst r7, $40, $2
+        tst r7, 40, 2
+        tst r7, (20 * 2), (1 << 1)
         tst r4, r5
         tst r4, r5, lsl #5
         tst r4, r5, lsr #5
@@ -2600,6 +3175,15 @@ Lforward:
         tst r6, r7, ror r9
 
 @ CHECK: tst	r5, #61440              @ encoding: [0x0f,0x0a,0x15,0xe3]
+@ CHECK: tst	r5, #61440              @ encoding: [0x0f,0x0a,0x15,0xe3]
+@ CHECK: tst	r5, #61440              @ encoding: [0x0f,0x0a,0x15,0xe3]
+@ CHECK: tst    r7, #16711680           @ encoding: [0xff,0x08,0x17,0xe3]
+@ CHECK: tst    r7, #-2147483638        @ encoding: [0x2a,0x01,0x17,0xe3]
+@ CHECK: tst    r7, #-2147483638        @ encoding: [0x2a,0x01,0x17,0xe3]
+@ CHECK: tst    r7, #40, #2             @ encoding: [0x28,0x01,0x17,0xe3]
+@ CHECK: tst    r7, #40, #2             @ encoding: [0x28,0x01,0x17,0xe3]
+@ CHECK: tst    r7, #40, #2             @ encoding: [0x28,0x01,0x17,0xe3]
+@ CHECK: tst    r7, #40, #2             @ encoding: [0x28,0x01,0x17,0xe3]
 @ CHECK: tst	r4, r5                  @ encoding: [0x05,0x00,0x14,0xe1]
 @ CHECK: tst	r4, r5, lsl #5          @ encoding: [0x85,0x02,0x14,0xe1]
 @ CHECK: tst	r4, r5, lsr #5          @ encoding: [0xa5,0x02,0x14,0xe1]
diff --git a/test/MC/ARM/coff-debugging-secrel.ll b/test/MC/ARM/coff-debugging-secrel.ll
index f37b19e6a70c..7323fc6828bb 100644
--- a/test/MC/ARM/coff-debugging-secrel.ll
+++ b/test/MC/ARM/coff-debugging-secrel.ll
@@ -16,17 +16,17 @@ entry:
 !llvm.dbg.cu = !{!7}
 !llvm.module.flags = !{!9, !10}
 
-!0 = metadata !{i32 1, i32 0, metadata !1, null}
-!1 = metadata !{i32 786478, metadata !2, metadata !3, metadata !"function", metadata !"function", metadata !"", i32 1, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @function, null, null, metadata !6, i32 1} ; [ DW_TAG_subprogram ], [line 1], [def], [function]
-!2 = metadata !{metadata !"/Users/compnerd/work/llvm/test/MC/ARM/reduced.c", metadata !"/Users/compnerd/work/llvm"}
-!3 = metadata !{i32 786473, metadata !2} ; [ DW_TAG_file_type] [/Users/compnerd/work/llvm/test/MC/ARM/reduced.c]
-!4 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ], [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{null}
-!6 = metadata !{}
-!7 = metadata !{i32 786449, metadata !2, i32 12, metadata !"clang version 3.5.0", i1 false, metadata !"", i32 0, metadata !6, metadata !6, metadata !8, metadata !6, metadata !6, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/Users/compnerd/work/llvm/test/MC/ARM/reduced.c] [DW_LANG_C99]
-!8 = metadata !{metadata !1}
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !MDLocation(line: 1, scope: !1)
+!1 = !{!"0x2e\00function\00function\00\001\000\001\000\006\000\000\001", !2, !3, !4, null, void ()* @function, null, null, !6} ; [ DW_TAG_subprogram ], [line 1], [def], [function]
+!2 = !{!"/Users/compnerd/work/llvm/test/MC/ARM/reduced.c", !"/Users/compnerd/work/llvm"}
+!3 = !{!"0x29", !2} ; [ DW_TAG_file_type] [/Users/compnerd/work/llvm/test/MC/ARM/reduced.c]
+!4 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ], [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{null}
+!6 = !{}
+!7 = !{!"0x11\0012\00clang version 3.5.0\000\00\000\00\001", !2, !6, !6, !8, !6, !6} ; [ DW_TAG_compile_unit ] [/Users/compnerd/work/llvm/test/MC/ARM/reduced.c] [DW_LANG_C99]
+!8 = !{!1}
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 1, !"Debug Info Version", i32 2}
 
 ; CHECK-ITANIUM: Relocations [
 ; CHECK-ITANIUM:   Section {{.*}} .debug_info {
@@ -42,8 +42,10 @@ entry:
 
 ; CHECK-MSVC: Relocations [
 ; CHECK-MSVC:   Section {{.*}} .debug$S {
-; CHECK-MSVC:     0xC IMAGE_REL_ARM_SECREL function
-; CHECK-MSVC:     0x10 IMAGE_REL_ARM_SECTION function
+; CHECK-MSVC:     0x2C IMAGE_REL_ARM_SECREL function
+; CHECK-MSVC:     0x30 IMAGE_REL_ARM_SECTION function
+; CHECK-MSVC:     0x48 IMAGE_REL_ARM_SECREL function
+; CHECK-MSVC:     0x4C IMAGE_REL_ARM_SECTION function
 ; CHECK-MSVC:   }
 ; CHECK-MSVC: ]
 
diff --git a/test/MC/ARM/coff-file.s b/test/MC/ARM/coff-file.s
index f0dd29a29256..d3f26f4c8f7c 100644
--- a/test/MC/ARM/coff-file.s
+++ b/test/MC/ARM/coff-file.s
@@ -21,7 +21,7 @@
 // CHECK-SCN: Symbols [
 // CHECK-SCN:   Symbol {
 // CHECK-SCN:     Name: .file
-// CHECK-SCN:     Section: (65534)
+// CHECK-SCN:     Section: IMAGE_SYM_DEBUG (-2)
 // CHECK-SCN:     StorageClass: File
 // CHECK-SCN:     AuxFileRecord {
 // CHECK-SCN:       FileName: null-padded.asm
@@ -29,7 +29,7 @@
 // CHECK-SCN:   }
 // CHECK-SCN:   Symbol {
 // CHECK-SCN:     Name: .file
-// CHECK-SCN:     Section: (65534)
+// CHECK-SCN:     Section: IMAGE_SYM_DEBUG (-2)
 // CHECK-SCN:     StorageClass: File
 // CHECK-SCN:     AuxFileRecord {
 // CHECK-SCN:       FileName: eighteen-chars.asm
@@ -37,7 +37,7 @@
 // CHECK-SCN:   }
 // CHECK-SCN:   Symbol {
 // CHECK-SCN:     Name: .file
-// CHECK-SCN:     Section: (65534)
+// CHECK-SCN:     Section: IMAGE_SYM_DEBUG (-2)
 // CHECK-SCN:     StorageClass: File
 // CHECK-SCN:     AuxFileRecord {
 // CHECK-SCN:       FileName: multiple-auxiliary-entries.asm
diff --git a/test/MC/ARM/coproc-diag.s b/test/MC/ARM/coproc-diag.s
new file mode 100644
index 000000000000..c96f2db6b6e1
--- /dev/null
+++ b/test/MC/ARM/coproc-diag.s
@@ -0,0 +1,10 @@
+# Special test to make sure we don't error on VFP co-proc access
+@ RUN: llvm-mc -triple=armv5 < %s | FileCheck %s
+@ RUN: llvm-mc -triple=armv6 < %s | FileCheck %s
+
+        @ p10 and p11 are reserved for NEON, but accessible on v5/v6
+        ldc  p10, cr0, [r0], {0x20}
+        ldc2 p11, cr0, [r0], {0x21}
+        ldcl p11, cr0, [r0], {0x20}
+
+@ CHECK-NOT: error: invalid operand for instruction
diff --git a/test/MC/ARM/cps.s b/test/MC/ARM/cps.s
new file mode 100644
index 000000000000..a848b22d9876
--- /dev/null
+++ b/test/MC/ARM/cps.s
@@ -0,0 +1,17 @@
+@ RUN: llvm-mc -triple=thumbv6t2--none-eabi -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=thumbv7a--none-eabi -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=thumbv7r--none-eabi -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=thumbv8a--none-eabi -show-encoding < %s | FileCheck %s
+@ RUN: not llvm-mc -triple=thumbv7m--none-eabi -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=UNDEF
+
+  cpsie f
+  cpsie i, #3
+  cps #0
+
+@ CHECK: cpsie f                         @ encoding: [0x61,0xb6]
+@ CHECK: cpsie   i, #3                   @ encoding: [0xaf,0xf3,0x43,0x85]
+@ CHECK: cps     #0                      @ encoding: [0xaf,0xf3,0x00,0x81]
+
+@ UNDEF-DAG: cpsie f                         @ encoding: [0x61,0xb6]
+@ UNDEF-DAG: error: instruction requires:
+@ UNDEF-DAG: error: instruction 'cps' requires effect for M-class
diff --git a/test/MC/ARM/cpu-test.s b/test/MC/ARM/cpu-test.s
new file mode 100644
index 000000000000..766e454d080a
--- /dev/null
+++ b/test/MC/ARM/cpu-test.s
@@ -0,0 +1,13 @@
+// RUN: not llvm-mc -o - -triple arm-gnueabi-freebsd11.0 < %s > %t 2> %t2
+// RUN: FileCheck %s < %t
+// RUN: FileCheck %s --check-prefix=CHECK-ERROR < %t2
+
+// CHECK: .cpu cortex-a8
+.cpu cortex-a8
+// CHECK: dsb     sy
+dsb
+.cpu arm9       
+// CHECK-ERROR: error: instruction requires: data-barriers
+dsb
+// CHECK-ERROR: error: Unknown CPU name
+.cpu foobar
diff --git a/test/MC/ARM/d16.s b/test/MC/ARM/d16.s
new file mode 100644
index 000000000000..aa549a3d7ca1
--- /dev/null
+++ b/test/MC/ARM/d16.s
@@ -0,0 +1,24 @@
+@ RUN:     llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+vfp4,-d16 2>&1 | FileCheck %s --check-prefix=D32
+@ RUN: not llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+vfp4,+d16 2>&1 | FileCheck %s --check-prefix=D16
+
+@ D32-NOT: error:
+
+@ D16: invalid operand for instruction
+@ D16-NEXT: vadd.f64 d1, d2, d16
+vadd.f64 d1, d2, d16
+
+@ D16: invalid operand for instruction
+@ D16-NEXT: vadd.f64 d1, d17, d6
+vadd.f64 d1, d17, d6
+
+@ D16: invalid operand for instruction
+@ D16-NEXT: vadd.f64 d19, d7, d6
+vadd.f64 d19, d7, d6
+
+@ D16: invalid operand for instruction
+@ D16-NEXT: vcvt.f64.f32 d22, s4
+vcvt.f64.f32 d22, s4
+
+@ D16: invalid operand for instruction
+@ D16-NEXT: vcvt.f32.f64 s26, d30
+vcvt.f32.f64 s26, d30
diff --git a/test/MC/ARM/diagnostics.s b/test/MC/ARM/diagnostics.s
index 88c5fb5b688a..6f66dc3b4d0a 100644
--- a/test/MC/ARM/diagnostics.s
+++ b/test/MC/ARM/diagnostics.s
@@ -491,3 +491,213 @@ foo2:
 @ CHECK-ERRORS:                 ^
 @ CHECK-ERRORS: error: immediate expression for mov requires :lower16: or :upper16
 @ CHECK-ERRORS:                  ^
+
+        str r0, [r0, #4]!
+        str r0, [r0, r1]!
+        str r0, [r0], #4
+        str r0, [r0], r1
+        strh r0, [r0, #2]!
+        strh r0, [r0, r1]!
+        strh r0, [r0], #2
+        strh r0, [r0], r1
+        strb r0, [r0, #1]!
+        strb r0, [r0, r1]!
+        strb r0, [r0], #1
+        strb r0, [r0], r1
+@ CHECK-ERRORS: error: source register and base register can't be identical
+@ CHECK-ERRORS: str r0, [r0, #4]!
+@ CHECK-ERRORS:         ^
+@ CHECK-ERRORS: error: source register and base register can't be identical
+@ CHECK-ERRORS: str r0, [r0, r1]!
+@ CHECK-ERRORS:         ^
+@ CHECK-ERRORS: error: source register and base register can't be identical
+@ CHECK-ERRORS: str r0, [r0], #4
+@ CHECK-ERRORS:         ^
+@ CHECK-ERRORS: error: source register and base register can't be identical
+@ CHECK-ERRORS: str r0, [r0], r1
+@ CHECK-ERRORS:         ^
+@ CHECK-ERRORS: error: source register and base register can't be identical
+@ CHECK-ERRORS: strh r0, [r0, #2]!
+@ CHECK-ERRORS:          ^
+@ CHECK-ERRORS: error: source register and base register can't be identical
+@ CHECK-ERRORS: strh r0, [r0, r1]!
+@ CHECK-ERRORS:          ^
+@ CHECK-ERRORS: error: source register and base register can't be identical
+@ CHECK-ERRORS: strh r0, [r0], #2
+@ CHECK-ERRORS:          ^
+@ CHECK-ERRORS: error: source register and base register can't be identical
+@ CHECK-ERRORS: strh r0, [r0], r1
+@ CHECK-ERRORS:          ^
+@ CHECK-ERRORS: error: source register and base register can't be identical
+@ CHECK-ERRORS: strb r0, [r0, #1]!
+@ CHECK-ERRORS:          ^
+@ CHECK-ERRORS: error: source register and base register can't be identical
+@ CHECK-ERRORS: strb r0, [r0, r1]!
+@ CHECK-ERRORS:          ^
+@ CHECK-ERRORS: error: source register and base register can't be identical
+@ CHECK-ERRORS: strb r0, [r0], #1
+@ CHECK-ERRORS:          ^
+@ CHECK-ERRORS: error: source register and base register can't be identical
+@ CHECK-ERRORS: strb r0, [r0], r1
+@ CHECK-ERRORS:          ^
+
+        ldr r0, [r0, #4]!
+        ldr r0, [r0, r1]!
+        ldr r0, [r0], #4
+        ldr r0, [r0], r1
+        ldrh r0, [r0, #2]!
+        ldrh r0, [r0, r1]!
+        ldrh r0, [r0], #2
+        ldrh r0, [r0], r1
+        ldrsh r0, [r0, #2]!
+        ldrsh r0, [r0, r1]!
+        ldrsh r0, [r0], #2
+        ldrsh r0, [r0], r1
+        ldrb r0, [r0, #1]!
+        ldrb r0, [r0, r1]!
+        ldrb r0, [r0], #1
+        ldrb r0, [r0], r1
+        ldrsb r0, [r0, #1]!
+        ldrsb r0, [r0, r1]!
+        ldrsb r0, [r0], #1
+        ldrsb r0, [r0], r1
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldr r0, [r0, #4]!
+@ CHECK-ERRORS:         ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldr r0, [r0, r1]!
+@ CHECK-ERRORS:         ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldr r0, [r0], #4
+@ CHECK-ERRORS:         ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldr r0, [r0], r1
+@ CHECK-ERRORS:         ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrh r0, [r0, #2]!
+@ CHECK-ERRORS:          ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrh r0, [r0, r1]!
+@ CHECK-ERRORS:          ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrh r0, [r0], #2
+@ CHECK-ERRORS:          ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrh r0, [r0], r1
+@ CHECK-ERRORS:          ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrsh r0, [r0, #2]!
+@ CHECK-ERRORS:           ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrsh r0, [r0, r1]!
+@ CHECK-ERRORS:           ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrsh r0, [r0], #2
+@ CHECK-ERRORS:           ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrsh r0, [r0], r1
+@ CHECK-ERRORS:           ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrb r0, [r0, #1]!
+@ CHECK-ERRORS:          ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrb r0, [r0, r1]!
+@ CHECK-ERRORS:          ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrb r0, [r0], #1
+@ CHECK-ERRORS:          ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrb r0, [r0], r1
+@ CHECK-ERRORS:          ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrsb r0, [r0, #1]!
+@ CHECK-ERRORS:           ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrsb r0, [r0, r1]!
+@ CHECK-ERRORS:           ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrsb r0, [r0], #1
+@ CHECK-ERRORS:           ^
+@ CHECK-ERRORS: error: destination register and base register can't be identical
+@ CHECK-ERRORS: ldrsb r0, [r0], r1
+@ CHECK-ERRORS:           ^
+
+        @ Out of range modified immediate values
+        mov  r5, #-256, #6
+        mov  r6, #42, #7
+        mvn  r5, #256, #6
+        mvn  r6, #42, #298
+        cmp  r5, #65535, #6
+        cmp  r6, #42, #31
+        cmn  r5, #-1, #6
+        cmn  r6, #42, #32
+	msr  APSR_nzcvq, #-128, #2
+	msr  apsr_nzcvqg, #0, #1
+        adc  r7, r8, #-256, #2
+        adc  r7, r8, #128, #1
+        sbc  r7, r8, #-256, #2
+        sbc  r7, r8, #128, #1
+        add  r7, r8, #-2149, #0
+        add  r7, r8, #100, #1
+        sub  r7, r8, #-2149, #0
+        sub  r7, r8, #100, #1
+        and  r7, r8, #-2149, #0
+        and  r7, r8, #100, #1
+        orr  r7, r8, #-2149, #0
+        orr  r7, r8, #100, #1
+        eor  r7, r8, #-2149, #0
+        eor  r7, r8, #100, #1
+        bic  r7, r8, #-2149, #0
+        bic  r7, r8, #100, #1
+        rsb  r7, r8, #-2149, #0
+        rsb  r7, r8, #100, #1
+        adds r7, r8, #-2149, #0
+        adds r7, r8, #100, #1
+        subs r7, r8, #-2149, #0
+        subs r7, r8, #100, #1
+        rsbs r7, r8, #-2149, #0
+        rsbs r7, r8, #100, #1
+        rsc r7, r8, #-2149, #0
+        rsc r7, r8, #100, #1
+        TST r7, #-2149, #0
+        TST r7, #100, #1
+        TEQ r7, #-2149, #0
+        TEQ r7, #100, #1
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
+@ CHECK-ERRORS: error: immediate operand must a number in the range [0, 255]
+@ CHECK-ERRORS: error: immediate operand must an even number in the range [0, 30]
diff --git a/test/MC/ARM/directive-arch-iwmmxt.s b/test/MC/ARM/directive-arch-iwmmxt.s
index db25ec683fe5..c54846dbcc20 100644
--- a/test/MC/ARM/directive-arch-iwmmxt.s
+++ b/test/MC/ARM/directive-arch-iwmmxt.s
@@ -16,7 +16,7 @@
 @ CHECK-ATTR: FileAttributes {
 @ CHECK-ATTR:   Attribute {
 @ CHECK-ATTR:     TagName: CPU_name
-@ CHECK-ATTR:     Value: IWMMXT
+@ CHECK-ATTR:     Value: iwmmxt
 @ CHECK-ATTR:   }
 @ CHECK-ATTR:   Attribute {
 @ CHECK-ATTR:     TagName: CPU_arch
diff --git a/test/MC/ARM/directive-arch-iwmmxt2.s b/test/MC/ARM/directive-arch-iwmmxt2.s
index de94f97b449e..a4e59b5ba277 100644
--- a/test/MC/ARM/directive-arch-iwmmxt2.s
+++ b/test/MC/ARM/directive-arch-iwmmxt2.s
@@ -16,7 +16,7 @@
 @ CHECK-ATTR: FileAttributes {
 @ CHECK-ATTR:   Attribute {
 @ CHECK-ATTR:     TagName: CPU_name
-@ CHECK-ATTR:     Value: IWMMXT2
+@ CHECK-ATTR:     Value: iwmmxt2
 @ CHECK-ATTR:   }
 @ CHECK-ATTR:   Attribute {
 @ CHECK-ATTR:     TagName: CPU_arch
diff --git a/test/MC/ARM/directive-arch_extension-crc.s b/test/MC/ARM/directive-arch_extension-crc.s
index 9e4dedabca48..1359b1f649ac 100644
--- a/test/MC/ARM/directive-arch_extension-crc.s
+++ b/test/MC/ARM/directive-arch_extension-crc.s
@@ -37,21 +37,21 @@ crc:
 nocrc:
 	crc32b r0, r1, r2
 @ CHECK-V7: error: instruction requires: crc armv8
-@ CHECK-V8: error: instruction requires: crc arm-mode
+@ CHECK-V8: error: instruction requires: crc
 	crc32h r0, r1, r2
 @ CHECK-V7: error: instruction requires: crc armv8
-@ CHECK-V8: error: instruction requires: crc arm-mode
+@ CHECK-V8: error: instruction requires: crc
 	crc32w r0, r1, r2
 @ CHECK-V7: error: instruction requires: crc armv8
-@ CHECK-V8: error: instruction requires: crc arm-mode
+@ CHECK-V8: error: instruction requires: crc
 
 	crc32cb r0, r1, r2
 @ CHECK-V7: error: instruction requires: crc armv8
-@ CHECK-V8: error: instruction requires: crc arm-mode
+@ CHECK-V8: error: instruction requires: crc
 	crc32ch r0, r1, r2
 @ CHECK-V7: error: instruction requires: crc armv8
-@ CHECK-V8: error: instruction requires: crc arm-mode
+@ CHECK-V8: error: instruction requires: crc
 	crc32cw r0, r1, r2
 @ CHECK-V7: error: instruction requires: crc armv8
-@ CHECK-V8: error: instruction requires: crc arm-mode
+@ CHECK-V8: error: instruction requires: crc
 
diff --git a/test/MC/ARM/directive-arch_extension-fp.s b/test/MC/ARM/directive-arch_extension-fp.s
index 0327dd74337e..f2b4dc2c4122 100644
--- a/test/MC/ARM/directive-arch_extension-fp.s
+++ b/test/MC/ARM/directive-arch_extension-fp.s
@@ -1,11 +1,11 @@
 @ RUN: not llvm-mc -triple armv7-eabi -filetype asm -o /dev/null 2>&1 %s \
-@ RUN:   | FileCheck %s -check-prefix CHECK-V7
+@ RUN:   | FileCheck %s -check-prefix CHECK-V7 -check-prefix CHECK
 @ RUN: not llvm-mc -triple armv8-eabi -filetype asm -o /dev/null 2>&1 %s \
-@ RUN:   | FileCheck %s -check-prefix CHECK-V8
+@ RUN:   | FileCheck %s -check-prefix CHECK-V8 -check-prefix CHECK
 @ RUN: not llvm-mc -triple thumbv7-eabi -filetype asm -o /dev/null 2>&1 %s \
-@ RUN:   | FileCheck %s -check-prefix CHECK-V7
+@ RUN:   | FileCheck %s -check-prefix CHECK-V7 -check-prefix CHECK
 @ RUN: not llvm-mc -triple thumbv8-eabi -filetype asm -o /dev/null 2>&1 %s \
-@ RUN:   | FileCheck %s -check-prefix CHECK-V8
+@ RUN:   | FileCheck %s -check-prefix CHECK-V8 -check-prefix CHECK
 
 	.syntax unified
 
@@ -153,192 +153,131 @@ fp:
 	.type nofp,%function
 nofp:
 	vmrs r0, mvfr2
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 
 	vselgt.f32 s0, s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vselge.f32 s0, s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vseleq.f32 s0, s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vselvs.f32 s0, s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vmaxnm.f32 s0, s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vminnm.f32 s0, s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 
 	vselgt.f64 d0, d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vselge.f64 d0, d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vseleq.f64 d0, d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vselvs.f64 d0, d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vmaxnm.f64 d0, d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vminnm.f64 d0, d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 
 	vcvtb.f64.f16 d0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtb.f16.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtt.f64.f16 d0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtt.f16.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 
 	vcvta.s32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvta.u32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvta.s32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvta.u32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtn.s32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtn.u32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtn.s32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtn.u32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtp.s32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtp.u32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtp.s32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtp.u32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtm.s32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtm.u32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtm.s32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtm.u32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 
 	vrintz.f32 s0, s1
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintz.f64 d0, d1
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintz.f32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintz.f64.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintr.f32 s0, s1
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintr.f64 d0, d1
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintr.f32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintr.f64.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintx.f32 s0, s1
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintx.f64 d0, d1
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintx.f32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintx.f64.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 
 	vrinta.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrinta.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrinta.f32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrinta.f64.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintn.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintn.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintn.f32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintn.f64.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintp.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintp.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintp.f32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintp.f64.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintm.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintm.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintm.f32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintm.f64.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 
diff --git a/test/MC/ARM/directive-arch_extension-idiv.s b/test/MC/ARM/directive-arch_extension-idiv.s
index c63bbfbb4cab..88614ea55db5 100644
--- a/test/MC/ARM/directive-arch_extension-idiv.s
+++ b/test/MC/ARM/directive-arch_extension-idiv.s
@@ -43,11 +43,11 @@ noidiv:
 	udiv r0, r1, r2
 @ CHECK-ARMv6: error: instruction requires: divide in ARM
 @ CHECK-THUMBv6: error: instruction requires: divide in ARM arm-mode
-@ CHECK-ARMv7: error: instruction requires: divide in ARM arm-mode
+@ CHECK-ARMv7: error: instruction requires: divide in ARM
 @ CHECK-THUMBv7: error: instruction requires: divide in THUMB
 	sdiv r0, r1, r2
 @ CHECK-ARMv6: error: instruction requires: divide in ARM
 @ CHECK-THUMBv6: error: instruction requires: divide in ARM arm-mode
-@ CHECK-ARMv7: error: instruction requires: divide in ARM arm-mode
+@ CHECK-ARMv7: error: instruction requires: divide in ARM
 @ CHECK-THUMBv7: error: instruction requires: divide in THUMB
 
diff --git a/test/MC/ARM/directive-arch_extension-mode-switch.s b/test/MC/ARM/directive-arch_extension-mode-switch.s
new file mode 100644
index 000000000000..7e4159f2e8f4
--- /dev/null
+++ b/test/MC/ARM/directive-arch_extension-mode-switch.s
@@ -0,0 +1,17 @@
+@ RUN: not llvm-mc -triple armv8-eabi -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+
+@ Ensure that a mode switch does not revert the architectural features that were
+@ alternated explicitly.
+
+	.syntax unified
+
+	.arch_extension noidiv
+
+	.arm
+	udiv r0, r0, r1
+@ CHECK: instruction requires: divide in ARM
+
+	.thumb
+	udiv r0, r0, r1
+@ CHECK: instruction requires: divide in THUMB
+
diff --git a/test/MC/ARM/directive-arch_extension-simd.s b/test/MC/ARM/directive-arch_extension-simd.s
index c9dbf21541c1..14359c645229 100644
--- a/test/MC/ARM/directive-arch_extension-simd.s
+++ b/test/MC/ARM/directive-arch_extension-simd.s
@@ -1,11 +1,11 @@
 @ RUN: not llvm-mc -triple armv7-eabi -filetype asm -o /dev/null 2>&1 %s \
-@ RUN:   | FileCheck %s -check-prefix CHECK-V7
+@ RUN:   | FileCheck %s -check-prefix CHECK-V7 -check-prefix CHECK
 @ RUN: not llvm-mc -triple armv8-eabi -filetype asm -o /dev/null 2>&1 %s \
-@ RUN:   | FileCheck %s -check-prefix CHECK-V8
+@ RUN:   | FileCheck %s -check-prefix CHECK-V8 -check-prefix CHECK
 @ RUN: not llvm-mc -triple thumbv7-eabi -filetype asm -o /dev/null 2>&1 %s \
-@ RUN:   | FileCheck %s -check-prefix CHECK-V7
+@ RUN:   | FileCheck %s -check-prefix CHECK-V7 -check-prefix CHECK
 @ RUN: not llvm-mc -triple thumbv8-eabi -filetype asm -o /dev/null 2>&1 %s \
-@ RUN:   | FileCheck %s -check-prefix CHECK-V8
+@ RUN:   | FileCheck %s -check-prefix CHECK-V8 -check-prefix CHECK
 
 	.syntax unified
 
@@ -125,151 +125,103 @@ simd:
 	.type nosimd,%function
 nosimd:
 	vmaxnm.f32 s0, s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vminnm.f32 s0, s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 
 	vmaxnm.f64 d0, d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vminnm.f64 d0, d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 
 	vcvta.s32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvta.u32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvta.s32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvta.u32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtn.s32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtn.u32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtn.s32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtn.u32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtp.s32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtp.u32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtp.s32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtp.u32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtm.s32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtm.u32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtm.s32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vcvtm.u32.f64 s0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 
 	vrintz.f32 s0, s1
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintz.f64 d0, d1
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintz.f32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintz.f64.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintr.f32 s0, s1
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintr.f64 d0, d1
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintr.f32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintr.f64.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintx.f32 s0, s1
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintx.f64 d0, d1
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintx.f32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintx.f64.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 
 	vrinta.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrinta.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrinta.f32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrinta.f64.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintn.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintn.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintn.f32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintn.f64.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintp.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintp.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintp.f32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintp.f64.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintm.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintm.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintm.f32.f32 s0, s0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 	vrintm.f64.f64 d0, d0
-@ CHECK-V7: error: instruction requires: FPARMv8
-@ CHECK-V8: error: instruction requires: double precision VFP FPARMv8
+@ CHECK: error: instruction requires: FPARMv8
 
diff --git a/test/MC/ARM/directive-arch_extension-toggle.s b/test/MC/ARM/directive-arch_extension-toggle.s
new file mode 100644
index 000000000000..c3fb901cad47
--- /dev/null
+++ b/test/MC/ARM/directive-arch_extension-toggle.s
@@ -0,0 +1,8 @@
+@ RUN: llvm-mc -triple armv7-eabi -mattr hwdiv -filetype asm -o /dev/null %s
+
+	.syntax unified
+	.thumb
+
+	udiv r0, r1, r2
+	.arch_extension idiv
+	udiv r0, r1, r2
diff --git a/test/MC/ARM/directive-cpu.s b/test/MC/ARM/directive-cpu.s
index 952dd93f370c..d81a03e5e299 100644
--- a/test/MC/ARM/directive-cpu.s
+++ b/test/MC/ARM/directive-cpu.s
@@ -20,7 +20,6 @@
 @ CHECK: 10000000
 
 	.cpu	cortex-a8
-@ CHECK: 05
-@ CHECK: 434F52 5445582D 413800
+@ CHECK: 05636F72 7465782D 613800
 
 @ CHECK: )
diff --git a/test/MC/ARM/directive-eabi_attribute-2.s b/test/MC/ARM/directive-eabi_attribute-2.s
deleted file mode 100644
index 8f00ac807802..000000000000
--- a/test/MC/ARM/directive-eabi_attribute-2.s
+++ /dev/null
@@ -1,98 +0,0 @@
-@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
-
-	.syntax unified
-	.thumb
-
-	.eabi_attribute Tag_CPU_raw_name, "Cortex-A9"
-@ CHECK: .eabi_attribute 4, "Cortex-A9"
-	.eabi_attribute Tag_CPU_name, "cortex-a9"
-@ CHECK: .cpu cortex-a9
-	.eabi_attribute Tag_CPU_arch, 10
-@ CHECK: .eabi_attribute 6, 10
-	.eabi_attribute Tag_CPU_arch_profile, 'A'
-@ CHECK: .eabi_attribute 7, 65
-	.eabi_attribute Tag_ARM_ISA_use, 0
-@ CHECK: .eabi_attribute 8, 0
-	.eabi_attribute Tag_THUMB_ISA_use, 2
-@ CHECK: .eabi_attribute 9, 2
-	.eabi_attribute Tag_FP_arch, 3
-@ CHECK: .eabi_attribute 10, 3
-	.eabi_attribute Tag_WMMX_arch, 0
-@ CHECK: .eabi_attribute 11, 0
-	.eabi_attribute Tag_Advanced_SIMD_arch, 1
-@ CHECK: .eabi_attribute 12, 1
-	.eabi_attribute Tag_PCS_config, 2
-@ CHECK: .eabi_attribute 13, 2
-	.eabi_attribute Tag_ABI_PCS_R9_use, 0
-@ CHECK: .eabi_attribute 14, 0
-	.eabi_attribute Tag_ABI_PCS_RW_data, 0
-@ CHECK: .eabi_attribute 15, 0
-	.eabi_attribute Tag_ABI_PCS_RO_data, 0
-@ CHECK: .eabi_attribute 16, 0
-	.eabi_attribute Tag_ABI_PCS_GOT_use, 0
-@ CHECK: .eabi_attribute 17, 0
-	.eabi_attribute Tag_ABI_PCS_wchar_t, 4
-@ CHECK: .eabi_attribute 18, 4
-	.eabi_attribute Tag_ABI_FP_rounding, 1
-@ CHECK: .eabi_attribute 19, 1
-	.eabi_attribute Tag_ABI_FP_denormal, 2
-@ CHECK: .eabi_attribute 20, 2
-	.eabi_attribute Tag_ABI_FP_exceptions, 1
-@ CHECK: .eabi_attribute 21, 1
-	.eabi_attribute Tag_ABI_FP_user_exceptions, 1
-@ CHECK: .eabi_attribute 22, 1
-	.eabi_attribute Tag_ABI_FP_number_model, 3
-@ CHECK: .eabi_attribute 23, 3
-	.eabi_attribute Tag_ABI_align_needed, 1
-@ CHECK: .eabi_attribute 24, 1
-	.eabi_attribute Tag_ABI_align_preserved, 2
-@ CHECK: .eabi_attribute 25, 2
-	.eabi_attribute Tag_ABI_enum_size, 3
-@ CHECK: .eabi_attribute 26, 3
-	.eabi_attribute Tag_ABI_HardFP_use, 0
-@ CHECK: .eabi_attribute 27, 0
-	.eabi_attribute Tag_ABI_VFP_args, 1
-@ CHECK: .eabi_attribute 28, 1
-	.eabi_attribute Tag_ABI_WMMX_args, 0
-@ CHECK: .eabi_attribute 29, 0
-	.eabi_attribute Tag_ABI_FP_optimization_goals, 1
-@ CHECK: .eabi_attribute 31, 1
-	.eabi_attribute Tag_compatibility, 1
-@ CHECK: .eabi_attribute 32, 1
-	.eabi_attribute Tag_compatibility, 1, "aeabi"
-@ CHECK: .eabi_attribute 32, 1, "aeabi"
-	.eabi_attribute Tag_CPU_unaligned_access, 0
-@ CHECK: .eabi_attribute 34, 0
-	.eabi_attribute Tag_FP_HP_extension, 0
-@ CHECK: .eabi_attribute 36, 0
-	.eabi_attribute Tag_ABI_FP_16bit_format, 0
-@ CHECK: .eabi_attribute 38, 0
-	.eabi_attribute Tag_MPextension_use, 0
-@ CHECK: .eabi_attribute 42, 0
-	.eabi_attribute Tag_DIV_use, 0
-@ CHECK: .eabi_attribute 44, 0
-	.eabi_attribute Tag_nodefaults, 0
-@ CHECK: .eabi_attribute 64, 0
-	.eabi_attribute Tag_also_compatible_with, "gnu"
-@ CHECK: .eabi_attribute 65, "gnu"
-	.eabi_attribute Tag_T2EE_use, 0
-@ CHECK: .eabi_attribute 66, 0
-	.eabi_attribute Tag_conformance, "2.09"
-@ CHECK: .eabi_attribute 67, "2.09"
-	.eabi_attribute Tag_Virtualization_use, 0
-@ CHECK: .eabi_attribute 68, 0
-
-@ ===--- Compatibility Checks ---===
-
-	.eabi_attribute Tag_ABI_align8_needed, 1
-@ CHECK: .eabi_attribute 24, 1
-	.eabi_attribute Tag_ABI_align8_preserved, 2
-@ CHECK: .eabi_attribute 25, 2
-
-@ ===--- GNU AS Compatibility Checks ---===
-
-	.eabi_attribute 2 * 2 + 1, "cortex-a9"
-@ CHECK: .cpu cortex-a9
-	.eabi_attribute 2 * 2 + 2, 5 * 2
-@ CHECK: .eabi_attribute 6, 10
-
diff --git a/test/MC/ARM/directive-eabi_attribute-diagnostics.s b/test/MC/ARM/directive-eabi_attribute-diagnostics.s
index d1ae352b25f4..2b0375e57613 100644
--- a/test/MC/ARM/directive-eabi_attribute-diagnostics.s
+++ b/test/MC/ARM/directive-eabi_attribute-diagnostics.s
@@ -29,6 +29,11 @@
 @ CHECK: 	.eabi_attribute 0
 @ CHECK:                         ^
 
+        .eabi_attribute Tag_compatibility, 1
+@ CHECK: error: comma expected
+@ CHECK: .eabi_attribute Tag_compatibility, 1
+@ CHECK:                                     ^
+
 	.eabi_attribute Tag_MPextension_use_old, 0
 @ CHECK: error: attribute name not recognised: Tag_MPextension_use_old
 @ CHECK: 	.eabi_attribute Tag_MPextension_use_old, 0
diff --git a/test/MC/ARM/directive-eabi_attribute-overwrite.s b/test/MC/ARM/directive-eabi_attribute-overwrite.s
index 6fdded3d8312..e2c509972754 100644
--- a/test/MC/ARM/directive-eabi_attribute-overwrite.s
+++ b/test/MC/ARM/directive-eabi_attribute-overwrite.s
@@ -3,13 +3,11 @@
 
 	.syntax unified
 	.thumb
-
-	.eabi_attribute Tag_compatibility, 1
 	.eabi_attribute Tag_compatibility, 1, "aeabi"
 
 @ CHECK-ATTR: FileAttributes {
 @ CHECK-ATTR:   Attribute {
-@ CHECK-ATTR:     Value: 1, AEABI
+@ CHECK-ATTR:     Value: 1, aeabi
 @ CHECK-ATTR:     TagName: compatibility
 @ CHECK-ATTR:     Description: AEABI Conformant
 @ CHECK-ATTR:   }
diff --git a/test/MC/ARM/directive-eabi_attribute.s b/test/MC/ARM/directive-eabi_attribute.s
index c060b809c843..74a51ab7279a 100644
--- a/test/MC/ARM/directive-eabi_attribute.s
+++ b/test/MC/ARM/directive-eabi_attribute.s
@@ -1,56 +1,248 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
 @ RUN: llvm-mc < %s -triple armv7-unknown-linux-gnueabi -filetype=obj -o - \
-@ RUN:   | llvm-readobj -s -sd | FileCheck %s
-
-@ CHECK: Name: .ARM.attribute
-@ CHECK: SectionData (
-
-@ <format-version>
-@ CHECK: 41
-
-@ <section-length>
-@ CHECK: 250000 00
-
-@ <vendor-name> "aeabi\0"
-@ CHECK: 616561 626900
-
-@ <file-tag>
-@ CHECK: 01
-
-@ <size>
-@ CHECK: 1B000000
-
-@ <attribute>*
-
-	.eabi_attribute 6, 10
-@ CHECK: 060A
-
-	.eabi_attribute 7, 65
-@ CHECK: 0741
-
-	.eabi_attribute 8, 1
-@ CHECK: 0801
-
-	.eabi_attribute 9, 2
-@ CHECK: 0902
-
-	.eabi_attribute 10, 3
-@ CHECK: 0A03
-
-	.eabi_attribute 12, 1
-@ CHECK: 0C01
-
-	.eabi_attribute 20, 1
-@ CHECK: 1401
-
-	.eabi_attribute 21, 1
-@ CHECK: 1501
-
-	.eabi_attribute 23, 3
-@ CHECK: 1703
-
-	.eabi_attribute 24, 1
-@ CHECK: 1801
-
-	.eabi_attribute 25, 1
-@ CHECK: 1901
-@ CHECK: )
+@ RUN:   | llvm-readobj -arm-attributes | FileCheck %s --check-prefix=CHECK-OBJ
+
+        .syntax unified
+        .thumb
+
+        .eabi_attribute Tag_conformance, "2.09"
+@ CHECK: .eabi_attribute 67, "2.09"
+@ Tag_conformance should be be emitted first in a file-scope
+@ sub-subsection of the first public subsection of the attributes
+@ section. 2.3.7.4 of ABI Addenda.
+@ CHECK-OBJ:        Tag: 67
+@ CHECK-OBJ-NEXT:   TagName: conformance
+@ CHECK-OBJ-NEXT:   Value: 2.09
+	.eabi_attribute Tag_CPU_raw_name, "Cortex-A9"
+@ CHECK: .eabi_attribute 4, "Cortex-A9"
+@ CHECK-OBJ:        Tag: 4
+@ CHECK-OBJ-NEXT:   TagName: CPU_raw_name
+@ CHECK-OBJ-NEXT:   Value: Cortex-A9
+	.eabi_attribute Tag_CPU_name, "cortex-a9"
+@ CHECK: .cpu cortex-a9
+@ CHECK-OBJ:        Tag: 5
+@ CHECK-OBJ-NEXT:   TagName: CPU_name
+@ CHECK-OBJ-NEXT:   Value: cortex-a9
+	.eabi_attribute Tag_CPU_arch, 10
+@ CHECK: .eabi_attribute 6, 10
+@ CHECK-OBJ:        Tag: 6
+@ CHECK-OBJ-NEXT:   Value: 10
+@ CHECK-OBJ-NEXT:   TagName: CPU_arch
+@ CHECK-OBJ-NEXT:   Description: ARM v7
+	.eabi_attribute Tag_CPU_arch_profile, 'A'
+@ CHECK: .eabi_attribute 7, 65
+@ CHECK-OBJ:        Tag: 7
+@ CHECK-OBJ-NEXT:   Value: 65
+@ CHECK-OBJ-NEXT:   TagName: CPU_arch_profile
+@ CHECK-OBJ-NEXT:   Description: Application
+	.eabi_attribute Tag_ARM_ISA_use, 0
+@ CHECK: .eabi_attribute 8, 0
+@ CHECK-OBJ:        Tag: 8
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: ARM_ISA_use
+@ CHECK-OBJ-NEXT:   Description: Not Permitted
+	.eabi_attribute Tag_THUMB_ISA_use, 2
+@ CHECK: .eabi_attribute 9, 2
+@ CHECK-OBJ:        Tag: 9
+@ CHECK-OBJ-NEXT:   Value: 2
+@ CHECK-OBJ-NEXT:   TagName: THUMB_ISA_use
+@ CHECK-OBJ-NEXT:   Description: Thumb-2
+	.eabi_attribute Tag_FP_arch, 3
+@ CHECK: .eabi_attribute 10, 3
+@ CHECK-OBJ:        Tag: 10
+@ CHECK-OBJ-NEXT:   Value: 3
+@ CHECK-OBJ-NEXT:   TagName: FP_arch
+@ CHECK-OBJ-NEXT:   Description: VFPv3
+	.eabi_attribute Tag_WMMX_arch, 0
+@ CHECK: .eabi_attribute 11, 0
+@ CHECK-OBJ:        Tag: 11
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: WMMX_arch
+@ CHECK-OBJ-NEXT:   Description: Not Permitted
+	.eabi_attribute Tag_Advanced_SIMD_arch, 1
+@ CHECK: .eabi_attribute 12, 1
+@ CHECK-OBJ:        Tag: 12
+@ CHECK-OBJ-NEXT:   Value: 1
+@ CHECK-OBJ-NEXT:   TagName: Advanced_SIMD_arch
+@ CHECK-OBJ-NEXT:   Description: NEONv1
+	.eabi_attribute Tag_PCS_config, 2
+@ CHECK: .eabi_attribute 13, 2
+@ CHECK-OBJ:        Tag: 13
+@ CHECK-OBJ-NEXT:   Value: 2
+@ CHECK-OBJ-NEXT:   TagName: PCS_config
+@ CHECK-OBJ-NEXT:   Description: Linux Application
+	.eabi_attribute Tag_ABI_PCS_R9_use, 0
+@ CHECK: .eabi_attribute 14, 0
+@ CHECK-OBJ:        Tag: 14
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: ABI_PCS_R9_use
+@ CHECK-OBJ-NEXT:   Description: v6
+	.eabi_attribute Tag_ABI_PCS_RW_data, 0
+@ CHECK: .eabi_attribute 15, 0
+@ CHECK-OBJ:        Tag: 15
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: ABI_PCS_RW_data
+@ CHECK-OBJ-NEXT:   Description: Absolute
+	.eabi_attribute Tag_ABI_PCS_RO_data, 0
+@ CHECK: .eabi_attribute 16, 0
+@ CHECK-OBJ:        Tag: 16
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: ABI_PCS_RO_data
+@ CHECK-OBJ-NEXT:   Description: Absolute
+	.eabi_attribute Tag_ABI_PCS_GOT_use, 0
+@ CHECK: .eabi_attribute 17, 0
+@ CHECK-OBJ:        Tag: 17
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: ABI_PCS_GOT_use
+@ CHECK-OBJ-NEXT:   Description: Not Permitted
+	.eabi_attribute Tag_ABI_PCS_wchar_t, 4
+@ CHECK: .eabi_attribute 18, 4
+@ CHECK-OBJ:        Tag: 18
+@ CHECK-OBJ-NEXT:   Value: 4
+@ CHECK-OBJ-NEXT:   TagName: ABI_PCS_wchar_t
+@ CHECK-OBJ-NEXT:   Description: 4-byte
+	.eabi_attribute Tag_ABI_FP_rounding, 1
+@ CHECK: .eabi_attribute 19, 1
+@ CHECK-OBJ:        Tag: 19
+@ CHECK-OBJ-NEXT:   Value: 1
+@ CHECK-OBJ-NEXT:   TagName: ABI_FP_rounding
+@ CHECK-OBJ-NEXT:   Description: Runtime
+	.eabi_attribute Tag_ABI_FP_denormal, 2
+@ CHECK: .eabi_attribute 20, 2
+@ CHECK-OBJ:        Tag: 20
+@ CHECK-OBJ-NEXT:   Value: 2
+@ CHECK-OBJ-NEXT:   TagName: ABI_FP_denormal
+@ CHECK-OBJ-NEXT:   Description: Sign Only
+	.eabi_attribute Tag_ABI_FP_exceptions, 1
+@ CHECK: .eabi_attribute 21, 1
+@ CHECK-OBJ:        Tag: 21
+@ CHECK-OBJ-NEXT:   Value: 1
+@ CHECK-OBJ-NEXT:   TagName: ABI_FP_exceptions
+@ CHECK-OBJ-NEXT:   Description: IEEE-754
+	.eabi_attribute Tag_ABI_FP_user_exceptions, 1
+@ CHECK: .eabi_attribute 22, 1
+@ CHECK-OBJ:        Tag: 22
+@ CHECK-OBJ-NEXT:   Value: 1
+@ CHECK-OBJ-NEXT:   TagName: ABI_FP_user_exceptions
+@ CHECK-OBJ-NEXT:   Description: IEEE-754
+	.eabi_attribute Tag_ABI_FP_number_model, 3
+@ CHECK: .eabi_attribute 23, 3
+@ CHECK-OBJ:        Tag: 23
+@ CHECK-OBJ-NEXT:   Value: 3
+@ CHECK-OBJ-NEXT:   TagName: ABI_FP_number_model
+@ CHECK-OBJ-NEXT:   Description: IEEE-754
+	.eabi_attribute Tag_ABI_align_needed, 1
+@ CHECK: .eabi_attribute 24, 1
+@ CHECK-OBJ:        Tag: 24
+@ CHECK-OBJ-NEXT:   Value: 1
+@ CHECK-OBJ-NEXT:   TagName: ABI_align_needed
+@ CHECK-OBJ-NEXT:   Description: 8-byte alignment
+	.eabi_attribute Tag_ABI_align_preserved, 2
+@ CHECK: .eabi_attribute 25, 2
+@ CHECK-OBJ:        Tag: 25
+@ CHECK-OBJ-NEXT:   Value: 2
+@ CHECK-OBJ-NEXT:   TagName: ABI_align_preserved
+@ CHECK-OBJ-NEXT:   Description: 8-byte data and code alignment
+	.eabi_attribute Tag_ABI_enum_size, 3
+@ CHECK: .eabi_attribute 26, 3
+@ CHECK-OBJ:        Tag: 26
+@ CHECK-OBJ-NEXT:   Value: 3
+@ CHECK-OBJ-NEXT:   TagName: ABI_enum_size
+@ CHECK-OBJ-NEXT:   Description: External Int32
+	.eabi_attribute Tag_ABI_HardFP_use, 0
+@ CHECK: .eabi_attribute 27, 0
+@ CHECK-OBJ:        Tag: 27
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: ABI_HardFP_use
+@ CHECK-OBJ-NEXT:   Description: Tag_FP_arch
+	.eabi_attribute Tag_ABI_VFP_args, 1
+@ CHECK: .eabi_attribute 28, 1
+@ CHECK-OBJ:        Tag: 28
+@ CHECK-OBJ-NEXT:   Value: 1
+@ CHECK-OBJ-NEXT:   TagName: ABI_VFP_args
+@ CHECK-OBJ-NEXT:   Description: AAPCS VFP
+	.eabi_attribute Tag_ABI_WMMX_args, 0
+@ CHECK: .eabi_attribute 29, 0
+@ CHECK-OBJ:        Tag: 29
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: ABI_WMMX_args
+@ CHECK-OBJ-NEXT:   Description: AAPCS
+	.eabi_attribute Tag_ABI_FP_optimization_goals, 1
+@ CHECK: .eabi_attribute 31, 1
+@ CHECK-OBJ:        Tag: 31
+@ CHECK-OBJ-NEXT:   Value: 1
+@ CHECK-OBJ-NEXT:   TagName: ABI_FP_optimization_goals
+@ CHECK-OBJ-NEXT:   Description: Speed
+	.eabi_attribute Tag_compatibility, 1, "aeabi"
+@ CHECK: .eabi_attribute 32, 1, "aeabi"
+@ CHECK-OBJ:        Tag: 32
+@ CHECK-OBJ-NEXT:   Value: 1, aeabi
+@ CHECK-OBJ-NEXT:   TagName: compatibility
+@ CHECK-OBJ-NEXT:   Description: AEABI Conformant
+	.eabi_attribute Tag_CPU_unaligned_access, 0
+@ CHECK: .eabi_attribute 34, 0
+@ CHECK-OBJ:        Tag: 34
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: CPU_unaligned_access
+@ CHECK-OBJ-NEXT:   Description: Not Permitted
+	.eabi_attribute Tag_FP_HP_extension, 0
+@ CHECK: .eabi_attribute 36, 0
+@ CHECK-OBJ:        Tag: 36
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: FP_HP_extension
+@ CHECK-OBJ-NEXT:   Description: If Available
+	.eabi_attribute Tag_ABI_FP_16bit_format, 0
+@ CHECK: .eabi_attribute 38, 0
+@ CHECK-OBJ:        Tag: 38
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: ABI_FP_16bit_format
+@ CHECK-OBJ-NEXT:   Description: Not Permitte
+	.eabi_attribute Tag_MPextension_use, 0
+@ CHECK: .eabi_attribute 42, 0
+@ CHECK-OBJ:        Tag: 42
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: MPextension_use
+@ CHECK-OBJ-NEXT:   Description: Not Permitted
+	.eabi_attribute Tag_DIV_use, 0
+@ CHECK: .eabi_attribute 44, 0
+@ CHECK-OBJ:        Tag: 44
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: DIV_use
+@ CHECK-OBJ-NEXT:   Description: If Available
+	.eabi_attribute Tag_nodefaults, 0
+@ CHECK: .eabi_attribute 64, 0
+@ CHECK-OBJ:        Tag: 64
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: nodefaults
+@ CHECK-OBJ-NEXT:   Description: Unspecified Tags UNDEFINED
+	.eabi_attribute Tag_also_compatible_with, "gnu"
+@ CHECK: .eabi_attribute 65, "gnu"
+@ CHECK-OBJ:        Tag: 65
+@ CHECK-OBJ-NEXT:   TagName: also_compatible_with
+@ CHECK-OBJ-NEXT:   Value: gnu
+	.eabi_attribute Tag_T2EE_use, 0
+@ CHECK: .eabi_attribute 66, 0
+@ CHECK-OBJ:        Tag: 66
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: T2EE_use
+@ CHECK-OBJ-NEXT:   Description: Not Permitted
+	.eabi_attribute Tag_Virtualization_use, 0
+@ CHECK: .eabi_attribute 68, 0
+@ CHECK-OBJ:        Tag: 68
+@ CHECK-OBJ-NEXT:   Value: 0
+@ CHECK-OBJ-NEXT:   TagName: Virtualization_use
+@ CHECK-OBJ-NEXT:   Description: Not Permitted
+
+@ ===--- Compatibility Checks ---===
+
+	.eabi_attribute Tag_ABI_align8_needed, 1
+@ CHECK: .eabi_attribute 24, 1
+	.eabi_attribute Tag_ABI_align8_preserved, 2
+@ CHECK: .eabi_attribute 25, 2
+
+@ ===--- GNU AS Compatibility Checks ---===
+
+	.eabi_attribute 2 * 2 + 1, "cortex-a9"
+@ CHECK: .cpu cortex-a9
+	.eabi_attribute 2 * 2 + 2, 5 * 2
+@ CHECK: .eabi_attribute 6, 10
diff --git a/test/MC/ARM/directive-fpu-instrs.s b/test/MC/ARM/directive-fpu-instrs.s
new file mode 100644
index 000000000000..ec97a77aef66
--- /dev/null
+++ b/test/MC/ARM/directive-fpu-instrs.s
@@ -0,0 +1,16 @@
+// RUN: llvm-mc -triple armv7-unknown-linux-gnueabi -mattr=+vfp3,-neon %s
+
+.fpu neon
+VAND d3, d5, d5
+vldr d21, [r7, #296]
+
+@ .thumb should not disable the prior .fpu neon
+.thumb
+
+vmov q4, q11 @ v4si
+str r6, [r7, #264]
+mov r6, r5
+vldr d21, [r7, #296]
+add r9, r7, #216
+
+fstmfdd sp!, {d8, d9, d10, d11, d12, d13, d14, d15}
diff --git a/test/MC/ARM/directive-thumb_func.s b/test/MC/ARM/directive-thumb_func.s
new file mode 100644
index 000000000000..f82e0d1b60a1
--- /dev/null
+++ b/test/MC/ARM/directive-thumb_func.s
@@ -0,0 +1,22 @@
+@ RUN: not llvm-mc -triple armv7-eabi -filetype asm -o /dev/null %s 2>&1 \
+@ RUN:    | FileCheck %s -check-prefix CHECK-EABI
+
+@ NOTE: this test ensures that both forms are accepted for MachO
+@ RUN: llvm-mc -triple armv7-darwin -filetype asm -o /dev/null %s
+
+	.syntax unified
+
+	.thumb_func
+no_suffix:
+	bx lr
+
+	.thumb_func suffix
+suffix:
+	bx lr
+
+// CHECK-EABI: error: unexpected token in directive
+// CHECK-EABI: 	.thumb_func suffix
+// CHECK-EABI:              ^
+
+// CHECK-EABI-NOT: error: invalid instruction
+
diff --git a/test/MC/ARM/directive-unsupported.s b/test/MC/ARM/directive-unsupported.s
new file mode 100644
index 000000000000..0b1f9bac61a1
--- /dev/null
+++ b/test/MC/ARM/directive-unsupported.s
@@ -0,0 +1,68 @@
+@ RUN: not llvm-mc -triple thumbv7-windows -filetype asm -o /dev/null %s 2>&1 \
+@ RUN:     | FileCheck %s
+
+@ RUN: not llvm-mc -triple armv7-darwin -filetype asm -o /dev/null %s 2>&1 \
+@ RUN:    | FileCheck %s
+
+	.syntax unified
+
+	.arch armv7
+
+// CHECK: error: unknown directive
+// CHECK: .arch armv7
+// CHECK: ^
+
+	.cpu cortex-a7
+
+// CHECK: error: unknown directive
+// CHECK: .cpu cortex-a7
+// CHECK: ^
+
+	.fpu neon
+
+// CHECK: error: unknown directive
+// CHECK: .fpu neon
+// CHECK: ^
+
+	.eabi_attribute 0, 0
+
+// CHECK: error: unknown directive
+// CHECK: .eabi_attribute 0, 0
+// CHECK: ^
+
+	.inst 0xdefe
+
+// CHECK: error: unknown directive
+// CHECK: .inst 0xdefe
+// CHECK: ^
+
+	.inst.n 0xdefe
+
+// CHECK: error: unknown directive
+// CHECK: .inst.n 0xdefe
+// CHECK: ^
+
+	.inst.w 0xdefe
+
+// CHECK: error: unknown directive
+// CHECK: .inst.w 0xdefe
+// CHECK: ^
+
+	.object_arch armv7
+
+// CHECK: error: unknown directive
+// CHECK: .object_arch armv7
+// CHECK: ^
+
+	.tlsdescseq undefined
+
+// CHECK: error: unknown directive
+// CHECK: .tlsdescseq undefined
+// CHECK: ^
+
+	.fnstart
+
+// CHECK: error: unknown directive
+// CHECK: .fnstart
+// CHECK: ^
+
diff --git a/test/MC/ARM/dwarf-asm-multiple-sections-dwarf-2.s b/test/MC/ARM/dwarf-asm-multiple-sections-dwarf-2.s
new file mode 100644
index 000000000000..5bf8fbd57fa5
--- /dev/null
+++ b/test/MC/ARM/dwarf-asm-multiple-sections-dwarf-2.s
@@ -0,0 +1,66 @@
+// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp -dwarf-version 2 2>&1 | FileCheck -check-prefix MESSAGES %s
+// RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s
+// RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s
+
+  .section .text, "ax"
+a:
+  mov r0, r0
+
+  .section foo, "ax"
+b:
+  mov r1, r1
+
+// MESSAGES: warning: DWARF2 only supports one section per compilation unit
+
+// DWARF: .debug_abbrev contents:
+// DWARF: Abbrev table for offset: 0x00000000
+// DWARF: [1] DW_TAG_compile_unit DW_CHILDREN_yes
+// DWARF:         DW_AT_stmt_list DW_FORM_data4
+// DWARF:         DW_AT_low_pc    DW_FORM_addr
+// DWARF:         DW_AT_high_pc   DW_FORM_addr
+// DWARF:         DW_AT_name      DW_FORM_string
+// DWARF:         DW_AT_comp_dir  DW_FORM_string
+// DWARF:         DW_AT_producer  DW_FORM_string
+// DWARF:         DW_AT_language  DW_FORM_data2
+
+// DWARF: .debug_info contents:
+// DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1]
+// CHECK-NOT-DWARF: DW_TAG_
+// DWARF:               DW_AT_low_pc [DW_FORM_addr]       (0x0000000000000000)
+// DWARF:               DW_AT_high_pc [DW_FORM_addr]      (0x0000000000000004)
+
+// DWARF: 0x{{[0-9a-f]+}}:   DW_TAG_label [2] *
+// DWARF-NEXT: DW_AT_name [DW_FORM_string]     ("a")
+
+
+// DWARF: .debug_aranges contents:
+// DWARF-NEXT: Address Range Header: length = 0x00000024, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00
+// DWARF-NEXT: [0x00000000 - 0x00000004)
+// DWARF-NEXT: [0x00000000 - 0x00000004)
+
+// DWARF: .debug_line contents:
+// DWARF:      0x0000000000000000      7      0      1   0   0 is_stmt
+// DWARF-NEXT: 0x0000000000000004      7      0      1   0   0 is_stmt end_sequence
+// DWARF:      0x0000000000000000     11      0      1   0   0 is_stmt
+// DWARF-NEXT: 0x0000000000000004     11      0      1   0   0 is_stmt end_sequence
+
+
+// DWARF: .debug_ranges contents:
+// DWARF-NOT: {{0-9a-f}}
+// DWARF: .debug_pubnames contents:
+
+
+// RELOC: RELOCATION RECORDS FOR [.rel.debug_info]:
+// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_abbrev
+// RELOC-NEXT: 0000000c R_ARM_ABS32 .debug_line
+// RELOC-NEXT: R_ARM_ABS32 .text
+// RELOC-NEXT: R_ARM_ABS32 .text
+// RELOC-NEXT: R_ARM_ABS32 .text
+// RELOC-NEXT: R_ARM_ABS32 foo
+
+// RELOC-NOT: RELOCATION RECORDS FOR [.rel.debug_ranges]:
+
+// RELOC: RELOCATION RECORDS FOR [.rel.debug_aranges]:
+// RELOC-NEXT: 00000006 R_ARM_ABS32 .debug_info
+// RELOC-NEXT: 00000010 R_ARM_ABS32 .text
+// RELOC-NEXT: 00000018 R_ARM_ABS32 foo
diff --git a/test/MC/ARM/dwarf-asm-multiple-sections.s b/test/MC/ARM/dwarf-asm-multiple-sections.s
index ed1b89eff3cd..0eb8bab81620 100644
--- a/test/MC/ARM/dwarf-asm-multiple-sections.s
+++ b/test/MC/ARM/dwarf-asm-multiple-sections.s
@@ -1,7 +1,7 @@
 // RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -fdebug-compilation-dir=/tmp
 // RUN: llvm-dwarfdump %t | FileCheck -check-prefix DWARF %s
 // RUN: llvm-objdump -r %t | FileCheck -check-prefix RELOC %s
-// RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 2 2>&1 | FileCheck -check-prefix VERSION %s
+// RUN: llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 2 2>&1 | FileCheck -check-prefix VERSION %s
 // RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 1 2>&1 | FileCheck -check-prefix DWARF1 %s
 // RUN: not llvm-mc < %s -triple=armv7-linux-gnueabi -filetype=obj -o %t -g -dwarf-version 5 2>&1 | FileCheck -check-prefix DWARF5 %s
   .section .text, "ax"
@@ -25,7 +25,7 @@ b:
 // DWARF: .debug_info contents:
 // DWARF: 0x{{[0-9a-f]+}}: DW_TAG_compile_unit [1]
 // CHECK-NOT-DWARF: DW_TAG_
-// DWARF: DW_AT_ranges [DW_FORM_data4]      (0x00000000)
+// DWARF: DW_AT_ranges [DW_FORM_data4]      (0x00000000
 
 // DWARF: 0x{{[0-9a-f]+}}:   DW_TAG_label [2] *
 // DWARF-NEXT: DW_AT_name [DW_FORM_string]     ("a")
@@ -73,7 +73,7 @@ b:
 // RELOC-NEXT: 00000018 R_ARM_ABS32 foo
 
 
-// VERSION: {{.*}} error: DWARF2 only supports one section per compilation unit
+// VERSION: {{.*}} warning: DWARF2 only supports one section per compilation unit
 
 // DWARF1: Dwarf version 1 is not supported.
 // DWARF5: Dwarf version 5 is not supported.
diff --git a/test/MC/ARM/ldr-pseudo-parse-errors.s b/test/MC/ARM/ldr-pseudo-parse-errors.s
index 2e6114d6fe08..2516239f5ee0 100644
--- a/test/MC/ARM/ldr-pseudo-parse-errors.s
+++ b/test/MC/ARM/ldr-pseudo-parse-errors.s
@@ -4,7 +4,7 @@
 .text
 bar:
   mov r0, =0x101
-@ CHECK: error: unexpected token in operand
+@ CHECK: error: unknown token in expression
 @ CHECK: mov r0, =0x101
 @ CHECK:         ^
 
diff --git a/test/MC/ARM/move-banked-regs.s b/test/MC/ARM/move-banked-regs.s
new file mode 100644
index 000000000000..b3b91f5915e2
--- /dev/null
+++ b/test/MC/ARM/move-banked-regs.s
@@ -0,0 +1,220 @@
+@ RUN: llvm-mc -triple armv7 -mattr=virtualization -show-encoding %s | FileCheck %s --check-prefix=CHECK-ARM
+@ RUN: llvm-mc -triple thumbv7 -mattr=virtualization -show-encoding %s | FileCheck %s --check-prefix=CHECK-THUMB
+
+        mrs r2, r8_usr
+        mrs r3, r9_usr
+        mrs r5, r10_usr
+        mrs r7, r11_usr
+        mrs r11, r12_usr
+        mrs r1, sp_usr
+        mrs r2, lr_usr
+@ CHECK-ARM:         mrs     r2, r8_usr              @ encoding: [0x00,0x22,0x00,0xe1]
+@ CHECK-ARM:         mrs     r3, r9_usr              @ encoding: [0x00,0x32,0x01,0xe1]
+@ CHECK-ARM:         mrs     r5, r10_usr             @ encoding: [0x00,0x52,0x02,0xe1]
+@ CHECK-ARM:         mrs     r7, r11_usr             @ encoding: [0x00,0x72,0x03,0xe1]
+@ CHECK-ARM:         mrs     r11, r12_usr            @ encoding: [0x00,0xb2,0x04,0xe1]
+@ CHECK-ARM:         mrs     r1, sp_usr              @ encoding: [0x00,0x12,0x05,0xe1]
+@ CHECK-ARM:         mrs     r2, lr_usr              @ encoding: [0x00,0x22,0x06,0xe1]
+@ CHECK-THUMB:         mrs     r2, r8_usr              @ encoding: [0xe0,0xf3,0x20,0x82]
+@ CHECK-THUMB:         mrs     r3, r9_usr              @ encoding: [0xe1,0xf3,0x20,0x83]
+@ CHECK-THUMB:         mrs     r5, r10_usr             @ encoding: [0xe2,0xf3,0x20,0x85]
+@ CHECK-THUMB:         mrs     r7, r11_usr             @ encoding: [0xe3,0xf3,0x20,0x87]
+@ CHECK-THUMB:         mrs     r11, r12_usr            @ encoding: [0xe4,0xf3,0x20,0x8b]
+@ CHECK-THUMB:         mrs     r1, sp_usr              @ encoding: [0xe5,0xf3,0x20,0x81]
+@ CHECK-THUMB:         mrs     r2, lr_usr              @ encoding: [0xe6,0xf3,0x20,0x82]
+
+        mrs r2, r8_fiq
+        mrs r3, r9_fiq
+        mrs r5, r10_fiq
+        mrs r7, r11_fiq
+        mrs r11, r12_fiq
+        mrs r1, sp_fiq
+        mrs r2, lr_fiq
+        mrs r3, spsr_fiq
+@ CHECK-ARM:         mrs     r2, r8_fiq              @ encoding: [0x00,0x22,0x08,0xe1]
+@ CHECK-ARM:         mrs     r3, r9_fiq              @ encoding: [0x00,0x32,0x09,0xe1]
+@ CHECK-ARM:         mrs     r5, r10_fiq             @ encoding: [0x00,0x52,0x0a,0xe1]
+@ CHECK-ARM:         mrs     r7, r11_fiq             @ encoding: [0x00,0x72,0x0b,0xe1]
+@ CHECK-ARM:         mrs     r11, r12_fiq            @ encoding: [0x00,0xb2,0x0c,0xe1]
+@ CHECK-ARM:         mrs     r1, sp_fiq              @ encoding: [0x00,0x12,0x0d,0xe1]
+@ CHECK-ARM:         mrs     r2, lr_fiq              @ encoding: [0x00,0x22,0x0e,0xe1]
+@ CHECK-ARM:         mrs     r3, SPSR_fiq            @ encoding: [0x00,0x32,0x4e,0xe1]
+@ CHECK-THUMB:         mrs     r2, r8_fiq              @ encoding: [0xe8,0xf3,0x20,0x82]
+@ CHECK-THUMB:         mrs     r3, r9_fiq              @ encoding: [0xe9,0xf3,0x20,0x83]
+@ CHECK-THUMB:         mrs     r5, r10_fiq             @ encoding: [0xea,0xf3,0x20,0x85]
+@ CHECK-THUMB:         mrs     r7, r11_fiq             @ encoding: [0xeb,0xf3,0x20,0x87]
+@ CHECK-THUMB:         mrs     r11, r12_fiq            @ encoding: [0xec,0xf3,0x20,0x8b]
+@ CHECK-THUMB:         mrs     r1, sp_fiq              @ encoding: [0xed,0xf3,0x20,0x81]
+@ CHECK-THUMB:         mrs     r2, lr_fiq              @ encoding: [0xee,0xf3,0x20,0x82]
+@ CHECK-THUMB:         mrs     r3, SPSR_fiq            @ encoding: [0xfe,0xf3,0x20,0x83]
+
+        mrs r4, lr_irq
+        mrs r9, sp_irq
+        mrs r1, spsr_irq
+@ CHECK-ARM:         mrs     r4, lr_irq              @ encoding: [0x00,0x43,0x00,0xe1]
+@ CHECK-ARM:         mrs     r9, sp_irq              @ encoding: [0x00,0x93,0x01,0xe1]
+@ CHECK-ARM:         mrs     r1, SPSR_irq            @ encoding: [0x00,0x13,0x40,0xe1]
+@ CHECK-THUMB:         mrs     r4, lr_irq              @ encoding: [0xe0,0xf3,0x30,0x84]
+@ CHECK-THUMB:         mrs     r9, sp_irq              @ encoding: [0xe1,0xf3,0x30,0x89]
+@ CHECK-THUMB:         mrs     r1, SPSR_irq            @ encoding: [0xf0,0xf3,0x30,0x81]
+
+        mrs r1, lr_svc
+        mrs r3, sp_svc
+        mrs r5, spsr_svc
+@ CHECK-ARM:         mrs     r1, lr_svc              @ encoding: [0x00,0x13,0x02,0xe1]
+@ CHECK-ARM:         mrs     r3, sp_svc              @ encoding: [0x00,0x33,0x03,0xe1]
+@ CHECK-ARM:         mrs     r5, SPSR_svc            @ encoding: [0x00,0x53,0x42,0xe1]
+@ CHECK-THUMB:         mrs     r1, lr_svc              @ encoding: [0xe2,0xf3,0x30,0x81]
+@ CHECK-THUMB:         mrs     r3, sp_svc              @ encoding: [0xe3,0xf3,0x30,0x83]
+@ CHECK-THUMB:         mrs     r5, SPSR_svc            @ encoding: [0xf2,0xf3,0x30,0x85]
+
+        mrs r5, lr_abt
+        mrs r7, sp_abt
+        mrs r9, spsr_abt
+@ CHECK-ARM:         mrs     r5, lr_abt              @ encoding: [0x00,0x53,0x04,0xe1]
+@ CHECK-ARM:         mrs     r7, sp_abt              @ encoding: [0x00,0x73,0x05,0xe1]
+@ CHECK-ARM:         mrs     r9, SPSR_abt            @ encoding: [0x00,0x93,0x44,0xe1]
+@ CHECK-THUMB:         mrs     r5, lr_abt              @ encoding: [0xe4,0xf3,0x30,0x85]
+@ CHECK-THUMB:         mrs     r7, sp_abt              @ encoding: [0xe5,0xf3,0x30,0x87]
+@ CHECK-THUMB:         mrs     r9, SPSR_abt            @ encoding: [0xf4,0xf3,0x30,0x89]
+
+        mrs r9, lr_und
+        mrs r11, sp_und
+        mrs r12, spsr_und
+@ CHECK-ARM:         mrs     r9, lr_und              @ encoding: [0x00,0x93,0x06,0xe1]
+@ CHECK-ARM:         mrs     r11, sp_und             @ encoding: [0x00,0xb3,0x07,0xe1]
+@ CHECK-ARM:         mrs     r12, SPSR_und           @ encoding: [0x00,0xc3,0x46,0xe1]
+@ CHECK-THUMB:         mrs     r9, lr_und              @ encoding: [0xe6,0xf3,0x30,0x89]
+@ CHECK-THUMB:         mrs     r11, sp_und             @ encoding: [0xe7,0xf3,0x30,0x8b]
+@ CHECK-THUMB:         mrs     r12, SPSR_und           @ encoding: [0xf6,0xf3,0x30,0x8c]
+
+
+        mrs r2, lr_mon
+        mrs r4, sp_mon
+        mrs r6, spsr_mon
+@ CHECK-ARM:         mrs     r2, lr_mon              @ encoding: [0x00,0x23,0x0c,0xe1]
+@ CHECK-ARM:         mrs     r4, sp_mon              @ encoding: [0x00,0x43,0x0d,0xe1]
+@ CHECK-ARM:         mrs     r6, SPSR_mon            @ encoding: [0x00,0x63,0x4c,0xe1]
+@ CHECK-THUMB:         mrs     r2, lr_mon              @ encoding: [0xec,0xf3,0x30,0x82]
+@ CHECK-THUMB:         mrs     r4, sp_mon              @ encoding: [0xed,0xf3,0x30,0x84]
+@ CHECK-THUMB:         mrs     r6, SPSR_mon            @ encoding: [0xfc,0xf3,0x30,0x86]
+
+
+        mrs r6, elr_hyp
+        mrs r8, sp_hyp
+        mrs r10, spsr_hyp
+@ CHECK-ARM:         mrs     r6, elr_hyp             @ encoding: [0x00,0x63,0x0e,0xe1]
+@ CHECK-ARM:         mrs     r8, sp_hyp              @ encoding: [0x00,0x83,0x0f,0xe1]
+@ CHECK-ARM:         mrs     r10, SPSR_hyp            @ encoding: [0x00,0xa3,0x4e,0xe1]
+@ CHECK-THUMB:         mrs     r6, elr_hyp             @ encoding: [0xee,0xf3,0x30,0x86]
+@ CHECK-THUMB:         mrs     r8, sp_hyp              @ encoding: [0xef,0xf3,0x30,0x88]
+@ CHECK-THUMB:         mrs     r10, SPSR_hyp            @ encoding: [0xfe,0xf3,0x30,0x8a]
+
+
+        msr r8_usr, r2
+        msr r9_usr, r3
+        msr r10_usr, r5
+        msr r11_usr, r7
+        msr r12_usr, r11
+        msr sp_usr, r1
+        msr lr_usr, r2
+@ CHECK-ARM:         msr     r8_usr, r2              @ encoding: [0x02,0xf2,0x20,0xe1]
+@ CHECK-ARM:         msr     r9_usr, r3              @ encoding: [0x03,0xf2,0x21,0xe1]
+@ CHECK-ARM:         msr     r10_usr, r5             @ encoding: [0x05,0xf2,0x22,0xe1]
+@ CHECK-ARM:         msr     r11_usr, r7             @ encoding: [0x07,0xf2,0x23,0xe1]
+@ CHECK-ARM:         msr     r12_usr, r11            @ encoding: [0x0b,0xf2,0x24,0xe1]
+@ CHECK-ARM:         msr     sp_usr, r1              @ encoding: [0x01,0xf2,0x25,0xe1]
+@ CHECK-ARM:         msr     lr_usr, r2              @ encoding: [0x02,0xf2,0x26,0xe1]
+@ CHECK-THUMB:         msr     r8_usr, r2              @ encoding: [0x82,0xf3,0x20,0x80]
+@ CHECK-THUMB:         msr     r9_usr, r3              @ encoding: [0x83,0xf3,0x20,0x81]
+@ CHECK-THUMB:         msr     r10_usr, r5             @ encoding: [0x85,0xf3,0x20,0x82]
+@ CHECK-THUMB:         msr     r11_usr, r7             @ encoding: [0x87,0xf3,0x20,0x83]
+@ CHECK-THUMB:         msr     r12_usr, r11            @ encoding: [0x8b,0xf3,0x20,0x84]
+@ CHECK-THUMB:         msr     sp_usr, r1              @ encoding: [0x81,0xf3,0x20,0x85]
+@ CHECK-THUMB:         msr     lr_usr, r2              @ encoding: [0x82,0xf3,0x20,0x86]
+
+        msr r8_fiq, r2
+        msr r9_fiq, r3
+        msr r10_fiq, r5
+        msr r11_fiq, r7
+        msr r12_fiq, r11
+        msr sp_fiq, r1
+        msr lr_fiq, r2
+        msr spsr_fiq, r3
+@ CHECK-ARM:         msr     r8_fiq, r2              @ encoding: [0x02,0xf2,0x28,0xe1]
+@ CHECK-ARM:         msr     r9_fiq, r3              @ encoding: [0x03,0xf2,0x29,0xe1]
+@ CHECK-ARM:         msr     r10_fiq, r5             @ encoding: [0x05,0xf2,0x2a,0xe1]
+@ CHECK-ARM:         msr     r11_fiq, r7             @ encoding: [0x07,0xf2,0x2b,0xe1]
+@ CHECK-ARM:         msr     r12_fiq, r11            @ encoding: [0x0b,0xf2,0x2c,0xe1]
+@ CHECK-ARM:         msr     sp_fiq, r1              @ encoding: [0x01,0xf2,0x2d,0xe1]
+@ CHECK-ARM:         msr     lr_fiq, r2              @ encoding: [0x02,0xf2,0x2e,0xe1]
+@ CHECK-ARM:         msr     SPSR_fiq, r3            @ encoding: [0x03,0xf2,0x6e,0xe1]
+@ CHECK-THUMB:         msr     r8_fiq, r2              @ encoding: [0x82,0xf3,0x20,0x88]
+@ CHECK-THUMB:         msr     r9_fiq, r3              @ encoding: [0x83,0xf3,0x20,0x89]
+@ CHECK-THUMB:         msr     r10_fiq, r5             @ encoding: [0x85,0xf3,0x20,0x8a]
+@ CHECK-THUMB:         msr     r11_fiq, r7             @ encoding: [0x87,0xf3,0x20,0x8b]
+@ CHECK-THUMB:         msr     r12_fiq, r11            @ encoding: [0x8b,0xf3,0x20,0x8c]
+@ CHECK-THUMB:         msr     sp_fiq, r1              @ encoding: [0x81,0xf3,0x20,0x8d]
+@ CHECK-THUMB:         msr     lr_fiq, r2              @ encoding: [0x82,0xf3,0x20,0x8e]
+@ CHECK-THUMB:        msr     SPSR_fiq, r3            @ encoding: [0x93,0xf3,0x20,0x8e]
+
+        msr lr_irq, r4
+        msr sp_irq, r9
+        msr spsr_irq, r11
+@ CHECK-ARM:         msr     lr_irq, r4              @ encoding: [0x04,0xf3,0x20,0xe1]
+@ CHECK-ARM:         msr     sp_irq, r9              @ encoding: [0x09,0xf3,0x21,0xe1]
+@ CHECK-ARM:         msr     SPSR_irq, r11           @ encoding: [0x0b,0xf3,0x60,0xe1]
+@ CHECK-THUMB:         msr     lr_irq, r4              @ encoding: [0x84,0xf3,0x30,0x80]
+@ CHECK-THUMB:         msr     sp_irq, r9              @ encoding: [0x89,0xf3,0x30,0x81]
+@ CHECK-THUMB:         msr     SPSR_irq, r11           @ encoding: [0x9b,0xf3,0x30,0x80]
+
+        msr lr_svc, r1
+        msr sp_svc, r3
+        msr spsr_svc, r5
+@ CHECK-ARM:         msr     lr_svc, r1              @ encoding: [0x01,0xf3,0x22,0xe1]
+@ CHECK-ARM:         msr     sp_svc, r3              @ encoding: [0x03,0xf3,0x23,0xe1]
+@ CHECK-ARM:         msr     SPSR_svc, r5            @ encoding: [0x05,0xf3,0x62,0xe1]
+@ CHECK-THUMB:         msr     lr_svc, r1              @ encoding: [0x81,0xf3,0x30,0x82]
+@ CHECK-THUMB:         msr     sp_svc, r3              @ encoding: [0x83,0xf3,0x30,0x83]
+@ CHECK-THUMB:         msr     SPSR_svc, r5            @ encoding: [0x95,0xf3,0x30,0x82]
+
+        msr lr_abt, r5
+        msr sp_abt, r7
+        msr spsr_abt, r9
+@ CHECK-ARM:         msr     lr_abt, r5              @ encoding: [0x05,0xf3,0x24,0xe1]
+@ CHECK-ARM:         msr     sp_abt, r7              @ encoding: [0x07,0xf3,0x25,0xe1]
+@ CHECK-ARM:         msr     SPSR_abt, r9            @ encoding: [0x09,0xf3,0x64,0xe1]
+@ CHECK-THUMB:         msr     lr_abt, r5              @ encoding: [0x85,0xf3,0x30,0x84]
+@ CHECK-THUMB:         msr     sp_abt, r7              @ encoding: [0x87,0xf3,0x30,0x85]
+@ CHECK-THUMB:         msr     SPSR_abt, r9            @ encoding: [0x99,0xf3,0x30,0x84]
+
+        msr lr_und, r9
+        msr sp_und, r11
+        msr spsr_und, r12
+@ CHECK-ARM:         msr     lr_und, r9              @ encoding: [0x09,0xf3,0x26,0xe1]
+@ CHECK-ARM:         msr     sp_und, r11             @ encoding: [0x0b,0xf3,0x27,0xe1]
+@ CHECK-ARM:         msr     SPSR_und, r12           @ encoding: [0x0c,0xf3,0x66,0xe1]
+@ CHECK-THUMB:         msr     lr_und, r9              @ encoding: [0x89,0xf3,0x30,0x86]
+@ CHECK-THUMB:         msr     sp_und, r11             @ encoding: [0x8b,0xf3,0x30,0x87]
+@ CHECK-THUMB:         msr     SPSR_und, r12           @ encoding: [0x9c,0xf3,0x30,0x86]
+
+
+        msr lr_mon, r2
+        msr sp_mon, r4
+        msr spsr_mon, r6
+@ CHECK-ARM:         msr     lr_mon, r2              @ encoding: [0x02,0xf3,0x2c,0xe1]
+@ CHECK-ARM:         msr     sp_mon, r4              @ encoding: [0x04,0xf3,0x2d,0xe1]
+@ CHECK-ARM:         msr     SPSR_mon, r6            @ encoding: [0x06,0xf3,0x6c,0xe1]
+@ CHECK-THUMB:         msr     lr_mon, r2              @ encoding: [0x82,0xf3,0x30,0x8c]
+@ CHECK-THUMB:         msr     sp_mon, r4              @ encoding: [0x84,0xf3,0x30,0x8d]
+@ CHECK-THUMB:         msr     SPSR_mon, r6            @ encoding: [0x96,0xf3,0x30,0x8c]
+
+        msr elr_hyp, r6
+        msr sp_hyp, r8
+        msr spsr_hyp, r10
+@ CHECK-ARM:         msr     elr_hyp, r6             @ encoding: [0x06,0xf3,0x2e,0xe1]
+@ CHECK-ARM:         msr     sp_hyp, r8              @ encoding: [0x08,0xf3,0x2f,0xe1]
+@ CHECK-ARM:         msr     SPSR_hyp, r10           @ encoding: [0x0a,0xf3,0x6e,0xe1]
+@ CHECK-THUMB:         msr     elr_hyp, r6             @ encoding: [0x86,0xf3,0x30,0x8e]
+@ CHECK-THUMB:         msr     sp_hyp, r8              @ encoding: [0x88,0xf3,0x30,0x8f]
+@ CHECK-THUMB:         msr     SPSR_hyp, r10           @ encoding: [0x9a,0xf3,0x30,0x8e]
diff --git a/test/MC/ARM/neon-bitwise-encoding.s b/test/MC/ARM/neon-bitwise-encoding.s
index 8c7228835c9b..d142dbabec9e 100644
--- a/test/MC/ARM/neon-bitwise-encoding.s
+++ b/test/MC/ARM/neon-bitwise-encoding.s
@@ -29,18 +29,63 @@
 
 	vbic	d16, d17, d16
 	vbic	q8, q8, q9
+	vbic q10, q11
+	vbic d9, d1
+	vbic.i16	d16, #0xFF00
+	vbic.i16	q8,  #0xFF00
+	vbic.i16	d16, #0x00FF
+	vbic.i16	q8,  #0x00FF
 	vbic.i32	d16, #0xFF000000
-	vbic.i32	q8, #0xFF000000
-        vbic q10, q11
-        vbic d9, d1
+	vbic.i32	q8,  #0xFF000000
+	vbic.i32	d16, #0x00FF0000
+	vbic.i32	q8,  #0x00FF0000
+	vbic.i32	d16, #0x0000FF00
+	vbic.i32	q8,  #0x0000FF00
+	vbic.i32	d16, #0x000000FF
+	vbic.i32	q8,  #0x000000FF
 
 @ CHECK: vbic	d16, d17, d16           @ encoding: [0xb0,0x01,0x51,0xf2]
 @ CHECK: vbic	q8, q8, q9              @ encoding: [0xf2,0x01,0x50,0xf2]
-@ CHECK: vbic.i32	d16, #0xff000000 @ encoding: [0x3f,0x07,0xc7,0xf3]
-@ CHECK: vbic.i32	q8, #0xff000000 @ encoding: [0x7f,0x07,0xc7,0xf3]
 @ CHECK: vbic	q10, q10, q11           @ encoding: [0xf6,0x41,0x54,0xf2]
 @ CHECK: vbic	d9, d9, d1              @ encoding: [0x11,0x91,0x19,0xf2]
-
+@ CHECK: vbic.i16	d16, #0xff00    @ encoding: [0x3f,0x0b,0xc7,0xf3]
+@ CHECK: vbic.i16	q8, #0xff00     @ encoding: [0x7f,0x0b,0xc7,0xf3]
+@ CHECK: vbic.i16	d16, #0xff      @ encoding: [0x3f,0x09,0xc7,0xf3]
+@ CHECK: vbic.i16	q8, #0xff       @ encoding: [0x7f,0x09,0xc7,0xf3]
+@ CHECK: vbic.i32	d16, #0xff000000 @ encoding: [0x3f,0x07,0xc7,0xf3]
+@ CHECK: vbic.i32	q8, #0xff000000 @ encoding: [0x7f,0x07,0xc7,0xf3]
+@ CHECK: vbic.i32	d16, #0xff0000  @ encoding: [0x3f,0x05,0xc7,0xf3]
+@ CHECK: vbic.i32	q8, #0xff0000   @ encoding: [0x7f,0x05,0xc7,0xf3]
+@ CHECK: vbic.i32	d16, #0xff00    @ encoding: [0x3f,0x03,0xc7,0xf3]
+@ CHECK: vbic.i32	q8, #0xff00     @ encoding: [0x7f,0x03,0xc7,0xf3]
+@ CHECK: vbic.i32	d16, #0xff      @ encoding: [0x3f,0x01,0xc7,0xf3]
+@ CHECK: vbic.i32	q8, #0xff       @ encoding: [0x7f,0x01,0xc7,0xf3]
+
+	vand.i16 d10, #0xff03
+	vand.i16 q10, #0xff03
+	vand.i16 d10, #0x03ff
+	vand.i16 q10, #0x03ff
+	vand.i32 d10, #0x03ffffff
+	vand.i32 q10, #0x03ffffff
+	vand.i32 d10, #0xff03ffff
+	vand.i32 q10, #0xff03ffff
+	vand.i32 d10, #0xffff03ff
+	vand.i32 q10, #0xffff03ff
+	vand.i32 d10, #0xffffff03
+	vand.i32 q10, #0xffffff03
+
+@ CHECK: vbic.i16	d10, #0xfc      @ encoding: [0x3c,0xa9,0x87,0xf3]
+@ CHECK: vbic.i16	q10, #0xfc      @ encoding: [0x7c,0x49,0xc7,0xf3]
+@ CHECK: vbic.i16	d10, #0xfc00    @ encoding: [0x3c,0xab,0x87,0xf3]
+@ CHECK: vbic.i16	q10, #0xfc00    @ encoding: [0x7c,0x4b,0xc7,0xf3]
+@ CHECK: vbic.i32	d10, #0xfc000000 @ encoding: [0x3c,0xa7,0x87,0xf3]
+@ CHECK: vbic.i32	q10, #0xfc000000 @ encoding: [0x7c,0x47,0xc7,0xf3]
+@ CHECK: vbic.i32	d10, #0xfc0000  @ encoding: [0x3c,0xa5,0x87,0xf3]
+@ CHECK: vbic.i32	q10, #0xfc0000  @ encoding: [0x7c,0x45,0xc7,0xf3]
+@ CHECK: vbic.i32	d10, #0xfc00    @ encoding: [0x3c,0xa3,0x87,0xf3]
+@ CHECK: vbic.i32	q10, #0xfc00    @ encoding: [0x7c,0x43,0xc7,0xf3]
+@ CHECK: vbic.i32	d10, #0xfc      @ encoding: [0x3c,0xa1,0x87,0xf3]
+@ CHECK: vbic.i32	q10, #0xfc      @ encoding: [0x7c,0x41,0xc7,0xf3]
 
 	vorn	d16, d17, d16
 	vorn	q8, q8, q9
diff --git a/test/MC/ARM/neon-mov-vfp.s b/test/MC/ARM/neon-mov-vfp.s
new file mode 100644
index 000000000000..6ee6bfda436c
--- /dev/null
+++ b/test/MC/ARM/neon-mov-vfp.s
@@ -0,0 +1,32 @@
+@ RUN: not llvm-mc -mcpu=cortex-a8 -triple armv7-unknown-unknown -show-encoding -mattr=-neon < %s 2>&1 | FileCheck %s --check-prefix=VFP --check-prefix=CHECK
+@ RUN: not llvm-mc -mcpu=cortex-a8 -triple thumbv7-unknown-unknown -show-encoding -mattr=-neon < %s 2>&1 | FileCheck %s --check-prefix=VFP --check-prefix=CHECK
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-unknown-unknown -show-encoding -mattr=+neon < %s 2>&1 | FileCheck %s --check-prefix=NEON --check-prefix=CHECK
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumbv7-unknown-unknown -show-encoding -mattr=+neon < %s 2>&1 | FileCheck %s --check-prefix=NEON --check-prefix=CHECK
+
+@ The 32-bit variants of the NEON scalar move instructions are also available
+@ to any core with VFPv2
+
+@ CHECK-DAG: vmov.32 d13[0], r6 @ encoding:
+@ CHECK-DAG: vmov.32 d17[1], r9 @ encoding:
+vmov.32 d13[0], r6
+vmov.32 d17[1], r9
+
+@ VFP-DAG: error: instruction requires: NEON
+@ VFP-DAG: error: instruction requires: NEON
+@ NEON-DAG: vmov.8  d22[5], r2 @ encoding:
+@ NEON-DAG: vmov.16 d3[2], r4 @ encoding:
+vmov.8 d22[5], r2
+vmov.16 d3[2], r4
+
+@ CHECK-DAG: vmov.32 r6, d13[0] @ encoding:
+@ CHECK-DAG: vmov.32 r9, d17[1] @ encoding:
+vmov.32 r6, d13[0]
+vmov.32 r9, d17[1]
+
+@ VFP-DAG: error: instruction requires: NEON
+@ VFP-DAG: error: instruction requires: NEON
+@ NEON-DAG: vmov.s8 r2, d22[5] @ encoding:
+@ NEON-DAG: vmov.u16        r4, d3[2] @ encoding:
+vmov.s8 r2, d22[5]
+vmov.u16 r4, d3[2]
+
diff --git a/test/MC/ARM/symbol-variants.s b/test/MC/ARM/symbol-variants.s
index a10fe5029e0a..af1bc07b5e1a 100644
--- a/test/MC/ARM/symbol-variants.s
+++ b/test/MC/ARM/symbol-variants.s
@@ -19,8 +19,8 @@
 @ plt
 bl f04(PLT)
 bl f05(plt)
-@ARM: 10 R_ARM_PLT32 f04
-@ARM: 14 R_ARM_PLT32 f05
+@ARM: 10 R_ARM_CALL f04
+@ARM: 14 R_ARM_CALL f05
 @THUMB: 10 R_ARM_THM_CALL f04
 @THUMB: 14 R_ARM_THM_CALL f05
 
diff --git a/test/MC/ARM/thumb-diagnostics.s b/test/MC/ARM/thumb-diagnostics.s
index 19d17c2deef6..bd26d06865ca 100644
--- a/test/MC/ARM/thumb-diagnostics.s
+++ b/test/MC/ARM/thumb-diagnostics.s
@@ -1,9 +1,11 @@
-@ RUN: not llvm-mc -triple=thumbv6-apple-darwin < %s 2> %t
-@ RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
-@ RUN: not llvm-mc -triple=thumbv5-apple-darwin < %s 2> %t
-@ RUN: FileCheck --check-prefix=CHECK-ERRORS-V5 < %t %s
-@ RUN: not llvm-mc -triple=thumbv8 < %s 2> %t
-@ RUN: FileCheck --check-prefix=CHECK-ERRORS-V8 < %t %s
+@ RUN: not llvm-mc -triple=thumbv6-apple-darwin -o /dev/null < %s 2>&1 \
+@ RUN:     | FileCheck --check-prefix=CHECK-ERRORS %s
+@ RUN: not llvm-mc -triple=thumbv5-apple-darwin -o /dev/null < %s 2>&1 \
+@ RUN:     | FileCheck --check-prefix=CHECK-ERRORS-V5 %s
+@ RUN: not llvm-mc -triple=thumbv7m -o /dev/null < %s 2>&1 \
+@ RUN:     | FileCheck --check-prefix=CHECK-ERRORS-V7M %s
+@ RUN: not llvm-mc -triple=thumbv8 -o /dev/null < %s 2>&1 \
+@ RUN:     | FileCheck --check-prefix=CHECK-ERRORS-V8 %s
 
 @ Check for various assembly diagnostic messages on invalid input.
 
@@ -59,6 +61,13 @@ error: invalid operand for instruction
         ldm r2!, {r2, r3, r4}
         ldm r2!, {r2, r3, r4, r10}
         ldmdb r2!, {r2, r3, r4}
+        ldm r0, {r2, sp}
+        ldmia r0, {r2-r3, sp}
+        ldmia r0!, {r2-r3, sp}
+        ldmfd r2, {r1, r3-r6, sp}
+        ldmfd r2!, {r1, r3-r6, sp}
+        ldmdb r1, {r2, r3, sp}
+        ldmdb r1!, {r2, r3, sp} 
 @ CHECK-ERRORS: error: registers must be in range r0-r7
 @ CHECK-ERRORS:         ldm r2!, {r5, r8}
 @ CHECK-ERRORS:                  ^
@@ -74,6 +83,27 @@ error: invalid operand for instruction
 @ CHECK-ERRORS-V8: error: writeback register not allowed in register list
 @ CHECK-ERRORS-V8:         ldmdb r2!, {r2, r3, r4}
 @ CHECK-ERRORS-V8:                 ^
+@ CHECK-ERRORS-V7M: error: SP may not be in the register list
+@ CHECK-ERRORS-V7M:         ldm r0, {r2, sp}
+@ CHECK-ERRORS-V7M:                 ^
+@ CHECK-ERRORS-V7M: error: SP may not be in the register list
+@ CHECK-ERRORS-V7M:         ldmia r0, {r2-r3, sp}
+@ CHECK-ERRORS-V7M:                   ^
+@ CHECK-ERRORS-V7M: error: SP may not be in the register list
+@ CHECK-ERRORS-V7M:         ldmia r0!, {r2-r3, sp}
+@ CHECK-ERRORS-V7M:                    ^
+@ CHECK-ERRORS-V7M: error: SP may not be in the register list
+@ CHECK-ERRORS-V7M:         ldmfd r2, {r1, r3-r6, sp}
+@ CHECK-ERRORS-V7M:                   ^
+@ CHECK-ERRORS-V7M: error: SP may not be in the register list
+@ CHECK-ERRORS-V7M:         ldmfd r2!, {r1, r3-r6, sp}
+@ CHECK-ERRORS-V7M:                    ^
+@ CHECK-ERRORS-V7M: error: SP may not be in the register list
+@ CHECK-ERRORS-V7M:         ldmdb r1, {r2, r3, sp}
+@ CHECK-ERRORS-V7M:                   ^
+@ CHECK-ERRORS-V7M: error: SP may not be in the register list
+@ CHECK-ERRORS-V7M:         ldmdb r1!, {r2, r3, sp}
+@ CHECK-ERRORS-V7M:                    ^
 
 @ Invalid writeback and register lists for PUSH/POP
         pop {r1, r2, r10}
@@ -91,6 +121,10 @@ error: invalid operand for instruction
         stm r1!, {r2, r9}
         stm r2!, {r2, r9}
         stmdb r2!, {r0, r2}
+        stm r1!, {r2, sp}
+        stmia r4!, {r0-r3, sp}
+        stmdb r1, {r2, r3, sp}
+        stmdb r1!, {r2, r3, sp}
 @ CHECK-ERRORS: error: instruction requires: thumb2
 @ CHECK-ERRORS:         stm r1, {r2, r6}
 @ CHECK-ERRORS:         ^
@@ -103,6 +137,18 @@ error: invalid operand for instruction
 @ CHECK-ERRORS-V8: error: writeback register not allowed in register list
 @ CHECK-ERRORS-V8:         stmdb r2!, {r0, r2}
 @ CHECK-ERRORS-V8:                  ^
+@ CHECK-ERRORS-V7M: error: SP may not be in the register list
+@ CHECK-ERRORS-V7M:         stm r1!, {r2, sp}
+@ CHECK-ERRORS-V7M:                  ^
+@ CHECK-ERRORS-V7M: error: SP may not be in the register list
+@ CHECK-ERRORS-V7M:         stmia r4!, {r0-r3, sp}
+@ CHECK-ERRORS-V7M:                    ^
+@ CHECK-ERRORS-V7M: error: SP may not be in the register list
+@ CHECK-ERRORS-V7M:         stmdb r1, {r2, r3, sp}
+@ CHECK-ERRORS-V7M:                   ^
+@ CHECK-ERRORS-V7M: error: SP may not be in the register list
+@ CHECK-ERRORS-V7M:         stmdb r1!, {r2, r3, sp}
+@ CHECK-ERRORS-V7M:                    ^
 
 @ Out of range immediates for LSL instruction.
         lsls r4, r5, #-1
@@ -160,7 +206,7 @@ error: invalid operand for instruction
 @ CHECK-ERRORS: error: instruction requires: thumb2
 @ CHECK-ERRORS:         add sp, sp, #512
 @ CHECK-ERRORS:                     ^
-@ CHECK-ERRORS: error: instruction requires: arm-mode
+@ CHECK-ERRORS: error: instruction requires: thumb2
 @ CHECK-ERRORS:         add r2, sp, #1024
 @ CHECK-ERRORS:         ^
 
@@ -218,3 +264,14 @@ error: invalid operand for instruction
         ldr r4, [pc, #-12]
 @ CHECK-ERRORS: error: instruction requires: thumb2
 
+@------------------------------------------------------------------------------
+@ STC2{L}/LDC2{L} - requires thumb2
+@------------------------------------------------------------------------------
+        stc2 p0, c8, [r1, #4]
+        stc2l p6, c2, [r7, #4]
+        ldc2 p0, c8, [r1, #4]
+        ldc2l p6, c2, [r7, #4]
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: invalid operand for instruction
diff --git a/test/MC/ARM/thumb-load-store-multiple.s b/test/MC/ARM/thumb-load-store-multiple.s
new file mode 100644
index 000000000000..6958450df078
--- /dev/null
+++ b/test/MC/ARM/thumb-load-store-multiple.s
@@ -0,0 +1,100 @@
+@ RUN: not llvm-mc -triple thumbv7-eabi -filetype asm -o - %s 2>&1 \
+@ RUN:     | FileCheck %s
+@ RUN: not llvm-mc -triple thumbv7a-eabi -filetype asm -o - %s 2>&1 \
+@ RUN: | FileCheck --check-prefix=CHECK --check-prefix=CHECK-V7A %s
+@ RUN: not llvm-mc -triple thumbv7m-eabi -filetype asm -o - %s 2>&1 \
+@ RUN: | FileCheck --check-prefix=CHECK --check-prefix=CHECK-V7M %s
+
+	.syntax unified
+	.thumb
+
+	.global ldm
+	.type ldm,%function
+ldm:
+	ldm r0!, {r1, sp}
+@ CHECK: error: SP may not be in the register list
+@ CHECK: ldm r0!, {r1, sp}
+@ CHECK:          ^
+	ldm r0!, {lr, pc}
+@ CHECK: error: PC and LR may not be in the register list simultaneously
+@ CHECK: ldm r0!, {lr, pc}
+@ CHECK:          ^
+	itt eq
+	ldmeq r0!, {r1, pc}
+	ldmeq r0!, {r2, lr}
+@ CHECK: error: instruction must be outside of IT block or the last instruction in an IT block
+@ CHECK: ldmeq r0!, {r1, pc}
+@ CHECK:            ^
+
+	.global ldmdb
+	.type ldmdb,%function
+ldmdb:
+	ldmdb r0!, {r1, sp}
+@ CHECK: error: SP may not be in the register list
+	ldmdb r0!, {lr, pc}
+@ error: PC and LR may not be in the register list simultaneously
+	itt eq
+	ldmeq r0!, {r1, pc}
+	ldmeq r0!, {r2, lr}
+@ CHECK: error: instruction must be outside of IT block or the last instruction in an IT block
+@ CHECK: ldmeq r0!, {r1, pc}
+@ CHECK:            ^
+
+	.global stm
+	.type stm,%function
+stm:
+	stm r0!, {r1, sp}
+@ CHECK: error: SP may not be in the register list
+	stm r0!, {r2, pc}
+@ CHECK: error: PC may not be in the register list
+	stm r0!, {sp, pc}
+@ CHECK: error: SP and PC may not be in the register list
+
+	.global stmdb
+	.type stmdb,%function
+stmdb:
+	stmdb r0!, {r1, sp}
+@ CHECK: error: SP may not be in the register list
+	stmdb r0!, {r2, pc}
+@ CHECK: error: PC may not be in the register list
+	stmdb r0!, {sp, pc}
+@ CHECK: error: SP and PC may not be in the register list
+
+	.global push
+	.type push,%function
+push:
+	push {sp}
+@ CHECK: error: SP may not be in the register list
+	push {pc}
+@ CHECK: error: PC may not be in the register list
+	push {sp, pc}
+@ CHECK: error: SP and PC may not be in the register list
+
+	.global pop
+	.type pop,%function
+pop:
+        pop {sp}
+@ CHECK-V7M: error: SP may not be in the register list
+	pop {lr, pc}
+@ CHECK: error: PC and LR may not be in the register list simultaneously
+@ CHECK: pop {lr, pc}
+@ CHECK:     ^
+	itt eq
+	popeq {r1, pc}
+	popeq {r2, lr}
+@ CHECK: error: instruction must be outside of IT block or the last instruction in an IT block
+@ CHECK: popeq {r1, pc}
+@ CHECK:     ^
+
+	.global valid
+	.type valid,%function
+valid:
+	pop {sp}
+@ CHECK-V7A: ldr sp, [sp], #4
+	pop {sp, pc}
+@ CHECK-V7A: pop.w {sp, pc}
+	push.w {r0}
+@ CHECK: str r0, [sp, #-4]
+	pop.w {r0}
+@ CHECK: ldr r0, [sp], #4
+
diff --git a/test/MC/ARM/thumb-not-mclass.s b/test/MC/ARM/thumb-not-mclass.s
new file mode 100644
index 000000000000..fec545e64b06
--- /dev/null
+++ b/test/MC/ARM/thumb-not-mclass.s
@@ -0,0 +1,26 @@
+@ RUN: not llvm-mc -triple=thumbv7m-apple-darwin -show-encoding < %s 2> %t
+@ RUN: FileCheck < %t %s
+@ RUN: not llvm-mc -triple=thumbv6m -show-encoding < %s 2> %t
+@ RUN: FileCheck < %t %s
+  .syntax unified
+  .globl _func
+
+@ Check that the assembler rejects thumb instructions that are not valid
+@ on mclass.
+
+@------------------------------------------------------------------------------
+@ BLX (immediate)
+@------------------------------------------------------------------------------
+        blx _baz
+
+@ CHECK: error: instruction requires: !armv*m
+
+@------------------------------------------------------------------------------
+@ SETEND
+@------------------------------------------------------------------------------
+
+        setend be
+        setend le
+
+@ CHECK: error: invalid operand for instruction
+@ CHECK: error: invalid operand for instruction
diff --git a/test/MC/ARM/thumb2-bxj.s b/test/MC/ARM/thumb2-bxj.s
new file mode 100644
index 000000000000..e60d1a447cba
--- /dev/null
+++ b/test/MC/ARM/thumb2-bxj.s
@@ -0,0 +1,10 @@
+@ RUN: llvm-mc -triple=thumbv6t2--none-eabi -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=thumbv7a--none-eabi -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=thumbv7r--none-eabi -show-encoding < %s | FileCheck %s
+@ RUN: not llvm-mc -triple=thumbv7m--none-eabi -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=UNDEF
+@ RUN: not llvm-mc -triple=thumbv8a--none-eabi -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=UNDEF
+
+        bxj r2
+
+@ CHECK: bxj r2                      @ encoding: [0xc2,0xf3,0x00,0x8f]
+@ UNDEF: error: instruction requires: arm-mode
diff --git a/test/MC/ARM/thumb2-exception-return-mclass.s b/test/MC/ARM/thumb2-exception-return-mclass.s
new file mode 100644
index 000000000000..21669b0dc657
--- /dev/null
+++ b/test/MC/ARM/thumb2-exception-return-mclass.s
@@ -0,0 +1,15 @@
+# RUN: not llvm-mc -triple thumbv7m -assemble < %s 2>&1 | FileCheck %s
+
+  .text
+
+# CHECK: instruction requires: !armv*m
+# CHECK-NEXT: srsdb sp, #7
+  srsdb sp, #7
+
+# CHECK: instruction requires: !armv*m
+# CHECK-NEXT: rfeia r6
+  rfeia r6
+
+# CHECK: instruction requires: !armv*m
+# CHECK-NEXT: subs pc, lr, #42
+  subs pc, lr, #42
diff --git a/test/MC/ARM/thumb2-ldrb-ldrh.s b/test/MC/ARM/thumb2-ldrb-ldrh.s
new file mode 100644
index 000000000000..8c97987fc68d
--- /dev/null
+++ b/test/MC/ARM/thumb2-ldrb-ldrh.s
@@ -0,0 +1,51 @@
+@ RUN: not llvm-mc -triple thumbv7a-none-eabi -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK
+@ RUN: not llvm-mc -triple thumbv7m-none-eabi -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK
+
+@ Thumb2 LDRS?[BH] are not valid when Rt == PC (these encodings are used for
+@ preload hints).
+@ We don't check the actual error messages here as they are currently not very
+@ helpful, see http://llvm.org/bugs/show_bug.cgi?id=21066.
+
+@ CHECK: error:
+@ CHECK: error:
+@ CHECK: error:
+@ CHECK: error:
+@ CHECK: error:
+  ldrb    pc, [r0, #10]
+  ldrb.w  pc, [r1, #10]
+  ldrb    pc, [r2, #-5]
+  ldrb    pc, [pc, #7]
+  ldrb.w  pc, [pc, #7]
+
+@ CHECK: error:
+@ CHECK: error:
+@ CHECK: error:
+@ CHECK: error:
+@ CHECK: error:
+  ldrsb   pc, [r3, #10]
+  ldrsb.w pc, [r4, #10]
+  ldrsb   pc, [r5, #-5]
+  ldrsb   pc, [pc, #7]
+  ldrsb.w pc, [pc, #7]
+
+@ CHECK: error:
+@ CHECK: error:
+@ CHECK: error:
+@ CHECK: error:
+@ CHECK: error:
+  ldrh    pc, [r6, #10]
+  ldrh.w  pc, [r7, #10]
+  ldrh    pc, [r8, #-5]
+  ldrh    pc, [pc, #7]
+  ldrh.w  pc, [pc, #7]
+
+@ CHECK: error:
+@ CHECK: error:
+@ CHECK: error:
+@ CHECK: error:
+@ CHECK: error:
+  ldrsh   pc, [r9, #10]
+  ldrsh.w pc, [r10, #10]
+  ldrsh   pc, [r11, #-5]
+  ldrsh   pc, [pc, #7]
+  ldrsh.w pc, [pc, #7]
diff --git a/test/MC/ARM/thumb2-ldrexd-strexd.s b/test/MC/ARM/thumb2-ldrexd-strexd.s
new file mode 100644
index 000000000000..3ffb0cb6eaf1
--- /dev/null
+++ b/test/MC/ARM/thumb2-ldrexd-strexd.s
@@ -0,0 +1,14 @@
+@ RUN: llvm-mc -triple=thumbv6t2--none-eabi -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=thumbv7a--none-eabi -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=thumbv7r--none-eabi -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=thumbv8a--none-eabi -show-encoding < %s | FileCheck %s
+@ RUN: not llvm-mc -triple=thumbv7m--none-eabi -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=UNDEF
+
+  ldrexd r0, r1, [r2]
+  strexd r3, r4, r5, [r6]
+
+@ CHECK: ldrexd r0, r1, [r2]            @ encoding: [0xd2,0xe8,0x7f,0x01]
+@ CHECK: strexd r3, r4, r5, [r6]        @ encoding: [0xc6,0xe8,0x73,0x45]
+
+@ UNDEF: error: instruction requires: !armv*m
+@ UNDEF: error: instruction requires: !armv*m
diff --git a/test/MC/ARM/thumb2-mclass.s b/test/MC/ARM/thumb2-mclass.s
index d9c96dfcdd34..331ecc147629 100644
--- a/test/MC/ARM/thumb2-mclass.s
+++ b/test/MC/ARM/thumb2-mclass.s
@@ -1,7 +1,7 @@
-@ RUN: llvm-mc -triple=thumbv7m-apple-darwin -show-encoding < %s | FileCheck %s
-@ RUN: llvm-mc -triple=thumbv6m -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -triple=thumbv6m -show-encoding < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-V6M %s
+@ RUN: llvm-mc -triple=thumbv7m -show-encoding < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-V7M %s
+
   .syntax unified
-  .globl _func
 
 @ Check that the assembler can handle the documented syntax from the ARM ARM.
 @ These tests test instruction encodings specific to v6m & v7m (FeatureMClass).
@@ -40,20 +40,12 @@
 
         msr  apsr, r0
         msr  apsr_nzcvq, r0
-        msr  apsr_g, r0
-        msr  apsr_nzcvqg, r0
         msr  iapsr, r0
         msr  iapsr_nzcvq, r0
-        msr  iapsr_g, r0
-        msr  iapsr_nzcvqg, r0
         msr  eapsr, r0
         msr  eapsr_nzcvq, r0
-        msr  eapsr_g, r0
-        msr  eapsr_nzcvqg, r0
         msr  xpsr, r0
         msr  xpsr_nzcvq, r0
-        msr  xpsr_g, r0
-        msr  xpsr_nzcvqg, r0
         msr  ipsr, r0
         msr  epsr, r0
         msr  iepsr, r0
@@ -62,22 +54,22 @@
         msr  primask, r0
         msr  control, r0
 
-@ CHECK: msr	apsr, r0                @ encoding: [0x80,0xf3,0x00,0x88]
-@ CHECK: msr	apsr, r0                @ encoding: [0x80,0xf3,0x00,0x88]
-@ CHECK: msr	apsr_g, r0              @ encoding: [0x80,0xf3,0x00,0x84]
-@ CHECK: msr	apsr_nzcvqg, r0         @ encoding: [0x80,0xf3,0x00,0x8c]
-@ CHECK: msr	iapsr, r0               @ encoding: [0x80,0xf3,0x01,0x88]
-@ CHECK: msr	iapsr, r0               @ encoding: [0x80,0xf3,0x01,0x88]
-@ CHECK: msr	iapsr_g, r0             @ encoding: [0x80,0xf3,0x01,0x84]
-@ CHECK: msr	iapsr_nzcvqg, r0        @ encoding: [0x80,0xf3,0x01,0x8c]
-@ CHECK: msr	eapsr, r0               @ encoding: [0x80,0xf3,0x02,0x88]
-@ CHECK: msr	eapsr, r0               @ encoding: [0x80,0xf3,0x02,0x88]
-@ CHECK: msr	eapsr_g, r0             @ encoding: [0x80,0xf3,0x02,0x84]
-@ CHECK: msr	eapsr_nzcvqg, r0        @ encoding: [0x80,0xf3,0x02,0x8c]
-@ CHECK: msr	xpsr, r0                @ encoding: [0x80,0xf3,0x03,0x88]
-@ CHECK: msr	xpsr, r0                @ encoding: [0x80,0xf3,0x03,0x88]
-@ CHECK: msr	xpsr_g, r0              @ encoding: [0x80,0xf3,0x03,0x84]
-@ CHECK: msr	xpsr_nzcvqg, r0         @ encoding: [0x80,0xf3,0x03,0x8c]
+@ CHECK-V6M: msr	apsr, r0                @ encoding: [0x80,0xf3,0x00,0x88]
+@ CHECK-V6M: msr	apsr, r0                @ encoding: [0x80,0xf3,0x00,0x88]
+@ CHECK-V6M: msr	iapsr, r0               @ encoding: [0x80,0xf3,0x01,0x88]
+@ CHECK-V6M: msr	iapsr, r0               @ encoding: [0x80,0xf3,0x01,0x88]
+@ CHECK-V6M: msr	eapsr, r0               @ encoding: [0x80,0xf3,0x02,0x88]
+@ CHECK-V6M: msr	eapsr, r0               @ encoding: [0x80,0xf3,0x02,0x88]
+@ CHECK-V6M: msr	xpsr, r0                @ encoding: [0x80,0xf3,0x03,0x88]
+@ CHECK-V6M: msr	xpsr, r0                @ encoding: [0x80,0xf3,0x03,0x88]
+@ CHECK-V7M: msr	apsr_nzcvq, r0          @ encoding: [0x80,0xf3,0x00,0x88]
+@ CHECK-V7M: msr	apsr_nzcvq, r0          @ encoding: [0x80,0xf3,0x00,0x88]
+@ CHECK-V7M: msr	iapsr_nzcvq, r0         @ encoding: [0x80,0xf3,0x01,0x88]
+@ CHECK-V7M: msr	iapsr_nzcvq, r0         @ encoding: [0x80,0xf3,0x01,0x88]
+@ CHECK-V7M: msr	eapsr_nzcvq, r0         @ encoding: [0x80,0xf3,0x02,0x88]
+@ CHECK-V7M: msr	eapsr_nzcvq, r0         @ encoding: [0x80,0xf3,0x02,0x88]
+@ CHECK-V7M: msr	xpsr_nzcvq, r0          @ encoding: [0x80,0xf3,0x03,0x88]
+@ CHECK-V7M: msr	xpsr_nzcvq, r0          @ encoding: [0x80,0xf3,0x03,0x88]
 @ CHECK: msr	ipsr, r0                @ encoding: [0x80,0xf3,0x05,0x88]
 @ CHECK: msr	epsr, r0                @ encoding: [0x80,0xf3,0x06,0x88]
 @ CHECK: msr	iepsr, r0               @ encoding: [0x80,0xf3,0x07,0x88]
diff --git a/test/MC/ARM/thumb_rewrites.s b/test/MC/ARM/thumb_rewrites.s
new file mode 100644
index 000000000000..c9d625e60de7
--- /dev/null
+++ b/test/MC/ARM/thumb_rewrites.s
@@ -0,0 +1,52 @@
+@ RUN: llvm-mc -triple thumbv6m -show-encoding < %s | FileCheck %s
+
+    adds    r0, r0, #8
+@ CHECK: adds   r0, #8              @ encoding: [0x08,0x30]
+
+    adds    r0, r0, r0
+@ CHECK: adds   r0, r0, r0          @ encoding: [0x00,0x18]
+
+    add     r0, r0, r8
+@ CHECK: add    r0, r8              @ encoding: [0x40,0x44]
+
+    add     sp, sp, r0
+@ CHECK: add    sp, r0              @ encoding: [0x85,0x44]
+
+    add     r0, r0, r1
+@ CHECK: add    r0, r1              @ encoding: [0x08,0x44]
+
+    add     r2, r2, r3
+@ CHECK: add    r2, r3              @ encoding: [0x1a,0x44]
+
+    subs    r0, r0, r0
+@ CHECK: subs   r0, r0, r0          @ encoding: [0x00,0x1a]
+
+    ands    r0, r0, r1
+@ CHECK: ands   r0, r1              @ encoding: [0x08,0x40]
+
+    eors    r0, r0, r1
+@ CHECK: eors   r0, r1              @ encoding: [0x48,0x40]
+
+    lsls    r0, r0, r1
+@ CHECK: lsls   r0, r1              @ encoding: [0x88,0x40]
+
+    lsrs    r0, r0, r1
+@ CHECK: lsrs   r0, r1              @ encoding: [0xc8,0x40]
+
+    asrs    r0, r0, r1
+@ CHECK: asrs   r0, r1              @ encoding: [0x08,0x41]
+
+    adcs    r0, r0, r1
+@ CHECK: adcs   r0, r1              @ encoding: [0x48,0x41]
+
+    sbcs    r0, r0, r1
+@ CHECK: sbcs   r0, r1              @ encoding: [0x88,0x41]
+
+    rors    r0, r0, r1
+@ CHECK: rors   r0, r1              @ encoding: [0xc8,0x41]
+
+    orrs    r0, r0, r1
+@ CHECK: orrs   r0, r1              @ encoding: [0x08,0x43]
+
+    bics    r0, r0, r1
+@ CHECK: bics   r0, r1              @ encoding: [0x88,0x43]
diff --git a/test/MC/ARM/thumbv7em.s b/test/MC/ARM/thumbv7em.s
new file mode 100644
index 000000000000..53ebff2f4835
--- /dev/null
+++ b/test/MC/ARM/thumbv7em.s
@@ -0,0 +1,53 @@
+@ RUN: llvm-mc -triple=thumbv7em -show-encoding < %s | FileCheck %s
+@ RUN: not llvm-mc -triple=thumbv7m -show-encoding 2>&1 < %s | FileCheck --check-prefix=CHECK-V7M %s
+
+  .syntax unified
+
+@ Check that the assembler can handle the documented syntax from the ARM ARM.
+@ These tests test instruction encodings specific to ARMv7E-M.
+
+@------------------------------------------------------------------------------
+@ MSR
+@------------------------------------------------------------------------------
+
+        msr  apsr_g, r0
+        msr  apsr_nzcvqg, r0
+        msr  iapsr_g, r0
+        msr  iapsr_nzcvqg, r0
+        msr  eapsr_g, r0
+        msr  eapsr_nzcvqg, r0
+        msr  xpsr_g, r0
+        msr  xpsr_nzcvqg, r0
+
+@ CHECK: msr	apsr_g, r0              @ encoding: [0x80,0xf3,0x00,0x84]
+@ CHECK: msr	apsr_nzcvqg, r0         @ encoding: [0x80,0xf3,0x00,0x8c]
+@ CHECK: msr	iapsr_g, r0             @ encoding: [0x80,0xf3,0x01,0x84]
+@ CHECK: msr	iapsr_nzcvqg, r0        @ encoding: [0x80,0xf3,0x01,0x8c]
+@ CHECK: msr	eapsr_g, r0             @ encoding: [0x80,0xf3,0x02,0x84]
+@ CHECK: msr	eapsr_nzcvqg, r0        @ encoding: [0x80,0xf3,0x02,0x8c]
+@ CHECK: msr	xpsr_g, r0              @ encoding: [0x80,0xf3,0x03,0x84]
+@ CHECK: msr	xpsr_nzcvqg, r0         @ encoding: [0x80,0xf3,0x03,0x8c]
+@ CHECK-V7M: error: invalid operand for instruction
+@ CHECK-V7M-NEXT:         msr  apsr_g, r0
+@ CHECK-V7M-NEXT:              ^
+@ CHECK-V7M: error: invalid operand for instruction
+@ CHECK-V7M-NEXT:         msr  apsr_nzcvqg, r0
+@ CHECK-V7M-NEXT:              ^
+@ CHECK-V7M: error: invalid operand for instruction
+@ CHECK-V7M-NEXT:         msr  iapsr_g, r0
+@ CHECK-V7M-NEXT:              ^
+@ CHECK-V7M: error: invalid operand for instruction
+@ CHECK-V7M-NEXT:         msr  iapsr_nzcvqg, r0
+@ CHECK-V7M-NEXT:              ^
+@ CHECK-V7M: error: invalid operand for instruction
+@ CHECK-V7M-NEXT:         msr  eapsr_g, r0
+@ CHECK-V7M-NEXT:              ^
+@ CHECK-V7M: error: invalid operand for instruction
+@ CHECK-V7M-NEXT:         msr  eapsr_nzcvqg, r0
+@ CHECK-V7M-NEXT:              ^
+@ CHECK-V7M: error: invalid operand for instruction
+@ CHECK-V7M-NEXT:         msr  xpsr_g, r0
+@ CHECK-V7M-NEXT:              ^
+@ CHECK-V7M: error: invalid operand for instruction
+@ CHECK-V7M-NEXT:         msr  xpsr_nzcvqg, r0
+@ CHECK-V7M-NEXT:              ^
diff --git a/test/MC/ARM/v8_IT_manual.s b/test/MC/ARM/v8_IT_manual.s
index 4b63aa82dd68..160e98ce8b4f 100644
--- a/test/MC/ARM/v8_IT_manual.s
+++ b/test/MC/ARM/v8_IT_manual.s
@@ -554,11 +554,11 @@ pushge {r1, r3, r7}
 @ PUSH, encoding T2 (32-bit)
 @ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
 it ge
-pushge {r1, r13, r7}
+pushge {r1, r3, r7}
 @ PUSH, encoding T3 (32-bit)
 @ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
 it ge
-pushge {r13}
+pushge {r3}
 
 @ REV, encoding T1
 @ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
@@ -614,9 +614,10 @@ stmge r1!, {r2, r3}
 @ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
 it ge
 stmge r1, {r2, r3}
+@ STM, encoding T3 (32-bit)
 @ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
 it ge
-stmge r1!, {r2, r13}
+stmge r1!, {r2, r3}
 
 @ LDM, encoding T1
 @ CHECK: [[@LINE+2]]:1: warning: deprecated instruction in IT block
diff --git a/test/MC/ARM/vfp4.s b/test/MC/ARM/vfp4.s
index 8b1b0e0c538b..1563b5aef71e 100644
--- a/test/MC/ARM/vfp4.s
+++ b/test/MC/ARM/vfp4.s
@@ -6,7 +6,7 @@
 
 @ ARM: vfma.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xe2,0xee]
 @ THUMB: vfma.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xa1,0x0b]
-@ THUMB_V7EM-ERRORS: error: instruction requires: double precision VFP
+@ THUMB_V7EM-ERRORS: error: invalid operand for instruction
 @ THUMB_V7EM-ERRORS-NEXT: vfma.f64 d16, d18, d17
 vfma.f64 d16, d18, d17
 
@@ -17,7 +17,7 @@ vfma.f32 s2, s4, s0
 
 @ ARM: vfma.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x42,0xf2]
 @ THUMB: vfma.f32 d16, d18, d17 @ encoding: [0x42,0xef,0xb1,0x0c]
-@ THUMB_V7EM-ERRORS: error: instruction requires: NEON
+@ THUMB_V7EM-ERRORS: error: invalid operand for instruction
 @ THUMB_V7EM-ERRORS-NEXT: vfma.f32 d16, d18, d17
 vfma.f32 d16, d18, d17
 
@@ -29,7 +29,7 @@ vfma.f32 q2, q4, q0
 
 @ ARM: vfnma.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xd2,0xee]
 @ THUMB: vfnma.f64 d16, d18, d17 @ encoding: [0xd2,0xee,0xe1,0x0b]
-@ THUMB_V7EM-ERRORS: error: instruction requires: double precision VFP
+@ THUMB_V7EM-ERRORS: error: invalid operand for instruction
 @ THUMB_V7EM-ERRORS-NEXT: vfnma.f64 d16, d18, d17
 vfnma.f64 d16, d18, d17
 
@@ -40,7 +40,7 @@ vfnma.f32 s2, s4, s0
 
 @ ARM: vfms.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xe2,0xee]
 @ THUMB: vfms.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xe1,0x0b]
-@ THUMB_V7EM-ERRORS: error: instruction requires: double precision VFP
+@ THUMB_V7EM-ERRORS: error: invalid operand for instruction
 @ THUMB_V7EM-ERRORS-NEXT: vfms.f64 d16, d18, d17
 vfms.f64 d16, d18, d17
 
@@ -51,7 +51,7 @@ vfms.f32 s2, s4, s0
 
 @ ARM: vfms.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x62,0xf2]
 @ THUMB: vfms.f32 d16, d18, d17 @ encoding: [0x62,0xef,0xb1,0x0c]
-@ THUMB_V7EM-ERRORS: error: instruction requires: NEON
+@ THUMB_V7EM-ERRORS: error: invalid operand for instruction
 @ THUMB_V7EM-ERRORS-NEXT: vfms.f32 d16, d18, d17
 vfms.f32 d16, d18, d17
 
@@ -63,7 +63,7 @@ vfms.f32 q2, q4, q0
 
 @ ARM: vfnms.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xd2,0xee]
 @ THUMB: vfnms.f64 d16, d18, d17 @ encoding: [0xd2,0xee,0xa1,0x0b]
-@ THUMB_V7EM-ERRORS: error: instruction requires: double precision VFP
+@ THUMB_V7EM-ERRORS: error: invalid operand for instruction
 @ THUMB_V7EM-ERRORS-NEXT: vfnms.f64 d16, d18, d17
 vfnms.f64 d16, d18, d17
 
diff --git a/test/MC/ARM/virtexts-arm.s b/test/MC/ARM/virtexts-arm.s
new file mode 100644
index 000000000000..a67a8fea5c6f
--- /dev/null
+++ b/test/MC/ARM/virtexts-arm.s
@@ -0,0 +1,42 @@
+# RUN: llvm-mc -triple armv7 -mattr=virtualization -show-encoding %s | FileCheck %s --check-prefix=CHECK-ARM
+
+    hvc    #1
+    hvc    #7
+    hvc    #257
+    hvc    #65535
+# CHECK-ARM: [0x71,0x00,0x40,0xe1]
+# CHECK-ARM: [0x77,0x00,0x40,0xe1]
+# CHECK-ARM: [0x71,0x10,0x40,0xe1]
+# CHECK-ARM: [0x7f,0xff,0x4f,0xe1]
+
+    eret
+    ereteq
+    eretne
+    ereths
+    eretlo
+    eretmi
+    eretpl
+    eretvs
+    eretvc
+    erethi
+    eretls
+    eretge
+    eretlt
+    eretgt
+    eretle
+# CHECK-ARM: [0x6e,0x00,0x60,0xe1]
+# CHECK-ARM: [0x6e,0x00,0x60,0x01]
+# CHECK-ARM: [0x6e,0x00,0x60,0x11]
+# CHECK-ARM: [0x6e,0x00,0x60,0x21]
+# CHECK-ARM: [0x6e,0x00,0x60,0x31]
+# CHECK-ARM: [0x6e,0x00,0x60,0x41]
+# CHECK-ARM: [0x6e,0x00,0x60,0x51]
+# CHECK-ARM: [0x6e,0x00,0x60,0x61]
+# CHECK-ARM: [0x6e,0x00,0x60,0x71]
+# CHECK-ARM: [0x6e,0x00,0x60,0x81]
+# CHECK-ARM: [0x6e,0x00,0x60,0x91]
+# CHECK-ARM: [0x6e,0x00,0x60,0xa1]
+# CHECK-ARM: [0x6e,0x00,0x60,0xb1]
+# CHECK-ARM: [0x6e,0x00,0x60,0xc1]
+# CHECK-ARM: [0x6e,0x00,0x60,0xd1]
+
diff --git a/test/MC/ARM/virtexts-thumb.s b/test/MC/ARM/virtexts-thumb.s
new file mode 100644
index 000000000000..d911e1dfb1d7
--- /dev/null
+++ b/test/MC/ARM/virtexts-thumb.s
@@ -0,0 +1,59 @@
+# RUN: llvm-mc -triple thumbv7 -mattr=virtualization -show-encoding %s | FileCheck %s --check-prefix=CHECK-THUMB
+
+    hvc    #1
+    hvc    #7
+    hvc    #257
+    hvc    #65535
+# CHECK-THUMB: [0xe0,0xf7,0x01,0x80]
+# CHECK-THUMB: [0xe0,0xf7,0x07,0x80]
+# CHECK-THUMB: [0xe0,0xf7,0x01,0x81]
+# CHECK-THUMB: [0xef,0xf7,0xff,0x8f]
+
+    hvc.w    #1
+    hvc.w    #7
+    hvc.w    #257
+    hvc.w    #65535
+# CHECK-THUMB: [0xe0,0xf7,0x01,0x80]
+# CHECK-THUMB: [0xe0,0xf7,0x07,0x80]
+# CHECK-THUMB: [0xe0,0xf7,0x01,0x81]
+# CHECK-THUMB: [0xef,0xf7,0xff,0x8f]
+
+    eret
+    it eq; ereteq
+    it ne; eretne
+    it hs; ereths
+    it lo; eretlo
+    it mi; eretmi
+    it pl; eretpl
+    it vs; eretvs
+    it vc; eretvc
+    it hi; erethi
+    it ls; eretls
+    it ge; eretge
+    it lt; eretlt
+    it gt; eretgt
+    it le; eretle
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
+
+# SUBS PC, LR, #0 should have the same encoding as ERET.
+# The conditional forms can't be tested becuse the ARM assembler parser doesn't
+# accept SUBS<cond> PC, LR, #<imm>, only the unconditonal form is allowed. This
+# is due to the way that the custom parser handles optional operands; see the
+# FIXME in ARM/AsmParser/ARMAsmParser.cpp.
+
+    subs pc, lr, #0
+# CHECK-THUMB: [0xde,0xf3,0x00,0x8f]
diff --git a/test/MC/ARM/vorr-vbic-illegal-cases.s b/test/MC/ARM/vorr-vbic-illegal-cases.s
index 16ab6b5bc74b..673098ad5a07 100644
--- a/test/MC/ARM/vorr-vbic-illegal-cases.s
+++ b/test/MC/ARM/vorr-vbic-illegal-cases.s
@@ -1,6 +1,13 @@
 @ RUN: not llvm-mc -triple=armv7-linux-gnueabi %s 2>&1 | FileCheck %s
 .text
 
+        vorr.i32        d2, #0xffffffff
+        vorr.i32        q2, #0xffffffff
+        vorr.i32        d2, #0xabababab
+        vorr.i32        q2, #0xabababab
+        vorr.i16        q2, #0xabab
+        vorr.i16        q2, #0xabab
+
 @ CHECK: error: invalid operand for instruction
 @ CHECK: vorr.i32        d2, #0xffffffff
 @ CHECK: error: invalid operand for instruction
@@ -14,6 +21,13 @@
 @ CHECK: error: invalid operand for instruction
 @ CHECK: vorr.i16        q2, #0xabab
 
+        vbic.i32        d2, #0xffffffff
+        vbic.i32        q2, #0xffffffff
+        vbic.i32        d2, #0xabababab
+        vbic.i32        q2, #0xabababab
+        vbic.i16        d2, #0xabab
+        vbic.i16        q2, #0xabab
+
 @ CHECK: error: invalid operand for instruction
 @ CHECK: vbic.i32        d2, #0xffffffff
 @ CHECK: error: invalid operand for instruction
@@ -27,16 +41,25 @@
 @ CHECK: error: invalid operand for instruction
 @ CHECK: vbic.i16        q2, #0xabab
 
-        vorr.i32        d2, #0xffffffff
-        vorr.i32        q2, #0xffffffff
-        vorr.i32        d2, #0xabababab
-        vorr.i32        q2, #0xabababab
-        vorr.i16        q2, #0xabab
-        vorr.i16        q2, #0xabab
+        vbic.i32        d2, #0x03ffffff
+        vbic.i32        q2, #0x03ffff
+        vbic.i32        d2, #0x03ff
+        vbic.i32        d2, #0xff00ff
+        vbic.i16        d2, #0x03ff
+        vbic.i16        q2, #0xf0f0
+        vbic.i16        q2, #0xf0f0f0
 
-        vbic.i32        d2, #0xffffffff
-        vbic.i32        q2, #0xffffffff
-        vbic.i32        d2, #0xabababab
-        vbic.i32        q2, #0xabababab
-        vbic.i16        d2, #0xabab
-        vbic.i16        q2, #0xabab
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vbic.i32        d2, #0x03ffffff
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vbic.i32        q2, #0x03ffff
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vbic.i32        d2, #0x03ff
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vbic.i32        d2, #0xff00ff
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vbic.i16        d2, #0x03ff
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vbic.i16        q2, #0xf0f0
+@ CHECK: error: invalid operand for instruction
+@ CHECK: vbic.i16        q2, #0xf0f0f0
diff --git a/test/MC/AsmParser/comments-x86-darwin.s b/test/MC/AsmParser/comments-x86-darwin.s
new file mode 100644
index 000000000000..e201f48b1d79
--- /dev/null
+++ b/test/MC/AsmParser/comments-x86-darwin.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin %s 2>&1 | FileCheck %s
+# ensure that single '#' comments are worink as expected on x86 darwin
+.align 3            # test single hash after align
+// CHECK: .align 3
+foo:                # single hash should be ignored as comment
+// CHECK-LABEL: foo:
+    movl %esp, %ebp # same after an instruction
+// CHECK: movl %esp, %ebp
+#   movl %esp, %ebp ## start of the line
+// CHECK-NOT: movl %esp, %ebp
+    # movl %esp, %ebp ## not quite start of the line
+// CHECK-NOT: movl %esp, %ebp
+bar:
+// CHECK-LABEL: bar:
diff --git a/test/MC/AsmParser/directive-warning.s b/test/MC/AsmParser/directive-warning.s
new file mode 100644
index 000000000000..311989ee3fa1
--- /dev/null
+++ b/test/MC/AsmParser/directive-warning.s
@@ -0,0 +1,26 @@
+// RUN: llvm-mc -triple i386 %s 2>&1 | FileCheck %s
+
+	.warning
+// CHECK: warning: .warning directive invoked in source file
+// CHECK-NEXT: 	.warning
+// CHECK-NEXT:  ^
+
+	.ifc a,a
+		.warning
+	.endif
+// CHECK: warning: .warning directive invoked in source file
+// CHECK-NEXT:		.warning
+// CHECK-NEXT:          ^
+
+	.ifnc a,a
+		.warning
+	.endif
+// CHECK-NOT: warning: .warning directive invoked in source file
+
+	.warning "here be dragons"
+// CHECK: warning: here be dragons
+
+	.ifc one, two
+		.warning "dragons, i say"
+	.endif
+// CHECK-NOT: warning: dragons, i say
diff --git a/test/MC/AsmParser/directive_set.s b/test/MC/AsmParser/directive_set.s
index 69abce0db2ff..8d4180a364b6 100644
--- a/test/MC/AsmParser/directive_set.s
+++ b/test/MC/AsmParser/directive_set.s
@@ -1,12 +1,14 @@
-# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+# RUN: llvm-mc -triple i386-unknown-elf %s | FileCheck %s
 
 # CHECK: TEST0:
 # CHECK: a = 0
+# CHECK-NOT: .no_dead_strip a
 TEST0:  
         .set a, 0
         
 # CHECK: TEST1:
 # CHECK: a = 0
+# CHECK-NOT: .no_dead_strip a
 TEST1:  
         .equ a, 0
 
diff --git a/test/MC/AsmParser/macro-exitm.s b/test/MC/AsmParser/macro-exitm.s
new file mode 100644
index 000000000000..66a0597288aa
--- /dev/null
+++ b/test/MC/AsmParser/macro-exitm.s
@@ -0,0 +1,64 @@
+// RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
+
+// .exitm is encountered in a normal macro expansion
+.macro REP
+.rept 3
+.long 0
+.exitm
+.endr
+.endm
+REP
+// Only the output from the first rept expansion should make it through:
+// CHECK: .long 0
+// CHECK-NOT: .long 0
+
+// .exitm is in a true branch
+.macro A
+.if 1
+.long 1
+.exitm
+.endif
+.long 1
+.endm
+A
+// CHECK: .long 1
+// CHECK-NOT: .long 1
+
+// .exitm is in a false branch
+.macro B
+.if 1
+.long 2
+.else
+.exitm
+.endif
+.long 2
+.endm
+B
+// CHECK: .long 2
+// CHECK: .long 2
+
+
+// .exitm is in a false branch that is encountered prior to the true branch
+.macro C
+.if 0
+.exitm
+.else
+.long 3
+.endif
+.long 3
+.endm
+C
+// CHECK: .long 3
+// CHECK: .long 3
+
+// .exitm is in a macro that's expanded in a conditional block.
+.macro D
+.long 4
+.exitm
+.long 4
+.endm
+.if 1
+D
+.endif
+// CHECK: .long 4
+// CHECK-NOT: .long 4
diff --git a/test/MC/AsmParser/macros-darwin-vararg.s b/test/MC/AsmParser/macros-darwin-vararg.s
index a650c0871d23..4aa2f4c1d9b6 100644
--- a/test/MC/AsmParser/macros-darwin-vararg.s
+++ b/test/MC/AsmParser/macros-darwin-vararg.s
@@ -1,8 +1,90 @@
-// RUN: not llvm-mc -triple i386-apple-darwin10 %s 2>&1 | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin10 %s 2>&1 | FileCheck %s
 
-// CHECK: error: vararg is not a valid parameter qualifier for 'arg' in macro 'abc'
-// CHECK: .macro abc arg:vararg
+.macro abc a b:vararg
+.globl "\a, \b"
+.endm
+
+// CHECK: .globl "zed0, zed1, zed2"
+abc zed0, zed1, zed2
+
+.purgem abc
+
+.macro ifcc arg:vararg
+.if cc
+            \arg
+.endif
+.endm
+
+.macro ifcc2 arg0 arg1:vararg
+.if cc
+            movl \arg0, \arg1
+.endif
+.endm
+
+.macro ifcc3 arg0, arg1:vararg
+.if cc
+            movl \arg0, \arg1
+.endif
+.endm
+
+.macro ifcc4 arg0, arg1:vararg
+.if cc
+            movl \arg1, \arg0
+.endif
+.endm
 
-.macro abc arg:vararg
-    \arg
+.text
+
+// CHECK: movl %esp, %ebp
+// CHECK: subl $0, %esp
+// CHECK: movl %eax, %ebx
+// CHECK: movl %ecx, %ebx
+// CHECK: movl %ecx, %eax
+// CHECK: movl %eax, %ecx
+// CHECK: movl %ecx, %eax
+// CHECK: movl %eax, %ecx
+.set cc,1
+  ifcc  movl    %esp, %ebp
+        subl $0, %esp
+
+  ifcc2 %eax, %ebx
+  ifcc2 %ecx, %ebx
+  ifcc3 %ecx, %eax
+  ifcc3 %eax, %ecx
+  ifcc4 %eax, %ecx  ## test
+  ifcc4 %ecx, %eax ## test
+
+// CHECK-NOT movl
+// CHECK: subl $1, %esp
+.set cc,0
+  ifcc  movl,    %esp, %ebp
+        subl $1, %esp
+
+.macro abc arg:vararg=nop
+  \arg
+.endm
+
+.macro abcd arg0=%eax, arg1:vararg=%ebx
+  movl \arg0, \arg1
 .endm
+
+.text
+
+// CHECK: nop
+  abc
+// CHECK: movl %eax, %ebx
+  abcd ,
+
+.macro .make_macro start, end, name, body:vararg
+\start \name
+\body
+\end
+.endmacro
+
+.make_macro .macro,.endmacro,.mybyte,.byte $0, $2, $1
+
+.data
+// CHECK: .byte 10
+// CHECK: .byte 12
+// CHECK: .byte 11
+.mybyte 10,11,12
diff --git a/test/MC/COFF/alias.s b/test/MC/COFF/alias.s
index fcaa4204d49b..2293d43c5750 100644
--- a/test/MC/COFF/alias.s
+++ b/test/MC/COFF/alias.s
@@ -48,7 +48,7 @@ weak_aliased_to_external = external2
 // CHECK-NEXT:   Symbol {
 // CHECK:          Name: global_aliased_to_external
 // CHECK-NEXT:     Value: 0
-// CHECK-NEXT:     Section:  (0)
+// CHECK-NEXT:     Section: IMAGE_SYM_UNDEFINED (0)
 // CHECK-NEXT:     BaseType: Null (0x0)
 // CHECK-NEXT:     ComplexType: Null (0x0)
 // CHECK-NEXT:     StorageClass: External (0x2)
@@ -57,7 +57,7 @@ weak_aliased_to_external = external2
 // CHECK-NEXT:   Symbol {
 // CHECK-NEXT:     Name: external1
 // CHECK-NEXT:     Value: 0
-// CHECK-NEXT:     Section:  (0)
+// CHECK-NEXT:     Section: IMAGE_SYM_UNDEFINED (0)
 // CHECK-NEXT:     BaseType: Null (0x0)
 // CHECK-NEXT:     ComplexType: Null (0x0)
 // CHECK-NEXT:     StorageClass: External (0x2)
@@ -84,7 +84,7 @@ weak_aliased_to_external = external2
 // CHECK-NEXT:   Symbol {
 // CHECK-NEXT:     Name: weak_aliased_to_external
 // CHECK-NEXT:     Value: 0
-// CHECK-NEXT:     Section:  (0)
+// CHECK-NEXT:     Section: IMAGE_SYM_UNDEFINED (0)
 // CHECK-NEXT:     BaseType: Null (0x0)
 // CHECK-NEXT:     ComplexType: Null (0x0)
 // CHECK-NEXT:     StorageClass: WeakExternal (0x69)
@@ -92,13 +92,12 @@ weak_aliased_to_external = external2
 // CHECK-NEXT:     AuxWeakExternal {
 // CHECK-NEXT:       Linked: external2
 // CHECK-NEXT:       Search: Library (0x2)
-// CHECK-NEXT:       Unused: (00 00 00 00 00 00 00 00 00 00)
 // CHECK-NEXT:     }
 // CHECK-NEXT:   }
 // CHECK-NEXT:   Symbol {
 // CHECK-NEXT:     Name: external2
 // CHECK-NEXT:     Value: 0
-// CHECK-NEXT:     Section:  (0)
+// CHECK-NEXT:     Section: IMAGE_SYM_UNDEFINED (0)
 // CHECK-NEXT:     BaseType: Null (0x0)
 // CHECK-NEXT:     ComplexType: Null (0x0)
 // CHECK-NEXT:     StorageClass: External (0x2)
diff --git a/test/MC/COFF/basic-coff-64.s b/test/MC/COFF/basic-coff-64.s
index 38a9e578a4ca..62e4eb92f855 100644
--- a/test/MC/COFF/basic-coff-64.s
+++ b/test/MC/COFF/basic-coff-64.s
@@ -113,7 +113,6 @@ _main:                                  # @main
 // CHECK:       Checksum: 0x0
 // CHECK:       Number: [[DataNum]]
 // CHECK:       Selection: 0x0
-// CHECK:       Unused: (00 00 00)
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
@@ -128,7 +127,7 @@ _main:                                  # @main
 // CHECK:   Symbol {
 // CHECK:     Name:           _printf
 // CHECK:     Value:          0
-// CHECK:     Section:        (0)
+// CHECK:     Section:        IMAGE_SYM_UNDEFINED (0)
 // CHECK:     BaseType:       Null
 // CHECK:     ComplexType:    Null
 // CHECK:     StorageClass:   External
diff --git a/test/MC/COFF/basic-coff.s b/test/MC/COFF/basic-coff.s
index 38bfa6d1014c..549825aacea8 100644
--- a/test/MC/COFF/basic-coff.s
+++ b/test/MC/COFF/basic-coff.s
@@ -113,7 +113,6 @@ L_.str:                                 # @.str
 // CHECK:       Checksum: 0x0
 // CHECK:       Number: 2
 // CHECK:       Selection: 0x0
-// CHECK:       Unused: (00 00 00)
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
@@ -128,7 +127,7 @@ L_.str:                                 # @.str
 // CHECK:   Symbol {
 // CHECK:     Name:           _printf
 // CHECK:     Value:          0
-// CHECK:     Section:        (0)
+// CHECK:     Section:        IMAGE_SYM_UNDEFINED (0)
 // CHECK:     BaseType:       Null
 // CHECK:     ComplexType:    Null
 // CHECK:     StorageClass:   External
diff --git a/test/MC/COFF/bigobj.py b/test/MC/COFF/bigobj.py
new file mode 100644
index 000000000000..2d610738b9da
--- /dev/null
+++ b/test/MC/COFF/bigobj.py
@@ -0,0 +1,26 @@
+# RUN: python %s | llvm-mc -filetype=obj -triple i686-pc-win32 - | llvm-readobj -h | FileCheck %s
+
+# This test checks that the COFF object emitter can produce objects with
+# more than 65279 sections.
+
+# While we only generate 65277 sections, an implicit .text, .data and .bss will
+# also be emitted.  This brings the total to 65280.
+num_sections = 65277
+
+# CHECK:      ImageFileHeader {
+# CHECK-NEXT:   Machine: IMAGE_FILE_MACHINE_I386
+# CHECK-NEXT:   SectionCount: 65280
+# CHECK-NEXT:   TimeDateStamp: {{[0-9]+}}
+# CHECK-NEXT:   PointerToSymbolTable: 0x{{[0-9A-F]+}}
+# CHECK-NEXT:   SymbolCount: 195837
+# CHECK-NEXT:   OptionalHeaderSize: 0
+# CHECK-NEXT:   Characteristics [ (0x0)
+# CHECK-NEXT:   ]
+# CHECK-NEXT: }
+
+for i in range(0, num_sections):
+	print("""	.section	.bss,"bw",discard,_b%d
+	.globl	_b%d                     # @b%d
+_b%d:
+	.byte	0                       # 0x0
+""" % (i, i, i, i))
diff --git a/test/MC/COFF/bss_section.ll b/test/MC/COFF/bss_section.ll
index 477b3dfbd3a6..1921eeb61a65 100644
--- a/test/MC/COFF/bss_section.ll
+++ b/test/MC/COFF/bss_section.ll
@@ -6,4 +6,4 @@
 ; CHECK: .bss
 
 @thingy_linkonce = linkonce_odr global %struct.foo zeroinitializer, align 4
-; CHECK: .section .bss,"bw",discard,_thingy_linkonce
+; CHECK: .section .bss,"wb",discard,_thingy_linkonce
diff --git a/test/MC/COFF/comm-align.s b/test/MC/COFF/comm-align.s
new file mode 100644
index 000000000000..ca6bfbea2473
--- /dev/null
+++ b/test/MC/COFF/comm-align.s
@@ -0,0 +1,57 @@
+# RUN: llvm-mc -triple i686-windows-gnu -filetype obj -o - %s \
+# RUN:    | llvm-readobj -coff-directives -symbols | FileCheck %s
+
+# NOTE: this test checks multiple things:
+# - that -aligncomm is not emitted for 1-byte alignment
+# - that -aligncomm is emitted for the various alignments (greater than 1)
+# - that the alignment is represented as a log_2 of the alignment
+# - that the section switching occurs correctly
+# - that functions after the switch also are emitted into the correct section
+
+	.text
+
+	.def _a
+		.scl 3
+		.type 32
+	.endef
+_a:
+	ret
+
+	.data
+
+	.comm _s_1,4,0                  # @s_1
+	.comm _s_2,4,1                  # @s_2
+	.comm _s_4,4,2                  # @s_3
+	.comm _s_8,4,3                  # @s_4
+
+	.comm _small_but_overaligned,1,3                  # @s_4
+
+	.text
+
+	.def _b
+		.scl 3
+		.type 32
+	.endef
+_b:
+	ret
+
+# CHECK-NOT: -aligncomm:"_s_1",0
+
+# CHECK: Symbols [
+# CHECK:   Symbol {
+# CHECK:     Name: _a
+# CHECK:     Section: .text (1)
+# CHECK:   }
+# CHECK:   Symbol {
+# CHECK:     Name: _small_but_overaligned
+# CHECK-NEXT:Value: 1
+# CHECK-NEXT:Section: IMAGE_SYM_UNDEFINED (0)
+# CHECK:   }
+# CHECK:   Symbol {
+# CHECK:     Name: _b
+# CHECK:     Section: .text (1)
+# CHECK:   }
+# CHECK: ]
+
+# CHECK: Directive(s): -aligncomm:"_s_2",1 -aligncomm:"_s_4",2 -aligncomm:"_s_8",3 -aligncomm:"_small_but_overaligned",3
+
diff --git a/test/MC/COFF/comm.ll b/test/MC/COFF/comm.ll
index 6fe122ef1d93..74da557fb5cc 100644
--- a/test/MC/COFF/comm.ll
+++ b/test/MC/COFF/comm.ll
@@ -9,5 +9,5 @@
 ; CHECK: .lcomm	_a,1
 ; CHECK: .lcomm	_b,8,8
 ; .comm uses log2 alignment
-; CHECK: .comm	_c,1
-; CHECK: .comm	_d,8
+; CHECK: .comm	_c,1,0
+; CHECK: .comm	_d,8,3
diff --git a/test/MC/COFF/comm.s b/test/MC/COFF/comm.s
index 37db75f9cc4a..773ebde0b9d1 100644
--- a/test/MC/COFF/comm.s
+++ b/test/MC/COFF/comm.s
@@ -1,7 +1,9 @@
 // RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -t | FileCheck %s
 
 .lcomm _a,4,4
-.comm	_b, 4
+.comm	_b, 4, 2
+// _c has size 1 but align 32, the value field is the max of size and align.
+.comm	_c, 1, 5
 
 
 // CHECK:       Symbol {
@@ -17,7 +19,17 @@
 // CHECK:       Symbol {
 // CHECK:         Name: _b
 // CHECK-NEXT:    Value: 4
-// CHECK-NEXT:    Section:  (0)
+// CHECK-NEXT:    Section:  IMAGE_SYM_UNDEFINED (0)
+// CHECK-NEXT:    BaseType: Null
+// CHECK-NEXT:    ComplexType: Null
+// CHECK-NEXT:    StorageClass: External
+// CHECK-NEXT:    AuxSymbolCount: 0
+// CHECK-NEXT:  }
+
+// CHECK:       Symbol {
+// CHECK:         Name: _c
+// CHECK-NEXT:    Value: 32
+// CHECK-NEXT:    Section:  IMAGE_SYM_UNDEFINED (0)
 // CHECK-NEXT:    BaseType: Null
 // CHECK-NEXT:    ComplexType: Null
 // CHECK-NEXT:    StorageClass: External
diff --git a/test/MC/COFF/const-gv-with-rel-init.ll b/test/MC/COFF/const-gv-with-rel-init.ll
new file mode 100644
index 000000000000..7d3c5f631881
--- /dev/null
+++ b/test/MC/COFF/const-gv-with-rel-init.ll
@@ -0,0 +1,11 @@
+; RUN: llc -mtriple x86_64-pc-windows-msvc < %s | FileCheck %s
+
+define void @f() {
+  ret void
+}
+
+@ptr = constant void ()* @f, section ".CRT$XLB", align 8
+; CHECK:  .section  .CRT$XLB,"rd"
+
+@weak_array = weak_odr unnamed_addr constant [1 x i8*] [i8* bitcast (void ()* @f to i8*)]
+; CHECK:  .section  .rdata,"rd",discard,weak_array
diff --git a/test/MC/COFF/feat00.s b/test/MC/COFF/feat00.s
index bfd47ad4abc0..f671ebe7d921 100644
--- a/test/MC/COFF/feat00.s
+++ b/test/MC/COFF/feat00.s
@@ -6,7 +6,7 @@
 // CHECK: Symbol {
 // CHECK:   Name: @feat.00
 // CHECK:   Value: 123
-// CHECK:   Section: (65535)
+// CHECK:   Section: IMAGE_SYM_ABSOLUTE (-1)
 // CHECK:   BaseType: Null (0x0)
 // CHECK:   ComplexType: Null (0x0)
 // CHECK:   StorageClass: External (0x2)
diff --git a/test/MC/COFF/file.s b/test/MC/COFF/file.s
index 132e82b2e25d..a18a1f476aca 100644
--- a/test/MC/COFF/file.s
+++ b/test/MC/COFF/file.s
@@ -21,7 +21,7 @@
 // CHECK-SCN: Symbols [
 // CHECK-SCN:   Symbol {
 // CHECK-SCN:     Name: .file
-// CHECK-SCN:     Section: (65534)
+// CHECK-SCN:     Section: IMAGE_SYM_DEBUG (-2)
 // CHECK-SCN:     StorageClass: File
 // CHECK-SCN:     AuxFileRecord {
 // CHECK-SCN:       FileName: null-padded.asm
@@ -29,7 +29,7 @@
 // CHECK-SCN:   }
 // CHECK-SCN:   Symbol {
 // CHECK-SCN:     Name: .file
-// CHECK-SCN:     Section: (65534)
+// CHECK-SCN:     Section: IMAGE_SYM_DEBUG (-2)
 // CHECK-SCN:     StorageClass: File
 // CHECK-SCN:     AuxFileRecord {
 // CHECK-SCN:       FileName: eighteen-chars.asm
@@ -37,7 +37,7 @@
 // CHECK-SCN:   }
 // CHECK-SCN:   Symbol {
 // CHECK-SCN:     Name: .file
-// CHECK-SCN:     Section: (65534)
+// CHECK-SCN:     Section: IMAGE_SYM_DEBUG (-2)
 // CHECK-SCN:     StorageClass: File
 // CHECK-SCN:     AuxFileRecord {
 // CHECK-SCN:       FileName: multiple-auxiliary-entries.asm
diff --git a/test/MC/COFF/ir-to-imgrel.ll b/test/MC/COFF/ir-to-imgrel.ll
index 39884d2a15eb..dfc88b2a6752 100644
--- a/test/MC/COFF/ir-to-imgrel.ll
+++ b/test/MC/COFF/ir-to-imgrel.ll
@@ -2,5 +2,5 @@
 
 @__ImageBase = external global i8
 
-; X64: .quad   "?x@@3HA"@IMGREL32
+; X64: .quad   "?x@@3HA"@IMGREL
 @"\01?x@@3HA" = global i64 sub nsw (i64 ptrtoint (i64* @"\01?x@@3HA" to i64), i64 ptrtoint (i8* @__ImageBase to i64)), align 8
diff --git a/test/MC/COFF/linker-options.ll b/test/MC/COFF/linker-options.ll
index 0be74e57ad6e..a5b866632cc9 100755
--- a/test/MC/COFF/linker-options.ll
+++ b/test/MC/COFF/linker-options.ll
@@ -1,12 +1,6 @@
 ; RUN: llc -O0 -mtriple=i386-pc-win32 -filetype=asm -o - %s | FileCheck %s
 
-!0 = metadata !{ i32 6, metadata !"Linker Options",
-   metadata !{
-      metadata !{ metadata !"/DEFAULTLIB:msvcrt.lib" },
-      metadata !{ metadata !"/DEFAULTLIB:msvcrt.lib",
-                  metadata !"/DEFAULTLIB:secur32.lib" },
-      metadata !{ metadata !"/DEFAULTLIB:C:\5Cpath to\5Casan_rt.lib" },
-      metadata !{ metadata !"/with spaces" } } }
+!0 = !{i32 6, !"Linker Options", !{!{!"/DEFAULTLIB:msvcrt.lib"}, !{!"/DEFAULTLIB:msvcrt.lib", !"/DEFAULTLIB:secur32.lib"}, !{!"/DEFAULTLIB:C:\5Cpath to\5Casan_rt.lib"}, !{!"/with spaces"}, !{!"\22/quoted spaces\22"}}}
 
 !llvm.module.flags = !{ !0 }
 
@@ -14,10 +8,11 @@ define dllexport void @foo() {
   ret void
 }
 
-; CHECK: .section        .drectve,"r"
+; CHECK: .section        .drectve,"yn"
 ; CHECK: .ascii   " /DEFAULTLIB:msvcrt.lib"
 ; CHECK: .ascii   " /DEFAULTLIB:msvcrt.lib"
 ; CHECK: .ascii   " /DEFAULTLIB:secur32.lib"
 ; CHECK: .ascii   " \"/DEFAULTLIB:C:\\path to\\asan_rt.lib\""
 ; CHECK: .ascii   " \"/with spaces\""
+; CHECK: .ascii   " \"/quoted spaces\""
 ; CHECK: .ascii   " /EXPORT:_foo"
diff --git a/test/MC/COFF/secidx.s b/test/MC/COFF/secidx.s
index 619d777e0177..022804d82196 100644
--- a/test/MC/COFF/secidx.s
+++ b/test/MC/COFF/secidx.s
@@ -4,7 +4,9 @@
 
 Lfoo:
 	.secidx	Lfoo
+	.short  0
 	.secidx	Lbar
+	.short  0
 
 .section spam
 Lbar:
diff --git a/test/MC/COFF/section-invalid-flags.s b/test/MC/COFF/section-invalid-flags.s
index 17b1550a904e..9cdceaf30014 100644
--- a/test/MC/COFF/section-invalid-flags.s
+++ b/test/MC/COFF/section-invalid-flags.s
@@ -6,3 +6,6 @@
 
 // CHECK: error: conflicting section flags 'b' and 'd'
 .section s_bd,"bd"; .long 1
+
+// CHECK: error: expected comdat type such as 'discard' or 'largest' after protection bits
+.section .stack, "w", @nobits
diff --git a/test/MC/COFF/section-name-encoding.s b/test/MC/COFF/section-name-encoding.s
index 7edd6d7446d9..73ab4bdb770f 100644
--- a/test/MC/COFF/section-name-encoding.s
+++ b/test/MC/COFF/section-name-encoding.s
@@ -21,14 +21,19 @@
 .section s1234567; .long 1
 
 
+// Note: the names in the string table will be sorted in reverse
+// lexicographical order. Use a suffix letter (z, y, x, ...) to
+// get the preferred ordering of names in the test.
+
 // Base 10 encoding
+// Ending in z should put the name first in the string table.
 
 // /4
 // CHECK:   Section {
 // CHECK:     Number: 6
-// CHECK:     Name: s12345678 (2F 34 00 00 00 00 00 00)
+// CHECK:     Name: s1234567z (2F 34 00 00 00 00 00 00)
 // CHECK:   }
-.section s12345678; .long 1
+.section s1234567z; .long 1
 
 
 // Generate padding sections to increase the string table size to at least
@@ -47,20 +52,20 @@
   pad_sections2 \pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad\pad
 .endm
 
-// 1000x 'a'
-pad_sections aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+// 1000x 'y'
+pad_sections yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
 
 
 // /1000029 == 4 + 10 + (5 * (2 + (20 * 10 * 1000) + 1))
 //             v   |     |    v    ~~~~~~~~~~~~~~    v
 //    table size   v     v   "p0"        pad         NUL separator
-//     "s12345678\0"     # of pad sections
+//     "s1234567z\0"     # of pad sections
 //
 // CHECK:   Section {
 // CHECK:     Number: 12
-// CHECK:     Name: seven_digit (2F 31 30 30 30 30 32 39)
+// CHECK:     Name: sevendigitx (2F 31 30 30 30 30 32 39)
 // CHECK:   }
-.section seven_digit; .long 1
+.section sevendigitx; .long 1
 
 
 // Generate padding sections to increase the string table size to at least
@@ -71,18 +76,18 @@ pad_sections aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 .endm
 
 // 1000x 'a'
-pad_sections_ex aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+pad_sections_ex wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww
 
 
 // //AAmJa4 == 1000029 + 12 + (5 * (2 + (9 * 20 * 10 * 1000) + 1)) == 38*64^3 + 9*64^2 + 26*64 + 56
 //             v         |     |    v    ~~~~~~~~~~~~~~~~~~    v
 // seven_digit offset    v     v   "p0"         pad            NUL separator
-//         "seven_digit\0"     # of pad sections
+//         "sevendigitx\0"     # of pad sections
 //
 // "2F 2F 41 41 6D 4A 61 34" is "//AAmJa4", which decodes to "0 0 38 9 26 56".
 //
 // CHECK:   Section {
 // CHECK:     Number: 18
-// CHECK:     Name: double_slash (2F 2F 41 41 6D 4A 61 34)
+// CHECK:     Name: doubleslashv (2F 2F 41 41 6D 4A 61 34)
 // CHECK:   }
-.section double_slash; .long 1
+.section doubleslashv; .long 1
diff --git a/test/MC/COFF/section-passthru-flags.s b/test/MC/COFF/section-passthru-flags.s
new file mode 100644
index 000000000000..3bd061b391d1
--- /dev/null
+++ b/test/MC/COFF/section-passthru-flags.s
@@ -0,0 +1,7 @@
+// RUN: llvm-mc -triple i386-pc-win32 < %s | FileCheck %s
+.section .klaatu,"wn"
+// CHECK: .section .klaatu,"wn"
+.section .barada,"y"
+// CHECK: .section .barada,"y"
+.section .nikto,"wds"
+// CHECK: .section .nikto,"wds"
diff --git a/test/MC/COFF/seh-linkonce.s b/test/MC/COFF/seh-linkonce.s
new file mode 100644
index 000000000000..5631b748c00b
--- /dev/null
+++ b/test/MC/COFF/seh-linkonce.s
@@ -0,0 +1,85 @@
+// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-readobj -sections -section-symbols | FileCheck %s
+
+        .text
+        .def     weak_func;
+        .scl    2;
+        .type   32;
+        .endef
+        .section        .text,"xr",discard,weak_func
+        .globl  weak_func
+        .align  16, 0x90
+weak_func:                              # @weak_func
+.Ltmp0:
+.seh_proc weak_func
+# BB#0:                                 # %entry
+        pushq   %rbp
+.Ltmp1:
+        .seh_pushreg 5
+        movq    %rsp, %rbp
+.Ltmp2:
+        .seh_setframe 5, 0
+.Ltmp3:
+        .seh_endprologue
+        xorl    %eax, %eax
+        popq    %rbp
+        retq
+.Leh_func_end0:
+.Ltmp4:
+        .seh_endproc
+
+// CHECK: Sections [
+// CHECK:   Section {
+// CHECK:     Name: .text
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Name: .data
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Name: .bss
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number: [[TEXT_SECNUM:[0-9]+]]
+// CHECK:     Name: .text
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number: [[XDATA_SECNUM:[0-9]+]]
+// CHECK:     Name: .xdata
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:     Symbols [
+// CHECK:       Symbol {
+// CHECK:         Name: .xdata
+// CHECK:         Section: .xdata ([[XDATA_SECNUM]])
+// CHECK:         StorageClass: Static (0x3)
+// CHECK:         AuxSymbolCount: 1
+// CHECK:         AuxSectionDef {
+// CHECK:           Selection: Associative (0x5)
+// CHECK:           AssocSection: .text ([[TEXT_SECNUM]])
+// CHECK:         }
+// CHECK:       }
+// CHECK:     ]
+// CHECK:   }
+// CHECK:   Section {
+// CHECK:     Number: [[PDATA_SECNUM:[0-9]+]]
+// CHECK:     Name: .pdata
+// CHECK:     Characteristics [
+// CHECK:       IMAGE_SCN_LNK_COMDAT
+// CHECK:     ]
+// CHECK:     Symbols [
+// CHECK:       Symbol {
+// CHECK:         Name: .pdata
+// CHECK:         Section: .pdata ([[PDATA_SECNUM]])
+// CHECK:         StorageClass: Static (0x3)
+// CHECK:         AuxSymbolCount: 1
+// CHECK:         AuxSectionDef {
+// CHECK:           Selection: Associative (0x5)
+// CHECK:           AssocSection: .text ([[TEXT_SECNUM]])
+// CHECK:         }
+// CHECK:       }
+// CHECK:     ]
+// CHECK:   }
+// CHECK: ]
diff --git a/test/MC/COFF/seh-section.s b/test/MC/COFF/seh-section.s
index 026c0d733484..c95eece800bd 100644
--- a/test/MC/COFF/seh-section.s
+++ b/test/MC/COFF/seh-section.s
@@ -1,5 +1,7 @@
-// This test ensures that, if the section containing a function has a suffix
-// (e.g. .text$foo), its unwind info section also has a suffix (.xdata$foo).
+// This test ensures functions in custom sections get unwind info emitted in a
+// distinct .xdata section. Ideally we'd just emit a second .xdata section with
+// the same name and characteristics, but MC uniques sections by name and
+// characteristics, so that is not possible.
 // RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -s -sd | FileCheck %s
 
 // CHECK:      Name: .xdata$foo
@@ -20,6 +22,44 @@
 // CHECK-NEXT:   0000: 01050200 05500402
 // CHECK-NEXT: )
 
+// CHECK:      Name: .xdata$.mytext
+// CHECK-NEXT: VirtualSize
+// CHECK-NEXT: VirtualAddress
+// CHECK-NEXT: RawDataSize: 8
+// CHECK-NEXT: PointerToRawData
+// CHECK-NEXT: PointerToRelocations
+// CHECK-NEXT: PointerToLineNumbers
+// CHECK-NEXT: RelocationCount: 0
+// CHECK-NEXT: LineNumberCount: 0
+// CHECK-NEXT: Characteristics [
+// CHECK-NEXT:   IMAGE_SCN_ALIGN_4BYTES
+// CHECK-NEXT:   IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:   IMAGE_SCN_MEM_READ
+// CHECK-NEXT: ]
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT:   0000: 01050200 05500402
+// CHECK-NEXT: )
+
+// CHECK:      Name: .xdata
+// CHECK-NEXT: VirtualSize
+// CHECK-NEXT: VirtualAddress
+// CHECK-NEXT: RawDataSize: 8
+// CHECK-NEXT: PointerToRawData
+// CHECK-NEXT: PointerToRelocations
+// CHECK-NEXT: PointerToLineNumbers
+// CHECK-NEXT: RelocationCount: 0
+// CHECK-NEXT: LineNumberCount: 0
+// CHECK-NEXT: Characteristics [
+// CHECK-NEXT:   IMAGE_SCN_ALIGN_4BYTES
+// CHECK-NEXT:   IMAGE_SCN_CNT_INITIALIZED_DATA
+// CHECK-NEXT:   IMAGE_SCN_MEM_READ
+// CHECK-NEXT: ]
+// CHECK-NEXT: SectionData (
+// CHECK-NEXT:   0000: 01050200 05500402
+// CHECK-NEXT: )
+
+
+
     .section .text$foo,"x"
     .globl foo
     .def foo; .scl 2; .type 32; .endef
@@ -35,3 +75,33 @@ foo:
     ret
     .seh_endproc
 
+    .section .mytext,"x"
+    .globl bar
+    .def bar; .scl 2; .type 32; .endef
+    .seh_proc bar
+bar:
+    subq $8, %rsp
+    .seh_stackalloc 8
+    pushq %rbp
+    .seh_pushreg %rbp
+    .seh_endprologue
+    popq %rbp
+    addq $8, %rsp
+    ret
+    .seh_endproc
+
+    .section .text
+    .globl baz
+    .def baz; .scl 2; .type 32; .endef
+    .seh_proc baz
+baz:
+    subq $8, %rsp
+    .seh_stackalloc 8
+    pushq %rbp
+    .seh_pushreg %rbp
+    .seh_endprologue
+    popq %rbp
+    addq $8, %rsp
+    ret
+    .seh_endproc
+
diff --git a/test/MC/COFF/simple-fixups.s b/test/MC/COFF/simple-fixups.s
index 2a74f21f12d0..cb5d7642ee6b 100644
--- a/test/MC/COFF/simple-fixups.s
+++ b/test/MC/COFF/simple-fixups.s
@@ -1,5 +1,6 @@
-// The purpose of this test is to verify that we do not produce unneeded
-// relocations when symbols are in the same section and we know their offset.
+// The purpose of this test is to verify that we produce relocations for
+// references to functions.  Failing to do so might cause pointer-to-function
+// equality to fail if /INCREMENTAL links are used.
 
 // RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | llvm-readobj -s | FileCheck %s
 // RUN: llvm-mc -filetype=obj -triple x86_64-pc-win32 %s | llvm-readobj -s | FileCheck %s
@@ -46,4 +47,4 @@ Ltmp0:
 	ret
 
 // CHECK:     Sections [
-// CHECK-NOT: RelocationCount: {{[^0]}}
+// CHECK: RelocationCount: 1
diff --git a/test/MC/COFF/symbol-fragment-offset-64.s b/test/MC/COFF/symbol-fragment-offset-64.s
index deac88869b20..05b46bbb7e79 100644
--- a/test/MC/COFF/symbol-fragment-offset-64.s
+++ b/test/MC/COFF/symbol-fragment-offset-64.s
@@ -117,7 +117,6 @@ _main:                                  # @main
 // CHECK:       Checksum: 0x0
 // CHECK:       Number: 1
 // CHECK:       Selection: 0x0
-// CHECK:       Unused: (00 00 00)
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
@@ -135,7 +134,6 @@ _main:                                  # @main
 // CHECK:       Checksum: 0x0
 // CHECK:       Number: 2
 // CHECK:       Selection: 0x0
-// CHECK:       Unused: (00 00 00)
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
@@ -150,7 +148,7 @@ _main:                                  # @main
 // CHECK:   Symbol {
 // CHECK:     Name:                      _printf
 // CHECK:     Value:                     0
-// CHECK:     Section:                   (0)
+// CHECK:     Section:                   IMAGE_SYM_UNDEFINED (0)
 // CHECK:     BaseType:                  Null
 // CHECK:     ComplexType:               Null
 // CHECK:     StorageClass:              External
@@ -159,7 +157,7 @@ _main:                                  # @main
 // CHECK:   Symbol {
 // CHECK:     Name:                      _puts
 // CHECK:     Value:                     0
-// CHECK:     Section:                   (0)
+// CHECK:     Section:                   IMAGE_SYM_UNDEFINED (0)
 // CHECK:     BaseType:                  Null
 // CHECK:     ComplexType:               Null
 // CHECK:     StorageClass:              External
diff --git a/test/MC/COFF/symbol-fragment-offset.s b/test/MC/COFF/symbol-fragment-offset.s
index b09c5af1b61e..cc5040a99cc1 100644
--- a/test/MC/COFF/symbol-fragment-offset.s
+++ b/test/MC/COFF/symbol-fragment-offset.s
@@ -117,7 +117,6 @@ L_.str2:
 // CHECK:       Checksum: 0x0
 // CHECK:       Number: 1
 // CHECK:       Selection: 0x0
-// CHECK:       Unused: (00 00 00)
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
@@ -135,7 +134,6 @@ L_.str2:
 // CHECK:       Checksum: 0x0
 // CHECK:       Number: 2
 // CHECK:       Selection: 0x0
-// CHECK:       Unused: (00 00 00)
 // CHECK:     }
 // CHECK:   }
 // CHECK:   Symbol {
@@ -150,7 +148,7 @@ L_.str2:
 // CHECK:   Symbol {
 // CHECK:     Name:                      _printf
 // CHECK:     Value:                     0
-// CHECK:     Section:                   (0)
+// CHECK:     Section:                   IMAGE_SYM_UNDEFINED (0)
 // CHECK:     BaseType:                  Null
 // CHECK:     ComplexType:               Null
 // CHECK:     StorageClass:              External
@@ -159,7 +157,7 @@ L_.str2:
 // CHECK:   Symbol {
 // CHECK:     Name:                      _puts
 // CHECK:     Value:                     0
-// CHECK:     Section:                   (0)
+// CHECK:     Section:                   IMAGE_SYM_UNDEFINED (0)
 // CHECK:     BaseType:                  Null
 // CHECK:     ComplexType:               Null
 // CHECK:     StorageClass:              External
diff --git a/test/MC/COFF/weak.s b/test/MC/COFF/weak.s
index accd3f452eaf..6086749d80de 100644
--- a/test/MC/COFF/weak.s
+++ b/test/MC/COFF/weak.s
@@ -37,7 +37,7 @@ LBB0_2:                                 # %return
 // CHECK:      Symbol {
 // CHECK:        Name:           _test_weak
 // CHECK-NEXT:   Value:          0
-// CHECK-NEXT:   Section:        (0)
+// CHECK-NEXT:   Section:        IMAGE_SYM_UNDEFINED (0)
 // CHECK-NEXT:   BaseType:       Null
 // CHECK-NEXT:   ComplexType:    Null
 // CHECK-NEXT:   StorageClass:   WeakExternal
@@ -45,14 +45,13 @@ LBB0_2:                                 # %return
 // CHECK-NEXT:   AuxWeakExternal {
 // CHECK-NEXT:     Linked: .weak._test_weak.default
 // CHECK-NEXT:      Search: Library
-// CHECK-NEXT:      Unused: (00 00 00 00 00 00 00 00 00 00)
 // CHECK-NEXT:   }
 // CHECK-NEXT: }
 
 // CHECK:      Symbol {
 // CHECK:        Name:                .weak._test_weak.default
 // CHECK-NEXT:   Value:               0
-// CHECK-NEXT:   Section:             (65535)
+// CHECK-NEXT:   Section:             IMAGE_SYM_ABSOLUTE (-1)
 // CHECK-NEXT:   BaseType:            Null
 // CHECK-NEXT:   ComplexType:         Null
 // CHECK-NEXT:   StorageClass:        External
@@ -62,7 +61,7 @@ LBB0_2:                                 # %return
 // CHECK:      Symbol {
 // CHECK:        Name:           _test_weak_alias
 // CHECK-NEXT:   Value:          0
-// CHECK-NEXT:   Section:        (0)
+// CHECK-NEXT:   Section:        IMAGE_SYM_UNDEFINED (0)
 // CHECK-NEXT:   BaseType:       Null
 // CHECK-NEXT:   ComplexType:    Null
 // CHECK-NEXT:   StorageClass:   WeakExternal
@@ -70,6 +69,5 @@ LBB0_2:                                 # %return
 // CHECK-NEXT:   AuxWeakExternal {
 // CHECK-NEXT:     Linked: _main
 // CHECK-NEXT:      Search: Library
-// CHECK-NEXT:      Unused: (00 00 00 00 00 00 00 00 00 00)
 // CHECK-NEXT:   }
 // CHECK-NEXT: }
diff --git a/test/MC/Disassembler/ARM/arm-tests.txt b/test/MC/Disassembler/ARM/arm-tests.txt
index acc2d9fec609..008bb1154e57 100644
--- a/test/MC/Disassembler/ARM/arm-tests.txt
+++ b/test/MC/Disassembler/ARM/arm-tests.txt
@@ -1,6 +1,6 @@
-# RUN: llvm-mc --disassemble %s -triple=armv7-apple-darwin9 -mcpu=cortex-a9-mp | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=armv7-apple-darwin9 -mcpu=cortex-a9 | FileCheck %s
 
-# CHECK:	addpl	r4, pc, #318767104
+# CHECK:	addpl	r4, pc, #76, #10
 0x4c 0x45 0x8f 0x52
 
 # CHECK:	b	#0
diff --git a/test/MC/Disassembler/ARM/basic-arm-instructions.txt b/test/MC/Disassembler/ARM/basic-arm-instructions.txt
index 8bcf4e6e3faa..335e69fe2410 100644
--- a/test/MC/Disassembler/ARM/basic-arm-instructions.txt
+++ b/test/MC/Disassembler/ARM/basic-arm-instructions.txt
@@ -10,9 +10,12 @@
 # CHECK: adc r1, r2, #983040
 # CHECK: adc r1, r2, #15728640
 # CHECK: adc r1, r2, #251658240
-# CHECK: adc r1, r2, #4026531840
-# CHECK: adc r1, r2, #4026531855
+# CHECK: adc r1, r2, #-268435456
+# CHECK: adc r1, r2, #-268435441
+# CHECK: adc r7, r8, #-2147483638
+# CHECK: adc r7, r8, #40, #2
 # CHECK: adcs r1, r2, #3840
+# CHECK: adcs r7, r8, #40, #2
 # CHECK: adcseq r1, r2, #3840
 # CHECK: adceq r1, r2, #3840
 
@@ -25,8 +28,11 @@
 0x0f 0x14 0xa2 0xe2
 0x0f 0x12 0xa2 0xe2
 0xff 0x12 0xa2 0xe2
+0x2a 0x71 0xa8 0xe2
+0x28 0x71 0xa8 0xe2
 
 0x0f 0x1c 0xb2 0xe2
+0x28 0x71 0xb8 0xe2
 0x0f 0x1c 0xb2 0x02
 0x0f 0x1c 0xa2 0x02
 
@@ -112,6 +118,8 @@
 # ADD
 #------------------------------------------------------------------------------
 # CHECK: add r4, r5, #61440
+# CHECK: add r7, r8, #-2147483638
+# CHECK: add r7, r8, #40, #2
 # CHECK: add r4, r5, r6
 # CHECK: add r4, r5, r6, lsl #5
 # CHECK: add r4, r5, r6, lsr #5
@@ -138,6 +146,8 @@
 # CHECK: add r4, r4, r5, rrx
 
 0x0f 0x4a 0x85 0xe2
+0x2a 0x71 0x88 0xe2
+0x28 0x71 0x88 0xe2
 0x06 0x40 0x85 0xe0
 0x86 0x42 0x85 0xe0
 0xa6 0x42 0x85 0xe0
@@ -165,6 +175,15 @@
 0x65 0x40 0x84 0xe0
 
 #------------------------------------------------------------------------------
+# ADDS
+#------------------------------------------------------------------------------
+# CHECK: adds   r7, r8, #-2147483638
+# CHECK: adds   r7, r8, #40, #2
+
+0x2a 0x71 0x98 0xe2
+0x28 0x71 0x98 0xe2
+
+#------------------------------------------------------------------------------
 # ADR
 #------------------------------------------------------------------------------
 # CHECK: add	r2, pc, #3
@@ -183,6 +202,8 @@
 # AND
 #------------------------------------------------------------------------------
 # CHECK: and r10, r1, #15
+# CHECK: and r7, r8, #-2147483638
+# CHECK: and r7, r8, #40, #2
 # CHECK: and r10, r1, r6
 # CHECK: and r10, r1, r6, lsl #10
 # CHECK: and r10, r1, r6, lsr #10
@@ -209,6 +230,8 @@
 # CHECK: and r10, r10, r1, rrx
 
 0x0f 0xa0 0x01 0xe2
+0x2a 0x71 0x08 0xe2
+0x28 0x71 0x08 0xe2
 0x06 0xa0 0x01 0xe0
 0x06 0xa5 0x01 0xe0
 0x26 0xa5 0x01 0xe0
@@ -262,6 +285,8 @@
 # BIC
 #------------------------------------------------------------------------------
 # CHECK: bic r10, r1, #15
+# CHECK: bic r7, r8, #-2147483638
+# CHECK: bic r7, r8, #40, #2
 # CHECK: bic r10, r1, r6
 # CHECK: bic r10, r1, r6, lsl #10
 # CHECK: bic r10, r1, r6, lsr #10
@@ -288,6 +313,8 @@
 # CHECK: bic r10, r10, r1, rrx
 
 0x0f 0xa0 0xc1 0xe3
+0x2a 0x71 0xc8 0xe3
+0x28 0x71 0xc8 0xe3
 0x06 0xa0 0xc1 0xe1
 0x06 0xa5 0xc1 0xe1
 0x26 0xa5 0xc1 0xe1
@@ -393,6 +420,8 @@
 # CMN
 #------------------------------------------------------------------------------
 # CHECK: cmn r1, #15
+# CHECK: cmn r7, #40, #2
+# CHECK: cmn r7, #-2147483638
 # CHECK: cmn r1, r6
 # CHECK: cmn r1, r6, lsl #10
 # CHECK: cmn r1, r6, lsr #10
@@ -406,6 +435,8 @@
 # CHECK: cmn r1, r6, rrx
 
 0x0f 0x00 0x71 0xe3
+0x28 0x01 0x77 0xe3
+0x2a 0x01 0x77 0xe3
 0x06 0x00 0x71 0xe1
 0x06 0x05 0x71 0xe1
 0x26 0x05 0x71 0xe1
@@ -422,6 +453,8 @@
 # CMP
 #------------------------------------------------------------------------------
 # CHECK: cmp r1, #15
+# CHECK: cmp r7, #40, #2
+# CHECK: cmp r7, #-2147483638
 # CHECK: cmp r1, r6
 # CHECK: cmp r1, r6, lsl #10
 # CHECK: cmp r1, r6, lsr #10
@@ -435,6 +468,8 @@
 # CHECK: cmp r1, r6, rrx
 
 0x0f 0x00 0x51 0xe3
+0x28 0x01 0x57 0xe3
+0x2a 0x01 0x57 0xe3
 0x06 0x00 0x51 0xe1
 0x06 0x05 0x51 0xe1
 0x26 0x05 0x51 0xe1
@@ -556,6 +591,8 @@
 # EOR
 #------------------------------------------------------------------------------
 # CHECK: eor r4, r5, #61440
+# CHECK: eor r7, r8, #-2147483638
+# CHECK: eor r7, r8, #40, #2
 # CHECK: eor r4, r5, r6
 # CHECK: eor r4, r5, r6, lsl #5
 # CHECK: eor r4, r5, r6, lsr #5
@@ -582,6 +619,8 @@
 # CHECK: eor r4, r4, r5, rrx
 
 0x0f 0x4a 0x25 0xe2
+0x2a 0x71 0x28 0xe2
+0x28 0x71 0x28 0xe2
 0x06 0x40 0x25 0xe0
 0x86 0x42 0x25 0xe0
 0xa6 0x42 0x25 0xe0
@@ -714,10 +753,15 @@
 # CHECK: mov r4, #4080
 # CHECK: mov r5, #16711680
 # CHECK: mov sp, #35
+# CHECK: mov r9, #240, #30
+# CHECK: mov r7, #-2147483638
+# CHECK: mov pc, #2147483658
 # CHECK: movw r6, #65535
 # CHECK: movw r9, #65535
 # CHECK: movw sp, #1193
 # CHECK: movs r3, #7
+# CHECK: movs r11, #99
+# CHECK: movs r11, #240, #30
 # CHECK: moveq r4, #4080
 # CHECK: movseq r5, #16711680
 
@@ -725,10 +769,15 @@
 0xff 0x4e 0xa0 0xe3
 0xff 0x58 0xa0 0xe3
 0x23 0xd0 0xa0 0xe3
+0xf0 0x9f 0xa0 0xe3
+0x2a 0x71 0xa0 0xe3
+0x2a 0xf1 0xa0 0xe3
 0xff 0x6f 0x0f 0xe3
 0xff 0x9f 0x0f 0xe3
 0xa9 0xd4 0x00 0xe3
 0x07 0x30 0xb0 0xe3
+0x63 0xb0 0xb0 0xe3
+0xf0 0xbf 0xb0 0xe3
 0xff 0x4e 0xa0 0x03
 0xff 0x58 0xb0 0x03
 
@@ -810,6 +859,8 @@
 # CHECK: msr  SPSR_fc, #5
 # CHECK: msr  SPSR_fsxc, #5
 # CHECK: msr  CPSR_fsxc, #5
+# CHECK: msr  APSR_nzcvq, #2147483658
+# CHECK: msr  SPSR_fsxc, #40, #2
 
 0x05 0xf0 0x29 0xe3
 0x05 0xf0 0x24 0xe3
@@ -825,6 +876,8 @@
 0x05 0xf0 0x69 0xe3
 0x05 0xf0 0x6f 0xe3
 0x05 0xf0 0x2f 0xe3
+0x2a 0xf1 0x28 0xe3
+0x28 0xf1 0x6f 0xe3
 
 # CHECK: msr  CPSR_fc, r0
 # CHECK: msr  APSR_g, r0
@@ -877,14 +930,22 @@
 # CHECK: mvn r3, #7
 # CHECK: mvn r4, #4080
 # CHECK: mvn r5, #16711680
+# CHECK: mvn r7, #40, #2
+# CHECK: mvn r7, #-2147483638
 # CHECK: mvns r3, #7
+# CHECK: mvns r11, #240, #30
+# CHECK: mvns r11, #-2147483638
 # CHECK: mvneq r4, #4080
 # CHECK: mvnseq r5, #16711680
 
 0x07 0x30 0xe0 0xe3
 0xff 0x4e 0xe0 0xe3
 0xff 0x58 0xe0 0xe3
+0x28 0x71 0xe0 0xe3
+0x2a 0x71 0xe0 0xe3
 0x07 0x30 0xf0 0xe3
+0xf0 0xbf 0xf0 0xe3
+0x2a 0xb1 0xf0 0xe3
 0xff 0x4e 0xe0 0x03
 0xff 0x58 0xf0 0x03
 
@@ -940,6 +1001,8 @@
 # ORR
 #------------------------------------------------------------------------------
 # CHECK: orr r4, r5, #61440
+# CHECK: orr r7, r8, #-2147483638
+# CHECK: orr r7, r8, #40, #2
 # CHECK: orr r4, r5, r6
 # CHECK: orr r4, r5, r6, lsl #5
 # CHECK: orr r4, r5, r6, lsr #5
@@ -966,6 +1029,8 @@
 # CHECK: orr r4, r4, r5, rrx
 
 0x0f 0x4a 0x85 0xe3
+0x2a 0x71 0x88 0xe3
+0x28 0x71 0x88 0xe3
 0x06 0x40 0x85 0xe1
 0x86 0x42 0x85 0xe1
 0xa6 0x42 0x85 0xe1
@@ -1204,6 +1269,8 @@
 # RSB
 #------------------------------------------------------------------------------
 # CHECK: rsb r4, r5, #61440
+# CHECK: rsb r7, r8, #-2147483638
+# CHECK: rsb r7, r8, #40, #2
 # CHECK: rsb r4, r5, r6
 # CHECK: rsb r4, r5, r6, lsl #5
 # CHECK: rsblo r4, r5, r6, lsr #5
@@ -1230,6 +1297,8 @@
 # CHECK: rsb r4, r4, r5, rrx
 
 0x0f 0x4a 0x65 0xe2
+0x2a 0x71 0x68 0xe2
+0x28 0x71 0x68 0xe2
 0x06 0x40 0x65 0xe0
 0x86 0x42 0x65 0xe0
 0xa6 0x42 0x65 0x30
@@ -1256,9 +1325,20 @@
 0x65 0x40 0x64 0xe0
 
 #------------------------------------------------------------------------------
+# RSBS
+#------------------------------------------------------------------------------
+# CHECK: rsbs   r7, r8, #-2147483638
+# CHECK: rsbs   r7, r8, #40, #2
+
+0x2a 0x71 0x78 0xe2
+0x28 0x71 0x78 0xe2
+
+#------------------------------------------------------------------------------
 # RSC
 #------------------------------------------------------------------------------
 # CHECK: rsc r4, r5, #61440
+# CHECK: rsc r7, r8, #-2147483638
+# CHECK: rsc r7, r8, #40, #2
 # CHECK: rsc r4, r5, r6
 # CHECK: rsc r4, r5, r6, lsl #5
 # CHECK: rsclo r4, r5, r6, lsr #5
@@ -1283,6 +1363,8 @@
 # CHECK: rsc r6, r6, r7, ror r9
 
 0x0f 0x4a 0xe5 0xe2
+0x2a 0x71 0xe8 0xe2
+0x28 0x71 0xe8 0xe2
 0x06 0x40 0xe5 0xe0
 0x86 0x42 0xe5 0xe0
 0xa6 0x42 0xe5 0x30
@@ -1357,6 +1439,8 @@
 # SBC
 #------------------------------------------------------------------------------
 # CHECK: sbc r4, r5, #61440
+# CHECK: sbc r7, r8, #-2147483638
+# CHECK: sbc r7, r8, #40, #2
 # CHECK: sbc r4, r5, r6
 # CHECK: sbc r4, r5, r6, lsl #5
 # CHECK: sbc r4, r5, r6, lsr #5
@@ -1381,6 +1465,8 @@
 # CHECK: sbc r6, r6, r7, ror r9
 
 0x0f 0x4a 0xc5 0xe2
+0x2a 0x71 0xc8 0xe2
+0x28 0x71 0xc8 0xe2
 0x06 0x40 0xc5 0xe0
 0x86 0x42 0xc5 0xe0
 0xa6 0x42 0xc5 0xe0
@@ -1868,6 +1954,8 @@
 # SUB
 #------------------------------------------------------------------------------
 # CHECK: sub r4, r5, #61440
+# CHECK: sub r7, r8, #-2147483638
+# CHECK: sub r7, r8, #40, #2
 # CHECK: sub r4, r5, r6
 # CHECK: sub r4, r5, r6, lsl #5
 # CHECK: sub r4, r5, r6, lsr #5
@@ -1892,6 +1980,8 @@
 # CHECK: sub r6, r6, r7, ror r9
 
 0x0f 0x4a 0x45 0xe2
+0x2a 0x71 0x48 0xe2
+0x28 0x71 0x48 0xe2
 0x06 0x40 0x45 0xe0
 0x86 0x42 0x45 0xe0
 0xa6 0x42 0x45 0xe0
@@ -1916,6 +2006,14 @@
 0x57 0x69 0x46 0xe0
 0x77 0x69 0x46 0xe0
 
+#------------------------------------------------------------------------------
+# SUBS
+#------------------------------------------------------------------------------
+# CHECK: subs   r7, r8, #-2147483638
+# CHECK: subs   r7, r8, #40, #2
+
+0x2a 0x71 0x58 0xe2
+0x28 0x71 0x58 0xe2
 
 #------------------------------------------------------------------------------
 # SVC
@@ -2044,6 +2142,8 @@
 # TEQ
 #------------------------------------------------------------------------------
 # CHECK: teq r5, #61440
+# CHECK: teq r7, #-2147483638
+# CHECK: teq r7, #40, #2
 # CHECK: teq r4, r5
 # CHECK: teq r4, r5, lsl #5
 # CHECK: teq r4, r5, lsr #5
@@ -2056,6 +2156,8 @@
 # CHECK: teq r6, r7, ror r9
 
 0x0f 0x0a 0x35 0xe3
+0x2a 0x01 0x37 0xe3
+0x28 0x01 0x37 0xe3
 0x05 0x00 0x34 0xe1
 0x85 0x02 0x34 0xe1
 0xa5 0x02 0x34 0xe1
@@ -2072,6 +2174,8 @@
 # TST
 #------------------------------------------------------------------------------
 # CHECK: tst r5, #61440
+# CHECK: tst r7, #-2147483638
+# CHECK: tst r7, #40, #2
 # CHECK: tst r4, r5
 # CHECK: tst r4, r5, lsl #5
 # CHECK: tst r4, r5, lsr #5
@@ -2084,6 +2188,8 @@
 # CHECK: tst r6, r7, ror r9
 
 0x0f 0x0a 0x15 0xe3
+0x2a 0x01 0x17 0xe3
+0x28 0x01 0x17 0xe3
 0x05 0x00 0x14 0xe1
 0x85 0x02 0x14 0xe1
 0xa5 0x02 0x14 0xe1
diff --git a/test/MC/Disassembler/ARM/d16.txt b/test/MC/Disassembler/ARM/d16.txt
new file mode 100644
index 000000000000..735af811df0f
--- /dev/null
+++ b/test/MC/Disassembler/ARM/d16.txt
@@ -0,0 +1,23 @@
+# RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -disassemble -mattr=+vfp4,-d16 2>&1 | FileCheck %s --check-prefix=D32
+# RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -disassemble -mattr=+vfp4,-d16 2>&1 | FileCheck %s --check-prefix=D32
+
+
+# D32: vadd.f64 d1, d2, d16
+# D16: warning: invalid instruction encoding
+[0x32,0xee,0x20,0x1b]
+
+# D32: vadd.f64 d1, d17, d6
+# D16: warning: invalid instruction encoding
+[0x31,0xee,0x86,0x1b]
+
+# D32: vadd.f64 d19, d7, d6
+# D16: warning: invalid instruction encoding
+[0x77,0xee,0x06,0x3b]
+
+# D32: vcvt.f64.f32 d22, s4
+# D16: warning: invalid instruction encoding
+[0xf7,0xee,0xc2,0x6a]
+
+# D32: vcvt.f32.f64 s26, d30
+# D16: warning: invalid instruction encoding
+[0xb7,0xee,0xee,0xdb]
diff --git a/test/MC/Disassembler/ARM/invalid-thumb-MSR-MClass.txt b/test/MC/Disassembler/ARM/invalid-thumb-MSR-MClass.txt
new file mode 100644
index 000000000000..26fa907084e9
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-thumb-MSR-MClass.txt
@@ -0,0 +1,35 @@
+# RUN: not llvm-mc -disassemble %s -triple=thumbv7em 2>&1 | FileCheck --check-prefix=CHECK %s
+# RUN: not llvm-mc -disassemble %s -triple=thumbv7m 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=CHECK-V7M %s
+
+#------------------------------------------------------------------------------
+# Undefined encodings for mrs
+#------------------------------------------------------------------------------
+
+# invalid SYSm
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: [0xef 0xf3 0x80 0x80]
+[0xef 0xf3 0x80 0x80]
+
+#------------------------------------------------------------------------------
+# Undefined encodings for msr
+#------------------------------------------------------------------------------
+
+# invalid mask = '00'
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: [0x80 0xf3 0x00 0x80]
+[0x80 0xf3 0x00 0x80]
+
+# invalid mask = '11' with SYSm not in {0..3}
+# CHECK: warning: potentially undefined instruction encoding
+# CHECK-NEXT: [0x80 0xf3 0x05 0x8c]
+[0x80 0xf3 0x05 0x8c]
+
+# invalid mask = '01' (ThumbV7M does not have the DSP extension)
+# CHECK-V7M: warning: potentially undefined instruction encoding
+# CHECK-V7M-NEXT: [0x80 0xf3 0x00 0x84]
+[0x80 0xf3 0x00 0x84]
+
+# invalid SYSm
+# CHECK: warning: invalid instruction encoding
+# CHECK-NEXT: [0x80 0xf3 0x80 0x88]
+[0x80 0xf3 0x80 0x88]
diff --git a/test/MC/Disassembler/ARM/invalid-virtexts.arm.txt b/test/MC/Disassembler/ARM/invalid-virtexts.arm.txt
new file mode 100644
index 000000000000..1daada98d67f
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-virtexts.arm.txt
@@ -0,0 +1,10 @@
+# RUN: not llvm-mc -disassemble -triple armv7a -mcpu=cortex-a15 %s 2>&1 | FileCheck --check-prefix=CHECK-ARM %s
+
+# HVC (ARM)
+[0x7f,0xff,0x4f,0xf1]
+# CHECK-ARM:   warning: invalid instruction encoding
+
+[0x70,0xff,0x4f,0x01]
+[0x7f,0xff,0x4f,0xd1]
+# CHECK-ARM:   warning: potentially undefined instruction encoding
+# CHECK-ARM:   warning: potentially undefined instruction encoding
diff --git a/test/MC/Disassembler/ARM/move-banked-regs-arm.txt b/test/MC/Disassembler/ARM/move-banked-regs-arm.txt
new file mode 100644
index 000000000000..b92c5779a664
--- /dev/null
+++ b/test/MC/Disassembler/ARM/move-banked-regs-arm.txt
@@ -0,0 +1,150 @@
+@ RUN: llvm-mc -disassemble -triple armv7 -mcpu=cyclone %s | FileCheck %s
+
+
+[0x00,0x22,0x00,0xe1]
+[0x00,0x32,0x01,0xe1]
+[0x00,0x52,0x02,0xe1]
+[0x00,0x72,0x03,0xe1]
+[0x00,0xb2,0x04,0xe1]
+[0x00,0x12,0x05,0xe1]
+[0x00,0x22,0x06,0xe1]
+@ CHECK:         mrs     r2, r8_usr
+@ CHECK:         mrs     r3, r9_usr
+@ CHECK:         mrs     r5, r10_usr
+@ CHECK:         mrs     r7, r11_usr
+@ CHECK:         mrs     r11, r12_usr
+@ CHECK:         mrs     r1, sp_usr
+@ CHECK:         mrs     r2, lr_usr
+
+[0x00,0x22,0x08,0xe1]
+[0x00,0x32,0x09,0xe1]
+[0x00,0x52,0x0a,0xe1]
+[0x00,0x72,0x0b,0xe1]
+[0x00,0xb2,0x0c,0xe1]
+[0x00,0x12,0x0d,0xe1]
+[0x00,0x22,0x0e,0xe1]
+[0x00,0x32,0x4e,0xe1]
+@ CHECK:         mrs     r2, r8_fiq
+@ CHECK:         mrs     r3, r9_fiq
+@ CHECK:         mrs     r5, r10_fiq
+@ CHECK:         mrs     r7, r11_fiq
+@ CHECK:         mrs     r11, r12_fiq
+@ CHECK:         mrs     r1, sp_fiq
+@ CHECK:         mrs     r2, lr_fiq
+@ CHECK:         mrs     r3, SPSR_fiq
+
+[0x00,0x43,0x00,0xe1]
+[0x00,0x93,0x01,0xe1]
+[0x00,0x13,0x40,0xe1]
+@ CHECK:         mrs     r4, lr_irq
+@ CHECK:         mrs     r9, sp_irq
+@ CHECK:         mrs     r1, SPSR_irq
+
+[0x00,0x13,0x02,0xe1]
+[0x00,0x33,0x03,0xe1]
+[0x00,0x53,0x42,0xe1]
+@ CHECK:         mrs     r1, lr_svc
+@ CHECK:         mrs     r3, sp_svc
+@ CHECK:         mrs     r5, SPSR_svc
+
+[0x00,0x53,0x04,0xe1]
+[0x00,0x73,0x05,0xe1]
+[0x00,0x93,0x44,0xe1]
+@ CHECK:         mrs     r5, lr_abt
+@ CHECK:         mrs     r7, sp_abt
+@ CHECK:         mrs     r9, SPSR_abt
+
+[0x00,0x93,0x06,0xe1]
+[0x00,0xb3,0x07,0xe1]
+[0x00,0xc3,0x46,0xe1]
+@ CHECK:         mrs     r9, lr_und
+@ CHECK:         mrs     r11, sp_und
+@ CHECK:         mrs     r12, SPSR_und
+
+[0x00,0x23,0x0c,0xe1]
+[0x00,0x43,0x0d,0xe1]
+[0x00,0x63,0x4c,0xe1]
+@ CHECK:         mrs     r2, lr_mon
+@ CHECK:         mrs     r4, sp_mon
+@ CHECK:         mrs     r6, SPSR_mon
+
+[0x00,0x63,0x0e,0xe1]
+[0x00,0x83,0x0f,0xe1]
+[0x00,0xa3,0x4e,0xe1]
+@ CHECK:         mrs     r6, elr_hyp
+@ CHECK:         mrs     r8, sp_hyp
+@ CHECK:         mrs     r10, SPSR_hyp
+
+[0x02,0xf2,0x20,0xe1]
+[0x03,0xf2,0x21,0xe1]
+[0x05,0xf2,0x22,0xe1]
+[0x07,0xf2,0x23,0xe1]
+[0x0b,0xf2,0x24,0xe1]
+[0x01,0xf2,0x25,0xe1]
+[0x02,0xf2,0x26,0xe1]
+@ CHECK:         msr     r8_usr, r2
+@ CHECK:         msr     r9_usr, r3
+@ CHECK:         msr     r10_usr, r5
+@ CHECK:         msr     r11_usr, r7
+@ CHECK:         msr     r12_usr, r11
+@ CHECK:         msr     sp_usr, r1
+@ CHECK:         msr     lr_usr, r2
+
+[0x02,0xf2,0x28,0xe1]
+[0x03,0xf2,0x29,0xe1]
+[0x05,0xf2,0x2a,0xe1]
+[0x07,0xf2,0x2b,0xe1]
+[0x0b,0xf2,0x2c,0xe1]
+[0x01,0xf2,0x2d,0xe1]
+[0x02,0xf2,0x2e,0xe1]
+[0x03,0xf2,0x6e,0xe1]
+@ CHECK:         msr     r8_fiq, r2
+@ CHECK:         msr     r9_fiq, r3
+@ CHECK:         msr     r10_fiq, r5
+@ CHECK:         msr     r11_fiq, r7
+@ CHECK:         msr     r12_fiq, r11
+@ CHECK:         msr     sp_fiq, r1
+@ CHECK:         msr     lr_fiq, r2
+@ CHECK:         msr     SPSR_fiq, r3
+
+[0x04,0xf3,0x20,0xe1]
+[0x09,0xf3,0x21,0xe1]
+[0x0b,0xf3,0x60,0xe1]
+@ CHECK:         msr     lr_irq, r4
+@ CHECK:         msr     sp_irq, r9
+@ CHECK:         msr     SPSR_irq, r11
+
+[0x01,0xf3,0x22,0xe1]
+[0x03,0xf3,0x23,0xe1]
+[0x05,0xf3,0x62,0xe1]
+@ CHECK:         msr     lr_svc, r1
+@ CHECK:         msr     sp_svc, r3
+@ CHECK:         msr     SPSR_svc, r5
+
+[0x05,0xf3,0x24,0xe1]
+[0x07,0xf3,0x25,0xe1]
+[0x09,0xf3,0x64,0xe1]
+@ CHECK:         msr     lr_abt, r5
+@ CHECK:         msr     sp_abt, r7
+@ CHECK:         msr     SPSR_abt, r9
+
+[0x09,0xf3,0x26,0xe1]
+[0x0b,0xf3,0x27,0xe1]
+[0x0c,0xf3,0x66,0xe1]
+@ CHECK:         msr     lr_und, r9
+@ CHECK:         msr     sp_und, r11
+@ CHECK:         msr     SPSR_und, r12
+
+[0x02,0xf3,0x2c,0xe1]
+[0x04,0xf3,0x2d,0xe1]
+[0x06,0xf3,0x6c,0xe1]
+@ CHECK:         msr     lr_mon, r2
+@ CHECK:         msr     sp_mon, r4
+@ CHECK:         msr     SPSR_mon, r6
+
+[0x06,0xf3,0x2e,0xe1]
+[0x08,0xf3,0x2f,0xe1]
+[0x0a,0xf3,0x6e,0xe1]
+@ CHECK:         msr     elr_hyp, r6
+@ CHECK:         msr     sp_hyp, r8
+@ CHECK:         msr     SPSR_hyp, r10
diff --git a/test/MC/Disassembler/ARM/move-banked-regs-thumb.txt b/test/MC/Disassembler/ARM/move-banked-regs-thumb.txt
new file mode 100644
index 000000000000..29e91abcc7f1
--- /dev/null
+++ b/test/MC/Disassembler/ARM/move-banked-regs-thumb.txt
@@ -0,0 +1,153 @@
+@ RUN: llvm-mc -disassemble -triple thumb -mcpu=cyclone %s | FileCheck %s
+
+[0xe0,0xf3,0x20,0x82]
+[0xe1,0xf3,0x20,0x83]
+[0xe2,0xf3,0x20,0x85]
+[0xe3,0xf3,0x20,0x87]
+[0xe4,0xf3,0x20,0x8b]
+[0xe5,0xf3,0x20,0x81]
+[0xe6,0xf3,0x20,0x82]
+@ CHECK:         mrs     r2, r8_usr
+@ CHECK:         mrs     r3, r9_usr
+@ CHECK:         mrs     r5, r10_usr
+@ CHECK:         mrs     r7, r11_usr
+@ CHECK:         mrs     r11, r12_usr
+@ CHECK:         mrs     r1, sp_usr
+@ CHECK:         mrs     r2, lr_usr
+
+[0xe8,0xf3,0x20,0x82]
+[0xe9,0xf3,0x20,0x83]
+[0xea,0xf3,0x20,0x85]
+[0xeb,0xf3,0x20,0x87]
+[0xec,0xf3,0x20,0x8b]
+[0xed,0xf3,0x20,0x81]
+[0xee,0xf3,0x20,0x82]
+[0xfe,0xf3,0x20,0x83]
+@ CHECK:         mrs     r2, r8_fiq
+@ CHECK:         mrs     r3, r9_fiq
+@ CHECK:         mrs     r5, r10_fiq
+@ CHECK:         mrs     r7, r11_fiq
+@ CHECK:         mrs     r11, r12_fiq
+@ CHECK:         mrs     r1, sp_fiq
+@ CHECK:         mrs     r2, lr_fiq
+@ CHECK:         mrs     r3, SPSR_fiq
+
+[0xe0,0xf3,0x30,0x84]
+[0xe1,0xf3,0x30,0x89]
+[0xf0,0xf3,0x30,0x81]
+@ CHECK:         mrs     r4, lr_irq
+@ CHECK:         mrs     r9, sp_irq
+@ CHECK:         mrs     r1, SPSR_irq
+
+[0xe2,0xf3,0x30,0x81]
+[0xe3,0xf3,0x30,0x83]
+[0xf2,0xf3,0x30,0x85]
+@ CHECK:         mrs     r1, lr_svc
+@ CHECK:         mrs     r3, sp_svc
+@ CHECK:         mrs     r5, SPSR_svc
+
+[0xe4,0xf3,0x30,0x85]
+[0xe5,0xf3,0x30,0x87]
+[0xf4,0xf3,0x30,0x89]
+@ CHECK:         mrs     r5, lr_abt
+@ CHECK:         mrs     r7, sp_abt
+@ CHECK:         mrs     r9, SPSR_abt
+
+[0xe6,0xf3,0x30,0x89]
+[0xe7,0xf3,0x30,0x8b]
+[0xf6,0xf3,0x30,0x8c]
+@ CHECK:         mrs     r9, lr_und
+@ CHECK:         mrs     r11, sp_und
+@ CHECK:         mrs     r12, SPSR_und
+
+
+[0xec,0xf3,0x30,0x82]
+[0xed,0xf3,0x30,0x84]
+[0xfc,0xf3,0x30,0x86]
+@ CHECK:         mrs     r2, lr_mon
+@ CHECK:         mrs     r4, sp_mon
+@ CHECK:         mrs     r6, SPSR_mon
+
+
+[0xee,0xf3,0x30,0x86]
+[0xef,0xf3,0x30,0x88]
+[0xfe,0xf3,0x30,0x8a]
+@ CHECK:         mrs     r6, elr_hyp
+@ CHECK:         mrs     r8, sp_hyp
+@ CHECK:         mrs     r10, SPSR_hyp
+
+
+[0x82,0xf3,0x20,0x80]
+[0x83,0xf3,0x20,0x81]
+[0x85,0xf3,0x20,0x82]
+[0x87,0xf3,0x20,0x83]
+[0x8b,0xf3,0x20,0x84]
+[0x81,0xf3,0x20,0x85]
+[0x82,0xf3,0x20,0x86]
+@ CHECK:         msr     r8_usr, r2
+@ CHECK:         msr     r9_usr, r3
+@ CHECK:         msr     r10_usr, r5
+@ CHECK:         msr     r11_usr, r7
+@ CHECK:         msr     r12_usr, r11
+@ CHECK:         msr     sp_usr, r1
+@ CHECK:         msr     lr_usr, r2
+
+[0x82,0xf3,0x20,0x88]
+[0x83,0xf3,0x20,0x89]
+[0x85,0xf3,0x20,0x8a]
+[0x87,0xf3,0x20,0x8b]
+[0x8b,0xf3,0x20,0x8c]
+[0x81,0xf3,0x20,0x8d]
+[0x82,0xf3,0x20,0x8e]
+[0x93,0xf3,0x20,0x8e]
+@ CHECK:         msr     r8_fiq, r2
+@ CHECK:         msr     r9_fiq, r3
+@ CHECK:         msr     r10_fiq, r5
+@ CHECK:         msr     r11_fiq, r7
+@ CHECK:         msr     r12_fiq, r11
+@ CHECK:         msr     sp_fiq, r1
+@ CHECK:         msr     lr_fiq, r2
+@ CHECK:        msr     SPSR_fiq, r3
+
+[0x84,0xf3,0x30,0x80]
+[0x89,0xf3,0x30,0x81]
+[0x9b,0xf3,0x30,0x80]
+@ CHECK:         msr     lr_irq, r4
+@ CHECK:         msr     sp_irq, r9
+@ CHECK:         msr     SPSR_irq, r11
+
+[0x81,0xf3,0x30,0x82]
+[0x83,0xf3,0x30,0x83]
+[0x95,0xf3,0x30,0x82]
+@ CHECK:         msr     lr_svc, r1
+@ CHECK:         msr     sp_svc, r3
+@ CHECK:         msr     SPSR_svc, r5
+
+[0x85,0xf3,0x30,0x84]
+[0x87,0xf3,0x30,0x85]
+[0x99,0xf3,0x30,0x84]
+@ CHECK:         msr     lr_abt, r5
+@ CHECK:         msr     sp_abt, r7
+@ CHECK:         msr     SPSR_abt, r9
+
+[0x89,0xf3,0x30,0x86]
+[0x8b,0xf3,0x30,0x87]
+[0x9c,0xf3,0x30,0x86]
+@ CHECK:         msr     lr_und, r9
+@ CHECK:         msr     sp_und, r11
+@ CHECK:         msr     SPSR_und, r12
+
+
+[0x82,0xf3,0x30,0x8c]
+[0x84,0xf3,0x30,0x8d]
+[0x96,0xf3,0x30,0x8c]
+@ CHECK:         msr     lr_mon, r2
+@ CHECK:         msr     sp_mon, r4
+@ CHECK:         msr     SPSR_mon, r6
+
+[0x86,0xf3,0x30,0x8e]
+[0x88,0xf3,0x30,0x8f]
+[0x9a,0xf3,0x30,0x8e]
+@ CHECK:         msr     elr_hyp, r6
+@ CHECK:         msr     sp_hyp, r8
+@ CHECK:         msr     SPSR_hyp, r10
diff --git a/test/MC/Disassembler/ARM/thumb-MSR-MClass.txt b/test/MC/Disassembler/ARM/thumb-MSR-MClass.txt
index 497cb9a2a5e0..c1a2790e3f73 100644
--- a/test/MC/Disassembler/ARM/thumb-MSR-MClass.txt
+++ b/test/MC/Disassembler/ARM/thumb-MSR-MClass.txt
@@ -1,7 +1,94 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 -mcpu cortex-m3 | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=thumbv7em | FileCheck %s
 
-# CHECK: msr    primask, r0
-0x80 0xf3 0x10 0x80
+#------------------------------------------------------------------------------
+# MRS
+#------------------------------------------------------------------------------
 
-# CHECK: mrs    r0, primask
+# CHECK: mrs r0, apsr
+# CHECK: mrs r0, iapsr
+# CHECK: mrs r0, eapsr
+# CHECK: mrs r0, xpsr
+# CHECK: mrs r0, ipsr
+# CHECK: mrs r0, epsr
+# CHECK: mrs r0, iepsr
+# CHECK: mrs r0, msp
+# CHECK: mrs r0, psp
+# CHECK: mrs r0, primask
+# CHECK: mrs r0, basepri
+# CHECK: mrs r0, basepri_max
+# CHECK: mrs r0, faultmask
+# CHECK: mrs r0, control
+
+0xef 0xf3 0x00 0x80
+0xef 0xf3 0x01 0x80
+0xef 0xf3 0x02 0x80
+0xef 0xf3 0x03 0x80
+0xef 0xf3 0x05 0x80
+0xef 0xf3 0x06 0x80
+0xef 0xf3 0x07 0x80
+0xef 0xf3 0x08 0x80
+0xef 0xf3 0x09 0x80
 0xef 0xf3 0x10 0x80
+0xef 0xf3 0x11 0x80
+0xef 0xf3 0x12 0x80
+0xef 0xf3 0x13 0x80
+0xef 0xf3 0x14 0x80
+
+
+#------------------------------------------------------------------------------
+# MSR
+#------------------------------------------------------------------------------
+
+# CHECK: msr apsr_nzcvq, r0
+# CHECK: msr apsr_g, r0
+# CHECK: msr apsr_nzcvqg, r0
+
+0x80 0xf3 0x00 0x88
+0x80 0xf3 0x00 0x84
+0x80 0xf3 0x00 0x8c
+
+# CHECK: msr iapsr_nzcvq, r0
+# CHECK: msr iapsr_g, r0
+# CHECK: msr iapsr_nzcvqg, r0
+
+0x80 0xf3 0x01 0x88
+0x80 0xf3 0x01 0x84
+0x80 0xf3 0x01 0x8c
+
+# CHECK: msr eapsr_nzcvq, r0
+# CHECK: msr eapsr_g, r0
+# CHECK: msr eapsr_nzcvqg, r0
+
+0x80 0xf3 0x02 0x88
+0x80 0xf3 0x02 0x84
+0x80 0xf3 0x02 0x8c
+
+# CHECK: msr xpsr_nzcvq, r0
+# CHECK: msr xpsr_g, r0
+# CHECK: msr xpsr_nzcvqg, r0
+
+0x80 0xf3 0x03 0x88
+0x80 0xf3 0x03 0x84
+0x80 0xf3 0x03 0x8c
+
+# CHECK: msr ipsr, r0
+# CHECK: msr epsr, r0
+# CHECK: msr iepsr, r0
+# CHECK: msr msp, r0
+# CHECK: msr psp, r0
+# CHECK: msr primask, r0
+# CHECK: msr basepri, r0
+# CHECK: msr basepri_max, r0
+# CHECK: msr faultmask, r0
+# CHECK: msr control, r0
+
+0x80 0xf3 0x05 0x88
+0x80 0xf3 0x06 0x88
+0x80 0xf3 0x07 0x88
+0x80 0xf3 0x08 0x88
+0x80 0xf3 0x09 0x88
+0x80 0xf3 0x10 0x88
+0x80 0xf3 0x11 0x88
+0x80 0xf3 0x12 0x88
+0x80 0xf3 0x13 0x88
+0x80 0xf3 0x14 0x88
diff --git a/test/MC/Disassembler/ARM/thumb-tests.txt b/test/MC/Disassembler/ARM/thumb-tests.txt
index df2bac140cf7..dcb6e3f6721d 100644
--- a/test/MC/Disassembler/ARM/thumb-tests.txt
+++ b/test/MC/Disassembler/ARM/thumb-tests.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 -mcpu=cortex-a9-mp | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=thumbv7-apple-darwin9 -mcpu=cortex-a9 | FileCheck %s
 
 # CHECK:	add	r5, sp, #68
 0x11 0xad
diff --git a/test/MC/Disassembler/ARM/thumb2-preloads.txt b/test/MC/Disassembler/ARM/thumb2-preloads.txt
new file mode 100644
index 000000000000..dec4d648e92c
--- /dev/null
+++ b/test/MC/Disassembler/ARM/thumb2-preloads.txt
@@ -0,0 +1,69 @@
+# RUN: not llvm-mc -triple=thumbv6t2-none-eabi -disassemble            < %s 2>/dev/null | FileCheck %s --check-prefix=V6T2
+# RUN: not llvm-mc -triple=thumbv7a-none-eabi  -disassemble -mattr=-mp < %s 2>/dev/null | FileCheck %s --check-prefix=V6T2 --check-prefix=V7
+# RUN:     llvm-mc -triple=thumbv7a-none-eabi  -disassemble -mattr=+mp < %s 2>/dev/null | FileCheck %s --check-prefix=V6T2 --check-prefix=V7 --check-prefix=MP
+# RUN: not llvm-mc -triple=thumbv7m-none-eabi  -disassemble            < %s 2>/dev/null | FileCheck %s --check-prefix=V6T2 --check-prefix=V7
+
+# RUN: not llvm-mc -triple=thumbv6t2-none-eabi -disassemble            < %s 2>&1 >/dev/null | FileCheck %s --check-prefix=MP-ERR --check-prefix=V7-ERR
+# RUN: not llvm-mc -triple=thumbv7a-none-eabi  -disassemble -mattr=-mp < %s 2>&1 >/dev/null | FileCheck %s --check-prefix=MP-ERR
+# RUN:     llvm-mc -triple=thumbv7a-none-eabi  -disassemble -mattr=+mp < %s 2>&1 >/dev/null
+# RUN: not llvm-mc -triple=thumbv7m-none-eabi  -disassemble            < %s 2>&1 >/dev/null | FileCheck %s --check-prefix=MP-ERR
+
+# V6T2: pld     [r1, #3]
+[0x91,0xf8,0x03,0xf0]
+
+# V6T2: pld     [r2, #-5]
+[0x12,0xf8,0x05,0xfc]
+
+# MP: pldw    [r3, #4]
+# MP-ERR: invalid instruction encoding
+# MP-ERR-NEXT: [0xb3,0xf8,0x04,0xf0]
+[0xb3,0xf8,0x04,0xf0]
+
+# MP: pldw    [r4, #-6]
+# MP-ERR: invalid instruction encoding
+# MP-ERR-NEXT: [0x34,0xf8,0x06,0xfc]
+[0x34,0xf8,0x06,0xfc]
+
+# V6T2: pld     [pc, #8]
+[0x9f,0xf8,0x08,0xf0]
+
+# V6T2: pld     [pc, #-5]
+[0x1f,0xf8,0x05,0xf0]
+
+# V6T2: pld     [r5, r6]
+[0x15,0xf8,0x06,0xf0]
+
+# V6T2: pld     [r7, r8, lsl #1]
+[0x17,0xf8,0x18,0xf0]
+
+# MP: pldw    [r9, r10]
+# MP-ERR: invalid instruction encoding
+# MP-ERR-NEXT: [0x39,0xf8,0x0a,0xf0]
+[0x39,0xf8,0x0a,0xf0]
+
+# MP: pldw    [r11, r12, lsl #2]
+# MP-ERR: invalid instruction encoding
+# MP-ERR-NEXT: [0x3b,0xf8,0x2c,0xf0]
+[0x3b,0xf8,0x2c,0xf0]
+
+# V7: pli     [r1, #10]
+# V7-ERR: invalid instruction encoding
+# V7-ERR-NEXT: [0x91,0xf9,0x0a,0xf0]
+[0x91,0xf9,0x0a,0xf0]
+
+# V7: pli     [r2, #-3]
+# V7-ERR: invalid instruction encoding
+# V7-ERR-NEXT: [0x12,0xf9,0x03,0xfc]
+[0x12,0xf9,0x03,0xfc]
+
+# V7: pli     [pc, #6]
+# V7-ERR: invalid instruction encoding
+# V7-ERR-NEXT: [0x9f,0xf9,0x06,0xf0]
+[0x9f,0xf9,0x06,0xf0]
+
+# V7: pli     [pc, #-8]
+# V7-ERR: invalid instruction encoding
+# V7-ERR-NEXT: [0x1f,0xf9,0x08,0xf0]
+[0x1f,0xf9,0x08,0xf0]
+
+# NO-ERR-NOT: invalid instruction encoding
diff --git a/test/MC/Disassembler/ARM/virtexts-arm.txt b/test/MC/Disassembler/ARM/virtexts-arm.txt
new file mode 100644
index 000000000000..a18466f2d663
--- /dev/null
+++ b/test/MC/Disassembler/ARM/virtexts-arm.txt
@@ -0,0 +1,41 @@
+# RUN: llvm-mc -disassemble -triple armv7a -mcpu=cortex-a15 %s | FileCheck %s
+
+[0x71,0x00,0x40,0xe1]
+[0x77,0x00,0x40,0xe1]
+[0x71,0x10,0x40,0xe1]
+[0x7f,0xff,0x4f,0xe1]
+# CHECK:    hvc    #1
+# CHECK:    hvc    #7
+# CHECK:    hvc    #257
+# CHECK:    hvc    #65535
+
+[0x6e,0x00,0x60,0xe1]
+[0x6e,0x00,0x60,0x01]
+[0x6e,0x00,0x60,0x11]
+[0x6e,0x00,0x60,0x21]
+[0x6e,0x00,0x60,0x31]
+[0x6e,0x00,0x60,0x41]
+[0x6e,0x00,0x60,0x51]
+[0x6e,0x00,0x60,0x61]
+[0x6e,0x00,0x60,0x71]
+[0x6e,0x00,0x60,0x81]
+[0x6e,0x00,0x60,0x91]
+[0x6e,0x00,0x60,0xa1]
+[0x6e,0x00,0x60,0xb1]
+[0x6e,0x00,0x60,0xc1]
+[0x6e,0x00,0x60,0xd1]
+# CHECK:    eret
+# CHECK:    ereteq
+# CHECK:    eretne
+# CHECK:    ereths
+# CHECK:    eretlo
+# CHECK:    eretmi
+# CHECK:    eretpl
+# CHECK:    eretvs
+# CHECK:    eretvc
+# CHECK:    erethi
+# CHECK:    eretls
+# CHECK:    eretge
+# CHECK:    eretlt
+# CHECK:    eretgt
+# CHECK:    eretle
diff --git a/test/MC/Disassembler/ARM/virtexts-thumb.txt b/test/MC/Disassembler/ARM/virtexts-thumb.txt
new file mode 100644
index 000000000000..da0f621dcacd
--- /dev/null
+++ b/test/MC/Disassembler/ARM/virtexts-thumb.txt
@@ -0,0 +1,61 @@
+# RUN: llvm-mc -disassemble -triple thumbv7 -mcpu=cortex-a15 %s | FileCheck %s --check-prefix=CHECK-THUMB
+# RUN: not llvm-mc -disassemble -triple thumbv7 -mcpu=cortex-a9 %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOVIRT
+
+[0xe0,0xf7,0x01,0x80]
+[0xe0,0xf7,0x07,0x80]
+[0xe0,0xf7,0x01,0x81]
+[0xef,0xf7,0xff,0x8f]
+# CHECK-THUMB:    hvc.w    #1
+# CHECK-THUMB:    hvc.w    #7
+# CHECK-THUMB:    hvc.w    #257
+# CHECK-THUMB:    hvc.w    #65535
+# CHECK-NOVIRT:    warning: invalid instruction encoding
+# CHECK-NOVIRT:    warning: invalid instruction encoding
+# CHECK-NOVIRT:    warning: invalid instruction encoding
+# CHECK-NOVIRT:    warning: invalid instruction encoding
+
+[0xde,0xf3,0x00,0x8f]
+[0x08,0xbf] [0xde,0xf3,0x00,0x8f]
+[0x18,0xbf] [0xde,0xf3,0x00,0x8f]
+[0x28,0xbf] [0xde,0xf3,0x00,0x8f]
+[0x38,0xbf] [0xde,0xf3,0x00,0x8f]
+[0x48,0xbf] [0xde,0xf3,0x00,0x8f]
+[0x58,0xbf] [0xde,0xf3,0x00,0x8f]
+[0x68,0xbf] [0xde,0xf3,0x00,0x8f]
+[0x78,0xbf] [0xde,0xf3,0x00,0x8f]
+[0x88,0xbf] [0xde,0xf3,0x00,0x8f]
+[0x98,0xbf] [0xde,0xf3,0x00,0x8f]
+[0xa8,0xbf] [0xde,0xf3,0x00,0x8f]
+[0xb8,0xbf] [0xde,0xf3,0x00,0x8f]
+[0xc8,0xbf] [0xde,0xf3,0x00,0x8f]
+[0xd8,0xbf] [0xde,0xf3,0x00,0x8f]
+# CHECK-THUMB:    eret
+# CHECK-THUMB:    ereteq
+# CHECK-THUMB:    eretne
+# CHECK-THUMB:    ereths
+# CHECK-THUMB:    eretlo
+# CHECK-THUMB:    eretmi
+# CHECK-THUMB:    eretpl
+# CHECK-THUMB:    eretvs
+# CHECK-THUMB:    eretvc
+# CHECK-THUMB:    erethi
+# CHECK-THUMB:    eretls
+# CHECK-THUMB:    eretge
+# CHECK-THUMB:    eretlt
+# CHECK-THUMB:    eretgt
+# CHECK-THUMB:    eretle
+# CHECK-NOVIRT:    subs    pc, lr, #0
+# CHECK-NOVIRT:    subseq  pc, lr, #0
+# CHECK-NOVIRT:    subsne  pc, lr, #0
+# CHECK-NOVIRT:    subshs  pc, lr, #0
+# CHECK-NOVIRT:    subslo  pc, lr, #0
+# CHECK-NOVIRT:    subsmi  pc, lr, #0
+# CHECK-NOVIRT:    subspl  pc, lr, #0
+# CHECK-NOVIRT:    subsvs  pc, lr, #0
+# CHECK-NOVIRT:    subsvc  pc, lr, #0
+# CHECK-NOVIRT:    subshi  pc, lr, #0
+# CHECK-NOVIRT:    subsls  pc, lr, #0
+# CHECK-NOVIRT:    subsge  pc, lr, #0
+# CHECK-NOVIRT:    subslt  pc, lr, #0
+# CHECK-NOVIRT:    subsgt  pc, lr, #0
+# CHECK-NOVIRT:    subsle  pc, lr, #0
diff --git a/test/MC/Disassembler/Hexagon/alu32_alu.txt b/test/MC/Disassembler/Hexagon/alu32_alu.txt
new file mode 100644
index 000000000000..7fd400779292
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/alu32_alu.txt
@@ -0,0 +1,44 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0xf1 0xc3 0x15 0xb0
+# CHECK: r17 = add(r21, #31)
+0x11 0xdf 0x15 0xf3
+# CHECK: r17 = add(r21, r31)
+0x11 0xdf 0x55 0xf6
+# CHECK: r17 = add(r21, r31):sat
+0xf1 0xc3 0x15 0x76
+# CHECK: r17 = and(r21, #31)
+0xf1 0xc3 0x95 0x76
+# CHECK: r17 = or(r21, #31)
+0x11 0xdf 0x15 0xf1
+# CHECK: r17 = and(r21, r31)
+0x11 0xdf 0x35 0xf1
+# CHECK: r17 = or(r21, r31)
+0x11 0xdf 0x75 0xf1
+# CHECK: r17 = xor(r21, r31)
+0x11 0xd5 0x9f 0xf1
+# CHECK: r17 = and(r21, ~r31)
+0x11 0xd5 0xbf 0xf1
+# CHECK: r17 = or(r21, ~r31)
+0x00 0xc0 0x00 0x7f
+# CHECK: nop
+0xb1 0xc2 0x5f 0x76
+# CHECK: r17 = sub(#21, r31)
+0x11 0xdf 0x35 0xf3
+# CHECK: r17 = sub(r31, r21)
+0x11 0xdf 0xd5 0xf6
+# CHECK: r17 = sub(r31, r21):sat
+0x11 0xc0 0xbf 0x70
+# CHECK: r17 = sxtb(r31)
+0x15 0xc0 0x31 0x72
+# CHECK: r17.h = #21
+0x15 0xc0 0x31 0x71
+# CHECK: r17.l = #21
+0xf1 0xff 0x5f 0x78
+# CHECK: r17 = #32767
+0xf1 0xff 0xdf 0x78
+# CHECK: r17 = ##65535
+0x11 0xc0 0x75 0x70
+# CHECK: r17 = r21
+0x11 0xc0 0xd5 0x70
+# CHECK: r17 = zxth(r21)
diff --git a/test/MC/Disassembler/Hexagon/alu32_perm.txt b/test/MC/Disassembler/Hexagon/alu32_perm.txt
new file mode 100644
index 000000000000..15977edbd6b7
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/alu32_perm.txt
@@ -0,0 +1,32 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0x11 0xdf 0x95 0xf3
+# CHECK: r17 = combine(r31.h, r21.h)
+0x11 0xdf 0xb5 0xf3
+# CHECK: r17 = combine(r31.h, r21.l)
+0x11 0xdf 0xd5 0xf3
+# CHECK: r17 = combine(r31.l, r21.h)
+0x11 0xdf 0xf5 0xf3
+# CHECK: r17 = combine(r31.l, r21.l)
+0xb0 0xe2 0x0f 0x7c
+# CHECK: r17:16 = combine(#21, #31)
+0xb0 0xe2 0x3f 0x73
+# CHECK: r17:16 = combine(#21, r31)
+0xf0 0xe3 0x15 0x73
+# CHECK: r17:16 = combine(r21, #31)
+0x10 0xdf 0x15 0xf5
+# CHECK: r17:16 = combine(r21, r31)
+0xf1 0xc3 0x75 0x73
+# CHECK: r17 = mux(p3, r21, #31)
+0xb1 0xc2 0xff 0x73
+# CHECK: r17 = mux(p3, #21, r31)
+0xb1 0xe2 0x8f 0x7b
+# CHECK: r17 = mux(p3, #21, #31)
+0x71 0xdf 0x15 0xf4
+# CHECK: r17 = mux(p3, r21, r31)
+0x11 0xc0 0x15 0x70
+# CHECK: r17 = aslh(r21)
+0x11 0xc0 0x35 0x70
+# CHECK: r17 = asrh(r21)
+0x10 0xdf 0x95 0xf5
+# CHECK: r17:16 = packhl(r21, r31)
diff --git a/test/MC/Disassembler/Hexagon/alu32_pred.txt b/test/MC/Disassembler/Hexagon/alu32_pred.txt
new file mode 100644
index 000000000000..28a800359ea1
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/alu32_pred.txt
@@ -0,0 +1,70 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0x71 0xdf 0x15 0xfb
+# CHECK: if (p3) r17 = add(r21, r31)
+0x11 0xe3 0x15 0x70
+# CHECK: if (p3) r17 = aslh(r21)
+0x11 0xe3 0x35 0x70
+# CHECK: if (p3) r17 = asrh(r21)
+0x70 0xdf 0x15 0xfd
+# CHECK: if (p3) r17:16 = combine(r21, r31)
+0xf0 0xdf 0x15 0xfd
+# CHECK: if (!p3) r17:16 = combine(r21, r31)
+0x03 0x40 0x45 0x85 0x70 0xff 0x15 0xfd
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17:16 = combine(r21, r31)
+0x03 0x40 0x45 0x85 0xf0 0xff 0x15 0xfd
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17:16 = combine(r21, r31)
+0x03 0x40 0x45 0x85 0x70 0xff 0x15 0xfd
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17:16 = combine(r21, r31)
+0x03 0x40 0x45 0x85 0xf0 0xff 0x15 0xfd
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17:16 = combine(r21, r31)
+0x71 0xdf 0x15 0xf9
+# CHECK: if (p3) r17 = and(r21, r31)
+0x71 0xdf 0x35 0xf9
+# CHECK: if (p3) r17 = or(r21, r31)
+0x71 0xdf 0x75 0xf9
+# CHECK: if (p3) r17 = xor(r21, r31)
+0x71 0xdf 0x35 0xfb
+# CHECK: if (p3) r17 = sub(r31, r21)
+0x11 0xe3 0xb5 0x70
+# CHECK: if (p3) r17 = sxtb(r21)
+0x11 0xe3 0xf5 0x70
+# CHECK: if (p3) r17 = sxth(r21)
+0xb1 0xc2 0x60 0x7e
+# CHECK: if (p3) r17 = #21
+0xb1 0xc2 0xe0 0x7e
+# CHECK: if (!p3) r17 = #21
+0x03 0x40 0x45 0x85 0xb1 0xe2 0x60 0x7e
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = #21
+0x03 0x40 0x45 0x85 0xb1 0xe2 0xe0 0x7e
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = #21
+0x11 0xe3 0x95 0x70
+# CHECK: if (p3) r17 = zxtb(r21)
+0x11 0xe3 0xd5 0x70
+# CHECK: if (p3) r17 = zxth(r21)
+0x03 0xdf 0x15 0xf2
+# CHECK: p3 = cmp.eq(r21, r31)
+0x13 0xdf 0x15 0xf2
+# CHECK: p3 = !cmp.eq(r21, r31)
+0x03 0xdf 0x55 0xf2
+# CHECK: p3 = cmp.gt(r21, r31)
+0x13 0xdf 0x55 0xf2
+# CHECK: p3 = !cmp.gt(r21, r31)
+0x03 0xdf 0x75 0xf2
+# CHECK: p3 = cmp.gtu(r21, r31)
+0x13 0xdf 0x75 0xf2
+# CHECK: p3 = !cmp.gtu(r21, r31)
+0xf1 0xe3 0x55 0x73
+# CHECK: r17 = cmp.eq(r21, #31)
+0xf1 0xe3 0x75 0x73
+# CHECK: r17 = !cmp.eq(r21, #31)
+0x11 0xdf 0x55 0xf3
+# CHECK: r17 = cmp.eq(r21, r31)
+0x11 0xdf 0x75 0xf3
+# CHECK: r17 = !cmp.eq(r21, r31)
diff --git a/test/MC/Disassembler/Hexagon/cr.txt b/test/MC/Disassembler/Hexagon/cr.txt
new file mode 100644
index 000000000000..89157156cffd
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/cr.txt
@@ -0,0 +1,66 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0x93 0xe1 0x12 0x6b
+# CHECK: p3 = !fastcorner9(p2, p1)
+0x91 0xe3 0x02 0x6b
+# CHECK: p1 = fastcorner9(p2, p3)
+0x01 0xc0 0x82 0x6b
+# CHECK: p1 = any8(p2)
+0x01 0xc0 0xa2 0x6b
+# CHECK: p1 = all8(p2)
+0x08 0xc4 0x15 0x60
+# CHECK: loop0
+0x08 0xc4 0x35 0x60
+# CHECK: loop1
+0x68 0xc4 0x00 0x69
+# CHECK: loop0
+0x68 0xc4 0x20 0x69
+# CHECK: loop1
+0x08 0xc4 0xb5 0x60
+# CHECK: p3 = sp1loop0
+0x08 0xc4 0xd5 0x60
+# CHECK: p3 = sp2loop0
+0x08 0xc4 0xf5 0x60
+# CHECK: p3 = sp3loop0
+0xa9 0xc4 0xa0 0x69
+# CHECK: p3 = sp1loop0
+0xa9 0xc4 0xc0 0x69
+# CHECK: p3 = sp2loop0
+0xa9 0xc4 0xe0 0x69
+# CHECK: p3 = sp3loop0
+0x01 0xc3 0x02 0x6b
+# CHECK: p1 = and(p3, p2)
+0xc1 0xc3 0x12 0x6b
+# CHECK: p1 = and(p2, and(p3, p3))
+0x01 0xc3 0x62 0x6b
+# CHECK: p1 = and(p3, !p2)
+0x01 0xc3 0x22 0x6b
+# CHECK: p1 = or(p3, p2)
+0xc1 0xc3 0x32 0x6b
+# CHECK: p1 = and(p2, or(p3, p3))
+0x01 0xc3 0xe2 0x6b
+# CHECK: p1 = or(p3, !p2)
+0x01 0xc2 0x43 0x6b
+# CHECK: p1 = xor(p3, p2)
+0xc1 0xc3 0x52 0x6b
+# CHECK: p1 = or(p2, and(p3, p3))
+0x01 0xc2 0x63 0x6b 
+# CHECK: p1 = and(p2, !p3)
+0xc1 0xc3 0x72 0x6b
+# CHECK: p1 = or(p2, or(p3, p3))
+0xc1 0xc3 0x92 0x6b
+# CHECK: p1 = and(p2, and(p3, !p3))
+0xc1 0xc3 0xb2 0x6b
+# CHECK: p1 = and(p2, or(p3, !p3))
+0x01 0xc0 0xc2 0x6b
+# CHECK: p1 = not(p2)
+0xc1 0xc3 0xd2 0x6b
+# CHECK: p1 = or(p2, and(p3, !p3))
+0x01 0xc2 0xe3 0x6b
+# CHECK: p1 = or(p2, !p3)
+0xc1 0xc3 0xf2 0x6b
+# CHECK: p1 = or(p2, or(p3, !p3))
+0x0d 0xc0 0x35 0x62
+# CHECK: cs1 = r21
+0x11 0xc0 0x0d 0x6a
+# CHECK: r17 = cs1
diff --git a/test/MC/Disassembler/Hexagon/j.txt b/test/MC/Disassembler/Hexagon/j.txt
new file mode 100644
index 000000000000..c14d0341547c
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/j.txt
@@ -0,0 +1,156 @@
+# RUN: llvm-mc -triple hexagon -disassemble < %s | FileCheck %s
+
+0x22 0xc0 0x00 0x5a
+# CHECK: call
+0x22 0xc3 0x00 0x5d
+# CHECK: if (p3) call
+0x22 0xc3 0x20 0x5d
+# CHECK: if (!p3) call
+0x00 0xc0 0x89 0x11
+# CHECK: p0 = cmp.eq(r9,#-1); if (p0.new) jump:nt
+0x00 0xc1 0x89 0x11
+# CHECK: p0 = cmp.gt(r9,#-1); if (p0.new) jump:nt
+0x00 0xc3 0x89 0x11
+# CHECK: p0 = tstbit(r9, #0); if (p0.new) jump:nt
+0x00 0xe0 0x89 0x11
+# CHECK: p0 = cmp.eq(r9,#-1); if (p0.new) jump:t
+0x00 0xe1 0x89 0x11
+# CHECK: p0 = cmp.gt(r9,#-1); if (p0.new) jump:t
+0x00 0xe3 0x89 0x11
+# CHECK: p0 = tstbit(r9, #0); if (p0.new) jump:t
+0x00 0xc0 0xc9 0x11
+# CHECK: p0 = cmp.eq(r9,#-1); if (!p0.new) jump:nt
+0x00 0xc1 0xc9 0x11
+# CHECK: p0 = cmp.gt(r9,#-1); if (!p0.new) jump:nt
+0x00 0xc3 0xc9 0x11
+# CHECK: p0 = tstbit(r9, #0); if (!p0.new) jump:nt
+0x00 0xe0 0xc9 0x11
+# CHECK: p0 = cmp.eq(r9,#-1); if (!p0.new) jump:t
+0x00 0xe1 0xc9 0x11
+# CHECK: p0 = cmp.gt(r9,#-1); if (!p0.new) jump:t
+0x00 0xe3 0xc9 0x11
+# CHECK: p0 = tstbit(r9, #0); if (!p0.new) jump:t
+0x00 0xd5 0x09 0x10
+# CHECK: p0 = cmp.eq(r9, #21); if (p0.new) jump:nt
+0x00 0xf5 0x09 0x10
+# CHECK: p0 = cmp.eq(r9, #21); if (p0.new) jump:t
+0x00 0xd5 0x49 0x10
+# CHECK: p0 = cmp.eq(r9, #21); if (!p0.new) jump:nt
+0x00 0xf5 0x49 0x10
+# CHECK: p0 = cmp.eq(r9, #21); if (!p0.new) jump:t
+0x00 0xd5 0x89 0x10
+# CHECK: p0 = cmp.gt(r9, #21); if (p0.new) jump:nt
+0x00 0xf5 0x89 0x10
+# CHECK: p0 = cmp.gt(r9, #21); if (p0.new) jump:t
+0x00 0xd5 0xc9 0x10
+# CHECK: p0 = cmp.gt(r9, #21); if (!p0.new) jump:nt
+0x00 0xf5 0xc9 0x10
+# CHECK: p0 = cmp.gt(r9, #21); if (!p0.new) jump:t
+0x00 0xd5 0x09 0x11
+# CHECK: p0 = cmp.gtu(r9, #21); if (p0.new) jump:nt
+0x00 0xf5 0x09 0x11
+# CHECK: p0 = cmp.gtu(r9, #21); if (p0.new) jump:t
+0x00 0xd5 0x49 0x11
+# CHECK: p0 = cmp.gtu(r9, #21); if (!p0.new) jump:nt
+0x00 0xf5 0x49 0x11
+# CHECK: p0 = cmp.gtu(r9, #21); if (!p0.new) jump:t
+0x00 0xc0 0x89 0x13
+# CHECK: p1 = cmp.eq(r9,#-1); if (p1.new) jump:nt
+0x00 0xc1 0x89 0x13
+# CHECK: p1 = cmp.gt(r9,#-1); if (p1.new) jump:nt
+0x00 0xc3 0x89 0x13
+# CHECK: p1 = tstbit(r9, #0); if (p1.new) jump:nt
+0x00 0xe0 0x89 0x13
+# CHECK: p1 = cmp.eq(r9,#-1); if (p1.new) jump:t
+0x00 0xe1 0x89 0x13
+# CHECK: p1 = cmp.gt(r9,#-1); if (p1.new) jump:t
+0x00 0xe3 0x89 0x13
+# CHECK: p1 = tstbit(r9, #0); if (p1.new) jump:t
+0x00 0xc0 0xc9 0x13
+# CHECK: p1 = cmp.eq(r9,#-1); if (!p1.new) jump:nt
+0x00 0xc1 0xc9 0x13
+# CHECK: p1 = cmp.gt(r9,#-1); if (!p1.new) jump:nt
+0x00 0xc3 0xc9 0x13
+# CHECK: p1 = tstbit(r9, #0); if (!p1.new) jump:nt
+0x00 0xe0 0xc9 0x13
+# CHECK: p1 = cmp.eq(r9,#-1); if (!p1.new) jump:t
+0x00 0xe1 0xc9 0x13
+# CHECK: p1 = cmp.gt(r9,#-1); if (!p1.new) jump:t
+0x00 0xe3 0xc9 0x13
+# CHECK: p1 = tstbit(r9, #0); if (!p1.new) jump:t
+0x00 0xd5 0x09 0x12
+# CHECK: p1 = cmp.eq(r9, #21); if (p1.new) jump:nt
+0x00 0xf5 0x09 0x12
+# CHECK: p1 = cmp.eq(r9, #21); if (p1.new) jump:t
+0x00 0xd5 0x49 0x12
+# CHECK: p1 = cmp.eq(r9, #21); if (!p1.new) jump:nt
+0x00 0xf5 0x49 0x12
+# CHECK: p1 = cmp.eq(r9, #21); if (!p1.new) jump:t
+0x00 0xd5 0x89 0x12
+# CHECK: p1 = cmp.gt(r9, #21); if (p1.new) jump:nt
+0x00 0xf5 0x89 0x12
+# CHECK: p1 = cmp.gt(r9, #21); if (p1.new) jump:t
+0x00 0xd5 0xc9 0x12
+# CHECK: p1 = cmp.gt(r9, #21); if (!p1.new) jump:nt
+0x00 0xf5 0xc9 0x12
+# CHECK: p1 = cmp.gt(r9, #21); if (!p1.new) jump:t
+0x00 0xd5 0x09 0x13
+# CHECK: p1 = cmp.gtu(r9, #21); if (p1.new) jump:nt
+0x00 0xf5 0x09 0x13
+# CHECK: p1 = cmp.gtu(r9, #21); if (p1.new) jump:t
+0x00 0xd5 0x49 0x13
+# CHECK: p1 = cmp.gtu(r9, #21); if (!p1.new) jump:nt
+0x00 0xf5 0x49 0x13
+# CHECK: p1 = cmp.gtu(r9, #21); if (!p1.new) jump:t
+0x00 0xcd 0x09 0x14
+# CHECK: p0 = cmp.eq(r9, r13); if (p0.new) jump:nt
+0x00 0xdd 0x09 0x14
+# CHECK: p1 = cmp.eq(r9, r13); if (p1.new) jump:nt
+0x00 0xed 0x09 0x14
+# CHECK: p0 = cmp.eq(r9, r13); if (p0.new) jump:t
+0x00 0xfd 0x09 0x14
+# CHECK: p1 = cmp.eq(r9, r13); if (p1.new) jump:t
+0x00 0xcd 0x49 0x14
+# CHECK: p0 = cmp.eq(r9, r13); if (!p0.new) jump:nt
+0x00 0xdd 0x49 0x14
+# CHECK: p1 = cmp.eq(r9, r13); if (!p1.new) jump:nt
+0x00 0xed 0x49 0x14
+# CHECK: p0 = cmp.eq(r9, r13); if (!p0.new) jump:t
+0x00 0xfd 0x49 0x14
+# CHECK: p1 = cmp.eq(r9, r13); if (!p1.new) jump:t
+0x00 0xcd 0x89 0x14
+# CHECK: p0 = cmp.gt(r9, r13); if (p0.new) jump:nt
+0x00 0xdd 0x89 0x14
+# CHECK: p1 = cmp.gt(r9, r13); if (p1.new) jump:nt
+0x00 0xed 0x89 0x14
+# CHECK: p0 = cmp.gt(r9, r13); if (p0.new) jump:t
+0x00 0xfd 0x89 0x14
+# CHECK: p1 = cmp.gt(r9, r13); if (p1.new) jump:t
+0x00 0xcd 0xc9 0x14
+# CHECK: p0 = cmp.gt(r9, r13); if (!p0.new) jump:nt
+0x00 0xdd 0xc9 0x14
+# CHECK: p1 = cmp.gt(r9, r13); if (!p1.new) jump:nt
+0x00 0xed 0xc9 0x14
+# CHECK: p0 = cmp.gt(r9, r13); if (!p0.new) jump:t
+0x00 0xfd 0xc9 0x14
+# CHECK: p1 = cmp.gt(r9, r13); if (!p1.new) jump:t
+0x00 0xcd 0x09 0x15
+# CHECK: p0 = cmp.gtu(r9, r13); if (p0.new) jump:nt
+0x00 0xdd 0x09 0x15
+# CHECK: p1 = cmp.gtu(r9, r13); if (p1.new) jump:nt
+0x00 0xed 0x09 0x15
+# CHECK: p0 = cmp.gtu(r9, r13); if (p0.new) jump:t
+0x00 0xfd 0x09 0x15
+# CHECK: p1 = cmp.gtu(r9, r13); if (p1.new) jump:t
+0x00 0xcd 0x49 0x15
+# CHECK: p0 = cmp.gtu(r9, r13); if (!p0.new) jump:nt
+0x00 0xdd 0x49 0x15
+# CHECK: p1 = cmp.gtu(r9, r13); if (!p1.new) jump:nt
+0x00 0xed 0x49 0x15
+# CHECK: p0 = cmp.gtu(r9, r13); if (!p0.new) jump:t
+0x00 0xfd 0x49 0x15
+# CHECK: p1 = cmp.gtu(r9, r13); if (!p1.new) jump:t
+0x00 0xd5 0x09 0x16
+# CHECK: r9 = #21 ; jump
+0x00 0xc9 0x0d 0x17
+# CHECK: r9 = r13 ; jump
diff --git a/test/MC/Disassembler/Hexagon/jr.txt b/test/MC/Disassembler/Hexagon/jr.txt
new file mode 100644
index 000000000000..880a3399297b
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/jr.txt
@@ -0,0 +1,26 @@
+# RUN: llvm-mc -triple hexagon -disassemble < %s | FileCheck %s
+
+0x00 0xc0 0xb5 0x50
+# CHECK: callr r21
+0x00 0xc1 0x15 0x51
+# CHECK: if (p1) callr r21
+0x00 0xc3 0x35 0x51
+# CHECK: if (!p3) callr r21
+0x00 0xc0 0x95 0x52
+# CHECK: jumpr r21
+0x00 0xc1 0x55 0x53
+# CHECK: if (p1) jumpr r21
+0x03 0x40 0x45 0x85 0x00 0xcb 0x55 0x53
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) jumpr:nt r21
+0x03 0x40 0x45 0x85 0x00 0xdb 0x55 0x53
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) jumpr:t r21
+0x00 0xc3 0x75 0x53
+# CHECK: if (!p3) jumpr r21
+0x03 0x40 0x45 0x85 0x00 0xcb 0x75 0x53
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) jumpr:nt r21
+0x03 0x40 0x45 0x85 0x00 0xdb 0x75 0x53
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) jumpr:t r21
diff --git a/test/MC/Disassembler/Hexagon/ld.txt b/test/MC/Disassembler/Hexagon/ld.txt
new file mode 100644
index 000000000000..9440480e4e31
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/ld.txt
@@ -0,0 +1,292 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0x90 0xff 0xd5 0x3a
+# CHECK: r17:16 = memd(r21 + r31<<#3)
+0x10 0xc5 0xc0 0x49
+# CHECK: r17:16 = memd(##320)
+0xb0 0xe0 0xd5 0x99
+# CHECK: r17:16 = memd(r21 ++ #40:circ(m1))
+0x10 0xe2 0xd5 0x99
+# CHECK: r17:16 = memd(r21 ++ I:circ(m1))
+0xb0 0xc0 0xd5 0x9b
+# CHECK: r17:16 = memd(r21++#40)
+0x10 0xe0 0xd5 0x9d
+# CHECK: r17:16 = memd(r21++m1)
+0x10 0xe0 0xd5 0x9f
+# CHECK: r17:16 = memd(r21 ++ m1:brev)
+0xf0 0xff 0xd5 0x30
+# CHECK: if (p3) r17:16 = memd(r21+r31<<#3)
+0xf0 0xff 0xd5 0x31
+# CHECK: if (!p3) r17:16 = memd(r21+r31<<#3)
+0x03 0x40 0x45 0x85 0xf0 0xff 0xd5 0x32
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17:16 = memd(r21+r31<<#3)
+0x03 0x40 0x45 0x85 0xf0 0xff 0xd5 0x33
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17:16 = memd(r21+r31<<#3)
+0x70 0xd8 0xd5 0x41
+# CHECK: if (p3) r17:16 = memd(r21 + #24)
+0x03 0x40 0x45 0x85 0x70 0xd8 0xd5 0x43
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17:16 = memd(r21 + #24)
+0x70 0xd8 0xd5 0x45
+# CHECK: if (!p3) r17:16 = memd(r21 + #24)
+0x03 0x40 0x45 0x85 0x70 0xd8 0xd5 0x47
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17:16 = memd(r21 + #24)
+0xb0 0xe6 0xd5 0x9b
+# CHECK: if (p3) r17:16 = memd(r21++#40)
+0xb0 0xee 0xd5 0x9b
+# CHECK: if (!p3) r17:16 = memd(r21++#40)
+0x03 0x40 0x45 0x85 0xb0 0xf6 0xd5 0x9b
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17:16 = memd(r21++#40)
+0x03 0x40 0x45 0x85 0xb0 0xfe 0xd5 0x9b
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17:16 = memd(r21++#40)
+
+0x91 0xff 0x15 0x3a
+# CHECK: r17 = memb(r21 + r31<<#3)
+0xb1 0xc2 0x00 0x49
+# CHECK: r17 = memb(##21)
+0xf1 0xc3 0x15 0x91
+# CHECK: r17 = memb(r21 + #31)
+0xb1 0xe0 0x15 0x99
+# CHECK: r17 = memb(r21 ++ #5:circ(m1))
+0x11 0xe2 0x15 0x99
+# CHECK: r17 = memb(r21 ++ I:circ(m1))
+0xb1 0xc0 0x15 0x9b
+# CHECK: r17 = memb(r21++#5)
+0x11 0xe0 0x15 0x9d
+# CHECK: r17 = memb(r21++m1)
+0x11 0xe0 0x15 0x9f
+# CHECK: r17 = memb(r21 ++ m1:brev)
+0xf1 0xff 0x15 0x30
+# CHECK: if (p3) r17 = memb(r21+r31<<#3)
+0xf1 0xff 0x15 0x31
+# CHECK: if (!p3) r17 = memb(r21+r31<<#3)
+0x03 0x40 0x45 0x85 0xf1 0xff 0x15 0x32
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = memb(r21+r31<<#3)
+0x03 0x40 0x45 0x85 0xf1 0xff 0x15 0x33
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = memb(r21+r31<<#3)
+0x91 0xdd 0x15 0x41
+# CHECK: if (p3) r17 = memb(r21 + #44)
+0x03 0x40 0x45 0x85 0x91 0xdd 0x15 0x43
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = memb(r21 + #44)
+0x91 0xdd 0x15 0x45
+# CHECK: if (!p3) r17 = memb(r21 + #44)
+0x03 0x40 0x45 0x85 0x91 0xdd 0x15 0x47
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = memb(r21 + #44)
+0xb1 0xe6 0x15 0x9b
+# CHECK: if (p3) r17 = memb(r21++#5)
+0xb1 0xee 0x15 0x9b
+# CHECK: if (!p3) r17 = memb(r21++#5)
+0x03 0x40 0x45 0x85 0xb1 0xf6 0x15 0x9b
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = memb(r21++#5)
+0x03 0x40 0x45 0x85 0xb1 0xfe 0x15 0x9b
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = memb(r21++#5)
+
+0x91 0xff 0x55 0x3a
+# CHECK: r17 = memh(r21 + r31<<#3)
+0x51 0xc5 0x40 0x49
+# CHECK: r17 = memh(##84)
+0xf1 0xc3 0x55 0x91
+# CHECK: r17 = memh(r21 + #62)
+0xb1 0xe0 0x55 0x99
+# CHECK: r17 = memh(r21 ++ #10:circ(m1))
+0x11 0xe2 0x55 0x99
+# CHECK: r17 = memh(r21 ++ I:circ(m1))
+0xb1 0xc0 0x55 0x9b
+# CHECK: r17 = memh(r21++#10)
+0x11 0xe0 0x55 0x9d
+# CHECK: r17 = memh(r21++m1)
+0x11 0xe0 0x55 0x9f
+# CHECK: r17 = memh(r21 ++ m1:brev)
+0xf1 0xff 0x55 0x30
+# CHECK: if (p3) r17 = memh(r21+r31<<#3)
+0xf1 0xff 0x55 0x31
+# CHECK: if (!p3) r17 = memh(r21+r31<<#3)
+0x03 0x40 0x45 0x85 0xf1 0xff 0x55 0x32
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = memh(r21+r31<<#3)
+0x03 0x40 0x45 0x85 0xf1 0xff 0x55 0x33
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = memh(r21+r31<<#3)
+0xb1 0xe6 0x55 0x9b
+# CHECK: if (p3) r17 = memh(r21++#10)
+0xb1 0xee 0x55 0x9b
+# CHECK: if (!p3) r17 = memh(r21++#10)
+0x03 0x40 0x45 0x85 0xb1 0xf6 0x55 0x9b
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = memh(r21++#10)
+0x03 0x40 0x45 0x85 0xb1 0xfe 0x55 0x9b
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = memh(r21++#10)
+
+0x91 0xff 0x35 0x3a
+# CHECK: r17 = memub(r21 + r31<<#3)
+0xb1 0xc2 0x20 0x49
+# CHECK: r17 = memub(##21)
+0xf1 0xc3 0x35 0x91
+# CHECK: r17 = memub(r21 + #31)
+0xb1 0xe0 0x35 0x99
+# CHECK: r17 = memub(r21 ++ #5:circ(m1))
+0x11 0xe2 0x35 0x99
+# CHECK: r17 = memub(r21 ++ I:circ(m1))
+0xb1 0xc0 0x35 0x9b
+# CHECK: r17 = memub(r21++#5)
+0x11 0xe0 0x35 0x9d
+# CHECK: r17 = memub(r21++m1)
+0x11 0xe0 0x35 0x9f
+# CHECK: r17 = memub(r21 ++ m1:brev)
+0xf1 0xff 0x35 0x30
+# CHECK: if (p3) r17 = memub(r21+r31<<#3)
+0xf1 0xff 0x35 0x31
+# CHECK: if (!p3) r17 = memub(r21+r31<<#3)
+0x03 0x40 0x45 0x85 0xf1 0xff 0x35 0x32
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = memub(r21+r31<<#3)
+0x03 0x40 0x45 0x85 0xf1 0xff 0x35 0x33
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = memub(r21+r31<<#3)
+0xf1 0xdb 0x35 0x41
+# CHECK: if (p3) r17 = memub(r21 + #31)
+0x03 0x40 0x45 0x85 0xf1 0xdb 0x35 0x43
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = memub(r21 + #31)
+0xf1 0xdb 0x35 0x45
+# CHECK: if (!p3) r17 = memub(r21 + #31)
+0x03 0x40 0x45 0x85 0xf1 0xdb 0x35 0x47
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = memub(r21 + #31)
+0xb1 0xe6 0x35 0x9b
+# CHECK: if (p3) r17 = memub(r21++#5)
+0xb1 0xee 0x35 0x9b
+# CHECK: if (!p3) r17 = memub(r21++#5)
+0x03 0x40 0x45 0x85 0xb1 0xf6 0x35 0x9b
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = memub(r21++#5)
+0x03 0x40 0x45 0x85 0xb1 0xfe 0x35 0x9b
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = memub(r21++#5)
+
+0x91 0xff 0x75 0x3a
+# CHECK: r17 = memuh(r21 + r31<<#3)
+0x51 0xc5 0x60 0x49
+# CHECK: r17 = memuh(##84)
+0xb1 0xc2 0x75 0x91
+# CHECK: r17 = memuh(r21 + #42)
+0xb1 0xe0 0x75 0x99
+# CHECK: r17 = memuh(r21 ++ #10:circ(m1))
+0x11 0xe2 0x75 0x99
+# CHECK: r17 = memuh(r21 ++ I:circ(m1))
+0xb1 0xc0 0x75 0x9b
+# CHECK: r17 = memuh(r21++#10)
+0x11 0xe0 0x75 0x9d
+# CHECK: r17 = memuh(r21++m1)
+0x11 0xe0 0x75 0x9f
+# CHECK: r17 = memuh(r21 ++ m1:brev)
+0xf1 0xff 0x75 0x30
+# CHECK: if (p3) r17 = memuh(r21+r31<<#3)
+0xf1 0xff 0x75 0x31
+# CHECK: if (!p3) r17 = memuh(r21+r31<<#3)
+0x03 0x40 0x45 0x85 0xf1 0xff 0x75 0x32
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = memuh(r21+r31<<#3)
+0x03 0x40 0x45 0x85 0xf1 0xff 0x75 0x33
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = memuh(r21+r31<<#3)
+0xb1 0xda 0x75 0x41
+# CHECK: if (p3) r17 = memuh(r21 + #42)
+0xb1 0xda 0x75 0x45
+# CHECK: if (!p3) r17 = memuh(r21 + #42)
+0x03 0x40 0x45 0x85 0xb1 0xda 0x75 0x43
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = memuh(r21 + #42)
+0x03 0x40 0x45 0x85 0xb1 0xda 0x75 0x47
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = memuh(r21 + #42)
+0xb1 0xe6 0x75 0x9b
+# CHECK: if (p3) r17 = memuh(r21++#10)
+0xb1 0xee 0x75 0x9b
+# CHECK: if (!p3) r17 = memuh(r21++#10)
+0x03 0x40 0x45 0x85 0xb1 0xf6 0x75 0x9b
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = memuh(r21++#10)
+0x03 0x40 0x45 0x85 0xb1 0xfe 0x75 0x9b
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = memuh(r21++#10)
+
+0x91 0xff 0x95 0x3a
+# CHECK: r17 = memw(r21 + r31<<#3)
+0x91 0xc2 0x80 0x49
+# CHECK: r17 = memw(##80)
+0xb1 0xc2 0x95 0x91
+# CHECK: r17 = memw(r21 + #84)
+0xb1 0xe0 0x95 0x99
+# CHECK: r17 = memw(r21 ++ #20:circ(m1))
+0x11 0xe2 0x95 0x99
+# CHECK: r17 = memw(r21 ++ I:circ(m1))
+0xb1 0xc0 0x95 0x9b
+# CHECK: r17 = memw(r21++#20)
+0x11 0xe0 0x95 0x9d
+# CHECK: r17 = memw(r21++m1)
+0x11 0xe0 0x95 0x9f
+# CHECK: r17 = memw(r21 ++ m1:brev)
+0xf1 0xff 0x95 0x30
+# CHECK: if (p3) r17 = memw(r21+r31<<#3)
+0xf1 0xff 0x95 0x31
+# CHECK: if (!p3) r17 = memw(r21+r31<<#3)
+0x03 0x40 0x45 0x85 0xf1 0xff 0x95 0x32
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = memw(r21+r31<<#3)
+0x03 0x40 0x45 0x85 0xf1 0xff 0x95 0x33
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = memw(r21+r31<<#3)
+0xb1 0xda 0x95 0x41
+# CHECK: if (p3) r17 = memw(r21 + #84)
+0xb1 0xda 0x95 0x45
+# CHECK: if (!p3) r17 = memw(r21 + #84)
+0x03 0x40 0x45 0x85 0xb1 0xda 0x95 0x43
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = memw(r21 + #84)
+0x03 0x40 0x45 0x85 0xb1 0xda 0x95 0x47
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = memw(r21 + #84)
+0xb1 0xe6 0x95 0x9b
+# CHECK: if (p3) r17 = memw(r21++#20)
+0xb1 0xee 0x95 0x9b
+# CHECK: if (!p3) r17 = memw(r21++#20)
+0x03 0x40 0x45 0x85 0xb1 0xf6 0x95 0x9b
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) r17 = memw(r21++#20)
+0x03 0x40 0x45 0x85 0xb1 0xfe 0x95 0x9b
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) r17 = memw(r21++#20)
+
+0x1e 0xc0 0x1e 0x90
+# CHECK: deallocframe
+0x1e 0xc0 0x1e 0x96
+# CHECK: dealloc_return
+0x03 0x40 0x45 0x85 0x1e 0xcb 0x1e 0x96
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) dealloc_return:nt
+0x1e 0xd3 0x1e 0x96
+# CHECK: if (p3) dealloc_return
+0x03 0x40 0x45 0x85 0x1e 0xdb 0x1e 0x96
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) dealloc_return:t
+0x03 0x40 0x45 0x85 0x1e 0xeb 0x1e 0x96
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) dealloc_return:nt
+0x1e 0xf3 0x1e 0x96
+# CHECK: if (!p3) dealloc_return
+0x03 0x40 0x45 0x85 0x1e 0xfb 0x1e 0x96
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) dealloc_return:t
diff --git a/test/MC/Disassembler/Hexagon/lit.local.cfg b/test/MC/Disassembler/Hexagon/lit.local.cfg
new file mode 100644
index 000000000000..6500d4dd7d5a
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'Hexagon' in config.root.targets:
+    config.unsupported = True
+
diff --git a/test/MC/Disassembler/Hexagon/memop.txt b/test/MC/Disassembler/Hexagon/memop.txt
new file mode 100644
index 000000000000..7c944121ddcc
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/memop.txt
@@ -0,0 +1,50 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0x95 0xd9 0x11 0x3e
+# CHECK: memb(r17+#51) += r21
+0xb5 0xd9 0x11 0x3e
+# CHECK: memb(r17+#51) -= r21
+0xd5 0xd9 0x11 0x3e
+# CHECK: memb(r17+#51) &= r21
+0xf5 0xd9 0x11 0x3e
+# CHECK: memb(r17+#51) |= r21
+0x95 0xd9 0x11 0x3f
+# CHECK: memb(r17+#51) += #21
+0xb5 0xd9 0x11 0x3f
+# CHECK: memb(r17+#51) -= #21
+0xd5 0xd9 0x11 0x3f
+# CHECK: memb(r17+#51) = clrbit(#21)
+0xf5 0xd9 0x11 0x3f
+# CHECK: memb(r17+#51) = setbit(#21)
+0x95 0xd9 0x31 0x3e
+# CHECK: memh(r17+#102) += r21
+0xb5 0xd9 0x31 0x3e
+# CHECK: memh(r17+#102) -= r21
+0xd5 0xd9 0x31 0x3e
+# CHECK: memh(r17+#102) &= r21
+0xf5 0xd9 0x31 0x3e
+# CHECK: memh(r17+#102) |= r21
+0x95 0xd9 0x31 0x3f
+# CHECK: memh(r17+#102) += #21
+0xb5 0xd9 0x31 0x3f
+# CHECK: memh(r17+#102) -= #21
+0xd5 0xd9 0x31 0x3f
+# CHECK: memh(r17+#102) = clrbit(#21)
+0xf5 0xd9 0x31 0x3f
+# CHECK: memh(r17+#102) = setbit(#21)
+0x95 0xd9 0x51 0x3e
+# CHECK: memw(r17+#204) += r21
+0xb5 0xd9 0x51 0x3e
+# CHECK: memw(r17+#204) -= r21
+0xd5 0xd9 0x51 0x3e
+# CHECK: memw(r17+#204) &= r21
+0xf5 0xd9 0x51 0x3e
+# CHECK: memw(r17+#204) |= r21
+0x95 0xd9 0x51 0x3f
+# CHECK: memw(r17+#204) += #21
+0xb5 0xd9 0x51 0x3f
+# CHECK: memw(r17+#204) -= #21
+0xd5 0xd9 0x51 0x3f
+# CHECK: memw(r17+#204) = clrbit(#21)
+0xf5 0xd9 0x51 0x3f
+# CHECK: memw(r17+#204) = setbit(#21)
diff --git a/test/MC/Disassembler/Hexagon/nv_j.txt b/test/MC/Disassembler/Hexagon/nv_j.txt
new file mode 100644
index 000000000000..e5cee4d44384
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/nv_j.txt
@@ -0,0 +1,134 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0x11 0x40 0x71 0x70 0x92 0xd5 0x02 0x20
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.eq(r2.new, r21)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0x02 0x20
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.eq(r2.new, r21)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xd5 0x42 0x20
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.eq(r2.new, r21)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0x42 0x20
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.eq(r2.new, r21)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xd5 0x82 0x20
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.gt(r2.new, r21)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0x82 0x20
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.gt(r2.new, r21)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xd5 0xc2 0x20
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.gt(r2.new, r21)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0xc2 0x20
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.gt(r2.new, r21)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xd5 0x02 0x21
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.gtu(r2.new, r21)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0x02 0x21
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.gtu(r2.new, r21)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xd5 0x42 0x21
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.gtu(r2.new, r21)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0x42 0x21
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.gtu(r2.new, r21)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xd5 0x82 0x21
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.gt(r21, r2.new)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0x82 0x21
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.gt(r21, r2.new)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xd5 0xc2 0x21
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.gt(r21, r2.new)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0xc2 0x21
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.gt(r21, r2.new)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xd5 0x02 0x22
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.gtu(r21, r2.new)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0x02 0x22
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.gtu(r21, r2.new)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xd5 0x42 0x22
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.gtu(r21, r2.new)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0x42 0x22
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.gtu(r21, r2.new)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xd5 0x02 0x24
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.eq(r2.new, #21)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0x02 0x24
+# CHECK: r17 = r17
+# CHECK-NETX: if (cmp.eq(r2.new, #21)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xd5 0x42 0x24
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.eq(r2.new, #21)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0x42 0x24
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.eq(r2.new, #21)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xd5 0x82 0x24
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.gt(r2.new, #21)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0x82 0x24
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.gt(r2.new, #21)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xd5 0xc2 0x24
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.gt(r2.new, #21)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0xc2 0x24
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.gt(r2.new, #21)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xd5 0x02 0x25
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.gtu(r2.new, #21)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0x02 0x25
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.gtu(r2.new, #21)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xd5 0x42 0x25
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.gtu(r2.new, #21)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xf5 0x42 0x25
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.gtu(r2.new, #21)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xc0 0x82 0x25
+# CHECK: r17 = r17
+# CHECK-NEXT: if (tstbit(r2.new, #0)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xe0 0x82 0x25
+# CHECK: r17 = r17
+# CHECK-NEXT: if (tstbit(r2.new, #0)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xc0 0xc2 0x25
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!tstbit(r2.new, #0)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xe0 0xc2 0x25
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!tstbit(r2.new, #0)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xc0 0x02 0x26
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.eq(r2.new, #-1)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xe0 0x02 0x26
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.eq(r2.new, #-1)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xc0 0x42 0x26
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.eq(r2.new, #-1)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xe0 0x42 0x26
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.eq(r2.new, #-1)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xc0 0x82 0x26
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.gt(r2.new, #-1)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xe0 0x82 0x26
+# CHECK: r17 = r17
+# CHECK-NEXT: if (cmp.gt(r2.new, #-1)) jump:t
+0x11 0x40 0x71 0x70 0x92 0xc0 0xc2 0x26
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.gt(r2.new, #-1)) jump:nt
+0x11 0x40 0x71 0x70 0x92 0xe0 0xc2 0x26
+# CHECK: r17 = r17
+# CHECK-NEXT: if (!cmp.gt(r2.new, #-1)) jump:t
diff --git a/test/MC/Disassembler/Hexagon/nv_st.txt b/test/MC/Disassembler/Hexagon/nv_st.txt
new file mode 100644
index 000000000000..830570987340
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/nv_st.txt
@@ -0,0 +1,166 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0x1f 0x40 0x7f 0x70 0x82 0xf5 0xb1 0x3b
+# CHECK: r31 = r31
+# CHECK-NEXT: memb(r17 + r21<<#3) = r2.new
+0x1f 0x40 0x7f 0x70 0x15 0xc2 0xb1 0xa1
+# CHECK: r31 = r31
+# CHECK-NEXT: memb(r17+#21) = r2.new
+0x1f 0x40 0x7f 0x70 0x28 0xc2 0xb1 0xab
+# CHECK: r31 = r31
+# CHECK-NEXT: memb(r17++#5) = r2.new
+0x1f 0x40 0x7f 0x70 0x00 0xe2 0xb1 0xad
+# CHECK: r31 = r31
+# CHECK-NEXT: memb(r17++m1) = r2.new
+0x1f 0x40 0x7f 0x70 0xe2 0xf5 0xb1 0x34
+# CHECK: r31 = r31
+# CHECK-NEXT: if (p3) memb(r17+r21<<#3) = r2.new
+0x1f 0x40 0x7f 0x70 0xe2 0xf5 0xb1 0x35
+# CHECK: r31 = r31
+# CHECK-NEXT: if (!p3) memb(r17+r21<<#3) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xe2 0xf5 0xb1 0x36
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (p3.new) memb(r17+r21<<#3) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xe2 0xf5 0xb1 0x37
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (!p3.new) memb(r17+r21<<#3) = r2.new
+0x1f 0x40 0x7f 0x70 0xab 0xc2 0xb1 0x40
+# CHECK: r31 = r31
+# CHECK-NEXT: if (p3) memb(r17+#21) = r2.new
+0x1f 0x40 0x7f 0x70 0xab 0xc2 0xb1 0x44
+# CHECK: r31 = r31
+# CHECK-NEXT: if (!p3) memb(r17+#21) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xc2 0xb1 0x42
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (p3.new) memb(r17+#21) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xc2 0xb1 0x46
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (!p3.new) memb(r17+#21) = r2.new
+0x1f 0x40 0x7f 0x70 0x2b 0xe2 0xb1 0xab
+# CHECK: r31 = r31
+# CHECK-NEXT: if (p3) memb(r17++#5) = r2.new
+0x1f 0x40 0x7f 0x70 0x2f 0xe2 0xb1 0xab
+# CHECK: r31 = r31
+# CHECK-NEXT: if (!p3) memb(r17++#5) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xe2 0xb1 0xab
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (p3.new) memb(r17++#5) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xaf 0xe2 0xb1 0xab
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (!p3.new) memb(r17++#5) = r2.new
+
+0x1f 0x40 0x7f 0x70 0x8a 0xf5 0xb1 0x3b
+# CHECK: r31 = r31
+# CHECK-NEXT: memh(r17 + r21<<#3) = r2.new
+0x1f 0x40 0x7f 0x70 0x15 0xca 0xb1 0xa1
+# CHECK: r31 = r31
+# CHECK-NEXT: memh(r17+#42) = r2.new
+0x1f 0x40 0x7f 0x70 0x28 0xca 0xb1 0xab
+# CHECK: r31 = r31
+# CHECK-NEXT: memh(r17++#10) = r2.new
+0x1f 0x40 0x7f 0x70 0x00 0xea 0xb1 0xad
+# CHECK: r31 = r31
+# CHECK-NEXT: memh(r17++m1) = r2.new
+0x1f 0x40 0x7f 0x70 0xea 0xf5 0xb1 0x34
+# CHECK: r31 = r31
+# CHECK-NEXT: if (p3) memh(r17+r21<<#3) = r2.new
+0x1f 0x40 0x7f 0x70 0xea 0xf5 0xb1 0x35
+# CHECK: r31 = r31
+# CHECK-NEXT: if (!p3) memh(r17+r21<<#3) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xea 0xf5 0xb1 0x36
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (p3.new) memh(r17+r21<<#3) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xea 0xf5 0xb1 0x37
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (!p3.new) memh(r17+r21<<#3) = r2.new
+0x1f 0x40 0x7f 0x70 0xab 0xca 0xb1 0x40
+# CHECK: r31 = r31
+# CHECK-NEXT: if (p3) memh(r17+#42) = r2.new
+0x1f 0x40 0x7f 0x70 0xab 0xca 0xb1 0x44
+# CHECK: r31 = r31
+# CHECK-NEXT: if (!p3) memh(r17+#42) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xca 0xb1 0x42
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (p3.new) memh(r17+#42) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xca 0xb1 0x46
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (!p3.new) memh(r17+#42) = r2.new
+0x1f 0x40 0x7f 0x70 0x2b 0xea 0xb1 0xab
+# CHECK: r31 = r31
+# CHECK-NEXT: if (p3) memh(r17++#10) = r2.new
+0x1f 0x40 0x7f 0x70 0x2f 0xea 0xb1 0xab
+# CHECK: r31 = r31
+# CHECK-NEXT: if (!p3) memh(r17++#10) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xea 0xb1 0xab
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (p3.new) memh(r17++#10) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xaf 0xea 0xb1 0xab
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (!p3.new) memh(r17++#10) = r2.new
+
+0x1f 0x40 0x7f 0x70 0x92 0xf5 0xb1 0x3b
+# CHECK: r31 = r31
+# CHECK-NEXT: memw(r17 + r21<<#3) = r2.new
+0x1f 0x40 0x7f 0x70 0x15 0xd2 0xb1 0xa1
+# CHECK: r31 = r31
+# CHECK-NEXT: memw(r17+#84) = r2.new
+0x1f 0x40 0x7f 0x70 0x28 0xd2 0xb1 0xab
+# CHECK: r31 = r31
+# CHECK-NEXT: memw(r17++#20) = r2.new
+0x1f 0x40 0x7f 0x70 0x00 0xf2 0xb1 0xad
+# CHECK: r31 = r31
+# CHECK-NEXT: memw(r17++m1) = r2.new
+0x1f 0x40 0x7f 0x70 0xf2 0xf5 0xb1 0x34
+# CHECK: r31 = r31
+# CHECK-NEXT: if (p3) memw(r17+r21<<#3) = r2.new
+0x1f 0x40 0x7f 0x70 0xf2 0xf5 0xb1 0x35
+# CHECK: r31 = r31
+# CHECK-NEXT: if (!p3) memw(r17+r21<<#3) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xf2 0xf5 0xb1 0x36
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (p3.new) memw(r17+r21<<#3) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xf2 0xf5 0xb1 0x37
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (!p3.new) memw(r17+r21<<#3) = r2.new
+0x1f 0x40 0x7f 0x70 0xab 0xd2 0xb1 0x40
+# CHECK: r31 = r31
+# CHECK-NEXT: if (p3) memw(r17+#84) = r2.new
+0x1f 0x40 0x7f 0x70 0xab 0xd2 0xb1 0x44
+# CHECK: r31 = r31
+# CHECK-NEXT: if (!p3) memw(r17+#84) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xd2 0xb1 0x42
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (p3.new) memw(r17+#84) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xd2 0xb1 0x46
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (!p3.new) memw(r17+#84) = r2.new
+0x1f 0x40 0x7f 0x70 0x2b 0xf2 0xb1 0xab
+# CHECK: r31 = r31
+# CHECK-NEXT: if (p3) memw(r17++#20) = r2.new
+0x1f 0x40 0x7f 0x70 0x2f 0xf2 0xb1 0xab
+# CHECK: r31 = r31
+# CHECK-NEXT: if (!p3) memw(r17++#20) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xab 0xf2 0xb1 0xab
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (p3.new) memw(r17++#20) = r2.new
+0x03 0x40 0x45 0x85 0x1f 0x40 0x7f 0x70 0xaf 0xf2 0xb1 0xab
+# CHECK: p3 = r5
+# CHECK-NEXT: r31 = r31
+# CHECK-NEXT: if (!p3.new) memw(r17++#20) = r2.new
diff --git a/test/MC/Disassembler/Hexagon/st.txt b/test/MC/Disassembler/Hexagon/st.txt
new file mode 100644
index 000000000000..4d1a491b344a
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/st.txt
@@ -0,0 +1,274 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0x9e 0xf5 0xd1 0x3b
+# CHECK: memd(r17 + r21<<#3) = r31:30
+0x28 0xd4 0xc0 0x48
+# CHECK: memd(##320) = r21:20
+0x15 0xd4 0xd1 0xa1
+# CHECK: memd(r17+#168) = r21:20
+0x02 0xf4 0xd1 0xa9
+# CHECK: memd(r17 ++ I:circ(m1)) = r21:20
+0x28 0xf4 0xd1 0xa9
+# CHECK: memd(r17 ++ #40:circ(m1)) = r21:20
+0x28 0xd4 0xd1 0xab
+# CHECK: memd(r17++#40) = r21:20
+0x00 0xf4 0xd1 0xad
+# CHECK: memd(r17++m1) = r21:20
+0x00 0xf4 0xd1 0xaf
+# CHECK: memd(r17 ++ m1:brev) = r21:20
+0xfe 0xf5 0xd1 0x34
+# CHECK: if (p3) memd(r17+r21<<#3) = r31:30
+0xfe 0xf5 0xd1 0x35
+# CHECK: if (!p3) memd(r17+r21<<#3) = r31:30
+0x03 0x40 0x45 0x85 0xfe 0xf5 0xd1 0x36
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memd(r17+r21<<#3) = r31:30
+0x03 0x40 0x45 0x85 0xfe 0xf5 0xd1 0x37
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memd(r17+r21<<#3) = r31:30
+0xab 0xde 0xd1 0x40
+# CHECK: if (p3) memd(r17+#168) = r31:30
+0xab 0xde 0xd1 0x44
+# CHECK: if (!p3) memd(r17+#168) = r31:30
+0x03 0x40 0x45 0x85 0xab 0xde 0xd1 0x42
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memd(r17+#168) = r31:30
+0x03 0x40 0x45 0x85 0xab 0xde 0xd1 0x46
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memd(r17+#168) = r31:30
+0x2b 0xf4 0xd1 0xab
+# CHECK: if (p3) memd(r17++#40) = r21:20
+0x2f 0xf4 0xd1 0xab
+# CHECK: if (!p3) memd(r17++#40) = r21:20
+0x03 0x40 0x45 0x85 0xab 0xf4 0xd1 0xab
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memd(r17++#40) = r21:20
+0x03 0x40 0x45 0x85 0xaf 0xf4 0xd1 0xab
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memd(r17++#40) = r21:20
+
+0x9f 0xf5 0x11 0x3b
+# CHECK: memb(r17 + r21<<#3) = r31
+0x9f 0xca 0x11 0x3c
+# CHECK: memb(r17+#21)=#31
+0x15 0xd5 0x00 0x48
+# CHECK: memb(##21) = r21
+0x15 0xd5 0x11 0xa1
+# CHECK: memb(r17+#21) = r21
+0x02 0xf5 0x11 0xa9
+# CHECK: memb(r17 ++ I:circ(m1)) = r21
+0x28 0xf5 0x11 0xa9
+# CHECK: memb(r17 ++ #5:circ(m1)) = r21
+0x28 0xd5 0x11 0xab
+# CHECK: memb(r17++#5) = r21
+0x00 0xf5 0x11 0xad
+# CHECK: memb(r17++m1) = r21
+0x00 0xf5 0x11 0xaf
+# CHECK: memb(r17 ++ m1:brev) = r21
+0xff 0xf5 0x11 0x34
+# CHECK: if (p3) memb(r17+r21<<#3) = r31
+0xff 0xf5 0x11 0x35
+# CHECK: if (!p3) memb(r17+r21<<#3) = r31
+0x03 0x40 0x45 0x85 0xff 0xf5 0x11 0x36
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memb(r17+r21<<#3) = r31
+0x03 0x40 0x45 0x85 0xff 0xf5 0x11 0x37
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memb(r17+r21<<#3) = r31
+0xff 0xca 0x11 0x38
+# CHECK: if (p3) memb(r17+#21)=#31
+0xff 0xca 0x91 0x38
+# CHECK: if (!p3) memb(r17+#21)=#31
+0x03 0x40 0x45 0x85 0xff 0xca 0x11 0x39
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memb(r17+#21)=#31
+0x03 0x40 0x45 0x85 0xff 0xca 0x91 0x39
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memb(r17+#21)=#31
+0xab 0xdf 0x11 0x40
+# CHECK: if (p3) memb(r17+#21) = r31
+0xab 0xdf 0x11 0x44
+# CHECK: if (!p3) memb(r17+#21) = r31
+0x03 0x40 0x45 0x85 0xab 0xdf 0x11 0x42
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memb(r17+#21) = r31
+0x03 0x40 0x45 0x85 0xab 0xdf 0x11 0x46
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memb(r17+#21) = r31
+0x2b 0xf5 0x11 0xab
+# CHECK: if (p3) memb(r17++#5) = r21
+0x2f 0xf5 0x11 0xab
+# CHECK: if (!p3) memb(r17++#5) = r21
+0x03 0x40 0x45 0x85 0xab 0xf5 0x11 0xab
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memb(r17++#5) = r21
+0x03 0x40 0x45 0x85 0xaf 0xf5 0x11 0xab
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memb(r17++#5) = r21
+
+0x9f 0xf5 0x51 0x3b
+# CHECK: memh(r17 + r21<<#3) = r31
+0x9f 0xf5 0x71 0x3b
+# CHECK: memh(r17 + r21<<#3) = r31.h
+0x95 0xcf 0x31 0x3c
+# CHECK: memh(r17+#62)=#21
+0x2a 0xd5 0x40 0x48
+# CHECK: memh(##84) = r21
+0x2a 0xd5 0x60 0x48
+# CHECK: memh(##84) = r21.h
+0x15 0xdf 0x51 0xa1
+# CHECK: memh(r17+#42) = r31
+0x15 0xdf 0x71 0xa1
+# CHECK: memh(r17+#42) = r31.h
+0x02 0xf5 0x51 0xa9
+# CHECK: memh(r17 ++ I:circ(m1)) = r21
+0x28 0xf5 0x51 0xa9
+# CHECK: memh(r17 ++ #10:circ(m1)) = r21
+0x02 0xf5 0x71 0xa9
+# CHECK: memh(r17 ++ I:circ(m1)) = r21.h
+0x28 0xf5 0x71 0xa9
+# CHECK: memh(r17 ++ #10:circ(m1)) = r21.h
+0x28 0xd5 0x51 0xab
+# CHECK: memh(r17++#10) = r21
+0x28 0xd5 0x71 0xab
+# CHECK: memh(r17++#10) = r21.h
+0x00 0xf5 0x51 0xad
+# CHECK: memh(r17++m1) = r21
+0x00 0xf5 0x71 0xad
+# CHECK: memh(r17++m1) = r21.h
+0x00 0xf5 0x51 0xaf
+# CHECK: memh(r17 ++ m1:brev) = r21
+0x00 0xf5 0x71 0xaf
+# CHECK: memh(r17 ++ m1:brev) = r21.h
+0xff 0xf5 0x51 0x34
+# CHECK: if (p3) memh(r17+r21<<#3) = r31
+0xff 0xf5 0x71 0x34
+# CHECK: if (p3) memh(r17+r21<<#3) = r31.h
+0xff 0xf5 0x51 0x35
+# CHECK: if (!p3) memh(r17+r21<<#3) = r31
+0xff 0xf5 0x71 0x35
+# CHECK: if (!p3) memh(r17+r21<<#3) = r31.h
+0x03 0x40 0x45 0x85 0xff 0xf5 0x51 0x36
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memh(r17+r21<<#3) = r31
+0x03 0x40 0x45 0x85 0xff 0xf5 0x71 0x36
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memh(r17+r21<<#3) = r31.h
+0x03 0x40 0x45 0x85 0xff 0xf5 0x51 0x37
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memh(r17+r21<<#3) = r31
+0x03 0x40 0x45 0x85 0xff 0xf5 0x71 0x37
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memh(r17+r21<<#3) = r31.h
+0xf5 0xcf 0x31 0x38
+# CHECK: if (p3) memh(r17+#62)=#21
+0xf5 0xcf 0xb1 0x38
+# CHECK: if (!p3) memh(r17+#62)=#21
+0x03 0x40 0x45 0x85 0xf5 0xcf 0x31 0x39
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memh(r17+#62)=#21
+0x03 0x40 0x45 0x85 0xf5 0xcf 0xb1 0x39
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memh(r17+#62)=#21
+0xfb 0xd5 0x51 0x40
+# CHECK: if (p3) memh(r17+#62) = r21
+0xfb 0xd5 0x71 0x40
+# CHECK: if (p3) memh(r17+#62) = r21.h
+0xfb 0xd5 0x51 0x44
+# CHECK: if (!p3) memh(r17+#62) = r21
+0xfb 0xd5 0x71 0x44
+# CHECK: if (!p3) memh(r17+#62) = r21.h
+0x03 0x40 0x45 0x85 0xfb 0xd5 0x51 0x42
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memh(r17+#62) = r21
+0x03 0x40 0x45 0x85 0xfb 0xd5 0x71 0x42
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memh(r17+#62) = r21.h
+0x03 0x40 0x45 0x85 0xfb 0xd5 0x51 0x46
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memh(r17+#62) = r21
+0x03 0x40 0x45 0x85 0xfb 0xd5 0x71 0x46
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memh(r17+#62) = r21.h
+0x2b 0xf5 0x51 0xab
+# CHECK: if (p3) memh(r17++#10) = r21
+0x2f 0xf5 0x51 0xab
+# CHECK: if (!p3) memh(r17++#10) = r21
+0x03 0x40 0x45 0x85 0xab 0xf5 0x51 0xab
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memh(r17++#10) = r21
+0x03 0x40 0x45 0x85 0xaf 0xf5 0x51 0xab 
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memh(r17++#10) = r21
+0x2b 0xf5 0x71 0xab
+# CHECK: if (p3) memh(r17++#10) = r21.h
+0x2f 0xf5 0x71 0xab
+# CHECK: if (!p3) memh(r17++#10) = r21.h
+0x03 0x40 0x45 0x85 0xab 0xf5 0x71 0xab
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memh(r17++#10) = r21.h
+0x03 0x40 0x45 0x85 0xaf 0xf5 0x71 0xab
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memh(r17++#10) = r21.h
+
+0x9f 0xf5 0x91 0x3b
+# CHECK: memw(r17 + r21<<#3) = r31
+0x9f 0xca 0x51 0x3c
+# CHECK: memw(r17+#84)=#31
+0x15 0xdf 0x91 0xa1
+# CHECK: memw(r17+#84) = r31
+0x14 0xd5 0x80 0x48
+# CHECK: memw(##80) = r21
+0x02 0xf5 0x91 0xa9
+# CHECK: memw(r17 ++ I:circ(m1)) = r21
+0x28 0xf5 0x91 0xa9
+# CHECK: memw(r17 ++ #20:circ(m1)) = r21
+0x28 0xd5 0x91 0xab
+# CHECK: memw(r17++#20) = r21
+0x00 0xf5 0x91 0xad
+# CHECK: memw(r17++m1) = r21
+0x00 0xf5 0x91 0xaf
+# CHECK: memw(r17 ++ m1:brev) = r21
+0xff 0xf5 0x91 0x34
+# CHECK: if (p3) memw(r17+r21<<#3) = r31
+0xff 0xf5 0x91 0x35
+# CHECK: if (!p3) memw(r17+r21<<#3) = r31
+0x03 0x40 0x45 0x85 0xff 0xf5 0x91 0x36
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memw(r17+r21<<#3) = r31
+0x03 0x40 0x45 0x85 0xff 0xf5 0x91 0x37
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memw(r17+r21<<#3) = r31
+0xff 0xca 0x51 0x38
+# CHECK: if (p3) memw(r17+#84)=#31
+0xff 0xca 0xd1 0x38
+# CHECK: if (!p3) memw(r17+#84)=#31
+0x03 0x40 0x45 0x85 0xff 0xca 0x51 0x39
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memw(r17+#84)=#31
+0x03 0x40 0x45 0x85 0xff 0xca 0xd1 0x39
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memw(r17+#84)=#31
+0xab 0xdf 0x91 0x40
+# CHECK: if (p3) memw(r17+#84) = r31
+0xab 0xdf 0x91 0x44 
+# CHECK: if (!p3) memw(r17+#84) = r31
+0x03 0x40 0x45 0x85 0xab 0xdf 0x91 0x42
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memw(r17+#84) = r31
+0x03 0x40 0x45 0x85 0xab 0xdf 0x91 0x46
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memw(r17+#84) = r31
+0x2b 0xf5 0x91 0xab
+# CHECK: if (p3) memw(r17++#20) = r21
+0x2f 0xf5 0x91 0xab
+# CHECK: if (!p3) memw(r17++#20) = r21
+0x03 0x40 0x45 0x85 0xaf 0xf5 0x91 0xab
+# CHECK: p3 = r5
+# CHECK-NEXT: if (!p3.new) memw(r17++#20) = r21
+0x03 0x40 0x45 0x85 0xab 0xf5 0x91 0xab
+# CHECK: p3 = r5
+# CHECK-NEXT: if (p3.new) memw(r17++#20) = r21
+
+0x1f 0xc0 0x9d 0xa0
+# CHECK: allocframe(#248)
+\ No newline at end of file
diff --git a/test/MC/Disassembler/Hexagon/system_user.txt b/test/MC/Disassembler/Hexagon/system_user.txt
new file mode 100644
index 000000000000..51d082a9e180
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/system_user.txt
@@ -0,0 +1,12 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0x11 0xc0 0x15 0x92
+# CHECK: r17 = memw_locked(r21)
+0x10 0xd0 0x15 0x92
+# CHECK: r17:16 = memd_locked(r21)
+0x00 0xc0 0x00 0xa8
+# CHECK: barrier
+0x15 0xc0 0x11 0x94
+# CHECK: dcfetch(r17 + #168)
+0x00 0xc0 0x51 0x62
+# CHECK: trace(r17)
diff --git a/test/MC/Disassembler/Hexagon/xtype_alu.txt b/test/MC/Disassembler/Hexagon/xtype_alu.txt
new file mode 100644
index 000000000000..d39260c63228
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/xtype_alu.txt
@@ -0,0 +1,164 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0xd0 0xc0 0x94 0x80
+# CHECK: r17:16 = abs(r21:20)
+0x91 0xc0 0x95 0x8c
+# CHECK: r17 = abs(r21)
+0xb1 0xc0 0x95 0x8c
+# CHECK: r17 = abs(r21):sat
+0xff 0xd1 0x35 0xdb
+# CHECK: r17 = add(r21, add(r31, #23))
+0xff 0xd1 0xb5 0xdb
+# CHECK: r17 = add(r21, sub(#23, r31))
+0xf1 0xc2 0x15 0xe2
+# CHECK: r17 += add(r21, #23)
+0xf1 0xc2 0x95 0xe2
+# CHECK: r17 -= add(r21, #23)
+0x31 0xdf 0x15 0xef
+# CHECK: r17 += add(r21, r31)
+0x31 0xdf 0x95 0xef
+# CHECK: r17 -= add(r21, r31)
+0xf0 0xde 0x14 0xd3
+# CHECK: r17:16 = add(r21:20, r31:30)
+0x11 0xd5 0x1f 0xd5
+# CHECK: r17 = add(r21.l, r31.l)
+0x51 0xd5 0x1f 0xd5
+# CHECK: r17 = add(r21.l, r31.h)
+0x91 0xd5 0x1f 0xd5
+# CHECK: r17 = add(r21.l, r31.l):sat
+0xd1 0xd5 0x1f 0xd5
+# CHECK: r17 = add(r21.l, r31.h):sat
+0x11 0xd5 0x5f 0xd5
+# CHECK: r17 = add(r21.l, r31.l):<<16
+0x31 0xd5 0x5f 0xd5
+# CHECK: r17 = add(r21.l, r31.h):<<16
+0x51 0xd5 0x5f 0xd5
+# CHECK: r17 = add(r21.h, r31.l):<<16
+0x71 0xd5 0x5f 0xd5
+# CHECK: r17 = add(r21.h, r31.h):<<16
+0x91 0xd5 0x5f 0xd5
+# CHECK: r17 = add(r21.l, r31.l):sat:<<16
+0xb1 0xd5 0x5f 0xd5
+# CHECK: r17 = add(r21.l, r31.h):sat:<<16
+0xd1 0xd5 0x5f 0xd5
+# CHECK: r17 = add(r21.h, r31.l):sat:<<16
+0xf1 0xd5 0x5f 0xd5
+# CHECK: r17 = add(r21.h, r31.h):sat:<<16
+0x70 0xde 0xd4 0xc2
+# CHECK: r17:16 = add(r21:20, r31:30, p3):carry
+0x70 0xde 0xf4 0xc2
+# CHECK: r17:16 = sub(r21:20, r31:30, p3):carry
+0x90 0xc0 0x94 0x80
+# CHECK: r17:16 = not(r21:20)
+0xf0 0xde 0x14 0xd3
+# CHECK: r17:16 = add(r21:20, r31:30)
+0xb0 0xde 0x74 0xd3
+# CHECK: r17:16 = add(r21:20, r31:30):sat
+0xd0 0xde 0x74 0xd3
+# CHECK: r17:16 = add(r21:20, r31:30):raw:lo
+0xf0 0xde 0x74 0xd3
+# CHECK: r17:16 = add(r21:20, r31:30):raw:hi
+0x10 0xde 0xf4 0xd3
+# CHECK: r17:16 = and(r21:20, r31:30)
+0x30 0xd4 0xfe 0xd3
+# CHECK: r17:16 = and(r21:20, ~r31:30)
+0x50 0xde 0xf4 0xd3
+# CHECK: r17:16 = or(r21:20, r31:30)
+0x70 0xd4 0xfe 0xd3
+# CHECK: r17:16 = or(r21:20, ~r31:30)
+0x10 0xde 0x94 0xca
+# CHECK: r17:16 ^= xor(r21:20, r31:30)
+0xf1 0xc3 0x15 0xda
+# CHECK: r17 |= and(r21, #31)
+0xf5 0xc3 0x51 0xda
+# CHECK: r17 = or(r21, and(r17, #31))
+0xf1 0xc3 0x95 0xda
+# CHECK: r17 |= or(r21, #31)
+0x11 0xdf 0x35 0xef
+# CHECK: r17 |= and(r21, ~r31)
+0x31 0xdf 0x35 0xef
+# CHECK: r17 &= and(r21, ~r31)
+0x51 0xdf 0x35 0xef
+# CHECK: r17 ^= and(r21, ~r31)
+0x11 0xdf 0x55 0xef
+# CHECK: r17 &= and(r21, r31)
+0x31 0xdf 0x55 0xef
+# CHECK: r17 &= or(r21, r31)
+0x51 0xdf 0x55 0xef
+# CHECK: r17 &= xor(r21, r31)
+0x71 0xdf 0x55 0xef
+# CHECK: r17 |= and(r21, r31)
+0x71 0xdf 0x95 0xef
+# CHECK: r17 ^= xor(r21, r31)
+0x11 0xdf 0xd5 0xef
+# CHECK: r17 |= or(r21, r31)
+0x31 0xdf 0xd5 0xef
+# CHECK: r17 |= xor(r21, r31)
+0x51 0xdf 0xd5 0xef
+# CHECK: r17 ^= and(r21, r31)
+0x71 0xdf 0xd5 0xef
+# CHECK: r17 ^= or(r21, r31)
+0x11 0xdf 0xd5 0xd5
+# CHECK: r17 = max(r21, r31)
+0x91 0xdf 0xd5 0xd5
+# CHECK: r17 = maxu(r21, r31)
+0x90 0xde 0xd4 0xd3
+# CHECK: r17:16 = max(r21:20, r31:30)
+0xb0 0xde 0xd4 0xd3
+# CHECK: r17:16 = maxu(r21:20, r31:30)
+0x11 0xd5 0xbf 0xd5
+# CHECK: r17 = min(r21, r31)
+0x91 0xd5 0xbf 0xd5
+# CHECK: r17 = minu(r21, r31)
+0xd0 0xd4 0xbe 0xd3
+# CHECK: r17:16 = min(r21:20, r31:30)
+0xf0 0xd4 0xbe 0xd3
+# CHECK: r17:16 = minu(r21:20, r31:30)
+0xf1 0xdf 0xf5 0xd3
+# CHECK: r17 = modwrap(r21, r31)
+0xb0 0xc0 0x94 0x80
+# CHECK: r17:16 = neg(r21:20)
+0xd1 0xc0 0x95 0x8c
+# CHECK: r17 = neg(r21):sat
+0x11 0xdf 0xf5 0x8c
+# CHECK: r17 = cround(r21, #31)
+0x91 0xdf 0xf5 0x8c
+# CHECK: r17 = round(r21, #31)
+0xd1 0xdf 0xf5 0x8c
+# CHECK: r17 = round(r21, #31):sat
+0x11 0xdf 0xd5 0xc6
+# CHECK: r17 = cround(r21, r31)
+0x91 0xdf 0xd5 0xc6
+# CHECK: r17 = round(r21, r31)
+0xd1 0xdf 0xd5 0xc6
+# CHECK: r17 = round(r21, r31):sat
+0x71 0xd5 0x1f 0xef
+# CHECK: r17 += sub(r21, r31)
+0x11 0xd5 0x3f 0xd5
+# CHECK: r17 = sub(r21.l, r31.l)
+0x51 0xd5 0x3f 0xd5
+# CHECK: r17 = sub(r21.l, r31.h)
+0x91 0xd5 0x3f 0xd5
+# CHECK: r17 = sub(r21.l, r31.l):sat
+0xd1 0xd5 0x3f 0xd5
+# CHECK: r17 = sub(r21.l, r31.h):sat
+0x11 0xd5 0x7f 0xd5
+# CHECK: r17 = sub(r21.l, r31.l):<<16
+0x31 0xd5 0x7f 0xd5
+# CHECK: r17 = sub(r21.l, r31.h):<<16
+0x51 0xd5 0x7f 0xd5
+# CHECK: r17 = sub(r21.h, r31.l):<<16
+0x71 0xd5 0x7f 0xd5
+# CHECK: r17 = sub(r21.h, r31.h):<<16
+0x91 0xd5 0x7f 0xd5
+# CHECK: r17 = sub(r21.l, r31.l):sat:<<16
+0xb1 0xd5 0x7f 0xd5
+# CHECK: r17 = sub(r21.l, r31.h):sat:<<16
+0xd1 0xd5 0x7f 0xd5
+# CHECK: r17 = sub(r21.h, r31.l):sat:<<16
+0xf1 0xd5 0x7f 0xd5
+# CHECK: r17 = sub(r21.h, r31.h):sat:<<16
+0x10 0xc0 0x55 0x84
+# CHECK: r17:16 = sxtw(r21)
+0x90 0xde 0xf4 0xd3
+# CHECK: r17:16 = xor(r21:20, r31:30)
diff --git a/test/MC/Disassembler/Hexagon/xtype_bit.txt b/test/MC/Disassembler/Hexagon/xtype_bit.txt
new file mode 100644
index 000000000000..d1ec38e02183
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/xtype_bit.txt
@@ -0,0 +1,92 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0x11 0xc0 0x54 0x88
+# CHECK: r17 = clb(r21:20)
+0x51 0xc0 0x54 0x88
+# CHECK: r17 = cl0(r21:20)
+0x91 0xc0 0x54 0x88
+# CHECK: r17 = cl1(r21:20)
+0x11 0xc0 0x74 0x88
+# CHECK: r17 = normamt(r21:20)
+0x51 0xd7 0x74 0x88
+# CHECK: r17 = add(clb(r21:20), #23)
+0x11 0xd7 0x35 0x8c
+# CHECK: r17 = add(clb(r21), #23)
+0x91 0xc0 0x15 0x8c
+# CHECK: r17 = clb(r21)
+0xb1 0xc0 0x15 0x8c
+# CHECK: r17 = cl0(r21)
+0xd1 0xc0 0x15 0x8c
+# CHECK: r17 = cl1(r21)
+0xf1 0xc0 0x15 0x8c
+# CHECK: r17 = normamt(r21)
+0x71 0xc0 0x74 0x88
+# CHECK: r17 = popcount(r21:20)
+0x51 0xc0 0xf4 0x88
+# CHECK: r17 = ct0(r21:20)
+0x91 0xc0 0xf4 0x88
+# CHECK: r17 = ct1(r21:20)
+0x91 0xc0 0x55 0x8c
+# CHECK: r17 = ct0(r21)
+0xb1 0xc0 0x55 0x8c
+# CHECK: r17 = ct1(r21)
+0xf0 0xdf 0x54 0x81
+# CHECK: r17:16 = extractu(r21:20, #31, #23)
+0xf0 0xdf 0x54 0x8a
+# CHECK: r17:16 = extract(r21:20, #31, #23)
+0xf1 0xdf 0x55 0x8d
+# CHECK: r17 = extractu(r21, #31, #23)
+0xf1 0xdf 0xd5 0x8d
+# CHECK: r17 = extract(r21, #31, #23)
+0x10 0xde 0x14 0xc1
+# CHECK: r17:16 = extractu(r21:20, r31:30)
+0x90 0xde 0xd4 0xc1
+# CHECK: r17:16 = extract(r21:20, r31:30)
+0x11 0xde 0x15 0xc9
+# CHECK: r17 = extractu(r21, r31:30)
+0x51 0xde 0x15 0xc9
+# CHECK: r17 = extract(r21, r31:30)
+0xf0 0xdf 0x54 0x83
+# CHECK: r17:16 = insert(r21:20, #31, #23)
+0xf1 0xdf 0x55 0x8f
+# CHECK: r17 = insert(r21, #31, #23)
+0x11 0xde 0x15 0xc8
+# CHECK: r17 = insert(r21, r31:30)
+0x10 0xde 0x14 0xca
+# CHECK: r17:16 = insert(r21:20, r31:30)
+0x90 0xc0 0xd4 0x80
+# CHECK: r17:16 = deinterleave(r21:20)
+0xb0 0xc0 0xd4 0x80
+# CHECK: r17:16 = interleave(r21:20)
+0xd0 0xde 0x94 0xc1
+# CHECK: r17:16 = lfs(r21:20, r31:30)
+0x11 0xde 0x14 0xd0
+# CHECK: r17 = parity(r21:20, r31:30)
+0x11 0xdf 0xf5 0xd5
+# CHECK: r17 = parity(r21, r31)
+0xd0 0xc0 0xd4 0x80
+# CHECK: r17:16 = brev(r21:20)
+0x11 0xdf 0xd5 0x8c
+# CHECK: r17 = setbit(r21, #31)
+0x31 0xdf 0xd5 0x8c
+# CHECK: r17 = clrbit(r21, #31)
+0x51 0xdf 0xd5 0x8c
+# CHECK: r17 = togglebit(r21, #31)
+0x11 0xdf 0x95 0xc6
+# CHECK: r17 = setbit(r21, r31)
+0x51 0xdf 0x95 0xc6
+# CHECK: r17 = clrbit(r21, r31)
+0x91 0xdf 0x95 0xc6
+# CHECK: r17 = togglebit(r21, r31)
+0x90 0xdf 0xd5 0x88
+# CHECK: r17:16 = bitsplit(r21, #31)
+0x10 0xdf 0x35 0xd4
+# CHECK: r17:16 = bitsplit(r21, r31)
+0xf1 0xcd 0x15 0x87
+# CHECK: r17 = tableidxb(r21, #7, #13):raw
+0xf1 0xcd 0x55 0x87
+# CHECK: r17 = tableidxh(r21, #7, #13):raw
+0xf1 0xcd 0x95 0x87
+# CHECK: r17 = tableidxw(r21, #7, #13):raw
+0xf1 0xcd 0xd5 0x87
+# CHECK: r17 = tableidxd(r21, #7, #13):raw
diff --git a/test/MC/Disassembler/Hexagon/xtype_fp.txt b/test/MC/Disassembler/Hexagon/xtype_fp.txt
new file mode 100644
index 000000000000..97c072730bd4
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/xtype_fp.txt
@@ -0,0 +1,110 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0x11 0xdf 0x15 0xeb
+# CHECK: r17 = sfadd(r21, r31)
+0x03 0xd5 0xf1 0x85
+# CHECK: p3 = sfclass(r17, #21)
+0xb3 0xc2 0x90 0xdc
+# CHECK: p3 = dfclass(r17:16, #21)
+0x03 0xd5 0xf1 0xc7
+# CHECK: p3 = sfcmp.ge(r17, r21)
+0x23 0xd5 0xf1 0xc7
+# CHECK: p3 = sfcmp.uo(r17, r21)
+0x63 0xd5 0xf1 0xc7
+# CHECK: p3 = sfcmp.eq(r17, r21)
+0x83 0xd5 0xf1 0xc7
+# CHECK: p3 = sfcmp.gt(r17, r21)
+0x03 0xd4 0xf0 0xd2
+# CHECK: p3 = dfcmp.eq(r17:16, r21:20)
+0x23 0xd4 0xf0 0xd2
+# CHECK: p3 = dfcmp.gt(r17:16, r21:20)
+0x43 0xd4 0xf0 0xd2
+# CHECK: p3 = dfcmp.ge(r17:16, r21:20)
+0x63 0xd4 0xf0 0xd2
+# CHECK: p3 = dfcmp.uo(r17:16, r21:20)
+0x10 0xc0 0x95 0x84
+# CHECK: r17:16 = convert_sf2df(r21)
+0x31 0xc0 0x14 0x88
+# CHECK: r17 = convert_df2sf(r21:20)
+0x50 0xc0 0xf4 0x80
+# CHECK: r17:16 = convert_ud2df(r21:20)
+0x70 0xc0 0xf4 0x80
+# CHECK: r17:16 = convert_d2df(r21:20)
+0x30 0xc0 0x95 0x84
+# CHECK: r17:16 = convert_uw2df(r21)
+0x50 0xc0 0x95 0x84
+# CHECK: r17:16 = convert_w2df(r21)
+0x31 0xc0 0x34 0x88
+# CHECK: r17 = convert_ud2sf(r21:20)
+0x31 0xc0 0x54 0x88
+# CHECK: r17 = convert_d2sf(r21:20)
+0x11 0xc0 0x35 0x8b
+# CHECK: r17 = convert_uw2sf(r21)
+0x11 0xc0 0x55 0x8b
+# CHECK: r17 = convert_w2sf(r21)
+0x10 0xc0 0xf4 0x80
+# CHECK: r17:16 = convert_df2d(r21:20)
+0x30 0xc0 0xf4 0x80
+# CHECK: r17:16 = convert_df2ud(r21:20)
+0xd0 0xc0 0xf4 0x80
+# CHECK: r17:16 = convert_df2d(r21:20):chop
+0xf0 0xc0 0xf4 0x80
+# CHECK: r17:16 = convert_df2ud(r21:20):chop
+0x70 0xc0 0x95 0x84
+# CHECK: r17:16 = convert_sf2ud(r21)
+0x90 0xc0 0x95 0x84
+# CHECK: r17:16 = convert_sf2d(r21)
+0xb0 0xc0 0x95 0x84
+# CHECK: r17:16 = convert_sf2ud(r21):chop
+0xd0 0xc0 0x95 0x84
+# CHECK: r17:16 = convert_sf2d(r21):chop
+0x31 0xc0 0x74 0x88
+# CHECK: r17 = convert_df2uw(r21:20)
+0x31 0xc0 0x94 0x88
+# CHECK: r17 = convert_df2w(r21:20)
+0x31 0xc0 0xb4 0x88
+# CHECK: r17 = convert_df2uw(r21:20):chop
+0x31 0xc0 0xf4 0x88
+# CHECK: r17 = convert_df2w(r21:20):chop
+0x11 0xc0 0x75 0x8b
+# CHECK: r17 = convert_sf2uw(r21)
+0x31 0xc0 0x75 0x8b
+# CHECK: r17 = convert_sf2uw(r21):chop
+0x11 0xc0 0x95 0x8b
+# CHECK: r17 = convert_sf2w(r21)
+0x31 0xc0 0x95 0x8b
+# CHECK: r17 = convert_sf2w(r21):chop
+0x11 0xc0 0xb5 0x8b
+# CHECK: r17 = sffixupr(r21)
+0x11 0xdf 0xd5 0xeb
+# CHECK: r17 = sffixupn(r21, r31)
+0x31 0xdf 0xd5 0xeb
+# CHECK: r17 = sffixupd(r21, r31)
+0x91 0xdf 0x15 0xef
+# CHECK: r17 += sfmpy(r21, r31)
+0xb1 0xdf 0x15 0xef
+# CHECK: r17 -= sfmpy(r21, r31)
+0xf1 0xdf 0x75 0xef
+# CHECK: r17 += sfmpy(r21, r31, p3):scale
+0xd1 0xdf 0x15 0xef
+# CHECK: r17 += sfmpy(r21, r31):lib
+0xf1 0xdf 0x15 0xef
+# CHECK: r17 -= sfmpy(r21, r31):lib
+0xb1 0xc2 0x00 0xd6
+# CHECK: r17 = sfmake(#21):pos
+0xb1 0xc2 0x40 0xd6
+# CHECK: r17 = sfmake(#21):neg
+0xb0 0xc2 0x00 0xd9
+# CHECK: r17:16 = dfmake(#21):pos
+0xb0 0xc2 0x40 0xd9
+# CHECK: r17:16 = dfmake(#21):neg
+0x11 0xdf 0x95 0xeb
+# CHECK: r17 = sfmax(r21, r31)
+0x31 0xdf 0x95 0xeb
+# CHECK: r17 = sfmin(r21, r31)
+0x11 0xdf 0x55 0xeb
+# CHECK: r17 = sfmpy(r21, r31)
+0xf1 0xdf 0xf5 0xeb
+# CHECK: r17, p3 = sfrecipa(r21, r31)
+0x31 0xdf 0x15 0xeb
+# CHECK: r17 = sfsub(r21, r31)
diff --git a/test/MC/Disassembler/Hexagon/xtype_mpy.txt b/test/MC/Disassembler/Hexagon/xtype_mpy.txt
new file mode 100644
index 000000000000..b6ccaa6f31e0
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/xtype_mpy.txt
@@ -0,0 +1,202 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0xb1 0xdf 0x35 0xd7
+# CHECK: r17 = add(#21, mpyi(r21, r31))
+0xbf 0xd1 0x35 0xd8
+# CHECK: r17 = add(#21, mpyi(r21, #31))
+0xb5 0xd1 0x3f 0xdf
+# CHECK: r17 = add(r21, mpyi(#84, r31))
+0xf5 0xf1 0xb5 0xdf
+# CHECK: r17 = add(r21, mpyi(r21, #31))
+0x15 0xd1 0x1f 0xe3
+# CHECK: r17 = add(r21, mpyi(r17, r31))
+0xf1 0xc3 0x15 0xe0
+# CHECK: r17 =+ mpyi(r21, #31)
+0xf1 0xc3 0x95 0xe0
+# CHECK: r17 =- mpyi(r21, #31)
+0xf1 0xc3 0x15 0xe1
+# CHECK: r17 += mpyi(r21, #31)
+0xf1 0xc3 0x95 0xe1
+# CHECK: r17 -= mpyi(r21, #31)
+0x11 0xdf 0x15 0xed
+# CHECK: r17 = mpyi(r21, r31)
+0x11 0xdf 0x15 0xef
+# CHECK: r17 += mpyi(r21, r31)
+0x10 0xdf 0x95 0xe4
+# CHECK: r17:16 = mpy(r21.l, r31.l):<<1
+0x30 0xdf 0x95 0xe4
+# CHECK: r17:16 = mpy(r21.l, r31.h):<<1
+0x50 0xdf 0x95 0xe4
+# CHECK: r17:16 = mpy(r21.h, r31.l):<<1
+0x70 0xdf 0x95 0xe4
+# CHECK: r17:16 = mpy(r21.h, r31.h):<<1
+0x10 0xdf 0xb5 0xe4
+# CHECK: r17:16 = mpy(r21.l, r31.l):<<1:rnd
+0x30 0xdf 0xb5 0xe4
+# CHECK: r17:16 = mpy(r21.l, r31.h):<<1:rnd
+0x50 0xdf 0xb5 0xe4
+# CHECK: r17:16 = mpy(r21.h, r31.l):<<1:rnd
+0x70 0xdf 0xb5 0xe4
+# CHECK: r17:16 = mpy(r21.h, r31.h):<<1:rnd
+0x10 0xdf 0x95 0xe6
+# CHECK: r17:16 += mpy(r21.l, r31.l):<<1
+0x30 0xdf 0x95 0xe6
+# CHECK: r17:16 += mpy(r21.l, r31.h):<<1
+0x50 0xdf 0x95 0xe6
+# CHECK: r17:16 += mpy(r21.h, r31.l):<<1
+0x70 0xdf 0x95 0xe6
+# CHECK: r17:16 += mpy(r21.h, r31.h):<<1
+0x10 0xdf 0xb5 0xe6
+# CHECK: r17:16 -= mpy(r21.l, r31.l):<<1
+0x30 0xdf 0xb5 0xe6
+# CHECK: r17:16 -= mpy(r21.l, r31.h):<<1
+0x50 0xdf 0xb5 0xe6
+# CHECK: r17:16 -= mpy(r21.h, r31.l):<<1
+0x70 0xdf 0xb5 0xe6
+# CHECK: r17:16 -= mpy(r21.h, r31.h):<<1
+0x11 0xdf 0x95 0xec
+# CHECK: r17 = mpy(r21.l, r31.l):<<1
+0x31 0xdf 0x95 0xec
+# CHECK: r17 = mpy(r21.l, r31.h):<<1
+0x51 0xdf 0x95 0xec
+# CHECK: r17 = mpy(r21.h, r31.l):<<1
+0x71 0xdf 0x95 0xec
+# CHECK: r17 = mpy(r21.h, r31.h):<<1
+0x91 0xdf 0x95 0xec
+# CHECK: r17 = mpy(r21.l, r31.l):<<1:sat
+0xb1 0xdf 0x95 0xec
+# CHECK: r17 = mpy(r21.l, r31.h):<<1:sat
+0xd1 0xdf 0x95 0xec
+# CHECK: r17 = mpy(r21.h, r31.l):<<1:sat
+0xf1 0xdf 0x95 0xec
+# CHECK: r17 = mpy(r21.h, r31.h):<<1:sat
+0x11 0xdf 0xb5 0xec
+# CHECK: r17 = mpy(r21.l, r31.l):<<1:rnd
+0x31 0xdf 0xb5 0xec
+# CHECK: r17 = mpy(r21.l, r31.h):<<1:rnd
+0x51 0xdf 0xb5 0xec
+# CHECK: r17 = mpy(r21.h, r31.l):<<1:rnd
+0x71 0xdf 0xb5 0xec
+# CHECK: r17 = mpy(r21.h, r31.h):<<1:rnd
+0x91 0xdf 0xb5 0xec
+# CHECK: r17 = mpy(r21.l, r31.l):<<1:rnd:sat
+0xb1 0xdf 0xb5 0xec
+# CHECK: r17 = mpy(r21.l, r31.h):<<1:rnd:sat
+0xd1 0xdf 0xb5 0xec
+# CHECK: r17 = mpy(r21.h, r31.l):<<1:rnd:sat
+0xf1 0xdf 0xb5 0xec
+# CHECK: r17 = mpy(r21.h, r31.h):<<1:rnd:sat
+0x11 0xdf 0x95 0xee
+# CHECK: r17 += mpy(r21.l, r31.l):<<1
+0x31 0xdf 0x95 0xee
+# CHECK: r17 += mpy(r21.l, r31.h):<<1
+0x51 0xdf 0x95 0xee
+# CHECK: r17 += mpy(r21.h, r31.l):<<1
+0x71 0xdf 0x95 0xee
+# CHECK: r17 += mpy(r21.h, r31.h):<<1
+0x91 0xdf 0x95 0xee
+# CHECK: r17 += mpy(r21.l, r31.l):<<1:sat
+0xb1 0xdf 0x95 0xee
+# CHECK: r17 += mpy(r21.l, r31.h):<<1:sat
+0xd1 0xdf 0x95 0xee
+# CHECK: r17 += mpy(r21.h, r31.l):<<1:sat
+0xf1 0xdf 0x95 0xee
+# CHECK: r17 += mpy(r21.h, r31.h):<<1:sat
+0x11 0xdf 0xb5 0xee
+# CHECK: r17 -= mpy(r21.l, r31.l):<<1
+0x31 0xdf 0xb5 0xee
+# CHECK: r17 -= mpy(r21.l, r31.h):<<1
+0x51 0xdf 0xb5 0xee
+# CHECK: r17 -= mpy(r21.h, r31.l):<<1
+0x71 0xdf 0xb5 0xee
+# CHECK: r17 -= mpy(r21.h, r31.h):<<1
+0x91 0xdf 0xb5 0xee
+# CHECK: r17 -= mpy(r21.l, r31.l):<<1:sat
+0xb1 0xdf 0xb5 0xee
+# CHECK: r17 -= mpy(r21.l, r31.h):<<1:sat
+0xd1 0xdf 0xb5 0xee
+# CHECK: r17 -= mpy(r21.h, r31.l):<<1:sat
+0xf1 0xdf 0xb5 0xee
+# CHECK: r17 -= mpy(r21.h, r31.h):<<1:sat
+0x10 0xdf 0xd5 0xe4
+# CHECK: r17:16 = mpyu(r21.l, r31.l):<<1
+0x30 0xdf 0xd5 0xe4
+# CHECK: r17:16 = mpyu(r21.l, r31.h):<<1
+0x50 0xdf 0xd5 0xe4
+# CHECK: r17:16 = mpyu(r21.h, r31.l):<<1
+0x70 0xdf 0xd5 0xe4
+# CHECK: r17:16 = mpyu(r21.h, r31.h):<<1
+0x10 0xdf 0xd5 0xe6
+# CHECK: r17:16 += mpyu(r21.l, r31.l):<<1
+0x30 0xdf 0xd5 0xe6
+# CHECK: r17:16 += mpyu(r21.l, r31.h):<<1
+0x50 0xdf 0xd5 0xe6
+# CHECK: r17:16 += mpyu(r21.h, r31.l):<<1
+0x70 0xdf 0xd5 0xe6
+# CHECK: r17:16 += mpyu(r21.h, r31.h):<<1
+0x10 0xdf 0xf5 0xe6
+# CHECK: r17:16 -= mpyu(r21.l, r31.l):<<1
+0x30 0xdf 0xf5 0xe6
+# CHECK: r17:16 -= mpyu(r21.l, r31.h):<<1
+0x50 0xdf 0xf5 0xe6
+# CHECK: r17:16 -= mpyu(r21.h, r31.l):<<1
+0x70 0xdf 0xf5 0xe6
+# CHECK: r17:16 -= mpyu(r21.h, r31.h):<<1
+0x11 0xdf 0xd5 0xec
+# CHECK: r17 = mpyu(r21.l, r31.l):<<1
+0x31 0xdf 0xd5 0xec
+# CHECK: r17 = mpyu(r21.l, r31.h):<<1
+0x51 0xdf 0xd5 0xec
+# CHECK: r17 = mpyu(r21.h, r31.l):<<1
+0x71 0xdf 0xd5 0xec
+# CHECK: r17 = mpyu(r21.h, r31.h):<<1
+0x11 0xdf 0xd5 0xee
+# CHECK: r17 += mpyu(r21.l, r31.l):<<1
+0x31 0xdf 0xd5 0xee
+# CHECK: r17 += mpyu(r21.l, r31.h):<<1
+0x51 0xdf 0xd5 0xee
+# CHECK: r17 += mpyu(r21.h, r31.l):<<1
+0x71 0xdf 0xd5 0xee
+# CHECK: r17 += mpyu(r21.h, r31.h):<<1
+0x11 0xdf 0xf5 0xee
+# CHECK: r17 -= mpyu(r21.l, r31.l):<<1
+0x31 0xdf 0xf5 0xee
+# CHECK: r17 -= mpyu(r21.l, r31.h):<<1
+0x51 0xdf 0xf5 0xee
+# CHECK: r17 -= mpyu(r21.h, r31.l):<<1
+0x71 0xdf 0xf5 0xee
+# CHECK: r17 -= mpyu(r21.h, r31.h):<<1
+0x31 0xdf 0x15 0xed
+# CHECK: r17 = mpy(r21, r31)
+0x31 0xdf 0x35 0xed
+# CHECK: r17 = mpy(r21, r31):rnd
+0x31 0xdf 0x55 0xed
+# CHECK: r17 = mpyu(r21, r31)
+0x31 0xdf 0x75 0xed
+# CHECK: r17 = mpysu(r21, r31)
+0x11 0xdf 0xb5 0xed
+# CHECK: r17 = mpy(r21, r31.h):<<1:sat
+0x31 0xdf 0xb5 0xed
+# CHECK: r17 = mpy(r21, r31.l):<<1:sat
+0x11 0xdf 0xf5 0xed
+# CHECK: r17 = mpy(r21, r31):<<1:sat
+0x91 0xdf 0xb5 0xed
+# CHECK: r17 = mpy(r21, r31.h):<<1:rnd:sat
+0x91 0xdf 0xf5 0xed
+# CHECK: r17 = mpy(r21, r31.l):<<1:rnd:sat
+0x11 0xdf 0x75 0xef
+# CHECK: r17 += mpy(r21, r31):<<1:sat
+0x31 0xdf 0x75 0xef
+# CHECK: r17 -= mpy(r21, r31):<<1:sat
+0x10 0xdf 0x15 0xe5
+# CHECK: r17:16 = mpy(r21, r31)
+0x10 0xdf 0x55 0xe5
+# CHECK: r17:16 = mpyu(r21, r31)
+0x10 0xdf 0x15 0xe7
+# CHECK: r17:16 += mpy(r21, r31)
+0x10 0xdf 0x35 0xe7
+# CHECK: r17:16 -= mpy(r21, r31)
+0x10 0xdf 0x55 0xe7
+# CHECK: r17:16 += mpyu(r21, r31)
+0x10 0xdf 0x75 0xe7
+# CHECK: r17:16 -= mpyu(r21, r31)
diff --git a/test/MC/Disassembler/Hexagon/xtype_perm.txt b/test/MC/Disassembler/Hexagon/xtype_perm.txt
new file mode 100644
index 000000000000..1de3d11d820a
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/xtype_perm.txt
@@ -0,0 +1,14 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0x11 0xc0 0xd4 0x88
+# CHECK: r17 = sat(r21:20)
+0x91 0xc0 0xd5 0x8c
+# CHECK: r17 = sath(r21)
+0xb1 0xc0 0xd5 0x8c
+# CHECK: r17 = satuh(r21)
+0xd1 0xc0 0xd5 0x8c
+# CHECK: r17 = satub(r21)
+0xf1 0xc0 0xd5 0x8c
+# CHECK: r17 = satb(r21)
+0xf1 0xc0 0x95 0x8c
+# CHECK: r17 = swiz(r21)
diff --git a/test/MC/Disassembler/Hexagon/xtype_pred.txt b/test/MC/Disassembler/Hexagon/xtype_pred.txt
new file mode 100644
index 000000000000..0fa4f05bce82
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/xtype_pred.txt
@@ -0,0 +1,66 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0x83 0xf4 0x10 0xd2
+# CHECK: p3 = boundscheck(r17:16, r21:20):raw:lo
+0xa3 0xf4 0x10 0xd2
+# CHECK: p3 = boundscheck(r17:16, r21:20):raw:hi
+0x43 0xd5 0xd1 0xc7
+# CHECK: p3 = cmpb.gt(r17, r21)
+0xc3 0xd5 0xd1 0xc7
+# CHECK: p3 = cmpb.eq(r17, r21)
+0xe3 0xd5 0xd1 0xc7
+# CHECK: p3 = cmpb.gtu(r17, r21)
+0xa3 0xc2 0x11 0xdd
+# CHECK: p3 = cmpb.eq(r17, #21)
+0xa3 0xc2 0x31 0xdd
+# CHECK: p3 = cmpb.gt(r17, #21)
+0xa3 0xc2 0x51 0xdd
+# CHECK: p3 = cmpb.gtu(r17, #21)
+0x63 0xd5 0xd1 0xc7
+# CHECK: p3 = cmph.eq(r17, r21)
+0x83 0xd5 0xd1 0xc7
+# CHECK: p3 = cmph.gt(r17, r21)
+0xa3 0xd5 0xd1 0xc7
+# CHECK: p3 = cmph.gtu(r17, r21)
+0xab 0xc2 0x11 0xdd
+# CHECK: p3 = cmph.eq(r17, #21)
+0xab 0xc2 0x31 0xdd
+# CHECK: p3 = cmph.gt(r17, #21)
+0xab 0xc2 0x51 0xdd
+# CHECK: p3 = cmph.gtu(r17, #21)
+0x03 0xde 0x94 0xd2
+# CHECK: p3 = cmp.eq(r21:20, r31:30)
+0x43 0xde 0x94 0xd2
+# CHECK: p3 = cmp.gt(r21:20, r31:30)
+0x83 0xde 0x94 0xd2
+# CHECK: p3 = cmp.gtu(r21:20, r31:30)
+0x03 0xd5 0x91 0x85
+# CHECK: p3 = bitsclr(r17, #21)
+0x03 0xd5 0xb1 0x85
+# CHECK: p3 = !bitsclr(r17, #21)
+0x03 0xd5 0x51 0xc7
+# CHECK: p3 = bitsset(r17, r21)
+0x03 0xd5 0x71 0xc7
+# CHECK: p3 = !bitsset(r17, r21)
+0x03 0xd5 0x91 0xc7
+# CHECK: p3 = bitsclr(r17, r21)
+0x03 0xd5 0xb1 0xc7
+# CHECK: p3 = !bitsclr(r17, r21)
+0x10 0xc3 0x00 0x86
+# CHECK: r17:16 = mask(p3)
+0x63 0xf5 0x10 0xd2
+# CHECK: p3 = tlbmatch(r17:16, r21)
+0x03 0xc0 0x45 0x85
+# CHECK: p3 = r5
+0x05 0xc0 0x43 0x89
+# CHECK: r5 = p3
+0x03 0xd5 0x11 0x85
+# CHECK: p3 = tstbit(r17, #21)
+0x03 0xd5 0x31 0x85
+# CHECK: p3 = !tstbit(r17, #21)
+0x03 0xd5 0x11 0xc7
+# CHECK: p3 = tstbit(r17, r21)
+0x03 0xd5 0x31 0xc7
+# CHECK: p3 = !tstbit(r17, r21)
+0x11 0xc2 0x03 0x89
+# CHECK: r17 = vitpack(p3, p2)
+\ No newline at end of file
diff --git a/test/MC/Disassembler/Hexagon/xtype_shift.txt b/test/MC/Disassembler/Hexagon/xtype_shift.txt
new file mode 100644
index 000000000000..9912fd3f1f41
--- /dev/null
+++ b/test/MC/Disassembler/Hexagon/xtype_shift.txt
@@ -0,0 +1,188 @@
+# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
+
+0x10 0xdf 0x14 0x80
+# CHECK: r17:16 = asr(r21:20, #31)
+0x30 0xdf 0x14 0x80
+# CHECK: r17:16 = lsr(r21:20, #31)
+0x50 0xdf 0x14 0x80
+# CHECK: r17:16 = asl(r21:20, #31)
+0x11 0xdf 0x15 0x8c
+# CHECK: r17 = asr(r21, #31)
+0x31 0xdf 0x15 0x8c
+# CHECK: r17 = lsr(r21, #31)
+0x51 0xdf 0x15 0x8c
+# CHECK: r17 = asl(r21, #31)
+0x10 0xdf 0x14 0x82
+# CHECK: r17:16 -= asr(r21:20, #31)
+0x30 0xdf 0x14 0x82
+# CHECK: r17:16 -= lsr(r21:20, #31)
+0x50 0xdf 0x14 0x82
+# CHECK: r17:16 -= asl(r21:20, #31)
+0x90 0xdf 0x14 0x82
+# CHECK: r17:16 += asr(r21:20, #31)
+0xb0 0xdf 0x14 0x82
+# CHECK: r17:16 += lsr(r21:20, #31)
+0xd0 0xdf 0x14 0x82
+# CHECK: r17:16 += asl(r21:20, #31)
+0x11 0xdf 0x15 0x8e
+# CHECK: r17 -= asr(r21, #31)
+0x31 0xdf 0x15 0x8e
+# CHECK: r17 -= lsr(r21, #31)
+0x51 0xdf 0x15 0x8e
+# CHECK: r17 -= asl(r21, #31)
+0x91 0xdf 0x15 0x8e
+# CHECK: r17 += asr(r21, #31)
+0xb1 0xdf 0x15 0x8e
+# CHECK: r17 += lsr(r21, #31)
+0xd1 0xdf 0x15 0x8e
+# CHECK: r17 += asl(r21, #31)
+0x4c 0xf7 0x11 0xde
+# CHECK: r17 = add(#21, asl(r17, #23))
+0x4e 0xf7 0x11 0xde
+# CHECK: r17 = sub(#21, asl(r17, #23))
+0x5c 0xf7 0x11 0xde
+# CHECK: r17 = add(#21, lsr(r17, #23))
+0x5e 0xf7 0x11 0xde
+# CHECK: r17 = sub(#21, lsr(r17, #23))
+0xf1 0xd5 0x1f 0xc4
+# CHECK: r17 = addasl(r21, r31, #7)
+0x10 0xdf 0x54 0x82
+# CHECK: r17:16 &= asr(r21:20, #31)
+0x30 0xdf 0x54 0x82
+# CHECK: r17:16 &= lsr(r21:20, #31)
+0x50 0xdf 0x54 0x82
+# CHECK: r17:16 &= asl(r21:20, #31)
+0x90 0xdf 0x54 0x82
+# CHECK: r17:16 |= asr(r21:20, #31)
+0xb0 0xdf 0x54 0x82
+# CHECK: r17:16 |= lsr(r21:20, #31)
+0xd0 0xdf 0x54 0x82
+# CHECK: r17:16 |= asl(r21:20, #31)
+0x30 0xdf 0x94 0x82
+# CHECK: r17:16 ^= lsr(r21:20, #31)
+0x50 0xdf 0x94 0x82
+# CHECK: r17:16 ^= asl(r21:20, #31)
+0x48 0xff 0x11 0xde
+# CHECK: r17 = and(#21, asl(r17, #31))
+0x4a 0xff 0x11 0xde
+# CHECK: r17 = or(#21, asl(r17, #31))
+0x58 0xff 0x11 0xde
+# CHECK: r17 = and(#21, lsr(r17, #31))
+0x5a 0xff 0x11 0xde
+# CHECK: r17 = or(#21, lsr(r17, #31))
+0x11 0xdf 0x55 0x8e
+# CHECK: r17 &= asr(r21, #31)
+0x31 0xdf 0x55 0x8e
+# CHECK: r17 &= lsr(r21, #31)
+0x51 0xdf 0x55 0x8e
+# CHECK: r17 &= asl(r21, #31)
+0x91 0xdf 0x55 0x8e
+# CHECK: r17 |= asr(r21, #31)
+0xb1 0xdf 0x55 0x8e
+# CHECK: r17 |= lsr(r21, #31)
+0xd1 0xdf 0x55 0x8e
+# CHECK: r17 |= asl(r21, #31)
+0x31 0xdf 0x95 0x8e
+# CHECK: r17 ^= lsr(r21, #31)
+0x51 0xdf 0x95 0x8e
+# CHECK: r17 ^= asl(r21, #31)
+0xf0 0xdf 0xd4 0x80
+# CHECK: r17:16 = asr(r21:20, #31):rnd
+0x11 0xdf 0x55 0x8c
+# CHECK: r17 = asr(r21, #31):rnd
+0x51 0xdf 0x55 0x8c
+# CHECK: r17 = asl(r21, #31):sat
+0x10 0xdf 0x94 0xc3
+# CHECK: r17:16 = asr(r21:20, r31)
+0x50 0xdf 0x94 0xc3
+# CHECK: r17:16 = lsr(r21:20, r31)
+0x90 0xdf 0x94 0xc3
+# CHECK: r17:16 = asl(r21:20, r31)
+0xd0 0xdf 0x94 0xc3
+# CHECK: r17:16 = lsl(r21:20, r31)
+0x11 0xdf 0x55 0xc6
+# CHECK: r17 = asr(r21, r31)
+0x51 0xdf 0x55 0xc6
+# CHECK: r17 = lsr(r21, r31)
+0x91 0xdf 0x55 0xc6
+# CHECK: r17 = asl(r21, r31)
+0xd1 0xdf 0x55 0xc6
+# CHECK: r17 = lsl(r21, r31)
+0xf1 0xdf 0x8a 0xc6
+# CHECK: r17 = lsl(#21, r31)
+0x10 0xdf 0x94 0xcb
+# CHECK: r17:16 -= asr(r21:20, r31)
+0x50 0xdf 0x94 0xcb
+# CHECK: r17:16 -= lsr(r21:20, r31)
+0x90 0xdf 0x94 0xcb
+# CHECK: r17:16 -= asl(r21:20, r31)
+0xd0 0xdf 0x94 0xcb
+# CHECK: r17:16 -= lsl(r21:20, r31)
+0x10 0xdf 0xd4 0xcb
+# CHECK: r17:16 += asr(r21:20, r31)
+0x50 0xdf 0xd4 0xcb
+# CHECK: r17:16 += lsr(r21:20, r31)
+0x90 0xdf 0xd4 0xcb
+# CHECK: r17:16 += asl(r21:20, r31)
+0xd0 0xdf 0xd4 0xcb
+# CHECK: r17:16 += lsl(r21:20, r31)
+0x11 0xdf 0x95 0xcc
+# CHECK: r17 -= asr(r21, r31)
+0x51 0xdf 0x95 0xcc
+# CHECK: r17 -= lsr(r21, r31)
+0x91 0xdf 0x95 0xcc
+# CHECK: r17 -= asl(r21, r31)
+0xd1 0xdf 0x95 0xcc
+# CHECK: r17 -= lsl(r21, r31)
+0x11 0xdf 0xd5 0xcc
+# CHECK: r17 += asr(r21, r31)
+0x51 0xdf 0xd5 0xcc
+# CHECK: r17 += lsr(r21, r31)
+0x91 0xdf 0xd5 0xcc
+# CHECK: r17 += asl(r21, r31)
+0xd1 0xdf 0xd5 0xcc
+# CHECK: r17 += lsl(r21, r31)
+0x10 0xdf 0x14 0xcb
+# CHECK: r17:16 |= asr(r21:20, r31)
+0x50 0xdf 0x14 0xcb
+# CHECK: r17:16 |= lsr(r21:20, r31)
+0x90 0xdf 0x14 0xcb
+# CHECK: r17:16 |= asl(r21:20, r31)
+0xd0 0xdf 0x14 0xcb
+# CHECK: r17:16 |= lsl(r21:20, r31)
+0x10 0xdf 0x54 0xcb
+# CHECK: r17:16 &= asr(r21:20, r31)
+0x50 0xdf 0x54 0xcb
+# CHECK: r17:16 &= lsr(r21:20, r31)
+0x90 0xdf 0x54 0xcb
+# CHECK: r17:16 &= asl(r21:20, r31)
+0xd0 0xdf 0x54 0xcb
+# CHECK: r17:16 &= lsl(r21:20, r31)
+0x10 0xdf 0x74 0xcb
+# CHECK: r17:16 ^= asr(r21:20, r31)
+0x50 0xdf 0x74 0xcb
+# CHECK: r17:16 ^= lsr(r21:20, r31)
+0x90 0xdf 0x74 0xcb
+# CHECK: r17:16 ^= asl(r21:20, r31)
+0xd0 0xdf 0x74 0xcb
+# CHECK: r17:16 ^= lsl(r21:20, r31)
+0x11 0xdf 0x15 0xcc
+# CHECK: r17 |= asr(r21, r31)
+0x51 0xdf 0x15 0xcc
+# CHECK: r17 |= lsr(r21, r31)
+0x91 0xdf 0x15 0xcc
+# CHECK: r17 |= asl(r21, r31)
+0xd1 0xdf 0x15 0xcc
+# CHECK: r17 |= lsl(r21, r31)
+0x11 0xdf 0x55 0xcc
+# CHECK: r17 &= asr(r21, r31)
+0x51 0xdf 0x55 0xcc
+# CHECK: r17 &= lsr(r21, r31)
+0x91 0xdf 0x55 0xcc
+# CHECK: r17 &= asl(r21, r31)
+0xd1 0xdf 0x55 0xcc
+# CHECK: r17 &= lsl(r21, r31)
+0x11 0xdf 0x15 0xc6
+# CHECK: r17 = asr(r21, r31):sat
+0x91 0xdf 0x15 0xc6
+# CHECK: r17 = asl(r21, r31):sat
diff --git a/test/MC/Disassembler/Mips/micromips.txt b/test/MC/Disassembler/Mips/micromips.txt
index 1458ce2ed4b3..07c1df91441a 100644
--- a/test/MC/Disassembler/Mips/micromips.txt
+++ b/test/MC/Disassembler/Mips/micromips.txt
@@ -16,6 +16,21 @@
 # CHECK: addiu $9, $6, -15001
 0x31 0x26 0xc5 0x67
 
+# CHECK: addiusp -16
+0x4f 0xf9
+
+# CHECK: addiusp -1028
+0x4f 0xff
+
+# CHECK: addiusp -1032
+0x4f 0xfd
+
+# CHECK: addiusp 1024
+0x4c 0x01
+
+# CHECK: addiusp 1028
+0x4c 0x03
+
 # CHECK: addu $9, $6, $7
 0x00 0xe6 0x49 0x50
 
@@ -61,6 +76,9 @@
 # CHECK: andi $9, $6, 17767
 0xd1 0x26 0x45 0x67
 
+# CHECK: andi16 $16, $2, 31
+0x2c 0x29
+
 # CHECK: or $3, $4, $5
 0x00 0xa4 0x1a 0x90
 
@@ -136,6 +154,9 @@
 # CHECK: lw  $6, 4($5)
 0xfc 0xc5 0x00 0x04
 
+# CHECK: lw $6, 123($sp)
+0xfc 0xdd 0x00 0x7b
+
 # CHECK: sb $5, 8($4)
 0x18 0xa4 0x00 0x08
 
@@ -145,6 +166,9 @@
 # CHECK: sw  $5, 4($6)
 0xf8 0xa6 0x00 0x04
 
+# CHECK: sw $5, 123($sp)
+0xf8 0xbd 0x00 0x7b
+
 # CHECK: lwu $2, 8($4)
 0x60 0x44 0xe0 0x08
 
@@ -229,6 +253,9 @@
 # CHECK: jr $7
 0x00 0x07 0x0f 0x3c
 
+# CHECK: jraddiusp 20
+0x47 0x05
+
 # CHECK: beq $9, $6, 1332
 0x94 0xc9 0x02 0x9a
 
@@ -289,8 +316,161 @@
 # CHECK: tnei $9, 17767
 0x41 0x89 0x45 0x67
 
+# CHECK: cache 1, 8($5)
+0x20 0x25 0x60 0x08
+
+# CHECK: pref 1, 8($5)
+0x60 0x25 0x20 0x08
+
+# CHECK: ssnop
+0x00 0x00 0x08 0x00
+
+# CHECK: ehb
+0x00 0x00 0x18 0x00
+
+# CHECK: pause
+0x00 0x00 0x28 0x00
+
 # CHECK: ll $2, 8($4)
 0x60 0x44 0x30 0x08
 
 # CHECK: sc $2, 8($4)
 0x60 0x44 0xb0 0x08
+
+# CHECK: lwxs $2, $3($4)
+0x00 0x64 0x11 0x18
+
+# CHECK: bgezals $6, 1332
+0x42 0x66 0x02 0x9a
+
+# CHECK: bltzals $6, 1332
+0x42 0x26 0x02 0x9a
+
+# CHECK: beqzc $9, 1332
+0x40 0xe9 0x02 0x9a
+
+# CHECK: bnezc $9, 1332
+0x40 0xa9 0x02 0x9a
+
+# CHECK: jals 1328
+0x74 0x00 0x02 0x98
+
+# CHECK: jalrs $ra, $6
+0x03 0xe6 0x4f 0x3c
+
+# CHECK: lwm32 $16, $17, 8($4)
+0x20 0x44 0x50 0x08
+
+# CHECK: swm32 $16, $17, 8($4)
+0x20 0x44 0xd0 0x08
+
+# CHECK: swp $16, 8($4)
+0x22 0x04 0x90 0x08
+
+# CHECK: lwp $16, 8($4)
+0x22 0x04 0x10 0x08
+
+# CHECK: nop
+0x00 0x00 0x00 0x00
+
+# CHECK: addu16 $6, $17, $4
+0x07 0x42
+
+# CHECK: subu16 $5, $16, $3
+0x06 0xb1
+
+# CHECK: and16 $16, $2
+0x44 0x82
+
+# CHECK: not16 $17, $3
+0x44 0x0b
+
+# CHECK: or16 $16, $4
+0x44 0xc4
+
+# CHECK: xor16 $17, $5
+0x44 0x4d
+
+# CHECK: sll16 $3, $16, 5
+0x25 0x8a
+
+# CHECK: srl16 $4, $17, 6
+0x26 0x1d
+
+# CHECK: lbu16 $3, 4($17)
+0x09 0x94
+
+# CHECK: lbu16 $3, -1($16)
+0x09 0x8f
+
+# CHECK: lhu16 $3, 4($16)
+0x29 0x82
+
+# CHECK: lw16 $4, 8($17)
+0x6a 0x12
+
+# CHECK: sb16 $3, 4($16)
+0x89 0x84
+
+# CHECK: sh16 $4, 8($17)
+0xaa 0x14
+
+# CHECK: sw16 $4, 4($17)
+0xea 0x11
+
+# CHECK: sw16 $zero, 4($17)
+0xe8 0x11
+
+# CHECK: mfhi $9
+0x46 0x09
+
+# CHECK: mflo $9
+0x46 0x49
+
+# CHECK: move $25, $1
+0x0f 0x21
+
+# CHECK: jrc $9
+0x45 0xa9
+
+# CHECK: jalr $9
+0x45 0xc9
+
+# CHECK: jalrs16 $9
+0x45 0xe9
+
+# CHECK: jr16 $9
+0x45 0x89
+
+# CHECK: li16 $3, -1
+0xed 0xff
+
+# CHECK: li16 $3, 126
+0xed 0xfe
+
+# CHECK: addiur1sp $7, 4
+0x6f 0x83
+
+# CHECK: addiur2 $6, $7, -1
+0x6f 0x7e
+
+# CHECK: addiur2 $6, $7, 12
+0x6f 0x76
+
+# CHECK: addius5 $7, -2
+0x4c 0xfc
+
+# CHECK: nop
+0x0c 0x00
+
+# CHECK: lw $3, 32($sp)
+0x48 0x68
+
+# CHECK: sw $4, 124($sp)
+0xc8 0x9f
+
+# CHECK: beqz16 $6, 20
+0x8f 0x0a
+
+# CHECK: bnez16 $6, 20
+0xaf 0x0a
diff --git a/test/MC/Disassembler/Mips/micromips_le.txt b/test/MC/Disassembler/Mips/micromips_le.txt
index bdfe88eaffbf..9a8c4a94530c 100644
--- a/test/MC/Disassembler/Mips/micromips_le.txt
+++ b/test/MC/Disassembler/Mips/micromips_le.txt
@@ -16,9 +16,27 @@
 # CHECK: addiu $9, $6, -15001
 0x26 0x31 0x67 0xc5
 
+# CHECK: addiusp -16
+0xf9 0x4f
+
+# CHECK: addiusp -1028
+0xff 0x4f
+
+# CHECK: addiusp -1032
+0xfd 0x4f
+
+# CHECK: addiusp 1024
+0x01 0x4c
+
+# CHECK: addiusp 1028
+0x03 0x4c
+
 # CHECK: addu $9, $6, $7
 0xe6 0x00 0x50 0x49
 
+# CHECK: andi16 $16, $2, 31
+0x29 0x2c
+
 # CHECK: sub $9, $6, $7
 0xe6 0x00 0x90 0x49
 
@@ -136,6 +154,9 @@
 # CHECK: lw $6, 4($5)
 0xc5 0xfc 0x04 0x00
 
+# CHECK: lw $6, 123($sp)
+0xdd 0xfc 0x7b 0x00
+
 # CHECK: sb $5, 8($4)
 0xa4 0x18 0x08 0x00
 
@@ -145,6 +166,9 @@
 # CHECK: sw $5, 4($6)
 0xa6 0xf8 0x04 0x00
 
+# CHECK: sw $5, 123($sp)
+0xbd 0xf8 0x7b 0x00
+
 # CHECK: lwu $2, 8($4)
 0x44 0x60 0x08 0xe0
 
@@ -229,6 +253,9 @@
 # CHECK: jr $7
 0x07 0x00 0x3c 0x0f
 
+# CHECK: jraddiusp 20
+0x05 0x47
+
 # CHECK: beq $9, $6, 1332
 0xc9 0x94 0x9a 0x02
 
@@ -289,8 +316,161 @@
 # CHECK: tnei $9, 17767
 0x89 0x41 0x67 0x45
 
+# CHECK: cache 1, 8($5)
+0x25 0x20 0x08 0x60
+
+# CHECK: pref 1, 8($5)
+0x25 0x60 0x08 0x20
+
+# CHECK: ssnop
+0x00 0x00 0x00 0x08
+
+# CHECK: ehb
+0x00 0x00 0x00 0x18
+
+# CHECK: pause
+0x00 0x00 0x00 0x28
+
 # CHECK: ll $2, 8($4)
 0x44 0x60 0x08 0x30
 
 # CHECK: sc $2, 8($4)
 0x44 0x60 0x08 0xb0
+
+# CHECK: lwxs $2, $3($4)
+0x64 0x00 0x18 0x11
+
+# CHECK: bgezals $6, 1332
+0x66 0x42 0x9a 0x02
+
+# CHECK: bltzals $6, 1332
+0x26 0x42 0x9a 0x02
+
+# CHECK: beqzc $9, 1332
+0xe9 0x40 0x9a 0x02
+
+# CHECK: bnezc $9, 1332
+0xa9 0x40 0x9a 0x02
+
+# CHECK: jals 1328
+0x00 0x74 0x98 0x02
+
+# CHECK: jalrs $ra, $6
+0xe6 0x03 0x3c 0x4f
+
+# CHECK: lwm32 $16, $17, 8($4)
+0x44 0x20 0x08 0x50
+
+# CHECK: swm32 $16, $17, 8($4)
+0x44 0x20 0x08 0xd0
+
+# CHECK: swp $16, 8($4)
+0x04 0x22  0x08 0x90
+
+# CHECK: lwp $16, 8($4)
+0x04 0x22 0x08 0x10
+
+# CHECK: nop
+0x00 0x00 0x00 0x00
+
+# CHECK: addu16 $6, $17, $4
+0x42 0x07
+
+# CHECK: subu16 $5, $16, $3
+0xb1 0x06
+
+# CHECK: and16 $16, $2
+0x82 0x44
+
+# CHECK: not16 $17, $3
+0x0b 0x44
+
+# CHECK: or16 $16, $4
+0xc4 0x44
+
+# CHECK: xor16 $17, $5
+0x4d 0x44
+
+# CHECK: sll16 $3, $16, 5
+0x8a 0x25
+
+# CHECK: srl16 $4, $17, 6
+0x1d 0x26
+
+# CHECK: lbu16 $3, 4($17)
+0x94 0x09
+
+# CHECK: lbu16 $3, -1($16)
+0x8f 0x09
+
+# CHECK: lhu16 $3, 4($16)
+0x82 0x29
+
+# CHECK: lw16 $4, 8($17)
+0x12 0x6a
+
+# CHECK: sb16 $3, 4($16)
+0x84 0x89
+
+# CHECK: sh16 $4, 8($17)
+0x14 0xaa
+
+# CHECK: sw16 $4, 4($17)
+0x11 0xea
+
+# CHECK: sw16 $zero, 4($17)
+0x11 0xe8
+
+# CHECK: mfhi $9
+0x09 0x46
+
+# CHECK: mflo $9
+0x49 0x46
+
+# CHECK: move $25, $1
+0x21 0x0f
+
+# CHECK: jrc $9
+0xa9 0x45
+
+# CHECK: jalr $9
+0xc9 0x45
+
+# CHECK: jalrs16 $9
+0xe9 0x45
+
+# CHECK: jr16 $9
+0x89 0x45
+
+# CHECK: li16 $3, -1
+0xff 0xed
+
+# CHECK: li16 $3, 126
+0xfe 0xed
+
+# CHECK: addiur1sp $7, 4
+0x83 0x6f
+
+# CHECK: addiur2 $6, $7, -1
+0x7e 0x6f
+
+# CHECK: addiur2 $6, $7, 12
+0x76 0x6f
+
+# CHECK: addius5 $7, -2
+0xfc 0x4c
+
+# CHECK: nop
+0x00 0x0c
+
+# CHECK: lw $3, 32($sp)
+0x68 0x48
+
+# CHECK: sw $4, 124($sp)
+0x9f 0xc8
+
+# CHECK: beqz16 $6, 20
+0x0a 0x8f
+
+# CHECK: bnez16 $6, 20
+0x0a 0xaf
diff --git a/test/MC/Disassembler/Mips/mips1/valid-mips1-el.txt b/test/MC/Disassembler/Mips/mips1/valid-mips1-el.txt
new file mode 100644
index 000000000000..474bd17d4bb1
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips1/valid-mips1-el.txt
@@ -0,0 +1,116 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -disassemble -mcpu=mips1 | FileCheck %s
+# CHECK: .text
+ 0x85 0xc1 0x20 0x46   	# CHECK:       abs.d	$f6, $f24
+ 0x45 0x82 0x00 0x46   	# CHECK:       abs.s	$f9, $f16
+ 0x20 0xb8 0x45 0x02   	# CHECK:       add	$23, $18, $5
+ 0x00 0x30 0x3c 0x46   	# CHECK:       add.d	$f0, $f6, $f28
+ 0x00 0xaa 0x18 0x46   	# CHECK:       add.s	$f8, $f21, $f24
+ 0xd2 0x66 0x2d 0x21   	# CHECK:       addi	$13, $9, 26322
+ 0xfe 0xff 0x08 0x21   	# CHECK:       addi	$8, $8, -2
+ 0x48 0x3b 0xc9 0x21   	# CHECK:       addi	$9, $14, 15176
+ 0xe7 0xe3 0x18 0x23   	# CHECK:       addi	$24, $24, -7193
+ 0x21 0x48 0x86 0x00   	# CHECK:       addu	$9, $4, $6
+ 0x0a 0x00 0x29 0x25   	# CHECK:       addiu	$9, $9, 10
+ 0x24 0xb8 0x4c 0x00   	# CHECK:       and	$23, $2, $12
+ 0x01 0x00 0x00 0x45   	# CHECK:       bc1f	8
+ 0x04 0x00 0x42 0x30   	# CHECK:       andi	$2, $2, 4
+ 0x01 0x00 0x01 0x45   	# CHECK:       bc1t	8
+ 0x00 0x00 0x00 0x00   	# CHECK:       nop
+ 0x9b 0x14 0x11 0x04   	# CHECK:       bal	21104
+ 0x00 0x00 0x00 0x00   	# CHECK:       nop
+ 0x9b 0x14 0x11 0x04   	# CHECK:       bal	21104
+ 0x00 0x00 0x00 0x00   	# CHECK:       nop
+ 0x9b 0x14 0xd1 0x04   	# CHECK:       bgezal	$6, 21104
+ 0x00 0x00 0x00 0x00   	# CHECK:       nop
+ 0x9b 0x14 0xd0 0x04   	# CHECK:       bltzal	$6, 21104
+ 0x00 0x00 0x00 0x00   	# CHECK:       nop
+ 0x3b 0xe0 0x3c 0x46   	# CHECK:       c.ngl.d	$f28, $f28
+ 0x39 0x00 0x30 0x46   	# CHECK:       c.ngle.d	$f0, $f16
+ 0x38 0xf0 0x20 0x46   	# CHECK:       c.sf.d	$f30, $f0
+ 0x38 0x70 0x16 0x46   	# CHECK:       c.sf.s	$f14, $f22
+ 0x00 0xa8 0x51 0x44   	# CHECK:       cfc1	$17, $21
+ 0x00 0xd0 0xc6 0x44   	# CHECK:       ctc1	$6, $26
+ 0xa1 0xe5 0x00 0x46   	# CHECK:       cvt.d.s	$f22, $f28
+ 0xa1 0x5e 0x80 0x46   	# CHECK:       cvt.d.w	$f26, $f11
+ 0xa0 0x46 0x20 0x46   	# CHECK:       cvt.s.d	$f26, $f8
+ 0xa0 0x7d 0x80 0x46   	# CHECK:       cvt.s.w	$f22, $f15
+ 0x24 0x75 0x20 0x46   	# CHECK:       cvt.w.d	$f20, $f14
+ 0x24 0xc5 0x00 0x46   	# CHECK:       cvt.w.s	$f20, $f24
+ 0x1a 0x00 0x2b 0x03   	# CHECK:       div	$zero, $25, $11
+ 0x03 0xa7 0x3a 0x46   	# CHECK:       div.d	$f28, $f20, $f26
+ 0x03 0x29 0x0f 0x46   	# CHECK:       div.s	$f4, $f5, $f15
+ 0x1b 0x00 0x2f 0x03   	# CHECK:       divu	$zero, $25, $15
+ 0x4d 0xc7 0x58 0x81   	# CHECK:       lb	$24, -14515($10)
+ 0xf3 0x75 0x68 0x90   	# CHECK:       lbu	$8, 30195($3)
+ 0x94 0xde 0xab 0x86   	# CHECK:       lh	$11, -8556($21)
+ 0xbd 0xa6 0x53 0x94   	# CHECK:       lhu	$19, -22851($2)
+ 0xb3 0x8b 0x01 0x24   	# CHECK:       addiu $1, $zero, -29773
+ 0x3f 0x8b 0x00 0x24   	# CHECK:       addiu   $zero, $zero, -29889
+ 0x2a 0x16 0xa8 0x8c   	# CHECK:       lw	$8, 5674($5)
+ 0xf1 0x27 0x50 0xc7   	# CHECK:       lwc1	$f16, 10225($26)
+ 0xb7 0xfc 0xd2 0xc8   	# CHECK:       lwc2	$18, -841($6)
+ 0xf7 0x81 0x4a 0xcf   	# CHECK:       lwc3	$10, -32265($26)
+ 0x79 0xef 0xf4 0x89   	# CHECK:       lwl	$20, -4231($15)
+ 0x35 0xb5 0x80 0x9b   	# CHECK:       lwr	$zero, -19147($gp)
+ 0x00 0xd8 0x07 0x44   	# CHECK:       mfc1	$7, $f27
+ 0x10 0x98 0x00 0x00   	# CHECK:       mfhi	$19
+ 0x10 0xe8 0x00 0x00   	# CHECK:       mfhi	$sp
+ 0x12 0x88 0x00 0x00   	# CHECK:       mflo	$17
+ 0x06 0x75 0x20 0x46   	# CHECK:       mov.d	$f20, $f14
+ 0x86 0xd8 0x00 0x46   	# CHECK:       mov.s	$f2, $f27
+ 0x21 0xf0 0x80 0x00   	# CHECK:       move	$fp, $4
+ 0x21 0xc8 0xc0 0x00   	# CHECK:       move	$25, $6
+ 0x00 0x48 0x9e 0x44   	# CHECK:       mtc1	$fp, $f9
+ 0x11 0x00 0x20 0x02   	# CHECK:       mthi	$17
+ 0x13 0x00 0xa0 0x03   	# CHECK:       mtlo	$sp
+ 0x13 0x00 0x20 0x03   	# CHECK:       mtlo	$25
+ 0x02 0xa5 0x30 0x46   	# CHECK:       mul.d	$f20, $f20, $f16
+ 0x82 0x57 0x02 0x46   	# CHECK:       mul.s	$f30, $f10, $f2
+ 0x18 0x00 0xb4 0x03   	# CHECK:       mult	$sp, $20
+ 0x18 0x00 0xa2 0x03   	# CHECK:       mult	$sp, $2
+ 0x19 0x00 0x9a 0x03   	# CHECK:       multu	$gp, $26
+ 0x19 0x00 0x32 0x01   	# CHECK:       multu	$9, $18
+ 0x23 0x10 0x02 0x00   	# CHECK:       negu	$2, $2
+ 0x23 0x10 0x03 0x00   	# CHECK:       negu	$2, $3
+ 0x87 0x96 0x20 0x46   	# CHECK:       neg.d	$f26, $f18
+ 0x47 0x78 0x00 0x46   	# CHECK:       neg.s	$f1, $f15
+ 0x00 0x00 0x00 0x00   	# CHECK:       nop
+ 0x27 0x38 0x07 0x00   	# CHECK:       nor	$7, $zero, $7
+ 0x25 0x60 0x1d 0x02   	# CHECK:       or	$12, $16, $sp
+ 0x04 0x00 0x42 0x34   	# CHECK:       ori	$2, $2, 4
+ 0x6f 0xb2 0xd6 0xa1   	# CHECK:       sb	$22, -19857($14)
+ 0xd0 0xe5 0xee 0xa5   	# CHECK:       sh	$14, -6704($15)
+ 0x80 0x3c 0x07 0x00   	# CHECK:       sll	$7, $7, 18
+ 0x80 0x3c 0x00 0x00   	# CHECK:       sll	$7, $zero, 18
+ 0x04 0x38 0x20 0x01   	# CHECK:       sllv	$7, $zero, $9
+ 0x04 0x38 0x20 0x01   	# CHECK:       sllv	$7, $zero, $9
+ 0x2a 0xb8 0x7b 0x01   	# CHECK:       slt	$23, $11, $27
+ 0x11 0x25 0x51 0x29   	# CHECK:       slti	$17, $10, 9489
+ 0x55 0xc3 0x39 0x2f   	# CHECK:       sltiu	$25, $25, -15531
+ 0x2b 0xa0 0xab 0x02   	# CHECK:       sltu	$20, $21, $11
+ 0x55 0xc3 0x38 0x2f   	# CHECK:       sltiu	$24, $25, -15531
+ 0xc3 0x8b 0x11 0x00   	# CHECK:       sra	$17, $17, 15
+ 0xc3 0x8b 0x17 0x00   	# CHECK:       sra	$17, $23, 15
+ 0x07 0x88 0xb7 0x03   	# CHECK:       srav	$17, $23, $sp
+ 0x07 0x88 0xb7 0x03   	# CHECK:       srav	$17, $23, $sp
+ 0xc2 0x11 0x02 0x00   	# CHECK:       srl	$2, $2, 7
+ 0xc2 0x11 0x02 0x00   	# CHECK:       srl	$2, $2, 7
+ 0x06 0xc8 0x94 0x00   	# CHECK:       srlv	$25, $20, $4
+ 0x06 0xc8 0x94 0x00   	# CHECK:       srlv	$25, $20, $4
+ 0x40 0x00 0x00 0x00   	# CHECK:       ssnop
+ 0x22 0xb0 0x6c 0x02   	# CHECK:       sub	$22, $19, $12
+ 0x36 0x0c 0x36 0x22   	# CHECK:       addi	$22, $17, 3126
+ 0x90 0xe6 0xad 0x21   	# CHECK:       addi	$13, $13, -6512
+ 0x81 0x14 0x30 0x46   	# CHECK:       sub.d	$f18, $f2, $f16
+ 0xc1 0xb5 0x16 0x46   	# CHECK:       sub.s	$f23, $f22, $f22
+ 0x23 0xe8 0xd6 0x02   	# CHECK:       subu	$sp, $22, $22
+ 0x50 0xd8 0xbf 0xaf   	# CHECK:       sw	$ra, -10160($sp)
+ 0xef 0xde 0x06 0xe7   	# CHECK:       swc1	$f6, -8465($24)
+ 0x30 0x61 0x19 0xea   	# CHECK:       swc2	$25, 24880($16)
+ 0x7e 0x35 0x6f 0xaa   	# CHECK:       swl	$15, 13694($19)
+ 0x22 0x98 0xd1 0xb9   	# CHECK:       swr	$17, -26590($14)
+ 0x08 0x00 0x00 0x42   	# CHECK:       tlbp
+ 0x01 0x00 0x00 0x42   	# CHECK:       tlbr
+ 0x02 0x00 0x00 0x42   	# CHECK:       tlbwi
+ 0x06 0x00 0x00 0x42   	# CHECK:       tlbwr
+ 0x26 0x90 0x9e 0x00   	# CHECK:       xor	$18, $4, $fp
diff --git a/test/MC/Disassembler/Mips/mips1/valid-mips1.txt b/test/MC/Disassembler/Mips/mips1/valid-mips1.txt
new file mode 100644
index 000000000000..957f82adbc8b
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips1/valid-mips1.txt
@@ -0,0 +1,116 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips1 | FileCheck %s
+# CHECK: .text
+ 0x46 0x20 0xc1 0x85 	# CHECK:       abs.d	$f6, $f24
+ 0x46 0x00 0x82 0x45 	# CHECK:       abs.s	$f9, $f16
+ 0x02 0x45 0xb8 0x20 	# CHECK:       add	$23, $18, $5
+ 0x46 0x3c 0x30 0x00 	# CHECK:       add.d	$f0, $f6, $f28
+ 0x46 0x18 0xaa 0x00 	# CHECK:       add.s	$f8, $f21, $f24
+ 0x21 0x2d 0x66 0xd2 	# CHECK:       addi	$13, $9, 26322
+ 0x21 0x08 0xff 0xfe 	# CHECK:       addi	$8, $8, -2
+ 0x21 0xc9 0x3b 0x48 	# CHECK:       addi	$9, $14, 15176
+ 0x23 0x18 0xe3 0xe7 	# CHECK:       addi	$24, $24, -7193
+ 0x00 0x86 0x48 0x21 	# CHECK:       addu	$9, $4, $6
+ 0x25 0x29 0x00 0x0a 	# CHECK:       addiu	$9, $9, 10
+ 0x00 0x4c 0xb8 0x24 	# CHECK:       and	$23, $2, $12
+ 0x45 0x00 0x00 0x01 	# CHECK:       bc1f	8
+ 0x30 0x42 0x00 0x04 	# CHECK:       andi	$2, $2, 4
+ 0x45 0x01 0x00 0x01 	# CHECK:       bc1t	8
+ 0x00 0x00 0x00 0x00 	# CHECK:       nop
+ 0x04 0x11 0x14 0x9b 	# CHECK:       bal	21104
+ 0x00 0x00 0x00 0x00 	# CHECK:       nop
+ 0x04 0x11 0x14 0x9b 	# CHECK:       bal	21104
+ 0x00 0x00 0x00 0x00 	# CHECK:       nop
+ 0x04 0xd1 0x14 0x9b 	# CHECK:       bgezal	$6, 21104
+ 0x00 0x00 0x00 0x00 	# CHECK:       nop
+ 0x04 0xd0 0x14 0x9b 	# CHECK:       bltzal	$6, 21104
+ 0x00 0x00 0x00 0x00 	# CHECK:       nop
+ 0x46 0x3c 0xe0 0x3b 	# CHECK:       c.ngl.d	$f28, $f28
+ 0x46 0x30 0x00 0x39 	# CHECK:       c.ngle.d	$f0, $f16
+ 0x46 0x20 0xf0 0x38 	# CHECK:       c.sf.d	$f30, $f0
+ 0x46 0x16 0x70 0x38 	# CHECK:       c.sf.s	$f14, $f22
+ 0x44 0x51 0xa8 0x00 	# CHECK:       cfc1	$17, $21
+ 0x44 0xc6 0xd0 0x00 	# CHECK:       ctc1	$6, $26
+ 0x46 0x00 0xe5 0xa1 	# CHECK:       cvt.d.s	$f22, $f28
+ 0x46 0x80 0x5e 0xa1 	# CHECK:       cvt.d.w	$f26, $f11
+ 0x46 0x20 0x46 0xa0 	# CHECK:       cvt.s.d	$f26, $f8
+ 0x46 0x80 0x7d 0xa0 	# CHECK:       cvt.s.w	$f22, $f15
+ 0x46 0x20 0x75 0x24 	# CHECK:       cvt.w.d	$f20, $f14
+ 0x46 0x00 0xc5 0x24 	# CHECK:       cvt.w.s	$f20, $f24
+ 0x03 0x2b 0x00 0x1a 	# CHECK:       div	$zero, $25, $11
+ 0x46 0x3a 0xa7 0x03 	# CHECK:       div.d	$f28, $f20, $f26
+ 0x46 0x0f 0x29 0x03 	# CHECK:       div.s	$f4, $f5, $f15
+ 0x03 0x2f 0x00 0x1b 	# CHECK:       divu	$zero, $25, $15
+ 0x81 0x58 0xc7 0x4d 	# CHECK:       lb	$24, -14515($10)
+ 0x90 0x68 0x75 0xf3 	# CHECK:       lbu	$8, 30195($3)
+ 0x86 0xab 0xde 0x94 	# CHECK:       lh	$11, -8556($21)
+ 0x94 0x53 0xa6 0xbd 	# CHECK:       lhu	$19, -22851($2)
+ 0x24 0x01 0x8b 0xb3 	# CHECK:       addiu $1, $zero, -29773
+ 0x24 0x00 0x8b 0x3f 	# CHECK:       addiu   $zero, $zero, -29889
+ 0x8c 0xa8 0x16 0x2a 	# CHECK:       lw	$8, 5674($5)
+ 0xc7 0x50 0x27 0xf1 	# CHECK:       lwc1	$f16, 10225($26)
+ 0xc8 0xd2 0xfc 0xb7 	# CHECK:       lwc2	$18, -841($6)
+ 0xcf 0x4a 0x81 0xf7 	# CHECK:       lwc3	$10, -32265($26)
+ 0x89 0xf4 0xef 0x79 	# CHECK:       lwl	$20, -4231($15)
+ 0x9b 0x80 0xb5 0x35 	# CHECK:       lwr	$zero, -19147($gp)
+ 0x44 0x07 0xd8 0x00 	# CHECK:       mfc1	$7, $f27
+ 0x00 0x00 0x98 0x10 	# CHECK:       mfhi	$19
+ 0x00 0x00 0xe8 0x10 	# CHECK:       mfhi	$sp
+ 0x00 0x00 0x88 0x12 	# CHECK:       mflo	$17
+ 0x46 0x20 0x75 0x06 	# CHECK:       mov.d	$f20, $f14
+ 0x46 0x00 0xd8 0x86 	# CHECK:       mov.s	$f2, $f27
+ 0x00 0x80 0xf0 0x21 	# CHECK:       move	$fp, $4
+ 0x00 0xc0 0xc8 0x21 	# CHECK:       move	$25, $6
+ 0x44 0x9e 0x48 0x00 	# CHECK:       mtc1	$fp, $f9
+ 0x02 0x20 0x00 0x11 	# CHECK:       mthi	$17
+ 0x03 0xa0 0x00 0x13 	# CHECK:       mtlo	$sp
+ 0x03 0x20 0x00 0x13 	# CHECK:       mtlo	$25
+ 0x46 0x30 0xa5 0x02 	# CHECK:       mul.d	$f20, $f20, $f16
+ 0x46 0x02 0x57 0x82 	# CHECK:       mul.s	$f30, $f10, $f2
+ 0x03 0xb4 0x00 0x18 	# CHECK:       mult	$sp, $20
+ 0x03 0xa2 0x00 0x18 	# CHECK:       mult	$sp, $2
+ 0x03 0x9a 0x00 0x19 	# CHECK:       multu	$gp, $26
+ 0x01 0x32 0x00 0x19 	# CHECK:       multu	$9, $18
+ 0x00 0x02 0x10 0x23 	# CHECK:       negu	$2, $2
+ 0x00 0x03 0x10 0x23 	# CHECK:       negu	$2, $3
+ 0x46 0x20 0x96 0x87 	# CHECK:       neg.d	$f26, $f18
+ 0x46 0x00 0x78 0x47 	# CHECK:       neg.s	$f1, $f15
+ 0x00 0x00 0x00 0x00 	# CHECK:       nop
+ 0x00 0x07 0x38 0x27 	# CHECK:       nor	$7, $zero, $7
+ 0x02 0x1d 0x60 0x25 	# CHECK:       or	$12, $16, $sp
+ 0x34 0x42 0x00 0x04 	# CHECK:       ori	$2, $2, 4
+ 0xa1 0xd6 0xb2 0x6f 	# CHECK:       sb	$22, -19857($14)
+ 0xa5 0xee 0xe5 0xd0 	# CHECK:       sh	$14, -6704($15)
+ 0x00 0x07 0x3c 0x80 	# CHECK:       sll	$7, $7, 18
+ 0x00 0x00 0x3c 0x80 	# CHECK:       sll	$7, $zero, 18
+ 0x01 0x20 0x38 0x04 	# CHECK:       sllv	$7, $zero, $9
+ 0x01 0x20 0x38 0x04 	# CHECK:       sllv	$7, $zero, $9
+ 0x01 0x7b 0xb8 0x2a 	# CHECK:       slt	$23, $11, $27
+ 0x29 0x51 0x25 0x11 	# CHECK:       slti	$17, $10, 9489
+ 0x2f 0x39 0xc3 0x55 	# CHECK:       sltiu	$25, $25, -15531
+ 0x02 0xab 0xa0 0x2b 	# CHECK:       sltu	$20, $21, $11
+ 0x2f 0x38 0xc3 0x55 	# CHECK:       sltiu	$24, $25, -15531
+ 0x00 0x11 0x8b 0xc3 	# CHECK:       sra	$17, $17, 15
+ 0x00 0x17 0x8b 0xc3 	# CHECK:       sra	$17, $23, 15
+ 0x03 0xb7 0x88 0x07 	# CHECK:       srav	$17, $23, $sp
+ 0x03 0xb7 0x88 0x07 	# CHECK:       srav	$17, $23, $sp
+ 0x00 0x02 0x11 0xc2 	# CHECK:       srl	$2, $2, 7
+ 0x00 0x02 0x11 0xc2 	# CHECK:       srl	$2, $2, 7
+ 0x00 0x94 0xc8 0x06 	# CHECK:       srlv	$25, $20, $4
+ 0x00 0x94 0xc8 0x06 	# CHECK:       srlv	$25, $20, $4
+ 0x00 0x00 0x00 0x40 	# CHECK:       ssnop
+ 0x02 0x6c 0xb0 0x22 	# CHECK:       sub	$22, $19, $12
+ 0x22 0x36 0x0c 0x36 	# CHECK:       addi	$22, $17, 3126
+ 0x21 0xad 0xe6 0x90 	# CHECK:       addi	$13, $13, -6512
+ 0x46 0x30 0x14 0x81 	# CHECK:       sub.d	$f18, $f2, $f16
+ 0x46 0x16 0xb5 0xc1 	# CHECK:       sub.s	$f23, $f22, $f22
+ 0x02 0xd6 0xe8 0x23 	# CHECK:       subu	$sp, $22, $22
+ 0xaf 0xbf 0xd8 0x50 	# CHECK:       sw	$ra, -10160($sp)
+ 0xe7 0x06 0xde 0xef 	# CHECK:       swc1	$f6, -8465($24)
+ 0xea 0x19 0x61 0x30 	# CHECK:       swc2	$25, 24880($16)
+ 0xaa 0x6f 0x35 0x7e 	# CHECK:       swl	$15, 13694($19)
+ 0xb9 0xd1 0x98 0x22 	# CHECK:       swr	$17, -26590($14)
+ 0x42 0x00 0x00 0x08 	# CHECK:       tlbp
+ 0x42 0x00 0x00 0x01 	# CHECK:       tlbr
+ 0x42 0x00 0x00 0x02 	# CHECK:       tlbwi
+ 0x42 0x00 0x00 0x06 	# CHECK:       tlbwr
+ 0x00 0x9e 0x90 0x26 	# CHECK:       xor	$18, $4, $fp
diff --git a/test/MC/Disassembler/Mips/mips1/valid-xfail.txt b/test/MC/Disassembler/Mips/mips1/valid-xfail.txt
new file mode 100644
index 000000000000..b753f4d8a8ee
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips1/valid-xfail.txt
@@ -0,0 +1,5 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips1 | FileCheck %s
+# XFAIL: *
+
+0xc2 0x44 0xe3 0x67   # CHECK:     lwc0    $4,-7321($18)
+0xe2 0x64 0x49 0xd8   # CHECK:     swc0    $4,18904($19)
diff --git a/test/MC/Disassembler/Mips/mips2/valid-mips2-el.txt b/test/MC/Disassembler/Mips/mips2/valid-mips2-el.txt
new file mode 100644
index 000000000000..8722d43a7d4e
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips2/valid-mips2-el.txt
@@ -0,0 +1,159 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -disassemble -mcpu=mips2 | FileCheck %s
+# CHECK: .text
+0x85 0xc1 0x20 0x46  # CHECK: abs.d	$f6, $f24
+0x45 0x82 0x00 0x46  # CHECK: abs.s	$f9, $f16
+0x20 0xb8 0x45 0x02  # CHECK: add	$23, $18, $5
+0x48 0x3b 0xc9 0x21  # CHECK: addi	$9, $14, 15176
+0xe7 0xe3 0x18 0x23  # CHECK: addi	$24, $24, -7193
+0x00 0x30 0x3c 0x46  # CHECK: add.d	$f0, $f6, $f28
+0x00 0xaa 0x18 0x46  # CHECK: add.s	$f8, $f21, $f24
+0xd2 0x66 0x2d 0x21  # CHECK: addi	$13, $9, 26322
+0xfe 0xff 0x08 0x21  # CHECK: addi	$8, $8, -2
+0x21 0x48 0x86 0x00  # CHECK: addu	$9, $4, $6
+0x0a 0x00 0x29 0x25  # CHECK: addiu	$9, $9, 10
+0x24 0xb8 0x4c 0x00  # CHECK: and	$23, $2, $12
+0x04 0x00 0x42 0x30  # CHECK: andi	$2, $2, 4
+0x01 0x00 0x00 0x45  # CHECK: bc1f	 8
+0x0c 0x00 0x02 0x45  # CHECK: bc1fl	 52
+0x01 0x00 0x01 0x45  # CHECK: bc1t	 8
+0xf4 0xf7 0x03 0x45  # CHECK: bc1tl	 -8236
+0x9b 0x14 0x11 0x04  # CHECK: bal	21104
+0x00 0x00 0x00 0x00  # CHECK: nop
+0x9b 0x14 0xd0 0x04  # CHECK: bltzal	$6, 21104
+0x00 0x00 0x00 0x00  # CHECK: nop
+0x40 0x0c 0xd3 0x51  # CHECK: beql	$14, $19, 12548
+0x1f 0x07 0x93 0x05  # CHECK: bgezall	$12, 7296
+0x4d 0xf9 0x83 0x04  # CHECK: bgezl	$4, -6856
+0x59 0xfc 0x40 0x5d  # CHECK: bgtzl	$10, -3736
+0xe7 0x02 0xc0 0x58  # CHECK: blezl	$6, 2976
+0x7a 0x00 0xd2 0x04  # CHECK: bltzall	$6, 492
+0x45 0xf6 0x22 0x06  # CHECK: bltzl	$17, -9960
+0xfc 0x04 0x94 0x57  # CHECK: bnel	$gp, $20, 5108
+0x3b 0xe0 0x3c 0x46  # CHECK: c.ngl.d	$f28, $f28
+0x39 0x00 0x30 0x46  # CHECK: c.ngle.d	$f0, $f16
+0x38 0xf0 0x20 0x46  # CHECK: c.sf.d	$f30, $f0
+0x38 0x70 0x16 0x46  # CHECK: c.sf.s	$f14, $f22
+0xce 0xc2 0x20 0x46  # CHECK: ceil.w.d	$f11, $f24
+0x8e 0xa1 0x00 0x46  # CHECK: ceil.w.s	$f6, $f20
+0x00 0xa8 0x51 0x44  # CHECK: cfc1	$17, $21
+0x00 0xd0 0xc6 0x44  # CHECK: ctc1	$6, $26
+0xa1 0xe5 0x00 0x46  # CHECK: cvt.d.s	$f22, $f28
+0xa1 0x5e 0x80 0x46  # CHECK: cvt.d.w	$f26, $f11
+0xa0 0x46 0x20 0x46  # CHECK: cvt.s.d	$f26, $f8
+0xa0 0x7d 0x80 0x46  # CHECK: cvt.s.w	$f22, $f15
+0x24 0x75 0x20 0x46  # CHECK: cvt.w.d	$f20, $f14
+0x24 0xc5 0x00 0x46  # CHECK: cvt.w.s	$f20, $f24
+0x1a 0x00 0x2b 0x03  # CHECK: div	$zero, $25, $11
+0x03 0xa7 0x3a 0x46  # CHECK: div.d	$f28, $f20, $f26
+0x03 0x29 0x0f 0x46  # CHECK: div.s	$f4, $f5, $f15
+0x1b 0x00 0x2f 0x03  # CHECK: divu	$zero, $25, $15
+0xc0 0x00 0x00 0x00  # CHECK: ehb
+0x8f 0x53 0x20 0x46  # CHECK: floor.w.d	$f14, $f10
+0x0f 0x4a 0x00 0x46  # CHECK: floor.w.s	$f8, $f9
+0x4d 0xc7 0x58 0x81  # CHECK: lb	$24, -14515($10)
+0xf3 0x75 0x68 0x90  # CHECK: lbu	$8, 30195($3)
+0x07 0x40 0x0a 0xd6  # CHECK: ldc1	$f10, 16391($16)
+0x43 0xad 0x28 0xd8  # CHECK: ldc2	$8, -21181($1)
+0x1b 0x90 0x3d 0xde  # CHECK: ldc3	$29, -28645($17)
+0x94 0xde 0xab 0x86  # CHECK: lh	$11, -8556($21)
+0xbd 0xa6 0x53 0x94  # CHECK: lhu	$19, -22851($2)
+0xb3 0x8b 0x01 0x24  # CHECK: addiu	$1, $zero, -29773
+0x3f 0x8b 0x00 0x24  # CHECK: addiu	$zero, $zero, -29889
+0x67 0xe3 0x42 0xc2  # CHECK: ll	$2, -7321($18)
+0x2a 0x16 0xa8 0x8c  # CHECK: lw	$8, 5674($5)
+0xf1 0x27 0x50 0xc7  # CHECK: lwc1	$f16, 10225($26)
+0xb7 0xfc 0xd2 0xc8  # CHECK: lwc2	$18, -841($6)
+0xf7 0x81 0x4a 0xcf  # CHECK: lwc3	$10, -32265($26)
+0x79 0xef 0xf4 0x89  # CHECK: lwl	$20, -4231($15)
+0x35 0xb5 0x80 0x9b  # CHECK: lwr	$zero, -19147($gp)
+0x00 0xd8 0x07 0x44  # CHECK: mfc1	$7, $f27
+0x10 0x98 0x00 0x00  # CHECK: mfhi	$19
+0x10 0xe8 0x00 0x00  # CHECK: mfhi	$sp
+0x12 0x88 0x00 0x00  # CHECK: mflo	$17
+0x06 0x75 0x20 0x46  # CHECK: mov.d	$f20, $f14
+0x86 0xd8 0x00 0x46  # CHECK: mov.s	$f2, $f27
+0x21 0xf0 0x80 0x00  # CHECK: move	 $fp, $4
+0x21 0xc8 0xc0 0x00  # CHECK: move	 $25, $6
+0x00 0x48 0x9e 0x44  # CHECK: mtc1	$fp, $f9
+0x11 0x00 0x20 0x02  # CHECK: mthi	$17
+0x13 0x00 0xa0 0x03  # CHECK: mtlo	$sp
+0x13 0x00 0x20 0x03  # CHECK: mtlo	$25
+0x02 0xa5 0x30 0x46  # CHECK: mul.d	$f20, $f20, $f16
+0x82 0x57 0x02 0x46  # CHECK: mul.s	$f30, $f10, $f2
+0x18 0x00 0xb4 0x03  # CHECK: mult	$sp, $20
+0x18 0x00 0xa2 0x03  # CHECK: mult	$sp, $2
+0x19 0x00 0x9a 0x03  # CHECK: multu	$gp, $26
+0x19 0x00 0x32 0x01  # CHECK: multu	$9, $18
+0x23 0x10 0x02 0x00  # CHECK: negu	 $2, $2
+0x23 0x10 0x03 0x00  # CHECK: negu	 $2, $3
+0x87 0x96 0x20 0x46  # CHECK: neg.d	$f26, $f18
+0x47 0x78 0x00 0x46  # CHECK: neg.s	$f1, $f15
+0x00 0x00 0x00 0x00  # CHECK: nop
+0x27 0x38 0x07 0x00  # CHECK: nor	$7, $zero, $7
+0x25 0x60 0x1d 0x02  # CHECK: or	$12, $16, $sp
+0x04 0x00 0x42 0x34  # CHECK: ori	$2, $2, 4
+0x8c 0x21 0x20 0x46  # CHECK: round.w.d	$f6, $f4
+0xcc 0xe6 0x00 0x46  # CHECK: round.w.s	$f27, $f28
+0x6f 0xb2 0xd6 0xa1  # CHECK: sb	$22, -19857($14)
+0xd8 0x49 0x6f 0xe2  # CHECK: sc	$15, 18904($19)
+0x6e 0x77 0xbe 0xf5  # CHECK: sdc1	$f30, 30574($13)
+0x75 0x5a 0x54 0xfa  # CHECK: sdc2	$20, 23157($18)
+0xcb 0x16 0x4c 0xfd  # CHECK: sdc3	$12, 5835($10)
+0xd0 0xe5 0xee 0xa5  # CHECK: sh	$14, -6704($15)
+0x80 0x3c 0x07 0x00  # CHECK: sll	$7, $7, 18
+0x80 0x3c 0x00 0x00  # CHECK: sll	$7, $zero, 18
+0x04 0x38 0x20 0x01  # CHECK: sllv	$7, $zero, $9
+0x04 0x38 0x20 0x01  # CHECK: sllv	$7, $zero, $9
+0x2a 0xb8 0x7b 0x01  # CHECK: slt	$23, $11, $27
+0x11 0x25 0x51 0x29  # CHECK: slti	$17, $10, 9489
+0x55 0xc3 0x39 0x2f  # CHECK: sltiu	$25, $25, -15531
+0x2b 0xa0 0xab 0x02  # CHECK: sltu	$20, $21, $11
+0x55 0xc3 0x38 0x2f  # CHECK: sltiu	$24, $25, -15531
+0x04 0xb4 0x20 0x46  # CHECK: sqrt.d	$f16, $f22
+0x04 0x08 0x00 0x46  # CHECK: sqrt.s	$f0, $f1
+0xc3 0x8b 0x11 0x00  # CHECK: sra	$17, $17, 15
+0xc3 0x8b 0x17 0x00  # CHECK: sra	$17, $23, 15
+0x07 0x88 0xb7 0x03  # CHECK: srav	$17, $23, $sp
+0x07 0x88 0xb7 0x03  # CHECK: srav	$17, $23, $sp
+0xc2 0x11 0x02 0x00  # CHECK: srl	$2, $2, 7
+0xc2 0x11 0x02 0x00  # CHECK: srl	$2, $2, 7
+0x06 0xc8 0x94 0x00  # CHECK: srlv	$25, $20, $4
+0x06 0xc8 0x94 0x00  # CHECK: srlv	$25, $20, $4
+0x40 0x00 0x00 0x00  # CHECK: ssnop
+0x22 0xb0 0x6c 0x02  # CHECK: sub	$22, $19, $12
+0x36 0x0c 0x36 0x22  # CHECK: addi	$22, $17, 3126
+0x90 0xe6 0xad 0x21  # CHECK: addi	$13, $13, -6512
+0x81 0x14 0x30 0x46  # CHECK: sub.d	$f18, $f2, $f16
+0xc1 0xb5 0x16 0x46  # CHECK: sub.s	$f23, $f22, $f22
+0x23 0xe8 0xd6 0x02  # CHECK: subu	$sp, $22, $22
+0x50 0xd8 0xbf 0xaf  # CHECK: sw	$ra, -10160($sp)
+0xef 0xde 0x06 0xe7  # CHECK: swc1	$f6, -8465($24)
+0x30 0x61 0x19 0xea  # CHECK: swc2	$25, 24880($16)
+0xf7 0x81 0x4a 0xef  # CHECK: swc3	$10, -32265($26)
+0x7e 0x35 0x6f 0xaa  # CHECK: swl	$15, 13694($19)
+0x22 0x98 0xd1 0xb9  # CHECK: swr	$17, -26590($14)
+0x34 0x00 0x03 0x00  # CHECK: teq	 $zero, $3
+0x34 0x9b 0xa7 0x00  # CHECK: teq	$5, $7, 620
+0xa0 0xbb 0xac 0x06  # CHECK: teqi	$21, 48032
+0x30 0x00 0xea 0x00  # CHECK: tge	 $7, $10
+0x30 0x55 0xb3 0x00  # CHECK: tge	$5, $19, 340
+0xa1 0x13 0x28 0x06  # CHECK: tgei	$17, 5025
+0x33 0x90 0xa9 0x07  # CHECK: tgeiu	$sp, 36915
+0x31 0x00 0xdc 0x02  # CHECK: tgeu	 $22, $gp
+0xf1 0x5e 0x8e 0x02  # CHECK: tgeu	$20, $14, 379
+0x08 0x00 0x00 0x42  # CHECK: tlbp
+0x01 0x00 0x00 0x42  # CHECK: tlbr
+0x02 0x00 0x00 0x42  # CHECK: tlbwi
+0x06 0x00 0x00 0x42  # CHECK: tlbwr
+0x32 0x00 0xed 0x01  # CHECK: tlt	 $15, $13
+0x72 0x21 0x53 0x00  # CHECK: tlt	$2, $19, 133
+0xbd 0xad 0xca 0x05  # CHECK: tlti	$14, 44477
+0x2c 0xec 0xeb 0x07  # CHECK: tltiu	$ra, 60460
+0x33 0x00 0x70 0x01  # CHECK: tltu	 $11, $16
+0x33 0xfe 0x1d 0x02  # CHECK: tltu	$16, $sp, 1016
+0x36 0x00 0xd1 0x00  # CHECK: tne	 $6, $17
+0x76 0xdd 0xe8 0x00  # CHECK: tne	$7, $8, 885
+0x31 0x8c 0x8e 0x05  # CHECK: tnei	$12, 35889
+0x8d 0x75 0x20 0x46  # CHECK: trunc.w.d	$f22, $f14
+0x0d 0xf7 0x00 0x46  # CHECK: trunc.w.s	$f28, $f30
+0x26 0x90 0x9e 0x00  # CHECK: xor	$18, $4, $fp
diff --git a/test/MC/Disassembler/Mips/mips2/valid-mips2.txt b/test/MC/Disassembler/Mips/mips2/valid-mips2.txt
new file mode 100644
index 000000000000..3d20e629ff53
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips2/valid-mips2.txt
@@ -0,0 +1,159 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips2 | FileCheck %s
+# CHECK: .text
+0x46 0x20 0xc1 0x85  # CHECK: abs.d	$f6, $f24
+0x46 0x00 0x82 0x45  # CHECK: abs.s	$f9, $f16
+0x02 0x45 0xb8 0x20  # CHECK: add	$23, $18, $5
+0x21 0xc9 0x3b 0x48  # CHECK: addi	$9, $14, 15176
+0x23 0x18 0xe3 0xe7  # CHECK: addi	$24, $24, -7193
+0x46 0x3c 0x30 0x00  # CHECK: add.d	$f0, $f6, $f28
+0x46 0x18 0xaa 0x00  # CHECK: add.s	$f8, $f21, $f24
+0x21 0x2d 0x66 0xd2  # CHECK: addi	$13, $9, 26322
+0x21 0x08 0xff 0xfe  # CHECK: addi	$8, $8, -2
+0x00 0x86 0x48 0x21  # CHECK: addu	$9, $4, $6
+0x25 0x29 0x00 0x0a  # CHECK: addiu	$9, $9, 10
+0x00 0x4c 0xb8 0x24  # CHECK: and	$23, $2, $12
+0x30 0x42 0x00 0x04  # CHECK: andi	$2, $2, 4
+0x45 0x00 0x00 0x01  # CHECK: bc1f	 8
+0x45 0x02 0x00 0x0c  # CHECK: bc1fl	 52
+0x45 0x01 0x00 0x01  # CHECK: bc1t	 8
+0x45 0x03 0xf7 0xf4  # CHECK: bc1tl	 -8236
+0x04 0x11 0x14 0x9b  # CHECK: bal	21104
+0x00 0x00 0x00 0x00  # CHECK: nop
+0x04 0xd0 0x14 0x9b  # CHECK: bltzal	$6, 21104
+0x00 0x00 0x00 0x00  # CHECK: nop
+0x51 0xd3 0x0c 0x40  # CHECK: beql	$14, $19, 12548
+0x05 0x93 0x07 0x1f  # CHECK: bgezall	$12, 7296
+0x04 0x83 0xf9 0x4d  # CHECK: bgezl	$4, -6856
+0x5d 0x40 0xfc 0x59  # CHECK: bgtzl	$10, -3736
+0x58 0xc0 0x02 0xe7  # CHECK: blezl	$6, 2976
+0x04 0xd2 0x00 0x7a  # CHECK: bltzall	$6, 492
+0x06 0x22 0xf6 0x45  # CHECK: bltzl	$17, -9960
+0x57 0x94 0x04 0xfc  # CHECK: bnel	$gp, $20, 5108
+0x46 0x3c 0xe0 0x3b  # CHECK: c.ngl.d	$f28, $f28
+0x46 0x30 0x00 0x39  # CHECK: c.ngle.d	$f0, $f16
+0x46 0x20 0xf0 0x38  # CHECK: c.sf.d	$f30, $f0
+0x46 0x16 0x70 0x38  # CHECK: c.sf.s	$f14, $f22
+0x46 0x20 0xc2 0xce  # CHECK: ceil.w.d	$f11, $f24
+0x46 0x00 0xa1 0x8e  # CHECK: ceil.w.s	$f6, $f20
+0x44 0x51 0xa8 0x00  # CHECK: cfc1	$17, $21
+0x44 0xc6 0xd0 0x00  # CHECK: ctc1	$6, $26
+0x46 0x00 0xe5 0xa1  # CHECK: cvt.d.s	$f22, $f28
+0x46 0x80 0x5e 0xa1  # CHECK: cvt.d.w	$f26, $f11
+0x46 0x20 0x46 0xa0  # CHECK: cvt.s.d	$f26, $f8
+0x46 0x80 0x7d 0xa0  # CHECK: cvt.s.w	$f22, $f15
+0x46 0x20 0x75 0x24  # CHECK: cvt.w.d	$f20, $f14
+0x46 0x00 0xc5 0x24  # CHECK: cvt.w.s	$f20, $f24
+0x03 0x2b 0x00 0x1a  # CHECK: div	$zero, $25, $11
+0x46 0x3a 0xa7 0x03  # CHECK: div.d	$f28, $f20, $f26
+0x46 0x0f 0x29 0x03  # CHECK: div.s	$f4, $f5, $f15
+0x03 0x2f 0x00 0x1b  # CHECK: divu	$zero, $25, $15
+0x00 0x00 0x00 0xc0  # CHECK: ehb
+0x46 0x20 0x53 0x8f  # CHECK: floor.w.d	$f14, $f10
+0x46 0x00 0x4a 0x0f  # CHECK: floor.w.s	$f8, $f9
+0x81 0x58 0xc7 0x4d  # CHECK: lb	$24, -14515($10)
+0x90 0x68 0x75 0xf3  # CHECK: lbu	$8, 30195($3)
+0xd6 0x0a 0x40 0x07  # CHECK: ldc1	$f10, 16391($16)
+0xd8 0x28 0xad 0x43  # CHECK: ldc2	$8, -21181($1)
+0xde 0x3d 0x90 0x1b  # CHECK: ldc3	$29, -28645($17)
+0x86 0xab 0xde 0x94  # CHECK: lh	$11, -8556($21)
+0x94 0x53 0xa6 0xbd  # CHECK: lhu	$19, -22851($2)
+0x24 0x01 0x8b 0xb3  # CHECK: addiu	$1, $zero, -29773
+0x24 0x00 0x8b 0x3f  # CHECK: addiu	$zero, $zero, -29889
+0xc2 0x42 0xe3 0x67  # CHECK: ll	$2, -7321($18)
+0x8c 0xa8 0x16 0x2a  # CHECK: lw	$8, 5674($5)
+0xc7 0x50 0x27 0xf1  # CHECK: lwc1	$f16, 10225($26)
+0xc8 0xd2 0xfc 0xb7  # CHECK: lwc2	$18, -841($6)
+0xcf 0x4a 0x81 0xf7  # CHECK: lwc3	$10, -32265($26)
+0x89 0xf4 0xef 0x79  # CHECK: lwl	$20, -4231($15)
+0x9b 0x80 0xb5 0x35  # CHECK: lwr	$zero, -19147($gp)
+0x44 0x07 0xd8 0x00  # CHECK: mfc1	$7, $f27
+0x00 0x00 0x98 0x10  # CHECK: mfhi	$19
+0x00 0x00 0xe8 0x10  # CHECK: mfhi	$sp
+0x00 0x00 0x88 0x12  # CHECK: mflo	$17
+0x46 0x20 0x75 0x06  # CHECK: mov.d	$f20, $f14
+0x46 0x00 0xd8 0x86  # CHECK: mov.s	$f2, $f27
+0x00 0x80 0xf0 0x21  # CHECK: move	 $fp, $4
+0x00 0xc0 0xc8 0x21  # CHECK: move	 $25, $6
+0x44 0x9e 0x48 0x00  # CHECK: mtc1	$fp, $f9
+0x02 0x20 0x00 0x11  # CHECK: mthi	$17
+0x03 0xa0 0x00 0x13  # CHECK: mtlo	$sp
+0x03 0x20 0x00 0x13  # CHECK: mtlo	$25
+0x46 0x30 0xa5 0x02  # CHECK: mul.d	$f20, $f20, $f16
+0x46 0x02 0x57 0x82  # CHECK: mul.s	$f30, $f10, $f2
+0x03 0xb4 0x00 0x18  # CHECK: mult	$sp, $20
+0x03 0xa2 0x00 0x18  # CHECK: mult	$sp, $2
+0x03 0x9a 0x00 0x19  # CHECK: multu	$gp, $26
+0x01 0x32 0x00 0x19  # CHECK: multu	$9, $18
+0x00 0x02 0x10 0x23  # CHECK: negu	 $2, $2
+0x00 0x03 0x10 0x23  # CHECK: negu	 $2, $3
+0x46 0x20 0x96 0x87  # CHECK: neg.d	$f26, $f18
+0x46 0x00 0x78 0x47  # CHECK: neg.s	$f1, $f15
+0x00 0x00 0x00 0x00  # CHECK: nop
+0x00 0x07 0x38 0x27  # CHECK: nor	$7, $zero, $7
+0x02 0x1d 0x60 0x25  # CHECK: or	$12, $16, $sp
+0x34 0x42 0x00 0x04  # CHECK: ori	$2, $2, 4
+0x46 0x20 0x21 0x8c  # CHECK: round.w.d	$f6, $f4
+0x46 0x00 0xe6 0xcc  # CHECK: round.w.s	$f27, $f28
+0xa1 0xd6 0xb2 0x6f  # CHECK: sb	$22, -19857($14)
+0xe2 0x6f 0x49 0xd8  # CHECK: sc	$15, 18904($19)
+0xf5 0xbe 0x77 0x6e  # CHECK: sdc1	$f30, 30574($13)
+0xfa 0x54 0x5a 0x75  # CHECK: sdc2	$20, 23157($18)
+0xfd 0x4c 0x16 0xcb  # CHECK: sdc3	$12, 5835($10)
+0xa5 0xee 0xe5 0xd0  # CHECK: sh	$14, -6704($15)
+0x00 0x07 0x3c 0x80  # CHECK: sll	$7, $7, 18
+0x00 0x00 0x3c 0x80  # CHECK: sll	$7, $zero, 18
+0x01 0x20 0x38 0x04  # CHECK: sllv	$7, $zero, $9
+0x01 0x20 0x38 0x04  # CHECK: sllv	$7, $zero, $9
+0x01 0x7b 0xb8 0x2a  # CHECK: slt	$23, $11, $27
+0x29 0x51 0x25 0x11  # CHECK: slti	$17, $10, 9489
+0x2f 0x39 0xc3 0x55  # CHECK: sltiu	$25, $25, -15531
+0x02 0xab 0xa0 0x2b  # CHECK: sltu	$20, $21, $11
+0x2f 0x38 0xc3 0x55  # CHECK: sltiu	$24, $25, -15531
+0x46 0x20 0xb4 0x04  # CHECK: sqrt.d	$f16, $f22
+0x46 0x00 0x08 0x04  # CHECK: sqrt.s	$f0, $f1
+0x00 0x11 0x8b 0xc3  # CHECK: sra	$17, $17, 15
+0x00 0x17 0x8b 0xc3  # CHECK: sra	$17, $23, 15
+0x03 0xb7 0x88 0x07  # CHECK: srav	$17, $23, $sp
+0x03 0xb7 0x88 0x07  # CHECK: srav	$17, $23, $sp
+0x00 0x02 0x11 0xc2  # CHECK: srl	$2, $2, 7
+0x00 0x02 0x11 0xc2  # CHECK: srl	$2, $2, 7
+0x00 0x94 0xc8 0x06  # CHECK: srlv	$25, $20, $4
+0x00 0x94 0xc8 0x06  # CHECK: srlv	$25, $20, $4
+0x00 0x00 0x00 0x40  # CHECK: ssnop
+0x02 0x6c 0xb0 0x22  # CHECK: sub	$22, $19, $12
+0x22 0x36 0x0c 0x36  # CHECK: addi	$22, $17, 3126
+0x21 0xad 0xe6 0x90  # CHECK: addi	$13, $13, -6512
+0x46 0x30 0x14 0x81  # CHECK: sub.d	$f18, $f2, $f16
+0x46 0x16 0xb5 0xc1  # CHECK: sub.s	$f23, $f22, $f22
+0x02 0xd6 0xe8 0x23  # CHECK: subu	$sp, $22, $22
+0xaf 0xbf 0xd8 0x50  # CHECK: sw	$ra, -10160($sp)
+0xe7 0x06 0xde 0xef  # CHECK: swc1	$f6, -8465($24)
+0xea 0x19 0x61 0x30  # CHECK: swc2	$25, 24880($16)
+0xef 0x4a 0x81 0xf7  # CHECK: swc3	$10, -32265($26)
+0xaa 0x6f 0x35 0x7e  # CHECK: swl	$15, 13694($19)
+0xb9 0xd1 0x98 0x22  # CHECK: swr	$17, -26590($14)
+0x00 0x03 0x00 0x34  # CHECK: teq	 $zero, $3
+0x00 0xa7 0x9b 0x34  # CHECK: teq	$5, $7, 620
+0x06 0xac 0xbb 0xa0  # CHECK: teqi	$21, 48032
+0x00 0xea 0x00 0x30  # CHECK: tge	 $7, $10
+0x00 0xb3 0x55 0x30  # CHECK: tge	$5, $19, 340
+0x06 0x28 0x13 0xa1  # CHECK: tgei	$17, 5025
+0x07 0xa9 0x90 0x33  # CHECK: tgeiu	$sp, 36915
+0x02 0xdc 0x00 0x31  # CHECK: tgeu	 $22, $gp
+0x02 0x8e 0x5e 0xf1  # CHECK: tgeu	$20, $14, 379
+0x42 0x00 0x00 0x08  # CHECK: tlbp
+0x42 0x00 0x00 0x01  # CHECK: tlbr
+0x42 0x00 0x00 0x02  # CHECK: tlbwi
+0x42 0x00 0x00 0x06  # CHECK: tlbwr
+0x01 0xed 0x00 0x32  # CHECK: tlt	 $15, $13
+0x00 0x53 0x21 0x72  # CHECK: tlt	$2, $19, 133
+0x05 0xca 0xad 0xbd  # CHECK: tlti	$14, 44477
+0x07 0xeb 0xec 0x2c  # CHECK: tltiu	$ra, 60460
+0x01 0x70 0x00 0x33  # CHECK: tltu	 $11, $16
+0x02 0x1d 0xfe 0x33  # CHECK: tltu	$16, $sp, 1016
+0x00 0xd1 0x00 0x36  # CHECK: tne	 $6, $17
+0x00 0xe8 0xdd 0x76  # CHECK: tne	$7, $8, 885
+0x05 0x8e 0x8c 0x31  # CHECK: tnei	$12, 35889
+0x46 0x20 0x75 0x8d  # CHECK: trunc.w.d	$f22, $f14
+0x46 0x00 0xf7 0x0d  # CHECK: trunc.w.s	$f28, $f30
+0x00 0x9e 0x90 0x26  # CHECK: xor	$18, $4, $fp
diff --git a/test/MC/Disassembler/Mips/mips3/valid-mips3-el.txt b/test/MC/Disassembler/Mips/mips3/valid-mips3-el.txt
new file mode 100644
index 000000000000..43397cb633ae
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips3/valid-mips3-el.txt
@@ -0,0 +1,209 @@
+# RUN: llvm-mc %s -triple=mips64el-unknown-linux -disassemble -mcpu=mips3 | FileCheck %s
+# CHECK: .text
+0x85 0xc1 0x20 0x46 # CHECK: abs.d	$f6, $f24
+0x45 0x82 0x00 0x46 # CHECK: abs.s	$f9, $f16
+0x20 0xb8 0x45 0x02 # CHECK: add	$23, $18, $5
+0x48 0x3b 0xc9 0x21 # CHECK: addi	$9, $14, 15176
+0xe7 0xe3 0x18 0x23 # CHECK: addi	$24, $24, -7193
+0x00 0x30 0x3c 0x46 # CHECK: add.d	$f0, $f6, $f28
+0x00 0xaa 0x18 0x46 # CHECK: add.s	$f8, $f21, $f24
+0xd2 0x66 0x2d 0x21 # CHECK: addi	$13, $9, 26322
+0xfe 0xff 0x08 0x21 # CHECK: addi	$8, $8, -2
+0x21 0x48 0x86 0x00 # CHECK: addu	$9, $4, $6
+0x0a 0x00 0x29 0x25 # CHECK: addiu	$9, $9, 10
+0x24 0xb8 0x4c 0x00 # CHECK: and	$23, $2, $12
+0x04 0x00 0x42 0x30 # CHECK: andi	$2, $2, 4
+0x01 0x00 0x00 0x45 # CHECK: bc1f	 8
+0x0c 0x00 0x02 0x45 # CHECK: bc1fl	 52
+0x01 0x00 0x01 0x45 # CHECK: bc1t	 8
+0xf4 0xf7 0x03 0x45 # CHECK: bc1tl	 -8236
+0x9b 0x14 0x11 0x04 # CHECK: bal	21104
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x9b 0x14 0xd0 0x04 # CHECK: bltzal	$6, 21104
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x40 0x0c 0xd3 0x51 # CHECK: beql	$14, $19, 12548
+0x1f 0x07 0x93 0x05 # CHECK: bgezall	$12, 7296
+0x4d 0xf9 0x83 0x04 # CHECK: bgezl	$4, -6856
+0x59 0xfc 0x40 0x5d # CHECK: bgtzl	$10, -3736
+0xe7 0x02 0xc0 0x58 # CHECK: blezl	$6, 2976
+0x7a 0x00 0xd2 0x04 # CHECK: bltzall	$6, 492
+0x45 0xf6 0x22 0x06 # CHECK: bltzl	$17, -9960
+0xfc 0x04 0x94 0x57 # CHECK: bnel	$gp, $20, 5108
+0x08 0x00 0xa1 0xbc # CHECK: cache   1, 8($5)
+0x3b 0xe0 0x3c 0x46 # CHECK: c.ngl.d	$f28, $f28
+0x39 0x00 0x30 0x46 # CHECK: c.ngle.d	$f0, $f16
+0x38 0xf0 0x20 0x46 # CHECK: c.sf.d	$f30, $f0
+0x38 0x70 0x16 0x46 # CHECK: c.sf.s	$f14, $f22
+0x4a 0x18 0x20 0x46 # CHECK: ceil.l.d  $f1, $f3
+0x8a 0x6c 0x00 0x46 # CHECK: ceil.l.s  $f18, $f13
+0xce 0xc2 0x20 0x46 # CHECK: ceil.w.d	$f11, $f24
+0x8e 0xa1 0x00 0x46 # CHECK: ceil.w.s	$f6, $f20
+0x00 0xa8 0x51 0x44 # CHECK: cfc1	$17, $21
+0x00 0xd0 0xc6 0x44 # CHECK: ctc1	$6, $26
+0xa1 0xe5 0x00 0x46 # CHECK: cvt.d.s	$f22, $f28
+0xa1 0x5e 0x80 0x46 # CHECK: cvt.d.w	$f26, $f11
+0x21 0x81 0xa0 0x46 # CHECK: cvt.d.l $f4, $f16
+0x25 0x7e 0x20 0x46 # CHECK: cvt.l.d $f24, $f15
+0xe5 0xea 0x00 0x46 # CHECK: cvt.l.s $f11, $f29
+0xe0 0xf3 0xa0 0x46 # CHECK: cvt.s.l $f15, $f30
+0xa0 0x46 0x20 0x46 # CHECK: cvt.s.d	$f26, $f8
+0xa0 0x7d 0x80 0x46 # CHECK: cvt.s.w	$f22, $f15
+0x24 0x75 0x20 0x46 # CHECK: cvt.w.d	$f20, $f14
+0x24 0xc5 0x00 0x46 # CHECK: cvt.w.s	$f20, $f24
+0x2c 0x98 0x3f 0x00 # CHECK: dadd    $19, $1, $ra
+0xc7 0x93 0x9d 0x62 # CHECK: daddi   $sp, $20, -27705
+0xc7 0x93 0xbd 0x63 # CHECK: daddi   $sp, $sp, -27705
+0xc7 0x93 0x9d 0x62 # CHECK: daddi   $sp, $20, -27705
+0xc7 0x93 0x9d 0x62 # CHECK: daddi   $sp, $20, -27705
+0xc7 0x93 0xbd 0x63 # CHECK: daddi   $sp, $sp, -27705
+0x16 0xee 0xda 0x66 # CHECK: daddiu  $26, $22, -4586
+0x2d 0x98 0x3f 0x00 # CHECK: daddu   $19, $1, $ra
+0x9f 0x46 0x58 0x64 # CHECK: daddiu  $24, $2, 18079
+0x3f 0x69 0x73 0x66 # CHECK: daddiu  $19, $19, 26943
+0x1e 0x00 0x53 0x03 # CHECK: ddiv    $zero, $26, $19
+0x1f 0x00 0x11 0x02 # CHECK: ddivu   $zero, $16, $17
+0x1a 0x00 0x2b 0x03 # CHECK: div	$zero, $25, $11
+0x03 0xa7 0x3a 0x46 # CHECK: div.d	$f28, $f20, $f26
+0x03 0x29 0x0f 0x46 # CHECK: div.s	$f4, $f5, $f15
+0x1b 0x00 0x2f 0x03 # CHECK: divu	$zero, $25, $15
+0x00 0x68 0x2c 0x44 # CHECK: dmfc1   $12, $f13
+0x00 0x70 0xb0 0x44 # CHECK: dmtc1   $16, $f14
+0x1c 0x00 0xe9 0x02 # CHECK: dmult   $23, $9
+0x1d 0x00 0xa6 0x00 # CHECK: dmultu  $5, $6
+0xb8 0x04 0x00 0x00 # CHECK: dsll    $zero, $zero, 18
+0xb8 0x04 0x14 0x00 # CHECK: dsll    $zero, $20, 18
+0x14 0x00 0x94 0x01 # CHECK: dsllv   $zero, $20, $12
+0xbc 0x04 0x00 0x00 # CHECK: dsll32  $zero, $zero, 18
+0xbc 0x04 0x00 0x00 # CHECK: dsll32  $zero, $zero, 18
+0x14 0x00 0x94 0x01 # CHECK: dsllv   $zero, $20, $12
+0xbb 0xe2 0x1c 0x00 # CHECK: dsra    $gp, $gp, 10
+0xbb 0xe2 0x12 0x00 # CHECK: dsra    $gp, $18, 10
+0x17 0xe0 0x72 0x02 # CHECK: dsrav   $gp, $18, $19
+0xbf 0xe2 0x1c 0x00 # CHECK: dsra32  $gp, $gp, 10
+0xbf 0xe2 0x12 0x00 # CHECK: dsra32  $gp, $18, 10
+0x17 0xe0 0x72 0x02 # CHECK: dsrav   $gp, $18, $19
+0xfa 0x9d 0x13 0x00 # CHECK: dsrl    $19, $19, 23
+0xfa 0x9d 0x06 0x00 # CHECK: dsrl    $19, $6, 23
+0x16 0x98 0x86 0x02 # CHECK: dsrlv   $19, $6, $20
+0xfe 0x9d 0x13 0x00 # CHECK: dsrl32  $19, $19, 23
+0xfe 0x9d 0x06 0x00 # CHECK: dsrl32  $19, $6, 23
+0x16 0x98 0x86 0x02 # CHECK: dsrlv   $19, $6, $20
+0x2e 0x38 0xc8 0x02 # CHECK: dsub    $7, $22, $8
+0x2f 0x28 0xba 0x00 # CHECK: dsubu   $5, $5, $26
+0xc0 0x00 0x00 0x00 # CHECK: ehb
+0x18 0x00 0x00 0x42 # CHECK: eret
+0x8f 0x53 0x20 0x46 # CHECK: floor.w.d	$f14, $f10
+0x0f 0x4a 0x00 0x46 # CHECK: floor.w.s	$f8, $f9
+0x8b 0x3e 0x20 0x46 # CHECK: floor.l.d $f26, $f7
+0x0b 0x2b 0x00 0x46 # CHECK: floor.l.s $f12, $f5
+0x4d 0xc7 0x58 0x81 # CHECK: lb	$24, -14515($10)
+0xf3 0x75 0x68 0x90 # CHECK: lbu	$8, 30195($3)
+0x07 0x40 0x0a 0xd6 # CHECK: ldc1	$f10, 16391($16)
+0x43 0xad 0x28 0xd8 # CHECK: ldc2	$8, -21181($1)
+0x94 0xde 0xab 0x86 # CHECK: lh	$11, -8556($21)
+0xbd 0xa6 0x53 0x94 # CHECK: lhu	$19, -22851($2)
+0xb3 0x8b 0x01 0x24 # CHECK: addiu	$1, $zero, -29773
+0x3f 0x8b 0x00 0x24 # CHECK: addiu	$zero, $zero, -29889
+0x67 0xe3 0x42 0xc2 # CHECK: ll	$2, -7321($18)
+0x2a 0x16 0xa8 0x8c # CHECK: lw	$8, 5674($5)
+0xf1 0x27 0x50 0xc7 # CHECK: lwc1	$f16, 10225($26)
+0xb7 0xfc 0xd2 0xc8 # CHECK: lwc2	$18, -841($6)
+0x79 0xef 0xf4 0x89 # CHECK: lwl	$20, -4231($15)
+0x35 0xb5 0x80 0x9b # CHECK: lwr	$zero, -19147($gp)
+0x00 0xd8 0x07 0x44 # CHECK: mfc1	$7, $f27
+0x10 0x98 0x00 0x00 # CHECK: mfhi	$19
+0x10 0xe8 0x00 0x00 # CHECK: mfhi	$sp
+0x12 0x88 0x00 0x00 # CHECK: mflo	$17
+0x06 0x75 0x20 0x46 # CHECK: mov.d	$f20, $f14
+0x86 0xd8 0x00 0x46 # CHECK: mov.s	$f2, $f27
+0x21 0xf0 0x80 0x00 # CHECK: move	 $fp, $4
+0x21 0xc8 0xc0 0x00 # CHECK: move	 $25, $6
+0x00 0x48 0x9e 0x44 # CHECK: mtc1	$fp, $f9
+0x11 0x00 0x20 0x02 # CHECK: mthi	$17
+0x13 0x00 0xa0 0x03 # CHECK: mtlo	$sp
+0x13 0x00 0x20 0x03 # CHECK: mtlo	$25
+0x02 0xa5 0x30 0x46 # CHECK: mul.d	$f20, $f20, $f16
+0x82 0x57 0x02 0x46 # CHECK: mul.s	$f30, $f10, $f2
+0x18 0x00 0xb4 0x03 # CHECK: mult	$sp, $20
+0x18 0x00 0xa2 0x03 # CHECK: mult	$sp, $2
+0x19 0x00 0x9a 0x03 # CHECK: multu	$gp, $26
+0x19 0x00 0x32 0x01 # CHECK: multu	$9, $18
+0x23 0x10 0x02 0x00 # CHECK: negu	 $2, $2
+0x23 0x10 0x03 0x00 # CHECK: negu	 $2, $3
+0x87 0x96 0x20 0x46 # CHECK: neg.d	$f26, $f18
+0x47 0x78 0x00 0x46 # CHECK: neg.s	$f1, $f15
+0x00 0x00 0x00 0x00 # CHECK: nop
+0x27 0x38 0x07 0x00 # CHECK: nor	$7, $zero, $7
+0x25 0x60 0x1d 0x02 # CHECK: or	$12, $16, $sp
+0x04 0x00 0x42 0x34 # CHECK: ori	$2, $2, 4
+0x08 0x0b 0x20 0x46 # CHECK: round.l.d $f12, $f1
+0x48 0x2e 0x00 0x46 # CHECK: round.l.s $f25, $f5
+0x8c 0x21 0x20 0x46 # CHECK: round.w.d	$f6, $f4
+0xcc 0xe6 0x00 0x46 # CHECK: round.w.s	$f27, $f28
+0x6f 0xb2 0xd6 0xa1 # CHECK: sb	$22, -19857($14)
+0xd8 0x49 0x6f 0xe2 # CHECK: sc	$15, 18904($19)
+0xcd 0xdf 0xaf 0xf3 # CHECK: scd   $15, -8243($sp)
+0xcb 0x16 0x4c 0xfd # CHECK: sd    $12, 5835($10)
+0x1f 0xae 0xc7 0xb3 # CHECK: sdl   $7, -20961($fp)
+0x39 0xb0 0x8b 0xb5 # CHECK: sdr   $11, -20423($12)
+0x6e 0x77 0xbe 0xf5 # CHECK: sdc1	$f30, 30574($13)
+0x75 0x5a 0x54 0xfa # CHECK: sdc2	$20, 23157($18)
+0xd0 0xe5 0xee 0xa5 # CHECK: sh	$14, -6704($15)
+0x80 0x3c 0x07 0x00 # CHECK: sll	$7, $7, 18
+0x80 0x3c 0x00 0x00 # CHECK: sll	$7, $zero, 18
+0x04 0x38 0x20 0x01 # CHECK: sllv	$7, $zero, $9
+0x04 0x38 0x20 0x01 # CHECK: sllv	$7, $zero, $9
+0x2a 0xb8 0x7b 0x01 # CHECK: slt	$23, $11, $27
+0x11 0x25 0x51 0x29 # CHECK: slti	$17, $10, 9489
+0x55 0xc3 0x39 0x2f # CHECK: sltiu	$25, $25, -15531
+0x2b 0xa0 0xab 0x02 # CHECK: sltu	$20, $21, $11
+0x55 0xc3 0x38 0x2f # CHECK: sltiu	$24, $25, -15531
+0x04 0xb4 0x20 0x46 # CHECK: sqrt.d	$f16, $f22
+0x04 0x08 0x00 0x46 # CHECK: sqrt.s	$f0, $f1
+0xc3 0x8b 0x11 0x00 # CHECK: sra	$17, $17, 15
+0xc3 0x8b 0x17 0x00 # CHECK: sra	$17, $23, 15
+0x07 0x88 0xb7 0x03 # CHECK: srav	$17, $23, $sp
+0x07 0x88 0xb7 0x03 # CHECK: srav	$17, $23, $sp
+0xc2 0x11 0x02 0x00 # CHECK: srl	$2, $2, 7
+0xc2 0x11 0x02 0x00 # CHECK: srl	$2, $2, 7
+0x06 0xc8 0x94 0x00 # CHECK: srlv	$25, $20, $4
+0x06 0xc8 0x94 0x00 # CHECK: srlv	$25, $20, $4
+0x40 0x00 0x00 0x00 # CHECK: ssnop
+0x22 0xb0 0x6c 0x02 # CHECK: sub	$22, $19, $12
+0x36 0x0c 0x36 0x22 # CHECK: addi	$22, $17, 3126
+0x90 0xe6 0xad 0x21 # CHECK: addi	$13, $13, -6512
+0x81 0x14 0x30 0x46 # CHECK: sub.d	$f18, $f2, $f16
+0xc1 0xb5 0x16 0x46 # CHECK: sub.s	$f23, $f22, $f22
+0x23 0xe8 0xd6 0x02 # CHECK: subu	$sp, $22, $22
+0x50 0xd8 0xbf 0xaf # CHECK: sw	$ra, -10160($sp)
+0xef 0xde 0x06 0xe7 # CHECK: swc1	$f6, -8465($24)
+0x30 0x61 0x19 0xea # CHECK: swc2	$25, 24880($16)
+0x7e 0x35 0x6f 0xaa # CHECK: swl	$15, 13694($19)
+0x22 0x98 0xd1 0xb9 # CHECK: swr	$17, -26590($14)
+0x34 0x00 0x03 0x00 # CHECK: teq	 $zero, $3
+0x34 0x9b 0xa7 0x00 # CHECK: teq	$5, $7, 620
+0xa0 0xbb 0xac 0x06 # CHECK: teqi	$21, 48032
+0x30 0x00 0xea 0x00 # CHECK: tge	 $7, $10
+0x30 0x55 0xb3 0x00 # CHECK: tge	$5, $19, 340
+0xa1 0x13 0x28 0x06 # CHECK: tgei	$17, 5025
+0x33 0x90 0xa9 0x07 # CHECK: tgeiu	$sp, 36915
+0x31 0x00 0xdc 0x02 # CHECK: tgeu	 $22, $gp
+0xf1 0x5e 0x8e 0x02 # CHECK: tgeu	$20, $14, 379
+0x08 0x00 0x00 0x42 # CHECK: tlbp
+0x01 0x00 0x00 0x42 # CHECK: tlbr
+0x02 0x00 0x00 0x42 # CHECK: tlbwi
+0x06 0x00 0x00 0x42 # CHECK: tlbwr
+0x32 0x00 0xed 0x01 # CHECK: tlt	 $15, $13
+0x72 0x21 0x53 0x00 # CHECK: tlt	$2, $19, 133
+0xbd 0xad 0xca 0x05 # CHECK: tlti	$14, 44477
+0x2c 0xec 0xeb 0x07 # CHECK: tltiu	$ra, 60460
+0x33 0x00 0x70 0x01 # CHECK: tltu	 $11, $16
+0x33 0xfe 0x1d 0x02 # CHECK: tltu	$16, $sp, 1016
+0x36 0x00 0xd1 0x00 # CHECK: tne	 $6, $17
+0x76 0xdd 0xe8 0x00 # CHECK: tne	$7, $8, 885
+0x31 0x8c 0x8e 0x05 # CHECK: tnei	$12, 35889
+0xc9 0xbd 0x20 0x46 # CHECK: trunc.l.d $f23, $f23
+0x09 0xff 0x00 0x46 # CHECK: trunc.l.s $f28, $f31
+0x8d 0x75 0x20 0x46 # CHECK: trunc.w.d	$f22, $f14
+0x0d 0xf7 0x00 0x46 # CHECK: trunc.w.s	$f28, $f30
+0x26 0x90 0x9e 0x00 # CHECK: xor	$18, $4, $fp
diff --git a/test/MC/Disassembler/Mips/mips3/valid-mips3.txt b/test/MC/Disassembler/Mips/mips3/valid-mips3.txt
new file mode 100644
index 000000000000..95c06e0e68d7
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips3/valid-mips3.txt
@@ -0,0 +1,209 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips3 | FileCheck %s
+# CHECK: .text
+0x46 0x20 0xc1 0x85  # CHECK: abs.d	$f6, $f24
+0x46 0x00 0x82 0x45  # CHECK: abs.s	$f9, $f16
+0x02 0x45 0xb8 0x20  # CHECK: add	$23, $18, $5
+0x21 0xc9 0x3b 0x48  # CHECK: addi	$9, $14, 15176
+0x23 0x18 0xe3 0xe7  # CHECK: addi	$24, $24, -7193
+0x46 0x3c 0x30 0x00  # CHECK: add.d	$f0, $f6, $f28
+0x46 0x18 0xaa 0x00  # CHECK: add.s	$f8, $f21, $f24
+0x21 0x2d 0x66 0xd2  # CHECK: addi	$13, $9, 26322
+0x21 0x08 0xff 0xfe  # CHECK: addi	$8, $8, -2
+0x00 0x86 0x48 0x21  # CHECK: addu	$9, $4, $6
+0x25 0x29 0x00 0x0a  # CHECK: addiu	$9, $9, 10
+0x00 0x4c 0xb8 0x24  # CHECK: and	$23, $2, $12
+0x30 0x42 0x00 0x04  # CHECK: andi	$2, $2, 4
+0x45 0x00 0x00 0x01  # CHECK: bc1f	 8
+0x45 0x02 0x00 0x0c  # CHECK: bc1fl	 52
+0x45 0x01 0x00 0x01  # CHECK: bc1t	 8
+0x45 0x03 0xf7 0xf4  # CHECK: bc1tl	 -8236
+0x04 0x11 0x14 0x9b  # CHECK: bal	21104
+0x00 0x00 0x00 0x00  # CHECK: nop
+0x04 0xd0 0x14 0x9b  # CHECK: bltzal	$6, 21104
+0x00 0x00 0x00 0x00  # CHECK: nop
+0x51 0xd3 0x0c 0x40  # CHECK: beql	$14, $19, 12548
+0x05 0x93 0x07 0x1f  # CHECK: bgezall	$12, 7296
+0x04 0x83 0xf9 0x4d  # CHECK: bgezl	$4, -6856
+0x5d 0x40 0xfc 0x59  # CHECK: bgtzl	$10, -3736
+0x58 0xc0 0x02 0xe7  # CHECK: blezl	$6, 2976
+0x04 0xd2 0x00 0x7a  # CHECK: bltzall	$6, 492
+0x06 0x22 0xf6 0x45  # CHECK: bltzl	$17, -9960
+0x57 0x94 0x04 0xfc  # CHECK: bnel	$gp, $20, 5108
+0xbc 0xa1 0x00 0x08  # CHECK: cache   1, 8($5)
+0x46 0x3c 0xe0 0x3b  # CHECK: c.ngl.d	$f28, $f28
+0x46 0x30 0x00 0x39  # CHECK: c.ngle.d	$f0, $f16
+0x46 0x20 0xf0 0x38  # CHECK: c.sf.d	$f30, $f0
+0x46 0x16 0x70 0x38  # CHECK: c.sf.s	$f14, $f22
+0x46 0x20 0x18 0x4a  # CHECK: ceil.l.d  $f1, $f3
+0x46 0x00 0x6c 0x8a  # CHECK: ceil.l.s  $f18, $f13
+0x46 0x20 0xc2 0xce  # CHECK: ceil.w.d	$f11, $f24
+0x46 0x00 0xa1 0x8e  # CHECK: ceil.w.s	$f6, $f20
+0x44 0x51 0xa8 0x00  # CHECK: cfc1	$17, $21
+0x44 0xc6 0xd0 0x00  # CHECK: ctc1	$6, $26
+0x46 0x00 0xe5 0xa1  # CHECK: cvt.d.s	$f22, $f28
+0x46 0x80 0x5e 0xa1  # CHECK: cvt.d.w	$f26, $f11
+0x46 0xa0 0x81 0x21  # CHECK: cvt.d.l $f4, $f16
+0x46 0x20 0x7e 0x25  # CHECK: cvt.l.d $f24, $f15
+0x46 0x00 0xea 0xe5  # CHECK: cvt.l.s $f11, $f29
+0x46 0xa0 0xf3 0xe0  # CHECK: cvt.s.l $f15, $f30
+0x46 0x20 0x46 0xa0  # CHECK: cvt.s.d	$f26, $f8
+0x46 0x80 0x7d 0xa0  # CHECK: cvt.s.w	$f22, $f15
+0x46 0x20 0x75 0x24  # CHECK: cvt.w.d	$f20, $f14
+0x46 0x00 0xc5 0x24  # CHECK: cvt.w.s	$f20, $f24
+0x00 0x3f 0x98 0x2c  # CHECK: dadd    $19, $1, $ra
+0x62 0x9d 0x93 0xc7  # CHECK: daddi   $sp, $20, -27705
+0x63 0xbd 0x93 0xc7  # CHECK: daddi   $sp, $sp, -27705
+0x62 0x9d 0x93 0xc7  # CHECK: daddi   $sp, $20, -27705
+0x62 0x9d 0x93 0xc7  # CHECK: daddi   $sp, $20, -27705
+0x63 0xbd 0x93 0xc7  # CHECK: daddi   $sp, $sp, -27705
+0x66 0xda 0xee 0x16  # CHECK: daddiu  $26, $22, -4586
+0x00 0x3f 0x98 0x2d  # CHECK: daddu   $19, $1, $ra
+0x64 0x58 0x46 0x9f  # CHECK: daddiu  $24, $2, 18079
+0x66 0x73 0x69 0x3f  # CHECK: daddiu  $19, $19, 26943
+0x03 0x53 0x00 0x1e  # CHECK: ddiv    $zero, $26, $19
+0x02 0x11 0x00 0x1f  # CHECK: ddivu   $zero, $16, $17
+0x03 0x2b 0x00 0x1a  # CHECK: div	$zero, $25, $11
+0x46 0x3a 0xa7 0x03  # CHECK: div.d	$f28, $f20, $f26
+0x46 0x0f 0x29 0x03  # CHECK: div.s	$f4, $f5, $f15
+0x03 0x2f 0x00 0x1b  # CHECK: divu	$zero, $25, $15
+0x44 0x2c 0x68 0x00  # CHECK: dmfc1   $12, $f13
+0x44 0xb0 0x70 0x00  # CHECK: dmtc1   $16, $f14
+0x02 0xe9 0x00 0x1c  # CHECK: dmult   $23, $9
+0x00 0xa6 0x00 0x1d  # CHECK: dmultu  $5, $6
+0x00 0x00 0x04 0xb8  # CHECK: dsll    $zero, $zero, 18
+0x00 0x14 0x04 0xb8  # CHECK: dsll    $zero, $20, 18
+0x01 0x94 0x00 0x14  # CHECK: dsllv   $zero, $20, $12
+0x00 0x00 0x04 0xbc  # CHECK: dsll32  $zero, $zero, 18
+0x00 0x00 0x04 0xbc  # CHECK: dsll32  $zero, $zero, 18
+0x01 0x94 0x00 0x14  # CHECK: dsllv   $zero, $20, $12
+0x00 0x1c 0xe2 0xbb  # CHECK: dsra    $gp, $gp, 10
+0x00 0x12 0xe2 0xbb  # CHECK: dsra    $gp, $18, 10
+0x02 0x72 0xe0 0x17  # CHECK: dsrav   $gp, $18, $19
+0x00 0x1c 0xe2 0xbf  # CHECK: dsra32  $gp, $gp, 10
+0x00 0x12 0xe2 0xbf  # CHECK: dsra32  $gp, $18, 10
+0x02 0x72 0xe0 0x17  # CHECK: dsrav   $gp, $18, $19
+0x00 0x13 0x9d 0xfa  # CHECK: dsrl    $19, $19, 23
+0x00 0x06 0x9d 0xfa  # CHECK: dsrl    $19, $6, 23
+0x02 0x86 0x98 0x16  # CHECK: dsrlv   $19, $6, $20
+0x00 0x13 0x9d 0xfe  # CHECK: dsrl32  $19, $19, 23
+0x00 0x06 0x9d 0xfe  # CHECK: dsrl32  $19, $6, 23
+0x02 0x86 0x98 0x16  # CHECK: dsrlv   $19, $6, $20
+0x02 0xc8 0x38 0x2e  # CHECK: dsub    $7, $22, $8
+0x00 0xba 0x28 0x2f  # CHECK: dsubu   $5, $5, $26
+0x00 0x00 0x00 0xc0  # CHECK: ehb
+0x42 0x00 0x00 0x18  # CHECK: eret
+0x46 0x20 0x53 0x8f  # CHECK: floor.w.d	$f14, $f10
+0x46 0x00 0x4a 0x0f  # CHECK: floor.w.s	$f8, $f9
+0x46 0x20 0x3e 0x8b  # CHECK: floor.l.d $f26, $f7
+0x46 0x00 0x2b 0x0b  # CHECK: floor.l.s $f12, $f5
+0x81 0x58 0xc7 0x4d  # CHECK: lb	$24, -14515($10)
+0x90 0x68 0x75 0xf3  # CHECK: lbu	$8, 30195($3)
+0xd6 0x0a 0x40 0x07  # CHECK: ldc1	$f10, 16391($16)
+0xd8 0x28 0xad 0x43  # CHECK: ldc2	$8, -21181($1)
+0x86 0xab 0xde 0x94  # CHECK: lh	$11, -8556($21)
+0x94 0x53 0xa6 0xbd  # CHECK: lhu	$19, -22851($2)
+0x24 0x01 0x8b 0xb3  # CHECK: addiu	$1, $zero, -29773
+0x24 0x00 0x8b 0x3f  # CHECK: addiu	$zero, $zero, -29889
+0xc2 0x42 0xe3 0x67  # CHECK: ll	$2, -7321($18)
+0x8c 0xa8 0x16 0x2a  # CHECK: lw	$8, 5674($5)
+0xc7 0x50 0x27 0xf1  # CHECK: lwc1	$f16, 10225($26)
+0xc8 0xd2 0xfc 0xb7  # CHECK: lwc2	$18, -841($6)
+0x89 0xf4 0xef 0x79  # CHECK: lwl	$20, -4231($15)
+0x9b 0x80 0xb5 0x35  # CHECK: lwr	$zero, -19147($gp)
+0x44 0x07 0xd8 0x00  # CHECK: mfc1	$7, $f27
+0x00 0x00 0x98 0x10  # CHECK: mfhi	$19
+0x00 0x00 0xe8 0x10  # CHECK: mfhi	$sp
+0x00 0x00 0x88 0x12  # CHECK: mflo	$17
+0x46 0x20 0x75 0x06  # CHECK: mov.d	$f20, $f14
+0x46 0x00 0xd8 0x86  # CHECK: mov.s	$f2, $f27
+0x00 0x80 0xf0 0x21  # CHECK: move	 $fp, $4
+0x00 0xc0 0xc8 0x21  # CHECK: move	 $25, $6
+0x44 0x9e 0x48 0x00  # CHECK: mtc1	$fp, $f9
+0x02 0x20 0x00 0x11  # CHECK: mthi	$17
+0x03 0xa0 0x00 0x13  # CHECK: mtlo	$sp
+0x03 0x20 0x00 0x13  # CHECK: mtlo	$25
+0x46 0x30 0xa5 0x02  # CHECK: mul.d	$f20, $f20, $f16
+0x46 0x02 0x57 0x82  # CHECK: mul.s	$f30, $f10, $f2
+0x03 0xb4 0x00 0x18  # CHECK: mult	$sp, $20
+0x03 0xa2 0x00 0x18  # CHECK: mult	$sp, $2
+0x03 0x9a 0x00 0x19  # CHECK: multu	$gp, $26
+0x01 0x32 0x00 0x19  # CHECK: multu	$9, $18
+0x00 0x02 0x10 0x23  # CHECK: negu	 $2, $2
+0x00 0x03 0x10 0x23  # CHECK: negu	 $2, $3
+0x46 0x20 0x96 0x87  # CHECK: neg.d	$f26, $f18
+0x46 0x00 0x78 0x47  # CHECK: neg.s	$f1, $f15
+0x00 0x00 0x00 0x00  # CHECK: nop
+0x00 0x07 0x38 0x27  # CHECK: nor	$7, $zero, $7
+0x02 0x1d 0x60 0x25  # CHECK: or	$12, $16, $sp
+0x34 0x42 0x00 0x04  # CHECK: ori	$2, $2, 4
+0x46 0x20 0x0b 0x08  # CHECK: round.l.d $f12, $f1
+0x46 0x00 0x2e 0x48  # CHECK: round.l.s $f25, $f5
+0x46 0x20 0x21 0x8c  # CHECK: round.w.d	$f6, $f4
+0x46 0x00 0xe6 0xcc  # CHECK: round.w.s	$f27, $f28
+0xa1 0xd6 0xb2 0x6f  # CHECK: sb	$22, -19857($14)
+0xe2 0x6f 0x49 0xd8  # CHECK: sc	$15, 18904($19)
+0xf3 0xaf 0xdf 0xcd  # CHECK: scd   $15, -8243($sp)
+0xfd 0x4c 0x16 0xcb  # CHECK: sd    $12, 5835($10)
+0xb3 0xc7 0xae 0x1f  # CHECK: sdl   $7, -20961($fp)
+0xb5 0x8b 0xb0 0x39  # CHECK: sdr   $11, -20423($12)
+0xf5 0xbe 0x77 0x6e  # CHECK: sdc1	$f30, 30574($13)
+0xfa 0x54 0x5a 0x75  # CHECK: sdc2	$20, 23157($18)
+0xa5 0xee 0xe5 0xd0  # CHECK: sh	$14, -6704($15)
+0x00 0x07 0x3c 0x80  # CHECK: sll	$7, $7, 18
+0x00 0x00 0x3c 0x80  # CHECK: sll	$7, $zero, 18
+0x01 0x20 0x38 0x04  # CHECK: sllv	$7, $zero, $9
+0x01 0x20 0x38 0x04  # CHECK: sllv	$7, $zero, $9
+0x01 0x7b 0xb8 0x2a  # CHECK: slt	$23, $11, $27
+0x29 0x51 0x25 0x11  # CHECK: slti	$17, $10, 9489
+0x2f 0x39 0xc3 0x55  # CHECK: sltiu	$25, $25, -15531
+0x02 0xab 0xa0 0x2b  # CHECK: sltu	$20, $21, $11
+0x2f 0x38 0xc3 0x55  # CHECK: sltiu	$24, $25, -15531
+0x46 0x20 0xb4 0x04  # CHECK: sqrt.d	$f16, $f22
+0x46 0x00 0x08 0x04  # CHECK: sqrt.s	$f0, $f1
+0x00 0x11 0x8b 0xc3  # CHECK: sra	$17, $17, 15
+0x00 0x17 0x8b 0xc3  # CHECK: sra	$17, $23, 15
+0x03 0xb7 0x88 0x07  # CHECK: srav	$17, $23, $sp
+0x03 0xb7 0x88 0x07  # CHECK: srav	$17, $23, $sp
+0x00 0x02 0x11 0xc2  # CHECK: srl	$2, $2, 7
+0x00 0x02 0x11 0xc2  # CHECK: srl	$2, $2, 7
+0x00 0x94 0xc8 0x06  # CHECK: srlv	$25, $20, $4
+0x00 0x94 0xc8 0x06  # CHECK: srlv	$25, $20, $4
+0x00 0x00 0x00 0x40  # CHECK: ssnop
+0x02 0x6c 0xb0 0x22  # CHECK: sub	$22, $19, $12
+0x22 0x36 0x0c 0x36  # CHECK: addi	$22, $17, 3126
+0x21 0xad 0xe6 0x90  # CHECK: addi	$13, $13, -6512
+0x46 0x30 0x14 0x81  # CHECK: sub.d	$f18, $f2, $f16
+0x46 0x16 0xb5 0xc1  # CHECK: sub.s	$f23, $f22, $f22
+0x02 0xd6 0xe8 0x23  # CHECK: subu	$sp, $22, $22
+0xaf 0xbf 0xd8 0x50  # CHECK: sw	$ra, -10160($sp)
+0xe7 0x06 0xde 0xef  # CHECK: swc1	$f6, -8465($24)
+0xea 0x19 0x61 0x30  # CHECK: swc2	$25, 24880($16)
+0xaa 0x6f 0x35 0x7e  # CHECK: swl	$15, 13694($19)
+0xb9 0xd1 0x98 0x22  # CHECK: swr	$17, -26590($14)
+0x00 0x03 0x00 0x34  # CHECK: teq	 $zero, $3
+0x00 0xa7 0x9b 0x34  # CHECK: teq	$5, $7, 620
+0x06 0xac 0xbb 0xa0  # CHECK: teqi	$21, 48032
+0x00 0xea 0x00 0x30  # CHECK: tge	 $7, $10
+0x00 0xb3 0x55 0x30  # CHECK: tge	$5, $19, 340
+0x06 0x28 0x13 0xa1  # CHECK: tgei	$17, 5025
+0x07 0xa9 0x90 0x33  # CHECK: tgeiu	$sp, 36915
+0x02 0xdc 0x00 0x31  # CHECK: tgeu	 $22, $gp
+0x02 0x8e 0x5e 0xf1  # CHECK: tgeu	$20, $14, 379
+0x42 0x00 0x00 0x08  # CHECK: tlbp
+0x42 0x00 0x00 0x01  # CHECK: tlbr
+0x42 0x00 0x00 0x02  # CHECK: tlbwi
+0x42 0x00 0x00 0x06  # CHECK: tlbwr
+0x01 0xed 0x00 0x32  # CHECK: tlt	 $15, $13
+0x00 0x53 0x21 0x72  # CHECK: tlt	$2, $19, 133
+0x05 0xca 0xad 0xbd  # CHECK: tlti	$14, 44477
+0x07 0xeb 0xec 0x2c  # CHECK: tltiu	$ra, 60460
+0x01 0x70 0x00 0x33  # CHECK: tltu	 $11, $16
+0x02 0x1d 0xfe 0x33  # CHECK: tltu	$16, $sp, 1016
+0x00 0xd1 0x00 0x36  # CHECK: tne	 $6, $17
+0x00 0xe8 0xdd 0x76  # CHECK: tne	$7, $8, 885
+0x05 0x8e 0x8c 0x31  # CHECK: tnei	$12, 35889
+0x46 0x20 0xbd 0xc9  # CHECK: trunc.l.d $f23, $f23
+0x46 0x00 0xff 0x09  # CHECK: trunc.l.s $f28, $f31
+0x46 0x20 0x75 0x8d  # CHECK: trunc.w.d	$f22, $f14
+0x46 0x00 0xf7 0x0d  # CHECK: trunc.w.s	$f28, $f30
+0x00 0x9e 0x90 0x26  # CHECK: xor	$18, $4, $fp
diff --git a/test/MC/Disassembler/Mips/mips32/valid-mips32-el.txt b/test/MC/Disassembler/Mips/mips32/valid-mips32-el.txt
new file mode 100644
index 000000000000..17d2094a87eb
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32/valid-mips32-el.txt
@@ -0,0 +1,294 @@
+# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux | FileCheck %s
+
+0x05 0x73 0x20 0x46   # CHECK: abs.d $f12, $f14
+
+0x85 0x39 0x00 0x46   # CHECK: abs.s $f6, $f7
+
+0x20 0x48 0xc7 0x00   # CHECK: add $9, $6, $7
+
+0x00 0x62 0x2e 0x46   # CHECK: add.d $f8, $f12, $f14
+
+0x40 0x32 0x07 0x46   # CHECK: add.s $f9, $f6, $f7
+
+0x67 0x45 0xc9 0x20   # CHECK: addi $9, $6, 17767
+
+0x67 0xc5 0xc9 0x24   # CHECK: addiu $9, $6, -15001
+
+0x21 0x48 0xc7 0x00   # CHECK: addu $9, $6, $7
+
+0x24 0x48 0xc7 0x00   # CHECK: and $9, $6, $7
+
+0x67 0x45 0xc9 0x30   # CHECK: andi $9, $6, 17767
+
+0x4c 0x01 0x00 0x10   # CHECK: b 1332
+
+0x4c 0x01 0x00 0x45   # CHECK: bc1f 1332
+
+0x4c 0x01 0x1c 0x45   # CHECK: bc1f $fcc7, 1332
+
+0x4c 0x01 0x01 0x45   # CHECK: bc1t 1332
+
+0x4c 0x01 0x1d 0x45   # CHECK: bc1t $fcc7, 1332
+
+0x4c 0x01 0x26 0x11   # CHECK: beq $9, $6, 1332
+
+0x4c 0x01 0xc1 0x04   # CHECK: bgez  $6, 1332
+
+0x4c 0x01 0xd1 0x04   # CHECK: bgezal  $6, 1332
+
+0x4c 0x01 0xc0 0x1c   # CHECK: bgtz  $6, 1332
+
+0x4c 0x01 0xc0 0x18   # CHECK: blez  $6, 1332
+
+0x4c 0x01 0x26 0x15   # CHECK: bne $9, $6, 1332
+
+0x32 0x60 0x2e 0x46   # CHECK: c.eq.d $f12, $f14
+
+0x32 0x30 0x07 0x46   # CHECK: c.eq.s $f6, $f7
+
+0x30 0x60 0x2e 0x46   # CHECK: c.f.d $f12, $f14
+
+0x30 0x30 0x07 0x46   # CHECK: c.f.s $f6, $f7
+
+0x3e 0x60 0x2e 0x46   # CHECK: c.le.d $f12, $f14
+
+0x3e 0x30 0x07 0x46   # CHECK: c.le.s $f6, $f7
+
+0x3c 0x60 0x2e 0x46   # CHECK: c.lt.d $f12, $f14
+
+0x3c 0x30 0x07 0x46   # CHECK: c.lt.s $f6, $f7
+
+0x3d 0x60 0x2e 0x46   # CHECK: c.nge.d $f12, $f14
+
+0x3d 0x30 0x07 0x46   # CHECK: c.nge.s $f6, $f7
+
+0x3b 0x60 0x2e 0x46   # CHECK: c.ngl.d $f12, $f14
+
+0x3b 0x30 0x07 0x46   # CHECK: c.ngl.s $f6, $f7
+
+0x39 0x60 0x2e 0x46   # CHECK: c.ngle.d $f12, $f14
+
+0x39 0x30 0x07 0x46   # CHECK: c.ngle.s $f6, $f7
+
+0x3f 0x60 0x2e 0x46   # CHECK: c.ngt.d $f12, $f14
+
+0x3f 0x30 0x07 0x46   # CHECK: c.ngt.s $f6, $f7
+
+0x36 0x60 0x2e 0x46   # CHECK: c.ole.d $f12, $f14
+
+0x36 0x30 0x07 0x46   # CHECK: c.ole.s $f6, $f7
+
+0x34 0x60 0x2e 0x46   # CHECK: c.olt.d $f12, $f14
+
+0x34 0x30 0x07 0x46   # CHECK: c.olt.s $f6, $f7
+
+0x3a 0x60 0x2e 0x46   # CHECK: c.seq.d $f12, $f14
+
+0x3a 0x30 0x07 0x46   # CHECK: c.seq.s $f6, $f7
+
+0x38 0x60 0x2e 0x46   # CHECK: c.sf.d $f12, $f14
+
+0x38 0x30 0x07 0x46   # CHECK: c.sf.s $f6, $f7
+
+0x33 0x60 0x2e 0x46   # CHECK: c.ueq.d $f12, $f14
+
+0x33 0xe0 0x12 0x46   # CHECK: c.ueq.s $f28, $f18
+
+0x37 0x60 0x2e 0x46   # CHECK: c.ule.d $f12, $f14
+
+0x37 0x30 0x07 0x46   # CHECK: c.ule.s $f6, $f7
+
+0x35 0x60 0x2e 0x46   # CHECK: c.ult.d $f12, $f14
+
+0x35 0x30 0x07 0x46   # CHECK: c.ult.s $f6, $f7
+
+0x31 0x60 0x2e 0x46   # CHECK: c.un.d $f12, $f14
+
+0x31 0x30 0x07 0x46   # CHECK: c.un.s $f6, $f7
+
+0x0e 0x73 0x20 0x46   # CHECK: ceil.w.d $f12, $f14
+
+0x8e 0x39 0x00 0x46   # CHECK: ceil.w.s $f6, $f7
+
+0x00 0x38 0x46 0x44   # CHECK: cfc1  $6, $7
+
+0x21 0x30 0xe6 0x70   # CHECK: clo  $6, $7
+
+0x20 0x30 0xe6 0x70   # CHECK: clz  $6, $7
+
+0x00 0x38 0xc6 0x44   # CHECK: ctc1  $6, $7
+
+0xa1 0x39 0x00 0x46   # CHECK: cvt.d.s $f6, $f7
+
+0x21 0x73 0x80 0x46   # CHECK: cvt.d.w $f12, $f14
+
+0x20 0x73 0x20 0x46   # CHECK: cvt.s.d $f12, $f14
+
+0xa0 0x39 0x80 0x46   # CHECK: cvt.s.w $f6, $f7
+
+0x24 0x73 0x20 0x46   # CHECK: cvt.w.d $f12, $f14
+
+0xa4 0x39 0x00 0x46   # CHECK: cvt.w.s $f6, $f7
+
+0x0f 0x73 0x20 0x46   # CHECK: floor.w.d $f12, $f14
+
+0x8f 0x39 0x00 0x46   # CHECK: floor.w.s $f6, $f7
+
+0x4c 0x01 0x00 0x08   # CHECK: j 1328
+
+0x4c 0x01 0x00 0x0c   # CHECK: jal 1328
+
+0x4c 0x01 0x00 0x74   # CHECK: jalx 1328
+
+0x09 0xf8 0xe0 0x00   # CHECK: jalr  $7
+
+0x09 0xfc 0x80 0x00   # CHECK: jalr.hb  $4
+
+0x09 0x24 0xa0 0x00   # CHECK: jalr.hb  $4, $5
+
+0x08 0x00 0xe0 0x00   # CHECK: jr  $7
+
+0xc6 0x23 0xa4 0x80   # CHECK: lb  $4, 9158($5)
+
+0x06 0x00 0xa4 0x90   # CHECK: lbu $4, 6($5)
+
+0xc6 0x23 0xe9 0xd4   # CHECK: ldc1  $f9, 9158($7)
+
+0x0c 0x00 0xa4 0x84   # CHECK: lh  $4, 12($5)
+
+0x0c 0x00 0xa4 0x84   # CHECK: lh  $4, 12($5)
+
+0xc6 0x23 0xe9 0xc0   # CHECK: ll  $9, 9158($7)
+
+0x67 0x45 0x06 0x3c   # CHECK: lui  $6, 17767
+
+0x18 0x00 0xa4 0x8c   # CHECK: lw  $4, 24($5)
+
+0xc6 0x23 0xe9 0xc4   # CHECK: lwc1  $f9, 9158($7)
+
+0x03 0x00 0x82 0x88   # CHECK: lwl   $2,  3($4)
+
+0x10 0x00 0xa3 0x98   # CHECK: lwr   $3, 16($5)
+
+0x00 0x00 0xc7 0x70   # CHECK: madd   $6,  $7
+
+0x01 0x00 0xc7 0x70   # CHECK: maddu  $6,  $7
+
+0x00 0x38 0x06 0x44   # CHECK: mfc1   $6, $f7
+
+0x10 0x28 0x00 0x00   # CHECK: mfhi  $5
+
+0x12 0x28 0x00 0x00   # CHECK: mflo  $5
+
+0x86 0x41 0x20 0x46   # CHECK: mov.d $f6, $f8
+
+0x86 0x39 0x00 0x46   # CHECK: mov.s $f6, $f7
+
+0x04 0x00 0xc7 0x70   # CHECK: msub   $6,  $7
+
+0x05 0x00 0xc7 0x70   # CHECK: msubu  $6,  $7
+
+0x00 0x38 0x86 0x44   # CHECK: mtc1   $6, $f7
+
+0x11 0x00 0xe0 0x00   # CHECK: mthi   $7
+
+0x13 0x00 0xe0 0x00   # CHECK: mtlo   $7
+
+0x02 0x62 0x2e 0x46   # CHECK: mul.d $f8, $f12, $f14
+
+0x42 0x32 0x07 0x46   # CHECK: mul.s $f9, $f6, $f7
+
+0x02 0x48 0xc7 0x70   # CHECK: mul $9,  $6,  $7
+
+0x18 0x00 0x65 0x00   # CHECK: mult  $3, $5
+
+0x19 0x00 0x65 0x00   # CHECK: multu $3, $5
+
+0x07 0x73 0x20 0x46   # CHECK: neg.d $f12, $f14
+
+0x87 0x39 0x00 0x46   # CHECK: neg.s $f6, $f7
+
+0x00 0x00 0x00 0x00   # CHECK: nop
+
+0x27 0x48 0xc7 0x00   # CHECK: nor $9,  $6, $7
+
+0x25 0x18 0x65 0x00   # CHECK: or  $3, $3, $5
+
+0x67 0x45 0xc9 0x34   # CHECK: ori $9,  $6, 17767
+
+0x0c 0x73 0x20 0x46   # CHECK: round.w.d $f12, $f14
+
+0x8c 0x39 0x00 0x46   # CHECK: round.w.s $f6, $f7
+
+0xc6 0x23 0xa4 0xa0   # CHECK: sb  $4, 9158($5)
+
+0x06 0x00 0xa4 0xa0   # CHECK: sb  $4, 6($5)
+
+0xc6 0x23 0xe9 0xe0   # CHECK: sc  $9, 9158($7)
+
+0xc6 0x23 0xe9 0xf4   # CHECK: sdc1  $f9, 9158($7)
+
+0xc6 0x23 0xa4 0xa4   # CHECK: sh  $4, 9158($5)
+
+0xc0 0x21 0x03 0x00   # CHECK: sll $4, $3, 7
+
+0x04 0x10 0xa3 0x00   # CHECK: sllv  $2, $3, $5
+
+0x2a 0x18 0x65 0x00   # CHECK: slt $3, $3, $5
+
+0x67 0x00 0x63 0x28   # CHECK: slti  $3, $3, 103
+
+0x67 0x00 0x63 0x2c   # CHECK: sltiu $3, $3, 103
+
+0x2b 0x18 0x65 0x00   # CHECK: sltu  $3, $3, $5
+
+0x04 0x73 0x20 0x46   # CHECK: sqrt.d  $f12, $f14
+
+0x84 0x39 0x00 0x46   # CHECK: sqrt.s  $f6, $f7
+
+0xc3 0x21 0x03 0x00   # CHECK: sra $4, $3, 7
+
+0x07 0x10 0xa3 0x00   # CHECK: srav  $2, $3, $5
+
+0xc2 0x21 0x03 0x00   # CHECK: srl $4, $3, 7
+
+0x06 0x10 0xa3 0x00   # CHECK: srlv  $2, $3, $5
+
+0x01 0x62 0x2e 0x46   # CHECK: sub.d $f8, $f12, $f14
+
+0x41 0x32 0x07 0x46   # CHECK: sub.s $f9, $f6, $f7
+
+0x22 0x48 0xc7 0x00   # CHECK: sub $9,  $6, $7
+
+0x23 0x20 0x65 0x00   # CHECK: subu  $4, $3, $5
+
+0x18 0x00 0xa4 0xac   # CHECK: sw  $4, 24($5)
+
+0xc6 0x23 0xe9 0xe4   # CHECK: swc1  $f9, 9158($7)
+
+0x10 0x00 0xa4 0xa8   # CHECK: swl $4,  16($5)
+
+0x10 0x00 0xe6 0xb8   # CHECK: swr $6, 16($7)
+
+0xcf 0x01 0x00 0x00   # CHECK: sync  7
+
+0x0d 0x73 0x20 0x46   # CHECK: trunc.w.d $f12, $f14
+
+0x8d 0x39 0x00 0x46   # CHECK: trunc.w.s $f6, $f7
+
+0x26 0x18 0x65 0x00   # CHECK: xor $3, $3, $5
+
+0x67 0x45 0xc9 0x38   # CHECK: xori  $9,  $6, 17767
+
+0x3b 0xe8 0x05 0x7c   # CHECK: .set    push
+                      # CHECK: .set    mips32r2
+                      # CHECK: rdhwr   $5, $29
+                      # CHECK: .set    pop
+
+0x02 0x00 0x61 0xbc   # CHECK: cache 1, 2($3)
+
+0x04 0x00 0x43 0xcc   # CHECK: pref 3, 4($2)
+
+0xc6 0x23 0xe9 0xe8   # CHECK: swc2  $9, 9158($7)
+
+0xca 0x23 0xc8 0xc8   # CHECK: lwc2  $8, 9162($6)
diff --git a/test/MC/Disassembler/Mips/mips32/valid-mips32.txt b/test/MC/Disassembler/Mips/mips32/valid-mips32.txt
new file mode 100644
index 000000000000..a7eb9b6a40ac
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32/valid-mips32.txt
@@ -0,0 +1,294 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux | FileCheck %s
+
+0x46 0x20 0x73 0x05   # CHECK: abs.d $f12, $f14
+
+0x46 0x00 0x39 0x85   # CHECK: abs.s $f6, $f7
+
+0x00 0xc7 0x48 0x20   # CHECK: add $9, $6, $7
+
+0x46 0x2e 0x62 0x00   # CHECK: add.d $f8, $f12, $f14
+
+0x46 0x07 0x32 0x40   # CHECK: add.s $f9, $f6, $f7
+
+0x20 0xc9 0x45 0x67   # CHECK: addi $9, $6, 17767
+
+0x24 0xc9 0xc5 0x67   # CHECK: addiu $9, $6, -15001
+
+0x00 0xc7 0x48 0x21   # CHECK: addu $9, $6, $7
+
+0x00 0xc7 0x48 0x24   # CHECK: and $9, $6, $7
+
+0x30 0xc9 0x45 0x67   # CHECK: andi $9, $6, 17767
+
+0x10 0x00 0x01 0x4c   # CHECK: b 1332
+
+0x45 0x00 0x01 0x4c   # CHECK: bc1f 1332
+
+0x45 0x1c 0x01 0x4c   # CHECK: bc1f $fcc7, 1332
+
+0x45 0x01 0x01 0x4c   # CHECK: bc1t 1332
+
+0x45 0x1d 0x01 0x4c   # CHECK: bc1t $fcc7, 1332
+
+0x11 0x26 0x01 0x4c   # CHECK: beq $9, $6, 1332
+
+0x04 0xc1 0x01 0x4c   # CHECK: bgez  $6, 1332
+
+0x04 0xd1 0x01 0x4c   # CHECK: bgezal  $6, 1332
+
+0x1c 0xc0 0x01 0x4c   # CHECK: bgtz  $6, 1332
+
+0x18 0xc0 0x01 0x4c   # CHECK: blez  $6, 1332
+
+0x15 0x26 0x01 0x4c   # CHECK: bne $9, $6, 1332
+
+0x46 0x2e 0x60 0x32   # CHECK: c.eq.d $f12, $f14
+
+0x46 0x07 0x30 0x32   # CHECK: c.eq.s $f6, $f7
+
+0x46 0x2e 0x60 0x30   # CHECK: c.f.d $f12, $f14
+
+0x46 0x07 0x30 0x30   # CHECK: c.f.s $f6, $f7
+
+0x46 0x2e 0x60 0x3e   # CHECK: c.le.d $f12, $f14
+
+0x46 0x07 0x30 0x3e   # CHECK: c.le.s $f6, $f7
+
+0x46 0x2e 0x60 0x3c   # CHECK: c.lt.d $f12, $f14
+
+0x46 0x07 0x30 0x3c   # CHECK: c.lt.s $f6, $f7
+
+0x46 0x2e 0x60 0x3d   # CHECK: c.nge.d $f12, $f14
+
+0x46 0x07 0x30 0x3d   # CHECK: c.nge.s $f6, $f7
+
+0x46 0x2e 0x60 0x3b   # CHECK: c.ngl.d $f12, $f14
+
+0x46 0x07 0x30 0x3b   # CHECK: c.ngl.s $f6, $f7
+
+0x46 0x2e 0x60 0x39   # CHECK: c.ngle.d $f12, $f14
+
+0x46 0x07 0x30 0x39   # CHECK: c.ngle.s $f6, $f7
+
+0x46 0x2e 0x60 0x3f   # CHECK: c.ngt.d $f12, $f14
+
+0x46 0x07 0x30 0x3f   # CHECK: c.ngt.s $f6, $f7
+
+0x46 0x2e 0x60 0x36   # CHECK: c.ole.d $f12, $f14
+
+0x46 0x07 0x30 0x36   # CHECK: c.ole.s $f6, $f7
+
+0x46 0x2e 0x60 0x34   # CHECK: c.olt.d $f12, $f14
+
+0x46 0x07 0x30 0x34   # CHECK: c.olt.s $f6, $f7
+
+0x46 0x2e 0x60 0x3a   # CHECK: c.seq.d $f12, $f14
+
+0x46 0x07 0x30 0x3a   # CHECK: c.seq.s $f6, $f7
+
+0x46 0x2e 0x60 0x38   # CHECK: c.sf.d $f12, $f14
+
+0x46 0x07 0x30 0x38   # CHECK: c.sf.s $f6, $f7
+
+0x46 0x2e 0x60 0x33   # CHECK: c.ueq.d $f12, $f14
+
+0x46 0x12 0xe0 0x33   # CHECK: c.ueq.s $f28, $f18
+
+0x46 0x2e 0x60 0x37   # CHECK: c.ule.d $f12, $f14
+
+0x46 0x07 0x30 0x37   # CHECK: c.ule.s $f6, $f7
+
+0x46 0x2e 0x60 0x35   # CHECK: c.ult.d $f12, $f14
+
+0x46 0x07 0x30 0x35   # CHECK: c.ult.s $f6, $f7
+
+0x46 0x2e 0x60 0x31   # CHECK: c.un.d $f12, $f14
+
+0x46 0x07 0x30 0x31   # CHECK: c.un.s $f6, $f7
+
+0x46 0x20 0x73 0x0e   # CHECK: ceil.w.d $f12, $f14
+
+0x46 0x00 0x39 0x8e   # CHECK: ceil.w.s $f6, $f7
+
+0x44 0x46 0x38 0x00   # CHECK: cfc1  $6, $7
+
+0x70 0xe6 0x30 0x21   # CHECK: clo  $6, $7
+
+0x70 0xe6 0x30 0x20   # CHECK: clz  $6, $7
+
+0x44 0xc6 0x38 0x00   # CHECK: ctc1  $6, $7
+
+0x46 0x00 0x39 0xa1   # CHECK: cvt.d.s $f6, $f7
+
+0x46 0x80 0x73 0x21   # CHECK: cvt.d.w $f12, $f14
+
+0x46 0x20 0x73 0x20   # CHECK: cvt.s.d $f12, $f14
+
+0x46 0x80 0x39 0xa0   # CHECK: cvt.s.w $f6, $f7
+
+0x46 0x20 0x73 0x24   # CHECK: cvt.w.d $f12, $f14
+
+0x46 0x00 0x39 0xa4   # CHECK: cvt.w.s $f6, $f7
+
+0x46 0x20 0x73 0x0f   # CHECK: floor.w.d $f12, $f14
+
+0x46 0x00 0x39 0x8f   # CHECK: floor.w.s $f6, $f7
+
+0x08 0x00 0x01 0x4c   # CHECK: j 1328
+
+0x0c 0x00 0x01 0x4c   # CHECK: jal 1328
+
+0x74 0x00 0x01 0x4c   # CHECK: jalx 1328
+
+0x00 0xe0 0xf8 0x09   # CHECK: jalr  $7
+
+0x00 0x80 0xfc 0x09   # CHECK: jalr.hb  $4
+
+0x00 0xa0 0x24 0x09   # CHECK: jalr.hb  $4, $5
+
+0x00 0xe0 0x00 0x08   # CHECK: jr  $7
+
+0x80 0xa4 0x23 0xc6   # CHECK: lb  $4, 9158($5)
+
+0x90 0xa4 0x00 0x06   # CHECK: lbu $4, 6($5)
+
+0xd4 0xe9 0x23 0xc6   # CHECK: ldc1  $f9, 9158($7)
+
+0x84 0xa4 0x00 0x0c   # CHECK: lh  $4, 12($5)
+
+0x84 0xa4 0x00 0x0c   # CHECK: lh  $4, 12($5)
+
+0xc0 0xe9 0x23 0xc6   # CHECK: ll  $9, 9158($7)
+
+0x3c 0x06 0x45 0x67   # CHECK: lui  $6, 17767
+
+0x8c 0xa4 0x00 0x18   # CHECK: lw  $4, 24($5)
+
+0xc4 0xe9 0x23 0xc6   # CHECK: lwc1  $f9, 9158($7)
+
+0x88 0x82 0x00 0x03   # CHECK: lwl   $2,  3($4)
+
+0x98 0xa3 0x00 0x10   # CHECK: lwr   $3, 16($5)
+
+0x70 0xc7 0x00 0x00   # CHECK: madd   $6,  $7
+
+0x70 0xc7 0x00 0x01   # CHECK: maddu  $6,  $7
+
+0x44 0x06 0x38 0x00   # CHECK: mfc1   $6, $f7
+
+0x00 0x00 0x28 0x10   # CHECK: mfhi  $5
+
+0x00 0x00 0x28 0x12   # CHECK: mflo  $5
+
+0x46 0x20 0x41 0x86   # CHECK: mov.d $f6, $f8
+
+0x46 0x00 0x39 0x86   # CHECK: mov.s $f6, $f7
+
+0x70 0xc7 0x00 0x04   # CHECK: msub   $6,  $7
+
+0x70 0xc7 0x00 0x05   # CHECK: msubu  $6,  $7
+
+0x44 0x86 0x38 0x00   # CHECK: mtc1   $6, $f7
+
+0x00 0xe0 0x00 0x11   # CHECK: mthi   $7
+
+0x00 0xe0 0x00 0x13   # CHECK: mtlo   $7
+
+0x46 0x2e 0x62 0x02   # CHECK: mul.d $f8, $f12, $f14
+
+0x46 0x07 0x32 0x42   # CHECK: mul.s $f9, $f6, $f7
+
+0x70 0xc7 0x48 0x02   # CHECK: mul $9,  $6,  $7
+
+0x00 0x65 0x00 0x18   # CHECK: mult  $3, $5
+
+0x00 0x65 0x00 0x19   # CHECK: multu $3, $5
+
+0x46 0x20 0x73 0x07   # CHECK: neg.d $f12, $f14
+
+0x46 0x00 0x39 0x87   # CHECK: neg.s $f6, $f7
+
+0x00 0x00 0x00 0x00   # CHECK: nop
+
+0x00 0xc7 0x48 0x27   # CHECK: nor $9,  $6, $7
+
+0x00 0x65 0x18 0x25   # CHECK: or  $3, $3, $5
+
+0x34 0xc9 0x45 0x67   # CHECK: ori $9,  $6, 17767
+
+0x46 0x20 0x73 0x0c   # CHECK: round.w.d $f12, $f14
+
+0x46 0x00 0x39 0x8c   # CHECK: round.w.s $f6, $f7
+
+0xa0 0xa4 0x23 0xc6   # CHECK: sb  $4, 9158($5)
+
+0xa0 0xa4 0x00 0x06   # CHECK: sb  $4, 6($5)
+
+0xe0 0xe9 0x23 0xc6   # CHECK: sc  $9, 9158($7)
+
+0xf4 0xe9 0x23 0xc6   # CHECK: sdc1  $f9, 9158($7)
+
+0xa4 0xa4 0x23 0xc6   # CHECK: sh  $4, 9158($5)
+
+0x00 0x03 0x21 0xc0   # CHECK: sll $4, $3, 7
+
+0x00 0xa3 0x10 0x04   # CHECK: sllv  $2, $3, $5
+
+0x00 0x65 0x18 0x2a   # CHECK: slt $3, $3, $5
+
+0x28 0x63 0x00 0x67   # CHECK: slti  $3, $3, 103
+
+0x2c 0x63 0x00 0x67   # CHECK: sltiu $3, $3, 103
+
+0x00 0x65 0x18 0x2b   # CHECK: sltu  $3, $3, $5
+
+0x46 0x20 0x73 0x04   # CHECK: sqrt.d  $f12, $f14
+
+0x46 0x00 0x39 0x84   # CHECK: sqrt.s  $f6, $f7
+
+0x00 0x03 0x21 0xc3   # CHECK: sra $4, $3, 7
+
+0x00 0xa3 0x10 0x07   # CHECK: srav  $2, $3, $5
+
+0x00 0x03 0x21 0xc2   # CHECK: srl $4, $3, 7
+
+0x00 0xa3 0x10 0x06   # CHECK: srlv  $2, $3, $5
+
+0x46 0x2e 0x62 0x01   # CHECK: sub.d $f8, $f12, $f14
+
+0x46 0x07 0x32 0x41   # CHECK: sub.s $f9, $f6, $f7
+
+0x00 0xc7 0x48 0x22   # CHECK: sub $9,  $6, $7
+
+0x00 0x65 0x20 0x23   # CHECK: subu  $4, $3, $5
+
+0xac 0xa4 0x00 0x18   # CHECK: sw  $4, 24($5)
+
+0xe4 0xe9 0x23 0xc6   # CHECK: swc1  $f9, 9158($7)
+
+0xa8 0xa4 0x00 0x10   # CHECK: swl $4,  16($5)
+
+0xb8 0xe6 0x00 0x10   # CHECK: swr $6, 16($7)
+
+0x00 0x00 0x01 0xcf   # CHECK: sync  7
+
+0x46 0x20 0x73 0x0d   # CHECK: trunc.w.d $f12, $f14
+
+0x46 0x00 0x39 0x8d   # CHECK: trunc.w.s $f6, $f7
+
+0x00 0x65 0x18 0x26   # CHECK: xor $3, $3, $5
+
+0x38 0xc9 0x45 0x67   # CHECK: xori  $9,  $6, 17767
+
+0x7c 0x05 0xe8 0x3b   # CHECK: .set    push
+                      # CHECK: .set    mips32r2
+                      # CHECK: rdhwr   $5, $29
+                      # CHECK: .set    pop
+
+0xbc 0x61 0x00 0x02   # CHECK: cache 1, 2($3)
+
+0xcc 0x43 0x00 0x04   # CHECK: pref 3, 4($2)
+
+0xe8 0xe9 0x23 0xc6   # CHECK: swc2  $9, 9158($7)
+
+0xc8 0xc8 0x23 0xca   # CHECK: lwc2  $8, 9162($6)
diff --git a/test/MC/Disassembler/Mips/mips32/valid-xfail-mips32.txt b/test/MC/Disassembler/Mips/mips32/valid-xfail-mips32.txt
new file mode 100644
index 000000000000..bcd1c8269644
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32/valid-xfail-mips32.txt
@@ -0,0 +1,30 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble | FileCheck %s
+# XFAIL: *
+0x46 0x2f 0x79 0x32  # CHECK :      c.eq.d    $fcc1,$f15,$f15
+0x46 0x11 0xc5 0x32  # CHECK :      c.eq.s    $fcc5,$f24,$f17
+0x46 0x35 0x5c 0x30  # CHECK :      c.f.d     $fcc4,$f11,$f21
+0x46 0x07 0xf4 0x30  # CHECK :      c.f.s     $fcc4,$f30,$f7
+0x46 0x21 0x94 0x3e  # CHECK :      c.le.d    $fcc4,$f18,$f1
+0x46 0x04 0xc6 0x3e  # CHECK :      c.le.s    $fcc6,$f24,$f4
+0x46 0x23 0x4b 0x3c  # CHECK :      c.lt.d    $fcc3,$f9,$f3
+0x46 0x0e 0x8a 0x3c  # CHECK :      c.lt.s    $fcc2,$f17,$f14
+0x46 0x30 0xad 0x3d  # CHECK :      c.nge.d   $fcc5,$f21,$f16
+0x46 0x08 0x5b 0x3d  # CHECK :      c.nge.s   $fcc3,$f11,$f8
+0x46 0x17 0xfa 0x3b  # CHECK :      c.ngl.s   $fcc2,$f31,$f23
+0x46 0x17 0x92 0x39  # CHECK :      c.ngle.s  $fcc2,$f18,$f23
+0x46 0x27 0xc4 0x3f  # CHECK :      c.ngt.d   $fcc4,$f24,$f7
+0x46 0x0d 0x45 0x3f  # CHECK :      c.ngt.s   $fcc5,$f8,$f13
+0x46 0x3f 0x82 0x36  # CHECK :      c.ole.d   $fcc2,$f16,$f31
+0x46 0x14 0x3b 0x36  # CHECK :      c.ole.s   $fcc3,$f7,$f20
+0x46 0x3c 0x9c 0x34  # CHECK :      c.olt.d   $fcc4,$f19,$f28
+0x46 0x07 0xa6 0x34  # CHECK :      c.olt.s   $fcc6,$f20,$f7
+0x46 0x27 0xfc 0x3a  # CHECK :      c.seq.d   $fcc4,$f31,$f7
+0x46 0x19 0x0f 0x3a  # CHECK :      c.seq.s   $fcc7,$f1,$f25
+0x46 0x39 0x6c 0x33  # CHECK :      c.ueq.d   $fcc4,$f13,$f25
+0x46 0x1e 0x1e 0x33  # CHECK :      c.ueq.s   $fcc6,$f3,$f30
+0x46 0x32 0xcf 0x37  # CHECK :      c.ule.d   $fcc7,$f25,$f18
+0x46 0x1e 0xaf 0x37  # CHECK :      c.ule.s   $fcc7,$f21,$f30
+0x46 0x31 0x36 0x35  # CHECK :      c.ult.d   $fcc6,$f6,$f17
+0x46 0x0a 0xc7 0x35  # CHECK :      c.ult.s   $fcc7,$f24,$f10
+0x46 0x38 0xbe 0x31  # CHECK :      c.un.d    $fcc6,$f23,$f24
+0x46 0x04 0xf1 0x31  # CHECK :      c.un.s    $fcc1,$f30,$f4
diff --git a/test/MC/Disassembler/Mips/mips32r2.txt b/test/MC/Disassembler/Mips/mips32r2.txt
index 299f6f0c8a3e..354ef74a75aa 100644
--- a/test/MC/Disassembler/Mips/mips32r2.txt
+++ b/test/MC/Disassembler/Mips/mips32r2.txt
@@ -448,3 +448,6 @@
 
 # CHECK: xori  $9,  $6, 17767
 0x38 0xc9 0x45 0x67
+
+# CHECK: synci -6137($fp)
+0x07 0xdf 0xe8 0x07
diff --git a/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-le.txt b/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-le.txt
new file mode 100644
index 000000000000..142413226805
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2-le.txt
@@ -0,0 +1,337 @@
+# RUN: llvm-mc --disassemble %s -triple=mipsel-unknown-linux -mcpu=mips32r2 | FileCheck %s
+
+0x05 0x73 0x20 0x46    # CHECK: abs.d $f12, $f14
+
+0x85 0x39 0x00 0x46    # CHECK: abs.s $f6, $f7
+
+0x20 0x48 0xc7 0x00    # CHECK: add $9, $6, $7
+
+0x00 0x62 0x2e 0x46    # CHECK: add.d $f8, $f12, $f14
+
+0x40 0x32 0x07 0x46    # CHECK: add.s $f9, $f6, $f7
+
+0x67 0x45 0xc9 0x20    # CHECK: addi $9, $6, 17767
+
+0x67 0xc5 0xc9 0x24    # CHECK: addiu $9, $6, -15001
+
+0x21 0x48 0xc7 0x00    # CHECK: addu $9, $6, $7
+
+0x24 0x48 0xc7 0x00    # CHECK: and $9, $6, $7
+
+0x67 0x45 0xc9 0x30    # CHECK: andi $9, $6, 17767
+
+0x4c 0x01 0x00 0x10    # CHECK: b 1332
+
+0x4c 0x01 0x00 0x45    # CHECK: bc1f 1332
+
+0x4c 0x01 0x1c 0x45    # CHECK: bc1f $fcc7, 1332
+
+0x4c 0x01 0x01 0x45    # CHECK: bc1t 1332
+
+0x4c 0x01 0x1d 0x45    # CHECK: bc1t $fcc7, 1332
+
+0x4c 0x01 0x26 0x11    # CHECK: beq $9, $6, 1332
+
+0x4c 0x01 0xc1 0x04    # CHECK: bgez  $6, 1332
+
+0x4c 0x01 0xd1 0x04    # CHECK: bgezal  $6, 1332
+
+0x4c 0x01 0xc0 0x1c    # CHECK: bgtz  $6, 1332
+
+0x4c 0x01 0xc0 0x18    # CHECK: blez  $6, 1332
+
+0x4c 0x01 0x26 0x15    # CHECK: bne $9, $6, 1332
+
+0x32 0x60 0x2e 0x46    # CHECK: c.eq.d $f12, $f14
+
+0x32 0x30 0x07 0x46    # CHECK: c.eq.s $f6, $f7
+
+0x30 0x60 0x2e 0x46    # CHECK: c.f.d $f12, $f14
+
+0x30 0x30 0x07 0x46    # CHECK: c.f.s $f6, $f7
+
+0x3e 0x60 0x2e 0x46    # CHECK: c.le.d $f12, $f14
+
+0x3e 0x30 0x07 0x46    # CHECK: c.le.s $f6, $f7
+
+0x3c 0x60 0x2e 0x46    # CHECK: c.lt.d $f12, $f14
+
+0x3c 0x30 0x07 0x46    # CHECK: c.lt.s $f6, $f7
+
+0x3d 0x60 0x2e 0x46    # CHECK: c.nge.d $f12, $f14
+
+0x3d 0x30 0x07 0x46    # CHECK: c.nge.s $f6, $f7
+
+0x3b 0x60 0x2e 0x46    # CHECK: c.ngl.d $f12, $f14
+
+0x3b 0x30 0x07 0x46    # CHECK: c.ngl.s $f6, $f7
+
+0x39 0x60 0x2e 0x46    # CHECK: c.ngle.d $f12, $f14
+
+0x39 0x30 0x07 0x46    # CHECK: c.ngle.s $f6, $f7
+
+0x3f 0x60 0x2e 0x46    # CHECK: c.ngt.d $f12, $f14
+
+0x3f 0x30 0x07 0x46    # CHECK: c.ngt.s $f6, $f7
+
+0x36 0x60 0x2e 0x46    # CHECK: c.ole.d $f12, $f14
+
+0x36 0x30 0x07 0x46    # CHECK: c.ole.s $f6, $f7
+
+0x34 0x60 0x2e 0x46    # CHECK: c.olt.d $f12, $f14
+
+0x34 0x30 0x07 0x46    # CHECK: c.olt.s $f6, $f7
+
+0x3a 0x60 0x2e 0x46    # CHECK: c.seq.d $f12, $f14
+
+0x3a 0x30 0x07 0x46    # CHECK: c.seq.s $f6, $f7
+
+0x38 0x60 0x2e 0x46    # CHECK: c.sf.d $f12, $f14
+
+0x38 0x30 0x07 0x46    # CHECK: c.sf.s $f6, $f7
+
+0x33 0x60 0x2e 0x46    # CHECK: c.ueq.d $f12, $f14
+
+0x33 0xe0 0x12 0x46    # CHECK: c.ueq.s $f28, $f18
+
+0x37 0x60 0x2e 0x46    # CHECK: c.ule.d $f12, $f14
+
+0x37 0x30 0x07 0x46    # CHECK: c.ule.s $f6, $f7
+
+0x35 0x60 0x2e 0x46    # CHECK: c.ult.d $f12, $f14
+
+0x35 0x30 0x07 0x46    # CHECK: c.ult.s $f6, $f7
+
+0x31 0x60 0x2e 0x46    # CHECK: c.un.d $f12, $f14
+
+0x31 0x30 0x07 0x46    # CHECK: c.un.s $f6, $f7
+
+0x0e 0x73 0x20 0x46    # CHECK: ceil.w.d $f12, $f14
+
+0x8e 0x39 0x00 0x46    # CHECK: ceil.w.s $f6, $f7
+
+0x00 0x38 0x46 0x44    # CHECK: cfc1  $6, $7
+
+0x21 0x30 0xe6 0x70    # CHECK: clo  $6, $7
+
+0x20 0x30 0xe6 0x70    # CHECK: clz  $6, $7
+
+0x00 0x38 0xc6 0x44    # CHECK: ctc1  $6, $7
+
+0xa1 0x39 0x00 0x46    # CHECK: cvt.d.s $f6, $f7
+
+0x21 0x73 0x80 0x46    # CHECK: cvt.d.w $f12, $f14
+
+0x25 0x73 0x20 0x46    # CHECK: cvt.l.d $f12, $f14
+
+0xa5 0x39 0x00 0x46    # CHECK: cvt.l.s $f6, $f7
+
+0x20 0x73 0x20 0x46    # CHECK: cvt.s.d $f12, $f14
+
+0xa0 0x39 0x80 0x46    # CHECK: cvt.s.w $f6, $f7
+
+0x24 0x73 0x20 0x46    # CHECK: cvt.w.d $f12, $f14
+
+0xa4 0x39 0x00 0x46    # CHECK: cvt.w.s $f6, $f7
+
+0x00 0x60 0x7e 0x41    # CHECK: di  $fp
+
+0x00 0x60 0x60 0x41    # CHECK: di
+
+0x20 0x60 0x6e 0x41    # CHECK: ei  $14
+
+0x20 0x60 0x60 0x41    # CHECK: ei
+
+0x0f 0x73 0x20 0x46    # CHECK: floor.w.d $f12, $f14
+
+0x8f 0x39 0x00 0x46    # CHECK: floor.w.s $f6, $f7
+
+0x84 0x61 0x33 0x7d    # CHECK: ins $19, $9, 6, 7
+
+0x4c 0x01 0x00 0x08    # CHECK: j 1328
+
+0x4c 0x01 0x00 0x0c    # CHECK: jal 1328
+
+0x4c 0x01 0x00 0x74    # CHECK: jalx 1328
+
+0x09 0xf8 0xe0 0x00    # CHECK: jalr  $7
+
+0x09 0xfc 0x80 0x00    # CHECK: jalr.hb  $4
+
+0x09 0x24 0xa0 0x00    # CHECK: jalr.hb  $4, $5
+
+0x08 0x00 0xe0 0x00    # CHECK: jr  $7
+
+0xc6 0x23 0xa4 0x80    # CHECK: lb  $4, 9158($5)
+
+0x06 0x00 0xa4 0x90    # CHECK: lbu $4, 6($5)
+
+0xc6 0x23 0xe9 0xd4    # CHECK: ldc1  $f9, 9158($7)
+
+0x01 0x02 0xf7 0x4d    # CHECK: ldxc1   $f8, $23($15)
+
+0x0c 0x00 0xa4 0x84    # CHECK: lh  $4, 12($5)
+
+0x0c 0x00 0xa4 0x84    # CHECK: lh  $4, 12($5)
+
+0xc6 0x23 0xe9 0xc0    # CHECK: ll  $9, 9158($7)
+
+0x67 0x45 0x06 0x3c    # CHECK: lui  $6, 17767
+
+0x05 0x00 0xa6 0x4c    # CHECK: luxc1 $f0, $6($5)
+
+0x18 0x00 0xa4 0x8c    # CHECK: lw  $4, 24($5)
+
+0xc6 0x23 0xe9 0xc4    # CHECK: lwc1  $f9, 9158($7)
+
+0x03 0x00 0x82 0x88    # CHECK: lwl   $2,  3($4)
+
+0x10 0x00 0xa3 0x98    # CHECK: lwr   $3, 16($5)
+
+0x00 0x05 0xcc 0x4d    # CHECK: lwxc1 $f20, $12($14)
+
+0x00 0x00 0xc7 0x70    # CHECK: madd   $6,  $7
+
+0xa1 0xd4 0x94 0x4e    # CHECK: madd.d  $f18, $f20, $f26, $f20
+
+0x60 0x98 0xf9 0x4f    # CHECK: madd.s  $f1, $f31, $f19, $f25
+
+0x01 0x00 0xc7 0x70    # CHECK: maddu  $6,  $7
+
+0x00 0x38 0x06 0x44    # CHECK: mfc1   $6, $f7
+
+0x10 0x28 0x00 0x00    # CHECK: mfhi  $5
+
+0x00 0xc0 0x7e 0x44    # CHECK: mfhc1   $fp, $f24
+
+0x12 0x28 0x00 0x00    # CHECK: mflo  $5
+
+0x86 0x41 0x20 0x46    # CHECK: mov.d $f6, $f8
+
+0x86 0x39 0x00 0x46    # CHECK: mov.s $f6, $f7
+
+0x04 0x00 0xc7 0x70    # CHECK: msub   $6,  $7
+
+0xa9 0xf2 0x52 0x4c    # CHECK: msub.d  $f10, $f2, $f30, $f18
+
+0x28 0x53 0x70 0x4e    # CHECK: msub.s  $f12, $f19, $f10, $f16
+
+0x05 0x00 0xc7 0x70    # CHECK: msubu  $6,  $7
+
+0x00 0x38 0x86 0x44    # CHECK: mtc1   $6, $f7
+
+0x11 0x00 0xe0 0x00    # CHECK: mthi   $7
+
+0x00 0x80 0xe0 0x44    # CHECK: mthc1   $zero, $f16
+
+0x13 0x00 0xe0 0x00    # CHECK: mtlo   $7
+
+0x02 0x62 0x2e 0x46    # CHECK: mul.d $f8, $f12, $f14
+
+0x42 0x32 0x07 0x46    # CHECK: mul.s $f9, $f6, $f7
+
+0x02 0x48 0xc7 0x70    # CHECK: mul $9,  $6,  $7
+
+0x18 0x00 0x65 0x00    # CHECK: mult  $3, $5
+
+0x19 0x00 0x65 0x00    # CHECK: multu $3, $5
+
+0x07 0x73 0x20 0x46    # CHECK: neg.d $f12, $f14
+
+0x87 0x39 0x00 0x46    # CHECK: neg.s $f6, $f7
+
+0xb1 0x74 0x54 0x4d    # CHECK: nmadd.d $f18, $f10, $f14, $f20
+
+0x30 0xc8 0xac 0x4c    # CHECK: nmadd.s $f0, $f5, $f25, $f12
+
+0x00 0x00 0x00 0x00    # CHECK: nop
+
+0x27 0x48 0xc7 0x00    # CHECK: nor $9,  $6, $7
+
+0xb9 0x87 0x1e 0x4d    # CHECK: nmsub.d $f30, $f8, $f16, $f30
+
+0x78 0x98 0x04 0x4f    # CHECK: nmsub.s $f1, $f24, $f19, $f4
+
+0x25 0x18 0x65 0x00    # CHECK: or  $3, $3, $5
+
+0x67 0x45 0xc9 0x34    # CHECK: ori $9,  $6, 17767
+
+0xc2 0x49 0x26 0x00    # CHECK: rotr $9, $6, 7
+
+0x46 0x48 0xe6 0x00    # CHECK:  rotrv $9, $6, $7
+
+0x0c 0x73 0x20 0x46    # CHECK: round.w.d $f12, $f14
+
+0x8c 0x39 0x00 0x46    # CHECK: round.w.s $f6, $f7
+
+0xc6 0x23 0xa4 0xa0    # CHECK: sb  $4, 9158($5)
+
+0x06 0x00 0xa4 0xa0    # CHECK: sb  $4, 6($5)
+
+0xc6 0x23 0xe9 0xe0    # CHECK: sc  $9, 9158($7)
+
+0xc6 0x23 0xe9 0xf4    # CHECK: sdc1  $f9, 9158($7)
+
+0x09 0x40 0x24 0x4f    # CHECK: sdxc1 $f8, $4($25)
+
+0x20 0x34 0x07 0x7c    # CHECK: seb $6, $7
+
+0x20 0x36 0x07 0x7c    # CHECK: seh $6, $7
+
+0xc6 0x23 0xa4 0xa4    # CHECK: sh  $4, 9158($5)
+
+0xc0 0x21 0x03 0x00    # CHECK: sll $4, $3, 7
+
+0x04 0x10 0xa3 0x00    # CHECK: sllv  $2, $3, $5
+
+0x2a 0x18 0x65 0x00    # CHECK: slt $3, $3, $5
+
+0x67 0x00 0x63 0x28    # CHECK: slti  $3, $3, 103
+
+0x67 0x00 0x63 0x2c    # CHECK: sltiu $3, $3, 103
+
+0x2b 0x18 0x65 0x00    # CHECK: sltu  $3, $3, $5
+
+0x04 0x73 0x20 0x46    # CHECK: sqrt.d  $f12, $f14
+
+0x84 0x39 0x00 0x46    # CHECK: sqrt.s  $f6, $f7
+
+0xc3 0x21 0x03 0x00    # CHECK: sra $4, $3, 7
+
+0x07 0x10 0xa3 0x00    # CHECK: srav  $2, $3, $5
+
+0xc2 0x21 0x03 0x00    # CHECK: srl $4, $3, 7
+
+0x06 0x10 0xa3 0x00    # CHECK: srlv  $2, $3, $5
+
+0x01 0x62 0x2e 0x46    # CHECK: sub.d $f8, $f12, $f14
+
+0x41 0x32 0x07 0x46    # CHECK: sub.s $f9, $f6, $f7
+
+0x22 0x48 0xc7 0x00    # CHECK: sub $9,  $6, $7
+
+0x23 0x20 0x65 0x00    # CHECK: subu  $4, $3, $5
+
+0x0d 0x20 0xb8 0x4c    # CHECK: suxc1 $f4, $24($5)
+
+0x18 0x00 0xa4 0xac    # CHECK: sw  $4, 24($5)
+
+0xc6 0x23 0xe9 0xe4    # CHECK: swc1  $f9, 9158($7)
+
+0x10 0x00 0xa4 0xa8    # CHECK: swl $4,  16($5)
+
+0x10 0x00 0xe6 0xb8    # CHECK: swr $6, 16($7)
+
+0x08 0xd0 0xd2 0x4e    # CHECK: swxc1 $f26, $18($22)
+
+0xcf 0x01 0x00 0x00    # CHECK: sync  7
+
+0x0d 0x73 0x20 0x46    # CHECK: trunc.w.d $f12, $f14
+
+0x8d 0x39 0x00 0x46    # CHECK: trunc.w.s $f6, $f7
+
+0xa0 0x30 0x07 0x7c    # CHECK: wsbh  $6, $7
+
+0x26 0x18 0x65 0x00    # CHECK: xor $3, $3, $5
+
+0x67 0x45 0xc9 0x38    # CHECK: xori  $9,  $6, 17767
diff --git a/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2.txt b/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2.txt
new file mode 100644
index 000000000000..0110dccb6612
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32r2/valid-mips32r2.txt
@@ -0,0 +1,337 @@
+# RUN: llvm-mc --disassemble %s -triple=mips-unknown-linux -mcpu=mips32r2 | FileCheck %s
+
+0x46 0x20 0x73 0x05   # CHECK: abs.d $f12, $f14
+
+0x46 0x00 0x39 0x85   # CHECK: abs.s $f6, $f7
+
+0x00 0xc7 0x48 0x20   # CHECK: add $9, $6, $7
+
+0x46 0x2e 0x62 0x00   # CHECK: add.d $f8, $f12, $f14
+
+0x46 0x07 0x32 0x40   # CHECK: add.s $f9, $f6, $f7
+
+0x20 0xc9 0x45 0x67   # CHECK: addi $9, $6, 17767
+
+0x24 0xc9 0xc5 0x67   # CHECK: addiu $9, $6, -15001
+
+0x00 0xc7 0x48 0x21   # CHECK: addu $9, $6, $7
+
+0x00 0xc7 0x48 0x24   # CHECK: and $9, $6, $7
+
+0x30 0xc9 0x45 0x67   # CHECK: andi $9, $6, 17767
+
+0x10 0x00 0x01 0x4c   # CHECK: b 1332
+
+0x45 0x00 0x01 0x4c   # CHECK: bc1f 1332
+
+0x45 0x1c 0x01 0x4c   # CHECK: bc1f $fcc7, 1332
+
+0x45 0x01 0x01 0x4c   # CHECK: bc1t 1332
+
+0x45 0x1d 0x01 0x4c   # CHECK: bc1t $fcc7, 1332
+
+0x11 0x26 0x01 0x4c   # CHECK: beq $9, $6, 1332
+
+0x04 0xc1 0x01 0x4c   # CHECK: bgez  $6, 1332
+
+0x04 0xd1 0x01 0x4c   # CHECK: bgezal  $6, 1332
+
+0x1c 0xc0 0x01 0x4c   # CHECK: bgtz  $6, 1332
+
+0x18 0xc0 0x01 0x4c   # CHECK: blez  $6, 1332
+
+0x15 0x26 0x01 0x4c   # CHECK: bne $9, $6, 1332
+
+0x46 0x2e 0x60 0x32   # CHECK: c.eq.d $f12, $f14
+
+0x46 0x07 0x30 0x32   # CHECK: c.eq.s $f6, $f7
+
+0x46 0x2e 0x60 0x30   # CHECK: c.f.d $f12, $f14
+
+0x46 0x07 0x30 0x30   # CHECK: c.f.s $f6, $f7
+
+0x46 0x2e 0x60 0x3e   # CHECK: c.le.d $f12, $f14
+
+0x46 0x07 0x30 0x3e   # CHECK: c.le.s $f6, $f7
+
+0x46 0x2e 0x60 0x3c   # CHECK: c.lt.d $f12, $f14
+
+0x46 0x07 0x30 0x3c   # CHECK: c.lt.s $f6, $f7
+
+0x46 0x2e 0x60 0x3d   # CHECK: c.nge.d $f12, $f14
+
+0x46 0x07 0x30 0x3d   # CHECK: c.nge.s $f6, $f7
+
+0x46 0x2e 0x60 0x3b   # CHECK: c.ngl.d $f12, $f14
+
+0x46 0x07 0x30 0x3b   # CHECK: c.ngl.s $f6, $f7
+
+0x46 0x2e 0x60 0x39   # CHECK: c.ngle.d $f12, $f14
+
+0x46 0x07 0x30 0x39   # CHECK: c.ngle.s $f6, $f7
+
+0x46 0x2e 0x60 0x3f   # CHECK: c.ngt.d $f12, $f14
+
+0x46 0x07 0x30 0x3f   # CHECK: c.ngt.s $f6, $f7
+
+0x46 0x2e 0x60 0x36   # CHECK: c.ole.d $f12, $f14
+
+0x46 0x07 0x30 0x36   # CHECK: c.ole.s $f6, $f7
+
+0x46 0x2e 0x60 0x34   # CHECK: c.olt.d $f12, $f14
+
+0x46 0x07 0x30 0x34   # CHECK: c.olt.s $f6, $f7
+
+0x46 0x2e 0x60 0x3a   # CHECK: c.seq.d $f12, $f14
+
+0x46 0x07 0x30 0x3a   # CHECK: c.seq.s $f6, $f7
+
+0x46 0x2e 0x60 0x38   # CHECK: c.sf.d $f12, $f14
+
+0x46 0x07 0x30 0x38   # CHECK: c.sf.s $f6, $f7
+
+0x46 0x2e 0x60 0x33   # CHECK: c.ueq.d $f12, $f14
+
+0x46 0x12 0xe0 0x33   # CHECK: c.ueq.s $f28, $f18
+
+0x46 0x2e 0x60 0x37   # CHECK: c.ule.d $f12, $f14
+
+0x46 0x07 0x30 0x37   # CHECK: c.ule.s $f6, $f7
+
+0x46 0x2e 0x60 0x35   # CHECK: c.ult.d $f12, $f14
+
+0x46 0x07 0x30 0x35   # CHECK: c.ult.s $f6, $f7
+
+0x46 0x2e 0x60 0x31   # CHECK: c.un.d $f12, $f14
+
+0x46 0x07 0x30 0x31   # CHECK: c.un.s $f6, $f7
+
+0x46 0x20 0x73 0x0e   # CHECK: ceil.w.d $f12, $f14
+
+0x46 0x00 0x39 0x8e   # CHECK: ceil.w.s $f6, $f7
+
+0x44 0x46 0x38 0x00   # CHECK: cfc1  $6, $7
+
+0x70 0xe6 0x30 0x21   # CHECK: clo  $6, $7
+
+0x70 0xe6 0x30 0x20   # CHECK: clz  $6, $7
+
+0x44 0xc6 0x38 0x00   # CHECK: ctc1  $6, $7
+
+0x46 0x00 0x39 0xa1   # CHECK: cvt.d.s $f6, $f7
+
+0x46 0x80 0x73 0x21   # CHECK: cvt.d.w $f12, $f14
+
+0x46 0x20 0x73 0x25   # CHECK: cvt.l.d $f12, $f14
+
+0x46 0x00 0x39 0xa5   # CHECK: cvt.l.s $f6, $f7
+
+0x46 0x20 0x73 0x20   # CHECK: cvt.s.d $f12, $f14
+
+0x46 0x80 0x39 0xa0   # CHECK: cvt.s.w $f6, $f7
+
+0x46 0x20 0x73 0x24   # CHECK: cvt.w.d $f12, $f14
+
+0x46 0x00 0x39 0xa4   # CHECK: cvt.w.s $f6, $f7
+
+0x41 0x7e 0x60 0x00   # CHECK: di  $fp
+
+0x41 0x60 0x60 0x00   # CHECK: di
+
+0x41 0x6e 0x60 0x20   # CHECK: ei  $14
+
+0x41 0x60 0x60 0x20   # CHECK: ei
+
+0x46 0x20 0x73 0x0f   # CHECK: floor.w.d $f12, $f14
+
+0x46 0x00 0x39 0x8f   # CHECK: floor.w.s $f6, $f7
+
+0x7d 0x33 0x61 0x84   # CHECK: ins $19, $9, 6, 7
+
+0x08 0x00 0x01 0x4c   # CHECK: j 1328
+
+0x0c 0x00 0x01 0x4c   # CHECK: jal 1328
+
+0x74 0x00 0x01 0x4c   # CHECK: jalx 1328
+
+0x00 0xe0 0xf8 0x09   # CHECK: jalr  $7
+
+0x00 0x80 0xfc 0x09   # CHECK: jalr.hb  $4
+
+0x00 0xa0 0x24 0x09   # CHECK: jalr.hb  $4, $5
+
+0x00 0xe0 0x00 0x08   # CHECK: jr  $7
+
+0x80 0xa4 0x23 0xc6   # CHECK: lb  $4, 9158($5)
+
+0x90 0xa4 0x00 0x06   # CHECK: lbu $4, 6($5)
+
+0xd4 0xe9 0x23 0xc6   # CHECK: ldc1  $f9, 9158($7)
+
+0x4d 0xf7 0x02 0x01   # CHECK: ldxc1   $f8, $23($15)
+
+0x84 0xa4 0x00 0x0c   # CHECK: lh  $4, 12($5)
+
+0x84 0xa4 0x00 0x0c   # CHECK: lh  $4, 12($5)
+
+0xc0 0xe9 0x23 0xc6   # CHECK: ll  $9, 9158($7)
+
+0x3c 0x06 0x45 0x67   # CHECK: lui  $6, 17767
+
+0x4c 0xa6 0x00 0x05   # CHECK: luxc1 $f0, $6($5)
+
+0x8c 0xa4 0x00 0x18   # CHECK: lw  $4, 24($5)
+
+0xc4 0xe9 0x23 0xc6   # CHECK: lwc1  $f9, 9158($7)
+
+0x88 0x82 0x00 0x03   # CHECK: lwl   $2,  3($4)
+
+0x98 0xa3 0x00 0x10   # CHECK: lwr   $3, 16($5)
+
+0x4d 0xcc 0x05 0x00   # CHECK: lwxc1 $f20, $12($14)
+
+0x70 0xc7 0x00 0x00   # CHECK: madd   $6,  $7
+
+0x4e 0x94 0xd4 0xa1   # CHECK: madd.d  $f18, $f20, $f26, $f20
+
+0x4f 0xf9 0x98 0x60   # CHECK: madd.s  $f1, $f31, $f19, $f25
+
+0x70 0xc7 0x00 0x01   # CHECK: maddu  $6,  $7
+
+0x44 0x06 0x38 0x00   # CHECK: mfc1   $6, $f7
+
+0x00 0x00 0x28 0x10   # CHECK: mfhi  $5
+
+0x44 0x7e 0xc0 0x00   # CHECK: mfhc1   $fp, $f24
+
+0x00 0x00 0x28 0x12   # CHECK: mflo  $5
+
+0x46 0x20 0x41 0x86   # CHECK: mov.d $f6, $f8
+
+0x46 0x00 0x39 0x86   # CHECK: mov.s $f6, $f7
+
+0x70 0xc7 0x00 0x04   # CHECK: msub   $6,  $7
+
+0x4c 0x52 0xf2 0xa9   # CHECK: msub.d  $f10, $f2, $f30, $f18
+
+0x4e 0x70 0x53 0x28   # CHECK: msub.s  $f12, $f19, $f10, $f16
+
+0x70 0xc7 0x00 0x05   # CHECK: msubu  $6,  $7
+
+0x44 0x86 0x38 0x00   # CHECK: mtc1   $6, $f7
+
+0x00 0xe0 0x00 0x11   # CHECK: mthi   $7
+
+0x44 0xe0 0x80 0x00   # CHECK: mthc1   $zero, $f16
+
+0x00 0xe0 0x00 0x13   # CHECK: mtlo   $7
+
+0x46 0x2e 0x62 0x02   # CHECK: mul.d $f8, $f12, $f14
+
+0x46 0x07 0x32 0x42   # CHECK: mul.s $f9, $f6, $f7
+
+0x70 0xc7 0x48 0x02   # CHECK: mul $9,  $6,  $7
+
+0x00 0x65 0x00 0x18   # CHECK: mult  $3, $5
+
+0x00 0x65 0x00 0x19   # CHECK: multu $3, $5
+
+0x46 0x20 0x73 0x07   # CHECK: neg.d $f12, $f14
+
+0x46 0x00 0x39 0x87   # CHECK: neg.s $f6, $f7
+
+0x4d 0x54 0x74 0xb1   # CHECK: nmadd.d $f18, $f10, $f14, $f20
+
+0x4c 0xac 0xc8 0x30   # CHECK: nmadd.s $f0, $f5, $f25, $f12
+
+0x00 0x00 0x00 0x00   # CHECK: nop
+
+0x00 0xc7 0x48 0x27   # CHECK: nor $9,  $6, $7
+
+0x4d 0x1e 0x87 0xb9   # CHECK: nmsub.d $f30, $f8, $f16, $f30
+
+0x4f 0x04 0x98 0x78   # CHECK: nmsub.s $f1, $f24, $f19, $f4
+
+0x00 0x65 0x18 0x25   # CHECK: or  $3, $3, $5
+
+0x34 0xc9 0x45 0x67   # CHECK: ori $9,  $6, 17767
+
+0x00 0x26 0x49 0xc2   # CHECK: rotr $9, $6, 7
+
+0x00 0xe6 0x48 0x46   # CHECK:  rotrv $9, $6, $7
+
+0x46 0x20 0x73 0x0c   # CHECK: round.w.d $f12, $f14
+
+0x46 0x00 0x39 0x8c   # CHECK: round.w.s $f6, $f7
+
+0xa0 0xa4 0x23 0xc6   # CHECK: sb  $4, 9158($5)
+
+0xa0 0xa4 0x00 0x06   # CHECK: sb  $4, 6($5)
+
+0xe0 0xe9 0x23 0xc6   # CHECK: sc  $9, 9158($7)
+
+0xf4 0xe9 0x23 0xc6   # CHECK: sdc1  $f9, 9158($7)
+
+0x4f 0x24 0x40 0x09   # CHECK: sdxc1 $f8, $4($25)
+
+0x7c 0x07 0x34 0x20   # CHECK: seb $6, $7
+
+0x7c 0x07 0x36 0x20   # CHECK: seh $6, $7
+
+0xa4 0xa4 0x23 0xc6   # CHECK: sh  $4, 9158($5)
+
+0x00 0x03 0x21 0xc0   # CHECK: sll $4, $3, 7
+
+0x00 0xa3 0x10 0x04   # CHECK: sllv  $2, $3, $5
+
+0x00 0x65 0x18 0x2a   # CHECK: slt $3, $3, $5
+
+0x28 0x63 0x00 0x67   # CHECK: slti  $3, $3, 103
+
+0x2c 0x63 0x00 0x67   # CHECK: sltiu $3, $3, 103
+
+0x00 0x65 0x18 0x2b   # CHECK: sltu  $3, $3, $5
+
+0x46 0x20 0x73 0x04   # CHECK: sqrt.d  $f12, $f14
+
+0x46 0x00 0x39 0x84   # CHECK: sqrt.s  $f6, $f7
+
+0x00 0x03 0x21 0xc3   # CHECK: sra $4, $3, 7
+
+0x00 0xa3 0x10 0x07   # CHECK: srav  $2, $3, $5
+
+0x00 0x03 0x21 0xc2   # CHECK: srl $4, $3, 7
+
+0x00 0xa3 0x10 0x06   # CHECK: srlv  $2, $3, $5
+
+0x46 0x2e 0x62 0x01   # CHECK: sub.d $f8, $f12, $f14
+
+0x46 0x07 0x32 0x41   # CHECK: sub.s $f9, $f6, $f7
+
+0x00 0xc7 0x48 0x22   # CHECK: sub $9,  $6, $7
+
+0x00 0x65 0x20 0x23   # CHECK: subu  $4, $3, $5
+
+0x4c 0xb8 0x20 0x0d   # CHECK: suxc1 $f4, $24($5)
+
+0xac 0xa4 0x00 0x18   # CHECK: sw  $4, 24($5)
+
+0xe4 0xe9 0x23 0xc6   # CHECK: swc1  $f9, 9158($7)
+
+0xa8 0xa4 0x00 0x10   # CHECK: swl $4,  16($5)
+
+0xb8 0xe6 0x00 0x10   # CHECK: swr $6, 16($7)
+
+0x4e 0xd2 0xd0 0x08   # CHECK: swxc1 $f26, $18($22)
+
+0x00 0x00 0x01 0xcf   # CHECK: sync  7
+
+0x46 0x20 0x73 0x0d   # CHECK: trunc.w.d $f12, $f14
+
+0x46 0x00 0x39 0x8d   # CHECK: trunc.w.s $f6, $f7
+
+0x7c 0x07 0x30 0xa0   # CHECK: wsbh  $6, $7
+
+0x00 0x65 0x18 0x26   # CHECK: xor $3, $3, $5
+
+0x38 0xc9 0x45 0x67   # CHECK: xori  $9,  $6, 17767
diff --git a/test/MC/Disassembler/Mips/mips32r2/valid-xfail-mips32r2.txt b/test/MC/Disassembler/Mips/mips32r2/valid-xfail-mips32r2.txt
new file mode 100644
index 000000000000..b70d1889a238
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips32r2/valid-xfail-mips32r2.txt
@@ -0,0 +1,83 @@
+# RUN: llvm-mc %s -triple=mips-unknown-linux -disassemble -mcpu=mips32r2 | FileCheck %s
+# XFAIL: *
+0x46 0x2f 0x79 0x32  # CHECK :      c.eq.d    $fcc1,$f15,$f15
+0x46 0x11 0xc5 0x32  # CHECK :      c.eq.s    $fcc5,$f24,$f17
+0x46 0x35 0x5c 0x30  # CHECK :      c.f.d     $fcc4,$f11,$f21
+0x46 0x07 0xf4 0x30  # CHECK :      c.f.s     $fcc4,$f30,$f7
+0x46 0x21 0x94 0x3e  # CHECK :      c.le.d    $fcc4,$f18,$f1
+0x46 0x04 0xc6 0x3e  # CHECK :      c.le.s    $fcc6,$f24,$f4
+0x46 0x23 0x4b 0x3c  # CHECK :      c.lt.d    $fcc3,$f9,$f3
+0x46 0x0e 0x8a 0x3c  # CHECK :      c.lt.s    $fcc2,$f17,$f14
+0x46 0x30 0xad 0x3d  # CHECK :      c.nge.d   $fcc5,$f21,$f16
+0x46 0x08 0x5b 0x3d  # CHECK :      c.nge.s   $fcc3,$f11,$f8
+0x46 0x17 0xfa 0x3b  # CHECK :      c.ngl.s   $fcc2,$f31,$f23
+0x46 0x17 0x92 0x39  # CHECK :      c.ngle.s  $fcc2,$f18,$f23
+0x46 0x27 0xc4 0x3f  # CHECK :      c.ngt.d   $fcc4,$f24,$f7
+0x46 0x0d 0x45 0x3f  # CHECK :      c.ngt.s   $fcc5,$f8,$f13
+0x46 0x3f 0x82 0x36  # CHECK :      c.ole.d   $fcc2,$f16,$f31
+0x46 0x14 0x3b 0x36  # CHECK :      c.ole.s   $fcc3,$f7,$f20
+0x46 0x3c 0x9c 0x34  # CHECK :      c.olt.d   $fcc4,$f19,$f28
+0x46 0x07 0xa6 0x34  # CHECK :      c.olt.s   $fcc6,$f20,$f7
+0x46 0x27 0xfc 0x3a  # CHECK :      c.seq.d   $fcc4,$f31,$f7
+0x46 0x19 0x0f 0x3a  # CHECK :      c.seq.s   $fcc7,$f1,$f25
+0x46 0x39 0x6c 0x33  # CHECK :      c.ueq.d   $fcc4,$f13,$f25
+0x46 0x1e 0x1e 0x33  # CHECK :      c.ueq.s   $fcc6,$f3,$f30
+0x46 0x32 0xcf 0x37  # CHECK :      c.ule.d   $fcc7,$f25,$f18
+0x46 0x1e 0xaf 0x37  # CHECK :      c.ule.s   $fcc7,$f21,$f30
+0x46 0x31 0x36 0x35  # CHECK :      c.ult.d   $fcc6,$f6,$f17
+0x46 0x0a 0xc7 0x35  # CHECK :      c.ult.s   $fcc7,$f24,$f10
+0x46 0x38 0xbe 0x31  # CHECK :      c.un.d    $fcc6,$f23,$f24
+0x46 0x04 0xf1 0x31  # CHECK :      c.un.s    $fcc1,$f30,$f4
+0x46 0xc0 0x45 0x85  # CHECK :      abs.ps  $f22,$f8
+0x46 0xcc 0xc6 0x00  # CHECK :      add.ps  $f24,$f24,$f12
+0x46 0xca 0x04 0x32  # CHECK :      c.eq.ps $fcc4,$f0,$f10
+0x46 0xcc 0x66 0x30  # CHECK :      c.f.ps  $fcc6,$f12,$f12
+0x46 0xd4 0x42 0x3e  # CHECK :      c.le.ps $fcc2,$f8,$f20
+0x46 0xc4 0x90 0x3c  # CHECK :      c.lt.ps $f18,$f4
+0x46 0xda 0x10 0x3d  # CHECK :      c.nge.ps   $f2,$f26
+0x46 0xde 0xb0 0x3b  # CHECK :      c.ngl.ps   $f22,$f30
+0x46 0xd4 0x66 0x39  # CHECK :      c.ngle.ps  $fcc6,$f12,$f20
+0x46 0xc6 0xf6 0x3f  # CHECK :      c.ngt.ps   $fcc6,$f30,$f6
+0x46 0xc8 0xa6 0x36  # CHECK :      c.ole.ps   $fcc6,$f20,$f8
+0x46 0xd0 0x32 0x34  # CHECK :      c.olt.ps   $fcc2,$f6,$f16
+0x46 0xce 0xf6 0x3a  # CHECK :      c.seq.ps   $fcc6,$f30,$f14
+0x46 0xc6 0x26 0x38  # CHECK :      c.sf.ps $fcc6,$f4,$f6
+0x46 0xdc 0x20 0x33  # CHECK :      c.ueq.ps   $f4,$f28
+0x46 0xc2 0x86 0x37  # CHECK :      c.ule.ps   $fcc6,$f16,$f2
+0x46 0xc0 0x76 0x35  # CHECK :      c.ult.ps   $fcc6,$f14,$f0
+0x46 0xda 0x14 0x31  # CHECK :      c.un.ps $fcc4,$f2,$f26
+0x46 0x20 0x20 0x8a  # CHECK :      ceil.l.d   $f2,$f4
+0x46 0x00 0x6c 0x8a  # CHECK :      ceil.l.s   $f18,$f13
+0x46 0xa0 0x81 0x21  # CHECK :      cvt.d.l $f4,$f16
+0x46 0x14 0x90 0xa6  # CHECK :      cvt.ps.s   $f2,$f18,$f20
+0x46 0xa0 0xf3 0xe0  # CHECK :      cvt.s.l $f15,$f30
+0x46 0xc0 0x17 0xa8  # CHECK :      cvt.s.pl   $f30,$f2
+0x46 0xc0 0xd3 0xa0  # CHECK :      cvt.s.pu   $f14,$f26
+0x46 0x20 0x36 0x8b  # CHECK :      floor.l.d  $f26,$f6
+0x46 0x00 0x23 0x0b  # CHECK :      floor.l.s  $f12,$f4
+0x4c 0x42 0x75 0xa6  # CHECK :      madd.ps $f22,$f2,$f14,$f2
+0x46 0xc0 0x85 0x86  # CHECK :      mov.ps  $f22,$f16
+0x46 0xd8 0xe2 0x91  # CHECK :      movf.ps $f10,$f28,$fcc6
+0x46 0xd3 0xf7 0x93  # CHECK :      movn.ps $f30,$f30,s3
+0x46 0xc9 0xc5 0x11  # CHECK :      movt.ps $f20,$f24,$fcc2
+0x46 0xdf 0x84 0x92  # CHECK :      movz.ps $f18,$f16,ra
+0x4d 0xd0 0xe3 0x2e  # CHECK :      msub.ps $f12,$f14,$f28,$f16
+0x46 0xc0 0x64 0x87  # CHECK :      neg.ps  $f18,$f12
+0x4c 0x98 0x46 0xb6  # CHECK :      nmadd.ps   $f26,$f4,$f8,$f24
+0x4d 0x90 0x71 0xbe  # CHECK :      nmsub.ps   $f6,$f12,$f14,$f16
+0x46 0xde 0x46 0x2c  # CHECK :      pll.ps  $f24,$f8,$f30
+0x46 0xdc 0xd0 0x2d  # CHECK :      plu.ps  $f0,$f26,$f28
+0x46 0xda 0xf2 0x2e  # CHECK :      pul.ps  $f8,$f30,$f26
+0x46 0xc2 0x46 0x2f  # CHECK :      puu.ps  $f24,$f8,$f2
+0x41 0x49 0x98 0x00  # CHECK :      rdpgpr  s3,t1
+0x46 0x20 0x34 0x95  # CHECK :      recip.d $f18,$f6
+0x46 0x00 0xf0 0xd5  # CHECK :      recip.s $f3,$f30
+0x02 0xa7 0x68 0x46  # CHECK :      rorv    t5,a3,s5
+0x46 0x20 0x03 0x08  # CHECK :      round.l.d  $f12,$f0
+0x46 0x00 0x2e 0x08  # CHECK :      round.l.s  $f24,$f5
+0x46 0x20 0xe0 0x96  # CHECK :      rsqrt.d $f2,$f28
+0x46 0x00 0x41 0x16  # CHECK :      rsqrt.s $f4,$f8
+0x46 0xda 0x71 0x01  # CHECK :      sub.ps  $f4,$f14,$f26
+0x46 0x20 0xb5 0x89  # CHECK :      trunc.l.d  $f22,$f22
+0x46 0x00 0xff 0x09  # CHECK :      trunc.l.s  $f28,$f31
+0x41 0xcd 0x00 0x00  # CHECK :      wrpgpr  zero,t5
diff --git a/test/MC/Disassembler/Mips/mips32r2_le.txt b/test/MC/Disassembler/Mips/mips32r2_le.txt
index 0362ca6d8d67..81a05b330fde 100644
--- a/test/MC/Disassembler/Mips/mips32r2_le.txt
+++ b/test/MC/Disassembler/Mips/mips32r2_le.txt
@@ -448,3 +448,6 @@
 
 # CHECK: xori  $9,  $6, 17767
 0x67 0x45 0xc9 0x38
+
+# CHECK: synci 7500($19)
+0x4c 0x1d 0x7f 0x06
diff --git a/test/MC/Disassembler/Mips/mips4/valid-mips4-el.txt b/test/MC/Disassembler/Mips/mips4/valid-mips4-el.txt
new file mode 100644
index 000000000000..bd7558b7446e
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips4/valid-mips4-el.txt
@@ -0,0 +1,229 @@
+# RUN: llvm-mc %s -triple=mips64el-unknown-linux -disassemble -mcpu=mips4 | FileCheck %s
+# CHECK: .text
+0x85 0xc1 0x20 0x46  # CHECK: abs.d	$f6, $f24
+0x45 0x82 0x00 0x46  # CHECK: abs.s	$f9, $f16
+0x20 0xb8 0x45 0x02  # CHECK: add	$23, $18, $5
+0x48 0x3b 0xc9 0x21  # CHECK: addi	$9, $14, 15176
+0xe7 0xe3 0x18 0x23  # CHECK: addi	$24, $24, -7193
+0x00 0x30 0x3c 0x46  # CHECK: add.d	$f0, $f6, $f28
+0x00 0xaa 0x18 0x46  # CHECK: add.s	$f8, $f21, $f24
+0xd2 0x66 0x2d 0x21  # CHECK: addi	$13, $9, 26322
+0xfe 0xff 0x08 0x21  # CHECK: addi	$8, $8, -2
+0x21 0x48 0x86 0x00  # CHECK: addu	$9, $4, $6
+0x0a 0x00 0x29 0x25  # CHECK: addiu	$9, $9, 10
+0x24 0xb8 0x4c 0x00  # CHECK: and	$23, $2, $12
+0x04 0x00 0x42 0x30  # CHECK: andi	$2, $2, 4
+0x01 0x00 0x00 0x45  # CHECK: bc1f	 8
+0x00 0x00 0x04 0x45  # CHECK: bc1f   $fcc1, 4
+0x06 0x00 0x1e 0x45  # CHECK: bc1fl  $fcc7, 28
+0x0c 0x00 0x02 0x45  # CHECK: bc1fl	 52
+0x01 0x00 0x01 0x45  # CHECK: bc1t	 8
+0x00 0x00 0x05 0x45  # CHECK: bc1t   $fcc1, 4
+0xf4 0xf7 0x03 0x45  # CHECK: bc1tl	 -8236
+0x06 0x00 0x1f 0x45  # CHECK: bc1tl  $fcc7, 28
+0x9b 0x14 0x11 0x04  # CHECK: bal	21104
+0x00 0x00 0x00 0x00  # CHECK: nop
+0x9b 0x14 0xd0 0x04  # CHECK: bltzal	$6, 21104
+0x00 0x00 0x00 0x00  # CHECK: nop
+0x40 0x0c 0xd3 0x51  # CHECK: beql	$14, $19, 12548
+0x1f 0x07 0x93 0x05  # CHECK: bgezall	$12, 7296
+0x4d 0xf9 0x83 0x04  # CHECK: bgezl	$4, -6856
+0x59 0xfc 0x40 0x5d  # CHECK: bgtzl	$10, -3736
+0xe7 0x02 0xc0 0x58  # CHECK: blezl	$6, 2976
+0x7a 0x00 0xd2 0x04  # CHECK: bltzall	$6, 492
+0x45 0xf6 0x22 0x06  # CHECK: bltzl	$17, -9960
+0xfc 0x04 0x94 0x57  # CHECK: bnel	$gp, $20, 5108
+0x08 0x00 0xa1 0xbc  # CHECK: cache   1, 8($5)
+0x3b 0xe0 0x3c 0x46  # CHECK: c.ngl.d	$f28, $f28
+0x39 0x00 0x30 0x46  # CHECK: c.ngle.d	$f0, $f16
+0x38 0xf0 0x20 0x46  # CHECK: c.sf.d	$f30, $f0
+0x38 0x70 0x16 0x46  # CHECK: c.sf.s	$f14, $f22
+0x4a 0x18 0x20 0x46  # CHECK: ceil.l.d  $f1, $f3
+0x8a 0x6c 0x00 0x46  # CHECK: ceil.l.s  $f18, $f13
+0xce 0xc2 0x20 0x46  # CHECK: ceil.w.d	$f11, $f24
+0x8e 0xa1 0x00 0x46  # CHECK: ceil.w.s	$f6, $f20
+0x00 0xa8 0x51 0x44  # CHECK: cfc1	$17, $21
+0x00 0xd0 0xc6 0x44  # CHECK: ctc1	$6, $26
+0xa1 0xe5 0x00 0x46  # CHECK: cvt.d.s	$f22, $f28
+0xa1 0x5e 0x80 0x46  # CHECK: cvt.d.w	$f26, $f11
+0x21 0x81 0xa0 0x46  # CHECK: cvt.d.l $f4, $f16
+0x25 0x7e 0x20 0x46  # CHECK: cvt.l.d $f24, $f15
+0xe5 0xea 0x00 0x46  # CHECK: cvt.l.s $f11, $f29
+0xe0 0xf3 0xa0 0x46  # CHECK: cvt.s.l $f15, $f30
+0xa0 0x46 0x20 0x46  # CHECK: cvt.s.d	$f26, $f8
+0xa0 0x7d 0x80 0x46  # CHECK: cvt.s.w	$f22, $f15
+0x24 0x75 0x20 0x46  # CHECK: cvt.w.d	$f20, $f14
+0x24 0xc5 0x00 0x46  # CHECK: cvt.w.s	$f20, $f24
+0x2c 0x98 0x3f 0x00  # CHECK: dadd    $19, $1, $ra
+0xc7 0x93 0x9d 0x62  # CHECK: daddi   $sp, $20, -27705
+0xc7 0x93 0xbd 0x63  # CHECK: daddi   $sp, $sp, -27705
+0xc7 0x93 0x9d 0x62  # CHECK: daddi   $sp, $20, -27705
+0xc7 0x93 0x9d 0x62  # CHECK: daddi   $sp, $20, -27705
+0xc7 0x93 0xbd 0x63  # CHECK: daddi   $sp, $sp, -27705
+0x16 0xee 0xda 0x66  # CHECK: daddiu  $26, $22, -4586
+0x2d 0x98 0x3f 0x00  # CHECK: daddu   $19, $1, $ra
+0x9f 0x46 0x58 0x64  # CHECK: daddiu  $24, $2, 18079
+0x3f 0x69 0x73 0x66  # CHECK: daddiu  $19, $19, 26943
+0x1e 0x00 0x53 0x03  # CHECK: ddiv    $zero, $26, $19
+0x1f 0x00 0x11 0x02  # CHECK: ddivu   $zero, $16, $17
+0x1a 0x00 0x2b 0x03  # CHECK: div	$zero, $25, $11
+0x03 0xa7 0x3a 0x46  # CHECK: div.d	$f28, $f20, $f26
+0x03 0x29 0x0f 0x46  # CHECK: div.s	$f4, $f5, $f15
+0x1b 0x00 0x2f 0x03  # CHECK: divu	$zero, $25, $15
+0x00 0x68 0x2c 0x44  # CHECK: dmfc1   $12, $f13
+0x00 0x70 0xb0 0x44  # CHECK: dmtc1   $16, $f14
+0x1c 0x00 0xe9 0x02  # CHECK: dmult   $23, $9
+0x1d 0x00 0xa6 0x00  # CHECK: dmultu  $5, $6
+0xb8 0x04 0x00 0x00  # CHECK: dsll    $zero, $zero, 18
+0xb8 0x04 0x14 0x00  # CHECK: dsll    $zero, $20, 18
+0x14 0x00 0x94 0x01  # CHECK: dsllv   $zero, $20, $12
+0xbc 0x04 0x00 0x00  # CHECK: dsll32  $zero, $zero, 18
+0xbc 0x04 0x00 0x00  # CHECK: dsll32  $zero, $zero, 18
+0x14 0x00 0x94 0x01  # CHECK: dsllv   $zero, $20, $12
+0xbb 0xe2 0x1c 0x00  # CHECK: dsra    $gp, $gp, 10
+0xbb 0xe2 0x12 0x00  # CHECK: dsra    $gp, $18, 10
+0x17 0xe0 0x72 0x02  # CHECK: dsrav   $gp, $18, $19
+0xbf 0xe2 0x1c 0x00  # CHECK: dsra32  $gp, $gp, 10
+0xbf 0xe2 0x12 0x00  # CHECK: dsra32  $gp, $18, 10
+0x17 0xe0 0x72 0x02  # CHECK: dsrav   $gp, $18, $19
+0xfa 0x9d 0x13 0x00  # CHECK: dsrl    $19, $19, 23
+0xfa 0x9d 0x06 0x00  # CHECK: dsrl    $19, $6, 23
+0x16 0x98 0x86 0x02  # CHECK: dsrlv   $19, $6, $20
+0xfe 0x9d 0x13 0x00  # CHECK: dsrl32  $19, $19, 23
+0xfe 0x9d 0x06 0x00  # CHECK: dsrl32  $19, $6, 23
+0x16 0x98 0x86 0x02  # CHECK: dsrlv   $19, $6, $20
+0x2e 0x38 0xc8 0x02  # CHECK: dsub    $7, $22, $8
+0x2f 0x28 0xba 0x00  # CHECK: dsubu   $5, $5, $26
+0xc0 0x00 0x00 0x00  # CHECK: ehb
+0x18 0x00 0x00 0x42  # CHECK: eret
+0x8f 0x53 0x20 0x46  # CHECK: floor.w.d	$f14, $f10
+0x0f 0x4a 0x00 0x46  # CHECK: floor.w.s	$f8, $f9
+0x8b 0x3e 0x20 0x46  # CHECK: floor.l.d $f26, $f7
+0x0b 0x2b 0x00 0x46  # CHECK: floor.l.s $f12, $f5
+0x4d 0xc7 0x58 0x81  # CHECK: lb	$24, -14515($10)
+0xf3 0x75 0x68 0x90  # CHECK: lbu	$8, 30195($3)
+0x07 0x40 0x0a 0xd6  # CHECK: ldc1	$f10, 16391($16)
+0x43 0xad 0x28 0xd8  # CHECK: ldc2	$8, -21181($1)
+0x94 0xde 0xab 0x86  # CHECK: lh	$11, -8556($21)
+0xbd 0xa6 0x53 0x94  # CHECK: lhu	$19, -22851($2)
+0xb3 0x8b 0x01 0x24  # CHECK: addiu	$1, $zero, -29773
+0x3f 0x8b 0x00 0x24  # CHECK: addiu	$zero, $zero, -29889
+0x67 0xe3 0x42 0xc2  # CHECK: ll	$2, -7321($18)
+0x2a 0x16 0xa8 0x8c  # CHECK: lw	$8, 5674($5)
+0xf1 0x27 0x50 0xc7  # CHECK: lwc1	$f16, 10225($26)
+0xb7 0xfc 0xd2 0xc8  # CHECK: lwc2	$18, -841($6)
+0x79 0xef 0xf4 0x89  # CHECK: lwl	$20, -4231($15)
+0x35 0xb5 0x80 0x9b  # CHECK: lwr	$zero, -19147($gp)
+0x00 0x03 0xd1 0x4f  # CHECK: lwxc1   $f12, $17($fp)
+0x00 0xd8 0x07 0x44  # CHECK: mfc1	$7, $f27
+0x10 0x98 0x00 0x00  # CHECK: mfhi	$19
+0x10 0xe8 0x00 0x00  # CHECK: mfhi	$sp
+0x12 0x88 0x00 0x00  # CHECK: mflo	$17
+0x06 0x75 0x20 0x46  # CHECK: mov.d	$f20, $f14
+0x86 0xd8 0x00 0x46  # CHECK: mov.s	$f2, $f27
+0x01 0xe0 0x1c 0x01  # CHECK: movf    $gp, $8, $fcc7
+0x91 0x59 0x34 0x46  # CHECK: movf.d  $f6, $f11, $fcc5
+0xd1 0x2d 0x18 0x46  # CHECK: movf.s  $f23, $f5, $fcc6
+0x21 0xf0 0x80 0x00  # CHECK: move	 $fp, $4
+0x21 0xc8 0xc0 0x00  # CHECK: move	 $25, $6
+0x0b 0x18 0x30 0x02  # CHECK: movn   $3, $17, $16
+0xd3 0xae 0x3a 0x46  # CHECK: movn.d $f27, $f21, $26
+0x13 0x03 0x17 0x46  # CHECK: movn.s $f12, $f0, $23
+0x01 0x00 0x95 0x02  # CHECK: movt   $zero, $20, $fcc5
+0x11 0x10 0x21 0x46  # CHECK: movt.d $f0, $f2, $fcc0
+0x91 0x17 0x05 0x46  # CHECK: movt.s $f30, $f2, $fcc1
+0x0a 0x28 0xc9 0x02  # CHECK: movz   $5, $22, $9
+0x12 0xeb 0x29 0x46  # CHECK: movz.d $f12, $f29, $9
+0x52 0x3e 0x03 0x46  # CHECK: movz.s $f25, $f7, $3
+0x00 0x48 0x9e 0x44  # CHECK: mtc1	$fp, $f9
+0x11 0x00 0x20 0x02  # CHECK: mthi	$17
+0x13 0x00 0xa0 0x03  # CHECK: mtlo	$sp
+0x13 0x00 0x20 0x03  # CHECK: mtlo	$25
+0x02 0xa5 0x30 0x46  # CHECK: mul.d	$f20, $f20, $f16
+0x82 0x57 0x02 0x46  # CHECK: mul.s	$f30, $f10, $f2
+0x18 0x00 0xb4 0x03  # CHECK: mult	$sp, $20
+0x18 0x00 0xa2 0x03  # CHECK: mult	$sp, $2
+0x19 0x00 0x9a 0x03  # CHECK: multu	$gp, $26
+0x19 0x00 0x32 0x01  # CHECK: multu	$9, $18
+0x23 0x10 0x02 0x00  # CHECK: negu	 $2, $2
+0x23 0x10 0x03 0x00  # CHECK: negu	 $2, $3
+0x87 0x96 0x20 0x46  # CHECK: neg.d	$f26, $f18
+0x47 0x78 0x00 0x46  # CHECK: neg.s	$f1, $f15
+0x00 0x00 0x00 0x00  # CHECK: nop
+0x27 0x38 0x07 0x00  # CHECK: nor	$7, $zero, $7
+0x25 0x60 0x1d 0x02  # CHECK: or	$12, $16, $sp
+0x04 0x00 0x42 0x34  # CHECK: ori	$2, $2, 4
+0x08 0x00 0xa1 0xcc  # CHECK: pref    1, 8($5)  
+0x08 0x0b 0x20 0x46  # CHECK: round.l.d $f12, $f1
+0x48 0x2e 0x00 0x46  # CHECK: round.l.s $f25, $f5
+0x8c 0x21 0x20 0x46  # CHECK: round.w.d	$f6, $f4
+0xcc 0xe6 0x00 0x46  # CHECK: round.w.s	$f27, $f28
+0x6f 0xb2 0xd6 0xa1  # CHECK: sb	$22, -19857($14)
+0xd8 0x49 0x6f 0xe2  # CHECK: sc	$15, 18904($19)
+0xcd 0xdf 0xaf 0xf3  # CHECK: scd   $15, -8243($sp)
+0xcb 0x16 0x4c 0xfd  # CHECK: sd    $12, 5835($10)
+0x1f 0xae 0xc7 0xb3  # CHECK: sdl   $7, -20961($fp)
+0x39 0xb0 0x8b 0xb5  # CHECK: sdr   $11, -20423($12)
+0x6e 0x77 0xbe 0xf5  # CHECK: sdc1	$f30, 30574($13)
+0x75 0x5a 0x54 0xfa  # CHECK: sdc2	$20, 23157($18)
+0x09 0x58 0xca 0x4d  # CHECK: sdxc1   $f11, $10($14)
+0xd0 0xe5 0xee 0xa5  # CHECK: sh	$14, -6704($15)
+0x80 0x3c 0x07 0x00  # CHECK: sll	$7, $7, 18
+0x80 0x3c 0x00 0x00  # CHECK: sll	$7, $zero, 18
+0x04 0x38 0x20 0x01  # CHECK: sllv	$7, $zero, $9
+0x04 0x38 0x20 0x01  # CHECK: sllv	$7, $zero, $9
+0x2a 0xb8 0x7b 0x01  # CHECK: slt	$23, $11, $27
+0x11 0x25 0x51 0x29  # CHECK: slti	$17, $10, 9489
+0x55 0xc3 0x39 0x2f  # CHECK: sltiu	$25, $25, -15531
+0x2b 0xa0 0xab 0x02  # CHECK: sltu	$20, $21, $11
+0x55 0xc3 0x38 0x2f  # CHECK: sltiu	$24, $25, -15531
+0x04 0xb4 0x20 0x46  # CHECK: sqrt.d	$f16, $f22
+0x04 0x08 0x00 0x46  # CHECK: sqrt.s	$f0, $f1
+0xc3 0x8b 0x11 0x00  # CHECK: sra	$17, $17, 15
+0xc3 0x8b 0x17 0x00  # CHECK: sra	$17, $23, 15
+0x07 0x88 0xb7 0x03  # CHECK: srav	$17, $23, $sp
+0x07 0x88 0xb7 0x03  # CHECK: srav	$17, $23, $sp
+0xc2 0x11 0x02 0x00  # CHECK: srl	$2, $2, 7
+0xc2 0x11 0x02 0x00  # CHECK: srl	$2, $2, 7
+0x06 0xc8 0x94 0x00  # CHECK: srlv	$25, $20, $4
+0x06 0xc8 0x94 0x00  # CHECK: srlv	$25, $20, $4
+0x40 0x00 0x00 0x00  # CHECK: ssnop
+0x22 0xb0 0x6c 0x02  # CHECK: sub	$22, $19, $12
+0x36 0x0c 0x36 0x22  # CHECK: addi	$22, $17, 3126
+0x90 0xe6 0xad 0x21  # CHECK: addi	$13, $13, -6512
+0x81 0x14 0x30 0x46  # CHECK: sub.d	$f18, $f2, $f16
+0xc1 0xb5 0x16 0x46  # CHECK: sub.s	$f23, $f22, $f22
+0x23 0xe8 0xd6 0x02  # CHECK: subu	$sp, $22, $22
+0x50 0xd8 0xbf 0xaf  # CHECK: sw	$ra, -10160($sp)
+0xef 0xde 0x06 0xe7  # CHECK: swc1	$f6, -8465($24)
+0x30 0x61 0x19 0xea  # CHECK: swc2	$25, 24880($16)
+0x7e 0x35 0x6f 0xaa  # CHECK: swl	$15, 13694($19)
+0x22 0x98 0xd1 0xb9  # CHECK: swr	$17, -26590($14)
+0x08 0x98 0x4c 0x4f  # CHECK: swxc1   $f19, $12($26)
+0x34 0x00 0x03 0x00  # CHECK: teq	 $zero, $3
+0x34 0x9b 0xa7 0x00  # CHECK: teq	$5, $7, 620
+0xa0 0xbb 0xac 0x06  # CHECK: teqi	$21, 48032
+0x30 0x00 0xea 0x00  # CHECK: tge	 $7, $10
+0x30 0x55 0xb3 0x00  # CHECK: tge	$5, $19, 340
+0xa1 0x13 0x28 0x06  # CHECK: tgei	$17, 5025
+0x33 0x90 0xa9 0x07  # CHECK: tgeiu	$sp, 36915
+0x31 0x00 0xdc 0x02  # CHECK: tgeu	 $22, $gp
+0xf1 0x5e 0x8e 0x02  # CHECK: tgeu	$20, $14, 379
+0x08 0x00 0x00 0x42  # CHECK: tlbp
+0x01 0x00 0x00 0x42  # CHECK: tlbr
+0x02 0x00 0x00 0x42  # CHECK: tlbwi
+0x06 0x00 0x00 0x42  # CHECK: tlbwr
+0x32 0x00 0xed 0x01  # CHECK: tlt	 $15, $13
+0x72 0x21 0x53 0x00  # CHECK: tlt	$2, $19, 133
+0xbd 0xad 0xca 0x05  # CHECK: tlti	$14, 44477
+0x2c 0xec 0xeb 0x07  # CHECK: tltiu	$ra, 60460
+0x33 0x00 0x70 0x01  # CHECK: tltu	 $11, $16
+0x33 0xfe 0x1d 0x02  # CHECK: tltu	$16, $sp, 1016
+0x36 0x00 0xd1 0x00  # CHECK: tne	 $6, $17
+0x76 0xdd 0xe8 0x00  # CHECK: tne	$7, $8, 885
+0x31 0x8c 0x8e 0x05  # CHECK: tnei	$12, 35889
+0xc9 0xbd 0x20 0x46  # CHECK: trunc.l.d $f23, $f23
+0x09 0xff 0x00 0x46  # CHECK: trunc.l.s $f28, $f31
+0x8d 0x75 0x20 0x46  # CHECK: trunc.w.d	$f22, $f14
+0x0d 0xf7 0x00 0x46  # CHECK: trunc.w.s	$f28, $f30
+0x26 0x90 0x9e 0x00  # CHECK: xor	$18, $4, $fp
diff --git a/test/MC/Disassembler/Mips/mips4/valid-mips4.txt b/test/MC/Disassembler/Mips/mips4/valid-mips4.txt
new file mode 100644
index 000000000000..abccbbfe7a98
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips4/valid-mips4.txt
@@ -0,0 +1,229 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips4 | FileCheck %s
+# CHECK: .text
+0x46 0x20 0xc1 0x85  # CHECK: abs.d	$f6, $f24
+0x46 0x00 0x82 0x45  # CHECK: abs.s	$f9, $f16
+0x02 0x45 0xb8 0x20  # CHECK: add	$23, $18, $5
+0x21 0xc9 0x3b 0x48  # CHECK: addi	$9, $14, 15176
+0x23 0x18 0xe3 0xe7  # CHECK: addi	$24, $24, -7193
+0x46 0x3c 0x30 0x00  # CHECK: add.d	$f0, $f6, $f28
+0x46 0x18 0xaa 0x00  # CHECK: add.s	$f8, $f21, $f24
+0x21 0x2d 0x66 0xd2  # CHECK: addi	$13, $9, 26322
+0x21 0x08 0xff 0xfe  # CHECK: addi	$8, $8, -2
+0x00 0x86 0x48 0x21  # CHECK: addu	$9, $4, $6
+0x25 0x29 0x00 0x0a  # CHECK: addiu	$9, $9, 10
+0x00 0x4c 0xb8 0x24  # CHECK: and	$23, $2, $12
+0x30 0x42 0x00 0x04  # CHECK: andi	$2, $2, 4
+0x45 0x00 0x00 0x01  # CHECK: bc1f	 8
+0x45 0x04 0x00 0x00  # CHECK: bc1f   $fcc1, 4
+0x45 0x1e 0x00 0x06  # CHECK: bc1fl  $fcc7, 28
+0x45 0x02 0x00 0x0c  # CHECK: bc1fl	 52
+0x45 0x01 0x00 0x01  # CHECK: bc1t	 8
+0x45 0x05 0x00 0x00  # CHECK: bc1t   $fcc1, 4
+0x45 0x03 0xf7 0xf4  # CHECK: bc1tl	 -8236
+0x45 0x1f 0x00 0x06  # CHECK: bc1tl  $fcc7, 28
+0x04 0x11 0x14 0x9b  # CHECK: bal	21104
+0x00 0x00 0x00 0x00  # CHECK: nop
+0x04 0xd0 0x14 0x9b  # CHECK: bltzal	$6, 21104
+0x00 0x00 0x00 0x00  # CHECK: nop
+0x51 0xd3 0x0c 0x40  # CHECK: beql	$14, $19, 12548
+0x05 0x93 0x07 0x1f  # CHECK: bgezall	$12, 7296
+0x04 0x83 0xf9 0x4d  # CHECK: bgezl	$4, -6856
+0x5d 0x40 0xfc 0x59  # CHECK: bgtzl	$10, -3736
+0x58 0xc0 0x02 0xe7  # CHECK: blezl	$6, 2976
+0x04 0xd2 0x00 0x7a  # CHECK: bltzall	$6, 492
+0x06 0x22 0xf6 0x45  # CHECK: bltzl	$17, -9960
+0x57 0x94 0x04 0xfc  # CHECK: bnel	$gp, $20, 5108
+0xbc 0xa1 0x00 0x08  # CHECK: cache   1, 8($5)
+0x46 0x3c 0xe0 0x3b  # CHECK: c.ngl.d	$f28, $f28
+0x46 0x30 0x00 0x39  # CHECK: c.ngle.d	$f0, $f16
+0x46 0x20 0xf0 0x38  # CHECK: c.sf.d	$f30, $f0
+0x46 0x16 0x70 0x38  # CHECK: c.sf.s	$f14, $f22
+0x46 0x20 0x18 0x4a  # CHECK: ceil.l.d  $f1, $f3
+0x46 0x00 0x6c 0x8a  # CHECK: ceil.l.s  $f18, $f13
+0x46 0x20 0xc2 0xce  # CHECK: ceil.w.d	$f11, $f24
+0x46 0x00 0xa1 0x8e  # CHECK: ceil.w.s	$f6, $f20
+0x44 0x51 0xa8 0x00  # CHECK: cfc1	$17, $21
+0x44 0xc6 0xd0 0x00  # CHECK: ctc1	$6, $26
+0x46 0x00 0xe5 0xa1  # CHECK: cvt.d.s	$f22, $f28
+0x46 0x80 0x5e 0xa1  # CHECK: cvt.d.w	$f26, $f11
+0x46 0xa0 0x81 0x21  # CHECK: cvt.d.l $f4, $f16
+0x46 0x20 0x7e 0x25  # CHECK: cvt.l.d $f24, $f15
+0x46 0x00 0xea 0xe5  # CHECK: cvt.l.s $f11, $f29
+0x46 0xa0 0xf3 0xe0  # CHECK: cvt.s.l $f15, $f30
+0x46 0x20 0x46 0xa0  # CHECK: cvt.s.d	$f26, $f8
+0x46 0x80 0x7d 0xa0  # CHECK: cvt.s.w	$f22, $f15
+0x46 0x20 0x75 0x24  # CHECK: cvt.w.d	$f20, $f14
+0x46 0x00 0xc5 0x24  # CHECK: cvt.w.s	$f20, $f24
+0x00 0x3f 0x98 0x2c  # CHECK: dadd    $19, $1, $ra
+0x62 0x9d 0x93 0xc7  # CHECK: daddi   $sp, $20, -27705
+0x63 0xbd 0x93 0xc7  # CHECK: daddi   $sp, $sp, -27705
+0x62 0x9d 0x93 0xc7  # CHECK: daddi   $sp, $20, -27705
+0x62 0x9d 0x93 0xc7  # CHECK: daddi   $sp, $20, -27705
+0x63 0xbd 0x93 0xc7  # CHECK: daddi   $sp, $sp, -27705
+0x66 0xda 0xee 0x16  # CHECK: daddiu  $26, $22, -4586
+0x00 0x3f 0x98 0x2d  # CHECK: daddu   $19, $1, $ra
+0x64 0x58 0x46 0x9f  # CHECK: daddiu  $24, $2, 18079
+0x66 0x73 0x69 0x3f  # CHECK: daddiu  $19, $19, 26943
+0x03 0x53 0x00 0x1e  # CHECK: ddiv    $zero, $26, $19
+0x02 0x11 0x00 0x1f  # CHECK: ddivu   $zero, $16, $17
+0x03 0x2b 0x00 0x1a  # CHECK: div	$zero, $25, $11
+0x46 0x3a 0xa7 0x03  # CHECK: div.d	$f28, $f20, $f26
+0x46 0x0f 0x29 0x03  # CHECK: div.s	$f4, $f5, $f15
+0x03 0x2f 0x00 0x1b  # CHECK: divu	$zero, $25, $15
+0x44 0x2c 0x68 0x00  # CHECK: dmfc1   $12, $f13
+0x44 0xb0 0x70 0x00  # CHECK: dmtc1   $16, $f14
+0x02 0xe9 0x00 0x1c  # CHECK: dmult   $23, $9
+0x00 0xa6 0x00 0x1d  # CHECK: dmultu  $5, $6
+0x00 0x00 0x04 0xb8  # CHECK: dsll    $zero, $zero, 18
+0x00 0x14 0x04 0xb8  # CHECK: dsll    $zero, $20, 18
+0x01 0x94 0x00 0x14  # CHECK: dsllv   $zero, $20, $12
+0x00 0x00 0x04 0xbc  # CHECK: dsll32  $zero, $zero, 18
+0x00 0x00 0x04 0xbc  # CHECK: dsll32  $zero, $zero, 18
+0x01 0x94 0x00 0x14  # CHECK: dsllv   $zero, $20, $12
+0x00 0x1c 0xe2 0xbb  # CHECK: dsra    $gp, $gp, 10
+0x00 0x12 0xe2 0xbb  # CHECK: dsra    $gp, $18, 10
+0x02 0x72 0xe0 0x17  # CHECK: dsrav   $gp, $18, $19
+0x00 0x1c 0xe2 0xbf  # CHECK: dsra32  $gp, $gp, 10
+0x00 0x12 0xe2 0xbf  # CHECK: dsra32  $gp, $18, 10
+0x02 0x72 0xe0 0x17  # CHECK: dsrav   $gp, $18, $19
+0x00 0x13 0x9d 0xfa  # CHECK: dsrl    $19, $19, 23
+0x00 0x06 0x9d 0xfa  # CHECK: dsrl    $19, $6, 23
+0x02 0x86 0x98 0x16  # CHECK: dsrlv   $19, $6, $20
+0x00 0x13 0x9d 0xfe  # CHECK: dsrl32  $19, $19, 23
+0x00 0x06 0x9d 0xfe  # CHECK: dsrl32  $19, $6, 23
+0x02 0x86 0x98 0x16  # CHECK: dsrlv   $19, $6, $20
+0x02 0xc8 0x38 0x2e  # CHECK: dsub    $7, $22, $8
+0x00 0xba 0x28 0x2f  # CHECK: dsubu   $5, $5, $26
+0x00 0x00 0x00 0xc0  # CHECK: ehb
+0x42 0x00 0x00 0x18  # CHECK: eret
+0x46 0x20 0x53 0x8f  # CHECK: floor.w.d	$f14, $f10
+0x46 0x00 0x4a 0x0f  # CHECK: floor.w.s	$f8, $f9
+0x46 0x20 0x3e 0x8b  # CHECK: floor.l.d $f26, $f7
+0x46 0x00 0x2b 0x0b  # CHECK: floor.l.s $f12, $f5
+0x81 0x58 0xc7 0x4d  # CHECK: lb	$24, -14515($10)
+0x90 0x68 0x75 0xf3  # CHECK: lbu	$8, 30195($3)
+0xd6 0x0a 0x40 0x07  # CHECK: ldc1	$f10, 16391($16)
+0xd8 0x28 0xad 0x43  # CHECK: ldc2	$8, -21181($1)
+0x86 0xab 0xde 0x94  # CHECK: lh	$11, -8556($21)
+0x94 0x53 0xa6 0xbd  # CHECK: lhu	$19, -22851($2)
+0x24 0x01 0x8b 0xb3  # CHECK: addiu	$1, $zero, -29773
+0x24 0x00 0x8b 0x3f  # CHECK: addiu	$zero, $zero, -29889
+0xc2 0x42 0xe3 0x67  # CHECK: ll	$2, -7321($18)
+0x8c 0xa8 0x16 0x2a  # CHECK: lw	$8, 5674($5)
+0xc7 0x50 0x27 0xf1  # CHECK: lwc1	$f16, 10225($26)
+0xc8 0xd2 0xfc 0xb7  # CHECK: lwc2	$18, -841($6)
+0x89 0xf4 0xef 0x79  # CHECK: lwl	$20, -4231($15)
+0x9b 0x80 0xb5 0x35  # CHECK: lwr	$zero, -19147($gp)
+0x4f 0xd1 0x03 0x00  # CHECK: lwxc1   $f12, $17($fp)
+0x44 0x07 0xd8 0x00  # CHECK: mfc1	$7, $f27
+0x00 0x00 0x98 0x10  # CHECK: mfhi	$19
+0x00 0x00 0xe8 0x10  # CHECK: mfhi	$sp
+0x00 0x00 0x88 0x12  # CHECK: mflo	$17
+0x46 0x20 0x75 0x06  # CHECK: mov.d	$f20, $f14
+0x46 0x00 0xd8 0x86  # CHECK: mov.s	$f2, $f27
+0x01 0x1c 0xe0 0x01  # CHECK: movf    $gp, $8, $fcc7
+0x46 0x34 0x59 0x91  # CHECK: movf.d  $f6, $f11, $fcc5
+0x46 0x18 0x2d 0xd1  # CHECK: movf.s  $f23, $f5, $fcc6
+0x00 0x80 0xf0 0x21  # CHECK: move	 $fp, $4
+0x00 0xc0 0xc8 0x21  # CHECK: move	 $25, $6
+0x02 0x30 0x18 0x0b  # CHECK: movn   $3, $17, $16
+0x46 0x3a 0xae 0xd3  # CHECK: movn.d $f27, $f21, $26
+0x46 0x17 0x03 0x13  # CHECK: movn.s $f12, $f0, $23
+0x02 0x95 0x00 0x01  # CHECK: movt   $zero, $20, $fcc5
+0x46 0x21 0x10 0x11  # CHECK: movt.d $f0, $f2, $fcc0
+0x46 0x05 0x17 0x91  # CHECK: movt.s $f30, $f2, $fcc1
+0x02 0xc9 0x28 0x0a  # CHECK: movz   $5, $22, $9
+0x46 0x29 0xeb 0x12  # CHECK: movz.d $f12, $f29, $9
+0x46 0x03 0x3e 0x52  # CHECK: movz.s $f25, $f7, $3
+0x44 0x9e 0x48 0x00  # CHECK: mtc1	$fp, $f9
+0x02 0x20 0x00 0x11  # CHECK: mthi	$17
+0x03 0xa0 0x00 0x13  # CHECK: mtlo	$sp
+0x03 0x20 0x00 0x13  # CHECK: mtlo	$25
+0x46 0x30 0xa5 0x02  # CHECK: mul.d	$f20, $f20, $f16
+0x46 0x02 0x57 0x82  # CHECK: mul.s	$f30, $f10, $f2
+0x03 0xb4 0x00 0x18  # CHECK: mult	$sp, $20
+0x03 0xa2 0x00 0x18  # CHECK: mult	$sp, $2
+0x03 0x9a 0x00 0x19  # CHECK: multu	$gp, $26
+0x01 0x32 0x00 0x19  # CHECK: multu	$9, $18
+0x00 0x02 0x10 0x23  # CHECK: negu	 $2, $2
+0x00 0x03 0x10 0x23  # CHECK: negu	 $2, $3
+0x46 0x20 0x96 0x87  # CHECK: neg.d	$f26, $f18
+0x46 0x00 0x78 0x47  # CHECK: neg.s	$f1, $f15
+0x00 0x00 0x00 0x00  # CHECK: nop
+0x00 0x07 0x38 0x27  # CHECK: nor	$7, $zero, $7
+0x02 0x1d 0x60 0x25  # CHECK: or	$12, $16, $sp
+0x34 0x42 0x00 0x04  # CHECK: ori	$2, $2, 4
+0xcc 0xa1 0x00 0x08  # CHECK: pref    1, 8($5)  
+0x46 0x20 0x0b 0x08  # CHECK: round.l.d $f12, $f1
+0x46 0x00 0x2e 0x48  # CHECK: round.l.s $f25, $f5
+0x46 0x20 0x21 0x8c  # CHECK: round.w.d	$f6, $f4
+0x46 0x00 0xe6 0xcc  # CHECK: round.w.s	$f27, $f28
+0xa1 0xd6 0xb2 0x6f  # CHECK: sb	$22, -19857($14)
+0xe2 0x6f 0x49 0xd8  # CHECK: sc	$15, 18904($19)
+0xf3 0xaf 0xdf 0xcd  # CHECK: scd   $15, -8243($sp)
+0xfd 0x4c 0x16 0xcb  # CHECK: sd    $12, 5835($10)
+0xb3 0xc7 0xae 0x1f  # CHECK: sdl   $7, -20961($fp)
+0xb5 0x8b 0xb0 0x39  # CHECK: sdr   $11, -20423($12)
+0xf5 0xbe 0x77 0x6e  # CHECK: sdc1	$f30, 30574($13)
+0xfa 0x54 0x5a 0x75  # CHECK: sdc2	$20, 23157($18)
+0x4d 0xca 0x58 0x09  # CHECK: sdxc1   $f11, $10($14)
+0xa5 0xee 0xe5 0xd0  # CHECK: sh	$14, -6704($15)
+0x00 0x07 0x3c 0x80  # CHECK: sll	$7, $7, 18
+0x00 0x00 0x3c 0x80  # CHECK: sll	$7, $zero, 18
+0x01 0x20 0x38 0x04  # CHECK: sllv	$7, $zero, $9
+0x01 0x20 0x38 0x04  # CHECK: sllv	$7, $zero, $9
+0x01 0x7b 0xb8 0x2a  # CHECK: slt	$23, $11, $27
+0x29 0x51 0x25 0x11  # CHECK: slti	$17, $10, 9489
+0x2f 0x39 0xc3 0x55  # CHECK: sltiu	$25, $25, -15531
+0x02 0xab 0xa0 0x2b  # CHECK: sltu	$20, $21, $11
+0x2f 0x38 0xc3 0x55  # CHECK: sltiu	$24, $25, -15531
+0x46 0x20 0xb4 0x04  # CHECK: sqrt.d	$f16, $f22
+0x46 0x00 0x08 0x04  # CHECK: sqrt.s	$f0, $f1
+0x00 0x11 0x8b 0xc3  # CHECK: sra	$17, $17, 15
+0x00 0x17 0x8b 0xc3  # CHECK: sra	$17, $23, 15
+0x03 0xb7 0x88 0x07  # CHECK: srav	$17, $23, $sp
+0x03 0xb7 0x88 0x07  # CHECK: srav	$17, $23, $sp
+0x00 0x02 0x11 0xc2  # CHECK: srl	$2, $2, 7
+0x00 0x02 0x11 0xc2  # CHECK: srl	$2, $2, 7
+0x00 0x94 0xc8 0x06  # CHECK: srlv	$25, $20, $4
+0x00 0x94 0xc8 0x06  # CHECK: srlv	$25, $20, $4
+0x00 0x00 0x00 0x40  # CHECK: ssnop
+0x02 0x6c 0xb0 0x22  # CHECK: sub	$22, $19, $12
+0x22 0x36 0x0c 0x36  # CHECK: addi	$22, $17, 3126
+0x21 0xad 0xe6 0x90  # CHECK: addi	$13, $13, -6512
+0x46 0x30 0x14 0x81  # CHECK: sub.d	$f18, $f2, $f16
+0x46 0x16 0xb5 0xc1  # CHECK: sub.s	$f23, $f22, $f22
+0x02 0xd6 0xe8 0x23  # CHECK: subu	$sp, $22, $22
+0xaf 0xbf 0xd8 0x50  # CHECK: sw	$ra, -10160($sp)
+0xe7 0x06 0xde 0xef  # CHECK: swc1	$f6, -8465($24)
+0xea 0x19 0x61 0x30  # CHECK: swc2	$25, 24880($16)
+0xaa 0x6f 0x35 0x7e  # CHECK: swl	$15, 13694($19)
+0xb9 0xd1 0x98 0x22  # CHECK: swr	$17, -26590($14)
+0x4f 0x4c 0x98 0x08  # CHECK: swxc1   $f19, $12($26)
+0x00 0x03 0x00 0x34  # CHECK: teq	 $zero, $3
+0x00 0xa7 0x9b 0x34  # CHECK: teq	$5, $7, 620
+0x06 0xac 0xbb 0xa0  # CHECK: teqi	$21, 48032
+0x00 0xea 0x00 0x30  # CHECK: tge	 $7, $10
+0x00 0xb3 0x55 0x30  # CHECK: tge	$5, $19, 340
+0x06 0x28 0x13 0xa1  # CHECK: tgei	$17, 5025
+0x07 0xa9 0x90 0x33  # CHECK: tgeiu	$sp, 36915
+0x02 0xdc 0x00 0x31  # CHECK: tgeu	 $22, $gp
+0x02 0x8e 0x5e 0xf1  # CHECK: tgeu	$20, $14, 379
+0x42 0x00 0x00 0x08  # CHECK: tlbp
+0x42 0x00 0x00 0x01  # CHECK: tlbr
+0x42 0x00 0x00 0x02  # CHECK: tlbwi
+0x42 0x00 0x00 0x06  # CHECK: tlbwr
+0x01 0xed 0x00 0x32  # CHECK: tlt	 $15, $13
+0x00 0x53 0x21 0x72  # CHECK: tlt	$2, $19, 133
+0x05 0xca 0xad 0xbd  # CHECK: tlti	$14, 44477
+0x07 0xeb 0xec 0x2c  # CHECK: tltiu	$ra, 60460
+0x01 0x70 0x00 0x33  # CHECK: tltu	 $11, $16
+0x02 0x1d 0xfe 0x33  # CHECK: tltu	$16, $sp, 1016
+0x00 0xd1 0x00 0x36  # CHECK: tne	 $6, $17
+0x00 0xe8 0xdd 0x76  # CHECK: tne	$7, $8, 885
+0x05 0x8e 0x8c 0x31  # CHECK: tnei	$12, 35889
+0x46 0x20 0xbd 0xc9  # CHECK: trunc.l.d $f23, $f23
+0x46 0x00 0xff 0x09  # CHECK: trunc.l.s $f28, $f31
+0x46 0x20 0x75 0x8d  # CHECK: trunc.w.d	$f22, $f14
+0x46 0x00 0xf7 0x0d  # CHECK: trunc.w.s	$f28, $f30
+0x00 0x9e 0x90 0x26  # CHECK: xor	$18, $4, $fp
diff --git a/test/MC/Disassembler/Mips/mips4/valid-xfail-mips4.txt b/test/MC/Disassembler/Mips/mips4/valid-xfail-mips4.txt
new file mode 100644
index 000000000000..aa35e46ab4b8
--- /dev/null
+++ b/test/MC/Disassembler/Mips/mips4/valid-xfail-mips4.txt
@@ -0,0 +1,42 @@
+# RUN: llvm-mc %s -triple=mips64-unknown-linux -disassemble -mcpu=mips4 | FileCheck %s
+# XFAIL: *
+0x46 0x2f 0x79 0x32  # CHECK :      c.eq.d    $fcc1,$f15,$f15
+0x46 0x11 0xc5 0x32  # CHECK :      c.eq.s    $fcc5,$f24,$f17
+0x46 0x35 0x5c 0x30  # CHECK :      c.f.d     $fcc4,$f11,$f21
+0x46 0x07 0xf4 0x30  # CHECK :      c.f.s     $fcc4,$f30,$f7
+0x46 0x21 0x94 0x3e  # CHECK :      c.le.d    $fcc4,$f18,$f1
+0x46 0x04 0xc6 0x3e  # CHECK :      c.le.s    $fcc6,$f24,$f4
+0x46 0x23 0x4b 0x3c  # CHECK :      c.lt.d    $fcc3,$f9,$f3
+0x46 0x0e 0x8a 0x3c  # CHECK :      c.lt.s    $fcc2,$f17,$f14
+0x46 0x30 0xad 0x3d  # CHECK :      c.nge.d   $fcc5,$f21,$f16
+0x46 0x08 0x5b 0x3d  # CHECK :      c.nge.s   $fcc3,$f11,$f8
+0x46 0x17 0xfa 0x3b  # CHECK :      c.ngl.s   $fcc2,$f31,$f23
+0x46 0x17 0x92 0x39  # CHECK :      c.ngle.s  $fcc2,$f18,$f23
+0x46 0x27 0xc4 0x3f  # CHECK :      c.ngt.d   $fcc4,$f24,$f7
+0x46 0x0d 0x45 0x3f  # CHECK :      c.ngt.s   $fcc5,$f8,$f13
+0x46 0x3f 0x82 0x36  # CHECK :      c.ole.d   $fcc2,$f16,$f31
+0x46 0x14 0x3b 0x36  # CHECK :      c.ole.s   $fcc3,$f7,$f20
+0x46 0x3c 0x9c 0x34  # CHECK :      c.olt.d   $fcc4,$f19,$f28
+0x46 0x07 0xa6 0x34  # CHECK :      c.olt.s   $fcc6,$f20,$f7
+0x46 0x27 0xfc 0x3a  # CHECK :      c.seq.d   $fcc4,$f31,$f7
+0x46 0x19 0x0f 0x3a  # CHECK :      c.seq.s   $fcc7,$f1,$f25
+0x46 0x39 0x6c 0x33  # CHECK :      c.ueq.d   $fcc4,$f13,$f25
+0x46 0x1e 0x1e 0x33  # CHECK :      c.ueq.s   $fcc6,$f3,$f30
+0x46 0x32 0xcf 0x37  # CHECK :      c.ule.d   $fcc7,$f25,$f18
+0x46 0x1e 0xaf 0x37  # CHECK :      c.ule.s   $fcc7,$f21,$f30
+0x46 0x31 0x36 0x35  # CHECK :      c.ult.d   $fcc6,$f6,$f17
+0x46 0x0a 0xc7 0x35  # CHECK :      c.ult.s   $fcc7,$f24,$f10
+0x46 0x38 0xbe 0x31  # CHECK :      c.un.d    $fcc6,$f23,$f24
+0x46 0x04 0xf1 0x31  # CHECK :      c.un.s    $fcc1,$f30,$f4
+0x4e 0x74 0xd4 0xa1  # CHECK :      madd.d    $f18,$f19,$f26,$f20
+0x4f 0xf9 0x98 0x60  # CHECK :      madd.s    $f1,$f31,$f19,$f25
+0x4c 0x32 0xfa 0xa9  # CHECK :      msub.d    $f10,$f1,$f31,$f18
+0x4e 0x70 0x53 0x28  # CHECK :      msub.s    $f12,$f19,$f10,$f16
+0x4d 0x33 0x74 0xb1  # CHECK :      nmadd.d   $f18,$f9,$f14,$f19
+0x4c 0xac 0xc8 0x30  # CHECK :      nmadd.s   $f0,$f5,$f25,$f12
+0x4d 0x1e 0x87 0xb9  # CHECK :      nmsub.d   $f30,$f8,$f16,$f30
+0x4f 0x04 0x98 0x78  # CHECK :      nmsub.s   $f1,$f24,$f19,$f4
+0x46 0x20 0x34 0xd5  # CHECK :      recip.d   $f19,$f6
+0x46 0x00 0xf0 0xd5  # CHECK :      recip.s   $f3,$f30
+0x46 0x20 0xe0 0xd6  # CHECK :      rsqrt.d   $f3,$f28
+0x46 0x00 0x41 0x16  # CHECK :      rsqrt.s   $f4,$f8
diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding-4xx.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding-4xx.txt
new file mode 100644
index 000000000000..92e88f8e7a58
--- /dev/null
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-4xx.txt
@@ -0,0 +1,26 @@
+# RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-unknown -mcpu=pwr7 | FileCheck %s
+
+# CHECK: mfdcr 3, 178
+0x7c 0x72 0x2a 0x86
+# CHECK: mtdcr 178, 3
+0x7c 0x72 0x2b 0x86
+
+# CHECK: tlbre 2, 3, 0
+0x7c 0x43 0x07 0x64
+# CHECK: tlbre 2, 3, 1
+0x7c 0x43 0x0f 0x64
+
+# CHECK: tlbwe 2, 3, 0
+0x7c 0x43 0x07 0xa4
+# CHECK: tlbwe 2, 3, 1
+0x7c 0x43 0x0f 0xa4
+
+# CHECK: tlbsx 2, 3, 1
+0x7c 0x43 0x0f 0x24
+# CHECK: tlbsx. 2, 3, 1
+0x7c 0x43 0x0f 0x25
+
+# CHECK: dccci 5, 6
+0x7c 0x05 0x33 0x8c
+# CHECK: iccci 5, 6
+0x7c 0x05 0x37 0x8c
diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding-6xx.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding-6xx.txt
new file mode 100644
index 000000000000..7276847adc51
--- /dev/null
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-6xx.txt
@@ -0,0 +1,6 @@
+# RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-unknown -mcpu=pwr7 | FileCheck %s
+
+# CHECK: tlbld 4
+0x7c 0x00 0x27 0xa4
+# CHECK: tlbli 4
+0x7c 0x00 0x27 0xe4
diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding-bookII.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding-bookII.txt
index 5e6033d4299a..7a30b5cb2b69 100644
--- a/test/MC/Disassembler/PowerPC/ppc64-encoding-bookII.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-bookII.txt
@@ -3,6 +3,9 @@
 # CHECK: icbi 2, 3                       
 0x7c 0x02 0x1f 0xac
 
+# CHECK: icbt 0, 5, 31
+0x7c 0x05 0xf8 0x2c
+
 # CHECK: dcbt 2, 3                       
 0x7c 0x02 0x1a 0x2c
 
@@ -33,6 +36,9 @@
 # CHECK: wait 2                          
 0x7c 0x40 0x00 0x7c
 
+# CHECK: mbar 1
+0x7c 0x20 0x06 0xac
+
 # CHECK: dcbf 2, 3                       
 0x7c 0x02 0x18 0xac
 
diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt
index c5d615568cc3..7996ed178a12 100644
--- a/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-bookIII.txt
@@ -105,3 +105,23 @@
 # CHECK: tlbie 4,0                       
 0x7c 0x00 0x22 0x64
 
+# CHECK: rfi
+0x4c 0x00 0x00 0x64
+# CHECK: rfci
+0x4c 0x00 0x00 0x66
+
+# CHECK: wrtee 12
+0x7d 0x80 0x01 0x06
+# CHECK: wrteei 0
+0x7c 0x00 0x01 0x46
+# CHECK: wrteei 1
+0x7c 0x00 0x81 0x46
+
+# CHECK: tlbre
+0x7c 0x00 0x07 0x64
+# CHECK: tlbwe
+0x7c 0x00 0x07 0xa4
+# CHECK: tlbivax 11, 12
+0x7c 0x0b 0x66 0x24
+# CHECK: tlbsx 11, 12
+0x7c 0x0b 0x67 0x24
diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding-e500.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding-e500.txt
new file mode 100644
index 000000000000..ef013d7e8c9a
--- /dev/null
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-e500.txt
@@ -0,0 +1,7 @@
+# RUN: llvm-mc --disassemble %s -triple powerpc64-unknown-unknown -mcpu=pwr7 | FileCheck %s
+
+# CHECK: rfdi
+0x4c 0x00 0x00 0x4e
+# CHECK: rfmci
+0x4c 0x00 0x00 0x4c
+
diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding-ext.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding-ext.txt
index 108df30aa8c8..4f10c742db97 100644
--- a/test/MC/Disassembler/PowerPC/ppc64-encoding-ext.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding-ext.txt
@@ -2251,3 +2251,47 @@
 # CHECK: mtcrf 255, 2
 0x7c 0x4f 0xf1 0x20
 
+# CHECK: dss 3
+0x7c 0x60 0x06 0x6c
+# CHECK: dssall
+0x7e 0x00 0x06 0x6c
+# CHECK: dst 12, 11, 3
+0x7c 0x6c 0x5a 0xac
+# CHECK: dstt 12, 11, 3
+0x7e 0x6c 0x5a 0xac
+# CHECK: dstst 12, 11, 3
+0x7c 0x6c 0x5a 0xec
+# CHECK: dststt 12, 11, 3
+0x7e 0x6c 0x5a 0xec
+
+# CHECK: tlbia
+0x7c 0x00 0x02 0xe4
+
+# CHECK: lswi 8, 6, 7
+0x7d 0x06 0x3c 0xaa
+# CHECK: stswi 8, 6, 7
+0x7d 0x06 0x3d 0xaa
+
+# CHECK: rfid
+0x4c 0x00 0x00 0x24
+
+# CHECK: lbzcix 21, 5, 7
+0x7e 0xa5 0x3e 0xaa
+# CHECK: lhzcix 21, 5, 7
+0x7e 0xa5 0x3e 0x6a
+# CHECK: lwzcix 21, 5, 7
+0x7e 0xa5 0x3e 0x2a
+# CHECK: ldcix  21, 5, 7
+0x7e 0xa5 0x3e 0xea
+# CHECK: stbcix 21, 5, 7
+0x7e 0xa5 0x3f 0xaa
+# CHECK: sthcix 21, 5, 7
+0x7e 0xa5 0x3f 0x6a
+# CHECK: stwcix 21, 5, 7
+0x7e 0xa5 0x3f 0x2a
+# CHECK: stdcix 21, 5, 7
+0x7e 0xa5 0x3f 0xea
+
+# CHECK: attn
+0x00 0x00 0x02 0x00
+
diff --git a/test/MC/Disassembler/PowerPC/ppc64-encoding.txt b/test/MC/Disassembler/PowerPC/ppc64-encoding.txt
index 33a8c0ed5ded..e99d49bbf45c 100644
--- a/test/MC/Disassembler/PowerPC/ppc64-encoding.txt
+++ b/test/MC/Disassembler/PowerPC/ppc64-encoding.txt
@@ -499,6 +499,9 @@
 # CHECK: popcntd 2, 3                    
 0x7c 0x62 0x03 0xf4
 
+# CHECK: cmpb 7, 21, 4
+0x7e 0xa7 0x23 0xf8
+
 # CHECK: rlwinm 2, 3, 4, 5, 6            
 0x54 0x62 0x21 0x4c
 
@@ -619,3 +622,7 @@
 # CHECK: mfocrf 16, 8                    
 0x7e 0x10 0x80 0x26
 
+# CHECK: mtsrin 10, 12
+0x7d 0x40 0x61 0xe4
+# CHECK: mfsrin 10, 12
+0x7d 0x40 0x65 0x26
diff --git a/test/MC/Disassembler/X86/avx-512.txt b/test/MC/Disassembler/X86/avx-512.txt
index f78db552935e..62fc35bd1cff 100644
--- a/test/MC/Disassembler/X86/avx-512.txt
+++ b/test/MC/Disassembler/X86/avx-512.txt
@@ -1,4 +1,5 @@
 # RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 -mcpu=knl | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 -mcpu=skx | FileCheck --check-prefix=CHECK-SKX %s
 
 # CHECK: vpbroadcastd    %xmm18, %zmm28 {%k7} {z}
 0x62 0x22 0x7d 0xcf 0x58 0xe2
@@ -13,7 +14,13 @@
 0x62 0x32 0xed 0x48 0x16 0x04 0x96
 
 # CHECK: vpbroadcastmw2d %k2, %zmm8
-0x62 0xd2 0x7e 0x48 0x3a 0xd0
+0x62 0x72 0x7e 0x48 0x3a 0xc2
+
+# CHECK-SKX: vpbroadcastmw2d %k2, %xmm8
+0x62 0x72 0x7e 0x08 0x3a 0xc2
+
+# CHECK-SKX: vpbroadcastmw2d %k2, %ymm8
+0x62 0x72 0x7e 0x28 0x3a 0xc2
 
 # CHECK: vpbroadcastq    (%r9,%rax), %zmm28
 0x62 0x42 0xfd 0x48 0x59 0x24 0x01
diff --git a/test/MC/Disassembler/X86/intel-syntax-32.txt b/test/MC/Disassembler/X86/intel-syntax-32.txt
index 2298823604aa..66c87b8bc07b 100644
--- a/test/MC/Disassembler/X86/intel-syntax-32.txt
+++ b/test/MC/Disassembler/X86/intel-syntax-32.txt
@@ -29,3 +29,15 @@
 
 # CHECK: mov dword ptr [878082192], eax
 0xa3 0x90 0x78 0x56 0x34
+
+# CHECK: lea	cx, [si + 4]
+0x67 0x66 0x8d 0x4c 0x04
+
+# CHECK: lea	ecx, [si + 4]
+0x67 0x8d 0x4c 0x04 
+
+# CHECK: lea	cx, [esp + 4]
+0x66 0x8d 0x4c 0x24 0x04 
+
+# CHECK: lea	ecx, [esp + 4]
+0x8d 0x4c 0x24 0x04 
diff --git a/test/MC/Disassembler/X86/intel-syntax.txt b/test/MC/Disassembler/X86/intel-syntax.txt
index 3689525d92fa..0a628af02c15 100644
--- a/test/MC/Disassembler/X86/intel-syntax.txt
+++ b/test/MC/Disassembler/X86/intel-syntax.txt
@@ -152,3 +152,23 @@
 
 # CHECK: movabs qword ptr [-6066930261531658096], rax
 0x48 0xa3 0x90 0x78 0x56 0x34 0x12 0xef 0xcd 0xab
+
+# CHECK: lea	cx, [esp + 4]
+0x67 0x66 0x8d 0x4c 0x24 0x04 
+
+# CHECK: lea	ecx, [esp + 4]
+0x67 0x8d 0x4c 0x24 0x04 
+
+# CHECK: lea	rcx, [esp + 4]
+0x67 0x48 0x8d 0x4c 0x24 0x04 
+
+# CHECK: lea	cx, [rsp + 4]
+0x66 0x8d 0x4c 0x24 0x04 
+
+# CHECK: lea	ecx, [rsp + 4]
+0x8d 0x4c 0x24 0x04 
+
+# CHECK: lea	rcx, [rsp + 4]
+0x48 0x8d 0x4c 0x24 0x04 
+
+
diff --git a/test/MC/Disassembler/X86/invalid-cmp-imm.txt b/test/MC/Disassembler/X86/invalid-cmp-imm.txt
deleted file mode 100644
index 7b2ea2aa06c7..000000000000
--- a/test/MC/Disassembler/X86/invalid-cmp-imm.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 2>&1 | grep "invalid instruction encoding"
-
-# This instruction would decode as cmpordps if the immediate byte was less than 8.
-0x0f 0xc2 0xc7 0x08
-# This instruction would decode as cmpordpd if the immediate byte was less than 8.
-0x66 0x0f 0xc2 0xc7 0x08
-# This instruction would decode as cmpordss if the immediate byte was less than 8.
-0xf3 0x0f 0xc2 0xc7 0x08
-# This instruction would decode as cmpordsd if the immediate byte was less than 8.
-0xf2 0x0f 0xc2 0xc7 0x08
diff --git a/test/MC/Disassembler/X86/moffs.txt b/test/MC/Disassembler/X86/moffs.txt
index dd2664cb7737..ac1885954944 100644
--- a/test/MC/Disassembler/X86/moffs.txt
+++ b/test/MC/Disassembler/X86/moffs.txt
@@ -1,86 +1,86 @@
-# RUN: llvm-mc --disassemble --print-imm-hex %s -triple=i686-linux-gnu-code16 | FileCheck --check-prefix=16 %s
-# RUN: llvm-mc --disassemble --print-imm-hex %s -triple=i686-linux-gnu | FileCheck --check-prefix=32 %s
-# RUN: llvm-mc --disassemble --print-imm-hex %s -triple=x86_64-linux-gnu | FileCheck --check-prefix=64 %s
+# RUN: llvm-mc --disassemble --show-encoding --print-imm-hex %s -triple=i686-linux-gnu-code16 | FileCheck --check-prefix=16 %s
+# RUN: llvm-mc --disassemble --show-encoding --print-imm-hex %s -triple=i686-linux-gnu | FileCheck --check-prefix=32 %s
+# RUN: llvm-mc --disassemble --show-encoding --print-imm-hex %s -triple=x86_64-linux-gnu | FileCheck --check-prefix=64 %s
 
-# 16: movb 0x5a5a, %al
-# 32: movb 0x5a5a5a5a, %al
-# 64: movabsb 0x5a5a5a5a5a5a5a5a, %al
+# 16: movb 0x5a5a, %al # encoding: [0xa0,0x5a,0x5a]
+# 32: movb 0x5a5a5a5a, %al # encoding: [0xa0,0x5a,0x5a,0x5a,0x5a]
+# 64: movabsb 0x5a5a5a5a5a5a5a5a, %al # encoding: [0xa0,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a]
 0xa0 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
 
-# 16: movb 0x5a5a5a5a, %al
-# 32: movb 0x5a5a, %al
-# 64: movabsb 0x5a5a5a5a, %al
+# 16: movb 0x5a5a5a5a, %al # encoding: [0x67,0xa0,0x5a,0x5a,0x5a,0x5a]
+# 32: movb 0x5a5a, %al # encoding: [0x67,0xa0,0x5a,0x5a]
+# 64: movb 0x5a5a5a5a, %al # encoding: [0x67,0xa0,0x5a,0x5a,0x5a,0x5a]
 0x67 0xa0 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
 
-# 16: movw 0x5a5a, %ax
-# 32: movl 0x5a5a5a5a, %eax
-# 64: movabsl 0x5a5a5a5a5a5a5a5a, %eax
+# 16: movw 0x5a5a, %ax # encoding: [0xa1,0x5a,0x5a]
+# 32: movl 0x5a5a5a5a, %eax # encoding: [0xa1,0x5a,0x5a,0x5a,0x5a]
+# 64: movabsl 0x5a5a5a5a5a5a5a5a, %eax # encoding: [0xa1,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a]
 0xa1 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
 
-# 16: movw 0x5a5a5a5a, %ax
-# 32: movl 0x5a5a, %eax
-# 64: movabsl 0x5a5a5a5a, %eax
+# 16: movw 0x5a5a5a5a, %ax # encoding: [0x67,0xa1,0x5a,0x5a,0x5a,0x5a]
+# 32: movl 0x5a5a, %eax # encoding: [0x67,0xa1,0x5a,0x5a]
+# 64: movl 0x5a5a5a5a, %eax # encoding: [0x67,0xa1,0x5a,0x5a,0x5a,0x5a]
 0x67 0xa1 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
 
-# 16: movl 0x5a5a, %eax
-# 32: movw 0x5a5a5a5a, %ax
-# 64: movabsw 0x5a5a5a5a5a5a5a5a, %ax
+# 16: movl 0x5a5a, %eax # encoding: [0x66,0xa1,0x5a,0x5a]
+# 32: movw 0x5a5a5a5a, %ax # encoding: [0x66,0xa1,0x5a,0x5a,0x5a,0x5a]
+# 64: movabsw 0x5a5a5a5a5a5a5a5a, %ax # encoding: [0x66,0xa1,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a]
 0x66 0xa1 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
 
-# 16: movl 0x5a5a5a5a, %eax
-# 32: movw 0x5a5a, %ax
-# 64: movabsw 0x5a5a5a5a, %ax
+# 16: movl 0x5a5a5a5a, %eax # encoding: [0x67,0x66,0xa1,0x5a,0x5a,0x5a,0x5a]
+# 32: movw 0x5a5a, %ax # encoding: [0x67,0x66,0xa1,0x5a,0x5a]
+# 64: movw 0x5a5a5a5a, %ax # encoding: [0x67,0x66,0xa1,0x5a,0x5a,0x5a,0x5a]
 0x66 0x67 0xa1 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
 
-# 16: movl 0x5a5a5a5a, %eax
-# 32: movw 0x5a5a, %ax
-# 64: movabsw 0x5a5a5a5a, %ax
+# 16: movl 0x5a5a5a5a, %eax # encoding: [0x67,0x66,0xa1,0x5a,0x5a,0x5a,0x5a]
+# 32: movw 0x5a5a, %ax # encoding: [0x67,0x66,0xa1,0x5a,0x5a]
+# 64: movw 0x5a5a5a5a, %ax # encoding: [0x67,0x66,0xa1,0x5a,0x5a,0x5a,0x5a]
 0x67 0x66 0xa1 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
 
-# 16: movl %es:0x5a5a5a5a, %eax
-# 32: movw %es:0x5a5a, %ax
-# 64: movabsw %es:0x5a5a5a5a, %ax
-0x67 0x26 0x66 0xa1 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+# 16: movl %es:0x5a5a5a5a, %eax # encoding: [0x67,0x66,0x26,0xa1,0x5a,0x5a,0x5a,0x5a]
+# 32: movw %es:0x5a5a, %ax # encoding: [0x67,0x66,0x26,0xa1,0x5a,0x5a]
+# 64: movw %es:0x5a5a5a5a, %ax # encoding: [0x67,0x66,0x26,0xa1,0x5a,0x5a,0x5a,0x5a]
+0x67 0x26 0x66 0xa1 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a # encoding: [0xa0,0x5a,0x5a]
 
 
 
-# 16: movb %al, 0x5a5a
-# 32: movb %al, 0x5a5a5a5a
-# 64: movabsb %al, 0x5a5a5a5a5a5a5a5a
-0xa2 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+# 16: movb %al, 0x5a5a # encoding: [0xa2,0x5a,0x5a]
+# 32: movb %al, 0x5a5a5a5a # encoding: [0xa2,0x5a,0x5a,0x5a,0x5a]
+# 64: movabsb %al, 0x5a5a5a5a5a5a5a5a # encoding: [0xa2,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a]
+0xa2 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a # encoding: [0xa0,0x5a,0x5a]
 
-# 16: movb %al, 0x5a5a5a5a
-# 32: movb %al, 0x5a5a
-# 64: movabsb %al, 0x5a5a5a5a
+# 16: movb %al, 0x5a5a5a5a # encoding: [0x67,0xa2,0x5a,0x5a,0x5a,0x5a]
+# 32: movb %al, 0x5a5a # encoding: [0x67,0xa2,0x5a,0x5a]
+# 64: movb %al, 0x5a5a5a5a # encoding: [0x67,0xa2,0x5a,0x5a,0x5a,0x5a]
 0x67 0xa2 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
 
-# 16: movw %ax, 0x5a5a
-# 32: movl %eax, 0x5a5a5a5a
-# 64: movabsl %eax, 0x5a5a5a5a5a5a5a5a
+# 16: movw %ax, 0x5a5a # encoding: [0xa3,0x5a,0x5a]
+# 32: movl %eax, 0x5a5a5a5a # encoding: [0xa3,0x5a,0x5a,0x5a,0x5a]
+# 64: movabsl %eax, 0x5a5a5a5a5a5a5a5a # encoding: [0xa3,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a]
 0xa3 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
 
-# 16: movw %ax, %gs:0x5a5a5a5a
-# 32: movl %eax, %gs:0x5a5a
-# 64: movabsl %eax, %gs:0x5a5a5a5a
+# 16: movw %ax, %gs:0x5a5a5a5a # encoding: [0x67,0x65,0xa3,0x5a,0x5a,0x5a,0x5a]
+# 32: movl %eax, %gs:0x5a5a # encoding: [0x67,0x65,0xa3,0x5a,0x5a]
+# 64: movl %eax, %gs:0x5a5a5a5a # encoding: [0x67,0x65,0xa3,0x5a,0x5a,0x5a,0x5a]
 0x65 0x67 0xa3 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
 
-# 16: movl %eax, 0x5a5a
-# 32: movw %ax, 0x5a5a5a5a
-# 64: movabsw %ax, 0x5a5a5a5a5a5a5a5a
+# 16: movl %eax, 0x5a5a # encoding: [0x66,0xa3,0x5a,0x5a]
+# 32: movw %ax, 0x5a5a5a5a # encoding: [0x66,0xa3,0x5a,0x5a,0x5a,0x5a]
+# 64: movabsw %ax, 0x5a5a5a5a5a5a5a5a # encoding: [0x66,0xa3,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a,0x5a]
 0x66 0xa3 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
 
-# 16: movl %eax, 0x5a5a5a5a
-# 32: movw %ax, 0x5a5a
-# 64: movabsw %ax, 0x5a5a5a5a
+# 16: movl %eax, 0x5a5a5a5a # encoding: [0x67,0x66,0xa3,0x5a,0x5a,0x5a,0x5a]
+# 32: movw %ax, 0x5a5a # encoding: [0x67,0x66,0xa3,0x5a,0x5a]
+# 64: movw %ax, 0x5a5a5a5a # encoding: [0x67,0x66,0xa3,0x5a,0x5a,0x5a,0x5a]
 0x66 0x67 0xa3 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
 
-# 16: movl %eax, 0x5a5a5a5a
-# 32: movw %ax, 0x5a5a
-# 64: movabsw %ax, 0x5a5a5a5a
+# 16: movl %eax, 0x5a5a5a5a # encoding: [0x67,0x66,0xa3,0x5a,0x5a,0x5a,0x5a]
+# 32: movw %ax, 0x5a5a # encoding: [0x67,0x66,0xa3,0x5a,0x5a]
+# 64: movw %ax, 0x5a5a5a5a # encoding: [0x67,0x66,0xa3,0x5a,0x5a,0x5a,0x5a]
 0x67 0x66 0xa3 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
 
-# 16: movl %eax, %es:0x5a5a5a5a
-# 32: movw %ax, %es:0x5a5a
-# 64: movabsw %ax, %es:0x5a5a5a5a
+# 16: movl %eax, %es:0x5a5a5a5a # encoding: [0x67,0x66,0x26,0xa3,0x5a,0x5a,0x5a,0x5a]
+# 32: movw %ax, %es:0x5a5a # encoding: [0x67,0x66,0x26,0xa3,0x5a,0x5a]
+# 64: movw %ax, %es:0x5a5a5a5a # encoding: [0x67,0x66,0x26,0xa3,0x5a,0x5a,0x5a,0x5a]
 0x67 0x26 0x66 0xa3 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
 
diff --git a/test/MC/Disassembler/X86/prefixes.txt b/test/MC/Disassembler/X86/prefixes.txt
index b8830dc3f3b9..9e002fab4656 100644
--- a/test/MC/Disassembler/X86/prefixes.txt
+++ b/test/MC/Disassembler/X86/prefixes.txt
@@ -8,11 +8,11 @@
 0x64 0x48 0x8b 0x3c 0x25 0x00 0x03 0x00 0x00
 
 # CHECK: rep
-# CHECK-NEXT:		stosq
+# CHECK-NEXT:		stosq %rax, %es:(%rdi)
 0xf3 0x48 0xab
 
 # CHECK: rep
-# CHECK-NEXT:		stosl
+# CHECK-NEXT:		stosq %rax, %es:(%edi)
 0xf3 0x67 0x48 0xab
 
 # CHECK: movl 32(%rbp), %eax
@@ -54,6 +54,17 @@
 # CHECK-NEXT: stosq
 0xf3 0xf3 0x48 0xab
 
+
+# Test that we can disassembler control registers above CR8
+# CHECK: movq %cr15, %rax
+0x44 0x0f 0x20 0xf8
+# CHECK: movq %dr15, %rax
+0x44 0x0f 0x21 0xf8
+
+# Test that MMX ignore REX.R and REX.B.
+# CHECK: movq %mm0, %mm1
+0x46 0x0f 0x7f 0xc1
+
 # Test that a prefix on it's own works. It's debatable as to if this is 
 # something that is considered valid, but however as LLVM's own disassembler
 # has decided to disassemble prefixes as being separate opcodes, it therefore 
diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt
index e6e9c7bc9cc5..8fab6c9febcb 100644
--- a/test/MC/Disassembler/X86/simple-tests.txt
+++ b/test/MC/Disassembler/X86/simple-tests.txt
@@ -90,9 +90,24 @@
 # CHECK: movq	%cr0, %rcx
 0x0f 0x20 0xc1
 
+# CHECK: leaw	4(%esp), %cx
+0x67 0x66 0x8d 0x4c 0x24 0x04 
+
+# CHECK: leal	4(%esp), %ecx
+0x67 0x8d 0x4c 0x24 0x04 
+
+# CHECK: leaq	4(%esp), %rcx
+0x67 0x48 0x8d 0x4c 0x24 0x04 
+
+# CHECK: leaw	4(%rsp), %cx
+0x66 0x8d 0x4c 0x24 0x04 
+
 # CHECK: leal	4(%rsp), %ecx
 0x8d 0x4c 0x24 0x04 
 
+# CHECK: leaq	4(%rsp), %rcx
+0x48 0x8d 0x4c 0x24 0x04 
+
 # CHECK: enter	$1, $2
 0xc8 0x01 0x00 0x02
 
diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt
index c9c508680c5a..35c8cce9cbd7 100644
--- a/test/MC/Disassembler/X86/x86-32.txt
+++ b/test/MC/Disassembler/X86/x86-32.txt
@@ -711,3 +711,32 @@
 
 # CHECK: movq %mm0, %mm1
 0x0f 0x7f 0xc1
+
+# CHECK: vpermq $-18, %ymm2, %ymm2
+0xc4 0xe3 0xfd 0x00 0xd2 0xee
+
+# CHECK: cmpps $8, %xmm7, %xmm0
+0x0f 0xc2 0xc7 0x08
+# CHECK: cmppd $8, %xmm7, %xmm0
+0x66 0x0f 0xc2 0xc7 0x08
+# CHECK: cmpss $8, %xmm7, %xmm0
+0xf3 0x0f 0xc2 0xc7 0x08
+# CHECK: cmpsd $8, %xmm7, %xmm0
+0xf2 0x0f 0xc2 0xc7 0x08
+
+# CHECK: addb $38, 5277496
+0x82 0x05 0x38 0x87 0x50 0x00 0x26
+# CHECK: orb $38, 5277496
+0x82 0x0d 0x38 0x87 0x50 0x00 0x26
+# CHECK: adcb $38, 5277496
+0x82 0x15 0x38 0x87 0x50 0x00 0x26
+# CHECK: sbbb $38, 5277496
+0x82 0x1d 0x38 0x87 0x50 0x00 0x26
+# CHECK: andb $38, 5277496
+0x82 0x25 0x38 0x87 0x50 0x00 0x26
+# CHECK: subb $38, 5277496
+0x82 0x2D 0x38 0x87 0x50 0x00 0x26
+# CHECK: xorb $38, 5277496
+0x82 0x35 0x38 0x87 0x50 0x00 0x26
+# CHECK: cmpb $38, 5277496
+0x82 0x3d 0x38 0x87 0x50 0x00 0x26
diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt
index 6f072df7e4b7..e67a4f9383e0 100644
--- a/test/MC/Disassembler/X86/x86-64.txt
+++ b/test/MC/Disassembler/X86/x86-64.txt
@@ -107,6 +107,9 @@
 # CHECK: xbegin	53
 0xc7 0xf8 0x35 0x00 0x00 0x00
 
+# CHECK: xbegin	53
+0x66 0xc7 0xf8 0x35 0x00
+
 # CHECK: xend
 0x0f 0x01 0xd5
 
@@ -265,3 +268,15 @@
 
 # CHECK: $0, 305419896(%rbp)
 0x80 0x84 0x25 0x78 0x56 0x34 0x12 0x00
+
+# CHECK: movabsq 6510615555426900570, %rax
+0x48 0xa1 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# CHECK: movq 1515870810, %rax
+0x67, 0x48 0xa1 0x5a 0x5a 0x5a 0x5a
+
+# CHECK: movabsq %rax, 6510615555426900570
+0x48 0xa3 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a 0x5a
+
+# CHECK: movq %rax, 1515870810
+0x67, 0x48 0xa3 0x5a 0x5a 0x5a 0x5a
diff --git a/test/MC/ELF/alias.s b/test/MC/ELF/alias.s
index 2e65ace6ba11..8e1318230bb8 100644
--- a/test/MC/ELF/alias.s
+++ b/test/MC/ELF/alias.s
@@ -20,6 +20,10 @@ bar5 = bar4
 
         .long foo2
 
+// Test that bar6 is a function that doesn't have the same value as foo4.
+bar6 = bar5
+bar6:
+
 // CHECK:      Symbols [
 // CHECK-NEXT:   Symbol {
 // CHECK-NEXT:     Name:  (0)
@@ -58,6 +62,15 @@ bar5 = bar4
 // CHECK-NEXT:     Section: .text
 // CHECK-NEXT:   }
 // CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: bar6
+// CHECK-NEXT:     Value: 0x5
+// CHECK-NEXT:     Size: 0
+// CHECK-NEXT:     Binding: Local
+// CHECK-NEXT:     Type: None
+// CHECK-NEXT:     Other: 0
+// CHECK-NEXT:     Section: .text
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
 // CHECK-NEXT:     Name: foo
 // CHECK-NEXT:     Value: 0x0
 // CHECK-NEXT:     Size: 0
diff --git a/test/MC/ELF/cfi-large-model.s b/test/MC/ELF/cfi-large-model.s
new file mode 100644
index 000000000000..16073ad7f0ee
--- /dev/null
+++ b/test/MC/ELF/cfi-large-model.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu -code-model=large %s \
+// RUN:   -o - | llvm-readobj -s -sd | FileCheck %s
+
+// CHECK:      Section {
+// CHECK:        Index: 
+// CHECK:        Name: .eh_frame
+// CHECK-NEXT:   Type: SHT_PROGBITS
+// CHECK-NEXT:   Flags [
+// CHECK-NEXT:     SHF_ALLOC
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   Address: 0x0
+// CHECK-NEXT:   Offset: 0x40
+// CHECK-NEXT:   Size: 56
+// CHECK-NEXT:   Link: 0
+// CHECK-NEXT:   Info: 0
+// CHECK-NEXT:   AddressAlignment: 8
+// CHECK-NEXT:   EntrySize: 0
+// CHECK-NEXT:   SectionData (
+// CHECK-NEXT:     0000: 14000000 00000000 037A5200 01781001  |.........zR..x..|
+// CHECK-NEXT:     0010: 1C0C0708 90010000 1C000000 1C000000  |................|
+// CHECK-NEXT:     0020: 00000000 00000000 00000000 00000000  |................|
+// CHECK-NEXT:     0030: 00000000 00000000                    |........|
+// CHECK-NEXT:   )
+
+f:
+    .cfi_startproc
+    .cfi_endproc
diff --git a/test/MC/ELF/cfi-version.ll b/test/MC/ELF/cfi-version.ll
index 10daa1dd408e..c8a9978d7292 100644
--- a/test/MC/ELF/cfi-version.ll
+++ b/test/MC/ELF/cfi-version.ll
@@ -22,19 +22,19 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.module.flags = !{!9, !10}
 !llvm.ident = !{!11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp/test.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"test.c", metadata !"/tmp"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @foo, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/tmp/test.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!11 = metadata !{metadata !"clang version 3.5.0 "}
-!12 = metadata !{i32 2, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/test.c] [DW_LANG_C99]
+!1 = !{!"test.c", !"/tmp"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\002\000\001\000\006\00256\000\002", !1, !5, !6, null, i32 ()* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp/test.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 1, !"Debug Info Version", i32 2}
+!11 = !{!"clang version 3.5.0 "}
+!12 = !MDLocation(line: 2, scope: !4)
 
 ; DWARF2:      .debug_frame contents:
 ; DWARF2:        Version:               1
diff --git a/test/MC/ELF/comdat.s b/test/MC/ELF/comdat.s
index 68b0f328f9bc..479667552204 100644
--- a/test/MC/ELF/comdat.s
+++ b/test/MC/ELF/comdat.s
@@ -1,6 +1,6 @@
 // RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t | FileCheck %s
 
-// Test that we produce the group sections and that they are a the beginning
+// Test that we produce the group sections and that they are at the beginning
 // of the file.
 
 // CHECK:        Section {
@@ -41,7 +41,7 @@
 // CHECK-NEXT:     Offset: 0x54
 // CHECK-NEXT:     Size: 12
 // CHECK-NEXT:     Link: 13
-// CHECK-NEXT:     Info: 13
+// CHECK-NEXT:     Info: 10
 // CHECK-NEXT:     AddressAlignment: 4
 // CHECK-NEXT:     EntrySize: 4
 // CHECK-NEXT:   }
diff --git a/test/MC/ELF/reloc-same-name-section.s b/test/MC/ELF/reloc-same-name-section.s
new file mode 100644
index 000000000000..e63ea548e5aa
--- /dev/null
+++ b/test/MC/ELF/reloc-same-name-section.s
@@ -0,0 +1,31 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux %s -o - | llvm-readobj -r --expand-relocs | FileCheck %s
+
+// test that we produce one relocation against each section.
+
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section {{.*}} {
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset:
+// CHECK-NEXT:       Type:
+// CHECK-NEXT:       Symbol:  .foo (7)
+// CHECK-NEXT:       Addend:
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset:
+// CHECK-NEXT:       Type:
+// CHECK-NEXT:       Symbol:  .foo (8)
+// CHECK-NEXT:       Addend:
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
+
+	.section	.foo,"aG",@progbits,v,comdat
+f:
+
+	.section	.foo,"a",@progbits
+g:
+
+
+	.section	.bar
+	.quad	f
+	.quad	g
diff --git a/test/MC/ELF/section-sym-err.s b/test/MC/ELF/section-sym-err.s
new file mode 100644
index 000000000000..789fee7c422c
--- /dev/null
+++ b/test/MC/ELF/section-sym-err.s
@@ -0,0 +1,6 @@
+// RUN: not llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t.o 2>&1 | FileCheck %s
+
+.section foo
+foo:
+
+// CHECK: error: invalid symbol redefinition
diff --git a/test/MC/ELF/section-sym.s b/test/MC/ELF/section-sym.s
new file mode 100644
index 000000000000..3b76d813fe7c
--- /dev/null
+++ b/test/MC/ELF/section-sym.s
@@ -0,0 +1,91 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj -s -t -r --expand-relocs | FileCheck %s
+
+.section foo, "aG", @progbits, f1, comdat
+.section foo, "G", @progbits, f2, comdat
+.section bar
+.long foo
+
+// Test that the relocation points to the first section foo.
+
+// The first seciton foo has index 6
+// CHECK:      Section {
+// CHECK:        Index:   6
+// CHECK-NEXT:   Name:    foo (28)
+// CHECK-NEXT:   Type:    SHT_PROGBITS (0x1)
+// CHECK-NEXT:   Flags [ (0x202)
+// CHECK-NEXT:     SHF_ALLOC (0x2)
+// CHECK-NEXT:     SHF_GROUP (0x200)
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   Address:         0x0
+// CHECK-NEXT:   Offset:  0x50
+// CHECK-NEXT:   Size:    0
+// CHECK-NEXT:   Link:    0
+// CHECK-NEXT:   Info:    0
+// CHECK-NEXT:   AddressAlignment:        1
+// CHECK-NEXT:   EntrySize:       0
+// CHECK-NEXT: }
+// CHECK-NEXT: Section {
+// CHECK-NEXT:   Index:   7
+// CHECK-NEXT:   Name:    foo (28)
+// CHECK-NEXT:   Type:    SHT_PROGBITS (0x1)
+// CHECK-NEXT:   Flags [ (0x200)
+// CHECK-NEXT:     SHF_GROUP (0x200)
+// CHECK-NEXT:   ]
+// CHECK-NEXT:   Address:         0x0
+// CHECK-NEXT:   Offset:  0x50
+// CHECK-NEXT:   Size:    0
+// CHECK-NEXT:   Link:    0
+// CHECK-NEXT:   Info:    0
+// CHECK-NEXT:   AddressAlignment:        1
+// CHECK-NEXT:   EntrySize:       0
+// CHECK-NEXT: }
+
+// The relocation points to symbol 6
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section (9) .relabar {
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset:  0x0
+// CHECK-NEXT:       Type:    R_X86_64_32 (10)
+// CHECK-NEXT:       Symbol:  foo (6)
+// CHECK-NEXT:       Addend:  0x0
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
+
+
+// The symbol 6 corresponds section 6
+// CHECK: Symbols [
+
+// symbol 0
+// CHECK-NOT: Name
+// CHECK: Name:
+
+// symbol 1
+// CHECK-NOT: Name
+// CHECK: Name:    f1
+
+// symbol 2
+// CHECK-NOT: Name
+// CHECK: Name:    f2
+
+// symbol 3
+// CHECK-NOT: Name
+// CHECK: Name:    .text
+
+// symbol 4
+// CHECK-NOT: Name
+// CHECK: Name:    .data
+
+// symbol 5
+// CHECK-NOT: Name
+// CHECK: Name:    .bss
+
+// symbol 6
+// CHECK-NOT: Name
+// CHECK: Name:    foo
+// CHECK: Section: foo (0x6)
+
+// symbol 7
+// CHECK-NOT: Name
+// CHECK: Name:    foo
+// CHECK: Section: foo (0x7)
diff --git a/test/MC/ELF/section-sym2.s b/test/MC/ELF/section-sym2.s
new file mode 100644
index 000000000000..acdb7d9547d0
--- /dev/null
+++ b/test/MC/ELF/section-sym2.s
@@ -0,0 +1,28 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | llvm-readobj  -t -r --expand-relocs | FileCheck %s
+
+// Test that we can forward reference a section.
+
+mov .rodata, %rsi
+.section .rodata
+
+// CHECK:Relocations [
+// CHECK:  Section (2) .rela.text {
+// CHECK:    Relocation {
+// CHECK:      Offset: 0x4
+// CHECK:      Type: R_X86_64_32S (11)
+// CHECK:      Symbol: .rodata
+// CHECK:      Addend: 0x0
+// CHECK:    }
+// CHECK:  }
+// CHECK:]
+
+// There is only one .rodata symbol
+
+// CHECK:Symbols [
+// CHECK-NOT:    Name: .rodata
+// CHECK:        Name: .rodata
+// CHECK-NEXT:   Value: 0x0
+// CHECK-NEXT:   Size: 0
+// CHECK-NEXT:   Binding: Local (0x0)
+// CHECK-NEXT:   Type: Section (0x3)
+// CHECK-NOT:    Name: .rodata
diff --git a/test/MC/Hexagon/basic.ll b/test/MC/Hexagon/basic.ll
new file mode 100644
index 000000000000..8a5d2e6f872c
--- /dev/null
+++ b/test/MC/Hexagon/basic.ll
@@ -0,0 +1,7 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-readobj -h -r | FileCheck -check-prefix=OBJ %s
+
+; OBJ: Format: ELF32-hexagon
+; OBJ: Arch: hexagon
+; OBJ: AddressSize: 32bit
+; OBJ: Machine: EM_HEXAGON
diff --git a/test/MC/Hexagon/inst_add.ll b/test/MC/Hexagon/inst_add.ll
new file mode 100644
index 000000000000..20a7b312ed6d
--- /dev/null
+++ b/test/MC/Hexagon/inst_add.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i32 @foo (i32 %a, i32 %b)
+{
+  %1 = add i32 %a, %b
+  ret i32 %1
+}
+
+; CHECK:  0000 004100f3 00c09f52
diff --git a/test/MC/Hexagon/inst_add64.ll b/test/MC/Hexagon/inst_add64.ll
new file mode 100644
index 000000000000..72160678886b
--- /dev/null
+++ b/test/MC/Hexagon/inst_add64.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i64 @foo (i64 %a, i64 %b)
+{
+  %1 = add i64 %a, %b
+  ret i64 %1
+}
+
+; CHECK:  0000 e04200d3 00c09f52
diff --git a/test/MC/Hexagon/inst_and.ll b/test/MC/Hexagon/inst_and.ll
new file mode 100644
index 000000000000..854adf080b85
--- /dev/null
+++ b/test/MC/Hexagon/inst_and.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i32 @foo (i32 %a, i32 %b)
+{
+  %1 = and i32 %a, %b
+  ret i32 %1
+}
+
+; CHECK:  0000 004100f1 00c09f52
diff --git a/test/MC/Hexagon/inst_and64.ll b/test/MC/Hexagon/inst_and64.ll
new file mode 100644
index 000000000000..0b8307463261
--- /dev/null
+++ b/test/MC/Hexagon/inst_and64.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i64 @foo (i64 %a, i64 %b)
+{
+  %1 = and i64 %a, %b
+  ret i64 %1
+}
+
+; CHECK:  0000 0042e0d3 00c09f52
diff --git a/test/MC/Hexagon/inst_aslh.ll b/test/MC/Hexagon/inst_aslh.ll
new file mode 100644
index 000000000000..680943818e0f
--- /dev/null
+++ b/test/MC/Hexagon/inst_aslh.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i32 @foo (i32 %a)
+{
+  %1 = shl i32 %a, 16
+  ret i32 %1
+}
+
+; CHECK:   0000 00400070 00c09f52
diff --git a/test/MC/Hexagon/inst_asrh.ll b/test/MC/Hexagon/inst_asrh.ll
new file mode 100644
index 000000000000..f16f0a1f24f4
--- /dev/null
+++ b/test/MC/Hexagon/inst_asrh.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i32 @foo (i32 %a)
+{
+  %1 = ashr i32 %a, 16
+  ret i32 %1
+}
+
+; CHECK:   0000 00402070 00c09f52
diff --git a/test/MC/Hexagon/inst_cmp_eq.ll b/test/MC/Hexagon/inst_cmp_eq.ll
new file mode 100644
index 000000000000..113db631fa92
--- /dev/null
+++ b/test/MC/Hexagon/inst_cmp_eq.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i1 @foo (i32 %a, i32 %b)
+{
+  %1 = icmp eq i32 %a, %b
+  ret i1 %1
+}
+
+; CHECK:  0000 004100f2 00404089 00c09f52
diff --git a/test/MC/Hexagon/inst_cmp_eqi.ll b/test/MC/Hexagon/inst_cmp_eqi.ll
new file mode 100644
index 000000000000..70c4c308e1f4
--- /dev/null
+++ b/test/MC/Hexagon/inst_cmp_eqi.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i1 @foo (i32 %a)
+{
+  %1 = icmp eq i32 %a, 42
+  ret i1 %1
+}
+
+; CHECK:  0000 40450075 00404089 00c09f52
diff --git a/test/MC/Hexagon/inst_cmp_gt.ll b/test/MC/Hexagon/inst_cmp_gt.ll
new file mode 100644
index 000000000000..85fedbfb5034
--- /dev/null
+++ b/test/MC/Hexagon/inst_cmp_gt.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i1 @foo (i32 %a, i32 %b)
+{
+  %1 = icmp sgt i32 %a, %b
+  ret i1 %1
+}
+
+; CHECK:  0000 004140f2 00404089 00c09f52
diff --git a/test/MC/Hexagon/inst_cmp_gti.ll b/test/MC/Hexagon/inst_cmp_gti.ll
new file mode 100644
index 000000000000..18ba3e463ef7
--- /dev/null
+++ b/test/MC/Hexagon/inst_cmp_gti.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i1 @foo (i32 %a)
+{
+  %1 = icmp sgt i32 %a, 42
+  ret i1 %1
+}
+
+; CHECK:  0000 40454075 00404089 00c09f52
diff --git a/test/MC/Hexagon/inst_cmp_lt.ll b/test/MC/Hexagon/inst_cmp_lt.ll
new file mode 100644
index 000000000000..3a7618421e3d
--- /dev/null
+++ b/test/MC/Hexagon/inst_cmp_lt.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i1 @foo (i32 %a, i32 %b)
+{
+  %1 = icmp slt i32 %a, %b
+  ret i1 %1
+}
+
+; CHECK:  0000 004041f2 00404089 00c09f52
diff --git a/test/MC/Hexagon/inst_cmp_ugt.ll b/test/MC/Hexagon/inst_cmp_ugt.ll
new file mode 100644
index 000000000000..096536f54a7a
--- /dev/null
+++ b/test/MC/Hexagon/inst_cmp_ugt.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i1 @foo (i32 %a, i32 %b)
+{
+  %1 = icmp ugt i32 %a, %b
+  ret i1 %1
+}
+
+; CHECK:  0000 004160f2 00404089 00c09f52
diff --git a/test/MC/Hexagon/inst_cmp_ugti.ll b/test/MC/Hexagon/inst_cmp_ugti.ll
new file mode 100644
index 000000000000..a83583457d13
--- /dev/null
+++ b/test/MC/Hexagon/inst_cmp_ugti.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i1 @foo (i32 %a)
+{
+  %1 = icmp ugt i32 %a, 42
+  ret i1 %1
+}
+
+; CHECK:  0000 40458075 00404089 00c09f52
diff --git a/test/MC/Hexagon/inst_cmp_ult.ll b/test/MC/Hexagon/inst_cmp_ult.ll
new file mode 100644
index 000000000000..4323fa0834d6
--- /dev/null
+++ b/test/MC/Hexagon/inst_cmp_ult.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i1 @foo (i32 %a, i32 %b)
+{
+  %1 = icmp ult i32 %a, %b
+  ret i1 %1
+}
+
+; CHECK:  0000 004061f2 00404089 00c09f52
diff --git a/test/MC/Hexagon/inst_or.ll b/test/MC/Hexagon/inst_or.ll
new file mode 100644
index 000000000000..1b966fcf98e7
--- /dev/null
+++ b/test/MC/Hexagon/inst_or.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i32 @foo (i32 %a, i32 %b)
+{
+  %1 = or i32 %a, %b
+  ret i32 %1
+}
+
+; CHECK:   0000 004120f1 00c09f52
diff --git a/test/MC/Hexagon/inst_or64.ll b/test/MC/Hexagon/inst_or64.ll
new file mode 100644
index 000000000000..ea104300da3e
--- /dev/null
+++ b/test/MC/Hexagon/inst_or64.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i64 @foo (i64 %a, i64 %b)
+{
+  %1 = or i64 %a, %b
+  ret i64 %1
+}
+
+; CHECK:  0000 4042e0d3 00c09f52
diff --git a/test/MC/Hexagon/inst_select.ll b/test/MC/Hexagon/inst_select.ll
new file mode 100644
index 000000000000..7e88c65a8185
--- /dev/null
+++ b/test/MC/Hexagon/inst_select.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i32 @foo (i1 %a, i32 %b, i32 %c)
+{
+  %1 = select i1 %a, i32 %b, i32 %c
+  ret i32 %1
+}
+
+; CHECK:  0000 00400085 004201f4 00c09f52
diff --git a/test/MC/Hexagon/inst_sub.ll b/test/MC/Hexagon/inst_sub.ll
new file mode 100644
index 000000000000..51d38c9d20f8
--- /dev/null
+++ b/test/MC/Hexagon/inst_sub.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i32 @foo (i32 %a, i32 %b)
+{
+  %1 = sub i32 %a, %b
+  ret i32 %1
+}
+
+; CHECK:  0000 004021f3 00c09f52
diff --git a/test/MC/Hexagon/inst_sub64.ll b/test/MC/Hexagon/inst_sub64.ll
new file mode 100644
index 000000000000..99f102dca593
--- /dev/null
+++ b/test/MC/Hexagon/inst_sub64.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i64 @foo (i64 %a, i64 %b)
+{
+  %1 = sub i64 %a, %b
+  ret i64 %1
+}
+
+; CHECK:  0000 e04022d3 00c09f52
diff --git a/test/MC/Hexagon/inst_sxtb.ll b/test/MC/Hexagon/inst_sxtb.ll
new file mode 100644
index 000000000000..4a217420a960
--- /dev/null
+++ b/test/MC/Hexagon/inst_sxtb.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i32 @foo (i8 %a)
+{
+  %1 = sext i8 %a to i32
+  ret i32 %1
+}
+
+; CHECK:   0000 0040a070 00c09f52
diff --git a/test/MC/Hexagon/inst_sxth.ll b/test/MC/Hexagon/inst_sxth.ll
new file mode 100644
index 000000000000..f0bcf584efa9
--- /dev/null
+++ b/test/MC/Hexagon/inst_sxth.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i32 @foo (i16 %a)
+{
+  %1 = sext i16 %a to i32
+  ret i32 %1
+}
+
+; CHECK:   0000 0040e070 00c09f52
diff --git a/test/MC/Hexagon/inst_xor.ll b/test/MC/Hexagon/inst_xor.ll
new file mode 100644
index 000000000000..f54f3ba543ec
--- /dev/null
+++ b/test/MC/Hexagon/inst_xor.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i32 @foo (i32 %a, i32 %b)
+{
+  %1 = xor i32 %a, %b
+  ret i32 %1
+}
+
+; CHECK:   0000 004160f1 00c09f52
diff --git a/test/MC/Hexagon/inst_xor64.ll b/test/MC/Hexagon/inst_xor64.ll
new file mode 100644
index 000000000000..7f77c4614cf0
--- /dev/null
+++ b/test/MC/Hexagon/inst_xor64.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i64 @foo (i64 %a, i64 %b)
+{
+  %1 = xor i64 %a, %b
+  ret i64 %1
+}
+
+; CHECK:  0000 8042e0d3 00c09f52
diff --git a/test/MC/Hexagon/inst_zxtb.ll b/test/MC/Hexagon/inst_zxtb.ll
new file mode 100644
index 000000000000..622c03692b2f
--- /dev/null
+++ b/test/MC/Hexagon/inst_zxtb.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i32 @foo (i8 %a)
+{
+  %1 = zext i8 %a to i32
+  ret i32 %1
+}
+
+; CHECK:   0000 e05f0076 00c09f52
diff --git a/test/MC/Hexagon/inst_zxth.ll b/test/MC/Hexagon/inst_zxth.ll
new file mode 100644
index 000000000000..962210b17511
--- /dev/null
+++ b/test/MC/Hexagon/inst_zxth.ll
@@ -0,0 +1,10 @@
+;; RUN: llc -mtriple=hexagon-unknown-elf -filetype=obj %s -o - \
+;; RUN: | llvm-objdump -s - | FileCheck %s
+
+define i32 @foo (i16 %a)
+{
+  %1 = zext i16 %a to i32
+  ret i32 %1
+}
+
+; CHECK:   0000 0040c070 00c09f52
diff --git a/test/MC/Hexagon/lit.local.cfg b/test/MC/Hexagon/lit.local.cfg
new file mode 100644
index 000000000000..ba72ff632d4e
--- /dev/null
+++ b/test/MC/Hexagon/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'Hexagon' in config.root.targets:
+    config.unsupported = True
+
diff --git a/test/MC/MachO/AArch64/darwin-ARM64-reloc.s b/test/MC/MachO/AArch64/darwin-ARM64-reloc.s
index 7f586aedd636..07d52528e911 100644
--- a/test/MC/MachO/AArch64/darwin-ARM64-reloc.s
+++ b/test/MC/MachO/AArch64/darwin-ARM64-reloc.s
@@ -1,4 +1,4 @@
-; RUN: llvm-mc -n -triple arm64-apple-darwin10 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+; RUN: llvm-mc -n -triple arm64-apple-darwin10 %s -filetype=obj -o - | llvm-readobj -r --expand-relocs | FileCheck %s
 
 	.text
 _fred:
@@ -15,6 +15,7 @@ _fred:
 
 	adrp	x3, _data_ext@gotpage
         ldr	w2, [x3, _data_ext@gotpageoff]
+        adrp    x0, L_.str@PAGE
 
 	.data
 _data:
@@ -28,130 +29,230 @@ _data:
         .quad _foo@got
         .long _foo@got - .
 
+        .section __TEXT,__cstring,cstring_literals
+L_.str:
+        .asciz "foo"
 
-; CHECK: ('cputype', 16777228)
-; CHECK: ('cpusubtype', 0)
-; CHECK: ('filetype', 1)
-; CHECK: ('num_load_commands', 3)
-; CHECK: ('load_commands_size', 336)
-; CHECK: ('flag', 0)
-; CHECK: ('reserved', 0)
-; CHECK: ('load_commands', [
-; CHECK:   # Load Command 0
-; CHECK:  (('command', 25)
-; CHECK:   ('size', 232)
-; CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-; CHECK:   ('vm_addr', 0)
-; CHECK:   ('vm_size', 84)
-; CHECK:   ('file_offset', 368)
-; CHECK:   ('file_size', 84)
-; CHECK:   ('maxprot', 7)
-; CHECK:   ('initprot', 7)
-; CHECK:   ('num_sections', 2)
-; CHECK:   ('flags', 0)
-; CHECK:   ('sections', [
-; CHECK:     # Section 0
-; CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-; CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-; CHECK:     ('address', 0)
-; CHECK:     ('size', 36)
-; CHECK:     ('offset', 368)
-; CHECK:     ('alignment', 0)
-; CHECK:     ('reloc_offset', 452)
-; CHECK:     ('num_reloc', 13)
-; CHECK:     ('flags', 0x80000400)
-; CHECK:     ('reserved1', 0)
-; CHECK:     ('reserved2', 0)
-; CHECK:     ('reserved3', 0)
-; CHECK:    ),
-; CHECK:   ('_relocations', [
-; CHECK:     # Relocation 0
-; CHECK:     (('word-0', 0x20),
-; CHECK:      ('word-1', 0x6c000005)),
-; CHECK:     # Relocation 1
-; CHECK:     (('word-0', 0x1c),
-; CHECK:      ('word-1', 0x5d000005)),
-; CHECK:     # Relocation 2
-; CHECK:     (('word-0', 0x18),
-; CHECK:      ('word-1', 0xa4000004)),
-; CHECK:     # Relocation 3
-; CHECK:     (('word-0', 0x18),
-; CHECK:      ('word-1', 0x4c000002)),
-; CHECK:     # Relocation 4
-; CHECK:     (('word-0', 0x14),
-; CHECK:      ('word-1', 0xa4000001)),
-; CHECK:     # Relocation 5
-; CHECK:     (('word-0', 0x14),
-; CHECK:      ('word-1', 0x3d000002)),
-; CHECK:     # Relocation 6
-; CHECK:     (('word-0', 0x10),
-; CHECK:      ('word-1', 0xa4000004)),
-; CHECK:     # Relocation 7
-; CHECK:     (('word-0', 0x10),
-; CHECK:      ('word-1', 0x4c000002)),
-; CHECK:     # Relocation 8
-; CHECK:     (('word-0', 0xc),
-; CHECK:      ('word-1', 0x4c000002)),
-; CHECK:     # Relocation 9
-; CHECK:     (('word-0', 0x8),
-; CHECK:      ('word-1', 0x3d000002)),
-; CHECK:     # Relocation 10
-; CHECK:     (('word-0', 0x4),
-; CHECK:      ('word-1', 0xa4000014)),
-; CHECK:     # Relocation 11
-; CHECK:     (('word-0', 0x4),
-; CHECK:      ('word-1', 0x2d000007)),
-; CHECK:     # Relocation 12
-; CHECK:     (('word-0', 0x0),
-; CHECK:      ('word-1', 0x2d000007)),
-; CHECK:   ])
-; CHECK:   ('_section_data', '00000094 00000094 03000090 620040b9 63000091 03000090 620040b9 03000090 620040b9')
-; CHECK:     # Section 1
-; CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-; CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-; CHECK:     ('address', 36)
-; CHECK:     ('size', 48)
-; CHECK:     ('offset', 404)
-; CHECK:     ('alignment', 0)
-; CHECK:     ('reloc_offset', 556)
-; CHECK:     ('num_reloc', 10)
-; CHECK:     ('flags', 0x0)
-; CHECK:     ('reserved1', 0)
-; CHECK:     ('reserved2', 0)
-; CHECK:     ('reserved3', 0)
-; CHECK:    ),
-; CHECK:   ('_relocations', [
-; CHECK:     # Relocation 0
-; CHECK:     (('word-0', 0x2c),
-; CHECK:      ('word-1', 0x7d000006)),
-; CHECK:     # Relocation 1
-; CHECK:     (('word-0', 0x24),
-; CHECK:      ('word-1', 0x7e000006)),
-; CHECK:     # Relocation 2
-; CHECK:     (('word-0', 0x20),
-; CHECK:      ('word-1', 0x1c000004)),
-; CHECK:     # Relocation 3
-; CHECK:     (('word-0', 0x20),
-; CHECK:      ('word-1', 0xc000006)),
-; CHECK:     # Relocation 4
-; CHECK:     (('word-0', 0x18),
-; CHECK:      ('word-1', 0x1e000004)),
-; CHECK:     # Relocation 5
-; CHECK:     (('word-0', 0x18),
-; CHECK:      ('word-1', 0xe000006)),
-; CHECK:     # Relocation 6
-; CHECK:     (('word-0', 0x10),
-; CHECK:      ('word-1', 0x1e000004)),
-; CHECK:     # Relocation 7
-; CHECK:     (('word-0', 0x10),
-; CHECK:      ('word-1', 0xe000006)),
-; CHECK:     # Relocation 8
-; CHECK:     (('word-0', 0x8),
-; CHECK:      ('word-1', 0xe000006)),
-; CHECK:     # Relocation 9
-; CHECK:     (('word-0', 0x0),
-; CHECK:      ('word-1', 0xe000006)),
-; CHECK:   ])
-; CHECK:   ('_section_data', '00000000 00000000 04000000 00000000 00000000 00000000 04000000 00000000 00000000 00000000 00000000 d4ffffff')
-; CHECK:   ])
-; CHECK:  ),
+
+; CHECK:     Relocations [
+; CHECK-NEXT:  Section __text {
+; CHECK-NEXT:    Relocation {
+; CHECK-NEXT:       Offset: 0x24
+; CHECK-NEXT:       PCRel: 1
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_PAGE21 (3)
+; CHECK-NEXT:       Symbol: L_.str
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:    Relocation {
+; CHECK-NEXT:       Offset: 0x20
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_GOT_LOAD_PAGEOFF12 (6)
+; CHECK-NEXT:       Symbol: _data_ext
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x1C
+; CHECK-NEXT:       PCRel: 1
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_GOT_LOAD_PAGE21 (5)
+; CHECK-NEXT:       Symbol: _data_ext
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x18
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 0
+; CHECK-NEXT:       Type: ARM64_RELOC_ADDEND (10)
+; CHECK-NEXT:       Symbol: 0x4
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x18
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_PAGEOFF12 (4)
+; CHECK-NEXT:       Symbol: _data
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x14
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 0
+; CHECK-NEXT:       Type: ARM64_RELOC_ADDEND (10)
+; CHECK-NEXT:       Symbol: 0x1
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x14
+; CHECK-NEXT:       PCRel: 1
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_PAGE21 (3)
+; CHECK-NEXT:       Symbol: _data
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x10
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 0
+; CHECK-NEXT:       Type: ARM64_RELOC_ADDEND (10)
+; CHECK-NEXT:       Symbol: 0x4
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x10
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_PAGEOFF12 (4)
+; CHECK-NEXT:       Symbol: _data
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0xC
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_PAGEOFF12 (4)
+; CHECK-NEXT:       Symbol: _data
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x8
+; CHECK-NEXT:       PCRel: 1
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_PAGE21 (3)
+; CHECK-NEXT:       Symbol: _data
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x4
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 0
+; CHECK-NEXT:       Type: ARM64_RELOC_ADDEND (10)
+; CHECK-NEXT:       Symbol: 0x14
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x4
+; CHECK-NEXT:       PCRel: 1
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_BRANCH26 (2)
+; CHECK-NEXT:       Symbol: _func
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x0
+; CHECK-NEXT:       PCRel: 1
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_BRANCH26 (2)
+; CHECK-NEXT:       Symbol: _func
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:   }
+; CHECK-NEXT:   Section __data {
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x2C
+; CHECK-NEXT:       PCRel: 1
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_POINTER_TO_GOT (7)
+; CHECK-NEXT:       Symbol: _foo
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x24
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 3
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_POINTER_TO_GOT (7)
+; CHECK-NEXT:       Symbol: _foo
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x20
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_SUBTRACTOR (1)
+; CHECK-NEXT:       Symbol: _bar
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x20
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_UNSIGNED (0)
+; CHECK-NEXT:       Symbol: _foo
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x18
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 3
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_SUBTRACTOR (1)
+; CHECK-NEXT:       Symbol: _bar
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x18
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 3
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_UNSIGNED (0)
+; CHECK-NEXT:       Symbol: _foo
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x10
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 3
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_SUBTRACTOR (1)
+; CHECK-NEXT:       Symbol: _bar
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x10
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 3
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_UNSIGNED (0)
+; CHECK-NEXT:       Symbol: _foo
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x8
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 3
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_UNSIGNED (0)
+; CHECK-NEXT:       Symbol: _foo
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x0
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 3
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_UNSIGNED (0)
+; CHECK-NEXT:       Symbol: _foo
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:   }
+; CHECK-NEXT: ]
diff --git a/test/MC/MachO/AArch64/mergeable.s b/test/MC/MachO/AArch64/mergeable.s
new file mode 100644
index 000000000000..fc6ec04f37bf
--- /dev/null
+++ b/test/MC/MachO/AArch64/mergeable.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple aarch64-apple-darwin14 %s -filetype=obj -o - | llvm-readobj -r --expand-relocs | FileCheck %s
+
+// Test that we "S + K" produce a relocation with a symbol, but just S produces
+// a relocation with the section.
+
+	.section	__TEXT,__literal4,4byte_literals
+L0:
+	.long	42
+
+	.section	__TEXT,__cstring,cstring_literals
+L1:
+	.asciz	"42"
+
+	.section	__DATA,__data
+	.quad	L0
+	.quad	L0 + 1
+	.quad	L1
+	.quad	L1 + 1
+
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section __data {
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x18
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: ARM64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: L1
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x10
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: ARM64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: L1
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x8
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: ARM64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: L0
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x0
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 0
+// CHECK-NEXT:       Type: ARM64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: 0x2
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/MachO/AArch64/reloc-crash.s b/test/MC/MachO/AArch64/reloc-crash.s
new file mode 100644
index 000000000000..4984947f65b5
--- /dev/null
+++ b/test/MC/MachO/AArch64/reloc-crash.s
@@ -0,0 +1,27 @@
+; RUN: llvm-mc -triple arm64-apple-darwin10 %s -filetype=obj -o - | llvm-readobj -r --expand-relocs | FileCheck %s
+
+; Test tha we produce an external relocation. There is no apparent need for it, but
+; ld64 (241.9) crashes if we don't.
+
+; CHECK:      Relocations [
+; CHECK-NEXT:   Section __bar {
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x0
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 3
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_UNSIGNED (0)
+; CHECK-NEXT:       Symbol: Lbar
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:   }
+; CHECK-NEXT: ]
+
+        .section        __TEXT,__cstring
+Lfoo:
+        .asciz  "Hello World!"
+Lbar:
+        .asciz  "cString"
+
+        .section        __foo,__bar,literal_pointers
+        .quad   Lbar
diff --git a/test/MC/MachO/AArch64/reloc-crash2.s b/test/MC/MachO/AArch64/reloc-crash2.s
new file mode 100644
index 000000000000..6ae44715c63e
--- /dev/null
+++ b/test/MC/MachO/AArch64/reloc-crash2.s
@@ -0,0 +1,24 @@
+; RUN: llvm-mc -triple arm64-apple-darwin10 %s -filetype=obj -o - | llvm-readobj -r --expand-relocs | FileCheck %s
+
+; This is a regression test making sure we don't crash.
+
+; CHECK:      Relocations [
+; CHECK-NEXT:   Section __text {
+; CHECK-NEXT:     Relocation {
+; CHECK-NEXT:       Offset: 0x0
+; CHECK-NEXT:       PCRel: 0
+; CHECK-NEXT:       Length: 2
+; CHECK-NEXT:       Extern: 1
+; CHECK-NEXT:       Type: ARM64_RELOC_PAGEOFF12 (4)
+; CHECK-NEXT:       Symbol: ltmp1
+; CHECK-NEXT:       Scattered: 0
+; CHECK-NEXT:     }
+; CHECK-NEXT:   }
+; CHECK-NEXT: ]
+
+
+        ldr     x0, [x8, L_bar@PAGEOFF]
+
+        .section        __foo,__bar,regular,no_dead_strip
+L_bar:
+        .quad   0
diff --git a/test/MC/MachO/ARM/aliased-symbols.s b/test/MC/MachO/ARM/aliased-symbols.s
index 0b4463d055aa..e87b81c6a150 100644
--- a/test/MC/MachO/ARM/aliased-symbols.s
+++ b/test/MC/MachO/ARM/aliased-symbols.s
@@ -70,7 +70,7 @@ Ltmp0:
         // alias_to_local is an alias, but what it points to has no
         // MachO representation. We must resolve it.
 // CHECK: Symbol {
-// CHECK-NEXT:   Name: alias_to_local (37)
+// CHECK-NEXT:   Name: alias_to_local (42)
 // CHECK-NEXT:   Type: Section (0xE)
 // CHECK-NEXT:   Section:  (0x0)
 // CHECK-NEXT:   RefType: UndefinedNonLazy (0x0)
@@ -93,7 +93,7 @@ Ltmp0:
 
         // var1 was another alias to an unknown variable. Not extern this time.
 // CHECK: Symbol {
-// CHECK-NEXT:   Name: var1 (1)
+// CHECK-NEXT:   Name: var1 (89)
 // CHECK-NEXT:   Type: Indirect (0xA)
 // CHECK-NEXT:   Section:  (0x0)
 // CHECK-NEXT:   RefType: UndefinedNonLazy (0x0)
diff --git a/test/MC/MachO/ARM/darwin-ARM-reloc.s b/test/MC/MachO/ARM/darwin-ARM-reloc.s
index b98c80c46e8e..374f8804a52c 100644
--- a/test/MC/MachO/ARM/darwin-ARM-reloc.s
+++ b/test/MC/MachO/ARM/darwin-ARM-reloc.s
@@ -110,10 +110,10 @@ Lsc0_0:
 @ CHECK:   ('nsyms', 4)
 @ CHECK:   ('stroff', 488)
 @ CHECK:   ('strsize', 24)
-@ CHECK:   ('_string_data', '\x00_printf\x00_f0\x00_f1\x00_d0\x00\x00\x00\x00')
+@ CHECK:   ('_string_data', '\x00_printf\x00_f1\x00_f0\x00_d0\x00\x00\x00\x00')
 @ CHECK:   ('_symbols', [
 @ CHECK:     # Symbol 0
-@ CHECK:    (('n_strx', 9)
+@ CHECK:    (('n_strx', 13)
 @ CHECK:     ('n_type', 0xe)
 @ CHECK:     ('n_sect', 1)
 @ CHECK:     ('n_desc', 0)
@@ -121,7 +121,7 @@ Lsc0_0:
 @ CHECK:     ('_string', '_f0')
 @ CHECK:    ),
 @ CHECK:     # Symbol 1
-@ CHECK:    (('n_strx', 13)
+@ CHECK:    (('n_strx', 9)
 @ CHECK:     ('n_type', 0xe)
 @ CHECK:     ('n_sect', 1)
 @ CHECK:     ('n_desc', 0)
diff --git a/test/MC/MachO/ARM/ios-version-min-load-command.s b/test/MC/MachO/ARM/ios-version-min-load-command.s
index e065d147be76..9f63c9bd27c7 100644
--- a/test/MC/MachO/ARM/ios-version-min-load-command.s
+++ b/test/MC/MachO/ARM/ios-version-min-load-command.s
@@ -6,5 +6,5 @@
 // CHECK:  (('command', 37)
 // CHECK:   ('size', 16)
 // CHECK:   ('version, 6490119)
-// CHECK:   ('reserved, 0)
+// CHECK:   ('sdk, 0)
 // CHECK:  ),
diff --git a/test/MC/MachO/ARM/static-movt-relocs.s b/test/MC/MachO/ARM/static-movt-relocs.s
index dce56832929d..4385549035e7 100644
--- a/test/MC/MachO/ARM/static-movt-relocs.s
+++ b/test/MC/MachO/ARM/static-movt-relocs.s
@@ -1,4 +1,4 @@
-@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumbv7-apple-darwin10 -filetype=obj -o - < %s | macho-dump | FileCheck %s
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumbv7-apple-darwin10 -filetype=obj -o - < %s | llvm-readobj -r --expand-relocs | FileCheck %s
         .thumb
         .thumb_func foo
 foo:
@@ -6,18 +6,43 @@ foo:
         movt r0, :upper16:(bar + 16)
         bx r0
 
-
-@ CHECK:  ('_relocations', [
-@ CHECK:    # Relocation 0
-@ CHECK:    (('word-0', 0x4),
-@ CHECK:     ('word-1', 0x8e000001)),
-@ CHECK:    # Relocation 1
-@ CHECK:    (('word-0', 0x10),
-@ CHECK:     ('word-1', 0x16ffffff)),
-@ CHECK:    # Relocation 2
-@ CHECK:    (('word-0', 0x0),
-@ CHECK:     ('word-1', 0x8c000001)),
-@ CHECK:    # Relocation 3
-@ CHECK:    (('word-0', 0x0),
-@ CHECK:     ('word-1', 0x14ffffff)),
-@ CHECK:  ])
+@ CHECK:      Relocations [
+@ CHECK-NEXT:   Section __text {
+@ CHECK-NEXT:     Relocation {
+@ CHECK-NEXT:       Offset: 0x4
+@ CHECK-NEXT:       PCRel: 0
+@ CHECK-NEXT:       Length: 3
+@ CHECK-NEXT:       Extern: 1
+@ CHECK-NEXT:       Type: ARM_RELOC_HALF (8)
+@ CHECK-NEXT:       Symbol: bar
+@ CHECK-NEXT:       Scattered: 0
+@ CHECK-NEXT:     }
+@ CHECK-NEXT:     Relocation {
+@ CHECK-NEXT:       Offset: 0x10
+@ CHECK-NEXT:       PCRel: 0
+@ CHECK-NEXT:       Length: 3
+@ CHECK-NEXT:       Extern: 0
+@ CHECK-NEXT:       Type: ARM_RELOC_PAIR (1)
+@ CHECK-NEXT:       Symbol: 0xFFFFFF
+@ CHECK-NEXT:       Scattered: 0
+@ CHECK-NEXT:     }
+@ CHECK-NEXT:     Relocation {
+@ CHECK-NEXT:       Offset: 0x0
+@ CHECK-NEXT:       PCRel: 0
+@ CHECK-NEXT:       Length: 2
+@ CHECK-NEXT:       Extern: 1
+@ CHECK-NEXT:       Type: ARM_RELOC_HALF (8)
+@ CHECK-NEXT:       Symbol: bar
+@ CHECK-NEXT:       Scattered: 0
+@ CHECK-NEXT:     }
+@ CHECK-NEXT:     Relocation {
+@ CHECK-NEXT:       Offset: 0x0
+@ CHECK-NEXT:       PCRel: 0
+@ CHECK-NEXT:       Length: 2
+@ CHECK-NEXT:       Extern: 0
+@ CHECK-NEXT:       Type: ARM_RELOC_PAIR (1)
+@ CHECK-NEXT:       Symbol: 0xFFFFFF
+@ CHECK-NEXT:       Scattered: 0
+@ CHECK-NEXT:     }
+@ CHECK-NEXT:   }
+@ CHECK-NEXT: ]
diff --git a/test/MC/MachO/absolute.s b/test/MC/MachO/absolute.s
index 784e32a7e41d..0b22afb1b4d0 100644
--- a/test/MC/MachO/absolute.s
+++ b/test/MC/MachO/absolute.s
@@ -63,10 +63,10 @@ foo_equals2 = (_foo - _bar + 0xffff0000)
 // CHECK:   ('nsyms', 8)
 // CHECK:   ('stroff', 420)
 // CHECK:   ('strsize', 84)
-// CHECK:   ('_string_data', '\x00foo_set1_global\x00foo_set2_global\x00_bar\x00_foo\x00foo_set1\x00foo_set2\x00foo_equals\x00foo_equals2\x00')
+// CHECK:   ('_string_data', '\x00foo_equals\x00_bar\x00_foo\x00foo_set2_global\x00foo_set1_global\x00foo_set2\x00foo_equals2\x00foo_set1\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 33)
+// CHECK:    (('n_strx', 12)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -74,7 +74,7 @@ foo_equals2 = (_foo - _bar + 0xffff0000)
 // CHECK:     ('_string', '_bar')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 38)
+// CHECK:    (('n_strx', 17)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -82,7 +82,7 @@ foo_equals2 = (_foo - _bar + 0xffff0000)
 // CHECK:     ('_string', '_foo')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 43)
+// CHECK:    (('n_strx', 75)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 32)
@@ -90,7 +90,7 @@ foo_equals2 = (_foo - _bar + 0xffff0000)
 // CHECK:     ('_string', 'foo_set1')
 // CHECK:    ),
 // CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 52)
+// CHECK:    (('n_strx', 54)
 // CHECK:     ('n_type', 0x2)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 32)
@@ -98,7 +98,7 @@ foo_equals2 = (_foo - _bar + 0xffff0000)
 // CHECK:     ('_string', 'foo_set2')
 // CHECK:    ),
 // CHECK:     # Symbol 4
-// CHECK:    (('n_strx', 61)
+// CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -106,7 +106,7 @@ foo_equals2 = (_foo - _bar + 0xffff0000)
 // CHECK:     ('_string', 'foo_equals')
 // CHECK:    ),
 // CHECK:     # Symbol 5
-// CHECK:    (('n_strx', 72)
+// CHECK:    (('n_strx', 63)
 // CHECK:     ('n_type', 0x2)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
@@ -114,7 +114,7 @@ foo_equals2 = (_foo - _bar + 0xffff0000)
 // CHECK:     ('_string', 'foo_equals2')
 // CHECK:    ),
 // CHECK:     # Symbol 6
-// CHECK:    (('n_strx', 1)
+// CHECK:    (('n_strx', 38)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 32)
@@ -122,7 +122,7 @@ foo_equals2 = (_foo - _bar + 0xffff0000)
 // CHECK:     ('_string', 'foo_set1_global')
 // CHECK:    ),
 // CHECK:     # Symbol 7
-// CHECK:    (('n_strx', 17)
+// CHECK:    (('n_strx', 22)
 // CHECK:     ('n_type', 0x3)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 32)
diff --git a/test/MC/MachO/absolutize.s b/test/MC/MachO/absolutize.s
index 39571dddebd6..19917e3bae1c 100644
--- a/test/MC/MachO/absolutize.s
+++ b/test/MC/MachO/absolutize.s
@@ -150,10 +150,10 @@ Ldata_expr_2 = Ldata_d - Ldata_c
 // CHECK:   ('nsyms', 4)
 // CHECK:   ('stroff', 572)
 // CHECK:   ('strsize', 36)
-// CHECK:   ('_string_data', '\x00_text_a\x00_text_b\x00_data_a\x00_data_b\x00\x00\x00\x00')
+// CHECK:   ('_string_data', '\x00_text_b\x00_data_b\x00_text_a\x00_data_a\x00\x00\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 1)
+// CHECK:    (('n_strx', 17)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -161,7 +161,7 @@ Ldata_expr_2 = Ldata_d - Ldata_c
 // CHECK:     ('_string', '_text_a')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 9)
+// CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -169,7 +169,7 @@ Ldata_expr_2 = Ldata_d - Ldata_c
 // CHECK:     ('_string', '_text_b')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 17)
+// CHECK:    (('n_strx', 25)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -177,7 +177,7 @@ Ldata_expr_2 = Ldata_d - Ldata_c
 // CHECK:     ('_string', '_data_a')
 // CHECK:    ),
 // CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 25)
+// CHECK:    (('n_strx', 9)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
diff --git a/test/MC/MachO/bad-darwin-x86_64-reloc-expr1.s b/test/MC/MachO/bad-darwin-x86_64-reloc-expr1.s
new file mode 100644
index 000000000000..518ae6423dbc
--- /dev/null
+++ b/test/MC/MachO/bad-darwin-x86_64-reloc-expr1.s
@@ -0,0 +1,6 @@
+// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - 2> %t.err > %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
+
+_Z:
+.long (_Z+4)-_b
+// CHECK-ERROR: error: unsupported relocation with subtraction expression, symbol '_b' can not be undefined in a subtraction expression
diff --git a/test/MC/MachO/bad-darwin-x86_64-reloc-expr2.s b/test/MC/MachO/bad-darwin-x86_64-reloc-expr2.s
new file mode 100644
index 000000000000..3aefd87c557c
--- /dev/null
+++ b/test/MC/MachO/bad-darwin-x86_64-reloc-expr2.s
@@ -0,0 +1,6 @@
+// RUN: not llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - 2> %t.err > %t
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t.err %s
+
+_Z:
+.long (_a+4)-_Z
+// CHECK-ERROR: error: unsupported relocation with subtraction expression, symbol '_a' can not be undefined in a subtraction expression
diff --git a/test/MC/MachO/comm-1.s b/test/MC/MachO/comm-1.s
index 5ffa979eb342..cb240f98e14f 100644
--- a/test/MC/MachO/comm-1.s
+++ b/test/MC/MachO/comm-1.s
@@ -51,10 +51,10 @@
 // CHECK:   ('nsyms', 4)
 // CHECK:   ('stroff', 304)
 // CHECK:   ('strsize', 48)
-// CHECK:   ('_string_data', '\x00sym_comm_B\x00sym_comm_A\x00sym_comm_C\x00sym_comm_D\x00\x00\x00\x00')
+// CHECK:   ('_string_data', '\x00sym_comm_D\x00sym_comm_C\x00sym_comm_B\x00sym_comm_A\x00\x00\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 12)
+// CHECK:    (('n_strx', 34)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
@@ -62,7 +62,7 @@
 // CHECK:     ('_string', 'sym_comm_A')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 1)
+// CHECK:    (('n_strx', 23)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
@@ -70,7 +70,7 @@
 // CHECK:     ('_string', 'sym_comm_B')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 23)
+// CHECK:    (('n_strx', 12)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 544)
@@ -78,7 +78,7 @@
 // CHECK:     ('_string', 'sym_comm_C')
 // CHECK:    ),
 // CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 34)
+// CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 768)
diff --git a/test/MC/MachO/darwin-complex-difference.s b/test/MC/MachO/darwin-complex-difference.s
index e66bd096711f..f31d3ade33bc 100644
--- a/test/MC/MachO/darwin-complex-difference.s
+++ b/test/MC/MachO/darwin-complex-difference.s
@@ -74,10 +74,10 @@ _d:
 // CHECK:   ('nsyms', 3)
 // CHECK:   ('stroff', 392)
 // CHECK:   ('strsize', 12)
-// CHECK:   ('_string_data', '\x00_a\x00_c\x00_d\x00\x00\x00')
+// CHECK:   ('_string_data', '\x00_d\x00_c\x00_a\x00\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 1)
+// CHECK:    (('n_strx', 7)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -93,7 +93,7 @@ _d:
 // CHECK:     ('_string', '_c')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 7)
+// CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
diff --git a/test/MC/MachO/darwin-x86_64-diff-reloc-assign-2.s b/test/MC/MachO/darwin-x86_64-diff-reloc-assign-2.s
index 5d548790a7a9..b69cd1b1710b 100644
--- a/test/MC/MachO/darwin-x86_64-diff-reloc-assign-2.s
+++ b/test/MC/MachO/darwin-x86_64-diff-reloc-assign-2.s
@@ -1,38 +1,16 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
-
-// Test case for rdar://9356266
-
-// This tests that this expression does not cause a crash and produces these
-// four relocation entries:
-// Relocation information (__DATA,__data) 4 entries
-// address  pcrel length extern type    scattered symbolnum/value
-// 00000004 False long   False  SUB     False     2 (__DATA,__data)
-// 00000004 False long   False  UNSIGND False     2 (__DATA,__data)
-// 00000000 False long   False  SUB     False     2 (__DATA,__data)
-// 00000000 False long   False  UNSIGND False     2 (__DATA,__data)
+// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -r | FileCheck %s
 
 	.data
 L_var1:
 L_var2:
-// This was working fine
 	.long L_var2 - L_var1
-	
 	.set L_var3, .
 	.set L_var4, .
-// But this was causing a crash
 	.long L_var4 - L_var3
 
-// CHECK:  ('_relocations', [
-// CHECK:    # Relocation 0
-// CHECK:    (('word-0', 0x4),
-// CHECK:     ('word-1', 0x54000002)),
-// CHECK:    # Relocation 1
-// CHECK:    (('word-0', 0x4),
-// CHECK:     ('word-1', 0x4000002)),
-// CHECK:    # Relocation 2
-// CHECK:    (('word-0', 0x0),
-// CHECK:     ('word-1', 0x54000002)),
-// CHECK:    # Relocation 3
-// CHECK:    (('word-0', 0x0),
-// CHECK:     ('word-1', 0x4000002)),
-// CHECK:  ])
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section __data {
+// CHECK-NEXT:     0x4 0 2 0 X86_64_RELOC_SUBTRACTOR 0 0x2
+// CHECK-NEXT:     0x4 0 2 0 X86_64_RELOC_UNSIGNED 0 0x2
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/MachO/darwin-x86_64-diff-relocs.s b/test/MC/MachO/darwin-x86_64-diff-relocs.s
index f5d93ae6c0b9..eb28cf1af158 100644
--- a/test/MC/MachO/darwin-x86_64-diff-relocs.s
+++ b/test/MC/MachO/darwin-x86_64-diff-relocs.s
@@ -258,7 +258,7 @@ L3:
 // CHECK:   ('nsyms', 5)
 // CHECK:   ('stroff', 908)
 // CHECK:   ('strsize', 24)
-// CHECK:   ('_string_data', '\x00_foo\x00_g0\x00_g1\x00_g2\x00_g3\x00\x00\x00')
+// CHECK:   ('_string_data', '\x00_foo\x00_g3\x00_g2\x00_g1\x00_g0\x00\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
 // CHECK:    (('n_strx', 1)
@@ -269,7 +269,7 @@ L3:
 // CHECK:     ('_string', '_foo')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 6)
+// CHECK:    (('n_strx', 18)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -277,7 +277,7 @@ L3:
 // CHECK:     ('_string', '_g0')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 10)
+// CHECK:    (('n_strx', 14)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -285,7 +285,7 @@ L3:
 // CHECK:     ('_string', '_g1')
 // CHECK:    ),
 // CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 14)
+// CHECK:    (('n_strx', 10)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -293,7 +293,7 @@ L3:
 // CHECK:     ('_string', '_g2')
 // CHECK:    ),
 // CHECK:     # Symbol 4
-// CHECK:    (('n_strx', 18)
+// CHECK:    (('n_strx', 6)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
diff --git a/test/MC/MachO/darwin-x86_64-reloc.s b/test/MC/MachO/darwin-x86_64-reloc.s
index 83c0de788f86..48dd6b4b2297 100644
--- a/test/MC/MachO/darwin-x86_64-reloc.s
+++ b/test/MC/MachO/darwin-x86_64-reloc.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -n -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -n -triple x86_64-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -r --expand-relocs | FileCheck %s
 
 // These examples are taken from <mach-o/x86_64/reloc.h>.
 
@@ -86,320 +86,363 @@ L6:
 
         .text
         cmpq $0, _foo@GOTPCREL(%rip)
-        
-// CHECK: ('cputype', 16777223)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 496)
-// CHECK: ('flag', 0)
-// CHECK: ('reserved', 0)
-// CHECK: ('load_commands', [
-// CHECK:   # Load Command 0
-// CHECK:  (('command', 25)
-// CHECK:   ('size', 392)
-// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:   ('vm_addr', 0)
-// CHECK:   ('vm_size', 311)
-// CHECK:   ('file_offset', 528)
-// CHECK:   ('file_size', 311)
-// CHECK:   ('maxprot', 7)
-// CHECK:   ('initprot', 7)
-// CHECK:   ('num_sections', 4)
-// CHECK:   ('flags', 0)
-// CHECK:   ('sections', [
-// CHECK:     # Section 0
-// CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 40)
-// CHECK:     ('offset', 528)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 840)
-// CHECK:     ('num_reloc', 5)
-// CHECK:     ('flags', 0x0)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:     # Relocation 0
-// CHECK:     (('word-0', 0x20),
-// CHECK:      ('word-1', 0x6000004)),
-// CHECK:     # Relocation 1
-// CHECK:     (('word-0', 0x18),
-// CHECK:      ('word-1', 0xe000006)),
-// CHECK:     # Relocation 2
-// CHECK:     (('word-0', 0x10),
-// CHECK:      ('word-1', 0x6000004)),
-// CHECK:     # Relocation 3
-// CHECK:     (('word-0', 0x8),
-// CHECK:      ('word-1', 0x4d000000)),
-// CHECK:     # Relocation 4
-// CHECK:     (('word-0', 0x4),
-// CHECK:      ('word-1', 0x4d000008)),
-// CHECK:   ])
-// CHECK:   ('_section_data', '00000000 04000000 04000000 00000000 1f010000 00000000 00000000 00000000 2f010000 00000000')
-// CHECK:     # Section 1
-// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 40)
-// CHECK:     ('size', 223)
-// CHECK:     ('offset', 568)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 880)
-// CHECK:     ('num_reloc', 32)
-// CHECK:     ('flags', 0x80000400)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:     # Relocation 0
-// CHECK:     (('word-0', 0xda),
-// CHECK:      ('word-1', 0x4d000000)),
-// CHECK:     # Relocation 1
-// CHECK:     (('word-0', 0xd3),
-// CHECK:      ('word-1', 0x15000004)),
-// CHECK:     # Relocation 2
-// CHECK:     (('word-0', 0xcd),
-// CHECK:      ('word-1', 0x1d000006)),
-// CHECK:     # Relocation 3
-// CHECK:     (('word-0', 0xc7),
-// CHECK:      ('word-1', 0x15000004)),
-// CHECK:     # Relocation 4
-// CHECK:     (('word-0', 0xc1),
-// CHECK:      ('word-1', 0x15000001)),
-// CHECK:     # Relocation 5
-// CHECK:     (('word-0', 0xa5),
-// CHECK:      ('word-1', 0x5e000003)),
-// CHECK:     # Relocation 6
-// CHECK:     (('word-0', 0xa5),
-// CHECK:      ('word-1', 0xe000000)),
-// CHECK:     # Relocation 7
-// CHECK:     (('word-0', 0x9d),
-// CHECK:      ('word-1', 0x5e000003)),
-// CHECK:     # Relocation 8
-// CHECK:     (('word-0', 0x9d),
-// CHECK:      ('word-1', 0xe000000)),
-// CHECK:     # Relocation 9
-// CHECK:     (('word-0', 0x95),
-// CHECK:      ('word-1', 0xe000003)),
-// CHECK:     # Relocation 10
-// CHECK:     (('word-0', 0x8d),
-// CHECK:      ('word-1', 0xe000003)),
-// CHECK:     # Relocation 11
-// CHECK:     (('word-0', 0x79),
-// CHECK:      ('word-1', 0x8d000003)),
-// CHECK:     # Relocation 12
-// CHECK:     (('word-0', 0x71),
-// CHECK:      ('word-1', 0x7d000003)),
-// CHECK:     # Relocation 13
-// CHECK:     (('word-0', 0x69),
-// CHECK:      ('word-1', 0x6d000003)),
-// CHECK:     # Relocation 14
-// CHECK:     (('word-0', 0x63),
-// CHECK:      ('word-1', 0x1d000003)),
-// CHECK:     # Relocation 15
-// CHECK:     (('word-0', 0x5c),
-// CHECK:      ('word-1', 0x1d000003)),
-// CHECK:     # Relocation 16
-// CHECK:     (('word-0', 0x55),
-// CHECK:      ('word-1', 0x5c000002)),
-// CHECK:     # Relocation 17
-// CHECK:     (('word-0', 0x55),
-// CHECK:      ('word-1', 0xc000000)),
-// CHECK:     # Relocation 18
-// CHECK:     (('word-0', 0x4d),
-// CHECK:      ('word-1', 0x5e000002)),
-// CHECK:     # Relocation 19
-// CHECK:     (('word-0', 0x4d),
-// CHECK:      ('word-1', 0xe000000)),
-// CHECK:     # Relocation 20
-// CHECK:     (('word-0', 0x45),
-// CHECK:      ('word-1', 0x5e000002)),
-// CHECK:     # Relocation 21
-// CHECK:     (('word-0', 0x45),
-// CHECK:      ('word-1', 0xe000000)),
-// CHECK:     # Relocation 22
-// CHECK:     (('word-0', 0x3d),
-// CHECK:      ('word-1', 0xe000000)),
-// CHECK:     # Relocation 23
-// CHECK:     (('word-0', 0x35),
-// CHECK:      ('word-1', 0xe000000)),
-// CHECK:     # Relocation 24
-// CHECK:     (('word-0', 0x2d),
-// CHECK:      ('word-1', 0x8d000000)),
-// CHECK:     # Relocation 25
-// CHECK:     (('word-0', 0x26),
-// CHECK:      ('word-1', 0x6d000000)),
-// CHECK:     # Relocation 26
-// CHECK:     (('word-0', 0x20),
-// CHECK:      ('word-1', 0x1d000000)),
-// CHECK:     # Relocation 27
-// CHECK:     (('word-0', 0x1a),
-// CHECK:      ('word-1', 0x1d000000)),
-// CHECK:     # Relocation 28
-// CHECK:     (('word-0', 0x14),
-// CHECK:      ('word-1', 0x4d000000)),
-// CHECK:     # Relocation 29
-// CHECK:     (('word-0', 0xe),
-// CHECK:      ('word-1', 0x3d000000)),
-// CHECK:     # Relocation 30
-// CHECK:     (('word-0', 0x7),
-// CHECK:      ('word-1', 0x2d000000)),
-// CHECK:     # Relocation 31
-// CHECK:     (('word-0', 0x2),
-// CHECK:      ('word-1', 0x2d000000)),
-// CHECK:   ])
-// CHECK:   ('_section_data', 'c3e80000 0000e804 00000048 8b050000 0000ff35 00000000 8b050000 00008b05 04000000 c605ffff ffff12c7 05fcffff ff785634 12000000 00000000 00040000 00000000 00000000 00000000 00040000 00000000 00000000 00488d05 2c000000 488d0514 00000083 05130000 00066681 05120000 00f40181 05100000 00f40100 00909090 90909090 90909090 902c0000 00000000 00140000 00000000 00e4ffff ffffffff ffd4ffff ffffffff ff2c0000 00000000 0083c000 03042503 0000008b 051fffff ff8b052c 0000008b 05000000 008b0530 00000048 833dffff ffff00')
-// CHECK:     # Section 2
-// CHECK:    (('section_name', '__debug_frame\x00\x00\x00')
-// CHECK:     ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 263)
-// CHECK:     ('size', 16)
-// CHECK:     ('offset', 791)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 1136)
-// CHECK:     ('num_reloc', 2)
-// CHECK:     ('flags', 0x2000000)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:     # Relocation 0
-// CHECK:     (('word-0', 0x8),
-// CHECK:      ('word-1', 0xe000007)),
-// CHECK:     # Relocation 1
-// CHECK:     (('word-0', 0x0),
-// CHECK:      ('word-1', 0x6000002)),
-// CHECK:   ])
-// CHECK:   ('_section_data', 'd5000000 00000000 00000000 00000000')
-// CHECK:     # Section 3
-// CHECK:    (('section_name', '__literal8\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 279)
-// CHECK:     ('size', 32)
-// CHECK:     ('offset', 807)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x4)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:   ('_section_data', '00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000')
-// CHECK:   ])
-// CHECK:  ),
-// CHECK:   # Load Command 1
-// CHECK:  (('command', 2)
-// CHECK:   ('size', 24)
-// CHECK:   ('symoff', 1152)
-// CHECK:   ('nsyms', 9)
-// CHECK:   ('stroff', 1296)
-// CHECK:   ('strsize', 52)
-// CHECK:   ('_string_data', '\x00_foobar\x00_ext_foo\x00_foo\x00_baz\x00_bar\x00_prev\x00_f2\x00_f3\x00f6\x00\x00\x00')
-// CHECK:   ('_symbols', [
-// CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 18)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 2)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 40)
-// CHECK:     ('_string', '_foo')
-// CHECK:    ),
-// CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 23)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 2)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 41)
-// CHECK:     ('_string', '_baz')
-// CHECK:    ),
-// CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 28)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 2)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 101)
-// CHECK:     ('_string', '_bar')
-// CHECK:    ),
-// CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 33)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 2)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 169)
-// CHECK:     ('_string', '_prev')
-// CHECK:    ),
-// CHECK:     # Symbol 4
-// CHECK:    (('n_strx', 39)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 2)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 221)
-// CHECK:     ('_string', '_f2')
-// CHECK:    ),
-// CHECK:     # Symbol 5
-// CHECK:    (('n_strx', 43)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 2)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 224)
-// CHECK:     ('_string', '_f3')
-// CHECK:    ),
-// CHECK:     # Symbol 6
-// CHECK:    (('n_strx', 47)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 4)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 295)
-// CHECK:     ('_string', 'f6')
-// CHECK:    ),
-// CHECK:     # Symbol 7
-// CHECK:    (('n_strx', 9)
-// CHECK:     ('n_type', 0x1)
-// CHECK:     ('n_sect', 0)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', '_ext_foo')
-// CHECK:    ),
-// CHECK:     # Symbol 8
-// CHECK:    (('n_strx', 1)
-// CHECK:     ('n_type', 0x1)
-// CHECK:     ('n_sect', 0)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', '_foobar')
-// CHECK:    ),
-// CHECK:   ])
-// CHECK:  ),
-// CHECK:   # Load Command 2
-// CHECK:  (('command', 11)
-// CHECK:   ('size', 80)
-// CHECK:   ('ilocalsym', 0)
-// CHECK:   ('nlocalsym', 7)
-// CHECK:   ('iextdefsym', 7)
-// CHECK:   ('nextdefsym', 0)
-// CHECK:   ('iundefsym', 7)
-// CHECK:   ('nundefsym', 2)
-// CHECK:   ('tocoff', 0)
-// CHECK:   ('ntoc', 0)
-// CHECK:   ('modtaboff', 0)
-// CHECK:   ('nmodtab', 0)
-// CHECK:   ('extrefsymoff', 0)
-// CHECK:   ('nextrefsyms', 0)
-// CHECK:   ('indirectsymoff', 0)
-// CHECK:   ('nindirectsyms', 0)
-// CHECK:   ('extreloff', 0)
-// CHECK:   ('nextrel', 0)
-// CHECK:   ('locreloff', 0)
-// CHECK:   ('nlocrel', 0)
-// CHECK:   ('_indirect_symbols', [
-// CHECK:   ])
-// CHECK:  ),
-// CHECK: ])
+
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section __data {
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x20
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 0
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: 0x4
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x18
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: f6
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x10
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 0
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: 0x4
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x8
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_GOT (4)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x4
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_GOT (4)
+// CHECK-NEXT:       Symbol: _foobar
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section __text {
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0xDA
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_GOT (4)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0xD3
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 0
+// CHECK-NEXT:       Type: X86_64_RELOC_SIGNED (1)
+// CHECK-NEXT:       Symbol: 0x4
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0xCD
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_SIGNED (1)
+// CHECK-NEXT:       Symbol: f6
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0xC7
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 0
+// CHECK-NEXT:       Type: X86_64_RELOC_SIGNED (1)
+// CHECK-NEXT:       Symbol: 0x4
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0xC1
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 0
+// CHECK-NEXT:       Type: X86_64_RELOC_SIGNED (1)
+// CHECK-NEXT:       Symbol: 0x1
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0xA5
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_SUBTRACTOR (5)
+// CHECK-NEXT:       Symbol: _prev
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0xA5
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x9D
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_SUBTRACTOR (5)
+// CHECK-NEXT:       Symbol: _prev
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x9D
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x95
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: _prev
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x8D
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: _prev
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x79
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_SIGNED_4 (8)
+// CHECK-NEXT:       Symbol: _prev
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x71
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_SIGNED_2 (7)
+// CHECK-NEXT:       Symbol: _prev
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x69
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_SIGNED_1 (6)
+// CHECK-NEXT:       Symbol: _prev
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x63
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_SIGNED (1)
+// CHECK-NEXT:       Symbol: _prev
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x5C
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_SIGNED (1)
+// CHECK-NEXT:       Symbol: _prev
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x55
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_SUBTRACTOR (5)
+// CHECK-NEXT:       Symbol: _bar
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x55
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x4D
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_SUBTRACTOR (5)
+// CHECK-NEXT:       Symbol: _bar
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x4D
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x45
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_SUBTRACTOR (5)
+// CHECK-NEXT:       Symbol: _bar
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x45
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x3D
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x35
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x2D
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_SIGNED_4 (8)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x26
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_SIGNED_1 (6)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x20
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_SIGNED (1)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x1A
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_SIGNED (1)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x14
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_GOT (4)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0xE
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_GOT_LOAD (3)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x7
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_BRANCH (2)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x2
+// CHECK-NEXT:       PCRel: 1
+// CHECK-NEXT:       Length: 2
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_BRANCH (2)
+// CHECK-NEXT:       Symbol: _foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Section __debug_frame {
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x8
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: _ext_foo
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x0
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 0
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: 0x2
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/MachO/empty-dwarf-lines.s b/test/MC/MachO/empty-dwarf-lines.s
deleted file mode 100644
index 4bdc16b55f5c..000000000000
--- a/test/MC/MachO/empty-dwarf-lines.s
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
-
-// This tests that when producing files for darwin9 or older we make sure
-// that debug_line sections are of a minimum size to avoid the linker bug
-// described in PR8715.
-
-        .section        __DATA,__data
-        .file   1 "test.c"
-        .globl  _c                      ## @c
-_c:
-        .asciz   "hi\n"
-
-// CHECK:      (('section_name', '__debug_line\x00\x00\x00\x00')
-// CHECK-NEXT:  ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT:  ('address', 4)
-// CHECK-NEXT:  ('size', 44)
-// CHECK-NEXT:  ('offset', 452)
-// CHECK-NEXT:  ('alignment', 0)
-// CHECK-NEXT:  ('reloc_offset', 0)
-// CHECK-NEXT:  ('num_reloc', 0)
-// CHECK-NEXT:  ('flags', 0x2000000)
-// CHECK-NEXT:  ('reserved1', 0)
-// CHECK-NEXT:  ('reserved2', 0)
-// CHECK-NEXT:  ('reserved3', 0)
-// CHECK-NEXT: ),
diff --git a/test/MC/MachO/file.s b/test/MC/MachO/file.s
index 0168747ae19a..a7d6c20b885f 100644
--- a/test/MC/MachO/file.s
+++ b/test/MC/MachO/file.s
@@ -1,22 +1,27 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -s -section-data | FileCheck %s
 
         .file	1 "dir/foo"
         nop
 
-// CHECK:         ('_section_data', '90')
-// CHECK-NEXT:      # Section 1
-// CHECK-NEXT:     (('section_name', '__debug_line\x00\x00\x00\x00')
-// CHECK-NEXT:      ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK-NEXT:      ('address', 1)
-// CHECK-NEXT:      ('size', 45)
-// CHECK-NEXT:      ('offset', 221)
-// CHECK-NEXT:      ('alignment', 0)
-// CHECK-NEXT:      ('reloc_offset', 0)
-// CHECK-NEXT:      ('num_reloc', 0)
-// CHECK-NEXT:      ('flags', 0x2000000)
-// CHECK-NEXT:      ('reserved1', 0)
-// CHECK-NEXT:      ('reserved2', 0)
-// CHECK-NEXT:     ),
-// CHECK-NEXT:    ('_relocations', [
-// CHECK-NEXT:    ])
-// CHECK-NEXT:    ('_section_data', '29000000 02001e00 00000101 fb0e0d00 01010101 00000001 00000164 69720000 666f6f00 01000000 02000001 01')
+// CHECK:       Section {
+// CHECK:         Index: 1
+// CHECK-NEXT:    Name: __debug_line
+// CHECK-NEXT:    Segment: __DWARF
+// CHECK-NEXT:    Address: 0x1
+// CHECK-NEXT:    Size: 0x28
+// CHECK-NEXT:    Offset: 221
+// CHECK-NEXT:    Alignment: 0
+// CHECK-NEXT:    RelocationOffset: 0x0
+// CHECK-NEXT:    RelocationCount: 0
+// CHECK-NEXT:    Type: 0x0
+// CHECK-NEXT:    Attributes [ (0x20000)
+// CHECK-NEXT:      Debug (0x20000)
+// CHECK-NEXT:    ]
+// CHECK-NEXT:    Reserved1: 0x0
+// CHECK-NEXT:    Reserved2: 0x0
+// CHECK-NEXT:    SectionData (
+// CHECK-NEXT:      0000: 24000000 02001E00 00000101 FB0E0D00
+// CHECK-NEXT:      0010: 01010101 00000001 00000164 69720000
+// CHECK-NEXT:      0020: 666F6F00 01000000
+// CHECK-NEXT:    )
+// CHECK-NEXT:  }
diff --git a/test/MC/MachO/gen-dwarf.s b/test/MC/MachO/gen-dwarf.s
index 997c83498ef1..ad0a562aaf70 100644
--- a/test/MC/MachO/gen-dwarf.s
+++ b/test/MC/MachO/gen-dwarf.s
@@ -46,12 +46,12 @@ _x:	.long 1
 // CHECK:    DW_AT_name [DW_FORM_string]
 // We don't check the DW_AT_comp_dir which is the current working directory
 // CHECK:    DW_AT_producer [DW_FORM_string]	("llvm-mc (based on {{.*}})")
-// CHECK:    DW_AT_language [DW_FORM_data2]	(0x8001)
+// CHECK:    DW_AT_language [DW_FORM_data2]	(DW_LANG_Mips_Assembler)
 
 // CHECK:    DW_TAG_label [2] *
 // CHECK:      DW_AT_name [DW_FORM_string]	("bar")
-// CHECK:      DW_AT_decl_file [DW_FORM_data4]	(0x00000001)
-// CHECK:      DW_AT_decl_line [DW_FORM_data4]	(0x00000005)
+// CHECK:      DW_AT_decl_file [DW_FORM_data4]	([[FILE:".*gen-dwarf.s"]])
+// CHECK:      DW_AT_decl_line [DW_FORM_data4]	(5)
 // CHECK:      DW_AT_low_pc [DW_FORM_addr]	(0x0000000000000000)
 // CHECK:      DW_AT_prototyped [DW_FORM_flag]	(0x00)
 
@@ -61,8 +61,8 @@ _x:	.long 1
 
 // CHECK:    DW_TAG_label [2] *
 // CHECK:      DW_AT_name [DW_FORM_string]	("foo")
-// CHECK:      DW_AT_decl_file [DW_FORM_data4]	(0x00000001)
-// CHECK:      DW_AT_decl_line [DW_FORM_data4]	(0x00000009)
+// CHECK:      DW_AT_decl_file [DW_FORM_data4]	([[FILE]])
+// CHECK:      DW_AT_decl_line [DW_FORM_data4]	(9)
 // CHECK:      DW_AT_low_pc [DW_FORM_addr]	(0x0000000000000007)
 // CHECK:      DW_AT_prototyped [DW_FORM_flag]	(0x00)
 
@@ -72,8 +72,8 @@ _x:	.long 1
 
 // CHECK:    DW_TAG_label [2] *
 // CHECK:      DW_AT_name [DW_FORM_string]	("baz")
-// CHECK:      DW_AT_decl_file [DW_FORM_data4]	(0x00000001)
-// CHECK:      DW_AT_decl_line [DW_FORM_data4]	(0x0000000a)
+// CHECK:      DW_AT_decl_file [DW_FORM_data4]	([[FILE]])
+// CHECK:      DW_AT_decl_line [DW_FORM_data4]	(10)
 // CHECK:      DW_AT_low_pc [DW_FORM_addr]	(0x0000000000000007)
 // CHECK:      DW_AT_prototyped [DW_FORM_flag]	(0x00)
 
diff --git a/test/MC/MachO/indirect-symbols.s b/test/MC/MachO/indirect-symbols.s
index 90fd23154754..079576833cfe 100644
--- a/test/MC/MachO/indirect-symbols.s
+++ b/test/MC/MachO/indirect-symbols.s
@@ -97,10 +97,10 @@ _e:
 // CHECK:   ('nsyms', 6)
 // CHECK:   ('stroff', 516)
 // CHECK:   ('strsize', 20)
-// CHECK:   ('_string_data', '\x00_d\x00_a\x00_b\x00_c\x00_e\x00_f\x00\x00')
+// CHECK:   ('_string_data', '\x00_f\x00_e\x00_d\x00_c\x00_b\x00_a\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 7)
+// CHECK:    (('n_strx', 13)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -116,7 +116,7 @@ _e:
 // CHECK:     ('_string', '_c')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 13)
+// CHECK:    (('n_strx', 4)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -124,7 +124,7 @@ _e:
 // CHECK:     ('_string', '_e')
 // CHECK:    ),
 // CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 16)
+// CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0x2)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
@@ -132,7 +132,7 @@ _e:
 // CHECK:     ('_string', '_f')
 // CHECK:    ),
 // CHECK:     # Symbol 4
-// CHECK:    (('n_strx', 4)
+// CHECK:    (('n_strx', 16)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 1)
@@ -140,7 +140,7 @@ _e:
 // CHECK:     ('_string', '_a')
 // CHECK:    ),
 // CHECK:     # Symbol 5
-// CHECK:    (('n_strx', 1)
+// CHECK:    (('n_strx', 7)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
diff --git a/test/MC/MachO/lcomm-attributes.s b/test/MC/MachO/lcomm-attributes.s
index 1e9592438fe0..6e49e8016d1c 100644
--- a/test/MC/MachO/lcomm-attributes.s
+++ b/test/MC/MachO/lcomm-attributes.s
@@ -73,10 +73,10 @@
 // CHECK:   ('nsyms', 4)
 // CHECK:   ('stroff', 372)
 // CHECK:   ('strsize', 68)
-// CHECK:   ('_string_data', '\x00sym_lcomm_ext_A\x00sym_lcomm_ext_B\x00sym_zfill_ext_A\x00sym_zfill_ext_B\x00\x00\x00\x00')
+// CHECK:   ('_string_data', '\x00sym_lcomm_ext_B\x00sym_zfill_ext_B\x00sym_lcomm_ext_A\x00sym_zfill_ext_A\x00\x00\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 1)
+// CHECK:    (('n_strx', 33)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -84,7 +84,7 @@
 // CHECK:     ('_string', 'sym_lcomm_ext_A')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 17)
+// CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -92,7 +92,7 @@
 // CHECK:     ('_string', 'sym_lcomm_ext_B')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 33)
+// CHECK:    (('n_strx', 49)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -100,7 +100,7 @@
 // CHECK:     ('_string', 'sym_zfill_ext_A')
 // CHECK:    ),
 // CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 49)
+// CHECK:    (('n_strx', 17)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
diff --git a/test/MC/MachO/linker-options.ll b/test/MC/MachO/linker-options.ll
index 827adfd70890..2cda835c100c 100644
--- a/test/MC/MachO/linker-options.ll
+++ b/test/MC/MachO/linker-options.ll
@@ -34,10 +34,6 @@
 ; CHECK-OBJ:  ),
 ; CHECK-OBJ: ])
 
-!0 = metadata !{ i32 6, metadata !"Linker Options", 
-   metadata !{
-      metadata !{ metadata !"-lz" },
-      metadata !{ metadata !"-framework", metadata !"Cocoa" },
-      metadata !{ metadata !"-lmath" } } }
+!0 = !{i32 6, !"Linker Options", !{!{!"-lz"}, !{!"-framework", !"Cocoa"}, !{!"-lmath"}}}
 
 !llvm.module.flags = !{ !0 }
diff --git a/test/MC/MachO/osx-version-min-load-command.s b/test/MC/MachO/osx-version-min-load-command.s
index 2a73609dc012..cb62565cef9a 100644
--- a/test/MC/MachO/osx-version-min-load-command.s
+++ b/test/MC/MachO/osx-version-min-load-command.s
@@ -6,5 +6,5 @@
 // CHECK:  (('command', 36)
 // CHECK:   ('size', 16)
 // CHECK:   ('version, 1639169)
-// CHECK:   ('reserved, 0)
+// CHECK:   ('sdk, 0)
 // CHECK:  ),
diff --git a/test/MC/MachO/reloc.s b/test/MC/MachO/reloc.s
index f6a3446b51b4..55c99402529a 100644
--- a/test/MC/MachO/reloc.s
+++ b/test/MC/MachO/reloc.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -r -expand-relocs | FileCheck %s
 
         .data
         .long undef
@@ -53,240 +53,164 @@ _f1:
         .long _f1
         .long _f1 + 4
 
-// CHECK: ('cputype', 7)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 364)
-// CHECK: ('flag', 0)
-// CHECK: ('load_commands', [
-// CHECK:   # Load Command 0
-// CHECK:  (('command', 1)
-// CHECK:   ('size', 260)
-// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:   ('vm_addr', 0)
-// CHECK:   ('vm_size', 76)
-// CHECK:   ('file_offset', 392)
-// CHECK:   ('file_size', 76)
-// CHECK:   ('maxprot', 7)
-// CHECK:   ('initprot', 7)
-// CHECK:   ('num_sections', 3)
-// CHECK:   ('flags', 0)
-// CHECK:   ('sections', [
-// CHECK:     # Section 0
-// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 13)
-// CHECK:     ('offset', 392)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 468)
-// CHECK:     ('num_reloc', 2)
-// CHECK:     ('flags', 0x80000400)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:     # Relocation 0
-// CHECK:     (('word-0', 0x6),
-// CHECK:      ('word-1', 0x5000003)),
-// CHECK:     # Relocation 1
-// CHECK:     (('word-0', 0x1),
-// CHECK:      ('word-1', 0x5000000)),
-// CHECK:   ])
-// CHECK:   ('_section_data', 'e9f9cabe bae93a00 0000ebf4 c3')
-// CHECK:     # Section 1
-// CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 13)
-// CHECK:     ('size', 51)
-// CHECK:     ('offset', 405)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 484)
-// CHECK:     ('num_reloc', 11)
-// CHECK:     ('flags', 0x0)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:     # Relocation 0
-// CHECK:     (('word-0', 0x2f),
-// CHECK:      ('word-1', 0xc000007)),
-// CHECK:     # Relocation 1
-// CHECK:     (('word-0', 0x2b),
-// CHECK:      ('word-1', 0xc000007)),
-// CHECK:     # Relocation 2
-// CHECK:     (('word-0', 0x8000002a),
-// CHECK:      ('word-1', 0x1d)),
-// CHECK:     # Relocation 3
-// CHECK:     (('word-0', 0x90000028),
-// CHECK:      ('word-1', 0x1d)),
-// CHECK:     # Relocation 4
-// CHECK:     (('word-0', 0xa0000024),
-// CHECK:      ('word-1', 0x1d)),
-// CHECK:     # Relocation 5
-// CHECK:     (('word-0', 0xa0000020),
-// CHECK:      ('word-1', 0x1d)),
-// CHECK:     # Relocation 6
-// CHECK:     (('word-0', 0xa4000014),
-// CHECK:      ('word-1', 0x21)),
-// CHECK:     # Relocation 7
-// CHECK:     (('word-0', 0xa1000000),
-// CHECK:      ('word-1', 0x29)),
-// CHECK:     # Relocation 8
-// CHECK:     (('word-0', 0x8),
-// CHECK:      ('word-1', 0x4000002)),
-// CHECK:     # Relocation 9
-// CHECK:     (('word-0', 0x4),
-// CHECK:      ('word-1', 0xc000009)),
-// CHECK:     # Relocation 10
-// CHECK:     (('word-0', 0x0),
-// CHECK:      ('word-1', 0xc000009)),
-// CHECK:   ])
-// CHECK:   ('_section_data', '00000000 04000000 15000000 00000000 00000000 ed000000 00000000 00000000 1e000000 27000000 31007600 00000004 000000')
-// CHECK:     # Section 2
-// CHECK:    (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 64)
-// CHECK:     ('size', 12)
-// CHECK:     ('offset', 456)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 572)
-// CHECK:     ('num_reloc', 4)
-// CHECK:     ('flags', 0x0)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:     # Relocation 0
-// CHECK:     (('word-0', 0x8),
-// CHECK:      ('word-1', 0x4000001)),
-// CHECK:     # Relocation 1
-// CHECK:     (('word-0', 0x4),
-// CHECK:      ('word-1', 0x4000003)),
-// CHECK:     # Relocation 2
-// CHECK:     (('word-0', 0xa4000000),
-// CHECK:      ('word-1', 0x1d)),
-// CHECK:     # Relocation 3
-// CHECK:     (('word-0', 0xa1000000),
-// CHECK:      ('word-1', 0x40)),
-// CHECK:   ])
-// CHECK:   ('_section_data', 'feffffff 44000000 00000000')
-// CHECK:   ])
-// CHECK:  ),
-// CHECK:   # Load Command 1
-// CHECK:  (('command', 2)
-// CHECK:   ('size', 24)
-// CHECK:   ('symoff', 604)
-// CHECK:   ('nsyms', 10)
-// CHECK:   ('stroff', 724)
-// CHECK:   ('strsize', 88)
-// CHECK:   ('_string_data', '\x00undef\x00local_a_ext\x00.objc_class_name_A\x00_f1\x00local_a\x00local_a_elt\x00local_b\x00local_c\x00bar\x00_f0\x00\x00\x00')
-// CHECK:   ('_symbols', [
-// CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 42)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 2)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 25)
-// CHECK:     ('_string', 'local_a')
-// CHECK:    ),
-// CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 50)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 2)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 29)
-// CHECK:     ('_string', 'local_a_elt')
-// CHECK:    ),
-// CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 62)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 2)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 33)
-// CHECK:     ('_string', 'local_b')
-// CHECK:    ),
-// CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 70)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 2)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 41)
-// CHECK:     ('_string', 'local_c')
-// CHECK:    ),
-// CHECK:     # Symbol 4
-// CHECK:    (('n_strx', 78)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 3)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 64)
-// CHECK:     ('_string', 'bar')
-// CHECK:    ),
-// CHECK:     # Symbol 5
-// CHECK:    (('n_strx', 82)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 1)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', '_f0')
-// CHECK:    ),
-// CHECK:     # Symbol 6
-// CHECK:    (('n_strx', 19)
-// CHECK:     ('n_type', 0x3)
-// CHECK:     ('n_sect', 0)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', '.objc_class_name_A')
-// CHECK:    ),
-// CHECK:     # Symbol 7
-// CHECK:    (('n_strx', 38)
-// CHECK:     ('n_type', 0xf)
-// CHECK:     ('n_sect', 1)
-// CHECK:     ('n_desc', 128)
-// CHECK:     ('n_value', 13)
-// CHECK:     ('_string', '_f1')
-// CHECK:    ),
-// CHECK:     # Symbol 8
-// CHECK:    (('n_strx', 7)
-// CHECK:     ('n_type', 0xf)
-// CHECK:     ('n_sect', 2)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 21)
-// CHECK:     ('_string', 'local_a_ext')
-// CHECK:    ),
-// CHECK:     # Symbol 9
-// CHECK:    (('n_strx', 1)
-// CHECK:     ('n_type', 0x1)
-// CHECK:     ('n_sect', 0)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'undef')
-// CHECK:    ),
-// CHECK:   ])
-// CHECK:  ),
-// CHECK:   # Load Command 2
-// CHECK:  (('command', 11)
-// CHECK:   ('size', 80)
-// CHECK:   ('ilocalsym', 0)
-// CHECK:   ('nlocalsym', 6)
-// CHECK:   ('iextdefsym', 6)
-// CHECK:   ('nextdefsym', 3)
-// CHECK:   ('iundefsym', 9)
-// CHECK:   ('nundefsym', 1)
-// CHECK:   ('tocoff', 0)
-// CHECK:   ('ntoc', 0)
-// CHECK:   ('modtaboff', 0)
-// CHECK:   ('nmodtab', 0)
-// CHECK:   ('extrefsymoff', 0)
-// CHECK:   ('nextrefsyms', 0)
-// CHECK:   ('indirectsymoff', 0)
-// CHECK:   ('nindirectsyms', 0)
-// CHECK:   ('extreloff', 0)
-// CHECK:   ('nextrel', 0)
-// CHECK:   ('locreloff', 0)
-// CHECK:   ('nlocrel', 0)
-// CHECK:   ('_indirect_symbols', [
-// CHECK:   ])
-// CHECK:  ),
-// CHECK: ])
+// CHECK:     Relocations [
+// CHECK-NEXT:  Section __text {
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x6
+// CHECK-NEXT:      PCRel: 1
+// CHECK-NEXT:      Length: 2
+// CHECK-NEXT:      Extern: 0
+// CHECK-NEXT:      Type: GENERIC_RELOC_VANILLA (0)
+// CHECK-NEXT:      Symbol: 0x3
+// CHECK-NEXT:      Scattered: 0
+// CHECK-NEXT:    }
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x1
+// CHECK-NEXT:      PCRel: 1
+// CHECK-NEXT:      Length: 2
+// CHECK-NEXT:      Extern: 0
+// CHECK-NEXT:      Type: GENERIC_RELOC_VANILLA (0)
+// CHECK-NEXT:      Symbol: 0x0
+// CHECK-NEXT:      Scattered: 0
+// CHECK-NEXT:    }
+// CHECK-NEXT:  }
+// CHECK-NEXT:  Section __data {
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x2F
+// CHECK-NEXT:      PCRel: 0
+// CHECK-NEXT:      Length: 2
+// CHECK-NEXT:      Extern: 1
+// CHECK-NEXT:      Type: GENERIC_RELOC_VANILLA (0)
+// CHECK-NEXT:      Symbol: _f1
+// CHECK-NEXT:      Scattered: 0
+// CHECK-NEXT:    }
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x2B
+// CHECK-NEXT:      PCRel: 0
+// CHECK-NEXT:      Length: 2
+// CHECK-NEXT:      Extern: 1
+// CHECK-NEXT:      Type: GENERIC_RELOC_VANILLA (0)
+// CHECK-NEXT:      Symbol: _f1
+// CHECK-NEXT:      Scattered: 0
+// CHECK-NEXT:    }
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x2A
+// CHECK-NEXT:      PCRel: 0
+// CHECK-NEXT:      Length: 0
+// CHECK-NEXT:      Extern: N/A
+// CHECK-NEXT:      Type: GENERIC_RELOC_VANILLA (0)
+// CHECK-NEXT:      Symbol: 0x1D
+// CHECK-NEXT:      Scattered: 1
+// CHECK-NEXT:    }
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x28
+// CHECK-NEXT:      PCRel: 0
+// CHECK-NEXT:      Length: 1
+// CHECK-NEXT:      Extern: N/A
+// CHECK-NEXT:      Type: GENERIC_RELOC_VANILLA (0)
+// CHECK-NEXT:      Symbol: 0x1D
+// CHECK-NEXT:      Scattered: 1
+// CHECK-NEXT:    }
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x24
+// CHECK-NEXT:      PCRel: 0
+// CHECK-NEXT:      Length: 2
+// CHECK-NEXT:      Extern: N/A
+// CHECK-NEXT:      Type: GENERIC_RELOC_VANILLA (0)
+// CHECK-NEXT:      Symbol: 0x1D
+// CHECK-NEXT:      Scattered: 1
+// CHECK-NEXT:    }
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x20
+// CHECK-NEXT:      PCRel: 0
+// CHECK-NEXT:      Length: 2
+// CHECK-NEXT:      Extern: N/A
+// CHECK-NEXT:      Type: GENERIC_RELOC_VANILLA (0)
+// CHECK-NEXT:      Symbol: 0x1D
+// CHECK-NEXT:      Scattered: 1
+// CHECK-NEXT:    }
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x14
+// CHECK-NEXT:      PCRel: 0
+// CHECK-NEXT:      Length: 2
+// CHECK-NEXT:      Extern: N/A
+// CHECK-NEXT:      Type: GENERIC_RELOC_LOCAL_SECTDIFF (4)
+// CHECK-NEXT:      Symbol: 0x21
+// CHECK-NEXT:      Scattered: 1
+// CHECK-NEXT:    }
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x0
+// CHECK-NEXT:      PCRel: 0
+// CHECK-NEXT:      Length: 2
+// CHECK-NEXT:      Extern: N/A
+// CHECK-NEXT:      Type: GENERIC_RELOC_PAIR (1)
+// CHECK-NEXT:      Symbol: 0x29
+// CHECK-NEXT:      Scattered: 1
+// CHECK-NEXT:    }
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x8
+// CHECK-NEXT:      PCRel: 0
+// CHECK-NEXT:      Length: 2
+// CHECK-NEXT:      Extern: 0
+// CHECK-NEXT:      Type: GENERIC_RELOC_VANILLA (0)
+// CHECK-NEXT:      Symbol: 0x2
+// CHECK-NEXT:      Scattered: 0
+// CHECK-NEXT:    }
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x4
+// CHECK-NEXT:      PCRel: 0
+// CHECK-NEXT:      Length: 2
+// CHECK-NEXT:      Extern: 1
+// CHECK-NEXT:      Type: GENERIC_RELOC_VANILLA (0)
+// CHECK-NEXT:      Symbol: undef
+// CHECK-NEXT:      Scattered: 0
+// CHECK-NEXT:    }
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x0
+// CHECK-NEXT:      PCRel: 0
+// CHECK-NEXT:      Length: 2
+// CHECK-NEXT:      Extern: 1
+// CHECK-NEXT:      Type: GENERIC_RELOC_VANILLA (0)
+// CHECK-NEXT:      Symbol: undef
+// CHECK-NEXT:      Scattered: 0
+// CHECK-NEXT:    }
+// CHECK-NEXT:  }
+// CHECK-NEXT:  Section __const {
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x8
+// CHECK-NEXT:      PCRel: 0
+// CHECK-NEXT:      Length: 2
+// CHECK-NEXT:      Extern: 0
+// CHECK-NEXT:      Type: GENERIC_RELOC_VANILLA (0)
+// CHECK-NEXT:      Symbol: 0x1
+// CHECK-NEXT:      Scattered: 0
+// CHECK-NEXT:    }
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x4
+// CHECK-NEXT:      PCRel: 0
+// CHECK-NEXT:      Length: 2
+// CHECK-NEXT:      Extern: 0
+// CHECK-NEXT:      Type: GENERIC_RELOC_VANILLA (0)
+// CHECK-NEXT:      Symbol: 0x3
+// CHECK-NEXT:      Scattered: 0
+// CHECK-NEXT:    }
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x0
+// CHECK-NEXT:      PCRel: 0
+// CHECK-NEXT:      Length: 2
+// CHECK-NEXT:      Extern: N/A
+// CHECK-NEXT:      Type: GENERIC_RELOC_LOCAL_SECTDIFF (4)
+// CHECK-NEXT:      Symbol: 0x1D
+// CHECK-NEXT:      Scattered: 1
+// CHECK-NEXT:    }
+// CHECK-NEXT:    Relocation {
+// CHECK-NEXT:      Offset: 0x0
+// CHECK-NEXT:      PCRel: 0
+// CHECK-NEXT:      Length: 2
+// CHECK-NEXT:      Extern: N/A
+// CHECK-NEXT:      Type: GENERIC_RELOC_PAIR (1)
+// CHECK-NEXT:      Symbol: 0x40
+// CHECK-NEXT:      Scattered: 1
+// CHECK-NEXT:    }
+// CHECK-NEXT:  }
+// CHECK-NEXT:]
diff --git a/test/MC/MachO/section-align-2.s b/test/MC/MachO/section-align-2.s
index e0d7b8df8f48..086fc4a4f15a 100644
--- a/test/MC/MachO/section-align-2.s
+++ b/test/MC/MachO/section-align-2.s
@@ -82,10 +82,10 @@ baz:
 // CHECK:   ('nsyms', 3)
 // CHECK:   ('stroff', 444)
 // CHECK:   ('strsize', 16)
-// CHECK:   ('_string_data', '\x00foo\x00bar\x00baz\x00\x00\x00\x00')
+// CHECK:   ('_string_data', '\x00baz\x00bar\x00foo\x00\x00\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 1)
+// CHECK:    (('n_strx', 9)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -101,7 +101,7 @@ baz:
 // CHECK:     ('_string', 'bar')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 9)
+// CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 3)
 // CHECK:     ('n_desc', 0)
diff --git a/test/MC/MachO/string-table.s b/test/MC/MachO/string-table.s
index 179528eaae52..3a935eee0500 100644
--- a/test/MC/MachO/string-table.s
+++ b/test/MC/MachO/string-table.s
@@ -53,10 +53,10 @@
 // CHECK:   ('nsyms', 2)
 // CHECK:   ('stroff', 308)
 // CHECK:   ('strsize', 8)
-// CHECK:   ('_string_data', '\x00a\x00b\x00\x00\x00\x00')
+// CHECK:   ('_string_data', '\x00b\x00a\x00\x00\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 1)
+// CHECK:    (('n_strx', 3)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
@@ -64,7 +64,7 @@
 // CHECK:     ('_string', 'a')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 3)
+// CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
diff --git a/test/MC/MachO/symbol-diff.s b/test/MC/MachO/symbol-diff.s
index 1483df16333c..dae32878b8e2 100644
--- a/test/MC/MachO/symbol-diff.s
+++ b/test/MC/MachO/symbol-diff.s
@@ -75,10 +75,10 @@ _g.eh:
 // CHECK-NEXT:  ('nsyms', 2)
 // CHECK-NEXT:  ('stroff', 424)
 // CHECK-NEXT:  ('strsize', 12)
-// CHECK-NEXT:  ('_string_data', '\x00_g\x00_g.eh\x00\x00\x00')
+// CHECK-NEXT:  ('_string_data', '\x00_g.eh\x00_g\x00\x00\x00')
 // CHECK-NEXT:  ('_symbols', [
 // CHECK-NEXT:    # Symbol 0
-// CHECK-NEXT:   (('n_strx', 1)
+// CHECK-NEXT:   (('n_strx', 7)
 // CHECK-NEXT:    ('n_type', 0xe)
 // CHECK-NEXT:    ('n_sect', 1)
 // CHECK-NEXT:    ('n_desc', 0)
@@ -86,7 +86,7 @@ _g.eh:
 // CHECK-NEXT:    ('_string', '_g')
 // CHECK-NEXT:   ),
 // CHECK-NEXT:    # Symbol 1
-// CHECK-NEXT:   (('n_strx', 4)
+// CHECK-NEXT:   (('n_strx', 1)
 // CHECK-NEXT:    ('n_type', 0xe)
 // CHECK-NEXT:    ('n_sect', 2)
 // CHECK-NEXT:    ('n_desc', 0)
diff --git a/test/MC/MachO/symbol-flags.s b/test/MC/MachO/symbol-flags.s
index 7a4f8e4031ae..561d88a14e73 100644
--- a/test/MC/MachO/symbol-flags.s
+++ b/test/MC/MachO/symbol-flags.s
@@ -118,10 +118,10 @@ sym_desc_flags:
 // CHECK:   ('nsyms', 24)
 // CHECK:   ('stroff', 612)
 // CHECK:   ('strsize', 388)
-// CHECK:   ('_string_data', '\x00sym_ref_A\x00sym_ref_def_D\x00sym_ref_def_E\x00sym_weak_ref_A\x00sym_weak_def_A\x00sym_weak_def_B\x00sym_weak_def_C\x00sym_lazy_ref_A\x00sym_lazy_ref_D\x00sym_lazy_ref_E\x00sym_private_ext_A\x00sym_private_ext_B\x00sym_private_ext_C\x00sym_private_ext_D\x00sym_private_ext_E\x00sym_no_dead_strip_A\x00sym_ref_def_A\x00sym_ref_def_C\x00sym_weak_ref_def_A\x00sym_weak_ref_def_B\x00sym_lazy_ref_B\x00sym_lazy_ref_C\x00sym_symbol_resolver_A\x00sym_desc_flags\x00\x00')
+// CHECK:   ('_string_data', '\x00sym_desc_flags\x00sym_private_ext_E\x00sym_lazy_ref_E\x00sym_ref_def_E\x00sym_private_ext_D\x00sym_lazy_ref_D\x00sym_ref_def_D\x00sym_private_ext_C\x00sym_lazy_ref_C\x00sym_weak_def_C\x00sym_ref_def_C\x00sym_private_ext_B\x00sym_lazy_ref_B\x00sym_weak_def_B\x00sym_weak_ref_def_B\x00sym_private_ext_A\x00sym_symbol_resolver_A\x00sym_no_dead_strip_A\x00sym_lazy_ref_A\x00sym_ref_A\x00sym_weak_ref_A\x00sym_weak_def_A\x00sym_ref_def_A\x00sym_weak_ref_def_A\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 254)
+// CHECK:    (('n_strx', 354)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 32)
@@ -129,7 +129,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_ref_def_A')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 268)
+// CHECK:    (('n_strx', 158)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 32)
@@ -137,7 +137,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_ref_def_C')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 282)
+// CHECK:    (('n_strx', 368)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 64)
@@ -145,7 +145,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_weak_ref_def_A')
 // CHECK:    ),
 // CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 301)
+// CHECK:    (('n_strx', 220)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -153,7 +153,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_weak_ref_def_B')
 // CHECK:    ),
 // CHECK:     # Symbol 4
-// CHECK:    (('n_strx', 320)
+// CHECK:    (('n_strx', 190)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 32)
@@ -161,7 +161,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_lazy_ref_B')
 // CHECK:    ),
 // CHECK:     # Symbol 5
-// CHECK:    (('n_strx', 335)
+// CHECK:    (('n_strx', 128)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 32)
@@ -169,7 +169,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_lazy_ref_C')
 // CHECK:    ),
 // CHECK:     # Symbol 6
-// CHECK:    (('n_strx', 350)
+// CHECK:    (('n_strx', 257)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 256)
@@ -177,7 +177,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_symbol_resolver_A')
 // CHECK:    ),
 // CHECK:     # Symbol 7
-// CHECK:    (('n_strx', 372)
+// CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 64)
@@ -185,7 +185,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_desc_flags')
 // CHECK:    ),
 // CHECK:     # Symbol 8
-// CHECK:    (('n_strx', 162)
+// CHECK:    (('n_strx', 172)
 // CHECK:     ('n_type', 0x1f)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -193,7 +193,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_private_ext_B')
 // CHECK:    ),
 // CHECK:     # Symbol 9
-// CHECK:    (('n_strx', 180)
+// CHECK:    (('n_strx', 110)
 // CHECK:     ('n_type', 0x1f)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -201,7 +201,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_private_ext_C')
 // CHECK:    ),
 // CHECK:     # Symbol 10
-// CHECK:    (('n_strx', 54)
+// CHECK:    (('n_strx', 339)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 128)
@@ -209,7 +209,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_weak_def_A')
 // CHECK:    ),
 // CHECK:     # Symbol 11
-// CHECK:    (('n_strx', 69)
+// CHECK:    (('n_strx', 205)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 128)
@@ -217,7 +217,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_weak_def_B')
 // CHECK:    ),
 // CHECK:     # Symbol 12
-// CHECK:    (('n_strx', 84)
+// CHECK:    (('n_strx', 143)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 128)
@@ -225,7 +225,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_weak_def_C')
 // CHECK:    ),
 // CHECK:     # Symbol 13
-// CHECK:    (('n_strx', 99)
+// CHECK:    (('n_strx', 299)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 33)
@@ -233,7 +233,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_lazy_ref_A')
 // CHECK:    ),
 // CHECK:     # Symbol 14
-// CHECK:    (('n_strx', 114)
+// CHECK:    (('n_strx', 81)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 32)
@@ -241,7 +241,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_lazy_ref_D')
 // CHECK:    ),
 // CHECK:     # Symbol 15
-// CHECK:    (('n_strx', 129)
+// CHECK:    (('n_strx', 34)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 33)
@@ -249,7 +249,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_lazy_ref_E')
 // CHECK:    ),
 // CHECK:     # Symbol 16
-// CHECK:    (('n_strx', 234)
+// CHECK:    (('n_strx', 279)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 32)
@@ -257,7 +257,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_no_dead_strip_A')
 // CHECK:    ),
 // CHECK:     # Symbol 17
-// CHECK:    (('n_strx', 144)
+// CHECK:    (('n_strx', 239)
 // CHECK:     ('n_type', 0x11)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
@@ -265,7 +265,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_private_ext_A')
 // CHECK:    ),
 // CHECK:     # Symbol 18
-// CHECK:    (('n_strx', 198)
+// CHECK:    (('n_strx', 63)
 // CHECK:     ('n_type', 0x11)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
@@ -273,7 +273,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_private_ext_D')
 // CHECK:    ),
 // CHECK:     # Symbol 19
-// CHECK:    (('n_strx', 216)
+// CHECK:    (('n_strx', 16)
 // CHECK:     ('n_type', 0x11)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
@@ -281,7 +281,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_private_ext_E')
 // CHECK:    ),
 // CHECK:     # Symbol 20
-// CHECK:    (('n_strx', 1)
+// CHECK:    (('n_strx', 314)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 4660)
@@ -289,7 +289,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_ref_A')
 // CHECK:    ),
 // CHECK:     # Symbol 21
-// CHECK:    (('n_strx', 11)
+// CHECK:    (('n_strx', 96)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 32)
@@ -297,7 +297,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_ref_def_D')
 // CHECK:    ),
 // CHECK:     # Symbol 22
-// CHECK:    (('n_strx', 25)
+// CHECK:    (('n_strx', 49)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 32)
@@ -305,7 +305,7 @@ sym_desc_flags:
 // CHECK:     ('_string', 'sym_ref_def_E')
 // CHECK:    ),
 // CHECK:     # Symbol 23
-// CHECK:    (('n_strx', 39)
+// CHECK:    (('n_strx', 324)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 64)
diff --git a/test/MC/MachO/symbol-indirect.s b/test/MC/MachO/symbol-indirect.s
index 2412970322bf..1cdeed1f4ef6 100644
--- a/test/MC/MachO/symbol-indirect.s
+++ b/test/MC/MachO/symbol-indirect.s
@@ -137,10 +137,10 @@ sym_nlp_G:
 // CHECK:   ('nsyms', 10)
 // CHECK:   ('stroff', 592)
 // CHECK:   ('strsize', 104)
-// CHECK:   ('_string_data', '\x00sym_lsp_A\x00sym_lsp_G\x00sym_nlp_A\x00sym_nlp_G\x00sym_nlp_B\x00sym_nlp_E\x00sym_lsp_B\x00sym_lsp_E\x00sym_lsp_C\x00sym_nlp_C\x00\x00\x00\x00')
+// CHECK:   ('_string_data', '\x00sym_lsp_G\x00sym_nlp_G\x00sym_lsp_E\x00sym_nlp_E\x00sym_lsp_C\x00sym_nlp_C\x00sym_lsp_B\x00sym_nlp_B\x00sym_lsp_A\x00sym_nlp_A\x00\x00\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 81)
+// CHECK:    (('n_strx', 41)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -148,7 +148,7 @@ sym_nlp_G:
 // CHECK:     ('_string', 'sym_lsp_C')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 91)
+// CHECK:    (('n_strx', 51)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 3)
 // CHECK:     ('n_desc', 0)
@@ -156,7 +156,7 @@ sym_nlp_G:
 // CHECK:     ('_string', 'sym_nlp_C')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 11)
+// CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -164,7 +164,7 @@ sym_nlp_G:
 // CHECK:     ('_string', 'sym_lsp_G')
 // CHECK:    ),
 // CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 31)
+// CHECK:    (('n_strx', 11)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 3)
 // CHECK:     ('n_desc', 0)
@@ -172,7 +172,7 @@ sym_nlp_G:
 // CHECK:     ('_string', 'sym_nlp_G')
 // CHECK:    ),
 // CHECK:     # Symbol 4
-// CHECK:    (('n_strx', 1)
+// CHECK:    (('n_strx', 81)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
@@ -188,7 +188,7 @@ sym_nlp_G:
 // CHECK:     ('_string', 'sym_lsp_B')
 // CHECK:    ),
 // CHECK:     # Symbol 6
-// CHECK:    (('n_strx', 71)
+// CHECK:    (('n_strx', 21)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 1)
@@ -196,7 +196,7 @@ sym_nlp_G:
 // CHECK:     ('_string', 'sym_lsp_E')
 // CHECK:    ),
 // CHECK:     # Symbol 7
-// CHECK:    (('n_strx', 21)
+// CHECK:    (('n_strx', 91)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
@@ -204,7 +204,7 @@ sym_nlp_G:
 // CHECK:     ('_string', 'sym_nlp_A')
 // CHECK:    ),
 // CHECK:     # Symbol 8
-// CHECK:    (('n_strx', 41)
+// CHECK:    (('n_strx', 71)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
@@ -212,7 +212,7 @@ sym_nlp_G:
 // CHECK:     ('_string', 'sym_nlp_B')
 // CHECK:    ),
 // CHECK:     # Symbol 9
-// CHECK:    (('n_strx', 51)
+// CHECK:    (('n_strx', 31)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
diff --git a/test/MC/MachO/symbols-1.s b/test/MC/MachO/symbols-1.s
index cf05afa7509f..8b663dc71762 100644
--- a/test/MC/MachO/symbols-1.s
+++ b/test/MC/MachO/symbols-1.s
@@ -59,10 +59,10 @@ Lsym_asm_temp:
 // CHECK-X86_32:   ('nsyms', 9)
 // CHECK-X86_32:   ('stroff', 368)
 // CHECK-X86_32:   ('strsize', 140)
-// CHECK-X86_32:   ('_string_data', '\x00sym_globl_def_B\x00sym_globl_undef_B\x00sym_globl_def_A\x00sym_globl_undef_A\x00sym_globl_def_C\x00sym_globl_undef_C\x00sym_local_B\x00sym_local_A\x00sym_local_C\x00\x00')
+// CHECK-X86_32:   ('_string_data', '\x00sym_local_C\x00sym_globl_undef_C\x00sym_globl_def_C\x00sym_local_B\x00sym_globl_undef_B\x00sym_globl_def_B\x00sym_local_A\x00sym_globl_undef_A\x00sym_globl_def_A\x00\x00')
 // CHECK-X86_32:   ('_symbols', [
 // CHECK-X86_32:     # Symbol 0
-// CHECK-X86_32:    (('n_strx', 103)
+// CHECK-X86_32:    (('n_strx', 47)
 // CHECK-X86_32:     ('n_type', 0xe)
 // CHECK-X86_32:     ('n_sect', 1)
 // CHECK-X86_32:     ('n_desc', 0)
@@ -70,7 +70,7 @@ Lsym_asm_temp:
 // CHECK-X86_32:     ('_string', 'sym_local_B')
 // CHECK-X86_32:    ),
 // CHECK-X86_32:     # Symbol 1
-// CHECK-X86_32:    (('n_strx', 115)
+// CHECK-X86_32:    (('n_strx', 93)
 // CHECK-X86_32:     ('n_type', 0xe)
 // CHECK-X86_32:     ('n_sect', 1)
 // CHECK-X86_32:     ('n_desc', 0)
@@ -78,7 +78,7 @@ Lsym_asm_temp:
 // CHECK-X86_32:     ('_string', 'sym_local_A')
 // CHECK-X86_32:    ),
 // CHECK-X86_32:     # Symbol 2
-// CHECK-X86_32:    (('n_strx', 127)
+// CHECK-X86_32:    (('n_strx', 1)
 // CHECK-X86_32:     ('n_type', 0xe)
 // CHECK-X86_32:     ('n_sect', 1)
 // CHECK-X86_32:     ('n_desc', 0)
@@ -86,7 +86,7 @@ Lsym_asm_temp:
 // CHECK-X86_32:     ('_string', 'sym_local_C')
 // CHECK-X86_32:    ),
 // CHECK-X86_32:     # Symbol 3
-// CHECK-X86_32:    (('n_strx', 35)
+// CHECK-X86_32:    (('n_strx', 123)
 // CHECK-X86_32:     ('n_type', 0xf)
 // CHECK-X86_32:     ('n_sect', 1)
 // CHECK-X86_32:     ('n_desc', 0)
@@ -94,7 +94,7 @@ Lsym_asm_temp:
 // CHECK-X86_32:     ('_string', 'sym_globl_def_A')
 // CHECK-X86_32:    ),
 // CHECK-X86_32:     # Symbol 4
-// CHECK-X86_32:    (('n_strx', 1)
+// CHECK-X86_32:    (('n_strx', 77)
 // CHECK-X86_32:     ('n_type', 0xf)
 // CHECK-X86_32:     ('n_sect', 1)
 // CHECK-X86_32:     ('n_desc', 0)
@@ -102,7 +102,7 @@ Lsym_asm_temp:
 // CHECK-X86_32:     ('_string', 'sym_globl_def_B')
 // CHECK-X86_32:    ),
 // CHECK-X86_32:     # Symbol 5
-// CHECK-X86_32:    (('n_strx', 69)
+// CHECK-X86_32:    (('n_strx', 31)
 // CHECK-X86_32:     ('n_type', 0xf)
 // CHECK-X86_32:     ('n_sect', 1)
 // CHECK-X86_32:     ('n_desc', 0)
@@ -110,7 +110,7 @@ Lsym_asm_temp:
 // CHECK-X86_32:     ('_string', 'sym_globl_def_C')
 // CHECK-X86_32:    ),
 // CHECK-X86_32:     # Symbol 6
-// CHECK-X86_32:    (('n_strx', 51)
+// CHECK-X86_32:    (('n_strx', 105)
 // CHECK-X86_32:     ('n_type', 0x1)
 // CHECK-X86_32:     ('n_sect', 0)
 // CHECK-X86_32:     ('n_desc', 0)
@@ -118,7 +118,7 @@ Lsym_asm_temp:
 // CHECK-X86_32:     ('_string', 'sym_globl_undef_A')
 // CHECK-X86_32:    ),
 // CHECK-X86_32:     # Symbol 7
-// CHECK-X86_32:    (('n_strx', 17)
+// CHECK-X86_32:    (('n_strx', 59)
 // CHECK-X86_32:     ('n_type', 0x1)
 // CHECK-X86_32:     ('n_sect', 0)
 // CHECK-X86_32:     ('n_desc', 0)
@@ -126,7 +126,7 @@ Lsym_asm_temp:
 // CHECK-X86_32:     ('_string', 'sym_globl_undef_B')
 // CHECK-X86_32:    ),
 // CHECK-X86_32:     # Symbol 8
-// CHECK-X86_32:    (('n_strx', 85)
+// CHECK-X86_32:    (('n_strx', 13)
 // CHECK-X86_32:     ('n_type', 0x1)
 // CHECK-X86_32:     ('n_sect', 0)
 // CHECK-X86_32:     ('n_desc', 0)
@@ -207,10 +207,10 @@ Lsym_asm_temp:
 // CHECK-X86_64:   ('nsyms', 9)
 // CHECK-X86_64:   ('stroff', 436)
 // CHECK-X86_64:   ('strsize', 140)
-// CHECK-X86_64:   ('_string_data', '\x00sym_globl_def_B\x00sym_globl_undef_B\x00sym_globl_def_A\x00sym_globl_undef_A\x00sym_globl_def_C\x00sym_globl_undef_C\x00sym_local_B\x00sym_local_A\x00sym_local_C\x00\x00')
+// CHECK-X86_64:   ('_string_data', '\x00sym_local_C\x00sym_globl_undef_C\x00sym_globl_def_C\x00sym_local_B\x00sym_globl_undef_B\x00sym_globl_def_B\x00sym_local_A\x00sym_globl_undef_A\x00sym_globl_def_A\x00\x00')
 // CHECK-X86_64:   ('_symbols', [
 // CHECK-X86_64:     # Symbol 0
-// CHECK-X86_64:    (('n_strx', 103)
+// CHECK-X86_64:    (('n_strx', 47)
 // CHECK-X86_64:     ('n_type', 0xe)
 // CHECK-X86_64:     ('n_sect', 1)
 // CHECK-X86_64:     ('n_desc', 0)
@@ -218,7 +218,7 @@ Lsym_asm_temp:
 // CHECK-X86_64:     ('_string', 'sym_local_B')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 1
-// CHECK-X86_64:    (('n_strx', 115)
+// CHECK-X86_64:    (('n_strx', 93)
 // CHECK-X86_64:     ('n_type', 0xe)
 // CHECK-X86_64:     ('n_sect', 1)
 // CHECK-X86_64:     ('n_desc', 0)
@@ -226,7 +226,7 @@ Lsym_asm_temp:
 // CHECK-X86_64:     ('_string', 'sym_local_A')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 2
-// CHECK-X86_64:    (('n_strx', 127)
+// CHECK-X86_64:    (('n_strx', 1)
 // CHECK-X86_64:     ('n_type', 0xe)
 // CHECK-X86_64:     ('n_sect', 1)
 // CHECK-X86_64:     ('n_desc', 0)
@@ -234,7 +234,7 @@ Lsym_asm_temp:
 // CHECK-X86_64:     ('_string', 'sym_local_C')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 3
-// CHECK-X86_64:    (('n_strx', 35)
+// CHECK-X86_64:    (('n_strx', 123)
 // CHECK-X86_64:     ('n_type', 0xf)
 // CHECK-X86_64:     ('n_sect', 1)
 // CHECK-X86_64:     ('n_desc', 0)
@@ -242,7 +242,7 @@ Lsym_asm_temp:
 // CHECK-X86_64:     ('_string', 'sym_globl_def_A')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 4
-// CHECK-X86_64:    (('n_strx', 1)
+// CHECK-X86_64:    (('n_strx', 77)
 // CHECK-X86_64:     ('n_type', 0xf)
 // CHECK-X86_64:     ('n_sect', 1)
 // CHECK-X86_64:     ('n_desc', 0)
@@ -250,7 +250,7 @@ Lsym_asm_temp:
 // CHECK-X86_64:     ('_string', 'sym_globl_def_B')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 5
-// CHECK-X86_64:    (('n_strx', 69)
+// CHECK-X86_64:    (('n_strx', 31)
 // CHECK-X86_64:     ('n_type', 0xf)
 // CHECK-X86_64:     ('n_sect', 1)
 // CHECK-X86_64:     ('n_desc', 0)
@@ -258,7 +258,7 @@ Lsym_asm_temp:
 // CHECK-X86_64:     ('_string', 'sym_globl_def_C')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 6
-// CHECK-X86_64:    (('n_strx', 51)
+// CHECK-X86_64:    (('n_strx', 105)
 // CHECK-X86_64:     ('n_type', 0x1)
 // CHECK-X86_64:     ('n_sect', 0)
 // CHECK-X86_64:     ('n_desc', 0)
@@ -266,7 +266,7 @@ Lsym_asm_temp:
 // CHECK-X86_64:     ('_string', 'sym_globl_undef_A')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 7
-// CHECK-X86_64:    (('n_strx', 17)
+// CHECK-X86_64:    (('n_strx', 59)
 // CHECK-X86_64:     ('n_type', 0x1)
 // CHECK-X86_64:     ('n_sect', 0)
 // CHECK-X86_64:     ('n_desc', 0)
@@ -274,7 +274,7 @@ Lsym_asm_temp:
 // CHECK-X86_64:     ('_string', 'sym_globl_undef_B')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 8
-// CHECK-X86_64:    (('n_strx', 85)
+// CHECK-X86_64:    (('n_strx', 13)
 // CHECK-X86_64:     ('n_type', 0x1)
 // CHECK-X86_64:     ('n_sect', 0)
 // CHECK-X86_64:     ('n_desc', 0)
diff --git a/test/MC/MachO/tbss.s b/test/MC/MachO/tbss.s
index 8eae14296865..1c23aa548d76 100644
--- a/test/MC/MachO/tbss.s
+++ b/test/MC/MachO/tbss.s
@@ -67,10 +67,10 @@
 // CHECK:   ('nsyms', 2)
 // CHECK:   ('stroff', 400)
 // CHECK:   ('strsize', 28)
-// CHECK:   ('_string_data', '\x00_a$tlv$init\x00_b$tlv$init\x00\x00\x00\x00')
+// CHECK:   ('_string_data', '\x00_b$tlv$init\x00_a$tlv$init\x00\x00\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 1)
+// CHECK:    (('n_strx', 13)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -78,7 +78,7 @@
 // CHECK:     ('_string', '_a$tlv$init')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 13)
+// CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
diff --git a/test/MC/MachO/tls.s b/test/MC/MachO/tls.s
index 44b61beeb489..33e23a9c4276 100644
--- a/test/MC/MachO/tls.s
+++ b/test/MC/MachO/tls.s
@@ -167,10 +167,10 @@ _b:
 // CHECK:   ('nsyms', 9)
 // CHECK:   ('stroff', 840)
 // CHECK:   ('strsize', 80)
-// CHECK:   ('_string_data', '\x00_c$tlv$init\x00_c\x00___tlv_bootstrap\x00_d$tlv$init\x00_d\x00_a\x00_b\x00_a$tlv$init\x00_b$tlv$init\x00\x00\x00')
+// CHECK:   ('_string_data', '\x00_d$tlv$init\x00_c$tlv$init\x00_b$tlv$init\x00_a$tlv$init\x00___tlv_bootstrap\x00_d\x00_c\x00_b\x00_a\x00\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 54)
+// CHECK:    (('n_strx', 37)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 4)
 // CHECK:     ('n_desc', 0)
@@ -178,7 +178,7 @@ _b:
 // CHECK:     ('_string', '_a$tlv$init')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 66)
+// CHECK:    (('n_strx', 25)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 4)
 // CHECK:     ('n_desc', 0)
@@ -186,7 +186,7 @@ _b:
 // CHECK:     ('_string', '_b$tlv$init')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 48)
+// CHECK:    (('n_strx', 75)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 3)
 // CHECK:     ('n_desc', 0)
@@ -194,7 +194,7 @@ _b:
 // CHECK:     ('_string', '_a')
 // CHECK:    ),
 // CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 51)
+// CHECK:    (('n_strx', 72)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 3)
 // CHECK:     ('n_desc', 0)
@@ -202,7 +202,7 @@ _b:
 // CHECK:     ('_string', '_b')
 // CHECK:    ),
 // CHECK:     # Symbol 4
-// CHECK:    (('n_strx', 13)
+// CHECK:    (('n_strx', 69)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 3)
 // CHECK:     ('n_desc', 0)
@@ -210,7 +210,7 @@ _b:
 // CHECK:     ('_string', '_c')
 // CHECK:    ),
 // CHECK:     # Symbol 5
-// CHECK:    (('n_strx', 1)
+// CHECK:    (('n_strx', 13)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -218,7 +218,7 @@ _b:
 // CHECK:     ('_string', '_c$tlv$init')
 // CHECK:    ),
 // CHECK:     # Symbol 6
-// CHECK:    (('n_strx', 45)
+// CHECK:    (('n_strx', 66)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 3)
 // CHECK:     ('n_desc', 0)
@@ -226,7 +226,7 @@ _b:
 // CHECK:     ('_string', '_d')
 // CHECK:    ),
 // CHECK:     # Symbol 7
-// CHECK:    (('n_strx', 33)
+// CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -234,7 +234,7 @@ _b:
 // CHECK:     ('_string', '_d$tlv$init')
 // CHECK:    ),
 // CHECK:     # Symbol 8
-// CHECK:    (('n_strx', 16)
+// CHECK:    (('n_strx', 49)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
diff --git a/test/MC/MachO/tlv-reloc.s b/test/MC/MachO/tlv-reloc.s
index d11124184723..80e0565c59b7 100644
--- a/test/MC/MachO/tlv-reloc.s
+++ b/test/MC/MachO/tlv-reloc.s
@@ -111,10 +111,10 @@ _foo:
 // CHECK:   ('nsyms', 4)
 // CHECK:   ('stroff', 576)
 // CHECK:   ('strsize', 40)
-// CHECK:   ('_string_data', '\x00_a\x00__tlv_bootstrap\x00_foo\x00_a$tlv$init\x00\x00\x00\x00')
+// CHECK:   ('_string_data', '\x00_a$tlv$init\x00__tlv_bootstrap\x00_foo\x00_a\x00\x00\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 25)
+// CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -122,7 +122,7 @@ _foo:
 // CHECK:     ('_string', '_a$tlv$init')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 1)
+// CHECK:    (('n_strx', 34)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 3)
 // CHECK:     ('n_desc', 0)
@@ -130,7 +130,7 @@ _foo:
 // CHECK:     ('_string', '_a')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 20)
+// CHECK:    (('n_strx', 29)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -138,7 +138,7 @@ _foo:
 // CHECK:     ('_string', '_foo')
 // CHECK:    ),
 // CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 4)
+// CHECK:    (('n_strx', 13)
 // CHECK:     ('n_type', 0x1)
 // CHECK:     ('n_sect', 0)
 // CHECK:     ('n_desc', 0)
diff --git a/test/MC/MachO/variable-exprs.s b/test/MC/MachO/variable-exprs.s
index a7fa45d571a3..ac781ef7f67d 100644
--- a/test/MC/MachO/variable-exprs.s
+++ b/test/MC/MachO/variable-exprs.s
@@ -134,10 +134,10 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-I386:   ('nsyms', 10)
 // CHECK-I386:   ('stroff', 576)
 // CHECK-I386:   ('strsize', 24)
-// CHECK-I386:   ('_string_data', '\x00d2\x00d\x00d3\x00a\x00b\x00c\x00e\x00g\x00f\x00t0\x00')
+// CHECK-I386:   ('_string_data', '\x00g\x00f\x00e\x00d\x00c\x00b\x00a\x00d3\x00d2\x00t0\x00')
 // CHECK-I386:   ('_symbols', [
 // CHECK-I386:     # Symbol 0
-// CHECK-I386:    (('n_strx', 9)
+// CHECK-I386:    (('n_strx', 13)
 // CHECK-I386:     ('n_type', 0xe)
 // CHECK-I386:     ('n_sect', 2)
 // CHECK-I386:     ('n_desc', 0)
@@ -153,7 +153,7 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-I386:     ('_string', 'b')
 // CHECK-I386:    ),
 // CHECK-I386:     # Symbol 2
-// CHECK-I386:    (('n_strx', 13)
+// CHECK-I386:    (('n_strx', 9)
 // CHECK-I386:     ('n_type', 0xe)
 // CHECK-I386:     ('n_sect', 2)
 // CHECK-I386:     ('n_desc', 0)
@@ -161,7 +161,7 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-I386:     ('_string', 'c')
 // CHECK-I386:    ),
 // CHECK-I386:     # Symbol 3
-// CHECK-I386:    (('n_strx', 15)
+// CHECK-I386:    (('n_strx', 5)
 // CHECK-I386:     ('n_type', 0xe)
 // CHECK-I386:     ('n_sect', 2)
 // CHECK-I386:     ('n_desc', 0)
@@ -169,7 +169,7 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-I386:     ('_string', 'e')
 // CHECK-I386:    ),
 // CHECK-I386:     # Symbol 4
-// CHECK-I386:    (('n_strx', 17)
+// CHECK-I386:    (('n_strx', 1)
 // CHECK-I386:     ('n_type', 0xe)
 // CHECK-I386:     ('n_sect', 2)
 // CHECK-I386:     ('n_desc', 0)
@@ -177,7 +177,7 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-I386:     ('_string', 'g')
 // CHECK-I386:    ),
 // CHECK-I386:     # Symbol 5
-// CHECK-I386:    (('n_strx', 19)
+// CHECK-I386:    (('n_strx', 3)
 // CHECK-I386:     ('n_type', 0xe)
 // CHECK-I386:     ('n_sect', 2)
 // CHECK-I386:     ('n_desc', 0)
@@ -193,7 +193,7 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-I386:     ('_string', 't0')
 // CHECK-I386:    ),
 // CHECK-I386:     # Symbol 7
-// CHECK-I386:    (('n_strx', 4)
+// CHECK-I386:    (('n_strx', 7)
 // CHECK-I386:     ('n_type', 0x1)
 // CHECK-I386:     ('n_sect', 0)
 // CHECK-I386:     ('n_desc', 0)
@@ -201,15 +201,15 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-I386:     ('_string', 'd')
 // CHECK-I386:    ),
 // CHECK-I386:     # Symbol 8
-// CHECK-I386:    (('n_strx', 1)
+// CHECK-I386:    (('n_strx', 18)
 // CHECK-I386:     ('n_type', 0xb)
 // CHECK-I386:     ('n_sect', 0)
 // CHECK-I386:     ('n_desc', 0)
-// CHECK-I386:     ('n_value', 4)
+// CHECK-I386:     ('n_value', 7)
 // CHECK-I386:     ('_string', 'd2')
 // CHECK-I386:    ),
 // CHECK-I386:     # Symbol 9
-// CHECK-I386:    (('n_strx', 6)
+// CHECK-I386:    (('n_strx', 15)
 // CHECK-I386:     ('n_type', 0x1)
 // CHECK-I386:     ('n_sect', 0)
 // CHECK-I386:     ('n_desc', 0)
@@ -335,10 +335,10 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-X86_64:   ('nsyms', 10)
 // CHECK-X86_64:   ('stroff', 660)
 // CHECK-X86_64:   ('strsize', 24)
-// CHECK-X86_64:   ('_string_data', '\x00d2\x00d\x00d3\x00a\x00b\x00c\x00e\x00g\x00f\x00t0\x00')
+// CHECK-X86_64:   ('_string_data', '\x00g\x00f\x00e\x00d\x00c\x00b\x00a\x00d3\x00d2\x00t0\x00')
 // CHECK-X86_64:   ('_symbols', [
 // CHECK-X86_64:     # Symbol 0
-// CHECK-X86_64:    (('n_strx', 9)
+// CHECK-X86_64:    (('n_strx', 13)
 // CHECK-X86_64:     ('n_type', 0xe)
 // CHECK-X86_64:     ('n_sect', 2)
 // CHECK-X86_64:     ('n_desc', 0)
@@ -354,7 +354,7 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-X86_64:     ('_string', 'b')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 2
-// CHECK-X86_64:    (('n_strx', 13)
+// CHECK-X86_64:    (('n_strx', 9)
 // CHECK-X86_64:     ('n_type', 0xe)
 // CHECK-X86_64:     ('n_sect', 2)
 // CHECK-X86_64:     ('n_desc', 0)
@@ -362,7 +362,7 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-X86_64:     ('_string', 'c')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 3
-// CHECK-X86_64:    (('n_strx', 15)
+// CHECK-X86_64:    (('n_strx', 5)
 // CHECK-X86_64:     ('n_type', 0xe)
 // CHECK-X86_64:     ('n_sect', 2)
 // CHECK-X86_64:     ('n_desc', 0)
@@ -370,7 +370,7 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-X86_64:     ('_string', 'e')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 4
-// CHECK-X86_64:    (('n_strx', 17)
+// CHECK-X86_64:    (('n_strx', 1)
 // CHECK-X86_64:     ('n_type', 0xe)
 // CHECK-X86_64:     ('n_sect', 2)
 // CHECK-X86_64:     ('n_desc', 0)
@@ -378,7 +378,7 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-X86_64:     ('_string', 'g')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 5
-// CHECK-X86_64:    (('n_strx', 19)
+// CHECK-X86_64:    (('n_strx', 3)
 // CHECK-X86_64:     ('n_type', 0xe)
 // CHECK-X86_64:     ('n_sect', 2)
 // CHECK-X86_64:     ('n_desc', 0)
@@ -394,7 +394,7 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-X86_64:     ('_string', 't0')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 7
-// CHECK-X86_64:    (('n_strx', 4)
+// CHECK-X86_64:    (('n_strx', 7)
 // CHECK-X86_64:     ('n_type', 0x1)
 // CHECK-X86_64:     ('n_sect', 0)
 // CHECK-X86_64:     ('n_desc', 0)
@@ -402,15 +402,15 @@ Lt0_x = Lt0_a - Lt0_b
 // CHECK-X86_64:     ('_string', 'd')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 8
-// CHECK-X86_64:    (('n_strx', 1)
+// CHECK-X86_64:    (('n_strx', 18)
 // CHECK-X86_64:     ('n_type', 0xb)
 // CHECK-X86_64:     ('n_sect', 0)
 // CHECK-X86_64:     ('n_desc', 0)
-// CHECK-X86_64:     ('n_value', 4)
+// CHECK-X86_64:     ('n_value', 7)
 // CHECK-X86_64:     ('_string', 'd2')
 // CHECK-X86_64:    ),
 // CHECK-X86_64:     # Symbol 9
-// CHECK-X86_64:    (('n_strx', 6)
+// CHECK-X86_64:    (('n_strx', 15)
 // CHECK-X86_64:     ('n_type', 0x1)
 // CHECK-X86_64:     ('n_sect', 0)
 // CHECK-X86_64:     ('n_desc', 0)
diff --git a/test/MC/MachO/x86_32-symbols.s b/test/MC/MachO/x86_32-symbols.s
index 35ada354d29e..95aa507305fd 100644
--- a/test/MC/MachO/x86_32-symbols.s
+++ b/test/MC/MachO/x86_32-symbols.s
@@ -690,10 +690,10 @@ D39:
 // CHECK:   ('nsyms', 40)
 // CHECK:   ('stroff', 3116)
 // CHECK:   ('strsize', 152)
-// CHECK:   ('_string_data', '\x00D0\x00D1\x00D2\x00D3\x00D4\x00D5\x00D6\x00D7\x00D8\x00D9\x00D10\x00D11\x00D12\x00D13\x00D14\x00D15\x00D16\x00D17\x00D18\x00D19\x00D20\x00D21\x00D22\x00D23\x00D24\x00D25\x00D26\x00D27\x00D28\x00D29\x00D30\x00D31\x00D32\x00D33\x00D34\x00D35\x00D36\x00D37\x00D38\x00D39\x00\x00')
+// CHECK:   ('_string_data', '\x00D9\x00D39\x00D29\x00D19\x00D8\x00D38\x00D28\x00D18\x00D7\x00D37\x00D27\x00D17\x00D6\x00D36\x00D26\x00D16\x00D5\x00D35\x00D25\x00D15\x00D4\x00D34\x00D24\x00D14\x00D3\x00D33\x00D23\x00D13\x00D2\x00D32\x00D22\x00D12\x00D1\x00D31\x00D21\x00D11\x00D0\x00D30\x00D20\x00D10\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 1)
+// CHECK:    (('n_strx', 136)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -701,7 +701,7 @@ D39:
 // CHECK:     ('_string', 'D0')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 4)
+// CHECK:    (('n_strx', 121)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 1)
 // CHECK:     ('n_desc', 0)
@@ -709,7 +709,7 @@ D39:
 // CHECK:     ('_string', 'D1')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 7)
+// CHECK:    (('n_strx', 106)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -717,7 +717,7 @@ D39:
 // CHECK:     ('_string', 'D2')
 // CHECK:    ),
 // CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 10)
+// CHECK:    (('n_strx', 91)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 3)
 // CHECK:     ('n_desc', 0)
@@ -725,7 +725,7 @@ D39:
 // CHECK:     ('_string', 'D3')
 // CHECK:    ),
 // CHECK:     # Symbol 4
-// CHECK:    (('n_strx', 13)
+// CHECK:    (('n_strx', 76)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 4)
 // CHECK:     ('n_desc', 0)
@@ -733,7 +733,7 @@ D39:
 // CHECK:     ('_string', 'D4')
 // CHECK:    ),
 // CHECK:     # Symbol 5
-// CHECK:    (('n_strx', 16)
+// CHECK:    (('n_strx', 61)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 5)
 // CHECK:     ('n_desc', 0)
@@ -741,7 +741,7 @@ D39:
 // CHECK:     ('_string', 'D5')
 // CHECK:    ),
 // CHECK:     # Symbol 6
-// CHECK:    (('n_strx', 19)
+// CHECK:    (('n_strx', 46)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 6)
 // CHECK:     ('n_desc', 0)
@@ -749,7 +749,7 @@ D39:
 // CHECK:     ('_string', 'D6')
 // CHECK:    ),
 // CHECK:     # Symbol 7
-// CHECK:    (('n_strx', 22)
+// CHECK:    (('n_strx', 31)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 7)
 // CHECK:     ('n_desc', 0)
@@ -757,7 +757,7 @@ D39:
 // CHECK:     ('_string', 'D7')
 // CHECK:    ),
 // CHECK:     # Symbol 8
-// CHECK:    (('n_strx', 25)
+// CHECK:    (('n_strx', 16)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 8)
 // CHECK:     ('n_desc', 0)
@@ -765,7 +765,7 @@ D39:
 // CHECK:     ('_string', 'D8')
 // CHECK:    ),
 // CHECK:     # Symbol 9
-// CHECK:    (('n_strx', 28)
+// CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 9)
 // CHECK:     ('n_desc', 0)
@@ -773,7 +773,7 @@ D39:
 // CHECK:     ('_string', 'D9')
 // CHECK:    ),
 // CHECK:     # Symbol 10
-// CHECK:    (('n_strx', 31)
+// CHECK:    (('n_strx', 147)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 10)
 // CHECK:     ('n_desc', 0)
@@ -781,7 +781,7 @@ D39:
 // CHECK:     ('_string', 'D10')
 // CHECK:    ),
 // CHECK:     # Symbol 11
-// CHECK:    (('n_strx', 35)
+// CHECK:    (('n_strx', 132)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 11)
 // CHECK:     ('n_desc', 0)
@@ -789,7 +789,7 @@ D39:
 // CHECK:     ('_string', 'D11')
 // CHECK:    ),
 // CHECK:     # Symbol 12
-// CHECK:    (('n_strx', 39)
+// CHECK:    (('n_strx', 117)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 12)
 // CHECK:     ('n_desc', 0)
@@ -797,7 +797,7 @@ D39:
 // CHECK:     ('_string', 'D12')
 // CHECK:    ),
 // CHECK:     # Symbol 13
-// CHECK:    (('n_strx', 43)
+// CHECK:    (('n_strx', 102)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 13)
 // CHECK:     ('n_desc', 0)
@@ -805,7 +805,7 @@ D39:
 // CHECK:     ('_string', 'D13')
 // CHECK:    ),
 // CHECK:     # Symbol 14
-// CHECK:    (('n_strx', 47)
+// CHECK:    (('n_strx', 87)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 14)
 // CHECK:     ('n_desc', 0)
@@ -813,7 +813,7 @@ D39:
 // CHECK:     ('_string', 'D14')
 // CHECK:    ),
 // CHECK:     # Symbol 15
-// CHECK:    (('n_strx', 51)
+// CHECK:    (('n_strx', 72)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 15)
 // CHECK:     ('n_desc', 0)
@@ -821,7 +821,7 @@ D39:
 // CHECK:     ('_string', 'D15')
 // CHECK:    ),
 // CHECK:     # Symbol 16
-// CHECK:    (('n_strx', 55)
+// CHECK:    (('n_strx', 57)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 16)
 // CHECK:     ('n_desc', 0)
@@ -829,7 +829,7 @@ D39:
 // CHECK:     ('_string', 'D16')
 // CHECK:    ),
 // CHECK:     # Symbol 17
-// CHECK:    (('n_strx', 59)
+// CHECK:    (('n_strx', 42)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 17)
 // CHECK:     ('n_desc', 0)
@@ -837,7 +837,7 @@ D39:
 // CHECK:     ('_string', 'D17')
 // CHECK:    ),
 // CHECK:     # Symbol 18
-// CHECK:    (('n_strx', 63)
+// CHECK:    (('n_strx', 27)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 18)
 // CHECK:     ('n_desc', 0)
@@ -845,7 +845,7 @@ D39:
 // CHECK:     ('_string', 'D18')
 // CHECK:    ),
 // CHECK:     # Symbol 19
-// CHECK:    (('n_strx', 67)
+// CHECK:    (('n_strx', 12)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 19)
 // CHECK:     ('n_desc', 0)
@@ -853,7 +853,7 @@ D39:
 // CHECK:     ('_string', 'D19')
 // CHECK:    ),
 // CHECK:     # Symbol 20
-// CHECK:    (('n_strx', 71)
+// CHECK:    (('n_strx', 143)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 20)
 // CHECK:     ('n_desc', 0)
@@ -861,7 +861,7 @@ D39:
 // CHECK:     ('_string', 'D20')
 // CHECK:    ),
 // CHECK:     # Symbol 21
-// CHECK:    (('n_strx', 75)
+// CHECK:    (('n_strx', 128)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 21)
 // CHECK:     ('n_desc', 0)
@@ -869,7 +869,7 @@ D39:
 // CHECK:     ('_string', 'D21')
 // CHECK:    ),
 // CHECK:     # Symbol 22
-// CHECK:    (('n_strx', 79)
+// CHECK:    (('n_strx', 113)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 22)
 // CHECK:     ('n_desc', 0)
@@ -877,7 +877,7 @@ D39:
 // CHECK:     ('_string', 'D22')
 // CHECK:    ),
 // CHECK:     # Symbol 23
-// CHECK:    (('n_strx', 83)
+// CHECK:    (('n_strx', 98)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 23)
 // CHECK:     ('n_desc', 0)
@@ -885,7 +885,7 @@ D39:
 // CHECK:     ('_string', 'D23')
 // CHECK:    ),
 // CHECK:     # Symbol 24
-// CHECK:    (('n_strx', 87)
+// CHECK:    (('n_strx', 83)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 24)
 // CHECK:     ('n_desc', 0)
@@ -893,7 +893,7 @@ D39:
 // CHECK:     ('_string', 'D24')
 // CHECK:    ),
 // CHECK:     # Symbol 25
-// CHECK:    (('n_strx', 91)
+// CHECK:    (('n_strx', 68)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 25)
 // CHECK:     ('n_desc', 0)
@@ -901,7 +901,7 @@ D39:
 // CHECK:     ('_string', 'D25')
 // CHECK:    ),
 // CHECK:     # Symbol 26
-// CHECK:    (('n_strx', 95)
+// CHECK:    (('n_strx', 53)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 26)
 // CHECK:     ('n_desc', 0)
@@ -909,7 +909,7 @@ D39:
 // CHECK:     ('_string', 'D26')
 // CHECK:    ),
 // CHECK:     # Symbol 27
-// CHECK:    (('n_strx', 99)
+// CHECK:    (('n_strx', 38)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 27)
 // CHECK:     ('n_desc', 0)
@@ -917,7 +917,7 @@ D39:
 // CHECK:     ('_string', 'D27')
 // CHECK:    ),
 // CHECK:     # Symbol 28
-// CHECK:    (('n_strx', 103)
+// CHECK:    (('n_strx', 23)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 28)
 // CHECK:     ('n_desc', 0)
@@ -925,7 +925,7 @@ D39:
 // CHECK:     ('_string', 'D28')
 // CHECK:    ),
 // CHECK:     # Symbol 29
-// CHECK:    (('n_strx', 107)
+// CHECK:    (('n_strx', 8)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 29)
 // CHECK:     ('n_desc', 0)
@@ -933,7 +933,7 @@ D39:
 // CHECK:     ('_string', 'D29')
 // CHECK:    ),
 // CHECK:     # Symbol 30
-// CHECK:    (('n_strx', 111)
+// CHECK:    (('n_strx', 139)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 30)
 // CHECK:     ('n_desc', 0)
@@ -941,7 +941,7 @@ D39:
 // CHECK:     ('_string', 'D30')
 // CHECK:    ),
 // CHECK:     # Symbol 31
-// CHECK:    (('n_strx', 115)
+// CHECK:    (('n_strx', 124)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 31)
 // CHECK:     ('n_desc', 0)
@@ -949,7 +949,7 @@ D39:
 // CHECK:     ('_string', 'D31')
 // CHECK:    ),
 // CHECK:     # Symbol 32
-// CHECK:    (('n_strx', 119)
+// CHECK:    (('n_strx', 109)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 32)
 // CHECK:     ('n_desc', 0)
@@ -957,7 +957,7 @@ D39:
 // CHECK:     ('_string', 'D32')
 // CHECK:    ),
 // CHECK:     # Symbol 33
-// CHECK:    (('n_strx', 123)
+// CHECK:    (('n_strx', 94)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 33)
 // CHECK:     ('n_desc', 0)
@@ -965,7 +965,7 @@ D39:
 // CHECK:     ('_string', 'D33')
 // CHECK:    ),
 // CHECK:     # Symbol 34
-// CHECK:    (('n_strx', 127)
+// CHECK:    (('n_strx', 79)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 34)
 // CHECK:     ('n_desc', 0)
@@ -973,7 +973,7 @@ D39:
 // CHECK:     ('_string', 'D34')
 // CHECK:    ),
 // CHECK:     # Symbol 35
-// CHECK:    (('n_strx', 131)
+// CHECK:    (('n_strx', 64)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 4)
 // CHECK:     ('n_desc', 0)
@@ -981,7 +981,7 @@ D39:
 // CHECK:     ('_string', 'D35')
 // CHECK:    ),
 // CHECK:     # Symbol 36
-// CHECK:    (('n_strx', 135)
+// CHECK:    (('n_strx', 49)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 4)
 // CHECK:     ('n_desc', 0)
@@ -989,7 +989,7 @@ D39:
 // CHECK:     ('_string', 'D36')
 // CHECK:    ),
 // CHECK:     # Symbol 37
-// CHECK:    (('n_strx', 139)
+// CHECK:    (('n_strx', 34)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 4)
 // CHECK:     ('n_desc', 0)
@@ -997,7 +997,7 @@ D39:
 // CHECK:     ('_string', 'D37')
 // CHECK:    ),
 // CHECK:     # Symbol 38
-// CHECK:    (('n_strx', 143)
+// CHECK:    (('n_strx', 19)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 35)
 // CHECK:     ('n_desc', 0)
@@ -1005,7 +1005,7 @@ D39:
 // CHECK:     ('_string', 'D38')
 // CHECK:    ),
 // CHECK:     # Symbol 39
-// CHECK:    (('n_strx', 147)
+// CHECK:    (('n_strx', 4)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 36)
 // CHECK:     ('n_desc', 0)
diff --git a/test/MC/MachO/x86_64-mergeable.s b/test/MC/MachO/x86_64-mergeable.s
new file mode 100644
index 000000000000..972477693ed2
--- /dev/null
+++ b/test/MC/MachO/x86_64-mergeable.s
@@ -0,0 +1,59 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin14 %s -filetype=obj -o - | llvm-readobj -r --expand-relocs | FileCheck %s
+
+// Test that we "S + K" produce a relocation with a symbol, but just S produces
+// a relocation with the section.
+
+	.section	__TEXT,__literal4,4byte_literals
+L0:
+	.long	42
+
+	.section	__TEXT,__cstring,cstring_literals
+L1:
+	.asciz	"42"
+
+	.section	__DATA,__data
+	.quad	L0
+	.quad	L0 + 1
+	.quad	L1
+	.quad	L1 + 1
+
+// CHECK:      Relocations [
+// CHECK-NEXT:   Section __data {
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x18
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: L1
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x10
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 0
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: 0x3
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x8
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 1
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: L0
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:     Relocation {
+// CHECK-NEXT:       Offset: 0x0
+// CHECK-NEXT:       PCRel: 0
+// CHECK-NEXT:       Length: 3
+// CHECK-NEXT:       Extern: 0
+// CHECK-NEXT:       Type: X86_64_RELOC_UNSIGNED (0)
+// CHECK-NEXT:       Symbol: 0x2
+// CHECK-NEXT:       Scattered: 0
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/MachO/x86_64-symbols.s b/test/MC/MachO/x86_64-symbols.s
index 804cee84791e..f40183df853c 100644
--- a/test/MC/MachO/x86_64-symbols.s
+++ b/test/MC/MachO/x86_64-symbols.s
@@ -1,4 +1,4 @@
-// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | macho-dump | FileCheck %s
+// RUN: llvm-mc -triple x86_64-apple-darwin10 %s -filetype=obj -o - | llvm-readobj -t | FileCheck %s
 
         .text
 L0:
@@ -121,878 +121,372 @@ D38:
 //L39:
 //D39:
 
-// CHECK: ('cputype', 16777223)
-// CHECK: ('cpusubtype', 3)
-// CHECK: ('filetype', 1)
-// CHECK: ('num_load_commands', 3)
-// CHECK: ('load_commands_size', 2656)
-// CHECK: ('flag', 0)
-// CHECK: ('reserved', 0)
-// CHECK: ('load_commands', [
-// CHECK:   # Load Command 0
-// CHECK:  (('command', 25)
-// CHECK:   ('size', 2552)
-// CHECK:   ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:   ('vm_addr', 0)
-// CHECK:   ('vm_size', 0)
-// CHECK:   ('file_offset', 2688)
-// CHECK:   ('file_size', 0)
-// CHECK:   ('maxprot', 7)
-// CHECK:   ('initprot', 7)
-// CHECK:   ('num_sections', 31)
-// CHECK:   ('flags', 0)
-// CHECK:   ('sections', [
-// CHECK:     # Section 0
-// CHECK:    (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x80000000)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 1
-// CHECK:    (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x0)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 2
-// CHECK:    (('section_name', '__static_const\x00\x00')
-// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x0)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 3
-// CHECK:    (('section_name', '__cstring\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x2)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 4
-// CHECK:    (('section_name', '__literal4\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 2)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x3)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 5
-// CHECK:    (('section_name', '__literal8\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 3)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x4)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 6
-// CHECK:    (('section_name', '__literal16\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 4)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0xe)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 7
-// CHECK:    (('section_name', '__constructor\x00\x00\x00')
-// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x0)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 8
-// CHECK:    (('section_name', '__destructor\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x0)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 9
-// CHECK:    (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x0)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 10
-// CHECK:    (('section_name', '__static_data\x00\x00\x00')
-// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x0)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 11
-// CHECK:    (('section_name', '__dyld\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x0)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 12
-// CHECK:    (('section_name', '__mod_init_func\x00')
-// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 2)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x9)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 13
-// CHECK:    (('section_name', '__mod_term_func\x00')
-// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 2)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0xa)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 14
-// CHECK:    (('section_name', '__const\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x0)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 15
-// CHECK:    (('section_name', '__class\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x10000000)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 16
-// CHECK:    (('section_name', '__meta_class\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x10000000)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 17
-// CHECK:    (('section_name', '__cat_cls_meth\x00\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x10000000)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 18
-// CHECK:    (('section_name', '__cat_inst_meth\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x10000000)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 19
-// CHECK:    (('section_name', '__protocol\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x10000000)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 20
-// CHECK:    (('section_name', '__string_object\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x10000000)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 21
-// CHECK:    (('section_name', '__cls_meth\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x10000000)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 22
-// CHECK:    (('section_name', '__inst_meth\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x10000000)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 23
-// CHECK:    (('section_name', '__cls_refs\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 2)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x10000005)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 24
-// CHECK:    (('section_name', '__message_refs\x00\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 2)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x10000005)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 25
-// CHECK:    (('section_name', '__symbols\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x10000000)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 26
-// CHECK:    (('section_name', '__category\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x10000000)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 27
-// CHECK:    (('section_name', '__class_vars\x00\x00\x00\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x10000000)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 28
-// CHECK:    (('section_name', '__instance_vars\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x10000000)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 29
-// CHECK:    (('section_name', '__module_info\x00\x00\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x10000000)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:     # Section 30
-// CHECK:    (('section_name', '__selector_strs\x00')
-// CHECK:     ('segment_name', '__OBJC\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-// CHECK:     ('address', 0)
-// CHECK:     ('size', 0)
-// CHECK:     ('offset', 2688)
-// CHECK:     ('alignment', 0)
-// CHECK:     ('reloc_offset', 0)
-// CHECK:     ('num_reloc', 0)
-// CHECK:     ('flags', 0x2)
-// CHECK:     ('reserved1', 0)
-// CHECK:     ('reserved2', 0)
-// CHECK:     ('reserved3', 0)
-// CHECK:    ),
-// CHECK:   ('_relocations', [
-// CHECK:   ])
-// CHECK:   ])
-// CHECK:  ),
-// CHECK:   # Load Command 1
-// CHECK:  (('command', 2)
-// CHECK:   ('size', 24)
-// CHECK:   ('symoff', 2688)
-// CHECK:   ('nsyms', 40)
-// CHECK:   ('stroff', 3328)
-// CHECK:   ('strsize', 152)
-// CHECK:   ('_string_data', '\x00D0\x00D1\x00D2\x00D3\x00L4\x00D4\x00D5\x00D6\x00D7\x00D8\x00D9\x00D12\x00D13\x00D16\x00D17\x00D18\x00D19\x00D20\x00D21\x00D22\x00D23\x00D24\x00D25\x00D26\x00D27\x00D28\x00D29\x00D30\x00D31\x00D32\x00D33\x00D34\x00L35\x00D35\x00L36\x00D36\x00L37\x00D37\x00L38\x00D38\x00\x00\x00')
-// CHECK:   ('_symbols', [
-// CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 1)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 1)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D0')
-// CHECK:    ),
-// CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 4)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 1)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D1')
-// CHECK:    ),
-// CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 7)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 2)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D2')
-// CHECK:    ),
-// CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 10)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 3)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D3')
-// CHECK:    ),
-// CHECK:     # Symbol 4
-// CHECK:    (('n_strx', 13)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 4)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'L4')
-// CHECK:    ),
-// CHECK:     # Symbol 5
-// CHECK:    (('n_strx', 16)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 4)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D4')
-// CHECK:    ),
-// CHECK:     # Symbol 6
-// CHECK:    (('n_strx', 19)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 5)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D5')
-// CHECK:    ),
-// CHECK:     # Symbol 7
-// CHECK:    (('n_strx', 22)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 6)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D6')
-// CHECK:    ),
-// CHECK:     # Symbol 8
-// CHECK:    (('n_strx', 25)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 7)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D7')
-// CHECK:    ),
-// CHECK:     # Symbol 9
-// CHECK:    (('n_strx', 28)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 8)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D8')
-// CHECK:    ),
-// CHECK:     # Symbol 10
-// CHECK:    (('n_strx', 31)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 9)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D9')
-// CHECK:    ),
-// CHECK:     # Symbol 11
-// CHECK:    (('n_strx', 34)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 10)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D12')
-// CHECK:    ),
-// CHECK:     # Symbol 12
-// CHECK:    (('n_strx', 38)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 11)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D13')
-// CHECK:    ),
-// CHECK:     # Symbol 13
-// CHECK:    (('n_strx', 42)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 12)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D16')
-// CHECK:    ),
-// CHECK:     # Symbol 14
-// CHECK:    (('n_strx', 46)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 13)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D17')
-// CHECK:    ),
-// CHECK:     # Symbol 15
-// CHECK:    (('n_strx', 50)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 14)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D18')
-// CHECK:    ),
-// CHECK:     # Symbol 16
-// CHECK:    (('n_strx', 54)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 15)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D19')
-// CHECK:    ),
-// CHECK:     # Symbol 17
-// CHECK:    (('n_strx', 58)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 16)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D20')
-// CHECK:    ),
-// CHECK:     # Symbol 18
-// CHECK:    (('n_strx', 62)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 17)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D21')
-// CHECK:    ),
-// CHECK:     # Symbol 19
-// CHECK:    (('n_strx', 66)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 18)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D22')
-// CHECK:    ),
-// CHECK:     # Symbol 20
-// CHECK:    (('n_strx', 70)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 19)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D23')
-// CHECK:    ),
-// CHECK:     # Symbol 21
-// CHECK:    (('n_strx', 74)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 20)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D24')
-// CHECK:    ),
-// CHECK:     # Symbol 22
-// CHECK:    (('n_strx', 78)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 21)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D25')
-// CHECK:    ),
-// CHECK:     # Symbol 23
-// CHECK:    (('n_strx', 82)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 22)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D26')
-// CHECK:    ),
-// CHECK:     # Symbol 24
-// CHECK:    (('n_strx', 86)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 23)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D27')
-// CHECK:    ),
-// CHECK:     # Symbol 25
-// CHECK:    (('n_strx', 90)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 24)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D28')
-// CHECK:    ),
-// CHECK:     # Symbol 26
-// CHECK:    (('n_strx', 94)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 25)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D29')
-// CHECK:    ),
-// CHECK:     # Symbol 27
-// CHECK:    (('n_strx', 98)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 26)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D30')
-// CHECK:    ),
-// CHECK:     # Symbol 28
-// CHECK:    (('n_strx', 102)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 27)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D31')
-// CHECK:    ),
-// CHECK:     # Symbol 29
-// CHECK:    (('n_strx', 106)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 28)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D32')
-// CHECK:    ),
-// CHECK:     # Symbol 30
-// CHECK:    (('n_strx', 110)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 29)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D33')
-// CHECK:    ),
-// CHECK:     # Symbol 31
-// CHECK:    (('n_strx', 114)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 30)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D34')
-// CHECK:    ),
-// CHECK:     # Symbol 32
-// CHECK:    (('n_strx', 118)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 4)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'L35')
-// CHECK:    ),
-// CHECK:     # Symbol 33
-// CHECK:    (('n_strx', 122)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 4)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D35')
-// CHECK:    ),
-// CHECK:     # Symbol 34
-// CHECK:    (('n_strx', 126)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 4)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'L36')
-// CHECK:    ),
-// CHECK:     # Symbol 35
-// CHECK:    (('n_strx', 130)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 4)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D36')
-// CHECK:    ),
-// CHECK:     # Symbol 36
-// CHECK:    (('n_strx', 134)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 4)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'L37')
-// CHECK:    ),
-// CHECK:     # Symbol 37
-// CHECK:    (('n_strx', 138)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 4)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D37')
-// CHECK:    ),
-// CHECK:     # Symbol 38
-// CHECK:    (('n_strx', 142)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 31)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'L38')
-// CHECK:    ),
-// CHECK:     # Symbol 39
-// CHECK:    (('n_strx', 146)
-// CHECK:     ('n_type', 0xe)
-// CHECK:     ('n_sect', 31)
-// CHECK:     ('n_desc', 0)
-// CHECK:     ('n_value', 0)
-// CHECK:     ('_string', 'D38')
-// CHECK:    ),
-// CHECK:   ])
-// CHECK:  ),
-// CHECK:   # Load Command 2
-// CHECK:  (('command', 11)
-// CHECK:   ('size', 80)
-// CHECK:   ('ilocalsym', 0)
-// CHECK:   ('nlocalsym', 40)
-// CHECK:   ('iextdefsym', 40)
-// CHECK:   ('nextdefsym', 0)
-// CHECK:   ('iundefsym', 40)
-// CHECK:   ('nundefsym', 0)
-// CHECK:   ('tocoff', 0)
-// CHECK:   ('ntoc', 0)
-// CHECK:   ('modtaboff', 0)
-// CHECK:   ('nmodtab', 0)
-// CHECK:   ('extrefsymoff', 0)
-// CHECK:   ('nextrefsyms', 0)
-// CHECK:   ('indirectsymoff', 0)
-// CHECK:   ('nindirectsyms', 0)
-// CHECK:   ('extreloff', 0)
-// CHECK:   ('nextrel', 0)
-// CHECK:   ('locreloff', 0)
-// CHECK:   ('nlocrel', 0)
-// CHECK:   ('_indirect_symbols', [
-// CHECK:   ])
-// CHECK:  ),
-// CHECK: ])
+        .section foo, bar
+        .long L4 + 1
+        .long L35 + 1
+        .long L36 + 1
+        .long L37 + 1
+        .long L38 + 1
+
+// CHECK: Symbols [
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D0 (139)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __text (0x1)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D1 (128)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __text (0x1)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D2 (113)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __const (0x2)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D3 (98)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __static_const (0x3)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: L4 (84)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __cstring (0x4)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D4 (87)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __cstring (0x4)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D5 (69)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __literal4 (0x5)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D6 (50)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __literal8 (0x6)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D7 (31)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __literal16 (0x7)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D8 (12)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __constructor (0x8)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D9 (1)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __destructor (0x9)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D12 (124)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __data (0xA)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D13 (109)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __static_data (0xB)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D16 (65)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __dyld (0xC)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D17 (46)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __mod_init_func (0xD)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D18 (27)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __mod_term_func (0xE)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D19 (8)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __const (0xF)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D20 (146)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __class (0x10)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D21 (135)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __meta_class (0x11)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D22 (120)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __cat_cls_meth (0x12)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D23 (105)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __cat_inst_meth (0x13)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D24 (94)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __protocol (0x14)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D25 (80)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __string_object (0x15)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D26 (61)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __cls_meth (0x16)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D27 (42)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __inst_meth (0x17)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D28 (23)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __cls_refs (0x18)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D29 (4)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __message_refs (0x19)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D30 (142)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __symbols (0x1A)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D31 (131)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __category (0x1B)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D32 (116)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __class_vars (0x1C)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D33 (101)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __instance_vars (0x1D)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D34 (90)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __module_info (0x1E)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: L35 (72)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __cstring (0x4)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D35 (76)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __cstring (0x4)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: L36 (53)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __cstring (0x4)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D36 (57)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __cstring (0x4)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: L37 (34)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __cstring (0x4)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D37 (38)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __cstring (0x4)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: L38 (15)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __selector_strs (0x1F)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT:   Symbol {
+// CHECK-NEXT:     Name: D38 (19)
+// CHECK-NEXT:     Type: Section (0xE)
+// CHECK-NEXT:     Section: __selector_strs (0x1F)
+// CHECK-NEXT:     RefType: UndefinedNonLazy (0x0)
+// CHECK-NEXT:     Flags [ (0x0)
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Value: 0x0
+// CHECK-NEXT:   }
+// CHECK-NEXT: ]
diff --git a/test/MC/MachO/zerofill-3.s b/test/MC/MachO/zerofill-3.s
index cc81fa863420..a4cd31ec0a40 100644
--- a/test/MC/MachO/zerofill-3.s
+++ b/test/MC/MachO/zerofill-3.s
@@ -78,10 +78,10 @@
 // CHECK:   ('nsyms', 4)
 // CHECK:   ('stroff', 372)
 // CHECK:   ('strsize', 52)
-// CHECK:   ('_string_data', '\x00sym_lcomm_C\x00sym_lcomm_D\x00sym_lcomm_A\x00sym_lcomm_B\x00\x00\x00\x00')
+// CHECK:   ('_string_data', '\x00sym_lcomm_D\x00sym_lcomm_C\x00sym_lcomm_B\x00sym_lcomm_A\x00\x00\x00\x00')
 // CHECK:   ('_symbols', [
 // CHECK:     # Symbol 0
-// CHECK:    (('n_strx', 25)
+// CHECK:    (('n_strx', 37)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -89,7 +89,7 @@
 // CHECK:     ('_string', 'sym_lcomm_A')
 // CHECK:    ),
 // CHECK:     # Symbol 1
-// CHECK:    (('n_strx', 37)
+// CHECK:    (('n_strx', 25)
 // CHECK:     ('n_type', 0xe)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -97,7 +97,7 @@
 // CHECK:     ('_string', 'sym_lcomm_B')
 // CHECK:    ),
 // CHECK:     # Symbol 2
-// CHECK:    (('n_strx', 1)
+// CHECK:    (('n_strx', 13)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
@@ -105,7 +105,7 @@
 // CHECK:     ('_string', 'sym_lcomm_C')
 // CHECK:    ),
 // CHECK:     # Symbol 3
-// CHECK:    (('n_strx', 13)
+// CHECK:    (('n_strx', 1)
 // CHECK:     ('n_type', 0xf)
 // CHECK:     ('n_sect', 2)
 // CHECK:     ('n_desc', 0)
diff --git a/test/MC/Mips/cpload-bad.s b/test/MC/Mips/cpload-bad.s
index 7d186f66f728..803610af922f 100644
--- a/test/MC/Mips/cpload-bad.s
+++ b/test/MC/Mips/cpload-bad.s
@@ -3,13 +3,25 @@
 
         .text
         .option pic2
+        .set noreorder
+        .set mips16
+        .cpload $25
+# ASM: :[[@LINE-1]]:17: error: .cpload is not supported in Mips16 mode
+
+        .set nomips16
         .set reorder
         .cpload $25
-# ASM: :[[@LINE-1]]:9: warning: .cpload in reorder section
+# ASM: :[[@LINE-1]]:9: warning: .cpload should be inside a noreorder section
+
         .set noreorder
         .cpload $32
 # ASM: :[[@LINE-1]]:17: error: invalid register
+
         .cpload $foo
 # ASM: :[[@LINE-1]]:17: error: expected register containing function address
+
         .cpload bar
 # ASM: :[[@LINE-1]]:17: error: expected register containing function address
+
+        .cpload $25 foobar
+# ASM: :[[@LINE-1]]:21: error: unexpected token, expected end of statement
diff --git a/test/MC/Mips/cpload.s b/test/MC/Mips/cpload.s
index bc5e79787ba3..46b3ee4c7955 100644
--- a/test/MC/Mips/cpload.s
+++ b/test/MC/Mips/cpload.s
@@ -1,12 +1,16 @@
 # RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 | FileCheck %s -check-prefix=ASM
 #
-# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -filetype=obj -o -| \
-# RUN: llvm-objdump -d -r -arch=mips - | \
-# RUN: FileCheck %s -check-prefix=OBJ
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -mattr=+o32 -filetype=obj -o -| \
+# RUN:  llvm-objdump -d -r -arch=mips - | \
+# RUN:    FileCheck %s -check-prefix=OBJ-O32
 
-# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 -filetype=obj -o -| \
-# RUN: llvm-objdump -d -r -arch=mips - | \
-# RUN: FileCheck %s -check-prefix=OBJ64
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 -mattr=-n64,+n32 -filetype=obj -o -| \
+# RUN:  llvm-objdump -d -r -arch=mips - | \
+# RUN:    FileCheck %s -check-prefix=OBJ-N32
+
+# RUN: llvm-mc %s -arch=mips64 -mcpu=mips64r2 -mattr=+n64 -filetype=obj -o -| \
+# RUN:  llvm-objdump -d -r -arch=mips - | \
+# RUN:    FileCheck %s -check-prefix=OBJ-N64
 
 # ASM:    .text
 # ASM:    .option pic2
@@ -14,17 +18,22 @@
 # ASM:    .cpload $25
 # ASM:    .set reorder
 
-# OBJ:    .text
-# OBJ:    lui $gp, 0
-# OBJ: R_MIPS_HI16 _gp_disp
-# OBJ:    addiu $gp, $gp, 0
-# OBJ: R_MIPS_LO16 _gp_disp
-# OBJ:    addu $gp, $gp, $25
+# OBJ-O32:    .text
+# OBJ-O32:    lui $gp, 0
+# OBJ-O32: R_MIPS_HI16 _gp_disp
+# OBJ-O32:    addiu $gp, $gp, 0
+# OBJ-O32: R_MIPS_LO16 _gp_disp
+# OBJ-O32:    addu $gp, $gp, $25
+
+# OBJ-N32-NOT: .text
+# OBJ-N32-NOT: lui   $gp, 0
+# OBJ-N32-NOT: addiu $gp, $gp, 0
+# OBJ-N32-NOT: addu  $gp, $gp, $25
 
-# OBJ64: .text
-# OBJ64-NOT: lui $gp, 0
-# OBJ64-NOT: addiu $gp, $gp, 0
-# OBJ64-NOT: addu $gp, $gp, $25
+# OBJ-N64-NOT: .text
+# OBJ-N64-NOT: lui   $gp, 0
+# OBJ-N64-NOT: addiu $gp, $gp, 0
+# OBJ-N64-NOT: addu  $gp, $gp, $25
 
         .text
         .option pic2
diff --git a/test/MC/Mips/elf-objdump.s b/test/MC/Mips/elf-objdump.s
deleted file mode 100644
index 6a5c2a5bf6fa..000000000000
--- a/test/MC/Mips/elf-objdump.s
+++ /dev/null
@@ -1,11 +0,0 @@
-// 32 bit big endian
-// RUN: llvm-mc -filetype=obj -triple mips-unknown-linux %s -o - | llvm-objdump -d -triple mips-unknown-linux  - | FileCheck %s
-// 32 bit little endian
-// RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux %s -o - | llvm-objdump -d -triple mips-unknown-linux  - | FileCheck %s
-// 64 bit big endian
-// RUN: llvm-mc -filetype=obj -arch=mips64 -triple mips64-unknown-linux %s -o - | llvm-objdump -d -triple mips-unknown-linux - | FileCheck %s
-// 64 bit little endian
-// RUN: llvm-mc -filetype=obj -arch=mips64el -triple mips64el-unknown-linux %s -o - | llvm-objdump -d -triple mips-unknown-linux - | FileCheck %s
-
-// We just want to see if llvm-objdump works at all.
-// CHECK: .text
diff --git a/test/MC/Mips/micromips-16-bit-instructions.s b/test/MC/Mips/micromips-16-bit-instructions.s
index 31bddcc58d7f..8fd6d450ac90 100644
--- a/test/MC/Mips/micromips-16-bit-instructions.s
+++ b/test/MC/Mips/micromips-16-bit-instructions.s
@@ -9,19 +9,141 @@
 #------------------------------------------------------------------------------
 # Little endian
 #------------------------------------------------------------------------------
+# CHECK-EL: addu16  $6, $17, $4     # encoding: [0x42,0x07]
+# CHECK-EL: subu16  $5, $16, $3     # encoding: [0xb1,0x06]
+# CHECK-EL: andi16  $16, $2, 31     # encoding: [0x29,0x2c]
+# CHECK-EL: and16   $16, $2         # encoding: [0x82,0x44]
+# CHECK-EL: not16   $17, $3         # encoding: [0x0b,0x44]
+# CHECK-EL: or16    $16, $4         # encoding: [0xc4,0x44]
+# CHECK-EL: xor16   $17, $5         # encoding: [0x4d,0x44]
+# CHECK-EL: sll16   $3, $16, 5      # encoding: [0x8a,0x25]
+# CHECK-EL: srl16   $4, $17, 6      # encoding: [0x1d,0x26]
+# CHECK-EL: lbu16   $3, 4($17)      # encoding: [0x94,0x09]
+# CHECK-EL: lbu16   $3, -1($16)     # encoding: [0x8f,0x09]
+# CHECK-EL: lhu16   $3, 4($16)      # encoding: [0x82,0x29]
+# CHECK-EL: lw16    $4, 8($17)      # encoding: [0x12,0x6a]
+# CHECK-EL: sb16    $3, 4($16)      # encoding: [0x84,0x89]
+# CHECK-EL: sh16    $4, 8($17)      # encoding: [0x14,0xaa]
+# CHECK-EL: sw16    $4, 4($17)      # encoding: [0x11,0xea]
+# CHECK-EL: sw16    $zero, 4($17)   # encoding: [0x11,0xe8]
+# CHECK-EL: lw      $3, 32($sp)     # encoding: [0x68,0x48]
+# CHECK-EL: sw      $4, 124($sp)    # encoding: [0x9f,0xc8]
+# CHECK-EL: li16    $3, -1          # encoding: [0xff,0xed]
+# CHECK-EL: li16    $3, 126         # encoding: [0xfe,0xed]
+# CHECK-EL: addiur1sp $7, 4         # encoding: [0x83,0x6f]
+# CHECK-EL: addiur2 $6, $7, -1      # encoding: [0x7e,0x6f]
+# CHECK-EL: addiur2 $6, $7, 12      # encoding: [0x76,0x6f]
+# CHECK-EL: addius5 $7, -2          # encoding: [0xfc,0x4c]
+# CHECK-EL: addiusp -1028           # encoding: [0xff,0x4f]
+# CHECK-EL: addiusp -1032           # encoding: [0xfd,0x4f]
+# CHECK-EL: addiusp 1024            # encoding: [0x01,0x4c]
+# CHECK-EL: addiusp 1028            # encoding: [0x03,0x4c]
+# CHECK-EL: addiusp -16             # encoding: [0xf9,0x4f]
 # CHECK-EL: mfhi    $9              # encoding: [0x09,0x46]
 # CHECK-EL: mflo    $9              # encoding: [0x49,0x46]
 # CHECK-EL: move    $25, $1         # encoding: [0x21,0x0f]
-# CHECK-EL: jalr    $9              # encoding: [0xc9,0x45]
+# CHECK-EL: jrc     $9              # encoding: [0xa9,0x45]
+# CHECK-NEXT: jalr    $9            # encoding: [0xc9,0x45]
+# CHECK-EL: jraddiusp 20            # encoding: [0x05,0x47]
+# CHECK-NEXT: jalrs16 $9            # encoding: [0xe9,0x45]
+# CHECK-EL: nop                     # encoding: [0x00,0x0c]
+# CHECK-EL: jr16    $9              # encoding: [0x89,0x45]
+# CHECK-EL: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: beqz16 $6, 20           # encoding: [0x0a,0x8f]
+# CHECK-EL: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: bnez16 $6, 20           # encoding: [0x0a,0xaf]
+# CHECK-EL: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: break16 8               # encoding: [0x88,0x46]
+# CHECK-EL: sdbbp16 14              # encoding: [0xce,0x46]
 #------------------------------------------------------------------------------
 # Big endian
 #------------------------------------------------------------------------------
+# CHECK-EB: addu16  $6, $17, $4     # encoding: [0x07,0x42]
+# CHECK-EB: subu16  $5, $16, $3     # encoding: [0x06,0xb1]
+# CHECK-EB: andi16  $16, $2, 31     # encoding: [0x2c,0x29]
+# CHECK-EB: and16   $16, $2         # encoding: [0x44,0x82]
+# CHECK-EB: not16   $17, $3         # encoding: [0x44,0x0b]
+# CHECK-EB: or16    $16, $4         # encoding: [0x44,0xc4]
+# CHECK-EB: xor16   $17, $5         # encoding: [0x44,0x4d]
+# CHECK-EB: sll16   $3, $16, 5      # encoding: [0x25,0x8a]
+# CHECK-EB: srl16   $4, $17, 6      # encoding: [0x26,0x1d]
+# CHECK-EB: lbu16   $3, 4($17)      # encoding: [0x09,0x94]
+# CHECK-EB: lbu16   $3, -1($16)     # encoding: [0x09,0x8f]
+# CHECK-EB: lhu16   $3, 4($16)      # encoding: [0x29,0x82]
+# CHECK-EB: lw16    $4, 8($17)      # encoding: [0x6a,0x12]
+# CHECK-EB: sb16    $3, 4($16)      # encoding: [0x89,0x84]
+# CHECK-EB: sh16    $4, 8($17)      # encoding: [0xaa,0x14]
+# CHECK-EB: sw16    $4, 4($17)      # encoding: [0xea,0x11]
+# CHECK-EB: sw16    $zero, 4($17)   # encoding: [0xe8,0x11]
+# CHECK-EB: lw      $3, 32($sp)     # encoding: [0x48,0x68]
+# CHECK-EB: sw      $4, 124($sp)    # encoding: [0xc8,0x9f]
+# CHECK-EB: li16    $3, -1          # encoding: [0xed,0xff]
+# CHECK-EB: li16    $3, 126         # encoding: [0xed,0xfe]
+# CHECK-EB: addiur1sp $7, 4         # encoding: [0x6f,0x83]
+# CHECK-EB: addiur2 $6, $7, -1      # encoding: [0x6f,0x7e]
+# CHECK-EB: addiur2 $6, $7, 12      # encoding: [0x6f,0x76]
+# CHECK-EB: addius5 $7, -2          # encoding: [0x4c,0xfc]
+# CHECK-EB: addiusp -1028           # encoding: [0x4f,0xff]
+# CHECK-EB: addiusp -1032           # encoding: [0x4f,0xfd]
+# CHECK-EB: addiusp 1024            # encoding: [0x4c,0x01]
+# CHECK-EB: addiusp 1028            # encoding: [0x4c,0x03]
+# CHECK-EB: addiusp -16             # encoding: [0x4f,0xf9]
 # CHECK-EB: mfhi    $9              # encoding: [0x46,0x09]
 # CHECK-EB: mflo    $9              # encoding: [0x46,0x49]
 # CHECK-EB: move    $25, $1         # encoding: [0x0f,0x21]
-# CHECK-EB: jalr    $9              # encoding: [0x45,0xc9]
+# CHECK-EB: jrc     $9              # encoding: [0x45,0xa9]
+# CHECK-NEXT: jalr    $9            # encoding: [0x45,0xc9]
+# CHECK-EB: jraddiusp 20            # encoding: [0x47,0x05]
+# CHECK-NEXT: jalrs16 $9            # encoding: [0x45,0xe9]
+# CHECK-EB: nop                     # encoding: [0x0c,0x00]
+# CHECK-EB: jr16    $9              # encoding: [0x45,0x89]
+# CHECK-EB: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: beqz16 $6, 20           # encoding: [0x8f,0x0a]
+# CHECK-EB: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: bnez16 $6, 20           # encoding: [0xaf,0x0a]
+# CHECK-EB: nop                     # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: break16 8               # encoding: [0x46,0x88]
+# CHECK-EB: sdbbp16 14              # encoding: [0x46,0xce]
 
+    addu16  $6, $17, $4
+    subu16  $5, $16, $3
+    andi16  $16, $2, 31
+    and16   $16, $2
+    not16   $17, $3
+    or16    $16, $4
+    xor16   $17, $5
+    sll16   $3, $16, 5
+    srl16   $4, $17, 6
+    lbu16   $3, 4($17)
+    lbu16   $3, -1($16)
+    lhu16   $3, 4($16)
+    lw16    $4, 8($17)
+    sb16    $3, 4($16)
+    sh16    $4, 8($17)
+    sw16    $4, 4($17)
+    sw16    $0, 4($17)
+    lw      $3, 32($sp)
+    sw      $4, 124($sp)
+    li16    $3, -1
+    li16    $3, 126
+    addiur1sp $7, 4
+    addiur2 $6, $7, -1
+    addiur2 $6, $7, 12
+    addius5 $7, -2
+    addiusp -1028
+    addiusp -1032
+    addiusp 1024
+    addiusp 1028
+    addiusp -16
     mfhi    $9
     mflo    $9
     move    $25, $1
+    jrc     $9
     jalr    $9
+    jraddiusp 20
+    jalrs16 $9
+    jr16    $9
+    beqz16 $6, 20
+    bnez16 $6, 20
+    break16 8
+    sdbbp16 14
diff --git a/test/MC/Mips/micromips-bad-branches.s b/test/MC/Mips/micromips-bad-branches.s
index 573605e18d6b..f64cd9f9e5bd 100644
--- a/test/MC/Mips/micromips-bad-branches.s
+++ b/test/MC/Mips/micromips-bad-branches.s
@@ -126,6 +126,11 @@
 # CHECK: error: branch target out of range
 # CHECK:        bc1t $fcc0, 65536
 
+# CHECK: error: branch to misaligned address
+# CHECK:        beqz16 $6, 31
+# CHECK: error: branch target out of range
+# CHECK:        beqz16 $6, 130
+
         b -65535
         b -65536
         b -65537
@@ -223,3 +228,6 @@
         bc1t $fcc0, 65534
         bc1t $fcc0, 65535
         bc1t $fcc0, 65536
+
+        beqz16 $6, 31
+        beqz16 $6, 130
diff --git a/test/MC/Mips/micromips-branch-instructions.s b/test/MC/Mips/micromips-branch-instructions.s
index 84df2a17c83c..d1e78988901a 100644
--- a/test/MC/Mips/micromips-branch-instructions.s
+++ b/test/MC/Mips/micromips-branch-instructions.s
@@ -29,6 +29,10 @@
 # CHECK-EL: nop                  # encoding: [0x00,0x00,0x00,0x00]
 # CHECK-EL: bltz $6, 1332        # encoding: [0x06,0x40,0x9a,0x02]
 # CHECK-EL: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: bgezals $6, 1332     # encoding: [0x66,0x42,0x9a,0x02]
+# CHECK-EL: nop                  # encoding: [0x00,0x0c]
+# CHECK-EL: bltzals $6, 1332     # encoding: [0x26,0x42,0x9a,0x02]
+# CHECK-EL: nop                  # encoding: [0x00,0x0c]
 #------------------------------------------------------------------------------
 # Big endian
 #------------------------------------------------------------------------------
@@ -52,6 +56,10 @@
 # CHECK-EB: nop                  # encoding: [0x00,0x00,0x00,0x00]
 # CHECK-EB: bltz $6, 1332        # encoding: [0x40,0x06,0x02,0x9a]
 # CHECK-EB: nop                  # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: bgezals $6, 1332     # encoding: [0x42,0x66,0x02,0x9a]
+# CHECK-EB: nop                  # encoding: [0x0c,0x00]
+# CHECK-EB: bltzals $6, 1332     # encoding: [0x42,0x26,0x02,0x9a]
+# CHECK-EB: nop                  # encoding: [0x0c,0x00]
 
      b      1332
      beq    $9,$6,1332
@@ -63,3 +71,5 @@
      bne    $9,$6,1332
      bal    1332
      bltz   $6,1332
+     bgezals $6,1332
+     bltzals $6,1332
diff --git a/test/MC/Mips/micromips-branch7.s b/test/MC/Mips/micromips-branch7.s
new file mode 100644
index 000000000000..deb7307e938c
--- /dev/null
+++ b/test/MC/Mips/micromips-branch7.s
@@ -0,0 +1,27 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding \
+# RUN: -mattr=micromips | FileCheck %s -check-prefix=CHECK-FIXUP
+# RUN: llvm-mc %s -filetype=obj -triple=mipsel-unknown-linux \
+# RUN: -mattr=micromips | llvm-readobj -r \
+# RUN: | FileCheck %s -check-prefix=CHECK-ELF
+#------------------------------------------------------------------------------
+# Check that the assembler can handle the documented syntax
+# for relocations.
+#------------------------------------------------------------------------------
+# CHECK-FIXUP: beqz16 $6, bar  # encoding: [0b0AAAAAAA,0x8f]
+# CHECK-FIXUP:                 #   fixup A - offset: 0,
+# CHECK-FIXUP:                     value: bar, kind: fixup_MICROMIPS_PC7_S1
+# CHECK-FIXUP: nop             # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-FIXUP: bnez16 $6, bar  # encoding: [0b0AAAAAAA,0xaf]
+# CHECK-FIXUP:                 #   fixup A - offset: 0,
+# CHECK-FIXUP:                     value: bar, kind: fixup_MICROMIPS_PC7_S1
+# CHECK-FIXUP: nop             # encoding: [0x00,0x00,0x00,0x00]
+#------------------------------------------------------------------------------
+# Check that the appropriate relocations were created.
+#------------------------------------------------------------------------------
+# CHECK-ELF: Relocations [
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_PC7_S1
+# CHECK-ELF:     0x{{[0-9,A-F]+}} R_MICROMIPS_PC7_S1
+# CHECK-ELF: ]
+
+  beqz16 $6, bar
+  bnez16 $6, bar
diff --git a/test/MC/Mips/micromips-control-instructions.s b/test/MC/Mips/micromips-control-instructions.s
index aff84c245941..76c953f85d55 100644
--- a/test/MC/Mips/micromips-control-instructions.s
+++ b/test/MC/Mips/micromips-control-instructions.s
@@ -9,6 +9,17 @@
 #------------------------------------------------------------------------------
 # Little endian
 #------------------------------------------------------------------------------
+# CHECK-EL:    sdbbp                      # encoding: [0x00,0x00,0x7c,0xdb]
+# CHECK-EL:    sdbbp 34                   # encoding: [0x22,0x00,0x7c,0xdb]
+# CHECK-EL:    .set push
+# CHECK-EL:    .set mips32r2
+# CHECK-EL:    rdhwr $5, $29
+# CHECK-EL:    .set pop                   # encoding: [0xbd,0x00,0x3c,0x6b]
+# CHECK-EL:    cache 1, 8($5)             # encoding: [0x25,0x20,0x08,0x60]
+# CHECK-EL:    pref 1, 8($5)              # encoding: [0x25,0x60,0x08,0x20]
+# CHECK-EL:    ssnop                      # encoding: [0x00,0x00,0x00,0x08]
+# CHECK-EL:    ehb                        # encoding: [0x00,0x00,0x00,0x18]
+# CHECK-EL:    pause                      # encoding: [0x00,0x00,0x00,0x28]
 # CHECK-EL:    break                      # encoding: [0x00,0x00,0x07,0x00]
 # CHECK-EL:    break 7                    # encoding: [0x07,0x00,0x07,0x00]
 # CHECK-EL:    break 7, 5                 # encoding: [0x07,0x00,0x47,0x01]
@@ -24,9 +35,24 @@
 # CHECK-EL:    ei  $10                    # encoding: [0x0a,0x00,0x7c,0x57]
 # CHECK-EL:    wait                       # encoding: [0x00,0x00,0x7c,0x93]
 # CHECK-EL:    wait 17                    # encoding: [0x11,0x00,0x7c,0x93]
+# CHECK-EL:    tlbp                       # encoding: [0x00,0x00,0x7c,0x03]
+# CHECK-EL:    tlbr                       # encoding: [0x00,0x00,0x7c,0x13]
+# CHECK-EL:    tlbwi                      # encoding: [0x00,0x00,0x7c,0x23]
+# CHECK-EL:    tlbwr                      # encoding: [0x00,0x00,0x7c,0x33]
 #------------------------------------------------------------------------------
 # Big endian
 #------------------------------------------------------------------------------
+# CHECK-EB:   sdbbp                       # encoding: [0x00,0x00,0xdb,0x7c]
+# CHECK-EB:   sdbbp 34                    # encoding: [0x00,0x22,0xdb,0x7c]
+# CHECK-EB:   .set push
+# CHECK-EB:   .set mips32r2
+# CHECK-EB:   rdhwr $5, $29
+# CHECK-EB:   .set pop                    # encoding: [0x00,0xbd,0x6b,0x3c]
+# CHECK-EB:   cache 1, 8($5)              # encoding: [0x20,0x25,0x60,0x08]
+# CHECK-EB:   pref 1, 8($5)               # encoding: [0x60,0x25,0x20,0x08]
+# CHECK-EB:   ssnop                       # encoding: [0x00,0x00,0x08,0x00]
+# CHECK-EB:   ehb                         # encoding: [0x00,0x00,0x18,0x00]
+# CHECK-EB:   pause                       # encoding: [0x00,0x00,0x28,0x00]
 # CHECK-EB:   break                       # encoding: [0x00,0x00,0x00,0x07]
 # CHECK-EB:   break 7                     # encoding: [0x00,0x07,0x00,0x07]
 # CHECK-EB:   break 7, 5                  # encoding: [0x00,0x07,0x01,0x47]
@@ -42,7 +68,19 @@
 # CHECK-EB:   ei  $10                     # encoding: [0x00,0x0a,0x57,0x7c]
 # CHECK-EB:   wait                        # encoding: [0x00,0x00,0x93,0x7c]
 # CHECK-EB:   wait 17                     # encoding: [0x00,0x11,0x93,0x7c]
+# CHECK-EB:   tlbp                        # encoding: [0x00,0x00,0x03,0x7c]
+# CHECK-EB:   tlbr                        # encoding: [0x00,0x00,0x13,0x7c]
+# CHECK-EB:   tlbwi                       # encoding: [0x00,0x00,0x23,0x7c]
+# CHECK-EB:   tlbwr                       # encoding: [0x00,0x00,0x33,0x7c]
 
+    sdbbp
+    sdbbp 34
+    rdhwr $5, $29
+    cache 1, 8($5)
+    pref 1, 8($5)
+    ssnop
+    ehb
+    pause
     break
     break 7
     break 7,5
@@ -58,3 +96,7 @@
     ei $10
     wait
     wait 17
+    tlbp
+    tlbr
+    tlbwi
+    tlbwr
diff --git a/test/MC/Mips/micromips-fpu-instructions.s b/test/MC/Mips/micromips-fpu-instructions.s
index 5af4f98670e7..0aeb326b4313 100644
--- a/test/MC/Mips/micromips-fpu-instructions.s
+++ b/test/MC/Mips/micromips-fpu-instructions.s
@@ -53,6 +53,8 @@
 # CHECK-EL: ctc1    $6, $0              # encoding: [0xc0,0x54,0x3b,0x18]
 # CHECK-EL: mfc1    $6, $f8             # encoding: [0xc8,0x54,0x3b,0x20]
 # CHECK-EL: mtc1    $6, $f8             # encoding: [0xc8,0x54,0x3b,0x28]
+# CHECK-EL: mfhc1   $6, $f8             # encoding: [0xc8,0x54,0x3b,0x30]
+# CHECK-EL: mthc1   $6, $f8             # encoding: [0xc8,0x54,0x3b,0x38]
 # CHECK-EL: movz.s  $f4, $f6, $7        # encoding: [0xe6,0x54,0x78,0x20]
 # CHECK-EL: movz.d  $f4, $f6, $7        # encoding: [0xe6,0x54,0x78,0x21]
 # CHECK-EL: movn.s  $f4, $f6, $7        # encoding: [0xe6,0x54,0x38,0x20]
@@ -116,6 +118,8 @@
 # CHECK-EB: ctc1    $6, $0              # encoding: [0x54,0xc0,0x18,0x3b]
 # CHECK-EB: mfc1    $6, $f8             # encoding: [0x54,0xc8,0x20,0x3b]
 # CHECK-EB: mtc1    $6, $f8             # encoding: [0x54,0xc8,0x28,0x3b]
+# CHECK-EB: mfhc1   $6, $f8             # encoding: [0x54,0xc8,0x30,0x3b]
+# CHECK-EB: mthc1   $6, $f8             # encoding: [0x54,0xc8,0x38,0x3b]
 # CHECK-EB: movz.s  $f4, $f6, $7        # encoding: [0x54,0xe6,0x20,0x78]
 # CHECK-EB: movz.d  $f4, $f6, $7        # encoding: [0x54,0xe6,0x21,0x78]
 # CHECK-EB: movn.s  $f4, $f6, $7        # encoding: [0x54,0xe6,0x20,0x38]
@@ -175,6 +179,8 @@
     ctc1       $6, $0
     mfc1       $6, $f8
     mtc1       $6, $f8
+    mfhc1      $6, $f8
+    mthc1      $6, $f8
     movz.s     $f4, $f6, $7
     movz.d     $f4, $f6, $7
     movn.s     $f4, $f6, $7
diff --git a/test/MC/Mips/micromips-func-addr.s b/test/MC/Mips/micromips-func-addr.s
new file mode 100644
index 000000000000..e2a4d23d35ff
--- /dev/null
+++ b/test/MC/Mips/micromips-func-addr.s
@@ -0,0 +1,16 @@
+# RUN: llvm-mc %s -filetype=obj -triple=mipsel-unknown-linux \
+# RUN: -mattr=micromips | llvm-readobj -r \
+# RUN: | FileCheck %s -check-prefix=CHECK
+# CHECK: Relocations [
+# CHECK:     0x0 R_MIPS_32 bar 0x0
+# CHECK:     0x4 R_MIPS_32 L1 0x0
+
+  .set    micromips
+  .type   bar,@function
+bar:
+L1:
+  nop
+  .data
+  .4byte bar 
+  .4byte L1 
+
diff --git a/test/MC/Mips/micromips-invalid.s b/test/MC/Mips/micromips-invalid.s
new file mode 100644
index 000000000000..1dbfb113a2c9
--- /dev/null
+++ b/test/MC/Mips/micromips-invalid.s
@@ -0,0 +1,68 @@
+# RUN: not llvm-mc %s -triple=mipsel -show-encoding -mattr=micromips 2>%t1
+# RUN: FileCheck %s < %t1
+
+  addiur1sp $7, 260 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  addiur1sp $7, 241 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: misaligned immediate operand value
+  addiur1sp $8, 240 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  addius5 $7, 9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  addiusp 1032   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  addu16  $6, $14, $4 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  subu16  $5, $16, $9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  andi16  $16, $10, 0x1f # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  andi16  $16, $2, 17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  and16   $16, $8   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  not16   $18, $9   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  or16    $16, $10  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  xor16   $15, $5   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  sll16   $1, $16, 5 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  srl16   $4, $9, 6  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  sll16   $3, $16, 9 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  srl16   $4, $5, 15 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  li16  $8, -1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  li16  $4, -2 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  addiur2 $9, $7, -1 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  addiur2 $6, $7, 10 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  lwm16   $5, $6, $ra, 8($sp)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: $16 or $31 expected
+  lwm16   $16, $19, $ra, 8($sp)  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: consecutive register numbers expected
+  lwm16   $16-$25, $ra, 8($sp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid register operand
+  lwm16   $16, 8($sp)            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  lwm16   $16, $17, 8($sp)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  lwm16   $16-$20, 8($sp)        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  swm16   $5, $6, $ra, 8($sp)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: $16 or $31 expected
+  swm16   $16, $19, $ra, 8($sp)  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: consecutive register numbers expected
+  swm16   $16-$25, $ra, 8($sp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid register operand
+  lwm32   $5, $6, 8($4)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: $16 or $31 expected
+  lwm32   $16, $19, 8($4)  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: consecutive register numbers expected
+  lwm32   $16-$25, 8($4)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid register operand
+  swm32   $5, $6, 8($4)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: $16 or $31 expected
+  swm32   $16, $19, 8($4)  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: consecutive register numbers expected
+  swm32   $16-$25, 8($4)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid register operand
+  lwm32 $16, $17, $18, $19, $20, $21, $22, $23, $24, 8($4) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid register operand
+  lbu16 $9, 8($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  lhu16 $9, 4($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  lw16  $9, 8($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  sb16  $9, 4($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  sh16  $9, 8($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  sw16  $9, 4($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  lbu16 $3, -2($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  lhu16 $3, 64($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  lw16  $4, 68($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  sb16  $3, 64($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  sh16  $4, 68($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  sw16  $4, 64($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  lbu16 $3, -2($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  lhu16 $3, 64($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  lw16  $4, 68($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  sb16  $16, 4($16) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  sh16  $16, 8($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  sw16  $16, 4($17) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  lbu16 $16, 8($9)  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  lhu16 $16, 4($9)  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  lw16  $17, 8($10) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  sb16  $7, 4($9)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  sh16  $7, 8($9)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  sw16  $7, 4($10)  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  cache 256, 8($5)  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  pref 256, 8($5)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: immediate operand value out of range
+  beqz16 $9, 20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+  bnez16 $9, 20 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/micromips-jump-instructions.s b/test/MC/Mips/micromips-jump-instructions.s
index a6c7676f8093..76f85abafe79 100644
--- a/test/MC/Mips/micromips-jump-instructions.s
+++ b/test/MC/Mips/micromips-jump-instructions.s
@@ -19,6 +19,10 @@
 # CHECK-EL: nop         # encoding: [0x00,0x00,0x00,0x00]
 # CHECK-EL: jr $7       # encoding: [0x07,0x00,0x3c,0x0f]
 # CHECK-EL: nop         # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EL: jals 1328         # encoding: [0x00,0x74,0x98,0x02]
+# CHECK-EL: nop               # encoding: [0x00,0x0c]
+# CHECK-EL: jalrs $ra, $6     # encoding: [0xe6,0x03,0x3c,0x4f]
+# CHECK-EL: nop               # encoding: [0x00,0x0c]
 #------------------------------------------------------------------------------
 # Big endian
 #------------------------------------------------------------------------------
@@ -32,9 +36,15 @@
 # CHECK-EB: nop         # encoding: [0x00,0x00,0x00,0x00]
 # CHECK-EB: jr $7       # encoding: [0x00,0x07,0x0f,0x3c]
 # CHECK-EB: nop         # encoding: [0x00,0x00,0x00,0x00]
+# CHECK-EB: jals 1328         # encoding: [0x74,0x00,0x02,0x98]
+# CHECK-EB: nop               # encoding: [0x0c,0x00]
+# CHECK-EB: jalrs $ra, $6     # encoding: [0x03,0xe6,0x4f,0x3c]
+# CHECK-EB: nop               # encoding: [0x0c,0x00]
 
      j 1328
      jal 1328
      jalr $ra, $6
      jr $7
      j $7
+     jals 1328
+     jalrs $ra, $6
diff --git a/test/MC/Mips/micromips-label-test-sections.s b/test/MC/Mips/micromips-label-test-sections.s
new file mode 100644
index 000000000000..569b64c6f98a
--- /dev/null
+++ b/test/MC/Mips/micromips-label-test-sections.s
@@ -0,0 +1,35 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 \
+# RUN:   -mattr=+micromips -filetype=obj -o - | llvm-readobj -t | FileCheck %s
+  .text
+  .set micromips
+f:
+  nop
+g:
+  .section .text
+h:
+  nop
+
+# CHECK: Symbols [
+# CHECK:   Symbol {
+# CHECK:     Name: f
+# CHECK:     Binding: Local
+# CHECK:     Type: None
+# CHECK:     Other: 128
+# CHECK:     Section: .text
+# CHECK:   }
+# CHECK:   Symbol {
+# CHECK:     Name: g
+# CHECK:     Binding: Local
+# CHECK:     Type: None
+# CHECK:     Other: 0
+# CHECK:     Section: .text
+# CHECK:   }
+# CHECK:   Symbol {
+# CHECK:     Name: h
+# CHECK:     Binding: Local
+# CHECK:     Type: None
+# CHECK:     Other: 128
+# CHECK:     Section: .text
+# CHECK:   }
+# CHECK: ]
+
diff --git a/test/MC/Mips/micromips-label-test.s b/test/MC/Mips/micromips-label-test.s
new file mode 100644
index 000000000000..cc1566b9e678
--- /dev/null
+++ b/test/MC/Mips/micromips-label-test.s
@@ -0,0 +1,54 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 \
+# RUN:   -mattr=+micromips -filetype=obj -o - | llvm-readobj -t | FileCheck %s
+  .text
+  .set nomicromips
+f:
+  nop
+g:
+  .set micromips
+  nop
+h:
+  .word 0
+i:
+  nop
+j:
+  .set nomicromips
+  nop
+# CHECK: Symbols [
+# CHECK:   Symbol {
+# CHECK:     Name: f
+# CHECK:     Binding: Local
+# CHECK:     Type: None
+# CHECK:     Other: 0
+# CHECK:     Section: .text
+# CHECK:   }
+# CHECK:   Symbol {
+# CHECK:     Name: g
+# CHECK:     Binding: Local
+# CHECK:     Type: None
+# CHECK:     Other: 128
+# CHECK:     Section: .text
+# CHECK:   }
+# CHECK:   Symbol {
+# CHECK:     Name: h
+# CHECK:     Binding: Local
+# CHECK:     Type: None
+# CHECK:     Other: 0
+# CHECK:     Section: .text
+# CHECK:   }
+# CHECK:   Symbol {
+# CHECK:     Name: i
+# CHECK:     Binding: Local
+# CHECK:     Type: None
+# CHECK:     Other: 128
+# CHECK:     Section: .text
+# CHECK:   }
+# CHECK:   Symbol {
+# CHECK:     Name: j
+# CHECK:     Binding: Local
+# CHECK:     Type: None
+# CHECK:     Other: 0
+# CHECK:     Section: .text
+# CHECK:   }
+# CHECK: ]
+
diff --git a/test/MC/Mips/micromips-loadstore-instructions.s b/test/MC/Mips/micromips-loadstore-instructions.s
index 8a1b93babdd8..995af035dde5 100644
--- a/test/MC/Mips/micromips-loadstore-instructions.s
+++ b/test/MC/Mips/micromips-loadstore-instructions.s
@@ -9,39 +9,84 @@
 #------------------------------------------------------------------------------
 # Little endian
 #------------------------------------------------------------------------------
-# CHECK-EL: lb     $5, 8($4)      # encoding: [0xa4,0x1c,0x08,0x00]
-# CHECK-EL: lbu    $6, 8($4)      # encoding: [0xc4,0x14,0x08,0x00]
-# CHECK-EL: lh     $2, 8($4)      # encoding: [0x44,0x3c,0x08,0x00]
-# CHECK-EL: lhu    $4, 8($2)      # encoding: [0x82,0x34,0x08,0x00]
-# CHECK-EL: lw     $6, 4($5)      # encoding: [0xc5,0xfc,0x04,0x00]
-# CHECK-EL: sb     $5, 8($4)      # encoding: [0xa4,0x18,0x08,0x00]
-# CHECK-EL: sh     $2, 8($4)      # encoding: [0x44,0x38,0x08,0x00]
-# CHECK-EL: sw     $5, 4($6)      # encoding: [0xa6,0xf8,0x04,0x00]
-# CHECK-EL: ll     $2, 8($4)      # encoding: [0x44,0x60,0x08,0x30]
-# CHECK-EL: sc     $2, 8($4)      # encoding: [0x44,0x60,0x08,0xb0]
-# CHECK-EL: lwu    $2, 8($4)      # encoding: [0x44,0x60,0x08,0xe0]
+# CHECK-EL: lb     $5, 8($4)                  # encoding: [0xa4,0x1c,0x08,0x00]
+# CHECK-EL: lbu    $6, 8($4)                  # encoding: [0xc4,0x14,0x08,0x00]
+# CHECK-EL: lh     $2, 8($4)                  # encoding: [0x44,0x3c,0x08,0x00]
+# CHECK-EL: lhu    $4, 8($2)                  # encoding: [0x82,0x34,0x08,0x00]
+# CHECK-EL: lw     $6, 4($5)                  # encoding: [0xc5,0xfc,0x04,0x00]
+# CHECK-EL: lw     $6, 123($sp)               # encoding: [0xdd,0xfc,0x7b,0x00]
+# CHECK-EL: sb     $5, 8($4)                  # encoding: [0xa4,0x18,0x08,0x00]
+# CHECK-EL: sh     $2, 8($4)                  # encoding: [0x44,0x38,0x08,0x00]
+# CHECK-EL: sw     $5, 4($6)                  # encoding: [0xa6,0xf8,0x04,0x00]
+# CHECK-EL: sw     $5, 123($sp)               # encoding: [0xbd,0xf8,0x7b,0x00]
+# CHECK-EL: ll     $2, 8($4)                  # encoding: [0x44,0x60,0x08,0x30]
+# CHECK-EL: sc     $2, 8($4)                  # encoding: [0x44,0x60,0x08,0xb0]
+# CHECK-EL: lwu    $2, 8($4)                  # encoding: [0x44,0x60,0x08,0xe0]
+# CHECK-EL: lwxs   $2, $3($4)                 # encoding: [0x64,0x00,0x18,0x11]
+# CHECK-EL: lwm32  $16, $17, 8($4)            # encoding: [0x44,0x20,0x08,0x50]
+# CHECK-EL: lwm32  $16, $17, $18, $19, 8($4)  # encoding: [0x84,0x20,0x08,0x50]
+# CHECK-EL: lwm32  $16, $17, $18, $19, $20, $21, $22, $23, $fp, 8($4)      # encoding: [0x24,0x21,0x08,0x50]
+# CHECK-EL: lwm32  $16, $17, $18, $19, $ra, 8($4)                          # encoding: [0x84,0x22,0x08,0x50]
+# CHECK-EL: lwm32  $16, $17, $18, $19, $20, $21, $22, $23, $fp, $ra, 8($4) # encoding: [0x24,0x23,0x08,0x50]
+# CHECK-EL: lwm32  $16, $17, $18, $19, $20, $21, $22, $23, $fp, $ra, 8($4) # encoding: [0x24,0x23,0x08,0x50]
+# CHECK-EL: swm32  $16, $17, 8($4)            # encoding: [0x44,0x20,0x08,0xd0]
+# CHECK-EL: swm32  $16, $17, $18, $19, 8($4)  # encoding: [0x84,0x20,0x08,0xd0]
+# CHECK-EL: lwm16  $16, $17, $ra, 8($sp)      # encoding: [0x12,0x45]
+# CHECK-EL: swm16  $16, $17, $ra, 8($sp)      # encoding: [0x52,0x45]
+# CHECK-EL: swp    $16, 8($4)                 # encoding: [0x04,0x22,0x08,0x90]
+# CHECK-EL: lwp    $16, 8($4)                 # encoding: [0x04,0x22,0x08,0x10]
 #------------------------------------------------------------------------------
 # Big endian
 #------------------------------------------------------------------------------
-# CHECK-EB: lb     $5, 8($4)      # encoding: [0x1c,0xa4,0x00,0x08]
-# CHECK-EB: lbu    $6, 8($4)      # encoding: [0x14,0xc4,0x00,0x08]
-# CHECK-EB: lh     $2, 8($4)      # encoding: [0x3c,0x44,0x00,0x08]
-# CHECK-EB: lhu    $4, 8($2)      # encoding: [0x34,0x82,0x00,0x08]
-# CHECK-EB: lw     $6, 4($5)      # encoding: [0xfc,0xc5,0x00,0x04]
-# CHECK-EB: sb     $5, 8($4)      # encoding: [0x18,0xa4,0x00,0x08]
-# CHECK-EB: sh     $2, 8($4)      # encoding: [0x38,0x44,0x00,0x08]
-# CHECK-EB: sw     $5, 4($6)      # encoding: [0xf8,0xa6,0x00,0x04]
-# CHECK-EB: ll     $2, 8($4)      # encoding: [0x60,0x44,0x30,0x08]
-# CHECK-EB: sc     $2, 8($4)      # encoding: [0x60,0x44,0xb0,0x08]
-# CHECK-EB: lwu    $2, 8($4)      # encoding: [0x60,0x44,0xe0,0x08]
+# CHECK-EB: lb     $5, 8($4)                 # encoding: [0x1c,0xa4,0x00,0x08]
+# CHECK-EB: lbu    $6, 8($4)                 # encoding: [0x14,0xc4,0x00,0x08]
+# CHECK-EB: lh     $2, 8($4)                 # encoding: [0x3c,0x44,0x00,0x08]
+# CHECK-EB: lhu    $4, 8($2)                 # encoding: [0x34,0x82,0x00,0x08]
+# CHECK-EB: lw     $6, 4($5)                 # encoding: [0xfc,0xc5,0x00,0x04]
+# CHECK-EB: lw     $6, 123($sp)              # encoding: [0xfc,0xdd,0x00,0x7b]
+# CHECK-EB: sb     $5, 8($4)                 # encoding: [0x18,0xa4,0x00,0x08]
+# CHECK-EB: sh     $2, 8($4)                 # encoding: [0x38,0x44,0x00,0x08]
+# CHECK-EB: sw     $5, 4($6)                 # encoding: [0xf8,0xa6,0x00,0x04]
+# CHECK-EB: sw     $5, 123($sp)              # encoding: [0xf8,0xbd,0x00,0x7b]
+# CHECK-EB: ll     $2, 8($4)                 # encoding: [0x60,0x44,0x30,0x08]
+# CHECK-EB: sc     $2, 8($4)                 # encoding: [0x60,0x44,0xb0,0x08]
+# CHECK-EB: lwu    $2, 8($4)                 # encoding: [0x60,0x44,0xe0,0x08]
+# CHECK-EB: lwxs   $2, $3($4)                # encoding: [0x00,0x64,0x11,0x18]
+# CHECK-EB: lwm32  $16, $17, 8($4)           # encoding: [0x20,0x44,0x50,0x08]
+# CHECK-EB: lwm32  $16, $17, $18, $19, 8($4) # encoding: [0x20,0x84,0x50,0x08]
+# CHECK-EB: lwm32  $16, $17, $18, $19, $20, $21, $22, $23, $fp, 8($4)      # encoding: [0x21,0x24,0x50,0x08]
+# CHECK-EB: lwm32  $16, $17, $18, $19, $ra, 8($4)                          # encoding: [0x22,0x84,0x50,0x08]
+# CHECK-EB: lwm32  $16, $17, $18, $19, $20, $21, $22, $23, $fp, $ra, 8($4) # encoding: [0x23,0x24,0x50,0x08]
+# CHECK-EB: lwm32  $16, $17, $18, $19, $20, $21, $22, $23, $fp, $ra, 8($4) # encoding: [0x23,0x24,0x50,0x08]
+# CHECK-EB: swm32  $16, $17, 8($4)           # encoding: [0x20,0x44,0xd0,0x08]
+# CHECK-EB: swm32  $16, $17, $18, $19, 8($4) # encoding: [0x20,0x84,0xd0,0x08]
+# CHECK-EB: lwm16  $16, $17, $ra, 8($sp)     # encoding: [0x45,0x12]
+# CHECK-EB: swm16  $16, $17, $ra, 8($sp)     # encoding: [0x45,0x52]
+# CHECK-EB: swp    $16, 8($4)                # encoding: [0x22,0x04,0x90,0x08]
+# CHECK-EB: lwp    $16, 8($4)                # encoding: [0x22,0x04,0x10,0x08]
      lb     $5, 8($4)
      lbu    $6, 8($4)
      lh     $2, 8($4)
      lhu    $4, 8($2)
      lw     $6, 4($5)
+     lw     $6, 123($sp)
      sb     $5, 8($4)
      sh     $2, 8($4)
      sw     $5, 4($6)
+     sw     $5, 123($sp)
      ll     $2, 8($4)
      sc     $2, 8($4)
      lwu    $2, 8($4)
+     lwxs   $2, $3($4)
+     lwm32  $16, $17, 8($4)
+     lwm32  $16 - $19, 8($4)
+     lwm32  $16-$23, $30, 8($4)
+     lwm32  $16-$19, $31, 8($4)
+     lwm32  $16-$23, $30, $31, 8($4)
+     lwm32  $16-$23, $30 - $31, 8($4)
+     swm32  $16, $17, 8($4)
+     swm32  $16 - $19, 8($4)
+     lwm16  $16, $17, $ra, 8($sp)
+     swm16  $16, $17, $ra, 8($sp)
+     swp    $16, 8($4)
+     lwp    $16, 8($4)
diff --git a/test/MC/Mips/mips-expansions-bad.s b/test/MC/Mips/mips-expansions-bad.s
index a137deb8d172..8d8516926f2d 100644
--- a/test/MC/Mips/mips-expansions-bad.s
+++ b/test/MC/Mips/mips-expansions-bad.s
@@ -2,5 +2,5 @@
 # RUN: FileCheck %s < %t1
 
         .text
-        li $5, 0x100000000 # CHECK: :[[@LINE]]:9: error: instruction requires a CPU feature not currently enabled
-        dli $5, 1 # CHECK: :[[@LINE]]:9: error: instruction requires a CPU feature not currently enabled
+        li $5, 0x100000000 # CHECK: :[[@LINE]]:9: error: instruction requires a 64-bit architecture
+        dli $5, 1 # CHECK: :[[@LINE]]:9: error: instruction requires a 64-bit architecture
diff --git a/test/MC/Mips/mips-expansions.s b/test/MC/Mips/mips-expansions.s
index f0a04a5a2515..bdc76fb44fa1 100644
--- a/test/MC/Mips/mips-expansions.s
+++ b/test/MC/Mips/mips-expansions.s
@@ -17,6 +17,22 @@
 # CHECK: lui     $7, 1               # encoding: [0x01,0x00,0x07,0x3c]
 # CHECK: ori     $7, $7, 2           # encoding: [0x02,0x00,0xe7,0x34]
 # CHECK: addu    $7, $7, $8          # encoding: [0x21,0x38,0xe8,0x00]
+# CHECK: lui     $8, %hi(symbol)     # encoding: [A,A,0x08,0x3c]
+                                     #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+# CHECK: ori     $8, $8, %lo(symbol) # encoding: [A,A,0x08,0x35]
+                                     #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+# CHECK: .set    mips64
+# CHECK: lui     $8, %highest(symbol)    # encoding: [A,A,0x08,0x3c]
+                                         #   fixup A - offset: 0, value: symbol@HIGHEST, kind: fixup_Mips_HIGHEST
+# CHECK: ori     $8, $8, %higher(symbol) # encoding: [A,A,0x08,0x35]
+                                         #   fixup A - offset: 0, value: symbol@HIGHER, kind: fixup_Mips_HIGHER
+# CHECK: dsll    $8, $8, 16              # encoding: [0x38,0x44,0x08,0x00]
+# CHECK: ori     $8, $8, %hi(symbol)     # encoding: [A,A,0x08,0x35]
+                                         #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
+# CHECK: dsll    $8, $8, 16              # encoding: [0x38,0x44,0x08,0x00]
+# CHECK: ori     $8, $8, %lo(symbol)     # encoding: [A,A,0x08,0x35]
+                                         #   fixup A - offset: 0, value: symbol@ABS_LO, kind: fixup_Mips_LO16
+# CHECK: .set    mips32r2
 # CHECK: lui     $10, %hi(symbol)        # encoding: [A,A,0x0a,0x3c]
 # CHECK:                                 #   fixup A - offset: 0, value: symbol@ABS_HI, kind: fixup_Mips_HI16
 # CHECK: addu    $10, $10, $4            # encoding: [0x21,0x50,0x44,0x01]
@@ -48,6 +64,10 @@
     la $7,65538
     la $a0, 20($a1)
     la $7,65538($8)
+    la $t0, symbol
+    .set mips64
+    la $t0, symbol
+    .set mips32r2
 
     .set noat
     lw  $t2, symbol($a0)
diff --git a/test/MC/Mips/mips-hwr-register-names.s b/test/MC/Mips/mips-hwr-register-names.s
new file mode 100644
index 000000000000..3849675cdebc
--- /dev/null
+++ b/test/MC/Mips/mips-hwr-register-names.s
@@ -0,0 +1,199 @@
+# Check the hardware registers
+#
+# FIXME: Use the code generator in order to print the .set directives
+#        instead of the instruction printer.
+#
+# RUN: llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips32r2 | \
+# RUN:      FileCheck %s
+        .set noat
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $hwr_cpunum
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x00,0x3b]
+        rdhwr     $a0,$hwr_cpunum
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $hwr_cpunum
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x00,0x3b]
+        rdhwr     $a0,$0
+
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $5, $hwr_synci_step
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x05,0x08,0x3b]
+        rdhwr     $a1,$hwr_synci_step
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $5, $hwr_synci_step
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x05,0x08,0x3b]
+        rdhwr     $a1,$1
+
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $6, $hwr_cc
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x06,0x10,0x3b]
+        rdhwr     $a2,$hwr_cc
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $6, $hwr_cc
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x06,0x10,0x3b]
+        rdhwr     $a2,$2
+
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $7, $hwr_ccres
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x07,0x18,0x3b]
+        rdhwr     $a3,$hwr_ccres
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $7, $hwr_ccres
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x07,0x18,0x3b]
+        rdhwr     $a3,$3
+
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $4
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x20,0x3b]
+        rdhwr     $a0,$4
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $5
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x28,0x3b]
+        rdhwr     $a0,$5
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $6
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x30,0x3b]
+        rdhwr     $a0,$6
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $7
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x38,0x3b]
+        rdhwr     $a0,$7
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $8
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x40,0x3b]
+        rdhwr     $a0,$8
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $9
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x48,0x3b]
+        rdhwr     $a0,$9
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $10
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x50,0x3b]
+        rdhwr     $a0,$10
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $11
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x58,0x3b]
+        rdhwr     $a0,$11
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $12
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x60,0x3b]
+        rdhwr     $a0,$12
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $13
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x68,0x3b]
+        rdhwr     $a0,$13
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $14
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x70,0x3b]
+        rdhwr     $a0,$14
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $15
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x78,0x3b]
+        rdhwr     $a0,$15
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $16
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x80,0x3b]
+        rdhwr     $a0,$16
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $17
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x88,0x3b]
+        rdhwr     $a0,$17
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $18
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x90,0x3b]
+        rdhwr     $a0,$18
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $19
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0x98,0x3b]
+        rdhwr     $a0,$19
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $20
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0xa0,0x3b]
+        rdhwr     $a0,$20
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $21
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0xa8,0x3b]
+        rdhwr     $a0,$21
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $22
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0xb0,0x3b]
+        rdhwr     $a0,$22
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $23
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0xb8,0x3b]
+        rdhwr     $a0,$23
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $24
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0xc0,0x3b]
+        rdhwr     $a0,$24
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $25
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0xc8,0x3b]
+        rdhwr     $a0,$25
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $26
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0xd0,0x3b]
+        rdhwr     $a0,$26
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $27
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0xd8,0x3b]
+        rdhwr     $a0,$27
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $28
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0xe0,0x3b]
+        rdhwr     $a0,$28
+
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $29
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0xe8,0x3b]
+        rdhwr     $a0,$29
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $29
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0xe8,0x3b]
+        rdhwr     $a0,$hwr_ulr
+
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $30
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0xf0,0x3b]
+        rdhwr     $a0,$30
+        # CHECK:      .set    push
+        # CHECK-NEXT: .set    mips32r2
+        # CHECK-NEXT: rdhwr   $4, $31
+        # CHECK-NEXT: .set    pop             # encoding: [0x7c,0x04,0xf8,0x3b]
+        rdhwr     $a0,$31
diff --git a/test/MC/Mips/mips-jump-delay-slots.s b/test/MC/Mips/mips-jump-delay-slots.s
new file mode 100644
index 000000000000..49f6c159b680
--- /dev/null
+++ b/test/MC/Mips/mips-jump-delay-slots.s
@@ -0,0 +1,122 @@
+# Verify that every branch and jump instruction is followed by a delay slot
+# except for the branch likely instructions.
+#
+# RUN: llvm-mc %s -triple=mips-unknown-linux -mcpu=mips32r2 | FileCheck %s
+
+        .set noat
+        # CHECK: b 1332
+        # CHECK: nop
+        b 1332
+        # CHECK: bc1f 1332
+        # CHECK: nop
+        bc1f 1332
+        # CHECK: bc1t 1332
+        # CHECK: nop
+        bc1t 1332
+        # CHECK: beq $9, $6, 1332
+        # CHECK: nop
+        beq $9,$6,1332
+        # CHECK: bgez $6, 1332
+        # CHECK: nop
+        bgez $6,1332
+        # CHECK: bgezal $6, 1332
+        # CHECK: nop
+        bgezal $6,1332
+        # CHECK: bgtz $6, 1332
+        # CHECK: nop
+        bgtz $6,1332
+        # CHECK: blez $6, 1332
+        # CHECK: nop
+        blez $6,1332
+        # CHECK: bltz $6, 1332
+        # CHECK: nop
+        bltz $6,1332
+        # CHECK: bne $9, $6, 1332
+        # CHECK: nop
+        bne $9,$6,1332
+        # CHECK: bltzal $6, 1332
+        # CHECK: nop
+        bltzal $6,1332
+        # CHECK: bal 1332
+        # CHECK: nop
+        bal 1332
+        # CHECK: bnez $11, 1332
+        # CHECK: nop
+        bnez $11,1332
+        # CHECK: beqz $11, 1332
+        # CHECK: nop
+        beqz $11,1332
+
+        # CHECK: bc1fl 1332
+        # CHECK-NOT: nop
+        bc1fl 1332
+        # CHECK: bc1fl 1332
+        # CHECK-NOT: nop
+        bc1fl $fcc0, 1332
+        # CHECK: bc1fl $fcc3, 1332
+        # CHECK-NOT: nop
+        bc1fl $fcc3, 1332
+        # CHECK: bc1tl 1332
+        # CHECK-NOT: nop
+        bc1tl 1332
+        # CHECK: bc1tl 1332
+        # CHECK-NOT: nop
+        bc1tl $fcc0, 1332
+        # CHECK: bc1tl $fcc3, 1332
+        # CHECK-NOT: nop
+        bc1tl $fcc3, 1332
+        # CHECK: beql $9, $6, 1332
+        # CHECK-NOT: nop
+        beql $9,$6,1332
+        # CHECK: bnel $9, $6, 1332
+        # CHECK-NOT: nop
+        bnel $9,$6,1332
+        # CHECK: bgezl $6, 1332
+        # CHECK-NOT: nop
+        bgezl $6,1332
+        # CHECK: bgtzl $6, 1332
+        # CHECK-NOT: nop
+        bgtzl $6,1332
+        # CHECK: blezl $6, 1332
+        # CHECK-NOT: nop
+        blezl $6,1332
+        # CHECK: bltzl $6, 1332
+        # CHECK-NOT: nop
+        bltzl $6,1332
+        # CHECK: bgezall $6, 1332
+        # CHECK-NOT: nop
+        bgezall $6,1332
+        # CHECK: bltzall $6, 1332
+        # CHECK-NOT: nop
+        bltzall $6,1332
+
+        # CHECK: j 1328
+        # CHECK: nop
+        j 1328
+        # CHECK: jal 1328
+        # CHECK: nop
+        jal 1328
+        # CHECK: jalr $6
+        # CHECK: nop
+        jalr $6
+        # CHECK: jalr $25
+        # CHECK: nop
+        jalr $31,$25
+        # CHECK: jalr $10, $11
+        # CHECK: nop
+        jalr $10,$11
+        # CHECK: jr $7
+        # CHECK: nop
+        jr $7
+        # CHECK: jr $7
+        # CHECK: nop
+        j $7
+        # CHECK: jalr $25
+        # CHECK: nop
+        jal $25
+        # CHECK: jalr $4, $25
+        # CHECK: nop
+        jal $4,$25
+        # CHECK: jalx lab
+        # CHECK: nop
+        jalx lab
diff --git a/test/MC/Mips/mips-noat.s b/test/MC/Mips/mips-noat.s
index 07db251b0506..f9d4efde185c 100644
--- a/test/MC/Mips/mips-noat.s
+++ b/test/MC/Mips/mips-noat.s
@@ -12,7 +12,7 @@ test1:
 
 test2:
         .set noat
-        lw      $2, 65536($2) # ERROR: mips-noat.s:[[@LINE]]:9: error: Pseudo instruction requires $at, which is not available
+        lw      $2, 65536($2) # ERROR: mips-noat.s:[[@LINE]]:9: error: pseudo-instruction requires $at, which is not available
 
 
 # Can we switch it back on successfully?
@@ -26,4 +26,4 @@ test3:
 
 test4:
         .set at=$0
-        lw      $2, 65536($2) # ERROR: mips-noat.s:[[@LINE]]:9: error: Pseudo instruction requires $at, which is not available
+        lw      $2, 65536($2) # ERROR: mips-noat.s:[[@LINE]]:9: error: pseudo-instruction requires $at, which is not available
diff --git a/test/MC/Mips/mips-pdr-bad.s b/test/MC/Mips/mips-pdr-bad.s
new file mode 100644
index 000000000000..40c6ba2a1f2e
--- /dev/null
+++ b/test/MC/Mips/mips-pdr-bad.s
@@ -0,0 +1,42 @@
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r2 2>%t1
+# RUN: FileCheck %s < %t1 -check-prefix=ASM
+
+        .text
+        
+        .ent # ASM: :[[@LINE]]:14: error: expected identifier after .ent
+        .ent bar, # ASM: :[[@LINE]]:19: error: expected number after comma
+        .ent foo, bar # AMS: :[[@LINE]]:23: error: expected an absolute expression after comma
+        .ent foo, 5, bar # AMS: :[[@LINE]]:20: error: unexpected token, expected end of statement
+ 
+        .frame # ASM: :[[@LINE]]:16: error: expected stack register
+        .frame bar # ASM: :[[@LINE]]:16: error: expected stack register
+        .frame $f1, 8, # ASM: :[[@LINE]]:16: error: expected general purpose register
+        .frame $sp # ASM: :[[@LINE]]:20: error: unexpected token, expected comma
+        .frame $sp, # ASM: :[[@LINE]]:21: error: expected frame size value
+        .frame $sp, bar # ASM: :[[@LINE]]:25: error: frame size not an absolute expression
+        .frame $sp, 8 # ASM: :[[@LINE]]:23: error: unexpected token, expected comma
+        .frame $sp, 8, # ASM: :[[@LINE]]:24: error: expected return register
+        .frame $sp, 8, $f1 # ASM: :[[@LINE]]:24: error: expected general purpose register
+        .frame $sp, 8, $ra, foo # ASM: :[[@LINE]]:27: error: unexpected token, expected end of statement
+
+        .mask  # ASM: :[[@LINE]]:16: error: expected bitmask value
+        .mask foo # ASM: :[[@LINE]]:19: error: bitmask not an absolute expression
+        .mask 0x80000000 # ASM: :[[@LINE]]:26: error: unexpected token, expected comma
+        .mask 0x80000000, # ASM: :[[@LINE]]:27: error: expected frame offset value
+        .mask 0x80000000, foo # ASM: :[[@LINE]]:31: error: frame offset not an absolute expression
+        .mask 0x80000000, -4, bar # ASM: :[[@LINE]]:29: error: unexpected token, expected end of statement
+        
+        .fmask  # ASM: :[[@LINE]]:17: error: expected bitmask value
+        .fmask foo # ASM: :[[@LINE]]:20: error: bitmask not an absolute expression
+        .fmask 0x80000000 # ASM: :[[@LINE]]:27: error: unexpected token, expected comma
+        .fmask 0x80000000, # ASM: :[[@LINE]]:28: error: expected frame offset value
+        .fmask 0x80000000, foo # ASM: :[[@LINE]]:32: error: frame offset not an absolute expression
+        .fmask 0x80000000, -4, bar # ASM: :[[@LINE]]:30: error: unexpected token, expected end of statement
+
+        .end # ASM: :[[@LINE]]:14: error: expected identifier after .end
+        .ent _local_foo_bar
+        .end _local_foo_bar, foo # ASM: :[[@LINE]]:28: error: unexpected token, expected end of statement
+        .end _local_foo_bar
+        .end _local_foo # ASM: :[[@LINE]]:25: error: .end used without .ent
+        .ent _local_foo, 2
+        .end _local_foo_bar # ASM: :[[@LINE]]:29: error: .end symbol does not match .ent symbol
diff --git a/test/MC/Mips/mips-pdr.s b/test/MC/Mips/mips-pdr.s
new file mode 100644
index 000000000000..372c259bb098
--- /dev/null
+++ b/test/MC/Mips/mips-pdr.s
@@ -0,0 +1,64 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -filetype=asm | \
+# RUN:   FileCheck %s -check-prefix=ASMOUT
+
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 -filetype=obj -o - | \
+# RUN:   llvm-readobj -s -section-data | \
+# RUN:     FileCheck %s -check-prefix=OBJOUT
+
+# ASMOUT: .text
+# ASMOUT:        .type _local_foo,@function
+# ASMOUT:        .ent _local_foo
+# ASMOUT:_local_foo:
+# ASMOUT:        .frame $fp,16,$ra
+# ASMOUT:        .mask 0x10101010,-4
+# ASMOUT:        .fmask 0x01010101,-8
+# ASMOUT:        .end _local_foo
+# ASMOUT:        .size local_foo,
+
+# OBJOUT: Section {
+# OBJOUT:     Name: .pdr
+# OBJOUT:     Type: SHT_PROGBITS (0x1)
+# OBJOUT:     Flags [ (0xB)
+# OBJOUT:       SHF_ALLOC (0x2)
+# OBJOUT:       SHF_WRITE (0x1)
+# OBJOUT:     ]
+# OBJOUT:     Size: 64
+# OBJOUT:     SectionData (
+# OBJOUT:       0000: 00000000 10101010 FFFFFFFC 01010101
+# OBJOUT:       0010: FFFFFFF8 00000010 0000001E 0000001F
+# OBJOUT:       0020: 00000000 10101010 FFFFFFFC 01010101
+# OBJOUT:       0030: FFFFFFF8 00000010 0000001E 0000001F
+# OBJOUT:     )
+# OBJOUT:   }
+
+# We should also check if relocation information was correctly generated.
+# OBJOUT:   Section {
+# OBJOUT:     Name: .rel.pdr
+# OBJOUT:     Type: SHT_REL (0x9)
+# OBJOUT:     Flags [ (0x0)
+# OBJOUT:     ]
+# OBJOUT:     Size: 16
+# OBJOUT:     SectionData (
+# OBJOUT:       0000: 00000000 00000202 00000020 00000802
+# OBJOUT:     )
+# OBJOUT:   }
+
+.text
+        .type _local_foo,@function
+        .ent _local_foo
+_local_foo:
+        .frame $fp,16,$ra
+        .mask 0x10101010,-4
+        .fmask 0x01010101,-8
+        .end _local_foo
+        .size local_foo,.-_local_foo
+
+        .globl _global_foo
+        .type _global_foo,@function
+        .ent _global_foo
+_global_foo:
+        .frame $fp,16,$ra
+        .mask 0x10101010,-4
+        .fmask 0x01010101,-8
+        .end _global_foo
+        .size global_foo,.-_global_foo
diff --git a/test/MC/Mips/mips1/invalid-mips2.s b/test/MC/Mips/mips1/invalid-mips2.s
index 7db261d42c98..29bd223f2910 100644
--- a/test/MC/Mips/mips1/invalid-mips2.s
+++ b/test/MC/Mips/mips1/invalid-mips2.s
@@ -5,6 +5,18 @@
 # RUN: FileCheck %s < %t1
 
         .set noat
+        bc1fl     $fcc0,-8239     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1fl     -8239           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1tl     $fcc0,-8239     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1tl     -8239           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        beql      $14,$s3,12544   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezall   $12,7293        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezl     $4,-6858        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgtzl     $10,-3738       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        blezl     $6,2974         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bltzall   $6,488          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bltzl     $s1,-9964       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bnel      $gp,$s4,5107    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ceil.w.d  $f11,$f25       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ceil.w.s  $f6,$f20        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         floor.w.d $f14,$f11       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -13,11 +25,23 @@
         round.w.s $f27,$f28       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         sqrt.d    $f17,$f22       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         sqrt.s    $f0,$f1         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        teq       $0,$3           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        teq       $5,$7,620       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         teqi      $s5,-17504      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        tge       $7,$10          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        tge       $5,$19,340      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tgei      $s1,5025        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tgeiu     $sp,-28621      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        tgeu      $22,$28         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        tgeu      $20,$14,379     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        tlt       $15,$13         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        tlt       $2,$19,133      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tlti      $t6,-21059      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tltiu     $ra,-5076       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        tltu      $11,$16         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        tltu      $16,$29,1016    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        tne       $6,$17          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        tne       $7,$8,885       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         tnei      $t4,-29647      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         trunc.w.d $f22,$f15       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         trunc.w.s $f28,$f30       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips1/invalid-mips3.s b/test/MC/Mips/mips1/invalid-mips3.s
index d1b0eec17a88..d4be08e48fbd 100644
--- a/test/MC/Mips/mips1/invalid-mips3.s
+++ b/test/MC/Mips/mips1/invalid-mips3.s
@@ -19,6 +19,8 @@
         daddi     $sp,$s4,-27705    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         daddiu    $k0,$s6,-4586     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         daddu     $s3,$at,$ra       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        daddu     $24,$2,18079      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        daddu     $19,26943         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ddiv      $zero,$k0,$s3     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ddivu     $zero,$s0,$s1     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         dmfc1     $12,$f13          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -43,6 +45,8 @@
         dsrl32    $s3,$6,23         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         dsrlv     $s3,$14,$s4       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         dsubu     $a1,$a1,$k0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        dsubu     $15,$11,5025      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        dsubu     $14,-4586         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         floor.l.d $f26,$f7          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         floor.l.s $f12,$f5          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         floor.w.d $f14,$f11         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips1/invalid-mips32r2.s b/test/MC/Mips/mips1/invalid-mips32r2.s
new file mode 100644
index 000000000000..679f21fef6d0
--- /dev/null
+++ b/test/MC/Mips/mips1/invalid-mips32r2.s
@@ -0,0 +1,11 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips1 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+        .set noat
+        di      $s8                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        di                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        ei      $t6                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        ei                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips1/invalid-mips4-wrong-error.s b/test/MC/Mips/mips1/invalid-mips4-wrong-error.s
index 2016e701b0f9..cec30c862329 100644
--- a/test/MC/Mips/mips1/invalid-mips4-wrong-error.s
+++ b/test/MC/Mips/mips1/invalid-mips4-wrong-error.s
@@ -6,6 +6,8 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
+        bc1fl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1tl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         ld        $sp,-28645($s1)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         ldc1      $f11,16391($s0)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         ldc2      $8,-21181($at)    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips1/invalid-mips5-wrong-error.s b/test/MC/Mips/mips1/invalid-mips5-wrong-error.s
index 74473a3bf343..18c0b61ff55f 100644
--- a/test/MC/Mips/mips1/invalid-mips5-wrong-error.s
+++ b/test/MC/Mips/mips1/invalid-mips5-wrong-error.s
@@ -6,41 +6,41 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
-        abs.ps    $f22,$f8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        add.ps    $f25,$f27,$f13      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        alnv.ps   $f12,$f18,$f30,$t0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.eq.ps   $fcc5,$f0,$f9       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.f.ps    $fcc6,$f11,$f11     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.le.ps   $fcc1,$f7,$f20      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.lt.ps   $f19,$f5            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.nge.ps  $f1,$f26            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ngl.ps  $f21,$f30           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ngle.ps $fcc7,$f12,$f20     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ngt.ps  $fcc5,$f30,$f6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ole.ps  $fcc7,$f21,$f8      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.olt.ps  $fcc3,$f7,$f16      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.seq.ps  $fcc6,$f31,$f14     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.sf.ps   $fcc6,$f4,$f6       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ueq.ps  $fcc1,$f5,$f29      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ule.ps  $fcc6,$f17,$f3      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ult.ps  $fcc7,$f14,$f0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.un.ps   $fcc4,$f2,$f26      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        cvt.ps.s  $f3,$f18,$f19       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        cvt.s.pl  $f30,$f1            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        cvt.s.pu  $f14,$f25           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        madd.ps   $f22,$f3,$f14,$f3   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        mov.ps    $f22,$f17           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movf.ps   $f10,$f28,$fcc6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movn.ps   $f31,$f31,$s3       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movt.ps   $f20,$f25,$fcc2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movz.ps   $f18,$f17,$ra       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        msub.ps   $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        mul.ps    $f14,$f0,$f16       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        neg.ps    $f19,$f13           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        nmadd.ps  $f27,$f4,$f9,$f25   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        nmsub.ps  $f6,$f12,$f14,$f17  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        pll.ps    $f25,$f9,$f30       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        plu.ps    $f1,$f26,$f29       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        pul.ps    $f9,$f30,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        puu.ps    $f24,$f9,$f2        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        sub.ps    $f5,$f14,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        abs.ps    $f22,$f8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        add.ps    $f25,$f27,$f13      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        alnv.ps   $f12,$f18,$f30,$t0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.eq.ps   $fcc5,$f0,$f9       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.f.ps    $fcc6,$f11,$f11     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.le.ps   $fcc1,$f7,$f20      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.lt.ps   $f19,$f5            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.nge.ps  $f1,$f26            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ngl.ps  $f21,$f30           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ngle.ps $fcc7,$f12,$f20     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ngt.ps  $fcc5,$f30,$f6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ole.ps  $fcc7,$f21,$f8      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.olt.ps  $fcc3,$f7,$f16      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.seq.ps  $fcc6,$f31,$f14     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.sf.ps   $fcc6,$f4,$f6       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ueq.ps  $fcc1,$f5,$f29      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ule.ps  $fcc6,$f17,$f3      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ult.ps  $fcc7,$f14,$f0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.un.ps   $fcc4,$f2,$f26      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        cvt.ps.s  $f3,$f18,$f19       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        cvt.s.pl  $f30,$f1            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        cvt.s.pu  $f14,$f25           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        madd.ps   $f22,$f3,$f14,$f3   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        mov.ps    $f22,$f17           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movf.ps   $f10,$f28,$fcc6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movn.ps   $f31,$f31,$s3       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movt.ps   $f20,$f25,$fcc2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movz.ps   $f18,$f17,$ra       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        msub.ps   $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        mul.ps    $f14,$f0,$f16       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        neg.ps    $f19,$f13           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        nmadd.ps  $f27,$f4,$f9,$f25   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        nmsub.ps  $f6,$f12,$f14,$f17  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        pll.ps    $f25,$f9,$f30       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        plu.ps    $f1,$f26,$f29       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        pul.ps    $f9,$f30,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        puu.ps    $f24,$f9,$f2        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        sub.ps    $f5,$f14,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips1/valid.s b/test/MC/Mips/mips1/valid.s
index 66e11ba2fe52..53ff6a0740a3 100644
--- a/test/MC/Mips/mips1/valid.s
+++ b/test/MC/Mips/mips1/valid.s
@@ -10,7 +10,10 @@
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
         addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
+        add       $9,$14,15176         # CHECK: addi $9, $14, 15176   # encoding: [0x21,0xc9,0x3b,0x48]
+        add       $24,-7193            # CHECK: addi $24, $24, -7193  # encoding: [0x23,0x18,0xe3,0xe7]
         addu      $9,$a0,$a2
+        addu      $9,10                # CHECK: addiu $9, $9, 10    # encoding: [0x25,0x29,0x00,0x0a]
         and       $s7,$v0,$12
         and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
         bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
@@ -97,6 +100,8 @@
         srlv      $25,$s4,$a0          # CHECK: srlv $25, $20, $4      # encoding: [0x00,0x94,0xc8,0x06]
         ssnop                          # CHECK: ssnop                  # encoding: [0x00,0x00,0x00,0x40]
         sub       $s6,$s3,$12
+        sub       $22,$17,-3126        # CHECK: addi $22, $17, 3126    # encoding: [0x22,0x36,0x0c,0x36]
+        sub       $13,6512             # CHECK: addi $13, $13, -6512   # encoding: [0x21,0xad,0xe6,0x90]
         sub.d     $f18,$f3,$f17
         sub.s     $f23,$f22,$f22
         subu      $sp,$s6,$s6
diff --git a/test/MC/Mips/mips2/invalid-mips3.s b/test/MC/Mips/mips2/invalid-mips3.s
index 458c416c0e9e..e72b228c67ec 100644
--- a/test/MC/Mips/mips2/invalid-mips3.s
+++ b/test/MC/Mips/mips2/invalid-mips3.s
@@ -15,6 +15,8 @@
         daddi      $sp,$s4,-27705    # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         daddiu     $k0,$s6,-4586     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         daddu      $s3,$at,$ra       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        daddu      $24,$2,18079      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        daddu      $19,26943         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ddiv       $zero,$k0,$s3     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ddivu      $zero,$s0,$s1     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         dmfc1      $t0,$f13          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -40,6 +42,8 @@
         dsrlv      $s3,$t2,$s4       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         dsub       $a3,$s6,$a4       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         dsubu      $a1,$a1,$k0       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        dsubu      $15,$11,5025      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        dsubu      $14,-4586         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         eret                         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         floor.l.d  $f26,$f7          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         floor.l.s  $f12,$f5          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips2/invalid-mips32r2.s b/test/MC/Mips/mips2/invalid-mips32r2.s
index 72a570aa69ab..6dc8159a9acc 100644
--- a/test/MC/Mips/mips2/invalid-mips32r2.s
+++ b/test/MC/Mips/mips2/invalid-mips32r2.s
@@ -13,7 +13,9 @@
         cvt.l.s $f11,$f29           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         deret                       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         di      $s8                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        di                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ei      $t6                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        ei                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         eret                        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ldxc1   $f8,$s7($t7)        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         luxc1   $f19,$s6($s5)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips2/invalid-mips4-wrong-error.s b/test/MC/Mips/mips2/invalid-mips4-wrong-error.s
index 193f6d796a13..28a98ba13f08 100644
--- a/test/MC/Mips/mips2/invalid-mips4-wrong-error.s
+++ b/test/MC/Mips/mips2/invalid-mips4-wrong-error.s
@@ -6,6 +6,8 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
+        bc1fl     $fcc7,27        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1tl     $fcc7,27        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         ld        $sp,-28645($s1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         lwu       $s3,-24086($v1) # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         scd       $15,-8243($sp)  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips2/invalid-mips5-wrong-error.s b/test/MC/Mips/mips2/invalid-mips5-wrong-error.s
index 0c58c6c960eb..5eaeaa2a988a 100644
--- a/test/MC/Mips/mips2/invalid-mips5-wrong-error.s
+++ b/test/MC/Mips/mips2/invalid-mips5-wrong-error.s
@@ -6,41 +6,41 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
-        abs.ps    $f22,$f8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        add.ps    $f25,$f27,$f13      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        alnv.ps   $f12,$f18,$f30,$t0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.eq.ps   $fcc5,$f0,$f9       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.f.ps    $fcc6,$f11,$f11     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.le.ps   $fcc1,$f7,$f20      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.lt.ps   $f19,$f5            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.nge.ps  $f1,$f26            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ngl.ps  $f21,$f30           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ngle.ps $fcc7,$f12,$f20     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ngt.ps  $fcc5,$f30,$f6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ole.ps  $fcc7,$f21,$f8      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.olt.ps  $fcc3,$f7,$f16      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.seq.ps  $fcc6,$f31,$f14     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.sf.ps   $fcc6,$f4,$f6       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ueq.ps  $fcc1,$f5,$f29      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ule.ps  $fcc6,$f17,$f3      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ult.ps  $fcc7,$f14,$f0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.un.ps   $fcc4,$f2,$f26      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        cvt.ps.s  $f3,$f18,$f19       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        cvt.s.pl  $f30,$f1            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        cvt.s.pu  $f14,$f25           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        madd.ps   $f22,$f3,$f14,$f3   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        mov.ps    $f22,$f17           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movf.ps   $f10,$f28,$fcc6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movn.ps   $f31,$f31,$s3       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movt.ps   $f20,$f25,$fcc2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movz.ps   $f18,$f17,$ra       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        msub.ps   $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        mul.ps    $f14,$f0,$f16       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        neg.ps    $f19,$f13           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        nmadd.ps  $f27,$f4,$f9,$f25   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        nmsub.ps  $f6,$f12,$f14,$f17  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        pll.ps    $f25,$f9,$f30       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        plu.ps    $f1,$f26,$f29       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        pul.ps    $f9,$f30,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        puu.ps    $f24,$f9,$f2        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        sub.ps    $f5,$f14,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        abs.ps    $f22,$f8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        add.ps    $f25,$f27,$f13      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        alnv.ps   $f12,$f18,$f30,$t0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.eq.ps   $fcc5,$f0,$f9       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.f.ps    $fcc6,$f11,$f11     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.le.ps   $fcc1,$f7,$f20      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.lt.ps   $f19,$f5            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.nge.ps  $f1,$f26            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ngl.ps  $f21,$f30           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ngle.ps $fcc7,$f12,$f20     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ngt.ps  $fcc5,$f30,$f6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ole.ps  $fcc7,$f21,$f8      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.olt.ps  $fcc3,$f7,$f16      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.seq.ps  $fcc6,$f31,$f14     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.sf.ps   $fcc6,$f4,$f6       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ueq.ps  $fcc1,$f5,$f29      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ule.ps  $fcc6,$f17,$f3      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ult.ps  $fcc7,$f14,$f0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.un.ps   $fcc4,$f2,$f26      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        cvt.ps.s  $f3,$f18,$f19       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        cvt.s.pl  $f30,$f1            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        cvt.s.pu  $f14,$f25           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        madd.ps   $f22,$f3,$f14,$f3   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        mov.ps    $f22,$f17           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movf.ps   $f10,$f28,$fcc6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movn.ps   $f31,$f31,$s3       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movt.ps   $f20,$f25,$fcc2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movz.ps   $f18,$f17,$ra       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        msub.ps   $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        mul.ps    $f14,$f0,$f16       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        neg.ps    $f19,$f13           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        nmadd.ps  $f27,$f4,$f9,$f25   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        nmsub.ps  $f6,$f12,$f14,$f17  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        pll.ps    $f25,$f9,$f30       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        plu.ps    $f1,$f26,$f29       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        pul.ps    $f9,$f30,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        puu.ps    $f24,$f9,$f2        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        sub.ps    $f5,$f14,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips2/valid.s b/test/MC/Mips/mips2/valid.s
index 9c3706ee3ff5..34843bc9f777 100644
--- a/test/MC/Mips/mips2/valid.s
+++ b/test/MC/Mips/mips2/valid.s
@@ -6,21 +6,36 @@
         abs.d     $f7,$f25             # CHECK: encoding:
         abs.s     $f9,$f16
         add       $s7,$s2,$a1
+        add       $9,$14,15176         # CHECK: addi $9, $14, 15176   # encoding: [0x21,0xc9,0x3b,0x48]
+        add       $24,-7193            # CHECK: addi $24, $24, -7193  # encoding: [0x23,0x18,0xe3,0xe7]
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
         addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
+        addu      $9,10                # CHECK: addiu $9, $9, 10    # encoding: [0x25,0x29,0x00,0x0a]
         and       $s7,$v0,$12
         and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
         bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
         bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1fl     $fcc0,50             # CHECK: bc1fl 50      # encoding: [0x45,0x02,0x00,0x0c]
+        bc1fl     50                   # CHECK: bc1fl 50      # encoding: [0x45,0x02,0x00,0x0c]
         bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
         bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1tl     $fcc0,-8239          # CHECK: bc1tl -8239   # encoding: [0x45,0x03,0xf7,0xf4]
+        bc1tl     -8239                # CHECK: bc1tl -8239   # encoding: [0x45,0x03,0xf7,0xf4]
         bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
         bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
+        beql      $14,$s3,12544        # CHECK: beql $14, $19, 12544 # encoding: [0x51,0xd3,0x0c,0x40]
+        bgezall   $12,7293             # CHECK: bgezall $12, 7293    # encoding: [0x05,0x93,0x07,0x1f]
+        bgezl     $4,-6858             # CHECK: bgezl $4, -6858      # encoding: [0x04,0x83,0xf9,0x4d]
+        bgtzl     $10,-3738            # CHECK: bgtzl $10, -3738     # encoding: [0x5d,0x40,0xfc,0x59]
+        blezl     $6,2974              # CHECK: blezl $6, 2974       # encoding: [0x58,0xc0,0x02,0xe7]
+        bltzall   $6,488               # CHECK: bltzall $6, 488      # encoding: [0x04,0xd2,0x00,0x7a]
+        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
+        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
         c.sf.d    $f30,$f0
@@ -113,6 +128,8 @@
         srlv      $25,$s4,$a0          # CHECK: srlv $25, $20, $4      # encoding: [0x00,0x94,0xc8,0x06]
         ssnop                          # CHECK: ssnop                  # encoding: [0x00,0x00,0x00,0x40]
         sub       $s6,$s3,$12
+        sub       $22,$17,-3126        # CHECK: addi $22, $17, 3126    # encoding: [0x22,0x36,0x0c,0x36]
+        sub       $13,6512             # CHECK: addi $13, $13, -6512   # encoding: [0x21,0xad,0xe6,0x90]
         sub.d     $f18,$f3,$f17
         sub.s     $f23,$f22,$f22
         subu      $sp,$s6,$s6
@@ -123,15 +140,27 @@
         swl       $15,13694($s3)
         swr       $s1,-26590($14)
         sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
+        teq       $0,$3                # CHECK: teq $zero, $3          # encoding: [0x00,0x03,0x00,0x34]
+        teq       $5,$7,620            # CHECK: teq $5, $7, 620        # encoding: [0x00,0xa7,0x9b,0x34]
         teqi      $s5,-17504
+        tge       $7,$10               # CHECK: tge $7, $10            # encoding: [0x00,0xea,0x00,0x30]
+        tge       $5,$19,340           # CHECK: tge $5, $19, 340       # encoding: [0x00,0xb3,0x55,0x30]
         tgei      $s1,5025
         tgeiu     $sp,-28621
+        tgeu      $22,$28              # CHECK: tgeu $22, $gp          # encoding: [0x02,0xdc,0x00,0x31]
+        tgeu      $20,$14,379          # CHECK: tgeu $20, $14, 379     # encoding: [0x02,0x8e,0x5e,0xf1]
         tlbp                           # CHECK: tlbp                   # encoding: [0x42,0x00,0x00,0x08]
         tlbr                           # CHECK: tlbr                   # encoding: [0x42,0x00,0x00,0x01]
         tlbwi                          # CHECK: tlbwi                  # encoding: [0x42,0x00,0x00,0x02]
         tlbwr                          # CHECK: tlbwr                  # encoding: [0x42,0x00,0x00,0x06]
+        tlt       $15,$13              # CHECK: tlt $15, $13           # encoding: [0x01,0xed,0x00,0x32]
+        tlt       $2,$19,133           # CHECK: tlt $2, $19, 133       # encoding: [0x00,0x53,0x21,0x72]
         tlti      $14,-21059
         tltiu     $ra,-5076
+        tltu      $11,$16              # CHECK: tltu $11, $16          # encoding: [0x01,0x70,0x00,0x33]
+        tltu      $16,$29,1016         # CHECK: tltu $16, $sp, 1016    # encoding: [0x02,0x1d,0xfe,0x33]
+        tne       $6,$17               # CHECK: tne $6, $17            # encoding: [0x00,0xd1,0x00,0x36]
+        tne       $7,$8,885            # CHECK: tne $7, $8, 885        # encoding: [0x00,0xe8,0xdd,0x76]
         tnei      $12,-29647
         trunc.w.d $f22,$f15
         trunc.w.s $f28,$f30
diff --git a/test/MC/Mips/mips3/invalid-mips32r2.s b/test/MC/Mips/mips3/invalid-mips32r2.s
new file mode 100644
index 000000000000..178e0f02a584
--- /dev/null
+++ b/test/MC/Mips/mips3/invalid-mips32r2.s
@@ -0,0 +1,11 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips3 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+        .set noat
+        di      $s8                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        di                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        ei      $t6                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        ei                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips3/invalid-mips4-wrong-error.s b/test/MC/Mips/mips3/invalid-mips4-wrong-error.s
new file mode 100644
index 000000000000..c9af39a46fef
--- /dev/null
+++ b/test/MC/Mips/mips3/invalid-mips4-wrong-error.s
@@ -0,0 +1,10 @@
+# Instructions that are invalid and are correctly rejected but use the wrong
+# error message at the moment.
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips3 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+	.set noat
+        bc1fl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
+        bc1tl     $fcc7,27          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
diff --git a/test/MC/Mips/mips3/invalid-mips5-wrong-error.s b/test/MC/Mips/mips3/invalid-mips5-wrong-error.s
index 2c0246a746b5..cf809d3af2d6 100644
--- a/test/MC/Mips/mips3/invalid-mips5-wrong-error.s
+++ b/test/MC/Mips/mips3/invalid-mips5-wrong-error.s
@@ -6,41 +6,41 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
-        abs.ps    $f22,$f8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        add.ps    $f25,$f27,$f13      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        alnv.ps   $f12,$f18,$f30,$t0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.eq.ps   $fcc5,$f0,$f9       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.f.ps    $fcc6,$f11,$f11     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.le.ps   $fcc1,$f7,$f20      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.lt.ps   $f19,$f5            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.nge.ps  $f1,$f26            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ngl.ps  $f21,$f30           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ngle.ps $fcc7,$f12,$f20     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ngt.ps  $fcc5,$f30,$f6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ole.ps  $fcc7,$f21,$f8      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.olt.ps  $fcc3,$f7,$f16      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.seq.ps  $fcc6,$f31,$f14     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.sf.ps   $fcc6,$f4,$f6       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ueq.ps  $fcc1,$f5,$f29      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ule.ps  $fcc6,$f17,$f3      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ult.ps  $fcc7,$f14,$f0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.un.ps   $fcc4,$f2,$f26      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        cvt.ps.s  $f3,$f18,$f19       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        cvt.s.pl  $f30,$f1            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        cvt.s.pu  $f14,$f25           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        madd.ps   $f22,$f3,$f14,$f3   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        mov.ps    $f22,$f17           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movf.ps   $f10,$f28,$fcc6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movn.ps   $f31,$f31,$s3       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movt.ps   $f20,$f25,$fcc2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movz.ps   $f18,$f17,$ra       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        msub.ps   $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        mul.ps    $f14,$f0,$f16       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        neg.ps    $f19,$f13           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        nmadd.ps  $f27,$f4,$f9,$f25   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        nmsub.ps  $f6,$f12,$f14,$f17  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        pll.ps    $f25,$f9,$f30       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        plu.ps    $f1,$f26,$f29       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        pul.ps    $f9,$f30,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        puu.ps    $f24,$f9,$f2        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        sub.ps    $f5,$f14,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        abs.ps    $f22,$f8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        add.ps    $f25,$f27,$f13      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        alnv.ps   $f12,$f18,$f30,$t0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.eq.ps   $fcc5,$f0,$f9       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.f.ps    $fcc6,$f11,$f11     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.le.ps   $fcc1,$f7,$f20      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.lt.ps   $f19,$f5            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.nge.ps  $f1,$f26            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ngl.ps  $f21,$f30           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ngle.ps $fcc7,$f12,$f20     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ngt.ps  $fcc5,$f30,$f6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ole.ps  $fcc7,$f21,$f8      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.olt.ps  $fcc3,$f7,$f16      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.seq.ps  $fcc6,$f31,$f14     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.sf.ps   $fcc6,$f4,$f6       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ueq.ps  $fcc1,$f5,$f29      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ule.ps  $fcc6,$f17,$f3      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ult.ps  $fcc7,$f14,$f0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.un.ps   $fcc4,$f2,$f26      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        cvt.ps.s  $f3,$f18,$f19       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        cvt.s.pl  $f30,$f1            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        cvt.s.pu  $f14,$f25           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        madd.ps   $f22,$f3,$f14,$f3   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        mov.ps    $f22,$f17           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movf.ps   $f10,$f28,$fcc6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movn.ps   $f31,$f31,$s3       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movt.ps   $f20,$f25,$fcc2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movz.ps   $f18,$f17,$ra       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        msub.ps   $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        mul.ps    $f14,$f0,$f16       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        neg.ps    $f19,$f13           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        nmadd.ps  $f27,$f4,$f9,$f25   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        nmsub.ps  $f6,$f12,$f14,$f17  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        pll.ps    $f25,$f9,$f30       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        plu.ps    $f1,$f26,$f29       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        pul.ps    $f9,$f30,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        puu.ps    $f24,$f9,$f2        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        sub.ps    $f5,$f14,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips3/valid.s b/test/MC/Mips/mips3/valid.s
index cb209fdb208f..a55576d9e673 100644
--- a/test/MC/Mips/mips3/valid.s
+++ b/test/MC/Mips/mips3/valid.s
@@ -6,21 +6,36 @@
         abs.d     $f7,$f25             # CHECK: encoding:
         abs.s     $f9,$f16
         add       $s7,$s2,$a1
+        add       $9,$14,15176         # CHECK: addi $9, $14, 15176   # encoding: [0x21,0xc9,0x3b,0x48]
+        add       $24,-7193            # CHECK: addi $24, $24, -7193  # encoding: [0x23,0x18,0xe3,0xe7]
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
         addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
+        addu      $9,10                # CHECK: addiu $9, $9, 10    # encoding: [0x25,0x29,0x00,0x0a]
         and       $s7,$v0,$12
         and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
         bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
         bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1fl     $fcc0,50             # CHECK: bc1fl 50      # encoding: [0x45,0x02,0x00,0x0c]
+        bc1fl     50                   # CHECK: bc1fl 50      # encoding: [0x45,0x02,0x00,0x0c]
         bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
         bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1tl     $fcc0,-8239          # CHECK: bc1tl -8239   # encoding: [0x45,0x03,0xf7,0xf4]
+        bc1tl     -8239                # CHECK: bc1tl -8239   # encoding: [0x45,0x03,0xf7,0xf4]
         bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
         bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
+        beql      $14,$s3,12544        # CHECK: beql $14, $19, 12544 # encoding: [0x51,0xd3,0x0c,0x40]
+        bgezall   $12,7293             # CHECK: bgezall $12, 7293    # encoding: [0x05,0x93,0x07,0x1f]
+        bgezl     $4,-6858             # CHECK: bgezl $4, -6858      # encoding: [0x04,0x83,0xf9,0x4d]
+        bgtzl     $10,-3738            # CHECK: bgtzl $10, -3738     # encoding: [0x5d,0x40,0xfc,0x59]
+        blezl     $6,2974              # CHECK: blezl $6, 2974       # encoding: [0x58,0xc0,0x02,0xe7]
+        bltzall   $6,488               # CHECK: bltzall $6, 488      # encoding: [0x04,0xd2,0x00,0x7a]
+        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
+        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
@@ -50,6 +65,8 @@
         daddi     $sp,-27705           # CHECK: daddi $sp, $sp, -27705 # encoding: [0x63,0xbd,0x93,0xc7]
         daddiu    $k0,$s6,-4586
         daddu     $s3,$at,$ra
+        daddu     $24,$2,18079         # CHECK: daddiu $24, $2, 18079   # encoding: [0x64,0x58,0x46,0x9f]
+        daddu     $19,26943            # CHECK: daddiu $19, $19, 26943  # encoding: [0x66,0x73,0x69,0x3f]
         ddiv      $zero,$k0,$s3
         ddivu     $zero,$s0,$s1
         div       $zero,$25,$11
@@ -84,6 +101,8 @@
         dsubi     $sp,$s4,-27705       # CHECK: daddi $sp, $20, 27705  # encoding: [0x62,0x9d,0x6c,0x39]
         dsubi     $sp,-27705           # CHECK: daddi $sp, $sp, 27705  # encoding: [0x63,0xbd,0x6c,0x39]
         dsubu     $a1,$a1,$k0
+        dsubu     $15,$11,5025         # CHECK: daddiu $15, $11, -5025  # encoding: [0x65,0x6f,0xec,0x5f]
+        dsubu     $14,-4586            # CHECK: daddiu $14, $14, 4586   # encoding: [0x65,0xce,0x11,0xea]
         ehb                            # CHECK: ehb # encoding:  [0x00,0x00,0x00,0xc0]
         eret
         floor.l.d $f26,$f7
@@ -171,6 +190,8 @@
         srlv      $25,$s4,$a0          # CHECK: srlv $25, $20, $4      # encoding: [0x00,0x94,0xc8,0x06]
         ssnop                          # CHECK: ssnop                  # encoding: [0x00,0x00,0x00,0x40]
         sub       $s6,$s3,$12
+        sub       $22,$17,-3126        # CHECK: addi $22, $17, 3126    # encoding: [0x22,0x36,0x0c,0x36]
+        sub       $13,6512             # CHECK: addi $13, $13, -6512   # encoding: [0x21,0xad,0xe6,0x90]
         sub.d     $f18,$f3,$f17
         sub.s     $f23,$f22,$f22
         subu      $sp,$s6,$s6
@@ -180,15 +201,27 @@
         swl       $15,13694($s3)
         swr       $s1,-26590($14)
         sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
+        teq       $0,$3                # CHECK: teq $zero, $3          # encoding: [0x00,0x03,0x00,0x34]
+        teq       $5,$7,620            # CHECK: teq $5, $7, 620        # encoding: [0x00,0xa7,0x9b,0x34]
         teqi      $s5,-17504
+        tge       $7,$10               # CHECK: tge $7, $10            # encoding: [0x00,0xea,0x00,0x30]
+        tge       $5,$19,340           # CHECK: tge $5, $19, 340       # encoding: [0x00,0xb3,0x55,0x30]
         tgei      $s1,5025
         tgeiu     $sp,-28621
+        tgeu      $22,$28              # CHECK: tgeu $22, $gp          # encoding: [0x02,0xdc,0x00,0x31]
+        tgeu      $20,$14,379          # CHECK: tgeu $20, $14, 379     # encoding: [0x02,0x8e,0x5e,0xf1]
         tlbp                           # CHECK: tlbp                   # encoding: [0x42,0x00,0x00,0x08]
         tlbr                           # CHECK: tlbr                   # encoding: [0x42,0x00,0x00,0x01]
         tlbwi                          # CHECK: tlbwi                  # encoding: [0x42,0x00,0x00,0x02]
         tlbwr                          # CHECK: tlbwr                  # encoding: [0x42,0x00,0x00,0x06]
+        tlt       $15,$13              # CHECK: tlt $15, $13           # encoding: [0x01,0xed,0x00,0x32]
+        tlt       $2,$19,133           # CHECK: tlt $2, $19, 133       # encoding: [0x00,0x53,0x21,0x72]
         tlti      $14,-21059
         tltiu     $ra,-5076
+        tltu      $11,$16              # CHECK: tltu $11, $16          # encoding: [0x01,0x70,0x00,0x33]
+        tltu      $16,$29,1016         # CHECK: tltu $16, $sp, 1016    # encoding: [0x02,0x1d,0xfe,0x33]
+        tne       $6,$17               # CHECK: tne $6, $17            # encoding: [0x00,0xd1,0x00,0x36]
+        tne       $7,$8,885            # CHECK: tne $7, $8, 885        # encoding: [0x00,0xe8,0xdd,0x76]
         tnei      $12,-29647
         trunc.l.d $f23,$f23
         trunc.l.s $f28,$f31
diff --git a/test/MC/Mips/mips32/invalid-mips32r2.s b/test/MC/Mips/mips32/invalid-mips32r2.s
index fa6fe326e163..07a1e8f53a1d 100644
--- a/test/MC/Mips/mips32/invalid-mips32r2.s
+++ b/test/MC/Mips/mips32/invalid-mips32r2.s
@@ -8,7 +8,9 @@
         cvt.l.d $f24,$f15           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         cvt.l.s $f11,$f29           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         di      $s8                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        di                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ei      $t6                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        ei                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ldxc1   $f8,$s7($t7)        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         luxc1   $f19,$s6($s5)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         lwxc1   $f12,$s1($s8)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32/valid.s b/test/MC/Mips/mips32/valid.s
index d330905ae29a..d79c390c47ec 100644
--- a/test/MC/Mips/mips32/valid.s
+++ b/test/MC/Mips/mips32/valid.s
@@ -6,23 +6,40 @@
         abs.d     $f7,$f25             # CHECK: encoding:
         abs.s     $f9,$f16
         add       $s7,$s2,$a1
+        add       $9,$14,15176         # CHECK: addi $9, $14, 15176   # encoding: [0x21,0xc9,0x3b,0x48]
+        add       $24,-7193            # CHECK: addi $24, $24, -7193  # encoding: [0x23,0x18,0xe3,0xe7]
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
         addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
+        addu      $9,10                # CHECK: addiu $9, $9, 10    # encoding: [0x25,0x29,0x00,0x0a]
         and       $s7,$v0,$12
         and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
         bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
         bc1f      $fcc1, 4             # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01]
         bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1fl     $fcc0,4688           # CHECK: bc1fl 4688      # encoding: [0x45,0x02,0x04,0x94]
+        bc1fl     4688                 # CHECK: bc1fl 4688      # encoding: [0x45,0x02,0x04,0x94]
+        bc1fl     $fcc7,27             # CHECK: bc1fl $fcc7, 27 # encoding: [0x45,0x1e,0x00,0x06]
         bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
         bc1t      $fcc1, 4             # CHECK: bc1t $fcc1, 4 # encoding: [0x45,0x05,0x00,0x01]
         bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1tl     $fcc0,4688           # CHECK: bc1tl 4688      # encoding: [0x45,0x03,0x04,0x94]
+        bc1tl     4688                 # CHECK: bc1tl 4688      # encoding: [0x45,0x03,0x04,0x94]
+        bc1tl     $fcc7,27             # CHECK: bc1tl $fcc7, 27 # encoding: [0x45,0x1f,0x00,0x06]
         bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
         bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
+        beql      $14,$s3,12544        # CHECK: beql $14, $19, 12544 # encoding: [0x51,0xd3,0x0c,0x40]
+        bgezall   $12,7293             # CHECK: bgezall $12, 7293    # encoding: [0x05,0x93,0x07,0x1f]
+        bgezl     $4,-6858             # CHECK: bgezl $4, -6858      # encoding: [0x04,0x83,0xf9,0x4d]
+        bgtzl     $10,-3738            # CHECK: bgtzl $10, -3738     # encoding: [0x5d,0x40,0xfc,0x59]
+        blezl     $6,2974              # CHECK: blezl $6, 2974       # encoding: [0x58,0xc0,0x02,0xe7]
+        bltzall   $6,488               # CHECK: bltzall $6, 488      # encoding: [0x04,0xd2,0x00,0x7a]
+        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
+        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
@@ -141,6 +158,8 @@
         srlv      $25,$s4,$a0          # CHECK: srlv $25, $20, $4      # encoding: [0x00,0x94,0xc8,0x06]
         ssnop                          # CHECK: ssnop                  # encoding: [0x00,0x00,0x00,0x40]
         sub       $s6,$s3,$12
+        sub       $22,$17,-3126        # CHECK: addi $22, $17, 3126    # encoding: [0x22,0x36,0x0c,0x36]
+        sub       $13,6512             # CHECK: addi $13, $13, -6512   # encoding: [0x21,0xad,0xe6,0x90]
         sub.d     $f18,$f3,$f17
         sub.s     $f23,$f22,$f22
         subu      $sp,$s6,$s6
@@ -151,15 +170,27 @@
         swr       $s1,-26590($14)
         sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
         sync      1                    # CHECK: sync 1                 # encoding: [0x00,0x00,0x00,0x4f]
+        teq       $0,$3                # CHECK: teq $zero, $3          # encoding: [0x00,0x03,0x00,0x34]
+        teq       $5,$7,620            # CHECK: teq $5, $7, 620        # encoding: [0x00,0xa7,0x9b,0x34]
         teqi      $s5,-17504
+        tge       $7,$10               # CHECK: tge $7, $10            # encoding: [0x00,0xea,0x00,0x30]
+        tge       $5,$19,340           # CHECK: tge $5, $19, 340       # encoding: [0x00,0xb3,0x55,0x30]
         tgei      $s1,5025
         tgeiu     $sp,-28621
+        tgeu      $22,$28              # CHECK: tgeu $22, $gp          # encoding: [0x02,0xdc,0x00,0x31]
+        tgeu      $20,$14,379          # CHECK: tgeu $20, $14, 379     # encoding: [0x02,0x8e,0x5e,0xf1]
         tlbp                           # CHECK: tlbp                   # encoding: [0x42,0x00,0x00,0x08]
         tlbr                           # CHECK: tlbr                   # encoding: [0x42,0x00,0x00,0x01]
         tlbwi                          # CHECK: tlbwi                  # encoding: [0x42,0x00,0x00,0x02]
         tlbwr                          # CHECK: tlbwr                  # encoding: [0x42,0x00,0x00,0x06]
+        tlt       $15,$13              # CHECK: tlt $15, $13           # encoding: [0x01,0xed,0x00,0x32]
+        tlt       $2,$19,133           # CHECK: tlt $2, $19, 133       # encoding: [0x00,0x53,0x21,0x72]
         tlti      $14,-21059
         tltiu     $ra,-5076
+        tltu      $11,$16              # CHECK: tltu $11, $16          # encoding: [0x01,0x70,0x00,0x33]
+        tltu      $16,$29,1016         # CHECK: tltu $16, $sp, 1016    # encoding: [0x02,0x1d,0xfe,0x33]
+        tne       $6,$17               # CHECK: tne $6, $17            # encoding: [0x00,0xd1,0x00,0x36]
+        tne       $7,$8,885            # CHECK: tne $7, $8, 885        # encoding: [0x00,0xe8,0xdd,0x76]
         tnei      $12,-29647
         trunc.w.d $f22,$f15
         trunc.w.s $f28,$f30
diff --git a/test/MC/Mips/mips32r2/valid-xfail.s b/test/MC/Mips/mips32r2/valid-xfail.s
index ef02d51b53fd..13385d06ce81 100644
--- a/test/MC/Mips/mips32r2/valid-xfail.s
+++ b/test/MC/Mips/mips32r2/valid-xfail.s
@@ -293,7 +293,6 @@
         swe             $24,94($k0)
         swle            $v1,-209($gp)
         swre            $k0,-202($s2)
-        synci           20023($s0)
         tlbginv
         tlbginvf
         tlbgp
diff --git a/test/MC/Mips/mips32r2/valid.s b/test/MC/Mips/mips32r2/valid.s
index 631c691ed711..97cfa36e7122 100644
--- a/test/MC/Mips/mips32r2/valid.s
+++ b/test/MC/Mips/mips32r2/valid.s
@@ -6,23 +6,40 @@
         abs.d     $f7,$f25             # CHECK: encoding:
         abs.s     $f9,$f16
         add       $s7,$s2,$a1
+        add       $9,$14,15176         # CHECK: addi $9, $14, 15176   # encoding: [0x21,0xc9,0x3b,0x48]
+        add       $24,-7193            # CHECK: addi $24, $24, -7193  # encoding: [0x23,0x18,0xe3,0xe7]
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
         addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
+        addu      $9,10                # CHECK: addiu $9, $9, 10    # encoding: [0x25,0x29,0x00,0x0a]
         and       $s7,$v0,$12
         and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
         bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
         bc1f      $fcc1, 4             # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01]
         bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1fl     $fcc0,4688           # CHECK: bc1fl 4688      # encoding: [0x45,0x02,0x04,0x94]
+        bc1fl     4688                 # CHECK: bc1fl 4688      # encoding: [0x45,0x02,0x04,0x94]
+        bc1fl     $fcc7,27             # CHECK: bc1fl $fcc7, 27 # encoding: [0x45,0x1e,0x00,0x06]
         bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
         bc1t      $fcc1, 4             # CHECK: bc1t $fcc1, 4 # encoding: [0x45,0x05,0x00,0x01]
         bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1tl     $fcc0,4688           # CHECK: bc1tl 4688      # encoding: [0x45,0x03,0x04,0x94]
+        bc1tl     4688                 # CHECK: bc1tl 4688      # encoding: [0x45,0x03,0x04,0x94]
+        bc1tl     $fcc7,27             # CHECK: bc1tl $fcc7, 27 # encoding: [0x45,0x1f,0x00,0x06]
         bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
         bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
+        beql      $14,$s3,12544        # CHECK: beql $14, $19, 12544 # encoding: [0x51,0xd3,0x0c,0x40]
+        bgezall   $12,7293             # CHECK: bgezall $12, 7293    # encoding: [0x05,0x93,0x07,0x1f]
+        bgezl     $4,-6858             # CHECK: bgezl $4, -6858      # encoding: [0x04,0x83,0xf9,0x4d]
+        bgtzl     $10,-3738            # CHECK: bgtzl $10, -3738     # encoding: [0x5d,0x40,0xfc,0x59]
+        blezl     $6,2974              # CHECK: blezl $6, 2974       # encoding: [0x58,0xc0,0x02,0xe7]
+        bltzall   $6,488               # CHECK: bltzall $6, 488      # encoding: [0x04,0xd2,0x00,0x7a]
+        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
+        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
@@ -43,13 +60,15 @@
         cvt.w.d   $f20,$f14
         cvt.w.s   $f20,$f24
         deret
-        di        $s8
+        di        $s8                  # CHECK: di  $fp       # encoding: [0x41,0x7e,0x60,0x00]
+        di                             # CHECK: di            # encoding: [0x41,0x60,0x60,0x00]
         div       $zero,$25,$11
         div.d     $f29,$f20,$f27
         div.s     $f4,$f5,$f15
         divu      $zero,$25,$15
         ehb                            # CHECK: ehb # encoding:  [0x00,0x00,0x00,0xc0]
-        ei        $14
+        ei        $14                  # CHECK: ei  $14       # encoding: [0x41,0x6e,0x60,0x20]
+        ei                             # CHECK: ei            # encoding: [0x41,0x60,0x60,0x20]
         eret
         floor.w.d $f14,$f11
         floor.w.s $f8,$f9
@@ -132,7 +151,12 @@
         or        $2, 4                # CHECK: ori $2, $2, 4           # encoding: [0x34,0x42,0x00,0x04]
         pause                          # CHECK: pause # encoding:  [0x00,0x00,0x01,0x40]
         pref      1, 8($5)             # CHECK: pref 1, 8($5)           # encoding: [0xcc,0xa1,0x00,0x08]
-        rdhwr     $sp,$11              
+        # FIXME: Use the code generator in order to print the .set directives
+        #        instead of the instruction printer.
+        rdhwr     $sp,$11              # CHECK:      .set  push
+                                       # CHECK-NEXT: .set  mips32r2
+                                       # CHECK-NEXT: rdhwr $sp, $11
+                                       # CHECK-NEXT: .set  pop          # encoding: [0x7c,0x1d,0x58,0x3b]
         rotr      $1,15                # CHECK: rotr $1, $1, 15         # encoding: [0x00,0x21,0x0b,0xc2]
         rotr      $1,$14,15            # CHECK: rotr $1, $14, 15        # encoding: [0x00,0x2e,0x0b,0xc2]
         rotrv     $1,$14,$15           # CHECK: rotrv $1, $14, $15      # encoding: [0x01,0xee,0x08,0x46]
@@ -169,6 +193,8 @@
         srlv      $25,$s4,$a0          # CHECK: srlv $25, $20, $4      # encoding: [0x00,0x94,0xc8,0x06]
         ssnop                          # CHECK: ssnop                  # encoding: [0x00,0x00,0x00,0x40]
         sub       $s6,$s3,$12
+        sub       $22,$17,-3126        # CHECK: addi $22, $17, 3126    # encoding: [0x22,0x36,0x0c,0x36]
+        sub       $13,6512             # CHECK: addi $13, $13, -6512   # encoding: [0x21,0xad,0xe6,0x90]
         sub.d     $f18,$f3,$f17
         sub.s     $f23,$f22,$f22
         subu      $sp,$s6,$s6
@@ -181,17 +207,30 @@
         swxc1     $f19,$12($k0)
         sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
         sync      1                    # CHECK: sync 1                 # encoding: [0x00,0x00,0x00,0x4f]
+        teq       $0,$3                # CHECK: teq $zero, $3          # encoding: [0x00,0x03,0x00,0x34]
+        teq       $5,$7,620            # CHECK: teq $5, $7, 620        # encoding: [0x00,0xa7,0x9b,0x34]
         teqi      $s5,-17504
+        tge       $7,$10               # CHECK: tge $7, $10            # encoding: [0x00,0xea,0x00,0x30]
+        tge       $5,$19,340           # CHECK: tge $5, $19, 340       # encoding: [0x00,0xb3,0x55,0x30]
         tgei      $s1,5025
         tgeiu     $sp,-28621
+        tgeu      $22,$28              # CHECK: tgeu $22, $gp          # encoding: [0x02,0xdc,0x00,0x31]
+        tgeu      $20,$14,379          # CHECK: tgeu $20, $14, 379     # encoding: [0x02,0x8e,0x5e,0xf1]
         tlbp                           # CHECK: tlbp                   # encoding: [0x42,0x00,0x00,0x08]
         tlbr                           # CHECK: tlbr                   # encoding: [0x42,0x00,0x00,0x01]
         tlbwi                          # CHECK: tlbwi                  # encoding: [0x42,0x00,0x00,0x02]
         tlbwr                          # CHECK: tlbwr                  # encoding: [0x42,0x00,0x00,0x06]
+        tlt       $15,$13              # CHECK: tlt $15, $13           # encoding: [0x01,0xed,0x00,0x32]
+        tlt       $2,$19,133           # CHECK: tlt $2, $19, 133       # encoding: [0x00,0x53,0x21,0x72]
         tlti      $14,-21059
         tltiu     $ra,-5076
+        tltu      $11,$16              # CHECK: tltu $11, $16          # encoding: [0x01,0x70,0x00,0x33]
+        tltu      $16,$29,1016         # CHECK: tltu $16, $sp, 1016    # encoding: [0x02,0x1d,0xfe,0x33]
+        tne       $6,$17               # CHECK: tne $6, $17            # encoding: [0x00,0xd1,0x00,0x36]
+        tne       $7,$8,885            # CHECK: tne $7, $8, 885        # encoding: [0x00,0xe8,0xdd,0x76]
         tnei      $12,-29647
         trunc.w.d $f22,$f15
         trunc.w.s $f28,$f30
         wsbh      $k1,$9
         xor       $s2,$a0,$s8
+        synci     -15842($a2)          # CHECK: synci -15842($6)        # encoding: [0x04,0xdf,0xc2,0x1e]
diff --git a/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s b/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s
index 52fa5f52b8db..cc7d403eaf8b 100644
--- a/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s
+++ b/test/MC/Mips/mips32r6/invalid-mips1-wrong-error.s
@@ -5,13 +5,13 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
-        bc2f      4                   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2t      4                   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2f      4                   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2t      4                   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
         lwl       $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         lwr       $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         swl       $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         swr       $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        lwle      $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        lwre      $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        swle      $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        swre      $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        lwle      $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        lwre      $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        swle      $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        swre      $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips32r6/invalid-mips1.s b/test/MC/Mips/mips32r6/invalid-mips1.s
index 44d4fbb86623..94810f44de44 100644
--- a/test/MC/Mips/mips32r6/invalid-mips1.s
+++ b/test/MC/Mips/mips32r6/invalid-mips1.s
@@ -5,6 +5,8 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
+        add       $9,$14,15176        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        add       $24,-7193           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         addi      $13,$9,26322        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         c.ngl.d   $f29,$f29           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         c.ngle.d  $f0,$f16            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -22,3 +24,5 @@
         multu     $gp,$k0             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
 #       div has been re-encoded. See valid.s
 #       divu has been re-encoded. See valid.s
+        sub       $22,$17,-3126       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        sub       $13,6512            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r6/invalid-mips2-wrong-error.s b/test/MC/Mips/mips32r6/invalid-mips2-wrong-error.s
index b799c8e3fcd7..1cec777c27a6 100644
--- a/test/MC/Mips/mips32r6/invalid-mips2-wrong-error.s
+++ b/test/MC/Mips/mips32r6/invalid-mips2-wrong-error.s
@@ -6,15 +6,5 @@
 # RUN: FileCheck %s < %t1
 
         .set noat
-        beql $1,$2,4            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bgezall $3,8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bgezl $3,8              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bgtzl $4,16             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        blezl $3,8              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bltzall $3,8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bltzl $4,16             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bnel $1,$2,4            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips32r6/invalid-mips2.s b/test/MC/Mips/mips32r6/invalid-mips2.s
index bfa2c4c3ee74..642d6bdb2c46 100644
--- a/test/MC/Mips/mips32r6/invalid-mips2.s
+++ b/test/MC/Mips/mips32r6/invalid-mips2.s
@@ -6,6 +6,18 @@
 
 	.set noat
         addi      $13,$9,26322        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1fl     $fcc0,-8239         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1fl     -8239               # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1tl     $fcc0,-8239         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1tl     -8239               # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        beql      $14,$s3,12544       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bnel      $gp,$s4,5107        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezl     $4,-6858            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgtzl     $10,-3738           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        blezl     $6,2974             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bltzl     $s1,-9964           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezall   $12,7293            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bltzall   $6,488              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         mfhi      $s3                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         mfhi      $sp                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         mflo      $s1                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r6/invalid-mips32-wrong-error.s b/test/MC/Mips/mips32r6/invalid-mips32-wrong-error.s
index e63bdd4e7078..3131c5a8fced 100644
--- a/test/MC/Mips/mips32r6/invalid-mips32-wrong-error.s
+++ b/test/MC/Mips/mips32r6/invalid-mips32-wrong-error.s
@@ -6,15 +6,11 @@
 # RUN: FileCheck %s < %t1
 
         .set noat
-        bc1tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1tl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1fl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2f  4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2f  $fcc0,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2t  4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2t  $fcc0,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2tl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2fl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2f  4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2f  $fcc0,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2t  4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2t  $fcc0,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2tl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2fl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips32r6/invalid-mips32.s b/test/MC/Mips/mips32r6/invalid-mips32.s
index e0889ea07bba..b2330c280b42 100644
--- a/test/MC/Mips/mips32r6/invalid-mips32.s
+++ b/test/MC/Mips/mips32r6/invalid-mips32.s
@@ -5,6 +5,8 @@
 # RUN: FileCheck %s < %t1
 
         .set noat
+        bc1fl     $fcc7,27            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1tl     $fcc7,27            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         madd      $s6,$13       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         madd      $zero,$9      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         maddu     $s3,$gp       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r6/invalid-mips4-wrong-error.s b/test/MC/Mips/mips32r6/invalid-mips4-wrong-error.s
index f3131a947367..06bf58c1e8fa 100644
--- a/test/MC/Mips/mips32r6/invalid-mips4-wrong-error.s
+++ b/test/MC/Mips/mips32r6/invalid-mips4-wrong-error.s
@@ -6,16 +6,6 @@
 # RUN: FileCheck %s < %t1
 
         .set noat
-        beql $1,$2,4            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bgezall $3,8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bgezl $3,8              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bgtzl $4,16             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        blezl $3,8              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bltzall $3,8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bltzl $4,16             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bnel $1,$2,4            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        prefx 0,$2($31)         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        prefx 0,$2($31)         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips32r6/invalid-mips4.s b/test/MC/Mips/mips32r6/invalid-mips4.s
index 8ba2ed88ad6e..9d8f02fb6d67 100644
--- a/test/MC/Mips/mips32r6/invalid-mips4.s
+++ b/test/MC/Mips/mips32r6/invalid-mips4.s
@@ -5,6 +5,8 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
+        bc1fl     $fcc7,27            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1tl     $fcc7,27            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         ldxc1     $f8,$s7($15)        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         lwxc1     $f12,$s1($s8)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         sdxc1     $f11,$10($14)       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips32r6/invalid-mips5-wrong-error.s b/test/MC/Mips/mips32r6/invalid-mips5-wrong-error.s
index 99d10c327a4b..b5d738093186 100644
--- a/test/MC/Mips/mips32r6/invalid-mips5-wrong-error.s
+++ b/test/MC/Mips/mips32r6/invalid-mips5-wrong-error.s
@@ -5,7 +5,7 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
-        bc1any2f  $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1any2t  $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1any4f  $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1any4t  $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc1any2f  $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc1any2t  $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc1any4f  $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc1any4t  $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s
index f23dbd7302f9..362785ba7ef2 100644
--- a/test/MC/Mips/mips32r6/valid.s
+++ b/test/MC/Mips/mips32r6/valid.s
@@ -17,6 +17,7 @@
         # FIXME: Add the instructions carried forward from older ISA's
         and     $2,4             # CHECK: andi $2, $2, 4      # encoding: [0x30,0x42,0x00,0x04]
         addiupc $4, 100          # CHECK: addiupc $4, 100     # encoding: [0xec,0x80,0x00,0x19]
+        addu    $9,10            # CHECK: addiu $9, $9, 10    # encoding: [0x25,0x29,0x00,0x0a]
         align   $4, $2, $3, 2    # CHECK: align $4, $2, $3, 2 # encoding: [0x7c,0x43,0x22,0xa0]
         aluipc  $3, 56           # CHECK: aluipc $3, 56       # encoding: [0xec,0x7f,0x00,0x38]
         aui     $3,$2,-23        # CHECK: aui $3, $2, -23     # encoding: [0x3c,0x62,0xff,0xe9]
@@ -96,8 +97,12 @@
         cmp.sle.d  $f2,$f3,$f4      # CHECK: cmp.sle.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8e]
         cmp.sule.s $f2,$f3,$f4      # CHECK: cmp.sule.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8f]
         cmp.sule.d $f2,$f3,$f4      # CHECK: cmp.sule.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8f]
+        di      $s8              # CHECK: di  $fp          # encoding: [0x41,0x7e,0x60,0x00]
+        di                       # CHECK: di               # encoding: [0x41,0x60,0x60,0x00]
         div     $2,$3,$4         # CHECK: div $2, $3, $4   # encoding: [0x00,0x64,0x10,0x9a]
         divu    $2,$3,$4         # CHECK: divu $2, $3, $4  # encoding: [0x00,0x64,0x10,0x9b]
+        ei      $14              # CHECK: ei  $14          # encoding: [0x41,0x6e,0x60,0x20]
+        ei                       # CHECK: ei               # encoding: [0x41,0x60,0x60,0x20]
         jialc   $5, 256          # CHECK: jialc $5, 256    # encoding: [0xf8,0x05,0x01,0x00]
         jic     $5, 256          # CHECK: jic $5, 256      # encoding: [0xd8,0x05,0x01,0x00]
         lsa     $2, $3, $4, 3    # CHECK: lsa  $2, $3, $4, 3 # encoding: [0x00,0x64,0x10,0xc5]
@@ -114,6 +119,12 @@
         msubf.s $f2,$f3,$f4      # CHECK: msubf.s $f2, $f3, $f4  # encoding: [0x46,0x04,0x18,0x99]
         msubf.d $f2,$f3,$f4      # CHECK: msubf.d $f2, $f3, $f4  # encoding: [0x46,0x24,0x18,0x99]
         pref    1, 8($5)         # CHECK: pref 1, 8($5)          # encoding: [0x7c,0xa1,0x04,0x35]
+        # FIXME: Use the code generator in order to print the .set directives
+        #        instead of the instruction printer.
+        rdhwr   $sp,$11          # CHECK:      .set  push
+                                 # CHECK-NEXT: .set  mips32r2
+                                 # CHECK-NEXT: rdhwr $sp, $11
+                                 # CHECK-NEXT: .set  pop      # encoding: [0x7c,0x1d,0x58,0x3b]
         sel.d   $f0,$f1,$f2      # CHECK: sel.d $f0, $f1, $f2 # encoding: [0x46,0x22,0x08,0x10]
         sel.s   $f0,$f1,$f2      # CHECK: sel.s $f0, $f1, $f2 # encoding: [0x46,0x02,0x08,0x10]
         seleqz  $2,$3,$4         # CHECK: seleqz $2, $3, $4 # encoding: [0x00,0x64,0x10,0x35]
@@ -152,3 +163,15 @@
         sdbbp     34             # CHECK: sdbbp 34               # encoding: [0x00,0x00,0x08,0x8e]
         sync                     # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
         sync    1                # CHECK: sync 1                 # encoding: [0x00,0x00,0x00,0x4f]
+        teq     $0,$3            # CHECK: teq $zero, $3          # encoding: [0x00,0x03,0x00,0x34]
+        teq     $5,$7,620        # CHECK: teq $5, $7, 620        # encoding: [0x00,0xa7,0x9b,0x34]
+        tge     $7,$10           # CHECK: tge $7, $10            # encoding: [0x00,0xea,0x00,0x30]
+        tge     $5,$19,340       # CHECK: tge $5, $19, 340       # encoding: [0x00,0xb3,0x55,0x30]
+        tgeu    $22,$28          # CHECK: tgeu $22, $gp          # encoding: [0x02,0xdc,0x00,0x31]
+        tgeu    $20,$14,379      # CHECK: tgeu $20, $14, 379     # encoding: [0x02,0x8e,0x5e,0xf1]
+        tlt     $15,$13          # CHECK: tlt $15, $13           # encoding: [0x01,0xed,0x00,0x32]
+        tlt     $2,$19,133       # CHECK: tlt $2, $19, 133       # encoding: [0x00,0x53,0x21,0x72]
+        tltu    $11,$16          # CHECK: tltu $11, $16          # encoding: [0x01,0x70,0x00,0x33]
+        tltu    $16,$29,1016     # CHECK: tltu $16, $sp, 1016    # encoding: [0x02,0x1d,0xfe,0x33]
+        tne     $6,$17           # CHECK: tne $6, $17            # encoding: [0x00,0xd1,0x00,0x36]
+        tne     $7,$8,885        # CHECK: tne $7, $8, 885        # encoding: [0x00,0xe8,0xdd,0x76]
diff --git a/test/MC/Mips/mips4/invalid-mips32r2.s b/test/MC/Mips/mips4/invalid-mips32r2.s
new file mode 100644
index 000000000000..3e787585744b
--- /dev/null
+++ b/test/MC/Mips/mips4/invalid-mips32r2.s
@@ -0,0 +1,11 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips4 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+        .set noat
+        di      $s8                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        di                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        ei      $t6                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        ei                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips4/invalid-mips5-wrong-error.s b/test/MC/Mips/mips4/invalid-mips5-wrong-error.s
index c6c8968d2554..5c8ab23539b8 100644
--- a/test/MC/Mips/mips4/invalid-mips5-wrong-error.s
+++ b/test/MC/Mips/mips4/invalid-mips5-wrong-error.s
@@ -6,41 +6,41 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
-        abs.ps    $f22,$f8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        add.ps    $f25,$f27,$f13      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        alnv.ps   $f12,$f18,$f30,$t0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.eq.ps   $fcc5,$f0,$f9       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.f.ps    $fcc6,$f11,$f11     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.le.ps   $fcc1,$f7,$f20      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.lt.ps   $f19,$f5            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.nge.ps  $f1,$f26            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ngl.ps  $f21,$f30           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ngle.ps $fcc7,$f12,$f20     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ngt.ps  $fcc5,$f30,$f6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ole.ps  $fcc7,$f21,$f8      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.olt.ps  $fcc3,$f7,$f16      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.seq.ps  $fcc6,$f31,$f14     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.sf.ps   $fcc6,$f4,$f6       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ueq.ps  $fcc1,$f5,$f29      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ule.ps  $fcc6,$f17,$f3      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ult.ps  $fcc7,$f14,$f0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.un.ps   $fcc4,$f2,$f26      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        cvt.ps.s  $f3,$f18,$f19       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        cvt.s.pl  $f30,$f1            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        cvt.s.pu  $f14,$f25           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        madd.ps   $f22,$f3,$f14,$f3   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        mov.ps    $f22,$f17           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movf.ps   $f10,$f28,$fcc6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movn.ps   $f31,$f31,$s3       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movt.ps   $f20,$f25,$fcc2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movz.ps   $f18,$f17,$ra       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        msub.ps   $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        mul.ps    $f14,$f0,$f16       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        neg.ps    $f19,$f13           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        nmadd.ps  $f27,$f4,$f9,$f25   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        nmsub.ps  $f6,$f12,$f14,$f17  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        pll.ps    $f25,$f9,$f30       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        plu.ps    $f1,$f26,$f29       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        pul.ps    $f9,$f30,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        puu.ps    $f24,$f9,$f2        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        sub.ps    $f5,$f14,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        abs.ps    $f22,$f8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        add.ps    $f25,$f27,$f13      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        alnv.ps   $f12,$f18,$f30,$t0  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.eq.ps   $fcc5,$f0,$f9       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.f.ps    $fcc6,$f11,$f11     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.le.ps   $fcc1,$f7,$f20      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.lt.ps   $f19,$f5            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.nge.ps  $f1,$f26            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ngl.ps  $f21,$f30           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ngle.ps $fcc7,$f12,$f20     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ngt.ps  $fcc5,$f30,$f6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ole.ps  $fcc7,$f21,$f8      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.olt.ps  $fcc3,$f7,$f16      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.seq.ps  $fcc6,$f31,$f14     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.sf.ps   $fcc6,$f4,$f6       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ueq.ps  $fcc1,$f5,$f29      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ule.ps  $fcc6,$f17,$f3      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ult.ps  $fcc7,$f14,$f0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.un.ps   $fcc4,$f2,$f26      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        cvt.ps.s  $f3,$f18,$f19       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        cvt.s.pl  $f30,$f1            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        cvt.s.pu  $f14,$f25           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        madd.ps   $f22,$f3,$f14,$f3   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        mov.ps    $f22,$f17           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movf.ps   $f10,$f28,$fcc6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movn.ps   $f31,$f31,$s3       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movt.ps   $f20,$f25,$fcc2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movz.ps   $f18,$f17,$ra       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        msub.ps   $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        mul.ps    $f14,$f0,$f16       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        neg.ps    $f19,$f13           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        nmadd.ps  $f27,$f4,$f9,$f25   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        nmsub.ps  $f6,$f12,$f14,$f17  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        pll.ps    $f25,$f9,$f30       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        plu.ps    $f1,$f26,$f29       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        pul.ps    $f9,$f30,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        puu.ps    $f24,$f9,$f2        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        sub.ps    $f5,$f14,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips4/valid.s b/test/MC/Mips/mips4/valid.s
index 949b91da922c..c221b764b16b 100644
--- a/test/MC/Mips/mips4/valid.s
+++ b/test/MC/Mips/mips4/valid.s
@@ -6,23 +6,40 @@
         abs.d     $f7,$f25             # CHECK: encoding:
         abs.s     $f9,$f16
         add       $s7,$s2,$a1
+        add       $9,$14,15176         # CHECK: addi $9, $14, 15176   # encoding: [0x21,0xc9,0x3b,0x48]
+        add       $24,-7193            # CHECK: addi $24, $24, -7193  # encoding: [0x23,0x18,0xe3,0xe7]
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
         addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
+        addu      $9,10                # CHECK: addiu $9, $9, 10    # encoding: [0x25,0x29,0x00,0x0a]
         and       $s7,$v0,$12
         and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
         bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
         bc1f      $fcc1, 4             # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01]
         bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1fl     $fcc0,4688           # CHECK: bc1fl 4688      # encoding: [0x45,0x02,0x04,0x94]
+        bc1fl     4688                 # CHECK: bc1fl 4688      # encoding: [0x45,0x02,0x04,0x94]
+        bc1fl     $fcc7,27             # CHECK: bc1fl $fcc7, 27 # encoding: [0x45,0x1e,0x00,0x06]
         bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
         bc1t      $fcc1, 4             # CHECK: bc1t $fcc1, 4 # encoding: [0x45,0x05,0x00,0x01]
         bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1tl     $fcc0,4688           # CHECK: bc1tl 4688      # encoding: [0x45,0x03,0x04,0x94]
+        bc1tl     4688                 # CHECK: bc1tl 4688      # encoding: [0x45,0x03,0x04,0x94]
+        bc1tl     $fcc7,27             # CHECK: bc1tl $fcc7, 27 # encoding: [0x45,0x1f,0x00,0x06]
         bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
         bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
+        beql      $14,$s3,12544        # CHECK: beql $14, $19, 12544 # encoding: [0x51,0xd3,0x0c,0x40]
+        bgezall   $12,7293             # CHECK: bgezall $12, 7293    # encoding: [0x05,0x93,0x07,0x1f]
+        bgezl     $4,-6858             # CHECK: bgezl $4, -6858      # encoding: [0x04,0x83,0xf9,0x4d]
+        bgtzl     $10,-3738            # CHECK: bgtzl $10, -3738     # encoding: [0x5d,0x40,0xfc,0x59]
+        blezl     $6,2974              # CHECK: blezl $6, 2974       # encoding: [0x58,0xc0,0x02,0xe7]
+        bltzall   $6,488               # CHECK: bltzall $6, 488      # encoding: [0x04,0xd2,0x00,0x7a]
+        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
+        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
@@ -52,6 +69,8 @@
         daddi     $sp,-27705           # CHECK: daddi $sp, $sp, -27705 # encoding: [0x63,0xbd,0x93,0xc7]
         daddiu    $k0,$s6,-4586
         daddu     $s3,$at,$ra
+        daddu     $24,$2,18079         # CHECK: daddiu $24, $2, 18079  # encoding: [0x64,0x58,0x46,0x9f]
+        daddu     $19,26943            # CHECK: daddiu $19, $19, 26943 # encoding: [0x66,0x73,0x69,0x3f]
         ddiv      $zero,$k0,$s3
         ddivu     $zero,$s0,$s1
         div       $zero,$25,$11
@@ -86,6 +105,8 @@
         dsubi     $sp,$s4,-27705       # CHECK: daddi $sp, $20, 27705  # encoding: [0x62,0x9d,0x6c,0x39]
         dsubi     $sp,-27705           # CHECK: daddi $sp, $sp, 27705  # encoding: [0x63,0xbd,0x6c,0x39]
         dsubu     $a1,$a1,$k0
+        dsubu     $15,$11,5025         # CHECK: daddiu $15, $11, -5025 # encoding: [0x65,0x6f,0xec,0x5f]
+        dsubu     $14,-4586            # CHECK: daddiu $14, $14, 4586  # encoding: [0x65,0xce,0x11,0xea]
         ehb                            # CHECK: ehb # encoding:  [0x00,0x00,0x00,0xc0]
         eret
         floor.l.d $f26,$f7
@@ -189,6 +210,8 @@
         srlv      $25,$s4,$a0          # CHECK: srlv $25, $20, $4      # encoding: [0x00,0x94,0xc8,0x06]
         ssnop                          # CHECK: ssnop                  # encoding: [0x00,0x00,0x00,0x40]
         sub       $s6,$s3,$12
+        sub       $22,$17,-3126        # CHECK: addi $22, $17, 3126    # encoding: [0x22,0x36,0x0c,0x36]
+        sub       $13,6512             # CHECK: addi $13, $13, -6512   # encoding: [0x21,0xad,0xe6,0x90]
         sub.d     $f18,$f3,$f17
         sub.s     $f23,$f22,$f22
         subu      $sp,$s6,$s6
@@ -199,15 +222,27 @@
         swr       $s1,-26590($14)
         swxc1     $f19,$12($k0)
         sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
+        teq       $0,$3                # CHECK: teq $zero, $3          # encoding: [0x00,0x03,0x00,0x34]
+        teq       $5,$7,620            # CHECK: teq $5, $7, 620        # encoding: [0x00,0xa7,0x9b,0x34]
         teqi      $s5,-17504
+        tge       $7,$10               # CHECK: tge $7, $10            # encoding: [0x00,0xea,0x00,0x30]
+        tge       $5,$19,340           # CHECK: tge $5, $19, 340       # encoding: [0x00,0xb3,0x55,0x30]
         tgei      $s1,5025
         tgeiu     $sp,-28621
+        tgeu      $22,$28              # CHECK: tgeu $22, $gp          # encoding: [0x02,0xdc,0x00,0x31]
+        tgeu      $20,$14,379          # CHECK: tgeu $20, $14, 379     # encoding: [0x02,0x8e,0x5e,0xf1]
         tlbp                           # CHECK: tlbp                   # encoding: [0x42,0x00,0x00,0x08]
         tlbr                           # CHECK: tlbr                   # encoding: [0x42,0x00,0x00,0x01]
         tlbwi                          # CHECK: tlbwi                  # encoding: [0x42,0x00,0x00,0x02]
         tlbwr                          # CHECK: tlbwr                  # encoding: [0x42,0x00,0x00,0x06]
+        tlt       $15,$13              # CHECK: tlt $15, $13           # encoding: [0x01,0xed,0x00,0x32]
+        tlt       $2,$19,133           # CHECK: tlt $2, $19, 133       # encoding: [0x00,0x53,0x21,0x72]
         tlti      $14,-21059
         tltiu     $ra,-5076
+        tltu      $11,$16              # CHECK: tltu $11, $16          # encoding: [0x01,0x70,0x00,0x33]
+        tltu      $16,$29,1016         # CHECK: tltu $16, $sp, 1016    # encoding: [0x02,0x1d,0xfe,0x33]
+        tne       $6,$17               # CHECK: tne $6, $17            # encoding: [0x00,0xd1,0x00,0x36]
+        tne       $7,$8,885            # CHECK: tne $7, $8, 885        # encoding: [0x00,0xe8,0xdd,0x76]
         tnei      $12,-29647
         trunc.l.d $f23,$f23
         trunc.l.s $f28,$f31
diff --git a/test/MC/Mips/mips5/invalid-mips32r2.s b/test/MC/Mips/mips5/invalid-mips32r2.s
new file mode 100644
index 000000000000..a369efa5a65d
--- /dev/null
+++ b/test/MC/Mips/mips5/invalid-mips32r2.s
@@ -0,0 +1,11 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips5 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+        .set noat
+        di      $s8                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        di                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        ei      $t6                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        ei                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips5/valid.s b/test/MC/Mips/mips5/valid.s
index 3afdee1887c1..b93b22fbd3de 100644
--- a/test/MC/Mips/mips5/valid.s
+++ b/test/MC/Mips/mips5/valid.s
@@ -6,23 +6,40 @@
         abs.d     $f7,$f25             # CHECK: encoding:
         abs.s     $f9,$f16
         add       $s7,$s2,$a1
+        add       $9,$14,15176         # CHECK: addi $9, $14, 15176   # encoding: [0x21,0xc9,0x3b,0x48]
+        add       $24,-7193            # CHECK: addi $24, $24, -7193  # encoding: [0x23,0x18,0xe3,0xe7]
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
         addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
+        addu      $9,10                # CHECK: addiu $9, $9, 10    # encoding: [0x25,0x29,0x00,0x0a]
         and       $s7,$v0,$12
         and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
         bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
         bc1f      $fcc1, 4             # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01]
         bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1fl     $fcc0,4688           # CHECK: bc1fl 4688      # encoding: [0x45,0x02,0x04,0x94]
+        bc1fl     4688                 # CHECK: bc1fl 4688      # encoding: [0x45,0x02,0x04,0x94]
+        bc1fl     $fcc7,27             # CHECK: bc1fl $fcc7, 27 # encoding: [0x45,0x1e,0x00,0x06]
         bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
         bc1t      $fcc1, 4             # CHECK: bc1t $fcc1, 4 # encoding: [0x45,0x05,0x00,0x01]
         bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1tl     $fcc0,4688           # CHECK: bc1tl 4688      # encoding: [0x45,0x03,0x04,0x94]
+        bc1tl     4688                 # CHECK: bc1tl 4688      # encoding: [0x45,0x03,0x04,0x94]
+        bc1tl     $fcc7,27             # CHECK: bc1tl $fcc7, 27 # encoding: [0x45,0x1f,0x00,0x06]
         bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
         bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
+        beql      $14,$s3,12544        # CHECK: beql $14, $19, 12544 # encoding: [0x51,0xd3,0x0c,0x40]
+        bgezall   $12,7293             # CHECK: bgezall $12, 7293    # encoding: [0x05,0x93,0x07,0x1f]
+        bgezl     $4,-6858             # CHECK: bgezl $4, -6858      # encoding: [0x04,0x83,0xf9,0x4d]
+        bgtzl     $10,-3738            # CHECK: bgtzl $10, -3738     # encoding: [0x5d,0x40,0xfc,0x59]
+        blezl     $6,2974              # CHECK: blezl $6, 2974       # encoding: [0x58,0xc0,0x02,0xe7]
+        bltzall   $6,488               # CHECK: bltzall $6, 488      # encoding: [0x04,0xd2,0x00,0x7a]
+        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
+        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
@@ -52,6 +69,8 @@
         daddi     $sp,-27705           # CHECK: daddi $sp, $sp, -27705 # encoding: [0x63,0xbd,0x93,0xc7]
         daddiu    $k0,$s6,-4586
         daddu     $s3,$at,$ra
+        daddu     $24,$2,18079         # CHECK: daddiu $24, $2, 18079  # encoding: [0x64,0x58,0x46,0x9f]
+        daddu     $19,26943            # CHECK: daddiu $19, $19, 26943 # encoding: [0x66,0x73,0x69,0x3f]
         ddiv      $zero,$k0,$s3
         ddivu     $zero,$s0,$s1
         div       $zero,$25,$11
@@ -86,6 +105,8 @@
         dsubi     $sp,$s4,-27705       # CHECK: daddi $sp, $20, 27705  # encoding: [0x62,0x9d,0x6c,0x39]
         dsubi     $sp,-27705           # CHECK: daddi $sp, $sp, 27705  # encoding: [0x63,0xbd,0x6c,0x39]
         dsubu     $a1,$a1,$k0
+        dsubu     $15,$11,5025         # CHECK: daddiu $15, $11, -5025 # encoding: [0x65,0x6f,0xec,0x5f]
+        dsubu     $14,-4586            # CHECK: daddiu $14, $14, 4586  # encoding: [0x65,0xce,0x11,0xea]
         ehb                            # CHECK: ehb # encoding:  [0x00,0x00,0x00,0xc0]
         eret
         floor.l.d $f26,$f7
@@ -190,6 +211,8 @@
         srlv      $25,$s4,$a0          # CHECK: srlv $25, $20, $4      # encoding: [0x00,0x94,0xc8,0x06]
         ssnop                          # CHECK: ssnop                  # encoding: [0x00,0x00,0x00,0x40]
         sub       $s6,$s3,$12
+        sub       $22,$17,-3126        # CHECK: addi $22, $17, 3126    # encoding: [0x22,0x36,0x0c,0x36]
+        sub       $13,6512             # CHECK: addi $13, $13, -6512   # encoding: [0x21,0xad,0xe6,0x90]
         sub.d     $f18,$f3,$f17
         sub.s     $f23,$f22,$f22
         subu      $sp,$s6,$s6
@@ -201,15 +224,27 @@
         swr       $s1,-26590($14)
         swxc1     $f19,$12($k0)
         sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
+        teq       $0,$3                # CHECK: teq $zero, $3          # encoding: [0x00,0x03,0x00,0x34]
+        teq       $5,$7,620            # CHECK: teq $5, $7, 620        # encoding: [0x00,0xa7,0x9b,0x34]
         teqi      $s5,-17504
+        tge       $7,$10               # CHECK: tge $7, $10            # encoding: [0x00,0xea,0x00,0x30]
+        tge       $5,$19,340           # CHECK: tge $5, $19, 340       # encoding: [0x00,0xb3,0x55,0x30]
         tgei      $s1,5025
         tgeiu     $sp,-28621
+        tgeu      $22,$28              # CHECK: tgeu $22, $gp          # encoding: [0x02,0xdc,0x00,0x31]
+        tgeu      $20,$14,379          # CHECK: tgeu $20, $14, 379     # encoding: [0x02,0x8e,0x5e,0xf1]
         tlbp                           # CHECK: tlbp                   # encoding: [0x42,0x00,0x00,0x08]
         tlbr                           # CHECK: tlbr                   # encoding: [0x42,0x00,0x00,0x01]
         tlbwi                          # CHECK: tlbwi                  # encoding: [0x42,0x00,0x00,0x02]
         tlbwr                          # CHECK: tlbwr                  # encoding: [0x42,0x00,0x00,0x06]
+        tlt       $15,$13              # CHECK: tlt $15, $13           # encoding: [0x01,0xed,0x00,0x32]
+        tlt       $2,$19,133           # CHECK: tlt $2, $19, 133       # encoding: [0x00,0x53,0x21,0x72]
         tlti      $14,-21059
         tltiu     $ra,-5076
+        tltu      $11,$16              # CHECK: tltu $11, $16          # encoding: [0x01,0x70,0x00,0x33]
+        tltu      $16,$29,1016         # CHECK: tltu $16, $sp, 1016    # encoding: [0x02,0x1d,0xfe,0x33]
+        tne       $6,$17               # CHECK: tne $6, $17            # encoding: [0x00,0xd1,0x00,0x36]
+        tne       $7,$8,885            # CHECK: tne $7, $8, 885        # encoding: [0x00,0xe8,0xdd,0x76]
         tnei      $12,-29647
         trunc.l.d $f23,$f23
         trunc.l.s $f28,$f31
diff --git a/test/MC/Mips/mips64-register-names-n32-n64.s b/test/MC/Mips/mips64-register-names-n32-n64.s
index ee6f88fe6ab4..efe1cdb48043 100644
--- a/test/MC/Mips/mips64-register-names-n32-n64.s
+++ b/test/MC/Mips/mips64-register-names-n32-n64.s
@@ -1,7 +1,11 @@
-# RUN: llvm-mc %s -triple=mips64-unknown-freebsd -show-encoding | FileCheck %s
+# RUN: llvm-mc %s -triple=mips64-unknown-freebsd -show-encoding 2>%t0 \
+# RUN:     | FileCheck %s
+# RUN: FileCheck -check-prefix=WARNING %s < %t0
+#
 # RUN: llvm-mc %s -triple=mips64-unknown-freebsd -show-encoding \
-# RUN:     -mattr=-n64,+n32 | FileCheck %s
-
+# RUN:     -mattr=-n64,+n32 2>%t1 | FileCheck %s
+# RUN: FileCheck -check-prefix=WARNING %s < %t1
+#
 # Check that the register names are mapped to their correct numbers for n32/n64
 # Second byte of addiu with $zero at rt contains the number of the source
 # register.
@@ -23,9 +27,25 @@ daddiu	$t0, $zero, 0 # [*] # CHECK: encoding: [0x64,0x0c,0x00,0x00]
 daddiu	$t1, $zero, 0 # [*] # CHECK: encoding: [0x64,0x0d,0x00,0x00]
 daddiu	$t2, $zero, 0 # [*] # CHECK: encoding: [0x64,0x0e,0x00,0x00]
 daddiu	$t3, $zero, 0 # [*] # CHECK: encoding: [0x64,0x0f,0x00,0x00]
+# WARNING: mips64-register-names-n32-n64.s:[[@LINE+4]]:9: warning: register names $t4-$t7 are only available in O32.
+# WARNING-NEXT: daddiu  $t4, $zero, 0       # {{CHECK}}: encoding: [0x64,0x0c,0x00,0x00]
+# WARNING-NEXT:          ^~
+# WARNING-NEXT:          Did you mean $t0?
 daddiu	$t4, $zero, 0       # CHECK: encoding: [0x64,0x0c,0x00,0x00]
+# WARNING: mips64-register-names-n32-n64.s:[[@LINE+4]]:9: warning: register names $t4-$t7 are only available in O32.
+# WARNING-NEXT: daddiu  $t5, $zero, 0       # {{CHECK}}: encoding: [0x64,0x0d,0x00,0x00]
+# WARNING-NEXT:          ^~
+# WARNING-NEXT:          Did you mean $t1?
 daddiu	$t5, $zero, 0       # CHECK: encoding: [0x64,0x0d,0x00,0x00]
+# WARNING: mips64-register-names-n32-n64.s:[[@LINE+4]]:9: warning: register names $t4-$t7 are only available in O32.
+# WARNING-NEXT: daddiu  $t6, $zero, 0       # {{CHECK}}: encoding: [0x64,0x0e,0x00,0x00]
+# WARNING-NEXT:          ^~
+# WARNING-NEXT:          Did you mean $t2?
 daddiu	$t6, $zero, 0       # CHECK: encoding: [0x64,0x0e,0x00,0x00]
+# WARNING: mips64-register-names-n32-n64.s:[[@LINE+4]]:9: warning: register names $t4-$t7 are only available in O32.
+# WARNING-NEXT: daddiu  $t7, $zero, 0       # {{CHECK}}: encoding: [0x64,0x0f,0x00,0x00]
+# WARNING-NEXT:          ^~
+# WARNING-NEXT:          Did you mean $t3?
 daddiu	$t7, $zero, 0       # CHECK: encoding: [0x64,0x0f,0x00,0x00]
 daddiu	$s0, $zero, 0       # CHECK: encoding: [0x64,0x10,0x00,0x00]
 daddiu	$s1, $zero, 0       # CHECK: encoding: [0x64,0x11,0x00,0x00]
diff --git a/test/MC/Mips/mips64/invalid-mips32r2.s b/test/MC/Mips/mips64/invalid-mips32r2.s
new file mode 100644
index 000000000000..bc5d1f0c6efe
--- /dev/null
+++ b/test/MC/Mips/mips64/invalid-mips32r2.s
@@ -0,0 +1,11 @@
+# Instructions that are invalid
+#
+# RUN: not llvm-mc %s -triple=mips-unknown-linux -show-encoding -mcpu=mips64 \
+# RUN:     2>%t1
+# RUN: FileCheck %s < %t1
+
+	.set noat
+        di      $s8                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        di                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        ei      $t6                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        ei                          # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips64/valid.s b/test/MC/Mips/mips64/valid.s
index 1bd057d757df..032777e48ec0 100644
--- a/test/MC/Mips/mips64/valid.s
+++ b/test/MC/Mips/mips64/valid.s
@@ -6,23 +6,40 @@
         abs.d     $f7,$f25             # CHECK: encoding:
         abs.s     $f9,$f16
         add       $s7,$s2,$a1
+        add       $9,$14,15176         # CHECK: addi $9, $14, 15176   # encoding: [0x21,0xc9,0x3b,0x48]
+        add       $24,-7193            # CHECK: addi $24, $24, -7193  # encoding: [0x23,0x18,0xe3,0xe7]
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
         addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
+        addu      $9,10                # CHECK: addiu $9, $9, 10    # encoding: [0x25,0x29,0x00,0x0a]
         and       $s7,$v0,$12
         and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
         bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
         bc1f      $fcc1, 4             # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01]
         bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1fl     $fcc0,4688           # CHECK: bc1fl 4688      # encoding: [0x45,0x02,0x04,0x94]
+        bc1fl     4688                 # CHECK: bc1fl 4688      # encoding: [0x45,0x02,0x04,0x94]
+        bc1fl     $fcc7,27             # CHECK: bc1fl $fcc7, 27 # encoding: [0x45,0x1e,0x00,0x06]
         bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
         bc1t      $fcc1, 4             # CHECK: bc1t $fcc1, 4 # encoding: [0x45,0x05,0x00,0x01]
         bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1tl     $fcc0,4688           # CHECK: bc1tl 4688      # encoding: [0x45,0x03,0x04,0x94]
+        bc1tl     4688                 # CHECK: bc1tl 4688      # encoding: [0x45,0x03,0x04,0x94]
+        bc1tl     $fcc7,27             # CHECK: bc1tl $fcc7, 27 # encoding: [0x45,0x1f,0x00,0x06]
         bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
         bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
+        beql      $14,$s3,12544        # CHECK: beql $14, $19, 12544 # encoding: [0x51,0xd3,0x0c,0x40]
+        bgezall   $12,7293             # CHECK: bgezall $12, 7293    # encoding: [0x05,0x93,0x07,0x1f]
+        bgezl     $4,-6858             # CHECK: bgezl $4, -6858      # encoding: [0x04,0x83,0xf9,0x4d]
+        bgtzl     $10,-3738            # CHECK: bgtzl $10, -3738     # encoding: [0x5d,0x40,0xfc,0x59]
+        blezl     $6,2974              # CHECK: blezl $6, 2974       # encoding: [0x58,0xc0,0x02,0xe7]
+        bltzall   $6,488               # CHECK: bltzall $6, 488      # encoding: [0x04,0xd2,0x00,0x7a]
+        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
+        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
@@ -54,6 +71,8 @@
         daddi     $sp,-27705           # CHECK: daddi $sp, $sp, -27705 # encoding: [0x63,0xbd,0x93,0xc7]
         daddiu    $k0,$s6,-4586
         daddu     $s3,$at,$ra
+        daddu     $24,$2,18079         # CHECK: daddiu $24, $2, 18079  # encoding: [0x64,0x58,0x46,0x9f]
+        daddu     $19,26943            # CHECK: daddiu $19, $19, 26943 # encoding: [0x66,0x73,0x69,0x3f]
         dclo      $s2,$a2              # CHECK: dclo $18, $6   # encoding: [0x70,0xd2,0x90,0x25]
         dclz      $s0,$25              # CHECK: dclz $16, $25  # encoding: [0x73,0x30,0x80,0x24]
         deret
@@ -91,6 +110,8 @@
         dsubi     $sp,$s4,-27705       # CHECK: daddi $sp, $20, 27705  # encoding: [0x62,0x9d,0x6c,0x39]
         dsubi     $sp,-27705           # CHECK: daddi $sp, $sp, 27705  # encoding: [0x63,0xbd,0x6c,0x39]
         dsubu     $a1,$a1,$k0
+        dsubu     $15,$11,5025         # CHECK: daddiu $15, $11, -5025 # encoding: [0x65,0x6f,0xec,0x5f]
+        dsubu     $14,-4586            # CHECK: daddiu $14, $14, 4586  # encoding: [0x65,0xce,0x11,0xea]
         ehb                            # CHECK: ehb # encoding:  [0x00,0x00,0x00,0xc0]
         eret
         floor.l.d $f26,$f7
@@ -206,6 +227,8 @@
         srlv      $25,$s4,$a0          # CHECK: srlv $25, $20, $4      # encoding: [0x00,0x94,0xc8,0x06]
         ssnop                          # CHECK: ssnop                  # encoding: [0x00,0x00,0x00,0x40]
         sub       $s6,$s3,$12
+        sub       $22,$17,-3126        # CHECK: addi $22, $17, 3126    # encoding: [0x22,0x36,0x0c,0x36]
+        sub       $13,6512             # CHECK: addi $13, $13, -6512   # encoding: [0x21,0xad,0xe6,0x90]
         sub.d     $f18,$f3,$f17
         sub.s     $f23,$f22,$f22
         subu      $sp,$s6,$s6
@@ -218,15 +241,27 @@
         swxc1     $f19,$12($k0)
         sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
         sync      1                    # CHECK: sync 1                 # encoding: [0x00,0x00,0x00,0x4f]
+        teq       $0,$3                # CHECK: teq $zero, $3          # encoding: [0x00,0x03,0x00,0x34]
+        teq       $5,$7,620            # CHECK: teq $5, $7, 620        # encoding: [0x00,0xa7,0x9b,0x34]
         teqi      $s5,-17504
+        tge       $7,$10               # CHECK: tge $7, $10            # encoding: [0x00,0xea,0x00,0x30]
+        tge       $5,$19,340           # CHECK: tge $5, $19, 340       # encoding: [0x00,0xb3,0x55,0x30]
         tgei      $s1,5025
         tgeiu     $sp,-28621
+        tgeu      $22,$28              # CHECK: tgeu $22, $gp          # encoding: [0x02,0xdc,0x00,0x31]
+        tgeu      $20,$14,379          # CHECK: tgeu $20, $14, 379     # encoding: [0x02,0x8e,0x5e,0xf1]
         tlbp                           # CHECK: tlbp                   # encoding: [0x42,0x00,0x00,0x08]
         tlbr                           # CHECK: tlbr                   # encoding: [0x42,0x00,0x00,0x01]
         tlbwi                          # CHECK: tlbwi                  # encoding: [0x42,0x00,0x00,0x02]
         tlbwr                          # CHECK: tlbwr                  # encoding: [0x42,0x00,0x00,0x06]
+        tlt       $15,$13              # CHECK: tlt $15, $13           # encoding: [0x01,0xed,0x00,0x32]
+        tlt       $2,$19,133           # CHECK: tlt $2, $19, 133       # encoding: [0x00,0x53,0x21,0x72]
         tlti      $14,-21059
         tltiu     $ra,-5076
+        tltu      $11,$16              # CHECK: tltu $11, $16          # encoding: [0x01,0x70,0x00,0x33]
+        tltu      $16,$29,1016         # CHECK: tltu $16, $sp, 1016    # encoding: [0x02,0x1d,0xfe,0x33]
+        tne       $6,$17               # CHECK: tne $6, $17            # encoding: [0x00,0xd1,0x00,0x36]
+        tne       $7,$8,885            # CHECK: tne $7, $8, 885        # encoding: [0x00,0xe8,0xdd,0x76]
         tnei      $12,-29647
         trunc.l.d $f23,$f23
         trunc.l.s $f28,$f31
diff --git a/test/MC/Mips/mips64r2/valid-xfail.s b/test/MC/Mips/mips64r2/valid-xfail.s
index 9ac47f631546..3e28baa75ef5 100644
--- a/test/MC/Mips/mips64r2/valid-xfail.s
+++ b/test/MC/Mips/mips64r2/valid-xfail.s
@@ -297,7 +297,6 @@
         swe $24,94($k0)
         swle            $v1,-209($gp)
         swre            $k0,-202($s2)
-        synci           20023($s0)
         tlbginv
         tlbginvf
         tlbgp
diff --git a/test/MC/Mips/mips64r2/valid.s b/test/MC/Mips/mips64r2/valid.s
index 7a2244a66420..77172380bdfc 100644
--- a/test/MC/Mips/mips64r2/valid.s
+++ b/test/MC/Mips/mips64r2/valid.s
@@ -6,23 +6,40 @@
         abs.d     $f7,$f25             # CHECK: encoding:
         abs.s     $f9,$f16
         add       $s7,$s2,$a1
+        add       $9,$14,15176         # CHECK: addi $9, $14, 15176   # encoding: [0x21,0xc9,0x3b,0x48]
+        add       $24,-7193            # CHECK: addi $24, $24, -7193  # encoding: [0x23,0x18,0xe3,0xe7]
         add.d     $f1,$f7,$f29
         add.s     $f8,$f21,$f24
         addi      $13,$9,26322
         addi      $8,$8,~1             # CHECK: addi $8, $8, -2 # encoding: [0x21,0x08,0xff,0xfe]
         addu      $9,$a0,$a2
+        addu      $9,10                # CHECK: addiu $9, $9, 10    # encoding: [0x25,0x29,0x00,0x0a]
         and       $s7,$v0,$12
         and       $2,4                 # CHECK: andi $2, $2, 4 # encoding: [0x30,0x42,0x00,0x04]
         bc1f      $fcc0, 4             # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
         bc1f      $fcc1, 4             # CHECK: bc1f $fcc1, 4 # encoding: [0x45,0x04,0x00,0x01]
         bc1f      4                    # CHECK: bc1f 4        # encoding: [0x45,0x00,0x00,0x01]
+        bc1fl     $fcc0,4688           # CHECK: bc1fl 4688      # encoding: [0x45,0x02,0x04,0x94]
+        bc1fl     4688                 # CHECK: bc1fl 4688      # encoding: [0x45,0x02,0x04,0x94]
+        bc1fl     $fcc7,27             # CHECK: bc1fl $fcc7, 27 # encoding: [0x45,0x1e,0x00,0x06]
         bc1t      $fcc0, 4             # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
         bc1t      $fcc1, 4             # CHECK: bc1t $fcc1, 4 # encoding: [0x45,0x05,0x00,0x01]
         bc1t      4                    # CHECK: bc1t 4        # encoding: [0x45,0x01,0x00,0x01]
+        bc1tl     $fcc0,4688           # CHECK: bc1tl 4688      # encoding: [0x45,0x03,0x04,0x94]
+        bc1tl     4688                 # CHECK: bc1tl 4688      # encoding: [0x45,0x03,0x04,0x94]
+        bc1tl     $fcc7,27             # CHECK: bc1tl $fcc7, 27 # encoding: [0x45,0x1f,0x00,0x06]
         bal       21100                # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $0, 21100            # CHECK: bal 21100     # encoding: [0x04,0x11,0x14,0x9b]
         bgezal    $6, 21100            # CHECK: bgezal $6, 21100 # encoding: [0x04,0xd1,0x14,0x9b]
         bltzal    $6, 21100            # CHECK: bltzal $6, 21100 # encoding: [0x04,0xd0,0x14,0x9b]
+        beql      $14,$s3,12544        # CHECK: beql $14, $19, 12544 # encoding: [0x51,0xd3,0x0c,0x40]
+        bgezall   $12,7293             # CHECK: bgezall $12, 7293    # encoding: [0x05,0x93,0x07,0x1f]
+        bgezl     $4,-6858             # CHECK: bgezl $4, -6858      # encoding: [0x04,0x83,0xf9,0x4d]
+        bgtzl     $10,-3738            # CHECK: bgtzl $10, -3738     # encoding: [0x5d,0x40,0xfc,0x59]
+        blezl     $6,2974              # CHECK: blezl $6, 2974       # encoding: [0x58,0xc0,0x02,0xe7]
+        bltzall   $6,488               # CHECK: bltzall $6, 488      # encoding: [0x04,0xd2,0x00,0x7a]
+        bltzl     $s1,-9964            # CHECK: bltzl $17, -9964     # encoding: [0x06,0x22,0xf6,0x45]
+        bnel      $gp,$s4,5107         # CHECK: bnel $gp, $20, 5107  # encoding: [0x57,0x94,0x04,0xfc]
         cache     1, 8($5)             # CHECK: cache 1, 8($5)   # encoding: [0xbc,0xa1,0x00,0x08]
         c.ngl.d   $f29,$f29
         c.ngle.d  $f0,$f16
@@ -54,10 +71,13 @@
         daddi     $sp,-27705           # CHECK: daddi $sp, $sp, -27705 # encoding: [0x63,0xbd,0x93,0xc7]
         daddiu    $k0,$s6,-4586
         daddu     $s3,$at,$ra
+        daddu     $24,$2,18079         # CHECK: daddiu $24, $2, 18079  # encoding: [0x64,0x58,0x46,0x9f]
+        daddu     $19,26943            # CHECK: daddiu $19, $19, 26943 # encoding: [0x66,0x73,0x69,0x3f]
         dclo      $s2,$a2              # CHECK: dclo $18, $6   # encoding: [0x70,0xd2,0x90,0x25]
         dclz      $s0,$25              # CHECK: dclz $16, $25  # encoding: [0x73,0x30,0x80,0x24]
         deret
-        di        $s8
+        di        $s8                  # CHECK: di  $fp        # encoding: [0x41,0x7e,0x60,0x00]
+        di                             # CHECK: di             # encoding: [0x41,0x60,0x60,0x00]
         ddiv      $zero,$k0,$s3
         ddivu     $zero,$s0,$s1
         div       $zero,$25,$11
@@ -101,8 +121,11 @@
         dsubi     $sp,-27705           # CHECK: daddi $sp, $sp, 27705  # encoding: [0x63,0xbd,0x6c,0x39]
         dsubu     $a1,$a1,$k0
         dsubu     $a1,$a1,$k0
+        dsubu     $15,$11,5025         # CHECK: daddiu $15, $11, -5025 # encoding: [0x65,0x6f,0xec,0x5f]
+        dsubu     $14,-4586            # CHECK: daddiu $14, $14, 4586  # encoding: [0x65,0xce,0x11,0xea]
         ehb                            # CHECK: ehb # encoding:  [0x00,0x00,0x00,0xc0]
-        ei        $14
+        ei        $14                  # CHECK: ei  $14       # encoding: [0x41,0x6e,0x60,0x20]
+        ei                             # CHECK: ei            # encoding: [0x41,0x60,0x60,0x20]
         eret
         floor.l.d $f26,$f7
         floor.l.s $f12,$f5
@@ -190,7 +213,12 @@
         or        $2, 4                # CHECK: ori $2, $2, 4           # encoding: [0x34,0x42,0x00,0x04]
         pause                          # CHECK: pause # encoding:  [0x00,0x00,0x01,0x40]
         pref      1, 8($5)             # CHECK: pref 1, 8($5)           # encoding: [0xcc,0xa1,0x00,0x08]
-        rdhwr     $sp,$11
+        # FIXME: Use the code generator in order to print the .set directives
+        #        instead of the instruction printer.
+        rdhwr     $sp,$11              # CHECK:      .set  push
+                                       # CHECK-NEXT: .set  mips32r2
+                                       # CHECK-NEXT: rdhwr $sp, $11
+                                       # CHECK-NEXT: .set  pop          # encoding: [0x7c,0x1d,0x58,0x3b]
         rotr      $1,15                # CHECK: rotr $1, $1, 15         # encoding: [0x00,0x21,0x0b,0xc2]
         rotr      $1,$14,15            # CHECK: rotr $1, $14, 15        # encoding: [0x00,0x2e,0x0b,0xc2]
         rotrv     $1,$14,$15           # CHECK: rotrv $1, $14, $15      # encoding: [0x01,0xee,0x08,0x46]
@@ -233,6 +261,8 @@
         srlv      $25,$s4,$a0          # CHECK: srlv $25, $20, $4      # encoding: [0x00,0x94,0xc8,0x06]
         ssnop                          # CHECK: ssnop                  # encoding: [0x00,0x00,0x00,0x40]
         sub       $s6,$s3,$12
+        sub       $22,$17,-3126        # CHECK: addi $22, $17, 3126    # encoding: [0x22,0x36,0x0c,0x36]
+        sub       $13,6512             # CHECK: addi $13, $13, -6512   # encoding: [0x21,0xad,0xe6,0x90]
         sub.d     $f18,$f3,$f17
         sub.s     $f23,$f22,$f22
         subu      $sp,$s6,$s6
@@ -245,15 +275,27 @@
         swxc1     $f19,$12($k0)
         sync                           # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
         sync      1                    # CHECK: sync 1                 # encoding: [0x00,0x00,0x00,0x4f]
+        teq       $0,$3                # CHECK: teq $zero, $3          # encoding: [0x00,0x03,0x00,0x34]
+        teq       $5,$7,620            # CHECK: teq $5, $7, 620        # encoding: [0x00,0xa7,0x9b,0x34]
         teqi      $s5,-17504
+        tge       $7,$10               # CHECK: tge $7, $10            # encoding: [0x00,0xea,0x00,0x30]
+        tge       $5,$19,340           # CHECK: tge $5, $19, 340       # encoding: [0x00,0xb3,0x55,0x30]
         tgei      $s1,5025
         tgeiu     $sp,-28621
+        tgeu      $22,$28              # CHECK: tgeu $22, $gp          # encoding: [0x02,0xdc,0x00,0x31]
+        tgeu      $20,$14,379          # CHECK: tgeu $20, $14, 379     # encoding: [0x02,0x8e,0x5e,0xf1]
         tlbp                           # CHECK: tlbp                   # encoding: [0x42,0x00,0x00,0x08]
         tlbr                           # CHECK: tlbr                   # encoding: [0x42,0x00,0x00,0x01]
         tlbwi                          # CHECK: tlbwi                  # encoding: [0x42,0x00,0x00,0x02]
         tlbwr                          # CHECK: tlbwr                  # encoding: [0x42,0x00,0x00,0x06]
+        tlt       $15,$13              # CHECK: tlt $15, $13           # encoding: [0x01,0xed,0x00,0x32]
+        tlt       $2,$19,133           # CHECK: tlt $2, $19, 133       # encoding: [0x00,0x53,0x21,0x72]
         tlti      $14,-21059
         tltiu     $ra,-5076
+        tltu      $11,$16              # CHECK: tltu $11, $16          # encoding: [0x01,0x70,0x00,0x33]
+        tltu      $16,$29,1016         # CHECK: tltu $16, $sp, 1016    # encoding: [0x02,0x1d,0xfe,0x33]
+        tne       $6,$17               # CHECK: tne $6, $17            # encoding: [0x00,0xd1,0x00,0x36]
+        tne       $7,$8,885            # CHECK: tne $7, $8, 885        # encoding: [0x00,0xe8,0xdd,0x76]
         tnei      $12,-29647
         trunc.l.d $f23,$f23
         trunc.l.s $f28,$f31
diff --git a/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s
index e914c899f66b..5156429340a1 100644
--- a/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s
+++ b/test/MC/Mips/mips64r6/invalid-mips1-wrong-error.s
@@ -5,13 +5,13 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
-        bc2f      4                   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2t      4                   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2f      4                   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2t      4                   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
         lwl       $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         lwr       $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         swl       $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         swr       $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        lwle      $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        lwre      $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        swle      $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        swre      $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        lwle      $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        lwre      $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        swle      $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        swre      $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips64r6/invalid-mips1.s b/test/MC/Mips/mips64r6/invalid-mips1.s
index 6efd8f4cf601..ce0ab978e241 100644
--- a/test/MC/Mips/mips64r6/invalid-mips1.s
+++ b/test/MC/Mips/mips64r6/invalid-mips1.s
@@ -5,6 +5,8 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
+        add       $9,$14,15176        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        add       $24,-7193           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         addi      $13,$9,26322        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         bgezal    $0, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         bgezal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
@@ -25,3 +27,5 @@
         multu     $gp,$k0             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
 #       div has been re-encoded. See valid.s
 #       divu has been re-encoded. See valid.s
+        sub       $22,$17,-3126       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        sub       $13,6512            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips64r6/invalid-mips2.s b/test/MC/Mips/mips64r6/invalid-mips2.s
index 8a5c50ca3582..a09a051e18cc 100644
--- a/test/MC/Mips/mips64r6/invalid-mips2.s
+++ b/test/MC/Mips/mips64r6/invalid-mips2.s
@@ -6,9 +6,21 @@
 
 	.set noat
         addi      $13,$9,26322        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1fl     $fcc0,-8239         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1fl     -8239               # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1tl     $fcc0,-8239         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1tl     -8239               # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        beql      $14,$s3,12544       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         bgezal    $0, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         bgezal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezall   $12,7293            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgezl     $4,-6858            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bgtzl     $10,-3738           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        blezl     $6,2974             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         bltzal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bltzall   $6,488              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bltzl     $s1,-9964           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bnel      $gp,$s4,5107        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         mfhi      $s3                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         mfhi      $sp                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         mflo      $s1                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s
index 7424f493bf56..eda18ac9012e 100644
--- a/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s
+++ b/test/MC/Mips/mips64r6/invalid-mips3-wrong-error.s
@@ -9,15 +9,15 @@
         ldr       $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         sdl       $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         sdr       $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        ldle      $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        ldre      $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        sdle      $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        sdre      $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        ldle      $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        ldre      $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        sdle      $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        sdre      $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
         lwl       $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         lwr       $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         swl       $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
         swr       $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: invalid operand for instruction
-        lwle      $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        lwre      $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        swle      $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        swre      $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        lwle      $s4,-4231($15)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        lwre      $zero,-19147($gp)   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        swle      $15,13694($s3)      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        swre      $s1,-26590($14)     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips64r6/invalid-mips32-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips32-wrong-error.s
index cc85f1885635..870231800aa9 100644
--- a/test/MC/Mips/mips64r6/invalid-mips32-wrong-error.s
+++ b/test/MC/Mips/mips64r6/invalid-mips32-wrong-error.s
@@ -6,15 +6,11 @@
 # RUN: FileCheck %s < %t1
 
         .set noat
-        bc1fl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1tl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2f  $fcc0,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2f  4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2fl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2t  $fcc0,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2t  4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2tl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2f  $fcc0,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2f  4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2fl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2t  $fcc0,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2t  4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2tl $fcc1,4           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips64r6/invalid-mips4-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips4-wrong-error.s
index f3131a947367..06bf58c1e8fa 100644
--- a/test/MC/Mips/mips64r6/invalid-mips4-wrong-error.s
+++ b/test/MC/Mips/mips64r6/invalid-mips4-wrong-error.s
@@ -6,16 +6,6 @@
 # RUN: FileCheck %s < %t1
 
         .set noat
-        beql $1,$2,4            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bgezall $3,8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bgezl $3,8              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bgtzl $4,16             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        blezl $3,8              # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bltzall $3,8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bltzl $4,16             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bnel $1,$2,4            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc2fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        prefx 0,$2($31)         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        bc2tl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc2fl 4                 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        prefx 0,$2($31)         # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips64r6/invalid-mips4.s b/test/MC/Mips/mips64r6/invalid-mips4.s
index 706db27835ef..82a1196daf84 100644
--- a/test/MC/Mips/mips64r6/invalid-mips4.s
+++ b/test/MC/Mips/mips64r6/invalid-mips4.s
@@ -5,6 +5,8 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
+        bc1fl     $fcc7,27            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
+        bc1tl     $fcc7,27            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         bgezal    $0, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         bgezal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
         bltzal    $6, 21100           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s b/test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s
index 4fc94e26eb10..ceeb57722350 100644
--- a/test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s
+++ b/test/MC/Mips/mips64r6/invalid-mips5-wrong-error.s
@@ -5,44 +5,44 @@
 # RUN: FileCheck %s < %t1
 
 	.set noat
-        abs.ps          $f22,$f8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        add.ps          $f25,$f27,$f13      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        alnv.ps         $f12,$f18,$f30,$12  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1any2f        $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1any2t        $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1any4f        $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        bc1any4t        $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.eq.ps         $fcc5,$f0,$f9       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.f.ps          $fcc6,$f11,$f11     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.le.ps         $fcc1,$f7,$f20      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.lt.ps         $f19,$f5            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.nge.ps        $f1,$f26            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ngl.ps        $f21,$f30           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ngle.ps       $fcc7,$f12,$f20     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ngt.ps        $fcc5,$f30,$f6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ole.ps        $fcc7,$f21,$f8      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.olt.ps        $fcc3,$f7,$f16      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.seq.ps        $fcc6,$f31,$f14     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.sf.ps         $fcc6,$f4,$f6       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ueq.ps        $fcc1,$f5,$f29      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ule.ps        $fcc6,$f17,$f3      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.ult.ps        $fcc7,$f14,$f0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        c.un.ps         $fcc4,$f2,$f26      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        cvt.ps.s        $f3,$f18,$f19       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        cvt.ps.pw       $f3,$f18            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        madd.ps         $f22,$f3,$f14,$f3   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        mov.ps          $f22,$f17           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movf.ps         $f10,$f28,$fcc6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movn.ps         $f31,$f31,$s3       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movt.ps         $f20,$f25,$fcc2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        movz.ps         $f18,$f17,$ra       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        msub.ps         $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        mul.ps          $f14,$f0,$f16       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        neg.ps          $f19,$f13           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        nmadd.ps        $f27,$f4,$f9,$f25   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        nmsub.ps        $f6,$f12,$f14,$f17  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        pll.ps          $f25,$f9,$f30       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        plu.ps          $f1,$f26,$f29       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        pul.ps          $f9,$f30,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        puu.ps          $f24,$f9,$f2        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
-        sub.ps          $f5,$f14,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: Unknown instruction
+        abs.ps          $f22,$f8            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        add.ps          $f25,$f27,$f13      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        alnv.ps         $f12,$f18,$f30,$12  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc1any2f        $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc1any2t        $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc1any4f        $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        bc1any4t        $fcc2,4             # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.eq.ps         $fcc5,$f0,$f9       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.f.ps          $fcc6,$f11,$f11     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.le.ps         $fcc1,$f7,$f20      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.lt.ps         $f19,$f5            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.nge.ps        $f1,$f26            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ngl.ps        $f21,$f30           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ngle.ps       $fcc7,$f12,$f20     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ngt.ps        $fcc5,$f30,$f6      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ole.ps        $fcc7,$f21,$f8      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.olt.ps        $fcc3,$f7,$f16      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.seq.ps        $fcc6,$f31,$f14     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.sf.ps         $fcc6,$f4,$f6       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ueq.ps        $fcc1,$f5,$f29      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ule.ps        $fcc6,$f17,$f3      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.ult.ps        $fcc7,$f14,$f0      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        c.un.ps         $fcc4,$f2,$f26      # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        cvt.ps.s        $f3,$f18,$f19       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        cvt.ps.pw       $f3,$f18            # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        madd.ps         $f22,$f3,$f14,$f3   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        mov.ps          $f22,$f17           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movf.ps         $f10,$f28,$fcc6     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movn.ps         $f31,$f31,$s3       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movt.ps         $f20,$f25,$fcc2     # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        movz.ps         $f18,$f17,$ra       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        msub.ps         $f12,$f14,$f29,$f17 # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        mul.ps          $f14,$f0,$f16       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        neg.ps          $f19,$f13           # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        nmadd.ps        $f27,$f4,$f9,$f25   # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        nmsub.ps        $f6,$f12,$f14,$f17  # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        pll.ps          $f25,$f9,$f30       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        plu.ps          $f1,$f26,$f29       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        pul.ps          $f9,$f30,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        puu.ps          $f24,$f9,$f2        # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
+        sub.ps          $f5,$f14,$f26       # CHECK: :[[@LINE]]:{{[0-9]+}}: error: unknown instruction
diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s
index 34c1dac5be55..3e8fc4156262 100644
--- a/test/MC/Mips/mips64r6/valid.s
+++ b/test/MC/Mips/mips64r6/valid.s
@@ -17,6 +17,7 @@
         # FIXME: Add the instructions carried forward from older ISA's
         and     $2,4           # CHECK: andi $2, $2, 4        # encoding: [0x30,0x42,0x00,0x04]
         addiupc $4, 100          # CHECK: addiupc $4, 100     # encoding: [0xec,0x80,0x00,0x19]
+        addu    $9,10            # CHECK: addiu $9, $9, 10    # encoding: [0x25,0x29,0x00,0x0a]
         align   $4, $2, $3, 2    # CHECK: align $4, $2, $3, 2 # encoding: [0x7c,0x43,0x22,0xa0]
         aluipc  $3, 56           # CHECK: aluipc $3, 56       # encoding: [0xec,0x7f,0x00,0x38]
         aui     $3,$2,-23        # CHECK: aui $3, $2, -23     # encoding: [0x3c,0x62,0xff,0xe9]
@@ -96,13 +97,21 @@
         cmp.sle.d  $f2,$f3,$f4      # CHECK: cmp.sle.d $f2, $f3, $f4  # encoding: [0x46,0xa4,0x18,0x8e]
         cmp.sule.s $f2,$f3,$f4      # CHECK: cmp.sule.s $f2, $f3, $f4 # encoding: [0x46,0x84,0x18,0x8f]
         cmp.sule.d $f2,$f3,$f4      # CHECK: cmp.sule.d $f2, $f3, $f4 # encoding: [0x46,0xa4,0x18,0x8f]
+        daddu   $24,$2,18079     # CHECK: daddiu $24, $2, 18079  # encoding: [0x64,0x58,0x46,0x9f]
+        daddu   $19,26943        # CHECK: daddiu $19, $19, 26943 # encoding: [0x66,0x73,0x69,0x3f]
         dalign  $4,$2,$3,5       # CHECK: dalign $4, $2, $3, 5 # encoding: [0x7c,0x43,0x23,0x64]
         daui    $3,$2,0x1234     # CHECK: daui $3, $2, 4660  # encoding: [0x74,0x62,0x12,0x34]
         dahi     $3,0x5678       # CHECK: dahi $3, 22136     # encoding: [0x04,0x66,0x56,0x78]
         dati     $3,0xabcd       # CHECK: dati $3, 43981     # encoding: [0x04,0x7e,0xab,0xcd]
         dbitswap $4, $2          # CHECK: dbitswap $4, $2    # encoding: [0x7c,0x02,0x20,0x24]
+        di      $s8              # CHECK: di  $fp          # encoding: [0x41,0x7e,0x60,0x00]
+        di                       # CHECK: di               # encoding: [0x41,0x60,0x60,0x00]
         div     $2,$3,$4         # CHECK: div $2, $3, $4   # encoding: [0x00,0x64,0x10,0x9a]
         divu    $2,$3,$4         # CHECK: divu $2, $3, $4  # encoding: [0x00,0x64,0x10,0x9b]
+        dsubu   $15,$11,5025     # CHECK: daddiu $15, $11, -5025 # encoding: [0x65,0x6f,0xec,0x5f]
+        dsubu   $14,-4586        # CHECK: daddiu $14, $14, 4586  # encoding: [0x65,0xce,0x11,0xea]
+        ei      $14              # CHECK: ei  $14          # encoding: [0x41,0x6e,0x60,0x20]
+        ei                       # CHECK: ei               # encoding: [0x41,0x60,0x60,0x20]
         jialc   $5, 256          # CHECK: jialc $5, 256    # encoding: [0xf8,0x05,0x01,0x00]
         jic     $5, 256          # CHECK: jic $5, 256      # encoding: [0xd8,0x05,0x01,0x00]
         mod     $2,$3,$4         # CHECK: mod $2, $3, $4   # encoding: [0x00,0x64,0x10,0xda]
@@ -146,6 +155,12 @@
         seleqz.d $f0, $f2, $f4   # CHECK: seleqz.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x14]
         selnez.s $f0, $f2, $f4   # CHECK: selnez.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x17]
         selnez.d $f0, $f2, $f4   # CHECK: selnez.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x17]
+        # FIXME: Use the code generator in order to print the .set directives
+        #        instead of the instruction printer.
+        rdhwr   $sp,$11          # CHECK:      .set  push
+                                 # CHECK-NEXT: .set  mips32r2
+                                 # CHECK-NEXT: rdhwr $sp, $11
+                                 # CHECK-NEXT: .set  pop         # encoding: [0x7c,0x1d,0x58,0x3b]
         rint.s $f2, $f4          # CHECK: rint.s $f2, $f4        # encoding: [0x46,0x00,0x20,0x9a]
         rint.d $f2, $f4          # CHECK: rint.d $f2, $f4        # encoding: [0x46,0x20,0x20,0x9a]
         class.s $f2, $f4         # CHECK: class.s $f2, $f4       # encoding: [0x46,0x00,0x20,0x9b]
@@ -171,3 +186,15 @@
         sdbbp     34             # CHECK: sdbbp 34               # encoding: [0x00,0x00,0x08,0x8e]
         sync                     # CHECK: sync                   # encoding: [0x00,0x00,0x00,0x0f]
         sync    1                # CHECK: sync 1                 # encoding: [0x00,0x00,0x00,0x4f]
+        teq     $0,$3            # CHECK: teq $zero, $3          # encoding: [0x00,0x03,0x00,0x34]
+        teq     $5,$7,620        # CHECK: teq $5, $7, 620        # encoding: [0x00,0xa7,0x9b,0x34]
+        tge     $7,$10           # CHECK: tge $7, $10            # encoding: [0x00,0xea,0x00,0x30]
+        tge     $5,$19,340       # CHECK: tge $5, $19, 340       # encoding: [0x00,0xb3,0x55,0x30]
+        tgeu    $22,$28          # CHECK: tgeu $22, $gp          # encoding: [0x02,0xdc,0x00,0x31]
+        tgeu    $20,$14,379      # CHECK: tgeu $20, $14, 379     # encoding: [0x02,0x8e,0x5e,0xf1]
+        tlt     $15,$13          # CHECK: tlt $15, $13           # encoding: [0x01,0xed,0x00,0x32]
+        tlt     $2,$19,133       # CHECK: tlt $2, $19, 133       # encoding: [0x00,0x53,0x21,0x72]
+        tltu    $11,$16          # CHECK: tltu $11, $16          # encoding: [0x01,0x70,0x00,0x33]
+        tltu    $16,$29,1016     # CHECK: tltu $16, $sp, 1016    # encoding: [0x02,0x1d,0xfe,0x33]
+        tne     $6,$17           # CHECK: tne $6, $17            # encoding: [0x00,0xd1,0x00,0x36]
+        tne     $7,$8,885        # CHECK: tne $7, $8, 885        # encoding: [0x00,0xe8,0xdd,0x76]
diff --git a/test/MC/Mips/mips_directives_bad.s b/test/MC/Mips/mips_directives_bad.s
index c823cacf0cb8..a4512b5ae12e 100644
--- a/test/MC/Mips/mips_directives_bad.s
+++ b/test/MC/Mips/mips_directives_bad.s
@@ -2,7 +2,7 @@
 # RUN: not llvm-mc -triple mips-unknown-unknown %s 2>&1 | FileCheck %s
 
     .abicalls should have no operands
-# CHECK:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token in directive
+# CHECK:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token, expected end of statement
 # CHECK-NEXT:    .abicalls should have no operands
 # CHECK-NEXT:              ^
 
@@ -12,48 +12,48 @@
 
 # Blank option operand
     .option 
-# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token in .option directive
+# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token, expected identifier
 # CHECK-NEXT:    .option 
 # CHECK-NEXT:            ^
 
 # Numeric option operand
     .option 2
-# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token in .option directive
+# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token, expected identifier
 # CHECK-NEXT:    .option 2
 # CHECK-NEXT:            ^
 
 # Register option operand
     .option $2
-# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token in .option directive
+# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token, expected identifier
 # CHECK-NEXT:    .option $2
 # CHECK-NEXT:            ^
 
     .option WithBadOption
-# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: warning: unknown option in .option directive
+# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: warning: unknown option, expected 'pic0' or 'pic2'
 # CHECK-NEXT:    .option WithBadOption
 # CHECK-NEXT:            ^
 
     .option pic0,
-# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token in .option pic0 directive
+# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token, expected end of statement
 # CHECK-NEXT:    .option pic0,
 # CHECK-NEXT:                ^
 
     .option pic0,pic2
-# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token in .option pic0 directive
+# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token, expected end of statement
 # CHECK-NEXT:    .option pic0,pic2
 # CHECK-NEXT:                ^
 
     .option pic0 pic2
-# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token in .option pic0 directive
+# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token, expected end of statement
 # CHECK-NEXT:    .option pic0 pic2
 # CHECK-NEXT:                 ^
 
     .option pic2,
-# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token in .option pic2 directive
+# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token, expected end of statement
 # CHECK-NEXT:    .option pic2,
 # CHECK-NEXT:                ^
 
     .option pic2 pic3
-# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token in .option pic2 directive
+# CHECK-NEXT:    :{{[0-9]+}}:{{[0-9]+}}: error: unexpected token, expected end of statement
 # CHECK-NEXT:    .option pic2 pic3
 # CHECK-NEXT:                 ^
diff --git a/test/MC/Mips/msa/set-msa-directive-bad.s b/test/MC/Mips/msa/set-msa-directive-bad.s
new file mode 100644
index 000000000000..02cb9a67e789
--- /dev/null
+++ b/test/MC/Mips/msa/set-msa-directive-bad.s
@@ -0,0 +1,11 @@
+# RUN: not llvm-mc %s -arch=mips -mcpu=mips32r2 2>%t1
+# RUN: FileCheck %s < %t1
+
+    .set nomsa
+    addvi.b     $w14, $w12, 14 # CHECK: error: instruction requires a CPU feature not currently enabled
+
+    .set msa
+    addvi.h     $w26, $w17, 4 
+    
+    .set nomsa
+    addvi.w     $w19, $w13, 11 # CHECK: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/msa/set-msa-directive.s b/test/MC/Mips/msa/set-msa-directive.s
new file mode 100644
index 000000000000..461ddba1945a
--- /dev/null
+++ b/test/MC/Mips/msa/set-msa-directive.s
@@ -0,0 +1,22 @@
+# RUN: llvm-mc %s -arch=mips -mcpu=mips32r2 | FileCheck %s
+
+# CHECK:    .set msa
+# CHECK:    addvi.b     $w14, $w12, 14
+# CHECK:    addvi.h     $w26, $w17, 4
+# CHECK:    addvi.w     $w19, $w13, 11
+# CHECK:    addvi.d     $w16, $w19, 7    
+# CHECK:    subvi.b     $w14, $w12, 14
+# CHECK:    subvi.h     $w26, $w17, 4
+# CHECK:    subvi.w     $w19, $w13, 11
+# CHECK:    subvi.d     $w16, $w19, 7
+
+    .set msa
+    addvi.b     $w14, $w12, 14
+    addvi.h     $w26, $w17, 4
+    addvi.w     $w19, $w13, 11
+    addvi.d     $w16, $w19, 7
+    
+    subvi.b     $w14, $w12, 14
+    subvi.h     $w26, $w17, 4
+    subvi.w     $w19, $w13, 11
+    subvi.d     $w16, $w19, 7
diff --git a/test/MC/Mips/nacl-mask.s b/test/MC/Mips/nacl-mask.s
index 22286ac7dbbc..c77646081f26 100644
--- a/test/MC/Mips/nacl-mask.s
+++ b/test/MC/Mips/nacl-mask.s
@@ -252,10 +252,10 @@ test5:
         jalr $t9
         addiu $4, $zero, 5
 
-# CHECK-LABEL:   test5:
 
+# CHECK:             nop
 # CHECK-NEXT:        nop
-# CHECK-NEXT:        nop
+# CHECK-LABEL:       test5:
 # CHECK-NEXT:        jal
 # CHECK-NEXT:        addiu   $4, $zero, 1
 
@@ -301,10 +301,11 @@ test6:
         jalr $t9
         sw      $sp, 0($sp)
 
-# CHECK-LABEL:   test6:
 
+
+# CHECK:             nop
 # CHECK-NEXT:        nop
-# CHECK-NEXT:        nop
+# CHECK-LABEL:       test6:
 # CHECK-NEXT:        jal
 # CHECK-NEXT:        sw      $4, 0($sp)
 
diff --git a/test/MC/Mips/octeon-instructions.s b/test/MC/Mips/octeon-instructions.s
index b7c89b47f871..2922744c9076 100644
--- a/test/MC/Mips/octeon-instructions.s
+++ b/test/MC/Mips/octeon-instructions.s
@@ -35,6 +35,10 @@
 # CHECK: sne   $23, $23, $20          # encoding: [0x72,0xf4,0xb8,0x2b]
 # CHECK: snei  $4, $16, -313          # encoding: [0x72,0x04,0xb1,0xef]
 # CHECK: snei  $26, $26, 511          # encoding: [0x73,0x5a,0x7f,0xef]
+# CHECK: sync  2                      # encoding: [0x00,0x00,0x00,0x8f]
+# CHECK: sync  6                      # encoding: [0x00,0x00,0x01,0x8f]
+# CHECK: sync  4                      # encoding: [0x00,0x00,0x01,0x0f]
+# CHECK: sync  5                      # encoding: [0x00,0x00,0x01,0x4f]
 # CHECK: v3mulu $21, $10, $21         # encoding: [0x71,0x55,0xa8,0x11]
 # CHECK: v3mulu $20, $20, $10         # encoding: [0x72,0x8a,0xa0,0x11]
 # CHECK: vmm0  $3, $19, $16           # encoding: [0x72,0x70,0x18,0x10]
@@ -77,6 +81,10 @@
   sne   $23, $20
   snei  $4, $16, -313
   snei  $26, 511
+  synciobdma
+  syncs
+  syncw
+  syncws
   v3mulu $21, $10, $21
   v3mulu $20, $10
   vmm0  $3, $19, $16
diff --git a/test/MC/Mips/set-arch.s b/test/MC/Mips/set-arch.s
new file mode 100644
index 000000000000..626746836add
--- /dev/null
+++ b/test/MC/Mips/set-arch.s
@@ -0,0 +1,55 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -show-encoding -mcpu=mips32 | \
+# RUN:   FileCheck %s
+
+    .text
+    .set arch=mips1
+    add         $2, $2, $2
+    .set arch=mips2
+    ll          $2, -2($2)
+    .set arch=mips3
+    dadd        $2, $2, $2
+    .set arch=mips4
+    ldxc1       $f8, $2($4)
+    .set arch=mips5
+    luxc1       $f19, $2($4)
+    .set arch=mips32
+    clo         $2, $2
+    .set arch=mips32r2
+    rotr        $2, $2, 15
+    .set arch=mips32r6
+    mod         $2, $4, $6
+    .set arch=mips64
+    daddi       $2, $2, 10
+    .set arch=mips64r2
+    drotr32     $1, $14, 15
+    .set arch=mips64r6
+    mod         $2, $4, $6
+    .set arch=cnmips
+    .set arch=r4000
+    dadd        $2, $2, $2
+
+# CHECK: .set arch=mips1
+# CHECK: add         $2, $2, $2
+# CHECK: .set arch=mips2
+# CHECK: ll          $2, -2($2)
+# CHECK: .set arch=mips3
+# CHECK: dadd        $2, $2, $2
+# CHECK: .set arch=mips4
+# CHECK: ldxc1       $f8, $2($4)
+# CHECK: .set arch=mips5
+# CHECK: luxc1       $f19, $2($4)
+# CHECK: .set arch=mips32
+# CHECK: clo         $2, $2
+# CHECK: .set arch=mips32r2
+# CHECK: rotr        $2, $2, 15
+# CHECK: .set arch=mips32r6
+# CHECK: mod         $2, $4, $6
+# CHECK: .set arch=mips64
+# CHECK: daddi       $2, $2, 10
+# CHECK: .set arch=mips64r2
+# CHECK: drotr32     $1, $14, 15
+# CHECK: .set arch=mips64r6
+# CHECK: mod         $2, $4, $6
+# CHECK: .set arch=cnmips
+# CHECK: .set arch=r4000
+# CHECK: dadd        $2, $2, $2
diff --git a/test/MC/Mips/set-at-directive-explicit-at.s b/test/MC/Mips/set-at-directive-explicit-at.s
index 1bd26ffa855d..797a2b782c37 100644
--- a/test/MC/Mips/set-at-directive-explicit-at.s
+++ b/test/MC/Mips/set-at-directive-explicit-at.s
@@ -7,15 +7,15 @@
     .text
 foo:
 # CHECK:   jr    $1                      # encoding: [0x08,0x00,0x20,0x00]
-# WARNINGS: :[[@LINE+2]]:11: warning: Used $at without ".set noat"
+# WARNINGS: :[[@LINE+2]]:11: warning: used $at without ".set noat"
     .set    at=$1
     jr    $at
 
 # CHECK:   jr    $1                      # encoding: [0x08,0x00,0x20,0x00]
-# WARNINGS: :[[@LINE+2]]:11: warning: Used $at without ".set noat"
+# WARNINGS: :[[@LINE+2]]:11: warning: used $at without ".set noat"
     .set    at=$1
     jr    $1
-# WARNINGS-NOT: warning: Used $at without ".set noat"
+# WARNINGS-NOT: warning: used $at without ".set noat"
 
 # CHECK:   jr    $1                      # encoding: [0x08,0x00,0x20,0x00]
     .set    at=$2
@@ -31,12 +31,12 @@ foo:
     jr    $at
 
 # CHECK:   jr    $16                     # encoding: [0x08,0x00,0x00,0x02]
-# WARNINGS: :[[@LINE+2]]:11: warning: Used $16 with ".set at=$16"
+# WARNINGS: :[[@LINE+2]]:11: warning: used $16 with ".set at=$16"
     .set    at=$16
     jr    $s0
 
 # CHECK:   jr    $16                     # encoding: [0x08,0x00,0x00,0x02]
-# WARNINGS: :[[@LINE+2]]:11: warning: Used $16 with ".set at=$16"
+# WARNINGS: :[[@LINE+2]]:11: warning: used $16 with ".set at=$16"
     .set    at=$16
     jr    $16
 # WARNINGS-NOT: warning
diff --git a/test/MC/Mips/set-mips-directives-bad.s b/test/MC/Mips/set-mips-directives-bad.s
new file mode 100644
index 000000000000..672698756599
--- /dev/null
+++ b/test/MC/Mips/set-mips-directives-bad.s
@@ -0,0 +1,30 @@
+# RUN: not llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips1 2>%t1
+# RUN: FileCheck %s < %t1
+
+# FIXME: At the moment we emit the wrong error message if we try to assemble the
+# ll instruction using an unsupported architecture so we just check for "error" 
+# and ignore the rest of the message.
+
+        .text
+        .set noreorder
+        .set mips1
+        ll  $2,-2($2) # CHECK: error:
+        .set mips2
+        dadd $2,$2,$2 # CHECK: error: instruction requires a CPU feature not currently enabled
+        .set mips3
+        ldxc1 $f8,$2($4) # CHECK: error: instruction requires a CPU feature not currently enabled
+        .set mips4
+        luxc1 $f19,$2($4) # CHECK: error: instruction requires a CPU feature not currently enabled
+        .set mips5
+        clo  $2,$2 # CHECK: error: instruction requires a CPU feature not currently enabled
+        .set mips32
+        rotr    $2,15 # CHECK: error: instruction requires a CPU feature not currently enabled
+        .set mips32r2
+        mod $2, $4, $6 # CHECK: error:instruction requires a CPU feature not currently enabled
+        .set mips32r6
+        daddi $2, $2, 10 # CHECK: error: instruction requires a CPU feature not currently enabled
+        .set mips64
+        drotr32 $1,$14,15 # CHECK: error: instruction requires a CPU feature not currently enabled
+        .set mips64r2
+        mod $2, $4, $6 # CHECK: error: instruction requires a CPU feature not currently enabled
+
diff --git a/test/MC/Mips/set-mips-directives.s b/test/MC/Mips/set-mips-directives.s
new file mode 100644
index 000000000000..96c23088676e
--- /dev/null
+++ b/test/MC/Mips/set-mips-directives.s
@@ -0,0 +1,51 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips1 | \
+# RUN:   FileCheck %s
+
+        .text
+        .set noreorder
+        .set mips1
+        add $2, $2, $2
+        .set mips2
+        ll  $2,-2($2)
+        .set mips3
+        dadd $2,$2,$2
+        .set mips4
+        ldxc1 $f8,$2($4)
+        .set mips5
+        luxc1 $f19,$2($4)
+        .set mips32
+        clo  $2,$2
+        .set mips32r2
+        rotr    $2,15
+        .set mips32r6
+        mod $2, $4, $6
+        .set mips64
+        daddi $2, $2, 10
+        .set mips64r2
+        drotr32 $1,$14,15
+        .set mips64r6
+        mod $2, $4, $6
+
+# CHECK: .set noreorder
+# CHECK: .set mips1
+# CHECK: add $2, $2, $2
+# CHECK: .set mips2
+# CHECK: ll  $2, -2($2)
+# CHECK: .set mips3
+# CHECK: dadd $2, $2, $2
+# CHECK: .set mips4
+# CHECK: ldxc1 $f8, $2($4)
+# CHECK: .set mips5
+# CHECK: luxc1 $f19, $2($4)
+# CHECK: .set mips32
+# CHECK: clo $2, $2
+# CHECK: .set mips32r2
+# CHECK: rotr $2, $2, 15
+# CHECK: .set mips32r6
+# CHECK: mod $2, $4, $6
+# CHECK: .set mips64
+# CHECK: daddi $2, $2, 10
+# CHECK: .set mips64r2
+# CHECK:  drotr32 $1, $14, 15
+# CHECK: .set mips64r6
+# CHECK: mod $2, $4, $6
diff --git a/test/MC/Mips/set-mips0-directive.s b/test/MC/Mips/set-mips0-directive.s
new file mode 100644
index 000000000000..5cb75bb90ea1
--- /dev/null
+++ b/test/MC/Mips/set-mips0-directive.s
@@ -0,0 +1,27 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 | \
+# RUN:   FileCheck %s
+
+    .text
+    rotr  $7, $7, 22
+
+    .set mips32r6
+    mod   $2, $4, $6
+    .set mips0
+    rotr  $2, $2, 15
+
+    .set mips3
+    dadd  $4, $4, $4
+    .set mips0
+    rotr  $3, $3, 19
+
+# CHECK: rotr  $7, $7, 22
+
+# CHECK: .set mips32r6
+# CHECK: mod   $2, $4, $6
+# CHECK: .set mips0
+# CHECK: rotr  $2, $2, 15
+
+# CHECK: .set mips3
+# CHECK: dadd  $4, $4, $4
+# CHECK: .set mips0
+# CHECK: rotr  $3, $3, 19
diff --git a/test/MC/Mips/set-mips16-directive.s b/test/MC/Mips/set-mips16-directive.s
new file mode 100644
index 000000000000..cf8090eaa280
--- /dev/null
+++ b/test/MC/Mips/set-mips16-directive.s
@@ -0,0 +1,10 @@
+# RUN: llvm-mc %s -arch=mips | FileCheck %s
+# FIXME: Update this test when we have a more mature implementation of Mips16 in the IAS.
+
+.text
+.set mips16
+.set nomips16
+
+# CHECK: .text
+# CHECK: .set mips16
+# CHECK: .set nomips16
diff --git a/test/MC/Mips/set-nodsp.s b/test/MC/Mips/set-nodsp.s
new file mode 100644
index 000000000000..f98cefba3907
--- /dev/null
+++ b/test/MC/Mips/set-nodsp.s
@@ -0,0 +1,12 @@
+# RUN: not llvm-mc %s -mcpu=mips32 -mattr=+dsp -triple mips-unknown-linux 2>%t1
+# RUN: FileCheck %s < %t1
+
+  lbux    $7, $10($11)
+
+  .set nodsp
+  lbux    $6, $10($11)
+  # CHECK: error: instruction requires a CPU feature not currently enabled
+
+  .set dsp
+  lbux    $5, $10($11)
+  # CHECK-NOT: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/Mips/set-push-pop-directives-bad.s b/test/MC/Mips/set-push-pop-directives-bad.s
new file mode 100644
index 000000000000..53d8b2308153
--- /dev/null
+++ b/test/MC/Mips/set-push-pop-directives-bad.s
@@ -0,0 +1,14 @@
+# RUN: not llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 2>%t1
+# RUN:   FileCheck %s < %t1
+
+        .text
+        .set pop
+# CHECK: :[[@LINE-1]]:14: error: .set pop with no .set push
+        .set push
+        .set pop
+        .set pop
+# CHECK: :[[@LINE-1]]:14: error: .set pop with no .set push
+        .set push foo
+# CHECK: :[[@LINE-1]]:19: error: unexpected token, expected end of statement
+        .set pop bar
+# CHECK: :[[@LINE-1]]:18: error: unexpected token, expected end of statement
diff --git a/test/MC/Mips/set-push-pop-directives.s b/test/MC/Mips/set-push-pop-directives.s
new file mode 100644
index 000000000000..5f55b7c7e4d3
--- /dev/null
+++ b/test/MC/Mips/set-push-pop-directives.s
@@ -0,0 +1,53 @@
+# RUN: llvm-mc %s -triple=mipsel-unknown-linux -mcpu=mips32r2 -mattr=+msa | \
+# RUN:   FileCheck %s
+# .set push creates a copy of the current environment.
+# .set pop restores the previous environment.
+# FIXME: Also test resetting of .set macro/nomacro option.
+
+    .text
+    # The first environment on the stack (with initial values).
+    lw       $1, 65536($1)
+    b        1336
+    addvi.b  $w15, $w13, 18
+    
+    # Create a new environment.
+    .set push
+    .set at=$ra           # Test the ATReg option.
+    lw       $1, 65536($1)
+    .set noreorder        # Test the Reorder option.
+    b        1336
+    .set nomsa            # Test the Features option (ASE).
+    .set mips32r6         # Test the Features option (ISA).
+    mod      $2, $4, $6
+
+    # Switch back to the first environment.
+    .set pop
+    lw       $1, 65536($1)
+    b        1336
+    addvi.b  $w15, $w13, 18
+
+# CHECK:  lui      $1, 1
+# CHECK:  addu     $1, $1, $1
+# CHECK:  lw       $1, 0($1)
+# CHECK:  b        1336
+# CHECK:  nop
+# CHECK:  addvi.b  $w15, $w13, 18
+
+# CHECK:  .set push
+# CHECK:  lui      $ra, 1
+# CHECK:  addu     $ra, $ra, $1
+# CHECK:  lw       $1, 0($ra)
+# CHECK:  .set noreorder   
+# CHECK:  b        1336
+# CHECK-NOT:  nop
+# CHECK:  .set nomsa       
+# CHECK:  .set mips32r6    
+# CHECK:  mod      $2, $4, $6
+
+# CHECK:  .set pop
+# CHECK:  lui      $1, 1
+# CHECK:  addu     $1, $1, $1
+# CHECK:  lw       $1, 0($1)
+# CHECK:  b        1336
+# CHECK:  nop
+# CHECK:  addvi.b  $w15, $w13, 18
diff --git a/test/MC/Mips/unaligned-nops.s b/test/MC/Mips/unaligned-nops.s
new file mode 100644
index 000000000000..ebbbb40f4dc6
--- /dev/null
+++ b/test/MC/Mips/unaligned-nops.s
@@ -0,0 +1,4 @@
+# RUN: llvm-mc -filetype=obj  -triple=mipsel %s -o %t
+.byte 1
+.p2align 2
+foo:
diff --git a/test/MC/PowerPC/lcomm.s b/test/MC/PowerPC/lcomm.s
new file mode 100644
index 000000000000..a84f138479b4
--- /dev/null
+++ b/test/MC/PowerPC/lcomm.s
@@ -0,0 +1,21 @@
+# RUN: llvm-mc -triple powerpc-unknown-unknown -filetype=obj %s | \
+# RUN: llvm-readobj -s -sd | FileCheck %s
+# RUN: llvm-mc -triple powerpc64-unknown-unknown -filetype=obj %s | \
+# RUN: llvm-readobj -s -sd | FileCheck %s
+
+.lcomm foo, 16, 16
+
+// CHECK:        Section {
+// CHECK:          Name: .bss
+// CHECK-NEXT:     Type: SHT_NOBITS
+// CHECK-NEXT:     Flags [
+// CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_WRITE
+// CHECK-NEXT:     ]
+// CHECK-NEXT:     Address: 0x0
+// CHECK-NEXT:     Offset: 0x40
+// CHECK-NEXT:     Size: 16
+// CHECK-NEXT:     Link: 0
+// CHECK-NEXT:     Info: 0
+// CHECK-NEXT:     AddressAlignment: 16
+// CHECK-NEXT:     EntrySize: 0
diff --git a/test/MC/PowerPC/ppc-reloc.s b/test/MC/PowerPC/ppc-reloc.s
index 19dd2a3cf4e8..e7dd1e20b613 100644
--- a/test/MC/PowerPC/ppc-reloc.s
+++ b/test/MC/PowerPC/ppc-reloc.s
@@ -7,11 +7,13 @@
 	.align 2
 foo:
 	bl printf@plt
+	bl _GLOBAL_OFFSET_TABLE_@local-4
 .LC1:
 	.size foo, . - foo
 
 # CHECK:      Relocations [
 # CHECK-NEXT:   Section (2) .rela.text {
 # CHECK-NEXT:     0x0 R_PPC_PLTREL24 printf 0x0
+# CHECK-NEXT:     0x4 R_PPC_LOCAL24PC _GLOBAL_OFFSET_TABLE_ 0xFFFFFFFC
 # CHECK-NEXT:   }
 # CHECK-NEXT: ]
diff --git a/test/MC/PowerPC/ppc32-ba.s b/test/MC/PowerPC/ppc32-ba.s
new file mode 100644
index 000000000000..133423b4e8c3
--- /dev/null
+++ b/test/MC/PowerPC/ppc32-ba.s
@@ -0,0 +1,6 @@
+# RUN: llvm-mc -triple powerpc-unknown-unknown --show-encoding %s | FileCheck %s
+
+# Check that large immediates in 32bit mode are accepted.
+
+# CHECK: ba -33554432 # encoding: [0x4a,0x00,0x00,0x02]
+         ba 0xfe000000
diff --git a/test/MC/PowerPC/ppc64-encoding-4xx.s b/test/MC/PowerPC/ppc64-encoding-4xx.s
new file mode 100644
index 000000000000..5414e1a49bfb
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-encoding-4xx.s
@@ -0,0 +1,167 @@
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s
+# RUN: llvm-mc -triple powerpc64le-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s
+
+# Instructions specific to the PowerPC 4xx embedded controllers:
+
+# CHECK-BE: mfdcr 3, 178                     # encoding: [0x7c,0x72,0x2a,0x86]
+# CHECK-LE: mfdcr 3, 178                     # encoding: [0x86,0x2a,0x72,0x7c]
+            mfdcr 3,178
+# CHECK-BE: mtdcr 178, 3                     # encoding: [0x7c,0x72,0x2b,0x86]
+# CHECK-LE: mtdcr 178, 3                     # encoding: [0x86,0x2b,0x72,0x7c]
+            mtdcr 178,3
+
+# CHECK-BE: tlbre 2, 3, 0                    # encoding: [0x7c,0x43,0x07,0x64]
+# CHECK-LE: tlbre 2, 3, 0                    # encoding: [0x64,0x07,0x43,0x7c]
+            tlbre %r2, %r3, 0
+# CHECK-BE: tlbre 2, 3, 1                    # encoding: [0x7c,0x43,0x0f,0x64]
+# CHECK-LE: tlbre 2, 3, 1                    # encoding: [0x64,0x0f,0x43,0x7c]
+            tlbre %r2, %r3, 1
+# CHECK-BE: tlbre 2, 3, 0                    # encoding: [0x7c,0x43,0x07,0x64]
+# CHECK-LE: tlbre 2, 3, 0                    # encoding: [0x64,0x07,0x43,0x7c]
+            tlbrehi %r2, %r3
+# CHECK-BE: tlbre 2, 3, 1                    # encoding: [0x7c,0x43,0x0f,0x64]
+# CHECK-LE: tlbre 2, 3, 1                    # encoding: [0x64,0x0f,0x43,0x7c]
+            tlbrelo %r2, %r3
+
+# CHECK-BE: tlbwe 2, 3, 0                    # encoding: [0x7c,0x43,0x07,0xa4]
+# CHECK-LE: tlbwe 2, 3, 0                    # encoding: [0xa4,0x07,0x43,0x7c]
+            tlbwe %r2, %r3, 0
+# CHECK-BE: tlbwe 2, 3, 1                    # encoding: [0x7c,0x43,0x0f,0xa4]
+# CHECK-LE: tlbwe 2, 3, 1                    # encoding: [0xa4,0x0f,0x43,0x7c]
+            tlbwe %r2, %r3, 1
+# CHECK-BE: tlbwe 2, 3, 0                    # encoding: [0x7c,0x43,0x07,0xa4]
+# CHECK-LE: tlbwe 2, 3, 0                    # encoding: [0xa4,0x07,0x43,0x7c]
+            tlbwehi %r2, %r3
+# CHECK-BE: tlbwe 2, 3, 1                    # encoding: [0x7c,0x43,0x0f,0xa4]
+# CHECK-LE: tlbwe 2, 3, 1                    # encoding: [0xa4,0x0f,0x43,0x7c]
+            tlbwelo %r2, %r3
+
+# CHECK-BE: tlbsx 2, 3, 1                    # encoding: [0x7c,0x43,0x0f,0x24]
+# CHECK-LE: tlbsx 2, 3, 1                    # encoding: [0x24,0x0f,0x43,0x7c]
+            tlbsx %r2, %r3, %r1
+# CHECK-BE: tlbsx. 2, 3, 1                   # encoding: [0x7c,0x43,0x0f,0x25]
+# CHECK-LE: tlbsx. 2, 3, 1                   # encoding: [0x25,0x0f,0x43,0x7c]
+            tlbsx. %r2, %r3, %r1
+
+# CHECK-BE: mfspr 2, 1018                    # encoding: [0x7c,0x5a,0xfa,0xa6]
+# CHECK-LE: mfspr 2, 1018                    # encoding: [0xa6,0xfa,0x5a,0x7c]
+            mfdccr %r2
+# CHECK-BE: mtspr 1018, 2                    # encoding: [0x7c,0x5a,0xfb,0xa6]
+# CHECK-LE: mtspr 1018, 2                    # encoding: [0xa6,0xfb,0x5a,0x7c]
+            mtdccr %r2
+
+# CHECK-BE: mfspr 2, 1019                    # encoding: [0x7c,0x5b,0xfa,0xa6]
+# CHECK-LE: mfspr 2, 1019                    # encoding: [0xa6,0xfa,0x5b,0x7c]
+            mficcr %r2
+# CHECK-BE: mtspr 1019, 2                    # encoding: [0x7c,0x5b,0xfb,0xa6]
+# CHECK-LE: mtspr 1019, 2                    # encoding: [0xa6,0xfb,0x5b,0x7c]
+            mticcr %r2
+
+# CHECK-BE: mfspr 2, 981                    # encoding: [0x7c,0x55,0xf2,0xa6]
+# CHECK-LE: mfspr 2, 981                    # encoding: [0xa6,0xf2,0x55,0x7c]
+            mfdear %r2
+# CHECK-BE: mtspr 981, 2                    # encoding: [0x7c,0x55,0xf3,0xa6]
+# CHECK-LE: mtspr 981, 2                    # encoding: [0xa6,0xf3,0x55,0x7c]
+            mtdear %r2
+
+# CHECK-BE: mfspr 2, 980                    # encoding: [0x7c,0x54,0xf2,0xa6]
+# CHECK-LE: mfspr 2, 980                    # encoding: [0xa6,0xf2,0x54,0x7c]
+            mfesr %r2
+# CHECK-BE: mtspr 980, 2                    # encoding: [0x7c,0x54,0xf3,0xa6]
+# CHECK-LE: mtspr 980, 2                    # encoding: [0xa6,0xf3,0x54,0x7c]
+            mtesr %r2
+
+# CHECK-BE: mfspr 2, 986                    # encoding: [0x7c,0x5a,0xf2,0xa6]
+# CHECK-LE: mfspr 2, 986                    # encoding: [0xa6,0xf2,0x5a,0x7c]
+            mftcr %r2
+# CHECK-BE: mtspr 986, 2                    # encoding: [0x7c,0x5a,0xf3,0xa6]
+# CHECK-LE: mtspr 986, 2                    # encoding: [0xa6,0xf3,0x5a,0x7c]
+            mttcr %r2
+
+# CHECK-BE: mfspr 2, 989                    # encoding: [0x7c,0x5d,0xf2,0xa6]
+# CHECK-LE: mfspr 2, 989                    # encoding: [0xa6,0xf2,0x5d,0x7c]
+            mftblo %r2
+# CHECK-BE: mtspr 989, 2                    # encoding: [0x7c,0x5d,0xf3,0xa6]
+# CHECK-LE: mtspr 989, 2                    # encoding: [0xa6,0xf3,0x5d,0x7c]
+            mttblo %r2
+# CHECK-BE: mfspr 2, 988                    # encoding: [0x7c,0x5c,0xf2,0xa6]
+# CHECK-LE: mfspr 2, 988                    # encoding: [0xa6,0xf2,0x5c,0x7c]
+            mftbhi %r2
+# CHECK-BE: mtspr 988, 2                    # encoding: [0x7c,0x5c,0xf3,0xa6]
+# CHECK-LE: mtspr 988, 2                    # encoding: [0xa6,0xf3,0x5c,0x7c]
+            mttbhi %r2
+
+# CHECK-BE: dccci 5, 6                      # encoding: [0x7c,0x05,0x33,0x8c]
+# CHECK-LE: dccci 5, 6                      # encoding: [0x8c,0x33,0x05,0x7c]
+            dccci %r5,%r6
+# CHECK-BE: iccci 5, 6                      # encoding: [0x7c,0x05,0x37,0x8c]
+# CHECK-LE: iccci 5, 6                      # encoding: [0x8c,0x37,0x05,0x7c]
+            iccci %r5,%r6
+# CHECK-BE: dccci 0, 0                      # encoding: [0x7c,0x00,0x03,0x8c]
+# CHECK-LE: dccci 0, 0                      # encoding: [0x8c,0x03,0x00,0x7c]
+            dci %r0
+# CHECK-BE: iccci 0, 0                      # encoding: [0x7c,0x00,0x07,0x8c]
+# CHECK-LE: iccci 0, 0                      # encoding: [0x8c,0x07,0x00,0x7c]
+            ici 0
+
+# CHECK-BE: mfspr 2, 990                    # encoding: [0x7c,0x5e,0xf2,0xa6]
+# CHECK-LE: mfspr 2, 990                    # encoding: [0xa6,0xf2,0x5e,0x7c]
+            mfsrr2 2
+# CHECK-BE: mtspr 990, 2                    # encoding: [0x7c,0x5e,0xf3,0xa6]
+# CHECK-LE: mtspr 990, 2                    # encoding: [0xa6,0xf3,0x5e,0x7c]
+            mtsrr2 2
+# CHECK-BE: mfspr 2, 991                    # encoding: [0x7c,0x5f,0xf2,0xa6]
+# CHECK-LE: mfspr 2, 991                    # encoding: [0xa6,0xf2,0x5f,0x7c]
+            mfsrr3 2
+# CHECK-BE: mtspr 991, 2                    # encoding: [0x7c,0x5f,0xf3,0xa6]
+# CHECK-LE: mtspr 991, 2                    # encoding: [0xa6,0xf3,0x5f,0x7c]
+            mtsrr3 2
+
+# CHECK-BE: mfdcr 5, 128                    # encoding: [0x7c,0xa0,0x22,0x86]
+# CHECK-LE: mfdcr 5, 128                    # encoding: [0x86,0x22,0xa0,0x7c]
+            mfbr0 %r5
+# CHECK-BE: mtdcr 128, 5                    # encoding: [0x7c,0xa0,0x23,0x86]
+# CHECK-LE: mtdcr 128, 5                    # encoding: [0x86,0x23,0xa0,0x7c]
+            mtbr0 %r5
+# CHECK-BE: mfdcr 5, 129                    # encoding: [0x7c,0xa1,0x22,0x86]
+# CHECK-LE: mfdcr 5, 129                    # encoding: [0x86,0x22,0xa1,0x7c]
+            mfbr1 %r5
+# CHECK-BE: mtdcr 129, 5                    # encoding: [0x7c,0xa1,0x23,0x86]
+# CHECK-LE: mtdcr 129, 5                    # encoding: [0x86,0x23,0xa1,0x7c]
+            mtbr1 %r5
+# CHECK-BE: mfdcr 5, 130                    # encoding: [0x7c,0xa2,0x22,0x86]
+# CHECK-LE: mfdcr 5, 130                    # encoding: [0x86,0x22,0xa2,0x7c]
+            mfbr2 %r5
+# CHECK-BE: mtdcr 130, 5                    # encoding: [0x7c,0xa2,0x23,0x86]
+# CHECK-LE: mtdcr 130, 5                    # encoding: [0x86,0x23,0xa2,0x7c]
+            mtbr2 %r5
+# CHECK-BE: mfdcr 5, 131                    # encoding: [0x7c,0xa3,0x22,0x86]
+# CHECK-LE: mfdcr 5, 131                    # encoding: [0x86,0x22,0xa3,0x7c]
+            mfbr3 %r5
+# CHECK-BE: mtdcr 131, 5                    # encoding: [0x7c,0xa3,0x23,0x86]
+# CHECK-LE: mtdcr 131, 5                    # encoding: [0x86,0x23,0xa3,0x7c]
+            mtbr3 %r5
+# CHECK-BE: mfdcr 5, 132                    # encoding: [0x7c,0xa4,0x22,0x86]
+# CHECK-LE: mfdcr 5, 132                    # encoding: [0x86,0x22,0xa4,0x7c]
+            mfbr4 %r5
+# CHECK-BE: mtdcr 132, 5                    # encoding: [0x7c,0xa4,0x23,0x86]
+# CHECK-LE: mtdcr 132, 5                    # encoding: [0x86,0x23,0xa4,0x7c]
+            mtbr4 %r5
+# CHECK-BE: mfdcr 5, 133                    # encoding: [0x7c,0xa5,0x22,0x86]
+# CHECK-LE: mfdcr 5, 133                    # encoding: [0x86,0x22,0xa5,0x7c]
+            mfbr5 %r5
+# CHECK-BE: mtdcr 133, 5                    # encoding: [0x7c,0xa5,0x23,0x86]
+# CHECK-LE: mtdcr 133, 5                    # encoding: [0x86,0x23,0xa5,0x7c]
+            mtbr5 %r5
+# CHECK-BE: mfdcr 5, 134                    # encoding: [0x7c,0xa6,0x22,0x86]
+# CHECK-LE: mfdcr 5, 134                    # encoding: [0x86,0x22,0xa6,0x7c]
+            mfbr6 %r5
+# CHECK-BE: mtdcr 134, 5                    # encoding: [0x7c,0xa6,0x23,0x86]
+# CHECK-LE: mtdcr 134, 5                    # encoding: [0x86,0x23,0xa6,0x7c]
+            mtbr6 %r5
+# CHECK-BE: mfdcr 5, 135                    # encoding: [0x7c,0xa7,0x22,0x86]
+# CHECK-LE: mfdcr 5, 135                    # encoding: [0x86,0x22,0xa7,0x7c]
+            mfbr7 %r5
+# CHECK-BE: mtdcr 135, 5                    # encoding: [0x7c,0xa7,0x23,0x86]
+# CHECK-LE: mtdcr 135, 5                    # encoding: [0x86,0x23,0xa7,0x7c]
+            mtbr7 %r5
diff --git a/test/MC/PowerPC/ppc64-encoding-6xx.s b/test/MC/PowerPC/ppc64-encoding-6xx.s
new file mode 100644
index 000000000000..3a5e7a126b84
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-encoding-6xx.s
@@ -0,0 +1,109 @@
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s
+# RUN: llvm-mc -triple powerpc64le-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s
+
+# Instructions specific to the PowerPC 6xx family:
+
+# CHECK-BE: mfspr 12, 528                    # encoding: [0x7d,0x90,0x82,0xa6]
+# CHECK-LE: mfspr 12, 528                    # encoding: [0xa6,0x82,0x90,0x7d]
+mfibatu %r12, 0
+# CHECK-BE: mfspr 12, 529                    # encoding: [0x7d,0x91,0x82,0xa6]
+# CHECK-LE: mfspr 12, 529                    # encoding: [0xa6,0x82,0x91,0x7d]
+mfibatl %r12, 0
+# CHECK-BE: mfspr 12, 530                    # encoding: [0x7d,0x92,0x82,0xa6]
+# CHECK-LE: mfspr 12, 530                    # encoding: [0xa6,0x82,0x92,0x7d]
+mfibatu %r12, 1
+# CHECK-BE: mfspr 12, 531                    # encoding: [0x7d,0x93,0x82,0xa6]
+# CHECK-LE: mfspr 12, 531                    # encoding: [0xa6,0x82,0x93,0x7d]
+mfibatl %r12, 1
+# CHECK-BE: mfspr 12, 532                    # encoding: [0x7d,0x94,0x82,0xa6]
+# CHECK-LE: mfspr 12, 532                    # encoding: [0xa6,0x82,0x94,0x7d]
+mfibatu %r12, 2
+# CHECK-BE: mfspr 12, 533                    # encoding: [0x7d,0x95,0x82,0xa6]
+# CHECK-LE: mfspr 12, 533                    # encoding: [0xa6,0x82,0x95,0x7d]
+mfibatl %r12, 2
+# CHECK-BE: mfspr 12, 534                    # encoding: [0x7d,0x96,0x82,0xa6]
+# CHECK-LE: mfspr 12, 534                    # encoding: [0xa6,0x82,0x96,0x7d]
+mfibatu %r12, 3
+# CHECK-BE: mfspr 12, 535                    # encoding: [0x7d,0x97,0x82,0xa6]
+# CHECK-LE: mfspr 12, 535                    # encoding: [0xa6,0x82,0x97,0x7d]
+mfibatl %r12, 3
+# CHECK-BE: mtspr 528, 12                    # encoding: [0x7d,0x90,0x83,0xa6]
+# CHECK-LE: mtspr 528, 12                    # encoding: [0xa6,0x83,0x90,0x7d]
+mtibatu 0, %r12
+# CHECK-BE: mtspr 529, 12                    # encoding: [0x7d,0x91,0x83,0xa6]
+# CHECK-LE: mtspr 529, 12                    # encoding: [0xa6,0x83,0x91,0x7d]
+mtibatl 0, %r12
+# CHECK-BE: mtspr 530, 12                    # encoding: [0x7d,0x92,0x83,0xa6]
+# CHECK-LE: mtspr 530, 12                    # encoding: [0xa6,0x83,0x92,0x7d]
+mtibatu 1, %r12
+# CHECK-BE: mtspr 531, 12                    # encoding: [0x7d,0x93,0x83,0xa6]
+# CHECK-LE: mtspr 531, 12                    # encoding: [0xa6,0x83,0x93,0x7d]
+mtibatl 1, %r12
+# CHECK-BE: mtspr 532, 12                    # encoding: [0x7d,0x94,0x83,0xa6]
+# CHECK-LE: mtspr 532, 12                    # encoding: [0xa6,0x83,0x94,0x7d]
+mtibatu 2, %r12
+# CHECK-BE: mtspr 533, 12                    # encoding: [0x7d,0x95,0x83,0xa6]
+# CHECK-LE: mtspr 533, 12                    # encoding: [0xa6,0x83,0x95,0x7d]
+mtibatl 2, %r12
+# CHECK-BE: mtspr 534, 12                    # encoding: [0x7d,0x96,0x83,0xa6]
+# CHECK-LE: mtspr 534, 12                    # encoding: [0xa6,0x83,0x96,0x7d]
+mtibatu 3, %r12
+# CHECK-BE: mtspr 535, 12                    # encoding: [0x7d,0x97,0x83,0xa6]
+# CHECK-LE: mtspr 535, 12                    # encoding: [0xa6,0x83,0x97,0x7d]
+mtibatl 3, %r12
+
+# CHECK-BE: mfspr 12, 536                    # encoding: [0x7d,0x98,0x82,0xa6]
+# CHECK-LE: mfspr 12, 536                    # encoding: [0xa6,0x82,0x98,0x7d]
+mfdbatu %r12, 0
+# CHECK-BE: mfspr 12, 537                    # encoding: [0x7d,0x99,0x82,0xa6]
+# CHECK-LE: mfspr 12, 537                    # encoding: [0xa6,0x82,0x99,0x7d]
+mfdbatl %r12, 0
+# CHECK-BE: mfspr 12, 538                    # encoding: [0x7d,0x9a,0x82,0xa6]
+# CHECK-LE: mfspr 12, 538                    # encoding: [0xa6,0x82,0x9a,0x7d]
+mfdbatu %r12, 1
+# CHECK-BE: mfspr 12, 539                    # encoding: [0x7d,0x9b,0x82,0xa6]
+# CHECK-LE: mfspr 12, 539                    # encoding: [0xa6,0x82,0x9b,0x7d]
+mfdbatl %r12, 1
+# CHECK-BE: mfspr 12, 540                    # encoding: [0x7d,0x9c,0x82,0xa6]
+# CHECK-LE: mfspr 12, 540                    # encoding: [0xa6,0x82,0x9c,0x7d]
+mfdbatu %r12, 2
+# CHECK-BE: mfspr 12, 541                    # encoding: [0x7d,0x9d,0x82,0xa6]
+# CHECK-LE: mfspr 12, 541                    # encoding: [0xa6,0x82,0x9d,0x7d]
+mfdbatl %r12, 2
+# CHECK-BE: mfspr 12, 542                    # encoding: [0x7d,0x9e,0x82,0xa6]
+# CHECK-LE: mfspr 12, 542                    # encoding: [0xa6,0x82,0x9e,0x7d]
+mfdbatu %r12, 3
+# CHECK-BE: mfspr 12, 543                    # encoding: [0x7d,0x9f,0x82,0xa6]
+# CHECK-LE: mfspr 12, 543                    # encoding: [0xa6,0x82,0x9f,0x7d]
+mfdbatl %r12, 3
+# CHECK-BE: mtspr 536, 12                    # encoding: [0x7d,0x98,0x83,0xa6]
+# CHECK-LE: mtspr 536, 12                    # encoding: [0xa6,0x83,0x98,0x7d]
+mtdbatu 0, %r12
+# CHECK-BE: mtspr 537, 12                    # encoding: [0x7d,0x99,0x83,0xa6]
+# CHECK-LE: mtspr 537, 12                    # encoding: [0xa6,0x83,0x99,0x7d]
+mtdbatl 0, %r12
+# CHECK-BE: mtspr 538, 12                    # encoding: [0x7d,0x9a,0x83,0xa6]
+# CHECK-LE: mtspr 538, 12                    # encoding: [0xa6,0x83,0x9a,0x7d]
+mtdbatu 1, %r12
+# CHECK-BE: mtspr 539, 12                    # encoding: [0x7d,0x9b,0x83,0xa6]
+# CHECK-LE: mtspr 539, 12                    # encoding: [0xa6,0x83,0x9b,0x7d]
+mtdbatl 1, %r12
+# CHECK-BE: mtspr 540, 12                    # encoding: [0x7d,0x9c,0x83,0xa6]
+# CHECK-LE: mtspr 540, 12                    # encoding: [0xa6,0x83,0x9c,0x7d]
+mtdbatu 2, %r12
+# CHECK-BE: mtspr 541, 12                    # encoding: [0x7d,0x9d,0x83,0xa6]
+# CHECK-LE: mtspr 541, 12                    # encoding: [0xa6,0x83,0x9d,0x7d]
+mtdbatl 2, %r12
+# CHECK-BE: mtspr 542, 12                    # encoding: [0x7d,0x9e,0x83,0xa6]
+# CHECK-LE: mtspr 542, 12                    # encoding: [0xa6,0x83,0x9e,0x7d]
+mtdbatu 3, %r12
+# CHECK-BE: mtspr 543, 12                    # encoding: [0x7d,0x9f,0x83,0xa6]
+# CHECK-LE: mtspr 543, 12                    # encoding: [0xa6,0x83,0x9f,0x7d]
+mtdbatl 3, %r12
+
+# CHECK-BE: tlbld 4                        # encoding: [0x7c,0x00,0x27,0xa4]
+# CHECK-LE: tlbld 4                        # encoding: [0xa4,0x27,0x00,0x7c]
+tlbld %r4
+# CHECK-BE: tlbli 4                        # encoding: [0x7c,0x00,0x27,0xe4]
+# CHECK-LE: tlbli 4                        # encoding: [0xe4,0x27,0x00,0x7c]
+tlbli %r4
diff --git a/test/MC/PowerPC/ppc64-encoding-bookII.s b/test/MC/PowerPC/ppc64-encoding-bookII.s
index 99796ca61002..20eba7083f6d 100644
--- a/test/MC/PowerPC/ppc64-encoding-bookII.s
+++ b/test/MC/PowerPC/ppc64-encoding-bookII.s
@@ -8,6 +8,10 @@
 # CHECK-LE: icbi 2, 3                       # encoding: [0xac,0x1f,0x02,0x7c]
             icbi 2, 3
 
+# CHECK-BE: icbt 0, 5, 31                   # encoding: [0x7c,0x05,0xf8,0x2c]
+# CHECK-LE: icbt 0, 5, 31                   # encoding: [0x2c,0xf8,0x05,0x7c]
+            icbt 0, 5, 31
+
 # FIXME:    dcbt 2, 3, 10
 # CHECK-BE: dcbt 2, 3                       # encoding: [0x7c,0x02,0x1a,0x2c]
 # CHECK-LE: dcbt 2, 3                       # encoding: [0x2c,0x1a,0x02,0x7c]
@@ -53,6 +57,11 @@
 # CHECK-BE: wait 2                          # encoding: [0x7c,0x40,0x00,0x7c]
 # CHECK-LE: wait 2                          # encoding: [0x7c,0x00,0x40,0x7c]
             wait 2
+# CHECK-BE: mbar 1                          # encoding: [0x7c,0x20,0x06,0xac]
+# CHECK-LE: mbar 1                          # encoding: [0xac,0x06,0x20,0x7c]
+            mbar 1
+# CHECK-BE: mbar 0
+            mbar
 
 # Extended mnemonics
 
@@ -101,7 +110,16 @@
 # CHECK-BE: mftb 2, 268                     # encoding: [0x7c,0x4c,0x42,0xe6]
 # CHECK-LE: mftb 2, 268                     # encoding: [0xe6,0x42,0x4c,0x7c]
             mftb 2
+# CHECK-BE: mftb 2, 268                     # encoding: [0x7c,0x4c,0x42,0xe6]
+# CHECK-LE: mftb 2, 268                     # encoding: [0xe6,0x42,0x4c,0x7c]
+            mftbl 2
 # CHECK-BE: mftb 2, 269                     # encoding: [0x7c,0x4d,0x42,0xe6]
 # CHECK-LE: mftb 2, 269                     # encoding: [0xe6,0x42,0x4d,0x7c]
             mftbu 2
 
+# CHECK-BE: mtspr 284, 3                    # encoding: [0x7c,0x7c,0x43,0xa6]
+# CHECK-LE: mtspr 284, 3                    # encoding: [0xa6,0x43,0x7c,0x7c]
+            mttbl 3
+# CHECK-BE: mtspr 285, 3                    # encoding: [0x7c,0x7d,0x43,0xa6]
+# CHECK-LE: mtspr 285, 3                    # encoding: [0xa6,0x43,0x7d,0x7c]
+            mttbu 3
diff --git a/test/MC/PowerPC/ppc64-encoding-bookIII.s b/test/MC/PowerPC/ppc64-encoding-bookIII.s
index dfce39536e39..9e784dbb5e98 100644
--- a/test/MC/PowerPC/ppc64-encoding-bookIII.s
+++ b/test/MC/PowerPC/ppc64-encoding-bookIII.s
@@ -21,53 +21,66 @@
 # CHECK-LE: mtmsrd 4, 1                     # encoding: [0x64,0x01,0x81,0x7c]
             mtmsrd %r4, 1
 
-# CHECK-BE: mfspr 4, 272                    # encoding: [0x7c,0x90,0x42,0xa6]
-# CHECK-LE: mfspr 4, 272                    # encoding: [0xa6,0x42,0x90,0x7c]
-            mfsprg %r4, 0
-
-# CHECK-BE: mfspr 4, 273                    # encoding: [0x7c,0x91,0x42,0xa6]
-# CHECK-LE: mfspr 4, 273                    # encoding: [0xa6,0x42,0x91,0x7c]
-            mfsprg %r4, 1
-
-# CHECK-BE: mfspr 4, 274                    # encoding: [0x7c,0x92,0x42,0xa6]
-# CHECK-LE: mfspr 4, 274                    # encoding: [0xa6,0x42,0x92,0x7c]
-            mfsprg %r4, 2
-
-# CHECK-BE: mfspr 4, 275                    # encoding: [0x7c,0x93,0x42,0xa6]
-# CHECK-LE: mfspr 4, 275                    # encoding: [0xa6,0x42,0x93,0x7c]
-            mfsprg %r4, 3
-
-# CHECK-BE: mtspr 272, 4                    # encoding: [0x7c,0x90,0x43,0xa6]
-# CHECK-LE: mtspr 272, 4                    # encoding: [0xa6,0x43,0x90,0x7c]
-            mtsprg 0, %r4
-
-# CHECK-BE: mtspr 273, 4                    # encoding: [0x7c,0x91,0x43,0xa6]
-# CHECK-LE: mtspr 273, 4                    # encoding: [0xa6,0x43,0x91,0x7c]
-            mtsprg 1, %r4
-
-# CHECK-BE: mtspr 274, 4                    # encoding: [0x7c,0x92,0x43,0xa6]
-# CHECK-LE: mtspr 274, 4                    # encoding: [0xa6,0x43,0x92,0x7c]
-            mtsprg 2, %r4
-
-# CHECK-BE: mtspr 275, 4                    # encoding: [0x7c,0x93,0x43,0xa6]
-# CHECK-LE: mtspr 275, 4                    # encoding: [0xa6,0x43,0x93,0x7c]
-            mtsprg 3, %r4
-
-# CHECK-BE: mtspr 272, 4                    # encoding: [0x7c,0x90,0x43,0xa6]
-# CHECK-LE: mtspr 272, 4                    # encoding: [0xa6,0x43,0x90,0x7c]
-            mtsprg0 %r4
-
-# CHECK-BE: mtspr 273, 4                    # encoding: [0x7c,0x91,0x43,0xa6]
-# CHECK-LE: mtspr 273, 4                    # encoding: [0xa6,0x43,0x91,0x7c]
-            mtsprg1 %r4
-
-# CHECK-BE: mtspr 274, 4                    # encoding: [0x7c,0x92,0x43,0xa6]
-# CHECK-LE: mtspr 274, 4                    # encoding: [0xa6,0x43,0x92,0x7c]
-            mtsprg2 %r4
-
-# CHECK-BE: mtspr 275, 4                    # encoding: [0x7c,0x93,0x43,0xa6]
-# CHECK-LE: mtspr 275, 4                    # encoding: [0xa6,0x43,0x93,0x7c]
-            mtsprg3 %r4
+# CHECK-BE: mfspr 4, 260                    # encoding: [0x7c,0x84,0x42,0xa6]
+# CHECK-LE: mfspr 4, 260                    # encoding: [0xa6,0x42,0x84,0x7c]
+            mfsprg %r4, 4
+
+# CHECK-BE: mfspr 4, 261                    # encoding: [0x7c,0x85,0x42,0xa6]
+# CHECK-LE: mfspr 4, 261                    # encoding: [0xa6,0x42,0x85,0x7c]
+            mfsprg %r4, 5
+
+# CHECK-BE: mfspr 4, 262                    # encoding: [0x7c,0x86,0x42,0xa6]
+# CHECK-LE: mfspr 4, 262                    # encoding: [0xa6,0x42,0x86,0x7c]
+            mfsprg %r4, 6
+
+# CHECK-BE: mfspr 4, 263                    # encoding: [0x7c,0x87,0x42,0xa6]
+# CHECK-LE: mfspr 4, 263                    # encoding: [0xa6,0x42,0x87,0x7c]
+            mfsprg %r4, 7
+
+# CHECK-BE: mfspr 2, 260                    # encoding: [0x7c,0x44,0x42,0xa6]
+# CHECK-LE: mfspr 2, 260                    # encoding: [0xa6,0x42,0x44,0x7c]
+            mfsprg4 %r2
+# CHECK-BE: mfspr 2, 261                    # encoding: [0x7c,0x45,0x42,0xa6]
+# CHECK-LE: mfspr 2, 261                    # encoding: [0xa6,0x42,0x45,0x7c]
+            mfsprg5 %r2
+# CHECK-BE: mfspr 2, 262                    # encoding: [0x7c,0x46,0x42,0xa6]
+# CHECK-LE: mfspr 2, 262                    # encoding: [0xa6,0x42,0x46,0x7c]
+            mfsprg6 %r2
+# CHECK-BE: mfspr 2, 263                    # encoding: [0x7c,0x47,0x42,0xa6]
+# CHECK-LE: mfspr 2, 263                    # encoding: [0xa6,0x42,0x47,0x7c]
+            mfsprg7 %r2
+
+# NOT-CHECK-BE: mtspr 260, 4                    # encoding: [0x7c,0x90,0x43,0xa6]
+# NOT-CHECK-LE: mtspr 260, 4                    # encoding: [0xa6,0x43,0x90,0x7c]
+            mtsprg 4, %r4
+
+# NOT-CHECK-BE: mtspr 261, 4                    # encoding: [0x7c,0x91,0x43,0xa6]
+# NOT-CHECK-LE: mtspr 261, 4                    # encoding: [0xa6,0x43,0x91,0x7c]
+            mtsprg 5, %r4
+
+# NOT-CHECK-BE: mtspr 262, 4                    # encoding: [0x7c,0x92,0x43,0xa6]
+# NOT-CHECK-LE: mtspr 262, 4                    # encoding: [0xa6,0x43,0x92,0x7c]
+            mtsprg 6, %r4
+
+# NOT-CHECK-BE: mtspr 263, 4                    # encoding: [0x7c,0x93,0x43,0xa6]
+# NOT-CHECK-LE: mtspr 263, 4                    # encoding: [0xa6,0x43,0x93,0x7c]
+            mtsprg 7, %r4
+
+# CHECK-BE: mtspr 260, 4                    # encoding: [0x7c,0x84,0x43,0xa6]
+# CHECK-LE: mtspr 260, 4                    # encoding: [0xa6,0x43,0x84,0x7c]
+            mtsprg4 %r4
+
+# CHECK-BE: mtspr 261, 4                    # encoding: [0x7c,0x85,0x43,0xa6]
+# CHECK-LE: mtspr 261, 4                    # encoding: [0xa6,0x43,0x85,0x7c]
+            mtsprg5 %r4
+
+# CHECK-BE: mtspr 262, 4                    # encoding: [0x7c,0x86,0x43,0xa6]
+# CHECK-LE: mtspr 262, 4                    # encoding: [0xa6,0x43,0x86,0x7c]
+            mtsprg6 %r4
+
+# CHECK-BE: mtspr 263, 4                    # encoding: [0x7c,0x87,0x43,0xa6]
+# CHECK-LE: mtspr 263, 4                    # encoding: [0xa6,0x43,0x87,0x7c]
+            mtsprg7 %r4
 
 # CHECK-BE: mtspr 280, 4                    # encoding: [0x7c,0x98,0x43,0xa6]
 # CHECK-LE: mtspr 280, 4                    # encoding: [0xa6,0x43,0x98,0x7c]
@@ -141,3 +154,34 @@
 # CHECK-LE: tlbie 4,0                       # encoding: [0x64,0x22,0x00,0x7c]
             tlbie %r4
 
+# CHECK-BE: rfi                             # encoding: [0x4c,0x00,0x00,0x64]
+# CHECK-LE: rfi                             # encoding: [0x64,0x00,0x00,0x4c]
+            rfi
+# CHECK-BE: rfci                            # encoding: [0x4c,0x00,0x00,0x66]
+# CHECK-LE: rfci                            # encoding: [0x66,0x00,0x00,0x4c]
+            rfci
+
+# CHECK-BE: wrtee 12                        # encoding: [0x7d,0x80,0x01,0x06]
+# CHECK-LE: wrtee 12                        # encoding: [0x06,0x01,0x80,0x7d]
+            wrtee %r12
+
+# CHECK-BE: wrteei 0                        # encoding: [0x7c,0x00,0x01,0x46]
+# CHECK-LE: wrteei 0                        # encoding: [0x46,0x01,0x00,0x7c]
+            wrteei 0
+
+# CHECK-BE: wrteei 1                        # encoding: [0x7c,0x00,0x81,0x46]
+# CHECK-LE: wrteei 1                        # encoding: [0x46,0x81,0x00,0x7c]
+            wrteei 1
+
+# CHECK-BE: tlbre                           # encoding: [0x7c,0x00,0x07,0x64]
+# CHECK-LE: tlbre                           # encoding: [0x64,0x07,0x00,0x7c]
+            tlbre
+# CHECK-BE: tlbwe                           # encoding: [0x7c,0x00,0x07,0xa4]
+# CHECK-LE: tlbwe                           # encoding: [0xa4,0x07,0x00,0x7c]
+            tlbwe
+# CHECK-BE: tlbivax 11, 12                  # encoding: [0x7c,0x0b,0x66,0x24]
+# CHECK-LE: tlbivax 11, 12                  # encoding: [0x24,0x66,0x0b,0x7c]
+            tlbivax %r11, %r12
+# CHECK-BE: tlbsx 11, 12                    # encoding: [0x7c,0x0b,0x67,0x24]
+# CHECK-LE: tlbsx 11, 12                    # encoding: [0x24,0x67,0x0b,0x7c]
+            tlbsx %r11, %r12
diff --git a/test/MC/PowerPC/ppc64-encoding-e500.s b/test/MC/PowerPC/ppc64-encoding-e500.s
new file mode 100644
index 000000000000..fee91ee83e38
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-encoding-e500.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s
+# RUN: llvm-mc -triple powerpc64le-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s
+
+# Instructions specific to the e500 / e500mc cores:
+
+# CHECK-BE: rfdi                            # encoding: [0x4c,0x00,0x00,0x4e]
+# CHECK-LE: rfdi                            # encoding: [0x4e,0x00,0x00,0x4c]
+            rfdi
+# CHECK-BE: rfmci                            # encoding: [0x4c,0x00,0x00,0x4c]
+# CHECK-LE: rfmci                            # encoding: [0x4c,0x00,0x00,0x4c]
+            rfmci
diff --git a/test/MC/PowerPC/ppc64-encoding-ext.s b/test/MC/PowerPC/ppc64-encoding-ext.s
index 2374675fae16..dca8a652674f 100644
--- a/test/MC/PowerPC/ppc64-encoding-ext.s
+++ b/test/MC/PowerPC/ppc64-encoding-ext.s
@@ -3419,6 +3419,72 @@
 # CHECK-BE: mfspr 2, 1                      # encoding: [0x7c,0x41,0x02,0xa6]
 # CHECK-LE: mfspr 2, 1                      # encoding: [0xa6,0x02,0x41,0x7c]
             mfxer 2
+# CHECK-BE: mfspr 2, 4                      # encoding: [0x7c,0x44,0x02,0xa6]
+# CHECK-LE: mfspr 2, 4                      # encoding: [0xa6,0x02,0x44,0x7c]
+            mfrtcu 2
+# CHECK-BE: mfspr 2, 5                      # encoding: [0x7c,0x45,0x02,0xa6]
+# CHECK-LE: mfspr 2, 5                      # encoding: [0xa6,0x02,0x45,0x7c]
+            mfrtcl 2
+# CHECK-BE: mtspr 17, 2                     # encoding: [0x7c,0x51,0x03,0xa6]
+# CHECK-LE: mtspr 17, 2                     # encoding: [0xa6,0x03,0x51,0x7c]
+            mtdscr 2
+# CHECK-BE: mfspr 2, 17                     # encoding: [0x7c,0x51,0x02,0xa6]
+# CHECK-LE: mfspr 2, 17                     # encoding: [0xa6,0x02,0x51,0x7c]
+            mfdscr 2
+# CHECK-BE: mtspr 18, 2                     # encoding: [0x7c,0x52,0x03,0xa6]
+# CHECK-LE: mtspr 18, 2                     # encoding: [0xa6,0x03,0x52,0x7c]
+            mtdsisr 2
+# CHECK-BE: mfspr 2, 18                     # encoding: [0x7c,0x52,0x02,0xa6]
+# CHECK-LE: mfspr 2, 18                     # encoding: [0xa6,0x02,0x52,0x7c]
+            mfdsisr 2
+# CHECK-BE: mtspr 19, 2                     # encoding: [0x7c,0x53,0x03,0xa6]
+# CHECK-LE: mtspr 19, 2                     # encoding: [0xa6,0x03,0x53,0x7c]
+            mtdar 2
+# CHECK-BE: mfspr 2, 19                     # encoding: [0x7c,0x53,0x02,0xa6]
+# CHECK-LE: mfspr 2, 19                     # encoding: [0xa6,0x02,0x53,0x7c]
+            mfdar 2
+# CHECK-BE: mtspr 22, 2                     # encoding: [0x7c,0x56,0x03,0xa6]
+# CHECK-LE: mtspr 22, 2                     # encoding: [0xa6,0x03,0x56,0x7c]
+            mtdec 2
+# CHECK-BE: mfspr 2, 22                     # encoding: [0x7c,0x56,0x02,0xa6]
+# CHECK-LE: mfspr 2, 22                     # encoding: [0xa6,0x02,0x56,0x7c]
+            mfdec 2
+# CHECK-BE: mtspr 25, 2                     # encoding: [0x7c,0x59,0x03,0xa6]
+# CHECK-LE: mtspr 25, 2                     # encoding: [0xa6,0x03,0x59,0x7c]
+            mtsdr1 2
+# CHECK-BE: mfspr 2, 25                     # encoding: [0x7c,0x59,0x02,0xa6]
+# CHECK-LE: mfspr 2, 25                     # encoding: [0xa6,0x02,0x59,0x7c]
+            mfsdr1 2
+# CHECK-BE: mtspr 26, 2                     # encoding: [0x7c,0x5a,0x03,0xa6]
+# CHECK-LE: mtspr 26, 2                     # encoding: [0xa6,0x03,0x5a,0x7c]
+            mtsrr0 2
+# CHECK-BE: mfspr 2, 26                     # encoding: [0x7c,0x5a,0x02,0xa6]
+# CHECK-LE: mfspr 2, 26                     # encoding: [0xa6,0x02,0x5a,0x7c]
+            mfsrr0 2
+# CHECK-BE: mtspr 27, 2                     # encoding: [0x7c,0x5b,0x03,0xa6]
+# CHECK-LE: mtspr 27, 2                     # encoding: [0xa6,0x03,0x5b,0x7c]
+            mtsrr1 2
+# CHECK-BE: mfspr 2, 27                     # encoding: [0x7c,0x5b,0x02,0xa6]
+# CHECK-LE: mfspr 2, 27                     # encoding: [0xa6,0x02,0x5b,0x7c]
+            mfsrr1 2
+# CHECK-BE: mtspr 28, 2                     # encoding: [0x7c,0x5c,0x03,0xa6]
+# CHECK-LE: mtspr 28, 2                     # encoding: [0xa6,0x03,0x5c,0x7c]
+            mtcfar 2
+# CHECK-BE: mfspr 2, 28                     # encoding: [0x7c,0x5c,0x02,0xa6]
+# CHECK-LE: mfspr 2, 28                     # encoding: [0xa6,0x02,0x5c,0x7c]
+            mfcfar 2
+# CHECK-BE: mtspr 29, 2                     # encoding: [0x7c,0x5d,0x03,0xa6]
+# CHECK-LE: mtspr 29, 2                     # encoding: [0xa6,0x03,0x5d,0x7c]
+            mtamr 2
+# CHECK-BE: mfspr 2, 29                     # encoding: [0x7c,0x5d,0x02,0xa6]
+# CHECK-LE: mfspr 2, 29                     # encoding: [0xa6,0x02,0x5d,0x7c]
+            mfamr 2
+# CHECK-BE: mtspr 48, 2                     # encoding: [0x7c,0x50,0x0b,0xa6]
+# CHECK-LE: mtspr 48, 2                     # encoding: [0xa6,0x0b,0x50,0x7c]
+            mtpid 2
+# CHECK-BE: mfspr 2, 48                     # encoding: [0x7c,0x50,0x0a,0xa6]
+# CHECK-LE: mfspr 2, 48                     # encoding: [0xa6,0x0a,0x50,0x7c]
+            mfpid 2
 # CHECK-BE: mtlr 2                          # encoding: [0x7c,0x48,0x03,0xa6]
 # CHECK-LE: mtlr 2                          # encoding: [0xa6,0x03,0x48,0x7c]
             mtlr 2
@@ -3465,3 +3531,138 @@
 # CHECK-LE: mtcrf 255, 2                    # encoding: [0x20,0xf1,0x4f,0x7c]
             mtcr 2
 
+# CHECK-BE: mfspr 4, 272                    # encoding: [0x7c,0x90,0x42,0xa6]
+# CHECK-LE: mfspr 4, 272                    # encoding: [0xa6,0x42,0x90,0x7c]
+            mfsprg %r4, 0
+
+# CHECK-BE: mfspr 4, 273                    # encoding: [0x7c,0x91,0x42,0xa6]
+# CHECK-LE: mfspr 4, 273                    # encoding: [0xa6,0x42,0x91,0x7c]
+            mfsprg %r4, 1
+
+# CHECK-BE: mfspr 4, 274                    # encoding: [0x7c,0x92,0x42,0xa6]
+# CHECK-LE: mfspr 4, 274                    # encoding: [0xa6,0x42,0x92,0x7c]
+            mfsprg %r4, 2
+
+# CHECK-BE: mfspr 4, 275                    # encoding: [0x7c,0x93,0x42,0xa6]
+# CHECK-LE: mfspr 4, 275                    # encoding: [0xa6,0x42,0x93,0x7c]
+            mfsprg %r4, 3
+
+# CHECK-BE: mfspr 2, 272                    # encoding: [0x7c,0x50,0x42,0xa6]
+# CHECK-LE: mfspr 2, 272                    # encoding: [0xa6,0x42,0x50,0x7c]
+            mfsprg0 %r2
+# CHECK-BE: mfspr 2, 273                    # encoding: [0x7c,0x51,0x42,0xa6]
+# CHECK-LE: mfspr 2, 273                    # encoding: [0xa6,0x42,0x51,0x7c]
+            mfsprg1 %r2
+# CHECK-BE: mfspr 2, 274                    # encoding: [0x7c,0x52,0x42,0xa6]
+# CHECK-LE: mfspr 2, 274                    # encoding: [0xa6,0x42,0x52,0x7c]
+            mfsprg2 %r2
+# CHECK-BE: mfspr 2, 275                    # encoding: [0x7c,0x53,0x42,0xa6]
+# CHECK-LE: mfspr 2, 275                    # encoding: [0xa6,0x42,0x53,0x7c]
+            mfsprg3 %r2
+
+# CHECK-BE: mtspr 272, 4                    # encoding: [0x7c,0x90,0x43,0xa6]
+# CHECK-LE: mtspr 272, 4                    # encoding: [0xa6,0x43,0x90,0x7c]
+            mtsprg 0, %r4
+
+# CHECK-BE: mtspr 273, 4                    # encoding: [0x7c,0x91,0x43,0xa6]
+# CHECK-LE: mtspr 273, 4                    # encoding: [0xa6,0x43,0x91,0x7c]
+            mtsprg 1, %r4
+
+# CHECK-BE: mtspr 274, 4                    # encoding: [0x7c,0x92,0x43,0xa6]
+# CHECK-LE: mtspr 274, 4                    # encoding: [0xa6,0x43,0x92,0x7c]
+            mtsprg 2, %r4
+
+# CHECK-BE: mtspr 275, 4                    # encoding: [0x7c,0x93,0x43,0xa6]
+# CHECK-LE: mtspr 275, 4                    # encoding: [0xa6,0x43,0x93,0x7c]
+            mtsprg 3, %r4
+
+# CHECK-BE: mtspr 272, 4                    # encoding: [0x7c,0x90,0x43,0xa6]
+# CHECK-LE: mtspr 272, 4                    # encoding: [0xa6,0x43,0x90,0x7c]
+            mtsprg0 %r4
+
+# CHECK-BE: mtspr 273, 4                    # encoding: [0x7c,0x91,0x43,0xa6]
+# CHECK-LE: mtspr 273, 4                    # encoding: [0xa6,0x43,0x91,0x7c]
+            mtsprg1 %r4
+
+# CHECK-BE: mtspr 274, 4                    # encoding: [0x7c,0x92,0x43,0xa6]
+# CHECK-LE: mtspr 274, 4                    # encoding: [0xa6,0x43,0x92,0x7c]
+            mtsprg2 %r4
+
+# CHECK-BE: mtspr 275, 4                    # encoding: [0x7c,0x93,0x43,0xa6]
+# CHECK-LE: mtspr 275, 4                    # encoding: [0xa6,0x43,0x93,0x7c]
+            mtsprg3 %r4
+
+# Altivec Data Stream instruction:
+# CHECK-BE: dss 3                            # encoding: [0x7c,0x60,0x06,0x6c]
+# CHECK-LE: dss 3                            # encoding: [0x6c,0x06,0x60,0x7c]
+            dss 3
+# CHECK-BE: dssall                           # encoding: [0x7e,0x00,0x06,0x6c]
+# CHECK-LE: dssall                           # encoding: [0x6c,0x06,0x00,0x7e]
+            dssall
+# CHECK-BE: dst 12, 11, 3                    # encoding: [0x7c,0x6c,0x5a,0xac]
+# CHECK-LE: dst 12, 11, 3                    # encoding: [0xac,0x5a,0x6c,0x7c]
+            dst %r12, %r11, 3
+# CHECK-BE: dstt 12, 11, 3                   # encoding: [0x7e,0x6c,0x5a,0xac]
+# CHECK-LE: dstt 12, 11, 3                   # encoding: [0xac,0x5a,0x6c,0x7e]
+            dstt %r12, %r11, 3
+# CHECK-BE: dstst 12, 11, 3                  # encoding: [0x7c,0x6c,0x5a,0xec]
+# CHECK-LE: dstst 12, 11, 3                  # encoding: [0xec,0x5a,0x6c,0x7c]
+            dstst %r12, %r11, 3
+# CHECK-BE: dststt 12, 11, 3                 # encoding: [0x7e,0x6c,0x5a,0xec]
+# CHECK-LE: dststt 12, 11, 3                 # encoding: [0xec,0x5a,0x6c,0x7e]
+            dststt %r12, %r11, 3
+
+# CHECK-BE: tlbia                            # encoding: [0x7c,0x00,0x02,0xe4]
+# CHECK-LE: tlbia                            # encoding: [0xe4,0x02,0x00,0x7c]
+            tlbia
+
+# CHECK-BE: lswi 8, 6, 7                     # encoding: [0x7d,0x06,0x3c,0xaa]
+# CHECK-LE: lswi 8, 6, 7                     # encoding: [0xaa,0x3c,0x06,0x7d]
+            lswi %r8, %r6, 7
+# CHECK-BE: stswi 8, 6, 7                    # encoding: [0x7d,0x06,0x3d,0xaa]
+# CHECK-LE: stswi 8, 6, 7                    # encoding: [0xaa,0x3d,0x06,0x7d]
+            stswi %r8, %r6, 7
+
+# CHECK-BE: rfid                            # encoding: [0x4c,0x00,0x00,0x24]
+# CHECK-LE: rfid                            # encoding: [0x24,0x00,0x00,0x4c]
+            rfid
+
+# CHECK-BE: mfspr 2, 280                     # encoding: [0x7c,0x58,0x42,0xa6]
+# CHECK-LE: mfspr 2, 280                     # encoding: [0xa6,0x42,0x58,0x7c]
+            mfasr 2
+# CHECK-BE: mtspr 280, 2                     # encoding: [0x7c,0x58,0x43,0xa6]
+# CHECK-LE: mtspr 280, 2                     # encoding: [0xa6,0x43,0x58,0x7c]
+            mtasr 2
+
+# Load and Store Caching Inhibited Instructions
+# CHECK-BE: lbzcix 21, 5, 7                  # encoding: [0x7e,0xa5,0x3e,0xaa]
+# CHECK-LE: lbzcix 21, 5, 7                  # encoding: [0xaa,0x3e,0xa5,0x7e]
+            lbzcix 21, 5, 7
+# CHECK-BE: lhzcix 21, 5, 7                  # encoding: [0x7e,0xa5,0x3e,0x6a]
+# CHECK-LE: lhzcix 21, 5, 7                  # encoding: [0x6a,0x3e,0xa5,0x7e]
+            lhzcix 21, 5, 7
+# CHECK-BE: lwzcix 21, 5, 7                  # encoding: [0x7e,0xa5,0x3e,0x2a]
+# CHECK-LE: lwzcix 21, 5, 7                  # encoding: [0x2a,0x3e,0xa5,0x7e]
+            lwzcix 21, 5, 7
+# CHECK-BE: ldcix  21, 5, 7                  # encoding: [0x7e,0xa5,0x3e,0xea]
+# CHECK-LE: ldcix  21, 5, 7                  # encoding: [0xea,0x3e,0xa5,0x7e]
+            ldcix  21, 5, 7
+            
+# CHECK-BE: stbcix 21, 5, 7                  # encoding: [0x7e,0xa5,0x3f,0xaa]
+# CHECK-LE: stbcix 21, 5, 7                  # encoding: [0xaa,0x3f,0xa5,0x7e]
+            stbcix 21, 5, 7
+# CHECK-BE: sthcix 21, 5, 7                  # encoding: [0x7e,0xa5,0x3f,0x6a]
+# CHECK-LE: sthcix 21, 5, 7                  # encoding: [0x6a,0x3f,0xa5,0x7e]
+            sthcix 21, 5, 7
+# CHECK-BE: stwcix 21, 5, 7                  # encoding: [0x7e,0xa5,0x3f,0x2a]
+# CHECK-LE: stwcix 21, 5, 7                  # encoding: [0x2a,0x3f,0xa5,0x7e]
+            stwcix 21, 5, 7
+# CHECK-BE: stdcix 21, 5, 7                  # encoding: [0x7e,0xa5,0x3f,0xea]
+# CHECK-LE: stdcix 21, 5, 7                  # encoding: [0xea,0x3f,0xa5,0x7e]
+            stdcix 21, 5, 7
+
+# Processor-Specific Instructions
+# CHECK-BE: attn                             # encoding: [0x00,0x00,0x02,0x00]
+# CHECK-LE: attn                             # encoding: [0x00,0x02,0x00,0x00]
+            attn
+
diff --git a/test/MC/PowerPC/ppc64-encoding-spe.s b/test/MC/PowerPC/ppc64-encoding-spe.s
new file mode 100644
index 000000000000..d90eb30f7ae3
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-encoding-spe.s
@@ -0,0 +1,622 @@
+# RUN: llvm-mc -triple powerpc64-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s
+# RUN: llvm-mc -triple powerpc64le-unknown-unknown --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s
+
+# Instructions from the Signal Processing Engine extension:
+
+# CHECK-BE: evlddx 14, 21, 28               # encoding: [0x11,0xd5,0xe3,0x00]
+# CHECK-LE: evlddx 14, 21, 28               # encoding: [0x00,0xe3,0xd5,0x11]
+            evlddx %r14, %r21, %r28
+# CHECK-BE: evldwx 14, 21, 28               # encoding: [0x11,0xd5,0xe3,0x02]
+# CHECK-LE: evldwx 14, 21, 28               # encoding: [0x02,0xe3,0xd5,0x11]
+            evldwx %r14, %r21, %r28
+# CHECK-BE: evldhx 14, 21, 28               # encoding: [0x11,0xd5,0xe3,0x04]
+# CHECK-LE: evldhx 14, 21, 28               # encoding: [0x04,0xe3,0xd5,0x11]
+            evldhx %r14, %r21, %r28
+# CHECK-BE: evlhhesplatx 14, 21, 28         # encoding: [0x11,0xd5,0xe3,0x08]
+# CHECK-LE: evlhhesplatx 14, 21, 28         # encoding: [0x08,0xe3,0xd5,0x11]
+            evlhhesplatx %r14, %r21, %r28
+# CHECK-BE: evlhhousplatx 14, 21, 28        # encoding: [0x11,0xd5,0xe3,0x0c]
+# CHECK-LE: evlhhousplatx 14, 21, 28        # encoding: [0x0c,0xe3,0xd5,0x11]
+            evlhhousplatx %r14, %r21, %r28
+# CHECK-BE: evlhhossplatx 14, 21, 28        # encoding: [0x11,0xd5,0xe3,0x0e]
+# CHECK-LE: evlhhossplatx 14, 21, 28        # encoding: [0x0e,0xe3,0xd5,0x11]
+            evlhhossplatx %r14, %r21, %r28
+# CHECK-BE: evlwhex 14, 21, 28              # encoding: [0x11,0xd5,0xe3,0x10]
+# CHECK-LE: evlwhex 14, 21, 28              # encoding: [0x10,0xe3,0xd5,0x11]
+            evlwhex %r14, %r21, %r28
+# CHECK-BE: evlwhoux 14, 21, 28             # encoding: [0x11,0xd5,0xe3,0x14]
+# CHECK-LE: evlwhoux 14, 21, 28             # encoding: [0x14,0xe3,0xd5,0x11]
+            evlwhoux %r14, %r21, %r28
+# CHECK-BE: evlwhosx 14, 21, 28             # encoding: [0x11,0xd5,0xe3,0x16]
+# CHECK-LE: evlwhosx 14, 21, 28             # encoding: [0x16,0xe3,0xd5,0x11]
+            evlwhosx %r14, %r21, %r28
+# CHECK-BE: evlwwsplatx 14, 21, 28          # encoding: [0x11,0xd5,0xe3,0x18]
+# CHECK-LE: evlwwsplatx 14, 21, 28          # encoding: [0x18,0xe3,0xd5,0x11]
+            evlwwsplatx %r14, %r21, %r28
+# CHECK-BE: evlwhsplatx 14, 21, 28          # encoding: [0x11,0xd5,0xe3,0x1c]
+# CHECK-LE: evlwhsplatx 14, 21, 28          # encoding: [0x1c,0xe3,0xd5,0x11]
+            evlwhsplatx %r14, %r21, %r28
+# CHECK-BE: evmergehi 14, 21, 28            # encoding: [0x11,0xd5,0xe2,0x2c]
+# CHECK-LE: evmergehi 14, 21, 28            # encoding: [0x2c,0xe2,0xd5,0x11]
+            evmergehi %r14, %r21, %r28
+# CHECK-BE: evmergelo 14, 21, 28            # encoding: [0x11,0xd5,0xe2,0x2d]
+# CHECK-LE: evmergelo 14, 21, 28            # encoding: [0x2d,0xe2,0xd5,0x11]
+            evmergelo %r14, %r21, %r28
+# CHECK-BE: evmergehilo 14, 21, 28          # encoding: [0x11,0xd5,0xe2,0x2e]
+# CHECK-LE: evmergehilo 14, 21, 28          # encoding: [0x2e,0xe2,0xd5,0x11]
+            evmergehilo %r14, %r21, %r28
+# CHECK-BE: evmergelohi 14, 21, 28          # encoding: [0x11,0xd5,0xe2,0x2f]
+# CHECK-LE: evmergelohi 14, 21, 28          # encoding: [0x2f,0xe2,0xd5,0x11]
+            evmergelohi %r14, %r21, %r28
+
+# CHECK-BE: brinc 14, 22, 19                # encoding: [0x11,0xd6,0x9a,0x0f]
+# CHECK-LE: brinc 14, 22, 19                # encoding: [0x0f,0x9a,0xd6,0x11]
+            brinc %r14, %r22, %r19
+# CHECK-BE: evabs 14, 22                    # encoding: [0x11,0xd6,0x02,0x08]
+# CHECK-LE: evabs 14, 22                    # encoding: [0x08,0x02,0xd6,0x11]
+            evabs %r14, %r22
+# CHECK-BE: evaddsmiaaw 14, 22              # encoding: [0x11,0xd6,0x04,0xc9]
+# CHECK-LE: evaddsmiaaw 14, 22              # encoding: [0xc9,0x04,0xd6,0x11]
+            evaddsmiaaw %r14, %r22
+# CHECK-BE: evaddssiaaw 14, 22              # encoding: [0x11,0xd6,0x04,0xc1]
+# CHECK-LE: evaddssiaaw 14, 22              # encoding: [0xc1,0x04,0xd6,0x11]
+            evaddssiaaw %r14, %r22
+# CHECK-BE: evaddusiaaw 14, 22              # encoding: [0x11,0xd6,0x04,0xc0]
+# CHECK-LE: evaddusiaaw 14, 22              # encoding: [0xc0,0x04,0xd6,0x11]
+            evaddusiaaw %r14, %r22
+# CHECK-BE: evaddumiaaw 14, 22              # encoding: [0x11,0xd6,0x04,0xc8]
+# CHECK-LE: evaddumiaaw 14, 22              # encoding: [0xc8,0x04,0xd6,0x11]
+            evaddumiaaw %r14, %r22
+# CHECK-BE: evaddw 14, 22, 19               # encoding: [0x11,0xd6,0x9a,0x00]
+# CHECK-LE: evaddw 14, 22, 19               # encoding: [0x00,0x9a,0xd6,0x11]
+            evaddw %r14, %r22, %r19
+# CHECK-BE: evaddiw 14, 29, 19              # encoding: [0x11,0xd3,0xea,0x02]
+# CHECK-LE: evaddiw 14, 29, 19              # encoding: [0x02,0xea,0xd3,0x11]
+            evaddiw %r14, 29, %r19
+# CHECK-BE: evand 14, 22, 19                # encoding: [0x11,0xd6,0x9a,0x11]
+# CHECK-LE: evand 14, 22, 19                # encoding: [0x11,0x9a,0xd6,0x11]
+            evand %r14, %r22, %r19
+# CHECK-BE: evandc 14, 22, 19               # encoding: [0x11,0xd6,0x9a,0x12]
+# CHECK-LE: evandc 14, 22, 19               # encoding: [0x12,0x9a,0xd6,0x11]
+            evandc %r14, %r22, %r19
+# CHECK-BE: evcmpeq 3, 22, 19            # encoding: [0x11,0x96,0x9a,0x34]
+# CHECK-LE: evcmpeq 3, 22, 19            # encoding: [0x34,0x9a,0x96,0x11]
+            evcmpeq %cr3, %r22, %r19
+# CHECK-BE: evcmpgts 3, 22, 19           # encoding: [0x11,0x96,0x9a,0x31]
+# CHECK-LE: evcmpgts 3, 22, 19           # encoding: [0x31,0x9a,0x96,0x11]
+            evcmpgts %cr3, %r22, %r19
+# CHECK-BE: evcmpgtu 3, 22, 19           # encoding: [0x11,0x96,0x9a,0x30]
+# CHECK-LE: evcmpgtu 3, 22, 19           # encoding: [0x30,0x9a,0x96,0x11]
+            evcmpgtu %cr3, %r22, %r19
+# CHECK-BE: evcmplts 3, 22, 19           # encoding: [0x11,0x96,0x9a,0x33]
+# CHECK-LE: evcmplts 3, 22, 19           # encoding: [0x33,0x9a,0x96,0x11]
+            evcmplts %cr3, %r22, %r19
+# CHECK-BE: evcmpltu 3, 22, 19           # encoding: [0x11,0x96,0x9a,0x32]
+# CHECK-LE: evcmpltu 3, 22, 19           # encoding: [0x32,0x9a,0x96,0x11]
+            evcmpltu %cr3, %r22, %r19
+# CHECK-BE: evcntlsw 14, 22                 # encoding: [0x11,0xd6,0x02,0x0e]
+# CHECK-LE: evcntlsw 14, 22                 # encoding: [0x0e,0x02,0xd6,0x11]
+            evcntlsw %r14, %r22
+# CHECK-BE: evcntlzw 14, 22                 # encoding: [0x11,0xd6,0x02,0x0d]
+# CHECK-LE: evcntlzw 14, 22                 # encoding: [0x0d,0x02,0xd6,0x11]
+            evcntlzw %r14, %r22
+# CHECK-BE: evdivws 14, 22, 19              # encoding: [0x11,0xd6,0x9c,0xc6]
+# CHECK-LE: evdivws 14, 22, 19              # encoding: [0xc6,0x9c,0xd6,0x11]
+            evdivws %r14, %r22, %r19
+# CHECK-BE: evdivwu 14, 22, 19              # encoding: [0x11,0xd6,0x9c,0xc7]
+# CHECK-LE: evdivwu 14, 22, 19              # encoding: [0xc7,0x9c,0xd6,0x11]
+            evdivwu %r14, %r22, %r19
+# CHECK-BE: eveqv 14, 22, 19                # encoding: [0x11,0xd6,0x9a,0x19]
+# CHECK-LE: eveqv 14, 22, 19                # encoding: [0x19,0x9a,0xd6,0x11]
+            eveqv %r14, %r22, %r19
+# CHECK-BE: evextsb 14, 22                  # encoding: [0x11,0xd6,0x02,0x0a]
+# CHECK-LE: evextsb 14, 22                  # encoding: [0x0a,0x02,0xd6,0x11]
+            evextsb %r14, %r22
+# CHECK-BE: evextsh 14, 22                  # encoding: [0x11,0xd6,0x02,0x0b]
+# CHECK-LE: evextsh 14, 22                  # encoding: [0x0b,0x02,0xd6,0x11]
+            evextsh %r14, %r22
+# CHECK-BE: evmhegsmfaa 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x2b]
+# CHECK-LE: evmhegsmfaa 14, 22, 19          # encoding: [0x2b,0x9d,0xd6,0x11]
+            evmhegsmfaa %r14, %r22, %r19
+# CHECK-BE: evmhegsmfan 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0xab]
+# CHECK-LE: evmhegsmfan 14, 22, 19          # encoding: [0xab,0x9d,0xd6,0x11]
+            evmhegsmfan %r14, %r22, %r19
+# CHECK-BE: evmhegsmiaa 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x29]
+# CHECK-LE: evmhegsmiaa 14, 22, 19          # encoding: [0x29,0x9d,0xd6,0x11]
+            evmhegsmiaa %r14, %r22, %r19
+# CHECK-BE: evmhegsmian 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0xa9]
+# CHECK-LE: evmhegsmian 14, 22, 19          # encoding: [0xa9,0x9d,0xd6,0x11]
+            evmhegsmian %r14, %r22, %r19
+# CHECK-BE: evmhegumiaa 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x28]
+# CHECK-LE: evmhegumiaa 14, 22, 19          # encoding: [0x28,0x9d,0xd6,0x11]
+            evmhegumiaa %r14, %r22, %r19
+# CHECK-BE: evmhegumian 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0xa8]
+# CHECK-LE: evmhegumian 14, 22, 19          # encoding: [0xa8,0x9d,0xd6,0x11]
+            evmhegumian %r14, %r22, %r19
+# CHECK-BE: evmhesmf 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x0b]
+# CHECK-LE: evmhesmf 14, 22, 19             # encoding: [0x0b,0x9c,0xd6,0x11]
+            evmhesmf %r14, %r22, %r19
+# CHECK-BE: evmhesmfa 14, 22, 19            # encoding: [0x11,0xd6,0x9c,0x2b]
+# CHECK-LE: evmhesmfa 14, 22, 19            # encoding: [0x2b,0x9c,0xd6,0x11]
+            evmhesmfa %r14, %r22, %r19
+# CHECK-BE: evmhesmfaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x0b]
+# CHECK-LE: evmhesmfaaw 14, 22, 19          # encoding: [0x0b,0x9d,0xd6,0x11]
+            evmhesmfaaw %r14, %r22, %r19
+# CHECK-BE: evmhesmfanw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x8b]
+# CHECK-LE: evmhesmfanw 14, 22, 19          # encoding: [0x8b,0x9d,0xd6,0x11]
+            evmhesmfanw %r14, %r22, %r19
+# CHECK-BE: evmhesmi 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x09]
+# CHECK-LE: evmhesmi 14, 22, 19             # encoding: [0x09,0x9c,0xd6,0x11]
+            evmhesmi %r14, %r22, %r19
+# CHECK-BE: evmhesmia 14, 22, 19            # encoding: [0x11,0xd6,0x9c,0x29]
+# CHECK-LE: evmhesmia 14, 22, 19            # encoding: [0x29,0x9c,0xd6,0x11]
+            evmhesmia %r14, %r22, %r19
+# CHECK-BE: evmhesmiaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x09]
+# CHECK-LE: evmhesmiaaw 14, 22, 19          # encoding: [0x09,0x9d,0xd6,0x11]
+            evmhesmiaaw %r14, %r22, %r19
+# CHECK-BE: evmhesmianw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x89]
+# CHECK-LE: evmhesmianw 14, 22, 19          # encoding: [0x89,0x9d,0xd6,0x11]
+            evmhesmianw %r14, %r22, %r19
+# CHECK-BE: evmhessf 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x03]
+# CHECK-LE: evmhessf 14, 22, 19             # encoding: [0x03,0x9c,0xd6,0x11]
+            evmhessf %r14, %r22, %r19
+# CHECK-BE: evmhessfa 14, 22, 19            # encoding: [0x11,0xd6,0x9c,0x23]
+# CHECK-LE: evmhessfa 14, 22, 19            # encoding: [0x23,0x9c,0xd6,0x11]
+            evmhessfa %r14, %r22, %r19
+# CHECK-BE: evmhessfaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x03]
+# CHECK-LE: evmhessfaaw 14, 22, 19          # encoding: [0x03,0x9d,0xd6,0x11]
+            evmhessfaaw %r14, %r22, %r19
+# CHECK-BE: evmhessfanw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x83]
+# CHECK-LE: evmhessfanw 14, 22, 19          # encoding: [0x83,0x9d,0xd6,0x11]
+            evmhessfanw %r14, %r22, %r19
+# CHECK-BE: evmhessiaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x01]
+# CHECK-LE: evmhessiaaw 14, 22, 19          # encoding: [0x01,0x9d,0xd6,0x11]
+            evmhessiaaw %r14, %r22, %r19
+# CHECK-BE: evmhessianw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x81]
+# CHECK-LE: evmhessianw 14, 22, 19          # encoding: [0x81,0x9d,0xd6,0x11]
+            evmhessianw %r14, %r22, %r19
+# CHECK-BE: evmheumi 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x08]
+# CHECK-LE: evmheumi 14, 22, 19             # encoding: [0x08,0x9c,0xd6,0x11]
+            evmheumi %r14, %r22, %r19
+# CHECK-BE: evmheumia 14, 22, 19            # encoding: [0x11,0xd6,0x9c,0x28]
+# CHECK-LE: evmheumia 14, 22, 19            # encoding: [0x28,0x9c,0xd6,0x11]
+            evmheumia %r14, %r22, %r19
+# CHECK-BE: evmheumiaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x08]
+# CHECK-LE: evmheumiaaw 14, 22, 19          # encoding: [0x08,0x9d,0xd6,0x11]
+            evmheumiaaw %r14, %r22, %r19
+# CHECK-BE: evmheumianw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x88]
+# CHECK-LE: evmheumianw 14, 22, 19          # encoding: [0x88,0x9d,0xd6,0x11]
+            evmheumianw %r14, %r22, %r19
+# CHECK-BE: evmheusiaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x00]
+# CHECK-LE: evmheusiaaw 14, 22, 19          # encoding: [0x00,0x9d,0xd6,0x11]
+            evmheusiaaw %r14, %r22, %r19
+# CHECK-BE: evmheusianw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x80]
+# CHECK-LE: evmheusianw 14, 22, 19          # encoding: [0x80,0x9d,0xd6,0x11]
+            evmheusianw %r14, %r22, %r19
+# CHECK-BE: evmhogsmfaa 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x2f]
+# CHECK-LE: evmhogsmfaa 14, 22, 19          # encoding: [0x2f,0x9d,0xd6,0x11]
+            evmhogsmfaa %r14, %r22, %r19
+# CHECK-BE: evmhogsmfan 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0xaf]
+# CHECK-LE: evmhogsmfan 14, 22, 19          # encoding: [0xaf,0x9d,0xd6,0x11]
+            evmhogsmfan %r14, %r22, %r19
+# CHECK-BE: evmhogsmiaa 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x2d]
+# CHECK-LE: evmhogsmiaa 14, 22, 19          # encoding: [0x2d,0x9d,0xd6,0x11]
+            evmhogsmiaa %r14, %r22, %r19
+# CHECK-BE: evmhogsmian 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0xad]
+# CHECK-LE: evmhogsmian 14, 22, 19          # encoding: [0xad,0x9d,0xd6,0x11]
+            evmhogsmian %r14, %r22, %r19
+# CHECK-BE: evmhogumiaa 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x2c]
+# CHECK-LE: evmhogumiaa 14, 22, 19          # encoding: [0x2c,0x9d,0xd6,0x11]
+            evmhogumiaa %r14, %r22, %r19
+# CHECK-BE: evmhogumian 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0xac]
+# CHECK-LE: evmhogumian 14, 22, 19          # encoding: [0xac,0x9d,0xd6,0x11]
+            evmhogumian %r14, %r22, %r19
+# CHECK-BE: evmhosmf 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x0f]
+# CHECK-LE: evmhosmf 14, 22, 19             # encoding: [0x0f,0x9c,0xd6,0x11]
+            evmhosmf %r14, %r22, %r19
+# CHECK-BE: evmhosmfa 14, 22, 19            # encoding: [0x11,0xd6,0x9c,0x2f]
+# CHECK-LE: evmhosmfa 14, 22, 19            # encoding: [0x2f,0x9c,0xd6,0x11]
+            evmhosmfa %r14, %r22, %r19
+# CHECK-BE: evmhosmfaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x0f]
+# CHECK-LE: evmhosmfaaw 14, 22, 19          # encoding: [0x0f,0x9d,0xd6,0x11]
+            evmhosmfaaw %r14, %r22, %r19
+# CHECK-BE: evmhosmfanw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x8f]
+# CHECK-LE: evmhosmfanw 14, 22, 19          # encoding: [0x8f,0x9d,0xd6,0x11]
+            evmhosmfanw %r14, %r22, %r19
+# CHECK-BE: evmhosmi 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x0d]
+# CHECK-LE: evmhosmi 14, 22, 19             # encoding: [0x0d,0x9c,0xd6,0x11]
+            evmhosmi %r14, %r22, %r19
+# CHECK-BE: evmhosmia 14, 22, 19            # encoding: [0x11,0xd6,0x9c,0x2d]
+# CHECK-LE: evmhosmia 14, 22, 19            # encoding: [0x2d,0x9c,0xd6,0x11]
+            evmhosmia %r14, %r22, %r19
+# CHECK-BE: evmhosmiaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x0d]
+# CHECK-LE: evmhosmiaaw 14, 22, 19          # encoding: [0x0d,0x9d,0xd6,0x11]
+            evmhosmiaaw %r14, %r22, %r19
+# CHECK-BE: evmhosmianw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x8d]
+# CHECK-LE: evmhosmianw 14, 22, 19          # encoding: [0x8d,0x9d,0xd6,0x11]
+            evmhosmianw %r14, %r22, %r19
+# CHECK-BE: evmhossf 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x07]
+# CHECK-LE: evmhossf 14, 22, 19             # encoding: [0x07,0x9c,0xd6,0x11]
+            evmhossf %r14, %r22, %r19
+# CHECK-BE: evmhossfa 14, 22, 19            # encoding: [0x11,0xd6,0x9c,0x27]
+# CHECK-LE: evmhossfa 14, 22, 19            # encoding: [0x27,0x9c,0xd6,0x11]
+            evmhossfa %r14, %r22, %r19
+# CHECK-BE: evmhossfaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x07]
+# CHECK-LE: evmhossfaaw 14, 22, 19          # encoding: [0x07,0x9d,0xd6,0x11]
+            evmhossfaaw %r14, %r22, %r19
+# CHECK-BE: evmhossfanw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x87]
+# CHECK-LE: evmhossfanw 14, 22, 19          # encoding: [0x87,0x9d,0xd6,0x11]
+            evmhossfanw %r14, %r22, %r19
+# CHECK-BE: evmhossiaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x05]
+# CHECK-LE: evmhossiaaw 14, 22, 19          # encoding: [0x05,0x9d,0xd6,0x11]
+            evmhossiaaw %r14, %r22, %r19
+# CHECK-BE: evmhossianw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x85]
+# CHECK-LE: evmhossianw 14, 22, 19          # encoding: [0x85,0x9d,0xd6,0x11]
+            evmhossianw %r14, %r22, %r19
+# CHECK-BE: evmhoumi 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x0c]
+# CHECK-LE: evmhoumi 14, 22, 19             # encoding: [0x0c,0x9c,0xd6,0x11]
+            evmhoumi %r14, %r22, %r19
+# CHECK-BE: evmhoumia 14, 22, 19            # encoding: [0x11,0xd6,0x9c,0x2c]
+# CHECK-LE: evmhoumia 14, 22, 19            # encoding: [0x2c,0x9c,0xd6,0x11]
+            evmhoumia %r14, %r22, %r19
+# CHECK-BE: evmhoumiaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x0c]
+# CHECK-LE: evmhoumiaaw 14, 22, 19          # encoding: [0x0c,0x9d,0xd6,0x11]
+            evmhoumiaaw %r14, %r22, %r19
+# CHECK-BE: evmhoumianw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x8c]
+# CHECK-LE: evmhoumianw 14, 22, 19          # encoding: [0x8c,0x9d,0xd6,0x11]
+            evmhoumianw %r14, %r22, %r19
+# CHECK-BE: evmhousiaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x04]
+# CHECK-LE: evmhousiaaw 14, 22, 19          # encoding: [0x04,0x9d,0xd6,0x11]
+            evmhousiaaw %r14, %r22, %r19
+# CHECK-BE: evmhousianw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x84]
+# CHECK-LE: evmhousianw 14, 22, 19          # encoding: [0x84,0x9d,0xd6,0x11]
+            evmhousianw %r14, %r22, %r19
+# CHECK-BE: evmwhsmf 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x4f]
+# CHECK-LE: evmwhsmf 14, 22, 19             # encoding: [0x4f,0x9c,0xd6,0x11]
+            evmwhsmf %r14, %r22, %r19
+# CHECK-BE: evmwhsmfa 14, 22, 19            # encoding: [0x11,0xd6,0x9c,0x6f]
+# CHECK-LE: evmwhsmfa 14, 22, 19            # encoding: [0x6f,0x9c,0xd6,0x11]
+            evmwhsmfa %r14, %r22, %r19
+# CHECK-BE: evmwhsmi 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x4d]
+# CHECK-LE: evmwhsmi 14, 22, 19             # encoding: [0x4d,0x9c,0xd6,0x11]
+            evmwhsmi %r14, %r22, %r19
+# CHECK-BE: evmwhsmia 14, 22, 19            # encoding: [0x11,0xd6,0x9c,0x6d]
+# CHECK-LE: evmwhsmia 14, 22, 19            # encoding: [0x6d,0x9c,0xd6,0x11]
+            evmwhsmia %r14, %r22, %r19
+# CHECK-BE: evmwhssf 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x47]
+# CHECK-LE: evmwhssf 14, 22, 19             # encoding: [0x47,0x9c,0xd6,0x11]
+            evmwhssf %r14, %r22, %r19
+# CHECK-BE: evmwhssfa 14, 22, 19            # encoding: [0x11,0xd6,0x9c,0x67]
+# CHECK-LE: evmwhssfa 14, 22, 19            # encoding: [0x67,0x9c,0xd6,0x11]
+            evmwhssfa %r14, %r22, %r19
+# CHECK-BE: evmwhumi 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x4c]
+# CHECK-LE: evmwhumi 14, 22, 19             # encoding: [0x4c,0x9c,0xd6,0x11]
+            evmwhumi %r14, %r22, %r19
+# CHECK-BE: evmwhumia 14, 22, 19            # encoding: [0x11,0xd6,0x9c,0x6c]
+# CHECK-LE: evmwhumia 14, 22, 19            # encoding: [0x6c,0x9c,0xd6,0x11]
+            evmwhumia %r14, %r22, %r19
+# CHECK-BE: evmwlsmiaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x49]
+# CHECK-LE: evmwlsmiaaw 14, 22, 19          # encoding: [0x49,0x9d,0xd6,0x11]
+            evmwlsmiaaw %r14, %r22, %r19
+# CHECK-BE: evmwlsmianw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0xc9]
+# CHECK-LE: evmwlsmianw 14, 22, 19          # encoding: [0xc9,0x9d,0xd6,0x11]
+            evmwlsmianw %r14, %r22, %r19
+# CHECK-BE: evmwlssiaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x41]
+# CHECK-LE: evmwlssiaaw 14, 22, 19          # encoding: [0x41,0x9d,0xd6,0x11]
+            evmwlssiaaw %r14, %r22, %r19
+# CHECK-BE: evmwlssianw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0xc1]
+# CHECK-LE: evmwlssianw 14, 22, 19          # encoding: [0xc1,0x9d,0xd6,0x11]
+            evmwlssianw %r14, %r22, %r19
+# CHECK-BE: evmwlumi 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x48]
+# CHECK-LE: evmwlumi 14, 22, 19             # encoding: [0x48,0x9c,0xd6,0x11]
+            evmwlumi %r14, %r22, %r19
+# CHECK-BE: evmwlumia 14, 22, 19            # encoding: [0x11,0xd6,0x9c,0x68]
+# CHECK-LE: evmwlumia 14, 22, 19            # encoding: [0x68,0x9c,0xd6,0x11]
+            evmwlumia %r14, %r22, %r19
+# CHECK-BE: evmwlumiaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x48]
+# CHECK-LE: evmwlumiaaw 14, 22, 19          # encoding: [0x48,0x9d,0xd6,0x11]
+            evmwlumiaaw %r14, %r22, %r19
+# CHECK-BE: evmwlumianw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0xc8]
+# CHECK-LE: evmwlumianw 14, 22, 19          # encoding: [0xc8,0x9d,0xd6,0x11]
+            evmwlumianw %r14, %r22, %r19
+# CHECK-BE: evmwlusiaaw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0x40]
+# CHECK-LE: evmwlusiaaw 14, 22, 19          # encoding: [0x40,0x9d,0xd6,0x11]
+            evmwlusiaaw %r14, %r22, %r19
+# CHECK-BE: evmwlusianw 14, 22, 19          # encoding: [0x11,0xd6,0x9d,0xc0]
+# CHECK-LE: evmwlusianw 14, 22, 19          # encoding: [0xc0,0x9d,0xd6,0x11]
+            evmwlusianw %r14, %r22, %r19
+# CHECK-BE: evmwsmf 14, 22, 19              # encoding: [0x11,0xd6,0x9c,0x5b]
+# CHECK-LE: evmwsmf 14, 22, 19              # encoding: [0x5b,0x9c,0xd6,0x11]
+            evmwsmf %r14, %r22, %r19
+# CHECK-BE: evmwsmfa 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x7b]
+# CHECK-LE: evmwsmfa 14, 22, 19             # encoding: [0x7b,0x9c,0xd6,0x11]
+            evmwsmfa %r14, %r22, %r19
+# CHECK-BE: evmwsmfaa 14, 22, 19            # encoding: [0x11,0xd6,0x9d,0x5b]
+# CHECK-LE: evmwsmfaa 14, 22, 19            # encoding: [0x5b,0x9d,0xd6,0x11]
+            evmwsmfaa %r14, %r22, %r19
+# CHECK-BE: evmwsmfan 14, 22, 19            # encoding: [0x11,0xd6,0x9d,0xdb]
+# CHECK-LE: evmwsmfan 14, 22, 19            # encoding: [0xdb,0x9d,0xd6,0x11]
+            evmwsmfan %r14, %r22, %r19
+# CHECK-BE: evmwsmi 14, 22, 19              # encoding: [0x11,0xd6,0x9c,0x59]
+# CHECK-LE: evmwsmi 14, 22, 19              # encoding: [0x59,0x9c,0xd6,0x11]
+            evmwsmi %r14, %r22, %r19
+# CHECK-BE: evmwsmia 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x79]
+# CHECK-LE: evmwsmia 14, 22, 19             # encoding: [0x79,0x9c,0xd6,0x11]
+            evmwsmia %r14, %r22, %r19
+# CHECK-BE: evmwsmiaa 14, 22, 19            # encoding: [0x11,0xd6,0x9d,0x59]
+# CHECK-LE: evmwsmiaa 14, 22, 19            # encoding: [0x59,0x9d,0xd6,0x11]
+            evmwsmiaa %r14, %r22, %r19
+# CHECK-BE: evmwsmian 14, 22, 19            # encoding: [0x11,0xd6,0x9d,0xd9]
+# CHECK-LE: evmwsmian 14, 22, 19            # encoding: [0xd9,0x9d,0xd6,0x11]
+            evmwsmian %r14, %r22, %r19
+# CHECK-BE: evmwssf 14, 22, 19              # encoding: [0x11,0xd6,0x9c,0x53]
+# CHECK-LE: evmwssf 14, 22, 19              # encoding: [0x53,0x9c,0xd6,0x11]
+            evmwssf %r14, %r22, %r19
+# CHECK-BE: evmwssfa 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x73]
+# CHECK-LE: evmwssfa 14, 22, 19             # encoding: [0x73,0x9c,0xd6,0x11]
+            evmwssfa %r14, %r22, %r19
+# CHECK-BE: evmwssfaa 14, 22, 19            # encoding: [0x11,0xd6,0x9d,0x53]
+# CHECK-LE: evmwssfaa 14, 22, 19            # encoding: [0x53,0x9d,0xd6,0x11]
+            evmwssfaa %r14, %r22, %r19
+# CHECK-BE: evmwssfan 14, 22, 19            # encoding: [0x11,0xd6,0x9d,0xd3]
+# CHECK-LE: evmwssfan 14, 22, 19            # encoding: [0xd3,0x9d,0xd6,0x11]
+            evmwssfan %r14, %r22, %r19
+# CHECK-BE: evmwumi 14, 22, 19              # encoding: [0x11,0xd6,0x9c,0x58]
+# CHECK-LE: evmwumi 14, 22, 19              # encoding: [0x58,0x9c,0xd6,0x11]
+            evmwumi %r14, %r22, %r19
+# CHECK-BE: evmwumia 14, 22, 19             # encoding: [0x11,0xd6,0x9c,0x78]
+# CHECK-LE: evmwumia 14, 22, 19             # encoding: [0x78,0x9c,0xd6,0x11]
+            evmwumia %r14, %r22, %r19
+# CHECK-BE: evmwumiaa 14, 22, 19            # encoding: [0x11,0xd6,0x9d,0x58]
+# CHECK-LE: evmwumiaa 14, 22, 19            # encoding: [0x58,0x9d,0xd6,0x11]
+            evmwumiaa %r14, %r22, %r19
+# CHECK-BE: evmwumian 14, 22, 19            # encoding: [0x11,0xd6,0x9d,0xd8]
+# CHECK-LE: evmwumian 14, 22, 19            # encoding: [0xd8,0x9d,0xd6,0x11]
+            evmwumian %r14, %r22, %r19
+# CHECK-BE: evnand 14, 22, 19               # encoding: [0x11,0xd6,0x9a,0x1e]
+# CHECK-LE: evnand 14, 22, 19               # encoding: [0x1e,0x9a,0xd6,0x11]
+            evnand %r14, %r22, %r19
+# CHECK-BE: evneg 14, 22                    # encoding: [0x11,0xd6,0x02,0x09]
+# CHECK-LE: evneg 14, 22                    # encoding: [0x09,0x02,0xd6,0x11]
+            evneg %r14, %r22
+# CHECK-BE: evnor 14, 22, 19                # encoding: [0x11,0xd6,0x9a,0x18]
+# CHECK-LE: evnor 14, 22, 19                # encoding: [0x18,0x9a,0xd6,0x11]
+            evnor %r14, %r22, %r19
+# CHECK-BE: evor 14, 22, 19                 # encoding: [0x11,0xd6,0x9a,0x17]
+# CHECK-LE: evor 14, 22, 19                 # encoding: [0x17,0x9a,0xd6,0x11]
+            evor %r14, %r22, %r19
+# CHECK-BE: evorc 14, 22, 19                # encoding: [0x11,0xd6,0x9a,0x1b]
+# CHECK-LE: evorc 14, 22, 19                # encoding: [0x1b,0x9a,0xd6,0x11]
+            evorc %r14, %r22, %r19
+# CHECK-BE: evrlwi 14, 29, 19               # encoding: [0x11,0xdd,0x9a,0x2a]
+# CHECK-LE: evrlwi 14, 29, 19               # encoding: [0x2a,0x9a,0xdd,0x11]
+            evrlwi %r14, 29, %r19
+# CHECK-BE: evrlw 14, 22, 19                # encoding: [0x11,0xd6,0x9a,0x28]
+# CHECK-LE: evrlw 14, 22, 19                # encoding: [0x28,0x9a,0xd6,0x11]
+            evrlw %r14, %r22, %r19
+# CHECK-BE: evrndw 14, 22                   # encoding: [0x11,0xd6,0x02,0x0c]
+# CHECK-LE: evrndw 14, 22                   # encoding: [0x0c,0x02,0xd6,0x11]
+            evrndw %r14, %r22
+# CHECK-BE: evslwi 14, 29, 19               # encoding: [0x11,0xdd,0x9a,0x26]
+# CHECK-LE: evslwi 14, 29, 19               # encoding: [0x26,0x9a,0xdd,0x11]
+            evslwi %r14, 29, %r19
+# CHECK-BE: evslw 14, 22, 19                # encoding: [0x11,0xd6,0x9a,0x24]
+# CHECK-LE: evslw 14, 22, 19                # encoding: [0x24,0x9a,0xd6,0x11]
+            evslw %r14, %r22, %r19
+# CHECK-BE: evsplatfi 14, -13               # encoding: [0x11,0xd3,0x02,0x2b]
+# CHECK-LE: evsplatfi 14, -13               # encoding: [0x2b,0x02,0xd3,0x11]
+            evsplatfi %r14, -13
+# CHECK-BE: evsplati 14, -13                # encoding: [0x11,0xd3,0x02,0x29]
+# CHECK-LE: evsplati 14, -13                # encoding: [0x29,0x02,0xd3,0x11]
+            evsplati %r14, -13
+# CHECK-BE: evsrwis 14, 29, 19              # encoding: [0x11,0xdd,0x9a,0x23]
+# CHECK-LE: evsrwis 14, 29, 19              # encoding: [0x23,0x9a,0xdd,0x11]
+            evsrwis %r14, 29, %r19
+# CHECK-BE: evsrwiu 14, 29, 19              # encoding: [0x11,0xdd,0x9a,0x22]
+# CHECK-LE: evsrwiu 14, 29, 19              # encoding: [0x22,0x9a,0xdd,0x11]
+            evsrwiu %r14, 29, %r19
+# CHECK-BE: evsrws 14, 22, 19               # encoding: [0x11,0xd6,0x9a,0x21]
+# CHECK-LE: evsrws 14, 22, 19               # encoding: [0x21,0x9a,0xd6,0x11]
+            evsrws %r14, %r22, %r19
+# CHECK-BE: evsrwu 14, 22, 19               # encoding: [0x11,0xd6,0x9a,0x20]
+# CHECK-LE: evsrwu 14, 22, 19               # encoding: [0x20,0x9a,0xd6,0x11]
+            evsrwu %r14, %r22, %r19
+# CHECK-BE: evstddx 14, 22, 19              # encoding: [0x11,0xd6,0x9b,0x20]
+# CHECK-LE: evstddx 14, 22, 19              # encoding: [0x20,0x9b,0xd6,0x11]
+            evstddx %r14, %r22, %r19
+# CHECK-BE: evstdhx 14, 22, 19              # encoding: [0x11,0xd6,0x9b,0x24]
+# CHECK-LE: evstdhx 14, 22, 19              # encoding: [0x24,0x9b,0xd6,0x11]
+            evstdhx %r14, %r22, %r19
+# CHECK-BE: evstdwx 14, 22, 19              # encoding: [0x11,0xd6,0x9b,0x22]
+# CHECK-LE: evstdwx 14, 22, 19              # encoding: [0x22,0x9b,0xd6,0x11]
+            evstdwx %r14, %r22, %r19
+# CHECK-BE: evstwhex 14, 22, 19             # encoding: [0x11,0xd6,0x9b,0x30]
+# CHECK-LE: evstwhex 14, 22, 19             # encoding: [0x30,0x9b,0xd6,0x11]
+            evstwhex %r14, %r22, %r19
+# CHECK-BE: evstwhox 14, 22, 19             # encoding: [0x11,0xd6,0x9b,0x34]
+# CHECK-LE: evstwhox 14, 22, 19             # encoding: [0x34,0x9b,0xd6,0x11]
+            evstwhox %r14, %r22, %r19
+# CHECK-BE: evstwwex 14, 22, 19             # encoding: [0x11,0xd6,0x9b,0x38]
+# CHECK-LE: evstwwex 14, 22, 19             # encoding: [0x38,0x9b,0xd6,0x11]
+            evstwwex %r14, %r22, %r19
+# CHECK-BE: evstwwox 14, 22, 19             # encoding: [0x11,0xd6,0x9b,0x3c]
+# CHECK-LE: evstwwox 14, 22, 19             # encoding: [0x3c,0x9b,0xd6,0x11]
+            evstwwox %r14, %r22, %r19
+# CHECK-BE: evsubfssiaaw 14, 22             # encoding: [0x11,0xd6,0x04,0xc3]
+# CHECK-LE: evsubfssiaaw 14, 22             # encoding: [0xc3,0x04,0xd6,0x11]
+            evsubfssiaaw %r14, %r22
+# CHECK-BE: evsubfsmiaaw 14, 22             # encoding: [0x11,0xd6,0x04,0xcb]
+# CHECK-LE: evsubfsmiaaw 14, 22             # encoding: [0xcb,0x04,0xd6,0x11]
+            evsubfsmiaaw %r14, %r22
+# CHECK-BE: evsubfumiaaw 14, 22             # encoding: [0x11,0xd6,0x04,0xca]
+# CHECK-LE: evsubfumiaaw 14, 22             # encoding: [0xca,0x04,0xd6,0x11]
+            evsubfumiaaw %r14, %r22
+# CHECK-BE: evsubfusiaaw 14, 22             # encoding: [0x11,0xd6,0x04,0xc2]
+# CHECK-LE: evsubfusiaaw 14, 22             # encoding: [0xc2,0x04,0xd6,0x11]
+            evsubfusiaaw %r14, %r22
+# CHECK-BE: evsubfw 14, 22, 19              # encoding: [0x11,0xd6,0x9a,0x04]
+# CHECK-LE: evsubfw 14, 22, 19              # encoding: [0x04,0x9a,0xd6,0x11]
+            evsubfw %r14, %r22, %r19
+# CHECK-BE: evsubifw 14, 29, 19             # encoding: [0x11,0xdd,0x9a,0x06]
+# CHECK-LE: evsubifw 14, 29, 19             # encoding: [0x06,0x9a,0xdd,0x11]
+            evsubifw %r14, 29, %r19
+# CHECK-BE: evxor 14, 22, 19                # encoding: [0x11,0xd6,0x9a,0x16]
+# CHECK-LE: evxor 14, 22, 19                # encoding: [0x16,0x9a,0xd6,0x11]
+            evxor %r14, %r22, %r19
+
+# CHECK-BE: evldd 14, 0(27)                 # encoding: [0x11,0xdb,0x03,0x01]
+# CHECK-LE: evldd 14, 0(27)                 # encoding: [0x01,0x03,0xdb,0x11]
+            evldd %r14, 0(%r27)
+# CHECK-BE: evldd 14, 248(27)               # encoding: [0x11,0xdb,0xfb,0x01]
+# CHECK-LE: evldd 14, 248(27)               # encoding: [0x01,0xfb,0xdb,0x11]
+            evldd %r14, 248(%r27)
+# CHECK-BE: evldd 14, 248(9)                # encoding: [0x11,0xc9,0xfb,0x01]
+# CHECK-LE: evldd 14, 248(9)                # encoding: [0x01,0xfb,0xc9,0x11]
+            evldd %r14, 248(%r9)
+# CHECK-BE: evldw 14, 0(27)                 # encoding: [0x11,0xdb,0x03,0x03]
+# CHECK-LE: evldw 14, 0(27)                 # encoding: [0x03,0x03,0xdb,0x11]
+            evldw %r14, 0(%r27)
+# CHECK-BE: evldw 14, 248(27)               # encoding: [0x11,0xdb,0xfb,0x03]
+# CHECK-LE: evldw 14, 248(27)               # encoding: [0x03,0xfb,0xdb,0x11]
+            evldw %r14, 248(%r27)
+# CHECK-BE: evldw 14, 248(9)                # encoding: [0x11,0xc9,0xfb,0x03]
+# CHECK-LE: evldw 14, 248(9)                # encoding: [0x03,0xfb,0xc9,0x11]
+            evldw %r14, 248(%r9)
+# CHECK-BE: evldh 14, 0(27)                 # encoding: [0x11,0xdb,0x03,0x05]
+# CHECK-LE: evldh 14, 0(27)                 # encoding: [0x05,0x03,0xdb,0x11]
+            evldh %r14, 0(%r27)
+# CHECK-BE: evldh 14, 248(27)               # encoding: [0x11,0xdb,0xfb,0x05]
+# CHECK-LE: evldh 14, 248(27)               # encoding: [0x05,0xfb,0xdb,0x11]
+            evldh %r14, 248(%r27)
+# CHECK-BE: evldh 14, 248(9)                # encoding: [0x11,0xc9,0xfb,0x05]
+# CHECK-LE: evldh 14, 248(9)                # encoding: [0x05,0xfb,0xc9,0x11]
+            evldh %r14, 248(%r9)
+# CHECK-BE: evlhhesplat 14, 0(27)           # encoding: [0x11,0xdb,0x03,0x09]
+# CHECK-LE: evlhhesplat 14, 0(27)           # encoding: [0x09,0x03,0xdb,0x11]
+            evlhhesplat %r14, 0(%r27)
+# CHECK-BE: evlhhousplat 14, 0(27)          # encoding: [0x11,0xdb,0x03,0x0d]
+# CHECK-LE: evlhhousplat 14, 0(27)          # encoding: [0x0d,0x03,0xdb,0x11]
+            evlhhousplat %r14, 0(%r27)
+# CHECK-BE: evlhhousplat 14, 62(27)         # encoding: [0x11,0xdb,0xfb,0x0d]
+# CHECK-LE: evlhhousplat 14, 62(27)         # encoding: [0x0d,0xfb,0xdb,0x11]
+            evlhhousplat %r14, 62(%r27)
+# CHECK-BE: evlhhousplat 14, 62(9)          # encoding: [0x11,0xc9,0xfb,0x0d]
+# CHECK-LE: evlhhousplat 14, 62(9)          # encoding: [0x0d,0xfb,0xc9,0x11]
+            evlhhousplat %r14, 62(%r9)
+# CHECK-BE: evlhhossplat 14, 0(27)          # encoding: [0x11,0xdb,0x03,0x0f]
+# CHECK-LE: evlhhossplat 14, 0(27)          # encoding: [0x0f,0x03,0xdb,0x11]
+            evlhhossplat %r14, 0(%r27)
+# CHECK-BE: evlhhossplat 14, 62(27)         # encoding: [0x11,0xdb,0xfb,0x0f]
+# CHECK-LE: evlhhossplat 14, 62(27)         # encoding: [0x0f,0xfb,0xdb,0x11]
+            evlhhossplat %r14, 62(%r27)
+# CHECK-BE: evlhhossplat 14, 62(9)          # encoding: [0x11,0xc9,0xfb,0x0f]
+# CHECK-LE: evlhhossplat 14, 62(9)          # encoding: [0x0f,0xfb,0xc9,0x11]
+            evlhhossplat %r14, 62(%r9)
+# CHECK-BE: evlwhe 14, 0(27)                # encoding: [0x11,0xdb,0x03,0x11]
+# CHECK-LE: evlwhe 14, 0(27)                # encoding: [0x11,0x03,0xdb,0x11]
+            evlwhe %r14, 0(%r27)
+# CHECK-BE: evlwhe 14, 124(27)              # encoding: [0x11,0xdb,0xfb,0x11]
+# CHECK-LE: evlwhe 14, 124(27)              # encoding: [0x11,0xfb,0xdb,0x11]
+            evlwhe %r14, 124(%r27)
+# CHECK-BE: evlwhe 14, 124(9)               # encoding: [0x11,0xc9,0xfb,0x11]
+# CHECK-LE: evlwhe 14, 124(9)               # encoding: [0x11,0xfb,0xc9,0x11]
+            evlwhe %r14, 124(%r9)
+# CHECK-BE: evlwhou 14, 0(27)               # encoding: [0x11,0xdb,0x03,0x15]
+# CHECK-LE: evlwhou 14, 0(27)               # encoding: [0x15,0x03,0xdb,0x11]
+            evlwhou %r14, 0(%r27)
+# CHECK-BE: evlwhou 14, 124(27)             # encoding: [0x11,0xdb,0xfb,0x15]
+# CHECK-LE: evlwhou 14, 124(27)             # encoding: [0x15,0xfb,0xdb,0x11]
+            evlwhou %r14, 124(%r27)
+# CHECK-BE: evlwhou 14, 124(9)              # encoding: [0x11,0xc9,0xfb,0x15]
+# CHECK-LE: evlwhou 14, 124(9)              # encoding: [0x15,0xfb,0xc9,0x11]
+            evlwhou %r14, 124(%r9)
+# CHECK-BE: evlwhos 14, 0(27)               # encoding: [0x11,0xdb,0x03,0x17]
+# CHECK-LE: evlwhos 14, 0(27)               # encoding: [0x17,0x03,0xdb,0x11]
+            evlwhos %r14, 0(%r27)
+# CHECK-BE: evlwhos 14, 124(27)             # encoding: [0x11,0xdb,0xfb,0x17]
+# CHECK-LE: evlwhos 14, 124(27)             # encoding: [0x17,0xfb,0xdb,0x11]
+            evlwhos %r14, 124(%r27)
+# CHECK-BE: evlwhos 14, 124(9)              # encoding: [0x11,0xc9,0xfb,0x17]
+# CHECK-LE: evlwhos 14, 124(9)              # encoding: [0x17,0xfb,0xc9,0x11]
+            evlwhos %r14, 124(%r9)
+# CHECK-BE: evlwwsplat 14, 0(27)            # encoding: [0x11,0xdb,0x03,0x19]
+# CHECK-LE: evlwwsplat 14, 0(27)            # encoding: [0x19,0x03,0xdb,0x11]
+            evlwwsplat %r14, 0(%r27)
+# CHECK-BE: evlwwsplat 14, 124(27)          # encoding: [0x11,0xdb,0xfb,0x19]
+# CHECK-LE: evlwwsplat 14, 124(27)          # encoding: [0x19,0xfb,0xdb,0x11]
+            evlwwsplat %r14, 124(%r27)
+# CHECK-BE: evlwwsplat 14, 124(9)           # encoding: [0x11,0xc9,0xfb,0x19]
+# CHECK-LE: evlwwsplat 14, 124(9)           # encoding: [0x19,0xfb,0xc9,0x11]
+            evlwwsplat %r14, 124(%r9)
+# CHECK-BE: evlwhsplat 14, 0(27)            # encoding: [0x11,0xdb,0x03,0x1d]
+# CHECK-LE: evlwhsplat 14, 0(27)            # encoding: [0x1d,0x03,0xdb,0x11]
+            evlwhsplat %r14, 0(%r27)
+# CHECK-BE: evlwhsplat 14, 124(27)          # encoding: [0x11,0xdb,0xfb,0x1d]
+# CHECK-LE: evlwhsplat 14, 124(27)          # encoding: [0x1d,0xfb,0xdb,0x11]
+            evlwhsplat %r14, 124(%r27)
+# CHECK-BE: evlwhsplat 14, 124(9)           # encoding: [0x11,0xc9,0xfb,0x1d]
+# CHECK-LE: evlwhsplat 14, 124(9)           # encoding: [0x1d,0xfb,0xc9,0x11]
+            evlwhsplat %r14, 124(%r9)
+# CHECK-BE: evstdd 14, 0(27)                # encoding: [0x11,0xdb,0x03,0x21]
+# CHECK-LE: evstdd 14, 0(27)                # encoding: [0x21,0x03,0xdb,0x11]
+            evstdd %r14, 0(%r27)
+# CHECK-BE: evstdd 14, 248(27)              # encoding: [0x11,0xdb,0xfb,0x21]
+# CHECK-LE: evstdd 14, 248(27)              # encoding: [0x21,0xfb,0xdb,0x11]
+            evstdd %r14, 248(%r27)
+# CHECK-BE: evstdd 14, 248(9)               # encoding: [0x11,0xc9,0xfb,0x21]
+# CHECK-LE: evstdd 14, 248(9)               # encoding: [0x21,0xfb,0xc9,0x11]
+            evstdd %r14, 248(%r9)
+# CHECK-BE: evstdh 14, 0(27)                # encoding: [0x11,0xdb,0x03,0x25]
+# CHECK-LE: evstdh 14, 0(27)                # encoding: [0x25,0x03,0xdb,0x11]
+            evstdh %r14, 0(%r27)
+# CHECK-BE: evstdh 14, 248(27)              # encoding: [0x11,0xdb,0xfb,0x25]
+# CHECK-LE: evstdh 14, 248(27)              # encoding: [0x25,0xfb,0xdb,0x11]
+            evstdh %r14, 248(%r27)
+# CHECK-BE: evstdh 14, 248(9)               # encoding: [0x11,0xc9,0xfb,0x25]
+# CHECK-LE: evstdh 14, 248(9)               # encoding: [0x25,0xfb,0xc9,0x11]
+            evstdh %r14, 248(%r9)
+# CHECK-BE: evstdw 14, 0(27)                # encoding: [0x11,0xdb,0x03,0x23]
+# CHECK-LE: evstdw 14, 0(27)                # encoding: [0x23,0x03,0xdb,0x11]
+            evstdw %r14, 0(%r27)
+# CHECK-BE: evstdw 14, 248(27)              # encoding: [0x11,0xdb,0xfb,0x23]
+# CHECK-LE: evstdw 14, 248(27)              # encoding: [0x23,0xfb,0xdb,0x11]
+            evstdw %r14, 248(%r27)
+# CHECK-BE: evstdw 14, 248(9)               # encoding: [0x11,0xc9,0xfb,0x23]
+# CHECK-LE: evstdw 14, 248(9)               # encoding: [0x23,0xfb,0xc9,0x11]
+            evstdw %r14, 248(%r9)
+# CHECK-BE: evstwhe 14, 0(27)               # encoding: [0x11,0xdb,0x03,0x31]
+# CHECK-LE: evstwhe 14, 0(27)               # encoding: [0x31,0x03,0xdb,0x11]
+            evstwhe %r14, 0(%r27)
+# CHECK-BE: evstwhe 14, 124(27)             # encoding: [0x11,0xdb,0xfb,0x31]
+# CHECK-LE: evstwhe 14, 124(27)             # encoding: [0x31,0xfb,0xdb,0x11]
+            evstwhe %r14, 124(%r27)
+# CHECK-BE: evstwhe 14, 124(9)              # encoding: [0x11,0xc9,0xfb,0x31]
+# CHECK-LE: evstwhe 14, 124(9)              # encoding: [0x31,0xfb,0xc9,0x11]
+            evstwhe %r14, 124(%r9)
+# CHECK-BE: evstwho 14, 0(27)               # encoding: [0x11,0xdb,0x03,0x35]
+# CHECK-LE: evstwho 14, 0(27)               # encoding: [0x35,0x03,0xdb,0x11]
+            evstwho %r14, 0(%r27)
+# CHECK-BE: evstwho 14, 124(27)             # encoding: [0x11,0xdb,0xfb,0x35]
+# CHECK-LE: evstwho 14, 124(27)             # encoding: [0x35,0xfb,0xdb,0x11]
+            evstwho %r14, 124(%r27)
+# CHECK-BE: evstwho 14, 124(9)              # encoding: [0x11,0xc9,0xfb,0x35]
+# CHECK-LE: evstwho 14, 124(9)              # encoding: [0x35,0xfb,0xc9,0x11]
+            evstwho %r14, 124(%r9)
+# CHECK-BE: evstwwe 14, 0(27)               # encoding: [0x11,0xdb,0x03,0x39]
+# CHECK-LE: evstwwe 14, 0(27)               # encoding: [0x39,0x03,0xdb,0x11]
+            evstwwe %r14, 0(%r27)
+# CHECK-BE: evstwwe 14, 124(27)             # encoding: [0x11,0xdb,0xfb,0x39]
+# CHECK-LE: evstwwe 14, 124(27)             # encoding: [0x39,0xfb,0xdb,0x11]
+            evstwwe %r14, 124(%r27)
+# CHECK-BE: evstwwe 14, 124(9)              # encoding: [0x11,0xc9,0xfb,0x39]
+# CHECK-LE: evstwwe 14, 124(9)              # encoding: [0x39,0xfb,0xc9,0x11]
+            evstwwe %r14, 124(%r9)
+# CHECK-BE: evstwwo 14, 0(27)               # encoding: [0x11,0xdb,0x03,0x3d]
+# CHECK-LE: evstwwo 14, 0(27)               # encoding: [0x3d,0x03,0xdb,0x11]
+            evstwwo %r14, 0(%r27)
+# CHECK-BE: evstwwo 14, 124(27)             # encoding: [0x11,0xdb,0xfb,0x3d]
+# CHECK-LE: evstwwo 14, 124(27)             # encoding: [0x3d,0xfb,0xdb,0x11]
+            evstwwo %r14, 124(%r27)
+# CHECK-BE: evstwwo 14, 124(9)              # encoding: [0x11,0xc9,0xfb,0x3d]
+# CHECK-LE: evstwwo 14, 124(9)              # encoding: [0x3d,0xfb,0xc9,0x11]
+            evstwwo %r14, 124(%r9)
diff --git a/test/MC/PowerPC/ppc64-encoding.s b/test/MC/PowerPC/ppc64-encoding.s
index 4c3530d9c628..6d09dfd8e5fc 100644
--- a/test/MC/PowerPC/ppc64-encoding.s
+++ b/test/MC/PowerPC/ppc64-encoding.s
@@ -612,7 +612,9 @@
 # CHECK-BE: cntlzw. 2, 3                    # encoding: [0x7c,0x62,0x00,0x35]
 # CHECK-LE: cntlzw. 2, 3                    # encoding: [0x35,0x00,0x62,0x7c]
             cntlzw. 2, 3
-# FIXME:    cmpb 2, 3, 4
+            cmpb 7, 21, 4
+# CHECK-BE: cmpb 7, 21, 4                   # encoding: [0x7e,0xa7,0x23,0xf8]
+# CHECK-LE: cmpb 7, 21, 4                   # encoding: [0xf8,0x23,0xa7,0x7e]
 # FIXME:    popcntb 2, 3
 # CHECK-BE: popcntw 2, 3                    # encoding: [0x7c,0x62,0x02,0xf4]
 # CHECK-LE: popcntw 2, 3                    # encoding: [0xf4,0x02,0x62,0x7c]
@@ -767,3 +769,17 @@
 # CHECK-LE: mfocrf 16, 8                    # encoding: [0x26,0x80,0x10,0x7e]
             mfocrf 16, 8
 
+# Move to/from segment register
+# CHECK-BE: mtsr    12, 10                    # encoding: [0x7d,0x4c,0x01,0xa4]
+# CHECK-LE: mtsr    12, 10                    # encoding: [0xa4,0x01,0x4c,0x7d]
+            mtsr    12,%r10
+# CHECK-BE: mfsr    10, 12                    # encoding: [0x7d,0x4c,0x04,0xa6]
+# CHECK-LE: mfsr    10, 12                    # encoding: [0xa6,0x04,0x4c,0x7d]
+            mfsr    %r10,12
+
+# CHECK-BE: mtsrin  10, 12                    # encoding: [0x7d,0x40,0x61,0xe4]
+# CHECK-LE: mtsrin  10, 12                    # encoding: [0xe4,0x61,0x40,0x7d]
+            mtsrin  %r10,%r12
+# CHECK-BE: mfsrin  10, 12                    # encoding: [0x7d,0x40,0x65,0x26]
+# CHECK-LE: mfsrin  10, 12                    # encoding: [0x26,0x65,0x40,0x7d]
+            mfsrin  %r10,%r12
diff --git a/test/MC/PowerPC/ppc64-fixup-apply.s b/test/MC/PowerPC/ppc64-fixup-apply.s
index 169340524775..f98b46d6b18d 100644
--- a/test/MC/PowerPC/ppc64-fixup-apply.s
+++ b/test/MC/PowerPC/ppc64-fixup-apply.s
@@ -14,19 +14,21 @@ addis 1, 1, target
 
 .set target, 0x1234
 
-addi 1, 1, target2@l
-addis 1, 1, target2@ha
+subi 1, 1, -target2@l
+subis 1, 1, -target2@ha
 
 .set target2, 0x12345678
 
 addi 1, 1, target3-target4@l
-addis 1, 1, target3-target4@ha
+subis 1, 1, target4-target3@ha
 
 .set target3, 0x23455678
 .set target4, 0x12341234
 
 addi 1, 1, target5+0x8000@l
 addis 1, 1, target5+0x8000@ha
+ori 1, 1, target5+0x8000@l
+oris 1, 1, target5+0x8000@ha
 
 .set target5, 0x10000001
 
@@ -68,7 +70,7 @@ addis 1, 1, target7@highesta
 # CHECK-NEXT:    ]
 # CHECK-NEXT:    Address: 0x0
 # CHECK-NEXT:    Offset:
-# CHECK-NEXT:    Size: 64
+# CHECK-NEXT:    Size: 72
 # CHECK-NEXT:    Link: 0
 # CHECK-NEXT:    Info: 0
 # CHECK-NEXT:    AddressAlignment: 4
@@ -78,10 +80,12 @@ addis 1, 1, target7@highesta
 # CHECK-LE-NEXT:   0000: 34122138 3412213C 78562138 3412213C
 # CHECK-BE-NEXT:   0010: 38214444 3C211111 38218001 3C211001
 # CHECK-LE-NEXT:   0010: 44442138 1111213C 01802138 0110213C
-# CHECK-BE-NEXT:   0020: 38210008 3C210000 38214321 3C214321
-# CHECK-LE-NEXT:   0020: 08002138 0000213C 21432138 2143213C
-# CHECK-BE-NEXT:   0030: 3821FFFF 3C211234 38210000 3C211235
-# CHECK-LE-NEXT:   0030: FFFF2138 3412213C 00002138 3512213C
+# CHECK-BE-NEXT:   0020: 60218001 64211001 38210008 3C210000
+# CHECK-LE-NEXT:   0020: 01802160 01102164 08002138 0000213C
+# CHECK-BE-NEXT:   0030: 38214321 3C214321 3821FFFF 3C211234
+# CHECK-LE-NEXT:   0030: 21432138 2143213C FFFF2138 3412213C
+# CHECK-BE-NEXT:   0040: 38210000 3C211235
+# CHECK-LE-NEXT:   0040: 00002138 3512213C
 # CHECK-NEXT:    )
 # CHECK-NEXT:  }
 
diff --git a/test/MC/PowerPC/ppc64-fixups.s b/test/MC/PowerPC/ppc64-fixups.s
index d3769f525805..20a70c270547 100644
--- a/test/MC/PowerPC/ppc64-fixups.s
+++ b/test/MC/PowerPC/ppc64-fixups.s
@@ -687,6 +687,18 @@ base:
 # CHECK-BE: ori 1, 2, 2                  # encoding: [0x60,0x41,0x00,0x02]
 # CHECK-LE: ori 1, 2, 2                  # encoding: [0x02,0x00,0x41,0x60]
             ori 1, 2, 131071@ha
+# CHECK-BE: addi 1, 2, -1                # encoding: [0x38,0x22,0xff,0xff]
+# CHECK-LE: addi 1, 2, -1                # encoding: [0xff,0xff,0x22,0x38]
+            addi 1, 2, 131071@l
+# CHECK-BE: addi 1, 2, 1                 # encoding: [0x38,0x22,0x00,0x01]
+# CHECK-LE: addi 1, 2, 1                 # encoding: [0x01,0x00,0x22,0x38]
+            addi 1, 2, 131071@h
+# CHECK-BE: addi 1, 2, 2                 # encoding: [0x38,0x22,0x00,0x02]
+# CHECK-LE: addi 1, 2, 2                 # encoding: [0x02,0x00,0x22,0x38]
+            addi 1, 2, 131071@ha
+# CHECK-BE: addis 1, 2, -4096            # encoding: [0x3c,0x22,0xf0,0x00]
+# CHECK-LE: addis 1, 2, -4096            # encoding: [0x00,0xf0,0x22,0x3c]
+            addis 1, 2, 0xf0000000@h
 
 # Data relocs
 # llvm-mc does not show any "encoding" string for data, so we just check the relocs
diff --git a/test/MC/PowerPC/vsx.s b/test/MC/PowerPC/vsx.s
index d292ddaf5cc4..b355ba37b801 100644
--- a/test/MC/PowerPC/vsx.s
+++ b/test/MC/PowerPC/vsx.s
@@ -1,27 +1,27 @@
 # RUN: llvm-mc -triple powerpc64-unknown-linux-gnu --show-encoding %s | FileCheck -check-prefix=CHECK-BE %s
 # RUN: llvm-mc -triple powerpc64le-unknown-linux-gnu --show-encoding %s | FileCheck -check-prefix=CHECK-LE %s
 
-# CHECK-BE: lxsdx 7, 5, 31                     # encoding: [0x7c,0xe5,0xfc,0x98]
-# CHECK-LE: lxsdx 7, 5, 31                     # encoding: [0x98,0xfc,0xe5,0x7c]
-            lxsdx 7, 5, 31
-# CHECK-BE: lxvd2x 7, 5, 31                    # encoding: [0x7c,0xe5,0xfe,0x98]
-# CHECK-LE: lxvd2x 7, 5, 31                    # encoding: [0x98,0xfe,0xe5,0x7c]
-            lxvd2x 7, 5, 31
-# CHECK-BE: lxvdsx 7, 5, 31                    # encoding: [0x7c,0xe5,0xfa,0x98]
-# CHECK-LE: lxvdsx 7, 5, 31                    # encoding: [0x98,0xfa,0xe5,0x7c]
-            lxvdsx 7, 5, 31
-# CHECK-BE: lxvw4x 7, 5, 31                    # encoding: [0x7c,0xe5,0xfe,0x18]
-# CHECK-LE: lxvw4x 7, 5, 31                    # encoding: [0x18,0xfe,0xe5,0x7c]
-            lxvw4x 7, 5, 31
-# CHECK-BE: stxsdx 8, 5, 31                    # encoding: [0x7d,0x05,0xfd,0x98]
-# CHECK-LE: stxsdx 8, 5, 31                    # encoding: [0x98,0xfd,0x05,0x7d]
-            stxsdx 8, 5, 31
-# CHECK-BE: stxvd2x 8, 5, 31                   # encoding: [0x7d,0x05,0xff,0x98]
-# CHECK-LE: stxvd2x 8, 5, 31                   # encoding: [0x98,0xff,0x05,0x7d]
-            stxvd2x 8, 5, 31
-# CHECK-BE: stxvw4x 8, 5, 31                   # encoding: [0x7d,0x05,0xff,0x18]
-# CHECK-LE: stxvw4x 8, 5, 31                   # encoding: [0x18,0xff,0x05,0x7d]
-            stxvw4x 8, 5, 31
+# CHECK-BE: lxsdx 39, 5, 31                     # encoding: [0x7c,0xe5,0xfc,0x99]
+# CHECK-LE: lxsdx 39, 5, 31                     # encoding: [0x99,0xfc,0xe5,0x7c]
+            lxsdx 39, 5, 31
+# CHECK-BE: lxvd2x 39, 5, 31                    # encoding: [0x7c,0xe5,0xfe,0x99]
+# CHECK-LE: lxvd2x 39, 5, 31                    # encoding: [0x99,0xfe,0xe5,0x7c]
+            lxvd2x 39, 5, 31
+# CHECK-BE: lxvdsx 39, 5, 31                    # encoding: [0x7c,0xe5,0xfa,0x99]
+# CHECK-LE: lxvdsx 39, 5, 31                    # encoding: [0x99,0xfa,0xe5,0x7c]
+            lxvdsx 39, 5, 31
+# CHECK-BE: lxvw4x 39, 5, 31                    # encoding: [0x7c,0xe5,0xfe,0x19]
+# CHECK-LE: lxvw4x 39, 5, 31                    # encoding: [0x19,0xfe,0xe5,0x7c]
+            lxvw4x 39, 5, 31
+# CHECK-BE: stxsdx 40, 5, 31                    # encoding: [0x7d,0x05,0xfd,0x99]
+# CHECK-LE: stxsdx 40, 5, 31                    # encoding: [0x99,0xfd,0x05,0x7d]
+            stxsdx 40, 5, 31
+# CHECK-BE: stxvd2x 40, 5, 31                   # encoding: [0x7d,0x05,0xff,0x99]
+# CHECK-LE: stxvd2x 40, 5, 31                   # encoding: [0x99,0xff,0x05,0x7d]
+            stxvd2x 40, 5, 31
+# CHECK-BE: stxvw4x 40, 5, 31                   # encoding: [0x7d,0x05,0xff,0x19]
+# CHECK-LE: stxvw4x 40, 5, 31                   # encoding: [0x19,0xff,0x05,0x7d]
+            stxvw4x 40, 5, 31
 # CHECK-BE: xsabsdp 7, 27                      # encoding: [0xf0,0xe0,0xdd,0x64]
 # CHECK-LE: xsabsdp 7, 27                      # encoding: [0x64,0xdd,0xe0,0xf0]
             xsabsdp 7, 27
diff --git a/test/MC/R600/lit.local.cfg b/test/MC/R600/lit.local.cfg
new file mode 100644
index 000000000000..ad9ce2541ef7
--- /dev/null
+++ b/test/MC/R600/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'R600' in config.root.targets:
+    config.unsupported = True
diff --git a/test/MC/R600/sopp.s b/test/MC/R600/sopp.s
new file mode 100644
index 000000000000..0f186b17b2d8
--- /dev/null
+++ b/test/MC/R600/sopp.s
@@ -0,0 +1,52 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=SI  -show-encoding %s | FileCheck %s
+
+  s_nop 1            // CHECK: s_nop 1 ; encoding: [0x01,0x00,0x80,0xbf]
+  s_endpgm           // CHECK: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+  s_branch 2         // CHECK: s_branch 2 ; encoding: [0x02,0x00,0x82,0xbf]
+  s_cbranch_scc0 3   // CHECK: s_cbranch_scc0 3 ; encoding: [0x03,0x00,0x84,0xbf]
+  s_cbranch_scc1 4   // CHECK: s_cbranch_scc1 4 ; encoding: [0x04,0x00,0x85,0xbf]
+  s_cbranch_vccz 5   // CHECK: s_cbranch_vccz 5 ; encoding: [0x05,0x00,0x86,0xbf]
+  s_cbranch_vccnz 6  // CHECK: s_cbranch_vccnz 6 ; encoding: [0x06,0x00,0x87,0xbf]
+  s_cbranch_execz 7  // CHECK: s_cbranch_execz 7 ; encoding: [0x07,0x00,0x88,0xbf]
+  s_cbranch_execnz 8 // CHECK: s_cbranch_execnz 8 ; encoding: [0x08,0x00,0x89,0xbf]
+  s_barrier          // CHECK: s_barrier ; encoding: [0x00,0x00,0x8a,0xbf]
+
+//===----------------------------------------------------------------------===//
+// s_waitcnt
+//===----------------------------------------------------------------------===//
+
+  s_waitcnt 0
+  // CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+
+  s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0)
+  // CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+
+  s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+  // CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+
+  s_waitcnt vmcnt(0), expcnt(0), lgkmcnt(0)
+  // CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+
+  s_waitcnt vmcnt(1)
+  // CHECK: s_waitcnt vmcnt(1) ; encoding: [0x71,0x07,0x8c,0xbf]
+
+  s_waitcnt expcnt(2)
+  // CHECK: s_waitcnt expcnt(2) ; encoding: [0x2f,0x07,0x8c,0xbf]
+
+  s_waitcnt lgkmcnt(3)
+  // CHECK: s_waitcnt lgkmcnt(3) ; encoding: [0x7f,0x03,0x8c,0xbf]
+
+  s_waitcnt vmcnt(0), expcnt(0)
+  // CHECK: s_waitcnt vmcnt(0) expcnt(0) ; encoding: [0x00,0x07,0x8c,0xbf]
+
+
+  s_sethalt 9        // CHECK: s_sethalt 9 ; encoding: [0x09,0x00,0x8d,0xbf]
+  s_sleep 10         // CHECK: s_sleep 10 ; encoding: [0x0a,0x00,0x8e,0xbf]
+  s_setprio 1        // CHECK: s_setprio 1 ; encoding: [0x01,0x00,0x8f,0xbf]
+  s_sendmsg 2        // CHECK: s_sendmsg Gs(nop), [m0] ; encoding: [0x02,0x00,0x90,0xbf]
+  s_sendmsghalt 3    // CHECK: s_sendmsghalt 3 ; encoding: [0x03,0x00,0x91,0xbf]
+  s_trap 4           // CHECK: s_trap 4 ; encoding: [0x04,0x00,0x92,0xbf]
+  s_icache_inv       // CHECK: s_icache_inv ; encoding: [0x00,0x00,0x93,0xbf]
+  s_incperflevel 5   // CHECK: s_incperflevel 5 ; encoding: [0x05,0x00,0x94,0xbf]
+  s_decperflevel 6   // CHECK: s_decperflevel 6 ; encoding: [0x06,0x00,0x95,0xbf]
+  s_ttracedata       // CHECK: s_ttracedata ; encoding: [0x00,0x00,0x96,0xbf]
diff --git a/test/MC/SystemZ/lit.local.cfg b/test/MC/SystemZ/lit.local.cfg
index 5c02dd3614a4..78c5738c7acc 100644
--- a/test/MC/SystemZ/lit.local.cfg
+++ b/test/MC/SystemZ/lit.local.cfg
@@ -1,3 +1,6 @@
 if not 'SystemZ' in config.root.targets:
     config.unsupported = True
 
+# http://llvm.org/bugs/show_bug.cgi?id=20980
+if 'ubsan' in config.available_features:
+  config.unsupported = True
diff --git a/test/MC/X86/AlignedBundling/labeloffset.s b/test/MC/X86/AlignedBundling/labeloffset.s
new file mode 100644
index 000000000000..65a008651f19
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/labeloffset.s
@@ -0,0 +1,83 @@
+# RUN: llvm-mc -triple=i686-linux -filetype=obj %s -o - | \
+# RUN: llvm-objdump -disassemble -no-show-raw-insn -r - | FileCheck %s
+# RUN: llvm-mc -triple=i686-nacl -filetype=obj %s -o - | \
+# RUN: llvm-objdump -disassemble -no-show-raw-insn -r - | FileCheck %s
+
+        .bundle_align_mode 5
+        .text
+        .globl  main
+        .align  32, 0x90
+        .type   main,@function
+main:                                   # @main
+# CHECK-LABEL: main:
+# Call + pop sequence for determining the PIC base.
+        .bundle_lock align_to_end
+        calll   .L0$pb
+        .bundle_unlock
+.L0$pb:
+        popl    %eax
+# CHECK: 20: popl
+# 26 bytes of instructions between the pop and the use of the pic base symbol.
+        movl    $3, 2(%ebx, %ebx)
+        movl    $3, 2(%ebx, %ebx)
+        movl    $3, 2(%ebx, %ebx)
+        hlt
+        hlt
+# CHECK: nop
+.Ltmp0:
+        addl    (.Ltmp0-.L0$pb), %eax
+# The addl has bundle padding to push it from 0x3b to 0x40.
+# The difference between the labels should be 0x20 (0x40-0x20) not 0x1b
+# (0x3b-0x20)
+# CHECK: 40: addl 32, %eax
+        popl    %ecx
+        jmp     *%ecx
+
+
+# Also make sure it works with a non-relaxable instruction (cmp vs add)
+# and for 2 adjacent labels that both point to the correct instruction
+        .section .text.bar, "ax"
+        .globl  bar
+        .align  32, 0x90
+        .type   bar,@function
+bar:
+# CHECK-LABEL: bar:
+        .bundle_lock align_to_end
+        calll   .L1$pb
+        .bundle_unlock
+.L1$pb:
+        popl %eax
+# CHECK: 20: popl
+# 26 bytes of instructions between the pop and the use of the pic base symbol.
+        movl    $3, 2(%ebx, %ebx)
+        movl    $3, 2(%ebx, %ebx)
+        movl    $3, 2(%ebx, %ebx)
+        hlt
+        hlt
+# CHECK: nop
+.Ltmp1:
+.Ltmp2:
+        cmpl    %eax, .Ltmp1
+# CHECK: 40: cmpl %eax, 64
+        cmpl     %eax, (.Ltmp2-.L1$pb)
+# CHECK: 46: cmpl %eax, 32
+        popl    %ecx
+        jmp *%ecx
+
+
+# Switch sections in the middle of a function
+        .section .text.foo, "ax"
+        .globl  foo
+        .align  32, 0x90
+        .type   foo,@function
+# CHECK-LABEL: foo:
+foo:
+        inc %eax
+tmp3:
+        .rodata
+        .type   obj,@object
+        .comm   obj,4,4
+        .section .text.foo
+        inc %eax
+# CHECK: tmp3:
+# CHECK-NEXT: 1: incl
diff --git a/test/MC/X86/AlignedBundling/long-nop-pad.s b/test/MC/X86/AlignedBundling/long-nop-pad.s
index ea33e2889b9e..9b1ec1184b05 100644
--- a/test/MC/X86/AlignedBundling/long-nop-pad.s
+++ b/test/MC/X86/AlignedBundling/long-nop-pad.s
@@ -14,7 +14,7 @@ foo:
 # To align this group to a bundle end, we need a 15-byte NOP and a 12-byte NOP.
 # CHECK:        0:  nop
 # CHECK-NEXT:   f:  nop
-# CHECK-NEXT:   1b: callq
+# CHECK:   1b: callq
 
 # This push instruction is 1 byte long
   .bundle_lock align_to_end
diff --git a/test/MC/X86/AlignedBundling/nesting.s b/test/MC/X86/AlignedBundling/nesting.s
new file mode 100644
index 000000000000..8996170a1d19
--- /dev/null
+++ b/test/MC/X86/AlignedBundling/nesting.s
@@ -0,0 +1,67 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
+# RUN:   | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+
+# Will be bundle-aligning to 16 byte boundaries
+  .bundle_align_mode 4
+  .text
+# CHECK-LABEL: foo
+foo:
+# Test that bundle alignment mode can be set more than once.
+  .bundle_align_mode 4
+# Each of these callq instructions is 5 bytes long
+  callq bar
+  callq bar
+  .bundle_lock
+  .bundle_lock
+  callq bar
+  callq bar     
+  .bundle_unlock
+  .bundle_unlock
+# CHECK:      10: callq
+# CHECK-NEXT: 15: callq
+
+  .p2align 4
+# CHECK-LABEL: bar
+bar:
+  callq foo
+  callq foo
+# Check that the callqs get bundled together, and that the whole group is
+# align_to_end
+  .bundle_lock 
+  callq bar
+  .bundle_lock align_to_end
+  callq bar
+  .bundle_unlock
+  .bundle_unlock
+# CHECK:      36: callq
+# CHECK-NEXT: 3b: callq
+
+# CHECK-LABEL: baz
+baz:
+  callq foo
+  callq foo
+# Check that the callqs get bundled together, and that the whole group is
+# align_to_end (with the outer directive marked align_to_end)
+  .bundle_lock align_to_end
+  callq bar
+  .bundle_lock
+  callq bar
+  .bundle_unlock
+  .bundle_unlock
+# CHECK:      56: callq
+# CHECK-NEXT: 5b: callq
+
+# CHECK-LABEL: quux
+quux:
+  callq bar
+  callq bar
+  .bundle_lock
+  .bundle_lock
+  callq bar
+  .bundle_unlock
+  callq bar     
+  .bundle_unlock
+# Check that the calls are bundled together when the second one is after the
+# inner nest is closed.
+# CHECK:      70: callq
+# CHECK-NEXT: 75: callq
diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s
index 187b51264c41..4f4dc702bea9 100644
--- a/test/MC/X86/avx512-encodings.s
+++ b/test/MC/X86/avx512-encodings.s
@@ -113,6 +113,94 @@
 // CHECK:  encoding: [0x62,0xe1,0x14,0x58,0x58,0x92,0xfc,0xfd,0xff,0xff]
           vaddps -516(%rdx){1to16}, %zmm13, %zmm18
 
+// CHECK: vbroadcastsd (%rcx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x19,0x31]
+          vbroadcastsd (%rcx), %zmm30
+
+// CHECK: vbroadcastsd (%rcx), %zmm30 {%k4}
+// CHECK:  encoding: [0x62,0x62,0xfd,0x4c,0x19,0x31]
+          vbroadcastsd (%rcx), %zmm30 {%k4}
+
+// CHECK: vbroadcastsd (%rcx), %zmm30 {%k4} {z}
+// CHECK:  encoding: [0x62,0x62,0xfd,0xcc,0x19,0x31]
+          vbroadcastsd (%rcx), %zmm30 {%k4} {z}
+
+// CHECK: vbroadcastsd 291(%rax,%r14,8), %zmm30
+// CHECK:  encoding: [0x62,0x22,0xfd,0x48,0x19,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vbroadcastsd 291(%rax,%r14,8), %zmm30
+
+// CHECK: vbroadcastsd 1016(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x19,0x72,0x7f]
+          vbroadcastsd 1016(%rdx), %zmm30
+
+// CHECK: vbroadcastsd 1024(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x19,0xb2,0x00,0x04,0x00,0x00]
+          vbroadcastsd 1024(%rdx), %zmm30
+
+// CHECK: vbroadcastsd -1024(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x19,0x72,0x80]
+          vbroadcastsd -1024(%rdx), %zmm30
+
+// CHECK: vbroadcastsd -1032(%rdx), %zmm30
+// CHECK:  encoding: [0x62,0x62,0xfd,0x48,0x19,0xb2,0xf8,0xfb,0xff,0xff]
+          vbroadcastsd -1032(%rdx), %zmm30
+
+// CHECK: vbroadcastsd %xmm22, %zmm21
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x48,0x19,0xee]
+          vbroadcastsd %xmm22, %zmm21
+
+// CHECK: vbroadcastsd %xmm22, %zmm21 {%k7}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x4f,0x19,0xee]
+          vbroadcastsd %xmm22, %zmm21 {%k7}
+
+// CHECK: vbroadcastsd %xmm22, %zmm21 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0xcf,0x19,0xee]
+          vbroadcastsd %xmm22, %zmm21 {%k7} {z}
+
+// CHECK: vbroadcastss (%rcx), %zmm3
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x18,0x19]
+          vbroadcastss (%rcx), %zmm3
+
+// CHECK: vbroadcastss (%rcx), %zmm3 {%k4}
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x4c,0x18,0x19]
+          vbroadcastss (%rcx), %zmm3 {%k4}
+
+// CHECK: vbroadcastss (%rcx), %zmm3 {%k4} {z}
+// CHECK:  encoding: [0x62,0xf2,0x7d,0xcc,0x18,0x19]
+          vbroadcastss (%rcx), %zmm3 {%k4} {z}
+
+// CHECK: vbroadcastss 291(%rax,%r14,8), %zmm3
+// CHECK:  encoding: [0x62,0xb2,0x7d,0x48,0x18,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vbroadcastss 291(%rax,%r14,8), %zmm3
+
+// CHECK: vbroadcastss 508(%rdx), %zmm3
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x18,0x5a,0x7f]
+          vbroadcastss 508(%rdx), %zmm3
+
+// CHECK: vbroadcastss 512(%rdx), %zmm3
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x18,0x9a,0x00,0x02,0x00,0x00]
+          vbroadcastss 512(%rdx), %zmm3
+
+// CHECK: vbroadcastss -512(%rdx), %zmm3
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x18,0x5a,0x80]
+          vbroadcastss -512(%rdx), %zmm3
+
+// CHECK: vbroadcastss -516(%rdx), %zmm3
+// CHECK:  encoding: [0x62,0xf2,0x7d,0x48,0x18,0x9a,0xfc,0xfd,0xff,0xff]
+          vbroadcastss -516(%rdx), %zmm3
+
+// CHECK: vbroadcastss %xmm18, %zmm18
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x48,0x18,0xd2]
+          vbroadcastss %xmm18, %zmm18
+
+// CHECK: vbroadcastss %xmm18, %zmm18 {%k2}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x4a,0x18,0xd2]
+          vbroadcastss %xmm18, %zmm18 {%k2}
+
+// CHECK: vbroadcastss %xmm18, %zmm18 {%k2} {z}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0xca,0x18,0xd2]
+          vbroadcastss %xmm18, %zmm18 {%k2} {z}
+
 // CHECK: vdivpd %zmm11, %zmm6, %zmm18
 // CHECK:  encoding: [0x62,0xc1,0xcd,0x48,0x5e,0xd3]
           vdivpd %zmm11, %zmm6, %zmm18
@@ -449,6 +537,390 @@
 // CHECK:  encoding: [0x62,0xf1,0x64,0x58,0x5d,0x9a,0xfc,0xfd,0xff,0xff]
           vminps -516(%rdx){1to16}, %zmm3, %zmm3
 
+// CHECK: vmovapd %zmm14, %zmm7
+// CHECK:  encoding: [0x62,0xd1,0xfd,0x48,0x28,0xfe]
+          vmovapd %zmm14, %zmm7
+
+// CHECK: vmovapd %zmm14, %zmm7 {%k5}
+// CHECK:  encoding: [0x62,0xd1,0xfd,0x4d,0x28,0xfe]
+          vmovapd %zmm14, %zmm7 {%k5}
+
+// CHECK: vmovapd %zmm14, %zmm7 {%k5} {z}
+// CHECK:  encoding: [0x62,0xd1,0xfd,0xcd,0x28,0xfe]
+          vmovapd %zmm14, %zmm7 {%k5} {z}
+
+// CHECK: vmovapd (%rcx), %zmm7
+// CHECK:  encoding: [0x62,0xf1,0xfd,0x48,0x28,0x39]
+          vmovapd (%rcx), %zmm7
+
+// CHECK: vmovapd 291(%rax,%r14,8), %zmm7
+// CHECK:  encoding: [0x62,0xb1,0xfd,0x48,0x28,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vmovapd 291(%rax,%r14,8), %zmm7
+
+// CHECK: vmovapd 8128(%rdx), %zmm7
+// CHECK:  encoding: [0x62,0xf1,0xfd,0x48,0x28,0x7a,0x7f]
+          vmovapd 8128(%rdx), %zmm7
+
+// CHECK: vmovapd 8192(%rdx), %zmm7
+// CHECK:  encoding: [0x62,0xf1,0xfd,0x48,0x28,0xba,0x00,0x20,0x00,0x00]
+          vmovapd 8192(%rdx), %zmm7
+
+// CHECK: vmovapd -8192(%rdx), %zmm7
+// CHECK:  encoding: [0x62,0xf1,0xfd,0x48,0x28,0x7a,0x80]
+          vmovapd -8192(%rdx), %zmm7
+
+// CHECK: vmovapd -8256(%rdx), %zmm7
+// CHECK:  encoding: [0x62,0xf1,0xfd,0x48,0x28,0xba,0xc0,0xdf,0xff,0xff]
+          vmovapd -8256(%rdx), %zmm7
+
+// CHECK: vmovaps %zmm9, %zmm5
+// CHECK:  encoding: [0x62,0xd1,0x7c,0x48,0x28,0xe9]
+          vmovaps %zmm9, %zmm5
+
+// CHECK: vmovaps %zmm9, %zmm5 {%k1}
+// CHECK:  encoding: [0x62,0xd1,0x7c,0x49,0x28,0xe9]
+          vmovaps %zmm9, %zmm5 {%k1}
+
+// CHECK: vmovaps %zmm9, %zmm5 {%k1} {z}
+// CHECK:  encoding: [0x62,0xd1,0x7c,0xc9,0x28,0xe9]
+          vmovaps %zmm9, %zmm5 {%k1} {z}
+
+// CHECK: vmovaps (%rcx), %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x7c,0x48,0x28,0x29]
+          vmovaps (%rcx), %zmm5
+
+// CHECK: vmovaps 291(%rax,%r14,8), %zmm5
+// CHECK:  encoding: [0x62,0xb1,0x7c,0x48,0x28,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmovaps 291(%rax,%r14,8), %zmm5
+
+// CHECK: vmovaps 8128(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x7c,0x48,0x28,0x6a,0x7f]
+          vmovaps 8128(%rdx), %zmm5
+
+// CHECK: vmovaps 8192(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x7c,0x48,0x28,0xaa,0x00,0x20,0x00,0x00]
+          vmovaps 8192(%rdx), %zmm5
+
+// CHECK: vmovaps -8192(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x7c,0x48,0x28,0x6a,0x80]
+          vmovaps -8192(%rdx), %zmm5
+
+// CHECK: vmovaps -8256(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x7c,0x48,0x28,0xaa,0xc0,0xdf,0xff,0xff]
+          vmovaps -8256(%rdx), %zmm5
+
+// CHECK: vmovdqa32 %zmm18, %zmm22
+// CHECK:  encoding: [0x62,0xa1,0x7d,0x48,0x6f,0xf2]
+          vmovdqa32 %zmm18, %zmm22
+
+// CHECK: vmovdqa32 %zmm18, %zmm22 {%k6}
+// CHECK:  encoding: [0x62,0xa1,0x7d,0x4e,0x6f,0xf2]
+          vmovdqa32 %zmm18, %zmm22 {%k6}
+
+// CHECK: vmovdqa32 %zmm18, %zmm22 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa1,0x7d,0xce,0x6f,0xf2]
+          vmovdqa32 %zmm18, %zmm22 {%k6} {z}
+
+// CHECK: vmovdqa32 (%rcx), %zmm22
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x48,0x6f,0x31]
+          vmovdqa32 (%rcx), %zmm22
+
+// CHECK: vmovdqa32 291(%rax,%r14,8), %zmm22
+// CHECK:  encoding: [0x62,0xa1,0x7d,0x48,0x6f,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqa32 291(%rax,%r14,8), %zmm22
+
+// CHECK: vmovdqa32 8128(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x48,0x6f,0x72,0x7f]
+          vmovdqa32 8128(%rdx), %zmm22
+
+// CHECK: vmovdqa32 8192(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x48,0x6f,0xb2,0x00,0x20,0x00,0x00]
+          vmovdqa32 8192(%rdx), %zmm22
+
+// CHECK: vmovdqa32 -8192(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x48,0x6f,0x72,0x80]
+          vmovdqa32 -8192(%rdx), %zmm22
+
+// CHECK: vmovdqa32 -8256(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x48,0x6f,0xb2,0xc0,0xdf,0xff,0xff]
+          vmovdqa32 -8256(%rdx), %zmm22
+
+// CHECK: vmovdqa64 %zmm12, %zmm22
+// CHECK:  encoding: [0x62,0xc1,0xfd,0x48,0x6f,0xf4]
+          vmovdqa64 %zmm12, %zmm22
+
+// CHECK: vmovdqa64 %zmm12, %zmm22 {%k5}
+// CHECK:  encoding: [0x62,0xc1,0xfd,0x4d,0x6f,0xf4]
+          vmovdqa64 %zmm12, %zmm22 {%k5}
+
+// CHECK: vmovdqa64 %zmm12, %zmm22 {%k5} {z}
+// CHECK:  encoding: [0x62,0xc1,0xfd,0xcd,0x6f,0xf4]
+          vmovdqa64 %zmm12, %zmm22 {%k5} {z}
+
+// CHECK: vmovdqa64 (%rcx), %zmm22
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x6f,0x31]
+          vmovdqa64 (%rcx), %zmm22
+
+// CHECK: vmovdqa64 291(%rax,%r14,8), %zmm22
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x48,0x6f,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqa64 291(%rax,%r14,8), %zmm22
+
+// CHECK: vmovdqa64 8128(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x6f,0x72,0x7f]
+          vmovdqa64 8128(%rdx), %zmm22
+
+// CHECK: vmovdqa64 8192(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x6f,0xb2,0x00,0x20,0x00,0x00]
+          vmovdqa64 8192(%rdx), %zmm22
+
+// CHECK: vmovdqa64 -8192(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x6f,0x72,0x80]
+          vmovdqa64 -8192(%rdx), %zmm22
+
+// CHECK: vmovdqa64 -8256(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x6f,0xb2,0xc0,0xdf,0xff,0xff]
+          vmovdqa64 -8256(%rdx), %zmm22
+
+// CHECK: vmovdqu32 %zmm24, %zmm5
+// CHECK:  encoding: [0x62,0x91,0x7e,0x48,0x6f,0xe8]
+          vmovdqu32 %zmm24, %zmm5
+
+// CHECK: vmovdqu32 %zmm24, %zmm5 {%k5}
+// CHECK:  encoding: [0x62,0x91,0x7e,0x4d,0x6f,0xe8]
+          vmovdqu32 %zmm24, %zmm5 {%k5}
+
+// CHECK: vmovdqu32 %zmm24, %zmm5 {%k5} {z}
+// CHECK:  encoding: [0x62,0x91,0x7e,0xcd,0x6f,0xe8]
+          vmovdqu32 %zmm24, %zmm5 {%k5} {z}
+
+// CHECK: vmovdqu32 (%rcx), %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x48,0x6f,0x29]
+          vmovdqu32 (%rcx), %zmm5
+
+// CHECK: vmovdqu32 291(%rax,%r14,8), %zmm5
+// CHECK:  encoding: [0x62,0xb1,0x7e,0x48,0x6f,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu32 291(%rax,%r14,8), %zmm5
+
+// CHECK: vmovdqu32 8128(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x48,0x6f,0x6a,0x7f]
+          vmovdqu32 8128(%rdx), %zmm5
+
+// CHECK: vmovdqu32 8192(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x48,0x6f,0xaa,0x00,0x20,0x00,0x00]
+          vmovdqu32 8192(%rdx), %zmm5
+
+// CHECK: vmovdqu32 -8192(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x48,0x6f,0x6a,0x80]
+          vmovdqu32 -8192(%rdx), %zmm5
+
+// CHECK: vmovdqu32 -8256(%rdx), %zmm5
+// CHECK:  encoding: [0x62,0xf1,0x7e,0x48,0x6f,0xaa,0xc0,0xdf,0xff,0xff]
+          vmovdqu32 -8256(%rdx), %zmm5
+
+// CHECK: vmovdqu64 %zmm15, %zmm6
+// CHECK:  encoding: [0x62,0xd1,0xfe,0x48,0x6f,0xf7]
+          vmovdqu64 %zmm15, %zmm6
+
+// CHECK: vmovdqu64 %zmm15, %zmm6 {%k3}
+// CHECK:  encoding: [0x62,0xd1,0xfe,0x4b,0x6f,0xf7]
+          vmovdqu64 %zmm15, %zmm6 {%k3}
+
+// CHECK: vmovdqu64 %zmm15, %zmm6 {%k3} {z}
+// CHECK:  encoding: [0x62,0xd1,0xfe,0xcb,0x6f,0xf7]
+          vmovdqu64 %zmm15, %zmm6 {%k3} {z}
+
+// CHECK: vmovdqu64 (%rcx), %zmm6
+// CHECK:  encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x31]
+          vmovdqu64 (%rcx), %zmm6
+
+// CHECK: vmovdqu64 291(%rax,%r14,8), %zmm6
+// CHECK:  encoding: [0x62,0xb1,0xfe,0x48,0x6f,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu64 291(%rax,%r14,8), %zmm6
+
+// CHECK: vmovdqu64 8128(%rdx), %zmm6
+// CHECK:  encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x72,0x7f]
+          vmovdqu64 8128(%rdx), %zmm6
+
+// CHECK: vmovdqu64 8192(%rdx), %zmm6
+// CHECK:  encoding: [0x62,0xf1,0xfe,0x48,0x6f,0xb2,0x00,0x20,0x00,0x00]
+          vmovdqu64 8192(%rdx), %zmm6
+
+// CHECK: vmovdqu64 -8192(%rdx), %zmm6
+// CHECK:  encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x72,0x80]
+          vmovdqu64 -8192(%rdx), %zmm6
+
+// CHECK: vmovdqu64 -8256(%rdx), %zmm6
+// CHECK:  encoding: [0x62,0xf1,0xfe,0x48,0x6f,0xb2,0xc0,0xdf,0xff,0xff]
+          vmovdqu64 -8256(%rdx), %zmm6
+
+// CHECK: vmovntdq %zmm24, (%rcx)
+// CHECK:  encoding: [0x62,0x61,0x7d,0x48,0xe7,0x01]
+          vmovntdq %zmm24, (%rcx)
+
+// CHECK: vmovntdq %zmm24, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x21,0x7d,0x48,0xe7,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vmovntdq %zmm24, 291(%rax,%r14,8)
+
+// CHECK: vmovntdq %zmm24, 8128(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7d,0x48,0xe7,0x42,0x7f]
+          vmovntdq %zmm24, 8128(%rdx)
+
+// CHECK: vmovntdq %zmm24, 8192(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7d,0x48,0xe7,0x82,0x00,0x20,0x00,0x00]
+          vmovntdq %zmm24, 8192(%rdx)
+
+// CHECK: vmovntdq %zmm24, -8192(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7d,0x48,0xe7,0x42,0x80]
+          vmovntdq %zmm24, -8192(%rdx)
+
+// CHECK: vmovntdq %zmm24, -8256(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7d,0x48,0xe7,0x82,0xc0,0xdf,0xff,0xff]
+          vmovntdq %zmm24, -8256(%rdx)
+
+// CHECK: vmovntdqa (%rcx), %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x09]
+          vmovntdqa (%rcx), %zmm17
+
+// CHECK: vmovntdqa 291(%rax,%r14,8), %zmm17
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x48,0x2a,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmovntdqa 291(%rax,%r14,8), %zmm17
+
+// CHECK: vmovntdqa 8128(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x4a,0x7f]
+          vmovntdqa 8128(%rdx), %zmm17
+
+// CHECK: vmovntdqa 8192(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x8a,0x00,0x20,0x00,0x00]
+          vmovntdqa 8192(%rdx), %zmm17
+
+// CHECK: vmovntdqa -8192(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x4a,0x80]
+          vmovntdqa -8192(%rdx), %zmm17
+
+// CHECK: vmovntdqa -8256(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x2a,0x8a,0xc0,0xdf,0xff,0xff]
+          vmovntdqa -8256(%rdx), %zmm17
+
+// CHECK: vmovntpd %zmm17, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x09]
+          vmovntpd %zmm17, (%rcx)
+
+// CHECK: vmovntpd %zmm17, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x48,0x2b,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmovntpd %zmm17, 291(%rax,%r14,8)
+
+// CHECK: vmovntpd %zmm17, 8128(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x4a,0x7f]
+          vmovntpd %zmm17, 8128(%rdx)
+
+// CHECK: vmovntpd %zmm17, 8192(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x8a,0x00,0x20,0x00,0x00]
+          vmovntpd %zmm17, 8192(%rdx)
+
+// CHECK: vmovntpd %zmm17, -8192(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x4a,0x80]
+          vmovntpd %zmm17, -8192(%rdx)
+
+// CHECK: vmovntpd %zmm17, -8256(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x2b,0x8a,0xc0,0xdf,0xff,0xff]
+          vmovntpd %zmm17, -8256(%rdx)
+
+// CHECK: vmovntps %zmm5, (%rcx)
+// CHECK:  encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x29]
+          vmovntps %zmm5, (%rcx)
+
+// CHECK: vmovntps %zmm5, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xb1,0x7c,0x48,0x2b,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmovntps %zmm5, 291(%rax,%r14,8)
+
+// CHECK: vmovntps %zmm5, 8128(%rdx)
+// CHECK:  encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x6a,0x7f]
+          vmovntps %zmm5, 8128(%rdx)
+
+// CHECK: vmovntps %zmm5, 8192(%rdx)
+// CHECK:  encoding: [0x62,0xf1,0x7c,0x48,0x2b,0xaa,0x00,0x20,0x00,0x00]
+          vmovntps %zmm5, 8192(%rdx)
+
+// CHECK: vmovntps %zmm5, -8192(%rdx)
+// CHECK:  encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x6a,0x80]
+          vmovntps %zmm5, -8192(%rdx)
+
+// CHECK: vmovntps %zmm5, -8256(%rdx)
+// CHECK:  encoding: [0x62,0xf1,0x7c,0x48,0x2b,0xaa,0xc0,0xdf,0xff,0xff]
+          vmovntps %zmm5, -8256(%rdx)
+
+// CHECK: vmovupd %zmm9, %zmm27
+// CHECK:  encoding: [0x62,0x41,0xfd,0x48,0x10,0xd9]
+          vmovupd %zmm9, %zmm27
+
+// CHECK: vmovupd %zmm9, %zmm27 {%k2}
+// CHECK:  encoding: [0x62,0x41,0xfd,0x4a,0x10,0xd9]
+          vmovupd %zmm9, %zmm27 {%k2}
+
+// CHECK: vmovupd %zmm9, %zmm27 {%k2} {z}
+// CHECK:  encoding: [0x62,0x41,0xfd,0xca,0x10,0xd9]
+          vmovupd %zmm9, %zmm27 {%k2} {z}
+
+// CHECK: vmovupd (%rcx), %zmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x48,0x10,0x19]
+          vmovupd (%rcx), %zmm27
+
+// CHECK: vmovupd 291(%rax,%r14,8), %zmm27
+// CHECK:  encoding: [0x62,0x21,0xfd,0x48,0x10,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vmovupd 291(%rax,%r14,8), %zmm27
+
+// CHECK: vmovupd 8128(%rdx), %zmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x48,0x10,0x5a,0x7f]
+          vmovupd 8128(%rdx), %zmm27
+
+// CHECK: vmovupd 8192(%rdx), %zmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x48,0x10,0x9a,0x00,0x20,0x00,0x00]
+          vmovupd 8192(%rdx), %zmm27
+
+// CHECK: vmovupd -8192(%rdx), %zmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x48,0x10,0x5a,0x80]
+          vmovupd -8192(%rdx), %zmm27
+
+// CHECK: vmovupd -8256(%rdx), %zmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x48,0x10,0x9a,0xc0,0xdf,0xff,0xff]
+          vmovupd -8256(%rdx), %zmm27
+
+// CHECK: vmovups %zmm22, %zmm22
+// CHECK:  encoding: [0x62,0xa1,0x7c,0x48,0x10,0xf6]
+          vmovups %zmm22, %zmm22
+
+// CHECK: vmovups %zmm22, %zmm22 {%k3}
+// CHECK:  encoding: [0x62,0xa1,0x7c,0x4b,0x10,0xf6]
+          vmovups %zmm22, %zmm22 {%k3}
+
+// CHECK: vmovups %zmm22, %zmm22 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa1,0x7c,0xcb,0x10,0xf6]
+          vmovups %zmm22, %zmm22 {%k3} {z}
+
+// CHECK: vmovups (%rcx), %zmm22
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x48,0x10,0x31]
+          vmovups (%rcx), %zmm22
+
+// CHECK: vmovups 291(%rax,%r14,8), %zmm22
+// CHECK:  encoding: [0x62,0xa1,0x7c,0x48,0x10,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vmovups 291(%rax,%r14,8), %zmm22
+
+// CHECK: vmovups 8128(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x48,0x10,0x72,0x7f]
+          vmovups 8128(%rdx), %zmm22
+
+// CHECK: vmovups 8192(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x48,0x10,0xb2,0x00,0x20,0x00,0x00]
+          vmovups 8192(%rdx), %zmm22
+
+// CHECK: vmovups -8192(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x48,0x10,0x72,0x80]
+          vmovups -8192(%rdx), %zmm22
+
+// CHECK: vmovups -8256(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x48,0x10,0xb2,0xc0,0xdf,0xff,0xff]
+          vmovups -8256(%rdx), %zmm22
+
 // CHECK: vmulpd %zmm23, %zmm4, %zmm24
 // CHECK:  encoding: [0x62,0x21,0xdd,0x48,0x59,0xc7]
           vmulpd %zmm23, %zmm4, %zmm24
@@ -1041,6 +1513,474 @@
 // CHECK:  encoding: [0x62,0xe1,0xdd,0x58,0xdb,0x8a,0xf8,0xfb,0xff,0xff]
           vpandq -1032(%rdx){1to8}, %zmm4, %zmm17
 
+// CHECK: vpbroadcastd %eax, %zmm11
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x7c,0xd8]
+          vpbroadcastd %eax, %zmm11
+
+// CHECK: vpbroadcastd %eax, %zmm11 {%k6}
+// CHECK:  encoding: [0x62,0x72,0x7d,0x4e,0x7c,0xd8]
+          vpbroadcastd %eax, %zmm11 {%k6}
+
+// CHECK: vpbroadcastd %eax, %zmm11 {%k6} {z}
+// CHECK:  encoding: [0x62,0x72,0x7d,0xce,0x7c,0xd8]
+          vpbroadcastd %eax, %zmm11 {%k6} {z}
+
+// CHECK: vpbroadcastd %ebp, %zmm11
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x7c,0xdd]
+          vpbroadcastd %ebp, %zmm11
+
+// CHECK: vpbroadcastd %r13d, %zmm11
+// CHECK:  encoding: [0x62,0x52,0x7d,0x48,0x7c,0xdd]
+          vpbroadcastd %r13d, %zmm11
+
+// CHECK: vpbroadcastq %rax, %zmm1
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x48,0x7c,0xc8]
+          vpbroadcastq %rax, %zmm1
+
+// CHECK: vpbroadcastq %rax, %zmm1 {%k6}
+// CHECK:  encoding: [0x62,0xf2,0xfd,0x4e,0x7c,0xc8]
+          vpbroadcastq %rax, %zmm1 {%k6}
+
+// CHECK: vpbroadcastq %rax, %zmm1 {%k6} {z}
+// CHECK:  encoding: [0x62,0xf2,0xfd,0xce,0x7c,0xc8]
+          vpbroadcastq %rax, %zmm1 {%k6} {z}
+
+// CHECK: vpbroadcastq %r8, %zmm1
+// CHECK:  encoding: [0x62,0xd2,0xfd,0x48,0x7c,0xc8]
+          vpbroadcastq %r8, %zmm1
+
+// CHECK: vpcmpd $171, %zmm10, %zmm25, %k5
+// CHECK:  encoding: [0x62,0xd3,0x35,0x40,0x1f,0xea,0xab]
+          vpcmpd $171, %zmm10, %zmm25, %k5
+
+// CHECK: vpcmpd $171, %zmm10, %zmm25, %k5 {%k3}
+// CHECK:  encoding: [0x62,0xd3,0x35,0x43,0x1f,0xea,0xab]
+          vpcmpd $171, %zmm10, %zmm25, %k5 {%k3}
+
+// CHECK: vpcmpd $123, %zmm10, %zmm25, %k5
+// CHECK:  encoding: [0x62,0xd3,0x35,0x40,0x1f,0xea,0x7b]
+          vpcmpd $123, %zmm10, %zmm25, %k5
+
+// CHECK: vpcmpd $123, (%rcx), %zmm25, %k5
+// CHECK:  encoding: [0x62,0xf3,0x35,0x40,0x1f,0x29,0x7b]
+          vpcmpd $123, (%rcx), %zmm25, %k5
+
+// CHECK: vpcmpd $123, 291(%rax,%r14,8), %zmm25, %k5
+// CHECK:  encoding: [0x62,0xb3,0x35,0x40,0x1f,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpd $123, 291(%rax,%r14,8), %zmm25, %k5
+
+// CHECK: vpcmpd $123, (%rcx){1to16}, %zmm25, %k5
+// CHECK:  encoding: [0x62,0xf3,0x35,0x50,0x1f,0x29,0x7b]
+          vpcmpd $123, (%rcx){1to16}, %zmm25, %k5
+
+// CHECK: vpcmpd $123, 8128(%rdx), %zmm25, %k5
+// CHECK:  encoding: [0x62,0xf3,0x35,0x40,0x1f,0x6a,0x7f,0x7b]
+          vpcmpd $123, 8128(%rdx), %zmm25, %k5
+
+// CHECK: vpcmpd $123, 8192(%rdx), %zmm25, %k5
+// CHECK:  encoding: [0x62,0xf3,0x35,0x40,0x1f,0xaa,0x00,0x20,0x00,0x00,0x7b]
+          vpcmpd $123, 8192(%rdx), %zmm25, %k5
+
+// CHECK: vpcmpd $123, -8192(%rdx), %zmm25, %k5
+// CHECK:  encoding: [0x62,0xf3,0x35,0x40,0x1f,0x6a,0x80,0x7b]
+          vpcmpd $123, -8192(%rdx), %zmm25, %k5
+
+// CHECK: vpcmpd $123, -8256(%rdx), %zmm25, %k5
+// CHECK:  encoding: [0x62,0xf3,0x35,0x40,0x1f,0xaa,0xc0,0xdf,0xff,0xff,0x7b]
+          vpcmpd $123, -8256(%rdx), %zmm25, %k5
+
+// CHECK: vpcmpd $123, 508(%rdx){1to16}, %zmm25, %k5
+// CHECK:  encoding: [0x62,0xf3,0x35,0x50,0x1f,0x6a,0x7f,0x7b]
+          vpcmpd $123, 508(%rdx){1to16}, %zmm25, %k5
+
+// CHECK: vpcmpd $123, 512(%rdx){1to16}, %zmm25, %k5
+// CHECK:  encoding: [0x62,0xf3,0x35,0x50,0x1f,0xaa,0x00,0x02,0x00,0x00,0x7b]
+          vpcmpd $123, 512(%rdx){1to16}, %zmm25, %k5
+
+// CHECK: vpcmpd $123, -512(%rdx){1to16}, %zmm25, %k5
+// CHECK:  encoding: [0x62,0xf3,0x35,0x50,0x1f,0x6a,0x80,0x7b]
+          vpcmpd $123, -512(%rdx){1to16}, %zmm25, %k5
+
+// CHECK: vpcmpd $123, -516(%rdx){1to16}, %zmm25, %k5
+// CHECK:  encoding: [0x62,0xf3,0x35,0x50,0x1f,0xaa,0xfc,0xfd,0xff,0xff,0x7b]
+          vpcmpd $123, -516(%rdx){1to16}, %zmm25, %k5
+
+// CHECK: vpcmpeqd %zmm10, %zmm2, %k5
+// CHECK:  encoding: [0x62,0xd1,0x6d,0x48,0x76,0xea]
+          vpcmpeqd %zmm10, %zmm2, %k5
+
+// CHECK: vpcmpeqd %zmm10, %zmm2, %k5 {%k7}
+// CHECK:  encoding: [0x62,0xd1,0x6d,0x4f,0x76,0xea]
+          vpcmpeqd %zmm10, %zmm2, %k5 {%k7}
+
+// CHECK: vpcmpeqd (%rcx), %zmm2, %k5
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x48,0x76,0x29]
+          vpcmpeqd (%rcx), %zmm2, %k5
+
+// CHECK: vpcmpeqd 291(%rax,%r14,8), %zmm2, %k5
+// CHECK:  encoding: [0x62,0xb1,0x6d,0x48,0x76,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpeqd 291(%rax,%r14,8), %zmm2, %k5
+
+// CHECK: vpcmpeqd (%rcx){1to16}, %zmm2, %k5
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x58,0x76,0x29]
+          vpcmpeqd (%rcx){1to16}, %zmm2, %k5
+
+// CHECK: vpcmpeqd 8128(%rdx), %zmm2, %k5
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x48,0x76,0x6a,0x7f]
+          vpcmpeqd 8128(%rdx), %zmm2, %k5
+
+// CHECK: vpcmpeqd 8192(%rdx), %zmm2, %k5
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x48,0x76,0xaa,0x00,0x20,0x00,0x00]
+          vpcmpeqd 8192(%rdx), %zmm2, %k5
+
+// CHECK: vpcmpeqd -8192(%rdx), %zmm2, %k5
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x48,0x76,0x6a,0x80]
+          vpcmpeqd -8192(%rdx), %zmm2, %k5
+
+// CHECK: vpcmpeqd -8256(%rdx), %zmm2, %k5
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x48,0x76,0xaa,0xc0,0xdf,0xff,0xff]
+          vpcmpeqd -8256(%rdx), %zmm2, %k5
+
+// CHECK: vpcmpeqd 508(%rdx){1to16}, %zmm2, %k5
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x58,0x76,0x6a,0x7f]
+          vpcmpeqd 508(%rdx){1to16}, %zmm2, %k5
+
+// CHECK: vpcmpeqd 512(%rdx){1to16}, %zmm2, %k5
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x58,0x76,0xaa,0x00,0x02,0x00,0x00]
+          vpcmpeqd 512(%rdx){1to16}, %zmm2, %k5
+
+// CHECK: vpcmpeqd -512(%rdx){1to16}, %zmm2, %k5
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x58,0x76,0x6a,0x80]
+          vpcmpeqd -512(%rdx){1to16}, %zmm2, %k5
+
+// CHECK: vpcmpeqd -516(%rdx){1to16}, %zmm2, %k5
+// CHECK:  encoding: [0x62,0xf1,0x6d,0x58,0x76,0xaa,0xfc,0xfd,0xff,0xff]
+          vpcmpeqd -516(%rdx){1to16}, %zmm2, %k5
+
+// CHECK: vpcmpeqq %zmm2, %zmm22, %k3
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x40,0x29,0xda]
+          vpcmpeqq %zmm2, %zmm22, %k3
+
+// CHECK: vpcmpeqq %zmm2, %zmm22, %k3 {%k6}
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x46,0x29,0xda]
+          vpcmpeqq %zmm2, %zmm22, %k3 {%k6}
+
+// CHECK: vpcmpeqq (%rcx), %zmm22, %k3
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x40,0x29,0x19]
+          vpcmpeqq (%rcx), %zmm22, %k3
+
+// CHECK: vpcmpeqq 291(%rax,%r14,8), %zmm22, %k3
+// CHECK:  encoding: [0x62,0xb2,0xcd,0x40,0x29,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpeqq 291(%rax,%r14,8), %zmm22, %k3
+
+// CHECK: vpcmpeqq (%rcx){1to8}, %zmm22, %k3
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x50,0x29,0x19]
+          vpcmpeqq (%rcx){1to8}, %zmm22, %k3
+
+// CHECK: vpcmpeqq 8128(%rdx), %zmm22, %k3
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x40,0x29,0x5a,0x7f]
+          vpcmpeqq 8128(%rdx), %zmm22, %k3
+
+// CHECK: vpcmpeqq 8192(%rdx), %zmm22, %k3
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x40,0x29,0x9a,0x00,0x20,0x00,0x00]
+          vpcmpeqq 8192(%rdx), %zmm22, %k3
+
+// CHECK: vpcmpeqq -8192(%rdx), %zmm22, %k3
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x40,0x29,0x5a,0x80]
+          vpcmpeqq -8192(%rdx), %zmm22, %k3
+
+// CHECK: vpcmpeqq -8256(%rdx), %zmm22, %k3
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x40,0x29,0x9a,0xc0,0xdf,0xff,0xff]
+          vpcmpeqq -8256(%rdx), %zmm22, %k3
+
+// CHECK: vpcmpeqq 1016(%rdx){1to8}, %zmm22, %k3
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x50,0x29,0x5a,0x7f]
+          vpcmpeqq 1016(%rdx){1to8}, %zmm22, %k3
+
+// CHECK: vpcmpeqq 1024(%rdx){1to8}, %zmm22, %k3
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x50,0x29,0x9a,0x00,0x04,0x00,0x00]
+          vpcmpeqq 1024(%rdx){1to8}, %zmm22, %k3
+
+// CHECK: vpcmpeqq -1024(%rdx){1to8}, %zmm22, %k3
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x50,0x29,0x5a,0x80]
+          vpcmpeqq -1024(%rdx){1to8}, %zmm22, %k3
+
+// CHECK: vpcmpeqq -1032(%rdx){1to8}, %zmm22, %k3
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x50,0x29,0x9a,0xf8,0xfb,0xff,0xff]
+          vpcmpeqq -1032(%rdx){1to8}, %zmm22, %k3
+
+// CHECK: vpcmpgtd %zmm8, %zmm21, %k5
+// CHECK:  encoding: [0x62,0xd1,0x55,0x40,0x66,0xe8]
+          vpcmpgtd %zmm8, %zmm21, %k5
+
+// CHECK: vpcmpgtd %zmm8, %zmm21, %k5 {%k5}
+// CHECK:  encoding: [0x62,0xd1,0x55,0x45,0x66,0xe8]
+          vpcmpgtd %zmm8, %zmm21, %k5 {%k5}
+
+// CHECK: vpcmpgtd (%rcx), %zmm21, %k5
+// CHECK:  encoding: [0x62,0xf1,0x55,0x40,0x66,0x29]
+          vpcmpgtd (%rcx), %zmm21, %k5
+
+// CHECK: vpcmpgtd 291(%rax,%r14,8), %zmm21, %k5
+// CHECK:  encoding: [0x62,0xb1,0x55,0x40,0x66,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpgtd 291(%rax,%r14,8), %zmm21, %k5
+
+// CHECK: vpcmpgtd (%rcx){1to16}, %zmm21, %k5
+// CHECK:  encoding: [0x62,0xf1,0x55,0x50,0x66,0x29]
+          vpcmpgtd (%rcx){1to16}, %zmm21, %k5
+
+// CHECK: vpcmpgtd 8128(%rdx), %zmm21, %k5
+// CHECK:  encoding: [0x62,0xf1,0x55,0x40,0x66,0x6a,0x7f]
+          vpcmpgtd 8128(%rdx), %zmm21, %k5
+
+// CHECK: vpcmpgtd 8192(%rdx), %zmm21, %k5
+// CHECK:  encoding: [0x62,0xf1,0x55,0x40,0x66,0xaa,0x00,0x20,0x00,0x00]
+          vpcmpgtd 8192(%rdx), %zmm21, %k5
+
+// CHECK: vpcmpgtd -8192(%rdx), %zmm21, %k5
+// CHECK:  encoding: [0x62,0xf1,0x55,0x40,0x66,0x6a,0x80]
+          vpcmpgtd -8192(%rdx), %zmm21, %k5
+
+// CHECK: vpcmpgtd -8256(%rdx), %zmm21, %k5
+// CHECK:  encoding: [0x62,0xf1,0x55,0x40,0x66,0xaa,0xc0,0xdf,0xff,0xff]
+          vpcmpgtd -8256(%rdx), %zmm21, %k5
+
+// CHECK: vpcmpgtd 508(%rdx){1to16}, %zmm21, %k5
+// CHECK:  encoding: [0x62,0xf1,0x55,0x50,0x66,0x6a,0x7f]
+          vpcmpgtd 508(%rdx){1to16}, %zmm21, %k5
+
+// CHECK: vpcmpgtd 512(%rdx){1to16}, %zmm21, %k5
+// CHECK:  encoding: [0x62,0xf1,0x55,0x50,0x66,0xaa,0x00,0x02,0x00,0x00]
+          vpcmpgtd 512(%rdx){1to16}, %zmm21, %k5
+
+// CHECK: vpcmpgtd -512(%rdx){1to16}, %zmm21, %k5
+// CHECK:  encoding: [0x62,0xf1,0x55,0x50,0x66,0x6a,0x80]
+          vpcmpgtd -512(%rdx){1to16}, %zmm21, %k5
+
+// CHECK: vpcmpgtd -516(%rdx){1to16}, %zmm21, %k5
+// CHECK:  encoding: [0x62,0xf1,0x55,0x50,0x66,0xaa,0xfc,0xfd,0xff,0xff]
+          vpcmpgtd -516(%rdx){1to16}, %zmm21, %k5
+
+// CHECK: vpcmpgtq %zmm17, %zmm20, %k2
+// CHECK:  encoding: [0x62,0xb2,0xdd,0x40,0x37,0xd1]
+          vpcmpgtq %zmm17, %zmm20, %k2
+
+// CHECK: vpcmpgtq %zmm17, %zmm20, %k2 {%k3}
+// CHECK:  encoding: [0x62,0xb2,0xdd,0x43,0x37,0xd1]
+          vpcmpgtq %zmm17, %zmm20, %k2 {%k3}
+
+// CHECK: vpcmpgtq (%rcx), %zmm20, %k2
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x40,0x37,0x11]
+          vpcmpgtq (%rcx), %zmm20, %k2
+
+// CHECK: vpcmpgtq 291(%rax,%r14,8), %zmm20, %k2
+// CHECK:  encoding: [0x62,0xb2,0xdd,0x40,0x37,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpgtq 291(%rax,%r14,8), %zmm20, %k2
+
+// CHECK: vpcmpgtq (%rcx){1to8}, %zmm20, %k2
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x50,0x37,0x11]
+          vpcmpgtq (%rcx){1to8}, %zmm20, %k2
+
+// CHECK: vpcmpgtq 8128(%rdx), %zmm20, %k2
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x40,0x37,0x52,0x7f]
+          vpcmpgtq 8128(%rdx), %zmm20, %k2
+
+// CHECK: vpcmpgtq 8192(%rdx), %zmm20, %k2
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x40,0x37,0x92,0x00,0x20,0x00,0x00]
+          vpcmpgtq 8192(%rdx), %zmm20, %k2
+
+// CHECK: vpcmpgtq -8192(%rdx), %zmm20, %k2
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x40,0x37,0x52,0x80]
+          vpcmpgtq -8192(%rdx), %zmm20, %k2
+
+// CHECK: vpcmpgtq -8256(%rdx), %zmm20, %k2
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x40,0x37,0x92,0xc0,0xdf,0xff,0xff]
+          vpcmpgtq -8256(%rdx), %zmm20, %k2
+
+// CHECK: vpcmpgtq 1016(%rdx){1to8}, %zmm20, %k2
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x50,0x37,0x52,0x7f]
+          vpcmpgtq 1016(%rdx){1to8}, %zmm20, %k2
+
+// CHECK: vpcmpgtq 1024(%rdx){1to8}, %zmm20, %k2
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x50,0x37,0x92,0x00,0x04,0x00,0x00]
+          vpcmpgtq 1024(%rdx){1to8}, %zmm20, %k2
+
+// CHECK: vpcmpgtq -1024(%rdx){1to8}, %zmm20, %k2
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x50,0x37,0x52,0x80]
+          vpcmpgtq -1024(%rdx){1to8}, %zmm20, %k2
+
+// CHECK: vpcmpgtq -1032(%rdx){1to8}, %zmm20, %k2
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x50,0x37,0x92,0xf8,0xfb,0xff,0xff]
+          vpcmpgtq -1032(%rdx){1to8}, %zmm20, %k2
+
+// CHECK: vpcmpq $171, %zmm28, %zmm28, %k5
+// CHECK:  encoding: [0x62,0x93,0x9d,0x40,0x1f,0xec,0xab]
+          vpcmpq $171, %zmm28, %zmm28, %k5
+
+// CHECK: vpcmpq $171, %zmm28, %zmm28, %k5 {%k3}
+// CHECK:  encoding: [0x62,0x93,0x9d,0x43,0x1f,0xec,0xab]
+          vpcmpq $171, %zmm28, %zmm28, %k5 {%k3}
+
+// CHECK: vpcmpq $123, %zmm28, %zmm28, %k5
+// CHECK:  encoding: [0x62,0x93,0x9d,0x40,0x1f,0xec,0x7b]
+          vpcmpq $123, %zmm28, %zmm28, %k5
+
+// CHECK: vpcmpq $123, (%rcx), %zmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x40,0x1f,0x29,0x7b]
+          vpcmpq $123, (%rcx), %zmm28, %k5
+
+// CHECK: vpcmpq $123, 291(%rax,%r14,8), %zmm28, %k5
+// CHECK:  encoding: [0x62,0xb3,0x9d,0x40,0x1f,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpq $123, 291(%rax,%r14,8), %zmm28, %k5
+
+// CHECK: vpcmpq $123, (%rcx){1to8}, %zmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x50,0x1f,0x29,0x7b]
+          vpcmpq $123, (%rcx){1to8}, %zmm28, %k5
+
+// CHECK: vpcmpq $123, 8128(%rdx), %zmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x40,0x1f,0x6a,0x7f,0x7b]
+          vpcmpq $123, 8128(%rdx), %zmm28, %k5
+
+// CHECK: vpcmpq $123, 8192(%rdx), %zmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x40,0x1f,0xaa,0x00,0x20,0x00,0x00,0x7b]
+          vpcmpq $123, 8192(%rdx), %zmm28, %k5
+
+// CHECK: vpcmpq $123, -8192(%rdx), %zmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x40,0x1f,0x6a,0x80,0x7b]
+          vpcmpq $123, -8192(%rdx), %zmm28, %k5
+
+// CHECK: vpcmpq $123, -8256(%rdx), %zmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x40,0x1f,0xaa,0xc0,0xdf,0xff,0xff,0x7b]
+          vpcmpq $123, -8256(%rdx), %zmm28, %k5
+
+// CHECK: vpcmpq $123, 1016(%rdx){1to8}, %zmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x50,0x1f,0x6a,0x7f,0x7b]
+          vpcmpq $123, 1016(%rdx){1to8}, %zmm28, %k5
+
+// CHECK: vpcmpq $123, 1024(%rdx){1to8}, %zmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x50,0x1f,0xaa,0x00,0x04,0x00,0x00,0x7b]
+          vpcmpq $123, 1024(%rdx){1to8}, %zmm28, %k5
+
+// CHECK: vpcmpq $123, -1024(%rdx){1to8}, %zmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x50,0x1f,0x6a,0x80,0x7b]
+          vpcmpq $123, -1024(%rdx){1to8}, %zmm28, %k5
+
+// CHECK: vpcmpq $123, -1032(%rdx){1to8}, %zmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x50,0x1f,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
+          vpcmpq $123, -1032(%rdx){1to8}, %zmm28, %k5
+
+// CHECK: vpcmpud $171, %zmm7, %zmm25, %k2
+// CHECK:  encoding: [0x62,0xf3,0x35,0x40,0x1e,0xd7,0xab]
+          vpcmpud $171, %zmm7, %zmm25, %k2
+
+// CHECK: vpcmpud $171, %zmm7, %zmm25, %k2 {%k1}
+// CHECK:  encoding: [0x62,0xf3,0x35,0x41,0x1e,0xd7,0xab]
+          vpcmpud $171, %zmm7, %zmm25, %k2 {%k1}
+
+// CHECK: vpcmpud $123, %zmm7, %zmm25, %k2
+// CHECK:  encoding: [0x62,0xf3,0x35,0x40,0x1e,0xd7,0x7b]
+          vpcmpud $123, %zmm7, %zmm25, %k2
+
+// CHECK: vpcmpud $123, (%rcx), %zmm25, %k2
+// CHECK:  encoding: [0x62,0xf3,0x35,0x40,0x1e,0x11,0x7b]
+          vpcmpud $123, (%rcx), %zmm25, %k2
+
+// CHECK: vpcmpud $123, 291(%rax,%r14,8), %zmm25, %k2
+// CHECK:  encoding: [0x62,0xb3,0x35,0x40,0x1e,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpud $123, 291(%rax,%r14,8), %zmm25, %k2
+
+// CHECK: vpcmpud $123, (%rcx){1to16}, %zmm25, %k2
+// CHECK:  encoding: [0x62,0xf3,0x35,0x50,0x1e,0x11,0x7b]
+          vpcmpud $123, (%rcx){1to16}, %zmm25, %k2
+
+// CHECK: vpcmpud $123, 8128(%rdx), %zmm25, %k2
+// CHECK:  encoding: [0x62,0xf3,0x35,0x40,0x1e,0x52,0x7f,0x7b]
+          vpcmpud $123, 8128(%rdx), %zmm25, %k2
+
+// CHECK: vpcmpud $123, 8192(%rdx), %zmm25, %k2
+// CHECK:  encoding: [0x62,0xf3,0x35,0x40,0x1e,0x92,0x00,0x20,0x00,0x00,0x7b]
+          vpcmpud $123, 8192(%rdx), %zmm25, %k2
+
+// CHECK: vpcmpud $123, -8192(%rdx), %zmm25, %k2
+// CHECK:  encoding: [0x62,0xf3,0x35,0x40,0x1e,0x52,0x80,0x7b]
+          vpcmpud $123, -8192(%rdx), %zmm25, %k2
+
+// CHECK: vpcmpud $123, -8256(%rdx), %zmm25, %k2
+// CHECK:  encoding: [0x62,0xf3,0x35,0x40,0x1e,0x92,0xc0,0xdf,0xff,0xff,0x7b]
+          vpcmpud $123, -8256(%rdx), %zmm25, %k2
+
+// CHECK: vpcmpud $123, 508(%rdx){1to16}, %zmm25, %k2
+// CHECK:  encoding: [0x62,0xf3,0x35,0x50,0x1e,0x52,0x7f,0x7b]
+          vpcmpud $123, 508(%rdx){1to16}, %zmm25, %k2
+
+// CHECK: vpcmpud $123, 512(%rdx){1to16}, %zmm25, %k2
+// CHECK:  encoding: [0x62,0xf3,0x35,0x50,0x1e,0x92,0x00,0x02,0x00,0x00,0x7b]
+          vpcmpud $123, 512(%rdx){1to16}, %zmm25, %k2
+
+// CHECK: vpcmpud $123, -512(%rdx){1to16}, %zmm25, %k2
+// CHECK:  encoding: [0x62,0xf3,0x35,0x50,0x1e,0x52,0x80,0x7b]
+          vpcmpud $123, -512(%rdx){1to16}, %zmm25, %k2
+
+// CHECK: vpcmpud $123, -516(%rdx){1to16}, %zmm25, %k2
+// CHECK:  encoding: [0x62,0xf3,0x35,0x50,0x1e,0x92,0xfc,0xfd,0xff,0xff,0x7b]
+          vpcmpud $123, -516(%rdx){1to16}, %zmm25, %k2
+
+// CHECK: vpcmpuq $171, %zmm8, %zmm14, %k3
+// CHECK:  encoding: [0x62,0xd3,0x8d,0x48,0x1e,0xd8,0xab]
+          vpcmpuq $171, %zmm8, %zmm14, %k3
+
+// CHECK: vpcmpuq $171, %zmm8, %zmm14, %k3 {%k2}
+// CHECK:  encoding: [0x62,0xd3,0x8d,0x4a,0x1e,0xd8,0xab]
+          vpcmpuq $171, %zmm8, %zmm14, %k3 {%k2}
+
+// CHECK: vpcmpuq $123, %zmm8, %zmm14, %k3
+// CHECK:  encoding: [0x62,0xd3,0x8d,0x48,0x1e,0xd8,0x7b]
+          vpcmpuq $123, %zmm8, %zmm14, %k3
+
+// CHECK: vpcmpuq $123, (%rcx), %zmm14, %k3
+// CHECK:  encoding: [0x62,0xf3,0x8d,0x48,0x1e,0x19,0x7b]
+          vpcmpuq $123, (%rcx), %zmm14, %k3
+
+// CHECK: vpcmpuq $123, 291(%rax,%r14,8), %zmm14, %k3
+// CHECK:  encoding: [0x62,0xb3,0x8d,0x48,0x1e,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpuq $123, 291(%rax,%r14,8), %zmm14, %k3
+
+// CHECK: vpcmpuq $123, (%rcx){1to8}, %zmm14, %k3
+// CHECK:  encoding: [0x62,0xf3,0x8d,0x58,0x1e,0x19,0x7b]
+          vpcmpuq $123, (%rcx){1to8}, %zmm14, %k3
+
+// CHECK: vpcmpuq $123, 8128(%rdx), %zmm14, %k3
+// CHECK:  encoding: [0x62,0xf3,0x8d,0x48,0x1e,0x5a,0x7f,0x7b]
+          vpcmpuq $123, 8128(%rdx), %zmm14, %k3
+
+// CHECK: vpcmpuq $123, 8192(%rdx), %zmm14, %k3
+// CHECK:  encoding: [0x62,0xf3,0x8d,0x48,0x1e,0x9a,0x00,0x20,0x00,0x00,0x7b]
+          vpcmpuq $123, 8192(%rdx), %zmm14, %k3
+
+// CHECK: vpcmpuq $123, -8192(%rdx), %zmm14, %k3
+// CHECK:  encoding: [0x62,0xf3,0x8d,0x48,0x1e,0x5a,0x80,0x7b]
+          vpcmpuq $123, -8192(%rdx), %zmm14, %k3
+
+// CHECK: vpcmpuq $123, -8256(%rdx), %zmm14, %k3
+// CHECK:  encoding: [0x62,0xf3,0x8d,0x48,0x1e,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
+          vpcmpuq $123, -8256(%rdx), %zmm14, %k3
+
+// CHECK: vpcmpuq $123, 1016(%rdx){1to8}, %zmm14, %k3
+// CHECK:  encoding: [0x62,0xf3,0x8d,0x58,0x1e,0x5a,0x7f,0x7b]
+          vpcmpuq $123, 1016(%rdx){1to8}, %zmm14, %k3
+
+// CHECK: vpcmpuq $123, 1024(%rdx){1to8}, %zmm14, %k3
+// CHECK:  encoding: [0x62,0xf3,0x8d,0x58,0x1e,0x9a,0x00,0x04,0x00,0x00,0x7b]
+          vpcmpuq $123, 1024(%rdx){1to8}, %zmm14, %k3
+
+// CHECK: vpcmpuq $123, -1024(%rdx){1to8}, %zmm14, %k3
+// CHECK:  encoding: [0x62,0xf3,0x8d,0x58,0x1e,0x5a,0x80,0x7b]
+          vpcmpuq $123, -1024(%rdx){1to8}, %zmm14, %k3
+
+// CHECK: vpcmpuq $123, -1032(%rdx){1to8}, %zmm14, %k3
+// CHECK:  encoding: [0x62,0xf3,0x8d,0x58,0x1e,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+          vpcmpuq $123, -1032(%rdx){1to8}, %zmm14, %k3
+
 // CHECK: vpmaxsd %zmm16, %zmm8, %zmm6
 // CHECK:  encoding: [0x62,0xb2,0x3d,0x48,0x3d,0xf0]
           vpmaxsd %zmm16, %zmm8, %zmm6
@@ -2377,6 +3317,342 @@
 // CHECK:  encoding: [0x62,0xf1,0xcd,0x50,0xef,0xba,0xf8,0xfb,0xff,0xff]
           vpxorq -1032(%rdx){1to8}, %zmm22, %zmm7
 
+// CHECK: vrcp14pd %zmm4, %zmm13
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x4c,0xec]
+          vrcp14pd %zmm4, %zmm13
+
+// CHECK: vrcp14pd %zmm4, %zmm13 {%k5}
+// CHECK:  encoding: [0x62,0x72,0xfd,0x4d,0x4c,0xec]
+          vrcp14pd %zmm4, %zmm13 {%k5}
+
+// CHECK: vrcp14pd %zmm4, %zmm13 {%k5} {z}
+// CHECK:  encoding: [0x62,0x72,0xfd,0xcd,0x4c,0xec]
+          vrcp14pd %zmm4, %zmm13 {%k5} {z}
+
+// CHECK: vrcp14pd (%rcx), %zmm13
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x4c,0x29]
+          vrcp14pd (%rcx), %zmm13
+
+// CHECK: vrcp14pd 291(%rax,%r14,8), %zmm13
+// CHECK:  encoding: [0x62,0x32,0xfd,0x48,0x4c,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vrcp14pd 291(%rax,%r14,8), %zmm13
+
+// CHECK: vrcp14pd (%rcx){1to8}, %zmm13
+// CHECK:  encoding: [0x62,0x72,0xfd,0x58,0x4c,0x29]
+          vrcp14pd (%rcx){1to8}, %zmm13
+
+// CHECK: vrcp14pd 8128(%rdx), %zmm13
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x4c,0x6a,0x7f]
+          vrcp14pd 8128(%rdx), %zmm13
+
+// CHECK: vrcp14pd 8192(%rdx), %zmm13
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x4c,0xaa,0x00,0x20,0x00,0x00]
+          vrcp14pd 8192(%rdx), %zmm13
+
+// CHECK: vrcp14pd -8192(%rdx), %zmm13
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x4c,0x6a,0x80]
+          vrcp14pd -8192(%rdx), %zmm13
+
+// CHECK: vrcp14pd -8256(%rdx), %zmm13
+// CHECK:  encoding: [0x62,0x72,0xfd,0x48,0x4c,0xaa,0xc0,0xdf,0xff,0xff]
+          vrcp14pd -8256(%rdx), %zmm13
+
+// CHECK: vrcp14pd 1016(%rdx){1to8}, %zmm13
+// CHECK:  encoding: [0x62,0x72,0xfd,0x58,0x4c,0x6a,0x7f]
+          vrcp14pd 1016(%rdx){1to8}, %zmm13
+
+// CHECK: vrcp14pd 1024(%rdx){1to8}, %zmm13
+// CHECK:  encoding: [0x62,0x72,0xfd,0x58,0x4c,0xaa,0x00,0x04,0x00,0x00]
+          vrcp14pd 1024(%rdx){1to8}, %zmm13
+
+// CHECK: vrcp14pd -1024(%rdx){1to8}, %zmm13
+// CHECK:  encoding: [0x62,0x72,0xfd,0x58,0x4c,0x6a,0x80]
+          vrcp14pd -1024(%rdx){1to8}, %zmm13
+
+// CHECK: vrcp14pd -1032(%rdx){1to8}, %zmm13
+// CHECK:  encoding: [0x62,0x72,0xfd,0x58,0x4c,0xaa,0xf8,0xfb,0xff,0xff]
+          vrcp14pd -1032(%rdx){1to8}, %zmm13
+
+// CHECK: vrcp14ps %zmm25, %zmm10
+// CHECK:  encoding: [0x62,0x12,0x7d,0x48,0x4c,0xd1]
+          vrcp14ps %zmm25, %zmm10
+
+// CHECK: vrcp14ps %zmm25, %zmm10 {%k1}
+// CHECK:  encoding: [0x62,0x12,0x7d,0x49,0x4c,0xd1]
+          vrcp14ps %zmm25, %zmm10 {%k1}
+
+// CHECK: vrcp14ps %zmm25, %zmm10 {%k1} {z}
+// CHECK:  encoding: [0x62,0x12,0x7d,0xc9,0x4c,0xd1]
+          vrcp14ps %zmm25, %zmm10 {%k1} {z}
+
+// CHECK: vrcp14ps (%rcx), %zmm10
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x4c,0x11]
+          vrcp14ps (%rcx), %zmm10
+
+// CHECK: vrcp14ps 291(%rax,%r14,8), %zmm10
+// CHECK:  encoding: [0x62,0x32,0x7d,0x48,0x4c,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vrcp14ps 291(%rax,%r14,8), %zmm10
+
+// CHECK: vrcp14ps (%rcx){1to16}, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x7d,0x58,0x4c,0x11]
+          vrcp14ps (%rcx){1to16}, %zmm10
+
+// CHECK: vrcp14ps 8128(%rdx), %zmm10
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x4c,0x52,0x7f]
+          vrcp14ps 8128(%rdx), %zmm10
+
+// CHECK: vrcp14ps 8192(%rdx), %zmm10
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x4c,0x92,0x00,0x20,0x00,0x00]
+          vrcp14ps 8192(%rdx), %zmm10
+
+// CHECK: vrcp14ps -8192(%rdx), %zmm10
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x4c,0x52,0x80]
+          vrcp14ps -8192(%rdx), %zmm10
+
+// CHECK: vrcp14ps -8256(%rdx), %zmm10
+// CHECK:  encoding: [0x62,0x72,0x7d,0x48,0x4c,0x92,0xc0,0xdf,0xff,0xff]
+          vrcp14ps -8256(%rdx), %zmm10
+
+// CHECK: vrcp14ps 508(%rdx){1to16}, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x7d,0x58,0x4c,0x52,0x7f]
+          vrcp14ps 508(%rdx){1to16}, %zmm10
+
+// CHECK: vrcp14ps 512(%rdx){1to16}, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x7d,0x58,0x4c,0x92,0x00,0x02,0x00,0x00]
+          vrcp14ps 512(%rdx){1to16}, %zmm10
+
+// CHECK: vrcp14ps -512(%rdx){1to16}, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x7d,0x58,0x4c,0x52,0x80]
+          vrcp14ps -512(%rdx){1to16}, %zmm10
+
+// CHECK: vrcp14ps -516(%rdx){1to16}, %zmm10
+// CHECK:  encoding: [0x62,0x72,0x7d,0x58,0x4c,0x92,0xfc,0xfd,0xff,0xff]
+          vrcp14ps -516(%rdx){1to16}, %zmm10
+
+// CHECK: vrsqrt14pd %zmm14, %zmm19
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x48,0x4e,0xde]
+          vrsqrt14pd %zmm14, %zmm19
+
+// CHECK: vrsqrt14pd %zmm14, %zmm19 {%k1}
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x49,0x4e,0xde]
+          vrsqrt14pd %zmm14, %zmm19 {%k1}
+
+// CHECK: vrsqrt14pd %zmm14, %zmm19 {%k1} {z}
+// CHECK:  encoding: [0x62,0xc2,0xfd,0xc9,0x4e,0xde]
+          vrsqrt14pd %zmm14, %zmm19 {%k1} {z}
+
+// CHECK: vrsqrt14pd (%rcx), %zmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x48,0x4e,0x19]
+          vrsqrt14pd (%rcx), %zmm19
+
+// CHECK: vrsqrt14pd 291(%rax,%r14,8), %zmm19
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x48,0x4e,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vrsqrt14pd 291(%rax,%r14,8), %zmm19
+
+// CHECK: vrsqrt14pd (%rcx){1to8}, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x58,0x4e,0x19]
+          vrsqrt14pd (%rcx){1to8}, %zmm19
+
+// CHECK: vrsqrt14pd 8128(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x48,0x4e,0x5a,0x7f]
+          vrsqrt14pd 8128(%rdx), %zmm19
+
+// CHECK: vrsqrt14pd 8192(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x48,0x4e,0x9a,0x00,0x20,0x00,0x00]
+          vrsqrt14pd 8192(%rdx), %zmm19
+
+// CHECK: vrsqrt14pd -8192(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x48,0x4e,0x5a,0x80]
+          vrsqrt14pd -8192(%rdx), %zmm19
+
+// CHECK: vrsqrt14pd -8256(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x48,0x4e,0x9a,0xc0,0xdf,0xff,0xff]
+          vrsqrt14pd -8256(%rdx), %zmm19
+
+// CHECK: vrsqrt14pd 1016(%rdx){1to8}, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x58,0x4e,0x5a,0x7f]
+          vrsqrt14pd 1016(%rdx){1to8}, %zmm19
+
+// CHECK: vrsqrt14pd 1024(%rdx){1to8}, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x58,0x4e,0x9a,0x00,0x04,0x00,0x00]
+          vrsqrt14pd 1024(%rdx){1to8}, %zmm19
+
+// CHECK: vrsqrt14pd -1024(%rdx){1to8}, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x58,0x4e,0x5a,0x80]
+          vrsqrt14pd -1024(%rdx){1to8}, %zmm19
+
+// CHECK: vrsqrt14pd -1032(%rdx){1to8}, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x58,0x4e,0x9a,0xf8,0xfb,0xff,0xff]
+          vrsqrt14pd -1032(%rdx){1to8}, %zmm19
+
+// CHECK: vrsqrt14ps %zmm9, %zmm16
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x48,0x4e,0xc1]
+          vrsqrt14ps %zmm9, %zmm16
+
+// CHECK: vrsqrt14ps %zmm9, %zmm16 {%k5}
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x4d,0x4e,0xc1]
+          vrsqrt14ps %zmm9, %zmm16 {%k5}
+
+// CHECK: vrsqrt14ps %zmm9, %zmm16 {%k5} {z}
+// CHECK:  encoding: [0x62,0xc2,0x7d,0xcd,0x4e,0xc1]
+          vrsqrt14ps %zmm9, %zmm16 {%k5} {z}
+
+// CHECK: vrsqrt14ps (%rcx), %zmm16
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x4e,0x01]
+          vrsqrt14ps (%rcx), %zmm16
+
+// CHECK: vrsqrt14ps 291(%rax,%r14,8), %zmm16
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x48,0x4e,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vrsqrt14ps 291(%rax,%r14,8), %zmm16
+
+// CHECK: vrsqrt14ps (%rcx){1to16}, %zmm16
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x58,0x4e,0x01]
+          vrsqrt14ps (%rcx){1to16}, %zmm16
+
+// CHECK: vrsqrt14ps 8128(%rdx), %zmm16
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x4e,0x42,0x7f]
+          vrsqrt14ps 8128(%rdx), %zmm16
+
+// CHECK: vrsqrt14ps 8192(%rdx), %zmm16
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x4e,0x82,0x00,0x20,0x00,0x00]
+          vrsqrt14ps 8192(%rdx), %zmm16
+
+// CHECK: vrsqrt14ps -8192(%rdx), %zmm16
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x4e,0x42,0x80]
+          vrsqrt14ps -8192(%rdx), %zmm16
+
+// CHECK: vrsqrt14ps -8256(%rdx), %zmm16
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x4e,0x82,0xc0,0xdf,0xff,0xff]
+          vrsqrt14ps -8256(%rdx), %zmm16
+
+// CHECK: vrsqrt14ps 508(%rdx){1to16}, %zmm16
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x58,0x4e,0x42,0x7f]
+          vrsqrt14ps 508(%rdx){1to16}, %zmm16
+
+// CHECK: vrsqrt14ps 512(%rdx){1to16}, %zmm16
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x58,0x4e,0x82,0x00,0x02,0x00,0x00]
+          vrsqrt14ps 512(%rdx){1to16}, %zmm16
+
+// CHECK: vrsqrt14ps -512(%rdx){1to16}, %zmm16
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x58,0x4e,0x42,0x80]
+          vrsqrt14ps -512(%rdx){1to16}, %zmm16
+
+// CHECK: vrsqrt14ps -516(%rdx){1to16}, %zmm16
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x58,0x4e,0x82,0xfc,0xfd,0xff,0xff]
+          vrsqrt14ps -516(%rdx){1to16}, %zmm16
+
+// CHECK: vsqrtpd %zmm19, %zmm19
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x48,0x51,0xdb]
+          vsqrtpd %zmm19, %zmm19
+
+// CHECK: vsqrtpd %zmm19, %zmm19 {%k5}
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x4d,0x51,0xdb]
+          vsqrtpd %zmm19, %zmm19 {%k5}
+
+// CHECK: vsqrtpd %zmm19, %zmm19 {%k5} {z}
+// CHECK:  encoding: [0x62,0xa1,0xfd,0xcd,0x51,0xdb]
+          vsqrtpd %zmm19, %zmm19 {%k5} {z}
+
+// CHECK: vsqrtpd (%rcx), %zmm19
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x51,0x19]
+          vsqrtpd (%rcx), %zmm19
+
+// CHECK: vsqrtpd 291(%rax,%r14,8), %zmm19
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x48,0x51,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vsqrtpd 291(%rax,%r14,8), %zmm19
+
+// CHECK: vsqrtpd (%rcx){1to8}, %zmm19
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x58,0x51,0x19]
+          vsqrtpd (%rcx){1to8}, %zmm19
+
+// CHECK: vsqrtpd 8128(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x51,0x5a,0x7f]
+          vsqrtpd 8128(%rdx), %zmm19
+
+// CHECK: vsqrtpd 8192(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x51,0x9a,0x00,0x20,0x00,0x00]
+          vsqrtpd 8192(%rdx), %zmm19
+
+// CHECK: vsqrtpd -8192(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x51,0x5a,0x80]
+          vsqrtpd -8192(%rdx), %zmm19
+
+// CHECK: vsqrtpd -8256(%rdx), %zmm19
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x51,0x9a,0xc0,0xdf,0xff,0xff]
+          vsqrtpd -8256(%rdx), %zmm19
+
+// CHECK: vsqrtpd 1016(%rdx){1to8}, %zmm19
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x58,0x51,0x5a,0x7f]
+          vsqrtpd 1016(%rdx){1to8}, %zmm19
+
+// CHECK: vsqrtpd 1024(%rdx){1to8}, %zmm19
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x58,0x51,0x9a,0x00,0x04,0x00,0x00]
+          vsqrtpd 1024(%rdx){1to8}, %zmm19
+
+// CHECK: vsqrtpd -1024(%rdx){1to8}, %zmm19
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x58,0x51,0x5a,0x80]
+          vsqrtpd -1024(%rdx){1to8}, %zmm19
+
+// CHECK: vsqrtpd -1032(%rdx){1to8}, %zmm19
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x58,0x51,0x9a,0xf8,0xfb,0xff,0xff]
+          vsqrtpd -1032(%rdx){1to8}, %zmm19
+
+// CHECK: vsqrtps %zmm29, %zmm28
+// CHECK:  encoding: [0x62,0x01,0x7c,0x48,0x51,0xe5]
+          vsqrtps %zmm29, %zmm28
+
+// CHECK: vsqrtps %zmm29, %zmm28 {%k3}
+// CHECK:  encoding: [0x62,0x01,0x7c,0x4b,0x51,0xe5]
+          vsqrtps %zmm29, %zmm28 {%k3}
+
+// CHECK: vsqrtps %zmm29, %zmm28 {%k3} {z}
+// CHECK:  encoding: [0x62,0x01,0x7c,0xcb,0x51,0xe5]
+          vsqrtps %zmm29, %zmm28 {%k3} {z}
+
+// CHECK: vsqrtps (%rcx), %zmm28
+// CHECK:  encoding: [0x62,0x61,0x7c,0x48,0x51,0x21]
+          vsqrtps (%rcx), %zmm28
+
+// CHECK: vsqrtps 291(%rax,%r14,8), %zmm28
+// CHECK:  encoding: [0x62,0x21,0x7c,0x48,0x51,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vsqrtps 291(%rax,%r14,8), %zmm28
+
+// CHECK: vsqrtps (%rcx){1to16}, %zmm28
+// CHECK:  encoding: [0x62,0x61,0x7c,0x58,0x51,0x21]
+          vsqrtps (%rcx){1to16}, %zmm28
+
+// CHECK: vsqrtps 8128(%rdx), %zmm28
+// CHECK:  encoding: [0x62,0x61,0x7c,0x48,0x51,0x62,0x7f]
+          vsqrtps 8128(%rdx), %zmm28
+
+// CHECK: vsqrtps 8192(%rdx), %zmm28
+// CHECK:  encoding: [0x62,0x61,0x7c,0x48,0x51,0xa2,0x00,0x20,0x00,0x00]
+          vsqrtps 8192(%rdx), %zmm28
+
+// CHECK: vsqrtps -8192(%rdx), %zmm28
+// CHECK:  encoding: [0x62,0x61,0x7c,0x48,0x51,0x62,0x80]
+          vsqrtps -8192(%rdx), %zmm28
+
+// CHECK: vsqrtps -8256(%rdx), %zmm28
+// CHECK:  encoding: [0x62,0x61,0x7c,0x48,0x51,0xa2,0xc0,0xdf,0xff,0xff]
+          vsqrtps -8256(%rdx), %zmm28
+
+// CHECK: vsqrtps 508(%rdx){1to16}, %zmm28
+// CHECK:  encoding: [0x62,0x61,0x7c,0x58,0x51,0x62,0x7f]
+          vsqrtps 508(%rdx){1to16}, %zmm28
+
+// CHECK: vsqrtps 512(%rdx){1to16}, %zmm28
+// CHECK:  encoding: [0x62,0x61,0x7c,0x58,0x51,0xa2,0x00,0x02,0x00,0x00]
+          vsqrtps 512(%rdx){1to16}, %zmm28
+
+// CHECK: vsqrtps -512(%rdx){1to16}, %zmm28
+// CHECK:  encoding: [0x62,0x61,0x7c,0x58,0x51,0x62,0x80]
+          vsqrtps -512(%rdx){1to16}, %zmm28
+
+// CHECK: vsqrtps -516(%rdx){1to16}, %zmm28
+// CHECK:  encoding: [0x62,0x61,0x7c,0x58,0x51,0xa2,0xfc,0xfd,0xff,0xff]
+          vsqrtps -516(%rdx){1to16}, %zmm28
+
 // CHECK: vsubpd %zmm9, %zmm12, %zmm9
 // CHECK:  encoding: [0x62,0x51,0x9d,0x48,0x5c,0xc9]
           vsubpd %zmm9, %zmm12, %zmm9
@@ -2489,6 +3765,298 @@
 // CHECK:  encoding: [0x62,0x71,0x24,0x50,0x5c,0xb2,0xfc,0xfd,0xff,0xff]
           vsubps -516(%rdx){1to16}, %zmm27, %zmm14
 
+// CHECK: kandw  %k6, %k5, %k2
+// CHECK:  encoding: [0xc5,0xd4,0x41,0xd6]
+          kandw  %k6, %k5, %k2
+
+// CHECK: kandnw %k7, %k6, %k4
+// CHECK:  encoding: [0xc5,0xcc,0x42,0xe7]
+          kandnw %k7, %k6, %k4
+
+// CHECK: korw   %k7, %k6, %k4
+// CHECK:  encoding: [0xc5,0xcc,0x45,0xe7]
+          korw   %k7, %k6, %k4
+
+// CHECK: kxnorw %k5, %k5, %k3
+// CHECK:  encoding: [0xc5,0xd4,0x46,0xdd]
+          kxnorw %k5, %k5, %k3
+
+// CHECK: kxorw  %k7, %k6, %k2
+// CHECK:  encoding: [0xc5,0xcc,0x47,0xd7]
+          kxorw  %k7, %k6, %k2
+
+// CHECK: knotw  %k6, %k3
+// CHECK:  encoding: [0xc5,0xf8,0x44,0xde]
+          knotw  %k6, %k3
+
+// CHECK: kmovw  %k5, %k4
+// CHECK:  encoding: [0xc5,0xf8,0x90,0xe5]
+          kmovw  %k5, %k4
+
+// CHECK: kmovw  (%rcx), %k4
+// CHECK:  encoding: [0xc5,0xf8,0x90,0x21]
+          kmovw  (%rcx), %k4
+
+// CHECK: kmovw  291(%rax,%r14,8), %k4
+// CHECK:  encoding: [0xc4,0xa1,0x78,0x90,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          kmovw  291(%rax,%r14,8), %k4
+
+// CHECK: kmovw  %k4, (%rcx)
+// CHECK:  encoding: [0xc5,0xf8,0x91,0x21]
+          kmovw  %k4, (%rcx)
+
+// CHECK: kmovw  %k4, 291(%rax,%r14,8)
+// CHECK:  encoding: [0xc4,0xa1,0x78,0x91,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          kmovw  %k4, 291(%rax,%r14,8)
+
+// CHECK: kmovw  %eax, %k3
+// CHECK:  encoding: [0xc5,0xf8,0x92,0xd8]
+          kmovw  %eax, %k3
+
+// CHECK: kmovw  %ebp, %k3
+// CHECK:  encoding: [0xc5,0xf8,0x92,0xdd]
+          kmovw  %ebp, %k3
+
+// CHECK: kmovw  %r13d, %k3
+// CHECK:  encoding: [0xc4,0xc1,0x78,0x92,0xdd]
+          kmovw  %r13d, %k3
+
+// CHECK: kmovw  %k2, %eax
+// CHECK:  encoding: [0xc5,0xf8,0x93,0xc2]
+          kmovw  %k2, %eax
+
+// CHECK: kmovw  %k2, %ebp
+// CHECK:  encoding: [0xc5,0xf8,0x93,0xea]
+          kmovw  %k2, %ebp
+
+// CHECK: kmovw  %k2, %r13d
+// CHECK:  encoding: [0xc5,0x78,0x93,0xea]
+          kmovw  %k2, %r13d
+
+// CHECK: vmovapd %zmm18, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x29,0x11]
+          vmovapd %zmm18, (%rcx)
+
+// CHECK: vmovapd %zmm18, (%rcx) {%k6}
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x4e,0x29,0x11]
+          vmovapd %zmm18, (%rcx) {%k6}
+
+// CHECK: vmovapd %zmm18, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x48,0x29,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vmovapd %zmm18, 291(%rax,%r14,8)
+
+// CHECK: vmovapd %zmm18, 8128(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x29,0x52,0x7f]
+          vmovapd %zmm18, 8128(%rdx)
+
+// CHECK: vmovapd %zmm18, 8192(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x29,0x92,0x00,0x20,0x00,0x00]
+          vmovapd %zmm18, 8192(%rdx)
+
+// CHECK: vmovapd %zmm18, -8192(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x29,0x52,0x80]
+          vmovapd %zmm18, -8192(%rdx)
+
+// CHECK: vmovapd %zmm18, -8256(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x29,0x92,0xc0,0xdf,0xff,0xff]
+          vmovapd %zmm18, -8256(%rdx)
+
+// CHECK: vmovaps %zmm9, (%rcx)
+// CHECK:  encoding: [0x62,0x71,0x7c,0x48,0x29,0x09]
+          vmovaps %zmm9, (%rcx)
+
+// CHECK: vmovaps %zmm9, (%rcx) {%k3}
+// CHECK:  encoding: [0x62,0x71,0x7c,0x4b,0x29,0x09]
+          vmovaps %zmm9, (%rcx) {%k3}
+
+// CHECK: vmovaps %zmm9, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x31,0x7c,0x48,0x29,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmovaps %zmm9, 291(%rax,%r14,8)
+
+// CHECK: vmovaps %zmm9, 8128(%rdx)
+// CHECK:  encoding: [0x62,0x71,0x7c,0x48,0x29,0x4a,0x7f]
+          vmovaps %zmm9, 8128(%rdx)
+
+// CHECK: vmovaps %zmm9, 8192(%rdx)
+// CHECK:  encoding: [0x62,0x71,0x7c,0x48,0x29,0x8a,0x00,0x20,0x00,0x00]
+          vmovaps %zmm9, 8192(%rdx)
+
+// CHECK: vmovaps %zmm9, -8192(%rdx)
+// CHECK:  encoding: [0x62,0x71,0x7c,0x48,0x29,0x4a,0x80]
+          vmovaps %zmm9, -8192(%rdx)
+
+// CHECK: vmovaps %zmm9, -8256(%rdx)
+// CHECK:  encoding: [0x62,0x71,0x7c,0x48,0x29,0x8a,0xc0,0xdf,0xff,0xff]
+          vmovaps %zmm9, -8256(%rdx)
+
+// CHECK: vmovdqa32 %zmm18, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x48,0x7f,0x11]
+          vmovdqa32 %zmm18, (%rcx)
+
+// CHECK: vmovdqa32 %zmm18, (%rcx) {%k4}
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x4c,0x7f,0x11]
+          vmovdqa32 %zmm18, (%rcx) {%k4}
+
+// CHECK: vmovdqa32 %zmm18, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0x7d,0x48,0x7f,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqa32 %zmm18, 291(%rax,%r14,8)
+
+// CHECK: vmovdqa32 %zmm18, 8128(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x48,0x7f,0x52,0x7f]
+          vmovdqa32 %zmm18, 8128(%rdx)
+
+// CHECK: vmovdqa32 %zmm18, 8192(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x48,0x7f,0x92,0x00,0x20,0x00,0x00]
+          vmovdqa32 %zmm18, 8192(%rdx)
+
+// CHECK: vmovdqa32 %zmm18, -8192(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x48,0x7f,0x52,0x80]
+          vmovdqa32 %zmm18, -8192(%rdx)
+
+// CHECK: vmovdqa32 %zmm18, -8256(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x48,0x7f,0x92,0xc0,0xdf,0xff,0xff]
+          vmovdqa32 %zmm18, -8256(%rdx)
+
+// CHECK: vmovdqa64 %zmm19, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x7f,0x19]
+          vmovdqa64 %zmm19, (%rcx)
+
+// CHECK: vmovdqa64 %zmm19, (%rcx) {%k5}
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x4d,0x7f,0x19]
+          vmovdqa64 %zmm19, (%rcx) {%k5}
+
+// CHECK: vmovdqa64 %zmm19, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x48,0x7f,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqa64 %zmm19, 291(%rax,%r14,8)
+
+// CHECK: vmovdqa64 %zmm19, 8128(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x7f,0x5a,0x7f]
+          vmovdqa64 %zmm19, 8128(%rdx)
+
+// CHECK: vmovdqa64 %zmm19, 8192(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x7f,0x9a,0x00,0x20,0x00,0x00]
+          vmovdqa64 %zmm19, 8192(%rdx)
+
+// CHECK: vmovdqa64 %zmm19, -8192(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x7f,0x5a,0x80]
+          vmovdqa64 %zmm19, -8192(%rdx)
+
+// CHECK: vmovdqa64 %zmm19, -8256(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x48,0x7f,0x9a,0xc0,0xdf,0xff,0xff]
+          vmovdqa64 %zmm19, -8256(%rdx)
+
+// CHECK: vmovdqu32 %zmm22, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x48,0x7f,0x31]
+          vmovdqu32 %zmm22, (%rcx)
+
+// CHECK: vmovdqu32 %zmm22, (%rcx) {%k1}
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x49,0x7f,0x31]
+          vmovdqu32 %zmm22, (%rcx) {%k1}
+
+// CHECK: vmovdqu32 %zmm22, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0x7e,0x48,0x7f,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu32 %zmm22, 291(%rax,%r14,8)
+
+// CHECK: vmovdqu32 %zmm22, 8128(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x48,0x7f,0x72,0x7f]
+          vmovdqu32 %zmm22, 8128(%rdx)
+
+// CHECK: vmovdqu32 %zmm22, 8192(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x48,0x7f,0xb2,0x00,0x20,0x00,0x00]
+          vmovdqu32 %zmm22, 8192(%rdx)
+
+// CHECK: vmovdqu32 %zmm22, -8192(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x48,0x7f,0x72,0x80]
+          vmovdqu32 %zmm22, -8192(%rdx)
+
+// CHECK: vmovdqu32 %zmm22, -8256(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x48,0x7f,0xb2,0xc0,0xdf,0xff,0xff]
+          vmovdqu32 %zmm22, -8256(%rdx)
+
+// CHECK: vmovdqu64 %zmm24, (%rcx)
+// CHECK:  encoding: [0x62,0x61,0xfe,0x48,0x7f,0x01]
+          vmovdqu64 %zmm24, (%rcx)
+
+// CHECK: vmovdqu64 %zmm24, (%rcx) {%k5}
+// CHECK:  encoding: [0x62,0x61,0xfe,0x4d,0x7f,0x01]
+          vmovdqu64 %zmm24, (%rcx) {%k5}
+
+// CHECK: vmovdqu64 %zmm24, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x21,0xfe,0x48,0x7f,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu64 %zmm24, 291(%rax,%r14,8)
+
+// CHECK: vmovdqu64 %zmm24, 8128(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfe,0x48,0x7f,0x42,0x7f]
+          vmovdqu64 %zmm24, 8128(%rdx)
+
+// CHECK: vmovdqu64 %zmm24, 8192(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfe,0x48,0x7f,0x82,0x00,0x20,0x00,0x00]
+          vmovdqu64 %zmm24, 8192(%rdx)
+
+// CHECK: vmovdqu64 %zmm24, -8192(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfe,0x48,0x7f,0x42,0x80]
+          vmovdqu64 %zmm24, -8192(%rdx)
+
+// CHECK: vmovdqu64 %zmm24, -8256(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfe,0x48,0x7f,0x82,0xc0,0xdf,0xff,0xff]
+          vmovdqu64 %zmm24, -8256(%rdx)
+
+// CHECK: vmovupd %zmm10, (%rcx)
+// CHECK:  encoding: [0x62,0x71,0xfd,0x48,0x11,0x11]
+          vmovupd %zmm10, (%rcx)
+
+// CHECK: vmovupd %zmm10, (%rcx) {%k7}
+// CHECK:  encoding: [0x62,0x71,0xfd,0x4f,0x11,0x11]
+          vmovupd %zmm10, (%rcx) {%k7}
+
+// CHECK: vmovupd %zmm10, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x31,0xfd,0x48,0x11,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vmovupd %zmm10, 291(%rax,%r14,8)
+
+// CHECK: vmovupd %zmm10, 8128(%rdx)
+// CHECK:  encoding: [0x62,0x71,0xfd,0x48,0x11,0x52,0x7f]
+          vmovupd %zmm10, 8128(%rdx)
+
+// CHECK: vmovupd %zmm10, 8192(%rdx)
+// CHECK:  encoding: [0x62,0x71,0xfd,0x48,0x11,0x92,0x00,0x20,0x00,0x00]
+          vmovupd %zmm10, 8192(%rdx)
+
+// CHECK: vmovupd %zmm10, -8192(%rdx)
+// CHECK:  encoding: [0x62,0x71,0xfd,0x48,0x11,0x52,0x80]
+          vmovupd %zmm10, -8192(%rdx)
+
+// CHECK: vmovupd %zmm10, -8256(%rdx)
+// CHECK:  encoding: [0x62,0x71,0xfd,0x48,0x11,0x92,0xc0,0xdf,0xff,0xff]
+          vmovupd %zmm10, -8256(%rdx)
+
+// CHECK: vmovups %zmm24, (%rcx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x48,0x11,0x01]
+          vmovups %zmm24, (%rcx)
+
+// CHECK: vmovups %zmm24, (%rcx) {%k7}
+// CHECK:  encoding: [0x62,0x61,0x7c,0x4f,0x11,0x01]
+          vmovups %zmm24, (%rcx) {%k7}
+
+// CHECK: vmovups %zmm24, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x21,0x7c,0x48,0x11,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vmovups %zmm24, 291(%rax,%r14,8)
+
+// CHECK: vmovups %zmm24, 8128(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x48,0x11,0x42,0x7f]
+          vmovups %zmm24, 8128(%rdx)
+
+// CHECK: vmovups %zmm24, 8192(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x48,0x11,0x82,0x00,0x20,0x00,0x00]
+          vmovups %zmm24, 8192(%rdx)
+
+// CHECK: vmovups %zmm24, -8192(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x48,0x11,0x42,0x80]
+          vmovups %zmm24, -8192(%rdx)
+
+// CHECK: vmovups %zmm24, -8256(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x48,0x11,0x82,0xc0,0xdf,0xff,0xff]
+          vmovups %zmm24, -8256(%rdx)
+
 // CHECK: vpmovqb %zmm2, %xmm3
 // CHECK:  encoding: [0x62,0xf2,0x7e,0x48,0x32,0xd3]
           vpmovqb %zmm2, %xmm3
@@ -3097,6 +4665,14 @@ vinserti32x4  $1, %xmm21, %zmm5, %zmm17
 // CHECK: encoding: [0x62,0xe3,0x1d,0x40,0x38,0x4f,0x10,0x01]
 vinserti32x4  $1, 256(%rdi), %zmm28, %zmm17
 
+// CHECK: vinserti32x8
+// CHECK: encoding: [0x62,0xd3,0x4d,0x40,0x3a,0xdb,0x01]
+vinserti32x8  $1, %ymm11, %zmm22, %zmm3
+
+// CHECK: vinsertf64x2
+// CHECK: encoding: [0x62,0xf3,0xed,0x48,0x18,0x4f,0x10,0x01]
+vinsertf64x2  $1, 256(%rdi), %zmm2, %zmm1
+
 // CHECK: vextracti32x4
 // CHECK: encoding: [0x62,0x33,0x7d,0x48,0x39,0xc9,0x01]
 vextracti32x4  $1, %zmm9, %xmm17
@@ -3219,3 +4795,59 @@ vpermi2q 0x80(%rax,%rbx,2), %zmm2, %zmm26 {%k3}
 // CHECK: vpermt2d
 // CHECK: encoding: [0x62,0x32,0x4d,0xc2,0x7e,0x24,0xad,0x05,0x00,0x00,0x00]	
 vpermt2d 5(,%r13,4), %zmm22, %zmm12 {%k2} {z}
+
+// CHECK: valignq $2
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x03,0x4c,0x24,0x04,0x02]
+valignq  $2, 0x100(%rsp), %zmm0, %zmm1
+
+// CHECK: valignq $3
+// CHECK: encoding: [0x62,0xf3,0xfd,0x49,0x03,0xcb,0x03]
+valignq  $3, %zmm3, %zmm0, %zmm1 {%k1}
+
+// CHECK: vextractf32x4 $3
+// CHECK: encoding: [0x62,0xf3,0x7d,0x49,0x19,0xd9,0x03]
+vextractf32x4  $3, %zmm3, %xmm1 {%k1}
+
+// CHECK: vextracti64x4 $1
+// CHECK: encoding: [0x62,0x53,0xfd,0xcb,0x3b,0xf4,0x01]
+vextracti64x4  $1, %zmm14, %ymm12 {%k3} {z}
+
+// CHECK: vfmadd231ps
+// CHECK: encoding: [0x62,0xb2,0x1d,0x48,0xb8,0xe7]
+vfmadd231ps %zmm23, %zmm12, %zmm4
+
+// CHECK: vfmsub231pd
+// CHECK: encoding: [0x62,0xe2,0xed,0x48,0xba,0x73,0x08]
+vfmsub231pd 0x200(%rbx), %zmm2, %zmm22
+
+// CHECK: vfmaddsub231ps
+// CHECK: encoding: [0x62,0xd2,0x65,0x4b,0xb6,0xec]
+vfmaddsub231ps %zmm12, %zmm3, %zmm5 {%k3}
+
+// CHECK: vfmsubadd231pd
+// CHECK: encoding: [0x62,0x72,0x85,0xc5,0xb7,0xdd]
+vfmsubadd231pd %zmm5, %zmm31, %zmm11 {%k5}{z}
+
+// CHECK: vfnmadd231ps
+// CHECK: encoding: [0x62,0xf2,0x4d,0x48,0xbc,0xfd]
+vfnmadd231ps %zmm5, %zmm6, %zmm7
+
+// CHECK: vfnmsub231pd
+// CHECK: encoding: [0x62,0xf2,0xcd,0x48,0xbe,0xfd]
+vfnmsub231pd %zmm5, %zmm6, %zmm7
+
+// CHECK: vpermilps
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x0c,0xd9]
+vpermilps %zmm1, %zmm2, %zmm3
+
+// CHECK: vpermilpd
+// CHECK: encoding: [0x62,0xf2,0xed,0x48,0x0d,0x5b,0x10]
+vpermilpd 0x400(%rbx), %zmm2, %zmm3
+
+// CHECK: vpermilps
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x0c,0x5b,0x10]
+vpermilps 0x400(%rbx), %zmm2, %zmm3
+
+// CHECK: vpermilpd
+// CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x05,0x53,0x10,0x23]
+vpermilpd $0x23, 0x400(%rbx), %zmm2
diff --git a/test/MC/X86/avx512bw-encoding.s b/test/MC/X86/avx512bw-encoding.s
new file mode 100644
index 000000000000..06397fbebdf7
--- /dev/null
+++ b/test/MC/X86/avx512bw-encoding.s
@@ -0,0 +1,73 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=skx  --show-encoding %s | FileCheck %s
+
+// CHECK: vpblendmb %zmm25, %zmm18, %zmm17
+// CHECK:  encoding: [0x62,0x82,0x6d,0x40,0x66,0xc9]
+          vpblendmb %zmm25, %zmm18, %zmm17
+
+// CHECK: vpblendmb %zmm25, %zmm18, %zmm17 {%k5}
+// CHECK:  encoding: [0x62,0x82,0x6d,0x45,0x66,0xc9]
+          vpblendmb %zmm25, %zmm18, %zmm17 {%k5}
+
+// CHECK: vpblendmb %zmm25, %zmm18, %zmm17 {%k5} {z}
+// CHECK:  encoding: [0x62,0x82,0x6d,0xc5,0x66,0xc9]
+          vpblendmb %zmm25, %zmm18, %zmm17 {%k5} {z}
+
+// CHECK: vpblendmb (%rcx), %zmm18, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x40,0x66,0x09]
+          vpblendmb (%rcx), %zmm18, %zmm17
+
+// CHECK: vpblendmb 291(%rax,%r14,8), %zmm18, %zmm17
+// CHECK:  encoding: [0x62,0xa2,0x6d,0x40,0x66,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpblendmb 291(%rax,%r14,8), %zmm18, %zmm17
+
+// CHECK: vpblendmb 8128(%rdx), %zmm18, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x40,0x66,0x4a,0x7f]
+          vpblendmb 8128(%rdx), %zmm18, %zmm17
+
+// CHECK: vpblendmb 8192(%rdx), %zmm18, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x40,0x66,0x8a,0x00,0x20,0x00,0x00]
+          vpblendmb 8192(%rdx), %zmm18, %zmm17
+
+// CHECK: vpblendmb -8192(%rdx), %zmm18, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x40,0x66,0x4a,0x80]
+          vpblendmb -8192(%rdx), %zmm18, %zmm17
+
+// CHECK: vpblendmb -8256(%rdx), %zmm18, %zmm17
+// CHECK:  encoding: [0x62,0xe2,0x6d,0x40,0x66,0x8a,0xc0,0xdf,0xff,0xff]
+          vpblendmb -8256(%rdx), %zmm18, %zmm17
+
+// CHECK: vpblendmw %zmm17, %zmm20, %zmm26
+// CHECK:  encoding: [0x62,0x22,0xdd,0x40,0x66,0xd1]
+          vpblendmw %zmm17, %zmm20, %zmm26
+
+// CHECK: vpblendmw %zmm17, %zmm20, %zmm26 {%k7}
+// CHECK:  encoding: [0x62,0x22,0xdd,0x47,0x66,0xd1]
+          vpblendmw %zmm17, %zmm20, %zmm26 {%k7}
+
+// CHECK: vpblendmw %zmm17, %zmm20, %zmm26 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0xdd,0xc7,0x66,0xd1]
+          vpblendmw %zmm17, %zmm20, %zmm26 {%k7} {z}
+
+// CHECK: vpblendmw (%rcx), %zmm20, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xdd,0x40,0x66,0x11]
+          vpblendmw (%rcx), %zmm20, %zmm26
+
+// CHECK: vpblendmw 291(%rax,%r14,8), %zmm20, %zmm26
+// CHECK:  encoding: [0x62,0x22,0xdd,0x40,0x66,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpblendmw 291(%rax,%r14,8), %zmm20, %zmm26
+
+// CHECK: vpblendmw 8128(%rdx), %zmm20, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xdd,0x40,0x66,0x52,0x7f]
+          vpblendmw 8128(%rdx), %zmm20, %zmm26
+
+// CHECK: vpblendmw 8192(%rdx), %zmm20, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xdd,0x40,0x66,0x92,0x00,0x20,0x00,0x00]
+          vpblendmw 8192(%rdx), %zmm20, %zmm26
+
+// CHECK: vpblendmw -8192(%rdx), %zmm20, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xdd,0x40,0x66,0x52,0x80]
+          vpblendmw -8192(%rdx), %zmm20, %zmm26
+
+// CHECK: vpblendmw -8256(%rdx), %zmm20, %zmm26
+// CHECK:  encoding: [0x62,0x62,0xdd,0x40,0x66,0x92,0xc0,0xdf,0xff,0xff]
+          vpblendmw -8256(%rdx), %zmm20, %zmm26
diff --git a/test/MC/X86/avx512vl-encoding.s b/test/MC/X86/avx512vl-encoding.s
new file mode 100644
index 000000000000..acea70cb12ae
--- /dev/null
+++ b/test/MC/X86/avx512vl-encoding.s
@@ -0,0 +1,449 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=skx  --show-encoding %s | FileCheck %s
+
+// CHECK: vblendmpd %xmm19, %xmm20, %xmm27
+// CHECK:  encoding: [0x62,0x22,0xdd,0x00,0x65,0xdb]
+          vblendmpd %xmm19, %xmm20, %xmm27
+
+// CHECK: vblendmpd %xmm19, %xmm20, %xmm27 {%k7}
+// CHECK:  encoding: [0x62,0x22,0xdd,0x07,0x65,0xdb]
+          vblendmpd %xmm19, %xmm20, %xmm27 {%k7}
+
+// CHECK: vblendmpd %xmm19, %xmm20, %xmm27 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0xdd,0x87,0x65,0xdb]
+          vblendmpd %xmm19, %xmm20, %xmm27 {%k7} {z}
+
+// CHECK: vblendmpd (%rcx), %xmm20, %xmm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x00,0x65,0x19]
+          vblendmpd (%rcx), %xmm20, %xmm27
+
+// CHECK: vblendmpd 291(%rax,%r14,8), %xmm20, %xmm27
+// CHECK:  encoding: [0x62,0x22,0xdd,0x00,0x65,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vblendmpd 291(%rax,%r14,8), %xmm20, %xmm27
+
+// CHECK: vblendmpd (%rcx){1to2}, %xmm20, %xmm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x10,0x65,0x19]
+          vblendmpd (%rcx){1to2}, %xmm20, %xmm27
+
+// CHECK: vblendmpd 2032(%rdx), %xmm20, %xmm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x00,0x65,0x5a,0x7f]
+          vblendmpd 2032(%rdx), %xmm20, %xmm27
+
+// CHECK: vblendmpd 2048(%rdx), %xmm20, %xmm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x00,0x65,0x9a,0x00,0x08,0x00,0x00]
+          vblendmpd 2048(%rdx), %xmm20, %xmm27
+
+// CHECK: vblendmpd -2048(%rdx), %xmm20, %xmm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x00,0x65,0x5a,0x80]
+          vblendmpd -2048(%rdx), %xmm20, %xmm27
+
+// CHECK: vblendmpd -2064(%rdx), %xmm20, %xmm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x00,0x65,0x9a,0xf0,0xf7,0xff,0xff]
+          vblendmpd -2064(%rdx), %xmm20, %xmm27
+
+// CHECK: vblendmpd 1016(%rdx){1to2}, %xmm20, %xmm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x10,0x65,0x5a,0x7f]
+          vblendmpd 1016(%rdx){1to2}, %xmm20, %xmm27
+
+// CHECK: vblendmpd 1024(%rdx){1to2}, %xmm20, %xmm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x10,0x65,0x9a,0x00,0x04,0x00,0x00]
+          vblendmpd 1024(%rdx){1to2}, %xmm20, %xmm27
+
+// CHECK: vblendmpd -1024(%rdx){1to2}, %xmm20, %xmm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x10,0x65,0x5a,0x80]
+          vblendmpd -1024(%rdx){1to2}, %xmm20, %xmm27
+
+// CHECK: vblendmpd -1032(%rdx){1to2}, %xmm20, %xmm27
+// CHECK:  encoding: [0x62,0x62,0xdd,0x10,0x65,0x9a,0xf8,0xfb,0xff,0xff]
+          vblendmpd -1032(%rdx){1to2}, %xmm20, %xmm27
+
+// CHECK: vblendmpd %ymm23, %ymm21, %ymm28
+// CHECK:  encoding: [0x62,0x22,0xd5,0x20,0x65,0xe7]
+          vblendmpd %ymm23, %ymm21, %ymm28
+
+// CHECK: vblendmpd %ymm23, %ymm21, %ymm28 {%k3}
+// CHECK:  encoding: [0x62,0x22,0xd5,0x23,0x65,0xe7]
+          vblendmpd %ymm23, %ymm21, %ymm28 {%k3}
+
+// CHECK: vblendmpd %ymm23, %ymm21, %ymm28 {%k3} {z}
+// CHECK:  encoding: [0x62,0x22,0xd5,0xa3,0x65,0xe7]
+          vblendmpd %ymm23, %ymm21, %ymm28 {%k3} {z}
+
+// CHECK: vblendmpd (%rcx), %ymm21, %ymm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x20,0x65,0x21]
+          vblendmpd (%rcx), %ymm21, %ymm28
+
+// CHECK: vblendmpd 291(%rax,%r14,8), %ymm21, %ymm28
+// CHECK:  encoding: [0x62,0x22,0xd5,0x20,0x65,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vblendmpd 291(%rax,%r14,8), %ymm21, %ymm28
+
+// CHECK: vblendmpd (%rcx){1to4}, %ymm21, %ymm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x30,0x65,0x21]
+          vblendmpd (%rcx){1to4}, %ymm21, %ymm28
+
+// CHECK: vblendmpd 4064(%rdx), %ymm21, %ymm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x20,0x65,0x62,0x7f]
+          vblendmpd 4064(%rdx), %ymm21, %ymm28
+
+// CHECK: vblendmpd 4096(%rdx), %ymm21, %ymm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x20,0x65,0xa2,0x00,0x10,0x00,0x00]
+          vblendmpd 4096(%rdx), %ymm21, %ymm28
+
+// CHECK: vblendmpd -4096(%rdx), %ymm21, %ymm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x20,0x65,0x62,0x80]
+          vblendmpd -4096(%rdx), %ymm21, %ymm28
+
+// CHECK: vblendmpd -4128(%rdx), %ymm21, %ymm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x20,0x65,0xa2,0xe0,0xef,0xff,0xff]
+          vblendmpd -4128(%rdx), %ymm21, %ymm28
+
+// CHECK: vblendmpd 1016(%rdx){1to4}, %ymm21, %ymm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x30,0x65,0x62,0x7f]
+          vblendmpd 1016(%rdx){1to4}, %ymm21, %ymm28
+
+// CHECK: vblendmpd 1024(%rdx){1to4}, %ymm21, %ymm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x30,0x65,0xa2,0x00,0x04,0x00,0x00]
+          vblendmpd 1024(%rdx){1to4}, %ymm21, %ymm28
+
+// CHECK: vblendmpd -1024(%rdx){1to4}, %ymm21, %ymm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x30,0x65,0x62,0x80]
+          vblendmpd -1024(%rdx){1to4}, %ymm21, %ymm28
+
+// CHECK: vblendmpd -1032(%rdx){1to4}, %ymm21, %ymm28
+// CHECK:  encoding: [0x62,0x62,0xd5,0x30,0x65,0xa2,0xf8,0xfb,0xff,0xff]
+          vblendmpd -1032(%rdx){1to4}, %ymm21, %ymm28
+
+// CHECK: vblendmps %xmm20, %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x22,0x5d,0x00,0x65,0xc4]
+          vblendmps %xmm20, %xmm20, %xmm24
+
+// CHECK: vblendmps %xmm20, %xmm20, %xmm24 {%k1}
+// CHECK:  encoding: [0x62,0x22,0x5d,0x01,0x65,0xc4]
+          vblendmps %xmm20, %xmm20, %xmm24 {%k1}
+
+// CHECK: vblendmps %xmm20, %xmm20, %xmm24 {%k1} {z}
+// CHECK:  encoding: [0x62,0x22,0x5d,0x81,0x65,0xc4]
+          vblendmps %xmm20, %xmm20, %xmm24 {%k1} {z}
+
+// CHECK: vblendmps (%rcx), %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x5d,0x00,0x65,0x01]
+          vblendmps (%rcx), %xmm20, %xmm24
+
+// CHECK: vblendmps 291(%rax,%r14,8), %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x22,0x5d,0x00,0x65,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vblendmps 291(%rax,%r14,8), %xmm20, %xmm24
+
+// CHECK: vblendmps (%rcx){1to4}, %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x5d,0x10,0x65,0x01]
+          vblendmps (%rcx){1to4}, %xmm20, %xmm24
+
+// CHECK: vblendmps 2032(%rdx), %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x5d,0x00,0x65,0x42,0x7f]
+          vblendmps 2032(%rdx), %xmm20, %xmm24
+
+// CHECK: vblendmps 2048(%rdx), %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x5d,0x00,0x65,0x82,0x00,0x08,0x00,0x00]
+          vblendmps 2048(%rdx), %xmm20, %xmm24
+
+// CHECK: vblendmps -2048(%rdx), %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x5d,0x00,0x65,0x42,0x80]
+          vblendmps -2048(%rdx), %xmm20, %xmm24
+
+// CHECK: vblendmps -2064(%rdx), %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x5d,0x00,0x65,0x82,0xf0,0xf7,0xff,0xff]
+          vblendmps -2064(%rdx), %xmm20, %xmm24
+
+// CHECK: vblendmps 508(%rdx){1to4}, %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x5d,0x10,0x65,0x42,0x7f]
+          vblendmps 508(%rdx){1to4}, %xmm20, %xmm24
+
+// CHECK: vblendmps 512(%rdx){1to4}, %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x5d,0x10,0x65,0x82,0x00,0x02,0x00,0x00]
+          vblendmps 512(%rdx){1to4}, %xmm20, %xmm24
+
+// CHECK: vblendmps -512(%rdx){1to4}, %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x5d,0x10,0x65,0x42,0x80]
+          vblendmps -512(%rdx){1to4}, %xmm20, %xmm24
+
+// CHECK: vblendmps -516(%rdx){1to4}, %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x5d,0x10,0x65,0x82,0xfc,0xfd,0xff,0xff]
+          vblendmps -516(%rdx){1to4}, %xmm20, %xmm24
+
+// CHECK: vblendmps %ymm24, %ymm23, %ymm17
+// CHECK:  encoding: [0x62,0x82,0x45,0x20,0x65,0xc8]
+          vblendmps %ymm24, %ymm23, %ymm17
+
+// CHECK: vblendmps %ymm24, %ymm23, %ymm17 {%k6}
+// CHECK:  encoding: [0x62,0x82,0x45,0x26,0x65,0xc8]
+          vblendmps %ymm24, %ymm23, %ymm17 {%k6}
+
+// CHECK: vblendmps %ymm24, %ymm23, %ymm17 {%k6} {z}
+// CHECK:  encoding: [0x62,0x82,0x45,0xa6,0x65,0xc8]
+          vblendmps %ymm24, %ymm23, %ymm17 {%k6} {z}
+
+// CHECK: vblendmps (%rcx), %ymm23, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x45,0x20,0x65,0x09]
+          vblendmps (%rcx), %ymm23, %ymm17
+
+// CHECK: vblendmps 291(%rax,%r14,8), %ymm23, %ymm17
+// CHECK:  encoding: [0x62,0xa2,0x45,0x20,0x65,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vblendmps 291(%rax,%r14,8), %ymm23, %ymm17
+
+// CHECK: vblendmps (%rcx){1to8}, %ymm23, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x45,0x30,0x65,0x09]
+          vblendmps (%rcx){1to8}, %ymm23, %ymm17
+
+// CHECK: vblendmps 4064(%rdx), %ymm23, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x45,0x20,0x65,0x4a,0x7f]
+          vblendmps 4064(%rdx), %ymm23, %ymm17
+
+// CHECK: vblendmps 4096(%rdx), %ymm23, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x45,0x20,0x65,0x8a,0x00,0x10,0x00,0x00]
+          vblendmps 4096(%rdx), %ymm23, %ymm17
+
+// CHECK: vblendmps -4096(%rdx), %ymm23, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x45,0x20,0x65,0x4a,0x80]
+          vblendmps -4096(%rdx), %ymm23, %ymm17
+
+// CHECK: vblendmps -4128(%rdx), %ymm23, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x45,0x20,0x65,0x8a,0xe0,0xef,0xff,0xff]
+          vblendmps -4128(%rdx), %ymm23, %ymm17
+
+// CHECK: vblendmps 508(%rdx){1to8}, %ymm23, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x45,0x30,0x65,0x4a,0x7f]
+          vblendmps 508(%rdx){1to8}, %ymm23, %ymm17
+
+// CHECK: vblendmps 512(%rdx){1to8}, %ymm23, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x45,0x30,0x65,0x8a,0x00,0x02,0x00,0x00]
+          vblendmps 512(%rdx){1to8}, %ymm23, %ymm17
+
+// CHECK: vblendmps -512(%rdx){1to8}, %ymm23, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x45,0x30,0x65,0x4a,0x80]
+          vblendmps -512(%rdx){1to8}, %ymm23, %ymm17
+
+// CHECK: vblendmps -516(%rdx){1to8}, %ymm23, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x45,0x30,0x65,0x8a,0xfc,0xfd,0xff,0xff]
+          vblendmps -516(%rdx){1to8}, %ymm23, %ymm17
+
+// CHECK: vpblendmd %xmm26, %xmm25, %xmm17
+// CHECK:  encoding: [0x62,0x82,0x35,0x00,0x64,0xca]
+          vpblendmd %xmm26, %xmm25, %xmm17
+
+// CHECK: vpblendmd %xmm26, %xmm25, %xmm17 {%k5}
+// CHECK:  encoding: [0x62,0x82,0x35,0x05,0x64,0xca]
+          vpblendmd %xmm26, %xmm25, %xmm17 {%k5}
+
+// CHECK: vpblendmd %xmm26, %xmm25, %xmm17 {%k5} {z}
+// CHECK:  encoding: [0x62,0x82,0x35,0x85,0x64,0xca]
+          vpblendmd %xmm26, %xmm25, %xmm17 {%k5} {z}
+
+// CHECK: vpblendmd (%rcx), %xmm25, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x35,0x00,0x64,0x09]
+          vpblendmd (%rcx), %xmm25, %xmm17
+
+// CHECK: vpblendmd 291(%rax,%r14,8), %xmm25, %xmm17
+// CHECK:  encoding: [0x62,0xa2,0x35,0x00,0x64,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpblendmd 291(%rax,%r14,8), %xmm25, %xmm17
+
+// CHECK: vpblendmd (%rcx){1to4}, %xmm25, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x35,0x10,0x64,0x09]
+          vpblendmd (%rcx){1to4}, %xmm25, %xmm17
+
+// CHECK: vpblendmd 2032(%rdx), %xmm25, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x35,0x00,0x64,0x4a,0x7f]
+          vpblendmd 2032(%rdx), %xmm25, %xmm17
+
+// CHECK: vpblendmd 2048(%rdx), %xmm25, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x35,0x00,0x64,0x8a,0x00,0x08,0x00,0x00]
+          vpblendmd 2048(%rdx), %xmm25, %xmm17
+
+// CHECK: vpblendmd -2048(%rdx), %xmm25, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x35,0x00,0x64,0x4a,0x80]
+          vpblendmd -2048(%rdx), %xmm25, %xmm17
+
+// CHECK: vpblendmd -2064(%rdx), %xmm25, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x35,0x00,0x64,0x8a,0xf0,0xf7,0xff,0xff]
+          vpblendmd -2064(%rdx), %xmm25, %xmm17
+
+// CHECK: vpblendmd 508(%rdx){1to4}, %xmm25, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x35,0x10,0x64,0x4a,0x7f]
+          vpblendmd 508(%rdx){1to4}, %xmm25, %xmm17
+
+// CHECK: vpblendmd 512(%rdx){1to4}, %xmm25, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x35,0x10,0x64,0x8a,0x00,0x02,0x00,0x00]
+          vpblendmd 512(%rdx){1to4}, %xmm25, %xmm17
+
+// CHECK: vpblendmd -512(%rdx){1to4}, %xmm25, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x35,0x10,0x64,0x4a,0x80]
+          vpblendmd -512(%rdx){1to4}, %xmm25, %xmm17
+
+// CHECK: vpblendmd -516(%rdx){1to4}, %xmm25, %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x35,0x10,0x64,0x8a,0xfc,0xfd,0xff,0xff]
+          vpblendmd -516(%rdx){1to4}, %xmm25, %xmm17
+
+// CHECK: vpblendmd %ymm23, %ymm29, %ymm26
+// CHECK:  encoding: [0x62,0x22,0x15,0x20,0x64,0xd7]
+          vpblendmd %ymm23, %ymm29, %ymm26
+
+// CHECK: vpblendmd %ymm23, %ymm29, %ymm26 {%k7}
+// CHECK:  encoding: [0x62,0x22,0x15,0x27,0x64,0xd7]
+          vpblendmd %ymm23, %ymm29, %ymm26 {%k7}
+
+// CHECK: vpblendmd %ymm23, %ymm29, %ymm26 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0x15,0xa7,0x64,0xd7]
+          vpblendmd %ymm23, %ymm29, %ymm26 {%k7} {z}
+
+// CHECK: vpblendmd (%rcx), %ymm29, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x15,0x20,0x64,0x11]
+          vpblendmd (%rcx), %ymm29, %ymm26
+
+// CHECK: vpblendmd 291(%rax,%r14,8), %ymm29, %ymm26
+// CHECK:  encoding: [0x62,0x22,0x15,0x20,0x64,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpblendmd 291(%rax,%r14,8), %ymm29, %ymm26
+
+// CHECK: vpblendmd (%rcx){1to8}, %ymm29, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x15,0x30,0x64,0x11]
+          vpblendmd (%rcx){1to8}, %ymm29, %ymm26
+
+// CHECK: vpblendmd 4064(%rdx), %ymm29, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x15,0x20,0x64,0x52,0x7f]
+          vpblendmd 4064(%rdx), %ymm29, %ymm26
+
+// CHECK: vpblendmd 4096(%rdx), %ymm29, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x15,0x20,0x64,0x92,0x00,0x10,0x00,0x00]
+          vpblendmd 4096(%rdx), %ymm29, %ymm26
+
+// CHECK: vpblendmd -4096(%rdx), %ymm29, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x15,0x20,0x64,0x52,0x80]
+          vpblendmd -4096(%rdx), %ymm29, %ymm26
+
+// CHECK: vpblendmd -4128(%rdx), %ymm29, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x15,0x20,0x64,0x92,0xe0,0xef,0xff,0xff]
+          vpblendmd -4128(%rdx), %ymm29, %ymm26
+
+// CHECK: vpblendmd 508(%rdx){1to8}, %ymm29, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x15,0x30,0x64,0x52,0x7f]
+          vpblendmd 508(%rdx){1to8}, %ymm29, %ymm26
+
+// CHECK: vpblendmd 512(%rdx){1to8}, %ymm29, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x15,0x30,0x64,0x92,0x00,0x02,0x00,0x00]
+          vpblendmd 512(%rdx){1to8}, %ymm29, %ymm26
+
+// CHECK: vpblendmd -512(%rdx){1to8}, %ymm29, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x15,0x30,0x64,0x52,0x80]
+          vpblendmd -512(%rdx){1to8}, %ymm29, %ymm26
+
+// CHECK: vpblendmd -516(%rdx){1to8}, %ymm29, %ymm26
+// CHECK:  encoding: [0x62,0x62,0x15,0x30,0x64,0x92,0xfc,0xfd,0xff,0xff]
+          vpblendmd -516(%rdx){1to8}, %ymm29, %ymm26
+
+// CHECK: vpblendmq %xmm17, %xmm27, %xmm29
+// CHECK:  encoding: [0x62,0x22,0xa5,0x00,0x64,0xe9]
+          vpblendmq %xmm17, %xmm27, %xmm29
+
+// CHECK: vpblendmq %xmm17, %xmm27, %xmm29 {%k6}
+// CHECK:  encoding: [0x62,0x22,0xa5,0x06,0x64,0xe9]
+          vpblendmq %xmm17, %xmm27, %xmm29 {%k6}
+
+// CHECK: vpblendmq %xmm17, %xmm27, %xmm29 {%k6} {z}
+// CHECK:  encoding: [0x62,0x22,0xa5,0x86,0x64,0xe9]
+          vpblendmq %xmm17, %xmm27, %xmm29 {%k6} {z}
+
+// CHECK: vpblendmq (%rcx), %xmm27, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xa5,0x00,0x64,0x29]
+          vpblendmq (%rcx), %xmm27, %xmm29
+
+// CHECK: vpblendmq 291(%rax,%r14,8), %xmm27, %xmm29
+// CHECK:  encoding: [0x62,0x22,0xa5,0x00,0x64,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpblendmq 291(%rax,%r14,8), %xmm27, %xmm29
+
+// CHECK: vpblendmq (%rcx){1to2}, %xmm27, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xa5,0x10,0x64,0x29]
+          vpblendmq (%rcx){1to2}, %xmm27, %xmm29
+
+// CHECK: vpblendmq 2032(%rdx), %xmm27, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xa5,0x00,0x64,0x6a,0x7f]
+          vpblendmq 2032(%rdx), %xmm27, %xmm29
+
+// CHECK: vpblendmq 2048(%rdx), %xmm27, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xa5,0x00,0x64,0xaa,0x00,0x08,0x00,0x00]
+          vpblendmq 2048(%rdx), %xmm27, %xmm29
+
+// CHECK: vpblendmq -2048(%rdx), %xmm27, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xa5,0x00,0x64,0x6a,0x80]
+          vpblendmq -2048(%rdx), %xmm27, %xmm29
+
+// CHECK: vpblendmq -2064(%rdx), %xmm27, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xa5,0x00,0x64,0xaa,0xf0,0xf7,0xff,0xff]
+          vpblendmq -2064(%rdx), %xmm27, %xmm29
+
+// CHECK: vpblendmq 1016(%rdx){1to2}, %xmm27, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xa5,0x10,0x64,0x6a,0x7f]
+          vpblendmq 1016(%rdx){1to2}, %xmm27, %xmm29
+
+// CHECK: vpblendmq 1024(%rdx){1to2}, %xmm27, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xa5,0x10,0x64,0xaa,0x00,0x04,0x00,0x00]
+          vpblendmq 1024(%rdx){1to2}, %xmm27, %xmm29
+
+// CHECK: vpblendmq -1024(%rdx){1to2}, %xmm27, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xa5,0x10,0x64,0x6a,0x80]
+          vpblendmq -1024(%rdx){1to2}, %xmm27, %xmm29
+
+// CHECK: vpblendmq -1032(%rdx){1to2}, %xmm27, %xmm29
+// CHECK:  encoding: [0x62,0x62,0xa5,0x10,0x64,0xaa,0xf8,0xfb,0xff,0xff]
+          vpblendmq -1032(%rdx){1to2}, %xmm27, %xmm29
+
+// CHECK: vpblendmq %ymm21, %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xa2,0xc5,0x20,0x64,0xed]
+          vpblendmq %ymm21, %ymm23, %ymm21
+
+// CHECK: vpblendmq %ymm21, %ymm23, %ymm21 {%k3}
+// CHECK:  encoding: [0x62,0xa2,0xc5,0x23,0x64,0xed]
+          vpblendmq %ymm21, %ymm23, %ymm21 {%k3}
+
+// CHECK: vpblendmq %ymm21, %ymm23, %ymm21 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa2,0xc5,0xa3,0x64,0xed]
+          vpblendmq %ymm21, %ymm23, %ymm21 {%k3} {z}
+
+// CHECK: vpblendmq (%rcx), %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x20,0x64,0x29]
+          vpblendmq (%rcx), %ymm23, %ymm21
+
+// CHECK: vpblendmq 291(%rax,%r14,8), %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xa2,0xc5,0x20,0x64,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpblendmq 291(%rax,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpblendmq (%rcx){1to4}, %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x30,0x64,0x29]
+          vpblendmq (%rcx){1to4}, %ymm23, %ymm21
+
+// CHECK: vpblendmq 4064(%rdx), %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x20,0x64,0x6a,0x7f]
+          vpblendmq 4064(%rdx), %ymm23, %ymm21
+
+// CHECK: vpblendmq 4096(%rdx), %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x20,0x64,0xaa,0x00,0x10,0x00,0x00]
+          vpblendmq 4096(%rdx), %ymm23, %ymm21
+
+// CHECK: vpblendmq -4096(%rdx), %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x20,0x64,0x6a,0x80]
+          vpblendmq -4096(%rdx), %ymm23, %ymm21
+
+// CHECK: vpblendmq -4128(%rdx), %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x20,0x64,0xaa,0xe0,0xef,0xff,0xff]
+          vpblendmq -4128(%rdx), %ymm23, %ymm21
+
+// CHECK: vpblendmq 1016(%rdx){1to4}, %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x30,0x64,0x6a,0x7f]
+          vpblendmq 1016(%rdx){1to4}, %ymm23, %ymm21
+
+// CHECK: vpblendmq 1024(%rdx){1to4}, %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x30,0x64,0xaa,0x00,0x04,0x00,0x00]
+          vpblendmq 1024(%rdx){1to4}, %ymm23, %ymm21
+
+// CHECK: vpblendmq -1024(%rdx){1to4}, %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x30,0x64,0x6a,0x80]
+          vpblendmq -1024(%rdx){1to4}, %ymm23, %ymm21
+
+// CHECK: vpblendmq -1032(%rdx){1to4}, %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x30,0x64,0xaa,0xf8,0xfb,0xff,0xff]
+          vpblendmq -1032(%rdx){1to4}, %ymm23, %ymm21
diff --git a/test/MC/X86/compact-unwind.s b/test/MC/X86/compact-unwind.s
new file mode 100644
index 000000000000..82be239d79c4
--- /dev/null
+++ b/test/MC/X86/compact-unwind.s
@@ -0,0 +1,72 @@
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin10.0 %s | llvm-objdump -unwind-info - | FileCheck %s
+
+	.section	__TEXT,__text,regular,pure_instructions
+	.macosx_version_min 10, 10
+
+# Check that we emit compact-unwind info with UNWIND_X86_MODE_STACK_IND encoding
+
+# CHECK: Contents of __compact_unwind section:
+# CHECK-NEXT:   Entry at offset 0x0:
+# CHECK-NEXT:     start:                0x0 _test0
+# CHECK-NEXT:     length:               0x15
+# CHECK-NEXT:     compact encoding:     0x03056804
+	.globl	_test0
+_test0:                                  ## @test0
+	.cfi_startproc
+## BB#0:                                ## %entry
+	pushq	%rbp
+Ltmp0:
+	.cfi_def_cfa_offset 16
+	pushq	%rbx
+Ltmp1:
+	.cfi_def_cfa_offset 24
+	subq	$14408, %rsp            ## imm = 0x3848
+Ltmp2:
+	.cfi_def_cfa_offset 14432
+Ltmp3:
+	.cfi_offset %rbx, -24
+Ltmp4:
+	.cfi_offset %rbp, -16
+	xorl	%eax, %eax
+	addq	$14408, %rsp            ## imm = 0x3848
+	popq	%rbx
+	popq	%rbp
+	retq
+	.cfi_endproc
+
+# Check that we emit compact-unwind info with UNWIND_X86_MODE_STACK_IMMD encoding
+
+# CHECK:   Entry at offset 0x20:
+# CHECK-NEXT:     start:                0x15 _test1
+# CHECK-NEXT:     length:               0x15
+# CHECK-NEXT:     compact encoding:     0x02360804
+	.globl	_test1
+_test1:                                  ## @test1
+	.cfi_startproc
+## BB#0:                                ## %entry
+	pushq	%rbp
+Ltmp10:
+	.cfi_def_cfa_offset 16
+	pushq	%rbx
+Ltmp11:
+	.cfi_def_cfa_offset 24
+	subq	$408, %rsp              ## imm = 0x198
+Ltmp12:
+	.cfi_def_cfa_offset 432
+Ltmp13:
+	.cfi_offset %rbx, -24
+Ltmp14:
+	.cfi_offset %rbp, -16
+	xorl	%eax, %eax
+	addq	$408, %rsp              ## imm = 0x198
+	popq	%rbx
+	popq	%rbp
+	retq
+	.cfi_endproc
+
+	.section	__TEXT,__cstring,cstring_literals
+L_.str:                                 ## @.str
+	.asciz	"%d\n"
+
+
+.subsections_via_symbols
diff --git a/test/MC/X86/intel-syntax-2.s b/test/MC/X86/intel-syntax-2.s
index d6dbe152cd58..f7bdaf92dbb5 100644
--- a/test/MC/X86/intel-syntax-2.s
+++ b/test/MC/X86/intel-syntax-2.s
@@ -7,3 +7,11 @@ _test:
     .att_syntax
 // CHECK:	movl	$257, -4(%rsp)
     movl $257, -4(%rsp)
+
+_test2:
+.intel_syntax noprefix
+	mov	DWORD PTR [RSP - 4], 255
+// CHECK:	movl	$255, -4(%rsp)
+.att_syntax prefix
+	movl $255, -4(%rsp)
+// CHECK:	movl	$255, -4(%rsp)
diff --git a/test/MC/X86/intel-syntax-ambiguous.s b/test/MC/X86/intel-syntax-ambiguous.s
new file mode 100644
index 000000000000..fe1fe5023902
--- /dev/null
+++ b/test/MC/X86/intel-syntax-ambiguous.s
@@ -0,0 +1,47 @@
+// RUN: not llvm-mc -triple i686-unknown-unknown %s -o /dev/null 2>&1 | FileCheck %s
+
+.intel_syntax
+
+// Basic case of ambiguity for inc.
+
+inc [eax]
+// CHECK: error: ambiguous operand size for instruction 'inc'
+inc dword ptr [eax]
+inc word ptr [eax]
+inc byte ptr [eax]
+// CHECK-NOT: error:
+
+// Other ambiguous instructions.  Anything that doesn't take a register,
+// basically.
+
+dec [eax]
+// CHECK: error: ambiguous operand size for instruction 'dec'
+mov [eax], 1
+// CHECK: error: ambiguous operand size for instruction 'mov'
+and [eax], 0
+// CHECK: error: ambiguous operand size for instruction 'and'
+or [eax], 1
+// CHECK: error: ambiguous operand size for instruction 'or'
+add [eax], 1
+// CHECK: error: ambiguous operand size for instruction 'add'
+sub [eax], 1
+// CHECK: error: ambiguous operand size for instruction 'sub'
+
+// gas assumes these instructions are pointer-sized by default, and we follow
+// suit.
+push [eax]
+call [eax]
+jmp [eax]
+// CHECK-NOT: error:
+
+add byte ptr [eax], eax
+// CHECK: error: invalid operand for instruction
+
+add byte ptr [eax], eax
+// CHECK: error: invalid operand for instruction
+
+add rax, 3
+// CHECK: error: register %rax is only available in 64-bit mode
+
+fadd   "?half@?0??bar@@YAXXZ@4NA"
+// CHECK: error: ambiguous operand size for instruction 'fadd'
diff --git a/test/MC/X86/intel-syntax-error.s b/test/MC/X86/intel-syntax-error.s
new file mode 100644
index 000000000000..7207c952aba7
--- /dev/null
+++ b/test/MC/X86/intel-syntax-error.s
@@ -0,0 +1,13 @@
+// RUN: not llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=att %s -o /dev/null 2>&1 | FileCheck %s
+
+// This tests weird forms of Intel and AT&T syntax that gas accepts that we
+// don't.  The [no]prefix operand of the syntax directive indicates whether
+// registers need a '%' prefix.
+
+.intel_syntax prefix
+// CHECK: error: '.intel_syntax prefix' is not supported: registers must not have a '%' prefix in .intel_syntax
+_test2:
+	mov	DWORD PTR [%esp - 4], 257
+.att_syntax noprefix
+// CHECK: error: '.att_syntax noprefix' is not supported: registers must have a '%' prefix in .att_syntax
+	movl	$257, -4(esp)
diff --git a/test/MC/X86/intel-syntax-ptr-sized.s b/test/MC/X86/intel-syntax-ptr-sized.s
new file mode 100644
index 000000000000..c052c322b801
--- /dev/null
+++ b/test/MC/X86/intel-syntax-ptr-sized.s
@@ -0,0 +1,20 @@
+// RUN: llvm-mc %s -triple=i686-pc-windows | FileCheck %s
+
+.intel_syntax
+
+push [eax]
+// CHECK: pushl (%eax)
+call [eax]
+// CHECK: calll *(%eax)
+jmp [eax]
+// CHECK: jmpl *(%eax)
+
+// mode switch
+.code16
+
+push [eax]
+// CHECK: pushw (%eax)
+call [eax]
+// CHECK: callw *(%eax)
+jmp [eax]
+// CHECK: jmpw *(%eax)
diff --git a/test/MC/X86/intel-syntax-unsized-memory.s b/test/MC/X86/intel-syntax-unsized-memory.s
new file mode 100644
index 000000000000..c9d8e2a5f7c8
--- /dev/null
+++ b/test/MC/X86/intel-syntax-unsized-memory.s
@@ -0,0 +1,29 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -mcpu=knl %s | FileCheck %s
+
+// Check that we deduce unsized memory operands in the general, unambiguous, case.
+// We can't deduce xword memory operands, because there is no instruction
+// unambiguously accessing 80-bit memory.
+
+// CHECK: movb %al, (%rax)
+mov [rax], al
+
+// CHECK: movw %ax, (%rax)
+mov [rax], ax
+
+// CHECK: movl %eax, (%rax)
+mov [rax], eax
+
+// CHECK: movq %rax, (%rax)
+mov [rax], rax
+
+// CHECK: movdqa %xmm0, (%rax)
+movdqa [rax], xmm0
+
+// CHECK: vmovdqa %ymm0, (%rax)
+vmovdqa [rax], ymm0
+
+// CHECK: vaddps (%rax), %zmm1, %zmm1
+vaddps zmm1, zmm1, [rax]
+
+// CHECK: leal 1(%r15d), %r9d
+lea r9d, [r15d+1]
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
index 796891880b12..c027aa426f3b 100644
--- a/test/MC/X86/intel-syntax.s
+++ b/test/MC/X86/intel-syntax.s
@@ -603,7 +603,62 @@ mov rcx, qword ptr [_g0 + 8]
 "?half@?0??bar@@YAXXZ@4NA":
 	.quad   4602678819172646912
 
-fadd   "?half@?0??bar@@YAXXZ@4NA"
-fadd   "?half@?0??bar@@YAXXZ@4NA"@IMGREL
+fadd   dword ptr "?half@?0??bar@@YAXXZ@4NA"
+fadd   dword ptr "?half@?0??bar@@YAXXZ@4NA"@IMGREL
 // CHECK: fadds   "?half@?0??bar@@YAXXZ@4NA"
-// CHECK: fadds   "?half@?0??bar@@YAXXZ@4NA"@IMGREL32
+// CHECK: fadds   "?half@?0??bar@@YAXXZ@4NA"@IMGREL
+
+inc qword ptr [rax]
+inc dword ptr [rax]
+inc word ptr [rax]
+inc byte ptr [rax]
+// CHECK: incq (%rax)
+// CHECK: incl (%rax)
+// CHECK: incw (%rax)
+// CHECK: incb (%rax)
+
+dec qword ptr [rax]
+dec dword ptr [rax]
+dec word ptr [rax]
+dec byte ptr [rax]
+// CHECK: decq (%rax)
+// CHECK: decl (%rax)
+// CHECK: decw (%rax)
+// CHECK: decb (%rax)
+
+add qword ptr [rax], 1
+add dword ptr [rax], 1
+add word ptr [rax], 1
+add byte ptr [rax], 1
+// CHECK: addq $1, (%rax)
+// CHECK: addl $1, (%rax)
+// CHECK: addw $1, (%rax)
+// CHECK: addb $1, (%rax)
+
+fstp xword ptr [rax]
+fstp qword ptr [rax]
+fstp dword ptr [rax]
+// CHECK: fstpt (%rax)
+// CHECK: fstpl (%rax)
+// CHECK: fstps (%rax)
+
+fxsave [eax]
+fsave [eax]
+fxrstor [eax]
+frstor [eax]
+// CHECK: fxsave (%eax)
+// CHECK: wait
+// CHECK: fnsave (%eax)
+// CHECK: fxrstor (%eax)
+// CHECK: frstor (%eax)
+
+// FIXME: Should we accept this?  Masm accepts it, but gas does not.
+fxsave dword ptr [eax]
+fsave dword ptr [eax]
+fxrstor dword ptr [eax]
+frstor dword ptr [eax]
+// CHECK: fxsave (%eax)
+// CHECK: wait
+// CHECK: fnsave (%eax)
+// CHECK: fxrstor (%eax)
+// CHECK: frstor (%eax)
diff --git a/test/MC/X86/macho-uleb.s b/test/MC/X86/macho-uleb.s
new file mode 100644
index 000000000000..46d858b8ae20
--- /dev/null
+++ b/test/MC/X86/macho-uleb.s
@@ -0,0 +1,7 @@
+// RUN: llvm-mc -triple=x86_64-apple-darwin %s | FileCheck %s
+
+a:
+b:
+        .uleb128 a-b
+
+// CHECK:        .uleb128 a-b
diff --git a/test/MC/X86/reloc-macho.s b/test/MC/X86/reloc-macho.s
new file mode 100644
index 000000000000..9297b1ba8b31
--- /dev/null
+++ b/test/MC/X86/reloc-macho.s
@@ -0,0 +1,9 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-apple-darwin %s -o - | llvm-readobj -r | FileCheck %s
+
+// CHECK:      Relocations [
+// CHECK-NEXT: ]
+
+  .section foo,bar
+La:
+Lb:
+ .long   La-Lb
diff --git a/test/MC/X86/sgx-encoding.s b/test/MC/X86/sgx-encoding.s
new file mode 100644
index 000000000000..e6ae8c9beb18
--- /dev/null
+++ b/test/MC/X86/sgx-encoding.s
@@ -0,0 +1,9 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: encls
+// CHECK: encoding: [0x0f,0x01,0xcf]
+	encls
+
+// CHECK: enclu
+// CHECK: encoding: [0x0f,0x01,0xd7]
+	enclu
diff --git a/test/MC/X86/shuffle-comments.s b/test/MC/X86/shuffle-comments.s
index 20fd4ebae4dc..2f22bff35339 100644
--- a/test/MC/X86/shuffle-comments.s
+++ b/test/MC/X86/shuffle-comments.s
@@ -269,3 +269,8 @@ vshufpd $11, %ymm0, %ymm1, %ymm2
 # CHECK: ymm2 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
 vshufpd $11, (%rax), %ymm1, %ymm2
 # CHECK: ymm2 = ymm1[1],mem[1],ymm1[2],mem[3]
+
+vinsertps $16, %xmm0, %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],xmm0[0],xmm1[2,3]
+vinsertps $16, (%rax), %xmm1, %xmm2
+# CHECK: xmm2 = xmm1[0],mem[0],xmm1[2,3]
diff --git a/test/MC/X86/stackmap-nops.ll b/test/MC/X86/stackmap-nops.ll
index 98d17ea6a62b..a0d441866035 100644
--- a/test/MC/X86/stackmap-nops.ll
+++ b/test/MC/X86/stackmap-nops.ll
@@ -41,6 +41,10 @@ entry:
   tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 13, i32 13)
   tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 14)
   tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 15)
+; Add an extra stackmap with a zero-length shadow to thwart the shadow
+; optimization. This will force all 15 bytes of the previous shadow to be
+; padded with nops.
+  tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 0)
   ret void
 }
 
diff --git a/test/MC/X86/validate-inst-att.s b/test/MC/X86/validate-inst-att.s
new file mode 100644
index 000000000000..dec8bfdf6600
--- /dev/null
+++ b/test/MC/X86/validate-inst-att.s
@@ -0,0 +1,7 @@
+# RUN: not llvm-mc -triple i686 -filetype asm -o /dev/null %s 2>&1 | FileCheck %s
+
+	.text
+	int $65535
+# CHECK: error: interrupt vector must be in range [0-255]
+# CHECK:	int $65535
+# CHECK:            ^
diff --git a/test/MC/X86/validate-inst-intel.s b/test/MC/X86/validate-inst-intel.s
new file mode 100644
index 000000000000..9a7d122ca781
--- /dev/null
+++ b/test/MC/X86/validate-inst-intel.s
@@ -0,0 +1,9 @@
+# RUN: not llvm-mc -x86-asm-syntax intel -triple i686 -filetype asm -o /dev/null %s 2>&1 \
+# RUN:    | FileCheck %s
+
+	.text
+	int 65535
+# CHECK: error: interrupt vector must be in range [0-255]
+# CHECK:	int 65535
+# CHECK:            ^
+
diff --git a/test/MC/X86/x86-32-coverage.s b/test/MC/X86/x86-32-coverage.s
index 732874b91872..80c34ece624f 100644
--- a/test/MC/X86/x86-32-coverage.s
+++ b/test/MC/X86/x86-32-coverage.s
@@ -19618,22 +19618,36 @@
 // CHECK:   blendvps	%xmm2, %xmm1    # encoding: [0x66,0x0f,0x38,0x14,0xca]
             blendvps %xmm2, %xmm1
 
-// rdar://9795008
-// These instructions take a mask not an 8-bit sign extended value.
+// These instructions can take an unsigned 8-bit mask as well as a signed 8-bit
+// immediate. Check both forms here.
 // CHECK: blendps $129, %xmm2, %xmm1
           blendps $0x81, %xmm2, %xmm1
+// CHECK: blendps $-64, %xmm2, %xmm1
+          blendps $-64, %xmm2, %xmm1
 // CHECK: blendpd $129, %xmm2, %xmm1
           blendpd $0x81, %xmm2, %xmm1
+// CHECK: blendpd $-64, %xmm2, %xmm1
+          blendpd $-64, %xmm2, %xmm1
 // CHECK: pblendw $129, %xmm2, %xmm1
           pblendw $0x81, %xmm2, %xmm1
+// CHECK: pblendw $-64, %xmm2, %xmm1
+          pblendw $-64, %xmm2, %xmm1
 // CHECK: mpsadbw $129, %xmm2, %xmm1
           mpsadbw $0x81, %xmm2, %xmm1
+// CHECK: mpsadbw $-64, %xmm2, %xmm1
+          mpsadbw $-64, %xmm2, %xmm1
 // CHECK: dpps $129, %xmm2, %xmm1
           dpps $0x81, %xmm2, %xmm1
+// CHECK: dpps $-64, %xmm2, %xmm1
+          dpps $-64, %xmm2, %xmm1
 // CHECK: dppd $129, %xmm2, %xmm1
           dppd $0x81, %xmm2, %xmm1
+// CHECK: dppd $-64, %xmm2, %xmm1
+          dppd $-64, %xmm2, %xmm1
 // CHECK: insertps $129, %xmm2, %xmm1
           insertps $0x81, %xmm2, %xmm1
+// CHECK: insertps $-64, %xmm2, %xmm1
+          insertps $-64, %xmm2, %xmm1
 
 // PR13253 handle implicit optional third argument that must always be xmm0
 // CHECK: pblendvb %xmm2, %xmm1
diff --git a/test/MC/X86/x86-32-ms-inline-asm.s b/test/MC/X86/x86-32-ms-inline-asm.s
index d912915c585e..3169033f2ffc 100644
--- a/test/MC/X86/x86-32-ms-inline-asm.s
+++ b/test/MC/X86/x86-32-ms-inline-asm.s
@@ -90,4 +90,8 @@ _t21:                                   ## @t21
 // CHECK: popal
 // CHECK: # encoding: [0x61]
 
+    fwait
+// CHECK: wait
+// CHECK: # encoding: [0x9b]
+
 	ret
diff --git a/test/MC/X86/x86-64-avx512bw.s b/test/MC/X86/x86-64-avx512bw.s
new file mode 100644
index 000000000000..1abe7776f0c2
--- /dev/null
+++ b/test/MC/X86/x86-64-avx512bw.s
@@ -0,0 +1,1021 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512bw  --show-encoding %s | FileCheck %s
+
+// CHECK: vpaddb %zmm23, %zmm24, %zmm19
+// CHECK:  encoding: [0x62,0xa1,0x3d,0x40,0xfc,0xdf]
+          vpaddb %zmm23, %zmm24, %zmm19
+
+// CHECK: vpaddb %zmm23, %zmm24, %zmm19 {%k3}
+// CHECK:  encoding: [0x62,0xa1,0x3d,0x43,0xfc,0xdf]
+          vpaddb %zmm23, %zmm24, %zmm19 {%k3}
+
+// CHECK: vpaddb %zmm23, %zmm24, %zmm19 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa1,0x3d,0xc3,0xfc,0xdf]
+          vpaddb %zmm23, %zmm24, %zmm19 {%k3} {z}
+
+// CHECK: vpaddb (%rcx), %zmm24, %zmm19
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x40,0xfc,0x19]
+          vpaddb (%rcx), %zmm24, %zmm19
+
+// CHECK: vpaddb 291(%rax,%r14,8), %zmm24, %zmm19
+// CHECK:  encoding: [0x62,0xa1,0x3d,0x40,0xfc,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpaddb 291(%rax,%r14,8), %zmm24, %zmm19
+
+// CHECK: vpaddb 8128(%rdx), %zmm24, %zmm19
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x40,0xfc,0x5a,0x7f]
+          vpaddb 8128(%rdx), %zmm24, %zmm19
+
+// CHECK: vpaddb 8192(%rdx), %zmm24, %zmm19
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x40,0xfc,0x9a,0x00,0x20,0x00,0x00]
+          vpaddb 8192(%rdx), %zmm24, %zmm19
+
+// CHECK: vpaddb -8192(%rdx), %zmm24, %zmm19
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x40,0xfc,0x5a,0x80]
+          vpaddb -8192(%rdx), %zmm24, %zmm19
+
+// CHECK: vpaddb -8256(%rdx), %zmm24, %zmm19
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x40,0xfc,0x9a,0xc0,0xdf,0xff,0xff]
+          vpaddb -8256(%rdx), %zmm24, %zmm19
+
+// CHECK: vpaddw %zmm19, %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xa1,0x3d,0x40,0xfd,0xcb]
+          vpaddw %zmm19, %zmm24, %zmm17
+
+// CHECK: vpaddw %zmm19, %zmm24, %zmm17 {%k3}
+// CHECK:  encoding: [0x62,0xa1,0x3d,0x43,0xfd,0xcb]
+          vpaddw %zmm19, %zmm24, %zmm17 {%k3}
+
+// CHECK: vpaddw %zmm19, %zmm24, %zmm17 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa1,0x3d,0xc3,0xfd,0xcb]
+          vpaddw %zmm19, %zmm24, %zmm17 {%k3} {z}
+
+// CHECK: vpaddw (%rcx), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x40,0xfd,0x09]
+          vpaddw (%rcx), %zmm24, %zmm17
+
+// CHECK: vpaddw 291(%rax,%r14,8), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xa1,0x3d,0x40,0xfd,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpaddw 291(%rax,%r14,8), %zmm24, %zmm17
+
+// CHECK: vpaddw 8128(%rdx), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x40,0xfd,0x4a,0x7f]
+          vpaddw 8128(%rdx), %zmm24, %zmm17
+
+// CHECK: vpaddw 8192(%rdx), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x40,0xfd,0x8a,0x00,0x20,0x00,0x00]
+          vpaddw 8192(%rdx), %zmm24, %zmm17
+
+// CHECK: vpaddw -8192(%rdx), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x40,0xfd,0x4a,0x80]
+          vpaddw -8192(%rdx), %zmm24, %zmm17
+
+// CHECK: vpaddw -8256(%rdx), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x40,0xfd,0x8a,0xc0,0xdf,0xff,0xff]
+          vpaddw -8256(%rdx), %zmm24, %zmm17
+
+// CHECK: vpbroadcastb %eax, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x7a,0xd8]
+          vpbroadcastb %eax, %zmm19
+
+// CHECK: vpbroadcastb %eax, %zmm19 {%k7}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x4f,0x7a,0xd8]
+          vpbroadcastb %eax, %zmm19 {%k7}
+
+// CHECK: vpbroadcastb %eax, %zmm19 {%k7} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0xcf,0x7a,0xd8]
+          vpbroadcastb %eax, %zmm19 {%k7} {z}
+
+// CHECK: vpbroadcastw %eax, %zmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x7b,0xc0]
+          vpbroadcastw %eax, %zmm24
+
+// CHECK: vpbroadcastw %eax, %zmm24 {%k1}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x49,0x7b,0xc0]
+          vpbroadcastw %eax, %zmm24 {%k1}
+
+// CHECK: vpbroadcastw %eax, %zmm24 {%k1} {z}
+// CHECK:  encoding: [0x62,0x62,0x7d,0xc9,0x7b,0xc0]
+          vpbroadcastw %eax, %zmm24 {%k1} {z}
+// CHECK: vpcmpeqb %zmm26, %zmm26, %k4
+// CHECK:  encoding: [0x62,0x91,0x2d,0x40,0x74,0xe2]
+          vpcmpeqb %zmm26, %zmm26, %k4
+
+// CHECK: vpcmpeqb %zmm26, %zmm26, %k4 {%k6}
+// CHECK:  encoding: [0x62,0x91,0x2d,0x46,0x74,0xe2]
+          vpcmpeqb %zmm26, %zmm26, %k4 {%k6}
+
+// CHECK: vpcmpeqb (%rcx), %zmm26, %k4
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x40,0x74,0x21]
+          vpcmpeqb (%rcx), %zmm26, %k4
+
+// CHECK: vpcmpeqb 291(%rax,%r14,8), %zmm26, %k4
+// CHECK:  encoding: [0x62,0xb1,0x2d,0x40,0x74,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpeqb 291(%rax,%r14,8), %zmm26, %k4
+
+// CHECK: vpcmpeqb 8128(%rdx), %zmm26, %k4
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x40,0x74,0x62,0x7f]
+          vpcmpeqb 8128(%rdx), %zmm26, %k4
+
+// CHECK: vpcmpeqb 8192(%rdx), %zmm26, %k4
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x40,0x74,0xa2,0x00,0x20,0x00,0x00]
+          vpcmpeqb 8192(%rdx), %zmm26, %k4
+
+// CHECK: vpcmpeqb -8192(%rdx), %zmm26, %k4
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x40,0x74,0x62,0x80]
+          vpcmpeqb -8192(%rdx), %zmm26, %k4
+
+// CHECK: vpcmpeqb -8256(%rdx), %zmm26, %k4
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x40,0x74,0xa2,0xc0,0xdf,0xff,0xff]
+          vpcmpeqb -8256(%rdx), %zmm26, %k4
+
+// CHECK: vpcmpeqw %zmm19, %zmm23, %k5
+// CHECK:  encoding: [0x62,0xb1,0x45,0x40,0x75,0xeb]
+          vpcmpeqw %zmm19, %zmm23, %k5
+
+// CHECK: vpcmpeqw %zmm19, %zmm23, %k5 {%k7}
+// CHECK:  encoding: [0x62,0xb1,0x45,0x47,0x75,0xeb]
+          vpcmpeqw %zmm19, %zmm23, %k5 {%k7}
+
+// CHECK: vpcmpeqw (%rcx), %zmm23, %k5
+// CHECK:  encoding: [0x62,0xf1,0x45,0x40,0x75,0x29]
+          vpcmpeqw (%rcx), %zmm23, %k5
+
+// CHECK: vpcmpeqw 291(%rax,%r14,8), %zmm23, %k5
+// CHECK:  encoding: [0x62,0xb1,0x45,0x40,0x75,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpeqw 291(%rax,%r14,8), %zmm23, %k5
+
+// CHECK: vpcmpeqw 8128(%rdx), %zmm23, %k5
+// CHECK:  encoding: [0x62,0xf1,0x45,0x40,0x75,0x6a,0x7f]
+          vpcmpeqw 8128(%rdx), %zmm23, %k5
+
+// CHECK: vpcmpeqw 8192(%rdx), %zmm23, %k5
+// CHECK:  encoding: [0x62,0xf1,0x45,0x40,0x75,0xaa,0x00,0x20,0x00,0x00]
+          vpcmpeqw 8192(%rdx), %zmm23, %k5
+
+// CHECK: vpcmpeqw -8192(%rdx), %zmm23, %k5
+// CHECK:  encoding: [0x62,0xf1,0x45,0x40,0x75,0x6a,0x80]
+          vpcmpeqw -8192(%rdx), %zmm23, %k5
+
+// CHECK: vpcmpeqw -8256(%rdx), %zmm23, %k5
+// CHECK:  encoding: [0x62,0xf1,0x45,0x40,0x75,0xaa,0xc0,0xdf,0xff,0xff]
+          vpcmpeqw -8256(%rdx), %zmm23, %k5
+
+// CHECK: vpcmpgtb %zmm20, %zmm30, %k4
+// CHECK:  encoding: [0x62,0xb1,0x0d,0x40,0x64,0xe4]
+          vpcmpgtb %zmm20, %zmm30, %k4
+
+// CHECK: vpcmpgtb %zmm20, %zmm30, %k4 {%k1}
+// CHECK:  encoding: [0x62,0xb1,0x0d,0x41,0x64,0xe4]
+          vpcmpgtb %zmm20, %zmm30, %k4 {%k1}
+
+// CHECK: vpcmpgtb (%rcx), %zmm30, %k4
+// CHECK:  encoding: [0x62,0xf1,0x0d,0x40,0x64,0x21]
+          vpcmpgtb (%rcx), %zmm30, %k4
+
+// CHECK: vpcmpgtb 291(%rax,%r14,8), %zmm30, %k4
+// CHECK:  encoding: [0x62,0xb1,0x0d,0x40,0x64,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpgtb 291(%rax,%r14,8), %zmm30, %k4
+
+// CHECK: vpcmpgtb 8128(%rdx), %zmm30, %k4
+// CHECK:  encoding: [0x62,0xf1,0x0d,0x40,0x64,0x62,0x7f]
+          vpcmpgtb 8128(%rdx), %zmm30, %k4
+
+// CHECK: vpcmpgtb 8192(%rdx), %zmm30, %k4
+// CHECK:  encoding: [0x62,0xf1,0x0d,0x40,0x64,0xa2,0x00,0x20,0x00,0x00]
+          vpcmpgtb 8192(%rdx), %zmm30, %k4
+
+// CHECK: vpcmpgtb -8192(%rdx), %zmm30, %k4
+// CHECK:  encoding: [0x62,0xf1,0x0d,0x40,0x64,0x62,0x80]
+          vpcmpgtb -8192(%rdx), %zmm30, %k4
+
+// CHECK: vpcmpgtb -8256(%rdx), %zmm30, %k4
+// CHECK:  encoding: [0x62,0xf1,0x0d,0x40,0x64,0xa2,0xc0,0xdf,0xff,0xff]
+          vpcmpgtb -8256(%rdx), %zmm30, %k4
+
+// CHECK: vpcmpgtw %zmm21, %zmm23, %k5
+// CHECK:  encoding: [0x62,0xb1,0x45,0x40,0x65,0xed]
+          vpcmpgtw %zmm21, %zmm23, %k5
+
+// CHECK: vpcmpgtw %zmm21, %zmm23, %k5 {%k7}
+// CHECK:  encoding: [0x62,0xb1,0x45,0x47,0x65,0xed]
+          vpcmpgtw %zmm21, %zmm23, %k5 {%k7}
+
+// CHECK: vpcmpgtw (%rcx), %zmm23, %k5
+// CHECK:  encoding: [0x62,0xf1,0x45,0x40,0x65,0x29]
+          vpcmpgtw (%rcx), %zmm23, %k5
+
+// CHECK: vpcmpgtw 291(%rax,%r14,8), %zmm23, %k5
+// CHECK:  encoding: [0x62,0xb1,0x45,0x40,0x65,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpgtw 291(%rax,%r14,8), %zmm23, %k5
+
+// CHECK: vpcmpgtw 8128(%rdx), %zmm23, %k5
+// CHECK:  encoding: [0x62,0xf1,0x45,0x40,0x65,0x6a,0x7f]
+          vpcmpgtw 8128(%rdx), %zmm23, %k5
+
+// CHECK: vpcmpgtw 8192(%rdx), %zmm23, %k5
+// CHECK:  encoding: [0x62,0xf1,0x45,0x40,0x65,0xaa,0x00,0x20,0x00,0x00]
+          vpcmpgtw 8192(%rdx), %zmm23, %k5
+
+// CHECK: vpcmpgtw -8192(%rdx), %zmm23, %k5
+// CHECK:  encoding: [0x62,0xf1,0x45,0x40,0x65,0x6a,0x80]
+          vpcmpgtw -8192(%rdx), %zmm23, %k5
+
+// CHECK: vpcmpgtw -8256(%rdx), %zmm23, %k5
+// CHECK:  encoding: [0x62,0xf1,0x45,0x40,0x65,0xaa,0xc0,0xdf,0xff,0xff]
+          vpcmpgtw -8256(%rdx), %zmm23, %k5
+
+// CHECK: vpmaxsb %zmm26, %zmm29, %zmm23
+// CHECK:  encoding: [0x62,0x82,0x15,0x40,0x3c,0xfa]
+          vpmaxsb %zmm26, %zmm29, %zmm23
+
+// CHECK: vpmaxsb %zmm26, %zmm29, %zmm23 {%k4}
+// CHECK:  encoding: [0x62,0x82,0x15,0x44,0x3c,0xfa]
+          vpmaxsb %zmm26, %zmm29, %zmm23 {%k4}
+
+// CHECK: vpmaxsb %zmm26, %zmm29, %zmm23 {%k4} {z}
+// CHECK:  encoding: [0x62,0x82,0x15,0xc4,0x3c,0xfa]
+          vpmaxsb %zmm26, %zmm29, %zmm23 {%k4} {z}
+
+// CHECK: vpmaxsb (%rcx), %zmm29, %zmm23
+// CHECK:  encoding: [0x62,0xe2,0x15,0x40,0x3c,0x39]
+          vpmaxsb (%rcx), %zmm29, %zmm23
+
+// CHECK: vpmaxsb 291(%rax,%r14,8), %zmm29, %zmm23
+// CHECK:  encoding: [0x62,0xa2,0x15,0x40,0x3c,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxsb 291(%rax,%r14,8), %zmm29, %zmm23
+
+// CHECK: vpmaxsb 8128(%rdx), %zmm29, %zmm23
+// CHECK:  encoding: [0x62,0xe2,0x15,0x40,0x3c,0x7a,0x7f]
+          vpmaxsb 8128(%rdx), %zmm29, %zmm23
+
+// CHECK: vpmaxsb 8192(%rdx), %zmm29, %zmm23
+// CHECK:  encoding: [0x62,0xe2,0x15,0x40,0x3c,0xba,0x00,0x20,0x00,0x00]
+          vpmaxsb 8192(%rdx), %zmm29, %zmm23
+
+// CHECK: vpmaxsb -8192(%rdx), %zmm29, %zmm23
+// CHECK:  encoding: [0x62,0xe2,0x15,0x40,0x3c,0x7a,0x80]
+          vpmaxsb -8192(%rdx), %zmm29, %zmm23
+
+// CHECK: vpmaxsb -8256(%rdx), %zmm29, %zmm23
+// CHECK:  encoding: [0x62,0xe2,0x15,0x40,0x3c,0xba,0xc0,0xdf,0xff,0xff]
+          vpmaxsb -8256(%rdx), %zmm29, %zmm23
+
+// CHECK: vpmaxsw %zmm25, %zmm28, %zmm25
+// CHECK:  encoding: [0x62,0x01,0x1d,0x40,0xee,0xc9]
+          vpmaxsw %zmm25, %zmm28, %zmm25
+
+// CHECK: vpmaxsw %zmm25, %zmm28, %zmm25 {%k3}
+// CHECK:  encoding: [0x62,0x01,0x1d,0x43,0xee,0xc9]
+          vpmaxsw %zmm25, %zmm28, %zmm25 {%k3}
+
+// CHECK: vpmaxsw %zmm25, %zmm28, %zmm25 {%k3} {z}
+// CHECK:  encoding: [0x62,0x01,0x1d,0xc3,0xee,0xc9]
+          vpmaxsw %zmm25, %zmm28, %zmm25 {%k3} {z}
+
+// CHECK: vpmaxsw (%rcx), %zmm28, %zmm25
+// CHECK:  encoding: [0x62,0x61,0x1d,0x40,0xee,0x09]
+          vpmaxsw (%rcx), %zmm28, %zmm25
+
+// CHECK: vpmaxsw 291(%rax,%r14,8), %zmm28, %zmm25
+// CHECK:  encoding: [0x62,0x21,0x1d,0x40,0xee,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxsw 291(%rax,%r14,8), %zmm28, %zmm25
+
+// CHECK: vpmaxsw 8128(%rdx), %zmm28, %zmm25
+// CHECK:  encoding: [0x62,0x61,0x1d,0x40,0xee,0x4a,0x7f]
+          vpmaxsw 8128(%rdx), %zmm28, %zmm25
+
+// CHECK: vpmaxsw 8192(%rdx), %zmm28, %zmm25
+// CHECK:  encoding: [0x62,0x61,0x1d,0x40,0xee,0x8a,0x00,0x20,0x00,0x00]
+          vpmaxsw 8192(%rdx), %zmm28, %zmm25
+
+// CHECK: vpmaxsw -8192(%rdx), %zmm28, %zmm25
+// CHECK:  encoding: [0x62,0x61,0x1d,0x40,0xee,0x4a,0x80]
+          vpmaxsw -8192(%rdx), %zmm28, %zmm25
+
+// CHECK: vpmaxsw -8256(%rdx), %zmm28, %zmm25
+// CHECK:  encoding: [0x62,0x61,0x1d,0x40,0xee,0x8a,0xc0,0xdf,0xff,0xff]
+          vpmaxsw -8256(%rdx), %zmm28, %zmm25
+
+// CHECK: vpmaxub %zmm25, %zmm18, %zmm29
+// CHECK:  encoding: [0x62,0x01,0x6d,0x40,0xde,0xe9]
+          vpmaxub %zmm25, %zmm18, %zmm29
+
+// CHECK: vpmaxub %zmm25, %zmm18, %zmm29 {%k7}
+// CHECK:  encoding: [0x62,0x01,0x6d,0x47,0xde,0xe9]
+          vpmaxub %zmm25, %zmm18, %zmm29 {%k7}
+
+// CHECK: vpmaxub %zmm25, %zmm18, %zmm29 {%k7} {z}
+// CHECK:  encoding: [0x62,0x01,0x6d,0xc7,0xde,0xe9]
+          vpmaxub %zmm25, %zmm18, %zmm29 {%k7} {z}
+
+// CHECK: vpmaxub (%rcx), %zmm18, %zmm29
+// CHECK:  encoding: [0x62,0x61,0x6d,0x40,0xde,0x29]
+          vpmaxub (%rcx), %zmm18, %zmm29
+
+// CHECK: vpmaxub 291(%rax,%r14,8), %zmm18, %zmm29
+// CHECK:  encoding: [0x62,0x21,0x6d,0x40,0xde,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxub 291(%rax,%r14,8), %zmm18, %zmm29
+
+// CHECK: vpmaxub 8128(%rdx), %zmm18, %zmm29
+// CHECK:  encoding: [0x62,0x61,0x6d,0x40,0xde,0x6a,0x7f]
+          vpmaxub 8128(%rdx), %zmm18, %zmm29
+
+// CHECK: vpmaxub 8192(%rdx), %zmm18, %zmm29
+// CHECK:  encoding: [0x62,0x61,0x6d,0x40,0xde,0xaa,0x00,0x20,0x00,0x00]
+          vpmaxub 8192(%rdx), %zmm18, %zmm29
+
+// CHECK: vpmaxub -8192(%rdx), %zmm18, %zmm29
+// CHECK:  encoding: [0x62,0x61,0x6d,0x40,0xde,0x6a,0x80]
+          vpmaxub -8192(%rdx), %zmm18, %zmm29
+
+// CHECK: vpmaxub -8256(%rdx), %zmm18, %zmm29
+// CHECK:  encoding: [0x62,0x61,0x6d,0x40,0xde,0xaa,0xc0,0xdf,0xff,0xff]
+          vpmaxub -8256(%rdx), %zmm18, %zmm29
+
+// CHECK: vpmaxuw %zmm23, %zmm27, %zmm21
+// CHECK:  encoding: [0x62,0xa2,0x25,0x40,0x3e,0xef]
+          vpmaxuw %zmm23, %zmm27, %zmm21
+
+// CHECK: vpmaxuw %zmm23, %zmm27, %zmm21 {%k3}
+// CHECK:  encoding: [0x62,0xa2,0x25,0x43,0x3e,0xef]
+          vpmaxuw %zmm23, %zmm27, %zmm21 {%k3}
+
+// CHECK: vpmaxuw %zmm23, %zmm27, %zmm21 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa2,0x25,0xc3,0x3e,0xef]
+          vpmaxuw %zmm23, %zmm27, %zmm21 {%k3} {z}
+
+// CHECK: vpmaxuw (%rcx), %zmm27, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x25,0x40,0x3e,0x29]
+          vpmaxuw (%rcx), %zmm27, %zmm21
+
+// CHECK: vpmaxuw 291(%rax,%r14,8), %zmm27, %zmm21
+// CHECK:  encoding: [0x62,0xa2,0x25,0x40,0x3e,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxuw 291(%rax,%r14,8), %zmm27, %zmm21
+
+// CHECK: vpmaxuw 8128(%rdx), %zmm27, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x25,0x40,0x3e,0x6a,0x7f]
+          vpmaxuw 8128(%rdx), %zmm27, %zmm21
+
+// CHECK: vpmaxuw 8192(%rdx), %zmm27, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x25,0x40,0x3e,0xaa,0x00,0x20,0x00,0x00]
+          vpmaxuw 8192(%rdx), %zmm27, %zmm21
+
+// CHECK: vpmaxuw -8192(%rdx), %zmm27, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x25,0x40,0x3e,0x6a,0x80]
+          vpmaxuw -8192(%rdx), %zmm27, %zmm21
+
+// CHECK: vpmaxuw -8256(%rdx), %zmm27, %zmm21
+// CHECK:  encoding: [0x62,0xe2,0x25,0x40,0x3e,0xaa,0xc0,0xdf,0xff,0xff]
+          vpmaxuw -8256(%rdx), %zmm27, %zmm21
+
+// CHECK: vpminsb %zmm25, %zmm22, %zmm28
+// CHECK:  encoding: [0x62,0x02,0x4d,0x40,0x38,0xe1]
+          vpminsb %zmm25, %zmm22, %zmm28
+
+// CHECK: vpminsb %zmm25, %zmm22, %zmm28 {%k6}
+// CHECK:  encoding: [0x62,0x02,0x4d,0x46,0x38,0xe1]
+          vpminsb %zmm25, %zmm22, %zmm28 {%k6}
+
+// CHECK: vpminsb %zmm25, %zmm22, %zmm28 {%k6} {z}
+// CHECK:  encoding: [0x62,0x02,0x4d,0xc6,0x38,0xe1]
+          vpminsb %zmm25, %zmm22, %zmm28 {%k6} {z}
+
+// CHECK: vpminsb (%rcx), %zmm22, %zmm28
+// CHECK:  encoding: [0x62,0x62,0x4d,0x40,0x38,0x21]
+          vpminsb (%rcx), %zmm22, %zmm28
+
+// CHECK: vpminsb 291(%rax,%r14,8), %zmm22, %zmm28
+// CHECK:  encoding: [0x62,0x22,0x4d,0x40,0x38,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpminsb 291(%rax,%r14,8), %zmm22, %zmm28
+
+// CHECK: vpminsb 8128(%rdx), %zmm22, %zmm28
+// CHECK:  encoding: [0x62,0x62,0x4d,0x40,0x38,0x62,0x7f]
+          vpminsb 8128(%rdx), %zmm22, %zmm28
+
+// CHECK: vpminsb 8192(%rdx), %zmm22, %zmm28
+// CHECK:  encoding: [0x62,0x62,0x4d,0x40,0x38,0xa2,0x00,0x20,0x00,0x00]
+          vpminsb 8192(%rdx), %zmm22, %zmm28
+
+// CHECK: vpminsb -8192(%rdx), %zmm22, %zmm28
+// CHECK:  encoding: [0x62,0x62,0x4d,0x40,0x38,0x62,0x80]
+          vpminsb -8192(%rdx), %zmm22, %zmm28
+
+// CHECK: vpminsb -8256(%rdx), %zmm22, %zmm28
+// CHECK:  encoding: [0x62,0x62,0x4d,0x40,0x38,0xa2,0xc0,0xdf,0xff,0xff]
+          vpminsb -8256(%rdx), %zmm22, %zmm28
+
+// CHECK: vpminsw %zmm25, %zmm22, %zmm27
+// CHECK:  encoding: [0x62,0x01,0x4d,0x40,0xea,0xd9]
+          vpminsw %zmm25, %zmm22, %zmm27
+
+// CHECK: vpminsw %zmm25, %zmm22, %zmm27 {%k6}
+// CHECK:  encoding: [0x62,0x01,0x4d,0x46,0xea,0xd9]
+          vpminsw %zmm25, %zmm22, %zmm27 {%k6}
+
+// CHECK: vpminsw %zmm25, %zmm22, %zmm27 {%k6} {z}
+// CHECK:  encoding: [0x62,0x01,0x4d,0xc6,0xea,0xd9]
+          vpminsw %zmm25, %zmm22, %zmm27 {%k6} {z}
+
+// CHECK: vpminsw (%rcx), %zmm22, %zmm27
+// CHECK:  encoding: [0x62,0x61,0x4d,0x40,0xea,0x19]
+          vpminsw (%rcx), %zmm22, %zmm27
+
+// CHECK: vpminsw 291(%rax,%r14,8), %zmm22, %zmm27
+// CHECK:  encoding: [0x62,0x21,0x4d,0x40,0xea,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpminsw 291(%rax,%r14,8), %zmm22, %zmm27
+
+// CHECK: vpminsw 8128(%rdx), %zmm22, %zmm27
+// CHECK:  encoding: [0x62,0x61,0x4d,0x40,0xea,0x5a,0x7f]
+          vpminsw 8128(%rdx), %zmm22, %zmm27
+
+// CHECK: vpminsw 8192(%rdx), %zmm22, %zmm27
+// CHECK:  encoding: [0x62,0x61,0x4d,0x40,0xea,0x9a,0x00,0x20,0x00,0x00]
+          vpminsw 8192(%rdx), %zmm22, %zmm27
+
+// CHECK: vpminsw -8192(%rdx), %zmm22, %zmm27
+// CHECK:  encoding: [0x62,0x61,0x4d,0x40,0xea,0x5a,0x80]
+          vpminsw -8192(%rdx), %zmm22, %zmm27
+
+// CHECK: vpminsw -8256(%rdx), %zmm22, %zmm27
+// CHECK:  encoding: [0x62,0x61,0x4d,0x40,0xea,0x9a,0xc0,0xdf,0xff,0xff]
+          vpminsw -8256(%rdx), %zmm22, %zmm27
+
+// CHECK: vpminub %zmm26, %zmm25, %zmm25
+// CHECK:  encoding: [0x62,0x01,0x35,0x40,0xda,0xca]
+          vpminub %zmm26, %zmm25, %zmm25
+
+// CHECK: vpminub %zmm26, %zmm25, %zmm25 {%k6}
+// CHECK:  encoding: [0x62,0x01,0x35,0x46,0xda,0xca]
+          vpminub %zmm26, %zmm25, %zmm25 {%k6}
+
+// CHECK: vpminub %zmm26, %zmm25, %zmm25 {%k6} {z}
+// CHECK:  encoding: [0x62,0x01,0x35,0xc6,0xda,0xca]
+          vpminub %zmm26, %zmm25, %zmm25 {%k6} {z}
+
+// CHECK: vpminub (%rcx), %zmm25, %zmm25
+// CHECK:  encoding: [0x62,0x61,0x35,0x40,0xda,0x09]
+          vpminub (%rcx), %zmm25, %zmm25
+
+// CHECK: vpminub 291(%rax,%r14,8), %zmm25, %zmm25
+// CHECK:  encoding: [0x62,0x21,0x35,0x40,0xda,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpminub 291(%rax,%r14,8), %zmm25, %zmm25
+
+// CHECK: vpminub 8128(%rdx), %zmm25, %zmm25
+// CHECK:  encoding: [0x62,0x61,0x35,0x40,0xda,0x4a,0x7f]
+          vpminub 8128(%rdx), %zmm25, %zmm25
+
+// CHECK: vpminub 8192(%rdx), %zmm25, %zmm25
+// CHECK:  encoding: [0x62,0x61,0x35,0x40,0xda,0x8a,0x00,0x20,0x00,0x00]
+          vpminub 8192(%rdx), %zmm25, %zmm25
+
+// CHECK: vpminub -8192(%rdx), %zmm25, %zmm25
+// CHECK:  encoding: [0x62,0x61,0x35,0x40,0xda,0x4a,0x80]
+          vpminub -8192(%rdx), %zmm25, %zmm25
+
+// CHECK: vpminub -8256(%rdx), %zmm25, %zmm25
+// CHECK:  encoding: [0x62,0x61,0x35,0x40,0xda,0x8a,0xc0,0xdf,0xff,0xff]
+          vpminub -8256(%rdx), %zmm25, %zmm25
+
+// CHECK: vpminuw %zmm20, %zmm29, %zmm19
+// CHECK:  encoding: [0x62,0xa2,0x15,0x40,0x3a,0xdc]
+          vpminuw %zmm20, %zmm29, %zmm19
+
+// CHECK: vpminuw %zmm20, %zmm29, %zmm19 {%k7}
+// CHECK:  encoding: [0x62,0xa2,0x15,0x47,0x3a,0xdc]
+          vpminuw %zmm20, %zmm29, %zmm19 {%k7}
+
+// CHECK: vpminuw %zmm20, %zmm29, %zmm19 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa2,0x15,0xc7,0x3a,0xdc]
+          vpminuw %zmm20, %zmm29, %zmm19 {%k7} {z}
+
+// CHECK: vpminuw (%rcx), %zmm29, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x15,0x40,0x3a,0x19]
+          vpminuw (%rcx), %zmm29, %zmm19
+
+// CHECK: vpminuw 291(%rax,%r14,8), %zmm29, %zmm19
+// CHECK:  encoding: [0x62,0xa2,0x15,0x40,0x3a,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpminuw 291(%rax,%r14,8), %zmm29, %zmm19
+
+// CHECK: vpminuw 8128(%rdx), %zmm29, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x15,0x40,0x3a,0x5a,0x7f]
+          vpminuw 8128(%rdx), %zmm29, %zmm19
+
+// CHECK: vpminuw 8192(%rdx), %zmm29, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x15,0x40,0x3a,0x9a,0x00,0x20,0x00,0x00]
+          vpminuw 8192(%rdx), %zmm29, %zmm19
+
+// CHECK: vpminuw -8192(%rdx), %zmm29, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x15,0x40,0x3a,0x5a,0x80]
+          vpminuw -8192(%rdx), %zmm29, %zmm19
+
+// CHECK: vpminuw -8256(%rdx), %zmm29, %zmm19
+// CHECK:  encoding: [0x62,0xe2,0x15,0x40,0x3a,0x9a,0xc0,0xdf,0xff,0xff]
+          vpminuw -8256(%rdx), %zmm29, %zmm19
+
+// CHECK: vpmullw %zmm19, %zmm28, %zmm19
+// CHECK:  encoding: [0x62,0xa1,0x1d,0x40,0xd5,0xdb]
+          vpmullw %zmm19, %zmm28, %zmm19
+
+// CHECK: vpmullw %zmm19, %zmm28, %zmm19 {%k5}
+// CHECK:  encoding: [0x62,0xa1,0x1d,0x45,0xd5,0xdb]
+          vpmullw %zmm19, %zmm28, %zmm19 {%k5}
+
+// CHECK: vpmullw %zmm19, %zmm28, %zmm19 {%k5} {z}
+// CHECK:  encoding: [0x62,0xa1,0x1d,0xc5,0xd5,0xdb]
+          vpmullw %zmm19, %zmm28, %zmm19 {%k5} {z}
+
+// CHECK: vpmullw (%rcx), %zmm28, %zmm19
+// CHECK:  encoding: [0x62,0xe1,0x1d,0x40,0xd5,0x19]
+          vpmullw (%rcx), %zmm28, %zmm19
+
+// CHECK: vpmullw 291(%rax,%r14,8), %zmm28, %zmm19
+// CHECK:  encoding: [0x62,0xa1,0x1d,0x40,0xd5,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpmullw 291(%rax,%r14,8), %zmm28, %zmm19
+
+// CHECK: vpmullw 8128(%rdx), %zmm28, %zmm19
+// CHECK:  encoding: [0x62,0xe1,0x1d,0x40,0xd5,0x5a,0x7f]
+          vpmullw 8128(%rdx), %zmm28, %zmm19
+
+// CHECK: vpmullw 8192(%rdx), %zmm28, %zmm19
+// CHECK:  encoding: [0x62,0xe1,0x1d,0x40,0xd5,0x9a,0x00,0x20,0x00,0x00]
+          vpmullw 8192(%rdx), %zmm28, %zmm19
+
+// CHECK: vpmullw -8192(%rdx), %zmm28, %zmm19
+// CHECK:  encoding: [0x62,0xe1,0x1d,0x40,0xd5,0x5a,0x80]
+          vpmullw -8192(%rdx), %zmm28, %zmm19
+
+// CHECK: vpmullw -8256(%rdx), %zmm28, %zmm19
+// CHECK:  encoding: [0x62,0xe1,0x1d,0x40,0xd5,0x9a,0xc0,0xdf,0xff,0xff]
+          vpmullw -8256(%rdx), %zmm28, %zmm19
+
+// CHECK: vpsubb %zmm26, %zmm18, %zmm25
+// CHECK:  encoding: [0x62,0x01,0x6d,0x40,0xf8,0xca]
+          vpsubb %zmm26, %zmm18, %zmm25
+
+// CHECK: vpsubb %zmm26, %zmm18, %zmm25 {%k4}
+// CHECK:  encoding: [0x62,0x01,0x6d,0x44,0xf8,0xca]
+          vpsubb %zmm26, %zmm18, %zmm25 {%k4}
+
+// CHECK: vpsubb %zmm26, %zmm18, %zmm25 {%k4} {z}
+// CHECK:  encoding: [0x62,0x01,0x6d,0xc4,0xf8,0xca]
+          vpsubb %zmm26, %zmm18, %zmm25 {%k4} {z}
+
+// CHECK: vpsubb (%rcx), %zmm18, %zmm25
+// CHECK:  encoding: [0x62,0x61,0x6d,0x40,0xf8,0x09]
+          vpsubb (%rcx), %zmm18, %zmm25
+
+// CHECK: vpsubb 291(%rax,%r14,8), %zmm18, %zmm25
+// CHECK:  encoding: [0x62,0x21,0x6d,0x40,0xf8,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpsubb 291(%rax,%r14,8), %zmm18, %zmm25
+
+// CHECK: vpsubb 8128(%rdx), %zmm18, %zmm25
+// CHECK:  encoding: [0x62,0x61,0x6d,0x40,0xf8,0x4a,0x7f]
+          vpsubb 8128(%rdx), %zmm18, %zmm25
+
+// CHECK: vpsubb 8192(%rdx), %zmm18, %zmm25
+// CHECK:  encoding: [0x62,0x61,0x6d,0x40,0xf8,0x8a,0x00,0x20,0x00,0x00]
+          vpsubb 8192(%rdx), %zmm18, %zmm25
+
+// CHECK: vpsubb -8192(%rdx), %zmm18, %zmm25
+// CHECK:  encoding: [0x62,0x61,0x6d,0x40,0xf8,0x4a,0x80]
+          vpsubb -8192(%rdx), %zmm18, %zmm25
+
+// CHECK: vpsubb -8256(%rdx), %zmm18, %zmm25
+// CHECK:  encoding: [0x62,0x61,0x6d,0x40,0xf8,0x8a,0xc0,0xdf,0xff,0xff]
+          vpsubb -8256(%rdx), %zmm18, %zmm25
+
+// CHECK: vpsubw %zmm24, %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0x81,0x3d,0x40,0xf9,0xc8]
+          vpsubw %zmm24, %zmm24, %zmm17
+
+// CHECK: vpsubw %zmm24, %zmm24, %zmm17 {%k4}
+// CHECK:  encoding: [0x62,0x81,0x3d,0x44,0xf9,0xc8]
+          vpsubw %zmm24, %zmm24, %zmm17 {%k4}
+
+// CHECK: vpsubw %zmm24, %zmm24, %zmm17 {%k4} {z}
+// CHECK:  encoding: [0x62,0x81,0x3d,0xc4,0xf9,0xc8]
+          vpsubw %zmm24, %zmm24, %zmm17 {%k4} {z}
+
+// CHECK: vpsubw (%rcx), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x40,0xf9,0x09]
+          vpsubw (%rcx), %zmm24, %zmm17
+
+// CHECK: vpsubw 291(%rax,%r14,8), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xa1,0x3d,0x40,0xf9,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpsubw 291(%rax,%r14,8), %zmm24, %zmm17
+
+// CHECK: vpsubw 8128(%rdx), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x40,0xf9,0x4a,0x7f]
+          vpsubw 8128(%rdx), %zmm24, %zmm17
+
+// CHECK: vpsubw 8192(%rdx), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x40,0xf9,0x8a,0x00,0x20,0x00,0x00]
+          vpsubw 8192(%rdx), %zmm24, %zmm17
+
+// CHECK: vpsubw -8192(%rdx), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x40,0xf9,0x4a,0x80]
+          vpsubw -8192(%rdx), %zmm24, %zmm17
+
+// CHECK: vpsubw -8256(%rdx), %zmm24, %zmm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x40,0xf9,0x8a,0xc0,0xdf,0xff,0xff]
+          vpsubw -8256(%rdx), %zmm24, %zmm17
+
+// CHECK: vmovdqu8 %zmm19, %zmm29
+// CHECK:  encoding: [0x62,0x21,0x7f,0x48,0x6f,0xeb]
+          vmovdqu8 %zmm19, %zmm29
+
+// CHECK: vmovdqu8 %zmm19, %zmm29 {%k7}
+// CHECK:  encoding: [0x62,0x21,0x7f,0x4f,0x6f,0xeb]
+          vmovdqu8 %zmm19, %zmm29 {%k7}
+
+// CHECK: vmovdqu8 %zmm19, %zmm29 {%k7} {z}
+// CHECK:  encoding: [0x62,0x21,0x7f,0xcf,0x6f,0xeb]
+          vmovdqu8 %zmm19, %zmm29 {%k7} {z}
+
+// CHECK: vmovdqu8 (%rcx), %zmm29
+// CHECK:  encoding: [0x62,0x61,0x7f,0x48,0x6f,0x29]
+          vmovdqu8 (%rcx), %zmm29
+
+// CHECK: vmovdqu8 291(%rax,%r14,8), %zmm29
+// CHECK:  encoding: [0x62,0x21,0x7f,0x48,0x6f,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu8 291(%rax,%r14,8), %zmm29
+
+// CHECK: vmovdqu8 8128(%rdx), %zmm29
+// CHECK:  encoding: [0x62,0x61,0x7f,0x48,0x6f,0x6a,0x7f]
+          vmovdqu8 8128(%rdx), %zmm29
+
+// CHECK: vmovdqu8 8192(%rdx), %zmm29
+// CHECK:  encoding: [0x62,0x61,0x7f,0x48,0x6f,0xaa,0x00,0x20,0x00,0x00]
+          vmovdqu8 8192(%rdx), %zmm29
+
+// CHECK: vmovdqu8 -8192(%rdx), %zmm29
+// CHECK:  encoding: [0x62,0x61,0x7f,0x48,0x6f,0x6a,0x80]
+          vmovdqu8 -8192(%rdx), %zmm29
+
+// CHECK: vmovdqu8 -8256(%rdx), %zmm29
+// CHECK:  encoding: [0x62,0x61,0x7f,0x48,0x6f,0xaa,0xc0,0xdf,0xff,0xff]
+          vmovdqu8 -8256(%rdx), %zmm29
+
+// CHECK: vmovdqu16 %zmm18, %zmm17
+// CHECK:  encoding: [0x62,0xa1,0xff,0x48,0x6f,0xca]
+          vmovdqu16 %zmm18, %zmm17
+
+// CHECK: vmovdqu16 %zmm18, %zmm17 {%k3}
+// CHECK:  encoding: [0x62,0xa1,0xff,0x4b,0x6f,0xca]
+          vmovdqu16 %zmm18, %zmm17 {%k3}
+
+// CHECK: vmovdqu16 %zmm18, %zmm17 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa1,0xff,0xcb,0x6f,0xca]
+          vmovdqu16 %zmm18, %zmm17 {%k3} {z}
+
+// CHECK: vmovdqu16 (%rcx), %zmm17
+// CHECK:  encoding: [0x62,0xe1,0xff,0x48,0x6f,0x09]
+          vmovdqu16 (%rcx), %zmm17
+
+// CHECK: vmovdqu16 291(%rax,%r14,8), %zmm17
+// CHECK:  encoding: [0x62,0xa1,0xff,0x48,0x6f,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu16 291(%rax,%r14,8), %zmm17
+
+// CHECK: vmovdqu16 8128(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe1,0xff,0x48,0x6f,0x4a,0x7f]
+          vmovdqu16 8128(%rdx), %zmm17
+
+// CHECK: vmovdqu16 8192(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe1,0xff,0x48,0x6f,0x8a,0x00,0x20,0x00,0x00]
+          vmovdqu16 8192(%rdx), %zmm17
+
+// CHECK: vmovdqu16 -8192(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe1,0xff,0x48,0x6f,0x4a,0x80]
+          vmovdqu16 -8192(%rdx), %zmm17
+
+// CHECK: vmovdqu16 -8256(%rdx), %zmm17
+// CHECK:  encoding: [0x62,0xe1,0xff,0x48,0x6f,0x8a,0xc0,0xdf,0xff,0xff]
+          vmovdqu16 -8256(%rdx), %zmm17
+
+// CHECK: kandq  %k7, %k5, %k5
+// CHECK:  encoding: [0xc4,0xe1,0xd4,0x41,0xef]
+          kandq  %k7, %k5, %k5
+
+// CHECK: kandd  %k4, %k5, %k5
+// CHECK:  encoding: [0xc4,0xe1,0xd5,0x41,0xec]
+          kandd  %k4, %k5, %k5
+
+// CHECK: kandnq %k4, %k5, %k2
+// CHECK:  encoding: [0xc4,0xe1,0xd4,0x42,0xd4]
+          kandnq %k4, %k5, %k2
+
+// CHECK: kandnd %k6, %k6, %k3
+// CHECK:  encoding: [0xc4,0xe1,0xcd,0x42,0xde]
+          kandnd %k6, %k6, %k3
+
+// CHECK: korq   %k4, %k5, %k4
+// CHECK:  encoding: [0xc4,0xe1,0xd4,0x45,0xe4]
+          korq   %k4, %k5, %k4
+
+// CHECK: kord   %k6, %k6, %k5
+// CHECK:  encoding: [0xc4,0xe1,0xcd,0x45,0xee]
+          kord   %k6, %k6, %k5
+
+// CHECK: kxnorq %k6, %k5, %k2
+// CHECK:  encoding: [0xc4,0xe1,0xd4,0x46,0xd6]
+          kxnorq %k6, %k5, %k2
+
+// CHECK: kxnord %k5, %k3, %k5
+// CHECK:  encoding: [0xc4,0xe1,0xe5,0x46,0xed]
+          kxnord %k5, %k3, %k5
+
+// CHECK: kxorq  %k4, %k3, %k2
+// CHECK:  encoding: [0xc4,0xe1,0xe4,0x47,0xd4]
+          kxorq  %k4, %k3, %k2
+
+// CHECK: kxord  %k6, %k5, %k2
+// CHECK:  encoding: [0xc4,0xe1,0xd5,0x47,0xd6]
+          kxord  %k6, %k5, %k2
+
+// CHECK: knotq  %k6, %k3
+// CHECK:  encoding: [0xc4,0xe1,0xf8,0x44,0xde]
+          knotq  %k6, %k3
+
+// CHECK: knotd  %k4, %k3
+// CHECK:  encoding: [0xc4,0xe1,0xf9,0x44,0xdc]
+          knotd  %k4, %k3
+
+// CHECK: kmovq  %k5, %k2
+// CHECK:  encoding: [0xc4,0xe1,0xf8,0x90,0xd5]
+          kmovq  %k5, %k2
+
+// CHECK: kmovq  (%rcx), %k2
+// CHECK:  encoding: [0xc4,0xe1,0xf8,0x90,0x11]
+          kmovq  (%rcx), %k2
+
+// CHECK: kmovq  291(%rax,%r14,8), %k2
+// CHECK:  encoding: [0xc4,0xa1,0xf8,0x90,0x94,0xf0,0x23,0x01,0x00,0x00]
+          kmovq  291(%rax,%r14,8), %k2
+
+// CHECK: kmovd  %k4, %k5
+// CHECK:  encoding: [0xc4,0xe1,0xf9,0x90,0xec]
+          kmovd  %k4, %k5
+
+// CHECK: kmovd  (%rcx), %k5
+// CHECK:  encoding: [0xc4,0xe1,0xf9,0x90,0x29]
+          kmovd  (%rcx), %k5
+
+// CHECK: kmovd  291(%rax,%r14,8), %k5
+// CHECK:  encoding: [0xc4,0xa1,0xf9,0x90,0xac,0xf0,0x23,0x01,0x00,0x00]
+          kmovd  291(%rax,%r14,8), %k5
+
+// CHECK: kmovq  %k3, (%rcx)
+// CHECK:  encoding: [0xc4,0xe1,0xf8,0x91,0x19]
+          kmovq  %k3, (%rcx)
+
+// CHECK: kmovq  %k3, 291(%rax,%r14,8)
+// CHECK:  encoding: [0xc4,0xa1,0xf8,0x91,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          kmovq  %k3, 291(%rax,%r14,8)
+
+// CHECK: kmovd  %k3, (%rcx)
+// CHECK:  encoding: [0xc4,0xe1,0xf9,0x91,0x19]
+          kmovd  %k3, (%rcx)
+
+// CHECK: kmovd  %k3, 291(%rax,%r14,8)
+// CHECK:  encoding: [0xc4,0xa1,0xf9,0x91,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          kmovd  %k3, 291(%rax,%r14,8)
+
+// CHECK: kmovq  %rax, %k2
+// CHECK:  encoding: [0xc4,0xe1,0xfb,0x92,0xd0]
+          kmovq  %rax, %k2
+
+// CHECK: kmovq  %r8, %k2
+// CHECK:  encoding: [0xc4,0xc1,0xfb,0x92,0xd0]
+          kmovq  %r8, %k2
+
+// CHECK: kmovd  %eax, %k4
+// CHECK:  encoding: [0xc5,0xfb,0x92,0xe0]
+          kmovd  %eax, %k4
+
+// CHECK: kmovd  %ebp, %k4
+// CHECK:  encoding: [0xc5,0xfb,0x92,0xe5]
+          kmovd  %ebp, %k4
+
+// CHECK: kmovd  %r13d, %k4
+// CHECK:  encoding: [0xc4,0xc1,0x7b,0x92,0xe5]
+          kmovd  %r13d, %k4
+
+// CHECK: kmovq  %k3, %rax
+// CHECK:  encoding: [0xc4,0xe1,0xfb,0x93,0xc3]
+          kmovq  %k3, %rax
+
+// CHECK: kmovq  %k3, %r8
+// CHECK:  encoding: [0xc4,0x61,0xfb,0x93,0xc3]
+          kmovq  %k3, %r8
+
+// CHECK: kmovd  %k5, %eax
+// CHECK:  encoding: [0xc5,0xfb,0x93,0xc5]
+          kmovd  %k5, %eax
+
+// CHECK: kmovd  %k5, %ebp
+// CHECK:  encoding: [0xc5,0xfb,0x93,0xed]
+          kmovd  %k5, %ebp
+
+// CHECK: kmovd  %k5, %r13d
+// CHECK:  encoding: [0xc5,0x7b,0x93,0xed]
+          kmovd  %k5, %r13d
+
+// CHECK: vmovdqu8 %zmm18, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x48,0x7f,0x11]
+          vmovdqu8 %zmm18, (%rcx)
+
+// CHECK: vmovdqu8 %zmm18, (%rcx) {%k3}
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x4b,0x7f,0x11]
+          vmovdqu8 %zmm18, (%rcx) {%k3}
+
+// CHECK: vmovdqu8 %zmm18, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0x7f,0x48,0x7f,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu8 %zmm18, 291(%rax,%r14,8)
+
+// CHECK: vmovdqu8 %zmm18, 8128(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x48,0x7f,0x52,0x7f]
+          vmovdqu8 %zmm18, 8128(%rdx)
+
+// CHECK: vmovdqu8 %zmm18, 8192(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x48,0x7f,0x92,0x00,0x20,0x00,0x00]
+          vmovdqu8 %zmm18, 8192(%rdx)
+
+// CHECK: vmovdqu8 %zmm18, -8192(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x48,0x7f,0x52,0x80]
+          vmovdqu8 %zmm18, -8192(%rdx)
+
+// CHECK: vmovdqu8 %zmm18, -8256(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x48,0x7f,0x92,0xc0,0xdf,0xff,0xff]
+          vmovdqu8 %zmm18, -8256(%rdx)
+
+// CHECK: vmovdqu16 %zmm28, (%rcx)
+// CHECK:  encoding: [0x62,0x61,0xff,0x48,0x7f,0x21]
+          vmovdqu16 %zmm28, (%rcx)
+
+// CHECK: vmovdqu16 %zmm28, (%rcx) {%k6}
+// CHECK:  encoding: [0x62,0x61,0xff,0x4e,0x7f,0x21]
+          vmovdqu16 %zmm28, (%rcx) {%k6}
+
+// CHECK: vmovdqu16 %zmm28, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x21,0xff,0x48,0x7f,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu16 %zmm28, 291(%rax,%r14,8)
+
+// CHECK: vmovdqu16 %zmm28, 8128(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xff,0x48,0x7f,0x62,0x7f]
+          vmovdqu16 %zmm28, 8128(%rdx)
+
+// CHECK: vmovdqu16 %zmm28, 8192(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xff,0x48,0x7f,0xa2,0x00,0x20,0x00,0x00]
+          vmovdqu16 %zmm28, 8192(%rdx)
+
+// CHECK: vmovdqu16 %zmm28, -8192(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xff,0x48,0x7f,0x62,0x80]
+          vmovdqu16 %zmm28, -8192(%rdx)
+
+// CHECK: vmovdqu16 %zmm28, -8256(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xff,0x48,0x7f,0xa2,0xc0,0xdf,0xff,0xff]
+          vmovdqu16 %zmm28, -8256(%rdx)
+
+
+// CHECK: vpcmpb $171, %zmm25, %zmm26, %k3
+// CHECK:  encoding: [0x62,0x93,0x2d,0x40,0x3f,0xd9,0xab]
+          vpcmpb $171, %zmm25, %zmm26, %k3
+
+// CHECK: vpcmpb $171, %zmm25, %zmm26, %k3 {%k7}
+// CHECK:  encoding: [0x62,0x93,0x2d,0x47,0x3f,0xd9,0xab]
+          vpcmpb $171, %zmm25, %zmm26, %k3 {%k7}
+
+// CHECK: vpcmpb $123, %zmm25, %zmm26, %k3
+// CHECK:  encoding: [0x62,0x93,0x2d,0x40,0x3f,0xd9,0x7b]
+          vpcmpb $123, %zmm25, %zmm26, %k3
+
+// CHECK: vpcmpb $123, (%rcx), %zmm26, %k3
+// CHECK:  encoding: [0x62,0xf3,0x2d,0x40,0x3f,0x19,0x7b]
+          vpcmpb $123, (%rcx), %zmm26, %k3
+
+// CHECK: vpcmpb $123, 291(%rax,%r14,8), %zmm26, %k3
+// CHECK:  encoding: [0x62,0xb3,0x2d,0x40,0x3f,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpb $123, 291(%rax,%r14,8), %zmm26, %k3
+
+// CHECK: vpcmpb $123, 8128(%rdx), %zmm26, %k3
+// CHECK:  encoding: [0x62,0xf3,0x2d,0x40,0x3f,0x5a,0x7f,0x7b]
+          vpcmpb $123, 8128(%rdx), %zmm26, %k3
+
+// CHECK: vpcmpb $123, 8192(%rdx), %zmm26, %k3
+// CHECK:  encoding: [0x62,0xf3,0x2d,0x40,0x3f,0x9a,0x00,0x20,0x00,0x00,0x7b]
+          vpcmpb $123, 8192(%rdx), %zmm26, %k3
+
+// CHECK: vpcmpb $123, -8192(%rdx), %zmm26, %k3
+// CHECK:  encoding: [0x62,0xf3,0x2d,0x40,0x3f,0x5a,0x80,0x7b]
+          vpcmpb $123, -8192(%rdx), %zmm26, %k3
+
+// CHECK: vpcmpb $123, -8256(%rdx), %zmm26, %k3
+// CHECK:  encoding: [0x62,0xf3,0x2d,0x40,0x3f,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
+          vpcmpb $123, -8256(%rdx), %zmm26, %k3
+
+// CHECK: vpcmpw $171, %zmm25, %zmm29, %k3
+// CHECK:  encoding: [0x62,0x93,0x95,0x40,0x3f,0xd9,0xab]
+          vpcmpw $171, %zmm25, %zmm29, %k3
+
+// CHECK: vpcmpw $171, %zmm25, %zmm29, %k3 {%k6}
+// CHECK:  encoding: [0x62,0x93,0x95,0x46,0x3f,0xd9,0xab]
+          vpcmpw $171, %zmm25, %zmm29, %k3 {%k6}
+
+// CHECK: vpcmpw $123, %zmm25, %zmm29, %k3
+// CHECK:  encoding: [0x62,0x93,0x95,0x40,0x3f,0xd9,0x7b]
+          vpcmpw $123, %zmm25, %zmm29, %k3
+
+// CHECK: vpcmpw $123, (%rcx), %zmm29, %k3
+// CHECK:  encoding: [0x62,0xf3,0x95,0x40,0x3f,0x19,0x7b]
+          vpcmpw $123, (%rcx), %zmm29, %k3
+
+// CHECK: vpcmpw $123, 291(%rax,%r14,8), %zmm29, %k3
+// CHECK:  encoding: [0x62,0xb3,0x95,0x40,0x3f,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpw $123, 291(%rax,%r14,8), %zmm29, %k3
+
+// CHECK: vpcmpw $123, 8128(%rdx), %zmm29, %k3
+// CHECK:  encoding: [0x62,0xf3,0x95,0x40,0x3f,0x5a,0x7f,0x7b]
+          vpcmpw $123, 8128(%rdx), %zmm29, %k3
+
+// CHECK: vpcmpw $123, 8192(%rdx), %zmm29, %k3
+// CHECK:  encoding: [0x62,0xf3,0x95,0x40,0x3f,0x9a,0x00,0x20,0x00,0x00,0x7b]
+          vpcmpw $123, 8192(%rdx), %zmm29, %k3
+
+// CHECK: vpcmpw $123, -8192(%rdx), %zmm29, %k3
+// CHECK:  encoding: [0x62,0xf3,0x95,0x40,0x3f,0x5a,0x80,0x7b]
+          vpcmpw $123, -8192(%rdx), %zmm29, %k3
+
+// CHECK: vpcmpw $123, -8256(%rdx), %zmm29, %k3
+// CHECK:  encoding: [0x62,0xf3,0x95,0x40,0x3f,0x9a,0xc0,0xdf,0xff,0xff,0x7b]
+          vpcmpw $123, -8256(%rdx), %zmm29, %k3
+
+// CHECK: vpcmpub $171, %zmm22, %zmm29, %k4
+// CHECK:  encoding: [0x62,0xb3,0x15,0x40,0x3e,0xe6,0xab]
+          vpcmpub $171, %zmm22, %zmm29, %k4
+
+// CHECK: vpcmpub $171, %zmm22, %zmm29, %k4 {%k7}
+// CHECK:  encoding: [0x62,0xb3,0x15,0x47,0x3e,0xe6,0xab]
+          vpcmpub $171, %zmm22, %zmm29, %k4 {%k7}
+
+// CHECK: vpcmpub $123, %zmm22, %zmm29, %k4
+// CHECK:  encoding: [0x62,0xb3,0x15,0x40,0x3e,0xe6,0x7b]
+          vpcmpub $123, %zmm22, %zmm29, %k4
+
+// CHECK: vpcmpub $123, (%rcx), %zmm29, %k4
+// CHECK:  encoding: [0x62,0xf3,0x15,0x40,0x3e,0x21,0x7b]
+          vpcmpub $123, (%rcx), %zmm29, %k4
+
+// CHECK: vpcmpub $123, 291(%rax,%r14,8), %zmm29, %k4
+// CHECK:  encoding: [0x62,0xb3,0x15,0x40,0x3e,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpub $123, 291(%rax,%r14,8), %zmm29, %k4
+
+// CHECK: vpcmpub $123, 8128(%rdx), %zmm29, %k4
+// CHECK:  encoding: [0x62,0xf3,0x15,0x40,0x3e,0x62,0x7f,0x7b]
+          vpcmpub $123, 8128(%rdx), %zmm29, %k4
+
+// CHECK: vpcmpub $123, 8192(%rdx), %zmm29, %k4
+// CHECK:  encoding: [0x62,0xf3,0x15,0x40,0x3e,0xa2,0x00,0x20,0x00,0x00,0x7b]
+          vpcmpub $123, 8192(%rdx), %zmm29, %k4
+
+// CHECK: vpcmpub $123, -8192(%rdx), %zmm29, %k4
+// CHECK:  encoding: [0x62,0xf3,0x15,0x40,0x3e,0x62,0x80,0x7b]
+          vpcmpub $123, -8192(%rdx), %zmm29, %k4
+
+// CHECK: vpcmpub $123, -8256(%rdx), %zmm29, %k4
+// CHECK:  encoding: [0x62,0xf3,0x15,0x40,0x3e,0xa2,0xc0,0xdf,0xff,0xff,0x7b]
+          vpcmpub $123, -8256(%rdx), %zmm29, %k4
+
+// CHECK: vpcmpuw $171, %zmm22, %zmm22, %k4
+// CHECK:  encoding: [0x62,0xb3,0xcd,0x40,0x3e,0xe6,0xab]
+          vpcmpuw $171, %zmm22, %zmm22, %k4
+
+// CHECK: vpcmpuw $171, %zmm22, %zmm22, %k4 {%k7}
+// CHECK:  encoding: [0x62,0xb3,0xcd,0x47,0x3e,0xe6,0xab]
+          vpcmpuw $171, %zmm22, %zmm22, %k4 {%k7}
+
+// CHECK: vpcmpuw $123, %zmm22, %zmm22, %k4
+// CHECK:  encoding: [0x62,0xb3,0xcd,0x40,0x3e,0xe6,0x7b]
+          vpcmpuw $123, %zmm22, %zmm22, %k4
+
+// CHECK: vpcmpuw $123, (%rcx), %zmm22, %k4
+// CHECK:  encoding: [0x62,0xf3,0xcd,0x40,0x3e,0x21,0x7b]
+          vpcmpuw $123, (%rcx), %zmm22, %k4
+
+// CHECK: vpcmpuw $123, 291(%rax,%r14,8), %zmm22, %k4
+// CHECK:  encoding: [0x62,0xb3,0xcd,0x40,0x3e,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpuw $123, 291(%rax,%r14,8), %zmm22, %k4
+
+// CHECK: vpcmpuw $123, 8128(%rdx), %zmm22, %k4
+// CHECK:  encoding: [0x62,0xf3,0xcd,0x40,0x3e,0x62,0x7f,0x7b]
+          vpcmpuw $123, 8128(%rdx), %zmm22, %k4
+
+// CHECK: vpcmpuw $123, 8192(%rdx), %zmm22, %k4
+// CHECK:  encoding: [0x62,0xf3,0xcd,0x40,0x3e,0xa2,0x00,0x20,0x00,0x00,0x7b]
+          vpcmpuw $123, 8192(%rdx), %zmm22, %k4
+
+// CHECK: vpcmpuw $123, -8192(%rdx), %zmm22, %k4
+// CHECK:  encoding: [0x62,0xf3,0xcd,0x40,0x3e,0x62,0x80,0x7b]
+          vpcmpuw $123, -8192(%rdx), %zmm22, %k4
+
+// CHECK: vpcmpuw $123, -8256(%rdx), %zmm22, %k4
+// CHECK:  encoding: [0x62,0xf3,0xcd,0x40,0x3e,0xa2,0xc0,0xdf,0xff,0xff,0x7b]
+          vpcmpuw $123, -8256(%rdx), %zmm22, %k4
diff --git a/test/MC/X86/x86-64-avx512bw_vl.s b/test/MC/X86/x86-64-avx512bw_vl.s
new file mode 100644
index 000000000000..e84755022eb6
--- /dev/null
+++ b/test/MC/X86/x86-64-avx512bw_vl.s
@@ -0,0 +1,1785 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl  --show-encoding %s | FileCheck %s
+
+// CHECK: vpaddb %xmm22, %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x21,0x75,0x00,0xfc,0xd6]
+          vpaddb %xmm22, %xmm17, %xmm26
+
+// CHECK: vpaddb %xmm22, %xmm17, %xmm26 {%k5}
+// CHECK:  encoding: [0x62,0x21,0x75,0x05,0xfc,0xd6]
+          vpaddb %xmm22, %xmm17, %xmm26 {%k5}
+
+// CHECK: vpaddb %xmm22, %xmm17, %xmm26 {%k5} {z}
+// CHECK:  encoding: [0x62,0x21,0x75,0x85,0xfc,0xd6]
+          vpaddb %xmm22, %xmm17, %xmm26 {%k5} {z}
+
+// CHECK: vpaddb (%rcx), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x61,0x75,0x00,0xfc,0x11]
+          vpaddb (%rcx), %xmm17, %xmm26
+
+// CHECK: vpaddb 291(%rax,%r14,8), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x21,0x75,0x00,0xfc,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpaddb 291(%rax,%r14,8), %xmm17, %xmm26
+
+// CHECK: vpaddb 2032(%rdx), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x61,0x75,0x00,0xfc,0x52,0x7f]
+          vpaddb 2032(%rdx), %xmm17, %xmm26
+
+// CHECK: vpaddb 2048(%rdx), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x61,0x75,0x00,0xfc,0x92,0x00,0x08,0x00,0x00]
+          vpaddb 2048(%rdx), %xmm17, %xmm26
+
+// CHECK: vpaddb -2048(%rdx), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x61,0x75,0x00,0xfc,0x52,0x80]
+          vpaddb -2048(%rdx), %xmm17, %xmm26
+
+// CHECK: vpaddb -2064(%rdx), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x61,0x75,0x00,0xfc,0x92,0xf0,0xf7,0xff,0xff]
+          vpaddb -2064(%rdx), %xmm17, %xmm26
+
+// CHECK: vpaddb %ymm28, %ymm27, %ymm26
+// CHECK:  encoding: [0x62,0x01,0x25,0x20,0xfc,0xd4]
+          vpaddb %ymm28, %ymm27, %ymm26
+
+// CHECK: vpaddb %ymm28, %ymm27, %ymm26 {%k3}
+// CHECK:  encoding: [0x62,0x01,0x25,0x23,0xfc,0xd4]
+          vpaddb %ymm28, %ymm27, %ymm26 {%k3}
+
+// CHECK: vpaddb %ymm28, %ymm27, %ymm26 {%k3} {z}
+// CHECK:  encoding: [0x62,0x01,0x25,0xa3,0xfc,0xd4]
+          vpaddb %ymm28, %ymm27, %ymm26 {%k3} {z}
+
+// CHECK: vpaddb (%rcx), %ymm27, %ymm26
+// CHECK:  encoding: [0x62,0x61,0x25,0x20,0xfc,0x11]
+          vpaddb (%rcx), %ymm27, %ymm26
+
+// CHECK: vpaddb 291(%rax,%r14,8), %ymm27, %ymm26
+// CHECK:  encoding: [0x62,0x21,0x25,0x20,0xfc,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpaddb 291(%rax,%r14,8), %ymm27, %ymm26
+
+// CHECK: vpaddb 4064(%rdx), %ymm27, %ymm26
+// CHECK:  encoding: [0x62,0x61,0x25,0x20,0xfc,0x52,0x7f]
+          vpaddb 4064(%rdx), %ymm27, %ymm26
+
+// CHECK: vpaddb 4096(%rdx), %ymm27, %ymm26
+// CHECK:  encoding: [0x62,0x61,0x25,0x20,0xfc,0x92,0x00,0x10,0x00,0x00]
+          vpaddb 4096(%rdx), %ymm27, %ymm26
+
+// CHECK: vpaddb -4096(%rdx), %ymm27, %ymm26
+// CHECK:  encoding: [0x62,0x61,0x25,0x20,0xfc,0x52,0x80]
+          vpaddb -4096(%rdx), %ymm27, %ymm26
+
+// CHECK: vpaddb -4128(%rdx), %ymm27, %ymm26
+// CHECK:  encoding: [0x62,0x61,0x25,0x20,0xfc,0x92,0xe0,0xef,0xff,0xff]
+          vpaddb -4128(%rdx), %ymm27, %ymm26
+
+// CHECK: vpaddw %xmm18, %xmm17, %xmm18
+// CHECK:  encoding: [0x62,0xa1,0x75,0x00,0xfd,0xd2]
+          vpaddw %xmm18, %xmm17, %xmm18
+
+// CHECK: vpaddw %xmm18, %xmm17, %xmm18 {%k1}
+// CHECK:  encoding: [0x62,0xa1,0x75,0x01,0xfd,0xd2]
+          vpaddw %xmm18, %xmm17, %xmm18 {%k1}
+
+// CHECK: vpaddw %xmm18, %xmm17, %xmm18 {%k1} {z}
+// CHECK:  encoding: [0x62,0xa1,0x75,0x81,0xfd,0xd2]
+          vpaddw %xmm18, %xmm17, %xmm18 {%k1} {z}
+
+// CHECK: vpaddw (%rcx), %xmm17, %xmm18
+// CHECK:  encoding: [0x62,0xe1,0x75,0x00,0xfd,0x11]
+          vpaddw (%rcx), %xmm17, %xmm18
+
+// CHECK: vpaddw 291(%rax,%r14,8), %xmm17, %xmm18
+// CHECK:  encoding: [0x62,0xa1,0x75,0x00,0xfd,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpaddw 291(%rax,%r14,8), %xmm17, %xmm18
+
+// CHECK: vpaddw 2032(%rdx), %xmm17, %xmm18
+// CHECK:  encoding: [0x62,0xe1,0x75,0x00,0xfd,0x52,0x7f]
+          vpaddw 2032(%rdx), %xmm17, %xmm18
+
+// CHECK: vpaddw 2048(%rdx), %xmm17, %xmm18
+// CHECK:  encoding: [0x62,0xe1,0x75,0x00,0xfd,0x92,0x00,0x08,0x00,0x00]
+          vpaddw 2048(%rdx), %xmm17, %xmm18
+
+// CHECK: vpaddw -2048(%rdx), %xmm17, %xmm18
+// CHECK:  encoding: [0x62,0xe1,0x75,0x00,0xfd,0x52,0x80]
+          vpaddw -2048(%rdx), %xmm17, %xmm18
+
+// CHECK: vpaddw -2064(%rdx), %xmm17, %xmm18
+// CHECK:  encoding: [0x62,0xe1,0x75,0x00,0xfd,0x92,0xf0,0xf7,0xff,0xff]
+          vpaddw -2064(%rdx), %xmm17, %xmm18
+
+// CHECK: vpaddw %ymm26, %ymm21, %ymm23
+// CHECK:  encoding: [0x62,0x81,0x55,0x20,0xfd,0xfa]
+          vpaddw %ymm26, %ymm21, %ymm23
+
+// CHECK: vpaddw %ymm26, %ymm21, %ymm23 {%k7}
+// CHECK:  encoding: [0x62,0x81,0x55,0x27,0xfd,0xfa]
+          vpaddw %ymm26, %ymm21, %ymm23 {%k7}
+
+// CHECK: vpaddw %ymm26, %ymm21, %ymm23 {%k7} {z}
+// CHECK:  encoding: [0x62,0x81,0x55,0xa7,0xfd,0xfa]
+          vpaddw %ymm26, %ymm21, %ymm23 {%k7} {z}
+
+// CHECK: vpaddw (%rcx), %ymm21, %ymm23
+// CHECK:  encoding: [0x62,0xe1,0x55,0x20,0xfd,0x39]
+          vpaddw (%rcx), %ymm21, %ymm23
+
+// CHECK: vpaddw 291(%rax,%r14,8), %ymm21, %ymm23
+// CHECK:  encoding: [0x62,0xa1,0x55,0x20,0xfd,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vpaddw 291(%rax,%r14,8), %ymm21, %ymm23
+
+// CHECK: vpaddw 4064(%rdx), %ymm21, %ymm23
+// CHECK:  encoding: [0x62,0xe1,0x55,0x20,0xfd,0x7a,0x7f]
+          vpaddw 4064(%rdx), %ymm21, %ymm23
+
+// CHECK: vpaddw 4096(%rdx), %ymm21, %ymm23
+// CHECK:  encoding: [0x62,0xe1,0x55,0x20,0xfd,0xba,0x00,0x10,0x00,0x00]
+          vpaddw 4096(%rdx), %ymm21, %ymm23
+
+// CHECK: vpaddw -4096(%rdx), %ymm21, %ymm23
+// CHECK:  encoding: [0x62,0xe1,0x55,0x20,0xfd,0x7a,0x80]
+          vpaddw -4096(%rdx), %ymm21, %ymm23
+
+// CHECK: vpaddw -4128(%rdx), %ymm21, %ymm23
+// CHECK:  encoding: [0x62,0xe1,0x55,0x20,0xfd,0xba,0xe0,0xef,0xff,0xff]
+          vpaddw -4128(%rdx), %ymm21, %ymm23
+
+// CHECK: vpbroadcastb %eax, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x7a,0xf0]
+          vpbroadcastb %eax, %xmm22
+
+// CHECK: vpbroadcastb %eax, %xmm22 {%k3}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x0b,0x7a,0xf0]
+          vpbroadcastb %eax, %xmm22 {%k3}
+
+// CHECK: vpbroadcastb %eax, %xmm22 {%k3} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x8b,0x7a,0xf0]
+          vpbroadcastb %eax, %xmm22 {%k3} {z}
+
+// CHECK: vpbroadcastb %eax, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x7a,0xc8]
+          vpbroadcastb %eax, %ymm17
+
+// CHECK: vpbroadcastb %eax, %ymm17 {%k1}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x29,0x7a,0xc8]
+          vpbroadcastb %eax, %ymm17 {%k1}
+
+// CHECK: vpbroadcastb %eax, %ymm17 {%k1} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0xa9,0x7a,0xc8]
+          vpbroadcastb %eax, %ymm17 {%k1} {z}
+
+// CHECK: vpbroadcastw %eax, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x7b,0xe8]
+          vpbroadcastw %eax, %xmm29
+
+// CHECK: vpbroadcastw %eax, %xmm29 {%k1}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x09,0x7b,0xe8]
+          vpbroadcastw %eax, %xmm29 {%k1}
+
+// CHECK: vpbroadcastw %eax, %xmm29 {%k1} {z}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x89,0x7b,0xe8]
+          vpbroadcastw %eax, %xmm29 {%k1} {z}
+
+// CHECK: vpbroadcastw %eax, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x7b,0xe0]
+          vpbroadcastw %eax, %ymm28
+
+// CHECK: vpbroadcastw %eax, %ymm28 {%k4}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x2c,0x7b,0xe0]
+          vpbroadcastw %eax, %ymm28 {%k4}
+
+// CHECK: vpbroadcastw %eax, %ymm28 {%k4} {z}
+// CHECK:  encoding: [0x62,0x62,0x7d,0xac,0x7b,0xe0]
+          vpbroadcastw %eax, %ymm28 {%k4} {z}
+
+// CHECK: vpcmpeqb %xmm21, %xmm21, %k4
+// CHECK:  encoding: [0x62,0xb1,0x55,0x00,0x74,0xe5]
+          vpcmpeqb %xmm21, %xmm21, %k4
+
+// CHECK: vpcmpeqb %xmm21, %xmm21, %k4 {%k3}
+// CHECK:  encoding: [0x62,0xb1,0x55,0x03,0x74,0xe5]
+          vpcmpeqb %xmm21, %xmm21, %k4 {%k3}
+
+// CHECK: vpcmpeqb (%rcx), %xmm21, %k4
+// CHECK:  encoding: [0x62,0xf1,0x55,0x00,0x74,0x21]
+          vpcmpeqb (%rcx), %xmm21, %k4
+
+// CHECK: vpcmpeqb 291(%rax,%r14,8), %xmm21, %k4
+// CHECK:  encoding: [0x62,0xb1,0x55,0x00,0x74,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpeqb 291(%rax,%r14,8), %xmm21, %k4
+
+// CHECK: vpcmpeqb 2032(%rdx), %xmm21, %k4
+// CHECK:  encoding: [0x62,0xf1,0x55,0x00,0x74,0x62,0x7f]
+          vpcmpeqb 2032(%rdx), %xmm21, %k4
+
+// CHECK: vpcmpeqb 2048(%rdx), %xmm21, %k4
+// CHECK:  encoding: [0x62,0xf1,0x55,0x00,0x74,0xa2,0x00,0x08,0x00,0x00]
+          vpcmpeqb 2048(%rdx), %xmm21, %k4
+
+// CHECK: vpcmpeqb -2048(%rdx), %xmm21, %k4
+// CHECK:  encoding: [0x62,0xf1,0x55,0x00,0x74,0x62,0x80]
+          vpcmpeqb -2048(%rdx), %xmm21, %k4
+
+// CHECK: vpcmpeqb -2064(%rdx), %xmm21, %k4
+// CHECK:  encoding: [0x62,0xf1,0x55,0x00,0x74,0xa2,0xf0,0xf7,0xff,0xff]
+          vpcmpeqb -2064(%rdx), %xmm21, %k4
+
+// CHECK: vpcmpeqb %ymm18, %ymm21, %k4
+// CHECK:  encoding: [0x62,0xb1,0x55,0x20,0x74,0xe2]
+          vpcmpeqb %ymm18, %ymm21, %k4
+
+// CHECK: vpcmpeqb %ymm18, %ymm21, %k4 {%k1}
+// CHECK:  encoding: [0x62,0xb1,0x55,0x21,0x74,0xe2]
+          vpcmpeqb %ymm18, %ymm21, %k4 {%k1}
+
+// CHECK: vpcmpeqb (%rcx), %ymm21, %k4
+// CHECK:  encoding: [0x62,0xf1,0x55,0x20,0x74,0x21]
+          vpcmpeqb (%rcx), %ymm21, %k4
+
+// CHECK: vpcmpeqb 291(%rax,%r14,8), %ymm21, %k4
+// CHECK:  encoding: [0x62,0xb1,0x55,0x20,0x74,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpeqb 291(%rax,%r14,8), %ymm21, %k4
+
+// CHECK: vpcmpeqb 4064(%rdx), %ymm21, %k4
+// CHECK:  encoding: [0x62,0xf1,0x55,0x20,0x74,0x62,0x7f]
+          vpcmpeqb 4064(%rdx), %ymm21, %k4
+
+// CHECK: vpcmpeqb 4096(%rdx), %ymm21, %k4
+// CHECK:  encoding: [0x62,0xf1,0x55,0x20,0x74,0xa2,0x00,0x10,0x00,0x00]
+          vpcmpeqb 4096(%rdx), %ymm21, %k4
+
+// CHECK: vpcmpeqb -4096(%rdx), %ymm21, %k4
+// CHECK:  encoding: [0x62,0xf1,0x55,0x20,0x74,0x62,0x80]
+          vpcmpeqb -4096(%rdx), %ymm21, %k4
+
+// CHECK: vpcmpeqb -4128(%rdx), %ymm21, %k4
+// CHECK:  encoding: [0x62,0xf1,0x55,0x20,0x74,0xa2,0xe0,0xef,0xff,0xff]
+          vpcmpeqb -4128(%rdx), %ymm21, %k4
+
+// CHECK: vpcmpeqw %xmm27, %xmm30, %k3
+// CHECK:  encoding: [0x62,0x91,0x0d,0x00,0x75,0xdb]
+          vpcmpeqw %xmm27, %xmm30, %k3
+
+// CHECK: vpcmpeqw %xmm27, %xmm30, %k3 {%k1}
+// CHECK:  encoding: [0x62,0x91,0x0d,0x01,0x75,0xdb]
+          vpcmpeqw %xmm27, %xmm30, %k3 {%k1}
+
+// CHECK: vpcmpeqw (%rcx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf1,0x0d,0x00,0x75,0x19]
+          vpcmpeqw (%rcx), %xmm30, %k3
+
+// CHECK: vpcmpeqw 291(%rax,%r14,8), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xb1,0x0d,0x00,0x75,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpeqw 291(%rax,%r14,8), %xmm30, %k3
+
+// CHECK: vpcmpeqw 2032(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf1,0x0d,0x00,0x75,0x5a,0x7f]
+          vpcmpeqw 2032(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpeqw 2048(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf1,0x0d,0x00,0x75,0x9a,0x00,0x08,0x00,0x00]
+          vpcmpeqw 2048(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpeqw -2048(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf1,0x0d,0x00,0x75,0x5a,0x80]
+          vpcmpeqw -2048(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpeqw -2064(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf1,0x0d,0x00,0x75,0x9a,0xf0,0xf7,0xff,0xff]
+          vpcmpeqw -2064(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpeqw %ymm29, %ymm20, %k2
+// CHECK:  encoding: [0x62,0x91,0x5d,0x20,0x75,0xd5]
+          vpcmpeqw %ymm29, %ymm20, %k2
+
+// CHECK: vpcmpeqw %ymm29, %ymm20, %k2 {%k5}
+// CHECK:  encoding: [0x62,0x91,0x5d,0x25,0x75,0xd5]
+          vpcmpeqw %ymm29, %ymm20, %k2 {%k5}
+
+// CHECK: vpcmpeqw (%rcx), %ymm20, %k2
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x20,0x75,0x11]
+          vpcmpeqw (%rcx), %ymm20, %k2
+
+// CHECK: vpcmpeqw 291(%rax,%r14,8), %ymm20, %k2
+// CHECK:  encoding: [0x62,0xb1,0x5d,0x20,0x75,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpeqw 291(%rax,%r14,8), %ymm20, %k2
+
+// CHECK: vpcmpeqw 4064(%rdx), %ymm20, %k2
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x20,0x75,0x52,0x7f]
+          vpcmpeqw 4064(%rdx), %ymm20, %k2
+
+// CHECK: vpcmpeqw 4096(%rdx), %ymm20, %k2
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x20,0x75,0x92,0x00,0x10,0x00,0x00]
+          vpcmpeqw 4096(%rdx), %ymm20, %k2
+
+// CHECK: vpcmpeqw -4096(%rdx), %ymm20, %k2
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x20,0x75,0x52,0x80]
+          vpcmpeqw -4096(%rdx), %ymm20, %k2
+
+// CHECK: vpcmpeqw -4128(%rdx), %ymm20, %k2
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x20,0x75,0x92,0xe0,0xef,0xff,0xff]
+          vpcmpeqw -4128(%rdx), %ymm20, %k2
+
+// CHECK: vpcmpgtb %xmm17, %xmm30, %k3
+// CHECK:  encoding: [0x62,0xb1,0x0d,0x00,0x64,0xd9]
+          vpcmpgtb %xmm17, %xmm30, %k3
+
+// CHECK: vpcmpgtb %xmm17, %xmm30, %k3 {%k7}
+// CHECK:  encoding: [0x62,0xb1,0x0d,0x07,0x64,0xd9]
+          vpcmpgtb %xmm17, %xmm30, %k3 {%k7}
+
+// CHECK: vpcmpgtb (%rcx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf1,0x0d,0x00,0x64,0x19]
+          vpcmpgtb (%rcx), %xmm30, %k3
+
+// CHECK: vpcmpgtb 291(%rax,%r14,8), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xb1,0x0d,0x00,0x64,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpgtb 291(%rax,%r14,8), %xmm30, %k3
+
+// CHECK: vpcmpgtb 2032(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf1,0x0d,0x00,0x64,0x5a,0x7f]
+          vpcmpgtb 2032(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpgtb 2048(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf1,0x0d,0x00,0x64,0x9a,0x00,0x08,0x00,0x00]
+          vpcmpgtb 2048(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpgtb -2048(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf1,0x0d,0x00,0x64,0x5a,0x80]
+          vpcmpgtb -2048(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpgtb -2064(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf1,0x0d,0x00,0x64,0x9a,0xf0,0xf7,0xff,0xff]
+          vpcmpgtb -2064(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpgtb %ymm17, %ymm17, %k2
+// CHECK:  encoding: [0x62,0xb1,0x75,0x20,0x64,0xd1]
+          vpcmpgtb %ymm17, %ymm17, %k2
+
+// CHECK: vpcmpgtb %ymm17, %ymm17, %k2 {%k4}
+// CHECK:  encoding: [0x62,0xb1,0x75,0x24,0x64,0xd1]
+          vpcmpgtb %ymm17, %ymm17, %k2 {%k4}
+
+// CHECK: vpcmpgtb (%rcx), %ymm17, %k2
+// CHECK:  encoding: [0x62,0xf1,0x75,0x20,0x64,0x11]
+          vpcmpgtb (%rcx), %ymm17, %k2
+
+// CHECK: vpcmpgtb 291(%rax,%r14,8), %ymm17, %k2
+// CHECK:  encoding: [0x62,0xb1,0x75,0x20,0x64,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpgtb 291(%rax,%r14,8), %ymm17, %k2
+
+// CHECK: vpcmpgtb 4064(%rdx), %ymm17, %k2
+// CHECK:  encoding: [0x62,0xf1,0x75,0x20,0x64,0x52,0x7f]
+          vpcmpgtb 4064(%rdx), %ymm17, %k2
+
+// CHECK: vpcmpgtb 4096(%rdx), %ymm17, %k2
+// CHECK:  encoding: [0x62,0xf1,0x75,0x20,0x64,0x92,0x00,0x10,0x00,0x00]
+          vpcmpgtb 4096(%rdx), %ymm17, %k2
+
+// CHECK: vpcmpgtb -4096(%rdx), %ymm17, %k2
+// CHECK:  encoding: [0x62,0xf1,0x75,0x20,0x64,0x52,0x80]
+          vpcmpgtb -4096(%rdx), %ymm17, %k2
+
+// CHECK: vpcmpgtb -4128(%rdx), %ymm17, %k2
+// CHECK:  encoding: [0x62,0xf1,0x75,0x20,0x64,0x92,0xe0,0xef,0xff,0xff]
+          vpcmpgtb -4128(%rdx), %ymm17, %k2
+
+// CHECK: vpcmpgtw %xmm22, %xmm28, %k2
+// CHECK:  encoding: [0x62,0xb1,0x1d,0x00,0x65,0xd6]
+          vpcmpgtw %xmm22, %xmm28, %k2
+
+// CHECK: vpcmpgtw %xmm22, %xmm28, %k2 {%k7}
+// CHECK:  encoding: [0x62,0xb1,0x1d,0x07,0x65,0xd6]
+          vpcmpgtw %xmm22, %xmm28, %k2 {%k7}
+
+// CHECK: vpcmpgtw (%rcx), %xmm28, %k2
+// CHECK:  encoding: [0x62,0xf1,0x1d,0x00,0x65,0x11]
+          vpcmpgtw (%rcx), %xmm28, %k2
+
+// CHECK: vpcmpgtw 291(%rax,%r14,8), %xmm28, %k2
+// CHECK:  encoding: [0x62,0xb1,0x1d,0x00,0x65,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpgtw 291(%rax,%r14,8), %xmm28, %k2
+
+// CHECK: vpcmpgtw 2032(%rdx), %xmm28, %k2
+// CHECK:  encoding: [0x62,0xf1,0x1d,0x00,0x65,0x52,0x7f]
+          vpcmpgtw 2032(%rdx), %xmm28, %k2
+
+// CHECK: vpcmpgtw 2048(%rdx), %xmm28, %k2
+// CHECK:  encoding: [0x62,0xf1,0x1d,0x00,0x65,0x92,0x00,0x08,0x00,0x00]
+          vpcmpgtw 2048(%rdx), %xmm28, %k2
+
+// CHECK: vpcmpgtw -2048(%rdx), %xmm28, %k2
+// CHECK:  encoding: [0x62,0xf1,0x1d,0x00,0x65,0x52,0x80]
+          vpcmpgtw -2048(%rdx), %xmm28, %k2
+
+// CHECK: vpcmpgtw -2064(%rdx), %xmm28, %k2
+// CHECK:  encoding: [0x62,0xf1,0x1d,0x00,0x65,0x92,0xf0,0xf7,0xff,0xff]
+          vpcmpgtw -2064(%rdx), %xmm28, %k2
+
+// CHECK: vpcmpgtw %ymm26, %ymm20, %k5
+// CHECK:  encoding: [0x62,0x91,0x5d,0x20,0x65,0xea]
+          vpcmpgtw %ymm26, %ymm20, %k5
+
+// CHECK: vpcmpgtw %ymm26, %ymm20, %k5 {%k2}
+// CHECK:  encoding: [0x62,0x91,0x5d,0x22,0x65,0xea]
+          vpcmpgtw %ymm26, %ymm20, %k5 {%k2}
+
+// CHECK: vpcmpgtw (%rcx), %ymm20, %k5
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x20,0x65,0x29]
+          vpcmpgtw (%rcx), %ymm20, %k5
+
+// CHECK: vpcmpgtw 291(%rax,%r14,8), %ymm20, %k5
+// CHECK:  encoding: [0x62,0xb1,0x5d,0x20,0x65,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpgtw 291(%rax,%r14,8), %ymm20, %k5
+
+// CHECK: vpcmpgtw 4064(%rdx), %ymm20, %k5
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x20,0x65,0x6a,0x7f]
+          vpcmpgtw 4064(%rdx), %ymm20, %k5
+
+// CHECK: vpcmpgtw 4096(%rdx), %ymm20, %k5
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x20,0x65,0xaa,0x00,0x10,0x00,0x00]
+          vpcmpgtw 4096(%rdx), %ymm20, %k5
+
+// CHECK: vpcmpgtw -4096(%rdx), %ymm20, %k5
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x20,0x65,0x6a,0x80]
+          vpcmpgtw -4096(%rdx), %ymm20, %k5
+
+// CHECK: vpcmpgtw -4128(%rdx), %ymm20, %k5
+// CHECK:  encoding: [0x62,0xf1,0x5d,0x20,0x65,0xaa,0xe0,0xef,0xff,0xff]
+          vpcmpgtw -4128(%rdx), %ymm20, %k5
+
+// CHECK: vpcmpb $171, %xmm17, %xmm30, %k3
+// CHECK:  encoding: [0x62,0xb3,0x0d,0x00,0x3f,0xd9,0xab]
+          vpcmpb $171, %xmm17, %xmm30, %k3
+
+// CHECK: vpcmpb $171, %xmm17, %xmm30, %k3 {%k1}
+// CHECK:  encoding: [0x62,0xb3,0x0d,0x01,0x3f,0xd9,0xab]
+          vpcmpb $171, %xmm17, %xmm30, %k3 {%k1}
+
+// CHECK: vpcmpb $123, %xmm17, %xmm30, %k3
+// CHECK:  encoding: [0x62,0xb3,0x0d,0x00,0x3f,0xd9,0x7b]
+          vpcmpb $123, %xmm17, %xmm30, %k3
+
+// CHECK: vpcmpb $123, (%rcx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x0d,0x00,0x3f,0x19,0x7b]
+          vpcmpb $123, (%rcx), %xmm30, %k3
+
+// CHECK: vpcmpb $123, 291(%rax,%r14,8), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xb3,0x0d,0x00,0x3f,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpb $123, 291(%rax,%r14,8), %xmm30, %k3
+
+// CHECK: vpcmpb $123, 2032(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x0d,0x00,0x3f,0x5a,0x7f,0x7b]
+          vpcmpb $123, 2032(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpb $123, 2048(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x0d,0x00,0x3f,0x9a,0x00,0x08,0x00,0x00,0x7b]
+          vpcmpb $123, 2048(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpb $123, -2048(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x0d,0x00,0x3f,0x5a,0x80,0x7b]
+          vpcmpb $123, -2048(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpb $123, -2064(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x0d,0x00,0x3f,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+          vpcmpb $123, -2064(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpb $171, %ymm19, %ymm19, %k5
+// CHECK:  encoding: [0x62,0xb3,0x65,0x20,0x3f,0xeb,0xab]
+          vpcmpb $171, %ymm19, %ymm19, %k5
+
+// CHECK: vpcmpb $171, %ymm19, %ymm19, %k5 {%k4}
+// CHECK:  encoding: [0x62,0xb3,0x65,0x24,0x3f,0xeb,0xab]
+          vpcmpb $171, %ymm19, %ymm19, %k5 {%k4}
+
+// CHECK: vpcmpb $123, %ymm19, %ymm19, %k5
+// CHECK:  encoding: [0x62,0xb3,0x65,0x20,0x3f,0xeb,0x7b]
+          vpcmpb $123, %ymm19, %ymm19, %k5
+
+// CHECK: vpcmpb $123, (%rcx), %ymm19, %k5
+// CHECK:  encoding: [0x62,0xf3,0x65,0x20,0x3f,0x29,0x7b]
+          vpcmpb $123, (%rcx), %ymm19, %k5
+
+// CHECK: vpcmpb $123, 291(%rax,%r14,8), %ymm19, %k5
+// CHECK:  encoding: [0x62,0xb3,0x65,0x20,0x3f,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpb $123, 291(%rax,%r14,8), %ymm19, %k5
+
+// CHECK: vpcmpb $123, 4064(%rdx), %ymm19, %k5
+// CHECK:  encoding: [0x62,0xf3,0x65,0x20,0x3f,0x6a,0x7f,0x7b]
+          vpcmpb $123, 4064(%rdx), %ymm19, %k5
+
+// CHECK: vpcmpb $123, 4096(%rdx), %ymm19, %k5
+// CHECK:  encoding: [0x62,0xf3,0x65,0x20,0x3f,0xaa,0x00,0x10,0x00,0x00,0x7b]
+          vpcmpb $123, 4096(%rdx), %ymm19, %k5
+
+// CHECK: vpcmpb $123, -4096(%rdx), %ymm19, %k5
+// CHECK:  encoding: [0x62,0xf3,0x65,0x20,0x3f,0x6a,0x80,0x7b]
+          vpcmpb $123, -4096(%rdx), %ymm19, %k5
+
+// CHECK: vpcmpb $123, -4128(%rdx), %ymm19, %k5
+// CHECK:  encoding: [0x62,0xf3,0x65,0x20,0x3f,0xaa,0xe0,0xef,0xff,0xff,0x7b]
+          vpcmpb $123, -4128(%rdx), %ymm19, %k5
+
+// CHECK: vpcmpw $171, %xmm22, %xmm30, %k3
+// CHECK:  encoding: [0x62,0xb3,0x8d,0x00,0x3f,0xde,0xab]
+          vpcmpw $171, %xmm22, %xmm30, %k3
+
+// CHECK: vpcmpw $171, %xmm22, %xmm30, %k3 {%k6}
+// CHECK:  encoding: [0x62,0xb3,0x8d,0x06,0x3f,0xde,0xab]
+          vpcmpw $171, %xmm22, %xmm30, %k3 {%k6}
+
+// CHECK: vpcmpw $123, %xmm22, %xmm30, %k3
+// CHECK:  encoding: [0x62,0xb3,0x8d,0x00,0x3f,0xde,0x7b]
+          vpcmpw $123, %xmm22, %xmm30, %k3
+
+// CHECK: vpcmpw $123, (%rcx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x8d,0x00,0x3f,0x19,0x7b]
+          vpcmpw $123, (%rcx), %xmm30, %k3
+
+// CHECK: vpcmpw $123, 291(%rax,%r14,8), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xb3,0x8d,0x00,0x3f,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpw $123, 291(%rax,%r14,8), %xmm30, %k3
+
+// CHECK: vpcmpw $123, 2032(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x8d,0x00,0x3f,0x5a,0x7f,0x7b]
+          vpcmpw $123, 2032(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpw $123, 2048(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x8d,0x00,0x3f,0x9a,0x00,0x08,0x00,0x00,0x7b]
+          vpcmpw $123, 2048(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpw $123, -2048(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x8d,0x00,0x3f,0x5a,0x80,0x7b]
+          vpcmpw $123, -2048(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpw $123, -2064(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x8d,0x00,0x3f,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+          vpcmpw $123, -2064(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpw $171, %ymm18, %ymm26, %k3
+// CHECK:  encoding: [0x62,0xb3,0xad,0x20,0x3f,0xda,0xab]
+          vpcmpw $171, %ymm18, %ymm26, %k3
+
+// CHECK: vpcmpw $171, %ymm18, %ymm26, %k3 {%k3}
+// CHECK:  encoding: [0x62,0xb3,0xad,0x23,0x3f,0xda,0xab]
+          vpcmpw $171, %ymm18, %ymm26, %k3 {%k3}
+
+// CHECK: vpcmpw $123, %ymm18, %ymm26, %k3
+// CHECK:  encoding: [0x62,0xb3,0xad,0x20,0x3f,0xda,0x7b]
+          vpcmpw $123, %ymm18, %ymm26, %k3
+
+// CHECK: vpcmpw $123, (%rcx), %ymm26, %k3
+// CHECK:  encoding: [0x62,0xf3,0xad,0x20,0x3f,0x19,0x7b]
+          vpcmpw $123, (%rcx), %ymm26, %k3
+
+// CHECK: vpcmpw $123, 291(%rax,%r14,8), %ymm26, %k3
+// CHECK:  encoding: [0x62,0xb3,0xad,0x20,0x3f,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpw $123, 291(%rax,%r14,8), %ymm26, %k3
+
+// CHECK: vpcmpw $123, 4064(%rdx), %ymm26, %k3
+// CHECK:  encoding: [0x62,0xf3,0xad,0x20,0x3f,0x5a,0x7f,0x7b]
+          vpcmpw $123, 4064(%rdx), %ymm26, %k3
+
+// CHECK: vpcmpw $123, 4096(%rdx), %ymm26, %k3
+// CHECK:  encoding: [0x62,0xf3,0xad,0x20,0x3f,0x9a,0x00,0x10,0x00,0x00,0x7b]
+          vpcmpw $123, 4096(%rdx), %ymm26, %k3
+
+// CHECK: vpcmpw $123, -4096(%rdx), %ymm26, %k3
+// CHECK:  encoding: [0x62,0xf3,0xad,0x20,0x3f,0x5a,0x80,0x7b]
+          vpcmpw $123, -4096(%rdx), %ymm26, %k3
+
+// CHECK: vpcmpw $123, -4128(%rdx), %ymm26, %k3
+// CHECK:  encoding: [0x62,0xf3,0xad,0x20,0x3f,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+          vpcmpw $123, -4128(%rdx), %ymm26, %k3
+
+// CHECK: vpcmpub $171, %xmm21, %xmm22, %k5
+// CHECK:  encoding: [0x62,0xb3,0x4d,0x00,0x3e,0xed,0xab]
+          vpcmpub $171, %xmm21, %xmm22, %k5
+
+// CHECK: vpcmpub $171, %xmm21, %xmm22, %k5 {%k3}
+// CHECK:  encoding: [0x62,0xb3,0x4d,0x03,0x3e,0xed,0xab]
+          vpcmpub $171, %xmm21, %xmm22, %k5 {%k3}
+
+// CHECK: vpcmpub $123, %xmm21, %xmm22, %k5
+// CHECK:  encoding: [0x62,0xb3,0x4d,0x00,0x3e,0xed,0x7b]
+          vpcmpub $123, %xmm21, %xmm22, %k5
+
+// CHECK: vpcmpub $123, (%rcx), %xmm22, %k5
+// CHECK:  encoding: [0x62,0xf3,0x4d,0x00,0x3e,0x29,0x7b]
+          vpcmpub $123, (%rcx), %xmm22, %k5
+
+// CHECK: vpcmpub $123, 291(%rax,%r14,8), %xmm22, %k5
+// CHECK:  encoding: [0x62,0xb3,0x4d,0x00,0x3e,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpub $123, 291(%rax,%r14,8), %xmm22, %k5
+
+// CHECK: vpcmpub $123, 2032(%rdx), %xmm22, %k5
+// CHECK:  encoding: [0x62,0xf3,0x4d,0x00,0x3e,0x6a,0x7f,0x7b]
+          vpcmpub $123, 2032(%rdx), %xmm22, %k5
+
+// CHECK: vpcmpub $123, 2048(%rdx), %xmm22, %k5
+// CHECK:  encoding: [0x62,0xf3,0x4d,0x00,0x3e,0xaa,0x00,0x08,0x00,0x00,0x7b]
+          vpcmpub $123, 2048(%rdx), %xmm22, %k5
+
+// CHECK: vpcmpub $123, -2048(%rdx), %xmm22, %k5
+// CHECK:  encoding: [0x62,0xf3,0x4d,0x00,0x3e,0x6a,0x80,0x7b]
+          vpcmpub $123, -2048(%rdx), %xmm22, %k5
+
+// CHECK: vpcmpub $123, -2064(%rdx), %xmm22, %k5
+// CHECK:  encoding: [0x62,0xf3,0x4d,0x00,0x3e,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+          vpcmpub $123, -2064(%rdx), %xmm22, %k5
+
+// CHECK: vpcmpub $171, %ymm21, %ymm23, %k2
+// CHECK:  encoding: [0x62,0xb3,0x45,0x20,0x3e,0xd5,0xab]
+          vpcmpub $171, %ymm21, %ymm23, %k2
+
+// CHECK: vpcmpub $171, %ymm21, %ymm23, %k2 {%k2}
+// CHECK:  encoding: [0x62,0xb3,0x45,0x22,0x3e,0xd5,0xab]
+          vpcmpub $171, %ymm21, %ymm23, %k2 {%k2}
+
+// CHECK: vpcmpub $123, %ymm21, %ymm23, %k2
+// CHECK:  encoding: [0x62,0xb3,0x45,0x20,0x3e,0xd5,0x7b]
+          vpcmpub $123, %ymm21, %ymm23, %k2
+
+// CHECK: vpcmpub $123, (%rcx), %ymm23, %k2
+// CHECK:  encoding: [0x62,0xf3,0x45,0x20,0x3e,0x11,0x7b]
+          vpcmpub $123, (%rcx), %ymm23, %k2
+
+// CHECK: vpcmpub $123, 291(%rax,%r14,8), %ymm23, %k2
+// CHECK:  encoding: [0x62,0xb3,0x45,0x20,0x3e,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpub $123, 291(%rax,%r14,8), %ymm23, %k2
+
+// CHECK: vpcmpub $123, 4064(%rdx), %ymm23, %k2
+// CHECK:  encoding: [0x62,0xf3,0x45,0x20,0x3e,0x52,0x7f,0x7b]
+          vpcmpub $123, 4064(%rdx), %ymm23, %k2
+
+// CHECK: vpcmpub $123, 4096(%rdx), %ymm23, %k2
+// CHECK:  encoding: [0x62,0xf3,0x45,0x20,0x3e,0x92,0x00,0x10,0x00,0x00,0x7b]
+          vpcmpub $123, 4096(%rdx), %ymm23, %k2
+
+// CHECK: vpcmpub $123, -4096(%rdx), %ymm23, %k2
+// CHECK:  encoding: [0x62,0xf3,0x45,0x20,0x3e,0x52,0x80,0x7b]
+          vpcmpub $123, -4096(%rdx), %ymm23, %k2
+
+// CHECK: vpcmpub $123, -4128(%rdx), %ymm23, %k2
+// CHECK:  encoding: [0x62,0xf3,0x45,0x20,0x3e,0x92,0xe0,0xef,0xff,0xff,0x7b]
+          vpcmpub $123, -4128(%rdx), %ymm23, %k2
+
+// CHECK: vpcmpuw $171, %xmm17, %xmm28, %k5
+// CHECK:  encoding: [0x62,0xb3,0x9d,0x00,0x3e,0xe9,0xab]
+          vpcmpuw $171, %xmm17, %xmm28, %k5
+
+// CHECK: vpcmpuw $171, %xmm17, %xmm28, %k5 {%k4}
+// CHECK:  encoding: [0x62,0xb3,0x9d,0x04,0x3e,0xe9,0xab]
+          vpcmpuw $171, %xmm17, %xmm28, %k5 {%k4}
+
+// CHECK: vpcmpuw $123, %xmm17, %xmm28, %k5
+// CHECK:  encoding: [0x62,0xb3,0x9d,0x00,0x3e,0xe9,0x7b]
+          vpcmpuw $123, %xmm17, %xmm28, %k5
+
+// CHECK: vpcmpuw $123, (%rcx), %xmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x00,0x3e,0x29,0x7b]
+          vpcmpuw $123, (%rcx), %xmm28, %k5
+
+// CHECK: vpcmpuw $123, 291(%rax,%r14,8), %xmm28, %k5
+// CHECK:  encoding: [0x62,0xb3,0x9d,0x00,0x3e,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpuw $123, 291(%rax,%r14,8), %xmm28, %k5
+
+// CHECK: vpcmpuw $123, 2032(%rdx), %xmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x00,0x3e,0x6a,0x7f,0x7b]
+          vpcmpuw $123, 2032(%rdx), %xmm28, %k5
+
+// CHECK: vpcmpuw $123, 2048(%rdx), %xmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x00,0x3e,0xaa,0x00,0x08,0x00,0x00,0x7b]
+          vpcmpuw $123, 2048(%rdx), %xmm28, %k5
+
+// CHECK: vpcmpuw $123, -2048(%rdx), %xmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x00,0x3e,0x6a,0x80,0x7b]
+          vpcmpuw $123, -2048(%rdx), %xmm28, %k5
+
+// CHECK: vpcmpuw $123, -2064(%rdx), %xmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x00,0x3e,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+          vpcmpuw $123, -2064(%rdx), %xmm28, %k5
+
+// CHECK: vpcmpuw $171, %ymm28, %ymm27, %k4
+// CHECK:  encoding: [0x62,0x93,0xa5,0x20,0x3e,0xe4,0xab]
+          vpcmpuw $171, %ymm28, %ymm27, %k4
+
+// CHECK: vpcmpuw $171, %ymm28, %ymm27, %k4 {%k2}
+// CHECK:  encoding: [0x62,0x93,0xa5,0x22,0x3e,0xe4,0xab]
+          vpcmpuw $171, %ymm28, %ymm27, %k4 {%k2}
+
+// CHECK: vpcmpuw $123, %ymm28, %ymm27, %k4
+// CHECK:  encoding: [0x62,0x93,0xa5,0x20,0x3e,0xe4,0x7b]
+          vpcmpuw $123, %ymm28, %ymm27, %k4
+
+// CHECK: vpcmpuw $123, (%rcx), %ymm27, %k4
+// CHECK:  encoding: [0x62,0xf3,0xa5,0x20,0x3e,0x21,0x7b]
+          vpcmpuw $123, (%rcx), %ymm27, %k4
+
+// CHECK: vpcmpuw $123, 291(%rax,%r14,8), %ymm27, %k4
+// CHECK:  encoding: [0x62,0xb3,0xa5,0x20,0x3e,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpuw $123, 291(%rax,%r14,8), %ymm27, %k4
+
+// CHECK: vpcmpuw $123, 4064(%rdx), %ymm27, %k4
+// CHECK:  encoding: [0x62,0xf3,0xa5,0x20,0x3e,0x62,0x7f,0x7b]
+          vpcmpuw $123, 4064(%rdx), %ymm27, %k4
+
+// CHECK: vpcmpuw $123, 4096(%rdx), %ymm27, %k4
+// CHECK:  encoding: [0x62,0xf3,0xa5,0x20,0x3e,0xa2,0x00,0x10,0x00,0x00,0x7b]
+          vpcmpuw $123, 4096(%rdx), %ymm27, %k4
+
+// CHECK: vpcmpuw $123, -4096(%rdx), %ymm27, %k4
+// CHECK:  encoding: [0x62,0xf3,0xa5,0x20,0x3e,0x62,0x80,0x7b]
+          vpcmpuw $123, -4096(%rdx), %ymm27, %k4
+
+// CHECK: vpcmpuw $123, -4128(%rdx), %ymm27, %k4
+// CHECK:  encoding: [0x62,0xf3,0xa5,0x20,0x3e,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+          vpcmpuw $123, -4128(%rdx), %ymm27, %k4
+
+// CHECK: vpmaxsb %xmm17, %xmm24, %xmm23
+// CHECK:  encoding: [0x62,0xa2,0x3d,0x00,0x3c,0xf9]
+          vpmaxsb %xmm17, %xmm24, %xmm23
+
+// CHECK: vpmaxsb %xmm17, %xmm24, %xmm23 {%k3}
+// CHECK:  encoding: [0x62,0xa2,0x3d,0x03,0x3c,0xf9]
+          vpmaxsb %xmm17, %xmm24, %xmm23 {%k3}
+
+// CHECK: vpmaxsb %xmm17, %xmm24, %xmm23 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa2,0x3d,0x83,0x3c,0xf9]
+          vpmaxsb %xmm17, %xmm24, %xmm23 {%k3} {z}
+
+// CHECK: vpmaxsb (%rcx), %xmm24, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x00,0x3c,0x39]
+          vpmaxsb (%rcx), %xmm24, %xmm23
+
+// CHECK: vpmaxsb 291(%rax,%r14,8), %xmm24, %xmm23
+// CHECK:  encoding: [0x62,0xa2,0x3d,0x00,0x3c,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxsb 291(%rax,%r14,8), %xmm24, %xmm23
+
+// CHECK: vpmaxsb 2032(%rdx), %xmm24, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x00,0x3c,0x7a,0x7f]
+          vpmaxsb 2032(%rdx), %xmm24, %xmm23
+
+// CHECK: vpmaxsb 2048(%rdx), %xmm24, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x00,0x3c,0xba,0x00,0x08,0x00,0x00]
+          vpmaxsb 2048(%rdx), %xmm24, %xmm23
+
+// CHECK: vpmaxsb -2048(%rdx), %xmm24, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x00,0x3c,0x7a,0x80]
+          vpmaxsb -2048(%rdx), %xmm24, %xmm23
+
+// CHECK: vpmaxsb -2064(%rdx), %xmm24, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x3d,0x00,0x3c,0xba,0xf0,0xf7,0xff,0xff]
+          vpmaxsb -2064(%rdx), %xmm24, %xmm23
+
+// CHECK: vpmaxsb %ymm27, %ymm22, %ymm30
+// CHECK:  encoding: [0x62,0x02,0x4d,0x20,0x3c,0xf3]
+          vpmaxsb %ymm27, %ymm22, %ymm30
+
+// CHECK: vpmaxsb %ymm27, %ymm22, %ymm30 {%k5}
+// CHECK:  encoding: [0x62,0x02,0x4d,0x25,0x3c,0xf3]
+          vpmaxsb %ymm27, %ymm22, %ymm30 {%k5}
+
+// CHECK: vpmaxsb %ymm27, %ymm22, %ymm30 {%k5} {z}
+// CHECK:  encoding: [0x62,0x02,0x4d,0xa5,0x3c,0xf3]
+          vpmaxsb %ymm27, %ymm22, %ymm30 {%k5} {z}
+
+// CHECK: vpmaxsb (%rcx), %ymm22, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x4d,0x20,0x3c,0x31]
+          vpmaxsb (%rcx), %ymm22, %ymm30
+
+// CHECK: vpmaxsb 291(%rax,%r14,8), %ymm22, %ymm30
+// CHECK:  encoding: [0x62,0x22,0x4d,0x20,0x3c,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxsb 291(%rax,%r14,8), %ymm22, %ymm30
+
+// CHECK: vpmaxsb 4064(%rdx), %ymm22, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x4d,0x20,0x3c,0x72,0x7f]
+          vpmaxsb 4064(%rdx), %ymm22, %ymm30
+
+// CHECK: vpmaxsb 4096(%rdx), %ymm22, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x4d,0x20,0x3c,0xb2,0x00,0x10,0x00,0x00]
+          vpmaxsb 4096(%rdx), %ymm22, %ymm30
+
+// CHECK: vpmaxsb -4096(%rdx), %ymm22, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x4d,0x20,0x3c,0x72,0x80]
+          vpmaxsb -4096(%rdx), %ymm22, %ymm30
+
+// CHECK: vpmaxsb -4128(%rdx), %ymm22, %ymm30
+// CHECK:  encoding: [0x62,0x62,0x4d,0x20,0x3c,0xb2,0xe0,0xef,0xff,0xff]
+          vpmaxsb -4128(%rdx), %ymm22, %ymm30
+
+// CHECK: vpmaxsw %xmm28, %xmm24, %xmm18
+// CHECK:  encoding: [0x62,0x81,0x3d,0x00,0xee,0xd4]
+          vpmaxsw %xmm28, %xmm24, %xmm18
+
+// CHECK: vpmaxsw %xmm28, %xmm24, %xmm18 {%k3}
+// CHECK:  encoding: [0x62,0x81,0x3d,0x03,0xee,0xd4]
+          vpmaxsw %xmm28, %xmm24, %xmm18 {%k3}
+
+// CHECK: vpmaxsw %xmm28, %xmm24, %xmm18 {%k3} {z}
+// CHECK:  encoding: [0x62,0x81,0x3d,0x83,0xee,0xd4]
+          vpmaxsw %xmm28, %xmm24, %xmm18 {%k3} {z}
+
+// CHECK: vpmaxsw (%rcx), %xmm24, %xmm18
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x00,0xee,0x11]
+          vpmaxsw (%rcx), %xmm24, %xmm18
+
+// CHECK: vpmaxsw 291(%rax,%r14,8), %xmm24, %xmm18
+// CHECK:  encoding: [0x62,0xa1,0x3d,0x00,0xee,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxsw 291(%rax,%r14,8), %xmm24, %xmm18
+
+// CHECK: vpmaxsw 2032(%rdx), %xmm24, %xmm18
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x00,0xee,0x52,0x7f]
+          vpmaxsw 2032(%rdx), %xmm24, %xmm18
+
+// CHECK: vpmaxsw 2048(%rdx), %xmm24, %xmm18
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x00,0xee,0x92,0x00,0x08,0x00,0x00]
+          vpmaxsw 2048(%rdx), %xmm24, %xmm18
+
+// CHECK: vpmaxsw -2048(%rdx), %xmm24, %xmm18
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x00,0xee,0x52,0x80]
+          vpmaxsw -2048(%rdx), %xmm24, %xmm18
+
+// CHECK: vpmaxsw -2064(%rdx), %xmm24, %xmm18
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x00,0xee,0x92,0xf0,0xf7,0xff,0xff]
+          vpmaxsw -2064(%rdx), %xmm24, %xmm18
+
+// CHECK: vpmaxsw %ymm17, %ymm28, %ymm27
+// CHECK:  encoding: [0x62,0x21,0x1d,0x20,0xee,0xd9]
+          vpmaxsw %ymm17, %ymm28, %ymm27
+
+// CHECK: vpmaxsw %ymm17, %ymm28, %ymm27 {%k6}
+// CHECK:  encoding: [0x62,0x21,0x1d,0x26,0xee,0xd9]
+          vpmaxsw %ymm17, %ymm28, %ymm27 {%k6}
+
+// CHECK: vpmaxsw %ymm17, %ymm28, %ymm27 {%k6} {z}
+// CHECK:  encoding: [0x62,0x21,0x1d,0xa6,0xee,0xd9]
+          vpmaxsw %ymm17, %ymm28, %ymm27 {%k6} {z}
+
+// CHECK: vpmaxsw (%rcx), %ymm28, %ymm27
+// CHECK:  encoding: [0x62,0x61,0x1d,0x20,0xee,0x19]
+          vpmaxsw (%rcx), %ymm28, %ymm27
+
+// CHECK: vpmaxsw 291(%rax,%r14,8), %ymm28, %ymm27
+// CHECK:  encoding: [0x62,0x21,0x1d,0x20,0xee,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxsw 291(%rax,%r14,8), %ymm28, %ymm27
+
+// CHECK: vpmaxsw 4064(%rdx), %ymm28, %ymm27
+// CHECK:  encoding: [0x62,0x61,0x1d,0x20,0xee,0x5a,0x7f]
+          vpmaxsw 4064(%rdx), %ymm28, %ymm27
+
+// CHECK: vpmaxsw 4096(%rdx), %ymm28, %ymm27
+// CHECK:  encoding: [0x62,0x61,0x1d,0x20,0xee,0x9a,0x00,0x10,0x00,0x00]
+          vpmaxsw 4096(%rdx), %ymm28, %ymm27
+
+// CHECK: vpmaxsw -4096(%rdx), %ymm28, %ymm27
+// CHECK:  encoding: [0x62,0x61,0x1d,0x20,0xee,0x5a,0x80]
+          vpmaxsw -4096(%rdx), %ymm28, %ymm27
+
+// CHECK: vpmaxsw -4128(%rdx), %ymm28, %ymm27
+// CHECK:  encoding: [0x62,0x61,0x1d,0x20,0xee,0x9a,0xe0,0xef,0xff,0xff]
+          vpmaxsw -4128(%rdx), %ymm28, %ymm27
+
+// CHECK: vpmaxub %xmm23, %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xa1,0x75,0x00,0xde,0xcf]
+          vpmaxub %xmm23, %xmm17, %xmm17
+
+// CHECK: vpmaxub %xmm23, %xmm17, %xmm17 {%k5}
+// CHECK:  encoding: [0x62,0xa1,0x75,0x05,0xde,0xcf]
+          vpmaxub %xmm23, %xmm17, %xmm17 {%k5}
+
+// CHECK: vpmaxub %xmm23, %xmm17, %xmm17 {%k5} {z}
+// CHECK:  encoding: [0x62,0xa1,0x75,0x85,0xde,0xcf]
+          vpmaxub %xmm23, %xmm17, %xmm17 {%k5} {z}
+
+// CHECK: vpmaxub (%rcx), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x75,0x00,0xde,0x09]
+          vpmaxub (%rcx), %xmm17, %xmm17
+
+// CHECK: vpmaxub 291(%rax,%r14,8), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xa1,0x75,0x00,0xde,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxub 291(%rax,%r14,8), %xmm17, %xmm17
+
+// CHECK: vpmaxub 2032(%rdx), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x75,0x00,0xde,0x4a,0x7f]
+          vpmaxub 2032(%rdx), %xmm17, %xmm17
+
+// CHECK: vpmaxub 2048(%rdx), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x75,0x00,0xde,0x8a,0x00,0x08,0x00,0x00]
+          vpmaxub 2048(%rdx), %xmm17, %xmm17
+
+// CHECK: vpmaxub -2048(%rdx), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x75,0x00,0xde,0x4a,0x80]
+          vpmaxub -2048(%rdx), %xmm17, %xmm17
+
+// CHECK: vpmaxub -2064(%rdx), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x75,0x00,0xde,0x8a,0xf0,0xf7,0xff,0xff]
+          vpmaxub -2064(%rdx), %xmm17, %xmm17
+
+// CHECK: vpmaxub %ymm24, %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x01,0x2d,0x20,0xde,0xe0]
+          vpmaxub %ymm24, %ymm26, %ymm28
+
+// CHECK: vpmaxub %ymm24, %ymm26, %ymm28 {%k6}
+// CHECK:  encoding: [0x62,0x01,0x2d,0x26,0xde,0xe0]
+          vpmaxub %ymm24, %ymm26, %ymm28 {%k6}
+
+// CHECK: vpmaxub %ymm24, %ymm26, %ymm28 {%k6} {z}
+// CHECK:  encoding: [0x62,0x01,0x2d,0xa6,0xde,0xe0]
+          vpmaxub %ymm24, %ymm26, %ymm28 {%k6} {z}
+
+// CHECK: vpmaxub (%rcx), %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x61,0x2d,0x20,0xde,0x21]
+          vpmaxub (%rcx), %ymm26, %ymm28
+
+// CHECK: vpmaxub 291(%rax,%r14,8), %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x21,0x2d,0x20,0xde,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxub 291(%rax,%r14,8), %ymm26, %ymm28
+
+// CHECK: vpmaxub 4064(%rdx), %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x61,0x2d,0x20,0xde,0x62,0x7f]
+          vpmaxub 4064(%rdx), %ymm26, %ymm28
+
+// CHECK: vpmaxub 4096(%rdx), %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x61,0x2d,0x20,0xde,0xa2,0x00,0x10,0x00,0x00]
+          vpmaxub 4096(%rdx), %ymm26, %ymm28
+
+// CHECK: vpmaxub -4096(%rdx), %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x61,0x2d,0x20,0xde,0x62,0x80]
+          vpmaxub -4096(%rdx), %ymm26, %ymm28
+
+// CHECK: vpmaxub -4128(%rdx), %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x61,0x2d,0x20,0xde,0xa2,0xe0,0xef,0xff,0xff]
+          vpmaxub -4128(%rdx), %ymm26, %ymm28
+
+// CHECK: vpmaxuw %xmm20, %xmm18, %xmm24
+// CHECK:  encoding: [0x62,0x22,0x6d,0x00,0x3e,0xc4]
+          vpmaxuw %xmm20, %xmm18, %xmm24
+
+// CHECK: vpmaxuw %xmm20, %xmm18, %xmm24 {%k7}
+// CHECK:  encoding: [0x62,0x22,0x6d,0x07,0x3e,0xc4]
+          vpmaxuw %xmm20, %xmm18, %xmm24 {%k7}
+
+// CHECK: vpmaxuw %xmm20, %xmm18, %xmm24 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0x6d,0x87,0x3e,0xc4]
+          vpmaxuw %xmm20, %xmm18, %xmm24 {%k7} {z}
+
+// CHECK: vpmaxuw (%rcx), %xmm18, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x6d,0x00,0x3e,0x01]
+          vpmaxuw (%rcx), %xmm18, %xmm24
+
+// CHECK: vpmaxuw 291(%rax,%r14,8), %xmm18, %xmm24
+// CHECK:  encoding: [0x62,0x22,0x6d,0x00,0x3e,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxuw 291(%rax,%r14,8), %xmm18, %xmm24
+
+// CHECK: vpmaxuw 2032(%rdx), %xmm18, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x6d,0x00,0x3e,0x42,0x7f]
+          vpmaxuw 2032(%rdx), %xmm18, %xmm24
+
+// CHECK: vpmaxuw 2048(%rdx), %xmm18, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x6d,0x00,0x3e,0x82,0x00,0x08,0x00,0x00]
+          vpmaxuw 2048(%rdx), %xmm18, %xmm24
+
+// CHECK: vpmaxuw -2048(%rdx), %xmm18, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x6d,0x00,0x3e,0x42,0x80]
+          vpmaxuw -2048(%rdx), %xmm18, %xmm24
+
+// CHECK: vpmaxuw -2064(%rdx), %xmm18, %xmm24
+// CHECK:  encoding: [0x62,0x62,0x6d,0x00,0x3e,0x82,0xf0,0xf7,0xff,0xff]
+          vpmaxuw -2064(%rdx), %xmm18, %xmm24
+
+// CHECK: vpmaxuw %ymm19, %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x20,0x3e,0xdb]
+          vpmaxuw %ymm19, %ymm22, %ymm19
+
+// CHECK: vpmaxuw %ymm19, %ymm22, %ymm19 {%k7}
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x27,0x3e,0xdb]
+          vpmaxuw %ymm19, %ymm22, %ymm19 {%k7}
+
+// CHECK: vpmaxuw %ymm19, %ymm22, %ymm19 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa2,0x4d,0xa7,0x3e,0xdb]
+          vpmaxuw %ymm19, %ymm22, %ymm19 {%k7} {z}
+
+// CHECK: vpmaxuw (%rcx), %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x20,0x3e,0x19]
+          vpmaxuw (%rcx), %ymm22, %ymm19
+
+// CHECK: vpmaxuw 291(%rax,%r14,8), %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x20,0x3e,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxuw 291(%rax,%r14,8), %ymm22, %ymm19
+
+// CHECK: vpmaxuw 4064(%rdx), %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x20,0x3e,0x5a,0x7f]
+          vpmaxuw 4064(%rdx), %ymm22, %ymm19
+
+// CHECK: vpmaxuw 4096(%rdx), %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x20,0x3e,0x9a,0x00,0x10,0x00,0x00]
+          vpmaxuw 4096(%rdx), %ymm22, %ymm19
+
+// CHECK: vpmaxuw -4096(%rdx), %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x20,0x3e,0x5a,0x80]
+          vpmaxuw -4096(%rdx), %ymm22, %ymm19
+
+// CHECK: vpmaxuw -4128(%rdx), %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x20,0x3e,0x9a,0xe0,0xef,0xff,0xff]
+          vpmaxuw -4128(%rdx), %ymm22, %ymm19
+
+// CHECK: vpminsb %xmm27, %xmm28, %xmm25
+// CHECK:  encoding: [0x62,0x02,0x1d,0x00,0x38,0xcb]
+          vpminsb %xmm27, %xmm28, %xmm25
+
+// CHECK: vpminsb %xmm27, %xmm28, %xmm25 {%k1}
+// CHECK:  encoding: [0x62,0x02,0x1d,0x01,0x38,0xcb]
+          vpminsb %xmm27, %xmm28, %xmm25 {%k1}
+
+// CHECK: vpminsb %xmm27, %xmm28, %xmm25 {%k1} {z}
+// CHECK:  encoding: [0x62,0x02,0x1d,0x81,0x38,0xcb]
+          vpminsb %xmm27, %xmm28, %xmm25 {%k1} {z}
+
+// CHECK: vpminsb (%rcx), %xmm28, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x1d,0x00,0x38,0x09]
+          vpminsb (%rcx), %xmm28, %xmm25
+
+// CHECK: vpminsb 291(%rax,%r14,8), %xmm28, %xmm25
+// CHECK:  encoding: [0x62,0x22,0x1d,0x00,0x38,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpminsb 291(%rax,%r14,8), %xmm28, %xmm25
+
+// CHECK: vpminsb 2032(%rdx), %xmm28, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x1d,0x00,0x38,0x4a,0x7f]
+          vpminsb 2032(%rdx), %xmm28, %xmm25
+
+// CHECK: vpminsb 2048(%rdx), %xmm28, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x1d,0x00,0x38,0x8a,0x00,0x08,0x00,0x00]
+          vpminsb 2048(%rdx), %xmm28, %xmm25
+
+// CHECK: vpminsb -2048(%rdx), %xmm28, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x1d,0x00,0x38,0x4a,0x80]
+          vpminsb -2048(%rdx), %xmm28, %xmm25
+
+// CHECK: vpminsb -2064(%rdx), %xmm28, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x1d,0x00,0x38,0x8a,0xf0,0xf7,0xff,0xff]
+          vpminsb -2064(%rdx), %xmm28, %xmm25
+
+// CHECK: vpminsb %ymm27, %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x02,0x5d,0x20,0x38,0xdb]
+          vpminsb %ymm27, %ymm20, %ymm27
+
+// CHECK: vpminsb %ymm27, %ymm20, %ymm27 {%k6}
+// CHECK:  encoding: [0x62,0x02,0x5d,0x26,0x38,0xdb]
+          vpminsb %ymm27, %ymm20, %ymm27 {%k6}
+
+// CHECK: vpminsb %ymm27, %ymm20, %ymm27 {%k6} {z}
+// CHECK:  encoding: [0x62,0x02,0x5d,0xa6,0x38,0xdb]
+          vpminsb %ymm27, %ymm20, %ymm27 {%k6} {z}
+
+// CHECK: vpminsb (%rcx), %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x5d,0x20,0x38,0x19]
+          vpminsb (%rcx), %ymm20, %ymm27
+
+// CHECK: vpminsb 291(%rax,%r14,8), %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x22,0x5d,0x20,0x38,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpminsb 291(%rax,%r14,8), %ymm20, %ymm27
+
+// CHECK: vpminsb 4064(%rdx), %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x5d,0x20,0x38,0x5a,0x7f]
+          vpminsb 4064(%rdx), %ymm20, %ymm27
+
+// CHECK: vpminsb 4096(%rdx), %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x5d,0x20,0x38,0x9a,0x00,0x10,0x00,0x00]
+          vpminsb 4096(%rdx), %ymm20, %ymm27
+
+// CHECK: vpminsb -4096(%rdx), %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x5d,0x20,0x38,0x5a,0x80]
+          vpminsb -4096(%rdx), %ymm20, %ymm27
+
+// CHECK: vpminsb -4128(%rdx), %ymm20, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x5d,0x20,0x38,0x9a,0xe0,0xef,0xff,0xff]
+          vpminsb -4128(%rdx), %ymm20, %ymm27
+
+// CHECK: vpminsw %xmm19, %xmm26, %xmm27
+// CHECK:  encoding: [0x62,0x21,0x2d,0x00,0xea,0xdb]
+          vpminsw %xmm19, %xmm26, %xmm27
+
+// CHECK: vpminsw %xmm19, %xmm26, %xmm27 {%k2}
+// CHECK:  encoding: [0x62,0x21,0x2d,0x02,0xea,0xdb]
+          vpminsw %xmm19, %xmm26, %xmm27 {%k2}
+
+// CHECK: vpminsw %xmm19, %xmm26, %xmm27 {%k2} {z}
+// CHECK:  encoding: [0x62,0x21,0x2d,0x82,0xea,0xdb]
+          vpminsw %xmm19, %xmm26, %xmm27 {%k2} {z}
+
+// CHECK: vpminsw (%rcx), %xmm26, %xmm27
+// CHECK:  encoding: [0x62,0x61,0x2d,0x00,0xea,0x19]
+          vpminsw (%rcx), %xmm26, %xmm27
+
+// CHECK: vpminsw 291(%rax,%r14,8), %xmm26, %xmm27
+// CHECK:  encoding: [0x62,0x21,0x2d,0x00,0xea,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpminsw 291(%rax,%r14,8), %xmm26, %xmm27
+
+// CHECK: vpminsw 2032(%rdx), %xmm26, %xmm27
+// CHECK:  encoding: [0x62,0x61,0x2d,0x00,0xea,0x5a,0x7f]
+          vpminsw 2032(%rdx), %xmm26, %xmm27
+
+// CHECK: vpminsw 2048(%rdx), %xmm26, %xmm27
+// CHECK:  encoding: [0x62,0x61,0x2d,0x00,0xea,0x9a,0x00,0x08,0x00,0x00]
+          vpminsw 2048(%rdx), %xmm26, %xmm27
+
+// CHECK: vpminsw -2048(%rdx), %xmm26, %xmm27
+// CHECK:  encoding: [0x62,0x61,0x2d,0x00,0xea,0x5a,0x80]
+          vpminsw -2048(%rdx), %xmm26, %xmm27
+
+// CHECK: vpminsw -2064(%rdx), %xmm26, %xmm27
+// CHECK:  encoding: [0x62,0x61,0x2d,0x00,0xea,0x9a,0xf0,0xf7,0xff,0xff]
+          vpminsw -2064(%rdx), %xmm26, %xmm27
+
+// CHECK: vpminsw %ymm27, %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0x81,0x45,0x20,0xea,0xeb]
+          vpminsw %ymm27, %ymm23, %ymm21
+
+// CHECK: vpminsw %ymm27, %ymm23, %ymm21 {%k2}
+// CHECK:  encoding: [0x62,0x81,0x45,0x22,0xea,0xeb]
+          vpminsw %ymm27, %ymm23, %ymm21 {%k2}
+
+// CHECK: vpminsw %ymm27, %ymm23, %ymm21 {%k2} {z}
+// CHECK:  encoding: [0x62,0x81,0x45,0xa2,0xea,0xeb]
+          vpminsw %ymm27, %ymm23, %ymm21 {%k2} {z}
+
+// CHECK: vpminsw (%rcx), %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x45,0x20,0xea,0x29]
+          vpminsw (%rcx), %ymm23, %ymm21
+
+// CHECK: vpminsw 291(%rax,%r14,8), %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xa1,0x45,0x20,0xea,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpminsw 291(%rax,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpminsw 4064(%rdx), %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x45,0x20,0xea,0x6a,0x7f]
+          vpminsw 4064(%rdx), %ymm23, %ymm21
+
+// CHECK: vpminsw 4096(%rdx), %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x45,0x20,0xea,0xaa,0x00,0x10,0x00,0x00]
+          vpminsw 4096(%rdx), %ymm23, %ymm21
+
+// CHECK: vpminsw -4096(%rdx), %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x45,0x20,0xea,0x6a,0x80]
+          vpminsw -4096(%rdx), %ymm23, %ymm21
+
+// CHECK: vpminsw -4128(%rdx), %ymm23, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x45,0x20,0xea,0xaa,0xe0,0xef,0xff,0xff]
+          vpminsw -4128(%rdx), %ymm23, %ymm21
+
+// CHECK: vpminub %xmm27, %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0x81,0x25,0x00,0xda,0xf3]
+          vpminub %xmm27, %xmm27, %xmm22
+
+// CHECK: vpminub %xmm27, %xmm27, %xmm22 {%k5}
+// CHECK:  encoding: [0x62,0x81,0x25,0x05,0xda,0xf3]
+          vpminub %xmm27, %xmm27, %xmm22 {%k5}
+
+// CHECK: vpminub %xmm27, %xmm27, %xmm22 {%k5} {z}
+// CHECK:  encoding: [0x62,0x81,0x25,0x85,0xda,0xf3]
+          vpminub %xmm27, %xmm27, %xmm22 {%k5} {z}
+
+// CHECK: vpminub (%rcx), %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xe1,0x25,0x00,0xda,0x31]
+          vpminub (%rcx), %xmm27, %xmm22
+
+// CHECK: vpminub 291(%rax,%r14,8), %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xa1,0x25,0x00,0xda,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpminub 291(%rax,%r14,8), %xmm27, %xmm22
+
+// CHECK: vpminub 2032(%rdx), %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xe1,0x25,0x00,0xda,0x72,0x7f]
+          vpminub 2032(%rdx), %xmm27, %xmm22
+
+// CHECK: vpminub 2048(%rdx), %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xe1,0x25,0x00,0xda,0xb2,0x00,0x08,0x00,0x00]
+          vpminub 2048(%rdx), %xmm27, %xmm22
+
+// CHECK: vpminub -2048(%rdx), %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xe1,0x25,0x00,0xda,0x72,0x80]
+          vpminub -2048(%rdx), %xmm27, %xmm22
+
+// CHECK: vpminub -2064(%rdx), %xmm27, %xmm22
+// CHECK:  encoding: [0x62,0xe1,0x25,0x00,0xda,0xb2,0xf0,0xf7,0xff,0xff]
+          vpminub -2064(%rdx), %xmm27, %xmm22
+
+// CHECK: vpminub %ymm27, %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x01,0x35,0x20,0xda,0xdb]
+          vpminub %ymm27, %ymm25, %ymm27
+
+// CHECK: vpminub %ymm27, %ymm25, %ymm27 {%k3}
+// CHECK:  encoding: [0x62,0x01,0x35,0x23,0xda,0xdb]
+          vpminub %ymm27, %ymm25, %ymm27 {%k3}
+
+// CHECK: vpminub %ymm27, %ymm25, %ymm27 {%k3} {z}
+// CHECK:  encoding: [0x62,0x01,0x35,0xa3,0xda,0xdb]
+          vpminub %ymm27, %ymm25, %ymm27 {%k3} {z}
+
+// CHECK: vpminub (%rcx), %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x61,0x35,0x20,0xda,0x19]
+          vpminub (%rcx), %ymm25, %ymm27
+
+// CHECK: vpminub 291(%rax,%r14,8), %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x21,0x35,0x20,0xda,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpminub 291(%rax,%r14,8), %ymm25, %ymm27
+
+// CHECK: vpminub 4064(%rdx), %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x61,0x35,0x20,0xda,0x5a,0x7f]
+          vpminub 4064(%rdx), %ymm25, %ymm27
+
+// CHECK: vpminub 4096(%rdx), %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x61,0x35,0x20,0xda,0x9a,0x00,0x10,0x00,0x00]
+          vpminub 4096(%rdx), %ymm25, %ymm27
+
+// CHECK: vpminub -4096(%rdx), %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x61,0x35,0x20,0xda,0x5a,0x80]
+          vpminub -4096(%rdx), %ymm25, %ymm27
+
+// CHECK: vpminub -4128(%rdx), %ymm25, %ymm27
+// CHECK:  encoding: [0x62,0x61,0x35,0x20,0xda,0x9a,0xe0,0xef,0xff,0xff]
+          vpminub -4128(%rdx), %ymm25, %ymm27
+
+// CHECK: vpminuw %xmm23, %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x22,0x75,0x00,0x3a,0xe7]
+          vpminuw %xmm23, %xmm17, %xmm28
+
+// CHECK: vpminuw %xmm23, %xmm17, %xmm28 {%k2}
+// CHECK:  encoding: [0x62,0x22,0x75,0x02,0x3a,0xe7]
+          vpminuw %xmm23, %xmm17, %xmm28 {%k2}
+
+// CHECK: vpminuw %xmm23, %xmm17, %xmm28 {%k2} {z}
+// CHECK:  encoding: [0x62,0x22,0x75,0x82,0x3a,0xe7]
+          vpminuw %xmm23, %xmm17, %xmm28 {%k2} {z}
+
+// CHECK: vpminuw (%rcx), %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x75,0x00,0x3a,0x21]
+          vpminuw (%rcx), %xmm17, %xmm28
+
+// CHECK: vpminuw 291(%rax,%r14,8), %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x22,0x75,0x00,0x3a,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpminuw 291(%rax,%r14,8), %xmm17, %xmm28
+
+// CHECK: vpminuw 2032(%rdx), %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x75,0x00,0x3a,0x62,0x7f]
+          vpminuw 2032(%rdx), %xmm17, %xmm28
+
+// CHECK: vpminuw 2048(%rdx), %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x75,0x00,0x3a,0xa2,0x00,0x08,0x00,0x00]
+          vpminuw 2048(%rdx), %xmm17, %xmm28
+
+// CHECK: vpminuw -2048(%rdx), %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x75,0x00,0x3a,0x62,0x80]
+          vpminuw -2048(%rdx), %xmm17, %xmm28
+
+// CHECK: vpminuw -2064(%rdx), %xmm17, %xmm28
+// CHECK:  encoding: [0x62,0x62,0x75,0x00,0x3a,0xa2,0xf0,0xf7,0xff,0xff]
+          vpminuw -2064(%rdx), %xmm17, %xmm28
+
+// CHECK: vpminuw %ymm18, %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xa2,0x25,0x20,0x3a,0xe2]
+          vpminuw %ymm18, %ymm27, %ymm20
+
+// CHECK: vpminuw %ymm18, %ymm27, %ymm20 {%k7}
+// CHECK:  encoding: [0x62,0xa2,0x25,0x27,0x3a,0xe2]
+          vpminuw %ymm18, %ymm27, %ymm20 {%k7}
+
+// CHECK: vpminuw %ymm18, %ymm27, %ymm20 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa2,0x25,0xa7,0x3a,0xe2]
+          vpminuw %ymm18, %ymm27, %ymm20 {%k7} {z}
+
+// CHECK: vpminuw (%rcx), %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x25,0x20,0x3a,0x21]
+          vpminuw (%rcx), %ymm27, %ymm20
+
+// CHECK: vpminuw 291(%rax,%r14,8), %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xa2,0x25,0x20,0x3a,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpminuw 291(%rax,%r14,8), %ymm27, %ymm20
+
+// CHECK: vpminuw 4064(%rdx), %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x25,0x20,0x3a,0x62,0x7f]
+          vpminuw 4064(%rdx), %ymm27, %ymm20
+
+// CHECK: vpminuw 4096(%rdx), %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x25,0x20,0x3a,0xa2,0x00,0x10,0x00,0x00]
+          vpminuw 4096(%rdx), %ymm27, %ymm20
+
+// CHECK: vpminuw -4096(%rdx), %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x25,0x20,0x3a,0x62,0x80]
+          vpminuw -4096(%rdx), %ymm27, %ymm20
+
+// CHECK: vpminuw -4128(%rdx), %ymm27, %ymm20
+// CHECK:  encoding: [0x62,0xe2,0x25,0x20,0x3a,0xa2,0xe0,0xef,0xff,0xff]
+          vpminuw -4128(%rdx), %ymm27, %ymm20
+
+// CHECK: vpmullw %xmm26, %xmm19, %xmm29
+// CHECK:  encoding: [0x62,0x01,0x65,0x00,0xd5,0xea]
+          vpmullw %xmm26, %xmm19, %xmm29
+
+// CHECK: vpmullw %xmm26, %xmm19, %xmm29 {%k7}
+// CHECK:  encoding: [0x62,0x01,0x65,0x07,0xd5,0xea]
+          vpmullw %xmm26, %xmm19, %xmm29 {%k7}
+
+// CHECK: vpmullw %xmm26, %xmm19, %xmm29 {%k7} {z}
+// CHECK:  encoding: [0x62,0x01,0x65,0x87,0xd5,0xea]
+          vpmullw %xmm26, %xmm19, %xmm29 {%k7} {z}
+
+// CHECK: vpmullw (%rcx), %xmm19, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x65,0x00,0xd5,0x29]
+          vpmullw (%rcx), %xmm19, %xmm29
+
+// CHECK: vpmullw 291(%rax,%r14,8), %xmm19, %xmm29
+// CHECK:  encoding: [0x62,0x21,0x65,0x00,0xd5,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpmullw 291(%rax,%r14,8), %xmm19, %xmm29
+
+// CHECK: vpmullw 2032(%rdx), %xmm19, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x65,0x00,0xd5,0x6a,0x7f]
+          vpmullw 2032(%rdx), %xmm19, %xmm29
+
+// CHECK: vpmullw 2048(%rdx), %xmm19, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x65,0x00,0xd5,0xaa,0x00,0x08,0x00,0x00]
+          vpmullw 2048(%rdx), %xmm19, %xmm29
+
+// CHECK: vpmullw -2048(%rdx), %xmm19, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x65,0x00,0xd5,0x6a,0x80]
+          vpmullw -2048(%rdx), %xmm19, %xmm29
+
+// CHECK: vpmullw -2064(%rdx), %xmm19, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x65,0x00,0xd5,0xaa,0xf0,0xf7,0xff,0xff]
+          vpmullw -2064(%rdx), %xmm19, %xmm29
+
+// CHECK: vpmullw %ymm20, %ymm24, %ymm17
+// CHECK:  encoding: [0x62,0xa1,0x3d,0x20,0xd5,0xcc]
+          vpmullw %ymm20, %ymm24, %ymm17
+
+// CHECK: vpmullw %ymm20, %ymm24, %ymm17 {%k5}
+// CHECK:  encoding: [0x62,0xa1,0x3d,0x25,0xd5,0xcc]
+          vpmullw %ymm20, %ymm24, %ymm17 {%k5}
+
+// CHECK: vpmullw %ymm20, %ymm24, %ymm17 {%k5} {z}
+// CHECK:  encoding: [0x62,0xa1,0x3d,0xa5,0xd5,0xcc]
+          vpmullw %ymm20, %ymm24, %ymm17 {%k5} {z}
+
+// CHECK: vpmullw (%rcx), %ymm24, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x20,0xd5,0x09]
+          vpmullw (%rcx), %ymm24, %ymm17
+
+// CHECK: vpmullw 291(%rax,%r14,8), %ymm24, %ymm17
+// CHECK:  encoding: [0x62,0xa1,0x3d,0x20,0xd5,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpmullw 291(%rax,%r14,8), %ymm24, %ymm17
+
+// CHECK: vpmullw 4064(%rdx), %ymm24, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x20,0xd5,0x4a,0x7f]
+          vpmullw 4064(%rdx), %ymm24, %ymm17
+
+// CHECK: vpmullw 4096(%rdx), %ymm24, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x20,0xd5,0x8a,0x00,0x10,0x00,0x00]
+          vpmullw 4096(%rdx), %ymm24, %ymm17
+
+// CHECK: vpmullw -4096(%rdx), %ymm24, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x20,0xd5,0x4a,0x80]
+          vpmullw -4096(%rdx), %ymm24, %ymm17
+
+// CHECK: vpmullw -4128(%rdx), %ymm24, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x3d,0x20,0xd5,0x8a,0xe0,0xef,0xff,0xff]
+          vpmullw -4128(%rdx), %ymm24, %ymm17
+
+// CHECK: vpsubb %xmm28, %xmm29, %xmm27
+// CHECK:  encoding: [0x62,0x01,0x15,0x00,0xf8,0xdc]
+          vpsubb %xmm28, %xmm29, %xmm27
+
+// CHECK: vpsubb %xmm28, %xmm29, %xmm27 {%k3}
+// CHECK:  encoding: [0x62,0x01,0x15,0x03,0xf8,0xdc]
+          vpsubb %xmm28, %xmm29, %xmm27 {%k3}
+
+// CHECK: vpsubb %xmm28, %xmm29, %xmm27 {%k3} {z}
+// CHECK:  encoding: [0x62,0x01,0x15,0x83,0xf8,0xdc]
+          vpsubb %xmm28, %xmm29, %xmm27 {%k3} {z}
+
+// CHECK: vpsubb (%rcx), %xmm29, %xmm27
+// CHECK:  encoding: [0x62,0x61,0x15,0x00,0xf8,0x19]
+          vpsubb (%rcx), %xmm29, %xmm27
+
+// CHECK: vpsubb 291(%rax,%r14,8), %xmm29, %xmm27
+// CHECK:  encoding: [0x62,0x21,0x15,0x00,0xf8,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpsubb 291(%rax,%r14,8), %xmm29, %xmm27
+
+// CHECK: vpsubb 2032(%rdx), %xmm29, %xmm27
+// CHECK:  encoding: [0x62,0x61,0x15,0x00,0xf8,0x5a,0x7f]
+          vpsubb 2032(%rdx), %xmm29, %xmm27
+
+// CHECK: vpsubb 2048(%rdx), %xmm29, %xmm27
+// CHECK:  encoding: [0x62,0x61,0x15,0x00,0xf8,0x9a,0x00,0x08,0x00,0x00]
+          vpsubb 2048(%rdx), %xmm29, %xmm27
+
+// CHECK: vpsubb -2048(%rdx), %xmm29, %xmm27
+// CHECK:  encoding: [0x62,0x61,0x15,0x00,0xf8,0x5a,0x80]
+          vpsubb -2048(%rdx), %xmm29, %xmm27
+
+// CHECK: vpsubb -2064(%rdx), %xmm29, %xmm27
+// CHECK:  encoding: [0x62,0x61,0x15,0x00,0xf8,0x9a,0xf0,0xf7,0xff,0xff]
+          vpsubb -2064(%rdx), %xmm29, %xmm27
+
+// CHECK: vpsubb %ymm28, %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0x81,0x5d,0x20,0xf8,0xe4]
+          vpsubb %ymm28, %ymm20, %ymm20
+
+// CHECK: vpsubb %ymm28, %ymm20, %ymm20 {%k2}
+// CHECK:  encoding: [0x62,0x81,0x5d,0x22,0xf8,0xe4]
+          vpsubb %ymm28, %ymm20, %ymm20 {%k2}
+
+// CHECK: vpsubb %ymm28, %ymm20, %ymm20 {%k2} {z}
+// CHECK:  encoding: [0x62,0x81,0x5d,0xa2,0xf8,0xe4]
+          vpsubb %ymm28, %ymm20, %ymm20 {%k2} {z}
+
+// CHECK: vpsubb (%rcx), %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0x5d,0x20,0xf8,0x21]
+          vpsubb (%rcx), %ymm20, %ymm20
+
+// CHECK: vpsubb 291(%rax,%r14,8), %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xa1,0x5d,0x20,0xf8,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpsubb 291(%rax,%r14,8), %ymm20, %ymm20
+
+// CHECK: vpsubb 4064(%rdx), %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0x5d,0x20,0xf8,0x62,0x7f]
+          vpsubb 4064(%rdx), %ymm20, %ymm20
+
+// CHECK: vpsubb 4096(%rdx), %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0x5d,0x20,0xf8,0xa2,0x00,0x10,0x00,0x00]
+          vpsubb 4096(%rdx), %ymm20, %ymm20
+
+// CHECK: vpsubb -4096(%rdx), %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0x5d,0x20,0xf8,0x62,0x80]
+          vpsubb -4096(%rdx), %ymm20, %ymm20
+
+// CHECK: vpsubb -4128(%rdx), %ymm20, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0x5d,0x20,0xf8,0xa2,0xe0,0xef,0xff,0xff]
+          vpsubb -4128(%rdx), %ymm20, %ymm20
+
+// CHECK: vpsubw %xmm18, %xmm22, %xmm17
+// CHECK:  encoding: [0x62,0xa1,0x4d,0x00,0xf9,0xca]
+          vpsubw %xmm18, %xmm22, %xmm17
+
+// CHECK: vpsubw %xmm18, %xmm22, %xmm17 {%k6}
+// CHECK:  encoding: [0x62,0xa1,0x4d,0x06,0xf9,0xca]
+          vpsubw %xmm18, %xmm22, %xmm17 {%k6}
+
+// CHECK: vpsubw %xmm18, %xmm22, %xmm17 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa1,0x4d,0x86,0xf9,0xca]
+          vpsubw %xmm18, %xmm22, %xmm17 {%k6} {z}
+
+// CHECK: vpsubw (%rcx), %xmm22, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x4d,0x00,0xf9,0x09]
+          vpsubw (%rcx), %xmm22, %xmm17
+
+// CHECK: vpsubw 291(%rax,%r14,8), %xmm22, %xmm17
+// CHECK:  encoding: [0x62,0xa1,0x4d,0x00,0xf9,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpsubw 291(%rax,%r14,8), %xmm22, %xmm17
+
+// CHECK: vpsubw 2032(%rdx), %xmm22, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x4d,0x00,0xf9,0x4a,0x7f]
+          vpsubw 2032(%rdx), %xmm22, %xmm17
+
+// CHECK: vpsubw 2048(%rdx), %xmm22, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x4d,0x00,0xf9,0x8a,0x00,0x08,0x00,0x00]
+          vpsubw 2048(%rdx), %xmm22, %xmm17
+
+// CHECK: vpsubw -2048(%rdx), %xmm22, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x4d,0x00,0xf9,0x4a,0x80]
+          vpsubw -2048(%rdx), %xmm22, %xmm17
+
+// CHECK: vpsubw -2064(%rdx), %xmm22, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x4d,0x00,0xf9,0x8a,0xf0,0xf7,0xff,0xff]
+          vpsubw -2064(%rdx), %xmm22, %xmm17
+
+// CHECK: vpsubw %ymm24, %ymm29, %ymm17
+// CHECK:  encoding: [0x62,0x81,0x15,0x20,0xf9,0xc8]
+          vpsubw %ymm24, %ymm29, %ymm17
+
+// CHECK: vpsubw %ymm24, %ymm29, %ymm17 {%k2}
+// CHECK:  encoding: [0x62,0x81,0x15,0x22,0xf9,0xc8]
+          vpsubw %ymm24, %ymm29, %ymm17 {%k2}
+
+// CHECK: vpsubw %ymm24, %ymm29, %ymm17 {%k2} {z}
+// CHECK:  encoding: [0x62,0x81,0x15,0xa2,0xf9,0xc8]
+          vpsubw %ymm24, %ymm29, %ymm17 {%k2} {z}
+
+// CHECK: vpsubw (%rcx), %ymm29, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x15,0x20,0xf9,0x09]
+          vpsubw (%rcx), %ymm29, %ymm17
+
+// CHECK: vpsubw 291(%rax,%r14,8), %ymm29, %ymm17
+// CHECK:  encoding: [0x62,0xa1,0x15,0x20,0xf9,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpsubw 291(%rax,%r14,8), %ymm29, %ymm17
+
+// CHECK: vpsubw 4064(%rdx), %ymm29, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x15,0x20,0xf9,0x4a,0x7f]
+          vpsubw 4064(%rdx), %ymm29, %ymm17
+
+// CHECK: vpsubw 4096(%rdx), %ymm29, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x15,0x20,0xf9,0x8a,0x00,0x10,0x00,0x00]
+          vpsubw 4096(%rdx), %ymm29, %ymm17
+
+// CHECK: vpsubw -4096(%rdx), %ymm29, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x15,0x20,0xf9,0x4a,0x80]
+          vpsubw -4096(%rdx), %ymm29, %ymm17
+
+// CHECK: vpsubw -4128(%rdx), %ymm29, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x15,0x20,0xf9,0x8a,0xe0,0xef,0xff,0xff]
+          vpsubw -4128(%rdx), %ymm29, %ymm17
+
+// CHECK: vmovdqu8 %xmm23, %xmm26
+// CHECK:  encoding: [0x62,0x21,0x7f,0x08,0x6f,0xd7]
+          vmovdqu8 %xmm23, %xmm26
+
+// CHECK: vmovdqu8 %xmm23, %xmm26 {%k2}
+// CHECK:  encoding: [0x62,0x21,0x7f,0x0a,0x6f,0xd7]
+          vmovdqu8 %xmm23, %xmm26 {%k2}
+
+// CHECK: vmovdqu8 %xmm23, %xmm26 {%k2} {z}
+// CHECK:  encoding: [0x62,0x21,0x7f,0x8a,0x6f,0xd7]
+          vmovdqu8 %xmm23, %xmm26 {%k2} {z}
+
+// CHECK: vmovdqu8 (%rcx), %xmm26
+// CHECK:  encoding: [0x62,0x61,0x7f,0x08,0x6f,0x11]
+          vmovdqu8 (%rcx), %xmm26
+
+// CHECK: vmovdqu8 291(%rax,%r14,8), %xmm26
+// CHECK:  encoding: [0x62,0x21,0x7f,0x08,0x6f,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu8 291(%rax,%r14,8), %xmm26
+
+// CHECK: vmovdqu8 2032(%rdx), %xmm26
+// CHECK:  encoding: [0x62,0x61,0x7f,0x08,0x6f,0x52,0x7f]
+          vmovdqu8 2032(%rdx), %xmm26
+
+// CHECK: vmovdqu8 2048(%rdx), %xmm26
+// CHECK:  encoding: [0x62,0x61,0x7f,0x08,0x6f,0x92,0x00,0x08,0x00,0x00]
+          vmovdqu8 2048(%rdx), %xmm26
+
+// CHECK: vmovdqu8 -2048(%rdx), %xmm26
+// CHECK:  encoding: [0x62,0x61,0x7f,0x08,0x6f,0x52,0x80]
+          vmovdqu8 -2048(%rdx), %xmm26
+
+// CHECK: vmovdqu8 -2064(%rdx), %xmm26
+// CHECK:  encoding: [0x62,0x61,0x7f,0x08,0x6f,0x92,0xf0,0xf7,0xff,0xff]
+          vmovdqu8 -2064(%rdx), %xmm26
+
+// CHECK: vmovdqu8 %ymm29, %ymm18
+// CHECK:  encoding: [0x62,0x81,0x7f,0x28,0x6f,0xd5]
+          vmovdqu8 %ymm29, %ymm18
+
+// CHECK: vmovdqu8 %ymm29, %ymm18 {%k7}
+// CHECK:  encoding: [0x62,0x81,0x7f,0x2f,0x6f,0xd5]
+          vmovdqu8 %ymm29, %ymm18 {%k7}
+
+// CHECK: vmovdqu8 %ymm29, %ymm18 {%k7} {z}
+// CHECK:  encoding: [0x62,0x81,0x7f,0xaf,0x6f,0xd5]
+          vmovdqu8 %ymm29, %ymm18 {%k7} {z}
+
+// CHECK: vmovdqu8 (%rcx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x28,0x6f,0x11]
+          vmovdqu8 (%rcx), %ymm18
+
+// CHECK: vmovdqu8 291(%rax,%r14,8), %ymm18
+// CHECK:  encoding: [0x62,0xa1,0x7f,0x28,0x6f,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu8 291(%rax,%r14,8), %ymm18
+
+// CHECK: vmovdqu8 4064(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x28,0x6f,0x52,0x7f]
+          vmovdqu8 4064(%rdx), %ymm18
+
+// CHECK: vmovdqu8 4096(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x28,0x6f,0x92,0x00,0x10,0x00,0x00]
+          vmovdqu8 4096(%rdx), %ymm18
+
+// CHECK: vmovdqu8 -4096(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x28,0x6f,0x52,0x80]
+          vmovdqu8 -4096(%rdx), %ymm18
+
+// CHECK: vmovdqu8 -4128(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x28,0x6f,0x92,0xe0,0xef,0xff,0xff]
+          vmovdqu8 -4128(%rdx), %ymm18
+
+// CHECK: vmovdqu16 %xmm24, %xmm29
+// CHECK:  encoding: [0x62,0x01,0xff,0x08,0x6f,0xe8]
+          vmovdqu16 %xmm24, %xmm29
+
+// CHECK: vmovdqu16 %xmm24, %xmm29 {%k6}
+// CHECK:  encoding: [0x62,0x01,0xff,0x0e,0x6f,0xe8]
+          vmovdqu16 %xmm24, %xmm29 {%k6}
+
+// CHECK: vmovdqu16 %xmm24, %xmm29 {%k6} {z}
+// CHECK:  encoding: [0x62,0x01,0xff,0x8e,0x6f,0xe8]
+          vmovdqu16 %xmm24, %xmm29 {%k6} {z}
+
+// CHECK: vmovdqu16 (%rcx), %xmm29
+// CHECK:  encoding: [0x62,0x61,0xff,0x08,0x6f,0x29]
+          vmovdqu16 (%rcx), %xmm29
+
+// CHECK: vmovdqu16 291(%rax,%r14,8), %xmm29
+// CHECK:  encoding: [0x62,0x21,0xff,0x08,0x6f,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu16 291(%rax,%r14,8), %xmm29
+
+// CHECK: vmovdqu16 2032(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x61,0xff,0x08,0x6f,0x6a,0x7f]
+          vmovdqu16 2032(%rdx), %xmm29
+
+// CHECK: vmovdqu16 2048(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x61,0xff,0x08,0x6f,0xaa,0x00,0x08,0x00,0x00]
+          vmovdqu16 2048(%rdx), %xmm29
+
+// CHECK: vmovdqu16 -2048(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x61,0xff,0x08,0x6f,0x6a,0x80]
+          vmovdqu16 -2048(%rdx), %xmm29
+
+// CHECK: vmovdqu16 -2064(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x61,0xff,0x08,0x6f,0xaa,0xf0,0xf7,0xff,0xff]
+          vmovdqu16 -2064(%rdx), %xmm29
+
+// CHECK: vmovdqu16 %ymm24, %ymm23
+// CHECK:  encoding: [0x62,0x81,0xff,0x28,0x6f,0xf8]
+          vmovdqu16 %ymm24, %ymm23
+
+// CHECK: vmovdqu16 %ymm24, %ymm23 {%k3}
+// CHECK:  encoding: [0x62,0x81,0xff,0x2b,0x6f,0xf8]
+          vmovdqu16 %ymm24, %ymm23 {%k3}
+
+// CHECK: vmovdqu16 %ymm24, %ymm23 {%k3} {z}
+// CHECK:  encoding: [0x62,0x81,0xff,0xab,0x6f,0xf8]
+          vmovdqu16 %ymm24, %ymm23 {%k3} {z}
+
+// CHECK: vmovdqu16 (%rcx), %ymm23
+// CHECK:  encoding: [0x62,0xe1,0xff,0x28,0x6f,0x39]
+          vmovdqu16 (%rcx), %ymm23
+
+// CHECK: vmovdqu16 291(%rax,%r14,8), %ymm23
+// CHECK:  encoding: [0x62,0xa1,0xff,0x28,0x6f,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu16 291(%rax,%r14,8), %ymm23
+
+// CHECK: vmovdqu16 4064(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe1,0xff,0x28,0x6f,0x7a,0x7f]
+          vmovdqu16 4064(%rdx), %ymm23
+
+// CHECK: vmovdqu16 4096(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe1,0xff,0x28,0x6f,0xba,0x00,0x10,0x00,0x00]
+          vmovdqu16 4096(%rdx), %ymm23
+
+// CHECK: vmovdqu16 -4096(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe1,0xff,0x28,0x6f,0x7a,0x80]
+          vmovdqu16 -4096(%rdx), %ymm23
+
+// CHECK: vmovdqu16 -4128(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe1,0xff,0x28,0x6f,0xba,0xe0,0xef,0xff,0xff]
+          vmovdqu16 -4128(%rdx), %ymm23
+
+// CHECK: vmovdqu8 %xmm17, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x08,0x7f,0x09]
+          vmovdqu8 %xmm17, (%rcx)
+
+// CHECK: vmovdqu8 %xmm17, (%rcx) {%k4}
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x0c,0x7f,0x09]
+          vmovdqu8 %xmm17, (%rcx) {%k4}
+
+// CHECK: vmovdqu8 %xmm17, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0x7f,0x08,0x7f,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu8 %xmm17, 291(%rax,%r14,8)
+
+// CHECK: vmovdqu8 %xmm17, 2032(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x08,0x7f,0x4a,0x7f]
+          vmovdqu8 %xmm17, 2032(%rdx)
+
+// CHECK: vmovdqu8 %xmm17, 2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x08,0x7f,0x8a,0x00,0x08,0x00,0x00]
+          vmovdqu8 %xmm17, 2048(%rdx)
+
+// CHECK: vmovdqu8 %xmm17, -2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x08,0x7f,0x4a,0x80]
+          vmovdqu8 %xmm17, -2048(%rdx)
+
+// CHECK: vmovdqu8 %xmm17, -2064(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x08,0x7f,0x8a,0xf0,0xf7,0xff,0xff]
+          vmovdqu8 %xmm17, -2064(%rdx)
+
+// CHECK: vmovdqu8 %ymm21, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x28,0x7f,0x29]
+          vmovdqu8 %ymm21, (%rcx)
+
+// CHECK: vmovdqu8 %ymm21, (%rcx) {%k1}
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x29,0x7f,0x29]
+          vmovdqu8 %ymm21, (%rcx) {%k1}
+
+// CHECK: vmovdqu8 %ymm21, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0x7f,0x28,0x7f,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu8 %ymm21, 291(%rax,%r14,8)
+
+// CHECK: vmovdqu8 %ymm21, 4064(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x28,0x7f,0x6a,0x7f]
+          vmovdqu8 %ymm21, 4064(%rdx)
+
+// CHECK: vmovdqu8 %ymm21, 4096(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x28,0x7f,0xaa,0x00,0x10,0x00,0x00]
+          vmovdqu8 %ymm21, 4096(%rdx)
+
+// CHECK: vmovdqu8 %ymm21, -4096(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x28,0x7f,0x6a,0x80]
+          vmovdqu8 %ymm21, -4096(%rdx)
+
+// CHECK: vmovdqu8 %ymm21, -4128(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7f,0x28,0x7f,0xaa,0xe0,0xef,0xff,0xff]
+          vmovdqu8 %ymm21, -4128(%rdx)
+
+// CHECK: vmovdqu16 %xmm23, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0xff,0x08,0x7f,0x39]
+          vmovdqu16 %xmm23, (%rcx)
+
+// CHECK: vmovdqu16 %xmm23, (%rcx) {%k7}
+// CHECK:  encoding: [0x62,0xe1,0xff,0x0f,0x7f,0x39]
+          vmovdqu16 %xmm23, (%rcx) {%k7}
+
+// CHECK: vmovdqu16 %xmm23, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0xff,0x08,0x7f,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu16 %xmm23, 291(%rax,%r14,8)
+
+// CHECK: vmovdqu16 %xmm23, 2032(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xff,0x08,0x7f,0x7a,0x7f]
+          vmovdqu16 %xmm23, 2032(%rdx)
+
+// CHECK: vmovdqu16 %xmm23, 2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xff,0x08,0x7f,0xba,0x00,0x08,0x00,0x00]
+          vmovdqu16 %xmm23, 2048(%rdx)
+
+// CHECK: vmovdqu16 %xmm23, -2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xff,0x08,0x7f,0x7a,0x80]
+          vmovdqu16 %xmm23, -2048(%rdx)
+
+// CHECK: vmovdqu16 %xmm23, -2064(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xff,0x08,0x7f,0xba,0xf0,0xf7,0xff,0xff]
+          vmovdqu16 %xmm23, -2064(%rdx)
+
+// CHECK: vmovdqu16 %ymm29, (%rcx)
+// CHECK:  encoding: [0x62,0x61,0xff,0x28,0x7f,0x29]
+          vmovdqu16 %ymm29, (%rcx)
+
+// CHECK: vmovdqu16 %ymm29, (%rcx) {%k6}
+// CHECK:  encoding: [0x62,0x61,0xff,0x2e,0x7f,0x29]
+          vmovdqu16 %ymm29, (%rcx) {%k6}
+
+// CHECK: vmovdqu16 %ymm29, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x21,0xff,0x28,0x7f,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu16 %ymm29, 291(%rax,%r14,8)
+
+// CHECK: vmovdqu16 %ymm29, 4064(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xff,0x28,0x7f,0x6a,0x7f]
+          vmovdqu16 %ymm29, 4064(%rdx)
+
+// CHECK: vmovdqu16 %ymm29, 4096(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xff,0x28,0x7f,0xaa,0x00,0x10,0x00,0x00]
+          vmovdqu16 %ymm29, 4096(%rdx)
+
+// CHECK: vmovdqu16 %ymm29, -4096(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xff,0x28,0x7f,0x6a,0x80]
+          vmovdqu16 %ymm29, -4096(%rdx)
+
+// CHECK: vmovdqu16 %ymm29, -4128(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xff,0x28,0x7f,0xaa,0xe0,0xef,0xff,0xff]
+          vmovdqu16 %ymm29, -4128(%rdx)
diff --git a/test/MC/X86/x86-64-avx512dq.s b/test/MC/X86/x86-64-avx512dq.s
new file mode 100644
index 000000000000..aac176517bf6
--- /dev/null
+++ b/test/MC/X86/x86-64-avx512dq.s
@@ -0,0 +1,129 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512dq  --show-encoding %s | FileCheck %s
+
+// CHECK: vpmullq %zmm18, %zmm24, %zmm18
+// CHECK:  encoding: [0x62,0xa2,0xbd,0x40,0x40,0xd2]
+          vpmullq %zmm18, %zmm24, %zmm18
+
+// CHECK: vpmullq %zmm18, %zmm24, %zmm18 {%k2}
+// CHECK:  encoding: [0x62,0xa2,0xbd,0x42,0x40,0xd2]
+          vpmullq %zmm18, %zmm24, %zmm18 {%k2}
+
+// CHECK: vpmullq %zmm18, %zmm24, %zmm18 {%k2} {z}
+// CHECK:  encoding: [0x62,0xa2,0xbd,0xc2,0x40,0xd2]
+          vpmullq %zmm18, %zmm24, %zmm18 {%k2} {z}
+
+// CHECK: vpmullq (%rcx), %zmm24, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x40,0x40,0x11]
+          vpmullq (%rcx), %zmm24, %zmm18
+
+// CHECK: vpmullq 291(%rax,%r14,8), %zmm24, %zmm18
+// CHECK:  encoding: [0x62,0xa2,0xbd,0x40,0x40,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpmullq 291(%rax,%r14,8), %zmm24, %zmm18
+
+// CHECK: vpmullq (%rcx){1to8}, %zmm24, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x50,0x40,0x11]
+          vpmullq (%rcx){1to8}, %zmm24, %zmm18
+
+// CHECK: vpmullq 8128(%rdx), %zmm24, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x40,0x40,0x52,0x7f]
+          vpmullq 8128(%rdx), %zmm24, %zmm18
+
+// CHECK: vpmullq 8192(%rdx), %zmm24, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x40,0x40,0x92,0x00,0x20,0x00,0x00]
+          vpmullq 8192(%rdx), %zmm24, %zmm18
+
+// CHECK: vpmullq -8192(%rdx), %zmm24, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x40,0x40,0x52,0x80]
+          vpmullq -8192(%rdx), %zmm24, %zmm18
+
+// CHECK: vpmullq -8256(%rdx), %zmm24, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x40,0x40,0x92,0xc0,0xdf,0xff,0xff]
+          vpmullq -8256(%rdx), %zmm24, %zmm18
+
+// CHECK: vpmullq 1016(%rdx){1to8}, %zmm24, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x50,0x40,0x52,0x7f]
+          vpmullq 1016(%rdx){1to8}, %zmm24, %zmm18
+
+// CHECK: vpmullq 1024(%rdx){1to8}, %zmm24, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x50,0x40,0x92,0x00,0x04,0x00,0x00]
+          vpmullq 1024(%rdx){1to8}, %zmm24, %zmm18
+
+// CHECK: vpmullq -1024(%rdx){1to8}, %zmm24, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x50,0x40,0x52,0x80]
+          vpmullq -1024(%rdx){1to8}, %zmm24, %zmm18
+
+// CHECK: vpmullq -1032(%rdx){1to8}, %zmm24, %zmm18
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x50,0x40,0x92,0xf8,0xfb,0xff,0xff]
+          vpmullq -1032(%rdx){1to8}, %zmm24, %zmm18
+
+// CHECK: kandb  %k6, %k5, %k2
+// CHECK:  encoding: [0xc5,0xd5,0x41,0xd6]
+          kandb  %k6, %k5, %k2
+
+// CHECK: kandnb %k4, %k6, %k5
+// CHECK:  encoding: [0xc5,0xcd,0x42,0xec]
+          kandnb %k4, %k6, %k5
+
+// CHECK: korb   %k5, %k4, %k4
+// CHECK:  encoding: [0xc5,0xdd,0x45,0xe5]
+          korb   %k5, %k4, %k4
+
+// CHECK: kxnorb %k7, %k6, %k4
+// CHECK:  encoding: [0xc5,0xcd,0x46,0xe7]
+          kxnorb %k7, %k6, %k4
+
+// CHECK: kxorb  %k5, %k6, %k4
+// CHECK:  encoding: [0xc5,0xcd,0x47,0xe5]
+          kxorb  %k5, %k6, %k4
+
+// CHECK: knotb  %k4, %k5
+// CHECK:  encoding: [0xc5,0xf9,0x44,0xec]
+          knotb  %k4, %k5
+
+// CHECK: knotb  %k3, %k3
+// CHECK:  encoding: [0xc5,0xf9,0x44,0xdb]
+          knotb  %k3, %k3
+
+// CHECK: kmovb  %k3, %k5
+// CHECK:  encoding: [0xc5,0xf9,0x90,0xeb]
+          kmovb  %k3, %k5
+
+// CHECK: kmovb  (%rcx), %k5
+// CHECK:  encoding: [0xc5,0xf9,0x90,0x29]
+          kmovb  (%rcx), %k5
+
+// CHECK: kmovb  4660(%rax,%r14,8), %k5
+// CHECK:  encoding: [0xc4,0xa1,0x79,0x90,0xac,0xf0,0x34,0x12,0x00,0x00]
+          kmovb  4660(%rax,%r14,8), %k5
+
+// CHECK: kmovb  %k2, (%rcx)
+// CHECK:  encoding: [0xc5,0xf9,0x91,0x11]
+          kmovb  %k2, (%rcx)
+
+// CHECK: kmovb  %k2, 4660(%rax,%r14,8)
+// CHECK:  encoding: [0xc4,0xa1,0x79,0x91,0x94,0xf0,0x34,0x12,0x00,0x00]
+          kmovb  %k2, 4660(%rax,%r14,8)
+
+// CHECK: kmovb  %eax, %k2
+// CHECK:  encoding: [0xc5,0xf9,0x92,0xd0]
+          kmovb  %eax, %k2
+
+// CHECK: kmovb  %ebp, %k2
+// CHECK:  encoding: [0xc5,0xf9,0x92,0xd5]
+          kmovb  %ebp, %k2
+
+// CHECK: kmovb  %r13d, %k2
+// CHECK:  encoding: [0xc4,0xc1,0x79,0x92,0xd5]
+          kmovb  %r13d, %k2
+
+// CHECK: kmovb  %k3, %eax
+// CHECK:  encoding: [0xc5,0xf9,0x93,0xc3]
+          kmovb  %k3, %eax
+
+// CHECK: kmovb  %k3, %ebp
+// CHECK:  encoding: [0xc5,0xf9,0x93,0xeb]
+          kmovb  %k3, %ebp
+
+// CHECK: kmovb  %k3, %r13d
+// CHECK:  encoding: [0xc5,0x79,0x93,0xeb]
+          kmovb  %k3, %r13d
diff --git a/test/MC/X86/x86-64-avx512dq_vl.s b/test/MC/X86/x86-64-avx512dq_vl.s
new file mode 100644
index 000000000000..38aab78be2c3
--- /dev/null
+++ b/test/MC/X86/x86-64-avx512dq_vl.s
@@ -0,0 +1,113 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512dq -mattr=+avx512vl  --show-encoding %s | FileCheck %s
+
+// CHECK: vpmullq %xmm22, %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x22,0xf5,0x00,0x40,0xd6]
+          vpmullq %xmm22, %xmm17, %xmm26
+
+// CHECK: vpmullq %xmm22, %xmm17, %xmm26 {%k6}
+// CHECK:  encoding: [0x62,0x22,0xf5,0x06,0x40,0xd6]
+          vpmullq %xmm22, %xmm17, %xmm26 {%k6}
+
+// CHECK: vpmullq %xmm22, %xmm17, %xmm26 {%k6} {z}
+// CHECK:  encoding: [0x62,0x22,0xf5,0x86,0x40,0xd6]
+          vpmullq %xmm22, %xmm17, %xmm26 {%k6} {z}
+
+// CHECK: vpmullq (%rcx), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x40,0x11]
+          vpmullq (%rcx), %xmm17, %xmm26
+
+// CHECK: vpmullq 291(%rax,%r14,8), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x22,0xf5,0x00,0x40,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpmullq 291(%rax,%r14,8), %xmm17, %xmm26
+
+// CHECK: vpmullq (%rcx){1to2}, %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x40,0x11]
+          vpmullq (%rcx){1to2}, %xmm17, %xmm26
+
+// CHECK: vpmullq 2032(%rdx), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x40,0x52,0x7f]
+          vpmullq 2032(%rdx), %xmm17, %xmm26
+
+// CHECK: vpmullq 2048(%rdx), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x40,0x92,0x00,0x08,0x00,0x00]
+          vpmullq 2048(%rdx), %xmm17, %xmm26
+
+// CHECK: vpmullq -2048(%rdx), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x40,0x52,0x80]
+          vpmullq -2048(%rdx), %xmm17, %xmm26
+
+// CHECK: vpmullq -2064(%rdx), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x40,0x92,0xf0,0xf7,0xff,0xff]
+          vpmullq -2064(%rdx), %xmm17, %xmm26
+
+// CHECK: vpmullq 1016(%rdx){1to2}, %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x40,0x52,0x7f]
+          vpmullq 1016(%rdx){1to2}, %xmm17, %xmm26
+
+// CHECK: vpmullq 1024(%rdx){1to2}, %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x40,0x92,0x00,0x04,0x00,0x00]
+          vpmullq 1024(%rdx){1to2}, %xmm17, %xmm26
+
+// CHECK: vpmullq -1024(%rdx){1to2}, %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x40,0x52,0x80]
+          vpmullq -1024(%rdx){1to2}, %xmm17, %xmm26
+
+// CHECK: vpmullq -1032(%rdx){1to2}, %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x40,0x92,0xf8,0xfb,0xff,0xff]
+          vpmullq -1032(%rdx){1to2}, %xmm17, %xmm26
+
+// CHECK: vpmullq %ymm25, %ymm25, %ymm25
+// CHECK:  encoding: [0x62,0x02,0xb5,0x20,0x40,0xc9]
+          vpmullq %ymm25, %ymm25, %ymm25
+
+// CHECK: vpmullq %ymm25, %ymm25, %ymm25 {%k3}
+// CHECK:  encoding: [0x62,0x02,0xb5,0x23,0x40,0xc9]
+          vpmullq %ymm25, %ymm25, %ymm25 {%k3}
+
+// CHECK: vpmullq %ymm25, %ymm25, %ymm25 {%k3} {z}
+// CHECK:  encoding: [0x62,0x02,0xb5,0xa3,0x40,0xc9]
+          vpmullq %ymm25, %ymm25, %ymm25 {%k3} {z}
+
+// CHECK: vpmullq (%rcx), %ymm25, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0x40,0x09]
+          vpmullq (%rcx), %ymm25, %ymm25
+
+// CHECK: vpmullq 291(%rax,%r14,8), %ymm25, %ymm25
+// CHECK:  encoding: [0x62,0x22,0xb5,0x20,0x40,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpmullq 291(%rax,%r14,8), %ymm25, %ymm25
+
+// CHECK: vpmullq (%rcx){1to4}, %ymm25, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xb5,0x30,0x40,0x09]
+          vpmullq (%rcx){1to4}, %ymm25, %ymm25
+
+// CHECK: vpmullq 4064(%rdx), %ymm25, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0x40,0x4a,0x7f]
+          vpmullq 4064(%rdx), %ymm25, %ymm25
+
+// CHECK: vpmullq 4096(%rdx), %ymm25, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0x40,0x8a,0x00,0x10,0x00,0x00]
+          vpmullq 4096(%rdx), %ymm25, %ymm25
+
+// CHECK: vpmullq -4096(%rdx), %ymm25, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0x40,0x4a,0x80]
+          vpmullq -4096(%rdx), %ymm25, %ymm25
+
+// CHECK: vpmullq -4128(%rdx), %ymm25, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0x40,0x8a,0xe0,0xef,0xff,0xff]
+          vpmullq -4128(%rdx), %ymm25, %ymm25
+
+// CHECK: vpmullq 1016(%rdx){1to4}, %ymm25, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xb5,0x30,0x40,0x4a,0x7f]
+          vpmullq 1016(%rdx){1to4}, %ymm25, %ymm25
+
+// CHECK: vpmullq 1024(%rdx){1to4}, %ymm25, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xb5,0x30,0x40,0x8a,0x00,0x04,0x00,0x00]
+          vpmullq 1024(%rdx){1to4}, %ymm25, %ymm25
+
+// CHECK: vpmullq -1024(%rdx){1to4}, %ymm25, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xb5,0x30,0x40,0x4a,0x80]
+          vpmullq -1024(%rdx){1to4}, %ymm25, %ymm25
+
+// CHECK: vpmullq -1032(%rdx){1to4}, %ymm25, %ymm25
+// CHECK:  encoding: [0x62,0x62,0xb5,0x30,0x40,0x8a,0xf8,0xfb,0xff,0xff]
+          vpmullq -1032(%rdx){1to4}, %ymm25, %ymm25
diff --git a/test/MC/X86/x86-64-avx512f_vl.s b/test/MC/X86/x86-64-avx512f_vl.s
new file mode 100644
index 000000000000..ad121dc7d0da
--- /dev/null
+++ b/test/MC/X86/x86-64-avx512f_vl.s
@@ -0,0 +1,6653 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl --show-encoding %s | FileCheck %s
+
+// CHECK: vaddpd %xmm19, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xa1,0x95,0x00,0x58,0xe3]
+          vaddpd %xmm19, %xmm29, %xmm20
+
+// CHECK: vaddpd %xmm19, %xmm29, %xmm20 {%k7}
+// CHECK:  encoding: [0x62,0xa1,0x95,0x07,0x58,0xe3]
+          vaddpd %xmm19, %xmm29, %xmm20 {%k7}
+
+// CHECK: vaddpd %xmm19, %xmm29, %xmm20 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa1,0x95,0x87,0x58,0xe3]
+          vaddpd %xmm19, %xmm29, %xmm20 {%k7} {z}
+
+// CHECK: vaddpd (%rcx), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x95,0x00,0x58,0x21]
+          vaddpd (%rcx), %xmm29, %xmm20
+
+// CHECK: vaddpd 291(%rax,%r14,8), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xa1,0x95,0x00,0x58,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vaddpd 291(%rax,%r14,8), %xmm29, %xmm20
+
+// CHECK: vaddpd (%rcx){1to2}, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x95,0x10,0x58,0x21]
+          vaddpd (%rcx){1to2}, %xmm29, %xmm20
+
+// CHECK: vaddpd 2032(%rdx), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x95,0x00,0x58,0x62,0x7f]
+          vaddpd 2032(%rdx), %xmm29, %xmm20
+
+// CHECK: vaddpd 2048(%rdx), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x95,0x00,0x58,0xa2,0x00,0x08,0x00,0x00]
+          vaddpd 2048(%rdx), %xmm29, %xmm20
+
+// CHECK: vaddpd -2048(%rdx), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x95,0x00,0x58,0x62,0x80]
+          vaddpd -2048(%rdx), %xmm29, %xmm20
+
+// CHECK: vaddpd -2064(%rdx), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x95,0x00,0x58,0xa2,0xf0,0xf7,0xff,0xff]
+          vaddpd -2064(%rdx), %xmm29, %xmm20
+
+// CHECK: vaddpd 1016(%rdx){1to2}, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x95,0x10,0x58,0x62,0x7f]
+          vaddpd 1016(%rdx){1to2}, %xmm29, %xmm20
+
+// CHECK: vaddpd 1024(%rdx){1to2}, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x95,0x10,0x58,0xa2,0x00,0x04,0x00,0x00]
+          vaddpd 1024(%rdx){1to2}, %xmm29, %xmm20
+
+// CHECK: vaddpd -1024(%rdx){1to2}, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x95,0x10,0x58,0x62,0x80]
+          vaddpd -1024(%rdx){1to2}, %xmm29, %xmm20
+
+// CHECK: vaddpd -1032(%rdx){1to2}, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x95,0x10,0x58,0xa2,0xf8,0xfb,0xff,0xff]
+          vaddpd -1032(%rdx){1to2}, %xmm29, %xmm20
+
+// CHECK: vaddpd %ymm26, %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x01,0xad,0x20,0x58,0xe2]
+          vaddpd %ymm26, %ymm26, %ymm28
+
+// CHECK: vaddpd %ymm26, %ymm26, %ymm28 {%k1}
+// CHECK:  encoding: [0x62,0x01,0xad,0x21,0x58,0xe2]
+          vaddpd %ymm26, %ymm26, %ymm28 {%k1}
+
+// CHECK: vaddpd %ymm26, %ymm26, %ymm28 {%k1} {z}
+// CHECK:  encoding: [0x62,0x01,0xad,0xa1,0x58,0xe2]
+          vaddpd %ymm26, %ymm26, %ymm28 {%k1} {z}
+
+// CHECK: vaddpd (%rcx), %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xad,0x20,0x58,0x21]
+          vaddpd (%rcx), %ymm26, %ymm28
+
+// CHECK: vaddpd 291(%rax,%r14,8), %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x21,0xad,0x20,0x58,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vaddpd 291(%rax,%r14,8), %ymm26, %ymm28
+
+// CHECK: vaddpd (%rcx){1to4}, %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xad,0x30,0x58,0x21]
+          vaddpd (%rcx){1to4}, %ymm26, %ymm28
+
+// CHECK: vaddpd 4064(%rdx), %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xad,0x20,0x58,0x62,0x7f]
+          vaddpd 4064(%rdx), %ymm26, %ymm28
+
+// CHECK: vaddpd 4096(%rdx), %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xad,0x20,0x58,0xa2,0x00,0x10,0x00,0x00]
+          vaddpd 4096(%rdx), %ymm26, %ymm28
+
+// CHECK: vaddpd -4096(%rdx), %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xad,0x20,0x58,0x62,0x80]
+          vaddpd -4096(%rdx), %ymm26, %ymm28
+
+// CHECK: vaddpd -4128(%rdx), %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xad,0x20,0x58,0xa2,0xe0,0xef,0xff,0xff]
+          vaddpd -4128(%rdx), %ymm26, %ymm28
+
+// CHECK: vaddpd 1016(%rdx){1to4}, %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xad,0x30,0x58,0x62,0x7f]
+          vaddpd 1016(%rdx){1to4}, %ymm26, %ymm28
+
+// CHECK: vaddpd 1024(%rdx){1to4}, %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xad,0x30,0x58,0xa2,0x00,0x04,0x00,0x00]
+          vaddpd 1024(%rdx){1to4}, %ymm26, %ymm28
+
+// CHECK: vaddpd -1024(%rdx){1to4}, %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xad,0x30,0x58,0x62,0x80]
+          vaddpd -1024(%rdx){1to4}, %ymm26, %ymm28
+
+// CHECK: vaddpd -1032(%rdx){1to4}, %ymm26, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xad,0x30,0x58,0xa2,0xf8,0xfb,0xff,0xff]
+          vaddpd -1032(%rdx){1to4}, %ymm26, %ymm28
+
+// CHECK: vaddps %xmm27, %xmm19, %xmm24
+// CHECK:  encoding: [0x62,0x01,0x64,0x00,0x58,0xc3]
+          vaddps %xmm27, %xmm19, %xmm24
+
+// CHECK: vaddps %xmm27, %xmm19, %xmm24 {%k4}
+// CHECK:  encoding: [0x62,0x01,0x64,0x04,0x58,0xc3]
+          vaddps %xmm27, %xmm19, %xmm24 {%k4}
+
+// CHECK: vaddps %xmm27, %xmm19, %xmm24 {%k4} {z}
+// CHECK:  encoding: [0x62,0x01,0x64,0x84,0x58,0xc3]
+          vaddps %xmm27, %xmm19, %xmm24 {%k4} {z}
+
+// CHECK: vaddps (%rcx), %xmm19, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x64,0x00,0x58,0x01]
+          vaddps (%rcx), %xmm19, %xmm24
+
+// CHECK: vaddps 291(%rax,%r14,8), %xmm19, %xmm24
+// CHECK:  encoding: [0x62,0x21,0x64,0x00,0x58,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vaddps 291(%rax,%r14,8), %xmm19, %xmm24
+
+// CHECK: vaddps (%rcx){1to4}, %xmm19, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x64,0x10,0x58,0x01]
+          vaddps (%rcx){1to4}, %xmm19, %xmm24
+
+// CHECK: vaddps 2032(%rdx), %xmm19, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x64,0x00,0x58,0x42,0x7f]
+          vaddps 2032(%rdx), %xmm19, %xmm24
+
+// CHECK: vaddps 2048(%rdx), %xmm19, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x64,0x00,0x58,0x82,0x00,0x08,0x00,0x00]
+          vaddps 2048(%rdx), %xmm19, %xmm24
+
+// CHECK: vaddps -2048(%rdx), %xmm19, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x64,0x00,0x58,0x42,0x80]
+          vaddps -2048(%rdx), %xmm19, %xmm24
+
+// CHECK: vaddps -2064(%rdx), %xmm19, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x64,0x00,0x58,0x82,0xf0,0xf7,0xff,0xff]
+          vaddps -2064(%rdx), %xmm19, %xmm24
+
+// CHECK: vaddps 508(%rdx){1to4}, %xmm19, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x64,0x10,0x58,0x42,0x7f]
+          vaddps 508(%rdx){1to4}, %xmm19, %xmm24
+
+// CHECK: vaddps 512(%rdx){1to4}, %xmm19, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x64,0x10,0x58,0x82,0x00,0x02,0x00,0x00]
+          vaddps 512(%rdx){1to4}, %xmm19, %xmm24
+
+// CHECK: vaddps -512(%rdx){1to4}, %xmm19, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x64,0x10,0x58,0x42,0x80]
+          vaddps -512(%rdx){1to4}, %xmm19, %xmm24
+
+// CHECK: vaddps -516(%rdx){1to4}, %xmm19, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x64,0x10,0x58,0x82,0xfc,0xfd,0xff,0xff]
+          vaddps -516(%rdx){1to4}, %xmm19, %xmm24
+
+// CHECK: vaddps %ymm20, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x21,0x2c,0x20,0x58,0xcc]
+          vaddps %ymm20, %ymm26, %ymm25
+
+// CHECK: vaddps %ymm20, %ymm26, %ymm25 {%k4}
+// CHECK:  encoding: [0x62,0x21,0x2c,0x24,0x58,0xcc]
+          vaddps %ymm20, %ymm26, %ymm25 {%k4}
+
+// CHECK: vaddps %ymm20, %ymm26, %ymm25 {%k4} {z}
+// CHECK:  encoding: [0x62,0x21,0x2c,0xa4,0x58,0xcc]
+          vaddps %ymm20, %ymm26, %ymm25 {%k4} {z}
+
+// CHECK: vaddps (%rcx), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0x2c,0x20,0x58,0x09]
+          vaddps (%rcx), %ymm26, %ymm25
+
+// CHECK: vaddps 291(%rax,%r14,8), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x21,0x2c,0x20,0x58,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vaddps 291(%rax,%r14,8), %ymm26, %ymm25
+
+// CHECK: vaddps (%rcx){1to8}, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0x2c,0x30,0x58,0x09]
+          vaddps (%rcx){1to8}, %ymm26, %ymm25
+
+// CHECK: vaddps 4064(%rdx), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0x2c,0x20,0x58,0x4a,0x7f]
+          vaddps 4064(%rdx), %ymm26, %ymm25
+
+// CHECK: vaddps 4096(%rdx), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0x2c,0x20,0x58,0x8a,0x00,0x10,0x00,0x00]
+          vaddps 4096(%rdx), %ymm26, %ymm25
+
+// CHECK: vaddps -4096(%rdx), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0x2c,0x20,0x58,0x4a,0x80]
+          vaddps -4096(%rdx), %ymm26, %ymm25
+
+// CHECK: vaddps -4128(%rdx), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0x2c,0x20,0x58,0x8a,0xe0,0xef,0xff,0xff]
+          vaddps -4128(%rdx), %ymm26, %ymm25
+
+// CHECK: vaddps 508(%rdx){1to8}, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0x2c,0x30,0x58,0x4a,0x7f]
+          vaddps 508(%rdx){1to8}, %ymm26, %ymm25
+
+// CHECK: vaddps 512(%rdx){1to8}, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0x2c,0x30,0x58,0x8a,0x00,0x02,0x00,0x00]
+          vaddps 512(%rdx){1to8}, %ymm26, %ymm25
+
+// CHECK: vaddps -512(%rdx){1to8}, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0x2c,0x30,0x58,0x4a,0x80]
+          vaddps -512(%rdx){1to8}, %ymm26, %ymm25
+
+// CHECK: vaddps -516(%rdx){1to8}, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0x2c,0x30,0x58,0x8a,0xfc,0xfd,0xff,0xff]
+          vaddps -516(%rdx){1to8}, %ymm26, %ymm25
+
+// CHECK: vbroadcastsd (%rcx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x19,0x31]
+          vbroadcastsd (%rcx), %ymm22
+
+// CHECK: vbroadcastsd (%rcx), %ymm22 {%k5}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x2d,0x19,0x31]
+          vbroadcastsd (%rcx), %ymm22 {%k5}
+
+// CHECK: vbroadcastsd (%rcx), %ymm22 {%k5} {z}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0xad,0x19,0x31]
+          vbroadcastsd (%rcx), %ymm22 {%k5} {z}
+
+// CHECK: vbroadcastsd 291(%rax,%r14,8), %ymm22
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x19,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vbroadcastsd 291(%rax,%r14,8), %ymm22
+
+// CHECK: vbroadcastsd 1016(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x19,0x72,0x7f]
+          vbroadcastsd 1016(%rdx), %ymm22
+
+// CHECK: vbroadcastsd 1024(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x19,0xb2,0x00,0x04,0x00,0x00]
+          vbroadcastsd 1024(%rdx), %ymm22
+
+// CHECK: vbroadcastsd -1024(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x19,0x72,0x80]
+          vbroadcastsd -1024(%rdx), %ymm22
+
+// CHECK: vbroadcastsd -1032(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x19,0xb2,0xf8,0xfb,0xff,0xff]
+          vbroadcastsd -1032(%rdx), %ymm22
+
+// CHECK: vbroadcastsd %xmm17, %ymm19
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x19,0xd9]
+          vbroadcastsd %xmm17, %ymm19
+
+// CHECK: vbroadcastsd %xmm17, %ymm19 {%k6}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x2e,0x19,0xd9]
+          vbroadcastsd %xmm17, %ymm19 {%k6}
+
+// CHECK: vbroadcastsd %xmm17, %ymm19 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0xae,0x19,0xd9]
+          vbroadcastsd %xmm17, %ymm19 {%k6} {z}
+
+// CHECK: vbroadcastss (%rcx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x18,0x29]
+          vbroadcastss (%rcx), %xmm21
+
+// CHECK: vbroadcastss (%rcx), %xmm21 {%k2}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x0a,0x18,0x29]
+          vbroadcastss (%rcx), %xmm21 {%k2}
+
+// CHECK: vbroadcastss (%rcx), %xmm21 {%k2} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x8a,0x18,0x29]
+          vbroadcastss (%rcx), %xmm21 {%k2} {z}
+
+// CHECK: vbroadcastss 291(%rax,%r14,8), %xmm21
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x18,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vbroadcastss 291(%rax,%r14,8), %xmm21
+
+// CHECK: vbroadcastss 508(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x18,0x6a,0x7f]
+          vbroadcastss 508(%rdx), %xmm21
+
+// CHECK: vbroadcastss 512(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x18,0xaa,0x00,0x02,0x00,0x00]
+          vbroadcastss 512(%rdx), %xmm21
+
+// CHECK: vbroadcastss -512(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x18,0x6a,0x80]
+          vbroadcastss -512(%rdx), %xmm21
+
+// CHECK: vbroadcastss -516(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x18,0xaa,0xfc,0xfd,0xff,0xff]
+          vbroadcastss -516(%rdx), %xmm21
+
+// CHECK: vbroadcastss (%rcx), %ymm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x18,0x31]
+          vbroadcastss (%rcx), %ymm30
+
+// CHECK: vbroadcastss (%rcx), %ymm30 {%k1}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x29,0x18,0x31]
+          vbroadcastss (%rcx), %ymm30 {%k1}
+
+// CHECK: vbroadcastss (%rcx), %ymm30 {%k1} {z}
+// CHECK:  encoding: [0x62,0x62,0x7d,0xa9,0x18,0x31]
+          vbroadcastss (%rcx), %ymm30 {%k1} {z}
+
+// CHECK: vbroadcastss 291(%rax,%r14,8), %ymm30
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x18,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vbroadcastss 291(%rax,%r14,8), %ymm30
+
+// CHECK: vbroadcastss 508(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x18,0x72,0x7f]
+          vbroadcastss 508(%rdx), %ymm30
+
+// CHECK: vbroadcastss 512(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x18,0xb2,0x00,0x02,0x00,0x00]
+          vbroadcastss 512(%rdx), %ymm30
+
+// CHECK: vbroadcastss -512(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x18,0x72,0x80]
+          vbroadcastss -512(%rdx), %ymm30
+
+// CHECK: vbroadcastss -516(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x18,0xb2,0xfc,0xfd,0xff,0xff]
+          vbroadcastss -516(%rdx), %ymm30
+
+// CHECK: vbroadcastss %xmm24, %xmm24
+// CHECK:  encoding: [0x62,0x02,0x7d,0x08,0x18,0xc0]
+          vbroadcastss %xmm24, %xmm24
+
+// CHECK: vbroadcastss %xmm24, %xmm24 {%k2}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x0a,0x18,0xc0]
+          vbroadcastss %xmm24, %xmm24 {%k2}
+
+// CHECK: vbroadcastss %xmm24, %xmm24 {%k2} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x8a,0x18,0xc0]
+          vbroadcastss %xmm24, %xmm24 {%k2} {z}
+
+// CHECK: vbroadcastss %xmm28, %ymm24
+// CHECK:  encoding: [0x62,0x02,0x7d,0x28,0x18,0xc4]
+          vbroadcastss %xmm28, %ymm24
+
+// CHECK: vbroadcastss %xmm28, %ymm24 {%k6}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x2e,0x18,0xc4]
+          vbroadcastss %xmm28, %ymm24 {%k6}
+
+// CHECK: vbroadcastss %xmm28, %ymm24 {%k6} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0xae,0x18,0xc4]
+          vbroadcastss %xmm28, %ymm24 {%k6} {z}
+
+// CHECK: vdivpd %xmm27, %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0x81,0xed,0x00,0x5e,0xdb]
+          vdivpd %xmm27, %xmm18, %xmm19
+
+// CHECK: vdivpd %xmm27, %xmm18, %xmm19 {%k3}
+// CHECK:  encoding: [0x62,0x81,0xed,0x03,0x5e,0xdb]
+          vdivpd %xmm27, %xmm18, %xmm19 {%k3}
+
+// CHECK: vdivpd %xmm27, %xmm18, %xmm19 {%k3} {z}
+// CHECK:  encoding: [0x62,0x81,0xed,0x83,0x5e,0xdb]
+          vdivpd %xmm27, %xmm18, %xmm19 {%k3} {z}
+
+// CHECK: vdivpd (%rcx), %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0xed,0x00,0x5e,0x19]
+          vdivpd (%rcx), %xmm18, %xmm19
+
+// CHECK: vdivpd 291(%rax,%r14,8), %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xa1,0xed,0x00,0x5e,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vdivpd 291(%rax,%r14,8), %xmm18, %xmm19
+
+// CHECK: vdivpd (%rcx){1to2}, %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0xed,0x10,0x5e,0x19]
+          vdivpd (%rcx){1to2}, %xmm18, %xmm19
+
+// CHECK: vdivpd 2032(%rdx), %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0xed,0x00,0x5e,0x5a,0x7f]
+          vdivpd 2032(%rdx), %xmm18, %xmm19
+
+// CHECK: vdivpd 2048(%rdx), %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0xed,0x00,0x5e,0x9a,0x00,0x08,0x00,0x00]
+          vdivpd 2048(%rdx), %xmm18, %xmm19
+
+// CHECK: vdivpd -2048(%rdx), %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0xed,0x00,0x5e,0x5a,0x80]
+          vdivpd -2048(%rdx), %xmm18, %xmm19
+
+// CHECK: vdivpd -2064(%rdx), %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0xed,0x00,0x5e,0x9a,0xf0,0xf7,0xff,0xff]
+          vdivpd -2064(%rdx), %xmm18, %xmm19
+
+// CHECK: vdivpd 1016(%rdx){1to2}, %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0xed,0x10,0x5e,0x5a,0x7f]
+          vdivpd 1016(%rdx){1to2}, %xmm18, %xmm19
+
+// CHECK: vdivpd 1024(%rdx){1to2}, %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0xed,0x10,0x5e,0x9a,0x00,0x04,0x00,0x00]
+          vdivpd 1024(%rdx){1to2}, %xmm18, %xmm19
+
+// CHECK: vdivpd -1024(%rdx){1to2}, %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0xed,0x10,0x5e,0x5a,0x80]
+          vdivpd -1024(%rdx){1to2}, %xmm18, %xmm19
+
+// CHECK: vdivpd -1032(%rdx){1to2}, %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0xed,0x10,0x5e,0x9a,0xf8,0xfb,0xff,0xff]
+          vdivpd -1032(%rdx){1to2}, %xmm18, %xmm19
+
+// CHECK: vdivpd %ymm28, %ymm24, %ymm23
+// CHECK:  encoding: [0x62,0x81,0xbd,0x20,0x5e,0xfc]
+          vdivpd %ymm28, %ymm24, %ymm23
+
+// CHECK: vdivpd %ymm28, %ymm24, %ymm23 {%k6}
+// CHECK:  encoding: [0x62,0x81,0xbd,0x26,0x5e,0xfc]
+          vdivpd %ymm28, %ymm24, %ymm23 {%k6}
+
+// CHECK: vdivpd %ymm28, %ymm24, %ymm23 {%k6} {z}
+// CHECK:  encoding: [0x62,0x81,0xbd,0xa6,0x5e,0xfc]
+          vdivpd %ymm28, %ymm24, %ymm23 {%k6} {z}
+
+// CHECK: vdivpd (%rcx), %ymm24, %ymm23
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x20,0x5e,0x39]
+          vdivpd (%rcx), %ymm24, %ymm23
+
+// CHECK: vdivpd 291(%rax,%r14,8), %ymm24, %ymm23
+// CHECK:  encoding: [0x62,0xa1,0xbd,0x20,0x5e,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vdivpd 291(%rax,%r14,8), %ymm24, %ymm23
+
+// CHECK: vdivpd (%rcx){1to4}, %ymm24, %ymm23
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x30,0x5e,0x39]
+          vdivpd (%rcx){1to4}, %ymm24, %ymm23
+
+// CHECK: vdivpd 4064(%rdx), %ymm24, %ymm23
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x20,0x5e,0x7a,0x7f]
+          vdivpd 4064(%rdx), %ymm24, %ymm23
+
+// CHECK: vdivpd 4096(%rdx), %ymm24, %ymm23
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x20,0x5e,0xba,0x00,0x10,0x00,0x00]
+          vdivpd 4096(%rdx), %ymm24, %ymm23
+
+// CHECK: vdivpd -4096(%rdx), %ymm24, %ymm23
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x20,0x5e,0x7a,0x80]
+          vdivpd -4096(%rdx), %ymm24, %ymm23
+
+// CHECK: vdivpd -4128(%rdx), %ymm24, %ymm23
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x20,0x5e,0xba,0xe0,0xef,0xff,0xff]
+          vdivpd -4128(%rdx), %ymm24, %ymm23
+
+// CHECK: vdivpd 1016(%rdx){1to4}, %ymm24, %ymm23
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x30,0x5e,0x7a,0x7f]
+          vdivpd 1016(%rdx){1to4}, %ymm24, %ymm23
+
+// CHECK: vdivpd 1024(%rdx){1to4}, %ymm24, %ymm23
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x30,0x5e,0xba,0x00,0x04,0x00,0x00]
+          vdivpd 1024(%rdx){1to4}, %ymm24, %ymm23
+
+// CHECK: vdivpd -1024(%rdx){1to4}, %ymm24, %ymm23
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x30,0x5e,0x7a,0x80]
+          vdivpd -1024(%rdx){1to4}, %ymm24, %ymm23
+
+// CHECK: vdivpd -1032(%rdx){1to4}, %ymm24, %ymm23
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x30,0x5e,0xba,0xf8,0xfb,0xff,0xff]
+          vdivpd -1032(%rdx){1to4}, %ymm24, %ymm23
+
+// CHECK: vdivps %xmm26, %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0x81,0x3c,0x00,0x5e,0xca]
+          vdivps %xmm26, %xmm24, %xmm17
+
+// CHECK: vdivps %xmm26, %xmm24, %xmm17 {%k2}
+// CHECK:  encoding: [0x62,0x81,0x3c,0x02,0x5e,0xca]
+          vdivps %xmm26, %xmm24, %xmm17 {%k2}
+
+// CHECK: vdivps %xmm26, %xmm24, %xmm17 {%k2} {z}
+// CHECK:  encoding: [0x62,0x81,0x3c,0x82,0x5e,0xca]
+          vdivps %xmm26, %xmm24, %xmm17 {%k2} {z}
+
+// CHECK: vdivps (%rcx), %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x3c,0x00,0x5e,0x09]
+          vdivps (%rcx), %xmm24, %xmm17
+
+// CHECK: vdivps 291(%rax,%r14,8), %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xa1,0x3c,0x00,0x5e,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vdivps 291(%rax,%r14,8), %xmm24, %xmm17
+
+// CHECK: vdivps (%rcx){1to4}, %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x3c,0x10,0x5e,0x09]
+          vdivps (%rcx){1to4}, %xmm24, %xmm17
+
+// CHECK: vdivps 2032(%rdx), %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x3c,0x00,0x5e,0x4a,0x7f]
+          vdivps 2032(%rdx), %xmm24, %xmm17
+
+// CHECK: vdivps 2048(%rdx), %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x3c,0x00,0x5e,0x8a,0x00,0x08,0x00,0x00]
+          vdivps 2048(%rdx), %xmm24, %xmm17
+
+// CHECK: vdivps -2048(%rdx), %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x3c,0x00,0x5e,0x4a,0x80]
+          vdivps -2048(%rdx), %xmm24, %xmm17
+
+// CHECK: vdivps -2064(%rdx), %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x3c,0x00,0x5e,0x8a,0xf0,0xf7,0xff,0xff]
+          vdivps -2064(%rdx), %xmm24, %xmm17
+
+// CHECK: vdivps 508(%rdx){1to4}, %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x3c,0x10,0x5e,0x4a,0x7f]
+          vdivps 508(%rdx){1to4}, %xmm24, %xmm17
+
+// CHECK: vdivps 512(%rdx){1to4}, %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x3c,0x10,0x5e,0x8a,0x00,0x02,0x00,0x00]
+          vdivps 512(%rdx){1to4}, %xmm24, %xmm17
+
+// CHECK: vdivps -512(%rdx){1to4}, %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x3c,0x10,0x5e,0x4a,0x80]
+          vdivps -512(%rdx){1to4}, %xmm24, %xmm17
+
+// CHECK: vdivps -516(%rdx){1to4}, %xmm24, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x3c,0x10,0x5e,0x8a,0xfc,0xfd,0xff,0xff]
+          vdivps -516(%rdx){1to4}, %xmm24, %xmm17
+
+// CHECK: vdivps %ymm17, %ymm27, %ymm17
+// CHECK:  encoding: [0x62,0xa1,0x24,0x20,0x5e,0xc9]
+          vdivps %ymm17, %ymm27, %ymm17
+
+// CHECK: vdivps %ymm17, %ymm27, %ymm17 {%k6}
+// CHECK:  encoding: [0x62,0xa1,0x24,0x26,0x5e,0xc9]
+          vdivps %ymm17, %ymm27, %ymm17 {%k6}
+
+// CHECK: vdivps %ymm17, %ymm27, %ymm17 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa1,0x24,0xa6,0x5e,0xc9]
+          vdivps %ymm17, %ymm27, %ymm17 {%k6} {z}
+
+// CHECK: vdivps (%rcx), %ymm27, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x24,0x20,0x5e,0x09]
+          vdivps (%rcx), %ymm27, %ymm17
+
+// CHECK: vdivps 291(%rax,%r14,8), %ymm27, %ymm17
+// CHECK:  encoding: [0x62,0xa1,0x24,0x20,0x5e,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vdivps 291(%rax,%r14,8), %ymm27, %ymm17
+
+// CHECK: vdivps (%rcx){1to8}, %ymm27, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x24,0x30,0x5e,0x09]
+          vdivps (%rcx){1to8}, %ymm27, %ymm17
+
+// CHECK: vdivps 4064(%rdx), %ymm27, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x24,0x20,0x5e,0x4a,0x7f]
+          vdivps 4064(%rdx), %ymm27, %ymm17
+
+// CHECK: vdivps 4096(%rdx), %ymm27, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x24,0x20,0x5e,0x8a,0x00,0x10,0x00,0x00]
+          vdivps 4096(%rdx), %ymm27, %ymm17
+
+// CHECK: vdivps -4096(%rdx), %ymm27, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x24,0x20,0x5e,0x4a,0x80]
+          vdivps -4096(%rdx), %ymm27, %ymm17
+
+// CHECK: vdivps -4128(%rdx), %ymm27, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x24,0x20,0x5e,0x8a,0xe0,0xef,0xff,0xff]
+          vdivps -4128(%rdx), %ymm27, %ymm17
+
+// CHECK: vdivps 508(%rdx){1to8}, %ymm27, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x24,0x30,0x5e,0x4a,0x7f]
+          vdivps 508(%rdx){1to8}, %ymm27, %ymm17
+
+// CHECK: vdivps 512(%rdx){1to8}, %ymm27, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x24,0x30,0x5e,0x8a,0x00,0x02,0x00,0x00]
+          vdivps 512(%rdx){1to8}, %ymm27, %ymm17
+
+// CHECK: vdivps -512(%rdx){1to8}, %ymm27, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x24,0x30,0x5e,0x4a,0x80]
+          vdivps -512(%rdx){1to8}, %ymm27, %ymm17
+
+// CHECK: vdivps -516(%rdx){1to8}, %ymm27, %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x24,0x30,0x5e,0x8a,0xfc,0xfd,0xff,0xff]
+          vdivps -516(%rdx){1to8}, %ymm27, %ymm17
+
+// CHECK: vmaxpd %xmm23, %xmm29, %xmm17
+// CHECK:  encoding: [0x62,0xa1,0x95,0x00,0x5f,0xcf]
+          vmaxpd %xmm23, %xmm29, %xmm17
+
+// CHECK: vmaxpd %xmm23, %xmm29, %xmm17 {%k6}
+// CHECK:  encoding: [0x62,0xa1,0x95,0x06,0x5f,0xcf]
+          vmaxpd %xmm23, %xmm29, %xmm17 {%k6}
+
+// CHECK: vmaxpd %xmm23, %xmm29, %xmm17 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa1,0x95,0x86,0x5f,0xcf]
+          vmaxpd %xmm23, %xmm29, %xmm17 {%k6} {z}
+
+// CHECK: vmaxpd (%rcx), %xmm29, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x95,0x00,0x5f,0x09]
+          vmaxpd (%rcx), %xmm29, %xmm17
+
+// CHECK: vmaxpd 291(%rax,%r14,8), %xmm29, %xmm17
+// CHECK:  encoding: [0x62,0xa1,0x95,0x00,0x5f,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmaxpd 291(%rax,%r14,8), %xmm29, %xmm17
+
+// CHECK: vmaxpd (%rcx){1to2}, %xmm29, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x95,0x10,0x5f,0x09]
+          vmaxpd (%rcx){1to2}, %xmm29, %xmm17
+
+// CHECK: vmaxpd 2032(%rdx), %xmm29, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x95,0x00,0x5f,0x4a,0x7f]
+          vmaxpd 2032(%rdx), %xmm29, %xmm17
+
+// CHECK: vmaxpd 2048(%rdx), %xmm29, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x95,0x00,0x5f,0x8a,0x00,0x08,0x00,0x00]
+          vmaxpd 2048(%rdx), %xmm29, %xmm17
+
+// CHECK: vmaxpd -2048(%rdx), %xmm29, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x95,0x00,0x5f,0x4a,0x80]
+          vmaxpd -2048(%rdx), %xmm29, %xmm17
+
+// CHECK: vmaxpd -2064(%rdx), %xmm29, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x95,0x00,0x5f,0x8a,0xf0,0xf7,0xff,0xff]
+          vmaxpd -2064(%rdx), %xmm29, %xmm17
+
+// CHECK: vmaxpd 1016(%rdx){1to2}, %xmm29, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x95,0x10,0x5f,0x4a,0x7f]
+          vmaxpd 1016(%rdx){1to2}, %xmm29, %xmm17
+
+// CHECK: vmaxpd 1024(%rdx){1to2}, %xmm29, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x95,0x10,0x5f,0x8a,0x00,0x04,0x00,0x00]
+          vmaxpd 1024(%rdx){1to2}, %xmm29, %xmm17
+
+// CHECK: vmaxpd -1024(%rdx){1to2}, %xmm29, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x95,0x10,0x5f,0x4a,0x80]
+          vmaxpd -1024(%rdx){1to2}, %xmm29, %xmm17
+
+// CHECK: vmaxpd -1032(%rdx){1to2}, %xmm29, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x95,0x10,0x5f,0x8a,0xf8,0xfb,0xff,0xff]
+          vmaxpd -1032(%rdx){1to2}, %xmm29, %xmm17
+
+// CHECK: vmaxpd %ymm24, %ymm24, %ymm21
+// CHECK:  encoding: [0x62,0x81,0xbd,0x20,0x5f,0xe8]
+          vmaxpd %ymm24, %ymm24, %ymm21
+
+// CHECK: vmaxpd %ymm24, %ymm24, %ymm21 {%k1}
+// CHECK:  encoding: [0x62,0x81,0xbd,0x21,0x5f,0xe8]
+          vmaxpd %ymm24, %ymm24, %ymm21 {%k1}
+
+// CHECK: vmaxpd %ymm24, %ymm24, %ymm21 {%k1} {z}
+// CHECK:  encoding: [0x62,0x81,0xbd,0xa1,0x5f,0xe8]
+          vmaxpd %ymm24, %ymm24, %ymm21 {%k1} {z}
+
+// CHECK: vmaxpd (%rcx), %ymm24, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x20,0x5f,0x29]
+          vmaxpd (%rcx), %ymm24, %ymm21
+
+// CHECK: vmaxpd 291(%rax,%r14,8), %ymm24, %ymm21
+// CHECK:  encoding: [0x62,0xa1,0xbd,0x20,0x5f,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmaxpd 291(%rax,%r14,8), %ymm24, %ymm21
+
+// CHECK: vmaxpd (%rcx){1to4}, %ymm24, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x30,0x5f,0x29]
+          vmaxpd (%rcx){1to4}, %ymm24, %ymm21
+
+// CHECK: vmaxpd 4064(%rdx), %ymm24, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x20,0x5f,0x6a,0x7f]
+          vmaxpd 4064(%rdx), %ymm24, %ymm21
+
+// CHECK: vmaxpd 4096(%rdx), %ymm24, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x20,0x5f,0xaa,0x00,0x10,0x00,0x00]
+          vmaxpd 4096(%rdx), %ymm24, %ymm21
+
+// CHECK: vmaxpd -4096(%rdx), %ymm24, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x20,0x5f,0x6a,0x80]
+          vmaxpd -4096(%rdx), %ymm24, %ymm21
+
+// CHECK: vmaxpd -4128(%rdx), %ymm24, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x20,0x5f,0xaa,0xe0,0xef,0xff,0xff]
+          vmaxpd -4128(%rdx), %ymm24, %ymm21
+
+// CHECK: vmaxpd 1016(%rdx){1to4}, %ymm24, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x30,0x5f,0x6a,0x7f]
+          vmaxpd 1016(%rdx){1to4}, %ymm24, %ymm21
+
+// CHECK: vmaxpd 1024(%rdx){1to4}, %ymm24, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x30,0x5f,0xaa,0x00,0x04,0x00,0x00]
+          vmaxpd 1024(%rdx){1to4}, %ymm24, %ymm21
+
+// CHECK: vmaxpd -1024(%rdx){1to4}, %ymm24, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x30,0x5f,0x6a,0x80]
+          vmaxpd -1024(%rdx){1to4}, %ymm24, %ymm21
+
+// CHECK: vmaxpd -1032(%rdx){1to4}, %ymm24, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xbd,0x30,0x5f,0xaa,0xf8,0xfb,0xff,0xff]
+          vmaxpd -1032(%rdx){1to4}, %ymm24, %ymm21
+
+// CHECK: vmaxps %xmm19, %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x21,0x5c,0x00,0x5f,0xc3]
+          vmaxps %xmm19, %xmm20, %xmm24
+
+// CHECK: vmaxps %xmm19, %xmm20, %xmm24 {%k6}
+// CHECK:  encoding: [0x62,0x21,0x5c,0x06,0x5f,0xc3]
+          vmaxps %xmm19, %xmm20, %xmm24 {%k6}
+
+// CHECK: vmaxps %xmm19, %xmm20, %xmm24 {%k6} {z}
+// CHECK:  encoding: [0x62,0x21,0x5c,0x86,0x5f,0xc3]
+          vmaxps %xmm19, %xmm20, %xmm24 {%k6} {z}
+
+// CHECK: vmaxps (%rcx), %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x5c,0x00,0x5f,0x01]
+          vmaxps (%rcx), %xmm20, %xmm24
+
+// CHECK: vmaxps 291(%rax,%r14,8), %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x21,0x5c,0x00,0x5f,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vmaxps 291(%rax,%r14,8), %xmm20, %xmm24
+
+// CHECK: vmaxps (%rcx){1to4}, %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x5c,0x10,0x5f,0x01]
+          vmaxps (%rcx){1to4}, %xmm20, %xmm24
+
+// CHECK: vmaxps 2032(%rdx), %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x5c,0x00,0x5f,0x42,0x7f]
+          vmaxps 2032(%rdx), %xmm20, %xmm24
+
+// CHECK: vmaxps 2048(%rdx), %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x5c,0x00,0x5f,0x82,0x00,0x08,0x00,0x00]
+          vmaxps 2048(%rdx), %xmm20, %xmm24
+
+// CHECK: vmaxps -2048(%rdx), %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x5c,0x00,0x5f,0x42,0x80]
+          vmaxps -2048(%rdx), %xmm20, %xmm24
+
+// CHECK: vmaxps -2064(%rdx), %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x5c,0x00,0x5f,0x82,0xf0,0xf7,0xff,0xff]
+          vmaxps -2064(%rdx), %xmm20, %xmm24
+
+// CHECK: vmaxps 508(%rdx){1to4}, %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x5c,0x10,0x5f,0x42,0x7f]
+          vmaxps 508(%rdx){1to4}, %xmm20, %xmm24
+
+// CHECK: vmaxps 512(%rdx){1to4}, %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x5c,0x10,0x5f,0x82,0x00,0x02,0x00,0x00]
+          vmaxps 512(%rdx){1to4}, %xmm20, %xmm24
+
+// CHECK: vmaxps -512(%rdx){1to4}, %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x5c,0x10,0x5f,0x42,0x80]
+          vmaxps -512(%rdx){1to4}, %xmm20, %xmm24
+
+// CHECK: vmaxps -516(%rdx){1to4}, %xmm20, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x5c,0x10,0x5f,0x82,0xfc,0xfd,0xff,0xff]
+          vmaxps -516(%rdx){1to4}, %xmm20, %xmm24
+
+// CHECK: vmaxps %ymm17, %ymm17, %ymm19
+// CHECK:  encoding: [0x62,0xa1,0x74,0x20,0x5f,0xd9]
+          vmaxps %ymm17, %ymm17, %ymm19
+
+// CHECK: vmaxps %ymm17, %ymm17, %ymm19 {%k3}
+// CHECK:  encoding: [0x62,0xa1,0x74,0x23,0x5f,0xd9]
+          vmaxps %ymm17, %ymm17, %ymm19 {%k3}
+
+// CHECK: vmaxps %ymm17, %ymm17, %ymm19 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa1,0x74,0xa3,0x5f,0xd9]
+          vmaxps %ymm17, %ymm17, %ymm19 {%k3} {z}
+
+// CHECK: vmaxps (%rcx), %ymm17, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x74,0x20,0x5f,0x19]
+          vmaxps (%rcx), %ymm17, %ymm19
+
+// CHECK: vmaxps 291(%rax,%r14,8), %ymm17, %ymm19
+// CHECK:  encoding: [0x62,0xa1,0x74,0x20,0x5f,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vmaxps 291(%rax,%r14,8), %ymm17, %ymm19
+
+// CHECK: vmaxps (%rcx){1to8}, %ymm17, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x74,0x30,0x5f,0x19]
+          vmaxps (%rcx){1to8}, %ymm17, %ymm19
+
+// CHECK: vmaxps 4064(%rdx), %ymm17, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x74,0x20,0x5f,0x5a,0x7f]
+          vmaxps 4064(%rdx), %ymm17, %ymm19
+
+// CHECK: vmaxps 4096(%rdx), %ymm17, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x74,0x20,0x5f,0x9a,0x00,0x10,0x00,0x00]
+          vmaxps 4096(%rdx), %ymm17, %ymm19
+
+// CHECK: vmaxps -4096(%rdx), %ymm17, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x74,0x20,0x5f,0x5a,0x80]
+          vmaxps -4096(%rdx), %ymm17, %ymm19
+
+// CHECK: vmaxps -4128(%rdx), %ymm17, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x74,0x20,0x5f,0x9a,0xe0,0xef,0xff,0xff]
+          vmaxps -4128(%rdx), %ymm17, %ymm19
+
+// CHECK: vmaxps 508(%rdx){1to8}, %ymm17, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x74,0x30,0x5f,0x5a,0x7f]
+          vmaxps 508(%rdx){1to8}, %ymm17, %ymm19
+
+// CHECK: vmaxps 512(%rdx){1to8}, %ymm17, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x74,0x30,0x5f,0x9a,0x00,0x02,0x00,0x00]
+          vmaxps 512(%rdx){1to8}, %ymm17, %ymm19
+
+// CHECK: vmaxps -512(%rdx){1to8}, %ymm17, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x74,0x30,0x5f,0x5a,0x80]
+          vmaxps -512(%rdx){1to8}, %ymm17, %ymm19
+
+// CHECK: vmaxps -516(%rdx){1to8}, %ymm17, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x74,0x30,0x5f,0x9a,0xfc,0xfd,0xff,0xff]
+          vmaxps -516(%rdx){1to8}, %ymm17, %ymm19
+
+// CHECK: vminpd %xmm19, %xmm19, %xmm27
+// CHECK:  encoding: [0x62,0x21,0xe5,0x00,0x5d,0xdb]
+          vminpd %xmm19, %xmm19, %xmm27
+
+// CHECK: vminpd %xmm19, %xmm19, %xmm27 {%k6}
+// CHECK:  encoding: [0x62,0x21,0xe5,0x06,0x5d,0xdb]
+          vminpd %xmm19, %xmm19, %xmm27 {%k6}
+
+// CHECK: vminpd %xmm19, %xmm19, %xmm27 {%k6} {z}
+// CHECK:  encoding: [0x62,0x21,0xe5,0x86,0x5d,0xdb]
+          vminpd %xmm19, %xmm19, %xmm27 {%k6} {z}
+
+// CHECK: vminpd (%rcx), %xmm19, %xmm27
+// CHECK:  encoding: [0x62,0x61,0xe5,0x00,0x5d,0x19]
+          vminpd (%rcx), %xmm19, %xmm27
+
+// CHECK: vminpd 291(%rax,%r14,8), %xmm19, %xmm27
+// CHECK:  encoding: [0x62,0x21,0xe5,0x00,0x5d,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vminpd 291(%rax,%r14,8), %xmm19, %xmm27
+
+// CHECK: vminpd (%rcx){1to2}, %xmm19, %xmm27
+// CHECK:  encoding: [0x62,0x61,0xe5,0x10,0x5d,0x19]
+          vminpd (%rcx){1to2}, %xmm19, %xmm27
+
+// CHECK: vminpd 2032(%rdx), %xmm19, %xmm27
+// CHECK:  encoding: [0x62,0x61,0xe5,0x00,0x5d,0x5a,0x7f]
+          vminpd 2032(%rdx), %xmm19, %xmm27
+
+// CHECK: vminpd 2048(%rdx), %xmm19, %xmm27
+// CHECK:  encoding: [0x62,0x61,0xe5,0x00,0x5d,0x9a,0x00,0x08,0x00,0x00]
+          vminpd 2048(%rdx), %xmm19, %xmm27
+
+// CHECK: vminpd -2048(%rdx), %xmm19, %xmm27
+// CHECK:  encoding: [0x62,0x61,0xe5,0x00,0x5d,0x5a,0x80]
+          vminpd -2048(%rdx), %xmm19, %xmm27
+
+// CHECK: vminpd -2064(%rdx), %xmm19, %xmm27
+// CHECK:  encoding: [0x62,0x61,0xe5,0x00,0x5d,0x9a,0xf0,0xf7,0xff,0xff]
+          vminpd -2064(%rdx), %xmm19, %xmm27
+
+// CHECK: vminpd 1016(%rdx){1to2}, %xmm19, %xmm27
+// CHECK:  encoding: [0x62,0x61,0xe5,0x10,0x5d,0x5a,0x7f]
+          vminpd 1016(%rdx){1to2}, %xmm19, %xmm27
+
+// CHECK: vminpd 1024(%rdx){1to2}, %xmm19, %xmm27
+// CHECK:  encoding: [0x62,0x61,0xe5,0x10,0x5d,0x9a,0x00,0x04,0x00,0x00]
+          vminpd 1024(%rdx){1to2}, %xmm19, %xmm27
+
+// CHECK: vminpd -1024(%rdx){1to2}, %xmm19, %xmm27
+// CHECK:  encoding: [0x62,0x61,0xe5,0x10,0x5d,0x5a,0x80]
+          vminpd -1024(%rdx){1to2}, %xmm19, %xmm27
+
+// CHECK: vminpd -1032(%rdx){1to2}, %xmm19, %xmm27
+// CHECK:  encoding: [0x62,0x61,0xe5,0x10,0x5d,0x9a,0xf8,0xfb,0xff,0xff]
+          vminpd -1032(%rdx){1to2}, %xmm19, %xmm27
+
+// CHECK: vminpd %ymm23, %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x21,0x95,0x20,0x5d,0xc7]
+          vminpd %ymm23, %ymm29, %ymm24
+
+// CHECK: vminpd %ymm23, %ymm29, %ymm24 {%k6}
+// CHECK:  encoding: [0x62,0x21,0x95,0x26,0x5d,0xc7]
+          vminpd %ymm23, %ymm29, %ymm24 {%k6}
+
+// CHECK: vminpd %ymm23, %ymm29, %ymm24 {%k6} {z}
+// CHECK:  encoding: [0x62,0x21,0x95,0xa6,0x5d,0xc7]
+          vminpd %ymm23, %ymm29, %ymm24 {%k6} {z}
+
+// CHECK: vminpd (%rcx), %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x20,0x5d,0x01]
+          vminpd (%rcx), %ymm29, %ymm24
+
+// CHECK: vminpd 291(%rax,%r14,8), %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x21,0x95,0x20,0x5d,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vminpd 291(%rax,%r14,8), %ymm29, %ymm24
+
+// CHECK: vminpd (%rcx){1to4}, %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x30,0x5d,0x01]
+          vminpd (%rcx){1to4}, %ymm29, %ymm24
+
+// CHECK: vminpd 4064(%rdx), %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x20,0x5d,0x42,0x7f]
+          vminpd 4064(%rdx), %ymm29, %ymm24
+
+// CHECK: vminpd 4096(%rdx), %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x20,0x5d,0x82,0x00,0x10,0x00,0x00]
+          vminpd 4096(%rdx), %ymm29, %ymm24
+
+// CHECK: vminpd -4096(%rdx), %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x20,0x5d,0x42,0x80]
+          vminpd -4096(%rdx), %ymm29, %ymm24
+
+// CHECK: vminpd -4128(%rdx), %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x20,0x5d,0x82,0xe0,0xef,0xff,0xff]
+          vminpd -4128(%rdx), %ymm29, %ymm24
+
+// CHECK: vminpd 1016(%rdx){1to4}, %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x30,0x5d,0x42,0x7f]
+          vminpd 1016(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vminpd 1024(%rdx){1to4}, %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x30,0x5d,0x82,0x00,0x04,0x00,0x00]
+          vminpd 1024(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vminpd -1024(%rdx){1to4}, %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x30,0x5d,0x42,0x80]
+          vminpd -1024(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vminpd -1032(%rdx){1to4}, %ymm29, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x95,0x30,0x5d,0x82,0xf8,0xfb,0xff,0xff]
+          vminpd -1032(%rdx){1to4}, %ymm29, %ymm24
+
+// CHECK: vminps %xmm23, %xmm20, %xmm17
+// CHECK:  encoding: [0x62,0xa1,0x5c,0x00,0x5d,0xcf]
+          vminps %xmm23, %xmm20, %xmm17
+
+// CHECK: vminps %xmm23, %xmm20, %xmm17 {%k1}
+// CHECK:  encoding: [0x62,0xa1,0x5c,0x01,0x5d,0xcf]
+          vminps %xmm23, %xmm20, %xmm17 {%k1}
+
+// CHECK: vminps %xmm23, %xmm20, %xmm17 {%k1} {z}
+// CHECK:  encoding: [0x62,0xa1,0x5c,0x81,0x5d,0xcf]
+          vminps %xmm23, %xmm20, %xmm17 {%k1} {z}
+
+// CHECK: vminps (%rcx), %xmm20, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x5c,0x00,0x5d,0x09]
+          vminps (%rcx), %xmm20, %xmm17
+
+// CHECK: vminps 291(%rax,%r14,8), %xmm20, %xmm17
+// CHECK:  encoding: [0x62,0xa1,0x5c,0x00,0x5d,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vminps 291(%rax,%r14,8), %xmm20, %xmm17
+
+// CHECK: vminps (%rcx){1to4}, %xmm20, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x5c,0x10,0x5d,0x09]
+          vminps (%rcx){1to4}, %xmm20, %xmm17
+
+// CHECK: vminps 2032(%rdx), %xmm20, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x5c,0x00,0x5d,0x4a,0x7f]
+          vminps 2032(%rdx), %xmm20, %xmm17
+
+// CHECK: vminps 2048(%rdx), %xmm20, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x5c,0x00,0x5d,0x8a,0x00,0x08,0x00,0x00]
+          vminps 2048(%rdx), %xmm20, %xmm17
+
+// CHECK: vminps -2048(%rdx), %xmm20, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x5c,0x00,0x5d,0x4a,0x80]
+          vminps -2048(%rdx), %xmm20, %xmm17
+
+// CHECK: vminps -2064(%rdx), %xmm20, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x5c,0x00,0x5d,0x8a,0xf0,0xf7,0xff,0xff]
+          vminps -2064(%rdx), %xmm20, %xmm17
+
+// CHECK: vminps 508(%rdx){1to4}, %xmm20, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x5c,0x10,0x5d,0x4a,0x7f]
+          vminps 508(%rdx){1to4}, %xmm20, %xmm17
+
+// CHECK: vminps 512(%rdx){1to4}, %xmm20, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x5c,0x10,0x5d,0x8a,0x00,0x02,0x00,0x00]
+          vminps 512(%rdx){1to4}, %xmm20, %xmm17
+
+// CHECK: vminps -512(%rdx){1to4}, %xmm20, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x5c,0x10,0x5d,0x4a,0x80]
+          vminps -512(%rdx){1to4}, %xmm20, %xmm17
+
+// CHECK: vminps -516(%rdx){1to4}, %xmm20, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0x5c,0x10,0x5d,0x8a,0xfc,0xfd,0xff,0xff]
+          vminps -516(%rdx){1to4}, %xmm20, %xmm17
+
+// CHECK: vminps %ymm21, %ymm24, %ymm24
+// CHECK:  encoding: [0x62,0x21,0x3c,0x20,0x5d,0xc5]
+          vminps %ymm21, %ymm24, %ymm24
+
+// CHECK: vminps %ymm21, %ymm24, %ymm24 {%k3}
+// CHECK:  encoding: [0x62,0x21,0x3c,0x23,0x5d,0xc5]
+          vminps %ymm21, %ymm24, %ymm24 {%k3}
+
+// CHECK: vminps %ymm21, %ymm24, %ymm24 {%k3} {z}
+// CHECK:  encoding: [0x62,0x21,0x3c,0xa3,0x5d,0xc5]
+          vminps %ymm21, %ymm24, %ymm24 {%k3} {z}
+
+// CHECK: vminps (%rcx), %ymm24, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x3c,0x20,0x5d,0x01]
+          vminps (%rcx), %ymm24, %ymm24
+
+// CHECK: vminps 291(%rax,%r14,8), %ymm24, %ymm24
+// CHECK:  encoding: [0x62,0x21,0x3c,0x20,0x5d,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vminps 291(%rax,%r14,8), %ymm24, %ymm24
+
+// CHECK: vminps (%rcx){1to8}, %ymm24, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x3c,0x30,0x5d,0x01]
+          vminps (%rcx){1to8}, %ymm24, %ymm24
+
+// CHECK: vminps 4064(%rdx), %ymm24, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x3c,0x20,0x5d,0x42,0x7f]
+          vminps 4064(%rdx), %ymm24, %ymm24
+
+// CHECK: vminps 4096(%rdx), %ymm24, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x3c,0x20,0x5d,0x82,0x00,0x10,0x00,0x00]
+          vminps 4096(%rdx), %ymm24, %ymm24
+
+// CHECK: vminps -4096(%rdx), %ymm24, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x3c,0x20,0x5d,0x42,0x80]
+          vminps -4096(%rdx), %ymm24, %ymm24
+
+// CHECK: vminps -4128(%rdx), %ymm24, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x3c,0x20,0x5d,0x82,0xe0,0xef,0xff,0xff]
+          vminps -4128(%rdx), %ymm24, %ymm24
+
+// CHECK: vminps 508(%rdx){1to8}, %ymm24, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x3c,0x30,0x5d,0x42,0x7f]
+          vminps 508(%rdx){1to8}, %ymm24, %ymm24
+
+// CHECK: vminps 512(%rdx){1to8}, %ymm24, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x3c,0x30,0x5d,0x82,0x00,0x02,0x00,0x00]
+          vminps 512(%rdx){1to8}, %ymm24, %ymm24
+
+// CHECK: vminps -512(%rdx){1to8}, %ymm24, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x3c,0x30,0x5d,0x42,0x80]
+          vminps -512(%rdx){1to8}, %ymm24, %ymm24
+
+// CHECK: vminps -516(%rdx){1to8}, %ymm24, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x3c,0x30,0x5d,0x82,0xfc,0xfd,0xff,0xff]
+          vminps -516(%rdx){1to8}, %ymm24, %ymm24
+
+// CHECK: vmovapd %xmm21, %xmm21
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x08,0x28,0xed]
+          vmovapd %xmm21, %xmm21
+
+// CHECK: vmovapd %xmm21, %xmm21 {%k3}
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x0b,0x28,0xed]
+          vmovapd %xmm21, %xmm21 {%k3}
+
+// CHECK: vmovapd %xmm21, %xmm21 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x8b,0x28,0xed]
+          vmovapd %xmm21, %xmm21 {%k3} {z}
+
+// CHECK: vmovapd (%rcx), %xmm21
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x28,0x29]
+          vmovapd (%rcx), %xmm21
+
+// CHECK: vmovapd 291(%rax,%r14,8), %xmm21
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x08,0x28,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmovapd 291(%rax,%r14,8), %xmm21
+
+// CHECK: vmovapd 2032(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x28,0x6a,0x7f]
+          vmovapd 2032(%rdx), %xmm21
+
+// CHECK: vmovapd 2048(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x28,0xaa,0x00,0x08,0x00,0x00]
+          vmovapd 2048(%rdx), %xmm21
+
+// CHECK: vmovapd -2048(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x28,0x6a,0x80]
+          vmovapd -2048(%rdx), %xmm21
+
+// CHECK: vmovapd -2064(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x28,0xaa,0xf0,0xf7,0xff,0xff]
+          vmovapd -2064(%rdx), %xmm21
+
+// CHECK: vmovapd %ymm17, %ymm18
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x28,0x28,0xd1]
+          vmovapd %ymm17, %ymm18
+
+// CHECK: vmovapd %ymm17, %ymm18 {%k2}
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x2a,0x28,0xd1]
+          vmovapd %ymm17, %ymm18 {%k2}
+
+// CHECK: vmovapd %ymm17, %ymm18 {%k2} {z}
+// CHECK:  encoding: [0x62,0xa1,0xfd,0xaa,0x28,0xd1]
+          vmovapd %ymm17, %ymm18 {%k2} {z}
+
+// CHECK: vmovapd (%rcx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x28,0x11]
+          vmovapd (%rcx), %ymm18
+
+// CHECK: vmovapd 291(%rax,%r14,8), %ymm18
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x28,0x28,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vmovapd 291(%rax,%r14,8), %ymm18
+
+// CHECK: vmovapd 4064(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x28,0x52,0x7f]
+          vmovapd 4064(%rdx), %ymm18
+
+// CHECK: vmovapd 4096(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x28,0x92,0x00,0x10,0x00,0x00]
+          vmovapd 4096(%rdx), %ymm18
+
+// CHECK: vmovapd -4096(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x28,0x52,0x80]
+          vmovapd -4096(%rdx), %ymm18
+
+// CHECK: vmovapd -4128(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x28,0x92,0xe0,0xef,0xff,0xff]
+          vmovapd -4128(%rdx), %ymm18
+
+// CHECK: vmovaps %xmm29, %xmm22
+// CHECK:  encoding: [0x62,0x81,0x7c,0x08,0x28,0xf5]
+          vmovaps %xmm29, %xmm22
+
+// CHECK: vmovaps %xmm29, %xmm22 {%k1}
+// CHECK:  encoding: [0x62,0x81,0x7c,0x09,0x28,0xf5]
+          vmovaps %xmm29, %xmm22 {%k1}
+
+// CHECK: vmovaps %xmm29, %xmm22 {%k1} {z}
+// CHECK:  encoding: [0x62,0x81,0x7c,0x89,0x28,0xf5]
+          vmovaps %xmm29, %xmm22 {%k1} {z}
+
+// CHECK: vmovaps (%rcx), %xmm22
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x08,0x28,0x31]
+          vmovaps (%rcx), %xmm22
+
+// CHECK: vmovaps 291(%rax,%r14,8), %xmm22
+// CHECK:  encoding: [0x62,0xa1,0x7c,0x08,0x28,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vmovaps 291(%rax,%r14,8), %xmm22
+
+// CHECK: vmovaps 2032(%rdx), %xmm22
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x08,0x28,0x72,0x7f]
+          vmovaps 2032(%rdx), %xmm22
+
+// CHECK: vmovaps 2048(%rdx), %xmm22
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x08,0x28,0xb2,0x00,0x08,0x00,0x00]
+          vmovaps 2048(%rdx), %xmm22
+
+// CHECK: vmovaps -2048(%rdx), %xmm22
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x08,0x28,0x72,0x80]
+          vmovaps -2048(%rdx), %xmm22
+
+// CHECK: vmovaps -2064(%rdx), %xmm22
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x08,0x28,0xb2,0xf0,0xf7,0xff,0xff]
+          vmovaps -2064(%rdx), %xmm22
+
+// CHECK: vmovaps %ymm28, %ymm25
+// CHECK:  encoding: [0x62,0x01,0x7c,0x28,0x28,0xcc]
+          vmovaps %ymm28, %ymm25
+
+// CHECK: vmovaps %ymm28, %ymm25 {%k3}
+// CHECK:  encoding: [0x62,0x01,0x7c,0x2b,0x28,0xcc]
+          vmovaps %ymm28, %ymm25 {%k3}
+
+// CHECK: vmovaps %ymm28, %ymm25 {%k3} {z}
+// CHECK:  encoding: [0x62,0x01,0x7c,0xab,0x28,0xcc]
+          vmovaps %ymm28, %ymm25 {%k3} {z}
+
+// CHECK: vmovaps (%rcx), %ymm25
+// CHECK:  encoding: [0x62,0x61,0x7c,0x28,0x28,0x09]
+          vmovaps (%rcx), %ymm25
+
+// CHECK: vmovaps 291(%rax,%r14,8), %ymm25
+// CHECK:  encoding: [0x62,0x21,0x7c,0x28,0x28,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmovaps 291(%rax,%r14,8), %ymm25
+
+// CHECK: vmovaps 4064(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x61,0x7c,0x28,0x28,0x4a,0x7f]
+          vmovaps 4064(%rdx), %ymm25
+
+// CHECK: vmovaps 4096(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x61,0x7c,0x28,0x28,0x8a,0x00,0x10,0x00,0x00]
+          vmovaps 4096(%rdx), %ymm25
+
+// CHECK: vmovaps -4096(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x61,0x7c,0x28,0x28,0x4a,0x80]
+          vmovaps -4096(%rdx), %ymm25
+
+// CHECK: vmovaps -4128(%rdx), %ymm25
+// CHECK:  encoding: [0x62,0x61,0x7c,0x28,0x28,0x8a,0xe0,0xef,0xff,0xff]
+          vmovaps -4128(%rdx), %ymm25
+
+// CHECK: vmovdqa32 %xmm24, %xmm21
+// CHECK:  encoding: [0x62,0x81,0x7d,0x08,0x6f,0xe8]
+          vmovdqa32 %xmm24, %xmm21
+
+// CHECK: vmovdqa32 %xmm24, %xmm21 {%k6}
+// CHECK:  encoding: [0x62,0x81,0x7d,0x0e,0x6f,0xe8]
+          vmovdqa32 %xmm24, %xmm21 {%k6}
+
+// CHECK: vmovdqa32 %xmm24, %xmm21 {%k6} {z}
+// CHECK:  encoding: [0x62,0x81,0x7d,0x8e,0x6f,0xe8]
+          vmovdqa32 %xmm24, %xmm21 {%k6} {z}
+
+// CHECK: vmovdqa32 (%rcx), %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x08,0x6f,0x29]
+          vmovdqa32 (%rcx), %xmm21
+
+// CHECK: vmovdqa32 291(%rax,%r14,8), %xmm21
+// CHECK:  encoding: [0x62,0xa1,0x7d,0x08,0x6f,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqa32 291(%rax,%r14,8), %xmm21
+
+// CHECK: vmovdqa32 2032(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x08,0x6f,0x6a,0x7f]
+          vmovdqa32 2032(%rdx), %xmm21
+
+// CHECK: vmovdqa32 2048(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x08,0x6f,0xaa,0x00,0x08,0x00,0x00]
+          vmovdqa32 2048(%rdx), %xmm21
+
+// CHECK: vmovdqa32 -2048(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x08,0x6f,0x6a,0x80]
+          vmovdqa32 -2048(%rdx), %xmm21
+
+// CHECK: vmovdqa32 -2064(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x08,0x6f,0xaa,0xf0,0xf7,0xff,0xff]
+          vmovdqa32 -2064(%rdx), %xmm21
+
+// CHECK: vmovdqa32 %ymm28, %ymm24
+// CHECK:  encoding: [0x62,0x01,0x7d,0x28,0x6f,0xc4]
+          vmovdqa32 %ymm28, %ymm24
+
+// CHECK: vmovdqa32 %ymm28, %ymm24 {%k5}
+// CHECK:  encoding: [0x62,0x01,0x7d,0x2d,0x6f,0xc4]
+          vmovdqa32 %ymm28, %ymm24 {%k5}
+
+// CHECK: vmovdqa32 %ymm28, %ymm24 {%k5} {z}
+// CHECK:  encoding: [0x62,0x01,0x7d,0xad,0x6f,0xc4]
+          vmovdqa32 %ymm28, %ymm24 {%k5} {z}
+
+// CHECK: vmovdqa32 (%rcx), %ymm24
+// CHECK:  encoding: [0x62,0x61,0x7d,0x28,0x6f,0x01]
+          vmovdqa32 (%rcx), %ymm24
+
+// CHECK: vmovdqa32 291(%rax,%r14,8), %ymm24
+// CHECK:  encoding: [0x62,0x21,0x7d,0x28,0x6f,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqa32 291(%rax,%r14,8), %ymm24
+
+// CHECK: vmovdqa32 4064(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x61,0x7d,0x28,0x6f,0x42,0x7f]
+          vmovdqa32 4064(%rdx), %ymm24
+
+// CHECK: vmovdqa32 4096(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x61,0x7d,0x28,0x6f,0x82,0x00,0x10,0x00,0x00]
+          vmovdqa32 4096(%rdx), %ymm24
+
+// CHECK: vmovdqa32 -4096(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x61,0x7d,0x28,0x6f,0x42,0x80]
+          vmovdqa32 -4096(%rdx), %ymm24
+
+// CHECK: vmovdqa32 -4128(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x61,0x7d,0x28,0x6f,0x82,0xe0,0xef,0xff,0xff]
+          vmovdqa32 -4128(%rdx), %ymm24
+
+// CHECK: vmovdqa64 %xmm24, %xmm27
+// CHECK:  encoding: [0x62,0x01,0xfd,0x08,0x6f,0xd8]
+          vmovdqa64 %xmm24, %xmm27
+
+// CHECK: vmovdqa64 %xmm24, %xmm27 {%k5}
+// CHECK:  encoding: [0x62,0x01,0xfd,0x0d,0x6f,0xd8]
+          vmovdqa64 %xmm24, %xmm27 {%k5}
+
+// CHECK: vmovdqa64 %xmm24, %xmm27 {%k5} {z}
+// CHECK:  encoding: [0x62,0x01,0xfd,0x8d,0x6f,0xd8]
+          vmovdqa64 %xmm24, %xmm27 {%k5} {z}
+
+// CHECK: vmovdqa64 (%rcx), %xmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x6f,0x19]
+          vmovdqa64 (%rcx), %xmm27
+
+// CHECK: vmovdqa64 291(%rax,%r14,8), %xmm27
+// CHECK:  encoding: [0x62,0x21,0xfd,0x08,0x6f,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqa64 291(%rax,%r14,8), %xmm27
+
+// CHECK: vmovdqa64 2032(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x6f,0x5a,0x7f]
+          vmovdqa64 2032(%rdx), %xmm27
+
+// CHECK: vmovdqa64 2048(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x6f,0x9a,0x00,0x08,0x00,0x00]
+          vmovdqa64 2048(%rdx), %xmm27
+
+// CHECK: vmovdqa64 -2048(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x6f,0x5a,0x80]
+          vmovdqa64 -2048(%rdx), %xmm27
+
+// CHECK: vmovdqa64 -2064(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x6f,0x9a,0xf0,0xf7,0xff,0xff]
+          vmovdqa64 -2064(%rdx), %xmm27
+
+// CHECK: vmovdqa64 %ymm29, %ymm30
+// CHECK:  encoding: [0x62,0x01,0xfd,0x28,0x6f,0xf5]
+          vmovdqa64 %ymm29, %ymm30
+
+// CHECK: vmovdqa64 %ymm29, %ymm30 {%k3}
+// CHECK:  encoding: [0x62,0x01,0xfd,0x2b,0x6f,0xf5]
+          vmovdqa64 %ymm29, %ymm30 {%k3}
+
+// CHECK: vmovdqa64 %ymm29, %ymm30 {%k3} {z}
+// CHECK:  encoding: [0x62,0x01,0xfd,0xab,0x6f,0xf5]
+          vmovdqa64 %ymm29, %ymm30 {%k3} {z}
+
+// CHECK: vmovdqa64 (%rcx), %ymm30
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x6f,0x31]
+          vmovdqa64 (%rcx), %ymm30
+
+// CHECK: vmovdqa64 291(%rax,%r14,8), %ymm30
+// CHECK:  encoding: [0x62,0x21,0xfd,0x28,0x6f,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqa64 291(%rax,%r14,8), %ymm30
+
+// CHECK: vmovdqa64 4064(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x6f,0x72,0x7f]
+          vmovdqa64 4064(%rdx), %ymm30
+
+// CHECK: vmovdqa64 4096(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x6f,0xb2,0x00,0x10,0x00,0x00]
+          vmovdqa64 4096(%rdx), %ymm30
+
+// CHECK: vmovdqa64 -4096(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x6f,0x72,0x80]
+          vmovdqa64 -4096(%rdx), %ymm30
+
+// CHECK: vmovdqa64 -4128(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x6f,0xb2,0xe0,0xef,0xff,0xff]
+          vmovdqa64 -4128(%rdx), %ymm30
+
+// CHECK: vmovdqu32 %xmm19, %xmm29
+// CHECK:  encoding: [0x62,0x21,0x7e,0x08,0x6f,0xeb]
+          vmovdqu32 %xmm19, %xmm29
+
+// CHECK: vmovdqu32 %xmm19, %xmm29 {%k6}
+// CHECK:  encoding: [0x62,0x21,0x7e,0x0e,0x6f,0xeb]
+          vmovdqu32 %xmm19, %xmm29 {%k6}
+
+// CHECK: vmovdqu32 %xmm19, %xmm29 {%k6} {z}
+// CHECK:  encoding: [0x62,0x21,0x7e,0x8e,0x6f,0xeb]
+          vmovdqu32 %xmm19, %xmm29 {%k6} {z}
+
+// CHECK: vmovdqu32 (%rcx), %xmm29
+// CHECK:  encoding: [0x62,0x61,0x7e,0x08,0x6f,0x29]
+          vmovdqu32 (%rcx), %xmm29
+
+// CHECK: vmovdqu32 291(%rax,%r14,8), %xmm29
+// CHECK:  encoding: [0x62,0x21,0x7e,0x08,0x6f,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu32 291(%rax,%r14,8), %xmm29
+
+// CHECK: vmovdqu32 2032(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x61,0x7e,0x08,0x6f,0x6a,0x7f]
+          vmovdqu32 2032(%rdx), %xmm29
+
+// CHECK: vmovdqu32 2048(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x61,0x7e,0x08,0x6f,0xaa,0x00,0x08,0x00,0x00]
+          vmovdqu32 2048(%rdx), %xmm29
+
+// CHECK: vmovdqu32 -2048(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x61,0x7e,0x08,0x6f,0x6a,0x80]
+          vmovdqu32 -2048(%rdx), %xmm29
+
+// CHECK: vmovdqu32 -2064(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x61,0x7e,0x08,0x6f,0xaa,0xf0,0xf7,0xff,0xff]
+          vmovdqu32 -2064(%rdx), %xmm29
+
+// CHECK: vmovdqu32 %ymm18, %ymm17
+// CHECK:  encoding: [0x62,0xa1,0x7e,0x28,0x6f,0xca]
+          vmovdqu32 %ymm18, %ymm17
+
+// CHECK: vmovdqu32 %ymm18, %ymm17 {%k3}
+// CHECK:  encoding: [0x62,0xa1,0x7e,0x2b,0x6f,0xca]
+          vmovdqu32 %ymm18, %ymm17 {%k3}
+
+// CHECK: vmovdqu32 %ymm18, %ymm17 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa1,0x7e,0xab,0x6f,0xca]
+          vmovdqu32 %ymm18, %ymm17 {%k3} {z}
+
+// CHECK: vmovdqu32 (%rcx), %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x28,0x6f,0x09]
+          vmovdqu32 (%rcx), %ymm17
+
+// CHECK: vmovdqu32 291(%rax,%r14,8), %ymm17
+// CHECK:  encoding: [0x62,0xa1,0x7e,0x28,0x6f,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu32 291(%rax,%r14,8), %ymm17
+
+// CHECK: vmovdqu32 4064(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x28,0x6f,0x4a,0x7f]
+          vmovdqu32 4064(%rdx), %ymm17
+
+// CHECK: vmovdqu32 4096(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x28,0x6f,0x8a,0x00,0x10,0x00,0x00]
+          vmovdqu32 4096(%rdx), %ymm17
+
+// CHECK: vmovdqu32 -4096(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x28,0x6f,0x4a,0x80]
+          vmovdqu32 -4096(%rdx), %ymm17
+
+// CHECK: vmovdqu32 -4128(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x28,0x6f,0x8a,0xe0,0xef,0xff,0xff]
+          vmovdqu32 -4128(%rdx), %ymm17
+
+// CHECK: vmovdqu64 %xmm19, %xmm24
+// CHECK:  encoding: [0x62,0x21,0xfe,0x08,0x6f,0xc3]
+          vmovdqu64 %xmm19, %xmm24
+
+// CHECK: vmovdqu64 %xmm19, %xmm24 {%k5}
+// CHECK:  encoding: [0x62,0x21,0xfe,0x0d,0x6f,0xc3]
+          vmovdqu64 %xmm19, %xmm24 {%k5}
+
+// CHECK: vmovdqu64 %xmm19, %xmm24 {%k5} {z}
+// CHECK:  encoding: [0x62,0x21,0xfe,0x8d,0x6f,0xc3]
+          vmovdqu64 %xmm19, %xmm24 {%k5} {z}
+
+// CHECK: vmovdqu64 (%rcx), %xmm24
+// CHECK:  encoding: [0x62,0x61,0xfe,0x08,0x6f,0x01]
+          vmovdqu64 (%rcx), %xmm24
+
+// CHECK: vmovdqu64 291(%rax,%r14,8), %xmm24
+// CHECK:  encoding: [0x62,0x21,0xfe,0x08,0x6f,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu64 291(%rax,%r14,8), %xmm24
+
+// CHECK: vmovdqu64 2032(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x61,0xfe,0x08,0x6f,0x42,0x7f]
+          vmovdqu64 2032(%rdx), %xmm24
+
+// CHECK: vmovdqu64 2048(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x61,0xfe,0x08,0x6f,0x82,0x00,0x08,0x00,0x00]
+          vmovdqu64 2048(%rdx), %xmm24
+
+// CHECK: vmovdqu64 -2048(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x61,0xfe,0x08,0x6f,0x42,0x80]
+          vmovdqu64 -2048(%rdx), %xmm24
+
+// CHECK: vmovdqu64 -2064(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x61,0xfe,0x08,0x6f,0x82,0xf0,0xf7,0xff,0xff]
+          vmovdqu64 -2064(%rdx), %xmm24
+
+// CHECK: vmovdqu64 %ymm21, %ymm29
+// CHECK:  encoding: [0x62,0x21,0xfe,0x28,0x6f,0xed]
+          vmovdqu64 %ymm21, %ymm29
+
+// CHECK: vmovdqu64 %ymm21, %ymm29 {%k3}
+// CHECK:  encoding: [0x62,0x21,0xfe,0x2b,0x6f,0xed]
+          vmovdqu64 %ymm21, %ymm29 {%k3}
+
+// CHECK: vmovdqu64 %ymm21, %ymm29 {%k3} {z}
+// CHECK:  encoding: [0x62,0x21,0xfe,0xab,0x6f,0xed]
+          vmovdqu64 %ymm21, %ymm29 {%k3} {z}
+
+// CHECK: vmovdqu64 (%rcx), %ymm29
+// CHECK:  encoding: [0x62,0x61,0xfe,0x28,0x6f,0x29]
+          vmovdqu64 (%rcx), %ymm29
+
+// CHECK: vmovdqu64 291(%rax,%r14,8), %ymm29
+// CHECK:  encoding: [0x62,0x21,0xfe,0x28,0x6f,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu64 291(%rax,%r14,8), %ymm29
+
+// CHECK: vmovdqu64 4064(%rdx), %ymm29
+// CHECK:  encoding: [0x62,0x61,0xfe,0x28,0x6f,0x6a,0x7f]
+          vmovdqu64 4064(%rdx), %ymm29
+
+// CHECK: vmovdqu64 4096(%rdx), %ymm29
+// CHECK:  encoding: [0x62,0x61,0xfe,0x28,0x6f,0xaa,0x00,0x10,0x00,0x00]
+          vmovdqu64 4096(%rdx), %ymm29
+
+// CHECK: vmovdqu64 -4096(%rdx), %ymm29
+// CHECK:  encoding: [0x62,0x61,0xfe,0x28,0x6f,0x6a,0x80]
+          vmovdqu64 -4096(%rdx), %ymm29
+
+// CHECK: vmovdqu64 -4128(%rdx), %ymm29
+// CHECK:  encoding: [0x62,0x61,0xfe,0x28,0x6f,0xaa,0xe0,0xef,0xff,0xff]
+          vmovdqu64 -4128(%rdx), %ymm29
+
+// CHECK: vmovntdq %xmm22, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x08,0xe7,0x31]
+          vmovntdq %xmm22, (%rcx)
+
+// CHECK: vmovntdq %xmm22, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0x7d,0x08,0xe7,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vmovntdq %xmm22, 291(%rax,%r14,8)
+
+// CHECK: vmovntdq %xmm22, 2032(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x08,0xe7,0x72,0x7f]
+          vmovntdq %xmm22, 2032(%rdx)
+
+// CHECK: vmovntdq %xmm22, 2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x08,0xe7,0xb2,0x00,0x08,0x00,0x00]
+          vmovntdq %xmm22, 2048(%rdx)
+
+// CHECK: vmovntdq %xmm22, -2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x08,0xe7,0x72,0x80]
+          vmovntdq %xmm22, -2048(%rdx)
+
+// CHECK: vmovntdq %xmm22, -2064(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x08,0xe7,0xb2,0xf0,0xf7,0xff,0xff]
+          vmovntdq %xmm22, -2064(%rdx)
+
+// CHECK: vmovntdq %ymm19, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x19]
+          vmovntdq %ymm19, (%rcx)
+
+// CHECK: vmovntdq %ymm19, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0x7d,0x28,0xe7,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vmovntdq %ymm19, 291(%rax,%r14,8)
+
+// CHECK: vmovntdq %ymm19, 4064(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x5a,0x7f]
+          vmovntdq %ymm19, 4064(%rdx)
+
+// CHECK: vmovntdq %ymm19, 4096(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x9a,0x00,0x10,0x00,0x00]
+          vmovntdq %ymm19, 4096(%rdx)
+
+// CHECK: vmovntdq %ymm19, -4096(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x5a,0x80]
+          vmovntdq %ymm19, -4096(%rdx)
+
+// CHECK: vmovntdq %ymm19, -4128(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7d,0x28,0xe7,0x9a,0xe0,0xef,0xff,0xff]
+          vmovntdq %ymm19, -4128(%rdx)
+
+// CHECK: vmovntdqa (%rcx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x2a,0x01]
+          vmovntdqa (%rcx), %xmm24
+
+// CHECK: vmovntdqa 291(%rax,%r14,8), %xmm24
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x2a,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vmovntdqa 291(%rax,%r14,8), %xmm24
+
+// CHECK: vmovntdqa 2032(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x2a,0x42,0x7f]
+          vmovntdqa 2032(%rdx), %xmm24
+
+// CHECK: vmovntdqa 2048(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x2a,0x82,0x00,0x08,0x00,0x00]
+          vmovntdqa 2048(%rdx), %xmm24
+
+// CHECK: vmovntdqa -2048(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x2a,0x42,0x80]
+          vmovntdqa -2048(%rdx), %xmm24
+
+// CHECK: vmovntdqa -2064(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x2a,0x82,0xf0,0xf7,0xff,0xff]
+          vmovntdqa -2064(%rdx), %xmm24
+
+// CHECK: vmovntdqa (%rcx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x2a,0x21]
+          vmovntdqa (%rcx), %ymm28
+
+// CHECK: vmovntdqa 291(%rax,%r14,8), %ymm28
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x2a,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vmovntdqa 291(%rax,%r14,8), %ymm28
+
+// CHECK: vmovntdqa 4064(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x2a,0x62,0x7f]
+          vmovntdqa 4064(%rdx), %ymm28
+
+// CHECK: vmovntdqa 4096(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x2a,0xa2,0x00,0x10,0x00,0x00]
+          vmovntdqa 4096(%rdx), %ymm28
+
+// CHECK: vmovntdqa -4096(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x2a,0x62,0x80]
+          vmovntdqa -4096(%rdx), %ymm28
+
+// CHECK: vmovntdqa -4128(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x2a,0xa2,0xe0,0xef,0xff,0xff]
+          vmovntdqa -4128(%rdx), %ymm28
+
+// CHECK: vmovntpd %xmm17, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x09]
+          vmovntpd %xmm17, (%rcx)
+
+// CHECK: vmovntpd %xmm17, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x08,0x2b,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmovntpd %xmm17, 291(%rax,%r14,8)
+
+// CHECK: vmovntpd %xmm17, 2032(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x4a,0x7f]
+          vmovntpd %xmm17, 2032(%rdx)
+
+// CHECK: vmovntpd %xmm17, 2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x8a,0x00,0x08,0x00,0x00]
+          vmovntpd %xmm17, 2048(%rdx)
+
+// CHECK: vmovntpd %xmm17, -2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x4a,0x80]
+          vmovntpd %xmm17, -2048(%rdx)
+
+// CHECK: vmovntpd %xmm17, -2064(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x2b,0x8a,0xf0,0xf7,0xff,0xff]
+          vmovntpd %xmm17, -2064(%rdx)
+
+// CHECK: vmovntpd %ymm27, (%rcx)
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x2b,0x19]
+          vmovntpd %ymm27, (%rcx)
+
+// CHECK: vmovntpd %ymm27, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x21,0xfd,0x28,0x2b,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vmovntpd %ymm27, 291(%rax,%r14,8)
+
+// CHECK: vmovntpd %ymm27, 4064(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x2b,0x5a,0x7f]
+          vmovntpd %ymm27, 4064(%rdx)
+
+// CHECK: vmovntpd %ymm27, 4096(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x2b,0x9a,0x00,0x10,0x00,0x00]
+          vmovntpd %ymm27, 4096(%rdx)
+
+// CHECK: vmovntpd %ymm27, -4096(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x2b,0x5a,0x80]
+          vmovntpd %ymm27, -4096(%rdx)
+
+// CHECK: vmovntpd %ymm27, -4128(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x2b,0x9a,0xe0,0xef,0xff,0xff]
+          vmovntpd %ymm27, -4128(%rdx)
+
+// CHECK: vmovntps %xmm26, (%rcx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x08,0x2b,0x11]
+          vmovntps %xmm26, (%rcx)
+
+// CHECK: vmovntps %xmm26, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x21,0x7c,0x08,0x2b,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vmovntps %xmm26, 291(%rax,%r14,8)
+
+// CHECK: vmovntps %xmm26, 2032(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x08,0x2b,0x52,0x7f]
+          vmovntps %xmm26, 2032(%rdx)
+
+// CHECK: vmovntps %xmm26, 2048(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x08,0x2b,0x92,0x00,0x08,0x00,0x00]
+          vmovntps %xmm26, 2048(%rdx)
+
+// CHECK: vmovntps %xmm26, -2048(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x08,0x2b,0x52,0x80]
+          vmovntps %xmm26, -2048(%rdx)
+
+// CHECK: vmovntps %xmm26, -2064(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x08,0x2b,0x92,0xf0,0xf7,0xff,0xff]
+          vmovntps %xmm26, -2064(%rdx)
+
+// CHECK: vmovntps %ymm28, (%rcx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x28,0x2b,0x21]
+          vmovntps %ymm28, (%rcx)
+
+// CHECK: vmovntps %ymm28, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x21,0x7c,0x28,0x2b,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vmovntps %ymm28, 291(%rax,%r14,8)
+
+// CHECK: vmovntps %ymm28, 4064(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x28,0x2b,0x62,0x7f]
+          vmovntps %ymm28, 4064(%rdx)
+
+// CHECK: vmovntps %ymm28, 4096(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x28,0x2b,0xa2,0x00,0x10,0x00,0x00]
+          vmovntps %ymm28, 4096(%rdx)
+
+// CHECK: vmovntps %ymm28, -4096(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x28,0x2b,0x62,0x80]
+          vmovntps %ymm28, -4096(%rdx)
+
+// CHECK: vmovntps %ymm28, -4128(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x28,0x2b,0xa2,0xe0,0xef,0xff,0xff]
+          vmovntps %ymm28, -4128(%rdx)
+
+// CHECK: vmovupd %xmm22, %xmm24
+// CHECK:  encoding: [0x62,0x21,0xfd,0x08,0x10,0xc6]
+          vmovupd %xmm22, %xmm24
+
+// CHECK: vmovupd %xmm22, %xmm24 {%k6}
+// CHECK:  encoding: [0x62,0x21,0xfd,0x0e,0x10,0xc6]
+          vmovupd %xmm22, %xmm24 {%k6}
+
+// CHECK: vmovupd %xmm22, %xmm24 {%k6} {z}
+// CHECK:  encoding: [0x62,0x21,0xfd,0x8e,0x10,0xc6]
+          vmovupd %xmm22, %xmm24 {%k6} {z}
+
+// CHECK: vmovupd (%rcx), %xmm24
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x10,0x01]
+          vmovupd (%rcx), %xmm24
+
+// CHECK: vmovupd 291(%rax,%r14,8), %xmm24
+// CHECK:  encoding: [0x62,0x21,0xfd,0x08,0x10,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vmovupd 291(%rax,%r14,8), %xmm24
+
+// CHECK: vmovupd 2032(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x10,0x42,0x7f]
+          vmovupd 2032(%rdx), %xmm24
+
+// CHECK: vmovupd 2048(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x10,0x82,0x00,0x08,0x00,0x00]
+          vmovupd 2048(%rdx), %xmm24
+
+// CHECK: vmovupd -2048(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x10,0x42,0x80]
+          vmovupd -2048(%rdx), %xmm24
+
+// CHECK: vmovupd -2064(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x10,0x82,0xf0,0xf7,0xff,0xff]
+          vmovupd -2064(%rdx), %xmm24
+
+// CHECK: vmovupd %ymm25, %ymm30
+// CHECK:  encoding: [0x62,0x01,0xfd,0x28,0x10,0xf1]
+          vmovupd %ymm25, %ymm30
+
+// CHECK: vmovupd %ymm25, %ymm30 {%k7}
+// CHECK:  encoding: [0x62,0x01,0xfd,0x2f,0x10,0xf1]
+          vmovupd %ymm25, %ymm30 {%k7}
+
+// CHECK: vmovupd %ymm25, %ymm30 {%k7} {z}
+// CHECK:  encoding: [0x62,0x01,0xfd,0xaf,0x10,0xf1]
+          vmovupd %ymm25, %ymm30 {%k7} {z}
+
+// CHECK: vmovupd (%rcx), %ymm30
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x10,0x31]
+          vmovupd (%rcx), %ymm30
+
+// CHECK: vmovupd 291(%rax,%r14,8), %ymm30
+// CHECK:  encoding: [0x62,0x21,0xfd,0x28,0x10,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vmovupd 291(%rax,%r14,8), %ymm30
+
+// CHECK: vmovupd 4064(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x10,0x72,0x7f]
+          vmovupd 4064(%rdx), %ymm30
+
+// CHECK: vmovupd 4096(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x10,0xb2,0x00,0x10,0x00,0x00]
+          vmovupd 4096(%rdx), %ymm30
+
+// CHECK: vmovupd -4096(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x10,0x72,0x80]
+          vmovupd -4096(%rdx), %ymm30
+
+// CHECK: vmovupd -4128(%rdx), %ymm30
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x10,0xb2,0xe0,0xef,0xff,0xff]
+          vmovupd -4128(%rdx), %ymm30
+
+// CHECK: vmovups %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0x81,0x7c,0x08,0x10,0xe5]
+          vmovups %xmm29, %xmm20
+
+// CHECK: vmovups %xmm29, %xmm20 {%k6}
+// CHECK:  encoding: [0x62,0x81,0x7c,0x0e,0x10,0xe5]
+          vmovups %xmm29, %xmm20 {%k6}
+
+// CHECK: vmovups %xmm29, %xmm20 {%k6} {z}
+// CHECK:  encoding: [0x62,0x81,0x7c,0x8e,0x10,0xe5]
+          vmovups %xmm29, %xmm20 {%k6} {z}
+
+// CHECK: vmovups (%rcx), %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x08,0x10,0x21]
+          vmovups (%rcx), %xmm20
+
+// CHECK: vmovups 291(%rax,%r14,8), %xmm20
+// CHECK:  encoding: [0x62,0xa1,0x7c,0x08,0x10,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vmovups 291(%rax,%r14,8), %xmm20
+
+// CHECK: vmovups 2032(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x08,0x10,0x62,0x7f]
+          vmovups 2032(%rdx), %xmm20
+
+// CHECK: vmovups 2048(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x08,0x10,0xa2,0x00,0x08,0x00,0x00]
+          vmovups 2048(%rdx), %xmm20
+
+// CHECK: vmovups -2048(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x08,0x10,0x62,0x80]
+          vmovups -2048(%rdx), %xmm20
+
+// CHECK: vmovups -2064(%rdx), %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x08,0x10,0xa2,0xf0,0xf7,0xff,0xff]
+          vmovups -2064(%rdx), %xmm20
+
+// CHECK: vmovups %ymm26, %ymm21
+// CHECK:  encoding: [0x62,0x81,0x7c,0x28,0x10,0xea]
+          vmovups %ymm26, %ymm21
+
+// CHECK: vmovups %ymm26, %ymm21 {%k6}
+// CHECK:  encoding: [0x62,0x81,0x7c,0x2e,0x10,0xea]
+          vmovups %ymm26, %ymm21 {%k6}
+
+// CHECK: vmovups %ymm26, %ymm21 {%k6} {z}
+// CHECK:  encoding: [0x62,0x81,0x7c,0xae,0x10,0xea]
+          vmovups %ymm26, %ymm21 {%k6} {z}
+
+// CHECK: vmovups (%rcx), %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x28,0x10,0x29]
+          vmovups (%rcx), %ymm21
+
+// CHECK: vmovups 291(%rax,%r14,8), %ymm21
+// CHECK:  encoding: [0x62,0xa1,0x7c,0x28,0x10,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmovups 291(%rax,%r14,8), %ymm21
+
+// CHECK: vmovups 4064(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x28,0x10,0x6a,0x7f]
+          vmovups 4064(%rdx), %ymm21
+
+// CHECK: vmovups 4096(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x28,0x10,0xaa,0x00,0x10,0x00,0x00]
+          vmovups 4096(%rdx), %ymm21
+
+// CHECK: vmovups -4096(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x28,0x10,0x6a,0x80]
+          vmovups -4096(%rdx), %ymm21
+
+// CHECK: vmovups -4128(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x28,0x10,0xaa,0xe0,0xef,0xff,0xff]
+          vmovups -4128(%rdx), %ymm21
+
+// CHECK: vmulpd %xmm26, %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0x81,0xf5,0x00,0x59,0xca]
+          vmulpd %xmm26, %xmm17, %xmm17
+
+// CHECK: vmulpd %xmm26, %xmm17, %xmm17 {%k7}
+// CHECK:  encoding: [0x62,0x81,0xf5,0x07,0x59,0xca]
+          vmulpd %xmm26, %xmm17, %xmm17 {%k7}
+
+// CHECK: vmulpd %xmm26, %xmm17, %xmm17 {%k7} {z}
+// CHECK:  encoding: [0x62,0x81,0xf5,0x87,0x59,0xca]
+          vmulpd %xmm26, %xmm17, %xmm17 {%k7} {z}
+
+// CHECK: vmulpd (%rcx), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0xf5,0x00,0x59,0x09]
+          vmulpd (%rcx), %xmm17, %xmm17
+
+// CHECK: vmulpd 291(%rax,%r14,8), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xa1,0xf5,0x00,0x59,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmulpd 291(%rax,%r14,8), %xmm17, %xmm17
+
+// CHECK: vmulpd (%rcx){1to2}, %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0xf5,0x10,0x59,0x09]
+          vmulpd (%rcx){1to2}, %xmm17, %xmm17
+
+// CHECK: vmulpd 2032(%rdx), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0xf5,0x00,0x59,0x4a,0x7f]
+          vmulpd 2032(%rdx), %xmm17, %xmm17
+
+// CHECK: vmulpd 2048(%rdx), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0xf5,0x00,0x59,0x8a,0x00,0x08,0x00,0x00]
+          vmulpd 2048(%rdx), %xmm17, %xmm17
+
+// CHECK: vmulpd -2048(%rdx), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0xf5,0x00,0x59,0x4a,0x80]
+          vmulpd -2048(%rdx), %xmm17, %xmm17
+
+// CHECK: vmulpd -2064(%rdx), %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0xf5,0x00,0x59,0x8a,0xf0,0xf7,0xff,0xff]
+          vmulpd -2064(%rdx), %xmm17, %xmm17
+
+// CHECK: vmulpd 1016(%rdx){1to2}, %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0xf5,0x10,0x59,0x4a,0x7f]
+          vmulpd 1016(%rdx){1to2}, %xmm17, %xmm17
+
+// CHECK: vmulpd 1024(%rdx){1to2}, %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0xf5,0x10,0x59,0x8a,0x00,0x04,0x00,0x00]
+          vmulpd 1024(%rdx){1to2}, %xmm17, %xmm17
+
+// CHECK: vmulpd -1024(%rdx){1to2}, %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0xf5,0x10,0x59,0x4a,0x80]
+          vmulpd -1024(%rdx){1to2}, %xmm17, %xmm17
+
+// CHECK: vmulpd -1032(%rdx){1to2}, %xmm17, %xmm17
+// CHECK:  encoding: [0x62,0xe1,0xf5,0x10,0x59,0x8a,0xf8,0xfb,0xff,0xff]
+          vmulpd -1032(%rdx){1to2}, %xmm17, %xmm17
+
+// CHECK: vmulpd %ymm27, %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x01,0xa5,0x20,0x59,0xcb]
+          vmulpd %ymm27, %ymm27, %ymm25
+
+// CHECK: vmulpd %ymm27, %ymm27, %ymm25 {%k3}
+// CHECK:  encoding: [0x62,0x01,0xa5,0x23,0x59,0xcb]
+          vmulpd %ymm27, %ymm27, %ymm25 {%k3}
+
+// CHECK: vmulpd %ymm27, %ymm27, %ymm25 {%k3} {z}
+// CHECK:  encoding: [0x62,0x01,0xa5,0xa3,0x59,0xcb]
+          vmulpd %ymm27, %ymm27, %ymm25 {%k3} {z}
+
+// CHECK: vmulpd (%rcx), %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xa5,0x20,0x59,0x09]
+          vmulpd (%rcx), %ymm27, %ymm25
+
+// CHECK: vmulpd 291(%rax,%r14,8), %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x21,0xa5,0x20,0x59,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmulpd 291(%rax,%r14,8), %ymm27, %ymm25
+
+// CHECK: vmulpd (%rcx){1to4}, %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xa5,0x30,0x59,0x09]
+          vmulpd (%rcx){1to4}, %ymm27, %ymm25
+
+// CHECK: vmulpd 4064(%rdx), %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xa5,0x20,0x59,0x4a,0x7f]
+          vmulpd 4064(%rdx), %ymm27, %ymm25
+
+// CHECK: vmulpd 4096(%rdx), %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xa5,0x20,0x59,0x8a,0x00,0x10,0x00,0x00]
+          vmulpd 4096(%rdx), %ymm27, %ymm25
+
+// CHECK: vmulpd -4096(%rdx), %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xa5,0x20,0x59,0x4a,0x80]
+          vmulpd -4096(%rdx), %ymm27, %ymm25
+
+// CHECK: vmulpd -4128(%rdx), %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xa5,0x20,0x59,0x8a,0xe0,0xef,0xff,0xff]
+          vmulpd -4128(%rdx), %ymm27, %ymm25
+
+// CHECK: vmulpd 1016(%rdx){1to4}, %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xa5,0x30,0x59,0x4a,0x7f]
+          vmulpd 1016(%rdx){1to4}, %ymm27, %ymm25
+
+// CHECK: vmulpd 1024(%rdx){1to4}, %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xa5,0x30,0x59,0x8a,0x00,0x04,0x00,0x00]
+          vmulpd 1024(%rdx){1to4}, %ymm27, %ymm25
+
+// CHECK: vmulpd -1024(%rdx){1to4}, %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xa5,0x30,0x59,0x4a,0x80]
+          vmulpd -1024(%rdx){1to4}, %ymm27, %ymm25
+
+// CHECK: vmulpd -1032(%rdx){1to4}, %ymm27, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xa5,0x30,0x59,0x8a,0xf8,0xfb,0xff,0xff]
+          vmulpd -1032(%rdx){1to4}, %ymm27, %ymm25
+
+// CHECK: vmulps %xmm21, %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x21,0x74,0x00,0x59,0xed]
+          vmulps %xmm21, %xmm17, %xmm29
+
+// CHECK: vmulps %xmm21, %xmm17, %xmm29 {%k2}
+// CHECK:  encoding: [0x62,0x21,0x74,0x02,0x59,0xed]
+          vmulps %xmm21, %xmm17, %xmm29 {%k2}
+
+// CHECK: vmulps %xmm21, %xmm17, %xmm29 {%k2} {z}
+// CHECK:  encoding: [0x62,0x21,0x74,0x82,0x59,0xed]
+          vmulps %xmm21, %xmm17, %xmm29 {%k2} {z}
+
+// CHECK: vmulps (%rcx), %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x74,0x00,0x59,0x29]
+          vmulps (%rcx), %xmm17, %xmm29
+
+// CHECK: vmulps 291(%rax,%r14,8), %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x21,0x74,0x00,0x59,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmulps 291(%rax,%r14,8), %xmm17, %xmm29
+
+// CHECK: vmulps (%rcx){1to4}, %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x74,0x10,0x59,0x29]
+          vmulps (%rcx){1to4}, %xmm17, %xmm29
+
+// CHECK: vmulps 2032(%rdx), %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x74,0x00,0x59,0x6a,0x7f]
+          vmulps 2032(%rdx), %xmm17, %xmm29
+
+// CHECK: vmulps 2048(%rdx), %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x74,0x00,0x59,0xaa,0x00,0x08,0x00,0x00]
+          vmulps 2048(%rdx), %xmm17, %xmm29
+
+// CHECK: vmulps -2048(%rdx), %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x74,0x00,0x59,0x6a,0x80]
+          vmulps -2048(%rdx), %xmm17, %xmm29
+
+// CHECK: vmulps -2064(%rdx), %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x74,0x00,0x59,0xaa,0xf0,0xf7,0xff,0xff]
+          vmulps -2064(%rdx), %xmm17, %xmm29
+
+// CHECK: vmulps 508(%rdx){1to4}, %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x74,0x10,0x59,0x6a,0x7f]
+          vmulps 508(%rdx){1to4}, %xmm17, %xmm29
+
+// CHECK: vmulps 512(%rdx){1to4}, %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x74,0x10,0x59,0xaa,0x00,0x02,0x00,0x00]
+          vmulps 512(%rdx){1to4}, %xmm17, %xmm29
+
+// CHECK: vmulps -512(%rdx){1to4}, %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x74,0x10,0x59,0x6a,0x80]
+          vmulps -512(%rdx){1to4}, %xmm17, %xmm29
+
+// CHECK: vmulps -516(%rdx){1to4}, %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0x74,0x10,0x59,0xaa,0xfc,0xfd,0xff,0xff]
+          vmulps -516(%rdx){1to4}, %xmm17, %xmm29
+
+// CHECK: vmulps %ymm28, %ymm26, %ymm30
+// CHECK:  encoding: [0x62,0x01,0x2c,0x20,0x59,0xf4]
+          vmulps %ymm28, %ymm26, %ymm30
+
+// CHECK: vmulps %ymm28, %ymm26, %ymm30 {%k3}
+// CHECK:  encoding: [0x62,0x01,0x2c,0x23,0x59,0xf4]
+          vmulps %ymm28, %ymm26, %ymm30 {%k3}
+
+// CHECK: vmulps %ymm28, %ymm26, %ymm30 {%k3} {z}
+// CHECK:  encoding: [0x62,0x01,0x2c,0xa3,0x59,0xf4]
+          vmulps %ymm28, %ymm26, %ymm30 {%k3} {z}
+
+// CHECK: vmulps (%rcx), %ymm26, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x2c,0x20,0x59,0x31]
+          vmulps (%rcx), %ymm26, %ymm30
+
+// CHECK: vmulps 291(%rax,%r14,8), %ymm26, %ymm30
+// CHECK:  encoding: [0x62,0x21,0x2c,0x20,0x59,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vmulps 291(%rax,%r14,8), %ymm26, %ymm30
+
+// CHECK: vmulps (%rcx){1to8}, %ymm26, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x2c,0x30,0x59,0x31]
+          vmulps (%rcx){1to8}, %ymm26, %ymm30
+
+// CHECK: vmulps 4064(%rdx), %ymm26, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x2c,0x20,0x59,0x72,0x7f]
+          vmulps 4064(%rdx), %ymm26, %ymm30
+
+// CHECK: vmulps 4096(%rdx), %ymm26, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x2c,0x20,0x59,0xb2,0x00,0x10,0x00,0x00]
+          vmulps 4096(%rdx), %ymm26, %ymm30
+
+// CHECK: vmulps -4096(%rdx), %ymm26, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x2c,0x20,0x59,0x72,0x80]
+          vmulps -4096(%rdx), %ymm26, %ymm30
+
+// CHECK: vmulps -4128(%rdx), %ymm26, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x2c,0x20,0x59,0xb2,0xe0,0xef,0xff,0xff]
+          vmulps -4128(%rdx), %ymm26, %ymm30
+
+// CHECK: vmulps 508(%rdx){1to8}, %ymm26, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x2c,0x30,0x59,0x72,0x7f]
+          vmulps 508(%rdx){1to8}, %ymm26, %ymm30
+
+// CHECK: vmulps 512(%rdx){1to8}, %ymm26, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x2c,0x30,0x59,0xb2,0x00,0x02,0x00,0x00]
+          vmulps 512(%rdx){1to8}, %ymm26, %ymm30
+
+// CHECK: vmulps -512(%rdx){1to8}, %ymm26, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x2c,0x30,0x59,0x72,0x80]
+          vmulps -512(%rdx){1to8}, %ymm26, %ymm30
+
+// CHECK: vmulps -516(%rdx){1to8}, %ymm26, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x2c,0x30,0x59,0xb2,0xfc,0xfd,0xff,0xff]
+          vmulps -516(%rdx){1to8}, %ymm26, %ymm30
+
+// CHECK: vpaddd %xmm26, %xmm19, %xmm21
+// CHECK:  encoding: [0x62,0x81,0x65,0x00,0xfe,0xea]
+          vpaddd %xmm26, %xmm19, %xmm21
+
+// CHECK: vpaddd %xmm26, %xmm19, %xmm21 {%k5}
+// CHECK:  encoding: [0x62,0x81,0x65,0x05,0xfe,0xea]
+          vpaddd %xmm26, %xmm19, %xmm21 {%k5}
+
+// CHECK: vpaddd %xmm26, %xmm19, %xmm21 {%k5} {z}
+// CHECK:  encoding: [0x62,0x81,0x65,0x85,0xfe,0xea]
+          vpaddd %xmm26, %xmm19, %xmm21 {%k5} {z}
+
+// CHECK: vpaddd (%rcx), %xmm19, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x65,0x00,0xfe,0x29]
+          vpaddd (%rcx), %xmm19, %xmm21
+
+// CHECK: vpaddd 291(%rax,%r14,8), %xmm19, %xmm21
+// CHECK:  encoding: [0x62,0xa1,0x65,0x00,0xfe,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpaddd 291(%rax,%r14,8), %xmm19, %xmm21
+
+// CHECK: vpaddd (%rcx){1to4}, %xmm19, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x65,0x10,0xfe,0x29]
+          vpaddd (%rcx){1to4}, %xmm19, %xmm21
+
+// CHECK: vpaddd 2032(%rdx), %xmm19, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x65,0x00,0xfe,0x6a,0x7f]
+          vpaddd 2032(%rdx), %xmm19, %xmm21
+
+// CHECK: vpaddd 2048(%rdx), %xmm19, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x65,0x00,0xfe,0xaa,0x00,0x08,0x00,0x00]
+          vpaddd 2048(%rdx), %xmm19, %xmm21
+
+// CHECK: vpaddd -2048(%rdx), %xmm19, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x65,0x00,0xfe,0x6a,0x80]
+          vpaddd -2048(%rdx), %xmm19, %xmm21
+
+// CHECK: vpaddd -2064(%rdx), %xmm19, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x65,0x00,0xfe,0xaa,0xf0,0xf7,0xff,0xff]
+          vpaddd -2064(%rdx), %xmm19, %xmm21
+
+// CHECK: vpaddd 508(%rdx){1to4}, %xmm19, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x65,0x10,0xfe,0x6a,0x7f]
+          vpaddd 508(%rdx){1to4}, %xmm19, %xmm21
+
+// CHECK: vpaddd 512(%rdx){1to4}, %xmm19, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x65,0x10,0xfe,0xaa,0x00,0x02,0x00,0x00]
+          vpaddd 512(%rdx){1to4}, %xmm19, %xmm21
+
+// CHECK: vpaddd -512(%rdx){1to4}, %xmm19, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x65,0x10,0xfe,0x6a,0x80]
+          vpaddd -512(%rdx){1to4}, %xmm19, %xmm21
+
+// CHECK: vpaddd -516(%rdx){1to4}, %xmm19, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x65,0x10,0xfe,0xaa,0xfc,0xfd,0xff,0xff]
+          vpaddd -516(%rdx){1to4}, %xmm19, %xmm21
+
+// CHECK: vpaddd %ymm17, %ymm23, %ymm29
+// CHECK:  encoding: [0x62,0x21,0x45,0x20,0xfe,0xe9]
+          vpaddd %ymm17, %ymm23, %ymm29
+
+// CHECK: vpaddd %ymm17, %ymm23, %ymm29 {%k5}
+// CHECK:  encoding: [0x62,0x21,0x45,0x25,0xfe,0xe9]
+          vpaddd %ymm17, %ymm23, %ymm29 {%k5}
+
+// CHECK: vpaddd %ymm17, %ymm23, %ymm29 {%k5} {z}
+// CHECK:  encoding: [0x62,0x21,0x45,0xa5,0xfe,0xe9]
+          vpaddd %ymm17, %ymm23, %ymm29 {%k5} {z}
+
+// CHECK: vpaddd (%rcx), %ymm23, %ymm29
+// CHECK:  encoding: [0x62,0x61,0x45,0x20,0xfe,0x29]
+          vpaddd (%rcx), %ymm23, %ymm29
+
+// CHECK: vpaddd 291(%rax,%r14,8), %ymm23, %ymm29
+// CHECK:  encoding: [0x62,0x21,0x45,0x20,0xfe,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpaddd 291(%rax,%r14,8), %ymm23, %ymm29
+
+// CHECK: vpaddd (%rcx){1to8}, %ymm23, %ymm29
+// CHECK:  encoding: [0x62,0x61,0x45,0x30,0xfe,0x29]
+          vpaddd (%rcx){1to8}, %ymm23, %ymm29
+
+// CHECK: vpaddd 4064(%rdx), %ymm23, %ymm29
+// CHECK:  encoding: [0x62,0x61,0x45,0x20,0xfe,0x6a,0x7f]
+          vpaddd 4064(%rdx), %ymm23, %ymm29
+
+// CHECK: vpaddd 4096(%rdx), %ymm23, %ymm29
+// CHECK:  encoding: [0x62,0x61,0x45,0x20,0xfe,0xaa,0x00,0x10,0x00,0x00]
+          vpaddd 4096(%rdx), %ymm23, %ymm29
+
+// CHECK: vpaddd -4096(%rdx), %ymm23, %ymm29
+// CHECK:  encoding: [0x62,0x61,0x45,0x20,0xfe,0x6a,0x80]
+          vpaddd -4096(%rdx), %ymm23, %ymm29
+
+// CHECK: vpaddd -4128(%rdx), %ymm23, %ymm29
+// CHECK:  encoding: [0x62,0x61,0x45,0x20,0xfe,0xaa,0xe0,0xef,0xff,0xff]
+          vpaddd -4128(%rdx), %ymm23, %ymm29
+
+// CHECK: vpaddd 508(%rdx){1to8}, %ymm23, %ymm29
+// CHECK:  encoding: [0x62,0x61,0x45,0x30,0xfe,0x6a,0x7f]
+          vpaddd 508(%rdx){1to8}, %ymm23, %ymm29
+
+// CHECK: vpaddd 512(%rdx){1to8}, %ymm23, %ymm29
+// CHECK:  encoding: [0x62,0x61,0x45,0x30,0xfe,0xaa,0x00,0x02,0x00,0x00]
+          vpaddd 512(%rdx){1to8}, %ymm23, %ymm29
+
+// CHECK: vpaddd -512(%rdx){1to8}, %ymm23, %ymm29
+// CHECK:  encoding: [0x62,0x61,0x45,0x30,0xfe,0x6a,0x80]
+          vpaddd -512(%rdx){1to8}, %ymm23, %ymm29
+
+// CHECK: vpaddd -516(%rdx){1to8}, %ymm23, %ymm29
+// CHECK:  encoding: [0x62,0x61,0x45,0x30,0xfe,0xaa,0xfc,0xfd,0xff,0xff]
+          vpaddd -516(%rdx){1to8}, %ymm23, %ymm29
+
+// CHECK: vpaddq %xmm26, %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x01,0xf5,0x00,0xd4,0xea]
+          vpaddq %xmm26, %xmm17, %xmm29
+
+// CHECK: vpaddq %xmm26, %xmm17, %xmm29 {%k2}
+// CHECK:  encoding: [0x62,0x01,0xf5,0x02,0xd4,0xea]
+          vpaddq %xmm26, %xmm17, %xmm29 {%k2}
+
+// CHECK: vpaddq %xmm26, %xmm17, %xmm29 {%k2} {z}
+// CHECK:  encoding: [0x62,0x01,0xf5,0x82,0xd4,0xea]
+          vpaddq %xmm26, %xmm17, %xmm29 {%k2} {z}
+
+// CHECK: vpaddq (%rcx), %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0xf5,0x00,0xd4,0x29]
+          vpaddq (%rcx), %xmm17, %xmm29
+
+// CHECK: vpaddq 291(%rax,%r14,8), %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x21,0xf5,0x00,0xd4,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpaddq 291(%rax,%r14,8), %xmm17, %xmm29
+
+// CHECK: vpaddq (%rcx){1to2}, %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0xf5,0x10,0xd4,0x29]
+          vpaddq (%rcx){1to2}, %xmm17, %xmm29
+
+// CHECK: vpaddq 2032(%rdx), %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0xf5,0x00,0xd4,0x6a,0x7f]
+          vpaddq 2032(%rdx), %xmm17, %xmm29
+
+// CHECK: vpaddq 2048(%rdx), %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0xf5,0x00,0xd4,0xaa,0x00,0x08,0x00,0x00]
+          vpaddq 2048(%rdx), %xmm17, %xmm29
+
+// CHECK: vpaddq -2048(%rdx), %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0xf5,0x00,0xd4,0x6a,0x80]
+          vpaddq -2048(%rdx), %xmm17, %xmm29
+
+// CHECK: vpaddq -2064(%rdx), %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0xf5,0x00,0xd4,0xaa,0xf0,0xf7,0xff,0xff]
+          vpaddq -2064(%rdx), %xmm17, %xmm29
+
+// CHECK: vpaddq 1016(%rdx){1to2}, %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0xf5,0x10,0xd4,0x6a,0x7f]
+          vpaddq 1016(%rdx){1to2}, %xmm17, %xmm29
+
+// CHECK: vpaddq 1024(%rdx){1to2}, %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0xf5,0x10,0xd4,0xaa,0x00,0x04,0x00,0x00]
+          vpaddq 1024(%rdx){1to2}, %xmm17, %xmm29
+
+// CHECK: vpaddq -1024(%rdx){1to2}, %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0xf5,0x10,0xd4,0x6a,0x80]
+          vpaddq -1024(%rdx){1to2}, %xmm17, %xmm29
+
+// CHECK: vpaddq -1032(%rdx){1to2}, %xmm17, %xmm29
+// CHECK:  encoding: [0x62,0x61,0xf5,0x10,0xd4,0xaa,0xf8,0xfb,0xff,0xff]
+          vpaddq -1032(%rdx){1to2}, %xmm17, %xmm29
+
+// CHECK: vpaddq %ymm18, %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xa1,0xb5,0x20,0xd4,0xe2]
+          vpaddq %ymm18, %ymm25, %ymm20
+
+// CHECK: vpaddq %ymm18, %ymm25, %ymm20 {%k6}
+// CHECK:  encoding: [0x62,0xa1,0xb5,0x26,0xd4,0xe2]
+          vpaddq %ymm18, %ymm25, %ymm20 {%k6}
+
+// CHECK: vpaddq %ymm18, %ymm25, %ymm20 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa1,0xb5,0xa6,0xd4,0xe2]
+          vpaddq %ymm18, %ymm25, %ymm20 {%k6} {z}
+
+// CHECK: vpaddq (%rcx), %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0xb5,0x20,0xd4,0x21]
+          vpaddq (%rcx), %ymm25, %ymm20
+
+// CHECK: vpaddq 291(%rax,%r14,8), %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xa1,0xb5,0x20,0xd4,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpaddq 291(%rax,%r14,8), %ymm25, %ymm20
+
+// CHECK: vpaddq (%rcx){1to4}, %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0xb5,0x30,0xd4,0x21]
+          vpaddq (%rcx){1to4}, %ymm25, %ymm20
+
+// CHECK: vpaddq 4064(%rdx), %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0xb5,0x20,0xd4,0x62,0x7f]
+          vpaddq 4064(%rdx), %ymm25, %ymm20
+
+// CHECK: vpaddq 4096(%rdx), %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0xb5,0x20,0xd4,0xa2,0x00,0x10,0x00,0x00]
+          vpaddq 4096(%rdx), %ymm25, %ymm20
+
+// CHECK: vpaddq -4096(%rdx), %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0xb5,0x20,0xd4,0x62,0x80]
+          vpaddq -4096(%rdx), %ymm25, %ymm20
+
+// CHECK: vpaddq -4128(%rdx), %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0xb5,0x20,0xd4,0xa2,0xe0,0xef,0xff,0xff]
+          vpaddq -4128(%rdx), %ymm25, %ymm20
+
+// CHECK: vpaddq 1016(%rdx){1to4}, %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0xb5,0x30,0xd4,0x62,0x7f]
+          vpaddq 1016(%rdx){1to4}, %ymm25, %ymm20
+
+// CHECK: vpaddq 1024(%rdx){1to4}, %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0xb5,0x30,0xd4,0xa2,0x00,0x04,0x00,0x00]
+          vpaddq 1024(%rdx){1to4}, %ymm25, %ymm20
+
+// CHECK: vpaddq -1024(%rdx){1to4}, %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0xb5,0x30,0xd4,0x62,0x80]
+          vpaddq -1024(%rdx){1to4}, %ymm25, %ymm20
+
+// CHECK: vpaddq -1032(%rdx){1to4}, %ymm25, %ymm20
+// CHECK:  encoding: [0x62,0xe1,0xb5,0x30,0xd4,0xa2,0xf8,0xfb,0xff,0xff]
+          vpaddq -1032(%rdx){1to4}, %ymm25, %ymm20
+
+// CHECK: vpandd %xmm18, %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x21,0x2d,0x00,0xdb,0xc2]
+          vpandd %xmm18, %xmm26, %xmm24
+
+// CHECK: vpandd %xmm18, %xmm26, %xmm24 {%k2}
+// CHECK:  encoding: [0x62,0x21,0x2d,0x02,0xdb,0xc2]
+          vpandd %xmm18, %xmm26, %xmm24 {%k2}
+
+// CHECK: vpandd %xmm18, %xmm26, %xmm24 {%k2} {z}
+// CHECK:  encoding: [0x62,0x21,0x2d,0x82,0xdb,0xc2]
+          vpandd %xmm18, %xmm26, %xmm24 {%k2} {z}
+
+// CHECK: vpandd (%rcx), %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x00,0xdb,0x01]
+          vpandd (%rcx), %xmm26, %xmm24
+
+// CHECK: vpandd 291(%rax,%r14,8), %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x21,0x2d,0x00,0xdb,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vpandd 291(%rax,%r14,8), %xmm26, %xmm24
+
+// CHECK: vpandd (%rcx){1to4}, %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x10,0xdb,0x01]
+          vpandd (%rcx){1to4}, %xmm26, %xmm24
+
+// CHECK: vpandd 2032(%rdx), %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x00,0xdb,0x42,0x7f]
+          vpandd 2032(%rdx), %xmm26, %xmm24
+
+// CHECK: vpandd 2048(%rdx), %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x00,0xdb,0x82,0x00,0x08,0x00,0x00]
+          vpandd 2048(%rdx), %xmm26, %xmm24
+
+// CHECK: vpandd -2048(%rdx), %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x00,0xdb,0x42,0x80]
+          vpandd -2048(%rdx), %xmm26, %xmm24
+
+// CHECK: vpandd -2064(%rdx), %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x00,0xdb,0x82,0xf0,0xf7,0xff,0xff]
+          vpandd -2064(%rdx), %xmm26, %xmm24
+
+// CHECK: vpandd 508(%rdx){1to4}, %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x10,0xdb,0x42,0x7f]
+          vpandd 508(%rdx){1to4}, %xmm26, %xmm24
+
+// CHECK: vpandd 512(%rdx){1to4}, %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x10,0xdb,0x82,0x00,0x02,0x00,0x00]
+          vpandd 512(%rdx){1to4}, %xmm26, %xmm24
+
+// CHECK: vpandd -512(%rdx){1to4}, %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x10,0xdb,0x42,0x80]
+          vpandd -512(%rdx){1to4}, %xmm26, %xmm24
+
+// CHECK: vpandd -516(%rdx){1to4}, %xmm26, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x10,0xdb,0x82,0xfc,0xfd,0xff,0xff]
+          vpandd -516(%rdx){1to4}, %xmm26, %xmm24
+
+// CHECK: vpandd %ymm20, %ymm21, %ymm18
+// CHECK:  encoding: [0x62,0xa1,0x55,0x20,0xdb,0xd4]
+          vpandd %ymm20, %ymm21, %ymm18
+
+// CHECK: vpandd %ymm20, %ymm21, %ymm18 {%k3}
+// CHECK:  encoding: [0x62,0xa1,0x55,0x23,0xdb,0xd4]
+          vpandd %ymm20, %ymm21, %ymm18 {%k3}
+
+// CHECK: vpandd %ymm20, %ymm21, %ymm18 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa1,0x55,0xa3,0xdb,0xd4]
+          vpandd %ymm20, %ymm21, %ymm18 {%k3} {z}
+
+// CHECK: vpandd (%rcx), %ymm21, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0x55,0x20,0xdb,0x11]
+          vpandd (%rcx), %ymm21, %ymm18
+
+// CHECK: vpandd 291(%rax,%r14,8), %ymm21, %ymm18
+// CHECK:  encoding: [0x62,0xa1,0x55,0x20,0xdb,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpandd 291(%rax,%r14,8), %ymm21, %ymm18
+
+// CHECK: vpandd (%rcx){1to8}, %ymm21, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0x55,0x30,0xdb,0x11]
+          vpandd (%rcx){1to8}, %ymm21, %ymm18
+
+// CHECK: vpandd 4064(%rdx), %ymm21, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0x55,0x20,0xdb,0x52,0x7f]
+          vpandd 4064(%rdx), %ymm21, %ymm18
+
+// CHECK: vpandd 4096(%rdx), %ymm21, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0x55,0x20,0xdb,0x92,0x00,0x10,0x00,0x00]
+          vpandd 4096(%rdx), %ymm21, %ymm18
+
+// CHECK: vpandd -4096(%rdx), %ymm21, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0x55,0x20,0xdb,0x52,0x80]
+          vpandd -4096(%rdx), %ymm21, %ymm18
+
+// CHECK: vpandd -4128(%rdx), %ymm21, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0x55,0x20,0xdb,0x92,0xe0,0xef,0xff,0xff]
+          vpandd -4128(%rdx), %ymm21, %ymm18
+
+// CHECK: vpandd 508(%rdx){1to8}, %ymm21, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0x55,0x30,0xdb,0x52,0x7f]
+          vpandd 508(%rdx){1to8}, %ymm21, %ymm18
+
+// CHECK: vpandd 512(%rdx){1to8}, %ymm21, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0x55,0x30,0xdb,0x92,0x00,0x02,0x00,0x00]
+          vpandd 512(%rdx){1to8}, %ymm21, %ymm18
+
+// CHECK: vpandd -512(%rdx){1to8}, %ymm21, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0x55,0x30,0xdb,0x52,0x80]
+          vpandd -512(%rdx){1to8}, %ymm21, %ymm18
+
+// CHECK: vpandd -516(%rdx){1to8}, %ymm21, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0x55,0x30,0xdb,0x92,0xfc,0xfd,0xff,0xff]
+          vpandd -516(%rdx){1to8}, %ymm21, %ymm18
+
+// CHECK: vpandnd %xmm22, %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x21,0x75,0x00,0xdf,0xc6]
+          vpandnd %xmm22, %xmm17, %xmm24
+
+// CHECK: vpandnd %xmm22, %xmm17, %xmm24 {%k2}
+// CHECK:  encoding: [0x62,0x21,0x75,0x02,0xdf,0xc6]
+          vpandnd %xmm22, %xmm17, %xmm24 {%k2}
+
+// CHECK: vpandnd %xmm22, %xmm17, %xmm24 {%k2} {z}
+// CHECK:  encoding: [0x62,0x21,0x75,0x82,0xdf,0xc6]
+          vpandnd %xmm22, %xmm17, %xmm24 {%k2} {z}
+
+// CHECK: vpandnd (%rcx), %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x75,0x00,0xdf,0x01]
+          vpandnd (%rcx), %xmm17, %xmm24
+
+// CHECK: vpandnd 291(%rax,%r14,8), %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x21,0x75,0x00,0xdf,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vpandnd 291(%rax,%r14,8), %xmm17, %xmm24
+
+// CHECK: vpandnd (%rcx){1to4}, %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x75,0x10,0xdf,0x01]
+          vpandnd (%rcx){1to4}, %xmm17, %xmm24
+
+// CHECK: vpandnd 2032(%rdx), %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x75,0x00,0xdf,0x42,0x7f]
+          vpandnd 2032(%rdx), %xmm17, %xmm24
+
+// CHECK: vpandnd 2048(%rdx), %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x75,0x00,0xdf,0x82,0x00,0x08,0x00,0x00]
+          vpandnd 2048(%rdx), %xmm17, %xmm24
+
+// CHECK: vpandnd -2048(%rdx), %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x75,0x00,0xdf,0x42,0x80]
+          vpandnd -2048(%rdx), %xmm17, %xmm24
+
+// CHECK: vpandnd -2064(%rdx), %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x75,0x00,0xdf,0x82,0xf0,0xf7,0xff,0xff]
+          vpandnd -2064(%rdx), %xmm17, %xmm24
+
+// CHECK: vpandnd 508(%rdx){1to4}, %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x75,0x10,0xdf,0x42,0x7f]
+          vpandnd 508(%rdx){1to4}, %xmm17, %xmm24
+
+// CHECK: vpandnd 512(%rdx){1to4}, %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x75,0x10,0xdf,0x82,0x00,0x02,0x00,0x00]
+          vpandnd 512(%rdx){1to4}, %xmm17, %xmm24
+
+// CHECK: vpandnd -512(%rdx){1to4}, %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x75,0x10,0xdf,0x42,0x80]
+          vpandnd -512(%rdx){1to4}, %xmm17, %xmm24
+
+// CHECK: vpandnd -516(%rdx){1to4}, %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x61,0x75,0x10,0xdf,0x82,0xfc,0xfd,0xff,0xff]
+          vpandnd -516(%rdx){1to4}, %xmm17, %xmm24
+
+// CHECK: vpandnd %ymm17, %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xa1,0x25,0x20,0xdf,0xd9]
+          vpandnd %ymm17, %ymm27, %ymm19
+
+// CHECK: vpandnd %ymm17, %ymm27, %ymm19 {%k2}
+// CHECK:  encoding: [0x62,0xa1,0x25,0x22,0xdf,0xd9]
+          vpandnd %ymm17, %ymm27, %ymm19 {%k2}
+
+// CHECK: vpandnd %ymm17, %ymm27, %ymm19 {%k2} {z}
+// CHECK:  encoding: [0x62,0xa1,0x25,0xa2,0xdf,0xd9]
+          vpandnd %ymm17, %ymm27, %ymm19 {%k2} {z}
+
+// CHECK: vpandnd (%rcx), %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x25,0x20,0xdf,0x19]
+          vpandnd (%rcx), %ymm27, %ymm19
+
+// CHECK: vpandnd 291(%rax,%r14,8), %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xa1,0x25,0x20,0xdf,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpandnd 291(%rax,%r14,8), %ymm27, %ymm19
+
+// CHECK: vpandnd (%rcx){1to8}, %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x25,0x30,0xdf,0x19]
+          vpandnd (%rcx){1to8}, %ymm27, %ymm19
+
+// CHECK: vpandnd 4064(%rdx), %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x25,0x20,0xdf,0x5a,0x7f]
+          vpandnd 4064(%rdx), %ymm27, %ymm19
+
+// CHECK: vpandnd 4096(%rdx), %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x25,0x20,0xdf,0x9a,0x00,0x10,0x00,0x00]
+          vpandnd 4096(%rdx), %ymm27, %ymm19
+
+// CHECK: vpandnd -4096(%rdx), %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x25,0x20,0xdf,0x5a,0x80]
+          vpandnd -4096(%rdx), %ymm27, %ymm19
+
+// CHECK: vpandnd -4128(%rdx), %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x25,0x20,0xdf,0x9a,0xe0,0xef,0xff,0xff]
+          vpandnd -4128(%rdx), %ymm27, %ymm19
+
+// CHECK: vpandnd 508(%rdx){1to8}, %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x25,0x30,0xdf,0x5a,0x7f]
+          vpandnd 508(%rdx){1to8}, %ymm27, %ymm19
+
+// CHECK: vpandnd 512(%rdx){1to8}, %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x25,0x30,0xdf,0x9a,0x00,0x02,0x00,0x00]
+          vpandnd 512(%rdx){1to8}, %ymm27, %ymm19
+
+// CHECK: vpandnd -512(%rdx){1to8}, %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x25,0x30,0xdf,0x5a,0x80]
+          vpandnd -512(%rdx){1to8}, %ymm27, %ymm19
+
+// CHECK: vpandnd -516(%rdx){1to8}, %ymm27, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x25,0x30,0xdf,0x9a,0xfc,0xfd,0xff,0xff]
+          vpandnd -516(%rdx){1to8}, %ymm27, %ymm19
+
+// CHECK: vpandnq %xmm20, %xmm28, %xmm23
+// CHECK:  encoding: [0x62,0xa1,0x9d,0x00,0xdf,0xfc]
+          vpandnq %xmm20, %xmm28, %xmm23
+
+// CHECK: vpandnq %xmm20, %xmm28, %xmm23 {%k7}
+// CHECK:  encoding: [0x62,0xa1,0x9d,0x07,0xdf,0xfc]
+          vpandnq %xmm20, %xmm28, %xmm23 {%k7}
+
+// CHECK: vpandnq %xmm20, %xmm28, %xmm23 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa1,0x9d,0x87,0xdf,0xfc]
+          vpandnq %xmm20, %xmm28, %xmm23 {%k7} {z}
+
+// CHECK: vpandnq (%rcx), %xmm28, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x00,0xdf,0x39]
+          vpandnq (%rcx), %xmm28, %xmm23
+
+// CHECK: vpandnq 291(%rax,%r14,8), %xmm28, %xmm23
+// CHECK:  encoding: [0x62,0xa1,0x9d,0x00,0xdf,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vpandnq 291(%rax,%r14,8), %xmm28, %xmm23
+
+// CHECK: vpandnq (%rcx){1to2}, %xmm28, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x10,0xdf,0x39]
+          vpandnq (%rcx){1to2}, %xmm28, %xmm23
+
+// CHECK: vpandnq 2032(%rdx), %xmm28, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x00,0xdf,0x7a,0x7f]
+          vpandnq 2032(%rdx), %xmm28, %xmm23
+
+// CHECK: vpandnq 2048(%rdx), %xmm28, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x00,0xdf,0xba,0x00,0x08,0x00,0x00]
+          vpandnq 2048(%rdx), %xmm28, %xmm23
+
+// CHECK: vpandnq -2048(%rdx), %xmm28, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x00,0xdf,0x7a,0x80]
+          vpandnq -2048(%rdx), %xmm28, %xmm23
+
+// CHECK: vpandnq -2064(%rdx), %xmm28, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x00,0xdf,0xba,0xf0,0xf7,0xff,0xff]
+          vpandnq -2064(%rdx), %xmm28, %xmm23
+
+// CHECK: vpandnq 1016(%rdx){1to2}, %xmm28, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x10,0xdf,0x7a,0x7f]
+          vpandnq 1016(%rdx){1to2}, %xmm28, %xmm23
+
+// CHECK: vpandnq 1024(%rdx){1to2}, %xmm28, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x10,0xdf,0xba,0x00,0x04,0x00,0x00]
+          vpandnq 1024(%rdx){1to2}, %xmm28, %xmm23
+
+// CHECK: vpandnq -1024(%rdx){1to2}, %xmm28, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x10,0xdf,0x7a,0x80]
+          vpandnq -1024(%rdx){1to2}, %xmm28, %xmm23
+
+// CHECK: vpandnq -1032(%rdx){1to2}, %xmm28, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x10,0xdf,0xba,0xf8,0xfb,0xff,0xff]
+          vpandnq -1032(%rdx){1to2}, %xmm28, %xmm23
+
+// CHECK: vpandnq %ymm28, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x01,0xdd,0x20,0xdf,0xec]
+          vpandnq %ymm28, %ymm20, %ymm29
+
+// CHECK: vpandnq %ymm28, %ymm20, %ymm29 {%k1}
+// CHECK:  encoding: [0x62,0x01,0xdd,0x21,0xdf,0xec]
+          vpandnq %ymm28, %ymm20, %ymm29 {%k1}
+
+// CHECK: vpandnq %ymm28, %ymm20, %ymm29 {%k1} {z}
+// CHECK:  encoding: [0x62,0x01,0xdd,0xa1,0xdf,0xec]
+          vpandnq %ymm28, %ymm20, %ymm29 {%k1} {z}
+
+// CHECK: vpandnq (%rcx), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x20,0xdf,0x29]
+          vpandnq (%rcx), %ymm20, %ymm29
+
+// CHECK: vpandnq 291(%rax,%r14,8), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x21,0xdd,0x20,0xdf,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpandnq 291(%rax,%r14,8), %ymm20, %ymm29
+
+// CHECK: vpandnq (%rcx){1to4}, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x30,0xdf,0x29]
+          vpandnq (%rcx){1to4}, %ymm20, %ymm29
+
+// CHECK: vpandnq 4064(%rdx), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x20,0xdf,0x6a,0x7f]
+          vpandnq 4064(%rdx), %ymm20, %ymm29
+
+// CHECK: vpandnq 4096(%rdx), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x20,0xdf,0xaa,0x00,0x10,0x00,0x00]
+          vpandnq 4096(%rdx), %ymm20, %ymm29
+
+// CHECK: vpandnq -4096(%rdx), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x20,0xdf,0x6a,0x80]
+          vpandnq -4096(%rdx), %ymm20, %ymm29
+
+// CHECK: vpandnq -4128(%rdx), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x20,0xdf,0xaa,0xe0,0xef,0xff,0xff]
+          vpandnq -4128(%rdx), %ymm20, %ymm29
+
+// CHECK: vpandnq 1016(%rdx){1to4}, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x30,0xdf,0x6a,0x7f]
+          vpandnq 1016(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vpandnq 1024(%rdx){1to4}, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x30,0xdf,0xaa,0x00,0x04,0x00,0x00]
+          vpandnq 1024(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vpandnq -1024(%rdx){1to4}, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x30,0xdf,0x6a,0x80]
+          vpandnq -1024(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vpandnq -1032(%rdx){1to4}, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x30,0xdf,0xaa,0xf8,0xfb,0xff,0xff]
+          vpandnq -1032(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vpandq %xmm25, %xmm19, %xmm22
+// CHECK:  encoding: [0x62,0x81,0xe5,0x00,0xdb,0xf1]
+          vpandq %xmm25, %xmm19, %xmm22
+
+// CHECK: vpandq %xmm25, %xmm19, %xmm22 {%k5}
+// CHECK:  encoding: [0x62,0x81,0xe5,0x05,0xdb,0xf1]
+          vpandq %xmm25, %xmm19, %xmm22 {%k5}
+
+// CHECK: vpandq %xmm25, %xmm19, %xmm22 {%k5} {z}
+// CHECK:  encoding: [0x62,0x81,0xe5,0x85,0xdb,0xf1]
+          vpandq %xmm25, %xmm19, %xmm22 {%k5} {z}
+
+// CHECK: vpandq (%rcx), %xmm19, %xmm22
+// CHECK:  encoding: [0x62,0xe1,0xe5,0x00,0xdb,0x31]
+          vpandq (%rcx), %xmm19, %xmm22
+
+// CHECK: vpandq 291(%rax,%r14,8), %xmm19, %xmm22
+// CHECK:  encoding: [0x62,0xa1,0xe5,0x00,0xdb,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpandq 291(%rax,%r14,8), %xmm19, %xmm22
+
+// CHECK: vpandq (%rcx){1to2}, %xmm19, %xmm22
+// CHECK:  encoding: [0x62,0xe1,0xe5,0x10,0xdb,0x31]
+          vpandq (%rcx){1to2}, %xmm19, %xmm22
+
+// CHECK: vpandq 2032(%rdx), %xmm19, %xmm22
+// CHECK:  encoding: [0x62,0xe1,0xe5,0x00,0xdb,0x72,0x7f]
+          vpandq 2032(%rdx), %xmm19, %xmm22
+
+// CHECK: vpandq 2048(%rdx), %xmm19, %xmm22
+// CHECK:  encoding: [0x62,0xe1,0xe5,0x00,0xdb,0xb2,0x00,0x08,0x00,0x00]
+          vpandq 2048(%rdx), %xmm19, %xmm22
+
+// CHECK: vpandq -2048(%rdx), %xmm19, %xmm22
+// CHECK:  encoding: [0x62,0xe1,0xe5,0x00,0xdb,0x72,0x80]
+          vpandq -2048(%rdx), %xmm19, %xmm22
+
+// CHECK: vpandq -2064(%rdx), %xmm19, %xmm22
+// CHECK:  encoding: [0x62,0xe1,0xe5,0x00,0xdb,0xb2,0xf0,0xf7,0xff,0xff]
+          vpandq -2064(%rdx), %xmm19, %xmm22
+
+// CHECK: vpandq 1016(%rdx){1to2}, %xmm19, %xmm22
+// CHECK:  encoding: [0x62,0xe1,0xe5,0x10,0xdb,0x72,0x7f]
+          vpandq 1016(%rdx){1to2}, %xmm19, %xmm22
+
+// CHECK: vpandq 1024(%rdx){1to2}, %xmm19, %xmm22
+// CHECK:  encoding: [0x62,0xe1,0xe5,0x10,0xdb,0xb2,0x00,0x04,0x00,0x00]
+          vpandq 1024(%rdx){1to2}, %xmm19, %xmm22
+
+// CHECK: vpandq -1024(%rdx){1to2}, %xmm19, %xmm22
+// CHECK:  encoding: [0x62,0xe1,0xe5,0x10,0xdb,0x72,0x80]
+          vpandq -1024(%rdx){1to2}, %xmm19, %xmm22
+
+// CHECK: vpandq -1032(%rdx){1to2}, %xmm19, %xmm22
+// CHECK:  encoding: [0x62,0xe1,0xe5,0x10,0xdb,0xb2,0xf8,0xfb,0xff,0xff]
+          vpandq -1032(%rdx){1to2}, %xmm19, %xmm22
+
+// CHECK: vpandq %ymm24, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x01,0xad,0x20,0xdb,0xc8]
+          vpandq %ymm24, %ymm26, %ymm25
+
+// CHECK: vpandq %ymm24, %ymm26, %ymm25 {%k7}
+// CHECK:  encoding: [0x62,0x01,0xad,0x27,0xdb,0xc8]
+          vpandq %ymm24, %ymm26, %ymm25 {%k7}
+
+// CHECK: vpandq %ymm24, %ymm26, %ymm25 {%k7} {z}
+// CHECK:  encoding: [0x62,0x01,0xad,0xa7,0xdb,0xc8]
+          vpandq %ymm24, %ymm26, %ymm25 {%k7} {z}
+
+// CHECK: vpandq (%rcx), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xad,0x20,0xdb,0x09]
+          vpandq (%rcx), %ymm26, %ymm25
+
+// CHECK: vpandq 291(%rax,%r14,8), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x21,0xad,0x20,0xdb,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpandq 291(%rax,%r14,8), %ymm26, %ymm25
+
+// CHECK: vpandq (%rcx){1to4}, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xad,0x30,0xdb,0x09]
+          vpandq (%rcx){1to4}, %ymm26, %ymm25
+
+// CHECK: vpandq 4064(%rdx), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xad,0x20,0xdb,0x4a,0x7f]
+          vpandq 4064(%rdx), %ymm26, %ymm25
+
+// CHECK: vpandq 4096(%rdx), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xad,0x20,0xdb,0x8a,0x00,0x10,0x00,0x00]
+          vpandq 4096(%rdx), %ymm26, %ymm25
+
+// CHECK: vpandq -4096(%rdx), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xad,0x20,0xdb,0x4a,0x80]
+          vpandq -4096(%rdx), %ymm26, %ymm25
+
+// CHECK: vpandq -4128(%rdx), %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xad,0x20,0xdb,0x8a,0xe0,0xef,0xff,0xff]
+          vpandq -4128(%rdx), %ymm26, %ymm25
+
+// CHECK: vpandq 1016(%rdx){1to4}, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xad,0x30,0xdb,0x4a,0x7f]
+          vpandq 1016(%rdx){1to4}, %ymm26, %ymm25
+
+// CHECK: vpandq 1024(%rdx){1to4}, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xad,0x30,0xdb,0x8a,0x00,0x04,0x00,0x00]
+          vpandq 1024(%rdx){1to4}, %ymm26, %ymm25
+
+// CHECK: vpandq -1024(%rdx){1to4}, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xad,0x30,0xdb,0x4a,0x80]
+          vpandq -1024(%rdx){1to4}, %ymm26, %ymm25
+
+// CHECK: vpandq -1032(%rdx){1to4}, %ymm26, %ymm25
+// CHECK:  encoding: [0x62,0x61,0xad,0x30,0xdb,0x8a,0xf8,0xfb,0xff,0xff]
+          vpandq -1032(%rdx){1to4}, %ymm26, %ymm25
+
+// CHECK: vpbroadcastd %eax, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x7c,0xf0]
+          vpbroadcastd %eax, %xmm22
+
+// CHECK: vpbroadcastd %eax, %xmm22 {%k5}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x0d,0x7c,0xf0]
+          vpbroadcastd %eax, %xmm22 {%k5}
+
+// CHECK: vpbroadcastd %eax, %xmm22 {%k5} {z}
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x8d,0x7c,0xf0]
+          vpbroadcastd %eax, %xmm22 {%k5} {z}
+
+// CHECK: vpbroadcastd %ebp, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x7c,0xf5]
+          vpbroadcastd %ebp, %xmm22
+
+// CHECK: vpbroadcastd %r13d, %xmm22
+// CHECK:  encoding: [0x62,0xc2,0x7d,0x08,0x7c,0xf5]
+          vpbroadcastd %r13d, %xmm22
+
+// CHECK: vpbroadcastd %eax, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x7c,0xc8]
+          vpbroadcastd %eax, %ymm25
+
+// CHECK: vpbroadcastd %eax, %ymm25 {%k5}
+// CHECK:  encoding: [0x62,0x62,0x7d,0x2d,0x7c,0xc8]
+          vpbroadcastd %eax, %ymm25 {%k5}
+
+// CHECK: vpbroadcastd %eax, %ymm25 {%k5} {z}
+// CHECK:  encoding: [0x62,0x62,0x7d,0xad,0x7c,0xc8]
+          vpbroadcastd %eax, %ymm25 {%k5} {z}
+
+// CHECK: vpbroadcastd %ebp, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x7c,0xcd]
+          vpbroadcastd %ebp, %ymm25
+
+// CHECK: vpbroadcastd %r13d, %ymm25
+// CHECK:  encoding: [0x62,0x42,0x7d,0x28,0x7c,0xcd]
+          vpbroadcastd %r13d, %ymm25
+
+// CHECK: vpbroadcastq %rax, %xmm22
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x7c,0xf0]
+          vpbroadcastq %rax, %xmm22
+
+// CHECK: vpbroadcastq %rax, %xmm22 {%k2}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x0a,0x7c,0xf0]
+          vpbroadcastq %rax, %xmm22 {%k2}
+
+// CHECK: vpbroadcastq %rax, %xmm22 {%k2} {z}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x8a,0x7c,0xf0]
+          vpbroadcastq %rax, %xmm22 {%k2} {z}
+
+// CHECK: vpbroadcastq %r8, %xmm22
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x08,0x7c,0xf0]
+          vpbroadcastq %r8, %xmm22
+
+// CHECK: vpbroadcastq %rax, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x7c,0xd8]
+          vpbroadcastq %rax, %ymm19
+
+// CHECK: vpbroadcastq %rax, %ymm19 {%k5}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x2d,0x7c,0xd8]
+          vpbroadcastq %rax, %ymm19 {%k5}
+
+// CHECK: vpbroadcastq %rax, %ymm19 {%k5} {z}
+// CHECK:  encoding: [0x62,0xe2,0xfd,0xad,0x7c,0xd8]
+          vpbroadcastq %rax, %ymm19 {%k5} {z}
+
+// CHECK: vpbroadcastq %r8, %ymm19
+// CHECK:  encoding: [0x62,0xc2,0xfd,0x28,0x7c,0xd8]
+          vpbroadcastq %r8, %ymm19
+
+// CHECK: vpcmpd $171, %xmm20, %xmm23, %k4
+// CHECK:  encoding: [0x62,0xb3,0x45,0x00,0x1f,0xe4,0xab]
+          vpcmpd $171, %xmm20, %xmm23, %k4
+
+// CHECK: vpcmpd $171, %xmm20, %xmm23, %k4 {%k1}
+// CHECK:  encoding: [0x62,0xb3,0x45,0x01,0x1f,0xe4,0xab]
+          vpcmpd $171, %xmm20, %xmm23, %k4 {%k1}
+
+// CHECK: vpcmpd $123, %xmm20, %xmm23, %k4
+// CHECK:  encoding: [0x62,0xb3,0x45,0x00,0x1f,0xe4,0x7b]
+          vpcmpd $123, %xmm20, %xmm23, %k4
+
+// CHECK: vpcmpd $123, (%rcx), %xmm23, %k4
+// CHECK:  encoding: [0x62,0xf3,0x45,0x00,0x1f,0x21,0x7b]
+          vpcmpd $123, (%rcx), %xmm23, %k4
+
+// CHECK: vpcmpd $123, 291(%rax,%r14,8), %xmm23, %k4
+// CHECK:  encoding: [0x62,0xb3,0x45,0x00,0x1f,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpd $123, 291(%rax,%r14,8), %xmm23, %k4
+
+// CHECK: vpcmpd $123, (%rcx){1to4}, %xmm23, %k4
+// CHECK:  encoding: [0x62,0xf3,0x45,0x10,0x1f,0x21,0x7b]
+          vpcmpd $123, (%rcx){1to4}, %xmm23, %k4
+
+// CHECK: vpcmpd $123, 2032(%rdx), %xmm23, %k4
+// CHECK:  encoding: [0x62,0xf3,0x45,0x00,0x1f,0x62,0x7f,0x7b]
+          vpcmpd $123, 2032(%rdx), %xmm23, %k4
+
+// CHECK: vpcmpd $123, 2048(%rdx), %xmm23, %k4
+// CHECK:  encoding: [0x62,0xf3,0x45,0x00,0x1f,0xa2,0x00,0x08,0x00,0x00,0x7b]
+          vpcmpd $123, 2048(%rdx), %xmm23, %k4
+
+// CHECK: vpcmpd $123, -2048(%rdx), %xmm23, %k4
+// CHECK:  encoding: [0x62,0xf3,0x45,0x00,0x1f,0x62,0x80,0x7b]
+          vpcmpd $123, -2048(%rdx), %xmm23, %k4
+
+// CHECK: vpcmpd $123, -2064(%rdx), %xmm23, %k4
+// CHECK:  encoding: [0x62,0xf3,0x45,0x00,0x1f,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+          vpcmpd $123, -2064(%rdx), %xmm23, %k4
+
+// CHECK: vpcmpd $123, 508(%rdx){1to4}, %xmm23, %k4
+// CHECK:  encoding: [0x62,0xf3,0x45,0x10,0x1f,0x62,0x7f,0x7b]
+          vpcmpd $123, 508(%rdx){1to4}, %xmm23, %k4
+
+// CHECK: vpcmpd $123, 512(%rdx){1to4}, %xmm23, %k4
+// CHECK:  encoding: [0x62,0xf3,0x45,0x10,0x1f,0xa2,0x00,0x02,0x00,0x00,0x7b]
+          vpcmpd $123, 512(%rdx){1to4}, %xmm23, %k4
+
+// CHECK: vpcmpd $123, -512(%rdx){1to4}, %xmm23, %k4
+// CHECK:  encoding: [0x62,0xf3,0x45,0x10,0x1f,0x62,0x80,0x7b]
+          vpcmpd $123, -512(%rdx){1to4}, %xmm23, %k4
+
+// CHECK: vpcmpd $123, -516(%rdx){1to4}, %xmm23, %k4
+// CHECK:  encoding: [0x62,0xf3,0x45,0x10,0x1f,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+          vpcmpd $123, -516(%rdx){1to4}, %xmm23, %k4
+
+// CHECK: vpcmpd $171, %ymm19, %ymm24, %k4
+// CHECK:  encoding: [0x62,0xb3,0x3d,0x20,0x1f,0xe3,0xab]
+          vpcmpd $171, %ymm19, %ymm24, %k4
+
+// CHECK: vpcmpd $171, %ymm19, %ymm24, %k4 {%k3}
+// CHECK:  encoding: [0x62,0xb3,0x3d,0x23,0x1f,0xe3,0xab]
+          vpcmpd $171, %ymm19, %ymm24, %k4 {%k3}
+
+// CHECK: vpcmpd $123, %ymm19, %ymm24, %k4
+// CHECK:  encoding: [0x62,0xb3,0x3d,0x20,0x1f,0xe3,0x7b]
+          vpcmpd $123, %ymm19, %ymm24, %k4
+
+// CHECK: vpcmpd $123, (%rcx), %ymm24, %k4
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x20,0x1f,0x21,0x7b]
+          vpcmpd $123, (%rcx), %ymm24, %k4
+
+// CHECK: vpcmpd $123, 291(%rax,%r14,8), %ymm24, %k4
+// CHECK:  encoding: [0x62,0xb3,0x3d,0x20,0x1f,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpd $123, 291(%rax,%r14,8), %ymm24, %k4
+
+// CHECK: vpcmpd $123, (%rcx){1to8}, %ymm24, %k4
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x30,0x1f,0x21,0x7b]
+          vpcmpd $123, (%rcx){1to8}, %ymm24, %k4
+
+// CHECK: vpcmpd $123, 4064(%rdx), %ymm24, %k4
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x20,0x1f,0x62,0x7f,0x7b]
+          vpcmpd $123, 4064(%rdx), %ymm24, %k4
+
+// CHECK: vpcmpd $123, 4096(%rdx), %ymm24, %k4
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x20,0x1f,0xa2,0x00,0x10,0x00,0x00,0x7b]
+          vpcmpd $123, 4096(%rdx), %ymm24, %k4
+
+// CHECK: vpcmpd $123, -4096(%rdx), %ymm24, %k4
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x20,0x1f,0x62,0x80,0x7b]
+          vpcmpd $123, -4096(%rdx), %ymm24, %k4
+
+// CHECK: vpcmpd $123, -4128(%rdx), %ymm24, %k4
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x20,0x1f,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+          vpcmpd $123, -4128(%rdx), %ymm24, %k4
+
+// CHECK: vpcmpd $123, 508(%rdx){1to8}, %ymm24, %k4
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x30,0x1f,0x62,0x7f,0x7b]
+          vpcmpd $123, 508(%rdx){1to8}, %ymm24, %k4
+
+// CHECK: vpcmpd $123, 512(%rdx){1to8}, %ymm24, %k4
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x30,0x1f,0xa2,0x00,0x02,0x00,0x00,0x7b]
+          vpcmpd $123, 512(%rdx){1to8}, %ymm24, %k4
+
+// CHECK: vpcmpd $123, -512(%rdx){1to8}, %ymm24, %k4
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x30,0x1f,0x62,0x80,0x7b]
+          vpcmpd $123, -512(%rdx){1to8}, %ymm24, %k4
+
+// CHECK: vpcmpd $123, -516(%rdx){1to8}, %ymm24, %k4
+// CHECK:  encoding: [0x62,0xf3,0x3d,0x30,0x1f,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
+          vpcmpd $123, -516(%rdx){1to8}, %ymm24, %k4
+
+// CHECK: vpcmpeqd %xmm24, %xmm29, %k3
+// CHECK:  encoding: [0x62,0x91,0x15,0x00,0x76,0xd8]
+          vpcmpeqd %xmm24, %xmm29, %k3
+
+// CHECK: vpcmpeqd %xmm24, %xmm29, %k3 {%k5}
+// CHECK:  encoding: [0x62,0x91,0x15,0x05,0x76,0xd8]
+          vpcmpeqd %xmm24, %xmm29, %k3 {%k5}
+
+// CHECK: vpcmpeqd (%rcx), %xmm29, %k3
+// CHECK:  encoding: [0x62,0xf1,0x15,0x00,0x76,0x19]
+          vpcmpeqd (%rcx), %xmm29, %k3
+
+// CHECK: vpcmpeqd 291(%rax,%r14,8), %xmm29, %k3
+// CHECK:  encoding: [0x62,0xb1,0x15,0x00,0x76,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpeqd 291(%rax,%r14,8), %xmm29, %k3
+
+// CHECK: vpcmpeqd (%rcx){1to4}, %xmm29, %k3
+// CHECK:  encoding: [0x62,0xf1,0x15,0x10,0x76,0x19]
+          vpcmpeqd (%rcx){1to4}, %xmm29, %k3
+
+// CHECK: vpcmpeqd 2032(%rdx), %xmm29, %k3
+// CHECK:  encoding: [0x62,0xf1,0x15,0x00,0x76,0x5a,0x7f]
+          vpcmpeqd 2032(%rdx), %xmm29, %k3
+
+// CHECK: vpcmpeqd 2048(%rdx), %xmm29, %k3
+// CHECK:  encoding: [0x62,0xf1,0x15,0x00,0x76,0x9a,0x00,0x08,0x00,0x00]
+          vpcmpeqd 2048(%rdx), %xmm29, %k3
+
+// CHECK: vpcmpeqd -2048(%rdx), %xmm29, %k3
+// CHECK:  encoding: [0x62,0xf1,0x15,0x00,0x76,0x5a,0x80]
+          vpcmpeqd -2048(%rdx), %xmm29, %k3
+
+// CHECK: vpcmpeqd -2064(%rdx), %xmm29, %k3
+// CHECK:  encoding: [0x62,0xf1,0x15,0x00,0x76,0x9a,0xf0,0xf7,0xff,0xff]
+          vpcmpeqd -2064(%rdx), %xmm29, %k3
+
+// CHECK: vpcmpeqd 508(%rdx){1to4}, %xmm29, %k3
+// CHECK:  encoding: [0x62,0xf1,0x15,0x10,0x76,0x5a,0x7f]
+          vpcmpeqd 508(%rdx){1to4}, %xmm29, %k3
+
+// CHECK: vpcmpeqd 512(%rdx){1to4}, %xmm29, %k3
+// CHECK:  encoding: [0x62,0xf1,0x15,0x10,0x76,0x9a,0x00,0x02,0x00,0x00]
+          vpcmpeqd 512(%rdx){1to4}, %xmm29, %k3
+
+// CHECK: vpcmpeqd -512(%rdx){1to4}, %xmm29, %k3
+// CHECK:  encoding: [0x62,0xf1,0x15,0x10,0x76,0x5a,0x80]
+          vpcmpeqd -512(%rdx){1to4}, %xmm29, %k3
+
+// CHECK: vpcmpeqd -516(%rdx){1to4}, %xmm29, %k3
+// CHECK:  encoding: [0x62,0xf1,0x15,0x10,0x76,0x9a,0xfc,0xfd,0xff,0xff]
+          vpcmpeqd -516(%rdx){1to4}, %xmm29, %k3
+
+// CHECK: vpcmpeqd %ymm20, %ymm26, %k5
+// CHECK:  encoding: [0x62,0xb1,0x2d,0x20,0x76,0xec]
+          vpcmpeqd %ymm20, %ymm26, %k5
+
+// CHECK: vpcmpeqd %ymm20, %ymm26, %k5 {%k5}
+// CHECK:  encoding: [0x62,0xb1,0x2d,0x25,0x76,0xec]
+          vpcmpeqd %ymm20, %ymm26, %k5 {%k5}
+
+// CHECK: vpcmpeqd (%rcx), %ymm26, %k5
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x20,0x76,0x29]
+          vpcmpeqd (%rcx), %ymm26, %k5
+
+// CHECK: vpcmpeqd 291(%rax,%r14,8), %ymm26, %k5
+// CHECK:  encoding: [0x62,0xb1,0x2d,0x20,0x76,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpeqd 291(%rax,%r14,8), %ymm26, %k5
+
+// CHECK: vpcmpeqd (%rcx){1to8}, %ymm26, %k5
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x30,0x76,0x29]
+          vpcmpeqd (%rcx){1to8}, %ymm26, %k5
+
+// CHECK: vpcmpeqd 4064(%rdx), %ymm26, %k5
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x20,0x76,0x6a,0x7f]
+          vpcmpeqd 4064(%rdx), %ymm26, %k5
+
+// CHECK: vpcmpeqd 4096(%rdx), %ymm26, %k5
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x20,0x76,0xaa,0x00,0x10,0x00,0x00]
+          vpcmpeqd 4096(%rdx), %ymm26, %k5
+
+// CHECK: vpcmpeqd -4096(%rdx), %ymm26, %k5
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x20,0x76,0x6a,0x80]
+          vpcmpeqd -4096(%rdx), %ymm26, %k5
+
+// CHECK: vpcmpeqd -4128(%rdx), %ymm26, %k5
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x20,0x76,0xaa,0xe0,0xef,0xff,0xff]
+          vpcmpeqd -4128(%rdx), %ymm26, %k5
+
+// CHECK: vpcmpeqd 508(%rdx){1to8}, %ymm26, %k5
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x30,0x76,0x6a,0x7f]
+          vpcmpeqd 508(%rdx){1to8}, %ymm26, %k5
+
+// CHECK: vpcmpeqd 512(%rdx){1to8}, %ymm26, %k5
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x30,0x76,0xaa,0x00,0x02,0x00,0x00]
+          vpcmpeqd 512(%rdx){1to8}, %ymm26, %k5
+
+// CHECK: vpcmpeqd -512(%rdx){1to8}, %ymm26, %k5
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x30,0x76,0x6a,0x80]
+          vpcmpeqd -512(%rdx){1to8}, %ymm26, %k5
+
+// CHECK: vpcmpeqd -516(%rdx){1to8}, %ymm26, %k5
+// CHECK:  encoding: [0x62,0xf1,0x2d,0x30,0x76,0xaa,0xfc,0xfd,0xff,0xff]
+          vpcmpeqd -516(%rdx){1to8}, %ymm26, %k5
+
+// CHECK: vpcmpeqq %xmm29, %xmm20, %k3
+// CHECK:  encoding: [0x62,0x92,0xdd,0x00,0x29,0xdd]
+          vpcmpeqq %xmm29, %xmm20, %k3
+
+// CHECK: vpcmpeqq %xmm29, %xmm20, %k3 {%k3}
+// CHECK:  encoding: [0x62,0x92,0xdd,0x03,0x29,0xdd]
+          vpcmpeqq %xmm29, %xmm20, %k3 {%k3}
+
+// CHECK: vpcmpeqq (%rcx), %xmm20, %k3
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x00,0x29,0x19]
+          vpcmpeqq (%rcx), %xmm20, %k3
+
+// CHECK: vpcmpeqq 291(%rax,%r14,8), %xmm20, %k3
+// CHECK:  encoding: [0x62,0xb2,0xdd,0x00,0x29,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpeqq 291(%rax,%r14,8), %xmm20, %k3
+
+// CHECK: vpcmpeqq (%rcx){1to2}, %xmm20, %k3
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x10,0x29,0x19]
+          vpcmpeqq (%rcx){1to2}, %xmm20, %k3
+
+// CHECK: vpcmpeqq 2032(%rdx), %xmm20, %k3
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x00,0x29,0x5a,0x7f]
+          vpcmpeqq 2032(%rdx), %xmm20, %k3
+
+// CHECK: vpcmpeqq 2048(%rdx), %xmm20, %k3
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x00,0x29,0x9a,0x00,0x08,0x00,0x00]
+          vpcmpeqq 2048(%rdx), %xmm20, %k3
+
+// CHECK: vpcmpeqq -2048(%rdx), %xmm20, %k3
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x00,0x29,0x5a,0x80]
+          vpcmpeqq -2048(%rdx), %xmm20, %k3
+
+// CHECK: vpcmpeqq -2064(%rdx), %xmm20, %k3
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x00,0x29,0x9a,0xf0,0xf7,0xff,0xff]
+          vpcmpeqq -2064(%rdx), %xmm20, %k3
+
+// CHECK: vpcmpeqq 1016(%rdx){1to2}, %xmm20, %k3
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x10,0x29,0x5a,0x7f]
+          vpcmpeqq 1016(%rdx){1to2}, %xmm20, %k3
+
+// CHECK: vpcmpeqq 1024(%rdx){1to2}, %xmm20, %k3
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x10,0x29,0x9a,0x00,0x04,0x00,0x00]
+          vpcmpeqq 1024(%rdx){1to2}, %xmm20, %k3
+
+// CHECK: vpcmpeqq -1024(%rdx){1to2}, %xmm20, %k3
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x10,0x29,0x5a,0x80]
+          vpcmpeqq -1024(%rdx){1to2}, %xmm20, %k3
+
+// CHECK: vpcmpeqq -1032(%rdx){1to2}, %xmm20, %k3
+// CHECK:  encoding: [0x62,0xf2,0xdd,0x10,0x29,0x9a,0xf8,0xfb,0xff,0xff]
+          vpcmpeqq -1032(%rdx){1to2}, %xmm20, %k3
+
+// CHECK: vpcmpeqq %ymm23, %ymm30, %k5
+// CHECK:  encoding: [0x62,0xb2,0x8d,0x20,0x29,0xef]
+          vpcmpeqq %ymm23, %ymm30, %k5
+
+// CHECK: vpcmpeqq %ymm23, %ymm30, %k5 {%k6}
+// CHECK:  encoding: [0x62,0xb2,0x8d,0x26,0x29,0xef]
+          vpcmpeqq %ymm23, %ymm30, %k5 {%k6}
+
+// CHECK: vpcmpeqq (%rcx), %ymm30, %k5
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x20,0x29,0x29]
+          vpcmpeqq (%rcx), %ymm30, %k5
+
+// CHECK: vpcmpeqq 291(%rax,%r14,8), %ymm30, %k5
+// CHECK:  encoding: [0x62,0xb2,0x8d,0x20,0x29,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpeqq 291(%rax,%r14,8), %ymm30, %k5
+
+// CHECK: vpcmpeqq (%rcx){1to4}, %ymm30, %k5
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x30,0x29,0x29]
+          vpcmpeqq (%rcx){1to4}, %ymm30, %k5
+
+// CHECK: vpcmpeqq 4064(%rdx), %ymm30, %k5
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x20,0x29,0x6a,0x7f]
+          vpcmpeqq 4064(%rdx), %ymm30, %k5
+
+// CHECK: vpcmpeqq 4096(%rdx), %ymm30, %k5
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x20,0x29,0xaa,0x00,0x10,0x00,0x00]
+          vpcmpeqq 4096(%rdx), %ymm30, %k5
+
+// CHECK: vpcmpeqq -4096(%rdx), %ymm30, %k5
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x20,0x29,0x6a,0x80]
+          vpcmpeqq -4096(%rdx), %ymm30, %k5
+
+// CHECK: vpcmpeqq -4128(%rdx), %ymm30, %k5
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x20,0x29,0xaa,0xe0,0xef,0xff,0xff]
+          vpcmpeqq -4128(%rdx), %ymm30, %k5
+
+// CHECK: vpcmpeqq 1016(%rdx){1to4}, %ymm30, %k5
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x30,0x29,0x6a,0x7f]
+          vpcmpeqq 1016(%rdx){1to4}, %ymm30, %k5
+
+// CHECK: vpcmpeqq 1024(%rdx){1to4}, %ymm30, %k5
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x30,0x29,0xaa,0x00,0x04,0x00,0x00]
+          vpcmpeqq 1024(%rdx){1to4}, %ymm30, %k5
+
+// CHECK: vpcmpeqq -1024(%rdx){1to4}, %ymm30, %k5
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x30,0x29,0x6a,0x80]
+          vpcmpeqq -1024(%rdx){1to4}, %ymm30, %k5
+
+// CHECK: vpcmpeqq -1032(%rdx){1to4}, %ymm30, %k5
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x30,0x29,0xaa,0xf8,0xfb,0xff,0xff]
+          vpcmpeqq -1032(%rdx){1to4}, %ymm30, %k5
+
+// CHECK: vpcmpgtd %xmm20, %xmm29, %k4
+// CHECK:  encoding: [0x62,0xb1,0x15,0x00,0x66,0xe4]
+          vpcmpgtd %xmm20, %xmm29, %k4
+
+// CHECK: vpcmpgtd %xmm20, %xmm29, %k4 {%k2}
+// CHECK:  encoding: [0x62,0xb1,0x15,0x02,0x66,0xe4]
+          vpcmpgtd %xmm20, %xmm29, %k4 {%k2}
+
+// CHECK: vpcmpgtd (%rcx), %xmm29, %k4
+// CHECK:  encoding: [0x62,0xf1,0x15,0x00,0x66,0x21]
+          vpcmpgtd (%rcx), %xmm29, %k4
+
+// CHECK: vpcmpgtd 291(%rax,%r14,8), %xmm29, %k4
+// CHECK:  encoding: [0x62,0xb1,0x15,0x00,0x66,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpgtd 291(%rax,%r14,8), %xmm29, %k4
+
+// CHECK: vpcmpgtd (%rcx){1to4}, %xmm29, %k4
+// CHECK:  encoding: [0x62,0xf1,0x15,0x10,0x66,0x21]
+          vpcmpgtd (%rcx){1to4}, %xmm29, %k4
+
+// CHECK: vpcmpgtd 2032(%rdx), %xmm29, %k4
+// CHECK:  encoding: [0x62,0xf1,0x15,0x00,0x66,0x62,0x7f]
+          vpcmpgtd 2032(%rdx), %xmm29, %k4
+
+// CHECK: vpcmpgtd 2048(%rdx), %xmm29, %k4
+// CHECK:  encoding: [0x62,0xf1,0x15,0x00,0x66,0xa2,0x00,0x08,0x00,0x00]
+          vpcmpgtd 2048(%rdx), %xmm29, %k4
+
+// CHECK: vpcmpgtd -2048(%rdx), %xmm29, %k4
+// CHECK:  encoding: [0x62,0xf1,0x15,0x00,0x66,0x62,0x80]
+          vpcmpgtd -2048(%rdx), %xmm29, %k4
+
+// CHECK: vpcmpgtd -2064(%rdx), %xmm29, %k4
+// CHECK:  encoding: [0x62,0xf1,0x15,0x00,0x66,0xa2,0xf0,0xf7,0xff,0xff]
+          vpcmpgtd -2064(%rdx), %xmm29, %k4
+
+// CHECK: vpcmpgtd 508(%rdx){1to4}, %xmm29, %k4
+// CHECK:  encoding: [0x62,0xf1,0x15,0x10,0x66,0x62,0x7f]
+          vpcmpgtd 508(%rdx){1to4}, %xmm29, %k4
+
+// CHECK: vpcmpgtd 512(%rdx){1to4}, %xmm29, %k4
+// CHECK:  encoding: [0x62,0xf1,0x15,0x10,0x66,0xa2,0x00,0x02,0x00,0x00]
+          vpcmpgtd 512(%rdx){1to4}, %xmm29, %k4
+
+// CHECK: vpcmpgtd -512(%rdx){1to4}, %xmm29, %k4
+// CHECK:  encoding: [0x62,0xf1,0x15,0x10,0x66,0x62,0x80]
+          vpcmpgtd -512(%rdx){1to4}, %xmm29, %k4
+
+// CHECK: vpcmpgtd -516(%rdx){1to4}, %xmm29, %k4
+// CHECK:  encoding: [0x62,0xf1,0x15,0x10,0x66,0xa2,0xfc,0xfd,0xff,0xff]
+          vpcmpgtd -516(%rdx){1to4}, %xmm29, %k4
+
+// CHECK: vpcmpgtd %ymm17, %ymm22, %k2
+// CHECK:  encoding: [0x62,0xb1,0x4d,0x20,0x66,0xd1]
+          vpcmpgtd %ymm17, %ymm22, %k2
+
+// CHECK: vpcmpgtd %ymm17, %ymm22, %k2 {%k1}
+// CHECK:  encoding: [0x62,0xb1,0x4d,0x21,0x66,0xd1]
+          vpcmpgtd %ymm17, %ymm22, %k2 {%k1}
+
+// CHECK: vpcmpgtd (%rcx), %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf1,0x4d,0x20,0x66,0x11]
+          vpcmpgtd (%rcx), %ymm22, %k2
+
+// CHECK: vpcmpgtd 291(%rax,%r14,8), %ymm22, %k2
+// CHECK:  encoding: [0x62,0xb1,0x4d,0x20,0x66,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpgtd 291(%rax,%r14,8), %ymm22, %k2
+
+// CHECK: vpcmpgtd (%rcx){1to8}, %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf1,0x4d,0x30,0x66,0x11]
+          vpcmpgtd (%rcx){1to8}, %ymm22, %k2
+
+// CHECK: vpcmpgtd 4064(%rdx), %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf1,0x4d,0x20,0x66,0x52,0x7f]
+          vpcmpgtd 4064(%rdx), %ymm22, %k2
+
+// CHECK: vpcmpgtd 4096(%rdx), %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf1,0x4d,0x20,0x66,0x92,0x00,0x10,0x00,0x00]
+          vpcmpgtd 4096(%rdx), %ymm22, %k2
+
+// CHECK: vpcmpgtd -4096(%rdx), %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf1,0x4d,0x20,0x66,0x52,0x80]
+          vpcmpgtd -4096(%rdx), %ymm22, %k2
+
+// CHECK: vpcmpgtd -4128(%rdx), %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf1,0x4d,0x20,0x66,0x92,0xe0,0xef,0xff,0xff]
+          vpcmpgtd -4128(%rdx), %ymm22, %k2
+
+// CHECK: vpcmpgtd 508(%rdx){1to8}, %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf1,0x4d,0x30,0x66,0x52,0x7f]
+          vpcmpgtd 508(%rdx){1to8}, %ymm22, %k2
+
+// CHECK: vpcmpgtd 512(%rdx){1to8}, %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf1,0x4d,0x30,0x66,0x92,0x00,0x02,0x00,0x00]
+          vpcmpgtd 512(%rdx){1to8}, %ymm22, %k2
+
+// CHECK: vpcmpgtd -512(%rdx){1to8}, %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf1,0x4d,0x30,0x66,0x52,0x80]
+          vpcmpgtd -512(%rdx){1to8}, %ymm22, %k2
+
+// CHECK: vpcmpgtd -516(%rdx){1to8}, %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf1,0x4d,0x30,0x66,0x92,0xfc,0xfd,0xff,0xff]
+          vpcmpgtd -516(%rdx){1to8}, %ymm22, %k2
+
+// CHECK: vpcmpgtq %xmm25, %xmm30, %k3
+// CHECK:  encoding: [0x62,0x92,0x8d,0x00,0x37,0xd9]
+          vpcmpgtq %xmm25, %xmm30, %k3
+
+// CHECK: vpcmpgtq %xmm25, %xmm30, %k3 {%k6}
+// CHECK:  encoding: [0x62,0x92,0x8d,0x06,0x37,0xd9]
+          vpcmpgtq %xmm25, %xmm30, %k3 {%k6}
+
+// CHECK: vpcmpgtq (%rcx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x00,0x37,0x19]
+          vpcmpgtq (%rcx), %xmm30, %k3
+
+// CHECK: vpcmpgtq 291(%rax,%r14,8), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xb2,0x8d,0x00,0x37,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpgtq 291(%rax,%r14,8), %xmm30, %k3
+
+// CHECK: vpcmpgtq (%rcx){1to2}, %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x10,0x37,0x19]
+          vpcmpgtq (%rcx){1to2}, %xmm30, %k3
+
+// CHECK: vpcmpgtq 2032(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x00,0x37,0x5a,0x7f]
+          vpcmpgtq 2032(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpgtq 2048(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x00,0x37,0x9a,0x00,0x08,0x00,0x00]
+          vpcmpgtq 2048(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpgtq -2048(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x00,0x37,0x5a,0x80]
+          vpcmpgtq -2048(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpgtq -2064(%rdx), %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x00,0x37,0x9a,0xf0,0xf7,0xff,0xff]
+          vpcmpgtq -2064(%rdx), %xmm30, %k3
+
+// CHECK: vpcmpgtq 1016(%rdx){1to2}, %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x10,0x37,0x5a,0x7f]
+          vpcmpgtq 1016(%rdx){1to2}, %xmm30, %k3
+
+// CHECK: vpcmpgtq 1024(%rdx){1to2}, %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x10,0x37,0x9a,0x00,0x04,0x00,0x00]
+          vpcmpgtq 1024(%rdx){1to2}, %xmm30, %k3
+
+// CHECK: vpcmpgtq -1024(%rdx){1to2}, %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x10,0x37,0x5a,0x80]
+          vpcmpgtq -1024(%rdx){1to2}, %xmm30, %k3
+
+// CHECK: vpcmpgtq -1032(%rdx){1to2}, %xmm30, %k3
+// CHECK:  encoding: [0x62,0xf2,0x8d,0x10,0x37,0x9a,0xf8,0xfb,0xff,0xff]
+          vpcmpgtq -1032(%rdx){1to2}, %xmm30, %k3
+
+// CHECK: vpcmpgtq %ymm20, %ymm22, %k2
+// CHECK:  encoding: [0x62,0xb2,0xcd,0x20,0x37,0xd4]
+          vpcmpgtq %ymm20, %ymm22, %k2
+
+// CHECK: vpcmpgtq %ymm20, %ymm22, %k2 {%k1}
+// CHECK:  encoding: [0x62,0xb2,0xcd,0x21,0x37,0xd4]
+          vpcmpgtq %ymm20, %ymm22, %k2 {%k1}
+
+// CHECK: vpcmpgtq (%rcx), %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x20,0x37,0x11]
+          vpcmpgtq (%rcx), %ymm22, %k2
+
+// CHECK: vpcmpgtq 291(%rax,%r14,8), %ymm22, %k2
+// CHECK:  encoding: [0x62,0xb2,0xcd,0x20,0x37,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpcmpgtq 291(%rax,%r14,8), %ymm22, %k2
+
+// CHECK: vpcmpgtq (%rcx){1to4}, %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x30,0x37,0x11]
+          vpcmpgtq (%rcx){1to4}, %ymm22, %k2
+
+// CHECK: vpcmpgtq 4064(%rdx), %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x20,0x37,0x52,0x7f]
+          vpcmpgtq 4064(%rdx), %ymm22, %k2
+
+// CHECK: vpcmpgtq 4096(%rdx), %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x20,0x37,0x92,0x00,0x10,0x00,0x00]
+          vpcmpgtq 4096(%rdx), %ymm22, %k2
+
+// CHECK: vpcmpgtq -4096(%rdx), %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x20,0x37,0x52,0x80]
+          vpcmpgtq -4096(%rdx), %ymm22, %k2
+
+// CHECK: vpcmpgtq -4128(%rdx), %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x20,0x37,0x92,0xe0,0xef,0xff,0xff]
+          vpcmpgtq -4128(%rdx), %ymm22, %k2
+
+// CHECK: vpcmpgtq 1016(%rdx){1to4}, %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x30,0x37,0x52,0x7f]
+          vpcmpgtq 1016(%rdx){1to4}, %ymm22, %k2
+
+// CHECK: vpcmpgtq 1024(%rdx){1to4}, %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x30,0x37,0x92,0x00,0x04,0x00,0x00]
+          vpcmpgtq 1024(%rdx){1to4}, %ymm22, %k2
+
+// CHECK: vpcmpgtq -1024(%rdx){1to4}, %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x30,0x37,0x52,0x80]
+          vpcmpgtq -1024(%rdx){1to4}, %ymm22, %k2
+
+// CHECK: vpcmpgtq -1032(%rdx){1to4}, %ymm22, %k2
+// CHECK:  encoding: [0x62,0xf2,0xcd,0x30,0x37,0x92,0xf8,0xfb,0xff,0xff]
+          vpcmpgtq -1032(%rdx){1to4}, %ymm22, %k2
+
+// CHECK: vpcmpq $171, %xmm24, %xmm27, %k5
+// CHECK:  encoding: [0x62,0x93,0xa5,0x00,0x1f,0xe8,0xab]
+          vpcmpq $171, %xmm24, %xmm27, %k5
+
+// CHECK: vpcmpq $171, %xmm24, %xmm27, %k5 {%k7}
+// CHECK:  encoding: [0x62,0x93,0xa5,0x07,0x1f,0xe8,0xab]
+          vpcmpq $171, %xmm24, %xmm27, %k5 {%k7}
+
+// CHECK: vpcmpq $123, %xmm24, %xmm27, %k5
+// CHECK:  encoding: [0x62,0x93,0xa5,0x00,0x1f,0xe8,0x7b]
+          vpcmpq $123, %xmm24, %xmm27, %k5
+
+// CHECK: vpcmpq $123, (%rcx), %xmm27, %k5
+// CHECK:  encoding: [0x62,0xf3,0xa5,0x00,0x1f,0x29,0x7b]
+          vpcmpq $123, (%rcx), %xmm27, %k5
+
+// CHECK: vpcmpq $123, 291(%rax,%r14,8), %xmm27, %k5
+// CHECK:  encoding: [0x62,0xb3,0xa5,0x00,0x1f,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpq $123, 291(%rax,%r14,8), %xmm27, %k5
+
+// CHECK: vpcmpq $123, (%rcx){1to2}, %xmm27, %k5
+// CHECK:  encoding: [0x62,0xf3,0xa5,0x10,0x1f,0x29,0x7b]
+          vpcmpq $123, (%rcx){1to2}, %xmm27, %k5
+
+// CHECK: vpcmpq $123, 2032(%rdx), %xmm27, %k5
+// CHECK:  encoding: [0x62,0xf3,0xa5,0x00,0x1f,0x6a,0x7f,0x7b]
+          vpcmpq $123, 2032(%rdx), %xmm27, %k5
+
+// CHECK: vpcmpq $123, 2048(%rdx), %xmm27, %k5
+// CHECK:  encoding: [0x62,0xf3,0xa5,0x00,0x1f,0xaa,0x00,0x08,0x00,0x00,0x7b]
+          vpcmpq $123, 2048(%rdx), %xmm27, %k5
+
+// CHECK: vpcmpq $123, -2048(%rdx), %xmm27, %k5
+// CHECK:  encoding: [0x62,0xf3,0xa5,0x00,0x1f,0x6a,0x80,0x7b]
+          vpcmpq $123, -2048(%rdx), %xmm27, %k5
+
+// CHECK: vpcmpq $123, -2064(%rdx), %xmm27, %k5
+// CHECK:  encoding: [0x62,0xf3,0xa5,0x00,0x1f,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+          vpcmpq $123, -2064(%rdx), %xmm27, %k5
+
+// CHECK: vpcmpq $123, 1016(%rdx){1to2}, %xmm27, %k5
+// CHECK:  encoding: [0x62,0xf3,0xa5,0x10,0x1f,0x6a,0x7f,0x7b]
+          vpcmpq $123, 1016(%rdx){1to2}, %xmm27, %k5
+
+// CHECK: vpcmpq $123, 1024(%rdx){1to2}, %xmm27, %k5
+// CHECK:  encoding: [0x62,0xf3,0xa5,0x10,0x1f,0xaa,0x00,0x04,0x00,0x00,0x7b]
+          vpcmpq $123, 1024(%rdx){1to2}, %xmm27, %k5
+
+// CHECK: vpcmpq $123, -1024(%rdx){1to2}, %xmm27, %k5
+// CHECK:  encoding: [0x62,0xf3,0xa5,0x10,0x1f,0x6a,0x80,0x7b]
+          vpcmpq $123, -1024(%rdx){1to2}, %xmm27, %k5
+
+// CHECK: vpcmpq $123, -1032(%rdx){1to2}, %xmm27, %k5
+// CHECK:  encoding: [0x62,0xf3,0xa5,0x10,0x1f,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
+          vpcmpq $123, -1032(%rdx){1to2}, %xmm27, %k5
+
+// CHECK: vpcmpq $171, %ymm19, %ymm26, %k4
+// CHECK:  encoding: [0x62,0xb3,0xad,0x20,0x1f,0xe3,0xab]
+          vpcmpq $171, %ymm19, %ymm26, %k4
+
+// CHECK: vpcmpq $171, %ymm19, %ymm26, %k4 {%k6}
+// CHECK:  encoding: [0x62,0xb3,0xad,0x26,0x1f,0xe3,0xab]
+          vpcmpq $171, %ymm19, %ymm26, %k4 {%k6}
+
+// CHECK: vpcmpq $123, %ymm19, %ymm26, %k4
+// CHECK:  encoding: [0x62,0xb3,0xad,0x20,0x1f,0xe3,0x7b]
+          vpcmpq $123, %ymm19, %ymm26, %k4
+
+// CHECK: vpcmpq $123, (%rcx), %ymm26, %k4
+// CHECK:  encoding: [0x62,0xf3,0xad,0x20,0x1f,0x21,0x7b]
+          vpcmpq $123, (%rcx), %ymm26, %k4
+
+// CHECK: vpcmpq $123, 291(%rax,%r14,8), %ymm26, %k4
+// CHECK:  encoding: [0x62,0xb3,0xad,0x20,0x1f,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpq $123, 291(%rax,%r14,8), %ymm26, %k4
+
+// CHECK: vpcmpq $123, (%rcx){1to4}, %ymm26, %k4
+// CHECK:  encoding: [0x62,0xf3,0xad,0x30,0x1f,0x21,0x7b]
+          vpcmpq $123, (%rcx){1to4}, %ymm26, %k4
+
+// CHECK: vpcmpq $123, 4064(%rdx), %ymm26, %k4
+// CHECK:  encoding: [0x62,0xf3,0xad,0x20,0x1f,0x62,0x7f,0x7b]
+          vpcmpq $123, 4064(%rdx), %ymm26, %k4
+
+// CHECK: vpcmpq $123, 4096(%rdx), %ymm26, %k4
+// CHECK:  encoding: [0x62,0xf3,0xad,0x20,0x1f,0xa2,0x00,0x10,0x00,0x00,0x7b]
+          vpcmpq $123, 4096(%rdx), %ymm26, %k4
+
+// CHECK: vpcmpq $123, -4096(%rdx), %ymm26, %k4
+// CHECK:  encoding: [0x62,0xf3,0xad,0x20,0x1f,0x62,0x80,0x7b]
+          vpcmpq $123, -4096(%rdx), %ymm26, %k4
+
+// CHECK: vpcmpq $123, -4128(%rdx), %ymm26, %k4
+// CHECK:  encoding: [0x62,0xf3,0xad,0x20,0x1f,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+          vpcmpq $123, -4128(%rdx), %ymm26, %k4
+
+// CHECK: vpcmpq $123, 1016(%rdx){1to4}, %ymm26, %k4
+// CHECK:  encoding: [0x62,0xf3,0xad,0x30,0x1f,0x62,0x7f,0x7b]
+          vpcmpq $123, 1016(%rdx){1to4}, %ymm26, %k4
+
+// CHECK: vpcmpq $123, 1024(%rdx){1to4}, %ymm26, %k4
+// CHECK:  encoding: [0x62,0xf3,0xad,0x30,0x1f,0xa2,0x00,0x04,0x00,0x00,0x7b]
+          vpcmpq $123, 1024(%rdx){1to4}, %ymm26, %k4
+
+// CHECK: vpcmpq $123, -1024(%rdx){1to4}, %ymm26, %k4
+// CHECK:  encoding: [0x62,0xf3,0xad,0x30,0x1f,0x62,0x80,0x7b]
+          vpcmpq $123, -1024(%rdx){1to4}, %ymm26, %k4
+
+// CHECK: vpcmpq $123, -1032(%rdx){1to4}, %ymm26, %k4
+// CHECK:  encoding: [0x62,0xf3,0xad,0x30,0x1f,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+          vpcmpq $123, -1032(%rdx){1to4}, %ymm26, %k4
+
+// CHECK: vpcmpud $171, %xmm21, %xmm22, %k3
+// CHECK:  encoding: [0x62,0xb3,0x4d,0x00,0x1e,0xdd,0xab]
+          vpcmpud $171, %xmm21, %xmm22, %k3
+
+// CHECK: vpcmpud $171, %xmm21, %xmm22, %k3 {%k1}
+// CHECK:  encoding: [0x62,0xb3,0x4d,0x01,0x1e,0xdd,0xab]
+          vpcmpud $171, %xmm21, %xmm22, %k3 {%k1}
+
+// CHECK: vpcmpud $123, %xmm21, %xmm22, %k3
+// CHECK:  encoding: [0x62,0xb3,0x4d,0x00,0x1e,0xdd,0x7b]
+          vpcmpud $123, %xmm21, %xmm22, %k3
+
+// CHECK: vpcmpud $123, (%rcx), %xmm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0x4d,0x00,0x1e,0x19,0x7b]
+          vpcmpud $123, (%rcx), %xmm22, %k3
+
+// CHECK: vpcmpud $123, 291(%rax,%r14,8), %xmm22, %k3
+// CHECK:  encoding: [0x62,0xb3,0x4d,0x00,0x1e,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpud $123, 291(%rax,%r14,8), %xmm22, %k3
+
+// CHECK: vpcmpud $123, (%rcx){1to4}, %xmm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0x4d,0x10,0x1e,0x19,0x7b]
+          vpcmpud $123, (%rcx){1to4}, %xmm22, %k3
+
+// CHECK: vpcmpud $123, 2032(%rdx), %xmm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0x4d,0x00,0x1e,0x5a,0x7f,0x7b]
+          vpcmpud $123, 2032(%rdx), %xmm22, %k3
+
+// CHECK: vpcmpud $123, 2048(%rdx), %xmm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0x4d,0x00,0x1e,0x9a,0x00,0x08,0x00,0x00,0x7b]
+          vpcmpud $123, 2048(%rdx), %xmm22, %k3
+
+// CHECK: vpcmpud $123, -2048(%rdx), %xmm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0x4d,0x00,0x1e,0x5a,0x80,0x7b]
+          vpcmpud $123, -2048(%rdx), %xmm22, %k3
+
+// CHECK: vpcmpud $123, -2064(%rdx), %xmm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0x4d,0x00,0x1e,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+          vpcmpud $123, -2064(%rdx), %xmm22, %k3
+
+// CHECK: vpcmpud $123, 508(%rdx){1to4}, %xmm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0x4d,0x10,0x1e,0x5a,0x7f,0x7b]
+          vpcmpud $123, 508(%rdx){1to4}, %xmm22, %k3
+
+// CHECK: vpcmpud $123, 512(%rdx){1to4}, %xmm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0x4d,0x10,0x1e,0x9a,0x00,0x02,0x00,0x00,0x7b]
+          vpcmpud $123, 512(%rdx){1to4}, %xmm22, %k3
+
+// CHECK: vpcmpud $123, -512(%rdx){1to4}, %xmm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0x4d,0x10,0x1e,0x5a,0x80,0x7b]
+          vpcmpud $123, -512(%rdx){1to4}, %xmm22, %k3
+
+// CHECK: vpcmpud $123, -516(%rdx){1to4}, %xmm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0x4d,0x10,0x1e,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+          vpcmpud $123, -516(%rdx){1to4}, %xmm22, %k3
+
+// CHECK: vpcmpud $171, %ymm20, %ymm30, %k3
+// CHECK:  encoding: [0x62,0xb3,0x0d,0x20,0x1e,0xdc,0xab]
+          vpcmpud $171, %ymm20, %ymm30, %k3
+
+// CHECK: vpcmpud $171, %ymm20, %ymm30, %k3 {%k6}
+// CHECK:  encoding: [0x62,0xb3,0x0d,0x26,0x1e,0xdc,0xab]
+          vpcmpud $171, %ymm20, %ymm30, %k3 {%k6}
+
+// CHECK: vpcmpud $123, %ymm20, %ymm30, %k3
+// CHECK:  encoding: [0x62,0xb3,0x0d,0x20,0x1e,0xdc,0x7b]
+          vpcmpud $123, %ymm20, %ymm30, %k3
+
+// CHECK: vpcmpud $123, (%rcx), %ymm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x0d,0x20,0x1e,0x19,0x7b]
+          vpcmpud $123, (%rcx), %ymm30, %k3
+
+// CHECK: vpcmpud $123, 291(%rax,%r14,8), %ymm30, %k3
+// CHECK:  encoding: [0x62,0xb3,0x0d,0x20,0x1e,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpud $123, 291(%rax,%r14,8), %ymm30, %k3
+
+// CHECK: vpcmpud $123, (%rcx){1to8}, %ymm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x0d,0x30,0x1e,0x19,0x7b]
+          vpcmpud $123, (%rcx){1to8}, %ymm30, %k3
+
+// CHECK: vpcmpud $123, 4064(%rdx), %ymm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x0d,0x20,0x1e,0x5a,0x7f,0x7b]
+          vpcmpud $123, 4064(%rdx), %ymm30, %k3
+
+// CHECK: vpcmpud $123, 4096(%rdx), %ymm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x0d,0x20,0x1e,0x9a,0x00,0x10,0x00,0x00,0x7b]
+          vpcmpud $123, 4096(%rdx), %ymm30, %k3
+
+// CHECK: vpcmpud $123, -4096(%rdx), %ymm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x0d,0x20,0x1e,0x5a,0x80,0x7b]
+          vpcmpud $123, -4096(%rdx), %ymm30, %k3
+
+// CHECK: vpcmpud $123, -4128(%rdx), %ymm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x0d,0x20,0x1e,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+          vpcmpud $123, -4128(%rdx), %ymm30, %k3
+
+// CHECK: vpcmpud $123, 508(%rdx){1to8}, %ymm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x0d,0x30,0x1e,0x5a,0x7f,0x7b]
+          vpcmpud $123, 508(%rdx){1to8}, %ymm30, %k3
+
+// CHECK: vpcmpud $123, 512(%rdx){1to8}, %ymm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x0d,0x30,0x1e,0x9a,0x00,0x02,0x00,0x00,0x7b]
+          vpcmpud $123, 512(%rdx){1to8}, %ymm30, %k3
+
+// CHECK: vpcmpud $123, -512(%rdx){1to8}, %ymm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x0d,0x30,0x1e,0x5a,0x80,0x7b]
+          vpcmpud $123, -512(%rdx){1to8}, %ymm30, %k3
+
+// CHECK: vpcmpud $123, -516(%rdx){1to8}, %ymm30, %k3
+// CHECK:  encoding: [0x62,0xf3,0x0d,0x30,0x1e,0x9a,0xfc,0xfd,0xff,0xff,0x7b]
+          vpcmpud $123, -516(%rdx){1to8}, %ymm30, %k3
+
+// CHECK: vpcmpuq $171, %xmm28, %xmm28, %k5
+// CHECK:  encoding: [0x62,0x93,0x9d,0x00,0x1e,0xec,0xab]
+          vpcmpuq $171, %xmm28, %xmm28, %k5
+
+// CHECK: vpcmpuq $171, %xmm28, %xmm28, %k5 {%k4}
+// CHECK:  encoding: [0x62,0x93,0x9d,0x04,0x1e,0xec,0xab]
+          vpcmpuq $171, %xmm28, %xmm28, %k5 {%k4}
+
+// CHECK: vpcmpuq $123, %xmm28, %xmm28, %k5
+// CHECK:  encoding: [0x62,0x93,0x9d,0x00,0x1e,0xec,0x7b]
+          vpcmpuq $123, %xmm28, %xmm28, %k5
+
+// CHECK: vpcmpuq $123, (%rcx), %xmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x00,0x1e,0x29,0x7b]
+          vpcmpuq $123, (%rcx), %xmm28, %k5
+
+// CHECK: vpcmpuq $123, 291(%rax,%r14,8), %xmm28, %k5
+// CHECK:  encoding: [0x62,0xb3,0x9d,0x00,0x1e,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpuq $123, 291(%rax,%r14,8), %xmm28, %k5
+
+// CHECK: vpcmpuq $123, (%rcx){1to2}, %xmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x10,0x1e,0x29,0x7b]
+          vpcmpuq $123, (%rcx){1to2}, %xmm28, %k5
+
+// CHECK: vpcmpuq $123, 2032(%rdx), %xmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x00,0x1e,0x6a,0x7f,0x7b]
+          vpcmpuq $123, 2032(%rdx), %xmm28, %k5
+
+// CHECK: vpcmpuq $123, 2048(%rdx), %xmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x00,0x1e,0xaa,0x00,0x08,0x00,0x00,0x7b]
+          vpcmpuq $123, 2048(%rdx), %xmm28, %k5
+
+// CHECK: vpcmpuq $123, -2048(%rdx), %xmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x00,0x1e,0x6a,0x80,0x7b]
+          vpcmpuq $123, -2048(%rdx), %xmm28, %k5
+
+// CHECK: vpcmpuq $123, -2064(%rdx), %xmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x00,0x1e,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+          vpcmpuq $123, -2064(%rdx), %xmm28, %k5
+
+// CHECK: vpcmpuq $123, 1016(%rdx){1to2}, %xmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x10,0x1e,0x6a,0x7f,0x7b]
+          vpcmpuq $123, 1016(%rdx){1to2}, %xmm28, %k5
+
+// CHECK: vpcmpuq $123, 1024(%rdx){1to2}, %xmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x10,0x1e,0xaa,0x00,0x04,0x00,0x00,0x7b]
+          vpcmpuq $123, 1024(%rdx){1to2}, %xmm28, %k5
+
+// CHECK: vpcmpuq $123, -1024(%rdx){1to2}, %xmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x10,0x1e,0x6a,0x80,0x7b]
+          vpcmpuq $123, -1024(%rdx){1to2}, %xmm28, %k5
+
+// CHECK: vpcmpuq $123, -1032(%rdx){1to2}, %xmm28, %k5
+// CHECK:  encoding: [0x62,0xf3,0x9d,0x10,0x1e,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
+          vpcmpuq $123, -1032(%rdx){1to2}, %xmm28, %k5
+
+// CHECK: vpcmpuq $171, %ymm26, %ymm22, %k3
+// CHECK:  encoding: [0x62,0x93,0xcd,0x20,0x1e,0xda,0xab]
+          vpcmpuq $171, %ymm26, %ymm22, %k3
+
+// CHECK: vpcmpuq $171, %ymm26, %ymm22, %k3 {%k3}
+// CHECK:  encoding: [0x62,0x93,0xcd,0x23,0x1e,0xda,0xab]
+          vpcmpuq $171, %ymm26, %ymm22, %k3 {%k3}
+
+// CHECK: vpcmpuq $123, %ymm26, %ymm22, %k3
+// CHECK:  encoding: [0x62,0x93,0xcd,0x20,0x1e,0xda,0x7b]
+          vpcmpuq $123, %ymm26, %ymm22, %k3
+
+// CHECK: vpcmpuq $123, (%rcx), %ymm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0xcd,0x20,0x1e,0x19,0x7b]
+          vpcmpuq $123, (%rcx), %ymm22, %k3
+
+// CHECK: vpcmpuq $123, 291(%rax,%r14,8), %ymm22, %k3
+// CHECK:  encoding: [0x62,0xb3,0xcd,0x20,0x1e,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+          vpcmpuq $123, 291(%rax,%r14,8), %ymm22, %k3
+
+// CHECK: vpcmpuq $123, (%rcx){1to4}, %ymm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0xcd,0x30,0x1e,0x19,0x7b]
+          vpcmpuq $123, (%rcx){1to4}, %ymm22, %k3
+
+// CHECK: vpcmpuq $123, 4064(%rdx), %ymm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0xcd,0x20,0x1e,0x5a,0x7f,0x7b]
+          vpcmpuq $123, 4064(%rdx), %ymm22, %k3
+
+// CHECK: vpcmpuq $123, 4096(%rdx), %ymm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0xcd,0x20,0x1e,0x9a,0x00,0x10,0x00,0x00,0x7b]
+          vpcmpuq $123, 4096(%rdx), %ymm22, %k3
+
+// CHECK: vpcmpuq $123, -4096(%rdx), %ymm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0xcd,0x20,0x1e,0x5a,0x80,0x7b]
+          vpcmpuq $123, -4096(%rdx), %ymm22, %k3
+
+// CHECK: vpcmpuq $123, -4128(%rdx), %ymm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0xcd,0x20,0x1e,0x9a,0xe0,0xef,0xff,0xff,0x7b]
+          vpcmpuq $123, -4128(%rdx), %ymm22, %k3
+
+// CHECK: vpcmpuq $123, 1016(%rdx){1to4}, %ymm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0xcd,0x30,0x1e,0x5a,0x7f,0x7b]
+          vpcmpuq $123, 1016(%rdx){1to4}, %ymm22, %k3
+
+// CHECK: vpcmpuq $123, 1024(%rdx){1to4}, %ymm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0xcd,0x30,0x1e,0x9a,0x00,0x04,0x00,0x00,0x7b]
+          vpcmpuq $123, 1024(%rdx){1to4}, %ymm22, %k3
+
+// CHECK: vpcmpuq $123, -1024(%rdx){1to4}, %ymm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0xcd,0x30,0x1e,0x5a,0x80,0x7b]
+          vpcmpuq $123, -1024(%rdx){1to4}, %ymm22, %k3
+
+// CHECK: vpcmpuq $123, -1032(%rdx){1to4}, %ymm22, %k3
+// CHECK:  encoding: [0x62,0xf3,0xcd,0x30,0x1e,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+          vpcmpuq $123, -1032(%rdx){1to4}, %ymm22, %k3
+
+// CHECK: vpmaxsd %xmm26, %xmm22, %xmm19
+// CHECK:  encoding: [0x62,0x82,0x4d,0x00,0x3d,0xda]
+          vpmaxsd %xmm26, %xmm22, %xmm19
+
+// CHECK: vpmaxsd %xmm26, %xmm22, %xmm19 {%k2}
+// CHECK:  encoding: [0x62,0x82,0x4d,0x02,0x3d,0xda]
+          vpmaxsd %xmm26, %xmm22, %xmm19 {%k2}
+
+// CHECK: vpmaxsd %xmm26, %xmm22, %xmm19 {%k2} {z}
+// CHECK:  encoding: [0x62,0x82,0x4d,0x82,0x3d,0xda]
+          vpmaxsd %xmm26, %xmm22, %xmm19 {%k2} {z}
+
+// CHECK: vpmaxsd (%rcx), %xmm22, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x00,0x3d,0x19]
+          vpmaxsd (%rcx), %xmm22, %xmm19
+
+// CHECK: vpmaxsd 291(%rax,%r14,8), %xmm22, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x00,0x3d,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxsd 291(%rax,%r14,8), %xmm22, %xmm19
+
+// CHECK: vpmaxsd (%rcx){1to4}, %xmm22, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x10,0x3d,0x19]
+          vpmaxsd (%rcx){1to4}, %xmm22, %xmm19
+
+// CHECK: vpmaxsd 2032(%rdx), %xmm22, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x00,0x3d,0x5a,0x7f]
+          vpmaxsd 2032(%rdx), %xmm22, %xmm19
+
+// CHECK: vpmaxsd 2048(%rdx), %xmm22, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x00,0x3d,0x9a,0x00,0x08,0x00,0x00]
+          vpmaxsd 2048(%rdx), %xmm22, %xmm19
+
+// CHECK: vpmaxsd -2048(%rdx), %xmm22, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x00,0x3d,0x5a,0x80]
+          vpmaxsd -2048(%rdx), %xmm22, %xmm19
+
+// CHECK: vpmaxsd -2064(%rdx), %xmm22, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x00,0x3d,0x9a,0xf0,0xf7,0xff,0xff]
+          vpmaxsd -2064(%rdx), %xmm22, %xmm19
+
+// CHECK: vpmaxsd 508(%rdx){1to4}, %xmm22, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x10,0x3d,0x5a,0x7f]
+          vpmaxsd 508(%rdx){1to4}, %xmm22, %xmm19
+
+// CHECK: vpmaxsd 512(%rdx){1to4}, %xmm22, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x10,0x3d,0x9a,0x00,0x02,0x00,0x00]
+          vpmaxsd 512(%rdx){1to4}, %xmm22, %xmm19
+
+// CHECK: vpmaxsd -512(%rdx){1to4}, %xmm22, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x10,0x3d,0x5a,0x80]
+          vpmaxsd -512(%rdx){1to4}, %xmm22, %xmm19
+
+// CHECK: vpmaxsd -516(%rdx){1to4}, %xmm22, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x10,0x3d,0x9a,0xfc,0xfd,0xff,0xff]
+          vpmaxsd -516(%rdx){1to4}, %xmm22, %xmm19
+
+// CHECK: vpmaxsd %ymm23, %ymm24, %ymm27
+// CHECK:  encoding: [0x62,0x22,0x3d,0x20,0x3d,0xdf]
+          vpmaxsd %ymm23, %ymm24, %ymm27
+
+// CHECK: vpmaxsd %ymm23, %ymm24, %ymm27 {%k6}
+// CHECK:  encoding: [0x62,0x22,0x3d,0x26,0x3d,0xdf]
+          vpmaxsd %ymm23, %ymm24, %ymm27 {%k6}
+
+// CHECK: vpmaxsd %ymm23, %ymm24, %ymm27 {%k6} {z}
+// CHECK:  encoding: [0x62,0x22,0x3d,0xa6,0x3d,0xdf]
+          vpmaxsd %ymm23, %ymm24, %ymm27 {%k6} {z}
+
+// CHECK: vpmaxsd (%rcx), %ymm24, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x3d,0x20,0x3d,0x19]
+          vpmaxsd (%rcx), %ymm24, %ymm27
+
+// CHECK: vpmaxsd 291(%rax,%r14,8), %ymm24, %ymm27
+// CHECK:  encoding: [0x62,0x22,0x3d,0x20,0x3d,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxsd 291(%rax,%r14,8), %ymm24, %ymm27
+
+// CHECK: vpmaxsd (%rcx){1to8}, %ymm24, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x3d,0x30,0x3d,0x19]
+          vpmaxsd (%rcx){1to8}, %ymm24, %ymm27
+
+// CHECK: vpmaxsd 4064(%rdx), %ymm24, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x3d,0x20,0x3d,0x5a,0x7f]
+          vpmaxsd 4064(%rdx), %ymm24, %ymm27
+
+// CHECK: vpmaxsd 4096(%rdx), %ymm24, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x3d,0x20,0x3d,0x9a,0x00,0x10,0x00,0x00]
+          vpmaxsd 4096(%rdx), %ymm24, %ymm27
+
+// CHECK: vpmaxsd -4096(%rdx), %ymm24, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x3d,0x20,0x3d,0x5a,0x80]
+          vpmaxsd -4096(%rdx), %ymm24, %ymm27
+
+// CHECK: vpmaxsd -4128(%rdx), %ymm24, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x3d,0x20,0x3d,0x9a,0xe0,0xef,0xff,0xff]
+          vpmaxsd -4128(%rdx), %ymm24, %ymm27
+
+// CHECK: vpmaxsd 508(%rdx){1to8}, %ymm24, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x3d,0x30,0x3d,0x5a,0x7f]
+          vpmaxsd 508(%rdx){1to8}, %ymm24, %ymm27
+
+// CHECK: vpmaxsd 512(%rdx){1to8}, %ymm24, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x3d,0x30,0x3d,0x9a,0x00,0x02,0x00,0x00]
+          vpmaxsd 512(%rdx){1to8}, %ymm24, %ymm27
+
+// CHECK: vpmaxsd -512(%rdx){1to8}, %ymm24, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x3d,0x30,0x3d,0x5a,0x80]
+          vpmaxsd -512(%rdx){1to8}, %ymm24, %ymm27
+
+// CHECK: vpmaxsd -516(%rdx){1to8}, %ymm24, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x3d,0x30,0x3d,0x9a,0xfc,0xfd,0xff,0xff]
+          vpmaxsd -516(%rdx){1to8}, %ymm24, %ymm27
+
+// CHECK: vpmaxsq %xmm25, %xmm23, %xmm23
+// CHECK:  encoding: [0x62,0x82,0xc5,0x00,0x3d,0xf9]
+          vpmaxsq %xmm25, %xmm23, %xmm23
+
+// CHECK: vpmaxsq %xmm25, %xmm23, %xmm23 {%k2}
+// CHECK:  encoding: [0x62,0x82,0xc5,0x02,0x3d,0xf9]
+          vpmaxsq %xmm25, %xmm23, %xmm23 {%k2}
+
+// CHECK: vpmaxsq %xmm25, %xmm23, %xmm23 {%k2} {z}
+// CHECK:  encoding: [0x62,0x82,0xc5,0x82,0x3d,0xf9]
+          vpmaxsq %xmm25, %xmm23, %xmm23 {%k2} {z}
+
+// CHECK: vpmaxsq (%rcx), %xmm23, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x00,0x3d,0x39]
+          vpmaxsq (%rcx), %xmm23, %xmm23
+
+// CHECK: vpmaxsq 291(%rax,%r14,8), %xmm23, %xmm23
+// CHECK:  encoding: [0x62,0xa2,0xc5,0x00,0x3d,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxsq 291(%rax,%r14,8), %xmm23, %xmm23
+
+// CHECK: vpmaxsq (%rcx){1to2}, %xmm23, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x10,0x3d,0x39]
+          vpmaxsq (%rcx){1to2}, %xmm23, %xmm23
+
+// CHECK: vpmaxsq 2032(%rdx), %xmm23, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x00,0x3d,0x7a,0x7f]
+          vpmaxsq 2032(%rdx), %xmm23, %xmm23
+
+// CHECK: vpmaxsq 2048(%rdx), %xmm23, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x00,0x3d,0xba,0x00,0x08,0x00,0x00]
+          vpmaxsq 2048(%rdx), %xmm23, %xmm23
+
+// CHECK: vpmaxsq -2048(%rdx), %xmm23, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x00,0x3d,0x7a,0x80]
+          vpmaxsq -2048(%rdx), %xmm23, %xmm23
+
+// CHECK: vpmaxsq -2064(%rdx), %xmm23, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x00,0x3d,0xba,0xf0,0xf7,0xff,0xff]
+          vpmaxsq -2064(%rdx), %xmm23, %xmm23
+
+// CHECK: vpmaxsq 1016(%rdx){1to2}, %xmm23, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x10,0x3d,0x7a,0x7f]
+          vpmaxsq 1016(%rdx){1to2}, %xmm23, %xmm23
+
+// CHECK: vpmaxsq 1024(%rdx){1to2}, %xmm23, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x10,0x3d,0xba,0x00,0x04,0x00,0x00]
+          vpmaxsq 1024(%rdx){1to2}, %xmm23, %xmm23
+
+// CHECK: vpmaxsq -1024(%rdx){1to2}, %xmm23, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x10,0x3d,0x7a,0x80]
+          vpmaxsq -1024(%rdx){1to2}, %xmm23, %xmm23
+
+// CHECK: vpmaxsq -1032(%rdx){1to2}, %xmm23, %xmm23
+// CHECK:  encoding: [0x62,0xe2,0xc5,0x10,0x3d,0xba,0xf8,0xfb,0xff,0xff]
+          vpmaxsq -1032(%rdx){1to2}, %xmm23, %xmm23
+
+// CHECK: vpmaxsq %ymm25, %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x02,0xdd,0x20,0x3d,0xf1]
+          vpmaxsq %ymm25, %ymm20, %ymm30
+
+// CHECK: vpmaxsq %ymm25, %ymm20, %ymm30 {%k6}
+// CHECK:  encoding: [0x62,0x02,0xdd,0x26,0x3d,0xf1]
+          vpmaxsq %ymm25, %ymm20, %ymm30 {%k6}
+
+// CHECK: vpmaxsq %ymm25, %ymm20, %ymm30 {%k6} {z}
+// CHECK:  encoding: [0x62,0x02,0xdd,0xa6,0x3d,0xf1]
+          vpmaxsq %ymm25, %ymm20, %ymm30 {%k6} {z}
+
+// CHECK: vpmaxsq (%rcx), %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0x3d,0x31]
+          vpmaxsq (%rcx), %ymm20, %ymm30
+
+// CHECK: vpmaxsq 291(%rax,%r14,8), %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x22,0xdd,0x20,0x3d,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxsq 291(%rax,%r14,8), %ymm20, %ymm30
+
+// CHECK: vpmaxsq (%rcx){1to4}, %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0x3d,0x31]
+          vpmaxsq (%rcx){1to4}, %ymm20, %ymm30
+
+// CHECK: vpmaxsq 4064(%rdx), %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0x3d,0x72,0x7f]
+          vpmaxsq 4064(%rdx), %ymm20, %ymm30
+
+// CHECK: vpmaxsq 4096(%rdx), %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0x3d,0xb2,0x00,0x10,0x00,0x00]
+          vpmaxsq 4096(%rdx), %ymm20, %ymm30
+
+// CHECK: vpmaxsq -4096(%rdx), %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0x3d,0x72,0x80]
+          vpmaxsq -4096(%rdx), %ymm20, %ymm30
+
+// CHECK: vpmaxsq -4128(%rdx), %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xdd,0x20,0x3d,0xb2,0xe0,0xef,0xff,0xff]
+          vpmaxsq -4128(%rdx), %ymm20, %ymm30
+
+// CHECK: vpmaxsq 1016(%rdx){1to4}, %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0x3d,0x72,0x7f]
+          vpmaxsq 1016(%rdx){1to4}, %ymm20, %ymm30
+
+// CHECK: vpmaxsq 1024(%rdx){1to4}, %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0x3d,0xb2,0x00,0x04,0x00,0x00]
+          vpmaxsq 1024(%rdx){1to4}, %ymm20, %ymm30
+
+// CHECK: vpmaxsq -1024(%rdx){1to4}, %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0x3d,0x72,0x80]
+          vpmaxsq -1024(%rdx){1to4}, %ymm20, %ymm30
+
+// CHECK: vpmaxsq -1032(%rdx){1to4}, %ymm20, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xdd,0x30,0x3d,0xb2,0xf8,0xfb,0xff,0xff]
+          vpmaxsq -1032(%rdx){1to4}, %ymm20, %ymm30
+
+// CHECK: vpmaxud %xmm19, %xmm17, %xmm21
+// CHECK:  encoding: [0x62,0xa2,0x75,0x00,0x3f,0xeb]
+          vpmaxud %xmm19, %xmm17, %xmm21
+
+// CHECK: vpmaxud %xmm19, %xmm17, %xmm21 {%k7}
+// CHECK:  encoding: [0x62,0xa2,0x75,0x07,0x3f,0xeb]
+          vpmaxud %xmm19, %xmm17, %xmm21 {%k7}
+
+// CHECK: vpmaxud %xmm19, %xmm17, %xmm21 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa2,0x75,0x87,0x3f,0xeb]
+          vpmaxud %xmm19, %xmm17, %xmm21 {%k7} {z}
+
+// CHECK: vpmaxud (%rcx), %xmm17, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x75,0x00,0x3f,0x29]
+          vpmaxud (%rcx), %xmm17, %xmm21
+
+// CHECK: vpmaxud 291(%rax,%r14,8), %xmm17, %xmm21
+// CHECK:  encoding: [0x62,0xa2,0x75,0x00,0x3f,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxud 291(%rax,%r14,8), %xmm17, %xmm21
+
+// CHECK: vpmaxud (%rcx){1to4}, %xmm17, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x75,0x10,0x3f,0x29]
+          vpmaxud (%rcx){1to4}, %xmm17, %xmm21
+
+// CHECK: vpmaxud 2032(%rdx), %xmm17, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x75,0x00,0x3f,0x6a,0x7f]
+          vpmaxud 2032(%rdx), %xmm17, %xmm21
+
+// CHECK: vpmaxud 2048(%rdx), %xmm17, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x75,0x00,0x3f,0xaa,0x00,0x08,0x00,0x00]
+          vpmaxud 2048(%rdx), %xmm17, %xmm21
+
+// CHECK: vpmaxud -2048(%rdx), %xmm17, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x75,0x00,0x3f,0x6a,0x80]
+          vpmaxud -2048(%rdx), %xmm17, %xmm21
+
+// CHECK: vpmaxud -2064(%rdx), %xmm17, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x75,0x00,0x3f,0xaa,0xf0,0xf7,0xff,0xff]
+          vpmaxud -2064(%rdx), %xmm17, %xmm21
+
+// CHECK: vpmaxud 508(%rdx){1to4}, %xmm17, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x75,0x10,0x3f,0x6a,0x7f]
+          vpmaxud 508(%rdx){1to4}, %xmm17, %xmm21
+
+// CHECK: vpmaxud 512(%rdx){1to4}, %xmm17, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x75,0x10,0x3f,0xaa,0x00,0x02,0x00,0x00]
+          vpmaxud 512(%rdx){1to4}, %xmm17, %xmm21
+
+// CHECK: vpmaxud -512(%rdx){1to4}, %xmm17, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x75,0x10,0x3f,0x6a,0x80]
+          vpmaxud -512(%rdx){1to4}, %xmm17, %xmm21
+
+// CHECK: vpmaxud -516(%rdx){1to4}, %xmm17, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0x75,0x10,0x3f,0xaa,0xfc,0xfd,0xff,0xff]
+          vpmaxud -516(%rdx){1to4}, %xmm17, %xmm21
+
+// CHECK: vpmaxud %ymm23, %ymm23, %ymm28
+// CHECK:  encoding: [0x62,0x22,0x45,0x20,0x3f,0xe7]
+          vpmaxud %ymm23, %ymm23, %ymm28
+
+// CHECK: vpmaxud %ymm23, %ymm23, %ymm28 {%k7}
+// CHECK:  encoding: [0x62,0x22,0x45,0x27,0x3f,0xe7]
+          vpmaxud %ymm23, %ymm23, %ymm28 {%k7}
+
+// CHECK: vpmaxud %ymm23, %ymm23, %ymm28 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0x45,0xa7,0x3f,0xe7]
+          vpmaxud %ymm23, %ymm23, %ymm28 {%k7} {z}
+
+// CHECK: vpmaxud (%rcx), %ymm23, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x45,0x20,0x3f,0x21]
+          vpmaxud (%rcx), %ymm23, %ymm28
+
+// CHECK: vpmaxud 291(%rax,%r14,8), %ymm23, %ymm28
+// CHECK:  encoding: [0x62,0x22,0x45,0x20,0x3f,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxud 291(%rax,%r14,8), %ymm23, %ymm28
+
+// CHECK: vpmaxud (%rcx){1to8}, %ymm23, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x45,0x30,0x3f,0x21]
+          vpmaxud (%rcx){1to8}, %ymm23, %ymm28
+
+// CHECK: vpmaxud 4064(%rdx), %ymm23, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x45,0x20,0x3f,0x62,0x7f]
+          vpmaxud 4064(%rdx), %ymm23, %ymm28
+
+// CHECK: vpmaxud 4096(%rdx), %ymm23, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x45,0x20,0x3f,0xa2,0x00,0x10,0x00,0x00]
+          vpmaxud 4096(%rdx), %ymm23, %ymm28
+
+// CHECK: vpmaxud -4096(%rdx), %ymm23, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x45,0x20,0x3f,0x62,0x80]
+          vpmaxud -4096(%rdx), %ymm23, %ymm28
+
+// CHECK: vpmaxud -4128(%rdx), %ymm23, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x45,0x20,0x3f,0xa2,0xe0,0xef,0xff,0xff]
+          vpmaxud -4128(%rdx), %ymm23, %ymm28
+
+// CHECK: vpmaxud 508(%rdx){1to8}, %ymm23, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x45,0x30,0x3f,0x62,0x7f]
+          vpmaxud 508(%rdx){1to8}, %ymm23, %ymm28
+
+// CHECK: vpmaxud 512(%rdx){1to8}, %ymm23, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x45,0x30,0x3f,0xa2,0x00,0x02,0x00,0x00]
+          vpmaxud 512(%rdx){1to8}, %ymm23, %ymm28
+
+// CHECK: vpmaxud -512(%rdx){1to8}, %ymm23, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x45,0x30,0x3f,0x62,0x80]
+          vpmaxud -512(%rdx){1to8}, %ymm23, %ymm28
+
+// CHECK: vpmaxud -516(%rdx){1to8}, %ymm23, %ymm28
+// CHECK:  encoding: [0x62,0x62,0x45,0x30,0x3f,0xa2,0xfc,0xfd,0xff,0xff]
+          vpmaxud -516(%rdx){1to8}, %ymm23, %ymm28
+
+// CHECK: vpmaxuq %xmm25, %xmm26, %xmm26
+// CHECK:  encoding: [0x62,0x02,0xad,0x00,0x3f,0xd1]
+          vpmaxuq %xmm25, %xmm26, %xmm26
+
+// CHECK: vpmaxuq %xmm25, %xmm26, %xmm26 {%k7}
+// CHECK:  encoding: [0x62,0x02,0xad,0x07,0x3f,0xd1]
+          vpmaxuq %xmm25, %xmm26, %xmm26 {%k7}
+
+// CHECK: vpmaxuq %xmm25, %xmm26, %xmm26 {%k7} {z}
+// CHECK:  encoding: [0x62,0x02,0xad,0x87,0x3f,0xd1]
+          vpmaxuq %xmm25, %xmm26, %xmm26 {%k7} {z}
+
+// CHECK: vpmaxuq (%rcx), %xmm26, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x00,0x3f,0x11]
+          vpmaxuq (%rcx), %xmm26, %xmm26
+
+// CHECK: vpmaxuq 291(%rax,%r14,8), %xmm26, %xmm26
+// CHECK:  encoding: [0x62,0x22,0xad,0x00,0x3f,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxuq 291(%rax,%r14,8), %xmm26, %xmm26
+
+// CHECK: vpmaxuq (%rcx){1to2}, %xmm26, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x10,0x3f,0x11]
+          vpmaxuq (%rcx){1to2}, %xmm26, %xmm26
+
+// CHECK: vpmaxuq 2032(%rdx), %xmm26, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x00,0x3f,0x52,0x7f]
+          vpmaxuq 2032(%rdx), %xmm26, %xmm26
+
+// CHECK: vpmaxuq 2048(%rdx), %xmm26, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x00,0x3f,0x92,0x00,0x08,0x00,0x00]
+          vpmaxuq 2048(%rdx), %xmm26, %xmm26
+
+// CHECK: vpmaxuq -2048(%rdx), %xmm26, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x00,0x3f,0x52,0x80]
+          vpmaxuq -2048(%rdx), %xmm26, %xmm26
+
+// CHECK: vpmaxuq -2064(%rdx), %xmm26, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x00,0x3f,0x92,0xf0,0xf7,0xff,0xff]
+          vpmaxuq -2064(%rdx), %xmm26, %xmm26
+
+// CHECK: vpmaxuq 1016(%rdx){1to2}, %xmm26, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x10,0x3f,0x52,0x7f]
+          vpmaxuq 1016(%rdx){1to2}, %xmm26, %xmm26
+
+// CHECK: vpmaxuq 1024(%rdx){1to2}, %xmm26, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x10,0x3f,0x92,0x00,0x04,0x00,0x00]
+          vpmaxuq 1024(%rdx){1to2}, %xmm26, %xmm26
+
+// CHECK: vpmaxuq -1024(%rdx){1to2}, %xmm26, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x10,0x3f,0x52,0x80]
+          vpmaxuq -1024(%rdx){1to2}, %xmm26, %xmm26
+
+// CHECK: vpmaxuq -1032(%rdx){1to2}, %xmm26, %xmm26
+// CHECK:  encoding: [0x62,0x62,0xad,0x10,0x3f,0x92,0xf8,0xfb,0xff,0xff]
+          vpmaxuq -1032(%rdx){1to2}, %xmm26, %xmm26
+
+// CHECK: vpmaxuq %ymm20, %ymm17, %ymm30
+// CHECK:  encoding: [0x62,0x22,0xf5,0x20,0x3f,0xf4]
+          vpmaxuq %ymm20, %ymm17, %ymm30
+
+// CHECK: vpmaxuq %ymm20, %ymm17, %ymm30 {%k4}
+// CHECK:  encoding: [0x62,0x22,0xf5,0x24,0x3f,0xf4]
+          vpmaxuq %ymm20, %ymm17, %ymm30 {%k4}
+
+// CHECK: vpmaxuq %ymm20, %ymm17, %ymm30 {%k4} {z}
+// CHECK:  encoding: [0x62,0x22,0xf5,0xa4,0x3f,0xf4]
+          vpmaxuq %ymm20, %ymm17, %ymm30 {%k4} {z}
+
+// CHECK: vpmaxuq (%rcx), %ymm17, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xf5,0x20,0x3f,0x31]
+          vpmaxuq (%rcx), %ymm17, %ymm30
+
+// CHECK: vpmaxuq 291(%rax,%r14,8), %ymm17, %ymm30
+// CHECK:  encoding: [0x62,0x22,0xf5,0x20,0x3f,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpmaxuq 291(%rax,%r14,8), %ymm17, %ymm30
+
+// CHECK: vpmaxuq (%rcx){1to4}, %ymm17, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xf5,0x30,0x3f,0x31]
+          vpmaxuq (%rcx){1to4}, %ymm17, %ymm30
+
+// CHECK: vpmaxuq 4064(%rdx), %ymm17, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xf5,0x20,0x3f,0x72,0x7f]
+          vpmaxuq 4064(%rdx), %ymm17, %ymm30
+
+// CHECK: vpmaxuq 4096(%rdx), %ymm17, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xf5,0x20,0x3f,0xb2,0x00,0x10,0x00,0x00]
+          vpmaxuq 4096(%rdx), %ymm17, %ymm30
+
+// CHECK: vpmaxuq -4096(%rdx), %ymm17, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xf5,0x20,0x3f,0x72,0x80]
+          vpmaxuq -4096(%rdx), %ymm17, %ymm30
+
+// CHECK: vpmaxuq -4128(%rdx), %ymm17, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xf5,0x20,0x3f,0xb2,0xe0,0xef,0xff,0xff]
+          vpmaxuq -4128(%rdx), %ymm17, %ymm30
+
+// CHECK: vpmaxuq 1016(%rdx){1to4}, %ymm17, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xf5,0x30,0x3f,0x72,0x7f]
+          vpmaxuq 1016(%rdx){1to4}, %ymm17, %ymm30
+
+// CHECK: vpmaxuq 1024(%rdx){1to4}, %ymm17, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xf5,0x30,0x3f,0xb2,0x00,0x04,0x00,0x00]
+          vpmaxuq 1024(%rdx){1to4}, %ymm17, %ymm30
+
+// CHECK: vpmaxuq -1024(%rdx){1to4}, %ymm17, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xf5,0x30,0x3f,0x72,0x80]
+          vpmaxuq -1024(%rdx){1to4}, %ymm17, %ymm30
+
+// CHECK: vpmaxuq -1032(%rdx){1to4}, %ymm17, %ymm30
+// CHECK:  encoding: [0x62,0x62,0xf5,0x30,0x3f,0xb2,0xf8,0xfb,0xff,0xff]
+          vpmaxuq -1032(%rdx){1to4}, %ymm17, %ymm30
+
+// CHECK: vpminsd %xmm17, %xmm21, %xmm29
+// CHECK:  encoding: [0x62,0x22,0x55,0x00,0x39,0xe9]
+          vpminsd %xmm17, %xmm21, %xmm29
+
+// CHECK: vpminsd %xmm17, %xmm21, %xmm29 {%k4}
+// CHECK:  encoding: [0x62,0x22,0x55,0x04,0x39,0xe9]
+          vpminsd %xmm17, %xmm21, %xmm29 {%k4}
+
+// CHECK: vpminsd %xmm17, %xmm21, %xmm29 {%k4} {z}
+// CHECK:  encoding: [0x62,0x22,0x55,0x84,0x39,0xe9]
+          vpminsd %xmm17, %xmm21, %xmm29 {%k4} {z}
+
+// CHECK: vpminsd (%rcx), %xmm21, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x55,0x00,0x39,0x29]
+          vpminsd (%rcx), %xmm21, %xmm29
+
+// CHECK: vpminsd 291(%rax,%r14,8), %xmm21, %xmm29
+// CHECK:  encoding: [0x62,0x22,0x55,0x00,0x39,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpminsd 291(%rax,%r14,8), %xmm21, %xmm29
+
+// CHECK: vpminsd (%rcx){1to4}, %xmm21, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x55,0x10,0x39,0x29]
+          vpminsd (%rcx){1to4}, %xmm21, %xmm29
+
+// CHECK: vpminsd 2032(%rdx), %xmm21, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x55,0x00,0x39,0x6a,0x7f]
+          vpminsd 2032(%rdx), %xmm21, %xmm29
+
+// CHECK: vpminsd 2048(%rdx), %xmm21, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x55,0x00,0x39,0xaa,0x00,0x08,0x00,0x00]
+          vpminsd 2048(%rdx), %xmm21, %xmm29
+
+// CHECK: vpminsd -2048(%rdx), %xmm21, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x55,0x00,0x39,0x6a,0x80]
+          vpminsd -2048(%rdx), %xmm21, %xmm29
+
+// CHECK: vpminsd -2064(%rdx), %xmm21, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x55,0x00,0x39,0xaa,0xf0,0xf7,0xff,0xff]
+          vpminsd -2064(%rdx), %xmm21, %xmm29
+
+// CHECK: vpminsd 508(%rdx){1to4}, %xmm21, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x55,0x10,0x39,0x6a,0x7f]
+          vpminsd 508(%rdx){1to4}, %xmm21, %xmm29
+
+// CHECK: vpminsd 512(%rdx){1to4}, %xmm21, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x55,0x10,0x39,0xaa,0x00,0x02,0x00,0x00]
+          vpminsd 512(%rdx){1to4}, %xmm21, %xmm29
+
+// CHECK: vpminsd -512(%rdx){1to4}, %xmm21, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x55,0x10,0x39,0x6a,0x80]
+          vpminsd -512(%rdx){1to4}, %xmm21, %xmm29
+
+// CHECK: vpminsd -516(%rdx){1to4}, %xmm21, %xmm29
+// CHECK:  encoding: [0x62,0x62,0x55,0x10,0x39,0xaa,0xfc,0xfd,0xff,0xff]
+          vpminsd -516(%rdx){1to4}, %xmm21, %xmm29
+
+// CHECK: vpminsd %ymm25, %ymm26, %ymm19
+// CHECK:  encoding: [0x62,0x82,0x2d,0x20,0x39,0xd9]
+          vpminsd %ymm25, %ymm26, %ymm19
+
+// CHECK: vpminsd %ymm25, %ymm26, %ymm19 {%k7}
+// CHECK:  encoding: [0x62,0x82,0x2d,0x27,0x39,0xd9]
+          vpminsd %ymm25, %ymm26, %ymm19 {%k7}
+
+// CHECK: vpminsd %ymm25, %ymm26, %ymm19 {%k7} {z}
+// CHECK:  encoding: [0x62,0x82,0x2d,0xa7,0x39,0xd9]
+          vpminsd %ymm25, %ymm26, %ymm19 {%k7} {z}
+
+// CHECK: vpminsd (%rcx), %ymm26, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0x39,0x19]
+          vpminsd (%rcx), %ymm26, %ymm19
+
+// CHECK: vpminsd 291(%rax,%r14,8), %ymm26, %ymm19
+// CHECK:  encoding: [0x62,0xa2,0x2d,0x20,0x39,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpminsd 291(%rax,%r14,8), %ymm26, %ymm19
+
+// CHECK: vpminsd (%rcx){1to8}, %ymm26, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0x39,0x19]
+          vpminsd (%rcx){1to8}, %ymm26, %ymm19
+
+// CHECK: vpminsd 4064(%rdx), %ymm26, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0x39,0x5a,0x7f]
+          vpminsd 4064(%rdx), %ymm26, %ymm19
+
+// CHECK: vpminsd 4096(%rdx), %ymm26, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0x39,0x9a,0x00,0x10,0x00,0x00]
+          vpminsd 4096(%rdx), %ymm26, %ymm19
+
+// CHECK: vpminsd -4096(%rdx), %ymm26, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0x39,0x5a,0x80]
+          vpminsd -4096(%rdx), %ymm26, %ymm19
+
+// CHECK: vpminsd -4128(%rdx), %ymm26, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0x39,0x9a,0xe0,0xef,0xff,0xff]
+          vpminsd -4128(%rdx), %ymm26, %ymm19
+
+// CHECK: vpminsd 508(%rdx){1to8}, %ymm26, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0x39,0x5a,0x7f]
+          vpminsd 508(%rdx){1to8}, %ymm26, %ymm19
+
+// CHECK: vpminsd 512(%rdx){1to8}, %ymm26, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0x39,0x9a,0x00,0x02,0x00,0x00]
+          vpminsd 512(%rdx){1to8}, %ymm26, %ymm19
+
+// CHECK: vpminsd -512(%rdx){1to8}, %ymm26, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0x39,0x5a,0x80]
+          vpminsd -512(%rdx){1to8}, %ymm26, %ymm19
+
+// CHECK: vpminsd -516(%rdx){1to8}, %ymm26, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0x39,0x9a,0xfc,0xfd,0xff,0xff]
+          vpminsd -516(%rdx){1to8}, %ymm26, %ymm19
+
+// CHECK: vpminsq %xmm18, %xmm24, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0xbd,0x00,0x39,0xda]
+          vpminsq %xmm18, %xmm24, %xmm19
+
+// CHECK: vpminsq %xmm18, %xmm24, %xmm19 {%k6}
+// CHECK:  encoding: [0x62,0xa2,0xbd,0x06,0x39,0xda]
+          vpminsq %xmm18, %xmm24, %xmm19 {%k6}
+
+// CHECK: vpminsq %xmm18, %xmm24, %xmm19 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa2,0xbd,0x86,0x39,0xda]
+          vpminsq %xmm18, %xmm24, %xmm19 {%k6} {z}
+
+// CHECK: vpminsq (%rcx), %xmm24, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x00,0x39,0x19]
+          vpminsq (%rcx), %xmm24, %xmm19
+
+// CHECK: vpminsq 291(%rax,%r14,8), %xmm24, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0xbd,0x00,0x39,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpminsq 291(%rax,%r14,8), %xmm24, %xmm19
+
+// CHECK: vpminsq (%rcx){1to2}, %xmm24, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x10,0x39,0x19]
+          vpminsq (%rcx){1to2}, %xmm24, %xmm19
+
+// CHECK: vpminsq 2032(%rdx), %xmm24, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x00,0x39,0x5a,0x7f]
+          vpminsq 2032(%rdx), %xmm24, %xmm19
+
+// CHECK: vpminsq 2048(%rdx), %xmm24, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x00,0x39,0x9a,0x00,0x08,0x00,0x00]
+          vpminsq 2048(%rdx), %xmm24, %xmm19
+
+// CHECK: vpminsq -2048(%rdx), %xmm24, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x00,0x39,0x5a,0x80]
+          vpminsq -2048(%rdx), %xmm24, %xmm19
+
+// CHECK: vpminsq -2064(%rdx), %xmm24, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x00,0x39,0x9a,0xf0,0xf7,0xff,0xff]
+          vpminsq -2064(%rdx), %xmm24, %xmm19
+
+// CHECK: vpminsq 1016(%rdx){1to2}, %xmm24, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x10,0x39,0x5a,0x7f]
+          vpminsq 1016(%rdx){1to2}, %xmm24, %xmm19
+
+// CHECK: vpminsq 1024(%rdx){1to2}, %xmm24, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x10,0x39,0x9a,0x00,0x04,0x00,0x00]
+          vpminsq 1024(%rdx){1to2}, %xmm24, %xmm19
+
+// CHECK: vpminsq -1024(%rdx){1to2}, %xmm24, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x10,0x39,0x5a,0x80]
+          vpminsq -1024(%rdx){1to2}, %xmm24, %xmm19
+
+// CHECK: vpminsq -1032(%rdx){1to2}, %xmm24, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0xbd,0x10,0x39,0x9a,0xf8,0xfb,0xff,0xff]
+          vpminsq -1032(%rdx){1to2}, %xmm24, %xmm19
+
+// CHECK: vpminsq %ymm28, %ymm29, %ymm25
+// CHECK:  encoding: [0x62,0x02,0x95,0x20,0x39,0xcc]
+          vpminsq %ymm28, %ymm29, %ymm25
+
+// CHECK: vpminsq %ymm28, %ymm29, %ymm25 {%k6}
+// CHECK:  encoding: [0x62,0x02,0x95,0x26,0x39,0xcc]
+          vpminsq %ymm28, %ymm29, %ymm25 {%k6}
+
+// CHECK: vpminsq %ymm28, %ymm29, %ymm25 {%k6} {z}
+// CHECK:  encoding: [0x62,0x02,0x95,0xa6,0x39,0xcc]
+          vpminsq %ymm28, %ymm29, %ymm25 {%k6} {z}
+
+// CHECK: vpminsq (%rcx), %ymm29, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x20,0x39,0x09]
+          vpminsq (%rcx), %ymm29, %ymm25
+
+// CHECK: vpminsq 291(%rax,%r14,8), %ymm29, %ymm25
+// CHECK:  encoding: [0x62,0x22,0x95,0x20,0x39,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpminsq 291(%rax,%r14,8), %ymm29, %ymm25
+
+// CHECK: vpminsq (%rcx){1to4}, %ymm29, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x30,0x39,0x09]
+          vpminsq (%rcx){1to4}, %ymm29, %ymm25
+
+// CHECK: vpminsq 4064(%rdx), %ymm29, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x20,0x39,0x4a,0x7f]
+          vpminsq 4064(%rdx), %ymm29, %ymm25
+
+// CHECK: vpminsq 4096(%rdx), %ymm29, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x20,0x39,0x8a,0x00,0x10,0x00,0x00]
+          vpminsq 4096(%rdx), %ymm29, %ymm25
+
+// CHECK: vpminsq -4096(%rdx), %ymm29, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x20,0x39,0x4a,0x80]
+          vpminsq -4096(%rdx), %ymm29, %ymm25
+
+// CHECK: vpminsq -4128(%rdx), %ymm29, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x20,0x39,0x8a,0xe0,0xef,0xff,0xff]
+          vpminsq -4128(%rdx), %ymm29, %ymm25
+
+// CHECK: vpminsq 1016(%rdx){1to4}, %ymm29, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x30,0x39,0x4a,0x7f]
+          vpminsq 1016(%rdx){1to4}, %ymm29, %ymm25
+
+// CHECK: vpminsq 1024(%rdx){1to4}, %ymm29, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x30,0x39,0x8a,0x00,0x04,0x00,0x00]
+          vpminsq 1024(%rdx){1to4}, %ymm29, %ymm25
+
+// CHECK: vpminsq -1024(%rdx){1to4}, %ymm29, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x30,0x39,0x4a,0x80]
+          vpminsq -1024(%rdx){1to4}, %ymm29, %ymm25
+
+// CHECK: vpminsq -1032(%rdx){1to4}, %ymm29, %ymm25
+// CHECK:  encoding: [0x62,0x62,0x95,0x30,0x39,0x8a,0xf8,0xfb,0xff,0xff]
+          vpminsq -1032(%rdx){1to4}, %ymm29, %ymm25
+
+// CHECK: vpminud %xmm17, %xmm23, %xmm18
+// CHECK:  encoding: [0x62,0xa2,0x45,0x00,0x3b,0xd1]
+          vpminud %xmm17, %xmm23, %xmm18
+
+// CHECK: vpminud %xmm17, %xmm23, %xmm18 {%k3}
+// CHECK:  encoding: [0x62,0xa2,0x45,0x03,0x3b,0xd1]
+          vpminud %xmm17, %xmm23, %xmm18 {%k3}
+
+// CHECK: vpminud %xmm17, %xmm23, %xmm18 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa2,0x45,0x83,0x3b,0xd1]
+          vpminud %xmm17, %xmm23, %xmm18 {%k3} {z}
+
+// CHECK: vpminud (%rcx), %xmm23, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x45,0x00,0x3b,0x11]
+          vpminud (%rcx), %xmm23, %xmm18
+
+// CHECK: vpminud 291(%rax,%r14,8), %xmm23, %xmm18
+// CHECK:  encoding: [0x62,0xa2,0x45,0x00,0x3b,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpminud 291(%rax,%r14,8), %xmm23, %xmm18
+
+// CHECK: vpminud (%rcx){1to4}, %xmm23, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x45,0x10,0x3b,0x11]
+          vpminud (%rcx){1to4}, %xmm23, %xmm18
+
+// CHECK: vpminud 2032(%rdx), %xmm23, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x45,0x00,0x3b,0x52,0x7f]
+          vpminud 2032(%rdx), %xmm23, %xmm18
+
+// CHECK: vpminud 2048(%rdx), %xmm23, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x45,0x00,0x3b,0x92,0x00,0x08,0x00,0x00]
+          vpminud 2048(%rdx), %xmm23, %xmm18
+
+// CHECK: vpminud -2048(%rdx), %xmm23, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x45,0x00,0x3b,0x52,0x80]
+          vpminud -2048(%rdx), %xmm23, %xmm18
+
+// CHECK: vpminud -2064(%rdx), %xmm23, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x45,0x00,0x3b,0x92,0xf0,0xf7,0xff,0xff]
+          vpminud -2064(%rdx), %xmm23, %xmm18
+
+// CHECK: vpminud 508(%rdx){1to4}, %xmm23, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x45,0x10,0x3b,0x52,0x7f]
+          vpminud 508(%rdx){1to4}, %xmm23, %xmm18
+
+// CHECK: vpminud 512(%rdx){1to4}, %xmm23, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x45,0x10,0x3b,0x92,0x00,0x02,0x00,0x00]
+          vpminud 512(%rdx){1to4}, %xmm23, %xmm18
+
+// CHECK: vpminud -512(%rdx){1to4}, %xmm23, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x45,0x10,0x3b,0x52,0x80]
+          vpminud -512(%rdx){1to4}, %xmm23, %xmm18
+
+// CHECK: vpminud -516(%rdx){1to4}, %xmm23, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0x45,0x10,0x3b,0x92,0xfc,0xfd,0xff,0xff]
+          vpminud -516(%rdx){1to4}, %xmm23, %xmm18
+
+// CHECK: vpminud %ymm19, %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x20,0x3b,0xdb]
+          vpminud %ymm19, %ymm22, %ymm19
+
+// CHECK: vpminud %ymm19, %ymm22, %ymm19 {%k3}
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x23,0x3b,0xdb]
+          vpminud %ymm19, %ymm22, %ymm19 {%k3}
+
+// CHECK: vpminud %ymm19, %ymm22, %ymm19 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa2,0x4d,0xa3,0x3b,0xdb]
+          vpminud %ymm19, %ymm22, %ymm19 {%k3} {z}
+
+// CHECK: vpminud (%rcx), %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x20,0x3b,0x19]
+          vpminud (%rcx), %ymm22, %ymm19
+
+// CHECK: vpminud 291(%rax,%r14,8), %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xa2,0x4d,0x20,0x3b,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpminud 291(%rax,%r14,8), %ymm22, %ymm19
+
+// CHECK: vpminud (%rcx){1to8}, %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x30,0x3b,0x19]
+          vpminud (%rcx){1to8}, %ymm22, %ymm19
+
+// CHECK: vpminud 4064(%rdx), %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x20,0x3b,0x5a,0x7f]
+          vpminud 4064(%rdx), %ymm22, %ymm19
+
+// CHECK: vpminud 4096(%rdx), %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x20,0x3b,0x9a,0x00,0x10,0x00,0x00]
+          vpminud 4096(%rdx), %ymm22, %ymm19
+
+// CHECK: vpminud -4096(%rdx), %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x20,0x3b,0x5a,0x80]
+          vpminud -4096(%rdx), %ymm22, %ymm19
+
+// CHECK: vpminud -4128(%rdx), %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x20,0x3b,0x9a,0xe0,0xef,0xff,0xff]
+          vpminud -4128(%rdx), %ymm22, %ymm19
+
+// CHECK: vpminud 508(%rdx){1to8}, %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x30,0x3b,0x5a,0x7f]
+          vpminud 508(%rdx){1to8}, %ymm22, %ymm19
+
+// CHECK: vpminud 512(%rdx){1to8}, %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x30,0x3b,0x9a,0x00,0x02,0x00,0x00]
+          vpminud 512(%rdx){1to8}, %ymm22, %ymm19
+
+// CHECK: vpminud -512(%rdx){1to8}, %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x30,0x3b,0x5a,0x80]
+          vpminud -512(%rdx){1to8}, %ymm22, %ymm19
+
+// CHECK: vpminud -516(%rdx){1to8}, %ymm22, %ymm19
+// CHECK:  encoding: [0x62,0xe2,0x4d,0x30,0x3b,0x9a,0xfc,0xfd,0xff,0xff]
+          vpminud -516(%rdx){1to8}, %ymm22, %ymm19
+
+// CHECK: vpminuq %xmm23, %xmm22, %xmm28
+// CHECK:  encoding: [0x62,0x22,0xcd,0x00,0x3b,0xe7]
+          vpminuq %xmm23, %xmm22, %xmm28
+
+// CHECK: vpminuq %xmm23, %xmm22, %xmm28 {%k4}
+// CHECK:  encoding: [0x62,0x22,0xcd,0x04,0x3b,0xe7]
+          vpminuq %xmm23, %xmm22, %xmm28 {%k4}
+
+// CHECK: vpminuq %xmm23, %xmm22, %xmm28 {%k4} {z}
+// CHECK:  encoding: [0x62,0x22,0xcd,0x84,0x3b,0xe7]
+          vpminuq %xmm23, %xmm22, %xmm28 {%k4} {z}
+
+// CHECK: vpminuq (%rcx), %xmm22, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xcd,0x00,0x3b,0x21]
+          vpminuq (%rcx), %xmm22, %xmm28
+
+// CHECK: vpminuq 291(%rax,%r14,8), %xmm22, %xmm28
+// CHECK:  encoding: [0x62,0x22,0xcd,0x00,0x3b,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpminuq 291(%rax,%r14,8), %xmm22, %xmm28
+
+// CHECK: vpminuq (%rcx){1to2}, %xmm22, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xcd,0x10,0x3b,0x21]
+          vpminuq (%rcx){1to2}, %xmm22, %xmm28
+
+// CHECK: vpminuq 2032(%rdx), %xmm22, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xcd,0x00,0x3b,0x62,0x7f]
+          vpminuq 2032(%rdx), %xmm22, %xmm28
+
+// CHECK: vpminuq 2048(%rdx), %xmm22, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xcd,0x00,0x3b,0xa2,0x00,0x08,0x00,0x00]
+          vpminuq 2048(%rdx), %xmm22, %xmm28
+
+// CHECK: vpminuq -2048(%rdx), %xmm22, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xcd,0x00,0x3b,0x62,0x80]
+          vpminuq -2048(%rdx), %xmm22, %xmm28
+
+// CHECK: vpminuq -2064(%rdx), %xmm22, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xcd,0x00,0x3b,0xa2,0xf0,0xf7,0xff,0xff]
+          vpminuq -2064(%rdx), %xmm22, %xmm28
+
+// CHECK: vpminuq 1016(%rdx){1to2}, %xmm22, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xcd,0x10,0x3b,0x62,0x7f]
+          vpminuq 1016(%rdx){1to2}, %xmm22, %xmm28
+
+// CHECK: vpminuq 1024(%rdx){1to2}, %xmm22, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xcd,0x10,0x3b,0xa2,0x00,0x04,0x00,0x00]
+          vpminuq 1024(%rdx){1to2}, %xmm22, %xmm28
+
+// CHECK: vpminuq -1024(%rdx){1to2}, %xmm22, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xcd,0x10,0x3b,0x62,0x80]
+          vpminuq -1024(%rdx){1to2}, %xmm22, %xmm28
+
+// CHECK: vpminuq -1032(%rdx){1to2}, %xmm22, %xmm28
+// CHECK:  encoding: [0x62,0x62,0xcd,0x10,0x3b,0xa2,0xf8,0xfb,0xff,0xff]
+          vpminuq -1032(%rdx){1to2}, %xmm22, %xmm28
+
+// CHECK: vpminuq %ymm23, %ymm21, %ymm29
+// CHECK:  encoding: [0x62,0x22,0xd5,0x20,0x3b,0xef]
+          vpminuq %ymm23, %ymm21, %ymm29
+
+// CHECK: vpminuq %ymm23, %ymm21, %ymm29 {%k5}
+// CHECK:  encoding: [0x62,0x22,0xd5,0x25,0x3b,0xef]
+          vpminuq %ymm23, %ymm21, %ymm29 {%k5}
+
+// CHECK: vpminuq %ymm23, %ymm21, %ymm29 {%k5} {z}
+// CHECK:  encoding: [0x62,0x22,0xd5,0xa5,0x3b,0xef]
+          vpminuq %ymm23, %ymm21, %ymm29 {%k5} {z}
+
+// CHECK: vpminuq (%rcx), %ymm21, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xd5,0x20,0x3b,0x29]
+          vpminuq (%rcx), %ymm21, %ymm29
+
+// CHECK: vpminuq 291(%rax,%r14,8), %ymm21, %ymm29
+// CHECK:  encoding: [0x62,0x22,0xd5,0x20,0x3b,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpminuq 291(%rax,%r14,8), %ymm21, %ymm29
+
+// CHECK: vpminuq (%rcx){1to4}, %ymm21, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xd5,0x30,0x3b,0x29]
+          vpminuq (%rcx){1to4}, %ymm21, %ymm29
+
+// CHECK: vpminuq 4064(%rdx), %ymm21, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xd5,0x20,0x3b,0x6a,0x7f]
+          vpminuq 4064(%rdx), %ymm21, %ymm29
+
+// CHECK: vpminuq 4096(%rdx), %ymm21, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xd5,0x20,0x3b,0xaa,0x00,0x10,0x00,0x00]
+          vpminuq 4096(%rdx), %ymm21, %ymm29
+
+// CHECK: vpminuq -4096(%rdx), %ymm21, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xd5,0x20,0x3b,0x6a,0x80]
+          vpminuq -4096(%rdx), %ymm21, %ymm29
+
+// CHECK: vpminuq -4128(%rdx), %ymm21, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xd5,0x20,0x3b,0xaa,0xe0,0xef,0xff,0xff]
+          vpminuq -4128(%rdx), %ymm21, %ymm29
+
+// CHECK: vpminuq 1016(%rdx){1to4}, %ymm21, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xd5,0x30,0x3b,0x6a,0x7f]
+          vpminuq 1016(%rdx){1to4}, %ymm21, %ymm29
+
+// CHECK: vpminuq 1024(%rdx){1to4}, %ymm21, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xd5,0x30,0x3b,0xaa,0x00,0x04,0x00,0x00]
+          vpminuq 1024(%rdx){1to4}, %ymm21, %ymm29
+
+// CHECK: vpminuq -1024(%rdx){1to4}, %ymm21, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xd5,0x30,0x3b,0x6a,0x80]
+          vpminuq -1024(%rdx){1to4}, %ymm21, %ymm29
+
+// CHECK: vpminuq -1032(%rdx){1to4}, %ymm21, %ymm29
+// CHECK:  encoding: [0x62,0x62,0xd5,0x30,0x3b,0xaa,0xf8,0xfb,0xff,0xff]
+          vpminuq -1032(%rdx){1to4}, %ymm21, %ymm29
+
+// CHECK: vpmulld %xmm24, %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x02,0x65,0x00,0x40,0xc8]
+          vpmulld %xmm24, %xmm19, %xmm25
+
+// CHECK: vpmulld %xmm24, %xmm19, %xmm25 {%k6}
+// CHECK:  encoding: [0x62,0x02,0x65,0x06,0x40,0xc8]
+          vpmulld %xmm24, %xmm19, %xmm25 {%k6}
+
+// CHECK: vpmulld %xmm24, %xmm19, %xmm25 {%k6} {z}
+// CHECK:  encoding: [0x62,0x02,0x65,0x86,0x40,0xc8]
+          vpmulld %xmm24, %xmm19, %xmm25 {%k6} {z}
+
+// CHECK: vpmulld (%rcx), %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x65,0x00,0x40,0x09]
+          vpmulld (%rcx), %xmm19, %xmm25
+
+// CHECK: vpmulld 291(%rax,%r14,8), %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x22,0x65,0x00,0x40,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpmulld 291(%rax,%r14,8), %xmm19, %xmm25
+
+// CHECK: vpmulld (%rcx){1to4}, %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x65,0x10,0x40,0x09]
+          vpmulld (%rcx){1to4}, %xmm19, %xmm25
+
+// CHECK: vpmulld 2032(%rdx), %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x65,0x00,0x40,0x4a,0x7f]
+          vpmulld 2032(%rdx), %xmm19, %xmm25
+
+// CHECK: vpmulld 2048(%rdx), %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x65,0x00,0x40,0x8a,0x00,0x08,0x00,0x00]
+          vpmulld 2048(%rdx), %xmm19, %xmm25
+
+// CHECK: vpmulld -2048(%rdx), %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x65,0x00,0x40,0x4a,0x80]
+          vpmulld -2048(%rdx), %xmm19, %xmm25
+
+// CHECK: vpmulld -2064(%rdx), %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x65,0x00,0x40,0x8a,0xf0,0xf7,0xff,0xff]
+          vpmulld -2064(%rdx), %xmm19, %xmm25
+
+// CHECK: vpmulld 508(%rdx){1to4}, %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x65,0x10,0x40,0x4a,0x7f]
+          vpmulld 508(%rdx){1to4}, %xmm19, %xmm25
+
+// CHECK: vpmulld 512(%rdx){1to4}, %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x65,0x10,0x40,0x8a,0x00,0x02,0x00,0x00]
+          vpmulld 512(%rdx){1to4}, %xmm19, %xmm25
+
+// CHECK: vpmulld -512(%rdx){1to4}, %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x65,0x10,0x40,0x4a,0x80]
+          vpmulld -512(%rdx){1to4}, %xmm19, %xmm25
+
+// CHECK: vpmulld -516(%rdx){1to4}, %xmm19, %xmm25
+// CHECK:  encoding: [0x62,0x62,0x65,0x10,0x40,0x8a,0xfc,0xfd,0xff,0xff]
+          vpmulld -516(%rdx){1to4}, %xmm19, %xmm25
+
+// CHECK: vpmulld %ymm26, %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0x82,0x2d,0x20,0x40,0xfa]
+          vpmulld %ymm26, %ymm26, %ymm23
+
+// CHECK: vpmulld %ymm26, %ymm26, %ymm23 {%k2}
+// CHECK:  encoding: [0x62,0x82,0x2d,0x22,0x40,0xfa]
+          vpmulld %ymm26, %ymm26, %ymm23 {%k2}
+
+// CHECK: vpmulld %ymm26, %ymm26, %ymm23 {%k2} {z}
+// CHECK:  encoding: [0x62,0x82,0x2d,0xa2,0x40,0xfa]
+          vpmulld %ymm26, %ymm26, %ymm23 {%k2} {z}
+
+// CHECK: vpmulld (%rcx), %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0x40,0x39]
+          vpmulld (%rcx), %ymm26, %ymm23
+
+// CHECK: vpmulld 291(%rax,%r14,8), %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xa2,0x2d,0x20,0x40,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vpmulld 291(%rax,%r14,8), %ymm26, %ymm23
+
+// CHECK: vpmulld (%rcx){1to8}, %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0x40,0x39]
+          vpmulld (%rcx){1to8}, %ymm26, %ymm23
+
+// CHECK: vpmulld 4064(%rdx), %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0x40,0x7a,0x7f]
+          vpmulld 4064(%rdx), %ymm26, %ymm23
+
+// CHECK: vpmulld 4096(%rdx), %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0x40,0xba,0x00,0x10,0x00,0x00]
+          vpmulld 4096(%rdx), %ymm26, %ymm23
+
+// CHECK: vpmulld -4096(%rdx), %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0x40,0x7a,0x80]
+          vpmulld -4096(%rdx), %ymm26, %ymm23
+
+// CHECK: vpmulld -4128(%rdx), %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0x40,0xba,0xe0,0xef,0xff,0xff]
+          vpmulld -4128(%rdx), %ymm26, %ymm23
+
+// CHECK: vpmulld 508(%rdx){1to8}, %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0x40,0x7a,0x7f]
+          vpmulld 508(%rdx){1to8}, %ymm26, %ymm23
+
+// CHECK: vpmulld 512(%rdx){1to8}, %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0x40,0xba,0x00,0x02,0x00,0x00]
+          vpmulld 512(%rdx){1to8}, %ymm26, %ymm23
+
+// CHECK: vpmulld -512(%rdx){1to8}, %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0x40,0x7a,0x80]
+          vpmulld -512(%rdx){1to8}, %ymm26, %ymm23
+
+// CHECK: vpmulld -516(%rdx){1to8}, %ymm26, %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0x40,0xba,0xfc,0xfd,0xff,0xff]
+          vpmulld -516(%rdx){1to8}, %ymm26, %ymm23
+
+// CHECK: vpord  %xmm28, %xmm29, %xmm28
+// CHECK:  encoding: [0x62,0x01,0x15,0x00,0xeb,0xe4]
+          vpord  %xmm28, %xmm29, %xmm28
+
+// CHECK: vpord  %xmm28, %xmm29, %xmm28 {%k2}
+// CHECK:  encoding: [0x62,0x01,0x15,0x02,0xeb,0xe4]
+          vpord  %xmm28, %xmm29, %xmm28 {%k2}
+
+// CHECK: vpord  %xmm28, %xmm29, %xmm28 {%k2} {z}
+// CHECK:  encoding: [0x62,0x01,0x15,0x82,0xeb,0xe4]
+          vpord  %xmm28, %xmm29, %xmm28 {%k2} {z}
+
+// CHECK: vpord  (%rcx), %xmm29, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x15,0x00,0xeb,0x21]
+          vpord  (%rcx), %xmm29, %xmm28
+
+// CHECK: vpord  291(%rax,%r14,8), %xmm29, %xmm28
+// CHECK:  encoding: [0x62,0x21,0x15,0x00,0xeb,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpord  291(%rax,%r14,8), %xmm29, %xmm28
+
+// CHECK: vpord  (%rcx){1to4}, %xmm29, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x15,0x10,0xeb,0x21]
+          vpord  (%rcx){1to4}, %xmm29, %xmm28
+
+// CHECK: vpord  2032(%rdx), %xmm29, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x15,0x00,0xeb,0x62,0x7f]
+          vpord  2032(%rdx), %xmm29, %xmm28
+
+// CHECK: vpord  2048(%rdx), %xmm29, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x15,0x00,0xeb,0xa2,0x00,0x08,0x00,0x00]
+          vpord  2048(%rdx), %xmm29, %xmm28
+
+// CHECK: vpord  -2048(%rdx), %xmm29, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x15,0x00,0xeb,0x62,0x80]
+          vpord  -2048(%rdx), %xmm29, %xmm28
+
+// CHECK: vpord  -2064(%rdx), %xmm29, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x15,0x00,0xeb,0xa2,0xf0,0xf7,0xff,0xff]
+          vpord  -2064(%rdx), %xmm29, %xmm28
+
+// CHECK: vpord  508(%rdx){1to4}, %xmm29, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x15,0x10,0xeb,0x62,0x7f]
+          vpord  508(%rdx){1to4}, %xmm29, %xmm28
+
+// CHECK: vpord  512(%rdx){1to4}, %xmm29, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x15,0x10,0xeb,0xa2,0x00,0x02,0x00,0x00]
+          vpord  512(%rdx){1to4}, %xmm29, %xmm28
+
+// CHECK: vpord  -512(%rdx){1to4}, %xmm29, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x15,0x10,0xeb,0x62,0x80]
+          vpord  -512(%rdx){1to4}, %xmm29, %xmm28
+
+// CHECK: vpord  -516(%rdx){1to4}, %xmm29, %xmm28
+// CHECK:  encoding: [0x62,0x61,0x15,0x10,0xeb,0xa2,0xfc,0xfd,0xff,0xff]
+          vpord  -516(%rdx){1to4}, %xmm29, %xmm28
+
+// CHECK: vpord  %ymm22, %ymm25, %ymm21
+// CHECK:  encoding: [0x62,0xa1,0x35,0x20,0xeb,0xee]
+          vpord  %ymm22, %ymm25, %ymm21
+
+// CHECK: vpord  %ymm22, %ymm25, %ymm21 {%k6}
+// CHECK:  encoding: [0x62,0xa1,0x35,0x26,0xeb,0xee]
+          vpord  %ymm22, %ymm25, %ymm21 {%k6}
+
+// CHECK: vpord  %ymm22, %ymm25, %ymm21 {%k6} {z}
+// CHECK:  encoding: [0x62,0xa1,0x35,0xa6,0xeb,0xee]
+          vpord  %ymm22, %ymm25, %ymm21 {%k6} {z}
+
+// CHECK: vpord  (%rcx), %ymm25, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x35,0x20,0xeb,0x29]
+          vpord  (%rcx), %ymm25, %ymm21
+
+// CHECK: vpord  291(%rax,%r14,8), %ymm25, %ymm21
+// CHECK:  encoding: [0x62,0xa1,0x35,0x20,0xeb,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpord  291(%rax,%r14,8), %ymm25, %ymm21
+
+// CHECK: vpord  (%rcx){1to8}, %ymm25, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x35,0x30,0xeb,0x29]
+          vpord  (%rcx){1to8}, %ymm25, %ymm21
+
+// CHECK: vpord  4064(%rdx), %ymm25, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x35,0x20,0xeb,0x6a,0x7f]
+          vpord  4064(%rdx), %ymm25, %ymm21
+
+// CHECK: vpord  4096(%rdx), %ymm25, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x35,0x20,0xeb,0xaa,0x00,0x10,0x00,0x00]
+          vpord  4096(%rdx), %ymm25, %ymm21
+
+// CHECK: vpord  -4096(%rdx), %ymm25, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x35,0x20,0xeb,0x6a,0x80]
+          vpord  -4096(%rdx), %ymm25, %ymm21
+
+// CHECK: vpord  -4128(%rdx), %ymm25, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x35,0x20,0xeb,0xaa,0xe0,0xef,0xff,0xff]
+          vpord  -4128(%rdx), %ymm25, %ymm21
+
+// CHECK: vpord  508(%rdx){1to8}, %ymm25, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x35,0x30,0xeb,0x6a,0x7f]
+          vpord  508(%rdx){1to8}, %ymm25, %ymm21
+
+// CHECK: vpord  512(%rdx){1to8}, %ymm25, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x35,0x30,0xeb,0xaa,0x00,0x02,0x00,0x00]
+          vpord  512(%rdx){1to8}, %ymm25, %ymm21
+
+// CHECK: vpord  -512(%rdx){1to8}, %ymm25, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x35,0x30,0xeb,0x6a,0x80]
+          vpord  -512(%rdx){1to8}, %ymm25, %ymm21
+
+// CHECK: vpord  -516(%rdx){1to8}, %ymm25, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x35,0x30,0xeb,0xaa,0xfc,0xfd,0xff,0xff]
+          vpord  -516(%rdx){1to8}, %ymm25, %ymm21
+
+// CHECK: vporq  %xmm20, %xmm28, %xmm21
+// CHECK:  encoding: [0x62,0xa1,0x9d,0x00,0xeb,0xec]
+          vporq  %xmm20, %xmm28, %xmm21
+
+// CHECK: vporq  %xmm20, %xmm28, %xmm21 {%k7}
+// CHECK:  encoding: [0x62,0xa1,0x9d,0x07,0xeb,0xec]
+          vporq  %xmm20, %xmm28, %xmm21 {%k7}
+
+// CHECK: vporq  %xmm20, %xmm28, %xmm21 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa1,0x9d,0x87,0xeb,0xec]
+          vporq  %xmm20, %xmm28, %xmm21 {%k7} {z}
+
+// CHECK: vporq  (%rcx), %xmm28, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x00,0xeb,0x29]
+          vporq  (%rcx), %xmm28, %xmm21
+
+// CHECK: vporq  291(%rax,%r14,8), %xmm28, %xmm21
+// CHECK:  encoding: [0x62,0xa1,0x9d,0x00,0xeb,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vporq  291(%rax,%r14,8), %xmm28, %xmm21
+
+// CHECK: vporq  (%rcx){1to2}, %xmm28, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x10,0xeb,0x29]
+          vporq  (%rcx){1to2}, %xmm28, %xmm21
+
+// CHECK: vporq  2032(%rdx), %xmm28, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x00,0xeb,0x6a,0x7f]
+          vporq  2032(%rdx), %xmm28, %xmm21
+
+// CHECK: vporq  2048(%rdx), %xmm28, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x00,0xeb,0xaa,0x00,0x08,0x00,0x00]
+          vporq  2048(%rdx), %xmm28, %xmm21
+
+// CHECK: vporq  -2048(%rdx), %xmm28, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x00,0xeb,0x6a,0x80]
+          vporq  -2048(%rdx), %xmm28, %xmm21
+
+// CHECK: vporq  -2064(%rdx), %xmm28, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x00,0xeb,0xaa,0xf0,0xf7,0xff,0xff]
+          vporq  -2064(%rdx), %xmm28, %xmm21
+
+// CHECK: vporq  1016(%rdx){1to2}, %xmm28, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x10,0xeb,0x6a,0x7f]
+          vporq  1016(%rdx){1to2}, %xmm28, %xmm21
+
+// CHECK: vporq  1024(%rdx){1to2}, %xmm28, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x10,0xeb,0xaa,0x00,0x04,0x00,0x00]
+          vporq  1024(%rdx){1to2}, %xmm28, %xmm21
+
+// CHECK: vporq  -1024(%rdx){1to2}, %xmm28, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x10,0xeb,0x6a,0x80]
+          vporq  -1024(%rdx){1to2}, %xmm28, %xmm21
+
+// CHECK: vporq  -1032(%rdx){1to2}, %xmm28, %xmm21
+// CHECK:  encoding: [0x62,0xe1,0x9d,0x10,0xeb,0xaa,0xf8,0xfb,0xff,0xff]
+          vporq  -1032(%rdx){1to2}, %xmm28, %xmm21
+
+// CHECK: vporq  %ymm24, %ymm19, %ymm28
+// CHECK:  encoding: [0x62,0x01,0xe5,0x20,0xeb,0xe0]
+          vporq  %ymm24, %ymm19, %ymm28
+
+// CHECK: vporq  %ymm24, %ymm19, %ymm28 {%k4}
+// CHECK:  encoding: [0x62,0x01,0xe5,0x24,0xeb,0xe0]
+          vporq  %ymm24, %ymm19, %ymm28 {%k4}
+
+// CHECK: vporq  %ymm24, %ymm19, %ymm28 {%k4} {z}
+// CHECK:  encoding: [0x62,0x01,0xe5,0xa4,0xeb,0xe0]
+          vporq  %ymm24, %ymm19, %ymm28 {%k4} {z}
+
+// CHECK: vporq  (%rcx), %ymm19, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xe5,0x20,0xeb,0x21]
+          vporq  (%rcx), %ymm19, %ymm28
+
+// CHECK: vporq  291(%rax,%r14,8), %ymm19, %ymm28
+// CHECK:  encoding: [0x62,0x21,0xe5,0x20,0xeb,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vporq  291(%rax,%r14,8), %ymm19, %ymm28
+
+// CHECK: vporq  (%rcx){1to4}, %ymm19, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xe5,0x30,0xeb,0x21]
+          vporq  (%rcx){1to4}, %ymm19, %ymm28
+
+// CHECK: vporq  4064(%rdx), %ymm19, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xe5,0x20,0xeb,0x62,0x7f]
+          vporq  4064(%rdx), %ymm19, %ymm28
+
+// CHECK: vporq  4096(%rdx), %ymm19, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xe5,0x20,0xeb,0xa2,0x00,0x10,0x00,0x00]
+          vporq  4096(%rdx), %ymm19, %ymm28
+
+// CHECK: vporq  -4096(%rdx), %ymm19, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xe5,0x20,0xeb,0x62,0x80]
+          vporq  -4096(%rdx), %ymm19, %ymm28
+
+// CHECK: vporq  -4128(%rdx), %ymm19, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xe5,0x20,0xeb,0xa2,0xe0,0xef,0xff,0xff]
+          vporq  -4128(%rdx), %ymm19, %ymm28
+
+// CHECK: vporq  1016(%rdx){1to4}, %ymm19, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xe5,0x30,0xeb,0x62,0x7f]
+          vporq  1016(%rdx){1to4}, %ymm19, %ymm28
+
+// CHECK: vporq  1024(%rdx){1to4}, %ymm19, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xe5,0x30,0xeb,0xa2,0x00,0x04,0x00,0x00]
+          vporq  1024(%rdx){1to4}, %ymm19, %ymm28
+
+// CHECK: vporq  -1024(%rdx){1to4}, %ymm19, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xe5,0x30,0xeb,0x62,0x80]
+          vporq  -1024(%rdx){1to4}, %ymm19, %ymm28
+
+// CHECK: vporq  -1032(%rdx){1to4}, %ymm19, %ymm28
+// CHECK:  encoding: [0x62,0x61,0xe5,0x30,0xeb,0xa2,0xf8,0xfb,0xff,0xff]
+          vporq  -1032(%rdx){1to4}, %ymm19, %ymm28
+
+// CHECK: vpsubd %xmm26, %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0x81,0x6d,0x00,0xfa,0xda]
+          vpsubd %xmm26, %xmm18, %xmm19
+
+// CHECK: vpsubd %xmm26, %xmm18, %xmm19 {%k2}
+// CHECK:  encoding: [0x62,0x81,0x6d,0x02,0xfa,0xda]
+          vpsubd %xmm26, %xmm18, %xmm19 {%k2}
+
+// CHECK: vpsubd %xmm26, %xmm18, %xmm19 {%k2} {z}
+// CHECK:  encoding: [0x62,0x81,0x6d,0x82,0xfa,0xda]
+          vpsubd %xmm26, %xmm18, %xmm19 {%k2} {z}
+
+// CHECK: vpsubd (%rcx), %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x6d,0x00,0xfa,0x19]
+          vpsubd (%rcx), %xmm18, %xmm19
+
+// CHECK: vpsubd 291(%rax,%r14,8), %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xa1,0x6d,0x00,0xfa,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpsubd 291(%rax,%r14,8), %xmm18, %xmm19
+
+// CHECK: vpsubd (%rcx){1to4}, %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x6d,0x10,0xfa,0x19]
+          vpsubd (%rcx){1to4}, %xmm18, %xmm19
+
+// CHECK: vpsubd 2032(%rdx), %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x6d,0x00,0xfa,0x5a,0x7f]
+          vpsubd 2032(%rdx), %xmm18, %xmm19
+
+// CHECK: vpsubd 2048(%rdx), %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x6d,0x00,0xfa,0x9a,0x00,0x08,0x00,0x00]
+          vpsubd 2048(%rdx), %xmm18, %xmm19
+
+// CHECK: vpsubd -2048(%rdx), %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x6d,0x00,0xfa,0x5a,0x80]
+          vpsubd -2048(%rdx), %xmm18, %xmm19
+
+// CHECK: vpsubd -2064(%rdx), %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x6d,0x00,0xfa,0x9a,0xf0,0xf7,0xff,0xff]
+          vpsubd -2064(%rdx), %xmm18, %xmm19
+
+// CHECK: vpsubd 508(%rdx){1to4}, %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x6d,0x10,0xfa,0x5a,0x7f]
+          vpsubd 508(%rdx){1to4}, %xmm18, %xmm19
+
+// CHECK: vpsubd 512(%rdx){1to4}, %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x6d,0x10,0xfa,0x9a,0x00,0x02,0x00,0x00]
+          vpsubd 512(%rdx){1to4}, %xmm18, %xmm19
+
+// CHECK: vpsubd -512(%rdx){1to4}, %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x6d,0x10,0xfa,0x5a,0x80]
+          vpsubd -512(%rdx){1to4}, %xmm18, %xmm19
+
+// CHECK: vpsubd -516(%rdx){1to4}, %xmm18, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x6d,0x10,0xfa,0x9a,0xfc,0xfd,0xff,0xff]
+          vpsubd -516(%rdx){1to4}, %xmm18, %xmm19
+
+// CHECK: vpsubd %ymm21, %ymm26, %ymm24
+// CHECK:  encoding: [0x62,0x21,0x2d,0x20,0xfa,0xc5]
+          vpsubd %ymm21, %ymm26, %ymm24
+
+// CHECK: vpsubd %ymm21, %ymm26, %ymm24 {%k1}
+// CHECK:  encoding: [0x62,0x21,0x2d,0x21,0xfa,0xc5]
+          vpsubd %ymm21, %ymm26, %ymm24 {%k1}
+
+// CHECK: vpsubd %ymm21, %ymm26, %ymm24 {%k1} {z}
+// CHECK:  encoding: [0x62,0x21,0x2d,0xa1,0xfa,0xc5]
+          vpsubd %ymm21, %ymm26, %ymm24 {%k1} {z}
+
+// CHECK: vpsubd (%rcx), %ymm26, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x20,0xfa,0x01]
+          vpsubd (%rcx), %ymm26, %ymm24
+
+// CHECK: vpsubd 291(%rax,%r14,8), %ymm26, %ymm24
+// CHECK:  encoding: [0x62,0x21,0x2d,0x20,0xfa,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vpsubd 291(%rax,%r14,8), %ymm26, %ymm24
+
+// CHECK: vpsubd (%rcx){1to8}, %ymm26, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x30,0xfa,0x01]
+          vpsubd (%rcx){1to8}, %ymm26, %ymm24
+
+// CHECK: vpsubd 4064(%rdx), %ymm26, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x20,0xfa,0x42,0x7f]
+          vpsubd 4064(%rdx), %ymm26, %ymm24
+
+// CHECK: vpsubd 4096(%rdx), %ymm26, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x20,0xfa,0x82,0x00,0x10,0x00,0x00]
+          vpsubd 4096(%rdx), %ymm26, %ymm24
+
+// CHECK: vpsubd -4096(%rdx), %ymm26, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x20,0xfa,0x42,0x80]
+          vpsubd -4096(%rdx), %ymm26, %ymm24
+
+// CHECK: vpsubd -4128(%rdx), %ymm26, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x20,0xfa,0x82,0xe0,0xef,0xff,0xff]
+          vpsubd -4128(%rdx), %ymm26, %ymm24
+
+// CHECK: vpsubd 508(%rdx){1to8}, %ymm26, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x30,0xfa,0x42,0x7f]
+          vpsubd 508(%rdx){1to8}, %ymm26, %ymm24
+
+// CHECK: vpsubd 512(%rdx){1to8}, %ymm26, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x30,0xfa,0x82,0x00,0x02,0x00,0x00]
+          vpsubd 512(%rdx){1to8}, %ymm26, %ymm24
+
+// CHECK: vpsubd -512(%rdx){1to8}, %ymm26, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x30,0xfa,0x42,0x80]
+          vpsubd -512(%rdx){1to8}, %ymm26, %ymm24
+
+// CHECK: vpsubd -516(%rdx){1to8}, %ymm26, %ymm24
+// CHECK:  encoding: [0x62,0x61,0x2d,0x30,0xfa,0x82,0xfc,0xfd,0xff,0xff]
+          vpsubd -516(%rdx){1to8}, %ymm26, %ymm24
+
+// CHECK: vpsubq %xmm27, %xmm27, %xmm20
+// CHECK:  encoding: [0x62,0x81,0xa5,0x00,0xfb,0xe3]
+          vpsubq %xmm27, %xmm27, %xmm20
+
+// CHECK: vpsubq %xmm27, %xmm27, %xmm20 {%k2}
+// CHECK:  encoding: [0x62,0x81,0xa5,0x02,0xfb,0xe3]
+          vpsubq %xmm27, %xmm27, %xmm20 {%k2}
+
+// CHECK: vpsubq %xmm27, %xmm27, %xmm20 {%k2} {z}
+// CHECK:  encoding: [0x62,0x81,0xa5,0x82,0xfb,0xe3]
+          vpsubq %xmm27, %xmm27, %xmm20 {%k2} {z}
+
+// CHECK: vpsubq (%rcx), %xmm27, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xa5,0x00,0xfb,0x21]
+          vpsubq (%rcx), %xmm27, %xmm20
+
+// CHECK: vpsubq 291(%rax,%r14,8), %xmm27, %xmm20
+// CHECK:  encoding: [0x62,0xa1,0xa5,0x00,0xfb,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpsubq 291(%rax,%r14,8), %xmm27, %xmm20
+
+// CHECK: vpsubq (%rcx){1to2}, %xmm27, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xa5,0x10,0xfb,0x21]
+          vpsubq (%rcx){1to2}, %xmm27, %xmm20
+
+// CHECK: vpsubq 2032(%rdx), %xmm27, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xa5,0x00,0xfb,0x62,0x7f]
+          vpsubq 2032(%rdx), %xmm27, %xmm20
+
+// CHECK: vpsubq 2048(%rdx), %xmm27, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xa5,0x00,0xfb,0xa2,0x00,0x08,0x00,0x00]
+          vpsubq 2048(%rdx), %xmm27, %xmm20
+
+// CHECK: vpsubq -2048(%rdx), %xmm27, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xa5,0x00,0xfb,0x62,0x80]
+          vpsubq -2048(%rdx), %xmm27, %xmm20
+
+// CHECK: vpsubq -2064(%rdx), %xmm27, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xa5,0x00,0xfb,0xa2,0xf0,0xf7,0xff,0xff]
+          vpsubq -2064(%rdx), %xmm27, %xmm20
+
+// CHECK: vpsubq 1016(%rdx){1to2}, %xmm27, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xa5,0x10,0xfb,0x62,0x7f]
+          vpsubq 1016(%rdx){1to2}, %xmm27, %xmm20
+
+// CHECK: vpsubq 1024(%rdx){1to2}, %xmm27, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xa5,0x10,0xfb,0xa2,0x00,0x04,0x00,0x00]
+          vpsubq 1024(%rdx){1to2}, %xmm27, %xmm20
+
+// CHECK: vpsubq -1024(%rdx){1to2}, %xmm27, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xa5,0x10,0xfb,0x62,0x80]
+          vpsubq -1024(%rdx){1to2}, %xmm27, %xmm20
+
+// CHECK: vpsubq -1032(%rdx){1to2}, %xmm27, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0xa5,0x10,0xfb,0xa2,0xf8,0xfb,0xff,0xff]
+          vpsubq -1032(%rdx){1to2}, %xmm27, %xmm20
+
+// CHECK: vpsubq %ymm28, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x01,0xdd,0x20,0xfb,0xec]
+          vpsubq %ymm28, %ymm20, %ymm29
+
+// CHECK: vpsubq %ymm28, %ymm20, %ymm29 {%k5}
+// CHECK:  encoding: [0x62,0x01,0xdd,0x25,0xfb,0xec]
+          vpsubq %ymm28, %ymm20, %ymm29 {%k5}
+
+// CHECK: vpsubq %ymm28, %ymm20, %ymm29 {%k5} {z}
+// CHECK:  encoding: [0x62,0x01,0xdd,0xa5,0xfb,0xec]
+          vpsubq %ymm28, %ymm20, %ymm29 {%k5} {z}
+
+// CHECK: vpsubq (%rcx), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x20,0xfb,0x29]
+          vpsubq (%rcx), %ymm20, %ymm29
+
+// CHECK: vpsubq 291(%rax,%r14,8), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x21,0xdd,0x20,0xfb,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpsubq 291(%rax,%r14,8), %ymm20, %ymm29
+
+// CHECK: vpsubq (%rcx){1to4}, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x30,0xfb,0x29]
+          vpsubq (%rcx){1to4}, %ymm20, %ymm29
+
+// CHECK: vpsubq 4064(%rdx), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x20,0xfb,0x6a,0x7f]
+          vpsubq 4064(%rdx), %ymm20, %ymm29
+
+// CHECK: vpsubq 4096(%rdx), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x20,0xfb,0xaa,0x00,0x10,0x00,0x00]
+          vpsubq 4096(%rdx), %ymm20, %ymm29
+
+// CHECK: vpsubq -4096(%rdx), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x20,0xfb,0x6a,0x80]
+          vpsubq -4096(%rdx), %ymm20, %ymm29
+
+// CHECK: vpsubq -4128(%rdx), %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x20,0xfb,0xaa,0xe0,0xef,0xff,0xff]
+          vpsubq -4128(%rdx), %ymm20, %ymm29
+
+// CHECK: vpsubq 1016(%rdx){1to4}, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x30,0xfb,0x6a,0x7f]
+          vpsubq 1016(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vpsubq 1024(%rdx){1to4}, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x30,0xfb,0xaa,0x00,0x04,0x00,0x00]
+          vpsubq 1024(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vpsubq -1024(%rdx){1to4}, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x30,0xfb,0x6a,0x80]
+          vpsubq -1024(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vpsubq -1032(%rdx){1to4}, %ymm20, %ymm29
+// CHECK:  encoding: [0x62,0x61,0xdd,0x30,0xfb,0xaa,0xf8,0xfb,0xff,0xff]
+          vpsubq -1032(%rdx){1to4}, %ymm20, %ymm29
+
+// CHECK: vpxord %xmm25, %xmm17, %xmm23
+// CHECK:  encoding: [0x62,0x81,0x75,0x00,0xef,0xf9]
+          vpxord %xmm25, %xmm17, %xmm23
+
+// CHECK: vpxord %xmm25, %xmm17, %xmm23 {%k3}
+// CHECK:  encoding: [0x62,0x81,0x75,0x03,0xef,0xf9]
+          vpxord %xmm25, %xmm17, %xmm23 {%k3}
+
+// CHECK: vpxord %xmm25, %xmm17, %xmm23 {%k3} {z}
+// CHECK:  encoding: [0x62,0x81,0x75,0x83,0xef,0xf9]
+          vpxord %xmm25, %xmm17, %xmm23 {%k3} {z}
+
+// CHECK: vpxord (%rcx), %xmm17, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x75,0x00,0xef,0x39]
+          vpxord (%rcx), %xmm17, %xmm23
+
+// CHECK: vpxord 291(%rax,%r14,8), %xmm17, %xmm23
+// CHECK:  encoding: [0x62,0xa1,0x75,0x00,0xef,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vpxord 291(%rax,%r14,8), %xmm17, %xmm23
+
+// CHECK: vpxord (%rcx){1to4}, %xmm17, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x75,0x10,0xef,0x39]
+          vpxord (%rcx){1to4}, %xmm17, %xmm23
+
+// CHECK: vpxord 2032(%rdx), %xmm17, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x75,0x00,0xef,0x7a,0x7f]
+          vpxord 2032(%rdx), %xmm17, %xmm23
+
+// CHECK: vpxord 2048(%rdx), %xmm17, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x75,0x00,0xef,0xba,0x00,0x08,0x00,0x00]
+          vpxord 2048(%rdx), %xmm17, %xmm23
+
+// CHECK: vpxord -2048(%rdx), %xmm17, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x75,0x00,0xef,0x7a,0x80]
+          vpxord -2048(%rdx), %xmm17, %xmm23
+
+// CHECK: vpxord -2064(%rdx), %xmm17, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x75,0x00,0xef,0xba,0xf0,0xf7,0xff,0xff]
+          vpxord -2064(%rdx), %xmm17, %xmm23
+
+// CHECK: vpxord 508(%rdx){1to4}, %xmm17, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x75,0x10,0xef,0x7a,0x7f]
+          vpxord 508(%rdx){1to4}, %xmm17, %xmm23
+
+// CHECK: vpxord 512(%rdx){1to4}, %xmm17, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x75,0x10,0xef,0xba,0x00,0x02,0x00,0x00]
+          vpxord 512(%rdx){1to4}, %xmm17, %xmm23
+
+// CHECK: vpxord -512(%rdx){1to4}, %xmm17, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x75,0x10,0xef,0x7a,0x80]
+          vpxord -512(%rdx){1to4}, %xmm17, %xmm23
+
+// CHECK: vpxord -516(%rdx){1to4}, %xmm17, %xmm23
+// CHECK:  encoding: [0x62,0xe1,0x75,0x10,0xef,0xba,0xfc,0xfd,0xff,0xff]
+          vpxord -516(%rdx){1to4}, %xmm17, %xmm23
+
+// CHECK: vpxord %ymm22, %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xa1,0x15,0x20,0xef,0xf6]
+          vpxord %ymm22, %ymm29, %ymm22
+
+// CHECK: vpxord %ymm22, %ymm29, %ymm22 {%k4}
+// CHECK:  encoding: [0x62,0xa1,0x15,0x24,0xef,0xf6]
+          vpxord %ymm22, %ymm29, %ymm22 {%k4}
+
+// CHECK: vpxord %ymm22, %ymm29, %ymm22 {%k4} {z}
+// CHECK:  encoding: [0x62,0xa1,0x15,0xa4,0xef,0xf6]
+          vpxord %ymm22, %ymm29, %ymm22 {%k4} {z}
+
+// CHECK: vpxord (%rcx), %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe1,0x15,0x20,0xef,0x31]
+          vpxord (%rcx), %ymm29, %ymm22
+
+// CHECK: vpxord 291(%rax,%r14,8), %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xa1,0x15,0x20,0xef,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpxord 291(%rax,%r14,8), %ymm29, %ymm22
+
+// CHECK: vpxord (%rcx){1to8}, %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe1,0x15,0x30,0xef,0x31]
+          vpxord (%rcx){1to8}, %ymm29, %ymm22
+
+// CHECK: vpxord 4064(%rdx), %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe1,0x15,0x20,0xef,0x72,0x7f]
+          vpxord 4064(%rdx), %ymm29, %ymm22
+
+// CHECK: vpxord 4096(%rdx), %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe1,0x15,0x20,0xef,0xb2,0x00,0x10,0x00,0x00]
+          vpxord 4096(%rdx), %ymm29, %ymm22
+
+// CHECK: vpxord -4096(%rdx), %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe1,0x15,0x20,0xef,0x72,0x80]
+          vpxord -4096(%rdx), %ymm29, %ymm22
+
+// CHECK: vpxord -4128(%rdx), %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe1,0x15,0x20,0xef,0xb2,0xe0,0xef,0xff,0xff]
+          vpxord -4128(%rdx), %ymm29, %ymm22
+
+// CHECK: vpxord 508(%rdx){1to8}, %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe1,0x15,0x30,0xef,0x72,0x7f]
+          vpxord 508(%rdx){1to8}, %ymm29, %ymm22
+
+// CHECK: vpxord 512(%rdx){1to8}, %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe1,0x15,0x30,0xef,0xb2,0x00,0x02,0x00,0x00]
+          vpxord 512(%rdx){1to8}, %ymm29, %ymm22
+
+// CHECK: vpxord -512(%rdx){1to8}, %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe1,0x15,0x30,0xef,0x72,0x80]
+          vpxord -512(%rdx){1to8}, %ymm29, %ymm22
+
+// CHECK: vpxord -516(%rdx){1to8}, %ymm29, %ymm22
+// CHECK:  encoding: [0x62,0xe1,0x15,0x30,0xef,0xb2,0xfc,0xfd,0xff,0xff]
+          vpxord -516(%rdx){1to8}, %ymm29, %ymm22
+
+// CHECK: vpxorq %xmm18, %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x21,0xf5,0x00,0xef,0xd2]
+          vpxorq %xmm18, %xmm17, %xmm26
+
+// CHECK: vpxorq %xmm18, %xmm17, %xmm26 {%k2}
+// CHECK:  encoding: [0x62,0x21,0xf5,0x02,0xef,0xd2]
+          vpxorq %xmm18, %xmm17, %xmm26 {%k2}
+
+// CHECK: vpxorq %xmm18, %xmm17, %xmm26 {%k2} {z}
+// CHECK:  encoding: [0x62,0x21,0xf5,0x82,0xef,0xd2]
+          vpxorq %xmm18, %xmm17, %xmm26 {%k2} {z}
+
+// CHECK: vpxorq (%rcx), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x61,0xf5,0x00,0xef,0x11]
+          vpxorq (%rcx), %xmm17, %xmm26
+
+// CHECK: vpxorq 291(%rax,%r14,8), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x21,0xf5,0x00,0xef,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpxorq 291(%rax,%r14,8), %xmm17, %xmm26
+
+// CHECK: vpxorq (%rcx){1to2}, %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x61,0xf5,0x10,0xef,0x11]
+          vpxorq (%rcx){1to2}, %xmm17, %xmm26
+
+// CHECK: vpxorq 2032(%rdx), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x61,0xf5,0x00,0xef,0x52,0x7f]
+          vpxorq 2032(%rdx), %xmm17, %xmm26
+
+// CHECK: vpxorq 2048(%rdx), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x61,0xf5,0x00,0xef,0x92,0x00,0x08,0x00,0x00]
+          vpxorq 2048(%rdx), %xmm17, %xmm26
+
+// CHECK: vpxorq -2048(%rdx), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x61,0xf5,0x00,0xef,0x52,0x80]
+          vpxorq -2048(%rdx), %xmm17, %xmm26
+
+// CHECK: vpxorq -2064(%rdx), %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x61,0xf5,0x00,0xef,0x92,0xf0,0xf7,0xff,0xff]
+          vpxorq -2064(%rdx), %xmm17, %xmm26
+
+// CHECK: vpxorq 1016(%rdx){1to2}, %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x61,0xf5,0x10,0xef,0x52,0x7f]
+          vpxorq 1016(%rdx){1to2}, %xmm17, %xmm26
+
+// CHECK: vpxorq 1024(%rdx){1to2}, %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x61,0xf5,0x10,0xef,0x92,0x00,0x04,0x00,0x00]
+          vpxorq 1024(%rdx){1to2}, %xmm17, %xmm26
+
+// CHECK: vpxorq -1024(%rdx){1to2}, %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x61,0xf5,0x10,0xef,0x52,0x80]
+          vpxorq -1024(%rdx){1to2}, %xmm17, %xmm26
+
+// CHECK: vpxorq -1032(%rdx){1to2}, %xmm17, %xmm26
+// CHECK:  encoding: [0x62,0x61,0xf5,0x10,0xef,0x92,0xf8,0xfb,0xff,0xff]
+          vpxorq -1032(%rdx){1to2}, %xmm17, %xmm26
+
+// CHECK: vpxorq %ymm19, %ymm18, %ymm21
+// CHECK:  encoding: [0x62,0xa1,0xed,0x20,0xef,0xeb]
+          vpxorq %ymm19, %ymm18, %ymm21
+
+// CHECK: vpxorq %ymm19, %ymm18, %ymm21 {%k7}
+// CHECK:  encoding: [0x62,0xa1,0xed,0x27,0xef,0xeb]
+          vpxorq %ymm19, %ymm18, %ymm21 {%k7}
+
+// CHECK: vpxorq %ymm19, %ymm18, %ymm21 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa1,0xed,0xa7,0xef,0xeb]
+          vpxorq %ymm19, %ymm18, %ymm21 {%k7} {z}
+
+// CHECK: vpxorq (%rcx), %ymm18, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xed,0x20,0xef,0x29]
+          vpxorq (%rcx), %ymm18, %ymm21
+
+// CHECK: vpxorq 291(%rax,%r14,8), %ymm18, %ymm21
+// CHECK:  encoding: [0x62,0xa1,0xed,0x20,0xef,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpxorq 291(%rax,%r14,8), %ymm18, %ymm21
+
+// CHECK: vpxorq (%rcx){1to4}, %ymm18, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xed,0x30,0xef,0x29]
+          vpxorq (%rcx){1to4}, %ymm18, %ymm21
+
+// CHECK: vpxorq 4064(%rdx), %ymm18, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xed,0x20,0xef,0x6a,0x7f]
+          vpxorq 4064(%rdx), %ymm18, %ymm21
+
+// CHECK: vpxorq 4096(%rdx), %ymm18, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xed,0x20,0xef,0xaa,0x00,0x10,0x00,0x00]
+          vpxorq 4096(%rdx), %ymm18, %ymm21
+
+// CHECK: vpxorq -4096(%rdx), %ymm18, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xed,0x20,0xef,0x6a,0x80]
+          vpxorq -4096(%rdx), %ymm18, %ymm21
+
+// CHECK: vpxorq -4128(%rdx), %ymm18, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xed,0x20,0xef,0xaa,0xe0,0xef,0xff,0xff]
+          vpxorq -4128(%rdx), %ymm18, %ymm21
+
+// CHECK: vpxorq 1016(%rdx){1to4}, %ymm18, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xed,0x30,0xef,0x6a,0x7f]
+          vpxorq 1016(%rdx){1to4}, %ymm18, %ymm21
+
+// CHECK: vpxorq 1024(%rdx){1to4}, %ymm18, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xed,0x30,0xef,0xaa,0x00,0x04,0x00,0x00]
+          vpxorq 1024(%rdx){1to4}, %ymm18, %ymm21
+
+// CHECK: vpxorq -1024(%rdx){1to4}, %ymm18, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xed,0x30,0xef,0x6a,0x80]
+          vpxorq -1024(%rdx){1to4}, %ymm18, %ymm21
+
+// CHECK: vpxorq -1032(%rdx){1to4}, %ymm18, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0xed,0x30,0xef,0xaa,0xf8,0xfb,0xff,0xff]
+          vpxorq -1032(%rdx){1to4}, %ymm18, %ymm21
+
+// CHECK: vrcp14pd %xmm29, %xmm18
+// CHECK:  encoding: [0x62,0x82,0xfd,0x08,0x4c,0xd5]
+          vrcp14pd %xmm29, %xmm18
+
+// CHECK: vrcp14pd %xmm29, %xmm18 {%k4}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x0c,0x4c,0xd5]
+          vrcp14pd %xmm29, %xmm18 {%k4}
+
+// CHECK: vrcp14pd %xmm29, %xmm18 {%k4} {z}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x8c,0x4c,0xd5]
+          vrcp14pd %xmm29, %xmm18 {%k4} {z}
+
+// CHECK: vrcp14pd (%rcx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x4c,0x11]
+          vrcp14pd (%rcx), %xmm18
+
+// CHECK: vrcp14pd 291(%rax,%r14,8), %xmm18
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x08,0x4c,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vrcp14pd 291(%rax,%r14,8), %xmm18
+
+// CHECK: vrcp14pd (%rcx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x4c,0x11]
+          vrcp14pd (%rcx){1to2}, %xmm18
+
+// CHECK: vrcp14pd 2032(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x4c,0x52,0x7f]
+          vrcp14pd 2032(%rdx), %xmm18
+
+// CHECK: vrcp14pd 2048(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x4c,0x92,0x00,0x08,0x00,0x00]
+          vrcp14pd 2048(%rdx), %xmm18
+
+// CHECK: vrcp14pd -2048(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x4c,0x52,0x80]
+          vrcp14pd -2048(%rdx), %xmm18
+
+// CHECK: vrcp14pd -2064(%rdx), %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x4c,0x92,0xf0,0xf7,0xff,0xff]
+          vrcp14pd -2064(%rdx), %xmm18
+
+// CHECK: vrcp14pd 1016(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x4c,0x52,0x7f]
+          vrcp14pd 1016(%rdx){1to2}, %xmm18
+
+// CHECK: vrcp14pd 1024(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x4c,0x92,0x00,0x04,0x00,0x00]
+          vrcp14pd 1024(%rdx){1to2}, %xmm18
+
+// CHECK: vrcp14pd -1024(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x4c,0x52,0x80]
+          vrcp14pd -1024(%rdx){1to2}, %xmm18
+
+// CHECK: vrcp14pd -1032(%rdx){1to2}, %xmm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x4c,0x92,0xf8,0xfb,0xff,0xff]
+          vrcp14pd -1032(%rdx){1to2}, %xmm18
+
+// CHECK: vrcp14pd %ymm29, %ymm17
+// CHECK:  encoding: [0x62,0x82,0xfd,0x28,0x4c,0xcd]
+          vrcp14pd %ymm29, %ymm17
+
+// CHECK: vrcp14pd %ymm29, %ymm17 {%k4}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x2c,0x4c,0xcd]
+          vrcp14pd %ymm29, %ymm17 {%k4}
+
+// CHECK: vrcp14pd %ymm29, %ymm17 {%k4} {z}
+// CHECK:  encoding: [0x62,0x82,0xfd,0xac,0x4c,0xcd]
+          vrcp14pd %ymm29, %ymm17 {%k4} {z}
+
+// CHECK: vrcp14pd (%rcx), %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x4c,0x09]
+          vrcp14pd (%rcx), %ymm17
+
+// CHECK: vrcp14pd 291(%rax,%r14,8), %ymm17
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x4c,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vrcp14pd 291(%rax,%r14,8), %ymm17
+
+// CHECK: vrcp14pd (%rcx){1to4}, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x4c,0x09]
+          vrcp14pd (%rcx){1to4}, %ymm17
+
+// CHECK: vrcp14pd 4064(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x4c,0x4a,0x7f]
+          vrcp14pd 4064(%rdx), %ymm17
+
+// CHECK: vrcp14pd 4096(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x4c,0x8a,0x00,0x10,0x00,0x00]
+          vrcp14pd 4096(%rdx), %ymm17
+
+// CHECK: vrcp14pd -4096(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x4c,0x4a,0x80]
+          vrcp14pd -4096(%rdx), %ymm17
+
+// CHECK: vrcp14pd -4128(%rdx), %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x4c,0x8a,0xe0,0xef,0xff,0xff]
+          vrcp14pd -4128(%rdx), %ymm17
+
+// CHECK: vrcp14pd 1016(%rdx){1to4}, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x4c,0x4a,0x7f]
+          vrcp14pd 1016(%rdx){1to4}, %ymm17
+
+// CHECK: vrcp14pd 1024(%rdx){1to4}, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x4c,0x8a,0x00,0x04,0x00,0x00]
+          vrcp14pd 1024(%rdx){1to4}, %ymm17
+
+// CHECK: vrcp14pd -1024(%rdx){1to4}, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x4c,0x4a,0x80]
+          vrcp14pd -1024(%rdx){1to4}, %ymm17
+
+// CHECK: vrcp14pd -1032(%rdx){1to4}, %ymm17
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x4c,0x8a,0xf8,0xfb,0xff,0xff]
+          vrcp14pd -1032(%rdx){1to4}, %ymm17
+
+// CHECK: vrcp14ps %xmm28, %xmm27
+// CHECK:  encoding: [0x62,0x02,0x7d,0x08,0x4c,0xdc]
+          vrcp14ps %xmm28, %xmm27
+
+// CHECK: vrcp14ps %xmm28, %xmm27 {%k4}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x0c,0x4c,0xdc]
+          vrcp14ps %xmm28, %xmm27 {%k4}
+
+// CHECK: vrcp14ps %xmm28, %xmm27 {%k4} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x8c,0x4c,0xdc]
+          vrcp14ps %xmm28, %xmm27 {%k4} {z}
+
+// CHECK: vrcp14ps (%rcx), %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x4c,0x19]
+          vrcp14ps (%rcx), %xmm27
+
+// CHECK: vrcp14ps 291(%rax,%r14,8), %xmm27
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x4c,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vrcp14ps 291(%rax,%r14,8), %xmm27
+
+// CHECK: vrcp14ps (%rcx){1to4}, %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x4c,0x19]
+          vrcp14ps (%rcx){1to4}, %xmm27
+
+// CHECK: vrcp14ps 2032(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x4c,0x5a,0x7f]
+          vrcp14ps 2032(%rdx), %xmm27
+
+// CHECK: vrcp14ps 2048(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x4c,0x9a,0x00,0x08,0x00,0x00]
+          vrcp14ps 2048(%rdx), %xmm27
+
+// CHECK: vrcp14ps -2048(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x4c,0x5a,0x80]
+          vrcp14ps -2048(%rdx), %xmm27
+
+// CHECK: vrcp14ps -2064(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x4c,0x9a,0xf0,0xf7,0xff,0xff]
+          vrcp14ps -2064(%rdx), %xmm27
+
+// CHECK: vrcp14ps 508(%rdx){1to4}, %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x4c,0x5a,0x7f]
+          vrcp14ps 508(%rdx){1to4}, %xmm27
+
+// CHECK: vrcp14ps 512(%rdx){1to4}, %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x4c,0x9a,0x00,0x02,0x00,0x00]
+          vrcp14ps 512(%rdx){1to4}, %xmm27
+
+// CHECK: vrcp14ps -512(%rdx){1to4}, %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x4c,0x5a,0x80]
+          vrcp14ps -512(%rdx){1to4}, %xmm27
+
+// CHECK: vrcp14ps -516(%rdx){1to4}, %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x18,0x4c,0x9a,0xfc,0xfd,0xff,0xff]
+          vrcp14ps -516(%rdx){1to4}, %xmm27
+
+// CHECK: vrcp14ps %ymm21, %ymm29
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x4c,0xed]
+          vrcp14ps %ymm21, %ymm29
+
+// CHECK: vrcp14ps %ymm21, %ymm29 {%k7}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x2f,0x4c,0xed]
+          vrcp14ps %ymm21, %ymm29 {%k7}
+
+// CHECK: vrcp14ps %ymm21, %ymm29 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xaf,0x4c,0xed]
+          vrcp14ps %ymm21, %ymm29 {%k7} {z}
+
+// CHECK: vrcp14ps (%rcx), %ymm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x4c,0x29]
+          vrcp14ps (%rcx), %ymm29
+
+// CHECK: vrcp14ps 291(%rax,%r14,8), %ymm29
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x4c,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vrcp14ps 291(%rax,%r14,8), %ymm29
+
+// CHECK: vrcp14ps (%rcx){1to8}, %ymm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x4c,0x29]
+          vrcp14ps (%rcx){1to8}, %ymm29
+
+// CHECK: vrcp14ps 4064(%rdx), %ymm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x4c,0x6a,0x7f]
+          vrcp14ps 4064(%rdx), %ymm29
+
+// CHECK: vrcp14ps 4096(%rdx), %ymm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x4c,0xaa,0x00,0x10,0x00,0x00]
+          vrcp14ps 4096(%rdx), %ymm29
+
+// CHECK: vrcp14ps -4096(%rdx), %ymm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x4c,0x6a,0x80]
+          vrcp14ps -4096(%rdx), %ymm29
+
+// CHECK: vrcp14ps -4128(%rdx), %ymm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x4c,0xaa,0xe0,0xef,0xff,0xff]
+          vrcp14ps -4128(%rdx), %ymm29
+
+// CHECK: vrcp14ps 508(%rdx){1to8}, %ymm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x4c,0x6a,0x7f]
+          vrcp14ps 508(%rdx){1to8}, %ymm29
+
+// CHECK: vrcp14ps 512(%rdx){1to8}, %ymm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x4c,0xaa,0x00,0x02,0x00,0x00]
+          vrcp14ps 512(%rdx){1to8}, %ymm29
+
+// CHECK: vrcp14ps -512(%rdx){1to8}, %ymm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x4c,0x6a,0x80]
+          vrcp14ps -512(%rdx){1to8}, %ymm29
+
+// CHECK: vrcp14ps -516(%rdx){1to8}, %ymm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x4c,0xaa,0xfc,0xfd,0xff,0xff]
+          vrcp14ps -516(%rdx){1to8}, %ymm29
+
+// CHECK: vrsqrt14pd %xmm28, %xmm21
+// CHECK:  encoding: [0x62,0x82,0xfd,0x08,0x4e,0xec]
+          vrsqrt14pd %xmm28, %xmm21
+
+// CHECK: vrsqrt14pd %xmm28, %xmm21 {%k1}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x09,0x4e,0xec]
+          vrsqrt14pd %xmm28, %xmm21 {%k1}
+
+// CHECK: vrsqrt14pd %xmm28, %xmm21 {%k1} {z}
+// CHECK:  encoding: [0x62,0x82,0xfd,0x89,0x4e,0xec]
+          vrsqrt14pd %xmm28, %xmm21 {%k1} {z}
+
+// CHECK: vrsqrt14pd (%rcx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x4e,0x29]
+          vrsqrt14pd (%rcx), %xmm21
+
+// CHECK: vrsqrt14pd 291(%rax,%r14,8), %xmm21
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x08,0x4e,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vrsqrt14pd 291(%rax,%r14,8), %xmm21
+
+// CHECK: vrsqrt14pd (%rcx){1to2}, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x4e,0x29]
+          vrsqrt14pd (%rcx){1to2}, %xmm21
+
+// CHECK: vrsqrt14pd 2032(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x4e,0x6a,0x7f]
+          vrsqrt14pd 2032(%rdx), %xmm21
+
+// CHECK: vrsqrt14pd 2048(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x4e,0xaa,0x00,0x08,0x00,0x00]
+          vrsqrt14pd 2048(%rdx), %xmm21
+
+// CHECK: vrsqrt14pd -2048(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x4e,0x6a,0x80]
+          vrsqrt14pd -2048(%rdx), %xmm21
+
+// CHECK: vrsqrt14pd -2064(%rdx), %xmm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x08,0x4e,0xaa,0xf0,0xf7,0xff,0xff]
+          vrsqrt14pd -2064(%rdx), %xmm21
+
+// CHECK: vrsqrt14pd 1016(%rdx){1to2}, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x4e,0x6a,0x7f]
+          vrsqrt14pd 1016(%rdx){1to2}, %xmm21
+
+// CHECK: vrsqrt14pd 1024(%rdx){1to2}, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x4e,0xaa,0x00,0x04,0x00,0x00]
+          vrsqrt14pd 1024(%rdx){1to2}, %xmm21
+
+// CHECK: vrsqrt14pd -1024(%rdx){1to2}, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x4e,0x6a,0x80]
+          vrsqrt14pd -1024(%rdx){1to2}, %xmm21
+
+// CHECK: vrsqrt14pd -1032(%rdx){1to2}, %xmm21
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x18,0x4e,0xaa,0xf8,0xfb,0xff,0xff]
+          vrsqrt14pd -1032(%rdx){1to2}, %xmm21
+
+// CHECK: vrsqrt14pd %ymm19, %ymm18
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x4e,0xd3]
+          vrsqrt14pd %ymm19, %ymm18
+
+// CHECK: vrsqrt14pd %ymm19, %ymm18 {%k4}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x2c,0x4e,0xd3]
+          vrsqrt14pd %ymm19, %ymm18 {%k4}
+
+// CHECK: vrsqrt14pd %ymm19, %ymm18 {%k4} {z}
+// CHECK:  encoding: [0x62,0xa2,0xfd,0xac,0x4e,0xd3]
+          vrsqrt14pd %ymm19, %ymm18 {%k4} {z}
+
+// CHECK: vrsqrt14pd (%rcx), %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x4e,0x11]
+          vrsqrt14pd (%rcx), %ymm18
+
+// CHECK: vrsqrt14pd 291(%rax,%r14,8), %ymm18
+// CHECK:  encoding: [0x62,0xa2,0xfd,0x28,0x4e,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vrsqrt14pd 291(%rax,%r14,8), %ymm18
+
+// CHECK: vrsqrt14pd (%rcx){1to4}, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x4e,0x11]
+          vrsqrt14pd (%rcx){1to4}, %ymm18
+
+// CHECK: vrsqrt14pd 4064(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x4e,0x52,0x7f]
+          vrsqrt14pd 4064(%rdx), %ymm18
+
+// CHECK: vrsqrt14pd 4096(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x4e,0x92,0x00,0x10,0x00,0x00]
+          vrsqrt14pd 4096(%rdx), %ymm18
+
+// CHECK: vrsqrt14pd -4096(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x4e,0x52,0x80]
+          vrsqrt14pd -4096(%rdx), %ymm18
+
+// CHECK: vrsqrt14pd -4128(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x28,0x4e,0x92,0xe0,0xef,0xff,0xff]
+          vrsqrt14pd -4128(%rdx), %ymm18
+
+// CHECK: vrsqrt14pd 1016(%rdx){1to4}, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x4e,0x52,0x7f]
+          vrsqrt14pd 1016(%rdx){1to4}, %ymm18
+
+// CHECK: vrsqrt14pd 1024(%rdx){1to4}, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x4e,0x92,0x00,0x04,0x00,0x00]
+          vrsqrt14pd 1024(%rdx){1to4}, %ymm18
+
+// CHECK: vrsqrt14pd -1024(%rdx){1to4}, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x4e,0x52,0x80]
+          vrsqrt14pd -1024(%rdx){1to4}, %ymm18
+
+// CHECK: vrsqrt14pd -1032(%rdx){1to4}, %ymm18
+// CHECK:  encoding: [0x62,0xe2,0xfd,0x38,0x4e,0x92,0xf8,0xfb,0xff,0xff]
+          vrsqrt14pd -1032(%rdx){1to4}, %ymm18
+
+// CHECK: vrsqrt14ps %xmm20, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x4e,0xdc]
+          vrsqrt14ps %xmm20, %xmm19
+
+// CHECK: vrsqrt14ps %xmm20, %xmm19 {%k7}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x0f,0x4e,0xdc]
+          vrsqrt14ps %xmm20, %xmm19 {%k7}
+
+// CHECK: vrsqrt14ps %xmm20, %xmm19 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x8f,0x4e,0xdc]
+          vrsqrt14ps %xmm20, %xmm19 {%k7} {z}
+
+// CHECK: vrsqrt14ps (%rcx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x4e,0x19]
+          vrsqrt14ps (%rcx), %xmm19
+
+// CHECK: vrsqrt14ps 291(%rax,%r14,8), %xmm19
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x4e,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vrsqrt14ps 291(%rax,%r14,8), %xmm19
+
+// CHECK: vrsqrt14ps (%rcx){1to4}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x18,0x4e,0x19]
+          vrsqrt14ps (%rcx){1to4}, %xmm19
+
+// CHECK: vrsqrt14ps 2032(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x4e,0x5a,0x7f]
+          vrsqrt14ps 2032(%rdx), %xmm19
+
+// CHECK: vrsqrt14ps 2048(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x4e,0x9a,0x00,0x08,0x00,0x00]
+          vrsqrt14ps 2048(%rdx), %xmm19
+
+// CHECK: vrsqrt14ps -2048(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x4e,0x5a,0x80]
+          vrsqrt14ps -2048(%rdx), %xmm19
+
+// CHECK: vrsqrt14ps -2064(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x4e,0x9a,0xf0,0xf7,0xff,0xff]
+          vrsqrt14ps -2064(%rdx), %xmm19
+
+// CHECK: vrsqrt14ps 508(%rdx){1to4}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x18,0x4e,0x5a,0x7f]
+          vrsqrt14ps 508(%rdx){1to4}, %xmm19
+
+// CHECK: vrsqrt14ps 512(%rdx){1to4}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x18,0x4e,0x9a,0x00,0x02,0x00,0x00]
+          vrsqrt14ps 512(%rdx){1to4}, %xmm19
+
+// CHECK: vrsqrt14ps -512(%rdx){1to4}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x18,0x4e,0x5a,0x80]
+          vrsqrt14ps -512(%rdx){1to4}, %xmm19
+
+// CHECK: vrsqrt14ps -516(%rdx){1to4}, %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x18,0x4e,0x9a,0xfc,0xfd,0xff,0xff]
+          vrsqrt14ps -516(%rdx){1to4}, %xmm19
+
+// CHECK: vrsqrt14ps %ymm18, %ymm27
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x4e,0xda]
+          vrsqrt14ps %ymm18, %ymm27
+
+// CHECK: vrsqrt14ps %ymm18, %ymm27 {%k7}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x2f,0x4e,0xda]
+          vrsqrt14ps %ymm18, %ymm27 {%k7}
+
+// CHECK: vrsqrt14ps %ymm18, %ymm27 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xaf,0x4e,0xda]
+          vrsqrt14ps %ymm18, %ymm27 {%k7} {z}
+
+// CHECK: vrsqrt14ps (%rcx), %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x4e,0x19]
+          vrsqrt14ps (%rcx), %ymm27
+
+// CHECK: vrsqrt14ps 291(%rax,%r14,8), %ymm27
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x4e,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vrsqrt14ps 291(%rax,%r14,8), %ymm27
+
+// CHECK: vrsqrt14ps (%rcx){1to8}, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x4e,0x19]
+          vrsqrt14ps (%rcx){1to8}, %ymm27
+
+// CHECK: vrsqrt14ps 4064(%rdx), %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x4e,0x5a,0x7f]
+          vrsqrt14ps 4064(%rdx), %ymm27
+
+// CHECK: vrsqrt14ps 4096(%rdx), %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x4e,0x9a,0x00,0x10,0x00,0x00]
+          vrsqrt14ps 4096(%rdx), %ymm27
+
+// CHECK: vrsqrt14ps -4096(%rdx), %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x4e,0x5a,0x80]
+          vrsqrt14ps -4096(%rdx), %ymm27
+
+// CHECK: vrsqrt14ps -4128(%rdx), %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x4e,0x9a,0xe0,0xef,0xff,0xff]
+          vrsqrt14ps -4128(%rdx), %ymm27
+
+// CHECK: vrsqrt14ps 508(%rdx){1to8}, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x4e,0x5a,0x7f]
+          vrsqrt14ps 508(%rdx){1to8}, %ymm27
+
+// CHECK: vrsqrt14ps 512(%rdx){1to8}, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x4e,0x9a,0x00,0x02,0x00,0x00]
+          vrsqrt14ps 512(%rdx){1to8}, %ymm27
+
+// CHECK: vrsqrt14ps -512(%rdx){1to8}, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x4e,0x5a,0x80]
+          vrsqrt14ps -512(%rdx){1to8}, %ymm27
+
+// CHECK: vrsqrt14ps -516(%rdx){1to8}, %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x38,0x4e,0x9a,0xfc,0xfd,0xff,0xff]
+          vrsqrt14ps -516(%rdx){1to8}, %ymm27
+
+// CHECK: vsqrtpd %xmm26, %xmm29
+// CHECK:  encoding: [0x62,0x01,0xfd,0x08,0x51,0xea]
+          vsqrtpd %xmm26, %xmm29
+
+// CHECK: vsqrtpd %xmm26, %xmm29 {%k3}
+// CHECK:  encoding: [0x62,0x01,0xfd,0x0b,0x51,0xea]
+          vsqrtpd %xmm26, %xmm29 {%k3}
+
+// CHECK: vsqrtpd %xmm26, %xmm29 {%k3} {z}
+// CHECK:  encoding: [0x62,0x01,0xfd,0x8b,0x51,0xea]
+          vsqrtpd %xmm26, %xmm29 {%k3} {z}
+
+// CHECK: vsqrtpd (%rcx), %xmm29
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x51,0x29]
+          vsqrtpd (%rcx), %xmm29
+
+// CHECK: vsqrtpd 291(%rax,%r14,8), %xmm29
+// CHECK:  encoding: [0x62,0x21,0xfd,0x08,0x51,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vsqrtpd 291(%rax,%r14,8), %xmm29
+
+// CHECK: vsqrtpd (%rcx){1to2}, %xmm29
+// CHECK:  encoding: [0x62,0x61,0xfd,0x18,0x51,0x29]
+          vsqrtpd (%rcx){1to2}, %xmm29
+
+// CHECK: vsqrtpd 2032(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x51,0x6a,0x7f]
+          vsqrtpd 2032(%rdx), %xmm29
+
+// CHECK: vsqrtpd 2048(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x51,0xaa,0x00,0x08,0x00,0x00]
+          vsqrtpd 2048(%rdx), %xmm29
+
+// CHECK: vsqrtpd -2048(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x51,0x6a,0x80]
+          vsqrtpd -2048(%rdx), %xmm29
+
+// CHECK: vsqrtpd -2064(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x61,0xfd,0x08,0x51,0xaa,0xf0,0xf7,0xff,0xff]
+          vsqrtpd -2064(%rdx), %xmm29
+
+// CHECK: vsqrtpd 1016(%rdx){1to2}, %xmm29
+// CHECK:  encoding: [0x62,0x61,0xfd,0x18,0x51,0x6a,0x7f]
+          vsqrtpd 1016(%rdx){1to2}, %xmm29
+
+// CHECK: vsqrtpd 1024(%rdx){1to2}, %xmm29
+// CHECK:  encoding: [0x62,0x61,0xfd,0x18,0x51,0xaa,0x00,0x04,0x00,0x00]
+          vsqrtpd 1024(%rdx){1to2}, %xmm29
+
+// CHECK: vsqrtpd -1024(%rdx){1to2}, %xmm29
+// CHECK:  encoding: [0x62,0x61,0xfd,0x18,0x51,0x6a,0x80]
+          vsqrtpd -1024(%rdx){1to2}, %xmm29
+
+// CHECK: vsqrtpd -1032(%rdx){1to2}, %xmm29
+// CHECK:  encoding: [0x62,0x61,0xfd,0x18,0x51,0xaa,0xf8,0xfb,0xff,0xff]
+          vsqrtpd -1032(%rdx){1to2}, %xmm29
+
+// CHECK: vsqrtpd %ymm20, %ymm18
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x28,0x51,0xd4]
+          vsqrtpd %ymm20, %ymm18
+
+// CHECK: vsqrtpd %ymm20, %ymm18 {%k3}
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x2b,0x51,0xd4]
+          vsqrtpd %ymm20, %ymm18 {%k3}
+
+// CHECK: vsqrtpd %ymm20, %ymm18 {%k3} {z}
+// CHECK:  encoding: [0x62,0xa1,0xfd,0xab,0x51,0xd4]
+          vsqrtpd %ymm20, %ymm18 {%k3} {z}
+
+// CHECK: vsqrtpd (%rcx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x51,0x11]
+          vsqrtpd (%rcx), %ymm18
+
+// CHECK: vsqrtpd 291(%rax,%r14,8), %ymm18
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x28,0x51,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vsqrtpd 291(%rax,%r14,8), %ymm18
+
+// CHECK: vsqrtpd (%rcx){1to4}, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x38,0x51,0x11]
+          vsqrtpd (%rcx){1to4}, %ymm18
+
+// CHECK: vsqrtpd 4064(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x51,0x52,0x7f]
+          vsqrtpd 4064(%rdx), %ymm18
+
+// CHECK: vsqrtpd 4096(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x51,0x92,0x00,0x10,0x00,0x00]
+          vsqrtpd 4096(%rdx), %ymm18
+
+// CHECK: vsqrtpd -4096(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x51,0x52,0x80]
+          vsqrtpd -4096(%rdx), %ymm18
+
+// CHECK: vsqrtpd -4128(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x51,0x92,0xe0,0xef,0xff,0xff]
+          vsqrtpd -4128(%rdx), %ymm18
+
+// CHECK: vsqrtpd 1016(%rdx){1to4}, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x38,0x51,0x52,0x7f]
+          vsqrtpd 1016(%rdx){1to4}, %ymm18
+
+// CHECK: vsqrtpd 1024(%rdx){1to4}, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x38,0x51,0x92,0x00,0x04,0x00,0x00]
+          vsqrtpd 1024(%rdx){1to4}, %ymm18
+
+// CHECK: vsqrtpd -1024(%rdx){1to4}, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x38,0x51,0x52,0x80]
+          vsqrtpd -1024(%rdx){1to4}, %ymm18
+
+// CHECK: vsqrtpd -1032(%rdx){1to4}, %ymm18
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x38,0x51,0x92,0xf8,0xfb,0xff,0xff]
+          vsqrtpd -1032(%rdx){1to4}, %ymm18
+
+// CHECK: vsqrtps %xmm28, %xmm19
+// CHECK:  encoding: [0x62,0x81,0x7c,0x08,0x51,0xdc]
+          vsqrtps %xmm28, %xmm19
+
+// CHECK: vsqrtps %xmm28, %xmm19 {%k7}
+// CHECK:  encoding: [0x62,0x81,0x7c,0x0f,0x51,0xdc]
+          vsqrtps %xmm28, %xmm19 {%k7}
+
+// CHECK: vsqrtps %xmm28, %xmm19 {%k7} {z}
+// CHECK:  encoding: [0x62,0x81,0x7c,0x8f,0x51,0xdc]
+          vsqrtps %xmm28, %xmm19 {%k7} {z}
+
+// CHECK: vsqrtps (%rcx), %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x08,0x51,0x19]
+          vsqrtps (%rcx), %xmm19
+
+// CHECK: vsqrtps 291(%rax,%r14,8), %xmm19
+// CHECK:  encoding: [0x62,0xa1,0x7c,0x08,0x51,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vsqrtps 291(%rax,%r14,8), %xmm19
+
+// CHECK: vsqrtps (%rcx){1to4}, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x18,0x51,0x19]
+          vsqrtps (%rcx){1to4}, %xmm19
+
+// CHECK: vsqrtps 2032(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x08,0x51,0x5a,0x7f]
+          vsqrtps 2032(%rdx), %xmm19
+
+// CHECK: vsqrtps 2048(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x08,0x51,0x9a,0x00,0x08,0x00,0x00]
+          vsqrtps 2048(%rdx), %xmm19
+
+// CHECK: vsqrtps -2048(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x08,0x51,0x5a,0x80]
+          vsqrtps -2048(%rdx), %xmm19
+
+// CHECK: vsqrtps -2064(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x08,0x51,0x9a,0xf0,0xf7,0xff,0xff]
+          vsqrtps -2064(%rdx), %xmm19
+
+// CHECK: vsqrtps 508(%rdx){1to4}, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x18,0x51,0x5a,0x7f]
+          vsqrtps 508(%rdx){1to4}, %xmm19
+
+// CHECK: vsqrtps 512(%rdx){1to4}, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x18,0x51,0x9a,0x00,0x02,0x00,0x00]
+          vsqrtps 512(%rdx){1to4}, %xmm19
+
+// CHECK: vsqrtps -512(%rdx){1to4}, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x18,0x51,0x5a,0x80]
+          vsqrtps -512(%rdx){1to4}, %xmm19
+
+// CHECK: vsqrtps -516(%rdx){1to4}, %xmm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x18,0x51,0x9a,0xfc,0xfd,0xff,0xff]
+          vsqrtps -516(%rdx){1to4}, %xmm19
+
+// CHECK: vsqrtps %ymm25, %ymm19
+// CHECK:  encoding: [0x62,0x81,0x7c,0x28,0x51,0xd9]
+          vsqrtps %ymm25, %ymm19
+
+// CHECK: vsqrtps %ymm25, %ymm19 {%k2}
+// CHECK:  encoding: [0x62,0x81,0x7c,0x2a,0x51,0xd9]
+          vsqrtps %ymm25, %ymm19 {%k2}
+
+// CHECK: vsqrtps %ymm25, %ymm19 {%k2} {z}
+// CHECK:  encoding: [0x62,0x81,0x7c,0xaa,0x51,0xd9]
+          vsqrtps %ymm25, %ymm19 {%k2} {z}
+
+// CHECK: vsqrtps (%rcx), %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x28,0x51,0x19]
+          vsqrtps (%rcx), %ymm19
+
+// CHECK: vsqrtps 291(%rax,%r14,8), %ymm19
+// CHECK:  encoding: [0x62,0xa1,0x7c,0x28,0x51,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vsqrtps 291(%rax,%r14,8), %ymm19
+
+// CHECK: vsqrtps (%rcx){1to8}, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x38,0x51,0x19]
+          vsqrtps (%rcx){1to8}, %ymm19
+
+// CHECK: vsqrtps 4064(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x28,0x51,0x5a,0x7f]
+          vsqrtps 4064(%rdx), %ymm19
+
+// CHECK: vsqrtps 4096(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x28,0x51,0x9a,0x00,0x10,0x00,0x00]
+          vsqrtps 4096(%rdx), %ymm19
+
+// CHECK: vsqrtps -4096(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x28,0x51,0x5a,0x80]
+          vsqrtps -4096(%rdx), %ymm19
+
+// CHECK: vsqrtps -4128(%rdx), %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x28,0x51,0x9a,0xe0,0xef,0xff,0xff]
+          vsqrtps -4128(%rdx), %ymm19
+
+// CHECK: vsqrtps 508(%rdx){1to8}, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x38,0x51,0x5a,0x7f]
+          vsqrtps 508(%rdx){1to8}, %ymm19
+
+// CHECK: vsqrtps 512(%rdx){1to8}, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x38,0x51,0x9a,0x00,0x02,0x00,0x00]
+          vsqrtps 512(%rdx){1to8}, %ymm19
+
+// CHECK: vsqrtps -512(%rdx){1to8}, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x38,0x51,0x5a,0x80]
+          vsqrtps -512(%rdx){1to8}, %ymm19
+
+// CHECK: vsqrtps -516(%rdx){1to8}, %ymm19
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x38,0x51,0x9a,0xfc,0xfd,0xff,0xff]
+          vsqrtps -516(%rdx){1to8}, %ymm19
+
+// CHECK: vsubpd %xmm18, %xmm24, %xmm28
+// CHECK:  encoding: [0x62,0x21,0xbd,0x00,0x5c,0xe2]
+          vsubpd %xmm18, %xmm24, %xmm28
+
+// CHECK: vsubpd %xmm18, %xmm24, %xmm28 {%k3}
+// CHECK:  encoding: [0x62,0x21,0xbd,0x03,0x5c,0xe2]
+          vsubpd %xmm18, %xmm24, %xmm28 {%k3}
+
+// CHECK: vsubpd %xmm18, %xmm24, %xmm28 {%k3} {z}
+// CHECK:  encoding: [0x62,0x21,0xbd,0x83,0x5c,0xe2]
+          vsubpd %xmm18, %xmm24, %xmm28 {%k3} {z}
+
+// CHECK: vsubpd (%rcx), %xmm24, %xmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x00,0x5c,0x21]
+          vsubpd (%rcx), %xmm24, %xmm28
+
+// CHECK: vsubpd 291(%rax,%r14,8), %xmm24, %xmm28
+// CHECK:  encoding: [0x62,0x21,0xbd,0x00,0x5c,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vsubpd 291(%rax,%r14,8), %xmm24, %xmm28
+
+// CHECK: vsubpd (%rcx){1to2}, %xmm24, %xmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x10,0x5c,0x21]
+          vsubpd (%rcx){1to2}, %xmm24, %xmm28
+
+// CHECK: vsubpd 2032(%rdx), %xmm24, %xmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x00,0x5c,0x62,0x7f]
+          vsubpd 2032(%rdx), %xmm24, %xmm28
+
+// CHECK: vsubpd 2048(%rdx), %xmm24, %xmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x00,0x5c,0xa2,0x00,0x08,0x00,0x00]
+          vsubpd 2048(%rdx), %xmm24, %xmm28
+
+// CHECK: vsubpd -2048(%rdx), %xmm24, %xmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x00,0x5c,0x62,0x80]
+          vsubpd -2048(%rdx), %xmm24, %xmm28
+
+// CHECK: vsubpd -2064(%rdx), %xmm24, %xmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x00,0x5c,0xa2,0xf0,0xf7,0xff,0xff]
+          vsubpd -2064(%rdx), %xmm24, %xmm28
+
+// CHECK: vsubpd 1016(%rdx){1to2}, %xmm24, %xmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x10,0x5c,0x62,0x7f]
+          vsubpd 1016(%rdx){1to2}, %xmm24, %xmm28
+
+// CHECK: vsubpd 1024(%rdx){1to2}, %xmm24, %xmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x10,0x5c,0xa2,0x00,0x04,0x00,0x00]
+          vsubpd 1024(%rdx){1to2}, %xmm24, %xmm28
+
+// CHECK: vsubpd -1024(%rdx){1to2}, %xmm24, %xmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x10,0x5c,0x62,0x80]
+          vsubpd -1024(%rdx){1to2}, %xmm24, %xmm28
+
+// CHECK: vsubpd -1032(%rdx){1to2}, %xmm24, %xmm28
+// CHECK:  encoding: [0x62,0x61,0xbd,0x10,0x5c,0xa2,0xf8,0xfb,0xff,0xff]
+          vsubpd -1032(%rdx){1to2}, %xmm24, %xmm28
+
+// CHECK: vsubpd %ymm25, %ymm28, %ymm30
+// CHECK:  encoding: [0x62,0x01,0x9d,0x20,0x5c,0xf1]
+          vsubpd %ymm25, %ymm28, %ymm30
+
+// CHECK: vsubpd %ymm25, %ymm28, %ymm30 {%k7}
+// CHECK:  encoding: [0x62,0x01,0x9d,0x27,0x5c,0xf1]
+          vsubpd %ymm25, %ymm28, %ymm30 {%k7}
+
+// CHECK: vsubpd %ymm25, %ymm28, %ymm30 {%k7} {z}
+// CHECK:  encoding: [0x62,0x01,0x9d,0xa7,0x5c,0xf1]
+          vsubpd %ymm25, %ymm28, %ymm30 {%k7} {z}
+
+// CHECK: vsubpd (%rcx), %ymm28, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x9d,0x20,0x5c,0x31]
+          vsubpd (%rcx), %ymm28, %ymm30
+
+// CHECK: vsubpd 291(%rax,%r14,8), %ymm28, %ymm30
+// CHECK:  encoding: [0x62,0x21,0x9d,0x20,0x5c,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vsubpd 291(%rax,%r14,8), %ymm28, %ymm30
+
+// CHECK: vsubpd (%rcx){1to4}, %ymm28, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x9d,0x30,0x5c,0x31]
+          vsubpd (%rcx){1to4}, %ymm28, %ymm30
+
+// CHECK: vsubpd 4064(%rdx), %ymm28, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x9d,0x20,0x5c,0x72,0x7f]
+          vsubpd 4064(%rdx), %ymm28, %ymm30
+
+// CHECK: vsubpd 4096(%rdx), %ymm28, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x9d,0x20,0x5c,0xb2,0x00,0x10,0x00,0x00]
+          vsubpd 4096(%rdx), %ymm28, %ymm30
+
+// CHECK: vsubpd -4096(%rdx), %ymm28, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x9d,0x20,0x5c,0x72,0x80]
+          vsubpd -4096(%rdx), %ymm28, %ymm30
+
+// CHECK: vsubpd -4128(%rdx), %ymm28, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x9d,0x20,0x5c,0xb2,0xe0,0xef,0xff,0xff]
+          vsubpd -4128(%rdx), %ymm28, %ymm30
+
+// CHECK: vsubpd 1016(%rdx){1to4}, %ymm28, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x9d,0x30,0x5c,0x72,0x7f]
+          vsubpd 1016(%rdx){1to4}, %ymm28, %ymm30
+
+// CHECK: vsubpd 1024(%rdx){1to4}, %ymm28, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x9d,0x30,0x5c,0xb2,0x00,0x04,0x00,0x00]
+          vsubpd 1024(%rdx){1to4}, %ymm28, %ymm30
+
+// CHECK: vsubpd -1024(%rdx){1to4}, %ymm28, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x9d,0x30,0x5c,0x72,0x80]
+          vsubpd -1024(%rdx){1to4}, %ymm28, %ymm30
+
+// CHECK: vsubpd -1032(%rdx){1to4}, %ymm28, %ymm30
+// CHECK:  encoding: [0x62,0x61,0x9d,0x30,0x5c,0xb2,0xf8,0xfb,0xff,0xff]
+          vsubpd -1032(%rdx){1to4}, %ymm28, %ymm30
+
+// CHECK: vsubps %xmm25, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0x81,0x14,0x00,0x5c,0xe1]
+          vsubps %xmm25, %xmm29, %xmm20
+
+// CHECK: vsubps %xmm25, %xmm29, %xmm20 {%k3}
+// CHECK:  encoding: [0x62,0x81,0x14,0x03,0x5c,0xe1]
+          vsubps %xmm25, %xmm29, %xmm20 {%k3}
+
+// CHECK: vsubps %xmm25, %xmm29, %xmm20 {%k3} {z}
+// CHECK:  encoding: [0x62,0x81,0x14,0x83,0x5c,0xe1]
+          vsubps %xmm25, %xmm29, %xmm20 {%k3} {z}
+
+// CHECK: vsubps (%rcx), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x14,0x00,0x5c,0x21]
+          vsubps (%rcx), %xmm29, %xmm20
+
+// CHECK: vsubps 291(%rax,%r14,8), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xa1,0x14,0x00,0x5c,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vsubps 291(%rax,%r14,8), %xmm29, %xmm20
+
+// CHECK: vsubps (%rcx){1to4}, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x14,0x10,0x5c,0x21]
+          vsubps (%rcx){1to4}, %xmm29, %xmm20
+
+// CHECK: vsubps 2032(%rdx), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x14,0x00,0x5c,0x62,0x7f]
+          vsubps 2032(%rdx), %xmm29, %xmm20
+
+// CHECK: vsubps 2048(%rdx), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x14,0x00,0x5c,0xa2,0x00,0x08,0x00,0x00]
+          vsubps 2048(%rdx), %xmm29, %xmm20
+
+// CHECK: vsubps -2048(%rdx), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x14,0x00,0x5c,0x62,0x80]
+          vsubps -2048(%rdx), %xmm29, %xmm20
+
+// CHECK: vsubps -2064(%rdx), %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x14,0x00,0x5c,0xa2,0xf0,0xf7,0xff,0xff]
+          vsubps -2064(%rdx), %xmm29, %xmm20
+
+// CHECK: vsubps 508(%rdx){1to4}, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x14,0x10,0x5c,0x62,0x7f]
+          vsubps 508(%rdx){1to4}, %xmm29, %xmm20
+
+// CHECK: vsubps 512(%rdx){1to4}, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x14,0x10,0x5c,0xa2,0x00,0x02,0x00,0x00]
+          vsubps 512(%rdx){1to4}, %xmm29, %xmm20
+
+// CHECK: vsubps -512(%rdx){1to4}, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x14,0x10,0x5c,0x62,0x80]
+          vsubps -512(%rdx){1to4}, %xmm29, %xmm20
+
+// CHECK: vsubps -516(%rdx){1to4}, %xmm29, %xmm20
+// CHECK:  encoding: [0x62,0xe1,0x14,0x10,0x5c,0xa2,0xfc,0xfd,0xff,0xff]
+          vsubps -516(%rdx){1to4}, %xmm29, %xmm20
+
+// CHECK: vsubps %ymm22, %ymm21, %ymm21
+// CHECK:  encoding: [0x62,0xa1,0x54,0x20,0x5c,0xee]
+          vsubps %ymm22, %ymm21, %ymm21
+
+// CHECK: vsubps %ymm22, %ymm21, %ymm21 {%k4}
+// CHECK:  encoding: [0x62,0xa1,0x54,0x24,0x5c,0xee]
+          vsubps %ymm22, %ymm21, %ymm21 {%k4}
+
+// CHECK: vsubps %ymm22, %ymm21, %ymm21 {%k4} {z}
+// CHECK:  encoding: [0x62,0xa1,0x54,0xa4,0x5c,0xee]
+          vsubps %ymm22, %ymm21, %ymm21 {%k4} {z}
+
+// CHECK: vsubps (%rcx), %ymm21, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x54,0x20,0x5c,0x29]
+          vsubps (%rcx), %ymm21, %ymm21
+
+// CHECK: vsubps 291(%rax,%r14,8), %ymm21, %ymm21
+// CHECK:  encoding: [0x62,0xa1,0x54,0x20,0x5c,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vsubps 291(%rax,%r14,8), %ymm21, %ymm21
+
+// CHECK: vsubps (%rcx){1to8}, %ymm21, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x54,0x30,0x5c,0x29]
+          vsubps (%rcx){1to8}, %ymm21, %ymm21
+
+// CHECK: vsubps 4064(%rdx), %ymm21, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x54,0x20,0x5c,0x6a,0x7f]
+          vsubps 4064(%rdx), %ymm21, %ymm21
+
+// CHECK: vsubps 4096(%rdx), %ymm21, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x54,0x20,0x5c,0xaa,0x00,0x10,0x00,0x00]
+          vsubps 4096(%rdx), %ymm21, %ymm21
+
+// CHECK: vsubps -4096(%rdx), %ymm21, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x54,0x20,0x5c,0x6a,0x80]
+          vsubps -4096(%rdx), %ymm21, %ymm21
+
+// CHECK: vsubps -4128(%rdx), %ymm21, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x54,0x20,0x5c,0xaa,0xe0,0xef,0xff,0xff]
+          vsubps -4128(%rdx), %ymm21, %ymm21
+
+// CHECK: vsubps 508(%rdx){1to8}, %ymm21, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x54,0x30,0x5c,0x6a,0x7f]
+          vsubps 508(%rdx){1to8}, %ymm21, %ymm21
+
+// CHECK: vsubps 512(%rdx){1to8}, %ymm21, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x54,0x30,0x5c,0xaa,0x00,0x02,0x00,0x00]
+          vsubps 512(%rdx){1to8}, %ymm21, %ymm21
+
+// CHECK: vsubps -512(%rdx){1to8}, %ymm21, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x54,0x30,0x5c,0x6a,0x80]
+          vsubps -512(%rdx){1to8}, %ymm21, %ymm21
+
+// CHECK: vsubps -516(%rdx){1to8}, %ymm21, %ymm21
+// CHECK:  encoding: [0x62,0xe1,0x54,0x30,0x5c,0xaa,0xfc,0xfd,0xff,0xff]
+          vsubps -516(%rdx){1to8}, %ymm21, %ymm21
+
+// CHECK: vmovapd %xmm22, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x29,0x31]
+          vmovapd %xmm22, (%rcx)
+
+// CHECK: vmovapd %xmm22, (%rcx) {%k2}
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x0a,0x29,0x31]
+          vmovapd %xmm22, (%rcx) {%k2}
+
+// CHECK: vmovapd %xmm22, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x08,0x29,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vmovapd %xmm22, 291(%rax,%r14,8)
+
+// CHECK: vmovapd %xmm22, 2032(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x29,0x72,0x7f]
+          vmovapd %xmm22, 2032(%rdx)
+
+// CHECK: vmovapd %xmm22, 2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x29,0xb2,0x00,0x08,0x00,0x00]
+          vmovapd %xmm22, 2048(%rdx)
+
+// CHECK: vmovapd %xmm22, -2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x29,0x72,0x80]
+          vmovapd %xmm22, -2048(%rdx)
+
+// CHECK: vmovapd %xmm22, -2064(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x29,0xb2,0xf0,0xf7,0xff,0xff]
+          vmovapd %xmm22, -2064(%rdx)
+
+// CHECK: vmovapd %ymm17, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x29,0x09]
+          vmovapd %ymm17, (%rcx)
+
+// CHECK: vmovapd %ymm17, (%rcx) {%k6}
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x2e,0x29,0x09]
+          vmovapd %ymm17, (%rcx) {%k6}
+
+// CHECK: vmovapd %ymm17, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x28,0x29,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmovapd %ymm17, 291(%rax,%r14,8)
+
+// CHECK: vmovapd %ymm17, 4064(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x29,0x4a,0x7f]
+          vmovapd %ymm17, 4064(%rdx)
+
+// CHECK: vmovapd %ymm17, 4096(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x29,0x8a,0x00,0x10,0x00,0x00]
+          vmovapd %ymm17, 4096(%rdx)
+
+// CHECK: vmovapd %ymm17, -4096(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x29,0x4a,0x80]
+          vmovapd %ymm17, -4096(%rdx)
+
+// CHECK: vmovapd %ymm17, -4128(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x28,0x29,0x8a,0xe0,0xef,0xff,0xff]
+          vmovapd %ymm17, -4128(%rdx)
+
+// CHECK: vmovaps %xmm29, (%rcx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x08,0x29,0x29]
+          vmovaps %xmm29, (%rcx)
+
+// CHECK: vmovaps %xmm29, (%rcx) {%k5}
+// CHECK:  encoding: [0x62,0x61,0x7c,0x0d,0x29,0x29]
+          vmovaps %xmm29, (%rcx) {%k5}
+
+// CHECK: vmovaps %xmm29, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x21,0x7c,0x08,0x29,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmovaps %xmm29, 291(%rax,%r14,8)
+
+// CHECK: vmovaps %xmm29, 2032(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x08,0x29,0x6a,0x7f]
+          vmovaps %xmm29, 2032(%rdx)
+
+// CHECK: vmovaps %xmm29, 2048(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x08,0x29,0xaa,0x00,0x08,0x00,0x00]
+          vmovaps %xmm29, 2048(%rdx)
+
+// CHECK: vmovaps %xmm29, -2048(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x08,0x29,0x6a,0x80]
+          vmovaps %xmm29, -2048(%rdx)
+
+// CHECK: vmovaps %xmm29, -2064(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x08,0x29,0xaa,0xf0,0xf7,0xff,0xff]
+          vmovaps %xmm29, -2064(%rdx)
+
+// CHECK: vmovaps %ymm28, (%rcx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x28,0x29,0x21]
+          vmovaps %ymm28, (%rcx)
+
+// CHECK: vmovaps %ymm28, (%rcx) {%k6}
+// CHECK:  encoding: [0x62,0x61,0x7c,0x2e,0x29,0x21]
+          vmovaps %ymm28, (%rcx) {%k6}
+
+// CHECK: vmovaps %ymm28, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x21,0x7c,0x28,0x29,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vmovaps %ymm28, 291(%rax,%r14,8)
+
+// CHECK: vmovaps %ymm28, 4064(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x28,0x29,0x62,0x7f]
+          vmovaps %ymm28, 4064(%rdx)
+
+// CHECK: vmovaps %ymm28, 4096(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x28,0x29,0xa2,0x00,0x10,0x00,0x00]
+          vmovaps %ymm28, 4096(%rdx)
+
+// CHECK: vmovaps %ymm28, -4096(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x28,0x29,0x62,0x80]
+          vmovaps %ymm28, -4096(%rdx)
+
+// CHECK: vmovaps %ymm28, -4128(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x28,0x29,0xa2,0xe0,0xef,0xff,0xff]
+          vmovaps %ymm28, -4128(%rdx)
+
+// CHECK: vmovdqa32 %xmm24, (%rcx)
+// CHECK:  encoding: [0x62,0x61,0x7d,0x08,0x7f,0x01]
+          vmovdqa32 %xmm24, (%rcx)
+
+// CHECK: vmovdqa32 %xmm24, (%rcx) {%k7}
+// CHECK:  encoding: [0x62,0x61,0x7d,0x0f,0x7f,0x01]
+          vmovdqa32 %xmm24, (%rcx) {%k7}
+
+// CHECK: vmovdqa32 %xmm24, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x21,0x7d,0x08,0x7f,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqa32 %xmm24, 291(%rax,%r14,8)
+
+// CHECK: vmovdqa32 %xmm24, 2032(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7d,0x08,0x7f,0x42,0x7f]
+          vmovdqa32 %xmm24, 2032(%rdx)
+
+// CHECK: vmovdqa32 %xmm24, 2048(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7d,0x08,0x7f,0x82,0x00,0x08,0x00,0x00]
+          vmovdqa32 %xmm24, 2048(%rdx)
+
+// CHECK: vmovdqa32 %xmm24, -2048(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7d,0x08,0x7f,0x42,0x80]
+          vmovdqa32 %xmm24, -2048(%rdx)
+
+// CHECK: vmovdqa32 %xmm24, -2064(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7d,0x08,0x7f,0x82,0xf0,0xf7,0xff,0xff]
+          vmovdqa32 %xmm24, -2064(%rdx)
+
+// CHECK: vmovdqa32 %ymm29, (%rcx)
+// CHECK:  encoding: [0x62,0x61,0x7d,0x28,0x7f,0x29]
+          vmovdqa32 %ymm29, (%rcx)
+
+// CHECK: vmovdqa32 %ymm29, (%rcx) {%k7}
+// CHECK:  encoding: [0x62,0x61,0x7d,0x2f,0x7f,0x29]
+          vmovdqa32 %ymm29, (%rcx) {%k7}
+
+// CHECK: vmovdqa32 %ymm29, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x21,0x7d,0x28,0x7f,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqa32 %ymm29, 291(%rax,%r14,8)
+
+// CHECK: vmovdqa32 %ymm29, 4064(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7d,0x28,0x7f,0x6a,0x7f]
+          vmovdqa32 %ymm29, 4064(%rdx)
+
+// CHECK: vmovdqa32 %ymm29, 4096(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7d,0x28,0x7f,0xaa,0x00,0x10,0x00,0x00]
+          vmovdqa32 %ymm29, 4096(%rdx)
+
+// CHECK: vmovdqa32 %ymm29, -4096(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7d,0x28,0x7f,0x6a,0x80]
+          vmovdqa32 %ymm29, -4096(%rdx)
+
+// CHECK: vmovdqa32 %ymm29, -4128(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7d,0x28,0x7f,0xaa,0xe0,0xef,0xff,0xff]
+          vmovdqa32 %ymm29, -4128(%rdx)
+
+// CHECK: vmovdqa64 %xmm17, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x7f,0x09]
+          vmovdqa64 %xmm17, (%rcx)
+
+// CHECK: vmovdqa64 %xmm17, (%rcx) {%k7}
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x0f,0x7f,0x09]
+          vmovdqa64 %xmm17, (%rcx) {%k7}
+
+// CHECK: vmovdqa64 %xmm17, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x08,0x7f,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqa64 %xmm17, 291(%rax,%r14,8)
+
+// CHECK: vmovdqa64 %xmm17, 2032(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x7f,0x4a,0x7f]
+          vmovdqa64 %xmm17, 2032(%rdx)
+
+// CHECK: vmovdqa64 %xmm17, 2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x7f,0x8a,0x00,0x08,0x00,0x00]
+          vmovdqa64 %xmm17, 2048(%rdx)
+
+// CHECK: vmovdqa64 %xmm17, -2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x7f,0x4a,0x80]
+          vmovdqa64 %xmm17, -2048(%rdx)
+
+// CHECK: vmovdqa64 %xmm17, -2064(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x7f,0x8a,0xf0,0xf7,0xff,0xff]
+          vmovdqa64 %xmm17, -2064(%rdx)
+
+// CHECK: vmovdqa64 %ymm24, (%rcx)
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x7f,0x01]
+          vmovdqa64 %ymm24, (%rcx)
+
+// CHECK: vmovdqa64 %ymm24, (%rcx) {%k2}
+// CHECK:  encoding: [0x62,0x61,0xfd,0x2a,0x7f,0x01]
+          vmovdqa64 %ymm24, (%rcx) {%k2}
+
+// CHECK: vmovdqa64 %ymm24, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x21,0xfd,0x28,0x7f,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqa64 %ymm24, 291(%rax,%r14,8)
+
+// CHECK: vmovdqa64 %ymm24, 4064(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x7f,0x42,0x7f]
+          vmovdqa64 %ymm24, 4064(%rdx)
+
+// CHECK: vmovdqa64 %ymm24, 4096(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x7f,0x82,0x00,0x10,0x00,0x00]
+          vmovdqa64 %ymm24, 4096(%rdx)
+
+// CHECK: vmovdqa64 %ymm24, -4096(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x7f,0x42,0x80]
+          vmovdqa64 %ymm24, -4096(%rdx)
+
+// CHECK: vmovdqa64 %ymm24, -4128(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x7f,0x82,0xe0,0xef,0xff,0xff]
+          vmovdqa64 %ymm24, -4128(%rdx)
+
+// CHECK: vmovdqu32 %xmm17, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x08,0x7f,0x09]
+          vmovdqu32 %xmm17, (%rcx)
+
+// CHECK: vmovdqu32 %xmm17, (%rcx) {%k4}
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x0c,0x7f,0x09]
+          vmovdqu32 %xmm17, (%rcx) {%k4}
+
+// CHECK: vmovdqu32 %xmm17, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0x7e,0x08,0x7f,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu32 %xmm17, 291(%rax,%r14,8)
+
+// CHECK: vmovdqu32 %xmm17, 2032(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x08,0x7f,0x4a,0x7f]
+          vmovdqu32 %xmm17, 2032(%rdx)
+
+// CHECK: vmovdqu32 %xmm17, 2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x08,0x7f,0x8a,0x00,0x08,0x00,0x00]
+          vmovdqu32 %xmm17, 2048(%rdx)
+
+// CHECK: vmovdqu32 %xmm17, -2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x08,0x7f,0x4a,0x80]
+          vmovdqu32 %xmm17, -2048(%rdx)
+
+// CHECK: vmovdqu32 %xmm17, -2064(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x08,0x7f,0x8a,0xf0,0xf7,0xff,0xff]
+          vmovdqu32 %xmm17, -2064(%rdx)
+
+// CHECK: vmovdqu32 %ymm20, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x28,0x7f,0x21]
+          vmovdqu32 %ymm20, (%rcx)
+
+// CHECK: vmovdqu32 %ymm20, (%rcx) {%k1}
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x29,0x7f,0x21]
+          vmovdqu32 %ymm20, (%rcx) {%k1}
+
+// CHECK: vmovdqu32 %ymm20, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0x7e,0x28,0x7f,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu32 %ymm20, 291(%rax,%r14,8)
+
+// CHECK: vmovdqu32 %ymm20, 4064(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x28,0x7f,0x62,0x7f]
+          vmovdqu32 %ymm20, 4064(%rdx)
+
+// CHECK: vmovdqu32 %ymm20, 4096(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x28,0x7f,0xa2,0x00,0x10,0x00,0x00]
+          vmovdqu32 %ymm20, 4096(%rdx)
+
+// CHECK: vmovdqu32 %ymm20, -4096(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x28,0x7f,0x62,0x80]
+          vmovdqu32 %ymm20, -4096(%rdx)
+
+// CHECK: vmovdqu32 %ymm20, -4128(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7e,0x28,0x7f,0xa2,0xe0,0xef,0xff,0xff]
+          vmovdqu32 %ymm20, -4128(%rdx)
+
+// CHECK: vmovdqu64 %xmm20, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0xfe,0x08,0x7f,0x21]
+          vmovdqu64 %xmm20, (%rcx)
+
+// CHECK: vmovdqu64 %xmm20, (%rcx) {%k6}
+// CHECK:  encoding: [0x62,0xe1,0xfe,0x0e,0x7f,0x21]
+          vmovdqu64 %xmm20, (%rcx) {%k6}
+
+// CHECK: vmovdqu64 %xmm20, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0xfe,0x08,0x7f,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu64 %xmm20, 291(%rax,%r14,8)
+
+// CHECK: vmovdqu64 %xmm20, 2032(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfe,0x08,0x7f,0x62,0x7f]
+          vmovdqu64 %xmm20, 2032(%rdx)
+
+// CHECK: vmovdqu64 %xmm20, 2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfe,0x08,0x7f,0xa2,0x00,0x08,0x00,0x00]
+          vmovdqu64 %xmm20, 2048(%rdx)
+
+// CHECK: vmovdqu64 %xmm20, -2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfe,0x08,0x7f,0x62,0x80]
+          vmovdqu64 %xmm20, -2048(%rdx)
+
+// CHECK: vmovdqu64 %xmm20, -2064(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfe,0x08,0x7f,0xa2,0xf0,0xf7,0xff,0xff]
+          vmovdqu64 %xmm20, -2064(%rdx)
+
+// CHECK: vmovdqu64 %ymm19, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0xfe,0x28,0x7f,0x19]
+          vmovdqu64 %ymm19, (%rcx)
+
+// CHECK: vmovdqu64 %ymm19, (%rcx) {%k7}
+// CHECK:  encoding: [0x62,0xe1,0xfe,0x2f,0x7f,0x19]
+          vmovdqu64 %ymm19, (%rcx) {%k7}
+
+// CHECK: vmovdqu64 %ymm19, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0xfe,0x28,0x7f,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vmovdqu64 %ymm19, 291(%rax,%r14,8)
+
+// CHECK: vmovdqu64 %ymm19, 4064(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfe,0x28,0x7f,0x5a,0x7f]
+          vmovdqu64 %ymm19, 4064(%rdx)
+
+// CHECK: vmovdqu64 %ymm19, 4096(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfe,0x28,0x7f,0x9a,0x00,0x10,0x00,0x00]
+          vmovdqu64 %ymm19, 4096(%rdx)
+
+// CHECK: vmovdqu64 %ymm19, -4096(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfe,0x28,0x7f,0x5a,0x80]
+          vmovdqu64 %ymm19, -4096(%rdx)
+
+// CHECK: vmovdqu64 %ymm19, -4128(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfe,0x28,0x7f,0x9a,0xe0,0xef,0xff,0xff]
+          vmovdqu64 %ymm19, -4128(%rdx)
+
+// CHECK: vmovupd %xmm22, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x11,0x31]
+          vmovupd %xmm22, (%rcx)
+
+// CHECK: vmovupd %xmm22, (%rcx) {%k7}
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x0f,0x11,0x31]
+          vmovupd %xmm22, (%rcx) {%k7}
+
+// CHECK: vmovupd %xmm22, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0xfd,0x08,0x11,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vmovupd %xmm22, 291(%rax,%r14,8)
+
+// CHECK: vmovupd %xmm22, 2032(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x11,0x72,0x7f]
+          vmovupd %xmm22, 2032(%rdx)
+
+// CHECK: vmovupd %xmm22, 2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x11,0xb2,0x00,0x08,0x00,0x00]
+          vmovupd %xmm22, 2048(%rdx)
+
+// CHECK: vmovupd %xmm22, -2048(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x11,0x72,0x80]
+          vmovupd %xmm22, -2048(%rdx)
+
+// CHECK: vmovupd %xmm22, -2064(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0xfd,0x08,0x11,0xb2,0xf0,0xf7,0xff,0xff]
+          vmovupd %xmm22, -2064(%rdx)
+
+// CHECK: vmovupd %ymm28, (%rcx)
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x11,0x21]
+          vmovupd %ymm28, (%rcx)
+
+// CHECK: vmovupd %ymm28, (%rcx) {%k1}
+// CHECK:  encoding: [0x62,0x61,0xfd,0x29,0x11,0x21]
+          vmovupd %ymm28, (%rcx) {%k1}
+
+// CHECK: vmovupd %ymm28, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x21,0xfd,0x28,0x11,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vmovupd %ymm28, 291(%rax,%r14,8)
+
+// CHECK: vmovupd %ymm28, 4064(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x11,0x62,0x7f]
+          vmovupd %ymm28, 4064(%rdx)
+
+// CHECK: vmovupd %ymm28, 4096(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x11,0xa2,0x00,0x10,0x00,0x00]
+          vmovupd %ymm28, 4096(%rdx)
+
+// CHECK: vmovupd %ymm28, -4096(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x11,0x62,0x80]
+          vmovupd %ymm28, -4096(%rdx)
+
+// CHECK: vmovupd %ymm28, -4128(%rdx)
+// CHECK:  encoding: [0x62,0x61,0xfd,0x28,0x11,0xa2,0xe0,0xef,0xff,0xff]
+          vmovupd %ymm28, -4128(%rdx)
+
+// CHECK: vmovups %xmm26, (%rcx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x08,0x11,0x11]
+          vmovups %xmm26, (%rcx)
+
+// CHECK: vmovups %xmm26, (%rcx) {%k5}
+// CHECK:  encoding: [0x62,0x61,0x7c,0x0d,0x11,0x11]
+          vmovups %xmm26, (%rcx) {%k5}
+
+// CHECK: vmovups %xmm26, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0x21,0x7c,0x08,0x11,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vmovups %xmm26, 291(%rax,%r14,8)
+
+// CHECK: vmovups %xmm26, 2032(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x08,0x11,0x52,0x7f]
+          vmovups %xmm26, 2032(%rdx)
+
+// CHECK: vmovups %xmm26, 2048(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x08,0x11,0x92,0x00,0x08,0x00,0x00]
+          vmovups %xmm26, 2048(%rdx)
+
+// CHECK: vmovups %xmm26, -2048(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x08,0x11,0x52,0x80]
+          vmovups %xmm26, -2048(%rdx)
+
+// CHECK: vmovups %xmm26, -2064(%rdx)
+// CHECK:  encoding: [0x62,0x61,0x7c,0x08,0x11,0x92,0xf0,0xf7,0xff,0xff]
+          vmovups %xmm26, -2064(%rdx)
+
+// CHECK: vmovups %ymm23, (%rcx)
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x28,0x11,0x39]
+          vmovups %ymm23, (%rcx)
+
+// CHECK: vmovups %ymm23, (%rcx) {%k3}
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x2b,0x11,0x39]
+          vmovups %ymm23, (%rcx) {%k3}
+
+// CHECK: vmovups %ymm23, 291(%rax,%r14,8)
+// CHECK:  encoding: [0x62,0xa1,0x7c,0x28,0x11,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vmovups %ymm23, 291(%rax,%r14,8)
+
+// CHECK: vmovups %ymm23, 4064(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x28,0x11,0x7a,0x7f]
+          vmovups %ymm23, 4064(%rdx)
+
+// CHECK: vmovups %ymm23, 4096(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x28,0x11,0xba,0x00,0x10,0x00,0x00]
+          vmovups %ymm23, 4096(%rdx)
+
+// CHECK: vmovups %ymm23, -4096(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x28,0x11,0x7a,0x80]
+          vmovups %ymm23, -4096(%rdx)
+
+// CHECK: vmovups %ymm23, -4128(%rdx)
+// CHECK:  encoding: [0x62,0xe1,0x7c,0x28,0x11,0xba,0xe0,0xef,0xff,0xff]
+          vmovups %ymm23, -4128(%rdx)
diff --git a/test/MC/X86/x86_errors.s b/test/MC/X86/x86_errors.s
index 51f2e8e14685..0b3bc7f43502 100644
--- a/test/MC/X86/x86_errors.s
+++ b/test/MC/X86/x86_errors.s
@@ -46,3 +46,7 @@ movl %eax,(,%bx)
 
 // 32: error: invalid operand for instruction
 outb al, 4
+
+// 32: error: invalid segment register
+// 64: error: invalid segment register
+movl %eax:0x00, %ebx
diff --git a/test/MC/X86/x86_operands.s b/test/MC/X86/x86_operands.s
index b34713db8637..2258a9527746 100644
--- a/test/MC/X86/x86_operands.s
+++ b/test/MC/X86/x86_operands.s
@@ -52,6 +52,11 @@
         call *%eax
 # CHECK: calll *4(%eax)
         call *4(%eax)
+foo:
+	calll foo()
+# CHECK: calll foo{{$}}
+	calll foo(,)
+# CHECK: calll foo{{$}}
 
 # CHECK: movl	%gs:8, %eax
 movl %gs:8, %eax
diff --git a/test/Makefile b/test/Makefile
index c78c2561c34b..7f253b066256 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -57,7 +57,7 @@ LIT_ALL_TESTSUITES += $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/test
 
 # Force creation of Clang Tools' lit.site.cfg.
 clang-tools-site-cfg: FORCE
-	$(MAKE) -C $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/test lit.site.cfg
+	$(MAKE) -C $(PROJ_OBJ_DIR)/../tools/clang/tools/extra/test lit.site.cfg Unit/lit.site.cfg
 extra-site-cfgs:: clang-tools-site-cfg
 endif
 
@@ -83,19 +83,15 @@ endif
 
 # ulimits like these are redundantly enforced by the buildbots, so
 # just removing them here won't work.
-# Both AuroraUX & Solaris do not have the -m flag for ulimit
+# Solaris does not have the -m flag for ulimit
 ifeq ($(HOST_OS),SunOS)
 ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ;
 else # !SunOS
-ifeq ($(HOST_OS),AuroraUX)
-ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ;
-else # !AuroraUX
 # Newer versions of python try to allocate an insane amount of address space for
 # its thread-local storage, don't set a limit here.
 # When -v is not used, then -s has to be used to limit the stack size.
 # FIXME: Those limits should be enforced by lit instead of globally.
 ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -s 8192 ;
-endif # AuroraUX
 endif # SunOS
 
 check-local:: lit.site.cfg Unit/lit.site.cfg
@@ -112,11 +108,6 @@ check-local-all:: lit.site.cfg Unit/lit.site.cfg extra-site-cfgs
 clean::
 	$(RM) -rf `find $(LLVM_OBJ_ROOT)/test -name Output -type d -print`
 
-ifneq ($(OCAMLOPT),)
-CC_FOR_OCAMLOPT := $(shell $(OCAMLOPT) -config | grep native_c_compiler | sed -e 's/native_c_compiler: //')
-CXX_FOR_OCAMLOPT := $(subst gcc,g++,$(CC_FOR_OCAMLOPT))
-endif
-
 FORCE:
 
 ifeq ($(DISABLE_ASSERTIONS),1)
@@ -132,11 +123,19 @@ lit.site.cfg: FORCE
 	@$(ECHOPATH) s=@LLVM_SOURCE_DIR@=$(LLVM_SRC_ROOT)=g >> lit.tmp
 	@$(ECHOPATH) s=@LLVM_BINARY_DIR@=$(LLVM_OBJ_ROOT)=g >> lit.tmp
 	@$(ECHOPATH) s=@LLVM_TOOLS_DIR@=$(ToolDir)=g >> lit.tmp
+	@$(ECHOPATH) s=@LLVM_LIBRARY_DIR@=$(LibDir)=g >> lit.tmp
 	@$(ECHOPATH) s=@SHLIBDIR@=$(SharedLibDir)=g >> lit.tmp
 	@$(ECHOPATH) s=@SHLIBEXT@=$(SHLIBEXT)=g >> lit.tmp
 	@$(ECHOPATH) s=@EXEEXT@=$(EXEEXT)=g >> lit.tmp
 	@$(ECHOPATH) s=@PYTHON_EXECUTABLE@=$(PYTHON)=g >> lit.tmp
-	@$(ECHOPATH) s=@OCAMLOPT@=$(OCAMLOPT) -cc $(subst *,'\\\"',*$(subst =,"\\=",$(CXX_FOR_OCAMLOPT))*) -cclib -L$(LibDir) -I $(LibDir)/ocaml=g >> lit.tmp
+	@$(ECHOPATH) s=@OCAMLFIND@=$(OCAMLFIND)=g >> lit.tmp
+	@$(ECHOPATH) s!@OCAMLFLAGS@!$(addprefix -cclib ,$(LDFLAGS))!g >> lit.tmp
+	@$(ECHOPATH) s=@HAVE_OCAMLOPT@=$(HAVE_OCAMLOPT)=g >> lit.tmp
+	@$(ECHOPATH) s=@HAVE_OCAML_OUNIT@=$(HAVE_OCAML_OUNIT)=g >> lit.tmp
+	@$(ECHOPATH) s=@GO_EXECUTABLE@=$(GO)=g >> lit.tmp
+	@$(ECHOPATH) s!@HOST_CC@!$(CC)!g >> lit.tmp
+	@$(ECHOPATH) s!@HOST_CXX@!$(CXX)!g >> lit.tmp
+	@$(ECHOPATH) s!@HOST_LDFLAGS@!$(LDFLAGS)!g >> lit.tmp
 	@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp
 	@$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp
 	@$(ECHOPATH) s=@TARGETS_TO_BUILD@=$(TARGETS_TO_BUILD)=g >> lit.tmp
diff --git a/test/Object/AArch64/yaml2obj-elf-aarch64-rel.yaml b/test/Object/AArch64/yaml2obj-elf-aarch64-rel.yaml
new file mode 100644
index 000000000000..b8fb68113c76
--- /dev/null
+++ b/test/Object/AArch64/yaml2obj-elf-aarch64-rel.yaml
@@ -0,0 +1,55 @@
+# RUN: yaml2obj -format=elf %s > %t
+# RUN: obj2yaml  %t | FileCheck %s
+
+# CHECK:      - Name:            .rela.text
+# CHECK-NEXT:   Type:            SHT_RELA
+# CHECK-NEXT:    Link:            .symtab
+# CHECK-NEXT:    AddressAlign:    0x0000000000000008
+# CHECK-NEXT:    Info:            .text
+# CHECK-NEXT:    Relocations:     
+# CHECK-NEXT:      - Offset:          0x0000000000000000
+# CHECK-NEXT:        Symbol:          main
+# CHECK-NEXT:        Type:            R_AARCH64_ABS64
+# CHECK-NEXT:        Addend:          0
+# CHECK-NEXT:      - Offset:          0x0000000000000008
+# CHECK-NEXT:        Symbol:          main
+# CHECK-NEXT:        Type:            R_AARCH64_TLSGD_ADR_PREL21
+# CHECK-NEXT:        Addend:          0
+
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_REL
+  Machine:         EM_AARCH64
+Sections:
+  - Type:            SHT_PROGBITS
+    Name:            .text
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    AddressAlign:    0x04
+    Content:         00000000000000000000000000000000
+  - Type:            SHT_RELA
+    Name:            .rela.text
+    Link:            .symtab
+    Info:            .text
+    AddressAlign:    0x08
+    Relocations:
+      - Offset:          0
+        Symbol:          main
+        Type:            R_AARCH64_ABS64
+        Addend:          0
+      - Offset:          8
+        Symbol:          main
+        Type:            R_AARCH64_TLSGD_ADR_PREL21
+        Addend:          0
+
+Symbols:
+  Local:
+    - Name:            .text
+      Type:            STT_SECTION
+      Section:         .text
+
+  Global:
+    - Name:            main
+      Type:            STT_FUNC
+      Section:         .text
+      Size:            0x08
diff --git a/test/Object/ARM/macho-data-in-code.test b/test/Object/ARM/macho-data-in-code.test
index dca084c2caba..2bfb6c118644 100644
--- a/test/Object/ARM/macho-data-in-code.test
+++ b/test/Object/ARM/macho-data-in-code.test
@@ -3,5 +3,5 @@ RUN: llvm-objdump -triple thumbv7-apple-iOS -disassemble %p/../Inputs/macho-data
 CHECK:      12:	80 bd                                        	pop	{r7, pc}
 
 CHECK:      14:	38 00 00 00                                  	.long 56	@ KIND_DATA
-CHECK:      16:	00 00                                        	movs	r0, r0
+CHECK:      18:	70 47                                        	bx	lr
 
diff --git a/test/Object/Inputs/COFF/long-section-name.yaml b/test/Object/Inputs/COFF/long-section-name.yaml
new file mode 100644
index 000000000000..a86f9019cd5c
--- /dev/null
+++ b/test/Object/Inputs/COFF/long-section-name.yaml
@@ -0,0 +1,11 @@
+---
+header:
+  Machine:         IMAGE_FILE_MACHINE_I386
+  Characteristics: [ IMAGE_FILE_RELOCS_STRIPPED, IMAGE_FILE_LINE_NUMS_STRIPPED, IMAGE_FILE_LOCAL_SYMS_STRIPPED, IMAGE_FILE_32BIT_MACHINE ]
+sections:
+  - Name:            .long_section_name
+    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ]
+    Alignment:       1
+    SectionData:     ''
+symbols:
+...
diff --git a/test/Object/Inputs/COFF/section-aux-symbol.yaml b/test/Object/Inputs/COFF/section-aux-symbol.yaml
new file mode 100644
index 000000000000..623af5589595
--- /dev/null
+++ b/test/Object/Inputs/COFF/section-aux-symbol.yaml
@@ -0,0 +1,167 @@
+---
+header:          
+  Machine:         IMAGE_FILE_MACHINE_I386
+  Characteristics: [ IMAGE_FILE_RELOCS_STRIPPED, IMAGE_FILE_EXECUTABLE_IMAGE, IMAGE_FILE_LINE_NUMS_STRIPPED, IMAGE_FILE_32BIT_MACHINE, IMAGE_FILE_DEBUG_STRIPPED ]
+sections:        
+  - Name:            .CRT
+    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ]
+    Alignment:       4
+    SectionData:     0000000030114000000000000000000010104000401640000000000000000000B015400060154000000000000000000000000000
+symbols:         
+  - Name:            '.CRT$XCAA'
+    Value:           4
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition: 
+      Length:          4
+      NumberOfRelocations: 1
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+  - Name:            '.CRT$XIAA'
+    Value:           16
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition: 
+      Length:          4
+      NumberOfRelocations: 1
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+  - Name:            '.CRT$XLD'
+    Value:           36
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition: 
+      Length:          4
+      NumberOfRelocations: 1
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+  - Name:            '.CRT$XLC'
+    Value:           32
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition: 
+      Length:          4
+      NumberOfRelocations: 1
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+  - Name:            '.CRT$XDZ'
+    Value:           48
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition: 
+      Length:          4
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+  - Name:            '.CRT$XDA'
+    Value:           44
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition: 
+      Length:          4
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+  - Name:            '.CRT$XLZ'
+    Value:           40
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition: 
+      Length:          4
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+  - Name:            '.CRT$XLA'
+    Value:           28
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition: 
+      Length:          4
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+  - Name:            '.CRT$XIC'
+    Value:           20
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition: 
+      Length:          4
+      NumberOfRelocations: 1
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+  - Name:            '.CRT$XCZ'
+    Value:           8
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition: 
+      Length:          4
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+  - Name:            '.CRT$XCA'
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition: 
+      Length:          4
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+  - Name:            '.CRT$XIZ'
+    Value:           24
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition: 
+      Length:          4
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+  - Name:            '.CRT$XIA'
+    Value:           12
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition: 
+      Length:          4
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          0
+...
diff --git a/test/Object/Inputs/macho-archive-unsorted-x86_64.a b/test/Object/Inputs/macho-archive-unsorted-x86_64.a
new file mode 100644
index 000000000000..6a2b570f1609
--- /dev/null
+++ b/test/Object/Inputs/macho-archive-unsorted-x86_64.a
diff --git a/test/Object/Inputs/macho-no-exports.dylib b/test/Object/Inputs/macho-no-exports.dylib
new file mode 100755
index 000000000000..6e1be6ca97e6
--- /dev/null
+++ b/test/Object/Inputs/macho-no-exports.dylib
diff --git a/test/Object/Inputs/macho-rpath-x86_64 b/test/Object/Inputs/macho-rpath-x86_64
new file mode 100755
index 000000000000..c518fcdb3b59
--- /dev/null
+++ b/test/Object/Inputs/macho-rpath-x86_64
diff --git a/test/Object/Inputs/macho-zero-ncmds b/test/Object/Inputs/macho-zero-ncmds
new file mode 100644
index 000000000000..0505419195e9
--- /dev/null
+++ b/test/Object/Inputs/macho-zero-ncmds
diff --git a/test/Object/Inputs/micro-mips.elf-mipsel b/test/Object/Inputs/micro-mips.elf-mipsel
new file mode 100755
index 000000000000..80b8472c25bc
--- /dev/null
+++ b/test/Object/Inputs/micro-mips.elf-mipsel
diff --git a/test/Object/Inputs/mri-crlf.mri b/test/Object/Inputs/mri-crlf.mri
new file mode 100644
index 000000000000..b8540304f2ee
--- /dev/null
+++ b/test/Object/Inputs/mri-crlf.mri
@@ -0,0 +1,2 @@
+; this file intentionally has crlf line endings
+end
diff --git a/test/Object/Inputs/thin.a b/test/Object/Inputs/thin.a
new file mode 100644
index 000000000000..bb9d532a7879
--- /dev/null
+++ b/test/Object/Inputs/thin.a
diff --git a/test/Object/Inputs/trivial-label-test.elf-x86-64 b/test/Object/Inputs/trivial-label-test.elf-x86-64
new file mode 100644
index 000000000000..76f4499a8344
--- /dev/null
+++ b/test/Object/Inputs/trivial-label-test.elf-x86-64
diff --git a/test/Object/Mips/objdump-micro-mips.test b/test/Object/Mips/objdump-micro-mips.test
new file mode 100644
index 000000000000..0f28dc1a5f1c
--- /dev/null
+++ b/test/Object/Mips/objdump-micro-mips.test
@@ -0,0 +1,12 @@
+RUN: llvm-objdump -d -mattr=micromips %p/../Inputs/micro-mips.elf-mipsel \
+RUN:   | FileCheck %s
+
+CHECK:      foo:
+CHECK-NEXT:      330:   bd 33 f8 ff   addiu   $sp, $sp, -8
+CHECK-NEXT:      334:   dd fb 04 00   sw      $fp, 4($sp)
+CHECK-NEXT:      338:   1d 00 50 f1   addu    $fp, $sp, $zero
+
+CHECK:      bar:
+CHECK-NEXT:      350:   a2 41 02 00   lui     $2, 2
+CHECK-NEXT:      354:   42 30 8f 80   addiu   $2, $2, -32625
+CHECK-NEXT:      358:   bd 33 e8 ff   addiu   $sp, $sp, -24
diff --git a/test/Object/X86/nm-ir.ll b/test/Object/X86/nm-ir.ll
index 6bb7e2323a22..881397c00a42 100644
--- a/test/Object/X86/nm-ir.ll
+++ b/test/Object/X86/nm-ir.ll
@@ -28,7 +28,7 @@ module asm ".long undef_asm_sym"
 @g4 = private global i32 42
 
 @a1 = alias i32* @g1
-@a2 = alias internal i32* @g1
+@a2 = internal alias i32* @g1
 
 define void @f1() {
   ret void
diff --git a/test/Object/X86/objdump-cfg-invalid-opcode.yaml b/test/Object/X86/objdump-cfg-invalid-opcode.yaml
deleted file mode 100644
index d0a29be8697d..000000000000
--- a/test/Object/X86/objdump-cfg-invalid-opcode.yaml
+++ /dev/null
@@ -1,58 +0,0 @@
-# RUN: yaml2obj -format=elf %s | llvm-objdump -d -yaml-cfg=%t - && FileCheck --check-prefix=CFG < %t %s
-# REQUIRES: shell
-#
-# Generated from:
-# main:
-# .LBL0_1:
-# 	movq	8(%rsi), %rax
-# 	<invalid opcode: 06>
-# 	nop
-
-!ELF
-FileHeader:
-  Class: ELFCLASS64
-  Data: ELFDATA2LSB
-  Type: ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-    Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
-    Content: "488B46080690"
-
-## 0000000000000000 <main>:
-
-#CFG: Atoms:
-#CFG:   - StartAddress:    0x0000000000000000
-#CFG:     Size:            4
-#CFG:     Type:            Text
-
-##    0:   48 8b 46 08             mov    0x8(%rsi),%rax
-#CFG:       - Inst:            MOV64rm
-#CFG:         Size:            4
-#CFG:         Ops:             [ RRAX, RRSI, I1, R, I8, R ]
-
-
-#CFG:   - StartAddress:    0x0000000000000004
-#CFG:     Size:            1
-#CFG:     Type:            Data
-
-##    4:   06                      (bad)
-#CFG:     Content:         '06'
-
-#CFG:   - StartAddress:    0x0000000000000005
-#CFG:     Size:            1
-#CFG:     Type:            Text
-
-##    5:   90                      nop
-#CFG:       - Inst:            NOOP
-#CFG:         Size:            1
-#CFG:         Ops:             [  ]
-
-Symbols:
-  Global:
-    - Name: main
-      Type: STT_FUNC
-      Section: .text
-      Value: 0x0
-      Size: 6
diff --git a/test/Object/X86/objdump-cfg-textatomsize.yaml b/test/Object/X86/objdump-cfg-textatomsize.yaml
deleted file mode 100644
index 87cb4e13ec1e..000000000000
--- a/test/Object/X86/objdump-cfg-textatomsize.yaml
+++ /dev/null
@@ -1,39 +0,0 @@
-# RUN: yaml2obj -format=elf %s | llvm-objdump -d -yaml-cfg=%t - && FileCheck --check-prefix=CFG < %t %s
-# REQUIRES: shell
-#
-# Generated from:
-# main:
-# .LBL0_1:
-# 	jmp	.LBL0_1
-#
-
-!ELF
-FileHeader:
-  Class: ELFCLASS64
-  Data: ELFDATA2LSB
-  Type: ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-    Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
-    Content: "EBFE"
-
-## 0000000000000000 <main>:
-
-#CFG: Atoms:
-#CFG:   - StartAddress:    0x0000000000000000
-#CFG:     Size:            2
-
-##    0:   eb fe          jmp $-2
-#CFG:       - Inst:            JMP_1
-#CFG:         Size:            2
-#CFG:         Ops:             [ I-2 ]
-
-Symbols:
-  Global:
-    - Name: main
-      Type: STT_FUNC
-      Section: .text
-      Value: 0x0
-      Size: 2
diff --git a/test/Object/X86/objdump-cfg.yaml b/test/Object/X86/objdump-cfg.yaml
deleted file mode 100644
index c5bff03c1d0c..000000000000
--- a/test/Object/X86/objdump-cfg.yaml
+++ /dev/null
@@ -1,86 +0,0 @@
-# RUN: yaml2obj -format=elf %s | llvm-objdump -d -yaml-cfg=%t - && FileCheck --check-prefix=CFG < %t %s
-# REQUIRES: shell
-#
-# Generated from:
-# main:
-# 	movl	$48, %eax
-# 	cmpl	$3, %edi
-# 	jl	.LBB0_2
-# 	movq	8(%rsi), %rax
-# 	movsbl	(%rax), %eax
-# .LBB0_2:
-# 	ret
-#
-
-!ELF
-FileHeader:
-  Class: ELFCLASS64
-  Data: ELFDATA2LSB
-  Type: ET_REL
-  Machine: EM_X86_64
-Sections:
-  - Name: .text
-    Type: SHT_PROGBITS
-    Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
-    Content: "B83000000083FF037C07488B46080FBE00C3"
-
-## 0000000000000000 <main>:
-
-#CFG: Atoms:
-#CFG:   - StartAddress:    0x0000000000000000
-#CFG:     Size:            10
-
-##    0:   b8 30 00 00 00          mov    $0x30,%eax
-#CFG:       - Inst:            MOV32ri
-#CFG:         Size:            5
-#CFG:         Ops:             [ REAX, I48 ]
-
-##    5:   83 ff 03                cmp    $0x3,%edi
-#CFG:       - Inst:            CMP32ri8
-#CFG:         Size:            3
-#CFG:         Ops:             [ REDI, I3 ]
-
-##    8:   7c 07                   jl     11 <main+0x11>
-#CFG:       - Inst:            JL_1
-#CFG:         Size:            2
-#CFG:         Ops:             [ I7 ]
-
-#CFG:   - StartAddress:    0x000000000000000A
-#CFG:     Size:            7
-
-##    a:   48 8b 46 08             mov    0x8(%rsi),%rax
-#CFG:       - Inst:            MOV64rm
-#CFG:         Size:            4
-#CFG:         Ops:             [ RRAX, RRSI, I1, R, I8, R ]
-
-##    e:   0f be 00                movsbl (%rax),%eax
-#CFG:       - Inst:            MOVSX32rm8
-#CFG:         Size:            3
-#CFG:         Ops:             [ REAX, RRAX, I1, R, I0, R ]
-#CFG:   - StartAddress:    0x0000000000000011
-#CFG:     Size:            1
-
-##   11:   c3                      retq
-#CFG:       - Inst:            RET
-#CFG:         Size:            1
-#CFG:         Ops:             [  ]
-
-Symbols:
-  Global:
-    - Name: main
-      Type: STT_FUNC
-      Section: .text
-      Value: 0x0
-      Size: 18
-
-#CFG: Functions:
-#CFG:     BasicBlocks:
-#CFG:       - Address:         0x0000000000000000
-#CFG:         Preds:           [  ]
-#CFG:         Succs:           [ 0x0000000000000011, 0x000000000000000A ]
-#CFG:       - Address:         0x0000000000000011
-#CFG:         Preds:           [ 0x0000000000000000, 0x000000000000000A ]
-#CFG:         Succs:           [  ]
-#CFG:       - Address:         0x000000000000000A
-#CFG:         Preds:           [ 0x0000000000000000 ]
-#CFG:         Succs:           [ 0x0000000000000011 ]
diff --git a/test/Object/X86/objdump-disassembly-inline-relocations.test b/test/Object/X86/objdump-disassembly-inline-relocations.test
index 78615765046d..3871bcbf0b55 100644
--- a/test/Object/X86/objdump-disassembly-inline-relocations.test
+++ b/test/Object/X86/objdump-disassembly-inline-relocations.test
@@ -76,11 +76,11 @@ ELF-i386: main:
 ELF-i386:        0:     83 ec 0c                                        subl    $12, %esp
 ELF-i386:        3:     c7 44 24 08 00 00 00 00                         movl    $0, 8(%esp)
 ELF-i386:        b:     c7 04 24 00 00 00 00                            movl    $0, (%esp)
-ELF-i386:                              e: R_386_32      Unknown
+ELF-i386:                              e: R_386_32      .rodata.str1.1
 ELF-i386:       12:     e8 fc ff ff ff                                  calll   -4
-ELF-i386:                             13: R_386_PC32    Unknown
+ELF-i386:                             13: R_386_PC32    puts
 ELF-i386:       17:     e8 fc ff ff ff                                  calll   -4
-ELF-i386:                             18: R_386_PC32    Unknown
+ELF-i386:                             18: R_386_PC32    SomeOtherFunction
 ELF-i386:       1c:     8b 44 24 08                                     movl    8(%esp), %eax
 ELF-i386:       20:     83 c4 0c                                        addl    $12, %esp
 ELF-i386:       23:     c3                                              ret
diff --git a/test/Object/X86/objdump-disassembly-symbolic.test b/test/Object/X86/objdump-disassembly-symbolic.test
deleted file mode 100644
index 95a5fc8e70ac..000000000000
--- a/test/Object/X86/objdump-disassembly-symbolic.test
+++ /dev/null
@@ -1,68 +0,0 @@
-RUN: llvm-objdump -d -symbolize %p/../Inputs/trivial-object-test.elf-x86-64 \
-RUN:              | FileCheck %s -check-prefix ELF-x86-64
-RUN: llvm-objdump -d -symbolize %p/../Inputs/trivial-object-test.macho-x86-64 \
-RUN:              | FileCheck %s -check-prefix MACHO-x86-64
-
-# Generate this using:
-#   ld trivial-object-test.macho-x86-64 -undefined dynamic_lookup
-RUN: llvm-objdump -d -symbolize %p/../Inputs/trivial-executable-test.macho-x86-64 \
-RUN:              | FileCheck %s -check-prefix MACHO-STUBS-x86-64
-
-ELF-x86-64: file format ELF64-x86-64
-ELF-x86-64: Disassembly of section .text:
-ELF-x86-64: main:
-ELF-x86-64:        0:	48 83 ec 08                                  	subq	$8, %rsp
-ELF-x86-64:        4:	c7 44 24 04 00 00 00 00                      	movl	$0, 4(%rsp)
-ELF-x86-64:        c:	bf 00 00 00 00                               	movl	$.rodata.str1.1, %edi
-ELF-x86-64:       11:	e8 00 00 00 00                               	callq	puts-4
-ELF-x86-64:       16:	30 c0                                        	xorb	%al, %al
-ELF-x86-64:       18:	e8 00 00 00 00                               	callq	SomeOtherFunction-4
-ELF-x86-64:       1d:	8b 44 24 04                                  	movl	4(%rsp), %eax
-ELF-x86-64:       21:	48 83 c4 08                                  	addq	$8, %rsp
-ELF-x86-64:       25:	c3                                           	ret
-
-MACHO-x86-64: file format Mach-O 64-bit x86-64
-MACHO-x86-64: Disassembly of section __TEXT,__text:
-MACHO-x86-64: _main:
-MACHO-x86-64:        0:	48 83 ec 08                                  	subq	$8, %rsp
-MACHO-x86-64:        4:	c7 44 24 04 00 00 00 00                      	movl	$0, 4(%rsp)
-MACHO-x86-64:        c:	48 8d 3d 00 00 00 00                         	leaq	L_.str(%rip), %rdi ## literal pool for: Hello World!
-MACHO-x86-64:       13:	e8 00 00 00 00                               	callq	_puts
-MACHO-x86-64:       18:	30 c0                                        	xorb	%al, %al
-MACHO-x86-64:       1a:	e8 00 00 00 00                               	callq	_SomeOtherFunction
-MACHO-x86-64:       1f:	8b 44 24 04                                  	movl	4(%rsp), %eax
-MACHO-x86-64:       23:	48 83 c4 08                                  	addq	$8, %rsp
-MACHO-x86-64:       27:	c3                                           	ret
-
-MACHO-STUBS-x86-64: file format Mach-O 64-bit x86-64
-MACHO-STUBS-x86-64: Disassembly of section __TEXT,__text:
-MACHO-STUBS-x86-64: _main:
-MACHO-STUBS-x86-64:     1f90:       48 83 ec 08                                     subq    $8, %rsp
-MACHO-STUBS-x86-64:     1f94:       c7 44 24 04 00 00 00 00                         movl    $0, 4(%rsp)
-MACHO-STUBS-x86-64:     1f9c:       48 8d 3d 45 00 00 00                            leaq    69(%rip), %rdi ## literal pool for: Hello World!
-MACHO-STUBS-x86-64:     1fa3:       e8 16 00 00 00                                  callq   puts
-MACHO-STUBS-x86-64:     1fa8:       30 c0                                           xorb    %al, %al
-MACHO-STUBS-x86-64:     1faa:       e8 09 00 00 00                                  callq   SomeOtherFunction
-MACHO-STUBS-x86-64:     1faf:       8b 44 24 04                                     movl    4(%rsp), %eax
-MACHO-STUBS-x86-64:     1fb3:       48 83 c4 08                                     addq    $8, %rsp
-MACHO-STUBS-x86-64:     1fb7:       c3                                              ret
-
-
-RUN: llvm-objdump -d -symbolize %p/../Inputs/relocation-relocatable.elf-i386 \
-RUN:              | FileCheck %s -check-prefix ELF-i386-REL
-
-ELF-i386-REL: Disassembly of section .text:
-ELF-i386-REL-NEXT: f:
-ELF-i386-REL-NEXT:       0:	e9 fc ff ff ff                	jmp	h
-ELF-i386-REL:      g:
-ELF-i386-REL-NEXT:       5:	e9 fc ff ff ff                 	jmp	f
-
-
-RUN: llvm-objdump -d -symbolize %p/../Inputs/relocation-dynamic.elf-i386 \
-RUN:              | FileCheck %s -check-prefix ELF-i386-DYN
-
-ELF-i386-DYN: Disassembly of section .text:
-ELF-i386-DYN-NEXT: f:
-ELF-i386-DYN-NEXT:      1a4:	e9 fc ff ff ff                 	jmp	h
-ELF-i386-DYN:      g:
-ELF-i386-DYN-NEXT:      1a9:	e9 fc ff ff ff                 	jmp	f
diff --git a/test/Object/X86/objdump-label.test b/test/Object/X86/objdump-label.test
new file mode 100644
index 000000000000..f8b933451e52
--- /dev/null
+++ b/test/Object/X86/objdump-label.test
@@ -0,0 +1,10 @@
+RUN: llvm-objdump -d %p/../Inputs/trivial-label-test.elf-x86-64 \
+RUN:              | FileCheck %s -check-prefix ELF-x86-64
+
+ELF-x86-64: file format ELF64-x86-64
+ELF-x86-64: Disassembly of section .text:
+ELF-x86-64: foo:
+ELF-x86-64:        0:	90                                           	nop
+ELF-x86-64: bum:
+ELF-x86-64:        1:	90                                           	nop
+
diff --git a/test/Object/archive-error-tmp.txt b/test/Object/archive-error-tmp.txt
index 061898655b6c..ed3b14559ac7 100644
--- a/test/Object/archive-error-tmp.txt
+++ b/test/Object/archive-error-tmp.txt
@@ -1,5 +1,3 @@
-REQUIRES: shell
-
 Test that no temporary file is left behind on error.
 
 RUN: rm -rf %t
diff --git a/test/Object/archive-symtab.test b/test/Object/archive-symtab.test
index 0899828bdfbe..01f17bcc8b61 100644
--- a/test/Object/archive-symtab.test
+++ b/test/Object/archive-symtab.test
@@ -61,6 +61,7 @@ RUN: llvm-ranlib %t.a
 RUN: llvm-nm -M %t.a | FileCheck %s
 
 RUN: llvm-nm -M %p/Inputs/macho-archive-x86_64.a | FileCheck %s --check-prefix=BSD-MachO
+RUN: llvm-nm -M %p/Inputs/macho-archive-unsorted-x86_64.a | FileCheck %s --check-prefix=BSD-MachO
 
 BSD-MachO: Archive map
 BSD-MachO: _bar in bar.o
diff --git a/test/Object/archive-toc.test b/test/Object/archive-toc.test
index 4195c4000649..79a6e0e0ba80 100644
--- a/test/Object/archive-toc.test
+++ b/test/Object/archive-toc.test
@@ -26,3 +26,11 @@ CHECK:      rw-r--r-- 1002/102      8 2004-11-19 03:24:02.000000000 evenlen
 CHECK-NEXT: rw-r--r-- 1002/102      7 2004-11-19 03:24:02.000000000 oddlen
 CHECK-NEXT: rwxr-xr-x 1002/102   1465 2004-11-19 03:24:02.000000000 very_long_bytecode_file_name.bc
 CHECK-NEXT: rw-r--r-- 1002/102   2280 2004-11-19 03:24:02.000000000 IsNAN.o
+
+Test reading a thin archive created by gnu ar
+RUN: env TZ=GMT llvm-ar tv %p/Inputs/thin.a | FileCheck %s --check-prefix=THIN -strict-whitespace
+
+THIN:      rw-r--r-- 1000/1000      8 2014-12-16 00:56:27.000000000 evenlen
+THIN-NEXT: rw-r--r-- 1000/1000      7 2014-12-16 00:56:27.000000000 oddlen
+THIN-NEXT: rwxr-xr-x 1000/1000   1465 2014-12-16 00:56:27.000000000 very_long_bytecode_file_name.bc
+THIN-NEXT: rw-r--r-- 1000/1000   2280 2014-12-16 00:56:27.000000000 IsNAN.o
diff --git a/test/Object/coff-archive-short.test b/test/Object/coff-archive-short.test
index 2aee95699b50..9f7165b80f56 100644
--- a/test/Object/coff-archive-short.test
+++ b/test/Object/coff-archive-short.test
@@ -5,7 +5,7 @@
 # than 15 characters, thus, unlike coff_archive.lib, it has no string
 # table as the third member.
 #
-RUN: llvm-nm --numeric-sort -M %p/Inputs/coff_archive_short.lib | FileCheck -check-prefix=CHECKIDX %s
+RUN: llvm-nm -a --numeric-sort -M %p/Inputs/coff_archive_short.lib | FileCheck -check-prefix=CHECKIDX %s
 
 CHECKIDX: Archive map
 CHECKIDX: _shortfn1 in short1.obj
diff --git a/test/Object/coff-archive.test b/test/Object/coff-archive.test
index 3b0aa0ca0634..239a96b4c351 100644
--- a/test/Object/coff-archive.test
+++ b/test/Object/coff-archive.test
@@ -1,7 +1,7 @@
 #
 # Check if the index is appearing properly in the output file 
 #
-RUN: llvm-nm --numeric-sort -M %p/Inputs/coff_archive.lib | FileCheck -check-prefix=CHECKIDX %s
+RUN: llvm-nm -a --numeric-sort -M %p/Inputs/coff_archive.lib | FileCheck -check-prefix=CHECKIDX %s
 
 CHECKIDX: Archive map
 CHECKIDX: ??0invalid_argument@std@@QAE@PBD@Z in Debug\mymath.obj
diff --git a/test/Object/mri-addlib.test b/test/Object/mri-addlib.test
new file mode 100644
index 000000000000..745bcf65e7f4
--- /dev/null
+++ b/test/Object/mri-addlib.test
@@ -0,0 +1,14 @@
+; RUN: echo create %t.a > %t.mri
+; RUN: echo addlib %p/Inputs/GNU.a >> %t.mri
+; RUN: echo addlib %p/Inputs/archive-test.a-gnu-minimal >> %t.mri
+; RUN: echo save >> %t.mri
+; RUN: echo end >> %t.mri
+
+; RUN: llvm-ar -M  < %t.mri
+; RUN: llvm-ar t %t.a | FileCheck %s
+
+; CHECK: evenlen
+; CHECK-NEXT: oddlen
+; CHECK-NEXT: very_long_bytecode_file_name.bc
+; CHECK-NEXT: IsNAN.o
+; CHECK-NEXT: test
diff --git a/test/Object/mri-addmod.test b/test/Object/mri-addmod.test
new file mode 100644
index 000000000000..f1048489e95e
--- /dev/null
+++ b/test/Object/mri-addmod.test
@@ -0,0 +1,33 @@
+; RUN: echo create %t.a > %t.mri
+; RUN: echo "addmod  \"%p/Inputs/trivial-object-test.elf-x86-64\" " >> %t.mri
+; RUN: echo save >> %t.mri
+; RUN: echo end >> %t.mri
+
+; RUN: llvm-ar -M  < %t.mri
+; RUN: llvm-nm -M %t.a | FileCheck %s
+
+; CHECK:      Archive map
+; CHECK-NEXT: main in trivial-object-test.elf-x86-64
+
+; CHECK:      trivial-object-test.elf-x86-64:
+; CHECK-NEXT:                  U SomeOtherFunction
+; CHECK-NEXT: 0000000000000000 T main
+; CHECK-NEXT:                  U puts
+
+; Now test that CREATE overwrites an existing file.
+; RUN: echo create %t.a > %t2.mri
+; RUN: echo addmod %p/Inputs/trivial-object-test2.elf-x86-64 >> %t2.mri
+; RUN: echo save >> %t2.mri
+; RUN: echo end >> %t2.mri
+
+; RUN: llvm-ar -M  < %t2.mri
+; RUN: llvm-nm -M %t.a | FileCheck --check-prefix=NEW %s
+
+; NEW: Archive map
+; NEW-NEXT: foo in trivial-object-test2.elf-x86-64
+; NEW-NEXT: main in trivial-object-test2.elf-x86-64
+
+; NEW: trivial-object-test2.elf-x86-64:
+; NEW-NEXT: 0000000000000000 t bar
+; NEW-NEXT: 0000000000000006 T foo
+; NEW-NEXT: 0000000000000016 T main
diff --git a/test/Object/mri-crlf.test b/test/Object/mri-crlf.test
new file mode 100644
index 000000000000..3411b55095e1
--- /dev/null
+++ b/test/Object/mri-crlf.test
@@ -0,0 +1 @@
+; RUN: llvm-ar -M  < %S/Inputs/mri-crlf.mri
diff --git a/test/Object/mri1.test b/test/Object/mri1.test
new file mode 100644
index 000000000000..3d27db7996a2
--- /dev/null
+++ b/test/Object/mri1.test
@@ -0,0 +1,6 @@
+; RUN: echo create %t.a > %t.mri
+; RUN: echo save >> %t.mri
+; RUN: echo end >> %t.mri
+
+; RUN: llvm-ar -M  < %t.mri
+; RUN: llvm-ar t %t.a
diff --git a/test/Object/mri2.test b/test/Object/mri2.test
new file mode 100644
index 000000000000..0c2417934177
--- /dev/null
+++ b/test/Object/mri2.test
@@ -0,0 +1,7 @@
+; RUN: echo create %t.a > %t.mri
+; RUN: echo create %t.a >> %t.mri
+; RUN: echo save >> %t.mri
+; RUN: echo end >> %t.mri
+
+; RUN: not llvm-ar -M  < %t.mri 2>&1 | FileCheck %s
+; CHECK: Editing multiple archives not supported
diff --git a/test/Object/mri3.test b/test/Object/mri3.test
new file mode 100644
index 000000000000..bdc53991510e
--- /dev/null
+++ b/test/Object/mri3.test
@@ -0,0 +1,6 @@
+; RUN: echo save > %t.mri
+; RUN: echo create %t.a >> %t.mri
+; RUN: echo end >> %t.mri
+
+; RUN: not llvm-ar -M  < %t.mri 2>&1 | FileCheck %s
+; CHECK: File already saved.
diff --git a/test/Object/mri4.test b/test/Object/mri4.test
new file mode 100644
index 000000000000..a24c14d7d289
--- /dev/null
+++ b/test/Object/mri4.test
@@ -0,0 +1,4 @@
+; RUN: echo abc > %t.mri
+
+; RUN: not llvm-ar -M  < %t.mri 2>&1 | FileCheck %s
+; CHECK: Unknown command: abc.
diff --git a/test/Object/mri5.test b/test/Object/mri5.test
new file mode 100644
index 000000000000..98114248b69c
--- /dev/null
+++ b/test/Object/mri5.test
@@ -0,0 +1,2 @@
+; RUN: not llvm-ar -M t < %s 2>&1 | FileCheck %s
+; CHECK: Cannot mix -M and other options.
diff --git a/test/Object/nm-archive.test b/test/Object/nm-archive.test
index 7dbc22a1e8cf..a9ae9cbbfbd6 100644
--- a/test/Object/nm-archive.test
+++ b/test/Object/nm-archive.test
@@ -1,4 +1,4 @@
-RUN: llvm-nm %p/Inputs/archive-test.a-coff-i386 \
+RUN: llvm-nm -a %p/Inputs/archive-test.a-coff-i386 \
 RUN:         | FileCheck %s -check-prefix COFF
 
 COFF: trivial-object-test.coff-i386:
@@ -9,6 +9,15 @@ COFF-NEXT:          U _SomeOtherFunction
 COFF-NEXT: 00000000 T _main
 COFF-NEXT:          U _puts
 
+RUN: llvm-nm -a -o %p/Inputs/archive-test.a-coff-i386 \
+RUN:         | FileCheck %s -check-prefix COFF-o
+
+COFF-o: {{.*}}/archive-test.a-coff-i386:trivial-object-test.coff-i386: 00000000 d .data
+COFF-o: {{.*}}/archive-test.a-coff-i386:trivial-object-test.coff-i386: 00000000 t .text
+COFF-o: {{.*}}/archive-test.a-coff-i386:trivial-object-test.coff-i386: 00000000 d L_.str
+COFF-o: {{.*}}/archive-test.a-coff-i386:trivial-object-test.coff-i386:          U _SomeOtherFunction
+COFF-o: {{.*}}/archive-test.a-coff-i386:trivial-object-test.coff-i386: 00000000 T _main
+COFF-o: {{.*}}/archive-test.a-coff-i386:trivial-object-test.coff-i386:          U _puts
 
 RUN: llvm-as %p/Inputs/trivial.ll -o=%t1
 RUN: rm -f %t2
diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test
index 49c7683c80d4..0135f2df3cb0 100644
--- a/test/Object/nm-trivial-object.test
+++ b/test/Object/nm-trivial-object.test
@@ -1,9 +1,13 @@
-RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-nm - \
+RUN: yaml2obj %p/Inputs/COFF/i386.yaml | llvm-nm -a -S - \
 RUN:         | FileCheck %s -check-prefix COFF
-RUN: yaml2obj %p/Inputs/COFF/x86-64.yaml | llvm-nm - \
+RUN: yaml2obj %p/Inputs/COFF/x86-64.yaml | llvm-nm -a -S - \
 RUN:         | FileCheck %s -check-prefix COFF
 RUN: llvm-nm %p/Inputs/trivial-object-test.elf-i386 \
 RUN:         | FileCheck %s -check-prefix ELF
+RUN: llvm-nm -o %p/Inputs/trivial-object-test.elf-i386 \
+RUN:         | FileCheck %s -check-prefix ELF-o
+RUN: llvm-nm -u %p/Inputs/trivial-object-test.elf-i386 \
+RUN:         | FileCheck %s -check-prefix ELF-u
 RUN: llvm-nm %p/Inputs/trivial-object-test.elf-x86-64 \
 RUN:         | FileCheck %s -check-prefix ELF64
 RUN: llvm-nm %p/Inputs/weak.elf-x86-64 \
@@ -26,9 +30,13 @@ RUN: llvm-nm %p/Inputs/macho-text-data-bss.macho-x86_64 -s __DATA __data \
 RUN:         | FileCheck %s -check-prefix macho-s
 RUN: llvm-nm -x %p/Inputs/macho-text-data-bss.macho-x86_64 \
 RUN:         | FileCheck %s -check-prefix macho-x
+RUN: llvm-nm -o %p/Inputs/macho-text-data-bss.macho-x86_64 \
+RUN:         | FileCheck %s -check-prefix macho-o
 RUN: llvm-nm -p -a %p/Inputs/macho-hello-g.macho-x86_64 \
 RUN:         | FileCheck %s -check-prefix macho-pa
-RUN: llvm-nm %p/Inputs/common.coff-i386 \
+RUN: llvm-nm -u %p/Inputs/macho-hello-g.macho-x86_64 \
+RUN:         | FileCheck %s -check-prefix macho-u
+RUN: llvm-nm -S -a %p/Inputs/common.coff-i386 \
 RUN:         | FileCheck %s -check-prefix COFF-COMMON
 RUN: llvm-nm %p/Inputs/relocatable-with-section-address.elf-x86-64 \
 RUN:         | FileCheck %s -check-prefix ELF-SEC-ADDR64
@@ -41,20 +49,20 @@ RUN: llvm-nm | FileCheck %s -check-prefix A-OUT
 REQUIRES: shell
 
 
-COFF: 00000000 d .data
-COFF: 00000000 t .text
-COFF: 00000000 d L{{_?}}.str
+COFF: 00000000 {{.*}} d .data
+COFF: 00000000 {{.*}} t .text
+COFF: 00000000 0000000d d L{{_?}}.str
 COFF:          U {{_?}}SomeOtherFunction
-COFF: 00000000 T {{_?}}main
+COFF: 00000000 {{.*}} T {{_?}}main
 COFF:          U {{_?}}puts
 
-COFF-COMMON: 00000000 b .bss
-COFF-COMMON-NEXT: 00000000 d .data
-COFF-COMMON-NEXT: 00000000 d .drectve
-COFF-COMMON-NEXT: 00000000 n .file
-COFF-COMMON-NEXT: 00000000 r .rdata$zzz
-COFF-COMMON-NEXT: 00000000 t .text
-COFF-COMMON-NEXT:          C _a
+COFF-COMMON: 00000000 00000000 b .bss
+COFF-COMMON-NEXT: 00000000 00000000 d .data
+COFF-COMMON-NEXT: 00000000 00000014 d .drectve
+COFF-COMMON-NEXT: 00000000 00000000 n .file
+COFF-COMMON-NEXT: 00000000 00000014 r .rdata$zzz
+COFF-COMMON-NEXT: 00000000 00000000 t .text
+COFF-COMMON-NEXT:          00000004 C _a
 
 
 ELF-NOT:      U
@@ -62,6 +70,13 @@ ELF:          U SomeOtherFunction
 ELF: 00000000 T main
 ELF:          U puts
 
+ELF-o: {{.*}}/trivial-object-test.elf-i386:          U SomeOtherFunction
+ELF-o: {{.*}}/trivial-object-test.elf-i386: 00000000 T main
+ELF-o: {{.*}}/trivial-object-test.elf-i386:          U puts
+
+ELF-u:          U SomeOtherFunction
+ELF-u:          U puts
+
 ELF64:                  U SomeOtherFunction
 ELF64: 0000000000000000 T main
 ELF64:                  U puts
@@ -117,6 +132,13 @@ macho-x: 000000000000000c 0f 02 0000 00000004 _d
 macho-x: 0000000000000000 0f 01 0000 00000001 _t
 macho-x: 0000000000000048 0f 05 0000 00000007 _t.eh
 
+
+macho-o: {{.*}}/macho-text-data-bss.macho-x86_64: 0000000000000030 s EH_frame0
+macho-o: {{.*}}/macho-text-data-bss.macho-x86_64: 0000000000000070 b _b
+macho-o: {{.*}}/macho-text-data-bss.macho-x86_64: 000000000000000c D _d
+macho-o: {{.*}}/macho-text-data-bss.macho-x86_64: 0000000000000000 T _t
+macho-o: {{.*}}/macho-text-data-bss.macho-x86_64: 0000000000000048 S _t.eh
+
 macho-pa: 0000000000000000 - 00 0000    SO /Volumes/SandBox/
 macho-pa: 0000000000000000 - 00 0000    SO hello.c
 macho-pa: 0000000053c8408d - 03 0001   OSO /Volumes/SandBox/hello.o
@@ -130,6 +152,9 @@ macho-pa: 0000000100000f30 T _main
 macho-pa:                  U _printf
 macho-pa:                  U dyld_stub_binder
 
+macho-u: _printf
+macho-u: dyld_stub_binder
+
 Test that nm uses addresses even with ELF .o files.
 ELF-SEC-ADDR64:      0000000000000058 D a
 ELF-SEC-ADDR64-NEXT: 000000000000005c D b
diff --git a/test/Object/nm-universal-binary.test b/test/Object/nm-universal-binary.test
index 889377b5b35b..0cced1829c3f 100644
--- a/test/Object/nm-universal-binary.test
+++ b/test/Object/nm-universal-binary.test
@@ -2,10 +2,16 @@ RUN: llvm-nm -arch all %p/Inputs/macho-universal.x86_64.i386 \
 RUN:         | FileCheck %s -check-prefix CHECK-OBJ
 RUN: llvm-nm -arch x86_64 %p/Inputs/macho-universal.x86_64.i386 \
 RUN:         | FileCheck %s -check-prefix CHECK-OBJ-x86_64
+RUN: not llvm-nm -arch armv7m %p/Inputs/macho-universal.x86_64.i386 2>&1 \
+RUN:         | FileCheck %s -check-prefix CHECK-OBJ-armv7m
+RUN: not llvm-nm -arch foobar %p/Inputs/macho-universal.x86_64.i386 2>&1 \
+RUN:         | FileCheck %s -check-prefix CHECK-OBJ-foobar
 RUN: llvm-nm -arch all %p/Inputs/macho-universal-archive.x86_64.i386 \
 RUN:         | FileCheck %s -check-prefix CHECK-AR
 RUN: llvm-nm -arch i386 %p/Inputs/macho-universal-archive.x86_64.i386 \
 RUN:         | FileCheck %s -check-prefix CHECK-AR-i386
+RUN: llvm-nm -o -arch all %p/Inputs/macho-universal-archive.x86_64.i386 \
+RUN:         | FileCheck %s -check-prefix CHECK-AR-o
 
 CHECK-OBJ: macho-universal.x86_64.i386 (for architecture x86_64):
 CHECK-OBJ: 0000000100000f60 T _main
@@ -16,6 +22,12 @@ CHECK-OBJ-x86_64: 0000000100000000 T __mh_execute_header
 CHECK-OBJ-x86_64: 0000000100000f60 T _main
 CHECK-OBJ-x86_64:                  U dyld_stub_binder
 
+CHECK-OBJ-armv7m-NOT: Unknown architecture named
+CHECK-OBJ-armv7m: does not contain architecture
+
+CHECK-OBJ-foobar: Unknown architecture named
+CHECK-OBJ-foobar: does not contain architecture
+
 CHECK-AR: macho-universal-archive.x86_64.i386(hello.o) (for architecture x86_64):
 CHECK-AR: 0000000000000068 s EH_frame0
 CHECK-AR: 000000000000003b s L_.str
@@ -29,3 +41,11 @@ CHECK-AR: 00000000 T _foo
 CHECK-AR-i386: macho-universal-archive.x86_64.i386(foo.o):
 CHECK-AR-i386: 00000008 D _bar
 CHECK-AR-i386: 00000000 T _foo
+
+CHECK-AR-o: (for architecture x86_64):{{.*}}/macho-universal-archive.x86_64.i386:hello.o: 0000000000000068 s EH_frame0
+CHECK-AR-o: (for architecture x86_64):{{.*}}/macho-universal-archive.x86_64.i386:hello.o: 000000000000003b s L_.str
+CHECK-AR-o: (for architecture x86_64):{{.*}}/macho-universal-archive.x86_64.i386:hello.o: 0000000000000000 T _main
+CHECK-AR-o: (for architecture x86_64):{{.*}}/macho-universal-archive.x86_64.i386:hello.o: 0000000000000080 S _main.eh
+CHECK-AR-o: (for architecture x86_64):{{.*}}/macho-universal-archive.x86_64.i386:hello.o:                  U _printf
+CHECK-AR-o: (for architecture i386):{{.*}}/macho-universal-archive.x86_64.i386:foo.o: 00000008 D _bar
+CHECK-AR-o: (for architecture i386):{{.*}}/macho-universal-archive.x86_64.i386:foo.o: 00000000 T _foo
diff --git a/test/Object/obj2yaml-coff-long-section-name.test b/test/Object/obj2yaml-coff-long-section-name.test
new file mode 100644
index 000000000000..5457aef28c5d
--- /dev/null
+++ b/test/Object/obj2yaml-coff-long-section-name.test
@@ -0,0 +1,3 @@
+RUN: yaml2obj %p/Inputs/COFF/long-section-name.yaml | obj2yaml | FileCheck %s --check-prefix COFF-I386
+
+COFF-I386: Name:            .long_section_name
diff --git a/test/Object/obj2yaml-coff-section-aux-symbol.test b/test/Object/obj2yaml-coff-section-aux-symbol.test
new file mode 100644
index 000000000000..55ce5f06a31e
--- /dev/null
+++ b/test/Object/obj2yaml-coff-section-aux-symbol.test
@@ -0,0 +1,96 @@
+RUN: yaml2obj %p/Inputs/COFF/section-aux-symbol.yaml | obj2yaml | FileCheck %s --check-prefix COFF-I386
+
+COFF-I386:     sections:
+COFF-I386-NEXT:  - Name:            .CRT
+COFF-I386:     symbols:
+COFF-I386:       - Name:            '.CRT$XCAA'
+COFF-I386-NEXT:    Value:           4
+COFF-I386:         StorageClass:    IMAGE_SYM_CLASS_STATIC
+COFF-I386-NEXT:    SectionDefinition:
+COFF-I386-NEXT:      Length:          4
+COFF-I386-NEXT:      NumberOfRelocations: 1
+COFF-I386-NEXT:      NumberOfLinenumbers: 0
+COFF-I386:       - Name:            '.CRT$XIAA'
+COFF-I386-NEXT:    Value:           16
+COFF-I386:         StorageClass:    IMAGE_SYM_CLASS_STATIC
+COFF-I386-NEXT:    SectionDefinition:
+COFF-I386-NEXT:      Length:          4
+COFF-I386-NEXT:      NumberOfRelocations: 1
+COFF-I386-NEXT:      NumberOfLinenumbers: 0
+COFF-I386:       - Name:            '.CRT$XLD'
+COFF-I386-NEXT:    Value:           36
+COFF-I386:         StorageClass:    IMAGE_SYM_CLASS_STATIC
+COFF-I386-NEXT:    SectionDefinition:
+COFF-I386-NEXT:      Length:          4
+COFF-I386-NEXT:      NumberOfRelocations: 1
+COFF-I386-NEXT:      NumberOfLinenumbers: 0
+COFF-I386:       - Name:            '.CRT$XLC'
+COFF-I386-NEXT:    Value:           32
+COFF-I386:         StorageClass:    IMAGE_SYM_CLASS_STATIC
+COFF-I386-NEXT:    SectionDefinition:
+COFF-I386-NEXT:      Length:          4
+COFF-I386-NEXT:      NumberOfRelocations: 1
+COFF-I386-NEXT:      NumberOfLinenumbers: 0
+COFF-I386:       - Name:            '.CRT$XDZ'
+COFF-I386-NEXT:    Value:           48
+COFF-I386:         StorageClass:    IMAGE_SYM_CLASS_STATIC
+COFF-I386-NEXT:    SectionDefinition:
+COFF-I386-NEXT:      Length:          4
+COFF-I386-NEXT:      NumberOfRelocations: 0
+COFF-I386-NEXT:      NumberOfLinenumbers: 0
+COFF-I386:       - Name:            '.CRT$XDA'
+COFF-I386-NEXT:    Value:           44
+COFF-I386:         StorageClass:    IMAGE_SYM_CLASS_STATIC
+COFF-I386-NEXT:    SectionDefinition:
+COFF-I386-NEXT:      Length:          4
+COFF-I386-NEXT:      NumberOfRelocations: 0
+COFF-I386-NEXT:      NumberOfLinenumbers: 0
+COFF-I386:       - Name:            '.CRT$XLZ'
+COFF-I386-NEXT:    Value:           40
+COFF-I386:         StorageClass:    IMAGE_SYM_CLASS_STATIC
+COFF-I386-NEXT:    SectionDefinition:
+COFF-I386-NEXT:      Length:          4
+COFF-I386-NEXT:      NumberOfRelocations: 0
+COFF-I386-NEXT:      NumberOfLinenumbers: 0
+COFF-I386:       - Name:            '.CRT$XLA'
+COFF-I386-NEXT:    Value:           28
+COFF-I386:         StorageClass:    IMAGE_SYM_CLASS_STATIC
+COFF-I386-NEXT:    SectionDefinition:
+COFF-I386-NEXT:      Length:          4
+COFF-I386-NEXT:      NumberOfRelocations: 0
+COFF-I386-NEXT:      NumberOfLinenumbers: 0
+COFF-I386:       - Name:            '.CRT$XIC'
+COFF-I386-NEXT:    Value:           20
+COFF-I386:         StorageClass:    IMAGE_SYM_CLASS_STATIC
+COFF-I386-NEXT:    SectionDefinition:
+COFF-I386-NEXT:      Length:          4
+COFF-I386-NEXT:      NumberOfRelocations: 1
+COFF-I386-NEXT:      NumberOfLinenumbers: 0
+COFF-I386:       - Name:            '.CRT$XCZ'
+COFF-I386-NEXT:    Value:           8
+COFF-I386:         StorageClass:    IMAGE_SYM_CLASS_STATIC
+COFF-I386-NEXT:    SectionDefinition:
+COFF-I386-NEXT:      Length:          4
+COFF-I386-NEXT:      NumberOfRelocations: 0
+COFF-I386-NEXT:      NumberOfLinenumbers: 0
+COFF-I386:       - Name:            '.CRT$XCA'
+COFF-I386-NEXT:    Value:           0
+COFF-I386:         StorageClass:    IMAGE_SYM_CLASS_STATIC
+COFF-I386-NEXT:    SectionDefinition:
+COFF-I386-NEXT:      Length:          4
+COFF-I386-NEXT:      NumberOfRelocations: 0
+COFF-I386-NEXT:      NumberOfLinenumbers: 0
+COFF-I386:       - Name:            '.CRT$XIZ'
+COFF-I386-NEXT:    Value:           24
+COFF-I386:         StorageClass:    IMAGE_SYM_CLASS_STATIC
+COFF-I386-NEXT:    SectionDefinition:
+COFF-I386-NEXT:      Length:          4
+COFF-I386-NEXT:      NumberOfRelocations: 0
+COFF-I386-NEXT:      NumberOfLinenumbers: 0
+COFF-I386:       - Name:            '.CRT$XIA'
+COFF-I386-NEXT:    Value:           12
+COFF-I386:         StorageClass:    IMAGE_SYM_CLASS_STATIC
+COFF-I386-NEXT:    SectionDefinition:
+COFF-I386-NEXT:      Length:          4
+COFF-I386-NEXT:      NumberOfRelocations: 0
+COFF-I386-NEXT:      NumberOfLinenumbers: 0
diff --git a/test/Object/objdump-export-list.test b/test/Object/objdump-export-list.test
new file mode 100644
index 000000000000..74344c1c2e2e
--- /dev/null
+++ b/test/Object/objdump-export-list.test
@@ -0,0 +1,4 @@
+RUN: llvm-objdump -exports-trie %p/Inputs/macho-no-exports.dylib | FileCheck %s
+
+; Test that we don't crash with an empty export list.
+CHECK: macho-no-exports.dylib: file format Mach-O 64-bit x86-64
diff --git a/test/Object/objdump-macho-quirks.test b/test/Object/objdump-macho-quirks.test
new file mode 100644
index 000000000000..eeee1537def7
--- /dev/null
+++ b/test/Object/objdump-macho-quirks.test
@@ -0,0 +1,9 @@
+RUN: llvm-objdump -private-headers %p/Inputs/macho-zero-ncmds \
+RUN:         | FileCheck %s -check-prefix A
+
+// Check that we don't get an infinite loop if ncmds = 0
+A: file format Mach-O 64-bit unknown
+A: Mach header
+A:      magic cputype cpusubtype  caps    filetype ncmds sizeofcmds      flags
+A: MH_MAGIC_64  0x00      OBJECT     0          0 0x00000000
+
diff --git a/test/Object/objdump-private-headers.test b/test/Object/objdump-private-headers.test
index c562044b3c4e..d311bec58489 100644
--- a/test/Object/objdump-private-headers.test
+++ b/test/Object/objdump-private-headers.test
@@ -2,6 +2,8 @@ RUN: llvm-objdump -p %p/Inputs/program-headers.elf-i386 \
 RUN:              | FileCheck %s -check-prefix ELF-i386
 RUN: llvm-objdump -p %p/Inputs/program-headers.elf-x86-64 \
 RUN:              | FileCheck %s -check-prefix ELF-x86-64
+RUN: llvm-objdump -p %p/Inputs/macho-rpath-x86_64 \
+RUN:              | FileCheck %s -check-prefix MACHO-x86_64
 
 ELF-i386: Program Header:
 ELF-i386:     LOAD off    0x00000000 vaddr 0x08048000 paddr 0x08048000 align 2**12
@@ -16,3 +18,8 @@ ELF-x86-64: EH_FRAME off    0x00000000000000f4 vaddr 0x00000000004000f4 paddr 0x
 ELF-x86-64:          filesz 0x0000000000000014 memsz 0x0000000000000014 flags r--
 ELF-x86-64:    STACK off    0x0000000000000000 vaddr 0x0000000000000000 paddr 0x0000000000000000 align 2**3
 ELF-x86-64:          filesz 0x0000000000000000 memsz 0x0000000000000000 flags rw-
+
+MACHO-x86_64: Load command 12
+MACHO-x86_64:          cmd LC_RPATH
+MACHO-x86_64:      cmdsize 32
+MACHO-x86_64:         path @executable_path/. (offset 12)
diff --git a/test/Object/objdump-reloc-shared.test b/test/Object/objdump-reloc-shared.test
new file mode 100644
index 000000000000..d899ffb60874
--- /dev/null
+++ b/test/Object/objdump-reloc-shared.test
@@ -0,0 +1,5 @@
+RUN: llvm-objdump -r %p/Inputs/elf-reloc-no-sym.x86_64 \
+RUN:              | FileCheck %s
+
+; CHECK: elf-reloc-no-sym.x86_64:       file format ELF64-x86-64
+; CHECK-NOT: {{.}}
diff --git a/test/Object/objdump-relocations.test b/test/Object/objdump-relocations.test
index 28cac104c7b7..1e41f78ca729 100644
--- a/test/Object/objdump-relocations.test
+++ b/test/Object/objdump-relocations.test
@@ -27,9 +27,9 @@ COFF-x86-64: IMAGE_REL_AMD64_REL32 puts
 COFF-x86-64: IMAGE_REL_AMD64_REL32 SomeOtherFunction
 
 ELF-i386: .text
-ELF-i386: R_386_32
-ELF-i386: R_386_PC32
-ELF-i386: R_386_PC32
+ELF-i386: R_386_32 .rodata.str1.1
+ELF-i386: R_386_PC32 puts
+ELF-i386: R_386_PC32 SomeOtherFunction
 
 ELF-x86-64: .text
 ELF-x86-64: R_X86_64_32S .rodata.str1.1
diff --git a/test/Object/yaml2obj-elf-file-headers-with-e_flags.yaml b/test/Object/yaml2obj-elf-file-headers-with-e_flags.yaml
index 7d098077dd5e..dddc7d97f628 100644
--- a/test/Object/yaml2obj-elf-file-headers-with-e_flags.yaml
+++ b/test/Object/yaml2obj-elf-file-headers-with-e_flags.yaml
@@ -5,13 +5,15 @@ FileHeader:
   Data: ELFDATA2LSB
   Type: ET_REL
   Machine: EM_MIPS
-  Flags: [ EF_MIPS_NOREORDER, EF_MIPS_ABI_O32, EF_MIPS_ARCH_32R2 ]
+  Flags: [ EF_MIPS_NOREORDER, EF_MIPS_ABI_O32, EF_MIPS_ARCH_32R2,
+           EF_MIPS_NAN2008 ]
 
 # CHECK: Format: ELF32-mips
 # CHECK: Arch: mipsel
 # CHECK: Machine: EM_MIPS
-# CHECK: Flags [ (0x70001001)
+# CHECK: Flags [ (0x70001401)
 # CHECK-NEXT: EF_MIPS_ABI_O32 (0x1000)
 # CHECK-NEXT: EF_MIPS_ARCH_32R2 (0x70000000)
+# CHECK-NEXT: EF_MIPS_NAN2008 (0x400)
 # CHECK-NEXT: EF_MIPS_NOREORDER (0x1)
 # CHECK-NEXT: ]
diff --git a/test/Object/yaml2obj-elf-symbol-visibility.yaml b/test/Object/yaml2obj-elf-symbol-visibility.yaml
index 113354a05e3a..6c4037c831d1 100644
--- a/test/Object/yaml2obj-elf-symbol-visibility.yaml
+++ b/test/Object/yaml2obj-elf-symbol-visibility.yaml
@@ -44,7 +44,7 @@
 # OBJ-NEXT:   Size: 4
 # OBJ-NEXT:   Binding: Global (0x1)
 # OBJ-NEXT:   Type: Object (0x1)
-# OBJ-NEXT:   Other: 3
+# OBJ-NEXT:   Other: 163
 # OBJ-NEXT:   Section: .data (0x1)
 # OBJ-NEXT: }
 
@@ -77,6 +77,7 @@
 # YAML-NEXT:       Value:           0x0000000000000010
 # YAML-NEXT:       Size:            0x0000000000000004
 # YAML-NEXT:       Visibility:      STV_PROTECTED
+# YAML-NEXT:       Other:           [ STO_MIPS_PIC, STO_MIPS_MICROMIPS ]
 
 ---
 FileHeader:
@@ -121,6 +122,7 @@ Symbols:
     - Name:            protected
       Type:            STT_OBJECT
       Visibility:      STV_PROTECTED
+      Other:           [ STO_MIPS_MICROMIPS, STO_MIPS_PIC ]
       Section:         .data
       Value:           0x10
       Size:            0x04
diff --git a/test/Other/Inputs/block-info-only.bc b/test/Other/Inputs/block-info-only.bc
new file mode 100755
index 000000000000..e30ca5f472f6
--- /dev/null
+++ b/test/Other/Inputs/block-info-only.bc
diff --git a/test/Other/Inputs/has-block-info.bc b/test/Other/Inputs/has-block-info.bc
new file mode 100644
index 000000000000..1815db6a0873
--- /dev/null
+++ b/test/Other/Inputs/has-block-info.bc
diff --git a/test/Other/Inputs/no-block-info.bc b/test/Other/Inputs/no-block-info.bc
new file mode 100755
index 000000000000..e79c276dd475
--- /dev/null
+++ b/test/Other/Inputs/no-block-info.bc
diff --git a/test/Other/bcanalyzer-block-info.txt b/test/Other/bcanalyzer-block-info.txt
new file mode 100644
index 000000000000..e66031211b72
--- /dev/null
+++ b/test/Other/bcanalyzer-block-info.txt
@@ -0,0 +1,32 @@
+RUN: llvm-bcanalyzer -dump %S/Inputs/has-block-info.bc | FileCheck -check-prefix=CHECK -check-prefix=DATA %s
+RUN: llvm-bcanalyzer -dump %S/Inputs/no-block-info.bc | FileCheck -check-prefix=UNKNOWN -check-prefix=DATA %s
+RUN: llvm-bcanalyzer -dump %S/Inputs/no-block-info.bc -block-info %S/Inputs/block-info-only.bc | FileCheck -check-prefix=CHECK -check-prefix=DATA %s
+
+  CHECK: <ABC
+UNKNOWN: <UnknownBlock8
+   DATA:   NumWords=4 BlockCodeSize=2>
+  CHECK:   <AAA 
+UNKNOWN:   <UnknownCode0
+   DATA:     op0=42 op1=43 op2=44/>
+  CHECK:   <BBB
+UNKNOWN:   <UnknownCode1
+   DATA:     op0=42/>
+  CHECK:   <AAA
+UNKNOWN:   <UnknownCode0
+   DATA:     op0=42/>
+  CHECK: </ABC>
+UNKNOWN: </UnknownBlock8>
+  CHECK: <XYZ
+UNKNOWN: <UnknownBlock9
+   DATA:   NumWords=3 BlockCodeSize=3>
+  CHECK:   <XXX
+UNKNOWN:   <UnknownCode0
+   DATA:     abbrevid=4 op0=50 op1=4/>
+  CHECK:   <YYY
+UNKNOWN:   <UnknownCode1
+   DATA:     op0=42/>
+  CHECK:   <XXX
+UNKNOWN:   <UnknownCode0
+   DATA:     abbrevid=4 op0=50 op1=5/>
+  CHECK: </XYZ>
+UNKNOWN: </UnknownBlock9>
diff --git a/test/Other/link-opts.ll b/test/Other/link-opts.ll
deleted file mode 100644
index 8e58ac8a5683..000000000000
--- a/test/Other/link-opts.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-;RUN: opt -S -std-link-opts < %s | FileCheck %s
-; Simple test to check that -std-link-opts keeps only the main function.
-
-; CHECK-NOT: define
-; CHECK: define void @main
-; CHECK-NOT: define
-define void @main() {
-  ret void
-}
-
-define void @foo() {
-  ret void
-}
diff --git a/test/Other/lit-unicode.txt b/test/Other/lit-unicode.txt
new file mode 100644
index 000000000000..ca92c991d175
--- /dev/null
+++ b/test/Other/lit-unicode.txt
@@ -0,0 +1,3 @@
+REQUIRES: shell
+RUN: echo "ようこそ" | FileCheck %s
+CHECK: {{^}}ようこそ{{$}}
diff --git a/test/Other/new-pass-manager.ll b/test/Other/new-pass-manager.ll
index cec01b54ff0c..6454ffc1fb09 100644
--- a/test/Other/new-pass-manager.ll
+++ b/test/Other/new-pass-manager.ll
@@ -5,27 +5,85 @@
 ; files, but for now this is just going to step the new process through its
 ; paces.
 
+; RUN: opt -disable-output -disable-verify -debug-pass-manager \
+; RUN:     -passes=no-op-module %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-MODULE-PASS
+; CHECK-MODULE-PASS: Starting pass manager
+; CHECK-MODULE-PASS-NEXT: Running pass: NoOpModulePass
+; CHECK-MODULE-PASS-NEXT: Finished pass manager
+
+; RUN: opt -disable-output -disable-verify -debug-pass-manager \
+; RUN:     -passes=no-op-cgscc %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-CGSCC-PASS
+; RUN: opt -disable-output -disable-verify -debug-pass-manager \
+; RUN:     -passes='cgscc(no-op-cgscc)' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-CGSCC-PASS
+; CHECK-CGSCC-PASS: Starting pass manager
+; CHECK-CGSCC-PASS-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor
+; CHECK-CGSCC-PASS-NEXT: Running analysis: CGSCCAnalysisManagerModuleProxy
+; CHECK-CGSCC-PASS-NEXT: Running analysis: Lazy CallGraph Analysis
+; CHECK-CGSCC-PASS-NEXT: Starting pass manager
+; CHECK-CGSCC-PASS-NEXT: Running pass: NoOpCGSCCPass
+; CHECK-CGSCC-PASS-NEXT: Finished pass manager
+; CHECK-CGSCC-PASS-NEXT: Finished pass manager
+
+; RUN: opt -disable-output -disable-verify -debug-pass-manager \
+; RUN:     -passes=no-op-function %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-FUNCTION-PASS
+; RUN: opt -disable-output -disable-verify -debug-pass-manager \
+; RUN:     -passes='function(no-op-function)' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-FUNCTION-PASS
+; CHECK-FUNCTION-PASS: Starting pass manager
+; CHECK-FUNCTION-PASS-NEXT: Running pass: ModuleToFunctionPassAdaptor
+; CHECK-FUNCTION-PASS-NEXT: Running analysis: FunctionAnalysisManagerModuleProxy
+; CHECK-FUNCTION-PASS-NEXT: Starting pass manager
+; CHECK-FUNCTION-PASS-NEXT: Running pass: NoOpFunctionPass
+; CHECK-FUNCTION-PASS-NEXT: Finished pass manager
+; CHECK-FUNCTION-PASS-NEXT: Finished pass manager
+
 ; RUN: opt -disable-output -debug-pass-manager -passes=print %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefix=CHECK-MODULE-PRINT
-; CHECK-MODULE-PRINT: Starting module pass manager
-; CHECK-MODULE-PRINT: Running module pass: VerifierPass
-; CHECK-MODULE-PRINT: Running module pass: PrintModulePass
+; CHECK-MODULE-PRINT: Starting pass manager
+; CHECK-MODULE-PRINT: Running pass: VerifierPass
+; CHECK-MODULE-PRINT: Running pass: PrintModulePass
 ; CHECK-MODULE-PRINT: ModuleID
 ; CHECK-MODULE-PRINT: define void @foo()
-; CHECK-MODULE-PRINT: Running module pass: VerifierPass
-; CHECK-MODULE-PRINT: Finished module pass manager
+; CHECK-MODULE-PRINT: Running pass: VerifierPass
+; CHECK-MODULE-PRINT: Finished pass manager
+
+; RUN: opt -disable-output -debug-pass-manager -disable-verify -passes='print,verify' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-MODULE-VERIFY
+; CHECK-MODULE-VERIFY: Starting pass manager
+; CHECK-MODULE-VERIFY: Running pass: PrintModulePass
+; CHECK-MODULE-VERIFY: ModuleID
+; CHECK-MODULE-VERIFY: define void @foo()
+; CHECK-MODULE-VERIFY: Running pass: VerifierPass
+; CHECK-MODULE-VERIFY: Finished pass manager
 
 ; RUN: opt -disable-output -debug-pass-manager -passes='function(print)' %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefix=CHECK-FUNCTION-PRINT
-; CHECK-FUNCTION-PRINT: Starting module pass manager
-; CHECK-FUNCTION-PRINT: Running module pass: VerifierPass
-; CHECK-FUNCTION-PRINT: Starting function pass manager
-; CHECK-FUNCTION-PRINT: Running function pass: PrintFunctionPass
+; CHECK-FUNCTION-PRINT: Starting pass manager
+; CHECK-FUNCTION-PRINT: Running pass: VerifierPass
+; CHECK-FUNCTION-PRINT: Running pass: ModuleToFunctionPassAdaptor
+; CHECK-FUNCTION-PRINT: Running analysis: FunctionAnalysisManagerModuleProxy
+; CHECK-FUNCTION-PRINT: Starting pass manager
+; CHECK-FUNCTION-PRINT: Running pass: PrintFunctionPass
 ; CHECK-FUNCTION-PRINT-NOT: ModuleID
 ; CHECK-FUNCTION-PRINT: define void @foo()
-; CHECK-FUNCTION-PRINT: Finished function pass manager
-; CHECK-FUNCTION-PRINT: Running module pass: VerifierPass
-; CHECK-FUNCTION-PRINT: Finished module pass manager
+; CHECK-FUNCTION-PRINT: Finished pass manager
+; CHECK-FUNCTION-PRINT: Running pass: VerifierPass
+; CHECK-FUNCTION-PRINT: Finished pass manager
+
+; RUN: opt -disable-output -debug-pass-manager -disable-verify -passes='function(print,verify)' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-FUNCTION-VERIFY
+; CHECK-FUNCTION-VERIFY: Starting pass manager
+; CHECK-FUNCTION-VERIFY: Starting pass manager
+; CHECK-FUNCTION-VERIFY: Running pass: PrintFunctionPass
+; CHECK-FUNCTION-VERIFY-NOT: ModuleID
+; CHECK-FUNCTION-VERIFY: define void @foo()
+; CHECK-FUNCTION-VERIFY: Running pass: VerifierPass
+; CHECK-FUNCTION-VERIFY: Finished pass manager
+; CHECK-FUNCTION-VERIFY: Finished pass manager
 
 ; RUN: opt -S -o - -passes='no-op-module,no-op-module' %s \
 ; RUN:     | FileCheck %s --check-prefix=CHECK-NOOP
@@ -40,29 +98,173 @@
 
 ; RUN: opt -disable-output -debug-pass-manager -verify-each -passes='no-op-module,function(no-op-function)' %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefix=CHECK-VERIFY-EACH
-; CHECK-VERIFY-EACH: Starting module pass manager
-; CHECK-VERIFY-EACH: Running module pass: VerifierPass
-; CHECK-VERIFY-EACH: Running module pass: NoOpModulePass
-; CHECK-VERIFY-EACH: Running module pass: VerifierPass
-; CHECK-VERIFY-EACH: Starting function pass manager
-; CHECK-VERIFY-EACH: Running function pass: NoOpFunctionPass
-; CHECK-VERIFY-EACH: Running function pass: VerifierPass
-; CHECK-VERIFY-EACH: Finished function pass manager
-; CHECK-VERIFY-EACH: Running module pass: VerifierPass
-; CHECK-VERIFY-EACH: Finished module pass manager
+; CHECK-VERIFY-EACH: Starting pass manager
+; CHECK-VERIFY-EACH: Running pass: VerifierPass
+; CHECK-VERIFY-EACH: Running pass: NoOpModulePass
+; CHECK-VERIFY-EACH: Running pass: VerifierPass
+; CHECK-VERIFY-EACH: Starting pass manager
+; CHECK-VERIFY-EACH: Running pass: NoOpFunctionPass
+; CHECK-VERIFY-EACH: Running pass: VerifierPass
+; CHECK-VERIFY-EACH: Finished pass manager
+; CHECK-VERIFY-EACH: Running pass: VerifierPass
+; CHECK-VERIFY-EACH: Finished pass manager
 
 ; RUN: opt -disable-output -debug-pass-manager -disable-verify -passes='no-op-module,function(no-op-function)' %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefix=CHECK-NO-VERIFY
-; CHECK-NO-VERIFY: Starting module pass manager
+; CHECK-NO-VERIFY: Starting pass manager
 ; CHECK-NO-VERIFY-NOT: VerifierPass
-; CHECK-NO-VERIFY: Running module pass: NoOpModulePass
+; CHECK-NO-VERIFY: Running pass: NoOpModulePass
 ; CHECK-NO-VERIFY-NOT: VerifierPass
-; CHECK-NO-VERIFY: Starting function pass manager
-; CHECK-NO-VERIFY: Running function pass: NoOpFunctionPass
+; CHECK-NO-VERIFY: Starting pass manager
+; CHECK-NO-VERIFY: Running pass: NoOpFunctionPass
 ; CHECK-NO-VERIFY-NOT: VerifierPass
-; CHECK-NO-VERIFY: Finished function pass manager
+; CHECK-NO-VERIFY: Finished pass manager
 ; CHECK-NO-VERIFY-NOT: VerifierPass
-; CHECK-NO-VERIFY: Finished module pass manager
+; CHECK-NO-VERIFY: Finished pass manager
+
+; RUN: opt -disable-output -debug-pass-manager \
+; RUN:     -passes='require<no-op-module>,cgscc(require<no-op-cgscc>,function(require<no-op-function>))' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-ANALYSES
+; CHECK-ANALYSES: Starting pass manager
+; CHECK-ANALYSES: Running pass: RequireAnalysisPass
+; CHECK-ANALYSES: Running analysis: NoOpModuleAnalysis
+; CHECK-ANALYSES: Starting pass manager
+; CHECK-ANALYSES: Running pass: RequireAnalysisPass
+; CHECK-ANALYSES: Running analysis: NoOpCGSCCAnalysis
+; CHECK-ANALYSES: Starting pass manager
+; CHECK-ANALYSES: Running pass: RequireAnalysisPass
+; CHECK-ANALYSES: Running analysis: NoOpFunctionAnalysis
+
+; Make sure no-op passes that preserve all analyses don't even try to do any
+; analysis invalidation.
+; RUN: opt -disable-output -debug-pass-manager \
+; RUN:     -passes='require<no-op-module>,cgscc(require<no-op-cgscc>,function(require<no-op-function>))' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-NO-OP-INVALIDATION
+; CHECK-NO-OP-INVALIDATION: Starting pass manager
+; CHECK-NO-OP-INVALIDATION-NOT: Invalidating all non-preserved analyses
+
+; RUN: opt -disable-output -debug-pass-manager \
+; RUN:     -passes='require<no-op-module>,require<no-op-module>,require<no-op-module>' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-DO-CACHE-MODULE-ANALYSIS-RESULTS
+; CHECK-DO-CACHE-MODULE-ANALYSIS-RESULTS: Starting pass manager
+; CHECK-DO-CACHE-MODULE-ANALYSIS-RESULTS: Running pass: RequireAnalysisPass
+; CHECK-DO-CACHE-MODULE-ANALYSIS-RESULTS: Running analysis: NoOpModuleAnalysis
+; CHECK-DO-CACHE-MODULE-ANALYSIS-RESULTS-NOT: Running analysis: NoOpModuleAnalysis
+
+; RUN: opt -disable-output -debug-pass-manager \
+; RUN:     -passes='require<no-op-module>,invalidate<no-op-module>,require<no-op-module>' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-DO-INVALIDATE-MODULE-ANALYSIS-RESULTS
+; CHECK-DO-INVALIDATE-MODULE-ANALYSIS-RESULTS: Starting pass manager
+; CHECK-DO-INVALIDATE-MODULE-ANALYSIS-RESULTS: Running pass: RequireAnalysisPass
+; CHECK-DO-INVALIDATE-MODULE-ANALYSIS-RESULTS: Running analysis: NoOpModuleAnalysis
+; CHECK-DO-INVALIDATE-MODULE-ANALYSIS-RESULTS: Invalidating analysis: NoOpModuleAnalysis
+; CHECK-DO-INVALIDATE-MODULE-ANALYSIS-RESULTS: Running analysis: NoOpModuleAnalysis
+
+; RUN: opt -disable-output -debug-pass-manager \
+; RUN:     -passes='cgscc(require<no-op-cgscc>,require<no-op-cgscc>,require<no-op-cgscc>)' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-DO-CACHE-CGSCC-ANALYSIS-RESULTS
+; CHECK-DO-CACHE-CGSCC-ANALYSIS-RESULTS: Starting pass manager
+; CHECK-DO-CACHE-CGSCC-ANALYSIS-RESULTS: Running pass: RequireAnalysisPass
+; CHECK-DO-CACHE-CGSCC-ANALYSIS-RESULTS: Running analysis: NoOpCGSCCAnalysis
+; CHECK-DO-CACHE-CGSCC-ANALYSIS-RESULTS-NOT: Running analysis: NoOpCGSCCAnalysis
+
+; RUN: opt -disable-output -debug-pass-manager \
+; RUN:     -passes='cgscc(require<no-op-cgscc>,invalidate<no-op-cgscc>,require<no-op-cgscc>)' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-DO-INVALIDATE-CGSCC-ANALYSIS-RESULTS
+; CHECK-DO-INVALIDATE-CGSCC-ANALYSIS-RESULTS: Starting pass manager
+; CHECK-DO-INVALIDATE-CGSCC-ANALYSIS-RESULTS: Running pass: RequireAnalysisPass
+; CHECK-DO-INVALIDATE-CGSCC-ANALYSIS-RESULTS: Running analysis: NoOpCGSCCAnalysis
+; CHECK-DO-INVALIDATE-CGSCC-ANALYSIS-RESULTS: Invalidating analysis: NoOpCGSCCAnalysis
+; CHECK-DO-INVALIDATE-CGSCC-ANALYSIS-RESULTS: Running analysis: NoOpCGSCCAnalysis
+
+; RUN: opt -disable-output -debug-pass-manager \
+; RUN:     -passes='function(require<no-op-function>,require<no-op-function>,require<no-op-function>)' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-DO-CACHE-FUNCTION-ANALYSIS-RESULTS
+; CHECK-DO-CACHE-FUNCTION-ANALYSIS-RESULTS: Starting pass manager
+; CHECK-DO-CACHE-FUNCTION-ANALYSIS-RESULTS: Running pass: RequireAnalysisPass
+; CHECK-DO-CACHE-FUNCTION-ANALYSIS-RESULTS: Running analysis: NoOpFunctionAnalysis
+; CHECK-DO-CACHE-FUNCTION-ANALYSIS-RESULTS-NOT: Running analysis: NoOpFunctionAnalysis
+
+; RUN: opt -disable-output -debug-pass-manager \
+; RUN:     -passes='function(require<no-op-function>,invalidate<no-op-function>,require<no-op-function>)' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-DO-INVALIDATE-FUNCTION-ANALYSIS-RESULTS
+; CHECK-DO-INVALIDATE-FUNCTION-ANALYSIS-RESULTS: Starting pass manager
+; CHECK-DO-INVALIDATE-FUNCTION-ANALYSIS-RESULTS: Running pass: RequireAnalysisPass
+; CHECK-DO-INVALIDATE-FUNCTION-ANALYSIS-RESULTS: Running analysis: NoOpFunctionAnalysis
+; CHECK-DO-INVALIDATE-FUNCTION-ANALYSIS-RESULTS: Invalidating analysis: NoOpFunctionAnalysis
+; CHECK-DO-INVALIDATE-FUNCTION-ANALYSIS-RESULTS: Running analysis: NoOpFunctionAnalysis
+
+; RUN: opt -disable-output -disable-verify -debug-pass-manager \
+; RUN:     -passes='require<no-op-module>,module(require<no-op-module>,function(require<no-op-function>,invalidate<all>,require<no-op-function>),require<no-op-module>),require<no-op-module>' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-INVALIDATE-ALL
+; CHECK-INVALIDATE-ALL: Starting pass manager
+; CHECK-INVALIDATE-ALL: Running pass: RequireAnalysisPass
+; CHECK-INVALIDATE-ALL: Running analysis: NoOpModuleAnalysis
+; CHECK-INVALIDATE-ALL: Starting pass manager
+; CHECK-INVALIDATE-ALL: Running pass: RequireAnalysisPass
+; CHECK-INVALIDATE-ALL-NOT: Running analysis: NoOpModuleAnalysis
+; CHECK-INVALIDATE-ALL: Starting pass manager
+; CHECK-INVALIDATE-ALL: Running pass: RequireAnalysisPass
+; CHECK-INVALIDATE-ALL: Running analysis: NoOpFunctionAnalysis
+; CHECK-INVALIDATE-ALL: Running pass: InvalidateAllAnalysesPass
+; CHECK-INVALIDATE-ALL: Invalidating all non-preserved analyses
+; CHECK-INVALIDATE-ALL: Invalidating analysis: NoOpFunctionAnalysis
+; CHECK-INVALIDATE-ALL: Running pass: RequireAnalysisPass
+; CHECK-INVALIDATE-ALL: Running analysis: NoOpFunctionAnalysis
+; CHECK-INVALIDATE-ALL: Finished pass manager
+; CHECK-INVALIDATE-ALL: Invalidating all non-preserved analyses
+; CHECK-INVALIDATE-ALL-NOT: Running analysis: NoOpFunctionAnalysis
+; CHECK-INVALIDATE-ALL: Invalidating all non-preserved analyses
+; CHECK-INVALIDATE-ALL: Invalidating analysis: NoOpModuleAnalysis
+; CHECK-INVALIDATE-ALL: Running pass: RequireAnalysisPass
+; CHECK-INVALIDATE-ALL: Running analysis: NoOpModuleAnalysis
+; CHECK-INVALIDATE-ALL: Finished pass manager
+; CHECK-INVALIDATE-ALL: Invalidating all non-preserved analyses
+; CHECK-INVALIDATE-ALL-NOT: Invalidating analysis: NoOpModuleAnalysis
+; CHECK-INVALIDATE-ALL: Running pass: RequireAnalysisPass
+; CHECK-INVALIDATE-ALL-NOT: Running analysis: NoOpModuleAnalysis
+; CHECK-INVALIDATE-ALL: Finished pass manager
+
+; RUN: opt -disable-output -disable-verify -debug-pass-manager \
+; RUN:     -passes='require<no-op-module>,module(require<no-op-module>,cgscc(require<no-op-cgscc>,function(require<no-op-function>,invalidate<all>,require<no-op-function>),require<no-op-cgscc>),require<no-op-module>),require<no-op-module>' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-INVALIDATE-ALL-CG
+; CHECK-INVALIDATE-ALL-CG: Starting pass manager
+; CHECK-INVALIDATE-ALL-CG: Running pass: RequireAnalysisPass
+; CHECK-INVALIDATE-ALL-CG: Running analysis: NoOpModuleAnalysis
+; CHECK-INVALIDATE-ALL-CG: Starting pass manager
+; CHECK-INVALIDATE-ALL-CG: Running pass: RequireAnalysisPass
+; CHECK-INVALIDATE-ALL-CG-NOT: Running analysis: NoOpModuleAnalysis
+; CHECK-INVALIDATE-ALL-CG: Starting pass manager
+; CHECK-INVALIDATE-ALL-CG: Running pass: RequireAnalysisPass
+; CHECK-INVALIDATE-ALL-CG: Running analysis: NoOpCGSCCAnalysis
+; CHECK-INVALIDATE-ALL-CG: Starting pass manager
+; CHECK-INVALIDATE-ALL-CG: Running pass: RequireAnalysisPass
+; CHECK-INVALIDATE-ALL-CG: Running analysis: NoOpFunctionAnalysis
+; CHECK-INVALIDATE-ALL-CG: Running pass: InvalidateAllAnalysesPass
+; CHECK-INVALIDATE-ALL-CG: Invalidating all non-preserved analyses
+; CHECK-INVALIDATE-ALL-CG: Invalidating analysis: NoOpFunctionAnalysis
+; CHECK-INVALIDATE-ALL-CG: Running pass: RequireAnalysisPass
+; CHECK-INVALIDATE-ALL-CG: Running analysis: NoOpFunctionAnalysis
+; CHECK-INVALIDATE-ALL-CG: Finished pass manager
+; CHECK-INVALIDATE-ALL-CG: Invalidating all non-preserved analyses
+; CHECK-INVALIDATE-ALL-CG-NOT: Running analysis: NoOpFunctionAnalysis
+; CHECK-INVALIDATE-ALL-CG: Invalidating all non-preserved analyses
+; CHECK-INVALIDATE-ALL-CG: Invalidating analysis: NoOpCGSCCAnalysis
+; CHECK-INVALIDATE-ALL-CG: Running pass: RequireAnalysisPass
+; CHECK-INVALIDATE-ALL-CG: Running analysis: NoOpCGSCCAnalysis
+; CHECK-INVALIDATE-ALL-CG: Finished pass manager
+; CHECK-INVALIDATE-ALL-CG: Invalidating all non-preserved analyses
+; CHECK-INVALIDATE-ALL-CG-NOT: Invalidating analysis: NoOpCGSCCAnalysis
+; CHECK-INVALIDATE-ALL-CG: Invalidating all non-preserved analyses
+; CHECK-INVALIDATE-ALL-CG: Invalidating analysis: NoOpModuleAnalysis
+; CHECK-INVALIDATE-ALL-CG: Running pass: RequireAnalysisPass
+; CHECK-INVALIDATE-ALL-CG: Running analysis: NoOpModuleAnalysis
+; CHECK-INVALIDATE-ALL-CG: Finished pass manager
+; CHECK-INVALIDATE-ALL-CG: Invalidating all non-preserved analyses
+; CHECK-INVALIDATE-ALL-CG-NOT: Invalidating analysis: NoOpModuleAnalysis
+; CHECK-INVALIDATE-ALL-CG: Running pass: RequireAnalysisPass
+; CHECK-INVALIDATE-ALL-CG-NOT: Running analysis: NoOpModuleAnalysis
+; CHECK-INVALIDATE-ALL-CG: Finished pass manager
 
 define void @foo() {
   ret void
diff --git a/test/Other/pass-pipeline-parsing.ll b/test/Other/pass-pipeline-parsing.ll
index 4ec4162cd4c9..da0e76040034 100644
--- a/test/Other/pass-pipeline-parsing.ll
+++ b/test/Other/pass-pipeline-parsing.ll
@@ -1,59 +1,55 @@
 ; RUN: opt -disable-output -debug-pass-manager \
 ; RUN:     -passes=no-op-module,no-op-module %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefix=CHECK-TWO-NOOP-MP
-; CHECK-TWO-NOOP-MP: Starting module pass manager
-; CHECK-TWO-NOOP-MP: Running module pass: NoOpModulePass
-; CHECK-TWO-NOOP-MP: Running module pass: NoOpModulePass
-; CHECK-TWO-NOOP-MP: Finished module pass manager
+; CHECK-TWO-NOOP-MP: Starting pass manager
+; CHECK-TWO-NOOP-MP: Running pass: NoOpModulePass
+; CHECK-TWO-NOOP-MP: Running pass: NoOpModulePass
+; CHECK-TWO-NOOP-MP: Finished pass manager
 
 ; RUN: opt -disable-output -debug-pass-manager \
 ; RUN:     -passes='module(no-op-module,no-op-module)' %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefix=CHECK-NESTED-TWO-NOOP-MP
-; CHECK-NESTED-TWO-NOOP-MP: Starting module pass manager
-; CHECK-NESTED-TWO-NOOP-MP: Running module pass: ModulePassManager
-; CHECK-NESTED-TWO-NOOP-MP: Starting module pass manager
-; CHECK-NESTED-TWO-NOOP-MP: Running module pass: NoOpModulePass
-; CHECK-NESTED-TWO-NOOP-MP: Running module pass: NoOpModulePass
-; CHECK-NESTED-TWO-NOOP-MP: Finished module pass manager
-; CHECK-NESTED-TWO-NOOP-MP: Finished module pass manager
+; CHECK-NESTED-TWO-NOOP-MP: Starting pass manager
+; CHECK-NESTED-TWO-NOOP-MP: Starting pass manager
+; CHECK-NESTED-TWO-NOOP-MP: Running pass: NoOpModulePass
+; CHECK-NESTED-TWO-NOOP-MP: Running pass: NoOpModulePass
+; CHECK-NESTED-TWO-NOOP-MP: Finished pass manager
+; CHECK-NESTED-TWO-NOOP-MP: Finished pass manager
 
 ; RUN: opt -disable-output -debug-pass-manager \
 ; RUN:     -passes=no-op-function,no-op-function %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefix=CHECK-TWO-NOOP-FP
-; CHECK-TWO-NOOP-FP: Starting module pass manager
-; CHECK-TWO-NOOP-FP: Running module pass: ModuleToFunctionPassAdaptor
-; CHECK-TWO-NOOP-FP: Starting function pass manager
-; CHECK-TWO-NOOP-FP: Running function pass: NoOpFunctionPass
-; CHECK-TWO-NOOP-FP: Running function pass: NoOpFunctionPass
-; CHECK-TWO-NOOP-FP: Finished function pass manager
-; CHECK-TWO-NOOP-FP: Finished module pass manager
+; CHECK-TWO-NOOP-FP: Starting pass manager
+; CHECK-TWO-NOOP-FP: Running pass: ModuleToFunctionPassAdaptor
+; CHECK-TWO-NOOP-FP: Starting pass manager
+; CHECK-TWO-NOOP-FP: Running pass: NoOpFunctionPass
+; CHECK-TWO-NOOP-FP: Running pass: NoOpFunctionPass
+; CHECK-TWO-NOOP-FP: Finished pass manager
+; CHECK-TWO-NOOP-FP: Finished pass manager
 
 ; RUN: opt -disable-output -debug-pass-manager \
 ; RUN:     -passes='function(no-op-function,no-op-function)' %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefix=CHECK-NESTED-TWO-NOOP-FP
-; CHECK-NESTED-TWO-NOOP-FP: Starting module pass manager
-; CHECK-NESTED-TWO-NOOP-FP: Running module pass: ModuleToFunctionPassAdaptor
-; CHECK-NESTED-TWO-NOOP-FP: Starting function pass manager
-; CHECK-NESTED-TWO-NOOP-FP: Running function pass: FunctionPassManager
-; CHECK-NESTED-TWO-NOOP-FP: Starting function pass manager
-; CHECK-NESTED-TWO-NOOP-FP: Running function pass: NoOpFunctionPass
-; CHECK-NESTED-TWO-NOOP-FP: Running function pass: NoOpFunctionPass
-; CHECK-NESTED-TWO-NOOP-FP: Finished function pass manager
-; CHECK-NESTED-TWO-NOOP-FP: Finished function pass manager
-; CHECK-NESTED-TWO-NOOP-FP: Finished module pass manager
+; CHECK-NESTED-TWO-NOOP-FP: Starting pass manager
+; CHECK-NESTED-TWO-NOOP-FP: Running pass: ModuleToFunctionPassAdaptor
+; CHECK-NESTED-TWO-NOOP-FP: Starting pass manager
+; CHECK-NESTED-TWO-NOOP-FP: Running pass: NoOpFunctionPass
+; CHECK-NESTED-TWO-NOOP-FP: Running pass: NoOpFunctionPass
+; CHECK-NESTED-TWO-NOOP-FP: Finished pass manager
+; CHECK-NESTED-TWO-NOOP-FP: Finished pass manager
 
 ; RUN: opt -disable-output -debug-pass-manager \
 ; RUN:     -passes='no-op-module,function(no-op-function,no-op-function),no-op-module' %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefix=CHECK-MIXED-FP-AND-MP
-; CHECK-MIXED-FP-AND-MP: Starting module pass manager
-; CHECK-MIXED-FP-AND-MP: Running module pass: NoOpModulePass
-; CHECK-MIXED-FP-AND-MP: Running module pass: ModuleToFunctionPassAdaptor
-; CHECK-MIXED-FP-AND-MP: Starting function pass manager
-; CHECK-MIXED-FP-AND-MP: Running function pass: NoOpFunctionPass
-; CHECK-MIXED-FP-AND-MP: Running function pass: NoOpFunctionPass
-; CHECK-MIXED-FP-AND-MP: Finished function pass manager
-; CHECK-MIXED-FP-AND-MP: Running module pass: NoOpModulePass
-; CHECK-MIXED-FP-AND-MP: Finished module pass manager
+; CHECK-MIXED-FP-AND-MP: Starting pass manager
+; CHECK-MIXED-FP-AND-MP: Running pass: NoOpModulePass
+; CHECK-MIXED-FP-AND-MP: Running pass: ModuleToFunctionPassAdaptor
+; CHECK-MIXED-FP-AND-MP: Starting pass manager
+; CHECK-MIXED-FP-AND-MP: Running pass: NoOpFunctionPass
+; CHECK-MIXED-FP-AND-MP: Running pass: NoOpFunctionPass
+; CHECK-MIXED-FP-AND-MP: Finished pass manager
+; CHECK-MIXED-FP-AND-MP: Running pass: NoOpModulePass
+; CHECK-MIXED-FP-AND-MP: Finished pass manager
 
 ; RUN: not opt -disable-output -debug-pass-manager \
 ; RUN:     -passes='no-op-module)' %s 2>&1 \
@@ -105,41 +101,41 @@
 ; RUN:     | FileCheck %s --check-prefix=CHECK-UNBALANCED10
 ; CHECK-UNBALANCED10: unable to parse pass pipeline description
 
-; RUN: opt -disable-output -debug-pass-manager -debug-cgscc-pass-manager \
+; RUN: opt -disable-output -debug-pass-manager \
 ; RUN:     -passes=no-op-cgscc,no-op-cgscc %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefix=CHECK-TWO-NOOP-CG
-; CHECK-TWO-NOOP-CG: Starting module pass manager
-; CHECK-TWO-NOOP-CG: Running module pass: ModuleToPostOrderCGSCCPassAdaptor
-; CHECK-TWO-NOOP-CG: Starting CGSCC pass manager
-; CHECK-TWO-NOOP-CG: Running CGSCC pass: NoOpCGSCCPass
-; CHECK-TWO-NOOP-CG: Running CGSCC pass: NoOpCGSCCPass
-; CHECK-TWO-NOOP-CG: Finished CGSCC pass manager
-; CHECK-TWO-NOOP-CG: Finished module pass manager
-
-; RUN: opt -disable-output -debug-pass-manager -debug-cgscc-pass-manager \
+; CHECK-TWO-NOOP-CG: Starting pass manager
+; CHECK-TWO-NOOP-CG: Running pass: ModuleToPostOrderCGSCCPassAdaptor
+; CHECK-TWO-NOOP-CG: Starting pass manager
+; CHECK-TWO-NOOP-CG: Running pass: NoOpCGSCCPass
+; CHECK-TWO-NOOP-CG: Running pass: NoOpCGSCCPass
+; CHECK-TWO-NOOP-CG: Finished pass manager
+; CHECK-TWO-NOOP-CG: Finished pass manager
+
+; RUN: opt -disable-output -debug-pass-manager \
 ; RUN:     -passes='module(function(no-op-function),cgscc(no-op-cgscc,function(no-op-function),no-op-cgscc),function(no-op-function))' %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefix=CHECK-NESTED-MP-CG-FP
-; CHECK-NESTED-MP-CG-FP: Starting module pass manager
-; CHECK-NESTED-MP-CG-FP: Starting module pass manager
-; CHECK-NESTED-MP-CG-FP: Running module pass: ModuleToFunctionPassAdaptor
-; CHECK-NESTED-MP-CG-FP: Starting function pass manager
-; CHECK-NESTED-MP-CG-FP: Running function pass: NoOpFunctionPass
-; CHECK-NESTED-MP-CG-FP: Finished function pass manager
-; CHECK-NESTED-MP-CG-FP: Running module pass: ModuleToPostOrderCGSCCPassAdaptor
-; CHECK-NESTED-MP-CG-FP: Starting CGSCC pass manager
-; CHECK-NESTED-MP-CG-FP: Running CGSCC pass: NoOpCGSCCPass
-; CHECK-NESTED-MP-CG-FP: Running CGSCC pass: CGSCCToFunctionPassAdaptor
-; CHECK-NESTED-MP-CG-FP: Starting function pass manager
-; CHECK-NESTED-MP-CG-FP: Running function pass: NoOpFunctionPass
-; CHECK-NESTED-MP-CG-FP: Finished function pass manager
-; CHECK-NESTED-MP-CG-FP: Running CGSCC pass: NoOpCGSCCPass
-; CHECK-NESTED-MP-CG-FP: Finished CGSCC pass manager
-; CHECK-NESTED-MP-CG-FP: Running module pass: ModuleToFunctionPassAdaptor
-; CHECK-NESTED-MP-CG-FP: Starting function pass manager
-; CHECK-NESTED-MP-CG-FP: Running function pass: NoOpFunctionPass
-; CHECK-NESTED-MP-CG-FP: Finished function pass manager
-; CHECK-NESTED-MP-CG-FP: Finished module pass manager
-; CHECK-NESTED-MP-CG-FP: Finished module pass manager
+; CHECK-NESTED-MP-CG-FP: Starting pass manager
+; CHECK-NESTED-MP-CG-FP: Starting pass manager
+; CHECK-NESTED-MP-CG-FP: Running pass: ModuleToFunctionPassAdaptor
+; CHECK-NESTED-MP-CG-FP: Starting pass manager
+; CHECK-NESTED-MP-CG-FP: Running pass: NoOpFunctionPass
+; CHECK-NESTED-MP-CG-FP: Finished pass manager
+; CHECK-NESTED-MP-CG-FP: Running pass: ModuleToPostOrderCGSCCPassAdaptor
+; CHECK-NESTED-MP-CG-FP: Starting pass manager
+; CHECK-NESTED-MP-CG-FP: Running pass: NoOpCGSCCPass
+; CHECK-NESTED-MP-CG-FP: Running pass: CGSCCToFunctionPassAdaptor
+; CHECK-NESTED-MP-CG-FP: Starting pass manager
+; CHECK-NESTED-MP-CG-FP: Running pass: NoOpFunctionPass
+; CHECK-NESTED-MP-CG-FP: Finished pass manager
+; CHECK-NESTED-MP-CG-FP: Running pass: NoOpCGSCCPass
+; CHECK-NESTED-MP-CG-FP: Finished pass manager
+; CHECK-NESTED-MP-CG-FP: Running pass: ModuleToFunctionPassAdaptor
+; CHECK-NESTED-MP-CG-FP: Starting pass manager
+; CHECK-NESTED-MP-CG-FP: Running pass: NoOpFunctionPass
+; CHECK-NESTED-MP-CG-FP: Finished pass manager
+; CHECK-NESTED-MP-CG-FP: Finished pass manager
+; CHECK-NESTED-MP-CG-FP: Finished pass manager
 
 define void @f() {
  ret void
diff --git a/test/SymbolRewriter/rewrite.ll b/test/SymbolRewriter/rewrite.ll
new file mode 100644
index 000000000000..716fff9f284c
--- /dev/null
+++ b/test/SymbolRewriter/rewrite.ll
@@ -0,0 +1,59 @@
+; RUN: opt -mtriple i686-win32 -rewrite-symbols -rewrite-map-file %p/rewrite.map \
+; RUN:   %s -o - | llvm-dis | FileCheck %s
+
+declare void @source_function()
+@source_variable = external global i32
+declare void @source_function_pattern_function()
+declare void @source_function_pattern_multiple_function_matches()
+@source_variable_pattern_variable = external global i32
+@source_variable_pattern_multiple_variable_matches = external global i32
+declare void @"\01naked_source_function"()
+declare void @"\01__imp_missing_global_leader_prefix"()
+
+declare i32 @first_callee()
+declare i32 @second_callee()
+define i32 @caller() {
+  %rhs = call i32 @first_callee()
+  %lhs = call i32 @second_callee()
+  %res = add i32 %rhs, %lhs
+  ret i32 %res
+}
+
+%struct.S = type { i8 }
+@_ZN1SC1Ev = alias void (%struct.S*)* @_ZN1SC2Ev
+define void @_ZN1SC2Ev(%struct.S* %this) unnamed_addr align 2 {
+entry:
+  %this.addr = alloca %struct.S*, align 4
+  store %struct.S* %this, %struct.S** %this.addr, align 4
+  ret void
+}
+
+; CHECK: @target_variable = external global i32
+; CHECK-NOT: @source_variable = external global i32
+; CHECK: @target_pattern_variable = external global i32
+; CHECK-NOT: @source_pattern_variable = external global i32
+; CHECK: @target_pattern_multiple_variable_matches = external global i32
+; CHECK-NOT: @source_pattern_multiple_variable_matches = external global i32
+; CHECK: declare void @target_function()
+; CHECK-NOT: declare void @source_function()
+; CHECK: declare void @target_pattern_function()
+; CHECK-NOT: declare void @source_function_pattern_function()
+; CHECK: declare void @target_pattern_multiple_function_matches()
+; CHECK-NOT: declare void @source_function_pattern_multiple_function_matches()
+; CHECK: declare void @naked_target_function()
+; CHECK-NOT: declare void @"\01naked_source_function"()
+; CHECK-NOT: declare void @"\01__imp__imported_function"()
+; CHECK: declare void @"\01__imp_missing_global_leader_prefix"()
+; CHECK-NOT: declare void @"\01__imp_DO_NOT_REWRITE"()
+
+; CHECK: declare i32 @renamed_callee()
+; CHECK-NOT: declare i32 @first_callee()
+; CHECK: declare i32 @second_callee()
+; CHECK: define i32 @caller() {
+; CHECK:   %rhs = call i32 @renamed_callee()
+; CHECK-NOT: %rhs = call i32 @first_callee()
+; CHECK:   %lhs = call i32 @second_callee()
+; CHECK:   %res = add i32 %rhs, %lhs
+; CHECK:   ret i32 %res
+; CHECK: }
+
diff --git a/test/SymbolRewriter/rewrite.map b/test/SymbolRewriter/rewrite.map
new file mode 100644
index 000000000000..ef6dfc8ca2b9
--- /dev/null
+++ b/test/SymbolRewriter/rewrite.map
@@ -0,0 +1,46 @@
+function: {
+  source: source_function,
+  target: target_function,
+}
+
+global variable: {
+  source: source_variable,
+  target: target_variable,
+}
+
+function: {
+  source: source_function_(.*),
+  transform: target_\1,
+}
+
+global variable: {
+  source: source_variable_(.*),
+  transform: target_\1,
+}
+
+function: {
+  source: naked_source_function,
+  target: naked_target_function,
+  naked: true,
+}
+
+function: {
+  source: imported_function,
+  target: exported_function,
+}
+
+function: {
+  source: missing_global_leader_prefix,
+  target: DO_NOT_REWRITE,
+}
+
+function: {
+  source: first_callee,
+  target: renamed_callee,
+}
+
+global alias: {
+  source: _ZN1SC1Ev,
+  target: _ZN1SD1Ev,
+}
+
diff --git a/test/TableGen/BitOffsetDecoder.td b/test/TableGen/BitOffsetDecoder.td
new file mode 100644
index 000000000000..ec0ceeee8a6d
--- /dev/null
+++ b/test/TableGen/BitOffsetDecoder.td
@@ -0,0 +1,74 @@
+// RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | FileCheck %s
+
+include "llvm/Target/Target.td"
+
+def archInstrInfo : InstrInfo { }
+
+def arch : Target {
+    let InstructionSet = archInstrInfo;
+}
+
+def  Myi32  : Operand<i32> {
+  let DecoderMethod = "DecodeMyi32";
+}
+
+
+let OutOperandList = (outs), Size = 2 in {
+
+def foo : Instruction {
+    let InOperandList = (ins i32imm:$factor);
+    field bits<16> Inst;
+    bits<32> factor;
+    let Inst{7-0} = 0xAA;
+    let Inst{14-8} = factor{6-0}; // no offset
+    let AsmString = "foo  $factor";
+    field bits<16> SoftFail = 0;
+    }
+
+def bar : Instruction {
+    let InOperandList = (ins i32imm:$factor);
+    field bits<16> Inst;
+    bits<32> factor;
+    let Inst{7-0} = 0xBB;
+    let Inst{15-8} = factor{10-3}; // offset by 3
+    let AsmString = "bar  $factor";
+    field bits<16> SoftFail = 0;
+    }
+
+def biz : Instruction {
+    let InOperandList = (ins i32imm:$factor);
+    field bits<16> Inst;
+    bits<32> factor;
+    let Inst{7-0} = 0xCC;
+    let Inst{11-8,15-12} = factor{10-3}; // offset by 3, multipart
+    let AsmString = "biz  $factor";
+    field bits<16> SoftFail = 0;
+    }
+
+def baz : Instruction {
+    let InOperandList = (ins Myi32:$factor);
+    field bits<16> Inst;
+    bits<32> factor;
+    let Inst{7-0} = 0xDD;
+    let Inst{15-8} = factor{11-4}; // offset by 4 + custom decode
+    let AsmString = "baz  $factor";
+    field bits<16> SoftFail = 0;
+    }
+
+def bum : Instruction {
+    let InOperandList = (ins i32imm:$factor);
+    field bits<16> Inst;
+    bits<32> factor;
+    let Inst{7-0} = 0xEE;
+    let Inst{15-8} = !srl(factor,5);
+    let AsmString = "bum  $factor";
+    field bits<16> SoftFail = 0;
+    }
+}
+
+
+// CHECK: tmp = fieldFromInstruction(insn, 8, 7);
+// CHECK: tmp = fieldFromInstruction(insn, 8, 8) << 3;
+// CHECK: tmp |= fieldFromInstruction(insn, 8, 4) << 7;
+// CHECK: tmp |= fieldFromInstruction(insn, 12, 4) << 3;
+// CHECK: tmp = fieldFromInstruction(insn, 8, 8) << 4;
diff --git a/test/TableGen/BitsInit.td b/test/TableGen/BitsInit.td
new file mode 100644
index 000000000000..6aac3e41c6cb
--- /dev/null
+++ b/test/TableGen/BitsInit.td
@@ -0,0 +1,85 @@
+
+// RUN: not llvm-tblgen %s 2>&1 > %t
+// RUN: FileCheck %s < %t
+
+def a {
+  bits<2> opc = { 0, 1 };
+  bits<2> opc2 = { 1, 0 };
+  bits<1> opc3 = { 1 };
+  bits<2> a = { opc, opc2 }; // error!
+  bits<2> b = { opc{0}, opc2{0} };
+  bits<2> c = { opc{1}, opc2{1} };
+  bits<2> c = { opc3{0}, opc3 };
+}
+
+// CHECK: def a {
+// CHECK:   bits<2> opc = { 0, 1 };
+// CHECK:   bits<2> opc2 = { 1, 0 };
+// CHECK:   bits<1> opc3 = { 1 };
+// CHECK:   bits<2> a;
+// CHECK:   bits<2> b = { 1, 0 };
+// CHECK:   bits<2> c = { 1, 1 };
+// CHECK: }
+
+def {
+  bits<2> B1 = 0b011;  // bitfield is too small, reject
+  bits<3> B2 = 0b011;  // ok
+
+  bits<2> C1 = 0b111;  // bitfield is too small, reject
+  bits<3> C2 = 0b111;  // ok
+
+  bits<2> D1 = { 0, 0 }; // ok
+  bits<2> D2 = { 0b00 }; // ok
+  bits<3> D3 = { 0, 0 }; // type mismatch.  RHS doesn't have enough bits
+  bits<3> D4 = { 0b00 }; // type mismatch.  RHS doesn't have enough bits
+  bits<1> D5 = { 0 };    // ok
+  bits<1> D6 = { 1 };    // ok
+  bits<1> D7 = { 3 };    // type mismatch.  LHS doesn't have enough bits
+  bits<2> D8 = { 0 };    // type mismatch.  RHS doesn't have enough bits
+
+  bits<8> E;
+  let E{7-0} = {0,0,1,?,?,?,?,?};
+  let E{3-0} = 0b0010;
+
+  bits<8> F1 = { 0, 1, 0b1001, 0, 0b0 }; // ok
+  bits<7> F2 = { 0, 1, 0b1001, 0, 0b0 }; // LHS doesn't have enough bits
+  bits<9> F3 = { 0, 1, 0b1001, 0, 0b0 }; // RHS doesn't have enough bits
+
+  bits<8> G1 = { 0, { 1, 0b1001, 0 }, 0b0 }; // ok
+  bits<8> G2 = { 0, { 1, 0b1001 }, 0, 0b0 }; // ok
+  bits<8> G3 = { 0, 1, { 0b1001 }, 0, 0b0 }; // ok
+
+  bits<16> H;
+  let H{15-0} = { { 0b11001100 }, 0b00110011 };
+  bits<16> I = { G1, G2 };
+
+  // Make sure we can initialise ints with bits<> values.
+  int J = H;
+  int K = { 0, 1 };
+}
+
+// CHECK: def {{.*}} {
+// CHECK: bits<2> B1;
+// CHECK: bits<3> B2 = { 0, 1, 1 };
+// CHECK: bits<2> C1;
+// CHECK: bits<3> C2 = { 1, 1, 1 };
+// CHECK: bits<2> D1 = { 0, 0 };
+// CHECK: bits<2> D2 = { 0, 0 };
+// CHECK: bits<3> D3;
+// CHECK: bits<3> D4;
+// CHECK: bits<1> D5 = { 0 };
+// CHECK: bits<1> D6 = { 1 };
+// CHECK: bits<1> D7 = { ? };
+// CHECK: bits<2> D8;
+// CHECK: bits<8> E = { 0, 0, 1, ?, 0, 0, 1, 0 };
+// CHECK: bits<8> F1 = { 0, 1, 1, 0, 0, 1, 0, 0 };
+// CHECK: bits<7> F2;
+// CHECK: bits<9> F3;
+// CHECK: bits<8> G1 = { 0, 1, 1, 0, 0, 1, 0, 0 };
+// CHECK: bits<8> G2 = { 0, 1, 1, 0, 0, 1, 0, 0 };
+// CHECK: bits<8> G3 = { 0, 1, 1, 0, 0, 1, 0, 0 };
+// CHECK: bits<16> H = { 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1 };
+// CHECK: bits<16> I = { 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0 };
+// CHECK: int J = 52275;
+// CHECK: int K = 1;
+// CHECK: }
diff --git a/test/TableGen/ClassInstanceValue.td b/test/TableGen/ClassInstanceValue.td
new file mode 100644
index 000000000000..b6c4c93cb096
--- /dev/null
+++ b/test/TableGen/ClassInstanceValue.td
@@ -0,0 +1,19 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Struct<int i> {
+  int I = !shl(i, 1);
+  int J = !shl(I, 1);
+}
+
+class Class<Struct s> {
+    int Class_J = s.J;
+}
+
+multiclass MultiClass<int i> {
+  def Def : Class<Struct<i>>;
+// CHECK: Class_J = 8
+// CHECK-NOT: Class_J = !shl(I, 1)
+}
+
+defm Defm : MultiClass<2>;
diff --git a/test/TableGen/ForeachList.td b/test/TableGen/ForeachList.td
index 99b7e14c2d5f..9bc76e0f0cf8 100644
--- a/test/TableGen/ForeachList.td
+++ b/test/TableGen/ForeachList.td
@@ -1,5 +1,4 @@
 // RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
 
 class Register<string name, int idx> {
   string Name = name;
diff --git a/test/TableGen/ForeachLoop.td b/test/TableGen/ForeachLoop.td
index 25208fae227e..ce8d44c7526e 100644
--- a/test/TableGen/ForeachLoop.td
+++ b/test/TableGen/ForeachLoop.td
@@ -1,5 +1,4 @@
 // RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
 
 class Register<string name, int idx> {
   string Name = name;
diff --git a/test/TableGen/NestedForeach.td b/test/TableGen/NestedForeach.td
index e8c16f720d0e..5b63175b192a 100644
--- a/test/TableGen/NestedForeach.td
+++ b/test/TableGen/NestedForeach.td
@@ -1,5 +1,4 @@
 // RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
 
 class Droid<string series, int release, string model, int patchlevel> {
   string Series = series;
diff --git a/test/TableGen/SiblingForeach.td b/test/TableGen/SiblingForeach.td
index a11f6f87b427..e4c4704a5e39 100644
--- a/test/TableGen/SiblingForeach.td
+++ b/test/TableGen/SiblingForeach.td
@@ -1,5 +1,4 @@
 // RUN: llvm-tblgen %s | FileCheck %s
-// XFAIL: vg_leak
 
 class Set<int i = 0, int j = 0, int k = 0> {
   int I = i;
diff --git a/test/TableGen/if.td b/test/TableGen/if.td
index 1d8d62329ae3..05a2d992856a 100644
--- a/test/TableGen/if.td
+++ b/test/TableGen/if.td
@@ -3,7 +3,7 @@
 
 // Support for an `!if' operator as part of a `let' statement.
 // CHECK:      class C
-// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, !if({ C:y{3} }, 1, !if({ C:y{2} }, { C:x{0} }, !if({ C:y{1} }, { C:x{1} }, !if({ C:y{0} }, { C:x{2} }, ?)))){0}, !if({ C:x{2} }, { C:y{3}, C:y{2} }, !if({ C:x{1} }, { C:y{2}, C:y{1} }, !if({ C:x{0} }, { C:y{1}, C:y{0} }, ?))){1}, !if({ C:x{2} }, { C:y{3}, C:y{2} }, !if({ C:x{1} }, { C:y{2}, C:y{1} }, !if({ C:x{0} }, { C:y{1}, C:y{0} }, ?))){0}, !if({ C:x{2} }, 2, 6){2}, !if({ C:x{2} }, 2, 6){1}, !if({ C:x{2} }, 2, 6){0}, !if({ C:x{1} }, { C:y{3}, C:y{2} }, { 0, 1 }){1}, !if({ C:x{1} }, { C:y{3}, C:y{2} }, { 0, 1 }){0}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){3}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){2}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){1}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){0} };
+// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, !if({ C:y{3} }, 1, !if({ C:y{2} }, { C:x{0} }, !if({ C:y{1} }, { C:x{1} }, !if({ C:y{0} }, { C:x{2} }, ?)))){0}, !if({ C:x{2} }, { C:y{3}, C:y{2} }, !if({ C:x{1} }, { C:y{2}, C:y{1} }, !if({ C:x{0} }, { C:y{1}, C:y{0} }, ?))){1}, !if({ C:x{2} }, { C:y{3}, C:y{2} }, !if({ C:x{1} }, { C:y{2}, C:y{1} }, !if({ C:x{0} }, { C:y{1}, C:y{0} }, ?))){0}, !if({ C:x{2} }, { 0, 1, 0 }, { 1, 1, 0 }){2}, !if({ C:x{2} }, { 0, 1, 0 }, { 1, 1, 0 }){1}, !if({ C:x{2} }, { 0, 1, 0 }, { 1, 1, 0 }){0}, !if({ C:x{1} }, { C:y{3}, C:y{2} }, { 0, 1 }){1}, !if({ C:x{1} }, { C:y{3}, C:y{2} }, { 0, 1 }){0}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){3}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){2}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){1}, !if({ C:x{0} }, { C:y{3}, C:y{2}, C:y{1}, C:y{0} }, { C:z, C:y{2}, C:y{1}, C:y{0} }){0} };
 class C<bits<3> x, bits<4> y, bit z> {
   bits<16> n;
 
diff --git a/test/TableGen/ifbit.td b/test/TableGen/ifbit.td
index 88f575e9acfc..18797cac1107 100644
--- a/test/TableGen/ifbit.td
+++ b/test/TableGen/ifbit.td
@@ -5,6 +5,8 @@
 
 class A<bit b = 1> {
   int a = !if(b, 5, 6);
+  bit c = !if(b, 0, 1);
+  bits<1> d = !if(b, 0, 1);
 }
 
 def X : A<0>;
diff --git a/test/TableGen/intrinsic-varargs.td b/test/TableGen/intrinsic-varargs.td
index 3e48f8da33bf..935a62503803 100644
--- a/test/TableGen/intrinsic-varargs.td
+++ b/test/TableGen/intrinsic-varargs.td
@@ -26,5 +26,5 @@ class Intrinsic<string name, list<LLVMType> param_types = []> {
 def isVoid : ValueType<0, 56>;   // Produces no value
 def llvm_vararg_ty : LLVMType<isVoid>;   // this means vararg here
 
-// CHECK: /* 0 */ 0, 27, 0,
+// CHECK: /* 0 */ 0, 28, 0,
 def int_foo : Intrinsic<"llvm.foo", [llvm_vararg_ty]>;
diff --git a/test/TableGen/list-element-bitref.td b/test/TableGen/list-element-bitref.td
index 4622f28526eb..0f59b537fa6d 100644
--- a/test/TableGen/list-element-bitref.td
+++ b/test/TableGen/list-element-bitref.td
@@ -1,7 +1,7 @@
 // RUN: llvm-tblgen %s | FileCheck %s
 // XFAIL: vg_leak
 
-class C<list<bits<8>> L> {
+class C<list<bits<4>> L> {
   bits<2> V0 = L[0]{1-0};
   bits<2> V1 = L[1]{3-2};
   string V2 = !if(L[0]{0}, "Odd", "Even");
diff --git a/test/TableGen/math.td b/test/TableGen/math.td
index 71c60579de21..d966346596ae 100644
--- a/test/TableGen/math.td
+++ b/test/TableGen/math.td
@@ -15,6 +15,12 @@ class Int<int value> {
   int Value = value;
 }
 
+// CHECK: def v0
+// CHECK: Value = 0
+
+// CHECK: def v1
+// CHECK: Value = 1
+
 def v1024   : Int<1024>;
 // CHECK: def v1024
 // CHECK: Value = 1024
@@ -27,3 +33,5 @@ def v2048   : Int<!add(v1024.Value, v1024.Value)>;
 // CHECK: def v2048
 // CHECK: Value = 2048
 
+def v0 : Int<!and(v1024.Value, v2048.Value)>;
+def v1 : Int<!and(v1025.Value, 1)>;
diff --git a/test/Transforms/AddDiscriminators/basic.ll b/test/Transforms/AddDiscriminators/basic.ll
index b12cbee6adb5..7c8b3d3a7ce8 100644
--- a/test/Transforms/AddDiscriminators/basic.ll
+++ b/test/Transforms/AddDiscriminators/basic.ll
@@ -40,20 +40,20 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [basic.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"basic.c", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [basic.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!9 = metadata !{metadata !"clang version 3.5 "}
-!10 = metadata !{i32 3, i32 0, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [basic.c]
-!12 = metadata !{i32 4, i32 0, metadata !4, null}
-
-; CHECK: !12 = metadata !{i32 3, i32 0, metadata !13, null}
-; CHECK: !13 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [./basic.c]
-; CHECK: !14 = metadata !{i32 4, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [basic.c] [DW_LANG_C99]
+!1 = !{!"basic.c", !"."}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, void (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [basic.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 1, !"Debug Info Version", i32 2}
+!9 = !{!"clang version 3.5 "}
+!10 = !MDLocation(line: 3, scope: !11)
+!11 = !{!"0xb\003\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [basic.c]
+!12 = !MDLocation(line: 4, scope: !4)
+
+; CHECK: !12 = !MDLocation(line: 3, scope: !13)
+; CHECK: !13 = !{!"0xb\001", !1, !11} ; [ DW_TAG_lexical_block ] [./basic.c]
+; CHECK: !14 = !MDLocation(line: 4, scope: !4)
diff --git a/test/Transforms/AddDiscriminators/first-only.ll b/test/Transforms/AddDiscriminators/first-only.ll
index f3b0357e5766..153cfc8a44cc 100644
--- a/test/Transforms/AddDiscriminators/first-only.ll
+++ b/test/Transforms/AddDiscriminators/first-only.ll
@@ -50,33 +50,33 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [first-only.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"first-only.c", metadata !"."}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [first-only.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!9 = metadata !{metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)"}
-!10 = metadata !{i32 3, i32 0, metadata !11, null}
-
-!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [first-only.c]
-; CHECK: !11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0}
-
-!12 = metadata !{i32 3, i32 0, metadata !13, null}
-
-!13 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [first-only.c]
-; CHECK: !13 = metadata !{i32 786443, metadata !1, metadata !14, i32 3, i32 0, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [./first-only.c]
-
-!14 = metadata !{i32 4, i32 0, metadata !13, null}
-; CHECK: !14 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1}
-
-!15 = metadata !{i32 5, i32 0, metadata !13, null}
-; CHECK: !15 = metadata !{i32 4, i32 0, metadata !14, null}
-
-!16 = metadata !{i32 6, i32 0, metadata !4, null}
-; CHECK: !16 = metadata !{i32 5, i32 0, metadata !14, null}
-; CHECK: !17 = metadata !{i32 6, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5 (trunk 199750) (llvm/trunk 199751)\000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [first-only.c] [DW_LANG_C99]
+!1 = !{!"first-only.c", !"."}
+!2 = !{i32 0}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, void (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [first-only.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 1, !"Debug Info Version", i32 2}
+!9 = !{!"clang version 3.5 (trunk 199750) (llvm/trunk 199751)"}
+!10 = !MDLocation(line: 3, scope: !11)
+
+!11 = !{!"0xb\003\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [first-only.c]
+; CHECK: !11 = !{!"0xb\003\000\000", !1, !4}
+
+!12 = !MDLocation(line: 3, scope: !13)
+
+!13 = !{!"0xb\003\000\001", !1, !11} ; [ DW_TAG_lexical_block ] [first-only.c]
+; CHECK: !13 = !{!"0xb\001", !1, !14} ; [ DW_TAG_lexical_block ] [./first-only.c]
+
+!14 = !MDLocation(line: 4, scope: !13)
+; CHECK: !14 = !{!"0xb\003\000\001", !1, !11}
+
+!15 = !MDLocation(line: 5, scope: !13)
+; CHECK: !15 = !MDLocation(line: 4, scope: !14)
+
+!16 = !MDLocation(line: 6, scope: !4)
+; CHECK: !16 = !MDLocation(line: 5, scope: !14)
+; CHECK: !17 = !MDLocation(line: 6, scope: !4)
 
diff --git a/test/Transforms/AddDiscriminators/multiple.ll b/test/Transforms/AddDiscriminators/multiple.ll
index 0241a0c1a0b1..5e552a87bbb2 100644
--- a/test/Transforms/AddDiscriminators/multiple.ll
+++ b/test/Transforms/AddDiscriminators/multiple.ll
@@ -51,21 +51,21 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [multiple.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"multiple.c", metadata !"."}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [multiple.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!9 = metadata !{metadata !"clang version 3.5 (trunk 199750) (llvm/trunk 199751)"}
-!10 = metadata !{i32 3, i32 0, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [multiple.c]
-!12 = metadata !{i32 4, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5 (trunk 199750) (llvm/trunk 199751)\000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [multiple.c] [DW_LANG_C99]
+!1 = !{!"multiple.c", !"."}
+!2 = !{i32 0}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, void (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [multiple.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 1, !"Debug Info Version", i32 2}
+!9 = !{!"clang version 3.5 (trunk 199750) (llvm/trunk 199751)"}
+!10 = !MDLocation(line: 3, scope: !11)
+!11 = !{!"0xb\003\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [multiple.c]
+!12 = !MDLocation(line: 4, scope: !4)
 
-; CHECK: !12 = metadata !{i32 3, i32 0, metadata !13, null}
-; CHECK: !13 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 1, i32 0} ; [ DW_TAG_lexical_block ] [./multiple.c]
-; CHECK: !14 = metadata !{i32 3, i32 0, metadata !15, null}
-; CHECK: !15 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 0, i32 2, i32 1} ; [ DW_TAG_lexical_block ] [./multiple.c]
+; CHECK: !12 = !MDLocation(line: 3, scope: !13)
+; CHECK: !13 = !{!"0xb\001", !1, !11} ; [ DW_TAG_lexical_block ] [./multiple.c]
+; CHECK: !14 = !MDLocation(line: 3, scope: !15)
+; CHECK: !15 = !{!"0xb\002", !1, !11} ; [ DW_TAG_lexical_block ] [./multiple.c]
diff --git a/test/Transforms/AddDiscriminators/no-discriminators.ll b/test/Transforms/AddDiscriminators/no-discriminators.ll
index f7b45e295551..dd7faf0a71e3 100644
--- a/test/Transforms/AddDiscriminators/no-discriminators.ll
+++ b/test/Transforms/AddDiscriminators/no-discriminators.ll
@@ -17,7 +17,7 @@ entry:
   %retval = alloca i32, align 4
   %i.addr = alloca i64, align 8
   store i64 %i, i64* %i.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i64* %i.addr}, metadata !13), !dbg !14
+  call void @llvm.dbg.declare(metadata i64* %i.addr, metadata !13, metadata !{}), !dbg !14
   %0 = load i64* %i.addr, align 8, !dbg !15
 ; CHECK:  %0 = load i64* %i.addr, align 8, !dbg !15
   %cmp = icmp slt i64 %0, 5, !dbg !15
@@ -39,7 +39,7 @@ return:                                           ; preds = %if.else, %if.then
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -48,24 +48,24 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!10, !11}
 !llvm.ident = !{!12}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [./no-discriminators] [DW_LANG_C99]
-!1 = metadata !{metadata !"no-discriminators", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i64)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [./no-discriminators]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !9}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786468, null, null, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-; CHECK: !10 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!11 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!12 = metadata !{metadata !"clang version 3.5.0 "}
-!13 = metadata !{i32 786689, metadata !4, metadata !"i", metadata !5, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [i] [line 1]
-!14 = metadata !{i32 1, i32 0, metadata !4, null}
-!15 = metadata !{i32 2, i32 0, metadata !16, null}
-; CHECK: !15 = metadata !{i32 2, i32 0, metadata !16, null}
-!16 = metadata !{i32 786443, metadata !1, metadata !4, i32 2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./no-discriminators]
-; CHECK: !16 = metadata !{i32 786443, metadata !1, metadata !4, i32 2, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./no-discriminators]
-!17 = metadata !{i32 3, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [./no-discriminators] [DW_LANG_C99]
+!1 = !{!"no-discriminators", !"."}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, i32 (i64)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [./no-discriminators]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !9}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0x24\00long int\000\0064\0064\000\000\005", null, null} ; [ DW_TAG_base_type ] [long int] [line 0, size 64, align 64, offset 0, enc DW_ATE_signed]
+!10 = !{i32 2, !"Dwarf Version", i32 2}
+; CHECK: !10 = !{i32 2, !"Dwarf Version", i32 2}
+!11 = !{i32 1, !"Debug Info Version", i32 2}
+!12 = !{!"clang version 3.5.0 "}
+!13 = !{!"0x101\00i\0016777217\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [i] [line 1]
+!14 = !MDLocation(line: 1, scope: !4)
+!15 = !MDLocation(line: 2, scope: !16)
+; CHECK: !15 = !MDLocation(line: 2, scope: !16)
+!16 = !{!"0xb\002\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [./no-discriminators]
+; CHECK: !16 = !{!"0xb\002\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [./no-discriminators]
+!17 = !MDLocation(line: 3, scope: !4)
diff --git a/test/Transforms/AlignmentFromAssumptions/simple.ll b/test/Transforms/AlignmentFromAssumptions/simple.ll
new file mode 100644
index 000000000000..884c8bad02f5
--- /dev/null
+++ b/test/Transforms/AlignmentFromAssumptions/simple.ll
@@ -0,0 +1,215 @@
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+; RUN: opt < %s -alignment-from-assumptions -S | FileCheck %s
+
+define i32 @foo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %0 = load i32* %a, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %offsetptr = add i64 %ptrint, 24
+  %maskedptr = and i64 %offsetptr, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx = getelementptr inbounds i32* %a, i64 2
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo2
+; CHECK: load i32* {{[^,]+}}, align 16
+; CHECK: ret i32
+}
+
+define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %offsetptr = add i64 %ptrint, 28
+  %maskedptr = and i64 %offsetptr, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx = getelementptr inbounds i32* %a, i64 -1
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo2a
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @goo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %0 = load i32* %a, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @goo
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 8
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @hoo
+; CHECK: load i32* %arrayidx, align 32
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @joo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 8
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @joo
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @koo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 4
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @koo
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ -4, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 4
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @koo2
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @moo(i32* nocapture %a) nounwind uwtable {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %0 = bitcast i32* %a to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 4, i1 false)
+  ret i32 undef
+
+; CHECK-LABEL: @moo
+; CHECK: @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 32, i1 false)
+; CHECK: ret i32 undef
+}
+
+define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %ptrint1 = ptrtoint i32* %b to i64
+  %maskedptr3 = and i64 %ptrint1, 127
+  %maskcond4 = icmp eq i64 %maskedptr3, 0
+  tail call void @llvm.assume(i1 %maskcond4)
+  %0 = bitcast i32* %a to i8*
+  %1 = bitcast i32* %b to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 4, i1 false)
+  ret i32 undef
+
+; CHECK-LABEL: @moo2
+; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 32, i1 false)
+; CHECK: ret i32 undef
+}
+
+declare void @llvm.assume(i1) nounwind
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
diff --git a/test/Transforms/AlignmentFromAssumptions/simple32.ll b/test/Transforms/AlignmentFromAssumptions/simple32.ll
new file mode 100644
index 000000000000..166e7ef38936
--- /dev/null
+++ b/test/Transforms/AlignmentFromAssumptions/simple32.ll
@@ -0,0 +1,215 @@
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+; RUN: opt < %s -alignment-from-assumptions -S | FileCheck %s
+
+define i32 @foo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %0 = load i32* %a, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %offsetptr = add i64 %ptrint, 24
+  %maskedptr = and i64 %offsetptr, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx = getelementptr inbounds i32* %a, i64 2
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo2
+; CHECK: load i32* {{[^,]+}}, align 16
+; CHECK: ret i32
+}
+
+define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %offsetptr = add i64 %ptrint, 28
+  %maskedptr = and i64 %offsetptr, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx = getelementptr inbounds i32* %a, i64 -1
+  %0 = load i32* %arrayidx, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo2a
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @goo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %0 = load i32* %a, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @goo
+; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: ret i32
+}
+
+define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 8
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @hoo
+; CHECK: load i32* %arrayidx, align 32
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @joo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 8
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @joo
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @koo(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 4
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @koo
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ -4, %entry ], [ %indvars.iv.next, %for.body ]
+  %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, %r.06
+  %indvars.iv.next = add i64 %indvars.iv, 4
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 2048
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+; CHECK-LABEL: @koo2
+; CHECK: load i32* %arrayidx, align 16
+; CHECK: ret i32 %add.lcssa
+}
+
+define i32 @moo(i32* nocapture %a) nounwind uwtable {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %0 = bitcast i32* %a to i8*
+  tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 4, i1 false)
+  ret i32 undef
+
+; CHECK-LABEL: @moo
+; CHECK: @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 64, i32 32, i1 false)
+; CHECK: ret i32 undef
+}
+
+define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %ptrint1 = ptrtoint i32* %b to i64
+  %maskedptr3 = and i64 %ptrint1, 127
+  %maskcond4 = icmp eq i64 %maskedptr3, 0
+  tail call void @llvm.assume(i1 %maskcond4)
+  %0 = bitcast i32* %a to i8*
+  %1 = bitcast i32* %b to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 4, i1 false)
+  ret i32 undef
+
+; CHECK-LABEL: @moo2
+; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 64, i32 32, i1 false)
+; CHECK: ret i32 undef
+}
+
+declare void @llvm.assume(i1) nounwind
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
diff --git a/test/Transforms/AlignmentFromAssumptions/start-unk.ll b/test/Transforms/AlignmentFromAssumptions/start-unk.ll
new file mode 100644
index 000000000000..b7fe24947cb5
--- /dev/null
+++ b/test/Transforms/AlignmentFromAssumptions/start-unk.ll
@@ -0,0 +1,154 @@
+; RUN: opt -alignment-from-assumptions -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%type1 = type { %type2 }
+%type2 = type { [4 x i8] }
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #0
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.bswap.i32(i32) #1
+
+; Function Attrs: nounwind uwtable
+define void @test1() unnamed_addr #2 align 2 {
+
+; CHECK-LABEL: @test1
+
+entry:
+  br i1 undef, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  unreachable
+
+if.end:                                           ; preds = %entry
+  br i1 undef, label %return, label %if.end8
+
+if.end8:                                          ; preds = %if.end
+  br i1 undef, label %if.then13, label %if.end14
+
+if.then13:                                        ; preds = %if.end8
+  unreachable
+
+if.end14:                                         ; preds = %if.end8
+  br i1 undef, label %cond.false.i129, label %cond.end.i136
+
+cond.false.i129:                                  ; preds = %if.end14
+  unreachable
+
+cond.end.i136:                                    ; preds = %if.end14
+  br i1 undef, label %land.lhs.true.i, label %if.end.i145
+
+land.lhs.true.i:                                  ; preds = %cond.end.i136
+  br i1 undef, label %if.end.i145, label %if.then.i137
+
+if.then.i137:                                     ; preds = %land.lhs.true.i
+  br i1 undef, label %cond.false8.i, label %cond.end9.i
+
+cond.false8.i:                                    ; preds = %if.then.i137
+  unreachable
+
+cond.end9.i:                                      ; preds = %if.then.i137
+  br i1 undef, label %if.then23, label %if.end24
+
+if.end.i145:                                      ; preds = %land.lhs.true.i, %cond.end.i136
+  unreachable
+
+if.then23:                                        ; preds = %cond.end9.i
+  unreachable
+
+if.end24:                                         ; preds = %cond.end9.i
+  br i1 undef, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %if.end24
+  unreachable
+
+for.end:                                          ; preds = %if.end24
+  br i1 undef, label %if.end123, label %if.then121
+
+if.then121:                                       ; preds = %for.end
+  unreachable
+
+if.end123:                                        ; preds = %for.end
+  br i1 undef, label %if.end150, label %if.then126
+
+if.then126:                                       ; preds = %if.end123
+  %ptrint.i.i185 = ptrtoint %type1* undef to i64
+  %maskedptr.i.i186 = and i64 %ptrint.i.i185, 1
+  %maskcond.i.i187 = icmp eq i64 %maskedptr.i.i186, 0
+  tail call void @llvm.assume(i1 %maskcond.i.i187) #0
+  %ret.0..sroa_cast.i.i188 = bitcast %type1* undef to i32*
+  %ret.0.copyload.i.i189 = load i32* %ret.0..sroa_cast.i.i188, align 2
+
+; CHECK: load {{.*}} align 2
+
+  %0 = tail call i32 @llvm.bswap.i32(i32 %ret.0.copyload.i.i189) #0
+  %conv131 = zext i32 %0 to i64
+  %add.ptr132 = getelementptr inbounds i8* undef, i64 %conv131
+  %1 = bitcast i8* %add.ptr132 to %type1*
+  br i1 undef, label %if.end150, label %if.end.i173
+
+if.end.i173:                                      ; preds = %if.then126
+  br i1 undef, label %test1.exit, label %cond.false.i.i.i.i174
+
+cond.false.i.i.i.i174:                            ; preds = %if.end.i173
+  unreachable
+
+test1.exit: ; preds = %if.end.i173
+  br i1 undef, label %test1a.exit, label %if.end.i124
+
+if.end.i124:                                      ; preds = %test1.exit
+  unreachable
+
+test1a.exit: ; preds = %test1.exit
+  br i1 undef, label %if.end150, label %for.body137.lr.ph
+
+for.body137.lr.ph:                                ; preds = %test1a.exit
+  br label %for.body137
+
+for.body137:                                      ; preds = %test1b.exit, %for.body137.lr.ph
+  %ShndxTable.0309 = phi %type1* [ %1, %for.body137.lr.ph ], [ %incdec.ptr, %test1b.exit ]
+  %ret.0..sroa_cast.i.i106 = bitcast %type1* %ShndxTable.0309 to i32*
+  br i1 undef, label %for.body137.if.end146_crit_edge, label %if.then140
+
+for.body137.if.end146_crit_edge:                  ; preds = %for.body137
+  %incdec.ptr = getelementptr inbounds %type1* %ShndxTable.0309, i64 1
+  br i1 undef, label %cond.false.i70, label %cond.end.i
+
+if.then140:                                       ; preds = %for.body137
+  %ret.0.copyload.i.i102 = load i32* %ret.0..sroa_cast.i.i106, align 2
+
+; CHECK: load {{.*}} align 2
+
+  unreachable
+
+cond.false.i70:                                   ; preds = %for.body137.if.end146_crit_edge
+  unreachable
+
+cond.end.i:                                       ; preds = %for.body137.if.end146_crit_edge
+  br i1 undef, label %test1b.exit, label %cond.false.i.i
+
+cond.false.i.i:                                   ; preds = %cond.end.i
+  unreachable
+
+test1b.exit: ; preds = %cond.end.i
+  br i1 undef, label %if.end150, label %for.body137
+
+if.end150:                                        ; preds = %test1b.exit, %test1a.exit, %if.then126, %if.end123
+  br i1 undef, label %for.end176, label %for.body155.lr.ph
+
+for.body155.lr.ph:                                ; preds = %if.end150
+  unreachable
+
+for.end176:                                       ; preds = %if.end150
+  unreachable
+
+return:                                           ; preds = %if.end
+  ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind uwtable }
+
diff --git a/test/Transforms/ArgumentPromotion/dbg.ll b/test/Transforms/ArgumentPromotion/dbg.ll
index 70503afb5870..65cf3678bd99 100644
--- a/test/Transforms/ArgumentPromotion/dbg.ll
+++ b/test/Transforms/ArgumentPromotion/dbg.ll
@@ -1,22 +1,26 @@
 ; RUN: opt < %s -argpromotion -S | FileCheck %s
-; CHECK: call void @test(), !dbg [[DBG_LOC:![0-9]]]
-; CHECK: [[TEST_FN:.*]] = {{.*}} void ()* @test
-; CHECK: [[DBG_LOC]] = metadata !{i32 8, i32 0, metadata [[TEST_FN]], null}
+; CHECK: call void @test(i32 %
+; CHECK: void (i32)* @test, {{.*}} ; [ DW_TAG_subprogram ] {{.*}} [test]
 
-define internal void @test(i32* %X) {
+declare void @sink(i32)
+
+define internal void @test(i32** %X) {
+  %1 = load i32** %X, align 8
+  %2 = load i32* %1, align 8
+  call void @sink(i32 %2)
   ret void
 }
 
-define void @caller() {
-  call void @test(i32* null), !dbg !1
+define void @caller(i32** %Y) {
+  call void @test(i32** %Y)
   ret void
 }
 
 !llvm.module.flags = !{!0}
 !llvm.dbg.cu = !{!3}
 
-!0 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!1 = metadata !{i32 8, i32 0, metadata !2, null}
-!2 = metadata !{i32 786478, null, null, metadata !"test", metadata !"test", metadata !"", i32 3, null, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32*)* @test, null, null, null, i32 3}
-!3 = metadata !{i32 786449, null, i32 4, metadata !"clang version 3.5.0 ", i1 false, metadata !"", i32 0, null, null, metadata !4, null, null, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/pr20038/reduce/<stdin>] [DW_LANG_C_plus_plus]
-!4 = metadata !{metadata !2}
+!0 = !{i32 2, !"Debug Info Version", i32 2}
+!1 = !MDLocation(line: 8, scope: !2)
+!2 = !{!"0x2e\00test\00test\00\003\001\001\000\006\00256\000\003", null, null, null, null, void (i32**)* @test, null, null, null} ; [ DW_TAG_subprogram ]
+!3 = !{!"0x11\004\00clang version 3.5.0 \000\00\000\00\002", null, null, null, !4, null, null} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/blaikie/dev/scratch/pr20038/reduce/<stdin>] [DW_LANG_C_plus_plus]
+!4 = !{!2}
diff --git a/test/Transforms/ArgumentPromotion/fp80.ll b/test/Transforms/ArgumentPromotion/fp80.ll
new file mode 100644
index 000000000000..a770d6013d6b
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/fp80.ll
@@ -0,0 +1,58 @@
+; RUN: opt < %s -argpromotion -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%union.u = type { x86_fp80 }
+%struct.s = type { double, i16, i8, [5 x i8] }
+
+@b = internal global %struct.s { double 3.14, i16 9439, i8 25, [5 x i8] undef }, align 16
+
+%struct.Foo = type { i32, i64 }
+@a = internal global %struct.Foo { i32 1, i64 2 }, align 8
+
+define void @run() {
+entry:
+  tail call i8 @UseLongDoubleUnsafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*))
+  tail call x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 bitcast (%struct.s* @b to %union.u*))
+  call i64 @AccessPaddingOfStruct(%struct.Foo* @a)
+  call i64 @CaptureAStruct(%struct.Foo* @a)
+  ret void
+}
+
+; CHECK: internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) {
+define internal i8 @UseLongDoubleUnsafely(%union.u* byval align 16 %arg) {
+entry:
+  %bitcast = bitcast %union.u* %arg to %struct.s*
+  %gep = getelementptr inbounds %struct.s* %bitcast, i64 0, i32 2
+  %result = load i8* %gep
+  ret i8 %result
+}
+
+; CHECK: internal x86_fp80 @UseLongDoubleSafely(x86_fp80 {{%.*}}) {
+define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 %arg) {
+  %gep = getelementptr inbounds %union.u* %arg, i64 0, i32 0
+  %fp80 = load x86_fp80* %gep
+  ret x86_fp80 %fp80
+}
+
+; CHECK: define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) {
+define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) {
+  %p = bitcast %struct.Foo* %a to i64*
+  %v = load i64* %p
+  ret i64 %v
+}
+
+; CHECK: define internal i64 @CaptureAStruct(%struct.Foo* byval %a) {
+define internal i64 @CaptureAStruct(%struct.Foo* byval %a) {
+entry:
+  %a_ptr = alloca %struct.Foo*
+  br label %loop
+
+loop:
+  %phi = phi %struct.Foo* [ null, %entry ], [ %gep, %loop ]
+  %0   = phi %struct.Foo* [ %a, %entry ],   [ %0, %loop ]
+  store %struct.Foo* %phi, %struct.Foo** %a_ptr
+  %gep = getelementptr %struct.Foo* %a, i64 0
+  br label %loop
+}
diff --git a/test/Transforms/ArgumentPromotion/reserve-tbaa.ll b/test/Transforms/ArgumentPromotion/reserve-tbaa.ll
index 4688a83f2425..db9d70d337a1 100644
--- a/test/Transforms/ArgumentPromotion/reserve-tbaa.ll
+++ b/test/Transforms/ArgumentPromotion/reserve-tbaa.ll
@@ -37,16 +37,16 @@ entry:
   ret i32 0
 }
 
-!1 = metadata !{metadata !2, metadata !2, i64 0}
-!2 = metadata !{metadata !"long", metadata !3, i64 0}
-!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
-!4 = metadata !{metadata !"Simple C/C++ TBAA"}
-!5 = metadata !{metadata !6, metadata !6, i64 0}
-!6 = metadata !{metadata !"int", metadata !3, i64 0}
-!7 = metadata !{metadata !3, metadata !3, i64 0}
-!8 = metadata !{metadata !9, metadata !9, i64 0}
-!9 = metadata !{metadata !"any pointer", metadata !3, i64 0}
-; CHECK: ![[I32]] = metadata !{metadata ![[I32_TYPE:[0-9]+]], metadata ![[I32_TYPE]], i64 0}
-; CHECK: ![[I32_TYPE]] = metadata !{metadata !"int", metadata !{{.*}}, i64 0}
-; CHECK: ![[LONG]] = metadata !{metadata ![[LONG_TYPE:[0-9]+]], metadata ![[LONG_TYPE]], i64 0}
-; CHECK: ![[LONG_TYPE]] = metadata !{metadata !"long", metadata !{{.*}}, i64 0}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"long", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !3, i64 0}
+!7 = !{!3, !3, i64 0}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"any pointer", !3, i64 0}
+; CHECK: ![[I32]] = !{![[I32_TYPE:[0-9]+]], ![[I32_TYPE]], i64 0}
+; CHECK: ![[I32_TYPE]] = !{!"int", !{{.*}}, i64 0}
+; CHECK: ![[LONG]] = !{![[LONG_TYPE:[0-9]+]], ![[LONG_TYPE]], i64 0}
+; CHECK: ![[LONG_TYPE]] = !{!"long", !{{.*}}, i64 0}
diff --git a/test/Transforms/ArgumentPromotion/tail.ll b/test/Transforms/ArgumentPromotion/tail.ll
index 43b8996ca18a..2ea387cd2645 100644
--- a/test/Transforms/ArgumentPromotion/tail.ll
+++ b/test/Transforms/ArgumentPromotion/tail.ll
@@ -1,6 +1,8 @@
 ; RUN: opt %s -argpromotion -S -o - | FileCheck %s
 ; PR14710
 
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
 %pair = type { i32, i32 }
 
 declare i8* @foo(%pair*)
diff --git a/test/Transforms/ArgumentPromotion/variadic.ll b/test/Transforms/ArgumentPromotion/variadic.ll
new file mode 100644
index 000000000000..0ae52b3bbbd1
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/variadic.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -argpromotion -S | FileCheck %s
+
+; Unused arguments from variadic functions cannot be eliminated as that changes
+; their classiciation according to the SysV amd64 ABI. Clang and other frontends
+; bake in the classification when they use things like byval, as in this test.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.tt0 = type { i64, i64 }
+%struct.__va_list_tag = type { i32, i32, i8*, i8* }
+
+@t45 = internal global %struct.tt0 { i64 1335139741, i64 438042995 }, align 8
+
+; Function Attrs: nounwind uwtable
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
+entry:
+  tail call void (i8*, i8*, i8*, i8*, i8*, ...)* @callee_t0f(i8* undef, i8* undef, i8* undef, i8* undef, i8* undef, %struct.tt0* byval align 8 @t45)
+  ret i32 0
+}
+
+; Function Attrs: nounwind uwtable
+define internal void @callee_t0f(i8* nocapture readnone %tp13, i8* nocapture readnone %tp14, i8* nocapture readnone %tp15, i8* nocapture readnone %tp16, i8* nocapture readnone %tp17, ...) {
+entry:
+  ret void
+}
+
+; CHECK-LABEL: define internal void @callee_t0f(i8* nocapture readnone %tp13, i8* nocapture readnone %tp14, i8* nocapture readnone %tp15, i8* nocapture readnone %tp16, i8* nocapture readnone %tp17, ...)
diff --git a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
new file mode 100644
index 000000000000..282d42f75f05
--- /dev/null
+++ b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
@@ -0,0 +1,364 @@
+; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-expand %s | FileCheck %s
+
+define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
+; CHECK-LABEL: @test_atomic_xchg_i8
+; CHECK-NOT: dmb
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK-NOT: dmb
+; CHECK: ret i8 [[OLDVAL]]
+  %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
+  ret i8 %res
+}
+
+define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
+; CHECK-LABEL: @test_atomic_add_i16
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
+; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend
+; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: ret i16 [[OLDVAL]]
+  %res = atomicrmw add i16* %ptr, i16 %addend seq_cst
+  ret i16 %res
+}
+
+define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
+; CHECK-LABEL: @test_atomic_sub_i32
+; CHECK-NOT: dmb
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
+; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: ret i32 [[OLDVAL]]
+  %res = atomicrmw sub i32* %ptr, i32 %subend acquire
+  ret i32 %res
+}
+
+define i8 @test_atomic_and_i8(i8* %ptr, i8 %andend) {
+; CHECK-LABEL: @test_atomic_and_i8
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[NEWVAL:%.*]] = and i8 [[OLDVAL]], %andend
+; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK-NOT: dmb
+; CHECK: ret i8 [[OLDVAL]]
+  %res = atomicrmw and i8* %ptr, i8 %andend release
+  ret i8 %res
+}
+
+define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) {
+; CHECK-LABEL: @test_atomic_nand_i16
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
+; CHECK: [[NEWVAL_TMP:%.*]] = and i16 [[OLDVAL]], %nandend
+; CHECK: [[NEWVAL:%.*]] = xor i16 [[NEWVAL_TMP]], -1
+; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: ret i16 [[OLDVAL]]
+  %res = atomicrmw nand i16* %ptr, i16 %nandend seq_cst
+  ret i16 %res
+}
+
+define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
+; CHECK-LABEL: @test_atomic_or_i64
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
+; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
+; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
+; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
+; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
+; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
+; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
+; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend
+; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32
+; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32
+; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: ret i64 [[OLDVAL]]
+  %res = atomicrmw or i64* %ptr, i64 %orend seq_cst
+  ret i64 %res
+}
+
+define i8 @test_atomic_xor_i8(i8* %ptr, i8 %xorend) {
+; CHECK-LABEL: @test_atomic_xor_i8
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[NEWVAL:%.*]] = xor i8 [[OLDVAL]], %xorend
+; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: ret i8 [[OLDVAL]]
+  %res = atomicrmw xor i8* %ptr, i8 %xorend seq_cst
+  ret i8 %res
+}
+
+define i8 @test_atomic_max_i8(i8* %ptr, i8 %maxend) {
+; CHECK-LABEL: @test_atomic_max_i8
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[WANT_OLD:%.*]] = icmp sgt i8 [[OLDVAL]], %maxend
+; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %maxend
+; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: ret i8 [[OLDVAL]]
+  %res = atomicrmw max i8* %ptr, i8 %maxend seq_cst
+  ret i8 %res
+}
+
+define i8 @test_atomic_min_i8(i8* %ptr, i8 %minend) {
+; CHECK-LABEL: @test_atomic_min_i8
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[WANT_OLD:%.*]] = icmp sle i8 [[OLDVAL]], %minend
+; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %minend
+; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: ret i8 [[OLDVAL]]
+  %res = atomicrmw min i8* %ptr, i8 %minend seq_cst
+  ret i8 %res
+}
+
+define i8 @test_atomic_umax_i8(i8* %ptr, i8 %umaxend) {
+; CHECK-LABEL: @test_atomic_umax_i8
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[WANT_OLD:%.*]] = icmp ugt i8 [[OLDVAL]], %umaxend
+; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %umaxend
+; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: ret i8 [[OLDVAL]]
+  %res = atomicrmw umax i8* %ptr, i8 %umaxend seq_cst
+  ret i8 %res
+}
+
+define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) {
+; CHECK-LABEL: @test_atomic_umin_i8
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[WANT_OLD:%.*]] = icmp ule i8 [[OLDVAL]], %uminend
+; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %uminend
+; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: ret i8 [[OLDVAL]]
+  %res = atomicrmw umin i8* %ptr, i8 %uminend seq_cst
+  ret i8 %res
+}
+
+define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
+; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[DONE]]
+
+; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i8 [[OLDVAL]]
+
+  %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pairold, 0
+  ret i8 %old
+}
+
+define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
+; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: dmb
+; CHECK: br label %[[DONE]]
+
+; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i16 [[OLDVAL]]
+
+  %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
+  %old = extractvalue { i16, i1 } %pairold, 0
+  ret i16 %old
+}
+
+define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
+; CHECK-NOT: dmb
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[DONE]]
+
+; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i32 [[OLDVAL]]
+
+  %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
+  %old = extractvalue { i32, i1 } %pairold, 0
+  ret i32 %old
+}
+
+define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
+; CHECK-NOT: dmb
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
+; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
+; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
+; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
+; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
+; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
+; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
+; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
+; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: dmb
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: dmb
+; CHECK: br label %[[DONE]]
+
+; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i64 [[OLDVAL]]
+
+  %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
+  %old = extractvalue { i64, i1 } %pairold, 0
+  ret i64 %old
+}
diff --git a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
new file mode 100644
index 000000000000..42d7b781006d
--- /dev/null
+++ b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
@@ -0,0 +1,226 @@
+; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-expand %s | FileCheck %s
+
+define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
+; CHECK-LABEL: @test_atomic_xchg_i8
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
+; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK-NOT: fence
+; CHECK: ret i8 [[OLDVAL]]
+  %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
+  ret i8 %res
+}
+
+define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
+; CHECK-LABEL: @test_atomic_add_i16
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
+; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend
+; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK-NOT: fence
+; CHECK: ret i16 [[OLDVAL]]
+  %res = atomicrmw add i16* %ptr, i16 %addend seq_cst
+  ret i16 %res
+}
+
+define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
+; CHECK-LABEL: @test_atomic_sub_i32
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
+; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr)
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK-NOT: fence
+; CHECK: ret i32 [[OLDVAL]]
+  %res = atomicrmw sub i32* %ptr, i32 %subend acquire
+  ret i32 %res
+}
+
+define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
+; CHECK-LABEL: @test_atomic_or_i64
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldaexd(i8* [[PTR8]])
+; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
+; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
+; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
+; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
+; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
+; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
+; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend
+; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32
+; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32
+; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
+; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
+; CHECK: [[END]]:
+; CHECK-NOT: fence
+; CHECK: ret i64 [[OLDVAL]]
+  %res = atomicrmw or i64* %ptr, i64 %orend seq_cst
+  ret i64 %res
+}
+
+define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
+; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.stlex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence_cst
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence_cst
+; CHECK: br label %[[DONE]]
+
+; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i8 [[OLDVAL]]
+
+  %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
+  %old = extractvalue { i8, i1 } %pairold, 0
+  ret i8 %old
+}
+
+define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
+; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
+; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence
+; CHECK: br label %[[DONE]]
+
+; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i16 [[OLDVAL]]
+
+  %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
+  %old = extractvalue { i16, i1 } %pairold, 0
+  ret i16 %old
+}
+
+define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence_cst
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence_cst
+; CHECK: br label %[[DONE]]
+
+; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i32 [[OLDVAL]]
+
+  %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
+  %old = extractvalue { i32, i1 } %pairold, 0
+  ret i32 %old
+}
+
+define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
+; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
+; CHECK-NOT: fence
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
+; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
+; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
+; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
+; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
+; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
+; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
+; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
+; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
+; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
+; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
+; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: fence_cst
+; CHECK: br label %[[DONE:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: fence_cst
+; CHECK: br label %[[DONE]]
+
+; CHECK: [[DONE]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i64 [[OLDVAL]]
+
+  %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
+  %old = extractvalue { i64, i1 } %pairold, 0
+  ret i64 %old
+}
diff --git a/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll b/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
new file mode 100644
index 000000000000..54653000f5d8
--- /dev/null
+++ b/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
@@ -0,0 +1,98 @@
+; RUN: opt -atomic-expand -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
+
+define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: @test_cmpxchg_seq_cst
+; Intrinsic for "dmb ishst" is then expected
+; CHECK:     call void @llvm.arm.dmb(i32 10)
+; CHECK:     br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK:     br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK:     call void @llvm.arm.dmb(i32 11)
+; CHECK:     br label %[[END:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK:     call void @llvm.arm.dmb(i32 11)
+; CHECK:     br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK:     ret i32 [[LOADED]]
+
+  %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+  %oldval = extractvalue { i32, i1 } %pair, 0
+  ret i32 %oldval
+}
+
+define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: @test_cmpxchg_weak_fail
+; CHECK:     call void @llvm.arm.dmb(i32 10)
+; CHECK:     br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK:     br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK:     call void @llvm.arm.dmb(i32 11)
+; CHECK:     br label %[[END:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: dmb
+; CHECK:     br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK:     ret i1 [[SUCCESS]]
+
+  %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+  %oldval = extractvalue { i32, i1 } %pair, 1
+  ret i1 %oldval
+}
+
+define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
+; CHECK-LABEL: @test_cmpxchg_monotonic
+; CHECK-NOT: dmb
+; CHECK:     br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK:     br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK-NOT: dmb
+; CHECK:     br label %[[END:.*]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK-NOT: dmb
+; CHECK:     br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK:     ret i32 [[LOADED]]
+
+  %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new monotonic monotonic
+  %oldval = extractvalue { i32, i1 } %pair, 0
+  ret i32 %oldval
+}
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg b/test/Transforms/AtomicExpand/ARM/lit.local.cfg
index 98c6700c209d..98c6700c209d 100644
--- a/test/Transforms/AtomicExpandLoadLinked/ARM/lit.local.cfg
+++ b/test/Transforms/AtomicExpand/ARM/lit.local.cfg
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll
deleted file mode 100644
index 6a93016fc26e..000000000000
--- a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v7.ll
+++ /dev/null
@@ -1,364 +0,0 @@
-; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-ll-sc %s | FileCheck %s
-
-define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
-; CHECK-LABEL: @test_atomic_xchg_i8
-; CHECK-NOT: fence
-; CHECK: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
-; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
-; CHECK: [[END]]:
-; CHECK-NOT: fence
-; CHECK: ret i8 [[OLDVAL]]
-  %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
-  ret i8 %res
-}
-
-define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
-; CHECK-LABEL: @test_atomic_add_i16
-; CHECK: fence release
-; CHECK: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
-; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend
-; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
-; CHECK: [[END]]:
-; CHECK: fence seq_cst
-; CHECK: ret i16 [[OLDVAL]]
-  %res = atomicrmw add i16* %ptr, i16 %addend seq_cst
-  ret i16 %res
-}
-
-define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
-; CHECK-LABEL: @test_atomic_sub_i32
-; CHECK-NOT: fence
-; CHECK: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
-; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
-; CHECK: [[END]]:
-; CHECK: fence acquire
-; CHECK: ret i32 [[OLDVAL]]
-  %res = atomicrmw sub i32* %ptr, i32 %subend acquire
-  ret i32 %res
-}
-
-define i8 @test_atomic_and_i8(i8* %ptr, i8 %andend) {
-; CHECK-LABEL: @test_atomic_and_i8
-; CHECK: fence release
-; CHECK: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
-; CHECK: [[NEWVAL:%.*]] = and i8 [[OLDVAL]], %andend
-; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
-; CHECK: [[END]]:
-; CHECK-NOT: fence
-; CHECK: ret i8 [[OLDVAL]]
-  %res = atomicrmw and i8* %ptr, i8 %andend release
-  ret i8 %res
-}
-
-define i16 @test_atomic_nand_i16(i16* %ptr, i16 %nandend) {
-; CHECK-LABEL: @test_atomic_nand_i16
-; CHECK: fence release
-; CHECK: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
-; CHECK: [[NEWVAL_TMP:%.*]] = and i16 [[OLDVAL]], %nandend
-; CHECK: [[NEWVAL:%.*]] = xor i16 [[NEWVAL_TMP]], -1
-; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
-; CHECK: [[END]]:
-; CHECK: fence seq_cst
-; CHECK: ret i16 [[OLDVAL]]
-  %res = atomicrmw nand i16* %ptr, i16 %nandend seq_cst
-  ret i16 %res
-}
-
-define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
-; CHECK-LABEL: @test_atomic_or_i64
-; CHECK: fence release
-; CHECK: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
-; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
-; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
-; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
-; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
-; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
-; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
-; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
-; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend
-; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32
-; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32
-; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
-; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
-; CHECK: [[END]]:
-; CHECK: fence seq_cst
-; CHECK: ret i64 [[OLDVAL]]
-  %res = atomicrmw or i64* %ptr, i64 %orend seq_cst
-  ret i64 %res
-}
-
-define i8 @test_atomic_xor_i8(i8* %ptr, i8 %xorend) {
-; CHECK-LABEL: @test_atomic_xor_i8
-; CHECK: fence release
-; CHECK: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
-; CHECK: [[NEWVAL:%.*]] = xor i8 [[OLDVAL]], %xorend
-; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
-; CHECK: [[END]]:
-; CHECK: fence seq_cst
-; CHECK: ret i8 [[OLDVAL]]
-  %res = atomicrmw xor i8* %ptr, i8 %xorend seq_cst
-  ret i8 %res
-}
-
-define i8 @test_atomic_max_i8(i8* %ptr, i8 %maxend) {
-; CHECK-LABEL: @test_atomic_max_i8
-; CHECK: fence release
-; CHECK: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
-; CHECK: [[WANT_OLD:%.*]] = icmp sgt i8 [[OLDVAL]], %maxend
-; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %maxend
-; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
-; CHECK: [[END]]:
-; CHECK: fence seq_cst
-; CHECK: ret i8 [[OLDVAL]]
-  %res = atomicrmw max i8* %ptr, i8 %maxend seq_cst
-  ret i8 %res
-}
-
-define i8 @test_atomic_min_i8(i8* %ptr, i8 %minend) {
-; CHECK-LABEL: @test_atomic_min_i8
-; CHECK: fence release
-; CHECK: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
-; CHECK: [[WANT_OLD:%.*]] = icmp sle i8 [[OLDVAL]], %minend
-; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %minend
-; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
-; CHECK: [[END]]:
-; CHECK: fence seq_cst
-; CHECK: ret i8 [[OLDVAL]]
-  %res = atomicrmw min i8* %ptr, i8 %minend seq_cst
-  ret i8 %res
-}
-
-define i8 @test_atomic_umax_i8(i8* %ptr, i8 %umaxend) {
-; CHECK-LABEL: @test_atomic_umax_i8
-; CHECK: fence release
-; CHECK: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
-; CHECK: [[WANT_OLD:%.*]] = icmp ugt i8 [[OLDVAL]], %umaxend
-; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %umaxend
-; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
-; CHECK: [[END]]:
-; CHECK: fence seq_cst
-; CHECK: ret i8 [[OLDVAL]]
-  %res = atomicrmw umax i8* %ptr, i8 %umaxend seq_cst
-  ret i8 %res
-}
-
-define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) {
-; CHECK-LABEL: @test_atomic_umin_i8
-; CHECK: fence release
-; CHECK: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
-; CHECK: [[WANT_OLD:%.*]] = icmp ule i8 [[OLDVAL]], %uminend
-; CHECK: [[NEWVAL:%.*]] = select i1 [[WANT_OLD]], i8 [[OLDVAL]], i8 %uminend
-; CHECK: [[NEWVAL32:%.*]] = zext i8 [[NEWVAL]] to i32
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
-; CHECK: [[END]]:
-; CHECK: fence seq_cst
-; CHECK: ret i8 [[OLDVAL]]
-  %res = atomicrmw umin i8* %ptr, i8 %uminend seq_cst
-  ret i8 %res
-}
-
-define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
-; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
-; CHECK: fence release
-; CHECK: br label %[[LOOP:.*]]
-
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
-; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
-
-; CHECK: [[TRY_STORE]]:
-; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
-; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
-; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
-; CHECK: [[SUCCESS_BB]]:
-; CHECK: fence seq_cst
-; CHECK: br label %[[DONE:.*]]
-
-; CHECK: [[FAILURE_BB]]:
-; CHECK: fence seq_cst
-; CHECK: br label %[[DONE]]
-
-; CHECK: [[DONE]]:
-; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i8 [[OLDVAL]]
-
-  %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
-  %old = extractvalue { i8, i1 } %pairold, 0
-  ret i8 %old
-}
-
-define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
-; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
-; CHECK: fence release
-; CHECK: br label %[[LOOP:.*]]
-
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
-; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
-
-; CHECK: [[TRY_STORE]]:
-; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
-; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
-; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
-; CHECK: [[SUCCESS_BB]]:
-; CHECK: fence seq_cst
-; CHECK: br label %[[DONE:.*]]
-
-; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
-; CHECK: br label %[[DONE]]
-
-; CHECK: [[DONE]]:
-; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i16 [[OLDVAL]]
-
-  %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
-  %old = extractvalue { i16, i1 } %pairold, 0
-  ret i16 %old
-}
-
-define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
-; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
-; CHECK-NOT: fence
-; CHECK: br label %[[LOOP:.*]]
-
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %ptr)
-; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
-
-; CHECK: [[TRY_STORE]]:
-; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
-; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
-; CHECK: [[SUCCESS_BB]]:
-; CHECK: fence acquire
-; CHECK: br label %[[DONE:.*]]
-
-; CHECK: [[FAILURE_BB]]:
-; CHECK: fence acquire
-; CHECK: br label %[[DONE]]
-
-; CHECK: [[DONE]]:
-; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i32 [[OLDVAL]]
-
-  %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
-  %old = extractvalue { i32, i1 } %pairold, 0
-  ret i32 %old
-}
-
-define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
-; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
-; CHECK-NOT: fence
-; CHECK: br label %[[LOOP:.*]]
-
-; CHECK: [[LOOP]]:
-; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
-; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
-; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
-; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
-; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
-; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
-; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
-; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
-; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
-
-; CHECK: [[TRY_STORE]]:
-; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
-; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
-; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
-; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
-; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
-; CHECK: [[SUCCESS_BB]]:
-; CHECK-NOT: fence
-; CHECK: br label %[[DONE:.*]]
-
-; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
-; CHECK: br label %[[DONE]]
-
-; CHECK: [[DONE]]:
-; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i64 [[OLDVAL]]
-
-  %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
-  %old = extractvalue { i64, i1 } %pairold, 0
-  ret i64 %old
-}
-\ No newline at end of file
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll b/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll
deleted file mode 100644
index 8092c1010ff5..000000000000
--- a/test/Transforms/AtomicExpandLoadLinked/ARM/atomic-expansion-v8.ll
+++ /dev/null
@@ -1,226 +0,0 @@
-; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-ll-sc %s | FileCheck %s
-
-define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
-; CHECK-LABEL: @test_atomic_xchg_i8
-; CHECK-NOT: fence
-; CHECK: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
-; CHECK: [[NEWVAL32:%.*]] = zext i8 %xchgend to i32
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
-; CHECK: [[END]]:
-; CHECK-NOT: fence
-; CHECK: ret i8 [[OLDVAL]]
-  %res = atomicrmw xchg i8* %ptr, i8 %xchgend monotonic
-  ret i8 %res
-}
-
-define i16 @test_atomic_add_i16(i16* %ptr, i16 %addend) {
-; CHECK-LABEL: @test_atomic_add_i16
-; CHECK-NOT: fence
-; CHECK: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i16
-; CHECK: [[NEWVAL:%.*]] = add i16 [[OLDVAL]], %addend
-; CHECK: [[NEWVAL32:%.*]] = zext i16 [[NEWVAL]] to i32
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
-; CHECK: [[END]]:
-; CHECK-NOT: fence
-; CHECK: ret i16 [[OLDVAL]]
-  %res = atomicrmw add i16* %ptr, i16 %addend seq_cst
-  ret i16 %res
-}
-
-define i32 @test_atomic_sub_i32(i32* %ptr, i32 %subend) {
-; CHECK-LABEL: @test_atomic_sub_i32
-; CHECK-NOT: fence
-; CHECK: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
-; CHECK: [[NEWVAL:%.*]] = sub i32 [[OLDVAL]], %subend
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 [[NEWVAL]], i32* %ptr)
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
-; CHECK: [[END]]:
-; CHECK-NOT: fence
-; CHECK: ret i32 [[OLDVAL]]
-  %res = atomicrmw sub i32* %ptr, i32 %subend acquire
-  ret i32 %res
-}
-
-define i64 @test_atomic_or_i64(i64* %ptr, i64 %orend) {
-; CHECK-LABEL: @test_atomic_or_i64
-; CHECK-NOT: fence
-; CHECK: br label %[[LOOP:.*]]
-; CHECK: [[LOOP]]:
-; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
-; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldaexd(i8* [[PTR8]])
-; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
-; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
-; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
-; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
-; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
-; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
-; CHECK: [[NEWVAL:%.*]] = or i64 [[OLDVAL]], %orend
-; CHECK: [[NEWLO:%.*]] = trunc i64 [[NEWVAL]] to i32
-; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 [[NEWVAL]], 32
-; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
-; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.stlexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
-; CHECK: [[TST:%.*]] = icmp ne i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[LOOP]], label %[[END:.*]]
-; CHECK: [[END]]:
-; CHECK-NOT: fence
-; CHECK: ret i64 [[OLDVAL]]
-  %res = atomicrmw or i64* %ptr, i64 %orend seq_cst
-  ret i64 %res
-}
-
-define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
-; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
-; CHECK-NOT: fence
-; CHECK: br label %[[LOOP:.*]]
-
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i8(i8* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
-; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
-
-; CHECK: [[TRY_STORE]]:
-; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
-; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.stlex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
-; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
-; CHECK: [[SUCCESS_BB]]:
-; CHECK-NOT: fence_cst
-; CHECK: br label %[[DONE:.*]]
-
-; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence_cst
-; CHECK: br label %[[DONE]]
-
-; CHECK: [[DONE]]:
-; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i8 [[OLDVAL]]
-
-  %pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
-  %old = extractvalue { i8, i1 } %pairold, 0
-  ret i8 %old
-}
-
-define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
-; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
-; CHECK-NOT: fence
-; CHECK: br label %[[LOOP:.*]]
-
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldaex.p0i16(i16* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
-; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
-
-; CHECK: [[TRY_STORE]]:
-; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
-; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.stlex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
-; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
-; CHECK: [[SUCCESS_BB]]:
-; CHECK-NOT: fence
-; CHECK: br label %[[DONE:.*]]
-
-; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
-; CHECK: br label %[[DONE]]
-
-; CHECK: [[DONE]]:
-; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i16 [[OLDVAL]]
-
-  %pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
-  %old = extractvalue { i16, i1 } %pairold, 0
-  ret i16 %old
-}
-
-define i32 @test_cmpxchg_i32_acquire_acquire(i32* %ptr, i32 %desired, i32 %newval) {
-; CHECK-LABEL: @test_cmpxchg_i32_acquire_acquire
-; CHECK-NOT: fence
-; CHECK: br label %[[LOOP:.*]]
-
-; CHECK: [[LOOP]]:
-; CHECK: [[OLDVAL:%.*]] = call i32 @llvm.arm.ldaex.p0i32(i32* %ptr)
-; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
-
-; CHECK: [[TRY_STORE]]:
-; CHECK: [[TRYAGAIN:%.*]] =  call i32 @llvm.arm.strex.p0i32(i32 %newval, i32* %ptr)
-; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
-; CHECK: [[SUCCESS_BB]]:
-; CHECK-NOT: fence_cst
-; CHECK: br label %[[DONE:.*]]
-
-; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence_cst
-; CHECK: br label %[[DONE]]
-
-; CHECK: [[DONE]]:
-; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i32 [[OLDVAL]]
-
-  %pairold = cmpxchg i32* %ptr, i32 %desired, i32 %newval acquire acquire
-  %old = extractvalue { i32, i1 } %pairold, 0
-  ret i32 %old
-}
-
-define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %newval) {
-; CHECK-LABEL: @test_cmpxchg_i64_monotonic_monotonic
-; CHECK-NOT: fence
-; CHECK: br label %[[LOOP:.*]]
-
-; CHECK: [[LOOP]]:
-; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
-; CHECK: [[LOHI:%.*]] = call { i32, i32 } @llvm.arm.ldrexd(i8* [[PTR8]])
-; CHECK: [[LO:%.*]] = extractvalue { i32, i32 } [[LOHI]], 0
-; CHECK: [[HI:%.*]] = extractvalue { i32, i32 } [[LOHI]], 1
-; CHECK: [[LO64:%.*]] = zext i32 [[LO]] to i64
-; CHECK: [[HI64_TMP:%.*]] = zext i32 [[HI]] to i64
-; CHECK: [[HI64:%.*]] = shl i64 [[HI64_TMP]], 32
-; CHECK: [[OLDVAL:%.*]] = or i64 [[LO64]], [[HI64]]
-; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i64 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
-
-; CHECK: [[TRY_STORE]]:
-; CHECK: [[NEWLO:%.*]] = trunc i64 %newval to i32
-; CHECK: [[NEWHI_TMP:%.*]] = lshr i64 %newval, 32
-; CHECK: [[NEWHI:%.*]] = trunc i64 [[NEWHI_TMP]] to i32
-; CHECK: [[PTR8:%.*]] = bitcast i64* %ptr to i8*
-; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strexd(i32 [[NEWLO]], i32 [[NEWHI]], i8* [[PTR8]])
-; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
-
-; CHECK: [[SUCCESS_BB]]:
-; CHECK-NOT: fence_cst
-; CHECK: br label %[[DONE:.*]]
-
-; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence_cst
-; CHECK: br label %[[DONE]]
-
-; CHECK: [[DONE]]:
-; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i64 [[OLDVAL]]
-
-  %pairold = cmpxchg i64* %ptr, i64 %desired, i64 %newval monotonic monotonic
-  %old = extractvalue { i64, i1 } %pairold, 0
-  ret i64 %old
-}
-\ No newline at end of file
diff --git a/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll b/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll
deleted file mode 100644
index 07a4a7f26e62..000000000000
--- a/test/Transforms/AtomicExpandLoadLinked/ARM/cmpxchg-weak.ll
+++ /dev/null
@@ -1,97 +0,0 @@
-; RUN: opt -atomic-ll-sc -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
-
-define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
-; CHECK-LABEL: @test_cmpxchg_seq_cst
-; CHECK:     fence release
-; CHECK:     br label %[[START:.*]]
-
-; CHECK: [[START]]:
-; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
-; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK:     br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
-
-; CHECK: [[TRY_STORE]]:
-; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
-; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
-; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB]]
-
-; CHECK: [[SUCCESS_BB]]:
-; CHECK:     fence seq_cst
-; CHECK:     br label %[[END:.*]]
-
-; CHECK: [[FAILURE_BB]]:
-; CHECK:     fence seq_cst
-; CHECK:     br label %[[END]]
-
-; CHECK: [[END]]:
-; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK:     ret i32 [[LOADED]]
-
-  %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
-  %oldval = extractvalue { i32, i1 } %pair, 0
-  ret i32 %oldval
-}
-
-define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
-; CHECK-LABEL: @test_cmpxchg_weak_fail
-; CHECK:     fence release
-; CHECK:     br label %[[START:.*]]
-
-; CHECK: [[START]]:
-; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
-; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK:     br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
-
-; CHECK: [[TRY_STORE]]:
-; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
-; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
-; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
-
-; CHECK: [[SUCCESS_BB]]:
-; CHECK:     fence seq_cst
-; CHECK:     br label %[[END:.*]]
-
-; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
-; CHECK:     br label %[[END]]
-
-; CHECK: [[END]]:
-; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK:     ret i1 [[SUCCESS]]
-
-  %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst monotonic
-  %oldval = extractvalue { i32, i1 } %pair, 1
-  ret i1 %oldval
-}
-
-define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
-; CHECK-LABEL: @test_cmpxchg_monotonic
-; CHECK-NOT: fence
-; CHECK:     br label %[[START:.*]]
-
-; CHECK: [[START]]:
-; CHECK:     [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
-; CHECK:     [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK:     br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[FAILURE_BB:.*]]
-
-; CHECK: [[TRY_STORE]]:
-; CHECK:     [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
-; CHECK:     [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
-; CHECK:     br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
-
-; CHECK: [[SUCCESS_BB]]:
-; CHECK-NOT: fence
-; CHECK:     br label %[[END:.*]]
-
-; CHECK: [[FAILURE_BB]]:
-; CHECK-NOT: fence
-; CHECK:     br label %[[END]]
-
-; CHECK: [[END]]:
-; CHECK:     [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK:     ret i32 [[LOADED]]
-
-  %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new monotonic monotonic
-  %oldval = extractvalue { i32, i1 } %pair, 0
-  ret i32 %oldval
-}
diff --git a/test/Transforms/BBVectorize/loop1.ll b/test/Transforms/BBVectorize/loop1.ll
index ed7be15f7adf..ca361703adcb 100644
--- a/test/Transforms/BBVectorize/loop1.ll
+++ b/test/Transforms/BBVectorize/loop1.ll
@@ -83,7 +83,7 @@ for.body:                                         ; preds = %for.body, %entry
 ; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11
 ; CHECK-UNRL: %4 = bitcast double* %arrayidx14 to <2 x double>*
 ; CHECK-UNRL: store <2 x double> %add12, <2 x double>* %4, align 8
-; CHECK-UNRL: %indvars.iv.next.1 = add i64 %indvars.iv, 2
+; CHECK-UNRL: %indvars.iv.next.1 = add nsw i64 %indvars.iv, 2
 ; CHECK-UNRL: %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32
 ; CHECK-UNRL: %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 10
 ; CHECK-UNRL: br i1 %exitcond.1, label %for.end, label %for.body
diff --git a/test/Transforms/BBVectorize/metadata.ll b/test/Transforms/BBVectorize/metadata.ll
index ac7297dd5417..874fbb87967c 100644
--- a/test/Transforms/BBVectorize/metadata.ll
+++ b/test/Transforms/BBVectorize/metadata.ll
@@ -41,9 +41,9 @@ entry:
 ; CHECK: ret void
 }
 
-!0 = metadata !{i64 0, i64 2}
-!1 = metadata !{i64 3, i64 5}
+!0 = !{i64 0, i64 2}
+!1 = !{i64 3, i64 5}
 
-!2 = metadata !{ float 5.0 }
-!3 = metadata !{ float 2.5 }
+!2 = !{ float 5.0 }
+!3 = !{ float 2.5 }
 
diff --git a/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll b/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
index 598ea0e354e1..d4b94fe62c71 100644
--- a/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
+++ b/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -std-compile-opts -o - | llc -no-integrated-as -o - | grep bork_directive | wc -l | grep 2
+; RUN: opt < %s -O3 -o - | llc -no-integrated-as -o - | grep bork_directive | wc -l | grep 2
 
 ;; We don't want branch folding to fold asm directives.
 
diff --git a/test/Transforms/CodeGenPrepare/AArch64/lit.local.cfg b/test/Transforms/CodeGenPrepare/AArch64/lit.local.cfg
new file mode 100644
index 000000000000..cec29af5bbe4
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/AArch64/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/CodeGenPrepare/AArch64/trunc-weird-user.ll b/test/Transforms/CodeGenPrepare/AArch64/trunc-weird-user.ll
new file mode 100644
index 000000000000..b4e6a409288f
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/AArch64/trunc-weird-user.ll
@@ -0,0 +1,36 @@
+; RUN: opt -S -codegenprepare -mtriple=arm64-apple-ios7.0 %s | FileCheck %s
+
+%foo = type { i8 }
+
+define %foo @test_merge(i32 %in) {
+; CHECK-LABEL: @test_merge
+
+  ; CodeGenPrepare was requesting the EVT for { i8 } to determine
+  ; whether the insertvalue user of the trunc was legal. This
+  ; asserted.
+
+; CHECK: insertvalue %foo undef, i8 %byte, 0
+  %lobit = lshr i32 %in, 31
+  %byte = trunc i32 %lobit to i8
+  %struct = insertvalue %foo undef, i8 %byte, 0
+  ret %"foo" %struct
+}
+
+define i64* @test_merge_PR21548(i32 %a, i64* %p1, i64* %p2, i64* %p3) {
+; CHECK-LABEL: @test_merge_PR21548
+  %as = lshr i32 %a, 3
+  %Tr = trunc i32 %as to i1
+  br i1 %Tr, label %BB2, label %BB3
+
+BB2:
+  ; Similarly to above:
+  ; CodeGenPrepare was requesting the EVT for i8* to determine
+  ; whether the select user of the trunc was legal. This asserted.
+
+; CHECK: select i1 {{%.*}}, i64* %p1, i64* %p2
+  %p = select i1 %Tr, i64* %p1, i64* %p2
+  ret i64* %p
+
+BB3:
+  ret i64* %p3
+}
diff --git a/test/Transforms/ConstProp/trunc_vec.ll b/test/Transforms/ConstProp/trunc_vec.ll
new file mode 100644
index 000000000000..99db329cdd2b
--- /dev/null
+++ b/test/Transforms/ConstProp/trunc_vec.ll
@@ -0,0 +1,9 @@
+; RUN: opt -constprop < %s
+
+; Make sure we don't crash on this one
+
+define <8 x i8> @test_truc_vec() {
+  %x = bitcast <2 x i64> <i64 1, i64 2> to <8 x i16>
+  %y = trunc <8 x i16> %x to <8 x i8>
+  ret <8 x i8> %y
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/icmp.ll b/test/Transforms/CorrelatedValuePropagation/icmp.ll
new file mode 100644
index 000000000000..c2863ffda0fb
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/icmp.ll
@@ -0,0 +1,63 @@
+; RUN: opt -correlated-propagation -S %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; Function Attrs: noreturn
+declare void @check1(i1) #1
+
+; Function Attrs: noreturn
+declare void @check2(i1) #1
+
+; Make sure we propagate the value of %tmp35 to the true/false cases
+; CHECK-LABEL: @test1
+; CHECK: call void @check1(i1 false)
+; CHECK: call void @check2(i1 true)
+define void @test1(i64 %tmp35) {
+bb:
+  %tmp36 = icmp sgt i64 %tmp35, 0
+  br i1 %tmp36, label %bb_true, label %bb_false
+
+bb_true:
+  %tmp47 = icmp slt i64 %tmp35, 0
+  tail call void @check1(i1 %tmp47) #4
+  unreachable
+
+bb_false:
+  %tmp48 = icmp sle i64 %tmp35, 0
+  tail call void @check2(i1 %tmp48) #4
+  unreachable
+}
+
+; Function Attrs: noreturn
+; This is the same as test1 but with a diamond to ensure we
+; get %tmp36 from both true and false BBs.
+; CHECK-LABEL: @test2
+; CHECK: call void @check1(i1 false)
+; CHECK: call void @check2(i1 true)
+define void @test2(i64 %tmp35, i1 %inner_cmp) {
+bb:
+  %tmp36 = icmp sgt i64 %tmp35, 0
+  br i1 %tmp36, label %bb_true, label %bb_false
+
+bb_true:
+  br i1 %inner_cmp, label %inner_true, label %inner_false
+
+inner_true:
+  br label %merge
+
+inner_false:
+  br label %merge
+
+merge:
+  %tmp47 = icmp slt i64 %tmp35, 0
+  tail call void @check1(i1 %tmp47) #0
+  unreachable
+
+bb_false:
+  %tmp48 = icmp sle i64 %tmp35, 0
+  tail call void @check2(i1 %tmp48) #4
+  unreachable
+}
+
+attributes #4 = { noreturn }
diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
index 26982db8322d..dd283aebc1d4 100644
--- a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
+++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
@@ -4,24 +4,24 @@
 
 define i8* @vfs_addname(i8* %name, i32 %len, i32 %hash, i32 %flags) nounwind ssp {
 entry:
-  call void @llvm.dbg.value(metadata !{i8* %name}, i64 0, metadata !0)
-  call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !10)
-  call void @llvm.dbg.value(metadata !{i32 %hash}, i64 0, metadata !11)
-  call void @llvm.dbg.value(metadata !{i32 %flags}, i64 0, metadata !12)
+  call void @llvm.dbg.value(metadata i8* %name, i64 0, metadata !0, metadata !{})
+  call void @llvm.dbg.value(metadata i32 %len, i64 0, metadata !10, metadata !{})
+  call void @llvm.dbg.value(metadata i32 %hash, i64 0, metadata !11, metadata !{})
+  call void @llvm.dbg.value(metadata i32 %flags, i64 0, metadata !12, metadata !{})
 ; CHECK:  call fastcc i8* @add_name_internal(i8* %name, i32 %hash) [[NUW:#[0-9]+]], !dbg !{{[0-9]+}}
   %0 = call fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext 0, i32 %flags) nounwind, !dbg !13 ; <i8*> [#uses=1]
   ret i8* %0, !dbg !13
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define internal fastcc i8* @add_name_internal(i8* %name, i32 %len, i32 %hash, i8 zeroext %extra, i32 %flags) noinline nounwind ssp {
 entry:
-  call void @llvm.dbg.value(metadata !{i8* %name}, i64 0, metadata !15)
-  call void @llvm.dbg.value(metadata !{i32 %len}, i64 0, metadata !20)
-  call void @llvm.dbg.value(metadata !{i32 %hash}, i64 0, metadata !21)
-  call void @llvm.dbg.value(metadata !{i8 %extra}, i64 0, metadata !22)
-  call void @llvm.dbg.value(metadata !{i32 %flags}, i64 0, metadata !23)
+  call void @llvm.dbg.value(metadata i8* %name, i64 0, metadata !15, metadata !{})
+  call void @llvm.dbg.value(metadata i32 %len, i64 0, metadata !20, metadata !{})
+  call void @llvm.dbg.value(metadata i32 %hash, i64 0, metadata !21, metadata !{})
+  call void @llvm.dbg.value(metadata i8 %extra, i64 0, metadata !22, metadata !{})
+  call void @llvm.dbg.value(metadata i32 %flags, i64 0, metadata !23, metadata !{})
   %0 = icmp eq i32 %hash, 0, !dbg !24             ; <i1> [#uses=1]
   br i1 %0, label %bb, label %bb1, !dbg !24
 
@@ -36,7 +36,7 @@ bb2:                                              ; preds = %bb1, %bb
   ret i8* %.0, !dbg !27
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 ; CHECK: attributes #0 = { nounwind ssp }
 ; CHECK: attributes #1 = { nounwind readnone }
@@ -45,34 +45,34 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!30}
-!0 = metadata !{i32 524545, metadata !1, metadata !"name", metadata !2, i32 8, metadata !6} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 524334, metadata !28, metadata !2, metadata !"vfs_addname", metadata !"vfs_addname", metadata !"vfs_addname", i32 12, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 524329, metadata !28} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 524305, metadata !28, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", i1 true, metadata !"", i32 0, metadata !29, metadata !29, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 524309, metadata !28, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{metadata !6, metadata !6, metadata !9, metadata !9, metadata !9}
-!6 = metadata !{i32 524303, metadata !28, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 524326, metadata !28, metadata !2, metadata !"", i32 0, i64 8, i64 8, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ]
-!8 = metadata !{i32 524324, metadata !28, metadata !2, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 524324, metadata !28, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!10 = metadata !{i32 524545, metadata !1, metadata !"len", metadata !2, i32 9, metadata !9} ; [ DW_TAG_arg_variable ]
-!11 = metadata !{i32 524545, metadata !1, metadata !"hash", metadata !2, i32 10, metadata !9} ; [ DW_TAG_arg_variable ]
-!12 = metadata !{i32 524545, metadata !1, metadata !"flags", metadata !2, i32 11, metadata !9} ; [ DW_TAG_arg_variable ]
-!13 = metadata !{i32 13, i32 0, metadata !14, null}
-!14 = metadata !{i32 524299, metadata !28, metadata !1, i32 12, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!15 = metadata !{i32 524545, metadata !16, metadata !"name", metadata !2, i32 17, metadata !6} ; [ DW_TAG_arg_variable ]
-!16 = metadata !{i32 524334, metadata !28, metadata !2, metadata !"add_name_internal", metadata !"add_name_internal", metadata !"add_name_internal", i32 22, metadata !17, i1 true, i1 true, i32 0, i32 0, null, i1 false, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!17 = metadata !{i32 524309, metadata !28, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!18 = metadata !{metadata !6, metadata !6, metadata !9, metadata !9, metadata !19, metadata !9}
-!19 = metadata !{i32 524324, metadata !28, metadata !2, metadata !"unsigned char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
-!20 = metadata !{i32 524545, metadata !16, metadata !"len", metadata !2, i32 18, metadata !9} ; [ DW_TAG_arg_variable ]
-!21 = metadata !{i32 524545, metadata !16, metadata !"hash", metadata !2, i32 19, metadata !9} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 524545, metadata !16, metadata !"extra", metadata !2, i32 20, metadata !19} ; [ DW_TAG_arg_variable ]
-!23 = metadata !{i32 524545, metadata !16, metadata !"flags", metadata !2, i32 21, metadata !9} ; [ DW_TAG_arg_variable ]
-!24 = metadata !{i32 23, i32 0, metadata !25, null}
-!25 = metadata !{i32 524299, metadata !28, metadata !16, i32 22, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!26 = metadata !{i32 24, i32 0, metadata !25, null}
-!27 = metadata !{i32 26, i32 0, metadata !25, null}
-!28 = metadata !{metadata !"tail.c", metadata !"/Users/echeng/LLVM/radars/r7927803/"}
-!29 = metadata !{i32 0}
-!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x101\00name\008\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00vfs_addname\00vfs_addname\00vfs_addname\0012\000\001\000\006\000\000\000", !28, !2, !4, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !28} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)\001\00\000\00\000", !28, !29, !29, null, null, null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !28, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{!6, !6, !9, !9, !9}
+!6 = !{!"0xf\00\000\0064\0064\000\000", !28, !2, !7} ; [ DW_TAG_pointer_type ]
+!7 = !{!"0x26\00\000\008\008\000\000", !28, !2, !8} ; [ DW_TAG_const_type ]
+!8 = !{!"0x24\00char\000\008\008\000\000\006", !28, !2} ; [ DW_TAG_base_type ]
+!9 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", !28, !2} ; [ DW_TAG_base_type ]
+!10 = !{!"0x101\00len\009\000", !1, !2, !9} ; [ DW_TAG_arg_variable ]
+!11 = !{!"0x101\00hash\0010\000", !1, !2, !9} ; [ DW_TAG_arg_variable ]
+!12 = !{!"0x101\00flags\0011\000", !1, !2, !9} ; [ DW_TAG_arg_variable ]
+!13 = !MDLocation(line: 13, scope: !14)
+!14 = !{!"0xb\0012\000\000", !28, !1} ; [ DW_TAG_lexical_block ]
+!15 = !{!"0x101\00name\0017\000", !16, !2, !6} ; [ DW_TAG_arg_variable ]
+!16 = !{!"0x2e\00add_name_internal\00add_name_internal\00add_name_internal\0022\001\001\000\006\000\000\000", !28, !2, !17, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!17 = !{!"0x15\00\000\000\000\000\000\000", !28, !2, null, !18, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = !{!6, !6, !9, !9, !19, !9}
+!19 = !{!"0x24\00unsigned char\000\008\008\000\000\008", !28, !2} ; [ DW_TAG_base_type ]
+!20 = !{!"0x101\00len\0018\000", !16, !2, !9} ; [ DW_TAG_arg_variable ]
+!21 = !{!"0x101\00hash\0019\000", !16, !2, !9} ; [ DW_TAG_arg_variable ]
+!22 = !{!"0x101\00extra\0020\000", !16, !2, !19} ; [ DW_TAG_arg_variable ]
+!23 = !{!"0x101\00flags\0021\000", !16, !2, !9} ; [ DW_TAG_arg_variable ]
+!24 = !MDLocation(line: 23, scope: !25)
+!25 = !{!"0xb\0022\000\000", !28, !16} ; [ DW_TAG_lexical_block ]
+!26 = !MDLocation(line: 24, scope: !25)
+!27 = !MDLocation(line: 26, scope: !25)
+!28 = !{!"tail.c", !"/Users/echeng/LLVM/radars/r7927803/"}
+!29 = !{i32 0}
+!30 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
index 7bdcbf5c0623..5bbf821ef213 100644
--- a/test/Transforms/DeadArgElim/dbginfo.ll
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -1,65 +1,70 @@
 ; RUN: opt -deadargelim -S < %s | FileCheck %s
 ; PR14016
 
-; Check that debug info metadata for subprograms stores pointers to
-; updated LLVM functions.
+; Built with clang (then manually running -mem2reg with opt) from the following source:
+; static void f1(int, ...) {
+; }
+;
+; void f2() {
+;   f1(1);
+; }
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
+; Test both varargs removal and removal of a traditional dead arg together, to
+; test both the basic functionality, and a particular wrinkle involving updating
+; the function->debug info mapping on update to ensure it's accurate when used
+; again for the next removal.
 
-@x = global i32 0, align 4
+; CHECK: void ()* @_ZL2f1iz, {{.*}} ; [ DW_TAG_subprogram ] {{.*}} [f1]
 
-define void @_Z3runv() uwtable {
-entry:
-  call void @_ZN12_GLOBAL__N_18dead_argEPv(i8* null), !dbg !10
-  call void (...)* @_ZN12_GLOBAL__N_111dead_varargEz(), !dbg !12
-  ret void, !dbg !13
-}
+; Check that debug info metadata for subprograms stores pointers to
+; updated LLVM functions.
 
-; Argument will be deleted
-define internal void @_ZN12_GLOBAL__N_18dead_argEPv(i8* %foo) nounwind uwtable {
+; Function Attrs: uwtable
+define void @_Z2f2v() #0 {
 entry:
-  %0 = load i32* @x, align 4, !dbg !14
-  %inc = add nsw i32 %0, 1, !dbg !14
-  store i32 %inc, i32* @x, align 4, !dbg !14
+  call void (i32, ...)* @_ZL2f1iz(i32 1), !dbg !15
   ret void, !dbg !16
 }
 
-; Vararg will be deleted
-define internal void @_ZN12_GLOBAL__N_111dead_varargEz(...) nounwind uwtable {
+; Function Attrs: nounwind uwtable
+define internal void @_ZL2f1iz(i32, ...) #1 {
 entry:
-  %0 = load i32* @x, align 4, !dbg !17
-  %inc = add nsw i32 %0, 1, !dbg !17
-  store i32 %inc, i32* @x, align 4, !dbg !17
-  ret void, !dbg !19
+  call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !17, metadata !18), !dbg !19
+  ret void, !dbg !20
 }
 
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!21}
-
-!0 = metadata !{i32 786449, metadata !20, i32 4, metadata !"clang version 3.2 (trunk 165305)", i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1,  metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{i32 0}
-!3 = metadata !{metadata !5, metadata !8, metadata !9}
-!5 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"run", metadata !"run", metadata !"", i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
-!6 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !1, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg]
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #2
 
-; CHECK: metadata !"dead_vararg"{{.*}}void ()* @_ZN12_GLOBAL__N_111dead_varargEz
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2
 
-!9 = metadata !{i32 786478, metadata !20, metadata !6, metadata !"dead_arg", metadata !"dead_arg", metadata !"", i32 4, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_ZN12_GLOBAL__N_18dead_argEPv, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [dead_arg]
+attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone }
 
-; CHECK: metadata !"dead_arg"{{.*}}void ()* @_ZN12_GLOBAL__N_18dead_argEPv
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12, !13}
+!llvm.ident = !{!14}
 
-!10 = metadata !{i32 8, i32 14, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !20, metadata !5, i32 8, i32 12, i32 0} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
-!12 = metadata !{i32 8, i32 27, metadata !11, null}
-!13 = metadata !{i32 8, i32 42, metadata !11, null}
-!14 = metadata !{i32 4, i32 28, metadata !15, null}
-!15 = metadata !{i32 786443, metadata !20, metadata !9, i32 4, i32 26, i32 2} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
-!16 = metadata !{i32 4, i32 33, metadata !15, null}
-!17 = metadata !{i32 5, i32 25, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !20, metadata !8, i32 5, i32 23, i32 1} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
-!19 = metadata !{i32 5, i32 30, metadata !18, null}
-!20 = metadata !{metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di"}
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.6.0 \000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/tmp/dbginfo/dbg.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"dbg.cpp", !"/tmp/dbginfo"}
+!2 = !{}
+!3 = !{!4, !8}
+!4 = !{!"0x2e\00f2\00f2\00_Z2f2v\004\000\001\000\000\00256\000\004", !1, !5, !6, null, void ()* @_Z2f2v, null, null, !2} ; [ DW_TAG_subprogram ] [line 4] [def] [f2]
+!5 = !{!"0x29", !1}    ; [ DW_TAG_file_type ] [/tmp/dbginfo/dbg.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{!"0x2e\00f1\00f1\00_ZL2f1iz\001\001\001\000\000\00256\000\001", !1, !5, !9, null, void (i32, ...)* @_ZL2f1iz, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [local] [def] [f1]
+!9 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !10, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!10 = !{null, !11, null}
+!11 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = !{i32 2, !"Dwarf Version", i32 4}
+!13 = !{i32 2, !"Debug Info Version", i32 2}
+!14 = !{!"clang version 3.6.0 "}
+!15 = !MDLocation(line: 5, column: 3, scope: !4)
+!16 = !MDLocation(line: 6, column: 1, scope: !4)
+!17 = !{!"0x101\00\0016777217\000", !8, !5, !11} ; [ DW_TAG_arg_variable ] [line 1]
+!18 = !{!"0x102"}               ; [ DW_TAG_expression ]
+!19 = !MDLocation(line: 1, column: 19, scope: !8)
+!20 = !MDLocation(line: 2, column: 1, scope: !8)
diff --git a/test/Transforms/DeadArgElim/dead_vaargs.ll b/test/Transforms/DeadArgElim/dead_vaargs.ll
index db3135c8393b..c8189c66b48c 100644
--- a/test/Transforms/DeadArgElim/dead_vaargs.ll
+++ b/test/Transforms/DeadArgElim/dead_vaargs.ll
@@ -1,12 +1,36 @@
-; RUN: opt < %s -deadargelim -S | not grep 47 
-; RUN: opt < %s -deadargelim -S | not grep 1.0
+; RUN: opt < %s -deadargelim -S | FileCheck %s
 
 define i32 @bar(i32 %A) {
-        %tmp4 = tail call i32 (i32, ...)* @foo( i32 %A, i32 %A, i32 %A, i32 %A, i64 47, double 1.000000e+00 )   ; <i32> [#uses=1]
-        ret i32 %tmp4
+  call void (i32, ...)* @thunk(i32 %A, i64 47, double 1.000000e+00)
+  %a = call i32 (i32, ...)* @has_vastart(i32 %A, i64 47, double 1.000000e+00)
+  %b = call i32 (i32, ...)* @no_vastart( i32 %A, i32 %A, i32 %A, i32 %A, i64 47, double 1.000000e+00 )
+  %c = add i32 %a, %b
+  ret i32 %c
 }
+; CHECK-LABEL: define i32 @bar
+; CHECK: call void (i32, ...)* @thunk(i32 %A, i64 47, double 1.000000e+00)
+; CHECK: call i32 (i32, ...)* @has_vastart(i32 %A, i64 47, double 1.000000e+00)
+; CHECK: call i32 @no_vastart(i32 %A)
 
-define internal i32 @foo(i32 %X, ...) {
-        ret i32 %X
+declare void @thunk_target(i32 %X, ...)
+
+define internal void @thunk(i32 %X, ...) {
+  musttail call void(i32, ...)* @thunk_target(i32 %X, ...)
+  ret void
+}
+; CHECK-LABEL: define internal void @thunk(i32 %X, ...)
+; CHECK: musttail call void (i32, ...)* @thunk_target(i32 %X, ...)
+
+define internal i32 @has_vastart(i32 %X, ...) {
+  %valist = alloca i8
+  call void @llvm.va_start(i8* %valist)
+  ret i32 %X
 }
+; CHECK-LABEL: define internal i32 @has_vastart(i32 %X, ...)
 
+declare void @llvm.va_start(i8*)
+
+define internal i32 @no_vastart(i32 %X, ...) {
+  ret i32 %X
+}
+; CHECK-LABEL: define internal i32 @no_vastart(i32 %X)
diff --git a/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll b/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll
index 079eec43bf85..39d53584dc37 100644
--- a/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll
+++ b/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll
@@ -5,9 +5,9 @@ target triple = "i386-apple-darwin9.8"
 
 @A = external global [0 x i32]
 
-declare cc10 void @Func2(i32*, i32*, i32*, i32)
+declare ghccc void @Func2(i32*, i32*, i32*, i32)
 
-define cc10 void @Func1(i32* noalias %Arg1, i32* noalias %Arg2, i32* %Arg3, i32 %Arg4) {
+define ghccc void @Func1(i32* noalias %Arg1, i32* noalias %Arg2, i32* %Arg3, i32 %Arg4) {
 entry:
   store i32 add (i32 ptrtoint ([0 x i32]* @A to i32), i32 1), i32* %Arg2
 ; CHECK: store i32 add (i32 ptrtoint ([0 x i32]* @A to i32), i32 1), i32* %Arg2
@@ -18,6 +18,6 @@ entry:
   %ln2gE = bitcast i32* %ln2gD to double*
   store double %ln2gB, double* %ln2gE
 ; CHECK: store double %ln2gB, double* %ln2gE
-  tail call cc10 void @Func2(i32* %Arg1, i32* %Arg2, i32* %Arg3, i32 %Arg4) nounwind
+  tail call ghccc void @Func2(i32* %Arg1, i32* %Arg2, i32* %Arg3, i32 %Arg4) nounwind
   ret void
 }
diff --git a/test/Transforms/DeadStoreElimination/atomic.ll b/test/Transforms/DeadStoreElimination/atomic.ll
index 2e84298ad400..af303fa983ee 100644
--- a/test/Transforms/DeadStoreElimination/atomic.ll
+++ b/test/Transforms/DeadStoreElimination/atomic.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-macosx10.7.0"
 
 ; Sanity tests for atomic stores.
 ; Note that it turns out essentially every transformation DSE does is legal on
-; atomic ops, just some transformations are not allowed across them. 
+; atomic ops, just some transformations are not allowed across release-acquire pairs.
 
 @x = common global i32 0, align 4
 @y = common global i32 0, align 4
@@ -13,35 +13,32 @@ target triple = "x86_64-apple-macosx10.7.0"
 declare void @randomop(i32*)
 
 ; DSE across unordered store (allowed)
-define void @test1()  nounwind uwtable ssp {
-; CHECK: test1
+define void @test1() {
+; CHECK-LABEL: test1
 ; CHECK-NOT: store i32 0
 ; CHECK: store i32 1
-entry:
   store i32 0, i32* @x
   store atomic i32 0, i32* @y unordered, align 4
   store i32 1, i32* @x
   ret void
 }
 
-; DSE across seq_cst load (allowed in theory; not implemented ATM)
-define i32 @test2()  nounwind uwtable ssp {
-; CHECK: test2
-; CHECK: store i32 0
+; DSE across seq_cst load (allowed)
+define i32 @test2() {
+; CHECK-LABEL: test2
+; CHECK-NOT: store i32 0
 ; CHECK: store i32 1
-entry:
   store i32 0, i32* @x
   %x = load atomic i32* @y seq_cst, align 4
   store i32 1, i32* @x
   ret i32 %x
 }
 
-; DSE across seq_cst store (store before atomic store must not be removed)
-define void @test3()  nounwind uwtable ssp {
-; CHECK: test3
-; CHECK: store i32
+; DSE across seq_cst store (allowed)
+define void @test3() {
+; CHECK-LABEL: test3
+; CHECK-NOT: store i32 0
 ; CHECK: store atomic i32 2
-entry:
   store i32 0, i32* @x
   store atomic i32 2, i32* @y seq_cst, align 4
   store i32 1, i32* @x
@@ -49,32 +46,29 @@ entry:
 }
 
 ; DSE remove unordered store (allowed)
-define void @test4()  nounwind uwtable ssp {
-; CHECK: test4
+define void @test4() {
+; CHECK-LABEL: test4
 ; CHECK-NOT: store atomic
 ; CHECK: store i32 1
-entry:
   store atomic i32 0, i32* @x unordered, align 4
   store i32 1, i32* @x
   ret void
 }
 
 ; DSE unordered store overwriting non-atomic store (allowed)
-define void @test5()  nounwind uwtable ssp {
-; CHECK: test5
+define void @test5() {
+; CHECK-LABEL: test5
 ; CHECK: store atomic i32 1
-entry:
   store i32 0, i32* @x
   store atomic i32 1, i32* @x unordered, align 4
   ret void
 }
 
 ; DSE no-op unordered atomic store (allowed)
-define void @test6()  nounwind uwtable ssp {
-; CHECK: test6
+define void @test6() {
+; CHECK-LABEL: test6
 ; CHECK-NOT: store
 ; CHECK: ret void
-entry:
   %x = load atomic i32* @x unordered, align 4
   store atomic i32 %x, i32* @x unordered, align 4
   ret void
@@ -82,10 +76,9 @@ entry:
 
 ; DSE seq_cst store (be conservative; DSE doesn't have infrastructure
 ; to reason about atomic operations).
-define void @test7()  nounwind uwtable ssp {
-; CHECK: test7
-; CHECK: store atomic 
-entry:
+define void @test7() {
+; CHECK-LABEL: test7
+; CHECK: store atomic
   %a = alloca i32
   store atomic i32 0, i32* %a seq_cst, align 4
   ret void
@@ -93,11 +86,10 @@ entry:
 
 ; DSE and seq_cst load (be conservative; DSE doesn't have infrastructure
 ; to reason about atomic operations).
-define i32 @test8()  nounwind uwtable ssp {
-; CHECK: test8
+define i32 @test8() {
+; CHECK-LABEL: test8
 ; CHECK: store
-; CHECK: load atomic 
-entry:
+; CHECK: load atomic
   %a = alloca i32
   call void @randomop(i32* %a)
   store i32 0, i32* %a, align 4
@@ -105,3 +97,82 @@ entry:
   ret i32 %x
 }
 
+; DSE across monotonic load (allowed as long as the eliminated store isUnordered)
+define i32 @test9() {
+; CHECK-LABEL: test9
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  %x = load atomic i32* @y monotonic, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
+
+; DSE across monotonic store (allowed as long as the eliminated store isUnordered)
+define void @test10() {
+; CHECK-LABEL: test10
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  store atomic i32 42, i32* @y monotonic, align 4
+  store i32 1, i32* @x
+  ret void
+}
+
+; DSE across monotonic load (forbidden since the eliminated store is atomic)
+define i32 @test11() {
+; CHECK-LABEL: test11
+; CHECK: store atomic i32 0
+; CHECK: store atomic i32 1
+  store atomic i32 0, i32* @x monotonic, align 4
+  %x = load atomic i32* @y monotonic, align 4
+  store atomic i32 1, i32* @x monotonic, align 4
+  ret i32 %x
+}
+
+; DSE across monotonic store (forbidden since the eliminated store is atomic)
+define void @test12() {
+; CHECK-LABEL: test12
+; CHECK: store atomic i32 0
+; CHECK: store atomic i32 1
+  store atomic i32 0, i32* @x monotonic, align 4
+  store atomic i32 42, i32* @y monotonic, align 4
+  store atomic i32 1, i32* @x monotonic, align 4
+  ret void
+}
+
+; DSE is allowed across a pair of an atomic read and then write.
+define i32 @test13() {
+; CHECK-LABEL: test13
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  %x = load atomic i32* @y seq_cst, align 4
+  store atomic i32 %x, i32* @y seq_cst, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
+
+; Same if it is acquire-release instead of seq_cst/seq_cst
+define i32 @test14() {
+; CHECK-LABEL: test14
+; CHECK-NOT: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  %x = load atomic i32* @y acquire, align 4
+  store atomic i32 %x, i32* @y release, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
+
+; But DSE is not allowed across a release-acquire pair.
+define i32 @test15() {
+; CHECK-LABEL: test15
+; CHECK: store i32 0
+; CHECK: store i32 1
+  store i32 0, i32* @x
+  store atomic i32 0, i32* @y release, align 4
+  %x = load atomic i32* @y acquire, align 4
+  store i32 1, i32* @x
+  ret i32 %x
+}
diff --git a/test/Transforms/DeadStoreElimination/const-pointers.ll b/test/Transforms/DeadStoreElimination/const-pointers.ll
index c90d824b34c1..3e772d79c70a 100644
--- a/test/Transforms/DeadStoreElimination/const-pointers.ll
+++ b/test/Transforms/DeadStoreElimination/const-pointers.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -basicaa -dse -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 %t = type { i32 }
 
diff --git a/test/Transforms/DeadStoreElimination/inst-limits.ll b/test/Transforms/DeadStoreElimination/inst-limits.ll
index 9df88014e5c6..3ef560723387 100644
--- a/test/Transforms/DeadStoreElimination/inst-limits.ll
+++ b/test/Transforms/DeadStoreElimination/inst-limits.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -S -dse < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 ; If there are two stores to the same location, DSE should be able to remove
 ; the first store if the two stores are separated by no more than 98
@@ -117,7 +118,7 @@ entry:
 
   ; Insert a meaningless dbg.value intrinsic; it should have no
   ; effect on the working of DSE in any way.
-  call void @llvm.dbg.value(metadata !12, i64 0, metadata !10)
+  call void @llvm.dbg.value(metadata i32* undef, i64 0, metadata !10, metadata !{})
 
   ; CHECK:  store i32 -1, i32* @x, align 4
   store i32 -1, i32* @x, align 4
@@ -239,23 +240,23 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata)
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!11, !13}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !9, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/tmp/test.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"test.c", metadata !"/home/tmp"}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_within_limit", metadata !"test_within_limit", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @test_within_limit, null, null, metadata !2, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [test]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/tmp/test.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{metadata !10}
-!10 = metadata !{i32 786484, i32 0, null, metadata !"x", metadata !"x", metadata !"", metadata !5, i32 1, metadata !8, i32 0, i32 1, i32* @x, null} ; [ DW_TAG_variable ] [x] [line 1] [def]
-!11 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!12 = metadata !{i32* undef}
+!0 = !{!"0x11\004\00clang version 3.4\001\00\000\00\000", !1, !2, !2, !3, !9, !2} ; [ DW_TAG_compile_unit ] [/home/tmp/test.c] [DW_LANG_C99]
+!1 = !{!"test.c", !"/home/tmp"}
+!2 = !{i32 0}
+!3 = !{!4}
+!4 = !{!"0x2e\00test_within_limit\00test_within_limit\00\003\000\001\000\006\00256\000\004", !1, !5, !6, null, i32 ()* @test_within_limit, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [test]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/home/tmp/test.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!10}
+!10 = !{!"0x34\00x\00x\00\001\000\001", null, !5, !8, i32* @x, null} ; [ DW_TAG_variable ] [x] [line 1] [def]
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32* undef}
 
-!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!13 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/DeadStoreElimination/no-targetdata.ll b/test/Transforms/DeadStoreElimination/no-targetdata.ll
index c0c7c58d4ead..25395330bf37 100644
--- a/test/Transforms/DeadStoreElimination/no-targetdata.ll
+++ b/test/Transforms/DeadStoreElimination/no-targetdata.ll
@@ -1,15 +1,21 @@
 ; RUN: opt -basicaa -dse -S < %s | FileCheck %s
 
-declare void @test1f()
-
-define void @test1(i32* noalias %p) {
-       store i32 1, i32* %p
-       call void @test1f()
-       store i32 2, i32 *%p
-       ret void
-; CHECK-LABEL: define void @test1(
-; CHECK-NOT: store
-; CHECK-NEXT: call void
-; CHECK-NEXT: store i32 2
-; CHECK-NEXT: ret void
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+define void @fn(i8* nocapture %buf) #0 {
+entry:
+
+; We would not eliminate the first memcpy with data layout, and we should not
+; eliminate it without data layout.
+; CHECK-LABEL: @fn
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64
+; CHECK: ret void
+
+  %arrayidx = getelementptr i8* %buf, i64 18
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arrayidx, i8* %buf, i64 18, i32 1, i1 false)
+  store i8 1, i8* %arrayidx, align 1
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %buf, i8* %arrayidx, i64 18, i32 1, i1 false)
+  ret void
 }
+
diff --git a/test/Transforms/DebugIR/crash.ll b/test/Transforms/DebugIR/crash.ll
deleted file mode 100644
index f4a88d7234cb..000000000000
--- a/test/Transforms/DebugIR/crash.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; ModuleID = 'crash.c'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-linux-gnu"
-
-@.str = private unnamed_addr constant [18 x i8] c"Hello, segfault!\0A\00", align 1
-@.str1 = private unnamed_addr constant [14 x i8] c"Now crash %d\0A\00", align 1
-
-; Function Attrs: nounwind uwtable
-define i32 @main(i32 %argc, i8** %argv) #0 {
-  %1 = alloca i32, align 4                                        ;CHECK: !dbg
-  %2 = alloca i32, align 4                                        ;CHECK-NEXT: !dbg
-  %3 = alloca i8**, align 8                                       ;CHECK-NEXT: !dbg
-  %null_ptr = alloca i32*, align 8                                ;CHECK-NEXT: !dbg
-  store i32 0, i32* %1                                            ;CHECK-NEXT: !dbg
-  store i32 %argc, i32* %2, align 4                               ;CHECK-NEXT: !dbg
-  store i8** %argv, i8*** %3, align 8                             ;CHECK-NEXT: !dbg
-  store i32* null, i32** %null_ptr, align 8                       ;CHECK-NEXT: !dbg
-  %4 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str, i32 0, i32 0)) ;CHECK-NEXT: !dbg
-  %5 = load i32** %null_ptr, align 8                              ;CHECK-NEXT: !dbg
-  %6 = load i32* %5, align 4                                      ;CHECK-NEXT: !dbg
-  %7 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str1, i32 0, i32 0), i32 %6) ;CHECK-NEXT: !dbg
-  %8 = load i32* %2, align 4                                      ;CHECK-NEXT: !dbg
-  ret i32 %8                                                      ;CHECK-NEXT: !dbg
-}
-
-declare i32 @printf(i8*, ...) #1
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-; CHECK: = metadata !{i32 14,
-; CHECK-NEXT: = metadata !{i32 15,
-; CHECK-NEXT: = metadata !{i32 16,
-; CHECK-NEXT: = metadata !{i32 17,
-; CHECK-NEXT: = metadata !{i32 18,
-; CHECK-NEXT: = metadata !{i32 19,
-; CHECK-NEXT: = metadata !{i32 20,
-; CHECK-NEXT: = metadata !{i32 21,
-; CHECK-NEXT: = metadata !{i32 22,
-; CHECK-NEXT: = metadata !{i32 23,
-
-; RUN: opt %s -debug-ir -S | FileCheck %s
diff --git a/test/Transforms/DebugIR/exception.ll b/test/Transforms/DebugIR/exception.ll
deleted file mode 100644
index 2436d38968c9..000000000000
--- a/test/Transforms/DebugIR/exception.ll
+++ /dev/null
@@ -1,127 +0,0 @@
-; ModuleID = 'exception.cpp'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-linux-gnu"
-
-@_ZTIi = external constant i8*
-
-; Function Attrs: uwtable
-define i32 @main(i32 %argc, i8** %argv) #0 {
-  %1 = alloca i32, align 4                        ; CHECK: !dbg
-  %2 = alloca i32, align 4                        ; CHECK-NEXT: !dbg
-  %3 = alloca i8**, align 8                       ; CHECK-NEXT: !dbg
-  %4 = alloca i8*                                 ; CHECK-NEXT: !dbg
-  %5 = alloca i32                                 ; CHECK-NEXT: !dbg
-  %e = alloca i32, align 4                        ; CHECK-NEXT: !dbg
-  %6 = alloca i32                                 ; CHECK-NEXT: !dbg
-  store i32 0, i32* %1                            ; CHECK-NEXT: !dbg
-  store i32 %argc, i32* %2, align 4               ; CHECK-NEXT: !dbg
-  store i8** %argv, i8*** %3, align 8             ; CHECK-NEXT: !dbg
-  %7 = call i8* @__cxa_allocate_exception(i64 4) #2 ; CHECK-NEXT: !dbg
-  %8 = bitcast i8* %7 to i32*                     ; CHECK-NEXT: !dbg
-  %9 = load i32* %2, align 4                      ; CHECK-NEXT: !dbg
-  store i32 %9, i32* %8                           ; CHECK-NEXT: !dbg
-  invoke void @__cxa_throw(i8* %7, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #3
-          to label %31 unwind label %10           ; CHECK: !dbg
-
-; <label>:10                                      ; preds = %0
-  %11 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
-          catch i8* bitcast (i8** @_ZTIi to i8*)  ; CHECK: !dbg
-  %12 = extractvalue { i8*, i32 } %11, 0          ; CHECK-NEXT: !dbg
-  store i8* %12, i8** %4                          ; CHECK-NEXT: !dbg
-  %13 = extractvalue { i8*, i32 } %11, 1          ; CHECK-NEXT: !dbg
-  store i32 %13, i32* %5                          ; CHECK-NEXT: !dbg
-  br label %14                                    ; CHECK-NEXT: !dbg
-
-; <label>:14                                      ; preds = %10
-  %15 = load i32* %5                              ; CHECK: !dbg
-  %16 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #2   ; CHECK-NEXT: !dbg
-  %17 = icmp eq i32 %15, %16                      ; CHECK-NEXT: !dbg
-  br i1 %17, label %18, label %26                 ; CHECK-NEXT: !dbg
-
-; <label>:18                                      ; preds = %14
-  %19 = load i8** %4                              ; CHECK: !dbg
-  %20 = call i8* @__cxa_begin_catch(i8* %19) #2   ; CHECK-NEXT: !dbg
-  %21 = bitcast i8* %20 to i32*                   ; CHECK-NEXT: !dbg
-  %22 = load i32* %21, align 4                    ; CHECK-NEXT: !dbg
-  store i32 %22, i32* %e, align 4                 ; CHECK-NEXT: !dbg
-  %23 = load i32* %e, align 4                     ; CHECK-NEXT: !dbg
-  store i32 %23, i32* %1                          ; CHECK-NEXT: !dbg
-  store i32 1, i32* %6                            ; CHECK-NEXT: !dbg
-  call void @__cxa_end_catch() #2                 ; CHECK-NEXT: !dbg
-  br label %24                                    ; CHECK-NEXT: !dbg
-
-; <label>:24                                      ; preds = %18
-  %25 = load i32* %1                              ; CHECK: !dbg
-  ret i32 %25                                     ; CHECK-NEXT: !dbg
-
-; <label>:26                                      ; preds = %14
-  %27 = load i8** %4                              ; CHECK: !dbg
-  %28 = load i32* %5                              ; CHECK-NEXT: !dbg
-  %29 = insertvalue { i8*, i32 } undef, i8* %27, 0 ; CHECK-NEXT: !dbg
-  %30 = insertvalue { i8*, i32 } %29, i32 %28, 1   ; CHECK-NEXT: !dbg
-  resume { i8*, i32 } %30                         ; CHECK-NEXT: !dbg
-
-; <label>:31                                      ; preds = %0
-  unreachable                                     ; CHECK: !dbg
-}
-
-declare i8* @__cxa_allocate_exception(i64)
-
-declare void @__cxa_throw(i8*, i8*, i8*)
-
-declare i32 @__gxx_personality_v0(...)
-
-; Function Attrs: nounwind readnone
-declare i32 @llvm.eh.typeid.for(i8*) #1
-
-declare i8* @__cxa_begin_catch(i8*)
-
-declare void @__cxa_end_catch()
-
-attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
-attributes #3 = { noreturn }
-; CHECK: = metadata !{i32 16,
-; CHECK-NEXT: = metadata !{i32 17,
-; CHECK-NEXT: = metadata !{i32 18,
-; CHECK-NEXT: = metadata !{i32 19,
-; CHECK-NEXT: = metadata !{i32 20,
-; CHECK-NEXT: = metadata !{i32 21,
-; CHECK-NEXT: = metadata !{i32 22,
-; CHECK-NEXT: = metadata !{i32 24,
-
-; CHECK-NEXT: = metadata !{i32 28,
-; CHECK-NEXT: = metadata !{i32 29,
-; CHECK-NEXT: = metadata !{i32 30,
-; CHECK-NEXT: = metadata !{i32 31,
-; CHECK-NEXT: = metadata !{i32 32,
-; CHECK-NEXT: = metadata !{i32 33,
-
-; CHECK-NEXT: = metadata !{i32 36,
-; CHECK-NEXT: = metadata !{i32 37,
-; CHECK-NEXT: = metadata !{i32 38,
-; CHECK-NEXT: = metadata !{i32 39,
-
-; CHECK-NEXT: = metadata !{i32 42,
-; CHECK-NEXT: = metadata !{i32 43,
-; CHECK-NEXT: = metadata !{i32 44,
-; CHECK-NEXT: = metadata !{i32 45,
-; CHECK-NEXT: = metadata !{i32 46,
-; CHECK-NEXT: = metadata !{i32 47,
-; CHECK-NEXT: = metadata !{i32 48,
-; CHECK-NEXT: = metadata !{i32 49,
-; CHECK-NEXT: = metadata !{i32 50,
-; CHECK-NEXT: = metadata !{i32 51,
-
-; CHECK-NEXT: = metadata !{i32 54,
-; CHECK-NEXT: = metadata !{i32 55,
-
-; CHECK-NEXT: = metadata !{i32 58,
-; CHECK-NEXT: = metadata !{i32 59,
-; CHECK-NEXT: = metadata !{i32 60,
-; CHECK-NEXT: = metadata !{i32 61,
-; CHECK-NEXT: = metadata !{i32 62,
-; CHECK-NEXT: = metadata !{i32 65,
-
-; RUN: opt %s -debug-ir -S | FileCheck %s
diff --git a/test/Transforms/DebugIR/function.ll b/test/Transforms/DebugIR/function.ll
deleted file mode 100644
index dba073de37e9..000000000000
--- a/test/Transforms/DebugIR/function.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; ModuleID = 'function.c'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-linux-gnu"
-
-; Function Attrs: nounwind uwtable
-define void @blah(i32* %i) #0 {
-  %1 = alloca i32*, align 8                   ; CHECK: !dbg
-  store i32* %i, i32** %1, align 8            ; CHECK-NEXT: !dbg
-  %2 = load i32** %1, align 8                 ; CHECK-NEXT: !dbg
-  %3 = load i32* %2, align 4                  ; CHECK-NEXT: !dbg
-  %4 = add nsw i32 %3, 1                      ; CHECK-NEXT: !dbg
-  store i32 %4, i32* %2, align 4              ; CHECK-NEXT: !dbg
-  ret void                                    ; CHECK-NEXT: !dbg
-}
-
-; Function Attrs: nounwind uwtable
-define i32 @main(i32 %argc, i8** %argv) #0 {
-  %1 = alloca i32, align 4                    ; CHECK: !dbg
-  %2 = alloca i32, align 4                    ; CHECK-NEXT: !dbg
-  %3 = alloca i8**, align 8                   ; CHECK-NEXT: !dbg
-  %i = alloca i32, align 4                    ; CHECK-NEXT: !dbg
-  store i32 0, i32* %1                        ; CHECK-NEXT: !dbg
-  store i32 %argc, i32* %2, align 4           ; CHECK-NEXT: !dbg
-  store i8** %argv, i8*** %3, align 8         ; CHECK-NEXT: !dbg
-  store i32 7, i32* %i, align 4               ; CHECK-NEXT: !dbg
-  call void @blah(i32* %i)                    ; CHECK-NEXT: !dbg
-  %4 = load i32* %i, align 4                  ; CHECK-NEXT: !dbg
-  ret i32 %4                                  ; CHECK-NEXT: !dbg
-}
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-; CHECK: = metadata !{i32 8,
-; CHECK-NEXT: = metadata !{i32 9,
-; CHECK-NEXT: = metadata !{i32 10,
-; CHECK-NEXT: = metadata !{i32 11,
-; CHECK-NEXT: = metadata !{i32 12,
-; CHECK-NEXT: = metadata !{i32 13,
-
-; CHECK-NEXT: = metadata !{i32 18,
-; CHECK-NEXT: = metadata !{i32 19,
-; CHECK-NEXT: = metadata !{i32 20,
-; CHECK-NEXT: = metadata !{i32 21,
-; CHECK-NEXT: = metadata !{i32 22,
-; CHECK-NEXT: = metadata !{i32 23,
-; CHECK-NEXT: = metadata !{i32 24,
-; CHECK-NEXT: = metadata !{i32 25,
-; CHECK-NEXT: = metadata !{i32 26,
-; CHECK-NEXT: = metadata !{i32 27,
-; CHECK-NEXT: = metadata !{i32 28,
-
-; RUN: opt %s -debug-ir -S | FileCheck %s
diff --git a/test/Transforms/DebugIR/simple-addrspace.ll b/test/Transforms/DebugIR/simple-addrspace.ll
deleted file mode 100644
index 6bea9b269f57..000000000000
--- a/test/Transforms/DebugIR/simple-addrspace.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt -debug-ir -S %s -o - | FileCheck %s
-
-target datalayout = "e-p:64:64:64-p1:16:16:16"
-
-define void @foo(i32 addrspace(1)*) nounwind {
-  ret void
-}
-
-; Make sure the pointer size is 16
-
-; CHECK: metadata !"i32 addrspace(1)*", i32 0, i64 16, i64 2, i64 0, i32 0
-
-
diff --git a/test/Transforms/DebugIR/simple.ll b/test/Transforms/DebugIR/simple.ll
deleted file mode 100644
index 3b188958261f..000000000000
--- a/test/Transforms/DebugIR/simple.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; ModuleID = 'simple.c'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-linux-gnu"
-
-; Function Attrs: nounwind uwtable
-define i32 @main(i32 %argc, i8** %argv) #0 {
-  %1 = alloca i32, align 4                  ; CHECK: !dbg
-  %2 = alloca i32, align 4                  ; CHECK-NEXT: !dbg
-  %3 = alloca i8**, align 8                 ; CHECK-NEXT: !dbg
-  store i32 0, i32* %1                      ; CHECK-NEXT: !dbg
-  store i32 %argc, i32* %2, align 4         ; CHECK-NEXT: !dbg
-  store i8** %argv, i8*** %3, align 8       ; CHECK-NEXT: !dbg
-  %4 = load i32* %2, align 4                ; CHECK-NEXT: !dbg
-  ret i32 %4                                ; CHECK-NEXT: !dbg
-}
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-; CHECK: = metadata !{i32 10,
-; CHECK-NEXT: = metadata !{i32 11,
-; CHECK-NEXT: = metadata !{i32 12,
-; CHECK-NEXT: = metadata !{i32 13,
-; CHECK-NEXT: = metadata !{i32 14,
-
-; RUN: opt %s -debug-ir -S | FileCheck %s
diff --git a/test/Transforms/DebugIR/struct.ll b/test/Transforms/DebugIR/struct.ll
deleted file mode 100644
index 8db3dbebe90b..000000000000
--- a/test/Transforms/DebugIR/struct.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; ModuleID = 'struct.cpp'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-linux-gnu"
-
-%struct.blah = type { i32, float, i8 }
-
-; Function Attrs: nounwind uwtable
-define i32 @main() #0 {
-  %1 = alloca i32, align 4                                    ; CHECK: !dbg
-  %b = alloca %struct.blah, align 4                           ; CHECK-NEXT: !dbg
-  store i32 0, i32* %1                                        ; CHECK-NEXT: !dbg
-  %2 = getelementptr inbounds %struct.blah* %b, i32 0, i32 0  ; CHECK-NEXT: !dbg
-  %3 = load i32* %2, align 4                                  ; CHECK-NEXT: !dbg
-  ret i32 %3                                                  ; CHECK-NEXT: !dbg
-}
-
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-; CHECK: = metadata !{i32 11,
-; CHECK-NEXT: = metadata !{i32 12,
-; CHECK-NEXT: = metadata !{i32 13,
-; CHECK-NEXT: = metadata !{i32 14,
-
-; RUN: opt %s -debug-ir -S | FileCheck %s
diff --git a/test/Transforms/DebugIR/vector.ll b/test/Transforms/DebugIR/vector.ll
deleted file mode 100644
index 50d99ac2254f..000000000000
--- a/test/Transforms/DebugIR/vector.ll
+++ /dev/null
@@ -1,93 +0,0 @@
-; ModuleID = 'vector.cpp'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-pc-linux-gnu"
-
-; Function Attrs: noinline nounwind uwtable
-define <4 x float> @_Z3fooDv2_fS_(double %a.coerce, double %b.coerce) #0 {
-  %1 = alloca <2 x float>, align 8                    ; CHECK: !dbg
-  %2 = alloca <2 x float>, align 8                    ; CHECK-NEXT: !dbg
-  %3 = alloca <2 x float>, align 8                    ; CHECK-NEXT: !dbg
-  %4 = alloca <2 x float>, align 8                    ; CHECK-NEXT: !dbg
-  %c = alloca <4 x float>, align 16                   ; CHECK-NEXT: !dbg
-  %5 = bitcast <2 x float>* %1 to double*             ; CHECK-NEXT: !dbg
-  store double %a.coerce, double* %5, align 1         ; CHECK-NEXT: !dbg
-  %a = load <2 x float>* %1, align 8                  ; CHECK-NEXT: !dbg
-  store <2 x float> %a, <2 x float>* %2, align 8      ; CHECK-NEXT: !dbg
-  %6 = bitcast <2 x float>* %3 to double*             ; CHECK-NEXT: !dbg
-  store double %b.coerce, double* %6, align 1         ; CHECK-NEXT: !dbg
-  %b = load <2 x float>* %3, align 8                  ; CHECK-NEXT: !dbg
-  store <2 x float> %b, <2 x float>* %4, align 8      ; CHECK-NEXT: !dbg
-  %7 = load <2 x float>* %2, align 8                  ; CHECK-NEXT: !dbg
-  %8 = load <4 x float>* %c, align 16                 ; CHECK-NEXT: !dbg
-  %9 = shufflevector <2 x float> %7, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>   ; CHECK-NEXT: !dbg
-  %10 = shufflevector <4 x float> %8, <4 x float> %9, <4 x i32> <i32 4, i32 1, i32 5, i32 3>             ; CHECK-NEXT: !dbg
-  store <4 x float> %10, <4 x float>* %c, align 16    ; CHECK-NEXT: !dbg
-  %11 = load <2 x float>* %4, align 8                 ; CHECK-NEXT: !dbg
-  %12 = load <4 x float>* %c, align 16                ; CHECK-NEXT: !dbg
-  %13 = shufflevector <2 x float> %11, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ; CHECK-NEXT: !dbg
-  %14 = shufflevector <4 x float> %12, <4 x float> %13, <4 x i32> <i32 0, i32 4, i32 2, i32 5>           ; CHECK-NEXT: !dbg
-  store <4 x float> %14, <4 x float>* %c, align 16    ; CHECK-NEXT: !dbg
-  %15 = load <4 x float>* %c, align 16                ; CHECK-NEXT: !dbg
-  ret <4 x float> %15                                 ; CHECK-NEXT: !dbg
-}
-
-; Function Attrs: nounwind uwtable
-define i32 @main() #1 {
-  %1 = alloca i32, align 4                            ; CHECK: !dbg
-  %a = alloca <2 x float>, align 8                    ; CHECK-NEXT: !dbg
-  %b = alloca <2 x float>, align 8                    ; CHECK-NEXT: !dbg
-  %x = alloca <4 x float>, align 16                   ; CHECK-NEXT: !dbg
-  %2 = alloca <2 x float>, align 8                    ; CHECK-NEXT: !dbg
-  %3 = alloca <2 x float>, align 8                    ; CHECK-NEXT: !dbg
-  store i32 0, i32* %1                                ; CHECK-NEXT: !dbg
-  store <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float>* %a, align 8                   ; CHECK-NEXT: !dbg
-  store <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float>* %b, align 8                   ; CHECK-NEXT: !dbg
-  %4 = load <2 x float>* %a, align 8                  ; CHECK-NEXT: !dbg
-  %5 = load <2 x float>* %b, align 8                  ; CHECK-NEXT: !dbg
-  store <2 x float> %4, <2 x float>* %2, align 8      ; CHECK-NEXT: !dbg
-  %6 = bitcast <2 x float>* %2 to double*             ; CHECK-NEXT: !dbg
-  %7 = load double* %6, align 1                       ; CHECK-NEXT: !dbg
-  store <2 x float> %5, <2 x float>* %3, align 8      ; CHECK-NEXT: !dbg
-  %8 = bitcast <2 x float>* %3 to double*             ; CHECK-NEXT: !dbg
-  %9 = load double* %8, align 1                       ; CHECK-NEXT: !dbg
-  %10 = call <4 x float> @_Z3fooDv2_fS_(double %7, double %9)                                            ; CHECK-NEXT: !dbg
-  store <4 x float> %10, <4 x float>* %x, align 16    ; CHECK-NEXT: !dbg
-  %11 = load <4 x float>* %x, align 16                ; CHECK-NEXT: !dbg
-  %12 = extractelement <4 x float> %11, i32 0         ; CHECK-NEXT: !dbg
-  %13 = load <4 x float>* %x, align 16                ; CHECK-NEXT: !dbg
-  %14 = extractelement <4 x float> %13, i32 1         ; CHECK-NEXT: !dbg
-  %15 = fadd float %12, %14                           ; CHECK-NEXT: !dbg
-  %16 = load <4 x float>* %x, align 16                ; CHECK-NEXT: !dbg
-  %17 = extractelement <4 x float> %16, i32 2         ; CHECK-NEXT: !dbg
-  %18 = fadd float %15, %17                           ; CHECK-NEXT: !dbg
-  %19 = load <4 x float>* %x, align 16                ; CHECK-NEXT: !dbg
-  %20 = extractelement <4 x float> %19, i32 3         ; CHECK-NEXT: !dbg
-  %21 = fadd float %18, %20                           ; CHECK-NEXT: !dbg
-  %22 = fptosi float %21 to i32                       ; CHECK-NEXT: !dbg
-  ret i32 %22                                         ; CHECK-NEXT: !dbg
-}
-
-attributes #0 = { noinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
-
-; CHECK: = metadata !{i32 13,
-; CHECK-NEXT: = metadata !{i32 14,
-; CHECK-NEXT: = metadata !{i32 15,
-; CHECK-NEXT: = metadata !{i32 16,
-; CHECK-NEXT: = metadata !{i32 17,
-; CHECK-NEXT: = metadata !{i32 18,
-; CHECK-NEXT: = metadata !{i32 19,
-; CHECK-NEXT: = metadata !{i32 20,
-; CHECK-NEXT: = metadata !{i32 21,
-; CHECK-NEXT: = metadata !{i32 22,
-; CHECK-NEXT: = metadata !{i32 23,
-; CHECK-NEXT: = metadata !{i32 24,
-; CHECK-NEXT: = metadata !{i32 25,
-; CHECK-NEXT: = metadata !{i32 26,
-; CHECK-NEXT: = metadata !{i32 27,
-; CHECK-NEXT: = metadata !{i32 28,
-; CHECK-NEXT: = metadata !{i32 29,
-; CHECK-NEXT: = metadata !{i32 30,
-; CHECK-NEXT: = metadata !{i32 31,
-
-; RUN: opt %s -debug-ir -S | FileCheck %s
diff --git a/test/Transforms/EarlyCSE/basic.ll b/test/Transforms/EarlyCSE/basic.ll
index 80704df9852e..155d36f60e21 100644
--- a/test/Transforms/EarlyCSE/basic.ll
+++ b/test/Transforms/EarlyCSE/basic.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -S -early-cse | FileCheck %s
 
+declare void @llvm.assume(i1) nounwind
 
 ; CHECK-LABEL: @test1(
 define void @test1(i8 %V, i32 *%P) {
@@ -42,6 +43,16 @@ define i32 @test2(i32 *%P) {
   ; CHECK: ret i32 0
 }
 
+; CHECK-LABEL: @test2a(
+define i32 @test2a(i32 *%P, i1 %b) {
+  %V1 = load i32* %P
+  tail call void @llvm.assume(i1 %b)
+  %V2 = load i32* %P
+  %Diff = sub i32 %V1, %V2
+  ret i32 %Diff
+  ; CHECK: ret i32 0
+}
+
 ;; Cross block load value numbering.
 ; CHECK-LABEL: @test3(
 define i32 @test3(i32 *%P, i1 %Cond) {
@@ -58,6 +69,22 @@ F:
   ; CHECK: ret i32 0
 }
 
+; CHECK-LABEL: @test3a(
+define i32 @test3a(i32 *%P, i1 %Cond, i1 %b) {
+  %V1 = load i32* %P
+  br i1 %Cond, label %T, label %F
+T:
+  store i32 4, i32* %P
+  ret i32 42
+F:
+  tail call void @llvm.assume(i1 %b)
+  %V2 = load i32* %P
+  %Diff = sub i32 %V1, %V2
+  ret i32 %Diff
+  ; CHECK: F:
+  ; CHECK: ret i32 0
+}
+
 ;; Cross block load value numbering stops when stores happen.
 ; CHECK-LABEL: @test4(
 define i32 @test4(i32 *%P, i1 %Cond) {
@@ -97,6 +124,15 @@ define i32 @test6(i32 *%P) {
   ; CHECK: ret i32 42
 }
 
+; CHECK-LABEL: @test6a(
+define i32 @test6a(i32 *%P, i1 %b) {
+  store i32 42, i32* %P
+  tail call void @llvm.assume(i1 %b)
+  %V1 = load i32* %P
+  ret i32 %V1
+  ; CHECK: ret i32 42
+}
+
 ;; Trivial dead store elimination.
 ; CHECK-LABEL: @test7(
 define void @test7(i32 *%P) {
@@ -118,4 +154,42 @@ define i32 @test8(i32 *%P) {
   ; CHECK: ret i32 0
 }
 
+;; Trivial DSE can't be performed across a readonly call.  The call
+;; can observe the earlier write.
+; CHECK-LABEL: @test9(
+define i32 @test9(i32 *%P) {
+  store i32 4, i32* %P
+  %V1 = call i32 @func(i32* %P) readonly
+  store i32 5, i32* %P        
+  ret i32 %V1
+  ; CHECK: store i32 4, i32* %P        
+  ; CHECK-NEXT: %V1 = call i32 @func(i32* %P)
+  ; CHECK-NEXT: store i32 5, i32* %P        
+  ; CHECK-NEXT: ret i32 %V1
+}
+
+;; Trivial DSE can be performed across a readnone call.
+; CHECK-LABEL: @test10
+define i32 @test10(i32 *%P) {
+  store i32 4, i32* %P
+  %V1 = call i32 @func(i32* %P) readnone
+  store i32 5, i32* %P        
+  ret i32 %V1
+  ; CHECK-NEXT: %V1 = call i32 @func(i32* %P)
+  ; CHECK-NEXT: store i32 5, i32* %P        
+  ; CHECK-NEXT: ret i32 %V1
+}
+
+;; Trivial dead store elimination - should work for an entire series of dead stores too.
+; CHECK-LABEL: @test11(
+define void @test11(i32 *%P) {
+  store i32 42, i32* %P
+  store i32 43, i32* %P
+  store i32 44, i32* %P
+  store i32 45, i32* %P
+  ret void
+  ; CHECK-NEXT: store i32 45
+  ; CHECK-NEXT: ret void
+}
+
 
diff --git a/test/Transforms/FunctionAttrs/optnone-simple.ll b/test/Transforms/FunctionAttrs/optnone-simple.ll
new file mode 100644
index 000000000000..9d0f8e3710a6
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/optnone-simple.ll
@@ -0,0 +1,135 @@
+; RUN: opt -O3 -S < %s | FileCheck %s
+; Show 'optnone' suppresses optimizations.
+
+; Two attribute groups that differ only by 'optnone'.
+; 'optnone' requires 'noinline' so #0 is 'noinline' by itself,
+; even though it would otherwise be irrelevant to this example.
+attributes #0 = { noinline }
+attributes #1 = { noinline optnone }
+
+; int iadd(int a, int b){ return a + b; }
+
+define i32 @iadd_optimize(i32 %a, i32 %b) #0 {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  %0 = load i32* %a.addr, align 4
+  %1 = load i32* %b.addr, align 4
+  %add = add nsw i32 %0, %1
+  ret i32 %add
+}
+
+; CHECK-LABEL: @iadd_optimize
+; CHECK-NOT: alloca
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK: ret
+
+define i32 @iadd_optnone(i32 %a, i32 %b) #1 {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  %0 = load i32* %a.addr, align 4
+  %1 = load i32* %b.addr, align 4
+  %add = add nsw i32 %0, %1
+  ret i32 %add
+}
+
+; CHECK-LABEL: @iadd_optnone
+; CHECK: alloca i32
+; CHECK: alloca i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: load i32
+; CHECK: load i32
+; CHECK: add nsw i32
+; CHECK: ret i32
+
+; float fsub(float a, float b){ return a - b; }
+
+define float @fsub_optimize(float %a, float %b) #0 {
+entry:
+  %a.addr = alloca float, align 4
+  %b.addr = alloca float, align 4
+  store float %a, float* %a.addr, align 4
+  store float %b, float* %b.addr, align 4
+  %0 = load float* %a.addr, align 4
+  %1 = load float* %b.addr, align 4
+  %sub = fsub float %0, %1
+  ret float %sub
+}
+
+; CHECK-LABEL: @fsub_optimize
+; CHECK-NOT: alloca
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK: ret
+
+define float @fsub_optnone(float %a, float %b) #1 {
+entry:
+  %a.addr = alloca float, align 4
+  %b.addr = alloca float, align 4
+  store float %a, float* %a.addr, align 4
+  store float %b, float* %b.addr, align 4
+  %0 = load float* %a.addr, align 4
+  %1 = load float* %b.addr, align 4
+  %sub = fsub float %0, %1
+  ret float %sub
+}
+
+; CHECK-LABEL: @fsub_optnone
+; CHECK: alloca float
+; CHECK: alloca float
+; CHECK: store float
+; CHECK: store float
+; CHECK: load float
+; CHECK: load float
+; CHECK: fsub float
+; CHECK: ret float
+
+; typedef float __attribute__((ext_vector_type(4))) float4;
+; float4 vmul(float4 a, float4 b){ return a * b; }
+
+define <4 x float> @vmul_optimize(<4 x float> %a, <4 x float> %b) #0 {
+entry:
+  %a.addr = alloca <4 x float>, align 16
+  %b.addr = alloca <4 x float>, align 16
+  store <4 x float> %a, <4 x float>* %a.addr, align 16
+  store <4 x float> %b, <4 x float>* %b.addr, align 16
+  %0 = load <4 x float>* %a.addr, align 16
+  %1 = load <4 x float>* %b.addr, align 16
+  %mul = fmul <4 x float> %0, %1
+  ret <4 x float> %mul
+}
+
+; CHECK-LABEL: @vmul_optimize
+; CHECK-NOT: alloca
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK: ret
+
+define <4 x float> @vmul_optnone(<4 x float> %a, <4 x float> %b) #1 {
+entry:
+  %a.addr = alloca <4 x float>, align 16
+  %b.addr = alloca <4 x float>, align 16
+  store <4 x float> %a, <4 x float>* %a.addr, align 16
+  store <4 x float> %b, <4 x float>* %b.addr, align 16
+  %0 = load <4 x float>* %a.addr, align 16
+  %1 = load <4 x float>* %b.addr, align 16
+  %mul = fmul <4 x float> %0, %1
+  ret <4 x float> %mul
+}
+
+; CHECK-LABEL: @vmul_optnone
+; CHECK: alloca <4 x float>
+; CHECK: alloca <4 x float>
+; CHECK: store <4 x float>
+; CHECK: store <4 x float>
+; CHECK: load <4 x float>
+; CHECK: load <4 x float>
+; CHECK: fmul <4 x float>
+; CHECK: ret
diff --git a/test/Transforms/FunctionAttrs/optnone.ll b/test/Transforms/FunctionAttrs/optnone.ll
new file mode 100644
index 000000000000..7694bfe13aa5
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/optnone.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -functionattrs -S | FileCheck %s
+
+@x = global i32 0
+
+define void @test_opt(i8* %p) {
+; CHECK-LABEL: @test_opt
+; CHECK: (i8* nocapture readnone %p) #0 {
+  ret void
+}
+
+define void @test_optnone(i8* %p) noinline optnone {
+; CHECK-LABEL: @test_optnone
+; CHECK: (i8* %p) #1 {
+  ret void
+}
+
+declare i8 @strlen(i8*) noinline optnone
+; CHECK-LABEL: @strlen
+; CHECK: (i8*) #1
+
+; CHECK-LABEL: attributes #0
+; CHECK: = { readnone }
+; CHECK-LABEL: attributes #1
+; CHECK: = { noinline optnone }
diff --git a/test/Transforms/GCOVProfiling/function-numbering.ll b/test/Transforms/GCOVProfiling/function-numbering.ll
new file mode 100644
index 000000000000..487f4ca01a6b
--- /dev/null
+++ b/test/Transforms/GCOVProfiling/function-numbering.ll
@@ -0,0 +1,56 @@
+; Test that GCOV instrumentation numbers functions correctly when some
+; functions aren't emitted.
+
+; Inject metadata to set the .gcno file location
+; RUN: echo '!14 = !{!"%/T/function-numbering.ll", !0}' > %t1
+; RUN: cat %s %t1 > %t2
+
+; RUN: opt -insert-gcov-profiling -S < %t2 | FileCheck --check-prefix GCDA %s
+; RUN: llvm-cov -n -dump %T/function-numbering.gcno 2>&1 | FileCheck --check-prefix GCNO %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; GCDA: @[[FOO:[0-9]+]] = private unnamed_addr constant [4 x i8] c"foo\00"
+; GCDA-NOT: @{{[0-9]+}} = private unnamed_addr constant .* c"bar\00"
+; GCDA: @[[BAZ:[0-9]+]] = private unnamed_addr constant [4 x i8] c"baz\00"
+; GCDA: define internal void @__llvm_gcov_writeout()
+; GCDA: call void @llvm_gcda_emit_function(i32 0, i8* getelementptr inbounds ([4 x i8]* @[[FOO]]
+; GCDA: call void @llvm_gcda_emit_function(i32 1, i8* getelementptr inbounds ([4 x i8]* @[[BAZ]]
+
+; GCNO: == foo (0) @
+; GCNO-NOT: == bar ({{[0-9]+}}) @
+; GCNO: == baz (1) @
+
+define void @foo() {
+  ret void, !dbg !12
+}
+
+define void @bar() {
+  ; This function is referenced by the debug info, but no lines have locations.
+  ret void
+}
+
+define void @baz() {
+  ret void, !dbg !13
+}
+
+!llvm.gcov = !{!14}
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+!llvm.ident = !{!11}
+
+!0 = !{!"0x11\0012\00clang version 3.6.0 \000\00\000\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [function-numbering.ll] [DW_LANG_C99]
+!1 = !{!".../llvm/test/Transforms/GCOVProfiling/function-numbering.ll", !""}
+!2 = !{}
+!3 = !{!4, !7, !8}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\000\000\000\001", !1, !5, !6, null, void ()* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}    ; [ DW_TAG_file_type ] [/Users/bogner/build/llvm-debug//tmp/foo.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!"0x2e\00bar\00bar\00\002\000\001\000\000\000\000\002", !1, !5, !6, null, void ()* @bar, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [def] [bar]
+!8 = !{!"0x2e\00baz\00baz\00\003\000\001\000\000\000\000\003", !1, !5, !6, null, void ()* @baz, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [baz]
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 2}
+!11 = !{!"clang version 3.6.0 "}
+!12 = !MDLocation(line: 1, column: 13, scope: !4)
+!13 = !MDLocation(line: 3, column: 13, scope: !8)
diff --git a/test/Transforms/GCOVProfiling/global-ctor.ll b/test/Transforms/GCOVProfiling/global-ctor.ll
index 722a0962ec57..9a9b7cecce7a 100644
--- a/test/Transforms/GCOVProfiling/global-ctor.ll
+++ b/test/Transforms/GCOVProfiling/global-ctor.ll
@@ -1,11 +1,9 @@
-; RUN: echo '!16 = metadata !{metadata !"%T/global-ctor.ll", metadata !0}' > %t1
+; RUN: echo '!16 = !{!"%/T/global-ctor.ll", !0}' > %t1
 ; RUN: cat %s %t1 > %t2
 ; RUN: opt -insert-gcov-profiling -disable-output < %t2
 ; RUN: not grep '_GLOBAL__sub_I_global-ctor' %T/global-ctor.gcno
 ; RUN: rm %T/global-ctor.gcno
 
-; REQUIRES: shell
-
 @x = global i32 0, align 4
 @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_global-ctor.ll, i8* null }]
 
@@ -40,19 +38,19 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "
 !llvm.gcov = !{!16}
 !llvm.ident = !{!12}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (trunk 210217)", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [/home/nlewycky/<stdin>] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"<stdin>", metadata !"/home/nlewycky"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !8}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"__cxx_global_var_init", metadata !"__cxx_global_var_init", metadata !"", i32 2, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @__cxx_global_var_init, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 2] [local] [def] [__cxx_global_var_init]
-!5 = metadata !{metadata !"global-ctor.ll", metadata !"/home/nlewycky"}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/home/nlewycky/global-ctor.ll]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{i32 786478, metadata !1, metadata !9, metadata !"", metadata !"", metadata !"_GLOBAL__sub_I_global-ctor.ll", i32 0, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 64, i1 false, void ()* @_GLOBAL__sub_I_global-ctor.ll, null, null, metadata !2, i32 0} ; [ DW_TAG_subprogram ] [line 0] [local] [def]
-!9 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/nlewycky/<stdin>]
-!10 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!11 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!12 = metadata !{metadata !"clang version 3.5.0 (trunk 210217)"}
-!13 = metadata !{i32 2, i32 0, metadata !4, null}
-!14 = metadata !{i32 0, i32 0, metadata !15, null}
-!15 = metadata !{i32 786443, metadata !5, metadata !8} ; [ DW_TAG_lexical_block ] [/home/nlewycky/global-ctor.ll]
+!0 = !{!"0x11\004\00clang version 3.5.0 (trunk 210217)\000\00\000\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/home/nlewycky/<stdin>] [DW_LANG_C_plus_plus]
+!1 = !{!"<stdin>", !"/home/nlewycky"}
+!2 = !{}
+!3 = !{!4, !8}
+!4 = !{!"0x2e\00__cxx_global_var_init\00__cxx_global_var_init\00\002\001\001\000\006\00256\000\002", !5, !6, !7, null, void ()* @__cxx_global_var_init, null, null, !2} ; [ DW_TAG_subprogram ] [line 2] [local] [def] [__cxx_global_var_init]
+!5 = !{!"global-ctor.ll", !"/home/nlewycky"}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [/home/nlewycky/global-ctor.ll]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!"0x2e\00\00\00_GLOBAL__sub_I_global-ctor.ll\000\001\001\000\006\0064\000\000", !1, !9, !7, null, void ()* @_GLOBAL__sub_I_global-ctor.ll, null, null, !2} ; [ DW_TAG_subprogram ] [line 0] [local] [def]
+!9 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/home/nlewycky/<stdin>]
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 2, !"Debug Info Version", i32 2}
+!12 = !{!"clang version 3.5.0 (trunk 210217)"}
+!13 = !MDLocation(line: 2, scope: !4)
+!14 = !MDLocation(line: 0, scope: !15)
+!15 = !{!"0xb\000", !5, !8} ; [ DW_TAG_lexical_block ] [/home/nlewycky/global-ctor.ll]
diff --git a/test/Transforms/GCOVProfiling/linezero.ll b/test/Transforms/GCOVProfiling/linezero.ll
index ba1c563c64ab..cf0fcd2cc820 100644
--- a/test/Transforms/GCOVProfiling/linezero.ll
+++ b/test/Transforms/GCOVProfiling/linezero.ll
@@ -1,7 +1,6 @@
-; RUN: sed -e 's|PATTERN|%T|g' < %s > %t1
+; RUN: sed -e 's|PATTERN|%/T|g' < %s > %t1
 ; RUN: opt -insert-gcov-profiling -disable-output < %t1
 ; RUN: rm %T/linezero.gcno %t1
-; REQUIRES: shell
 
 ; This is a crash test.
 
@@ -20,17 +19,17 @@ entry:
   %__begin = alloca i8*, align 8
   %__end = alloca i8*, align 8
   %spec = alloca i8, align 1
-  call void @llvm.dbg.declare(metadata !{%struct.vector** %__range}, metadata !27), !dbg !30
+  call void @llvm.dbg.declare(metadata %struct.vector** %__range, metadata !27, metadata !{}), !dbg !30
   br label %0
 
 ; <label>:0                                       ; preds = %entry
   call void @_Z13TagFieldSpecsv(), !dbg !31
   store %struct.vector* %ref.tmp, %struct.vector** %__range, align 8, !dbg !31
-  call void @llvm.dbg.declare(metadata !{i8** %__begin}, metadata !32), !dbg !30
+  call void @llvm.dbg.declare(metadata i8** %__begin, metadata !32, metadata !{}), !dbg !30
   %1 = load %struct.vector** %__range, align 8, !dbg !31
   %call = call i8* @_ZN6vector5beginEv(%struct.vector* %1), !dbg !31
   store i8* %call, i8** %__begin, align 8, !dbg !31
-  call void @llvm.dbg.declare(metadata !{i8** %__end}, metadata !33), !dbg !30
+  call void @llvm.dbg.declare(metadata i8** %__end, metadata !33, metadata !{}), !dbg !30
   %2 = load %struct.vector** %__range, align 8, !dbg !31
   %call1 = call i8* @_ZN6vector3endEv(%struct.vector* %2), !dbg !31
   store i8* %call1, i8** %__end, align 8, !dbg !31
@@ -43,7 +42,7 @@ for.cond:                                         ; preds = %for.inc, %0
   br i1 %cmp, label %for.body, label %for.end, !dbg !34
 
 for.body:                                         ; preds = %for.cond
-  call void @llvm.dbg.declare(metadata !{i8* %spec}, metadata !37), !dbg !31
+  call void @llvm.dbg.declare(metadata i8* %spec, metadata !37, metadata !{}), !dbg !31
   %5 = load i8** %__begin, align 8, !dbg !38
   %6 = load i8* %5, align 1, !dbg !38
   store i8 %6, i8* %spec, align 1, !dbg !38
@@ -65,7 +64,7 @@ return:                                           ; No predecessors!
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 declare void @_Z13TagFieldSpecsv() #2
 
@@ -95,49 +94,49 @@ attributes #3 = { noreturn nounwind }
 !llvm.gcov = !{!25}
 !llvm.ident = !{!26}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0 (trunk 209871)", i1 false, metadata !"", i32 0, metadata !2, metadata !3, metadata !14, metadata !2, metadata !2, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [<stdin>] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"<stdin>", metadata !"PATTERN"}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786451, metadata !5, null, metadata !"vector", i32 21, i64 8, i64 8, i32 0, i32 0, null, metadata !6, i32 0, null, null, metadata !"_ZTS6vector"} ; [ DW_TAG_structure_type ] [vector] [line 21, size 8, align 8, offset 0] [def] [from ]
-!5 = metadata !{metadata !"linezero.cc", metadata !"PATTERN"}
-!6 = metadata !{metadata !7, metadata !13}
-!7 = metadata !{i32 786478, metadata !5, metadata !"_ZTS6vector", metadata !"begin", metadata !"begin", metadata !"_ZN6vector5beginEv", i32 25, metadata !8, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 25} ; [ DW_TAG_subprogram ] [line 25] [begin]
-!8 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !9, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!9 = metadata !{metadata !10, metadata !12}
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
-!11 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!12 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !"_ZTS6vector"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS6vector]
-!13 = metadata !{i32 786478, metadata !5, metadata !"_ZTS6vector", metadata !"end", metadata !"end", metadata !"_ZN6vector3endEv", i32 26, metadata !8, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, null, i32 26} ; [ DW_TAG_subprogram ] [line 26] [end]
-!14 = metadata !{metadata !15, metadata !20}
-!15 = metadata !{i32 786478, metadata !5, metadata !16, metadata !"test", metadata !"test", metadata !"_Z4testv", i32 50, metadata !17, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z4testv, null, null, metadata !2, i32 50} ; [ DW_TAG_subprogram ] [line 50] [def] [test]
-!16 = metadata !{i32 786473, metadata !5}         ; [ DW_TAG_file_type ] [./linezero.cc]
-!17 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!18 = metadata !{metadata !19}
-!19 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!20 = metadata !{i32 786478, metadata !5, metadata !16, metadata !"f1", metadata !"f1", metadata !"_Z2f1v", i32 54, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z2f1v, null, null, metadata !2, i32 54} ; [ DW_TAG_subprogram ] [line 54] [def] [f1]
-!21 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!22 = metadata !{null}
-!23 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!24 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!25 = metadata !{metadata !"PATTERN/linezero.o", metadata !0}
-!26 = metadata !{metadata !"clang version 3.5.0 (trunk 209871)"}
-!27 = metadata !{i32 786688, metadata !28, metadata !"__range", null, i32 0, metadata !29, i32 64, i32 0} ; [ DW_TAG_auto_variable ] [__range] [line 0]
-!28 = metadata !{i32 786443, metadata !5, metadata !15, i32 51, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./linezero.cc]
-!29 = metadata !{i32 786498, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !"_ZTS6vector"} ; [ DW_TAG_rvalue_reference_type ] [line 0, size 0, align 0, offset 0] [from _ZTS6vector]
-!30 = metadata !{i32 0, i32 0, metadata !28, null}
-!31 = metadata !{i32 51, i32 0, metadata !28, null}
-!32 = metadata !{i32 786688, metadata !28, metadata !"__begin", null, i32 0, metadata !10, i32 64, i32 0} ; [ DW_TAG_auto_variable ] [__begin] [line 0]
-!33 = metadata !{i32 786688, metadata !28, metadata !"__end", null, i32 0, metadata !10, i32 64, i32 0} ; [ DW_TAG_auto_variable ] [__end] [line 0]
-!34 = metadata !{i32 51, i32 0, metadata !35, null}
-!35 = metadata !{i32 786443, metadata !5, metadata !36, i32 51, i32 0, i32 5, i32 5} ; [ DW_TAG_lexical_block ] [./linezero.cc]
-!36 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 1, i32 1} ; [ DW_TAG_lexical_block ] [./linezero.cc]
-!37 = metadata !{i32 786688, metadata !28, metadata !"spec", metadata !16, i32 51, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [spec] [line 51]
-!38 = metadata !{i32 51, i32 0, metadata !39, null}
-!39 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 2, i32 2} ; [ DW_TAG_lexical_block ] [./linezero.cc]
-!40 = metadata !{i32 51, i32 0, metadata !41, null}
-!41 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 4, i32 4} ; [ DW_TAG_lexical_block ] [./linezero.cc]
-!42 = metadata !{i32 51, i32 0, metadata !43, null}
-!43 = metadata !{i32 786443, metadata !5, metadata !28, i32 51, i32 0, i32 3, i32 3} ; [ DW_TAG_lexical_block ] [./linezero.cc]
-!44 = metadata !{i32 52, i32 0, metadata !15, null}
-!45 = metadata !{i32 54, i32 0, metadata !20, null}
+!0 = !{!"0x11\004\00clang version 3.5.0 (trunk 209871)\000\00\000\00\001", !1, !2, !3, !14, !2, !2} ; [ DW_TAG_compile_unit ] [<stdin>] [DW_LANG_C_plus_plus]
+!1 = !{!"<stdin>", !"PATTERN"}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x13\00vector\0021\008\008\000\000\000", !5, null, null, !6, null, null, !"_ZTS6vector"} ; [ DW_TAG_structure_type ] [vector] [line 21, size 8, align 8, offset 0] [def] [from ]
+!5 = !{!"linezero.cc", !"PATTERN"}
+!6 = !{!7, !13}
+!7 = !{!"0x2e\00begin\00begin\00_ZN6vector5beginEv\0025\000\000\000\006\00256\000\0025", !5, !"_ZTS6vector", !8, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 25] [begin]
+!8 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !9, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!9 = !{!10, !12}
+!10 = !{!"0xf\00\000\0064\0064\000\000", null, null, !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!11 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!12 = !{!"0xf\00\000\0064\0064\000\001088", null, null, !"_ZTS6vector"} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [artificial] [from _ZTS6vector]
+!13 = !{!"0x2e\00end\00end\00_ZN6vector3endEv\0026\000\000\000\006\00256\000\0026", !5, !"_ZTS6vector", !8, null, null, null, i32 0, null} ; [ DW_TAG_subprogram ] [line 26] [end]
+!14 = !{!15, !20}
+!15 = !{!"0x2e\00test\00test\00_Z4testv\0050\000\001\000\006\00256\000\0050", !5, !16, !17, null, i32 ()* @_Z4testv, null, null, !2} ; [ DW_TAG_subprogram ] [line 50] [def] [test]
+!16 = !{!"0x29", !5}         ; [ DW_TAG_file_type ] [./linezero.cc]
+!17 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !18, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!18 = !{!19}
+!19 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!20 = !{!"0x2e\00f1\00f1\00_Z2f1v\0054\000\001\000\006\00256\000\0054", !5, !16, !21, null, void ()* @_Z2f1v, null, null, !2} ; [ DW_TAG_subprogram ] [line 54] [def] [f1]
+!21 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !22, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!22 = !{null}
+!23 = !{i32 2, !"Dwarf Version", i32 4}
+!24 = !{i32 2, !"Debug Info Version", i32 2}
+!25 = !{!"PATTERN/linezero.o", !0}
+!26 = !{!"clang version 3.5.0 (trunk 209871)"}
+!27 = !{!"0x100\00__range\000\0064", !28, null, !29} ; [ DW_TAG_auto_variable ] [__range] [line 0]
+!28 = !{!"0xb\0051\000\000", !5, !15} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!29 = !{!"0x42\00\000\000\000\000\000", null, null, !"_ZTS6vector"} ; [ DW_TAG_rvalue_reference_type ] [line 0, size 0, align 0, offset 0] [from _ZTS6vector]
+!30 = !MDLocation(line: 0, scope: !28)
+!31 = !MDLocation(line: 51, scope: !28)
+!32 = !{!"0x100\00__begin\000\0064", !28, null, !10} ; [ DW_TAG_auto_variable ] [__begin] [line 0]
+!33 = !{!"0x100\00__end\000\0064", !28, null, !10} ; [ DW_TAG_auto_variable ] [__end] [line 0]
+!34 = !MDLocation(line: 51, scope: !35)
+!35 = !{!"0xb\0051\000\005", !5, !36} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!36 = !{!"0xb\0051\000\001", !5, !28} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!37 = !{!"0x100\00spec\0051\000", !28, !16, !11} ; [ DW_TAG_auto_variable ] [spec] [line 51]
+!38 = !MDLocation(line: 51, scope: !39)
+!39 = !{!"0xb\0051\000\002", !5, !28} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!40 = !MDLocation(line: 51, scope: !41)
+!41 = !{!"0xb\0051\000\004", !5, !28} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!42 = !MDLocation(line: 51, scope: !43)
+!43 = !{!"0xb\0051\000\003", !5, !28} ; [ DW_TAG_lexical_block ] [./linezero.cc]
+!44 = !MDLocation(line: 52, scope: !15)
+!45 = !MDLocation(line: 54, scope: !20)
diff --git a/test/Transforms/GCOVProfiling/linkagename.ll b/test/Transforms/GCOVProfiling/linkagename.ll
index ed3a5bd93ada..c30d4a61ced4 100644
--- a/test/Transforms/GCOVProfiling/linkagename.ll
+++ b/test/Transforms/GCOVProfiling/linkagename.ll
@@ -1,11 +1,9 @@
-; RUN: echo '!9 = metadata !{metadata !"%T/linkagename.ll", metadata !0}' > %t1
+; RUN: echo '!9 = !{!"%/T/linkagename.ll", !0}' > %t1
 ; RUN: cat %s %t1 > %t2
 ; RUN: opt -insert-gcov-profiling -disable-output < %t2
 ; RUN: grep _Z3foov %T/linkagename.gcno
 ; RUN: rm %T/linkagename.gcno
 
-; REQUIRES: shell
-
 define void @_Z3foov() {
 entry:
   ret void, !dbg !8
@@ -15,15 +13,15 @@ entry:
 !llvm.module.flags = !{!10}
 !llvm.gcov = !{!9}
 
-!0 = metadata !{i32 786449, metadata !2, i32 4, metadata !"clang version 3.3 (trunk 177323)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3,  metadata !3, metadata !""} ; [ DW_TAG_compile_unit ] [/home/nlewycky/hello.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{i32 786473, metadata !2}          ; [ DW_TAG_file_type ] [/home/nlewycky/hello.cc]
-!2 = metadata !{metadata !"hello.cc", metadata !"/home/nlewycky"}
-!3 = metadata !{i32 0}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !1, metadata !1, metadata !"foo", metadata !"foo", metadata !"_Z3foov", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3foov, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!6 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null}
-!8 = metadata !{i32 1, i32 0, metadata !5, null}
+!0 = !{!"0x11\004\00clang version 3.3 (trunk 177323)\000\00\000\00\000", !2, !3, !3, !4, !3,  !3} ; [ DW_TAG_compile_unit ] [/home/nlewycky/hello.cc] [DW_LANG_C_plus_plus]
+!1 = !{!"0x29", !2}          ; [ DW_TAG_file_type ] [/home/nlewycky/hello.cc]
+!2 = !{!"hello.cc", !"/home/nlewycky"}
+!3 = !{i32 0}
+!4 = !{!5}
+!5 = !{!"0x2e\00foo\00foo\00_Z3foov\001\000\001\000\006\00256\000\001", !1, !1, !6, null, void ()* @_Z3foov, null, null, !3} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !MDLocation(line: 1, scope: !5)
 
 
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!10 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/GCOVProfiling/return-block.ll b/test/Transforms/GCOVProfiling/return-block.ll
new file mode 100644
index 000000000000..f0be3d21aeb2
--- /dev/null
+++ b/test/Transforms/GCOVProfiling/return-block.ll
@@ -0,0 +1,66 @@
+; Inject metadata to set the .gcno file location
+; RUN: echo '!19 = !{!"%/T/return-block.ll", !0}' > %t1
+; RUN: cat %s %t1 > %t2
+; RUN: opt -insert-gcov-profiling -disable-output %t2
+; RUN: llvm-cov gcov -n -dump %T/return-block.gcno 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@A = common global i32 0, align 4
+
+; Function Attrs: nounwind uwtable
+define void @test() #0 {
+entry:
+  tail call void (...)* @f() #2, !dbg !14
+  %0 = load i32* @A, align 4, !dbg !15
+  %tobool = icmp eq i32 %0, 0, !dbg !15
+  br i1 %tobool, label %if.end, label %if.then, !dbg !15
+
+if.then:                                          ; preds = %entry
+  tail call void (...)* @g() #2, !dbg !16
+  br label %if.end, !dbg !16
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void, !dbg !18
+}
+
+declare void @f(...) #1
+
+declare void @g(...) #1
+
+attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+!llvm.gcov = !{!19}
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !12}
+!llvm.ident = !{!13}
+
+!0 = !{!"0x11\0012\00clang version 3.6.0 (trunk 223182)\001\00\000\00\001", !1, !2, !2, !3, !8, !2} ; [ DW_TAG_compile_unit ] [return-block.c] [DW_LANG_C99]
+!1 = !{!".../llvm/test/Transforms/GCOVProfiling/return-block.ll", !""}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00test\00test\00\005\000\001\000\000\000\001\005", !1, !5, !6, null, void ()* @test, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [test]
+!5 = !{!"0x29", !1}    ; [ DW_TAG_file_type ] [return-block.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null}
+!8 = !{!9}
+!9 = !{!"0x34\00A\00A\00\003\000\001", null, !5, !10, i32* @A, null} ; [ DW_TAG_variable ] [A] [line 3] [def]
+!10 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32 2, !"Debug Info Version", i32 2}
+!13 = !{!"clang version 3.6.0 (trunk 223182)"}
+!14 = !MDLocation(line: 6, column: 3, scope: !4)
+!15 = !MDLocation(line: 7, column: 7, scope: !4)
+!16 = !MDLocation(line: 8, column: 5, scope: !17)
+!17 = !{!"0xb\007\007\000", !1, !4} ; [ DW_TAG_lexical_block ] [return-block.c]
+!18 = !MDLocation(line: 9, column: 1, scope: !4)
+
+; There should be no destination edges for block 1.
+; CHECK: Block : 0 Counter : 0
+; CHECK-NEXT:         Destination Edges : 2 (0), 
+; CHECK-NEXT: Block : 1 Counter : 0
+; CHECK-NEXT:         Source Edges : 4 (0), 
+; CHECK-NEXT: Block : 2 Counter : 0
diff --git a/test/Transforms/GCOVProfiling/version.ll b/test/Transforms/GCOVProfiling/version.ll
index 04f3f992b836..9436bd657da3 100644
--- a/test/Transforms/GCOVProfiling/version.ll
+++ b/test/Transforms/GCOVProfiling/version.ll
@@ -1,32 +1,30 @@
-; RUN: echo '!9 = metadata !{metadata !"%T/version.ll", metadata !0}' > %t1
+; RUN: echo '!9 = !{!"%/T/version.ll", !0}' > %t1
 ; RUN: cat %s %t1 > %t2
 ; RUN: opt -insert-gcov-profiling -disable-output < %t2
-; RUN: head -c8 %T/version.gcno | grep '^oncg\*204'
+; RUN: head -c8 %T/version.gcno | grep '^oncg.204'
 ; RUN: rm %T/version.gcno
 ; RUN: not opt -insert-gcov-profiling -default-gcov-version=asdfasdf -disable-output < %t2
 ; RUN: opt -insert-gcov-profiling -default-gcov-version=407* -disable-output < %t2
-; RUN: head -c8 %T/version.gcno | grep '^oncg\*704'
+; RUN: head -c8 %T/version.gcno | grep '^oncg.704'
 ; RUN: rm %T/version.gcno
 
 define void @test() {
   ret void, !dbg !8
 }
 
-; REQUIRES: shell
-
 !llvm.gcov = !{!9}
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!12}
 
-!0 = metadata !{i32 786449, metadata !11, i32 4, metadata !"clang version 3.3 (trunk 176994)", i1 false, metadata !"", i32 0, metadata !3, metadata !3, metadata !4, metadata !3, null, metadata !""} ; [ DW_TAG_compile_unit ] [./version] [DW_LANG_C_plus_plus]
-!2 = metadata !{i32 786473, metadata !11} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 0}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786478, metadata !10, metadata !6, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @test, null, null, metadata !3, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [test]
-!6 = metadata !{i32 786473, metadata !10} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !3, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{i32 1, i32 0, metadata !5, null}
+!0 = !{!"0x11\004\00clang version 3.3 (trunk 176994)\000\00\000\00\000", !11, !3, !3, !4, !3, null} ; [ DW_TAG_compile_unit ] [./version] [DW_LANG_C_plus_plus]
+!2 = !{!"0x29", !11} ; [ DW_TAG_file_type ]
+!3 = !{i32 0}
+!4 = !{!5}
+!5 = !{!"0x2e\00test\00test\00\001\000\001\000\006\00256\000\001", !10, !6, !7, null, void ()* @test, null, null, !3} ; [ DW_TAG_subprogram ] [line 1] [def] [test]
+!6 = !{!"0x29", !10} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !3, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !MDLocation(line: 1, scope: !5)
 ;; !9 is added through the echo line at the top.
-!10 = metadata !{metadata !"<stdin>", metadata !"."}
-!11 = metadata !{metadata !"version", metadata !"/usr/local/google/home/nlewycky"}
-!12 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!10 = !{!"<stdin>", !"."}
+!11 = !{!"version", !"/usr/local/google/home/nlewycky"}
+!12 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/GVN/2009-03-10-PREOnVoid.ll b/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
index fd31fce59a84..fdf17e0b46df 100644
--- a/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
+++ b/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
@@ -17,20 +17,20 @@ target triple = "i386-pc-linux-gnu"
 	%"struct.std::pair<std::_Rb_tree_iterator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >,bool>" = type { %"struct.std::_Rb_tree_iterator<std::pair<void* const, std::vector<ShadowInfo, std::allocator<ShadowInfo> > > >", i8 }
 	%"struct.std::pair<void* const,void*>" = type { i8*, i8* }
 
-@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once		; <i32 (i32*, void ()*)*> [#uses=0]
-@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific		; <i8* (i32)*> [#uses=0]
-@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific		; <i32 (i32, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_createPmPK16__pthread_attr_sPFPvS3_ES3_ = alias weak i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create		; <i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i32)* @pthread_cancel		; <i32 (i32)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)* @pthread_mutex_init		; <i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create		; <i32 (i32*, void (i8*)*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete		; <i32 (i32)*> [#uses=0]
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%struct.__sched_param*)* @pthread_mutexattr_init		; <i32 (%struct.__sched_param*)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%struct.__sched_param*, i32)* @pthread_mutexattr_settype		; <i32 (%struct.__sched_param*, i32)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%struct.__sched_param*)* @pthread_mutexattr_destroy		; <i32 (%struct.__sched_param*)*> [#uses=0]
+@_ZL20__gthrw_pthread_oncePiPFvvE = weak alias i32 (i32*, void ()*)* @pthread_once		; <i32 (i32*, void ()*)*> [#uses=0]
+@_ZL27__gthrw_pthread_getspecificj = weak alias i8* (i32)* @pthread_getspecific		; <i8* (i32)*> [#uses=0]
+@_ZL27__gthrw_pthread_setspecificjPKv = weak alias i32 (i32, i8*)* @pthread_setspecific		; <i32 (i32, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_createPmPK16__pthread_attr_sPFPvS3_ES3_ = weak alias i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create		; <i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_cancelm = weak alias i32 (i32)* @pthread_cancel		; <i32 (i32)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = weak alias i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = weak alias i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)* @pthread_mutex_init		; <i32 (%struct.pthread_mutex_t*, %struct.__sched_param*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_createPjPFvPvE = weak alias i32 (i32*, void (i8*)*)* @pthread_key_create		; <i32 (i32*, void (i8*)*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_deletej = weak alias i32 (i32)* @pthread_key_delete		; <i32 (i32)*> [#uses=0]
+@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*)* @pthread_mutexattr_init		; <i32 (%struct.__sched_param*)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = weak alias i32 (%struct.__sched_param*, i32)* @pthread_mutexattr_settype		; <i32 (%struct.__sched_param*, i32)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = weak alias i32 (%struct.__sched_param*)* @pthread_mutexattr_destroy		; <i32 (%struct.__sched_param*)*> [#uses=0]
 
 declare fastcc void @_ZNSt10_Select1stISt4pairIKPvS1_EEC1Ev() nounwind readnone
 
diff --git a/test/Transforms/GVN/atomic.ll b/test/Transforms/GVN/atomic.ll
index 094e22bd07e1..8c13d209c53a 100644
--- a/test/Transforms/GVN/atomic.ll
+++ b/test/Transforms/GVN/atomic.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-apple-macosx10.7.0"
 
 ; GVN across unordered store (allowed)
 define i32 @test1() nounwind uwtable ssp {
-; CHECK: test1
+; CHECK-LABEL: test1
 ; CHECK: add i32 %x, %x
 entry:
   %x = load i32* @y
@@ -18,10 +18,10 @@ entry:
   ret i32 %z
 }
 
-; GVN across seq_cst store (allowed in theory; not implemented ATM)
+; GVN across seq_cst store (allowed)
 define i32 @test2() nounwind uwtable ssp {
-; CHECK: test2
-; CHECK: add i32 %x, %y
+; CHECK-LABEL: test2
+; CHECK: add i32 %x, %x
 entry:
   %x = load i32* @y
   store atomic i32 %x, i32* @x seq_cst, align 4
@@ -32,7 +32,7 @@ entry:
 
 ; GVN across unordered load (allowed)
 define i32 @test3() nounwind uwtable ssp {
-; CHECK: test3
+; CHECK-LABEL: test3
 ; CHECK: add i32 %x, %x
 entry:
   %x = load i32* @y
@@ -43,11 +43,11 @@ entry:
   ret i32 %b
 }
 
-; GVN across acquire load (load after atomic load must not be removed)
+; GVN across acquire load (allowed as the original load was not atomic)
 define i32 @test4() nounwind uwtable ssp {
-; CHECK: test4
+; CHECK-LABEL: test4
 ; CHECK: load atomic i32* @x
-; CHECK: load i32* @y
+; CHECK-NOT: load i32* @y
 entry:
   %x = load i32* @y
   %y = load atomic i32* @x seq_cst, align 4
@@ -59,7 +59,7 @@ entry:
 
 ; GVN load to unordered load (allowed)
 define i32 @test5() nounwind uwtable ssp {
-; CHECK: test5
+; CHECK-LABEL: test5
 ; CHECK: add i32 %x, %x
 entry:
   %x = load atomic i32* @x unordered, align 4
@@ -70,7 +70,7 @@ entry:
 
 ; GVN unordered load to load (unordered load must not be removed)
 define i32 @test6() nounwind uwtable ssp {
-; CHECK: test6
+; CHECK-LABEL: test6
 ; CHECK: load atomic i32* @x unordered
 entry:
   %x = load i32* @x
@@ -78,3 +78,54 @@ entry:
   %x3 = add i32 %x, %x2
   ret i32 %x3
 }
+
+; GVN across release-acquire pair (forbidden)
+define i32 @test7() nounwind uwtable ssp {
+; CHECK-LABEL: test7
+; CHECK: add i32 %x, %y
+entry:
+  %x = load i32* @y
+  store atomic i32 %x, i32* @x release, align 4
+  %w = load atomic i32* @x acquire, align 4
+  %y = load i32* @y
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; GVN across acquire-release pair (allowed)
+define i32 @test8() nounwind uwtable ssp {
+; CHECK-LABEL: test8
+; CHECK: add i32 %x, %x
+entry:
+  %x = load i32* @y
+  %w = load atomic i32* @x acquire, align 4
+  store atomic i32 %x, i32* @x release, align 4
+  %y = load i32* @y
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; GVN across monotonic store (allowed)
+define i32 @test9() nounwind uwtable ssp {
+; CHECK-LABEL: test9
+; CHECK: add i32 %x, %x
+entry:
+  %x = load i32* @y
+  store atomic i32 %x, i32* @x monotonic, align 4
+  %y = load i32* @y
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
+; GVN of an unordered across monotonic load (not allowed)
+define i32 @test10() nounwind uwtable ssp {
+; CHECK-LABEL: test10
+; CHECK: add i32 %x, %y
+entry:
+  %x = load atomic i32* @y unordered, align 4
+  %clobber = load atomic i32* @x monotonic, align 4
+  %y = load atomic i32* @y monotonic, align 4
+  %z = add i32 %x, %y
+  ret i32 %z
+}
+
diff --git a/test/Transforms/GVN/cond_br2.ll b/test/Transforms/GVN/cond_br2.ll
index 27e6f75de271..a7ca2190563b 100644
--- a/test/Transforms/GVN/cond_br2.ll
+++ b/test/Transforms/GVN/cond_br2.ll
@@ -132,9 +132,9 @@ attributes #1 = { nounwind }
 attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #3 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"int", metadata !1}
-!4 = metadata !{metadata !0, metadata !0, i64 0}
-!5 = metadata !{metadata !3, metadata !3, i64 0}
+!0 = !{!"any pointer", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"int", !1}
+!4 = !{!0, !0, i64 0}
+!5 = !{!3, !3, i64 0}
diff --git a/test/Transforms/GVN/condprop.ll b/test/Transforms/GVN/condprop.ll
index 708e4b23cb54..845f88e15895 100644
--- a/test/Transforms/GVN/condprop.ll
+++ b/test/Transforms/GVN/condprop.ll
@@ -144,6 +144,22 @@ different:
   ret i1 %cmp3
 }
 
+; CHECK-LABEL: @test6_fp(
+define i1 @test6_fp(float %x, float %y) {
+  %cmp2 = fcmp une float %x, %y
+  %cmp = fcmp oeq float %x, %y
+  %cmp3 = fcmp oeq float  %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
 ; CHECK-LABEL: @test7(
 define i1 @test7(i32 %x, i32 %y) {
   %cmp = icmp sgt i32 %x, %y
@@ -160,6 +176,22 @@ different:
   ret i1 %cmp3
 }
 
+; CHECK-LABEL: @test7_fp(
+define i1 @test7_fp(float %x, float %y) {
+  %cmp = fcmp ogt float %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+  %cmp2 = fcmp ule float %x, %y
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+  %cmp3 = fcmp ogt float %x, %y
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
 ; CHECK-LABEL: @test8(
 define i1 @test8(i32 %x, i32 %y) {
   %cmp2 = icmp sle i32 %x, %y
@@ -176,6 +208,22 @@ different:
   ret i1 %cmp3
 }
 
+; CHECK-LABEL: @test8_fp(
+define i1 @test8_fp(float %x, float %y) {
+  %cmp2 = fcmp ule float %x, %y
+  %cmp = fcmp ogt float %x, %y
+  %cmp3 = fcmp ogt float %x, %y
+  br i1 %cmp, label %same, label %different
+
+same:
+; CHECK: ret i1 false
+  ret i1 %cmp2
+
+different:
+; CHECK: ret i1 false
+  ret i1 %cmp3
+}
+
 ; PR1768
 ; CHECK-LABEL: @test9(
 define i32 @test9(i32 %i, i32 %j) {
diff --git a/test/Transforms/GVN/edge.ll b/test/Transforms/GVN/edge.ll
index 646e10c0cdfb..1dc285ec3d7c 100644
--- a/test/Transforms/GVN/edge.ll
+++ b/test/Transforms/GVN/edge.ll
@@ -58,3 +58,38 @@ bb2:
 ; CHECK: call void @g(i1 %y)
   ret void
 }
+
+define double @fcmp_oeq(double %x, double %y) {
+entry:
+  %cmp = fcmp oeq double %y, 2.0
+  br i1 %cmp, label %if, label %return
+
+if:
+  %div = fdiv double %x, %y
+  br label %return
+
+return:
+  %retval.0 = phi double [ %div, %if ], [ %x, %entry ]
+  ret double %retval.0
+
+; CHECK-LABEL: define double @fcmp_oeq(
+; CHECK: %div = fdiv double %x, 2.000000e+00
+}
+
+define double @fcmp_une(double %x, double %y) {
+entry:
+  %cmp = fcmp une double %y, 2.0
+  br i1 %cmp, label %return, label %else
+
+else:
+  %div = fdiv double %x, %y
+  br label %return
+
+return:
+  %retval.0 = phi double [ %div, %else ], [ %x, %entry ]
+  ret double %retval.0
+
+; CHECK-LABEL: define double @fcmp_une(
+; CHECK: %div = fdiv double %x, 2.000000e+00
+}
+
diff --git a/test/Transforms/GVN/fpmath.ll b/test/Transforms/GVN/fpmath.ll
index 403df5c9008a..d164fb5e704e 100644
--- a/test/Transforms/GVN/fpmath.ll
+++ b/test/Transforms/GVN/fpmath.ll
@@ -41,5 +41,5 @@ define double @test4(double %x, double %y) {
   ret double %foo
 }
 
-!0 = metadata !{ float 5.0 }
-!1 = metadata !{ float 2.5 }
+!0 = !{ float 5.0 }
+!1 = !{ float 2.5 }
diff --git a/test/Transforms/GVN/invariant-load.ll b/test/Transforms/GVN/invariant-load.ll
index 80e2226b5fae..2f9f2a6100fb 100644
--- a/test/Transforms/GVN/invariant-load.ll
+++ b/test/Transforms/GVN/invariant-load.ll
@@ -27,5 +27,5 @@ entry:
   ret i32 %add
 }
 
-!0 = metadata !{ }
+!0 = !{ }
 
diff --git a/test/Transforms/GVN/load-from-unreachable-predecessor.ll b/test/Transforms/GVN/load-from-unreachable-predecessor.ll
new file mode 100644
index 000000000000..b676d958aa4a
--- /dev/null
+++ b/test/Transforms/GVN/load-from-unreachable-predecessor.ll
@@ -0,0 +1,20 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+; Check that an unreachable predecessor to a PHI node doesn't cause a crash.
+; PR21625.
+
+define i32 @f(i32** %f) {
+; CHECK: bb0:
+; Load should be removed, since it's ignored.
+; CHECK-NEXT: br label
+bb0:
+  %bar = load i32** %f
+  br label %bb2
+bb1:
+  %zed = load i32** %f
+  br i1 false, label %bb1, label %bb2
+bb2:
+  %foo = phi i32* [ null, %bb0 ], [ %zed, %bb1 ]
+  %storemerge = load i32* %foo
+  ret i32 %storemerge
+}
diff --git a/test/Transforms/GVN/load-pre-nonlocal.ll b/test/Transforms/GVN/load-pre-nonlocal.ll
index 7bac1b78e6a2..ae508b96cf59 100644
--- a/test/Transforms/GVN/load-pre-nonlocal.ll
+++ b/test/Transforms/GVN/load-pre-nonlocal.ll
@@ -79,9 +79,9 @@ if.end:
   ret i32 %add1
 }
 
-!1 = metadata !{metadata !2, metadata !2, i64 0}
-!2 = metadata !{metadata !"any pointer", metadata !3, i64 0}
-!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
-!4 = metadata !{metadata !"Simple C/C++ TBAA"}
-!5 = metadata !{metadata !6, metadata !6, i64 0}
-!6 = metadata !{metadata !"int", metadata !3, i64 0}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"any pointer", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"int", !3, i64 0}
diff --git a/test/Transforms/GVN/noalias.ll b/test/Transforms/GVN/noalias.ll
new file mode 100644
index 000000000000..6c310fad7ec6
--- /dev/null
+++ b/test/Transforms/GVN/noalias.ll
@@ -0,0 +1,43 @@
+; RUN: opt -scoped-noalias -basicaa -gvn -S < %s | FileCheck %s
+
+define i32 @test1(i32* %p, i32* %q) {
+; CHECK-LABEL: @test1(i32* %p, i32* %q)
+; CHECK: load i32* %p
+; CHECK-NOT: noalias
+; CHECK: %c = add i32 %a, %a
+  %a = load i32* %p, !noalias !0
+  %b = load i32* %p
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+define i32 @test2(i32* %p, i32* %q) {
+; CHECK-LABEL: @test2(i32* %p, i32* %q)
+; CHECK: load i32* %p, !alias.scope !0
+; CHECK: %c = add i32 %a, %a
+  %a = load i32* %p, !alias.scope !0
+  %b = load i32* %p, !alias.scope !0
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+; FIXME: In this case we can do better than intersecting the scopes, and can
+; concatenate them instead. Both loads are in the same basic block, the first
+; makes the second safe to speculatively execute, and there are no calls that may
+; throw in between.
+define i32 @test3(i32* %p, i32* %q) {
+; CHECK-LABEL: @test3(i32* %p, i32* %q)
+; CHECK: load i32* %p, !alias.scope !1
+; CHECK: %c = add i32 %a, %a
+  %a = load i32* %p, !alias.scope !1
+  %b = load i32* %p, !alias.scope !2
+  %c = add i32 %a, %b
+  ret i32 %c
+}
+
+declare i32 @foo(i32*) readonly
+
+!0 = !{!0}
+!1 = !{!1}
+!2 = !{!0, !1}
+
diff --git a/test/Transforms/GVN/pre-gep-load.ll b/test/Transforms/GVN/pre-gep-load.ll
new file mode 100644
index 000000000000..3ee3a37b81ad
--- /dev/null
+++ b/test/Transforms/GVN/pre-gep-load.ll
@@ -0,0 +1,49 @@
+; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define double @foo(i32 %stat, i32 %i, double** %p) {
+; CHECK-LABEL: @foo(
+entry:
+  switch i32 %stat, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb
+    i32 2, label %sw.bb2
+  ]
+
+sw.bb:                                            ; preds = %entry, %entry
+  %idxprom = sext i32 %i to i64
+  %arrayidx = getelementptr inbounds double** %p, i64 0
+  %0 = load double** %arrayidx, align 8
+  %arrayidx1 = getelementptr inbounds double* %0, i64 %idxprom
+  %1 = load double* %arrayidx1, align 8
+  %sub = fsub double %1, 1.000000e+00
+  %cmp = fcmp olt double %sub, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %sw.bb
+  br label %return
+
+if.end:                                           ; preds = %sw.bb
+  br label %sw.bb2
+
+sw.bb2:                                           ; preds = %if.end, %entry
+  %idxprom3 = sext i32 %i to i64
+  %arrayidx4 = getelementptr inbounds double** %p, i64 0
+  %2 = load double** %arrayidx4, align 8
+  %arrayidx5 = getelementptr inbounds double* %2, i64 %idxprom3
+  %3 = load double* %arrayidx5, align 8
+; CHECK: sw.bb2:
+; CHECK-NEXT-NOT: sext
+; CHECK-NEXT: phi double [
+; CHECK-NOT: load
+  %sub6 = fsub double 3.000000e+00, %3
+  br label %return
+
+sw.default:                                       ; preds = %entry
+  br label %return
+
+return:                                           ; preds = %sw.default, %sw.bb2, %if.then
+  %retval.0 = phi double [ 0.000000e+00, %sw.default ], [ %sub6, %sw.bb2 ], [ %sub, %if.then ]
+  ret double %retval.0
+}
diff --git a/test/Transforms/GVN/preserve-tbaa.ll b/test/Transforms/GVN/preserve-tbaa.ll
index c52ed96c23c1..587d463eef27 100644
--- a/test/Transforms/GVN/preserve-tbaa.ll
+++ b/test/Transforms/GVN/preserve-tbaa.ll
@@ -25,7 +25,7 @@ for.end:                                          ; preds = %for.body, %entry
   ret void
 }
 
-!0 = metadata !{metadata !3, metadata !3, i64 0}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"short", metadata !1}
+!0 = !{!3, !3, i64 0}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA", null}
+!3 = !{!"short", !1}
diff --git a/test/Transforms/GVN/range.ll b/test/Transforms/GVN/range.ll
index 2115fe85661e..37202321a6dc 100644
--- a/test/Transforms/GVN/range.ll
+++ b/test/Transforms/GVN/range.ll
@@ -82,20 +82,20 @@ define i32 @test8(i32* %p) {
   ret i32 %c
 }
 
-; CHECK: ![[DISJOINT_RANGE]] = metadata !{i32 0, i32 2, i32 3, i32 5}
-; CHECK: ![[MERGED_RANGE]] = metadata !{i32 0, i32 5}
-; CHECK: ![[MERGED_SIGNED_RANGE]] = metadata !{i32 -3, i32 -2, i32 1, i32 2}
-; CHECK: ![[MERGED_TEST6]] = metadata !{i32 10, i32 1}
-; CHECK: ![[MERGED_TEST7]] = metadata !{i32 3, i32 4, i32 5, i32 2}
+; CHECK: ![[DISJOINT_RANGE]] = !{i32 0, i32 2, i32 3, i32 5}
+; CHECK: ![[MERGED_RANGE]] = !{i32 0, i32 5}
+; CHECK: ![[MERGED_SIGNED_RANGE]] = !{i32 -3, i32 -2, i32 1, i32 2}
+; CHECK: ![[MERGED_TEST6]] = !{i32 10, i32 1}
+; CHECK: ![[MERGED_TEST7]] = !{i32 3, i32 4, i32 5, i32 2}
 
-!0 = metadata !{i32 0, i32 2}
-!1 = metadata !{i32 3, i32 5}
-!2 = metadata !{i32 2, i32 5}
-!3 = metadata !{i32 -3, i32 -2}
-!4 = metadata !{i32 1, i32 2}
-!5 = metadata !{i32 10, i32 1}
-!6 = metadata !{i32 12, i32 13}
-!7 = metadata !{i32 1, i32 2, i32 3, i32 4}
-!8 = metadata !{i32 5, i32 1}
-!9 = metadata !{i32 1, i32 5}
-!10 = metadata !{i32 5, i32 1}
+!0 = !{i32 0, i32 2}
+!1 = !{i32 3, i32 5}
+!2 = !{i32 2, i32 5}
+!3 = !{i32 -3, i32 -2}
+!4 = !{i32 1, i32 2}
+!5 = !{i32 10, i32 1}
+!6 = !{i32 12, i32 13}
+!7 = !{i32 1, i32 2, i32 3, i32 4}
+!8 = !{i32 5, i32 1}
+!9 = !{i32 1, i32 5}
+!10 = !{i32 5, i32 1}
diff --git a/test/Transforms/GVN/tbaa.ll b/test/Transforms/GVN/tbaa.ll
index d6412fce59b2..1b3494ca4be3 100644
--- a/test/Transforms/GVN/tbaa.ll
+++ b/test/Transforms/GVN/tbaa.ll
@@ -74,18 +74,18 @@ define i32 @test7(i8* %p, i8* %q) {
 
 declare i32 @foo(i8*) readonly
 
-; CHECK: [[TAGC]] = metadata !{metadata [[TYPEC:!.*]], metadata [[TYPEC]], i64 0}
-; CHECK: [[TYPEC]] = metadata !{metadata !"C", metadata [[TYPEA:!.*]]}
-; CHECK: [[TYPEA]] = metadata !{metadata !"A", metadata !{{.*}}}
-; CHECK: [[TAGB]] = metadata !{metadata [[TYPEB:!.*]], metadata [[TYPEB]], i64 0}
-; CHECK: [[TYPEB]] = metadata !{metadata !"B", metadata [[TYPEA]]}
-; CHECK: [[TAGA]] = metadata !{metadata [[TYPEA]], metadata [[TYPEA]], i64 0}
-!0 = metadata !{metadata !5, metadata !5, i64 0}
-!1 = metadata !{metadata !6, metadata !6, i64 0}
-!2 = metadata !{metadata !"tbaa root", null}
-!3 = metadata !{metadata !7, metadata !7, i64 0}
-!4 = metadata !{metadata !8, metadata !8, i64 0}
-!5 = metadata !{metadata !"C", metadata !6}
-!6 = metadata !{metadata !"A", metadata !2}
-!7 = metadata !{metadata !"B", metadata !6}
-!8 = metadata !{metadata !"another root", null}
+; CHECK: [[TAGC]] = !{[[TYPEC:!.*]], [[TYPEC]], i64 0}
+; CHECK: [[TYPEC]] = !{!"C", [[TYPEA:!.*]]}
+; CHECK: [[TYPEA]] = !{!"A", !{{.*}}}
+; CHECK: [[TAGB]] = !{[[TYPEB:!.*]], [[TYPEB]], i64 0}
+; CHECK: [[TYPEB]] = !{!"B", [[TYPEA]]}
+; CHECK: [[TAGA]] = !{[[TYPEA]], [[TYPEA]], i64 0}
+!0 = !{!5, !5, i64 0}
+!1 = !{!6, !6, i64 0}
+!2 = !{!"tbaa root", null}
+!3 = !{!7, !7, i64 0}
+!4 = !{!8, !8, i64 0}
+!5 = !{!"C", !6}
+!6 = !{!"A", !2}
+!7 = !{!"B", !6}
+!8 = !{!"another root", null}
diff --git a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
index 0bdced5114d3..584f0bf467fa 100644
--- a/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
+++ b/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
@@ -5,14 +5,14 @@
 @A = global i32 0
 ; CHECK: @A = global i32 0
 
-@D = alias internal i32* @A
+@D = internal alias i32* @A
 ; DEAD-NOT: @D
 
 @L1 = alias i32* @A
 ; CHECK: @L1 = alias i32* @A
 
-@L2 = alias internal i32* @L1
-; CHECK: @L2 = alias internal i32* @L1
+@L2 = internal alias i32* @L1
+; CHECK: @L2 = internal alias i32* @L1
 
 @L3 = alias i32* @L2
 ; CHECK: @L3 = alias i32* @L2
diff --git a/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll b/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
index 68933c6ef376..5fb4444c6ba8 100644
--- a/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
+++ b/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
@@ -1,4 +1,4 @@
 ; RUN: opt < %s -globaldce
 
-@A = alias internal void ()* @F
+@A = internal alias void ()* @F
 define internal void @F() { ret void }
diff --git a/test/Transforms/GlobalDCE/deadblockaddr.ll b/test/Transforms/GlobalDCE/deadblockaddr.ll
new file mode 100644
index 000000000000..1ec5994d0043
--- /dev/null
+++ b/test/Transforms/GlobalDCE/deadblockaddr.ll
@@ -0,0 +1,16 @@
+; RUN: opt -globaldce -simplifycfg -S < %s | FileCheck %s
+
+; Tests whether globaldce does the right cleanup while removing @bar
+; so that a dead BlockAddress reference to foo won't prevent other passes
+; to work properly, e.g. simplifycfg
+@bar = internal unnamed_addr constant i8* blockaddress(@foo, %L1)
+
+; CHECK-LABEL: foo
+; CHECK-NOT: br label %L1
+; CHECK: ret void
+define void @foo() {
+entry:
+  br label %L1
+L1:
+  ret void
+}
diff --git a/test/Transforms/GlobalDCE/pr20981.ll b/test/Transforms/GlobalDCE/pr20981.ll
new file mode 100644
index 000000000000..0eaa6b899091
--- /dev/null
+++ b/test/Transforms/GlobalDCE/pr20981.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -globaldce -S | FileCheck %s
+
+$c1 = comdat any
+; CHECK: $c1 = comdat any
+
+@a1 = linkonce_odr alias void ()* @f1
+; CHECK: @a1 = linkonce_odr alias void ()* @f1
+
+define linkonce_odr void @f1() comdat($c1) {
+  ret void
+}
+; CHECK: define linkonce_odr void @f1() comdat($c1)
+
+define void @g() {
+  call void @f1()
+  ret void
+}
diff --git a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
index b98facad3977..8efd01807fdb 100644
--- a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
+++ b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
@@ -13,7 +13,7 @@ define void @g() {
 	ret void
 }
 
-@b = alias internal void ()* @g
+@b = internal alias  void ()* @g
 ; CHECK-NOT: @b
 
 define void @h() {
diff --git a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
index 01089600637b..049eef10eb7a 100644
--- a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
+++ b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
@@ -6,14 +6,14 @@
 define i32 @foo(i32 %i) nounwind ssp {
 entry:
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !3)
+  call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !3, metadata !{})
   %0 = icmp eq i32 %i, 1, !dbg !7                 ; <i1> [#uses=1]
   br i1 %0, label %bb, label %bb1, !dbg !7
 
 bb:                                               ; preds = %entry
   store i32 0, i32* @Stop, align 4, !dbg !9
   %1 = mul nsw i32 %i, 42, !dbg !10               ; <i32> [#uses=1]
-  call void @llvm.dbg.value(metadata !{i32 %1}, i64 0, metadata !3), !dbg !10
+  call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !3, metadata !{}), !dbg !10
   br label %bb2, !dbg !10
 
 bb1:                                              ; preds = %entry
@@ -28,7 +28,7 @@ return:                                           ; preds = %bb2
   ret i32 %i_addr.0, !dbg !12
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 define i32 @bar() nounwind ssp {
 entry:
@@ -51,29 +51,29 @@ return:                                           ; preds = %bb2
   ret i32 %.0, !dbg !19
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.gv = !{!0}
 
-!0 = metadata !{i32 458804, i32 0, metadata !1, metadata !"Stop", metadata !"Stop", metadata !"", metadata !1, i32 2, metadata !2, i1 true, i1 true, i32* @Stop} ; [ DW_TAG_variable ]
-!1 = metadata !{i32 458769, metadata !20, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!2 = metadata !{i32 458788, null, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!3 = metadata !{i32 459009, metadata !4, metadata !"i", metadata !1, i32 4, metadata !2} ; [ DW_TAG_arg_variable ]
-!4 = metadata !{i32 458798, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 4, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!5 = metadata !{i32 458773, metadata !1, null, metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!6 = metadata !{metadata !2, metadata !2}
-!7 = metadata !{i32 5, i32 0, metadata !8, null}
-!8 = metadata !{i32 458763, metadata !20, metadata !4, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 6, i32 0, metadata !8, null}
-!10 = metadata !{i32 7, i32 0, metadata !8, null}
-!11 = metadata !{i32 9, i32 0, metadata !8, null}
-!12 = metadata !{i32 11, i32 0, metadata !8, null}
-!13 = metadata !{i32 14, i32 0, metadata !14, null}
-!14 = metadata !{i32 458763, metadata !20, metadata !15, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!15 = metadata !{i32 458798, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", i32 13, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i32 0, i32 0, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!16 = metadata !{i32 458773, metadata !1, null, metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!17 = metadata !{metadata !2}
-!18 = metadata !{i32 15, i32 0, metadata !14, null}
-!19 = metadata !{i32 16, i32 0, metadata !14, null}
-!20 = metadata !{metadata !"g.c", metadata !"/tmp"}
-!21 = metadata !{i32 0}
+!0 = !{!"0x34\00Stop\00Stop\00\002\001\001", !1, !1, !2, i32* @Stop} ; [ DW_TAG_variable ]
+!1 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", !20, !21, !21, null, null, null} ; [ DW_TAG_compile_unit ]
+!2 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !1} ; [ DW_TAG_base_type ]
+!3 = !{!"0x101\00i\004\000", !4, !1, !2} ; [ DW_TAG_arg_variable ]
+!4 = !{!"0x2e\00foo\00foo\00foo\004\000\001\000\006\000\000\000", i32 0, !1, !5, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!5 = !{!"0x15\00\000\000\000\000\000\000", !1, null, null, !6, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = !{!2, !2}
+!7 = !MDLocation(line: 5, scope: !8)
+!8 = !{!"0xb\000\000\000", !20, !4} ; [ DW_TAG_lexical_block ]
+!9 = !MDLocation(line: 6, scope: !8)
+!10 = !MDLocation(line: 7, scope: !8)
+!11 = !MDLocation(line: 9, scope: !8)
+!12 = !MDLocation(line: 11, scope: !8)
+!13 = !MDLocation(line: 14, scope: !14)
+!14 = !{!"0xb\000\000\000", !20, !15} ; [ DW_TAG_lexical_block ]
+!15 = !{!"0x2e\00bar\00bar\00bar\0013\000\001\000\006\000\000\000", i32 0, !1, !16, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!16 = !{!"0x15\00\000\000\000\000\000\000", !1, null, null, !17, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!17 = !{!2}
+!18 = !MDLocation(line: 15, scope: !14)
+!19 = !MDLocation(line: 16, scope: !14)
+!20 = !{!"g.c", !"/tmp"}
+!21 = !{i32 0}
diff --git a/test/Transforms/GlobalOpt/alias-resolve.ll b/test/Transforms/GlobalOpt/alias-resolve.ll
index 9d70c708aad8..ebc20c66cf61 100644
--- a/test/Transforms/GlobalOpt/alias-resolve.ll
+++ b/test/Transforms/GlobalOpt/alias-resolve.ll
@@ -9,12 +9,12 @@
 @bar1  = alias void ()* @bar2
 ; CHECK: @bar1 = alias void ()* @bar2
 
-@weak1 = alias weak void ()* @bar2
-; CHECK: @weak1 = alias weak void ()* @bar2
+@weak1 = weak alias void ()* @bar2
+; CHECK: @weak1 = weak alias void ()* @bar2
 
 @bar4 = private unnamed_addr constant [2 x i8*] zeroinitializer
-@foo4 = unnamed_addr alias linkonce_odr getelementptr inbounds ([2 x i8*]* @bar4, i32 0, i32 1)
-; CHECK: @foo4 = unnamed_addr alias linkonce_odr getelementptr inbounds ([2 x i8*]* @bar4, i32 0, i32 1)
+@foo4 = linkonce_odr unnamed_addr alias getelementptr inbounds ([2 x i8*]* @bar4, i32 0, i32 1)
+; CHECK: @foo4 = linkonce_odr unnamed_addr alias getelementptr inbounds ([2 x i8*]* @bar4, i32 0, i32 1)
 
 define void @bar2() {
   ret void
diff --git a/test/Transforms/GlobalOpt/alias-used-address-space.ll b/test/Transforms/GlobalOpt/alias-used-address-space.ll
index 633cd344a796..62e74ba2ab48 100644
--- a/test/Transforms/GlobalOpt/alias-used-address-space.ll
+++ b/test/Transforms/GlobalOpt/alias-used-address-space.ll
@@ -7,7 +7,7 @@ target datalayout = "p:32:32:32-p1:16:16:16"
 @i = internal addrspace(1) global i8 42
 
 ; CHECK: @ia = internal addrspace(1) global i8 42
-@ia = alias internal i8 addrspace(1)* @i
+@ia = internal alias i8 addrspace(1)* @i
 
 @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @ca to i8*)], section "llvm.metadata"
 ; CHECK-DAG: @llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @ca to i8*)], section "llvm.metadata"
@@ -18,8 +18,8 @@ target datalayout = "p:32:32:32-p1:16:16:16"
 @sameAsUsed = global [1 x i8*] [i8* addrspacecast(i8 addrspace(1)* @ca to i8*)]
 ; CHECK-DAG: @sameAsUsed = global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @c to i8*)]
 
-@ca = alias internal i8 addrspace(1)* @c
-; CHECK: @ca = alias internal i8 addrspace(1)* @c
+@ca = internal alias i8 addrspace(1)* @c
+; CHECK: @ca = internal alias i8 addrspace(1)* @c
 
 define i8 addrspace(1)* @h() {
   ret i8 addrspace(1)* @ca
diff --git a/test/Transforms/GlobalOpt/alias-used-section.ll b/test/Transforms/GlobalOpt/alias-used-section.ll
index 121793724d72..4dab2f5a02f5 100644
--- a/test/Transforms/GlobalOpt/alias-used-section.ll
+++ b/test/Transforms/GlobalOpt/alias-used-section.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -S -globalopt < %s | FileCheck %s
 
 @_Z17in_custom_section = internal global i8 42, section "CUSTOM"
-@in_custom_section = dllexport alias internal i8* @_Z17in_custom_section
+@in_custom_section = internal dllexport alias i8* @_Z17in_custom_section
 
 ; CHECK: @in_custom_section = internal dllexport global i8 42, section "CUSTOM"
 
diff --git a/test/Transforms/GlobalOpt/alias-used.ll b/test/Transforms/GlobalOpt/alias-used.ll
index 05ac7f9bddb3..21f06b7be5ff 100644
--- a/test/Transforms/GlobalOpt/alias-used.ll
+++ b/test/Transforms/GlobalOpt/alias-used.ll
@@ -4,7 +4,7 @@
 
 @i = internal global i8 42
 ; CHECK: @ia = internal global i8 42
-@ia = alias internal i8* @i
+@ia = internal alias i8* @i
 
 @llvm.used = appending global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca], section "llvm.metadata"
 ; CHECK-DAG: @llvm.used = appending global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca], section "llvm.metadata"
@@ -18,17 +18,17 @@
 @other = global i32* bitcast (void ()* @fa to i32*)
 ; CHECK-DAG: @other = global i32* bitcast (void ()* @f to i32*)
 
-@fa = alias internal void ()* @f
-; CHECK: @fa = alias internal void ()* @f
+@fa = internal alias void ()* @f
+; CHECK: @fa = internal alias void ()* @f
 
-@fa2 = alias internal void ()* @f
+@fa2 = internal alias void ()* @f
 ; CHECK-NOT: @fa2
 
-@fa3 = alias internal void ()* @f
+@fa3 = internal alias void ()* @f
 ; CHECK: @fa3
 
-@ca = alias internal i8* @c
-; CHECK: @ca = alias internal i8* @c
+@ca = internal alias i8* @c
+; CHECK: @ca = internal alias i8* @c
 
 define void @f() {
   ret void
diff --git a/test/Transforms/GlobalOpt/constantfold-initializers.ll b/test/Transforms/GlobalOpt/constantfold-initializers.ll
index 4a25d661edcf..36de19c1cd84 100644
--- a/test/Transforms/GlobalOpt/constantfold-initializers.ll
+++ b/test/Transforms/GlobalOpt/constantfold-initializers.ll
@@ -81,10 +81,23 @@ entry:
   ret void
 }
 
+@test6_v1 = internal global { i32, i32 } { i32 42, i32 0 }, align 8
+@test6_v2 = global i32 0, align 4
+; CHECK: @test6_v2 = global i32 42, align 4
+define internal void @test6() {
+  %load = load { i32, i32 }* @test6_v1, align 8
+  %xv0 = extractvalue { i32, i32 } %load, 0
+  %iv = insertvalue { i32, i32 } %load, i32 %xv0, 1
+  %xv1 = extractvalue { i32, i32 } %iv, 1
+  store i32 %xv1, i32* @test6_v2, align 4
+  ret void
+}
+
 @llvm.global_ctors = appending constant
-  [5 x { i32, void ()* }]
+  [6 x { i32, void ()* }]
   [{ i32, void ()* } { i32 65535, void ()* @test1 },
    { i32, void ()* } { i32 65535, void ()* @test2 },
    { i32, void ()* } { i32 65535, void ()* @test3 },
    { i32, void ()* } { i32 65535, void ()* @test4 },
-   { i32, void ()* } { i32 65535, void ()* @test5 }]
+   { i32, void ()* } { i32 65535, void ()* @test5 },
+   { i32, void ()* } { i32 65535, void ()* @test6 }]
diff --git a/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll b/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
index 9295c2025a2a..675211b64b46 100644
--- a/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
+++ b/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
@@ -32,4 +32,4 @@ define void @print() {
   ret void
 }
 
-!2009 = metadata !{}
+!2009 = !{}
diff --git a/test/Transforms/GlobalOpt/metadata.ll b/test/Transforms/GlobalOpt/metadata.ll
index ecf3f94196be..fb60b66ab58e 100644
--- a/test/Transforms/GlobalOpt/metadata.ll
+++ b/test/Transforms/GlobalOpt/metadata.ll
@@ -13,14 +13,20 @@ define i32 @main(i32 %argc, i8** %argv) {
 }
 
 define void @foo(i32 %x) {
-  call void @llvm.foo(metadata !{i8*** @G, i32 %x})
-; CHECK: call void @llvm.foo(metadata !{null, i32 %x})
+; Note: these arguments look like MDNodes, but they're really syntactic sugar
+; for 'MetadataAsValue::get(ValueAsMetadata::get(Value*))'.  When @G drops to
+; null, the ValueAsMetadata instance gets replaced by metadata !{}, or
+; MDNode::get({}).
+  call void @llvm.foo(metadata i8*** @G, metadata i32 %x)
+; CHECK: call void @llvm.foo(metadata ![[EMPTY:[0-9]+]], metadata i32 %x)
   ret void
 }
 
-declare void @llvm.foo(metadata) nounwind readnone
+declare void @llvm.foo(metadata, metadata) nounwind readnone
 
 !named = !{!0}
+; CHECK: !named = !{![[NULL:[0-9]+]]}
 
-!0 = metadata !{i8*** @G}
-; CHECK: !0 = metadata !{null}
+!0 = !{i8*** @G}
+; CHECK-DAG: ![[NULL]] = !{null}
+; CHECK-DAG: ![[EMPTY]] = !{}
diff --git a/test/Transforms/GlobalOpt/pr21191.ll b/test/Transforms/GlobalOpt/pr21191.ll
new file mode 100644
index 000000000000..34e15cb3404a
--- /dev/null
+++ b/test/Transforms/GlobalOpt/pr21191.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+$c = comdat any
+; CHECK: $c = comdat any
+
+define linkonce_odr void @foo() comdat($c) {
+  ret void
+}
+; CHECK: define linkonce_odr void @foo() comdat($c)
+
+define linkonce_odr void @bar() comdat($c) {
+  ret void
+}
+; CHECK: define linkonce_odr void @bar() comdat($c)
+
+define void @zed()  {
+  call void @foo()
+  ret void
+}
diff --git a/test/Transforms/GlobalOpt/preserve-comdats.ll b/test/Transforms/GlobalOpt/preserve-comdats.ll
new file mode 100644
index 000000000000..0148f0024b9d
--- /dev/null
+++ b/test/Transforms/GlobalOpt/preserve-comdats.ll
@@ -0,0 +1,37 @@
+; RUN: opt -globalopt -S < %s | FileCheck %s
+
+$comdat_global = comdat any
+
+@comdat_global = weak_odr global i8 0, comdat($comdat_global)
+@simple_global = internal global i8 0
+; CHECK: @comdat_global = weak_odr global i8 0, comdat{{$}}
+; CHECK: @simple_global = internal global i8 42
+
+@llvm.global_ctors = appending global [2 x { i32, void ()*, i8* }] [
+    { i32, void ()*, i8* } { i32 65535, void ()* @init_comdat_global, i8* @comdat_global },
+    { i32, void ()*, i8* } { i32 65535, void ()* @init_simple_global, i8* null }
+]
+; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }]
+; CHECK: [{ i32, void ()*, i8* } { i32 65535, void ()* @init_comdat_global, i8* @comdat_global }]
+
+define void @init_comdat_global() {
+  store i8 42, i8* @comdat_global
+  ret void
+}
+; CHECK: define void @init_comdat_global()
+
+define internal void @init_simple_global() comdat($comdat_global) {
+  store i8 42, i8* @simple_global
+  ret void
+}
+; CHECK-NOT: @init_simple_global()
+
+define i8* @use_simple() {
+  ret i8* @simple_global
+}
+; CHECK: define i8* @use_simple()
+
+define i8* @use_comdat() {
+  ret i8* @comdat_global
+}
+; CHECK: define i8* @use_comdat()
diff --git a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
index af9f1b38f1a9..82b2120c0e87 100644
--- a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
+++ b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -indvars -S | FileCheck %s
 ; Test WidenIV::GetExtendedOperandRecurrence.
-; add219 should be extended to i64 because it is nsw, even though its
+; %add, %sub and %mul should be extended to i64 because it is nsw, even though its
 ; sext cannot be hoisted outside the loop.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -17,14 +17,27 @@ for.body11:                                       ; preds = %entry
 for.body153:                                      ; preds = %for.body153, %for.body11
   br i1 undef, label %for.body170, label %for.body153
 
-; CHECK: add nsw i64 %indvars.iv, 1
+; CHECK: add nuw nsw i64 %indvars.iv, 1
+; CHECK: sub nsw i64 %indvars.iv, 2
+; CHECK: sub nsw i64 4, %indvars.iv
+; CHECK: mul nsw i64 %indvars.iv, 8
 for.body170:                                      ; preds = %for.body170, %for.body153
   %i2.19 = phi i32 [ %add249, %for.body170 ], [ 0, %for.body153 ]
-  %add219 = add nsw i32 %i2.19, 1
-  %idxprom220 = sext i32 %add219 to i64
+
+  %add = add nsw i32 %i2.19, 1
+  %add.idxprom = sext i32 %add to i64
+
+  %sub = sub nsw i32 %i2.19, 2
+  %sub.idxprom = sext i32 %sub to i64
+
+  %sub.neg = sub nsw i32 4, %i2.19
+  %sub.neg.idxprom = sext i32 %sub.neg to i64
+
+  %mul = mul nsw i32 %i2.19, 8
+  %mul.idxprom = sext i32 %mul to i64
+
   %add249 = add nsw i32 %i2.19, %shl132
   br label %for.body170
-
 for.end285:                                       ; preds = %entry
   ret void
 }
diff --git a/test/Transforms/IndVarSimplify/NVPTX/lit.local.cfg b/test/Transforms/IndVarSimplify/NVPTX/lit.local.cfg
new file mode 100644
index 000000000000..2cb98eb371b2
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/NVPTX/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'NVPTX' in config.root.targets:
+    config.unsupported = True
diff --git a/test/Transforms/IndVarSimplify/NVPTX/no-widen-expensive.ll b/test/Transforms/IndVarSimplify/NVPTX/no-widen-expensive.ll
new file mode 100644
index 000000000000..8744b19d5e28
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/NVPTX/no-widen-expensive.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+target triple = "nvptx64-unknown-unknown"
+
+; For the nvptx64 architecture, the cost of an arithmetic instruction on a
+; 64-bit integer is twice as expensive as that on a 32-bit integer, because the
+; hardware needs to simulate a 64-bit integer using two 32-bit integers.
+; Therefore, in this particular architecture, we should not widen induction
+; variables to 64-bit integers even though i64 is a legal type in the 64-bit
+; PTX ISA.
+
+define void @indvar_32_bit(i32 %n, i32* nocapture %output) {
+; CHECK-LABEL: @indvar_32_bit
+entry:
+  %cmp5 = icmp sgt i32 %n, 0
+  br i1 %cmp5, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.06 = phi i32 [ 0, %for.body.preheader ], [ %add, %for.body ]
+; CHECK: phi i32
+  %mul = mul nsw i32 %i.06, %i.06
+  %0 = sext i32 %i.06 to i64
+  %arrayidx = getelementptr inbounds i32* %output, i64 %0
+  store i32 %mul, i32* %arrayidx, align 4
+  %add = add nsw i32 %i.06, 3
+  %cmp = icmp slt i32 %add, %n
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/backedge-on-min-max.ll b/test/Transforms/IndVarSimplify/backedge-on-min-max.ll
new file mode 100644
index 000000000000..250ff9a2eca6
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/backedge-on-min-max.ll
@@ -0,0 +1,453 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+;; --- signed ---
+
+define void @min.signed.1(i32* %a, i32 %a_len, i32 %n) {
+; CHECK-LABEL: @min.signed.1
+ entry:
+  %smin.cmp = icmp slt i32 %a_len, %n
+  %smin = select i1 %smin.cmp, i32 %a_len, i32 %n
+  %entry.cond = icmp slt i32 0, %smin
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp slt i32 %idx, %a_len
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp slt i32 %idx.inc, %smin
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @min.signed.2(i32* %a, i32 %a_len, i32 %n) {
+; CHECK-LABEL: @min.signed.2
+ entry:
+  %smin.cmp = icmp slt i32 %a_len, %n
+  %smin = select i1 %smin.cmp, i32 %a_len, i32 %n
+  %entry.cond = icmp slt i32 0, %smin
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp sgt i32 %a_len, %idx
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp slt i32 %idx.inc, %smin
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @min.signed.3(i32* %a, i32 %n) {
+; CHECK-LABEL: @min.signed.3
+ entry:
+  %smin.cmp = icmp slt i32 42, %n
+  %smin = select i1 %smin.cmp, i32 42, i32 %n
+  %entry.cond = icmp slt i32 0, %smin
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp slt i32 %idx, 42
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp slt i32 %idx.inc, %smin
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @min.signed.4(i32* %a, i32 %n) {
+; CHECK-LABEL: @min.signed.4
+ entry:
+  %smin.cmp = icmp slt i32 42, %n
+  %smin = select i1 %smin.cmp, i32 42, i32 %n
+  %entry.cond = icmp slt i32 0, %smin
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp sgt i32 42, %idx
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp slt i32 %idx.inc, %smin
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @max.signed.1(i32* %a, i32 %a_len, i32 %n) {
+; CHECK-LABEL: @max.signed.1
+ entry:
+  %smax.cmp = icmp sgt i32 %a_len, %n
+  %smax = select i1 %smax.cmp, i32 %a_len, i32 %n
+  %entry.cond = icmp sgt i32 0, %smax
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp sgt i32 %idx, %a_len
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp sgt i32 %idx.inc, %smax
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @max.signed.2(i32* %a, i32 %a_len, i32 %n) {
+; CHECK-LABEL: @max.signed.2
+ entry:
+  %smax.cmp = icmp sgt i32 %a_len, %n
+  %smax = select i1 %smax.cmp, i32 %a_len, i32 %n
+  %entry.cond = icmp sgt i32 0, %smax
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 0, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp slt i32 %a_len, %idx
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp sgt i32 %idx.inc, %smax
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @max.signed.3(i32* %a, i32 %n, i32 %init) {
+; CHECK-LABEL: @max.signed.3
+ entry:
+  %smax.cmp = icmp sgt i32 42, %n
+  %smax = select i1 %smax.cmp, i32 42, i32 %n
+  %entry.cond = icmp sgt i32 %init, %smax
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ %init, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp sgt i32 %idx, 42
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp sgt i32 %idx.inc, %smax
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @max.signed.4(i32* %a, i32 %n, i32 %init) {
+; CHECK-LABEL: @max.signed.4
+ entry:
+  %smax.cmp = icmp sgt i32 42, %n
+  %smax = select i1 %smax.cmp, i32 42, i32 %n
+  %entry.cond = icmp sgt i32 %init, %smax
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ %init, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp slt i32 42, %idx
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp sgt i32 %idx.inc, %smax
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+;; --- unsigned ---
+
+define void @min.unsigned.1(i32* %a, i32 %a_len, i32 %n) {
+; CHECK-LABEL: @min.unsigned.1
+ entry:
+  %umin.cmp = icmp ult i32 %a_len, %n
+  %umin = select i1 %umin.cmp, i32 %a_len, i32 %n
+  %entry.cond = icmp ult i32 5, %umin
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 5, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp ult i32 %idx, %a_len
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp ult i32 %idx.inc, %umin
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @min.unsigned.2(i32* %a, i32 %a_len, i32 %n) {
+; CHECK-LABEL: @min.unsigned.2
+ entry:
+  %umin.cmp = icmp ult i32 %a_len, %n
+  %umin = select i1 %umin.cmp, i32 %a_len, i32 %n
+  %entry.cond = icmp ult i32 5, %umin
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 5, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp ugt i32 %a_len, %idx
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp ult i32 %idx.inc, %umin
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @min.unsigned.3(i32* %a, i32 %n) {
+; CHECK-LABEL: @min.unsigned.3
+ entry:
+  %umin.cmp = icmp ult i32 42, %n
+  %umin = select i1 %umin.cmp, i32 42, i32 %n
+  %entry.cond = icmp ult i32 5, %umin
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 5, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp ult i32 %idx, 42
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp ult i32 %idx.inc, %umin
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @min.unsigned.4(i32* %a, i32 %n) {
+; CHECK-LABEL: @min.unsigned.4
+ entry:
+  %umin.cmp = icmp ult i32 42, %n
+  %umin = select i1 %umin.cmp, i32 42, i32 %n
+  %entry.cond = icmp ult i32 5, %umin
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 5, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp ugt i32 42, %idx
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp ult i32 %idx.inc, %umin
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @max.unsigned.1(i32* %a, i32 %a_len, i32 %n) {
+; CHECK-LABEL: @max.unsigned.1
+ entry:
+  %umax.cmp = icmp ugt i32 %a_len, %n
+  %umax = select i1 %umax.cmp, i32 %a_len, i32 %n
+  %entry.cond = icmp ugt i32 5, %umax
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 5, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp ugt i32 %idx, %a_len
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp ugt i32 %idx.inc, %umax
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @max.unsigned.2(i32* %a, i32 %a_len, i32 %n) {
+; CHECK-LABEL: @max.unsigned.2
+ entry:
+  %umax.cmp = icmp ugt i32 %a_len, %n
+  %umax = select i1 %umax.cmp, i32 %a_len, i32 %n
+  %entry.cond = icmp ugt i32 5, %umax
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ 5, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp ult i32 %a_len, %idx
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp ugt i32 %idx.inc, %umax
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @max.unsigned.3(i32* %a, i32 %n, i32 %init) {
+; CHECK-LABEL: @max.unsigned.3
+ entry:
+  %umax.cmp = icmp ugt i32 42, %n
+  %umax = select i1 %umax.cmp, i32 42, i32 %n
+  %entry.cond = icmp ugt i32 %init, %umax
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ %init, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp ugt i32 %idx, 42
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp ugt i32 %idx.inc, %umax
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
+
+define void @max.unsigned.4(i32* %a, i32 %n, i32 %init) {
+; CHECK-LABEL: @max.unsigned.4
+ entry:
+  %umax.cmp = icmp ugt i32 42, %n
+  %umax = select i1 %umax.cmp, i32 42, i32 %n
+  %entry.cond = icmp ugt i32 %init, %umax
+  br i1 %entry.cond, label %loop, label %exit
+
+ loop:
+  %idx = phi i32 [ %init, %entry ], [ %idx.inc, %latch ]
+  %idx.inc = add i32 %idx, 1
+  %in.bounds = icmp ult i32 42, %idx
+  br i1 %in.bounds, label %ok, label %latch
+; CHECK: br i1 true, label %ok, label %latch
+
+ ok:
+  %addr = getelementptr i32* %a, i32 %idx
+  store i32 %idx, i32* %addr
+  br label %latch
+
+ latch:
+  %be.cond = icmp ugt i32 %idx.inc, %umax
+  br i1 %be.cond, label %loop, label %exit
+
+ exit:
+  ret void
+}
diff --git a/test/Transforms/IndVarSimplify/lftr-extend-const.ll b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
index eeffd0f55024..f12c68cacb66 100644
--- a/test/Transforms/IndVarSimplify/lftr-extend-const.ll
+++ b/test/Transforms/IndVarSimplify/lftr-extend-const.ll
@@ -2,7 +2,7 @@
 
 ; CHECK-LABEL: @foo(
 ; CHECK-NOT: %lftr.wideiv = trunc i32 %indvars.iv.next to i16
-; CHECK: %exitcond = icmp ne i32 %indvars.iv, 511
+; CHECK: %exitcond = icmp ne i32 %indvars.iv.next, 512
 define void @foo() #0 {
 entry:
   br label %for.body
diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
index 057669277cca..a7023f24314a 100644
--- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
+++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -229,10 +229,11 @@ entry:
 ; loop and the OR instruction is replaced by an ADD keeping the result
 ; equivalent.
 ;
+; CHECK: sext
 ; CHECK: loop:
 ; CHECK: phi i64
 ; CHECK-NOT: sext
-; CHECK: icmp slt i32
+; CHECK: icmp slt i64
 ; CHECK: exit:
 ; CHECK: add i64
 loop:
diff --git a/test/Transforms/IndVarSimplify/sharpen-range.ll b/test/Transforms/IndVarSimplify/sharpen-range.ll
new file mode 100644
index 000000000000..5392dbc5fc04
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/sharpen-range.ll
@@ -0,0 +1,113 @@
+;; RUN: opt -S < %s -indvars | FileCheck %s
+
+;; Check if llvm can narrow !range metadata based on loop entry
+;; predicates.
+
+declare void @abort()
+
+define i1 @bounded_below_slt(i32* nocapture readonly %buffer) {
+; CHECK-LABEL: bounded_below_slt
+entry:
+  %length = load i32* %buffer, !range !0
+  %entry.pred = icmp eq i32 %length, 0
+  br i1 %entry.pred, label %abort, label %loop.preheader
+
+loop.preheader:
+  br label %loop
+
+loop:
+; CHECK: loop
+  %idx = phi i32 [ %idx.inc, %loop.next ], [ 0, %loop.preheader ]
+  %oob.pred = icmp slt i32 %idx, %length
+  br i1 %oob.pred, label %loop.next, label %oob
+; CHECK: br i1 true, label %loop.next, label %oob
+
+loop.next:
+; CHECK: loop.next
+  %idx.inc = add i32 %idx, 1
+  %exit.pred = icmp slt i32 %idx.inc, %length
+  br i1 %exit.pred, label %loop, label %abort.loopexit
+
+abort.loopexit:
+  br label %abort
+
+abort:
+  ret i1 false
+
+oob:
+  tail call void @abort()
+  ret i1 false
+}
+
+define i1 @bounded_below_sle(i32* nocapture readonly %buffer) {
+; CHECK-LABEL: bounded_below_sle
+entry:
+  %length = load i32* %buffer, !range !0
+  %entry.pred = icmp eq i32 %length, 0
+  br i1 %entry.pred, label %abort, label %loop.preheader
+
+loop.preheader:
+  br label %loop
+
+loop:
+; CHECK: loop
+  %idx = phi i32 [ %idx.inc, %loop.next ], [ 0, %loop.preheader ]
+  %oob.pred = icmp sle i32 %idx, %length
+  br i1 %oob.pred, label %loop.next, label %oob
+; CHECK: br i1 true, label %loop.next, label %oob
+
+loop.next:
+; CHECK: loop.next
+  %idx.inc = add i32 %idx, 1
+  %exit.pred = icmp sle i32 %idx.inc, %length
+  br i1 %exit.pred, label %loop, label %abort.loopexit
+
+abort.loopexit:
+  br label %abort
+
+abort:
+  ret i1 false
+
+oob:
+  tail call void @abort()
+  ret i1 false
+}
+
+;; Assert that we're not making an incorrect transform.
+
+declare i32 @check(i8*)
+
+define void @NoChange() {
+; CHECK-LABEL: NoChange
+entry:
+  br label %loop.begin
+
+loop.begin:
+; CHECK: loop.begin:
+  %i.01 = phi i64 [ 2, %entry ], [ %add, %loop.end ]
+  %cmp = icmp ugt i64 %i.01, 1
+; CHECK: %cmp = icmp ugt i64 %i.01, 1
+  br i1 %cmp, label %loop, label %loop.end
+
+loop:
+; CHECK: loop
+  %.sum = add i64 %i.01, -2
+  %v = getelementptr inbounds i8* null, i64 %.sum
+  %r = tail call i32 @check(i8* %v)
+  %c = icmp eq i32 %r, 0
+  br i1 %c, label %loop.end, label %abort.now
+
+abort.now:
+  tail call void @abort()
+  unreachable
+
+loop.end:
+  %add = add i64 %i.01, -1
+  %eq = icmp eq i64 %add, 0
+  br i1 %eq, label %exit, label %loop.begin
+
+exit:
+  ret void
+}
+
+!0 = !{i32 0, i32 100}
diff --git a/test/Transforms/IndVarSimplify/strengthen-overflow.ll b/test/Transforms/IndVarSimplify/strengthen-overflow.ll
new file mode 100644
index 000000000000..07e489e03382
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/strengthen-overflow.ll
@@ -0,0 +1,214 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+define i32 @test.signed.add.0(i32* %array, i32 %length, i32 %init) {
+; CHECK-LABEL: @test.signed.add.0
+ entry:
+  %upper = icmp slt i32 %init, %length
+  br i1 %upper, label %loop, label %exit
+
+ loop:
+; CHECK-LABEL: loop
+  %civ = phi i32 [ %init, %entry ], [ %civ.inc, %latch ]
+  %civ.inc = add i32 %civ, 1
+; CHECK: %civ.inc = add nsw i32 %civ, 1
+  %cmp = icmp slt i32 %civ.inc, %length
+  br i1 %cmp, label %latch, label %break
+
+ latch:
+  store i32 0, i32* %array
+  %check = icmp slt i32 %civ.inc, %length
+  br i1 %check, label %loop, label %break
+
+ break:
+  ret i32 %civ.inc
+
+ exit:
+  ret i32 42
+}
+
+define i32 @test.signed.add.1(i32* %array, i32 %length, i32 %init) {
+; CHECK-LABEL: @test.signed.add.1
+ entry:
+  %upper = icmp sle i32 %init, %length
+  br i1 %upper, label %loop, label %exit
+
+ loop:
+; CHECK-LABEL: loop
+  %civ = phi i32 [ %init, %entry ], [ %civ.inc, %latch ]
+  %civ.inc = add i32 %civ, 1
+; CHECK: %civ.inc = add i32 %civ, 1
+  %cmp = icmp slt i32 %civ.inc, %length
+  br i1 %cmp, label %latch, label %break
+
+ latch:
+  store i32 0, i32* %array
+  %check = icmp slt i32 %civ.inc, %length
+  br i1 %check, label %loop, label %break
+
+ break:
+  ret i32 %civ.inc
+
+ exit:
+  ret i32 42
+}
+
+define i32 @test.signed.sub.0(i32* %array, i32 %length, i32 %init) {
+; CHECK-LABEL: @test.signed.sub.0
+ entry:
+  %upper = icmp sgt i32 %init, %length
+  br i1 %upper, label %loop, label %exit
+
+ loop:
+; CHECK-LABEL: loop
+  %civ = phi i32 [ %init, %entry ], [ %civ.inc, %latch ]
+  %civ.inc = sub i32 %civ, 1
+; CHECK: %civ.inc = sub nsw i32 %civ, 1
+  %cmp = icmp slt i32 %civ.inc, %length
+  br i1 %cmp, label %latch, label %break
+
+ latch:
+  store i32 0, i32* %array
+  %check = icmp sgt i32 %civ.inc, %length
+  br i1 %check, label %loop, label %break
+
+ break:
+  ret i32 %civ.inc
+
+ exit:
+  ret i32 42
+}
+
+define i32 @test.signed.sub.1(i32* %array, i32 %length, i32 %init) {
+; CHECK-LABEL: @test.signed.sub.1
+ entry:
+  %upper = icmp sgt i32 %init, %length
+  br i1 %upper, label %loop, label %exit
+
+ loop:
+; CHECK-LABEL: loop
+  %civ = phi i32 [ %init, %entry ], [ %civ.inc, %latch ]
+  %civ.inc = sub i32 %civ, 1
+; CHECK: %civ.inc = sub i32 %civ, 1
+  %cmp = icmp slt i32 %civ.inc, %length
+  br i1 %cmp, label %latch, label %break
+
+ latch:
+  store i32 0, i32* %array
+  %check = icmp sge i32 %civ.inc, %length
+  br i1 %check, label %loop, label %break
+
+ break:
+  ret i32 %civ.inc
+
+ exit:
+  ret i32 42
+}
+
+define i32 @test.unsigned.add.0(i32* %array, i32 %length, i32 %init) {
+; CHECK-LABEL: @test.unsigned.add.0
+ entry:
+  %upper = icmp ult i32 %init, %length
+  br i1 %upper, label %loop, label %exit
+
+ loop:
+; CHECK-LABEL: loop
+  %civ = phi i32 [ %init, %entry ], [ %civ.inc, %latch ]
+  %civ.inc = add i32 %civ, 1
+; CHECK: %civ.inc = add nuw i32 %civ, 1
+  %cmp = icmp slt i32 %civ.inc, %length
+  br i1 %cmp, label %latch, label %break
+
+ latch:
+  store i32 0, i32* %array
+  %check = icmp ult i32 %civ.inc, %length
+  br i1 %check, label %loop, label %break
+
+ break:
+  ret i32 %civ.inc
+
+ exit:
+  ret i32 42
+}
+
+define i32 @test.unsigned.add.1(i32* %array, i32 %length, i32 %init) {
+; CHECK-LABEL: @test.unsigned.add.1
+ entry:
+  %upper = icmp ule i32 %init, %length
+  br i1 %upper, label %loop, label %exit
+
+ loop:
+; CHECK-LABEL: loop
+  %civ = phi i32 [ %init, %entry ], [ %civ.inc, %latch ]
+  %civ.inc = add i32 %civ, 1
+; CHECK: %civ.inc = add i32 %civ, 1
+  %cmp = icmp slt i32 %civ.inc, %length
+  br i1 %cmp, label %latch, label %break
+
+ latch:
+  store i32 0, i32* %array
+  %check = icmp ult i32 %civ.inc, %length
+  br i1 %check, label %loop, label %break
+
+ break:
+  ret i32 %civ.inc
+
+ exit:
+  ret i32 42
+}
+
+define i32 @test.unsigned.sub.0(i32* %array, i32* %length_ptr, i32 %init) {
+; CHECK-LABEL: @test.unsigned.sub.0
+ entry:
+  %length = load i32* %length_ptr, !range !0
+  %upper = icmp ult i32 %init, %length
+  br i1 %upper, label %loop, label %exit
+
+ loop:
+; CHECK-LABEL: loop
+  %civ = phi i32 [ %init, %entry ], [ %civ.inc, %latch ]
+  %civ.inc = sub i32 %civ, 2
+; CHECK: %civ.inc = sub nuw i32 %civ, 2
+  %cmp = icmp slt i32 %civ.inc, %length
+  br i1 %cmp, label %latch, label %break
+
+ latch:
+  store i32 0, i32* %array
+  %check = icmp ult i32 %civ.inc, %length
+  br i1 %check, label %loop, label %break
+
+ break:
+  ret i32 %civ.inc
+
+ exit:
+  ret i32 42
+}
+
+define i32 @test.unsigned.sub.1(i32* %array, i32* %length_ptr, i32 %init) {
+; CHECK-LABEL: @test.unsigned.sub.1
+ entry:
+  %length = load i32* %length_ptr, !range !1
+  %upper = icmp ult i32 %init, %length
+  br i1 %upper, label %loop, label %exit
+
+ loop:
+; CHECK-LABEL: loop
+  %civ = phi i32 [ %init, %entry ], [ %civ.inc, %latch ]
+  %civ.inc = sub i32 %civ, 2
+; CHECK: %civ.inc = sub i32 %civ, 2
+  %cmp = icmp slt i32 %civ.inc, %length
+  br i1 %cmp, label %latch, label %break
+
+ latch:
+  store i32 0, i32* %array
+  %check = icmp ult i32 %civ.inc, %length
+  br i1 %check, label %loop, label %break
+
+ break:
+  ret i32 %civ.inc
+
+ exit:
+  ret i32 42
+}
+
+!0 = !{i32 0, i32 2}
+!1 = !{i32 0, i32 42}
diff --git a/test/Transforms/IndVarSimplify/use-range-metadata.ll b/test/Transforms/IndVarSimplify/use-range-metadata.ll
new file mode 100644
index 000000000000..ea3b12dac391
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/use-range-metadata.ll
@@ -0,0 +1,37 @@
+;; RUN: opt -S < %s -indvars | FileCheck %s
+
+;; Check if IndVarSimplify understands !range metadata.
+
+declare void @abort()
+
+define i1 @iterate(i32* nocapture readonly %buffer) {
+entry:
+  %length = load i32* %buffer, !range !0
+  br label %loop.preheader
+
+loop.preheader:
+  br label %loop
+
+loop:
+  %idx = phi i32 [ %idx.inc, %loop.next ], [ 0, %loop.preheader ]
+  %oob.pred = icmp slt i32 %idx, %length
+  br i1 %oob.pred, label %loop.next, label %oob
+; CHECK: br i1 true, label %loop.next, label %oob
+
+loop.next:
+  %idx.inc = add i32 %idx, 1
+  %exit.pred = icmp slt i32 %idx.inc, %length
+  br i1 %exit.pred, label %loop, label %abort.loopexit
+
+abort.loopexit:
+  br label %abort
+
+abort:
+  ret i1 false
+
+oob:
+  tail call void @abort()
+  ret i1 false
+}
+
+!0 = !{i32 1, i32 100}
diff --git a/test/Transforms/IndVarSimplify/verify-scev.ll b/test/Transforms/IndVarSimplify/verify-scev.ll
index 019f5830d520..b9ce3d63ec99 100644
--- a/test/Transforms/IndVarSimplify/verify-scev.ll
+++ b/test/Transforms/IndVarSimplify/verify-scev.ll
@@ -380,11 +380,11 @@ for.body48:                                       ; preds = %for.inc221, %for.bo
 
 for.body65.lr.ph:                                 ; preds = %for.body48
   %0 = load i32* undef, align 4
+  %1 = sext i32 %0 to i64
   br label %for.body65.us
 
 for.body65.us:                                    ; preds = %for.inc219.us, %for.body65.lr.ph
-  %k.09.us = phi i32 [ %inc.us, %for.inc219.us ], [ 1, %for.body65.lr.ph ]
-  %idxprom66.us = sext i32 %k.09.us to i64
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc219.us ], [ 1, %for.body65.lr.ph ]
   br i1 undef, label %for.inc219.us, label %if.end72.us
 
 if.end72.us:                                      ; preds = %for.body65.us
@@ -406,8 +406,8 @@ for.cond152.us:                                   ; preds = %for.cond152.us, %fo
   br i1 undef, label %for.cond139.loopexit.us, label %for.cond152.us
 
 for.inc219.us:                                    ; preds = %for.cond139.loopexit.us, %if.end110.us, %if.end93.us, %for.body65.us
-  %inc.us = add nsw i32 %k.09.us, 1
-  %cmp64.us = icmp sgt i32 %inc.us, %0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %cmp64.us = icmp sgt i64 %indvars.iv.next, %1
   br i1 %cmp64.us, label %for.inc221, label %for.body65.us
 
 for.cond139.loopexit.us:                          ; preds = %for.cond152.us
diff --git a/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/test/Transforms/IndVarSimplify/widen-loop-comp.ll
new file mode 100644
index 000000000000..0930a0c41390
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -0,0 +1,191 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+target triple = "aarch64--linux-gnu"
+
+; Check the loop exit i32 compare instruction and operand are widened to i64
+; instead of truncating IV before its use in the i32 compare instruction.
+
+@idx = common global i32 0, align 4
+@e = common global i32 0, align 4
+@ptr = common global i32* null, align 8
+
+; CHECK-LABEL: @test1
+; CHECK: for.body.lr.ph:
+; CHECK: sext i32
+; CHECK: for.cond:
+; CHECK: icmp slt i64
+; CHECK: for.body:
+; CHECK: phi i64
+
+define i32 @test1() {
+entry:
+  store i32 -1, i32* @idx, align 4
+  %0 = load i32* @e, align 4
+  %cmp4 = icmp slt i32 %0, 0
+  br i1 %cmp4, label %for.end.loopexit, label %for.body.lr.ph
+
+for.body.lr.ph:
+  %1 = load i32** @ptr, align 8
+  %2 = load i32* @e, align 4
+  br label %for.body
+
+for.cond:
+  %inc = add nsw i32 %i.05, 1
+  %cmp = icmp slt i32 %i.05, %2
+  br i1 %cmp, label %for.body, label %for.cond.for.end.loopexit_crit_edge
+
+for.body:
+  %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.cond ]
+  %idxprom = sext i32 %i.05 to i64
+  %arrayidx = getelementptr inbounds i32* %1, i64 %idxprom
+  %3 = load i32* %arrayidx, align 4
+  %tobool = icmp eq i32 %3, 0
+  br i1 %tobool, label %if.then, label %for.cond
+
+if.then:
+  %i.05.lcssa = phi i32 [ %i.05, %for.body ]
+  store i32 %i.05.lcssa, i32* @idx, align 4
+  br label %for.end
+
+for.cond.for.end.loopexit_crit_edge:
+  br label %for.end.loopexit
+
+for.end.loopexit:
+  br label %for.end
+
+for.end:
+  %4 = load i32* @idx, align 4
+  ret i32 %4
+}
+
+; CHECK-LABEL: @test2
+; CHECK: for.body4.us
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %cmp2.us = icmp slt i64
+; CHECK-NOT: %2 = trunc i64 %indvars.iv.next to i32
+; CHECK-NOT: %cmp2.us = icmp slt i32
+
+define void @test2([8 x i8]* %a, i8* %b, i8 %limit) {
+entry:
+  %conv = zext i8 %limit to i32
+  br i1 undef, label %for.cond1.preheader, label %for.cond1.preheader.us
+
+for.cond1.preheader.us:
+  %storemerge5.us = phi i32 [ 0, %entry ], [ %inc14.us, %for.inc13.us ]
+  br i1 true, label %for.body4.lr.ph.us, label %for.inc13.us
+
+for.inc13.us:
+  %inc14.us = add nsw i32 %storemerge5.us, 1
+  %cmp.us = icmp slt i32 %inc14.us, 4
+  br i1 %cmp.us, label %for.cond1.preheader.us, label %for.end
+
+for.body4.us:
+  %storemerge14.us = phi i32 [ 0, %for.body4.lr.ph.us ], [ %inc.us, %for.body4.us ]
+  %idxprom.us = sext i32 %storemerge14.us to i64
+  %arrayidx6.us = getelementptr inbounds [8 x i8]* %a, i64 %idxprom5.us, i64 %idxprom.us
+  %0 = load i8* %arrayidx6.us, align 1
+  %idxprom7.us = zext i8 %0 to i64
+  %arrayidx8.us = getelementptr inbounds i8* %b, i64 %idxprom7.us
+  %1 = load i8* %arrayidx8.us, align 1
+  store i8 %1, i8* %arrayidx6.us, align 1
+  %inc.us = add nsw i32 %storemerge14.us, 1
+  %cmp2.us = icmp slt i32 %inc.us, %conv
+  br i1 %cmp2.us, label %for.body4.us, label %for.inc13.us
+
+for.body4.lr.ph.us:
+  %idxprom5.us = sext i32 %storemerge5.us to i64
+  br label %for.body4.us
+
+for.cond1.preheader:
+  %storemerge5 = phi i32 [ 0, %entry ], [ %inc14, %for.inc13 ]
+  br i1 false, label %for.inc13, label %for.inc13
+
+for.inc13:
+  %inc14 = add nsw i32 %storemerge5, 1
+  %cmp = icmp slt i32 %inc14, 4
+  br i1 %cmp, label %for.cond1.preheader, label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: @test3
+; CHECK: sext i32 %b
+; CHECK: for.cond:
+; CHECK: phi i64
+; CHECK: icmp slt i64
+
+define i32 @test3(i32* %a, i32 %b) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  %idxprom = sext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32* %a, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %sum.0, %0
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret i32 %sum.0
+}
+
+declare i32 @fn1(i8 signext)
+
+; PR21030
+; CHECK-LABEL: @test4
+; CHECK: for.body:
+; CHECK: phi i32
+; CHECK: icmp sgt i8
+
+define i32 @test4(i32 %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %c.07 = phi i8 [ -3, %entry ], [ %dec, %for.body ]
+  %conv6 = zext i8 %c.07 to i32
+  %or = or i32 %a, %conv6
+  %conv3 = trunc i32 %or to i8
+  %call = call i32 @fn1(i8 signext %conv3)
+  %dec = add i8 %c.07, -1
+  %cmp = icmp sgt i8 %dec, -14
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret i32 0
+}
+
+; CHECK-LABEL: @test5
+; CHECK: zext i32 %b
+; CHECK: for.cond:
+; CHECK: phi i64
+; CHECK: icmp ule i64
+
+define i32 @test5(i32* %a, i32 %b) {
+entry:
+  br label %for.cond
+
+for.cond:
+  %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp ule i32 %i.0, %b
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  %idxprom = zext i32 %i.0 to i64
+  %arrayidx = getelementptr inbounds i32* %a, i64 %idxprom
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %sum.0, %0
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:
+  ret i32 %sum.0
+}
diff --git a/test/Transforms/Inline/align.ll b/test/Transforms/Inline/align.ll
new file mode 100644
index 000000000000..9ac6d54bfba9
--- /dev/null
+++ b/test/Transforms/Inline/align.ll
@@ -0,0 +1,98 @@
+; RUN: opt -inline -preserve-alignment-assumptions-during-inlining -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @hello(float* align 128 nocapture %a, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello(float* %a, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %ptrint = ptrtoint float* %a to i64
+; CHECK:   %maskedptr = and i64 %ptrint, 127
+; CHECK:   %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK:   call void @llvm.assume(i1 %maskcond)
+; CHECK:   %0 = load float* %c, align 4
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+define void @fooa(float* nocapture align 128 %a, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello(float* %a, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @fooa(float* nocapture align 128 %a, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %0 = load float* %c, align 4
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+define void @hello2(float* align 128 nocapture %a, float* align 128 nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float* %b, i64 8
+  store float %0, float* %arrayidx1, align 4
+  ret void
+}
+
+define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello2(float* %a, float* %b, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %ptrint = ptrtoint float* %a to i64
+; CHECK:   %maskedptr = and i64 %ptrint, 127
+; CHECK:   %maskcond = icmp eq i64 %maskedptr, 0
+; CHECK:   call void @llvm.assume(i1 %maskcond)
+; CHECK:   %ptrint1 = ptrtoint float* %b to i64
+; CHECK:   %maskedptr2 = and i64 %ptrint1, 127
+; CHECK:   %maskcond3 = icmp eq i64 %maskedptr2, 0
+; CHECK:   call void @llvm.assume(i1 %maskcond3)
+; CHECK:   %0 = load float* %c, align 4
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4
+; CHECK:   %arrayidx1.i = getelementptr inbounds float* %b, i64 8
+; CHECK:   store float %0, float* %arrayidx1.i, align 4
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+attributes #0 = { nounwind uwtable }
+
diff --git a/test/Transforms/Inline/byval-tail-call.ll b/test/Transforms/Inline/byval-tail-call.ll
index 3a8906aa210f..154f3974b58d 100644
--- a/test/Transforms/Inline/byval-tail-call.ll
+++ b/test/Transforms/Inline/byval-tail-call.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tailcallelim -inline -instcombine -dse -S | FileCheck %s
+; RUN: opt < %s -basicaa -tailcallelim -inline -instcombine -dse -S | FileCheck %s
 ; PR7272
 
 ; Calls that capture byval parameters cannot be marked as tail calls. Other
@@ -27,10 +27,13 @@ define internal void @qux(i32* byval %x) {
   tail call void @ext(i32* null)
   ret void
 }
+
 define void @frob(i32* %x) {
 ; CHECK-LABEL: define void @frob(
-; CHECK: alloca i32
-; CHECK: {{^ *}}call void @ext(
+; CHECK: %[[POS:.*]] = alloca i32
+; CHECK: %[[VAL:.*]] = load i32* %x
+; CHECK: store i32 %[[VAL]], i32* %[[POS]]
+; CHECK: {{^ *}}call void @ext(i32* %[[POS]]
 ; CHECK: tail call void @ext(i32* null)
 ; CHECK: ret void
   tail call void @qux(i32* byval %x)
diff --git a/test/Transforms/Inline/debug-invoke.ll b/test/Transforms/Inline/debug-invoke.ll
index 41d60742fd29..4815e689b8a5 100644
--- a/test/Transforms/Inline/debug-invoke.ll
+++ b/test/Transforms/Inline/debug-invoke.ll
@@ -4,9 +4,9 @@
 
 ; CHECK: invoke void @test()
 ; CHECK-NEXT: to label {{.*}} unwind label {{.*}}, !dbg [[INL_LOC:!.*]]
-; CHECK: [[EMPTY:.*]] = metadata !{}
-; CHECK: [[INL_LOC]] = metadata !{i32 1, i32 0, metadata [[EMPTY]], metadata [[INL_AT:.*]]}
-; CHECK: [[INL_AT]] = metadata !{i32 2, i32 0, metadata [[EMPTY]], null}
+; CHECK: [[EMPTY:.*]] = !{}
+; CHECK: [[INL_LOC]] = !MDLocation(line: 1, scope: [[EMPTY]], inlinedAt: [[INL_AT:.*]])
+; CHECK: [[INL_AT]] = !MDLocation(line: 2, scope: [[EMPTY]])
 
 declare void @test()
 declare i32 @__gxx_personality_v0(...)
@@ -31,7 +31,7 @@ lpad:
 }
 
 !llvm.module.flags = !{!1}
-!1 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!2 = metadata !{}
-!3 = metadata !{i32 1, i32 0, metadata !2, null}
-!4 = metadata !{i32 2, i32 0, metadata !2, null}
+!1 = !{i32 2, !"Debug Info Version", i32 2}
+!2 = !{}
+!3 = !MDLocation(line: 1, scope: !2)
+!4 = !MDLocation(line: 2, scope: !2)
diff --git a/test/Transforms/Inline/ephemeral.ll b/test/Transforms/Inline/ephemeral.ll
new file mode 100644
index 000000000000..d1135c6f0c3e
--- /dev/null
+++ b/test/Transforms/Inline/ephemeral.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -Oz %s | FileCheck %s
+
+@a = global i32 4
+
+define i1 @inner() {
+  %a1 = load volatile i32* @a
+  %x1 = add i32 %a1, %a1
+  %c = icmp eq i32 %x1, 0
+
+  ; Here are enough instructions to prevent inlining, but because they are used
+  ; only by the @llvm.assume intrinsic, they're free (and, thus, inlining will
+  ; still happen).
+  %a2 = mul i32 %a1, %a1
+  %a3 = sub i32 %a1, 5
+  %a4 = udiv i32 %a3, -13
+  %a5 = mul i32 %a4, %a4
+  %a6 = add i32 %a5, %x1
+  %ca = icmp sgt i32 %a6, -7
+  tail call void @llvm.assume(i1 %ca)
+
+  ret i1 %c
+}
+
+; @inner() should be inlined for -Oz.
+; CHECK-NOT: call i1 @inner
+define i1 @outer() optsize {
+   %r = call i1 @inner()
+   ret i1 %r
+}
+
+declare void @llvm.assume(i1) nounwind
+
diff --git a/test/Transforms/Inline/ignore-debug-info.ll b/test/Transforms/Inline/ignore-debug-info.ll
index 543a89be0214..8bd6e7c4287b 100644
--- a/test/Transforms/Inline/ignore-debug-info.ll
+++ b/test/Transforms/Inline/ignore-debug-info.ll
@@ -7,16 +7,16 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-declare void @llvm.dbg.declare(metadata, metadata) #1
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 define <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b) {
 entry:
-  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}, metadata !{})
   %mul = fmul <4 x float> %a, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
-  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}, metadata !{})
   %mul1 = fmul <4 x float> %b, <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>
-  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}, metadata !{})
   %add = fadd <4 x float> %mul, %mul1
   ret <4 x float> %add
 }
@@ -27,10 +27,10 @@ define float @outer_vectors(<4 x float> %a, <4 x float> %b) {
 ; CHECK: ret float
 
 entry:
-  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
-  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}, metadata !{})
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}, metadata !{})
   %call = call <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b)
-  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+  call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{}, metadata !{})
   %vecext = extractelement <4 x float> %call, i32 0
   %vecext1 = extractelement <4 x float> %call, i32 1
   %add = fadd float %vecext, %vecext1
@@ -47,9 +47,9 @@ attributes #0 = { nounwind readnone }
 !llvm.module.flags = !{!3, !4}
 !llvm.ident = !{!5}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !{}, metadata !2, metadata !2, metadata !""}
-!1 = metadata !{metadata !"", metadata !""}
-!2 = metadata !{i32 0}
-!3 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!4 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!5 = metadata !{metadata !""}
+!0 = !{!"0x11\004\00\000\00\000\00\000", !1, !2, !2, !{}, !2, !2} ; [ DW_TAG_compile_unit ]
+!1 = !{!"", !""}
+!2 = !{i32 0}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 1, !"Debug Info Version", i32 2}
+!5 = !{!""}
diff --git a/test/Transforms/Inline/inline-musttail-varargs.ll b/test/Transforms/Inline/inline-musttail-varargs.ll
new file mode 100644
index 000000000000..7a8957488e1c
--- /dev/null
+++ b/test/Transforms/Inline/inline-musttail-varargs.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -inline -instcombine -S | FileCheck %s
+
+; We can't inline this thunk yet, but one day we will be able to.  And when we
+; do, this test case will be ready.
+
+declare void @ext_method(i8*, i32)
+
+define linkonce_odr void @thunk(i8* %this, ...) {
+  %this_adj = getelementptr i8* %this, i32 4
+  musttail call void (i8*, ...)* bitcast (void (i8*, i32)* @ext_method to void (i8*, ...)*)(i8* %this_adj, ...)
+  ret void
+}
+
+define void @thunk_caller(i8* %p) {
+  call void (i8*, ...)* @thunk(i8* %p, i32 42)
+  ret void
+}
+; CHECK-LABEL: define void @thunk_caller(i8* %p)
+; CHECK: call void (i8*, ...)* @thunk(i8* %p, i32 42)
+
+; FIXME: Inline the thunk. This should be significantly easier than inlining
+; general varargs functions.
diff --git a/test/Transforms/Inline/inline-vla.ll b/test/Transforms/Inline/inline-vla.ll
index dc9deaafe743..7da448b83f3a 100644
--- a/test/Transforms/Inline/inline-vla.ll
+++ b/test/Transforms/Inline/inline-vla.ll
@@ -35,4 +35,4 @@ attributes #2 = { nounwind }
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"clang version 3.5.0 (trunk 205695) (llvm/trunk 205706)"}
+!0 = !{!"clang version 3.5.0 (trunk 205695) (llvm/trunk 205706)"}
diff --git a/test/Transforms/Inline/inline_dbg_declare.ll b/test/Transforms/Inline/inline_dbg_declare.ll
new file mode 100644
index 000000000000..563cde314160
--- /dev/null
+++ b/test/Transforms/Inline/inline_dbg_declare.ll
@@ -0,0 +1,96 @@
+; RUN: opt < %s -S -inline | FileCheck %s
+;
+; The purpose of this test is to check that inline pass preserves debug info
+; for variable using the dbg.declare intrinsic.
+;
+;; This test was generated by running this command:
+;; clang.exe -S -O0 -emit-llvm -g foo.c
+;;
+;; foo.c
+;; ==========================
+;; float foo(float x)
+;; {
+;;    return x;
+;; }
+;;
+;; void bar(float *dst)
+;; {
+;;    dst[0] = foo(dst[0]);
+;; }
+;; ==========================
+
+target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+; Function Attrs: nounwind
+define float @foo(float %x) #0 {
+entry:
+  %x.addr = alloca float, align 4
+  store float %x, float* %x.addr, align 4
+  call void @llvm.dbg.declare(metadata float* %x.addr, metadata !16, metadata !17), !dbg !18
+  %0 = load float* %x.addr, align 4, !dbg !19
+  ret float %0, !dbg !19
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; CHECK: define void @bar
+
+; Function Attrs: nounwind
+define void @bar(float* %dst) #0 {
+entry:
+
+; CHECK: [[x_addr_i:%[a-zA-Z0-9.]+]] = alloca float, align 4
+
+  %dst.addr = alloca float*, align 4
+  store float* %dst, float** %dst.addr, align 4
+  call void @llvm.dbg.declare(metadata float** %dst.addr, metadata !20, metadata !17), !dbg !21
+  %0 = load float** %dst.addr, align 4, !dbg !22
+  %arrayidx = getelementptr inbounds float* %0, i32 0, !dbg !22
+  %1 = load float* %arrayidx, align 4, !dbg !22
+  %call = call float @foo(float %1), !dbg !22
+
+; CHECK-NOT: call float @foo
+; CHECK: void @llvm.dbg.declare(metadata float* [[x_addr_i]], metadata [[m23:![0-9]+]], metadata !17), !dbg [[m24:![0-9]+]]
+
+  %2 = load float** %dst.addr, align 4, !dbg !22
+  %arrayidx1 = getelementptr inbounds float* %2, i32 0, !dbg !22
+  store float %call, float* %arrayidx1, align 4, !dbg !22
+  ret void, !dbg !23
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
+
+!0 = !{!"0x11\0012\00clang version 3.6.0 (trunk)\000\00\000\00\001", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [foo.c] [DW_LANG_C99]
+!1 = !{!"foo.c", !""}
+!2 = !{}
+!3 = !{!4, !9}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\000\00256\000\002", !1, !5, !6, null, float (float)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [foo]
+!5 = !{!"0x29", !1}    ; [ DW_TAG_file_type ] [foo.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8}
+!8 = !{!"0x24\00float\000\0032\0032\000\000\004", null, null} ; [ DW_TAG_base_type ] [float] [line 0, size 32, align 32, offset 0, enc DW_ATE_float]
+!9 = !{!"0x2e\00bar\00bar\00\006\000\001\000\000\00256\000\007", !1, !5, !10, null, void (float*)* @bar, null, null, !2} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 7] [bar]
+!10 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !11, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!11 = !{null, !12}
+!12 = !{!"0xf\00\000\0032\0032\000\000", null, null, !8} ; [ DW_TAG_pointer_type ] [line 0, size 32, align 32, offset 0] [from float]
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 2, !"Debug Info Version", i32 2}
+!15 = !{!"clang version 3.6.0 (trunk)"}
+!16 = !{!"0x101\00x\0016777217\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [x] [line 1]
+!17 = !{!"0x102"}               ; [ DW_TAG_expression ]
+!18 = !MDLocation(line: 1, column: 17, scope: !4)
+!19 = !MDLocation(line: 3, column: 5, scope: !4)
+!20 = !{!"0x101\00dst\0016777222\000", !9, !5, !12} ; [ DW_TAG_arg_variable ] [dst] [line 6]
+!21 = !MDLocation(line: 6, column: 17, scope: !9)
+!22 = !MDLocation(line: 8, column: 14, scope: !9)
+!23 = !MDLocation(line: 9, column: 1, scope: !9)
+
+; CHECK: [[m23]] = !{!"0x101\00x\0016777217\000", !4, !5, !8, !22} ; [ DW_TAG_arg_variable ] [x] [line 1]
+; CHECK: [[m24]] = !MDLocation(line: 1, column: 17, scope: !4, inlinedAt: !22)
diff --git a/test/Transforms/Inline/noalias-calls.ll b/test/Transforms/Inline/noalias-calls.ll
new file mode 100644
index 000000000000..c09d2a673297
--- /dev/null
+++ b/test/Transforms/Inline/noalias-calls.ll
@@ -0,0 +1,44 @@
+; RUN: opt -basicaa -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0
+declare void @hey() #0
+
+define void @hello(i8* noalias nocapture %a, i8* noalias nocapture readonly %c, i8* nocapture %b) #1 {
+entry:
+  %l = alloca i8, i32 512, align 1
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 16, i1 0)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i32 16, i1 0)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i32 16, i1 0)
+  call void @hey()
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l, i8* %c, i64 16, i32 16, i1 0)
+  ret void
+}
+
+define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 {
+entry:
+  tail call void @hello(i8* %a, i8* %c, i8* %b)
+  ret void
+}
+
+; CHECK: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 {
+; CHECK: entry:
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 16, i1 false) #0, !noalias !0
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i32 16, i1 false) #0, !noalias !3
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i32 16, i1 false) #0, !alias.scope !5
+; CHECK:   call void @hey() #0, !noalias !5
+; CHECK:   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %c, i64 16, i32 16, i1 false) #0, !noalias !3
+; CHECK:   ret void
+; CHECK: }
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind uwtable }
+
+; CHECK: !0 = !{!1}
+; CHECK: !1 = distinct !{!1, !2, !"hello: %c"}
+; CHECK: !2 = distinct !{!2, !"hello"}
+; CHECK: !3 = !{!4}
+; CHECK: !4 = distinct !{!4, !2, !"hello: %a"}
+; CHECK: !5 = !{!4, !1}
+
diff --git a/test/Transforms/Inline/noalias-cs.ll b/test/Transforms/Inline/noalias-cs.ll
new file mode 100644
index 000000000000..da5ddd64ed06
--- /dev/null
+++ b/test/Transforms/Inline/noalias-cs.ll
@@ -0,0 +1,84 @@
+; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4, !noalias !3
+  %arrayidx.i = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx.i, align 4, !alias.scope !7, !noalias !8
+  %arrayidx1.i = getelementptr inbounds float* %b, i64 8
+  store float %0, float* %arrayidx1.i, align 4, !alias.scope !8, !noalias !7
+  %1 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %1, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  call void @foo2(float* %a, float* %b, float* %c), !noalias !0
+  call void @foo2(float* %b, float* %b, float* %a), !alias.scope !0
+  ret void
+}
+
+; CHECK: define void @foo(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %0 = load float* %c, align 4, !noalias !6
+; CHECK:   %arrayidx.i.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i.i, align 4, !alias.scope !12, !noalias !13
+; CHECK:   %arrayidx1.i.i = getelementptr inbounds float* %b, i64 8
+; CHECK:   store float %0, float* %arrayidx1.i.i, align 4, !alias.scope !14, !noalias !15
+; CHECK:   %1 = load float* %c, align 4, !noalias !16
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx.i, align 4, !noalias !16
+; CHECK:   %2 = load float* %a, align 4, !alias.scope !16, !noalias !17
+; CHECK:   %arrayidx.i.i1 = getelementptr inbounds float* %b, i64 5
+; CHECK:   store float %2, float* %arrayidx.i.i1, align 4, !alias.scope !21, !noalias !22
+; CHECK:   %arrayidx1.i.i2 = getelementptr inbounds float* %b, i64 8
+; CHECK:   store float %2, float* %arrayidx1.i.i2, align 4, !alias.scope !23, !noalias !24
+; CHECK:   %3 = load float* %a, align 4, !alias.scope !16
+; CHECK:   %arrayidx.i3 = getelementptr inbounds float* %b, i64 7
+; CHECK:   store float %3, float* %arrayidx.i3, align 4, !alias.scope !16
+; CHECK:   ret void
+; CHECK: }
+
+attributes #0 = { nounwind uwtable }
+
+!0 = !{!1}
+!1 = distinct !{!1, !2, !"hello: %a"}
+!2 = distinct !{!2, !"hello"}
+!3 = !{!4, !6}
+!4 = distinct !{!4, !5, !"hello2: %a"}
+!5 = distinct !{!5, !"hello2"}
+!6 = distinct !{!6, !5, !"hello2: %b"}
+!7 = !{!4}
+!8 = !{!6}
+
+; CHECK: !0 = !{!1, !3}
+; CHECK: !1 = distinct !{!1, !2, !"hello2: %a"}
+; CHECK: !2 = distinct !{!2, !"hello2"}
+; CHECK: !3 = distinct !{!3, !2, !"hello2: %b"}
+; CHECK: !4 = !{!1}
+; CHECK: !5 = !{!3}
+; CHECK: !6 = !{!7, !9, !10}
+; CHECK: !7 = distinct !{!7, !8, !"hello2: %a"}
+; CHECK: !8 = distinct !{!8, !"hello2"}
+; CHECK: !9 = distinct !{!9, !8, !"hello2: %b"}
+; CHECK: !10 = distinct !{!10, !11, !"hello: %a"}
+; CHECK: !11 = distinct !{!11, !"hello"}
+; CHECK: !12 = !{!7}
+; CHECK: !13 = !{!9, !10}
+; CHECK: !14 = !{!9}
+; CHECK: !15 = !{!7, !10}
+; CHECK: !16 = !{!10}
+; CHECK: !17 = !{!18, !20}
+; CHECK: !18 = distinct !{!18, !19, !"hello2: %a"}
+; CHECK: !19 = distinct !{!19, !"hello2"}
+; CHECK: !20 = distinct !{!20, !19, !"hello2: %b"}
+; CHECK: !21 = !{!18, !10}
+; CHECK: !22 = !{!20}
+; CHECK: !23 = !{!20, !10}
+; CHECK: !24 = !{!18}
+
diff --git a/test/Transforms/Inline/noalias.ll b/test/Transforms/Inline/noalias.ll
new file mode 100644
index 000000000000..674da1e2efbb
--- /dev/null
+++ b/test/Transforms/Inline/noalias.ll
@@ -0,0 +1,76 @@
+; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @hello(float* noalias nocapture %a, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello(float* %a, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %0 = load float* %c, align 4, !noalias !0
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4, !alias.scope !0
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+define void @hello2(float* noalias nocapture %a, float* noalias nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float* %b, i64 8
+  store float %0, float* %arrayidx1, align 4
+  ret void
+}
+
+define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @hello2(float* %a, float* %b, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %0 = load float* %c, align 4, !noalias !3
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4, !alias.scope !7, !noalias !8
+; CHECK:   %arrayidx1.i = getelementptr inbounds float* %b, i64 8
+; CHECK:   store float %0, float* %arrayidx1.i, align 4, !alias.scope !8, !noalias !7
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+attributes #0 = { nounwind uwtable }
+
+; CHECK: !0 = !{!1}
+; CHECK: !1 = distinct !{!1, !2, !"hello: %a"}
+; CHECK: !2 = distinct !{!2, !"hello"}
+; CHECK: !3 = !{!4, !6}
+; CHECK: !4 = distinct !{!4, !5, !"hello2: %a"}
+; CHECK: !5 = distinct !{!5, !"hello2"}
+; CHECK: !6 = distinct !{!6, !5, !"hello2: %b"}
+; CHECK: !7 = !{!4}
+; CHECK: !8 = !{!6}
+
diff --git a/test/Transforms/Inline/noalias2.ll b/test/Transforms/Inline/noalias2.ll
new file mode 100644
index 000000000000..9c8f8e22d388
--- /dev/null
+++ b/test/Transforms/Inline/noalias2.ll
@@ -0,0 +1,97 @@
+; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @hello(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 5
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 {
+entry:
+  tail call void @hello(float* %a, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %0 = load float* %c, align 4, !alias.scope !0, !noalias !3
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i, align 4, !alias.scope !3, !noalias !0
+; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+define void @hello2(float* noalias nocapture %a, float* noalias nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 6
+  store float %0, float* %arrayidx, align 4
+  %arrayidx1 = getelementptr inbounds float* %b, i64 8
+  store float %0, float* %arrayidx1, align 4
+  ret void
+}
+
+; Check that when hello() is inlined into foo(), and then foo() is inlined into
+; foo2(), the noalias scopes are properly concatenated.
+define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+entry:
+  tail call void @foo(float* %a, float* %c)
+  tail call void @hello2(float* %a, float* %b, float* %c)
+  %0 = load float* %c, align 4
+  %arrayidx = getelementptr inbounds float* %a, i64 7
+  store float %0, float* %arrayidx, align 4
+  ret void
+}
+
+; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
+; CHECK: entry:
+; CHECK:   %0 = load float* %c, align 4, !alias.scope !5, !noalias !10
+; CHECK:   %arrayidx.i.i = getelementptr inbounds float* %a, i64 5
+; CHECK:   store float %0, float* %arrayidx.i.i, align 4, !alias.scope !10, !noalias !5
+; CHECK:   %1 = load float* %c, align 4, !alias.scope !13, !noalias !14
+; CHECK:   %arrayidx.i = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %1, float* %arrayidx.i, align 4, !alias.scope !14, !noalias !13
+; CHECK:   %2 = load float* %c, align 4, !noalias !15
+; CHECK:   %arrayidx.i1 = getelementptr inbounds float* %a, i64 6
+; CHECK:   store float %2, float* %arrayidx.i1, align 4, !alias.scope !19, !noalias !20
+; CHECK:   %arrayidx1.i = getelementptr inbounds float* %b, i64 8
+; CHECK:   store float %2, float* %arrayidx1.i, align 4, !alias.scope !20, !noalias !19
+; CHECK:   %3 = load float* %c, align 4
+; CHECK:   %arrayidx = getelementptr inbounds float* %a, i64 7
+; CHECK:   store float %3, float* %arrayidx, align 4
+; CHECK:   ret void
+; CHECK: }
+
+; CHECK: !0 = !{!1}
+; CHECK: !1 = distinct !{!1, !2, !"hello: %c"}
+; CHECK: !2 = distinct !{!2, !"hello"}
+; CHECK: !3 = !{!4}
+; CHECK: !4 = distinct !{!4, !2, !"hello: %a"}
+; CHECK: !5 = !{!6, !8}
+; CHECK: !6 = distinct !{!6, !7, !"hello: %c"}
+; CHECK: !7 = distinct !{!7, !"hello"}
+; CHECK: !8 = distinct !{!8, !9, !"foo: %c"}
+; CHECK: !9 = distinct !{!9, !"foo"}
+; CHECK: !10 = !{!11, !12}
+; CHECK: !11 = distinct !{!11, !7, !"hello: %a"}
+; CHECK: !12 = distinct !{!12, !9, !"foo: %a"}
+; CHECK: !13 = !{!8}
+; CHECK: !14 = !{!12}
+; CHECK: !15 = !{!16, !18}
+; CHECK: !16 = distinct !{!16, !17, !"hello2: %a"}
+; CHECK: !17 = distinct !{!17, !"hello2"}
+; CHECK: !18 = distinct !{!18, !17, !"hello2: %b"}
+; CHECK: !19 = !{!16}
+; CHECK: !20 = !{!18}
+
+attributes #0 = { nounwind uwtable }
+
diff --git a/test/Transforms/Inline/optimization-remarks.ll b/test/Transforms/Inline/optimization-remarks.ll
index 9108f3ab14d6..fb1b04788885 100644
--- a/test/Transforms/Inline/optimization-remarks.ll
+++ b/test/Transforms/Inline/optimization-remarks.ll
@@ -57,4 +57,4 @@ attributes #2 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointe
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"clang version 3.5.0 "}
+!0 = !{!"clang version 3.5.0 "}
diff --git a/test/Transforms/Inline/pr21206.ll b/test/Transforms/Inline/pr21206.ll
new file mode 100644
index 000000000000..e46003050a9f
--- /dev/null
+++ b/test/Transforms/Inline/pr21206.ll
@@ -0,0 +1,18 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+
+$c = comdat any
+; CHECK: $c = comdat any
+
+define linkonce_odr void @foo() comdat($c) {
+  ret void
+}
+; CHECK: define linkonce_odr void @foo() comdat($c)
+
+define linkonce_odr void @bar() comdat($c) {
+  ret void
+}
+; CHECK: define linkonce_odr void @bar() comdat($c)
+
+define void()* @zed()  {
+  ret void()* @foo
+}
diff --git a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
index 7f9bd9e40dcb..625989384d70 100644
--- a/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
+++ b/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instcombine -S | grep icmp
 ; PR1646
 
-@__gthrw_pthread_cancel = alias weak i32 (i32)* @pthread_cancel		; <i32 (i32)*> [#uses=1]
+@__gthrw_pthread_cancel = weak alias i32 (i32)* @pthread_cancel		; <i32 (i32)*> [#uses=1]
 @__gthread_active_ptr.5335 = internal constant i8* bitcast (i32 (i32)* @__gthrw_pthread_cancel to i8*)		; <i8**> [#uses=1]
 define weak i32 @pthread_cancel(i32) {
        ret i32 0
diff --git a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
index c7cef752dcc9..3793a860e8e9 100644
--- a/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
+++ b/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instcombine -S | grep icmp
 ; PR1678
 
-@A = alias weak void ()* @B		; <void ()*> [#uses=1]
+@A = weak alias void ()* @B		; <void ()*> [#uses=1]
 
 define weak void @B() {
        ret void
diff --git a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
index fe935f96e9e1..656fb34061d9 100644
--- a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
+++ b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s -instcombine -S | not grep call
-; RUN: opt < %s -std-compile-opts -S | not grep xyz
+; RUN: opt < %s -O3 -S | not grep xyz
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 @.str = internal constant [4 x i8] c"xyz\00"		; <[4 x i8]*> [#uses=1]
diff --git a/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll b/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll
deleted file mode 100644
index 917d3d9436bc..000000000000
--- a/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep "ret i.* 0" | count 2
-; PR2048
-
-define i32 @i(i32 %a) {
-  %tmp1 = sdiv i32 %a, -1431655765
-  %tmp2 = sdiv i32 %tmp1, 3
-  ret i32 %tmp2
-}
-
-define i8 @j(i8 %a) {
-  %tmp1 = sdiv i8 %a, 64
-  %tmp2 = sdiv i8 %tmp1, 3
-  ret i8 %tmp2
-}
diff --git a/test/Transforms/InstCombine/2008-05-23-CompareFold.ll b/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
index acb259be5eb1..b72967767558 100644
--- a/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
+++ b/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
@@ -1,5 +1,8 @@
-; RUN: opt < %s -instcombine -S | grep "ret i1 false"
+; RUN: opt -instcombine -S < %s | FileCheck %s
 ; PR2359
+
+; CHECK-LABEL: @f(
+; CHECK: ret i1 false
 define i1 @f(i8* %x) {
 entry:
        %tmp462 = load i8* %x, align 1          ; <i8> [#uses=1]
diff --git a/test/Transforms/InstCombine/2008-11-08-FCmp.ll b/test/Transforms/InstCombine/2008-11-08-FCmp.ll
index f33a1f59f61b..f1af7ce2fb12 100644
--- a/test/Transforms/InstCombine/2008-11-08-FCmp.ll
+++ b/test/Transforms/InstCombine/2008-11-08-FCmp.ll
@@ -4,6 +4,7 @@
 ; When inst combining an FCMP with the LHS coming from a uitofp instruction, we
 ; can't lower it to signed ICMP instructions.
 
+; CHECK-LABEL: @test1(
 define i1 @test1(i32 %val) {
   %1 = uitofp i32 %val to double
   %2 = fcmp ole double %1, 0.000000e+00
@@ -11,6 +12,7 @@ define i1 @test1(i32 %val) {
   ret i1 %2
 }
 
+; CHECK-LABEL: @test2(
 define i1 @test2(i32 %val) {
   %1 = uitofp i32 %val to double
   %2 = fcmp olt double %1, 0.000000e+00
@@ -18,6 +20,7 @@ define i1 @test2(i32 %val) {
 ; CHECK: ret i1 false
 }
 
+; CHECK-LABEL: @test3(
 define i1 @test3(i32 %val) {
   %1 = uitofp i32 %val to double
   %2 = fcmp oge double %1, 0.000000e+00
@@ -25,6 +28,7 @@ define i1 @test3(i32 %val) {
 ; CHECK: ret i1 true
 }
 
+; CHECK-LABEL: @test4(
 define i1 @test4(i32 %val) {
   %1 = uitofp i32 %val to double
   %2 = fcmp ogt double %1, 0.000000e+00
@@ -32,6 +36,7 @@ define i1 @test4(i32 %val) {
   ret i1 %2
 }
 
+; CHECK-LABEL: @test5(
 define i1 @test5(i32 %val) {
   %1 = uitofp i32 %val to double
   %2 = fcmp ogt double %1, -4.400000e+00
@@ -39,6 +44,7 @@ define i1 @test5(i32 %val) {
 ; CHECK: ret i1 true
 }
 
+; CHECK-LABEL: @test6(
 define i1 @test6(i32 %val) {
   %1 = uitofp i32 %val to double
   %2 = fcmp olt double %1, -4.400000e+00
@@ -48,6 +54,7 @@ define i1 @test6(i32 %val) {
 
 ; Check that optimizing unsigned >= comparisons correctly distinguishes
 ; positive and negative constants.  <rdar://problem/12029145>
+; CHECK-LABEL: @test7(
 define i1 @test7(i32 %val) {
   %1 = uitofp i32 %val to double
   %2 = fcmp oge double %1, 3.200000e+00
diff --git a/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
index a75a4656e68a..c8f0351858ca 100644
--- a/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
+++ b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
@@ -15,7 +15,7 @@ define void @fu1(i32 %parm) nounwind ssp {
 
 ; <label>:4                                       ; preds = %0
   %5 = load i32* %1, align 4
-  %6 = mul nsw i32 %5, 8
+  %6 = shl nsw i32 %5, 3
 ; With "nsw", the alloca and its bitcast can be fused:
   %7 = add nsw i32 %6, 2048
 ;  CHECK: alloca double
@@ -50,7 +50,8 @@ define void @fu2(i32 %parm) nounwind ssp {
   %7 = add  i32 %6, 2048
 ; CHECK: alloca i8
   %8 = alloca i8, i32 %7
-; CHECK-NEXT: bitcast i8*
+; CHECK-NEXT: bitcast double**
+; CHECK-NEXT: store i8*
   %9 = bitcast i8* %8 to double*
   store double* %9, double** %ptr, align 4
   br label %10
diff --git a/test/Transforms/InstCombine/AddOverFlow.ll b/test/Transforms/InstCombine/AddOverFlow.ll
index 8f3d429c8d60..bebfd6293f9b 100644
--- a/test/Transforms/InstCombine/AddOverFlow.ll
+++ b/test/Transforms/InstCombine/AddOverFlow.ll
@@ -36,8 +36,8 @@ define i16 @zero_sign_bit2(i16 %a, i16 %b) {
 
 declare i16 @bounded(i16 %input);
 declare i32 @__gxx_personality_v0(...);
-!0 = metadata !{i16 0, i16 32768} ; [0, 32767]
-!1 = metadata !{i16 0, i16 32769} ; [0, 32768]
+!0 = !{i16 0, i16 32768} ; [0, 32767]
+!1 = !{i16 0, i16 32769} ; [0, 32768]
 
 define i16 @add_bounded_values(i16 %a, i16 %b) {
 ; CHECK-LABEL: @add_bounded_values(
diff --git a/test/Transforms/InstCombine/add2.ll b/test/Transforms/InstCombine/add2.ll
index d7eac4b0fd21..fbbba5954b32 100644
--- a/test/Transforms/InstCombine/add2.ll
+++ b/test/Transforms/InstCombine/add2.ll
@@ -294,7 +294,7 @@ define i16 @add_cttz(i16 %a) {
   ret i16 %b
 }
 declare i16 @llvm.cttz.i16(i16, i1)
-!0 = metadata !{i16 0, i16 8}
+!0 = !{i16 0, i16 8}
 
 ; Similar to @add_cttz, but in this test, the range implied by the
 ; intrinsic is more strict. Therefore, ValueTracking uses that range.
@@ -312,4 +312,44 @@ define i16 @add_cttz_2(i16 %a) {
 ; CHECK: or i16 %cttz, -16
   ret i16 %b
 }
-!1 = metadata !{i16 0, i16 32}
+!1 = !{i16 0, i16 32}
+
+define i32 @add_or_and(i32 %x, i32 %y) {
+  %or = or i32 %x, %y
+  %and = and i32 %x, %y
+  %add = add i32 %or, %and
+  ret i32 %add
+; CHECK-LABEL: @add_or_and(
+; CHECK-NEXT: add i32 %x, %y
+; CHECK-NEXT: ret i32
+}
+
+define i32 @add_nsw_or_and(i32 %x, i32 %y) {
+  %or = or i32 %x, %y
+  %and = and i32 %x, %y
+  %add = add nsw i32 %or, %and
+  ret i32 %add
+; CHECK-LABEL: @add_nsw_or_and(
+; CHECK-NEXT: add nsw i32 %x, %y
+; CHECK-NEXT: ret i32
+}
+
+define i32 @add_nuw_or_and(i32 %x, i32 %y) {
+  %or = or i32 %x, %y
+  %and = and i32 %x, %y
+  %add = add nuw i32 %or, %and
+  ret i32 %add
+; CHECK-LABEL: @add_nuw_or_and(
+; CHECK-NEXT: add nuw i32 %x, %y
+; CHECK-NEXT: ret i32
+}
+
+define i32 @add_nuw_nsw_or_and(i32 %x, i32 %y) {
+  %or = or i32 %x, %y
+  %and = and i32 %x, %y
+  %add = add nsw nuw i32 %or, %and
+  ret i32 %add
+; CHECK-LABEL: @add_nuw_nsw_or_and(
+; CHECK-NEXT: add nuw nsw i32 %x, %y
+; CHECK-NEXT: ret i32
+}
diff --git a/test/Transforms/InstCombine/add4.ll b/test/Transforms/InstCombine/add4.ll
deleted file mode 100644
index f9b7e3b5a079..000000000000
--- a/test/Transforms/InstCombine/add4.ll
+++ /dev/null
@@ -1,102 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-define float @test1(float %A, float %B, i1 %C) {
-EntryBlock:
-  ;; A*(1 - uitofp i1 C) -> select C, 0, A
-  %cf = uitofp i1 %C to float
-  %mc = fsub float 1.000000e+00, %cf
-  %p1 = fmul fast float %A, %mc
-  ret float %p1
-; CHECK-LABEL: @test1(
-; CHECK: select i1 %C, float -0.000000e+00, float %A
-}
-
-define float @test2(float %A, float %B, i1 %C) {
-EntryBlock:
-  ;; B*(uitofp i1 C) -> select C, B, 0
-  %cf = uitofp i1 %C to float
-  %p2 = fmul fast float %B, %cf
-  ret float %p2
-; CHECK-LABEL: @test2(
-; CHECK: select i1 %C, float %B, float -0.000000e+00
-}
-
-define float @test3(float %A, float %B, i1 %C) {
-EntryBlock:
-  ;;  select C, 0, B + select C, A, 0 -> select C, A, B
-  %cf = uitofp i1 %C to float
-  %s1 = select i1 %C, float 0.000000e+00, float %B
-  %s2 = select i1 %C, float %A, float 0.000000e+00
-  %sum = fadd fast float %s1, %s2
-  ret float %sum
-; CHECK-LABEL: @test3(
-; CHECK: select i1 %C, float %A, float %B
-}
-
-define float @test4(float %A, float %B, i1 %C) {
-EntryBlock:
-  ;;  B*(uitofp i1 C) + A*(1 - uitofp i1 C) -> select C, A, B
-  %cf = uitofp i1 %C to float
-  %mc = fsub fast float 1.000000e+00, %cf
-  %p1 = fmul fast float %A, %mc
-  %p2 = fmul fast float %B, %cf
-  %s1 = fadd fast float %p2, %p1
-  ret float %s1
-; CHECK-LABEL: @test4(
-; CHECK: select i1 %C, float %B, float %A
-}
-
-define float @test5(float %A, float %B, i1 %C) {
-EntryBlock:
-  ;; A*(1 - uitofp i1 C) + B*(uitofp i1 C) -> select C, A, B
-  %cf = uitofp i1 %C to float
-  %mc = fsub fast float 1.000000e+00, %cf
-  %p1 = fmul fast float %A, %mc
-  %p2 = fmul fast float %B, %cf
-  %s1 = fadd fast float %p1, %p2
-  ret float %s1
-; CHECK-LABEL: @test5(
-; CHECK: select i1 %C, float %B, float %A
-}
-
-; PR15952
-define float @test6(float %A, float %B, i32 %C) {
-  %cf = uitofp i32 %C to float
-  %mc = fsub float 1.000000e+00, %cf
-  %p1 = fmul fast float %A, %mc
-  ret float %p1
-; CHECK-LABEL: @test6(
-; CHECK: uitofp
-}
-
-define float @test7(float %A, float %B, i32 %C) {
-  %cf = uitofp i32 %C to float
-  %p2 = fmul fast float %B, %cf
-  ret float %p2
-; CHECK-LABEL: @test7(
-; CHECK: uitofp
-}
-
-define <4 x float> @test8(<4 x float> %A, <4 x float> %B, <4 x i1> %C) {
-  ;;  B*(uitofp i1 C) + A*(1 - uitofp i1 C) -> select C, A, B
-  %cf = uitofp <4 x i1> %C to <4 x float>
-  %mc = fsub fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %cf
-  %p1 = fmul fast <4 x float> %A, %mc
-  %p2 = fmul fast <4 x float> %B, %cf
-  %s1 = fadd fast <4 x float> %p2, %p1
-  ret <4 x float> %s1
-; CHECK-LABEL: @test8(
-; CHECK: select <4 x i1> %C, <4 x float> %B, <4 x float> %A
-}
-
-define <4 x float> @test9(<4 x float> %A, <4 x float> %B, <4 x i1> %C) {
-  ;; A*(1 - uitofp i1 C) + B*(uitofp i1 C) -> select C, A, B
-  %cf = uitofp <4 x i1> %C to <4 x float>
-  %mc = fsub fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %cf
-  %p1 = fmul fast <4 x float> %A, %mc
-  %p2 = fmul fast <4 x float> %B, %cf
-  %s1 = fadd fast <4 x float> %p1, %p2
-  ret <4 x float> %s1
-; CHECK-LABEL: @test9
-; CHECK: select <4 x i1> %C, <4 x float> %B, <4 x float> %A
-}
diff --git a/test/Transforms/InstCombine/addnegneg.ll b/test/Transforms/InstCombine/addnegneg.ll
index ad8791d1e7ee..90f6baf5dd54 100644
--- a/test/Transforms/InstCombine/addnegneg.ll
+++ b/test/Transforms/InstCombine/addnegneg.ll
@@ -9,4 +9,3 @@ entry:
 	%sub6 = add i32 %sub4, %d		; <i32> [#uses=1]
 	ret i32 %sub6
 }
-
diff --git a/test/Transforms/InstCombine/alias-recursion.ll b/test/Transforms/InstCombine/alias-recursion.ll
new file mode 100644
index 000000000000..fa63726d2d33
--- /dev/null
+++ b/test/Transforms/InstCombine/alias-recursion.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+%class.A = type { i32 (...)** }
+
+@0 = constant [1 x i8*] zeroinitializer
+
+@vtbl = alias getelementptr inbounds ([1 x i8*]* @0, i32 0, i32 0)
+
+define i32 (%class.A*)* @test() {
+; CHECK-LABEL: test
+entry:
+  br i1 undef, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.body, %entry
+  br i1 undef, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  %A = phi i32 (%class.A*)** [ bitcast (i8** @vtbl to i32 (%class.A*)**), %for.body ], [ null, %entry ]
+  %B = load i32 (%class.A*)** %A
+  ret i32 (%class.A*)* %B
+}
diff --git a/test/Transforms/InstCombine/align-attr.ll b/test/Transforms/InstCombine/align-attr.ll
new file mode 100644
index 000000000000..9f366bf8fab1
--- /dev/null
+++ b/test/Transforms/InstCombine/align-attr.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @foo1(i32* align 32 %a) #0 {
+entry:
+  %0 = load i32* %a, align 4
+  ret i32 %0
+
+; CHECK-LABEL: @foo1
+; CHECK-DAG: load i32* %a, align 32
+; CHECK: ret i32
+}
+
diff --git a/test/Transforms/InstCombine/and-compare.ll b/test/Transforms/InstCombine/and-compare.ll
index c30a245e4156..037641b90ad7 100644
--- a/test/Transforms/InstCombine/and-compare.ll
+++ b/test/Transforms/InstCombine/and-compare.ll
@@ -1,11 +1,15 @@
 ; RUN: opt < %s -instcombine -S | \
-; RUN:    grep and | count 1
+; RUN: FileCheck %s
 
 ; Should be optimized to one and.
 define i1 @test1(i32 %a, i32 %b) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: %1 = xor i32 %a, %b
+; CHECK-NEXT: %2 = and i32 %1, 65280
+; CHECK-NEXT: %tmp = icmp ne i32 %2, 0
+; CHECK-NEXT: ret i1 %tmp
         %tmp1 = and i32 %a, 65280               ; <i32> [#uses=1]
         %tmp3 = and i32 %b, 65280               ; <i32> [#uses=1]
         %tmp = icmp ne i32 %tmp1, %tmp3         ; <i1> [#uses=1]
         ret i1 %tmp
 }
-
diff --git a/test/Transforms/InstCombine/and-xor-merge.ll b/test/Transforms/InstCombine/and-xor-merge.ll
index e432a9aef7d1..b9a6a536ce7c 100644
--- a/test/Transforms/InstCombine/and-xor-merge.ll
+++ b/test/Transforms/InstCombine/and-xor-merge.ll
@@ -1,8 +1,11 @@
-; RUN: opt < %s -instcombine -S | grep and | count 1
-; RUN: opt < %s -instcombine -S | grep xor | count 2
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 ; (x&z) ^ (y&z) -> (x^y)&z
 define i32 @test1(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: %tmp61 = xor i32 %x, %y
+; CHECK-NEXT: %tmp7 = and i32 %tmp61, %z
+; CHECK-NEXT: ret i32 %tmp7
         %tmp3 = and i32 %z, %x
         %tmp6 = and i32 %z, %y
         %tmp7 = xor i32 %tmp3, %tmp6
@@ -11,9 +14,11 @@ define i32 @test1(i32 %x, i32 %y, i32 %z) {
 
 ; (x & y) ^ (x|y) -> x^y
 define i32 @test2(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: %tmp7 = xor i32 %y, %x
+; CHECK-NEXT: ret i32 %tmp7
         %tmp3 = and i32 %y, %x
         %tmp6 = or i32 %y, %x
         %tmp7 = xor i32 %tmp3, %tmp6
         ret i32 %tmp7
 }
-
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index e88fd5983003..96b535dda99d 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -45,7 +45,7 @@ define <4 x i32> @test5(<4 x i32> %A) {
 
 ; Check that we combine "if x!=0 && x!=-1" into "if x+1u>1"
 define i32 @test6(i64 %x) nounwind {
-; CHECK: @test6
+; CHECK-LABEL: @test6(
 ; CHECK-NEXT: add i64 %x, 1
 ; CHECK-NEXT: icmp ugt i64 %x.off, 1
   %cmp1 = icmp ne i64 %x, -1
@@ -54,3 +54,26 @@ define i32 @test6(i64 %x) nounwind {
   %land.ext = zext i1 %.cmp1 to i32
   ret i32 %land.ext
 }
+
+define i1 @test7(i32 %i, i1 %b) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %i, 0
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP]], %b
+; CHECK-NEXT: ret i1 [[AND]]
+  %cmp1 = icmp slt i32 %i, 1
+  %cmp2 = icmp sgt i32 %i, -1
+  %and1 = and i1 %cmp1, %b
+  %and2 = and i1 %and1, %cmp2
+  ret i1 %and2
+}
+
+define i1 @test8(i32 %i) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[DEC:%.*]] = add i32 %i, -1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[DEC]], 13
+; CHECK-NEXT: ret i1 [[CMP]]
+  %cmp1 = icmp ne i32 %i, 0
+  %cmp2 = icmp ult i32 %i, 14
+  %cond = and i1 %cmp1, %cmp2
+  ret i1 %cond
+}
diff --git a/test/Transforms/InstCombine/apint-call-cast-target.ll b/test/Transforms/InstCombine/apint-call-cast-target.ll
index 4e98f9b2b3ac..f3a66c324147 100644
--- a/test/Transforms/InstCombine/apint-call-cast-target.ll
+++ b/test/Transforms/InstCombine/apint-call-cast-target.ll
@@ -5,15 +5,18 @@ target triple = "i686-pc-linux-gnu"
 
 define i32 @main() {
 ; CHECK-LABEL: @main(
-; CHECK: call i32 bitcast
+; CHECK: %[[call:.*]] = call i7* @ctime(i999* null)
+; CHECK: %[[cast:.*]] = ptrtoint i7* %[[call]] to i32
+; CHECK: ret i32 %[[cast]]
 entry:
 	%tmp = call i32 bitcast (i7* (i999*)* @ctime to i32 (i99*)*)( i99* null )
 	ret i32 %tmp
 }
 
 define i7* @ctime(i999*) {
-; CHECK-LABEL: @ctime(
-; CHECK: call i7* bitcast
+; CHECK-LABEL: define i7* @ctime(
+; CHECK: %[[call:.*]] = call i32 @main()
+; CHECK: %[[cast:.*]] = inttoptr i32 %[[call]] to i7*
 entry:
 	%tmp = call i7* bitcast (i32 ()* @main to i7* ()*)( )
 	ret i7* %tmp
diff --git a/test/Transforms/InstCombine/apint-sub.ll b/test/Transforms/InstCombine/apint-sub.ll
index df8ec52b5abd..3b69c17e183a 100644
--- a/test/Transforms/InstCombine/apint-sub.ll
+++ b/test/Transforms/InstCombine/apint-sub.ll
@@ -95,12 +95,6 @@ define i1024 @test14(i1024 %A) {
 	ret i1024 %D
 }
 
-define i14 @test15(i14 %A, i14 %B) {
-	%C = sub i14 0, %A		; <i14> [#uses=1]
-	%D = srem i14 %B, %C		; <i14> [#uses=1]
-	ret i14 %D
-}
-
 define i51 @test16(i51 %A) {
 	%X = sdiv i51 %A, 1123		; <i51> [#uses=1]
 	%Y = sub i51 0, %X		; <i51> [#uses=1]
diff --git a/test/Transforms/InstCombine/assume-loop-align.ll b/test/Transforms/InstCombine/assume-loop-align.ll
new file mode 100644
index 000000000000..19190de2cddd
--- /dev/null
+++ b/test/Transforms/InstCombine/assume-loop-align.ll
@@ -0,0 +1,47 @@
+; RUN: opt -domtree -instcombine -loops -S < %s | FileCheck %s
+; Note: The -loops above can be anything that requires the domtree, and is
+; necessary to work around a pass-manager bug.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @foo(i32* %a, i32* %b) #0 {
+entry:
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 63
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+  %ptrint1 = ptrtoint i32* %b to i64
+  %maskedptr2 = and i64 %ptrint1, 63
+  %maskcond3 = icmp eq i64 %maskedptr2, 0
+  tail call void @llvm.assume(i1 %maskcond3)
+  br label %for.body
+
+; CHECK-LABEL: @foo
+; CHECK: load i32* {{.*}} align 64
+; CHECK: store i32 {{.*}}  align 64
+; CHECK: ret
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add = add nsw i32 %0, 1
+  %arrayidx5 = getelementptr inbounds i32* %a, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx5, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 16
+  %1 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %1, 1648
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/InstCombine/assume-redundant.ll b/test/Transforms/InstCombine/assume-redundant.ll
new file mode 100644
index 000000000000..81fe0945949b
--- /dev/null
+++ b/test/Transforms/InstCombine/assume-redundant.ll
@@ -0,0 +1,55 @@
+; RUN: opt -domtree -instcombine -loops -S < %s | FileCheck %s
+; Note: The -loops above can be anything that requires the domtree, and is
+; necessary to work around a pass-manager bug.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.s = type { double* }
+
+; Function Attrs: nounwind uwtable
+define void @_Z3fooR1s(%struct.s* nocapture readonly dereferenceable(8) %x) #0 {
+
+; CHECK-LABEL: @_Z3fooR1s
+; CHECK: call void @llvm.assume
+; CHECK-NOT: call void @llvm.assume
+
+entry:
+  %a = getelementptr inbounds %struct.s* %x, i64 0, i32 0
+  %0 = load double** %a, align 8
+  %ptrint = ptrtoint double* %0 to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.1, %for.body ]
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx = getelementptr inbounds double* %0, i64 %indvars.iv
+  %1 = load double* %arrayidx, align 16
+  %add = fadd double %1, 1.000000e+00
+  tail call void @llvm.assume(i1 %maskcond)
+  %mul = fmul double %add, 2.000000e+00
+  store double %mul, double* %arrayidx, align 16
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx.1 = getelementptr inbounds double* %0, i64 %indvars.iv.next
+  %2 = load double* %arrayidx.1, align 8
+  %add.1 = fadd double %2, 1.000000e+00
+  tail call void @llvm.assume(i1 %maskcond)
+  %mul.1 = fmul double %add.1, 2.000000e+00
+  store double %mul.1, double* %arrayidx.1, align 8
+  %indvars.iv.next.1 = add nuw nsw i64 %indvars.iv.next, 1
+  %exitcond.1 = icmp eq i64 %indvars.iv.next, 1599
+  br i1 %exitcond.1, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/InstCombine/assume.ll b/test/Transforms/InstCombine/assume.ll
new file mode 100644
index 000000000000..7e45c04622ac
--- /dev/null
+++ b/test/Transforms/InstCombine/assume.ll
@@ -0,0 +1,265 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @foo1(i32* %a) #0 {
+entry:
+  %0 = load i32* %a, align 4
+
+; Check that the alignment has been upgraded and that the assume has not
+; been removed:
+; CHECK-LABEL: @foo1
+; CHECK-DAG: load i32* %a, align 32
+; CHECK-DAG: call void @llvm.assume
+; CHECK: ret i32
+
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+
+  ret i32 %0
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @foo2(i32* %a) #0 {
+entry:
+; Same check as in @foo1, but make sure it works if the assume is first too.
+; CHECK-LABEL: @foo2
+; CHECK-DAG: load i32* %a, align 32
+; CHECK-DAG: call void @llvm.assume
+; CHECK: ret i32
+
+  %ptrint = ptrtoint i32* %a to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  tail call void @llvm.assume(i1 %maskcond)
+
+  %0 = load i32* %a, align 4
+  ret i32 %0
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+define i32 @simple(i32 %a) #1 {
+entry:
+
+; CHECK-LABEL: @simple
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 4
+
+  %cmp = icmp eq i32 %a, 4
+  tail call void @llvm.assume(i1 %cmp)
+  ret i32 %a
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @can1(i1 %a, i1 %b, i1 %c) {
+entry:
+  %and1 = and i1 %a, %b
+  %and  = and i1 %and1, %c
+  tail call void @llvm.assume(i1 %and)
+
+; CHECK-LABEL: @can1
+; CHECK: call void @llvm.assume(i1 %a)
+; CHECK: call void @llvm.assume(i1 %b)
+; CHECK: call void @llvm.assume(i1 %c)
+; CHECK: ret i32
+
+  ret i32 5
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @can2(i1 %a, i1 %b, i1 %c) {
+entry:
+  %v = or i1 %a, %b
+  %w = xor i1 %v, 1
+  tail call void @llvm.assume(i1 %w)
+
+; CHECK-LABEL: @can2
+; CHECK: %[[V1:[^ ]+]] = xor i1 %a, true
+; CHECK: call void @llvm.assume(i1 %[[V1]])
+; CHECK: %[[V2:[^ ]+]] = xor i1 %b, true
+; CHECK: call void @llvm.assume(i1 %[[V2]])
+; CHECK: ret i32
+
+  ret i32 5
+}
+
+define i32 @bar1(i32 %a) #0 {
+entry:
+  %and1 = and i32 %a, 3
+
+; CHECK-LABEL: @bar1
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.assume(i1 %cmp)
+
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar2(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @bar2
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 3
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar3(i32 %a, i1 %x, i1 %y) #0 {
+entry:
+  %and1 = and i32 %a, 3
+
+; Don't be fooled by other assumes around.
+; CHECK-LABEL: @bar3
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+  tail call void @llvm.assume(i1 %x)
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.assume(i1 %cmp)
+
+  tail call void @llvm.assume(i1 %y)
+
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @bar4(i32 %a, i32 %b) {
+entry:
+  %and1 = and i32 %b, 3
+
+; CHECK-LABEL: @bar4
+; CHECK: call void @llvm.assume
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+  %and = and i32 %a, 7
+  %cmp = icmp eq i32 %and, 1
+  tail call void @llvm.assume(i1 %cmp)
+
+  %cmp2 = icmp eq i32 %a, %b
+  tail call void @llvm.assume(i1 %cmp2)
+
+  ret i32 %and1
+}
+
+define i32 @icmp1(i32 %a) #0 {
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+
+; CHECK-LABEL: @icmp1
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 1
+
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @icmp2(i32 %a) #0 {
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %0 = zext i1 %cmp to i32
+  %lnot.ext = xor i32 %0, 1
+  ret i32 %lnot.ext
+
+; CHECK-LABEL: @icmp2
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 0
+}
+
+declare void @escape(i32* %a)
+
+; Do we canonicalize a nonnull assumption on a load into
+; metadata form?
+define i1 @nonnull1(i32** %a) {
+entry:
+  %load = load i32** %a
+  %cmp = icmp ne i32* %load, null
+  tail call void @llvm.assume(i1 %cmp)
+  tail call void @escape(i32* %load)
+  %rval = icmp eq i32* %load, null
+  ret i1 %rval
+
+; CHECK-LABEL: @nonnull1
+; CHECK: !nonnull
+; CHECK-NOT: call void @llvm.assume
+; CHECK: ret i1 false
+}
+
+; Make sure the above canonicalization applies only
+; to pointer types.  Doing otherwise would be illegal.
+define i1 @nonnull2(i32* %a) {
+entry:
+  %load = load i32* %a
+  %cmp = icmp ne i32 %load, 0
+  tail call void @llvm.assume(i1 %cmp)
+  %rval = icmp eq i32 %load, 0
+  ret i1 %rval
+
+; CHECK-LABEL: @nonnull2
+; CHECK-NOT: !nonnull
+; CHECK: call void @llvm.assume
+}
+
+; Make sure the above canonicalization does not trigger
+; if the assume is control dependent on something else
+define i1 @nonnull3(i32** %a, i1 %control) {
+entry:
+  %load = load i32** %a
+  %cmp = icmp ne i32* %load, null
+  br i1 %control, label %taken, label %not_taken
+taken:
+  tail call void @llvm.assume(i1 %cmp)
+  %rval = icmp eq i32* %load, null
+  ret i1 %rval
+not_taken:
+  ret i1 true
+
+; CHECK-LABEL: @nonnull3
+; CHECK-NOT: !nonnull
+; CHECK: call void @llvm.assume
+}
+
+; Make sure the above canonicalization does not trigger
+; if the path from the load to the assume is potentially 
+; interrupted by an exception being thrown
+define i1 @nonnull4(i32** %a) {
+entry:
+  %load = load i32** %a
+  ;; This call may throw!
+  tail call void @escape(i32* %load)
+  %cmp = icmp ne i32* %load, null
+  tail call void @llvm.assume(i1 %cmp)
+  %rval = icmp eq i32* %load, null
+  ret i1 %rval
+
+; CHECK-LABEL: @nonnull4
+; CHECK-NOT: !nonnull
+; CHECK: call void @llvm.assume
+}
+
+
+
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/InstCombine/assume2.ll b/test/Transforms/InstCombine/assume2.ll
new file mode 100644
index 000000000000..c41bbaa04eb7
--- /dev/null
+++ b/test/Transforms/InstCombine/assume2.ll
@@ -0,0 +1,174 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+; Function Attrs: nounwind uwtable
+define i32 @test1(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test1
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 5
+
+  %and = and i32 %a, 15
+  %cmp = icmp eq i32 %and, 5
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 7
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test2(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test2
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 2
+
+  %and = and i32 %a, 15
+  %nand = xor i32 %and, -1
+  %cmp = icmp eq i32 %nand, 4294967285
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 7
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test3(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test3
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 5
+
+  %v = or i32 %a, 4294967280
+  %cmp = icmp eq i32 %v, 4294967285
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 7
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test4(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test4
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 2
+
+  %v = or i32 %a, 4294967280
+  %nv = xor i32 %v, -1
+  %cmp = icmp eq i32 %nv, 5
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 7
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test5(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test5
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 4
+
+  %v = xor i32 %a, 1
+  %cmp = icmp eq i32 %v, 5
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 7
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test6(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test6
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 5
+
+  %v = shl i32 %a, 2
+  %cmp = icmp eq i32 %v, 20
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 63
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test7(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test7
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 20
+
+  %v = lshr i32 %a, 2
+  %cmp = icmp eq i32 %v, 5
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 252
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test8(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test8
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 20
+
+  %v = lshr i32 %a, 2
+  %cmp = icmp eq i32 %v, 5
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 252
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test9(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test9
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 0
+
+  %cmp = icmp sgt i32 %a, 5
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 2147483648
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test10(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test10
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 -2147483648
+
+  %cmp = icmp sle i32 %a, -2
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 2147483648
+  ret i32 %and1
+}
+
+; Function Attrs: nounwind uwtable
+define i32 @test11(i32 %a) #0 {
+entry:
+; CHECK-LABEL: @test11
+; CHECK: call void @llvm.assume
+; CHECK: ret i32 0
+
+  %cmp = icmp ule i32 %a, 256
+  tail call void @llvm.assume(i1 %cmp)
+
+  %and1 = and i32 %a, 3072
+  ret i32 %and1
+}
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/InstCombine/atomic.ll b/test/Transforms/InstCombine/atomic.ll
index ccee87433f32..98cecefcc294 100644
--- a/test/Transforms/InstCombine/atomic.ll
+++ b/test/Transforms/InstCombine/atomic.ll
@@ -5,14 +5,6 @@ target triple = "x86_64-apple-macosx10.7.0"
 
 ; Check transforms involving atomic operations
 
-define i32* @test1(i8** %p) {
-; CHECK-LABEL: define i32* @test1(
-; CHECK: load atomic i8** %p monotonic, align 8
-  %c = bitcast i8** %p to i32**
-  %r = load atomic i32** %c monotonic, align 8
-  ret i32* %r
-}
-
 define i32 @test2(i32* %p) {
 ; CHECK-LABEL: define i32 @test2(
 ; CHECK: %x = load atomic i32* %p seq_cst, align 4
diff --git a/test/Transforms/InstCombine/bitcast-alias-function.ll b/test/Transforms/InstCombine/bitcast-alias-function.ll
index a6b56f94ffbf..cfec09200dbc 100644
--- a/test/Transforms/InstCombine/bitcast-alias-function.ll
+++ b/test/Transforms/InstCombine/bitcast-alias-function.ll
@@ -90,10 +90,12 @@ entry:
 define void @bitcast_alias_scalar(float* noalias %source, float* noalias %dest) nounwind {
 entry:
 ; CHECK-LABEL: @bitcast_alias_scalar
-; CHECK: bitcast float %tmp to i32
+; CHECK: bitcast float* %source to i32*
+; CHECK: load i32*
 ; CHECK-NOT: fptoui
 ; CHECK-NOT: uitofp
-; CHECK: bitcast i32 %call to float
+; CHECK: bitcast float* %dest to i32*
+; CHECK: store i32
   %tmp = load float* %source, align 8
   %call = call float @alias_i32_to_f32(float %tmp) nounwind
   store float %call, float* %dest, align 8
@@ -104,10 +106,12 @@ entry:
 define void @bitcast_alias_vector(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
 entry:
 ; CHECK-LABEL: @bitcast_alias_vector
-; CHECK: bitcast <2 x float> %tmp to <2 x i32>
+; CHECK: bitcast <2 x float>* %source to <2 x i32>*
+; CHECK: load <2 x i32>*
 ; CHECK-NOT: fptoui
 ; CHECK-NOT: uitofp
-; CHECK: bitcast <2 x i32> %call to <2 x float>
+; CHECK: bitcast <2 x float>* %dest to <2 x i32>*
+; CHECK: store <2 x i32>
   %tmp = load <2 x float>* %source, align 8
   %call = call <2 x float> @alias_v2i32_to_v2f32(<2 x float> %tmp) nounwind
   store <2 x float> %call, <2 x float>* %dest, align 8
@@ -118,9 +122,11 @@ entry:
 define void @bitcast_alias_vector_scalar_same_size(<2 x float>* noalias %source, <2 x float>* noalias %dest) nounwind {
 entry:
 ; CHECK-LABEL: @bitcast_alias_vector_scalar_same_size
-; CHECK: bitcast <2 x float> %tmp to i64
+; CHECK: bitcast <2 x float>* %source to i64*
+; CHECK: load i64*
 ; CHECK: %call = call i64 @func_i64
-; CHECK: bitcast i64 %call to <2 x float>
+; CHECK: bitcast <2 x float>* %dest to i64*
+; CHECK: store i64
   %tmp = load <2 x float>* %source, align 8
   %call = call <2 x float> @alias_v2f32_to_i64(<2 x float> %tmp) nounwind
   store <2 x float> %call, <2 x float>* %dest, align 8
@@ -130,9 +136,11 @@ entry:
 define void @bitcast_alias_scalar_vector_same_size(i64* noalias %source, i64* noalias %dest) nounwind {
 entry:
 ; CHECK-LABEL: @bitcast_alias_scalar_vector_same_size
-; CHECK: bitcast i64 %tmp to <2 x float>
+; CHECK: bitcast i64* %source to <2 x float>*
+; CHECK: load <2 x float>*
 ; CHECK: call <2 x float> @func_v2f32
-; CHECK: bitcast <2 x float> %call to i64
+; CHECK: bitcast i64* %dest to <2 x float>*
+; CHECK: store <2 x float>
   %tmp = load i64* %source, align 8
   %call = call i64 @alias_i64_to_v2f32(i64 %tmp) nounwind
   store i64 %call, i64* %dest, align 8
@@ -142,9 +150,11 @@ entry:
 define void @bitcast_alias_vector_ptrs_same_size(<2 x i64*>* noalias %source, <2 x i64*>* noalias %dest) nounwind {
 entry:
 ; CHECK-LABEL: @bitcast_alias_vector_ptrs_same_size
-; CHECK: bitcast <2 x i64*> %tmp to <2 x i32*>
+; CHECK: bitcast <2 x i64*>* %source to <2 x i32*>*
+; CHECK: load <2 x i32*>*
 ; CHECK: call <2 x i32*> @func_v2i32p
-; CHECK: bitcast <2 x i32*> %call to <2 x i64*>
+; CHECK: bitcast <2 x i64*>* %dest to <2 x i32*>*
+; CHECK: store <2 x i32*>
   %tmp = load <2 x i64*>* %source, align 8
   %call = call <2 x i64*> @alias_v2i32p_to_v2i64p(<2 x i64*> %tmp) nounwind
   store <2 x i64*> %call, <2 x i64*>* %dest, align 8
diff --git a/test/Transforms/InstCombine/bitcast-store.ll b/test/Transforms/InstCombine/bitcast-store.ll
index e46b5c82d9ff..ea4d680ade5a 100644
--- a/test/Transforms/InstCombine/bitcast-store.ll
+++ b/test/Transforms/InstCombine/bitcast-store.ll
@@ -10,11 +10,11 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 @G = external constant [5 x i8*]
 
 ; CHECK-LABEL: @foo
-; CHECK: store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 16, !tag !0
-define void @foo(%struct.A* %a) nounwind {
+; CHECK: store i32 %x, i32* %{{.*}}, align 16, !noalias !0
+define void @foo(i32 %x, float* %p) nounwind {
 entry:
-  %0 = bitcast %struct.A* %a to i8***
-  store i8** getelementptr inbounds ([5 x i8*]* @G, i64 0, i64 2), i8*** %0, align 16, !tag !0
+  %x.cast = bitcast i32 %x to float
+  store float %x.cast, float* %p, align 16, !noalias !0
   ret void
 }
 
@@ -32,4 +32,4 @@ entry:
   ret void
 }
 
-!0 = metadata !{metadata !"hello"}
+!0 = !{!0}
diff --git a/test/Transforms/InstCombine/bswap-fold.ll b/test/Transforms/InstCombine/bswap-fold.ll
index 442ce58421e1..63b0775e4aff 100644
--- a/test/Transforms/InstCombine/bswap-fold.ll
+++ b/test/Transforms/InstCombine/bswap-fold.ll
@@ -1,63 +1,79 @@
-; RUN: opt < %s -instcombine -S | not grep call.*bswap
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i1 @test1(i16 %tmp2) {
+; CHECK-LABEL: @test1
+; CHECK-NEXT:  %tmp = icmp eq i16 %tmp2, 256
+; CHECK-NEXT:  ret i1 %tmp
         %tmp10 = call i16 @llvm.bswap.i16( i16 %tmp2 )
         %tmp = icmp eq i16 %tmp10, 1
         ret i1 %tmp
 }
 
 define i1 @test2(i32 %tmp) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT:  %tmp.upgrd.1 = icmp eq i32 %tmp, 16777216
+; CHECK-NEXT:  ret i1 %tmp.upgrd.1
         %tmp34 = tail call i32 @llvm.bswap.i32( i32 %tmp )
         %tmp.upgrd.1 = icmp eq i32 %tmp34, 1
         ret i1 %tmp.upgrd.1
 }
 
-declare i32 @llvm.bswap.i32(i32)
-
 define i1 @test3(i64 %tmp) {
+; CHECK-LABEL: @test3
+; CHECK-NEXT:  %tmp.upgrd.2 = icmp eq i64 %tmp, 72057594037927936
+; CHECK-NEXT:  ret i1 %tmp.upgrd.2
         %tmp34 = tail call i64 @llvm.bswap.i64( i64 %tmp )
         %tmp.upgrd.2 = icmp eq i64 %tmp34, 1
         ret i1 %tmp.upgrd.2
 }
 
-declare i64 @llvm.bswap.i64(i64)
-
-declare i16 @llvm.bswap.i16(i16)
-
 ; rdar://5992453
 ; A & 255
 define i32 @test4(i32 %a) nounwind  {
-entry:
-	%tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )	
+; CHECK-LABEL: @test4
+; CHECK-NEXT:  %tmp2 = and i32 %a, 255
+; CHECK-NEXT:  ret i32 %tmp2
+	%tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
 	%tmp4 = lshr i32 %tmp2, 24
 	ret i32 %tmp4
 }
 
 ; A
-define i32 @test5(i32 %a) nounwind  {
-entry:
+define i32 @test5(i32 %a) nounwind {
+; CHECK-LABEL: @test5
+; CHECK-NEXT:  ret i32 %a
 	%tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
 	%tmp4 = tail call i32 @llvm.bswap.i32( i32 %tmp2 )
 	ret i32 %tmp4
 }
 
 ; a >> 24
-define i32 @test6(i32 %a) nounwind  {
-entry:
-	%tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )	
+define i32 @test6(i32 %a) nounwind {
+; CHECK-LABEL: @test6
+; CHECK-NEXT:  %tmp2 = lshr i32 %a, 24
+; CHECK-NEXT   ret i32 %tmp4
+	%tmp2 = tail call i32 @llvm.bswap.i32( i32 %a )
 	%tmp4 = and i32 %tmp2, 255
 	ret i32 %tmp4
 }
 
 ; PR5284
 define i16 @test7(i32 %A) {
-  %B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind 
+; CHECK-LABEL: @test7
+; CHECK-NEXT:  %1 = lshr i32 %A, 16
+; CHECK-NEXT:  %D = trunc i32 %1 to i16
+; CHECK-NEXT   ret i16 %D
+  %B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind
   %C = trunc i32 %B to i16
   %D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
   ret i16 %D
 }
 
 define i16 @test8(i64 %A) {
+; CHECK-LABEL: @test8
+; CHECK-NEXT:  %1 = lshr i64 %A, 48
+; CHECK-NEXT:  %D = trunc i64 %1 to i16
+; CHECK-NEXT   ret i16 %D
   %B = tail call i64 @llvm.bswap.i64(i64 %A) nounwind 
   %C = trunc i64 %B to i16
   %D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
@@ -66,6 +82,144 @@ define i16 @test8(i64 %A) {
 
 ; Misc: Fold bswap(undef) to undef.
 define i64 @foo() {
+; CHECK-LABEL: @foo
+; CHECK-NEXT: ret i64 undef
   %a = call i64 @llvm.bswap.i64(i64 undef)
   ret i64 %a
 }
+
+; PR15782
+; Fold: OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
+; Fold: OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
+define i16 @bs_and16i(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_and16i
+; CHECK-NEXT:  %1 = and i16 %a, 4391
+; CHECK-NEXT:  %2 = call i16 @llvm.bswap.i16(i16 %1)
+; CHECK-NEXT:  ret i16 %2
+  %1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %2 = and i16 %1, 10001
+  ret i16 %2
+}
+
+define i16 @bs_and16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_and16
+; CHECK-NEXT:  %1 = and i16 %a, %b
+; CHECK-NEXT:  %2 = call i16 @llvm.bswap.i16(i16 %1)
+; CHECK-NEXT:  ret i16 %2
+  %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b)
+  %tmp3 = and i16 %tmp1, %tmp2
+  ret i16 %tmp3
+}
+
+define i16 @bs_or16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_or16
+; CHECK-NEXT:  %1 = or i16 %a, %b
+; CHECK-NEXT:  %2 = call i16 @llvm.bswap.i16(i16 %1)
+; CHECK-NEXT:  ret i16 %2
+  %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b)
+  %tmp3 = or i16 %tmp1, %tmp2
+  ret i16 %tmp3
+}
+
+define i16 @bs_xor16(i16 %a, i16 %b) #0 {
+; CHECK-LABEL: @bs_xor16
+; CHECK-NEXT:  %1 = xor i16 %a, %b
+; CHECK-NEXT:  %2 = call i16 @llvm.bswap.i16(i16 %1)
+; CHECK-NEXT:  ret i16 %2
+  %tmp1 = tail call i16 @llvm.bswap.i16(i16 %a)
+  %tmp2 = tail call i16 @llvm.bswap.i16(i16 %b)
+  %tmp3 = xor i16 %tmp1, %tmp2
+  ret i16 %tmp3
+}
+
+define i32 @bs_and32i(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_and32i
+; CHECK-NEXT:  %1 = and i32 %a, -1585053440
+; CHECK-NEXT:  %2 = call i32 @llvm.bswap.i32(i32 %1)
+; CHECK-NEXT:  ret i32 %2
+  %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %tmp2 = and i32 %tmp1, 100001
+  ret i32 %tmp2
+}
+
+define i32 @bs_and32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_and32
+; CHECK-NEXT:  %1 = and i32 %a, %b
+; CHECK-NEXT:  %2 = call i32 @llvm.bswap.i32(i32 %1)
+; CHECK-NEXT:  ret i32 %2
+  %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %tmp3 = and i32 %tmp1, %tmp2
+  ret i32 %tmp3
+}
+
+define i32 @bs_or32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_or32
+; CHECK-NEXT:  %1 = or i32 %a, %b
+; CHECK-NEXT:  %2 = call i32 @llvm.bswap.i32(i32 %1)
+; CHECK-NEXT:  ret i32 %2
+  %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %tmp3 = or i32 %tmp1, %tmp2
+  ret i32 %tmp3
+}
+
+define i32 @bs_xor32(i32 %a, i32 %b) #0 {
+; CHECK-LABEL: @bs_xor32
+; CHECK-NEXT:  %1 = xor i32 %a, %b
+; CHECK-NEXT:  %2 = call i32 @llvm.bswap.i32(i32 %1)
+; CHECK-NEXT:  ret i32 %2
+  %tmp1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  %tmp2 = tail call i32 @llvm.bswap.i32(i32 %b)
+  %tmp3 = xor i32 %tmp1, %tmp2
+  ret i32 %tmp3
+}
+
+define i64 @bs_and64i(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64i
+; CHECK-NEXT:  %1 = and i64 %a, 129085117527228416
+; CHECK-NEXT:  %2 = call i64 @llvm.bswap.i64(i64 %1)
+; CHECK-NEXT:  ret i64 %2
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = and i64 %tmp1, 1000000001
+  ret i64 %tmp2
+}
+
+define i64 @bs_and64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_and64
+; CHECK-NEXT:  %1 = and i64 %a, %b
+; CHECK-NEXT:  %2 = call i64 @llvm.bswap.i64(i64 %1)
+; CHECK-NEXT:  ret i64 %2
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %tmp3 = and i64 %tmp1, %tmp2
+  ret i64 %tmp3
+}
+
+define i64 @bs_or64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_or64
+; CHECK-NEXT:  %1 = or i64 %a, %b
+; CHECK-NEXT:  %2 = call i64 @llvm.bswap.i64(i64 %1)
+; CHECK-NEXT:  ret i64 %2
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %tmp3 = or i64 %tmp1, %tmp2
+  ret i64 %tmp3
+}
+
+define i64 @bs_xor64(i64 %a, i64 %b) #0 {
+; CHECK-LABEL: @bs_xor64
+; CHECK-NEXT:  %1 = xor i64 %a, %b
+; CHECK-NEXT:  %2 = call i64 @llvm.bswap.i64(i64 %1)
+; CHECK-NEXT:  ret i64 %2
+  %tmp1 = tail call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = tail call i64 @llvm.bswap.i64(i64 %b)
+  %tmp3 = xor i64 %tmp1, %tmp2
+  ret i64 %tmp3
+}
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
diff --git a/test/Transforms/InstCombine/call-cast-target.ll b/test/Transforms/InstCombine/call-cast-target.ll
index 1af3317a398b..b82dd99db36f 100644
--- a/test/Transforms/InstCombine/call-cast-target.ll
+++ b/test/Transforms/InstCombine/call-cast-target.ll
@@ -5,7 +5,9 @@ target triple = "i686-pc-linux-gnu"
 
 define i32 @main() {
 ; CHECK-LABEL: @main
-; CHECK: call i32 bitcast
+; CHECK: %[[call:.*]] = call i8* @ctime(i32* null)
+; CHECK: %[[cast:.*]] = ptrtoint i8* %[[call]] to i32
+; CHECK: ret i32 %[[cast]]
 entry:
   %tmp = call i32 bitcast (i8* (i32*)* @ctime to i32 (i32*)*)( i32* null )          ; <i32> [#uses=1]
   ret i32 %tmp
@@ -25,3 +27,37 @@ entry:
   %0 = call { i8 } bitcast ({ i8 } (i32*)* @foo to { i8 } (i16*)*)(i16* null)
   ret void
 }
+
+declare i32 @fn1(i32)
+
+define i32 @test1(i32* %a) {
+; CHECK-LABEL: @test1
+; CHECK:      %[[cast:.*]] = ptrtoint i32* %a to i32
+; CHECK-NEXT: %[[call:.*]] = tail call i32 @fn1(i32 %[[cast]])
+; CHECK-NEXT: ret i32 %[[call]]
+entry:
+  %call = tail call i32 bitcast (i32 (i32)* @fn1 to i32 (i32*)*)(i32* %a)
+  ret i32 %call
+}
+
+declare i32 @fn2(i16)
+
+define i32 @test2(i32* %a) {
+; CHECK-LABEL: @test2
+; CHECK:      %[[call:.*]] = tail call i32 bitcast (i32 (i16)* @fn2 to i32 (i32*)*)(i32* %a)
+; CHECK-NEXT: ret i32 %[[call]]
+entry:
+  %call = tail call i32 bitcast (i32 (i16)* @fn2 to i32 (i32*)*)(i32* %a)
+  ret i32 %call
+}
+
+declare i32 @fn3(i64)
+
+define i32 @test3(i32* %a) {
+; CHECK-LABEL: @test3
+; CHECK:      %[[call:.*]] = tail call i32 bitcast (i32 (i64)* @fn3 to i32 (i32*)*)(i32* %a)
+; CHECK-NEXT: ret i32 %[[call]]
+entry:
+  %call = tail call i32 bitcast (i32 (i64)* @fn3 to i32 (i32*)*)(i32* %a)
+  ret i32 %call
+}
diff --git a/test/Transforms/InstCombine/canonicalize_branch.ll b/test/Transforms/InstCombine/canonicalize_branch.ll
index b62b143d9d51..29fd51a39ab4 100644
--- a/test/Transforms/InstCombine/canonicalize_branch.ll
+++ b/test/Transforms/InstCombine/canonicalize_branch.ll
@@ -57,10 +57,10 @@ F:
         ret i32 123
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 1, i32 2}
-!1 = metadata !{metadata !"branch_weights", i32 3, i32 4}
-!2 = metadata !{metadata !"branch_weights", i32 5, i32 6}
-!3 = metadata !{metadata !"branch_weights", i32 7, i32 8}
+!0 = !{!"branch_weights", i32 1, i32 2}
+!1 = !{!"branch_weights", i32 3, i32 4}
+!2 = !{!"branch_weights", i32 5, i32 6}
+!3 = !{!"branch_weights", i32 7, i32 8}
 ; Base case shouldn't change.
 ; CHECK: !0 = {{.*}} i32 1, i32 2}
 ; Ensure that the branch metadata is reversed to match the reversals above.
diff --git a/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll b/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll
new file mode 100644
index 000000000000..551d0efce5ea
--- /dev/null
+++ b/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll
@@ -0,0 +1,454 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_0_uitofp(
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_n0_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+define i1 @i32_cast_cmp_oeq_int_n0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_0_sitofp(
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_n0_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp oeq
+define i1 @i32_cast_cmp_oeq_int_n0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_one_int_0_uitofp(
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_one_int_0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp one float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_one_int_n0_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp one
+define i1 @i32_cast_cmp_one_int_n0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp one float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_one_int_0_sitofp(
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_one_int_0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp one float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_one_int_n0_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp one
+define i1 @i32_cast_cmp_one_int_n0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp one float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ueq_int_0_uitofp(
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_ueq_int_0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp ueq float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ueq_int_n0_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp ueq
+define i1 @i32_cast_cmp_ueq_int_n0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp ueq float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ueq_int_0_sitofp(
+; CHECK-NEXT: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_ueq_int_0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp ueq float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ueq_int_n0_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp ueq
+define i1 @i32_cast_cmp_ueq_int_n0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp ueq float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_une_int_0_uitofp(
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_une_int_0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp une float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_une_int_n0_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp une
+define i1 @i32_cast_cmp_une_int_n0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp une float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_une_int_0_sitofp(
+; CHECK-NEXT: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_une_int_0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp une float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_une_int_n0_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp une
+define i1 @i32_cast_cmp_une_int_n0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp une float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ogt_int_0_uitofp(
+; CHECK: icmp ne i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_ogt_int_0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp ogt float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ogt_int_n0_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp ogt
+define i1 @i32_cast_cmp_ogt_int_n0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp ogt float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ogt_int_0_sitofp(
+; CHECK: icmp sgt i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_ogt_int_0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp ogt float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ogt_int_n0_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp ogt
+define i1 @i32_cast_cmp_ogt_int_n0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp ogt float %f, -0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ole_int_0_uitofp(
+; CHECK: icmp eq i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_ole_int_0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp ole float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ole_int_0_sitofp(
+; CHECK: icmp slt i32 %i, 1
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_ole_int_0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp ole float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_olt_int_0_uitofp(
+; CHECK: ret i1 false
+define i1 @i32_cast_cmp_olt_int_0_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp olt float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_olt_int_0_sitofp(
+; CHECK: icmp slt i32 %i, 0
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_olt_int_0_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp olt float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i64_cast_cmp_oeq_int_0_uitofp(
+; CHECK-NEXT: icmp eq i64 %i, 0
+; CHECK-NEXT: ret
+define i1 @i64_cast_cmp_oeq_int_0_uitofp(i64 %i) {
+  %f = uitofp i64 %i to float
+  %cmp = fcmp oeq float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i64_cast_cmp_oeq_int_0_sitofp(
+; CHECK-NEXT: icmp eq i64 %i, 0
+; CHECK-NEXT: ret
+define i1 @i64_cast_cmp_oeq_int_0_sitofp(i64 %i) {
+  %f = sitofp i64 %i to float
+  %cmp = fcmp oeq float %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i64_cast_cmp_oeq_int_0_uitofp_half(
+; CHECK-NEXT: icmp eq i64 %i, 0
+; CHECK-NEXT: ret
+define i1 @i64_cast_cmp_oeq_int_0_uitofp_half(i64 %i) {
+  %f = uitofp i64 %i to half
+  %cmp = fcmp oeq half %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i64_cast_cmp_oeq_int_0_sitofp_half(
+; CHECK-NEXT: icmp eq i64 %i, 0
+; CHECK-NEXT: ret
+define i1 @i64_cast_cmp_oeq_int_0_sitofp_half(i64 %i) {
+  %f = sitofp i64 %i to half
+  %cmp = fcmp oeq half %f, 0.0
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_0_uitofp_ppcf128(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_0_uitofp_ppcf128(i32 %i) {
+  %f = uitofp i32 %i to ppc_fp128
+  %cmp = fcmp oeq ppc_fp128 %f, 0xM00000000000000000000000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24max_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+
+; XCHECK: icmp eq i32 %i, 16777215
+; XCHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i24max_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x416FFFFFE0000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24max_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp oeq
+
+; XCHECK: icmp eq i32 %i, 16777215
+; XCHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i24max_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x416FFFFFE0000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24maxp1_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+
+; XCHECK: icmp eq i32 %i, 16777216
+; XCHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i24maxp1_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x4170000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i24maxp1_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp oeq
+
+; XCHECK: icmp eq i32 %i, 16777216
+; XCHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i24maxp1_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x4170000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32umax_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i32umax_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x41F0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32umax_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i32umax_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x41F0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32imin_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i32imin_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0xC1E0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32imin_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i32imin_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0xC1E0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32imax_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i32imax_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x41E0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_i32imax_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_i32imax_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0x41E0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_negi32umax_uitofp(
+; CHECK: uitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_negi32umax_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0xC1F0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_int_negi32umax_sitofp(
+; CHECK: sitofp
+; CHECK: fcmp oeq
+; CHECK-NEXT: ret
+define i1 @i32_cast_cmp_oeq_int_negi32umax_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0xC1F0000000000000
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_half_uitofp(
+; CHECK: ret i1 false
+define i1 @i32_cast_cmp_oeq_half_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0.5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_oeq_half_sitofp(
+; CHECK: ret i1 false
+define i1 @i32_cast_cmp_oeq_half_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp oeq float %f, 0.5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_one_half_uitofp(
+; CHECK: ret i1 true
+define i1 @i32_cast_cmp_one_half_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp one float %f, 0.5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_one_half_sitofp(
+; CHECK: ret i1 true
+define i1 @i32_cast_cmp_one_half_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp one float %f, 0.5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ueq_half_uitofp(
+; CHECK: ret i1 false
+define i1 @i32_cast_cmp_ueq_half_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp ueq float %f, 0.5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_ueq_half_sitofp(
+; CHECK: ret i1 false
+define i1 @i32_cast_cmp_ueq_half_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp ueq float %f, 0.5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_une_half_uitofp(
+; CHECK: ret i1 true
+define i1 @i32_cast_cmp_une_half_uitofp(i32 %i) {
+  %f = uitofp i32 %i to float
+  %cmp = fcmp une float %f, 0.5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @i32_cast_cmp_une_half_sitofp(
+; CHECK: ret i1 true
+define i1 @i32_cast_cmp_une_half_sitofp(i32 %i) {
+  %f = sitofp i32 %i to float
+  %cmp = fcmp une float %f, 0.5
+  ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 0cbfbb071bb7..aac7a531b356 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -99,6 +99,26 @@ define void @test11(i32* %P) {
 ; CHECK: ret void
 }
 
+declare i32 @__gxx_personality_v0(...)
+define void @test_invoke_vararg_cast(i32* %a, i32* %b) {
+entry:
+  %0 = bitcast i32* %b to i8*
+  %1 = bitcast i32* %a to i64*
+  invoke void (i32, ...)* @varargs(i32 1, i8* %0, i64* %1)
+          to label %invoke.cont unwind label %lpad
+
+invoke.cont:                                      ; preds = %entry
+  ret void
+
+lpad:                                             ; preds = %entry
+  %2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  ret void
+; CHECK-LABEL: test_invoke_vararg_cast
+; CHECK-LABEL: entry:
+; CHECK: invoke void (i32, ...)* @varargs(i32 1, i32* %b, i32* %a)
+}
+
 define i8* @test13(i64 %A) {
         %c = getelementptr [0 x i8]* bitcast ([32832 x i8]* @inbuf to [0 x i8]*), i64 0, i64 %A             ; <i8*> [#uses=1]
         ret i8* %c
@@ -199,15 +219,6 @@ define i1 @test24(i1 %C) {
 ; CHECK: ret i1 true
 }
 
-define void @test25(i32** %P) {
-        %c = bitcast i32** %P to float**                ; <float**> [#uses=1]
-        ;; Fold cast into null
-        store float* null, float** %c
-        ret void
-; CHECK: store i32* null, i32** %P
-; CHECK: ret void
-}
-
 define i32 @test26(float %F) {
         ;; no need to cast from float->double.
         %c = fpext float %F to double           ; <double> [#uses=1]
@@ -354,6 +365,24 @@ define i32* @test41(i32* %tmp1) {
 ; CHECK: ret i32* %tmp1
 }
 
+define i32 addrspace(1)* @test41_addrspacecast_smaller(i32* %tmp1) {
+  %tmp64 = addrspacecast i32* %tmp1 to { i32 } addrspace(1)*
+  %tmp65 = getelementptr { i32 } addrspace(1)* %tmp64, i32 0, i32 0
+  ret i32 addrspace(1)* %tmp65
+; CHECK-LABEL: @test41_addrspacecast_smaller(
+; CHECK: addrspacecast i32* %tmp1 to i32 addrspace(1)*
+; CHECK-NEXT: ret i32 addrspace(1)*
+}
+
+define i32* @test41_addrspacecast_larger(i32 addrspace(1)* %tmp1) {
+  %tmp64 = addrspacecast i32 addrspace(1)* %tmp1 to { i32 }*
+  %tmp65 = getelementptr { i32 }* %tmp64, i32 0, i32 0
+  ret i32* %tmp65
+; CHECK-LABEL: @test41_addrspacecast_larger(
+; CHECK: addrspacecast i32 addrspace(1)* %tmp1 to i32*
+; CHECK-NEXT: ret i32*
+}
+
 define i32 @test42(i32 %X) {
         %Y = trunc i32 %X to i8         ; <i8> [#uses=1]
         %Z = zext i8 %Y to i32          ; <i32> [#uses=1]
@@ -792,7 +821,7 @@ define double @test71(double *%p, i64 %i) {
 
 define double @test72(double *%p, i32 %i) {
 ; CHECK-LABEL: @test72(
-  %so = mul nsw i32 %i, 8
+  %so = shl nsw i32 %i, 3
   %o = sext i32 %so to i64
 ; CHECK-NEXT: sext i32 %i to i64
   %q = bitcast double* %p to i8*
@@ -807,7 +836,7 @@ define double @test72(double *%p, i32 %i) {
 
 define double @test73(double *%p, i128 %i) {
 ; CHECK-LABEL: @test73(
-  %lo = mul nsw i128 %i, 8
+  %lo = shl nsw i128 %i, 3
   %o = trunc i128 %lo to i64
 ; CHECK-NEXT: trunc i128 %i to i64
   %q = bitcast double* %p to i8*
@@ -919,7 +948,7 @@ define %s @test79(%s *%p, i64 %i, i32 %j) {
 
 define double @test80([100 x double]* %p, i32 %i) {
 ; CHECK-LABEL: @test80(
-  %tmp = mul nsw i32 %i, 8
+  %tmp = shl nsw i32 %i, 3
 ; CHECK-NEXT: sext i32 %i to i64
   %q = bitcast [100 x double]* %p to i8*
   %pp = getelementptr i8* %q, i32 %tmp
@@ -936,7 +965,7 @@ define double @test80_addrspacecast([100 x double] addrspace(1)* %p, i32 %i) {
 ; CHECK-NEXT: getelementptr [100 x double] addrspace(1)* %p
 ; CHECK-NEXT: load double addrspace(1)*
 ; CHECK-NEXT: ret double
-  %tmp = mul nsw i32 %i, 8
+  %tmp = shl nsw i32 %i, 3
   %q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)*
   %pp = getelementptr i8 addrspace(2)* %q, i32 %tmp
   %r = addrspacecast i8 addrspace(2)* %pp to double addrspace(1)*
@@ -950,7 +979,7 @@ define double @test80_addrspacecast_2([100 x double] addrspace(1)* %p, i32 %i) {
 ; CHECK-NEXT: addrspacecast double addrspace(1)*
 ; CHECK-NEXT: load double addrspace(3)*
 ; CHECK-NEXT: ret double
-  %tmp = mul nsw i32 %i, 8
+  %tmp = shl nsw i32 %i, 3
   %q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)*
   %pp = getelementptr i8 addrspace(2)* %q, i32 %tmp
   %r = addrspacecast i8 addrspace(2)* %pp to double addrspace(3)*
@@ -960,7 +989,7 @@ define double @test80_addrspacecast_2([100 x double] addrspace(1)* %p, i32 %i) {
 
 define double @test80_as1([100 x double] addrspace(1)* %p, i16 %i) {
 ; CHECK-LABEL: @test80_as1(
-  %tmp = mul nsw i16 %i, 8
+  %tmp = shl nsw i16 %i, 3
 ; CHECK-NEXT: sext i16 %i to i32
   %q = bitcast [100 x double] addrspace(1)* %p to i8 addrspace(1)*
   %pp = getelementptr i8 addrspace(1)* %q, i16 %tmp
@@ -1004,7 +1033,74 @@ define i64 @test83(i16 %a, i64 %k) {
   ret i64 %sh_prom1
 
 ; CHECK-LABEL: @test83(
-; CHECK: %sub = add nsw i64 %k, 4294967295
+; CHECK: %sub = add i64 %k, 4294967295
 ; CHECK: %sh_prom = trunc i64 %sub to i32
 ; CHECK: %shl = shl i32 %conv, %sh_prom
 }
+
+define i8 @test84(i32 %a) {
+  %add = add nsw i32 %a, -16777216
+  %shr = lshr exact i32 %add, 23
+  %trunc = trunc i32 %shr to i8
+  ret i8 %trunc
+
+; CHECK-LABEL: @test84(
+; CHECK: [[ADD:%.*]] = add i32 %a, 2130706432
+; CHECK: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23
+; CHECK: [[CST:%.*]] = trunc i32 [[SHR]] to i8
+}
+
+define i8 @test85(i32 %a) {
+  %add = add nuw i32 %a, -16777216
+  %shr = lshr exact i32 %add, 23
+  %trunc = trunc i32 %shr to i8
+  ret i8 %trunc
+
+; CHECK-LABEL: @test85(
+; CHECK: [[ADD:%.*]] = add i32 %a, 2130706432
+; CHECK: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23
+; CHECK: [[CST:%.*]] = trunc i32 [[SHR]] to i8
+}
+
+; Overflow on a float to int or int to float conversion is undefined (PR21130).
+
+define i8 @overflow_fptosi() {
+  %i = fptosi double 1.56e+02 to i8
+  ret i8 %i
+; CHECK-LABEL: @overflow_fptosi(
+; CHECK-NEXT: ret i8 undef 
+}
+
+define i8 @overflow_fptoui() {
+  %i = fptoui double 2.56e+02 to i8
+  ret i8 %i
+; CHECK-LABEL: @overflow_fptoui(
+; CHECK-NEXT: ret i8 undef 
+}
+
+; The maximum float is approximately 2 ** 128 which is 3.4E38. 
+; The constant below is 4E38. Use a 130 bit integer to hold that
+; number; 129-bits for the value + 1 bit for the sign.
+define float @overflow_uitofp() {
+  %i = uitofp i130 400000000000000000000000000000000000000 to float
+  ret float %i
+; CHECK-LABEL: @overflow_uitofp(
+; CHECK-NEXT: ret float undef 
+}
+
+define float @overflow_sitofp() {
+  %i = sitofp i130 400000000000000000000000000000000000000 to float
+  ret float %i
+; CHECK-LABEL: @overflow_sitofp(
+; CHECK-NEXT: ret float undef 
+}
+
+define i32 @PR21388(i32* %v) {
+  %icmp = icmp slt i32* %v, null
+  %sext = sext i1 %icmp to i32
+  ret i32 %sext
+; CHECK-LABEL: @PR21388(
+; CHECK-NEXT: %[[icmp:.*]] = icmp slt i32* %v, null
+; CHECK-NEXT: %[[sext:.*]] = sext i1 %[[icmp]] to i32
+; CHECK-NEXT: ret i32 %[[sext]]
+}
diff --git a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
index 7fac78a40f56..bb61f02c8e23 100644
--- a/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
+++ b/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
@@ -161,12 +161,11 @@ define i32 @constant_fold_bitcast_itof_load() {
   ret i32 %a
 }
 
-define <4 x i32> @constant_fold_bitcast_vector_as() {
+define <4 x float> @constant_fold_bitcast_vector_as() {
 ; CHECK-LABEL: @constant_fold_bitcast_vector_as(
 ; CHECK: load <4 x float> addrspace(3)* @g_v4f_as3, align 16
-; CHECK: bitcast <4 x float> %1 to <4 x i32>
-  %a = load <4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*), align 4
-  ret <4 x i32> %a
+  %a = load <4 x float> addrspace(3)* bitcast (<4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*) to <4 x float> addrspace(3)*), align 4
+  ret <4 x float> %a
 }
 
 @i32_array_as3 = addrspace(3) global [10 x i32] zeroinitializer
diff --git a/test/Transforms/InstCombine/constant-fold-alias.ll b/test/Transforms/InstCombine/constant-fold-alias.ll
new file mode 100644
index 000000000000..13da0f445242
--- /dev/null
+++ b/test/Transforms/InstCombine/constant-fold-alias.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S < %s -instcombine | FileCheck %s
+
+target datalayout = "e-p1:16:16-p2:32:32-p3:64:64"
+
+@G1 = global i32 42, align 1
+@G2 = global i32 42
+@G3 = global [4 x i8] zeroinitializer, align 1
+
+@A1 = alias bitcast (i8* getelementptr inbounds ([4 x i8]* @G3, i32 0, i32 2) to i32*)
+@A2 = alias inttoptr (i64 and (i64 ptrtoint (i8* getelementptr inbounds ([4 x i8]* @G3, i32 0, i32 3) to i64), i64 -4) to i32*)
+
+define i64 @f1() {
+; This cannot be constant folded because G1 is underaligned.
+; CHECK-LABEL: @f1(
+; CHECK: ret i64 and
+  ret i64 and (i64 ptrtoint (i32* @G1 to i64), i64 1)
+}
+
+define i64 @f2() {
+; The preferred alignment for G2 allows this one to foled to zero.
+; CHECK-LABEL: @f2(
+; CHECK: ret i64 0
+  ret i64 and (i64 ptrtoint (i32* @G2 to i64), i64 1)
+}
+
+define i64 @g1() {
+; This cannot be constant folded because A1 aliases G3 which is underalaigned.
+; CHECK-LABEL: @g1(
+; CHECK: ret i64 and
+  ret i64 and (i64 ptrtoint (i32* @A1 to i64), i64 1)
+}
+
+define i64 @g2() {
+; While A2 also aliases G3 which is underaligned, the math of A2 forces a
+; certain alignment allowing this to fold to zero.
+; CHECK-LABEL: @g2(
+; CHECK: ret i64 0
+  ret i64 and (i64 ptrtoint (i32* @A2 to i64), i64 1)
+}
+
diff --git a/test/Transforms/InstCombine/constant-fold-math.ll b/test/Transforms/InstCombine/constant-fold-math.ll
index 14377df37299..ce8d337c08bf 100644
--- a/test/Transforms/InstCombine/constant-fold-math.ll
+++ b/test/Transforms/InstCombine/constant-fold-math.ll
@@ -7,6 +7,7 @@ declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
 declare double @llvm.fma.f64(double, double, double) #0
 declare double @llvm.fmuladd.f64(double, double, double) #0
 
+declare double @llvm.sqrt.f64(double) #0
 
 
 ; CHECK-LABEL: @constant_fold_fma_f32
@@ -44,4 +45,12 @@ define double @constant_fold_fmuladd_f64() #0 {
   ret double %x
 }
 
+; The sqrt intrinsic is undefined for negative inputs besides -0.0.
+; CHECK-LABEL: @bad_sqrt
+; CHECK-NEXT: ret double undef
+define double @bad_sqrt() {
+  %x = call double @llvm.sqrt.f64(double -2.000000e+00)
+  ret double %x
+}
+
 attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/debug-line.ll b/test/Transforms/InstCombine/debug-line.ll
index 2e3785fe597e..1946576183c4 100644
--- a/test/Transforms/InstCombine/debug-line.ll
+++ b/test/Transforms/InstCombine/debug-line.ll
@@ -15,14 +15,14 @@ declare i32 @printf(i8*, ...)
 !llvm.module.flags = !{!10}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !8, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 4, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, metadata !8, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null}
-!5 = metadata !{i32 5, i32 2, metadata !6, null}
-!6 = metadata !{i32 589835, metadata !8, metadata !0, i32 4, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
-!7 = metadata !{i32 6, i32 1, metadata !6, null}
-!8 = metadata !{metadata !"m.c", metadata !"/private/tmp"}
-!9 = metadata !{metadata !0}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00foo\00foo\00\004\000\001\000\006\000\000\000", !8, !1, !3, null, void ()* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !8} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang\001\00\000\00\000", !8, !4, !4, !9, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !8, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null}
+!5 = !MDLocation(line: 5, column: 2, scope: !6)
+!6 = !{!"0xb\004\0012\000", !8, !0} ; [ DW_TAG_lexical_block ]
+!7 = !MDLocation(line: 6, column: 1, scope: !6)
+!8 = !{!"m.c", !"/private/tmp"}
+!9 = !{!0}
+!10 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index 75082dcae055..ae72f700039e 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) nounwind readnone
 
@@ -14,11 +14,11 @@ entry:
   store i8* %__dest, i8** %__dest.addr, align 8
 ; CHECK-NOT: call void @llvm.dbg.declare
 ; CHECK: call void @llvm.dbg.value
-  call void @llvm.dbg.declare(metadata !{i8** %__dest.addr}, metadata !0), !dbg !16
+  call void @llvm.dbg.declare(metadata i8** %__dest.addr, metadata !0, metadata !{}), !dbg !16
   store i32 %__val, i32* %__val.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %__val.addr}, metadata !7), !dbg !18
+  call void @llvm.dbg.declare(metadata i32* %__val.addr, metadata !7, metadata !{}), !dbg !18
   store i64 %__len, i64* %__len.addr, align 8
-  call void @llvm.dbg.declare(metadata !{i64* %__len.addr}, metadata !9), !dbg !20
+  call void @llvm.dbg.declare(metadata i64* %__len.addr, metadata !9, metadata !{}), !dbg !20
   %tmp = load i8** %__dest.addr, align 8, !dbg !21
   %tmp1 = load i32* %__val.addr, align 4, !dbg !21
   %tmp2 = load i64* %__len.addr, align 8, !dbg !21
@@ -31,29 +31,29 @@ entry:
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!30}
 
-!0 = metadata !{i32 786689, metadata !1, metadata !"__dest", metadata !2, i32 16777294, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !27, metadata !2, metadata !"foobar", metadata !"foobar", metadata !"", i32 79, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8* (i8*, i32, i64)* @foobar, null, null, metadata !25, i32 79} ; [ DW_TAG_subprogram ] [line 79] [local] [def] [foobar]
-!2 = metadata !{i32 786473, metadata !27} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !28, i32 12, metadata !"clang version 3.0 (trunk 127710)", i1 true, metadata !"", i32 0, metadata !29, metadata !29, metadata !24, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !27, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{metadata !6}
-!6 = metadata !{i32 786447, null, metadata !3, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 786689, metadata !1, metadata !"__val", metadata !2, i32 33554510, metadata !8, i32 0, null} ; [ DW_TAG_arg_variable ]
-!8 = metadata !{i32 786468, null, metadata !3, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!9 = metadata !{i32 786689, metadata !1, metadata !"__len", metadata !2, i32 50331726, metadata !10, i32 0, null} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 589846, metadata !27, metadata !3, metadata !"size_t", i32 80, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_typedef ]
-!11 = metadata !{i32 589846, metadata !27, metadata !3, metadata !"__darwin_size_t", i32 90, i64 0, i64 0, i64 0, i32 0, metadata !12} ; [ DW_TAG_typedef ]
-!12 = metadata !{i32 786468, null, metadata !3, metadata !"long unsigned int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!16 = metadata !{i32 78, i32 28, metadata !1, null}
-!18 = metadata !{i32 78, i32 40, metadata !1, null}
-!20 = metadata !{i32 78, i32 54, metadata !1, null}
-!21 = metadata !{i32 80, i32 3, metadata !22, null}
-!22 = metadata !{i32 786443, metadata !27, metadata !23, i32 80, i32 3, i32 7} ; [ DW_TAG_lexical_block ]
-!23 = metadata !{i32 786443, metadata !27, metadata !1, i32 79, i32 1, i32 6} ; [ DW_TAG_lexical_block ]
-!24 = metadata !{metadata !1}
-!25 = metadata !{metadata !0, metadata !7, metadata !9}
-!26 = metadata !{i32 786473, metadata !28} ; [ DW_TAG_file_type ]
-!27 = metadata !{metadata !"string.h", metadata !"Game"}
-!28 = metadata !{metadata !"bits.c", metadata !"Game"}
-!29 = metadata !{i32 0}
-!30 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x101\00__dest\0016777294\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00foobar\00foobar\00\0079\001\001\000\006\00256\001\0079", !27, !2, !4, null, i8* (i8*, i32, i64)* @foobar, null, null, !25} ; [ DW_TAG_subprogram ] [line 79] [local] [def] [foobar]
+!2 = !{!"0x29", !27} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\0012\00clang version 3.0 (trunk 127710)\001\00\000\00\000", !28, !29, !29, !24, null, null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !27, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{!6}
+!6 = !{!"0xf\00\000\0064\0064\000\000", null, !3, null} ; [ DW_TAG_pointer_type ]
+!7 = !{!"0x101\00__val\0033554510\000", !1, !2, !8} ; [ DW_TAG_arg_variable ]
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !3} ; [ DW_TAG_base_type ]
+!9 = !{!"0x101\00__len\0050331726\000", !1, !2, !10} ; [ DW_TAG_arg_variable ]
+!10 = !{!"0x16\00size_t\0080\000\000\000\000", !27, !3, !11} ; [ DW_TAG_typedef ]
+!11 = !{!"0x16\00__darwin_size_t\0090\000\000\000\000", !27, !3, !12} ; [ DW_TAG_typedef ]
+!12 = !{!"0x24\00long unsigned int\000\0064\0064\000\000\007", null, !3} ; [ DW_TAG_base_type ]
+!16 = !MDLocation(line: 78, column: 28, scope: !1)
+!18 = !MDLocation(line: 78, column: 40, scope: !1)
+!20 = !MDLocation(line: 78, column: 54, scope: !1)
+!21 = !MDLocation(line: 80, column: 3, scope: !22)
+!22 = !{!"0xb\0080\003\007", !27, !23} ; [ DW_TAG_lexical_block ]
+!23 = !{!"0xb\0079\001\006", !27, !1} ; [ DW_TAG_lexical_block ]
+!24 = !{!1}
+!25 = !{!0, !7, !9}
+!26 = !{!"0x29", !28} ; [ DW_TAG_file_type ]
+!27 = !{!"string.h", !"Game"}
+!28 = !{!"bits.c", !"Game"}
+!29 = !{i32 0}
+!30 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/InstCombine/descale-zero.ll b/test/Transforms/InstCombine/descale-zero.ll
index 7990fdb3eca3..4656837cc05e 100644
--- a/test/Transforms/InstCombine/descale-zero.ll
+++ b/test/Transforms/InstCombine/descale-zero.ll
@@ -5,8 +5,7 @@ target triple = "x86_64-apple-macosx10.10.0"
 
 define internal i8* @descale_zero() {
 entry:
-; CHECK: load i16** inttoptr (i64 48 to i16**), align 16
-; CHECK-NEXT: bitcast i16*
+; CHECK: load i8** inttoptr (i64 48 to i8**), align 16
 ; CHECK-NEXT: ret i8*
   %i16_ptr = load i16** inttoptr (i64 48 to i16**), align 16
   %num = load i64* inttoptr (i64 64 to i64*), align 64
diff --git a/test/Transforms/InstCombine/devirt.ll b/test/Transforms/InstCombine/devirt.ll
deleted file mode 100644
index 9c7cf5d697e8..000000000000
--- a/test/Transforms/InstCombine/devirt.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt -instcombine -S < %s | FileCheck %s
-
-; CHECK-NOT: getelementptr
-; CHECK-NOT: ptrtoint
-; CHECK: bitcast i8*
-%struct.S = type { i32 (...)** }
-
-@_ZL1p = internal constant { i64, i64 } { i64 1, i64 0 }, align 8
-
-define void @_Z1g1S(%struct.S* %s) nounwind {
-entry:
-  %tmp = load { i64, i64 }* @_ZL1p, align 8
-  %memptr.adj = extractvalue { i64, i64 } %tmp, 1
-  %0 = bitcast %struct.S* %s to i8*
-  %1 = getelementptr inbounds i8* %0, i64 %memptr.adj
-  %this.adjusted = bitcast i8* %1 to %struct.S*
-  %memptr.ptr = extractvalue { i64, i64 } %tmp, 0
-  %2 = and i64 %memptr.ptr, 1
-  %memptr.isvirtual = icmp ne i64 %2, 0
-  br i1 %memptr.isvirtual, label %memptr.virtual, label %memptr.nonvirtual
-
-memptr.virtual:                                   ; preds = %entry
-  %3 = bitcast %struct.S* %this.adjusted to i8**
-  %memptr.vtable = load i8** %3
-  %4 = sub i64 %memptr.ptr, 1
-  %5 = getelementptr i8* %memptr.vtable, i64 %4
-  %6 = bitcast i8* %5 to void (%struct.S*)**
-  %memptr.virtualfn = load void (%struct.S*)** %6
-  br label %memptr.end
-
-memptr.nonvirtual:                                ; preds = %entry
-  %memptr.nonvirtualfn = inttoptr i64 %memptr.ptr to void (%struct.S*)*
-  br label %memptr.end
-
-memptr.end:                                       ; preds = %memptr.nonvirtual, %memptr.virtual
-  %7 = phi void (%struct.S*)* [ %memptr.virtualfn, %memptr.virtual ], [ %memptr.nonvirtualfn, %memptr.nonvirtual ]
-  call void %7(%struct.S* %this.adjusted)
-  ret void
-}
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index 9c7ba9b03059..e0ff07baae7c 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -132,11 +132,11 @@ define i32 @test15(i32 %a, i32 %b) nounwind {
 }
 
 define <2 x i64> @test16(<2 x i64> %x) nounwind {
-  %shr = lshr <2 x i64> %x, <i64 3, i64 5>
-  %div = udiv <2 x i64> %shr, <i64 4, i64 6>
+  %shr = lshr <2 x i64> %x, <i64 5, i64 5>
+  %div = udiv <2 x i64> %shr, <i64 6, i64 6>
   ret <2 x i64> %div
 ; CHECK-LABEL: @test16(
-; CHECK-NEXT: udiv <2 x i64> %x, <i64 32, i64 192>
+; CHECK-NEXT: udiv <2 x i64> %x, <i64 192, i64 192>
 ; CHECK-NEXT: ret <2 x i64>
 }
 
@@ -175,3 +175,153 @@ define i32 @test20(i32 %x) {
 ; CHECK-NEXT: select i1 %{{.*}}, i32 %x, i32 {{.*}}
 ; CHECK-NEXT: ret i32
 }
+
+define i32 @test21(i32 %a) {
+  %shl = shl nsw i32 %a, 2
+  %div = sdiv i32 %shl, 12
+  ret i32 %div
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: %div = sdiv i32 %a, 3
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test22(i32 %a) {
+  %mul = mul nsw i32 %a, 3
+  %div = sdiv i32 %mul, 12
+  ret i32 %div
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: %div = sdiv i32 %a, 4
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test23(i32 %a) {
+  %shl = shl nuw i32 %a, 2
+  %div = udiv i32 %shl, 12
+  ret i32 %div
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: %div = udiv i32 %a, 3
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test24(i32 %a) {
+  %mul = mul nuw i32 %a, 3
+  %div = udiv i32 %mul, 12
+  ret i32 %div
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: %div = lshr i32 %a, 2
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test25(i32 %a) {
+  %shl = shl nsw i32 %a, 2
+  %div = sdiv i32 %shl, 2
+  ret i32 %div
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: %div = shl nsw i32 %a, 1
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test26(i32 %a) {
+  %mul = mul nsw i32 %a, 12
+  %div = sdiv i32 %mul, 3
+  ret i32 %div
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: %div = shl nsw i32 %a, 2
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test27(i32 %a) {
+  %shl = shl nuw i32 %a, 2
+  %div = udiv i32 %shl, 2
+  ret i32 %div
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: %div = shl nuw i32 %a, 1
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test28(i32 %a) {
+  %mul = mul nuw i32 %a, 36
+  %div = udiv i32 %mul, 3
+  ret i32 %div
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: %div = mul nuw i32 %a, 12
+; CHECK-NEXT: ret i32 %div
+}
+
+define i32 @test29(i32 %a) {
+  %mul = shl nsw i32 %a, 31
+  %div = sdiv i32 %mul, -2147483648
+  ret i32 %div
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: %[[and:.*]] = and i32 %a, 1
+; CHECK-NEXT: ret i32 %[[and]]
+}
+
+define i32 @test30(i32 %a) {
+  %mul = shl nuw i32 %a, 31
+  %div = udiv i32 %mul, -2147483648
+  ret i32 %div
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: ret i32 %a
+}
+
+define <2 x i32> @test31(<2 x i32> %x) {
+  %shr = lshr <2 x i32> %x, <i32 31, i32 31>
+  %div = udiv <2 x i32> %shr, <i32 2147483647, i32 2147483647>
+  ret <2 x i32> %div
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: %[[shr:.*]] = lshr <2 x i32> %x, <i32 31, i32 31>
+; CHECK-NEXT: udiv <2 x i32> %[[shr]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: ret <2 x i32>
+}
+
+define i32 @test32(i32 %a, i32 %b) {
+  %shl = shl i32 2, %b
+  %div = lshr i32 %shl, 2
+  %div2 = udiv i32 %a, %div
+  ret i32 %div2
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: %[[shl:.*]] = shl i32 2, %b
+; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[shl]], 2
+; CHECK-NEXT: %[[div:.*]] = udiv i32 %a, %[[shr]]
+; CHECK-NEXT: ret i32
+}
+
+define <2 x i64> @test33(<2 x i64> %x) nounwind {
+  %shr = lshr exact <2 x i64> %x, <i64 5, i64 5>
+  %div = udiv exact <2 x i64> %shr, <i64 6, i64 6>
+  ret <2 x i64> %div
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: udiv exact <2 x i64> %x, <i64 192, i64 192>
+; CHECK-NEXT: ret <2 x i64>
+}
+
+define <2 x i64> @test34(<2 x i64> %x) nounwind {
+  %neg = sub nsw <2 x i64> zeroinitializer, %x
+  %div = sdiv exact <2 x i64> %neg, <i64 3, i64 4>
+  ret <2 x i64> %div
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: sdiv exact <2 x i64> %x, <i64 -3, i64 -4>
+; CHECK-NEXT: ret <2 x i64>
+}
+
+define i32 @test35(i32 %A) {
+  %and = and i32 %A, 2147483647
+  %mul = sdiv exact i32 %and, 2147483647
+  ret i32 %mul
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: %[[and:.*]]  = and i32 %A, 2147483647
+; CHECK-NEXT: %[[udiv:.*]] = udiv exact i32 %[[and]], 2147483647
+; CHECK-NEXT: ret i32 %[[udiv]]
+}
+
+define i32 @test36(i32 %A) {
+  %and = and i32 %A, 2147483647
+  %shl = shl nsw i32 1, %A
+  %mul = sdiv exact i32 %and, %shl
+  ret i32 %mul
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: %[[and:.*]] = and i32 %A, 2147483647
+; CHECK-NEXT: %[[shr:.*]] = lshr exact i32 %[[and]], %A
+; CHECK-NEXT: ret i32 %[[shr]]
+}
diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll
index d958470f1baa..63a02bbd8572 100644
--- a/test/Transforms/InstCombine/double-float-shrink-1.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -1,349 +1,366 @@
-; RUN: opt < %s -instcombine -enable-double-float-shrink -S | FileCheck %s
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-define float @acos_test(float %f) nounwind readnone {
-; CHECK: acos_test
+; Check for and against shrinkage when using the
+; unsafe-fp-math function attribute on a math lib
+; function. This optimization may be overridden by
+; the -enable-double-float-shrink option.
+; PR17850: http://llvm.org/bugs/show_bug.cgi?id=17850
+
+define float @acos_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @acos(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: acos_test
 ; CHECK: call float @acosf(float %f)
 }
 
-define double @acos_test2(float %f) nounwind readnone {
-; CHECK: acos_test2
+define double @acos_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @acos(double %conv)
    ret double %call
+; CHECK-LABEL: acos_test2
 ; CHECK: call double @acos(double %conv)
 }
 
-define float @acosh_test(float %f) nounwind readnone {
-; CHECK: acosh_test
+define float @acosh_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @acosh(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: acosh_test
 ; CHECK: call float @acoshf(float %f)
 }
 
-define double @acosh_test2(float %f) nounwind readnone {
-; CHECK: acosh_test2
+define double @acosh_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @acosh(double %conv)
    ret double %call
+; CHECK-LABEL: acosh_test2
 ; CHECK: call double @acosh(double %conv)
 }
 
-define float @asin_test(float %f) nounwind readnone {
-; CHECK: asin_test
+define float @asin_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @asin(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: asin_test
 ; CHECK: call float @asinf(float %f)
 }
 
-define double @asin_test2(float %f) nounwind readnone {
-; CHECK: asin_test2
+define double @asin_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @asin(double %conv)
    ret double %call
+; CHECK-LABEL: asin_test2
 ; CHECK: call double @asin(double %conv)
 }
 
-define float @asinh_test(float %f) nounwind readnone {
-; CHECK: asinh_test
+define float @asinh_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @asinh(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: asinh_test
 ; CHECK: call float @asinhf(float %f)
 }
 
-define double @asinh_test2(float %f) nounwind readnone {
-; CHECK: asinh_test2
+define double @asinh_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @asinh(double %conv)
    ret double %call
+; CHECK-LABEL: asinh_test2
 ; CHECK: call double @asinh(double %conv)
 }
 
-define float @atan_test(float %f) nounwind readnone {
-; CHECK: atan_test
+define float @atan_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @atan(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: atan_test
 ; CHECK: call float @atanf(float %f)
 }
 
-define double @atan_test2(float %f) nounwind readnone {
-; CHECK: atan_test2
+define double @atan_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @atan(double %conv)
    ret double %call
+; CHECK-LABEL: atan_test2
 ; CHECK: call double @atan(double %conv)
 }
-define float @atanh_test(float %f) nounwind readnone {
-; CHECK: atanh_test
+define float @atanh_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @atanh(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: atanh_test
 ; CHECK: call float @atanhf(float %f)
 }
 
-define double @atanh_test2(float %f) nounwind readnone {
-; CHECK: atanh_test2
+define double @atanh_test2(float %f)   {
     %conv = fpext float %f to double
     %call = call double @atanh(double %conv)
     ret double %call
+; CHECK-LABEL: atanh_test2
 ; CHECK: call double @atanh(double %conv)
 }
-define float @cbrt_test(float %f) nounwind readnone {
-; CHECK: cbrt_test
+define float @cbrt_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @cbrt(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: cbrt_test
 ; CHECK: call float @cbrtf(float %f)
 }
 
-define double @cbrt_test2(float %f) nounwind readnone {
-; CHECK: cbrt_test2
+define double @cbrt_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @cbrt(double %conv)
    ret double %call
+; CHECK-LABEL: cbrt_test2
 ; CHECK: call double @cbrt(double %conv)
 }
-define float @exp_test(float %f) nounwind readnone {
-; CHECK: exp_test
+define float @exp_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @exp(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: exp_test
 ; CHECK: call float @expf(float %f)
 }
 
-define double @exp_test2(float %f) nounwind readnone {
-; CHECK: exp_test2
+define double @exp_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @exp(double %conv)
    ret double %call
+; CHECK-LABEL: exp_test2
 ; CHECK: call double @exp(double %conv)
 }
-define float @expm1_test(float %f) nounwind readnone {
-; CHECK: expm1_test
+define float @expm1_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @expm1(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: expm1_test
 ; CHECK: call float @expm1f(float %f)
 }
 
-define double @expm1_test2(float %f) nounwind readnone {
-; CHECK: expm1_test2
+define double @expm1_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @expm1(double %conv)
    ret double %call
+; CHECK-LABEL: expm1_test2
 ; CHECK: call double @expm1(double %conv)
 }
-define float @exp10_test(float %f) nounwind readnone {
-; CHECK: exp10_test
+define float @exp10_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @exp10(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
-; FIXME: Re-enable this when Linux allows transforming this again, or when we
-; can use builtin attributes to test the transform regardless of OS.
-; DISABLED-CHECK: call float @exp10f(float %f)
+; CHECK-LABEL: exp10_test
 ; CHECK: call double @exp10(double %conv)
 }
 
-define double @exp10_test2(float %f) nounwind readnone {
-; CHECK: exp10_test2
+define double @exp10_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @exp10(double %conv)
    ret double %call
+; CHECK-LABEL: exp10_test2
 ; CHECK: call double @exp10(double %conv)
 }
-define float @log_test(float %f) nounwind readnone {
-; CHECK: log_test
+define float @log_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @log(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: log_test
 ; CHECK: call float @logf(float %f)
 }
 
-define double @log_test2(float %f) nounwind readnone {
-; CHECK: log_test2
+define double @log_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @log(double %conv)
    ret double %call
+; CHECK-LABEL: log_test2
 ; CHECK: call double @log(double %conv)
 }
-define float @log10_test(float %f) nounwind readnone {
-; CHECK: log10_test
+define float @log10_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @log10(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: log10_test
 ; CHECK: call float @log10f(float %f)
 }
 
-define double @log10_test2(float %f) nounwind readnone {
-; CHECK: log10_test2
+define double @log10_test2(float %f) {
    %conv = fpext float %f to double
    %call = call double @log10(double %conv)
    ret double %call
+; CHECK-LABEL: log10_test2
 ; CHECK: call double @log10(double %conv)
 }
-define float @log1p_test(float %f) nounwind readnone {
-; CHECK: log1p_test
+define float @log1p_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @log1p(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: log1p_test
 ; CHECK: call float @log1pf(float %f)
 }
 
-define double @log1p_test2(float %f) nounwind readnone {
-; CHECK: log1p_test2
+define double @log1p_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @log1p(double %conv)
    ret double %call
+; CHECK-LABEL: log1p_test2
 ; CHECK: call double @log1p(double %conv)
 }
-define float @log2_test(float %f) nounwind readnone {
-; CHECK: log2_test
+define float @log2_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @log2(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: log2_test
 ; CHECK: call float @log2f(float %f)
 }
 
-define double @log2_test2(float %f) nounwind readnone {
-; CHECK: log2_test2
+define double @log2_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @log2(double %conv)
    ret double %call
+; CHECK-LABEL: log2_test2
 ; CHECK: call double @log2(double %conv)
 }
-define float @logb_test(float %f) nounwind readnone {
-; CHECK: logb_test
+define float @logb_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @logb(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: logb_test
 ; CHECK: call float @logbf(float %f)
 }
 
-define double @logb_test2(float %f) nounwind readnone {
-; CHECK: logb_test2
+define double @logb_test2(float %f)   {
    %conv = fpext float %f to double
    %call = call double @logb(double %conv)
    ret double %call
+; CHECK-LABEL: logb_test2
 ; CHECK: call double @logb(double %conv)
 }
-define float @sin_test(float %f) nounwind readnone {
-; CHECK: sin_test
+define float @sin_test(float %f)   {
    %conv = fpext float %f to double
    %call = call double @sin(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: sin_test
 ; CHECK: call float @sinf(float %f)
 }
 
-define double @sin_test2(float %f) nounwind readnone {
-; CHECK: sin_test2
+define double @sin_test2(float %f) {
    %conv = fpext float %f to double
    %call = call double @sin(double %conv)
    ret double %call
+; CHECK-LABEL: sin_test2
 ; CHECK: call double @sin(double %conv)
 }
 
-define float @sqrt_test(float %f) nounwind readnone {
-; CHECK: sqrt_test
+define float @sqrt_test(float %f) {
    %conv = fpext float %f to double
    %call = call double @sqrt(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: sqrt_test
 ; CHECK: call float @sqrtf(float %f)
 }
 
-define float @sqrt_int_test(float %f) nounwind readnone {
-; CHECK: sqrt_int_test
+define double @sqrt_test2(float %f) {
+   %conv = fpext float %f to double
+   %call = call double @sqrt(double %conv)
+   ret double %call
+; CHECK-LABEL: sqrt_test2
+; CHECK: call double @sqrt(double %conv)
+}
+
+define float @sqrt_int_test(float %f) {
    %conv = fpext float %f to double
    %call = call double @llvm.sqrt.f64(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: sqrt_int_test
 ; CHECK: call float @llvm.sqrt.f32(float %f)
 }
 
-define double @sqrt_test2(float %f) nounwind readnone {
-; CHECK: sqrt_test2
+define double @sqrt_int_test2(float %f) {
    %conv = fpext float %f to double
-   %call = call double @sqrt(double %conv)
+   %call = call double @llvm.sqrt.f64(double %conv)
    ret double %call
-; CHECK: call double @sqrt(double %conv)
+; CHECK-LABEL: sqrt_int_test2
+; CHECK: call double @llvm.sqrt.f64(double %conv)
 }
-define float @tan_test(float %f) nounwind readnone {
-; CHECK: tan_test
+
+define float @tan_test(float %f) {
    %conv = fpext float %f to double
    %call = call double @tan(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: tan_test
 ; CHECK: call float @tanf(float %f)
 }
 
-define double @tan_test2(float %f) nounwind readnone {
-; CHECK: tan_test2
+define double @tan_test2(float %f) {
    %conv = fpext float %f to double
    %call = call double @tan(double %conv)
    ret double %call
+; CHECK-LABEL: tan_test2
 ; CHECK: call double @tan(double %conv)
 }
-define float @tanh_test(float %f) nounwind readnone {
-; CHECK: tanh_test
+define float @tanh_test(float %f) {
    %conv = fpext float %f to double
    %call = call double @tanh(double %conv)
    %conv1 = fptrunc double %call to float
    ret float %conv1
+; CHECK-LABEL: tanh_test
 ; CHECK: call float @tanhf(float %f)
 }
 
-define double @tanh_test2(float %f) nounwind readnone {
-; CHECK: tanh_test2
+define double @tanh_test2(float %f) {
    %conv = fpext float %f to double
    %call = call double @tanh(double %conv)
    ret double %call
+; CHECK-LABEL: tanh_test2
 ; CHECK: call double @tanh(double %conv)
 }
 
-declare double @tanh(double) nounwind readnone
-declare double @tan(double) nounwind readnone
-declare double @sqrt(double) nounwind readnone
-declare double @sin(double) nounwind readnone
-declare double @log2(double) nounwind readnone
-declare double @log1p(double) nounwind readnone
-declare double @log10(double) nounwind readnone
-declare double @log(double) nounwind readnone
-declare double @logb(double) nounwind readnone
-declare double @exp10(double) nounwind readnone
-declare double @expm1(double) nounwind readnone
-declare double @exp(double) nounwind readnone
-declare double @cbrt(double) nounwind readnone
-declare double @atanh(double) nounwind readnone
-declare double @atan(double) nounwind readnone
-declare double @acos(double) nounwind readnone
-declare double @acosh(double) nounwind readnone
-declare double @asin(double) nounwind readnone
-declare double @asinh(double) nounwind readnone
-
-declare double @llvm.sqrt.f64(double) nounwind readnone
+declare double @tanh(double) #1
+declare double @tan(double) #1
+
+; sqrt is a special case: the shrinking optimization 
+; is valid even without unsafe-fp-math.
+declare double @sqrt(double) 
+declare double @llvm.sqrt.f64(double) 
+
+declare double @sin(double) #1
+declare double @log2(double) #1
+declare double @log1p(double) #1
+declare double @log10(double) #1
+declare double @log(double) #1
+declare double @logb(double) #1
+declare double @exp10(double) #1
+declare double @expm1(double) #1
+declare double @exp(double) #1
+declare double @cbrt(double) #1
+declare double @atanh(double) #1
+declare double @atan(double) #1
+declare double @acos(double) #1
+declare double @acosh(double) #1
+declare double @asin(double) #1
+declare double @asinh(double) #1
+
+attributes #1 = { "unsafe-fp-math"="true" }
 
diff --git a/test/Transforms/InstCombine/fabs.ll b/test/Transforms/InstCombine/fabs.ll
new file mode 100644
index 000000000000..0479549bea3f
--- /dev/null
+++ b/test/Transforms/InstCombine/fabs.ll
@@ -0,0 +1,100 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Make sure all library calls are eliminated when the input is known positive.
+
+declare float @fabsf(float)
+declare double @fabs(double)
+declare fp128 @fabsl(fp128)
+
+define float @square_fabs_call_f32(float %x) {
+  %mul = fmul float %x, %x
+  %fabsf = tail call float @fabsf(float %mul)
+  ret float %fabsf
+
+; CHECK-LABEL: square_fabs_call_f32(
+; CHECK-NEXT: %mul = fmul float %x, %x
+; CHECK-NEXT: ret float %mul
+}
+
+define double @square_fabs_call_f64(double %x) {
+  %mul = fmul double %x, %x
+  %fabs = tail call double @fabs(double %mul)
+  ret double %fabs
+
+; CHECK-LABEL: square_fabs_call_f64(
+; CHECK-NEXT: %mul = fmul double %x, %x
+; CHECK-NEXT: ret double %mul
+}
+
+define fp128 @square_fabs_call_f128(fp128 %x) {
+  %mul = fmul fp128 %x, %x
+  %fabsl = tail call fp128 @fabsl(fp128 %mul)
+  ret fp128 %fabsl
+
+; CHECK-LABEL: square_fabs_call_f128(
+; CHECK-NEXT: %mul = fmul fp128 %x, %x
+; CHECK-NEXT: ret fp128 %mul
+}
+
+; Make sure all intrinsic calls are eliminated when the input is known positive.
+
+declare float @llvm.fabs.f32(float)
+declare double @llvm.fabs.f64(double)
+declare fp128 @llvm.fabs.f128(fp128)
+
+define float @square_fabs_intrinsic_f32(float %x) {
+  %mul = fmul float %x, %x
+  %fabsf = tail call float @llvm.fabs.f32(float %mul)
+  ret float %fabsf
+
+; CHECK-LABEL: square_fabs_intrinsic_f32(
+; CHECK-NEXT: %mul = fmul float %x, %x
+; CHECK-NEXT: ret float %mul
+}
+
+define double @square_fabs_intrinsic_f64(double %x) {
+  %mul = fmul double %x, %x
+  %fabs = tail call double @llvm.fabs.f64(double %mul)
+  ret double %fabs
+
+; CHECK-LABEL: square_fabs_intrinsic_f64(
+; CHECK-NEXT: %mul = fmul double %x, %x
+; CHECK-NEXT: ret double %mul
+}
+
+define fp128 @square_fabs_intrinsic_f128(fp128 %x) {
+  %mul = fmul fp128 %x, %x
+  %fabsl = tail call fp128 @llvm.fabs.f128(fp128 %mul)
+  ret fp128 %fabsl
+
+; CHECK-LABEL: square_fabs_intrinsic_f128(
+; CHECK-NEXT: %mul = fmul fp128 %x, %x
+; CHECK-NEXT: ret fp128 %mul
+}
+
+; Shrinking a library call to a smaller type should not be inhibited by nor inhibit the square optimization.
+
+define float @square_fabs_shrink_call1(float %x) {
+  %ext = fpext float %x to double
+  %sq = fmul double %ext, %ext
+  %fabs = call double @fabs(double %sq)
+  %trunc = fptrunc double %fabs to float
+  ret float %trunc
+
+; CHECK-LABEL: square_fabs_shrink_call1(
+; CHECK-NEXT: %trunc = fmul float %x, %x
+; CHECK-NEXT: ret float %trunc
+}
+
+define float @square_fabs_shrink_call2(float %x) {
+  %sq = fmul float %x, %x
+  %ext = fpext float %sq to double
+  %fabs = call double @fabs(double %ext)
+  %trunc = fptrunc double %fabs to float
+  ret float %trunc
+
+; CHECK-LABEL: square_fabs_shrink_call2(
+; CHECK-NEXT: %sq = fmul float %x, %x
+; CHECK-NEXT: ret float %sq
+}
+
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index 2ee4b0f2c381..c6081c399253 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -93,7 +93,7 @@ define float @fold9(float %f1, float %f2) {
   ret float %t3
 
 ; CHECK-LABEL: @fold9(
-; CHECK: fsub fast float 0.000000e+00, %f2
+; CHECK: fsub fast float -0.000000e+00, %f2
 }
 
 ; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
@@ -322,6 +322,14 @@ define float @fneg1(float %f1, float %f2) {
 ; CHECK: fmul float %f1, %f2
 }
 
+define float @fneg2(float %x) {
+  %sub = fsub nsz float 0.0, %x
+  ret float %sub
+; CHECK-LABEL: @fneg2(
+; CHECK-NEXT: fsub nsz float -0.000000e+00, %x
+; CHECK-NEXT: ret float 
+}
+
 ; =========================================================================
 ;
 ;   Testing-cases about div
@@ -530,3 +538,173 @@ define float @fact_div6(float %x) {
 ; CHECK: fact_div6
 ; CHECK: %t3 = fsub fast float %t1, %t2
 }
+
+; =========================================================================
+;
+;   Test-cases for square root
+;
+; =========================================================================
+
+; A squared factor fed into a square root intrinsic should be hoisted out
+; as a fabs() value.
+; We have to rely on a function-level attribute to enable this optimization
+; because intrinsics don't currently have access to IR-level fast-math
+; flags. If that changes, we can relax the requirement on all of these
+; tests to just specify 'fast' on the sqrt.
+
+attributes #0 = { "unsafe-fp-math" = "true" }
+
+declare double @llvm.sqrt.f64(double)
+
+define double @sqrt_intrinsic_arg_squared(double %x) #0 {
+  %mul = fmul fast double %x, %x
+  %sqrt = call double @llvm.sqrt.f64(double %mul)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_arg_squared(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: ret double %fabs
+}
+
+; Check all 6 combinations of a 3-way multiplication tree where
+; one factor is repeated.
+
+define double @sqrt_intrinsic_three_args1(double %x, double %y) #0 {
+  %mul = fmul fast double %y, %x
+  %mul2 = fmul fast double %mul, %x
+  %sqrt = call double @llvm.sqrt.f64(double %mul2)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args1(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_three_args2(double %x, double %y) #0 {
+  %mul = fmul fast double %x, %y
+  %mul2 = fmul fast double %mul, %x
+  %sqrt = call double @llvm.sqrt.f64(double %mul2)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args2(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_three_args3(double %x, double %y) #0 {
+  %mul = fmul fast double %x, %x
+  %mul2 = fmul fast double %mul, %y
+  %sqrt = call double @llvm.sqrt.f64(double %mul2)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args3(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_three_args4(double %x, double %y) #0 {
+  %mul = fmul fast double %y, %x
+  %mul2 = fmul fast double %x, %mul
+  %sqrt = call double @llvm.sqrt.f64(double %mul2)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args4(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_three_args5(double %x, double %y) #0 {
+  %mul = fmul fast double %x, %y
+  %mul2 = fmul fast double %x, %mul
+  %sqrt = call double @llvm.sqrt.f64(double %mul2)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args5(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_three_args6(double %x, double %y) #0 {
+  %mul = fmul fast double %x, %x
+  %mul2 = fmul fast double %y, %mul
+  %sqrt = call double @llvm.sqrt.f64(double %mul2)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_three_args6(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %y)
+; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+define double @sqrt_intrinsic_arg_4th(double %x) #0 {
+  %mul = fmul fast double %x, %x
+  %mul2 = fmul fast double %mul, %mul
+  %sqrt = call double @llvm.sqrt.f64(double %mul2)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_arg_4th(
+; CHECK-NEXT: %mul = fmul fast double %x, %x
+; CHECK-NEXT: ret double %mul
+}
+
+define double @sqrt_intrinsic_arg_5th(double %x) #0 {
+  %mul = fmul fast double %x, %x
+  %mul2 = fmul fast double %mul, %x
+  %mul3 = fmul fast double %mul2, %mul
+  %sqrt = call double @llvm.sqrt.f64(double %mul3)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_arg_5th(
+; CHECK-NEXT: %mul = fmul fast double %x, %x
+; CHECK-NEXT: %sqrt1 = call double @llvm.sqrt.f64(double %x)
+; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
+; CHECK-NEXT: ret double %1
+}
+
+; Check that square root calls have the same behavior.
+
+declare float @sqrtf(float)
+declare double @sqrt(double)
+declare fp128 @sqrtl(fp128)
+
+define float @sqrt_call_squared_f32(float %x) #0 {
+  %mul = fmul fast float %x, %x
+  %sqrt = call float @sqrtf(float %mul)
+  ret float %sqrt
+
+; CHECK-LABEL: sqrt_call_squared_f32(
+; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
+; CHECK-NEXT: ret float %fabs
+}
+
+define double @sqrt_call_squared_f64(double %x) #0 {
+  %mul = fmul fast double %x, %x
+  %sqrt = call double @sqrt(double %mul)
+  ret double %sqrt
+
+; CHECK-LABEL: sqrt_call_squared_f64(
+; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
+; CHECK-NEXT: ret double %fabs
+}
+
+define fp128 @sqrt_call_squared_f128(fp128 %x) #0 {
+  %mul = fmul fast fp128 %x, %x
+  %sqrt = call fp128 @sqrtl(fp128 %mul)
+  ret fp128 %sqrt
+
+; CHECK-LABEL: sqrt_call_squared_f128(
+; CHECK-NEXT: %fabs = call fp128 @llvm.fabs.f128(fp128 %x)
+; CHECK-NEXT: ret fp128 %fabs
+}
+
diff --git a/test/Transforms/InstCombine/fcmp.ll b/test/Transforms/InstCombine/fcmp.ll
index afc6782a0122..ee39d1084ebc 100644
--- a/test/Transforms/InstCombine/fcmp.ll
+++ b/test/Transforms/InstCombine/fcmp.ll
@@ -1,5 +1,7 @@
 ; RUN: opt -S -instcombine < %s | FileCheck %s
 
+declare double @llvm.fabs.f64(double) nounwind readnone
+
 define i1 @test1(float %x, float %y) nounwind {
   %ext1 = fpext float %x to double
   %ext2 = fpext float %y to double
@@ -81,6 +83,16 @@ define i32 @test9(double %a) nounwind {
 ; CHECK: ret i32 0
 }
 
+define i32 @test9_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp olt double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test9_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: ret i32 0
+}
+
 define i32 @test10(double %a) nounwind {
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp ole double %call, 0.000000e+00
@@ -91,6 +103,16 @@ define i32 @test10(double %a) nounwind {
 ; CHECK: fcmp oeq double %a, 0.000000e+00
 }
 
+define i32 @test10_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp ole double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test10_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: fcmp oeq double %a, 0.000000e+00
+}
+
 define i32 @test11(double %a) nounwind {
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp ogt double %call, 0.000000e+00
@@ -101,6 +123,16 @@ define i32 @test11(double %a) nounwind {
 ; CHECK: fcmp one double %a, 0.000000e+00
 }
 
+define i32 @test11_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp ogt double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test11_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: fcmp one double %a, 0.000000e+00
+}
+
 define i32 @test12(double %a) nounwind {
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp oge double %call, 0.000000e+00
@@ -111,6 +143,16 @@ define i32 @test12(double %a) nounwind {
 ; CHECK: fcmp ord double %a, 0.000000e+00
 }
 
+define i32 @test12_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp oge double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test12_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: fcmp ord double %a, 0.000000e+00
+}
+
 define i32 @test13(double %a) nounwind {
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp une double %call, 0.000000e+00
@@ -121,6 +163,16 @@ define i32 @test13(double %a) nounwind {
 ; CHECK: fcmp une double %a, 0.000000e+00
 }
 
+define i32 @test13_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp une double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test13_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: fcmp une double %a, 0.000000e+00
+}
+
 define i32 @test14(double %a) nounwind {
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp oeq double %call, 0.000000e+00
@@ -131,6 +183,16 @@ define i32 @test14(double %a) nounwind {
 ; CHECK: fcmp oeq double %a, 0.000000e+00
 }
 
+define i32 @test14_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp oeq double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test14_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: fcmp oeq double %a, 0.000000e+00
+}
+
 define i32 @test15(double %a) nounwind {
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp one double %call, 0.000000e+00
@@ -141,6 +203,16 @@ define i32 @test15(double %a) nounwind {
 ; CHECK: fcmp one double %a, 0.000000e+00
 }
 
+define i32 @test15_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp one double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test15_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: fcmp one double %a, 0.000000e+00
+}
+
 define i32 @test16(double %a) nounwind {
   %call = tail call double @fabs(double %a) nounwind
   %cmp = fcmp ueq double %call, 0.000000e+00
@@ -151,6 +223,16 @@ define i32 @test16(double %a) nounwind {
 ; CHECK: fcmp ueq double %a, 0.000000e+00
 }
 
+define i32 @test16_intrinsic(double %a) nounwind {
+  %call = tail call double @llvm.fabs.f64(double %a) nounwind
+  %cmp = fcmp ueq double %call, 0.000000e+00
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+; CHECK-LABEL: @test16_intrinsic(
+; CHECK-NOT: fabs
+; CHECK: fcmp ueq double %a, 0.000000e+00
+}
+
 ; Don't crash.
 define i32 @test17(double %a, double (double)* %p) nounwind {
   %call = tail call double %p(double %a) nounwind
diff --git a/test/Transforms/InstCombine/float-shrink-compare.ll b/test/Transforms/InstCombine/float-shrink-compare.ll
index e50046790da8..a08f9531d217 100644
--- a/test/Transforms/InstCombine/float-shrink-compare.ll
+++ b/test/Transforms/InstCombine/float-shrink-compare.ll
@@ -222,8 +222,46 @@ define i32 @test18(float %x, float %y, float %z) nounwind uwtable {
 ; CHECK-NEXT: fcmp oeq float %fmaxf, %z
 }
 
+define i32 @test19(float %x, float %y, float %z) nounwind uwtable {
+  %1 = fpext float %x to double
+  %2 = fpext float %y to double
+  %3 = call double @copysign(double %1, double %2) nounwind
+  %4 = fpext float %z to double
+  %5 = fcmp oeq double %3, %4
+  %6 = zext i1 %5 to i32
+  ret i32 %6
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: %copysignf = call float @copysignf(float %x, float %y)
+; CHECK-NEXT: fcmp oeq float %copysignf, %z
+}
+
+define i32 @test20(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %y to double
+  %2 = fpext float %x to double
+  %3 = call double @fmin(double 1.000000e+00, double %2) nounwind
+  %4 = fcmp oeq double %1, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: %fminf = call float @fminf(float 1.000000e+00, float %x)
+; CHECK-NEXT: fcmp oeq float %fminf, %y
+}
+
+define i32 @test21(float %x, float %y) nounwind uwtable {
+  %1 = fpext float %y to double
+  %2 = fpext float %x to double
+  %3 = call double @fmin(double 1.300000e+00, double %2) nounwind
+  %4 = fcmp oeq double %1, %3
+  %5 = zext i1 %4 to i32
+  ret i32 %5
+; should not be changed to fminf as the constant would loose precision
+; CHECK-LABEL: @test21(
+; CHECK: %3 = call double @fmin(double 1.300000e+00, double %2)
+}
+
 declare double @fabs(double) nounwind readnone
 declare double @ceil(double) nounwind readnone
+declare double @copysign(double, double) nounwind readnone
 declare double @floor(double) nounwind readnone
 declare double @nearbyint(double) nounwind readnone
 declare double @rint(double) nounwind readnone
diff --git a/test/Transforms/InstCombine/fmul.ll b/test/Transforms/InstCombine/fmul.ll
index 18cbf9da5361..a776765ac2ca 100644
--- a/test/Transforms/InstCombine/fmul.ll
+++ b/test/Transforms/InstCombine/fmul.ll
@@ -123,3 +123,32 @@ define float @test11(float %x, float %y) {
 ; CHECK-NOT: fadd float
 ; CHECK: fadd fast float
 }
+
+; PR21126: http://llvm.org/bugs/show_bug.cgi?id=21126
+; With unsafe/fast math, sqrt(X) * sqrt(X) is just X.
+declare double @llvm.sqrt.f64(double)
+
+define double @sqrt_squared1(double %f) {
+  %sqrt = call double @llvm.sqrt.f64(double %f)
+  %mul = fmul fast double %sqrt, %sqrt
+  ret double %mul
+; CHECK-LABEL: @sqrt_squared1(
+; CHECK-NEXT: ret double %f
+}
+
+; With unsafe/fast math, sqrt(X) * sqrt(X) is just X, 
+; but make sure another use of the sqrt is intact.
+; Note that the remaining fmul is altered but is not 'fast'
+; itself because it was not marked 'fast' originally. 
+; Thus, we have an overall fast result, but no more indication of
+; 'fast'ness in the code.
+define double @sqrt_squared2(double %f) {
+  %sqrt = call double @llvm.sqrt.f64(double %f)
+  %mul1 = fmul fast double %sqrt, %sqrt
+  %mul2 = fmul double %mul1, %sqrt
+  ret double %mul2
+; CHECK-LABEL: @sqrt_squared2(
+; CHECK-NEXT: %sqrt = call double @llvm.sqrt.f64(double %f)
+; CHECK-NEXT: %mul2 = fmul double %sqrt, %f
+; CHECK-NEXT: ret double %mul2
+}
diff --git a/test/Transforms/InstCombine/fold-phi.ll b/test/Transforms/InstCombine/fold-phi.ll
index bd01d58aa586..c6bb1b36335b 100644
--- a/test/Transforms/InstCombine/fold-phi.ll
+++ b/test/Transforms/InstCombine/fold-phi.ll
@@ -17,23 +17,23 @@ end:
   ret float %add5
 }
 
-; CHECK: fold_phi
-define float @fold_phi(float %a) nounwind {
+; CHECK-LABEL: @pr21377(
+define void @pr21377(i32) {
 entry:
-  br label %for.body
-
-for.body:
-; CHECK: phi float
-; CHECK-NEXT: br i1 undef
-  %sum.057 = phi float [ 0.000000e+00, %entry ], [ %add5, %bb0 ]
-  %add5 = fadd float %sum.057, 1.0 ;; Should be moved to the latch!
-  br i1 undef, label %bb0, label %end
-
-; CHECK: bb0:
-bb0:
-; CHECK: fadd float
-  br label %for.body
-
-end:
-  ret float %add5
+  br label %while.body
+
+while.body:                                       ; preds = %if.end, %entry
+  %phi1 = phi i64 [ undef, %entry ], [ %or2, %if.end ]
+  %zext = zext i32 %0 to i64
+  br i1 undef, label %if.end, label %if.else
+
+if.else:                                          ; preds = %while.body
+  %or1 = or i64 %phi1, %zext
+  %and = and i64 %or1, 4294967295
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %while.body
+  %phi2 = phi i64 [ %and, %if.else ], [ undef, %while.body ]
+  %or2 = or i64 %phi2, %zext
+  br label %while.body
 }
diff --git a/test/Transforms/InstCombine/fpcast.ll b/test/Transforms/InstCombine/fpcast.ll
index 9be66fd42c68..8319624b87c9 100644
--- a/test/Transforms/InstCombine/fpcast.ll
+++ b/test/Transforms/InstCombine/fpcast.ll
@@ -53,3 +53,35 @@ define half @test5(float %a, float %b, float %c) {
 }
 
 declare float @llvm.fabs.f32(float) nounwind readonly
+
+define <1 x float> @test6(<1 x double> %V) {
+  %frem = frem <1 x double> %V, %V
+  %trunc = fptrunc <1 x double> %frem to <1 x float>
+  ret <1 x float> %trunc
+; CHECK-LABEL: @test6
+; CHECK-NEXT: %[[frem:.*]]  = frem <1 x double> %V, %V
+; CHECK-NEXT: %[[trunc:.*]] = fptrunc <1 x double> %[[frem]] to <1 x float>
+; CHECK-NEXT: ret <1 x float> %trunc
+}
+
+define float @test7(double %V) {
+  %frem = frem double %V, 1.000000e+00
+  %trunc = fptrunc double %frem to float
+  ret float %trunc
+; CHECK-LABEL: @test7
+; CHECK-NEXT: %[[frem:.*]]  = frem double %V, 1.000000e+00
+; CHECK-NEXT: %[[trunc:.*]] = fptrunc double %frem to float
+; CHECK-NEXT: ret float %trunc
+}
+
+define float @test8(float %V) {
+  %fext = fpext float %V to double
+  %frem = frem double %fext, 1.000000e-01
+  %trunc = fptrunc double %frem to float
+  ret float %trunc
+; CHECK-LABEL: @test8
+; CHECK-NEXT: %[[fext:.*]]  = fpext float %V to double
+; CHECK-NEXT: %[[frem:.*]]  = frem double %fext, 1.000000e-01
+; CHECK-NEXT: %[[trunc:.*]] = fptrunc double %frem to float
+; CHECK-NEXT: ret float %trunc
+}
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index 3240c6d2a4d0..94cc1803073a 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -6,6 +6,7 @@ target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64"
 %pair = type { i32, i32 }
 %struct.B = type { double }
 %struct.A = type { %struct.B, i32, i32 }
+%struct.C = type { [7 x i8] }
 
 
 @Global = constant [10 x i8] c"helloworld"
@@ -580,6 +581,16 @@ define i32 addrspace(1)* @test33_array_struct_as1([10 x %struct.Key] addrspace(1
   ret i32 addrspace(1)* %C
 }
 
+define i32 addrspace(1)* @test33_addrspacecast(%struct.Key* %A) {
+; CHECK-LABEL: @test33_addrspacecast(
+; CHECK: %C = getelementptr %struct.Key* %A, i64 0, i32 0, i32 1
+; CHECK-NEXT: addrspacecast i32* %C to i32 addrspace(1)*
+; CHECK-NEXT: ret
+  %B = addrspacecast %struct.Key* %A to %struct.anon addrspace(1)*
+  %C = getelementptr %struct.anon addrspace(1)* %B, i32 0, i32 2
+  ret i32 addrspace(1)* %C
+}
+
 	%T2 = type { i8*, i8 }
 define i8* @test34(i8* %Val, i64 %V) nounwind {
 entry:
@@ -591,8 +602,8 @@ entry:
 	%C = load i8** %B, align 8
 	ret i8* %C
 ; CHECK-LABEL: @test34(
-; CHECK: %V.c = inttoptr i64 %V to i8*
-; CHECK: ret i8* %V.c
+; CHECK: %[[C:.*]] = inttoptr i64 %V to i8*
+; CHECK: ret i8* %[[C]]
 }
 
 %t0 = type { i8*, [19 x i8] }
@@ -692,7 +703,7 @@ define void @test39(%struct.ham* %arg, i8 %arg1) nounwind {
 
 ; CHECK-LABEL: @test39(
 ; CHECK: getelementptr inbounds %struct.ham* %arg, i64 0, i32 2
-; CHECK: getelementptr inbounds i8* %tmp3, i64 -8
+; CHECK: getelementptr inbounds i8* %{{.+}}, i64 -8
 }
 
 define i1 @pr16483([1 x i8]* %a, [1 x i8]* %b) {
@@ -803,6 +814,78 @@ define i16 @test41([3 x i32] addrspace(1)* %array) {
 ; CHECK-NEXT: ret i16 8
 }
 
+define i8* @test42(i8* %c1, i8* %c2) {
+  %ptrtoint = ptrtoint i8* %c1 to i64
+  %sub = sub i64 0, %ptrtoint
+  %gep = getelementptr inbounds i8* %c2, i64 %sub
+  ret i8* %gep
+
+; CHECK-LABEL: @test42(
+; CHECK-NEXT:  [[PTRTOINT1:%.*]] = ptrtoint i8* %c1 to i64
+; CHECK-NEXT:  [[PTRTOINT2:%.*]] = ptrtoint i8* %c2 to i64
+; CHECK-NEXT:  [[SUB:%.*]] = sub i64 [[PTRTOINT2]], [[PTRTOINT1]]
+; CHECK-NEXT:  [[INTTOPTR:%.*]] = inttoptr i64 [[SUB]] to i8*
+; CHECK-NEXT:  ret i8* [[INTTOPTR]]
+}
+
+define i16* @test43(i16* %c1, i16* %c2) {
+  %ptrtoint = ptrtoint i16* %c1 to i64
+  %sub = sub i64 0, %ptrtoint
+  %shr = ashr i64 %sub, 1
+  %gep = getelementptr inbounds i16* %c2, i64 %shr
+  ret i16* %gep
+
+; CHECK-LABEL: @test43(
+; CHECK-NEXT:  [[PTRTOINT1:%.*]] = ptrtoint i16* %c1 to i64
+; CHECK-NEXT:  [[PTRTOINT2:%.*]] = ptrtoint i16* %c2 to i64
+; CHECK-NEXT:  [[SUB:%.*]] = sub i64 [[PTRTOINT2]], [[PTRTOINT1]]
+; CHECK-NEXT:  [[INTTOPTR:%.*]] = inttoptr i64 [[SUB]] to i16*
+; CHECK-NEXT:  ret i16* [[INTTOPTR]]
+}
+
+define %struct.C* @test44(%struct.C* %c1, %struct.C* %c2) {
+  %ptrtoint = ptrtoint %struct.C* %c1 to i64
+  %sub = sub i64 0, %ptrtoint
+  %shr = sdiv i64 %sub, 7
+  %gep = getelementptr inbounds %struct.C* %c2, i64 %shr
+  ret %struct.C* %gep
+
+; CHECK-LABEL: @test44(
+; CHECK-NEXT:  [[PTRTOINT1:%.*]] = ptrtoint %struct.C* %c1 to i64
+; CHECK-NEXT:  [[PTRTOINT2:%.*]] = ptrtoint %struct.C* %c2 to i64
+; CHECK-NEXT:  [[SUB:%.*]] = sub i64 [[PTRTOINT2]], [[PTRTOINT1]]
+; CHECK-NEXT:  [[INTTOPTR:%.*]] = inttoptr i64 [[SUB]] to %struct.C*
+; CHECK-NEXT:  ret %struct.C* [[INTTOPTR]]
+}
+
+define %struct.C* @test45(%struct.C* %c1, %struct.C** %c2) {
+  %ptrtoint1 = ptrtoint %struct.C* %c1 to i64
+  %ptrtoint2 = ptrtoint %struct.C** %c2 to i64
+  %sub = sub i64 %ptrtoint2, %ptrtoint1 ; C2 - C1
+  %shr = sdiv i64 %sub, 7
+  %gep = getelementptr inbounds %struct.C* %c1, i64 %shr ; C1 + (C2 - C1)
+  ret %struct.C* %gep
+
+; CHECK-LABEL: @test45(
+; CHECK-NEXT:  [[BITCAST:%.*]] = bitcast %struct.C** %c2 to %struct.C*
+; CHECK-NEXT:  ret %struct.C* [[BITCAST]]
+}
+
+define %struct.C* @test46(%struct.C* %c1, %struct.C* %c2, i64 %N) {
+  %ptrtoint = ptrtoint %struct.C* %c1 to i64
+  %sub = sub i64 0, %ptrtoint
+  %sdiv = sdiv i64 %sub, %N
+  %gep = getelementptr inbounds %struct.C* %c2, i64 %sdiv
+  ret %struct.C* %gep
+
+; CHECK-LABEL: @test46(
+; CHECK-NEXT:  [[PTRTOINT:%.*]] = ptrtoint %struct.C* %c1 to i64
+; CHECK-NEXT:  [[SUB:%.*]] = sub i64 0, [[PTRTOINT]]
+; CHECK-NEXT:  [[SDIV:%.*]] = sdiv i64 [[SUB]], %N
+; CHECK-NEXT:  [[GEP:%.*]] = getelementptr inbounds %struct.C* %c2, i64 %sdiv
+; CHECK-NEXT:  ret %struct.C* [[GEP]]
+}
+
 define i32 addrspace(1)* @ascast_0_gep(i32* %p) nounwind {
 ; CHECK-LABEL: @ascast_0_gep(
 ; CHECK-NOT: getelementptr
diff --git a/test/Transforms/InstCombine/icmp-logical.ll b/test/Transforms/InstCombine/icmp-logical.ll
index d5d8cbc8c26e..faae2016e207 100644
--- a/test/Transforms/InstCombine/icmp-logical.ll
+++ b/test/Transforms/InstCombine/icmp-logical.ll
@@ -150,3 +150,23 @@ define i1 @nomask_rhs(i32 %in) {
   %val = or i1 %tst1, %tst2
   ret i1 %val
 }
+
+define i1 @fold_mask_cmps_to_false(i32 %x) {
+; CHECK-LABEL: @fold_mask_cmps_to_false
+; CHECK: ret i1 false
+  %1 = and i32 %x, 2147483647
+  %2 = icmp eq i32 %1, 0
+  %3 = icmp eq i32 %x, 2147483647
+  %4 = and i1 %3, %2
+  ret i1 %4
+}
+
+define i1 @fold_mask_cmps_to_true(i32 %x) {
+; CHECK-LABEL: @fold_mask_cmps_to_true
+; CHECK: ret i1 true
+  %1 = and i32 %x, 2147483647
+  %2 = icmp ne i32 %1, 0
+  %3 = icmp ne i32 %x, 2147483647
+  %4 = or i1 %3, %2
+  ret i1 %4
+}
diff --git a/test/Transforms/InstCombine/icmp-range.ll b/test/Transforms/InstCombine/icmp-range.ll
new file mode 100644
index 000000000000..0911ab03c601
--- /dev/null
+++ b/test/Transforms/InstCombine/icmp-range.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; These should be InstSimplify checks, but most of the code
+; is currently only in InstCombine.  TODO: move supporting code
+
+; Definitely out of range
+define i1 @test_nonzero(i32* nocapture readonly %arg) {
+; CHECK-LABEL:test_nonzero
+; CHECK: ret i1 true
+  %val = load i32* %arg, !range !0
+  %rval = icmp ne i32 %val, 0
+  ret i1 %rval
+}
+define i1 @test_nonzero2(i32* nocapture readonly %arg) {
+; CHECK-LABEL:test_nonzero2
+; CHECK: ret i1 false
+  %val = load i32* %arg, !range !0
+  %rval = icmp eq i32 %val, 0
+  ret i1 %rval
+}
+
+; Potentially in range
+define i1 @test_nonzero3(i32* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero3
+; Check that this does not trigger - it wouldn't be legal
+; CHECK: icmp
+  %val = load i32* %arg, !range !1
+  %rval = icmp ne i32 %val, 0
+  ret i1 %rval
+}
+
+; Definitely in range
+define i1 @test_nonzero4(i8* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero4
+; CHECK: ret i1 false
+  %val = load i8* %arg, !range !2
+  %rval = icmp ne i8 %val, 0
+  ret i1 %rval
+}
+
+define i1 @test_nonzero5(i8* nocapture readonly %arg) {
+; CHECK-LABEL: test_nonzero5
+; CHECK: ret i1 false
+  %val = load i8* %arg, !range !2
+  %rval = icmp ugt i8 %val, 0
+  ret i1 %rval
+}
+
+; Cheaper checks (most values in range meet requirements)
+define i1 @test_nonzero6(i8* %argw) {
+; CHECK-LABEL: test_nonzero6
+; CHECK: icmp ne i8 %val, 0
+  %val = load i8* %argw, !range !3
+  %rval = icmp sgt i8 %val, 0
+  ret i1 %rval
+}
+
+
+!0 = !{i32 1, i32 6} 
+!1 = !{i32 0, i32 6} 
+!2 = !{i8 0, i8 1} 
+!3 = !{i8 0, i8 6} 
diff --git a/test/Transforms/InstCombine/icmp-shr.ll b/test/Transforms/InstCombine/icmp-shr.ll
new file mode 100644
index 000000000000..52414b99cca7
--- /dev/null
+++ b/test/Transforms/InstCombine/icmp-shr.ll
@@ -0,0 +1,378 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; CHECK-LABEL: @lshr_eq_msb_low_last_zero
+; CHECK-NEXT: icmp ugt i8 %a, 6
+define i1 @lshr_eq_msb_low_last_zero(i8 %a) {
+ %shr = lshr i8 127, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_eq_msb_low_second_zero
+; CHECK-NEXT: icmp ugt i8 %a, 6
+define i1 @ashr_eq_msb_low_second_zero(i8 %a) {
+ %shr = ashr i8 127, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_ne_msb_low_last_zero
+; CHECK-NEXT: icmp ult i8 %a, 7
+define i1 @lshr_ne_msb_low_last_zero(i8 %a) {
+ %shr = lshr i8 127, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_ne_msb_low_second_zero
+; CHECK-NEXT: icmp ult i8 %a, 7
+define i1 @ashr_ne_msb_low_second_zero(i8 %a) {
+ %shr = ashr i8 127, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_eq_both_equal
+; CHECK-NEXT: icmp eq i8 %a, 0
+define i1 @ashr_eq_both_equal(i8 %a) {
+ %shr = ashr i8 128, %a
+ %cmp = icmp eq i8 %shr, 128
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_ne_both_equal
+; CHECK-NEXT: icmp ne i8 %a, 0
+define i1 @ashr_ne_both_equal(i8 %a) {
+ %shr = ashr i8 128, %a
+ %cmp = icmp ne i8 %shr, 128
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_eq_both_equal
+; CHECK-NEXT: icmp eq i8 %a, 0
+define i1 @lshr_eq_both_equal(i8 %a) {
+ %shr = lshr i8 127, %a
+ %cmp = icmp eq i8 %shr, 127
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_ne_both_equal
+; CHECK-NEXT: icmp ne i8 %a, 0
+define i1 @lshr_ne_both_equal(i8 %a) {
+ %shr = lshr i8 127, %a
+ %cmp = icmp ne i8 %shr, 127
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_both_equal
+; CHECK-NEXT: icmp eq i8 %a, 0
+define i1 @exact_ashr_eq_both_equal(i8 %a) {
+ %shr = ashr exact i8 128, %a
+ %cmp = icmp eq i8 %shr, 128
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_both_equal
+; CHECK-NEXT: icmp ne i8 %a, 0
+define i1 @exact_ashr_ne_both_equal(i8 %a) {
+ %shr = ashr exact i8 128, %a
+ %cmp = icmp ne i8 %shr, 128
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_both_equal
+; CHECK-NEXT: icmp eq i8 %a, 0
+define i1 @exact_lshr_eq_both_equal(i8 %a) {
+ %shr = lshr exact i8 126, %a
+ %cmp = icmp eq i8 %shr, 126
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_both_equal
+; CHECK-NEXT: icmp ne i8 %a, 0
+define i1 @exact_lshr_ne_both_equal(i8 %a) {
+ %shr = lshr exact i8 126, %a
+ %cmp = icmp ne i8 %shr, 126
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_opposite_msb
+; CHECK-NEXT: icmp eq i8 %a, 7
+define i1 @exact_lshr_eq_opposite_msb(i8 %a) {
+ %shr = lshr exact i8 -128, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_eq_opposite_msb
+; CHECK-NEXT: icmp eq i8 %a, 7
+define i1 @lshr_eq_opposite_msb(i8 %a) {
+ %shr = lshr i8 -128, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_opposite_msb
+; CHECK-NEXT: icmp ne i8 %a, 7
+define i1 @exact_lshr_ne_opposite_msb(i8 %a) {
+ %shr = lshr exact i8 -128, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_ne_opposite_msb
+; CHECK-NEXT: icmp ne i8 %a, 7
+define i1 @lshr_ne_opposite_msb(i8 %a) {
+ %shr = lshr i8 -128, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq
+; CHECK-NEXT: icmp eq i8 %a, 7
+define i1 @exact_ashr_eq(i8 %a) {
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp eq i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne
+; CHECK-NEXT: icmp ne i8 %a, 7
+define i1 @exact_ashr_ne(i8 %a) {
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp ne i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq
+; CHECK-NEXT: icmp eq i8 %a, 2
+define i1 @exact_lshr_eq(i8 %a) {
+ %shr = lshr exact i8 4, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne
+; CHECK-NEXT: icmp ne i8 %a, 2
+define i1 @exact_lshr_ne(i8 %a) {
+ %shr = lshr exact i8 4, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq
+; CHECK-NEXT: icmp eq i8 %a, 7
+define i1 @nonexact_ashr_eq(i8 %a) {
+ %shr = ashr i8 -128, %a
+ %cmp = icmp eq i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne
+; CHECK-NEXT: icmp ne i8 %a, 7
+define i1 @nonexact_ashr_ne(i8 %a) {
+ %shr = ashr i8 -128, %a
+ %cmp = icmp ne i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq
+; CHECK-NEXT: icmp eq i8 %a, 2
+define i1 @nonexact_lshr_eq(i8 %a) {
+ %shr = lshr i8 4, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne
+; CHECK-NEXT: icmp ne i8 %a, 2
+define i1 @nonexact_lshr_ne(i8 %a) {
+ %shr = lshr i8 4, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_exactdiv
+; CHECK-NEXT: icmp eq i8 %a, 4
+define i1 @exact_lshr_eq_exactdiv(i8 %a) {
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp eq i8 %shr, 5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_exactdiv
+; CHECK-NEXT: icmp ne i8 %a, 4
+define i1 @exact_lshr_ne_exactdiv(i8 %a) {
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp ne i8 %shr, 5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq_exactdiv
+; CHECK-NEXT: icmp eq i8 %a, 4
+define i1 @nonexact_lshr_eq_exactdiv(i8 %a) {
+ %shr = lshr i8 80, %a
+ %cmp = icmp eq i8 %shr, 5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne_exactdiv
+; CHECK-NEXT: icmp ne i8 %a, 4
+define i1 @nonexact_lshr_ne_exactdiv(i8 %a) {
+ %shr = lshr i8 80, %a
+ %cmp = icmp ne i8 %shr, 5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_exactdiv
+; CHECK-NEXT: icmp eq i8 %a, 4
+define i1 @exact_ashr_eq_exactdiv(i8 %a) {
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp eq i8 %shr, -5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_exactdiv
+; CHECK-NEXT: icmp ne i8 %a, 4
+define i1 @exact_ashr_ne_exactdiv(i8 %a) {
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp ne i8 %shr, -5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq_exactdiv
+; CHECK-NEXT: icmp eq i8 %a, 4
+define i1 @nonexact_ashr_eq_exactdiv(i8 %a) {
+ %shr = ashr i8 -80, %a
+ %cmp = icmp eq i8 %shr, -5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne_exactdiv
+; CHECK-NEXT: icmp ne i8 %a, 4
+define i1 @nonexact_ashr_ne_exactdiv(i8 %a) {
+ %shr = ashr i8 -80, %a
+ %cmp = icmp ne i8 %shr, -5
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_noexactdiv
+; CHECK-NEXT: ret i1 false
+define i1 @exact_lshr_eq_noexactdiv(i8 %a) {
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp eq i8 %shr, 31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_noexactdiv
+; CHECK-NEXT: ret i1 true
+define i1 @exact_lshr_ne_noexactdiv(i8 %a) {
+ %shr = lshr exact i8 80, %a
+ %cmp = icmp ne i8 %shr, 31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq_noexactdiv
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_lshr_eq_noexactdiv(i8 %a) {
+ %shr = lshr i8 80, %a
+ %cmp = icmp eq i8 %shr, 31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne_noexactdiv
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_lshr_ne_noexactdiv(i8 %a) {
+ %shr = lshr i8 80, %a
+ %cmp = icmp ne i8 %shr, 31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_noexactdiv
+; CHECK-NEXT: ret i1 false
+define i1 @exact_ashr_eq_noexactdiv(i8 %a) {
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp eq i8 %shr, -31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_noexactdiv
+; CHECK-NEXT: ret i1 true
+define i1 @exact_ashr_ne_noexactdiv(i8 %a) {
+ %shr = ashr exact i8 -80, %a
+ %cmp = icmp ne i8 %shr, -31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq_noexactdiv
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_ashr_eq_noexactdiv(i8 %a) {
+ %shr = ashr i8 -80, %a
+ %cmp = icmp eq i8 %shr, -31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne_noexactdiv
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_ashr_ne_noexactdiv(i8 %a) {
+ %shr = ashr i8 -80, %a
+ %cmp = icmp ne i8 %shr, -31
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq_noexactlog
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_lshr_eq_noexactlog(i8 %a) {
+ %shr = lshr i8 90, %a
+ %cmp = icmp eq i8 %shr, 30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne_noexactlog
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_lshr_ne_noexactlog(i8 %a) {
+ %shr = lshr i8 90, %a
+ %cmp = icmp ne i8 %shr, 30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq_noexactlog
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_ashr_eq_noexactlog(i8 %a) {
+ %shr = ashr i8 -90, %a
+ %cmp = icmp eq i8 %shr, -30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne_noexactlog
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_ashr_ne_noexactlog(i8 %a) {
+ %shr = ashr i8 -90, %a
+ %cmp = icmp ne i8 %shr, -30
+ ret i1 %cmp
+}
+
+; Don't try to fold the entire body of function @PR20945 into a
+; single `ret i1 true` statement.
+; If %B is equal to 1, then this function would return false.
+; As a consequence, the instruction combiner is not allowed to fold %cmp
+; to 'true'. Instead, it should replace %cmp with a simpler comparison
+; between %B and 1.
+
+; CHECK-LABEL: @PR20945(
+; CHECK: icmp ne i32 %B, 1
+define i1 @PR20945(i32 %B) {
+  %shr = ashr i32 -9, %B
+  %cmp = icmp ne i32 %shr, -5
+  ret i1 %cmp
+}
+
+; CHECK-LABEL: @PR21222
+; CHECK: icmp eq i32 %B, 6
+define i1 @PR21222(i32 %B) {
+  %shr = ashr i32 -93, %B
+  %cmp = icmp eq i32 %shr, -2
+  ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 26e144f93a57..64741c5712eb 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -1148,22 +1148,6 @@ define i1 @icmp_shl_1_V_eq_32(i32 %V) {
   ret i1 %cmp
 }
 
-; CHECK-LABEL: @icmp_shl_1_V_eq_31(
-; CHECK-NEXT: ret i1 false
-define i1 @icmp_shl_1_V_eq_31(i32 %V) {
-  %shl = shl i32 1, %V
-  %cmp = icmp eq i32 %shl, 31
-  ret i1 %cmp
-}
-
-; CHECK-LABEL: @icmp_shl_1_V_ne_31(
-; CHECK-NEXT: ret i1 true
-define i1 @icmp_shl_1_V_ne_31(i32 %V) {
-  %shl = shl i32 1, %V
-  %cmp = icmp ne i32 %shl, 31
-  ret i1 %cmp
-}
-
 ; CHECK-LABEL: @icmp_shl_1_V_ult_30(
 ; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %V, 5
 ; CHECK-NEXT: ret i1 [[CMP]]
@@ -1209,22 +1193,6 @@ define i1 @icmp_shl_1_V_uge_2147483648(i32 %V) {
   ret i1 %cmp
 }
 
-; CHECK-LABEL: @icmp_shl_1_V_ugt_2147483648(
-; CHECK-NEXT: ret i1 false
-define i1 @icmp_shl_1_V_ugt_2147483648(i32 %V) {
-  %shl = shl i32 1, %V
-  %cmp = icmp ugt i32 %shl, 2147483648
-  ret i1 %cmp
-}
-
-; CHECK-LABEL: @icmp_shl_1_V_ule_2147483648(
-; CHECK-NEXT: ret i1 true
-define i1 @icmp_shl_1_V_ule_2147483648(i32 %V) {
-  %shl = shl i32 1, %V
-  %cmp = icmp ule i32 %shl, 2147483648
-  ret i1 %cmp
-}
-
 ; CHECK-LABEL: @icmp_shl_1_V_ult_2147483648(
 ; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %V, 31
 ; CHECK-NEXT: ret i1 [[CMP]]
@@ -1424,3 +1392,184 @@ define i1 @icmp_neg_cst_slt(i32 %a) {
   %2 = icmp slt i32 %1, -10
   ret i1 %2
 }
+
+; CHECK-LABEL: @icmp_and_or_lshr
+; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl nuw i32 1, %y
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[SHL]], 1
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[OR]], %x
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_and_or_lshr(i32 %x, i32 %y) {
+  %shf = lshr i32 %x, %y
+  %or = or i32 %shf, %x
+  %and = and i32 %or, 1
+  %ret = icmp ne i32 %and, 0
+  ret i1 %ret
+}
+
+; CHECK-LABEL: @icmp_and_or_lshr_cst
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 3
+; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+define i1 @icmp_and_or_lshr_cst(i32 %x) {
+  %shf = lshr i32 %x, 1
+  %or = or i32 %shf, %x
+  %and = and i32 %or, 1
+  %ret = icmp ne i32 %and, 0
+  ret i1 %ret
+}
+
+; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_2
+; CHECK-NEXT: %cmp = icmp ugt i32 %a, 29
+; CHECK-NEXT: ret i1 %cmp
+define i1 @shl_ap1_zero_ap2_non_zero_2(i32 %a) {
+ %shl = shl i32 4, %a
+ %cmp = icmp eq i32 %shl, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_4
+; CHECK-NEXT: %cmp = icmp ugt i32 %a, 30
+; CHECK-NEXT: ret i1 %cmp
+define i1 @shl_ap1_zero_ap2_non_zero_4(i32 %a) {
+ %shl = shl i32 -2, %a
+ %cmp = icmp eq i32 %shl, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_both_positive
+; CHECK-NEXT: %cmp = icmp eq i32 %a, 0
+; CHECK-NEXT: ret i1 %cmp
+define i1 @shl_ap1_non_zero_ap2_non_zero_both_positive(i32 %a) {
+ %shl = shl i32 50, %a
+ %cmp = icmp eq i32 %shl, 50
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_both_negative
+; CHECK-NEXT: %cmp = icmp eq i32 %a, 0
+; CHECK-NEXT: ret i1 %cmp
+define i1 @shl_ap1_non_zero_ap2_non_zero_both_negative(i32 %a) {
+ %shl = shl i32 -50, %a
+ %cmp = icmp eq i32 %shl, -50
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_1
+; CHECK-NEXT: ret i1 false
+define i1 @shl_ap1_non_zero_ap2_non_zero_ap1_1(i32 %a) {
+ %shl = shl i32 50, %a
+ %cmp = icmp eq i32 %shl, 25
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_2
+; CHECK-NEXT: %cmp = icmp eq i32 %a, 1
+; CHECK-NEXT: ret i1 %cmp
+define i1 @shl_ap1_non_zero_ap2_non_zero_ap1_2(i32 %a) {
+ %shl = shl i32 25, %a
+ %cmp = icmp eq i32 %shl, 50
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_3
+; CHECK-NEXT: ret i1 false
+define i1 @shl_ap1_non_zero_ap2_non_zero_ap1_3(i32 %a) {
+ %shl = shl i32 26, %a
+ %cmp = icmp eq i32 %shl, 50
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_sgt_zero_add_nsw
+; CHECK-NEXT: icmp sgt i32 %a, -1
+define i1 @icmp_sgt_zero_add_nsw(i32 %a) {
+ %add = add nsw i32 %a, 1
+ %cmp = icmp sgt i32 %add, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_sge_zero_add_nsw
+; CHECK-NEXT: icmp sgt i32 %a, -2
+define i1 @icmp_sge_zero_add_nsw(i32 %a) {
+ %add = add nsw i32 %a, 1
+ %cmp = icmp sge i32 %add, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_slt_zero_add_nsw
+; CHECK-NEXT: icmp slt i32 %a, -1
+define i1 @icmp_slt_zero_add_nsw(i32 %a) {
+ %add = add nsw i32 %a, 1
+ %cmp = icmp slt i32 %add, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_sle_zero_add_nsw
+; CHECK-NEXT: icmp slt i32 %a, 0
+define i1 @icmp_sle_zero_add_nsw(i32 %a) {
+ %add = add nsw i32 %a, 1
+ %cmp = icmp sle i32 %add, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @icmp_cmpxchg_strong
+; CHECK-NEXT: %[[xchg:.*]] = cmpxchg i32* %sc, i32 %old_val, i32 %new_val seq_cst seq_cst
+; CHECK-NEXT: %[[icmp:.*]] = extractvalue { i32, i1 } %[[xchg]], 1
+; CHECK-NEXT: ret i1 %[[icmp]]
+define zeroext i1 @icmp_cmpxchg_strong(i32* %sc, i32 %old_val, i32 %new_val) {
+  %xchg = cmpxchg i32* %sc, i32 %old_val, i32 %new_val seq_cst seq_cst
+  %xtrc = extractvalue { i32, i1 } %xchg, 0
+  %icmp = icmp eq i32 %xtrc, %old_val
+  ret i1 %icmp
+}
+
+; CHECK-LABEL: @f1
+; CHECK-NEXT: %[[cmp:.*]] = icmp sge i64 %a, %b
+; CHECK-NEXT: ret i1 %[[cmp]]
+define i1 @f1(i64 %a, i64 %b) {
+  %t = sub nsw i64 %a, %b
+  %v = icmp sge i64 %t, 0
+  ret i1 %v
+}
+
+; CHECK-LABEL: @f2
+; CHECK-NEXT: %[[cmp:.*]] = icmp sgt i64 %a, %b
+; CHECK-NEXT: ret i1 %[[cmp]]
+define i1 @f2(i64 %a, i64 %b) {
+  %t = sub nsw i64 %a, %b
+  %v = icmp sgt i64 %t, 0
+  ret i1 %v
+}
+
+; CHECK-LABEL: @f3
+; CHECK-NEXT: %[[cmp:.*]] = icmp slt i64 %a, %b
+; CHECK-NEXT: ret i1 %[[cmp]]
+define i1 @f3(i64 %a, i64 %b) {
+  %t = sub nsw i64 %a, %b
+  %v = icmp slt i64 %t, 0
+  ret i1 %v
+}
+
+; CHECK-LABEL: @f4
+; CHECK-NEXT: %[[cmp:.*]] = icmp sle i64 %a, %b
+; CHECK-NEXT: ret i1 %[[cmp]]
+define i1 @f4(i64 %a, i64 %b) {
+  %t = sub nsw i64 %a, %b
+  %v = icmp sle i64 %t, 0
+  ret i1 %v
+}
+
+; CHECK-LABEL: @f5
+; CHECK: %[[cmp:.*]] = icmp slt i32 %[[sub:.*]], 0
+; CHECK: %[[neg:.*]] = sub nsw i32 0, %[[sub]]
+; CHECK: %[[sel:.*]] = select i1 %[[cmp]], i32 %[[neg]], i32 %[[sub]]
+; CHECK: ret i32 %[[sel]]
+define i32 @f5(i8 %a, i8 %b) {
+  %conv = zext i8 %a to i32
+  %conv3 = zext i8 %b to i32
+  %sub = sub nsw i32 %conv, %conv3
+  %cmp4 = icmp slt i32 %sub, 0
+  %sub7 = sub nsw i32 0, %sub
+  %sub7.sub = select i1 %cmp4, i32 %sub7, i32 %sub
+  ret i32 %sub7.sub
+}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 9b58d9386f58..8e7742f8c34a 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -1,10 +1,17 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 
 %overflow.result = type {i8, i1}
+%ov.result.32 = type { i32, i1 }
+
 
-declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8)
-declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
-declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8)
+declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8) nounwind readnone
+declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8) nounwind readnone
+declare %ov.result.32 @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
 declare double @llvm.powi.f64(double, i32) nounwind readonly
 declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
@@ -91,17 +98,92 @@ define i8 @uaddtest7(i8 %A, i8 %B) {
 }
 
 ; PR20194
-define { i32, i1 } @saddtest1(i8 %a, i8 %b) {
+define %ov.result.32 @saddtest_nsw(i8 %a, i8 %b) {
   %A = sext i8 %a to i32
   %B = sext i8 %b to i32
-  %x = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %A, i32 %B)
-  ret { i32, i1 } %x
-; CHECK-LABEL: @saddtest1
+  %x = call %ov.result.32 @llvm.sadd.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @saddtest_nsw
 ; CHECK: %x = add nsw i32 %A, %B
-; CHECK-NEXT: %1 = insertvalue { i32, i1 } { i32 undef, i1 false }, i32 %x, 0
-; CHECK-NEXT:  ret { i32, i1 } %1
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT:  ret %ov.result.32 %1
+}
+
+define %ov.result.32 @uaddtest_nuw(i32 %a, i32 %b) {
+  %A = and i32 %a, 2147483647
+  %B = and i32 %b, 2147483647
+  %x = call %ov.result.32 @llvm.uadd.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @uaddtest_nuw
+; CHECK: %x = add nuw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT:  ret %ov.result.32 %1
+}
+
+define %ov.result.32 @ssubtest_nsw(i8 %a, i8 %b) {
+  %A = sext i8 %a to i32
+  %B = sext i8 %b to i32
+  %x = call %ov.result.32 @llvm.ssub.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @ssubtest_nsw
+; CHECK: %x = sub nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT:  ret %ov.result.32 %1
 }
 
+define %ov.result.32 @usubtest_nuw(i32 %a, i32 %b) {
+  %A = or i32 %a, 2147483648
+  %B = and i32 %b, 2147483647
+  %x = call %ov.result.32 @llvm.usub.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @usubtest_nuw
+; CHECK: %x = sub nuw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT:  ret %ov.result.32 %1
+}
+
+define %ov.result.32 @smultest1_nsw(i32 %a, i32 %b) {
+  %A = and i32 %a, 4095 ; 0xfff
+  %B = and i32 %b, 524287; 0x7ffff
+  %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @smultest1_nsw
+; CHECK: %x = mul nuw nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT:  ret %ov.result.32 %1
+}
+
+define %ov.result.32 @smultest2_nsw(i32 %a, i32 %b) {
+  %A = ashr i32 %a, 16
+  %B = ashr i32 %b, 16
+  %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @smultest2_nsw
+; CHECK: %x = mul nsw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT:  ret %ov.result.32 %1
+}
+
+define %ov.result.32 @smultest3_sw(i32 %a, i32 %b) {
+  %A = ashr i32 %a, 16
+  %B = ashr i32 %b, 15
+  %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @smultest3_sw
+; CHECK: %x = call %ov.result.32 @llvm.smul.with.overflow.i32(i32 %A, i32 %B)
+; CHECK-NEXT:  ret %ov.result.32 %x
+}
+
+define %ov.result.32 @umultest_nuw(i32 %a, i32 %b) {
+  %A = and i32 %a, 65535 ; 0xffff
+  %B = and i32 %b, 65535 ; 0xffff
+  %x = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %A, i32 %B)
+  ret %ov.result.32 %x
+; CHECK-LABEL: @umultest_nuw
+; CHECK: %x = mul nuw i32 %A, %B
+; CHECK-NEXT: %1 = insertvalue %ov.result.32 { i32 undef, i1 false }, i32 %x, 0
+; CHECK-NEXT:  ret %ov.result.32 %1
+}
 
 define i8 @umultest1(i8 %A, i1* %overflowPtr) {
   %x = call %overflow.result @llvm.umul.with.overflow.i8(i8 0, i8 %A)
@@ -125,9 +207,6 @@ define i8 @umultest2(i8 %A, i1* %overflowPtr) {
 ; CHECK-NEXT: ret i8 %A
 }
 
-%ov.result.32 = type { i32, i1 }
-declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
-
 define i32 @umultest3(i32 %n) nounwind {
   %shr = lshr i32 %n, 2
   %mul = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %shr, i32 3)
@@ -152,6 +231,19 @@ define i32 @umultest4(i32 %n) nounwind {
 ; CHECK: umul.with.overflow
 }
 
+define %ov.result.32 @umultest5(i32 %x, i32 %y) nounwind {
+  %or_x = or i32 %x, 2147483648
+  %or_y = or i32 %y, 2147483648
+  %mul = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %or_x, i32 %or_y)
+  ret %ov.result.32 %mul
+; CHECK-LABEL: @umultest5(
+; CHECK-NEXT: %[[or_x:.*]] = or i32 %x, -2147483648
+; CHECK-NEXT: %[[or_y:.*]] = or i32 %y, -2147483648
+; CHECK-NEXT: %[[mul:.*]] = mul i32 %[[or_x]], %[[or_y]]
+; CHECK-NEXT: %[[ret:.*]] = insertvalue %ov.result.32 { i32 undef, i1 true }, i32 %[[mul]], 0
+; CHECK-NEXT: ret %ov.result.32 %[[ret]]
+}
+
 define void @powi(double %V, double *%P) {
 entry:
   %A = tail call double @llvm.powi.f64(double %V, i32 -1) nounwind
diff --git a/test/Transforms/InstCombine/load-addrspace-cast.ll b/test/Transforms/InstCombine/load-addrspace-cast.ll
deleted file mode 100644
index fd6339cc9262..000000000000
--- a/test/Transforms/InstCombine/load-addrspace-cast.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: opt -instcombine -S < %s | FileCheck %s
-target datalayout = "e-p:64:64:64-n8:16:32:64"
-
-define i32* @pointer_to_addrspace_pointer(i32 addrspace(1)** %x) nounwind {
-; CHECK-LABEL: @pointer_to_addrspace_pointer(
-; CHECK: load
-; CHECK: addrspacecast
-  %y = bitcast i32 addrspace(1)** %x to i32**
-  %z = load i32** %y
-  ret i32* %z
-}
-
diff --git a/test/Transforms/InstCombine/load.ll b/test/Transforms/InstCombine/load.ll
index c8ce70a5c03a..b4b7558a1a05 100644
--- a/test/Transforms/InstCombine/load.ll
+++ b/test/Transforms/InstCombine/load.ll
@@ -2,6 +2,7 @@
 
 ; This test makes sure that these instructions are properly eliminated.
 
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 @X = constant i32 42		; <i32*> [#uses=2]
 @X2 = constant i32 47		; <i32*> [#uses=1]
@@ -118,3 +119,34 @@ define <16 x i8> @test13(<2 x i64> %x) {
   %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*)
   ret <16 x i8> %tmp
 }
+
+define i8 @test14(i8 %x, i32 %y) {
+; This test must not have the store of %x forwarded to the load -- there is an
+; intervening store if %y. However, the intervening store occurs with a different
+; type and size and to a different pointer value. This is ensuring that none of
+; those confuse the analysis into thinking that the second store does not alias
+; the first.
+; CHECK-LABEL: @test14(
+; CHECK:         %[[R:.*]] = load i8*
+; CHECK-NEXT:    ret i8 %[[R]]
+  %a = alloca i32
+  %a.i8 = bitcast i32* %a to i8*
+  store i8 %x, i8* %a.i8
+  store i32 %y, i32* %a
+  %r = load i8* %a.i8
+  ret i8 %r
+}
+
+@test15_global = external global i32
+
+define i8 @test15(i8 %x, i32 %y) {
+; Same test as @test14 essentially, but using a global instead of an alloca.
+; CHECK-LABEL: @test15(
+; CHECK:         %[[R:.*]] = load i8*
+; CHECK-NEXT:    ret i8 %[[R]]
+  %g.i8 = bitcast i32* @test15_global to i8*
+  store i8 %x, i8* %g.i8
+  store i32 %y, i32* @test15_global
+  %r = load i8* %g.i8
+  ret i8 %r
+}
diff --git a/test/Transforms/InstCombine/loadstore-metadata.ll b/test/Transforms/InstCombine/loadstore-metadata.ll
new file mode 100644
index 000000000000..ad6a11cf6eb1
--- /dev/null
+++ b/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -0,0 +1,86 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define i32 @test_load_cast_combine_tbaa(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.
+; CHECK-LABEL: @test_load_cast_combine_tbaa(
+; CHECK: load i32* %{{.*}}, !tbaa !0
+entry:
+  %l = load float* %ptr, !tbaa !0
+  %c = bitcast float %l to i32
+  ret i32 %c
+}
+
+define i32 @test_load_cast_combine_noalias(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves no-alias metadata.
+; CHECK-LABEL: @test_load_cast_combine_noalias(
+; CHECK: load i32* %{{.*}}, !alias.scope !2, !noalias !1
+entry:
+  %l = load float* %ptr, !alias.scope !2, !noalias !1
+  %c = bitcast float %l to i32
+  ret i32 %c
+}
+
+define float @test_load_cast_combine_range(i32* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) drops range metadata. It
+; would be nice to preserve or update it somehow but this is hard when moving
+; between types.
+; CHECK-LABEL: @test_load_cast_combine_range(
+; CHECK: load float* %{{.*}}
+; CHECK-NOT: !range
+; CHECK: ret float
+entry:
+  %l = load i32* %ptr, !range !5
+  %c = bitcast i32 %l to float
+  ret float %c
+}
+
+define i32 @test_load_cast_combine_invariant(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves invariant metadata.
+; CHECK-LABEL: @test_load_cast_combine_invariant(
+; CHECK: load i32* %{{.*}}, !invariant.load !3
+entry:
+  %l = load float* %ptr, !invariant.load !3
+  %c = bitcast float %l to i32
+  ret i32 %c
+}
+
+define i32 @test_load_cast_combine_nontemporal(float* %ptr) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves nontemporal
+; metadata.
+; CHECK-LABEL: @test_load_cast_combine_nontemporal(
+; CHECK: load i32* %{{.*}}, !nontemporal !4
+entry:
+  %l = load float* %ptr, !nontemporal !4
+  %c = bitcast float %l to i32
+  ret i32 %c
+}
+
+define void @test_load_cast_combine_loop(float* %src, i32* %dst, i32 %n) {
+; Ensure (cast (load (...))) -> (load (cast (...))) preserves loop access
+; metadata.
+; CHECK-LABEL: @test_load_cast_combine_loop(
+; CHECK: load i32* %{{.*}}, !llvm.mem.parallel_loop_access !1
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %src.gep = getelementptr inbounds float* %src, i32 %i
+  %dst.gep = getelementptr inbounds i32* %dst, i32 %i
+  %l = load float* %src.gep, !llvm.mem.parallel_loop_access !1
+  %c = bitcast float %l to i32
+  store i32 %c, i32* %dst.gep
+  %i.next = add i32 %i, 1
+  %cmp = icmp slt i32 %i.next, %n
+  br i1 %cmp, label %loop, label %exit, !llvm.loop !1
+
+exit:
+  ret void
+}
+
+!0 = !{ !1, !1, i64 0 }
+!1 = !{ !1 }
+!2 = !{ !2, !1 }
+!3 = !{ }
+!4 = !{ i32 1 }
+!5 = !{ i32 0, i32 42 }
diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll
index 208520653848..ed25e4e49c84 100644
--- a/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -144,3 +144,26 @@ lpad.i:                                           ; preds = %entry
   call void @_ZdlPvRKSt9nothrow_t(i8* %call.i, i8* %nt) builtin nounwind
   resume { i8*, i32 } %0
 }
+
+declare i8* @_Znwm(i64) nobuiltin
+declare void @_ZdlPvm(i8*, i64) nobuiltin
+declare i8* @_Znwj(i32) nobuiltin
+declare void @_ZdlPvj(i8*, i32) nobuiltin
+declare i8* @_Znam(i64) nobuiltin
+declare void @_ZdaPvm(i8*, i64) nobuiltin
+declare i8* @_Znaj(i32) nobuiltin
+declare void @_ZdaPvj(i8*, i32) nobuiltin
+
+; CHECK-LABEL: @test8(
+define void @test8() {
+  ; CHECK-NOT: call
+  %nwm = call i8* @_Znwm(i64 32) builtin
+  call void @_ZdlPvm(i8* %nwm, i64 32) builtin
+  %nwj = call i8* @_Znwj(i32 32) builtin
+  call void @_ZdlPvj(i8* %nwj, i32 32) builtin
+  %nam = call i8* @_Znam(i64 32) builtin
+  call void @_ZdaPvm(i8* %nam, i64 32) builtin
+  %naj = call i8* @_Znaj(i32 32) builtin
+  call void @_ZdaPvj(i8* %naj, i32 32) builtin
+  ret void
+}
diff --git a/test/Transforms/InstCombine/maxnum.ll b/test/Transforms/InstCombine/maxnum.ll
new file mode 100644
index 000000000000..585d9f41f9fc
--- /dev/null
+++ b/test/Transforms/InstCombine/maxnum.ll
@@ -0,0 +1,222 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.maxnum.f32(float, float) #0
+declare float @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #0
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #0
+
+declare double @llvm.maxnum.f64(double, double) #0
+declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) #0
+
+; CHECK-LABEL: @constant_fold_maxnum_f32
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_maxnum_f32() #0 {
+  %x = call float @llvm.maxnum.f32(float 1.0, float 2.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_inv
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_maxnum_f32_inv() #0 {
+  %x = call float @llvm.maxnum.f32(float 2.0, float 1.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_nan0
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_maxnum_f32_nan0() #0 {
+  %x = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 2.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_nan1
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_maxnum_f32_nan1() #0 {
+  %x = call float @llvm.maxnum.f32(float 2.0, float 0x7FF8000000000000) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_nan_nan
+; CHECK-NEXT: ret float 0x7FF8000000000000
+define float @constant_fold_maxnum_f32_nan_nan() #0 {
+  %x = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_p0_p0
+; CHECK-NEXT: ret float 0.000000e+00
+define float @constant_fold_maxnum_f32_p0_p0() #0 {
+  %x = call float @llvm.maxnum.f32(float 0.0, float 0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_p0_n0
+; CHECK-NEXT: ret float 0.000000e+00
+define float @constant_fold_maxnum_f32_p0_n0() #0 {
+  %x = call float @llvm.maxnum.f32(float 0.0, float -0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_n0_p0
+; CHECK-NEXT: ret float -0.000000e+00
+define float @constant_fold_maxnum_f32_n0_p0() #0 {
+  %x = call float @llvm.maxnum.f32(float -0.0, float 0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f32_n0_n0
+; CHECK-NEXT: ret float -0.000000e+00
+define float @constant_fold_maxnum_f32_n0_n0() #0 {
+  %x = call float @llvm.maxnum.f32(float -0.0, float -0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_v4f32
+; CHECK-NEXT: ret <4 x float> <float 2.000000e+00, float 8.000000e+00, float 1.000000e+01, float 9.000000e+00>
+define <4 x float> @constant_fold_maxnum_v4f32() #0 {
+  %x = call <4 x float> @llvm.maxnum.v4f32(<4 x float> <float 1.0, float 8.0, float 3.0, float 9.0>, <4 x float> <float 2.0, float 2.0, float 10.0, float 5.0>)
+  ret <4 x float> %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f64
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_maxnum_f64() #0 {
+  %x = call double @llvm.maxnum.f64(double 1.0, double 2.0) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f64_nan0
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_maxnum_f64_nan0() #0 {
+  %x = call double @llvm.maxnum.f64(double 0x7FF8000000000000, double 2.0) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f64_nan1
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_maxnum_f64_nan1() #0 {
+  %x = call double @llvm.maxnum.f64(double 2.0, double 0x7FF8000000000000) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_maxnum_f64_nan_nan
+; CHECK-NEXT: ret double 0x7FF8000000000000
+define double @constant_fold_maxnum_f64_nan_nan() #0 {
+  %x = call double @llvm.maxnum.f64(double 0x7FF8000000000000, double 0x7FF8000000000000) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @canonicalize_constant_maxnum_f32
+; CHECK: call float @llvm.maxnum.f32(float %x, float 1.000000e+00)
+define float @canonicalize_constant_maxnum_f32(float %x) #0 {
+  %y = call float @llvm.maxnum.f32(float 1.0, float %x) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @noop_maxnum_f32
+; CHECK-NEXT: ret float %x
+define float @noop_maxnum_f32(float %x) #0 {
+  %y = call float @llvm.maxnum.f32(float %x, float %x) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @maxnum_f32_nan_val
+; CHECK-NEXT: ret float %x
+define float @maxnum_f32_nan_val(float %x) #0 {
+  %y = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float %x) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @maxnum_f32_val_nan
+; CHECK-NEXT: ret float %x
+define float @maxnum_f32_val_nan(float %x) #0 {
+  %y = call float @llvm.maxnum.f32(float %x, float 0x7FF8000000000000) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @fold_maxnum_f32_undef_undef
+; CHECK-NEXT: ret float undef
+define float @fold_maxnum_f32_undef_undef(float %x) nounwind {
+  %val = call float @llvm.maxnum.f32(float undef, float undef) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @fold_maxnum_f32_val_undef
+; CHECK-NEXT: ret float %x
+define float @fold_maxnum_f32_val_undef(float %x) nounwind {
+  %val = call float @llvm.maxnum.f32(float %x, float undef) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @fold_maxnum_f32_undef_val
+; CHECK-NEXT: ret float %x
+define float @fold_maxnum_f32_undef_val(float %x) nounwind {
+  %val = call float @llvm.maxnum.f32(float undef, float %x) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @maxnum_x_maxnum_x_y
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float %y)
+; CHECK-NEXT: ret float
+define float @maxnum_x_maxnum_x_y(float %x, float %y) #0 {
+  %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+  %b = call float @llvm.maxnum.f32(float %x, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @maxnum_y_maxnum_x_y
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float %y)
+; CHECK-NEXT: ret float
+define float @maxnum_y_maxnum_x_y(float %x, float %y) #0 {
+  %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+  %b = call float @llvm.maxnum.f32(float %y, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @maxnum_z_maxnum_x_y
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %z, float %a)
+; CHECK-NEXT: ret float
+define float @maxnum_z_maxnum_x_y(float %x, float %y, float %z) #0 {
+  %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+  %b = call float @llvm.maxnum.f32(float %z, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @maxnum_maxnum_x_y_z
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %a, float %z)
+; CHECK-NEXT: ret float
+define float @maxnum_maxnum_x_y_z(float %x, float %y, float %z) #0 {
+  %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+  %b = call float @llvm.maxnum.f32(float %a, float %z) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @maxnum4
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %z, float %w)
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %a, float %b)
+; CHECK-NEXT: ret float
+define float @maxnum4(float %x, float %y, float %z, float %w) #0 {
+  %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+  %b = call float @llvm.maxnum.f32(float %z, float %w) #0
+  %c = call float @llvm.maxnum.f32(float %a, float %b) #0
+  ret float %c
+}
+
+; CHECK-LABEL: @fold_maxnum_f32_inf_val
+; CHECK-NEXT: ret float 0x7FF0000000000000
+define float @fold_maxnum_f32_inf_val(float %x) nounwind {
+  %val = call float @llvm.maxnum.f32(float 0x7FF0000000000000, float %x) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @fold_maxnum_f32_neginf_val
+; CHECK-NEXT: call float @llvm.maxnum.f32(float %x, float 0xFFF0000000000000)
+; CHECK-NEXT: ret float
+define float @fold_maxnum_f32_neginf_val(float %x) nounwind {
+  %val = call float @llvm.maxnum.f32(float 0xFFF0000000000000, float %x) #0
+  ret float %val
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/memcmp-1.ll b/test/Transforms/InstCombine/memcmp-1.ll
index 65349c6e6906..d960693a05d4 100644
--- a/test/Transforms/InstCombine/memcmp-1.ll
+++ b/test/Transforms/InstCombine/memcmp-1.ll
@@ -37,7 +37,7 @@ define i32 @test_simplify3(i8* %mem1, i8* %mem2) {
 ; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32
 ; CHECK: [[LOAD2:%[a-z]+]] = load i8* %mem2, align 1
 ; CHECK: [[ZEXT2:%[a-z]+]] = zext i8 [[LOAD2]] to i32
-; CHECK: [[RET:%[a-z]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]
+; CHECK: [[RET:%[a-z]+]] = sub nsw i32 [[ZEXT1]], [[ZEXT2]]
   ret i32 %ret
 ; CHECK: ret i32 [[RET]]
 }
diff --git a/test/Transforms/InstCombine/memcpy_chk-1.ll b/test/Transforms/InstCombine/memcpy_chk-1.ll
index 9216ae7fe95a..008b838201ed 100644
--- a/test/Transforms/InstCombine/memcpy_chk-1.ll
+++ b/test/Transforms/InstCombine/memcpy_chk-1.ll
@@ -57,4 +57,17 @@ define void @test_no_simplify2() {
   ret void
 }
 
+define i8* @test_simplify_return_indcall(i8* ()* %alloc) {
+; CHECK-LABEL: @test_simplify_return_indcall(
+  %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: %dst = call i8* %alloc()
+  %dst = call i8* %alloc()
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64
+  %ret = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
+; CHECK-NEXT: ret i8* %dst
+  ret i8* %ret
+}
+
 declare i8* @__memcpy_chk(i8*, i8*, i64, i64)
diff --git a/test/Transforms/InstCombine/minnum.ll b/test/Transforms/InstCombine/minnum.ll
new file mode 100644
index 000000000000..f7494e7ae154
--- /dev/null
+++ b/test/Transforms/InstCombine/minnum.ll
@@ -0,0 +1,244 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+declare float @llvm.minnum.f32(float, float) #0
+declare float @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0
+declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #0
+
+declare double @llvm.minnum.f64(double, double) #0
+declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0
+
+declare float @llvm.maxnum.f32(float, float) #0
+
+; CHECK-LABEL: @constant_fold_minnum_f32
+; CHECK-NEXT: ret float 1.000000e+00
+define float @constant_fold_minnum_f32() #0 {
+  %x = call float @llvm.minnum.f32(float 1.0, float 2.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_inv
+; CHECK-NEXT: ret float 1.000000e+00
+define float @constant_fold_minnum_f32_inv() #0 {
+  %x = call float @llvm.minnum.f32(float 2.0, float 1.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_nan0
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_minnum_f32_nan0() #0 {
+  %x = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 2.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_nan1
+; CHECK-NEXT: ret float 2.000000e+00
+define float @constant_fold_minnum_f32_nan1() #0 {
+  %x = call float @llvm.minnum.f32(float 2.0, float 0x7FF8000000000000) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_nan_nan
+; CHECK-NEXT: ret float 0x7FF8000000000000
+define float @constant_fold_minnum_f32_nan_nan() #0 {
+  %x = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_p0_p0
+; CHECK-NEXT: ret float 0.000000e+00
+define float @constant_fold_minnum_f32_p0_p0() #0 {
+  %x = call float @llvm.minnum.f32(float 0.0, float 0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_p0_n0
+; CHECK-NEXT: ret float 0.000000e+00
+define float @constant_fold_minnum_f32_p0_n0() #0 {
+  %x = call float @llvm.minnum.f32(float 0.0, float -0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_n0_p0
+; CHECK-NEXT: ret float -0.000000e+00
+define float @constant_fold_minnum_f32_n0_p0() #0 {
+  %x = call float @llvm.minnum.f32(float -0.0, float 0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f32_n0_n0
+; CHECK-NEXT: ret float -0.000000e+00
+define float @constant_fold_minnum_f32_n0_n0() #0 {
+  %x = call float @llvm.minnum.f32(float -0.0, float -0.0) #0
+  ret float %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_v4f32
+; CHECK-NEXT: ret <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 5.000000e+00>
+define <4 x float> @constant_fold_minnum_v4f32() #0 {
+  %x = call <4 x float> @llvm.minnum.v4f32(<4 x float> <float 1.0, float 8.0, float 3.0, float 9.0>, <4 x float> <float 2.0, float 2.0, float 10.0, float 5.0>)
+  ret <4 x float> %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f64
+; CHECK-NEXT: ret double 1.000000e+00
+define double @constant_fold_minnum_f64() #0 {
+  %x = call double @llvm.minnum.f64(double 1.0, double 2.0) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f64_nan0
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_minnum_f64_nan0() #0 {
+  %x = call double @llvm.minnum.f64(double 0x7FF8000000000000, double 2.0) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f64_nan1
+; CHECK-NEXT: ret double 2.000000e+00
+define double @constant_fold_minnum_f64_nan1() #0 {
+  %x = call double @llvm.minnum.f64(double 2.0, double 0x7FF8000000000000) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @constant_fold_minnum_f64_nan_nan
+; CHECK-NEXT: ret double 0x7FF8000000000000
+define double @constant_fold_minnum_f64_nan_nan() #0 {
+  %x = call double @llvm.minnum.f64(double 0x7FF8000000000000, double 0x7FF8000000000000) #0
+  ret double %x
+}
+
+; CHECK-LABEL: @canonicalize_constant_minnum_f32
+; CHECK: call float @llvm.minnum.f32(float %x, float 1.000000e+00)
+define float @canonicalize_constant_minnum_f32(float %x) #0 {
+  %y = call float @llvm.minnum.f32(float 1.0, float %x) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @noop_minnum_f32
+; CHECK-NEXT: ret float %x
+define float @noop_minnum_f32(float %x) #0 {
+  %y = call float @llvm.minnum.f32(float %x, float %x) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @minnum_f32_nan_val
+; CHECK-NEXT: ret float %x
+define float @minnum_f32_nan_val(float %x) #0 {
+  %y = call float @llvm.minnum.f32(float 0x7FF8000000000000, float %x) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @minnum_f32_val_nan
+; CHECK-NEXT: ret float %x
+define float @minnum_f32_val_nan(float %x) #0 {
+  %y = call float @llvm.minnum.f32(float %x, float 0x7FF8000000000000) #0
+  ret float %y
+}
+
+; CHECK-LABEL: @fold_minnum_f32_undef_undef
+; CHECK-NEXT: ret float undef
+define float @fold_minnum_f32_undef_undef(float %x) nounwind {
+  %val = call float @llvm.minnum.f32(float undef, float undef) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @fold_minnum_f32_val_undef
+; CHECK-NEXT: ret float %x
+define float @fold_minnum_f32_val_undef(float %x) nounwind {
+  %val = call float @llvm.minnum.f32(float %x, float undef) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @fold_minnum_f32_undef_val
+; CHECK-NEXT: ret float %x
+define float @fold_minnum_f32_undef_val(float %x) nounwind {
+  %val = call float @llvm.minnum.f32(float undef, float %x) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @minnum_x_minnum_x_y
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK-NEXT: ret float
+define float @minnum_x_minnum_x_y(float %x, float %y) #0 {
+  %a = call float @llvm.minnum.f32(float %x, float %y) #0
+  %b = call float @llvm.minnum.f32(float %x, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @minnum_y_minnum_x_y
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK-NEXT: ret float
+define float @minnum_y_minnum_x_y(float %x, float %y) #0 {
+  %a = call float @llvm.minnum.f32(float %x, float %y) #0
+  %b = call float @llvm.minnum.f32(float %y, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @minnum_z_minnum_x_y
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.minnum.f32(float %z, float %a)
+; CHECK-NEXT: ret float
+define float @minnum_z_minnum_x_y(float %x, float %y, float %z) #0 {
+  %a = call float @llvm.minnum.f32(float %x, float %y) #0
+  %b = call float @llvm.minnum.f32(float %z, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @minnum_minnum_x_y_z
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.minnum.f32(float %a, float %z)
+; CHECK-NEXT: ret float
+define float @minnum_minnum_x_y_z(float %x, float %y, float %z) #0 {
+  %a = call float @llvm.minnum.f32(float %x, float %y) #0
+  %b = call float @llvm.minnum.f32(float %a, float %z) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @minnum4
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float %y)
+; CHECK-NEXT: call float @llvm.minnum.f32(float %z, float %w)
+; CHECK-NEXT: call float @llvm.minnum.f32(float %a, float %b)
+; CHECK-NEXT: ret float
+define float @minnum4(float %x, float %y, float %z, float %w) #0 {
+  %a = call float @llvm.minnum.f32(float %x, float %y) #0
+  %b = call float @llvm.minnum.f32(float %z, float %w) #0
+  %c = call float @llvm.minnum.f32(float %a, float %b) #0
+  ret float %c
+}
+
+; CHECK-LABEL: @minnum_x_maxnum_x_y
+; CHECK-NEXT: call float @llvm.maxnum.f32
+; CHECK-NEXT: call float @llvm.minnum.f32
+; CHECK-NEXT: ret float
+define float @minnum_x_maxnum_x_y(float %x, float %y) #0 {
+  %a = call float @llvm.maxnum.f32(float %x, float %y) #0
+  %b = call float @llvm.minnum.f32(float %x, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @maxnum_x_minnum_x_y
+; CHECK-NEXT: call float @llvm.minnum.f32
+; CHECK-NEXT: call float @llvm.maxnum.f32
+; CHECK-NEXT: ret float
+define float @maxnum_x_minnum_x_y(float %x, float %y) #0 {
+  %a = call float @llvm.minnum.f32(float %x, float %y) #0
+  %b = call float @llvm.maxnum.f32(float %x, float %a) #0
+  ret float %b
+}
+
+; CHECK-LABEL: @fold_minnum_f32_inf_val
+; CHECK-NEXT: call float @llvm.minnum.f32(float %x, float 0x7FF0000000000000)
+; CHECK-NEXT: ret float
+define float @fold_minnum_f32_inf_val(float %x) nounwind {
+  %val = call float @llvm.minnum.f32(float 0x7FF0000000000000, float %x) #0
+  ret float %val
+}
+
+; CHECK-LABEL: @fold_minnum_f32_minf_val
+; CHECK-NEXT: ret float 0xFFF0000000000000
+define float @fold_minnum_f32_minf_val(float %x) nounwind {
+  %val = call float @llvm.minnum.f32(float 0xFFF0000000000000, float %x) #0
+  ret float %val
+}
+
+attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index d19bedc7a10c..4d1e6c700bda 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -197,3 +197,94 @@ define <2 x i1> @test21(<2 x i1> %A, <2 x i1> %B) {
         ret <2 x i1> %C
 ; CHECK: %C = and <2 x i1> %A, %B
 }
+
+define i32 @test22(i32 %A) {
+; CHECK-LABEL: @test22(
+        %B = mul nsw i32 %A, -1
+        ret i32 %B
+; CHECK: sub nsw i32 0, %A
+}
+
+define i32 @test23(i32 %A) {
+; CHECK-LABEL: @test23(
+        %B = shl nuw i32 %A, 1
+        %C = mul nuw i32 %B, 3
+        ret i32 %C
+; CHECK: mul nuw i32 %A, 6
+}
+
+define i32 @test24(i32 %A) {
+; CHECK-LABEL: @test24(
+        %B = shl nsw i32 %A, 1
+        %C = mul nsw i32 %B, 3
+        ret i32 %C
+; CHECK: mul nsw i32 %A, 6
+}
+
+define i32 @test25(i32 %A, i32 %B) {
+; CHECK-LABEL: @test25(
+        %C = sub nsw i32 0, %A
+        %D = sub nsw i32 0, %B
+        %E = mul nsw i32 %C, %D
+        ret i32 %E
+; CHECK: mul nsw i32 %A, %B
+}
+
+define i32 @test26(i32 %A, i32 %B) {
+; CHECK-LABEL: @test26(
+        %C = shl nsw i32 1, %B
+        %D = mul nsw i32 %A, %C
+        ret i32 %D
+; CHECK: shl nsw i32 %A, %B
+}
+
+define i32 @test27(i32 %A, i32 %B) {
+; CHECK-LABEL: @test27(
+        %C = shl i32 1, %B
+        %D = mul nuw i32 %A, %C
+        ret i32 %D
+; CHECK: shl nuw i32 %A, %B
+}
+
+define i32 @test28(i32 %A) {
+; CHECK-LABEL: @test28(
+        %B = shl i32 1, %A
+        %C = mul nsw i32 %B, %B
+        ret i32 %C
+; CHECK:      %[[shl1:.*]] = shl i32 1, %A
+; CHECK-NEXT: %[[shl2:.*]] = shl i32 %[[shl1]], %A
+; CHECK-NEXT: ret i32 %[[shl2]]
+}
+
+define i64 @test29(i31 %A, i31 %B) {
+; CHECK-LABEL: @test29(
+        %C = sext i31 %A to i64
+        %D = sext i31 %B to i64
+        %E = mul i64 %C, %D
+        ret i64 %E
+; CHECK:      %[[sext1:.*]] = sext i31 %A to i64
+; CHECK-NEXT: %[[sext2:.*]] = sext i31 %B to i64
+; CHECK-NEXT: %[[mul:.*]] = mul nsw i64 %[[sext1]], %[[sext2]]
+; CHECK-NEXT: ret i64 %[[mul]]
+}
+
+define i64 @test30(i32 %A, i32 %B) {
+; CHECK-LABEL: @test30(
+        %C = zext i32 %A to i64
+        %D = zext i32 %B to i64
+        %E = mul i64 %C, %D
+        ret i64 %E
+; CHECK:      %[[zext1:.*]] = zext i32 %A to i64
+; CHECK-NEXT: %[[zext2:.*]] = zext i32 %B to i64
+; CHECK-NEXT: %[[mul:.*]] = mul nuw i64 %[[zext1]], %[[zext2]]
+; CHECK-NEXT: ret i64 %[[mul]]
+}
+
+@PR22087 = external global i32
+define i32 @test31(i32 %V) {
+; CHECK-LABEL: @test31
+  %mul = mul i32 %V, shl (i32 1, i32 zext (i1 icmp ne (i32* inttoptr (i64 1 to i32*), i32* @PR22087) to i32))
+  ret i32 %mul
+; CHECK:      %[[mul:.*]] = shl i32 %V, zext (i1 icmp ne (i32* inttoptr (i64 1 to i32*), i32* @PR22087) to i32)
+; CHECK-NEXT: ret i32 %[[mul]]
+}
diff --git a/test/Transforms/InstCombine/narrow-switch.ll b/test/Transforms/InstCombine/narrow-switch.ll
new file mode 100644
index 000000000000..f3f19bae03dd
--- /dev/null
+++ b/test/Transforms/InstCombine/narrow-switch.ll
@@ -0,0 +1,123 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+
+; CHECK-LABEL: define i32 @positive1
+; CHECK: switch i32
+; CHECK: i32 10, label
+; CHECK: i32 100, label
+; CHECK: i32 1001, label
+
+define i32 @positive1(i64 %a) {
+entry:
+  %and = and i64 %a, 4294967295
+  switch i64 %and, label %sw.default [
+    i64 10, label %return
+    i64 100, label %sw.bb1
+    i64 1001, label %sw.bb2
+  ]
+
+sw.bb1:
+  br label %return
+
+sw.bb2:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 24, %sw.default ], [ 123, %sw.bb2 ], [ 213, %sw.bb1 ], [ 231, %entry ]
+  ret i32 %retval.0
+}
+
+; CHECK-LABEL: define i32 @negative1
+; CHECK: switch i32
+; CHECK: i32 -10, label
+; CHECK: i32 -100, label
+; CHECK: i32 -1001, label
+
+define i32 @negative1(i64 %a) {
+entry:
+  %or = or i64 %a, -4294967296
+  switch i64 %or, label %sw.default [
+    i64 -10, label %return
+    i64 -100, label %sw.bb1
+    i64 -1001, label %sw.bb2
+  ]
+
+sw.bb1:
+  br label %return
+
+sw.bb2:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 24, %sw.default ], [ 123, %sw.bb2 ], [ 213, %sw.bb1 ], [ 231, %entry ]
+  ret i32 %retval.0
+}
+
+; Make sure truncating a constant int larger than 64-bit doesn't trigger an
+; assertion.
+
+; CHECK-LABEL: define i32 @trunc72to68
+; CHECK: switch i68
+; CHECK: i68 10, label
+; CHECK: i68 100, label
+; CHECK: i68 1001, label
+
+define i32 @trunc72to68(i72 %a) {
+entry:
+  %and = and i72 %a, 295147905179352825855
+  switch i72 %and, label %sw.default [
+    i72 10, label %return
+    i72 100, label %sw.bb1
+    i72 1001, label %sw.bb2
+  ]
+
+sw.bb1:
+  br label %return
+
+sw.bb2:
+  br label %return
+
+sw.default:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 24, %sw.default ], [ 123, %sw.bb2 ], [ 213, %sw.bb1 ], [ 231, %entry ]
+  ret i32 %retval.0
+}
+
+; Make sure to avoid assertion crashes and use the type before
+; truncation to generate the sub constant expressions that leads
+; to the recomputed condition.
+;
+; CHECK-LABEL: @trunc64to59
+; CHECK: switch i59
+; CHECK: i59 0, label
+; CHECK: i59 18717182647723699, label
+
+define void @trunc64to59(i64 %a) {
+entry:
+  %tmp0 = and i64 %a, 15
+  %tmp1 = mul i64 %tmp0, -6425668444178048401
+  %tmp2 = add i64 %tmp1, 5170979678563097242
+  %tmp3 = mul i64 %tmp2, 1627972535142754813
+  switch i64 %tmp3, label %sw.default [
+    i64 847514119312061490, label %sw.bb1
+    i64 866231301959785189, label %sw.bb2
+  ]
+
+sw.bb1:
+  br label %sw.default
+
+sw.bb2:
+  br label %sw.default
+
+sw.default:
+  ret void
+}
diff --git a/test/Transforms/InstCombine/no_cgscc_assert.ll b/test/Transforms/InstCombine/no_cgscc_assert.ll
new file mode 100644
index 000000000000..cec5297695b1
--- /dev/null
+++ b/test/Transforms/InstCombine/no_cgscc_assert.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -inline -instcombine -S | FileCheck %s
+
+; PR21403: http://llvm.org/bugs/show_bug.cgi?id=21403
+; When the call to sqrtf is replaced by an intrinsic call to fabs,
+; it should not cause a problem in CGSCC. 
+
+define float @bar(float %f) #0 {
+  %mul = fmul fast float %f, %f
+  %call1 = call float @sqrtf(float %mul) #0
+  ret float %call1
+
+; CHECK-LABEL: @bar(
+; CHECK-NEXT: call float @llvm.fabs.f32
+; CHECK-NEXT: ret float
+}
+
+declare float @sqrtf(float) #0
+
+attributes #0 = { readnone "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/not-fcmp.ll b/test/Transforms/InstCombine/not-fcmp.ll
index ad01a6bdf1bc..9718e0b905fc 100644
--- a/test/Transforms/InstCombine/not-fcmp.ll
+++ b/test/Transforms/InstCombine/not-fcmp.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -S | grep "fcmp uge"
+; RUN: opt < %s -instcombine -S | FileCheck %s
 ; PR1570
 
 define i1 @f(float %X, float %Y) {
@@ -6,5 +6,8 @@ entry:
         %tmp3 = fcmp olt float %X, %Y           ; <i1> [#uses=1]
         %toBoolnot5 = xor i1 %tmp3, true                ; <i1> [#uses=1]
         ret i1 %toBoolnot5
+; CHECK-LABEL: @f(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %toBoolnot5 = fcmp uge float %X, %Y
+; CHECK-NEXT: ret i1 %toBoolnot5
 }
-
diff --git a/test/Transforms/InstCombine/not.ll b/test/Transforms/InstCombine/not.ll
index 4a8825b15c4e..4012ce1ea4ba 100644
--- a/test/Transforms/InstCombine/not.ll
+++ b/test/Transforms/InstCombine/not.ll
@@ -1,7 +1,8 @@
 ; This test makes sure that these instructions are properly eliminated.
 ;
 
-; RUN: opt < %s -instcombine -S | not grep xor
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK-NOT: xor
 
 define i32 @test1(i32 %A) {
         %B = xor i32 %A, -1             ; <i32> [#uses=1]
@@ -51,4 +52,3 @@ entry:
 	%retval67 = zext i1 %tmp3 to i8		; <i8> [#uses=1]
 	ret i8 %retval67
 }
-
diff --git a/test/Transforms/InstCombine/objsize-address-space.ll b/test/Transforms/InstCombine/objsize-address-space.ll
index 9cb688423960..a971c913bd41 100644
--- a/test/Transforms/InstCombine/objsize-address-space.ll
+++ b/test/Transforms/InstCombine/objsize-address-space.ll
@@ -32,7 +32,7 @@ define i16 @foo_as3_i16() nounwind {
   ret i16 %1
 }
 
-@a_alias = alias weak [60 x i8] addrspace(3)* @a_as3
+@a_alias = weak alias [60 x i8] addrspace(3)* @a_as3
 define i32 @foo_alias() nounwind {
   %1 = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* getelementptr inbounds ([60 x i8] addrspace(3)* @a_alias, i32 0, i32 0), i1 false)
   ret i32 %1
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 645903299c86..1285b1c3aa6a 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -256,7 +256,7 @@ return:
   ret i32 7
 }
 
-@globalalias = alias internal [60 x i8]* @a
+@globalalias = internal alias [60 x i8]* @a
 
 ; CHECK-LABEL: @test18(
 ; CHECK-NEXT: ret i32 60
@@ -266,7 +266,7 @@ define i32 @test18() {
   ret i32 %1
 }
 
-@globalalias2 = alias weak [60 x i8]* @a
+@globalalias2 = weak alias [60 x i8]* @a
 
 ; CHECK-LABEL: @test19(
 ; CHECK: llvm.objectsize
diff --git a/test/Transforms/InstCombine/or-xor.ll b/test/Transforms/InstCombine/or-xor.ll
index cec36f119a9a..546b777b4e6b 100644
--- a/test/Transforms/InstCombine/or-xor.ll
+++ b/test/Transforms/InstCombine/or-xor.ll
@@ -92,3 +92,86 @@ define i32 @test9(i32 %x, i32 %y) nounwind {
 ; CHECK-NEXT: %z = or i32 %y.not, %x
 ; CHECK-NEXT: ret i32 %z
 }
+
+define i32 @test10(i32 %A, i32 %B) {
+  %xor1 = xor i32 %B, %A
+  %not = xor i32 %A, -1
+  %xor2 = xor i32 %not, %B
+  %or = or i32 %xor1, %xor2
+  ret i32 %or
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: ret i32 -1
+}
+
+; (x | y) & ((~x) ^ y) -> (x & y)
+define i32 @test11(i32 %x, i32 %y) {
+ %or = or i32 %x, %y
+ %neg = xor i32 %x, -1
+ %xor = xor i32 %neg, %y
+ %and = and i32 %or, %xor
+ ret i32 %and
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: %and = and i32 %x, %y
+; CHECK-NEXT: ret i32 %and
+}
+
+; ((~x) ^ y) & (x | y) -> (x & y)
+define i32 @test12(i32 %x, i32 %y) {
+ %neg = xor i32 %x, -1
+ %xor = xor i32 %neg, %y
+ %or = or i32 %x, %y
+ %and = and i32 %xor, %or
+ ret i32 %and
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: %and = and i32 %x, %y
+; CHECK-NEXT: ret i32 %and
+}
+
+; ((x | y) ^ (x ^ y)) -> (x & y)
+define i32 @test13(i32 %x, i32 %y) {
+  %1 = xor i32 %y, %x
+  %2 = or i32 %y, %x
+  %3 = xor i32 %2, %1
+  ret i32 %3
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: %1 = and i32 %y, %x
+; CHECK-NEXT: ret i32 %1
+}
+
+; ((x | ~y) ^ (~x | y)) -> x ^ y
+define i32 @test14(i32 %x, i32 %y) {
+  %noty = xor i32 %y, -1
+  %notx = xor i32 %x, -1
+  %or1 = or i32 %x, %noty
+  %or2 = or i32 %notx, %y
+  %xor = xor i32 %or1, %or2
+  ret i32 %xor
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: %xor = xor i32 %x, %y
+; CHECK-NEXT: ret i32 %xor
+}
+
+; ((x & ~y) ^ (~x & y)) -> x ^ y
+define i32 @test15(i32 %x, i32 %y) {
+  %noty = xor i32 %y, -1
+  %notx = xor i32 %x, -1
+  %and1 = and i32 %x, %noty
+  %and2 = and i32 %notx, %y
+  %xor = xor i32 %and1, %and2
+  ret i32 %xor
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: %xor = xor i32 %x, %y
+; CHECK-NEXT: ret i32 %xor
+}
+
+define i32 @test16(i32 %a, i32 %b) {
+  %or = xor i32 %a, %b
+  %and1 = and i32 %or, 1
+  %and2 = and i32 %b, -2
+  %xor = or i32 %and1, %and2
+  ret i32 %xor
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: %1 = and i32 %a, 1
+; CHECK-NEXT: %xor = xor i32 %1, %b
+; CHECK-NEXT: ret i32 %xor
+}
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index 1cd897ee90c6..f604bafcc330 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -408,3 +408,111 @@ define i32 @test38(i32* %xp, i32 %y) {
   %or = or i32 %x, %sext
   ret i32 %or
 }
+
+define i32 @test39(i32 %a, i32 %b) {
+; CHECK-LABEL: test39(
+; CHECK-NEXT: %or = or i32 %a, %b
+ %xor = xor i32 %a, -1
+ %and = and i32 %xor, %b
+ %or = or i32 %and, %a
+ ret i32 %or
+}
+
+define i32 @test40(i32 %a, i32 %b) {
+; CHECK-LABEL: test40(
+; CHECK-NEXT:   %1 = xor i32 %a, -1 
+; CHECK-NEXT: %or = or i32 %1, %b
+ %and = and i32 %a, %b
+ %xor = xor i32 %a, -1
+ %or = or i32 %and, %xor
+ ret i32 %or
+}
+
+define i32 @test41(i32 %a, i32 %b) {
+; CHECK-LABEL: test41(
+; CHECK-NEXT: %1 = xor i32 %a, -1
+; CHECK-NEXT: %or = xor i32 %1, %b
+ %and = and i32 %a, %b
+ %nega = xor i32 %a, -1
+ %xor = xor i32 %nega, %b
+ %or = or i32 %and, %xor
+ ret i32 %or
+}
+
+define i32 @test42(i32 %a, i32 %b) {
+; CHECK-LABEL: test42(
+; CHECK-NEXT: %1 = xor i32 %a, -1
+; CHECK-NEXT: %or = xor i32 %1, %b
+ %nega = xor i32 %a, -1
+ %xor = xor i32 %nega, %b
+ %and = and i32 %a, %b
+ %or = or i32 %xor, %and
+ ret i32 %or
+}
+
+define i32 @test43(i32 %a, i32 %b) {
+; CHECK-LABEL: test43(
+; CHECK-NEXT: %or = xor i32 %a, %b
+ %neg = xor i32 %b, -1
+ %and = and i32 %a, %neg
+ %xor = xor i32 %a, %b
+ %or = or i32 %and, %xor
+ ret i32 %or
+}
+
+define i32 @test44(i32 %a, i32 %b) {
+; CHECK-LABEL: test44(
+; CHECK-NEXT: %or = xor i32 %a, %b
+ %xor = xor i32 %a, %b
+ %neg = xor i32 %b, -1
+ %and = and i32 %a, %neg
+ %or = or i32 %xor, %and
+ ret i32 %or
+}
+
+define i32 @test45(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: test45(
+; CHECK-NEXT: %1 = and i32 %x, %z
+; CHECK-NEXT: %or1 = or i32 %1, %y
+; CHECK-NEXT: ret i32 %or1
+  %or = or i32 %y, %z
+  %and = and i32 %x, %or
+  %or1 = or i32 %and, %y
+  ret i32 %or1
+}
+
+define i1 @test46(i8 signext %c)  {
+  %c.off = add i8 %c, -97
+  %cmp1 = icmp ult i8 %c.off, 26
+  %c.off17 = add i8 %c, -65
+  %cmp2 = icmp ult i8 %c.off17, 26
+  %or = or i1 %cmp1, %cmp2
+  ret i1 %or
+; CHECK-LABEL: @test46(
+; CHECK-NEXT:  and i8 %c, -33
+; CHECK-NEXT:  add i8 %1, -65
+; CHECK-NEXT:  icmp ult i8 %2, 26
+}
+
+define i1 @test47(i8 signext %c)  {
+  %c.off = add i8 %c, -65
+  %cmp1 = icmp ule i8 %c.off, 26
+  %c.off17 = add i8 %c, -97
+  %cmp2 = icmp ule i8 %c.off17, 26
+  %or = or i1 %cmp1, %cmp2
+  ret i1 %or
+; CHECK-LABEL: @test47(
+; CHECK-NEXT:  and i8 %c, -33
+; CHECK-NEXT:  add i8 %1, -65
+; CHECK-NEXT:  icmp ult i8 %2, 27
+}
+
+define i1 @test48(i64 %x, i1 %b) {
+  %1 = icmp ult i64 %x, 2305843009213693952
+  %2 = icmp ugt i64 %x, 2305843009213693951
+  %.b = or i1 %2, %b
+  %3 = or i1 %1, %.b
+  ret i1 %3
+; CHECK-LABEL: @test48(
+; CHECK-NEXT:  ret i1 true
+}
diff --git a/test/Transforms/InstCombine/overflow-mul.ll b/test/Transforms/InstCombine/overflow-mul.ll
index cbb2f5f95003..6d8d40bcac3e 100644
--- a/test/Transforms/InstCombine/overflow-mul.ll
+++ b/test/Transforms/InstCombine/overflow-mul.ll
@@ -173,3 +173,16 @@ define <4 x i32> @pr20113(<4 x i16> %a, <4 x i16> %b) {
   %vcgez.i = sext <4 x i1> %tmp to <4 x i32>
   ret <4 x i32> %vcgez.i
 }
+
+@pr21445_data = external global i32
+define i1 @pr21445(i8 %a) {
+; CHECK-LABEL: @pr21445(
+; CHECK-NEXT:  %[[umul:.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 ptrtoint (i32* @pr21445_data to i8))
+; CHECK-NEXT:  %[[cmp:.*]] = extractvalue { i8, i1 } %[[umul]], 1
+; CHECK-NEXT:  ret i1 %[[cmp]]
+  %ext = zext i8 %a to i32
+  %mul = mul i32 %ext, zext (i8 ptrtoint (i32* @pr21445_data to i8) to i32)
+  %and = and i32 %mul, 255
+  %cmp = icmp ne i32 %mul, %and
+  ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/pr12251.ll b/test/Transforms/InstCombine/pr12251.ll
index 74a41eb7d227..8c382bb104ac 100644
--- a/test/Transforms/InstCombine/pr12251.ll
+++ b/test/Transforms/InstCombine/pr12251.ll
@@ -12,4 +12,4 @@ entry:
 ; CHECK-NEXT: %tobool = icmp ne i8 %a, 0
 ; CHECK-NEXT: ret i1 %tobool
 
-!0 = metadata !{i8 0, i8 2}
+!0 = !{i8 0, i8 2}
diff --git a/test/Transforms/InstCombine/pr12338.ll b/test/Transforms/InstCombine/pr12338.ll
index d34600f0fa58..7e0bf59614c3 100644
--- a/test/Transforms/InstCombine/pr12338.ll
+++ b/test/Transforms/InstCombine/pr12338.ll
@@ -4,9 +4,9 @@ define void @entry() nounwind {
 entry:
   br label %for.cond
 
+; CHECK: br label %for.cond
 for.cond:
   %local = phi <1 x i32> [ <i32 0>, %entry ], [ %phi2, %cond.end47 ]
-; CHECK: sub <1 x i32> <i32 92>, %local
   %phi3 = sub <1 x i32> zeroinitializer, %local
   br label %cond.end
 
diff --git a/test/Transforms/InstCombine/pr21199.ll b/test/Transforms/InstCombine/pr21199.ll
new file mode 100644
index 000000000000..e6599fb640d7
--- /dev/null
+++ b/test/Transforms/InstCombine/pr21199.ll
@@ -0,0 +1,25 @@
+; do not replace a 'select' with 'or' in 'select - cmp - br' sequence
+; RUN: opt -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @f(i32)
+
+define void @test(i32 %len) {
+entry:
+  %cmp = icmp ult i32 %len, 8
+  %cond = select i1 %cmp, i32 %len, i32 8
+  %cmp11 = icmp ult i32 0, %cond
+  br i1 %cmp11, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  tail call void @f(i32 %cond)
+  %inc = add i32 %i.02, 1
+  %cmp1 = icmp ult i32 %inc, %cond
+  br i1 %cmp1, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+; CHECK: select
+}
diff --git a/test/Transforms/InstCombine/pr21210.ll b/test/Transforms/InstCombine/pr21210.ll
new file mode 100644
index 000000000000..1db87949dda0
--- /dev/null
+++ b/test/Transforms/InstCombine/pr21210.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -instcombine -S | FileCheck %s
+; Checks that the select-icmp optimization is safe in two cases
+declare void @foo(i32)
+declare i32 @bar(i32)
+
+; don't replace 'cond' by 'len' in the home block ('bb') that
+; contains the select
+define void @test1(i32 %len) {
+entry:
+  br label %bb
+
+bb:
+  %cmp = icmp ult i32 %len, 8
+  %cond = select i1 %cmp, i32 %len, i32 8
+  call void @foo(i32 %cond)
+  %cmp11 = icmp eq i32 %cond, 8
+  br i1 %cmp11, label %for.end, label %bb
+
+for.end:
+  ret void
+; CHECK: select
+; CHECK: icmp eq i32 %cond, 8
+}
+
+; don't replace 'cond' by 'len' in a block ('b1') that dominates all uses
+; of the select outside the home block ('bb'), but can be reached from the home
+; block on another path ('bb -> b0 -> b1')
+define void @test2(i32 %len) {
+entry:
+  %0 = call i32 @bar(i32 %len);
+  %cmp = icmp ult i32 %len, 4
+  br i1 %cmp, label %bb, label %b1
+bb:
+  %cond = select i1 %cmp, i32 %len, i32 8
+  %cmp11 = icmp eq i32 %cond, 8
+  br i1 %cmp11, label %b0, label %b1
+
+b0:
+  call void @foo(i32 %len)
+  br label %b1
+
+b1:
+; CHECK: phi i32 [ %cond, %bb ], [ undef, %b0 ], [ %0, %entry ]
+  %1 = phi i32 [ %cond, %bb ], [ undef, %b0 ], [ %0, %entry ]
+  br label %ret
+
+ret:
+  call void @foo(i32 %1)
+  ret void
+}
diff --git a/test/Transforms/InstCombine/pr21651.ll b/test/Transforms/InstCombine/pr21651.ll
new file mode 100644
index 000000000000..914785f329af
--- /dev/null
+++ b/test/Transforms/InstCombine/pr21651.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @PR21651() {
+  switch i2 0, label %out [
+    i2 0, label %out
+    i2 1, label %out
+  ]
+
+out:
+  ret void
+}
+
+; CHECK-LABEL: define void @PR21651(
+; CHECK:   switch i2 0, label %out [
+; CHECK:     i2 0, label %out
+; CHECK:     i2 1, label %out
+; CHECK:   ]
+; CHECK: out:                                              ; preds = %0, %0, %0
+; CHECK:   ret void
+; CHECK: }
diff --git a/test/Transforms/InstCombine/pr21891.ll b/test/Transforms/InstCombine/pr21891.ll
new file mode 100644
index 000000000000..8194976b6233
--- /dev/null
+++ b/test/Transforms/InstCombine/pr21891.ll
@@ -0,0 +1,18 @@
+; RUN: opt %s -instcombine
+
+define i32 @f(i32 %theNumber) {
+entry:
+  %cmp = icmp sgt i32 %theNumber, -1
+  call void @llvm.assume(i1 %cmp)
+  br i1 true, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  %shl = shl nuw i32 %theNumber, 1
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %phi = phi i32 [ %shl, %if.then ], [ undef, %entry ]
+  ret i32 %phi
+}
+
+declare void @llvm.assume(i1)
diff --git a/test/Transforms/InstCombine/range-check.ll b/test/Transforms/InstCombine/range-check.ll
new file mode 100644
index 000000000000..35f11dd39ef3
--- /dev/null
+++ b/test/Transforms/InstCombine/range-check.ll
@@ -0,0 +1,159 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Check simplification of
+; (icmp sgt x, -1) & (icmp sgt/sge n, x) --> icmp ugt/uge n, x
+
+; CHECK-LABEL: define i1 @test_and1
+; CHECK: [[R:%[0-9]+]] = icmp ugt i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_and1(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp sge i32 %x, 0
+  %b = icmp slt i32 %x, %nn
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_and2
+; CHECK: [[R:%[0-9]+]] = icmp uge i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_and2(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp sgt i32 %x, -1
+  %b = icmp sle i32 %x, %nn
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_and3
+; CHECK: [[R:%[0-9]+]] = icmp ugt i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_and3(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp sgt i32 %nn, %x
+  %b = icmp sge i32 %x, 0
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_and4
+; CHECK: [[R:%[0-9]+]] = icmp uge i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_and4(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp sge i32 %nn, %x
+  %b = icmp sge i32 %x, 0
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_or1
+; CHECK: [[R:%[0-9]+]] = icmp ule i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_or1(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp slt i32 %x, 0
+  %b = icmp sge i32 %x, %nn
+  %c = or i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_or2
+; CHECK: [[R:%[0-9]+]] = icmp ult i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_or2(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp sle i32 %x, -1
+  %b = icmp sgt i32 %x, %nn
+  %c = or i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_or3
+; CHECK: [[R:%[0-9]+]] = icmp ule i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_or3(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp sle i32 %nn, %x
+  %b = icmp slt i32 %x, 0
+  %c = or i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @test_or4
+; CHECK: [[R:%[0-9]+]] = icmp ult i32 %nn, %x
+; CHECK: ret i1 [[R]]
+define i1 @test_or4(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp slt i32 %nn, %x
+  %b = icmp slt i32 %x, 0
+  %c = or i1 %a, %b
+  ret i1 %c
+}
+
+; Negative tests
+
+; CHECK-LABEL: define i1 @negative1
+; CHECK: %a = icmp
+; CHECK: %b = icmp
+; CHECK: %c = and i1 %a, %b
+; CHECK: ret i1 %c
+define i1 @negative1(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp slt i32 %x, %nn
+  %b = icmp sgt i32 %x, 0      ; should be: icmp sge
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @negative2
+; CHECK: %a = icmp
+; CHECK: %b = icmp
+; CHECK: %c = and i1 %a, %b
+; CHECK: ret i1 %c
+define i1 @negative2(i32 %x, i32 %n) {
+  %a = icmp slt i32 %x, %n     ; n can be negative
+  %b = icmp sge i32 %x, 0
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @negative3
+; CHECK: %a = icmp
+; CHECK: %b = icmp
+; CHECK: %c = and i1 %a, %b
+; CHECK: ret i1 %c
+define i1 @negative3(i32 %x, i32 %y, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp slt i32 %x, %nn
+  %b = icmp sge i32 %y, 0      ; should compare %x and not %y
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @negative4
+; CHECK: %a = icmp
+; CHECK: %b = icmp
+; CHECK: %c = and i1 %a, %b
+; CHECK: ret i1 %c
+define i1 @negative4(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp ne i32 %x, %nn     ; should be: icmp slt/sle
+  %b = icmp sge i32 %x, 0
+  %c = and i1 %a, %b
+  ret i1 %c
+}
+
+; CHECK-LABEL: define i1 @negative5
+; CHECK: %a = icmp
+; CHECK: %b = icmp
+; CHECK: %c = or i1 %a, %b
+; CHECK: ret i1 %c
+define i1 @negative5(i32 %x, i32 %n) {
+  %nn = and i32 %n, 2147483647
+  %a = icmp slt i32 %x, %nn
+  %b = icmp sge i32 %x, 0
+  %c = or i1 %a, %b            ; should be: and
+  ret i1 %c
+}
+
diff --git a/test/Transforms/InstCombine/select-cmp-br.ll b/test/Transforms/InstCombine/select-cmp-br.ll
new file mode 100644
index 000000000000..f10d58783a77
--- /dev/null
+++ b/test/Transforms/InstCombine/select-cmp-br.ll
@@ -0,0 +1,155 @@
+; Replace a 'select' with 'or' in 'select - cmp [eq|ne] - br' sequence
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+%C = type <{ %struct.S }>
+%struct.S = type { i64*, i32, i32 }
+
+declare void @bar(%struct.S *) #1
+declare void @foobar()
+
+define void @test1(%C*) {
+entry:
+  %1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
+  %m = load i64** %1, align 8
+  %2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
+  %n = load i64** %2, align 8
+  %3 = getelementptr inbounds i64* %m, i64 9
+  %4 = bitcast i64* %3 to i64 (%C*)**
+  %5 = load i64 (%C*)** %4, align 8
+  %6 = icmp eq i64* %m, %n
+  %7 = select i1 %6, %C* %0, %C* null
+  %8 = icmp eq %C* %7, null
+  br i1 %8, label %12, label %10
+
+; <label>:9                                       ; preds = %10, %12
+  ret void
+
+; <label>:10                                      ; preds = %entry
+  %11 = getelementptr inbounds %C* %7, i64 0, i32 0
+  tail call void @bar(%struct.S* %11)
+  br label %9
+
+; <label>:12                                      ; preds = %entry
+  %13 = tail call i64 %5(%C* %0)
+  br label %9
+; CHECK-LABEL: @test1(
+; CHECK-NOT: select
+; CHECK: or
+; CHECK-NOT: select
+}
+
+define void @test2(%C*) {
+entry:
+  %1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
+  %m = load i64** %1, align 8
+  %2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
+  %n = load i64** %2, align 8
+  %3 = getelementptr inbounds i64* %m, i64 9
+  %4 = bitcast i64* %3 to i64 (%C*)**
+  %5 = load i64 (%C*)** %4, align 8
+  %6 = icmp eq i64* %m, %n
+  %7 = select i1 %6, %C* null, %C* %0
+  %8 = icmp eq %C* %7, null
+  br i1 %8, label %12, label %10
+
+; <label>:9                                       ; preds = %10, %12
+  ret void
+
+; <label>:10                                      ; preds = %entry
+  %11 = getelementptr inbounds %C* %7, i64 0, i32 0
+  tail call void @bar(%struct.S* %11)
+  br label %9
+
+; <label>:12                                      ; preds = %entry
+  %13 = tail call i64 %5(%C* %0)
+  br label %9
+; CHECK-LABEL: @test2(
+; CHECK-NOT: select
+; CHECK: or
+; CHECK-NOT: select
+}
+
+define void @test3(%C*) {
+entry:
+  %1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
+  %m = load i64** %1, align 8
+  %2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
+  %n = load i64** %2, align 8
+  %3 = getelementptr inbounds i64* %m, i64 9
+  %4 = bitcast i64* %3 to i64 (%C*)**
+  %5 = load i64 (%C*)** %4, align 8
+  %6 = icmp eq i64* %m, %n
+  %7 = select i1 %6, %C* %0, %C* null
+  %8 = icmp ne %C* %7, null
+  br i1 %8, label %10, label %12
+
+; <label>:9                                       ; preds = %10, %12
+  ret void
+
+; <label>:10                                      ; preds = %entry
+  %11 = getelementptr inbounds %C* %7, i64 0, i32 0
+  tail call void @bar(%struct.S* %11)
+  br label %9
+
+; <label>:12                                      ; preds = %entry
+  %13 = tail call i64 %5(%C* %0)
+  br label %9
+; CHECK-LABEL: @test3(
+; CHECK-NOT: select
+; CHECK: or
+; CHECK-NOT: select
+}
+
+define void @test4(%C*) {
+entry:
+  %1 = getelementptr inbounds %C* %0, i64 0, i32 0, i32 0
+  %m = load i64** %1, align 8
+  %2 = getelementptr inbounds %C* %0, i64 1, i32 0, i32 0
+  %n = load i64** %2, align 8
+  %3 = getelementptr inbounds i64* %m, i64 9
+  %4 = bitcast i64* %3 to i64 (%C*)**
+  %5 = load i64 (%C*)** %4, align 8
+  %6 = icmp eq i64* %m, %n
+  %7 = select i1 %6, %C* null, %C* %0
+  %8 = icmp ne %C* %7, null
+  br i1 %8, label %10, label %12
+
+; <label>:9                                       ; preds = %10, %12
+  ret void
+
+; <label>:10                                      ; preds = %entry
+  %11 = getelementptr inbounds %C* %7, i64 0, i32 0
+  tail call void @bar(%struct.S* %11)
+  br label %9
+
+; <label>:12                                      ; preds = %entry
+  %13 = tail call i64 %5(%C* %0)
+  br label %9
+; CHECK-LABEL: @test4(
+; CHECK-NOT: select
+; CHECK: or
+; CHECK-NOT: select
+}
+
+define void @test5(%C*, i1) {
+entry:
+  %2 = select i1 %1, %C* null, %C* %0
+  %3 = icmp ne %C* %2, null
+  br i1 %3, label %5, label %7
+
+; <label>:4                                       ; preds = %10, %12
+  ret void
+
+; <label>:5                                      ; preds = %entry
+  %6 = getelementptr inbounds %C* %2, i64 0, i32 0
+  tail call void @bar(%struct.S* %6)
+  br label %4
+
+; <label>:7                                      ; preds = %entry
+  tail call void @foobar()
+  br label %4
+; CHECK-LABEL: @test5(
+; CHECK-NOT: select
+; CHECK: or
+; CHECK-NOT: select
+}
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index d625f3b1b33d..054f00a0de49 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -1,7 +1,8 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
 ; This test makes sure that these instructions are properly eliminated.
 ; PR1822
 
-; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64"
 
 define i32 @test1(i32 %A, i32 %B) {
         %C = select i1 false, i32 %A, i32 %B            
@@ -307,6 +308,26 @@ define i32 @test16(i1 %C, i32* %P) {
 ; CHECK: ret i32 %V
 }
 
+;; It may be legal to load from a null address in a non-zero address space
+define i32 @test16_neg(i1 %C, i32 addrspace(1)* %P) {
+        %P2 = select i1 %C, i32 addrspace(1)* %P, i32 addrspace(1)* null
+        %V = load i32 addrspace(1)* %P2
+        ret i32 %V
+; CHECK-LABEL: @test16_neg
+; CHECK-NEXT: %P2 = select i1 %C, i32 addrspace(1)* %P, i32 addrspace(1)* null
+; CHECK-NEXT: %V = load i32 addrspace(1)* %P2
+; CHECK: ret i32 %V
+}
+define i32 @test16_neg2(i1 %C, i32 addrspace(1)* %P) {
+        %P2 = select i1 %C, i32 addrspace(1)* null, i32 addrspace(1)* %P
+        %V = load i32 addrspace(1)* %P2
+        ret i32 %V
+; CHECK-LABEL: @test16_neg2
+; CHECK-NEXT: %P2 = select i1 %C, i32 addrspace(1)* null, i32 addrspace(1)* %P
+; CHECK-NEXT: %V = load i32 addrspace(1)* %P2
+; CHECK: ret i32 %V
+}
+
 define i1 @test17(i32* %X, i1 %C) {
         %R = select i1 %C, i32* %X, i32* null           
         %RV = icmp eq i32* %R, null             
@@ -916,9 +937,9 @@ define i32 @select_icmp_eq_and_4096_0_or_4096(i32 %x, i32 %y) {
 }
 
 ; CHECK-LABEL: @select_icmp_eq_0_and_1_or_1(
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i64 %x, 1
-; CHECK-NEXT: [[ZEXT:%[a-z0-9]+]] = trunc i64 [[AND]] to i32
-; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[XOR]], %y
+; CHECK-NEXT: [[TRUNC:%.+]] = trunc i64 %x to i32
+; CHECK-NEXT: [[AND:%.+]] = and i32 [[TRUNC]], 1
+; CHECK-NEXT: [[OR:%.+]] = or i32 [[XOR]], %y
 ; CHECK-NEXT: ret i32 [[OR]]
 define i32 @select_icmp_eq_0_and_1_or_1(i64 %x, i32 %y) {
   %and = and i64 %x, 1
@@ -957,11 +978,11 @@ define i32 @select_icmp_ne_0_and_32_or_4096(i32 %x, i32 %y) {
 }
 
 ; CHECK-LABEL: @select_icmp_ne_0_and_1073741824_or_8(
-; CHECK-NEXT: [[LSHR:%[a-z0-9]+]] = lshr i32 %x, 27
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[LSHR]], 8
-; CHECK-NEXT: [[TRUNC:%[a-z0-9]+]] = trunc i32 [[AND]] to i8
-; CHECK-NEXT: [[XOR:%[a-z0-9]+]] = xor i8 [[TRUNC]], 8
-; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i8 [[XOR]], %y
+; CHECK-NEXT: [[LSHR:%.+]] = lshr i32 %x, 27
+; CHECK-NEXT: [[TRUNC:%.+]] = trunc i32 [[LSHR]] to i8
+; CHECK-NEXT: [[AND:%.+]] = and i8 [[TRUNC]], 8
+; CHECK-NEXT: [[XOR:%.+]] = xor i8 [[AND]], 8
+; CHECK-NEXT: [[OR:%.+]] = or i8 [[XOR]], %y
 ; CHECK-NEXT: ret i8 [[OR]]
 define i8 @select_icmp_ne_0_and_1073741824_or_8(i32 %x, i8 %y) {
   %and = and i32 %x, 1073741824
@@ -996,17 +1017,6 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_vector_of_2s(i32 %x, <2 x i32> %y) {
   ret <2 x i32> %select
 }
 
-; CHECK-LABEL: @select_icmp_and_8_eq_0_or_8(
-; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 %x, 8
-; CHECK-NEXT: ret i32 [[OR]]
-define i32 @select_icmp_and_8_eq_0_or_8(i32 %x) {
-  %and = and i32 %x, 8
-  %cmp = icmp eq i32 %and, 0
-  %or = or i32 %x, 8
-  %or.x = select i1 %cmp, i32 %or, i32 %x
-  ret i32 %or.x
-}
-
 ; CHECK-LABEL: @select_icmp_and_8_ne_0_xor_8(
 ; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, -9
 ; CHECK-NEXT: ret i32 [[AND]]
@@ -1029,27 +1039,6 @@ define i32 @select_icmp_and_8_eq_0_xor_8(i32 %x) {
   ret i32 %xor.x
 }
 
-; CHECK-LABEL: @select_icmp_and_8_ne_0_and_not_8(
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, -9
-; CHECK-NEXT: ret i32 [[AND]]
-define i32 @select_icmp_and_8_ne_0_and_not_8(i32 %x) {
-  %and = and i32 %x, 8
-  %cmp = icmp eq i32 %and, 0
-  %and1 = and i32 %x, -9
-  %x.and1 = select i1 %cmp, i32 %x, i32 %and1
-  ret i32 %x.and1
-}
-
-; CHECK-LABEL: @select_icmp_and_8_eq_0_and_not_8(
-; CHECK-NEXT: ret i32 %x
-define i32 @select_icmp_and_8_eq_0_and_not_8(i32 %x) {
-  %and = and i32 %x, 8
-  %cmp = icmp eq i32 %and, 0
-  %and1 = and i32 %x, -9
-  %and1.x = select i1 %cmp, i32 %and1, i32 %x
-  ret i32 %and1.x
-}
-
 ; CHECK-LABEL: @select_icmp_x_and_8_eq_0_y_xor_8(
 ; CHECK: select i1 %cmp, i64 %y, i64 %xor
 define i64 @select_icmp_x_and_8_eq_0_y_xor_8(i32 %x, i64 %y) {
@@ -1060,16 +1049,6 @@ define i64 @select_icmp_x_and_8_eq_0_y_xor_8(i32 %x, i64 %y) {
   ret i64 %y.xor
 }
 
-; CHECK-LABEL: @select_icmp_x_and_8_eq_0_y_and_not_8(
-; CHECK: select i1 %cmp, i64 %y, i64 %and1
-define i64 @select_icmp_x_and_8_eq_0_y_and_not_8(i32 %x, i64 %y) {
-  %and = and i32 %x, 8
-  %cmp = icmp eq i32 %and, 0
-  %and1 = and i64 %y, -9
-  %y.and1 = select i1 %cmp, i64 %y, i64 %and1
-  ret i64 %y.and1
-}
-
 ; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_xor_8(
 ; CHECK: select i1 %cmp, i64 %xor, i64 %y
 define i64 @select_icmp_x_and_8_ne_0_y_xor_8(i32 %x, i64 %y) {
@@ -1080,16 +1059,6 @@ define i64 @select_icmp_x_and_8_ne_0_y_xor_8(i32 %x, i64 %y) {
   ret i64 %xor.y
 }
 
-; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_and_not_8(
-; CHECK: select i1 %cmp, i64 %and1, i64 %y
-define i64 @select_icmp_x_and_8_ne_0_y_and_not_8(i32 %x, i64 %y) {
-  %and = and i32 %x, 8
-  %cmp = icmp eq i32 %and, 0
-  %and1 = and i64 %y, -9
-  %and1.y = select i1 %cmp, i64 %and1, i64 %y
-  ret i64 %and1.y
-}
-
 ; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_or_8(
 ; CHECK: xor i64 %1, 8
 ; CHECK: or i64 %2, %y
@@ -1101,6 +1070,39 @@ define i64 @select_icmp_x_and_8_ne_0_y_or_8(i32 %x, i64 %y) {
   ret i64 %or.y
 }
 
+; CHECK-LABEL: @select_icmp_and_2147483648_ne_0_xor_2147483648(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 2147483647
+; CHECK-NEXT: ret i32 [[AND]]
+define i32 @select_icmp_and_2147483648_ne_0_xor_2147483648(i32 %x) {
+  %and = and i32 %x, 2147483648
+  %cmp = icmp eq i32 %and, 0
+  %xor = xor i32 %x, 2147483648
+  %x.xor = select i1 %cmp, i32 %x, i32 %xor
+  ret i32 %x.xor
+}
+
+; CHECK-LABEL: @select_icmp_and_2147483648_eq_0_xor_2147483648(
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 %x, -2147483648
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_and_2147483648_eq_0_xor_2147483648(i32 %x) {
+  %and = and i32 %x, 2147483648
+  %cmp = icmp eq i32 %and, 0
+  %xor = xor i32 %x, 2147483648
+  %xor.x = select i1 %cmp, i32 %xor, i32 %x
+  ret i32 %xor.x
+}
+
+; CHECK-LABEL: @select_icmp_x_and_2147483648_ne_0_or_2147483648(
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 %x, -2147483648
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_x_and_2147483648_ne_0_or_2147483648(i32 %x) {
+  %and = and i32 %x, 2147483648
+  %cmp = icmp eq i32 %and, 0
+  %or = or i32 %x, 2147483648
+  %or.x = select i1 %cmp, i32 %or, i32 %x
+  ret i32 %or.x
+}
+
 define i32 @test65(i64 %x) {
   %1 = and i64 %x, 16
   %2 = icmp ne i64 %1, 0
@@ -1108,10 +1110,11 @@ define i32 @test65(i64 %x) {
   ret i32 %3
 
 ; CHECK-LABEL: @test65(
-; CHECK: and i64 %x, 16
-; CHECK: trunc i64 %1 to i32
-; CHECK: lshr exact i32 %2, 3
-; CHECK: xor i32 %3, 42
+; CHECK: %[[TRUNC:.*]] = trunc i64 %x to i32
+; CHECK: %[[LSHR:.*]] = lshr i32 %[[TRUNC]], 3
+; CHECK: %[[AND:.*]] = and i32 %[[LSHR]], 2
+; CHECK: %[[XOR:.*]] = xor i32 %[[AND]], 42
+; CHECK: ret i32 %[[XOR]]
 }
 
 define i32 @test66(i64 %x) {
@@ -1236,3 +1239,256 @@ define i32 @test75(i32 %x) {
 ; CHECK-NEXT: [[SEL:%[a-z0-9]+]] = select i1 [[CMP]], i32 68, i32 %x
 ; CHECK-NEXT: ret i32 [[SEL]]
 }
+
+@under_aligned = external global i32, align 1
+
+define i32 @test76(i1 %flag, i32* %x) {
+; The load here must not be speculated around the select. One side of the
+; select is trivially dereferencable but may have a lower alignment than the
+; load does.
+; CHECK-LABEL: @test76(
+; CHECK: store i32 0, i32* %x
+; CHECK: %[[P:.*]] = select i1 %flag, i32* @under_aligned, i32* %x
+; CHECK: load i32* %[[P]]
+
+  store i32 0, i32* %x
+  %p = select i1 %flag, i32* @under_aligned, i32* %x
+  %v = load i32* %p
+  ret i32 %v
+}
+
+declare void @scribble_on_i32(i32*)
+
+define i32 @test77(i1 %flag, i32* %x) {
+; The load here must not be speculated around the select. One side of the
+; select is trivially dereferencable but may have a lower alignment than the
+; load does.
+; CHECK-LABEL: @test77(
+; CHECK: %[[A:.*]] = alloca i32, align 1
+; CHECK: call void @scribble_on_i32(i32* %[[A]])
+; CHECK: store i32 0, i32* %x
+; CHECK: %[[P:.*]] = select i1 %flag, i32* %[[A]], i32* %x
+; CHECK: load i32* %[[P]]
+
+  %under_aligned = alloca i32, align 1
+  call void @scribble_on_i32(i32* %under_aligned)
+  store i32 0, i32* %x
+  %p = select i1 %flag, i32* %under_aligned, i32* %x
+  %v = load i32* %p
+  ret i32 %v
+}
+
+define i32 @test78(i1 %flag, i32* %x, i32* %y, i32* %z) {
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+; CHECK-LABEL: @test78(
+; CHECK:         %[[V1:.*]] = load i32* %x
+; CHECK-NEXT:    %[[V2:.*]] = load i32* %y
+; CHECK-NEXT:    %[[S:.*]] = select i1 %flag, i32 %[[V1]], i32 %[[V2]]
+; CHECK-NEXT:    ret i32 %[[S]]
+entry:
+  store i32 0, i32* %x
+  store i32 0, i32* %y
+  ; Block forwarding by storing to %z which could alias either %x or %y.
+  store i32 42, i32* %z
+  %p = select i1 %flag, i32* %x, i32* %y
+  %v = load i32* %p
+  ret i32 %v
+}
+
+define float @test79(i1 %flag, float* %x, i32* %y, i32* %z) {
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+; CHECK-LABEL: @test79(
+; CHECK:         %[[V1:.*]] = load float* %x
+; CHECK-NEXT:    %[[V2:.*]] = load float* %y
+; CHECK-NEXT:    %[[S:.*]] = select i1 %flag, float %[[V1]], float %[[V2]]
+; CHECK-NEXT:    ret float %[[S]]
+entry:
+  %x1 = bitcast float* %x to i32*
+  %y1 = bitcast i32* %y to float*
+  store i32 0, i32* %x1
+  store i32 0, i32* %y
+  ; Block forwarding by storing to %z which could alias either %x or %y.
+  store i32 42, i32* %z
+  %p = select i1 %flag, float* %x, float* %y1
+  %v = load float* %p
+  ret float %v
+}
+
+define i32 @test80(i1 %flag) {
+; Test that when we speculate the loads around the select they fold throug
+; load->load folding and load->store folding.
+; CHECK-LABEL: @test80(
+; CHECK:         %[[X:.*]] = alloca i32
+; CHECK-NEXT:    %[[Y:.*]] = alloca i32
+; CHECK:         %[[V:.*]] = load i32* %[[X]]
+; CHECK-NEXT:    store i32 %[[V]], i32* %[[Y]]
+; CHECK-NEXT:    ret i32 %[[V]]
+entry:
+  %x = alloca i32
+  %y = alloca i32
+  call void @scribble_on_i32(i32* %x)
+  call void @scribble_on_i32(i32* %y)
+  %tmp = load i32* %x
+  store i32 %tmp, i32* %y
+  %p = select i1 %flag, i32* %x, i32* %y
+  %v = load i32* %p
+  ret i32 %v
+}
+
+define float @test81(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers.
+; CHECK-LABEL: @test81(
+; CHECK:         %[[X:.*]] = alloca i32
+; CHECK-NEXT:    %[[Y:.*]] = alloca i32
+; CHECK:         %[[V:.*]] = load i32* %[[X]]
+; CHECK-NEXT:    store i32 %[[V]], i32* %[[Y]]
+; CHECK-NEXT:    %[[C:.*]] = bitcast i32 %[[V]] to float
+; CHECK-NEXT:    ret float %[[C]]
+entry:
+  %x = alloca float
+  %y = alloca i32
+  %x1 = bitcast float* %x to i32*
+  %y1 = bitcast i32* %y to float*
+  call void @scribble_on_i32(i32* %x1)
+  call void @scribble_on_i32(i32* %y)
+  %tmp = load i32* %x1
+  store i32 %tmp, i32* %y
+  %p = select i1 %flag, float* %x, float* %y1
+  %v = load float* %p
+  ret float %v
+}
+
+define i32 @test82(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers.
+; CHECK-LABEL: @test82(
+; CHECK:         %[[X:.*]] = alloca float
+; CHECK-NEXT:    %[[Y:.*]] = alloca i32
+; CHECK-NEXT:    %[[X1:.*]] = bitcast float* %[[X]] to i32*
+; CHECK-NEXT:    %[[Y1:.*]] = bitcast i32* %[[Y]] to float*
+; CHECK:         %[[V:.*]] = load float* %[[X]]
+; CHECK-NEXT:    store float %[[V]], float* %[[Y1]]
+; CHECK-NEXT:    %[[C:.*]] = bitcast float %[[V]] to i32
+; CHECK-NEXT:    ret i32 %[[C]]
+entry:
+  %x = alloca float
+  %y = alloca i32
+  %x1 = bitcast float* %x to i32*
+  %y1 = bitcast i32* %y to float*
+  call void @scribble_on_i32(i32* %x1)
+  call void @scribble_on_i32(i32* %y)
+  %tmp = load float* %x
+  store float %tmp, float* %y1
+  %p = select i1 %flag, i32* %x1, i32* %y
+  %v = load i32* %p
+  ret i32 %v
+}
+
+declare void @scribble_on_i64(i64*)
+declare void @scribble_on_i128(i128*)
+
+define i8* @test83(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers and requires inttoptr casts.
+; CHECK-LABEL: @test83(
+; CHECK:         %[[X:.*]] = alloca i8*
+; CHECK-NEXT:    %[[Y:.*]] = alloca i8*
+; CHECK-DAG:     %[[X2:.*]] = bitcast i8** %[[X]] to i64*
+; CHECK-DAG:     %[[Y2:.*]] = bitcast i8** %[[Y]] to i64*
+; CHECK:         %[[V:.*]] = load i64* %[[X2]]
+; CHECK-NEXT:    store i64 %[[V]], i64* %[[Y2]]
+; CHECK-NEXT:    %[[C:.*]] = inttoptr i64 %[[V]] to i8*
+; CHECK-NEXT:    ret i8* %[[S]]
+entry:
+  %x = alloca i8*
+  %y = alloca i64
+  %x1 = bitcast i8** %x to i64*
+  %y1 = bitcast i64* %y to i8**
+  call void @scribble_on_i64(i64* %x1)
+  call void @scribble_on_i64(i64* %y)
+  %tmp = load i64* %x1
+  store i64 %tmp, i64* %y
+  %p = select i1 %flag, i8** %x, i8** %y1
+  %v = load i8** %p
+  ret i8* %v
+}
+
+define i64 @test84(i1 %flag) {
+; Test that we can speculate the load around the select even though they use
+; differently typed pointers and requires a ptrtoint cast.
+; CHECK-LABEL: @test84(
+; CHECK:         %[[X:.*]] = alloca i8*
+; CHECK-NEXT:    %[[Y:.*]] = alloca i8*
+; CHECK:         %[[V:.*]] = load i8** %[[X]]
+; CHECK-NEXT:    store i8* %[[V]], i8** %[[Y]]
+; CHECK-NEXT:    %[[C:.*]] = ptrtoint i8* %[[V]] to i64
+; CHECK-NEXT:    ret i64 %[[C]]
+entry:
+  %x = alloca i8*
+  %y = alloca i64
+  %x1 = bitcast i8** %x to i64*
+  %y1 = bitcast i64* %y to i8**
+  call void @scribble_on_i64(i64* %x1)
+  call void @scribble_on_i64(i64* %y)
+  %tmp = load i8** %x
+  store i8* %tmp, i8** %y1
+  %p = select i1 %flag, i64* %x1, i64* %y
+  %v = load i64* %p
+  ret i64 %v
+}
+
+define i8* @test85(i1 %flag) {
+; Test that we can't speculate the load around the select. The load of the
+; pointer doesn't load all of the stored integer bits. We could fix this, but it
+; would require endianness checks and other nastiness.
+; CHECK-LABEL: @test85(
+; CHECK:         %[[T:.*]] = load i128*
+; CHECK-NEXT:    store i128 %[[T]], i128*
+; CHECK-NEXT:    %[[X:.*]] = load i8**
+; CHECK-NEXT:    %[[Y:.*]] = load i8**
+; CHECK-NEXT:    %[[V:.*]] = select i1 %flag, i8* %[[X]], i8* %[[Y]]
+; CHECK-NEXT:    ret i8* %[[V]]
+entry:
+  %x = alloca [2 x i8*]
+  %y = alloca i128
+  %x1 = bitcast [2 x i8*]* %x to i8**
+  %x2 = bitcast i8** %x1 to i128*
+  %y1 = bitcast i128* %y to i8**
+  call void @scribble_on_i128(i128* %x2)
+  call void @scribble_on_i128(i128* %y)
+  %tmp = load i128* %x2
+  store i128 %tmp, i128* %y
+  %p = select i1 %flag, i8** %x1, i8** %y1
+  %v = load i8** %p
+  ret i8* %v
+}
+
+define i128 @test86(i1 %flag) {
+; Test that we can't speculate the load around the select when the integer size
+; is larger than the pointer size. The store of the pointer doesn't store to all
+; the bits of the integer.
+;
+; CHECK-LABEL: @test86(
+; CHECK:         %[[T:.*]] = load i8**
+; CHECK-NEXT:    store i8* %[[T]], i8**
+; CHECK-NEXT:    %[[X:.*]] = load i128*
+; CHECK-NEXT:    %[[Y:.*]] = load i128*
+; CHECK-NEXT:    %[[V:.*]] = select i1 %flag, i128 %[[X]], i128 %[[Y]]
+; CHECK-NEXT:    ret i128 %[[V]]
+entry:
+  %x = alloca [2 x i8*]
+  %y = alloca i128
+  %x1 = bitcast [2 x i8*]* %x to i8**
+  %x2 = bitcast i8** %x1 to i128*
+  %y1 = bitcast i128* %y to i8**
+  call void @scribble_on_i128(i128* %x2)
+  call void @scribble_on_i128(i128* %y)
+  %tmp = load i8** %x1
+  store i8* %tmp, i8** %y1
+  %p = select i1 %flag, i128* %x2, i128* %y
+  %v = load i128* %p
+  ret i128 %v
+}
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 5586bb652783..0b5b5deb68c5 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -57,7 +57,7 @@ define <4 x i32> @test5_zero_vector(<4 x i32> %A) {
 define <4 x i32> @test5_non_splat_vector(<4 x i32> %A) {
 ; CHECK-LABEL: @test5_non_splat_vector(
 ; CHECK-NOT: ret <4 x i32> undef
-  %B = shl <4 x i32> %A, <i32 32, i32 1, i32 2, i32 3>
+  %B = lshr <4 x i32> %A, <i32 32, i32 1, i32 2, i32 3>
   ret <4 x i32> %B
 }
 
@@ -84,14 +84,14 @@ define <4 x i32> @test5a_non_splat_vector(<4 x i32> %A) {
 
 define i32 @test5b() {
 ; CHECK-LABEL: @test5b(
-; CHECK: ret i32 -1
+; CHECK: ret i32 0
         %B = ashr i32 undef, 2  ;; top two bits must be equal, so not undef
         ret i32 %B
 }
 
 define i32 @test5b2(i32 %A) {
 ; CHECK-LABEL: @test5b2(
-; CHECK: ret i32 -1
+; CHECK: ret i32 0
         %B = ashr i32 undef, %A  ;; top %A bits must be equal, so not undef
         ret i32 %B
 }
@@ -738,67 +738,49 @@ define i32 @test56(i32 %x) {
 
 
 define i32 @test57(i32 %x) {
-  %shr = lshr i32 %x, 1
-  %shl = shl i32 %shr, 4
-  %and = and i32 %shl, 16
-  ret i32 %and
-; CHECK-LABEL: @test57(
-; CHECK: shl i32 %x, 3
-}
-
-define i32 @test58(i32 %x) {
-  %shr = lshr i32 %x, 1
-  %shl = shl i32 %shr, 4
-  %or = or i32 %shl, 8
-  ret i32 %or
-; CHECK-LABEL: @test58(
-; CHECK: shl i32 %x, 3
-}
-
-define i32 @test59(i32 %x) {
   %shr = ashr i32 %x, 1
   %shl = shl i32 %shr, 4
   %or = or i32 %shl, 7
   ret i32 %or
-; CHECK-LABEL: @test59(
+; CHECK-LABEL: @test57(
 ; CHECK: %shl = shl i32 %shr1, 4
 }
 
 
-define i32 @test60(i32 %x) {
+define i32 @test58(i32 %x) {
   %shr = ashr i32 %x, 4
   %shl = shl i32 %shr, 1
   %or = or i32 %shl, 1
   ret i32 %or
-; CHECK-LABEL: @test60(
+; CHECK-LABEL: @test58(
 ; CHECK: ashr i32 %x, 3
 }
 
 
-define i32 @test61(i32 %x) {
+define i32 @test59(i32 %x) {
   %shr = ashr i32 %x, 4
   %shl = shl i32 %shr, 1
   %or = or i32 %shl, 2
   ret i32 %or
-; CHECK-LABEL: @test61(
+; CHECK-LABEL: @test59(
 ; CHECK: ashr i32 %x, 4
 }
 
 ; propagate "exact" trait
-define i32 @test62(i32 %x) {
+define i32 @test60(i32 %x) {
   %shr = ashr exact i32 %x, 4
   %shl = shl i32 %shr, 1
   %or = or i32 %shl, 1
   ret i32 %or
-; CHECK-LABEL: @test62(
+; CHECK-LABEL: @test60(
 ; CHECK: ashr exact i32 %x, 3
 }
 
 ; PR17026
-; CHECK-LABEL: @test63(
+; CHECK-LABEL: @test61(
 ; CHECK-NOT: sh
 ; CHECK: ret
-define void @test63(i128 %arg) {
+define void @test61(i128 %arg) {
 bb:
   br i1 undef, label %bb1, label %bb12
 
@@ -830,29 +812,29 @@ bb12:                                             ; preds = %bb11, %bb8, %bb
   ret void
 }
 
-define i32 @test64(i32 %a) {
-; CHECK-LABEL: @test64(
+define i32 @test62(i32 %a) {
+; CHECK-LABEL: @test62(
 ; CHECK-NEXT: ret i32 undef
   %b = ashr i32 %a, 32  ; shift all bits out
   ret i32 %b
 }
 
-define <4 x i32> @test64_splat_vector(<4 x i32> %a) {
-; CHECK-LABEL: @test64_splat_vector
+define <4 x i32> @test62_splat_vector(<4 x i32> %a) {
+; CHECK-LABEL: @test62_splat_vector
 ; CHECK-NEXT: ret <4 x i32> undef
   %b = ashr <4 x i32> %a, <i32 32, i32 32, i32 32, i32 32>  ; shift all bits out
   ret <4 x i32> %b
 }
 
-define <4 x i32> @test64_non_splat_vector(<4 x i32> %a) {
-; CHECK-LABEL: @test64_non_splat_vector
+define <4 x i32> @test62_non_splat_vector(<4 x i32> %a) {
+; CHECK-LABEL: @test62_non_splat_vector
 ; CHECK-NOT: ret <4 x i32> undef
   %b = ashr <4 x i32> %a, <i32 32, i32 0, i32 1, i32 2>  ; shift all bits out
   ret <4 x i32> %b
 }
 
-define <2 x i65> @test_65(<2 x i64> %t) {
-; CHECK-LABEL: @test_65
+define <2 x i65> @test_63(<2 x i64> %t) {
+; CHECK-LABEL: @test_63
   %a = zext <2 x i64> %t to <2 x i65>
   %sext = shl <2 x i65> %a, <i65 33, i65 33>
   %b = ashr <2 x i65> %sext, <i65 33, i65 33>
diff --git a/test/Transforms/InstCombine/signext.ll b/test/Transforms/InstCombine/signext.ll
index d7004977cd63..3a714d7046d3 100644
--- a/test/Transforms/InstCombine/signext.ll
+++ b/test/Transforms/InstCombine/signext.ll
@@ -34,54 +34,45 @@ define i32 @test3(i16 %P) {
 ; CHECK: ret i32 %tmp.5
 }
 
-define i32 @test4(i16 %P) {
-        %tmp.1 = zext i16 %P to i32             ; <i32> [#uses=1]
-        %tmp.4 = xor i32 %tmp.1, 32768          ; <i32> [#uses=1]
-        %tmp.5 = add i32 %tmp.4, -32768         ; <i32> [#uses=1]
-        ret i32 %tmp.5
-; CHECK-LABEL: @test4(
-; CHECK: %tmp.5 = sext i16 %P to i32
-; CHECK: ret i32 %tmp.5
-}
-
-define i32 @test5(i32 %x) {
+define i32 @test4(i32 %x) {
         %tmp.1 = and i32 %x, 255                ; <i32> [#uses=1]
         %tmp.2 = xor i32 %tmp.1, 128            ; <i32> [#uses=1]
         %tmp.3 = add i32 %tmp.2, -128           ; <i32> [#uses=1]
         ret i32 %tmp.3
-; CHECK-LABEL: @test5(
+; CHECK-LABEL: @test4(
 ; CHECK: %sext = shl i32 %x, 24
 ; CHECK: %tmp.3 = ashr exact i32 %sext, 24
 ; CHECK: ret i32 %tmp.3
 }
 
-define i32 @test6(i32 %x) {
+define i32 @test5(i32 %x) {
         %tmp.2 = shl i32 %x, 16         ; <i32> [#uses=1]
         %tmp.4 = ashr i32 %tmp.2, 16            ; <i32> [#uses=1]
         ret i32 %tmp.4
-; CHECK-LABEL: @test6(
+; CHECK-LABEL: @test5(
 ; CHECK: %tmp.2 = shl i32 %x, 16
 ; CHECK: %tmp.4 = ashr exact i32 %tmp.2, 16
 ; CHECK: ret i32 %tmp.4
 }
 
-define i32 @test7(i16 %P) {
+define i32 @test6(i16 %P) {
   %tmp.1 = zext i16 %P to i32                     ; <i32> [#uses=1]
   %sext1 = shl i32 %tmp.1, 16                     ; <i32> [#uses=1]
   %tmp.5 = ashr i32 %sext1, 16                    ; <i32> [#uses=1]
   ret i32 %tmp.5
-; CHECK-LABEL: @test7(
+; CHECK-LABEL: @test6(
 ; CHECK: %tmp.5 = sext i16 %P to i32
 ; CHECK: ret i32 %tmp.5
 }
 
-define i32 @test8(i32 %x) nounwind readnone {
+define i32 @test7(i32 %x) nounwind readnone {
 entry:
   %shr = lshr i32 %x, 5                           ; <i32> [#uses=1]
   %xor = xor i32 %shr, 67108864                   ; <i32> [#uses=1]
   %sub = add i32 %xor, -67108864                  ; <i32> [#uses=1]
   ret i32 %sub
-; CHECK-LABEL: @test8(
+; CHECK-LABEL: @test7(
 ; CHECK: %sub = ashr i32 %x, 5
 ; CHECK: ret i32 %sub
 }
+
diff --git a/test/Transforms/InstCombine/statepoint.ll b/test/Transforms/InstCombine/statepoint.ll
new file mode 100644
index 000000000000..bee219db9c12
--- /dev/null
+++ b/test/Transforms/InstCombine/statepoint.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; These tests check the optimizations specific to
+; pointers being relocated at a statepoint.
+
+
+declare void @func()
+
+define i1 @test_negative(i32 addrspace(1)* %p) {
+entry:
+  %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
+  %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 4, i32 4)
+  %cmp = icmp eq i32 addrspace(1)* %pnew, null
+  ret i1 %cmp
+; CHECK-LABEL: test_negative
+; CHECK: %pnew = call i32 addrspace(1)*
+; CHECK: ret i1 %cmp
+}
+
+define i1 @test_nonnull(i32 addrspace(1)* nonnull %p) {
+entry:
+  %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
+  %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 4, i32 4)
+  %cmp = icmp eq i32 addrspace(1)* %pnew, null
+  ret i1 %cmp
+; CHECK-LABEL: test_nonnull
+; CHECK: ret i1 false
+}
+
+define i1 @test_null() {
+entry:
+  %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* null)
+  %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 4, i32 4)
+  %cmp = icmp eq i32 addrspace(1)* %pnew, null
+  ret i1 %cmp
+; CHECK-LABEL: test_null
+; CHECK-NOT: %pnew
+; CHECK: ret i1 true
+}
+
+define i1 @test_undef() {
+entry:
+  %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* undef)
+  %pnew = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %safepoint_token, i32 4, i32 4)
+  %cmp = icmp eq i32 addrspace(1)* %pnew, null
+  ret i1 %cmp
+; CHECK-LABEL: test_undef
+; CHECK-NOT: %pnew
+; CHECK: ret i1 undef
+}
+
+declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()*, i32, i32, ...)
+declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32, i32, i32) #3
diff --git a/test/Transforms/InstCombine/store.ll b/test/Transforms/InstCombine/store.ll
index b64c800e546c..0bb175923e98 100644
--- a/test/Transforms/InstCombine/store.ll
+++ b/test/Transforms/InstCombine/store.ll
@@ -113,8 +113,8 @@ for.end:                                          ; preds = %for.cond
 ; CHECK-NEXT: store i32 %storemerge, i32* %gi, align 4, !tbaa !0
 }
 
-!0 = metadata !{metadata !4, metadata !4, i64 0}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"float", metadata !1}
-!4 = metadata !{metadata !"int", metadata !1}
+!0 = !{!4, !4, i64 0}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"float", !1}
+!4 = !{!"int", !1}
diff --git a/test/Transforms/InstCombine/strcmp-1.ll b/test/Transforms/InstCombine/strcmp-1.ll
index fc58ffcb8cb0..9bbd7dbe7da8 100644
--- a/test/Transforms/InstCombine/strcmp-1.ll
+++ b/test/Transforms/InstCombine/strcmp-1.ll
@@ -15,7 +15,7 @@ define i32 @test1(i8* %str2) {
 ; CHECK-LABEL: @test1(
 ; CHECK: %strcmpload = load i8* %str
 ; CHECK: %1 = zext i8 %strcmpload to i32
-; CHECK: %2 = sub i32 0, %1
+; CHECK: %2 = sub nsw i32 0, %1
 ; CHECK: ret i32 %2
 
   %str1 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
diff --git a/test/Transforms/InstCombine/strncmp-1.ll b/test/Transforms/InstCombine/strncmp-1.ll
index df30dd100443..49b095554c2c 100644
--- a/test/Transforms/InstCombine/strncmp-1.ll
+++ b/test/Transforms/InstCombine/strncmp-1.ll
@@ -15,7 +15,7 @@ define i32 @test1(i8* %str2) {
 ; CHECK-LABEL: @test1(
 ; CHECK: %strcmpload = load i8* %str
 ; CHECK: %1 = zext i8 %strcmpload to i32
-; CHECK: %2 = sub i32 0, %1
+; CHECK: %2 = sub nsw i32 0, %1
 ; CHECK: ret i32 %2
 
   %str1 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
@@ -73,7 +73,7 @@ define i32 @test6(i8* %str1, i8* %str2) {
 ; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32
 ; CHECK: [[LOAD2:%[a-z]+]] = load i8* %str2, align 1
 ; CHECK: [[ZEXT2:%[a-z]+]] = zext i8 [[LOAD2]] to i32
-; CHECK: [[RET:%[a-z]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]
+; CHECK: [[RET:%[a-z]+]] = sub nsw i32 [[ZEXT1]], [[ZEXT2]]
 ; CHECK: ret i32 [[RET]]
 
   %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 1)
diff --git a/test/Transforms/InstCombine/struct-assign-tbaa.ll b/test/Transforms/InstCombine/struct-assign-tbaa.ll
index c80e31ae3ddc..4e4b3f2a1a88 100644
--- a/test/Transforms/InstCombine/struct-assign-tbaa.ll
+++ b/test/Transforms/InstCombine/struct-assign-tbaa.ll
@@ -35,12 +35,12 @@ define i32 (i8*, i32*, double*)*** @test2() {
   ret i32 (i8*, i32*, double*)*** %tmp2
 }
 
-; CHECK: !0 = metadata !{metadata !1, metadata !1, i64 0}
-; CHECK: !1 = metadata !{metadata !"float", metadata !2}
-
-!0 = metadata !{metadata !"Simple C/C++ TBAA"}
-!1 = metadata !{metadata !"omnipotent char", metadata !0}
-!2 = metadata !{metadata !5, metadata !5, i64 0}
-!3 = metadata !{i64 0, i64 4, metadata !2}
-!4 = metadata !{i64 0, i64 8, null}
-!5 = metadata !{metadata !"float", metadata !0}
+; CHECK: !0 = !{!1, !1, i64 0}
+; CHECK: !1 = !{!"float", !2}
+
+!0 = !{!"Simple C/C++ TBAA"}
+!1 = !{!"omnipotent char", !0}
+!2 = !{!5, !5, i64 0}
+!3 = !{i64 0, i64 4, !2}
+!4 = !{i64 0, i64 8, null}
+!5 = !{!"float", !0}
diff --git a/test/Transforms/InstCombine/sub-xor.ll b/test/Transforms/InstCombine/sub-xor.ll
index e7aff00ba8d0..3a24074e72a7 100644
--- a/test/Transforms/InstCombine/sub-xor.ll
+++ b/test/Transforms/InstCombine/sub-xor.ll
@@ -32,7 +32,7 @@ define i32 @test3(i32 %x) nounwind {
 
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT: and i32 %x, 31
-; CHECK-NEXT: sub i32 73, %and
+; CHECK-NEXT: sub nsw i32 73, %and
 ; CHECK-NEXT: ret
 }
 
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 67b7c4996b07..0e421f75f139 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -142,8 +142,9 @@ define i32 @test15(i32 %A, i32 %B) {
 	%D = srem i32 %B, %C
 	ret i32 %D
 ; CHECK-LABEL: @test15(
-; CHECK: %D = srem i32 %B, %A
-; CHECK: ret i32 %D
+; CHECK:      %[[sub:.*]] = sub i32 0, %A
+; CHECK-NEXT: %[[rem:.*]] = srem i32 %B, %[[sub]]
+; CHECK: ret i32 %[[rem]]
 }
 
 define i32 @test16(i32 %A) {
@@ -464,3 +465,88 @@ define i32 @test38(i32 %A) {
 ; CHECK-NEXT: [[SEXT:%.*]] = sext i1 [[ICMP]] to i32
 ; CHECK-NEXT: ret i32 [[SEXT]]
 }
+
+define i32 @test39(i32 %A, i32 %x) {
+  %B = sub i32 0, %A
+  %C = sub nsw i32 %x, %B
+  ret i32 %C
+; CHECK-LABEL: @test39(
+; CHECK: %C = add i32 %x, %A
+; CHECK: ret i32 %C
+}
+
+define i16 @test40(i16 %a, i16 %b) {
+  %ashr = ashr i16 %a, 1
+  %ashr1 = ashr i16 %b, 1
+  %sub = sub i16 %ashr, %ashr1
+  ret i16 %sub
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: [[ASHR:%.*]] = ashr i16 %a, 1
+; CHECK-NEXT: [[ASHR1:%.*]] = ashr i16 %b, 1
+; CHECK-NEXT: [[RET:%.*]] = sub nsw i16 [[ASHR]], [[ASHR1]]
+; CHECK: ret i16 [[RET]]
+}
+
+define i32 @test41(i16 %a, i16 %b) {
+  %conv = sext i16 %a to i32
+  %conv1 = sext i16 %b to i32
+  %sub = sub i32 %conv, %conv1
+  ret i32 %sub
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: [[SEXT:%.*]] = sext i16 %a to i32
+; CHECK-NEXT: [[SEXT1:%.*]] = sext i16 %b to i32
+; CHECK-NEXT: [[RET:%.*]] = sub nsw i32 [[SEXT]], [[SEXT1]]
+; CHECK: ret i32 [[RET]]
+}
+
+define i4 @test42(i4 %x, i4 %y) {
+  %a = and i4 %y, 7
+  %b = and i4 %x, 7
+  %c = sub i4 %a, %b
+  ret i4 %c
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[AND:%.*]] = and i4 %y, 7
+; CHECK-NEXT: [[AND1:%.*]] = and i4 %x, 7
+; CHECK-NEXT: [[RET:%.*]] = sub nsw i4 [[AND]], [[AND1]]
+; CHECK: ret i4 [[RET]]
+}
+
+define i4 @test43(i4 %x, i4 %y) {
+  %a = or i4 %x, -8
+  %b = and i4 %y, 7
+  %c = sub i4 %a, %b
+  ret i4 %c
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: [[OR:%.*]] = or i4 %x, -8
+; CHECK-NEXT: [[AND:%.*]] = and i4 %y, 7
+; CHECK-NEXT: [[RET:%.*]] = sub nuw i4 [[OR]], [[AND]]
+; CHECK: ret i4 [[RET]]
+}
+
+define i32 @test44(i32 %x) {
+  %sub = sub nsw i32 %x, 32768
+  ret i32 %sub
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 %x, -32768
+; CHECK: ret i32 [[ADD]]
+}
+
+define i32 @test45(i32 %x, i32 %y) {
+  %or = or i32 %x, %y
+  %xor = xor i32 %x, %y
+  %sub = sub i32 %or, %xor
+  ret i32 %sub
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: %sub = and i32 %x, %y
+; CHECK: ret i32 %sub
+}
+
+define i32 @test46(i32 %x, i32 %y) {
+ %or = or i32 %x, %y
+ %sub = sub i32 %or, %x
+ ret i32 %sub
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: %x.not = xor i32 %x, -1
+; CHECK-NEXT: %sub = and i32 %y, %x.not
+; CHECK: ret i32 %sub
+}
diff --git a/test/Transforms/InstCombine/unordered-fcmp-select.ll b/test/Transforms/InstCombine/unordered-fcmp-select.ll
new file mode 100644
index 000000000000..0eb729047e71
--- /dev/null
+++ b/test/Transforms/InstCombine/unordered-fcmp-select.ll
@@ -0,0 +1,125 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; CHECK-LABEL: @select_max_ugt(
+; CHECK: %cmp.inv = fcmp ole float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_max_ugt(float %a, float %b) {
+  %cmp = fcmp ugt float %a, %b
+  %sel = select i1 %cmp, float %a, float %b
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_max_uge(
+; CHECK: %cmp.inv = fcmp olt float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_max_uge(float %a, float %b) {
+  %cmp = fcmp uge float %a, %b
+  %sel = select i1 %cmp, float %a, float %b
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_min_ugt(
+; CHECK: %cmp.inv = fcmp ole float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %a, float %b
+; CHECK-NEXT: ret float %sel
+define float @select_min_ugt(float %a, float %b) {
+  %cmp = fcmp ugt float %a, %b
+  %sel = select i1 %cmp, float %b, float %a
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_min_uge(
+; CHECK: %cmp.inv = fcmp olt float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %a, float %b
+; CHECK-NEXT: ret float %sel
+define float @select_min_uge(float %a, float %b) {
+  %cmp = fcmp uge float %a, %b
+  %sel = select i1 %cmp, float %b, float %a
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_max_ult(
+; CHECK: %cmp.inv = fcmp oge float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %a, float %b
+; CHECK-NEXT: ret float %sel
+define float @select_max_ult(float %a, float %b) {
+  %cmp = fcmp ult float %a, %b
+  %sel = select i1 %cmp, float %b, float %a
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_max_ule(
+; CHECK: %cmp.inv = fcmp ogt float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %a, float %b
+; CHECK: ret float %sel
+define float @select_max_ule(float %a, float %b) {
+  %cmp = fcmp ule float %a, %b
+  %sel = select i1 %cmp, float %b, float %a
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_min_ult(
+; CHECK: %cmp.inv = fcmp oge float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_min_ult(float %a, float %b) {
+  %cmp = fcmp ult float %a, %b
+  %sel = select i1 %cmp, float %a, float %b
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_min_ule(
+; CHECK: %cmp.inv = fcmp ogt float %a, %b
+; CHECK-NEXT: %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_min_ule(float %a, float %b) {
+  %cmp = fcmp ule float %a, %b
+  %sel = select i1 %cmp, float %a, float %b
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_fcmp_une(
+; CHECK:  %cmp.inv = fcmp oeq float %a, %b
+; CHECK-NEXT:  %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_fcmp_une(float %a, float %b) {
+  %cmp = fcmp une float %a, %b
+  %sel = select i1 %cmp, float %a, float %b
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_fcmp_ueq
+; CHECK:  %cmp.inv = fcmp one float %a, %b
+; CHECK-NEXT:  %sel = select i1 %cmp.inv, float %b, float %a
+; CHECK-NEXT: ret float %sel
+define float @select_fcmp_ueq(float %a, float %b) {
+  %cmp = fcmp ueq float %a, %b
+  %sel = select i1 %cmp, float %a, float %b
+  ret float %sel
+}
+
+declare void @foo(i1)
+
+; CHECK-LABEL: @select_max_ugt_2_use_cmp(
+; CHECK: fcmp ugt
+; CHECK-NOT: fcmp
+; CHECK: ret
+define float @select_max_ugt_2_use_cmp(float %a, float %b) {
+  %cmp = fcmp ugt float %a, %b
+  call void @foo(i1 %cmp)
+  %sel = select i1 %cmp, float %a, float %b
+  ret float %sel
+}
+
+; CHECK-LABEL: @select_min_uge_2_use_cmp(
+; CHECK: fcmp uge
+; CHECK-NOT: fcmp
+; CHECK: ret
+define float @select_min_uge_2_use_cmp(float %a, float %b) {
+  %cmp = fcmp uge float %a, %b
+  call void @foo(i1 %cmp)
+  %sel = select i1 %cmp, float %b, float %a
+  ret float %sel
+}
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 41d2b292eeff..00a029aeab78 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -303,6 +303,33 @@ define <2 x i64> @testInsertDisjointRange_2(<2 x i64> %v, <2 x i64> %i) {
   ret <2 x i64> %2
 }
 
+; CHECK: define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: ret <2 x i64> %i
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
+  ret <2 x i64> %1
+}
+
+; CHECK: define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: ret <2 x i64> undef
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
+  ret <2 x i64> %1
+}
+
+; CHECK: define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: ret <2 x i64> undef
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
+  ret <2 x i64> %1
+}
+
+; CHECK: define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i)
+define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
+; CHECK: ret <2 x i64> undef
+  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
+  ret <2 x i64> %1
+}
 
 ; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
 declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind
diff --git a/test/Transforms/InstCombine/vsx-unaligned.ll b/test/Transforms/InstCombine/vsx-unaligned.ll
new file mode 100644
index 000000000000..26e04268f446
--- /dev/null
+++ b/test/Transforms/InstCombine/vsx-unaligned.ll
@@ -0,0 +1,44 @@
+; Verify that we can create unaligned loads and stores from VSX intrinsics.
+
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target triple = "powerpc64-unknown-linux-gnu"
+
+@vf = common global <4 x float> zeroinitializer, align 1
+@res_vf = common global <4 x float> zeroinitializer, align 1
+@vd = common global <2 x double> zeroinitializer, align 1
+@res_vd = common global <2 x double> zeroinitializer, align 1
+
+define void @test1() {
+entry:
+  %t1 = alloca <4 x float>*, align 8
+  %t2 = alloca <2 x double>*, align 8
+  store <4 x float>* @vf, <4 x float>** %t1, align 8
+  %0 = load <4 x float>** %t1, align 8
+  %1 = bitcast <4 x float>* %0 to i8*
+  %2 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %1)
+  store <4 x float>* @res_vf, <4 x float>** %t1, align 8
+  %3 = load <4 x float>** %t1, align 8
+  %4 = bitcast <4 x float>* %3 to i8*
+  call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %2, i8* %4)
+  store <2 x double>* @vd, <2 x double>** %t2, align 8
+  %5 = load <2 x double>** %t2, align 8
+  %6 = bitcast <2 x double>* %5 to i8*
+  %7 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %6)
+  store <2 x double>* @res_vd, <2 x double>** %t2, align 8
+  %8 = load <2 x double>** %t2, align 8
+  %9 = bitcast <2 x double>* %8 to i8*
+  call void @llvm.ppc.vsx.stxvd2x(<2 x double> %7, i8* %9)
+  ret void
+}
+
+; CHECK-LABEL: @test1
+; CHECK: %0 = load <4 x i32>* bitcast (<4 x float>* @vf to <4 x i32>*), align 1
+; CHECK: store <4 x i32> %0, <4 x i32>* bitcast (<4 x float>* @res_vf to <4 x i32>*), align 1
+; CHECK: %1 = load <2 x double>* @vd, align 1
+; CHECK: store <2 x double> %1, <2 x double>* @res_vd, align 1
+
+declare <4 x i32> @llvm.ppc.vsx.lxvw4x(i8*)
+declare void @llvm.ppc.vsx.stxvw4x(<4 x i32>, i8*)
+declare <2 x double> @llvm.ppc.vsx.lxvd2x(i8*)
+declare void @llvm.ppc.vsx.stxvd2x(<2 x double>, i8*)
diff --git a/test/Transforms/InstCombine/xor.ll b/test/Transforms/InstCombine/xor.ll
index 3722697f9892..c8debcbac226 100644
--- a/test/Transforms/InstCombine/xor.ll
+++ b/test/Transforms/InstCombine/xor.ll
@@ -1,50 +1,70 @@
 ; This test makes sure that these instructions are properly eliminated.
 ;
 ; RUN: opt < %s -instcombine -S | \
-; RUN:    not grep "xor "
+; RUN:    FileCheck %s
 ; END.
 @G1 = global i32 0		; <i32*> [#uses=1]
 @G2 = global i32 0		; <i32*> [#uses=1]
 
 define i1 @test0(i1 %A) {
+; CHECK-LABEL: @test0(
+; CHECK-NEXT: ret i1 %A
 	%B = xor i1 %A, false		; <i1> [#uses=1]
 	ret i1 %B
 }
 
 define i32 @test1(i32 %A) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i32 %A
 	%B = xor i32 %A, 0		; <i32> [#uses=1]
 	ret i32 %B
 }
 
 define i1 @test2(i1 %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i1 false
 	%B = xor i1 %A, %A		; <i1> [#uses=1]
 	ret i1 %B
 }
 
 define i32 @test3(i32 %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret i32 0
 	%B = xor i32 %A, %A		; <i32> [#uses=1]
 	ret i32 %B
 }
 
 define i32 @test4(i32 %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: ret i32 -1
 	%NotA = xor i32 -1, %A		; <i32> [#uses=1]
 	%B = xor i32 %A, %NotA		; <i32> [#uses=1]
 	ret i32 %B
 }
 
 define i32 @test5(i32 %A) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: %1 = and i32 %A, -124
+; CHECK-NEXT: ret i32 %1
 	%t1 = or i32 %A, 123		; <i32> [#uses=1]
 	%r = xor i32 %t1, 123		; <i32> [#uses=1]
 	ret i32 %r
 }
 
 define i8 @test6(i8 %A) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret i8 %A
 	%B = xor i8 %A, 17		; <i8> [#uses=1]
 	%C = xor i8 %B, 17		; <i8> [#uses=1]
 	ret i8 %C
 }
 
 define i32 @test7(i32 %A, i32 %B) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: %A1 = and i32 %A, 7
+; CHECK-NEXT: %B1 = and i32 %B, 128
+; CHECK-NEXT: %C11 = or i32 %A1, %B1
+; CHECK-NEXT: ret i32 %C11
 	%A1 = and i32 %A, 7		; <i32> [#uses=1]
 	%B1 = and i32 %B, 128		; <i32> [#uses=1]
 	%C1 = xor i32 %A1, %B1		; <i32> [#uses=1]
@@ -52,6 +72,8 @@ define i32 @test7(i32 %A, i32 %B) {
 }
 
 define i8 @test8(i1 %c) {
+; CHECK-LABEL: @test8(
+; CHECK: br i1 %c, label %False, label %True
 	%d = xor i1 %c, true		; <i1> [#uses=1]
 	br i1 %d, label %True, label %False
 
@@ -63,30 +85,47 @@ False:		; preds = %0
 }
 
 define i1 @test9(i8 %A) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: %C = icmp eq i8 %A, 89
+; CHECK-NEXT: ret i1 %C
 	%B = xor i8 %A, 123		; <i8> [#uses=1]
 	%C = icmp eq i8 %B, 34		; <i1> [#uses=1]
 	ret i1 %C
 }
 
 define i8 @test10(i8 %A) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: %B = and i8 %A, 3
+; CHECK-NEXT: %C1 = or i8 %B, 4
+; CHECK-NEXT: ret i8 %C1
 	%B = and i8 %A, 3		; <i8> [#uses=1]
 	%C = xor i8 %B, 4		; <i8> [#uses=1]
 	ret i8 %C
 }
 
 define i8 @test11(i8 %A) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: %B = and i8 %A, -13
+; CHECK-NEXT: %1 = or i8 %B, 8
+; CHECK-NEXT: ret i8 %1
 	%B = or i8 %A, 12		; <i8> [#uses=1]
 	%C = xor i8 %B, 4		; <i8> [#uses=1]
 	ret i8 %C
 }
 
 define i1 @test12(i8 %A) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: %c = icmp ne i8 %A, 4
+; CHECK-NEXT: ret i1 %c
 	%B = xor i8 %A, 4		; <i8> [#uses=1]
 	%c = icmp ne i8 %B, 0		; <i1> [#uses=1]
 	ret i1 %c
 }
 
 define i1 @test13(i8 %A, i8 %B) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: %1 = icmp ne i8 %A, %B
+; CHECK-NEXT: ret i1 %1
 	%C = icmp ult i8 %A, %B		; <i1> [#uses=1]
 	%D = icmp ugt i8 %A, %B		; <i1> [#uses=1]
 	%E = xor i1 %C, %D		; <i1> [#uses=1]
@@ -94,6 +133,8 @@ define i1 @test13(i8 %A, i8 %B) {
 }
 
 define i1 @test14(i8 %A, i8 %B) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: ret i1 true
 	%C = icmp eq i8 %A, %B		; <i1> [#uses=1]
 	%D = icmp ne i8 %B, %A		; <i1> [#uses=1]
 	%E = xor i1 %C, %D		; <i1> [#uses=1]
@@ -101,36 +142,54 @@ define i1 @test14(i8 %A, i8 %B) {
 }
 
 define i32 @test15(i32 %A) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: %C = sub i32 0, %A
+; CHECK-NEXT: ret i32 %C
 	%B = add i32 %A, -1		; <i32> [#uses=1]
 	%C = xor i32 %B, -1		; <i32> [#uses=1]
 	ret i32 %C
 }
 
 define i32 @test16(i32 %A) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: %C = sub i32 -124, %A
+; CHECK-NEXT: ret i32 %C
 	%B = add i32 %A, 123		; <i32> [#uses=1]
 	%C = xor i32 %B, -1		; <i32> [#uses=1]
 	ret i32 %C
 }
 
 define i32 @test17(i32 %A) {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: %C = add i32 %A, -124
+; CHECK-NEXT: ret i32 %C
 	%B = sub i32 123, %A		; <i32> [#uses=1]
 	%C = xor i32 %B, -1		; <i32> [#uses=1]
 	ret i32 %C
 }
 
 define i32 @test18(i32 %A) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: %C = add i32 %A, 124
+; CHECK-NEXT: ret i32 %C
 	%B = xor i32 %A, -1		; <i32> [#uses=1]
 	%C = sub i32 123, %B		; <i32> [#uses=1]
 	ret i32 %C
 }
 
 define i32 @test19(i32 %A, i32 %B) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: ret i32 %B
 	%C = xor i32 %A, %B		; <i32> [#uses=1]
 	%D = xor i32 %C, %A		; <i32> [#uses=1]
 	ret i32 %D
 }
 
 define void @test20(i32 %A, i32 %B) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: store i32 %B, i32* @G1
+; CHECK-NEXT: store i32 %A, i32* @G2
+; CHECK-NEXT: ret void
 	%tmp.2 = xor i32 %B, %A		; <i32> [#uses=2]
 	%tmp.5 = xor i32 %tmp.2, %B		; <i32> [#uses=2]
 	%tmp.8 = xor i32 %tmp.5, %tmp.2		; <i32> [#uses=1]
@@ -140,12 +199,18 @@ define void @test20(i32 %A, i32 %B) {
 }
 
 define i32 @test21(i1 %C, i32 %A, i32 %B) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: %D = select i1 %C, i32 %B, i32 %A
+; CHECK-NEXT: ret i32 %D
 	%C2 = xor i1 %C, true		; <i1> [#uses=1]
 	%D = select i1 %C2, i32 %A, i32 %B		; <i32> [#uses=1]
 	ret i32 %D
 }
 
 define i32 @test22(i1 %X) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: %1 = zext i1 %X to i32
+; CHECK-NEXT: ret i32 %1
 	%Y = xor i1 %X, true		; <i1> [#uses=1]
 	%Z = zext i1 %Y to i32		; <i32> [#uses=1]
 	%Q = xor i32 %Z, 1		; <i32> [#uses=1]
@@ -153,18 +218,27 @@ define i32 @test22(i1 %X) {
 }
 
 define i1 @test23(i32 %a, i32 %b) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: %tmp.4 = icmp eq i32 %b, 0
+; CHECK-NEXT: ret i1 %tmp.4
 	%tmp.2 = xor i32 %b, %a		; <i32> [#uses=1]
 	%tmp.4 = icmp eq i32 %tmp.2, %a		; <i1> [#uses=1]
 	ret i1 %tmp.4
 }
 
 define i1 @test24(i32 %c, i32 %d) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: %tmp.4 = icmp ne i32 %d, 0
+; CHECK-NEXT: ret i1 %tmp.4
 	%tmp.2 = xor i32 %d, %c		; <i32> [#uses=1]
 	%tmp.4 = icmp ne i32 %tmp.2, %c		; <i1> [#uses=1]
 	ret i1 %tmp.4
 }
 
 define i32 @test25(i32 %g, i32 %h) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: %tmp4 = and i32 %h, %g
+; CHECK-NEXT: ret i32 %tmp4
 	%h2 = xor i32 %h, -1		; <i32> [#uses=1]
 	%tmp2 = and i32 %h2, %g		; <i32> [#uses=1]
 	%tmp4 = xor i32 %tmp2, %g		; <i32> [#uses=1]
@@ -172,6 +246,9 @@ define i32 @test25(i32 %g, i32 %h) {
 }
 
 define i32 @test26(i32 %a, i32 %b) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: %tmp4 = and i32 %a, %b
+; CHECK-NEXT: ret i32 %tmp4
 	%b2 = xor i32 %b, -1		; <i32> [#uses=1]
 	%tmp2 = xor i32 %a, %b2		; <i32> [#uses=1]
 	%tmp4 = and i32 %tmp2, %a		; <i32> [#uses=1]
@@ -179,6 +256,10 @@ define i32 @test26(i32 %a, i32 %b) {
 }
 
 define i32 @test27(i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: %tmp = icmp eq i32 %b, %c
+; CHECK-NEXT: %tmp6 = zext i1 %tmp to i32
+; CHECK-NEXT: ret i32 %tmp6
 	%tmp2 = xor i32 %d, %b		; <i32> [#uses=1]
 	%tmp5 = xor i32 %d, %c		; <i32> [#uses=1]
 	%tmp = icmp eq i32 %tmp2, %tmp5		; <i1> [#uses=1]
@@ -187,6 +268,9 @@ define i32 @test27(i32 %b, i32 %c, i32 %d) {
 }
 
 define i32 @test28(i32 %indvar) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: %tmp214 = add i32 %indvar, 1
+; CHECK-NEXT: ret i32 %tmp214
 	%tmp7 = add i32 %indvar, -2147483647		; <i32> [#uses=1]
 	%tmp214 = xor i32 %tmp7, -2147483648		; <i32> [#uses=1]
 	ret i32 %tmp214
diff --git a/test/Transforms/InstCombine/xor2.ll b/test/Transforms/InstCombine/xor2.ll
index d153e035c899..797c8f39f298 100644
--- a/test/Transforms/InstCombine/xor2.ll
+++ b/test/Transforms/InstCombine/xor2.ll
@@ -82,3 +82,93 @@ define i32 @test6(i32 %x) {
 ; CHECK: lshr i32 %x, 16
 ; CHECK: ret
 }
+
+
+; (A | B) ^ (~A) -> (A | ~B)
+define i32 @test7(i32 %a, i32 %b) {
+ %or = or i32 %a, %b
+ %neg = xor i32 %a, -1
+ %xor = xor i32 %or, %neg
+ ret i32 %xor
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: %[[b_not:.*]] = xor i32 %b, -1
+; CHECK-NEXT: %[[or:.*]] = or i32 %a, %[[b_not]]
+; CHECK-NEXT: ret i32 %[[or]]
+}
+
+; (~A) ^ (A | B) -> (A | ~B)
+define i32 @test8(i32 %a, i32 %b) {
+ %neg = xor i32 %a, -1
+ %or = or i32 %a, %b
+ %xor = xor i32 %neg, %or
+ ret i32 %xor
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: %[[b_not:.*]] = xor i32 %b, -1
+; CHECK-NEXT: %[[or:.*]] = or i32 %a, %[[b_not]]
+; CHECK-NEXT: ret i32 %[[or]]
+}
+
+; (A & B) ^ (A ^ B) -> (A | B)
+define i32 @test9(i32 %b, i32 %c) {
+ %and = and i32 %b, %c
+ %xor = xor i32 %b, %c
+ %xor2 = xor i32 %and, %xor
+ ret i32 %xor2
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: %xor2 = or i32 %b, %c
+}
+
+; (A ^ B) ^ (A & B) -> (A | B)
+define i32 @test10(i32 %b, i32 %c) {
+ %xor = xor i32 %b, %c
+ %and = and i32 %b, %c
+ %xor2 = xor i32 %xor, %and
+ ret i32 %xor2
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: %xor2 = or i32 %b, %c
+}
+
+define i32 @test11(i32 %A, i32 %B) {
+  %xor1 = xor i32 %B, %A
+  %not = xor i32 %A, -1
+  %xor2 = xor i32 %not, %B
+  %and = and i32 %xor1, %xor2
+  ret i32 %and
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test12(i32 %a, i32 %b) {
+ %negb = xor i32 %b, -1
+ %and = and i32 %a, %negb
+ %nega = xor i32 %a, -1
+ %xor = xor i32 %and, %nega
+ ret i32 %xor
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: %1 = and i32 %a, %b
+; CHECK-NEXT: %xor = xor i32 %1, -1
+}
+
+define i32 @test13(i32 %a, i32 %b) {
+ %nega = xor i32 %a, -1
+ %negb = xor i32 %b, -1
+ %and = and i32 %a, %negb
+ %xor = xor i32 %nega, %and
+ ret i32 %xor
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: %1 = and i32 %a, %b
+; CHECK-NEXT: %xor = xor i32 %1, -1
+}
+
+; (A ^ C) ^ (A | B) -> ((~A) & B) ^ C
+define i32 @test14(i32 %a, i32 %b, i32 %c) {
+ %neg = xor i32 %a, %c
+ %or = or i32 %a, %b
+ %xor = xor i32 %neg, %or
+ ret i32 %xor
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: %[[not:.*]] = xor i32 %a, -1
+; CHECK-NEXT: %[[and:.*]] = and i32 %[[not]], %b
+; CHECK-NEXT: %[[xor:.*]] = xor i32 %[[and]], %c
+; CHECK-NEXT: ret i32 %[[xor]]
+}
diff --git a/test/Transforms/InstMerge/ld_hoist1.ll b/test/Transforms/InstMerge/ld_hoist1.ll
new file mode 100644
index 000000000000..715f1b824407
--- /dev/null
+++ b/test/Transforms/InstMerge/ld_hoist1.ll
@@ -0,0 +1,64 @@
+; Test load hoist
+; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc_linux"
+
+; Function Attrs: nounwind uwtable
+define float* @foo(i32* noalias nocapture readonly %in, float* noalias %out, i32 %size, i32* nocapture readonly %trigger)  {
+entry:
+  %cmp11 = icmp eq i32 %size, 0
+  br i1 %cmp11, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph:                                   ; preds = %entry
+  %0 = add i32 %size, -1
+  br label %for.body
+
+; CHECK-LABEL: for.body
+; CHECK: load
+; CHECK:  %2 = getelementptr inbounds i32* %in, i64 %indvars.iv
+; CHECK:  %3 = load i32* %2, align 4
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.inc
+  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.inc ]
+  %arrayidx = getelementptr inbounds i32* %trigger, i64 %indvars.iv
+  %1 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %1, 0
+  br i1 %cmp1, label %if.then, label %if.else
+
+; CHECK-LABEL: if.then
+if.then:                                          ; preds = %for.body
+; This load should be hoisted
+  %arrayidx3 = getelementptr inbounds i32* %in, i64 %indvars.iv
+  %2 = load i32* %arrayidx3, align 4
+  %conv = sitofp i32 %2 to float
+  %add = fadd float %conv, 5.000000e-01
+  %arrayidx5 = getelementptr inbounds float* %out, i64 %indvars.iv
+  store float %add, float* %arrayidx5, align 4
+  br label %for.inc
+
+if.else:                                          ; preds = %for.body
+  %arrayidx7 = getelementptr inbounds float* %out, i64 %indvars.iv
+  %3 = load float* %arrayidx7, align 4
+  %div = fdiv float %3, 3.000000e+00
+  store float %div, float* %arrayidx7, align 4
+; This load should be hoisted in spite of store 
+  %arrayidx9 = getelementptr inbounds i32* %in, i64 %indvars.iv
+  %4 = load i32* %arrayidx9, align 4
+  %conv10 = sitofp i32 %4 to float
+  %add13 = fadd float %div, %conv10
+  store float %add13, float* %arrayidx7, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %if.else
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv to i32
+  %exitcond = icmp ne i32 %lftr.wideiv, %0
+  br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.inc
+  br label %for.end
+
+for.end:                                          ; preds = %entry, %for.cond.for.end_crit_edge
+  ret float* %out
+}
+
diff --git a/test/Transforms/InstMerge/st_sink_barrier_call.ll b/test/Transforms/InstMerge/st_sink_barrier_call.ll
new file mode 100644
index 000000000000..c158b006bee0
--- /dev/null
+++ b/test/Transforms/InstMerge/st_sink_barrier_call.ll
@@ -0,0 +1,43 @@
+; Test to make sure that a function call that needs to be a barrier to sinking stores is indeed a barrier.
+; Stores sunks into the footer.
+; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+%struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
+
+declare i32 @foo(i32 %x)
+
+; Function Attrs: nounwind uwtable
+define void @sink_store(%struct.node* nocapture %r, i32 %index) {
+entry:
+  %node.0.in16 = getelementptr inbounds %struct.node* %r, i64 0, i32 2
+  %node.017 = load %struct.node** %node.0.in16, align 8
+  %index.addr = alloca i32, align 4
+  store i32 %index, i32* %index.addr, align 4
+  %0 = load i32* %index.addr, align 4
+  %cmp = icmp slt i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+; CHECK: if.then
+if.then:                                          ; preds = %entry
+  %1 = load i32* %index.addr, align 4
+  %p1 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6
+  ; CHECK: store i32
+  store i32 %1, i32* %p1, align 4
+  br label %if.end
+  
+; CHECK: if.else
+if.else:                                          ; preds = %entry
+  %2 = load i32* %index.addr, align 4
+  %add = add nsw i32 %2, 1
+  %p3 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6
+  ; CHECK: store i32
+  store i32 %add, i32* %p3, align 4
+  call i32 @foo(i32 5)				  ;barrier
+  br label %if.end
+
+; CHECK: if.end
+if.end:                                           ; preds = %if.else, %if.then
+; CHECK-NOT: store
+  ret void
+}
diff --git a/test/Transforms/InstMerge/st_sink_no_barrier_call.ll b/test/Transforms/InstMerge/st_sink_no_barrier_call.ll
new file mode 100644
index 000000000000..72f1fdf77371
--- /dev/null
+++ b/test/Transforms/InstMerge/st_sink_no_barrier_call.ll
@@ -0,0 +1,45 @@
+; Test to make sure that stores in a diamond get merged with a non barrier function call after the store instruction
+; Stores sunks into the footer.
+; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+%struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
+
+declare i32 @foo(i32 %x) #0
+
+; Function Attrs: nounwind uwtable
+define void @sink_store(%struct.node* nocapture %r, i32 %index) {
+entry:
+  %node.0.in16 = getelementptr inbounds %struct.node* %r, i64 0, i32 2
+  %node.017 = load %struct.node** %node.0.in16, align 8
+  %index.addr = alloca i32, align 4
+  store i32 %index, i32* %index.addr, align 4
+  %0 = load i32* %index.addr, align 4
+  %cmp = icmp slt i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+; CHECK: if.then
+if.then:                                          ; preds = %entry
+  %1 = load i32* %index.addr, align 4
+  %p1 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6
+  ; CHECK-NOT: store i32
+  store i32 %1, i32* %p1, align 4
+  br label %if.end
+  
+; CHECK: if.else
+if.else:                                          ; preds = %entry
+  %2 = load i32* %index.addr, align 4
+  %add = add nsw i32 %2, 1
+  %p3 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6
+  ; CHECK-NOT: store i32
+  store i32 %add, i32* %p3, align 4
+  call i32 @foo(i32 5)				  ;not a barrier
+  br label %if.end
+
+; CHECK: if.end
+if.end:                                           ; preds = %if.else, %if.then
+; CHECK: store
+  ret void
+}
+
+attributes #0 = { readnone } 
diff --git a/test/Transforms/InstMerge/st_sink_no_barrier_load.ll b/test/Transforms/InstMerge/st_sink_no_barrier_load.ll
new file mode 100644
index 000000000000..5be0c254ba6f
--- /dev/null
+++ b/test/Transforms/InstMerge/st_sink_no_barrier_load.ll
@@ -0,0 +1,43 @@
+; Test to make sure that stores in a diamond get merged with a non barrier load after the store instruction
+; Stores sunks into the footer.
+; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+%struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
+
+; Function Attrs: nounwind uwtable
+define void @sink_store(%struct.node* nocapture %r, i32 %index) {
+entry:
+  %node.0.in16 = getelementptr inbounds %struct.node* %r, i64 0, i32 2
+  %node.017 = load %struct.node** %node.0.in16, align 8
+  %index.addr = alloca i32, align 4
+  store i32 %index, i32* %index.addr, align 4
+  %0 = load i32* %index.addr, align 4
+  %cmp = icmp slt i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+; CHECK: if.then
+if.then:                                          ; preds = %entry
+  %1 = load i32* %index.addr, align 4
+  %p1 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6
+  ; CHECK-NOT: store i32
+  store i32 %1, i32* %p1, align 4
+  %p2 = getelementptr inbounds %struct.node* %node.017, i32 5, i32 6
+  ; CHECK: load i32*
+  %not_barrier = load i32 * %p2, align 4
+  br label %if.end
+
+; CHECK: if.else
+if.else:                                          ; preds = %entry
+  %2 = load i32* %index.addr, align 4
+  %add = add nsw i32 %2, 1
+  %p3 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6
+  ; CHECK-NOT: store i32
+  store i32 %add, i32* %p3, align 4
+  br label %if.end
+
+; CHECK: if.end
+if.end:                                           ; preds = %if.else, %if.then
+; CHECK: store
+  ret void
+}
diff --git a/test/Transforms/InstMerge/st_sink_no_barrier_store.ll b/test/Transforms/InstMerge/st_sink_no_barrier_store.ll
new file mode 100644
index 000000000000..06e2b63ca3ed
--- /dev/null
+++ b/test/Transforms/InstMerge/st_sink_no_barrier_store.ll
@@ -0,0 +1,42 @@
+; Test to make sure that stores in a diamond get merged with a non barrier store after the store instruction to be sunk
+; Stores sunks into the footer.
+; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+%struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
+
+; Function Attrs: nounwind uwtable
+define void @sink_store(%struct.node* nocapture %r, i32 %index) {
+entry:
+  %node.0.in16 = getelementptr inbounds %struct.node* %r, i64 0, i32 2
+  %node.017 = load %struct.node** %node.0.in16, align 8
+  %index.addr = alloca i32, align 4
+  store i32 %index, i32* %index.addr, align 4
+  %0 = load i32* %index.addr, align 4
+  %cmp = icmp slt i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+; CHECK: if.then
+if.then:                                          ; preds = %entry
+  %1 = load i32* %index.addr, align 4
+  %p1 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6
+  ; CHECK-NOT: store i32
+  store i32 %1, i32* %p1, align 4
+  br label %if.end
+
+; CHECK: if.else
+if.else:                                          ; preds = %entry
+  %2 = load i32* %index.addr, align 4
+  %add = add nsw i32 %2, 1
+  %p2 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6
+  store i32 %add, i32* %p2, align 4
+  %p3 = getelementptr inbounds %struct.node* %node.017, i32 5, i32 6
+  ; CHECK: store i32
+  store i32 %add, i32* %p3, align 4  			  ; This is not a barrier
+  br label %if.end
+
+; CHECK: if.end
+if.end:                                           ; preds = %if.else, %if.then
+; CHECK: store
+  ret void
+}
diff --git a/test/Transforms/InstMerge/st_sink_two_stores.ll b/test/Transforms/InstMerge/st_sink_two_stores.ll
new file mode 100644
index 000000000000..1f7c6aa478ba
--- /dev/null
+++ b/test/Transforms/InstMerge/st_sink_two_stores.ll
@@ -0,0 +1,47 @@
+; Test to make sure that stores in a diamond get merged
+; Stores sunks into the footer.
+; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+%struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
+
+; Function Attrs: nounwind uwtable
+define void @sink_store(%struct.node* nocapture %r, i32 %index) {
+entry:
+  %node.0.in16 = getelementptr inbounds %struct.node* %r, i64 0, i32 2
+  %node.017 = load %struct.node** %node.0.in16, align 8
+  %index.addr = alloca i32, align 4
+  store i32 %index, i32* %index.addr, align 4
+  %0 = load i32* %index.addr, align 4
+  %cmp = icmp slt i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+; CHECK: if.then
+if.then:                                          ; preds = %entry
+  %1 = load i32* %index.addr, align 4
+  %p1 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6
+  ; CHECK-NOT: store i32
+  store i32 %1, i32* %p1, align 4
+  %p2 = getelementptr inbounds %struct.node* %node.017, i32 4, i32 6
+  ; CHECK-NOT: store i32
+  store i32 %1, i32* %p2, align 4
+  br label %if.end
+
+; CHECK: if.else
+if.else:                                          ; preds = %entry
+  %2 = load i32* %index.addr, align 4
+  %add = add nsw i32 %2, 1
+  %p3 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6
+  ; CHECK-NOT: store i32
+  store i32 %add, i32* %p3, align 4
+  %p4 = getelementptr inbounds %struct.node* %node.017, i32 4, i32 6
+  ; CHECK-NOT: store i32
+  store i32 %2, i32* %p4, align 4  
+  br label %if.end
+
+; CHECK: if.end
+if.end:                                           ; preds = %if.else, %if.then
+; CHECK: store
+; CHECK: store
+  ret void
+}
diff --git a/test/Transforms/InstMerge/st_sink_with_barrier.ll b/test/Transforms/InstMerge/st_sink_with_barrier.ll
new file mode 100644
index 000000000000..d4efaa7efff7
--- /dev/null
+++ b/test/Transforms/InstMerge/st_sink_with_barrier.ll
@@ -0,0 +1,42 @@
+; Test to make sure that load from the same address as a store and appears after the store prevents the store from being sunk
+; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+%struct.node = type { i32, %struct.node*, %struct.node*, %struct.node*, i32, i32, i32, i32 }
+
+; Function Attrs: nounwind uwtable
+define void @sink_store(%struct.node* nocapture %r, i32 %index) {
+entry:
+  %node.0.in16 = getelementptr inbounds %struct.node* %r, i64 0, i32 2
+  %node.017 = load %struct.node** %node.0.in16, align 8
+  %index.addr = alloca i32, align 4
+  store i32 %index, i32* %index.addr, align 4
+  %0 = load i32* %index.addr, align 4
+  %cmp = icmp slt i32 %0, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+; CHECK: if.then
+if.then:                                          ; preds = %entry
+  %1 = load i32* %index.addr, align 4
+  %p1 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6
+  ; CHECK: store i32
+  store i32 %1, i32* %p1, align 4
+  %p2 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6
+  ; CHECK: load i32*
+  %barrier = load i32 * %p2, align 4
+  br label %if.end
+
+; CHECK: if.else
+if.else:                                          ; preds = %entry
+  %2 = load i32* %index.addr, align 4
+  %add = add nsw i32 %2, 1
+  %p3 = getelementptr inbounds %struct.node* %node.017, i32 0, i32 6
+  ; CHECK: store i32
+  store i32 %add, i32* %p3, align 4
+  br label %if.end
+
+; CHECK: if.end
+if.end:                                           ; preds = %if.else, %if.then
+; CHECK-NOT: store
+  ret void
+}
diff --git a/test/Transforms/InstSimplify/AndOrXor.ll b/test/Transforms/InstSimplify/AndOrXor.ll
index c59d6c916a6b..ce3c2aa6af22 100644
--- a/test/Transforms/InstSimplify/AndOrXor.ll
+++ b/test/Transforms/InstSimplify/AndOrXor.ll
@@ -20,3 +20,186 @@ define i64 @pow2b(i32 %x) {
   ret i64 %e2
 ; CHECK: ret i64 %e
 }
+
+define i32 @sub_neg_nuw(i32 %x, i32 %y) {
+; CHECK-LABEL: @sub_neg_nuw(
+  %neg = sub nuw i32 0, %y
+  %sub = sub i32 %x, %neg
+  ret i32 %sub
+; CHECK: ret i32 %x
+}
+
+define i1 @and_of_icmps0(i32 %b) {
+; CHECK-LABEL: @and_of_icmps0(
+  %1 = add i32 %b, 2
+  %2 = icmp ult i32 %1, 4
+  %cmp3 = icmp sgt i32 %b, 2
+  %cmp = and i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 false
+}
+
+define i1 @and_of_icmps1(i32 %b) {
+; CHECK-LABEL: @and_of_icmps1(
+  %1 = add nsw i32 %b, 2
+  %2 = icmp slt i32 %1, 4
+  %cmp3 = icmp sgt i32 %b, 2
+  %cmp = and i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 false
+}
+
+define i1 @and_of_icmps2(i32 %b) {
+; CHECK-LABEL: @and_of_icmps2(
+  %1 = add i32 %b, 2
+  %2 = icmp ule i32 %1, 3
+  %cmp3 = icmp sgt i32 %b, 2
+  %cmp = and i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 false
+}
+
+define i1 @and_of_icmps3(i32 %b) {
+; CHECK-LABEL: @and_of_icmps3(
+  %1 = add nsw i32 %b, 2
+  %2 = icmp sle i32 %1, 3
+  %cmp3 = icmp sgt i32 %b, 2
+  %cmp = and i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 false
+}
+
+define i1 @and_of_icmps4(i32 %b) {
+; CHECK-LABEL: @and_of_icmps4(
+  %1 = add nuw i32 %b, 2
+  %2 = icmp ult i32 %1, 4
+  %cmp3 = icmp ugt i32 %b, 2
+  %cmp = and i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 false
+}
+
+define i1 @and_of_icmps5(i32 %b) {
+; CHECK-LABEL: @and_of_icmps5(
+  %1 = add nuw i32 %b, 2
+  %2 = icmp ule i32 %1, 3
+  %cmp3 = icmp ugt i32 %b, 2
+  %cmp = and i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 false
+}
+
+define i1 @or_of_icmps0(i32 %b) {
+; CHECK-LABEL: @or_of_icmps0(
+  %1 = add i32 %b, 2
+  %2 = icmp uge i32 %1, 4
+  %cmp3 = icmp sle i32 %b, 2
+  %cmp = or i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+define i1 @or_of_icmps1(i32 %b) {
+; CHECK-LABEL: @or_of_icmps1(
+  %1 = add nsw i32 %b, 2
+  %2 = icmp sge i32 %1, 4
+  %cmp3 = icmp sle i32 %b, 2
+  %cmp = or i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+define i1 @or_of_icmps2(i32 %b) {
+; CHECK-LABEL: @or_of_icmps2(
+  %1 = add i32 %b, 2
+  %2 = icmp ugt i32 %1, 3
+  %cmp3 = icmp sle i32 %b, 2
+  %cmp = or i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+define i1 @or_of_icmps3(i32 %b) {
+; CHECK-LABEL: @or_of_icmps3(
+  %1 = add nsw i32 %b, 2
+  %2 = icmp sgt i32 %1, 3
+  %cmp3 = icmp sle i32 %b, 2
+  %cmp = or i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+define i1 @or_of_icmps4(i32 %b) {
+; CHECK-LABEL: @or_of_icmps4(
+  %1 = add nuw i32 %b, 2
+  %2 = icmp uge i32 %1, 4
+  %cmp3 = icmp ule i32 %b, 2
+  %cmp = or i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+define i1 @or_of_icmps5(i32 %b) {
+; CHECK-LABEL: @or_of_icmps5(
+  %1 = add nuw i32 %b, 2
+  %2 = icmp ugt i32 %1, 3
+  %cmp3 = icmp ule i32 %b, 2
+  %cmp = or i1 %2, %cmp3
+  ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+define i32 @neg_nuw(i32 %x) {
+; CHECK-LABEL: @neg_nuw(
+  %neg = sub nuw i32 0, %x
+  ret i32 %neg
+; CHECK: ret i32 0
+}
+
+define i1 @and_icmp1(i32 %x, i32 %y) {
+  %1 = icmp ult i32 %x, %y
+  %2 = icmp ne i32 %y, 0
+  %3 = and i1 %1, %2
+  ret i1 %3
+}
+; CHECK-LABEL: @and_icmp1(
+; CHECK: %[[cmp:.*]] = icmp ult i32 %x, %y
+; CHECK: ret i1 %[[cmp]]
+
+define i1 @and_icmp2(i32 %x, i32 %y) {
+  %1 = icmp ult i32 %x, %y
+  %2 = icmp eq i32 %y, 0
+  %3 = and i1 %1, %2
+  ret i1 %3
+}
+; CHECK-LABEL: @and_icmp2(
+; CHECK: ret i1 false
+
+define i1 @or_icmp1(i32 %x, i32 %y) {
+  %1 = icmp ult i32 %x, %y
+  %2 = icmp ne i32 %y, 0
+  %3 = or i1 %1, %2
+  ret i1 %3
+}
+; CHECK-LABEL: @or_icmp1(
+; CHECK: %[[cmp:.*]] = icmp ne i32 %y, 0
+; CHECK: ret i1 %[[cmp]]
+
+define i1 @or_icmp2(i32 %x, i32 %y) {
+  %1 = icmp uge i32 %x, %y
+  %2 = icmp ne i32 %y, 0
+  %3 = or i1 %1, %2
+  ret i1 %3
+}
+; CHECK-LABEL: @or_icmp2(
+; CHECK: ret i1 true
+
+define i1 @or_icmp3(i32 %x, i32 %y) {
+  %1 = icmp uge i32 %x, %y
+  %2 = icmp eq i32 %y, 0
+  %3 = or i1 %1, %2
+  ret i1 %3
+}
+; CHECK-LABEL: @or_icmp3(
+; CHECK: %[[cmp:.*]] = icmp uge i32 %x, %y
+; CHECK: ret i1 %[[cmp]]
diff --git a/test/Transforms/InstSimplify/ashr-nop.ll b/test/Transforms/InstSimplify/ashr-nop.ll
deleted file mode 100644
index 0914d725e40d..000000000000
--- a/test/Transforms/InstSimplify/ashr-nop.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: opt < %s -instsimplify -S | FileCheck %s
-
-; CHECK-LABEL: @foo
-; CHECK-NOT: ashr
-define i32 @foo(i32 %x) {
- %o = and i32 %x, 1
- %n = add i32 %o, -1
- %t = ashr i32 %n, 17
- ret i32 %t
-}
diff --git a/test/Transforms/InstSimplify/assume.ll b/test/Transforms/InstSimplify/assume.ll
new file mode 100644
index 000000000000..4dd0a8f4a82d
--- /dev/null
+++ b/test/Transforms/InstSimplify/assume.ll
@@ -0,0 +1,13 @@
+; RUN: opt -instsimplify -S < %s | FileCheck %s
+
+define void @test1() {
+  call void @llvm.assume(i1 1)
+  ret void
+
+; CHECK-LABEL: @test1
+; CHECK-NOT: llvm.assume
+; CHECK: ret void
+}
+
+declare void @llvm.assume(i1) nounwind
+
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 94872d35bb97..10c7ca6754e3 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -333,14 +333,46 @@ define i1 @or(i32 %x) {
 ; CHECK: ret i1 false
 }
 
-define i1 @shl(i32 %x) {
-; CHECK-LABEL: @shl(
+define i1 @shl1(i32 %x) {
+; CHECK-LABEL: @shl1(
   %s = shl i32 1, %x
   %c = icmp eq i32 %s, 0
   ret i1 %c
 ; CHECK: ret i1 false
 }
 
+define i1 @shl2(i32 %X) {
+; CHECK: @shl2
+  %sub = shl nsw i32 -1, %X
+  %cmp = icmp eq i32 %sub, 31
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @shl3(i32 %X) {
+; CHECK: @shl3
+  %sub = shl nuw i32 4, %X
+  %cmp = icmp eq i32 %sub, 31
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @shl4(i32 %X) {
+; CHECK: @shl4
+  %sub = shl nsw i32 -1, %X
+  %cmp = icmp sle i32 %sub, -1
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @shl5(i32 %X) {
+; CHECK: @shl5
+  %sub = shl nuw i32 4, %X
+  %cmp = icmp ugt i32 %sub, 3
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 true
+}
+
 define i1 @lshr1(i32 %x) {
 ; CHECK-LABEL: @lshr1(
   %s = lshr i32 -1, %x
@@ -917,6 +949,29 @@ define i1 @returns_nonnull_as_deref() {
 ; CHECK: ret
 }
 
+define i1 @nonnull_load(i32** %addr) {
+  %ptr = load i32** %addr, !nonnull !{}
+  %cmp = icmp eq i32* %ptr, null
+  ret i1 %cmp
+; CHECK-LABEL: @nonnull_load
+; CHECK: ret i1 false
+}
+
+define i1 @nonnull_load_as_outer(i32* addrspace(1)* %addr) {
+  %ptr = load i32* addrspace(1)* %addr, !nonnull !{}
+  %cmp = icmp eq i32* %ptr, null
+  ret i1 %cmp
+; CHECK-LABEL: @nonnull_load_as_outer
+; CHECK: ret i1 false
+}
+define i1 @nonnull_load_as_inner(i32 addrspace(1)** %addr) {
+  %ptr = load i32 addrspace(1)** %addr, !nonnull !{}
+  %cmp = icmp eq i32 addrspace(1)* %ptr, null
+  ret i1 %cmp
+; CHECK-LABEL: @nonnull_load_as_inner
+; CHECK: ret i1 false
+}
+
 ; If a bit is known to be zero for A and known to be one for B,
 ; then A and B cannot be equal.
 define i1 @icmp_eq_const(i32 %a) nounwind {
@@ -969,3 +1024,143 @@ define i1 @icmp_sdiv_neg1(i64 %a) {
 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[DIV]], 1073741824
 ; CHECK-NEXT: ret i1 [[CMP]]
 }
+
+define i1 @icmp_known_bits(i4 %x, i4 %y) {
+  %and1 = and i4 %y, -7
+  %and2 = and i4 %x, -7
+  %or1 = or i4 %and1, 2
+  %or2 = or i4 %and2, 2
+  %add = add i4 %or1, %or2
+  %cmp = icmp eq i4 %add, 0
+  ret i1 %cmp
+
+; CHECK-LABEL: @icmp_known_bits
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @icmp_shl_nuw_1(i64 %a) {
+ %shl = shl nuw i64 1, %a
+ %cmp = icmp ne i64 %shl, 0
+ ret i1 %cmp
+
+; CHECK-LABEL: @icmp_shl_nuw_1
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @icmp_shl_nsw_neg1(i64 %a) {
+ %shl = shl nsw i64 -1, %a
+ %cmp = icmp sge i64 %shl, 3
+ ret i1 %cmp
+
+; CHECK-LABEL: @icmp_shl_nsw_neg1
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @icmp_shl_nsw_1(i64 %a) {
+ %shl = shl nsw i64 1, %a
+ %cmp = icmp sge i64 %shl, 0
+ ret i1 %cmp
+
+; CHECK-LABEL: @icmp_shl_nsw_1
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @icmp_shl_1_V_ugt_2147483648(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp ugt i32 %shl, 2147483648
+  ret i1 %cmp
+
+; CHECK-LABEL: @icmp_shl_1_V_ugt_2147483648(
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @icmp_shl_1_V_ule_2147483648(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp ule i32 %shl, 2147483648
+  ret i1 %cmp
+
+; CHECK-LABEL: @icmp_shl_1_V_ule_2147483648(
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @icmp_shl_1_V_eq_31(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp eq i32 %shl, 31
+  ret i1 %cmp
+
+; CHECK-LABEL: @icmp_shl_1_V_eq_31(
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @icmp_shl_1_V_ne_31(i32 %V) {
+  %shl = shl i32 1, %V
+  %cmp = icmp ne i32 %shl, 31
+  ret i1 %cmp
+
+; CHECK-LABEL: @icmp_shl_1_V_ne_31(
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @tautological1(i32 %A, i32 %B) {
+  %C = and i32 %A, %B
+  %D = icmp ugt i32 %C, %A
+  ret i1 %D
+; CHECK-LABEL: @tautological1(
+; CHECK: ret i1 false
+}
+
+define i1 @tautological2(i32 %A, i32 %B) {
+  %C = and i32 %A, %B
+  %D = icmp ule i32 %C, %A
+  ret i1 %D
+; CHECK-LABEL: @tautological2(
+; CHECK: ret i1 true
+}
+
+define i1 @tautological3(i32 %A, i32 %B) {
+  %C = or i32 %A, %B
+  %D = icmp ule i32 %A, %C
+  ret i1 %D
+; CHECK-LABEL: @tautological3(
+; CHECK: ret i1 true
+}
+
+define i1 @tautological4(i32 %A, i32 %B) {
+  %C = or i32 %A, %B
+  %D = icmp ugt i32 %A, %C
+  ret i1 %D
+; CHECK-LABEL: @tautological4(
+; CHECK: ret i1 false
+}
+
+define i1 @tautological5(i32 %A, i32 %B) {
+  %C = or i32 %A, %B
+  %D = icmp ult i32 %C, %A
+  ret i1 %D
+; CHECK-LABEL: @tautological5(
+; CHECK: ret i1 false
+}
+
+define i1 @tautological6(i32 %A, i32 %B) {
+  %C = or i32 %A, %B
+  %D = icmp uge i32 %C, %A
+  ret i1 %D
+; CHECK-LABEL: @tautological6(
+; CHECK: ret i1 true
+}
+
+define i1 @tautological7(i32 %A, i32 %B) {
+  %C = and i32 %A, %B
+  %D = icmp uge i32 %A, %C
+  ret i1 %D
+; CHECK-LABEL: @tautological7(
+; CHECK: ret i1 true
+}
+
+define i1 @tautological8(i32 %A, i32 %B) {
+  %C = and i32 %A, %B
+  %D = icmp ult i32 %A, %C
+  ret i1 %D
+; CHECK-LABEL: @tautological8(
+; CHECK: ret i1 false
+}
diff --git a/test/Transforms/InstSimplify/exact-nsw-nuw.ll b/test/Transforms/InstSimplify/exact-nsw-nuw.ll
index a0e326b13c02..5ccc8083e645 100644
--- a/test/Transforms/InstSimplify/exact-nsw-nuw.ll
+++ b/test/Transforms/InstSimplify/exact-nsw-nuw.ll
@@ -42,3 +42,19 @@ define i32 @shift5(i32 %A, i32 %B) {
   %D = ashr i32 %C, %B
   ret i32 %D
 }
+
+; CHECK-LABEL: @div1(
+; CHECK: ret i32 0
+define i32 @div1(i32 %V) {
+  %A = udiv i32 %V, -2147483648
+  %B = udiv i32 %A, -2147483648
+  ret i32 %B
+}
+
+; CHECK-LABEL: @div2(
+; CHECK-NOT: ret i32 0
+define i32 @div2(i32 %V) {
+  %A = sdiv i32 %V, -1
+  %B = sdiv i32 %A, -2147483648
+  ret i32 %B
+}
diff --git a/test/Transforms/InstSimplify/fold-builtin-fma.ll b/test/Transforms/InstSimplify/fold-builtin-fma.ll
new file mode 100644
index 000000000000..6331b8c2dd43
--- /dev/null
+++ b/test/Transforms/InstSimplify/fold-builtin-fma.ll
@@ -0,0 +1,119 @@
+; RUN: opt -instsimplify -S < %s | FileCheck %s
+
+; Fixes PR20832
+; Make sure that we correctly fold a fused multiply-add where operands
+; are all finite constants and addend is zero.
+
+declare double @llvm.fma.f64(double, double, double)
+
+
+define double @PR20832()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 8.0, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @PR20832(
+; CHECK: ret double 5.600000e+01
+
+; Test builtin fma with all finite non-zero constants.
+define double @test_all_finite()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 8.0, double 5.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_all_finite(
+; CHECK: ret double 6.100000e+01
+
+; Test builtin fma with a +/-NaN addend.
+define double @test_NaN_addend()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 8.0, double 0x7FF8000000000000)
+  ret double %1
+}
+; CHECK-LABEL: @test_NaN_addend(
+; CHECK: ret double 0x7FF8000000000000
+
+define double @test_NaN_addend_2()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 8.0, double 0xFFF8000000000000)
+  ret double %1
+}
+; CHECK-LABEL: @test_NaN_addend_2(
+; CHECK: ret double 0xFFF8000000000000
+
+; Test builtin fma with a +/-Inf addend.
+define double @test_Inf_addend()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 8.0, double 0x7FF0000000000000)
+  ret double %1
+}
+; CHECK-LABEL: @test_Inf_addend(
+; CHECK: ret double 0x7FF0000000000000
+
+define double @test_Inf_addend_2()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 8.0, double 0xFFF0000000000000)
+  ret double %1
+}
+; CHECK-LABEL: @test_Inf_addend_2(
+; CHECK: ret double 0xFFF0000000000000
+
+; Test builtin fma with one of the operands to the multiply being +/-NaN.
+define double @test_NaN_1()  {
+  %1 = call double @llvm.fma.f64(double 0x7FF8000000000000, double 8.0, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_NaN_1(
+; CHECK: ret double 0x7FF8000000000000
+
+
+define double @test_NaN_2()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 0x7FF8000000000000, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_NaN_2(
+; CHECK: ret double 0x7FF8000000000000
+
+
+define double @test_NaN_3()  {
+  %1 = call double @llvm.fma.f64(double 0xFFF8000000000000, double 8.0, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_NaN_3(
+; CHECK: ret double 0x7FF8000000000000
+
+
+define double @test_NaN_4()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 0xFFF8000000000000, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_NaN_4(
+; CHECK: ret double 0x7FF8000000000000
+
+
+; Test builtin fma with one of the operands to the multiply being +/-Inf.
+define double @test_Inf_1()  {
+  %1 = call double @llvm.fma.f64(double 0x7FF0000000000000, double 8.0, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_Inf_1(
+; CHECK: ret double 0x7FF0000000000000
+
+
+define double @test_Inf_2()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 0x7FF0000000000000, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_Inf_2(
+; CHECK: ret double 0x7FF0000000000000
+
+
+define double @test_Inf_3()  {
+  %1 = call double @llvm.fma.f64(double 0xFFF0000000000000, double 8.0, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_Inf_3(
+; CHECK: ret double 0xFFF0000000000000
+
+
+define double @test_Inf_4()  {
+  %1 = call double @llvm.fma.f64(double 7.0, double 0xFFF0000000000000, double 0.0)
+  ret double %1
+}
+; CHECK-LABEL: @test_Inf_4(
+; CHECK: ret double 0xFFF0000000000000
+
diff --git a/test/Transforms/InstSimplify/gep.ll b/test/Transforms/InstSimplify/gep.ll
new file mode 100644
index 000000000000..49a97f133cef
--- /dev/null
+++ b/test/Transforms/InstSimplify/gep.ll
@@ -0,0 +1,80 @@
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.A = type { [7 x i8] }
+
+define %struct.A* @test1(%struct.A* %b, %struct.A* %e) {
+  %e_ptr = ptrtoint %struct.A* %e to i64
+  %b_ptr = ptrtoint %struct.A* %b to i64
+  %sub = sub i64 %e_ptr, %b_ptr
+  %sdiv = sdiv exact i64 %sub, 7
+  %gep = getelementptr inbounds %struct.A* %b, i64 %sdiv
+  ret %struct.A* %gep
+; CHECK-LABEL: @test1
+; CHECK-NEXT: ret %struct.A* %e
+}
+
+define i8* @test2(i8* %b, i8* %e) {
+  %e_ptr = ptrtoint i8* %e to i64
+  %b_ptr = ptrtoint i8* %b to i64
+  %sub = sub i64 %e_ptr, %b_ptr
+  %gep = getelementptr inbounds i8* %b, i64 %sub
+  ret i8* %gep
+; CHECK-LABEL: @test2
+; CHECK-NEXT: ret i8* %e
+}
+
+define i64* @test3(i64* %b, i64* %e) {
+  %e_ptr = ptrtoint i64* %e to i64
+  %b_ptr = ptrtoint i64* %b to i64
+  %sub = sub i64 %e_ptr, %b_ptr
+  %ashr = ashr exact i64 %sub, 3
+  %gep = getelementptr inbounds i64* %b, i64 %ashr
+  ret i64* %gep
+; CHECK-LABEL: @test3
+; CHECK-NEXT: ret i64* %e
+}
+
+define %struct.A* @test4(%struct.A* %b) {
+  %b_ptr = ptrtoint %struct.A* %b to i64
+  %sub = sub i64 0, %b_ptr
+  %sdiv = sdiv exact i64 %sub, 7
+  %gep = getelementptr inbounds %struct.A* %b, i64 %sdiv
+  ret %struct.A* %gep
+; CHECK-LABEL: @test4
+; CHECK-NEXT: ret %struct.A* null
+}
+
+define i8* @test5(i8* %b) {
+  %b_ptr = ptrtoint i8* %b to i64
+  %sub = sub i64 0, %b_ptr
+  %gep = getelementptr inbounds i8* %b, i64 %sub
+  ret i8* %gep
+; CHECK-LABEL: @test5
+; CHECK-NEXT: ret i8* null
+}
+
+define i64* @test6(i64* %b) {
+  %b_ptr = ptrtoint i64* %b to i64
+  %sub = sub i64 0, %b_ptr
+  %ashr = ashr exact i64 %sub, 3
+  %gep = getelementptr inbounds i64* %b, i64 %ashr
+  ret i64* %gep
+; CHECK-LABEL: @test6
+; CHECK-NEXT: ret i64* null
+}
+
+define i8* @test7(i8* %b, i8** %e) {
+  %e_ptr = ptrtoint i8** %e to i64
+  %b_ptr = ptrtoint i8* %b to i64
+  %sub = sub i64 %e_ptr, %b_ptr
+  %gep = getelementptr inbounds i8* %b, i64 %sub
+  ret i8* %gep
+; CHECK-LABEL: @test7
+; CHECK-NEXT: ptrtoint
+; CHECK-NEXT: ptrtoint
+; CHECK-NEXT: sub
+; CHECK-NEXT: getelementptr
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/InstSimplify/noalias-ptr.ll b/test/Transforms/InstSimplify/noalias-ptr.ll
new file mode 100644
index 000000000000..7693e5542355
--- /dev/null
+++ b/test/Transforms/InstSimplify/noalias-ptr.ll
@@ -0,0 +1,259 @@
+; RUN: opt -instsimplify -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@g1 = global i32 0, align 4
+@g2 = internal global i32 0, align 4
+@g3 = unnamed_addr global i32 0, align 4
+@g4 = hidden global i32 0, align 4
+@g5 = protected global i32 0, align 4
+@g6 = thread_local unnamed_addr global i32 0, align 4
+
+; Make sure we can simplify away a pointer comparison between
+; dynamically-allocated memory and a local stack allocation.
+;   void p()
+;   {
+;     int *mData;
+;     int mStackData[10];
+;     mData = new int[12];
+;     if (mData != mStackData) {
+;       delete[] mData;
+;     }
+;   }
+
+define void @_Z2p1v() #0 {
+  %mStackData = alloca [10 x i32], align 16
+  %1 = bitcast [10 x i32]* %mStackData to i8*
+  %2 = tail call noalias i8* @_Znam(i64 48) #4
+  %3 = bitcast i8* %2 to i32*
+  %4 = getelementptr inbounds [10 x i32]* %mStackData, i64 0, i64 0
+  %5 = icmp eq i32* %3, %4
+  br i1 %5, label %7, label %6
+
+; CHECK-LABEL: @_Z2p1v
+; CHECK-NOT: icmp
+; CHECK: ret void
+
+; <label>:6                                       ; preds = %0
+  call void @_ZdaPv(i8* %2) #5
+  br label %7
+
+; <label>:7                                       ; preds = %0, %6
+  ret void
+}
+
+; Also check a more-complicated case with multiple underlying objects.
+
+define void @_Z2p2bb(i1 zeroext %b1, i1 zeroext %b2) #0 {
+  %mStackData = alloca [10 x i32], align 16
+  %1 = bitcast [10 x i32]* %mStackData to i8*
+  %2 = getelementptr inbounds [10 x i32]* %mStackData, i64 0, i64 0
+  %3 = select i1 %b1, i32* %2, i32* @g2
+  %4 = tail call noalias i8* @_Znam(i64 48) #4
+  %5 = tail call noalias i8* @_Znam(i64 48) #4
+  %.v = select i1 %b2, i8* %4, i8* %5
+  %6 = bitcast i8* %.v to i32*
+  %7 = icmp eq i32* %6, %3
+  br i1 %7, label %9, label %8
+
+; CHECK-LABEL: @_Z2p2bb
+; CHECK-NOT: icmp
+; CHECK: ret void
+
+; <label>:8                                       ; preds = %0
+  call void @_ZdaPv(i8* %4) #5
+  call void @_ZdaPv(i8* %5) #5
+  br label %9
+
+; <label>:9                                       ; preds = %0, %8
+  ret void
+}
+
+define void @_Z2p4bb(i1 zeroext %b1, i1 zeroext %b2) #0 {
+  %mStackData = alloca [10 x i32], align 16
+  %1 = bitcast [10 x i32]* %mStackData to i8*
+  %2 = getelementptr inbounds [10 x i32]* %mStackData, i64 0, i64 0
+  %3 = select i1 %b1, i32* %2, i32* @g3
+  %4 = tail call noalias i8* @_Znam(i64 48) #4
+  %5 = tail call noalias i8* @_Znam(i64 48) #4
+  %.v = select i1 %b2, i8* %4, i8* %5
+  %6 = bitcast i8* %.v to i32*
+  %7 = icmp eq i32* %6, %3
+  br i1 %7, label %9, label %8
+
+; CHECK-LABEL: @_Z2p4bb
+; CHECK-NOT: icmp
+; CHECK: ret void
+
+; <label>:8                                       ; preds = %0
+  call void @_ZdaPv(i8* %4) #5
+  call void @_ZdaPv(i8* %5) #5
+  br label %9
+
+; <label>:9                                       ; preds = %0, %8
+  ret void
+}
+
+define void @_Z2p5bb(i1 zeroext %b1, i1 zeroext %b2) #0 {
+  %mStackData = alloca [10 x i32], align 16
+  %1 = bitcast [10 x i32]* %mStackData to i8*
+  %2 = getelementptr inbounds [10 x i32]* %mStackData, i64 0, i64 0
+  %3 = select i1 %b1, i32* %2, i32* @g4
+  %4 = tail call noalias i8* @_Znam(i64 48) #4
+  %5 = tail call noalias i8* @_Znam(i64 48) #4
+  %.v = select i1 %b2, i8* %4, i8* %5
+  %6 = bitcast i8* %.v to i32*
+  %7 = icmp eq i32* %6, %3
+  br i1 %7, label %9, label %8
+
+; CHECK-LABEL: @_Z2p5bb
+; CHECK-NOT: icmp
+; CHECK: ret void
+
+; <label>:8                                       ; preds = %0
+  call void @_ZdaPv(i8* %4) #5
+  call void @_ZdaPv(i8* %5) #5
+  br label %9
+
+; <label>:9                                       ; preds = %0, %8
+  ret void
+}
+
+define void @_Z2p6bb(i1 zeroext %b1, i1 zeroext %b2) #0 {
+  %mStackData = alloca [10 x i32], align 16
+  %1 = bitcast [10 x i32]* %mStackData to i8*
+  %2 = getelementptr inbounds [10 x i32]* %mStackData, i64 0, i64 0
+  %3 = select i1 %b1, i32* %2, i32* @g5
+  %4 = tail call noalias i8* @_Znam(i64 48) #4
+  %5 = tail call noalias i8* @_Znam(i64 48) #4
+  %.v = select i1 %b2, i8* %4, i8* %5
+  %6 = bitcast i8* %.v to i32*
+  %7 = icmp eq i32* %6, %3
+  br i1 %7, label %9, label %8
+
+; CHECK-LABEL: @_Z2p6bb
+; CHECK-NOT: icmp
+; CHECK: ret void
+
+; <label>:8                                       ; preds = %0
+  call void @_ZdaPv(i8* %4) #5
+  call void @_ZdaPv(i8* %5) #5
+  br label %9
+
+; <label>:9                                       ; preds = %0, %8
+  ret void
+}
+
+; Here's another case involving multiple underlying objects, but this time we
+; must keep the comparison (it might involve a regular pointer-typed function
+; argument).
+
+define void @_Z4nopebbPi(i1 zeroext %b1, i1 zeroext %b2, i32* readnone %q) #0 {
+  %mStackData = alloca [10 x i32], align 16
+  %1 = bitcast [10 x i32]* %mStackData to i8*
+  %2 = getelementptr inbounds [10 x i32]* %mStackData, i64 0, i64 0
+  %3 = select i1 %b1, i32* %2, i32* %q
+  %4 = tail call noalias i8* @_Znam(i64 48) #4
+  %5 = tail call noalias i8* @_Znam(i64 48) #4
+  %.v = select i1 %b2, i8* %4, i8* %5
+  %6 = bitcast i8* %.v to i32*
+  %7 = icmp eq i32* %6, %3
+  br i1 %7, label %9, label %8
+
+; CHECK-LABEL: @_Z4nopebbPi
+; CHECK: icmp
+; CHECK: ret void
+
+; <label>:8                                       ; preds = %0
+  call void @_ZdaPv(i8* %4) #5
+  call void @_ZdaPv(i8* %5) #5
+  br label %9
+
+; <label>:9                                       ; preds = %0, %8
+  ret void
+}
+
+define void @_Z2p3bb(i1 zeroext %b1, i1 zeroext %b2) #0 {
+  %mStackData = alloca [10 x i32], align 16
+  %1 = bitcast [10 x i32]* %mStackData to i8*
+  %2 = getelementptr inbounds [10 x i32]* %mStackData, i64 0, i64 0
+  %3 = select i1 %b1, i32* %2, i32* @g1
+  %4 = tail call noalias i8* @_Znam(i64 48) #4
+  %5 = tail call noalias i8* @_Znam(i64 48) #4
+  %.v = select i1 %b2, i8* %4, i8* %5
+  %6 = bitcast i8* %.v to i32*
+  %7 = icmp eq i32* %6, %3
+  br i1 %7, label %9, label %8
+
+; CHECK-LABEL: @_Z2p3bb
+; CHECK: icmp
+; CHECK: ret void
+
+; <label>:8                                       ; preds = %0
+  call void @_ZdaPv(i8* %4) #5
+  call void @_ZdaPv(i8* %5) #5
+  br label %9
+
+; <label>:9                                       ; preds = %0, %8
+  ret void
+}
+
+define void @_Z2p7bb(i1 zeroext %b1, i1 zeroext %b2) #0 {
+  %mStackData = alloca [10 x i32], align 16
+  %1 = bitcast [10 x i32]* %mStackData to i8*
+  %2 = getelementptr inbounds [10 x i32]* %mStackData, i64 0, i64 0
+  %3 = select i1 %b1, i32* %2, i32* @g6
+  %4 = tail call noalias i8* @_Znam(i64 48) #4
+  %5 = tail call noalias i8* @_Znam(i64 48) #4
+  %.v = select i1 %b2, i8* %4, i8* %5
+  %6 = bitcast i8* %.v to i32*
+  %7 = icmp eq i32* %6, %3
+  br i1 %7, label %9, label %8
+
+; CHECK-LABEL: @_Z2p7bb
+; CHECK: icmp
+; CHECK: ret void
+
+; <label>:8                                       ; preds = %0
+  call void @_ZdaPv(i8* %4) #5
+  call void @_ZdaPv(i8* %5) #5
+  br label %9
+
+; <label>:9                                       ; preds = %0, %8
+  ret void
+}
+
+define void @_Z2p2v(i32 %c) #0 {
+  %mStackData = alloca [10 x i32], i32 %c, align 16
+  %1 = bitcast [10 x i32]* %mStackData to i8*
+  %2 = tail call noalias i8* @_Znam(i64 48) #4
+  %3 = bitcast i8* %2 to i32*
+  %4 = getelementptr inbounds [10 x i32]* %mStackData, i64 0, i64 0
+  %5 = icmp eq i32* %3, %4
+  br i1 %5, label %7, label %6
+
+; CHECK-LABEL: @_Z2p2v
+; CHECK: icmp
+; CHECK: ret void
+
+; <label>:6                                       ; preds = %0
+  call void @_ZdaPv(i8* %2) #5
+  br label %7
+
+; <label>:7                                       ; preds = %0, %6
+  ret void
+}
+
+; Function Attrs: nobuiltin
+declare noalias i8* @_Znam(i64) #2
+
+; Function Attrs: nobuiltin nounwind
+declare void @_ZdaPv(i8*) #3
+
+attributes #0 = { uwtable }
+attributes #1 = { nounwind }
+attributes #2 = { nobuiltin }
+attributes #3 = { nobuiltin nounwind }
+attributes #4 = { builtin }
+attributes #5 = { builtin nounwind }
+
diff --git a/test/Transforms/InstSimplify/rem.ll b/test/Transforms/InstSimplify/rem.ll
index 80fa8e7b4831..f5ea45107e2a 100644
--- a/test/Transforms/InstSimplify/rem.ll
+++ b/test/Transforms/InstSimplify/rem.ll
@@ -15,3 +15,31 @@ define i32 @select2(i32 %x, i1 %b) {
   ret i32 %rem
 ; CHECK: ret i32 0
 }
+
+define i32 @rem1(i32 %x, i32 %n) {
+; CHECK-LABEL: @rem1(
+; CHECK-NEXT: %mod = srem i32 %x, %n
+; CHECK-NEXT: ret i32 %mod
+ %mod = srem i32 %x, %n
+ %mod1 = srem i32 %mod, %n
+ ret i32 %mod1
+}
+
+define i32 @rem2(i32 %x, i32 %n) {
+; CHECK-LABEL: @rem2(
+; CHECK-NEXT: %mod = urem i32 %x, %n
+; CHECK-NEXT: ret i32 %mod
+ %mod = urem i32 %x, %n
+ %mod1 = urem i32 %mod, %n
+ ret i32 %mod1
+}
+
+define i32 @rem3(i32 %x, i32 %n) {
+; CHECK-LABEL: @rem3(
+; CHECK-NEXT: %[[srem:.*]] = srem i32 %x, %n
+; CHECK-NEXT: %[[urem:.*]] = urem i32 %[[srem]], %n
+; CHECK-NEXT: ret i32 %[[urem]]
+ %mod = srem i32 %x, %n
+ %mod1 = urem i32 %mod, %n
+ ret i32 %mod1
+}
diff --git a/test/Transforms/InstSimplify/select.ll b/test/Transforms/InstSimplify/select.ll
new file mode 100644
index 000000000000..1d45e5728e68
--- /dev/null
+++ b/test/Transforms/InstSimplify/select.ll
@@ -0,0 +1,161 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define i32 @test1(i32 %x) {
+  %and = and i32 %x, 1
+  %cmp = icmp eq i32 %and, 0
+  %and1 = and i32 %x, -2
+  %and1.x = select i1 %cmp, i32 %and1, i32 %x
+  ret i32 %and1.x
+; CHECK-LABEL: @test1(
+; CHECK: ret i32 %x
+}
+
+define i32 @test2(i32 %x) {
+  %and = and i32 %x, 1
+  %cmp = icmp ne i32 %and, 0
+  %and1 = and i32 %x, -2
+  %and1.x = select i1 %cmp, i32 %x, i32 %and1
+  ret i32 %and1.x
+; CHECK-LABEL: @test2(
+; CHECK: ret i32 %x
+}
+
+define i32 @test3(i32 %x) {
+  %and = and i32 %x, 1
+  %cmp = icmp ne i32 %and, 0
+  %and1 = and i32 %x, -2
+  %and1.x = select i1 %cmp, i32 %and1, i32 %x
+  ret i32 %and1.x
+; CHECK-LABEL: @test3(
+; CHECK: %[[and:.*]] = and i32 %x, -2
+; CHECK: ret i32 %[[and]]
+}
+
+define i32 @test4(i32 %X) {
+  %cmp = icmp slt i32 %X, 0
+  %or = or i32 %X, -2147483648
+  %cond = select i1 %cmp, i32 %X, i32 %or
+  ret i32 %cond
+; CHECK-LABEL: @test4
+; CHECK: %[[or:.*]] = or i32 %X, -2147483648
+; CHECK: ret i32 %[[or]]
+}
+
+define i32 @test5(i32 %X) {
+  %cmp = icmp slt i32 %X, 0
+  %or = or i32 %X, -2147483648
+  %cond = select i1 %cmp, i32 %or, i32 %X
+  ret i32 %cond
+; CHECK-LABEL: @test5
+; CHECK: ret i32 %X
+}
+
+define i32 @test6(i32 %X) {
+  %cmp = icmp slt i32 %X, 0
+  %and = and i32 %X, 2147483647
+  %cond = select i1 %cmp, i32 %and, i32 %X
+  ret i32 %cond
+; CHECK-LABEL: @test6
+; CHECK: %[[and:.*]] = and i32 %X, 2147483647
+; CHECK: ret i32 %[[and]]
+}
+
+define i32 @test7(i32 %X) {
+  %cmp = icmp slt i32 %X, 0
+  %and = and i32 %X, 2147483647
+  %cond = select i1 %cmp, i32 %X, i32 %and
+  ret i32 %cond
+; CHECK-LABEL: @test7
+; CHECK: ret i32 %X
+}
+
+define i32 @test8(i32 %X) {
+  %cmp = icmp sgt i32 %X, -1
+  %or = or i32 %X, -2147483648
+  %cond = select i1 %cmp, i32 %X, i32 %or
+  ret i32 %cond
+; CHECK-LABEL: @test8
+; CHECK: ret i32 %X
+}
+
+define i32 @test9(i32 %X) {
+  %cmp = icmp sgt i32 %X, -1
+  %or = or i32 %X, -2147483648
+  %cond = select i1 %cmp, i32 %or, i32 %X
+  ret i32 %cond
+; CHECK-LABEL: @test9
+; CHECK: %[[or:.*]] = or i32 %X, -2147483648
+; CHECK: ret i32 %[[or]]
+}
+
+define i32 @test10(i32 %X) {
+  %cmp = icmp sgt i32 %X, -1
+  %and = and i32 %X, 2147483647
+  %cond = select i1 %cmp, i32 %and, i32 %X
+  ret i32 %cond
+; CHECK-LABEL: @test10
+; CHECK: ret i32 %X
+}
+
+define i32 @test11(i32 %X) {
+  %cmp = icmp sgt i32 %X, -1
+  %and = and i32 %X, 2147483647
+  %cond = select i1 %cmp, i32 %X, i32 %and
+  ret i32 %cond
+; CHECK-LABEL: @test11
+; CHECK: %[[and:.*]] = and i32 %X, 2147483647
+; CHECK: ret i32 %[[and]]
+}
+
+; CHECK-LABEL: @select_icmp_and_8_eq_0_or_8(
+; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 %x, 8
+; CHECK-NEXT: ret i32 [[OR]]
+define i32 @select_icmp_and_8_eq_0_or_8(i32 %x) {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %or = or i32 %x, 8
+  %or.x = select i1 %cmp, i32 %or, i32 %x
+  ret i32 %or.x
+}
+
+; CHECK-LABEL: @select_icmp_and_8_ne_0_and_not_8(
+; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, -9
+; CHECK-NEXT: ret i32 [[AND]]
+define i32 @select_icmp_and_8_ne_0_and_not_8(i32 %x) {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %and1 = and i32 %x, -9
+  %x.and1 = select i1 %cmp, i32 %x, i32 %and1
+  ret i32 %x.and1
+}
+
+; CHECK-LABEL: @select_icmp_and_8_eq_0_and_not_8(
+; CHECK-NEXT: ret i32 %x
+define i32 @select_icmp_and_8_eq_0_and_not_8(i32 %x) {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %and1 = and i32 %x, -9
+  %and1.x = select i1 %cmp, i32 %and1, i32 %x
+  ret i32 %and1.x
+}
+
+; CHECK-LABEL: @select_icmp_x_and_8_eq_0_y_and_not_8(
+; CHECK: select i1 %cmp, i64 %y, i64 %and1
+define i64 @select_icmp_x_and_8_eq_0_y_and_not_8(i32 %x, i64 %y) {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %and1 = and i64 %y, -9
+  %y.and1 = select i1 %cmp, i64 %y, i64 %and1
+  ret i64 %y.and1
+}
+
+; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_and_not_8(
+; CHECK: select i1 %cmp, i64 %and1, i64 %y
+define i64 @select_icmp_x_and_8_ne_0_y_and_not_8(i32 %x, i64 %y) {
+  %and = and i32 %x, 8
+  %cmp = icmp eq i32 %and, 0
+  %and1 = and i64 %y, -9
+  %and1.y = select i1 %cmp, i64 %and1, i64 %y
+  ret i64 %and1.y
+}
+
diff --git a/test/Transforms/InstSimplify/shr-nop.ll b/test/Transforms/InstSimplify/shr-nop.ll
new file mode 100644
index 000000000000..b0dc8731a112
--- /dev/null
+++ b/test/Transforms/InstSimplify/shr-nop.ll
@@ -0,0 +1,346 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+; CHECK-LABEL: @foo
+; CHECK:      %[[and:.*]] = and i32 %x, 1
+; CHECK-NEXT: %[[add:.*]] = add i32 %[[and]], -1
+; CHECK-NEXT: ret i32 %[[add]]
+define i32 @foo(i32 %x) {
+ %o = and i32 %x, 1
+ %n = add i32 %o, -1
+ %t = ashr i32 %n, 17
+ ret i32 %t
+}
+
+; CHECK-LABEL: @exact_lshr_eq_both_zero
+; CHECK-NEXT: ret i1 true
+define i1 @exact_lshr_eq_both_zero(i8 %a) {
+ %shr = lshr exact i8 0, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_both_zero
+; CHECK-NEXT: ret i1 true
+define i1 @exact_ashr_eq_both_zero(i8 %a) {
+ %shr = ashr exact i8 0, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq_both_zero
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_ashr_eq_both_zero(i8 %a) {
+ %shr = ashr i8 0, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_both_zero
+; CHECK-NEXT: ret i1 false
+define i1 @exact_lshr_ne_both_zero(i8 %a) {
+ %shr = lshr exact i8 0, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_both_zero
+; CHECK-NEXT: ret i1 false
+define i1 @exact_ashr_ne_both_zero(i8 %a) {
+ %shr = ashr exact i8 0, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne_both_zero
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_lshr_ne_both_zero(i8 %a) {
+ %shr = lshr i8 0, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne_both_zero
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_ashr_ne_both_zero(i8 %a) {
+ %shr = ashr i8 0, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_last_zero
+; CHECK-NEXT: ret i1 false
+define i1 @exact_lshr_eq_last_zero(i8 %a) {
+ %shr = lshr exact i8 128, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_last_zero
+; CHECK-NEXT: ret i1 false
+define i1 @exact_ashr_eq_last_zero(i8 %a) {
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq_both_zero
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_lshr_eq_both_zero(i8 %a) {
+ %shr = lshr i8 0, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_last_zero
+; CHECK-NEXT: ret i1 true
+define i1 @exact_lshr_ne_last_zero(i8 %a) {
+ %shr = lshr exact i8 128, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_last_zero
+; CHECK-NEXT: ret i1 true
+define i1 @exact_ashr_ne_last_zero(i8 %a) {
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq_last_zero
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_lshr_eq_last_zero(i8 %a) {
+ %shr = lshr i8 128, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq_last_zero
+; CHECK-NEXT: ret i1 false
+define i1 @nonexact_ashr_eq_last_zero(i8 %a) {
+ %shr = ashr i8 -128, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne_last_zero
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_lshr_ne_last_zero(i8 %a) {
+ %shr = lshr i8 128, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne_last_zero
+; CHECK-NEXT: ret i1 true
+define i1 @nonexact_ashr_ne_last_zero(i8 %a) {
+ %shr = ashr i8 -128, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_eq_first_zero
+; CHECK-NEXT: ret i1 false
+define i1 @lshr_eq_first_zero(i8 %a) {
+ %shr = lshr i8 0, %a
+ %cmp = icmp eq i8 %shr, 2
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_eq_first_zero
+; CHECK-NEXT: ret i1 false
+define i1 @ashr_eq_first_zero(i8 %a) {
+ %shr = ashr i8 0, %a
+ %cmp = icmp eq i8 %shr, 2
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @lshr_ne_first_zero
+; CHECK-NEXT: ret i1 true
+define i1 @lshr_ne_first_zero(i8 %a) {
+ %shr = lshr i8 0, %a
+ %cmp = icmp ne i8 %shr, 2
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_ne_first_zero
+; CHECK-NEXT: ret i1 true
+define i1 @ashr_ne_first_zero(i8 %a) {
+ %shr = ashr i8 0, %a
+ %cmp = icmp ne i8 %shr, 2
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_eq_both_minus1
+; CHECK-NEXT: ret i1 true
+define i1 @ashr_eq_both_minus1(i8 %a) {
+ %shr = ashr i8 -1, %a
+ %cmp = icmp eq i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_ne_both_minus1
+; CHECK-NEXT: ret i1 false
+define i1 @ashr_ne_both_minus1(i8 %a) {
+ %shr = ashr i8 -1, %a
+ %cmp = icmp ne i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_both_minus1
+; CHECK-NEXT: ret i1 true
+define i1 @exact_ashr_eq_both_minus1(i8 %a) {
+ %shr = ashr exact i8 -1, %a
+ %cmp = icmp eq i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_both_minus1
+; CHECK-NEXT: ret i1 false
+define i1 @exact_ashr_ne_both_minus1(i8 %a) {
+ %shr = ashr exact i8 -1, %a
+ %cmp = icmp ne i8 %shr, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_opposite_msb
+; CHECK-NEXT: ret i1 false
+define i1 @exact_ashr_eq_opposite_msb(i8 %a) {
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_noexactlog
+; CHECK-NEXT: ret i1 false
+define i1 @exact_ashr_eq_noexactlog(i8 %a) {
+ %shr = ashr exact i8 -90, %a
+ %cmp = icmp eq i8 %shr, -30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_opposite_msb
+; CHECK-NEXT: ret i1 true
+define i1 @exact_ashr_ne_opposite_msb(i8 %a) {
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_eq_opposite_msb
+; CHECK-NEXT: ret i1 false
+define i1 @ashr_eq_opposite_msb(i8 %a) {
+ %shr = ashr i8 -128, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @ashr_ne_opposite_msb
+; CHECK-NEXT: ret i1 true
+define i1 @ashr_ne_opposite_msb(i8 %a) {
+ %shr = ashr i8 -128, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_eq_shift_gt
+; CHECK-NEXT : ret i1 false
+define i1 @exact_ashr_eq_shift_gt(i8 %a) {
+ %shr = ashr exact i8 -2, %a
+ %cmp = icmp eq i8 %shr, -8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_shift_gt
+; CHECK-NEXT : ret i1 true
+define i1 @exact_ashr_ne_shift_gt(i8 %a) {
+ %shr = ashr exact i8 -2, %a
+ %cmp = icmp ne i8 %shr, -8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_eq_shift_gt
+; CHECK-NEXT : ret i1 false
+define i1 @nonexact_ashr_eq_shift_gt(i8 %a) {
+ %shr = ashr i8 -2, %a
+ %cmp = icmp eq i8 %shr, -8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_ashr_ne_shift_gt
+; CHECK-NEXT : ret i1 true
+define i1 @nonexact_ashr_ne_shift_gt(i8 %a) {
+ %shr = ashr i8 -2, %a
+ %cmp = icmp ne i8 %shr, -8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_shift_gt
+; CHECK-NEXT: ret i1 false
+define i1 @exact_lshr_eq_shift_gt(i8 %a) {
+ %shr = lshr exact i8 2, %a
+ %cmp = icmp eq i8 %shr, 8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_shift_gt
+; CHECK-NEXT: ret i1 true
+define i1 @exact_lshr_ne_shift_gt(i8 %a) {
+ %shr = lshr exact i8 2, %a
+ %cmp = icmp ne i8 %shr, 8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_eq_shift_gt
+; CHECK-NEXT : ret i1 false
+define i1 @nonexact_lshr_eq_shift_gt(i8 %a) {
+ %shr = lshr i8 2, %a
+ %cmp = icmp eq i8 %shr, 8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @nonexact_lshr_ne_shift_gt
+; CHECK-NEXT : ret i1 true
+define i1 @nonexact_lshr_ne_shift_gt(i8 %a) {
+ %shr = ashr i8 2, %a
+ %cmp = icmp ne i8 %shr, 8
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_ashr_ne_noexactlog
+; CHECK-NEXT: ret i1 true
+define i1 @exact_ashr_ne_noexactlog(i8 %a) {
+ %shr = ashr exact i8 -90, %a
+ %cmp = icmp ne i8 %shr, -30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_eq_noexactlog
+; CHECK-NEXT: ret i1 false
+define i1 @exact_lshr_eq_noexactlog(i8 %a) {
+ %shr = lshr exact i8 90, %a
+ %cmp = icmp eq i8 %shr, 30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_ne_noexactlog
+; CHECK-NEXT: ret i1 true
+define i1 @exact_lshr_ne_noexactlog(i8 %a) {
+ %shr = lshr exact i8 90, %a
+ %cmp = icmp ne i8 %shr, 30
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @exact_lshr_lowbit
+; CHECK-NEXT: ret i32 7
+define i32 @exact_lshr_lowbit(i32 %shiftval) {
+  %shr = lshr exact i32 7, %shiftval
+  ret i32 %shr
+}
+
+; CHECK-LABEL: @exact_ashr_lowbit
+; CHECK-NEXT: ret i32 7
+define i32 @exact_ashr_lowbit(i32 %shiftval) {
+  %shr = ashr exact i32 7, %shiftval
+  ret i32 %shr
+}
diff --git a/test/Transforms/InstSimplify/undef.ll b/test/Transforms/InstSimplify/undef.ll
index 181c2efa2dc0..e8b49b6397db 100644
--- a/test/Transforms/InstSimplify/undef.ll
+++ b/test/Transforms/InstSimplify/undef.ll
@@ -160,3 +160,108 @@ define <4 x i8> @test19(<4 x i8> %a) {
   %b = shl <4 x i8> %a, <i8 8, i8 9, i8 undef, i8 -1>
   ret <4 x i8> %b
 }
+
+; CHECK-LABEL: @test20
+; CHECK: ret i32 undef
+define i32 @test20(i32 %a) {
+  %b = udiv i32 %a, 0
+  ret i32 %b
+}
+
+; CHECK-LABEL: @test21
+; CHECK: ret i32 undef
+define i32 @test21(i32 %a) {
+  %b = sdiv i32 %a, 0
+  ret i32 %b
+}
+
+; CHECK-LABEL: @test22
+; CHECK: ret i32 undef
+define i32 @test22(i32 %a) {
+  %b = ashr exact i32 undef, %a
+  ret i32 %b
+}
+
+; CHECK-LABEL: @test23
+; CHECK: ret i32 undef
+define i32 @test23(i32 %a) {
+  %b = lshr exact i32 undef, %a
+  ret i32 %b
+}
+
+; CHECK-LABEL: @test24
+; CHECK: ret i32 undef
+define i32 @test24() {
+  %b = udiv i32 undef, 0
+  ret i32 %b
+}
+
+; CHECK-LABEL: @test25
+; CHECK: ret i32 undef
+define i32 @test25() {
+  %b = lshr i32 0, undef
+  ret i32 %b
+}
+
+; CHECK-LABEL: @test26
+; CHECK: ret i32 undef
+define i32 @test26() {
+  %b = ashr i32 0, undef
+  ret i32 %b
+}
+
+; CHECK-LABEL: @test27
+; CHECK: ret i32 undef
+define i32 @test27() {
+  %b = shl i32 0, undef
+  ret i32 %b
+}
+
+; CHECK-LABEL: @test28
+; CHECK: ret i32 undef
+define i32 @test28(i32 %a) {
+  %b = shl nsw i32 undef, %a
+  ret i32 %b
+}
+
+; CHECK-LABEL: @test29
+; CHECK: ret i32 undef
+define i32 @test29(i32 %a) {
+  %b = shl nuw i32 undef, %a
+  ret i32 %b
+}
+
+; CHECK-LABEL: @test30
+; CHECK: ret i32 undef
+define i32 @test30(i32 %a) {
+  %b = shl nsw nuw i32 undef, %a
+  ret i32 %b
+}
+
+; CHECK-LABEL: @test31
+; CHECK: ret i32 0
+define i32 @test31(i32 %a) {
+  %b = shl i32 undef, %a
+  ret i32 %b
+}
+
+; CHECK-LABEL: @test32
+; CHECK: ret i32 undef
+define i32 @test32(i32 %a) {
+  %b = shl i32 undef, 0
+  ret i32 %b
+}
+
+; CHECK-LABEL: @test33
+; CHECK: ret i32 undef
+define i32 @test33(i32 %a) {
+  %b = ashr i32 undef, 0
+  ret i32 %b
+}
+
+; CHECK-LABEL: @test34
+; CHECK: ret i32 undef
+define i32 @test34(i32 %a) {
+  %b = lshr i32 undef, 0
+  ret i32 %b
+}
diff --git a/test/Transforms/InstSimplify/vector_ptr_bitcast.ll b/test/Transforms/InstSimplify/vector_ptr_bitcast.ll
new file mode 100644
index 000000000000..607892ae74d2
--- /dev/null
+++ b/test/Transforms/InstSimplify/vector_ptr_bitcast.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -instsimplify < %s | FileCheck %s
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+
+%mst = type { i8*, i8* }
+%mst2 = type { i32*, i32*, i32*, i32* }
+
+@a = private unnamed_addr constant %mst { i8* inttoptr (i64 -1 to i8*),
+                                          i8* inttoptr (i64 -1 to i8*)},
+                                          align 8
+@b = private unnamed_addr constant %mst2 { i32* inttoptr (i64 42 to i32*),
+                                           i32* inttoptr (i64 67 to i32*),
+                                           i32* inttoptr (i64 33 to i32*),
+                                           i32* inttoptr (i64 58 to i32*)},
+                                          align 8
+
+define i64 @fn() {
+  %x = load <2 x i8*>* bitcast (%mst* @a to <2 x i8*>*), align 8
+  %b = extractelement <2 x i8*> %x, i32 0
+  %c = ptrtoint i8* %b to i64
+  ; CHECK-LABEL: @fn
+  ; CHECK-NEXT: ret i64 -1
+  ret i64 %c
+}
+
+define i64 @fn2() {
+  %x = load <4 x i32*>* bitcast (%mst2* @b to <4 x i32*>*), align 8
+  %b = extractelement <4 x i32*> %x, i32 0
+  %c = extractelement <4 x i32*> %x, i32 3
+  %d = ptrtoint i32* %b to i64
+  %e = ptrtoint i32* %c to i64
+  %r = add i64 %d, %e
+  ; CHECK-LABEL: @fn2
+  ; CHECK-NEXT: ret i64 100
+  ret i64 %r
+}
diff --git a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
index 16bfe2a46091..16523886a068 100644
--- a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
+++ b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
@@ -4,10 +4,10 @@
 ; CHECK: @A = internal global i32 0
 
 @B = alias i32* @A
-; CHECK: @B = alias internal i32* @A
+; CHECK: @B = internal alias i32* @A
 
 @C = alias i32* @A
-; CHECK: @C = alias internal i32* @A
+; CHECK: @C = internal alias i32* @A
 
 define i32 @main() {
 	%tmp = load i32* @C
diff --git a/test/Transforms/Internalize/local-visibility.ll b/test/Transforms/Internalize/local-visibility.ll
index c24d4b7f32a0..b09a136e5263 100644
--- a/test/Transforms/Internalize/local-visibility.ll
+++ b/test/Transforms/Internalize/local-visibility.ll
@@ -10,9 +10,9 @@
 ; CHECK: @protected.variable = internal global i32 0
 @protected.variable = protected global i32 0
 
-; CHECK: @hidden.alias = alias internal i32* @global
+; CHECK: @hidden.alias = internal alias  i32* @global
 @hidden.alias = hidden alias i32* @global
-; CHECK: @protected.alias = alias internal i32* @global
+; CHECK: @protected.alias = internal alias i32* @global
 @protected.alias = protected alias i32* @global
 
 ; CHECK: define internal void @hidden.function() {
diff --git a/test/Transforms/JumpThreading/assume-edge-dom.ll b/test/Transforms/JumpThreading/assume-edge-dom.ll
new file mode 100644
index 000000000000..f1d0f41e2505
--- /dev/null
+++ b/test/Transforms/JumpThreading/assume-edge-dom.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -jump-threading < %s | FileCheck %s
+
+declare i8* @escape()
+declare void @llvm.assume(i1)
+
+define i1 @test1(i1 %cond) {
+entry:
+    br i1 %cond, label %taken, label %not_taken
+
+; CHECK-LABEL: @test1
+; CHECK: br i1 %cond, label %no, label %yes
+; CHECK: ret i1 true
+
+taken:
+    %res1 = call i8* @escape()
+    %a = icmp eq i8* %res1, null
+    tail call void @llvm.assume(i1 %a)
+    br label %done
+not_taken:
+    %res2 = call i8* @escape()
+    %b = icmp ne i8* %res2, null
+    tail call void @llvm.assume(i1 %b)
+    br label %done
+
+; An assume that can be used to simplify this comparison dominates each
+; predecessor branch (although no assume dominates the cmp itself). Make sure
+; this still can be simplified.
+
+done:
+    %res = phi i8* [ %res1, %taken ], [ %res2, %not_taken ]
+    %cnd = icmp ne i8* %res, null
+    br i1 %cnd, label %yes, label %no
+
+yes:
+    ret i1 true
+no:
+    ret i1 false
+}
+
diff --git a/test/Transforms/JumpThreading/assume.ll b/test/Transforms/JumpThreading/assume.ll
new file mode 100644
index 000000000000..89dd0a954c01
--- /dev/null
+++ b/test/Transforms/JumpThreading/assume.ll
@@ -0,0 +1,68 @@
+; RUN: opt -S -jump-threading -dce < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @test1(i32 %a, i32 %b) #0 {
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %cmp1 = icmp sgt i32 %b, 1234
+  br i1 %cmp1, label %if.then, label %if.else
+
+; CHECK-LABEL: @test1
+; CHECK: icmp sgt i32 %a, 5
+; CHECK: call void @llvm.assume
+; CHECK-NOT: icmp sgt i32 %a, 3
+; CHECK: ret i32
+
+if.then:                                          ; preds = %entry
+  %cmp2 = icmp sgt i32 %a, 3
+  br i1 %cmp2, label %if.then3, label %return
+
+if.then3:                                         ; preds = %if.then
+  tail call void (...)* @bar() #1
+  br label %return
+
+if.else:                                          ; preds = %entry
+  tail call void (...)* @car() #1
+  br label %return
+
+return:                                           ; preds = %if.else, %if.then, %if.then3
+  %retval.0 = phi i32 [ 1, %if.then3 ], [ 0, %if.then ], [ 0, %if.else ]
+  ret i32 %retval.0
+}
+
+define i32 @test2(i32 %a) #0 {
+entry:
+  %cmp = icmp sgt i32 %a, 5
+  tail call void @llvm.assume(i1 %cmp)
+  %cmp1 = icmp sgt i32 %a, 3
+  br i1 %cmp1, label %if.then, label %return
+
+; CHECK-LABEL: @test2
+; CHECK: icmp sgt i32 %a, 5
+; CHECK: tail call void @llvm.assume
+; CHECK: tail call void (...)* @bar()
+; CHECK: ret i32 1
+
+
+if.then:                                          ; preds = %entry
+  tail call void (...)* @bar() #1
+  br label %return
+
+return:                                           ; preds = %entry, %if.then
+  %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ]
+  ret i32 %retval.0
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+declare void @bar(...)
+
+declare void @car(...)
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
diff --git a/test/Transforms/JumpThreading/conservative-lvi.ll b/test/Transforms/JumpThreading/conservative-lvi.ll
new file mode 100644
index 000000000000..1ea8cdc4693a
--- /dev/null
+++ b/test/Transforms/JumpThreading/conservative-lvi.ll
@@ -0,0 +1,58 @@
+; RUN: opt -jump-threading -S %s | FileCheck %s
+
+; Check that we thread arg2neg -> checkpos -> end.
+;
+; LazyValueInfo would previously fail to analyze the value of %arg in arg2neg
+; because its predecessing blocks (checkneg) hadn't been processed yet (PR21238)
+
+; CHECK-LABEL: @test_jump_threading
+; CHECK: arg2neg:
+; CHECK-NEXT: br i1 %arg1, label %end, label %checkpos.thread
+; CHECK: checkpos.thread:
+; CHECK-NEXT: br label %end
+
+define i32 @test_jump_threading(i1 %arg1, i32 %arg2) {
+checkneg:
+  %cmp = icmp slt i32 %arg2, 0
+  br i1 %cmp, label %arg2neg, label %checkpos
+
+arg2neg:
+  br i1 %arg1, label %end, label %checkpos
+
+checkpos:
+  %cmp2 = icmp sgt i32 %arg2, 0
+  br i1 %cmp2, label %arg2pos, label %end
+
+arg2pos:
+  br label %end
+
+end:
+  %0 = phi i32 [ 1, %arg2neg ], [ 2, %checkpos ], [ 3, %arg2pos ]
+  ret i32 %0
+}
+
+
+; arg2neg has an edge back to itself. If LazyValueInfo is not careful when
+; visiting predecessors, it could get into an infinite loop.
+
+; CHECK-LABEL: test_infinite_loop
+
+define i32 @test_infinite_loop(i1 %arg1, i32 %arg2) {
+checkneg:
+  %cmp = icmp slt i32 %arg2, 0
+  br i1 %cmp, label %arg2neg, label %checkpos
+
+arg2neg:
+  br i1 %arg1, label %arg2neg, label %checkpos
+
+checkpos:
+  %cmp2 = icmp sgt i32 %arg2, 0
+  br i1 %cmp2, label %arg2pos, label %end
+
+arg2pos:
+  br label %end
+
+end:
+  %0 = phi i32 [ 2, %checkpos ], [ 3, %arg2pos ]
+  ret i32 %0
+}
diff --git a/test/Transforms/JumpThreading/pr22086.ll b/test/Transforms/JumpThreading/pr22086.ll
new file mode 100644
index 000000000000..35d9aa5b1841
--- /dev/null
+++ b/test/Transforms/JumpThreading/pr22086.ll
@@ -0,0 +1,28 @@
+; RUN: opt -S -jump-threading < %s | FileCheck %s
+
+
+; CHECK-LABEL: @f(
+; CHECK-LABEL: entry:
+; CHECK-NEXT:  br label %[[loop:.*]]
+; CHECK:       [[loop]]:
+; CHECK-NEXT:  br label %[[loop]]
+
+define void @f() {
+entry:
+  br label %for.cond1
+
+if.end16:
+  %phi1 = phi i32 [ undef, %for.cond1 ]
+  %g.3 = phi i32 [ %g.1, %for.cond1 ]
+  %sext = shl i32 %g.3, 16
+  %conv20 = ashr exact i32 %sext, 16
+  %tobool21 = icmp eq i32 %phi1, 0
+  br i1 %tobool21, label %lor.rhs, label %for.cond1
+
+for.cond1:
+  %g.1 = phi i32 [ 0, %entry ], [ 0, %lor.rhs ], [ %g.3, %if.end16 ]
+  br i1 undef, label %lor.rhs, label %if.end16
+
+lor.rhs:
+  br label %for.cond1
+}
diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll
index e5bf64b9e256..4351f9918109 100644
--- a/test/Transforms/JumpThreading/thread-loads.ll
+++ b/test/Transforms/JumpThreading/thread-loads.ll
@@ -75,7 +75,38 @@ bb3:		; preds = %bb1
 	ret i32 %res.0
 }
 
-!0 = metadata !{metadata !3, metadata !3, i64 0}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"int", metadata !1}
+define i32 @test3(i8** %x, i1 %f) {
+; Correctly thread loads of different (but compatible) types, placing bitcasts
+; as necessary in the predecessors. This is especially tricky because the same
+; predecessor ends up with two entries in the PHI node and they must share
+; a single cast.
+; CHECK-LABEL: @test3(
+entry:
+  %0 = bitcast i8** %x to i32**
+  %1 = load i32** %0, align 8
+  br i1 %f, label %if.end57, label %if.then56
+; CHECK: %[[LOAD:.*]] = load i32**
+; CHECK: %[[CAST:.*]] = bitcast i32* %[[LOAD]] to i8*
+
+if.then56:
+  br label %if.end57
+
+if.end57:
+  %2 = load i8** %x, align 8
+  %tobool59 = icmp eq i8* %2, null
+  br i1 %tobool59, label %return, label %if.then60
+; CHECK: %[[PHI:.*]] = phi i8* [ %[[CAST]], %[[PRED:[^ ]+]] ], [ %[[CAST]], %[[PRED]] ]
+; CHECK-NEXT: %[[CMP:.*]] = icmp eq i8* %[[PHI]], null
+; CHECK-NEXT: br i1 %[[CMP]]
+
+if.then60:
+  ret i32 42
+
+return:
+  ret i32 13
+}
+
+!0 = !{!3, !3, i64 0}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA", null}
+!3 = !{!"int", !1}
diff --git a/test/Transforms/LCSSA/indirectbr.ll b/test/Transforms/LCSSA/indirectbr.ll
index 96564486e820..345395bee01b 100644
--- a/test/Transforms/LCSSA/indirectbr.ll
+++ b/test/Transforms/LCSSA/indirectbr.ll
@@ -1,11 +1,11 @@
-; RUN: opt < %s -lcssa -verify-loop-info -verify-dom-info -disable-output
-; PR5437
+; RUN: opt < %s -loop-simplify -lcssa -verify-loop-info -verify-dom-info -S | FileCheck %s
 
 ; LCSSA should work correctly in the case of an indirectbr that exits
 ; the loop, and the loop has exits with predecessors not within the loop
 ; (and btw these edges are unsplittable due to the indirectbr).
-
-define i32 @js_Interpret() nounwind {
+; PR5437
+define i32 @test0() nounwind {
+; CHECK-LABEL: @test0
 entry:
   br i1 undef, label %"4", label %"3"
 
@@ -540,3 +540,35 @@ entry:
 "1862":                                           ; preds = %"1836", %"692"
   unreachable
 }
+
+; An exit for Loop L1 may be the header of a disjoint Loop L2.  Thus, when we
+; create PHIs in one of such exits we are also inserting PHIs in L2 header. This
+; could break LCSSA form for L2 because these inserted PHIs can also have uses
+; in L2 exits. Test that we don't assert/crash on that.
+define void @test1() {
+; CHECK-LABEL: @test1
+  br label %lab1
+
+lab1:
+  %tmp21 = add i32 undef, 677038203
+  br i1 undef, label %lab2, label %exit
+
+lab2:
+  indirectbr i8* undef, [label %lab1, label %lab3]
+
+lab3:
+; CHECK: %tmp21.lcssa1 = phi i32 [ %tmp21.lcssa1, %lab4 ], [ %tmp21, %lab2 ]
+  %tmp12 = phi i32 [ %tmp21, %lab2 ], [ %tmp12, %lab4 ]
+  br i1 undef, label %lab5, label %lab4
+
+lab4:
+  br label %lab3
+
+lab5:
+; CHECK:  %tmp21.lcssa1.lcssa = phi i32 [ %tmp21.lcssa1, %lab3 ]
+  %tmp15 = add i32 %tmp12, undef
+  br label %exit
+
+exit:
+  ret void
+}
diff --git a/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll b/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
index 7cf7a323552d..5587142dcf76 100644
--- a/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
+++ b/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
@@ -30,10 +30,10 @@ for.end:                                          ; preds = %for.inc
   ret void
 }
 
-!0 = metadata !{metadata !5, metadata !5, i64 0}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!3 = metadata !{metadata !"short", metadata !1}
-!4 = metadata !{metadata !6, metadata !6, i64 0}
-!5 = metadata !{metadata !"any pointer", metadata !1}
-!6 = metadata !{metadata !"int", metadata !1}
+!0 = !{!5, !5, i64 0}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA", null}
+!3 = !{!"short", !1}
+!4 = !{!6, !6, i64 0}
+!5 = !{!"any pointer", !1}
+!6 = !{!"int", !1}
diff --git a/test/Transforms/LICM/2014-09-10-doFinalizationAssert.ll b/test/Transforms/LICM/2014-09-10-doFinalizationAssert.ll
new file mode 100644
index 000000000000..17ae716dd478
--- /dev/null
+++ b/test/Transforms/LICM/2014-09-10-doFinalizationAssert.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -scalar-evolution -licm -loop-unroll -disable-output
+; Test triggered an assertion in doFinalization() because loop unroll was deleting
+; the inner loop which caused the loop to not get removed from the
+; LoopToAliasSetMap.
+; Test case taken from test/Transforms/LoopUnroll/unloop.ll.
+
+declare i1 @check() nounwind
+define void @skiplevelexit() nounwind {
+entry:
+  br label %outer
+
+outer:
+  br label %inner
+
+inner:
+  %iv = phi i32 [ 0, %outer ], [ %inc, %tail ]
+  %inc = add i32 %iv, 1
+  call zeroext i1 @check()
+  br i1 true, label %outer.backedge, label %tail
+
+tail:
+  br i1 false, label %inner, label %exit
+
+outer.backedge:
+  br label %outer
+
+exit:
+  ret void
+}
+
diff --git a/test/Transforms/LICM/PR19798.ll b/test/Transforms/LICM/PR19798.ll
new file mode 100644
index 000000000000..82befb09666c
--- /dev/null
+++ b/test/Transforms/LICM/PR19798.ll
@@ -0,0 +1,22 @@
+; RUN: opt -licm -S < %s | FileCheck %s
+
+define void @f() {
+; CHECK-LABEL: @f(
+entry:
+  br label %bb0
+
+bb0:
+  %tobool7 = icmp eq i1 undef, undef
+  br label %bb1
+
+bb1:
+  br i1 undef, label %bb0, label %bb0
+
+unreachable:
+; CHECK-LABEL: unreachable:
+; CHECK:   br i1 undef, label %unreachable, label %unreachable
+  br i1 %tobool7, label %unreachable, label %unreachable
+
+bb3:
+  unreachable
+}
diff --git a/test/Transforms/LICM/debug-value.ll b/test/Transforms/LICM/debug-value.ll
index e5c774ff8e9d..b49c559672a6 100644
--- a/test/Transforms/LICM/debug-value.ll
+++ b/test/Transforms/LICM/debug-value.ll
@@ -15,7 +15,7 @@ if.then:                                          ; preds = %for.body
 
 if.then27:                                        ; preds = %if.then
 ; CHECK: tail call void @llvm.dbg.value
-  tail call void @llvm.dbg.value(metadata !18, i64 0, metadata !19), !dbg !21
+  tail call void @llvm.dbg.value(metadata double undef, i64 0, metadata !19, metadata !{}), !dbg !21
   br label %for.body61.us
 
 if.end.if.end.split_crit_edge.critedge:           ; preds = %if.then
@@ -31,35 +31,35 @@ for.end104:                                       ; preds = %for.cond.backedge
   ret void, !dbg !24
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.module.flags = !{!26}
 !llvm.dbg.sp = !{!0, !6, !9, !10}
 
-!0 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"idamax", metadata !"idamax", metadata !"", i32 112, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !25} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, metadata !25, i32 12, metadata !"clang version 2.9 (trunk 127169)", i1 true, metadata !"", i32 0, metadata !8, metadata !8, metadata !8, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !25, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"dscal", metadata !"dscal", metadata !"", i32 206, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 589845, metadata !25, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{null}
-!9 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"daxpy", metadata !"daxpy", metadata !"", i32 230, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!10 = metadata !{i32 589870, metadata !25, metadata !1, metadata !"dgefa", metadata !"dgefa", metadata !"", i32 267, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 267] [def] [scope 0] [dgefa]
-!11 = metadata !{i32 281, i32 9, metadata !12, null}
-!12 = metadata !{i32 589835, metadata !25, metadata !13, i32 272, i32 5, i32 32} ; [ DW_TAG_lexical_block ]
-!13 = metadata !{i32 589835, metadata !25, metadata !14, i32 271, i32 5, i32 31} ; [ DW_TAG_lexical_block ]
-!14 = metadata !{i32 589835, metadata !25, metadata !10, i32 267, i32 1, i32 30} ; [ DW_TAG_lexical_block ]
-!15 = metadata !{i32 271, i32 5, metadata !14, null}
-!16 = metadata !{i32 284, i32 10, metadata !17, null}
-!17 = metadata !{i32 589835, metadata !25, metadata !12, i32 282, i32 9, i32 33} ; [ DW_TAG_lexical_block ]
-!18 = metadata !{double undef}
-!19 = metadata !{i32 590080, metadata !14, metadata !"temp", metadata !1, i32 268, metadata !20, i32 0} ; [ DW_TAG_auto_variable ]
-!20 = metadata !{i32 589860, null, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!21 = metadata !{i32 286, i32 14, metadata !22, null}
-!22 = metadata !{i32 589835, metadata !25, metadata !17, i32 285, i32 13, i32 34} ; [ DW_TAG_lexical_block ]
-!23 = metadata !{i32 296, i32 13, metadata !17, null}
-!24 = metadata !{i32 313, i32 1, metadata !14, null}
-!25 = metadata !{metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c", metadata !"/private/tmp"}
-!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00idamax\00idamax\00\00112\000\001\000\006\00256\000\000", !25, !1, !3, i32 0, null, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !25} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 2.9 (trunk 127169)\001\00\000\00\000", !25, !8, !8, !8, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !25, !1, null, !4, i32 0} ; [ DW_TAG_subroutine_type ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
+!6 = !{!"0x2e\00dscal\00dscal\00\00206\000\001\000\006\00256\000\000", !25, !1, !7, i32 0, null, null, null, null} ; [ DW_TAG_subprogram ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", !25, !1, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{null}
+!9 = !{!"0x2e\00daxpy\00daxpy\00\00230\000\001\000\006\00256\000\000", !25, !1, !7, i32 0, null, null, null, null} ; [ DW_TAG_subprogram ]
+!10 = !{!"0x2e\00dgefa\00dgefa\00\00267\000\001\000\006\00256\000\000", !25, !1, !7, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 267] [def] [scope 0] [dgefa]
+!11 = !MDLocation(line: 281, column: 9, scope: !12)
+!12 = !{!"0xb\00272\005\0032", !25, !13} ; [ DW_TAG_lexical_block ]
+!13 = !{!"0xb\00271\005\0031", !25, !14} ; [ DW_TAG_lexical_block ]
+!14 = !{!"0xb\00267\001\0030", !25, !10} ; [ DW_TAG_lexical_block ]
+!15 = !MDLocation(line: 271, column: 5, scope: !14)
+!16 = !MDLocation(line: 284, column: 10, scope: !17)
+!17 = !{!"0xb\00282\009\0033", !25, !12} ; [ DW_TAG_lexical_block ]
+!18 = !{double undef}
+!19 = !{!"0x100\00temp\00268\000", !14, !1, !20} ; [ DW_TAG_auto_variable ]
+!20 = !{!"0x24\00double\000\0064\0064\000\000\004", null, !2} ; [ DW_TAG_base_type ]
+!21 = !MDLocation(line: 286, column: 14, scope: !22)
+!22 = !{!"0xb\00285\0013\0034", !25, !17} ; [ DW_TAG_lexical_block ]
+!23 = !MDLocation(line: 296, column: 13, scope: !17)
+!24 = !MDLocation(line: 313, column: 1, scope: !14)
+!25 = !{!"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c", !"/private/tmp"}
+!26 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/LICM/hoist-invariant-load.ll b/test/Transforms/LICM/hoist-invariant-load.ll
index 1ba94d6b489c..59904bad4ae6 100644
--- a/test/Transforms/LICM/hoist-invariant-load.ll
+++ b/test/Transforms/LICM/hoist-invariant-load.ll
@@ -37,4 +37,4 @@ for.end:                                          ; preds = %for.cond
 
 declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
 
-!0 = metadata !{}
+!0 = !{}
diff --git a/test/Transforms/LICM/preheader-safe.ll b/test/Transforms/LICM/preheader-safe.ll
new file mode 100644
index 000000000000..260a5f653b77
--- /dev/null
+++ b/test/Transforms/LICM/preheader-safe.ll
@@ -0,0 +1,69 @@
+; RUN: opt -S -licm < %s | FileCheck %s
+
+declare void @use_nothrow(i64 %a) nounwind
+declare void @use(i64 %a)
+
+define void @nothrow(i64 %x, i64 %y, i1* %cond) {
+; CHECK-LABEL: nothrow
+; CHECK-LABEL: entry
+; CHECK: %div = udiv i64 %x, %y
+; CHECK-LABEL: loop
+; CHECK: call void @use_nothrow(i64 %div)
+entry:
+  br label %loop
+
+loop:                                         ; preds = %entry, %for.inc
+  %div = udiv i64 %x, %y
+  call void @use_nothrow(i64 %div)
+  br label %loop
+}
+; Negative test
+define void @throw_header(i64 %x, i64 %y, i1* %cond) {
+; CHECK-LABEL: throw_header
+; CHECK-LABEL: loop
+; CHECK: %div = udiv i64 %x, %y
+; CHECK: call void @use(i64 %div)
+entry:
+  br label %loop
+
+loop:                                         ; preds = %entry, %for.inc
+  %div = udiv i64 %x, %y
+  call void @use(i64 %div)
+  br label %loop
+}
+
+; The header is known no throw, but the loop is not.  We can
+; still lift out of the header.
+define void @nothrow_header(i64 %x, i64 %y, i1 %cond) {
+; CHECK-LABEL: nothrow_header
+; CHECK-LABEL: entry
+; CHECK: %div = udiv i64 %x, %y
+; CHECK-LABEL: loop
+; CHECK: call void @use(i64 %div)
+entry:
+  br label %loop
+loop:                                         ; preds = %entry, %for.inc
+  %div = udiv i64 %x, %y
+  br i1 %cond, label %loop-if, label %exit
+loop-if:
+  call void @use(i64 %div)
+  br label %loop
+exit:
+  ret void
+}
+; Negative test - can't move out of throwing block
+define void @nothrow_header_neg(i64 %x, i64 %y, i1 %cond) {
+; CHECK-LABEL: nothrow_header_neg
+; CHECK-LABEL: entry
+; CHECK-LABEL: loop
+; CHECK: %div = udiv i64 %x, %y
+; CHECK: call void @use(i64 %div)
+entry:
+  br label %loop
+loop:                                         ; preds = %entry, %for.inc
+  br label %loop-if
+loop-if:
+  %div = udiv i64 %x, %y
+  call void @use(i64 %div)
+  br label %loop
+}
diff --git a/test/Transforms/LICM/promote-order.ll b/test/Transforms/LICM/promote-order.ll
index 86f11fe04435..a189cf22f66b 100644
--- a/test/Transforms/LICM/promote-order.ll
+++ b/test/Transforms/LICM/promote-order.ll
@@ -36,8 +36,8 @@ for.end:                                          ; preds = %for.cond.for.end_cr
   ret i32* %r.0.lcssa
 }
 
-!0 = metadata !{metadata !"minimal TBAA"}
-!1 = metadata !{metadata !3, metadata !3, i64 0}
-!2 = metadata !{metadata !4, metadata !4, i64 0}
-!3 = metadata !{metadata !"float", metadata !0}
-!4 = metadata !{metadata !"int", metadata !0}
+!0 = !{!"minimal TBAA"}
+!1 = !{!3, !3, i64 0}
+!2 = !{!4, !4, i64 0}
+!3 = !{!"float", !0}
+!4 = !{!"int", !0}
diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll
index d7e7c6e9a316..80afb3c01e9a 100644
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar_promote.ll
@@ -185,9 +185,9 @@ for.end:                                          ; preds = %for.cond.for.end_cr
 ; CHECK-NEXT:  store i32 %[[LCSSAPHI]], i32* %gi, align 4, !tbaa !0
 }
 
-!0 = metadata !{metadata !4, metadata !4, i64 0}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !5, metadata !5, i64 0}
-!4 = metadata !{metadata !"int", metadata !1}
-!5 = metadata !{metadata !"float", metadata !1}
+!0 = !{!4, !4, i64 0}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!5, !5, i64 0}
+!4 = !{!"int", !1}
+!5 = !{!"float", !1}
diff --git a/test/Transforms/LICM/sinking.ll b/test/Transforms/LICM/sinking.ll
index ccc9186f7a48..d7a8fcdafcad 100644
--- a/test/Transforms/LICM/sinking.ll
+++ b/test/Transforms/LICM/sinking.ll
@@ -314,6 +314,84 @@ exit:
   ret i32 %lcssa
 }
 
+; Can't sink stores out of exit blocks containing indirectbr instructions
+; because loop simplify does not create dedicated exits for such blocks. Test
+; that by sinking the store from lab21 to lab22, but not further.
+define void @test12() {
+; CHECK-LABEL: @test12
+  br label %lab4
+
+lab4:
+  br label %lab20
+
+lab5:
+  br label %lab20
+
+lab6:
+  br label %lab4
+
+lab7:
+  br i1 undef, label %lab8, label %lab13
+
+lab8:
+  br i1 undef, label %lab13, label %lab10
+
+lab10:
+  br label %lab7
+
+lab13:
+  ret void
+
+lab20:
+  br label %lab21
+
+lab21:
+; CHECK: lab21:
+; CHECK-NOT: store
+; CHECK: br i1 false, label %lab21, label %lab22
+  store i32 36127957, i32* undef, align 4
+  br i1 undef, label %lab21, label %lab22
+
+lab22:
+; CHECK: lab22:
+; CHECK: store
+; CHECK-NEXT: indirectbr i8* undef
+  indirectbr i8* undef, [label %lab5, label %lab6, label %lab7]
+}
+
+; Test that we don't crash when trying to sink stores and there's no preheader
+; available (which is used for creating loads that may be used by the SSA
+; updater)
+define void @test13() {
+; CHECK-LABEL: @test13
+  br label %lab59
+
+lab19:
+  br i1 undef, label %lab20, label %lab38
+
+lab20:
+  br label %lab60
+
+lab21:
+  br i1 undef, label %lab22, label %lab38
+
+lab22:
+  br label %lab38
+
+lab38:
+  ret void
+
+lab59:
+  indirectbr i8* undef, [label %lab60, label %lab38]
+
+lab60:
+; CHECK: lab60:
+; CHECK: store
+; CHECK-NEXT: indirectbr
+  store i32 2145244101, i32* undef, align 4
+  indirectbr i8* undef, [label %lab21, label %lab19]
+}
+
 declare void @f(i32*)
 
 declare void @g()
diff --git a/test/Transforms/LoadCombine/load-combine-aa.ll b/test/Transforms/LoadCombine/load-combine-aa.ll
new file mode 100644
index 000000000000..3542dcebf5e4
--- /dev/null
+++ b/test/Transforms/LoadCombine/load-combine-aa.ll
@@ -0,0 +1,39 @@
+; RUN: opt -basicaa -load-combine -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i64 @test1(i32* nocapture readonly noalias %a, i32* nocapture readonly noalias %b) {
+; CHECK-LABEL: @test1
+
+; CHECK: load i64*
+; CHECK: ret i64
+
+  %load1 = load i32* %a, align 4
+  %conv = zext i32 %load1 to i64
+  %arrayidx1 = getelementptr inbounds i32* %a, i64 1
+  store i32 %load1, i32* %b, align 4
+  %load2 = load i32* %arrayidx1, align 4
+  %conv2 = zext i32 %load2 to i64
+  %shl = shl nuw i64 %conv2, 32
+  %add = or i64 %shl, %conv
+  ret i64 %add
+}
+
+define i64 @test2(i32* nocapture readonly %a, i32* nocapture readonly %b) {
+; CHECK-LABEL: @test2
+
+; CHECK: load i32*
+; CHECK: load i32*
+; CHECK: ret i64
+
+  %load1 = load i32* %a, align 4
+  %conv = zext i32 %load1 to i64
+  %arrayidx1 = getelementptr inbounds i32* %a, i64 1
+  store i32 %load1, i32* %b, align 4
+  %load2 = load i32* %arrayidx1, align 4
+  %conv2 = zext i32 %load2 to i64
+  %shl = shl nuw i64 %conv2, 32
+  %add = or i64 %shl, %conv
+  ret i64 %add
+}
+
diff --git a/test/Transforms/LoadCombine/load-combine-assume.ll b/test/Transforms/LoadCombine/load-combine-assume.ll
new file mode 100644
index 000000000000..94f630072add
--- /dev/null
+++ b/test/Transforms/LoadCombine/load-combine-assume.ll
@@ -0,0 +1,44 @@
+; RUN: opt -basicaa -load-combine -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.assume(i1) nounwind
+
+; 'load' before the 'call' gets optimized:
+define i64 @test1(i32* nocapture readonly %a, i1 %b) {
+; CHECK-LABEL: @test1
+
+; CHECK-DAG: load i64* %1, align 4
+; CHECK-DAG: tail call void @llvm.assume(i1 %b)
+; CHECK: ret i64
+
+  %load1 = load i32* %a, align 4
+  %conv = zext i32 %load1 to i64
+  %arrayidx1 = getelementptr inbounds i32* %a, i64 1
+  %load2 = load i32* %arrayidx1, align 4
+  tail call void @llvm.assume(i1 %b)
+  %conv2 = zext i32 %load2 to i64
+  %shl = shl nuw i64 %conv2, 32
+  %add = or i64 %shl, %conv
+  ret i64 %add
+}
+
+; 'call' before the 'load' doesn't get optimized:
+define i64 @test2(i32* nocapture readonly %a, i1 %b) {
+; CHECK-LABEL: @test2
+
+; CHECK-DAG: load i64* %1, align 4
+; CHECK-DAG: tail call void @llvm.assume(i1 %b)
+; CHECK: ret i64
+
+  %load1 = load i32* %a, align 4
+  %conv = zext i32 %load1 to i64
+  %arrayidx1 = getelementptr inbounds i32* %a, i64 1
+  tail call void @llvm.assume(i1 %b)
+  %load2 = load i32* %arrayidx1, align 4
+  %conv2 = zext i32 %load2 to i64
+  %shl = shl nuw i64 %conv2, 32
+  %add = or i64 %shl, %conv
+  ret i64 %add
+}
+
diff --git a/test/Transforms/LoopIdiom/debug-line.ll b/test/Transforms/LoopIdiom/debug-line.ll
index ef4a478d0e85..863df3c72d50 100644
--- a/test/Transforms/LoopIdiom/debug-line.ll
+++ b/test/Transforms/LoopIdiom/debug-line.ll
@@ -5,8 +5,8 @@ target triple = "x86_64-apple-darwin10.0.0"
 
 define void @foo(double* nocapture %a) nounwind ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !{double* %a}, i64 0, metadata !5), !dbg !8
-  tail call void @llvm.dbg.value(metadata !9, i64 0, metadata !10), !dbg !14
+  tail call void @llvm.dbg.value(metadata double* %a, i64 0, metadata !5, metadata !{}), !dbg !8
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !10, metadata !{}), !dbg !14
   br label %for.body
 
 for.body:                                         ; preds = %entry, %for.body
@@ -19,34 +19,34 @@ for.body:                                         ; preds = %entry, %for.body
   br i1 %exitcond, label %for.body, label %for.end, !dbg !14
 
 for.end:                                          ; preds = %for.body
-  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !10), !dbg !16
+  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !10, metadata !{}), !dbg !16
   ret void, !dbg !17
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.module.flags = !{!19}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !18, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (double*)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
-!1 = metadata !{i32 589865, metadata !18} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, metadata !18, i32 12, metadata !"clang version 2.9 (trunk 127165:127174)", i1 true, metadata !"", i32 0, metadata !9, metadata !9, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !18, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null}
-!5 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777218, metadata !6, i32 0} ; [ DW_TAG_arg_variable ]
-!6 = metadata !{i32 589839, null, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
-!7 = metadata !{i32 589860, null, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 2, i32 18, metadata !0, null}
-!9 = metadata !{i32 0}
-!10 = metadata !{i32 590080, metadata !11, metadata !"i", metadata !1, i32 3, metadata !13, i32 0} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 589835, metadata !18, metadata !12, i32 3, i32 3, i32 1} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 589835, metadata !18, metadata !0, i32 2, i32 21, i32 0} ; [ DW_TAG_lexical_block ]
-!13 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 3, i32 3, metadata !12, null}
-!15 = metadata !{i32 4, i32 5, metadata !11, null}
-!16 = metadata !{i32 3, i32 29, metadata !11, null}
-!17 = metadata !{i32 5, i32 1, metadata !12, null}
-!18 = metadata !{metadata !"li.c", metadata !"/private/tmp"}
-!19 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00foo\00foo\00\002\000\001\000\006\00256\000\000", !18, !1, !3, null, void (double*)* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
+!1 = !{!"0x29", !18} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 2.9 (trunk 127165:127174)\001\00\000\00\000", !18, !9, !9, null, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !18, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null}
+!5 = !{!"0x101\00a\0016777218\000", !0, !1, !6} ; [ DW_TAG_arg_variable ]
+!6 = !{!"0xf\00\000\0064\0064\000\000", null, !2, !7} ; [ DW_TAG_pointer_type ]
+!7 = !{!"0x24\00double\000\0064\0064\000\000\004", null, !2} ; [ DW_TAG_base_type ]
+!8 = !MDLocation(line: 2, column: 18, scope: !0)
+!9 = !{i32 0}
+!10 = !{!"0x100\00i\003\000", !11, !1, !13} ; [ DW_TAG_auto_variable ]
+!11 = !{!"0xb\003\003\001", !18, !12} ; [ DW_TAG_lexical_block ]
+!12 = !{!"0xb\002\0021\000", !18, !0} ; [ DW_TAG_lexical_block ]
+!13 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
+!14 = !MDLocation(line: 3, column: 3, scope: !12)
+!15 = !MDLocation(line: 4, column: 5, scope: !11)
+!16 = !MDLocation(line: 3, column: 29, scope: !11)
+!17 = !MDLocation(line: 5, column: 1, scope: !12)
+!18 = !{!"li.c", !"/private/tmp"}
+!19 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll
index 50fc9659a804..846b366f2942 100644
--- a/test/Transforms/LoopRotate/dbgvalue.ll
+++ b/test/Transforms/LoopRotate/dbgvalue.ll
@@ -1,12 +1,12 @@
 ; RUN: opt -S -loop-rotate < %s | FileCheck %s
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp {
 ; CHECK-LABEL: define i32 @tak(
 ; CHECK: entry
-; CHECK-NEXT: call void @llvm.dbg.value(metadata !{i32 %x}
+; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %x
 
 entry:
   br label %tailrecurse
@@ -15,9 +15,9 @@ tailrecurse:                                      ; preds = %if.then, %entry
   %x.tr = phi i32 [ %x, %entry ], [ %call, %if.then ]
   %y.tr = phi i32 [ %y, %entry ], [ %call9, %if.then ]
   %z.tr = phi i32 [ %z, %entry ], [ %call14, %if.then ]
-  tail call void @llvm.dbg.value(metadata !{i32 %x.tr}, i64 0, metadata !6), !dbg !7
-  tail call void @llvm.dbg.value(metadata !{i32 %y.tr}, i64 0, metadata !8), !dbg !9
-  tail call void @llvm.dbg.value(metadata !{i32 %z.tr}, i64 0, metadata !10), !dbg !11
+  tail call void @llvm.dbg.value(metadata i32 %x.tr, i64 0, metadata !6, metadata !{}), !dbg !7
+  tail call void @llvm.dbg.value(metadata i32 %y.tr, i64 0, metadata !8, metadata !{}), !dbg !9
+  tail call void @llvm.dbg.value(metadata i32 %z.tr, i64 0, metadata !10, metadata !{}), !dbg !11
   %cmp = icmp slt i32 %y.tr, %x.tr, !dbg !12
   br i1 %cmp, label %if.then, label %if.end, !dbg !12
 
@@ -46,9 +46,9 @@ define void @FindFreeHorzSeg(i64 %startCol, i64 %row, i64* %rowStart) {
 ; CHECK-LABEL: define void @FindFreeHorzSeg(
 ; CHECK: %dec = add
 ; CHECK-NEXT: tail call void @llvm.dbg.value
-; CHECK-NEXT: br i1 %tobool, label %for.cond, label %[[LOOP_EXIT:[^,]*]]
-; CHECK: [[LOOP_EXIT]]:
-; CHECK-NEXT: phi i64 [ %{{[^,]*}}, %{{[^,]*}} ]
+; CHECK: %cmp = icmp
+; CHECK: br i1 %cmp
+; CHECK: phi i64 [ %{{[^,]*}}, %{{[^,]*}} ]
 ; CHECK-NEXT: br label %for.end
 
 
@@ -72,7 +72,7 @@ for.body:
 
 for.inc:
   %dec = add i64 %i.0, -1
-  tail call void @llvm.dbg.value(metadata !{i64 %dec}, i64 0, metadata !{metadata !"undef"})
+  tail call void @llvm.dbg.value(metadata i64 %dec, i64 0, metadata !{!"undef"}, metadata !{})
   br label %for.cond
 
 for.end:
@@ -84,24 +84,24 @@ for.end:
 !llvm.module.flags = !{!20}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !18, metadata !1, metadata !"tak", metadata !"tak", metadata !"", i32 32, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i32, i32)* @tak, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 0] [tak]
-!1 = metadata !{i32 589865, metadata !18} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, metadata !18, i32 12, metadata !"clang version 2.9 (trunk 125492)", i1 true, metadata !"", i32 0, metadata !19, metadata !19, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !18, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 590081, metadata !0, metadata !"x", metadata !1, i32 32, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!7 = metadata !{i32 32, i32 13, metadata !0, null}
-!8 = metadata !{i32 590081, metadata !0, metadata !"y", metadata !1, i32 32, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!9 = metadata !{i32 32, i32 20, metadata !0, null}
-!10 = metadata !{i32 590081, metadata !0, metadata !"z", metadata !1, i32 32, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!11 = metadata !{i32 32, i32 27, metadata !0, null}
-!12 = metadata !{i32 33, i32 3, metadata !13, null}
-!13 = metadata !{i32 589835, metadata !18, metadata !0, i32 32, i32 30, i32 6} ; [ DW_TAG_lexical_block ]
-!14 = metadata !{i32 34, i32 5, metadata !15, null}
-!15 = metadata !{i32 589835, metadata !18, metadata !13, i32 33, i32 14, i32 7} ; [ DW_TAG_lexical_block ]
-!16 = metadata !{i32 36, i32 3, metadata !13, null}
-!17 = metadata !{i32 37, i32 1, metadata !13, null}
-!18 = metadata !{metadata !"/Volumes/Lalgate/cj/llvm/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame/recursive.c", metadata !"/Volumes/Lalgate/cj/D/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame"}
-!19 = metadata !{i32 0}
-!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00tak\00tak\00\0032\000\001\000\006\00256\000\000", !18, !1, !3, null, i32 (i32, i32, i32)* @tak, null, null, null} ; [ DW_TAG_subprogram ] [line 32] [def] [scope 0] [tak]
+!1 = !{!"0x29", !18} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 2.9 (trunk 125492)\001\00\000\00\000", !18, !19, !19, null, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !18, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
+!6 = !{!"0x101\00x\0032\000", !0, !1, !5} ; [ DW_TAG_arg_variable ]
+!7 = !MDLocation(line: 32, column: 13, scope: !0)
+!8 = !{!"0x101\00y\0032\000", !0, !1, !5} ; [ DW_TAG_arg_variable ]
+!9 = !MDLocation(line: 32, column: 20, scope: !0)
+!10 = !{!"0x101\00z\0032\000", !0, !1, !5} ; [ DW_TAG_arg_variable ]
+!11 = !MDLocation(line: 32, column: 27, scope: !0)
+!12 = !MDLocation(line: 33, column: 3, scope: !13)
+!13 = !{!"0xb\0032\0030\006", !18, !0} ; [ DW_TAG_lexical_block ]
+!14 = !MDLocation(line: 34, column: 5, scope: !15)
+!15 = !{!"0xb\0033\0014\007", !18, !13} ; [ DW_TAG_lexical_block ]
+!16 = !MDLocation(line: 36, column: 3, scope: !13)
+!17 = !MDLocation(line: 37, column: 1, scope: !13)
+!18 = !{!"/Volumes/Lalgate/cj/llvm/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame/recursive.c", !"/Volumes/Lalgate/cj/D/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame"}
+!19 = !{i32 0}
+!20 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/LoopRotate/nosimplifylatch.ll b/test/Transforms/LoopRotate/nosimplifylatch.ll
new file mode 100644
index 000000000000..8e858b4dfcc3
--- /dev/null
+++ b/test/Transforms/LoopRotate/nosimplifylatch.ll
@@ -0,0 +1,34 @@
+; RUN: opt -S < %s -loop-rotate -licm -verify-dom-info -verify-loop-info | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios8.0.0"
+
+;CHECK: for.inc:
+;CHECK-NEXT: %incdec.ptr.i = getelementptr 
+
+; Function Attrs: alwaysinline inlinehint nounwind readonly ssp
+define linkonce_odr hidden i64 @_ZNSt3__14findINS_11__wrap_iterIPiEEiEET_S4_S4_RKT0_(i64 %__first.coerce, i64 %__last.coerce, i32* nocapture readonly dereferenceable(4) %__value_) {
+entry:
+  %coerce.val.ip = inttoptr i64 %__first.coerce to i32*
+  %coerce.val.ip2 = inttoptr i64 %__last.coerce to i32*
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %coerce.val.ip9 = phi i32* [ %incdec.ptr.i, %for.inc ], [ %coerce.val.ip, %entry ]
+  %lnot.i = icmp eq i32* %coerce.val.ip9, %coerce.val.ip2
+  br i1 %lnot.i, label %for.end, label %for.body
+
+for.body:                                         ; preds = %for.cond
+  %0 = load i32* %coerce.val.ip9, align 4
+  %1 = load i32* %__value_, align 4
+  %cmp = icmp eq i32 %0, %1
+  br i1 %cmp, label %for.end, label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %incdec.ptr.i = getelementptr inbounds i32* %coerce.val.ip9, i64 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond, %for.body
+  %coerce.val.ip9.lcssa = phi i32* [ %coerce.val.ip9, %for.cond ], [ %coerce.val.ip9, %for.body ]
+  %coerce.val.pi = ptrtoint i32* %coerce.val.ip9.lcssa to i64
+  ret i64 %coerce.val.pi
+}
diff --git a/test/Transforms/LoopRotate/simplifylatch.ll b/test/Transforms/LoopRotate/simplifylatch.ll
index d646cb9d6cb2..62e5b1ae11d5 100644
--- a/test/Transforms/LoopRotate/simplifylatch.ll
+++ b/test/Transforms/LoopRotate/simplifylatch.ll
@@ -4,7 +4,7 @@
 @mode_table = global [4 x i32] zeroinitializer		; <[4 x i32]*> [#uses=1]
 
 ; CHECK-LABEL: @f(
-; CHECK-NOT: bb4
+; CHECK-NOT: bb:
 define i8 @f() {
 entry:
 	tail call i32 @fegetround( )		; <i32>:0 [#uses=1]
diff --git a/test/Transforms/LoopSimplify/merge-exits.ll b/test/Transforms/LoopSimplify/merge-exits.ll
index 8de5938939dc..9678148c3570 100644
--- a/test/Transforms/LoopSimplify/merge-exits.ll
+++ b/test/Transforms/LoopSimplify/merge-exits.ll
@@ -1,6 +1,4 @@
-; RUN: opt < %s -loop-simplify -loop-rotate -instcombine -indvars -S -verify-loop-info -verify-dom-info > %t
-; RUN: not grep sext %t
-; RUN: grep "phi i64" %t | count 1
+; RUN: opt < %s -loop-simplify -loop-rotate -instcombine -indvars -S -verify-loop-info -verify-dom-info | FileCheck %s
 
 ; Loopsimplify should be able to merge the two loop exits
 ; into one, so that loop rotate can rotate the loop, so
@@ -9,36 +7,42 @@
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64"
 
-define float @t(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
+; CHECK-LABEL: @test1
+; CHECK: bb:
+; CHECK: phi i64
+; CHECK-NOT: phi i64
+; CHECK-NOT: sext
+
+define float @test1(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
 entry:
-	%t0 = load float* %peakWeight, align 4		; <float> [#uses=1]
+	%t0 = load float* %peakWeight, align 4
 	br label %bb1
 
 bb:		; preds = %bb2
-	%t1 = sext i32 %hiPart.0 to i64		; <i64> [#uses=1]
-	%t2 = getelementptr float* %pTmp1, i64 %t1		; <float*> [#uses=1]
-	%t3 = load float* %t2, align 4		; <float> [#uses=1]
-	%t4 = fadd float %t3, %distERBhi.0		; <float> [#uses=1]
-	%t5 = add i32 %hiPart.0, 1		; <i32> [#uses=2]
-	%t6 = sext i32 %t5 to i64		; <i64> [#uses=1]
-	%t7 = getelementptr float* %peakWeight, i64 %t6		; <float*> [#uses=1]
-	%t8 = load float* %t7, align 4		; <float> [#uses=1]
-	%t9 = fadd float %t8, %peakCount.0		; <float> [#uses=1]
+	%t1 = sext i32 %hiPart.0 to i64
+	%t2 = getelementptr float* %pTmp1, i64 %t1
+	%t3 = load float* %t2, align 4
+	%t4 = fadd float %t3, %distERBhi.0
+	%t5 = add i32 %hiPart.0, 1
+	%t6 = sext i32 %t5 to i64
+	%t7 = getelementptr float* %peakWeight, i64 %t6
+	%t8 = load float* %t7, align 4
+	%t9 = fadd float %t8, %peakCount.0
 	br label %bb1
 
 bb1:		; preds = %bb, %entry
-	%peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ]		; <float> [#uses=2]
-	%hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ]		; <i32> [#uses=3]
-	%distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ]		; <float> [#uses=3]
-	%t10 = fcmp uge float %distERBhi.0, 2.500000e+00		; <i1> [#uses=1]
+	%peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ]
+	%hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ]
+	%distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ]
+	%t10 = fcmp uge float %distERBhi.0, 2.500000e+00
 	br i1 %t10, label %bb3, label %bb2
 
 bb2:		; preds = %bb1
-	%t11 = add i32 %bandEdgeIndex, -1		; <i32> [#uses=1]
-	%t12 = icmp sgt i32 %t11, %hiPart.0		; <i1> [#uses=1]
+	%t11 = add i32 %bandEdgeIndex, -1
+	%t12 = icmp sgt i32 %t11, %hiPart.0
 	br i1 %t12, label %bb, label %bb3
 
 bb3:		; preds = %bb2, %bb1
-	%t13 = fdiv float %peakCount.0, %distERBhi.0		; <float> [#uses=1]
+	%t13 = fdiv float %peakCount.0, %distERBhi.0
 	ret float %t13
 }
diff --git a/test/Transforms/LoopStrengthReduce/AArch64/lsr-memset.ll b/test/Transforms/LoopStrengthReduce/AArch64/lsr-memset.ll
index 10b2c3a403cc..48b709429467 100644
--- a/test/Transforms/LoopStrengthReduce/AArch64/lsr-memset.ll
+++ b/test/Transforms/LoopStrengthReduce/AArch64/lsr-memset.ll
@@ -96,6 +96,6 @@ done:                                             ; preds = %while.cond, %while.
   ret i8* %dest
 }
 
-!0 = metadata !{metadata !"omnipotent char", metadata !1}
-!1 = metadata !{metadata !"Simple C/C++ TBAA"}
-!2 = metadata !{metadata !"long long", metadata !0}
+!0 = !{!"omnipotent char", !1}
+!1 = !{!"Simple C/C++ TBAA"}
+!2 = !{!"long long", !0}
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
index f4edf092641f..26b294042d42 100644
--- a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -201,7 +201,7 @@ for.end:                                          ; preds = %for.body
 ;
 ; Currently we have three extra add.w's that keep the store address
 ; live past the next increment because ISEL is unfortunately undoing
-; the store chain. ISEL also fails to convert the stores to
+; the store chain. ISEL also fails to convert all but one of the stores to
 ; post-increment addressing. However, the loads should use
 ; post-increment addressing, no add's or add.w's beyond the three
 ; mentioned. Most importantly, there should be no spills or reloads!
@@ -210,7 +210,7 @@ for.end:                                          ; preds = %for.body
 ; A9: %.lr.ph
 ; A9-NOT: lsl.w
 ; A9-NOT: {{ldr|str|adds|add r}}
-; A9: add.w r
+; A9: vst1.8 {{.*}} [r{{[0-9]+}}]!
 ; A9-NOT: {{ldr|str|adds|add r}}
 ; A9: add.w r
 ; A9-NOT: {{ldr|str|adds|add r}}
diff --git a/test/Transforms/LoopStrengthReduce/pr12018.ll b/test/Transforms/LoopStrengthReduce/pr12018.ll
index ee7b1e8883e6..e493cf85fdef 100644
--- a/test/Transforms/LoopStrengthReduce/pr12018.ll
+++ b/test/Transforms/LoopStrengthReduce/pr12018.ll
@@ -16,7 +16,7 @@ for.body:                                         ; preds = %_ZN8nsTArray9Elemen
   %tmp = bitcast %struct.nsTArrayHeader* %add.ptr.i to %struct.nsTArray*
   %arrayidx = getelementptr inbounds %struct.nsTArray* %tmp, i32 %i.06
   %add = add nsw i32 %i.06, 1
-  call void @llvm.dbg.value(metadata !{%struct.nsTArray* %aValues}, i64 0, metadata !0) nounwind
+  call void @llvm.dbg.value(metadata %struct.nsTArray* %aValues, i64 0, metadata !0, metadata !{}) nounwind
   br label %_ZN8nsTArray9ElementAtEi.exit
 
 _ZN8nsTArray9ElementAtEi.exit:                    ; preds = %for.body
@@ -33,6 +33,6 @@ declare void @_ZN11nsTArray15ComputeDistanceERKS_Rd(%struct.nsTArray*, %struct.n
 
 declare %struct.nsTArrayHeader* @_ZN8nsTArray4Hdr2Ev()
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
-!0 = metadata !{i32 786689}                       ; [ DW_TAG_arg_variable ]
+!0 = !{!"0x101"}                       ; [ DW_TAG_arg_variable ]
diff --git a/test/Transforms/LoopStrengthReduce/pr18165.ll b/test/Transforms/LoopStrengthReduce/pr18165.ll
index c38d6a625e88..cc878c48c4b9 100644
--- a/test/Transforms/LoopStrengthReduce/pr18165.ll
+++ b/test/Transforms/LoopStrengthReduce/pr18165.ll
@@ -77,12 +77,12 @@ attributes #2 = { nounwind optsize }
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"clang version 3.5 "}
-!1 = metadata !{metadata !2, metadata !3, i64 0}
-!2 = metadata !{metadata !"", metadata !3, i64 0, metadata !3, i64 4, metadata !3, i64 8}
-!3 = metadata !{metadata !"int", metadata !4, i64 0}
-!4 = metadata !{metadata !"omnipotent char", metadata !5, i64 0}
-!5 = metadata !{metadata !"Simple C/C++ TBAA"}
-!6 = metadata !{metadata !2, metadata !3, i64 8}
-!7 = metadata !{metadata !3, metadata !3, i64 0}
-!8 = metadata !{metadata !2, metadata !3, i64 4}
+!0 = !{!"clang version 3.5 "}
+!1 = !{!2, !3, i64 0}
+!2 = !{!"", !3, i64 0, !3, i64 4, !3, i64 8}
+!3 = !{!"int", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!2, !3, i64 8}
+!7 = !{!3, !3, i64 0}
+!8 = !{!2, !3, i64 4}
diff --git a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
index 17c91e5c07b1..aae79cbac789 100644
--- a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
+++ b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
@@ -41,8 +41,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 ; CHECK-LABEL: @test
-; CHECK: unr.cmp{{.*}}:
-; CHECK: for.body.unr{{.*}}:
+; CHECK: for.body.prol{{.*}}:
 ; CHECK: for.body:
 ; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
 
diff --git a/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll
new file mode 100644
index 000000000000..7a50fc0a4f49
--- /dev/null
+++ b/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll
@@ -0,0 +1,99 @@
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -loop-unroll | FileCheck %s
+define void @unroll_opt_for_size() nounwind optsize {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %inc = add i32 %iv, 1
+  %exitcnd = icmp uge i32 %inc, 1024
+  br i1 %exitcnd, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @unroll_opt_for_size
+; CHECK:      add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
+
+define void @unroll_default() nounwind {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %inc = add i32 %iv, 1
+  %exitcnd = icmp uge i32 %inc, 1024
+  br i1 %exitcnd, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; CHECK-LABEL: @unroll_default
+; CHECK:      add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
+
diff --git a/test/Transforms/LoopUnroll/ephemeral.ll b/test/Transforms/LoopUnroll/ephemeral.ll
new file mode 100644
index 000000000000..9d4061390537
--- /dev/null
+++ b/test/Transforms/LoopUnroll/ephemeral.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 | FileCheck %s
+
+; Make sure this loop is completely unrolled...
+; CHECK-LABEL: @test1
+; CHECK: for.body:
+; CHECK-NOT: for.end:
+
+define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+
+  ; This loop will be completely unrolled, even with these extra instructions,
+  ; but only because they're ephemeral (and, thus, free).
+  %1 = add nsw i32 %0, 2
+  %2 = add nsw i32 %1, 4
+  %3 = add nsw i32 %2, 4
+  %4 = add nsw i32 %3, 4
+  %5 = add nsw i32 %4, 4
+  %6 = add nsw i32 %5, 4
+  %7 = add nsw i32 %6, 4
+  %8 = add nsw i32 %7, 4
+  %9 = add nsw i32 %8, 4
+  %10 = add nsw i32 %9, 4
+  %ca = icmp sgt i32 %10, -7
+  call void @llvm.assume(i1 %ca)
+
+  %add = add nsw i32 %0, %sum.01
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 5
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret i32 %add
+}
+
+declare void @llvm.assume(i1) nounwind
+
diff --git a/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll b/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll
new file mode 100644
index 000000000000..dcb5d1c28fb2
--- /dev/null
+++ b/test/Transforms/LoopUnroll/ignore-annotation-intrinsic-cost.ll
@@ -0,0 +1,133 @@
+; REQUIRES: asserts
+; RUN: opt < %s -disable-output -stats -loop-unroll -info-output-file - | FileCheck %s --check-prefix=STATS
+; STATS: 1 loop-unroll - Number of loops unrolled (completely or otherwise)
+; Test that llvm.annotation intrinsic do not count against the loop body size
+; and prevent unrolling.
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+@B = common global i32 0, align 4
+
+define void @foo(i32* noalias %A, i32 %B, i32 %C) {
+entry:
+  br label %for.body
+
+; A loop that has a small loop body (except for the annotations) that should be
+; unrolled with the default heuristic. Make sure the extra annotations do not
+; prevent unrolling
+for.body:                                         ; preds = %entry, %for.body
+  %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  ; The real loop.
+  %mul = mul nsw i32 %B, %C
+  %arrayidx = getelementptr inbounds i32* %A, i32 %i.01
+  store i32 %mul, i32* %arrayidx, align 4
+  %inc = add nsw i32 %i.01, 1
+  %exitcond = icmp ne i32 %inc, 4
+
+  ; A bunch of annotations
+  %annot.0 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.1 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.2 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.3 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.4 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.5 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.6 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.7 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.8 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.9 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.10 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.11 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.12 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.13 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.14 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.15 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.16 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.17 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.18 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.19 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.20 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.21 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.22 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.23 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.24 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.25 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.26 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.27 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.28 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.29 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.30 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.31 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.32 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.33 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.34 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.35 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.36 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.37 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.38 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.39 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.40 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.41 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.42 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.43 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.44 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.45 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.46 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.47 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.48 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.49 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.50 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.51 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.52 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.53 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.54 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.55 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.56 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.57 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.58 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.59 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.60 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.61 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.62 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.63 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.64 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.65 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.66 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.67 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.68 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.69 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.70 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.71 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.72 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.73 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.74 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.75 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.76 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.77 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.78 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.79 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.80 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.81 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.82 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.83 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.84 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.85 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.86 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.87 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.88 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.89 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.90 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.91 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.92 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.93 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.94 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.95 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.96 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.97 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.98 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  %annot.99 = tail call i32 @llvm.annotation.i32(i32 %i.01, i8* null, i8* null, i32 0)
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32)
diff --git a/test/Transforms/LoopUnroll/nsw-tripcount.ll b/test/Transforms/LoopUnroll/nsw-tripcount.ll
new file mode 100644
index 000000000000..98cab32a42a6
--- /dev/null
+++ b/test/Transforms/LoopUnroll/nsw-tripcount.ll
@@ -0,0 +1,32 @@
+; RUN: opt -loop-unroll -S %s | FileCheck %s
+
+; extern void f(int);
+; void test1(int v) {
+;   for (int i=v; i<=v+1; ++i)
+;     f(i);
+; }
+;
+; We can use the nsw information to see that the tripcount will be 2, so the
+; loop should be unrolled as this is always beneficial
+
+declare void @f(i32)
+
+; CHECK-LABEL: @test1
+define void @test1(i32 %v) {
+entry:
+  %add = add nsw i32 %v, 1
+  br label %for.body
+
+for.body:
+  %i.04 = phi i32 [ %v, %entry ], [ %inc, %for.body ]
+  tail call void @f(i32 %i.04)
+  %inc = add nsw i32 %i.04, 1
+  %cmp = icmp slt i32 %i.04, %add
+  br i1 %cmp, label %for.body, label %for.end
+
+; CHECK: call void @f
+; CHECK-NOT: br i1
+; CHECK: call void @f
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll
index 3179d55e978a..a650317f3df7 100644
--- a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll
+++ b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s
 ; Loop size = 3, when the function has the optsize attribute, the
 ; OptSizeUnrollThreshold, i.e. 50, is used, hence the loop should be unrolled
-; by 16 times because 3 * 16 < 50.
+; by 32 times because (1 * 32) + 2 < 50 (whereas (1 * 64 + 2) is not).
 define void @unroll_opt_for_size() nounwind optsize {
 entry:
   br label %loop
@@ -32,4 +32,21 @@ exit:
 ; CHECK-NEXT: add
 ; CHECK-NEXT: add
 ; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
 ; CHECK-NEXT: icmp
+
diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll
index a14087dcdce7..3a8777bb1473 100644
--- a/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -3,15 +3,16 @@
 ; Tests for unrolling loops with run-time trip counts
 
 ; CHECK: %xtraiter = and i32 %n
-; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
-; CHECK: %lcmp.overflow = icmp eq i32 %n, 0
-; CHECK: %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
-; CHECK: br i1 %lcmp.or, label %unr.cmp
+; CHECK:  %lcmp.mod = icmp ne i32 %xtraiter, 0
+; CHECK:  %lcmp.overflow = icmp eq i32 %n, 0
+; CHECK:  %lcmp.or = or i1 %lcmp.overflow, %lcmp.mod
+; CHECK:  br i1 %lcmp.or, label %for.body.prol, label %for.body.preheader.split
 
-; CHECK: unr.cmp{{.*}}:
-; CHECK: for.body.unr{{.*}}:
-; CHECK: for.body:
-; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
+; CHECK: for.body.prol:
+; CHECK: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.preheader ]
+; CHECK:  %prol.iter.sub = sub i32 %prol.iter, 1
+; CHECK:  %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
+; CHECK:  br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split, !llvm.loop !0
 
 define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
 entry:
@@ -39,7 +40,7 @@ for.end:                                          ; preds = %for.body, %entry
 ; even if the -unroll-runtime is specified
 
 ; CHECK: for.body:
-; CHECK-NOT: for.body.unr:
+; CHECK-NOT: for.body.prol:
 
 define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
 entry:
@@ -85,8 +86,8 @@ cond_true138:
 
 ; Test run-time unrolling for a loop that counts down by -2.
 
-; CHECK: for.body.unr:
-; CHECK: br i1 %cmp.7, label %for.cond.for.end_crit_edge{{.*}}, label %for.body
+; CHECK: for.body.prol:
+; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
 
 define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
 entry:
@@ -113,3 +114,7 @@ for.end:                                          ; preds = %for.cond.for.end_cr
   %res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
   ret i16 %res.0.lcssa
 }
+
+; CHECK: !0 = distinct !{!0, !1}
+; CHECK: !1 = !{!"llvm.loop.unroll.disable"}
+
diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll
index ad99b8cd9c66..38b4f32354a3 100644
--- a/test/Transforms/LoopUnroll/runtime-loop1.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -1,11 +1,11 @@
-; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=4 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 | FileCheck %s
 
 ; This tests that setting the unroll count works
 
-; CHECK: unr.cmp:
-; CHECK: for.body.unr:
+; CHECK: for.body.prol:
+; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
 ; CHECK: for.body:
-; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
+; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body
 ; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
 
 define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
diff --git a/test/Transforms/LoopUnroll/runtime-loop2.ll b/test/Transforms/LoopUnroll/runtime-loop2.ll
index cbc7af58ff5b..176362a34456 100644
--- a/test/Transforms/LoopUnroll/runtime-loop2.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop2.ll
@@ -1,10 +1,9 @@
-; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 -unroll-runtime -unroll-count=8 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-count=8 | FileCheck %s
 
 ; Choose a smaller, power-of-two, unroll count if the loop is too large.
 ; This test makes sure we're not unrolling 'odd' counts
 
-; CHECK: unr.cmp:
-; CHECK: for.body.unr:
+; CHECK: for.body.prol:
 ; CHECK: for.body:
 ; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
 ; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
diff --git a/test/Transforms/LoopUnroll/scevunroll.ll b/test/Transforms/LoopUnroll/scevunroll.ll
index c3086e8335f9..20161d798e9e 100644
--- a/test/Transforms/LoopUnroll/scevunroll.ll
+++ b/test/Transforms/LoopUnroll/scevunroll.ll
@@ -66,16 +66,13 @@ exit2:
 
 ; SCEV properly unrolls multi-exit loops.
 ;
-; SCEV cannot currently unroll this loop.
-; It should ideally detect a trip count of 5.
-; rdar:14038809 [SCEV]: Optimize trip count computation for multi-exit loops.
 ; CHECK-LABEL: @multiExit(
-; CHECKFIXME: getelementptr i32* %base, i32 10
-; CHECKFIXME-NEXT: load i32*
-; CHECKFIXME: br i1 false, label %l2.10, label %exit1
-; CHECKFIXME: l2.10:
-; CHECKFIXME-NOT: br
-; CHECKFIXME: ret i32
+; CHECK: getelementptr i32* %base, i32 10
+; CHECK-NEXT: load i32*
+; CHECK: br i1 false, label %l2.10, label %exit1
+; CHECK: l2.10:
+; CHECK-NOT: br
+; CHECK: ret i32
 define i32 @multiExit(i32* %base) nounwind {
 entry:
   br label %l1
diff --git a/test/Transforms/LoopUnroll/tripcount-overflow.ll b/test/Transforms/LoopUnroll/tripcount-overflow.ll
new file mode 100644
index 000000000000..d59368578ec2
--- /dev/null
+++ b/test/Transforms/LoopUnroll/tripcount-overflow.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; When prologue is fully unrolled, the branch on its end is unconditional.
+; Unrolling it is illegal if we can't prove that trip-count+1 doesn't overflow,
+; like in this example, where it comes from an argument.
+;
+; This test is based on an example from here:
+; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out
+;
+; CHECK: while.body.prol:
+; CHECK: br i1
+; CHECK: entry.split:
+
+; Function Attrs: nounwind readnone ssp uwtable
+define i32 @foo(i32 %N) #0 {
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %entry
+  %i = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  %cmp = icmp eq i32 %i, %N
+  %inc = add i32 %i, 1
+  br i1 %cmp, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body
+  ret i32 %i
+}
+
+attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll
index 39da7fa70ade..4f934a688be8 100644
--- a/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll
+++ b/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll
@@ -1,19 +1,19 @@
 ; RUN: opt < %s -loop-unroll -S | FileCheck %s
 ;
-; Verify that the unrolling pass removes existing loop unrolling metadata
+; Verify that the unrolling pass removes existing unroll count metadata
 ; and adds a disable unrolling node after unrolling is complete.
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; #pragma clang loop  vectorize(enable) unroll(enable) unroll_count(4) vectorize_width(8)
+; #pragma clang loop  vectorize(enable) unroll_count(4) vectorize_width(8)
 ;
-; Unroll metadata should be replaces with unroll(disable).  Vectorize
+; Unroll count metadata should be replaced with unroll(disable).  Vectorize
 ; metadata should be untouched.
 ;
-; CHECK-LABEL: @loop1(
+; CHECK-LABEL: @unroll_count_4(
 ; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_1:.*]]
-define void @loop1(i32* nocapture %a) {
+define void @unroll_count_4(i32* nocapture %a) {
 entry:
   br label %for.body
 
@@ -30,19 +30,49 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
   ret void
 }
-!1 = metadata !{metadata !1, metadata !2, metadata !3, metadata !4, metadata !5}
-!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
-!3 = metadata !{metadata !"llvm.loop.unroll.enable", i1 true}
-!4 = metadata !{metadata !"llvm.loop.unroll.count", i32 4}
-!5 = metadata !{metadata !"llvm.loop.vectorize.width", i32 8}
+!1 = !{!1, !2, !3, !4}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}
+!3 = !{!"llvm.loop.unroll.count", i32 4}
+!4 = !{!"llvm.loop.vectorize.width", i32 8}
+
+; #pragma clang loop unroll(full)
+;
+; An unroll disable metadata node is only added for the unroll count case.
+; In this case, the loop has a full unroll metadata but can't be fully unrolled
+; because the trip count is dynamic.  The full unroll metadata should remain
+; after unrolling.
+;
+; CHECK-LABEL: @unroll_full(
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_2:.*]]
+define void @unroll_full(i32* nocapture %a, i32 %b) {
+entry:
+  %cmp3 = icmp sgt i32 %b, 0
+  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !5
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %b
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+!5 = !{!5, !6}
+!6 = !{!"llvm.loop.unroll.full"}
 
 ; #pragma clang loop unroll(disable)
 ;
 ; Unroll metadata should not change.
 ;
-; CHECK-LABEL: @loop2(
-; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_2:.*]]
-define void @loop2(i32* nocapture %a) {
+; CHECK-LABEL: @unroll_disable(
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_3:.*]]
+define void @unroll_disable(i32* nocapture %a) {
 entry:
   br label %for.body
 
@@ -54,16 +84,66 @@ for.body:                                         ; preds = %for.body, %entry
   store i32 %inc, i32* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 64
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !6
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7
 
 for.end:                                          ; preds = %for.body
   ret void
 }
-!6 = metadata !{metadata !6, metadata !7}
-!7 = metadata !{metadata !"llvm.loop.unroll.enable", i1 false}
-
-; CHECK: ![[LOOP_1]] = metadata !{metadata ![[LOOP_1]], metadata ![[VEC_ENABLE:.*]], metadata ![[WIDTH_8:.*]], metadata ![[UNROLL_DISABLE:.*]]}
-; CHECK: ![[VEC_ENABLE]] = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
-; CHECK: ![[WIDTH_8]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 8}
-; CHECK: ![[UNROLL_DISABLE]] = metadata !{metadata !"llvm.loop.unroll.enable", i1 false}
-; CHECK: ![[LOOP_2]] = metadata !{metadata ![[LOOP_2]], metadata ![[UNROLL_DISABLE:.*]]}
+!7 = !{!7, !8}
+!8 = !{!"llvm.loop.unroll.disable"}
+
+; This function contains two loops which share the same llvm.loop metadata node
+; with an llvm.loop.unroll.count 2 hint.  Both loops should be unrolled.  This
+; verifies that adding disable metadata to a loop after unrolling doesn't affect
+; other loops which previously shared the same llvm.loop metadata.
+;
+; CHECK-LABEL: @shared_metadata(
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_4:.*]]
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[LOOP_5:.*]]
+define void @shared_metadata(i32* nocapture %List) #0 {
+entry:
+  br label %for.body3
+
+for.body3:                                        ; preds = %for.body3, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body3 ]
+  %arrayidx = getelementptr inbounds i32* %List, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %add4 = add nsw i32 %0, 10
+  store i32 %add4, i32* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 4
+  br i1 %exitcond, label %for.body3.1.preheader, label %for.body3, !llvm.loop !9
+
+for.body3.1.preheader:                            ; preds = %for.body3
+  br label %for.body3.1
+
+for.body3.1:                                      ; preds = %for.body3.1.preheader, %for.body3.1
+  %indvars.iv.1 = phi i64 [ %1, %for.body3.1 ], [ 0, %for.body3.1.preheader ]
+  %1 = add nsw i64 %indvars.iv.1, 1
+  %arrayidx.1 = getelementptr inbounds i32* %List, i64 %1
+  %2 = load i32* %arrayidx.1, align 4
+  %add4.1 = add nsw i32 %2, 10
+  store i32 %add4.1, i32* %arrayidx.1, align 4
+  %exitcond.1 = icmp eq i64 %1, 4
+  br i1 %exitcond.1, label %for.inc5.1, label %for.body3.1, !llvm.loop !9
+
+for.inc5.1:                                       ; preds = %for.body3.1
+  ret void
+}
+!9 = !{!9, !10}
+!10 = !{!"llvm.loop.unroll.count", i32 2}
+
+
+; CHECK: ![[LOOP_1]] = distinct !{![[LOOP_1]], ![[VEC_ENABLE:.*]], ![[WIDTH_8:.*]], ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[VEC_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true}
+; CHECK: ![[WIDTH_8]] = !{!"llvm.loop.vectorize.width", i32 8}
+; CHECK: ![[UNROLL_DISABLE]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: ![[LOOP_2]] = distinct !{![[LOOP_2]], ![[UNROLL_FULL:.*]]}
+; CHECK: ![[UNROLL_FULL]] = !{!"llvm.loop.unroll.full"}
+; CHECK: ![[LOOP_3]] = distinct !{![[LOOP_3]], ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[LOOP_4]] = distinct !{![[LOOP_4]], ![[UNROLL_DISABLE:.*]]}
+; CHECK: ![[LOOP_5]] = distinct !{![[LOOP_5]], ![[UNROLL_DISABLE:.*]]}
diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll
index e1b24e44b5ab..d53fa49cc9f5 100644
--- a/test/Transforms/LoopUnroll/unroll-pragmas.ll
+++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll
@@ -54,12 +54,12 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
   ret void
 }
-!1 = metadata !{metadata !1, metadata !2}
-!2 = metadata !{metadata !"llvm.loop.unroll.enable", i1 false}
+!1 = !{!1, !2}
+!2 = !{!"llvm.loop.unroll.disable"}
 
 ; loop64 has a high enough count that it should *not* be unrolled by
 ; the default unrolling heuristic.  It serves as the control for the
-; unroll(enable) pragma test loop64_with_.* tests below.
+; unroll(full) pragma test loop64_with_.* tests below.
 ;
 ; CHECK-LABEL: @loop64(
 ; CHECK: store i32
@@ -83,7 +83,7 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-; #pragma clang loop unroll(enable)
+; #pragma clang loop unroll(full)
 ; Loop should be fully unrolled.
 ;
 ; CHECK-LABEL: @loop64_with_enable(
@@ -105,8 +105,8 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
   ret void
 }
-!3 = metadata !{metadata !3, metadata !4}
-!4 = metadata !{metadata !"llvm.loop.unroll.enable", i1 true}
+!3 = !{!3, !4}
+!4 = !{!"llvm.loop.unroll.full"}
 
 ; #pragma clang loop unroll_count(4)
 ; Loop should be unrolled 4 times.
@@ -135,40 +135,10 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
   ret void
 }
-!5 = metadata !{metadata !5, metadata !6}
-!6 = metadata !{metadata !"llvm.loop.unroll.count", i32 4}
+!5 = !{!5, !6}
+!6 = !{!"llvm.loop.unroll.count", i32 4}
 
-
-; #pragma clang loop unroll_count(enable) unroll_count(4)
-; Loop should be unrolled 4 times.
-;
-; CHECK-LABEL: @loop64_with_enable_and_count4(
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: store i32
-; CHECK-NOT: store i32
-; CHECK: br i1
-define void @loop64_with_enable_and_count4(i32* nocapture %a) {
-entry:
-  br label %for.body
-
-for.body:                                         ; preds = %for.body, %entry
-  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
-  %inc = add nsw i32 %0, 1
-  store i32 %inc, i32* %arrayidx, align 4
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp eq i64 %indvars.iv.next, 64
-  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !7
-
-for.end:                                          ; preds = %for.body
-  ret void
-}
-!7 = metadata !{metadata !7, metadata !6, metadata !4}
-
-; #pragma clang loop unroll_count(enable)
+; #pragma clang loop unroll(full)
 ; Full unrolling is requested, but loop has a dynamic trip count so
 ; no unrolling should occur.
 ;
@@ -195,7 +165,7 @@ for.body:                                         ; preds = %entry, %for.body
 for.end:                                          ; preds = %for.body, %entry
   ret void
 }
-!8 = metadata !{metadata !8, metadata !4}
+!8 = !{!8, !4}
 
 ; #pragma clang loop unroll_count(4)
 ; Loop has a dynamic trip count.  Unrolling should occur, but no
@@ -232,7 +202,7 @@ for.body:                                         ; preds = %entry, %for.body
 for.end:                                          ; preds = %for.body, %entry
   ret void
 }
-!9 = metadata !{metadata !9, metadata !6}
+!9 = !{!9, !6}
 
 ; #pragma clang loop unroll_count(1)
 ; Loop should not be unrolled
@@ -258,10 +228,10 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
   ret void
 }
-!10 = metadata !{metadata !10, metadata !11}
-!11 = metadata !{metadata !"llvm.loop.unroll.count", i32 1}
+!10 = !{!10, !11}
+!11 = !{!"llvm.loop.unroll.count", i32 1}
 
-; #pragma clang loop unroll(enable)
+; #pragma clang loop unroll(full)
 ; Loop has very high loop count (1 million) and full unrolling was requested.
 ; Loop should unrolled up to the pragma threshold, but not completely.
 ;
@@ -286,4 +256,4 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
   ret void
 }
-!12 = metadata !{metadata !12, metadata !4}
+!12 = !{!12, !4}
diff --git a/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll b/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll
new file mode 100644
index 000000000000..adbf47defe8f
--- /dev/null
+++ b/test/Transforms/LoopUnroll/update-loop-info-in-subloops.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S < %s -loop-unroll -block-freq | FileCheck %s
+; Crasher from PR20987.
+
+; CHECK: define void @update_loop_info_in_subloops
+; CHECK: entry:
+; CHECK: L:
+; CHECK: L.inner:
+; CHECK: L.inner.latch:
+; CHECK: L.latch:
+; CHECK: L.inner.1:
+; CHECK: L.inner.latch.1:
+; CHECK: L.latch.1:
+
+define void @update_loop_info_in_subloops() {
+entry:
+  br label %L
+
+L:
+  %0 = phi i64 [ 1, %entry ], [ %1, %L.latch ]
+  br label %L.inner
+
+L.inner:
+  br label %L.inner.latch
+
+L.inner.latch:
+  br i1 false, label %L.latch, label %L.inner
+
+L.latch:
+  %1 = add i64 %0, 1
+  %2 = icmp eq i64 %1, 3
+  br i1 %2, label %exit, label %L
+
+exit:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
index 1e1396f80085..a292afbd5882 100644
--- a/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
+++ b/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
index aa7cc0ee325d..b3eae690423d 100644
--- a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
+++ b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce
 
 ; Check that we don't fall into an infinite loop.
 define void @test() nounwind {
diff --git a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
index ae9f998fee26..16d64eab25c3 100644
--- a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
+++ b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -dce -force-vector-unroll=1 -force-vector-width=4 
+; RUN: opt < %s  -loop-vectorize -dce -force-vector-interleave=1 -force-vector-width=4 
 
 ; Check that we don't crash.
 
diff --git a/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll b/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll
new file mode 100644
index 000000000000..a01d54312a60
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%struct.anon = type { [100 x i32], i32, [100 x i32] }
+
+@Foo = common global %struct.anon zeroinitializer, align 4
+
+; CHECK-LABEL: @foo(
+; CHECK: load <4 x i32>*
+; CHECK: sdiv <4 x i32>
+; CHECK: store <4 x i32>
+
+define void @foo(){
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %div = sdiv i32 %0, 2
+  %arrayidx2 = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv
+  store i32 %div, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 100
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
index 9c69ba87f392..29d74a0645e8 100644
--- a/test/Transforms/LoopVectorize/X86/already-vectorized.ll
+++ b/test/Transforms/LoopVectorize/X86/already-vectorized.ll
@@ -39,8 +39,8 @@ for.end:                                          ; preds = %for.body
 }
 
 ; Now, we check for the Hint metadata
-; CHECK: [[vect]] = metadata !{metadata [[vect]], metadata [[width:![0-9]+]], metadata [[unroll:![0-9]+]]}
-; CHECK: [[width]] = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
-; CHECK: [[unroll]] = metadata !{metadata !"llvm.loop.interleave.count", i32 1}
-; CHECK: [[scalar]] = metadata !{metadata [[scalar]], metadata [[width]], metadata [[unroll]]}
+; CHECK: [[vect]] = distinct !{[[vect]], [[width:![0-9]+]], [[unroll:![0-9]+]]}
+; CHECK: [[width]] = !{!"llvm.loop.vectorize.width", i32 1}
+; CHECK: [[unroll]] = !{!"llvm.loop.interleave.count", i32 1}
+; CHECK: [[scalar]] = distinct !{[[scalar]], [[width]], [[unroll]]}
 
diff --git a/test/Transforms/LoopVectorize/X86/assume.ll b/test/Transforms/LoopVectorize/X86/assume.ll
new file mode 100644
index 000000000000..a94e24dd7e9c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/assume.ll
@@ -0,0 +1,100 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @test1(float* noalias nocapture %a, float* noalias nocapture readonly %b) #0 {
+entry:
+  br label %for.body
+
+; CHECK-LABEL: @test1
+; CHECK: vector.body:
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: for.body:
+; CHECK: ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %cmp1 = fcmp ogt float %0, 1.000000e+02
+  tail call void @llvm.assume(i1 %cmp1)
+  %add = fadd float %0, 1.000000e+00
+  %arrayidx5 = getelementptr inbounds float* %a, i64 %indvars.iv
+  store float %add, float* %arrayidx5, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv, 1599
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #1
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { nounwind }
+
+%struct.data = type { float*, float* }
+
+; Function Attrs: nounwind uwtable
+define void @test2(%struct.data* nocapture readonly %d) #0 {
+entry:
+  %b = getelementptr inbounds %struct.data* %d, i64 0, i32 1
+  %0 = load float** %b, align 8
+  %ptrint = ptrtoint float* %0 to i64
+  %maskedptr = and i64 %ptrint, 31
+  %maskcond = icmp eq i64 %maskedptr, 0
+  %a = getelementptr inbounds %struct.data* %d, i64 0, i32 0
+  %1 = load float** %a, align 8
+  %ptrint2 = ptrtoint float* %1 to i64
+  %maskedptr3 = and i64 %ptrint2, 31
+  %maskcond4 = icmp eq i64 %maskedptr3, 0
+  br label %for.body
+
+; CHECK-LABEL: @test2
+; CHECK: vector.body:
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: @llvm.assume
+; CHECK: for.body:
+; CHECK: ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  tail call void @llvm.assume(i1 %maskcond)
+  %arrayidx = getelementptr inbounds float* %0, i64 %indvars.iv
+  %2 = load float* %arrayidx, align 4
+  %add = fadd float %2, 1.000000e+00
+  tail call void @llvm.assume(i1 %maskcond4)
+  %arrayidx5 = getelementptr inbounds float* %1, i64 %indvars.iv
+  store float %add, float* %arrayidx5, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv, 1599
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
index e1113fdd911c..05403cd5b61b 100644
--- a/test/Transforms/LoopVectorize/X86/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
@@ -1,5 +1,5 @@
 ; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -S | FileCheck %s
-; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-unroll=0 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -force-vector-interleave=0 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
index d6120e76cc0b..46efaf0e8531 100644
--- a/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
+++ b/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -50,7 +50,7 @@ for.end15:                                        ; preds = %for.end.us, %entry
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
-!3 = metadata !{metadata !4, metadata !5}
-!4 = metadata !{metadata !4}
-!5 = metadata !{metadata !5}
+!3 = !{!4, !5}
+!4 = !{!4}
+!5 = !{!5}
 
diff --git a/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/test/Transforms/LoopVectorize/X86/masked_load_store.ll
new file mode 100644
index 000000000000..0708471e643a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -0,0 +1,420 @@
+; RUN: opt < %s  -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1
+; RUN: opt < %s  -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2
+; RUN: opt < %s  -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
+
+;AVX1-NOT: llvm.masked
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc_linux"
+
+; The source code:
+;
+;void foo1(int *A, int *B, int *trigger) {
+;
+;  for (int i=0; i<10000; i++) {
+;    if (trigger[i] < 100) {
+;          A[i] = B[i] + trigger[i];
+;    }
+;  }
+;}
+
+;AVX2-LABEL: @foo1
+;AVX2: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX2: call <8 x i32> @llvm.masked.load.v8i32
+;AVX2: add nsw <8 x i32>
+;AVX2: call void @llvm.masked.store.v8i32
+;AVX2: ret void
+
+;AVX512-LABEL: @foo1
+;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX512: call <16 x i32> @llvm.masked.load.v16i32
+;AVX512: add nsw <16 x i32>
+;AVX512: call void @llvm.masked.store.v16i32
+;AVX512: ret void
+
+; Function Attrs: nounwind uwtable
+define void @foo1(i32* %A, i32* %B, i32* %trigger) {
+entry:
+  %A.addr = alloca i32*, align 8
+  %B.addr = alloca i32*, align 8
+  %trigger.addr = alloca i32*, align 8
+  %i = alloca i32, align 4
+  store i32* %A, i32** %A.addr, align 8
+  store i32* %B, i32** %B.addr, align 8
+  store i32* %trigger, i32** %trigger.addr, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 10000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %idxprom = sext i32 %1 to i64
+  %2 = load i32** %trigger.addr, align 8
+  %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+  %3 = load i32* %arrayidx, align 4
+  %cmp1 = icmp slt i32 %3, 100
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %4 = load i32* %i, align 4
+  %idxprom2 = sext i32 %4 to i64
+  %5 = load i32** %B.addr, align 8
+  %arrayidx3 = getelementptr inbounds i32* %5, i64 %idxprom2
+  %6 = load i32* %arrayidx3, align 4
+  %7 = load i32* %i, align 4
+  %idxprom4 = sext i32 %7 to i64
+  %8 = load i32** %trigger.addr, align 8
+  %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
+  %9 = load i32* %arrayidx5, align 4
+  %add = add nsw i32 %6, %9
+  %10 = load i32* %i, align 4
+  %idxprom6 = sext i32 %10 to i64
+  %11 = load i32** %A.addr, align 8
+  %arrayidx7 = getelementptr inbounds i32* %11, i64 %idxprom6
+  store i32 %add, i32* %arrayidx7, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %12 = load i32* %i, align 4
+  %inc = add nsw i32 %12, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; The source code:
+;
+;void foo2(float *A, float *B, int *trigger) {
+;
+;  for (int i=0; i<10000; i++) {
+;    if (trigger[i] < 100) {
+;          A[i] = B[i] + trigger[i];
+;    }
+;  }
+;}
+
+;AVX2-LABEL: @foo2
+;AVX2: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX2: call <8 x float> @llvm.masked.load.v8f32
+;AVX2: fadd <8 x float>
+;AVX2: call void @llvm.masked.store.v8f32
+;AVX2: ret void
+
+;AVX512-LABEL: @foo2
+;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX512: call <16 x float> @llvm.masked.load.v16f32
+;AVX512: fadd <16 x float>
+;AVX512: call void @llvm.masked.store.v16f32
+;AVX512: ret void
+
+; Function Attrs: nounwind uwtable
+define void @foo2(float* %A, float* %B, i32* %trigger) {
+entry:
+  %A.addr = alloca float*, align 8
+  %B.addr = alloca float*, align 8
+  %trigger.addr = alloca i32*, align 8
+  %i = alloca i32, align 4
+  store float* %A, float** %A.addr, align 8
+  store float* %B, float** %B.addr, align 8
+  store i32* %trigger, i32** %trigger.addr, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 10000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %idxprom = sext i32 %1 to i64
+  %2 = load i32** %trigger.addr, align 8
+  %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+  %3 = load i32* %arrayidx, align 4
+  %cmp1 = icmp slt i32 %3, 100
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %4 = load i32* %i, align 4
+  %idxprom2 = sext i32 %4 to i64
+  %5 = load float** %B.addr, align 8
+  %arrayidx3 = getelementptr inbounds float* %5, i64 %idxprom2
+  %6 = load float* %arrayidx3, align 4
+  %7 = load i32* %i, align 4
+  %idxprom4 = sext i32 %7 to i64
+  %8 = load i32** %trigger.addr, align 8
+  %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
+  %9 = load i32* %arrayidx5, align 4
+  %conv = sitofp i32 %9 to float
+  %add = fadd float %6, %conv
+  %10 = load i32* %i, align 4
+  %idxprom6 = sext i32 %10 to i64
+  %11 = load float** %A.addr, align 8
+  %arrayidx7 = getelementptr inbounds float* %11, i64 %idxprom6
+  store float %add, float* %arrayidx7, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %12 = load i32* %i, align 4
+  %inc = add nsw i32 %12, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; The source code:
+;
+;void foo3(double *A, double *B, int *trigger) {
+;
+;  for (int i=0; i<10000; i++) {
+;    if (trigger[i] < 100) {
+;          A[i] = B[i] + trigger[i];
+;    }
+;  }
+;}
+
+;AVX2-LABEL: @foo3
+;AVX2: icmp slt <4 x i32> %wide.load, <i32 100, i32 100,
+;AVX2: call <4 x double> @llvm.masked.load.v4f64
+;AVX2: sitofp <4 x i32> %wide.load to <4 x double>
+;AVX2: fadd <4 x double>
+;AVX2: call void @llvm.masked.store.v4f64
+;AVX2: ret void
+
+;AVX512-LABEL: @foo3
+;AVX512: icmp slt <8 x i32> %wide.load, <i32 100, i32 100,
+;AVX512: call <8 x double> @llvm.masked.load.v8f64
+;AVX512: sitofp <8 x i32> %wide.load to <8 x double>
+;AVX512: fadd <8 x double>
+;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: ret void
+
+
+; Function Attrs: nounwind uwtable
+define void @foo3(double* %A, double* %B, i32* %trigger) #0 {
+entry:
+  %A.addr = alloca double*, align 8
+  %B.addr = alloca double*, align 8
+  %trigger.addr = alloca i32*, align 8
+  %i = alloca i32, align 4
+  store double* %A, double** %A.addr, align 8
+  store double* %B, double** %B.addr, align 8
+  store i32* %trigger, i32** %trigger.addr, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 10000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %idxprom = sext i32 %1 to i64
+  %2 = load i32** %trigger.addr, align 8
+  %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+  %3 = load i32* %arrayidx, align 4
+  %cmp1 = icmp slt i32 %3, 100
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %4 = load i32* %i, align 4
+  %idxprom2 = sext i32 %4 to i64
+  %5 = load double** %B.addr, align 8
+  %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
+  %6 = load double* %arrayidx3, align 8
+  %7 = load i32* %i, align 4
+  %idxprom4 = sext i32 %7 to i64
+  %8 = load i32** %trigger.addr, align 8
+  %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
+  %9 = load i32* %arrayidx5, align 4
+  %conv = sitofp i32 %9 to double
+  %add = fadd double %6, %conv
+  %10 = load i32* %i, align 4
+  %idxprom6 = sext i32 %10 to i64
+  %11 = load double** %A.addr, align 8
+  %arrayidx7 = getelementptr inbounds double* %11, i64 %idxprom6
+  store double %add, double* %arrayidx7, align 8
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %12 = load i32* %i, align 4
+  %inc = add nsw i32 %12, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; The source code:
+;
+;void foo4(double *A, double *B, int *trigger) {
+;
+;  for (int i=0; i<10000; i++) {
+;    if (trigger[i] < 100) {
+;          A[i] = B[i*2] + trigger[i]; << non-cosecutive access
+;    }
+;  }
+;}
+
+;AVX2-LABEL: @foo4
+;AVX2-NOT: llvm.masked
+;AVX2: ret void
+
+;AVX512-LABEL: @foo4
+;AVX512-NOT: llvm.masked
+;AVX512: ret void
+
+; Function Attrs: nounwind uwtable
+define void @foo4(double* %A, double* %B, i32* %trigger)  {
+entry:
+  %A.addr = alloca double*, align 8
+  %B.addr = alloca double*, align 8
+  %trigger.addr = alloca i32*, align 8
+  %i = alloca i32, align 4
+  store double* %A, double** %A.addr, align 8
+  store double* %B, double** %B.addr, align 8
+  store i32* %trigger, i32** %trigger.addr, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 10000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %idxprom = sext i32 %1 to i64
+  %2 = load i32** %trigger.addr, align 8
+  %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+  %3 = load i32* %arrayidx, align 4
+  %cmp1 = icmp slt i32 %3, 100
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %4 = load i32* %i, align 4
+  %mul = mul nsw i32 %4, 2
+  %idxprom2 = sext i32 %mul to i64
+  %5 = load double** %B.addr, align 8
+  %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
+  %6 = load double* %arrayidx3, align 8
+  %7 = load i32* %i, align 4
+  %idxprom4 = sext i32 %7 to i64
+  %8 = load i32** %trigger.addr, align 8
+  %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
+  %9 = load i32* %arrayidx5, align 4
+  %conv = sitofp i32 %9 to double
+  %add = fadd double %6, %conv
+  %10 = load i32* %i, align 4
+  %idxprom6 = sext i32 %10 to i64
+  %11 = load double** %A.addr, align 8
+  %arrayidx7 = getelementptr inbounds double* %11, i64 %idxprom6
+  store double %add, double* %arrayidx7, align 8
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %12 = load i32* %i, align 4
+  %inc = add nsw i32 %12, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+@a = common global [1 x i32*] zeroinitializer, align 8
+@c = common global i32* null, align 8
+
+; The loop here should not be vectorized due to trapping
+; constant expression
+;AVX2-LABEL: @foo5
+;AVX2-NOT: llvm.masked
+;AVX2: store i32 sdiv
+;AVX2: ret void
+
+;AVX512-LABEL: @foo5
+;AVX512-NOT: llvm.masked
+;AVX512: store i32 sdiv
+;AVX512: ret void
+
+; Function Attrs: nounwind uwtable
+define void @foo5(i32* %A, i32* %B, i32* %trigger) {
+entry:
+  %A.addr = alloca i32*, align 8
+  %B.addr = alloca i32*, align 8
+  %trigger.addr = alloca i32*, align 8
+  %i = alloca i32, align 4
+  store i32* %A, i32** %A.addr, align 8
+  store i32* %B, i32** %B.addr, align 8
+  store i32* %trigger, i32** %trigger.addr, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32* %i, align 4
+  %cmp = icmp slt i32 %0, 10000
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32* %i, align 4
+  %idxprom = sext i32 %1 to i64
+  %2 = load i32** %trigger.addr, align 8
+  %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+  %3 = load i32* %arrayidx, align 4
+  %cmp1 = icmp slt i32 %3, 100
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %4 = load i32* %i, align 4
+  %idxprom2 = sext i32 %4 to i64
+  %5 = load i32** %B.addr, align 8
+  %arrayidx3 = getelementptr inbounds i32* %5, i64 %idxprom2
+  %6 = load i32* %arrayidx3, align 4
+  %7 = load i32* %i, align 4
+  %idxprom4 = sext i32 %7 to i64
+  %8 = load i32** %trigger.addr, align 8
+  %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
+  %9 = load i32* %arrayidx5, align 4
+  %add = add nsw i32 %6, %9
+  %10 = load i32* %i, align 4
+  %idxprom6 = sext i32 %10 to i64
+  %11 = load i32** %A.addr, align 8
+  %arrayidx7 = getelementptr inbounds i32* %11, i64 %idxprom6
+  store i32 sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 1), i32** @c) to i32)), i32* %arrayidx7, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %12 = load i32* %i, align 4
+  %inc = add nsw i32 %12, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
index 8e0ca417b404..7feb66ce6a98 100644
--- a/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ b/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -170,7 +170,7 @@ for.end:                                          ; preds = %for.body
   ret i32 %1
 }
 
-!0 = metadata !{metadata !0, metadata !1}
-!1 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 1}
-!2 = metadata !{metadata !2, metadata !3}
-!3 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 0}
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.enable", i1 1}
+!2 = !{!2, !3}
+!3 = !{!"llvm.loop.vectorize.enable", i1 0}
diff --git a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
index 8716cff77789..fd69dc46070e 100644
--- a/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
+++ b/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -vectorizer-min-trip-count=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -vectorizer-min-trip-count=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
index 2c47fcb4d389..ad010443a5e0 100644
--- a/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -46,4 +46,4 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-!3 = metadata !{metadata !3}
+!3 = !{!3}
diff --git a/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
index 7e156a9edad4..22ab52157498 100644
--- a/test/Transforms/LoopVectorize/X86/parallel-loops.ll
+++ b/test/Transforms/LoopVectorize/X86/parallel-loops.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -104,8 +104,8 @@ for.end:                                          ; preds = %for.body
   ret void
 }
 
-!3 = metadata !{metadata !3}
-!4 = metadata !{metadata !4}
-!5 = metadata !{metadata !3, metadata !4}
-!6 = metadata !{metadata !6}
-!7 = metadata !{metadata !7}
+!3 = !{!3}
+!4 = !{!4}
+!5 = !{!3, !4}
+!6 = !{!6}
+!7 = !{!7}
diff --git a/test/Transforms/LoopVectorize/X86/powof2div.ll b/test/Transforms/LoopVectorize/X86/powof2div.ll
new file mode 100644
index 000000000000..054da8ed20ae
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/powof2div.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-unknown-linux-gnu -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.anon = type { [100 x i32], i32, [100 x i32] }
+
+@Foo = common global %struct.anon zeroinitializer, align 4
+
+;CHECK-LABEL: @foo(
+;CHECK: load <4 x i32>*
+;CHECK: sdiv <4 x i32>
+;CHECK: store <4 x i32>
+
+define void @foo(){
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %div = sdiv i32 %0, 2
+  %arrayidx2 = getelementptr inbounds %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv
+  store i32 %div, i32* %arrayidx2, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 100
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/X86/small-size.ll b/test/Transforms/LoopVectorize/X86/small-size.ll
index bcf16aa5db13..8c7a881be46f 100644
--- a/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -loop-vectorize-with-block-frequency -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -139,7 +139,7 @@ define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture
   ret void
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!0 = !{!"branch_weights", i32 64, i32 4}
 
 ; We can't vectorize this one because we need a runtime ptr check.
 ;CHECK-LABEL: @example23(
diff --git a/test/Transforms/LoopVectorize/X86/tripcount.ll b/test/Transforms/LoopVectorize/X86/tripcount.ll
index 6b38bacf8888..a4ec6948a2f0 100644
--- a/test/Transforms/LoopVectorize/X86/tripcount.ll
+++ b/test/Transforms/LoopVectorize/X86/tripcount.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -mcpu=prescott < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -mcpu=prescott < %s | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
 target triple = "i386-unknown-freebsd11.0"
diff --git a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
index d5024bb13210..716dc08a55ac 100644
--- a/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
+++ b/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
@@ -1,6 +1,6 @@
-; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S \
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-interleave=0 -dce -S \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-VECTOR
-; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=1 -force-vector-unroll=0 -dce -S \
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=1 -force-vector-interleave=0 -dce -S \
 ; RUN:   | FileCheck %s --check-prefix=CHECK-SCALAR
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
diff --git a/test/Transforms/LoopVectorize/X86/unroll_selection.ll b/test/Transforms/LoopVectorize/X86/unroll_selection.ll
index 2d7b663804f5..c684b4e79b14 100644
--- a/test/Transforms/LoopVectorize/X86/unroll_selection.ll
+++ b/test/Transforms/LoopVectorize/X86/unroll_selection.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-unroll=0 -dce -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -force-vector-width=4 -force-vector-interleave=0 -dce -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
index 074313bde6f5..a781fbedb404 100644
--- a/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
+++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
@@ -52,8 +52,8 @@ for.end:
   ret void
 }
 
-!1 = metadata !{metadata !1, metadata !2}
-!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+!1 = !{!1, !2}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}
 
 ;
 ; This method will not be vectorized, as scalar cost is lower than any of vector costs.
@@ -89,5 +89,5 @@ for.end:
 declare float @llvm.sin.f32(float) nounwind readnone
 
 ; Dummy metadata
-!3 = metadata !{metadata !3}
+!3 = !{!3}
 
diff --git a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
index 97c31a148e3a..e39e6b57dd96 100644
--- a/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
+++ b/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
@@ -43,8 +43,8 @@ for.end:
   ret void
 }
 
-!1 = metadata !{metadata !1, metadata !2}
-!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+!1 = !{!1, !2}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}
 
 ;
 ; This loop will not be vectorized as the trip count is below the threshold.
@@ -69,5 +69,5 @@ for.end:
   ret void
 }
 
-!3 = metadata !{metadata !3}
+!3 = !{!3}
 
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index 93f24cbf4909..011ce8eac556 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -24,10 +24,10 @@
 
 ; File, line, and column should match those specified in the metadata
 ; CHECK: remark: source.cpp:4:5: loop not vectorized: could not determine number of loop iterations
-; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization was not specified
+; CHECK: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info
 ; CHECK: remark: source.cpp:13:5: loop not vectorized: vector width and interleave count are explicitly set to 1
 ; CHECK: remark: source.cpp:19:5: loop not vectorized: cannot identify array bounds
-; CHECK: remark: source.cpp:19:5: loop not vectorized: vectorization is explicitly enabled
+; CHECK: remark: source.cpp:19:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info
 ; CHECK: warning: source.cpp:19:5: loop not vectorized: failed explicitly specified loop vectorization
 
 ; CHECK: _Z4testPii
@@ -122,40 +122,40 @@ attributes #0 = { nounwind }
 !llvm.module.flags = !{!9, !10}
 !llvm.ident = !{!11}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
-!1 = metadata !{metadata !"source.cpp", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !7, metadata !8}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 1}
-!5 = metadata !{i32 786473, metadata !1}
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
-!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_disabled", metadata !"test_disabled", metadata !"", i32 10, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z13test_disabledPii, null, null, metadata !2, i32 10}
-!8 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_array_bounds", metadata !"test_array_bounds", metadata !"", i32 16, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, metadata !2, i32 16}
-!9 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!10 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!11 = metadata !{metadata !"clang version 3.5.0"}
-!12 = metadata !{i32 3, i32 8, metadata !13, null}
-!13 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
-!14 = metadata !{metadata !14, metadata !15, metadata !15}
-!15 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
-!16 = metadata !{i32 4, i32 5, metadata !17, null}
-!17 = metadata !{i32 786443, metadata !1, metadata !13, i32 3, i32 36, i32 0, i32 1}
-!18 = metadata !{metadata !19, metadata !19, i64 0}
-!19 = metadata !{metadata !"int", metadata !20, i64 0}
-!20 = metadata !{metadata !"omnipotent char", metadata !21, i64 0}
-!21 = metadata !{metadata !"Simple C/C++ TBAA"}
-!22 = metadata !{i32 5, i32 9, metadata !23, null}
-!23 = metadata !{i32 786443, metadata !1, metadata !17, i32 5, i32 9, i32 0, i32 2}
-!24 = metadata !{i32 8, i32 1, metadata !4, null}
-!25 = metadata !{i32 12, i32 8, metadata !26, null}
-!26 = metadata !{i32 786443, metadata !1, metadata !7, i32 12, i32 3, i32 0, i32 3}
-!27 = metadata !{metadata !27, metadata !28, metadata !29}
-!28 = metadata !{metadata !"llvm.loop.interleave.count", i32 1}
-!29 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
-!30 = metadata !{i32 13, i32 5, metadata !26, null}
-!31 = metadata !{i32 14, i32 1, metadata !7, null}
-!32 = metadata !{i32 18, i32 8, metadata !33, null}
-!33 = metadata !{i32 786443, metadata !1, metadata !8, i32 18, i32 3, i32 0, i32 4}
-!34 = metadata !{metadata !34, metadata !15}
-!35 = metadata !{i32 19, i32 5, metadata !33, null}
-!36 = metadata !{i32 20, i32 1, metadata !8, null}
+!0 = !{!"0x11\004\00clang version 3.5.0\001\00\006\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"source.cpp", !"."}
+!2 = !{}
+!3 = !{!4, !7, !8}
+!4 = !{!"0x2e\00test\00test\00\001\000\001\000\006\00256\001\001", !1, !5, !6, null, void (i32*, i32)* @_Z4testPii, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [test]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [./source.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!"0x2e\00test_disabled\00test_disabled\00\0010\000\001\000\006\00256\001\0010", !1, !5, !6, null, void (i32*, i32)* @_Z13test_disabledPii, null, null, !2} ; [ DW_TAG_subprogram ] [line 10] [def] [test_disabled]
+!8 = !{!"0x2e\00test_array_bounds\00test_array_bounds\00\0016\000\001\000\006\00256\001\0016", !1, !5, !6, null, void (i32*, i32*, i32)* @_Z17test_array_boundsPiS_i, null, null, !2} ; [ DW_TAG_subprogram ] [line 16] [def] [test_array_bounds]
+!9 = !{i32 2, !"Dwarf Version", i32 2}
+!10 = !{i32 2, !"Debug Info Version", i32 2}
+!11 = !{!"clang version 3.5.0"}
+!12 = !MDLocation(line: 3, column: 8, scope: !13)
+!13 = !{!"0xb\003\003\000", !1, !4} ; [ DW_TAG_lexical_block ]
+!14 = !{!14, !15, !15}
+!15 = !{!"llvm.loop.vectorize.enable", i1 true}
+!16 = !MDLocation(line: 4, column: 5, scope: !17)
+!17 = !{!"0xb\003\0036\000", !1, !13} ; [ DW_TAG_lexical_block ]
+!18 = !{!19, !19, i64 0}
+!19 = !{!"int", !20, i64 0}
+!20 = !{!"omnipotent char", !21, i64 0}
+!21 = !{!"Simple C/C++ TBAA"}
+!22 = !MDLocation(line: 5, column: 9, scope: !23)
+!23 = !{!"0xb\005\009\000", !1, !17} ; [ DW_TAG_lexical_block ]
+!24 = !MDLocation(line: 8, column: 1, scope: !4)
+!25 = !MDLocation(line: 12, column: 8, scope: !26)
+!26 = !{!"0xb\0012\003\000", !1, !7} ; [ DW_TAG_lexical_block ]
+!27 = !{!27, !28, !29}
+!28 = !{!"llvm.loop.interleave.count", i32 1}
+!29 = !{!"llvm.loop.vectorize.width", i32 1}
+!30 = !MDLocation(line: 13, column: 5, scope: !26)
+!31 = !MDLocation(line: 14, column: 1, scope: !7)
+!32 = !MDLocation(line: 18, column: 8, scope: !33)
+!33 = !{!"0xb\0018\003\000", !1, !8} ; [ DW_TAG_lexical_block ]
+!34 = !{!34, !15}
+!35 = !MDLocation(line: 19, column: 5, scope: !33)
+!36 = !MDLocation(line: 20, column: 1, scope: !8)
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
index f6834477ff51..16fe370464b0 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=VECTORIZED %s
-; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
-; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-unroll=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
 
 ; This code has all the !dbg annotations needed to track source line information,
 ; but is missing the llvm.dbg.cu annotation. This prevents code generation from
@@ -49,26 +49,26 @@ declare void @ibar(i32*) #1
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!1 = metadata !{metadata !"vectorization-remarks.c", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 5, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @foo, null, null, metadata !2, i32 6} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [./vectorization-remarks.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!9 = metadata !{metadata !"clang version 3.5.0 "}
-!10 = metadata !{i32 8, i32 3, metadata !4, null} ; [ DW_TAG_imported_declaration ]
-!11 = metadata !{metadata !12, metadata !12, i64 0}
-!12 = metadata !{metadata !"int", metadata !13, i64 0}
-!13 = metadata !{metadata !"omnipotent char", metadata !14, i64 0}
-!14 = metadata !{metadata !"Simple C/C++ TBAA"}
-!15 = metadata !{i32 17, i32 8, metadata !16, null}
-!16 = metadata !{i32 786443, metadata !1, metadata !17, i32 17, i32 8, i32 2, i32 3} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
-!17 = metadata !{i32 786443, metadata !1, metadata !18, i32 17, i32 8, i32 1, i32 2} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
-!18 = metadata !{i32 786443, metadata !1, metadata !4, i32 17, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
-!19 = metadata !{i32 18, i32 5, metadata !20, null}
-!20 = metadata !{i32 786443, metadata !1, metadata !18, i32 17, i32 27, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
-!21 = metadata !{metadata !13, metadata !13, i64 0}
-!22 = metadata !{i32 20, i32 3, metadata !4, null}
-!23 = metadata !{i32 21, i32 3, metadata !4, null}
+!1 = !{!"vectorization-remarks.c", !"."}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\005\000\001\000\006\00256\001\006", !1, !5, !6, null, i32 (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 5] [def] [scope 6] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [./vectorization-remarks.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 1, !"Debug Info Version", i32 2}
+!9 = !{!"clang version 3.5.0 "}
+!10 = !MDLocation(line: 8, column: 3, scope: !4)
+!11 = !{!12, !12, i64 0}
+!12 = !{!"int", !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C/C++ TBAA"}
+!15 = !MDLocation(line: 17, column: 8, scope: !16)
+!16 = !{!"0xb\0017\008\002", !1, !17} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!17 = !{!"0xb\0017\008\001", !1, !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!18 = !{!"0xb\0017\003\000", !1, !4} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!19 = !MDLocation(line: 18, column: 5, scope: !20)
+!20 = !{!"0xb\0017\0027\000", !1, !18} ; [ DW_TAG_lexical_block ] [./vectorization-remarks.c]
+!21 = !{!13, !13, i64 0}
+!22 = !MDLocation(line: 20, column: 3, scope: !4)
+!23 = !MDLocation(line: 21, column: 3, scope: !4)
diff --git a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
index efc93d94a7c5..d8e5403d2d4e 100644
--- a/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
+++ b/test/Transforms/LoopVectorize/X86/x86_fp80-vector-store.ll
@@ -1,4 +1,4 @@
-; RUN: opt -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S < %s | FileCheck %s
+; RUN: opt -O3 -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.7.0"
diff --git a/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll b/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll
index a099daa740e5..cab333d06406 100644
--- a/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll
+++ b/test/Transforms/LoopVectorize/XCore/no-vector-registers.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -S -mtriple=xcore | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S -mtriple=xcore | FileCheck %s
 
 target datalayout = "e-p:32:32:32-a0:0:32-n32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f16:16:32-f32:32:32-f64:32:32"
 target triple = "xcore"
diff --git a/test/Transforms/LoopVectorize/align.ll b/test/Transforms/LoopVectorize/align.ll
index 84b03615d374..f2fb8b91b46e 100644
--- a/test/Transforms/LoopVectorize/align.ll
+++ b/test/Transforms/LoopVectorize/align.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/bsd_regex.ll b/test/Transforms/LoopVectorize/bsd_regex.ll
index 7b712729a1cd..7a3e79893da3 100644
--- a/test/Transforms/LoopVectorize/bsd_regex.ll
+++ b/test/Transforms/LoopVectorize/bsd_regex.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=2 < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=2 < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
index 2648bbea8035..d7cbad06ddf0 100644
--- a/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
+++ b/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/calloc.ll b/test/Transforms/LoopVectorize/calloc.ll
index 55c0a605450f..5f441f3e238f 100644
--- a/test/Transforms/LoopVectorize/calloc.ll
+++ b/test/Transforms/LoopVectorize/calloc.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/cast-induction.ll b/test/Transforms/LoopVectorize/cast-induction.ll
index 255ce9c77eaf..4f92d33ff528 100644
--- a/test/Transforms/LoopVectorize/cast-induction.ll
+++ b/test/Transforms/LoopVectorize/cast-induction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 ; rdar://problem/12848162
 
diff --git a/test/Transforms/LoopVectorize/conditional-assignment.ll b/test/Transforms/LoopVectorize/conditional-assignment.ll
new file mode 100644
index 000000000000..38e9c4f9a61c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/conditional-assignment.ll
@@ -0,0 +1,58 @@
+; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+
+; CHECK: remark: source.c:2:8: loop not vectorized: store that is conditionally executed prevents vectorization
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Function Attrs: nounwind ssp uwtable
+define void @conditional_store(i32* noalias nocapture %indices) #0 {
+entry:
+  br label %for.body, !dbg !10
+
+for.body:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+  %arrayidx = getelementptr inbounds i32* %indices, i64 %indvars.iv, !dbg !12
+  %0 = load i32* %arrayidx, align 4, !dbg !12, !tbaa !14
+  %cmp1 = icmp eq i32 %0, 1024, !dbg !12
+  br i1 %cmp1, label %if.then, label %for.inc, !dbg !12
+
+if.then:                                          ; preds = %for.body
+  store i32 0, i32* %arrayidx, align 4, !dbg !18, !tbaa !14
+  br label %for.inc, !dbg !18
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
+  %exitcond = icmp eq i64 %indvars.iv.next, 4096, !dbg !10
+  br i1 %exitcond, label %for.end, label %for.body, !dbg !10
+
+for.end:                                          ; preds = %for.inc
+  ret void, !dbg !19
+}
+
+attributes #0 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+
+!0 = !{!"0x11\0012\00clang version 3.6.0\001\00\000\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ]
+!1 = !{!"source.c", !"."}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00conditional_store\00conditional_store\00\001\000\001\000\006\00256\001\001", !1, !5, !6, null, void (i32*)* @conditional_store, null, null, !2} ; [ DW_TAG_subprogram ]
+!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ]
+!7 = !{i32 2, !"Dwarf Version", i32 2}
+!8 = !{i32 2, !"Debug Info Version", i32 2}
+!9 = !{!"clang version 3.6.0"}
+!10 = !MDLocation(line: 2, column: 8, scope: !11)
+!11 = !{!"0xb\002\003\000", !1, !4} ; [ DW_TAG_lexical_block ]
+!12 = !MDLocation(line: 3, column: 9, scope: !13)
+!13 = !{!"0xb\003\009\000", !1, !11} ; [ DW_TAG_lexical_block ]
+!14 = !{!15, !15, i64 0}
+!15 = !{!"int", !16, i64 0}
+!16 = !{!"omnipotent char", !17, i64 0}
+!17 = !{!"Simple C/C++ TBAA"}
+!18 = !MDLocation(line: 3, column: 29, scope: !13)
+!19 = !MDLocation(line: 4, column: 1, scope: !4)
diff --git a/test/Transforms/LoopVectorize/control-flow.ll b/test/Transforms/LoopVectorize/control-flow.ll
index e4ba77fa3daa..1882c3ff9a83 100644
--- a/test/Transforms/LoopVectorize/control-flow.ll
+++ b/test/Transforms/LoopVectorize/control-flow.ll
@@ -11,7 +11,7 @@
 ; }
 
 ; CHECK: remark: source.cpp:5:9: loop not vectorized: loop control flow is not understood by vectorizer
-; CHECK: remark: source.cpp:5:9: loop not vectorized: vectorization was not specified
+; CHECK: remark: source.cpp:5:9: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info
 
 ; CHECK: _Z4testPii
 ; CHECK-NOT: x i32>
@@ -55,24 +55,24 @@ attributes #0 = { nounwind }
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
-!1 = metadata !{metadata !"source.cpp", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @_Z4testPii, null, null, metadata !2, i32 2}
-!5 = metadata !{i32 786473, metadata !1}
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
-!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!9 = metadata !{metadata !"clang version 3.5.0"}
-!10 = metadata !{i32 3, i32 8, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
-!12 = metadata !{i32 5, i32 9, metadata !13, null}
-!13 = metadata !{i32 786443, metadata !1, metadata !14, i32 5, i32 9, i32 0, i32 2}
-!14 = metadata !{i32 786443, metadata !1, metadata !11, i32 4, i32 3, i32 0, i32 1}
-!15 = metadata !{metadata !16, metadata !16, i64 0}
-!16 = metadata !{metadata !"int", metadata !17, i64 0}
-!17 = metadata !{metadata !"omnipotent char", metadata !18, i64 0}
-!18 = metadata !{metadata !"Simple C/C++ TBAA"}
-!19 = metadata !{i32 8, i32 7, metadata !13, null}
-!20 = metadata !{i32 12, i32 3, metadata !4, null}
+!0 = !{!"0x11\004\00clang version 3.5.0\001\00\006\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"source.cpp", !"."}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00test\00test\00\001\000\001\000\006\00256\001\002", !1, !5, !6, null, i32 (i32*, i32)* @_Z4testPii, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [test]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [./source.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{i32 2, !"Dwarf Version", i32 2}
+!8 = !{i32 2, !"Debug Info Version", i32 2}
+!9 = !{!"clang version 3.5.0"}
+!10 = !MDLocation(line: 3, column: 8, scope: !11)
+!11 = !{!"0xb\003\003\000", !1, !4} ; [ DW_TAG_lexical_block ]
+!12 = !MDLocation(line: 5, column: 9, scope: !13)
+!13 = !{!"0xb\005\009\000", !1, !14} ; [ DW_TAG_lexical_block ]
+!14 = !{!"0xb\004\003\000", !1, !11} ; [ DW_TAG_lexical_block ]
+!15 = !{!16, !16, i64 0}
+!16 = !{!"int", !17, i64 0}
+!17 = !{!"omnipotent char", !18, i64 0}
+!18 = !{!"Simple C/C++ TBAA"}
+!19 = !MDLocation(line: 8, column: 7, scope: !13)
+!20 = !MDLocation(line: 12, column: 3, scope: !4)
diff --git a/test/Transforms/LoopVectorize/cpp-new-array.ll b/test/Transforms/LoopVectorize/cpp-new-array.ll
index c8215a107de7..f32f610b1cd6 100644
--- a/test/Transforms/LoopVectorize/cpp-new-array.ll
+++ b/test/Transforms/LoopVectorize/cpp-new-array.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll
index 2497b25ea1da..92d315481266 100644
--- a/test/Transforms/LoopVectorize/dbg.value.ll
+++ b/test/Transforms/LoopVectorize/dbg.value.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine | FileCheck %s
+; RUN: opt < %s -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine | FileCheck %s
 ; Make sure we vectorize with debugging turned on.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,7 +11,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 ; CHECK-LABEL: @test(
 define i32 @test() #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !9), !dbg !18
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !9, metadata !{}), !dbg !18
   br label %for.body, !dbg !18
 
 for.body:
@@ -25,7 +25,7 @@ for.body:
   %arrayidx4 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv, !dbg !19
   store i32 %add, i32* %arrayidx4, align 4, !dbg !19
   %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !18
-  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !9), !dbg !18
+  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !9, metadata !{}), !dbg !18
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !18
   %exitcond = icmp ne i32 %lftr.wideiv, 1024, !dbg !18
   br i1 %exitcond, label %for.body, label %for.end, !dbg !18
@@ -34,9 +34,9 @@ for.end:
   ret i32 0, !dbg !24
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "relocation-model"="pic" "ssp-buffers-size"="8" }
 attributes #1 = { nounwind readnone }
@@ -44,27 +44,27 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!26}
 
-!0 = metadata !{i32 786449, metadata !25, i32 4, metadata !"clang", i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !11, null, metadata !""}
-!1 = metadata !{i32 0}
-!2 = metadata !{metadata !3}
-!3 = metadata !{i32 786478, metadata !25, metadata !4, metadata !"test", metadata !"test", metadata !"test", i32 5, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @test, null, null, metadata !8, i32 5}
-!4 = metadata !{i32 786473, metadata !25}
-!5 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!6 = metadata !{metadata !7}
-!7 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786688, metadata !10, metadata !"i", metadata !4, i32 6, metadata !7, i32 0, i32 0}
-!10 = metadata !{i32 786443, metadata !25, metadata !3, i32 6, i32 0, i32 0}
-!11 = metadata !{metadata !12, metadata !16, metadata !17}
-!12 = metadata !{i32 786484, i32 0, null, metadata !"A", metadata !"A", metadata !"", metadata !4, i32 1, metadata !13, i32 0, i32 1, [1024 x i32]* @A, null}
-!13 = metadata !{i32 786433, null, null, null, i32 0, i64 32768, i64 32, i32 0, i32 0, metadata !7, metadata !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32768, align 32, offset 0] [from int]
-!14 = metadata !{metadata !15}
-!15 = metadata !{i32 786465, i64 0, i64 1024}
-!16 = metadata !{i32 786484, i32 0, null, metadata !"B", metadata !"B", metadata !"", metadata !4, i32 2, metadata !13, i32 0, i32 1, [1024 x i32]* @B, null}
-!17 = metadata !{i32 786484, i32 0, null, metadata !"C", metadata !"C", metadata !"", metadata !4, i32 3, metadata !13, i32 0, i32 1, [1024 x i32]* @C, null} 
-!18 = metadata !{i32 6, i32 0, metadata !10, null}
-!19 = metadata !{i32 7, i32 0, metadata !20, null}
-!20 = metadata !{i32 786443, metadata !25, metadata !10, i32 6, i32 0, i32 1}
-!24 = metadata !{i32 9, i32 0, metadata !3, null}
-!25 = metadata !{metadata !"test", metadata !"/path/to/somewhere"}
-!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang\001\00\000\00\000", !25, !1, !1, !2, !11, null} ; [ DW_TAG_compile_unit ]
+!1 = !{i32 0}
+!2 = !{!3}
+!3 = !{!"0x2e\00test\00test\00test\005\000\001\000\006\00256\001\005", !25, !4, !5, null, i32 ()* @test, null, null, !8} ; [ DW_TAG_subprogram ]
+!4 = !{!"0x29", !25} ; [ DW_TAG_file_type ]
+!5 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !6, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!6 = !{!7}
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ]
+!8 = !{!9}
+!9 = !{!"0x100\00i\006\000", !10, !4, !7} ; [ DW_TAG_auto_variable ]
+!10 = !{!"0xb\006\000\000", !25, !3} ; [ DW_TAG_lexical_block ]
+!11 = !{!12, !16, !17}
+!12 = !{!"0x34\00A\00A\00\001\000\001", null, !4, !13, [1024 x i32]* @A, null} ; [ DW_TAG_variable ]
+!13 = !{!"0x1\00\000\0032768\0032\000\000", null, null, !7, !14, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 32768, align 32, offset 0] [from int]
+!14 = !{!15}
+!15 = !{i32 786465, i64 0, i64 1024}
+!16 = !{!"0x34\00B\00B\00\002\000\001", null, !4, !13, [1024 x i32]* @B, null} ; [ DW_TAG_variable ]
+!17 = !{!"0x34\00C\00C\00\003\000\001", null, !4, !13, [1024 x i32]* @C, null} ; [ DW_TAG_variable ]
+!18 = !MDLocation(line: 6, scope: !10)
+!19 = !MDLocation(line: 7, scope: !20)
+!20 = !{!"0xb\006\000\001", !25, !10} ; [ DW_TAG_lexical_block ]
+!24 = !MDLocation(line: 9, scope: !3)
+!25 = !{!"test", !"/path/to/somewhere"}
+!26 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/debugloc.ll b/test/Transforms/LoopVectorize/debugloc.ll
index bf0b4184b7a1..634bf79ddf0e 100644
--- a/test/Transforms/LoopVectorize/debugloc.ll
+++ b/test/Transforms/LoopVectorize/debugloc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 | FileCheck %s
+; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -19,10 +19,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 
 define i32 @f(i32* nocapture %a, i32 %size) #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32* %a}, i64 0, metadata !13), !dbg !19
-  tail call void @llvm.dbg.value(metadata !{i32 %size}, i64 0, metadata !14), !dbg !19
-  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !15), !dbg !20
-  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !16), !dbg !21
+  tail call void @llvm.dbg.value(metadata i32* %a, i64 0, metadata !13, metadata !{}), !dbg !19
+  tail call void @llvm.dbg.value(metadata i32 %size, i64 0, metadata !14, metadata !{}), !dbg !19
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !15, metadata !{}), !dbg !20
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !16, metadata !{}), !dbg !21
   %cmp4 = icmp eq i32 %size, 0, !dbg !21
   br i1 %cmp4, label %for.end, label %for.body.lr.ph, !dbg !21
 
@@ -35,9 +35,9 @@ for.body:                                         ; preds = %for.body.lr.ph, %fo
   %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv, !dbg !22
   %0 = load i32* %arrayidx, align 4, !dbg !22
   %add = add i32 %0, %sum.05, !dbg !22
-  tail call void @llvm.dbg.value(metadata !{i32 %add.lcssa}, i64 0, metadata !15), !dbg !22
+  tail call void @llvm.dbg.value(metadata i32 %add.lcssa, i64 0, metadata !15, metadata !{}), !dbg !22
   %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !21
-  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !16), !dbg !21
+  tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !16, metadata !{}), !dbg !21
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !21
   %exitcond = icmp ne i32 %lftr.wideiv, %size, !dbg !21
   br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge, !dbg !21
@@ -52,10 +52,10 @@ for.end:                                          ; preds = %entry, %for.cond.fo
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.declare(metadata, metadata) #1
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { nounwind readonly ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "unsafe-fp-math"="true" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -63,28 +63,28 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!18, !27}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 185038) (llvm/trunk 185097)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Data/backedup/dev/os/llvm/debug/-] [DW_LANG_C99]
-!1 = metadata !{metadata !"-", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !5, metadata !6, metadata !"f", metadata !"f", metadata !"", i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32*, i32)* @f, null, null, metadata !12, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
-!5 = metadata !{metadata !"<stdin>", metadata !"/Volumes/Data/backedup/dev/os/llvm/debug"}
-!6 = metadata !{i32 786473, metadata !5}          ; [ DW_TAG_file_type ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>]
-!7 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9, metadata !10, metadata !11}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
-!11 = metadata !{i32 786468, null, null, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
-!12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !16}
-!13 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3]
-!14 = metadata !{i32 786689, metadata !4, metadata !"size", metadata !6, i32 33554435, metadata !11, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [size] [line 3]
-!15 = metadata !{i32 786688, metadata !4, metadata !"sum", metadata !6, i32 4, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [sum] [line 4]
-!16 = metadata !{i32 786688, metadata !17, metadata !"i", metadata !6, i32 5, metadata !11, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 5]
-!17 = metadata !{i32 786443, metadata !5, metadata !4, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>]
-!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 3}
-!19 = metadata !{i32 3, i32 0, metadata !4, null}
-!20 = metadata !{i32 4, i32 0, metadata !4, null}
-!21 = metadata !{i32 5, i32 0, metadata !17, null}
-!22 = metadata !{i32 6, i32 0, metadata !17, null}
-!26 = metadata !{i32 7, i32 0, metadata !4, null}
-!27 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 (trunk 185038) (llvm/trunk 185097)\001\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/Volumes/Data/backedup/dev/os/llvm/debug/-] [DW_LANG_C99]
+!1 = !{!"-", !"/Volumes/Data/backedup/dev/os/llvm/debug"}
+!2 = !{i32 0}
+!3 = !{!4}
+!4 = !{!"0x2e\00f\00f\00\003\000\001\000\006\00256\001\003", !5, !6, !7, null, i32 (i32*, i32)* @f, null, null, !12} ; [ DW_TAG_subprogram ] [line 3] [def] [f]
+!5 = !{!"<stdin>", !"/Volumes/Data/backedup/dev/os/llvm/debug"}
+!6 = !{!"0x29", !5}          ; [ DW_TAG_file_type ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9, !10, !11}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"0xf\00\000\0064\0064\000\000", null, null, !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from int]
+!11 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, null} ; [ DW_TAG_base_type ] [unsigned int] [line 0, size 32, align 32, offset 0, enc DW_ATE_unsigned]
+!12 = !{!13, !14, !15, !16}
+!13 = !{!"0x101\00a\0016777219\000", !4, !6, !10} ; [ DW_TAG_arg_variable ] [a] [line 3]
+!14 = !{!"0x101\00size\0033554435\000", !4, !6, !11} ; [ DW_TAG_arg_variable ] [size] [line 3]
+!15 = !{!"0x100\00sum\004\000", !4, !6, !11} ; [ DW_TAG_auto_variable ] [sum] [line 4]
+!16 = !{!"0x100\00i\005\000", !17, !6, !11} ; [ DW_TAG_auto_variable ] [i] [line 5]
+!17 = !{!"0xb\005\000\000", !5, !4} ; [ DW_TAG_lexical_block ] [/Volumes/Data/backedup/dev/os/llvm/debug/<stdin>]
+!18 = !{i32 2, !"Dwarf Version", i32 3}
+!19 = !MDLocation(line: 3, scope: !4)
+!20 = !MDLocation(line: 4, scope: !4)
+!21 = !MDLocation(line: 5, scope: !17)
+!22 = !MDLocation(line: 6, scope: !17)
+!26 = !MDLocation(line: 7, scope: !4)
+!27 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/duplicated-metadata.ll b/test/Transforms/LoopVectorize/duplicated-metadata.ll
new file mode 100644
index 000000000000..bf2f899dff04
--- /dev/null
+++ b/test/Transforms/LoopVectorize/duplicated-metadata.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -loop-vectorize -S 2>&1 | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; This test makes sure we don't duplicate the loop vectorizer's metadata
+; while marking them as already vectorized (by setting width = 1), even
+; at lower optimization levels, where no extra cleanup is done
+
+define void @_Z3fooPf(float* %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds float* %a, i64 %indvars.iv
+  %p = load float* %arrayidx, align 4
+  %mul = fmul float %p, 2.000000e+00
+  store float %mul, float* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.width", i32 4}
+; CHECK-NOT: !{metadata !"llvm.loop.vectorize.width", i32 4}
+; CHECK: !{!"llvm.loop.interleave.count", i32 1}
diff --git a/test/Transforms/LoopVectorize/ee-crash.ll b/test/Transforms/LoopVectorize/ee-crash.ll
index 8a4f8ce3c122..a3c0bb89b128 100644
--- a/test/Transforms/LoopVectorize/ee-crash.ll
+++ b/test/Transforms/LoopVectorize/ee-crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/exact.ll b/test/Transforms/LoopVectorize/exact.ll
new file mode 100644
index 000000000000..0a8fbf337347
--- /dev/null
+++ b/test/Transforms/LoopVectorize/exact.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; CHECK-LABEL: @lshr_exact(
+; CHECK: lshr exact <4 x i32>
+define void @lshr_exact(i32* %x) {
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32* %x, i64 %iv
+  %0 = load i32* %arrayidx, align 4
+  %conv1 = lshr exact i32 %0, 1
+  store i32 %conv1, i32* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 256
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/flags.ll b/test/Transforms/LoopVectorize/flags.ll
index 21d09372d546..0fc55c897310 100644
--- a/test/Transforms/LoopVectorize/flags.ll
+++ b/test/Transforms/LoopVectorize/flags.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll
index 0dfbab07279a..0f064ee2dc85 100644
--- a/test/Transforms/LoopVectorize/float-reduction.ll
+++ b/test/Transforms/LoopVectorize/float-reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -23,3 +23,25 @@ for.body:                                         ; preds = %for.body, %entry
 for.end:                                          ; preds = %for.body
   ret float %add
 }
+
+;CHECK-LABEL: @foosub(
+;CHECK: fsub fast <4 x float>
+;CHECK: ret
+define float @foosub(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %sum.04 = phi float [ 0.000000e+00, %entry ], [ %sub, %for.body ]
+  %arrayidx = getelementptr inbounds float* %A, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %sub = fsub fast float %sum.04, %0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 200
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret float %sub
+}
diff --git a/test/Transforms/LoopVectorize/funcall.ll b/test/Transforms/LoopVectorize/funcall.ll
index f1f068c43db3..e03534fbac37 100644
--- a/test/Transforms/LoopVectorize/funcall.ll
+++ b/test/Transforms/LoopVectorize/funcall.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
index b6cde5d00f5e..6c8af0bcd7c9 100644
--- a/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s
-; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-width=4 -force-vector-unroll=4 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
+; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-width=4 -force-vector-interleave=4 -dce -instcombine -S | FileCheck %s -check-prefix=UNROLL
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll
index d64d67f6a5b1..3f11ce8bf213 100644
--- a/test/Transforms/LoopVectorize/global_alias.ll
+++ b/test/Transforms/LoopVectorize/global_alias.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -O1 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -O1 -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
 
diff --git a/test/Transforms/LoopVectorize/hoist-loads.ll b/test/Transforms/LoopVectorize/hoist-loads.ll
index 765e14d6985b..d0b27f1eae7c 100644
--- a/test/Transforms/LoopVectorize/hoist-loads.ll
+++ b/test/Transforms/LoopVectorize/hoist-loads.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/i8-induction.ll b/test/Transforms/LoopVectorize/i8-induction.ll
index 2a0e82645494..90e3ec00cde2 100644
--- a/test/Transforms/LoopVectorize/i8-induction.ll
+++ b/test/Transforms/LoopVectorize/i8-induction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/if-conv-crash.ll b/test/Transforms/LoopVectorize/if-conv-crash.ll
index f8f2cf1fff4b..67910bf6d544 100644
--- a/test/Transforms/LoopVectorize/if-conv-crash.ll
+++ b/test/Transforms/LoopVectorize/if-conv-crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/if-conversion-nest.ll b/test/Transforms/LoopVectorize/if-conversion-nest.ll
index 92cb06e5e525..b5ac8fca23b3 100644
--- a/test/Transforms/LoopVectorize/if-conversion-nest.ll
+++ b/test/Transforms/LoopVectorize/if-conversion-nest.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/if-conversion-reduction.ll b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
index 8cb703cdfa4b..455699c78030 100644
--- a/test/Transforms/LoopVectorize/if-conversion-reduction.ll
+++ b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll
index 6e3e8ed27853..9e185284919a 100644
--- a/test/Transforms/LoopVectorize/if-conversion.ll
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -enable-if-conversion -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/if-pred-stores.ll b/test/Transforms/LoopVectorize/if-pred-stores.ll
index 7b0e181c845f..c6067e01ee21 100644
--- a/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-unroll=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-unroll=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec < %s | FileCheck %s --check-prefix=VEC
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
 
diff --git a/test/Transforms/LoopVectorize/incorrect-dom-info.ll b/test/Transforms/LoopVectorize/incorrect-dom-info.ll
index 624ee7e50597..b8624fd00774 100644
--- a/test/Transforms/LoopVectorize/incorrect-dom-info.ll
+++ b/test/Transforms/LoopVectorize/incorrect-dom-info.ll
@@ -139,4 +139,4 @@ attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-po
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"clang version 3.6.0 "}
+!0 = !{!"clang version 3.6.0 "}
diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll
index 71bedb7334ac..067a76b51ad0 100644
--- a/test/Transforms/LoopVectorize/increment.ll
+++ b/test/Transforms/LoopVectorize/increment.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll
index 7dabcb2ba04f..3f3491835b54 100644
--- a/test/Transforms/LoopVectorize/induction.ll
+++ b/test/Transforms/LoopVectorize/induction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -28,7 +28,7 @@ for.end:
   ret void
 }
 
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
 
 ; Make sure we remove unneeded vectorization of induction variables.
 ; In order for instcombine to cleanup the vectorized induction variables that we
diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll
index 9c8201ab7805..ce64c5b620d5 100644
--- a/test/Transforms/LoopVectorize/induction_plus.ll
+++ b/test/Transforms/LoopVectorize/induction_plus.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll
index 7dfaf03b0f2d..d48731a07436 100644
--- a/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -1192,3 +1192,59 @@ for.body:                                         ; preds = %entry, %for.body
 for.end:                                          ; preds = %for.body, %entry
   ret void
 }
+
+declare float @llvm.minnum.f32(float, float) nounwind readnone
+
+;CHECK-LABEL: @minnum_f32(
+;CHECK: llvm.minnum.v4f32
+;CHECK: ret void
+define void @minnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4
+  %call = tail call float @llvm.minnum.f32(float %0, float %1) nounwind readnone
+  %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx4, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.maxnum.f32(float, float) nounwind readnone
+
+;CHECK-LABEL: @maxnum_f32(
+;CHECK: llvm.maxnum.v4f32
+;CHECK: ret void
+define void @maxnum_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4
+  %call = tail call float @llvm.maxnum.f32(float %0, float %1) nounwind readnone
+  %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx4, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/lcssa-crash.ll b/test/Transforms/LoopVectorize/lcssa-crash.ll
index de6be548490c..68cc74ed7098 100644
--- a/test/Transforms/LoopVectorize/lcssa-crash.ll
+++ b/test/Transforms/LoopVectorize/lcssa-crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LoopVectorize/lifetime.ll b/test/Transforms/LoopVectorize/lifetime.ll
index 4f6f3b820a43..ba36cc4f7aa4 100644
--- a/test/Transforms/LoopVectorize/lifetime.ll
+++ b/test/Transforms/LoopVectorize/lifetime.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/loop-vect-memdep.ll b/test/Transforms/LoopVectorize/loop-vect-memdep.ll
new file mode 100644
index 000000000000..e2c7524238d1
--- /dev/null
+++ b/test/Transforms/LoopVectorize/loop-vect-memdep.ll
@@ -0,0 +1,26 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; RUN: opt < %s -S -loop-vectorize -debug-only=loop-vectorize 2>&1 | FileCheck %s
+; REQUIRES: asserts
+; CHECK: LV: Can't vectorize due to memory conflicts
+
+define void @test_loop_novect(double** %arr, i64 %n) {
+for.body.lr.ph:
+  %t = load double** %arr, align 8
+  br label %for.body
+
+for.body:                                      ; preds = %for.body, %for.body.lr.ph
+  %i = phi i64 [ 0, %for.body.lr.ph ], [ %i.next, %for.body ]
+  %a = getelementptr inbounds double* %t, i64 %i
+  %i.next = add nuw nsw i64 %i, 1
+  %a.next = getelementptr inbounds double* %t, i64 %i.next
+  %t1 = load double* %a, align 8
+  %t2 = load double* %a.next, align 8
+  store double %t1, double* %a.next, align 8
+  store double %t2, double* %a, align 8
+  %c = icmp eq i64 %i, %n
+  br i1 %c, label %final, label %for.body
+
+final:                                   ; preds = %for.body
+  ret void
+}
diff --git a/test/Transforms/LoopVectorize/memdep.ll b/test/Transforms/LoopVectorize/memdep.ll
index 21cb703ba47c..f857e800bea7 100644
--- a/test/Transforms/LoopVectorize/memdep.ll
+++ b/test/Transforms/LoopVectorize/memdep.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S | FileCheck %s
-; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -S | FileCheck %s -check-prefix=WIDTH
+; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s -check-prefix=WIDTH
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/metadata-unroll.ll b/test/Transforms/LoopVectorize/metadata-unroll.ll
index 848f1f9601b9..36a231454f08 100644
--- a/test/Transforms/LoopVectorize/metadata-unroll.ll
+++ b/test/Transforms/LoopVectorize/metadata-unroll.ll
@@ -37,5 +37,5 @@ define void @inc(i32 %n) nounwind uwtable noinline ssp {
   ret void
 }
 
-!0 = metadata !{metadata !0, metadata !1}
-!1 = metadata !{metadata !"llvm.loop.interleave.count", i32 2}
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.interleave.count", i32 2}
diff --git a/test/Transforms/LoopVectorize/metadata-width.ll b/test/Transforms/LoopVectorize/metadata-width.ll
index 87de655da6f2..dee4feedd6b8 100644
--- a/test/Transforms/LoopVectorize/metadata-width.ll
+++ b/test/Transforms/LoopVectorize/metadata-width.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -27,5 +27,5 @@ for.end:                                          ; preds = %for.body, %entry
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
-!0 = metadata !{metadata !0, metadata !1}
-!1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 8}
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.width", i32 8}
diff --git a/test/Transforms/LoopVectorize/metadata.ll b/test/Transforms/LoopVectorize/metadata.ll
index bdcf1c9fb229..a258f7c06c07 100644
--- a/test/Transforms/LoopVectorize/metadata.ll
+++ b/test/Transforms/LoopVectorize/metadata.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
@@ -27,18 +27,18 @@ for.end:                                          ; preds = %for.body
 ; CHECK: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa ![[TINT:[0-9]+]]
 ; CHECK: ret i32 0
 
-; CHECK-DAG: ![[TFLT]] = metadata !{metadata ![[TFLT1:[0-9]+]]
-; CHECK-DAG: ![[TFLT1]] = metadata !{metadata !"float"
+; CHECK-DAG: ![[TFLT]] = !{![[TFLT1:[0-9]+]]
+; CHECK-DAG: ![[TFLT1]] = !{!"float"
 
-; CHECK-DAG: ![[TINT]] = metadata !{metadata ![[TINT1:[0-9]+]]
-; CHECK-DAG: ![[TINT1]] = metadata !{metadata !"int"
+; CHECK-DAG: ![[TINT]] = !{![[TINT1:[0-9]+]]
+; CHECK-DAG: ![[TINT1]] = !{!"int"
 
 attributes #0 = { nounwind uwtable }
 
-!0 = metadata !{metadata !1, metadata !1, i64 0}
-!1 = metadata !{metadata !"float", metadata !2, i64 0}
-!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0}
-!3 = metadata !{metadata !"Simple C/C++ TBAA"}
-!4 = metadata !{metadata !5, metadata !5, i64 0}
-!5 = metadata !{metadata !"int", metadata !2, i64 0}
+!0 = !{!1, !1, i64 0}
+!1 = !{!"float", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"int", !2, i64 0}
 
diff --git a/test/Transforms/LoopVectorize/minmax_reduction.ll b/test/Transforms/LoopVectorize/minmax_reduction.ll
index 0e47260984f2..1984cdd892d1 100644
--- a/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-unroll=1  < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -dce -instcombine -force-vector-width=2 -force-vector-interleave=1  < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -516,7 +516,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @unordered_max_red_float(
-; CHECK: fcmp ugt <2 x float>
+; CHECK: fcmp ole <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
@@ -542,7 +542,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @unordered_max_red_float_ge(
-; CHECK: fcmp uge <2 x float>
+; CHECK: fcmp olt <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
@@ -568,7 +568,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @inverted_unordered_max_red_float(
-; CHECK: fcmp ult <2 x float>
+; CHECK: fcmp oge <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
@@ -594,7 +594,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @inverted_unordered_max_red_float_le(
-; CHECK: fcmp ule <2 x float>
+; CHECK: fcmp ogt <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp ogt <2 x float>
@@ -727,7 +727,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @unordered_min_red_float(
-; CHECK: fcmp ult <2 x float>
+; CHECK: fcmp oge <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
@@ -753,7 +753,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @unordered_min_red_float_le(
-; CHECK: fcmp ule <2 x float>
+; CHECK: fcmp ogt <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
@@ -779,7 +779,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @inverted_unordered_min_red_float(
-; CHECK: fcmp ugt <2 x float>
+; CHECK: fcmp ole <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
@@ -805,7 +805,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @inverted_unordered_min_red_float_ge(
-; CHECK: fcmp uge <2 x float>
+; CHECK: fcmp olt <2 x float>
 ; CHECK: select <2 x i1>
 ; CHECK: middle.block
 ; CHECK: fcmp olt <2 x float>
diff --git a/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
index 88a29c50df5a..cd022adc9399 100644
--- a/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
+++ b/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
@@ -1,4 +1,4 @@
-; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+; RUN: opt -indvars -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S < %s | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/multiple-address-spaces.ll b/test/Transforms/LoopVectorize/multiple-address-spaces.ll
index d64662838e00..bb2af1e57ccc 100644
--- a/test/Transforms/LoopVectorize/multiple-address-spaces.ll
+++ b/test/Transforms/LoopVectorize/multiple-address-spaces.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 ; From a simple program with two address spaces:
 ; char Y[4*10000] __attribute__((address_space(1)));
diff --git a/test/Transforms/LoopVectorize/no_array_bounds.ll b/test/Transforms/LoopVectorize/no_array_bounds.ll
index 240b1b5d49dc..d3bd755e6cf1 100644
--- a/test/Transforms/LoopVectorize/no_array_bounds.ll
+++ b/test/Transforms/LoopVectorize/no_array_bounds.ll
@@ -72,30 +72,30 @@ attributes #0 = { nounwind }
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
-!1 = metadata !{metadata !"no_array_bounds.cpp", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test", metadata !"test", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32*, i32)* @_Z4testPiS_i, null, null, metadata !2, i32 2}
-!5 = metadata !{i32 786473, metadata !1}
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
-!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!9 = metadata !{metadata !"clang version 3.5.0"}
-!10 = metadata !{i32 4, i32 8, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 4, i32 3, i32 0, i32 0}
-!12 = metadata !{metadata !12, metadata !13}
-!13 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
-!14 = metadata !{i32 5, i32 5, metadata !15, null}
-!15 = metadata !{i32 786443, metadata !1, metadata !11, i32 4, i32 36, i32 0, i32 1}
-!16 = metadata !{i32 9, i32 8, metadata !17, null}
-!17 = metadata !{i32 786443, metadata !1, metadata !4, i32 9, i32 3, i32 0, i32 2}
-!18 = metadata !{metadata !18, metadata !13, metadata !19}
-!19 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
-!20 = metadata !{i32 10, i32 5, metadata !21, null}
-!21 = metadata !{i32 786443, metadata !1, metadata !17, i32 9, i32 36, i32 0, i32 3}
-!22 = metadata !{metadata !23, metadata !23, i64 0}
-!23 = metadata !{metadata !"int", metadata !24, i64 0}
-!24 = metadata !{metadata !"omnipotent char", metadata !25, i64 0}
-!25 = metadata !{metadata !"Simple C/C++ TBAA"}
-!26 = metadata !{i32 12, i32 1, metadata !4, null}
+!0 = !{!"0x11\004\00clang version 3.5.0\001\00\000\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ]
+!1 = !{!"no_array_bounds.cpp", !"."}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00test\00test\00\001\000\001\000\006\00256\001\002", !1, !5, !6, null, void (i32*, i32*, i32)* @_Z4testPiS_i, null, null, !2} ; [ DW_TAG_subprogram ]
+!5 = !{!"0x29", !1} ; [ DW_TAG_file_type ]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ]
+!7 = !{i32 2, !"Dwarf Version", i32 2}
+!8 = !{i32 2, !"Debug Info Version", i32 2}
+!9 = !{!"clang version 3.5.0"}
+!10 = !MDLocation(line: 4, column: 8, scope: !11)
+!11 = !{!"0xb\004\003\000", !1, !4} ; [ DW_TAG_lexical_block ]
+!12 = !{!12, !13}
+!13 = !{!"llvm.loop.vectorize.enable", i1 true}
+!14 = !MDLocation(line: 5, column: 5, scope: !15)
+!15 = !{!"0xb\004\0036\000", !1, !11} ; [ DW_TAG_lexical_block ]
+!16 = !MDLocation(line: 9, column: 8, scope: !17)
+!17 = !{!"0xb\009\003\000", !1, !4} ; [ DW_TAG_lexical_block ]
+!18 = !{!18, !13, !19}
+!19 = !{!"llvm.loop.vectorize.width", i32 1}
+!20 = !MDLocation(line: 10, column: 5, scope: !21)
+!21 = !{!"0xb\009\0036\000", !1, !17} ; [ DW_TAG_lexical_block ]
+!22 = !{!23, !23, i64 0}
+!23 = !{!"int", !24, i64 0}
+!24 = !{!"omnipotent char", !25, i64 0}
+!25 = !{!"Simple C/C++ TBAA"}
+!26 = !MDLocation(line: 12, column: 1, scope: !4)
diff --git a/test/Transforms/LoopVectorize/no_idiv_reduction.ll b/test/Transforms/LoopVectorize/no_idiv_reduction.ll
index 295fcabb0b29..5c721a6806d9 100644
--- a/test/Transforms/LoopVectorize/no_idiv_reduction.ll
+++ b/test/Transforms/LoopVectorize/no_idiv_reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-unroll=1 -S < %s | FileCheck %s
+; RUN: opt -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S < %s | FileCheck %s
 @a = common global [128 x i32] zeroinitializer, align 16
 
 ;; Must not vectorize division reduction. Division is lossy.
diff --git a/test/Transforms/LoopVectorize/no_int_induction.ll b/test/Transforms/LoopVectorize/no_int_induction.ll
index e572d1a884ca..1275915f5e36 100644
--- a/test/Transforms/LoopVectorize/no_int_induction.ll
+++ b/test/Transforms/LoopVectorize/no_int_induction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 ; int __attribute__((noinline)) sum_array(int *A, int n) {
 ;  return std::accumulate(A, A + n, 0);
diff --git a/test/Transforms/LoopVectorize/no_outside_user.ll b/test/Transforms/LoopVectorize/no_outside_user.ll
index 1f891ad2c4fa..bcd29c1a439a 100644
--- a/test/Transforms/LoopVectorize/no_outside_user.ll
+++ b/test/Transforms/LoopVectorize/no_outside_user.ll
@@ -1,4 +1,7 @@
-; RUN: opt -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=2 < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s
+
+; CHECK: remark: {{.*}}: loop not vectorized: value could not be identified as an induction or reduction variable
+; CHECK: remark: {{.*}}: loop not vectorized: use of induction value outside of the loop is not handled by vectorizer
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
 
diff --git a/test/Transforms/LoopVectorize/no_switch.ll b/test/Transforms/LoopVectorize/no_switch.ll
index 8f654e41d4c8..64aab37592f6 100644
--- a/test/Transforms/LoopVectorize/no_switch.ll
+++ b/test/Transforms/LoopVectorize/no_switch.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -loop-vectorize -force-vector-width=4 -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
 
 ; CHECK: remark: source.cpp:4:5: loop not vectorized: loop contains a switch statement
-; CHECK: remark: source.cpp:4:5: loop not vectorized: vectorization is explicitly enabled with width 4
+; CHECK: remark: source.cpp:4:5: loop not vectorized: use -Rpass-analysis=loop-vectorize for more info (Force=true, Vector Width=4)
 ; CHECK: warning: source.cpp:4:5: loop not vectorized: failed explicitly specified loop vectorization
 
 ; CHECK: _Z11test_switchPii
@@ -59,28 +59,28 @@ attributes #0 = { nounwind }
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5.0", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2}
-!1 = metadata !{metadata !"source.cpp", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"test_switch", metadata !"test_switch", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i32*, i32)* @_Z11test_switchPii, null, null, metadata !2, i32 1}
-!5 = metadata !{i32 786473, metadata !1}
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null}
-!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1}
-!9 = metadata !{metadata !"clang version 3.5.0"}
-!10 = metadata !{i32 3, i32 8, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0, i32 0}
-!12 = metadata !{metadata !12, metadata !13, metadata !13}
-!13 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
-!14 = metadata !{i32 4, i32 5, metadata !15, null}
-!15 = metadata !{i32 786443, metadata !1, metadata !11, i32 3, i32 36, i32 0, i32 1}
-!16 = metadata !{metadata !17, metadata !17, i64 0}
-!17 = metadata !{metadata !"int", metadata !18, i64 0}
-!18 = metadata !{metadata !"omnipotent char", metadata !19, i64 0}
-!19 = metadata !{metadata !"Simple C/C++ TBAA"}
-!20 = metadata !{i32 6, i32 7, metadata !21, null}
-!21 = metadata !{i32 786443, metadata !1, metadata !15, i32 4, i32 18, i32 0, i32 2}
-!22 = metadata !{i32 7, i32 5, metadata !21, null}
-!23 = metadata !{i32 9, i32 7, metadata !21, null}
-!24 = metadata !{i32 14, i32 1, metadata !4, null}
+!0 = !{!"0x11\004\00clang version 3.5.0\001\00\006\00\002", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [./source.cpp] [DW_LANG_C_plus_plus]
+!1 = !{!"source.cpp", !"."}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00test_switch\00test_switch\00\001\000\001\000\006\00256\001\001", !1, !5, !6, null, void (i32*, i32)* @_Z11test_switchPii, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [test_switch]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [./source.cpp]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{i32 2, !"Dwarf Version", i32 2}
+!8 = !{i32 2, !"Debug Info Version", i32 2}
+!9 = !{!"clang version 3.5.0"}
+!10 = !MDLocation(line: 3, column: 8, scope: !11)
+!11 = !{!"0xb\003\003\000", !1, !4} ; [ DW_TAG_lexical_block ]
+!12 = !{!12, !13, !13}
+!13 = !{!"llvm.loop.vectorize.enable", i1 true}
+!14 = !MDLocation(line: 4, column: 5, scope: !15)
+!15 = !{!"0xb\003\0036\000", !1, !11} ; [ DW_TAG_lexical_block ]
+!16 = !{!17, !17, i64 0}
+!17 = !{!"int", !18, i64 0}
+!18 = !{!"omnipotent char", !19, i64 0}
+!19 = !{!"Simple C/C++ TBAA"}
+!20 = !MDLocation(line: 6, column: 7, scope: !21)
+!21 = !{!"0xb\004\0018\000", !1, !15} ; [ DW_TAG_lexical_block ]
+!22 = !MDLocation(line: 7, column: 5, scope: !21)
+!23 = !MDLocation(line: 9, column: 7, scope: !21)
+!24 = !MDLocation(line: 14, column: 1, scope: !4)
diff --git a/test/Transforms/LoopVectorize/nofloat.ll b/test/Transforms/LoopVectorize/nofloat.ll
index c3c81b6f8450..e9f4c5f23e08 100644
--- a/test/Transforms/LoopVectorize/nofloat.ll
+++ b/test/Transforms/LoopVectorize/nofloat.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 ; Make sure that we don't vectorize functions with 'noimplicitfloat' attributes.
 
diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll
index 0c54a2b01226..b03d4f05e16f 100644
--- a/test/Transforms/LoopVectorize/non-const-n.ll
+++ b/test/Transforms/LoopVectorize/non-const-n.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/nsw-crash.ll b/test/Transforms/LoopVectorize/nsw-crash.ll
index e5fad14d0dda..68d993340bdb 100644
--- a/test/Transforms/LoopVectorize/nsw-crash.ll
+++ b/test/Transforms/LoopVectorize/nsw-crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4
 
 target datalayout =
 "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
diff --git a/test/Transforms/LoopVectorize/opt.ll b/test/Transforms/LoopVectorize/opt.ll
index 27030a2ff2a9..a9be80fbfee6 100644
--- a/test/Transforms/LoopVectorize/opt.ll
+++ b/test/Transforms/LoopVectorize/opt.ll
@@ -1,5 +1,5 @@
-; RUN: opt -S -O3 -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=LOOPVEC %s
-; RUN: opt -S -O3 -disable-loop-vectorization -force-vector-width=2 -force-vector-unroll=1 < %s | FileCheck --check-prefix=NOLOOPVEC %s
+; RUN: opt -S -O3 -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck --check-prefix=LOOPVEC %s
+; RUN: opt -S -O3 -disable-loop-vectorization -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck --check-prefix=NOLOOPVEC %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/ptr_loops.ll b/test/Transforms/LoopVectorize/ptr_loops.ll
index 1259e21ebf2e..3fb38fe1ef13 100644
--- a/test/Transforms/LoopVectorize/ptr_loops.ll
+++ b/test/Transforms/LoopVectorize/ptr_loops.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/read-only.ll b/test/Transforms/LoopVectorize/read-only.ll
index fc8f0a5482f0..2f7a96a09d22 100644
--- a/test/Transforms/LoopVectorize/read-only.ll
+++ b/test/Transforms/LoopVectorize/read-only.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
index 791fce156220..5e6b7fa332b2 100644
--- a/test/Transforms/LoopVectorize/reduction.ll
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/reverse_induction.ll b/test/Transforms/LoopVectorize/reverse_induction.ll
index 65ef95dcb121..da02d01706ca 100644
--- a/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=2 -force-vector-width=4 -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/reverse_iter.ll b/test/Transforms/LoopVectorize/reverse_iter.ll
index f803120c4d57..13172bb17aeb 100644
--- a/test/Transforms/LoopVectorize/reverse_iter.ll
+++ b/test/Transforms/LoopVectorize/reverse_iter.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/runtime-check-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-address-space.ll
index 848ffc428c5f..ec56f80144c4 100644
--- a/test/Transforms/LoopVectorize/runtime-check-address-space.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-address-space.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
+; RUN: opt -S -march=r600 -mcpu=cayman -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
 
 ; Check vectorization that would ordinarily require a runtime bounds
 ; check on the pointers when mixing address spaces. For now we cannot
diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
index 4e98ec504f5b..12ba3ce6c667 100644
--- a/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
+; RUN: opt -S -march=r600 -mcpu=cayman -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
 
 ; Artificial datalayout
 target datalayout = "e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
diff --git a/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
index 73b28301b7e7..9d02a6accd67 100644
--- a/test/Transforms/LoopVectorize/runtime-check-readonly.ll
+++ b/test/Transforms/LoopVectorize/runtime-check-readonly.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll
index d15479d202b7..1edafb4ed7a9 100644
--- a/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/test/Transforms/LoopVectorize/runtime-check.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/runtime-limit.ll b/test/Transforms/LoopVectorize/runtime-limit.ll
index 7370a6fb93c5..324949dafd0a 100644
--- a/test/Transforms/LoopVectorize/runtime-limit.ll
+++ b/test/Transforms/LoopVectorize/runtime-limit.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/safegep.ll b/test/Transforms/LoopVectorize/safegep.ll
index c9508601e2c1..f853afd2bba6 100644
--- a/test/Transforms/LoopVectorize/safegep.ll
+++ b/test/Transforms/LoopVectorize/safegep.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1  < %s |  FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1  < %s |  FileCheck %s
 target datalayout = "e-p:32:32:32-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f16:16:16-f32:32:32-f64:32:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
 
 
diff --git a/test/Transforms/LoopVectorize/same-base-access.ll b/test/Transforms/LoopVectorize/same-base-access.ll
index d623a3469096..d19458f4aa92 100644
--- a/test/Transforms/LoopVectorize/same-base-access.ll
+++ b/test/Transforms/LoopVectorize/same-base-access.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -enable-if-conversion | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/scalar-select.ll b/test/Transforms/LoopVectorize/scalar-select.ll
index 257c7bebe4d5..6b37cc23ae31 100644
--- a/test/Transforms/LoopVectorize/scalar-select.ll
+++ b/test/Transforms/LoopVectorize/scalar-select.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/scev-exitlim-crash.ll b/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
index 683621a6f69b..515477180772 100644
--- a/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
+++ b/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=2 -force-vector-width=8 -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=8 -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx"
@@ -106,9 +106,9 @@ declare i32 @fn2(double) #1
 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
-!0 = metadata !{metadata !"int", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"double", metadata !1}
-!4 = metadata !{metadata !0, metadata !0, i64 0}
-!5 = metadata !{metadata !3, metadata !3, i64 0}
+!0 = !{!"int", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"double", !1}
+!4 = !{!0, !0, i64 0}
+!5 = !{!3, !3, i64 0}
diff --git a/test/Transforms/LoopVectorize/simple-unroll.ll b/test/Transforms/LoopVectorize/simple-unroll.ll
index 83f35ffb609b..8bf680ac7c5f 100644
--- a/test/Transforms/LoopVectorize/simple-unroll.ll
+++ b/test/Transforms/LoopVectorize/simple-unroll.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-unroll=2 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/small-loop.ll b/test/Transforms/LoopVectorize/small-loop.ll
index 49ce5c539727..1d30102c3f97 100644
--- a/test/Transforms/LoopVectorize/small-loop.ll
+++ b/test/Transforms/LoopVectorize/small-loop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/start-non-zero.ll b/test/Transforms/LoopVectorize/start-non-zero.ll
index 8f675afd80cb..cc474945a9d0 100644
--- a/test/Transforms/LoopVectorize/start-non-zero.ll
+++ b/test/Transforms/LoopVectorize/start-non-zero.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/store-shuffle-bug.ll b/test/Transforms/LoopVectorize/store-shuffle-bug.ll
index 26f4d156df61..6d3d11349b14 100644
--- a/test/Transforms/LoopVectorize/store-shuffle-bug.ll
+++ b/test/Transforms/LoopVectorize/store-shuffle-bug.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
+; RUN: opt -S -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/struct_access.ll b/test/Transforms/LoopVectorize/struct_access.ll
index 75beae82f170..cf6f325ae306 100644
--- a/test/Transforms/LoopVectorize/struct_access.ll
+++ b/test/Transforms/LoopVectorize/struct_access.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/tbaa-nodep.ll b/test/Transforms/LoopVectorize/tbaa-nodep.ll
index f31b3072bc6c..be3e74f5d68a 100644
--- a/test/Transforms/LoopVectorize/tbaa-nodep.ll
+++ b/test/Transforms/LoopVectorize/tbaa-nodep.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s  -tbaa -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s
-; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s --check-prefix=CHECK-NOTBAA
+; RUN: opt < %s  -tbaa -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s
+; RUN: opt < %s  -basicaa -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -simplifycfg -S | FileCheck %s --check-prefix=CHECK-NOTBAA
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
@@ -93,10 +93,10 @@ for.end:                                          ; preds = %for.body
 
 attributes #0 = { nounwind uwtable }
 
-!0 = metadata !{metadata !1, metadata !1, i64 0}
-!1 = metadata !{metadata !"float", metadata !2, i64 0}
-!2 = metadata !{metadata !"omnipotent char", metadata !3, i64 0}
-!3 = metadata !{metadata !"Simple C/C++ TBAA"}
-!4 = metadata !{metadata !5, metadata !5, i64 0}
-!5 = metadata !{metadata !"int", metadata !2, i64 0}
+!0 = !{!1, !1, i64 0}
+!1 = !{!"float", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C/C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"int", !2, i64 0}
 
diff --git a/test/Transforms/LoopVectorize/undef-inst-bug.ll b/test/Transforms/LoopVectorize/undef-inst-bug.ll
index ed60e801afde..0444fe894ed4 100644
--- a/test/Transforms/LoopVectorize/undef-inst-bug.ll
+++ b/test/Transforms/LoopVectorize/undef-inst-bug.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/unroll_novec.ll b/test/Transforms/LoopVectorize/unroll_novec.ll
index 89f4678526de..257b4e63a915 100644
--- a/test/Transforms/LoopVectorize/unroll_novec.ll
+++ b/test/Transforms/LoopVectorize/unroll_novec.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-width=1 -force-target-num-scalar-regs=16 -force-target-max-scalar-unroll=8 -force-target-instruction-cost=1 -small-loop-cost=40 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-width=1 -force-target-num-scalar-regs=16 -force-target-max-scalar-interleave=8 -force-target-instruction-cost=1 -small-loop-cost=40 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LoopVectorize/value-ptr-bug.ll b/test/Transforms/LoopVectorize/value-ptr-bug.ll
index 6b06afaf0de2..7fb9095b7c10 100644
--- a/test/Transforms/LoopVectorize/value-ptr-bug.ll
+++ b/test/Transforms/LoopVectorize/value-ptr-bug.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-unroll=1 -dce -instcombine < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/vect.omp.persistence.ll b/test/Transforms/LoopVectorize/vect.omp.persistence.ll
index f6465677839e..ea7be9c106b3 100644
--- a/test/Transforms/LoopVectorize/vect.omp.persistence.ll
+++ b/test/Transforms/LoopVectorize/vect.omp.persistence.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -O2 -force-vector-unroll=2 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
+; RUN: opt < %s -O2 -force-vector-interleave=2 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
 ; REQUIRES: asserts
 
 ; Loop from "rotated"
@@ -61,8 +61,8 @@ for.end:
   ret void
 }
 
-!1 = metadata !{metadata !1, metadata !2}
-!2 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+!1 = !{!1, !2}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}
 
 ;
 ; Test #2
@@ -84,5 +84,5 @@ return:
   ret i32 0
 }
 
-!3 = metadata !{metadata !3, metadata !4}
-!4 = metadata !{metadata !"llvm.loop.vectorize.enable", i1 true}
+!3 = !{!3, !4}
+!4 = !{!"llvm.loop.vectorize.enable", i1 true}
diff --git a/test/Transforms/LoopVectorize/vect.stats.ll b/test/Transforms/LoopVectorize/vect.stats.ll
index 9adf2bb6cf14..c5b6e64bc9fb 100644
--- a/test/Transforms/LoopVectorize/vect.stats.ll
+++ b/test/Transforms/LoopVectorize/vect.stats.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=4 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -stats -S 2>&1 | FileCheck %s
 ; REQUIRES: asserts
 
 ;
@@ -56,4 +56,4 @@ for.body:                                         ; preds = %entry, %for.body
 
 for.end:                                          ; preds = %entry, %for.body
   ret void
-}
-\ No newline at end of file
+}
diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll
index 97654f495018..a9b2a53f74b3 100644
--- a/test/Transforms/LoopVectorize/vectorize-once.ll
+++ b/test/Transforms/LoopVectorize/vectorize-once.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -68,10 +68,10 @@ _ZSt10accumulateIPiiET0_T_S2_S1_.exit:            ; preds = %for.body.i, %entry
 
 attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" }
 
-; CHECK: !0 = metadata !{metadata !0, metadata !1, metadata !2}
-; CHECK: !1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
-; CHECK: !2 = metadata !{metadata !"llvm.loop.interleave.count", i32 1}
-; CHECK: !3 = metadata !{metadata !3, metadata !1, metadata !2}
+; CHECK: !0 = distinct !{!0, !1, !2}
+; CHECK: !1 = !{!"llvm.loop.vectorize.width", i32 1}
+; CHECK: !2 = !{!"llvm.loop.interleave.count", i32 1}
+; CHECK: !3 = distinct !{!3, !1, !2}
 
-!0 = metadata !{metadata !0, metadata !1}
-!1 = metadata !{metadata !"llvm.loop.vectorize.width", i32 1}
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.width", i32 1}
diff --git a/test/Transforms/LoopVectorize/version-mem-access.ll b/test/Transforms/LoopVectorize/version-mem-access.ll
index 51d20e227ddf..7ac2fca2533f 100644
--- a/test/Transforms/LoopVectorize/version-mem-access.ll
+++ b/test/Transforms/LoopVectorize/version-mem-access.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-unroll=1 < %s -S | FileCheck %s
+; RUN: opt -basicaa -loop-vectorize -enable-mem-access-versioning -force-vector-width=2 -force-vector-interleave=1 < %s -S | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
diff --git a/test/Transforms/LoopVectorize/write-only.ll b/test/Transforms/LoopVectorize/write-only.ll
index 71a9cd0dc5be..2f100de82370 100644
--- a/test/Transforms/LoopVectorize/write-only.ll
+++ b/test/Transforms/LoopVectorize/write-only.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s  -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/test/Transforms/LowerExpectIntrinsic/basic.ll b/test/Transforms/LowerExpectIntrinsic/basic.ll
index e184cb05b94a..b15edc715141 100644
--- a/test/Transforms/LowerExpectIntrinsic/basic.ll
+++ b/test/Transforms/LowerExpectIntrinsic/basic.ll
@@ -274,7 +274,7 @@ return:                                           ; preds = %if.end, %if.then
 
 declare i1 @llvm.expect.i1(i1, i1) nounwind readnone
 
-; CHECK: !0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
-; CHECK: !1 = metadata !{metadata !"branch_weights", i32 4, i32 64}
-; CHECK: !2 = metadata !{metadata !"branch_weights", i32 4, i32 64, i32 4}
-; CHECK: !3 = metadata !{metadata !"branch_weights", i32 64, i32 4, i32 4}
+; CHECK: !0 = !{!"branch_weights", i32 64, i32 4}
+; CHECK: !1 = !{!"branch_weights", i32 4, i32 64}
+; CHECK: !2 = !{!"branch_weights", i32 4, i32 64, i32 4}
+; CHECK: !3 = !{!"branch_weights", i32 64, i32 4, i32 4}
diff --git a/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll b/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
index ea0d515498cf..ea581d197e58 100644
--- a/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
+++ b/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -std-compile-opts -S | grep volatile | count 3
+; RUN: opt < %s -O3 -S | grep volatile | count 3
 ; PR1520
 ; Don't promote load volatiles/stores. This is really needed to handle setjmp/lonjmp properly.
 
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
index 33eaed60fe58..a7369c0902b9 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
@@ -7,13 +7,13 @@ entry:
   %retval = alloca double                         ; <double*> [#uses=2]
   %0 = alloca double                              ; <double*> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{i32* %i_addr}, metadata !0), !dbg !8
-; CHECK: call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata ![[IVAR:[0-9]*]])
-; CHECK: call void @llvm.dbg.value(metadata !{double %j}, i64 0, metadata ![[JVAR:[0-9]*]])
+  call void @llvm.dbg.declare(metadata i32* %i_addr, metadata !0, metadata !{}), !dbg !8
+; CHECK: call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata ![[IVAR:[0-9]*]], metadata {{.*}})
+; CHECK: call void @llvm.dbg.value(metadata double %j, i64 0, metadata ![[JVAR:[0-9]*]], metadata {{.*}})
 ; CHECK: ![[IVAR]] = {{.*}} ; [ DW_TAG_arg_variable ] [i]
 ; CHECK: ![[JVAR]] = {{.*}} ; [ DW_TAG_arg_variable ] [j]
   store i32 %i, i32* %i_addr
-  call void @llvm.dbg.declare(metadata !{double* %j_addr}, metadata !9), !dbg !8
+  call void @llvm.dbg.declare(metadata double* %j_addr, metadata !9, metadata !{}), !dbg !8
   store double %j, double* %j_addr
   %1 = load i32* %i_addr, align 4, !dbg !10       ; <i32> [#uses=1]
   %2 = add nsw i32 %1, 1, !dbg !10                ; <i32> [#uses=1]
@@ -30,23 +30,23 @@ return:                                           ; preds = %entry
   ret double %retval1, !dbg !10
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!14}
 
-!0 = metadata !{i32 786689, metadata !1, metadata !"i", metadata !2, i32 2, metadata !7, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !12, metadata !2, metadata !"testfunc", metadata !"testfunc", metadata !"testfunc", i32 2, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, double (i32, double)* @testfunc, null, null, null, i32 2} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !12} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !12, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !13, metadata !13, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !12, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{metadata !6, metadata !7, metadata !6}
-!6 = metadata !{i32 786468, metadata !12, metadata !2, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 786468, metadata !12, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 2, i32 0, metadata !1, null}
-!9 = metadata !{i32 786689, metadata !1, metadata !"j", metadata !2, i32 2, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 3, i32 0, metadata !11, null}
-!11 = metadata !{i32 786443, metadata !12, metadata !1, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{metadata !"testfunc.c", metadata !"/tmp"}
-!13 = metadata !{i32 0}
-!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x101\00i\002\000", !1, !2, !7} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00testfunc\00testfunc\00testfunc\002\000\001\000\006\000\000\002", !12, !2, !4, null, double (i32, double)* @testfunc, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !12} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", !12, !13, !13, null, null, null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !12, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{!6, !7, !6}
+!6 = !{!"0x24\00double\000\0064\0064\000\000\004", !12, !2} ; [ DW_TAG_base_type ]
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", !12, !2} ; [ DW_TAG_base_type ]
+!8 = !MDLocation(line: 2, scope: !1)
+!9 = !{!"0x101\00j\002\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!10 = !MDLocation(line: 3, scope: !11)
+!11 = !{!"0xb\002\000\000", !12, !1} ; [ DW_TAG_lexical_block ]
+!12 = !{!"testfunc.c", !"/tmp"}
+!13 = !{i32 0}
+!14 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
index 32acdd696ecf..76d2a1aa7d2b 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -mem2reg < %s | llvm-dis | grep ".dbg " | count 7
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare void @foo(i32, i64, i8*)
 
@@ -11,14 +11,14 @@ entry:
   %z_addr.i = alloca i8*                          ; <i8**> [#uses=2]
   %a_addr = alloca i32                            ; <i32*> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  call void @llvm.dbg.declare(metadata !{i32* %a_addr}, metadata !0), !dbg !7
+  call void @llvm.dbg.declare(metadata i32* %a_addr, metadata !0, metadata !{}), !dbg !7
   store i32 %a, i32* %a_addr
   %0 = load i32* %a_addr, align 4, !dbg !8        ; <i32> [#uses=1]
-  call void @llvm.dbg.declare(metadata !{i32* %x_addr.i}, metadata !9) nounwind, !dbg !15
+  call void @llvm.dbg.declare(metadata i32* %x_addr.i, metadata !9, metadata !{}) nounwind, !dbg !15
   store i32 %0, i32* %x_addr.i
-  call void @llvm.dbg.declare(metadata !{i64* %y_addr.i}, metadata !16) nounwind, !dbg !15
+  call void @llvm.dbg.declare(metadata i64* %y_addr.i, metadata !16, metadata !{}) nounwind, !dbg !15
   store i64 55, i64* %y_addr.i
-  call void @llvm.dbg.declare(metadata !{i8** %z_addr.i}, metadata !17) nounwind, !dbg !15
+  call void @llvm.dbg.declare(metadata i8** %z_addr.i, metadata !17, metadata !{}) nounwind, !dbg !15
   store i8* bitcast (void (i32)* @baz to i8*), i8** %z_addr.i
   %1 = load i32* %x_addr.i, align 4, !dbg !18     ; <i32> [#uses=1]
   %2 = load i64* %y_addr.i, align 8, !dbg !18     ; <i64> [#uses=1]
@@ -32,26 +32,26 @@ return:                                           ; preds = %entry
 
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!22}
-!0 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 8, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!1 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", i32 8, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void (i32)* @baz, null, null, null, i32 8} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 786473, metadata !20} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786449, metadata !20, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !21, metadata !21, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!4 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{null, metadata !6}
-!6 = metadata !{i32 786468, metadata !20, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 8, i32 0, metadata !1, null}
-!8 = metadata !{i32 9, i32 0, metadata !1, null}
-!9 = metadata !{i32 786689, metadata !10, metadata !"x", metadata !2, i32 4, metadata !6, i32 0, null} ; [ DW_TAG_arg_variable ]
-!10 = metadata !{i32 786478, metadata !20, metadata !2, metadata !"bar", metadata !"bar", metadata !"bar", i32 4, metadata !11, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 false, null, null, null, null, i32 4} ; [ DW_TAG_subprogram ]
-!11 = metadata !{i32 786453, metadata !20, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{null, metadata !6, metadata !13, metadata !14}
-!13 = metadata !{i32 786468, metadata !20, metadata !2, metadata !"long int", i32 0, i64 64, i64 64, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!14 = metadata !{i32 786447, metadata !20, metadata !2, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!15 = metadata !{i32 4, i32 0, metadata !10, metadata !8}
-!16 = metadata !{i32 786689, metadata !10, metadata !"y", metadata !2, i32 4, metadata !13, i32 0, null} ; [ DW_TAG_arg_variable ]
-!17 = metadata !{i32 786689, metadata !10, metadata !"z", metadata !2, i32 4, metadata !14, i32 0, null} ; [ DW_TAG_arg_variable ]
-!18 = metadata !{i32 5, i32 0, metadata !10, metadata !8}
-!19 = metadata !{i32 10, i32 0, metadata !1, null}
-!20 = metadata !{metadata !"bar.c", metadata !"/tmp/"}
-!21 = metadata !{i32 0}
-!22 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x101\00a\008\000", !1, !2, !6} ; [ DW_TAG_arg_variable ]
+!1 = !{!"0x2e\00baz\00baz\00baz\008\000\001\000\006\000\000\008", !20, !2, !4, null, void (i32)* @baz, null, null, null} ; [ DW_TAG_subprogram ]
+!2 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
+!3 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", !20, !21, !21, null, null, null} ; [ DW_TAG_compile_unit ]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !20, !2, null, !5, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{null, !6}
+!6 = !{!"0x24\00int\000\0032\0032\000\000\005", !20, !2} ; [ DW_TAG_base_type ]
+!7 = !MDLocation(line: 8, scope: !1)
+!8 = !MDLocation(line: 9, scope: !1)
+!9 = !{!"0x101\00x\004\000", !10, !2, !6} ; [ DW_TAG_arg_variable ]
+!10 = !{!"0x2e\00bar\00bar\00bar\004\001\001\000\006\000\000\004", !20, !2, !11, null, null, null, null, null} ; [ DW_TAG_subprogram ]
+!11 = !{!"0x15\00\000\000\000\000\000\000", !20, !2, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{null, !6, !13, !14}
+!13 = !{!"0x24\00long int\000\0064\0064\000\000\005", !20, !2} ; [ DW_TAG_base_type ]
+!14 = !{!"0xf\00\000\0064\0064\000\000", !20, !2, null} ; [ DW_TAG_pointer_type ]
+!15 = !MDLocation(line: 4, scope: !10, inlinedAt: !8)
+!16 = !{!"0x101\00y\004\000", !10, !2, !13} ; [ DW_TAG_arg_variable ]
+!17 = !{!"0x101\00z\004\000", !10, !2, !14} ; [ DW_TAG_arg_variable ]
+!18 = !MDLocation(line: 5, scope: !10, inlinedAt: !8)
+!19 = !MDLocation(line: 10, scope: !1)
+!20 = !{!"bar.c", !"/tmp/"}
+!21 = !{i32 0}
+!22 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/MemCpyOpt/callslot_deref.ll b/test/Transforms/MemCpyOpt/callslot_deref.ll
new file mode 100644
index 000000000000..4d51552d0151
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/callslot_deref.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) unnamed_addr nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+; all bytes of %dst that are touch by the memset are dereferenceable
+define void @must_remove_memcpy(i8* noalias nocapture dereferenceable(4096) %dst) {
+; CHECK-LABEL: @must_remove_memcpy(
+; CHECK: call void @llvm.memset.p0i8.i64
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64
+  %src = alloca [4096 x i8], align 1
+  %p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
+  ret void
+}
+
+; memset touch more bytes than those guaranteed to be dereferenceable
+define void @must_not_remove_memcpy(i8* noalias nocapture dereferenceable(1024) %dst) {
+; CHECK-LABEL: @must_not_remove_memcpy(
+; CHECK: call void @llvm.memset.p0i8.i64
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+  %src = alloca [4096 x i8], align 1
+  %p = getelementptr inbounds [4096 x i8]* %src, i64 0, i64 0
+  call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4096, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %p, i64 4096, i32 1, i1 false) #2
+  ret void
+}
diff --git a/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll b/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll
new file mode 100644
index 000000000000..626317614c94
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/memcpy-to-memset-with-lifetimes.ll
@@ -0,0 +1,55 @@
+; RUN: opt -basicaa -memcpyopt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @foo([8 x i64]* noalias nocapture sret dereferenceable(64) %sret) {
+entry-block:
+  %a = alloca [8 x i64], align 8
+  %a.cast = bitcast [8 x i64]* %a to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %a.cast)
+  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i32 8, i1 false)
+  %sret.cast = bitcast [8 x i64]* %sret to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i32 8, i1 false)
+  call void @llvm.lifetime.end(i64 64, i8* %a.cast)
+  ret void
+
+; CHECK-LABEL: @foo(
+; CHECK:         %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* %[[sret_cast]], i8 0, i64 64
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret void
+}
+
+define void @bar([8 x i64]* noalias nocapture sret dereferenceable(64) %sret, [8 x i64]* noalias nocapture dereferenceable(64) %out) {
+entry-block:
+  %a = alloca [8 x i64], align 8
+  %a.cast = bitcast [8 x i64]* %a to i8*
+  call void @llvm.lifetime.start(i64 64, i8* %a.cast)
+  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 0, i64 64, i32 8, i1 false)
+  %sret.cast = bitcast [8 x i64]* %sret to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %sret.cast, i8* %a.cast, i64 64, i32 8, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %a.cast, i8 42, i64 32, i32 8, i1 false)
+  %out.cast = bitcast [8 x i64]* %out to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %out.cast, i8* %a.cast, i64 64, i32 8, i1 false)
+  call void @llvm.lifetime.end(i64 64, i8* %a.cast)
+  ret void
+
+; CHECK-LABEL: @bar(
+; CHECK:         %[[a:[^=]+]] = alloca [8 x i64]
+; CHECK:         %[[a_cast:[^=]+]] = bitcast [8 x i64]* %[[a]] to i8*
+; CHECK:         call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 0, i64 64
+; CHECK:         %[[sret_cast:[^=]+]] = bitcast [8 x i64]* %sret to i8*
+; CHECK:         call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[sret_cast]], i8* %[[a_cast]], i64 64
+; CHECK:         call void @llvm.memset.p0i8.i64(i8* %[[a_cast]], i8 42, i64 32
+; CHECK:         %[[out_cast:[^=]+]] = bitcast [8 x i64]* %out to i8*
+; CHECK:         call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[out_cast]], i8* %[[a_cast]], i64 64
+; CHECK-NOT: call void @llvm.memcpy
+; CHECK: ret void
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
index 9878b471500e..b2083cb5c700 100644
--- a/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
+++ b/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
@@ -87,5 +87,5 @@ lpad:
 declare i8 @dummy();
 declare i32 @__gxx_personality_v0(...)
 
-!0 = metadata !{i8 0, i8 2}
-!1 = metadata !{i8 5, i8 7}
-\ No newline at end of file
+!0 = !{i8 0, i8 2}
+!1 = !{i8 5, i8 7}
diff --git a/test/Transforms/MergeFunc/ranges.ll b/test/Transforms/MergeFunc/ranges.ll
index e25ff1d3acb1..d3e4d942a9d5 100644
--- a/test/Transforms/MergeFunc/ranges.ll
+++ b/test/Transforms/MergeFunc/ranges.ll
@@ -39,5 +39,5 @@ define i1 @cmp_with_same_range(i8*, i8*) {
   ret i1 %out
 }
 
-!0 = metadata !{i8 0, i8 2}
-!1 = metadata !{i8 5, i8 7}
+!0 = !{i8 0, i8 2}
+!1 = !{i8 5, i8 7}
diff --git a/test/Transforms/MergeFunc/vector-GEP-crash.ll b/test/Transforms/MergeFunc/vector-GEP-crash.ll
new file mode 100644
index 000000000000..a1eefa0bc983
--- /dev/null
+++ b/test/Transforms/MergeFunc/vector-GEP-crash.ll
@@ -0,0 +1,12 @@
+; RUN: opt -mergefunc -disable-output < %s
+; This used to cause a crash when compairing the GEPs
+
+define void @foo(<2 x i64*>) {
+  %tmp = getelementptr <2 x i64*> %0, <2 x i64> <i64 0, i64 0>
+  ret void
+}
+
+define void @bar(<2 x i64*>) {
+  %tmp = getelementptr <2 x i64*> %0, <2 x i64> <i64 0, i64 0>
+  ret void
+}
diff --git a/test/Transforms/MetaRenamer/metarenamer.ll b/test/Transforms/MetaRenamer/metarenamer.ll
index 6297af62ff0c..4010f31ff772 100644
--- a/test/Transforms/MetaRenamer/metarenamer.ll
+++ b/test/Transforms/MetaRenamer/metarenamer.ll
@@ -12,7 +12,7 @@ target triple = "x86_64-pc-linux-gnu"
 @func_5_xxx.static_local_3_xxx = internal global i32 3, align 4
 @global_3_xxx = common global i32 0, align 4
 
-@func_7_xxx = alias weak i32 (...)* @aliased_func_7_xxx
+@func_7_xxx = weak alias i32 (...)* @aliased_func_7_xxx
 
 define i32 @aliased_func_7_xxx(...) {
   ret i32 0
diff --git a/test/Transforms/ObjCARC/allocas.ll b/test/Transforms/ObjCARC/allocas.ll
index 7347a8fd4447..7b671df70649 100644
--- a/test/Transforms/ObjCARC/allocas.ll
+++ b/test/Transforms/ObjCARC/allocas.ll
@@ -23,7 +23,7 @@ declare i8* @returner2()
 declare void @bar(i32 ()*)
 declare void @use_alloca(i8**)
 
-declare void @llvm.dbg.value(metadata, i64, metadata)
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
 
 declare i8* @objc_msgSend(i8*, i8*, ...)
 
@@ -495,6 +495,6 @@ arraydestroy.done1:
   ret void
 }
 
-!0 = metadata !{}
+!0 = !{}
 
 declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/ObjCARC/arc-annotations.ll b/test/Transforms/ObjCARC/arc-annotations.ll
index f76ba3b80b06..c0ce44f284ca 100644
--- a/test/Transforms/ObjCARC/arc-annotations.ll
+++ b/test/Transforms/ObjCARC/arc-annotations.ll
@@ -73,11 +73,11 @@ return:
   ret void
 }
 
-!0 = metadata !{}
+!0 = !{}
 
-; CHECK: ![[ANN0]] = metadata !{metadata !"(test0,%x)", metadata !"S_Use", metadata !"S_None"}
-; CHECK: ![[ANN1]] = metadata !{metadata !"(test0,%x)", metadata !"S_None", metadata !"S_Retain"}
-; CHECK: ![[ANN2]] = metadata !{metadata !"(test0,%x)", metadata !"S_Release", metadata !"S_Use"}
-; CHECK: ![[ANN3]] = metadata !{metadata !"(test0,%x)", metadata !"S_None", metadata !"S_Release"}
-; CHECK: ![[ANN4]] = metadata !{metadata !"(test0,%x)", metadata !"S_Retain", metadata !"S_None"}
+; CHECK: ![[ANN0]] = !{!"(test0,%x)", !"S_Use", !"S_None"}
+; CHECK: ![[ANN1]] = !{!"(test0,%x)", !"S_None", !"S_Retain"}
+; CHECK: ![[ANN2]] = !{!"(test0,%x)", !"S_Release", !"S_Use"}
+; CHECK: ![[ANN3]] = !{!"(test0,%x)", !"S_None", !"S_Release"}
+; CHECK: ![[ANN4]] = !{!"(test0,%x)", !"S_Retain", !"S_None"}
 
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 885935c51533..7bc58c42d532 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -22,7 +22,7 @@ declare void @invokee()
 declare i8* @returner()
 declare void @bar(i32 ()*)
 
-declare void @llvm.dbg.value(metadata, i64, metadata)
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
 
 declare i8* @objc_msgSend(i8*, i8*, ...)
 
@@ -2679,8 +2679,8 @@ define {<2 x float>, <2 x float>} @"\01-[A z]"({}* %self, i8* nocapture %_cmd) n
 invoke.cont:
   %0 = bitcast {}* %self to i8*
   %1 = tail call i8* @objc_retain(i8* %0) nounwind
-  tail call void @llvm.dbg.value(metadata !{{}* %self}, i64 0, metadata !0)
-  tail call void @llvm.dbg.value(metadata !{{}* %self}, i64 0, metadata !0)
+  tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !0, metadata !{})
+  tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !0, metadata !{})
   %ivar = load i64* @"OBJC_IVAR_$_A.myZ", align 8
   %add.ptr = getelementptr i8* %0, i64 %ivar
   %tmp1 = bitcast i8* %add.ptr to float*
@@ -3011,9 +3011,9 @@ define void @test67(i8* %x) {
 
 !llvm.module.flags = !{!1}
 
-!0 = metadata !{}
-!1 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{}
+!1 = !{i32 1, !"Debug Info Version", i32 2}
 
 ; CHECK: attributes #0 = { nounwind readnone }
 ; CHECK: attributes [[NUW]] = { nounwind }
-; CHECK: ![[RELEASE]] = metadata !{}
+; CHECK: ![[RELEASE]] = !{}
diff --git a/test/Transforms/ObjCARC/cfg-hazards.ll b/test/Transforms/ObjCARC/cfg-hazards.ll
index 61e5a3b1dccb..746d56df8fad 100644
--- a/test/Transforms/ObjCARC/cfg-hazards.ll
+++ b/test/Transforms/ObjCARC/cfg-hazards.ll
@@ -432,4 +432,4 @@ exit:
 
 ; CHECK: attributes [[NUW]] = { nounwind }
 
-!0 = metadata !{}
+!0 = !{}
diff --git a/test/Transforms/ObjCARC/contract-marker.ll b/test/Transforms/ObjCARC/contract-marker.ll
index 55a1b28e1c46..a8282607cb3b 100644
--- a/test/Transforms/ObjCARC/contract-marker.ll
+++ b/test/Transforms/ObjCARC/contract-marker.ll
@@ -22,6 +22,6 @@ declare void @bar(i8*)
 
 !clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
 
-!0 = metadata !{metadata !"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"}
+!0 = !{!"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"}
 
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/contract-storestrong.ll b/test/Transforms/ObjCARC/contract-storestrong.ll
index 50a2d9756648..f9308c8fdee2 100644
--- a/test/Transforms/ObjCARC/contract-storestrong.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -169,6 +169,6 @@ entry:
   ret void
 }
 
-!0 = metadata !{}
+!0 = !{}
 
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/contract-testcases.ll b/test/Transforms/ObjCARC/contract-testcases.ll
index 0bf63a6e5174..74a4a7f989cb 100644
--- a/test/Transforms/ObjCARC/contract-testcases.ll
+++ b/test/Transforms/ObjCARC/contract-testcases.ll
@@ -89,7 +89,7 @@ lpad:                                             ; preds = %entry
 
 !clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
 
-!0 = metadata !{metadata !"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"}
+!0 = !{!"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"}
 
 ; CHECK: attributes #0 = { optsize }
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/empty-block.ll b/test/Transforms/ObjCARC/empty-block.ll
index 0440ab8f7c07..cc82d1088110 100644
--- a/test/Transforms/ObjCARC/empty-block.ll
+++ b/test/Transforms/ObjCARC/empty-block.ll
@@ -56,4 +56,4 @@ define %0* @test1() nounwind {
 
 declare %0* @foo()
 
-!0 = metadata !{}
+!0 = !{}
diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
index 79e300cb6b43..c72566cd2065 100644
--- a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
+++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
@@ -41,10 +41,10 @@ entry:
   %tmp2 = bitcast %struct._class_t* %tmp to i8*, !dbg !37
 ; CHECK: call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp2, i8* %tmp1)
   %call = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp2, i8* %tmp1), !dbg !37, !clang.arc.no_objc_arc_exceptions !38
-  call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !12), !dbg !37
+  call void @llvm.dbg.value(metadata i8* %call, i64 0, metadata i32 02, metadata !{}), !dbg !37
 ; CHECK: call i8* @objc_retain(i8* %call) [[NUW:#[0-9]+]]
   %tmp3 = call i8* @objc_retain(i8* %call) nounwind, !dbg !39
-  call void @llvm.dbg.value(metadata !{i8* %call}, i64 0, metadata !25), !dbg !39
+  call void @llvm.dbg.value(metadata i8* %call, i64 0, metadata !25, metadata !{}), !dbg !39
   invoke fastcc void @ThrowFunc(i8* %call)
           to label %eh.cont unwind label %lpad, !dbg !40, !clang.arc.no_objc_arc_exceptions !38
 
@@ -58,7 +58,7 @@ lpad:                                             ; preds = %entry
           catch i8* null, !dbg !40
   %tmp5 = extractvalue { i8*, i32 } %tmp4, 0, !dbg !40
   %exn.adjusted = call i8* @objc_begin_catch(i8* %tmp5) nounwind, !dbg !44
-  call void @llvm.dbg.value(metadata !45, i64 0, metadata !21), !dbg !46
+  call void @llvm.dbg.value(metadata i8 0, i64 0, metadata !21, metadata !{}), !dbg !46
   call void @objc_end_catch(), !dbg !49, !clang.arc.no_objc_arc_exceptions !38
 ; CHECK: call void @objc_release(i8* %call)
   call void @objc_release(i8* %call) nounwind, !dbg !42, !clang.imprecise_release !38
@@ -72,7 +72,7 @@ if.end:                                           ; preds = %lpad, %eh.cont
   ret i32 0, !dbg !54
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
 
@@ -87,7 +87,7 @@ declare void @objc_exception_rethrow()
 define internal fastcc void @ThrowFunc(i8* %obj) uwtable noinline ssp {
 entry:
   %tmp = call i8* @objc_retain(i8* %obj) nounwind
-  call void @llvm.dbg.value(metadata !{i8* %obj}, i64 0, metadata !32), !dbg !55
+  call void @llvm.dbg.value(metadata i8* %obj, i64 0, metadata !32, metadata !{}), !dbg !55
   %tmp1 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_1", align 8, !dbg !56
   %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_5", align 8, !dbg !56, !invariant.load !38
   %tmp3 = bitcast %struct._class_t* %tmp1 to i8*, !dbg !56
@@ -102,7 +102,7 @@ declare void @objc_release(i8*) nonlazybind
 
 declare void @NSLog(i8*, ...)
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 ; CHECK: attributes #0 = { ssp uwtable }
 ; CHECK: attributes #1 = { nounwind readnone }
@@ -113,62 +113,62 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!33, !34, !35, !36, !61}
 
-!0 = metadata !{i32 786449, metadata !60, i32 16, metadata !"clang version 3.3 ", i1 true, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1, null, metadata !""} ; [ DW_TAG_compile_unit ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m] [DW_LANG_ObjC]
-!1 = metadata !{i32 0}
-!3 = metadata !{metadata !5, metadata !27}
-!5 = metadata !{i32 786478, metadata !60, metadata !6, metadata !"main", metadata !"main", metadata !"", i32 9, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 true, i32 ()* @main, null, null, metadata !10, i32 10} ; [ DW_TAG_subprogram ] [line 9] [def] [scope 10] [main]
-!6 = metadata !{i32 786473, metadata !60} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{metadata !11}
-!11 = metadata !{metadata !12, metadata !21, metadata !25}
-!12 = metadata !{i32 786688, metadata !13, metadata !"obj", metadata !6, i32 11, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj] [line 11]
-!13 = metadata !{i32 786443, metadata !60, metadata !5, i32 10, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!14 = metadata !{i32 786454, metadata !60, null, metadata !"id", i32 11, i64 0, i64 0, i64 0, i32 0, metadata !15} ; [ DW_TAG_typedef ] [id] [line 11, size 0, align 0, offset 0] [from ]
-!15 = metadata !{i32 786447, metadata !60, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
-!16 = metadata !{i32 786451, metadata !60, null, metadata !"objc_object", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !17, i32 0, null, i32 0, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [def] [from ]
-!17 = metadata !{metadata !18}
-!18 = metadata !{i32 786445, metadata !60, metadata !16, metadata !"isa", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !19} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
-!19 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 0, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
-!20 = metadata !{i32 786451, metadata !60, null, metadata !"objc_class", i32 0, i64 0, i64 0, i32 0, i32 4, null, null, i32 0, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
-!21 = metadata !{i32 786688, metadata !22, metadata !"ok", metadata !6, i32 13, metadata !23, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [ok] [line 13]
-!22 = metadata !{i32 786443, metadata !60, metadata !13, i32 12, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!23 = metadata !{i32 786454, metadata !60, null, metadata !"BOOL", i32 62, i64 0, i64 0, i64 0, i32 0, metadata !24} ; [ DW_TAG_typedef ] [BOOL] [line 62, size 0, align 0, offset 0] [from signed char]
-!24 = metadata !{i32 786468, null, null, metadata !"signed char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [signed char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!25 = metadata !{i32 786688, metadata !26, metadata !"obj2", metadata !6, i32 15, metadata !14, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [obj2] [line 15]
-!26 = metadata !{i32 786443, metadata !60, metadata !22, i32 14, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!27 = metadata !{i32 786478, metadata !60, metadata !6, metadata !"ThrowFunc", metadata !"ThrowFunc", metadata !"", i32 4, metadata !28, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (i8*)* @ThrowFunc, null, null, metadata !30, i32 5} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [scope 5] [ThrowFunc]
-!28 = metadata !{i32 786453, i32 0, null, i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !29, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!29 = metadata !{null, metadata !14}
-!30 = metadata !{metadata !31}
-!31 = metadata !{metadata !32}
-!32 = metadata !{i32 786689, metadata !27, metadata !"obj", metadata !6, i32 16777220, metadata !14, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [obj] [line 4]
-!33 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
-!34 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
-!35 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
-!36 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
-!37 = metadata !{i32 11, i32 0, metadata !13, null}
-!38 = metadata !{}
-!39 = metadata !{i32 15, i32 0, metadata !26, null}
-!40 = metadata !{i32 17, i32 0, metadata !41, null}
-!41 = metadata !{i32 786443, metadata !60, metadata !26, i32 16, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!42 = metadata !{i32 22, i32 0, metadata !26, null}
-!43 = metadata !{i32 23, i32 0, metadata !22, null}
-!44 = metadata !{i32 19, i32 0, metadata !41, null}
-!45 = metadata !{i8 0}
-!46 = metadata !{i32 20, i32 0, metadata !47, null}
-!47 = metadata !{i32 786443, metadata !60, metadata !48, i32 19, i32 0, i32 5} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!48 = metadata !{i32 786443, metadata !60, metadata !26, i32 19, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!49 = metadata !{i32 21, i32 0, metadata !47, null}
-!50 = metadata !{i32 24, i32 0, metadata !51, null}
-!51 = metadata !{i32 786443, metadata !60, metadata !22, i32 23, i32 0, i32 6} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!52 = metadata !{i32 25, i32 0, metadata !51, null}
-!53 = metadata !{i32 27, i32 0, metadata !13, null}
-!54 = metadata !{i32 28, i32 0, metadata !13, null}
-!55 = metadata !{i32 4, i32 0, metadata !27, null}
-!56 = metadata !{i32 6, i32 0, metadata !57, null}
-!57 = metadata !{i32 786443, metadata !60, metadata !27, i32 5, i32 0, i32 7} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
-!58 = metadata !{i32 7, i32 0, metadata !57, null}
-!60 = metadata !{metadata !"test.m", metadata !"/Volumes/Files/gottesmmcab/Radar/12906997"}
-!61 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0016\00clang version 3.3 \001\00\002\00\000", !60, !1, !1, !3, !1, null} ; [ DW_TAG_compile_unit ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m] [DW_LANG_ObjC]
+!1 = !{i32 0}
+!3 = !{!5, !27}
+!5 = !{!"0x2e\00main\00main\00\009\000\001\000\006\000\001\0010", !60, !6, !7, null, i32 ()* @main, null, null, !10} ; [ DW_TAG_subprogram ] [line 9] [def] [scope 10] [main]
+!6 = !{!"0x29", !60} ; [ DW_TAG_file_type ]
+!7 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!11}
+!11 = !{!12, !21, !25}
+!12 = !{!"0x100\00obj\0011\000", !13, !6, !14} ; [ DW_TAG_auto_variable ] [obj] [line 11]
+!13 = !{!"0xb\0010\000\000", !60, !5} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!14 = !{!"0x16\00id\0011\000\000\000\000", !60, null, !15} ; [ DW_TAG_typedef ] [id] [line 11, size 0, align 0, offset 0] [from ]
+!15 = !{!"0xf\00\000\0064\0064\000\000", !60, null, !16} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from objc_object]
+!16 = !{!"0x13\00objc_object\000\000\000\000\000\000", !60, null, null, !17, null, i32 0, null} ; [ DW_TAG_structure_type ] [objc_object] [line 0, size 0, align 0, offset 0] [def] [from ]
+!17 = !{!18}
+!18 = !{!"0xd\00isa\000\0064\000\000\000", !60, !16, !19} ; [ DW_TAG_member ] [isa] [line 0, size 64, align 0, offset 0] [from ]
+!19 = !{!"0xf\00\000\0064\000\000\000", null, null, !20} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 0, offset 0] [from objc_class]
+!20 = !{!"0x13\00objc_class\000\000\000\000\004\000", !60, null, null, null, null, null, null} ; [ DW_TAG_structure_type ] [objc_class] [line 0, size 0, align 0, offset 0] [decl] [from ]
+!21 = !{!"0x100\00ok\0013\000", !22, !6, !23} ; [ DW_TAG_auto_variable ] [ok] [line 13]
+!22 = !{!"0xb\0012\000\001", !60, !13} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!23 = !{!"0x16\00BOOL\0062\000\000\000\000", !60, null, !24} ; [ DW_TAG_typedef ] [BOOL] [line 62, size 0, align 0, offset 0] [from signed char]
+!24 = !{!"0x24\00signed char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [signed char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!25 = !{!"0x100\00obj2\0015\000", !26, !6, !14} ; [ DW_TAG_auto_variable ] [obj2] [line 15]
+!26 = !{!"0xb\0014\000\002", !60, !22} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!27 = !{!"0x2e\00ThrowFunc\00ThrowFunc\00\004\001\001\000\006\00256\001\005", !60, !6, !28, null, void (i8*)* @ThrowFunc, null, null, !30} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [scope 5] [ThrowFunc]
+!28 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !29, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!29 = !{null, !14}
+!30 = !{!31}
+!31 = !{!32}
+!32 = !{!"0x101\00obj\0016777220\000", !27, !6, !14} ; [ DW_TAG_arg_variable ] [obj] [line 4]
+!33 = !{i32 1, !"Objective-C Version", i32 2}
+!34 = !{i32 1, !"Objective-C Image Info Version", i32 0}
+!35 = !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!36 = !{i32 4, !"Objective-C Garbage Collection", i32 0}
+!37 = !MDLocation(line: 11, scope: !13)
+!38 = !{}
+!39 = !MDLocation(line: 15, scope: !26)
+!40 = !MDLocation(line: 17, scope: !41)
+!41 = !{!"0xb\0016\000\003", !60, !26} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!42 = !MDLocation(line: 22, scope: !26)
+!43 = !MDLocation(line: 23, scope: !22)
+!44 = !MDLocation(line: 19, scope: !41)
+!45 = !{i8 0}
+!46 = !MDLocation(line: 20, scope: !47)
+!47 = !{!"0xb\0019\000\005", !60, !48} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!48 = !{!"0xb\0019\000\004", !60, !26} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!49 = !MDLocation(line: 21, scope: !47)
+!50 = !MDLocation(line: 24, scope: !51)
+!51 = !{!"0xb\0023\000\006", !60, !22} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!52 = !MDLocation(line: 25, scope: !51)
+!53 = !MDLocation(line: 27, scope: !13)
+!54 = !MDLocation(line: 28, scope: !13)
+!55 = !MDLocation(line: 4, scope: !27)
+!56 = !MDLocation(line: 6, scope: !57)
+!57 = !{!"0xb\005\000\007", !60, !27} ; [ DW_TAG_lexical_block ] [/Volumes/Files/gottesmmcab/Radar/12906997/test.m]
+!58 = !MDLocation(line: 7, scope: !57)
+!60 = !{!"test.m", !"/Volumes/Files/gottesmmcab/Radar/12906997"}
+!61 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/ObjCARC/escape.ll b/test/Transforms/ObjCARC/escape.ll
index 28f2e807798a..357f7592d83c 100644
--- a/test/Transforms/ObjCARC/escape.ll
+++ b/test/Transforms/ObjCARC/escape.ll
@@ -128,7 +128,7 @@ declare i8* @objc_storeWeak(i8**, i8*)
 declare i8* @not_really_objc_storeWeak(i8**, i8*)
 declare void @objc_release(i8*)
 
-!0 = metadata !{}
+!0 = !{}
 
 ; CHECK: attributes [[NUW]] = { nounwind }
 ; CHECK: attributes #1 = { nounwind ssp }
diff --git a/test/Transforms/ObjCARC/intrinsic-use.ll b/test/Transforms/ObjCARC/intrinsic-use.ll
index f3833cb54297..b1e56c8e9de1 100644
--- a/test/Transforms/ObjCARC/intrinsic-use.ll
+++ b/test/Transforms/ObjCARC/intrinsic-use.ll
@@ -112,5 +112,5 @@ entry:
 }
 
 
-!0 = metadata !{}
+!0 = !{}
 
diff --git a/test/Transforms/ObjCARC/invoke.ll b/test/Transforms/ObjCARC/invoke.ll
index 04d057b9d496..5ef5184154b7 100644
--- a/test/Transforms/ObjCARC/invoke.ll
+++ b/test/Transforms/ObjCARC/invoke.ll
@@ -221,4 +221,4 @@ declare i32 @__objc_personality_v0(...)
 
 ; CHECK: attributes [[NUW]] = { nounwind }
 
-!0 = metadata !{}
+!0 = !{}
diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll
index 2eeb4fc25873..7d72e37f8034 100644
--- a/test/Transforms/ObjCARC/nested.ll
+++ b/test/Transforms/ObjCARC/nested.ll
@@ -21,7 +21,7 @@ declare void @__crasher_block_invoke(i8* nocapture)
 declare i8* @objc_retainBlock(i8*)
 declare void @__crasher_block_invoke1(i8* nocapture)
 
-!0 = metadata !{}
+!0 = !{}
 
 ; Delete a nested retain+release pair.
 
diff --git a/test/Transforms/ObjCARC/path-overflow.ll b/test/Transforms/ObjCARC/path-overflow.ll
index 3c14353947ae..d23965310127 100644
--- a/test/Transforms/ObjCARC/path-overflow.ll
+++ b/test/Transforms/ObjCARC/path-overflow.ll
@@ -2190,4 +2190,4 @@ return:                                           ; No predecessors!
 }
 
 
-!0 = metadata !{}
+!0 = !{}
diff --git a/test/Transforms/ObjCARC/provenance.ll b/test/Transforms/ObjCARC/provenance.ll
new file mode 100644
index 000000000000..937c68924357
--- /dev/null
+++ b/test/Transforms/ObjCARC/provenance.ll
@@ -0,0 +1,52 @@
+; RUN: opt -disable-output -pa-eval %s 2>&1 | FileCheck %s
+
+@"\01l_objc_msgSend_fixup_" = global i8 0
+@g1 = global i8 0, section "__OBJC,__message_refs,literal_pointers,no_dead_strip"
+@g2 = global i8 0, section "__DATA, __objc_classrefs, regular, no_dead_strip"
+@g3 = global i8 0, section "__DATA, __objc_superrefs, regular, no_dead_strip"
+@g4 = global i8 0, section "__TEXT,__objc_methname,cstring_literals"
+@g5 = global i8 0, section "__TEXT,__cstring,cstring_literals"
+
+declare void @g(i8)
+
+define void @f(i8* %a, i8** %b, i8** %c) {
+  %y1 = load i8* %a
+  call void @g(i8 %y1)
+
+  %y2 = load i8** %b
+  %y3 = load i8** %c
+
+  %x0 = load i8* @"\01l_objc_msgSend_fixup_"
+  call void @g(i8 %x0)
+
+  %x1 = load i8* @g1
+  call void @g(i8 %x1)
+
+  %x2 = load i8* @g2
+  call void @g(i8 %x2)
+
+  %x3 = load i8* @g3
+  call void @g(i8 %x3)
+
+  %x4 = load i8* @g4
+  call void @g(i8 %x4)
+
+  %x5 = load i8* @g5
+  call void @g(i8 %x5)
+  ret void
+}
+
+; CHECK: y1 and y2 are related.
+; CHECK: y1 and y3 are related.
+; CHECK: y2 and y3 are related.
+; CHECK: x0 and y1 are not related.
+; CHECK: x0 and y2 are not related.
+; CHECK: x0 and y3 are not related.
+; CHECK: l_objc_msgSend_fixup_ and y1 are not related.
+; CHECK: l_objc_msgSend_fixup_ and y2 are not related.
+; CHECK: l_objc_msgSend_fixup_ and y3 are not related.
+; CHECK: x1 and y1 are not related.
+; CHECK: x2 and y1 are not related.
+; CHECK: x3 and y1 are not related.
+; CHECK: x4 and y1 are not related.
+; CHECK: x5 and y1 are not related.
diff --git a/test/Transforms/ObjCARC/retain-not-declared.ll b/test/Transforms/ObjCARC/retain-not-declared.ll
index 3a2bd03692e1..416202222aa8 100644
--- a/test/Transforms/ObjCARC/retain-not-declared.ll
+++ b/test/Transforms/ObjCARC/retain-not-declared.ll
@@ -64,6 +64,6 @@ lpad100:                                          ; preds = %invoke.cont93
 
 declare i32 @__gxx_personality_v0(...)
 
-!0 = metadata !{}
+!0 = !{}
 
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/split-backedge.ll b/test/Transforms/ObjCARC/split-backedge.ll
index 1b7cf441a685..2507173f4b7e 100644
--- a/test/Transforms/ObjCARC/split-backedge.ll
+++ b/test/Transforms/ObjCARC/split-backedge.ll
@@ -45,6 +45,6 @@ declare void @objc_release(i8*)
 declare i8* @objc_retain(i8*)
 declare void @use_pointer(i8*)
 
-!0 = metadata !{}
+!0 = !{}
 
 ; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/weak-copies.ll b/test/Transforms/ObjCARC/weak-copies.ll
index 5dab4e049e22..13d0b0a0c6b7 100644
--- a/test/Transforms/ObjCARC/weak-copies.ll
+++ b/test/Transforms/ObjCARC/weak-copies.ll
@@ -86,4 +86,4 @@ declare void @objc_destroyWeak(i8**)
 
 ; CHECK: attributes [[NUW]] = { nounwind }
 
-!0 = metadata !{}
+!0 = !{}
diff --git a/test/Transforms/PartiallyInlineLibCalls/bad-prototype.ll b/test/Transforms/PartiallyInlineLibCalls/bad-prototype.ll
new file mode 100644
index 000000000000..34cd672ed267
--- /dev/null
+++ b/test/Transforms/PartiallyInlineLibCalls/bad-prototype.ll
@@ -0,0 +1,13 @@
+; RUN: opt -S -partially-inline-libcalls < %s | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i32 @sqrt()
+
+; CHECK-LABEL: @foo
+define i32 @foo() {
+  ; CHECK: call{{.*}}@sqrt
+  ; CHECK-NOT: call{{.*}}@sqrt
+  %r = call i32 @sqrt()
+  ret i32 %r
+}
diff --git a/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll b/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
index f78395540026..ea869842b180 100644
--- a/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
+++ b/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll
@@ -3,7 +3,7 @@
 define <4 x float> @test1() {
 ; CHECK-LABEL: test1
 ; CHECK-NEXT: %tmp1 = fsub <4 x float> zeroinitializer, zeroinitializer
-; CHECK-NEXT: %tmp2 = fmul <4 x float> zeroinitializer, %tmp1
+; CHECK-NEXT: %tmp2 = fmul <4 x float> %tmp1, zeroinitializer
 ; CHECK-NEXT: ret <4 x float> %tmp2
 
   %tmp1 = fsub <4 x float> zeroinitializer, zeroinitializer
diff --git a/test/Transforms/Reassociate/basictest.ll b/test/Transforms/Reassociate/basictest.ll
index d70bfcbb72c6..0194ce2e302c 100644
--- a/test/Transforms/Reassociate/basictest.ll
+++ b/test/Transforms/Reassociate/basictest.ll
@@ -203,7 +203,7 @@ define i32 @test14(i32 %X1, i32 %X2) {
 
 ; CHECK-LABEL: @test14
 ; CHECK-NEXT: sub i32 %X1, %X2
-; CHECK-NEXT: mul i32 %tmp, 47
+; CHECK-NEXT: mul i32 %B2, 47
 ; CHECK-NEXT: ret i32
 }
 
diff --git a/test/Transforms/Reassociate/canonicalize-neg-const.ll b/test/Transforms/Reassociate/canonicalize-neg-const.ll
new file mode 100644
index 000000000000..e85a963f6dda
--- /dev/null
+++ b/test/Transforms/Reassociate/canonicalize-neg-const.ll
@@ -0,0 +1,158 @@
+; RUN: opt -reassociate -gvn -S < %s | FileCheck %s
+
+; (x + 0.1234 * y) * (x + -0.1234 * y) -> (x + 0.1234 * y) * (x - 0.1234 * y)
+define double @test1(double %x, double %y) {
+; CHECK-LABEL: @test1
+; CHECK-NEXT: fmul double %y, 1.234000e-01
+; CHECK-NEXT: fadd double %x, %mul
+; CHECK-NEXT: fsub double %x, %mul
+; CHECK-NEXT: fmul double %add{{.*}}, %add{{.*}}
+; CHECK-NEXT: ret double %mul
+
+  %mul = fmul double 1.234000e-01, %y
+  %add = fadd double %mul, %x
+  %mul1 = fmul double -1.234000e-01, %y
+  %add2 = fadd double %mul1, %x
+  %mul3 = fmul double %add, %add2
+  ret double %mul3
+}
+
+; (x + -0.1234 * y) * (x + -0.1234 * y) -> (x - 0.1234 * y) * (x - 0.1234 * y)
+define double @test2(double %x, double %y) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: fmul double %y, 1.234000e-01
+; CHECK-NEXT: fsub double %x, %mul
+; CHECK-NEXT: fmul double %add{{.*}}, %add{{.*}}
+; CHECK-NEXT: ret double %mul
+
+  %mul = fmul double %y, -1.234000e-01
+  %add = fadd double %mul, %x
+  %mul1 = fmul double %y, -1.234000e-01
+  %add2 = fadd double %mul1, %x
+  %mul3 = fmul double %add, %add2
+  ret double %mul3
+}
+
+; (x + 0.1234 * y) * (x - -0.1234 * y) -> (x + 0.1234 * y) * (x + 0.1234 * y)
+define double @test3(double %x, double %y) {
+; CHECK-LABEL: @test3
+; CHECK-NEXT: fmul double %y, 1.234000e-01
+; CHECK-NEXT: fadd double %x, %mul
+; CHECK-NEXT: fmul double %add{{.*}}, %add{{.*}}
+; CHECK-NEXT: ret double
+
+  %mul = fmul double %y, 1.234000e-01
+  %add = fadd double %mul, %x
+  %mul1 = fmul double %y, -1.234000e-01
+  %add2 = fsub double %x, %mul1
+  %mul3 = fmul double %add, %add2
+  ret double %mul3
+}
+
+; Canonicalize (x - -1234 * y)
+define i64 @test4(i64 %x, i64 %y) {
+; CHECK-LABEL: @test4
+; CHECK-NEXT: mul i64 %y, 1234
+; CHECK-NEXT: add i64 %mul, %x
+; CHECK-NEXT: ret i64 %sub
+
+  %mul = mul i64 %y, -1234
+  %sub = sub i64 %x, %mul
+  ret i64 %sub
+}
+
+; Canonicalize (x - -0.1234 * y)
+define double @test5(double %x, double %y) {
+; CHECK-LABEL: @test5
+; CHECK-NEXT: fmul double %y, 1.234000e-01
+; CHECK-NEXT: fadd double %x, %mul
+; CHECK-NEXT: ret double
+
+  %mul = fmul double -1.234000e-01, %y
+  %sub = fsub double %x, %mul
+  ret double %sub
+}
+
+; Don't modify (-0.1234 * y - x)
+define double @test6(double %x, double %y) {
+; CHECK-LABEL: @test6
+; CHECK-NEXT: fmul double %y, -1.234000e-01
+; CHECK-NEXT: fsub double %mul, %x
+; CHECK-NEXT: ret double %sub
+
+  %mul = fmul double -1.234000e-01, %y
+  %sub = fsub double %mul, %x
+  ret double %sub
+}
+
+; Canonicalize (-0.1234 * y + x) -> (x - 0.1234 * y)
+define double @test7(double %x, double %y) {
+; CHECK-LABEL: @test7
+; CHECK-NEXT: fmul double %y, 1.234000e-01
+; CHECK-NEXT: fsub double %x, %mul
+; CHECK-NEXT: ret double %add
+
+  %mul = fmul double -1.234000e-01, %y
+  %add = fadd double %mul, %x
+  ret double %add
+}
+
+; Canonicalize (y * -0.1234 + x) -> (x - 0.1234 * y)
+define double @test8(double %x, double %y) {
+; CHECK-LABEL: @test8
+; CHECK-NEXT: fmul double %y, 1.234000e-01
+; CHECK-NEXT: fsub double %x, %mul
+; CHECK-NEXT: ret double %add
+
+  %mul = fmul double %y, -1.234000e-01
+  %add = fadd double %mul, %x
+  ret double %add
+}
+
+; Canonicalize (x - -0.1234 / y)
+define double @test9(double %x, double %y) {
+; CHECK-LABEL: @test9
+; CHECK-NEXT: fdiv double 1.234000e-01, %y
+; CHECK-NEXT: fadd double %x, %div
+; CHECK-NEXT: ret double
+
+  %div = fdiv double -1.234000e-01, %y
+  %sub = fsub double %x, %div
+  ret double %sub
+}
+
+; Don't modify (-0.1234 / y - x)
+define double @test10(double %x, double %y) {
+; CHECK-LABEL: @test10
+; CHECK-NEXT: fdiv double -1.234000e-01, %y
+; CHECK-NEXT: fsub double %div, %x
+; CHECK-NEXT: ret double %sub
+
+  %div = fdiv double -1.234000e-01, %y
+  %sub = fsub double %div, %x
+  ret double %sub
+}
+
+; Canonicalize (-0.1234 / y + x) -> (x - 0.1234 / y)
+define double @test11(double %x, double %y) {
+; CHECK-LABEL: @test11
+; CHECK-NEXT: fdiv double 1.234000e-01, %y
+; CHECK-NEXT: fsub double %x, %div
+; CHECK-NEXT: ret double %add
+
+  %div = fdiv double -1.234000e-01, %y
+  %add = fadd double %div, %x
+  ret double %add
+}
+
+; Canonicalize (y / -0.1234 + x) -> (x - y / 0.1234)
+define double @test12(double %x, double %y) {
+; CHECK-LABEL: @test12
+; CHECK-NEXT: fdiv double %y, 1.234000e-01
+; CHECK-NEXT: fsub double %x, %div
+; CHECK-NEXT: ret double %add
+
+  %div = fdiv double %y, -1.234000e-01
+  %add = fadd double %div, %x
+  ret double %add
+}
diff --git a/test/Transforms/Reassociate/commute.ll b/test/Transforms/Reassociate/commute.ll
new file mode 100644
index 000000000000..760e51b05e1c
--- /dev/null
+++ b/test/Transforms/Reassociate/commute.ll
@@ -0,0 +1,19 @@
+; RUN: opt -reassociate -S < %s | FileCheck %s
+
+declare void @use(i32)
+
+define void @test1(i32 %x, i32 %y) {
+; CHECK-LABEL: test1
+; CHECK: mul i32 %y, %x
+; CHECK: mul i32 %y, %x
+; CHECK: sub i32 %1, %2
+; CHECK: call void @use(i32 %{{.*}})
+; CHECK: call void @use(i32 %{{.*}})
+
+  %1 = mul i32 %x, %y
+  %2 = mul i32 %y, %x
+  %3 = sub i32 %1, %2
+  call void @use(i32 %1)
+  call void @use(i32 %3)
+  ret void
+}
diff --git a/test/Transforms/Reassociate/fast-AgressiveSubMove.ll b/test/Transforms/Reassociate/fast-AgressiveSubMove.ll
new file mode 100644
index 000000000000..0c28ed164910
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-AgressiveSubMove.ll
@@ -0,0 +1,24 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+
+define float @test1(float %A) {
+; CHECK-LABEL: test1
+; CHECK-NEXT: %X = fadd float %A, 1.000000e+00
+; CHECK-NEXT: %Y = fadd float %A, 1.000000e+00
+; CHECK-NEXT: %r = fsub float %X, %Y
+; CHECK-NEXT: ret float %r
+
+  %X = fadd float %A, 1.000000e+00
+  %Y = fadd float %A, 1.000000e+00
+  %r = fsub float %X, %Y
+  ret float %r
+}
+
+define float @test2(float %A) {
+; CHECK-LABEL: test2
+; CHECK-NEXT: ret float 0.000000e+00
+
+  %X = fadd fast float 1.000000e+00, %A
+  %Y = fadd fast float 1.000000e+00, %A
+  %r = fsub fast float %X, %Y
+  ret float %r
+}
diff --git a/test/Transforms/Reassociate/fast-ArrayOutOfBounds.ll b/test/Transforms/Reassociate/fast-ArrayOutOfBounds.ll
new file mode 100644
index 000000000000..0109e4fd1b3a
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-ArrayOutOfBounds.ll
@@ -0,0 +1,65 @@
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
+
+; Not marked as fast, so must not change.
+define float @test1(float %a0, float %a1, float %a2, float %a3, float %a4) {
+; CHECK-LABEL: test1
+; CHECK-NEXT: %tmp.2 = fadd float %a3, %a4
+; CHECK-NEXT: %tmp.4 = fadd float %tmp.2, %a2
+; CHECK-NEXT: %tmp.6 = fadd float %tmp.4, %a1
+; CHECK-NEXT: %tmp.8 = fadd float %tmp.6, %a0
+; CHECK-NEXT: %tmp.11 = fadd float %a2, %a3
+; CHECK-NEXT: %tmp.13 = fadd float %tmp.11, %a1
+; CHECK-NEXT: %tmp.15 = fadd float %tmp.13, %a0
+; CHECK-NEXT: %tmp.18 = fadd float %a1, %a2
+; CHECK-NEXT: %tmp.20 = fadd float %tmp.18, %a0
+; CHECK-NEXT: %tmp.23 = fadd float %a0, %a1
+; CHECK-NEXT: %tmp.26 = fsub float %tmp.8, %tmp.15
+; CHECK-NEXT: %tmp.28 = fadd float %tmp.20, %tmp.26
+; CHECK-NEXT: %tmp.30 = fsub float %tmp.28, %tmp.23
+; CHECK-NEXT: %tmp.32 = fsub float %tmp.30, %a4
+; CHECK-NEXT: %tmp.34 = fsub float %tmp.32, %a2
+; CHECK-NEXT: %T = fmul float %tmp.34, %tmp.34
+; CHECK-NEXT: ret float %T
+
+  %tmp.2 = fadd float %a4, %a3
+  %tmp.4 = fadd float %tmp.2, %a2
+  %tmp.6 = fadd float %tmp.4, %a1
+  %tmp.8 = fadd float %tmp.6, %a0
+  %tmp.11 = fadd float %a3, %a2
+  %tmp.13 = fadd float %tmp.11, %a1
+  %tmp.15 = fadd float %tmp.13, %a0
+  %tmp.18 = fadd float %a2, %a1
+  %tmp.20 = fadd float %tmp.18, %a0
+  %tmp.23 = fadd float %a1, %a0
+  %tmp.26 = fsub float %tmp.8, %tmp.15
+  %tmp.28 = fadd float %tmp.26, %tmp.20
+  %tmp.30 = fsub float %tmp.28, %tmp.23
+  %tmp.32 = fsub float %tmp.30, %a4
+  %tmp.34 = fsub float %tmp.32, %a2
+  %T = fmul float %tmp.34, %tmp.34
+  ret float %T
+}
+
+; Should be able to eliminate everything.
+define float @test2(float %a0, float %a1, float %a2, float %a3, float %a4) {
+; CHECK-LABEL: test2
+; CHECK: ret float 0.000000e+00
+
+  %tmp.2 = fadd fast float %a4, %a3
+  %tmp.4 = fadd fast float %tmp.2, %a2
+  %tmp.6 = fadd fast float %tmp.4, %a1
+  %tmp.8 = fadd fast float %tmp.6, %a0
+  %tmp.11 = fadd fast float %a3, %a2
+  %tmp.13 = fadd fast float %tmp.11, %a1
+  %tmp.15 = fadd fast float %tmp.13, %a0
+  %tmp.18 = fadd fast float %a2, %a1
+  %tmp.20 = fadd fast float %tmp.18, %a0
+  %tmp.23 = fadd fast float %a1, %a0
+  %tmp.26 = fsub fast float %tmp.8, %tmp.15
+  %tmp.28 = fadd fast float %tmp.26, %tmp.20
+  %tmp.30 = fsub fast float %tmp.28, %tmp.23
+  %tmp.32 = fsub fast float %tmp.30, %a4
+  %tmp.34 = fsub fast float %tmp.32, %a2
+  %T = fmul fast float %tmp.34, %tmp.34
+  ret float %T
+}
diff --git a/test/Transforms/Reassociate/fast-MissedTree.ll b/test/Transforms/Reassociate/fast-MissedTree.ll
new file mode 100644
index 000000000000..689fd6cad74d
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-MissedTree.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
+
+define float @test1(float %A, float %B) {
+; CHECK-LABEL: test1
+; CHECK: %Z = fadd fast float %A, %B
+; CHECK: ret float %Z
+	%W = fadd fast float %B, -5.0
+	%Y = fadd fast float %A, 5.0
+	%Z = fadd fast float %W, %Y
+	ret float %Z
+}
diff --git a/test/Transforms/Reassociate/fast-ReassociateVector.ll b/test/Transforms/Reassociate/fast-ReassociateVector.ll
new file mode 100644
index 000000000000..eeae096bf944
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-ReassociateVector.ll
@@ -0,0 +1,73 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+
+; Canonicalize operands, but don't optimize floating point vector operations.
+define <4 x float> @test1() {
+; CHECK-LABEL: test1
+; CHECK-NEXT: %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer
+; CHECK-NEXT: %tmp2 = fmul fast <4 x float> %tmp1, zeroinitializer
+
+  %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer
+  %tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1
+  ret <4 x float> %tmp2
+}
+
+; Commute integer vector operations.
+define <2 x i32> @test2(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: test2
+; CHECK-NEXT: %tmp1 = add <2 x i32> %x, %y
+; CHECK-NEXT: %tmp2 = add <2 x i32> %x, %y
+; CHECK-NEXT: %tmp3 = add <2 x i32> %tmp1, %tmp2
+
+  %tmp1 = add <2 x i32> %x, %y
+  %tmp2 = add <2 x i32> %y, %x
+  %tmp3 = add <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @test3(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: test3
+; CHECK-NEXT: %tmp1 = mul <2 x i32> %x, %y
+; CHECK-NEXT: %tmp2 = mul <2 x i32> %x, %y
+; CHECK-NEXT: %tmp3 = mul <2 x i32> %tmp1, %tmp2
+
+  %tmp1 = mul <2 x i32> %x, %y
+  %tmp2 = mul <2 x i32> %y, %x
+  %tmp3 = mul <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @test4(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: test4
+; CHECK-NEXT: %tmp1 = and <2 x i32> %x, %y
+; CHECK-NEXT: %tmp2 = and <2 x i32> %x, %y
+; CHECK-NEXT: %tmp3 = and <2 x i32> %tmp1, %tmp2
+
+  %tmp1 = and <2 x i32> %x, %y
+  %tmp2 = and <2 x i32> %y, %x
+  %tmp3 = and <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @test5(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: test5
+; CHECK-NEXT: %tmp1 = or <2 x i32> %x, %y
+; CHECK-NEXT: %tmp2 = or <2 x i32> %x, %y
+; CHECK-NEXT: %tmp3 = or <2 x i32> %tmp1, %tmp2
+
+  %tmp1 = or <2 x i32> %x, %y
+  %tmp2 = or <2 x i32> %y, %x
+  %tmp3 = or <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
+
+define <2 x i32> @test6(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: test6
+; CHECK-NEXT: %tmp1 = xor <2 x i32> %x, %y
+; CHECK-NEXT: %tmp2 = xor <2 x i32> %x, %y
+; CHECK-NEXT: %tmp3 = xor <2 x i32> %tmp1, %tmp2
+
+  %tmp1 = xor <2 x i32> %x, %y
+  %tmp2 = xor <2 x i32> %y, %x
+  %tmp3 = xor <2 x i32> %tmp1, %tmp2
+  ret <2 x i32> %tmp3
+}
diff --git a/test/Transforms/Reassociate/fast-SubReassociate.ll b/test/Transforms/Reassociate/fast-SubReassociate.ll
new file mode 100644
index 000000000000..db4191a4fa1b
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-SubReassociate.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -reassociate -constprop -instcombine -S | FileCheck %s
+
+define float @test1(float %A, float %B) {
+; CHECK-LABEL: test1
+; CHECK-NEXT: %W = fadd float %B, 5.000000e+00
+; CHECK-NEXT: %X = fadd float %A, -7.000000e+00
+; CHECK-NEXT: %Y = fsub float %X, %W
+; CHECK-NEXT: %Z = fadd float %Y, 1.200000e+01
+; CHECK-NEXT: ret float %Z
+
+  %W = fadd float 5.0, %B
+  %X = fadd float -7.0, %A
+  %Y = fsub float %X, %W
+  %Z = fadd float %Y, 12.0
+  ret float %Z
+}
+
+; With sub reassociation, constant folding can eliminate all of the constants.
+define float @test2(float %A, float %B) {
+; CHECK-LABEL: test2
+; CHECK-NEXT: %Z = fsub fast float %A, %B
+; CHECK-NEXT: ret float %Z
+
+  %W = fadd fast float %B, 5.000000e+00
+  %X = fadd fast float %A, -7.000000e+00
+  %Y = fsub fast float %X, %W
+  %Z = fadd fast float %Y, 1.200000e+01
+  ret float %Z
+
+}
+
+define float @test3(float %A, float %B, float %C, float %D) {
+; CHECK-LABEL: test3
+; CHECK-NEXT: %M = fadd float %A, 1.200000e+01
+; CHECK-NEXT: %N = fadd float %M, %B
+; CHECK-NEXT: %O = fadd float %N, %C
+; CHECK-NEXT: %P = fsub float %D, %O
+; CHECK-NEXT: %Q = fadd float %P, 1.200000e+01
+; CHECK-NEXT: ret float %Q
+
+  %M = fadd float %A, 1.200000e+01
+  %N = fadd float %M, %B
+  %O = fadd float %N, %C
+  %P = fsub float %D, %O
+  %Q = fadd float %P, 1.200000e+01
+  ret float %Q
+}
+
+; With sub reassociation, constant folding can eliminate the two 12 constants.
+define float @test4(float %A, float %B, float %C, float %D) {
+; CHECK-LABEL: test4
+; CHECK-NEXT: %B.neg = fsub fast float -0.000000e+00, %B
+; CHECK-NEXT: %O.neg = fsub fast float %B.neg, %A
+; CHECK-NEXT: %P = fsub fast float %O.neg, %C
+; CHECK-NEXT: %Q = fadd fast float %P, %D
+; CHECK-NEXT: ret float %Q
+
+; FIXME: InstCombine should be able to get us to the following:
+; %sum = fadd fast float %B, %A
+; %sum1 = fadd fast float %sum, %C
+; %Q = fsub fast float %D, %sum1
+; ret i32 %Q
+
+  %M = fadd fast float 1.200000e+01, %A
+  %N = fadd fast float %M, %B
+  %O = fadd fast float %N, %C
+  %P = fsub fast float %D, %O
+  %Q = fadd fast float 1.200000e+01, %P
+  ret float %Q
+}
diff --git a/test/Transforms/Reassociate/fast-basictest.ll b/test/Transforms/Reassociate/fast-basictest.ll
new file mode 100644
index 000000000000..67b07f4d2699
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-basictest.ll
@@ -0,0 +1,285 @@
+; RUN: opt < %s -reassociate -gvn -instcombine -S | FileCheck %s
+
+; With reassociation, constant folding can eliminate the 12 and -12 constants.
+define float @test1(float %arg) {
+; CHECK-LABEL: @test1
+; CHECK-NEXT: fsub fast float -0.000000e+00, %arg
+; CHECK-NEXT: ret float
+
+  %tmp1 = fsub fast float -1.200000e+01, %arg
+  %tmp2 = fadd fast float %tmp1, 1.200000e+01
+  ret float %tmp2
+}
+
+define float @test2(float %reg109, float %reg1111) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: fadd float %reg109, -3.000000e+01
+; CHECK-NEXT: fadd float %reg115, %reg1111
+; CHECK-NEXT: fadd float %reg116, 3.000000e+01
+; CHECK-NEXT: ret float
+
+  %reg115 = fadd float %reg109, -3.000000e+01
+  %reg116 = fadd float %reg115, %reg1111
+  %reg117 = fadd float %reg116, 3.000000e+01
+  ret float %reg117
+}
+
+define float @test3(float %reg109, float %reg1111) {
+; CHECK-LABEL: @test3
+; CHECK-NEXT: %reg117 = fadd fast float %reg109, %reg1111
+; CHECK-NEXT:  ret float %reg117
+
+  %reg115 = fadd fast float %reg109, -3.000000e+01
+  %reg116 = fadd fast float %reg115, %reg1111
+  %reg117 = fadd fast float %reg116, 3.000000e+01
+  ret float %reg117
+}
+
+@fe = external global float
+@fa = external global float
+@fb = external global float
+@fc = external global float
+@ff = external global float
+
+define void @test4() {
+; CHECK-LABEL: @test4
+; CHECK: fadd fast float
+; CHECK: fadd fast float
+; CHECK-NOT: fadd fast float
+; CHECK: ret void
+
+  %A = load float* @fa
+  %B = load float* @fb
+  %C = load float* @fc
+  %t1 = fadd fast float %A, %B
+  %t2 = fadd fast float %t1, %C
+  %t3 = fadd fast float %C, %A
+  %t4 = fadd fast float %t3, %B
+  ; e = (a+b)+c;
+  store float %t2, float* @fe
+  ; f = (a+c)+b
+  store float %t4, float* @ff
+  ret void
+}
+
+define void @test5() {
+; CHECK-LABEL: @test5
+; CHECK: fadd fast float
+; CHECK: fadd fast float
+; CHECK-NOT: fadd
+; CHECK: ret void
+
+  %A = load float* @fa
+  %B = load float* @fb
+  %C = load float* @fc
+  %t1 = fadd fast float %A, %B
+  %t2 = fadd fast float %t1, %C
+  %t3 = fadd fast float %C, %A
+  %t4 = fadd fast float %t3, %B
+  ; e = c+(a+b)
+  store float %t2, float* @fe
+  ; f = (c+a)+b
+  store float %t4, float* @ff
+  ret void
+}
+
+define void @test6() {
+; CHECK-LABEL: @test6
+; CHECK: fadd fast float
+; CHECK: fadd fast float
+; CHECK-NOT: fadd
+; CHECK: ret void
+
+  %A = load float* @fa
+  %B = load float* @fb
+  %C = load float* @fc
+  %t1 = fadd fast float %B, %A
+  %t2 = fadd fast float %t1, %C
+  %t3 = fadd fast float %C, %A
+  %t4 = fadd fast float %t3, %B
+  ; e = c+(b+a)
+  store float %t2, float* @fe
+  ; f = (c+a)+b
+  store float %t4, float* @ff
+  ret void
+}
+
+define float @test7(float %A, float %B, float %C) {
+; CHECK-LABEL: @test7
+; CHECK-NEXT: fadd fast float %C, %B
+; CHECK-NEXT: fmul fast float %A, %A
+; CHECK-NEXT: fmul fast float %1, %tmp2
+; CHECK-NEXT: ret float
+
+  %aa = fmul fast float %A, %A
+  %aab = fmul fast float %aa, %B
+  %ac = fmul fast float %A, %C
+  %aac = fmul fast float %ac, %A
+  %r = fadd fast float %aab, %aac
+  ret float %r
+}
+
+define float @test8(float %X, float %Y, float %Z) {
+; CHECK-LABEL: @test8
+; CHECK-NEXT: fmul fast float %Y, %X
+; CHECK-NEXT: fsub fast float %Z
+; CHECK-NEXT: ret float
+
+  %A = fsub fast float 0.0, %X
+  %B = fmul fast float %A, %Y
+  ; (-X)*Y + Z -> Z-X*Y
+  %C = fadd fast float %B, %Z
+  ret float %C
+}
+
+define float @test9(float %X) {
+; CHECK-LABEL: @test9
+; CHECK-NEXT: fmul fast float %X, 9.400000e+01
+; CHECK-NEXT: ret float
+
+  %Y = fmul fast float %X, 4.700000e+01
+  %Z = fadd fast float %Y, %Y
+  ret float %Z
+}
+
+define float @test10(float %X) {
+; CHECK-LABEL: @test10
+; CHECK-NEXT: fmul fast float %X, 3.000000e+00
+; CHECK-NEXT: ret float
+
+  %Y = fadd fast float %X ,%X
+  %Z = fadd fast float %Y, %X
+  ret float %Z
+}
+
+define float @test11(float %W) {
+; CHECK-LABEL: test11
+; CHECK-NEXT: fmul fast float %W, 3.810000e+02
+; CHECK-NEXT: ret float
+
+  %X = fmul fast float %W, 127.0
+  %Y = fadd fast float %X ,%X
+  %Z = fadd fast float %Y, %X
+  ret float %Z
+}
+
+define float @test12(float %X) {
+; CHECK-LABEL: @test12
+; CHECK-NEXT: fmul fast float %X, -3.000000e+00
+; CHECK-NEXT: fadd fast float %factor, 6.000000e+00
+; CHECK-NEXT: ret float
+
+  %A = fsub fast float 1.000000e+00, %X
+  %B = fsub fast float 2.000000e+00, %X
+  %C = fsub fast float 3.000000e+00, %X
+  %Y = fadd fast float %A ,%B
+  %Z = fadd fast float %Y, %C
+  ret float %Z
+}
+
+define float @test13(float %X1, float %X2, float %X3) {
+; CHECK-LABEL: @test13
+; CHECK-NEXT: fsub fast float %X3, %X2
+; CHECK-NEXT: fmul fast float {{.*}}, %X1
+; CHECK-NEXT: ret float
+
+  %A = fsub fast float 0.000000e+00, %X1
+  %B = fmul fast float %A, %X2   ; -X1*X2
+  %C = fmul fast float %X1, %X3  ; X1*X3
+  %D = fadd fast float %B, %C    ; -X1*X2 + X1*X3 -> X1*(X3-X2)
+  ret float %D
+}
+
+define float @test14(float %X1, float %X2) {
+; CHECK-LABEL: @test14
+; CHECK-NEXT: fsub fast float %X1, %X2
+; CHECK-NEXT: fmul fast float %1, 4.700000e+01
+; CHECK-NEXT: ret float
+
+  %B = fmul fast float %X1, 47.   ; X1*47
+  %C = fmul fast float %X2, -47.  ; X2*-47
+  %D = fadd fast float %B, %C    ; X1*47 + X2*-47 -> 47*(X1-X2)
+  ret float %D
+}
+
+define float @test15(float %arg) {
+; CHECK-LABEL: test15
+; CHECK-NEXT: fmul fast float %arg, 1.440000e+02
+; CHECK-NEXT: ret float %tmp2
+
+  %tmp1 = fmul fast float 1.200000e+01, %arg
+  %tmp2 = fmul fast float %tmp1, 1.200000e+01
+  ret float %tmp2
+}
+
+; (b+(a+1234))+-a -> b+1234
+define float @test16(float %b, float %a) {
+; CHECK-LABEL: @test16
+; CHECK-NEXT: fadd fast float %b, 1.234000e+03
+; CHECK-NEXT: ret float
+
+  %1 = fadd fast float %a, 1234.0
+  %2 = fadd fast float %b, %1
+  %3 = fsub fast float 0.0, %a
+  %4 = fadd fast float %2, %3
+  ret float %4
+}
+
+; Test that we can turn things like X*-(Y*Z) -> X*-1*Y*Z.
+
+define float @test17(float %a, float %b, float %z) {
+; CHECK-LABEL: test17
+; CHECK-NEXT: fmul fast float %a, 1.234500e+04
+; CHECK-NEXT: fmul fast float %e, %b
+; CHECK-NEXT: fmul fast float %f, %z
+; CHECK-NEXT: ret float
+
+  %c = fsub fast float 0.000000e+00, %z
+  %d = fmul fast float %a, %b
+  %e = fmul fast float %c, %d
+  %f = fmul fast float %e, 1.234500e+04
+  %g = fsub fast float 0.000000e+00, %f
+  ret float %g
+}
+
+define float @test18(float %a, float %b, float %z) {
+; CHECK-LABEL: test18
+; CHECK-NEXT: fmul fast float %a, 4.000000e+01
+; CHECK-NEXT: fmul fast float %e, %z
+; CHECK-NEXT: ret float
+
+  %d = fmul fast float %z, 4.000000e+01
+  %c = fsub fast float 0.000000e+00, %d
+  %e = fmul fast float %a, %c
+  %f = fsub fast float 0.000000e+00, %e
+  ret float %f
+}
+
+; With sub reassociation, constant folding can eliminate the 12 and -12 constants.
+define float @test19(float %A, float %B) {
+; CHECK-LABEL: @test19
+; CHECK-NEXT: fsub fast float %A, %B
+; CHECK-NEXT: ret float
+  %X = fadd fast float -1.200000e+01, %A
+  %Y = fsub fast float %X, %B
+  %Z = fadd fast float %Y, 1.200000e+01
+  ret float %Z
+}
+
+; With sub reassociation, constant folding can eliminate the uses of %a.
+define float @test20(float %a, float %b, float %c) nounwind  {
+; CHECK-LABEL: @test20
+; CHECK-NEXT: fsub fast float -0.000000e+00, %b
+; CHECK-NEXT: fsub fast float %b.neg, %c
+; CHECK-NEXT: ret float
+
+; FIXME: Should be able to generate the below, which may expose more
+;        opportunites for FAdd reassociation.
+; %sum = fadd fast float %c, %b
+; %tmp7 = fsub fast float 0, %sum
+
+  %tmp3 = fsub fast float %a, %b
+  %tmp5 = fsub fast float %tmp3, %c
+  %tmp7 = fsub fast float %tmp5, %a
+  ret float %tmp7
+}
diff --git a/test/Transforms/Reassociate/fast-fp-commute.ll b/test/Transforms/Reassociate/fast-fp-commute.ll
new file mode 100644
index 000000000000..ad89607a21e4
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-fp-commute.ll
@@ -0,0 +1,44 @@
+; RUN: opt -reassociate -S < %s | FileCheck %s
+
+declare void @use(float)
+
+define void @test1(float %x, float %y) {
+; CHECK-LABEL: test1
+; CHECK: fmul fast float %y, %x
+; CHECK: fmul fast float %y, %x
+; CHECK: fsub fast float %1, %2
+; CHECK: call void @use(float %{{.*}})
+; CHECK: call void @use(float %{{.*}})
+
+  %1 = fmul fast float %x, %y
+  %2 = fmul fast float %y, %x
+  %3 = fsub fast float %1, %2
+  call void @use(float %1)
+  call void @use(float %3)
+  ret void
+}
+
+define float @test2(float %x, float %y) {
+; CHECK-LABEL: test2
+; CHECK-NEXT: fmul fast float %y, %x
+; CHECK-NEXT: fmul fast float %y, %x
+; CHECK-NEXT: fsub fast float %1, %2
+; CHECK-NEXT: ret float %3
+
+  %1 = fmul fast float %x, %y
+  %2 = fmul fast float %y, %x
+  %3 = fsub fast float %1, %2
+  ret float %3
+}
+
+define float @test3(float %x, float %y) {
+; CHECK-LABEL: test3
+; CHECK-NEXT: %factor = fmul fast float %y, 2.000000e+00
+; CHECK-NEXT: %tmp1 = fmul fast float %factor, %x
+; CHECK-NEXT: ret float %tmp1
+
+  %1 = fmul fast float %x, %y
+  %2 = fmul fast float %y, %x
+  %3 = fadd fast float %1, %2
+  ret float %3
+}
diff --git a/test/Transforms/Reassociate/fast-mightymul.ll b/test/Transforms/Reassociate/fast-mightymul.ll
new file mode 100644
index 000000000000..98bdf7a55ace
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-mightymul.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -reassociate -disable-output
+; PR13021
+
+define float @test2(float %x) {
+  %t0 = fmul fast float %x, %x
+  %t1 = fmul fast float %t0, %t0
+  %t2 = fmul fast float %t1, %t1
+  %t3 = fmul fast float %t2, %t2
+  %t4 = fmul fast float %t3, %t3
+  %t5 = fmul fast float %t4, %t4
+  %t6 = fmul fast float %t5, %t5
+  %t7 = fmul fast float %t6, %t6
+  %t8 = fmul fast float %t7, %t7
+  %t9 = fmul fast float %t8, %t8
+  %t10 = fmul fast float %t9, %t9
+  %t11 = fmul fast float %t10, %t10
+  %t12 = fmul fast float %t11, %t11
+  %t13 = fmul fast float %t12, %t12
+  %t14 = fmul fast float %t13, %t13
+  %t15 = fmul fast float %t14, %t14
+  %t16 = fmul fast float %t15, %t15
+  %t17 = fmul fast float %t16, %t16
+  %t18 = fmul fast float %t17, %t17
+  %t19 = fmul fast float %t18, %t18
+  %t20 = fmul fast float %t19, %t19
+  %t21 = fmul fast float %t20, %t20
+  %t22 = fmul fast float %t21, %t21
+  %t23 = fmul fast float %t22, %t22
+  %t24 = fmul fast float %t23, %t23
+  %t25 = fmul fast float %t24, %t24
+  %t26 = fmul fast float %t25, %t25
+  %t27 = fmul fast float %t26, %t26
+  %t28 = fmul fast float %t27, %t27
+  ret float %t28
+}
diff --git a/test/Transforms/Reassociate/fast-multistep.ll b/test/Transforms/Reassociate/fast-multistep.ll
new file mode 100644
index 000000000000..45e15c7f3539
--- /dev/null
+++ b/test/Transforms/Reassociate/fast-multistep.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+
+define float @fmultistep1(float %a, float %b, float %c) {
+; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
+; CHECK-LABEL: @fmultistep1
+; CHECK-NEXT: fadd fast float %c, %b
+; CHECK-NEXT: fmul fast float %a, %tmp2
+; CHECK-NEXT: fmul fast float %tmp3, %a
+; CHECK-NEXT: ret float
+
+  %t0 = fmul fast float %a, %b
+  %t1 = fmul fast float %a, %t0 ; a*(a*b)
+  %t2 = fmul fast float %a, %c
+  %t3 = fmul fast float %a, %t2 ; a*(a*c)
+  %t4 = fadd fast float %t1, %t3
+  ret float %t4
+}
+
+define float @fmultistep2(float %a, float %b, float %c, float %d) {
+; Check that a*b+a*c+d is turned into a*(b+c)+d.
+; CHECK-LABEL: @fmultistep2
+; CHECK-NEXT: fadd fast float %c, %b
+; CHECK-NEXT: fmul fast float %tmp, %a
+; CHECK-NEXT: fadd fast float %tmp1, %d
+; CHECK-NEXT: ret float
+
+  %t0 = fmul fast float %a, %b
+  %t1 = fmul fast float %a, %c
+  %t2 = fadd fast float %t1, %d ; a*c+d
+  %t3 = fadd fast float %t0, %t2 ; a*b+(a*c+d)
+  ret float %t3
+}
diff --git a/test/Transforms/Reassociate/mixed-fast-nonfast-fp.ll b/test/Transforms/Reassociate/mixed-fast-nonfast-fp.ll
new file mode 100644
index 000000000000..f51c0c17fe7d
--- /dev/null
+++ b/test/Transforms/Reassociate/mixed-fast-nonfast-fp.ll
@@ -0,0 +1,18 @@
+; RUN: opt -reassociate %s -S | FileCheck %s
+
+define float @foo(float %a,float %b, float %c) {
+; CHECK: %mul3 = fmul float %a, %b
+; CHECK-NEXT: fmul fast float %c, 2.000000e+00
+; CHECK-NEXT: fadd fast float %factor, %b
+; CHECK-NEXT: fmul fast float %tmp1, %a
+; CHECK-NEXT: fadd fast float %tmp2, %mul3
+; CHECK-NEXT: ret float
+  %mul1 = fmul fast float %a, %c
+  %mul2 = fmul fast float %a, %b
+  %mul3 = fmul float %a, %b
+  %mul4 = fmul fast float %a, %c
+  %add1 = fadd fast  float %mul1, %mul3
+  %add2 = fadd  fast float %mul4, %mul2
+  %add3 = fadd fast float %add1, %add2
+  ret float %add3
+}
diff --git a/test/Transforms/Reassociate/multistep.ll b/test/Transforms/Reassociate/multistep.ll
index 12eaeeea7b7b..c499646a8b6a 100644
--- a/test/Transforms/Reassociate/multistep.ll
+++ b/test/Transforms/Reassociate/multistep.ll
@@ -9,7 +9,7 @@ define i64 @multistep1(i64 %a, i64 %b, i64 %c) {
   %t3 = mul i64 %a, %t2 ; a*(a*c)
   %t4 = add i64 %t1, %t3
 ; CHECK-NEXT: add i64 %c, %b
-; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
+; CHECK-NEXT: mul i64 %a, %tmp{{.*}}
 ; CHECK-NEXT: mul i64 %tmp{{.*}}, %a
 ; CHECK-NEXT: ret
   ret i64 %t4
diff --git a/test/Transforms/Reassociate/negation1.ll b/test/Transforms/Reassociate/negation1.ll
new file mode 100644
index 000000000000..34b943cf496d
--- /dev/null
+++ b/test/Transforms/Reassociate/negation1.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -reassociate -instcombine -S | FileCheck %s
+
+; Test that we can turn things like A*B + X - A*B -> X.
+
+define i32 @test1(i32 %a, i32 %b, i32 %x) {
+; CHECK-LABEL: test1
+; CHECK: ret i32 %x
+
+  %c = mul i32 %a, %b
+  %d = add i32 %c, %x
+  %c1 = mul i32 %a, %b
+  %f = sub i32 %d, %c1
+  ret i32 %f
+}
+
diff --git a/test/Transforms/Reassociate/pr21205.ll b/test/Transforms/Reassociate/pr21205.ll
new file mode 100644
index 000000000000..fcc7150478eb
--- /dev/null
+++ b/test/Transforms/Reassociate/pr21205.ll
@@ -0,0 +1,21 @@
+; RUN: opt -reassociate -S < %s | FileCheck %s
+; PR21205
+
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+
+; Don't canonicalize %conv - undef into %conv + (-undef).
+; CHECK-LABEL: @test1
+; CHECK: %sub = fsub fast float %conv, undef
+; CHECK: %sub1 = fadd fast float %sub, -1.000000e+00
+
+define i32 @test1() {
+entry:
+  %0 = load i32* @a, align 4
+  %conv = sitofp i32 %0 to float
+  %sub = fsub fast float %conv, undef
+  %sub1 = fadd fast float %sub, -1.000000e+00
+  %conv2 = fptosi float %sub1 to i32
+  store i32 %conv2, i32* @b, align 4
+  ret i32 undef
+}
diff --git a/test/Transforms/Reassociate/wrap-flags.ll b/test/Transforms/Reassociate/wrap-flags.ll
new file mode 100644
index 000000000000..e3304b6a7bb2
--- /dev/null
+++ b/test/Transforms/Reassociate/wrap-flags.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -reassociate -dce -S | FileCheck %s
+; PR12985
+
+; Verify the nsw flags are preserved when converting shl to mul.
+
+; CHECK-LABEL: @shl_to_mul_nsw(
+; CHECK: %mul = mul i32 %i, -2147483648
+; CHECK: add i32 %mul, 1
+define i32 @shl_to_mul_nsw(i32 %i) {
+entry:
+  %mul = shl nsw i32 %i, 31
+  %mul2 = add i32 %mul, 1
+  ret i32 %mul2
+}
+
+; CHECK-LABEL: @shl_to_mul_nuw(
+; CHECK: %mul = mul nuw i32 %i, 4
+; CHECK: add i32 %mul, 1
+define i32 @shl_to_mul_nuw(i32 %i) {
+entry:
+  %mul = shl nuw i32 %i, 2
+  %mul2 = add i32 %mul, 1
+  ret i32 %mul2
+}
+
+; CHECK-LABEL: @shl_to_mul_nuw_nsw(
+; CHECK: %mul = mul nuw nsw i32 %i, 4
+; CHECK: add i32 %mul, 1
+define i32 @shl_to_mul_nuw_nsw(i32 %i) {
+entry:
+  %mul = shl nuw nsw i32 %i, 2
+  %mul2 = add i32 %mul, 1
+  ret i32 %mul2
+}
diff --git a/test/Transforms/SCCP/ipsccp-basic.ll b/test/Transforms/SCCP/ipsccp-basic.ll
index c1c6c926fd9b..107b7af2c1dc 100644
--- a/test/Transforms/SCCP/ipsccp-basic.ll
+++ b/test/Transforms/SCCP/ipsccp-basic.ll
@@ -227,3 +227,23 @@ entry:
 ; CHECK-LABEL: define internal i32 @test10b(
 ; CHECK: ret i32 undef
 }
+
+;;======================== test11
+
+define i64 @test11a() {
+  %xor = xor i64 undef, undef
+  ret i64 %xor
+; CHECK-LABEL: define i64 @test11a
+; CHECK: ret i64 0
+}
+
+define void @test11b() {
+  %call1 = call i64 @test11a()
+  %call2 = call i64 @llvm.ctpop.i64(i64 %call1)
+  ret void
+; CHECK-LABEL: define void @test11b
+; CHECK: %[[call1:.*]] = call i64 @test11a()
+; CHECK: %[[call2:.*]] = call i64 @llvm.ctpop.i64(i64 0)
+}
+
+declare i64 @llvm.ctpop.i64(i64)
diff --git a/test/Transforms/SLPVectorizer/AArch64/commute.ll b/test/Transforms/SLPVectorizer/AArch64/commute.ll
new file mode 100644
index 000000000000..4ee91a5ed4c0
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/commute.ll
@@ -0,0 +1,75 @@
+; RUN: opt -S -slp-vectorizer %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%structA = type { [2 x float] }
+
+define void @test1(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
+; CHECK-LABEL: test1
+; CHECK: %arrayidx4 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 0
+; CHECK: %arrayidx9 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 1
+; CHECK: %3 = bitcast float* %arrayidx4 to <2 x float>*
+; CHECK: %4 = load <2 x float>* %3, align 4
+; CHECK: %5 = fsub fast <2 x float> %2, %4
+; CHECK: %6 = fmul fast <2 x float> %5, %5
+; CHECK: %7 = extractelement <2 x float> %6, i32 0
+; CHECK: %8 = extractelement <2 x float> %6, i32 1
+; CHECK: %add = fadd fast float %7, %8
+; CHECK: %cmp = fcmp oeq float %add, 0.000000e+00
+
+entry:
+  br label %for.body3.lr.ph
+
+for.body3.lr.ph:
+  %conv5 = sitofp i32 %ymin to float
+  %conv = sitofp i32 %xmin to float
+  %arrayidx4 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 0
+  %0 = load float* %arrayidx4, align 4
+  %sub = fsub fast float %conv, %0
+  %arrayidx9 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 1
+  %1 = load float* %arrayidx9, align 4
+  %sub10 = fsub fast float %conv5, %1
+  %mul11 = fmul fast float %sub, %sub
+  %mul12 = fmul fast float %sub10, %sub10
+  %add = fadd fast float %mul11, %mul12
+  %cmp = fcmp oeq float %add, 0.000000e+00
+  br i1 %cmp, label %for.body3.lr.ph, label %for.end27
+
+for.end27:
+  ret void
+}
+
+define void @test2(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
+; CHECK-LABEL: test2
+; CHECK: %arrayidx4 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 0
+; CHECK: %arrayidx9 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 1
+; CHECK: %3 = bitcast float* %arrayidx4 to <2 x float>*
+; CHECK: %4 = load <2 x float>* %3, align 4
+; CHECK: %5 = fsub fast <2 x float> %2, %4
+; CHECK: %6 = fmul fast <2 x float> %5, %5
+; CHECK: %7 = extractelement <2 x float> %6, i32 0
+; CHECK: %8 = extractelement <2 x float> %6, i32 1
+; CHECK: %add = fadd fast float %8, %7
+; CHECK: %cmp = fcmp oeq float %add, 0.000000e+00
+
+entry:
+  br label %for.body3.lr.ph
+
+for.body3.lr.ph:
+  %conv5 = sitofp i32 %ymin to float
+  %conv = sitofp i32 %xmin to float
+  %arrayidx4 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 0
+  %0 = load float* %arrayidx4, align 4
+  %sub = fsub fast float %conv, %0
+  %arrayidx9 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 1
+  %1 = load float* %arrayidx9, align 4
+  %sub10 = fsub fast float %conv5, %1
+  %mul11 = fmul fast float %sub, %sub
+  %mul12 = fmul fast float %sub10, %sub10
+  %add = fadd fast float %mul12, %mul11         ;;;<---- Operands commuted!!
+  %cmp = fcmp oeq float %add, 0.000000e+00
+  br i1 %cmp, label %for.body3.lr.ph, label %for.end27
+
+for.end27:
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll b/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
new file mode 100644
index 000000000000..45fa2f917f3a
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
@@ -0,0 +1,46 @@
+; RUN: opt -S -basicaa -slp-vectorizer < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+; Holding a value live over a call boundary may require
+; spills and fills. This is the case for <2 x double>,
+; as it occupies a Q register of which there are no
+; callee-saves.
+ 
+; CHECK: load double
+; CHECK: load double
+; CHECK: call void @g
+; CHECK: store double
+; CHECK: store double
+define void @f(double* %p, double* %q) {
+  %addr2 = getelementptr double* %q, i32 1
+  %addr = getelementptr double* %p, i32 1
+  %x = load double* %p
+  %y = load double* %addr
+  call void @g()
+  store double %x, double* %q
+  store double %y, double* %addr2
+  ret void
+}
+declare void @g()
+
+; Check we deal with loops correctly.
+;
+; CHECK: store <2 x double>
+; CHECK: load <2 x double>
+define void @f2(double* %p, double* %q) {
+entry:
+  br label %loop
+
+loop:
+  %p1 = phi double [0.0, %entry], [%x, %loop]
+  %p2 = phi double [0.0, %entry], [%y, %loop]
+  %addr2 = getelementptr double* %q, i32 1
+  %addr = getelementptr double* %p, i32 1
+  store double %p1, double* %q
+  store double %p2, double* %addr2
+
+  %x = load double* %p
+  %y = load double* %addr
+  br label %loop
+}
diff --git a/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll b/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
new file mode 100644
index 000000000000..e49c7adda400
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu -mcpu=cortex-a57 | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: @test1
+; CHECK: load <4 x i32>
+; CHECK: add nsw <4 x i32>
+; CHECK: sdiv <4 x i32>
+
+define void @test1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c) {
+entry:
+  %0 = load i32* %b, align 4
+  %1 = load i32* %c, align 4
+  %add = add nsw i32 %1, %0
+  %div = sdiv i32 %add, 2
+  store i32 %div, i32* %a, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 1
+  %2 = load i32* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32* %c, i64 1
+  %3 = load i32* %arrayidx4, align 4
+  %add5 = add nsw i32 %3, %2
+  %div6 = sdiv i32 %add5, 2
+  %arrayidx7 = getelementptr inbounds i32* %a, i64 1
+  store i32 %div6, i32* %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i32* %b, i64 2
+  %4 = load i32* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32* %c, i64 2
+  %5 = load i32* %arrayidx9, align 4
+  %add10 = add nsw i32 %5, %4
+  %div11 = sdiv i32 %add10, 2
+  %arrayidx12 = getelementptr inbounds i32* %a, i64 2
+  store i32 %div11, i32* %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32* %b, i64 3
+  %6 = load i32* %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i32* %c, i64 3
+  %7 = load i32* %arrayidx14, align 4
+  %add15 = add nsw i32 %7, %6
+  %div16 = sdiv i32 %add15, 2
+  %arrayidx17 = getelementptr inbounds i32* %a, i64 3
+  store i32 %div16, i32* %arrayidx17, align 4
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/ARM/sroa.ll b/test/Transforms/SLPVectorizer/ARM/sroa.ll
index e0c75b147f6f..899cfb1f82cf 100644
--- a/test/Transforms/SLPVectorizer/ARM/sroa.ll
+++ b/test/Transforms/SLPVectorizer/ARM/sroa.ll
@@ -5,11 +5,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-
 %class.Complex = type { double, double }
 
 ; Code like this is the result of SROA. Make sure we don't vectorize this
-; because the in the scalar version of this the shl/or are handled by the
+; because the scalar version of the shl/or are handled by the
 ; backend and disappear, the vectorized code stays.
 
 ; CHECK-LABEL: SROAed
-; CHECK-NOT: shl <2 x i64>
+; CHECK-NOT: shl nuw <2 x i64>
 ; CHECK-NOT: or <2 x i64>
 
 define void @SROAed(%class.Complex* noalias nocapture sret %agg.result, [4 x i32] %a.coerce, [4 x i32] %b.coerce) {
diff --git a/test/Transforms/SLPVectorizer/X86/addsub.ll b/test/Transforms/SLPVectorizer/X86/addsub.ll
index 8303bc8181ef..174d40046849 100644
--- a/test/Transforms/SLPVectorizer/X86/addsub.ll
+++ b/test/Transforms/SLPVectorizer/X86/addsub.ll
@@ -12,9 +12,9 @@ target triple = "x86_64-unknown-linux-gnu"
 @fa = common global [4 x float] zeroinitializer, align 16
 
 ; CHECK-LABEL: @addsub
-; CHECK: %5 = add <4 x i32> %3, %4
-; CHECK: %6 = add <4 x i32> %2, %5
-; CHECK: %7 = sub <4 x i32> %2, %5
+; CHECK: %5 = add nsw <4 x i32> %3, %4
+; CHECK: %6 = add nsw <4 x i32> %2, %5
+; CHECK: %7 = sub nsw <4 x i32> %2, %5
 ; CHECK: %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
 
 ; Function Attrs: nounwind uwtable
@@ -56,9 +56,9 @@ entry:
 }
 
 ; CHECK-LABEL: @subadd
-; CHECK:  %5 = add <4 x i32> %3, %4
-; CHECK:  %6 = sub <4 x i32> %2, %5
-; CHECK:  %7 = add <4 x i32> %2, %5
+; CHECK:  %5 = add nsw <4 x i32> %3, %4
+; CHECK:  %6 = sub nsw <4 x i32> %2, %5
+; CHECK:  %7 = add nsw <4 x i32> %2, %5
 ; CHECK:  %8 = shufflevector <4 x i32> %6, <4 x i32> %7, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
 
 ; Function Attrs: nounwind uwtable
diff --git a/test/Transforms/SLPVectorizer/X86/align.ll b/test/Transforms/SLPVectorizer/X86/align.ll
index f5865733ccb5..ce8062025064 100644
--- a/test/Transforms/SLPVectorizer/X86/align.ll
+++ b/test/Transforms/SLPVectorizer/X86/align.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 ; Simple 3-pair chain with loads and stores
-; CHECK: test1
+; CHECK-LABEL: @test1
 define void @test1(double* %a, double* %b, double* %c) {
 entry:
   %agg.tmp.i.i.sroa.0 = alloca [3 x double], align 16
@@ -25,3 +25,31 @@ entry:
 ; CHECK: ret
   ret void
 }
+
+; Float has 4 byte abi alignment on x86_64. We must use the alignmnet of the
+; value being loaded/stored not the alignment of the pointer type.
+
+; CHECK-LABEL: @test2
+; CHECK-NOT: align 8
+; CHECK: load <4 x float>{{.*}}, align 4
+; CHECK: store <4 x float>{{.*}}, align 4
+; CHECK: ret
+
+define void @test2(float * %a, float * %b) {
+entry:
+  %l0 = load float* %a
+  %a1 = getelementptr inbounds float* %a, i64 1
+  %l1 = load float* %a1
+  %a2 = getelementptr inbounds float* %a, i64 2
+  %l2 = load float* %a2
+  %a3 = getelementptr inbounds float* %a, i64 3
+  %l3 = load float* %a3
+  store float %l0, float* %b
+  %b1 = getelementptr inbounds float* %b, i64 1
+  store float %l1, float* %b1
+  %b2 = getelementptr inbounds float* %b, i64 2
+  store float %l2, float* %b2
+  %b3 = getelementptr inbounds float* %b, i64 3
+  store float %l3, float* %b3
+  ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/consecutive-access.ll b/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
index f4f112fe32c6..aa59429a77d1 100644
--- a/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
+++ b/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
@@ -172,4 +172,4 @@ attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-po
 
 !llvm.ident = !{!0}
 
-!0 = metadata !{metadata !"clang version 3.5.0 "}
+!0 = !{!"clang version 3.5.0 "}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll b/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll
new file mode 100644
index 000000000000..dc99366e1da7
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin13.3.0"
+
+@a = common global double 0.000000e+00, align 8
+
+define i32 @fn1() {
+entry:
+  %init = load double* @a, align 8
+  br label %loop
+
+loop:
+  %phi = phi double [ %add2, %loop ], [ %init, %entry ]
+  %postadd1_phi = phi double [ %postadd1, %loop ], [ %init, %entry ]
+  %postadd2_phi = phi double [ %postadd2, %loop ], [ %init, %entry ]
+  %add1 = fadd double %postadd1_phi, undef
+  %add2 = fadd double %postadd2_phi, %phi
+  %mul2 = fmul double %add2, 0.000000e+00
+  %binaryop_B = fadd double %postadd1_phi, %mul2
+  %mul1 = fmul double %add1, 0.000000e+00
+  %tmp = fadd double %postadd2_phi, 0.000000e+00
+
+  ; tryToVectorize() starts with this binary instruction.
+  ; At the same time vectorization wraps around the loop, vectorizes
+  ; postadd1/2 and eventually binary_V and tmp. So binary_V itself is replaced
+  ; with a vector instruction.
+  ; The SLPVectorizer crashed because it tried to use binary_V
+  ; after vectorization to re-arrange instructions.
+  %binary_V = fadd double %mul1, %binaryop_B
+
+  %postadd1 = fadd double %binary_V, 0.000000e+00
+  %postadd2 = fadd double %tmp, 1.000000e+00
+  %tobool = fcmp une double %postadd1, 0.000000e+00
+  br i1 %tobool, label %exit, label %loop
+
+exit:
+  ret i32 1
+}
+
+
diff --git a/test/Transforms/SLPVectorizer/X86/crash_gep.ll b/test/Transforms/SLPVectorizer/X86/crash_gep.ll
new file mode 100644
index 000000000000..dd4034c116e4
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_gep.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-unknown-linux-gnu
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = common global i64* null, align 8
+
+; Function Attrs: nounwind uwtable
+define i32 @fn1() {
+entry:
+  %0 = load i64** @a, align 8
+  %add.ptr = getelementptr inbounds i64* %0, i64 1
+  %1 = ptrtoint i64* %add.ptr to i64
+  %arrayidx = getelementptr inbounds i64* %0, i64 2
+  store i64 %1, i64* %arrayidx, align 8
+  %2 = ptrtoint i64* %arrayidx to i64
+  store i64 %2, i64* %add.ptr, align 8
+  ret i32 undef
+}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
new file mode 100644
index 000000000000..e6cc0f783d79
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin13.3.0"
+
+define void @_foo(double %p1, double %p2, double %p3) #0 {
+entry:
+  %tab1 = alloca [256 x i32], align 16
+  %tab2 = alloca [256 x i32], align 16
+  br label %bb1
+
+
+bb1:
+  %mul19 = fmul double %p1, 1.638400e+04
+  %mul20 = fmul double %p3, 1.638400e+04
+  %add = fadd double %mul20, 8.192000e+03
+  %mul21 = fmul double %p2, 1.638400e+04
+  ; The SLPVectorizer crashed when scheduling this block after it inserted an
+  ; insertelement instruction (during vectorizing the for.body block) at this position.
+  br label %for.body
+
+for.body:
+  %indvars.iv266 = phi i64 [ 0, %bb1 ], [ %indvars.iv.next267, %for.body ]
+  %t.0259 = phi double [ 0.000000e+00, %bb1 ], [ %add27, %for.body ]
+  %p3.addr.0258 = phi double [ %add, %bb1 ], [ %add28, %for.body ]
+  %vecinit.i.i237 = insertelement <2 x double> undef, double %t.0259, i32 0
+  %x13 = tail call i32 @_xfn(<2 x double> %vecinit.i.i237) #2
+  %arrayidx = getelementptr inbounds [256 x i32]* %tab1, i64 0, i64 %indvars.iv266
+  store i32 %x13, i32* %arrayidx, align 4, !tbaa !4
+  %vecinit.i.i = insertelement <2 x double> undef, double %p3.addr.0258, i32 0
+  %x14 = tail call i32 @_xfn(<2 x double> %vecinit.i.i) #2
+  %arrayidx26 = getelementptr inbounds [256 x i32]* %tab2, i64 0, i64 %indvars.iv266
+  store i32 %x14, i32* %arrayidx26, align 4, !tbaa !4
+  %add27 = fadd double %mul19, %t.0259
+  %add28 = fadd double %mul21, %p3.addr.0258
+  %indvars.iv.next267 = add nuw nsw i64 %indvars.iv266, 1
+  %exitcond = icmp eq i64 %indvars.iv.next267, 256
+  br i1 %exitcond, label %return, label %for.body
+
+return:
+  ret void
+}
+
+declare i32 @_xfn(<2 x double>) #4
+
+!3 = !{!"int", !4, i64 0}
+!4 = !{!3, !3, i64 0}
diff --git a/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll b/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
index c7ec98adf135..9f1fb71e096f 100644
--- a/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
+++ b/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
@@ -1,4 +1,4 @@
-; RUN: opt -slp-vectorizer -mtriple=x86_64-apple-macosx10.9.0 -mcpu=corei7-avx -S < %s | FileCheck %s
+; RUN: opt -basicaa -slp-vectorizer -mtriple=x86_64-apple-macosx10.9.0 -mcpu=corei7-avx -S < %s | FileCheck %s
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.9.0"
 
diff --git a/test/Transforms/SLPVectorizer/X86/cycle_dup.ll b/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
index fba35499fb7d..bac2c3c0df34 100644
--- a/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
+++ b/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
@@ -15,7 +15,7 @@ target triple = "x86_64-apple-macosx10.9.0"
 ;CHECK: bitcast i32* %A to <4 x i32>*
 ;CHECK-NEXT: load <4 x i32>
 ;CHECK: phi <4 x i32>
-;CHECK-NEXT: mul <4 x i32>
+;CHECK-NEXT: mul nsw <4 x i32>
 ;CHECK-NOT: mul
 ;CHECK: phi <4 x i32>
 ;CHECK: bitcast i32* %A to <4 x i32>*
diff --git a/test/Transforms/SLPVectorizer/X86/debug_info.ll b/test/Transforms/SLPVectorizer/X86/debug_info.ll
index f4e68f217f25..21f51d7c6125 100644
--- a/test/Transforms/SLPVectorizer/X86/debug_info.ll
+++ b/test/Transforms/SLPVectorizer/X86/debug_info.ll
@@ -18,16 +18,16 @@ target triple = "x86_64-apple-macosx10.7.0"
 ;CHECK: load <2 x double>* {{.*}}, !dbg ![[LOC]]
 ;CHECK: store <2 x double> {{.*}}, !dbg ![[LOC2:[0-9]+]]
 ;CHECK: ret
-;CHECK: ![[LOC]] = metadata !{i32 4, i32 0,
-;CHECK: ![[LOC2]] = metadata !{i32 7, i32 0,
+;CHECK: ![[LOC]] = !MDLocation(line: 4, scope:
+;CHECK: ![[LOC2]] = !MDLocation(line: 7, scope:
 
 define i32 @depth(double* nocapture %A, i32 %m) #0 {
 entry:
-  tail call void @llvm.dbg.value(metadata !{double* %A}, i64 0, metadata !12), !dbg !19
-  tail call void @llvm.dbg.value(metadata !{i32 %m}, i64 0, metadata !13), !dbg !19
-  tail call void @llvm.dbg.value(metadata !20, i64 0, metadata !14), !dbg !21
-  tail call void @llvm.dbg.value(metadata !22, i64 0, metadata !15), !dbg !21
-  tail call void @llvm.dbg.value(metadata !2, i64 0, metadata !16), !dbg !23
+  tail call void @llvm.dbg.value(metadata double* %A, i64 0, metadata !12, metadata !{}), !dbg !19
+  tail call void @llvm.dbg.value(metadata i32 %m, i64 0, metadata !13, metadata !{}), !dbg !19
+  tail call void @llvm.dbg.value(metadata i32 00, i64 0, metadata !14, metadata !{}), !dbg !21
+  tail call void @llvm.dbg.value(metadata i32 02, i64 0, metadata !15, metadata !{}), !dbg !21
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !16, metadata !{}), !dbg !23
   %cmp8 = icmp sgt i32 %m, 0, !dbg !23
   br i1 %cmp8, label %for.body.lr.ph, label %for.end, !dbg !23
 
@@ -49,7 +49,7 @@ for.end:                                          ; preds = %for.body.lr.ph, %en
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -57,33 +57,33 @@ attributes #1 = { nounwind readnone }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!18, !32}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 187335) (llvm/trunk 187335:187340M)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/Users/nadav/file.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"file.c", metadata !"/Users/nadav"}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"depth", metadata !"depth", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (double*, i32)* @depth, null, null, metadata !11, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [depth]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/Users/nadav/file.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !9, metadata !8}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from double]
-!10 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
-!11 = metadata !{metadata !12, metadata !13, metadata !14, metadata !15, metadata !16}
-!12 = metadata !{i32 786689, metadata !4, metadata !"A", metadata !5, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [A] [line 1]
-!13 = metadata !{i32 786689, metadata !4, metadata !"m", metadata !5, i32 33554433, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [m] [line 1]
-!14 = metadata !{i32 786688, metadata !4, metadata !"y0", metadata !5, i32 2, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [y0] [line 2]
-!15 = metadata !{i32 786688, metadata !4, metadata !"y1", metadata !5, i32 2, metadata !10, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [y1] [line 2]
-!16 = metadata !{i32 786688, metadata !17, metadata !"i", metadata !5, i32 3, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [i] [line 3]
-!17 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/Users/nadav/file.c]
-!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 2}
-!19 = metadata !{i32 1, i32 0, metadata !4, null}
-!20 = metadata !{double 0.000000e+00}
-!21 = metadata !{i32 2, i32 0, metadata !4, null}
-!22 = metadata !{double 1.000000e+00}
-!23 = metadata !{i32 3, i32 0, metadata !17, null}
-!24 = metadata !{i32 4, i32 0, metadata !25, null}
-!25 = metadata !{i32 786443, metadata !1, metadata !17, i32 3, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [/Users/nadav/file.c]
-!29 = metadata !{i32 5, i32 0, metadata !25, null}
-!30 = metadata !{i32 7, i32 0, metadata !4, null}
-!31 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
-!32 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 (trunk 187335) (llvm/trunk 187335:187340M)\001\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/Users/nadav/file.c] [DW_LANG_C99]
+!1 = !{!"file.c", !"/Users/nadav"}
+!2 = !{i32 0}
+!3 = !{!4}
+!4 = !{!"0x2e\00depth\00depth\00\001\000\001\000\006\00256\001\001", !1, !5, !6, null, i32 (double*, i32)* @depth, null, null, !11} ; [ DW_TAG_subprogram ] [line 1] [def] [depth]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/Users/nadav/file.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !9, !8}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0xf\00\000\0064\0064\000\000", null, null, !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from double]
+!10 = !{!"0x24\00double\000\0064\0064\000\000\004", null, null} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!11 = !{!12, !13, !14, !15, !16}
+!12 = !{!"0x101\00A\0016777217\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [A] [line 1]
+!13 = !{!"0x101\00m\0033554433\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [m] [line 1]
+!14 = !{!"0x100\00y0\002\000", !4, !5, !10} ; [ DW_TAG_auto_variable ] [y0] [line 2]
+!15 = !{!"0x100\00y1\002\000", !4, !5, !10} ; [ DW_TAG_auto_variable ] [y1] [line 2]
+!16 = !{!"0x100\00i\003\000", !17, !5, !8} ; [ DW_TAG_auto_variable ] [i] [line 3]
+!17 = !{!"0xb\003\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [/Users/nadav/file.c]
+!18 = !{i32 2, !"Dwarf Version", i32 2}
+!19 = !MDLocation(line: 1, scope: !4)
+!20 = !{double 0.000000e+00}
+!21 = !MDLocation(line: 2, scope: !4)
+!22 = !{double 1.000000e+00}
+!23 = !MDLocation(line: 3, scope: !17)
+!24 = !MDLocation(line: 4, scope: !25)
+!25 = !{!"0xb\003\000\001", !1, !17} ; [ DW_TAG_lexical_block ] [/Users/nadav/file.c]
+!29 = !MDLocation(line: 5, scope: !25)
+!30 = !MDLocation(line: 7, scope: !4)
+!31 = !MDLocation(line: 8, scope: !4)
+!32 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
new file mode 100644
index 000000000000..3628042cdb4d
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
@@ -0,0 +1,70 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=i386-apple-macosx10.9.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@a = common global i64* null, align 8
+
+; Function Attrs: nounwind ssp uwtable
+define i32 @fn1() {
+entry:
+  %0 = load i64** @a, align 8
+  %add.ptr = getelementptr inbounds i64* %0, i64 11
+  %1 = ptrtoint i64* %add.ptr to i64
+  store i64 %1, i64* %add.ptr, align 8
+  %add.ptr1 = getelementptr inbounds i64* %0, i64 56
+  %2 = ptrtoint i64* %add.ptr1 to i64
+  %arrayidx2 = getelementptr inbounds i64* %0, i64 12
+  store i64 %2, i64* %arrayidx2, align 8
+  ret i32 undef
+; CHECK-LABEL: @fn1(
+; CHECK: extractelement <2 x i64*>
+; CHECK: ret
+}
+
+
+declare float @llvm.powi.f32(float, i32)
+define void @fn2(i32* %a, i32* %b, float* %c) {
+entry:
+  %i0 = load i32* %a, align 4
+  %i1 = load i32* %b, align 4
+  %add1 = add i32 %i0, %i1
+  %fp1 = sitofp i32 %add1 to float
+  %call1 = tail call float @llvm.powi.f32(float %fp1,i32 %add1) nounwind readnone
+
+  %arrayidx2 = getelementptr inbounds i32* %a, i32 1
+  %i2 = load i32* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i32 1
+  %i3 = load i32* %arrayidx3, align 4
+  %add2 = add i32 %i2, %i3
+  %fp2 = sitofp i32 %add2 to float
+  %call2 = tail call float @llvm.powi.f32(float %fp2,i32 %add1) nounwind readnone
+
+  %arrayidx4 = getelementptr inbounds i32* %a, i32 2
+  %i4 = load i32* %arrayidx4, align 4
+  %arrayidx5 = getelementptr inbounds i32* %b, i32 2
+  %i5 = load i32* %arrayidx5, align 4
+  %add3 = add i32 %i4, %i5
+  %fp3 = sitofp i32 %add3 to float
+  %call3 = tail call float @llvm.powi.f32(float %fp3,i32 %add1) nounwind readnone
+
+  %arrayidx6 = getelementptr inbounds i32* %a, i32 3
+  %i6 = load i32* %arrayidx6, align 4
+  %arrayidx7 = getelementptr inbounds i32* %b, i32 3
+  %i7 = load i32* %arrayidx7, align 4
+  %add4 = add i32 %i6, %i7
+  %fp4 = sitofp i32 %add4 to float
+  %call4 = tail call float @llvm.powi.f32(float %fp4,i32 %add1) nounwind readnone
+
+  store float %call1, float* %c, align 4
+  %arrayidx8 = getelementptr inbounds float* %c, i32 1
+  store float %call2, float* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds float* %c, i32 2
+  store float %call3, float* %arrayidx9, align 4
+  %arrayidx10 = getelementptr inbounds float* %c, i32 3
+  store float %call4, float* %arrayidx10, align 4
+  ret void
+
+; CHECK-LABEL: @fn2(
+; CHECK: extractelement <4 x i32>
+; CHECK: ret
+}
diff --git a/test/Transforms/SLPVectorizer/X86/hoist.ll b/test/Transforms/SLPVectorizer/X86/hoist.ll
index 5074ceaaabd7..78c58f1d84e9 100644
--- a/test/Transforms/SLPVectorizer/X86/hoist.ll
+++ b/test/Transforms/SLPVectorizer/X86/hoist.ll
@@ -21,7 +21,7 @@ target triple = "i386-apple-macosx10.9.0"
 ; loop body:
 ;CHECK: phi
 ;CHECK: load <4 x i32>
-;CHECK: add <4 x i32>
+;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret
 define i32 @foo(i32* nocapture %A, i32 %n, i32 %k) {
diff --git a/test/Transforms/SLPVectorizer/X86/horizontal.ll b/test/Transforms/SLPVectorizer/X86/horizontal.ll
index 8f919512ff8d..18360471c190 100644
--- a/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -148,7 +148,7 @@ for.end:
 ; }
 
 ; CHECK-LABEL: long_red
-; CHECK: fmul <4 x float>
+; CHECK: fmul fast <4 x float>
 ; CHECK: shufflevector <4 x float>
 
 define i32 @long_red(float* noalias %A, float* noalias %B, i32 %n) {
@@ -250,7 +250,7 @@ for.end:
 ; }
 
 ; CHECK-LABEL: chain_red
-; CHECK: fmul <4 x float>
+; CHECK: fmul fast <4 x float>
 ; CHECK: shufflevector <4 x float>
 
 define i32 @chain_red(float* noalias %A, float* noalias %B, i32 %n) {
@@ -317,7 +317,7 @@ for.end:
 ; }
 
 ; CHECK-LABEL: store_red
-; CHECK: fmul <4 x float>
+; CHECK: fmul fast <4 x float>
 ; CHECK: shufflevector <4 x float>
 
 define i32 @store_red(float* noalias %A, float* noalias %B, float* noalias %C, i32 %n) {
@@ -379,7 +379,7 @@ for.end:
 ; }
 
 ; STORE-LABEL: store_red_double
-; STORE: fmul <2 x double>
+; STORE: fmul fast <2 x double>
 ; STORE: extractelement <2 x double>
 ; STORE: extractelement <2 x double>
 
diff --git a/test/Transforms/SLPVectorizer/X86/in-tree-user.ll b/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
index 3115232887bd..194a0fbd5ed0 100644
--- a/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
+++ b/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
@@ -5,9 +5,11 @@ target triple = "x86_64-apple-macosx10.7.0"
 
 @.str = private unnamed_addr constant [6 x i8] c"bingo\00", align 1
 
-; We can't vectorize when the roots are used inside the tree.
+; Uses inside the tree must be scheduled after the corresponding tree bundle.
 ;CHECK-LABEL: @in_tree_user(
-;CHECK-NOT: load <2 x double>
+;CHECK: load <2 x double>
+;CHECK: fadd <2 x double>
+;CHECK: InTreeUser = fadd
 ;CHECK: ret
 define void @in_tree_user(double* nocapture %A, i32 %n) {
 entry:
@@ -22,7 +24,7 @@ for.body:                                         ; preds = %for.inc, %entry
   %mul1 = fmul double %conv, %1
   %mul2 = fmul double %mul1, 7.000000e+00
   %add = fadd double %mul2, 5.000000e+00
-  %BadValue = fadd double %add, %add    ; <------------------ In tree user.
+  %InTreeUser = fadd double %add, %add    ; <------------------ In tree user.
   %2 = or i64 %0, 1
   %arrayidx6 = getelementptr inbounds double* %A, i64 %2
   %3 = load double* %arrayidx6, align 8
@@ -43,6 +45,7 @@ for.inc:                                          ; preds = %for.body, %if.then
   br i1 %exitcond, label %for.end, label %for.body
 
 for.end:                                          ; preds = %for.inc
+  store double %InTreeUser, double* %A, align 8   ; Avoid dead code elimination of the InTreeUser.
   ret void
 }
 
diff --git a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
index 9eda29f101ac..0221613b9707 100644
--- a/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -35,6 +35,49 @@ define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c)
   ret <4 x float> %rd
 }
 
+declare void @llvm.assume(i1) nounwind
+
+; This entire tree is ephemeral, don't vectorize any of it.
+define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
+; CHECK-LABEL: @simple_select_eph(
+; CHECK-NOT: icmp ne <4 x i32>
+; CHECK-NOT: select <4 x i1>
+  %c0 = extractelement <4 x i32> %c, i32 0
+  %c1 = extractelement <4 x i32> %c, i32 1
+  %c2 = extractelement <4 x i32> %c, i32 2
+  %c3 = extractelement <4 x i32> %c, i32 3
+  %a0 = extractelement <4 x float> %a, i32 0
+  %a1 = extractelement <4 x float> %a, i32 1
+  %a2 = extractelement <4 x float> %a, i32 2
+  %a3 = extractelement <4 x float> %a, i32 3
+  %b0 = extractelement <4 x float> %b, i32 0
+  %b1 = extractelement <4 x float> %b, i32 1
+  %b2 = extractelement <4 x float> %b, i32 2
+  %b3 = extractelement <4 x float> %b, i32 3
+  %cmp0 = icmp ne i32 %c0, 0
+  %cmp1 = icmp ne i32 %c1, 0
+  %cmp2 = icmp ne i32 %c2, 0
+  %cmp3 = icmp ne i32 %c3, 0
+  %s0 = select i1 %cmp0, float %a0, float %b0
+  %s1 = select i1 %cmp1, float %a1, float %b1
+  %s2 = select i1 %cmp2, float %a2, float %b2
+  %s3 = select i1 %cmp3, float %a3, float %b3
+  %ra = insertelement <4 x float> undef, float %s0, i32 0
+  %rb = insertelement <4 x float> %ra, float %s1, i32 1
+  %rc = insertelement <4 x float> %rb, float %s2, i32 2
+  %rd = insertelement <4 x float> %rc, float %s3, i32 3
+  %q0 = extractelement <4 x float> %rd, i32 0
+  %q1 = extractelement <4 x float> %rd, i32 1
+  %q2 = extractelement <4 x float> %rd, i32 2
+  %q3 = extractelement <4 x float> %rd, i32 3
+  %q4 = fadd float %q0, %q1
+  %q5 = fadd float %q2, %q3
+  %q6 = fadd float %q4, %q5
+  %qi = fcmp olt float %q6, %q5
+  call void @llvm.assume(i1 %qi)
+  ret <4 x float> undef
+}
+
 ; Insert in an order different from the vector indices to make sure it
 ; doesn't matter
 define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
diff --git a/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
index aef2479dd524..bc12926e3fe6 100644
--- a/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
+++ b/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
@@ -5,10 +5,10 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 ;CHECK-LABEL: @foo(
 ;CHECK: load <4 x i32>
-;CHECK: add <4 x i32>
+;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: load <4 x i32>
-;CHECK: add <4 x i32>
+;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret
 define i32 @foo(i32* nocapture %A, i32 %n) #0 {
diff --git a/test/Transforms/SLPVectorizer/X86/metadata.ll b/test/Transforms/SLPVectorizer/X86/metadata.ll
index 5bd2fa4ea684..e021cca23e34 100644
--- a/test/Transforms/SLPVectorizer/X86/metadata.ll
+++ b/test/Transforms/SLPVectorizer/X86/metadata.ll
@@ -51,11 +51,11 @@ entry:
   ret void
 }
 
-;CHECK-DAG: ![[TBAA]] = metadata !{metadata [[TYPEC:!.*]], metadata [[TYPEC]], i64 0}
-;CHECK-DAG: ![[FP1]] = metadata !{float 5.000000e+00}
-;CHECK-DAG: ![[FP2]] = metadata !{float 2.500000e+00}
-!0 = metadata !{ float 5.0 }
-!1 = metadata !{ float 2.5 }
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"omnipotent char", metadata !2}
-!4 = metadata !{metadata !"double", metadata !3}
+;CHECK-DAG: ![[TBAA]] = !{[[TYPEC:!.*]], [[TYPEC]], i64 0}
+;CHECK-DAG: ![[FP1]] = !{float 5.000000e+00}
+;CHECK-DAG: ![[FP2]] = !{float 2.500000e+00}
+!0 = !{ float 5.0 }
+!1 = !{ float 2.5 }
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"omnipotent char", !2}
+!4 = !{!"double", !3}
diff --git a/test/Transforms/SLPVectorizer/X86/multi_user.ll b/test/Transforms/SLPVectorizer/X86/multi_user.ll
index cab99945e29e..63a77e4b6733 100644
--- a/test/Transforms/SLPVectorizer/X86/multi_user.ll
+++ b/test/Transforms/SLPVectorizer/X86/multi_user.ll
@@ -14,7 +14,7 @@ target triple = "x86_64-apple-macosx10.7.0"
 ;CHECK-LABEL: @foo(
 ;CHECK: insertelement <4 x i32>
 ;CHECK: load <4 x i32>
-;CHECK: add <4 x i32>
+;CHECK: add nsw <4 x i32>
 ;CHECK: store <4 x i32>
 ;CHECK: ret
 define i32 @foo(i32* nocapture %A, i32 %n) {
diff --git a/test/Transforms/SLPVectorizer/X86/powof2div.ll b/test/Transforms/SLPVectorizer/X86/powof2div.ll
new file mode 100644
index 000000000000..7aa1efde6f06
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/powof2div.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK-LABEL: @powof2div(
+;CHECK: load <4 x i32>*
+;CHECK: add nsw <4 x i32>
+;CHECK: sdiv <4 x i32>
+define void @powof2div(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){
+entry:
+  %0 = load i32* %b, align 4
+  %1 = load i32* %c, align 4
+  %add = add nsw i32 %1, %0
+  %div = sdiv i32 %add, 2
+  store i32 %div, i32* %a, align 4
+  %arrayidx3 = getelementptr inbounds i32* %b, i64 1
+  %2 = load i32* %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds i32* %c, i64 1
+  %3 = load i32* %arrayidx4, align 4
+  %add5 = add nsw i32 %3, %2
+  %div6 = sdiv i32 %add5, 2
+  %arrayidx7 = getelementptr inbounds i32* %a, i64 1
+  store i32 %div6, i32* %arrayidx7, align 4
+  %arrayidx8 = getelementptr inbounds i32* %b, i64 2
+  %4 = load i32* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32* %c, i64 2
+  %5 = load i32* %arrayidx9, align 4
+  %add10 = add nsw i32 %5, %4
+  %div11 = sdiv i32 %add10, 2
+  %arrayidx12 = getelementptr inbounds i32* %a, i64 2
+  store i32 %div11, i32* %arrayidx12, align 4
+  %arrayidx13 = getelementptr inbounds i32* %b, i64 3
+  %6 = load i32* %arrayidx13, align 4
+  %arrayidx14 = getelementptr inbounds i32* %c, i64 3
+  %7 = load i32* %arrayidx14, align 4
+  %add15 = add nsw i32 %7, %6
+  %div16 = sdiv i32 %add15, 2
+  %arrayidx17 = getelementptr inbounds i32* %a, i64 3
+  store i32 %div16, i32* %arrayidx17, align 4
+  ret void
+}
+
diff --git a/test/Transforms/SLPVectorizer/X86/pr16899.ll b/test/Transforms/SLPVectorizer/X86/pr16899.ll
index 8631bc9125df..c642f3cfe1d4 100644
--- a/test/Transforms/SLPVectorizer/X86/pr16899.ll
+++ b/test/Transforms/SLPVectorizer/X86/pr16899.ll
@@ -23,9 +23,9 @@ do.body:                                          ; preds = %do.body, %entry
 
 attributes #0 = { noreturn nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
-!0 = metadata !{metadata !"any pointer", metadata !1}
-!1 = metadata !{metadata !"omnipotent char", metadata !2}
-!2 = metadata !{metadata !"Simple C/C++ TBAA"}
-!3 = metadata !{metadata !"int", metadata !1}
-!4 = metadata !{metadata !0, metadata !0, i64 0}
-!5 = metadata !{metadata !3, metadata !3, i64 0}
+!0 = !{!"any pointer", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
+!3 = !{!"int", !1}
+!4 = !{!0, !0, i64 0}
+!5 = !{!3, !3, i64 0}
diff --git a/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll b/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
new file mode 100644
index 000000000000..3843ef7f62c7
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
@@ -0,0 +1,350 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
+
+; Check propagation of optional IR flags (PR20802). For a flag to
+; propagate from scalar instructions to their vector replacement,
+; *all* scalar instructions must have the flag.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+; CHECK-LABEL: @exact(
+; CHECK: lshr exact <4 x i32>
+define void @exact(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = lshr exact i32 %load1, 1
+  %op2 = lshr exact i32 %load2, 1
+  %op3 = lshr exact i32 %load3, 1
+  %op4 = lshr exact i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+
+; CHECK-LABEL: @not_exact(
+; CHECK: lshr <4 x i32>
+define void @not_exact(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = lshr exact i32 %load1, 1
+  %op2 = lshr i32 %load2, 1
+  %op3 = lshr exact i32 %load3, 1
+  %op4 = lshr exact i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+
+; CHECK-LABEL: @nsw(
+; CHECK: add nsw <4 x i32>
+define void @nsw(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = add nsw i32 %load1, 1
+  %op2 = add nsw i32 %load2, 1
+  %op3 = add nsw i32 %load3, 1
+  %op4 = add nsw i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+
+; CHECK-LABEL: @not_nsw(
+; CHECK: add <4 x i32>
+define void @not_nsw(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = add nsw i32 %load1, 1
+  %op2 = add nsw i32 %load2, 1
+  %op3 = add nsw i32 %load3, 1
+  %op4 = add i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+
+; CHECK-LABEL: @nuw(
+; CHECK: add nuw <4 x i32>
+define void @nuw(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = add nuw i32 %load1, 1
+  %op2 = add nuw i32 %load2, 1
+  %op3 = add nuw i32 %load3, 1
+  %op4 = add nuw i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+ 
+; CHECK-LABEL: @not_nuw(
+; CHECK: add <4 x i32>
+define void @not_nuw(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = add nuw i32 %load1, 1
+  %op2 = add i32 %load2, 1
+  %op3 = add i32 %load3, 1
+  %op4 = add nuw i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+ 
+; CHECK-LABEL: @nnan(
+; CHECK: fadd nnan <4 x float>
+define void @nnan(float* %x) {
+  %idx1 = getelementptr inbounds float* %x, i64 0
+  %idx2 = getelementptr inbounds float* %x, i64 1
+  %idx3 = getelementptr inbounds float* %x, i64 2
+  %idx4 = getelementptr inbounds float* %x, i64 3
+
+  %load1 = load float* %idx1, align 4
+  %load2 = load float* %idx2, align 4
+  %load3 = load float* %idx3, align 4
+  %load4 = load float* %idx4, align 4
+
+  %op1 = fadd fast nnan float %load1, 1.0
+  %op2 = fadd nnan ninf float %load2, 1.0
+  %op3 = fadd nsz nnan float %load3, 1.0
+  %op4 = fadd arcp nnan float %load4, 1.0
+
+  store float %op1, float* %idx1, align 4
+  store float %op2, float* %idx2, align 4
+  store float %op3, float* %idx3, align 4
+  store float %op4, float* %idx4, align 4
+
+  ret void
+}
+ 
+; CHECK-LABEL: @not_nnan(
+; CHECK: fadd <4 x float>
+define void @not_nnan(float* %x) {
+  %idx1 = getelementptr inbounds float* %x, i64 0
+  %idx2 = getelementptr inbounds float* %x, i64 1
+  %idx3 = getelementptr inbounds float* %x, i64 2
+  %idx4 = getelementptr inbounds float* %x, i64 3
+
+  %load1 = load float* %idx1, align 4
+  %load2 = load float* %idx2, align 4
+  %load3 = load float* %idx3, align 4
+  %load4 = load float* %idx4, align 4
+
+  %op1 = fadd nnan float %load1, 1.0
+  %op2 = fadd ninf float %load2, 1.0
+  %op3 = fadd nsz float %load3, 1.0
+  %op4 = fadd arcp float %load4, 1.0
+
+  store float %op1, float* %idx1, align 4
+  store float %op2, float* %idx2, align 4
+  store float %op3, float* %idx3, align 4
+  store float %op4, float* %idx4, align 4
+
+  ret void
+}
+ 
+; CHECK-LABEL: @only_fast(
+; CHECK: fadd fast <4 x float>
+define void @only_fast(float* %x) {
+  %idx1 = getelementptr inbounds float* %x, i64 0
+  %idx2 = getelementptr inbounds float* %x, i64 1
+  %idx3 = getelementptr inbounds float* %x, i64 2
+  %idx4 = getelementptr inbounds float* %x, i64 3
+
+  %load1 = load float* %idx1, align 4
+  %load2 = load float* %idx2, align 4
+  %load3 = load float* %idx3, align 4
+  %load4 = load float* %idx4, align 4
+
+  %op1 = fadd fast nnan float %load1, 1.0
+  %op2 = fadd fast nnan ninf float %load2, 1.0
+  %op3 = fadd fast nsz nnan float %load3, 1.0
+  %op4 = fadd arcp nnan fast float %load4, 1.0
+
+  store float %op1, float* %idx1, align 4
+  store float %op2, float* %idx2, align 4
+  store float %op3, float* %idx3, align 4
+  store float %op4, float* %idx4, align 4
+
+  ret void
+}
+ 
+; CHECK-LABEL: @only_arcp(
+; CHECK: fadd arcp <4 x float>
+define void @only_arcp(float* %x) {
+  %idx1 = getelementptr inbounds float* %x, i64 0
+  %idx2 = getelementptr inbounds float* %x, i64 1
+  %idx3 = getelementptr inbounds float* %x, i64 2
+  %idx4 = getelementptr inbounds float* %x, i64 3
+
+  %load1 = load float* %idx1, align 4
+  %load2 = load float* %idx2, align 4
+  %load3 = load float* %idx3, align 4
+  %load4 = load float* %idx4, align 4
+
+  %op1 = fadd fast float %load1, 1.0
+  %op2 = fadd fast float %load2, 1.0
+  %op3 = fadd fast float %load3, 1.0
+  %op4 = fadd arcp float %load4, 1.0
+
+  store float %op1, float* %idx1, align 4
+  store float %op2, float* %idx2, align 4
+  store float %op3, float* %idx3, align 4
+  store float %op4, float* %idx4, align 4
+
+  ret void
+}
+
+; CHECK-LABEL: @addsub_all_nsw
+; CHECK: add nsw <4 x i32>
+; CHECK: sub nsw <4 x i32>
+define void @addsub_all_nsw(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = add nsw i32 %load1, 1
+  %op2 = sub nsw i32 %load2, 1
+  %op3 = add nsw i32 %load3, 1
+  %op4 = sub nsw i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+ 
+; CHECK-LABEL: @addsub_some_nsw
+; CHECK: add nsw <4 x i32>
+; CHECK: sub <4 x i32>
+define void @addsub_some_nsw(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = add nsw i32 %load1, 1
+  %op2 = sub nsw i32 %load2, 1
+  %op3 = add nsw i32 %load3, 1
+  %op4 = sub i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+ 
+; CHECK-LABEL: @addsub_no_nsw
+; CHECK: add <4 x i32>
+; CHECK: sub <4 x i32>
+define void @addsub_no_nsw(i32* %x) {
+  %idx1 = getelementptr inbounds i32* %x, i64 0
+  %idx2 = getelementptr inbounds i32* %x, i64 1
+  %idx3 = getelementptr inbounds i32* %x, i64 2
+  %idx4 = getelementptr inbounds i32* %x, i64 3
+
+  %load1 = load i32* %idx1, align 4
+  %load2 = load i32* %idx2, align 4
+  %load3 = load i32* %idx3, align 4
+  %load4 = load i32* %idx4, align 4
+
+  %op1 = add i32 %load1, 1
+  %op2 = sub nsw i32 %load2, 1
+  %op3 = add nsw i32 %load3, 1
+  %op4 = sub i32 %load4, 1
+
+  store i32 %op1, i32* %idx1, align 4
+  store i32 %op2, i32* %idx2, align 4
+  store i32 %op3, i32* %idx3, align 4
+  store i32 %op4, i32* %idx4, align 4
+
+  ret void
+}
+ 
diff --git a/test/Transforms/SLPVectorizer/X86/return.ll b/test/Transforms/SLPVectorizer/X86/return.ll
new file mode 100644
index 000000000000..1a81c235bdfb
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/return.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "x86_64--linux-gnu"
+
+@a = common global [4 x double] zeroinitializer, align 8
+@b = common global [4 x double] zeroinitializer, align 8
+
+; [4], b[4];
+; double foo() {
+;  double sum =0;
+;  sum = (a[0]+b[0]) + (a[1]+b[1]);
+;  return sum;
+; }
+
+; CHECK-LABEL: @return1
+; CHECK: %0 = load <2 x double>*
+; CHECK: %1 = load <2 x double>*
+; CHECK: %2 = fadd <2 x double>
+
+define double @return1() {
+entry:
+  %a0 = load double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 0), align 8
+  %b0 = load double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 0), align 8
+  %add0 = fadd double %a0, %b0
+  %a1 = load double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 1), align 8
+  %b1 = load double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 1), align 8
+  %add1 = fadd double %a1, %b1
+  %add2 = fadd double %add0, %add1
+  ret double %add2
+}
+
+; double hadd(double *x) {
+;   return ((x[0] + x[2]) + (x[1] + x[3]));
+; }
+
+; CHECK-LABEL: @return2
+; CHECK: %1 = load <2 x double>*
+; CHECK: %3 = load <2 x double>* %2
+; CHECK: %4 = fadd <2 x double> %1, %3
+
+define double @return2(double* nocapture readonly %x) {
+entry:
+  %x0 = load double* %x, align 4
+  %arrayidx1 = getelementptr inbounds double* %x, i32 2
+  %x2 = load double* %arrayidx1, align 4
+  %add3 = fadd double %x0, %x2
+  %arrayidx2 = getelementptr inbounds double* %x, i32 1
+  %x1 = load double* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds double* %x, i32 3
+  %x3 = load double* %arrayidx3, align 4
+  %add4 = fadd double %x1, %x3
+  %add5 = fadd double %add3, %add4
+  ret double %add5
+}
diff --git a/test/Transforms/SLPVectorizer/X86/saxpy.ll b/test/Transforms/SLPVectorizer/X86/saxpy.ll
index 46263416a90b..4b39d46f89db 100644
--- a/test/Transforms/SLPVectorizer/X86/saxpy.ll
+++ b/test/Transforms/SLPVectorizer/X86/saxpy.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-macosx10.8.0"
 
 ; SLP vectorization example from http://cs.stanford.edu/people/eschkufz/research/asplos291-schkufza.pdf
 ;CHECK: SAXPY
-;CHECK: mul <4 x i32>
+;CHECK: mul nsw <4 x i32>
 ;CHECK: ret
 
 define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a, i64 %i) {
diff --git a/test/Transforms/SLPVectorizer/X86/scheduling.ll b/test/Transforms/SLPVectorizer/X86/scheduling.ll
new file mode 100644
index 000000000000..3b3bd804e057
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/scheduling.ll
@@ -0,0 +1,78 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+;CHECK-LABEL: @foo
+;CHECK: load <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: %[[S1:.+]] = add nsw <4 x i32>
+;CHECK-DAG: store <4 x i32> %[[S1]]
+;CHECK-DAG: %[[A1:.+]] = add nsw i32
+;CHECK-DAG: %[[A2:.+]] = add nsw i32 %[[A1]]
+;CHECK-DAG: %[[A3:.+]] = add nsw i32 %[[A2]]
+;CHECK-DAG: %[[A4:.+]] = add nsw i32 %[[A3]]
+;CHECK: ret i32 %[[A4]] 
+
+define i32 @foo(i32* nocapture readonly %diff) #0 {
+entry:
+  %m2 = alloca [8 x [8 x i32]], align 16
+  %0 = bitcast [8 x [8 x i32]]* %m2 to i8*
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %a.088 = phi i32 [ 0, %entry ], [ %add52, %for.body ]
+  %1 = shl i64 %indvars.iv, 3
+  %arrayidx = getelementptr inbounds i32* %diff, i64 %1
+  %2 = load i32* %arrayidx, align 4
+  %3 = or i64 %1, 4
+  %arrayidx2 = getelementptr inbounds i32* %diff, i64 %3
+  %4 = load i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %4, %2
+  %arrayidx6 = getelementptr inbounds [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0
+  store i32 %add3, i32* %arrayidx6, align 16
+  %add10 = add nsw i32 %add3, %a.088
+  %5 = or i64 %1, 1
+  %arrayidx13 = getelementptr inbounds i32* %diff, i64 %5
+  %6 = load i32* %arrayidx13, align 4
+  %7 = or i64 %1, 5
+  %arrayidx16 = getelementptr inbounds i32* %diff, i64 %7
+  %8 = load i32* %arrayidx16, align 4
+  %add17 = add nsw i32 %8, %6
+  %arrayidx20 = getelementptr inbounds [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1
+  store i32 %add17, i32* %arrayidx20, align 4
+  %add24 = add nsw i32 %add10, %add17
+  %9 = or i64 %1, 2
+  %arrayidx27 = getelementptr inbounds i32* %diff, i64 %9
+  %10 = load i32* %arrayidx27, align 4
+  %11 = or i64 %1, 6
+  %arrayidx30 = getelementptr inbounds i32* %diff, i64 %11
+  %12 = load i32* %arrayidx30, align 4
+  %add31 = add nsw i32 %12, %10
+  %arrayidx34 = getelementptr inbounds [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 2
+  store i32 %add31, i32* %arrayidx34, align 8
+  %add38 = add nsw i32 %add24, %add31
+  %13 = or i64 %1, 3
+  %arrayidx41 = getelementptr inbounds i32* %diff, i64 %13
+  %14 = load i32* %arrayidx41, align 4
+  %15 = or i64 %1, 7
+  %arrayidx44 = getelementptr inbounds i32* %diff, i64 %15
+  %16 = load i32* %arrayidx44, align 4
+  %add45 = add nsw i32 %16, %14
+  %arrayidx48 = getelementptr inbounds [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 3
+  store i32 %add45, i32* %arrayidx48, align 4
+  %add52 = add nsw i32 %add38, %add45
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 8
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  %arraydecay = getelementptr inbounds [8 x [8 x i32]]* %m2, i64 0, i64 0
+  call void @ff([8 x i32]* %arraydecay) #1
+  ret i32 %add52
+}
+
+declare void @ff([8 x i32]*) #2
+
+
diff --git a/test/Transforms/SLPVectorizer/X86/unreachable.ll b/test/Transforms/SLPVectorizer/X86/unreachable.ll
new file mode 100644
index 000000000000..8d609571be17
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/unreachable.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
+
+; Check if the SLPVectorizer does not crash when handling
+; unreachable blocks with unscheduleable instructions.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+define void @foo(i32* nocapture %x) #0 {
+entry:
+  br label %bb2
+
+bb1:                                    ; an unreachable block
+  %t3 = getelementptr inbounds i32* %x, i64 4
+  %t4 = load i32* %t3, align 4
+  %t5 = getelementptr inbounds i32* %x, i64 5
+  %t6 = load i32* %t5, align 4
+  %bad = fadd float %bad, 0.000000e+00  ; <- an instruction with self dependency,
+                                        ;    but legal in unreachable code
+  %t7 = getelementptr inbounds i32* %x, i64 6
+  %t8 = load i32* %t7, align 4
+  %t9 = getelementptr inbounds i32* %x, i64 7
+  %t10 = load i32* %t9, align 4
+  br label %bb2
+
+bb2:
+  %t1.0 = phi i32 [ %t4, %bb1 ], [ 2, %entry ]
+  %t2.0 = phi i32 [ %t6, %bb1 ], [ 2, %entry ]
+  %t3.0 = phi i32 [ %t8, %bb1 ], [ 2, %entry ]
+  %t4.0 = phi i32 [ %t10, %bb1 ], [ 2, %entry ]
+  store i32 %t1.0, i32* %x, align 4
+  %t12 = getelementptr inbounds i32* %x, i64 1
+  store i32 %t2.0, i32* %t12, align 4
+  %t13 = getelementptr inbounds i32* %x, i64 2
+  store i32 %t3.0, i32* %t13, align 4
+  %t14 = getelementptr inbounds i32* %x, i64 3
+  store i32 %t4.0, i32* %t14, align 4
+  ret void
+}
+
diff --git a/test/Transforms/SROA/alignment.ll b/test/Transforms/SROA/alignment.ll
index 5fa78766ed0f..4f4e40cf76a7 100644
--- a/test/Transforms/SROA/alignment.ll
+++ b/test/Transforms/SROA/alignment.ll
@@ -85,15 +85,18 @@ entry:
 }
 
 define void @test5() {
-; Test that we preserve underaligned loads and stores when splitting.
+; Test that we preserve underaligned loads and stores when splitting. The use
+; of volatile in this test case is just to force the loads and stores to not be
+; split or promoted out of existence.
+;
 ; CHECK-LABEL: @test5(
 ; CHECK: alloca [9 x i8]
 ; CHECK: alloca [9 x i8]
 ; CHECK: store volatile double 0.0{{.*}}, double* %{{.*}}, align 1
-; CHECK: load i16* %{{.*}}, align 1
+; CHECK: load volatile i16* %{{.*}}, align 1
 ; CHECK: load double* %{{.*}}, align 1
 ; CHECK: store volatile double %{{.*}}, double* %{{.*}}, align 1
-; CHECK: load i16* %{{.*}}, align 1
+; CHECK: load volatile i16* %{{.*}}, align 1
 ; CHECK: ret void
 
 entry:
@@ -103,7 +106,7 @@ entry:
   store volatile double 0.0, double* %ptr1, align 1
   %weird_gep1 = getelementptr inbounds [18 x i8]* %a, i32 0, i32 7
   %weird_cast1 = bitcast i8* %weird_gep1 to i16*
-  %weird_load1 = load i16* %weird_cast1, align 1
+  %weird_load1 = load volatile i16* %weird_cast1, align 1
 
   %raw2 = getelementptr inbounds [18 x i8]* %a, i32 0, i32 9
   %ptr2 = bitcast i8* %raw2 to double*
@@ -111,7 +114,7 @@ entry:
   store volatile double %d1, double* %ptr2, align 1
   %weird_gep2 = getelementptr inbounds [18 x i8]* %a, i32 0, i32 16
   %weird_cast2 = bitcast i8* %weird_gep2 to i16*
-  %weird_load2 = load i16* %weird_cast2, align 1
+  %weird_load2 = load volatile i16* %weird_cast2, align 1
 
   ret void
 }
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index dc2b16550a06..e3f762abfa39 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1440,3 +1440,158 @@ entry:
   ret void
 }
 
+define float @test25() {
+; Check that we split up stores in order to promote the smaller SSA values.. These types
+; of patterns can arise because LLVM maps small memcpy's to integer load and
+; stores. If we get a memcpy of an aggregate (such as C and C++ frontends would
+; produce, but so might any language frontend), this will in many cases turn into
+; an integer load and store. SROA needs to be extremely powerful to correctly
+; handle these cases and form splitable and promotable SSA values.
+;
+; CHECK-LABEL: @test25(
+; CHECK-NOT: alloca
+; CHECK: %[[F1:.*]] = bitcast i32 0 to float
+; CHECK: %[[F2:.*]] = bitcast i32 1065353216 to float
+; CHECK: %[[SUM:.*]] = fadd float %[[F1]], %[[F2]]
+; CHECK: ret float %[[SUM]]
+
+entry:
+  %a = alloca i64
+  %b = alloca i64
+  %a.cast = bitcast i64* %a to [2 x float]*
+  %a.gep1 = getelementptr [2 x float]* %a.cast, i32 0, i32 0
+  %a.gep2 = getelementptr [2 x float]* %a.cast, i32 0, i32 1
+  %b.cast = bitcast i64* %b to [2 x float]*
+  %b.gep1 = getelementptr [2 x float]* %b.cast, i32 0, i32 0
+  %b.gep2 = getelementptr [2 x float]* %b.cast, i32 0, i32 1
+  store float 0.0, float* %a.gep1
+  store float 1.0, float* %a.gep2
+  %v = load i64* %a
+  store i64 %v, i64* %b
+  %f1 = load float* %b.gep1
+  %f2 = load float* %b.gep2
+  %ret = fadd float %f1, %f2
+  ret float %ret
+}
+
+@complex1 = external global [2 x float]
+@complex2 = external global [2 x float]
+
+define void @test26() {
+; Test a case of splitting up loads and stores against a globals.
+;
+; CHECK-LABEL: @test26(
+; CHECK-NOT: alloca
+; CHECK: %[[L1:.*]] = load i32* bitcast
+; CHECK: %[[L2:.*]] = load i32* bitcast
+; CHECK: %[[F1:.*]] = bitcast i32 %[[L1]] to float
+; CHECK: %[[F2:.*]] = bitcast i32 %[[L2]] to float
+; CHECK: %[[SUM:.*]] = fadd float %[[F1]], %[[F2]]
+; CHECK: %[[C1:.*]] = bitcast float %[[SUM]] to i32
+; CHECK: %[[C2:.*]] = bitcast float %[[SUM]] to i32
+; CHECK: store i32 %[[C1]], i32* bitcast
+; CHECK: store i32 %[[C2]], i32* bitcast
+; CHECK: ret void
+
+entry:
+  %a = alloca i64
+  %a.cast = bitcast i64* %a to [2 x float]*
+  %a.gep1 = getelementptr [2 x float]* %a.cast, i32 0, i32 0
+  %a.gep2 = getelementptr [2 x float]* %a.cast, i32 0, i32 1
+  %v1 = load i64* bitcast ([2 x float]* @complex1 to i64*)
+  store i64 %v1, i64* %a
+  %f1 = load float* %a.gep1
+  %f2 = load float* %a.gep2
+  %sum = fadd float %f1, %f2
+  store float %sum, float* %a.gep1
+  store float %sum, float* %a.gep2
+  %v2 = load i64* %a
+  store i64 %v2, i64* bitcast ([2 x float]* @complex2 to i64*)
+  ret void
+}
+
+define float @test27() {
+; Another, more complex case of splittable i64 loads and stores. This example
+; is a particularly challenging one because the load and store both point into
+; the alloca SROA is processing, and they overlap but at an offset.
+;
+; CHECK-LABEL: @test27(
+; CHECK-NOT: alloca
+; CHECK: %[[F1:.*]] = bitcast i32 0 to float
+; CHECK: %[[F2:.*]] = bitcast i32 1065353216 to float
+; CHECK: %[[SUM:.*]] = fadd float %[[F1]], %[[F2]]
+; CHECK: ret float %[[SUM]]
+
+entry:
+  %a = alloca [12 x i8]
+  %gep1 = getelementptr [12 x i8]* %a, i32 0, i32 0
+  %gep2 = getelementptr [12 x i8]* %a, i32 0, i32 4
+  %gep3 = getelementptr [12 x i8]* %a, i32 0, i32 8
+  %iptr1 = bitcast i8* %gep1 to i64*
+  %iptr2 = bitcast i8* %gep2 to i64*
+  %fptr1 = bitcast i8* %gep1 to float*
+  %fptr2 = bitcast i8* %gep2 to float*
+  %fptr3 = bitcast i8* %gep3 to float*
+  store float 0.0, float* %fptr1
+  store float 1.0, float* %fptr2
+  %v = load i64* %iptr1
+  store i64 %v, i64* %iptr2
+  %f1 = load float* %fptr2
+  %f2 = load float* %fptr3
+  %ret = fadd float %f1, %f2
+  ret float %ret
+}
+
+define i32 @PR22093() {
+; Test that we don't try to pre-split a splittable store of a splittable but
+; not pre-splittable load over the same alloca. We "handle" this case when the
+; load is unsplittable but unrelated to this alloca by just generating extra
+; loads without touching the original, but when the original load was out of
+; this alloca we need to handle it specially to ensure the splits line up
+; properly for rewriting.
+;
+; CHECK-LABEL: @PR22093(
+; CHECK-NOT: alloca
+; CHECK: alloca i16
+; CHECK-NOT: alloca
+; CHECK: store volatile i16
+
+entry:
+  %a = alloca i32
+  %a.cast = bitcast i32* %a to i16*
+  store volatile i16 42, i16* %a.cast
+  %load = load i32* %a
+  store i32 %load, i32* %a
+  ret i32 %load
+}
+
+define void @PR22093.2() {
+; Another way that we end up being unable to split a particular set of loads
+; and stores can even have ordering importance. Here we have a load which is
+; pre-splittable by itself, and the first store is also compatible. But the
+; second store of the load makes the load unsplittable because of a mismatch of
+; splits. Because this makes the load unsplittable, we also have to go back and
+; remove the first store from the presplit candidates as its load won't be
+; presplit.
+;
+; CHECK-LABEL: @PR22093.2(
+; CHECK-NOT: alloca
+; CHECK: alloca i16
+; CHECK-NEXT: alloca i8
+; CHECK-NOT: alloca
+; CHECK: store volatile i16
+; CHECK: store volatile i8
+
+entry:
+  %a = alloca i64
+  %a.cast1 = bitcast i64* %a to i32*
+  %a.cast2 = bitcast i64* %a to i16*
+  store volatile i16 42, i16* %a.cast2
+  %load = load i32* %a.cast1
+  store i32 %load, i32* %a.cast1
+  %a.gep1 = getelementptr i32* %a.cast1, i32 1
+  %a.cast3 = bitcast i32* %a.gep1 to i8*
+  store volatile i8 13, i8* %a.cast3
+  store i32 %load, i32* %a.gep1
+  ret void
+}
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll
index 946bc40b0587..f2870127352c 100644
--- a/test/Transforms/SROA/phi-and-select.ll
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -502,6 +502,71 @@ end:
 ; CHECK: ret float %[[phi]]
 }
 
+; Verifies we fixed PR20425. We should be able to promote all alloca's to
+; registers in this test.
+;
+; %0 = slice
+; %1 = slice
+; %2 = phi(%0, %1) // == slice
+define float @simplify_phi_nodes_that_equal_slice(i1 %cond, float* %temp) {
+; CHECK-LABEL: @simplify_phi_nodes_that_equal_slice(
+entry:
+  %arr = alloca [4 x float], align 4
+; CHECK-NOT: alloca
+  br i1 %cond, label %then, label %else
+
+then:
+  %0 = getelementptr inbounds [4 x float]* %arr, i64 0, i64 3
+  store float 1.000000e+00, float* %0, align 4
+  br label %merge
+
+else:
+  %1 = getelementptr inbounds [4 x float]* %arr, i64 0, i64 3
+  store float 2.000000e+00, float* %1, align 4
+  br label %merge
+
+merge:
+  %2 = phi float* [ %0, %then ], [ %1, %else ]
+  store float 0.000000e+00, float* %temp, align 4
+  %3 = load float* %2, align 4
+  ret float %3
+}
+
+; A slightly complicated example for PR20425.
+;
+; %0 = slice
+; %1 = phi(%0) // == slice
+; %2 = slice
+; %3 = phi(%1, %2) // == slice
+define float @simplify_phi_nodes_that_equal_slice_2(i1 %cond, float* %temp) {
+; CHECK-LABEL: @simplify_phi_nodes_that_equal_slice_2(
+entry:
+  %arr = alloca [4 x float], align 4
+; CHECK-NOT: alloca
+  br i1 %cond, label %then, label %else
+
+then:
+  %0 = getelementptr inbounds [4 x float]* %arr, i64 0, i64 3
+  store float 1.000000e+00, float* %0, align 4
+  br label %then2
+
+then2:
+  %1 = phi float* [ %0, %then ]
+  store float 2.000000e+00, float* %1, align 4
+  br label %merge
+
+else:
+  %2 = getelementptr inbounds [4 x float]* %arr, i64 0, i64 3
+  store float 3.000000e+00, float* %2, align 4
+  br label %merge
+
+merge:
+  %3 = phi float* [ %1, %then2 ], [ %2, %else ]
+  store float 0.000000e+00, float* %temp, align 4
+  %4 = load float* %3, align 4
+  ret float %4
+}
+
 %struct.S = type { i32 }
 
 ; Verifies we fixed PR20822. We have a foldable PHI feeding a speculatable PHI
@@ -514,7 +579,6 @@ define void @PR20822() {
 entry:
   %f = alloca %struct.S, align 4
 ; CHECK: %[[alloca:.*]] = alloca
-; CHECK: %[[cast:.*]] = bitcast i32* %[[alloca]] to %struct.S*
   br i1 undef, label %if.end, label %for.cond
 
 for.cond:                                         ; preds = %for.cond, %entry
@@ -522,8 +586,8 @@ for.cond:                                         ; preds = %for.cond, %entry
 
 if.end:                                           ; preds = %for.cond, %entry
   %f2 = phi %struct.S* [ %f, %entry ], [ %f, %for.cond ]
-; CHECK: phi {{.*}} %[[cast]]
 ; CHECK: phi i32
+; CHECK: %[[cast:.*]] = bitcast i32* %[[alloca]] to %struct.S*
   phi i32 [ undef, %entry ], [ undef, %for.cond ]
   br i1 undef, label %if.then5, label %if.then2
 
@@ -532,6 +596,7 @@ if.then2:                                         ; preds = %if.end
 
 if.then5:                                         ; preds = %if.then2, %if.end
   %f1 = phi %struct.S* [ undef, %if.then2 ], [ %f2, %if.end ]
+; CHECK: phi {{.*}} %[[cast]]
   store %struct.S undef, %struct.S* %f1, align 4
   ret void
 }
diff --git a/test/Transforms/SROA/slice-width.ll b/test/Transforms/SROA/slice-width.ll
index 179780b4afee..ff66dcccee4b 100644
--- a/test/Transforms/SROA/slice-width.ll
+++ b/test/Transforms/SROA/slice-width.ll
@@ -1,7 +1,8 @@
 ; RUN: opt < %s -sroa -S | FileCheck %s
-target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
 
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 define void @no_split_on_non_byte_width(i32) {
 ; This tests that allocas are not split into slices that are not byte width multiple
@@ -23,3 +24,83 @@ load_i1:
   %t1 = load i1* %p1
   ret void
 }
+
+; PR18726: Check that we use memcpy and memset to fill out padding when we have
+; a slice with a simple single type whose store size is smaller than the slice
+; size.
+
+%union.Foo = type { x86_fp80, i64, i64 }
+
+@foo_copy_source = external constant %union.Foo
+@i64_sink = global i64 0
+
+define void @memcpy_fp80_padding() {
+  %x = alloca %union.Foo
+
+  ; Copy from a global.
+  %x_i8 = bitcast %union.Foo* %x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x_i8, i8* bitcast (%union.Foo* @foo_copy_source to i8*), i32 32, i32 16, i1 false)
+
+  ; Access a slice of the alloca to trigger SROA.
+  %mid_p = getelementptr %union.Foo* %x, i32 0, i32 1
+  %elt = load i64* %mid_p
+  store i64 %elt, i64* @i64_sink
+  ret void
+}
+; CHECK-LABEL: define void @memcpy_fp80_padding
+; CHECK: alloca x86_fp80
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32
+; CHECK: load i64* getelementptr inbounds (%union.Foo* @foo_copy_source, i64 0, i32 1)
+; CHECK: load i64* getelementptr inbounds (%union.Foo* @foo_copy_source, i64 0, i32 2)
+
+define void @memset_fp80_padding() {
+  %x = alloca %union.Foo
+
+  ; Set to all ones.
+  %x_i8 = bitcast %union.Foo* %x to i8*
+  call void @llvm.memset.p0i8.i32(i8* %x_i8, i8 -1, i32 32, i32 16, i1 false)
+
+  ; Access a slice of the alloca to trigger SROA.
+  %mid_p = getelementptr %union.Foo* %x, i32 0, i32 1
+  %elt = load i64* %mid_p
+  store i64 %elt, i64* @i64_sink
+  ret void
+}
+; CHECK-LABEL: define void @memset_fp80_padding
+; CHECK: alloca x86_fp80
+; CHECK: call void @llvm.memset.p0i8.i32(i8* %{{.*}}, i8 -1, i32 16, i32 16, i1 false)
+; CHECK: store i64 -1, i64* @i64_sink
+
+%S.vec3float = type { float, float, float }
+%U.vec3float = type { <4 x float> }
+
+declare i32 @memcpy_vec3float_helper(%S.vec3float*)
+
+define i32 @memcpy_vec3float_widening(%S.vec3float* %x) {
+; CHECK-LABEL: @memcpy_vec3float_widening(
+; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte
+; vector store, hence accidentally putting gibberish onto the stack.
+entry:
+  ; Create a temporary variable %tmp1 and copy %x[0] into it
+  %tmp1 = alloca %S.vec3float, align 4
+  %0 = bitcast %S.vec3float* %tmp1 to i8*
+  %1 = bitcast %S.vec3float* %x to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 12, i32 4, i1 false)
+
+  ; The following block does nothing; but appears to confuse SROA
+  %unused1 = bitcast %S.vec3float* %tmp1 to %U.vec3float*
+  %unused2 = getelementptr inbounds %U.vec3float* %unused1, i32 0, i32 0
+  %unused3 = load <4 x float>* %unused2, align 1
+
+  ; Create a second temporary and copy %tmp1 into it
+  %tmp2 = alloca %S.vec3float, align 4
+  %2 = bitcast %S.vec3float* %tmp2 to i8*
+  %3 = bitcast %S.vec3float* %tmp1 to i8*
+; CHECK: alloca
+; CHECK-NOT: store <4 x float>
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %3, i32 12, i32 4, i1 false)
+
+  %result = call i32 @memcpy_vec3float_helper(%S.vec3float* %tmp2)
+  ret i32 %result
+; CHECK: ret i32 %result
+}
diff --git a/test/Transforms/SROA/vector-lifetime-intrinsic.ll b/test/Transforms/SROA/vector-lifetime-intrinsic.ll
new file mode 100644
index 000000000000..30c93b054ecc
--- /dev/null
+++ b/test/Transforms/SROA/vector-lifetime-intrinsic.ll
@@ -0,0 +1,31 @@
+; RUN: opt -sroa -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:32-i64:32-v32:32-n32-S64"
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #0
+
+; Function Attrs: nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #0
+
+; CHECK: @wombat
+; CHECK-NOT: alloca
+; CHECK: ret void
+define void @wombat(<4 x float> %arg1) {
+bb:
+  %tmp = alloca <4 x float>, align 16
+  %tmp8 = bitcast <4 x float>* %tmp to i8*
+  call void @llvm.lifetime.start(i64 16, i8* %tmp8)
+  store <4 x float> %arg1, <4 x float>* %tmp, align 16
+  %tmp17 = bitcast <4 x float>* %tmp to <3 x float>*
+  %tmp18 = load <3 x float>* %tmp17
+  %tmp20 = bitcast <4 x float>* %tmp to i8*
+  call void @llvm.lifetime.end(i64 16, i8* %tmp20)
+  call void @wombat3(<3 x float> %tmp18)
+  ret void
+}
+
+; Function Attrs: nounwind
+declare void @wombat3(<3 x float>) #0
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll
index 9c9f6a1d08d4..c20c6352e099 100644
--- a/test/Transforms/SROA/vector-promotion.ll
+++ b/test/Transforms/SROA/vector-promotion.ll
@@ -468,3 +468,158 @@ entry:
 ; CHECK: %[[insert:.*]] = or i32 %{{.*}}, %[[trunc]]
 ; CHECK: ret i32 %[[insert]]
 }
+
+define i32 @test7(<2 x i32> %x, <2 x i32> %y) {
+; Test that we can promote to vectors when the alloca doesn't mention any vector types.
+; CHECK-LABEL: @test7(
+entry:
+	%a = alloca [2 x i64]
+  %a.cast = bitcast [2 x i64]* %a to [2 x <2 x i32>]*
+; CHECK-NOT: alloca
+
+  %a.x = getelementptr inbounds [2 x <2 x i32>]* %a.cast, i64 0, i64 0
+  store <2 x i32> %x, <2 x i32>* %a.x
+  %a.y = getelementptr inbounds [2 x <2 x i32>]* %a.cast, i64 0, i64 1
+  store <2 x i32> %y, <2 x i32>* %a.y
+; CHECK-NOT: store
+
+  %a.tmp1 = getelementptr inbounds [2 x <2 x i32>]* %a.cast, i64 0, i64 0, i64 1
+  %tmp1 = load i32* %a.tmp1
+  %a.tmp2 = getelementptr inbounds [2 x <2 x i32>]* %a.cast, i64 0, i64 1, i64 1
+  %tmp2 = load i32* %a.tmp2
+  %a.tmp3 = getelementptr inbounds [2 x <2 x i32>]* %a.cast, i64 0, i64 1, i64 0
+  %tmp3 = load i32* %a.tmp3
+; CHECK-NOT: load
+; CHECK:      extractelement <2 x i32> %x, i32 1
+; CHECK-NEXT: extractelement <2 x i32> %y, i32 1
+; CHECK-NEXT: extractelement <2 x i32> %y, i32 0
+
+  %tmp4 = add i32 %tmp1, %tmp2
+  %tmp5 = add i32 %tmp3, %tmp4
+  ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+define i32 @test8(<2 x i32> %x) {
+; Ensure that we can promote an alloca that doesn't mention a vector type based
+; on a single store with a vector type.
+; CHECK-LABEL: @test8(
+entry:
+	%a = alloca i64
+  %a.vec = bitcast i64* %a to <2 x i32>*
+  %a.i32 = bitcast i64* %a to i32*
+; CHECK-NOT: alloca
+
+  store <2 x i32> %x, <2 x i32>* %a.vec
+; CHECK-NOT: store
+
+  %tmp1 = load i32* %a.i32
+  %a.tmp2 = getelementptr inbounds i32* %a.i32, i64 1
+  %tmp2 = load i32* %a.tmp2
+; CHECK-NOT: load
+; CHECK:      extractelement <2 x i32> %x, i32 0
+; CHECK-NEXT: extractelement <2 x i32> %x, i32 1
+
+  %tmp4 = add i32 %tmp1, %tmp2
+  ret i32 %tmp4
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+define <2 x i32> @test9(i32 %x, i32 %y) {
+; Ensure that we can promote an alloca that doesn't mention a vector type based
+; on a single load with a vector type.
+; CHECK-LABEL: @test9(
+entry:
+	%a = alloca i64
+  %a.vec = bitcast i64* %a to <2 x i32>*
+  %a.i32 = bitcast i64* %a to i32*
+; CHECK-NOT: alloca
+
+  store i32 %x, i32* %a.i32
+  %a.tmp2 = getelementptr inbounds i32* %a.i32, i64 1
+  store i32 %y, i32* %a.tmp2
+; CHECK-NOT: store
+; CHECK:      %[[V1:.*]] = insertelement <2 x i32> undef, i32 %x, i32 0
+; CHECK-NEXT: %[[V2:.*]] = insertelement <2 x i32> %[[V1]], i32 %y, i32 1
+
+  %result = load <2 x i32>* %a.vec
+; CHECK-NOT:  load
+
+  ret <2 x i32> %result
+; CHECK-NEXT: ret <2 x i32> %[[V2]]
+}
+
+define <2 x i32> @test10(<4 x i16> %x, i32 %y) {
+; If there are multiple different vector types used, we should select the one
+; with the widest elements.
+; CHECK-LABEL: @test10(
+entry:
+	%a = alloca i64
+  %a.vec1 = bitcast i64* %a to <2 x i32>*
+  %a.vec2 = bitcast i64* %a to <4 x i16>*
+  %a.i32 = bitcast i64* %a to i32*
+; CHECK-NOT: alloca
+
+  store <4 x i16> %x, <4 x i16>* %a.vec2
+  %a.tmp2 = getelementptr inbounds i32* %a.i32, i64 1
+  store i32 %y, i32* %a.tmp2
+; CHECK-NOT: store
+; CHECK:      %[[V1:.*]] = bitcast <4 x i16> %x to <2 x i32>
+; CHECK-NEXT: %[[V2:.*]] = insertelement <2 x i32> %[[V1]], i32 %y, i32 1
+
+  %result = load <2 x i32>* %a.vec1
+; CHECK-NOT:  load
+
+  ret <2 x i32> %result
+; CHECK-NEXT: ret <2 x i32> %[[V2]]
+}
+
+define <2 x float> @test11(<4 x i16> %x, i32 %y) {
+; If there are multiple different element types for different vector types,
+; pick the integer types. This isn't really important, but seems like the best
+; heuristic for making a deterministic decision.
+; CHECK-LABEL: @test11(
+entry:
+	%a = alloca i64
+  %a.vec1 = bitcast i64* %a to <2 x float>*
+  %a.vec2 = bitcast i64* %a to <4 x i16>*
+  %a.i32 = bitcast i64* %a to i32*
+; CHECK-NOT: alloca
+
+  store <4 x i16> %x, <4 x i16>* %a.vec2
+  %a.tmp2 = getelementptr inbounds i32* %a.i32, i64 1
+  store i32 %y, i32* %a.tmp2
+; CHECK-NOT: store
+; CHECK:      %[[V1:.*]] = bitcast i32 %y to <2 x i16>
+; CHECK-NEXT: %[[V2:.*]] = shufflevector <2 x i16> %[[V1]], <2 x i16> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+; CHECK-NEXT: %[[V3:.*]] = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i16> %[[V2]], <4 x i16> %x
+; CHECK-NEXT: %[[V4:.*]] = bitcast <4 x i16> %[[V3]] to <2 x float>
+
+  %result = load <2 x float>* %a.vec1
+; CHECK-NOT:  load
+
+  ret <2 x float> %result
+; CHECK-NEXT: ret <2 x float> %[[V4]]
+}
+
+define <4 x float> @test12() {
+; CHECK-LABEL: @test12(
+  %a = alloca <3 x i32>, align 16
+; CHECK-NOT: alloca
+
+  %cast1 = bitcast <3 x i32>* %a to <4 x i32>*
+  store <4 x i32> undef, <4 x i32>* %cast1, align 16
+; CHECK-NOT: store
+
+  %cast2 = bitcast <3 x i32>* %a to <3 x float>*
+  %cast3 = bitcast <3 x float>* %cast2 to <4 x float>*
+  %vec = load <4 x float>* %cast3
+; CHECK-NOT: load
+
+; CHECK:      %[[ret:.*]] = bitcast <4 x i32> undef to <4 x float>
+; CHECK-NEXT: ret <4 x float> %[[ret]]
+  ret <4 x float> %vec
+}
diff --git a/test/Transforms/SampleProfile/Inputs/fnptr.binprof b/test/Transforms/SampleProfile/Inputs/fnptr.binprof
new file mode 100644
index 000000000000..14d7fd555dae
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/fnptr.binprof
diff --git a/test/Transforms/SampleProfile/Inputs/fnptr.prof b/test/Transforms/SampleProfile/Inputs/fnptr.prof
new file mode 100644
index 000000000000..6a3b4e2315bb
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/fnptr.prof
@@ -0,0 +1,12 @@
+_Z3fooi:7711:610
+1: 610
+_Z3bari:20301:1437
+1: 1437
+main:184019:0
+4: 534
+6: 2080
+9: 2064 _Z3bari:1471 _Z3fooi:631
+5.1: 1075
+5: 1075
+7: 534
+4.2: 534
diff --git a/test/Transforms/SampleProfile/branch.ll b/test/Transforms/SampleProfile/branch.ll
index 65f1f1769934..6391fc5158cf 100644
--- a/test/Transforms/SampleProfile/branch.ll
+++ b/test/Transforms/SampleProfile/branch.ll
@@ -32,8 +32,8 @@ define i32 @main(i32 %argc, i8** nocapture readonly %argv) #0 {
 ; CHECK: Printing analysis 'Branch Probability Analysis' for function 'main':
 
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !13), !dbg !27
-  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !14), !dbg !27
+  tail call void @llvm.dbg.value(metadata i32 %argc, i64 0, metadata !13, metadata !{}), !dbg !27
+  tail call void @llvm.dbg.value(metadata i8** %argv, i64 0, metadata !14, metadata !{}), !dbg !27
   %cmp = icmp slt i32 %argc, 2, !dbg !28
   br i1 %cmp, label %return, label %if.end, !dbg !28
 ; CHECK: edge entry -> return probability is 1 / 2 = 50%
@@ -43,7 +43,7 @@ if.end:                                           ; preds = %entry
   %arrayidx = getelementptr inbounds i8** %argv, i64 1, !dbg !30
   %0 = load i8** %arrayidx, align 8, !dbg !30, !tbaa !31
   %call = tail call i32 @atoi(i8* %0) #4, !dbg !30
-  tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !17), !dbg !30
+  tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !17, metadata !{}), !dbg !30
   %cmp1 = icmp sgt i32 %call, 100, !dbg !35
   br i1 %cmp1, label %for.body, label %if.end6, !dbg !35
 ; CHECK: edge if.end -> for.body probability is 1 / 2 = 50%
@@ -55,14 +55,14 @@ for.body:                                         ; preds = %if.end, %for.body
   %add = fadd double %s.015, 3.049000e+00, !dbg !36
   %conv = sitofp i32 %u.016 to double, !dbg !36
   %add4 = fadd double %add, %conv, !dbg !36
-  tail call void @llvm.dbg.value(metadata !{double %add4}, i64 0, metadata !18), !dbg !36
+  tail call void @llvm.dbg.value(metadata double %add4, i64 0, metadata !18, metadata !{}), !dbg !36
   %div = fdiv double 3.940000e+00, %s.015, !dbg !37
   %mul = fmul double %div, 3.200000e-01, !dbg !37
   %add5 = fadd double %add4, %mul, !dbg !37
   %sub = fsub double %add4, %add5, !dbg !37
-  tail call void @llvm.dbg.value(metadata !{double %sub}, i64 0, metadata !18), !dbg !37
+  tail call void @llvm.dbg.value(metadata double %sub, i64 0, metadata !18, metadata !{}), !dbg !37
   %inc = add nsw i32 %u.016, 1, !dbg !38
-  tail call void @llvm.dbg.value(metadata !{i32 %inc}, i64 0, metadata !21), !dbg !38
+  tail call void @llvm.dbg.value(metadata i32 %inc, i64 0, metadata !21, metadata !{}), !dbg !38
   %exitcond = icmp eq i32 %inc, %call, !dbg !38
   br i1 %exitcond, label %if.end6, label %for.body, !dbg !38
 ; CHECK: edge for.body -> if.end6 probability is 1 / 10227 = 0.00977804
@@ -86,7 +86,7 @@ declare i32 @atoi(i8* nocapture) #1
 declare i32 @printf(i8* nocapture readonly, ...) #2
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #3
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #3
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
@@ -98,46 +98,46 @@ attributes #4 = { nounwind readonly }
 !llvm.module.flags = !{!25, !42}
 !llvm.ident = !{!26}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.4 (trunk 192896) (llvm/trunk 192895)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [./branch.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"branch.cc", metadata !"."}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 4, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !12, i32 4} ; [ DW_TAG_subprogram ] [line 4] [def] [main]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [./branch.cc]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{metadata !8, metadata !8, metadata !9}
-!8 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!9 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
-!10 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
-!11 = metadata !{i32 786468, null, null, metadata !"char", i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
-!12 = metadata !{metadata !13, metadata !14, metadata !15, metadata !17, metadata !18, metadata !21, metadata !23}
-!13 = metadata !{i32 786689, metadata !4, metadata !"argc", metadata !5, i32 16777220, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argc] [line 4]
-!14 = metadata !{i32 786689, metadata !4, metadata !"argv", metadata !5, i32 33554436, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [argv] [line 4]
-!15 = metadata !{i32 786688, metadata !4, metadata !"result", metadata !5, i32 7, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [result] [line 7]
-!16 = metadata !{i32 786468, null, null, metadata !"double", i32 0, i64 64, i64 64, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
-!17 = metadata !{i32 786688, metadata !4, metadata !"limit", metadata !5, i32 8, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [limit] [line 8]
-!18 = metadata !{i32 786688, metadata !19, metadata !"s", metadata !5, i32 10, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [s] [line 10]
-!19 = metadata !{i32 786443, metadata !1, metadata !20, i32 9, i32 0, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [./branch.cc]
-!20 = metadata !{i32 786443, metadata !1, metadata !4, i32 9, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [./branch.cc]
-!21 = metadata !{i32 786688, metadata !22, metadata !"u", metadata !5, i32 11, metadata !8, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [u] [line 11]
-!22 = metadata !{i32 786443, metadata !1, metadata !19, i32 11, i32 0, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [./branch.cc]
-!23 = metadata !{i32 786688, metadata !24, metadata !"x", metadata !5, i32 12, metadata !16, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [x] [line 12]
-!24 = metadata !{i32 786443, metadata !1, metadata !22, i32 11, i32 0, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [./branch.cc]
-!25 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!26 = metadata !{metadata !"clang version 3.4 (trunk 192896) (llvm/trunk 192895)"}
-!27 = metadata !{i32 4, i32 0, metadata !4, null}
-!28 = metadata !{i32 5, i32 0, metadata !29, null}
-!29 = metadata !{i32 786443, metadata !1, metadata !4, i32 5, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./branch.cc]
-!30 = metadata !{i32 8, i32 0, metadata !4, null} ; [ DW_TAG_imported_declaration ]
-!31 = metadata !{metadata !32, metadata !32, i64 0}
-!32 = metadata !{metadata !"any pointer", metadata !33, i64 0}
-!33 = metadata !{metadata !"omnipotent char", metadata !34, i64 0}
-!34 = metadata !{metadata !"Simple C/C++ TBAA"}
-!35 = metadata !{i32 9, i32 0, metadata !20, null}
-!36 = metadata !{i32 13, i32 0, metadata !24, null}
-!37 = metadata !{i32 14, i32 0, metadata !24, null}
-!38 = metadata !{i32 11, i32 0, metadata !22, null}
-!39 = metadata !{i32 20, i32 0, metadata !4, null}
-!40 = metadata !{i32 21, i32 0, metadata !4, null}
-!41 = metadata !{i32 22, i32 0, metadata !4, null}
-!42 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\004\00clang version 3.4 (trunk 192896) (llvm/trunk 192895)\001\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [./branch.cc] [DW_LANG_C_plus_plus]
+!1 = !{!"branch.cc", !"."}
+!2 = !{i32 0}
+!3 = !{!4}
+!4 = !{!"0x2e\00main\00main\00\004\000\001\000\006\00256\001\004", !1, !5, !6, null, i32 (i32, i8**)* @main, null, null, !12} ; [ DW_TAG_subprogram ] [line 4] [def] [main]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [./branch.cc]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!8, !8, !9}
+!8 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!9 = !{!"0xf\00\000\0064\0064\000\000", null, null, !10} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from ]
+!10 = !{!"0xf\00\000\0064\0064\000\000", null, null, !11} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from char]
+!11 = !{!"0x24\00char\000\008\008\000\000\006", null, null} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!12 = !{!13, !14, !15, !17, !18, !21, !23}
+!13 = !{!"0x101\00argc\0016777220\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [argc] [line 4]
+!14 = !{!"0x101\00argv\0033554436\000", !4, !5, !9} ; [ DW_TAG_arg_variable ] [argv] [line 4]
+!15 = !{!"0x100\00result\007\000", !4, !5, !16} ; [ DW_TAG_auto_variable ] [result] [line 7]
+!16 = !{!"0x24\00double\000\0064\0064\000\000\004", null, null} ; [ DW_TAG_base_type ] [double] [line 0, size 64, align 64, offset 0, enc DW_ATE_float]
+!17 = !{!"0x100\00limit\008\000", !4, !5, !8} ; [ DW_TAG_auto_variable ] [limit] [line 8]
+!18 = !{!"0x100\00s\0010\000", !19, !5, !16} ; [ DW_TAG_auto_variable ] [s] [line 10]
+!19 = !{!"0xb\009\000\000", !1, !20} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!20 = !{!"0xb\009\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!21 = !{!"0x100\00u\0011\000", !22, !5, !8} ; [ DW_TAG_auto_variable ] [u] [line 11]
+!22 = !{!"0xb\0011\000\000", !1, !19} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!23 = !{!"0x100\00x\0012\000", !24, !5, !16} ; [ DW_TAG_auto_variable ] [x] [line 12]
+!24 = !{!"0xb\0011\000\000", !1, !22} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!25 = !{i32 2, !"Dwarf Version", i32 4}
+!26 = !{!"clang version 3.4 (trunk 192896) (llvm/trunk 192895)"}
+!27 = !MDLocation(line: 4, scope: !4)
+!28 = !MDLocation(line: 5, scope: !29)
+!29 = !{!"0xb\005\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [./branch.cc]
+!30 = !MDLocation(line: 8, scope: !4)
+!31 = !{!32, !32, i64 0}
+!32 = !{!"any pointer", !33, i64 0}
+!33 = !{!"omnipotent char", !34, i64 0}
+!34 = !{!"Simple C/C++ TBAA"}
+!35 = !MDLocation(line: 9, scope: !20)
+!36 = !MDLocation(line: 13, scope: !24)
+!37 = !MDLocation(line: 14, scope: !24)
+!38 = !MDLocation(line: 11, scope: !22)
+!39 = !MDLocation(line: 20, scope: !4)
+!40 = !MDLocation(line: 21, scope: !4)
+!41 = !MDLocation(line: 22, scope: !4)
+!42 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/SampleProfile/calls.ll b/test/Transforms/SampleProfile/calls.ll
index 381be8714eb6..d56660937bc0 100644
--- a/test/Transforms/SampleProfile/calls.ll
+++ b/test/Transforms/SampleProfile/calls.ll
@@ -15,7 +15,12 @@
 ;   printf("sum is %d\n", s);
 ;   return 0;
 ; }
-
+;
+; Note that this test is missing the llvm.dbg.cu annotation. This emulates
+; the effect of the user having only used -fprofile-sample-use without
+; -gmlt when invoking the driver. In those cases, we need to track source
+; location information but we do not have to generate debug info in the
+; final binary.
 @.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
 
 ; Function Attrs: nounwind uwtable
@@ -84,33 +89,32 @@ while.end:                                        ; preds = %while.cond
 
 declare i32 @printf(i8*, ...) #2
 
-!llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8, !9}
 !llvm.ident = !{!10}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [./calls.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"calls.cc", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4, metadata !7}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"sum", metadata !"sum", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i32)* @_Z3sumii, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [sum]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [./calls.cc]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
-!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!10 = metadata !{metadata !"clang version 3.5 "}
-!11 = metadata !{i32 4, i32 0, metadata !4, null}
-!12 = metadata !{i32 8, i32 0, metadata !7, null} ; [ DW_TAG_imported_declaration ]
-!13 = metadata !{i32 9, i32 0, metadata !7, null}
-!14 = metadata !{i32 9, i32 0, metadata !15, null}
-!15 = metadata !{i32 786443, metadata !1, metadata !7, i32 9, i32 0, i32 1, i32 1} ; [ DW_TAG_lexical_block ] [./calls.cc]
-!16 = metadata !{i32 10, i32 0, metadata !17, null}
-!17 = metadata !{i32 786443, metadata !1, metadata !7, i32 10, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./calls.cc]
-!18 = metadata !{i32 10, i32 0, metadata !19, null}
-!19 = metadata !{i32 786443, metadata !1, metadata !17, i32 10, i32 0, i32 1, i32 2} ; [ DW_TAG_lexical_block ] [./calls.cc]
-!20 = metadata !{i32 10, i32 0, metadata !21, null}
-!21 = metadata !{i32 786443, metadata !1, metadata !17, i32 10, i32 0, i32 2, i32 3} ; [ DW_TAG_lexical_block ] [./calls.cc]
-!22 = metadata !{i32 10, i32 0, metadata !23, null}
-!23 = metadata !{i32 786443, metadata !1, metadata !17, i32 10, i32 0, i32 3, i32 4} ; [ DW_TAG_lexical_block ] [./calls.cc]
-!24 = metadata !{i32 11, i32 0, metadata !7, null}
-!25 = metadata !{i32 12, i32 0, metadata !7, null}
+!0 = !{!"0x11\004\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [./calls.cc] [DW_LANG_C_plus_plus]
+!1 = !{!"calls.cc", !"."}
+!2 = !{}
+!3 = !{!4, !7}
+!4 = !{!"0x2e\00sum\00sum\00\003\000\001\000\006\00256\000\003", !1, !5, !6, null, i32 (i32, i32)* @_Z3sumii, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [sum]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [./calls.cc]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!"0x2e\00main\00main\00\007\000\001\000\006\00256\000\007", !1, !5, !6, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 7] [def] [main]
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 2}
+!10 = !{!"clang version 3.5 "}
+!11 = !MDLocation(line: 4, scope: !4)
+!12 = !MDLocation(line: 8, scope: !7)
+!13 = !MDLocation(line: 9, scope: !7)
+!14 = !MDLocation(line: 9, scope: !15)
+!15 = !{!"0xb\001", !1, !7} ; [ DW_TAG_lexical_block ] [./calls.cc]
+!16 = !MDLocation(line: 10, scope: !17)
+!17 = !{!"0xb\0010\000\000", !1, !7} ; [ DW_TAG_lexical_block ] [./calls.cc]
+!18 = !MDLocation(line: 10, scope: !19)
+!19 = !{!"0xb\001", !1, !17} ; [ DW_TAG_lexical_block ] [./calls.cc]
+!20 = !MDLocation(line: 10, scope: !21)
+!21 = !{!"0xb\002", !1, !17} ; [ DW_TAG_lexical_block ] [./calls.cc]
+!22 = !MDLocation(line: 10, scope: !23)
+!23 = !{!"0xb\003", !1, !17} ; [ DW_TAG_lexical_block ] [./calls.cc]
+!24 = !MDLocation(line: 11, scope: !7)
+!25 = !MDLocation(line: 12, scope: !7)
diff --git a/test/Transforms/SampleProfile/discriminator.ll b/test/Transforms/SampleProfile/discriminator.ll
index 0f773a541a4e..cafc69d14923 100644
--- a/test/Transforms/SampleProfile/discriminator.ll
+++ b/test/Transforms/SampleProfile/discriminator.ll
@@ -66,25 +66,25 @@ while.end:                                        ; preds = %while.cond
 !llvm.module.flags = !{!7, !8}
 !llvm.ident = !{!9}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [discriminator.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"discriminator.c", metadata !"."}
-!2 = metadata !{}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, metadata !2, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [discriminator.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!8 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!9 = metadata !{metadata !"clang version 3.5 "}
-!10 = metadata !{i32 2, i32 0, metadata !4, null}
-!11 = metadata !{i32 3, i32 0, metadata !4, null}
-!12 = metadata !{i32 3, i32 0, metadata !13, null}
-!13 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 1, i32 2} ; [ DW_TAG_lexical_block ] [discriminator.c]
-!14 = metadata !{i32 4, i32 0, metadata !15, null}
-!15 = metadata !{i32 786443, metadata !1, metadata !16, i32 4, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [discriminator.c]
-!16 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [discriminator.c]
-!17 = metadata !{i32 4, i32 0, metadata !18, null}
-!18 = metadata !{i32 786443, metadata !1, metadata !15, i32 4, i32 0, i32 1, i32 3} ; [ DW_TAG_lexical_block ] [discriminator.c]
-!19 = metadata !{i32 5, i32 0, metadata !16, null}
-!20 = metadata !{i32 6, i32 0, metadata !16, null}
-!21 = metadata !{i32 7, i32 0, metadata !4, null}
+!0 = !{!"0x11\0012\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [discriminator.c] [DW_LANG_C99]
+!1 = !{!"discriminator.c", !"."}
+!2 = !{}
+!3 = !{!4}
+!4 = !{!"0x2e\00foo\00foo\00\001\000\001\000\006\00256\000\001", !1, !5, !6, null, i32 (i32)* @foo, null, null, !2} ; [ DW_TAG_subprogram ] [line 1] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [discriminator.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 1, !"Debug Info Version", i32 2}
+!9 = !{!"clang version 3.5 "}
+!10 = !MDLocation(line: 2, scope: !4)
+!11 = !MDLocation(line: 3, scope: !4)
+!12 = !MDLocation(line: 3, scope: !13)
+!13 = !{!"0xb\001", !1, !4} ; [ DW_TAG_lexical_block ] [discriminator.c]
+!14 = !MDLocation(line: 4, scope: !15)
+!15 = !{!"0xb\004\000\001", !1, !16} ; [ DW_TAG_lexical_block ] [discriminator.c]
+!16 = !{!"0xb\003\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [discriminator.c]
+!17 = !MDLocation(line: 4, scope: !18)
+!18 = !{!"0xb\001", !1, !15} ; [ DW_TAG_lexical_block ] [discriminator.c]
+!19 = !MDLocation(line: 5, scope: !16)
+!20 = !MDLocation(line: 6, scope: !16)
+!21 = !MDLocation(line: 7, scope: !4)
diff --git a/test/Transforms/SampleProfile/fnptr.ll b/test/Transforms/SampleProfile/fnptr.ll
new file mode 100644
index 000000000000..096033bd35fc
--- /dev/null
+++ b/test/Transforms/SampleProfile/fnptr.ll
@@ -0,0 +1,155 @@
+; The two profiles used in this test are the same but encoded in different
+; formats. This checks that we produce the same profile annotations regardless
+; of the profile format.
+;
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob | FileCheck %s
+
+; CHECK:   edge for.body3 -> if.then probability is 534 / 2598 = 20.5543%
+; CHECK:   edge for.body3 -> if.else probability is 2064 / 2598 = 79.4457%
+; CHECK:   edge for.inc -> for.inc12 probability is 1052 / 2598 = 40.4927%
+; CHECK:   edge for.inc -> for.body3 probability is 1546 / 2598 = 59.5073%
+; CHECK:   edge for.inc12 -> for.end14 probability is 518 / 1052 = 49.2395%
+; CHECK:   edge for.inc12 -> for.cond1.preheader probability is 534 / 1052 = 50.7605%
+
+; Original C++ test case.
+;
+; #include <stdlib.h>
+; #include <math.h>
+; #include <stdio.h>
+;
+; #define N 10000
+; #define M 6000
+;
+; double foo(int x) {
+;   return x * sin((double)x);
+; }
+;
+; double bar(int x) {
+;   return x - cos((double)x);
+; }
+;
+; int main() {
+;   double (*fptr)(int);
+;   double S = 0;
+;   for (int i = 0; i < N; i++)
+;     for (int j = 0; j < M; j++) {
+;       fptr = (rand() % 100 < 30) ? foo : bar;
+;       if (rand() % 100 < 10)
+;         S += (*fptr)(i + j * 300);
+;       else
+;         S += (*fptr)(i - j / 840);
+;     }
+;   printf("S = %lf\n", S);
+;   return 0;
+; }
+
+@.str = private unnamed_addr constant [9 x i8] c"S = %lf\0A\00", align 1
+
+define double @_Z3fooi(i32 %x) #0 {
+entry:
+  %conv = sitofp i32 %x to double, !dbg !2
+  %call = tail call double @sin(double %conv) #3, !dbg !8
+  %mul = fmul double %conv, %call, !dbg !8
+  ret double %mul, !dbg !8
+}
+
+declare double @sin(double) #1
+
+define double @_Z3bari(i32 %x) #0 {
+entry:
+  %conv = sitofp i32 %x to double, !dbg !9
+  %call = tail call double @cos(double %conv) #3, !dbg !11
+  %sub = fsub double %conv, %call, !dbg !11
+  ret double %sub, !dbg !11
+}
+
+declare double @cos(double) #1
+
+define i32 @main() #2 {
+entry:
+  br label %for.cond1.preheader, !dbg !12
+
+for.cond1.preheader:                              ; preds = %for.inc12, %entry
+  %i.025 = phi i32 [ 0, %entry ], [ %inc13, %for.inc12 ]
+  %S.024 = phi double [ 0.000000e+00, %entry ], [ %S.2.lcssa, %for.inc12 ]
+  br label %for.body3, !dbg !14
+
+for.body3:                                        ; preds = %for.inc, %for.cond1.preheader
+  %j.023 = phi i32 [ 0, %for.cond1.preheader ], [ %inc, %for.inc ]
+  %S.122 = phi double [ %S.024, %for.cond1.preheader ], [ %S.2, %for.inc ]
+  %call = tail call i32 @rand() #3, !dbg !15
+  %rem = srem i32 %call, 100, !dbg !15
+  %cmp4 = icmp slt i32 %rem, 30, !dbg !15
+  %_Z3fooi._Z3bari = select i1 %cmp4, double (i32)* @_Z3fooi, double (i32)* @_Z3bari, !dbg !15
+  %call5 = tail call i32 @rand() #3, !dbg !16
+  %rem6 = srem i32 %call5, 100, !dbg !16
+  %cmp7 = icmp slt i32 %rem6, 10, !dbg !16
+  br i1 %cmp7, label %if.then, label %if.else, !dbg !16, !prof !17
+
+if.then:                                          ; preds = %for.body3
+  %mul = mul nsw i32 %j.023, 300, !dbg !18
+  %add = add nsw i32 %mul, %i.025, !dbg !18
+  %call8 = tail call double %_Z3fooi._Z3bari(i32 %add), !dbg !18
+  br label %for.inc, !dbg !18
+
+if.else:                                          ; preds = %for.body3
+  %div = sdiv i32 %j.023, 840, !dbg !19
+  %sub = sub nsw i32 %i.025, %div, !dbg !19
+  %call10 = tail call double %_Z3fooi._Z3bari(i32 %sub), !dbg !19
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.then, %if.else
+  %call8.pn = phi double [ %call8, %if.then ], [ %call10, %if.else ]
+  %S.2 = fadd double %S.122, %call8.pn, !dbg !18
+  %inc = add nsw i32 %j.023, 1, !dbg !20
+  %exitcond = icmp eq i32 %j.023, 5999, !dbg !14
+  br i1 %exitcond, label %for.inc12, label %for.body3, !dbg !14, !prof !21
+
+for.inc12:                                        ; preds = %for.inc
+  %S.2.lcssa = phi double [ %S.2, %for.inc ]
+  %inc13 = add nsw i32 %i.025, 1, !dbg !22
+  %exitcond26 = icmp eq i32 %i.025, 9999, !dbg !12
+  br i1 %exitcond26, label %for.end14, label %for.cond1.preheader, !dbg !12, !prof !23
+
+for.end14:                                        ; preds = %for.inc12
+  %S.2.lcssa.lcssa = phi double [ %S.2.lcssa, %for.inc12 ]
+  %call15 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i64 0, i64 0), double %S.2.lcssa.lcssa), !dbg !24
+  ret i32 0, !dbg !25
+}
+
+; Function Attrs: nounwind
+declare i32 @rand() #1
+
+; Function Attrs: nounwind
+declare i32 @printf(i8* nocapture readonly, ...) #1
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 2, !"Debug Info Version", i32 2}
+!1 = !{!"clang version 3.6.0 "}
+!2 = !MDLocation(line: 9, column: 3, scope: !3)
+!3 = !{!"0x2e\00foo\00foo\00\008\000\001\000\000\00256\001\008", !4, !5, !6, null, double (i32)* @_Z3fooi, null, null, !7} ; [ DW_TAG_subprogram ] [line 8] [def] [foo]
+!4 = !{!"fnptr.cc", !"."}
+!5 = !{!"0x29", !4}    ; [ DW_TAG_file_type ] [./fnptr.cc]
+!6 = !{!"0x15\00\000\000\000\000\000\000", null, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{}
+!8 = !MDLocation(line: 9, column: 14, scope: !3)
+!9 = !MDLocation(line: 13, column: 3, scope: !10)
+!10 = !{!"0x2e\00bar\00bar\00\0012\000\001\000\000\00256\001\0012", !4, !5, !6, null, double (i32)* @_Z3bari, null, null, !7} ; [ DW_TAG_subprogram ] [line 12] [def] [bar]
+!11 = !MDLocation(line: 13, column: 14, scope: !10)
+!12 = !MDLocation(line: 19, column: 3, scope: !13)
+!13 = !{!"0x2e\00main\00main\00\0016\000\001\000\000\00256\001\0016", !4, !5, !6, null, i32 ()* @main, null, null, !7} ; [ DW_TAG_subprogram ] [line 16] [def] [main]
+!14 = !MDLocation(line: 20, column: 5, scope: !13)
+!15 = !MDLocation(line: 21, column: 15, scope: !13)
+!16 = !MDLocation(line: 22, column: 11, scope: !13)
+!17 = !{!"branch_weights", i32 534, i32 2064}
+!18 = !MDLocation(line: 23, column: 14, scope: !13)
+!19 = !MDLocation(line: 25, column: 14, scope: !13)
+!20 = !MDLocation(line: 20, column: 28, scope: !13)
+!21 = !{!"branch_weights", i32 0, i32 1075}
+!22 = !MDLocation(line: 19, column: 26, scope: !13)
+!23 = !{!"branch_weights", i32 0, i32 534}
+!24 = !MDLocation(line: 27, column: 3, scope: !13)
+!25 = !MDLocation(line: 28, column: 3, scope: !13)
diff --git a/test/Transforms/SampleProfile/propagate.ll b/test/Transforms/SampleProfile/propagate.ll
index 939361b23703..594645f7f2be 100644
--- a/test/Transforms/SampleProfile/propagate.ll
+++ b/test/Transforms/SampleProfile/propagate.ll
@@ -198,46 +198,46 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.module.flags = !{!8, !9}
 !llvm.ident = !{!10}
 
-!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [propagate.cc] [DW_LANG_C_plus_plus]
-!1 = metadata !{metadata !"propagate.cc", metadata !"."}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4, metadata !7}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i64 (i32, i32, i64)* @_Z3fooiil, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [foo]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [propagate.cc]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 24, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 24} ; [ DW_TAG_subprogram ] [line 24] [def] [main]
-!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
-!10 = metadata !{metadata !"clang version 3.5 "}
-!11 = metadata !{i32 4, i32 0, metadata !12, null}
-!12 = metadata !{i32 786443, metadata !1, metadata !4, i32 4, i32 0, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [propagate.cc]
-!13 = metadata !{i32 5, i32 0, metadata !14, null}
-!14 = metadata !{i32 786443, metadata !1, metadata !12, i32 4, i32 0, i32 0, i32 1} ; [ DW_TAG_lexical_block ] [propagate.cc]
-!15 = metadata !{i32 7, i32 0, metadata !16, null}
-!16 = metadata !{i32 786443, metadata !1, metadata !17, i32 7, i32 0, i32 0, i32 3} ; [ DW_TAG_lexical_block ] [propagate.cc]
-!17 = metadata !{i32 786443, metadata !1, metadata !12, i32 6, i32 0, i32 0, i32 2} ; [ DW_TAG_lexical_block ] [propagate.cc]
-!18 = metadata !{i32 8, i32 0, metadata !19, null} ; [ DW_TAG_imported_declaration ]
-!19 = metadata !{i32 786443, metadata !1, metadata !20, i32 8, i32 0, i32 0, i32 5} ; [ DW_TAG_lexical_block ] [propagate.cc]
-!20 = metadata !{i32 786443, metadata !1, metadata !16, i32 7, i32 0, i32 0, i32 4} ; [ DW_TAG_lexical_block ] [propagate.cc]
-!21 = metadata !{i32 9, i32 0, metadata !19, null}
-!22 = metadata !{i32 10, i32 0, metadata !23, null}
-!23 = metadata !{i32 786443, metadata !1, metadata !20, i32 10, i32 0, i32 0, i32 6} ; [ DW_TAG_lexical_block ] [propagate.cc]
-!24 = metadata !{i32 11, i32 0, metadata !25, null}
-!25 = metadata !{i32 786443, metadata !1, metadata !23, i32 10, i32 0, i32 0, i32 7} ; [ DW_TAG_lexical_block ] [propagate.cc]
-!26 = metadata !{i32 12, i32 0, metadata !25, null}
-!27 = metadata !{i32 13, i32 0, metadata !25, null}
-!28 = metadata !{i32 14, i32 0, metadata !29, null}
-!29 = metadata !{i32 786443, metadata !1, metadata !30, i32 14, i32 0, i32 0, i32 9} ; [ DW_TAG_lexical_block ] [propagate.cc]
-!30 = metadata !{i32 786443, metadata !1, metadata !23, i32 13, i32 0, i32 0, i32 8} ; [ DW_TAG_lexical_block ] [propagate.cc]
-!31 = metadata !{i32 15, i32 0, metadata !32, null}
-!32 = metadata !{i32 786443, metadata !1, metadata !29, i32 14, i32 0, i32 0, i32 10} ; [ DW_TAG_lexical_block ] [propagate.cc]
-!33 = metadata !{i32 16, i32 0, metadata !32, null}
-!34 = metadata !{i32 17, i32 0, metadata !32, null}
-!35 = metadata !{i32 19, i32 0, metadata !20, null}
-!36 = metadata !{i32 21, i32 0, metadata !4, null}
-!37 = metadata !{i32 22, i32 0, metadata !4, null}
-!38 = metadata !{i32 25, i32 0, metadata !7, null}
-!39 = metadata !{i32 26, i32 0, metadata !7, null}
-!40 = metadata !{i32 27, i32 0, metadata !7, null}
-!41 = metadata !{i32 28, i32 0, metadata !7, null}
-!42 = metadata !{i32 29, i32 0, metadata !7, null}
+!0 = !{!"0x11\004\00clang version 3.5 \000\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [propagate.cc] [DW_LANG_C_plus_plus]
+!1 = !{!"propagate.cc", !"."}
+!2 = !{i32 0}
+!3 = !{!4, !7}
+!4 = !{!"0x2e\00foo\00foo\00\003\000\001\000\006\00256\000\003", !1, !5, !6, null, i64 (i32, i32, i64)* @_Z3fooiil, null, null, !2} ; [ DW_TAG_subprogram ] [line 3] [def] [foo]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [propagate.cc]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{!"0x2e\00main\00main\00\0024\000\001\000\006\00256\000\0024", !1, !5, !6, null, i32 ()* @main, null, null, !2} ; [ DW_TAG_subprogram ] [line 24] [def] [main]
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 2}
+!10 = !{!"clang version 3.5 "}
+!11 = !MDLocation(line: 4, scope: !12)
+!12 = !{!"0xb\004\000\000", !1, !4} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!13 = !MDLocation(line: 5, scope: !14)
+!14 = !{!"0xb\004\000\000", !1, !12} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!15 = !MDLocation(line: 7, scope: !16)
+!16 = !{!"0xb\007\000\000", !1, !17} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!17 = !{!"0xb\006\000\000", !1, !12} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!18 = !MDLocation(line: 8, scope: !19)
+!19 = !{!"0xb\008\000\000", !1, !20} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!20 = !{!"0xb\007\000\000", !1, !16} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!21 = !MDLocation(line: 9, scope: !19)
+!22 = !MDLocation(line: 10, scope: !23)
+!23 = !{!"0xb\0010\000\000", !1, !20} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!24 = !MDLocation(line: 11, scope: !25)
+!25 = !{!"0xb\0010\000\000", !1, !23} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!26 = !MDLocation(line: 12, scope: !25)
+!27 = !MDLocation(line: 13, scope: !25)
+!28 = !MDLocation(line: 14, scope: !29)
+!29 = !{!"0xb\0014\000\000", !1, !30} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!30 = !{!"0xb\0013\000\000", !1, !23} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!31 = !MDLocation(line: 15, scope: !32)
+!32 = !{!"0xb\0014\000\000", !1, !29} ; [ DW_TAG_lexical_block ] [propagate.cc]
+!33 = !MDLocation(line: 16, scope: !32)
+!34 = !MDLocation(line: 17, scope: !32)
+!35 = !MDLocation(line: 19, scope: !20)
+!36 = !MDLocation(line: 21, scope: !4)
+!37 = !MDLocation(line: 22, scope: !4)
+!38 = !MDLocation(line: 25, scope: !7)
+!39 = !MDLocation(line: 26, scope: !7)
+!40 = !MDLocation(line: 27, scope: !7)
+!41 = !MDLocation(line: 28, scope: !7)
+!42 = !MDLocation(line: 29, scope: !7)
diff --git a/test/Transforms/SampleProfile/syntax.ll b/test/Transforms/SampleProfile/syntax.ll
index 53c65f44239c..ed38a175288a 100644
--- a/test/Transforms/SampleProfile/syntax.ll
+++ b/test/Transforms/SampleProfile/syntax.ll
@@ -1,4 +1,4 @@
-; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/syntax.prof 2>&1 | FileCheck -check-prefix=NO-DEBUG %s
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/syntax.prof 2>&1 | FileCheck -check-prefix=NO-DEBUG %s
 ; RUN: not opt < %s -sample-profile -sample-profile-file=missing.prof 2>&1 | FileCheck -check-prefix=MISSING-FILE %s
 ; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_fn_header.prof 2>&1 | FileCheck -check-prefix=BAD-FN-HEADER %s
 ; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_sample_line.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLE-LINE %s
@@ -11,8 +11,8 @@ define void @empty() {
 entry:
   ret void
 }
-; NO-DEBUG: error: No debug information found in function empty
-; MISSING-FILE: error: missing.prof:
+; NO-DEBUG: warning: No debug information found in function empty: Function profile not used
+; MISSING-FILE: missing.prof: Could not open profile:
 ; BAD-FN-HEADER: error: {{.*}}bad_fn_header.prof:1: Expected 'mangled_name:NUM:NUM', found 3empty:100:BAD
 ; BAD-SAMPLE-LINE: error: {{.*}}bad_sample_line.prof:3: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1: BAD
 ; BAD-LINE-VALUES: error: {{.*}}bad_line_values.prof:2: Expected 'mangled_name:NUM:NUM', found -1: 10
diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
index 71bf22a61cd2..b0c459e21b10 100644
--- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll
+++ b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
@@ -17,10 +17,10 @@ entry:
   %b.addr = alloca i32, align 4
   %c = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !6), !dbg !7
+  call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !6, metadata !{}), !dbg !7
   store i32 %b, i32* %b.addr, align 4
-  call void @llvm.dbg.declare(metadata !{i32* %b.addr}, metadata !8), !dbg !9
-  call void @llvm.dbg.declare(metadata !{i32* %c}, metadata !10), !dbg !12
+  call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !8, metadata !{}), !dbg !9
+  call void @llvm.dbg.declare(metadata i32* %c, metadata !10, metadata !{}), !dbg !12
   %tmp = load i32* %a.addr, align 4, !dbg !13
   store i32 %tmp, i32* %c, align 4, !dbg !13
   %tmp1 = load i32* %a.addr, align 4, !dbg !14
@@ -37,29 +37,29 @@ entry:
   ret i32 %add7, !dbg !16
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!20}
 
-!0 = metadata !{i32 786449, metadata !18, i32 12, metadata !"clang version 3.0 (trunk 131941)", i1 false, metadata !"", i32 0, metadata !19, metadata !19, metadata !17, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 786478, metadata !18, metadata !2, metadata !"f", metadata !"f", metadata !"", i32 1, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i32)* @f, null, null, null, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
-!2 = metadata !{i32 786473, metadata !18} ; [ DW_TAG_file_type ]
-!3 = metadata !{i32 786453, metadata !18, metadata !2, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 786468, null, metadata !0, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 786689, metadata !1, metadata !"a", metadata !2, i32 16777217, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!7 = metadata !{i32 1, i32 11, metadata !1, null}
-!8 = metadata !{i32 786689, metadata !1, metadata !"b", metadata !2, i32 33554433, metadata !5, i32 0, null} ; [ DW_TAG_arg_variable ]
-!9 = metadata !{i32 1, i32 18, metadata !1, null}
-!10 = metadata !{i32 786688, metadata !11, metadata !"c", metadata !2, i32 2, metadata !5, i32 0, null} ; [ DW_TAG_auto_variable ]
-!11 = metadata !{i32 786443, metadata !18, metadata !1, i32 1, i32 21, i32 0} ; [ DW_TAG_lexical_block ]
-!12 = metadata !{i32 2, i32 9, metadata !11, null}
-!13 = metadata !{i32 2, i32 14, metadata !11, null}
-!14 = metadata !{i32 3, i32 5, metadata !11, null}
-!15 = metadata !{i32 4, i32 5, metadata !11, null}
-!16 = metadata !{i32 5, i32 5, metadata !11, null}
-!17 = metadata !{metadata !1}
-!18 = metadata !{metadata !"/d/j/debug-test.c", metadata !"/Volumes/Data/b"}
-!19 = metadata !{i32 0}
-!20 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.0 (trunk 131941)\000\00\000\00\000", !18, !19, !19, !17, null, null} ; [ DW_TAG_compile_unit ]
+!1 = !{!"0x2e\00f\00f\00\001\000\001\000\006\00256\000\001", !18, !2, !3, null, i32 (i32, i32)* @f, null, null, null} ; [ DW_TAG_subprogram ] [line 1] [def] [f]
+!2 = !{!"0x29", !18} ; [ DW_TAG_file_type ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !18, !2, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !0} ; [ DW_TAG_base_type ]
+!6 = !{!"0x101\00a\0016777217\000", !1, !2, !5} ; [ DW_TAG_arg_variable ]
+!7 = !MDLocation(line: 1, column: 11, scope: !1)
+!8 = !{!"0x101\00b\0033554433\000", !1, !2, !5} ; [ DW_TAG_arg_variable ]
+!9 = !MDLocation(line: 1, column: 18, scope: !1)
+!10 = !{!"0x100\00c\002\000", !11, !2, !5} ; [ DW_TAG_auto_variable ]
+!11 = !{!"0xb\001\0021\000", !18, !1} ; [ DW_TAG_lexical_block ]
+!12 = !MDLocation(line: 2, column: 9, scope: !11)
+!13 = !MDLocation(line: 2, column: 14, scope: !11)
+!14 = !MDLocation(line: 3, column: 5, scope: !11)
+!15 = !MDLocation(line: 4, column: 5, scope: !11)
+!16 = !MDLocation(line: 5, column: 5, scope: !11)
+!17 = !{!1}
+!18 = !{!"/d/j/debug-test.c", !"/Volumes/Data/b"}
+!19 = !{i32 0}
+!20 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/Scalarizer/basic.ll b/test/Transforms/Scalarizer/basic.ll
index 1cfc0dd29017..a94cf9f61142 100644
--- a/test/Transforms/Scalarizer/basic.ll
+++ b/test/Transforms/Scalarizer/basic.ll
@@ -443,9 +443,9 @@ exit:
   ret <4 x float> %next_acc
 }
 
-!0 = metadata !{ metadata !"root" }
-!1 = metadata !{ metadata !"set1", metadata !0 }
-!2 = metadata !{ metadata !"set2", metadata !0 }
-!3 = metadata !{ metadata !3 }
-!4 = metadata !{ float 4.0 }
-!5 = metadata !{ i64 0, i64 8, null }
+!0 = !{ !"root" }
+!1 = !{ !"set1", !0 }
+!2 = !{ !"set2", !0 }
+!3 = !{ !3 }
+!4 = !{ float 4.0 }
+!5 = !{ i64 0, i64 8, null }
diff --git a/test/Transforms/Scalarizer/dbginfo.ll b/test/Transforms/Scalarizer/dbginfo.ll
index 546e89da80cb..ed65aaace2c4 100644
--- a/test/Transforms/Scalarizer/dbginfo.ll
+++ b/test/Transforms/Scalarizer/dbginfo.ll
@@ -16,9 +16,9 @@ define void @f1(<4 x i32>* nocapture %a, <4 x i32>* nocapture readonly %b, <4 x
 ; CHECK: %b.i1 = getelementptr i32* %b.i0, i32 1
 ; CHECK: %b.i2 = getelementptr i32* %b.i0, i32 2
 ; CHECK: %b.i3 = getelementptr i32* %b.i0, i32 3
-; CHECK: tail call void @llvm.dbg.value(metadata !{<4 x i32>* %a}, i64 0, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}}
-; CHECK: tail call void @llvm.dbg.value(metadata !{<4 x i32>* %b}, i64 0, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}}
-; CHECK: tail call void @llvm.dbg.value(metadata !{<4 x i32>* %c}, i64 0, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}}
+; CHECK: tail call void @llvm.dbg.value(metadata <4 x i32>* %a, i64 0, metadata !{{[0-9]+}}, metadata {{.*}}), !dbg !{{[0-9]+}}
+; CHECK: tail call void @llvm.dbg.value(metadata <4 x i32>* %b, i64 0, metadata !{{[0-9]+}}, metadata {{.*}}), !dbg !{{[0-9]+}}
+; CHECK: tail call void @llvm.dbg.value(metadata <4 x i32>* %c, i64 0, metadata !{{[0-9]+}}, metadata {{.*}}), !dbg !{{[0-9]+}}
 ; CHECK: %bval.i0 = load i32* %b.i0, align 16, !dbg ![[TAG1:[0-9]+]], !tbaa ![[TAG2:[0-9]+]]
 ; CHECK: %bval.i1 = load i32* %b.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
 ; CHECK: %bval.i2 = load i32* %b.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]]
@@ -37,9 +37,9 @@ define void @f1(<4 x i32>* nocapture %a, <4 x i32>* nocapture readonly %b, <4 x
 ; CHECK: store i32 %add.i3, i32* %a.i3, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
 ; CHECK: ret void
 entry:
-  tail call void @llvm.dbg.value(metadata !{<4 x i32>* %a}, i64 0, metadata !15), !dbg !20
-  tail call void @llvm.dbg.value(metadata !{<4 x i32>* %b}, i64 0, metadata !16), !dbg !20
-  tail call void @llvm.dbg.value(metadata !{<4 x i32>* %c}, i64 0, metadata !17), !dbg !20
+  tail call void @llvm.dbg.value(metadata <4 x i32>* %a, i64 0, metadata !15, metadata !{}), !dbg !20
+  tail call void @llvm.dbg.value(metadata <4 x i32>* %b, i64 0, metadata !16, metadata !{}), !dbg !20
+  tail call void @llvm.dbg.value(metadata <4 x i32>* %c, i64 0, metadata !17, metadata !{}), !dbg !20
   %bval = load <4 x i32>* %b, align 16, !dbg !21, !tbaa !22
   %cval = load <4 x i32>* %c, align 16, !dbg !21, !tbaa !22
   %add = add <4 x i32> %bval, %cval, !dbg !21
@@ -48,7 +48,7 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #1
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
 
 attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
@@ -57,30 +57,30 @@ attributes #1 = { nounwind readnone }
 !llvm.module.flags = !{!18, !26}
 !llvm.ident = !{!19}
 
-!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 194134) (llvm/trunk 194126)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/richards/llvm/build//tmp/add.c] [DW_LANG_C99]
-!1 = metadata !{metadata !"/tmp/add.c", metadata !"/home/richards/llvm/build"}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f1", metadata !"f1", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (<4 x i32>*, <4 x i32>*, <4 x i32>*)* @f1, null, null, metadata !14, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [f]
-!5 = metadata !{i32 786473, metadata !1}          ; [ DW_TAG_file_type ] [/home/richards/llvm/build//tmp/add.c]
-!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!7 = metadata !{null, metadata !8, metadata !8, metadata !8}
-!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from V4SI]
-!9 = metadata !{i32 786454, metadata !1, null, metadata !"V4SI", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] [V4SI] [line 1, size 0, align 0, offset 0] [from ]
-!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 2048, metadata !11, metadata !12, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int]
-!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!12 = metadata !{metadata !13}
-!13 = metadata !{i32 786465, i64 0, i64 4}        ; [ DW_TAG_subrange_type ] [0, 3]
-!14 = metadata !{metadata !15, metadata !16, metadata !17}
-!15 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !5, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3]
-!16 = metadata !{i32 786689, metadata !4, metadata !"b", metadata !5, i32 33554435, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 3]
-!17 = metadata !{i32 786689, metadata !4, metadata !"c", metadata !5, i32 50331651, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [c] [line 3]
-!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
-!19 = metadata !{metadata !"clang version 3.4 (trunk 194134) (llvm/trunk 194126)"}
-!20 = metadata !{i32 3, i32 0, metadata !4, null}
-!21 = metadata !{i32 5, i32 0, metadata !4, null}
-!22 = metadata !{metadata !23, metadata !23, i64 0}
-!23 = metadata !{metadata !"omnipotent char", metadata !24, i64 0}
-!24 = metadata !{metadata !"Simple C/C++ TBAA"}
-!25 = metadata !{i32 6, i32 0, metadata !4, null}
-!26 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\0012\00clang version 3.4 (trunk 194134) (llvm/trunk 194126)\001\00\000\00\000", !1, !2, !2, !3, !2, !2} ; [ DW_TAG_compile_unit ] [/home/richards/llvm/build//tmp/add.c] [DW_LANG_C99]
+!1 = !{!"/tmp/add.c", !"/home/richards/llvm/build"}
+!2 = !{i32 0}
+!3 = !{!4}
+!4 = !{!"0x2e\00f1\00f1\00\003\000\001\000\006\00256\001\004", !1, !5, !6, null, void (<4 x i32>*, <4 x i32>*, <4 x i32>*)* @f1, null, null, !14} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [f]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/home/richards/llvm/build//tmp/add.c]
+!6 = !{!"0x15\00\000\000\000\000\000\000", i32 0, null, null, !7, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!7 = !{null, !8, !8, !8}
+!8 = !{!"0xf\00\000\0064\0064\000\000", null, null, !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from V4SI]
+!9 = !{!"0x16\00V4SI\001\000\000\000\000", !1, null, !10} ; [ DW_TAG_typedef ] [V4SI] [line 1, size 0, align 0, offset 0] [from ]
+!10 = !{!"0x1\00\000\00128\00128\000\002048", null, null, !11, !12, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int]
+!11 = !{!"0x24\00int\000\0032\0032\000\000\005", null, null} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = !{!13}
+!13 = !{!"0x21\000\004"}        ; [ DW_TAG_subrange_type ] [0, 3]
+!14 = !{!15, !16, !17}
+!15 = !{!"0x101\00a\0016777219\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [a] [line 3]
+!16 = !{!"0x101\00b\0033554435\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [b] [line 3]
+!17 = !{!"0x101\00c\0050331651\000", !4, !5, !8} ; [ DW_TAG_arg_variable ] [c] [line 3]
+!18 = !{i32 2, !"Dwarf Version", i32 4}
+!19 = !{!"clang version 3.4 (trunk 194134) (llvm/trunk 194126)"}
+!20 = !MDLocation(line: 3, scope: !4)
+!21 = !MDLocation(line: 5, scope: !4)
+!22 = !{!23, !23, i64 0}
+!23 = !{!"omnipotent char", !24, i64 0}
+!24 = !{!"Simple C/C++ TBAA"}
+!25 = !MDLocation(line: 6, scope: !4)
+!26 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
index 1784171454d6..ea0d3f5673a5 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll
@@ -234,3 +234,46 @@ entry:
 ; CHECK-LABEL: @and(
 ; CHECK: getelementptr [32 x [32 x float]]* @float_2d_array
 ; CHECK-NOT: getelementptr
+
+; The code that rebuilds an OR expression used to be buggy, and failed on this
+; test.
+define float* @shl_add_or(i64 %a, float* %ptr) {
+; CHECK-LABEL: @shl_add_or(
+entry:
+  %shl = shl i64 %a, 2
+  %add = add i64 %shl, 12
+  %or = or i64 %add, 1
+; CHECK: [[OR:%or[0-9]*]] = add i64 %shl, 1
+  ; ((a << 2) + 12) and 1 have no common bits. Therefore,
+  ; SeparateConstOffsetFromGEP is able to extract the 12.
+  ; TODO(jingyue): We could reassociate the expression to combine 12 and 1.
+  %p = getelementptr float* %ptr, i64 %or
+; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr float* %ptr, i64 [[OR]]
+; CHECK: getelementptr float* [[PTR]], i64 12
+  ret float* %p
+; CHECK-NEXT: ret
+}
+
+; The source code used to be buggy in checking
+; (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0)
+; where AccumulativeByteOffset is signed but ElementTypeSizeOfGEP is unsigned.
+; The compiler would promote AccumulativeByteOffset to unsigned, causing
+; unexpected results. For example, while -64 % (int64_t)24 != 0,
+; -64 % (uint64_t)24 == 0.
+%struct3 = type { i64, i32 }
+%struct2 = type { %struct3, i32 }
+%struct1 = type { i64, %struct2 }
+%struct0 = type { i32, i32, i64*, [100 x %struct1] }
+define %struct2* @sign_mod_unsign(%struct0* %ptr, i64 %idx) {
+; CHECK-LABEL: @sign_mod_unsign(
+entry:
+  %arrayidx = add nsw i64 %idx, -2
+; CHECK-NOT: add
+  %ptr2 = getelementptr inbounds %struct0* %ptr, i64 0, i32 3, i64 %arrayidx, i32 1
+; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0* %ptr, i64 0, i32 3, i64 %idx, i32 1
+; CHECK: [[PTR1:%[a-zA-Z0-9]+]] = bitcast %struct2* [[PTR]] to i8*
+; CHECK: getelementptr i8* [[PTR1]], i64 -64
+; CHECK: bitcast
+  ret %struct2* %ptr2
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/SimplifyCFG/UnreachableEliminate.ll b/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
index e1635f491156..21428c62f531 100644
--- a/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
+++ b/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
@@ -47,7 +47,7 @@ T:
 }
 
 ; PR9450
-define i32 @test4(i32 %v) {
+define i32 @test4(i32 %v, i32 %w) {
 ; CHECK: entry:
 ; CHECK-NEXT:  switch i32 %v, label %T [
 ; CHECK-NEXT:    i32 3, label %V
@@ -67,7 +67,54 @@ SWITCH:
 default:
         unreachable
 U:
-        ret i32 1
+        ret i32 %w
 T:
         ret i32 2
 }
+
+
+;; We can either convert the following control-flow to a select or remove the
+;; unreachable control flow because of the undef store of null. Make sure we do
+;; the latter.
+
+define void @test5(i1 %cond, i8* %ptr) {
+
+; CHECK-LABEL: test5
+; CHECK: entry:
+; CHECK-NOT: select
+; CHECK:  store i8 2, i8* %ptr
+; CHECK:  ret
+
+entry:
+  br i1 %cond, label %bb1, label %bb3
+
+bb3:
+ br label %bb2
+
+bb1:
+ br label %bb2
+
+bb2:
+  %ptr.2 = phi i8* [ %ptr, %bb3 ], [ null, %bb1 ]
+  store i8 2, i8* %ptr.2, align 8
+  ret void
+}
+
+; CHECK-LABEL: test6
+; CHECK: entry:
+; CHECK-NOT: select
+; CHECK:  store i8 2, i8* %ptr
+; CHECK:  ret
+
+define void @test6(i1 %cond, i8* %ptr) {
+entry:
+  br i1 %cond, label %bb1, label %bb2
+
+bb1:
+  br label %bb2
+
+bb2:
+  %ptr.2 = phi i8* [ %ptr, %entry ], [ null, %bb1 ]
+  store i8 2, i8* %ptr.2, align 8
+  ret void
+}
diff --git a/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll b/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
new file mode 100644
index 000000000000..22599b397ed4
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
@@ -0,0 +1,50 @@
+; RUN: opt -S -simplifycfg < %s -mtriple=x86_64-apple-darwin12.0.0 | FileCheck %s
+; rdar://17887153
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin12.0.0"
+
+; When we have a covered lookup table, make sure we don't delete PHINodes that
+; are cached in PHIs.
+; CHECK-LABEL: @test
+; CHECK: entry:
+; CHECK-NEXT: sub i3 %arg, -4
+; CHECK-NEXT: zext i3 %switch.tableidx to i4
+; CHECK-NEXT: getelementptr inbounds [8 x i64]* @switch.table, i32 0, i4 %switch.tableidx.zext
+; CHECK-NEXT: load i64* %switch.gep
+; CHECK-NEXT: add i64
+; CHECK-NEXT: ret i64
+define i64 @test(i3 %arg) {
+entry:
+  switch i3 %arg, label %Default [
+    i3 -2, label %Label6
+    i3 1, label %Label1
+    i3 2, label %Label2
+    i3 3, label %Label3
+    i3 -4, label %Label4
+    i3 -3, label %Label5
+  ]
+
+Default:
+  %v1 = phi i64 [ 7, %Label6 ], [ 11, %Label5 ], [ 6, %Label4 ], [ 13, %Label3 ], [ 9, %Label2 ], [ 15, %Label1 ], [ 8, %entry ]
+  %v2 = phi i64 [ 0, %Label6 ], [ 0, %Label5 ], [ 0, %Label4 ], [ 0, %Label3 ], [ 0, %Label2 ], [ 0, %Label1 ], [ 0, %entry ]
+  %v3 = add i64 %v1, %v2
+  ret i64 %v3
+
+Label1:
+  br label %Default
+
+Label2:
+  br label %Default
+
+Label3:
+  br label %Default
+
+Label4:
+  br label %Default
+
+Label5:
+  br label %Default
+
+Label6:
+  br label %Default
+}
diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 51ced4099ac9..22f18cd4d432 100644
--- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -782,7 +782,6 @@ return:
 }
 
 ; Don't create a table with illegal type
-; rdar://12779436
 define i96 @illegaltype(i32 %c) {
 entry:
   switch i32 %c, label %sw.default [
@@ -856,10 +855,10 @@ return:
 ; CHECK: entry:
 ; CHECK: br i1 %{{.*}}, label %switch.hole_check, label %sw.default
 ; CHECK: switch.hole_check:
-; CHECK-NEXT: %switch.maskindex = trunc i32 %switch.tableidx to i6
-; CHECK-NEXT: %switch.shifted = lshr i6 -17, %switch.maskindex
+; CHECK-NEXT: %switch.maskindex = trunc i32 %switch.tableidx to i8
+; CHECK-NEXT: %switch.shifted = lshr i8 47, %switch.maskindex
 ; The mask is binary 101111.
-; CHECK-NEXT: %switch.lobit = trunc i6 %switch.shifted to i1
+; CHECK-NEXT: %switch.lobit = trunc i8 %switch.shifted to i1
 ; CHECK-NEXT: br i1 %switch.lobit, label %switch.lookup, label %sw.default
 ; CHECK-NOT: switch i32
 }
@@ -895,7 +894,7 @@ sw.bb1: br label %return
 sw.bb2: br label %return
 sw.default: br label %return
 return:
-  %x = phi i32 [ 3, %sw.default ], [ 5, %sw.bb2 ], [ 7, %sw.bb1 ], [ 9, %entry ]
+  %x = phi i32 [ 3, %sw.default ], [ 5, %sw.bb2 ], [ 7, %sw.bb1 ], [ 10, %entry ]
   ret i32 %x
 ; CHECK-LABEL: @threecases(
 ; CHECK-NOT: switch i32
@@ -915,8 +914,12 @@ return:
   %x = phi i32 [ 3, %sw.default ], [ 7, %sw.bb1 ], [ 9, %entry ]
   ret i32 %x
 ; CHECK-LABEL: @twocases(
-; CHECK: switch i32
+; CHECK-NOT: switch i32
 ; CHECK-NOT: @switch.table
+; CHECK: %switch.selectcmp
+; CHECK-NEXT: %switch.select
+; CHECK-NEXT: %switch.selectcmp1
+; CHECK-NEXT: %switch.select2
 }
 
 ; Don't build tables for switches with TLS variables.
@@ -973,3 +976,271 @@ return:
 ; CHECK: switch i32
 ; CHECK-NOT: @switch.table
 }
+
+; We can use linear mapping.
+define i8 @linearmap1(i32 %c) {
+entry:
+  switch i32 %c, label %sw.default [
+    i32 10, label %return
+    i32 11, label %sw.bb1
+    i32 12, label %sw.bb2
+    i32 13, label %sw.bb3
+  ]
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.default: br label %return
+return:
+  %x = phi i8 [ 3, %sw.default ], [ 3, %sw.bb3 ], [ 8, %sw.bb2 ], [ 13, %sw.bb1 ], [ 18, %entry ]
+  ret i8 %x
+; CHECK-LABEL: @linearmap1(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %c, 10
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.idx.cast = trunc i32 %switch.tableidx to i8
+; CHECK-NEXT: %switch.idx.mult = mul i8 %switch.idx.cast, -5
+; CHECK-NEXT: %switch.offset = add i8 %switch.idx.mult, 18
+; CHECK-NEXT: ret i8 %switch.offset
+}
+
+; Linear mapping in a different configuration.
+define i32 @linearmap2(i8 %c) {
+entry:
+  switch i8 %c, label %sw.default [
+    i8 -10, label %return
+    i8 -11, label %sw.bb1
+    i8 -12, label %sw.bb2
+    i8 -13, label %sw.bb3
+  ]
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.default: br label %return
+return:
+  %x = phi i32 [ 3, %sw.default ], [ 18, %sw.bb3 ], [ 19, %sw.bb2 ], [ 20, %sw.bb1 ], [ 21, %entry ]
+  ret i32 %x
+; CHECK-LABEL: @linearmap2(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i8 %c, -13
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.idx.cast = zext i8 %switch.tableidx to i32
+; CHECK-NEXT: %switch.offset = add i32 %switch.idx.cast, 18
+; CHECK-NEXT: ret i32 %switch.offset
+}
+
+; Linear mapping with overflows.
+define i8 @linearmap3(i32 %c) {
+entry:
+  switch i32 %c, label %sw.default [
+    i32 10, label %return
+    i32 11, label %sw.bb1
+    i32 12, label %sw.bb2
+    i32 13, label %sw.bb3
+  ]
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.default: br label %return
+return:
+  %x = phi i8 [ 3, %sw.default ], [ 44, %sw.bb3 ], [ -56, %sw.bb2 ], [ 100, %sw.bb1 ], [ 0, %entry ]
+  ret i8 %x
+; CHECK-LABEL: @linearmap3(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %c, 10
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.idx.cast = trunc i32 %switch.tableidx to i8
+; CHECK-NEXT: %switch.idx.mult = mul i8 %switch.idx.cast, 100
+; CHECK-NEXT: ret i8 %switch.idx.mult
+}
+
+; Linear mapping with with multiplier 1 and offset 0.
+define i8 @linearmap4(i32 %c) {
+entry:
+  switch i32 %c, label %sw.default [
+    i32 -2, label %return
+    i32 -1, label %sw.bb1
+    i32 0, label %sw.bb2
+    i32 1, label %sw.bb3
+  ]
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.default: br label %return
+return:
+  %x = phi i8 [ 3, %sw.default ], [ 3, %sw.bb3 ], [ 2, %sw.bb2 ], [ 1, %sw.bb1 ], [ 0, %entry ]
+  ret i8 %x
+; CHECK-LABEL: @linearmap4(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %c, -2
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.idx.cast = trunc i32 %switch.tableidx to i8
+; CHECK-NEXT: ret i8 %switch.idx.cast
+}
+
+; Reuse the inverted table range compare.
+define i32 @reuse_cmp1(i32 %x) {
+entry:
+  switch i32 %x, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+  ]
+sw.bb: br label %sw.epilog
+sw.bb1: br label %sw.epilog
+sw.bb2: br label %sw.epilog
+sw.bb3: br label %sw.epilog
+sw.default: br label %sw.epilog
+sw.epilog:
+  %r.0 = phi i32 [ 0, %sw.default ], [ 13, %sw.bb3 ], [ 12, %sw.bb2 ], [ 11, %sw.bb1 ], [ 10, %sw.bb ]
+  %cmp = icmp eq i32 %r.0, 0       ; This compare can be "replaced".
+  br i1 %cmp, label %if.then, label %if.end
+if.then: br label %return
+if.end: br label %return
+return:
+  %retval.0 = phi i32 [ 100, %if.then ], [ %r.0, %if.end ]
+  ret i32 %retval.0
+; CHECK-LABEL: @reuse_cmp1(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0
+; CHECK-NEXT: [[C:%.+]] = icmp ult i32 %switch.tableidx, 4
+; CHECK-NEXT: %inverted.cmp = xor i1 [[C]], true
+; CHECK:      [[R:%.+]] = select i1 %inverted.cmp, i32 100, i32 {{.*}}
+; CHECK-NEXT: ret i32 [[R]]
+}
+
+; Reuse the table range compare.
+define i32 @reuse_cmp2(i32 %x) {
+entry:
+  switch i32 %x, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+  ]
+sw.bb: br label %sw.epilog
+sw.bb1: br label %sw.epilog
+sw.bb2: br label %sw.epilog
+sw.bb3: br label %sw.epilog
+sw.default: br label %sw.epilog
+sw.epilog:
+  %r.0 = phi i32 [ 4, %sw.default ], [ 3, %sw.bb3 ], [ 2, %sw.bb2 ], [ 1, %sw.bb1 ], [ 0, %sw.bb ]
+  %cmp = icmp ne i32 %r.0, 4       ; This compare can be "replaced".
+  br i1 %cmp, label %if.then, label %if.end
+if.then: br label %return
+if.end: br label %return
+return:
+  %retval.0 = phi i32 [ %r.0, %if.then ], [ 100, %if.end ]
+  ret i32 %retval.0
+; CHECK-LABEL: @reuse_cmp2(
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0
+; CHECK-NEXT: [[C:%.+]] = icmp ult i32 %switch.tableidx, 4
+; CHECK:      [[R:%.+]] = select i1 [[C]], i32 {{.*}}, i32 100
+; CHECK-NEXT: ret i32 [[R]]
+}
+
+; Cannot reuse the table range compare, because the default value is the same
+; as one of the case values.
+define i32 @no_reuse_cmp(i32 %x) {
+entry:
+  switch i32 %x, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+  ]
+sw.bb: br label %sw.epilog
+sw.bb1: br label %sw.epilog
+sw.bb2: br label %sw.epilog
+sw.bb3: br label %sw.epilog
+sw.default: br label %sw.epilog
+sw.epilog:
+  %r.0 = phi i32 [ 12, %sw.default ], [ 13, %sw.bb3 ], [ 12, %sw.bb2 ], [ 11, %sw.bb1 ], [ 10, %sw.bb ]
+  %cmp = icmp ne i32 %r.0, 0
+  br i1 %cmp, label %if.then, label %if.end
+if.then: br label %return
+if.end: br label %return
+return:
+  %retval.0 = phi i32 [ %r.0, %if.then ], [ 100, %if.end ]
+  ret i32 %retval.0
+; CHECK-LABEL: @no_reuse_cmp(
+; CHECK:  [[S:%.+]] = select 
+; CHECK-NEXT:  %cmp = icmp ne i32 [[S]], 0
+; CHECK-NEXT:  [[R:%.+]] = select i1 %cmp, i32 [[S]], i32 100
+; CHECK-NEXT:  ret i32 [[R]]
+}
+
+; Cannot reuse the table range compare, because the phi at the switch merge
+; point is not dominated by the switch.
+define i32 @no_reuse_cmp2(i32 %x, i32 %y) {
+entry:
+  %ec = icmp ne i32 %y, 0
+  br i1 %ec, label %switch.entry, label %sw.epilog
+switch.entry:
+  switch i32 %x, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+    i32 3, label %sw.bb3
+  ]
+sw.bb: br label %sw.epilog
+sw.bb1: br label %sw.epilog
+sw.bb2: br label %sw.epilog
+sw.bb3: br label %sw.epilog
+sw.default: br label %sw.epilog
+sw.epilog:
+  %r.0 = phi i32 [100, %entry], [ 0, %sw.default ], [ 13, %sw.bb3 ], [ 12, %sw.bb2 ], [ 11, %sw.bb1 ], [ 10, %sw.bb ]
+  %cmp = icmp eq i32 %r.0, 0       ; This compare can be "replaced".
+  br i1 %cmp, label %if.then, label %if.end
+if.then: br label %return
+if.end: br label %return
+return:
+  %retval.0 = phi i32 [ 100, %if.then ], [ %r.0, %if.end ]
+  ret i32 %retval.0
+; CHECK-LABEL: @no_reuse_cmp2(
+; CHECK:  %r.0 = phi
+; CHECK-NEXT:  %cmp = icmp eq i32 %r.0, 0
+; CHECK-NEXT:  [[R:%.+]] = select i1 %cmp
+; CHECK-NEXT:  ret i32 [[R]]
+}
+
+define void @pr20210(i8 %x, i1 %y) {
+; %z has uses outside of its BB or the phi it feeds into,
+; so doing a table lookup and jumping directly to while.cond would
+; cause %z to cease dominating all its uses.
+
+entry:
+  br i1 %y, label %sw, label %intermediate
+
+sw:
+  switch i8 %x, label %end [
+    i8 7, label %intermediate
+    i8 3, label %intermediate
+    i8 2, label %intermediate
+    i8 1, label %intermediate
+    i8 0, label %intermediate
+  ]
+
+intermediate:
+  %z = zext i8 %x to i32
+  br label %while.cond
+
+while.cond:
+  %i = phi i32 [ %z, %intermediate ], [ %j, %while.body ]
+  %b = icmp ne i32 %i, 7
+  br i1 %b, label %while.body, label %while.end
+
+while.body:
+  %j = add i32 %i, 1
+  br label %while.cond
+
+while.end:
+  call void @exit(i32 %z)
+  unreachable
+
+end:
+  ret void
+; CHECK-LABEL: @pr20210
+; CHECK: switch i8 %x
+}
diff --git a/test/Transforms/SimplifyCFG/assume.ll b/test/Transforms/SimplifyCFG/assume.ll
new file mode 100644
index 000000000000..1d1b96a58edf
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/assume.ll
@@ -0,0 +1,22 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+
+define void @test1() {
+        call void @llvm.assume(i1 0)
+        ret void
+
+; CHECK-LABEL: @test1
+; CHECK-NOT: llvm.assume
+; CHECK: unreachable
+}
+
+define void @test2() {
+        call void @llvm.assume(i1 undef)
+        ret void
+
+; CHECK-LABEL: @test2
+; CHECK-NOT: llvm.assume
+; CHECK: unreachable
+}
+
+declare void @llvm.assume(i1) nounwind
+
diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll
index d6958a9c111a..5de7cc5e31d5 100644
--- a/test/Transforms/SimplifyCFG/basictest.ll
+++ b/test/Transforms/SimplifyCFG/basictest.ll
@@ -68,6 +68,6 @@ bb3:
 }
 declare i8 @test6g(i8*)
 
-!0 = metadata !{metadata !1, metadata !1, i64 0}
-!1 = metadata !{metadata !"foo"}
-!2 = metadata !{i8 0, i8 2}
+!0 = !{!1, !1, i64 0}
+!1 = !{!"foo"}
+!2 = !{i8 0, i8 2}
diff --git a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
index 9d8086c29769..f715a0cda93c 100644
--- a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
@@ -25,7 +25,7 @@ BB2:                                              ; preds = %BB1
 
 BB3:                                              ; preds = %BB2
   %6 = getelementptr inbounds [5 x %0]* @0, i32 0, i32 %0, !dbg !6
-  call void @llvm.dbg.value(metadata !{%0* %6}, i64 0, metadata !7), !dbg !12
+  call void @llvm.dbg.value(metadata %0* %6, i64 0, metadata !7, metadata !{}), !dbg !12
   %7 = icmp eq %0* %6, null, !dbg !13
   br i1 %7, label %BB5, label %BB4, !dbg !13
 
@@ -37,23 +37,23 @@ BB5:                                              ; preds = %BB3, %BB2, %BB1, %E
   ret void, !dbg !14
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !15, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 231, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 231] [def] [scope 0] [foo]
-!1 = metadata !{i32 589865, metadata !15} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, metadata !15, i32 12, metadata !"clang (trunk 129006)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !15, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null}
-!5 = metadata !{i32 131, i32 2, metadata !0, null}
-!6 = metadata !{i32 134, i32 2, metadata !0, null}
-!7 = metadata !{i32 590080, metadata !8, metadata !"bar", metadata !1, i32 232, metadata !9, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !15, metadata !0, i32 231, i32 1, i32 3} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 589839, null, metadata !2, metadata !"", i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 589862, null, metadata !2, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ]
-!11 = metadata !{i32 589860, null, metadata !2, metadata !"unsigned int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!12 = metadata !{i32 232, i32 40, metadata !8, null}
-!13 = metadata !{i32 234, i32 2, metadata !8, null}
-!14 = metadata !{i32 274, i32 1, metadata !8, null}
-!15 = metadata !{metadata !"a.c", metadata !"/private/tmp"}
+!0 = !{!"0x2e\00foo\00foo\00\00231\000\001\000\006\00256\000\000", !15, !1, !3, null, void (i32)* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 231] [def] [scope 0] [foo]
+!1 = !{!"0x29", !15} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang (trunk 129006)\001\00\000\00\000", !15, !4, !4, null, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !15, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null}
+!5 = !MDLocation(line: 131, column: 2, scope: !0)
+!6 = !MDLocation(line: 134, column: 2, scope: !0)
+!7 = !{!"0x100\00bar\00232\000", !8, !1, !9} ; [ DW_TAG_auto_variable ]
+!8 = !{!"0xb\00231\001\003", !15, !0} ; [ DW_TAG_lexical_block ]
+!9 = !{!"0xf\00\000\0032\0032\000\000", null, !2, !10} ; [ DW_TAG_pointer_type ]
+!10 = !{!"0x26\00\000\000\000\000\000", null, !2, !11} ; [ DW_TAG_const_type ]
+!11 = !{!"0x24\00unsigned int\000\0032\0032\000\000\007", null, !2} ; [ DW_TAG_base_type ]
+!12 = !MDLocation(line: 232, column: 40, scope: !8)
+!13 = !MDLocation(line: 234, column: 2, scope: !8)
+!14 = !MDLocation(line: 274, column: 1, scope: !8)
+!15 = !{!"a.c", !"/private/tmp"}
diff --git a/test/Transforms/SimplifyCFG/branch-fold-threshold.ll b/test/Transforms/SimplifyCFG/branch-fold-threshold.ll
new file mode 100644
index 000000000000..878c0a4837ae
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/branch-fold-threshold.ll
@@ -0,0 +1,28 @@
+; RUN: opt %s -simplifycfg -S | FileCheck %s --check-prefix=NORMAL
+; RUN: opt %s -simplifycfg -S -bonus-inst-threshold=2 | FileCheck %s --check-prefix=AGGRESSIVE
+
+define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32* %input) {
+; NORMAL-LABEL: @foo(
+; AGGRESSIVE-LABEL: @foo(
+entry:
+  %cmp = icmp sgt i32 %d, 3
+  br i1 %cmp, label %cond.end, label %lor.lhs.false
+; NORMAL: br i1
+; AGGRESSIVE: br i1
+
+lor.lhs.false:
+  %mul = shl i32 %c, 1
+  %add = add nsw i32 %mul, %a
+  %cmp1 = icmp slt i32 %add, %b
+  br i1 %cmp1, label %cond.false, label %cond.end
+; NORMAL: br i1
+; AGGRESSIVE-NOT: br i1
+
+cond.false:
+  %0 = load i32* %input, align 4
+  br label %cond.end
+
+cond.end:
+  %cond = phi i32 [ %0, %cond.false ], [ 0, %lor.lhs.false ], [ 0, %entry ]
+  ret i32 %cond
+}
diff --git a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
index 0547fa972017..869ce099d075 100644
--- a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
+++ b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
@@ -1,8 +1,8 @@
 ; RUN: opt -simplifycfg -S < %s | FileCheck %s
 
 define i32 @foo(i32 %i) nounwind ssp {
-  call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !6), !dbg !7
-  call void @llvm.dbg.value(metadata !8, i64 0, metadata !9), !dbg !11
+  call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !6, metadata !{}), !dbg !7
+  call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !9, metadata !{}), !dbg !11
   %1 = icmp ne i32 %i, 0, !dbg !12
 ;CHECK: call i32 (...)* @bar()
 ;CHECK-NEXT: llvm.dbg.value
@@ -10,12 +10,12 @@ define i32 @foo(i32 %i) nounwind ssp {
 
 ; <label>:2                                       ; preds = %0
   %3 = call i32 (...)* @bar(), !dbg !13
-  call void @llvm.dbg.value(metadata !{i32 %3}, i64 0, metadata !9), !dbg !13
+  call void @llvm.dbg.value(metadata i32 %3, i64 0, metadata !9, metadata !{}), !dbg !13
   br label %6, !dbg !15
 
 ; <label>:4                                       ; preds = %0
   %5 = call i32 (...)* @bar(), !dbg !16
-  call void @llvm.dbg.value(metadata !{i32 %5}, i64 0, metadata !9), !dbg !16
+  call void @llvm.dbg.value(metadata i32 %5, i64 0, metadata !9, metadata !{}), !dbg !16
   br label %6, !dbg !18
 
 ; <label>:6                                       ; preds = %4, %2
@@ -23,34 +23,34 @@ define i32 @foo(i32 %i) nounwind ssp {
   ret i32 %k.0, !dbg !19
 }
 
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
 
 declare i32 @bar(...)
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.module.flags = !{!21}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !20, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
-!1 = metadata !{i32 589865, metadata !20} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, metadata !20, i32 12, metadata !"clang", i1 true, metadata !"", i32 0, metadata !8, metadata !8, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !20, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, null, metadata !2, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 590081, metadata !0, metadata !"i", metadata !1, i32 16777218, metadata !5, i32 0} ; [ DW_TAG_arg_variable ]
-!7 = metadata !{i32 2, i32 13, metadata !0, null}
-!8 = metadata !{i32 0}
-!9 = metadata !{i32 590080, metadata !10, metadata !"k", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!10 = metadata !{i32 589835, metadata !20, metadata !0, i32 2, i32 16, i32 0} ; [ DW_TAG_lexical_block ]
-!11 = metadata !{i32 3, i32 12, metadata !10, null}
-!12 = metadata !{i32 4, i32 3, metadata !10, null}
-!13 = metadata !{i32 5, i32 5, metadata !14, null}
-!14 = metadata !{i32 589835, metadata !20, metadata !10, i32 4, i32 10, i32 1} ; [ DW_TAG_lexical_block ]
-!15 = metadata !{i32 6, i32 3, metadata !14, null}
-!16 = metadata !{i32 7, i32 5, metadata !17, null}
-!17 = metadata !{i32 589835, metadata !20, metadata !10, i32 6, i32 10, i32 2} ; [ DW_TAG_lexical_block ]
-!18 = metadata !{i32 8, i32 3, metadata !17, null}
-!19 = metadata !{i32 9, i32 3, metadata !10, null}
-!20 = metadata !{metadata !"b.c", metadata !"/private/tmp"}
-!21 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00foo\00foo\00\002\000\001\000\006\00256\000\000", !20, !1, !3, null, i32 (i32)* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 2] [def] [scope 0] [foo]
+!1 = !{!"0x29", !20} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang\001\00\000\00\000", !20, !8, !8, null, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !20, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", null, !2} ; [ DW_TAG_base_type ]
+!6 = !{!"0x101\00i\0016777218\000", !0, !1, !5} ; [ DW_TAG_arg_variable ]
+!7 = !MDLocation(line: 2, column: 13, scope: !0)
+!8 = !{i32 0}
+!9 = !{!"0x100\00k\003\000", !10, !1, !5} ; [ DW_TAG_auto_variable ]
+!10 = !{!"0xb\002\0016\000", !20, !0} ; [ DW_TAG_lexical_block ]
+!11 = !MDLocation(line: 3, column: 12, scope: !10)
+!12 = !MDLocation(line: 4, column: 3, scope: !10)
+!13 = !MDLocation(line: 5, column: 5, scope: !14)
+!14 = !{!"0xb\004\0010\001", !20, !10} ; [ DW_TAG_lexical_block ]
+!15 = !MDLocation(line: 6, column: 3, scope: !14)
+!16 = !MDLocation(line: 7, column: 5, scope: !17)
+!17 = !{!"0xb\006\0010\002", !20, !10} ; [ DW_TAG_lexical_block ]
+!18 = !MDLocation(line: 8, column: 3, scope: !17)
+!19 = !MDLocation(line: 9, column: 3, scope: !10)
+!20 = !{!"b.c", !"/private/tmp"}
+!21 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/SimplifyCFG/hoist-with-range.ll b/test/Transforms/SimplifyCFG/hoist-with-range.ll
new file mode 100644
index 000000000000..7ca3ff247dc8
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/hoist-with-range.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+define void @foo(i1 %c, i8* %p) {
+; CHECK: if:
+; CHECK-NEXT: load i8* %p, !range !0
+; CHECK: !0 = !{i8 0, i8 1, i8 3, i8 5}
+if:
+  br i1 %c, label %then, label %else
+then:
+  %t = load i8* %p, !range !0
+  br label %out
+else:
+  %e = load i8* %p, !range !1
+  br label %out
+out:
+  ret void
+}
+
+!0 = !{ i8 0, i8 1 }
+!1 = !{ i8 3, i8 5 }
diff --git a/test/Transforms/SimplifyCFG/lifetime.ll b/test/Transforms/SimplifyCFG/lifetime.ll
index b79422172530..7c66be529500 100644
--- a/test/Transforms/SimplifyCFG/lifetime.ll
+++ b/test/Transforms/SimplifyCFG/lifetime.ll
@@ -1,11 +1,11 @@
 ; RUN: opt < %s -simplifycfg -S | FileCheck %s
 
-; Test that a lifetime intrinsic doesn't prevent us from simplifying this.
+; Test that a lifetime intrinsic isn't removed because that would change semantics
 
 ; CHECK: foo
 ; CHECK: entry:
-; CHECK-NOT: bb0:
-; CHECK-NOT: bb1:
+; CHECK: bb0:
+; CHECK: bb1:
 ; CHECK: ret
 define void @foo(i1 %x) {
 entry:
diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights-partial.ll b/test/Transforms/SimplifyCFG/preserve-branchweights-partial.ll
index 8cc07e39a180..b2b384112af8 100644
--- a/test/Transforms/SimplifyCFG/preserve-branchweights-partial.ll
+++ b/test/Transforms/SimplifyCFG/preserve-branchweights-partial.ll
@@ -34,4 +34,4 @@ if.end:
   ret void
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 1, i32 0}
+!0 = !{!"branch_weights", i32 1, i32 0}
diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights-switch-create.ll b/test/Transforms/SimplifyCFG/preserve-branchweights-switch-create.ll
index 941f5ad9d5b6..32a30c3cab4f 100644
--- a/test/Transforms/SimplifyCFG/preserve-branchweights-switch-create.ll
+++ b/test/Transforms/SimplifyCFG/preserve-branchweights-switch-create.ll
@@ -129,12 +129,12 @@ sw.epilog:
   ret void
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
-!1 = metadata !{metadata !"branch_weights", i32 4, i32 64}
-; CHECK: !0 = metadata !{metadata !"branch_weights", i32 256, i32 4352, i32 16}
-!2 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 8}
-!3 = metadata !{metadata !"branch_weights", i32 8, i32 8, i32 4}
-; CHECK: !1 = metadata !{metadata !"branch_weights", i32 32, i32 48, i32 96, i32 16}
-!4 = metadata !{metadata !"branch_weights", i32 7, i32 6, i32 4, i32 3}
-!5 = metadata !{metadata !"branch_weights", i32 17, i32 13, i32 9}
-; CHECK: !3 = metadata !{metadata !"branch_weights", i32 7, i32 3, i32 4, i32 6}
+!0 = !{!"branch_weights", i32 64, i32 4}
+!1 = !{!"branch_weights", i32 4, i32 64}
+; CHECK: !0 = !{!"branch_weights", i32 256, i32 4352, i32 16}
+!2 = !{!"branch_weights", i32 4, i32 4, i32 8}
+!3 = !{!"branch_weights", i32 8, i32 8, i32 4}
+; CHECK: !1 = !{!"branch_weights", i32 32, i32 48, i32 96, i32 16}
+!4 = !{!"branch_weights", i32 7, i32 6, i32 4, i32 3}
+!5 = !{!"branch_weights", i32 17, i32 13, i32 9}
+; CHECK: !3 = !{!"branch_weights", i32 7, i32 3, i32 4, i32 6}
diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
index bdd25ba80585..7802a0593574 100644
--- a/test/Transforms/SimplifyCFG/preserve-branchweights.ll
+++ b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
@@ -364,29 +364,29 @@ for.exit:
   ret void
 }
 
-!0 = metadata !{metadata !"branch_weights", i32 3, i32 5}
-!1 = metadata !{metadata !"branch_weights", i32 1, i32 1}
-!2 = metadata !{metadata !"branch_weights", i32 1, i32 2}
-!3 = metadata !{metadata !"branch_weights", i32 4, i32 3, i32 2, i32 1}
-!4 = metadata !{metadata !"branch_weights", i32 4, i32 3, i32 2, i32 1}
-!5 = metadata !{metadata !"branch_weights", i32 7, i32 6, i32 5}
-!6 = metadata !{metadata !"branch_weights", i32 1, i32 3}
-!7 = metadata !{metadata !"branch_weights", i32 33, i32 9, i32 8, i32 7}
-!8 = metadata !{metadata !"branch_weights", i32 33, i32 9, i32 8}
-!9 = metadata !{metadata !"branch_weights", i32 7, i32 6}
-!10 = metadata !{metadata !"branch_weights", i32 672646, i32 21604207}
-!11 = metadata !{metadata !"branch_weights", i32 6960, i32 21597248}
-
-; CHECK: !0 = metadata !{metadata !"branch_weights", i32 5, i32 11}
-; CHECK: !1 = metadata !{metadata !"branch_weights", i32 1, i32 5}
-; CHECK: !2 = metadata !{metadata !"branch_weights", i32 7, i32 1, i32 2}
-; CHECK: !3 = metadata !{metadata !"branch_weights", i32 49, i32 12, i32 24, i32 35}
-; CHECK: !4 = metadata !{metadata !"branch_weights", i32 11, i32 5}
-; CHECK: !5 = metadata !{metadata !"branch_weights", i32 17, i32 15} 
-; CHECK: !6 = metadata !{metadata !"branch_weights", i32 9, i32 7}
-; CHECK: !7 = metadata !{metadata !"branch_weights", i32 17, i32 9, i32 8, i32 7, i32 17}
-; CHECK: !8 = metadata !{metadata !"branch_weights", i32 24, i32 33}
-; CHECK: !9 = metadata !{metadata !"branch_weights", i32 8, i32 33}
+!0 = !{!"branch_weights", i32 3, i32 5}
+!1 = !{!"branch_weights", i32 1, i32 1}
+!2 = !{!"branch_weights", i32 1, i32 2}
+!3 = !{!"branch_weights", i32 4, i32 3, i32 2, i32 1}
+!4 = !{!"branch_weights", i32 4, i32 3, i32 2, i32 1}
+!5 = !{!"branch_weights", i32 7, i32 6, i32 5}
+!6 = !{!"branch_weights", i32 1, i32 3}
+!7 = !{!"branch_weights", i32 33, i32 9, i32 8, i32 7}
+!8 = !{!"branch_weights", i32 33, i32 9, i32 8}
+!9 = !{!"branch_weights", i32 7, i32 6}
+!10 = !{!"branch_weights", i32 672646, i32 21604207}
+!11 = !{!"branch_weights", i32 6960, i32 21597248}
+
+; CHECK: !0 = !{!"branch_weights", i32 5, i32 11}
+; CHECK: !1 = !{!"branch_weights", i32 1, i32 5}
+; CHECK: !2 = !{!"branch_weights", i32 7, i32 1, i32 2}
+; CHECK: !3 = !{!"branch_weights", i32 49, i32 12, i32 24, i32 35}
+; CHECK: !4 = !{!"branch_weights", i32 11, i32 5}
+; CHECK: !5 = !{!"branch_weights", i32 17, i32 15} 
+; CHECK: !6 = !{!"branch_weights", i32 9, i32 7}
+; CHECK: !7 = !{!"branch_weights", i32 17, i32 9, i32 8, i32 7, i32 17}
+; CHECK: !8 = !{!"branch_weights", i32 24, i32 33}
+; CHECK: !9 = !{!"branch_weights", i32 8, i32 33}
 ;; The false weight prints out as a negative integer here, but inside llvm, we
 ;; treat the weight as an unsigned integer.
-; CHECK: !10 = metadata !{metadata !"branch_weights", i32 112017436, i32 -735157296}
+; CHECK: !10 = !{!"branch_weights", i32 112017436, i32 -735157296}
diff --git a/test/Transforms/SimplifyCFG/sink-common-code.ll b/test/Transforms/SimplifyCFG/sink-common-code.ll
index 28d727938288..cdb6ed29d850 100644
--- a/test/Transforms/SimplifyCFG/sink-common-code.ll
+++ b/test/Transforms/SimplifyCFG/sink-common-code.ll
@@ -4,7 +4,7 @@ define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
 entry:
   br i1 %flag, label %if.then, label %if.else
 
-; CHECK: test1
+; CHECK-LABEL: test1
 ; CHECK: add
 ; CHECK: select
 ; CHECK: icmp
@@ -30,7 +30,7 @@ define zeroext i1 @test2(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
 entry:
   br i1 %flag, label %if.then, label %if.else
 
-; CHECK: test2
+; CHECK-LABEL: test2
 ; CHECK: add
 ; CHECK: select
 ; CHECK: icmp
@@ -51,3 +51,33 @@ if.end:
   %tobool4 = icmp ne i8 %obeys.0, 0
   ret i1 %tobool4
 }
+
+declare i32 @foo(i32, i32) nounwind readnone
+
+define i32 @test3(i1 zeroext %flag, i32 %x, i32 %y) {
+entry:
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+  %x0 = call i32 @foo(i32 %x, i32 0) nounwind readnone
+  %y0 = call i32 @foo(i32 %x, i32 1) nounwind readnone
+  br label %if.end
+
+if.else:
+  %x1 = call i32 @foo(i32 %y, i32 0) nounwind readnone
+  %y1 = call i32 @foo(i32 %y, i32 1) nounwind readnone
+  br label %if.end
+
+if.end:
+  %xx = phi i32 [ %x0, %if.then ], [ %x1, %if.else ]
+  %yy = phi i32 [ %y0, %if.then ], [ %y1, %if.else ]
+  %ret = add i32 %xx, %yy
+  ret i32 %ret
+}
+
+; CHECK-LABEL: test3
+; CHECK: select
+; CHECK: call
+; CHECK: call
+; CHECK: add
+; CHECK-NOT: br
diff --git a/test/Transforms/SimplifyCFG/speculate-math.ll b/test/Transforms/SimplifyCFG/speculate-math.ll
index fa7976d0689f..0ba93d29117a 100644
--- a/test/Transforms/SimplifyCFG/speculate-math.ll
+++ b/test/Transforms/SimplifyCFG/speculate-math.ll
@@ -3,6 +3,9 @@
 declare float @llvm.sqrt.f32(float) nounwind readonly
 declare float @llvm.fma.f32(float, float, float) nounwind readonly
 declare float @llvm.fmuladd.f32(float, float, float) nounwind readonly
+declare float @llvm.fabs.f32(float) nounwind readonly
+declare float @llvm.minnum.f32(float, float) nounwind readonly
+declare float @llvm.maxnum.f32(float, float) nounwind readonly
 
 ; CHECK-LABEL: @sqrt_test(
 ; CHECK: select
@@ -21,6 +24,22 @@ test_sqrt.exit:                                   ; preds = %cond.else.i, %entry
   ret void
 }
 
+; CHECK-LABEL: @fabs_test(
+; CHECK: select
+define void @fabs_test(float addrspace(1)* noalias nocapture %out, float %a) nounwind {
+entry:
+  %cmp.i = fcmp olt float %a, 0.000000e+00
+  br i1 %cmp.i, label %test_fabs.exit, label %cond.else.i
+
+cond.else.i:                                      ; preds = %entry
+  %0 = tail call float @llvm.fabs.f32(float %a) nounwind readnone
+  br label %test_fabs.exit
+
+test_fabs.exit:                                   ; preds = %cond.else.i, %entry
+  %cond.i = phi float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
+  store float %cond.i, float addrspace(1)* %out, align 4
+  ret void
+}
 
 ; CHECK-LABEL: @fma_test(
 ; CHECK: select
@@ -56,3 +75,36 @@ test_fmuladd.exit:                                   ; preds = %cond.else.i, %en
   ret void
 }
 
+; CHECK-LABEL: @minnum_test(
+; CHECK: select
+define void @minnum_test(float addrspace(1)* noalias nocapture %out, float %a, float %b) nounwind {
+entry:
+  %cmp.i = fcmp olt float %a, 0.000000e+00
+  br i1 %cmp.i, label %test_minnum.exit, label %cond.else.i
+
+cond.else.i:                                      ; preds = %entry
+  %0 = tail call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
+  br label %test_minnum.exit
+
+test_minnum.exit:                                   ; preds = %cond.else.i, %entry
+  %cond.i = phi float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
+  store float %cond.i, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL: @maxnum_test(
+; CHECK: select
+define void @maxnum_test(float addrspace(1)* noalias nocapture %out, float %a, float %b) nounwind {
+entry:
+  %cmp.i = fcmp olt float %a, 0.000000e+00
+  br i1 %cmp.i, label %test_maxnum.exit, label %cond.else.i
+
+cond.else.i:                                      ; preds = %entry
+  %0 = tail call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
+  br label %test_maxnum.exit
+
+test_maxnum.exit:                                   ; preds = %cond.else.i, %entry
+  %cond.i = phi float [ %0, %cond.else.i ], [ 0x7FF8000000000000, %entry ]
+  store float %cond.i, float addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll b/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll
new file mode 100644
index 000000000000..8bf2b04520b9
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/switch-range-to-icmp.ll
@@ -0,0 +1,50 @@
+; RUN: opt %s -simplifycfg -S | FileCheck %s
+
+declare i32 @f(i32)
+
+define i32 @basic(i32 %x) {
+; CHECK-LABEL: @basic
+; CHECK: x.off = add i32 %x, -5
+; CHECK: %switch = icmp ult i32 %x.off, 3
+; CHECK: br i1 %switch, label %a, label %default
+
+entry:
+  switch i32 %x, label %default [
+    i32 5, label %a
+    i32 6, label %a
+    i32 7, label %a
+  ]
+default:
+  %0 = call i32 @f(i32 0)
+  ret i32 %0
+a:
+  %1 = call i32 @f(i32 1)
+  ret i32 %1
+}
+
+
+define i32 @unreachable(i32 %x) {
+; CHECK-LABEL: @unreachable
+; CHECK: x.off = add i32 %x, -5
+; CHECK: %switch = icmp ult i32 %x.off, 3
+; CHECK: br i1 %switch, label %a, label %b
+
+entry:
+  switch i32 %x, label %unreachable [
+    i32 5, label %a
+    i32 6, label %a
+    i32 7, label %a
+    i32 10, label %b
+    i32 20, label %b
+    i32 30, label %b
+    i32 40, label %b
+  ]
+unreachable:
+  unreachable
+a:
+  %0 = call i32 @f(i32 0)
+  ret i32 %0
+b:
+  %1 = call i32 @f(i32 1)
+  ret i32 %1
+}
diff --git a/test/Transforms/SimplifyCFG/switch-to-br.ll b/test/Transforms/SimplifyCFG/switch-to-br.ll
new file mode 100644
index 000000000000..01484cda98b0
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/switch-to-br.ll
@@ -0,0 +1,64 @@
+; RUN: opt %s -simplifycfg -S | FileCheck %s
+
+declare i32 @f(i32)
+
+define i32 @basic(i32 %x) {
+; CHECK-LABEL: @basic
+; CHECK-LABEL: entry:
+; CHECK-NEXT:  call i32 @f(i32 0)
+; CHECK-NEXT:  ret i32 %0
+
+entry:
+  switch i32 %x, label %default [
+    i32 5, label %default
+    i32 6, label %default
+    i32 7, label %default
+  ]
+default:
+  %0 = call i32 @f(i32 0)
+  ret i32 %0
+}
+
+
+define i32 @constant() {
+; CHECK-LABEL: @constant
+; CHECK-LABEL: entry:
+; CHECK-NEXT:  call i32 @f(i32 1)
+; CHECK-NEXT:  ret i32 %0
+
+entry:
+  switch i32 42, label %default [
+    i32 41, label %default
+    i32 42, label %a
+    i32 43, label %b
+  ]
+default:
+  %0 = call i32 @f(i32 0)
+  ret i32 %0
+a:
+  %1 = call i32 @f(i32 1)
+  ret i32 %1
+b:
+  %2 = call i32 @f(i32 2)
+  ret i32 %2
+}
+
+
+define i32 @unreachable(i32 %x) {
+; CHECK-LABEL: @unreachable
+; CHECK-LABEL: entry:
+; CHECK-NEXT:  call i32 @f(i32 0)
+; CHECK-NEXT:  ret i32 %0
+
+entry:
+  switch i32 %x, label %unreachable [
+    i32 5, label %a
+    i32 6, label %a
+    i32 7, label %a
+  ]
+unreachable:
+  unreachable
+a:
+  %0 = call i32 @f(i32 0)
+  ret i32 %0
+}
diff --git a/test/Transforms/SimplifyCFG/switch-to-select-multiple-edge-per-block-phi.ll b/test/Transforms/SimplifyCFG/switch-to-select-multiple-edge-per-block-phi.ll
new file mode 100644
index 000000000000..ddf5d1f06ce7
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/switch-to-select-multiple-edge-per-block-phi.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; a, b;
+; fn1() {
+;   if (b)
+;     if (a == 0 || a == 5)
+;       return a;
+;   return 0;
+; }
+
+; Checking that we handle correctly the case when we have a switch
+; branching multiple times to the same block
+
+@b = common global i32 0, align 4
+@a = common global i32 0, align 4
+
+; Function Attrs: nounwind
+define i32 @fn1() {
+; CHECK-LABEL: @fn1
+; CHECK: %switch.selectcmp1 = icmp eq i32 %1, 5
+; CHECK: %switch.select2 = select i1 %switch.selectcmp1, i32 5, i32 %switch.select
+entry:
+  %0 = load i32* @b, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.end3, label %if.then
+
+if.then:
+  %1 = load i32* @a, align 4
+  switch i32 %1, label %if.end3 [
+    i32 5, label %return
+    i32 0, label %return
+  ]
+
+if.end3:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 0, %if.end3 ], [ %1, %if.then ], [ %1, %if.then ]
+  ret i32 %retval.0
+}
diff --git a/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll b/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
new file mode 100644
index 000000000000..69f97e5f9f9f
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/switch-to-select-two-case.ll
@@ -0,0 +1,72 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; int foo1_with_default(int a) {
+;   switch(a) {
+;     case 10:
+;       return 10;
+;     case 20:
+;       return 2;
+;   }
+;   return 4;
+; }
+
+define i32 @foo1_with_default(i32 %a) {
+; CHECK-LABEL: @foo1_with_default
+; CHECK: %switch.selectcmp = icmp eq i32 %a, 20
+; CHECK-NEXT: %switch.select = select i1 %switch.selectcmp, i32 2, i32 4
+; CHECK-NEXT: %switch.selectcmp1 = icmp eq i32 %a, 10
+; CHECK-NEXT: %switch.select2 = select i1 %switch.selectcmp1, i32 10, i32 %switch.select
+entry:
+  switch i32 %a, label %sw.epilog [
+    i32 10, label %sw.bb
+    i32 20, label %sw.bb1
+  ]
+
+sw.bb:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.epilog:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 4, %sw.epilog ], [ 2, %sw.bb1 ], [ 10, %sw.bb ]
+  ret i32 %retval.0
+}
+
+; int foo1_without_default(int a) {
+;   switch(a) {
+;     case 10:
+;       return 10;
+;     case 20:
+;       return 2;
+;   }
+;   __builtin_unreachable();
+; }
+
+define i32 @foo1_without_default(i32 %a) {
+; CHECK-LABEL: @foo1_without_default
+; CHECK: %switch.selectcmp = icmp eq i32 %a, 10
+; CHECK-NEXT: %switch.select = select i1 %switch.selectcmp, i32 10, i32 2
+; CHECK-NOT: %switch.selectcmp1
+entry:
+  switch i32 %a, label %sw.epilog [
+    i32 10, label %sw.bb
+    i32 20, label %sw.bb1
+  ]
+
+sw.bb:
+  br label %return
+
+sw.bb1:
+  br label %return
+
+sw.epilog:
+  unreachable
+
+return:
+  %retval.0 = phi i32 [ 2, %sw.bb1 ], [ 10, %sw.bb ]
+  ret i32 %retval.0
+}
diff --git a/test/Transforms/SimplifyCFG/trap-debugloc.ll b/test/Transforms/SimplifyCFG/trap-debugloc.ll
index 3b449cb000ab..24a286fd9735 100644
--- a/test/Transforms/SimplifyCFG/trap-debugloc.ll
+++ b/test/Transforms/SimplifyCFG/trap-debugloc.ll
@@ -11,14 +11,14 @@ define void @foo() nounwind ssp {
 !llvm.module.flags = !{!10}
 !llvm.dbg.sp = !{!0}
 
-!0 = metadata !{i32 589870, metadata !8, metadata !1, metadata !"foo", metadata !"foo", metadata !"", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [foo]
-!1 = metadata !{i32 589865, metadata !8} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, metadata !8, i32 12, metadata !"Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, metadata !9, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !8, metadata !1, metadata !"", i32 0, i64 0, i64 0, i32 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null}
-!5 = metadata !{i32 4, i32 2, metadata !6, null}
-!6 = metadata !{i32 589835, metadata !8, metadata !0, i32 3, i32 12, i32 0} ; [ DW_TAG_lexical_block ]
-!7 = metadata !{i32 5, i32 1, metadata !6, null}
-!8 = metadata !{metadata !"foo.c", metadata !"/private/tmp"}
-!9 = metadata !{metadata !0}
-!10 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00foo\00foo\00\003\000\001\000\006\000\000\000", !8, !1, !3, null, void ()* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 0] [foo]
+!1 = !{!"0x29", !8} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)\001\00\000\00\000", !8, !4, !4, !9, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !8, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null}
+!5 = !MDLocation(line: 4, column: 2, scope: !6)
+!6 = !{!"0xb\003\0012\000", !8, !0} ; [ DW_TAG_lexical_block ]
+!7 = !MDLocation(line: 5, column: 1, scope: !6)
+!8 = !{!"foo.c", !"/private/tmp"}
+!9 = !{!0}
+!10 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/SimplifyCFG/volatile-phioper.ll b/test/Transforms/SimplifyCFG/volatile-phioper.ll
index 1ef3a7ce59b3..6367451433c1 100644
--- a/test/Transforms/SimplifyCFG/volatile-phioper.ll
+++ b/test/Transforms/SimplifyCFG/volatile-phioper.ll
@@ -45,4 +45,4 @@ attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-
 attributes #1 = { "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "relocation-model"="pic" "ssp-buffers-size"="8" }
 attributes #2 = { nounwind }
 
-!0 = metadata !{i32 1039}
+!0 = !{i32 1039}
diff --git a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
index 5353744824dc..f2c705a317af 100644
--- a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
+++ b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
@@ -6,11 +6,11 @@
 
 define void @foo() nounwind readnone optsize ssp {
 entry:
-  tail call void @llvm.dbg.value(metadata !9, i64 0, metadata !5), !dbg !10
+  tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !5, metadata !{}), !dbg !10
   ret void, !dbg !11
 }
 
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!13}
@@ -18,17 +18,17 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !llvm.dbg.lv.foo = !{!5}
 !llvm.dbg.gv = !{!8}
 
-!0 = metadata !{i32 524334, metadata !12, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !12} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, metadata !12, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !4, metadata !4, null, null, null, metadata !""} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !12, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{null}
-!5 = metadata !{i32 524544, metadata !6, metadata !"y", metadata !1, i32 3, metadata !7} ; [ DW_TAG_auto_variable ]
-!6 = metadata !{i32 524299, metadata !12, metadata !0, i32 2, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
-!7 = metadata !{i32 524324, metadata !12, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!8 = metadata !{i32 524340, i32 0, metadata !1, metadata !"x", metadata !"x", metadata !"", metadata !1, i32 1, metadata !7, i1 false, i1 true, i32* @x} ; [ DW_TAG_variable ]
-!9 = metadata !{i32 0}
-!10 = metadata !{i32 3, i32 0, metadata !6, null}
-!11 = metadata !{i32 4, i32 0, metadata !6, null}
-!12 = metadata !{metadata !"b.c", metadata !"/tmp"}
-!13 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00foo\00foo\00foo\002\000\001\000\006\000\001\000", !12, !1, !3, null, void ()* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !12} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\000", !12, !4, !4, null, null, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !12, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{null}
+!5 = !{!"0x100\00y\003\000", !6, !1, !7} ; [ DW_TAG_auto_variable ]
+!6 = !{!"0xb\002\000\000", !12, !0} ; [ DW_TAG_lexical_block ]
+!7 = !{!"0x24\00int\000\0032\0032\000\000\005", !12, !1} ; [ DW_TAG_base_type ]
+!8 = !{!"0x34\00x\00x\00\001\000\001", !1, !1, !7, i32* @x} ; [ DW_TAG_variable ]
+!9 = !{i32 0}
+!10 = !MDLocation(line: 3, scope: !6)
+!11 = !MDLocation(line: 4, scope: !6)
+!12 = !{!"b.c", !"/tmp"}
+!13 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll
index b55ac3c08245..1534647c2a71 100644
--- a/test/Transforms/StripSymbols/2010-08-25-crash.ll
+++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll
@@ -7,18 +7,18 @@ entry:
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!14}
 
-!0 = metadata !{i32 524334, metadata !10, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !10} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, metadata !10, i32 12, metadata !"clang version 2.8 (trunk 112062)", i1 true, metadata !"", i32 0, metadata !11, metadata !11, metadata !12, metadata !13, null, metadata !"", i32 1} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 524324, metadata !10, metadata !1, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 524340, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"i", metadata !1, i32 2, metadata !7, i1 true, i1 true, i32 0, null} ; [ DW_TAG_variable ]
-!7 = metadata !{i32 524326, metadata !10, metadata !1, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, metadata !5} ; [ DW_TAG_const_type ]
-!8 = metadata !{i32 3, i32 13, metadata !9, null}
-!9 = metadata !{i32 524299, metadata !10, metadata !0, i32 3, i32 11, i32 0} ; [ DW_TAG_lexical_block ]
-!10 = metadata !{metadata !"/tmp/a.c", metadata !"/Volumes/Lalgate/clean/D.CW"}
-!11 = metadata !{i32 0}
-!12 = metadata !{metadata !0}
-!13 = metadata !{metadata !6}
-!14 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x2e\00foo\00foo\00foo\003\000\001\000\006\000\000\000", !10, !1, !3, null, i32 ()* @foo, null, null, null} ; [ DW_TAG_subprogram ]
+!1 = !{!"0x29", !10} ; [ DW_TAG_file_type ]
+!2 = !{!"0x11\0012\00clang version 2.8 (trunk 112062)\001\00\000\00\001", !10, !11, !11, !12, !13, null} ; [ DW_TAG_compile_unit ]
+!3 = !{!"0x15\00\000\000\000\000\000\000", !10, !1, null, !4, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!4 = !{!5}
+!5 = !{!"0x24\00int\000\0032\0032\000\000\005", !10, !1} ; [ DW_TAG_base_type ]
+!6 = !{!"0x34\00i\00i\00i\002\001\001", !1, !1, !7, i32 0, null} ; [ DW_TAG_variable ]
+!7 = !{!"0x26\00\000\000\000\000\000", !10, !1, !5} ; [ DW_TAG_const_type ]
+!8 = !MDLocation(line: 3, column: 13, scope: !9)
+!9 = !{!"0xb\003\0011\000", !10, !0} ; [ DW_TAG_lexical_block ]
+!10 = !{!"/tmp/a.c", !"/Volumes/Lalgate/clean/D.CW"}
+!11 = !{i32 0}
+!12 = !{!0}
+!13 = !{!6}
+!14 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/StripSymbols/strip-dead-debug-info.ll b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
index 8ce7b87c8250..aca7cd608470 100644
--- a/test/Transforms/StripSymbols/strip-dead-debug-info.ll
+++ b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
@@ -7,7 +7,7 @@
 @xyz = global i32 2
 
 ; Function Attrs: nounwind readnone
-declare void @llvm.dbg.value(metadata, i64, metadata) #0
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #0
 
 ; Function Attrs: nounwind readnone ssp
 define i32 @fn() #1 {
@@ -18,7 +18,7 @@ entry:
 ; Function Attrs: nounwind readonly ssp
 define i32 @foo(i32 %i) #2 {
 entry:
-  tail call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !15), !dbg !20
+  tail call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !15, metadata !{}), !dbg !20
   %.0 = load i32* @xyz, align 4
   ret i32 %.0, !dbg !21
 }
@@ -30,29 +30,29 @@ attributes #2 = { nounwind readonly ssp }
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!25}
 
-!0 = metadata !{i32 524305, metadata !1, i32 1, metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !23, metadata !24, null, metadata !"", i32 1} ; [ DW_TAG_compile_unit ] [/tmp//g.c] [DW_LANG_C89]
-!1 = metadata !{metadata !"g.c", metadata !"/tmp/"}
-!2 = metadata !{null}
-!3 = metadata !{i32 524334, metadata !1, null, metadata !"bar", metadata !"bar", metadata !"", i32 5, metadata !4, i1 true, i1 true, i32 0, i32 0, null, i1 false, i1 true, null, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [scope 0] [bar]
-!4 = metadata !{i32 524309, metadata !1, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!5 = metadata !{i32 524329, metadata !1}          ; [ DW_TAG_file_type ] [/tmp//g.c]
-!6 = metadata !{i32 524334, metadata !1, null, metadata !"fn", metadata !"fn", metadata !"fn", i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 ()* @fn, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [fn]
-!7 = metadata !{i32 524309, metadata !1, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 524324, metadata !1, metadata !5, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
-!10 = metadata !{i32 524334, metadata !1, null, metadata !"foo", metadata !"foo", metadata !"foo", i32 7, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32)* @foo, null, null, null, i32 0} ; [ DW_TAG_subprogram ] [line 7] [def] [scope 0] [foo]
-!11 = metadata !{i32 524309, metadata !1, metadata !5, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
-!12 = metadata !{metadata !9, metadata !9}
-!13 = metadata !{i32 524544, metadata !14, metadata !"bb", metadata !5, i32 5, metadata !9}
-!14 = metadata !{i32 524299, metadata !1, metadata !3, i32 5, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
-!15 = metadata !{i32 524545, metadata !10, metadata !"i", metadata !5, i32 7, metadata !9}
-!16 = metadata !{i32 524340, i32 0, metadata !5, metadata !"abcd", metadata !"abcd", metadata !"", metadata !5, i32 2, metadata !9, i1 true, i1 true, null, null}
-!17 = metadata !{i32 524340, i32 0, metadata !5, metadata !"xyz", metadata !"xyz", metadata !"", metadata !5, i32 3, metadata !9, i1 false, i1 true, i32* @xyz, null}
-!18 = metadata !{i32 6, i32 0, metadata !19, null}
-!19 = metadata !{i32 524299, metadata !1, metadata !6, i32 6, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
-!20 = metadata !{i32 7, i32 0, metadata !10, null}
-!21 = metadata !{i32 10, i32 0, metadata !22, null}
-!22 = metadata !{i32 524299, metadata !1, metadata !10, i32 7, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
-!23 = metadata !{metadata !3, metadata !6, metadata !10}
-!24 = metadata !{metadata !16, metadata !17}
-!25 = metadata !{i32 1, metadata !"Debug Info Version", i32 1}
+!0 = !{!"0x11\001\004.2.1 (Based on Apple Inc. build 5658) (LLVM build)\001\00\000\00\001", !1, !2, !2, !23, !24, null} ; [ DW_TAG_compile_unit ] [/tmp//g.c] [DW_LANG_C89]
+!1 = !{!"g.c", !"/tmp/"}
+!2 = !{null}
+!3 = !{!"0x2e\00bar\00bar\00\005\001\001\000\006\000\001\000", !1, null, !4, null, null, null, null, null} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [scope 0] [bar]
+!4 = !{!"0x15\00\000\000\000\000\000\000", !1, !5, null, !2, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!5 = !{!"0x29", !1}          ; [ DW_TAG_file_type ] [/tmp//g.c]
+!6 = !{!"0x2e\00fn\00fn\00fn\006\000\001\000\006\000\001\000", !1, null, !7, null, i32 ()* @fn, null, null, null} ; [ DW_TAG_subprogram ] [line 6] [def] [scope 0] [fn]
+!7 = !{!"0x15\00\000\000\000\000\000\000", !1, !5, null, !8, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = !{!9}
+!9 = !{!"0x24\00int\000\0032\0032\000\000\005", !1, !5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = !{!"0x2e\00foo\00foo\00foo\007\000\001\000\006\000\001\000", !1, null, !11, null, i32 (i32)* @foo, null, null, null} ; [ DW_TAG_subprogram ] [line 7] [def] [scope 0] [foo]
+!11 = !{!"0x15\00\000\000\000\000\000\000", !1, !5, null, !12, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!12 = !{!9, !9}
+!13 = !{!"0x100\00bb\005\000", !14, !5, !9} ; [ DW_TAG_auto_variable ]
+!14 = !{!"0xb\005\000\000", !1, !3} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
+!15 = !{!"0x101\00i\007\000", !10, !5, !9} ; [ DW_TAG_arg_variable ]
+!16 = !{!"0x34\00abcd\00abcd\00\002\001\001", !5, !5, !9, null, null} ; [ DW_TAG_variable ]
+!17 = !{!"0x34\00xyz\00xyz\00\003\000\001", !5, !5, !9, i32* @xyz, null} ; [ DW_TAG_variable ]
+!18 = !MDLocation(line: 6, scope: !19)
+!19 = !{!"0xb\006\000\000", !1, !6} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
+!20 = !MDLocation(line: 7, scope: !10)
+!21 = !MDLocation(line: 10, scope: !22)
+!22 = !{!"0xb\007\000\000", !1, !10} ; [ DW_TAG_lexical_block ] [/tmp//g.c]
+!23 = !{!3, !6, !10}
+!24 = !{!16, !17}
+!25 = !{i32 1, !"Debug Info Version", i32 2}
diff --git a/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll b/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
new file mode 100644
index 000000000000..8ebd47aefd15
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
@@ -0,0 +1,41 @@
+; RUN: opt -S -structurizecfg %s -o - | FileCheck %s
+
+; CHECK-NOT: br i1 true
+
+define void @blam(i32 addrspace(1)* nocapture %arg, float %arg1, float %arg2) {
+; CHECK: bb:
+bb:
+  br label %bb3
+
+; CHECK: bb3:
+bb3:                                              ; preds = %bb7, %bb
+  %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb7 ]
+  %tmp4 = fcmp ult float %arg1, 3.500000e+00
+; CHECK: br i1 %tmp4, label %bb7, label %Flow
+  br i1 %tmp4, label %bb7, label %bb5
+
+; CHECK: Flow:
+; CHECK: br i1 %2, label %Flow1, label %bb3
+
+; CHECK: Flow1:
+; CHECK: br i1 %3, label %bb5, label %bb10
+
+; CHECK: bb5:
+bb5:                                              ; preds = %bb3
+  %tmp6 = fcmp olt float 0.000000e+00, %arg2
+; CHECK: br label %bb10
+  br i1 %tmp6, label %bb10, label %bb7
+
+; CHECK: bb7
+bb7:                                              ; preds = %bb5, %bb3
+  %tmp8 = add nuw nsw i64 %tmp, 1
+  %tmp9 = icmp slt i64 %tmp8, 5
+; CHECK: br label %Flow
+  br i1 %tmp9, label %bb3, label %bb10
+
+; CHECK: bb10
+bb10:                                             ; preds = %bb7, %bb5
+  %tmp11 = phi i32 [ 15, %bb5 ], [ 255, %bb7 ]
+  store i32 %tmp11, i32 addrspace(1)* %arg, align 4
+  ret void
+}
diff --git a/test/Transforms/TailCallElim/EraseBB.ll b/test/Transforms/TailCallElim/EraseBB.ll
new file mode 100644
index 000000000000..c8290d7ea951
--- /dev/null
+++ b/test/Transforms/TailCallElim/EraseBB.ll
@@ -0,0 +1,26 @@
+; RUN: opt -tailcallelim -S < %s 2>&1 | FileCheck %s
+
+; CHECK: add nsw i32
+; CHECK-NEXT: br label
+; CHECK: add nsw i32
+; CHECK-NEXT: br label
+; CHECK-NOT: Uses remain when a value is destroyed
+define i32 @test(i32 %n) {
+entry:
+  %cmp = icmp slt i32 %n, 2
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:                                          ; preds = %entry
+  %v1 = add nsw i32 %n, -2
+  %call1 = tail call i32 @test(i32 %v1)
+  br label %return
+
+if.else:                                          ; preds = %entry
+  %v2 = add nsw i32 %n, 4
+  %call2 = tail call i32 @test(i32 %v2)
+  br label %return
+
+return:                                           ; preds = %if.end, %if.else
+  %retval = phi i32 [ %call1, %if.then ], [ %call2, %if.else ]
+  ret i32 %retval
+}
diff --git a/test/Transforms/TailCallElim/reorder_load.ll b/test/Transforms/TailCallElim/reorder_load.ll
index 53c65dab101b..2e350d662a39 100644
--- a/test/Transforms/TailCallElim/reorder_load.ll
+++ b/test/Transforms/TailCallElim/reorder_load.ll
@@ -1,6 +1,8 @@
 ; RUN: opt < %s -tailcallelim -S | FileCheck %s
 ; PR4323
 
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
 ; Several cases where tail call elimination should move the load above the call,
 ; then eliminate the tail recursion.
 
@@ -12,6 +14,11 @@
 ; This load can be moved above the call because the function won't write to it
 ; and the call has no side effects.
 define fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
+; CHECK-LABEL: @raise_load_1(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NOT: call
+; CHECK: }
 entry:
 	%tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
 	br i1 %tmp2, label %if, label %else
@@ -21,7 +28,6 @@ if:		; preds = %entry
 
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
-; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
 	%tmp9 = load i32* %a_arg		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
@@ -32,6 +38,11 @@ else:		; preds = %entry
 ; This load can be moved above the call because the function won't write to it
 ; and the load provably can't trap.
 define fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+; CHECK-LABEL: @raise_load_2(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NOT: call
+; CHECK: }
 entry:
 	%tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
 	br i1 %tmp2, label %if, label %else
@@ -48,7 +59,6 @@ unwind:		; preds = %else
 
 recurse:		; preds = %else
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
-; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
 	%tmp9 = load i32* @global		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
@@ -59,6 +69,11 @@ recurse:		; preds = %else
 ; This load can be safely moved above the call (even though it's from an
 ; extern_weak global) because the call has no side effects.
 define fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
+; CHECK-LABEL: @raise_load_3(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NOT: call
+; CHECK: }
 entry:
 	%tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
 	br i1 %tmp2, label %if, label %else
@@ -68,7 +83,6 @@ if:		; preds = %entry
 
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
-; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
 	%tmp9 = load i32* @extern_weak_global		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
@@ -80,6 +94,12 @@ else:		; preds = %entry
 ; unknown pointer (which normally means it might trap) because the first load
 ; proves it doesn't trap.
 define fastcc i32 @raise_load_4(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+; CHECK-LABEL: @raise_load_4(
+; CHECK-NOT: call
+; CHECK: load i32*
+; CHECK-NEXT: load i32*
+; CHECK-NOT: call
+; CHECK: }
 entry:
 	%tmp2 = icmp sge i32 %start_arg, %a_len_arg		; <i1> [#uses=1]
 	br i1 %tmp2, label %if, label %else
@@ -97,7 +117,6 @@ unwind:		; preds = %else
 recurse:		; preds = %else
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
 	%first = load i32* %a_arg		; <i32> [#uses=1]
-; CHECK-NOT: call
 	%tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7)		; <i32> [#uses=1]
 	%second = load i32* %a_arg		; <i32> [#uses=1]
 	%tmp10 = add i32 %second, %tmp8		; <i32> [#uses=1]
diff --git a/test/Transforms/Util/flattencfg.ll b/test/Transforms/Util/flattencfg.ll
new file mode 100644
index 000000000000..4fcb77ab0237
--- /dev/null
+++ b/test/Transforms/Util/flattencfg.ll
@@ -0,0 +1,26 @@
+; RUN: opt -flattencfg -S < %s | FileCheck %s
+
+
+; This test checks whether the pass completes without a crash.
+; The code is not transformed in any way
+;
+; CHECK-LABEL: @test_not_crash
+define void @test_not_crash(i32 %in_a) #0 {
+entry:
+  %cmp0 = icmp eq i32 %in_a, -1
+  %cmp1 = icmp ne i32 %in_a, 0
+  %cond0 = and i1 %cmp0, %cmp1
+  br i1 %cond0, label %b0, label %b1
+
+b0:                                ; preds = %entry
+  %cmp2 = icmp eq i32 %in_a, 0
+  %cmp3 = icmp ne i32 %in_a, 1
+  %cond1 = or i1 %cmp2, %cmp3
+  br i1 %cond1, label %exit, label %b1
+
+b1:                                       ; preds = %entry, %b0
+  br label %exit
+
+exit:                               ; preds = %entry, %b0, %b1
+  ret void
+}
diff --git a/test/Transforms/Util/lowerswitch.ll b/test/Transforms/Util/lowerswitch.ll
new file mode 100644
index 000000000000..17c12028705f
--- /dev/null
+++ b/test/Transforms/Util/lowerswitch.ll
@@ -0,0 +1,54 @@
+; RUN: opt -lowerswitch -S < %s | FileCheck %s
+
+; Test that we don't crash and have a different basic block for each incoming edge.
+define void @test0() {
+; CHECK-LABEL: @test0
+; CHECK: %merge = phi i64 [ 1, %BB3 ], [ 0, %NewDefault ], [ 0, %NodeBlock5 ], [ 0, %LeafBlock1 ]
+BB1:
+  switch i32 undef, label %BB2 [
+    i32 3, label %BB2
+    i32 5, label %BB2
+    i32 0, label %BB3
+    i32 2, label %BB3
+    i32 4, label %BB3
+  ]
+
+BB2:
+  %merge = phi i64 [ 1, %BB3 ], [ 0, %BB1 ], [ 0, %BB1 ], [ 0, %BB1 ]
+  ret void
+
+BB3:
+  br label %BB2
+}
+
+; Test switch cases that are merged into a single case during lowerswitch
+; (take 84 and 85 below) - check that the number of incoming phi values match
+; the number of branches.
+define void @test1() {
+; CHECK-LABEL: @test1
+entry:
+  br label %bb1
+
+bb1:
+  switch i32 undef, label %bb1 [
+    i32 84, label %bb3
+    i32 85, label %bb3
+    i32 86, label %bb2
+    i32 78, label %exit
+    i32 99, label %bb3
+  ]
+
+bb2:
+  br label %bb3
+
+bb3:
+; CHECK-LABEL: bb3
+; CHECK: %tmp = phi i32 [ 1, %NodeBlock ], [ 0, %bb2 ], [ 1, %LeafBlock3 ]
+  %tmp = phi i32 [ 1, %bb1 ], [ 0, %bb2 ], [ 1, %bb1 ], [ 1, %bb1 ]
+; CHECK-NEXT: %tmp2 = phi i32 [ 2, %NodeBlock ], [ 5, %bb2 ], [ 2, %LeafBlock3 ]
+  %tmp2 = phi i32 [ 2, %bb1 ], [ 2, %bb1 ], [ 5, %bb2 ], [ 2, %bb1 ]
+  br label %exit
+
+exit:
+  ret void
+}
diff --git a/test/Verifier/alias.ll b/test/Verifier/alias.ll
index ff02a37bab95..dd04ae05f634 100644
--- a/test/Verifier/alias.ll
+++ b/test/Verifier/alias.ll
@@ -21,7 +21,7 @@ declare void @f()
 
 
 @test3_a = global i32 42
-@test3_b = alias weak i32* @test3_a
+@test3_b = weak alias i32* @test3_a
 @test3_c = alias i32* @test3_b
 ; CHECK: Alias cannot point to a weak alias
 ; CHECK-NEXT: i32* @test3_c
diff --git a/test/Verifier/comdat.ll b/test/Verifier/comdat.ll
index ca47429b1086..dcf67d89f8d7 100644
--- a/test/Verifier/comdat.ll
+++ b/test/Verifier/comdat.ll
@@ -1,5 +1,5 @@
 ; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
 
 $v = comdat any
-@v = common global i32 0, comdat $v
+@v = common global i32 0, comdat($v)
 ; CHECK: 'common' global may not be in a Comdat!
diff --git a/test/Verifier/comdat2.ll b/test/Verifier/comdat2.ll
index d78030c12af8..9d892b974fa9 100644
--- a/test/Verifier/comdat2.ll
+++ b/test/Verifier/comdat2.ll
@@ -1,5 +1,5 @@
 ; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
 
 $v = comdat any
-@v = private global i32 0, comdat $v
+@v = private global i32 0, comdat($v)
 ; CHECK: comdat global value has private linkage
diff --git a/test/Verifier/fpmath.ll b/test/Verifier/fpmath.ll
index 7002c5c825f7..2689b69d1d06 100644
--- a/test/Verifier/fpmath.ll
+++ b/test/Verifier/fpmath.ll
@@ -22,10 +22,10 @@ define void @fpmath1(i32 %i, float %f, <2 x float> %g) {
   ret void
 }
 
-!0 = metadata !{ float 1.0 }
-!1 = metadata !{ }
-!2 = metadata !{ float 1.0, float 1.0 }
-!3 = metadata !{ i32 1 }
-!4 = metadata !{ float -1.0 }
-!5 = metadata !{ float 0.0 }
-!6 = metadata !{ float 0x7FFFFFFF00000000 }
+!0 = !{ float 1.0 }
+!1 = !{ }
+!2 = !{ float 1.0, float 1.0 }
+!3 = !{ i32 1 }
+!4 = !{ float -1.0 }
+!5 = !{ float 0.0 }
+!6 = !{ float 0x7FFFFFFF00000000 }
diff --git a/test/Verifier/frameallocate.ll b/test/Verifier/frameallocate.ll
new file mode 100644
index 000000000000..e3018db15275
--- /dev/null
+++ b/test/Verifier/frameallocate.ll
@@ -0,0 +1,48 @@
+; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
+
+declare i8* @llvm.frameallocate(i32)
+declare i8* @llvm.framerecover(i8*, i8*)
+
+define internal void @f() {
+  call i8* @llvm.frameallocate(i32 4)
+  call i8* @llvm.frameallocate(i32 4)
+  ret void
+}
+; CHECK: multiple calls to llvm.frameallocate in one function
+
+define internal void @f_a(i32 %n) {
+  call i8* @llvm.frameallocate(i32 %n)
+  ret void
+}
+; CHECK: llvm.frameallocate argument must be constant integer size
+
+define internal void @g() {
+entry:
+  br label %not_entry
+not_entry:
+  call i8* @llvm.frameallocate(i32 4)
+  ret void
+}
+; CHECK: llvm.frameallocate used outside of entry block
+
+define internal void @h() {
+  call i8* @llvm.framerecover(i8* null, i8* null)
+  ret void
+}
+; CHECK: llvm.framerecover first argument must be function defined in this module
+
+@global = constant i8 0
+
+declare void @declaration()
+
+define internal void @i() {
+  call i8* @llvm.framerecover(i8* @global, i8* null)
+  ret void
+}
+; CHECK: llvm.framerecover first argument must be function defined in this module
+
+define internal void @j() {
+  call i8* @llvm.framerecover(i8* bitcast(void()* @declaration to i8*), i8* null)
+  ret void
+}
+; CHECK: llvm.framerecover first argument must be function defined in this module
diff --git a/test/Verifier/ident-meta1.ll b/test/Verifier/ident-meta1.ll
index fb247a8c5e2e..3202fbd3e8f7 100644
--- a/test/Verifier/ident-meta1.ll
+++ b/test/Verifier/ident-meta1.ll
@@ -4,9 +4,9 @@
 ; Each metadata entry can have only one string.
 
 !llvm.ident = !{!0, !1}
-!0 = metadata !{metadata !"version string"}
-!1 = metadata !{metadata !"string1", metadata !"string2"}
+!0 = !{!"version string"}
+!1 = !{!"string1", !"string2"}
 ; CHECK: assembly parsed, but does not verify as correct!
 ; CHECK-NEXT: incorrect number of operands in llvm.ident metadata
-; CHECK-NEXT: metadata !1
+; CHECK-NEXT: !1
 
diff --git a/test/Verifier/ident-meta2.ll b/test/Verifier/ident-meta2.ll
index e86f18adc0e8..1c11e1bcba2a 100644
--- a/test/Verifier/ident-meta2.ll
+++ b/test/Verifier/ident-meta2.ll
@@ -4,10 +4,10 @@
 ; Each metadata entry can contain one string only.
 
 !llvm.ident = !{!0, !1, !2, !3}
-!0 = metadata !{metadata !"str1"}
-!1 = metadata !{metadata !"str2"}
-!2 = metadata !{metadata !"str3"}
-!3 = metadata !{i32 1}
+!0 = !{!"str1"}
+!1 = !{!"str2"}
+!2 = !{!"str3"}
+!3 = !{i32 1}
 ; CHECK: assembly parsed, but does not verify as correct!
 ; CHECK-NEXT: invalid value for llvm.ident metadata entry operand(the operand should be a string)
 ; CHECK-NEXT: i32 1
diff --git a/test/Verifier/ident-meta3.ll b/test/Verifier/ident-meta3.ll
index a847b462161d..91e1cfec5023 100644
--- a/test/Verifier/ident-meta3.ll
+++ b/test/Verifier/ident-meta3.ll
@@ -4,7 +4,7 @@
 ; Each metadata entry can contain one string only.
 
 !llvm.ident = !{!0}
-!0 = metadata !{metadata !{metadata !"nested metadata"}}
+!0 = !{!{!"nested metadata"}}
 ; CHECK: assembly parsed, but does not verify as correct!
 ; CHECK-NEXT: invalid value for llvm.ident metadata entry operand(the operand should be a string)
-; CHECK-NEXT: metadata !1
+; CHECK-NEXT: !1
diff --git a/test/Verifier/invoke.ll b/test/Verifier/invoke.ll
index c2750bb121f2..e80cfcf830b1 100644
--- a/test/Verifier/invoke.ll
+++ b/test/Verifier/invoke.ll
@@ -46,7 +46,7 @@ contb:
 
 define i8 @f2() {
 entry:
-; CHECK: Cannot invoke an intrinsinc other than donothing
+; CHECK: Cannot invoke an intrinsinc other than donothing or patchpoint
   invoke void @llvm.trap()
   to label %cont unwind label %lpad
 
diff --git a/test/Verifier/module-flags-1.ll b/test/Verifier/module-flags-1.ll
index e5feaf3a580d..36bcb335ffc2 100644
--- a/test/Verifier/module-flags-1.ll
+++ b/test/Verifier/module-flags-1.ll
@@ -3,57 +3,54 @@
 ; Check that module flags are structurally correct.
 ;
 ; CHECK: incorrect number of operands in module flag
-; CHECK: metadata !0
-!0 = metadata !{ i32 1 }
+; CHECK: !0
+!0 = !{i32 1}
 ; CHECK: invalid behavior operand in module flag (expected constant integer)
-; CHECK: metadata !"foo"
-!1 = metadata !{ metadata !"foo", metadata !"foo", i32 42 }
+; CHECK: !"foo"
+!1 = !{!"foo", !"foo", i32 42}
 ; CHECK: invalid behavior operand in module flag (unexpected constant)
 ; CHECK: i32 999
-!2 = metadata !{ i32 999, metadata !"foo", i32 43 }
+!2 = !{i32 999, !"foo", i32 43}
 ; CHECK: invalid ID operand in module flag (expected metadata string)
 ; CHECK: i32 1
-!3 = metadata !{ i32 1, i32 1, i32 44 }
+!3 = !{i32 1, i32 1, i32 44}
 ; CHECK: invalid value for 'require' module flag (expected metadata pair)
 ; CHECK: i32 45
-!4 = metadata !{ i32 3, metadata !"bla", i32 45 }
+!4 = !{i32 3, !"bla", i32 45}
 ; CHECK: invalid value for 'require' module flag (expected metadata pair)
-; CHECK: metadata !
-!5 = metadata !{ i32 3, metadata !"bla", metadata !{ i32 46 } }
+; CHECK: !
+!5 = !{i32 3, !"bla", !{i32 46}}
 ; CHECK: invalid value for 'require' module flag (first value operand should be a string)
 ; CHECK: i32 47
-!6 = metadata !{ i32 3, metadata !"bla", metadata !{ i32 47, i32 48 } }
+!6 = !{i32 3, !"bla", !{i32 47, i32 48}}
 
 ; Check that module flags only have unique IDs.
 ;
 ; CHECK: module flag identifiers must be unique (or of 'require' type)
-!7 = metadata !{ i32 1, metadata !"foo", i32 49 }
-!8 = metadata !{ i32 2, metadata !"foo", i32 50 }
+!7 = !{i32 1, !"foo", i32 49}
+!8 = !{i32 2, !"foo", i32 50}
 ; CHECK-NOT: module flag identifiers must be unique
-!9 = metadata !{ i32 2, metadata !"bar", i32 51 }
-!10 = metadata !{ i32 3, metadata !"bar", metadata !{ metadata !"bar", i32 51 } }
+!9 = !{i32 2, !"bar", i32 51}
+!10 = !{i32 3, !"bar", !{!"bar", i32 51}}
 
 ; Check that any 'append'-type module flags are valid.
 ; CHECK: invalid value for 'append'-type module flag (expected a metadata node)
-!16 = metadata !{ i32 5, metadata !"flag-2", i32 56 }
+!16 = !{i32 5, !"flag-2", i32 56}
 ; CHECK: invalid value for 'append'-type module flag (expected a metadata node)
-!17 = metadata !{ i32 5, metadata !"flag-3", i32 57 }
+!17 = !{i32 5, !"flag-3", i32 57}
 ; CHECK-NOT: invalid value for 'append'-type module flag (expected a metadata node)
-!18 = metadata !{ i32 5, metadata !"flag-4", metadata !{ i32 57 } }
+!18 = !{i32 5, !"flag-4", !{i32 57}}
 
 ; Check that any 'require' module flags are valid.
 ; CHECK: invalid requirement on flag, flag is not present in module
-!11 = metadata !{ i32 3, metadata !"bar",
-     metadata !{ metadata !"no-such-flag", i32 52 } }
+!11 = !{i32 3, !"bar", !{!"no-such-flag", i32 52}}
 ; CHECK: invalid requirement on flag, flag does not have the required value
-!12 = metadata !{ i32 1, metadata !"flag-0", i32 53 }
-!13 = metadata !{ i32 3, metadata !"bar",
-     metadata !{ metadata !"flag-0", i32 54 } }
+!12 = !{i32 1, !"flag-0", i32 53}
+!13 = !{i32 3, !"bar", !{!"flag-0", i32 54}}
 ; CHECK-NOT: invalid requirement on flag, flag is not present in module
 ; CHECK-NOT: invalid requirement on flag, flag does not have the required value
-!14 = metadata !{ i32 1, metadata !"flag-1", i32 55 }
-!15 = metadata !{ i32 3, metadata !"bar",
-     metadata !{ metadata !"flag-1", i32 55 } }
+!14 = !{i32 1, !"flag-1", i32 55}
+!15 = !{i32 3, !"bar", !{!"flag-1", i32 55}}
 
 !llvm.module.flags = !{
   !0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15,
diff --git a/test/Verifier/musttail-valid.ll b/test/Verifier/musttail-valid.ll
index 815d77a13e35..bdc0c8cf7fd6 100644
--- a/test/Verifier/musttail-valid.ll
+++ b/test/Verifier/musttail-valid.ll
@@ -14,3 +14,26 @@ define i32* @similar_ret_ptrty() {
   %w = bitcast i8* %v to i32*
   ret i32* %w
 }
+
+declare x86_thiscallcc void @varargs_thiscall(i8*, ...)
+define x86_thiscallcc void @varargs_thiscall_thunk(i8* %this, ...) {
+  musttail call x86_thiscallcc void (i8*, ...)* @varargs_thiscall(i8* %this, ...)
+  ret void
+}
+
+declare x86_fastcallcc void @varargs_fastcall(i8*, ...)
+define x86_fastcallcc void @varargs_fastcall_thunk(i8* %this, ...) {
+  musttail call x86_fastcallcc void (i8*, ...)* @varargs_fastcall(i8* %this, ...)
+  ret void
+}
+
+define x86_thiscallcc void @varargs_thiscall_unreachable(i8* %this, ...) {
+  unreachable
+}
+
+define x86_thiscallcc void @varargs_thiscall_ret_unreachable(i8* %this, ...) {
+  musttail call x86_thiscallcc void (i8*, ...)* @varargs_thiscall(i8* %this, ...)
+  ret void
+bb1:
+  ret void
+}
diff --git a/test/Verifier/range-1.ll b/test/Verifier/range-1.ll
index f15ca3f74065..fda65cbbbb89 100644
--- a/test/Verifier/range-1.ll
+++ b/test/Verifier/range-1.ll
@@ -5,7 +5,7 @@ entry:
   store i8 0, i8* %x, align 1, !range !0
   ret void
 }
-!0 = metadata !{i8 0, i8 1}
+!0 = !{i8 0, i8 1}
 ; CHECK: Ranges are only for loads, calls and invokes!
 ; CHECK-NEXT: store i8 0, i8* %x, align 1, !range !0
 
@@ -14,16 +14,15 @@ entry:
   %y = load i8* %x, align 1, !range !1
   ret i8 %y
 }
-!1 = metadata !{}
+!1 = !{}
 ; CHECK: It should have at least one range!
-; CHECK-NEXT: metadata
 
 define i8 @f3(i8* %x) {
 entry:
   %y = load i8* %x, align 1, !range !2
   ret i8 %y
 }
-!2 = metadata !{i8 0}
+!2 = !{i8 0}
 ; CHECK: Unfinished range!
 
 define i8 @f4(i8* %x) {
@@ -31,7 +30,7 @@ entry:
   %y = load i8* %x, align 1, !range !3
   ret i8 %y
 }
-!3 = metadata !{double 0.0, i8 0}
+!3 = !{double 0.0, i8 0}
 ; CHECK: The lower limit must be an integer!
 
 define i8 @f5(i8* %x) {
@@ -39,7 +38,7 @@ entry:
   %y = load i8* %x, align 1, !range !4
   ret i8 %y
 }
-!4 = metadata !{i8 0, double 0.0}
+!4 = !{i8 0, double 0.0}
 ; CHECK: The upper limit must be an integer!
 
 define i8 @f6(i8* %x) {
@@ -47,8 +46,8 @@ entry:
   %y = load i8* %x, align 1, !range !5
   ret i8 %y
 }
-!5 = metadata !{i32 0, i8 0}
-; CHECK: Range types must match load type!
+!5 = !{i32 0, i8 0}
+; CHECK: Range types must match instruction type!
 ; CHECK:  %y = load
 
 define i8 @f7(i8* %x) {
@@ -56,8 +55,8 @@ entry:
   %y = load i8* %x, align 1, !range !6
   ret i8 %y
 }
-!6 = metadata !{i8 0, i32 0}
-; CHECK: Range types must match load type!
+!6 = !{i8 0, i32 0}
+; CHECK: Range types must match instruction type!
 ; CHECK:  %y = load
 
 define i8 @f8(i8* %x) {
@@ -65,8 +64,8 @@ entry:
   %y = load i8* %x, align 1, !range !7
   ret i8 %y
 }
-!7 = metadata !{i32 0, i32 0}
-; CHECK: Range types must match load type!
+!7 = !{i32 0, i32 0}
+; CHECK: Range types must match instruction type!
 ; CHECK:  %y = load
 
 define i8 @f9(i8* %x) {
@@ -74,7 +73,7 @@ entry:
   %y = load i8* %x, align 1, !range !8
   ret i8 %y
 }
-!8 = metadata !{i8 0, i8 0}
+!8 = !{i8 0, i8 0}
 ; CHECK: Range must not be empty!
 
 define i8 @f10(i8* %x) {
@@ -82,7 +81,7 @@ entry:
   %y = load i8* %x, align 1, !range !9
   ret i8 %y
 }
-!9 = metadata !{i8 0, i8 2, i8 1, i8 3}
+!9 = !{i8 0, i8 2, i8 1, i8 3}
 ; CHECK: Intervals are overlapping
 
 define i8 @f11(i8* %x) {
@@ -90,7 +89,7 @@ entry:
   %y = load i8* %x, align 1, !range !10
   ret i8 %y
 }
-!10 = metadata !{i8 0, i8 2, i8 2, i8 3}
+!10 = !{i8 0, i8 2, i8 2, i8 3}
 ; CHECK: Intervals are contiguous
 
 define i8 @f12(i8* %x) {
@@ -98,7 +97,7 @@ entry:
   %y = load i8* %x, align 1, !range !11
   ret i8 %y
 }
-!11 = metadata !{i8 1, i8 2, i8 -1, i8 0}
+!11 = !{i8 1, i8 2, i8 -1, i8 0}
 ; CHECK: Intervals are not in order
 
 define i8 @f13(i8* %x) {
@@ -106,7 +105,7 @@ entry:
   %y = load i8* %x, align 1, !range !12
   ret i8 %y
 }
-!12 = metadata !{i8 1, i8 3, i8 5, i8 1}
+!12 = !{i8 1, i8 3, i8 5, i8 1}
 ; CHECK: Intervals are contiguous
 
 define i8 @f14(i8* %x) {
@@ -114,7 +113,7 @@ entry:
   %y = load i8* %x, align 1, !range !13
   ret i8 %y
 }
-!13 = metadata !{i8 1, i8 3, i8 5, i8 2}
+!13 = !{i8 1, i8 3, i8 5, i8 2}
 ; CHECK: Intervals are overlapping
 
 define i8 @f15(i8* %x) {
@@ -122,7 +121,7 @@ entry:
   %y = load i8* %x, align 1, !range !14
   ret i8 %y
 }
-!14 = metadata !{i8 10, i8 1, i8 12, i8 13}
+!14 = !{i8 10, i8 1, i8 12, i8 13}
 ; CHECK: Intervals are overlapping
 
 define i8 @f16(i8* %x) {
@@ -130,7 +129,7 @@ entry:
   %y = load i8* %x, align 1, !range !16
   ret i8 %y
 }
-!16 = metadata !{i8 1, i8 3, i8 4, i8 5, i8 6, i8 2}
+!16 = !{i8 1, i8 3, i8 4, i8 5, i8 6, i8 2}
 ; CHECK: Intervals are overlapping
 
 define i8 @f17(i8* %x) {
@@ -138,5 +137,13 @@ entry:
   %y = load i8* %x, align 1, !range !17
   ret i8 %y
 }
-!17 = metadata !{i8 1, i8 3, i8 4, i8 5, i8 6, i8 1}
+!17 = !{i8 1, i8 3, i8 4, i8 5, i8 6, i8 1}
 ; CHECK: Intervals are contiguous
+
+define i8 @f18() {
+entry:
+  %y = call i8 undef(), !range !18
+  ret i8 %y
+}
+!18 = !{}
+; CHECK: It should have at least one range!
diff --git a/test/Verifier/range-2.ll b/test/Verifier/range-2.ll
index 1d2e0575d76a..f8891c83225e 100644
--- a/test/Verifier/range-2.ll
+++ b/test/Verifier/range-2.ll
@@ -5,35 +5,35 @@ entry:
   %y = load i8* %x, align 1, !range !0
   ret i8 %y
 }
-!0 = metadata !{i8 0, i8 1}
+!0 = !{i8 0, i8 1}
 
 define i8 @f2(i8* %x) {
 entry:
   %y = load i8* %x, align 1, !range !1
   ret i8 %y
 }
-!1 = metadata !{i8 255, i8 1}
+!1 = !{i8 255, i8 1}
 
 define i8 @f3(i8* %x) {
 entry:
   %y = load i8* %x, align 1, !range !2
   ret i8 %y
 }
-!2 = metadata !{i8 1, i8 3, i8 5, i8 42}
+!2 = !{i8 1, i8 3, i8 5, i8 42}
 
 define i8 @f4(i8* %x) {
 entry:
   %y = load i8* %x, align 1, !range !3
   ret i8 %y
 }
-!3 = metadata !{i8 -1, i8 0, i8 1, i8 2}
+!3 = !{i8 -1, i8 0, i8 1, i8 2}
 
 define i8 @f5(i8* %x) {
 entry:
   %y = load i8* %x, align 1, !range !4
   ret i8 %y
 }
-!4 = metadata !{i8 -1, i8 0, i8 1, i8 -2}
+!4 = !{i8 -1, i8 0, i8 1, i8 -2}
 
 ; We can annotate the range of the return value of a CALL.
 define void @call_all(i8* %x) {
diff --git a/test/Verifier/statepoint.ll b/test/Verifier/statepoint.ll
new file mode 100644
index 000000000000..3fbaeb53f918
--- /dev/null
+++ b/test/Verifier/statepoint.ll
@@ -0,0 +1,50 @@
+; RUN: opt -S %s -verify | FileCheck %s
+
+declare void @use(...)
+declare i8 addrspace(1)* @llvm.gc.relocate.p1i8(i32, i32, i32)
+declare i32 @llvm.statepoint.p0f_isVoidf(void ()*, i32, i32, ...)
+
+;; Basic usage
+define i8 addrspace(1)* @test1(i8 addrspace(1)* %arg) {
+entry:
+  %cast = bitcast i8 addrspace(1)* %arg to i64 addrspace(1)*
+  %safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
+  %reloc = call i8 addrspace(1)* @llvm.gc.relocate.p1i8(i32 %safepoint_token, i32 9, i32 10)
+  ;; It is perfectly legal to relocate the same value multiple times...
+  %reloc2 = call i8 addrspace(1)* @llvm.gc.relocate.p1i8(i32 %safepoint_token, i32 9, i32 10)
+  %reloc3 = call i8 addrspace(1)* @llvm.gc.relocate.p1i8(i32 %safepoint_token, i32 10, i32 9)
+  ret i8 addrspace(1)* %reloc
+; CHECK-LABEL: test1
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK: gc.relocate
+; CHECK: gc.relocate
+; CHECK: ret i8 addrspace(1)* %reloc
+}
+
+; This test catches two cases where the verifier was too strict:
+; 1) A base doesn't need to be relocated if it's never used again
+; 2) A value can be replaced by one which is known equal.  This
+; means a potentially derived pointer can be known base and that
+; we can't check that derived pointer are never bases.
+define void @test2(i8 addrspace(1)* %arg, i64 addrspace(1)* %arg2) {
+entry:
+  %cast = bitcast i8 addrspace(1)* %arg to i64 addrspace(1)*
+  %c = icmp eq i64 addrspace(1)* %cast,  %arg2
+  br i1 %c, label %equal, label %notequal
+
+notequal:
+  ret void
+
+equal:
+%safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.statepoint.p0f_isVoidf(void ()* undef, i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 10, i32 0, i8 addrspace(1)* %arg, i64 addrspace(1)* %cast, i8 addrspace(1)* %arg, i8 addrspace(1)* %arg)
+  %reloc = call i8 addrspace(1)* @llvm.gc.relocate.p1i8(i32 %safepoint_token, i32 9, i32 10)
+  call void undef(i8 addrspace(1)* %reloc)
+  ret void
+; CHECK-LABEL: test2
+; CHECK-LABEL: equal
+; CHECK: statepoint
+; CHECK-NEXT: %reloc = call 
+; CHECK-NEXT: call
+; CHECK-NEXT: ret voi
+}
diff --git a/test/lit.cfg b/test/lit.cfg
index 4f015267c619..cd615b3c5736 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -95,10 +95,30 @@ for symbolizer in ['ASAN_SYMBOLIZER_PATH', 'MSAN_SYMBOLIZER_PATH']:
     if symbolizer in os.environ:
         config.environment[symbolizer] = os.environ[symbolizer]
 
-# Propagate options for sanitizers.
-for options in ['ASAN_OPTIONS']:
-    if options in os.environ:
-        config.environment[options] = os.environ[options]
+# Set up OCAMLPATH to include newly built OCaml libraries.
+llvm_lib_dir = getattr(config, 'llvm_lib_dir', None)
+if llvm_lib_dir is None:
+    if llvm_obj_root is not None:
+        llvm_lib_dir = os.path.join(llvm_obj_root, 'lib')
+
+if llvm_lib_dir is not None:
+    llvm_ocaml_lib = os.path.join(llvm_lib_dir, 'ocaml')
+    if llvm_ocaml_lib is not None:
+        if 'OCAMLPATH' in os.environ:
+            ocamlpath = os.path.pathsep.join((llvm_ocaml_lib, os.environ['OCAMLPATH']))
+            config.environment['OCAMLPATH'] = ocamlpath
+        else:
+            config.environment['OCAMLPATH'] = llvm_ocaml_lib
+
+        if 'CAML_LD_LIBRARY_PATH' in os.environ:
+            caml_ld_library_path = os.path.pathsep.join((llvm_ocaml_lib,
+                                        os.environ['CAML_LD_LIBRARY_PATH']))
+            config.environment['CAML_LD_LIBRARY_PATH'] = caml_ld_library_path
+        else:
+            config.environment['CAML_LD_LIBRARY_PATH'] = llvm_ocaml_lib
+
+# Set up OCAMLRUNPARAM to enable backtraces in OCaml tests.
+config.environment['OCAMLRUNPARAM'] = 'b'
 
 ###
 
@@ -150,16 +170,15 @@ if config.test_exec_root is None:
 
 ###
 
-# Provide a command line for mcjit tests
-lli_mcjit = 'lli -use-mcjit'
+lli = 'lli'
 # The target triple used by default by lli is the process target triple (some
 # triple appropriate for generating code for the current process) but because
 # we don't support COFF in MCJIT well enough for the tests, force ELF format on
 # Windows.  FIXME: the process target triple should be used here, but this is
 # difficult to obtain on Windows.
-if re.search(r'cygwin|mingw32|win32', config.host_triple):
-  lli_mcjit += ' -mtriple='+config.host_triple+'-elf'
-config.substitutions.append( ('%lli_mcjit', lli_mcjit) )
+if re.search(r'cygwin|mingw32|windows-gnu|win32', config.host_triple):
+  lli += ' -mtriple='+config.host_triple+'-elf'
+config.substitutions.append( ('%lli', lli ) )
 
 # Similarly, have a macro to use llc with DWARF even when the host is win32.
 llc_dwarf = 'llc'
@@ -167,41 +186,25 @@ if re.search(r'win32', config.target_triple):
   llc_dwarf += ' -mtriple='+config.target_triple.replace('-win32', '-mingw32')
 config.substitutions.append( ('%llc_dwarf', llc_dwarf) )
 
-# Provide a substition for those tests that need to run the jit to obtain data
-# but simply want use the currently considered most reliable jit for platform
-# FIXME: ppc32 is not ready for mcjit.
-if 'arm' in config.target_triple \
-   or 'aarch64' in config.target_triple \
-   or 'powerpc64' in config.target_triple \
-   or 's390x' in config.target_triple:
-    defaultIsMCJIT = 'true'
-else:
-    defaultIsMCJIT = 'false'
-config.substitutions.append( ('%defaultjit', '-use-mcjit='+defaultIsMCJIT) )
-
-# Process jit implementation option
-jit_impl_cfg = lit_config.params.get('jit_impl', None)
-if jit_impl_cfg == 'mcjit':
-  # When running with mcjit, mangle -mcjit into target triple
-  # and add -use-mcjit flag to lli invocation
-  if 'i386' in config.target_triple or 'i686' in config.target_triple:
-    config.target_triple += jit_impl_cfg + '-ia32'
-  elif 'x86_64' in config.target_triple:
-    config.target_triple += jit_impl_cfg + '-ia64'
-  else:
-    config.target_triple += jit_impl_cfg
-
-  config.substitutions.append( ('%lli', 'lli -use-mcjit') )
-else:
-  config.substitutions.append( ('%lli', 'lli') )
-
 # Add site-specific substitutions.
-config.substitutions.append( ('%ocamlopt', config.ocamlopt_executable) )
+config.substitutions.append( ('%go', config.go_executable) )
 config.substitutions.append( ('%llvmshlibdir', config.llvm_shlib_dir) )
 config.substitutions.append( ('%shlibext', config.llvm_shlib_ext) )
 config.substitutions.append( ('%exeext', config.llvm_exe_ext) )
 config.substitutions.append( ('%python', config.python_executable) )
 
+# OCaml substitutions.
+# Support tests for both native and bytecode builds.
+config.substitutions.append( ('%ocamlc',
+    "%s ocamlc -cclib -L%s %s" %
+        (config.ocamlfind_executable, llvm_lib_dir, config.ocaml_flags)) )
+if config.have_ocamlopt in ('1', 'TRUE'):
+    config.substitutions.append( ('%ocamlopt',
+        "%s ocamlopt -cclib -L%s -cclib -Wl,-rpath,%s %s" %
+            (config.ocamlfind_executable, llvm_lib_dir, llvm_lib_dir, config.ocaml_flags)) )
+else:
+    config.substitutions.append( ('%ocamlopt', "true" ) )
+
 # For each occurrence of an llvm tool name as its own word, replace it
 # with the full path to the build directory holding that tool.  This
 # ensures that we are testing the tools just built and not some random
@@ -226,8 +229,10 @@ for pattern in [r"\bbugpoint\b(?!-)",
                 r"\bllvm-cov\b",
                 r"\bllvm-diff\b",
                 r"\bllvm-dis\b",
+                r"\bllvm-dsymutil\b",
                 r"\bllvm-dwarfdump\b",
                 r"\bllvm-extract\b",
+                r"\bllvm-go\b",
                 r"\bllvm-link\b",
                 r"\bllvm-lto\b",
                 r"\bllvm-mc\b",
@@ -240,12 +245,14 @@ for pattern in [r"\bbugpoint\b(?!-)",
                 r"\bllvm-rtdyld\b",
                 r"\bllvm-size\b",
                 r"\bllvm-tblgen\b",
+                r"\bllvm-vtabledump\b",
                 r"\bllvm-c-test\b",
                 r"\bmacho-dump\b",
                 NOJUNK + r"\bopt\b",
                 r"\bFileCheck\b",
                 r"\bobj2yaml\b",
                 r"\byaml2obj\b",
+                r"\bverify-uselistorder\b",
                 # Handle these specially as they are strings searched
                 # for during testing.
                 r"\| \bcount\b",
@@ -295,9 +302,14 @@ if config.llvm_use_sanitizer == "Address":
 if (config.llvm_use_sanitizer == "Memory" or
         config.llvm_use_sanitizer == "MemoryWithOrigins"):
     config.available_features.add("msan")
+if config.llvm_use_sanitizer == "Undefined":
+    config.available_features.add("ubsan")
+else:
+    config.available_features.add("not_ubsan")
 
 # Direct object generation
-if not 'hexagon' in config.target_triple:
+# Suppress x86_64-mingw32 while investigating since r219108.
+if not 'hexagon' in config.target_triple and not re.match(r'^x86_64.*-(mingw32|windows-gnu|win32)', config.target_triple):
     config.available_features.add("object-emission")
 
 if config.have_zlib == "1":
@@ -311,12 +323,47 @@ else:
 if config.host_triple == config.target_triple:
     config.available_features.add("native")
 
-# Ask llvm-config about assertion mode.
 import subprocess
+
+def have_ld_plugin_support():
+    if not os.path.exists(os.path.join(config.llvm_shlib_dir, 'LLVMgold.so')):
+        return False
+
+    ld_cmd = subprocess.Popen(['ld', '--help'], stdout = subprocess.PIPE)
+    ld_out = ld_cmd.stdout.read().decode()
+    ld_cmd.wait()
+
+    if not '-plugin' in ld_out:
+        return False
+
+    # check that the used emulations are supported.
+    emu_line = [l for l in ld_out.split('\n') if 'supported emulations' in l]
+    if len(emu_line) != 1:
+        return False
+    emu_line = emu_line[0]
+    fields = emu_line.split(':')
+    if len(fields) != 3:
+        return False
+    emulations = fields[2].split()
+    if 'elf32ppc' not in emulations or 'elf_x86_64' not in emulations:
+        return False
+
+    ld_version = subprocess.Popen(['ld', '--version'], stdout = subprocess.PIPE)
+    if not 'GNU gold' in ld_version.stdout.read():
+        return False
+    ld_version.wait()
+
+    return True
+
+if have_ld_plugin_support():
+    config.available_features.add('ld_plugin')
+
+# Ask llvm-config about assertion mode.
 try:
     llvm_config_cmd = subprocess.Popen(
         [os.path.join(llvm_tools_dir, 'llvm-config'), '--assertion-mode'],
-        stdout = subprocess.PIPE)
+        stdout = subprocess.PIPE,
+        env=config.environment)
 except OSError:
     print("Could not find llvm-config in " + llvm_tools_dir)
     exit(42)
@@ -337,7 +384,7 @@ if 'darwin' == sys.platform:
     sysctl_cmd.wait()
 
 # .debug_frame is not emitted for targeting Windows x64.
-if not re.match(r'^x86_64.*-(mingw32|win32)', config.target_triple):
+if not re.match(r'^x86_64.*-(mingw32|windows-gnu|win32)', config.target_triple):
     config.available_features.add('debug_frame')
 
 # Check if we should use gmalloc.
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index 4c0bb2e47d36..64ad0c398420 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -7,21 +7,30 @@ config.target_triple = "@TARGET_TRIPLE@"
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
+config.llvm_lib_dir = "@LLVM_LIBRARY_DIR@"
 config.llvm_shlib_dir = "@SHLIBDIR@"
 config.llvm_shlib_ext = "@SHLIBEXT@"
 config.llvm_exe_ext = "@EXEEXT@"
 config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@"
 config.python_executable = "@PYTHON_EXECUTABLE@"
-config.ocamlopt_executable = "@OCAMLOPT@"
+config.ocamlfind_executable = "@OCAMLFIND@"
+config.have_ocamlopt = "@HAVE_OCAMLOPT@"
+config.have_ocaml_ounit = "@HAVE_OCAML_OUNIT@"
+config.ocaml_flags = "@OCAMLFLAGS@"
+config.go_executable = "@GO_EXECUTABLE@"
 config.enable_shared = @ENABLE_SHARED@
 config.enable_assertions = @ENABLE_ASSERTIONS@
 config.targets_to_build = "@TARGETS_TO_BUILD@"
-config.llvm_bindings = "@LLVM_BINDINGS@"
+config.llvm_bindings = "@LLVM_BINDINGS@".split(' ')
 config.host_os = "@HOST_OS@"
 config.host_arch = "@HOST_ARCH@"
+config.host_cc = "@HOST_CC@"
+config.host_cxx = "@HOST_CXX@"
+config.host_ldflags = "@HOST_LDFLAGS@"
 config.llvm_use_intel_jitevents = "@LLVM_USE_INTEL_JITEVENTS@"
 config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
 config.have_zlib = "@HAVE_LIBZ@"
+config.enable_ffi = "@LLVM_ENABLE_FFI@"
 
 # Support substitution of the tools_dir with user parameters. This is
 # used when we can't determine the tool dir at configuration time.
diff --git a/test/tools/dsymutil/Inputs/basic-archive.macho.x86_64 b/test/tools/dsymutil/Inputs/basic-archive.macho.x86_64
new file mode 100755
index 000000000000..abffb06147d9
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/basic-archive.macho.x86_64
diff --git a/test/tools/dsymutil/Inputs/basic-lto.macho.x86_64 b/test/tools/dsymutil/Inputs/basic-lto.macho.x86_64
new file mode 100755
index 000000000000..b5ffb03c984d
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/basic-lto.macho.x86_64
diff --git a/test/tools/dsymutil/Inputs/basic-lto.macho.x86_64.o b/test/tools/dsymutil/Inputs/basic-lto.macho.x86_64.o
new file mode 100644
index 000000000000..c68e15acf318
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/basic-lto.macho.x86_64.o
diff --git a/test/tools/dsymutil/Inputs/basic.macho.x86_64 b/test/tools/dsymutil/Inputs/basic.macho.x86_64
new file mode 100755
index 000000000000..8b3a34a55977
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/basic.macho.x86_64
diff --git a/test/tools/dsymutil/Inputs/basic1.c b/test/tools/dsymutil/Inputs/basic1.c
new file mode 100644
index 000000000000..cedf83aec3e7
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/basic1.c
@@ -0,0 +1,28 @@
+/* This is the main file used to produce the basic* objects that are
+   used for the dsymutil tests.
+
+   These are compiled in a couple of different ways (always on a
+   Darwin system):
+   Basic compilation:
+      for FILE in basic1.c basic2.c basic3.c; do
+         clang -g -c $FILE -o ${FILE%.c}.macho.x86_64.o
+      done
+      clang basic1.macho.x86_64.o basic2.macho.x86_64.o basic3.macho.x86_64.o -o basic.macho.x86_64 -Wl,-dead_strip
+
+   LTO compilation:
+      for FILE in basic1.c basic2.c basic3.c; do
+         clang -g -c -flto $FILE -o ${FILE%.c}-lto.o
+      done
+      clang basic1-lto.o basic2-lto.o basic3-lto.o -o basic-lto.macho.x86_64 -Wl,-object_path_lto,$PWD/basic-lto.macho.x86_64.o -Wl,-dead_strip
+      rm basic1-lto.o basic2-lto.o basic3-lto.o
+
+   Archive compilation (after basic compilation):
+      ar -q libbasic.a basic2.macho.x86_64.o basic3.macho.x86_64.o
+      clang basic1.macho.x86_64.o -lbasic -o basic-archive.macho.x86_64 -Wl,-dead_strip -L.
+*/
+
+int foo(int);
+
+int main(int argc, const char *argv[]) {
+  return foo(argc);
+}
diff --git a/test/tools/dsymutil/Inputs/basic1.macho.x86_64.o b/test/tools/dsymutil/Inputs/basic1.macho.x86_64.o
new file mode 100644
index 000000000000..d7b5000a6127
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/basic1.macho.x86_64.o
diff --git a/test/tools/dsymutil/Inputs/basic2.c b/test/tools/dsymutil/Inputs/basic2.c
new file mode 100644
index 000000000000..13c6d07c2a08
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/basic2.c
@@ -0,0 +1,22 @@
+/* For compilation instructions see basic1.c. */
+
+static int baz = 42;
+static int private_int;
+extern volatile int val;
+int unused_data = 1;
+
+int bar(int);
+
+void unused1() {
+  bar(baz);
+}
+
+static int inc() {
+  return ++private_int;
+}
+
+__attribute__((noinline))
+int foo(int arg) {
+  return bar(arg+val) + inc() + baz++;
+}
+
diff --git a/test/tools/dsymutil/Inputs/basic2.macho.x86_64.o b/test/tools/dsymutil/Inputs/basic2.macho.x86_64.o
new file mode 100644
index 000000000000..bdd82252e3af
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/basic2.macho.x86_64.o
diff --git a/test/tools/dsymutil/Inputs/basic3.c b/test/tools/dsymutil/Inputs/basic3.c
new file mode 100644
index 000000000000..f20998a00dbe
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/basic3.c
@@ -0,0 +1,20 @@
+/* For compilation instructions see basic1.c. */
+
+volatile int val;
+
+extern int foo(int);
+
+int unused2() {
+  return foo(val);
+}
+
+static int inc() {
+  return ++val;
+}
+
+__attribute__((noinline))
+int bar(int arg) {
+  if (arg > 42)
+    return inc();
+  return foo(val + arg);
+}
diff --git a/test/tools/dsymutil/Inputs/basic3.macho.x86_64.o b/test/tools/dsymutil/Inputs/basic3.macho.x86_64.o
new file mode 100644
index 000000000000..3c1c6399fd89
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/basic3.macho.x86_64.o
diff --git a/test/tools/dsymutil/Inputs/libbasic.a b/test/tools/dsymutil/Inputs/libbasic.a
new file mode 100644
index 000000000000..9657e7895578
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/libbasic.a
diff --git a/test/tools/dsymutil/debug-map-parsing.test b/test/tools/dsymutil/debug-map-parsing.test
new file mode 100644
index 000000000000..f1b310f9b714
--- /dev/null
+++ b/test/tools/dsymutil/debug-map-parsing.test
@@ -0,0 +1,77 @@
+RUN: llvm-dsymutil -v -parse-only -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s
+RUN: llvm-dsymutil -v -parse-only -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO
+RUN: llvm-dsymutil -v -parse-only -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE
+RUN: llvm-dsymutil -v -parse-only %p/Inputs/basic.macho.x86_64 2>&1 | FileCheck %s --check-prefix=NOT-FOUND
+RUN: not llvm-dsymutil -v -parse-only %p/Inputs/inexistant 2>&1 | FileCheck %s --check-prefix=NO-EXECUTABLE
+
+
+Check that We can parse the debug map of the basic executable.
+
+CHECK-NOT: error
+CHECK: DEBUG MAP:
+CHECK: /Inputs/basic1.macho.x86_64.o:
+CHECK: 	0000000000000000 => 0000000100000ea0	_main
+CHECK: /Inputs/basic2.macho.x86_64.o:
+CHECK: 	0000000000000310 => 0000000100001000	_baz
+CHECK: 	0000000000000020 => 0000000100000ed0	_foo
+CHECK: 	0000000000000070 => 0000000100000f20	_inc
+CHECK: 	0000000000000560 => 0000000100001008	_private_int
+CHECK: /Inputs/basic3.macho.x86_64.o:
+CHECK: 	0000000000000020 => 0000000100000f40	_bar
+CHECK: 	0000000000000070 => 0000000100000f90	_inc
+CHECK: 	0000000000000004 => 0000000100001004	_val
+CHECK: END DEBUG MAP
+
+
+Check that we can parse the debug-map of the basic-lto executable
+
+CHECK-LTO-NOT: error
+CHECK-LTO: DEBUG MAP:
+CHECK-LTO: /Inputs/basic-lto.macho.x86_64.o:
+CHECK-LTO: 	0000000000000050 => 0000000100000f90	_bar
+CHECK-LTO: 	0000000000000658 => 0000000100001000	_baz
+CHECK-LTO: 	0000000000000010 => 0000000100000f50	_foo
+CHECK-LTO: 	0000000000000000 => 0000000100000f40	_main
+CHECK-LTO: 	00000000000008e8 => 0000000100001008	_private_int
+CHECK-LTO: 	00000000000008ec => 0000000100001004	_val
+CHECK-LTO: END DEBUG MAP
+
+Check thet we correctly handle debug maps with archive members (including only
+opening the archive once if mulitple of its members are used).
+
+CHECK-ARCHIVE:      trying to open {{.*}}basic-archive.macho.x86_64'
+CHECK-ARCHIVE-NEXT: 	loaded file.
+CHECK-ARCHIVE-NEXT: trying to open {{.*}}/Inputs/basic1.macho.x86_64.o'
+CHECK-ARCHIVE-NEXT: 	loaded file.
+CHECK-ARCHIVE-NEXT: trying to open {{.*}}/libbasic.a(basic2.macho.x86_64.o)'
+CHECK-ARCHIVE-NEXT: 	opened new archive {{.*}}/libbasic.a'
+CHECK-ARCHIVE-NEXT: 	found member in current archive.
+CHECK-ARCHIVE-NEXT: trying to open {{.*}}/libbasic.a(basic3.macho.x86_64.o)'
+CHECK-ARCHIVE-NEXT: 	found member in current archive.
+CHECK-ARCHIVE: DEBUG MAP:   object addr =>  executable addr	symbol name
+CHECK-ARCHIVE: /Inputs/basic1.macho.x86_64.o:
+CHECK-ARCHIVE: 	0000000000000000 => 0000000100000ea0	_main
+CHECK-ARCHIVE: /Inputs/./libbasic.a(basic2.macho.x86_64.o):
+CHECK-ARCHIVE: 	0000000000000310 => 0000000100001000	_baz
+CHECK-ARCHIVE: 	0000000000000020 => 0000000100000ed0	_foo
+CHECK-ARCHIVE: 	0000000000000070 => 0000000100000f20	_inc
+CHECK-ARCHIVE: 	0000000000000560 => 0000000100001004	_private_int
+CHECK-ARCHIVE: /Inputs/./libbasic.a(basic3.macho.x86_64.o):
+CHECK-ARCHIVE: 	0000000000000020 => 0000000100000f40	_bar
+CHECK-ARCHIVE: 	0000000000000070 => 0000000100000f90	_inc
+CHECK-ARCHIVE: 	0000000000000004 => 0000000100001008	_val
+CHECK-ARCHIVE: END DEBUG MAP
+
+Check that we warn about missing object files (this presumes that the files aren't
+present in the machine's /Inputs/ folder, which should be a pretty safe bet).
+
+NOT-FOUND: cannot open{{.*}}"/Inputs/basic1.macho.x86_64.o": {{[Nn]o}} such file
+NOT-FOUND: cannot open{{.*}}"/Inputs/basic2.macho.x86_64.o": {{[Nn]o}} such file
+NOT-FOUND: cannot open{{.*}}"/Inputs/basic3.macho.x86_64.o": {{[Nn]o}} such file
+NOT-FOUND: DEBUG MAP:
+NOT-FOUND-NEXT: END DEBUG MAP
+
+Check that we correctly error out on invalid executatble.
+
+NO-EXECUTABLE: cannot parse{{.*}}/inexistant": {{[Nn]o}} such file
+NO-EXECUTABLE-NOT: DEBUG MAP
diff --git a/test/tools/gold/Inputs/alias-1.ll b/test/tools/gold/Inputs/alias-1.ll
new file mode 100644
index 000000000000..96183aa9537c
--- /dev/null
+++ b/test/tools/gold/Inputs/alias-1.ll
@@ -0,0 +1 @@
+@a = global i32 42
diff --git a/test/tools/gold/Inputs/bcsection.s b/test/tools/gold/Inputs/bcsection.s
new file mode 100644
index 000000000000..ede1e5c532dd
--- /dev/null
+++ b/test/tools/gold/Inputs/bcsection.s
@@ -0,0 +1,2 @@
+.section .llvmbc
+.incbin "bcsection.bc"
diff --git a/test/tools/gold/Inputs/comdat.ll b/test/tools/gold/Inputs/comdat.ll
new file mode 100644
index 000000000000..464aefa49dc1
--- /dev/null
+++ b/test/tools/gold/Inputs/comdat.ll
@@ -0,0 +1,25 @@
+$c2 = comdat any
+$c1 = comdat any
+
+; This is only present in this file. The linker will keep $c1 from the first
+; file and this will be undefined.
+@will_be_undefined = global i32 1, comdat($c1)
+
+@v1 = weak_odr global i32 41, comdat($c2)
+define weak_odr protected i32 @f1(i8* %this) comdat($c2) {
+bb20:
+  store i8* %this, i8** null
+  br label %bb21
+bb21:
+  ret i32 41
+}
+
+@r21 = global i32* @v1
+@r22 = global i32(i8*)* @f1
+
+@a21 = alias i32* @v1
+@a22 = alias bitcast (i32* @v1 to i16*)
+
+@a23 = alias i32(i8*)* @f1
+@a24 = alias bitcast (i32(i8*)* @f1 to i16*)
+@a25 = alias i16* @a24
diff --git a/test/tools/gold/Inputs/common.ll b/test/tools/gold/Inputs/common.ll
new file mode 100644
index 000000000000..46f199e167af
--- /dev/null
+++ b/test/tools/gold/Inputs/common.ll
@@ -0,0 +1 @@
+@a = common global i16 0, align 4
diff --git a/test/tools/gold/Inputs/invalid.bc b/test/tools/gold/Inputs/invalid.bc
new file mode 100644
index 000000000000..2e7ca8d2e105
--- /dev/null
+++ b/test/tools/gold/Inputs/invalid.bc
diff --git a/test/tools/gold/Inputs/linker-script.export b/test/tools/gold/Inputs/linker-script.export
new file mode 100644
index 000000000000..2062a081ffe2
--- /dev/null
+++ b/test/tools/gold/Inputs/linker-script.export
@@ -0,0 +1,5 @@
+{
+  global:
+    f;
+  local: *;
+};
diff --git a/test/tools/gold/Inputs/linkonce-weak.ll b/test/tools/gold/Inputs/linkonce-weak.ll
new file mode 100644
index 000000000000..f42af8faa844
--- /dev/null
+++ b/test/tools/gold/Inputs/linkonce-weak.ll
@@ -0,0 +1,3 @@
+define weak_odr void @f() {
+  ret void
+}
diff --git a/test/tools/gold/Inputs/pr19901-1.ll b/test/tools/gold/Inputs/pr19901-1.ll
new file mode 100644
index 000000000000..2f7153268aca
--- /dev/null
+++ b/test/tools/gold/Inputs/pr19901-1.ll
@@ -0,0 +1,4 @@
+target triple = "x86_64-unknown-linux-gnu"
+define linkonce_odr hidden void @f() {
+  ret void
+}
diff --git a/test/tools/gold/Inputs/weak.ll b/test/tools/gold/Inputs/weak.ll
new file mode 100644
index 000000000000..53b1d1650d1b
--- /dev/null
+++ b/test/tools/gold/Inputs/weak.ll
@@ -0,0 +1,2 @@
+@a = weak global i32 41
+@c = global i32* @a
diff --git a/test/tools/gold/alias.ll b/test/tools/gold/alias.ll
new file mode 100644
index 000000000000..dbf3af57a7b4
--- /dev/null
+++ b/test/tools/gold/alias.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: llvm-as %p/Inputs/alias-1.ll -o %t2.o
+; RUN: ld -shared -o %t3.o -plugin %llvmshlibdir/LLVMgold.so %t2.o %t.o \
+; RUN:  -plugin-opt=emit-llvm
+; RUN: llvm-dis < %t3.o -o - | FileCheck %s
+
+; CHECK-NOT: alias
+; CHECK: @a = global i32 42
+; CHECK-NEXT: @b = global i32 1
+; CHECK-NOT: alias
+
+@a = weak alias i32* @b
+@b = global i32 1
diff --git a/test/tools/gold/bad-alias.ll b/test/tools/gold/bad-alias.ll
new file mode 100644
index 000000000000..e0fc788a6123
--- /dev/null
+++ b/test/tools/gold/bad-alias.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as %s -o %t.o
+
+; RUN: not ld -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    --plugin-opt=emit-llvm \
+; RUN:    -shared %t.o -o %t2.o 2>&1 | FileCheck %s
+
+; CHECK: Unable to determine comdat of alias!
+
+@g1 = global i32 1
+@g2 = global i32 2
+
+@a = alias inttoptr(i32 sub (i32 ptrtoint (i32* @g1 to i32),
+                             i32 ptrtoint (i32* @g2 to i32)) to i32*)
diff --git a/test/tools/gold/bcsection.ll b/test/tools/gold/bcsection.ll
new file mode 100644
index 000000000000..8565d9ddc4c1
--- /dev/null
+++ b/test/tools/gold/bcsection.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as -o %T/bcsection.bc %s
+
+; RUN: llvm-mc -I=%T -filetype=obj -o %T/bcsection.bco %p/Inputs/bcsection.s
+; RUN: llvm-nm -no-llvm-bc %T/bcsection.bco | count 0
+; RUN: ld -r -o %T/bcsection.o -plugin %llvmshlibdir/LLVMgold.so %T/bcsection.bco
+; RUN: llvm-nm -no-llvm-bc %T/bcsection.o | FileCheck %s
+
+; CHECK: main
+define i32 @main() {
+  ret i32 0
+}
diff --git a/test/tools/gold/coff.ll b/test/tools/gold/coff.ll
new file mode 100644
index 000000000000..b66f028a14d8
--- /dev/null
+++ b/test/tools/gold/coff.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: ld      -plugin %llvmshlibdir/LLVMgold.so -plugin-opt=emit-llvm \
+; RUN:    -shared %t.o -o %t2.o
+; RUN: llvm-dis %t2.o -o - | FileCheck %s
+
+
+target datalayout = "m:w"
+
+; CHECK: define void @f() {
+define void @f() {
+  ret void
+}
+
+; CHECK: define internal void @g() {
+define hidden void @g() {
+  ret void
+}
+
+; CHECK: define internal void @h() {
+define linkonce_odr void @h() {
+  ret void
+}
diff --git a/test/tools/gold/comdat.ll b/test/tools/gold/comdat.ll
new file mode 100644
index 000000000000..2edd7822c91a
--- /dev/null
+++ b/test/tools/gold/comdat.ll
@@ -0,0 +1,65 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: llvm-as %p/Inputs/comdat.ll -o %t2.o
+; RUN: ld -shared -o %t3.o -plugin %llvmshlibdir/LLVMgold.so %t.o %t2.o \
+; RUN:  -plugin-opt=emit-llvm
+; RUN: llvm-dis %t3.o -o - | FileCheck %s
+
+$c1 = comdat any
+
+@v1 = weak_odr global i32 42, comdat($c1)
+define weak_odr i32 @f1(i8*) comdat($c1) {
+bb10:
+  br label %bb11
+bb11:
+  ret i32 42
+}
+
+@r11 = global i32* @v1
+@r12 = global i32 (i8*)* @f1
+
+@a11 = alias i32* @v1
+@a12 = alias bitcast (i32* @v1 to i16*)
+
+@a13 = alias i32 (i8*)* @f1
+@a14 = alias bitcast (i32 (i8*)* @f1 to i16*)
+@a15 = alias i16* @a14
+
+; CHECK: $c1 = comdat any
+; CHECK: $c2 = comdat any
+
+; CHECK: @v1 = weak_odr global i32 42, comdat($c1)
+
+; CHECK: @r11 = global i32* @v1{{$}}
+; CHECK: @r12 = global i32 (i8*)* @f1{{$}}
+
+; CHECK: @r21 = global i32* @v1{{$}}
+; CHECK: @r22 = global i32 (i8*)* @f1{{$}}
+
+; CHECK: @v11 = internal global i32 41, comdat($c2)
+
+; CHECK: @a11 = alias i32* @v1{{$}}
+; CHECK: @a12 = alias bitcast (i32* @v1 to i16*)
+
+; CHECK: @a13 = alias i32 (i8*)* @f1{{$}}
+; CHECK: @a14 = alias bitcast (i32 (i8*)* @f1 to i16*)
+
+; CHECK: @a21 = alias i32* @v11{{$}}
+; CHECK: @a22 = alias bitcast (i32* @v11 to i16*)
+
+; CHECK: @a23 = alias i32 (i8*)* @f12{{$}}
+; CHECK: @a24 = alias bitcast (i32 (i8*)* @f12 to i16*)
+
+; CHECK:      define weak_odr protected i32 @f1(i8*) comdat($c1) {
+; CHECK-NEXT: bb10:
+; CHECK-NEXT:   br label %bb11{{$}}
+; CHECK:      bb11:
+; CHECK-NEXT:   ret i32 42
+; CHECK-NEXT: }
+
+; CHECK:      define internal i32 @f12(i8* %this) comdat($c2) {
+; CHECK-NEXT: bb20:
+; CHECK-NEXT:   store i8* %this, i8** null
+; CHECK-NEXT:   br label %bb21
+; CHECK:      bb21:
+; CHECK-NEXT:   ret i32 41
+; CHECK-NEXT: }
diff --git a/test/tools/gold/common.ll b/test/tools/gold/common.ll
new file mode 100644
index 000000000000..f3092310a1da
--- /dev/null
+++ b/test/tools/gold/common.ll
@@ -0,0 +1,29 @@
+; RUN: llvm-as %s -o %t1.o
+; RUN: llvm-as %p/Inputs/common.ll -o %t2.o
+
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    --plugin-opt=emit-llvm \
+; RUN:    -shared %t1.o %t2.o -o %t3.o
+; RUN: llvm-dis %t3.o -o - | FileCheck %s
+
+@a = common global i8 0, align 8
+
+; Shared library case, we merge @a as common and keep it for the symbol table.
+; CHECK: @a = common global i16 0, align 8
+
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    --plugin-opt=emit-llvm \
+; RUN:    %t1.o %t2.o -o %t3.o
+; RUN: llvm-dis %t3.o -o - | FileCheck --check-prefix=EXEC %s
+
+; All IR case, we internalize a after merging.
+; EXEC: @a = internal global i16 0, align 8
+
+; RUN: llc %p/Inputs/common.ll -o %t2.o -filetype=obj
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    --plugin-opt=emit-llvm \
+; RUN:    %t1.o %t2.o -o %t3.o
+; RUN: llvm-dis %t3.o -o - | FileCheck --check-prefix=MIXED %s
+
+; Mixed ELF and IR. We keep ours as common so the linker will finish the merge.
+; MIXED: @a = common global i8 0, align 8
diff --git a/test/tools/gold/emit-llvm.ll b/test/tools/gold/emit-llvm.ll
new file mode 100644
index 000000000000..cfdc55108c0b
--- /dev/null
+++ b/test/tools/gold/emit-llvm.ll
@@ -0,0 +1,73 @@
+; RUN: llvm-as %s -o %t.o
+
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    --plugin-opt=emit-llvm \
+; RUN:    --plugin-opt=generate-api-file \
+; RUN:    -shared %t.o -o %t2.o
+; RUN: llvm-dis %t2.o -o - | FileCheck %s
+; RUN: FileCheck --check-prefix=API %s < %T/../apifile.txt
+
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:     -m elf_x86_64 --plugin-opt=save-temps \
+; RUN:    -shared %t.o -o %t3.o
+; RUN: llvm-dis %t3.o.bc -o - | FileCheck %s
+; RUN: llvm-dis %t3.o.opt.bc -o - | FileCheck --check-prefix=OPT %s
+
+; RUN: rm -f %t4.o
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:     -m elf_x86_64 --plugin-opt=disable-output \
+; RUN:    -shared %t.o -o %t4.o
+; RUN: not test -a %t4.o
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: define internal void @f1()
+; OPT-NOT: @f1
+define hidden void @f1() {
+  ret void
+}
+
+; CHECK: define hidden void @f2()
+; OPT: define hidden void @f2()
+define hidden void @f2() {
+  ret void
+}
+
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (void ()* @f2 to i8*)]
+
+; CHECK: define void @f3()
+; OPT: define void @f3()
+define void @f3() {
+  call void @f4()
+  ret void
+}
+
+; CHECK: define internal void @f4()
+; OPT-NOT: @f4
+define linkonce_odr void @f4() {
+  ret void
+}
+
+; CHECK: define linkonce_odr void @f5()
+; OPT: define linkonce_odr void @f5()
+define linkonce_odr void @f5() {
+  ret void
+}
+@g5 = global void()* @f5
+
+; CHECK: define internal void @f6() unnamed_addr
+; OPT: define internal void @f6() unnamed_addr
+define linkonce_odr void @f6() unnamed_addr {
+  ret void
+}
+@g6 = global void()* @f6
+
+
+; API: f1 PREVAILING_DEF_IRONLY
+; API: f2 PREVAILING_DEF_IRONLY
+; API: f3 PREVAILING_DEF_IRONLY_EXP
+; API: f4 PREVAILING_DEF_IRONLY_EXP
+; API: f5 PREVAILING_DEF_IRONLY_EXP
+; API: f6 PREVAILING_DEF_IRONLY_EXP
+; API: g5 PREVAILING_DEF_IRONLY_EXP
+; API: g6 PREVAILING_DEF_IRONLY_EXP
diff --git a/test/tools/gold/invalid.ll b/test/tools/gold/invalid.ll
new file mode 100644
index 000000000000..8db76446a3d7
--- /dev/null
+++ b/test/tools/gold/invalid.ll
@@ -0,0 +1,7 @@
+; RUN: not ld -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    %p/Inputs/invalid.bc -o %t2 2>&1 | FileCheck %s
+
+; test that only one error gets printed
+
+; CHECK: error: LLVM gold plugin has failed to create LTO module: Malformed block
+; CHECK-NOT: error
diff --git a/test/tools/gold/linker-script.ll b/test/tools/gold/linker-script.ll
new file mode 100644
index 000000000000..35a769453e57
--- /dev/null
+++ b/test/tools/gold/linker-script.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-as %s -o %t.o
+
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    --plugin-opt=emit-llvm \
+; RUN:    -shared %t.o -o %t2.o \
+; RUN:    -version-script=%p/Inputs/linker-script.export
+; RUN: llvm-dis %t2.o -o - | FileCheck %s
+
+; CHECK: define void @f()
+define void @f() {
+  ret void
+}
+
+; CHECK: define internal void @g()
+define void @g() {
+  ret void
+}
diff --git a/test/tools/gold/linkonce-weak.ll b/test/tools/gold/linkonce-weak.ll
new file mode 100644
index 000000000000..765275b09d50
--- /dev/null
+++ b/test/tools/gold/linkonce-weak.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: llvm-as %p/Inputs/linkonce-weak.ll -o %t2.o
+
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    --plugin-opt=emit-llvm \
+; RUN:    -shared %t.o %t2.o -o %t3.o
+; RUN: llvm-dis %t3.o -o - | FileCheck %s
+
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    --plugin-opt=emit-llvm \
+; RUN:    -shared %t2.o %t.o -o %t3.o
+; RUN: llvm-dis %t3.o -o - | FileCheck %s
+
+define linkonce_odr void @f() {
+  ret void
+}
+
+; Test that we get a weak_odr regardless of the order of the files
+; CHECK: define weak_odr void @f() {
diff --git a/test/tools/gold/lit.local.cfg b/test/tools/gold/lit.local.cfg
new file mode 100644
index 000000000000..a59549d47abe
--- /dev/null
+++ b/test/tools/gold/lit.local.cfg
@@ -0,0 +1,4 @@
+if (not 'ld_plugin' in  config.available_features or
+    not 'X86' in config.root.targets or
+    not 'PowerPC' in config.root.targets):
+   config.unsupported = True
diff --git a/test/tools/gold/mtriple.ll b/test/tools/gold/mtriple.ll
new file mode 100644
index 000000000000..6395af6f1ab9
--- /dev/null
+++ b/test/tools/gold/mtriple.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so -m elf32ppc \
+; RUN:    -plugin-opt=mtriple=powerpc-linux-gnu \
+; RUN:    -plugin-opt=obj-path=%t3.o \
+; RUN:    -shared %t.o -o %t2
+; RUN: llvm-readobj --file-headers %t2 | FileCheck  --check-prefix=DSO %s
+; RUN: llvm-readobj --file-headers %t3.o | FileCheck --check-prefix=REL %s
+
+; REL:       Type: Relocatable
+; REL-NEXT:  Machine: EM_PPC
+
+; DSO:       Type: SharedObject
+; DSO-NEXT:  Machine: EM_PPC
diff --git a/test/tools/gold/option.ll b/test/tools/gold/option.ll
new file mode 100644
index 000000000000..8154e435b4ed
--- /dev/null
+++ b/test/tools/gold/option.ll
@@ -0,0 +1,39 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so -m elf_x86_64 \
+; RUN:    --plugin-opt=-jump-table-type=arity \
+; RUN:    --plugin-opt=-mattr=+aes \
+; RUN:    --plugin-opt=mcpu=core-avx2 \
+; RUN:    -shared %t.o -o %t2.o
+; RUN: llvm-nm %t2.o | FileCheck %s
+
+; CHECK: t __llvm_jump_instr_table_0_1
+; CHECK: t __llvm_jump_instr_table_1_1
+
+target triple = "x86_64-unknown-linux-gnu"
+define i32 @g(i32 %a) unnamed_addr jumptable {
+  ret i32 %a
+}
+
+define i32 (i32)* @get_g() {
+  ret i32 (i32)* @g
+}
+
+define i32 @f() unnamed_addr jumptable {
+  ret i32 0
+}
+
+define i32 ()* @get_f() {
+  ret i32 ()* @f
+}
+
+define <2 x i64> @test_aes(<2 x i64> %a0, <2 x i64> %a1) {
+  %res = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <32 x i8> @test_avx2(<16 x i16> %a0, <16 x i16> %a1) {
+  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1)
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
diff --git a/test/tools/gold/pr19901.ll b/test/tools/gold/pr19901.ll
new file mode 100644
index 000000000000..304246bca47a
--- /dev/null
+++ b/test/tools/gold/pr19901.ll
@@ -0,0 +1,23 @@
+; RUN: llc %s -o %t.o -filetype=obj -relocation-model=pic
+; RUN: llvm-as %p/Inputs/pr19901-1.ll -o %t2.o
+; RUN: ld -shared -o %t.so -plugin %llvmshlibdir/LLVMgold.so %t2.o %t.o
+; RUN: llvm-readobj -t %t.so | FileCheck %s
+
+; CHECK:       Symbol {
+; CHECK:         Name: f
+; CHECK-NEXT:    Value:
+; CHECK-NEXT:    Size:
+; CHECK-NEXT:    Binding: Local
+; CHECK-NEXT:    Type: Function
+; CHECK-NEXT:    Other: 2
+; CHECK-NEXT:    Section: .text
+; CHECK-NEXT:  }
+
+target triple = "x86_64-unknown-linux-gnu"
+define i32 @g() {
+  call void @f()
+  ret i32 0
+}
+define linkonce_odr hidden void @f() {
+  ret void
+}
diff --git a/test/tools/gold/slp-vectorize.ll b/test/tools/gold/slp-vectorize.ll
new file mode 100644
index 000000000000..d378902e32f6
--- /dev/null
+++ b/test/tools/gold/slp-vectorize.ll
@@ -0,0 +1,30 @@
+; RUN: llvm-as %s -o %t.o
+
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    --plugin-opt=save-temps \
+; RUN:    -shared %t.o -o %t2.o
+; RUN: llvm-dis %t2.o.opt.bc -o - | FileCheck %s
+
+; test that the vectorizer is run.
+; CHECK: fadd <4 x float>
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(float* nocapture %x) {
+  %tmp = load float* %x, align 4
+  %add = fadd float %tmp, 1.000000e+00
+  store float %add, float* %x, align 4
+  %arrayidx1 = getelementptr inbounds float* %x, i64 1
+  %tmp1 = load float* %arrayidx1, align 4
+  %add2 = fadd float %tmp1, 1.000000e+00
+  store float %add2, float* %arrayidx1, align 4
+  %arrayidx3 = getelementptr inbounds float* %x, i64 2
+  %tmp2 = load float* %arrayidx3, align 4
+  %add4 = fadd float %tmp2, 1.000000e+00
+  store float %add4, float* %arrayidx3, align 4
+  %arrayidx5 = getelementptr inbounds float* %x, i64 3
+  %tmp3 = load float* %arrayidx5, align 4
+  %add6 = fadd float %tmp3, 1.000000e+00
+  store float %add6, float* %arrayidx5, align 4
+  ret void
+}
diff --git a/test/tools/gold/stats.ll b/test/tools/gold/stats.ll
new file mode 100644
index 000000000000..7c353e6ab2a6
--- /dev/null
+++ b/test/tools/gold/stats.ll
@@ -0,0 +1,7 @@
+; REQUIRES: asserts
+
+; RUN: llvm-as %s -o %t.o
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so  -shared \
+; RUN:    -plugin-opt=-stats %t.o -o %t2 2>&1 | FileCheck %s
+
+; CHECK: Statistics Collected
diff --git a/test/tools/gold/vectorize.ll b/test/tools/gold/vectorize.ll
new file mode 100644
index 000000000000..3d305db1e31b
--- /dev/null
+++ b/test/tools/gold/vectorize.ll
@@ -0,0 +1,30 @@
+; RUN: llvm-as %s -o %t.o
+
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    --plugin-opt=save-temps \
+; RUN:    -shared %t.o -o %t2.o
+; RUN: llvm-dis %t2.o.opt.bc -o - | FileCheck %s
+
+; test that the vectorizer is run.
+; CHECK: fadd <4 x float>
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f(float* nocapture %x, i64 %n) {
+bb:
+  br label %bb1
+
+bb1:
+  %i.0 = phi i64 [ 0, %bb ], [ %tmp4, %bb1 ]
+  %tmp = getelementptr inbounds float* %x, i64 %i.0
+  %tmp2 = load float* %tmp, align 4
+  %tmp3 = fadd float %tmp2, 1.000000e+00
+  store float %tmp3, float* %tmp, align 4
+  %tmp4 = add nsw i64 %i.0, 1
+  %tmp5 = icmp slt i64 %tmp4, %n
+  br i1 %tmp5, label %bb1, label %bb6
+
+bb6:
+  ret void
+}
diff --git a/test/tools/gold/weak.ll b/test/tools/gold/weak.ll
new file mode 100644
index 000000000000..e05e905cc140
--- /dev/null
+++ b/test/tools/gold/weak.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-as %s -o %t.o
+; RUN: llvm-as %p/Inputs/weak.ll -o %t2.o
+
+; RUN: ld -plugin %llvmshlibdir/LLVMgold.so \
+; RUN:    --plugin-opt=emit-llvm \
+; RUN:    -shared %t.o %t2.o -o %t3.o
+; RUN: llvm-dis %t3.o -o - | FileCheck %s
+
+@a = weak global i32 42
+@b = global i32* @a
+
+; Test that @b and @c end up pointing to the same variable.
+
+; CHECK: @a = weak global i32 42
+; CHECK: @b = global i32* @a{{$}}
+; CHECK: @c = global i32* @a{{$}}
diff --git a/test/tools/llvm-cov/Inputs/README b/test/tools/llvm-cov/Inputs/README
index 2cfb1917965c..3773ba3f5e6d 100644
--- a/test/tools/llvm-cov/Inputs/README
+++ b/test/tools/llvm-cov/Inputs/README
@@ -1,7 +1,21 @@
 These inputs were pre-generated to allow for easier testing of llvm-cov.
 
-test.gcno and test.gcda were create by running clang:
-  clang++ -g -ftest-coverage -fprofile-arcs test.cpp
+The files used to test the gcov compatible code coverage tool were generated
+using the following method:
 
-test.cpp.gcov was created by running gcov 4.2.1:
-  gcov test.cpp
+  test.gcno and test.gcda were create by running clang:
+    clang++ -g -ftest-coverage -fprofile-arcs test.cpp
+
+  test.cpp.gcov was created by running gcov 4.2.1:
+    gcov test.cpp
+
+The 'covmapping' files that are used to test llvm-cov contain raw sections
+with the coverage mapping data generated by the compiler and linker. They are
+created by running clang and llvm-cov:
+  clang++ -fprofile-instr-generate -fcoverage-mapping -o test test.cpp
+  llvm-cov convert-for-testing -o test.covmapping test
+
+The 'profdata' files were generated by running an instrumented version of the
+program and merging the raw profile data using llvm-profdata.
+  ./test
+  llvm-profdata merge -o test.profdata default.profraw
diff --git a/test/tools/llvm-cov/Inputs/highlightedRanges.covmapping b/test/tools/llvm-cov/Inputs/highlightedRanges.covmapping
new file mode 100644
index 000000000000..20eb0d7e8df5
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/highlightedRanges.covmapping
diff --git a/test/tools/llvm-cov/Inputs/highlightedRanges.profdata b/test/tools/llvm-cov/Inputs/highlightedRanges.profdata
new file mode 100644
index 000000000000..b465b000b037
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/highlightedRanges.profdata
diff --git a/test/tools/llvm-cov/Inputs/lineExecutionCounts.covmapping b/test/tools/llvm-cov/Inputs/lineExecutionCounts.covmapping
new file mode 100644
index 000000000000..9774b89ede8c
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/lineExecutionCounts.covmapping
diff --git a/test/tools/llvm-cov/Inputs/lineExecutionCounts.profdata b/test/tools/llvm-cov/Inputs/lineExecutionCounts.profdata
new file mode 100644
index 000000000000..871222723967
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/lineExecutionCounts.profdata
diff --git a/test/tools/llvm-cov/Inputs/regionMarkers.covmapping b/test/tools/llvm-cov/Inputs/regionMarkers.covmapping
new file mode 100644
index 000000000000..3ebcb0777af3
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/regionMarkers.covmapping
diff --git a/test/tools/llvm-cov/Inputs/regionMarkers.profdata b/test/tools/llvm-cov/Inputs/regionMarkers.profdata
new file mode 100644
index 000000000000..871222723967
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/regionMarkers.profdata
diff --git a/test/tools/llvm-cov/Inputs/report.covmapping b/test/tools/llvm-cov/Inputs/report.covmapping
new file mode 100644
index 000000000000..32d84bc7273a
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/report.covmapping
diff --git a/test/tools/llvm-cov/Inputs/report.profdata b/test/tools/llvm-cov/Inputs/report.profdata
new file mode 100644
index 000000000000..aa47be05afa7
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/report.profdata
diff --git a/test/tools/llvm-cov/Inputs/showExpansions.covmapping b/test/tools/llvm-cov/Inputs/showExpansions.covmapping
new file mode 100644
index 000000000000..b8c7d9711afe
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/showExpansions.covmapping
diff --git a/test/tools/llvm-cov/Inputs/showExpansions.profdata b/test/tools/llvm-cov/Inputs/showExpansions.profdata
new file mode 100644
index 000000000000..7925c6020686
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/showExpansions.profdata
diff --git a/test/tools/llvm-cov/Inputs/templateInstantiations.covmapping b/test/tools/llvm-cov/Inputs/templateInstantiations.covmapping
new file mode 100644
index 000000000000..d24373604513
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/templateInstantiations.covmapping
diff --git a/test/tools/llvm-cov/Inputs/templateInstantiations.profdata b/test/tools/llvm-cov/Inputs/templateInstantiations.profdata
new file mode 100644
index 000000000000..6ccf526b5f3d
--- /dev/null
+++ b/test/tools/llvm-cov/Inputs/templateInstantiations.profdata
diff --git a/test/tools/llvm-cov/lit.local.cfg b/test/tools/llvm-cov/lit.local.cfg
index 56c6f1f5fac2..650bc02747b5 100644
--- a/test/tools/llvm-cov/lit.local.cfg
+++ b/test/tools/llvm-cov/lit.local.cfg
@@ -1 +1,5 @@
 config.suffixes = ['.test', '.m', '.cpp']
+
+# http://llvm.org/bugs/show_bug.cgi?id=20979
+if 'ubsan' in config.available_features:
+  config.unsupported = True
diff --git a/test/tools/llvm-cov/report.cpp b/test/tools/llvm-cov/report.cpp
new file mode 100644
index 000000000000..297322a775d7
--- /dev/null
+++ b/test/tools/llvm-cov/report.cpp
@@ -0,0 +1,24 @@
+// RUN: llvm-cov report %S/Inputs/report.covmapping -instr-profile %S/Inputs/report.profdata -no-colors 2>&1 | FileCheck %s
+
+// CHECK: Filename                    Regions    Miss   Cover Functions  Executed
+// CHECK: TOTAL                             5       2  60.00%         4    75.00%
+
+void foo(bool cond) {
+  if (cond) {
+  }
+}
+
+void bar() {
+}
+
+void func() {
+}
+
+int main() {
+  foo(false);
+  bar();
+  return 0;
+}
+
+// llvm-cov doesn't work on big endian yet
+// XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
diff --git a/test/tools/llvm-cov/showExpansions.cpp b/test/tools/llvm-cov/showExpansions.cpp
new file mode 100644
index 000000000000..30edd90584f8
--- /dev/null
+++ b/test/tools/llvm-cov/showExpansions.cpp
@@ -0,0 +1,29 @@
+// RUN: llvm-cov show %S/Inputs/showExpansions.covmapping -instr-profile %S/Inputs/showExpansions.profdata -dump -show-expansions -filename-equivalence %s 2>&1 | FileCheck %s
+
+#define DO_SOMETHING_ELSE() \
+  do {                      \
+  } while (0)
+#define ANOTHER_THING() \
+  do {                  \
+    if (0) {            \
+    }                   \
+  } while (0)
+
+#define DO_SOMETHING(x)    \
+  do {                     \
+    if (x)                 \
+      DO_SOMETHING_ELSE(); \
+    else                   \
+      ANOTHER_THING();     \
+  } while (0)
+// CHECK-DAG: Expansion at line [[@LINE-4]], 7 -> 24
+// CHECK-DAG: Expansion at line [[@LINE-3]], 7 -> 20
+
+int main(int argc, const char *argv[]) {
+  for (int i = 0; i < 100; ++i)
+    DO_SOMETHING(i); // CHECK-DAG: Expansion at line [[@LINE]], 5 -> 17
+  return 0;
+}
+
+// llvm-cov doesn't work on big endian yet
+// XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
diff --git a/test/tools/llvm-cov/showHighlightedRanges.cpp b/test/tools/llvm-cov/showHighlightedRanges.cpp
new file mode 100644
index 000000000000..cec730829e0c
--- /dev/null
+++ b/test/tools/llvm-cov/showHighlightedRanges.cpp
@@ -0,0 +1,48 @@
+// RUN: llvm-cov show %S/Inputs/highlightedRanges.covmapping -instr-profile %S/Inputs/highlightedRanges.profdata -dump -filename-equivalence %s 2>&1 | FileCheck %s
+
+void func() {
+  return;
+  int i = 0;                     // CHECK: Highlighted line [[@LINE]], 3 -> 12
+}
+
+void func2(int x) {
+  if(x > 5) {
+    while(x >= 9) {
+      return;
+      --x;                       // CHECK: Highlighted line [[@LINE]], 7 -> 10
+    }
+    int i = 0;                   // CHECK: Highlighted line [[@LINE]], 5 -> 14
+  }
+}
+
+void test() {
+  int x = 0;
+
+  if (x) {                       // CHECK: Highlighted line [[@LINE]], 10 -> ?
+    x = 0;                       // CHECK: Highlighted line [[@LINE]], 1 -> ?
+  } else {                       // CHECK: Highlighted line [[@LINE]], 1 -> 4
+    x = 1;
+  }
+
+                                  // CHECK: Highlighted line [[@LINE+1]], 26 -> 29
+  for (int i = 0; i < 0; ++i) {   // CHECK: Highlighted line [[@LINE]], 31 -> ?
+    x = 1;                        // CHECK: Highlighted line [[@LINE]], 1 -> ?
+  }                               // CHECK: Highlighted line [[@LINE]], 1 -> 4
+
+  x = x < 10 ? x +
+               1
+             : x - 1;             // CHECK: Highlighted line [[@LINE]], 16 -> 21
+  x = x > 10 ? x +                // CHECK: Highlighted line [[@LINE]], 16 -> ?
+               1                  // CHECK: Highlighted line [[@LINE]], 1 -> 17
+             : x - 1;
+}
+
+int main() {
+  test();
+  func();
+  func2(9);
+  return 0;
+}
+
+// llvm-cov doesn't work on big endian yet
+// XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
diff --git a/test/tools/llvm-cov/showLineExecutionCounts.cpp b/test/tools/llvm-cov/showLineExecutionCounts.cpp
new file mode 100644
index 000000000000..34baa574dab2
--- /dev/null
+++ b/test/tools/llvm-cov/showLineExecutionCounts.cpp
@@ -0,0 +1,30 @@
+// RUN: llvm-cov show %S/Inputs/lineExecutionCounts.covmapping -instr-profile %S/Inputs/lineExecutionCounts.profdata -no-colors -filename-equivalence %s | FileCheck -check-prefix=CHECK -check-prefix=WHOLE-FILE %s
+// RUN: llvm-cov show %S/Inputs/lineExecutionCounts.covmapping -instr-profile %S/Inputs/lineExecutionCounts.profdata -no-colors -filename-equivalence -name=main %s | FileCheck -check-prefix=CHECK -check-prefix=FILTER %s
+
+// before any coverage              // WHOLE-FILE:    | [[@LINE]]|// before
+                                    // FILTER-NOT:    | [[@LINE-1]]|// before
+int main() {                             // CHECK:   1| [[@LINE]]|int main(
+  int x = 0;                             // CHECK:   1| [[@LINE]]|  int x
+                                         // CHECK:   1| [[@LINE]]|
+  if (x) {                               // CHECK:   0| [[@LINE]]|  if (x)
+    x = 0;                               // CHECK:   0| [[@LINE]]|    x = 0
+  } else {                               // CHECK:   1| [[@LINE]]|  } else
+    x = 1;                               // CHECK:   1| [[@LINE]]|    x = 1
+  }                                      // CHECK:   1| [[@LINE]]|  }
+                                         // CHECK:   1| [[@LINE]]|
+  for (int i = 0; i < 100; ++i) {        // CHECK: 100| [[@LINE]]|  for (
+    x = 1;                               // CHECK: 100| [[@LINE]]|    x = 1
+  }                                      // CHECK: 100| [[@LINE]]|  }
+                                         // CHECK:   1| [[@LINE]]|
+  x = x < 10 ? x + 1 : x - 1;            // CHECK:   0| [[@LINE]]|  x =
+  x = x > 10 ?                           // CHECK:   1| [[@LINE]]|  x =
+        x - 1:                           // CHECK:   0| [[@LINE]]|        x
+        x + 1;                           // CHECK:   1| [[@LINE]]|        x
+                                         // CHECK:   1| [[@LINE]]|
+  return 0;                              // CHECK:   1| [[@LINE]]|  return
+}                                        // CHECK:   1| [[@LINE]]|}
+// after coverage                   // WHOLE-FILE:    | [[@LINE]]|// after
+                                    // FILTER-NOT:    | [[@LINE-1]]|// after
+
+// llvm-cov doesn't work on big endian yet
+// XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
diff --git a/test/tools/llvm-cov/showRegionMarkers.cpp b/test/tools/llvm-cov/showRegionMarkers.cpp
new file mode 100644
index 000000000000..136c3bf7cac3
--- /dev/null
+++ b/test/tools/llvm-cov/showRegionMarkers.cpp
@@ -0,0 +1,26 @@
+// RUN: llvm-cov show %S/Inputs/regionMarkers.covmapping -instr-profile %S/Inputs/regionMarkers.profdata -show-regions -dump -filename-equivalence %s 2>&1 | FileCheck %s
+
+int main() {                      // CHECK: Marker at [[@LINE]]:12 = 1
+  int x = 0;
+
+  if (x) {                        // CHECK: Marker at [[@LINE]]:10 = 0
+    x = 0;
+  } else {                        // CHECK: Marker at [[@LINE]]:10 = 1
+    x = 1;
+  }
+                                  // CHECK: Marker at [[@LINE+2]]:19 = 101
+                                  // CHECK: Marker at [[@LINE+1]]:28 = 100
+  for (int i = 0; i < 100; ++i) { // CHECK: Marker at [[@LINE]]:33 = 100
+    x = 1;
+  }
+                                  // CHECK: Marker at [[@LINE+1]]:16 = 1
+  x = x < 10 ? x + 1 : x - 1;     // CHECK: Marker at [[@LINE]]:24 = 0
+  x = x > 10 ?
+        x - 1:                    // CHECK: Marker at [[@LINE]]:9 = 0
+        x + 1;                    // CHECK: Marker at [[@LINE]]:9 = 1
+
+  return 0;
+}
+
+// llvm-cov doesn't work on big endian yet
+// XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
diff --git a/test/tools/llvm-cov/showTemplateInstantiations.cpp b/test/tools/llvm-cov/showTemplateInstantiations.cpp
new file mode 100644
index 000000000000..2b72d83c013d
--- /dev/null
+++ b/test/tools/llvm-cov/showTemplateInstantiations.cpp
@@ -0,0 +1,43 @@
+// RUN: llvm-cov show %S/Inputs/templateInstantiations.covmapping -instr-profile %S/Inputs/templateInstantiations.profdata -no-colors -filename-equivalence %s | FileCheck -check-prefix=CHECK -check-prefix=ALL %s
+// RUN: llvm-cov show %S/Inputs/templateInstantiations.covmapping -instr-profile %S/Inputs/templateInstantiations.profdata -no-colors -filename-equivalence -name=_Z4funcIbEiT_ %s | FileCheck -check-prefix=CHECK -check-prefix=FILTER %s
+
+// before coverage   // WHOLE-FILE:   | [[@LINE]]|// before
+                     // FILTER-NOT:   | [[@LINE-1]]|// before
+template<typename T> // ALL:          | [[@LINE]]|template<typename T>
+int func(T x) {      // ALL-NEXT:    2| [[@LINE]]|int func(T x) {
+  if(x)              // ALL-NEXT:    2| [[@LINE]]|  if(x)
+    return 0;        // ALL-NEXT:    1| [[@LINE]]|    return 0;
+  else               // ALL-NEXT:    1| [[@LINE]]|  else
+    return 1;        // ALL-NEXT:    1| [[@LINE]]|    return 1;
+  int j = 1;         // ALL-NEXT:    0| [[@LINE]]|  int j = 1;
+}                    // ALL-NEXT:    1| [[@LINE]]|}
+
+                     // CHECK:       {{^ *(\| )?}}_Z4funcIbEiT_:
+                     // CHECK-NEXT:  1| [[@LINE-9]]|int func(T x) {
+                     // CHECK-NEXT:  1| [[@LINE-9]]|  if(x)
+                     // CHECK-NEXT:  1| [[@LINE-9]]|    return 0;
+                     // CHECK-NEXT:  1| [[@LINE-9]]|  else
+                     // CHECK-NEXT:  0| [[@LINE-9]]|    return 1;
+                     // CHECK-NEXT:  0| [[@LINE-9]]|  int j = 1;
+                     // CHECK-NEXT:  1| [[@LINE-9]]|}
+
+                     // ALL:         {{^ *}}| _Z4funcIiEiT_:
+                     // FILTER-NOT:  {{^ *(\| )?}} _Z4funcIiEiT_:
+                     // ALL-NEXT:    1| [[@LINE-19]]|int func(T x) {
+                     // ALL-NEXT:    1| [[@LINE-19]]|  if(x)
+                     // ALL-NEXT:    0| [[@LINE-19]]|    return 0;
+                     // ALL-NEXT:    1| [[@LINE-19]]|  else
+                     // ALL-NEXT:    1| [[@LINE-19]]|    return 1;
+                     // ALL-NEXT:    0| [[@LINE-19]]|  int j = 1;
+                     // ALL-NEXT:    1| [[@LINE-19]]|}
+
+int main() {         // ALL:         1| [[@LINE]]|int main() {
+  func<int>(0);      // ALL-NEXT:    1| [[@LINE]]|  func<int>(0);
+  func<bool>(true);  // ALL-NEXT:    1| [[@LINE]]|  func<bool>(true);
+  return 0;          // ALL-NEXT:    1| [[@LINE]]|  return 0;
+}                    // ALL-NEXT:    1| [[@LINE]]|}
+// after coverage    // ALL-NEXT:     | [[@LINE]]|// after
+                     // FILTER-NOT:   | [[@LINE-1]]|// after
+
+// llvm-cov doesn't work on big endian yet
+// XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
diff --git a/test/tools/llvm-mc/line_end_with_space.test b/test/tools/llvm-mc/line_end_with_space.test
new file mode 100644
index 000000000000..2ce313990af0
--- /dev/null
+++ b/test/tools/llvm-mc/line_end_with_space.test
@@ -0,0 +1,2 @@
+RUN: llvm-mc -disassemble %s
+ 
+\ No newline at end of file
diff --git a/test/tools/llvm-objdump/AArch64/Inputs/ObjC.exe.macho-aarch64 b/test/tools/llvm-objdump/AArch64/Inputs/ObjC.exe.macho-aarch64
new file mode 100755
index 000000000000..d28cbcbda3e7
--- /dev/null
+++ b/test/tools/llvm-objdump/AArch64/Inputs/ObjC.exe.macho-aarch64
diff --git a/test/tools/llvm-objdump/AArch64/Inputs/ObjC.obj.macho-aarch64 b/test/tools/llvm-objdump/AArch64/Inputs/ObjC.obj.macho-aarch64
new file mode 100644
index 000000000000..836607672d45
--- /dev/null
+++ b/test/tools/llvm-objdump/AArch64/Inputs/ObjC.obj.macho-aarch64
diff --git a/test/tools/llvm-objdump/AArch64/Inputs/hello.exe.macho-aarch64 b/test/tools/llvm-objdump/AArch64/Inputs/hello.exe.macho-aarch64
new file mode 100755
index 000000000000..c30d35824fb7
--- /dev/null
+++ b/test/tools/llvm-objdump/AArch64/Inputs/hello.exe.macho-aarch64
diff --git a/test/tools/llvm-objdump/AArch64/Inputs/hello.obj.macho-aarch64 b/test/tools/llvm-objdump/AArch64/Inputs/hello.obj.macho-aarch64
new file mode 100644
index 000000000000..704dbab0282e
--- /dev/null
+++ b/test/tools/llvm-objdump/AArch64/Inputs/hello.obj.macho-aarch64
diff --git a/test/tools/llvm-objdump/AArch64/lit.local.cfg b/test/tools/llvm-objdump/AArch64/lit.local.cfg
new file mode 100644
index 000000000000..7184443994b6
--- /dev/null
+++ b/test/tools/llvm-objdump/AArch64/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True
diff --git a/test/tools/llvm-objdump/AArch64/macho-private-headers.test b/test/tools/llvm-objdump/AArch64/macho-private-headers.test
new file mode 100644
index 000000000000..cdf98b145165
--- /dev/null
+++ b/test/tools/llvm-objdump/AArch64/macho-private-headers.test
@@ -0,0 +1,312 @@
+// RUN: llvm-objdump -p %p/Inputs/hello.obj.macho-aarch64 | FileCheck %s
+// RUN: llvm-objdump -p %p/Inputs/hello.exe.macho-aarch64 \
+// RUN:     | FileCheck %s -check-prefix=EXE
+ 
+CHECK: Mach header
+CHECK:       magic cputype cpusubtype  caps    filetype ncmds sizeofcmds      flags
+CHECK: MH_MAGIC_64   ARM64        ALL  0x00      OBJECT     4        352 SUBSECTIONS_VIA_SYMBOLS
+CHECK: Load command 0
+CHECK:       cmd LC_SEGMENT_64
+CHECK:   cmdsize 232
+CHECK:   segname 
+CHECK:    vmaddr 0x0000000000000000
+CHECK:    vmsize 0x000000000000004d
+CHECK:   fileoff 384
+CHECK:  filesize 77
+CHECK:   maxprot rwx
+CHECK:  initprot rwx
+CHECK:    nsects 2
+CHECK:     flags (none)
+CHECK: Section
+CHECK:   sectname __text
+CHECK:    segname __TEXT
+CHECK:       addr 0x0000000000000000
+CHECK:       size 0x0000000000000040
+CHECK:     offset 384
+CHECK:      align 2^2 (4)
+CHECK:     reloff 464
+CHECK:     nreloc 3
+CHECK:       type S_REGULAR
+CHECK: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS
+CHECK:  reserved1 0
+CHECK:  reserved2 0
+CHECK: Section
+CHECK:   sectname __cstring
+CHECK:    segname __TEXT
+CHECK:       addr 0x0000000000000040
+CHECK:       size 0x000000000000000d
+CHECK:     offset 448
+CHECK:      align 2^0 (1)
+CHECK:     reloff 0
+CHECK:     nreloc 0
+CHECK:       type S_CSTRING_LITERALS
+CHECK: attributes (none)
+CHECK:  reserved1 0
+CHECK:  reserved2 0
+CHECK: Load command 1
+CHECK:       cmd LC_VERSION_MIN_IPHONEOS
+CHECK:   cmdsize 16
+CHECK:   version 9.0
+CHECK:       sdk n/a
+CHECK: Load command 2
+CHECK:      cmd LC_SYMTAB
+CHECK:  cmdsize 24
+CHECK:   symoff 488
+CHECK:    nsyms 5
+CHECK:   stroff 568
+CHECK:  strsize 36
+CHECK: Load command 3
+CHECK:             cmd LC_DYSYMTAB
+CHECK:         cmdsize 80
+CHECK:       ilocalsym 0
+CHECK:       nlocalsym 3
+CHECK:      iextdefsym 3
+CHECK:      nextdefsym 1
+CHECK:       iundefsym 4
+CHECK:       nundefsym 1
+CHECK:          tocoff 0
+CHECK:            ntoc 0
+CHECK:       modtaboff 0
+CHECK:         nmodtab 0
+CHECK:    extrefsymoff 0
+CHECK:     nextrefsyms 0
+CHECK:  indirectsymoff 0
+CHECK:   nindirectsyms 0
+CHECK:       extreloff 0
+CHECK:         nextrel 0
+CHECK:       locreloff 0
+CHECK:         nlocrel 0
+
+EXE: Mach header
+EXE:       magic cputype cpusubtype  caps    filetype ncmds sizeofcmds      flags
+EXE: MH_MAGIC_64   ARM64        ALL  0x00     EXECUTE    17       1240   NOUNDEFS DYLDLINK TWOLEVEL PIE
+EXE: Load command 0
+EXE:       cmd LC_SEGMENT_64
+EXE:   cmdsize 72
+EXE:   segname __PAGEZERO
+EXE:    vmaddr 0x0000000000000000
+EXE:    vmsize 0x0000000100000000
+EXE:   fileoff 0
+EXE:  filesize 0
+EXE:   maxprot ---
+EXE:  initprot ---
+EXE:    nsects 0
+EXE:     flags (none)
+EXE: Load command 1
+EXE:       cmd LC_SEGMENT_64
+EXE:   cmdsize 472
+EXE:   segname __TEXT
+EXE:    vmaddr 0x0000000100000000
+EXE:    vmsize 0x0000000000008000
+EXE:   fileoff 0
+EXE:  filesize 32768
+EXE:   maxprot r-x
+EXE:  initprot r-x
+EXE:    nsects 5
+EXE:     flags (none)
+EXE: Section
+EXE:   sectname __text
+EXE:    segname __TEXT
+EXE:       addr 0x0000000100007f38
+EXE:       size 0x0000000000000040
+EXE:     offset 32568
+EXE:      align 2^2 (4)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_REGULAR
+EXE: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS
+EXE:  reserved1 0
+EXE:  reserved2 0
+EXE: Section
+EXE:   sectname __stubs
+EXE:    segname __TEXT
+EXE:       addr 0x0000000100007f78
+EXE:       size 0x000000000000000c
+EXE:     offset 32632
+EXE:      align 2^1 (2)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_SYMBOL_STUBS
+EXE: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS
+EXE:  reserved1 0 (index into indirect symbol table)
+EXE:  reserved2 12 (size of stubs)
+EXE: Section
+EXE:   sectname __stub_helper
+EXE:    segname __TEXT
+EXE:       addr 0x0000000100007f84
+EXE:       size 0x0000000000000024
+EXE:     offset 32644
+EXE:      align 2^2 (4)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_REGULAR
+EXE: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS
+EXE:  reserved1 0
+EXE:  reserved2 0
+EXE: Section
+EXE:   sectname __cstring
+EXE:    segname __TEXT
+EXE:       addr 0x0000000100007fa8
+EXE:       size 0x000000000000000d
+EXE:     offset 32680
+EXE:      align 2^0 (1)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_CSTRING_LITERALS
+EXE: attributes (none)
+EXE:  reserved1 0
+EXE:  reserved2 0
+EXE: Section
+EXE:   sectname __unwind_info
+EXE:    segname __TEXT
+EXE:       addr 0x0000000100007fb8
+EXE:       size 0x0000000000000048
+EXE:     offset 32696
+EXE:      align 2^2 (4)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_REGULAR
+EXE: attributes (none)
+EXE:  reserved1 0
+EXE:  reserved2 0
+EXE: Load command 2
+EXE:       cmd LC_SEGMENT_64
+EXE:   cmdsize 232
+EXE:   segname __DATA
+EXE:    vmaddr 0x0000000100008000
+EXE:    vmsize 0x0000000000004000
+EXE:   fileoff 32768
+EXE:  filesize 16384
+EXE:   maxprot rw-
+EXE:  initprot rw-
+EXE:    nsects 2
+EXE:     flags (none)
+EXE: Section
+EXE:   sectname __got
+EXE:    segname __DATA
+EXE:       addr 0x0000000100008000
+EXE:       size 0x0000000000000010
+EXE:     offset 32768
+EXE:      align 2^3 (8)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_NON_LAZY_SYMBOL_POINTERS
+EXE: attributes (none)
+EXE:  reserved1 1 (index into indirect symbol table)
+EXE:  reserved2 0
+EXE: Section
+EXE:   sectname __la_symbol_ptr
+EXE:    segname __DATA
+EXE:       addr 0x0000000100008010
+EXE:       size 0x0000000000000008
+EXE:     offset 32784
+EXE:      align 2^3 (8)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_LAZY_SYMBOL_POINTERS
+EXE: attributes (none)
+EXE:  reserved1 3 (index into indirect symbol table)
+EXE:  reserved2 0
+EXE: Load command 3
+EXE:       cmd LC_SEGMENT_64
+EXE:   cmdsize 72
+EXE:   segname __LINKEDIT
+EXE:    vmaddr 0x000000010000c000
+EXE:    vmsize 0x0000000000004000
+EXE:   fileoff 49152
+EXE:  filesize 264
+EXE:   maxprot r--
+EXE:  initprot r--
+EXE:    nsects 0
+EXE:     flags (none)
+EXE: Load command 4
+EXE:             cmd LC_DYLD_INFO_ONLY
+EXE:         cmdsize 48
+EXE:      rebase_off 49152
+EXE:     rebase_size 8
+EXE:        bind_off 49160
+EXE:       bind_size 24
+EXE:   weak_bind_off 0
+EXE:  weak_bind_size 0
+EXE:   lazy_bind_off 49184
+EXE:  lazy_bind_size 16
+EXE:      export_off 49200
+EXE:     export_size 48
+EXE: Load command 5
+EXE:      cmd LC_SYMTAB
+EXE:  cmdsize 24
+EXE:   symoff 49280
+EXE:    nsyms 4
+EXE:   stroff 49360
+EXE:  strsize 56
+EXE: Load command 6
+EXE:             cmd LC_DYSYMTAB
+EXE:         cmdsize 80
+EXE:       ilocalsym 0
+EXE:       nlocalsym 0
+EXE:      iextdefsym 0
+EXE:      nextdefsym 2
+EXE:       iundefsym 2
+EXE:       nundefsym 2
+EXE:          tocoff 0
+EXE:            ntoc 0
+EXE:       modtaboff 0
+EXE:         nmodtab 0
+EXE:    extrefsymoff 0
+EXE:     nextrefsyms 0
+EXE:  indirectsymoff 49344
+EXE:   nindirectsyms 4
+EXE:       extreloff 0
+EXE:         nextrel 0
+EXE:       locreloff 0
+EXE:         nlocrel 0
+EXE: Load command 7
+EXE:           cmd LC_LOAD_DYLINKER
+EXE:       cmdsize 32
+EXE:          name /usr/lib/dyld (offset 12)
+EXE: Load command 8
+EXE:      cmd LC_UUID
+EXE:  cmdsize 24
+EXE:     uuid D687F888-CD3F-3276-8C94-BA3CCA21D820
+EXE: Load command 9
+EXE:       cmd LC_VERSION_MIN_IPHONEOS
+EXE:   cmdsize 16
+EXE:   version 9.0
+EXE:       sdk 9.0
+EXE: Load command 10
+EXE:       cmd LC_SOURCE_VERSION
+EXE:   cmdsize 16
+EXE:   version 0.0
+EXE: Load command 11
+EXE:        cmd LC_MAIN
+EXE:    cmdsize 24
+EXE:   entryoff 32568
+EXE:  stacksize 0
+EXE: Load command 12
+EXE:           cmd LC_ENCRYPTION_INFO_64
+EXE:       cmdsize 24
+EXE:      cryptoff 16384
+EXE:     cryptsize 16384
+EXE:       cryptid 0
+EXE:           pad 0
+EXE: Load command 13
+EXE:           cmd LC_LOAD_DYLIB
+EXE:       cmdsize 56
+EXE:          name /usr/lib/libSystem.B.dylib (offset 24)
+EXE:       current version 1215.0.0
+EXE: compatibility version 1.0.0
+EXE: Load command 14
+EXE:       cmd LC_FUNCTION_STARTS
+EXE:   cmdsize 16
+EXE:   dataoff 49248
+EXE:  datasize 8
+EXE: Load command 15
+EXE:       cmd LC_DATA_IN_CODE
+EXE:   cmdsize 16
+EXE:   dataoff 49256
+EXE:  datasize 0
+EXE: Load command 16
+EXE:       cmd LC_DYLIB_CODE_SIGN_DRS
+EXE:   cmdsize 16
+EXE:   dataoff 49256
+EXE:  datasize 24
diff --git a/test/tools/llvm-objdump/AArch64/macho-symbolized-disassembly.test b/test/tools/llvm-objdump/AArch64/macho-symbolized-disassembly.test
new file mode 100644
index 000000000000..311ff51fbac4
--- /dev/null
+++ b/test/tools/llvm-objdump/AArch64/macho-symbolized-disassembly.test
@@ -0,0 +1,23 @@
+// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello.obj.macho-aarch64 | FileCheck %s -check-prefix=OBJ
+// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello.exe.macho-aarch64 | FileCheck %s -check-prefix=EXE
+// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/ObjC.obj.macho-aarch64 | FileCheck %s -check-prefix=ObjC-OBJ
+// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/ObjC.exe.macho-aarch64 | FileCheck %s -check-prefix=ObjC-EXE
+
+OBJ: 000000000000001c	adrp	x0, L_.str@PAGE
+OBJ: 0000000000000020	add	x0, x0, L_.str@PAGEOFF
+OBJ: 0000000000000024	bl	_printf
+
+EXE: 0000000100007f58	add	x0, x0, #4008 ; literal pool for: "Hello world
+"
+EXE: 0000000100007f5c	bl	0x100007f78 ; symbol stub for: _printf
+
+ObjC-OBJ: 000000000000000c	adrp	x8, L_OBJC_SELECTOR_REFERENCES_3@PAGE
+ObjC-OBJ: 0000000000000010	add	x8, x8, L_OBJC_SELECTOR_REFERENCES_3@PAGEOFF
+ObjC-OBJ:0000000000000044	bl	_objc_msgSend
+
+ObjC-EXE: 0000000100007ed0	add	x8, x8, #80 ; Objc selector ref: date
+ObjC-EXE: 0000000100007ed8	add	x9, x9, #96 ; Objc class ref: _OBJC_CLASS_$_NSDate
+ObjC-EXE: 0000000100007f04	bl	0x100007f50 ; Objc message: +[NSObject new]
+ObjC-EXE: 0000000100007f1c	bl	0x100007f50 ; Objc message: -[x0 new]
+ObjC-EXE: 0000000100007f2c	add	x0, x0, #32 ; Objc cfstring ref: @"The current date and time is: %@"
+ObjC-EXE: 0000000100007f30	bl	0x100007f44 ; symbol stub for: _NSLog
diff --git a/test/tools/llvm-objdump/ARM/Inputs/hello.exe.macho-arm b/test/tools/llvm-objdump/ARM/Inputs/hello.exe.macho-arm
new file mode 100755
index 000000000000..40d657b35c04
--- /dev/null
+++ b/test/tools/llvm-objdump/ARM/Inputs/hello.exe.macho-arm
diff --git a/test/tools/llvm-objdump/ARM/Inputs/hello.obj.macho-arm b/test/tools/llvm-objdump/ARM/Inputs/hello.obj.macho-arm
new file mode 100644
index 000000000000..fb8706b28573
--- /dev/null
+++ b/test/tools/llvm-objdump/ARM/Inputs/hello.obj.macho-arm
diff --git a/test/tools/llvm-objdump/ARM/lit.local.cfg b/test/tools/llvm-objdump/ARM/lit.local.cfg
new file mode 100644
index 000000000000..236e1d344166
--- /dev/null
+++ b/test/tools/llvm-objdump/ARM/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'ARM' in config.root.targets:
+    config.unsupported = True
diff --git a/test/tools/llvm-objdump/ARM/macho-arm-and-thumb.test b/test/tools/llvm-objdump/ARM/macho-arm-and-thumb.test
new file mode 100644
index 000000000000..720b78fa89e2
--- /dev/null
+++ b/test/tools/llvm-objdump/ARM/macho-arm-and-thumb.test
@@ -0,0 +1,15 @@
+@ RUN: llvm-mc < %s -triple armv7-apple-darwin -filetype=obj | llvm-objdump -m -d - | FileCheck %s
+
+.thumb
+.thumb_func _t
+_t:
+nop
+nop
+.align 2
+.arm
+_a:
+nop
+
+@ CHECK: 00 bf nop
+@ CHECK-NEXT: 00 bf nop
+@ CHECK: 00 f0 20 e3 nop
diff --git a/test/tools/llvm-objdump/ARM/macho-mattr-arm.test b/test/tools/llvm-objdump/ARM/macho-mattr-arm.test
new file mode 100644
index 000000000000..1b1714640c89
--- /dev/null
+++ b/test/tools/llvm-objdump/ARM/macho-mattr-arm.test
@@ -0,0 +1,5 @@
+@ RUN: llvm-mc < %s -triple armv8-apple-darwin10 -mattr=+fp-armv8 -filetype=obj -o - | llvm-objdump -d -m -mattr=+fp-armv8 - | FileCheck %s
+
+vcvtt.f64.f16 d3, s1
+
+@ CHECK: e0 3b b2 ee vcvtt.f64.f16 d3, s1
diff --git a/test/tools/llvm-objdump/ARM/macho-mcpu-arm.test b/test/tools/llvm-objdump/ARM/macho-mcpu-arm.test
new file mode 100644
index 000000000000..7a3432d9e1a8
--- /dev/null
+++ b/test/tools/llvm-objdump/ARM/macho-mcpu-arm.test
@@ -0,0 +1,10 @@
+@ RUN: llvm-mc < %s -triple thumbv7-apple-darwin -mcpu=cortex-a7 -filetype=obj | llvm-objdump -triple thumbv7-apple-darwin10 -m -d -mcpu=cortex-a7 - | FileCheck %s
+
+.thumb
+.thumb_func _t
+_t:
+sdiv r1, r2, r3
+udiv r1, r2, r3
+
+@ CHECK: 92 fb f3 f1 sdiv r1, r2, r3
+@ CHECK: b2 fb f3 f1 udiv r1, r2, r3
diff --git a/test/tools/llvm-objdump/ARM/macho-private-headers.test b/test/tools/llvm-objdump/ARM/macho-private-headers.test
new file mode 100644
index 000000000000..4ab30433c078
--- /dev/null
+++ b/test/tools/llvm-objdump/ARM/macho-private-headers.test
@@ -0,0 +1,345 @@
+// RUN: llvm-objdump -p %p/Inputs/hello.obj.macho-arm | FileCheck %s
+// RUN: llvm-objdump -p %p/Inputs/hello.exe.macho-arm \
+// RUN:     | FileCheck %s -check-prefix=EXE
+
+CHECK: Mach header
+CHECK:       magic cputype cpusubtype  caps    filetype ncmds sizeofcmds      flags
+CHECK:    MH_MAGIC     ARM         V7  0x00      OBJECT     3        568 SUBSECTIONS_VIA_SYMBOLS
+CHECK: Load command 0
+CHECK:       cmd LC_SEGMENT
+CHECK:   cmdsize 464
+CHECK:   segname 
+CHECK:    vmaddr 0x00000000
+CHECK:    vmsize 0x00000043
+CHECK:   fileoff 596
+CHECK:  filesize 67
+CHECK:   maxprot rwx
+CHECK:  initprot rwx
+CHECK:    nsects 6
+CHECK:     flags (none)
+CHECK: Section
+CHECK:   sectname __text
+CHECK:    segname __TEXT
+CHECK:       addr 0x00000000
+CHECK:       size 0x00000036
+CHECK:     offset 596
+CHECK:      align 2^2 (4)
+CHECK:     reloff 664
+CHECK:     nreloc 5
+CHECK:       type S_REGULAR
+CHECK: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS
+CHECK:  reserved1 0
+CHECK:  reserved2 0
+CHECK: Section
+CHECK:   sectname __textcoal_nt
+CHECK:    segname __TEXT
+CHECK:       addr 0x00000036
+CHECK:       size 0x00000000
+CHECK:     offset 650
+CHECK:      align 2^0 (1)
+CHECK:     reloff 0
+CHECK:     nreloc 0
+CHECK:       type S_COALESCED
+CHECK: attributes PURE_INSTRUCTIONS
+CHECK:  reserved1 0
+CHECK:  reserved2 0
+CHECK: Section
+CHECK:   sectname __const_coal
+CHECK:    segname __TEXT
+CHECK:       addr 0x00000036
+CHECK:       size 0x00000000
+CHECK:     offset 650
+CHECK:      align 2^0 (1)
+CHECK:     reloff 0
+CHECK:     nreloc 0
+CHECK:       type S_COALESCED
+CHECK: attributes (none)
+CHECK:  reserved1 0
+CHECK:  reserved2 0
+CHECK: Section
+CHECK:   sectname __picsymbolstub4
+CHECK:    segname __TEXT
+CHECK:       addr 0x00000036
+CHECK:       size 0x00000000
+CHECK:     offset 650
+CHECK:      align 2^0 (1)
+CHECK:     reloff 0
+CHECK:     nreloc 0
+CHECK:       type S_SYMBOL_STUBS
+CHECK: attributes (none)
+CHECK:  reserved1 0 (index into indirect symbol table)
+CHECK:  reserved2 16 (size of stubs)
+CHECK: Section
+CHECK:   sectname __StaticInit
+CHECK:    segname __TEXT
+CHECK:       addr 0x00000036
+CHECK:       size 0x00000000
+CHECK:     offset 650
+CHECK:      align 2^0 (1)
+CHECK:     reloff 0
+CHECK:     nreloc 0
+CHECK:       type S_REGULAR
+CHECK: attributes PURE_INSTRUCTIONS
+CHECK:  reserved1 0
+CHECK:  reserved2 0
+CHECK: Section
+CHECK:   sectname __cstring
+CHECK:    segname __TEXT
+CHECK:       addr 0x00000036
+CHECK:       size 0x0000000d
+CHECK:     offset 650
+CHECK:      align 2^0 (1)
+CHECK:     reloff 0
+CHECK:     nreloc 0
+CHECK:       type S_CSTRING_LITERALS
+CHECK: attributes (none)
+CHECK:  reserved1 0
+CHECK:  reserved2 0
+CHECK: Load command 1
+CHECK:      cmd LC_SYMTAB
+CHECK:  cmdsize 24
+CHECK:   symoff 704
+CHECK:    nsyms 2
+CHECK:   stroff 728
+CHECK:  strsize 16
+CHECK: Load command 2
+CHECK:             cmd LC_DYSYMTAB
+CHECK:         cmdsize 80
+CHECK:       ilocalsym 0
+CHECK:       nlocalsym 0
+CHECK:      iextdefsym 0
+CHECK:      nextdefsym 1
+CHECK:       iundefsym 1
+CHECK:       nundefsym 1
+CHECK:          tocoff 0
+CHECK:            ntoc 0
+CHECK:       modtaboff 0
+CHECK:         nmodtab 0
+CHECK:    extrefsymoff 0
+CHECK:     nextrefsyms 0
+CHECK:  indirectsymoff 0
+CHECK:   nindirectsyms 0
+CHECK:       extreloff 0
+CHECK:         nextrel 0
+CHECK:       locreloff 0
+CHECK:         nlocrel 0
+
+EXE: Mach header
+EXE:       magic cputype cpusubtype  caps    filetype ncmds sizeofcmds      flags
+EXE:    MH_MAGIC     ARM         V7  0x00     EXECUTE    17       1012   NOUNDEFS DYLDLINK TWOLEVEL PIE
+EXE: Load command 0
+EXE:       cmd LC_SEGMENT
+EXE:   cmdsize 56
+EXE:   segname __PAGEZERO
+EXE:    vmaddr 0x00000000
+EXE:    vmsize 0x00004000
+EXE:   fileoff 0
+EXE:  filesize 0
+EXE:   maxprot ---
+EXE:  initprot ---
+EXE:    nsects 0
+EXE:     flags (none)
+EXE: Load command 1
+EXE:       cmd LC_SEGMENT
+EXE:   cmdsize 328
+EXE:   segname __TEXT
+EXE:    vmaddr 0x00004000
+EXE:    vmsize 0x00008000
+EXE:   fileoff 0
+EXE:  filesize 32768
+EXE:   maxprot r-x
+EXE:  initprot r-x
+EXE:    nsects 4
+EXE:     flags (none)
+EXE: Section
+EXE:   sectname __text
+EXE:    segname __TEXT
+EXE:       addr 0x0000bf84
+EXE:       size 0x00000036
+EXE:     offset 32644
+EXE:      align 2^2 (4)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_REGULAR
+EXE: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS
+EXE:  reserved1 0
+EXE:  reserved2 0
+EXE: Section
+EXE:   sectname __stub_helper
+EXE:    segname __TEXT
+EXE:       addr 0x0000bfbc
+EXE:       size 0x00000030
+EXE:     offset 32700
+EXE:      align 2^2 (4)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_REGULAR
+EXE: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS
+EXE:  reserved1 0
+EXE:  reserved2 0
+EXE: Section
+EXE:   sectname __cstring
+EXE:    segname __TEXT
+EXE:       addr 0x0000bfec
+EXE:       size 0x0000000d
+EXE:     offset 32748
+EXE:      align 2^0 (1)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_CSTRING_LITERALS
+EXE: attributes (none)
+EXE:  reserved1 0
+EXE:  reserved2 0
+EXE: Section
+EXE:   sectname __symbolstub1
+EXE:    segname __TEXT
+EXE:       addr 0x0000bffc
+EXE:       size 0x00000004
+EXE:     offset 32764
+EXE:      align 2^2 (4)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_SYMBOL_STUBS
+EXE: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS
+EXE:  reserved1 0 (index into indirect symbol table)
+EXE:  reserved2 4 (size of stubs)
+EXE: Load command 2
+EXE:       cmd LC_SEGMENT
+EXE:   cmdsize 192
+EXE:   segname __DATA
+EXE:    vmaddr 0x0000c000
+EXE:    vmsize 0x00004000
+EXE:   fileoff 32768
+EXE:  filesize 16384
+EXE:   maxprot rw-
+EXE:  initprot rw-
+EXE:    nsects 2
+EXE:     flags (none)
+EXE: Section
+EXE:   sectname __lazy_symbol
+EXE:    segname __DATA
+EXE:       addr 0x0000c000
+EXE:       size 0x00000004
+EXE:     offset 32768
+EXE:      align 2^2 (4)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_LAZY_SYMBOL_POINTERS
+EXE: attributes (none)
+EXE:  reserved1 1 (index into indirect symbol table)
+EXE:  reserved2 0
+EXE: Section
+EXE:   sectname __nl_symbol_ptr
+EXE:    segname __DATA
+EXE:       addr 0x0000c004
+EXE:       size 0x00000008
+EXE:     offset 32772
+EXE:      align 2^2 (4)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_NON_LAZY_SYMBOL_POINTERS
+EXE: attributes (none)
+EXE:  reserved1 2 (index into indirect symbol table)
+EXE:  reserved2 0
+EXE: Load command 3
+EXE:       cmd LC_SEGMENT
+EXE:   cmdsize 56
+EXE:   segname __LINKEDIT
+EXE:    vmaddr 0x00010000
+EXE:    vmsize 0x00004000
+EXE:   fileoff 49152
+EXE:  filesize 256
+EXE:   maxprot r--
+EXE:  initprot r--
+EXE:    nsects 0
+EXE:     flags (none)
+EXE: Load command 4
+EXE:             cmd LC_DYLD_INFO_ONLY
+EXE:         cmdsize 48
+EXE:      rebase_off 49152
+EXE:     rebase_size 4
+EXE:        bind_off 49156
+EXE:       bind_size 24
+EXE:   weak_bind_off 0
+EXE:  weak_bind_size 0
+EXE:   lazy_bind_off 49180
+EXE:  lazy_bind_size 16
+EXE:      export_off 49196
+EXE:     export_size 44
+EXE: Load command 5
+EXE:      cmd LC_SYMTAB
+EXE:  cmdsize 24
+EXE:   symoff 49264
+EXE:    nsyms 5
+EXE:   stroff 49340
+EXE:  strsize 68
+EXE: Load command 6
+EXE:             cmd LC_DYSYMTAB
+EXE:         cmdsize 80
+EXE:       ilocalsym 0
+EXE:       nlocalsym 1
+EXE:      iextdefsym 1
+EXE:      nextdefsym 2
+EXE:       iundefsym 3
+EXE:       nundefsym 2
+EXE:          tocoff 0
+EXE:            ntoc 0
+EXE:       modtaboff 0
+EXE:         nmodtab 0
+EXE:    extrefsymoff 0
+EXE:     nextrefsyms 0
+EXE:  indirectsymoff 49324
+EXE:   nindirectsyms 4
+EXE:       extreloff 0
+EXE:         nextrel 0
+EXE:       locreloff 0
+EXE:         nlocrel 0
+EXE: Load command 7
+EXE:           cmd LC_LOAD_DYLINKER
+EXE:       cmdsize 28
+EXE:          name /usr/lib/dyld (offset 12)
+EXE: Load command 8
+EXE:      cmd LC_UUID
+EXE:  cmdsize 24
+EXE:     uuid C2D9351C-1EF1-330B-A2AB-EED6CF7D2C5D
+EXE: Load command 9
+EXE:      cmd LC_VERSION_MIN_IPHONEOS
+EXE:  cmdsize 16
+EXE:  version 8.0
+EXE:      sdk 8.0
+EXE: Load command 10
+EXE:       cmd LC_SOURCE_VERSION
+EXE:   cmdsize 16
+EXE:   version 0.0
+EXE: Load command 11
+EXE:        cmd LC_MAIN
+EXE:    cmdsize 24
+EXE:   entryoff 32645
+EXE:  stacksize 0
+EXE: Load command 12
+EXE:          cmd LC_ENCRYPTION_INFO
+EXE:      cmdsize 20
+EXE:     cryptoff 16384
+EXE:    cryptsize 16384
+EXE:      cryptid 0
+EXE: Load command 13
+EXE:           cmd LC_LOAD_DYLIB
+EXE:       cmdsize 52
+EXE:          name /usr/lib/libSystem.B.dylib (offset 24)
+EXE:       current version 1213.0.0
+EXE: compatibility version 1.0.0
+EXE: Load command 14
+EXE:       cmd LC_FUNCTION_STARTS
+EXE:   cmdsize 16
+EXE:   dataoff 49240
+EXE:  datasize 4
+EXE: Load command 15
+EXE:       cmd LC_DATA_IN_CODE
+EXE:   cmdsize 16
+EXE:   dataoff 49244
+EXE:  datasize 0
+EXE: Load command 16
+EXE:       cmd LC_DYLIB_CODE_SIGN_DRS
+EXE:   cmdsize 16
+EXE:   dataoff 49244
+EXE:  datasize 20
diff --git a/test/tools/llvm-objdump/ARM/macho-symbolized-disassembly.test b/test/tools/llvm-objdump/ARM/macho-symbolized-disassembly.test
new file mode 100644
index 000000000000..eeeab526cf81
--- /dev/null
+++ b/test/tools/llvm-objdump/ARM/macho-symbolized-disassembly.test
@@ -0,0 +1,8 @@
+// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello.obj.macho-arm | FileCheck %s -check-prefix=OBJ
+// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello.exe.macho-arm | FileCheck %s -check-prefix=EXE
+
+OBJ: 00000006	movw	r3, :lower16:((54-14)-4)
+OBJ: 0000000a	movt	r3, :upper16:((54-14)-4)
+OBJ: 00000024	bl	_printf
+
+EXE: 0000bfa8	blx	0xbffc @ symbol stub for: _printf
diff --git a/test/tools/llvm-objdump/ARM/macho-symbolized-subtractor.test b/test/tools/llvm-objdump/ARM/macho-symbolized-subtractor.test
new file mode 100644
index 000000000000..65df2a984cd0
--- /dev/null
+++ b/test/tools/llvm-objdump/ARM/macho-symbolized-subtractor.test
@@ -0,0 +1,15 @@
+# RUN: llvm-mc < %s -triple armv7-apple-darwin -filetype=obj | llvm-objdump -m -d - | FileCheck %s
+	.thumb
+	.thumb_func	_t
+_t:
+	movw	r3, :lower16:(Str-(PCinst+4))
+	movt	r3, :upper16:(Str-(PCinst+4))
+	.thumb_func	PCinst
+PCinst:
+	add	r3, pc
+
+	.section	__TEXT,__cstring,cstring_literals
+Str: 
+	.asciz	"Hello world\n"
+# CHECK: movw	r3, :lower16:((Str-PCinst)-4)
+# CHECK: movt	r3, :upper16:((Str-PCinst)-4)
diff --git a/test/tools/llvm-objdump/Inputs/bad-ordinal.macho-x86_64 b/test/tools/llvm-objdump/Inputs/bad-ordinal.macho-x86_64
new file mode 100755
index 000000000000..3ab622781b85
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/bad-ordinal.macho-x86_64
diff --git a/test/tools/llvm-objdump/Inputs/bind.macho-x86_64 b/test/tools/llvm-objdump/Inputs/bind.macho-x86_64
new file mode 100755
index 000000000000..51a58a7342e9
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/bind.macho-x86_64
diff --git a/test/tools/llvm-objdump/Inputs/bind2.macho-x86_64 b/test/tools/llvm-objdump/Inputs/bind2.macho-x86_64
new file mode 100755
index 000000000000..f756fbb4a93f
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/bind2.macho-x86_64
diff --git a/test/tools/llvm-objdump/Inputs/compact-unwind.macho-i386 b/test/tools/llvm-objdump/Inputs/compact-unwind.macho-i386
new file mode 100644
index 000000000000..174d38302325
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/compact-unwind.macho-i386
diff --git a/test/tools/llvm-objdump/Inputs/compact-unwind.macho-x86_64 b/test/tools/llvm-objdump/Inputs/compact-unwind.macho-x86_64
new file mode 100644
index 000000000000..fde1bb5a1602
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/compact-unwind.macho-x86_64
diff --git a/test/tools/llvm-objdump/Inputs/exports-trie.macho-x86_64 b/test/tools/llvm-objdump/Inputs/exports-trie.macho-x86_64
new file mode 100755
index 000000000000..5d7506078a71
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/exports-trie.macho-x86_64
diff --git a/test/tools/llvm-objdump/Inputs/lazy-bind.macho-x86_64 b/test/tools/llvm-objdump/Inputs/lazy-bind.macho-x86_64
new file mode 100755
index 000000000000..02a4d1216f21
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/lazy-bind.macho-x86_64
diff --git a/test/tools/llvm-objdump/Inputs/rebase.macho-x86_64 b/test/tools/llvm-objdump/Inputs/rebase.macho-x86_64
new file mode 100755
index 000000000000..05062d824832
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/rebase.macho-x86_64
diff --git a/test/tools/llvm-objdump/Inputs/unwind-info-no-relocs.macho-x86_64 b/test/tools/llvm-objdump/Inputs/unwind-info-no-relocs.macho-x86_64
new file mode 100755
index 000000000000..a1fd6874035e
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/unwind-info-no-relocs.macho-x86_64
diff --git a/test/tools/llvm-objdump/Inputs/unwind-info.macho-arm64 b/test/tools/llvm-objdump/Inputs/unwind-info.macho-arm64
new file mode 100755
index 000000000000..5b9ce9cf96c9
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/unwind-info.macho-arm64
diff --git a/test/tools/llvm-objdump/Inputs/unwind-info.macho-x86_64 b/test/tools/llvm-objdump/Inputs/unwind-info.macho-x86_64
new file mode 100755
index 000000000000..9e6ad6bb9262
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/unwind-info.macho-x86_64
diff --git a/test/tools/llvm-objdump/Inputs/weak-bind.macho-x86_64 b/test/tools/llvm-objdump/Inputs/weak-bind.macho-x86_64
new file mode 100755
index 000000000000..6534116f57ca
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/weak-bind.macho-x86_64
diff --git a/test/tools/llvm-objdump/X86/Inputs/ObjC.exe.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/ObjC.exe.macho-x86_64
new file mode 100755
index 000000000000..4de8a1ff60af
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/ObjC.exe.macho-x86_64
diff --git a/test/tools/llvm-objdump/X86/Inputs/ObjC.obj.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/ObjC.obj.macho-x86_64
new file mode 100644
index 000000000000..66edb3cb48ce
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/ObjC.obj.macho-x86_64
diff --git a/test/tools/llvm-objdump/X86/Inputs/dylibLoadKinds.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/dylibLoadKinds.macho-x86_64
new file mode 100755
index 000000000000..87d1f8c70f4b
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/dylibLoadKinds.macho-x86_64
diff --git a/test/tools/llvm-objdump/X86/Inputs/dylibRoutines.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/dylibRoutines.macho-x86_64
new file mode 100755
index 000000000000..35680452a099
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/dylibRoutines.macho-x86_64
diff --git a/test/tools/llvm-objdump/X86/Inputs/dylibSubClient.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/dylibSubClient.macho-x86_64
new file mode 100755
index 000000000000..e7f95428e234
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/dylibSubClient.macho-x86_64
diff --git a/test/tools/llvm-objdump/X86/Inputs/dylibSubFramework.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/dylibSubFramework.macho-x86_64
new file mode 100755
index 000000000000..3036c27f52cf
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/dylibSubFramework.macho-x86_64
diff --git a/test/tools/llvm-objdump/X86/Inputs/dylibSubLibrary.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/dylibSubLibrary.macho-x86_64
new file mode 100755
index 000000000000..dafee5f8db3f
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/dylibSubLibrary.macho-x86_64
diff --git a/test/tools/llvm-objdump/X86/Inputs/dylibSubUmbrella.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/dylibSubUmbrella.macho-x86_64
new file mode 100755
index 000000000000..1e42a4f4ebbc
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/dylibSubUmbrella.macho-x86_64
diff --git a/test/tools/llvm-objdump/X86/Inputs/exeThread.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/exeThread.macho-x86_64
new file mode 100755
index 000000000000..93fe1db83f27
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/exeThread.macho-x86_64
diff --git a/test/tools/llvm-objdump/X86/Inputs/hello.exe.macho-i386 b/test/tools/llvm-objdump/X86/Inputs/hello.exe.macho-i386
new file mode 100755
index 000000000000..b1f7bd8ecfb1
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/hello.exe.macho-i386
diff --git a/test/tools/llvm-objdump/X86/Inputs/hello.exe.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/hello.exe.macho-x86_64
new file mode 100755
index 000000000000..d004bedf6ad4
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/hello.exe.macho-x86_64
diff --git a/test/tools/llvm-objdump/X86/Inputs/hello.obj.macho-i386 b/test/tools/llvm-objdump/X86/Inputs/hello.obj.macho-i386
new file mode 100644
index 000000000000..b69d4beb60dc
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/hello.obj.macho-i386
diff --git a/test/tools/llvm-objdump/X86/Inputs/hello.obj.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/hello.obj.macho-x86_64
new file mode 100644
index 000000000000..2b59a1cfc63b
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/hello.obj.macho-x86_64
diff --git a/test/tools/llvm-objdump/X86/Inputs/hello_cpp.exe.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/hello_cpp.exe.macho-x86_64
new file mode 100755
index 000000000000..6b54b15c0c5d
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/hello_cpp.exe.macho-x86_64
diff --git a/test/tools/llvm-objdump/X86/Inputs/linkerOption.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/linkerOption.macho-x86_64
new file mode 100644
index 000000000000..38053c5ce9a6
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/linkerOption.macho-x86_64
diff --git a/test/tools/llvm-objdump/X86/Inputs/macho-universal-archive.x86_64.i386 b/test/tools/llvm-objdump/X86/Inputs/macho-universal-archive.x86_64.i386
new file mode 100644
index 000000000000..1660714c68ea
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/macho-universal-archive.x86_64.i386
diff --git a/test/tools/llvm-objdump/X86/Inputs/macho-universal.x86_64.i386 b/test/tools/llvm-objdump/X86/Inputs/macho-universal.x86_64.i386
new file mode 100755
index 000000000000..36d5fc29d681
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/Inputs/macho-universal.x86_64.i386
diff --git a/test/tools/llvm-objdump/Inputs/out-of-section-sym.elf-i386 b/test/tools/llvm-objdump/X86/Inputs/out-of-section-sym.elf-i386
index 4c7158498baf..4c7158498baf 100644
--- a/test/tools/llvm-objdump/Inputs/out-of-section-sym.elf-i386
+++ b/test/tools/llvm-objdump/X86/Inputs/out-of-section-sym.elf-i386
diff --git a/test/tools/llvm-objdump/Inputs/trivial.obj.elf-i386 b/test/tools/llvm-objdump/X86/Inputs/trivial.obj.elf-i386
index fdc48743a886..fdc48743a886 100644
--- a/test/tools/llvm-objdump/Inputs/trivial.obj.elf-i386
+++ b/test/tools/llvm-objdump/X86/Inputs/trivial.obj.elf-i386
diff --git a/test/tools/llvm-objdump/disassembly-show-raw.test b/test/tools/llvm-objdump/X86/disassembly-show-raw.test
index e9956a5ebe4b..e9956a5ebe4b 100644
--- a/test/tools/llvm-objdump/disassembly-show-raw.test
+++ b/test/tools/llvm-objdump/X86/disassembly-show-raw.test
diff --git a/test/tools/llvm-objdump/lit.local.cfg b/test/tools/llvm-objdump/X86/lit.local.cfg
index c8625f4d9d24..c8625f4d9d24 100644
--- a/test/tools/llvm-objdump/lit.local.cfg
+++ b/test/tools/llvm-objdump/X86/lit.local.cfg
diff --git a/test/tools/llvm-objdump/X86/macho-private-headers.test b/test/tools/llvm-objdump/X86/macho-private-headers.test
new file mode 100644
index 000000000000..c80bb083af37
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/macho-private-headers.test
@@ -0,0 +1,445 @@
+// RUN: llvm-objdump -p %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s
+// RUN: llvm-objdump -p %p/Inputs/hello.exe.macho-x86_64 \
+// RUN:     | FileCheck %s -check-prefix=EXE
+// RUN: llvm-objdump -p %p/Inputs/dylibLoadKinds.macho-x86_64 \
+// RUN:     | FileCheck %s -check-prefix=LOAD
+// RUN: llvm-objdump -p %p/Inputs/linkerOption.macho-x86_64 \
+// RUN:     | FileCheck %s -check-prefix=LD_OPT
+// RUN: llvm-objdump -p %p/Inputs/dylibSubFramework.macho-x86_64 \
+// RUN:     | FileCheck %s -check-prefix=SUB_FRAME
+// RUN: llvm-objdump -p %p/Inputs/dylibSubUmbrella.macho-x86_64 \
+// RUN:     | FileCheck %s -check-prefix=SUB_UMB
+// RUN: llvm-objdump -p %p/Inputs/dylibSubLibrary.macho-x86_64 \
+// RUN:     | FileCheck %s -check-prefix=SUB_LIB
+// RUN: llvm-objdump -p %p/Inputs/dylibSubClient.macho-x86_64 \
+// RUN:     | FileCheck %s -check-prefix=SUB_CLI
+// RUN: llvm-objdump -p %p/Inputs/dylibRoutines.macho-x86_64 \
+// RUN:     | FileCheck %s -check-prefix=ROUTINE
+// RUN: llvm-objdump -p %p/Inputs/exeThread.macho-x86_64 \
+// RUN:     | FileCheck %s -check-prefix=THREAD
+// RUN: llvm-objdump -macho -p -arch i386 %p/Inputs/macho-universal.x86_64.i386 \
+// RUN:     | FileCheck %s -check-prefix=FATi386
+
+CHECK: Mach header
+CHECK:       magic cputype cpusubtype  caps    filetype ncmds sizeofcmds      flags
+CHECK: MH_MAGIC_64  X86_64        ALL  0x00      OBJECT     3        496 SUBSECTIONS_VIA_SYMBOLS
+CHECK: Load command 0
+CHECK:       cmd LC_SEGMENT_64
+CHECK:   cmdsize 392
+CHECK:   segname 
+CHECK:    vmaddr 0x0000000000000000
+CHECK:    vmsize 0x00000000000000a8
+CHECK:   fileoff 528
+CHECK:  filesize 168
+CHECK:   maxprot rwx
+CHECK:  initprot rwx
+CHECK:    nsects 4
+CHECK:     flags (none)
+CHECK: Section
+CHECK:   sectname __text
+CHECK:    segname __TEXT
+CHECK:       addr 0x0000000000000000
+CHECK:       size 0x000000000000003b
+CHECK:     offset 528
+CHECK:      align 2^4 (16)
+CHECK:     reloff 696
+CHECK:     nreloc 2
+CHECK:       type S_REGULAR
+CHECK: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS
+CHECK:  reserved1 0
+CHECK:  reserved2 0
+CHECK: Section
+CHECK:   sectname __cstring
+CHECK:    segname __TEXT
+CHECK:       addr 0x000000000000003b
+CHECK:       size 0x000000000000000d
+CHECK:     offset 587
+CHECK:      align 2^0 (1)
+CHECK:     reloff 0
+CHECK:     nreloc 0
+CHECK:       type S_CSTRING_LITERALS
+CHECK: attributes (none)
+CHECK:  reserved1 0
+CHECK:  reserved2 0
+CHECK: Section
+CHECK:   sectname __compact_unwind
+CHECK:    segname __LD
+CHECK:       addr 0x0000000000000048
+CHECK:       size 0x0000000000000020
+CHECK:     offset 600
+CHECK:      align 2^3 (8)
+CHECK:     reloff 712
+CHECK:     nreloc 1
+CHECK:       type S_REGULAR
+CHECK: attributes DEBUG
+CHECK:  reserved1 0
+CHECK:  reserved2 0
+CHECK: Section
+CHECK:   sectname __eh_frame
+CHECK:    segname __TEXT
+CHECK:       addr 0x0000000000000068
+CHECK:       size 0x0000000000000040
+CHECK:     offset 632
+CHECK:      align 2^3 (8)
+CHECK:     reloff 0
+CHECK:     nreloc 0
+CHECK:       type S_COALESCED
+CHECK: attributes NO_TOC STRIP_STATIC_SYMS LIVE_SUPPORT
+CHECK:  reserved1 0
+CHECK:  reserved2 0
+CHECK: Load command 1
+CHECK:      cmd LC_SYMTAB
+CHECK:  cmdsize 24
+CHECK:   symoff 720
+CHECK:    nsyms 5
+CHECK:   stroff 800
+CHECK:  strsize 44
+CHECK: Load command 2
+CHECK:             cmd LC_DYSYMTAB
+CHECK:         cmdsize 80
+CHECK:       ilocalsym 0
+CHECK:       nlocalsym 2
+CHECK:      iextdefsym 2
+CHECK:      nextdefsym 2
+CHECK:       iundefsym 4
+CHECK:       nundefsym 1
+CHECK:          tocoff 0
+CHECK:            ntoc 0
+CHECK:       modtaboff 0
+CHECK:         nmodtab 0
+CHECK:    extrefsymoff 0
+CHECK:     nextrefsyms 0
+CHECK:  indirectsymoff 0
+CHECK:   nindirectsyms 0
+CHECK:       extreloff 0
+CHECK:         nextrel 0
+CHECK:       locreloff 0
+CHECK:         nlocrel 0
+
+EXE: Mach header
+EXE:       magic cputype cpusubtype  caps    filetype ncmds sizeofcmds      flags
+EXE: MH_MAGIC_64  X86_64        ALL LIB64     EXECUTE    16       1296   NOUNDEFS DYLDLINK TWOLEVEL PIE
+EXE: Load command 0
+EXE:       cmd LC_SEGMENT_64
+EXE:   cmdsize 72
+EXE:   segname __PAGEZERO
+EXE:    vmaddr 0x0000000000000000
+EXE:    vmsize 0x0000000100000000
+EXE:   fileoff 0
+EXE:  filesize 0
+EXE:   maxprot ---
+EXE:  initprot ---
+EXE:    nsects 0
+EXE:     flags (none)
+EXE: Load command 1
+EXE:       cmd LC_SEGMENT_64
+EXE:   cmdsize 552
+EXE:   segname __TEXT
+EXE:    vmaddr 0x0000000100000000
+EXE:    vmsize 0x0000000000001000
+EXE:   fileoff 0
+EXE:  filesize 4096
+EXE:   maxprot rwx
+EXE:  initprot r-x
+EXE:    nsects 6
+EXE:     flags (none)
+EXE: Section
+EXE:   sectname __text
+EXE:    segname __TEXT
+EXE:       addr 0x0000000100000f30
+EXE:       size 0x000000000000003b
+EXE:     offset 3888
+EXE:      align 2^4 (16)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_REGULAR
+EXE: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS
+EXE:  reserved1 0
+EXE:  reserved2 0
+EXE: Section
+EXE:   sectname __stubs
+EXE:    segname __TEXT
+EXE:       addr 0x0000000100000f6c
+EXE:       size 0x0000000000000006
+EXE:     offset 3948
+EXE:      align 2^1 (2)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_SYMBOL_STUBS
+EXE: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS
+EXE:  reserved1 0 (index into indirect symbol table)
+EXE:  reserved2 6 (size of stubs)
+EXE: Section
+EXE:   sectname __stub_helper
+EXE:    segname __TEXT
+EXE:       addr 0x0000000100000f74
+EXE:       size 0x000000000000001a
+EXE:     offset 3956
+EXE:      align 2^2 (4)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_REGULAR
+EXE: attributes PURE_INSTRUCTIONS SOME_INSTRUCTIONS
+EXE:  reserved1 0
+EXE:  reserved2 0
+EXE: Section
+EXE:   sectname __cstring
+EXE:    segname __TEXT
+EXE:       addr 0x0000000100000f8e
+EXE:       size 0x000000000000000d
+EXE:     offset 3982
+EXE:      align 2^0 (1)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_CSTRING_LITERALS
+EXE: attributes (none)
+EXE:  reserved1 0
+EXE:  reserved2 0
+EXE: Section
+EXE:   sectname __unwind_info
+EXE:    segname __TEXT
+EXE:       addr 0x0000000100000f9b
+EXE:       size 0x0000000000000048
+EXE:     offset 3995
+EXE:      align 2^0 (1)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_REGULAR
+EXE: attributes (none)
+EXE:  reserved1 0
+EXE:  reserved2 0
+EXE: Section
+EXE:   sectname __eh_frame
+EXE:    segname __TEXT
+EXE:       addr 0x0000000100000fe8
+EXE:       size 0x0000000000000018
+EXE:     offset 4072
+EXE:      align 2^3 (8)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_REGULAR
+EXE: attributes (none)
+EXE:  reserved1 0
+EXE:  reserved2 0
+EXE: Load command 2
+EXE:       cmd LC_SEGMENT_64
+EXE:   cmdsize 232
+EXE:   segname __DATA
+EXE:    vmaddr 0x0000000100001000
+EXE:    vmsize 0x0000000000001000
+EXE:   fileoff 4096
+EXE:  filesize 4096
+EXE:   maxprot rwx
+EXE:  initprot rw-
+EXE:    nsects 2
+EXE:     flags (none)
+EXE: Section
+EXE:   sectname __nl_symbol_ptr
+EXE:    segname __DATA
+EXE:       addr 0x0000000100001000
+EXE:       size 0x0000000000000010
+EXE:     offset 4096
+EXE:      align 2^3 (8)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_NON_LAZY_SYMBOL_POINTERS
+EXE: attributes (none)
+EXE:  reserved1 1 (index into indirect symbol table)
+EXE:  reserved2 0
+EXE: Section
+EXE:   sectname __la_symbol_ptr
+EXE:    segname __DATA
+EXE:       addr 0x0000000100001010
+EXE:       size 0x0000000000000008
+EXE:     offset 4112
+EXE:      align 2^3 (8)
+EXE:     reloff 0
+EXE:     nreloc 0
+EXE:       type S_LAZY_SYMBOL_POINTERS
+EXE: attributes (none)
+EXE:  reserved1 3 (index into indirect symbol table)
+EXE:  reserved2 0
+EXE: Load command 3
+EXE:       cmd LC_SEGMENT_64
+EXE:   cmdsize 72
+EXE:   segname __LINKEDIT
+EXE:    vmaddr 0x0000000100002000
+EXE:    vmsize 0x0000000000001000
+EXE:   fileoff 8192
+EXE:  filesize 304
+EXE:   maxprot rwx
+EXE:  initprot r--
+EXE:    nsects 0
+EXE:     flags (none)
+EXE: Load command 4
+EXE:             cmd LC_DYLD_INFO_ONLY
+EXE:         cmdsize 48
+EXE:      rebase_off 8192
+EXE:     rebase_size 8
+EXE:        bind_off 8200
+EXE:       bind_size 24
+EXE:   weak_bind_off 0
+EXE:  weak_bind_size 0
+EXE:   lazy_bind_off 8224
+EXE:  lazy_bind_size 16
+EXE:      export_off 8240
+EXE:     export_size 48
+EXE: Load command 5
+EXE:      cmd LC_SYMTAB
+EXE:  cmdsize 24
+EXE:   symoff 8360
+EXE:    nsyms 4
+EXE:   stroff 8440
+EXE:  strsize 56
+EXE: Load command 6
+EXE:             cmd LC_DYSYMTAB
+EXE:         cmdsize 80
+EXE:       ilocalsym 0
+EXE:       nlocalsym 0
+EXE:      iextdefsym 0
+EXE:      nextdefsym 2
+EXE:       iundefsym 2
+EXE:       nundefsym 2
+EXE:          tocoff 0
+EXE:            ntoc 0
+EXE:       modtaboff 0
+EXE:         nmodtab 0
+EXE:    extrefsymoff 0
+EXE:     nextrefsyms 0
+EXE:  indirectsymoff 8424
+EXE:   nindirectsyms 4
+EXE:       extreloff 0
+EXE:         nextrel 0
+EXE:       locreloff 0
+EXE:         nlocrel 0
+EXE: Load command 7
+EXE:           cmd LC_LOAD_DYLINKER
+EXE:       cmdsize 32
+EXE:          name /usr/lib/dyld (offset 12)
+EXE: Load command 8
+EXE:      cmd LC_UUID
+EXE:  cmdsize 24
+EXE:     uuid 65C2DD41-79B0-3B34-871B-8CB3446AB762
+EXE: Load command 9
+EXE:       cmd LC_VERSION_MIN_MACOSX
+EXE:   cmdsize 16
+EXE:   version 10.9
+EXE:       sdk 10.9
+EXE: Load command 10
+EXE:       cmd LC_SOURCE_VERSION
+EXE:   cmdsize 16
+EXE:   version 0.0
+EXE: Load command 11
+EXE:        cmd LC_MAIN
+EXE:    cmdsize 24
+EXE:   entryoff 3888
+EXE:  stacksize 0
+EXE: Load command 12
+EXE:           cmd LC_LOAD_DYLIB
+EXE:       cmdsize 56
+EXE:          name /usr/lib/libSystem.B.dylib (offset 24)
+EXE:       current version 1197.1.1
+EXE: compatibility version 1.0.0
+EXE: Load command 13
+EXE:       cmd LC_FUNCTION_STARTS
+EXE:   cmdsize 16
+EXE:   dataoff 8288
+EXE:  datasize 8
+EXE: Load command 14
+EXE:       cmd LC_DATA_IN_CODE
+EXE:   cmdsize 16
+EXE:   dataoff 8296
+EXE:  datasize 0
+EXE: Load command 15
+EXE:       cmd LC_DYLIB_CODE_SIGN_DRS
+EXE:   cmdsize 16
+EXE:   dataoff 8296
+EXE:  datasize 64
+
+
+LOAD: Load command 10
+LOAD:           cmd LC_LOAD_DYLIB
+LOAD:       cmdsize 48
+LOAD:          name /usr/lib/foo1.dylib (offset 24)
+LOAD:       current version 0.0.0
+LOAD: compatibility version 0.0.0
+LOAD: Load command 11
+LOAD:           cmd LC_LOAD_WEAK_DYLIB
+LOAD:       cmdsize 48
+LOAD:          name /usr/lib/foo2.dylib (offset 24)
+LOAD:       current version 0.0.0
+LOAD: compatibility version 0.0.0
+LOAD: Load command 12
+LOAD:           cmd LC_REEXPORT_DYLIB
+LOAD:       cmdsize 48
+LOAD:          name /usr/lib/foo3.dylib (offset 24)
+LOAD:       current version 0.0.0
+LOAD: compatibility version 0.0.0
+LOAD: Load command 13
+LOAD:           cmd LC_LAZY_LOAD_DYLIB
+LOAD:       cmdsize 48
+LOAD:          name /usr/lib/foo4.dylib (offset 24)
+LOAD:       current version 0.0.0
+LOAD: compatibility version 0.0.0
+
+LD_OPT: Load command 4
+LD_OPT:      cmd LC_LINKER_OPTION
+LD_OPT:  cmdsize 24
+LD_OPT:    count 1
+LD_OPT:   string #1 -lc++
+LD_OPT: Load command 5
+LD_OPT:      cmd LC_LINKER_OPTION
+LD_OPT:  cmdsize 40
+LD_OPT:    count 2
+LD_OPT:   string #1 -framework
+LD_OPT:   string #2 Foundation
+
+SUB_FRAME: Load command 10
+SUB_FRAME:           cmd LC_SUB_FRAMEWORK
+SUB_FRAME:       cmdsize 16
+SUB_FRAME:      umbrella Bar (offset 12)
+
+SUB_UMB: Load command 5
+SUB_UMB:           cmd LC_SUB_UMBRELLA
+SUB_UMB:       cmdsize 16
+SUB_UMB:  sub_umbrella Foo (offset 12)
+
+SUB_LIB: Load command 5
+SUB_LIB:           cmd LC_SUB_LIBRARY
+SUB_LIB:       cmdsize 20
+SUB_LIB:   sub_library libfoo (offset 12)
+
+SUB_CLI: Load command 10
+SUB_CLI:           cmd LC_SUB_CLIENT
+SUB_CLI:       cmdsize 16
+SUB_CLI:        client bar (offset 12)
+
+ROUTINE: Load command 6
+ROUTINE:           cmd LC_ROUTINES_64
+ROUTINE:       cmdsize 72
+ROUTINE:  init_address 0x0000000000000f80
+ROUTINE:   init_module 0
+ROUTINE:     reserved1 0
+ROUTINE:     reserved2 0
+ROUTINE:     reserved3 0
+ROUTINE:     reserved4 0
+ROUTINE:     reserved5 0
+ROUTINE:     reserved6 0
+
+THREAD: Load command 10
+THREAD:         cmd LC_UNIXTHREAD
+THREAD:     cmdsize 184
+THREAD:      flavor x86_THREAD_STATE64
+THREAD:       count x86_THREAD_STATE64_COUNT
+THREAD:    rax  0x0000000000000000 rbx 0x0000000000000000 rcx  0x0000000000000000
+THREAD:    rdx  0x0000000000000000 rdi 0x0000000000000000 rsi  0x0000000000000000
+THREAD:    rbp  0x0000000000000000 rsp 0x0000000000000000 r8   0x0000000000000000
+THREAD:     r9  0x0000000000000000 r10 0x0000000000000000 r11  0x0000000000000000
+THREAD:    r12  0x0000000000000000 r13 0x0000000000000000 r14  0x0000000000000000
+THREAD:    r15  0x0000000000000000 rip 0x0000000100000d00
+THREAD: rflags  0x0000000000000000 cs  0x0000000000000000 fs   0x0000000000000000
+THREAD:     gs  0x0000000000000000
+
+FATi386: Mach header
+FATi386:       magic cputype cpusubtype  caps    filetype ncmds sizeofcmds      flags
+FATi386:    MH_MAGIC    I386        ALL  0x00     EXECUTE    16        716   NOUNDEFS DYLDLINK TWOLEVEL PIE MH_NO_HEAP_EXECUTION
diff --git a/test/tools/llvm-objdump/X86/macho-symbolized-disassembly.test b/test/tools/llvm-objdump/X86/macho-symbolized-disassembly.test
new file mode 100644
index 000000000000..1e1080a30f06
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/macho-symbolized-disassembly.test
@@ -0,0 +1,38 @@
+// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s -check-prefix=OBJ
+// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello.exe.macho-x86_64 | FileCheck %s -check-prefix=EXE
+// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/ObjC.obj.macho-x86_64 | FileCheck %s -check-prefix=ObjC-OBJ
+// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/ObjC.exe.macho-x86_64 | FileCheck %s -check-prefix=ObjC-EXE
+// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello_cpp.exe.macho-x86_64 | FileCheck %s -check-prefix=CXX-EXE
+
+// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello.obj.macho-i386 | FileCheck %s -check-prefix=i386-OBJ
+// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello.exe.macho-i386 | FileCheck %s -check-prefix=i386-EXE
+
+OBJ: 0000000000000008	leaq	L_.str(%rip), %rax      ## literal pool for: "Hello world\n"
+OBJ: 0000000000000026	callq	_printf
+
+EXE: 0000000100000f38	leaq	0x4f(%rip), %rax        ## literal pool for: "Hello world\n"
+EXE: 0000000100000f56	callq	0x100000f6c             ## symbol stub for: _printf
+
+ObjC-OBJ: 0000000000000008	leaq	0xb1(%rip), %rax        ## Objc cfstring ref: @"The current date and time is: %@"
+ObjC-OBJ: 0000000000000016	movq	0x4b(%rip), %rcx        ## Objc class ref: NSObject
+ObjC-OBJ: 000000000000001d	movq	0x64(%rip), %rsi        ## Objc selector ref: new
+ObjC-OBJ: 0000000000000034	movq	0x35(%rip), %rax        ## Objc class ref: NSDate
+ObjC-OBJ: 000000000000003b	movq	0x4e(%rip), %rsi        ## Objc selector ref: date
+
+ObjC-EXE: 0000000100000ee8	leaq	0x159(%rip), %rax       ## Objc cfstring ref: @"The current date and time is: %@"
+ObjC-EXE: 0000000100000ef6	movq	0x13b(%rip), %rcx       ## Objc class ref: _OBJC_CLASS_$_NSObject
+ObjC-EXE: 0000000100000efd	movq	0x124(%rip), %rsi       ## Objc selector ref: new
+ObjC-EXE: 0000000100000f0b	callq	0x100000f4a             ## Objc message: +[NSObject new]
+ObjC-EXE: 0000000100000f14	movq	0x125(%rip), %rax       ## Objc class ref: _OBJC_CLASS_$_NSDate
+ObjC-EXE: 0000000100000f1b	movq	0x10e(%rip), %rsi       ## Objc selector ref: date
+ObjC-EXE: 0000000100000f25	callq	0x100000f4a             ## Objc message: +[NSDate date]
+ObjC-EXE: 0000000100000f33	callq	0x100000f44             ## symbol stub for: _NSLog
+
+CXX-EXE: 00000001000014cb	callq	__ZNSt3__116__pad_and_outputIcNS_11char_traitsIcEEEENS_19ostreambuf_iteratorIT_T0_EES6_PKS4_S8_S8_RNS_8ios_baseES4_
+
+// FIXME: Demangler depends on host's <cxxabi.h>.
+// std::__1::ostreambuf_iterator<char, std::__1::char_traits<char> > std::__1::__pad_and_output<char, std::__1::char_traits<char> >(std::__1::ostreambuf_iterator<char, std::__1::char_traits<char> >, char const*, char const*, char const*, std::__1::ios_base&, char)
+
+i386-OBJ: 0000002f	calll	_printf
+
+i386-EXE: 00001f6f	calll	0x1f84                  ## symbol stub for: _printf
diff --git a/test/tools/llvm-objdump/X86/macho-symbolized-subtractor-i386.test b/test/tools/llvm-objdump/X86/macho-symbolized-subtractor-i386.test
new file mode 100644
index 000000000000..a0f753bdb7ad
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/macho-symbolized-subtractor-i386.test
@@ -0,0 +1,10 @@
+# RUN: llvm-mc < %s -triple x86_64-apple-darwin -filetype=obj | llvm-objdump -m -d - | FileCheck %s
+
+nop
+x:
+leal	x-y(%eax), %ebx
+.data
+y:
+.quad 0
+
+# CHECK: leal	x-y(%eax), %ebx
diff --git a/test/tools/llvm-objdump/X86/macho-symbolized-subtractor.test b/test/tools/llvm-objdump/X86/macho-symbolized-subtractor.test
new file mode 100644
index 000000000000..a730b5c65fa4
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/macho-symbolized-subtractor.test
@@ -0,0 +1,10 @@
+# RUN: llvm-mc < %s -triple x86_64-apple-darwin -filetype=obj | llvm-objdump -m -d - | FileCheck %s
+
+nop
+x:
+leaq	x-y(%rax), %rbx
+.data
+y:
+.quad 0
+
+# CHECK: leaq	x-y(%rax), %rbx
diff --git a/test/tools/llvm-objdump/X86/macho-universal-x86_64.i386.test b/test/tools/llvm-objdump/X86/macho-universal-x86_64.i386.test
new file mode 100644
index 000000000000..e4fd37a902c4
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/macho-universal-x86_64.i386.test
@@ -0,0 +1,44 @@
+RUN: llvm-objdump %p/Inputs/macho-universal.x86_64.i386 -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex -arch all \
+RUN: | FileCheck %s -check-prefix UEXE-all
+RUN: llvm-objdump %p/Inputs/macho-universal-archive.x86_64.i386 -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex -arch i386 \
+RUN: | FileCheck %s -check-prefix UArchive-i386
+RUN: llvm-objdump %p/Inputs/macho-universal.x86_64.i386 -universal-headers -m \
+RUN: | FileCheck %s -check-prefix FAT
+
+UEXE-all: macho-universal.x86_64.i386 (architecture x86_64):
+UEXE-all: (__TEXT,__text) section
+UEXE-all: _main:
+UEXE-all: 0000000100000f60	pushq	%rbp
+UEXE-all: 0000000100000f61	movq	%rsp, %rbp
+UEXE-all: macho-universal.x86_64.i386 (architecture i386):
+UEXE-all: (__TEXT,__text) section
+UEXE-all: _main:
+UEXE-all: 00001fa0	pushl	%ebp
+UEXE-all: 00001fa1	movl	%esp, %ebp
+
+UArchive-i386: Archive : {{.*}}/macho-universal-archive.x86_64.i386
+UArchive-i386: macho-universal-archive.x86_64.i386(foo.o):
+UArchive-i386: (__TEXT,__text) section
+UArchive-i386: _foo:
+UArchive-i386: 00000000	pushl	%ebp
+UArchive-i386: 00000001	movl	%esp, %ebp
+UArchive-i386: 00000003	popl	%ebp
+UArchive-i386: 00000004	retl
+
+FAT: Fat headers
+FAT: fat_magic FAT_MAGIC
+FAT: nfat_arch 2
+FAT: architecture x86_64
+FAT:     cputype CPU_TYPE_X86_64
+FAT:     cpusubtype CPU_SUBTYPE_X86_64_ALL
+FAT:     capabilities CPU_SUBTYPE_LIB64
+FAT:     offset 4096
+FAT:     size 4360
+FAT:     align 2^12 (4096)
+FAT: architecture i386
+FAT:     cputype CPU_TYPE_I386
+FAT:     cpusubtype CPU_SUBTYPE_I386_ALL
+FAT:     capabilities 0x0
+FAT:     offset 12288
+FAT:     size 4336
+FAT:     align 2^12 (4096)
diff --git a/test/tools/llvm-objdump/out-of-section-sym.test b/test/tools/llvm-objdump/X86/out-of-section-sym.test
index f70dce6a8ed8..f70dce6a8ed8 100644
--- a/test/tools/llvm-objdump/out-of-section-sym.test
+++ b/test/tools/llvm-objdump/X86/out-of-section-sym.test
diff --git a/test/tools/llvm-objdump/coff-large-bss.test b/test/tools/llvm-objdump/coff-large-bss.test
index 2d7643eb61a4..dc0fc6758b60 100644
--- a/test/tools/llvm-objdump/coff-large-bss.test
+++ b/test/tools/llvm-objdump/coff-large-bss.test
@@ -1,6 +1,3 @@
 RUN: llvm-objdump -s %p/Inputs/large-bss.obj.coff-i386 | FileCheck %s
 
-; CHECK:      Contents of section .text:
-: CHECK-NEXT: Contents of section .data:
-: CHECK-NEXT: Contents of section .bss:
-: CHECK-NEXT: <skipping contents of bss section at [0000, 010f)>
+: CHECK: <skipping contents of bss section at [0000, 010f)>
diff --git a/test/tools/llvm-objdump/macho-bad-ordinal.test b/test/tools/llvm-objdump/macho-bad-ordinal.test
new file mode 100644
index 000000000000..16badcc878d8
--- /dev/null
+++ b/test/tools/llvm-objdump/macho-bad-ordinal.test
@@ -0,0 +1,6 @@
+# RUN: llvm-objdump -macho -bind -lazy-bind %p/Inputs/bad-ordinal.macho-x86_64 \
+# RUN:   | FileCheck %s 
+
+
+# CHECK: __DATA   __nl_symbol_ptr    0x100001000 pointer         0 <<bad library ordinal>> dyld_stub_binder
+# CHECK: __DATA   __la_symbol_ptr    0x100001010 <<bad library ordinal>> _printf
diff --git a/test/tools/llvm-objdump/macho-bind.test b/test/tools/llvm-objdump/macho-bind.test
new file mode 100644
index 000000000000..5527bfa83638
--- /dev/null
+++ b/test/tools/llvm-objdump/macho-bind.test
@@ -0,0 +1,10 @@
+# RUN: llvm-objdump -macho -bind %p/Inputs/bind.macho-x86_64 \
+# RUN:   | FileCheck %s  
+
+
+# CHECK:__DATA   __data             0x00001028  pointer  0        flat-namespace      _any
+# CHECK:__DATA   __data             0x00001020  pointer  0        main-executable     _fromApp
+# CHECK:__DATA   __data             0x00001018  pointer  0        this-image          _myfunc
+# CHECK:__DATA   __data             0x00001000  pointer  0        libfoo              _foo
+# CHECK:__DATA   __data             0x00001008  pointer  0        libbar              _bar
+# CHECK:__DATA   __data             0x00001010  pointer  0        libSystem           _malloc
diff --git a/test/tools/llvm-objdump/macho-bind2.test b/test/tools/llvm-objdump/macho-bind2.test
new file mode 100644
index 000000000000..2eee2fcb60b3
--- /dev/null
+++ b/test/tools/llvm-objdump/macho-bind2.test
@@ -0,0 +1,5 @@
+# RUN: llvm-objdump -macho -bind %p/Inputs/bind2.macho-x86_64 | FileCheck %s  
+
+# CHECK: __DATA   __data             0x00001008 pointer         0 libSystem        _malloc
+# CHECK: __DATA   __data             0x00001050 pointer         0 libSystem        _malloc
+# CHECK: __DATA   __data             0x00001458 pointer         0 libSystem        _malloc
diff --git a/test/tools/llvm-objdump/macho-compact-unwind-i386.test b/test/tools/llvm-objdump/macho-compact-unwind-i386.test
new file mode 100644
index 000000000000..9a14c2044c01
--- /dev/null
+++ b/test/tools/llvm-objdump/macho-compact-unwind-i386.test
@@ -0,0 +1,27 @@
+# RUN: llvm-objdump -unwind-info %p/Inputs/compact-unwind.macho-i386 | FileCheck %s
+
+# CHECK: Contents of __compact_unwind section:
+# CHECK:   Entry at offset 0x0:
+# CHECK:     start:                0x0 __Z10test_throwv
+# CHECK:     length:               0x55
+# CHECK:     compact encoding:     0x01010005
+# CHECK-NOT: personality function
+# CHECK-NOT: LSDA
+# CHECK:   Entry at offset 0x14:
+# CHECK:     start:                0x60 __Z11test_catch1v
+# CHECK:     length:               0x6f
+# CHECK:     compact encoding:     0x41000000
+# CHECK:     personality function: 0x288 __pointers + 0x8
+# CHECK:     LSDA:                 0x180 GCC_except_table1
+# CHECK:   Entry at offset 0x28:
+# CHECK:     start:                0xd0 __Z11test_catch2v
+# CHECK:     length:               0x75
+# CHECK:     compact encoding:     0x41000000
+# CHECK:     personality function: 0x288 __pointers + 0x8
+# CHECK:     LSDA:                 0x1a8 GCC_except_table2
+# CHECK:   Entry at offset 0x3c:
+# CHECK:     start:                0x150 __Z3foov
+# CHECK:     length:               0x22
+# CHECK:     compact encoding:     0x01000000
+# CHECK-NOT: personality function
+# CHECK-NOT: LSDA
diff --git a/test/tools/llvm-objdump/macho-compact-unwind-x86_64.test b/test/tools/llvm-objdump/macho-compact-unwind-x86_64.test
new file mode 100644
index 000000000000..852800d357f2
--- /dev/null
+++ b/test/tools/llvm-objdump/macho-compact-unwind-x86_64.test
@@ -0,0 +1,27 @@
+# RUN: llvm-objdump -unwind-info %p/Inputs/compact-unwind.macho-x86_64 | FileCheck %s
+
+# CHECK: Contents of __compact_unwind section:
+# CHECK:   Entry at offset 0x0:
+# CHECK:     start:                0x1 __Z10test_throwv + 0x1
+# CHECK:     length:               0x44
+# CHECK:     compact encoding:     0x01000000
+# CHECK-NOT: personality function
+# CHECK-NOT: LSDA
+# CHECK:   Entry at offset 0x20:
+# CHECK:     start:                0x50 __Z11test_catch1v
+# CHECK:     length:               0x71
+# CHECK:     compact encoding:     0x41000000
+# CHECK:     personality function: 0x0 ___gxx_personality_v0
+# CHECK:     LSDA:                 0x180 GCC_except_table1
+# CHECK:   Entry at offset 0x40:
+# CHECK:     start:                0xd0 __Z11test_catch2v
+# CHECK:     length:               0x77
+# CHECK:     compact encoding:     0x41000000
+# CHECK:     personality function: 0x0 ___gxx_personality_v0
+# CHECK:     LSDA:                 0x1a8 GCC_except_table2
+# CHECK:   Entry at offset 0x60:
+# CHECK:     start:                0x150 __Z3foov
+# CHECK:     length:               0x25
+# CHECK:     compact encoding:     0x01000000
+# CHECK-NOT: personality function
+# CHECK-NOT: LSDA
diff --git a/test/tools/llvm-objdump/macho-exports-trie.test b/test/tools/llvm-objdump/macho-exports-trie.test
new file mode 100644
index 000000000000..473c7cbfa4b2
--- /dev/null
+++ b/test/tools/llvm-objdump/macho-exports-trie.test
@@ -0,0 +1,11 @@
+# RUN: llvm-objdump -macho -exports-trie -arch x86_64 \
+# RUN:   %p/Inputs/exports-trie.macho-x86_64 2>/dev/null | FileCheck %s
+
+
+# CHECK:[re-export] _malloc (from libSystem)
+# CHECK:[re-export] _myfree (_free from libSystem)
+# CHECK:0x00000F70  _myWeak [weak_def]
+# CHECK:0x00001018  _myTLV [per-thread]
+# CHECK:0x12345678  _myAbs [absolute]
+# CHECK:0x00000F60  _foo
+
diff --git a/test/tools/llvm-objdump/macho-lazy-bind.test b/test/tools/llvm-objdump/macho-lazy-bind.test
new file mode 100644
index 000000000000..088ea0641f0b
--- /dev/null
+++ b/test/tools/llvm-objdump/macho-lazy-bind.test
@@ -0,0 +1,7 @@
+# RUN: llvm-objdump -macho -lazy-bind %p/Inputs/lazy-bind.macho-x86_64 \
+# RUN:   | FileCheck %s 
+
+
+# CHECK: __DATA   __la_symbol_ptr    0x100001010   libfoo          _foo
+# CHECK: __DATA   __la_symbol_ptr    0x100001018   libbar          _bar
+# CHECK: __DATA   __la_symbol_ptr    0x100001020   libSystem       _malloc
diff --git a/test/tools/llvm-objdump/macho-rebase.test b/test/tools/llvm-objdump/macho-rebase.test
new file mode 100644
index 000000000000..96df39058959
--- /dev/null
+++ b/test/tools/llvm-objdump/macho-rebase.test
@@ -0,0 +1,15 @@
+# RUN: llvm-objdump -macho -rebase -arch x86_64 \
+# RUN:   %p/Inputs/rebase.macho-x86_64 | FileCheck %s
+
+
+# CHECK: segment  section            address     type
+# CHECK: __DATA   __data             0x00001010  pointer
+# CHECK: __DATA   __data             0x00001028  pointer
+# CHECK: __DATA   __data             0x00001030  pointer
+# CHECK: __DATA   __data             0x00001038  pointer
+# CHECK: __DATA   __data             0x00001040  pointer
+# CHECK: __DATA   __data             0x00001258  pointer
+# CHECK: __DATA   __mystuff          0x00001278  pointer
+# CHECK: __DATA   __mystuff          0x00001288  pointer
+# CHECK: __DATA   __mystuff          0x00001298  pointer
+# CHECK: __DATA   __mystuff          0x000012A8  pointer
diff --git a/test/tools/llvm-objdump/macho-unwind-info-arm64.test b/test/tools/llvm-objdump/macho-unwind-info-arm64.test
new file mode 100644
index 000000000000..712edef50bd8
--- /dev/null
+++ b/test/tools/llvm-objdump/macho-unwind-info-arm64.test
@@ -0,0 +1,28 @@
+# RUN: llvm-objdump -unwind-info %p/Inputs/unwind-info.macho-arm64 2>/dev/null | FileCheck %s
+
+# The 2nd level index here is "regular", including all offsets & encodings in
+# full.
+
+# CHECK: Contents of __unwind_info section:
+# CHECK:   Version:                                   0x1
+# CHECK:   Common encodings array section offset:     0x1c
+# CHECK:   Number of common encodings in array:       0x2
+# CHECK:   Personality function array section offset: 0x24
+# CHECK:   Number of personality functions in array:  0x1
+# CHECK:   Index array section offset:                0x28
+# CHECK:   Number of indices in array:                0x2
+# CHECK:   Common encodings: (count = 2)
+# CHECK:     encoding[0]: 0x04000000
+# CHECK:     encoding[1]: 0x54000000
+# CHECK:   Personality functions: (count = 1)
+# CHECK:     personality[1]: 0x00008008
+# CHECK:   Top level indices: (count = 2)
+# CHECK:     [0]: function offset=0x00007d64, 2nd level page offset=0x00000050, LSDA offset=0x00000040
+# CHECK:     [1]: function offset=0x00007eb5, 2nd level page offset=0x00000000, LSDA offset=0x00000050
+# CHECK:   LSDA descriptors:
+# CHECK:     [0]: function offset=0x00007d90, LSDA offset=0x00007f44
+# CHECK:     [1]: function offset=0x00007e10, LSDA offset=0x00007f6c
+# CHECK:   Second level indices:
+# CHECK:     Second level index[0]: offset in section=0x00000050, base function offset=0x00007d64
+# CHECK:       [0]: function offset=0x00007d90, encoding=0x78563412
+# CHECK:       [1]: function offset=0x00007e10, encoding=0x21436587
diff --git a/test/tools/llvm-objdump/macho-unwind-info-no-relocs.test b/test/tools/llvm-objdump/macho-unwind-info-no-relocs.test
new file mode 100644
index 000000000000..3adad658c087
--- /dev/null
+++ b/test/tools/llvm-objdump/macho-unwind-info-no-relocs.test
@@ -0,0 +1,8 @@
+# RUN: llvm-objdump -unwind-info %p/Inputs/unwind-info-no-relocs.macho-x86_64 2>/dev/null | FileCheck %s
+
+# Make sure we can deal with __compact_unwind sections that don't have helpful
+# relocations.
+
+# CHECK: Contents of __compact_unwind section:
+# CHECK:   Entry at offset 0x0:
+# CHECK:     start: 0x100000f7e
diff --git a/test/tools/llvm-objdump/macho-unwind-info-x86_64.test b/test/tools/llvm-objdump/macho-unwind-info-x86_64.test
new file mode 100644
index 000000000000..1333d9a4cdb9
--- /dev/null
+++ b/test/tools/llvm-objdump/macho-unwind-info-x86_64.test
@@ -0,0 +1,29 @@
+# RUN: llvm-objdump -unwind-info %p/Inputs/unwind-info.macho-x86_64 2>/dev/null | FileCheck %s
+
+# The 2nd level index in this file is in compressed form, referring to both
+# common and packed encodings.
+
+# CHECK:Contents of __unwind_info section:
+# CHECK:  Version:                                   0x1
+# CHECK:  Common encodings array section offset:     0x1c
+# CHECK:  Number of common encodings in array:       0x2
+# CHECK:  Personality function array section offset: 0x24
+# CHECK:  Number of personality functions in array:  0x1
+# CHECK:  Index array section offset:                0x28
+# CHECK:  Number of indices in array:                0x2
+# CHECK:  Common encodings: (count = 2)
+# CHECK:    encoding[0]: 0x01000000
+# CHECK:    encoding[1]: 0x51000000
+# CHECK:  Personality functions: (count = 1)
+# CHECK:    personality[1]: 0x00001018
+# CHECK:  Top level indices: (count = 2)
+# CHECK:    [0]: function offset=0x00000d70, 2nd level page offset=0x00000050, LSDA offset=0x00000040
+# CHECK:    [1]: function offset=0x00000eab, 2nd level page offset=0x00000000, LSDA offset=0x00000050
+# CHECK:  LSDA descriptors:
+# CHECK:    [0]: function offset=0x00000db0, LSDA offset=0x00000f0c
+# CHECK:    [1]: function offset=0x00000e20, LSDA offset=0x00000f34
+# CHECK:  Second level indices:
+# CHECK:    Second level index[0]: offset in section=0x00000050, base function offset=0x00000d70
+# CHECK:      [0]: function offset=0x00000d70, encoding[0]=0x01000000
+# CHECK:      [1]: function offset=0x00000db0, encoding[1]=0x51000000
+# CHECK:      [2]: function offset=0x00000e20, encoding[2]=0x01234567
diff --git a/test/tools/llvm-objdump/macho-weak-bind.test b/test/tools/llvm-objdump/macho-weak-bind.test
new file mode 100644
index 000000000000..1013132a7506
--- /dev/null
+++ b/test/tools/llvm-objdump/macho-weak-bind.test
@@ -0,0 +1,10 @@
+# RUN: llvm-objdump -macho -weak-bind  %p/Inputs/weak-bind.macho-x86_64 \
+# RUN:   | FileCheck %s
+
+
+# CHECK: __DATA   __data             0x100001018   pointer  0        __ZTISt12out_of_range
+# CHECK: __DATA   __data             0x100001020   pointer  0        __ZTISt12out_of_range
+# CHECK: __DATA   __data             0x100001028   pointer  0        __ZTISt12out_of_range
+# CHECK:                                           strong            __ZdlPv
+# CHECK: __DATA   __data             0x100001018   pointer  0        __Znam
+# CHECK:                                           strong            __Znwm
diff --git a/test/tools/llvm-profdata/Inputs/bad-hash.profdata b/test/tools/llvm-profdata/Inputs/bad-hash.proftext
index faa6f4015253..faa6f4015253 100644
--- a/test/tools/llvm-profdata/Inputs/bad-hash.profdata
+++ b/test/tools/llvm-profdata/Inputs/bad-hash.proftext
diff --git a/test/tools/llvm-profdata/Inputs/bar3-1.profdata b/test/tools/llvm-profdata/Inputs/bar3-1.proftext
index 5486e9d84d72..5486e9d84d72 100644
--- a/test/tools/llvm-profdata/Inputs/bar3-1.profdata
+++ b/test/tools/llvm-profdata/Inputs/bar3-1.proftext
diff --git a/test/tools/llvm-profdata/Inputs/c-general.profdata b/test/tools/llvm-profdata/Inputs/c-general.profraw
index e8cef21de5f2..e8cef21de5f2 100644
--- a/test/tools/llvm-profdata/Inputs/c-general.profdata
+++ b/test/tools/llvm-profdata/Inputs/c-general.profraw
diff --git a/test/tools/llvm-profdata/Inputs/compat.profdata.v1 b/test/tools/llvm-profdata/Inputs/compat.profdata.v1
new file mode 100644
index 000000000000..fd17459d091b
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/compat.profdata.v1
diff --git a/test/tools/llvm-profdata/Inputs/empty.proftext b/test/tools/llvm-profdata/Inputs/empty.proftext
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/empty.proftext
diff --git a/test/tools/llvm-profdata/Inputs/extra-word.profdata b/test/tools/llvm-profdata/Inputs/extra-word.proftext
index 67a662909cf0..67a662909cf0 100644
--- a/test/tools/llvm-profdata/Inputs/extra-word.profdata
+++ b/test/tools/llvm-profdata/Inputs/extra-word.proftext
diff --git a/test/tools/llvm-profdata/Inputs/foo3-1.profdata b/test/tools/llvm-profdata/Inputs/foo3-1.proftext
index 14a620043576..14a620043576 100644
--- a/test/tools/llvm-profdata/Inputs/foo3-1.profdata
+++ b/test/tools/llvm-profdata/Inputs/foo3-1.proftext
diff --git a/test/tools/llvm-profdata/Inputs/foo3-2.profdata b/test/tools/llvm-profdata/Inputs/foo3-2.proftext
index 801846e44045..801846e44045 100644
--- a/test/tools/llvm-profdata/Inputs/foo3-2.profdata
+++ b/test/tools/llvm-profdata/Inputs/foo3-2.proftext
diff --git a/test/tools/llvm-profdata/Inputs/foo3bar3-1.profdata b/test/tools/llvm-profdata/Inputs/foo3bar3-1.proftext
index 12157b9f9ada..12157b9f9ada 100644
--- a/test/tools/llvm-profdata/Inputs/foo3bar3-1.profdata
+++ b/test/tools/llvm-profdata/Inputs/foo3bar3-1.proftext
diff --git a/test/tools/llvm-profdata/Inputs/foo3bar3-2.profdata b/test/tools/llvm-profdata/Inputs/foo3bar3-2.profdata
deleted file mode 100644
index f1f10bd6f3df..000000000000
--- a/test/tools/llvm-profdata/Inputs/foo3bar3-2.profdata
+++ /dev/null
@@ -1,13 +0,0 @@
-foo
-3
-3
-17
-19
-23
-
-bar
-3
-3
-29
-31
-37
diff --git a/test/tools/llvm-profdata/Inputs/foo4-1.profdata b/test/tools/llvm-profdata/Inputs/foo4-1.profdata
deleted file mode 100644
index 31d2a2ce7569..000000000000
--- a/test/tools/llvm-profdata/Inputs/foo4-1.profdata
+++ /dev/null
@@ -1,7 +0,0 @@
-foo
-4
-4
-11
-22
-33
-44
diff --git a/test/tools/llvm-profdata/Inputs/foo4-2.profdata b/test/tools/llvm-profdata/Inputs/foo4-2.profdata
deleted file mode 100644
index 01d8309b5ce2..000000000000
--- a/test/tools/llvm-profdata/Inputs/foo4-2.profdata
+++ /dev/null
@@ -1,7 +0,0 @@
-foo
-4
-4
-7
-6
-5
-4
diff --git a/test/tools/llvm-profdata/Inputs/invalid-count-later.profdata b/test/tools/llvm-profdata/Inputs/invalid-count-later.proftext
index 2b61c55a6766..2b61c55a6766 100644
--- a/test/tools/llvm-profdata/Inputs/invalid-count-later.profdata
+++ b/test/tools/llvm-profdata/Inputs/invalid-count-later.proftext
diff --git a/test/tools/llvm-profdata/Inputs/no-counts.profdata b/test/tools/llvm-profdata/Inputs/no-counts.proftext
index 5c1fa15c086f..5c1fa15c086f 100644
--- a/test/tools/llvm-profdata/Inputs/no-counts.profdata
+++ b/test/tools/llvm-profdata/Inputs/no-counts.proftext
diff --git a/test/tools/llvm-profdata/Inputs/overflow.profdata b/test/tools/llvm-profdata/Inputs/overflow.profdata
deleted file mode 100644
index c9a9d697ecfb..000000000000
--- a/test/tools/llvm-profdata/Inputs/overflow.profdata
+++ /dev/null
@@ -1,4 +0,0 @@
-overflow
-1
-1
-9223372036854775808
diff --git a/test/tools/llvm-profdata/Inputs/sample-profile.proftext b/test/tools/llvm-profdata/Inputs/sample-profile.proftext
new file mode 100644
index 000000000000..9dc6d4310da9
--- /dev/null
+++ b/test/tools/llvm-profdata/Inputs/sample-profile.proftext
@@ -0,0 +1,12 @@
+_Z3bari:20301:1437
+1: 1437
+_Z3fooi:7711:610
+1: 610
+main:184019:0
+4: 534
+4.2: 534
+5: 1075
+5.1: 1075
+6: 2080
+7: 534
+9: 2064  _Z3bari:1471  _Z3fooi:631
diff --git a/test/tools/llvm-profdata/c-general.test b/test/tools/llvm-profdata/c-general.test
index 9b6cd7f4828c..01435303d445 100644
--- a/test/tools/llvm-profdata/c-general.test
+++ b/test/tools/llvm-profdata/c-general.test
@@ -7,10 +7,10 @@ REGENERATE: $ CFE=$SRC/tools/clang
 REGENERATE: $ TESTDIR=$SRC/test/tools/llvm-profdata
 REGENERATE: $ CFE_TESTDIR=$CFE/test/Profile
 REGENERATE: $ clang -o a.out -fprofile-instr-generate $CFE_TESTDIR/test/Profile/c-general.c
-REGENERATE: $ LLVM_PROFILE_FILE=$TESTDIR/Inputs/c-general.profdata ./a.out
+REGENERATE: $ LLVM_PROFILE_FILE=$TESTDIR/Inputs/c-general.profraw ./a.out
 
-RUN: llvm-profdata show %p/Inputs/c-general.profdata -o - | FileCheck %s -check-prefix=CHECK
-RUN: llvm-profdata show %p/Inputs/c-general.profdata -o - --function=switches | FileCheck %s -check-prefix=SWITCHES -check-prefix=CHECK
+RUN: llvm-profdata show %p/Inputs/c-general.profraw -o - | FileCheck %s -check-prefix=CHECK
+RUN: llvm-profdata show %p/Inputs/c-general.profraw -o - --function=switches | FileCheck %s -check-prefix=SWITCHES -check-prefix=CHECK
 
 SWITCHES-LABEL: Counters:
 SWITCHES-NEXT:   switches:
diff --git a/test/tools/llvm-profdata/compat.proftext b/test/tools/llvm-profdata/compat.proftext
new file mode 100644
index 000000000000..14da3374b5e9
--- /dev/null
+++ b/test/tools/llvm-profdata/compat.proftext
@@ -0,0 +1,47 @@
+# Compatibility tests for older profile format versions. These ensure
+# that we don't break compatibility with an older profile version
+# without noticing it.
+
+# The input file at %S/Inputs/compat.profdata.v1 was generated with
+# llvm-profdata merge from r214548.
+
+# RUN: llvm-profdata show %S/Inputs/compat.profdata.v1 --function function_count_only --counts | FileCheck %s -check-prefix=FUNC_COUNT_ONLY
+function_count_only
+0
+1
+97531
+# FUNC_COUNT_ONLY:      Hash: 0x{{0+$}}
+# FUNC_COUNT_ONLY-NEXT: Counters: 1
+# FUNC_COUNT_ONLY-NEXT: Function count: 97531
+# FUNC_COUNT_ONLY-NEXT: Block counts: []
+
+# RUN: llvm-profdata show %S/Inputs/compat.profdata.v1 --function "name with spaces" --counts | FileCheck %s -check-prefix=SPACES
+name with spaces
+1024
+2
+0
+0
+# SPACES:      Hash: 0x{{0+}}400
+# SPACES-NEXT: Counters: 2
+# SPACES-NEXT: Function count: 0
+# SPACES-NEXT: Block counts: [0]
+
+# RUN: llvm-profdata show %S/Inputs/compat.profdata.v1 --function large_numbers --counts | FileCheck %s -check-prefix=LARGENUM
+large_numbers
+4611686018427387903
+6
+2305843009213693952
+1152921504606846976
+576460752303423488
+288230376151711744
+144115188075855872
+72057594037927936
+# LARGENUM:      Hash: 0x3fffffffffffffff
+# LARGENUM-NEXT: Counters: 6
+# LARGENUM-NEXT: Function count: 2305843009213693952
+# LARGENUM-NEXT: Block counts: [1152921504606846976, 576460752303423488, 288230376151711744, 144115188075855872, 72057594037927936]
+
+# RUN: llvm-profdata show %S/Inputs/compat.profdata.v1 | FileCheck %s -check-prefix=SUMMARY
+# SUMMARY: Total functions: 3
+# SUMMARY: Maximum function count: 2305843009213693952
+# SUMMARY: Maximum internal block count: 1152921504606846976
diff --git a/test/tools/llvm-profdata/count-mismatch.proftext b/test/tools/llvm-profdata/count-mismatch.proftext
new file mode 100644
index 000000000000..1a2e73fbffdb
--- /dev/null
+++ b/test/tools/llvm-profdata/count-mismatch.proftext
@@ -0,0 +1,40 @@
+# Make sure we don't try to combine counters with the same function
+# name and a matching hash if the number of counters differs
+
+# RUN: llvm-profdata merge %s -o %t.profdata 2>&1 | FileCheck -check-prefix=MERGE_ERRS %s
+# RUN: llvm-profdata show %t.profdata -all-functions -counts > %t.out
+# RUN: FileCheck %s -input-file %t.out
+foo
+1024
+4
+1
+2
+4
+8
+
+# The hash matches, but we can't combine these because the number of
+# counters differs.
+# MERGE_ERRS: count-mismatch.proftext: foo: Function count mismatch
+foo
+1024
+3
+2
+4
+8
+
+# This one does match, so it should combine with the first just fine.
+# CHECK: Hash: 0x{{0+}}400
+# CHECK-NEXT: Counters: 4
+# CHECK-NEXT: Function count: 5
+# CHECK-NEXT: Block counts: [10, 20, 40]
+foo
+1024
+4
+4
+8
+16
+32
+
+# CHECK: Total functions: 1
+# CHECK: Maximum function count: 5
+# CHECK: Maximum internal block count: 40
diff --git a/test/tools/llvm-profdata/errors.test b/test/tools/llvm-profdata/errors.test
deleted file mode 100644
index 28262efe0638..000000000000
--- a/test/tools/llvm-profdata/errors.test
+++ /dev/null
@@ -1,16 +0,0 @@
-RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo4-1.profdata -o %t.out 2>&1 | FileCheck %s --check-prefix=HASH
-HASH: foo4-1.profdata: foo: Function hash mismatch
-
-RUN: llvm-profdata merge %p/Inputs/overflow.profdata %p/Inputs/overflow.profdata -o %t.out 2>&1 | FileCheck %s --check-prefix=OVERFLOW
-OVERFLOW: overflow.profdata: overflow: Counter overflow
-
-RUN: not llvm-profdata show %p/Inputs/invalid-count-later.profdata 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
-RUN: not llvm-profdata merge %p/Inputs/invalid-count-later.profdata %p/Inputs/invalid-count-later.profdata -o %t.out 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
-INVALID-COUNT-LATER: error: {{.*}}invalid-count-later.profdata: Malformed profile data
-
-RUN: not llvm-profdata show %p/Inputs/bad-hash.profdata 2>&1 | FileCheck %s --check-prefix=BAD-HASH
-RUN: not llvm-profdata merge %p/Inputs/bad-hash.profdata %p/Inputs/bad-hash.profdata -o %t.out 2>&1 | FileCheck %s --check-prefix=BAD-HASH
-BAD-HASH: error: {{.*}}bad-hash.profdata: Malformed profile data
-
-RUN: not llvm-profdata show %p/Inputs/no-counts.profdata 2>&1 | FileCheck %s --check-prefix=NO-COUNTS
-NO-COUNTS: error: {{.*}}no-counts.profdata: Malformed profile data
diff --git a/test/tools/llvm-profdata/general.proftext b/test/tools/llvm-profdata/general.proftext
new file mode 100644
index 000000000000..591d2628d776
--- /dev/null
+++ b/test/tools/llvm-profdata/general.proftext
@@ -0,0 +1,56 @@
+
+
+# RUN: llvm-profdata merge %s -o %t.profdata
+
+# RUN: llvm-profdata show %t.profdata --function function_count_only --counts | FileCheck %s -check-prefix=FUNC_COUNT_ONLY
+function_count_only
+0
+1
+97531
+# FUNC_COUNT_ONLY:      Hash: 0x{{0+$}}
+# FUNC_COUNT_ONLY-NEXT: Counters: 1
+# FUNC_COUNT_ONLY-NEXT: Function count: 97531
+# FUNC_COUNT_ONLY-NEXT: Block counts: []
+
+# RUN: llvm-profdata show %t.profdata --function "name with spaces" --counts | FileCheck %s -check-prefix=SPACES
+name with spaces
+1024
+2
+0
+0
+# SPACES:      Hash: 0x{{0+}}400
+# SPACES-NEXT: Counters: 2
+# SPACES-NEXT: Function count: 0
+# SPACES-NEXT: Block counts: [0]
+
+# RUN: llvm-profdata show %t.profdata --function large_numbers --counts | FileCheck %s -check-prefix=LARGENUM
+large_numbers
+4611686018427387903
+6
+2305843009213693952
+1152921504606846976
+576460752303423488
+288230376151711744
+144115188075855872
+72057594037927936
+# LARGENUM:      Hash: 0x3fffffffffffffff
+# LARGENUM-NEXT: Counters: 6
+# LARGENUM-NEXT: Function count: 2305843009213693952
+# LARGENUM-NEXT: Block counts: [1152921504606846976, 576460752303423488, 288230376151711744, 144115188075855872, 72057594037927936]
+
+# RUN: llvm-profdata show %t.profdata --function NOSUCHFUNC | FileCheck %s -check-prefix=NOSUCHFUNC
+# NOSUCHFUNC-NOT: Counters:
+# NOSUCHFUNC: Functions shown: 0
+
+# RUN: llvm-profdata show %t.profdata --function _ | FileCheck %s -check-prefix=SOMEFUNCS
+# SOMEFUNCS: Counters:
+# SOMEFUNCS: function_count_only:
+# SOMEFUNCS: large_numbers:
+# SOMEFUNCS: Functions shown: 2
+
+# RUN: llvm-profdata show %t.profdata | FileCheck %s -check-prefix=SUMMARY
+# SUMMARY-NOT: Counters:
+# SUMMARY-NOT: Functions shown:
+# SUMMARY: Total functions: 3
+# SUMMARY: Maximum function count: 2305843009213693952
+# SUMMARY: Maximum internal block count: 1152921504606846976
diff --git a/test/tools/llvm-profdata/hash-mismatch.proftext b/test/tools/llvm-profdata/hash-mismatch.proftext
new file mode 100644
index 000000000000..fe0d4fb4f6b5
--- /dev/null
+++ b/test/tools/llvm-profdata/hash-mismatch.proftext
@@ -0,0 +1,37 @@
+# If we see the same function name, but with different hashes, make
+# sure we keep both.
+
+# RUN: llvm-profdata merge %s -o %t 2>&1
+# RUN: llvm-profdata show %t -all-functions -counts > %t.out
+
+# The function ordering is non-deterministic, so we need to do our
+# checks in multiple runs.
+# RUN: FileCheck -check-prefix=FOO3 -check-prefix=BOTH %s -input-file %t.out
+# RUN: FileCheck -check-prefix=FOO4 -check-prefix=BOTH %s -input-file %t.out
+
+# FOO3: Hash: 0x{{0+}}3
+# FOO3-NEXT: Counters: 3
+# FOO3-NEXT: Function count: 1
+# FOO3-NEXT: Block counts: [2, 3]
+foo
+3
+3
+1
+2
+3
+
+# FOO4: Hash: 0x{{0+}}4
+# FOO4-NEXT: Counters: 4
+# FOO4-NEXT: Function count: 11
+# FOO4-NEXT: Block counts: [22, 33, 44]
+foo
+4
+4
+11
+22
+33
+44
+
+# BOTH: Total functions: 2
+# BOTH: Maximum function count: 11
+# BOTH: Maximum internal block count: 44
diff --git a/test/tools/llvm-profdata/lit.local.cfg b/test/tools/llvm-profdata/lit.local.cfg
new file mode 100644
index 000000000000..d44913a8239c
--- /dev/null
+++ b/test/tools/llvm-profdata/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes.add('.proftext')
diff --git a/test/tools/llvm-profdata/multiple-inputs.test b/test/tools/llvm-profdata/multiple-inputs.test
new file mode 100644
index 000000000000..616efe92bb86
--- /dev/null
+++ b/test/tools/llvm-profdata/multiple-inputs.test
@@ -0,0 +1,51 @@
+Some very basic tests for the multiple input cases.
+
+RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-2.proftext -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3
+RUN: llvm-profdata merge %p/Inputs/foo3-2.proftext %p/Inputs/foo3-1.proftext -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3
+FOO3: foo:
+FOO3: Counters: 3
+FOO3: Function count: 8
+FOO3: Block counts: [7, 6]
+FOO3: Total functions: 1
+FOO3: Maximum function count: 8
+FOO3: Maximum internal block count: 7
+
+RUN: llvm-profdata merge %p/Inputs/empty.proftext %p/Inputs/foo3-1.proftext -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3EMPTY
+FOO3EMPTY: foo:
+FOO3EMPTY: Counters: 3
+FOO3EMPTY: Function count: 1
+FOO3EMPTY: Block counts: [2, 3]
+FOO3EMPTY: Total functions: 1
+FOO3EMPTY: Maximum function count: 1
+FOO3EMPTY: Maximum internal block count: 3
+
+RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3bar3-1.proftext -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3FOO3BAR3
+FOO3FOO3BAR3: foo:
+FOO3FOO3BAR3: Counters: 3
+FOO3FOO3BAR3: Function count: 3
+FOO3FOO3BAR3: Block counts: [5, 8]
+FOO3FOO3BAR3: bar:
+FOO3FOO3BAR3: Counters: 3
+FOO3FOO3BAR3: Function count: 7
+FOO3FOO3BAR3: Block counts: [11, 13]
+FOO3FOO3BAR3: Total functions: 2
+FOO3FOO3BAR3: Maximum function count: 7
+FOO3FOO3BAR3: Maximum internal block count: 13
+
+RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/bar3-1.proftext -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=DISJOINT
+DISJOINT: foo:
+DISJOINT: Counters: 3
+DISJOINT: Function count: 1
+DISJOINT: Block counts: [2, 3]
+DISJOINT: bar:
+DISJOINT: Counters: 3
+DISJOINT: Function count: 1
+DISJOINT: Block counts: [2, 3]
+DISJOINT: Total functions: 2
+DISJOINT: Maximum function count: 1
+DISJOINT: Maximum internal block count: 3
diff --git a/test/tools/llvm-profdata/overflow.proftext b/test/tools/llvm-profdata/overflow.proftext
new file mode 100644
index 000000000000..cbf3bf161823
--- /dev/null
+++ b/test/tools/llvm-profdata/overflow.proftext
@@ -0,0 +1,12 @@
+# RUN: llvm-profdata merge %s -o %t.out 2>&1 | FileCheck %s
+# CHECK: overflow.proftext: overflow: Counter overflow
+
+overflow
+1
+1
+9223372036854775808
+
+overflow
+1
+1
+9223372036854775808
diff --git a/test/tools/llvm-profdata/raw-two-profiles.test b/test/tools/llvm-profdata/raw-two-profiles.test
index 3260836ba666..be78793215ed 100644
--- a/test/tools/llvm-profdata/raw-two-profiles.test
+++ b/test/tools/llvm-profdata/raw-two-profiles.test
@@ -39,11 +39,9 @@ RUN: printf '\0\0\0\0\0' >> %t-foo-padded.profraw
 RUN: cat %t-bar.profraw > %t-bar-padded.profraw
 RUN: printf '\0\0\0\0\0' >> %t-bar-padded.profraw
 
-RUN: cat %t-foo.profraw %t-bar.profraw > %t-nopad.profraw
 RUN: cat %t-foo-padded.profraw %t-bar.profraw > %t-pad-between.profraw
 RUN: cat %t-foo-padded.profraw %t-bar-padded.profraw > %t-pad.profraw
 
-RUN: llvm-profdata show %t-nopad.profraw -all-functions -counts | FileCheck %s
 RUN: llvm-profdata show %t-pad-between.profraw -all-functions -counts | FileCheck %s
 RUN: llvm-profdata show %t-pad.profraw -all-functions -counts | FileCheck %s
 
diff --git a/test/tools/llvm-profdata/sample-profile-basic.test b/test/tools/llvm-profdata/sample-profile-basic.test
new file mode 100644
index 000000000000..0651c513e965
--- /dev/null
+++ b/test/tools/llvm-profdata/sample-profile-basic.test
@@ -0,0 +1,30 @@
+Basic tests for sample profiles.
+
+1- Show all functions
+RUN: llvm-profdata show --sample %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=SHOW1
+SHOW1: Function: main: 184019, 0, 7 sampled lines
+SHOW1: line offset: 9, discriminator: 0, number of samples: 2064, calls: _Z3fooi:631 _Z3bari:1471
+SHOW1: Function: _Z3fooi: 7711, 610, 1 sampled lines
+SHOW1: Function: _Z3bari: 20301, 1437, 1 sampled lines
+SHOW1: line offset: 1, discriminator: 0, number of samples: 1437
+
+2- Show only bar
+RUN: llvm-profdata show --sample --function=_Z3bari %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=SHOW2
+SHOW2: Function: _Z3bari: 20301, 1437, 1 sampled lines
+SHOW2: line offset: 1, discriminator: 0, number of samples: 1437
+SHOW2-NOT: Function: main: 184019, 0, 7 sampled lines
+SHOW2-NOT: Function: _Z3fooi: 7711, 610, 1 sampled lines
+
+3- Convert the profile to binary encoding and check that they are both
+   identical.
+RUN: llvm-profdata merge --sample %p/Inputs/sample-profile.proftext --binary -o - | llvm-profdata show --sample - -o %t-binary
+RUN: llvm-profdata show --sample %p/Inputs/sample-profile.proftext -o %t-text
+RUN: diff %t-binary %t-text
+
+4- Merge the binary and text encodings of the profile and check that the
+   counters have doubled.
+RUN: llvm-profdata merge --sample %p/Inputs/sample-profile.proftext -o %t-binprof
+RUN: llvm-profdata merge --sample --text %p/Inputs/sample-profile.proftext %t-binprof -o - | FileCheck %s --check-prefix=MERGE1
+MERGE1: main:368038:0
+MERGE1: 9: 4128 _Z3fooi:1262 _Z3bari:2942
+MERGE1: _Z3fooi:15422:1220
diff --git a/test/tools/llvm-profdata/simple.test b/test/tools/llvm-profdata/simple.test
deleted file mode 100644
index 18741dd2ba86..000000000000
--- a/test/tools/llvm-profdata/simple.test
+++ /dev/null
@@ -1,77 +0,0 @@
-RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo3-2.profdata -o %t
-RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3
-RUN: llvm-profdata merge %p/Inputs/foo3-2.profdata %p/Inputs/foo3-1.profdata -o %t
-RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3
-FOO3: foo:
-FOO3: Counters: 3
-FOO3: Function count: 8
-FOO3: Block counts: [7, 6]
-FOO3: Total functions: 1
-FOO3: Maximum function count: 8
-FOO3: Maximum internal block count: 7
-
-RUN: llvm-profdata merge %p/Inputs/foo4-1.profdata %p/Inputs/foo4-2.profdata -o %t
-RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO4
-RUN: llvm-profdata merge %p/Inputs/foo4-2.profdata %p/Inputs/foo4-1.profdata -o %t
-RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO4
-FOO4: foo:
-FOO4: Counters: 4
-FOO4: Function count: 18
-FOO4: Block counts: [28, 38, 48]
-FOO4: Total functions: 1
-FOO4: Maximum function count: 18
-FOO4: Maximum internal block count: 48
-
-RUN: llvm-profdata merge %p/Inputs/foo3bar3-1.profdata %p/Inputs/foo3bar3-2.profdata -o %t
-RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3BAR3
-RUN: llvm-profdata merge %p/Inputs/foo3bar3-2.profdata %p/Inputs/foo3bar3-1.profdata -o %t
-RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3BAR3
-FOO3BAR3: foo:
-FOO3BAR3: Counters: 3
-FOO3BAR3: Function count: 19
-FOO3BAR3: Block counts: [22, 28]
-FOO3BAR3: bar:
-FOO3BAR3: Counters: 3
-FOO3BAR3: Function count: 36
-FOO3BAR3: Block counts: [42, 50]
-FOO3BAR3: Total functions: 2
-FOO3BAR3: Maximum function count: 36
-FOO3BAR3: Maximum internal block count: 50
-
-RUN: llvm-profdata merge %p/Inputs/empty.profdata %p/Inputs/foo3-1.profdata -o %t
-RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3EMPTY
-FOO3EMPTY: foo:
-FOO3EMPTY: Counters: 3
-FOO3EMPTY: Function count: 1
-FOO3EMPTY: Block counts: [2, 3]
-FOO3EMPTY: Total functions: 1
-FOO3EMPTY: Maximum function count: 1
-FOO3EMPTY: Maximum internal block count: 3
-
-RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/foo3bar3-1.profdata -o %t
-RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3FOO3BAR3
-FOO3FOO3BAR3: foo:
-FOO3FOO3BAR3: Counters: 3
-FOO3FOO3BAR3: Function count: 3
-FOO3FOO3BAR3: Block counts: [5, 8]
-FOO3FOO3BAR3: bar:
-FOO3FOO3BAR3: Counters: 3
-FOO3FOO3BAR3: Function count: 7
-FOO3FOO3BAR3: Block counts: [11, 13]
-FOO3FOO3BAR3: Total functions: 2
-FOO3FOO3BAR3: Maximum function count: 7
-FOO3FOO3BAR3: Maximum internal block count: 13
-
-RUN: llvm-profdata merge %p/Inputs/foo3-1.profdata %p/Inputs/bar3-1.profdata -o %t
-RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=DISJOINT
-DISJOINT: foo:
-DISJOINT: Counters: 3
-DISJOINT: Function count: 1
-DISJOINT: Block counts: [2, 3]
-DISJOINT: bar:
-DISJOINT: Counters: 3
-DISJOINT: Function count: 1
-DISJOINT: Block counts: [2, 3]
-DISJOINT: Total functions: 2
-DISJOINT: Maximum function count: 1
-DISJOINT: Maximum internal block count: 3
diff --git a/test/tools/llvm-profdata/text-format-errors.test b/test/tools/llvm-profdata/text-format-errors.test
new file mode 100644
index 000000000000..01513e4fcb9e
--- /dev/null
+++ b/test/tools/llvm-profdata/text-format-errors.test
@@ -0,0 +1,10 @@
+RUN: not llvm-profdata show %p/Inputs/invalid-count-later.proftext 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
+RUN: not llvm-profdata merge %p/Inputs/invalid-count-later.proftext %p/Inputs/invalid-count-later.profdata -o %t.out 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER
+INVALID-COUNT-LATER: error: {{.*}}invalid-count-later.proftext: Malformed profile data
+
+RUN: not llvm-profdata show %p/Inputs/bad-hash.proftext 2>&1 | FileCheck %s --check-prefix=BAD-HASH
+RUN: not llvm-profdata merge %p/Inputs/bad-hash.proftext %p/Inputs/bad-hash.proftext -o %t.out 2>&1 | FileCheck %s --check-prefix=BAD-HASH
+BAD-HASH: error: {{.*}}bad-hash.proftext: Malformed profile data
+
+RUN: not llvm-profdata show %p/Inputs/no-counts.proftext 2>&1 | FileCheck %s --check-prefix=NO-COUNTS
+NO-COUNTS: error: {{.*}}no-counts.proftext: Malformed profile data
diff --git a/test/tools/llvm-readobj/ARM/attribute-0.s b/test/tools/llvm-readobj/ARM/attribute-0.s
new file mode 100644
index 000000000000..b761dd8f251f
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-0.s
@@ -0,0 +1,234 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch, 0
+@CHECK:   .eabi_attribute 6, 0
+@CHECK-OBJ: Tag: 6
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: CPU_arch
+@CHECK-OBJ-NEXT: Description: Pre-v4
+
+.eabi_attribute  Tag_CPU_arch_profile, 0
+@CHECK:   .eabi_attribute 7, 0
+@CHECK-OBJ: Tag: 7
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: CPU_arch_profile
+@CHECK-OBJ-NEXT: Description: None
+
+.eabi_attribute  Tag_ARM_ISA_use, 0
+@CHECK:   .eabi_attribute 8, 0
+@CHECK-OBJ: Tag: 8
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ARM_ISA_use
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_THUMB_ISA_use, 0
+@CHECK:   .eabi_attribute 9, 0
+@CHECK-OBJ: Tag: 9
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: THUMB_ISA_use
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_FP_arch, 0
+@CHECK:   .eabi_attribute 10, 0
+@CHECK-OBJ: Tag: 10
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: FP_arch
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_WMMX_arch, 0
+@CHECK:   .eabi_attribute 11, 0
+@CHECK-OBJ: Tag: 11
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: WMMX_arch
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_Advanced_SIMD_arch, 0
+@CHECK:   .eabi_attribute 12, 0
+@CHECK-OBJ: Tag: 12
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: Advanced_SIMD_arch
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_PCS_config, 0
+@CHECK:   .eabi_attribute 13, 0
+@CHECK-OBJ: Tag: 13
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: PCS_config
+@CHECK-OBJ-NEXT: Description: None
+
+.eabi_attribute  Tag_ABI_PCS_R9_use, 0
+@CHECK:   .eabi_attribute 14, 0
+@CHECK-OBJ: Tag: 14
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_R9_use
+@CHECK-OBJ-NEXT: Description: v6
+
+.eabi_attribute  Tag_ABI_PCS_RW_data, 0
+@CHECK:   .eabi_attribute 15, 0
+@CHECK-OBJ: Tag: 15
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_RW_data
+@CHECK-OBJ-NEXT: Description: Absolute
+
+.eabi_attribute  Tag_ABI_PCS_RO_data, 0
+@CHECK:   .eabi_attribute 16, 0
+@CHECK-OBJ: Tag: 16
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_RO_data
+@CHECK-OBJ-NEXT: Description: Absolute
+
+.eabi_attribute  Tag_ABI_PCS_GOT_use, 0
+@CHECK:   .eabi_attribute 17, 0
+@CHECK-OBJ: Tag: 17
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_GOT_use
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_ABI_PCS_wchar_t, 0
+@CHECK:   .eabi_attribute 18, 0
+@CHECK-OBJ: Tag: 18
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_wchar_t
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_ABI_FP_rounding, 0
+@CHECK:   .eabi_attribute 19, 0
+@CHECK-OBJ: Tag: 19
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_FP_rounding
+@CHECK-OBJ-NEXT: Description: IEEE-754
+
+.eabi_attribute  Tag_ABI_FP_denormal, 0
+@CHECK:   .eabi_attribute 20, 0
+@CHECK-OBJ: Tag: 20
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_FP_denormal
+@CHECK-OBJ-NEXT: Description: Unsupported
+
+.eabi_attribute  Tag_ABI_FP_exceptions, 0
+@CHECK:   .eabi_attribute 21, 0
+@CHECK-OBJ: Tag: 21
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_FP_exceptions
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_ABI_FP_user_exceptions, 0
+@CHECK:   .eabi_attribute 22, 0
+@CHECK-OBJ: Tag: 22
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_FP_user_exceptions
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_ABI_FP_number_model, 0
+@CHECK:   .eabi_attribute 23, 0
+@CHECK-OBJ: Tag: 23
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_FP_number_model
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_ABI_align_needed, 0
+@CHECK:   .eabi_attribute 24, 0
+@CHECK-OBJ: Tag: 24
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_align_needed
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_ABI_align_preserved, 0
+@CHECK:   .eabi_attribute 25, 0
+@CHECK-OBJ: Tag: 25
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_align_preserved
+@CHECK-OBJ-NEXT: Description: Not Required
+
+.eabi_attribute  Tag_ABI_enum_size, 0
+@CHECK:   .eabi_attribute 26, 0
+@CHECK-OBJ: Tag: 26
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_enum_size
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_ABI_HardFP_use, 0
+@CHECK:   .eabi_attribute 27, 0
+@CHECK-OBJ: Tag: 27
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_HardFP_use
+@CHECK-OBJ-NEXT: Description: Tag_FP_arch
+
+.eabi_attribute  Tag_ABI_VFP_args, 0
+@CHECK:   .eabi_attribute 28, 0
+@CHECK-OBJ: Tag: 28
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_VFP_args
+@CHECK-OBJ-NEXT: Description: AAPCS
+
+.eabi_attribute  Tag_ABI_WMMX_args, 0
+@CHECK:   .eabi_attribute 29, 0
+@CHECK-OBJ: Tag: 29
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_WMMX_args
+@CHECK-OBJ-NEXT: Description: AAPCS
+
+.eabi_attribute  Tag_ABI_optimization_goals, 0
+@CHECK:   .eabi_attribute 30, 0
+@CHECK-OBJ: Tag: 30
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_optimization_goals
+@CHECK-OBJ-NEXT: Description: None
+
+.eabi_attribute  Tag_ABI_FP_optimization_goals, 0
+@CHECK:   .eabi_attribute 31, 0
+@CHECK-OBJ: Tag: 31
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_FP_optimization_goals
+@CHECK-OBJ-NEXT: Description: None
+
+.eabi_attribute  Tag_compatibility, 0, "ARM"
+@CHECK:   .eabi_attribute 32, 0
+@CHECK-OBJ: Tag: 32
+@CHECK-OBJ-NEXT: Value: 0, ARM
+@CHECK-OBJ-NEXT: TagName: compatibility
+@CHECK-OBJ-NEXT: Description: No Specific Requirements
+
+.eabi_attribute  Tag_CPU_unaligned_access, 0
+@CHECK:   .eabi_attribute 34, 0
+@CHECK-OBJ: Tag: 34
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: CPU_unaligned_access
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_FP_HP_extension, 0
+@CHECK:   .eabi_attribute 36, 0
+@CHECK-OBJ: Tag: 36
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: FP_HP_extension
+@CHECK-OBJ-NEXT: Description: If Available
+
+.eabi_attribute  Tag_ABI_FP_16bit_format, 0
+@CHECK:   .eabi_attribute 38, 0
+@CHECK-OBJ: Tag: 38
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: ABI_FP_16bit_format
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_MPextension_use, 0
+@CHECK:   .eabi_attribute 42, 0
+@CHECK-OBJ: Tag: 42
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: MPextension_use
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_DIV_use, 0
+@CHECK:   .eabi_attribute 44, 0
+@CHECK-OBJ: Tag: 44
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: DIV_use
+@CHECK-OBJ-NEXT: Description: If Available
+
+.eabi_attribute  Tag_Virtualization_use, 0
+@CHECK:   .eabi_attribute 68, 0
+@CHECK-OBJ: Tag: 68
+@CHECK-OBJ-NEXT: Value: 0
+@CHECK-OBJ-NEXT: TagName: Virtualization_use
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-1.s b/test/tools/llvm-readobj/ARM/attribute-1.s
new file mode 100644
index 000000000000..f433cbcc19b7
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-1.s
@@ -0,0 +1,220 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch, 1
+@CHECK:   .eabi_attribute 6, 1
+@CHECK-OBJ: Tag: 6
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: CPU_arch
+@CHECK-OBJ-NEXT: Description: ARM v4
+
+.eabi_attribute  Tag_ARM_ISA_use, 1
+@CHECK:   .eabi_attribute 8, 1
+@CHECK-OBJ: Tag: 8
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ARM_ISA_use
+@CHECK-OBJ-NEXT: Description: Permitted
+
+.eabi_attribute  Tag_THUMB_ISA_use, 1
+@CHECK:   .eabi_attribute 9, 1
+@CHECK-OBJ: Tag: 9
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: THUMB_ISA_use
+@CHECK-OBJ-NEXT: Description: Thumb-1
+
+.eabi_attribute  Tag_FP_arch, 1
+@CHECK:   .eabi_attribute 10, 1
+@CHECK-OBJ: Tag: 10
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: FP_arch
+@CHECK-OBJ-NEXT: Description: VFPv1
+
+.eabi_attribute  Tag_WMMX_arch, 1
+@CHECK:   .eabi_attribute 11, 1
+@CHECK-OBJ: Tag: 11
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: WMMX_arch
+@CHECK-OBJ-NEXT: Description: WMMXv1
+
+.eabi_attribute  Tag_Advanced_SIMD_arch, 1
+@CHECK:   .eabi_attribute 12, 1
+@CHECK-OBJ: Tag: 12
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: Advanced_SIMD_arch
+@CHECK-OBJ-NEXT: Description: NEONv1
+
+.eabi_attribute  Tag_PCS_config, 1
+@CHECK:   .eabi_attribute 13, 1
+@CHECK-OBJ: Tag: 13
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: PCS_config
+@CHECK-OBJ-NEXT: Description: Bare Platform
+
+.eabi_attribute  Tag_ABI_PCS_R9_use, 1
+@CHECK:   .eabi_attribute 14, 1
+@CHECK-OBJ: Tag: 14
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_R9_use
+@CHECK-OBJ-NEXT: Description: Static Base
+
+.eabi_attribute  Tag_ABI_PCS_RW_data, 1
+@CHECK:   .eabi_attribute 15, 1
+@CHECK-OBJ: Tag: 15
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_RW_data
+@CHECK-OBJ-NEXT: Description: PC-relative
+
+.eabi_attribute  Tag_ABI_PCS_RO_data, 1
+@CHECK:   .eabi_attribute 16, 1
+@CHECK-OBJ: Tag: 16
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_RO_data
+@CHECK-OBJ-NEXT: Description: PC-relative
+
+.eabi_attribute  Tag_ABI_PCS_GOT_use, 1
+@CHECK:   .eabi_attribute 17, 1
+@CHECK-OBJ: Tag: 17
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_GOT_use
+@CHECK-OBJ-NEXT: Description: Direct
+
+.eabi_attribute  Tag_ABI_FP_rounding, 1
+@CHECK:   .eabi_attribute 19, 1
+@CHECK-OBJ: Tag: 19
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_FP_rounding
+@CHECK-OBJ-NEXT: Description: Runtime
+
+.eabi_attribute  Tag_ABI_FP_denormal, 1
+@CHECK:   .eabi_attribute 20, 1
+@CHECK-OBJ: Tag: 20
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_FP_denormal
+@CHECK-OBJ-NEXT: Description: IEEE-754
+
+.eabi_attribute  Tag_ABI_FP_exceptions, 1
+@CHECK:   .eabi_attribute 21, 1
+@CHECK-OBJ: Tag: 21
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_FP_exceptions
+@CHECK-OBJ-NEXT: Description: IEEE-754
+
+.eabi_attribute  Tag_ABI_FP_user_exceptions, 1
+@CHECK:   .eabi_attribute 22, 1
+@CHECK-OBJ: Tag: 22
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_FP_user_exceptions
+@CHECK-OBJ-NEXT: Description: IEEE-754
+
+.eabi_attribute  Tag_ABI_FP_number_model, 1
+@CHECK:   .eabi_attribute 23, 1
+@CHECK-OBJ: Tag: 23
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_FP_number_model
+@CHECK-OBJ-NEXT: Description: Finite Only
+
+.eabi_attribute  Tag_ABI_align_needed, 1
+@CHECK:   .eabi_attribute 24, 1
+@CHECK-OBJ: Tag: 24
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_align_needed
+@CHECK-OBJ-NEXT: Description: 8-byte alignment
+
+.eabi_attribute  Tag_ABI_align_preserved, 1
+@CHECK:   .eabi_attribute 25, 1
+@CHECK-OBJ: Tag: 25
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_align_preserved
+@CHECK-OBJ-NEXT: Description: 8-byte data alignment
+
+.eabi_attribute  Tag_ABI_enum_size, 1
+@CHECK:   .eabi_attribute 26, 1
+@CHECK-OBJ: Tag: 26
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_enum_size
+@CHECK-OBJ-NEXT: Description: Packed
+
+.eabi_attribute  Tag_ABI_HardFP_use, 1
+@CHECK:   .eabi_attribute 27, 1
+@CHECK-OBJ: Tag: 27
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_HardFP_use
+@CHECK-OBJ-NEXT: Description: Single-Precision
+
+.eabi_attribute  Tag_ABI_VFP_args, 1
+@CHECK:   .eabi_attribute 28, 1
+@CHECK-OBJ: Tag: 28
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_VFP_args
+@CHECK-OBJ-NEXT: Description: AAPCS VFP
+
+.eabi_attribute  Tag_ABI_WMMX_args, 1
+@CHECK:   .eabi_attribute 29, 1
+@CHECK-OBJ: Tag: 29
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_WMMX_args
+@CHECK-OBJ-NEXT: Description: iWMMX
+
+.eabi_attribute  Tag_ABI_optimization_goals, 1
+@CHECK:   .eabi_attribute 30, 1
+@CHECK-OBJ: Tag: 30
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_optimization_goals
+@CHECK-OBJ-NEXT: Description: Speed
+
+.eabi_attribute  Tag_ABI_FP_optimization_goals, 1
+@CHECK:   .eabi_attribute 31, 1
+@CHECK-OBJ: Tag: 31
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_FP_optimization_goals
+@CHECK-OBJ-NEXT: Description: Speed
+
+.eabi_attribute  Tag_compatibility, 1, ""
+@CHECK:   .eabi_attribute 32, 1
+@CHECK-OBJ: Tag: 32
+@CHECK-OBJ-NEXT: Value: 1,
+@CHECK-OBJ-NEXT: TagName: compatibility
+@CHECK-OBJ-NEXT: Description: AEABI Conformant
+
+.eabi_attribute  Tag_CPU_unaligned_access, 1
+@CHECK:   .eabi_attribute 34, 1
+@CHECK-OBJ: Tag: 34
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: CPU_unaligned_access
+@CHECK-OBJ-NEXT: Description: v6-style
+
+.eabi_attribute  Tag_FP_HP_extension, 1
+@CHECK:   .eabi_attribute 36, 1
+@CHECK-OBJ: Tag: 36
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: FP_HP_extension
+@CHECK-OBJ-NEXT: Description: Permitted
+
+.eabi_attribute  Tag_ABI_FP_16bit_format, 1
+@CHECK:   .eabi_attribute 38, 1
+@CHECK-OBJ: Tag: 38
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: ABI_FP_16bit_format
+@CHECK-OBJ-NEXT: Description: IEEE-754
+
+.eabi_attribute  Tag_MPextension_use, 1
+@CHECK:   .eabi_attribute 42, 1
+@CHECK-OBJ: Tag: 42
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: MPextension_use
+@CHECK-OBJ-NEXT: Description: Permitted
+
+.eabi_attribute  Tag_DIV_use, 1
+@CHECK:   .eabi_attribute 44, 1
+@CHECK-OBJ: Tag: 44
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: DIV_use
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_Virtualization_use, 1
+@CHECK:   .eabi_attribute 68, 1
+@CHECK-OBJ: Tag: 68
+@CHECK-OBJ-NEXT: Value: 1
+@CHECK-OBJ-NEXT: TagName: Virtualization_use
+@CHECK-OBJ-NEXT: Description: TrustZone
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-10.s b/test/tools/llvm-readobj/ARM/attribute-10.s
new file mode 100644
index 000000000000..667db8c56405
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-10.s
@@ -0,0 +1,24 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch, 10
+@CHECK:   .eabi_attribute 6, 10
+@CHECK-OBJ: Tag: 6
+@CHECK-OBJ-NEXT: Value: 10
+@CHECK-OBJ-NEXT: TagName: CPU_arch
+@CHECK-OBJ-NEXT: Description: ARM v7
+
+.eabi_attribute  Tag_ABI_align_needed, 10
+@CHECK:   .eabi_attribute 24, 10
+@CHECK-OBJ: Tag: 24
+@CHECK-OBJ-NEXT: Value: 10
+@CHECK-OBJ-NEXT: TagName: ABI_align_needed
+@CHECK-OBJ-NEXT: Description: 8-byte alignment, 1024-byte extended alignment
+
+.eabi_attribute  Tag_ABI_align_preserved, 10
+@CHECK:   .eabi_attribute 25, 10
+@CHECK-OBJ: Tag: 25
+@CHECK-OBJ-NEXT: Value: 10
+@CHECK-OBJ-NEXT: TagName: ABI_align_preserved
+@CHECK-OBJ-NEXT: Description: 8-byte stack alignment, 1024-byte data alignment
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-11.s b/test/tools/llvm-readobj/ARM/attribute-11.s
new file mode 100644
index 000000000000..2d8e43b58ce6
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-11.s
@@ -0,0 +1,24 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch, 11
+@CHECK:   .eabi_attribute 6, 11
+@CHECK-OBJ: Tag: 6
+@CHECK-OBJ-NEXT: Value: 11
+@CHECK-OBJ-NEXT: TagName: CPU_arch
+@CHECK-OBJ-NEXT: Description: ARM v6-M
+
+.eabi_attribute  Tag_ABI_align_needed, 11
+@CHECK:   .eabi_attribute 24, 11
+@CHECK-OBJ: Tag: 24
+@CHECK-OBJ-NEXT: Value: 11
+@CHECK-OBJ-NEXT: TagName: ABI_align_needed
+@CHECK-OBJ-NEXT: Description: 8-byte alignment, 2048-byte extended alignment
+
+.eabi_attribute  Tag_ABI_align_preserved, 11
+@CHECK:   .eabi_attribute 25, 11
+@CHECK-OBJ: Tag: 25
+@CHECK-OBJ-NEXT: Value: 11
+@CHECK-OBJ-NEXT: TagName: ABI_align_preserved
+@CHECK-OBJ-NEXT: Description: 8-byte stack alignment, 2048-byte data alignment
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-12.s b/test/tools/llvm-readobj/ARM/attribute-12.s
new file mode 100644
index 000000000000..4387527a3f7d
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-12.s
@@ -0,0 +1,24 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch, 12
+@CHECK:   .eabi_attribute 6, 12
+@CHECK-OBJ: Tag: 6
+@CHECK-OBJ-NEXT: Value: 12
+@CHECK-OBJ-NEXT: TagName: CPU_arch
+@CHECK-OBJ-NEXT: Description: ARM v6S-M
+
+.eabi_attribute  Tag_ABI_align_needed, 12
+@CHECK:   .eabi_attribute 24, 12
+@CHECK-OBJ: Tag: 24
+@CHECK-OBJ-NEXT: Value: 12
+@CHECK-OBJ-NEXT: TagName: ABI_align_needed
+@CHECK-OBJ-NEXT: Description: 8-byte alignment, 4096-byte extended alignment
+
+.eabi_attribute  Tag_ABI_align_preserved, 12
+@CHECK:   .eabi_attribute 25, 12
+@CHECK-OBJ: Tag: 25
+@CHECK-OBJ-NEXT: Value: 12
+@CHECK-OBJ-NEXT: TagName: ABI_align_preserved
+@CHECK-OBJ-NEXT: Description: 8-byte stack alignment, 4096-byte data alignment
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-13.s b/test/tools/llvm-readobj/ARM/attribute-13.s
new file mode 100644
index 000000000000..25ac5f19f6e3
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-13.s
@@ -0,0 +1,10 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch, 13
+@CHECK:   .eabi_attribute 6, 13
+@CHECK-OBJ: Tag: 6
+@CHECK-OBJ-NEXT: Value: 13
+@CHECK-OBJ-NEXT: TagName: CPU_arch
+@CHECK-OBJ-NEXT: Description: ARM v7E-M
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-136.s b/test/tools/llvm-readobj/ARM/attribute-136.s
new file mode 100644
index 000000000000..a2d2a9aa411c
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-136.s
@@ -0,0 +1,10 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_compatibility, 136, "Foo Corp"
+@CHECK:   .eabi_attribute 32, 136
+@CHECK-OBJ: Tag: 32
+@CHECK-OBJ-NEXT: Value: 136, Foo Corp
+@CHECK-OBJ-NEXT: TagName: compatibility
+@CHECK-OBJ-NEXT: Description: AEABI Non-Conformant
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-14.s b/test/tools/llvm-readobj/ARM/attribute-14.s
new file mode 100644
index 000000000000..e0d8e468daf9
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-14.s
@@ -0,0 +1,10 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch, 14
+@CHECK:   .eabi_attribute 6, 14
+@CHECK-OBJ: Tag: 6
+@CHECK-OBJ-NEXT: Value: 14
+@CHECK-OBJ-NEXT: TagName: CPU_arch
+@CHECK-OBJ-NEXT: Description: ARM v8
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-15.s b/test/tools/llvm-readobj/ARM/attribute-15.s
new file mode 100644
index 000000000000..7877ce7aeb71
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-15.s
@@ -0,0 +1,10 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_compatibility, 15, "Longer Corporation NaMe"
+@CHECK:   .eabi_attribute 32, 15
+@CHECK-OBJ: Tag: 32
+@CHECK-OBJ-NEXT: Value: 15, Longer Corporation NaMe
+@CHECK-OBJ-NEXT: TagName: compatibility
+@CHECK-OBJ-NEXT: Description: AEABI Non-Conformant
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-2.s b/test/tools/llvm-readobj/ARM/attribute-2.s
new file mode 100644
index 000000000000..21ee41fbc090
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-2.s
@@ -0,0 +1,178 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch, 2
+@CHECK:   .eabi_attribute 6, 2
+@CHECK-OBJ: Tag: 6
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: CPU_arch
+@CHECK-OBJ-NEXT: Description: ARM v4T
+
+.eabi_attribute  Tag_THUMB_ISA_use, 2
+@CHECK:   .eabi_attribute 9, 2
+@CHECK-OBJ: Tag: 9
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: THUMB_ISA_use
+@CHECK-OBJ-NEXT: Description: Thumb-2
+
+.eabi_attribute  Tag_FP_arch, 2
+@CHECK:   .eabi_attribute 10, 2
+@CHECK-OBJ: Tag: 10
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: FP_arch
+@CHECK-OBJ-NEXT: Description: VFPv2
+
+.eabi_attribute  Tag_WMMX_arch, 2
+@CHECK:   .eabi_attribute 11, 2
+@CHECK-OBJ: Tag: 11
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: WMMX_arch
+@CHECK-OBJ-NEXT: Description: WMMXv2
+
+.eabi_attribute  Tag_Advanced_SIMD_arch, 2
+@CHECK:   .eabi_attribute 12, 2
+@CHECK-OBJ: Tag: 12
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: Advanced_SIMD_arch
+@CHECK-OBJ-NEXT: Description: NEONv2+FMA
+
+.eabi_attribute  Tag_PCS_config, 2
+@CHECK:   .eabi_attribute 13, 2
+@CHECK-OBJ: Tag: 13
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: PCS_config
+@CHECK-OBJ-NEXT: Description: Linux Application
+
+.eabi_attribute  Tag_ABI_PCS_R9_use, 2
+@CHECK:   .eabi_attribute 14, 2
+@CHECK-OBJ: Tag: 14
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_R9_use
+@CHECK-OBJ-NEXT: Description: TLS
+
+.eabi_attribute  Tag_ABI_PCS_RW_data, 2
+@CHECK:   .eabi_attribute 15, 2
+@CHECK-OBJ: Tag: 15
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_RW_data
+@CHECK-OBJ-NEXT: Description: SB-relative
+
+.eabi_attribute  Tag_ABI_PCS_RO_data, 2
+@CHECK:   .eabi_attribute 16, 2
+@CHECK-OBJ: Tag: 16
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_RO_data
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_ABI_PCS_GOT_use, 2
+@CHECK:   .eabi_attribute 17, 2
+@CHECK-OBJ: Tag: 17
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_GOT_use
+@CHECK-OBJ-NEXT: Description: GOT-Indirect
+
+.eabi_attribute  Tag_ABI_PCS_wchar_t, 2
+@CHECK:   .eabi_attribute 18, 2
+@CHECK-OBJ: Tag: 18
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_wchar_t
+@CHECK-OBJ-NEXT: Description: 2-byte
+
+.eabi_attribute  Tag_ABI_FP_denormal, 2
+@CHECK:   .eabi_attribute 20, 2
+@CHECK-OBJ: Tag: 20
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_FP_denormal
+@CHECK-OBJ-NEXT: Description: Sign Only
+
+.eabi_attribute  Tag_ABI_FP_number_model, 2
+@CHECK:   .eabi_attribute 23, 2
+@CHECK-OBJ: Tag: 23
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_FP_number_model
+@CHECK-OBJ-NEXT: Description: RTABI
+
+.eabi_attribute  Tag_ABI_align_needed, 2
+@CHECK:   .eabi_attribute 24, 2
+@CHECK-OBJ: Tag: 24
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_align_needed
+@CHECK-OBJ-NEXT: Description: 4-byte alignment
+
+.eabi_attribute  Tag_ABI_align_preserved, 2
+@CHECK:   .eabi_attribute 25, 2
+@CHECK-OBJ: Tag: 25
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_align_preserved
+@CHECK-OBJ-NEXT: Description: 8-byte data and code alignment
+
+.eabi_attribute  Tag_ABI_enum_size, 2
+@CHECK:   .eabi_attribute 26, 2
+@CHECK-OBJ: Tag: 26
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_enum_size
+@CHECK-OBJ-NEXT: Description: Int32
+
+.eabi_attribute  Tag_ABI_HardFP_use, 2
+@CHECK:   .eabi_attribute 27, 2
+@CHECK-OBJ: Tag: 27
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_HardFP_use
+@CHECK-OBJ-NEXT: Description: Reserved
+
+.eabi_attribute  Tag_ABI_VFP_args, 2
+@CHECK:   .eabi_attribute 28, 2
+@CHECK-OBJ: Tag: 28
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_VFP_args
+@CHECK-OBJ-NEXT: Description: Custom
+
+.eabi_attribute  Tag_ABI_WMMX_args, 2
+@CHECK:   .eabi_attribute 29, 2
+@CHECK-OBJ: Tag: 29
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_WMMX_args
+@CHECK-OBJ-NEXT: Description: Custom
+
+.eabi_attribute  Tag_ABI_optimization_goals, 2
+@CHECK:   .eabi_attribute 30, 2
+@CHECK-OBJ: Tag: 30
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_optimization_goals
+@CHECK-OBJ-NEXT: Description: Aggressive Speed
+
+.eabi_attribute  Tag_ABI_FP_optimization_goals, 2
+@CHECK:   .eabi_attribute 31, 2
+@CHECK-OBJ: Tag: 31
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_FP_optimization_goals
+@CHECK-OBJ-NEXT: Description: Aggressive Speed
+
+.eabi_attribute  Tag_compatibility, 2, ""
+@CHECK:   .eabi_attribute 32, 2
+@CHECK-OBJ: Tag: 32
+@CHECK-OBJ-NEXT: Value: 2,
+@CHECK-OBJ-NEXT: TagName: compatibility
+@CHECK-OBJ-NEXT: Description: AEABI Non-Conformant
+
+.eabi_attribute  Tag_ABI_FP_16bit_format, 2
+@CHECK:   .eabi_attribute 38, 2
+@CHECK-OBJ: Tag: 38
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: ABI_FP_16bit_format
+@CHECK-OBJ-NEXT: Description: VFPv3
+
+.eabi_attribute  Tag_DIV_use, 2
+@CHECK:   .eabi_attribute 44, 2
+@CHECK-OBJ: Tag: 44
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: DIV_use
+@CHECK-OBJ-NEXT: Description: Permitted
+
+.eabi_attribute  Tag_Virtualization_use, 2
+@CHECK:   .eabi_attribute 68, 2
+@CHECK-OBJ: Tag: 68
+@CHECK-OBJ-NEXT: Value: 2
+@CHECK-OBJ-NEXT: TagName: Virtualization_use
+@CHECK-OBJ-NEXT: Description: Virtualization Extensions
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-3.s b/test/tools/llvm-readobj/ARM/attribute-3.s
new file mode 100644
index 000000000000..ad2de257ecc6
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-3.s
@@ -0,0 +1,108 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch, 3
+@CHECK:   .eabi_attribute 6, 3
+@CHECK-OBJ: Tag: 6
+@CHECK-OBJ-NEXT: Value: 3
+@CHECK-OBJ-NEXT: TagName: CPU_arch
+@CHECK-OBJ-NEXT: Description: ARM v5T
+
+.eabi_attribute  Tag_FP_arch, 3
+@CHECK:   .eabi_attribute 10, 3
+@CHECK-OBJ: Tag: 10
+@CHECK-OBJ-NEXT: Value: 3
+@CHECK-OBJ-NEXT: TagName: FP_arch
+@CHECK-OBJ-NEXT: Description: VFPv3
+
+.eabi_attribute  Tag_Advanced_SIMD_arch, 3
+@CHECK:   .eabi_attribute 12, 3
+@CHECK-OBJ: Tag: 12
+@CHECK-OBJ-NEXT: Value: 3
+@CHECK-OBJ-NEXT: TagName: Advanced_SIMD_arch
+@CHECK-OBJ-NEXT: Description: ARMv8-a NEON
+
+.eabi_attribute  Tag_PCS_config, 3
+@CHECK:   .eabi_attribute 13, 3
+@CHECK-OBJ: Tag: 13
+@CHECK-OBJ-NEXT: Value: 3
+@CHECK-OBJ-NEXT: TagName: PCS_config
+@CHECK-OBJ-NEXT: Description: Linux DSO
+
+.eabi_attribute  Tag_ABI_PCS_R9_use, 3
+@CHECK:   .eabi_attribute 14, 3
+@CHECK-OBJ: Tag: 14
+@CHECK-OBJ-NEXT: Value: 3
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_R9_use
+@CHECK-OBJ-NEXT: Description: Unused
+
+.eabi_attribute  Tag_ABI_PCS_RW_data, 3
+@CHECK:   .eabi_attribute 15, 3
+@CHECK-OBJ: Tag: 15
+@CHECK-OBJ-NEXT: Value: 3
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_RW_data
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_ABI_FP_number_model, 3
+@CHECK:   .eabi_attribute 23, 3
+@CHECK-OBJ: Tag: 23
+@CHECK-OBJ-NEXT: Value: 3
+@CHECK-OBJ-NEXT: TagName: ABI_FP_number_model
+@CHECK-OBJ-NEXT: Description: IEEE-754
+
+.eabi_attribute  Tag_ABI_align_needed, 3
+@CHECK:   .eabi_attribute 24, 3
+@CHECK-OBJ: Tag: 24
+@CHECK-OBJ-NEXT: Value: 3
+@CHECK-OBJ-NEXT: TagName: ABI_align_needed
+@CHECK-OBJ-NEXT: Description: Reserved
+
+.eabi_attribute  Tag_ABI_align_preserved, 3
+@CHECK:   .eabi_attribute 25, 3
+@CHECK-OBJ: Tag: 25
+@CHECK-OBJ-NEXT: Value: 3
+@CHECK-OBJ-NEXT: TagName: ABI_align_preserved
+@CHECK-OBJ-NEXT: Description: Reserved
+
+.eabi_attribute  Tag_ABI_enum_size, 3
+@CHECK:   .eabi_attribute 26, 3
+@CHECK-OBJ: Tag: 26
+@CHECK-OBJ-NEXT: Value: 3
+@CHECK-OBJ-NEXT: TagName: ABI_enum_size
+@CHECK-OBJ-NEXT: Description: External Int32
+
+.eabi_attribute  Tag_ABI_HardFP_use, 3
+@CHECK:   .eabi_attribute 27, 3
+@CHECK-OBJ: Tag: 27
+@CHECK-OBJ-NEXT: Value: 3
+@CHECK-OBJ-NEXT: TagName: ABI_HardFP_use
+@CHECK-OBJ-NEXT: Description: Tag_FP_arch (deprecated)
+
+.eabi_attribute  Tag_ABI_VFP_args, 3
+@CHECK:   .eabi_attribute 28, 3
+@CHECK-OBJ: Tag: 28
+@CHECK-OBJ-NEXT: Value: 3
+@CHECK-OBJ-NEXT: TagName: ABI_VFP_args
+@CHECK-OBJ-NEXT: Description: Not Permitted
+
+.eabi_attribute  Tag_ABI_optimization_goals, 3
+@CHECK:   .eabi_attribute 30, 3
+@CHECK-OBJ: Tag: 30
+@CHECK-OBJ-NEXT: Value: 3
+@CHECK-OBJ-NEXT: TagName: ABI_optimization_goals
+@CHECK-OBJ-NEXT: Description: Size
+
+.eabi_attribute  Tag_ABI_FP_optimization_goals, 3
+@CHECK:   .eabi_attribute 31, 3
+@CHECK-OBJ: Tag: 31
+@CHECK-OBJ-NEXT: Value: 3
+@CHECK-OBJ-NEXT: TagName: ABI_FP_optimization_goals
+@CHECK-OBJ-NEXT: Description: Size
+
+.eabi_attribute  Tag_Virtualization_use, 3
+@CHECK:   .eabi_attribute 68, 3
+@CHECK-OBJ: Tag: 68
+@CHECK-OBJ-NEXT: Value: 3
+@CHECK-OBJ-NEXT: TagName: Virtualization_use
+@CHECK-OBJ-NEXT: Description: TrustZone + Virtualization Extensions
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-4.s b/test/tools/llvm-readobj/ARM/attribute-4.s
new file mode 100644
index 000000000000..dd0a4a6d6a73
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-4.s
@@ -0,0 +1,59 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch, 4
+@CHECK:   .eabi_attribute 6, 4
+@CHECK-OBJ: Tag: 6
+@CHECK-OBJ-NEXT: Value: 4
+@CHECK-OBJ-NEXT: TagName: CPU_arch
+@CHECK-OBJ-NEXT: Description: ARM v5TE
+
+.eabi_attribute  Tag_FP_arch, 4
+@CHECK:   .eabi_attribute 10, 4
+@CHECK-OBJ: Tag: 10
+@CHECK-OBJ-NEXT: Value: 4
+@CHECK-OBJ-NEXT: TagName: FP_arch
+@CHECK-OBJ-NEXT: Description: VFPv3-D16
+
+.eabi_attribute  Tag_PCS_config, 4
+@CHECK:   .eabi_attribute 13, 4
+@CHECK-OBJ: Tag: 13
+@CHECK-OBJ-NEXT: Value: 4
+@CHECK-OBJ-NEXT: TagName: PCS_config
+@CHECK-OBJ-NEXT: Description: Palm OS 2004
+
+.eabi_attribute  Tag_ABI_PCS_wchar_t, 4
+@CHECK:   .eabi_attribute 18, 4
+@CHECK-OBJ: Tag: 18
+@CHECK-OBJ-NEXT: Value: 4
+@CHECK-OBJ-NEXT: TagName: ABI_PCS_wchar_t
+@CHECK-OBJ-NEXT: Description: 4-byte
+
+.eabi_attribute  Tag_ABI_align_needed, 4
+@CHECK:   .eabi_attribute 24, 4
+@CHECK-OBJ: Tag: 24
+@CHECK-OBJ-NEXT: Value: 4
+@CHECK-OBJ-NEXT: TagName: ABI_align_needed
+@CHECK-OBJ-NEXT: Description: 8-byte alignment, 16-byte extended alignment
+
+.eabi_attribute  Tag_ABI_align_preserved, 4
+@CHECK:   .eabi_attribute 25, 4
+@CHECK-OBJ: Tag: 25
+@CHECK-OBJ-NEXT: Value: 4
+@CHECK-OBJ-NEXT: TagName: ABI_align_preserved
+@CHECK-OBJ-NEXT: Description: 8-byte stack alignment, 16-byte data alignment
+
+.eabi_attribute  Tag_ABI_optimization_goals, 4
+@CHECK:   .eabi_attribute 30, 4
+@CHECK-OBJ: Tag: 30
+@CHECK-OBJ-NEXT: Value: 4
+@CHECK-OBJ-NEXT: TagName: ABI_optimization_goals
+@CHECK-OBJ-NEXT: Description: Aggressive Size
+
+.eabi_attribute  Tag_ABI_FP_optimization_goals, 4
+@CHECK:   .eabi_attribute 31, 4
+@CHECK-OBJ: Tag: 31
+@CHECK-OBJ-NEXT: Value: 4
+@CHECK-OBJ-NEXT: TagName: ABI_FP_optimization_goals
+@CHECK-OBJ-NEXT: Description: Aggressive Size
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-5.s b/test/tools/llvm-readobj/ARM/attribute-5.s
new file mode 100644
index 000000000000..97e37e2f460f
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-5.s
@@ -0,0 +1,52 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch, 5
+@CHECK:   .eabi_attribute 6, 5
+@CHECK-OBJ: Tag: 6
+@CHECK-OBJ-NEXT: Value: 5
+@CHECK-OBJ-NEXT: TagName: CPU_arch
+@CHECK-OBJ-NEXT: Description: ARM v5TEJ
+
+.eabi_attribute  Tag_FP_arch, 5
+@CHECK:   .eabi_attribute 10, 5
+@CHECK-OBJ: Tag: 10
+@CHECK-OBJ-NEXT: Value: 5
+@CHECK-OBJ-NEXT: TagName: FP_arch
+@CHECK-OBJ-NEXT: Description: VFPv4
+
+.eabi_attribute  Tag_PCS_config, 5
+@CHECK:   .eabi_attribute 13, 5
+@CHECK-OBJ: Tag: 13
+@CHECK-OBJ-NEXT: Value: 5
+@CHECK-OBJ-NEXT: TagName: PCS_config
+@CHECK-OBJ-NEXT: Description: Reserved (Palm OS)
+
+.eabi_attribute  Tag_ABI_align_needed, 5
+@CHECK:   .eabi_attribute 24, 5
+@CHECK-OBJ: Tag: 24
+@CHECK-OBJ-NEXT: Value: 5
+@CHECK-OBJ-NEXT: TagName: ABI_align_needed
+@CHECK-OBJ-NEXT: Description: 8-byte alignment, 32-byte extended alignment
+
+.eabi_attribute  Tag_ABI_align_preserved, 5
+@CHECK:   .eabi_attribute 25, 5
+@CHECK-OBJ: Tag: 25
+@CHECK-OBJ-NEXT: Value: 5
+@CHECK-OBJ-NEXT: TagName: ABI_align_preserved
+@CHECK-OBJ-NEXT: Description: 8-byte stack alignment, 32-byte data alignment
+
+.eabi_attribute  Tag_ABI_optimization_goals, 5
+@CHECK:   .eabi_attribute 30, 5
+@CHECK-OBJ: Tag: 30
+@CHECK-OBJ-NEXT: Value: 5
+@CHECK-OBJ-NEXT: TagName: ABI_optimization_goals
+@CHECK-OBJ-NEXT: Description: Debugging
+
+.eabi_attribute  Tag_ABI_FP_optimization_goals, 5
+@CHECK:   .eabi_attribute 31, 5
+@CHECK-OBJ: Tag: 31
+@CHECK-OBJ-NEXT: Value: 5
+@CHECK-OBJ-NEXT: TagName: ABI_FP_optimization_goals
+@CHECK-OBJ-NEXT: Description: Accuracy
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-6.s b/test/tools/llvm-readobj/ARM/attribute-6.s
new file mode 100644
index 000000000000..8da7b9924e16
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-6.s
@@ -0,0 +1,52 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch, 6
+@CHECK:   .eabi_attribute 6, 6
+@CHECK-OBJ: Tag: 6
+@CHECK-OBJ-NEXT: Value: 6
+@CHECK-OBJ-NEXT: TagName: CPU_arch
+@CHECK-OBJ-NEXT: Description: ARM v6
+
+.eabi_attribute  Tag_FP_arch, 6
+@CHECK:   .eabi_attribute 10, 6
+@CHECK-OBJ: Tag: 10
+@CHECK-OBJ-NEXT: Value: 6
+@CHECK-OBJ-NEXT: TagName: FP_arch
+@CHECK-OBJ-NEXT: Description: VFPv4-D16
+
+.eabi_attribute  Tag_PCS_config, 6
+@CHECK:   .eabi_attribute 13, 6
+@CHECK-OBJ: Tag: 13
+@CHECK-OBJ-NEXT: Value: 6
+@CHECK-OBJ-NEXT: TagName: PCS_config
+@CHECK-OBJ-NEXT: Description: Symbian OS 2004
+
+.eabi_attribute  Tag_ABI_align_needed, 6
+@CHECK:   .eabi_attribute 24, 6
+@CHECK-OBJ: Tag: 24
+@CHECK-OBJ-NEXT: Value: 6
+@CHECK-OBJ-NEXT: TagName: ABI_align_needed
+@CHECK-OBJ-NEXT: Description: 8-byte alignment, 64-byte extended alignment
+
+.eabi_attribute  Tag_ABI_align_preserved, 6
+@CHECK:   .eabi_attribute 25, 6
+@CHECK-OBJ: Tag: 25
+@CHECK-OBJ-NEXT: Value: 6
+@CHECK-OBJ-NEXT: TagName: ABI_align_preserved
+@CHECK-OBJ-NEXT: Description: 8-byte stack alignment, 64-byte data alignment
+
+.eabi_attribute  Tag_ABI_optimization_goals, 6
+@CHECK:   .eabi_attribute 30, 6
+@CHECK-OBJ: Tag: 30
+@CHECK-OBJ-NEXT: Value: 6
+@CHECK-OBJ-NEXT: TagName: ABI_optimization_goals
+@CHECK-OBJ-NEXT: Description: Best Debugging
+
+.eabi_attribute  Tag_ABI_FP_optimization_goals, 6
+@CHECK:   .eabi_attribute 31, 6
+@CHECK-OBJ: Tag: 31
+@CHECK-OBJ-NEXT: Value: 6
+@CHECK-OBJ-NEXT: TagName: ABI_FP_optimization_goals
+@CHECK-OBJ-NEXT: Description: Best Accuracy
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-7.s b/test/tools/llvm-readobj/ARM/attribute-7.s
new file mode 100644
index 000000000000..2fd1b209d310
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-7.s
@@ -0,0 +1,38 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch, 7
+@CHECK:   .eabi_attribute 6, 7
+@CHECK-OBJ: Tag: 6
+@CHECK-OBJ-NEXT: Value: 7
+@CHECK-OBJ-NEXT: TagName: CPU_arch
+@CHECK-OBJ-NEXT: Description: ARM v6KZ
+
+.eabi_attribute  Tag_FP_arch, 7
+@CHECK:   .eabi_attribute 10, 7
+@CHECK-OBJ: Tag: 10
+@CHECK-OBJ-NEXT: Value: 7
+@CHECK-OBJ-NEXT: TagName: FP_arch
+@CHECK-OBJ-NEXT: Description: ARMv8-a FP
+
+.eabi_attribute  Tag_PCS_config, 7
+@CHECK:   .eabi_attribute 13, 7
+@CHECK-OBJ: Tag: 13
+@CHECK-OBJ-NEXT: Value: 7
+@CHECK-OBJ-NEXT: TagName: PCS_config
+@CHECK-OBJ-NEXT: Description: Reserved (Symbian OS)
+
+.eabi_attribute  Tag_ABI_align_needed, 7
+@CHECK:   .eabi_attribute 24, 7
+@CHECK-OBJ: Tag: 24
+@CHECK-OBJ-NEXT: Value: 7
+@CHECK-OBJ-NEXT: TagName: ABI_align_needed
+@CHECK-OBJ-NEXT: Description: 8-byte alignment, 128-byte extended alignment
+
+.eabi_attribute  Tag_ABI_align_preserved, 7
+@CHECK:   .eabi_attribute 25, 7
+@CHECK-OBJ: Tag: 25
+@CHECK-OBJ-NEXT: Value: 7
+@CHECK-OBJ-NEXT: TagName: ABI_align_preserved
+@CHECK-OBJ-NEXT: Description: 8-byte stack alignment, 128-byte data alignment
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-8.s b/test/tools/llvm-readobj/ARM/attribute-8.s
new file mode 100644
index 000000000000..ac3e3a0b4056
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-8.s
@@ -0,0 +1,31 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch, 8
+@CHECK:   .eabi_attribute 6, 8
+@CHECK-OBJ: Tag: 6
+@CHECK-OBJ-NEXT: Value: 8
+@CHECK-OBJ-NEXT: TagName: CPU_arch
+@CHECK-OBJ-NEXT: Description: ARM v6T2
+
+.eabi_attribute  Tag_FP_arch, 8
+@CHECK:   .eabi_attribute 10, 8
+@CHECK-OBJ: Tag: 10
+@CHECK-OBJ-NEXT: Value: 8
+@CHECK-OBJ-NEXT: TagName: FP_arch
+@CHECK-OBJ-NEXT: Description: ARMv8-a FP-D16
+
+.eabi_attribute  Tag_ABI_align_needed, 8
+@CHECK:   .eabi_attribute 24, 8
+@CHECK-OBJ: Tag: 24
+@CHECK-OBJ-NEXT: Value: 8
+@CHECK-OBJ-NEXT: TagName: ABI_align_needed
+@CHECK-OBJ-NEXT: Description: 8-byte alignment, 256-byte extended alignment
+
+.eabi_attribute  Tag_ABI_align_preserved, 8
+@CHECK:   .eabi_attribute 25, 8
+@CHECK-OBJ: Tag: 25
+@CHECK-OBJ-NEXT: Value: 8
+@CHECK-OBJ-NEXT: TagName: ABI_align_preserved
+@CHECK-OBJ-NEXT: Description: 8-byte stack alignment, 256-byte data alignment
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-9.s b/test/tools/llvm-readobj/ARM/attribute-9.s
new file mode 100644
index 000000000000..68f6ccb5a63c
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-9.s
@@ -0,0 +1,24 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch, 9
+@CHECK:   .eabi_attribute 6, 9
+@CHECK-OBJ: Tag: 6
+@CHECK-OBJ-NEXT: Value: 9
+@CHECK-OBJ-NEXT: TagName: CPU_arch
+@CHECK-OBJ-NEXT: Description: ARM v6K
+
+.eabi_attribute  Tag_ABI_align_needed, 9
+@CHECK:   .eabi_attribute 24, 9
+@CHECK-OBJ: Tag: 24
+@CHECK-OBJ-NEXT: Value: 9
+@CHECK-OBJ-NEXT: TagName: ABI_align_needed
+@CHECK-OBJ-NEXT: Description: 8-byte alignment, 512-byte extended alignment
+
+.eabi_attribute  Tag_ABI_align_preserved, 9
+@CHECK:   .eabi_attribute 25, 9
+@CHECK-OBJ: Tag: 25
+@CHECK-OBJ-NEXT: Value: 9
+@CHECK-OBJ-NEXT: TagName: ABI_align_preserved
+@CHECK-OBJ-NEXT: Description: 8-byte stack alignment, 512-byte data alignment
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-A.s b/test/tools/llvm-readobj/ARM/attribute-A.s
new file mode 100644
index 000000000000..720f56e9ddf5
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-A.s
@@ -0,0 +1,10 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch_profile, 'A'
+@CHECK:   .eabi_attribute 7, 65
+@CHECK-OBJ: Tag: 7
+@CHECK-OBJ-NEXT: Value: 65
+@CHECK-OBJ-NEXT: TagName: CPU_arch_profile
+@CHECK-OBJ-NEXT: Description: Application
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-M.s b/test/tools/llvm-readobj/ARM/attribute-M.s
new file mode 100644
index 000000000000..7d1e1efba3e4
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-M.s
@@ -0,0 +1,10 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch_profile, 'M'
+@CHECK:   .eabi_attribute 7, 77
+@CHECK-OBJ: Tag: 7
+@CHECK-OBJ-NEXT: Value: 77
+@CHECK-OBJ-NEXT: TagName: CPU_arch_profile
+@CHECK-OBJ-NEXT: Description: Microcontroller
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-R.s b/test/tools/llvm-readobj/ARM/attribute-R.s
new file mode 100644
index 000000000000..096d55764c1c
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-R.s
@@ -0,0 +1,10 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch_profile, 'R'
+@CHECK:   .eabi_attribute 7, 82
+@CHECK-OBJ: Tag: 7
+@CHECK-OBJ-NEXT: Value: 82
+@CHECK-OBJ-NEXT: TagName: CPU_arch_profile
+@CHECK-OBJ-NEXT: Description: Real-time
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-S.s b/test/tools/llvm-readobj/ARM/attribute-S.s
new file mode 100644
index 000000000000..cb909580ab3b
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-S.s
@@ -0,0 +1,10 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_CPU_arch_profile, 'S'
+@CHECK:   .eabi_attribute 7, 83
+@CHECK-OBJ: Tag: 7
+@CHECK-OBJ-NEXT: Value: 83
+@CHECK-OBJ-NEXT: TagName: CPU_arch_profile
+@CHECK-OBJ-NEXT: Description: Classic
+
diff --git a/test/tools/llvm-readobj/ARM/attribute-conformance-1.s b/test/tools/llvm-readobj/ARM/attribute-conformance-1.s
new file mode 100644
index 000000000000..daa44c14ecac
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-conformance-1.s
@@ -0,0 +1,8 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_conformance, "0"
+@CHECK:   .eabi_attribute 67, "0"
+@CHECK-OBJ: Tag: 67
+@CHECK-OBJ-NEXT: TagName: conformance
+@CHECK-OBJ-NEXT: Value: 0
diff --git a/test/tools/llvm-readobj/ARM/attribute-conformance-2.s b/test/tools/llvm-readobj/ARM/attribute-conformance-2.s
new file mode 100644
index 000000000000..47c83c03696d
--- /dev/null
+++ b/test/tools/llvm-readobj/ARM/attribute-conformance-2.s
@@ -0,0 +1,8 @@
+@ RUN: llvm-mc -triple armv7-elf -filetype asm -o - %s | FileCheck %s
+@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
+@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ
+.eabi_attribute  Tag_conformance, "A.long--non numeric oddity...!!"
+@CHECK:   .eabi_attribute 67, "A.long--non numeric oddity...!!"
+@CHECK-OBJ: Tag: 67
+@CHECK-OBJ-NEXT: TagName: conformance
+@CHECK-OBJ-NEXT: Value: A.long--non numeric oddity...!!
diff --git a/test/tools/llvm-readobj/ARM/attributes.s b/test/tools/llvm-readobj/ARM/attributes.s
deleted file mode 100644
index 594bab85a09e..000000000000
--- a/test/tools/llvm-readobj/ARM/attributes.s
+++ /dev/null
@@ -1,287 +0,0 @@
-@ RUN: llvm-mc -triple armv7-eabi -filetype obj -o - %s \
-@ RUN:   | llvm-readobj -arm-attributes - | FileCheck %s
-
-	.syntax unified
-
-	.cpu cortex-a8
-	.fpu neon
-
-	.eabi_attribute Tag_CPU_raw_name, "Cortex-A9"
-	.eabi_attribute Tag_CPU_name, "cortex-a9"
-	.eabi_attribute Tag_CPU_arch, 10
-	.eabi_attribute Tag_CPU_arch_profile, 'A'
-	.eabi_attribute Tag_ARM_ISA_use, 0
-	.eabi_attribute Tag_THUMB_ISA_use, 2
-	.eabi_attribute Tag_FP_arch, 3
-	.eabi_attribute Tag_WMMX_arch, 0
-	.eabi_attribute Tag_Advanced_SIMD_arch, 1
-	.eabi_attribute Tag_PCS_config, 2
-	.eabi_attribute Tag_ABI_PCS_R9_use, 0
-	.eabi_attribute Tag_ABI_PCS_RW_data, 0
-	.eabi_attribute Tag_ABI_PCS_RO_data, 0
-	.eabi_attribute Tag_ABI_PCS_GOT_use, 0
-	.eabi_attribute Tag_ABI_PCS_wchar_t, 4
-	.eabi_attribute Tag_ABI_FP_rounding, 1
-	.eabi_attribute Tag_ABI_FP_denormal, 2
-	.eabi_attribute Tag_ABI_FP_exceptions, 1
-	.eabi_attribute Tag_ABI_FP_user_exceptions, 1
-	.eabi_attribute Tag_ABI_FP_number_model, 3
-	.eabi_attribute Tag_ABI_align_needed, 1
-	.eabi_attribute Tag_ABI_align_preserved, 2
-	.eabi_attribute Tag_ABI_enum_size, 3
-	.eabi_attribute Tag_ABI_HardFP_use, 0
-	.eabi_attribute Tag_ABI_VFP_args, 1
-	.eabi_attribute Tag_ABI_WMMX_args, 0
-	.eabi_attribute Tag_ABI_optimization_goals, 2
-	.eabi_attribute Tag_ABI_FP_optimization_goals, 2
-	.eabi_attribute Tag_compatibility, 1
-	.eabi_attribute Tag_compatibility, 1, "aeabi"
-	.eabi_attribute Tag_CPU_unaligned_access, 0
-	.eabi_attribute Tag_FP_HP_extension, 0
-	.eabi_attribute Tag_ABI_FP_16bit_format, 0
-	.eabi_attribute Tag_MPextension_use, 0
-	.eabi_attribute Tag_DIV_use, 0
-	.eabi_attribute Tag_nodefaults, 0
-	.eabi_attribute Tag_also_compatible_with, "gnu"
-	.eabi_attribute Tag_T2EE_use, 0
-	.eabi_attribute Tag_conformance, "2.09"
-	.eabi_attribute Tag_Virtualization_use, 0
-
-@ CHECK: BuildAttributes {
-@ CHECK:   Section 1 {
-@ CHECK:     Tag: Tag_File (0x1)
-@ CHECK:     FileAttributes {
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 4
-@ CHECK:         TagName: CPU_raw_name
-@ CHECK:         Value: CORTEX-A9
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 5
-@ CHECK:         TagName: CPU_name
-@ CHECK:         Value: CORTEX-A9
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 6
-@ CHECK:         Value: 10
-@ CHECK:         TagName: CPU_arch
-@ CHECK:         Description: ARM v7
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 7
-@ CHECK:         Value: 65
-@ CHECK:         TagName: CPU_arch_profile
-@ CHECK:         Description: Application
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 8
-@ CHECK:         Value: 0
-@ CHECK:         TagName: ARM_ISA_use
-@ CHECK:         Description: Not Permitted
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 9
-@ CHECK:         Value: 2
-@ CHECK:         TagName: THUMB_ISA_use
-@ CHECK:         Description: Thumb-2
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 10
-@ CHECK:         Value: 3
-@ CHECK:         TagName: FP_arch
-@ CHECK:         Description: VFPv3
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 11
-@ CHECK:         Value: 0
-@ CHECK:         TagName: WMMX_arch
-@ CHECK:         Description: Not Permitted
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 12
-@ CHECK:         Value: 1
-@ CHECK:         TagName: Advanced_SIMD_arch
-@ CHECK:         Description: NEONv1
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 13
-@ CHECK:         Value: 2
-@ CHECK:         TagName: PCS_config
-@ CHECK:         Description: Linux Application
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 14
-@ CHECK:         Value: 0
-@ CHECK:         TagName: ABI_PCS_R9_use
-@ CHECK:         Description: v6
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 15
-@ CHECK:         Value: 0
-@ CHECK:         TagName: ABI_PCS_RW_data
-@ CHECK:         Description: Absolute
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 16
-@ CHECK:         Value: 0
-@ CHECK:         TagName: ABI_PCS_RO_data
-@ CHECK:         Description: Absolute
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 17
-@ CHECK:         Value: 0
-@ CHECK:         TagName: ABI_PCS_GOT_use
-@ CHECK:         Description: Not Permitted
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 18
-@ CHECK:         Value: 4
-@ CHECK:         TagName: ABI_PCS_wchar_t
-@ CHECK:         Description: 4-byte
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 19
-@ CHECK:         Value: 1
-@ CHECK:         TagName: ABI_FP_rounding
-@ CHECK:         Description: Runtime
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 20
-@ CHECK:         Value: 2
-@ CHECK:         TagName: ABI_FP_denormal
-@ CHECK:         Description: Sign Only
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 21
-@ CHECK:         Value: 1
-@ CHECK:         TagName: ABI_FP_exceptions
-@ CHECK:         Description: IEEE-754
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 22
-@ CHECK:         Value: 1
-@ CHECK:         TagName: ABI_FP_user_exceptions
-@ CHECK:         Description: IEEE-754
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 23
-@ CHECK:         Value: 3
-@ CHECK:         TagName: ABI_FP_number_model
-@ CHECK:         Description: IEEE-754
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 24
-@ CHECK:         Value: 1
-@ CHECK:         TagName: ABI_align_needed
-@ CHECK:         Description: 8-byte alignment
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 25
-@ CHECK:         Value: 2
-@ CHECK:         TagName: ABI_align_preserved
-@ CHECK:         Description: 8-byte data and code alignment
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 26
-@ CHECK:         Value: 3
-@ CHECK:         TagName: ABI_enum_size
-@ CHECK:         Description: External Int32
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 27
-@ CHECK:         Value: 0
-@ CHECK:         TagName: ABI_HardFP_use
-@ CHECK:         Description: Tag_FP_arch
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 28
-@ CHECK:         Value: 1
-@ CHECK:         TagName: ABI_VFP_args
-@ CHECK:         Description: AAPCS VFP
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 29
-@ CHECK:         Value: 0
-@ CHECK:         TagName: ABI_WMMX_args
-@ CHECK:         Description: AAPCS
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 30
-@ CHECK:         Value: 2
-@ CHECK:         TagName: ABI_optimization_goals
-@ CHECK:         Description: Aggressive Speed
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 31
-@ CHECK:         Value: 2
-@ CHECK:         TagName: ABI_FP_optimization_goals
-@ CHECK:         Description: Aggressive Speed
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 32
-@ CHECK:         Value: 1, AEABI
-@ CHECK:         TagName: compatibility
-@ CHECK:         Description: AEABI Conformant
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 34
-@ CHECK:         Value: 0
-@ CHECK:         TagName: CPU_unaligned_access
-@ CHECK:         Description: Not Permitted
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 36
-@ CHECK:         Value: 0
-@ CHECK:         TagName: FP_HP_extension
-@ CHECK:         Description: If Available
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 38
-@ CHECK:         Value: 0
-@ CHECK:         TagName: ABI_FP_16bit_format
-@ CHECK:         Description: Not Permitted
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 42
-@ CHECK:         Value: 0
-@ CHECK:         TagName: MPextension_use
-@ CHECK:         Description: Not Permitted
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 44
-@ CHECK:         Value: 0
-@ CHECK:         TagName: DIV_use
-@ CHECK:         Description: If Available
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 64
-@ CHECK:         Value: 0
-@ CHECK:         TagName: nodefaults
-@ CHECK:         Description: Unspecified Tags UNDEFINED
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 65
-@ CHECK:         TagName: also_compatible_with
-@ CHECK:         Value: GNU
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 66
-@ CHECK:         Value: 0
-@ CHECK:         TagName: T2EE_use
-@ CHECK:         Description: Not Permitted
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 67
-@ CHECK:         TagName: conformance
-@ CHECK:         Value: 2.09
-@ CHECK:       }
-@ CHECK:       Attribute {
-@ CHECK:         Tag: 68
-@ CHECK:         Value: 0
-@ CHECK:         TagName: Virtualization_use
-@ CHECK:         Description: Not Permitted
-@ CHECK:       }
-@ CHECK:     }
-@ CHECK:   }
-@ CHECK: }
-
diff --git a/test/tools/llvm-readobj/Inputs/bad-relocs.obj.coff-i386 b/test/tools/llvm-readobj/Inputs/bad-relocs.obj.coff-i386
new file mode 100644
index 000000000000..06ec4718da19
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/bad-relocs.obj.coff-i386
diff --git a/test/tools/llvm-readobj/Inputs/basereloc.obj.coff-i386 b/test/tools/llvm-readobj/Inputs/basereloc.obj.coff-i386
new file mode 100644
index 000000000000..0aeed443e33a
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/basereloc.obj.coff-i386
diff --git a/test/tools/llvm-readobj/Inputs/bigobj.coff-x86-64 b/test/tools/llvm-readobj/Inputs/bigobj.coff-x86-64
new file mode 100644
index 000000000000..fdfda5e5713a
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/bigobj.coff-x86-64
diff --git a/test/tools/llvm-readobj/Inputs/comdat-function-linetables.obj.coff-2012-i386 b/test/tools/llvm-readobj/Inputs/comdat-function-linetables.obj.coff-2012-i386
new file mode 100755
index 000000000000..4a723044801b
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/comdat-function-linetables.obj.coff-2012-i386
diff --git a/test/tools/llvm-readobj/Inputs/comdat-function-linetables.obj.coff-2013-i386 b/test/tools/llvm-readobj/Inputs/comdat-function-linetables.obj.coff-2013-i386
new file mode 100755
index 000000000000..4adaf2edc6f7
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/comdat-function-linetables.obj.coff-2013-i386
diff --git a/test/tools/llvm-readobj/Inputs/directives.obj.coff-x86_64 b/test/tools/llvm-readobj/Inputs/directives.obj.coff-x86_64
new file mode 100644
index 000000000000..4c156dcbb034
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/directives.obj.coff-x86_64
diff --git a/test/tools/llvm-readobj/Inputs/export-arm.dll b/test/tools/llvm-readobj/Inputs/export-arm.dll
new file mode 100755
index 000000000000..a5555628afc6
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/export-arm.dll
diff --git a/test/tools/llvm-readobj/Inputs/export-x64.dll b/test/tools/llvm-readobj/Inputs/export-x64.dll
new file mode 100755
index 000000000000..10b14e8fff95
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/export-x64.dll
diff --git a/test/tools/llvm-readobj/Inputs/export-x86.dll b/test/tools/llvm-readobj/Inputs/export-x86.dll
new file mode 100755
index 000000000000..9efcd3131fe0
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/export-x86.dll
diff --git a/test/tools/llvm-readobj/Inputs/file-aux-record.yaml b/test/tools/llvm-readobj/Inputs/file-aux-record.yaml
index d19afaf68a85..89d6761a26db 100644
--- a/test/tools/llvm-readobj/Inputs/file-aux-record.yaml
+++ b/test/tools/llvm-readobj/Inputs/file-aux-record.yaml
@@ -6,7 +6,7 @@ symbols:
   - !Symbol
     Name: .file
     Value: 0
-    SectionNumber: 65534
+    SectionNumber: -2
     SimpleType: IMAGE_SYM_TYPE_NULL
     ComplexType: IMAGE_SYM_DTYPE_NULL
     StorageClass: IMAGE_SYM_CLASS_FILE
@@ -14,7 +14,7 @@ symbols:
   - !Symbol
     Name: '@comp.id'
     Value: 13485607
-    SectionNumber: 65535
+    SectionNumber: -1
     SimpleType: IMAGE_SYM_TYPE_NULL
     ComplexType: IMAGE_SYM_DTYPE_NULL
     StorageClass: IMAGE_SYM_CLASS_STATIC
diff --git a/test/tools/llvm-readobj/Inputs/file-multiple-aux-records.yaml b/test/tools/llvm-readobj/Inputs/file-multiple-aux-records.yaml
index 8d8f68447d47..d5b1eec878b0 100644
--- a/test/tools/llvm-readobj/Inputs/file-multiple-aux-records.yaml
+++ b/test/tools/llvm-readobj/Inputs/file-multiple-aux-records.yaml
@@ -6,7 +6,7 @@ symbols:
   - !Symbol
     Name: .file
     Value: 0
-    SectionNumber: 65534
+    SectionNumber: -2
     SimpleType: IMAGE_SYM_TYPE_NULL
     ComplexType: IMAGE_SYM_DTYPE_NULL
     StorageClass: IMAGE_SYM_CLASS_FILE
@@ -14,7 +14,7 @@ symbols:
   - !Symbol
     Name: '@comp.id'
     Value: 13485607
-    SectionNumber: 65535
+    SectionNumber: -1
     SimpleType: IMAGE_SYM_TYPE_NULL
     ComplexType: IMAGE_SYM_DTYPE_NULL
     StorageClass: IMAGE_SYM_CLASS_STATIC
diff --git a/test/tools/llvm-readobj/Inputs/imports.exe.coff-i386 b/test/tools/llvm-readobj/Inputs/imports.exe.coff-i386
new file mode 100644
index 000000000000..72077adaebef
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/imports.exe.coff-i386
diff --git a/test/tools/llvm-readobj/Inputs/imports.exe.coff-x86-64 b/test/tools/llvm-readobj/Inputs/imports.exe.coff-x86-64
new file mode 100644
index 000000000000..5ee198e4a2e2
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/imports.exe.coff-x86-64
diff --git a/test/tools/llvm-readobj/Inputs/multifile-linetables.obj.coff-2012-i368 b/test/tools/llvm-readobj/Inputs/multifile-linetables.obj.coff-2012-i368
index 1672d3a54242..213331f604e0 100644
--- a/test/tools/llvm-readobj/Inputs/multifile-linetables.obj.coff-2012-i368
+++ b/test/tools/llvm-readobj/Inputs/multifile-linetables.obj.coff-2012-i368
diff --git a/test/tools/llvm-readobj/Inputs/multifile-linetables.obj.coff-2012-x86_64 b/test/tools/llvm-readobj/Inputs/multifile-linetables.obj.coff-2012-x86_64
index 30bfe79bc308..a35cc11c2ebc 100644
--- a/test/tools/llvm-readobj/Inputs/multifile-linetables.obj.coff-2012-x86_64
+++ b/test/tools/llvm-readobj/Inputs/multifile-linetables.obj.coff-2012-x86_64
diff --git a/test/tools/llvm-readobj/Inputs/multifile-linetables.obj.coff-2013-i368 b/test/tools/llvm-readobj/Inputs/multifile-linetables.obj.coff-2013-i368
new file mode 100644
index 000000000000..8a901f64af8e
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/multifile-linetables.obj.coff-2013-i368
diff --git a/test/tools/llvm-readobj/Inputs/multifile-linetables.obj.coff-2013-x86_64 b/test/tools/llvm-readobj/Inputs/multifile-linetables.obj.coff-2013-x86_64
new file mode 100644
index 000000000000..f1c2e1f25e7f
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/multifile-linetables.obj.coff-2013-x86_64
diff --git a/test/tools/llvm-readobj/Inputs/multifunction-linetables.obj.coff-2012-i368 b/test/tools/llvm-readobj/Inputs/multifunction-linetables.obj.coff-2012-i368
index a0196ff2d737..41479edc7119 100644
--- a/test/tools/llvm-readobj/Inputs/multifunction-linetables.obj.coff-2012-i368
+++ b/test/tools/llvm-readobj/Inputs/multifunction-linetables.obj.coff-2012-i368
diff --git a/test/tools/llvm-readobj/Inputs/multifunction-linetables.obj.coff-2012-x86_64 b/test/tools/llvm-readobj/Inputs/multifunction-linetables.obj.coff-2012-x86_64
index 14f65ab2f6d7..8d30f325d70b 100644
--- a/test/tools/llvm-readobj/Inputs/multifunction-linetables.obj.coff-2012-x86_64
+++ b/test/tools/llvm-readobj/Inputs/multifunction-linetables.obj.coff-2012-x86_64
diff --git a/test/tools/llvm-readobj/Inputs/multifunction-linetables.obj.coff-2013-i368 b/test/tools/llvm-readobj/Inputs/multifunction-linetables.obj.coff-2013-i368
new file mode 100644
index 000000000000..4269b2145467
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/multifunction-linetables.obj.coff-2013-i368
diff --git a/test/tools/llvm-readobj/Inputs/multifunction-linetables.obj.coff-2013-x86_64 b/test/tools/llvm-readobj/Inputs/multifunction-linetables.obj.coff-2013-x86_64
new file mode 100644
index 000000000000..65a1af061c83
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/multifunction-linetables.obj.coff-2013-x86_64
diff --git a/test/tools/llvm-readobj/Inputs/relocs-no-symtab.obj.coff-i386 b/test/tools/llvm-readobj/Inputs/relocs-no-symtab.obj.coff-i386
new file mode 100644
index 000000000000..5882daf4c595
--- /dev/null
+++ b/test/tools/llvm-readobj/Inputs/relocs-no-symtab.obj.coff-i386
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.coff-i386 b/test/tools/llvm-readobj/Inputs/relocs.obj.coff-i386
index 15e43ef54fef..9c24ac8d9d3c 100644
--- a/test/tools/llvm-readobj/Inputs/relocs.obj.coff-i386
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.coff-i386
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.coff-x86_64 b/test/tools/llvm-readobj/Inputs/relocs.obj.coff-x86_64
index cd63173b0e3a..356437e66cb8 100644
--- a/test/tools/llvm-readobj/Inputs/relocs.obj.coff-x86_64
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.coff-x86_64
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64 b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64
index d39e60c8eec3..658b0ea6227a 100644
--- a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-aarch64
diff --git a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-arm b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-arm
index 908507d20ab9..206c933d478f 100644
--- a/test/tools/llvm-readobj/Inputs/relocs.obj.elf-arm
+++ b/test/tools/llvm-readobj/Inputs/relocs.obj.elf-arm
diff --git a/test/tools/llvm-readobj/Inputs/relocs.py b/test/tools/llvm-readobj/Inputs/relocs.py
index af9459df8ee2..62dbd627ca52 100644
--- a/test/tools/llvm-readobj/Inputs/relocs.py
+++ b/test/tools/llvm-readobj/Inputs/relocs.py
@@ -327,6 +327,10 @@ def patchCoff(path, relocs):
 
   machine_type            = f.uint16()
   section_count           = f.uint16()
+
+  # Zero out timestamp to prevent churn when regenerating COFF files.
+  f.writeUInt32(0)
+
   f.seek(20)
   sections = [CoffSection(f) for idx in range(section_count)]
 
@@ -587,7 +591,7 @@ class Relocs_Elf_PPC64(Enum):
   R_PPC64_TLSLD               = 108
 
 class Relocs_Elf_AArch64(Enum):
-  R_AARCH64_NONE                        = 0x100
+  R_AARCH64_NONE                        = 0
   R_AARCH64_ABS64                       = 0x101
   R_AARCH64_ABS32                       = 0x102
   R_AARCH64_ABS16                       = 0x103
@@ -607,6 +611,7 @@ class Relocs_Elf_AArch64(Enum):
   R_AARCH64_LD_PREL_LO19                = 0x111
   R_AARCH64_ADR_PREL_LO21               = 0x112
   R_AARCH64_ADR_PREL_PG_HI21            = 0x113
+  R_AARCH64_ADR_PREL_PG_HI21_NC         = 0x114
   R_AARCH64_ADD_ABS_LO12_NC             = 0x115
   R_AARCH64_LDST8_ABS_LO12_NC           = 0x116
   R_AARCH64_TSTBR14                     = 0x117
@@ -616,9 +621,39 @@ class Relocs_Elf_AArch64(Enum):
   R_AARCH64_LDST16_ABS_LO12_NC          = 0x11c
   R_AARCH64_LDST32_ABS_LO12_NC          = 0x11d
   R_AARCH64_LDST64_ABS_LO12_NC          = 0x11e
+  R_AARCH64_MOVW_PREL_G0                = 0x11f
+  R_AARCH64_MOVW_PREL_G0_NC             = 0x120
+  R_AARCH64_MOVW_PREL_G1                = 0x121
+  R_AARCH64_MOVW_PREL_G1_NC             = 0x122
+  R_AARCH64_MOVW_PREL_G2                = 0x123
+  R_AARCH64_MOVW_PREL_G2_NC             = 0x124
+  R_AARCH64_MOVW_PREL_G3                = 0x125
   R_AARCH64_LDST128_ABS_LO12_NC         = 0x12b
+  R_AARCH64_MOVW_GOTOFF_G0              = 0x12c
+  R_AARCH64_MOVW_GOTOFF_G0_NC           = 0x12d
+  R_AARCH64_MOVW_GOTOFF_G1              = 0x12e
+  R_AARCH64_MOVW_GOTOFF_G1_NC           = 0x12f
+  R_AARCH64_MOVW_GOTOFF_G2              = 0x130
+  R_AARCH64_MOVW_GOTOFF_G2_NC           = 0x131
+  R_AARCH64_MOVW_GOTOFF_G3              = 0x132
+  R_AARCH64_GOTREL64                    = 0x133
+  R_AARCH64_GOTREL32                    = 0x134
+  R_AARCH64_GOT_LD_PREL19               = 0x135
+  R_AARCH64_LD64_GOTOFF_LO15            = 0x136
   R_AARCH64_ADR_GOT_PAGE                = 0x137
   R_AARCH64_LD64_GOT_LO12_NC            = 0x138
+  R_AARCH64_LD64_GOTPAGE_LO15           = 0x139
+  R_AARCH64_TLSGD_ADR_PREL21            = 0x200
+  R_AARCH64_TLSGD_ADR_PAGE21            = 0x201
+  R_AARCH64_TLSGD_ADD_LO12_NC           = 0x202
+  R_AARCH64_TLSGD_MOVW_G1               = 0x203
+  R_AARCH64_TLSGD_MOVW_G0_NC            = 0x204
+  R_AARCH64_TLSLD_ADR_PREL21            = 0x205
+  R_AARCH64_TLSLD_ADR_PAGE21            = 0x206
+  R_AARCH64_TLSLD_ADD_LO12_NC           = 0x207
+  R_AARCH64_TLSLD_MOVW_G1               = 0x208
+  R_AARCH64_TLSLD_MOVW_G0_NC            = 0x209
+  R_AARCH64_TLSLD_LD_PREL19             = 0x20a
   R_AARCH64_TLSLD_MOVW_DTPREL_G2        = 0x20b
   R_AARCH64_TLSLD_MOVW_DTPREL_G1        = 0x20c
   R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC     = 0x20d
@@ -656,10 +691,29 @@ class Relocs_Elf_AArch64(Enum):
   R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC  = 0x22d
   R_AARCH64_TLSLE_LDST64_TPREL_LO12     = 0x22e
   R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC  = 0x22f
-  R_AARCH64_TLSDESC_ADR_PAGE            = 0x232
+  R_AARCH64_TLSDESC_LD_PREL19           = 0x230
+  R_AARCH64_TLSDESC_ADR_PREL21          = 0x231
+  R_AARCH64_TLSDESC_ADR_PAGE21          = 0x232
   R_AARCH64_TLSDESC_LD64_LO12_NC        = 0x233
   R_AARCH64_TLSDESC_ADD_LO12_NC         = 0x234
+  R_AARCH64_TLSDESC_OFF_G1              = 0x235
+  R_AARCH64_TLSDESC_OFF_G0_NC           = 0x236
+  R_AARCH64_TLSDESC_LDR                 = 0x237
+  R_AARCH64_TLSDESC_ADD                 = 0x238
   R_AARCH64_TLSDESC_CALL                = 0x239
+  R_AARCH64_TLSLE_LDST128_TPREL_LO12    = 0x23a
+  R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC = 0x23b
+  R_AARCH64_TLSLD_LDST128_DTPREL_LO12   = 0x23c
+  R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC = 0x23d
+  R_AARCH64_COPY                        = 0x400
+  R_AARCH64_GLOB_DAT                    = 0x401
+  R_AARCH64_JUMP_SLOT                   = 0x402
+  R_AARCH64_RELATIVE                    = 0x403
+  R_AARCH64_TLS_DTPREL64                = 0x404
+  R_AARCH64_TLS_DTPMOD64                = 0x405
+  R_AARCH64_TLS_TPREL64                 = 0x406
+  R_AARCH64_TLSDESC                     = 0x407
+  R_AARCH64_IRELATIVE                   = 0x408
 
 class Relocs_Elf_ARM(Enum):
   R_ARM_NONE                  = 0x00
@@ -793,6 +847,7 @@ class Relocs_Elf_ARM(Enum):
   R_ARM_ME_TOO                = 0x80
   R_ARM_THM_TLS_DESCSEQ16     = 0x81
   R_ARM_THM_TLS_DESCSEQ32     = 0x82
+  R_ARM_IRELATIVE             = 0xa0
 
 class Relocs_Elf_Mips(Enum):
   R_MIPS_NONE              =  0
diff --git a/test/tools/llvm-readobj/bigobj.test b/test/tools/llvm-readobj/bigobj.test
new file mode 100644
index 000000000000..10972144315d
--- /dev/null
+++ b/test/tools/llvm-readobj/bigobj.test
@@ -0,0 +1,139 @@
+RUN: llvm-readobj --file-headers --sections --symbols --relocations %p/Inputs/bigobj.coff-x86-64 | FileCheck %s
+
+CHECK:      File: {{(.*[/\\])?}}bigobj.coff-x86-64
+CHECK-NEXT: Format: COFF-x86-64
+CHECK-NEXT: Arch: x86_64
+CHECK-NEXT: AddressSize: 64bit
+CHECK-NEXT: ImageFileHeader {
+CHECK-NEXT:   Machine: IMAGE_FILE_MACHINE_AMD64 (0x8664)
+CHECK-NEXT:   SectionCount: 3
+CHECK-NEXT:   TimeDateStamp: 1970-01-01 00:00:00 (0x0)
+CHECK-NEXT:   PointerToSymbolTable: 0xB0
+CHECK-NEXT:   SymbolCount: 8
+CHECK-NEXT:   OptionalHeaderSize: 0
+CHECK-NEXT:   Characteristics [ (0x0)
+CHECK-NEXT:   ]
+CHECK-NEXT: }
+CHECK-NEXT: Sections [
+CHECK-NEXT:   Section {
+CHECK-NEXT:     Number: 1
+CHECK-NEXT:     Name: .text (2E 74 65 78 74 00 00 00)
+CHECK-NEXT:     VirtualSize: 0x0
+CHECK-NEXT:     VirtualAddress: 0x0
+CHECK-NEXT:     RawDataSize: 0
+CHECK-NEXT:     PointerToRawData: 0x0
+CHECK-NEXT:     PointerToRelocations: 0x0
+CHECK-NEXT:     PointerToLineNumbers: 0x0
+CHECK-NEXT:     RelocationCount: 0
+CHECK-NEXT:     LineNumberCount: 0
+CHECK-NEXT:     Characteristics [ (0x60500020)
+CHECK-NEXT:       IMAGE_SCN_ALIGN_16BYTES (0x500000)
+CHECK-NEXT:       IMAGE_SCN_CNT_CODE (0x20)
+CHECK-NEXT:       IMAGE_SCN_MEM_EXECUTE (0x20000000)
+CHECK-NEXT:       IMAGE_SCN_MEM_READ (0x40000000)
+CHECK-NEXT:     ]
+CHECK-NEXT:   }
+CHECK-NEXT:   Section {
+CHECK-NEXT:     Number: 2
+CHECK-NEXT:     Name: .data (2E 64 61 74 61 00 00 00)
+CHECK-NEXT:     VirtualSize: 0x0
+CHECK-NEXT:     VirtualAddress: 0x0
+CHECK-NEXT:     RawDataSize: 0
+CHECK-NEXT:     PointerToRawData: 0x0
+CHECK-NEXT:     PointerToRelocations: 0x0
+CHECK-NEXT:     PointerToLineNumbers: 0x0
+CHECK-NEXT:     RelocationCount: 0
+CHECK-NEXT:     LineNumberCount: 0
+CHECK-NEXT:     Characteristics [ (0xC0500040)
+CHECK-NEXT:       IMAGE_SCN_ALIGN_16BYTES (0x500000)
+CHECK-NEXT:       IMAGE_SCN_CNT_INITIALIZED_DATA (0x40)
+CHECK-NEXT:       IMAGE_SCN_MEM_READ (0x40000000)
+CHECK-NEXT:       IMAGE_SCN_MEM_WRITE (0x80000000)
+CHECK-NEXT:     ]
+CHECK-NEXT:   }
+CHECK-NEXT:   Section {
+CHECK-NEXT:     Number: 3
+CHECK-NEXT:     Name: .bss (2E 62 73 73 00 00 00 00)
+CHECK-NEXT:     VirtualSize: 0x0
+CHECK-NEXT:     VirtualAddress: 0x0
+CHECK-NEXT:     RawDataSize: 0
+CHECK-NEXT:     PointerToRawData: 0x0
+CHECK-NEXT:     PointerToRelocations: 0x0
+CHECK-NEXT:     PointerToLineNumbers: 0x0
+CHECK-NEXT:     RelocationCount: 0
+CHECK-NEXT:     LineNumberCount: 0
+CHECK-NEXT:     Characteristics [ (0xC0500080)
+CHECK-NEXT:       IMAGE_SCN_ALIGN_16BYTES (0x500000)
+CHECK-NEXT:       IMAGE_SCN_CNT_UNINITIALIZED_DATA (0x80)
+CHECK-NEXT:       IMAGE_SCN_MEM_READ (0x40000000)
+CHECK-NEXT:       IMAGE_SCN_MEM_WRITE (0x80000000)
+CHECK-NEXT:     ]
+CHECK-NEXT:   }
+CHECK-NEXT: ]
+CHECK-NEXT: Relocations [
+CHECK-NEXT: ]
+CHECK-NEXT: Symbols [
+CHECK-NEXT:   Symbol {
+CHECK-NEXT:     Name: .file
+CHECK-NEXT:     Value: 0
+CHECK-NEXT:     Section:  IMAGE_SYM_DEBUG (-2)
+CHECK-NEXT:     BaseType: Null (0x0)
+CHECK-NEXT:     ComplexType: Null (0x0)
+CHECK-NEXT:     StorageClass: File (0x67)
+CHECK-NEXT:     AuxSymbolCount: 1
+CHECK-NEXT:     AuxFileRecord {
+CHECK-NEXT:       FileName: fake
+CHECK-NEXT:     }
+CHECK-NEXT:   }
+CHECK-NEXT:   Symbol {
+CHECK-NEXT:     Name: .text
+CHECK-NEXT:     Value: 0
+CHECK-NEXT:     Section: .text (1)
+CHECK-NEXT:     BaseType: Null (0x0)
+CHECK-NEXT:     ComplexType: Null (0x0)
+CHECK-NEXT:     StorageClass: Static (0x3)
+CHECK-NEXT:     AuxSymbolCount: 1
+CHECK-NEXT:     AuxSectionDef {
+CHECK-NEXT:       Length: 0
+CHECK-NEXT:       RelocationCount: 0
+CHECK-NEXT:       LineNumberCount: 0
+CHECK-NEXT:       Checksum: 0x0
+CHECK-NEXT:       Number: 0
+CHECK-NEXT:       Selection: 0x0
+CHECK-NEXT:     }
+CHECK-NEXT:   }
+CHECK-NEXT:   Symbol {
+CHECK-NEXT:     Name: .data
+CHECK-NEXT:     Value: 0
+CHECK-NEXT:     Section: .data (2)
+CHECK-NEXT:     BaseType: Null (0x0)
+CHECK-NEXT:     ComplexType: Null (0x0)
+CHECK-NEXT:     StorageClass: Static (0x3)
+CHECK-NEXT:     AuxSymbolCount: 1
+CHECK-NEXT:     AuxSectionDef {
+CHECK-NEXT:       Length: 0
+CHECK-NEXT:       RelocationCount: 0
+CHECK-NEXT:       LineNumberCount: 0
+CHECK-NEXT:       Checksum: 0x0
+CHECK-NEXT:       Number: 0
+CHECK-NEXT:       Selection: 0x0
+CHECK-NEXT:     }
+CHECK-NEXT:   }
+CHECK-NEXT:   Symbol {
+CHECK-NEXT:     Name: .bss
+CHECK-NEXT:     Value: 0
+CHECK-NEXT:     Section: .bss (3)
+CHECK-NEXT:     BaseType: Null (0x0)
+CHECK-NEXT:     ComplexType: Null (0x0)
+CHECK-NEXT:     StorageClass: Static (0x3)
+CHECK-NEXT:     AuxSymbolCount: 1
+CHECK-NEXT:     AuxSectionDef {
+CHECK-NEXT:       Length: 0
+CHECK-NEXT:       RelocationCount: 0
+CHECK-NEXT:       LineNumberCount: 0
+CHECK-NEXT:       Checksum: 0x0
+CHECK-NEXT:       Number: 0
+CHECK-NEXT:       Selection: 0x0
+CHECK-NEXT:     }
+CHECK-NEXT:   }
+CHECK-NEXT: ]
diff --git a/test/tools/llvm-readobj/codeview-linetables.test b/test/tools/llvm-readobj/codeview-linetables.test
index 4854d7ac6a39..e5e344bf10ff 100644
--- a/test/tools/llvm-readobj/codeview-linetables.test
+++ b/test/tools/llvm-readobj/codeview-linetables.test
@@ -1,11 +1,31 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; The following two object files were generated using the following command:
+;   D:\> cl /Z7 /c source.c
+; with the following contents of D:\source.c:
+;   void z(void);
+;
+;   void x(void) {
+;     z();
+;   }
+;
+;   void y(void) {
+;     z();
+;   }
+;
+;   void f(void) {
+;     x();
+;     y();
+;     z();
+;   }
+; using 32-/64-bit versions of CL v17.00.61030 and v18.00.21005.1 respectively.
 RUN: llvm-readobj -s -codeview-linetables %p/Inputs/multifunction-linetables.obj.coff-2012-i368 \
 RUN:   | FileCheck %s -check-prefix MFUN32
+RUN: llvm-readobj -s -codeview-linetables %p/Inputs/multifunction-linetables.obj.coff-2013-i368 \
+RUN:   | FileCheck %s -check-prefix MFUN32
 RUN: llvm-readobj -s -codeview-linetables %p/Inputs/multifunction-linetables.obj.coff-2012-x86_64 \
 RUN:   | FileCheck %s -check-prefix MFUN64
-RUN: llvm-readobj -s -codeview-linetables %p/Inputs/multifile-linetables.obj.coff-2012-i368 \
-RUN:   | FileCheck %s -check-prefix MFILE32
-RUN: llvm-readobj -s -codeview-linetables %p/Inputs/multifile-linetables.obj.coff-2012-x86_64 \
-RUN:   | FileCheck %s -check-prefix MFILE64
+RUN: llvm-readobj -s -codeview-linetables %p/Inputs/multifunction-linetables.obj.coff-2013-x86_64 \
+RUN:   | FileCheck %s -check-prefix MFUN64
 
 MFUN32:      CodeViewLineTables [
 MFUN32-NEXT:   Magic: 0x4
@@ -20,6 +40,12 @@ MFUN32:        ]
 MFUN32-NEXT:   Subsection [
 MFUN32-NEXT:     Type: 0xF1
 MFUN32-NEXT:     PayloadSize: 0x4B
+MFUN32:          ProcStart {
+MFUN32-NEXT:       DisplayName: x
+MFUN32-NEXT:       Section: _x
+MFUN32-NEXT:       CodeSize: 0xA
+MFUN32-NEXT:     }
+MFUN32-NEXT:     ProcEnd
 MFUN32:        ]
 MFUN32-NEXT:   Subsection [
 MFUN32-NEXT:     Type: 0xF2
@@ -33,6 +59,12 @@ MFUN32:        ]
 MFUN32-NEXT:   Subsection [
 MFUN32-NEXT:     Type: 0xF1
 MFUN32-NEXT:     PayloadSize: 0x4B
+MFUN32:          ProcStart {
+MFUN32-NEXT:       DisplayName: y
+MFUN32-NEXT:       Section: _y
+MFUN32-NEXT:       CodeSize: 0xA
+MFUN32-NEXT:     }
+MFUN32-NEXT:     ProcEnd
 MFUN32:        ]
 MFUN32-NEXT:   Subsection [
 MFUN32-NEXT:     Type: 0xF2
@@ -46,6 +78,12 @@ MFUN32:        ]
 MFUN32-NEXT:   Subsection [
 MFUN32-NEXT:     Type: 0xF1
 MFUN32-NEXT:     PayloadSize: 0x4B
+MFUN32:          ProcStart {
+MFUN32-NEXT:       DisplayName: f
+MFUN32-NEXT:       Section: _f
+MFUN32-NEXT:       CodeSize: 0x14
+MFUN32-NEXT:     }
+MFUN32-NEXT:     ProcEnd
 MFUN32:        ]
 MFUN32-NEXT:   Subsection [
 MFUN32-NEXT:     Type: 0xF2
@@ -107,6 +145,12 @@ MFUN64:        ]
 MFUN64-NEXT:   Subsection [
 MFUN64-NEXT:     Type: 0xF1
 MFUN64-NEXT:     PayloadSize: 0x4B
+MFUN64:          ProcStart {
+MFUN64-NEXT:       DisplayName: x
+MFUN64-NEXT:       Section: x
+MFUN64-NEXT:       CodeSize: 0xE
+MFUN64-NEXT:     }
+MFUN64-NEXT:     ProcEnd
 MFUN64:        ]
 MFUN64-NEXT:   Subsection [
 MFUN64-NEXT:     Type: 0xF2
@@ -116,6 +160,12 @@ MFUN64-NEXT:   ]
 MFUN64-NEXT:   Subsection [
 MFUN64-NEXT:     Type: 0xF1
 MFUN64-NEXT:     PayloadSize: 0x4B
+MFUN64:          ProcStart {
+MFUN64-NEXT:       DisplayName: y
+MFUN64-NEXT:       Section: y
+MFUN64-NEXT:       CodeSize: 0xE
+MFUN64-NEXT:     }
+MFUN64-NEXT:     ProcEnd
 MFUN64:        ]
 MFUN64-NEXT:   Subsection [
 MFUN64-NEXT:     Type: 0xF2
@@ -125,6 +175,12 @@ MFUN64-NEXT:   ]
 MFUN64-NEXT:   Subsection [
 MFUN64-NEXT:     Type: 0xF1
 MFUN64-NEXT:     PayloadSize: 0x4B
+MFUN64:          ProcStart {
+MFUN64-NEXT:       DisplayName: f
+MFUN64-NEXT:       Section: f
+MFUN64-NEXT:       CodeSize: 0x18
+MFUN64-NEXT:     }
+MFUN64-NEXT:     ProcEnd
 MFUN64:        ]
 MFUN64-NEXT:   Subsection [
 MFUN64-NEXT:     Type: 0xF2
@@ -177,6 +233,30 @@ MFUN64-NEXT:     ]
 MFUN64-NEXT:   ]
 MFUN64-NEXT: ]
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; The following two object files were generated using the following command:
+;   D:\> cl /Z7 /c input.c
+; with the following contents of D:\input.c:
+;   void g(void);
+;
+;   void f(void) {
+;   #line 1 "one.c"
+;     g();
+;   #line 2 "two.c"
+;     g();
+;   #line 7 "one.c"
+;     g();
+;   }
+; using 32-/64-bit versions of CL v17.00.61030 and v18.00.21005.1 respectively.
+RUN: llvm-readobj -s -codeview-linetables %p/Inputs/multifile-linetables.obj.coff-2012-i368 \
+RUN:   | FileCheck %s -check-prefix MFILE32
+RUN: llvm-readobj -s -codeview-linetables %p/Inputs/multifile-linetables.obj.coff-2013-i368 \
+RUN:   | FileCheck %s -check-prefix MFILE32
+RUN: llvm-readobj -s -codeview-linetables %p/Inputs/multifile-linetables.obj.coff-2012-x86_64 \
+RUN:   | FileCheck %s -check-prefix MFILE64
+RUN: llvm-readobj -s -codeview-linetables %p/Inputs/multifile-linetables.obj.coff-2013-x86_64 \
+RUN:   | FileCheck %s -check-prefix MFILE64
+
 MFILE32:      CodeViewLineTables [
 MFILE32-NEXT:   Magic: 0x4
 MFILE32-NEXT:   Subsection [
@@ -190,6 +270,12 @@ MFILE32:        ]
 MFILE32-NEXT:   Subsection [
 MFILE32-NEXT:     Type: 0xF1
 MFILE32-NEXT:     PayloadSize: 0x4B
+MFILE32:          ProcStart {
+MFILE32-NEXT:       DisplayName: f
+MFILE32-NEXT:       Section: _f
+MFILE32-NEXT:       CodeSize: 0x14
+MFILE32-NEXT:     }
+MFILE32-NEXT:     ProcEnd
 MFILE32:        ]
 MFILE32-NEXT:   Subsection [
 MFILE32-NEXT:     Type: 0xF2
@@ -240,6 +326,12 @@ MFILE64:        ]
 MFILE64-NEXT:   Subsection [
 MFILE64-NEXT:     Type: 0xF1
 MFILE64-NEXT:     PayloadSize: 0x4B
+MFILE64:          ProcStart {
+MFILE64-NEXT:       DisplayName: f
+MFILE64-NEXT:       Section: f
+MFILE64-NEXT:       CodeSize: 0x18
+MFILE64-NEXT:     }
+MFILE64-NEXT:     ProcEnd
 MFILE64:        ]
 MFILE64-NEXT:   Subsection [
 MFILE64-NEXT:     Type: 0xF2
@@ -280,3 +372,53 @@ MFILE64-NEXT:       +0x13: 8
 MFILE64-NEXT:     ]
 MFILE64-NEXT:   ]
 MFILE64-NEXT: ]
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; The following object files were generated using the following command:
+;   C:\src> cl /Z7 /Gy /c test.cc
+; with the following contents of C:\src\test.cc:
+;   int f()
+;   {
+;     return 0;
+;   }
+;
+;   int g()
+;   {
+;     return 0;
+;   }
+; using 32-version of CL v17.00.61030 and v18.00.21005.1 respectively.
+RUN: llvm-readobj -s -codeview-linetables %p/Inputs/comdat-function-linetables.obj.coff-2012-i386 \
+RUN:   | FileCheck %s -check-prefix MCOMDAT
+RUN: llvm-readobj -s -codeview-linetables %p/Inputs/comdat-function-linetables.obj.coff-2013-i386 \
+RUN:   | FileCheck %s -check-prefix MCOMDAT
+
+MCOMDAT:      ProcStart {
+MCOMDAT-NEXT:   DisplayName: f
+MCOMDAT-NEXT:   Section: ?f@@YAHXZ
+MCOMDAT-NEXT:   CodeSize: 0x7
+MCOMDAT-NEXT: }
+MCOMDAT:      FunctionLineTable [
+MCOMDAT-NEXT:   FunctionName: ?f@@YAHXZ
+MCOMDAT-NEXT:   CodeSize: 0x7
+MCOMDAT-NEXT:   FilenameSegment [
+MCOMDAT-NEXT:     Filename: c:\src\test.cc
+MCOMDAT-NEXT:     +0x0: 2
+MCOMDAT-NEXT:     +0x3: 3
+MCOMDAT-NEXT:     +0x5: 4
+MCOMDAT-NEXT:   ]
+MCOMDAT-NEXT: ]
+MCOMDAT:      ProcStart {
+MCOMDAT-NEXT:   DisplayName: g
+MCOMDAT-NEXT:   Section: ?g@@YAHXZ
+MCOMDAT-NEXT:   CodeSize: 0x7
+MCOMDAT-NEXT: }
+MCOMDAT:      FunctionLineTable [
+MCOMDAT-NEXT:   FunctionName: ?g@@YAHXZ
+MCOMDAT-NEXT:   CodeSize: 0x7
+MCOMDAT-NEXT:   FilenameSegment [
+MCOMDAT-NEXT:     Filename: c:\src\test.cc
+MCOMDAT-NEXT:     +0x0: 7
+MCOMDAT-NEXT:     +0x3: 8
+MCOMDAT-NEXT:     +0x5: 9
+MCOMDAT-NEXT:   ]
+MCOMDAT-NEXT: ]
diff --git a/test/tools/llvm-readobj/coff-basereloc.test b/test/tools/llvm-readobj/coff-basereloc.test
new file mode 100644
index 000000000000..cd6687cae44c
--- /dev/null
+++ b/test/tools/llvm-readobj/coff-basereloc.test
@@ -0,0 +1,24 @@
+RUN: llvm-readobj -coff-basereloc %p/Inputs/basereloc.obj.coff-i386 | FileCheck %s
+
+CHECK:      Format: COFF-i386
+CHECK-NEXT: Arch: i386
+CHECK-NEXT: AddressSize: 32bit
+CHECK-NEXT: BaseReloc [
+CHECK-NEXT:   Entry {
+CHECK-NEXT:     Type: HIGHLOW
+CHECK-NEXT:     Address: 0x1004
+CHECK-NEXT:   }
+CHECK-NEXT:   Entry {
+CHECK-NEXT:     Type: HIGHLOW
+CHECK-NEXT:     Address: 0x100A
+CHECK-NEXT:   }
+CHECK-NEXT:   Entry {
+CHECK-NEXT:     Type: HIGHLOW
+CHECK-NEXT:     Address: 0x1010
+CHECK-NEXT:   }
+CHECK-NEXT:   Entry {
+CHECK-NEXT:     Type: ABSOLUTE
+CHECK-NEXT:     Address: 0x1000
+CHECK-NEXT:   }
+CHECK-NEXT: ]
+
diff --git a/test/tools/llvm-readobj/coff-directives.test b/test/tools/llvm-readobj/coff-directives.test
new file mode 100644
index 000000000000..83efffcf217f
--- /dev/null
+++ b/test/tools/llvm-readobj/coff-directives.test
@@ -0,0 +1,2 @@
+RUN: llvm-readobj -coff-directives %p/Inputs/directives.obj.coff-x86_64 | FileCheck %s
+CHECK: Directive(s): /DEFAULTLIB:"LIBCMT" /DEFAULTLIB:"OLDNAMES" 
diff --git a/test/tools/llvm-readobj/coff-exports.test b/test/tools/llvm-readobj/coff-exports.test
new file mode 100644
index 000000000000..54b42fef84b5
--- /dev/null
+++ b/test/tools/llvm-readobj/coff-exports.test
@@ -0,0 +1,11 @@
+RUN: llvm-readobj -coff-exports %p/Inputs/export-x86.dll | FileCheck %s -check-prefix CHECK -check-prefix CHECK-X86
+RUN: llvm-readobj -coff-exports %p/Inputs/export-x64.dll | FileCheck %s -check-prefix CHECK -check-prefix CHECK-X64
+RUN: llvm-readobj -coff-exports %p/Inputs/export-arm.dll | FileCheck %s -check-prefix CHECK -check-prefix CHECK-ARM
+
+CHECK: Export {
+CHECK:   Ordinal: 1
+CHECK:   Name: function
+CHECK-X86:   RVA: 0x1000
+CHECK-X64:   RVA: 0x1000
+CHECK-ARM:   RVA: 0x1001
+CHECK: }
diff --git a/test/tools/llvm-readobj/coff-file-sections-reading.test b/test/tools/llvm-readobj/coff-file-sections-reading.test
index 5c44c16f0058..c2f02d47de48 100644
--- a/test/tools/llvm-readobj/coff-file-sections-reading.test
+++ b/test/tools/llvm-readobj/coff-file-sections-reading.test
@@ -4,7 +4,7 @@ CHECK: Symbols [
 CHECK:   Symbol {
 CHECK:     Name: .file
 CHECK:     Value: 0
-CHECK:     Section:  (65534)
+CHECK:     Section:  IMAGE_SYM_DEBUG (-2)
 CHECK:     BaseType: Null (0x0)
 CHECK:     ComplexType: Null (0x0)
 CHECK:     StorageClass: File (0x67)
diff --git a/test/tools/llvm-readobj/cxx-cli-aux.test b/test/tools/llvm-readobj/cxx-cli-aux.test
index 90e73c033a86..0b687793547e 100644
--- a/test/tools/llvm-readobj/cxx-cli-aux.test
+++ b/test/tools/llvm-readobj/cxx-cli-aux.test
@@ -9,7 +9,7 @@ CHECK:      Symbols [
 CHECK:        Symbol {
 CHECK:          Name: ?PerAppDomain@@$$Q3HA
 CHECK-NEXT:     Value: 4
-CHECK-NEXT:     Section:  (65535)
+CHECK-NEXT:     Section:  IMAGE_SYM_ABSOLUTE (-1)
 CHECK-NEXT:     BaseType: Null (0x0)
 CHECK-NEXT:     ComplexType: Null (0x0)
 CHECK-NEXT:     StorageClass: External (0x2)
@@ -21,14 +21,13 @@ CHECK-NEXT:       LineNumberCount: 0
 CHECK-NEXT:       Checksum: 0x0
 CHECK-NEXT:       Number: 0
 CHECK-NEXT:       Selection: NoDuplicates (0x1)
-CHECK-NEXT:       Unused: (00 00 00)
 CHECK-NEXT:     }
 CHECK-NEXT:   }
 
 CHECK:        Symbol {
 CHECK:          Name: 04000001
 CHECK-NEXT:     Value: 4
-CHECK-NEXT:     Section:  (65535)
+CHECK-NEXT:     Section:  IMAGE_SYM_ABSOLUTE (-1)
 CHECK-NEXT:     BaseType: Null (0x0)
 CHECK-NEXT:     ComplexType: Null (0x0)
 CHECK-NEXT:     StorageClass: CLRToken (0x6B)
@@ -37,6 +36,5 @@ CHECK-NEXT:     AuxCLRToken {
 CHECK-NEXT:       AuxType: 1
 CHECK-NEXT:       Reserved: 0
 CHECK-NEXT:       SymbolTableIndex: ?PerAppDomain@@$$Q3HA (19)
-CHECK-NEXT:       Unused: (00 00 00 00 00 00 00 00 00 00 00 00)
 CHECK-NEXT:     }
 CHECK-NEXT:   }
diff --git a/test/tools/llvm-readobj/file-headers.test b/test/tools/llvm-readobj/file-headers.test
index 39a8c0ef8991..fd030ef0b56e 100644
--- a/test/tools/llvm-readobj/file-headers.test
+++ b/test/tools/llvm-readobj/file-headers.test
@@ -10,6 +10,16 @@ RUN: llvm-readobj -h %p/Inputs/trivial.obj.elf-i386 \
 RUN:   | FileCheck %s -check-prefix ELF32
 RUN: llvm-readobj -h %p/Inputs/trivial.obj.elf-x86-64 \
 RUN:   | FileCheck %s -check-prefix ELF64
+RUN: llvm-readobj -h %p/Inputs/trivial.obj.macho-i386 \
+RUN:   | FileCheck %s -check-prefix MACHO32
+RUN: llvm-readobj -h %p/Inputs/trivial.obj.macho-x86-64 \
+RUN:   | FileCheck %s -check-prefix MACHO64
+RUN: llvm-readobj -h %p/Inputs/trivial.obj.macho-ppc \
+RUN:   | FileCheck %s -check-prefix MACHO-PPC
+RUN: llvm-readobj -h %p/Inputs/trivial.obj.macho-ppc64 \
+RUN:   | FileCheck %s -check-prefix MACHO-PPC64
+RUN: llvm-readobj -h %p/Inputs/trivial.obj.macho-arm \
+RUN:   | FileCheck %s -check-prefix MACHO-ARM
 RUN: llvm-readobj -h %p/Inputs/magic.coff-unknown \
 RUN:   | FileCheck %s -check-prefix COFF-UNKNOWN
 RUN: llvm-readobj -h %p/Inputs/magic.coff-importlib \
@@ -122,6 +132,88 @@ ELF64-NEXT:   SectionHeaderCount: 10
 ELF64-NEXT:   StringTableSectionIndex: 7
 ELF64-NEXT: }
 
+MACHO32:      File: {{(.*[/\\])?}}trivial.obj.macho-i386
+MACHO32-NEXT: Format: Mach-O 32-bit i386
+MACHO32-NEXT: Arch: i386
+MACHO32-NEXT: AddressSize: 32bit
+MACHO32-NEXT: MachHeader {
+MACHO32-NEXT:   Magic: Magic (0xFEEDFACE)
+MACHO32-NEXT:   CpuType: X86 (0x7)
+MACHO32-NEXT:   CpuSubType: CPU_SUBTYPE_I386_ALL (0x3)
+MACHO32-NEXT:   FileType: Relocatable (0x1)
+MACHO32-NEXT:   NumOfLoadCommands: 3
+MACHO32-NEXT:   SizeOfLoadCommands: 296
+MACHO32-NEXT:   Flags [ (0x2000)
+MACHO32-NEXT:     MH_SUBSECTIONS_VIA_SYMBOLS (0x2000)
+MACHO32-NEXT:   ]
+MACHO32-NEXT: }
+
+MACHO64:      File: {{(.*[/\\])?}}trivial.obj.macho-x86-64
+MACHO64-NEXT: Format: Mach-O 64-bit x86-64
+MACHO64-NEXT: Arch: x86_64
+MACHO64-NEXT: AddressSize: 64bit
+MACHO64-NEXT: MachHeader {
+MACHO64-NEXT:   Magic: Magic64 (0xFEEDFACF)
+MACHO64-NEXT:   CpuType: X86-64 (0x1000007)
+MACHO64-NEXT:   CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3)
+MACHO64-NEXT:   FileType: Relocatable (0x1)
+MACHO64-NEXT:   NumOfLoadCommands: 3
+MACHO64-NEXT:   SizeOfLoadCommands: 336
+MACHO64-NEXT:   Flags [ (0x2000)
+MACHO64-NEXT:     MH_SUBSECTIONS_VIA_SYMBOLS (0x2000)
+MACHO64-NEXT:   ]
+MACHO64-NEXT:   Reserved: 0x0
+MACHO64-NEXT: }
+
+MACHO-PPC:      File: {{(.*[/\\])?}}trivial.obj.macho-ppc
+MACHO-PPC-NEXT: Format: Mach-O 32-bit ppc
+MACHO-PPC-NEXT: Arch: powerpc
+MACHO-PPC-NEXT: AddressSize: 32bit
+MACHO-PPC-NEXT: MachHeader {
+MACHO-PPC-NEXT:   Magic: Magic (0xFEEDFACE)
+MACHO-PPC-NEXT:   CpuType: PowerPC (0x12)
+MACHO-PPC-NEXT:   CpuSubType: CPU_SUBTYPE_POWERPC_ALL (0x0)
+MACHO-PPC-NEXT:   FileType: Relocatable (0x1)
+MACHO-PPC-NEXT:   NumOfLoadCommands: 3
+MACHO-PPC-NEXT:   SizeOfLoadCommands: 500
+MACHO-PPC-NEXT:   Flags [ (0x2000)
+MACHO-PPC-NEXT:     MH_SUBSECTIONS_VIA_SYMBOLS (0x2000)
+MACHO-PPC-NEXT:   ]
+MACHO-PPC-NEXT: }
+
+MACHO-PPC64:      File: {{(.*[/\\])?}}trivial.obj.macho-ppc64
+MACHO-PPC64-NEXT: Format: Mach-O 64-bit ppc64
+MACHO-PPC64-NEXT: Arch: powerpc64
+MACHO-PPC64-NEXT: AddressSize: 64bit
+MACHO-PPC64-NEXT: MachHeader {
+MACHO-PPC64-NEXT:   Magic: Magic64 (0xFEEDFACF)
+MACHO-PPC64-NEXT:   CpuType: PowerPC64 (0x1000012)
+MACHO-PPC64-NEXT:   CpuSubtype: 0x0
+MACHO-PPC64-NEXT:   FileType: Relocatable (0x1)
+MACHO-PPC64-NEXT:   NumOfLoadCommands: 3
+MACHO-PPC64-NEXT:   SizeOfLoadCommands: 576
+MACHO-PPC64-NEXT:   Flags [ (0x2000)
+MACHO-PPC64-NEXT:     MH_SUBSECTIONS_VIA_SYMBOLS (0x2000)
+MACHO-PPC64-NEXT:   ]
+MACHO-PPC64-NEXT:   Reserved: 0x0
+MACHO-PPC64-NEXT: }
+
+MACHO-ARM:      File: {{(.*[/\\])?}}trivial.obj.macho-arm
+MACHO-ARM-NEXT: Format: Mach-O arm
+MACHO-ARM-NEXT: Arch: arm
+MACHO-ARM-NEXT: AddressSize: 32bit
+MACHO-ARM-NEXT: MachHeader {
+MACHO-ARM-NEXT:   Magic: Magic (0xFEEDFACE)
+MACHO-ARM-NEXT:   CpuType: Arm (0xC)
+MACHO-ARM-NEXT:   CpuSubType: CPU_SUBTYPE_ARM_V7 (0x9)
+MACHO-ARM-NEXT:   FileType: Relocatable (0x1)
+MACHO-ARM-NEXT:   NumOfLoadCommands: 3
+MACHO-ARM-NEXT:   SizeOfLoadCommands: 636
+MACHO-ARM-NEXT:   Flags [ (0x2000)
+MACHO-ARM-NEXT:     MH_SUBSECTIONS_VIA_SYMBOLS (0x2000)
+MACHO-ARM-NEXT:   ]
+MACHO-ARM-NEXT: }
+
 PE32:      File: {{(.*[/\\])?}}trivial.exe.coff-i386
 PE32-NEXT: Format: COFF-i386
 PE32-NEXT: Arch: i386
@@ -159,7 +251,7 @@ PE32-NEXT:   MinorSubsystemVersion: 0
 PE32-NEXT:   SizeOfImage: 16384
 PE32-NEXT:   SizeOfHeaders: 1024
 PE32-NEXT:   Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI (0x3)
-PE32-NEXT:   Subsystem [ (0x8140)
+PE32-NEXT:   Characteristics [ (0x8140)
 PE32-NEXT:     IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE (0x40)
 PE32-NEXT:     IMAGE_DLL_CHARACTERISTICS_NX_COMPAT (0x100)
 PE32-NEXT:     IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE (0x8000)
@@ -204,6 +296,25 @@ PE32-NEXT:     ReservedRVA: 0x0
 PE32-NEXT:     ReservedSize: 0x0
 PE32-NEXT:   }
 PE32-NEXT: }
+PE32-NEXT: DOSHeader {
+PE32-NEXT:   Magic: MZ
+PE32-NEXT:   UsedBytesInTheLastPage: 144
+PE32-NEXT:   FileSizeInPages: 3
+PE32-NEXT:   NumberOfRelocationItems: 0
+PE32-NEXT:   HeaderSizeInParagraphs: 4
+PE32-NEXT:   MinimumExtraParagraphs: 0
+PE32-NEXT:   MaximumExtraParagraphs: 65535
+PE32-NEXT:   InitialRelativeSS: 0
+PE32-NEXT:   InitialSP: 184
+PE32-NEXT:   Checksum: 0
+PE32-NEXT:   InitialIP: 0
+PE32-NEXT:   InitialRelativeCS: 0
+PE32-NEXT:   AddressOfRelocationTable: 64
+PE32-NEXT:   OverlayNumber: 0
+PE32-NEXT:   OEMid: 0
+PE32-NEXT:   OEMinfo: 0
+PE32-NEXT:   AddressOfNewExeHeader: 176
+PE32-NEXT: }
 
 COFF-UNKNOWN:      Format: COFF-<unknown arch>
 COFF-UNKNOWN-NEXT: Arch: unknown
@@ -224,12 +335,11 @@ COFF-IMPORTLIB-NEXT: Arch: unknown
 COFF-IMPORTLIB-NEXT: AddressSize: 32bit
 COFF-IMPORTLIB-NEXT: ImageFileHeader {
 COFF-IMPORTLIB-NEXT:   Machine: IMAGE_FILE_MACHINE_UNKNOWN (0x0)
-COFF-IMPORTLIB-NEXT:   SectionCount: 65535
+COFF-IMPORTLIB-NEXT:   SectionCount: 0
 COFF-IMPORTLIB-NEXT:   TimeDateStamp: 1970-09-09 19:52:32 (0x14C0000)
-COFF-IMPORTLIB-NEXT:   PointerToSymbolTable: 0x528542EB
-COFF-IMPORTLIB-NEXT:   SymbolCount: 20
+COFF-IMPORTLIB-NEXT:   PointerToSymbolTable: 0x0
+COFF-IMPORTLIB-NEXT:   SymbolCount: 0
 COFF-IMPORTLIB-NEXT:   OptionalHeaderSize: 0
-COFF-IMPORTLIB-NEXT:   Characteristics [ (0x8)
-COFF-IMPORTLIB-NEXT:     IMAGE_FILE_LOCAL_SYMS_STRIPPED (0x8)
+COFF-IMPORTLIB-NEXT:   Characteristics [ (0x0)
 COFF-IMPORTLIB-NEXT:   ]
 COFF-IMPORTLIB-NEXT: }
diff --git a/test/tools/llvm-readobj/imports.test b/test/tools/llvm-readobj/imports.test
new file mode 100644
index 000000000000..58512f42adcd
--- /dev/null
+++ b/test/tools/llvm-readobj/imports.test
@@ -0,0 +1,88 @@
+RUN: llvm-readobj --coff-imports %p/Inputs/imports.exe.coff-i386 | FileCheck -check-prefix=X86 %s
+RUN: llvm-readobj --coff-imports %p/Inputs/imports.exe.coff-x86-64 | FileCheck -check-prefix=X64  %s
+
+X86:      Import {
+X86-NEXT:   Name: KERNEL32.dll
+X86-NEXT:   ImportLookupTableRVA: 0x2108
+X86-NEXT:   ImportAddressTableRVA: 0x2000
+X86-NEXT:   Symbol: ExitProcess (337)
+X86-NEXT:   Symbol: GetProcAddress (669)
+X86-NEXT:   Symbol: FreeLibrary (414)
+X86-NEXT:   Symbol: GetLastError (592)
+X86-NEXT:   Symbol: RaiseException (1087)
+X86-NEXT:   Symbol: LoadLibraryExA (934)
+X86-NEXT: }
+X86-NEXT: Import {
+X86-NEXT:   Name: USER32.dll
+X86-NEXT:   ImportLookupTableRVA: 0x2124
+X86-NEXT:   ImportAddressTableRVA: 0x201C
+X86-NEXT:   Symbol: MessageBoxA (582)
+X86-NEXT: }
+X86-NEXT: Import {
+X86-NEXT:   Name: mydll.dll
+X86-NEXT:   ImportLookupTableRVA: 0x212C
+X86-NEXT:   ImportAddressTableRVA: 0x2024
+X86-NEXT:   Symbol: Func1 (0)
+X86-NEXT:   Symbol: Func2 (1)
+X86-NEXT:   Symbol:  (3)
+X86-NEXT: }
+X86-NEXT: DelayImport {
+X86-NEXT:   Name: lazyload.dll
+X86-NEXT:   Attributes: 0x1
+X86-NEXT:   ModuleHandle: 0x301C
+X86-NEXT:   ImportAddressTable: 0x3010
+X86-NEXT:   ImportNameTable: 0x2090
+X86-NEXT:   BoundDelayImportTable: 0x20AC
+X86-NEXT:   UnloadDelayImportTable: 0x0
+X86-NEXT:   Import {
+X86-NEXT:     Symbol: Func5 (0)
+X86-NEXT:     Address: 0x401073
+X86-NEXT:   }
+X86-NEXT:   Import {
+X86-NEXT:     Symbol: Func4 (0)
+X86-NEXT:     Address: 0x401052
+X86-NEXT:   }
+X86-NEXT: }
+
+X64:      Import {
+X64-NEXT:   Name: KERNEL32.dll
+X64-NEXT:   ImportLookupTableRVA: 0x2170
+X64-NEXT:   ImportAddressTableRVA: 0x2000
+X64-NEXT:   Symbol: ExitProcess (343)
+X64-NEXT:   Symbol: GetProcAddress (676)
+X64-NEXT:   Symbol: FreeLibrary (420)
+X64-NEXT:   Symbol: GetLastError (598)
+X64-NEXT:   Symbol: RaiseException (1091)
+X64-NEXT:   Symbol: LoadLibraryExA (937)
+X64-NEXT: }
+X64-NEXT: Import {
+X64-NEXT:   Name: USER32.dll
+X64-NEXT:   ImportLookupTableRVA: 0x21A8
+X64-NEXT:   ImportAddressTableRVA: 0x2038
+X64-NEXT:   Symbol: MessageBoxA (586)
+X64-NEXT: }
+X64-NEXT: Import {
+X64-NEXT:   Name: mydll.dll
+X64-NEXT:   ImportLookupTableRVA: 0x21B8
+X64-NEXT:   ImportAddressTableRVA: 0x2048
+X64-NEXT:   Symbol: Func1 (0)
+X64-NEXT:   Symbol: Func2 (1)
+X64-NEXT:   Symbol:  (3)
+X64-NEXT: }
+X64-NEXT: DelayImport {
+X64-NEXT:   Name: lazyload.dll
+X64-NEXT:   Attributes: 0x1
+X64-NEXT:   ModuleHandle: 0x3028
+X64-NEXT:   ImportAddressTable: 0x3010
+X64-NEXT:   ImportNameTable: 0x20E0
+X64-NEXT:   BoundDelayImportTable: 0x2108
+X64-NEXT:   UnloadDelayImportTable: 0x0
+X64-NEXT:   Import {
+X64-NEXT:     Symbol: Func5 (0)
+X64-NEXT:     Address: 0x1400010F1
+X64-NEXT:   }
+X64-NEXT:   Import {
+X64-NEXT:     Symbol: Func4 (0)
+X64-NEXT:     Address: 0x140001066
+X64-NEXT:   }
+X64-NEXT: }
diff --git a/test/tools/llvm-readobj/peplus.test b/test/tools/llvm-readobj/peplus.test
index 8e6f55085475..4d8d25db894c 100644
--- a/test/tools/llvm-readobj/peplus.test
+++ b/test/tools/llvm-readobj/peplus.test
@@ -35,7 +35,7 @@ CHECK:   MinorSubsystemVersion: 0
 CHECK:   SizeOfImage: 8192
 CHECK:   SizeOfHeaders: 512
 CHECK:   Subsystem: IMAGE_SUBSYSTEM_WINDOWS_CUI (0x3)
-CHECK:   Subsystem [ (0x8160)
+CHECK:   Characteristics [ (0x8160)
 CHECK:     IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE (0x40)
 CHECK:     IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA (0x20)
 CHECK:     IMAGE_DLL_CHARACTERISTICS_NX_COMPAT (0x100)
diff --git a/test/tools/llvm-readobj/reloc-types.test b/test/tools/llvm-readobj/reloc-types.test
index 0c8b54dbc6bf..36e2f7010bdf 100644
--- a/test/tools/llvm-readobj/reloc-types.test
+++ b/test/tools/llvm-readobj/reloc-types.test
@@ -149,7 +149,7 @@ ELF-PPC64: Type: R_PPC64_GOT_TPREL16_HA (90)
 ELF-PPC64: Type: R_PPC64_TLSGD (107)
 ELF-PPC64: Type: R_PPC64_TLSLD (108)
 
-ELF-AARCH64: Type: R_AARCH64_NONE (256)
+ELF-AARCH64: Type: R_AARCH64_NONE (0)
 ELF-AARCH64: Type: R_AARCH64_ABS64 (257)
 ELF-AARCH64: Type: R_AARCH64_ABS32 (258)
 ELF-AARCH64: Type: R_AARCH64_ABS16 (259)
@@ -169,6 +169,7 @@ ELF-AARCH64: Type: R_AARCH64_MOVW_SABS_G2 (272)
 ELF-AARCH64: Type: R_AARCH64_LD_PREL_LO19 (273)
 ELF-AARCH64: Type: R_AARCH64_ADR_PREL_LO21 (274)
 ELF-AARCH64: Type: R_AARCH64_ADR_PREL_PG_HI21 (275)
+ELF-AARCH64: Type: R_AARCH64_ADR_PREL_PG_HI21_NC (276)
 ELF-AARCH64: Type: R_AARCH64_ADD_ABS_LO12_NC (277)
 ELF-AARCH64: Type: R_AARCH64_LDST8_ABS_LO12_NC (278)
 ELF-AARCH64: Type: R_AARCH64_TSTBR14 (279)
@@ -178,9 +179,39 @@ ELF-AARCH64: Type: R_AARCH64_CALL26 (283)
 ELF-AARCH64: Type: R_AARCH64_LDST16_ABS_LO12_NC (284)
 ELF-AARCH64: Type: R_AARCH64_LDST32_ABS_LO12_NC (285)
 ELF-AARCH64: Type: R_AARCH64_LDST64_ABS_LO12_NC (286)
+ELF-AARCH64: Type: R_AARCH64_MOVW_PREL_G0 (287)
+ELF-AARCH64: Type: R_AARCH64_MOVW_PREL_G0_NC (288)
+ELF-AARCH64: Type: R_AARCH64_MOVW_PREL_G1 (289)
+ELF-AARCH64: Type: R_AARCH64_MOVW_PREL_G1_NC (290)
+ELF-AARCH64: Type: R_AARCH64_MOVW_PREL_G2 (291)
+ELF-AARCH64: Type: R_AARCH64_MOVW_PREL_G2_NC (292)
+ELF-AARCH64: Type: R_AARCH64_MOVW_PREL_G3 (293)
 ELF-AARCH64: Type: R_AARCH64_LDST128_ABS_LO12_NC (299)
+ELF-AARCH64: Type: R_AARCH64_MOVW_GOTOFF_G0 (300)
+ELF-AARCH64: Type: R_AARCH64_MOVW_GOTOFF_G0_NC (301)
+ELF-AARCH64: Type: R_AARCH64_MOVW_GOTOFF_G1 (302)
+ELF-AARCH64: Type: R_AARCH64_MOVW_GOTOFF_G1_NC (303)
+ELF-AARCH64: Type: R_AARCH64_MOVW_GOTOFF_G2 (304)
+ELF-AARCH64: Type: R_AARCH64_MOVW_GOTOFF_G2_NC (305)
+ELF-AARCH64: Type: R_AARCH64_MOVW_GOTOFF_G3 (306)
+ELF-AARCH64: Type: R_AARCH64_GOTREL64 (307)
+ELF-AARCH64: Type: R_AARCH64_GOTREL32 (308)
+ELF-AARCH64: Type: R_AARCH64_GOT_LD_PREL19 (309)
+ELF-AARCH64: Type: R_AARCH64_LD64_GOTOFF_LO15 (310)
 ELF-AARCH64: Type: R_AARCH64_ADR_GOT_PAGE (311)
 ELF-AARCH64: Type: R_AARCH64_LD64_GOT_LO12_NC (312)
+ELF-AARCH64: Type: R_AARCH64_LD64_GOTPAGE_LO15 (313)
+ELF-AARCH64: Type: R_AARCH64_TLSGD_ADR_PREL21 (512)
+ELF-AARCH64: Type: R_AARCH64_TLSGD_ADR_PAGE21 (513)
+ELF-AARCH64: Type: R_AARCH64_TLSGD_ADD_LO12_NC (514)
+ELF-AARCH64: Type: R_AARCH64_TLSGD_MOVW_G1 (515)
+ELF-AARCH64: Type: R_AARCH64_TLSGD_MOVW_G0_NC (516)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_ADR_PREL21 (517)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_ADR_PAGE21 (518)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_ADD_LO12_NC (519)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_G1 (520)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_G0_NC (521)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LD_PREL19 (522)
 ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G2 (523)
 ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G1 (524)
 ELF-AARCH64: Type: R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC (525)
@@ -218,10 +249,29 @@ ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST32_TPREL_LO12 (556)
 ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC (557)
 ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST64_TPREL_LO12 (558)
 ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC (559)
-ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADR_PAGE (562)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_LD_PREL19 (560)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADR_PREL21 (561)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADR_PAGE21 (562)
 ELF-AARCH64: Type: R_AARCH64_TLSDESC_LD64_LO12_NC (563)
 ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADD_LO12_NC (564)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_OFF_G1 (565)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_OFF_G0_NC (566)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_LDR (567)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC_ADD (568)
 ELF-AARCH64: Type: R_AARCH64_TLSDESC_CALL (569)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST128_TPREL_LO12 (570)
+ELF-AARCH64: Type: R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC (571)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST128_DTPREL_LO12 (572)
+ELF-AARCH64: Type: R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC (573)
+ELF-AARCH64: Type: R_AARCH64_COPY (1024)
+ELF-AARCH64: Type: R_AARCH64_GLOB_DAT (1025)
+ELF-AARCH64: Type: R_AARCH64_JUMP_SLOT (1026)
+ELF-AARCH64: Type: R_AARCH64_RELATIVE (1027)
+ELF-AARCH64: Type: R_AARCH64_TLS_DTPREL64 (1028)
+ELF-AARCH64: Type: R_AARCH64_TLS_DTPMOD64 (1029)
+ELF-AARCH64: Type: R_AARCH64_TLS_TPREL64 (1030)
+ELF-AARCH64: Type: R_AARCH64_TLSDESC (1031)
+ELF-AARCH64: Type: R_AARCH64_IRELATIVE (1032)
 
 ELF-ARM: Type: R_ARM_NONE (0)
 ELF-ARM: Type: R_ARM_PC24 (1)
@@ -250,7 +300,6 @@ ELF-ARM: Type: R_ARM_RELATIVE (23)
 ELF-ARM: Type: R_ARM_GOTOFF32 (24)
 ELF-ARM: Type: R_ARM_BASE_PREL (25)
 ELF-ARM: Type: R_ARM_GOT_BREL (26)
-ELF-ARM: Type: R_ARM_PLT32 (27)
 ELF-ARM: Type: R_ARM_CALL (28)
 ELF-ARM: Type: R_ARM_JUMP24 (29)
 ELF-ARM: Type: R_ARM_THM_JUMP24 (30)
@@ -354,6 +403,7 @@ ELF-ARM: Type: R_ARM_PRIVATE_15 (127)
 ELF-ARM: Type: R_ARM_ME_TOO (128)
 ELF-ARM: Type: R_ARM_THM_TLS_DESCSEQ16 (129)
 ELF-ARM: Type: R_ARM_THM_TLS_DESCSEQ32 (130)
+ELF-ARM: Type: R_ARM_IRELATIVE (160)
 
 ELF-MIPS: Type: R_MIPS_NONE (0)
 ELF-MIPS: Type: R_MIPS_16 (1)
diff --git a/test/tools/llvm-readobj/relocations.test b/test/tools/llvm-readobj/relocations.test
index 864ded35a4e4..2e11aa27c37b 100644
--- a/test/tools/llvm-readobj/relocations.test
+++ b/test/tools/llvm-readobj/relocations.test
@@ -1,5 +1,9 @@
 RUN: llvm-readobj -r %p/Inputs/trivial.obj.coff-i386 \
 RUN:   | FileCheck %s -check-prefix COFF
+RUN: llvm-readobj -r %p/Inputs/bad-relocs.obj.coff-i386 \
+RUN:   | FileCheck %s -check-prefix BAD-COFF-RELOCS
+RUN: llvm-readobj -r %p/Inputs/relocs-no-symtab.obj.coff-i386 \
+RUN:   | FileCheck %s -check-prefix BAD-COFF-RELOCS
 RUN: llvm-readobj -r %p/Inputs/trivial.obj.elf-i386 \
 RUN:   | FileCheck %s -check-prefix ELF
 RUN: llvm-readobj -r %p/Inputs/trivial.obj.macho-i386 \
@@ -21,6 +25,12 @@ COFF-NEXT:     0xE IMAGE_REL_I386_REL32 _SomeOtherFunction
 COFF-NEXT:   }
 COFF-NEXT: ]
 
+BAD-COFF-RELOCS:      Relocations [
+BAD-COFF-RELOCS-NEXT:   Section (1) sec {
+BAD-COFF-RELOCS-NEXT:     0xDEADBEEF IMAGE_REL_I386_ABSOLUTE -
+BAD-COFF-RELOCS-NEXT:   }
+BAD-COFF-RELOCS-NEXT: ]
+
 ELF:      Relocations [
 ELF-NEXT:   Section (2) .rel.text {
 ELF-NEXT:     0xC R_386_GOTPC _GLOBAL_OFFSET_TABLE_ 0x0
diff --git a/test/tools/llvm-readobj/sections-ext.test b/test/tools/llvm-readobj/sections-ext.test
index 972d8e6f4ef7..4024878d2bde 100644
--- a/test/tools/llvm-readobj/sections-ext.test
+++ b/test/tools/llvm-readobj/sections-ext.test
@@ -52,7 +52,6 @@ COFF-NEXT:           LineNumberCount: 0
 COFF-NEXT:           Checksum: 0x0
 COFF-NEXT:           Number: 1
 COFF-NEXT:           Selection: 0x0
-COFF-NEXT:           Unused: (00 00 00)
 COFF-NEXT:         }
 COFF-NEXT:       }
 COFF-NEXT:       Symbol {
diff --git a/test/tools/llvm-readobj/symbols.test b/test/tools/llvm-readobj/symbols.test
index 26830ac46a8a..71955e0d8235 100644
--- a/test/tools/llvm-readobj/symbols.test
+++ b/test/tools/llvm-readobj/symbols.test
@@ -7,7 +7,7 @@ COFF:      Symbols [
 COFF-NEXT:   Symbol {
 COFF-NEXT:     Name: @comp.id
 COFF-NEXT:    Value: 14766605
-COFF-NEXT:    Section:  (65535)
+COFF-NEXT:    Section:  IMAGE_SYM_ABSOLUTE (-1)
 COFF-NEXT:     BaseType: Null (0x0)
 COFF-NEXT:     ComplexType: Null (0x0)
 COFF-NEXT:    StorageClass: Static (0x3)
@@ -16,7 +16,7 @@ COFF-NEXT:   }
 COFF-NEXT:   Symbol {
 COFF-NEXT:    Name: @feat.00
 COFF-NEXT:    Value: 2147484049
-COFF-NEXT:     Section:  (65535)
+COFF-NEXT:     Section:  IMAGE_SYM_ABSOLUTE (-1)
 COFF-NEXT:     BaseType: Null (0x0)
 COFF-NEXT:    ComplexType: Null (0x0)
 COFF-NEXT:    StorageClass: Static (0x3)
@@ -37,7 +37,6 @@ COFF-NEXT:       LineNumberCount: 0
 COFF-NEXT:       Checksum: 0x0
 COFF-NEXT:       Number: 1
 COFF-NEXT:       Selection: 0x0
-COFF-NEXT:       Unused: (00 00 00)
 COFF-NEXT:     }
 COFF-NEXT:   }
 
diff --git a/test/tools/llvm-symbolizer/Inputs/dsym-test-exe b/test/tools/llvm-symbolizer/Inputs/dsym-test-exe
new file mode 100755
index 000000000000..ba3154cd516e
--- /dev/null
+++ b/test/tools/llvm-symbolizer/Inputs/dsym-test-exe
diff --git a/test/tools/llvm-symbolizer/Inputs/dsym-test-exe-differentname.dSYM/Contents/Info.plist b/test/tools/llvm-symbolizer/Inputs/dsym-test-exe-differentname.dSYM/Contents/Info.plist
new file mode 100644
index 000000000000..4e84ad0cc599
--- /dev/null
+++ b/test/tools/llvm-symbolizer/Inputs/dsym-test-exe-differentname.dSYM/Contents/Info.plist
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+	<dict>
+		<key>CFBundleDevelopmentRegion</key>
+		<string>English</string>
+		<key>CFBundleIdentifier</key>
+		<string>com.apple.xcode.dsym.dsym-test-exe-differentname</string>
+		<key>CFBundleInfoDictionaryVersion</key>
+		<string>6.0</string>
+		<key>CFBundlePackageType</key>
+		<string>dSYM</string>
+		<key>CFBundleSignature</key>
+		<string>????</string>
+		<key>CFBundleShortVersionString</key>
+		<string>1.0</string>
+		<key>CFBundleVersion</key>
+		<string>1</string>
+	</dict>
+</plist>
diff --git a/test/tools/llvm-symbolizer/Inputs/dsym-test-exe-differentname.dSYM/Contents/Resources/DWARF/dsym-test-exe-second b/test/tools/llvm-symbolizer/Inputs/dsym-test-exe-differentname.dSYM/Contents/Resources/DWARF/dsym-test-exe-second
new file mode 100644
index 000000000000..c30dba3ff98a
--- /dev/null
+++ b/test/tools/llvm-symbolizer/Inputs/dsym-test-exe-differentname.dSYM/Contents/Resources/DWARF/dsym-test-exe-second
diff --git a/test/tools/llvm-symbolizer/Inputs/dsym-test-exe-second b/test/tools/llvm-symbolizer/Inputs/dsym-test-exe-second
new file mode 100755
index 000000000000..ba3154cd516e
--- /dev/null
+++ b/test/tools/llvm-symbolizer/Inputs/dsym-test-exe-second
diff --git a/test/tools/llvm-symbolizer/Inputs/dsym-test-exe.dSYM/Contents/Info.plist b/test/tools/llvm-symbolizer/Inputs/dsym-test-exe.dSYM/Contents/Info.plist
new file mode 100644
index 000000000000..35b1c113da4a
--- /dev/null
+++ b/test/tools/llvm-symbolizer/Inputs/dsym-test-exe.dSYM/Contents/Info.plist
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+	<dict>
+		<key>CFBundleDevelopmentRegion</key>
+		<string>English</string>
+		<key>CFBundleIdentifier</key>
+		<string>com.apple.xcode.dsym.dsym-test-exe</string>
+		<key>CFBundleInfoDictionaryVersion</key>
+		<string>6.0</string>
+		<key>CFBundlePackageType</key>
+		<string>dSYM</string>
+		<key>CFBundleSignature</key>
+		<string>????</string>
+		<key>CFBundleShortVersionString</key>
+		<string>1.0</string>
+		<key>CFBundleVersion</key>
+		<string>1</string>
+	</dict>
+</plist>
diff --git a/test/tools/llvm-symbolizer/Inputs/dsym-test-exe.dSYM/Contents/Resources/DWARF/dsym-test-exe b/test/tools/llvm-symbolizer/Inputs/dsym-test-exe.dSYM/Contents/Resources/DWARF/dsym-test-exe
new file mode 100644
index 000000000000..c30dba3ff98a
--- /dev/null
+++ b/test/tools/llvm-symbolizer/Inputs/dsym-test-exe.dSYM/Contents/Resources/DWARF/dsym-test-exe
diff --git a/test/tools/llvm-symbolizer/Inputs/dsym-test.c b/test/tools/llvm-symbolizer/Inputs/dsym-test.c
new file mode 100644
index 000000000000..84d5ad9e9143
--- /dev/null
+++ b/test/tools/llvm-symbolizer/Inputs/dsym-test.c
@@ -0,0 +1,8 @@
+// clang -c dsym-test.c -g
+// clang dsym-test.o -g -o dsym-test-exe
+// dsymutil dsym-test-exe
+// clang dsym-test.o -g -o dsym-test-exe-second
+// dsymutil dsym-test-exe-second -o dsym-test-exe-differentname.dSYM
+int main() {
+  return 0;
+}
diff --git a/test/tools/llvm-symbolizer/Inputs/ppc64 b/test/tools/llvm-symbolizer/Inputs/ppc64
new file mode 100755
index 000000000000..2356e4347468
--- /dev/null
+++ b/test/tools/llvm-symbolizer/Inputs/ppc64
diff --git a/test/tools/llvm-symbolizer/dsym.test b/test/tools/llvm-symbolizer/dsym.test
new file mode 100644
index 000000000000..326602d3f733
--- /dev/null
+++ b/test/tools/llvm-symbolizer/dsym.test
@@ -0,0 +1,14 @@
+RUN: echo "%p/Inputs/dsym-test-exe 0x0000000100000f90" > %t.input
+RUN: echo "%p/Inputs/dsym-test-exe-second 0x0000000100000f90" >> %t.input
+RUN: llvm-symbolizer < %t.input | FileCheck %s --check-prefix=CHECK-NOHINT
+RUN: llvm-symbolizer -dsym-hint=%p/Inputs/dsym-test-exe-differentname.dSYM < %t.input | FileCheck %s --check-prefix=CHECK-HINT
+
+CHECK-NOHINT: main
+CHECK-NOHINT: dsym-test.c
+CHECK-NOHINT: main
+CHECK-NOHINT: ??:0:0
+
+CHECK-HINT: main
+CHECK-HINT: dsym-test.c
+CHECK-HINT: main
+CHECK-HINT: dsym-test.c
diff --git a/test/tools/llvm-symbolizer/ppc64.test b/test/tools/llvm-symbolizer/ppc64.test
new file mode 100644
index 000000000000..fc8e4ffc7e2e
--- /dev/null
+++ b/test/tools/llvm-symbolizer/ppc64.test
@@ -0,0 +1,11 @@
+// ppc64 was compiled from this source on a big-endian 64-bit PowerPC box
+// with just "clang -nostdlib":
+int foo() { return 0; }
+int bar() { return foo(); }
+int _start() { return bar(); }
+
+RUN: %python -c "print('0x1000014c\n0x1000018c\n0x100001cc')" | llvm-symbolizer -obj=%p/Inputs/ppc64 | FileCheck %s
+
+CHECK: foo
+CHECK: bar
+CHECK: _start
diff --git a/test/tools/llvm-vtabledump/Inputs/trivial.obj.coff-i386 b/test/tools/llvm-vtabledump/Inputs/trivial.obj.coff-i386
new file mode 100644
index 000000000000..3b9395583eb5
--- /dev/null
+++ b/test/tools/llvm-vtabledump/Inputs/trivial.obj.coff-i386
diff --git a/test/tools/llvm-vtabledump/Inputs/trivial.obj.elf-i386 b/test/tools/llvm-vtabledump/Inputs/trivial.obj.elf-i386
new file mode 100644
index 000000000000..1a5c929b0829
--- /dev/null
+++ b/test/tools/llvm-vtabledump/Inputs/trivial.obj.elf-i386
diff --git a/test/tools/llvm-vtabledump/trivial.test b/test/tools/llvm-vtabledump/trivial.test
new file mode 100644
index 000000000000..92bd058cfb18
--- /dev/null
+++ b/test/tools/llvm-vtabledump/trivial.test
@@ -0,0 +1,58 @@
+RUN: llvm-vtabledump %p/Inputs/trivial.obj.coff-i386 \
+RUN:   | FileCheck %s --check-prefix=COFF-I386
+
+RUN: llvm-vtabledump %p/Inputs/trivial.obj.elf-i386 \
+RUN:   | FileCheck %s --check-prefix=ELF-I386
+
+COFF-I386:      ??_7S@@6B@[0]: ??_R4S@@6B@
+COFF-I386-NEXT: ??_7S@@6B@[4]: ??_GS@@UAEPAXI@Z
+COFF-I386-NEXT: ??_8S@@7B@[0]: -4
+COFF-I386-NEXT: ??_8S@@7B@[4]: 4
+COFF-I386-NEXT: ??_R4S@@6B@[IsImageRelative]: 0
+COFF-I386-NEXT: ??_R4S@@6B@[OffsetToTop]: 0
+COFF-I386-NEXT: ??_R4S@@6B@[VFPtrOffset]: 0
+COFF-I386-NEXT: ??_R4S@@6B@[TypeDescriptor]: ??_R0?AUS@@@8
+COFF-I386-NEXT: ??_R4S@@6B@[ClassHierarchyDescriptor]: ??_R3S@@8
+COFF-I386-NEXT: ??_R3A@@8[AlwaysZero]: 0
+COFF-I386-NEXT: ??_R3A@@8[Flags]: 0
+COFF-I386-NEXT: ??_R3A@@8[NumClasses]: 1
+COFF-I386-NEXT: ??_R3A@@8[BaseClassArray]: ??_R2A@@8
+COFF-I386-NEXT: ??_R3S@@8[AlwaysZero]: 0
+COFF-I386-NEXT: ??_R3S@@8[Flags]: 0
+COFF-I386-NEXT: ??_R3S@@8[NumClasses]: 2
+COFF-I386-NEXT: ??_R3S@@8[BaseClassArray]: ??_R2S@@8
+COFF-I386-NEXT: ??_R2A@@8[0]: ??_R1A@?0A@EA@A@@8
+COFF-I386-NEXT: ??_R2S@@8[0]: ??_R1A@?0A@EA@S@@8
+COFF-I386-NEXT: ??_R2S@@8[4]: ??_R1A@33FA@A@@8
+COFF-I386-NEXT: ??_R1A@33FA@A@@8[TypeDescriptor]: ??_R0?AUA@@@8
+COFF-I386-NEXT: ??_R1A@33FA@A@@8[NumBases]: 0
+COFF-I386-NEXT: ??_R1A@33FA@A@@8[OffsetInVBase]: 0
+COFF-I386-NEXT: ??_R1A@33FA@A@@8[VBPtrOffset]: 4
+COFF-I386-NEXT: ??_R1A@33FA@A@@8[OffsetInVBTable]: 4
+COFF-I386-NEXT: ??_R1A@33FA@A@@8[Flags]: 80
+COFF-I386-NEXT: ??_R1A@33FA@A@@8[ClassHierarchyDescriptor]: ??_R3A@@8
+COFF-I386-NEXT: ??_R1A@?0A@EA@A@@8[TypeDescriptor]: ??_R0?AUA@@@8
+COFF-I386-NEXT: ??_R1A@?0A@EA@A@@8[NumBases]: 0
+COFF-I386-NEXT: ??_R1A@?0A@EA@A@@8[OffsetInVBase]: 0
+COFF-I386-NEXT: ??_R1A@?0A@EA@A@@8[VBPtrOffset]: -1
+COFF-I386-NEXT: ??_R1A@?0A@EA@A@@8[OffsetInVBTable]: 0
+COFF-I386-NEXT: ??_R1A@?0A@EA@A@@8[Flags]: 64
+COFF-I386-NEXT: ??_R1A@?0A@EA@A@@8[ClassHierarchyDescriptor]: ??_R3A@@8
+COFF-I386-NEXT: ??_R1A@?0A@EA@S@@8[TypeDescriptor]: ??_R0?AUS@@@8
+COFF-I386-NEXT: ??_R1A@?0A@EA@S@@8[NumBases]: 1
+COFF-I386-NEXT: ??_R1A@?0A@EA@S@@8[OffsetInVBase]: 0
+COFF-I386-NEXT: ??_R1A@?0A@EA@S@@8[VBPtrOffset]: -1
+COFF-I386-NEXT: ??_R1A@?0A@EA@S@@8[OffsetInVBTable]: 0
+COFF-I386-NEXT: ??_R1A@?0A@EA@S@@8[Flags]: 64
+COFF-I386-NEXT: ??_R1A@?0A@EA@S@@8[ClassHierarchyDescriptor]: ??_R3S@@8
+COFF-I386-NEXT: ??_R0?AUA@@@8[VFPtr]: ??_7type_info@@6B@
+COFF-I386-NEXT: ??_R0?AUA@@@8[AlwaysZero]: 0
+COFF-I386-NEXT: ??_R0?AUA@@@8[MangledName]: .?AUA@@
+COFF-I386-NEXT: ??_R0?AUS@@@8[VFPtr]: ??_7type_info@@6B@
+COFF-I386-NEXT: ??_R0?AUS@@@8[AlwaysZero]: 0
+COFF-I386-NEXT: ??_R0?AUS@@@8[MangledName]: .?AUS@@
+
+ELF-I386:      _ZTS1A: 1A
+ELF-I386-NEXT: _ZTV1A[0]: 0
+ELF-I386-NEXT: _ZTV1A[4]: _ZTI1A
+ELF-I386-NEXT: _ZTV1A[8]: _ZN1A1fEv